first commit

This commit is contained in:
2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,30 @@
const OError = require('@overleaf/o-error')
const { Errors } = require('@overleaf/object-persistor')
// Domain error types for the filestore service. All extend OError so callers
// can attach structured info objects and preserve error causes.

// Thrown when a health-check probe fails.
class HealthCheckError extends OError {}
// Thrown when an image conversion is requested but disabled in settings.
class ConversionsDisabledError extends OError {}
// Thrown when a file conversion fails or is requested with invalid options.
class ConversionError extends OError {}
class TimeoutError extends OError {}
// Thrown when request parameters (e.g. object keys) fail validation.
class InvalidParametersError extends OError {}

// Thrown when a spawned shell command exits non-zero or is killed by signal.
class FailedCommandError extends OError {
  constructor(command, code, stdout, stderr) {
    super('command failed with error exit code', {
      command,
      code,
    })
    this.stdout = stdout
    this.stderr = stderr
    // Exit code, or the signal name when the process was killed (see SafeExec).
    this.code = code
  }
}

// Re-export the generic object-persistor errors (NotFoundError, ReadError,
// WriteError, ...) so callers have a single Errors module to import from.
module.exports = {
  FailedCommandError,
  ConversionsDisabledError,
  ConversionError,
  HealthCheckError,
  TimeoutError,
  InvalidParametersError,
  ...Errors,
}

View File

@@ -0,0 +1,214 @@
const FileHandler = require('./FileHandler')
const metrics = require('@overleaf/metrics')
const parseRange = require('range-parser')
const Errors = require('./Errors')
const { pipeline } = require('node:stream')
const maxSizeInBytes = 1024 * 1024 * 1024 // 1GB
// Express route handlers for file CRUD. Each handler expects req.key and
// req.bucket to have been attached by one of the KeyBuilder middlewares.
module.exports = {
  getFile,
  getFileHead,
  insertFile,
  copyFile,
  deleteFile,
  deleteProject,
  directorySize,
}
/**
 * GET a file, optionally converted (format/style) and/or a byte range.
 * Prefers redirecting to a signed URL when the persistor supports it;
 * otherwise streams the object back. Responds 404 when the object is
 * missing, and treats `cacheWarm` requests as conversion-only (no body).
 */
function getFile(req, res, next) {
  const { key, bucket } = req
  const { format, style } = req.query
  const options = {
    key,
    bucket,
    format,
    style,
  }
  metrics.inc('getFile')
  req.requestLogger.setMessage('getting file')
  req.requestLogger.addFields({
    key,
    bucket,
    format,
    style,
    cacheWarm: req.query.cacheWarm,
  })
  if (req.headers.range) {
    const range = _getRange(req.headers.range)
    if (range) {
      options.start = range.start
      options.end = range.end
      req.requestLogger.addFields({ range })
    }
  }
  FileHandler.getRedirectUrl(bucket, key, options, function (err, redirectUrl) {
    if (err) {
      // A failed signed-URL lookup is not fatal; fall through to proxying.
      metrics.inc('file_redirect_error')
    }
    if (redirectUrl) {
      metrics.inc('file_redirect')
      return res.redirect(redirectUrl)
    }
    FileHandler.getFile(bucket, key, options, function (err, fileStream) {
      if (err) {
        if (err instanceof Errors.NotFoundError) {
          res.sendStatus(404)
        } else {
          next(err)
        }
        return
      }
      if (req.query.cacheWarm) {
        // Cache warming only needs the conversion side effect, not the
        // bytes: drop the stream and acknowledge.
        fileStream.destroy()
        // FIX: sendStatus() already sends and ends the response; the
        // previous extra `.end()` ended the response a second time.
        return res.sendStatus(200)
      }
      pipeline(fileStream, res, err => {
        if (err && err.code === 'ERR_STREAM_PREMATURE_CLOSE') {
          // The client went away mid-transfer; close quietly.
          res.end()
        } else if (err) {
          next(
            new Errors.ReadError(
              'error transferring stream',
              { bucket, key, format, style },
              err
            )
          )
        }
      })
    })
  })
}
/**
 * HEAD handler: respond with the object's size in the Content-Length
 * header (empty body), or 404 when the object does not exist.
 */
function getFileHead(req, res, next) {
  const { key, bucket } = req
  metrics.inc('getFileSize')
  req.requestLogger.setMessage('getting file size')
  req.requestLogger.addFields({ key, bucket })
  FileHandler.getFileSize(bucket, key, (err, fileSize) => {
    if (err instanceof Errors.NotFoundError) {
      return res.sendStatus(404)
    }
    if (err) {
      return next(err)
    }
    res.set('Content-Length', fileSize)
    res.status(200).end()
  })
}
/** PUT handler: stream the request body into the store under req.key. */
function insertFile(req, res, next) {
  const { key, bucket } = req
  metrics.inc('insertFile')
  req.requestLogger.setMessage('inserting file')
  req.requestLogger.addFields({ key, bucket })
  FileHandler.insertFile(bucket, key, req, err => {
    if (err) {
      return next(err)
    }
    res.sendStatus(200)
  })
}
/**
 * PUT handler: copy an existing object (source project/file ids in the
 * request body) to req.key. 404 when the source object is missing.
 */
function copyFile(req, res, next) {
  metrics.inc('copyFile')
  const { key, bucket } = req
  const { project_id: oldProjectId, file_id: oldFileId } = req.body.source
  req.requestLogger.setMessage('copying file')
  req.requestLogger.addFields({
    key,
    bucket,
    oldProject_id: oldProjectId,
    oldFile_id: oldFileId,
  })
  FileHandler.copyObject(bucket, `${oldProjectId}/${oldFileId}`, key, err => {
    if (!err) {
      return res.sendStatus(200)
    }
    if (err instanceof Errors.NotFoundError) {
      res.sendStatus(404)
    } else {
      next(err)
    }
  })
}
/** DELETE handler: remove a single object; 204 on success. */
function deleteFile(req, res, next) {
  metrics.inc('deleteFile')
  const { key, bucket } = req
  req.requestLogger.setMessage('deleting file')
  req.requestLogger.addFields({ key, bucket })
  FileHandler.deleteFile(bucket, key, err => {
    if (err) {
      return next(err)
    }
    res.sendStatus(204)
  })
}
/**
 * DELETE handler: remove a whole project folder; 204 on success,
 * 400 when the key fails validation in the handler layer.
 */
function deleteProject(req, res, next) {
  metrics.inc('deleteProject')
  const { key, bucket } = req
  req.requestLogger.setMessage('deleting project')
  req.requestLogger.addFields({ key, bucket })
  FileHandler.deleteProject(bucket, key, err => {
    if (!err) {
      return res.sendStatus(204)
    }
    if (err instanceof Errors.InvalidParametersError) {
      return res.sendStatus(400)
    }
    next(err)
  })
}
/**
 * GET handler: report the total size in bytes of a project's folder.
 *
 * FIX: the size is now added to the request logger BEFORE the response is
 * sent. RequestLogger flushes its fields inside its `res.end` override,
 * and `res.json` ends the response — so a field added after `res.json`
 * was silently dropped from the access log.
 */
function directorySize(req, res, next) {
  metrics.inc('projectSize')
  const { project_id: projectId, bucket } = req
  req.requestLogger.setMessage('getting project size')
  req.requestLogger.addFields({ projectId, bucket })
  FileHandler.getDirectorySize(bucket, projectId, function (err, size) {
    if (err) {
      return next(err)
    }
    req.requestLogger.addFields({ size })
    res.json({ 'total bytes': size })
  })
}
/**
 * Parse an HTTP Range header into `{ start, end }` using the first byte
 * range, or return null when the header is malformed (-2), unsatisfiable
 * (-1), or not a byte range. Ranges are resolved against the 1GB cap.
 */
function _getRange(header) {
  const parsed = parseRange(maxSizeInBytes, header)
  const invalid = parsed === -1 || parsed === -2
  if (invalid || parsed.type !== 'bytes') {
    return null
  }
  const [{ start, end }] = parsed
  return { start, end }
}

View File

@@ -0,0 +1,98 @@
const metrics = require('@overleaf/metrics')
const Settings = require('@overleaf/settings')
const { callbackify } = require('node:util')
const safeExec = require('./SafeExec').promises
const { ConversionError } = require('./Errors')
// Only PNG output is currently supported.
const APPROVED_FORMATS = ['png']
// Conversion timeout in ms (identifier keeps its historical spelling).
const FOURTY_SECONDS = 40 * 1000
const KILL_SIGNAL = 'SIGTERM'

// ImageMagick-based PDF→image conversion, exposed in both callback and
// promise flavours.
module.exports = {
  convert: callbackify(convert),
  thumbnail: callbackify(thumbnail),
  preview: callbackify(preview),
  promises: {
    convert,
    thumbnail,
    preview,
  },
}
/**
 * Convert the first page of the source file to the requested format,
 * fitted to a 600px-wide page. Resolves with the output path.
 */
async function convert(sourcePath, requestedFormat) {
  const pageWidth = '600x'
  const commandArgs = [
    'convert',
    '-define',
    `pdf:fit-page=${pageWidth}`,
    '-flatten',
    '-density',
    '300',
    `${sourcePath}[0]`,
  ]
  return await _convert(sourcePath, requestedFormat, commandArgs)
}
/**
 * Create a 260px-wide PNG thumbnail of the source file's first page.
 *
 * FIX: this previously passed its command array as a third argument to
 * convert(), which only accepts (sourcePath, requestedFormat) and
 * silently ignored it — so the thumbnail-specific command (white
 * background, 260px fit and resize) was never used and convert()'s
 * default 600px command ran instead. Call _convert() directly.
 */
async function thumbnail(sourcePath) {
  const width = '260x'
  return await _convert(sourcePath, 'png', [
    'convert',
    '-flatten',
    '-background',
    'white',
    '-density',
    '300',
    '-define',
    `pdf:fit-page=${width}`,
    `${sourcePath}[0]`,
    '-resize',
    width,
  ])
}
/**
 * Create a 548px-wide PNG preview of the source file's first page.
 *
 * FIX: as with thumbnail(), the command array was previously passed to
 * convert(), which ignores a third argument — the preview-specific
 * command never ran. Call _convert() directly.
 */
async function preview(sourcePath) {
  const width = '548x'
  return await _convert(sourcePath, 'png', [
    'convert',
    '-flatten',
    '-background',
    'white',
    '-density',
    '300',
    '-define',
    `pdf:fit-page=${width}`,
    `${sourcePath}[0]`,
    '-resize',
    width,
  ])
}
/**
 * Run an ImageMagick command (prefixed per Settings) writing to
 * `<sourcePath>.<requestedFormat>`; resolves with that destination path.
 *
 * @param {string} sourcePath
 * @param {string} requestedFormat - must be in APPROVED_FORMATS
 * @param {string[]} command - base command; NOT mutated (previously the
 *   caller's array was push()ed to and the parameter reassigned)
 * @throws {ConversionError} on invalid format or a failed command
 */
async function _convert(sourcePath, requestedFormat, command) {
  if (!APPROVED_FORMATS.includes(requestedFormat)) {
    throw new ConversionError('invalid format requested', {
      format: requestedFormat,
    })
  }
  const timer = new metrics.Timer('imageConvert')
  const destPath = `${sourcePath}.${requestedFormat}`
  // Build the full command without mutating the caller's array:
  // prefix + command + destination, same order as before.
  const fullCommand = Settings.commands.convertCommandPrefix.concat(command, [
    destPath,
  ])
  try {
    await safeExec(fullCommand, {
      killSignal: KILL_SIGNAL,
      timeout: FOURTY_SECONDS,
    })
  } catch (err) {
    throw new ConversionError(
      'something went wrong converting file',
      { stderr: err.stderr, sourcePath, requestedFormat, destPath },
      err
    )
  }
  // Timer is only recorded for successful conversions.
  timer.done()
  return destPath
}

View File

@@ -0,0 +1,228 @@
const Settings = require('@overleaf/settings')
const { callbackify } = require('node:util')
const fs = require('node:fs')
let PersistorManager = require('./PersistorManager')
const LocalFileWriter = require('./LocalFileWriter')
const FileConverter = require('./FileConverter')
const KeyBuilder = require('./KeyBuilder')
const ImageOptimiser = require('./ImageOptimiser')
const { ConversionError, InvalidParametersError } = require('./Errors')
const metrics = require('@overleaf/metrics')
// Orchestrates object storage access, key validation and on-the-fly
// conversions. Exposed in both callback and promise flavours.
module.exports = {
  copyObject: callbackify(copyObject),
  insertFile: callbackify(insertFile),
  deleteFile: callbackify(deleteFile),
  deleteProject: callbackify(deleteProject),
  getFile: callbackify(getFile),
  getRedirectUrl: callbackify(getRedirectUrl),
  getFileSize: callbackify(getFileSize),
  getDirectorySize: callbackify(getDirectorySize),
  promises: {
    copyObject,
    getFile,
    getRedirectUrl,
    insertFile,
    deleteFile,
    deleteProject,
    getFileSize,
    getDirectorySize,
  },
}

// Allow unit tests to swap in a stubbed persistor — PersistorManager is
// declared with `let` above for exactly this reason.
if (process.env.NODE_ENV === 'test') {
  module.exports._TESTONLYSwapPersistorManager = _PersistorManager => {
    PersistorManager = _PersistorManager
  }
}

// Copy an object within a bucket from sourceKey to destinationKey.
async function copyObject(bucket, sourceKey, destinationKey) {
  await PersistorManager.copyObject(bucket, sourceKey, destinationKey)
}
// Keys must look like "<24-hex>/<24-hex>" (user/project files) or
// "<24-hex>/v/<version>/<type>" (template files) once converted to the
// converted-folder form. Previously this regex was duplicated inline in
// both insertFile and deleteFile; it is now shared.
const VALID_CONVERTED_KEY_REGEX =
  /^[0-9a-f]{24}\/([0-9a-f]{24}|v\/[0-9]+\/[a-z]+)/i

/** @throws {InvalidParametersError} when the key fails validation */
function _assertKeyIsValid(bucket, key, convertedKey) {
  if (!VALID_CONVERTED_KEY_REGEX.test(convertedKey)) {
    throw new InvalidParametersError('key does not match validation regex', {
      bucket,
      key,
      convertedKey,
    })
  }
}

/** Store the given stream in the bucket under `key`. */
async function insertFile(bucket, key, stream) {
  const convertedKey = KeyBuilder.getConvertedFolderKey(key)
  _assertKeyIsValid(bucket, key, convertedKey)
  await PersistorManager.sendStream(bucket, key, stream)
}

/**
 * Delete an object and, for template files with conversions enabled,
 * its folder of cached conversions as well (both in parallel).
 */
async function deleteFile(bucket, key) {
  const convertedKey = KeyBuilder.getConvertedFolderKey(key)
  _assertKeyIsValid(bucket, key, convertedKey)
  const jobs = [PersistorManager.deleteObject(bucket, key)]
  if (
    Settings.enableConversions &&
    bucket === Settings.filestore.stores.template_files
  ) {
    jobs.push(PersistorManager.deleteDirectory(bucket, convertedKey))
  }
  await Promise.all(jobs)
}
/**
 * Delete a whole project folder. The key must be a 24-hex project id
 * followed by a slash; anything else is rejected to avoid deleting
 * unexpected paths.
 * @throws {InvalidParametersError}
 */
async function deleteProject(bucket, key) {
  const projectFolderPattern = /^[0-9a-f]{24}\//i
  if (!projectFolderPattern.test(key)) {
    throw new InvalidParametersError('key does not match validation regex', {
      bucket,
      key,
    })
  }
  await PersistorManager.deleteDirectory(bucket, key)
}
/**
 * Get a readable stream for the object; when a format or style is
 * requested, serve (and lazily build) the converted variant instead.
 */
async function getFile(bucket, key, opts) {
  opts = opts || {}
  const wantsConversion = Boolean(opts.format || opts.style)
  if (wantsConversion) {
    return await _getConvertedFile(bucket, key, opts)
  }
  return await PersistorManager.getObjectStream(bucket, key, opts)
}
// Number of signed-URL requests currently in flight, reported as a gauge.
let ACTIVE_SIGNED_URL_CALLS = 0

/**
 * Return a signed redirect URL for the object, or null when the request
 * must be proxied instead (ranged or converted requests, buckets outside
 * the configured stores, or redirects disabled in settings).
 */
async function getRedirectUrl(bucket, key, opts) {
  // if we're doing anything unusual with options, or the request isn't for
  // one of the default buckets, return null so that we proxy the file
  opts = opts || {}
  if (
    !opts.start &&
    !opts.end &&
    !opts.format &&
    !opts.style &&
    Object.values(Settings.filestore.stores).includes(bucket) &&
    Settings.filestore.allowRedirects
  ) {
    // record the number of in-flight calls to generate signed URLs
    metrics.gauge('active_signed_url_calls', ++ACTIVE_SIGNED_URL_CALLS, {
      path: bucket,
    })
    try {
      // Timer is only recorded for successful lookups.
      const timer = new metrics.Timer('signed_url_call_time', {
        path: bucket,
      })
      const redirectUrl = await PersistorManager.getRedirectUrl(bucket, key)
      timer.done()
      return redirectUrl
    } finally {
      // Always decrement the gauge, even when the lookup throws.
      metrics.gauge('active_signed_url_calls', --ACTIVE_SIGNED_URL_CALLS, {
        path: bucket,
      })
    }
  }
  return null
}
// Size in bytes of a single object.
async function getFileSize(bucket, key) {
  return await PersistorManager.getObjectSize(bucket, key)
}

// Total size in bytes of everything under a project's folder.
async function getDirectorySize(bucket, projectId) {
  return await PersistorManager.directorySize(bucket, projectId)
}
/**
 * Serve the cached converted variant of the object when it exists;
 * otherwise convert it now, cache the result, and stream it back.
 */
async function _getConvertedFile(bucket, key, opts) {
  const convertedKey = KeyBuilder.addCachingToKey(key, opts)
  const alreadyCached = await PersistorManager.checkIfObjectExists(
    bucket,
    convertedKey
  )
  if (!alreadyCached) {
    return await _getConvertedFileAndCache(bucket, key, convertedKey, opts)
  }
  return await PersistorManager.getObjectStream(bucket, convertedKey, opts)
}
/**
 * Convert the object, upload the converted copy under convertedKey for
 * future requests, and return a read stream of the LOCAL converted file.
 * The local temp file is deleted once fully streamed (or on stream error).
 */
async function _getConvertedFileAndCache(bucket, key, convertedKey, opts) {
  let convertedFsPath
  try {
    convertedFsPath = await _convertFile(bucket, key, opts)
    await ImageOptimiser.promises.compressPng(convertedFsPath)
    await PersistorManager.sendFile(bucket, convertedKey, convertedFsPath)
  } catch (err) {
    // Best-effort cleanup of the local temp file; the conversion error
    // is what matters to the caller.
    LocalFileWriter.deleteFile(convertedFsPath, () => {})
    throw new ConversionError(
      'failed to convert file',
      { opts, bucket, key, convertedKey },
      err
    )
  }
  // Send back the converted file from the local copy to avoid problems
  // with the file not being present in S3 yet. As described in the
  // documentation below, we have already made a 'HEAD' request in
  // checkIfFileExists so we only have "eventual consistency" if we try
  // to stream it from S3 here. This was a cause of many 403 errors.
  //
  // "Amazon S3 provides read-after-write consistency for PUTS of new
  // objects in your S3 bucket in all regions with one caveat. The
  // caveat is that if you make a HEAD or GET request to the key name
  // (to find if the object exists) before creating the object, Amazon
  // S3 provides eventual consistency for read-after-write.""
  // https://docs.aws.amazon.com/AmazonS3/latest/dev/Introduction.html#ConsistencyModel
  const readStream = fs.createReadStream(convertedFsPath)
  readStream.on('error', function () {
    LocalFileWriter.deleteFile(convertedFsPath, function () {})
  })
  readStream.on('end', function () {
    LocalFileWriter.deleteFile(convertedFsPath, function () {})
  })
  return readStream
}
/**
 * Download the original object to disk, convert it according to opts
 * (format / style=thumbnail / style=preview) and resolve with the path
 * of the converted file.
 *
 * FIX: the downloaded source file was previously only deleted on the
 * success path, so a failed conversion (or invalid opts) leaked a temp
 * file in the upload folder on every attempt. It is now always removed
 * (fire-and-forget, matching the previous success-path behaviour).
 *
 * @throws {ConversionError} when the download, the options, or the
 *   conversion itself fail.
 */
async function _convertFile(bucket, originalKey, opts) {
  let originalFsPath
  try {
    originalFsPath = await _writeFileToDisk(bucket, originalKey, opts)
  } catch (err) {
    throw new ConversionError(
      'unable to write file to disk',
      { bucket, originalKey, opts },
      err
    )
  }
  // Pick the converter first so invalid options are rejected with the
  // original error message, without being wrapped as a conversion error.
  let runConversion
  if (opts.format) {
    runConversion = () =>
      FileConverter.promises.convert(originalFsPath, opts.format)
  } else if (opts.style === 'thumbnail') {
    runConversion = () => FileConverter.promises.thumbnail(originalFsPath)
  } else if (opts.style === 'preview') {
    runConversion = () => FileConverter.promises.preview(originalFsPath)
  } else {
    LocalFileWriter.deleteFile(originalFsPath, function () {})
    throw new ConversionError('invalid file conversion options', {
      bucket,
      originalKey,
      opts,
    })
  }
  try {
    return await runConversion()
  } catch (err) {
    throw new ConversionError(
      'error converting file',
      { bucket, originalKey, opts },
      err
    )
  } finally {
    // Clean up the downloaded source whether conversion succeeded or not.
    LocalFileWriter.deleteFile(originalFsPath, function () {})
  }
}
// Download the object and write it to a local temp file; resolves with
// the local filesystem path of the downloaded copy.
async function _writeFileToDisk(bucket, key, opts) {
  const fileStream = await PersistorManager.getObjectStream(bucket, key, opts)
  return await LocalFileWriter.promises.writeStream(fileStream, key)
}

View File

@@ -0,0 +1,67 @@
const fs = require('node:fs')
const path = require('node:path')
const Settings = require('@overleaf/settings')
const { WritableBuffer } = require('@overleaf/stream-utils')
const { promisify } = require('node:util')
const Stream = require('node:stream')
const pipeline = promisify(Stream.pipeline)
const fsCopy = promisify(fs.copyFile)
const fsUnlink = promisify(fs.unlink)
const { HealthCheckError } = require('./Errors')
const FileConverter = require('./FileConverter').promises
const FileHandler = require('./FileHandler').promises
/**
 * Health probe: download the configured health-check file and verify it
 * is non-empty. Resolves quietly when no health check is configured.
 * @throws {HealthCheckError}
 */
async function checkCanGetFiles() {
  if (!Settings.health_check) {
    return
  }
  const projectId = Settings.health_check.project_id
  const fileId = Settings.health_check.file_id
  const key = `${projectId}/${fileId}`
  const bucket = Settings.filestore.stores.user_files
  // Buffer the download in memory so we can check that bytes arrived.
  const buffer = new WritableBuffer({ initialSize: 100 })
  const sourceStream = await FileHandler.getFile(bucket, key, {})
  try {
    await pipeline(sourceStream, buffer)
  } catch (err) {
    throw new HealthCheckError('failed to get health-check file', {}, err)
  }
  if (!buffer.size()) {
    throw new HealthCheckError('no bytes written to download stream')
  }
}
/**
 * Health probe: verify image conversion works by thumbnailing a bundled
 * tiny.pdf. Skipped when conversions are disabled.
 *
 * FIX: cleanup previously ran bare `fsUnlink` calls in the finally
 * block, so when the initial copy (or the conversion) failed, unlinking
 * the missing file threw ENOENT and masked the real failure. Cleanup is
 * now best-effort: ENOENT is ignored, other unlink errors still surface.
 */
async function checkFileConvert() {
  if (!Settings.enableConversions) {
    return
  }
  const imgPath = path.join(Settings.path.uploadFolder, '/tiny.pdf')
  let resultPath
  try {
    await fsCopy('./tiny.pdf', imgPath)
    resultPath = await FileConverter.thumbnail(imgPath)
  } finally {
    const ignoreMissing = err => {
      if (err.code !== 'ENOENT') {
        throw err
      }
    }
    if (resultPath) {
      await fsUnlink(resultPath).catch(ignoreMissing)
    }
    await fsUnlink(imgPath).catch(ignoreMissing)
  }
}
module.exports = {
check(req, res, next) {
Promise.all([checkCanGetFiles(), checkFileConvert()])
.then(() => res.sendStatus(200))
.catch(err => {
next(err)
})
},
}

View File

@@ -0,0 +1,34 @@
const logger = require('@overleaf/logger')
const metrics = require('@overleaf/metrics')
const { callbackify } = require('node:util')
const safeExec = require('./SafeExec').promises
module.exports = {
compressPng: callbackify(compressPng),
promises: {
compressPng,
},
}
async function compressPng(localPath, callback) {
const timer = new metrics.Timer('compressPng')
const args = ['optipng', localPath]
const opts = {
timeout: 30 * 1000,
killSignal: 'SIGKILL',
}
try {
await safeExec(args, opts)
timer.done()
} catch (err) {
if (err.code === 'SIGKILL') {
logger.warn(
{ err, stderr: err.stderr, localPath },
'optimiser timeout reached'
)
} else {
throw err
}
}
}

View File

@@ -0,0 +1,71 @@
const settings = require('@overleaf/settings')
// Builds object-store keys, plus Express middlewares that attach
// req.key / req.bucket for each route family.
module.exports = {
  getConvertedFolderKey,
  addCachingToKey,
  userFileKeyMiddleware,
  userProjectKeyMiddleware,
  bucketFileKeyMiddleware,
  templateFileKeyMiddleware,
}
/** Key of the folder that holds cached conversions of `key`. */
function getConvertedFolderKey(key) {
  return `${key}-converted-cache/`
}

/**
 * Build the cache key for a converted variant of `key`, encoding the
 * requested format and/or style.
 *
 * FIX: calls getConvertedFolderKey() directly instead of via `this`.
 * The `this` form only worked when invoked as a method of the exports
 * object and broke when the function was destructured or passed around
 * unbound. Also no longer reassigns the `key` parameter.
 */
function addCachingToKey(key, opts) {
  const folderKey = getConvertedFolderKey(key)
  if (opts.format && opts.style) {
    return `${folderKey}format-${opts.format}-style-${opts.style}`
  }
  if (opts.format) {
    return `${folderKey}format-${opts.format}`
  }
  if (opts.style) {
    return `${folderKey}style-${opts.style}`
  }
  return folderKey
}
/** Attach req.key ("<project>/<file>") and the user-files bucket. */
function userFileKeyMiddleware(req, res, next) {
  const { project_id: projectId, file_id: fileId } = req.params
  req.bucket = settings.filestore.stores.user_files
  req.key = `${projectId}/${fileId}`
  next()
}
/** Attach req.key ("<project>/"), req.project_id and the user-files bucket. */
function userProjectKeyMiddleware(req, res, next) {
  const { project_id: projectId } = req.params
  req.bucket = settings.filestore.stores.user_files
  req.project_id = projectId
  req.key = `${projectId}/`
  next()
}
/** Use an explicit bucket from the route, with the wildcard path as key. */
function bucketFileKeyMiddleware(req, res, next) {
  const { bucket } = req.params
  req.bucket = bucket
  req.key = req.params[0]
  next()
}
/**
 * Attach req.key ("<template>/v/<version>/<format>[/<subType>]"),
 * req.version and the template-files bucket.
 */
function templateFileKeyMiddleware(req, res, next) {
  const {
    template_id: templateId,
    format,
    version,
    sub_type: subType,
  } = req.params
  const segments = [templateId, 'v', version, format]
  if (subType) {
    segments.push(subType)
  }
  req.key = segments.join('/')
  req.bucket = settings.filestore.stores.template_files
  req.version = version
  next()
}

View File

@@ -0,0 +1,56 @@
const fs = require('node:fs')
const crypto = require('node:crypto')
const path = require('node:path')
const Stream = require('node:stream')
const { callbackify, promisify } = require('node:util')
const metrics = require('@overleaf/metrics')
const Settings = require('@overleaf/settings')
const { WriteError } = require('./Errors')
// Writes incoming streams to temp files in the upload folder and cleans
// them up afterwards. Both callback and promise APIs are exported.
module.exports = {
  promises: {
    writeStream,
    deleteFile,
  },
  writeStream: callbackify(writeStream),
  deleteFile: callbackify(deleteFile),
}

const pipeline = promisify(Stream.pipeline)
/**
 * Write a readable stream to a local temp file derived from `key`;
 * resolves with the file's path. On failure the partial file is removed
 * before the WriteError is thrown.
 *
 * FIX: renamed the local `writeStream` variable, which shadowed this
 * function's own name inside its body.
 */
async function writeStream(stream, key) {
  const timer = new metrics.Timer('writingFile')
  const fsPath = _getPath(key)
  const destination = fs.createWriteStream(fsPath)
  try {
    await pipeline(stream, destination)
    timer.done()
    return fsPath
  } catch (err) {
    // Clean up the partial file before reporting the failure.
    await deleteFile(fsPath)
    throw new WriteError('problem writing file locally', { fsPath }, err)
  }
}
/**
 * Delete a local file, tolerating files that are already gone.
 *
 * FIX: uses fs.promises.unlink instead of re-promisifying fs.unlink on
 * every call.
 *
 * @param {string|null|undefined} fsPath - no-op when falsy
 * @throws {WriteError} when the unlink fails for any reason other than
 *   the file already being absent (ENOENT)
 */
async function deleteFile(fsPath) {
  if (!fsPath) {
    return
  }
  try {
    await fs.promises.unlink(fsPath)
  } catch (err) {
    if (err.code !== 'ENOENT') {
      throw new WriteError('failed to delete file', { fsPath }, err)
    }
  }
}
/**
 * Map an object key to a path in the upload folder. Slashes are
 * flattened to dashes so the key cannot escape the folder; a random
 * UUID is used when no key is given.
 */
function _getPath(key) {
  const name = key == null ? crypto.randomUUID() : key.replace(/\//g, '-')
  return path.join(Settings.path.uploadFolder, name)
}

View File

@@ -0,0 +1,9 @@
// Thin wrapper that instantiates the shared @overleaf/object-persistor
// with this service's filestore settings (backend, buckets, local paths).
const settings = require('@overleaf/settings')
const persistorSettings = settings.filestore
// The persistor reads local filesystem locations from `paths`.
persistorSettings.paths = settings.path
const ObjectPersistor = require('@overleaf/object-persistor')
const persistor = ObjectPersistor(persistorSettings)
module.exports = persistor

View File

@@ -0,0 +1,61 @@
const logger = require('@overleaf/logger')
const metrics = require('@overleaf/metrics')
/**
 * Per-request structured logger. Middleware attaches an instance as
 * req.requestLogger; handlers add fields/messages during processing and
 * everything is emitted in a single log line when the response ends.
 */
class RequestLogger {
  constructor() {
    // Accumulated structured fields, flushed when the response ends.
    this._logInfo = {}
    this._logMessage = 'http request'
  }

  // Merge structured fields into this request's log line.
  addFields(fields) {
    Object.assign(this._logInfo, fields)
  }

  // Replace the message used for this request's log line.
  setMessage(message) {
    this._logMessage = message
  }

  // Express error handler (4-arg signature required by Express): record
  // the error on the request log and respond 500 with the message.
  static errorHandler(err, req, res, next) {
    req.requestLogger.addFields({ error: err })
    res.status(500).send(err.message)
  }

  // Express middleware: attach a RequestLogger and emit log + metrics
  // when the response finishes.
  static middleware(req, res, next) {
    const startTime = new Date()
    req.requestLogger = new RequestLogger()

    // override the 'end' method to log and record metrics
    const end = res.end
    res.end = function () {
      // apply the standard request 'end' method before logging and metrics
      end.apply(this, arguments)

      const responseTime = new Date() - startTime

      // Only emit timing metrics for matched routes (req.route is unset
      // for 404s and other unrouted requests).
      const routePath = req.route && req.route.path.toString()

      if (routePath) {
        metrics.timing('http_request', responseTime, null, {
          method: req.method,
          status_code: res.statusCode,
          path: routePath.replace(/\//g, '_').replace(/:/g, '').slice(1),
        })
      }

      // Server errors are logged at error level; everything else at debug.
      const level = res.statusCode >= 500 ? 'err' : 'debug'
      logger[level](
        {
          req,
          res,
          responseTimeMs: responseTime,
          info: req.requestLogger._logInfo,
        },
        req.requestLogger._logMessage
      )
    }

    next()
  }
}

module.exports = RequestLogger

View File

@@ -0,0 +1,85 @@
const lodashOnce = require('lodash.once')
const childProcess = require('node:child_process')
const Settings = require('@overleaf/settings')
const { ConversionsDisabledError, FailedCommandError } = require('./Errors')
// execute a command in the same way as 'exec' but with a timeout that
// kills all child processes
//
// we spawn the command with 'detached:true' to make a new process
// group, then we can kill everything in that process group.
module.exports = safeExec
module.exports.promises = safeExecPromise

// options are {timeout: number-of-milliseconds, killSignal: signal-name}
/**
 * Run a command (array of [cmd, ...args]) with an optional timeout that
 * kills the whole process group. Calls back with (err, stdout, stderr).
 * Fails fast with ConversionsDisabledError when conversions are off.
 */
function safeExec(command, options, callback) {
  if (!Settings.enableConversions) {
    return callback(
      new ConversionsDisabledError('image conversions are disabled')
    )
  }

  const [cmd, ...args] = command
  // detached:true puts the child in its own process group so the timeout
  // can kill the child and anything it spawned (see module comment).
  const child = childProcess.spawn(cmd, args, { detached: true })
  let stdout = ''
  let stderr = ''

  let killTimer

  // 'close' and 'error' can both fire; lodashOnce guarantees the caller
  // is only called back a single time, and the kill timer is cleared.
  const cleanup = lodashOnce(function (err) {
    if (killTimer) {
      clearTimeout(killTimer)
    }
    callback(err, stdout, stderr)
  })

  if (options.timeout) {
    killTimer = setTimeout(function () {
      try {
        // use negative process id to kill process group
        process.kill(-child.pid, options.killSignal || 'SIGTERM')
      } catch (error) {
        cleanup(
          new FailedCommandError('failed to kill process after timeout', {
            command,
            options,
            pid: child.pid,
          })
        )
      }
    }, options.timeout)
  }

  child.on('close', function (code, signal) {
    if (code || signal) {
      // Non-zero exit or killed by signal: FailedCommandError carries the
      // command, exit code/signal and captured output.
      return cleanup(
        new FailedCommandError(command, code || signal, stdout, stderr)
      )
    }

    cleanup()
  })

  child.on('error', err => {
    cleanup(err)
  })
  child.stdout.on('data', chunk => {
    stdout += chunk
  })
  child.stderr.on('data', chunk => {
    stderr += chunk
  })
}
/**
 * Promise wrapper around safeExec; resolves with { stdout, stderr }.
 *
 * FIX: return after reject(). Previously resolve() was still invoked
 * after a rejection — a no-op since a promise settles only once, but
 * misleading and masking the control flow.
 */
function safeExecPromise(command, options) {
  return new Promise((resolve, reject) => {
    safeExec(command, options, (err, stdout, stderr) => {
      if (err) {
        return reject(err)
      }
      resolve({ stdout, stderr })
    })
  })
}