// overleaf-server-arm/services/web/app/src/Features/Compile/CompileController.js
let CompileController
const { URL, URLSearchParams } = require('url')
const { pipeline } = require('stream/promises')
const { Cookie } = require('tough-cookie')
const OError = require('@overleaf/o-error')
const Metrics = require('@overleaf/metrics')
const ProjectGetter = require('../Project/ProjectGetter')
const CompileManager = require('./CompileManager')
const ClsiManager = require('./ClsiManager')
const logger = require('@overleaf/logger')
const Settings = require('@overleaf/settings')
const SessionManager = require('../Authentication/SessionManager')
const { RateLimiter } = require('../../infrastructure/RateLimiter')
const ClsiCookieManager = require('./ClsiCookieManager')(
Settings.apis.clsi?.backendGroupName
)
const Path = require('path')
const AnalyticsManager = require('../Analytics/AnalyticsManager')
const SplitTestHandler = require('../SplitTests/SplitTestHandler')
const { callbackify } = require('@overleaf/promise-utils')
const {
fetchStreamWithResponse,
RequestFailedError,
} = require('@overleaf/fetch-utils')
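// How long a compile HTTP response may stay open before timing out (10 minutes).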
const COMPILE_TIMEOUT_MS = 10 * 60 * 1000
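// Throttle full PDF downloads to 1000 per IP address per hour.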
const pdfDownloadRateLimiter = new RateLimiter('full-pdf-download', {
points: 1000,
duration: 60 * 60,
})
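// Describe the downloadable archive (output.zip) of all output files for a build.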
function getOutputFilesArchiveSpecification(projectId, userId, buildId) {
const fileName = 'output.zip'
return {
path: fileName,
url: CompileController._getFileUrl(projectId, userId, buildId, fileName),
type: 'zip',
}
}
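// Look up the compiler image (imageName) configured for the project.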
function getImageNameForProject(projectId, callback) {
ProjectGetter.getProject(projectId, { imageName: 1 }, (err, project) => {
if (err) return callback(err)
if (!project) return callback(new Error('project not found'))
callback(null, project.imageName)
})
}
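// Resolve the minimum chunk size for PDF caching from its split test; the
// 'default' variant maps to 1,000,000 bytes, any other variant is parsed as an integer.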
async function getPdfCachingMinChunkSize(req, res) {
const { variant } = await SplitTestHandler.promises.getAssignment(
req,
res,
'pdf-caching-min-chunk-size'
)
if (variant === 'default') return 1_000_000
return parseInt(variant, 10)
}
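// Resolve the split-test flags that affect compiles: reading from and
// populating the clsi-cache, PDF caching and its minimum chunk size.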
async function _getSplitTestOptions(req, res) {
// Use the query flags from the editor page request to override the split tests.
let query = {}
try {
const u = new URL(req.headers.referer || req.url, Settings.siteUrl)
query = Object.fromEntries(u.searchParams.entries())
} catch (e) {}
const editorReq = { ...req, query }
// Look up the clsi-cache flags in the backend.
// We may need to turn the feature off at short notice, without requiring
// all users to reload their editor page to disable it.
const { variant: compileFromClsiCacheVariant } =
await SplitTestHandler.promises.getAssignment(
editorReq,
res,
'compile-from-clsi-cache'
)
const compileFromClsiCache = compileFromClsiCacheVariant === 'enabled'
const { variant: populateClsiCacheVariant } =
await SplitTestHandler.promises.getAssignment(
editorReq,
res,
'populate-clsi-cache'
)
const populateClsiCache = populateClsiCacheVariant === 'enabled'
const pdfDownloadDomain = Settings.pdfDownloadDomain
if (!req.query.enable_pdf_caching) {
// The frontend does not want to do pdf caching.
return {
compileFromClsiCache,
populateClsiCache,
pdfDownloadDomain,
enablePdfCaching: false,
}
}
// Double-check against the latest split-test assignment.
// We may need to turn the feature off at short notice, without requiring
// all users to reload their editor page to disable it.
const { variant } = await SplitTestHandler.promises.getAssignment(
editorReq,
res,
'pdf-caching-mode'
)
const enablePdfCaching = variant === 'enabled'
if (!enablePdfCaching) {
// Skip the lookup of the chunk size when caching is not enabled.
return {
compileFromClsiCache,
populateClsiCache,
pdfDownloadDomain,
enablePdfCaching: false,
}
}
const pdfCachingMinChunkSize = await getPdfCachingMinChunkSize(editorReq, res)
return {
compileFromClsiCache,
populateClsiCache,
pdfDownloadDomain,
enablePdfCaching,
pdfCachingMinChunkSize,
}
}
const getSplitTestOptionsCb = callbackify(_getSplitTestOptions)
module.exports = CompileController = {
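// Main compile endpoint: collect compile options from the request, apply
// split-test flags, run the compile via CompileManager and respond with the
// compile status, output files and PDF-caching settings.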
compile(req, res, next) {
res.setTimeout(COMPILE_TIMEOUT_MS)
const projectId = req.params.Project_id
const isAutoCompile = !!req.query.auto_compile
const fileLineErrors = !!req.query.file_line_errors
const stopOnFirstError = !!req.body.stopOnFirstError
const userId = SessionManager.getLoggedInUserId(req.session)
const options = {
isAutoCompile,
fileLineErrors,
stopOnFirstError,
editorId: req.body.editorId,
}
if (req.body.rootDoc_id) {
options.rootDoc_id = req.body.rootDoc_id
} else if (
req.body.settingsOverride &&
req.body.settingsOverride.rootDoc_id
) {
// Can be removed after deploy
options.rootDoc_id = req.body.settingsOverride.rootDoc_id
}
if (req.body.compiler) {
options.compiler = req.body.compiler
}
if (req.body.draft) {
options.draft = req.body.draft
}
if (['validate', 'error', 'silent'].includes(req.body.check)) {
options.check = req.body.check
}
if (req.body.incrementalCompilesEnabled) {
options.incrementalCompilesEnabled = true
}
getSplitTestOptionsCb(req, res, (err, splitTestOptions) => {
if (err) return next(err)
let {
compileFromClsiCache,
populateClsiCache,
enablePdfCaching,
pdfCachingMinChunkSize,
pdfDownloadDomain,
} = splitTestOptions
options.compileFromClsiCache = compileFromClsiCache
options.populateClsiCache = populateClsiCache
options.enablePdfCaching = enablePdfCaching
if (enablePdfCaching) {
options.pdfCachingMinChunkSize = pdfCachingMinChunkSize
}
CompileManager.compile(
projectId,
userId,
options,
(
error,
status,
outputFiles,
clsiServerId,
limits,
validationProblems,
stats,
timings,
outputUrlPrefix,
buildId
) => {
if (error) {
Metrics.inc('compile-error')
return next(error)
}
Metrics.inc('compile-status', 1, { status })
if (pdfDownloadDomain && outputUrlPrefix) {
pdfDownloadDomain += outputUrlPrefix
}
if (
limits &&
SplitTestHandler.getPercentile(
AnalyticsManager.getIdsFromSession(req.session).analyticsId,
'compile-result-backend',
'release'
) === 1
) {
// A compile request is only sent to the CLSI when we have limits.
// If we get here without a populated limits object, it is reasonable
// to assume that nothing was compiled. We also need the limits in
// order to make sense of the recorded events.
AnalyticsManager.recordEventForSession(
req.session,
'compile-result-backend',
{
projectId,
ownerAnalyticsId: limits.ownerAnalyticsId,
status,
compileTime: timings?.compileE2E,
timeout: limits.timeout === 60 ? 'short' : 'long',
server: clsiServerId?.includes('-c2d-') ? 'faster' : 'normal',
isAutoCompile,
isInitialCompile: stats?.isInitialCompile === 1,
restoredClsiCache: stats?.restoredClsiCache === 1,
stopOnFirstError,
}
)
}
const outputFilesArchive = buildId
? getOutputFilesArchiveSpecification(projectId, userId, buildId)
: null
res.json({
status,
outputFiles,
outputFilesArchive,
compileGroup: limits?.compileGroup,
clsiServerId,
validationProblems,
stats,
timings,
outputUrlPrefix,
pdfDownloadDomain,
pdfCachingMinChunkSize,
})
}
)
})
},
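// Stop an in-progress compile for this project and user.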
stopCompile(req, res, next) {
const projectId = req.params.Project_id
const userId = SessionManager.getLoggedInUserId(req.session)
CompileManager.stopCompile(projectId, userId, function (error) {
if (error) {
return next(error)
}
res.sendStatus(200)
})
},
// Used for submissions through the public API
compileSubmission(req, res, next) {
res.setTimeout(COMPILE_TIMEOUT_MS)
const submissionId = req.params.submission_id
const options = {}
if (req.body?.rootResourcePath != null) {
options.rootResourcePath = req.body.rootResourcePath
}
if (req.body?.compiler) {
options.compiler = req.body.compiler
}
if (req.body?.draft) {
options.draft = req.body.draft
}
if (['validate', 'error', 'silent'].includes(req.body?.check)) {
options.check = req.body.check
}
options.compileGroup =
req.body?.compileGroup || Settings.defaultFeatures.compileGroup
options.compileBackendClass = Settings.apis.clsi.submissionBackendClass
options.timeout =
req.body?.timeout || Settings.defaultFeatures.compileTimeout
ClsiManager.sendExternalRequest(
submissionId,
req.body,
options,
function (error, status, outputFiles, clsiServerId, validationProblems) {
if (error) {
return next(error)
}
res.json({
status,
outputFiles,
clsiServerId,
validationProblems,
})
}
)
},
_getSplitTestOptions,
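// Compiles run in per-user containers unless per-user compiles are disabled,
// in which case the compile is scoped to the project only (userId = null).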
_getUserIdForCompile(req) {
if (!Settings.disablePerUserCompiles) {
return SessionManager.getLoggedInUserId(req.session)
}
return null
},
_compileAsUser(req, callback) {
callback(null, CompileController._getUserIdForCompile(req))
},
_downloadAsUser(req, callback) {
callback(null, CompileController._getUserIdForCompile(req))
},
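// Stream the compiled output.pdf to the client with a filename derived from
// the project name, rate limited per IP.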
downloadPdf(req, res, next) {
Metrics.inc('pdf-downloads')
const projectId = req.params.Project_id
const rateLimit = function (callback) {
pdfDownloadRateLimiter
.consume(req.ip, 1, { method: 'ip' })
.then(() => {
callback(null, true)
})
.catch(err => {
if (err instanceof Error) {
callback(err)
} else {
callback(null, false)
}
})
}
ProjectGetter.getProject(projectId, { name: 1 }, function (err, project) {
if (err) {
return next(err)
}
res.contentType('application/pdf')
const filename = `${CompileController._getSafeProjectName(project)}.pdf`
if (req.query.popupDownload) {
res.setContentDisposition('attachment', { filename })
} else {
res.setContentDisposition('inline', { filename })
}
rateLimit(function (err, canContinue) {
if (err) {
logger.err({ err }, 'error checking rate limit for pdf download')
res.sendStatus(500)
} else if (!canContinue) {
logger.debug(
{ projectId, ip: req.ip },
'rate limit hit downloading pdf'
)
res.sendStatus(500)
} else {
CompileController._downloadAsUser(req, function (error, userId) {
if (error) {
return next(error)
}
const url = CompileController._getFileUrl(
projectId,
userId,
req.params.build_id,
'output.pdf'
)
CompileController.proxyToClsi(
projectId,
'output-file',
url,
{},
req,
res,
next
)
})
}
})
})
},
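// Replace any character that is not a letter or digit with an underscore,
// so the project name can be used as a download filename.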
_getSafeProjectName(project) {
return project.name.replace(/[^\p{L}\p{Nd}]/gu, '_')
},
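// Clear the cached output and auxiliary files for this project on the CLSI.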
deleteAuxFiles(req, res, next) {
const projectId = req.params.Project_id
const { clsiserverid } = req.query
CompileController._compileAsUser(req, function (error, userId) {
if (error) {
return next(error)
}
CompileManager.deleteAuxFiles(
projectId,
userId,
clsiserverid,
function (error) {
if (error) {
return next(error)
}
res.sendStatus(200)
}
)
})
},
// this is only used by templates, so is not called with a userId
compileAndDownloadPdf(req, res, next) {
const projectId = req.params.project_id
// pass userId as null, since templates are an "anonymous" compile
CompileManager.compile(projectId, null, {}, (err, _status, outputFiles) => {
if (err) {
logger.err(
{ err, projectId },
'something went wrong compiling and downloading pdf'
)
res.sendStatus(500)
return
}
const pdf = outputFiles.find(f => f.path === 'output.pdf')
if (!pdf) {
logger.warn(
{ projectId },
'something went wrong compiling and downloading pdf: no pdf'
)
res.sendStatus(500)
return
}
CompileController.proxyToClsi(
projectId,
'output-file',
pdf.url,
{},
req,
res,
next
)
})
},
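// Stream a single build output file from the CLSI to the client.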
getFileFromClsi(req, res, next) {
const projectId = req.params.Project_id
CompileController._downloadAsUser(req, function (error, userId) {
if (error) {
return next(error)
}
const qs = {}
const url = CompileController._getFileUrl(
projectId,
userId,
req.params.build_id,
req.params.file
)
CompileController.proxyToClsi(
projectId,
'output-file',
url,
qs,
req,
res,
next
)
})
},
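// Stream a build output file for a public API submission (no user scoping),
// using the submission compile group and backend class.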
getFileFromClsiWithoutUser(req, res, next) {
const submissionId = req.params.submission_id
const url = CompileController._getFileUrl(
submissionId,
null,
req.params.build_id,
req.params.file
)
const limits = {
compileGroup:
req.body?.compileGroup ||
req.query?.compileGroup ||
Settings.defaultFeatures.compileGroup,
compileBackendClass: Settings.apis.clsi.submissionBackendClass,
}
CompileController.proxyToClsiWithLimits(
submissionId,
'output-file',
url,
{},
limits,
req,
res,
next
)
},
// compute a GET file url for a given project, user (optional), build (optional) and file
_getFileUrl(projectId, userId, buildId, file) {
let url
if (userId != null && buildId != null) {
url = `/project/${projectId}/user/${userId}/build/${buildId}/output/${file}`
} else if (userId != null) {
url = `/project/${projectId}/user/${userId}/output/${file}`
} else if (buildId != null) {
url = `/project/${projectId}/build/${buildId}/output/${file}`
} else {
url = `/project/${projectId}/output/${file}`
}
return url
},
// compute a POST url for a project, user (optional) and action
_getUrl(projectId, userId, action) {
let path = `/project/${projectId}`
if (userId != null) {
path += `/user/${userId}`
}
return `${path}/${action}`
},
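// SyncTeX lookup from a PDF position (page/h/v): validate the parameters and
// proxy the request to the CLSI 'sync/pdf' endpoint.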
proxySyncPdf(req, res, next) {
const projectId = req.params.Project_id
const { page, h, v, editorId, buildId } = req.query
if (!page?.match(/^\d+$/)) {
return next(new Error('invalid page parameter'))
}
if (!h?.match(/^-?\d+\.\d+$/)) {
return next(new Error('invalid h parameter'))
}
if (!v?.match(/^-?\d+\.\d+$/)) {
return next(new Error('invalid v parameter'))
}
// Determine whether this request should go to a per-user compile container.
CompileController._compileAsUser(req, function (error, userId) {
if (error) {
return next(error)
}
getImageNameForProject(projectId, (error, imageName) => {
if (error) return next(error)
getSplitTestOptionsCb(req, res, (error, splitTestOptions) => {
if (error) return next(error)
const { compileFromClsiCache } = splitTestOptions
const url = CompileController._getUrl(projectId, userId, 'sync/pdf')
CompileController.proxyToClsi(
projectId,
'sync-to-pdf',
url,
{ page, h, v, imageName, editorId, buildId, compileFromClsiCache },
req,
res,
next
)
})
})
})
},
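// SyncTeX lookup from a source position (file/line/column): validate the
// parameters and proxy the request to the CLSI 'sync/code' endpoint.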
proxySyncCode(req, res, next) {
const projectId = req.params.Project_id
const { file, line, column, editorId, buildId } = req.query
if (file == null) {
return next(new Error('missing file parameter'))
}
// Check that we are dealing with a simple file path. This is not
// strictly needed, because synctex uses this parameter as a label to
// look up in the synctex output and does not open the file itself.
// Valid synctex paths such as foo/./bar are allowed by replacing /./
// with / before the check.
const testPath = file.replace('/./', '/')
if (Path.resolve('/', testPath) !== `/${testPath}`) {
return next(new Error('invalid file parameter'))
}
if (!line?.match(/^\d+$/)) {
return next(new Error('invalid line parameter'))
}
if (!column?.match(/^\d+$/)) {
return next(new Error('invalid column parameter'))
}
CompileController._compileAsUser(req, function (error, userId) {
if (error) {
return next(error)
}
getImageNameForProject(projectId, (error, imageName) => {
if (error) return next(error)
getSplitTestOptionsCb(req, res, (error, splitTestOptions) => {
if (error) return next(error)
const { compileFromClsiCache } = splitTestOptions
const url = CompileController._getUrl(projectId, userId, 'sync/code')
CompileController.proxyToClsi(
projectId,
'sync-to-code',
url,
{
file,
line,
column,
imageName,
editorId,
buildId,
compileFromClsiCache,
},
req,
res,
next
)
})
})
})
},
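// Look up the project's compile limits, then proxy the request to the CLSI.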
proxyToClsi(projectId, action, url, qs, req, res, next) {
CompileManager.getProjectCompileLimits(projectId, function (error, limits) {
if (error) {
return next(error)
}
CompileController.proxyToClsiWithLimits(
projectId,
action,
url,
qs,
limits,
req,
res,
next
)
})
},
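// Stream a request through to the CLSI, attaching the routing ("persistence")
// options, recording metrics, and handling client disconnects and CLSI errors.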
proxyToClsiWithLimits(projectId, action, url, qs, limits, req, res, next) {
_getPersistenceOptions(
req,
projectId,
limits.compileGroup,
limits.compileBackendClass,
(err, persistenceOptions) => {
if (err) {
OError.tag(err, 'error getting cookie jar for clsi request')
return next(err)
}
url = new URL(`${Settings.apis.clsi.url}${url}`)
url.search = new URLSearchParams({
...persistenceOptions.qs,
...qs,
}).toString()
const timer = new Metrics.Timer(
'proxy_to_clsi',
1,
{ path: action },
[0, 100, 1000, 2000, 5000, 10000, 15000, 20000, 30000, 45000, 60000]
)
Metrics.inc('proxy_to_clsi', 1, { path: action, status: 'start' })
fetchStreamWithResponse(url.href, {
method: req.method,
signal: AbortSignal.timeout(60 * 1000),
headers: persistenceOptions.headers,
})
.then(({ stream, response }) => {
if (req.destroyed) {
// The client has already disconnected; avoid writing into the broken connection.
Metrics.inc('proxy_to_clsi', 1, {
path: action,
status: 'req-aborted',
})
return
}
Metrics.inc('proxy_to_clsi', 1, {
path: action,
status: response.status,
})
for (const key of ['Content-Length', 'Content-Type']) {
if (response.headers.has(key)) {
res.setHeader(key, response.headers.get(key))
}
}
res.writeHead(response.status)
return pipeline(stream, res)
})
.then(() => {
timer.labels.status = 'success'
timer.done()
})
.catch(err => {
const reqAborted = Boolean(req.destroyed)
const status = reqAborted ? 'req-aborted-late' : 'error'
timer.labels.status = status
const duration = timer.done()
Metrics.inc('proxy_to_clsi', 1, { path: action, status })
const streamingStarted = Boolean(res.headersSent)
if (!streamingStarted) {
if (err instanceof RequestFailedError) {
res.sendStatus(err.response.status)
} else {
res.sendStatus(500)
}
}
if (
streamingStarted &&
reqAborted &&
err.code === 'ERR_STREAM_PREMATURE_CLOSE'
) {
// Ignore noisy spurious error
return
}
if (
err instanceof RequestFailedError &&
['sync-to-code', 'sync-to-pdf', 'output-file'].includes(action)
) {
// Ignore noisy error
// https://github.com/overleaf/internal/issues/15201
return
}
logger.warn(
{
err,
projectId,
url,
action,
reqAborted,
streamingStarted,
duration,
},
'CLSI proxy error'
)
})
}
)
},
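// Proxy a word-count request (optionally for a specific file) to the CLSI and
// return the result as JSON.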
wordCount(req, res, next) {
const projectId = req.params.Project_id
const file = req.query.file || false
const { clsiserverid } = req.query
CompileController._compileAsUser(req, function (error, userId) {
if (error) {
return next(error)
}
CompileManager.wordCount(
projectId,
userId,
file,
clsiserverid,
function (error, body) {
if (error) {
return next(error)
}
res.json(body)
}
)
})
},
}
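// Build the query string and Cookie header that route a request to the CLSI
// instance holding the project's compile state: prefer an explicit
// clsiserverid query parameter, otherwise look up the server id via the
// CLSI cookie manager.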
function _getPersistenceOptions(
req,
projectId,
compileGroup,
compileBackendClass,
callback
) {
const { clsiserverid } = req.query
const userId = SessionManager.getLoggedInUserId(req)
if (clsiserverid && typeof clsiserverid === 'string') {
callback(null, {
qs: { clsiserverid, compileGroup, compileBackendClass },
headers: {},
})
} else {
ClsiCookieManager.getServerId(
projectId,
userId,
compileGroup,
compileBackendClass,
(err, clsiServerId) => {
if (err) return callback(err)
callback(null, {
qs: { compileGroup, compileBackendClass },
headers: clsiServerId
? {
Cookie: new Cookie({
key: Settings.clsiCookie.key,
value: clsiServerId,
}).cookieString(),
}
: {},
})
}
)
}
}