first commit

2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions


@@ -0,0 +1,10 @@
/**
* Turn an async function into an Express middleware
*/
function expressify(fn) {
return (req, res, next) => {
fn(req, res, next).catch(next)
}
}
module.exports = expressify
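
A hedged usage sketch (the route path and loadThing() below are hypothetical, not part of this commit): wrapping an async handler with expressify forwards any rejection to Express's error-handling middleware via next.

// Hypothetical usage — route path and loadThing() are assumptions.
const express = require('express')
const expressify = require('./expressify')

const app = express()

async function getThing(req, res) {
  // If loadThing rejects, expressify catches the rejection and calls next(err)
  const thing = await loadThing(req.params.id) // assumed async loader
  res.json(thing)
}

app.get('/things/:id', expressify(getThing))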


@@ -0,0 +1,23 @@
const logger = require('@overleaf/logger')
const expressify = require('./expressify')
const { mongodb } = require('../../storage')
async function status(req, res) {
try {
await mongodb.db.command({ ping: 1 })
} catch (err) {
logger.warn({ err }, 'Lost connection with MongoDB')
res.status(500).send('Lost connection with MongoDB')
return
}
res.send('history-v1 is up')
}
function healthCheck(req, res) {
res.send('OK')
}
module.exports = {
status: expressify(status),
healthCheck,
}
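
As a sketch of how these handlers might be mounted (the route paths and module path are assumptions, not taken from this commit):

// Hypothetical wiring — paths and module path are assumptions.
const express = require('express')
const { status, healthCheck } = require('./health_check')

const router = express.Router()
router.get('/status', status) // pings MongoDB before reporting that history-v1 is up
router.get('/health_check', healthCheck) // always responds 'OK'

module.exports = router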


@@ -0,0 +1,141 @@
// @ts-check
'use strict'
const { expressify } = require('@overleaf/promise-utils')
const HTTPStatus = require('http-status')
const core = require('overleaf-editor-core')
const Change = core.Change
const Chunk = core.Chunk
const File = core.File
const FileMap = core.FileMap
const Snapshot = core.Snapshot
const TextOperation = core.TextOperation
const logger = require('@overleaf/logger')
const storage = require('../../storage')
const BatchBlobStore = storage.BatchBlobStore
const BlobStore = storage.BlobStore
const chunkStore = storage.chunkStore
const HashCheckBlobStore = storage.HashCheckBlobStore
const persistChanges = storage.persistChanges
const InvalidChangeError = storage.InvalidChangeError
const render = require('./render')
async function importSnapshot(req, res) {
const projectId = req.swagger.params.project_id.value
const rawSnapshot = req.swagger.params.snapshot.value
let snapshot
try {
snapshot = Snapshot.fromRaw(rawSnapshot)
} catch (err) {
return render.unprocessableEntity(res)
}
let historyId
try {
historyId = await chunkStore.initializeProject(projectId, snapshot)
} catch (err) {
if (err instanceof chunkStore.AlreadyInitialized) {
return render.conflict(res)
} else {
throw err
}
}
res.status(HTTPStatus.OK).json({ projectId: historyId })
}
async function importChanges(req, res, next) {
const projectId = req.swagger.params.project_id.value
const rawChanges = req.swagger.params.changes.value
const endVersion = req.swagger.params.end_version.value
const returnSnapshot = req.swagger.params.return_snapshot.value || 'none'
let changes
try {
changes = rawChanges.map(Change.fromRaw)
} catch (err) {
logger.warn({ err, projectId }, 'failed to parse changes')
return render.unprocessableEntity(res)
}
// Set limits to force us to persist all of the changes.
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
const limits = {
maxChanges: 0,
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
const hashCheckBlobStore = new HashCheckBlobStore(blobStore)
async function loadFiles() {
const blobHashes = new Set()
for (const change of changes) {
// This populates the set blobHashes with the hashes of blobs referred to in the change
change.findBlobHashes(blobHashes)
}
await batchBlobStore.preload(Array.from(blobHashes))
for (const change of changes) {
await change.loadFiles('lazy', batchBlobStore)
}
}
async function buildResultSnapshot(resultChunk) {
const chunk = resultChunk || (await chunkStore.loadLatest(projectId))
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges())
const rawSnapshot = await snapshot.store(hashCheckBlobStore)
return rawSnapshot
}
await loadFiles()
let result
try {
result = await persistChanges(projectId, changes, limits, endVersion)
} catch (err) {
if (
err instanceof Chunk.ConflictingEndVersion ||
err instanceof TextOperation.UnprocessableError ||
err instanceof File.NotEditableError ||
err instanceof FileMap.PathnameError ||
err instanceof Snapshot.EditMissingFileError ||
err instanceof chunkStore.ChunkVersionConflictError ||
err instanceof InvalidChangeError
) {
// If we failed to apply operations, that's probably because they were
// invalid.
logger.warn({ err, projectId, endVersion }, 'changes rejected by history')
return render.unprocessableEntity(res)
} else if (err instanceof Chunk.NotFoundError) {
logger.warn({ err, projectId }, 'chunk not found')
return render.notFound(res)
} else {
throw err
}
}
if (returnSnapshot === 'none') {
res.status(HTTPStatus.CREATED).json({})
} else {
const rawSnapshot = await buildResultSnapshot(result && result.currentChunk)
res.status(HTTPStatus.CREATED).json(rawSnapshot)
}
}
exports.importSnapshot = expressify(importSnapshot)
exports.importChanges = expressify(importChanges)


@@ -0,0 +1,388 @@
'use strict'
const _ = require('lodash')
const Path = require('node:path')
const Stream = require('node:stream')
const HTTPStatus = require('http-status')
const fs = require('node:fs')
const { promisify } = require('node:util')
const config = require('config')
const OError = require('@overleaf/o-error')
const logger = require('@overleaf/logger')
const { Chunk, ChunkResponse, Blob } = require('overleaf-editor-core')
const {
BlobStore,
blobHash,
chunkStore,
HashCheckBlobStore,
ProjectArchive,
zipStore,
chunkBuffer,
} = require('../../storage')
const render = require('./render')
const expressify = require('./expressify')
const withTmpDir = require('./with_tmp_dir')
const StreamSizeLimit = require('./stream_size_limit')
const pipeline = promisify(Stream.pipeline)
async function initializeProject(req, res, next) {
let projectId = req.swagger.params.body.value.projectId
try {
projectId = await chunkStore.initializeProject(projectId)
res.status(HTTPStatus.OK).json({ projectId })
} catch (err) {
if (err instanceof chunkStore.AlreadyInitialized) {
render.conflict(res)
} else {
throw err
}
}
}
async function getLatestContent(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new BlobStore(projectId)
const chunk = await chunkBuffer.loadLatest(projectId)
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges())
await snapshot.loadFiles('eager', blobStore)
res.json(snapshot.toRaw())
}
async function getContentAtVersion(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
const blobStore = new BlobStore(projectId)
const snapshot = await getSnapshotAtVersion(projectId, version)
await snapshot.loadFiles('eager', blobStore)
res.json(snapshot.toRaw())
}
async function getLatestHashedContent(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new HashCheckBlobStore(new BlobStore(projectId))
const chunk = await chunkBuffer.loadLatest(projectId)
const snapshot = chunk.getSnapshot()
snapshot.applyAll(chunk.getChanges())
await snapshot.loadFiles('eager', blobStore)
const rawSnapshot = await snapshot.store(blobStore)
res.json(rawSnapshot)
}
async function getLatestHistory(req, res, next) {
const projectId = req.swagger.params.project_id.value
try {
const chunk = await chunkBuffer.loadLatest(projectId)
const chunkResponse = new ChunkResponse(chunk)
res.json(chunkResponse.toRaw())
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
async function getLatestHistoryRaw(req, res, next) {
const projectId = req.swagger.params.project_id.value
const readOnly = req.swagger.params.readOnly.value
try {
const { startVersion, endVersion, endTimestamp } =
await chunkStore.loadLatestRaw(projectId, { readOnly })
res.json({
startVersion,
endVersion,
endTimestamp,
})
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
async function getHistory(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
try {
const chunk = await chunkStore.loadAtVersion(projectId, version)
const chunkResponse = new ChunkResponse(chunk)
res.json(chunkResponse.toRaw())
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
async function getHistoryBefore(req, res, next) {
const projectId = req.swagger.params.project_id.value
const timestamp = req.swagger.params.timestamp.value
try {
const chunk = await chunkStore.loadAtTimestamp(projectId, timestamp)
const chunkResponse = new ChunkResponse(chunk)
res.json(chunkResponse.toRaw())
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
throw err
}
}
}
/**
* Get all changes since the beginning of history or since a given version
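*
* Illustrative example (version numbers are assumed, not from this commit):
* with chunks covering versions [0, 10) and [10, 20) and since = 5, the loop
* below first prepends the later chunk's changes, then slices the earlier
* chunk's changes starting at index 5, returning the changes for versions
* 5 through 19.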
*/
async function getChanges(req, res, next) {
const projectId = req.swagger.params.project_id.value
const since = req.swagger.params.since.value ?? 0
if (since < 0) {
// Negative values would cause an infinite loop
return res.status(400).json({
error: `Version out of bounds: ${since}`,
})
}
const changes = []
let chunk = await chunkBuffer.loadLatest(projectId)
if (since > chunk.getEndVersion()) {
return res.status(400).json({
error: `Version out of bounds: ${since}`,
})
}
// Fetch all chunks that come after the chunk that contains the start version
while (chunk.getStartVersion() > since) {
const changesInChunk = chunk.getChanges()
changes.unshift(...changesInChunk)
chunk = await chunkStore.loadAtVersion(projectId, chunk.getStartVersion())
}
// Extract the relevant changes from the chunk that contains the start version
const changesInChunk = chunk
.getChanges()
.slice(since - chunk.getStartVersion())
changes.unshift(...changesInChunk)
res.json(changes.map(change => change.toRaw()))
}
async function getZip(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
const blobStore = new BlobStore(projectId)
let snapshot
try {
snapshot = await getSnapshotAtVersion(projectId, version)
} catch (err) {
if (err instanceof Chunk.NotFoundError) {
return render.notFound(res)
} else {
throw err
}
}
await withTmpDir('get-zip-', async tmpDir => {
const tmpFilename = Path.join(tmpDir, 'project.zip')
const archive = new ProjectArchive(snapshot)
await archive.writeZip(blobStore, tmpFilename)
res.set('Content-Type', 'application/octet-stream')
res.set('Content-Disposition', 'attachment; filename=project.zip')
const stream = fs.createReadStream(tmpFilename)
await pipeline(stream, res)
})
}
async function createZip(req, res, next) {
const projectId = req.swagger.params.project_id.value
const version = req.swagger.params.version.value
try {
const snapshot = await getSnapshotAtVersion(projectId, version)
const zipUrl = await zipStore.getSignedUrl(projectId, version)
// Do not await this; run it in the background.
zipStore.storeZip(projectId, version, snapshot).catch(err => {
logger.error({ err, projectId, version }, 'createZip: storeZip failed')
})
res.status(HTTPStatus.OK).json({ zipUrl })
} catch (error) {
if (error instanceof Chunk.NotFoundError) {
render.notFound(res)
} else {
next(error)
}
}
}
async function deleteProject(req, res, next) {
const projectId = req.swagger.params.project_id.value
const blobStore = new BlobStore(projectId)
await Promise.all([
chunkStore.deleteProjectChunks(projectId),
blobStore.deleteBlobs(),
])
res.status(HTTPStatus.NO_CONTENT).send()
}
async function createProjectBlob(req, res, next) {
const projectId = req.swagger.params.project_id.value
const expectedHash = req.swagger.params.hash.value
const maxUploadSize = parseInt(config.get('maxFileUploadSize'), 10)
await withTmpDir('blob-', async tmpDir => {
const tmpPath = Path.join(tmpDir, 'content')
const sizeLimit = new StreamSizeLimit(maxUploadSize)
await pipeline(req, sizeLimit, fs.createWriteStream(tmpPath))
if (sizeLimit.sizeLimitExceeded) {
return render.requestEntityTooLarge(res)
}
const hash = await blobHash.fromFile(tmpPath)
if (hash !== expectedHash) {
logger.debug({ hash, expectedHash }, 'Hash mismatch')
return render.conflict(res, 'File hash mismatch')
}
const blobStore = new BlobStore(projectId)
const newBlob = await blobStore.putFile(tmpPath)
try {
const { backupBlob } = await import('../../storage/lib/backupBlob.mjs')
await backupBlob(projectId, newBlob, tmpPath)
} catch (error) {
logger.warn({ error, projectId, hash }, 'Failed to backup blob')
}
res.status(HTTPStatus.CREATED).end()
})
}
async function headProjectBlob(req, res) {
const projectId = req.swagger.params.project_id.value
const hash = req.swagger.params.hash.value
const blobStore = new BlobStore(projectId)
const blob = await blobStore.getBlob(hash)
if (blob) {
res.set('Content-Length', blob.getByteLength())
res.status(200).end()
} else {
res.status(404).end()
}
}
// Support simple, singular ranges starting from zero only, up to 2MB = 2_000_000 bytes (7 digits)
const RANGE_HEADER = /^bytes=0-(\d{1,7})$/
/**
* @param {string} header
* @return {{}|{start: number, end: number}}
* @private
*/
function _getRangeOpts(header) {
if (!header) return {}
const match = header.match(RANGE_HEADER)
if (match) {
const end = parseInt(match[1], 10)
return { start: 0, end }
}
return {}
}
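// Illustrative examples (header values assumed):
//   _getRangeOpts('bytes=0-99')    => { start: 0, end: 99 }
//   _getRangeOpts('bytes=100-200') => {}  (only ranges starting at zero are supported)
//   _getRangeOpts('')              => {}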
async function getProjectBlob(req, res, next) {
const projectId = req.swagger.params.project_id.value
const hash = req.swagger.params.hash.value
const opts = _getRangeOpts(req.swagger.params.range.value || '')
const blobStore = new BlobStore(projectId)
logger.debug({ projectId, hash }, 'getProjectBlob started')
try {
let stream
try {
stream = await blobStore.getStream(hash, opts)
} catch (err) {
if (err instanceof Blob.NotFoundError) {
logger.warn({ projectId, hash }, 'Blob not found')
return res.status(404).end()
} else {
throw err
}
}
res.set('Content-Type', 'application/octet-stream')
try {
await pipeline(stream, res)
} catch (err) {
if (err?.code === 'ERR_STREAM_PREMATURE_CLOSE') {
res.end()
} else {
throw OError.tag(err, 'error transferring stream', { projectId, hash })
}
}
} finally {
logger.debug({ projectId, hash }, 'getProjectBlob finished')
}
}
async function copyProjectBlob(req, res, next) {
const sourceProjectId = req.swagger.params.copyFrom.value
const targetProjectId = req.swagger.params.project_id.value
const blobHash = req.swagger.params.hash.value
// Check that blob exists in source project
const sourceBlobStore = new BlobStore(sourceProjectId)
const targetBlobStore = new BlobStore(targetProjectId)
const [sourceBlob, targetBlob] = await Promise.all([
sourceBlobStore.getBlob(blobHash),
targetBlobStore.getBlob(blobHash),
])
if (!sourceBlob) {
return render.notFound(res)
}
// Exit early if the blob exists in the target project.
// This will also catch global blobs, which always exist.
if (targetBlob) {
return res.status(HTTPStatus.NO_CONTENT).end()
}
// Otherwise, copy blob from source project to target project
await sourceBlobStore.copyBlob(sourceBlob, targetProjectId)
res.status(HTTPStatus.CREATED).end()
}
async function getSnapshotAtVersion(projectId, version) {
const chunk = await chunkStore.loadAtVersion(projectId, version)
const snapshot = chunk.getSnapshot()
const changes = _.dropRight(
chunk.getChanges(),
chunk.getEndVersion() - version
)
snapshot.applyAll(changes)
return snapshot
}
module.exports = {
initializeProject: expressify(initializeProject),
getLatestContent: expressify(getLatestContent),
getContentAtVersion: expressify(getContentAtVersion),
getLatestHashedContent: expressify(getLatestHashedContent),
getLatestPersistedHistory: expressify(getLatestHistory),
getLatestHistory: expressify(getLatestHistory),
getLatestHistoryRaw: expressify(getLatestHistoryRaw),
getHistory: expressify(getHistory),
getHistoryBefore: expressify(getHistoryBefore),
getChanges: expressify(getChanges),
getZip: expressify(getZip),
createZip: expressify(createZip),
deleteProject: expressify(deleteProject),
createProjectBlob: expressify(createProjectBlob),
getProjectBlob: expressify(getProjectBlob),
headProjectBlob: expressify(headProjectBlob),
copyProjectBlob: expressify(copyProjectBlob),
}
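
These handlers read validated parameters from req.swagger.params, a shape that suggests they expect upstream Swagger/OpenAPI middleware to parse and validate the request first (an assumption based on the req.swagger usage, not stated in this commit). A hypothetical test-style stub of that shape, with made-up values:

// Hypothetical request stub — values are made up; only the shape matters.
const req = {
  swagger: {
    params: {
      project_id: { value: 'abc123' },
      hash: { value: 'da39a3ee5e6b4b0d3255bfef95601890afd80709' },
      range: { value: 'bytes=0-99' },
    },
  },
}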


@@ -0,0 +1,17 @@
'use strict'
const HTTPStatus = require('http-status')
function makeErrorRenderer(status) {
return (res, message) => {
res.status(status).json({ message: message || HTTPStatus[status] })
}
}
module.exports = {
badRequest: makeErrorRenderer(HTTPStatus.BAD_REQUEST),
notFound: makeErrorRenderer(HTTPStatus.NOT_FOUND),
unprocessableEntity: makeErrorRenderer(HTTPStatus.UNPROCESSABLE_ENTITY),
conflict: makeErrorRenderer(HTTPStatus.CONFLICT),
requestEntityTooLarge: makeErrorRenderer(HTTPStatus.REQUEST_ENTITY_TOO_LARGE),
}
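
Illustrative usage (res is an Express response; the message string is made up):

const render = require('./render')

function rejectMismatch(res) {
  // Responds 409 with the JSON body { message: 'File hash mismatch' }
  render.conflict(res, 'File hash mismatch')
}

function rejectMissing(res) {
  // Responds 404 with the default http-status reason phrase as the message
  render.notFound(res)
}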


@@ -0,0 +1,26 @@
const stream = require('node:stream')
/**
* Transform stream that stops passing bytes through after some threshold has
* been reached.
*/
class StreamSizeLimit extends stream.Transform {
constructor(maxSize) {
super()
this.maxSize = maxSize
this.accumulatedSize = 0
this.sizeLimitExceeded = false
}
_transform(chunk, encoding, cb) {
this.accumulatedSize += chunk.length
if (this.accumulatedSize > this.maxSize) {
this.sizeLimitExceeded = true
} else {
this.push(chunk)
}
cb()
}
}
module.exports = StreamSizeLimit
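
A hedged usage sketch, mirroring how createProjectBlob uses this class elsewhere in this commit (the function name and arguments are illustrative):

// Usage sketch — names and values are illustrative, not from this commit.
const fs = require('node:fs')
const { pipeline } = require('node:stream/promises')
const StreamSizeLimit = require('./stream_size_limit')

async function saveUpload(req, tmpPath, maxUploadSize) {
  const sizeLimit = new StreamSizeLimit(maxUploadSize)
  await pipeline(req, sizeLimit, fs.createWriteStream(tmpPath))
  // Bytes past the limit are silently dropped; callers must check the flag.
  return !sizeLimit.sizeLimitExceeded
}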


@@ -0,0 +1,27 @@
const fs = require('node:fs')
const fsExtra = require('fs-extra')
const logger = require('@overleaf/logger')
const os = require('node:os')
const path = require('node:path')
/**
* Create a temporary directory, execute the given function with it, and clean
* up afterwards.
*
* @param {string} prefix - prefix for the temporary directory name
* @param {Function} fn - async function to call
*/
async function withTmpDir(prefix, fn) {
const tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), prefix))
try {
await fn(tmpDir)
} finally {
fsExtra.remove(tmpDir).catch(err => {
if (err.code !== 'ENOENT') {
logger.error({ err }, 'failed to delete temporary directory')
}
})
}
}
module.exports = withTmpDir
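
A hedged usage sketch, following the getZip pattern elsewhere in this commit (the prefix and file name are illustrative):

// Usage sketch — names and values are illustrative, not from this commit.
const Path = require('node:path')
const fs = require('node:fs')
const withTmpDir = require('./with_tmp_dir')

async function writeScratchFile(contents) {
  await withTmpDir('example-', async tmpDir => {
    const tmpFilename = Path.join(tmpDir, 'scratch.txt')
    await fs.promises.writeFile(tmpFilename, contents)
    // The directory (and the file inside it) is removed once this callback
    // resolves, so copy anything you need to keep before returning.
  })
}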