first commit

2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,227 @@
const { callbackify } = require('node:util')
const MongoManager = require('./MongoManager').promises
const Errors = require('./Errors')
const logger = require('@overleaf/logger')
const Settings = require('@overleaf/settings')
const crypto = require('node:crypto')
const { ReadableString } = require('@overleaf/stream-utils')
const RangeManager = require('./RangeManager')
const PersistorManager = require('./PersistorManager')
const pMap = require('p-map')
const { streamToBuffer } = require('./StreamToBuffer').promises
const { BSON } = require('mongodb-legacy')
const PARALLEL_JOBS = Settings.parallelArchiveJobs
const UN_ARCHIVE_BATCH_SIZE = Settings.unArchiveBatchSize
module.exports = {
archiveAllDocs: callbackify(archiveAllDocs),
archiveDoc: callbackify(archiveDoc),
unArchiveAllDocs: callbackify(unArchiveAllDocs),
unarchiveDoc: callbackify(unarchiveDoc),
destroyProject: callbackify(destroyProject),
getDoc: callbackify(getDoc),
promises: {
archiveAllDocs,
archiveDoc,
unArchiveAllDocs,
unarchiveDoc,
destroyProject,
getDoc,
},
}
async function archiveAllDocs(projectId) {
if (!_isArchivingEnabled()) {
return
}
const docIds = await MongoManager.getNonArchivedProjectDocIds(projectId)
await pMap(docIds, docId => archiveDoc(projectId, docId), {
concurrency: PARALLEL_JOBS,
})
}
async function archiveDoc(projectId, docId) {
if (!_isArchivingEnabled()) {
return
}
const doc = await MongoManager.getDocForArchiving(projectId, docId)
if (!doc) {
// The doc wasn't found, it was already archived, or the lock couldn't be
// acquired. Since we don't know which it is, silently return.
return
}
logger.debug({ projectId, docId: doc._id }, 'sending doc to persistor')
const key = `${projectId}/${doc._id}`
if (doc.lines == null) {
throw new Error('doc has no lines')
}
// warn about any oversized docs already in mongo
const linesSize = BSON.calculateObjectSize(doc.lines || {})
const rangesSize = BSON.calculateObjectSize(doc.ranges || {})
if (
linesSize > Settings.max_doc_length ||
rangesSize > Settings.max_doc_length
) {
logger.warn(
{ projectId, docId: doc._id, linesSize, rangesSize },
'large doc found when archiving project'
)
}
const json = JSON.stringify({
lines: doc.lines,
ranges: doc.ranges,
rev: doc.rev,
schema_v: 1,
})
// this should never happen, but protects against memory-corruption errors that
// have happened in the past
if (json.indexOf('\u0000') > -1) {
const error = new Error('null bytes detected')
logger.err({ err: error, doc }, error.message)
throw error
}
const md5 = crypto.createHash('md5').update(json).digest('hex')
const stream = new ReadableString(json)
await PersistorManager.sendStream(Settings.docstore.bucket, key, stream, {
sourceMd5: md5,
})
await MongoManager.markDocAsArchived(projectId, docId, doc.rev)
}
async function unArchiveAllDocs(projectId) {
if (!_isArchivingEnabled()) {
return
}
while (true) {
let docs
if (Settings.docstore.keepSoftDeletedDocsArchived) {
docs = await MongoManager.getNonDeletedArchivedProjectDocs(
projectId,
UN_ARCHIVE_BATCH_SIZE
)
} else {
docs = await MongoManager.getArchivedProjectDocs(
projectId,
UN_ARCHIVE_BATCH_SIZE
)
}
if (!docs || docs.length === 0) {
break
}
await pMap(docs, doc => unarchiveDoc(projectId, doc._id), {
concurrency: PARALLEL_JOBS,
})
}
}
// get the doc from the PersistorManager without storing it in mongo
async function getDoc(projectId, docId) {
const key = `${projectId}/${docId}`
const sourceMd5 = await PersistorManager.getObjectMd5Hash(
Settings.docstore.bucket,
key
)
const stream = await PersistorManager.getObjectStream(
Settings.docstore.bucket,
key
)
stream.resume()
const buffer = await streamToBuffer(projectId, docId, stream)
const md5 = crypto.createHash('md5').update(buffer).digest('hex')
if (sourceMd5 !== md5) {
throw new Errors.Md5MismatchError('md5 mismatch when downloading doc', {
key,
sourceMd5,
md5,
})
}
return _deserializeArchivedDoc(buffer)
}
// get the doc and unarchive it to mongo
async function unarchiveDoc(projectId, docId) {
logger.debug({ projectId, docId }, 'getting doc from persistor')
const mongoDoc = await MongoManager.findDoc(projectId, docId, {
inS3: 1,
rev: 1,
})
if (!mongoDoc.inS3) {
// The doc is already unarchived
return
}
if (!_isArchivingEnabled()) {
throw new Error(
'found archived doc, but archiving backend is not configured'
)
}
const archivedDoc = await getDoc(projectId, docId)
if (archivedDoc.rev == null) {
// Older archived docs didn't have a rev. Assume that the rev of the
// archived doc is the rev that was stored in Mongo when we retrieved it
// earlier.
archivedDoc.rev = mongoDoc.rev
}
await MongoManager.restoreArchivedDoc(projectId, docId, archivedDoc)
}
async function destroyProject(projectId) {
const tasks = [MongoManager.destroyProject(projectId)]
if (_isArchivingEnabled()) {
tasks.push(
PersistorManager.deleteDirectory(Settings.docstore.bucket, projectId)
)
}
await Promise.all(tasks)
}
function _deserializeArchivedDoc(buffer) {
const doc = JSON.parse(buffer)
const result = {}
if (doc.schema_v === 1 && doc.lines != null) {
result.lines = doc.lines
if (doc.ranges != null) {
result.ranges = RangeManager.jsonRangesToMongo(doc.ranges)
}
} else if (Array.isArray(doc)) {
result.lines = doc
} else {
throw new Error("I don't understand the doc format in s3")
}
if (doc.rev != null) {
result.rev = doc.rev
}
return result
}
function _isArchivingEnabled() {
const backend = Settings.docstore.backend
if (!backend) {
return false
}
// The default backend is S3. If another backend is configured or the S3
// backend itself is correctly configured, then archiving is enabled.
if (backend === 's3' && Settings.docstore.s3 == null) {
return false
}
return true
}
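
A minimal usage sketch of the promise API above (the `./DocArchiveManager` path matches the require in DocManager below; `projectId` is assumed to be a valid project id, and an object-storage backend is assumed to be configured in Settings.docstore):

const DocArchiveManager = require('./DocArchiveManager')

async function cycleProjectArchive(projectId) {
  // both calls return early (no-op) when no archiving backend is configured
  await DocArchiveManager.promises.archiveAllDocs(projectId)
  await DocArchiveManager.promises.unArchiveAllDocs(projectId)
}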

View File

@@ -0,0 +1,297 @@
const MongoManager = require('./MongoManager')
const Errors = require('./Errors')
const logger = require('@overleaf/logger')
const _ = require('lodash')
const DocArchive = require('./DocArchiveManager')
const RangeManager = require('./RangeManager')
const Settings = require('@overleaf/settings')
const { callbackifyAll } = require('@overleaf/promise-utils')
const { setTimeout } = require('node:timers/promises')
/**
* @import { Document } from 'mongodb'
* @import { WithId } from 'mongodb'
*/
const DocManager = {
/**
* @param {string} projectId
* @param {string} docId
* @param {{inS3: boolean}} filter
* @returns {Promise<WithId<Document>>}
* @private
*/
async _getDoc(projectId, docId, filter) {
if (filter == null) {
filter = {}
}
if (filter.inS3 !== true) {
throw new Error('must include inS3 when getting doc')
}
const doc = await MongoManager.promises.findDoc(projectId, docId, filter)
if (doc == null) {
throw new Errors.NotFoundError(
`No such doc: ${docId} in project ${projectId}`
)
}
if (doc.inS3) {
await DocArchive.promises.unarchiveDoc(projectId, docId)
return await DocManager._getDoc(projectId, docId, filter)
}
return doc
},
async isDocDeleted(projectId, docId) {
const doc = await MongoManager.promises.findDoc(projectId, docId, {
deleted: true,
})
if (!doc) {
throw new Errors.NotFoundError(
`No such project/doc: ${projectId}/${docId}`
)
}
// `doc.deleted` is `undefined` for non-deleted docs
return Boolean(doc.deleted)
},
async getFullDoc(projectId, docId) {
const doc = await DocManager._getDoc(projectId, docId, {
lines: true,
rev: true,
deleted: true,
version: true,
ranges: true,
inS3: true,
})
return doc
},
// returns the raw doc, fetching archived content from the persistor without writing it back to mongo
async _peekRawDoc(projectId, docId) {
const doc = await MongoManager.promises.findDoc(projectId, docId, {
lines: true,
rev: true,
deleted: true,
version: true,
ranges: true,
inS3: true,
})
if (doc == null) {
throw new Errors.NotFoundError(
`No such doc: ${docId} in project ${projectId}`
)
}
if (doc.inS3) {
// skip the unarchiving to mongo when getting a doc
const archivedDoc = await DocArchive.promises.getDoc(projectId, docId)
Object.assign(doc, archivedDoc)
}
return doc
},
// get the doc from mongo if possible, or from the persistent store otherwise,
// without unarchiving it (avoids unnecessary writes to mongo)
async peekDoc(projectId, docId) {
const doc = await DocManager._peekRawDoc(projectId, docId)
await MongoManager.promises.checkRevUnchanged(doc)
return doc
},
async getDocLines(projectId, docId) {
const doc = await DocManager._getDoc(projectId, docId, {
lines: true,
inS3: true,
})
return doc
},
async getAllDeletedDocs(projectId, filter) {
return await MongoManager.promises.getProjectsDeletedDocs(projectId, filter)
},
async getAllNonDeletedDocs(projectId, filter) {
await DocArchive.promises.unArchiveAllDocs(projectId)
const docs = await MongoManager.promises.getProjectsDocs(
projectId,
{ include_deleted: false },
filter
)
if (docs == null) {
throw new Errors.NotFoundError(`No docs for project ${projectId}`)
}
return docs
},
async projectHasRanges(projectId) {
const docs = await MongoManager.promises.getProjectsDocs(
projectId,
{},
{ _id: 1 }
)
const docIds = docs.map(doc => doc._id)
for (const docId of docIds) {
const doc = await DocManager.peekDoc(projectId, docId)
if (
(doc.ranges?.comments != null && doc.ranges.comments.length > 0) ||
(doc.ranges?.changes != null && doc.ranges.changes.length > 0)
) {
return true
}
}
return false
},
async updateDoc(projectId, docId, lines, version, ranges) {
const MAX_ATTEMPTS = 2
for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
try {
const { modified, rev } = await DocManager._tryUpdateDoc(
projectId,
docId,
lines,
version,
ranges
)
return { modified, rev }
} catch (err) {
if (err instanceof Errors.DocRevValueError && attempt < MAX_ATTEMPTS) {
// Another updateDoc call was racing with ours.
// Retry once in a bit.
logger.warn(
{ projectId, docId, err },
'detected concurrent updateDoc call'
)
await setTimeout(100 + Math.random() * 100)
continue
} else {
throw err
}
}
}
},
async _tryUpdateDoc(projectId, docId, lines, version, ranges) {
if (lines == null || version == null || ranges == null) {
throw new Error('no lines, version or ranges provided')
}
let doc
try {
doc = await DocManager._getDoc(projectId, docId, {
version: true,
rev: true,
lines: true,
ranges: true,
inS3: true,
})
} catch (err) {
if (err instanceof Errors.NotFoundError) {
doc = null
} else {
throw err
}
}
ranges = RangeManager.jsonRangesToMongo(ranges)
let updateLines, updateRanges, updateVersion
if (doc == null) {
// If the document doesn't exist, we'll make sure to create/update all parts of it.
updateLines = true
updateVersion = true
updateRanges = true
} else {
if (doc.version > version) {
// Reject update when the version was decremented.
// Potential reasons: racing flush, broken history.
throw new Errors.DocVersionDecrementedError('rejecting stale update', {
updateVersion: version,
flushedVersion: doc.version,
})
}
updateLines = !_.isEqual(doc.lines, lines)
updateVersion = doc.version !== version
updateRanges = RangeManager.shouldUpdateRanges(doc.ranges, ranges)
}
let modified = false
let rev = doc?.rev || 0
if (updateLines || updateRanges || updateVersion) {
const update = {}
if (updateLines) {
update.lines = lines
}
if (updateRanges) {
update.ranges = ranges
}
if (updateVersion) {
update.version = version
}
logger.debug(
{ projectId, docId, oldVersion: doc?.version, newVersion: version },
'updating doc'
)
if (updateLines || updateRanges) {
rev += 1 // rev will be incremented in mongo by MongoManager.upsertIntoDocCollection
}
modified = true
await MongoManager.promises.upsertIntoDocCollection(
projectId,
docId,
doc?.rev,
update
)
} else {
logger.debug({ projectId, docId }, 'doc has not changed - not updating')
}
return { modified, rev }
},
async patchDoc(projectId, docId, meta) {
const projection = { _id: 1, deleted: true }
const doc = await MongoManager.promises.findDoc(
projectId,
docId,
projection
)
if (!doc) {
throw new Errors.NotFoundError(
`No such project/doc to delete: ${projectId}/${docId}`
)
}
if (meta.deleted && Settings.docstore.archiveOnSoftDelete) {
// The user will not read this doc anytime soon. Flush it out of mongo.
DocArchive.promises.archiveDoc(projectId, docId).catch(err => {
logger.warn(
{ projectId, docId, err },
'archiving a single doc in the background failed'
)
})
}
await MongoManager.promises.patchDoc(projectId, docId, meta)
},
}
module.exports = {
...callbackifyAll(DocManager, {
multiResult: {
updateDoc: ['modified', 'rev'],
},
}),
promises: DocManager,
}
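
For callers that still use callbacks, callbackifyAll with the multiResult option spreads updateDoc's `{ modified, rev }` result into separate callback arguments. A small sketch, assuming `projectId`, `docId`, `lines`, `version` and `ranges` are the values a client would send:

const DocManager = require('./DocManager')

function saveDoc(projectId, docId, lines, version, ranges) {
  DocManager.updateDoc(projectId, docId, lines, version, ranges, (err, modified, rev) => {
    if (err) {
      console.error('updateDoc failed', err)
      return
    }
    console.log('doc saved', { modified, rev })
  })
}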

View File

@@ -0,0 +1,19 @@
// import Errors from object-persistor to pass instanceof checks
const OError = require('@overleaf/o-error')
const { Errors } = require('@overleaf/object-persistor')
class Md5MismatchError extends OError {}
class DocModifiedError extends OError {}
class DocRevValueError extends OError {}
class DocVersionDecrementedError extends OError {}
module.exports = {
Md5MismatchError,
DocModifiedError,
DocRevValueError,
DocVersionDecrementedError,
...Errors,
}
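
Each class extends OError, so callers can attach structured info and check error types with instanceof; spreading object-persistor's Errors means persistor errors such as NotFoundError are also reachable through this one module. An illustrative sketch (the key and hashes below are made up):

const Errors = require('./Errors')

try {
  throw new Errors.Md5MismatchError('md5 mismatch when downloading doc', {
    key: 'project-id/doc-id',
    sourceMd5: 'expected-hash',
    md5: 'actual-hash',
  })
} catch (err) {
  if (err instanceof Errors.Md5MismatchError) {
    console.error(err.message, err.info) // OError keeps the info object on the error
  }
}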

View File

@@ -0,0 +1,67 @@
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const { db, ObjectId } = require('./mongodb')
const request = require('request')
const async = require('async')
const _ = require('lodash')
const crypto = require('node:crypto')
const settings = require('@overleaf/settings')
const { port } = settings.internal.docstore
const logger = require('@overleaf/logger')
module.exports = {
check(callback) {
const docId = new ObjectId()
const projectId = new ObjectId(settings.docstore.healthCheck.project_id)
const url = `http://127.0.0.1:${port}/project/${projectId}/doc/${docId}`
const lines = [
'smoke test - delete me',
`${crypto.randomBytes(32).toString('hex')}`,
]
const getOpts = () => ({
url,
timeout: 3000,
})
logger.debug({ lines, url, docId, projectId }, 'running health check')
const jobs = [
function (cb) {
const opts = getOpts()
opts.json = { lines, version: 42, ranges: {} }
return request.post(opts, cb)
},
function (cb) {
const opts = getOpts()
opts.json = true
return request.get(opts, function (err, res, body) {
if (err != null) {
logger.err({ err }, 'docstore returned an error in health check get')
return cb(err)
} else if (res == null) {
return cb(new Error('no response from docstore with get check'))
} else if ((res != null ? res.statusCode : undefined) !== 200) {
return cb(new Error(`status code not 200, it's ${res.statusCode}`))
} else if (
_.isEqual(body != null ? body.lines : undefined, lines) &&
(body != null ? body._id : undefined) === docId.toString()
) {
return cb()
} else {
return cb(
new Error(
`health check lines not equal ${body.lines} != ${lines}`
)
)
}
})
},
cb => db.docs.deleteOne({ _id: docId, project_id: projectId }, cb),
]
return async.series(jobs, callback)
},
}

View File

@@ -0,0 +1,319 @@
const DocManager = require('./DocManager')
const logger = require('@overleaf/logger')
const DocArchive = require('./DocArchiveManager')
const HealthChecker = require('./HealthChecker')
const Errors = require('./Errors')
const Settings = require('@overleaf/settings')
function getDoc(req, res, next) {
const { doc_id: docId, project_id: projectId } = req.params
const includeDeleted = req.query.include_deleted === 'true'
logger.debug({ projectId, docId }, 'getting doc')
DocManager.getFullDoc(projectId, docId, function (error, doc) {
if (error) {
return next(error)
}
logger.debug({ docId, projectId }, 'got doc')
if (doc == null) {
res.sendStatus(404)
} else if (doc.deleted && !includeDeleted) {
res.sendStatus(404)
} else {
res.json(_buildDocView(doc))
}
})
}
function peekDoc(req, res, next) {
const { doc_id: docId, project_id: projectId } = req.params
logger.debug({ projectId, docId }, 'peeking doc')
DocManager.peekDoc(projectId, docId, function (error, doc) {
if (error) {
return next(error)
}
if (doc == null) {
res.sendStatus(404)
} else {
res.setHeader('x-doc-status', doc.inS3 ? 'archived' : 'active')
res.json(_buildDocView(doc))
}
})
}
function isDocDeleted(req, res, next) {
const { doc_id: docId, project_id: projectId } = req.params
DocManager.isDocDeleted(projectId, docId, function (error, deleted) {
if (error) {
return next(error)
}
res.json({ deleted })
})
}
function getRawDoc(req, res, next) {
const { doc_id: docId, project_id: projectId } = req.params
logger.debug({ projectId, docId }, 'getting raw doc')
DocManager.getDocLines(projectId, docId, function (error, doc) {
if (error) {
return next(error)
}
if (doc == null) {
res.sendStatus(404)
} else {
res.setHeader('content-type', 'text/plain')
res.send(_buildRawDocView(doc))
}
})
}
function getAllDocs(req, res, next) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'getting all docs')
DocManager.getAllNonDeletedDocs(
projectId,
{ lines: true, rev: true },
function (error, docs) {
if (docs == null) {
docs = []
}
if (error) {
return next(error)
}
const docViews = _buildDocsArrayView(projectId, docs)
for (const docView of docViews) {
if (!docView.lines) {
logger.warn({ projectId, docId: docView._id }, 'missing doc lines')
docView.lines = []
}
}
res.json(docViews)
}
)
}
function getAllDeletedDocs(req, res, next) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'getting all deleted docs')
DocManager.getAllDeletedDocs(
projectId,
{ name: true, deletedAt: true },
function (error, docs) {
if (error) {
return next(error)
}
res.json(
docs.map(doc => ({
_id: doc._id.toString(),
name: doc.name,
deletedAt: doc.deletedAt,
}))
)
}
)
}
function getAllRanges(req, res, next) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'getting all ranges')
DocManager.getAllNonDeletedDocs(
projectId,
{ ranges: true },
function (error, docs) {
if (docs == null) {
docs = []
}
if (error) {
return next(error)
}
res.json(_buildDocsArrayView(projectId, docs))
}
)
}
function projectHasRanges(req, res, next) {
const { project_id: projectId } = req.params
DocManager.projectHasRanges(projectId, (err, projectHasRanges) => {
if (err) {
return next(err)
}
res.json({ projectHasRanges })
})
}
function updateDoc(req, res, next) {
const { doc_id: docId, project_id: projectId } = req.params
const lines = req.body?.lines
const version = req.body?.version
const ranges = req.body?.ranges
if (lines == null || !(lines instanceof Array)) {
logger.error({ projectId, docId }, 'no doc lines provided')
res.sendStatus(400) // Bad Request
return
}
if (version == null || typeof version !== 'number') {
logger.error({ projectId, docId }, 'no doc version provided')
res.sendStatus(400) // Bad Request
return
}
if (ranges == null) {
logger.error({ projectId, docId }, 'no doc ranges provided')
res.sendStatus(400) // Bad Request
return
}
const bodyLength = lines.reduce((len, line) => line.length + len, 0)
if (bodyLength > Settings.max_doc_length) {
logger.error({ projectId, docId, bodyLength }, 'document body too large')
res.status(413).send('document body too large')
return
}
logger.debug({ projectId, docId }, 'got http request to update doc')
DocManager.updateDoc(
projectId,
docId,
lines,
version,
ranges,
function (error, modified, rev) {
if (error) {
return next(error)
}
res.json({
modified,
rev,
})
}
)
}
function patchDoc(req, res, next) {
const { doc_id: docId, project_id: projectId } = req.params
logger.debug({ projectId, docId }, 'patching doc')
const allowedFields = ['deleted', 'deletedAt', 'name']
const meta = {}
Object.entries(req.body).forEach(([field, value]) => {
if (allowedFields.includes(field)) {
meta[field] = value
} else {
logger.fatal({ field }, 'joi validation for patchDoc is broken')
}
})
DocManager.patchDoc(projectId, docId, meta, function (error) {
if (error) {
return next(error)
}
res.sendStatus(204)
})
}
function _buildDocView(doc) {
const docView = { _id: doc._id?.toString() }
for (const attribute of ['lines', 'rev', 'version', 'ranges', 'deleted']) {
if (doc[attribute] != null) {
docView[attribute] = doc[attribute]
}
}
return docView
}
function _buildRawDocView(doc) {
return (doc?.lines ?? []).join('\n')
}
function _buildDocsArrayView(projectId, docs) {
const docViews = []
for (const doc of docs) {
if (doc != null) {
// There can end up being null docs for some reason :( (probably a race condition)
docViews.push(_buildDocView(doc))
} else {
logger.error(
{ err: new Error('null doc'), projectId },
'encountered null doc'
)
}
}
return docViews
}
function archiveAllDocs(req, res, next) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'archiving all docs')
DocArchive.archiveAllDocs(projectId, function (error) {
if (error) {
return next(error)
}
res.sendStatus(204)
})
}
function archiveDoc(req, res, next) {
const { doc_id: docId, project_id: projectId } = req.params
logger.debug({ projectId, docId }, 'archiving a doc')
DocArchive.archiveDoc(projectId, docId, function (error) {
if (error) {
return next(error)
}
res.sendStatus(204)
})
}
function unArchiveAllDocs(req, res, next) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'unarchiving all docs')
DocArchive.unArchiveAllDocs(projectId, function (err) {
if (err) {
if (err instanceof Errors.DocRevValueError) {
logger.warn({ err }, 'Failed to unarchive doc')
return res.sendStatus(409)
}
return next(err)
}
res.sendStatus(200)
})
}
function destroyProject(req, res, next) {
const { project_id: projectId } = req.params
logger.debug({ projectId }, 'destroying all docs')
DocArchive.destroyProject(projectId, function (error) {
if (error) {
return next(error)
}
res.sendStatus(204)
})
}
function healthCheck(req, res) {
HealthChecker.check(function (err) {
if (err) {
logger.err({ err }, 'error performing health check')
res.sendStatus(500)
} else {
res.sendStatus(200)
}
})
}
module.exports = {
getDoc,
peekDoc,
isDocDeleted,
getRawDoc,
getAllDocs,
getAllDeletedDocs,
getAllRanges,
projectHasRanges,
updateDoc,
patchDoc,
archiveAllDocs,
archiveDoc,
unArchiveAllDocs,
destroyProject,
healthCheck,
}
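
The handlers above are plain Express-style (req, res, next) functions. A sketch of how a router could mount a few of them; the paths, the `./HttpController` filename and the JSON body limit are assumptions, not part of this commit:

const express = require('express')
const HttpController = require('./HttpController') // assumed filename for the module above

const app = express()
app.use(express.json({ limit: '16mb' })) // limit is an assumption; updateDoc re-checks max_doc_length itself

app.get('/project/:project_id/doc/:doc_id', HttpController.getDoc)
app.post('/project/:project_id/doc/:doc_id', HttpController.updateDoc)
app.delete('/project/:project_id', HttpController.destroyProject)
app.get('/health_check', HttpController.healthCheck)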

View File

@@ -0,0 +1,274 @@
const { db, ObjectId } = require('./mongodb')
const Settings = require('@overleaf/settings')
const Errors = require('./Errors')
const { callbackify } = require('node:util')
const ARCHIVING_LOCK_DURATION_MS = Settings.archivingLockDurationMs
async function findDoc(projectId, docId, projection) {
const doc = await db.docs.findOne(
{
_id: new ObjectId(docId.toString()),
project_id: new ObjectId(projectId.toString()),
},
{ projection }
)
if (doc && projection.version && !doc.version) {
doc.version = 0
}
return doc
}
async function getProjectsDeletedDocs(projectId, projection) {
const docs = await db.docs
.find(
{
project_id: new ObjectId(projectId.toString()),
deleted: true,
},
{
projection,
sort: { deletedAt: -1 },
limit: Settings.max_deleted_docs,
}
)
.toArray()
return docs
}
async function getProjectsDocs(projectId, options, projection) {
const query = { project_id: new ObjectId(projectId.toString()) }
if (!options.include_deleted) {
query.deleted = { $ne: true }
}
const queryOptions = {
projection,
}
if (options.limit) {
queryOptions.limit = options.limit
}
const docs = await db.docs.find(query, queryOptions).toArray()
return docs
}
async function getArchivedProjectDocs(projectId, maxResults) {
const query = {
project_id: new ObjectId(projectId.toString()),
inS3: true,
}
const docs = await db.docs
.find(query, { projection: { _id: 1 }, limit: maxResults })
.toArray()
return docs
}
async function getNonArchivedProjectDocIds(projectId) {
const docs = await db.docs
.find(
{
project_id: new ObjectId(projectId),
inS3: { $ne: true },
},
{ projection: { _id: 1 } }
)
.map(doc => doc._id)
.toArray()
return docs
}
async function getNonDeletedArchivedProjectDocs(projectId, maxResults) {
const query = {
project_id: new ObjectId(projectId.toString()),
deleted: { $ne: true },
inS3: true,
}
const docs = await db.docs
.find(query, { projection: { _id: 1 }, limit: maxResults })
.toArray()
return docs
}
async function upsertIntoDocCollection(projectId, docId, previousRev, updates) {
if (previousRev) {
const update = {
$set: updates,
$unset: { inS3: true },
}
if (updates.lines || updates.ranges) {
update.$inc = { rev: 1 }
}
const result = await db.docs.updateOne(
{
_id: new ObjectId(docId),
project_id: new ObjectId(projectId),
rev: previousRev,
},
update
)
if (result.matchedCount !== 1) {
throw new Errors.DocRevValueError()
}
} else {
try {
await db.docs.insertOne({
_id: new ObjectId(docId),
project_id: new ObjectId(projectId),
rev: 1,
...updates,
})
} catch (err) {
if (err.code === 11000) {
// duplicate doc _id
throw new Errors.DocRevValueError()
} else {
throw err
}
}
}
}
async function patchDoc(projectId, docId, meta) {
await db.docs.updateOne(
{
_id: new ObjectId(docId),
project_id: new ObjectId(projectId),
},
{ $set: meta }
)
}
/**
* Fetch a doc and lock it for archiving
*
* This will return null if the doc is not found, if it's already archived or
* if the lock can't be acquired.
*/
async function getDocForArchiving(projectId, docId) {
const archivingUntil = new Date(Date.now() + ARCHIVING_LOCK_DURATION_MS)
const result = await db.docs.findOneAndUpdate(
{
_id: new ObjectId(docId),
project_id: new ObjectId(projectId),
inS3: { $ne: true },
$or: [{ archivingUntil: null }, { archivingUntil: { $lt: new Date() } }],
},
{ $set: { archivingUntil } },
{
projection: { lines: 1, ranges: 1, rev: 1 },
includeResultMetadata: true,
}
)
return result.value
}
/**
* Clear the doc contents from Mongo and release the archiving lock
*/
async function markDocAsArchived(projectId, docId, rev) {
await db.docs.updateOne(
{ _id: new ObjectId(docId), rev },
{
$set: { inS3: true },
$unset: { lines: 1, ranges: 1, archivingUntil: 1 },
}
)
}
/**
* Restore an archived doc
*
* This checks that the archived doc's rev matches.
*/
async function restoreArchivedDoc(projectId, docId, archivedDoc) {
const query = {
_id: new ObjectId(docId),
project_id: new ObjectId(projectId),
rev: archivedDoc.rev,
}
const update = {
$set: {
lines: archivedDoc.lines,
ranges: archivedDoc.ranges || {},
},
$unset: {
inS3: true,
},
}
const result = await db.docs.updateOne(query, update)
if (result.matchedCount === 0) {
throw new Errors.DocRevValueError('failed to unarchive doc', {
docId,
rev: archivedDoc.rev,
})
}
}
async function getDocRev(docId) {
const doc = await db.docs.findOne(
{ _id: new ObjectId(docId.toString()) },
{ projection: { rev: 1 } }
)
return doc && doc.rev
}
/**
* Helper method to support optimistic locking.
*
* Check that the rev of an existing doc is unchanged. If the rev has
* changed, return a DocModifiedError.
*/
async function checkRevUnchanged(doc) {
const currentRev = await getDocRev(doc._id)
if (isNaN(currentRev) || isNaN(doc.rev)) {
throw new Errors.DocRevValueError('doc rev is NaN', {
doc_id: doc._id,
rev: doc.rev,
currentRev,
})
}
if (doc.rev !== currentRev) {
throw new Errors.DocModifiedError('doc rev has changed', {
doc_id: doc._id,
rev: doc.rev,
currentRev,
})
}
}
async function destroyProject(projectId) {
await db.docs.deleteMany({ project_id: new ObjectId(projectId) })
}
module.exports = {
findDoc: callbackify(findDoc),
getProjectsDeletedDocs: callbackify(getProjectsDeletedDocs),
getProjectsDocs: callbackify(getProjectsDocs),
getArchivedProjectDocs: callbackify(getArchivedProjectDocs),
getNonArchivedProjectDocIds: callbackify(getNonArchivedProjectDocIds),
getNonDeletedArchivedProjectDocs: callbackify(
getNonDeletedArchivedProjectDocs
),
upsertIntoDocCollection: callbackify(upsertIntoDocCollection),
restoreArchivedDoc: callbackify(restoreArchivedDoc),
patchDoc: callbackify(patchDoc),
getDocForArchiving: callbackify(getDocForArchiving),
markDocAsArchived: callbackify(markDocAsArchived),
checkRevUnchanged: callbackify(checkRevUnchanged),
destroyProject: callbackify(destroyProject),
promises: {
findDoc,
getProjectsDeletedDocs,
getProjectsDocs,
getArchivedProjectDocs,
getNonArchivedProjectDocIds,
getNonDeletedArchivedProjectDocs,
upsertIntoDocCollection,
restoreArchivedDoc,
patchDoc,
getDocForArchiving,
markDocAsArchived,
checkRevUnchanged,
destroyProject,
},
}
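
A sketch of the optimistic-locking contract that upsertIntoDocCollection enforces: pass the rev you last read, and a DocRevValueError signals that a concurrent writer changed the doc first (DocManager.updateDoc retries once when it sees this error). The helper below is illustrative only:

const MongoManager = require('./MongoManager')
const Errors = require('./Errors')

// Returns true on success, false when the rev check failed and the caller should re-read.
async function setLines(projectId, docId, lines) {
  const doc = await MongoManager.promises.findDoc(projectId, docId, { rev: 1 })
  try {
    await MongoManager.promises.upsertIntoDocCollection(projectId, docId, doc?.rev, { lines })
    return true
  } catch (err) {
    if (err instanceof Errors.DocRevValueError) {
      return false
    }
    throw err
  }
}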

View File

@@ -0,0 +1,12 @@
const settings = require('@overleaf/settings')
const persistorSettings = settings.docstore
persistorSettings.Metrics = require('@overleaf/metrics')
const ObjectPersistor = require('@overleaf/object-persistor')
const AbstractPersistor = require('@overleaf/object-persistor/src/AbstractPersistor')
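// Fall back to the AbstractPersistor base class when no backend is configured;
// callers are expected to check Settings.docstore.backend first (see
// _isArchivingEnabled in DocArchiveManager) before using the persistor.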
const persistor = settings.docstore.backend
? ObjectPersistor(persistorSettings)
: new AbstractPersistor()
module.exports = persistor

View File

@@ -0,0 +1,68 @@
/* eslint-disable
no-return-assign,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let RangeManager
const _ = require('lodash')
const { ObjectId } = require('./mongodb')
module.exports = RangeManager = {
shouldUpdateRanges(docRanges, incomingRanges) {
if (incomingRanges == null) {
throw new Error('expected incoming_ranges')
}
// If the ranges are empty, we don't store them in the DB, so set
// docRanges to an empty object by default, since that is what
// incomingRanges will be for an empty range set.
if (docRanges == null) {
docRanges = {}
}
return !_.isEqual(docRanges, incomingRanges)
},
jsonRangesToMongo(ranges) {
if (ranges == null) {
return null
}
const updateMetadata = function (metadata) {
if ((metadata != null ? metadata.ts : undefined) != null) {
metadata.ts = new Date(metadata.ts)
}
if ((metadata != null ? metadata.user_id : undefined) != null) {
return (metadata.user_id = RangeManager._safeObjectId(metadata.user_id))
}
}
for (const change of Array.from(ranges.changes || [])) {
change.id = RangeManager._safeObjectId(change.id)
updateMetadata(change.metadata)
}
for (const comment of Array.from(ranges.comments || [])) {
comment.id = RangeManager._safeObjectId(comment.id)
if ((comment.op != null ? comment.op.t : undefined) != null) {
comment.op.t = RangeManager._safeObjectId(comment.op.t)
}
updateMetadata(comment.metadata)
}
return ranges
},
_safeObjectId(data) {
try {
return new ObjectId(data)
} catch (error) {
return data
}
},
}
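
A sketch of what jsonRangesToMongo does to an incoming JSON ranges payload: string ids become ObjectIds where possible and metadata timestamps become Date objects. The ids, op and timestamp below are made up for illustration:

const RangeManager = require('./RangeManager')

const ranges = RangeManager.jsonRangesToMongo({
  changes: [
    {
      id: '65f1c0ffee0ddeadbeef0001',
      op: { p: 0, i: 'hello' },
      metadata: { ts: '2025-04-24T05:11:28.000Z', user_id: '65f1c0ffee0ddeadbeef0002' },
    },
  ],
  comments: [],
})
// ranges.changes[0].id is now an ObjectId, metadata.ts a Date, and
// metadata.user_id an ObjectId (values that are not valid ids are left untouched)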

View File

@@ -0,0 +1,28 @@
const { LoggerStream, WritableBuffer } = require('@overleaf/stream-utils')
const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger/logging-manager')
const { pipeline } = require('node:stream/promises')
const { callbackify } = require('node:util')
module.exports = {
streamToBuffer: callbackify(streamToBuffer),
promises: {
streamToBuffer,
},
}
async function streamToBuffer(projectId, docId, stream) {
const loggerTransform = new LoggerStream(
Settings.max_doc_length,
(size, isFlush) => {
logger.warn(
{ projectId, docId, size, finishedReading: isFlush },
'potentially large doc pulled down from gcs'
)
}
)
const buffer = new WritableBuffer()
await pipeline(stream, loggerTransform, buffer)
return buffer.contents()
}
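
A sketch of streamToBuffer together with the ReadableString helper that DocArchiveManager uses for uploads; the LoggerStream wrapper above is what logs the 'potentially large doc' warning for oversized streams:

const { ReadableString } = require('@overleaf/stream-utils')
const { streamToBuffer } = require('./StreamToBuffer').promises

async function roundTrip(projectId, docId, json) {
  const stream = new ReadableString(json)
  const buffer = await streamToBuffer(projectId, docId, stream) // Buffer collected via WritableBuffer
  return buffer.toString() // the original string back
}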

View File

@@ -0,0 +1,18 @@
const Metrics = require('@overleaf/metrics')
const Settings = require('@overleaf/settings')
const { MongoClient, ObjectId } = require('mongodb-legacy')
const mongoClient = new MongoClient(Settings.mongo.url, Settings.mongo.options)
const mongoDb = mongoClient.db()
const db = {
docs: mongoDb.collection('docs'),
}
Metrics.mongodb.monitor(mongoClient)
module.exports = {
db,
mongoClient,
ObjectId,
}
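
A minimal sketch of how this shared handle is used elsewhere in the service (the query shape mirrors MongoManager.findDoc; the helper itself is illustrative):

const { db, ObjectId } = require('./mongodb')

async function docExists(projectId, docId) {
  const doc = await db.docs.findOne(
    { _id: new ObjectId(docId), project_id: new ObjectId(projectId) },
    { projection: { _id: 1 } }
  )
  return doc != null
}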