first commit

2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions


@@ -0,0 +1,129 @@
import _ from 'lodash'
import async from 'async'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as UpdateTranslator from './UpdateTranslator.js'
// avoid creating too many blobs at the same time
const MAX_CONCURRENT_REQUESTS = 4
// number of retry attempts for blob creation
const RETRY_ATTEMPTS = 3
// delay between retries
const RETRY_INTERVAL = 100
export function createBlobsForUpdates(
projectId,
historyId,
updates,
extendLock,
callback
) {
// async.mapLimit runs jobs in parallel and returns on the first error. It
// doesn't wait for concurrent jobs to finish. We want to make sure all jobs
// are wrapped within our lock so we collect the first error encountered here
// and wait for all jobs to finish before returning the error.
let firstBlobCreationError = null
function createBlobForUpdate(update, cb) {
// For file additions we need to first create a blob in the history-store
// with the contents of the file. Then we can create a change containing a
// file addition operation which references the blob.
//
// To do this we decorate file creation updates with a blobHash
if (!UpdateTranslator.isAddUpdate(update)) {
return async.setImmediate(() => cb(null, { update }))
}
let attempts = 0
// Since we may be creating O(1000) blobs in an update, allow for the
// occasional failure to prevent the whole update failing.
let lastErr
async.retry(
{
times: RETRY_ATTEMPTS,
interval: RETRY_INTERVAL,
},
_cb => {
attempts++
if (attempts > 1) {
logger.error(
{
err: lastErr,
projectId,
historyId,
update: _.pick(
update,
'doc',
'file',
'hash',
'createdBlob',
'url'
),
attempts,
},
'previous createBlob attempt failed, retrying'
)
}
// extend the lock for each file because large files may take a long time
extendLock(err => {
if (err) {
lastErr = OError.tag(err)
return _cb(lastErr)
}
HistoryStoreManager.createBlobForUpdate(
projectId,
historyId,
update,
(err, hashes) => {
if (err) {
lastErr = OError.tag(err, 'retry: error creating blob', {
projectId,
doc: update.doc,
file: update.file,
})
_cb(lastErr)
} else {
_cb(null, hashes)
}
}
)
})
},
(error, blobHashes) => {
if (error) {
if (!firstBlobCreationError) {
firstBlobCreationError = error
}
return cb(null, { update, blobHashes })
}
extendLock(error => {
if (error) {
if (!firstBlobCreationError) {
firstBlobCreationError = error
}
}
cb(null, { update, blobHashes })
})
}
)
}
async.mapLimit(
updates,
MAX_CONCURRENT_REQUESTS,
createBlobForUpdate,
(unusedError, updatesWithBlobs) => {
// As the name indicates, this error is not expected to be set, but future
// changes could set it, and silently ignoring it would hide a bug
if (unusedError) {
return callback(unusedError)
}
if (firstBlobCreationError) {
return callback(firstBlobCreationError)
}
callback(null, updatesWithBlobs)
}
)
}
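
A minimal usage sketch of createBlobsForUpdates; the IDs, the update payload, and the no-op lock extender below are illustrative placeholders rather than the real docupdater data:

const exampleUpdates = [
  // shape is illustrative; real updates come off the Redis queue
  { doc: 'doc-id', pathname: '/main.tex', docLines: 'Hello world', v: 1 },
]
createBlobsForUpdates(
  'project-id',
  'history-id',
  exampleUpdates,
  cb => cb(), // no-op stand-in for the real extendLock callback
  (err, updatesWithBlobs) => {
    if (err) return console.error(err)
    // each entry is { update } or, for additions, { update, blobHashes }
    console.log(updatesWithBlobs)
  }
)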


@@ -0,0 +1,626 @@
import _ from 'lodash'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as Errors from './Errors.js'
import {
TextOperation,
InsertOp,
RemoveOp,
RetainOp,
Range,
TrackedChangeList,
} from 'overleaf-editor-core'
/**
* @import { RawEditOperation, TrackedChangeRawData } from 'overleaf-editor-core/lib/types'
*/
export function convertToSummarizedUpdates(chunk, callback) {
const version = chunk.chunk.startVersion
const { files } = chunk.chunk.history.snapshot
const builder = new UpdateSetBuilder(version, files)
for (const change of chunk.chunk.history.changes) {
try {
builder.applyChange(change)
} catch (error) {
return callback(error)
}
}
callback(null, builder.summarizedUpdates)
}
export function convertToDiffUpdates(
projectId,
chunk,
pathname,
fromVersion,
toVersion,
callback
) {
let error
let version = chunk.chunk.startVersion
const { files } = chunk.chunk.history.snapshot
const builder = new UpdateSetBuilder(version, files)
let file = null
for (const change of chunk.chunk.history.changes) {
// Because we're referencing by pathname, which can change, we
// want to get the last file in the range fromVersion:toVersion
// that has the pathname we want. Note that this might not exist yet
// at fromVersion, so we'll just settle for the last existing one we find
// after that.
if (fromVersion <= version && version <= toVersion) {
const currentFile = builder.getFile(pathname)
if (currentFile) {
file = currentFile
}
}
try {
builder.applyChange(change)
} catch (error1) {
return callback(error1)
}
version += 1
}
// Versions act as fence posts, with updates taking us from one to another,
// so we also need to check after the final update, when we're at the last version.
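// Example: a chunk starting at version 10 with 3 changes spans versions
// 10, 11, 12 and 13; after the loop above, `version` is 13, so the final
// check below can still pick up the file at the last version in the range.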
if (fromVersion <= version && version <= toVersion) {
const currentFile = builder.getFile(pathname)
if (currentFile) {
file = currentFile
}
}
// return an empty diff if the file was flagged as missing with an explicit null
if (builder.getFile(pathname) === null) {
return callback(null, { initialContent: '', updates: [] })
}
if (file == null) {
error = new Errors.NotFoundError(
`pathname '${pathname}' not found in range`
)
return callback(error)
}
WebApiManager.getHistoryId(projectId, (err, historyId) => {
if (err) {
return callback(err)
}
file.getDiffUpdates(historyId, fromVersion, toVersion, callback)
})
}
class UpdateSetBuilder {
constructor(startVersion, files) {
this.version = startVersion
this.summarizedUpdates = []
this.files = Object.create(null)
for (const pathname in files) {
// initialize file from snapshot
const data = files[pathname]
this.files[pathname] = new File(pathname, data, startVersion)
}
}
getFile(pathname) {
return this.files[pathname]
}
applyChange(change) {
const timestamp = new Date(change.timestamp)
let authors = _.map(change.authors, id => {
if (id == null) {
return null
}
return id
})
authors = authors.concat(change.v2Authors || [])
this.currentUpdate = {
meta: {
users: authors,
start_ts: timestamp.getTime(),
end_ts: timestamp.getTime(),
},
v: this.version,
pathnames: new Set([]),
project_ops: [],
}
if (change.origin) {
this.currentUpdate.meta.origin = change.origin
}
for (const op of change.operations) {
this.applyOperation(op, timestamp, authors, change.origin)
}
this.currentUpdate.pathnames = Array.from(this.currentUpdate.pathnames)
this.summarizedUpdates.push(this.currentUpdate)
this.version += 1
}
applyOperation(op, timestamp, authors, origin) {
if (UpdateSetBuilder._isTextOperation(op)) {
this.applyTextOperation(op, timestamp, authors, origin)
} else if (UpdateSetBuilder._isRenameOperation(op)) {
this.applyRenameOperation(op, timestamp, authors)
} else if (UpdateSetBuilder._isRemoveFileOperation(op)) {
this.applyRemoveFileOperation(op, timestamp, authors)
} else if (UpdateSetBuilder._isAddFileOperation(op)) {
this.applyAddFileOperation(op, timestamp, authors)
}
}
applyTextOperation(operation, timestamp, authors, origin) {
const { pathname } = operation
if (pathname === '') {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'pathname is empty for text operation'
)
return
}
const file = this.files[pathname]
if (file == null) {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'file is missing for text operation'
)
this.files[pathname] = null // marker for a missing file
return
}
file.applyTextOperation(authors, timestamp, this.version, operation, origin)
this.currentUpdate.pathnames.add(pathname)
}
applyRenameOperation(operation, timestamp, authors) {
const { pathname, newPathname } = operation
const file = this.files[pathname]
if (file == null) {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'file is missing for rename operation'
)
this.files[pathname] = null // marker for a missing file
return
}
file.rename(newPathname)
delete this.files[pathname]
this.files[newPathname] = file
this.currentUpdate.project_ops.push({
rename: { pathname, newPathname },
})
}
applyAddFileOperation(operation, timestamp, authors) {
const { pathname } = operation
// add file
this.files[pathname] = new File(pathname, operation.file, this.version)
this.currentUpdate.project_ops.push({ add: { pathname } })
}
applyRemoveFileOperation(operation, timestamp, authors) {
const { pathname } = operation
const file = this.files[pathname]
if (file == null) {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'pathname not found when removing file'
)
this.files[pathname] = null // marker for a missing file
return
}
delete this.files[pathname]
this.currentUpdate.project_ops.push({ remove: { pathname } })
}
static _isTextOperation(op) {
return Object.prototype.hasOwnProperty.call(op, 'textOperation')
}
static _isRenameOperation(op) {
return (
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
op.newPathname !== ''
)
}
static _isRemoveFileOperation(op) {
return (
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
op.newPathname === ''
)
}
static _isAddFileOperation(op) {
return Object.prototype.hasOwnProperty.call(op, 'file')
}
}
/**
* @param {string} content
* @param {TrackedChangeList} trackedChanges
* @returns {string}
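* @example
* // 'abcdef' with a single tracked delete covering offsets [2, 4) yields 'abef'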
*/
function removeTrackedDeletesFromString(content, trackedChanges) {
let result = ''
let cursor = 0
const trackedDeletes = trackedChanges
.asSorted()
.filter(tc => tc.tracking.type === 'delete')
for (const trackedChange of trackedDeletes) {
if (cursor < trackedChange.range.start) {
result += content.slice(cursor, trackedChange.range.start)
}
// skip the tracked change itself
cursor = trackedChange.range.end
}
result += content.slice(cursor)
return result
}
class File {
constructor(pathname, snapshot, initialVersion) {
this.pathname = pathname
this.snapshot = snapshot
this.initialVersion = initialVersion
this.operations = []
}
applyTextOperation(authors, timestamp, version, operation, origin) {
this.operations.push({ authors, timestamp, version, operation, origin })
}
rename(pathname) {
this.pathname = pathname
}
getDiffUpdates(historyId, fromVersion, toVersion, callback) {
if (this.snapshot.stringLength == null) {
// Binary file
return callback(null, { binary: true })
}
this._loadContentAndRanges(historyId, (error, content, ranges) => {
if (error != null) {
return callback(OError.tag(error))
}
const trackedChanges = TrackedChangeList.fromRaw(
ranges?.trackedChanges || []
)
/** @type {string | undefined} */
let initialContent
const updates = []
for (const operationInfo of this.operations) {
if (!('textOperation' in operationInfo.operation)) {
// We only care about text operations
continue
}
const { authors, timestamp, version, operation } = operationInfo
// Set the initialContent to the latest version we have before the diff
// begins. 'version' here refers to the document version as we are
// applying the updates. So we store the content *before* applying the
// updates.
if (version >= fromVersion && initialContent === undefined) {
initialContent = removeTrackedDeletesFromString(
content,
trackedChanges
)
}
let ops
;({ content, ops } = this._convertTextOperation(
content,
operation,
trackedChanges
))
// We only need to return the updates between fromVersion and toVersion
if (fromVersion <= version && version < toVersion) {
const update = {
meta: {
users: authors,
start_ts: timestamp.getTime(),
end_ts: timestamp.getTime(),
},
v: version,
op: ops,
}
if (operationInfo.origin) {
update.meta.origin = operationInfo.origin
}
updates.push(update)
}
}
if (initialContent === undefined) {
initialContent = removeTrackedDeletesFromString(content, trackedChanges)
}
callback(null, { initialContent, updates })
})
}
/**
*
* @param {string} initialContent
* @param {RawEditOperation} operation
* @param {TrackedChangeList} trackedChanges
*/
_convertTextOperation(initialContent, operation, trackedChanges) {
const textOp = TextOperation.fromJSON(operation)
const textUpdateBuilder = new TextUpdateBuilder(
initialContent,
trackedChanges
)
for (const op of textOp.ops) {
textUpdateBuilder.applyOp(op)
}
textUpdateBuilder.finish()
return {
content: textUpdateBuilder.result,
ops: textUpdateBuilder.changes,
}
}
_loadContentAndRanges(historyId, callback) {
HistoryStoreManager.getProjectBlob(
historyId,
this.snapshot.hash,
(err, content) => {
if (err) {
return callback(err)
}
if (this.snapshot.rangesHash) {
HistoryStoreManager.getProjectBlob(
historyId,
this.snapshot.rangesHash,
(err, ranges) => {
if (err) {
return callback(err)
}
return callback(null, content, JSON.parse(ranges))
}
)
} else {
return callback(null, content, undefined)
}
}
)
}
}
class TextUpdateBuilder {
/**
*
* @param {string} source
* @param {TrackedChangeList} ranges
*/
constructor(source, ranges) {
this.trackedChanges = ranges
this.source = source
this.sourceCursor = 0
this.result = ''
/** @type {({i: string, p: number} | {d: string, p: number})[]} */
this.changes = []
}
applyOp(op) {
if (op instanceof RetainOp) {
const length = this.result.length
this.applyRetain(op)
this.trackedChanges.applyRetain(length, op.length, {
tracking: op.tracking,
})
}
if (op instanceof InsertOp) {
const length = this.result.length
this.applyInsert(op)
this.trackedChanges.applyInsert(length, op.insertion, {
tracking: op.tracking,
})
}
if (op instanceof RemoveOp) {
const length = this.result.length
this.applyDelete(op)
this.trackedChanges.applyDelete(length, op.length)
}
}
/**
*
* @param {RetainOp} retain
*/
applyRetain(retain) {
const resultRetentionRange = new Range(this.result.length, retain.length)
const sourceRetentionRange = new Range(this.sourceCursor, retain.length)
let scanCursor = this.result.length
if (retain.tracking) {
// We are modifying existing tracked deletes. We need to treat removal
// (type insert/none) of a tracked delete as an insertion. Similarly, any
// range we introduce as a tracked deletion must be reported as a deletion.
const trackedDeletes = this.trackedChanges
.asSorted()
.filter(
tc =>
tc.tracking.type === 'delete' &&
tc.range.overlaps(resultRetentionRange)
)
const sourceOffset = this.sourceCursor - this.result.length
for (const trackedDelete of trackedDeletes) {
const resultTrackedDelete = trackedDelete.range
const sourceTrackedDelete = trackedDelete.range.moveBy(sourceOffset)
if (scanCursor < resultTrackedDelete.start) {
if (retain.tracking.type === 'delete') {
this.changes.push({
d: this.source.slice(
this.sourceCursor,
sourceTrackedDelete.start
),
p: this.result.length,
})
}
this.result += this.source.slice(
this.sourceCursor,
sourceTrackedDelete.start
)
scanCursor = resultTrackedDelete.start
this.sourceCursor = sourceTrackedDelete.start
}
const endOfInsertionResult = Math.min(
resultTrackedDelete.end,
resultRetentionRange.end
)
const endOfInsertionSource = Math.min(
sourceTrackedDelete.end,
sourceRetentionRange.end
)
const text = this.source.slice(this.sourceCursor, endOfInsertionSource)
if (
retain.tracking.type === 'none' ||
retain.tracking.type === 'insert'
) {
this.changes.push({
i: text,
p: this.result.length,
})
}
this.result += text
// skip the tracked delete itself
scanCursor = endOfInsertionResult
this.sourceCursor = endOfInsertionSource
if (scanCursor >= resultRetentionRange.end) {
break
}
}
}
if (scanCursor < resultRetentionRange.end) {
// The last region is not a tracked delete. But we should still handle
// a new tracked delete as a deletion.
const text = this.source.slice(
this.sourceCursor,
sourceRetentionRange.end
)
if (retain.tracking?.type === 'delete') {
this.changes.push({
d: text,
p: this.result.length,
})
}
this.result += text
}
this.sourceCursor = sourceRetentionRange.end
}
/**
*
* @param {InsertOp} insert
*/
applyInsert(insert) {
if (insert.tracking?.type !== 'delete') {
// Skip tracked deletions
this.changes.push({
i: insert.insertion,
p: this.result.length,
})
}
this.result += insert.insertion
// The source cursor doesn't advance
}
/**
*
* @param {RemoveOp} deletion
*/
applyDelete(deletion) {
const sourceDeletionRange = new Range(this.sourceCursor, deletion.length)
const resultDeletionRange = new Range(this.result.length, deletion.length)
const trackedDeletes = this.trackedChanges
.asSorted()
.filter(
tc =>
tc.tracking.type === 'delete' &&
tc.range.overlaps(resultDeletionRange)
)
.sort((a, b) => a.range.start - b.range.start)
let scanCursor = this.result.length
const sourceOffset = this.sourceCursor - this.result.length
for (const trackedDelete of trackedDeletes) {
const resultTrackDeleteRange = trackedDelete.range
const sourceTrackDeleteRange = trackedDelete.range.moveBy(sourceOffset)
if (scanCursor < resultTrackDeleteRange.start) {
this.changes.push({
d: this.source.slice(this.sourceCursor, sourceTrackDeleteRange.start),
p: this.result.length,
})
}
// skip the tracked delete itself
scanCursor = Math.min(resultTrackDeleteRange.end, resultDeletionRange.end)
this.sourceCursor = Math.min(
sourceTrackDeleteRange.end,
sourceDeletionRange.end
)
if (scanCursor >= resultDeletionRange.end) {
break
}
}
if (scanCursor < resultDeletionRange.end) {
this.changes.push({
d: this.source.slice(this.sourceCursor, sourceDeletionRange.end),
p: this.result.length,
})
}
this.sourceCursor = sourceDeletionRange.end
}
finish() {
if (this.sourceCursor < this.source.length) {
this.result += this.source.slice(this.sourceCursor)
}
for (const op of this.changes) {
if ('p' in op && typeof op.p === 'number') {
// Maybe we have to move the position of the deletion to account for
// tracked changes that we're hiding in the UI.
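// Illustrative example: with op.p === 10 and tracked deletes over [2, 5)
// and [8, 12), the adjustment is 3 (fully before op.p) + 2 (the overlapping
// prefix, 10 - 8), so the reported position becomes 5.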
op.p -= this.trackedChanges
.asSorted()
.filter(tc => tc.tracking.type === 'delete' && tc.range.start < op.p)
.map(tc => {
if (tc.range.end < op.p) {
return tc.range.length
}
return op.p - tc.range.start
})
.reduce((a, b) => a + b, 0)
}
}
}
}


@@ -0,0 +1,274 @@
import _ from 'lodash'
import OError from '@overleaf/o-error'
export class ConsistencyError extends OError {}
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
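// buildDiff folds a sequence of updates into a diff made of { u } (unchanged),
// { i, meta } (inserted) and { d, meta } (deleted) parts. Illustrative example:
// buildDiff('hello', [update]) with update.op = [{ i: ' world', p: 5 }]
// yields [{ u: 'hello' }, { i: ' world', meta: update.meta }].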
export function buildDiff(initialContent, updates) {
let diff = [{ u: initialContent }]
for (const update of updates) {
diff = applyUpdateToDiff(diff, update)
}
diff = compressDiff(diff)
return diff
}
_mocks.compressDiff = diff => {
const newDiff = []
for (const part of diff) {
const users = part.meta?.users ?? []
if (part.meta?.origin?.kind === 'history-resync') {
// Skip history resync updates. Inserts are converted to unchanged text
// and deletes are skipped, so that they effectively don't appear in the
// diff.
if (part.u != null) {
newDiff.push(part)
} else if (part.i != null) {
newDiff.push({ u: part.i })
}
continue
}
if (newDiff.length === 0) {
// If we haven't seen other parts yet, we have nothing to merge.
newDiff.push(part)
continue
}
const lastPart = newDiff[newDiff.length - 1]
const lastUsers = lastPart.meta?.users ?? []
const usersNotInBothParts = _.xor(users, lastUsers)
if (usersNotInBothParts.length > 0) {
// If the set of users in the last part and this part are not the same, we
// can't merge.
newDiff.push(part)
continue
}
if (lastPart.i != null && part.i != null) {
// Merge two inserts
lastPart.i += part.i
lastPart.meta.start_ts = Math.min(
lastPart.meta.start_ts,
part.meta.start_ts
)
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts)
} else if (lastPart.d != null && part.d != null) {
// Merge two deletes
lastPart.d += part.d
lastPart.meta.start_ts = Math.min(
lastPart.meta.start_ts,
part.meta.start_ts
)
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts)
} else {
newDiff.push(part)
}
}
return newDiff
}
export function compressDiff(...args) {
return _mocks.compressDiff(...args)
}
export function applyOpToDiff(diff, op, meta) {
let consumedDiff
let remainingDiff = diff.slice()
;({ consumedDiff, remainingDiff } = _consumeToOffset(remainingDiff, op.p))
const newDiff = consumedDiff
if (op.i != null) {
newDiff.push({
i: op.i,
meta,
})
} else if (op.d != null) {
;({ consumedDiff, remainingDiff } = _consumeDiffAffectedByDeleteOp(
remainingDiff,
op,
meta
))
newDiff.push(...(consumedDiff || []))
}
newDiff.push(...(remainingDiff || []))
return newDiff
}
_mocks.applyUpdateToDiff = (diff, update) => {
for (const op of update.op) {
if (op.broken !== true) {
diff = applyOpToDiff(diff, op, update.meta)
}
}
return diff
}
export function applyUpdateToDiff(...args) {
return _mocks.applyUpdateToDiff(...args)
}
function _consumeToOffset(remainingDiff, totalOffset) {
let part
const consumedDiff = []
let position = 0
while ((part = remainingDiff.shift())) {
const length = _getLengthOfDiffPart(part)
if (part.d != null) {
consumedDiff.push(part)
} else if (position + length >= totalOffset) {
const partOffset = totalOffset - position
if (partOffset > 0) {
consumedDiff.push(_slicePart(part, 0, partOffset))
}
if (partOffset < length) {
remainingDiff.unshift(_slicePart(part, partOffset))
}
break
} else {
position += length
consumedDiff.push(part)
}
}
return {
consumedDiff,
remainingDiff,
}
}
function _consumeDiffAffectedByDeleteOp(remainingDiff, deleteOp, meta) {
const consumedDiff = []
let remainingOp = deleteOp
while (remainingOp && remainingDiff.length > 0) {
let newPart
;({ newPart, remainingDiff, remainingOp } = _consumeDeletedPart(
remainingDiff,
remainingOp,
meta
))
if (newPart != null) {
consumedDiff.push(newPart)
}
}
return {
consumedDiff,
remainingDiff,
}
}
function _consumeDeletedPart(remainingDiff, op, meta) {
let deletedContent, newPart, remainingOp
const part = remainingDiff.shift()
const partLength = _getLengthOfDiffPart(part)
if (part.d != null) {
// Skip existing deletes
remainingOp = op
newPart = part
} else if (partLength > op.d.length) {
// Only the first bit of the part has been deleted
const remainingPart = _slicePart(part, op.d.length)
remainingDiff.unshift(remainingPart)
deletedContent = _getContentOfPart(part).slice(0, op.d.length)
if (deletedContent !== op.d) {
throw new ConsistencyError(
`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`
)
}
if (part.u != null) {
newPart = {
d: op.d,
meta,
}
} else if (part.i != null) {
newPart = null
}
remainingOp = null
} else if (partLength === op.d.length) {
// The entire part has been deleted and it is the last part affected by this op
deletedContent = _getContentOfPart(part)
if (deletedContent !== op.d) {
throw new ConsistencyError(
`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`
)
}
if (part.u != null) {
newPart = {
d: op.d,
meta,
}
} else if (part.i != null) {
newPart = null
}
remainingOp = null
} else if (partLength < op.d.length) {
// The entire part has been deleted and there is more
deletedContent = _getContentOfPart(part)
const opContent = op.d.slice(0, deletedContent.length)
if (deletedContent !== opContent) {
throw new ConsistencyError(
`deleted content, '${deletedContent}', does not match delete op, '${opContent}'`
)
}
if (part.u) {
newPart = {
d: part.u,
meta,
}
} else if (part.i != null) {
newPart = null
}
remainingOp = {
p: op.p,
d: op.d.slice(_getLengthOfDiffPart(part)),
}
}
return {
newPart,
remainingDiff,
remainingOp,
}
}
function _slicePart(basePart, from, to) {
let part
if (basePart.u != null) {
part = { u: basePart.u.slice(from, to) }
} else if (basePart.i != null) {
part = { i: basePart.i.slice(from, to) }
}
if (basePart.meta != null) {
part.meta = basePart.meta
}
return part
}
function _getLengthOfDiffPart(part) {
return (part.u || part.d || part.i || '').length
}
function _getContentOfPart(part) {
return part.u || part.d || part.i || ''
}


@@ -0,0 +1,240 @@
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import async from 'async'
import * as DiffGenerator from './DiffGenerator.js'
import * as FileTreeDiffGenerator from './FileTreeDiffGenerator.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as ChunkTranslator from './ChunkTranslator.js'
import * as Errors from './Errors.js'
let MAX_CHUNK_REQUESTS = 10
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
export function getDiff(projectId, pathname, fromVersion, toVersion, callback) {
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error) {
return callback(OError.tag(error))
}
_getProjectUpdatesBetweenVersions(
projectId,
pathname,
fromVersion,
toVersion,
(error, result) => {
if (error) {
return callback(OError.tag(error))
}
const { binary, initialContent, updates } = result
let diff
if (binary) {
diff = { binary: true }
} else {
try {
diff = DiffGenerator.buildDiff(initialContent, updates)
} catch (err) {
return callback(
OError.tag(err, 'failed to build diff', {
projectId,
pathname,
fromVersion,
toVersion,
})
)
}
}
callback(null, diff)
}
)
})
}
export function getFileTreeDiff(projectId, fromVersion, toVersion, callback) {
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error) {
return callback(OError.tag(error))
}
_getChunksAsSingleChunk(
projectId,
fromVersion,
toVersion,
(error, chunk) => {
let diff
if (error) {
return callback(OError.tag(error))
}
try {
diff = FileTreeDiffGenerator.buildDiff(chunk, fromVersion, toVersion)
} catch (error1) {
error = error1
if (error instanceof Errors.InconsistentChunkError) {
return callback(error)
} else {
throw OError.tag(error)
}
}
callback(null, diff)
}
)
})
}
export function _getChunksAsSingleChunk(
projectId,
fromVersion,
toVersion,
callback
) {
logger.debug(
{ projectId, fromVersion, toVersion },
'[_getChunksAsSingleChunk] getting chunks'
)
_getChunks(projectId, fromVersion, toVersion, (error, chunks) => {
if (error) {
return callback(OError.tag(error))
}
logger.debug(
{ projectId, fromVersion, toVersion, chunks },
'[_getChunksAsSingleChunk] got chunks'
)
const chunk = _concatChunks(chunks)
callback(null, chunk)
})
}
_mocks._getProjectUpdatesBetweenVersions = (
projectId,
pathname,
fromVersion,
toVersion,
callback
) => {
_getChunksAsSingleChunk(projectId, fromVersion, toVersion, (error, chunk) => {
if (error) {
return callback(OError.tag(error))
}
logger.debug(
{ projectId, pathname, fromVersion, toVersion, chunk },
'[_getProjectUpdatesBetweenVersions] concatted chunk'
)
ChunkTranslator.convertToDiffUpdates(
projectId,
chunk,
pathname,
fromVersion,
toVersion,
callback
)
})
}
export function _getProjectUpdatesBetweenVersions(...args) {
_mocks._getProjectUpdatesBetweenVersions(...args)
}
_mocks._getChunks = (projectId, fromVersion, toVersion, callback) => {
let chunksRequested = 0
let lastChunkStartVersion = toVersion
const chunks = []
function shouldRequestAnotherChunk(cb) {
const stillUnderChunkLimit = chunksRequested < MAX_CHUNK_REQUESTS
const stillNeedVersions = fromVersion < lastChunkStartVersion
const stillSaneStartVersion = lastChunkStartVersion > 0
logger.debug(
{
projectId,
stillUnderChunkLimit,
stillNeedVersions,
stillSaneStartVersion,
fromVersion,
lastChunkStartVersion,
chunksRequested,
},
'[_getChunks.shouldRequestAnotherChunk]'
)
return cb(
null,
stillUnderChunkLimit && stillNeedVersions && stillSaneStartVersion
)
}
function getNextChunk(cb) {
logger.debug(
{
projectId,
lastChunkStartVersion,
},
'[_getChunks.getNextChunk]'
)
WebApiManager.getHistoryId(projectId, (error, historyId) => {
if (error) {
return cb(OError.tag(error))
}
HistoryStoreManager.getChunkAtVersion(
projectId,
historyId,
lastChunkStartVersion,
(error, chunk) => {
if (error) {
return cb(OError.tag(error))
}
lastChunkStartVersion = chunk.chunk.startVersion
chunksRequested += 1
chunks.push(chunk)
cb()
}
)
})
}
getNextChunk(error => {
if (error) {
return callback(OError.tag(error))
}
async.whilst(shouldRequestAnotherChunk, getNextChunk, error => {
if (error) {
return callback(error)
}
if (chunksRequested >= MAX_CHUNK_REQUESTS) {
error = new Errors.BadRequestError('Diff spans too many chunks')
callback(error)
} else {
callback(null, chunks)
}
})
})
}
export function _getChunks(...args) {
_mocks._getChunks(...args)
}
_mocks._concatChunks = chunks => {
chunks.reverse()
const chunk = chunks[0]
// We will append all of the changes from the later
// chunks onto the first one, to form one 'big' chunk.
for (const nextChunk of chunks.slice(1)) {
chunk.chunk.history.changes = chunk.chunk.history.changes.concat(
nextChunk.chunk.history.changes
)
}
return chunk
}
function _concatChunks(...args) {
return _mocks._concatChunks(...args)
}
// for tests
export function setMaxChunkRequests(value) {
MAX_CHUNK_REQUESTS = value
}
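
A brief usage sketch for getDiff; the project id, pathname, and version numbers are placeholders. It flushes any queued updates, fetches the chunks covering the requested range, and builds a text diff, or reports { binary: true } for binary files:

getDiff('project-id', 'main.tex', 10, 13, (err, diff) => {
  if (err) return console.error(err)
  if (diff.binary) {
    console.log('binary file, no text diff')
  } else {
    // diff is an array of { u }, { i, meta } and { d, meta } parts
    console.log(diff)
  }
})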


@@ -0,0 +1,80 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import request from 'request'
import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import OError from '@overleaf/o-error'
export function getDocument(projectId, docId, callback) {
if (callback == null) {
callback = function () {}
}
const url = `${Settings.apis.documentupdater.url}/project/${projectId}/doc/${docId}`
logger.debug({ projectId, docId }, 'getting doc from document updater')
return request.get(url, function (error, res, body) {
if (error != null) {
return callback(OError.tag(error))
}
if (res.statusCode >= 200 && res.statusCode < 300) {
try {
body = JSON.parse(body)
} catch (error1) {
return callback(error1)
}
logger.debug(
{ projectId, docId, version: body.version },
'got doc from document updater'
)
return callback(null, body.lines.join('\n'), body.version)
} else {
error = new OError(
`doc updater returned a non-success status code: ${res.statusCode}`,
{ project_id: projectId, doc_id: docId, url }
)
return callback(error)
}
})
}
export function setDocument(projectId, docId, content, userId, callback) {
if (callback == null) {
callback = function () {}
}
const url = `${Settings.apis.documentupdater.url}/project/${projectId}/doc/${docId}`
logger.debug({ projectId, docId }, 'setting doc in document updater')
return request.post(
{
url,
json: {
lines: content.split('\n'),
source: 'restore',
user_id: userId,
undoing: true,
},
},
function (error, res, body) {
if (error != null) {
return callback(OError.tag(error))
}
if (res.statusCode >= 200 && res.statusCode < 300) {
return callback(null)
} else {
error = new OError(
`doc updater returned a non-success status code: ${res.statusCode}`,
{ project_id: projectId, doc_id: docId, url }
)
return callback(error)
}
}
)
}


@@ -0,0 +1,267 @@
// @ts-check
import { callbackify } from 'node:util'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
import OError from '@overleaf/o-error'
import { db } from './mongodb.js'
/**
* @import { ProjectHistoryFailure } from './mongo-types'
*/
/**
* @param {string} projectId
* @param {number} queueSize
* @param {Error} error
* @return {Promise<ProjectHistoryFailure>} the failure record
*/
async function record(projectId, queueSize, error) {
const errorRecord = {
queueSize,
error: error.toString(),
stack: error.stack ?? '',
ts: new Date(),
}
logger.debug(
{ projectId, errorRecord },
'recording failed attempt to process updates'
)
const result = await db.projectHistoryFailures.findOneAndUpdate(
{ project_id: projectId },
{
$set: errorRecord,
$inc: { attempts: 1 },
$push: {
history: {
$each: [errorRecord],
$position: 0,
// only keep recent failures
$slice: 10,
},
},
},
{ upsert: true, returnDocument: 'after', includeResultMetadata: true }
)
if (result.value == null) {
// Since we upsert, the result should always have a value
throw new OError('no value returned when recording an error', { projectId })
}
return result.value
}
async function clearError(projectId) {
await db.projectHistoryFailures.deleteOne({ project_id: projectId })
}
async function setForceDebug(projectId, state) {
if (state == null) {
state = true
}
logger.debug({ projectId, state }, 'setting forceDebug state for project')
await db.projectHistoryFailures.updateOne(
{ project_id: projectId },
{ $set: { forceDebug: state } },
{ upsert: true }
)
}
// we only record the sync start time, and not the end time, because the
// record should be cleared on success.
async function recordSyncStart(projectId) {
await db.projectHistoryFailures.updateOne(
{ project_id: projectId },
{
$currentDate: { resyncStartedAt: true },
$inc: { resyncAttempts: 1 },
$push: {
history: {
$each: [{ resyncStartedAt: new Date() }],
$position: 0,
$slice: 10,
},
},
},
{ upsert: true }
)
}
/**
* @param projectId
*/
async function getFailureRecord(projectId) {
return await db.projectHistoryFailures.findOne({ project_id: projectId })
}
async function getLastFailure(projectId) {
const result = await db.projectHistoryFailures.findOneAndUpdate(
{ project_id: projectId },
{ $inc: { requestCount: 1 } }, // increment the request count every time we check the last failure
{ projection: { error: 1, ts: 1 } }
)
return result && result.value
}
async function getFailedProjects() {
return await db.projectHistoryFailures.find({}).toArray()
}
async function getFailuresByType() {
const results = await db.projectHistoryFailures.find({}).toArray()
const failureCounts = {}
const failureAttempts = {}
const failureRequests = {}
const maxQueueSize = {}
// count all the failures and number of attempts by type
for (const result of results || []) {
const failureType = result.error
const attempts = result.attempts || 1 // allow for field to be absent
const requests = result.requestCount || 0
const queueSize = result.queueSize || 0
if (failureCounts[failureType] > 0) {
failureCounts[failureType]++
failureAttempts[failureType] += attempts
failureRequests[failureType] += requests
maxQueueSize[failureType] = Math.max(queueSize, maxQueueSize[failureType])
} else {
failureCounts[failureType] = 1
failureAttempts[failureType] = attempts
failureRequests[failureType] = requests
maxQueueSize[failureType] = queueSize
}
}
return { failureCounts, failureAttempts, failureRequests, maxQueueSize }
}
async function getFailures() {
const { failureCounts, failureAttempts, failureRequests, maxQueueSize } =
await getFailuresByType()
let attempts, failureType, label, requests
const shortNames = {
'Error: bad response from filestore: 404': 'filestore-404',
'Error: bad response from filestore: 500': 'filestore-500',
'NotFoundError: got a 404 from web api': 'web-api-404',
'OError: history store a non-success status code: 413': 'history-store-413',
'OError: history store a non-success status code: 422': 'history-store-422',
'OError: history store a non-success status code: 500': 'history-store-500',
'OError: history store a non-success status code: 503': 'history-store-503',
'Error: history store a non-success status code: 413': 'history-store-413',
'Error: history store a non-success status code: 422': 'history-store-422',
'Error: history store a non-success status code: 500': 'history-store-500',
'Error: history store a non-success status code: 503': 'history-store-503',
'Error: web returned a non-success status code: 500 (attempts: 2)':
'web-500',
'Error: ESOCKETTIMEDOUT': 'socket-timeout',
'Error: no project found': 'no-project-found',
'OpsOutOfOrderError: project structure version out of order on incoming updates':
'incoming-project-version-out-of-order',
'OpsOutOfOrderError: doc version out of order on incoming updates':
'incoming-doc-version-out-of-order',
'OpsOutOfOrderError: project structure version out of order':
'chunk-project-version-out-of-order',
'OpsOutOfOrderError: doc version out of order':
'chunk-doc-version-out-of-order',
'Error: failed to extend lock': 'lock-overrun',
'Error: tried to release timed out lock': 'lock-overrun',
'Error: Timeout': 'lock-overrun',
'Error: sync ongoing': 'sync-ongoing',
'SyncError: unexpected resyncProjectStructure update': 'sync-error',
'[object Error]': 'unknown-error-object',
'UpdateWithUnknownFormatError: update with unknown format':
'unknown-format',
'Error: update with unknown format': 'unknown-format',
'TextOperationError: The base length of the second operation has to be the target length of the first operation':
'text-op-error',
'Error: ENOSPC: no space left on device, write': 'ENOSPC',
'*': 'other',
}
// set all the known errors to zero if not present (otherwise gauges stay on their last value)
const summaryCounts = {}
const summaryAttempts = {}
const summaryRequests = {}
const summaryMaxQueueSize = {}
for (failureType in shortNames) {
label = shortNames[failureType]
summaryCounts[label] = 0
summaryAttempts[label] = 0
summaryRequests[label] = 0
summaryMaxQueueSize[label] = 0
}
// record a metric for each type of failure
for (failureType in failureCounts) {
const failureCount = failureCounts[failureType]
label = shortNames[failureType] || shortNames['*']
summaryCounts[label] += failureCount
summaryAttempts[label] += failureAttempts[failureType]
summaryRequests[label] += failureRequests[failureType]
summaryMaxQueueSize[label] = Math.max(
maxQueueSize[failureType],
summaryMaxQueueSize[label]
)
}
for (label in summaryCounts) {
const count = summaryCounts[label]
metrics.globalGauge('failed', count, 1, { status: label })
}
for (label in summaryAttempts) {
attempts = summaryAttempts[label]
metrics.globalGauge('attempts', attempts, 1, { status: label })
}
for (label in summaryRequests) {
requests = summaryRequests[label]
metrics.globalGauge('requests', requests, 1, { status: label })
}
for (label in summaryMaxQueueSize) {
const queueSize = summaryMaxQueueSize[label]
metrics.globalGauge('max-queue-size', queueSize, 1, { status: label })
}
return {
counts: summaryCounts,
attempts: summaryAttempts,
requests: summaryRequests,
maxQueueSize: summaryMaxQueueSize,
}
}
// EXPORTS
const getFailedProjectsCb = callbackify(getFailedProjects)
const getFailureRecordCb = callbackify(getFailureRecord)
const getFailuresCb = callbackify(getFailures)
const getLastFailureCb = callbackify(getLastFailure)
const recordCb = callbackify(record)
const clearErrorCb = callbackify(clearError)
const recordSyncStartCb = callbackify(recordSyncStart)
const setForceDebugCb = callbackify(setForceDebug)
export {
getFailedProjectsCb as getFailedProjects,
getFailureRecordCb as getFailureRecord,
getLastFailureCb as getLastFailure,
getFailuresCb as getFailures,
recordCb as record,
clearErrorCb as clearError,
recordSyncStartCb as recordSyncStart,
setForceDebugCb as setForceDebug,
}
export const promises = {
getFailedProjects,
getFailureRecord,
getLastFailure,
getFailures,
record,
clearError,
recordSyncStart,
setForceDebug,
}
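
Both the callbackified exports and the promise API point at the same functions; a short sketch of the promise variant, assuming an async context and a placeholder project id:

// Record a failure, inspect it, then clear it once the project recovers.
const failure = await promises.record('project-id', 42, new Error('boom'))
console.log(failure.attempts, failure.error) // e.g. 1, 'Error: boom'
await promises.clearError('project-id')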


@@ -0,0 +1,11 @@
import OError from '@overleaf/o-error'
export class NotFoundError extends OError {}
export class BadRequestError extends OError {}
export class SyncError extends OError {}
export class OpsOutOfOrderError extends OError {}
export class InconsistentChunkError extends OError {}
export class UpdateWithUnknownFormatError extends OError {}
export class UnexpectedOpTypeError extends OError {}
export class TooManyRequestsError extends OError {}
export class NeedFullProjectStructureResyncError extends OError {}


@@ -0,0 +1,129 @@
/* eslint-disable
no-return-assign,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import Core from 'overleaf-editor-core'
import logger from '@overleaf/logger'
import * as Errors from './Errors.js'
const { MoveFileOperation, AddFileOperation, EditFileOperation } = Core
export function buildDiff(chunk, fromVersion, toVersion) {
chunk = Core.Chunk.fromRaw(chunk.chunk)
const chunkStartVersion = chunk.getStartVersion()
const diff = _getInitialDiffSnapshot(chunk, fromVersion)
const changes = chunk
.getChanges()
.slice(fromVersion - chunkStartVersion, toVersion - chunkStartVersion)
for (let i = 0; i < changes.length; i++) {
const change = changes[i]
for (const operation of Array.from(change.getOperations())) {
if (operation.pathname === null || operation.pathname === '') {
// skip operations for missing files
logger.warn({ diff, operation }, 'invalid pathname in operation')
} else if (operation instanceof EditFileOperation) {
_applyEditFileToDiff(diff, operation)
} else if (operation instanceof AddFileOperation) {
_applyAddFileToDiff(diff, operation)
} else if (operation instanceof MoveFileOperation) {
if (operation.isRemoveFile()) {
const deletedAtV = fromVersion + i
_applyDeleteFileToDiff(diff, operation, deletedAtV)
} else {
_applyMoveFileToDiff(diff, operation)
}
}
}
}
return Object.values(diff)
}
function _getInitialDiffSnapshot(chunk, fromVersion) {
// Start with a 'diff' which is snapshot of the filetree at the beginning,
// with nothing in the diff marked as changed.
// Use a bare object to protect against reserved names.
const diff = Object.create(null)
const files = _getInitialFiles(chunk, fromVersion)
for (const [pathname, file] of Object.entries(files)) {
diff[pathname] = { pathname, editable: file.isEditable() }
}
return diff
}
function _getInitialFiles(chunk, fromVersion) {
const snapshot = chunk.getSnapshot()
const changes = chunk
.getChanges()
.slice(0, fromVersion - chunk.getStartVersion())
snapshot.applyAll(changes)
return snapshot.fileMap.files
}
function _applyAddFileToDiff(diff, operation) {
return (diff[operation.pathname] = {
pathname: operation.pathname,
operation: 'added',
editable: operation.file.isEditable(),
})
}
function _applyEditFileToDiff(diff, operation) {
const change = diff[operation.pathname]
if (change?.operation == null) {
// avoid exception for non-existent change
return (diff[operation.pathname] = {
pathname: operation.pathname,
operation: 'edited',
})
}
}
function _applyMoveFileToDiff(diff, operation) {
if (
diff[operation.newPathname] != null &&
diff[operation.newPathname].operation !== 'removed'
) {
const err = new Errors.InconsistentChunkError(
'trying to move to file that already exists',
{ diff, operation }
)
throw err
}
const change = diff[operation.pathname]
if (change == null) {
logger.warn({ diff, operation }, 'tried to rename non-existent file')
return
}
change.newPathname = operation.newPathname
if (change.operation === 'added') {
// If this file was added this time, just leave it as an add, but
// at the new name.
change.pathname = operation.newPathname
delete change.newPathname
} else {
change.operation = 'renamed'
}
diff[operation.newPathname] = change
delete diff[operation.pathname]
}
function _applyDeleteFileToDiff(diff, operation, deletedAtV) {
// avoid exception for non-existent change
if (diff[operation.pathname] != null) {
diff[operation.pathname].operation = 'removed'
diff[operation.pathname].deletedAtV = deletedAtV
}
}


@@ -0,0 +1,142 @@
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import async from 'async'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import metrics from '@overleaf/metrics'
import _ from 'lodash'
import * as RedisManager from './RedisManager.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as ErrorRecorder from './ErrorRecorder.js'
export function flushIfOld(projectId, cutoffTime, callback) {
if (callback == null) {
callback = function () {}
}
return RedisManager.getFirstOpTimestamp(
projectId,
function (err, firstOpTimestamp) {
if (err != null) {
return callback(OError.tag(err))
}
// In the normal case, the flush marker will be set with the
// timestamp of the oldest operation in the queue by docupdater.
// If the marker is not set for any reason, we flush it anyway
// for safety.
if (!firstOpTimestamp || firstOpTimestamp < cutoffTime) {
logger.debug(
{ projectId, firstOpTimestamp, cutoffTime },
'flushing old project'
)
metrics.inc('flush-old-updates', 1, { status: 'flushed' })
return UpdatesProcessor.processUpdatesForProject(projectId, callback)
} else {
metrics.inc('flush-old-updates', 1, { status: 'skipped' })
return callback()
}
}
)
}
export function flushOldOps(options, callback) {
if (callback == null) {
callback = function () {}
}
logger.debug({ options }, 'starting flush of old ops')
// allow running flush in background for cron jobs
if (options.background) {
// return immediate response to client, then discard callback
callback(null, { message: 'running flush in background' })
callback = function () {}
}
return RedisManager.getProjectIdsWithHistoryOps(
null,
function (error, projectIds) {
if (error != null) {
return callback(OError.tag(error))
}
return ErrorRecorder.getFailedProjects(
function (error, projectHistoryFailures) {
if (error != null) {
return callback(OError.tag(error))
}
// exclude failed projects already in projectHistoryFailures
const failedProjects = new Set()
for (const entry of Array.from(projectHistoryFailures)) {
failedProjects.add(entry.project_id)
}
// randomise order so we get different projects if there is a limit
projectIds = _.shuffle(projectIds)
const maxAge = options.maxAge || 6 * 3600 // default to 6 hours
const cutoffTime = new Date(Date.now() - maxAge * 1000)
const startTime = new Date()
let count = 0
const jobs = projectIds.map(
projectId =>
function (cb) {
const timeTaken = new Date() - startTime
count++
if (
options?.timeout &&
timeTaken > options.timeout
) {
// finish early due to timeout, return an error to bail out of the async iteration
logger.debug('background retries timed out')
return cb(new OError('retries timed out'))
}
if (
options?.limit &&
count > options.limit
) {
// finish early due to reaching limit, return an error to bail out of the async iteration
logger.debug({ count }, 'background retries hit limit')
return cb(new OError('hit limit'))
}
if (failedProjects.has(projectId)) {
// skip failed projects
return setTimeout(cb, options.queueDelay || 100) // pause between flushes
}
return flushIfOld(projectId, cutoffTime, function (err) {
if (err != null) {
logger.warn(
{ projectId, err },
'error flushing old project'
)
}
return setTimeout(cb, options.queueDelay || 100)
})
}
) // pause between flushes
return async.series(
async.reflectAll(jobs),
function (error, results) {
const success = []
const failure = []
results.forEach((result, i) => {
if (
result.error != null &&
!['retries timed out', 'hit limit'].includes(
result?.error?.message
)
) {
// ignore expected errors
return failure.push(projectIds[i])
} else {
return success.push(projectIds[i])
}
})
return callback(error, { success, failure, failedProjects })
}
)
}
)
}
)
}
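
The options read by flushOldOps, shown with illustrative values (a bounded foreground run with a per-project pause):

flushOldOps(
  {
    maxAge: 3600, // flush projects whose oldest queued op is older than one hour (seconds)
    limit: 1000, // stop after this many projects
    timeout: 60000, // or after this many milliseconds
    queueDelay: 200, // pause between project flushes (milliseconds)
    // background: true would respond immediately with { message: ... } and
    // keep flushing asynchronously
  },
  (err, result) => {
    if (err) return console.error(err)
    console.log(result) // { success, failure, failedProjects }
  }
)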


@@ -0,0 +1,58 @@
/* eslint-disable
no-undef,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { promisify } from 'node:util'
import fs from 'node:fs'
import crypto from 'node:crypto'
import OError from '@overleaf/o-error'
import { pipeline } from 'node:stream'
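// Both helpers below compute Git-style blob object hashes:
// sha1('blob ' + byteLength + '\0' + content), hex-encoded. For example, the
// empty string hashes to 'e69de29bb2d1d6434b8b29ae775ad8c2e48c5391', matching
// `git hash-object` on an empty file.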
export function _getBlobHashFromString(string) {
const byteLength = Buffer.byteLength(string)
const hash = crypto.createHash('sha1')
hash.setEncoding('hex')
hash.update('blob ' + byteLength + '\x00')
hash.update(string, 'utf8')
hash.end()
return hash.read()
}
export function _getBlobHash(fsPath, callback) {
return fs.stat(fsPath, function (err, stats) {
if (err != null) {
OError.tag(err, 'failed to stat file in _getBlobHash', { fsPath })
return callback(err)
}
const byteLength = stats.size
const hash = crypto.createHash('sha1')
hash.setEncoding('hex')
hash.update('blob ' + byteLength + '\x00')
pipeline(fs.createReadStream(fsPath), hash, err => {
if (err) {
callback(
OError.tag(err, 'error streaming file from disk', {
fsPath,
byteLength,
})
)
} else {
hash.end()
callback(null, hash.read(), byteLength)
}
})
})
}
export const promises = {
_getBlobHash: promisify(_getBlobHash),
}


@@ -0,0 +1,78 @@
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { ObjectId } from './mongodb.js'
import request from 'request'
import async from 'async'
import settings from '@overleaf/settings'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as LockManager from './LockManager.js'
const { port } = settings.internal.history
export function check(callback) {
const projectId = new ObjectId(settings.history.healthCheck.project_id)
const url = `http://127.0.0.1:${port}/project/${projectId}`
logger.debug({ projectId }, 'running health check')
const jobs = [
cb =>
request.get(
{ url: `http://127.0.0.1:${port}/check_lock`, timeout: 3000 },
function (err, res, body) {
if (err != null) {
OError.tag(err, 'error checking lock for health check', {
project_id: projectId,
})
return cb(err)
} else if (res?.statusCode !== 200) {
return cb(new Error(`status code not 200, it's ${res.statusCode}`))
} else {
return cb()
}
}
),
cb =>
request.post(
{ url: `${url}/flush`, timeout: 10000 },
function (err, res, body) {
if (err != null) {
OError.tag(err, 'error flushing for health check', {
project_id: projectId,
})
return cb(err)
} else if (res?.statusCode !== 204) {
return cb(new Error(`status code not 204, it's ${res.statusCode}`))
} else {
return cb()
}
}
),
cb =>
request.get(
{ url: `${url}/updates`, timeout: 10000 },
function (err, res, body) {
if (err != null) {
OError.tag(err, 'error getting updates for health check', {
project_id: projectId,
})
return cb(err)
} else if (res?.statusCode !== 200) {
return cb(new Error(`status code not 200, it's ${res.statusCode}`))
} else {
return cb()
}
}
),
]
return async.series(jobs, callback)
}
export function checkLock(callback) {
return LockManager.healthCheck(callback)
}


@@ -0,0 +1,22 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import * as WebApiManager from './WebApiManager.js'
import logger from '@overleaf/logger'
export function shouldUseProjectHistory(projectId, callback) {
if (callback == null) {
callback = function () {}
}
return WebApiManager.getHistoryId(projectId, (error, historyId) =>
callback(error, historyId != null)
)
}


@@ -0,0 +1,123 @@
// @ts-check
import {
Range,
TrackedChange,
TrackedChangeList,
CommentList,
Comment,
TrackingProps,
} from 'overleaf-editor-core'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
/**
* @import { AddDocUpdate } from './types'
* @import { CommentRawData, TrackedChangeRawData } from 'overleaf-editor-core/lib/types'
*/
/**
*
* @param {AddDocUpdate} update
* @returns {{trackedChanges: TrackedChangeRawData[], comments: CommentRawData[]} | undefined}
*/
export function createRangeBlobDataFromUpdate(update) {
logger.debug({ update }, 'createBlobDataFromUpdate')
if (update.doc == null || update.docLines == null) {
throw new OError('Not an AddFileUpdate')
}
if (
!update.ranges ||
(update.ranges.changes == null && update.ranges.comments == null)
) {
return undefined
}
if (
(!update.ranges.changes || update.ranges.changes.length === 0) &&
(!update.ranges.comments || update.ranges.comments.length === 0)
) {
return undefined
}
const sortedRanges = [...(update.ranges.changes || [])].sort((a, b) => {
if (a.op.p !== b.op.p) {
return a.op.p - b.op.p
}
if ('i' in a.op && a.op.i != null && 'd' in b.op && b.op.d != null) {
// Move deletes before inserts
return 1
}
return -1
})
const tcList = new TrackedChangeList([])
for (const change of sortedRanges) {
if ('d' in change.op && change.op.d != null) {
const length = change.op.d.length
const range = new Range(change.op.hpos ?? change.op.p, length)
tcList.add(
new TrackedChange(
range,
new TrackingProps(
'delete',
change.metadata.user_id,
new Date(change.metadata.ts)
)
)
)
} else if ('i' in change.op && change.op.i != null) {
const length = change.op.i.length
const range = new Range(change.op.hpos ?? change.op.p, length)
tcList.add(
new TrackedChange(
range,
new TrackingProps(
'insert',
change.metadata.user_id,
new Date(change.metadata.ts)
)
)
)
}
}
const comments = [...(update.ranges.comments || [])].sort((a, b) => {
return a.op.p - b.op.p
})
/** @type {Map<string, {ranges: Range[], resolved: boolean}>} */
const commentMap = new Map()
for (const comment of comments) {
const id = comment.op.t
if (!commentMap.has(id)) {
commentMap.set(id, {
ranges: [],
resolved: comment.op.resolved ?? false,
})
}
const entry = commentMap.get(id)
if (!entry) {
throw new Error('Comment entry not found')
}
if (entry.resolved !== (comment.op.resolved ?? false)) {
throw new Error('Mismatching resolved status for comment')
}
const commentLength = comment.op.c.length
if (commentLength > 0) {
// Empty comments in operations are translated to detached comments
const range = new Range(comment.op.hpos ?? comment.op.p, commentLength)
entry.ranges.push(range)
}
}
const commentList = new CommentList(
[...commentMap.entries()].map(
([id, commentObj]) =>
new Comment(id, commentObj.ranges, commentObj.resolved)
)
)
return { trackedChanges: tcList.toRaw(), comments: commentList.toRaw() }
}
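
An illustrative input for createRangeBlobDataFromUpdate; the field values are placeholders, but the shape follows what the function reads:

const rangesBlob = createRangeBlobDataFromUpdate({
  doc: 'doc-id',
  docLines: 'Hello world',
  ranges: {
    changes: [
      {
        op: { p: 0, i: 'Hello' },
        metadata: { user_id: 'user-1', ts: '2025-04-24T00:00:00Z' },
      },
    ],
    comments: [{ op: { p: 6, c: 'world', t: 'comment-id', resolved: false } }],
  },
})
// rangesBlob is { trackedChanges, comments } in raw form, or undefined when
// the update carries no ranges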


@@ -0,0 +1,625 @@
import { promisify } from 'node:util'
import fs from 'node:fs'
import request from 'request'
import stream from 'node:stream'
import logger from '@overleaf/logger'
import _ from 'lodash'
import { URL } from 'node:url'
import OError from '@overleaf/o-error'
import Settings from '@overleaf/settings'
import {
fetchStream,
fetchNothing,
RequestFailedError,
} from '@overleaf/fetch-utils'
import * as Versions from './Versions.js'
import * as Errors from './Errors.js'
import * as LocalFileWriter from './LocalFileWriter.js'
import * as HashManager from './HashManager.js'
import * as HistoryBlobTranslator from './HistoryBlobTranslator.js'
import { promisifyMultiResult } from '@overleaf/promise-utils'
const HTTP_REQUEST_TIMEOUT = Settings.overleaf.history.requestTimeout
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
class StringStream extends stream.Readable {
_read() {}
}
_mocks.getMostRecentChunk = (projectId, historyId, callback) => {
const path = `projects/${historyId}/latest/history`
logger.debug({ projectId, historyId }, 'getting chunk from history service')
_requestChunk({ path, json: true }, callback)
}
/**
* @param {Callback} callback
*/
export function getMostRecentChunk(projectId, historyId, callback) {
_mocks.getMostRecentChunk(projectId, historyId, callback)
}
/**
* @param {Callback} callback
*/
export function getChunkAtVersion(projectId, historyId, version, callback) {
const path = `projects/${historyId}/versions/${version}/history`
logger.debug(
{ projectId, historyId, version },
'getting chunk from history service for version'
)
_requestChunk({ path, json: true }, callback)
}
export function getMostRecentVersion(projectId, historyId, callback) {
getMostRecentChunk(projectId, historyId, (error, chunk) => {
if (error) {
return callback(OError.tag(error))
}
const mostRecentVersion =
chunk.chunk.startVersion + (chunk.chunk.history.changes || []).length
const lastChange = _.last(
_.sortBy(chunk.chunk.history.changes || [], x => x.timestamp)
)
// find the latest project and doc versions in the chunk
_getLatestProjectVersion(projectId, chunk, (err1, projectVersion) =>
_getLatestV2DocVersions(projectId, chunk, (err2, v2DocVersions) => {
// return the project and doc versions
const projectStructureAndDocVersions = {
project: projectVersion,
docs: v2DocVersions,
}
callback(
err1 || err2,
mostRecentVersion,
projectStructureAndDocVersions,
lastChange,
chunk
)
})
)
})
}
/**
* @param {string} projectId
* @param {string} historyId
* @param {Object} opts
* @param {boolean} [opts.readOnly]
* @param {(error: Error, rawChunk?: { startVersion: number, endVersion: number, endTimestamp: Date}) => void} callback
*/
export function getMostRecentVersionRaw(projectId, historyId, opts, callback) {
const path = `projects/${historyId}/latest/history/raw`
logger.debug(
{ projectId, historyId },
'getting raw chunk from history service'
)
const qs = opts.readOnly ? { readOnly: true } : {}
_requestHistoryService({ path, json: true, qs }, (err, body) => {
if (err) return callback(OError.tag(err))
const { startVersion, endVersion, endTimestamp } = body
callback(null, {
startVersion,
endVersion,
endTimestamp: new Date(endTimestamp),
})
})
}
function _requestChunk(options, callback) {
_requestHistoryService(options, (err, chunk) => {
if (err) {
return callback(OError.tag(err))
}
if (
chunk == null ||
chunk.chunk == null ||
chunk.chunk.startVersion == null
) {
const { path } = options
return callback(new OError('unexpected response', { path }))
}
callback(null, chunk)
})
}
function _getLatestProjectVersion(projectId, chunk, callback) {
// find the initial project version
const projectVersionInSnapshot = chunk.chunk.history.snapshot?.projectVersion
let projectVersion = projectVersionInSnapshot
const chunkStartVersion = chunk.chunk.startVersion
// keep track of any first error
let error = null
// iterate over the changes in chunk to find the most recent project version
for (const [changeIdx, change] of (
chunk.chunk.history.changes || []
).entries()) {
const projectVersionInChange = change.projectVersion
if (projectVersionInChange != null) {
if (
projectVersion != null &&
Versions.lt(projectVersionInChange, projectVersion)
) {
if (!error) {
error = new Errors.OpsOutOfOrderError(
'project structure version out of order',
{
projectId,
chunkStartVersion,
projectVersionInSnapshot,
changeIdx,
projectVersion,
projectVersionInChange,
}
)
}
} else {
projectVersion = projectVersionInChange
}
}
}
callback(error, projectVersion)
}
function _getLatestV2DocVersions(projectId, chunk, callback) {
// find the initial doc versions (indexed by docId as this is immutable)
const v2DocVersions =
(chunk.chunk.history.snapshot &&
chunk.chunk.history.snapshot.v2DocVersions) ||
{}
// keep track of any errors
let error = null
// iterate over the changes in the chunk to find the most recent doc versions
for (const change of chunk.chunk.history.changes || []) {
if (change.v2DocVersions != null) {
for (const docId in change.v2DocVersions) {
const docInfo = change.v2DocVersions[docId]
const { v } = docInfo
if (
v2DocVersions[docId] &&
v2DocVersions[docId].v != null &&
Versions.lt(v, v2DocVersions[docId].v)
) {
if (!error) {
logger.warn(
{
projectId,
docId,
changeVersion: docInfo,
previousVersion: v2DocVersions[docId],
},
'doc version out of order in chunk'
)
error = new Errors.OpsOutOfOrderError('doc version out of order')
}
} else {
v2DocVersions[docId] = docInfo
}
}
}
}
callback(error, v2DocVersions)
}
export function getProjectBlob(historyId, blobHash, callback) {
logger.debug({ historyId, blobHash }, 'getting blob from history service')
_requestHistoryService(
{ path: `projects/${historyId}/blobs/${blobHash}` },
callback
)
}
/**
* @param {Callback} callback
*/
export function getProjectBlobStream(historyId, blobHash, callback) {
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${blobHash}`
logger.debug(
{ historyId, blobHash },
'getting blob stream from history service'
)
fetchStream(url, getHistoryFetchOptions())
.then(stream => {
callback(null, stream)
})
.catch(err => callback(OError.tag(err)))
}
export function sendChanges(
projectId,
historyId,
changes,
endVersion,
callback
) {
logger.debug(
{ projectId, historyId, endVersion },
'sending changes to history service'
)
_requestHistoryService(
{
path: `projects/${historyId}/legacy_changes`,
qs: { end_version: endVersion },
method: 'POST',
json: changes,
},
error => {
if (error) {
OError.tag(error, 'failed to send changes to v1', {
projectId,
historyId,
endVersion,
errorCode: error.code,
statusCode: error.statusCode,
body: error.body,
})
return callback(error)
}
callback()
}
)
}
function createBlobFromString(historyId, data, fileId, callback) {
const stringStream = new StringStream()
stringStream.push(data)
stringStream.push(null)
LocalFileWriter.bufferOnDisk(
stringStream,
'',
fileId,
(fsPath, cb) => {
_createBlob(historyId, fsPath, cb)
},
callback
)
}
function _checkBlobExists(historyId, hash, callback) {
if (!hash) return callback(null, false)
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${hash}`
fetchNothing(url, {
method: 'HEAD',
...getHistoryFetchOptions(),
})
.then(res => {
callback(null, true)
})
.catch(err => {
if (err instanceof RequestFailedError && err.response.status === 404) {
return callback(null, false)
}
callback(OError.tag(err), false)
})
}
function _rewriteFilestoreUrl(url, projectId, callback) {
if (!url) {
return { fileId: null, filestoreURL: null }
}
// Rewrite the filestore url to point to the location in the local
// settings for this service (this avoids problems with cross-
// datacentre requests when running filestore in multiple locations).
const { pathname: fileStorePath } = new URL(url)
const urlMatch = /^\/project\/([0-9a-f]{24})\/file\/([0-9a-f]{24})$/.exec(
fileStorePath
)
if (urlMatch == null) {
return callback(new OError('invalid file for blob creation'))
}
if (urlMatch[1] !== projectId) {
return callback(new OError('invalid project for blob creation'))
}
const fileId = urlMatch[2]
const filestoreURL = `${Settings.apis.filestore.url}/project/${projectId}/file/${fileId}`
return { filestoreURL, fileId }
}
export function createBlobForUpdate(projectId, historyId, update, callback) {
callback = _.once(callback)
if (update.doc != null && update.docLines != null) {
let ranges
try {
ranges = HistoryBlobTranslator.createRangeBlobDataFromUpdate(update)
} catch (error) {
return callback(error)
}
createBlobFromString(
historyId,
update.docLines,
`project-${projectId}-doc-${update.doc}`,
(err, fileHash) => {
if (err) {
return callback(err)
}
if (ranges) {
createBlobFromString(
historyId,
JSON.stringify(ranges),
`project-${projectId}-doc-${update.doc}-ranges`,
(err, rangesHash) => {
if (err) {
return callback(err)
}
logger.debug(
{ fileHash, rangesHash },
'created blobs for both ranges and content'
)
return callback(null, { file: fileHash, ranges: rangesHash })
}
)
} else {
logger.debug({ fileHash }, 'created blob for content')
return callback(null, { file: fileHash })
}
}
)
} else if (
update.file != null &&
(update.url != null || update.createdBlob)
) {
const { fileId, filestoreURL } = _rewriteFilestoreUrl(
update.url,
projectId,
callback
)
_checkBlobExists(historyId, update.hash, (err, blobExists) => {
if (err) {
return callback(
new OError(
'error checking whether blob exists',
{ projectId, historyId, update },
err
)
)
} else if (blobExists) {
logger.debug(
{ projectId, fileId, update },
'Skipping blob creation as it has already been created'
)
return callback(null, { file: update.hash })
} else if (update.createdBlob) {
logger.warn(
{ projectId, fileId, update },
'created blob does not exist, reading from filestore'
)
}
if (!filestoreURL) {
return callback(
new OError('no filestore URL provided and blob was not created')
)
}
if (!Settings.apis.filestore.enabled) {
return callback(new OError('blocking filestore read', { update }))
}
fetchStream(filestoreURL, {
signal: AbortSignal.timeout(HTTP_REQUEST_TIMEOUT),
})
.then(stream => {
LocalFileWriter.bufferOnDisk(
stream,
filestoreURL,
`project-${projectId}-file-${fileId}`,
(fsPath, cb) => {
_createBlob(historyId, fsPath, cb)
},
(err, fileHash) => {
if (err) {
return callback(err)
}
if (update.hash && update.hash !== fileHash) {
logger.warn(
{ projectId, fileId, webHash: update.hash, fileHash },
'hash mismatch between web and project-history'
)
}
logger.debug({ fileHash }, 'created blob for file')
callback(null, { file: fileHash })
}
)
})
.catch(err => {
if (
err instanceof RequestFailedError &&
err.response.status === 404
) {
logger.warn(
{ projectId, historyId, filestoreURL },
'File contents not found in filestore. Storing in history as an empty file'
)
const emptyStream = new StringStream()
LocalFileWriter.bufferOnDisk(
emptyStream,
filestoreURL,
`project-${projectId}-file-${fileId}`,
(fsPath, cb) => {
_createBlob(historyId, fsPath, cb)
},
(err, fileHash) => {
if (err) {
return callback(err)
}
logger.debug({ fileHash }, 'created empty blob for file')
callback(null, { file: fileHash })
}
)
emptyStream.push(null) // send an EOF signal
} else {
callback(OError.tag(err, 'error from filestore', { filestoreURL }))
}
})
})
} else {
const error = new OError('invalid update for blob creation')
callback(error)
}
}
function _createBlob(historyId, fsPath, _callback) {
const callback = _.once(_callback)
HashManager._getBlobHash(fsPath, (error, hash, byteLength) => {
if (error) {
return callback(OError.tag(error))
}
const outStream = fs.createReadStream(fsPath)
logger.debug(
{ fsPath, historyId, hash, byteLength },
'sending blob to history service'
)
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${hash}`
fetchNothing(url, {
method: 'PUT',
body: outStream,
headers: { 'Content-Length': byteLength }, // add the content length to work around problems with chunked encoding in node 18
...getHistoryFetchOptions(),
})
.then(res => {
callback(null, hash)
})
.catch(err => {
callback(OError.tag(err))
})
})
}
export function initializeProject(historyId, callback) {
_requestHistoryService(
{
method: 'POST',
path: 'projects',
json: historyId == null ? true : { projectId: historyId },
},
(error, project) => {
if (error) {
return callback(OError.tag(error))
}
const id = project.projectId
if (id == null) {
        error = new OError('history store did not return a project id', {
          project,
        })
return callback(error)
}
callback(null, id)
}
)
}
export function deleteProject(projectId, callback) {
_requestHistoryService(
{ method: 'DELETE', path: `projects/${projectId}` },
callback
)
}
const getProjectBlobAsync = promisify(getProjectBlob)
class BlobStore {
constructor(projectId) {
this.projectId = projectId
}
async getString(hash) {
return await getProjectBlobAsync(this.projectId, hash)
}
async getObject(hash) {
const string = await this.getString(hash)
return JSON.parse(string)
}
}
export function getBlobStore(projectId) {
return new BlobStore(projectId)
}
function _requestOptions(options) {
const requestOptions = {
method: options.method || 'GET',
url: `${Settings.overleaf.history.host}/${options.path}`,
timeout: HTTP_REQUEST_TIMEOUT,
auth: {
user: Settings.overleaf.history.user,
pass: Settings.overleaf.history.pass,
sendImmediately: true,
},
}
if (options.json != null) {
requestOptions.json = options.json
}
if (options.body != null) {
requestOptions.body = options.body
}
if (options.qs != null) {
requestOptions.qs = options.qs
}
return requestOptions
}
/**
* @return {RequestInit}
*/
function getHistoryFetchOptions() {
return {
signal: AbortSignal.timeout(HTTP_REQUEST_TIMEOUT),
basicAuth: {
user: Settings.overleaf.history.user,
password: Settings.overleaf.history.pass,
},
}
}
function _requestHistoryService(options, callback) {
const requestOptions = _requestOptions(options)
request(requestOptions, (error, res, body) => {
if (error) {
return callback(OError.tag(error))
}
if (res.statusCode >= 200 && res.statusCode < 300) {
callback(null, body)
} else {
const { method, url, qs } = requestOptions
error = new OError(
`history store a non-success status code: ${res.statusCode}`,
{ method, url, qs, statusCode: res.statusCode }
)
callback(error)
}
})
}
export const promises = {
/** @type {(projectId: string, historyId: string) => Promise<{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}>} */
getMostRecentChunk: promisify(getMostRecentChunk),
getChunkAtVersion: promisify(getChunkAtVersion),
getMostRecentVersion: promisifyMultiResult(getMostRecentVersion, [
'version',
'projectStructureAndDocVersions',
'lastChange',
'mostRecentChunk',
]),
getMostRecentVersionRaw: promisify(getMostRecentVersionRaw),
getProjectBlob: promisify(getProjectBlob),
getProjectBlobStream: promisify(getProjectBlobStream),
sendChanges: promisify(sendChanges),
createBlobForUpdate: promisify(createBlobForUpdate),
initializeProject: promisify(initializeProject),
deleteProject: promisify(deleteProject),
}

View File

@@ -0,0 +1,582 @@
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import request from 'request'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as SummarizedUpdatesManager from './SummarizedUpdatesManager.js'
import * as DiffManager from './DiffManager.js'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as SnapshotManager from './SnapshotManager.js'
import * as HealthChecker from './HealthChecker.js'
import * as SyncManager from './SyncManager.js'
import * as ErrorRecorder from './ErrorRecorder.js'
import * as RedisManager from './RedisManager.js'
import * as LabelsManager from './LabelsManager.js'
import * as HistoryApiManager from './HistoryApiManager.js'
import * as RetryManager from './RetryManager.js'
import * as FlushManager from './FlushManager.js'
import { pipeline } from 'node:stream'
import { RequestFailedError } from '@overleaf/fetch-utils'
const ONE_DAY_IN_SECONDS = 24 * 60 * 60
export function getProjectBlob(req, res, next) {
const historyId = req.params.history_id
const blobHash = req.params.hash
HistoryStoreManager.getProjectBlobStream(
historyId,
blobHash,
(err, stream) => {
if (err != null) {
if (err instanceof RequestFailedError && err.response.status === 404) {
return res.status(404).end()
}
return next(OError.tag(err))
}
res.setHeader('Cache-Control', `private, max-age=${ONE_DAY_IN_SECONDS}`)
pipeline(stream, res, err => {
if (err) next(err)
// res.end() is already called via 'end' event by pipeline.
})
}
)
}
export function initializeProject(req, res, next) {
const { historyId } = req.body
HistoryStoreManager.initializeProject(historyId, (error, id) => {
if (error != null) {
return next(OError.tag(error))
}
res.json({ project: { id } })
})
}
export function flushProject(req, res, next) {
const projectId = req.params.project_id
if (req.query.debug) {
logger.debug(
{ projectId },
'compressing project history in single-step mode'
)
UpdatesProcessor.processSingleUpdateForProject(projectId, error => {
if (error != null) {
return next(OError.tag(error))
}
res.sendStatus(204)
})
} else if (req.query.bisect) {
logger.debug({ projectId }, 'compressing project history in bisect mode')
UpdatesProcessor.processUpdatesForProjectUsingBisect(
projectId,
UpdatesProcessor.REDIS_READ_BATCH_SIZE,
error => {
if (error != null) {
return next(OError.tag(error))
}
res.sendStatus(204)
}
)
} else {
logger.debug({ projectId }, 'compressing project history')
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error != null) {
return next(OError.tag(error))
}
res.sendStatus(204)
})
}
}
export function dumpProject(req, res, next) {
const projectId = req.params.project_id
const batchSize = req.query.count || UpdatesProcessor.REDIS_READ_BATCH_SIZE
logger.debug({ projectId }, 'retrieving raw updates')
UpdatesProcessor.getRawUpdates(projectId, batchSize, (error, rawUpdates) => {
if (error != null) {
return next(OError.tag(error))
}
res.json(rawUpdates)
})
}
export function flushOld(req, res, next) {
const { maxAge, queueDelay, limit, timeout, background } = req.query
const options = { maxAge, queueDelay, limit, timeout, background }
FlushManager.flushOldOps(options, (error, results) => {
if (error != null) {
return next(OError.tag(error))
}
res.send(results)
})
}
export function getDiff(req, res, next) {
const projectId = req.params.project_id
const { pathname, from, to } = req.query
if (pathname == null) {
return res.sendStatus(400)
}
logger.debug({ projectId, pathname, from, to }, 'getting diff')
DiffManager.getDiff(projectId, pathname, from, to, (error, diff) => {
if (error != null) {
return next(OError.tag(error))
}
res.json({ diff })
})
}
export function getFileTreeDiff(req, res, next) {
const projectId = req.params.project_id
const { to, from } = req.query
DiffManager.getFileTreeDiff(projectId, from, to, (error, diff) => {
if (error != null) {
return next(OError.tag(error))
}
res.json({ diff })
})
}
export function getUpdates(req, res, next) {
const projectId = req.params.project_id
const { before, min_count: minCount } = req.query
SummarizedUpdatesManager.getSummarizedProjectUpdates(
projectId,
{ before, min_count: minCount },
(error, updates, nextBeforeTimestamp) => {
if (error != null) {
return next(OError.tag(error))
}
for (const update of updates) {
// Sets don't JSONify, so convert to arrays
update.pathnames = Array.from(update.pathnames || []).sort()
}
res.json({
updates,
nextBeforeTimestamp,
})
}
)
}
export function latestVersion(req, res, next) {
const projectId = req.params.project_id
logger.debug({ projectId }, 'compressing project history and getting version')
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error != null) {
return next(OError.tag(error))
}
WebApiManager.getHistoryId(projectId, (error, historyId) => {
if (error != null) {
return next(OError.tag(error))
}
HistoryStoreManager.getMostRecentVersion(
projectId,
historyId,
(error, version, projectStructureAndDocVersions, lastChange) => {
if (error != null) {
return next(OError.tag(error))
}
res.json({
version,
timestamp: lastChange != null ? lastChange.timestamp : undefined,
v2Authors: lastChange != null ? lastChange.v2Authors : undefined,
})
}
)
})
})
}
export function getFileSnapshot(req, res, next) {
const { project_id: projectId, version, pathname } = req.params
SnapshotManager.getFileSnapshotStream(
projectId,
version,
pathname,
(error, stream) => {
if (error != null) {
return next(OError.tag(error))
}
pipeline(stream, res, err => {
if (err) next(err)
// res.end() is already called via 'end' event by pipeline.
})
}
)
}
export function getRangesSnapshot(req, res, next) {
const { project_id: projectId, version, pathname } = req.params
SnapshotManager.getRangesSnapshot(
projectId,
version,
pathname,
(err, ranges) => {
if (err) {
return next(OError.tag(err))
}
res.json(ranges)
}
)
}
export function getFileMetadataSnapshot(req, res, next) {
const { project_id: projectId, version, pathname } = req.params
SnapshotManager.getFileMetadataSnapshot(
projectId,
version,
pathname,
(err, data) => {
if (err) {
return next(OError.tag(err))
}
res.json(data)
}
)
}
export function getLatestSnapshot(req, res, next) {
const { project_id: projectId } = req.params
WebApiManager.getHistoryId(projectId, (error, historyId) => {
if (error) return next(OError.tag(error))
SnapshotManager.getLatestSnapshot(
projectId,
historyId,
(error, details) => {
if (error != null) {
return next(error)
}
const { snapshot, version } = details
res.json({ snapshot: snapshot.toRaw(), version })
}
)
})
}
export function getChangesInChunkSince(req, res, next) {
const { project_id: projectId } = req.params
const { since } = req.query
WebApiManager.getHistoryId(projectId, (error, historyId) => {
if (error) return next(OError.tag(error))
SnapshotManager.getChangesInChunkSince(
projectId,
historyId,
since,
(error, details) => {
if (error != null) {
return next(error)
}
const { latestStartVersion, changes } = details
res.json({
latestStartVersion,
changes: changes.map(c => c.toRaw()),
})
}
)
})
}
export function getProjectSnapshot(req, res, next) {
const { project_id: projectId, version } = req.params
SnapshotManager.getProjectSnapshot(
projectId,
version,
(error, snapshotData) => {
if (error != null) {
return next(error)
}
res.json(snapshotData)
}
)
}
export function getPathsAtVersion(req, res, next) {
const { project_id: projectId, version } = req.params
SnapshotManager.getPathsAtVersion(projectId, version, (error, result) => {
if (error != null) {
return next(error)
}
res.json(result)
})
}
export function healthCheck(req, res) {
HealthChecker.check(err => {
if (err != null) {
logger.err({ err }, 'error performing health check')
res.sendStatus(500)
} else {
res.sendStatus(200)
}
})
}
export function checkLock(req, res) {
HealthChecker.checkLock(err => {
if (err != null) {
logger.err({ err }, 'error performing lock check')
res.sendStatus(500)
} else {
res.sendStatus(200)
}
})
}
export function resyncProject(req, res, next) {
const projectId = req.params.project_id
const options = {}
if (req.body.origin) {
options.origin = req.body.origin
}
if (req.body.historyRangesMigration) {
options.historyRangesMigration = req.body.historyRangesMigration
}
if (req.query.force || req.body.force) {
// this will delete the queue and clear the sync state
// use if the project is completely broken
SyncManager.startHardResync(projectId, options, error => {
if (error != null) {
return next(error)
}
// flush the sync operations
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
})
} else {
SyncManager.startResync(projectId, options, error => {
if (error != null) {
return next(error)
}
// flush the sync operations
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
})
}
}
export function forceDebugProject(req, res, next) {
const projectId = req.params.project_id
// set the debug flag to true unless we see ?clear=true
const state = !req.query.clear
ErrorRecorder.setForceDebug(projectId, state, error => {
if (error != null) {
return next(error)
}
// display the failure record to help debugging
ErrorRecorder.getFailureRecord(projectId, (error, result) => {
if (error != null) {
return next(error)
}
res.send(result)
})
})
}
export function getFailures(req, res, next) {
ErrorRecorder.getFailures((error, result) => {
if (error != null) {
return next(error)
}
res.send({ failures: result })
})
}
export function getQueueCounts(req, res, next) {
RedisManager.getProjectIdsWithHistoryOpsCount((err, queuedProjectsCount) => {
if (err != null) {
return next(err)
}
res.send({ queuedProjects: queuedProjectsCount })
})
}
export function getLabels(req, res, next) {
const projectId = req.params.project_id
HistoryApiManager.shouldUseProjectHistory(
projectId,
(error, shouldUseProjectHistory) => {
if (error != null) {
return next(error)
}
if (shouldUseProjectHistory) {
LabelsManager.getLabels(projectId, (error, labels) => {
if (error != null) {
return next(error)
}
res.json(labels)
})
} else {
res.sendStatus(409)
}
}
)
}
export function createLabel(req, res, next) {
const { project_id: projectId, user_id: userIdParam } = req.params
const {
version,
comment,
user_id: userIdBody,
created_at: createdAt,
validate_exists: validateExists,
} = req.body
// Temporarily looking up both params and body while rolling out changes
// in the router path - https://github.com/overleaf/internal/pull/20200
const userId = userIdParam || userIdBody
HistoryApiManager.shouldUseProjectHistory(
projectId,
(error, shouldUseProjectHistory) => {
if (error != null) {
return next(error)
}
if (shouldUseProjectHistory) {
LabelsManager.createLabel(
projectId,
userId,
version,
comment,
createdAt,
validateExists,
(error, label) => {
if (error != null) {
return next(error)
}
res.json(label)
}
)
} else {
logger.error(
{
projectId,
userId,
version,
comment,
createdAt,
validateExists,
},
'not using v2 history'
)
res.sendStatus(409)
}
}
)
}
/**
* This will delete a label if it is owned by the current user. If you wish to
* delete a label regardless of the current user, then use `deleteLabel` instead.
*/
export function deleteLabelForUser(req, res, next) {
const {
project_id: projectId,
user_id: userId,
label_id: labelId,
} = req.params
LabelsManager.deleteLabelForUser(projectId, userId, labelId, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
}
export function deleteLabel(req, res, next) {
const { project_id: projectId, label_id: labelId } = req.params
LabelsManager.deleteLabel(projectId, labelId, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
}
export function retryFailures(req, res, next) {
const { failureType, timeout, limit, callbackUrl } = req.query
if (callbackUrl) {
// send response but run in background when callbackUrl provided
res.send({ retryStatus: 'running retryFailures in background' })
}
RetryManager.retryFailures(
{ failureType, timeout, limit },
(error, result) => {
if (callbackUrl) {
// if present, notify the callbackUrl on success
if (!error) {
// Needs Node 12
// const callbackHeaders = Object.fromEntries(Object.entries(req.headers || {}).filter(([k,v]) => k.match(/^X-CALLBACK-/i)))
const callbackHeaders = {}
for (const key of Object.getOwnPropertyNames(
req.headers || {}
).filter(key => key.match(/^X-CALLBACK-/i))) {
const found = key.match(/^X-CALLBACK-(.*)/i)
callbackHeaders[found[1]] = req.headers[key]
}
request({ url: callbackUrl, headers: callbackHeaders })
}
} else {
if (error != null) {
return next(error)
}
res.send({ retryStatus: result })
}
}
)
}
export function transferLabels(req, res, next) {
const { from_user: fromUser, to_user: toUser } = req.params
LabelsManager.transferLabels(fromUser, toUser, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
}
export function deleteProject(req, res, next) {
const { project_id: projectId } = req.params
// clear the timestamp before clearing the queue,
// because the queue location is used in the migration
RedisManager.clearFirstOpTimestamp(projectId, err => {
if (err) {
return next(err)
}
RedisManager.clearCachedHistoryId(projectId, err => {
if (err) {
return next(err)
}
RedisManager.destroyDocUpdatesQueue(projectId, err => {
if (err) {
return next(err)
}
SyncManager.clearResyncState(projectId, err => {
if (err) {
return next(err)
}
ErrorRecorder.clearError(projectId, err => {
if (err) {
return next(err)
}
res.sendStatus(204)
})
})
})
})
})
}

View File

@@ -0,0 +1,175 @@
import OError from '@overleaf/o-error'
import { db, ObjectId } from './mongodb.js'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as WebApiManager from './WebApiManager.js'
export function getLabels(projectId, callback) {
_toObjectId(projectId, function (error, projectId) {
if (error) {
return callback(OError.tag(error))
}
db.projectHistoryLabels
.find({ project_id: new ObjectId(projectId) })
.toArray(function (error, labels) {
if (error) {
return callback(OError.tag(error))
}
const formattedLabels = labels.map(_formatLabel)
callback(null, formattedLabels)
})
})
}
export function createLabel(
projectId,
userId,
version,
comment,
createdAt,
shouldValidateExists,
callback
) {
const validateVersionExists = function (callback) {
if (shouldValidateExists === false) {
callback()
} else {
_validateChunkExistsForVersion(projectId.toString(), version, callback)
}
}
_toObjectId(projectId, userId, function (error, projectId, userId) {
if (error) {
return callback(OError.tag(error))
}
validateVersionExists(function (error) {
if (error) {
return callback(OError.tag(error))
}
createdAt = createdAt != null ? new Date(createdAt) : new Date()
const label = {
project_id: new ObjectId(projectId),
comment,
version,
created_at: createdAt,
}
if (userId) {
label.user_id = userId
}
db.projectHistoryLabels.insertOne(label, function (error, confirmation) {
if (error) {
return callback(OError.tag(error))
}
label._id = confirmation.insertedId
callback(null, _formatLabel(label))
})
})
})
}
export function deleteLabelForUser(projectId, userId, labelId, callback) {
_toObjectId(
projectId,
userId,
labelId,
function (error, projectId, userId, labelId) {
if (error) {
return callback(OError.tag(error))
}
db.projectHistoryLabels.deleteOne(
{
_id: new ObjectId(labelId),
project_id: new ObjectId(projectId),
user_id: new ObjectId(userId),
},
callback
)
}
)
}
export function deleteLabel(projectId, labelId, callback) {
_toObjectId(projectId, labelId, function (error, projectId, labelId) {
if (error) {
return callback(OError.tag(error))
}
db.projectHistoryLabels.deleteOne(
{
_id: new ObjectId(labelId),
project_id: new ObjectId(projectId),
},
callback
)
})
}
export function transferLabels(fromUserId, toUserId, callback) {
_toObjectId(fromUserId, toUserId, function (error, fromUserId, toUserId) {
if (error) {
return callback(OError.tag(error))
}
db.projectHistoryLabels.updateMany(
{
user_id: fromUserId,
},
{
$set: { user_id: toUserId },
},
callback
)
})
}
function _toObjectId(...args1) {
const adjustedLength = Math.max(args1.length, 1)
const args = args1.slice(0, adjustedLength - 1)
const callback = args1[adjustedLength - 1]
try {
const ids = args.map(id => {
if (id) {
return new ObjectId(id)
} else {
return undefined
}
})
callback(null, ...ids)
} catch (error) {
callback(error)
}
}
function _formatLabel(label) {
return {
id: label._id,
comment: label.comment,
version: label.version,
user_id: label.user_id,
created_at: label.created_at,
}
}
function _validateChunkExistsForVersion(projectId, version, callback) {
UpdatesProcessor.processUpdatesForProject(projectId, function (error) {
if (error) {
return callback(error)
}
WebApiManager.getHistoryId(projectId, function (error, historyId) {
if (error) {
return callback(error)
}
HistoryStoreManager.getChunkAtVersion(
projectId,
historyId,
version,
function (error) {
if (error) {
return callback(error)
}
callback()
}
)
})
})
}

View File

@@ -0,0 +1,88 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import fs from 'node:fs'
import { randomUUID } from 'node:crypto'
import Path from 'node:path'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import _ from 'lodash'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as HashManager from './HashManager.js'
export function createStub(fsPath, fileId, fileSize, fileHash, callback) {
if (callback == null) {
callback = function () {}
}
callback = _.once(callback)
const newFsPath = Path.join(
Settings.path.uploadFolder,
randomUUID() + `-${fileId}-stub`
)
const writeStream = fs.createWriteStream(newFsPath)
writeStream.on('error', function (error) {
OError.tag(error, 'error writing stub file', { fsPath, newFsPath })
return fs.unlink(newFsPath, () => callback(error))
})
writeStream.on('finish', function () {
logger.debug(
{ fsPath, fileId, fileSize, fileHash },
'replaced large file with stub'
)
return callback(null, newFsPath)
}) // let the consumer unlink the file
const stubLines = [
'FileTooLargeError v1',
'File too large to be stored in history service',
`id ${fileId}`,
`size ${fileSize} bytes`,
`hash ${fileHash}`,
'\0', // null byte to make this a binary file
]
writeStream.write(stubLines.join('\n'))
return writeStream.end()
}
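// For reference, a stub produced by createStub looks like this on disk (values
// below are illustrative, not taken from a real project):
//
//   FileTooLargeError v1
//   File too large to be stored in history service
//   id 57fd0b1f53a8396d22b2c24b
//   size 104857600 bytes
//   hash 163f3ab2b0e8d3c4f0e4f0a1b2c3d4e5f6a7b8c9
//   <trailing null byte>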
export function replaceWithStubIfNeeded(fsPath, fileId, fileSize, callback) {
if (callback == null) {
callback = function () {}
}
if (
Settings.maxFileSizeInBytes != null &&
fileSize > Settings.maxFileSizeInBytes
) {
logger.error(
{ fsPath, fileId, maxFileSizeInBytes: Settings.maxFileSizeInBytes },
'file too large, will use stub'
)
return HashManager._getBlobHash(fsPath, function (error, fileHash) {
if (error != null) {
return callback(error)
}
return createStub(
fsPath,
fileId,
fileSize,
fileHash,
function (error, newFsPath) {
if (error != null) {
return callback(error)
}
return callback(null, newFsPath)
}
)
})
} else {
return callback(null, fsPath)
}
}

View File

@@ -0,0 +1,114 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import fs from 'node:fs'
import { pipeline } from 'node:stream'
import { randomUUID } from 'node:crypto'
import path from 'node:path'
import _ from 'lodash'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import OError from '@overleaf/o-error'
import * as LargeFileManager from './LargeFileManager.js'
//
// This method takes a stream, buffers its contents to disk, and then passes
// the path of the on-disk copy to a consumer.
//
// This is useful if we're piping one network stream to another. If the stream
// we're piping to can't consume data as quickly as the one we're consuming
// from, then large quantities of data may be held in memory. Instead, the read
// stream can be passed to this method; the data will then be held on disk
// rather than in memory and cleaned up once it has been consumed.
//
export function bufferOnDisk(
inStream,
url,
fileId,
consumeOutStream,
callback
) {
const timer = new metrics.Timer('LocalFileWriter.writeStream')
const fsPath = path.join(
Settings.path.uploadFolder,
randomUUID() + `-${fileId}`
)
const cleanup = _.once((streamError, res) => {
return deleteFile(fsPath, function (cleanupError) {
if (streamError) {
OError.tag(streamError, 'error deleting temporary file', {
fsPath,
url,
})
}
if (cleanupError) {
OError.tag(cleanupError)
}
if (streamError && cleanupError) {
// logging the cleanup error in case only the stream error is sent to the callback
logger.error(cleanupError)
}
return callback(streamError || cleanupError, res)
})
})
logger.debug({ fsPath, url }, 'writing file locally')
const writeStream = fs.createWriteStream(fsPath)
pipeline(inStream, writeStream, err => {
if (err) {
OError.tag(err, 'problem writing file locally', {
fsPath,
url,
})
return cleanup(err)
}
timer.done()
// in future check inStream.response.headers for hash value here
logger.debug({ fsPath, url }, 'stream closed after writing file locally')
const fileSize = writeStream.bytesWritten
return LargeFileManager.replaceWithStubIfNeeded(
fsPath,
fileId,
fileSize,
function (err, newFsPath) {
if (err != null) {
OError.tag(err, 'problem in large file manager', {
newFsPath,
fsPath,
fileId,
fileSize,
})
return cleanup(err)
}
return consumeOutStream(newFsPath, cleanup)
}
)
})
}
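// Usage sketch (illustrative; `uploadFromDisk` is an assumed consumer, not part
// of this module):
//
//   bufferOnDisk(inStream, url, fileId,
//     (fsPath, done) => uploadFromDisk(fsPath, done), // consume the on-disk copy
//     (err, result) => {
//       // the temporary file has been deleted by the time we get here
//     }
//   )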
export function deleteFile(fsPath, callback) {
if (fsPath == null || fsPath === '') {
return callback()
}
logger.debug({ fsPath }, 'removing local temp file')
return fs.unlink(fsPath, function (err) {
if (err != null && err.code !== 'ENOENT') {
// ignore errors deleting the file when it was never created
return callback(OError.tag(err))
} else {
return callback()
}
})
}

View File

@@ -0,0 +1,314 @@
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { promisify } from 'node:util'
import async from 'async'
import metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import redis from '@overleaf/redis-wrapper'
import os from 'node:os'
import crypto from 'node:crypto'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
const LOCK_TEST_INTERVAL = 50 // 50ms between each test of the lock
const MAX_LOCK_WAIT_TIME = 10000 // 10s maximum time to spend trying to get the lock
export const LOCK_TTL = 360 // seconds
export const MIN_LOCK_EXTENSION_INTERVAL = 1000 // 1s minimum interval when extending a lock
export const UNLOCK_SCRIPT =
'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end'
const EXTEND_SCRIPT =
'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("expire", KEYS[1], ARGV[2]) else return 0 end'
const HOST = os.hostname()
const PID = process.pid
const RND = crypto.randomBytes(4).toString('hex')
let COUNT = 0
const rclient = redis.createClient(Settings.redis.lock)
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
// Use a signed lock value as described in
// http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
// to prevent accidental unlocking by multiple processes
_mocks.randomLock = () => {
const time = Date.now()
return `locked:host=${HOST}:pid=${PID}:random=${RND}:time=${time}:count=${COUNT++}`
}
export function randomLock(...args) {
return _mocks.randomLock(...args)
}
_mocks.tryLock = (key, callback) => {
if (callback == null) {
callback = function () {}
}
const lockValue = randomLock()
return rclient.set(
key,
lockValue,
'EX',
LOCK_TTL,
'NX',
function (err, gotLock) {
if (err != null) {
return callback(
OError.tag(err, 'redis error trying to get lock', { key })
)
}
if (gotLock === 'OK') {
metrics.inc('lock.project.try.success')
return callback(err, true, lockValue)
} else {
metrics.inc('lock.project.try.failed')
return callback(err, false)
}
}
)
}
export function tryLock(...args) {
_mocks.tryLock(...args)
}
_mocks.extendLock = (key, lockValue, callback) => {
if (callback == null) {
callback = function () {}
}
return rclient.eval(
EXTEND_SCRIPT,
1,
key,
lockValue,
LOCK_TTL,
function (err, result) {
if (err != null) {
return callback(
OError.tag(err, 'redis error trying to extend lock', { key })
)
}
if (result != null && result !== 1) {
        // a successful extension should return exactly 1 from the EXTEND_SCRIPT
metrics.inc('lock.project.extend.failed')
const error = new OError('failed to extend lock', {
key,
lockValue,
result,
})
return callback(error)
}
metrics.inc('lock.project.extend.success')
return callback()
}
)
}
export function extendLock(...args) {
_mocks.extendLock(...args)
}
_mocks.getLock = (key, callback) => {
let attempt
if (callback == null) {
callback = function () {}
}
const startTime = Date.now()
let attempts = 0
return (attempt = function () {
if (Date.now() - startTime > MAX_LOCK_WAIT_TIME) {
metrics.inc('lock.project.get.failed')
return callback(new OError('Timeout', { key }))
}
attempts += 1
return tryLock(key, function (error, gotLock, lockValue) {
if (error != null) {
return callback(OError.tag(error))
}
if (gotLock) {
metrics.gauge('lock.project.get.success.tries', attempts)
return callback(null, lockValue)
} else {
return setTimeout(attempt, LOCK_TEST_INTERVAL)
}
})
})()
}
export function getLock(...args) {
_mocks.getLock(...args)
}
export function checkLock(key, callback) {
if (callback == null) {
callback = function () {}
}
return rclient.exists(key, function (err, exists) {
if (err != null) {
return callback(OError.tag(err))
}
exists = parseInt(exists)
if (exists === 1) {
return callback(err, false)
} else {
return callback(err, true)
}
})
}
_mocks.releaseLock = (key, lockValue, callback) => {
return rclient.eval(UNLOCK_SCRIPT, 1, key, lockValue, function (err, result) {
if (err != null) {
return callback(OError.tag(err))
}
if (result != null && result !== 1) {
// successful unlock should release exactly one key
const error = new OError('tried to release timed out lock', {
key,
lockValue,
redis_result: result,
})
return callback(error)
}
return callback(err, result)
})
}
export function releaseLock(...args) {
_mocks.releaseLock(...args)
}
export function runWithLock(key, runner, callback) {
if (callback == null) {
callback = function () {}
}
return getLock(key, function (error, lockValue) {
if (error != null) {
return callback(OError.tag(error))
}
const lock = new Lock(key, lockValue)
return runner(lock.extend.bind(lock), (error1, ...args) =>
lock.release(function (error2) {
error = error1 || error2
if (error != null) {
return callback(OError.tag(error), ...Array.from(args))
}
return callback(null, ...Array.from(args))
})
)
})
}
export function healthCheck(callback) {
const action = (extendLock, releaseLock) => releaseLock()
return runWithLock(
`HistoryLock:HealthCheck:host=${HOST}:pid=${PID}:random=${RND}`,
action,
callback
)
}
export function close(callback) {
rclient.quit()
return rclient.once('end', callback)
}
class Lock {
constructor(key, value) {
this.key = key
this.value = value
this.slowExecutionError = new OError('slow execution during lock')
this.lockTakenAt = Date.now()
this.timer = new metrics.Timer('lock.project')
}
extend(callback) {
const lockLength = Date.now() - this.lockTakenAt
if (lockLength < MIN_LOCK_EXTENSION_INTERVAL) {
return async.setImmediate(callback)
}
return extendLock(this.key, this.value, error => {
if (error != null) {
return callback(OError.tag(error))
}
this.lockTakenAt = Date.now()
return callback()
})
}
release(callback) {
    // The lock can expire in redis but the process can carry on regardless.
    // This check is designed to log when that happens.
const lockLength = Date.now() - this.lockTakenAt
if (lockLength > LOCK_TTL * 1000) {
metrics.inc('lock.project.exceeded_lock_timeout')
      logger.debug(
        { key: this.key, slowExecutionError: this.slowExecutionError },
        'exceeded lock timeout'
      )
}
return releaseLock(this.key, this.value, error => {
this.timer.done()
if (error != null) {
return callback(OError.tag(error))
}
return callback()
})
}
}
/**
* Promisified version of runWithLock.
*
* @param {string} key
* @param {(extendLock: Function) => Promise<any>} runner
*/
async function runWithLockPromises(key, runner) {
const runnerCb = (extendLock, callback) => {
const extendLockPromises = promisify(extendLock)
runner(extendLockPromises)
.then(result => {
callback(null, result)
})
.catch(err => {
callback(err)
})
}
return await new Promise((resolve, reject) => {
runWithLock(key, runnerCb, (err, result) => {
if (err) {
reject(err)
} else {
resolve(result)
}
})
})
}
export const promises = {
tryLock: promisify(tryLock),
extendLock: promisify(extendLock),
getLock: promisify(getLock),
checkLock: promisify(checkLock),
releaseLock: promisify(releaseLock),
runWithLock: runWithLockPromises,
}

View File

@@ -0,0 +1,15 @@
// @ts-check
import { prom } from '@overleaf/metrics'
export const historyFlushDurationSeconds = new prom.Histogram({
name: 'history_flush_duration_seconds',
help: 'Duration of a history flush in seconds',
buckets: [0.05, 0.1, 0.2, 0.3, 0.5, 1, 2, 5, 10],
})
export const historyFlushQueueSize = new prom.Histogram({
name: 'history_flush_queue_size',
help: 'Size of the queue during history flushes',
buckets: prom.exponentialBuckets(1, 2, 10),
})

View File

@@ -0,0 +1,20 @@
export function compressOperations(operations) {
if (!operations.length) return []
const newOperations = []
let currentOperation = operations[0]
for (let operationId = 1; operationId < operations.length; operationId++) {
const nextOperation = operations[operationId]
if (currentOperation.canBeComposedWith(nextOperation)) {
currentOperation = currentOperation.compose(nextOperation)
} else {
// currentOperation and nextOperation cannot be composed. Push the
// currentOperation and start over with nextOperation.
newOperations.push(currentOperation)
currentOperation = nextOperation
}
}
newOperations.push(currentOperation)
return newOperations
}
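// Usage sketch (illustrative): the operations are overleaf-editor-core
// EditOperations (e.g. TextOperations) built elsewhere in this service.
//
//   const compressed = compressOperations([opA, opB, opC])
//   // compressed.length <= 3; adjacent operations that can be composed are
//   // merged into a single operation, the rest are kept as-is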

View File

@@ -0,0 +1,80 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS206: Consider reworking classes to avoid initClass
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import Settings from '@overleaf/settings'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
const LOG_CUTOFF_TIME = 1000
const deltaMs = function (ta, tb) {
const nanoSeconds = (ta[0] - tb[0]) * 1e9 + (ta[1] - tb[1])
const milliSeconds = Math.floor(nanoSeconds * 1e-6)
return milliSeconds
}
export class Profiler {
constructor(name, args) {
this.name = name
this.args = args
this.t0 = this.t = process.hrtime()
this.start = new Date()
this.updateTimes = []
}
log(label) {
const t1 = process.hrtime()
const dtMilliSec = deltaMs(t1, this.t)
this.t = t1
this.updateTimes.push([label, dtMilliSec]) // timings in ms
return this // make it chainable
}
end(message) {
const totalTime = deltaMs(this.t, this.t0)
// record the update times in metrics
for (const update of Array.from(this.updateTimes)) {
metrics.timing(`profile.${this.name}.${update[0]}`, update[1])
}
if (totalTime > LOG_CUTOFF_TIME) {
// log anything greater than cutoff
const args = {}
for (const k in this.args) {
const v = this.args[k]
args[k] = v
}
args.updateTimes = this.updateTimes
args.start = this.start
args.end = new Date()
logger.debug(args, this.name)
}
return totalTime
}
getTimeDelta() {
const lastIdx = this.updateTimes.length - 1
if (lastIdx >= 0) {
return this.updateTimes[lastIdx][1]
} else {
return 0
}
}
wrap(label, fn) {
// create a wrapped function which calls profile.log(label) before continuing execution
const newFn = (...args) => {
this.log(label)
return fn(...Array.from(args || []))
}
return newFn
}
}
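// Usage sketch (illustrative names):
//
//   const profile = new Profiler('flushProject', { projectId })
//   // ...fetch updates...
//   profile.log('getUpdates')
//   // ...send changes...
//   profile.log('sendChanges')
//   const totalMs = profile.end('flushProject finished') // logs if over 1s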

View File

@@ -0,0 +1,445 @@
import { callbackify, promisify } from 'node:util'
import { setTimeout } from 'node:timers/promises'
import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import redis from '@overleaf/redis-wrapper'
import metrics from '@overleaf/metrics'
import OError from '@overleaf/o-error'
/**
* Maximum size taken from the redis queue, to prevent project history
* consuming unbounded amounts of memory
*/
export const RAW_UPDATE_SIZE_THRESHOLD = 4 * 1024 * 1024
/**
* Batch size when reading updates from Redis
*/
export const RAW_UPDATES_BATCH_SIZE = 50
/**
* Maximum length of ops (insertion and deletions) to process in a single
* iteration
*/
export const MAX_UPDATE_OP_LENGTH = 1024
/**
* Warn if we exceed this raw update size, the final compressed updates we
* send could be smaller than this
*/
const WARN_RAW_UPDATE_SIZE = 1024 * 1024
/**
* Maximum number of new docs to process in a single iteration
*/
export const MAX_NEW_DOC_CONTENT_COUNT = 32
const CACHE_TTL_IN_SECONDS = 3600
const Keys = Settings.redis.project_history.key_schema
const rclient = redis.createClient(Settings.redis.project_history)
async function countUnprocessedUpdates(projectId) {
const key = Keys.projectHistoryOps({ project_id: projectId })
const updates = await rclient.llen(key)
return updates
}
async function* getRawUpdates(projectId) {
const key = Keys.projectHistoryOps({ project_id: projectId })
let start = 0
while (true) {
const stop = start + RAW_UPDATES_BATCH_SIZE - 1
const updates = await rclient.lrange(key, start, stop)
for (const update of updates) {
yield update
}
if (updates.length < RAW_UPDATES_BATCH_SIZE) {
return
}
start += RAW_UPDATES_BATCH_SIZE
}
}
async function getRawUpdatesBatch(projectId, batchSize) {
const rawUpdates = []
let totalRawUpdatesSize = 0
let hasMore = false
for await (const rawUpdate of getRawUpdates(projectId)) {
totalRawUpdatesSize += rawUpdate.length
if (
rawUpdates.length > 0 &&
totalRawUpdatesSize > RAW_UPDATE_SIZE_THRESHOLD
) {
hasMore = true
break
}
rawUpdates.push(rawUpdate)
if (rawUpdates.length >= batchSize) {
hasMore = true
break
}
}
metrics.timing('redis.incoming.bytes', totalRawUpdatesSize, 1)
if (totalRawUpdatesSize > WARN_RAW_UPDATE_SIZE) {
const rawUpdateSizes = rawUpdates.map(rawUpdate => rawUpdate.length)
logger.warn(
{
projectId,
totalRawUpdatesSize,
rawUpdateSizes,
},
'large raw update size'
)
}
return { rawUpdates, hasMore }
}
export function parseDocUpdates(jsonUpdates) {
return jsonUpdates.map(update => JSON.parse(update))
}
async function getUpdatesInBatches(projectId, batchSize, runner) {
let moreBatches = true
while (moreBatches) {
const redisBatch = await getRawUpdatesBatch(projectId, batchSize)
if (redisBatch.rawUpdates.length === 0) {
break
}
moreBatches = redisBatch.hasMore
const rawUpdates = []
const updates = []
let totalOpLength = 0
let totalDocContentCount = 0
for (const rawUpdate of redisBatch.rawUpdates) {
let update
try {
update = JSON.parse(rawUpdate)
} catch (error) {
throw OError.tag(error, 'failed to parse update', {
projectId,
update,
})
}
totalOpLength += update?.op?.length || 1
if (update.resyncDocContent) {
totalDocContentCount += 1
}
if (
updates.length > 0 &&
(totalOpLength > MAX_UPDATE_OP_LENGTH ||
totalDocContentCount > MAX_NEW_DOC_CONTENT_COUNT)
) {
moreBatches = true
break
}
if (update.resyncProjectStructureOnly) {
update._raw = rawUpdate
}
rawUpdates.push(rawUpdate)
updates.push(update)
}
await runner(updates)
await deleteAppliedDocUpdates(projectId, rawUpdates)
if (batchSize === 1) {
// Special case for single stepping, don't process more batches
break
}
}
}
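// Usage sketch (illustrative): drain the queue, handing each parsed batch to an
// async runner before the raw updates are removed from redis.
//
//   await getUpdatesInBatches(projectId, RAW_UPDATES_BATCH_SIZE, async updates => {
//     // persist `updates` here; throwing aborts the loop and keeps the queue intact
//   })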
/**
* @param {string} projectId
* @param {ResyncProjectStructureUpdate} update
* @return {Promise<void>}
*/
async function deleteAppliedDocUpdate(projectId, update) {
const raw = update._raw
// Delete the first occurrence of the update with LREM KEY COUNT
// VALUE by setting COUNT to 1 which 'removes COUNT elements equal to
// value moving from head to tail.'
//
  // If COUNT were 0 the entire list would be searched, which would block
  // redis since it would be an O(N) operation where N is the length of
  // the queue.
metrics.summary('redis.projectHistoryOps', raw.length, {
status: 'lrem',
})
await rclient.lrem(Keys.projectHistoryOps({ project_id: projectId }), 1, raw)
}
async function deleteAppliedDocUpdates(projectId, updates) {
const multi = rclient.multi()
// Delete all the updates which have been applied (exact match)
for (const update of updates) {
// Delete the first occurrence of the update with LREM KEY COUNT
// VALUE by setting COUNT to 1 which 'removes COUNT elements equal to
// value moving from head to tail.'
//
    // If COUNT were 0 the entire list would be searched, which would block
    // redis since it would be an O(N) operation where N is the length of
    // the queue, repeated for each command in the multi (i.e. once per
    // update in the batch).
metrics.summary('redis.projectHistoryOps', update.length, {
status: 'lrem',
})
multi.lrem(Keys.projectHistoryOps({ project_id: projectId }), 1, update)
}
if (updates.length > 0) {
multi.del(Keys.projectHistoryFirstOpTimestamp({ project_id: projectId }))
}
await multi.exec()
}
/**
* Deletes the entire queue - use with caution
*/
async function destroyDocUpdatesQueue(projectId) {
await rclient.del(
Keys.projectHistoryOps({ project_id: projectId }),
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
)
}
/**
* Iterate over keys asynchronously using redis scan (non-blocking)
*
* handle all the cluster nodes or single redis server
*/
async function _getKeys(pattern, limit) {
const nodes = rclient.nodes?.('master') || [rclient]
const keysByNode = []
for (const node of nodes) {
const keys = await _getKeysFromNode(node, pattern, limit)
keysByNode.push(keys)
}
return [].concat(...keysByNode)
}
async function _getKeysFromNode(node, pattern, limit) {
let cursor = 0 // redis iterator
const keySet = new Set() // avoid duplicate results
const batchSize = limit != null ? Math.min(limit, 1000) : 1000
// scan over all keys looking for pattern
while (true) {
const reply = await node.scan(cursor, 'MATCH', pattern, 'COUNT', batchSize)
const [newCursor, keys] = reply
cursor = newCursor
for (const key of keys) {
keySet.add(key)
}
const noResults = cursor === '0' // redis returns string results not numeric
const limitReached = limit != null && keySet.size >= limit
if (noResults || limitReached) {
return Array.from(keySet)
}
// avoid hitting redis too hard
await setTimeout(10)
}
}
/**
* Extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b
* or DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b} (for redis cluster)
*/
function _extractIds(keyList) {
return keyList.map(key => {
const m = key.match(/:\{?([0-9a-f]{24})\}?/) // extract object id
return m[1]
})
}
async function getProjectIdsWithHistoryOps(limit) {
const projectKeys = await _getKeys(
Keys.projectHistoryOps({ project_id: '*' }),
limit
)
const projectIds = _extractIds(projectKeys)
return projectIds
}
async function getProjectIdsWithHistoryOpsCount() {
const projectIds = await getProjectIdsWithHistoryOps()
const queuedProjectsCount = projectIds.length
metrics.globalGauge('queued-projects', queuedProjectsCount)
return queuedProjectsCount
}
async function setFirstOpTimestamp(projectId) {
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
// store current time as an integer (string)
await rclient.setnx(key, Date.now())
}
async function getFirstOpTimestamp(projectId) {
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
const result = await rclient.get(key)
// convert stored time back to a numeric timestamp
const timestamp = parseInt(result, 10)
// check for invalid timestamp
if (isNaN(timestamp)) {
return null
}
// convert numeric timestamp to a date object
const firstOpTimestamp = new Date(timestamp)
return firstOpTimestamp
}
async function getFirstOpTimestamps(projectIds) {
const keys = projectIds.map(projectId =>
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
)
const results = await rclient.mget(keys)
const timestamps = results.map(result => {
// convert stored time back to a numeric timestamp
const timestamp = parseInt(result, 10)
// check for invalid timestamp
if (isNaN(timestamp)) {
return null
}
// convert numeric timestamp to a date object
return new Date(timestamp)
})
return timestamps
}
async function clearFirstOpTimestamp(projectId) {
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
await rclient.del(key)
}
async function getProjectIdsWithFirstOpTimestamps(limit) {
const projectKeys = await _getKeys(
Keys.projectHistoryFirstOpTimestamp({ project_id: '*' }),
limit
)
const projectIds = _extractIds(projectKeys)
return projectIds
}
async function clearDanglingFirstOpTimestamp(projectId) {
const count = await rclient.exists(
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId }),
Keys.projectHistoryOps({ project_id: projectId })
)
if (count === 2 || count === 0) {
// both (or neither) keys are present, so don't delete the timestamp
return 0
}
// only one key is present, which makes this a dangling record,
// so delete the timestamp
const cleared = await rclient.del(
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
)
return cleared
}
async function getCachedHistoryId(projectId) {
const key = Keys.projectHistoryCachedHistoryId({ project_id: projectId })
const historyId = await rclient.get(key)
return historyId
}
async function setCachedHistoryId(projectId, historyId) {
const key = Keys.projectHistoryCachedHistoryId({ project_id: projectId })
await rclient.setex(key, CACHE_TTL_IN_SECONDS, historyId)
}
async function clearCachedHistoryId(projectId) {
const key = Keys.projectHistoryCachedHistoryId({ project_id: projectId })
await rclient.del(key)
}
// EXPORTS
const countUnprocessedUpdatesCb = callbackify(countUnprocessedUpdates)
const getRawUpdatesBatchCb = callbackify(getRawUpdatesBatch)
const deleteAppliedDocUpdatesCb = callbackify(deleteAppliedDocUpdates)
const destroyDocUpdatesQueueCb = callbackify(destroyDocUpdatesQueue)
const getProjectIdsWithHistoryOpsCb = callbackify(getProjectIdsWithHistoryOps)
const getProjectIdsWithHistoryOpsCountCb = callbackify(
getProjectIdsWithHistoryOpsCount
)
const setFirstOpTimestampCb = callbackify(setFirstOpTimestamp)
const getFirstOpTimestampCb = callbackify(getFirstOpTimestamp)
const getFirstOpTimestampsCb = callbackify(getFirstOpTimestamps)
const clearFirstOpTimestampCb = callbackify(clearFirstOpTimestamp)
const getProjectIdsWithFirstOpTimestampsCb = callbackify(
getProjectIdsWithFirstOpTimestamps
)
const clearDanglingFirstOpTimestampCb = callbackify(
clearDanglingFirstOpTimestamp
)
const getCachedHistoryIdCb = callbackify(getCachedHistoryId)
const setCachedHistoryIdCb = callbackify(setCachedHistoryId)
const clearCachedHistoryIdCb = callbackify(clearCachedHistoryId)
const getUpdatesInBatchesCb = function (
projectId,
batchSize,
runner,
callback
) {
const runnerPromises = promisify(runner)
getUpdatesInBatches(projectId, batchSize, runnerPromises)
.then(result => {
callback(null, result)
})
.catch(err => {
callback(err)
})
}
export {
countUnprocessedUpdatesCb as countUnprocessedUpdates,
getRawUpdatesBatchCb as getRawUpdatesBatch,
deleteAppliedDocUpdatesCb as deleteAppliedDocUpdates,
destroyDocUpdatesQueueCb as destroyDocUpdatesQueue,
getUpdatesInBatchesCb as getUpdatesInBatches,
getProjectIdsWithHistoryOpsCb as getProjectIdsWithHistoryOps,
getProjectIdsWithHistoryOpsCountCb as getProjectIdsWithHistoryOpsCount,
setFirstOpTimestampCb as setFirstOpTimestamp,
getFirstOpTimestampCb as getFirstOpTimestamp,
getFirstOpTimestampsCb as getFirstOpTimestamps,
clearFirstOpTimestampCb as clearFirstOpTimestamp,
getProjectIdsWithFirstOpTimestampsCb as getProjectIdsWithFirstOpTimestamps,
clearDanglingFirstOpTimestampCb as clearDanglingFirstOpTimestamp,
getCachedHistoryIdCb as getCachedHistoryId,
setCachedHistoryIdCb as setCachedHistoryId,
clearCachedHistoryIdCb as clearCachedHistoryId,
}
export const promises = {
countUnprocessedUpdates,
getRawUpdatesBatch,
deleteAppliedDocUpdates,
deleteAppliedDocUpdate,
destroyDocUpdatesQueue,
getUpdatesInBatches,
getProjectIdsWithHistoryOps,
getProjectIdsWithHistoryOpsCount,
setFirstOpTimestamp,
getFirstOpTimestamp,
getFirstOpTimestamps,
clearFirstOpTimestamp,
getProjectIdsWithFirstOpTimestamps,
clearDanglingFirstOpTimestamp,
getCachedHistoryId,
setCachedHistoryId,
clearCachedHistoryId,
}
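// Illustrative usage sketch (comments only, not executed; projectId is a
// placeholder): the first-op timestamp is stored in Redis as a millisecond
// string and parsed back into a Date on read; a missing or unparsable value
// yields null.
//
//   const firstOpTimestamp = await promises.getFirstOpTimestamp(projectId)
//   if (firstOpTimestamp == null) {
//     // no queued ops have been recorded for this project yet
//   }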

View File

@@ -0,0 +1,194 @@
import _ from 'lodash'
import { promisify, callbackify } from 'node:util'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as SyncManager from './SyncManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as RedisManager from './RedisManager.js'
import * as ErrorRecorder from './ErrorRecorder.js'
const sleep = promisify(setTimeout)
const TEMPORARY_FAILURES = [
'Error: ENOSPC: no space left on device, write',
'Error: ESOCKETTIMEDOUT',
'Error: failed to extend lock',
'Error: tried to release timed out lock',
'Error: Timeout',
]
const HARD_FAILURES = [
'Error: history store a non-success status code: 422',
'OError: history store a non-success status code: 422',
'OpsOutOfOrderError: project structure version out of order',
'OpsOutOfOrderError: project structure version out of order on incoming updates',
'OpsOutOfOrderError: doc version out of order',
'OpsOutOfOrderError: doc version out of order on incoming updates',
]
const MAX_RESYNC_ATTEMPTS = 2
const MAX_SOFT_RESYNC_ATTEMPTS = 1
export const promises = {}
promises.retryFailures = async (options = {}) => {
const { failureType, timeout, limit } = options
if (failureType === 'soft') {
const batch = await getFailureBatch(softErrorSelector, limit)
const result = await retryFailureBatch(batch, timeout, async failure => {
await UpdatesProcessor.promises.processUpdatesForProject(
failure.project_id
)
})
return result
} else if (failureType === 'hard') {
const batch = await getFailureBatch(hardErrorSelector, limit)
const result = await retryFailureBatch(batch, timeout, async failure => {
await resyncProject(failure.project_id, {
hard: failureRequiresHardResync(failure),
})
})
return result
}
}
export const retryFailures = callbackify(promises.retryFailures)
function softErrorSelector(failure) {
return (
(isTemporaryFailure(failure) && !isRepeatedFailure(failure)) ||
(isFirstFailure(failure) && !isHardFailure(failure))
)
}
function hardErrorSelector(failure) {
return (
(isHardFailure(failure) || isRepeatedFailure(failure)) &&
!isStuckFailure(failure)
)
}
function isTemporaryFailure(failure) {
return TEMPORARY_FAILURES.includes(failure.error)
}
export function isHardFailure(failure) {
return HARD_FAILURES.includes(failure.error)
}
export function isFirstFailure(failure) {
return failure.attempts <= 1
}
function isRepeatedFailure(failure) {
return failure.attempts > 3
}
function isStuckFailure(failure) {
return (
failure.resyncAttempts != null &&
failure.resyncAttempts >= MAX_RESYNC_ATTEMPTS
)
}
function failureRequiresHardResync(failure) {
return (
failure.resyncAttempts != null &&
failure.resyncAttempts >= MAX_SOFT_RESYNC_ATTEMPTS
)
}
async function getFailureBatch(selector, limit) {
let failures = await ErrorRecorder.promises.getFailedProjects()
failures = failures.filter(selector)
// randomise order
failures = _.shuffle(failures)
// put a limit on the number to retry
const projectsToRetryCount = failures.length
if (limit && projectsToRetryCount > limit) {
failures = failures.slice(0, limit)
}
logger.debug({ projectsToRetryCount, limit }, 'retrying failed projects')
return failures
}
async function retryFailureBatch(failures, timeout, retryHandler) {
const startTime = new Date()
// keep track of successes and failures
const failed = []
const succeeded = []
for (const failure of failures) {
const projectId = failure.project_id
const timeTaken = new Date() - startTime
if (timeout && timeTaken > timeout) {
// finish early due to timeout
logger.debug('background retries timed out')
break
}
logger.debug(
{ projectId, timeTaken },
'retrying failed project in background'
)
try {
await retryHandler(failure)
succeeded.push(projectId)
} catch (err) {
failed.push(projectId)
}
}
return { succeeded, failed }
}
async function resyncProject(projectId, options = {}) {
const { hard = false } = options
try {
if (!/^[0-9a-f]{24}$/.test(projectId)) {
logger.debug({ projectId }, 'clearing bad project id')
await ErrorRecorder.promises.clearError(projectId)
return
}
await checkProjectHasHistoryId(projectId)
if (hard) {
await SyncManager.promises.startHardResync(projectId)
} else {
await SyncManager.promises.startResync(projectId)
}
await waitUntilRedisQueueIsEmpty(projectId)
await checkFailureRecordWasRemoved(projectId)
} catch (err) {
throw new OError({
message: 'failed to resync project',
info: { projectId, hard },
}).withCause(err)
}
}
async function checkProjectHasHistoryId(projectId) {
const historyId = await WebApiManager.promises.getHistoryId(projectId)
if (historyId == null) {
throw new OError('no history id')
}
}
async function waitUntilRedisQueueIsEmpty(projectId) {
for (let attempts = 0; attempts < 30; attempts++) {
const updatesCount =
await RedisManager.promises.countUnprocessedUpdates(projectId)
if (updatesCount === 0) {
return
}
await sleep(1000)
}
throw new OError('queue not empty')
}
async function checkFailureRecordWasRemoved(projectId) {
const failureRecord = await ErrorRecorder.promises.getFailureRecord(projectId)
if (failureRecord) {
throw new OError('failure record still exists')
}
}
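// Illustrative usage sketch (comments only, not executed; the timeout and
// limit values are arbitrary examples): soft failures are retried by
// reprocessing the queue, hard failures by triggering a resync.
//
//   const { succeeded, failed } = await promises.retryFailures({
//     failureType: 'soft',
//     timeout: 60 * 1000, // stop starting new retries after one minute
//     limit: 50, // retry at most 50 projects
//   })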

View File

@@ -0,0 +1,250 @@
import OError from '@overleaf/o-error'
import * as HttpController from './HttpController.js'
import { Joi, validate } from './Validation.js'
export function initialize(app) {
app.use(
validate({
params: Joi.object({
project_id: Joi.string().regex(/^[0-9a-f]{24}$/),
user_id: Joi.string().regex(/^[0-9a-f]{24}$/),
label_id: Joi.string().regex(/^[0-9a-f]{24}$/),
version: Joi.number().integer(),
}),
})
)
// use an extended timeout on all endpoints, to allow for long requests to history-v1
app.use(longerTimeout)
app.post('/project', HttpController.initializeProject)
app.delete('/project/:project_id', HttpController.deleteProject)
app.get('/project/:project_id/snapshot', HttpController.getLatestSnapshot)
app.get(
'/project/:project_id/diff',
validate({
query: {
pathname: Joi.string().required(),
from: Joi.number().integer().required(),
to: Joi.number().integer().required(),
},
}),
HttpController.getDiff
)
app.get(
'/project/:project_id/filetree/diff',
validate({
query: {
from: Joi.number().integer().required(),
to: Joi.number().integer().required(),
},
}),
HttpController.getFileTreeDiff
)
app.get(
'/project/:project_id/updates',
validate({
query: {
before: Joi.number().integer(),
min_count: Joi.number().integer(),
},
}),
HttpController.getUpdates
)
app.get(
'/project/:project_id/changes-in-chunk',
validate({
query: {
since: Joi.number().integer().min(0),
},
}),
HttpController.getChangesInChunkSince
)
app.get('/project/:project_id/version', HttpController.latestVersion)
app.post(
'/project/:project_id/flush',
validate({
query: {
debug: Joi.boolean().default(false),
bisect: Joi.boolean().default(false),
},
}),
HttpController.flushProject
)
app.post(
'/project/:project_id/resync',
validate({
query: {
force: Joi.boolean().default(false),
},
body: {
force: Joi.boolean().default(false),
origin: Joi.object({
kind: Joi.string().required(),
}),
historyRangesMigration: Joi.string()
.optional()
.valid('forwards', 'backwards'),
},
}),
HttpController.resyncProject
)
app.get(
'/project/:project_id/dump',
validate({
query: {
count: Joi.number().integer(),
},
}),
HttpController.dumpProject
)
app.get('/project/:project_id/labels', HttpController.getLabels)
app.post(
'/project/:project_id/labels',
validate({
body: {
version: Joi.number().integer().required(),
comment: Joi.string().required(),
created_at: Joi.string(),
validate_exists: Joi.boolean().default(true),
user_id: Joi.string().allow(null),
},
}),
HttpController.createLabel
)
app.delete(
'/project/:project_id/user/:user_id/labels/:label_id',
validate({
params: Joi.object({
project_id: Joi.string().regex(/^[0-9a-f]{24}$/),
user_id: Joi.string().regex(/^[0-9a-f]{24}$/),
label_id: Joi.string().regex(/^[0-9a-f]{24}$/),
}),
}),
HttpController.deleteLabelForUser
)
app.delete(
'/project/:project_id/labels/:label_id',
validate({
params: Joi.object({
project_id: Joi.string().regex(/^[0-9a-f]{24}$/),
label_id: Joi.string().regex(/^[0-9a-f]{24}$/),
}),
}),
HttpController.deleteLabel
)
app.post(
'/user/:from_user/labels/transfer/:to_user',
HttpController.transferLabels
)
app.get(
'/project/:project_id/version/:version/:pathname',
HttpController.getFileSnapshot
)
app.get(
'/project/:project_id/ranges/version/:version/:pathname',
HttpController.getRangesSnapshot
)
app.get(
'/project/:project_id/metadata/version/:version/:pathname',
HttpController.getFileMetadataSnapshot
)
app.get(
'/project/:project_id/version/:version',
HttpController.getProjectSnapshot
)
app.get(
'/project/:project_id/paths/version/:version',
HttpController.getPathsAtVersion
)
app.post(
'/project/:project_id/force',
validate({
query: {
clear: Joi.boolean().default(false),
},
}),
HttpController.forceDebugProject
)
app.get('/project/:history_id/blob/:hash', HttpController.getProjectBlob)
app.get('/status/failures', HttpController.getFailures)
app.get('/status/queue', HttpController.getQueueCounts)
app.post(
'/retry/failures',
validate({
query: {
failureType: Joi.string().valid('soft', 'hard'),
// bail out after this time limit
timeout: Joi.number().integer().default(300),
// maximum number of projects to check
limit: Joi.number().integer().default(100),
callbackUrl: Joi.string(),
},
}),
HttpController.retryFailures
)
app.post(
'/flush/old',
validate({
query: {
// flush projects with queued ops older than this
maxAge: Joi.number()
.integer()
.default(6 * 3600),
// pause this amount of time between checking queues
queueDelay: Joi.number().integer().default(100),
// maximum number of queues to check
limit: Joi.number().integer().default(1000),
// maximum amount of time allowed
timeout: Joi.number()
.integer()
.default(60 * 1000),
// whether to run in the background
background: Joi.boolean().falsy('0').truthy('1').default(false),
},
}),
HttpController.flushOld
)
app.get('/status', (req, res, next) => res.send('project-history is up'))
app.get('/oops', function (req, res, next) {
throw new OError('dummy test error')
})
app.get('/check_lock', HttpController.checkLock)
app.get('/health_check', HttpController.healthCheck)
}
function longerTimeout(req, res, next) {
res.setTimeout(6 * 60 * 1000)
next()
}
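// Illustrative wiring sketch (comments only, not executed; the express app
// setup and port number are assumptions for illustration only):
//
//   import express from 'express'
//   import * as Router from './Router.js'
//
//   const app = express()
//   Router.initialize(app)
//   app.listen(3054)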

View File

@@ -0,0 +1,426 @@
// @ts-check
import { callbackify } from 'node:util'
import Core from 'overleaf-editor-core'
import { Readable as StringStream } from 'node:stream'
import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as Errors from './Errors.js'
import _ from 'lodash'
/**
* @import { Snapshot } from 'overleaf-editor-core'
* @import { RangesSnapshot } from './types'
*/
StringStream.prototype._read = function () {}
const MAX_REQUESTS = 4 // maximum number of parallel requests to v1 history service
/**
*
* @param {string} projectId
* @param {number} version
* @param {string} pathname
*/
async function getFileSnapshotStream(projectId, version, pathname) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
const file = snapshot.getFile(pathname)
if (file == null) {
throw new Errors.NotFoundError(`${pathname} not found`, {
projectId,
version,
pathname,
})
}
const historyId = await WebApiManager.promises.getHistoryId(projectId)
if (file.isEditable()) {
await file.load('eager', HistoryStoreManager.getBlobStore(historyId))
const stream = new StringStream()
stream.push(file.getContent({ filterTrackedDeletes: true }))
stream.push(null)
return stream
} else {
return await HistoryStoreManager.promises.getProjectBlobStream(
historyId,
file.getHash()
)
}
}
/**
* Constructs a snapshot of the ranges in a document-updater compatible format.
* Positions will be relative to a document where tracked deletes have been
* removed from the string. This also means that if a tracked delete overlaps
* a comment range, the comment range will be truncated.
*
* @param {string} projectId
* @param {number} version
* @param {string} pathname
* @returns {Promise<RangesSnapshot>}
*/
async function getRangesSnapshot(projectId, version, pathname) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
const file = snapshot.getFile(pathname)
if (!file) {
throw new Errors.NotFoundError(`${pathname} not found`, {
projectId,
version,
pathname,
})
}
if (!file.isEditable()) {
// A binary file has no tracked changes or comments
return {
changes: [],
comments: [],
}
}
const historyId = await WebApiManager.promises.getHistoryId(projectId)
await file.load('eager', HistoryStoreManager.getBlobStore(historyId))
const content = file.getContent()
if (content == null) {
throw new Error('Unable to read file contents')
}
const trackedChanges = file.getTrackedChanges().asSorted()
const comments = file.getComments().toArray()
const docUpdaterCompatibleTrackedChanges = []
let trackedDeletionOffset = 0
for (const trackedChange of trackedChanges) {
const isTrackedDeletion = trackedChange.tracking.type === 'delete'
const trackedChangeContent = content.slice(
trackedChange.range.start,
trackedChange.range.end
)
const tcContent = isTrackedDeletion
? { d: trackedChangeContent }
: { i: trackedChangeContent }
docUpdaterCompatibleTrackedChanges.push({
op: {
p: trackedChange.range.start - trackedDeletionOffset,
...tcContent,
},
metadata: {
ts: trackedChange.tracking.ts.toISOString(),
user_id: trackedChange.tracking.userId,
},
})
if (isTrackedDeletion) {
trackedDeletionOffset += trackedChange.range.length
}
}
// Comments are shifted left by the length of any previous tracked deletions.
// If they overlap with a tracked deletion, they are truncated.
//
// Example:
// { } comment
// [ ] tracked deletion
// the quic[k {b]rown [fox] jum[ps} ove]r the lazy dog
// => rown jum
// starting at position 8
const trackedDeletions = trackedChanges.filter(
tc => tc.tracking.type === 'delete'
)
const docUpdaterCompatibleComments = []
for (const comment of comments) {
let trackedDeletionIndex = 0
if (comment.ranges.length === 0) {
// Translate detached comments into zero length comments at position 0
docUpdaterCompatibleComments.push({
op: {
p: 0,
c: '',
t: comment.id,
resolved: comment.resolved,
},
})
continue
}
// Consider a multiple range comment as a single comment that joins all its
// ranges
const commentStart = comment.ranges[0].start
const commentEnd = comment.ranges[comment.ranges.length - 1].end
let commentContent = ''
// Docupdater position
let position = commentStart
while (trackedDeletions[trackedDeletionIndex]?.range.end <= commentStart) {
// Skip over tracked deletions that are before the current comment range
position -= trackedDeletions[trackedDeletionIndex].range.length
trackedDeletionIndex++
}
if (trackedDeletions[trackedDeletionIndex]?.range.start < commentStart) {
// There's overlap with a tracked deletion, move the position left and
// truncate the overlap
position -=
commentStart - trackedDeletions[trackedDeletionIndex].range.start
}
// Cursor in the history content
let cursor = commentStart
while (cursor < commentEnd) {
const trackedDeletion = trackedDeletions[trackedDeletionIndex]
if (!trackedDeletion || trackedDeletion.range.start >= commentEnd) {
// We've run out of relevant tracked changes
commentContent += content.slice(cursor, commentEnd)
break
}
if (trackedDeletion.range.start > cursor) {
// There's a gap between the current cursor and the tracked deletion
commentContent += content.slice(cursor, trackedDeletion.range.start)
}
if (trackedDeletion.range.end <= commentEnd) {
// Skip to the end of the tracked delete
cursor = trackedDeletion.range.end
trackedDeletionIndex++
} else {
// We're done with that comment
break
}
}
docUpdaterCompatibleComments.push({
op: {
p: position,
c: commentContent,
t: comment.id,
resolved: comment.resolved,
},
id: comment.id,
})
}
return {
changes: docUpdaterCompatibleTrackedChanges,
comments: docUpdaterCompatibleComments,
}
}
/**
* Gets the file metadata at a specific version.
*
* @param {string} projectId
* @param {number} version
* @param {string} pathname
* @returns {Promise<{metadata: any}>}
*/
async function getFileMetadataSnapshot(projectId, version, pathname) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
const file = snapshot.getFile(pathname)
if (!file) {
throw new Errors.NotFoundError(`${pathname} not found`, {
projectId,
version,
pathname,
})
}
const rawMetadata = file.getMetadata()
const metadata = _.isEmpty(rawMetadata) ? undefined : rawMetadata
return { metadata }
}
// Returns project snapshot containing the document content for files with
// text operations in the relevant chunk, and hashes for unmodified/binary
// files. Used by git bridge to get the state of the project.
async function getProjectSnapshot(projectId, version) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
const historyId = await WebApiManager.promises.getHistoryId(projectId)
await _loadFilesLimit(
snapshot,
'eager',
HistoryStoreManager.getBlobStore(historyId)
)
return {
projectId,
files: snapshot.getFileMap().map(file => {
if (!file) {
return null
}
const content = file.getContent({
filterTrackedDeletes: true,
})
if (content === null) {
return { data: { hash: file.getHash() } }
}
return { data: { content } }
}),
}
}
async function getPathsAtVersion(projectId, version) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
return {
paths: snapshot.getFilePathnames(),
}
}
/**
*
* @param {string} projectId
* @param {number} version
*/
async function _getSnapshotAtVersion(projectId, version) {
const historyId = await WebApiManager.promises.getHistoryId(projectId)
const data = await HistoryStoreManager.promises.getChunkAtVersion(
projectId,
historyId,
version
)
const chunk = Core.Chunk.fromRaw(data.chunk)
const snapshot = chunk.getSnapshot()
const changes = chunk.getChanges().slice(0, version - chunk.getStartVersion())
snapshot.applyAll(changes)
return snapshot
}
/**
* @param {string} projectId
* @param {string} historyId
* @return {Promise<Record<string, import('overleaf-editor-core').File>>}
*/
async function getLatestSnapshotFiles(projectId, historyId) {
const data = await HistoryStoreManager.promises.getMostRecentChunk(
projectId,
historyId
)
return await getLatestSnapshotFilesForChunk(historyId, data)
}
/**
* @param {string} historyId
* @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} chunk
* @return {Promise<Record<string, import('overleaf-editor-core').File>>}
*/
async function getLatestSnapshotFilesForChunk(historyId, chunk) {
const { snapshot } = getLatestSnapshotFromChunk(chunk)
const snapshotFiles = await snapshot.loadFiles(
'lazy',
HistoryStoreManager.getBlobStore(historyId)
)
return snapshotFiles
}
/**
* @param {string} projectId
* @param {string} historyId
* @return {Promise<{version: number, snapshot: import('overleaf-editor-core').Snapshot}>}
*/
async function getLatestSnapshot(projectId, historyId) {
const data = await HistoryStoreManager.promises.getMostRecentChunk(
projectId,
historyId
)
return getLatestSnapshotFromChunk(data)
}
/**
* @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} data
* @return {{version: number, snapshot: import('overleaf-editor-core').Snapshot}}
*/
function getLatestSnapshotFromChunk(data) {
if (data == null || data.chunk == null) {
throw new OError('undefined chunk')
}
// apply all the changes in the chunk to get the current snapshot
const chunk = Core.Chunk.fromRaw(data.chunk)
const snapshot = chunk.getSnapshot()
const changes = chunk.getChanges()
snapshot.applyAll(changes)
return {
snapshot,
version: chunk.getEndVersion(),
}
}
async function getChangesInChunkSince(projectId, historyId, sinceVersion) {
const latestChunk = Core.Chunk.fromRaw(
(
await HistoryStoreManager.promises.getMostRecentChunk(
projectId,
historyId
)
).chunk
)
if (sinceVersion > latestChunk.getEndVersion()) {
throw new Errors.BadRequestError(
'requested version past the end of the history'
)
}
const latestStartVersion = latestChunk.getStartVersion()
let chunk = latestChunk
if (sinceVersion < latestStartVersion) {
chunk = Core.Chunk.fromRaw(
(
await HistoryStoreManager.promises.getChunkAtVersion(
projectId,
historyId,
sinceVersion
)
).chunk
)
}
const changes = chunk
.getChanges()
.slice(sinceVersion - chunk.getStartVersion())
return { latestStartVersion, changes }
}
async function _loadFilesLimit(snapshot, kind, blobStore) {
await snapshot.fileMap.mapAsync(async file => {
// only load changed files or files with tracked changes, others can be
// dereferenced from their blobs (this method is only used by the git
// bridge which understands how to load blobs).
if (!file.isEditable() || (file.getHash() && !file.getRangesHash())) {
return
}
await file.load(kind, blobStore)
}, MAX_REQUESTS)
}
// EXPORTS
const getChangesInChunkSinceCb = callbackify(getChangesInChunkSince)
const getFileSnapshotStreamCb = callbackify(getFileSnapshotStream)
const getProjectSnapshotCb = callbackify(getProjectSnapshot)
const getLatestSnapshotCb = callbackify(getLatestSnapshot)
const getLatestSnapshotFilesCb = callbackify(getLatestSnapshotFiles)
const getLatestSnapshotFilesForChunkCb = callbackify(
getLatestSnapshotFilesForChunk
)
const getRangesSnapshotCb = callbackify(getRangesSnapshot)
const getFileMetadataSnapshotCb = callbackify(getFileMetadataSnapshot)
const getPathsAtVersionCb = callbackify(getPathsAtVersion)
export {
getLatestSnapshotFromChunk,
getChangesInChunkSinceCb as getChangesInChunkSince,
getFileSnapshotStreamCb as getFileSnapshotStream,
getProjectSnapshotCb as getProjectSnapshot,
getFileMetadataSnapshotCb as getFileMetadataSnapshot,
getLatestSnapshotCb as getLatestSnapshot,
getLatestSnapshotFilesCb as getLatestSnapshotFiles,
getLatestSnapshotFilesForChunkCb as getLatestSnapshotFilesForChunk,
getRangesSnapshotCb as getRangesSnapshot,
getPathsAtVersionCb as getPathsAtVersion,
}
export const promises = {
getChangesInChunkSince,
getFileSnapshotStream,
getProjectSnapshot,
getLatestSnapshot,
getLatestSnapshotFiles,
getLatestSnapshotFilesForChunk,
getRangesSnapshot,
getPathsAtVersion,
getFileMetadataSnapshot,
}
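// Illustrative usage sketch (comments only, not executed; the ids, version
// and pathname are placeholders): fetch the ranges of a document at a given
// version, in the document-updater compatible format described above.
//
//   const { changes, comments } = await promises.getRangesSnapshot(
//     projectId,
//     42, // project version
//     'main.tex'
//   )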

View File

@@ -0,0 +1,354 @@
import _ from 'lodash'
import async from 'async'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as ChunkTranslator from './ChunkTranslator.js'
import * as HistoryApiManager from './HistoryApiManager.js'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as LabelsManager from './LabelsManager.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as WebApiManager from './WebApiManager.js'
const MAX_CHUNK_REQUESTS = 5
const TIME_BETWEEN_DISTINCT_UPDATES = 5 * 60 * 1000 // five minutes
export function getSummarizedProjectUpdates(projectId, options, callback) {
// Some notes on versions:
//
// Versions of the project are like the fenceposts between updates.
// An update applies to a certain version of the project, and gives us the
// next version.
//
// When we ask for updates 'before' a version, this includes the update
// that created the version equal to 'before'.
//
// A chunk in OL has a 'startVersion', which is the version of the project
// before any of the updates in it were applied. This is the same version as
// the last update in the previous chunk would have created.
//
// If we ask the OL history store for the chunk at a version that is the end of one
// chunk and the start of another, it will return the older chunk, i.e.
// the chunk with the updates that led up to that version.
//
// So once we read in the updates from a chunk, and want to get the updates from
// the previous chunk, we ask OL for the chunk with the version equal to the
// 'startVersion' of the newer chunk we just read.
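// For example (illustrative numbers only): if the latest chunk has
// startVersion 100 and contains 20 updates, it covers versions 100..120.
// After summarizing it we ask for the chunk at version 100, which the
// history store resolves to the previous chunk, i.e. the one whose updates
// led up to version 100.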
let nextVersionToRequest
if (options == null) {
options = {}
}
if (!options.min_count) {
options.min_count = 25
}
if (options.before != null) {
// The version is of the doc, so we want the updates before that version,
// which includes the update that created that version.
nextVersionToRequest = options.before
} else {
// Return the latest updates first if no nextVersionToRequest is set.
nextVersionToRequest = null
}
UpdatesProcessor.processUpdatesForProject(projectId, function (error) {
if (error) {
return callback(OError.tag(error))
}
LabelsManager.getLabels(projectId, function (error, labels) {
if (error) {
return callback(OError.tag(error))
}
const labelsByVersion = {}
for (const label of labels) {
if (labelsByVersion[label.version] == null) {
labelsByVersion[label.version] = []
}
labelsByVersion[label.version].push(label)
}
WebApiManager.getHistoryId(projectId, function (error, historyId) {
if (error) return callback(error)
let chunksRequested = 0
let summarizedUpdates = []
let toV = null
const shouldRequestMoreUpdates = cb => {
return cb(
null,
chunksRequested < MAX_CHUNK_REQUESTS &&
(nextVersionToRequest == null || nextVersionToRequest > 0) &&
summarizedUpdates.length < options.min_count
)
}
const getNextBatchOfUpdates = cb =>
_getProjectUpdates(
projectId,
historyId,
nextVersionToRequest,
function (error, updateSet, startVersion) {
if (error) {
return cb(OError.tag(error))
}
// Updates are returned in time order, but we want to go back in time
updateSet.reverse()
updateSet = discardUnwantedUpdates(updateSet)
;({ summarizedUpdates, toV } = _summarizeUpdates(
updateSet,
labelsByVersion,
summarizedUpdates,
toV
))
nextVersionToRequest = startVersion
chunksRequested += 1
cb()
}
)
function discardUnwantedUpdates(updateSet) {
// We're getting whole chunks from the OL history store, but we might
// only want updates from before a certain version
if (options.before == null) {
return updateSet
} else {
return updateSet.filter(u => u.v < options.before)
}
}
// If the project doesn't have a history then we can bail out here
HistoryApiManager.shouldUseProjectHistory(
projectId,
function (error, shouldUseProjectHistory) {
if (error) {
return callback(OError.tag(error))
}
if (shouldUseProjectHistory) {
async.whilst(
shouldRequestMoreUpdates,
getNextBatchOfUpdates,
function (error) {
if (error) {
return callback(OError.tag(error))
}
callback(
null,
summarizedUpdates,
nextVersionToRequest > 0 ? nextVersionToRequest : undefined
)
}
)
} else {
logger.debug(
{ projectId },
'returning no updates as project does not use history'
)
callback(null, [])
}
}
)
})
})
})
}
function _getProjectUpdates(projectId, historyId, version, callback) {
function getChunk(cb) {
if (version != null) {
HistoryStoreManager.getChunkAtVersion(projectId, historyId, version, cb)
} else {
HistoryStoreManager.getMostRecentChunk(projectId, historyId, cb)
}
}
getChunk(function (error, chunk) {
if (error) {
return callback(OError.tag(error))
}
const oldestVersion = chunk.chunk.startVersion
ChunkTranslator.convertToSummarizedUpdates(
chunk,
function (error, updateSet) {
if (error) {
return callback(OError.tag(error))
}
callback(error, updateSet, oldestVersion)
}
)
})
}
function _summarizeUpdates(updates, labels, existingSummarizedUpdates, toV) {
if (existingSummarizedUpdates == null) {
existingSummarizedUpdates = []
}
const summarizedUpdates = existingSummarizedUpdates.slice()
for (const update of updates) {
if (toV == null) {
// This is the first update we've seen. Initialize toV.
toV = update.v + 1
}
// Skip empty updates (only record their version). Empty updates are
// updates that only contain comment operations. We don't have a UI for
// these yet.
if (isUpdateEmpty(update)) {
continue
}
// The client needs to know the exact version at which a delete happened, in
// order to be able to restore. So even when summarizing, retain the version
// at which each projectOp happened.
for (const projectOp of update.project_ops) {
projectOp.atV = update.v
}
const summarizedUpdate = summarizedUpdates[summarizedUpdates.length - 1]
const labelsForVersion = labels[update.v + 1] || []
if (
summarizedUpdate &&
_shouldMergeUpdate(update, summarizedUpdate, labelsForVersion)
) {
_mergeUpdate(update, summarizedUpdate)
} else {
const newUpdate = {
fromV: update.v,
toV,
meta: {
users: update.meta.users,
start_ts: update.meta.start_ts,
end_ts: update.meta.end_ts,
},
labels: labelsForVersion,
pathnames: new Set(update.pathnames),
project_ops: update.project_ops.slice(), // Clone since we'll modify
}
if (update.meta.origin) {
newUpdate.meta.origin = update.meta.origin
}
summarizedUpdates.push(newUpdate)
}
toV = update.v
}
return { summarizedUpdates, toV }
}
/**
* Given an update, the latest summarized update, and the labels that apply to
* the update, figure out if we can merge the update into the summarized
* update.
*/
function _shouldMergeUpdate(update, summarizedUpdate, labels) {
// Split updates on labels
if (labels.length > 0) {
return false
}
// Split updates on origin
if (update.meta.origin) {
if (summarizedUpdate.meta.origin) {
if (update.meta.origin.kind !== summarizedUpdate.meta.origin.kind) {
return false
}
if (update.meta.origin.path !== summarizedUpdate.meta.origin.path) {
return false
}
if (
update.meta.origin.kind === 'file-restore' &&
update.meta.origin.timestamp !== summarizedUpdate.meta.origin.timestamp
) {
return false
}
if (
update.meta.origin.kind === 'project-restore' &&
update.meta.origin.timestamp !== summarizedUpdate.meta.origin.timestamp
) {
return false
}
} else {
return false
}
} else if (summarizedUpdate.meta.origin) {
return false
}
// Split updates if it's been too long since the last update. We're going
// backwards in time through the updates, so the update comes before the summarized update.
if (
summarizedUpdate.meta.end_ts - update.meta.start_ts >=
TIME_BETWEEN_DISTINCT_UPDATES
) {
return false
}
// Do not merge text operations and file operations, except for history resyncs
const updateHasTextOps = update.pathnames.length > 0
const updateHasFileOps = update.project_ops.length > 0
const summarizedUpdateHasTextOps = summarizedUpdate.pathnames.size > 0
const summarizedUpdateHasFileOps = summarizedUpdate.project_ops.length > 0
const isHistoryResync =
update.meta.origin &&
['history-resync', 'history-migration'].includes(update.meta.origin.kind)
if (
!isHistoryResync &&
((updateHasTextOps && summarizedUpdateHasFileOps) ||
(updateHasFileOps && summarizedUpdateHasTextOps))
) {
return false
}
return true
}
/**
* Merge an update into a summarized update.
*
* This mutates the summarized update.
*/
function _mergeUpdate(update, summarizedUpdate) {
// check if the user in this update is already present in the earliest update,
// if not, add them to the users list of the earliest update
summarizedUpdate.meta.users = _.uniqBy(
_.union(summarizedUpdate.meta.users, update.meta.users),
function (user) {
if (user == null) {
return null
}
if (user.id == null) {
return user
}
return user.id
}
)
summarizedUpdate.fromV = Math.min(summarizedUpdate.fromV, update.v)
summarizedUpdate.toV = Math.max(summarizedUpdate.toV, update.v + 1)
summarizedUpdate.meta.start_ts = Math.min(
summarizedUpdate.meta.start_ts,
update.meta.start_ts
)
summarizedUpdate.meta.end_ts = Math.max(
summarizedUpdate.meta.end_ts,
update.meta.end_ts
)
// Add file operations
for (const op of update.project_ops || []) {
summarizedUpdate.project_ops.push(op)
if (op.add) {
// Merging a file creation. Remove any corresponding edit since that's redundant.
summarizedUpdate.pathnames.delete(op.add.pathname)
}
}
// Add edit operations
for (const pathname of update.pathnames || []) {
summarizedUpdate.pathnames.add(pathname)
}
}
function isUpdateEmpty(update) {
return update.project_ops.length === 0 && update.pathnames.length === 0
}
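// Illustrative usage sketch (comments only, not executed; the version numbers
// are placeholders): fetch summarized updates going back from version 100.
// The third callback argument is the version to pass as `before` on the next
// call, or undefined once the start of the history has been reached.
//
//   getSummarizedProjectUpdates(
//     projectId,
//     { before: 100, min_count: 25 },
//     (error, summarizedUpdates, nextBeforeVersion) => {
//       // each summarized update has fromV/toV, meta, labels, pathnames and
//       // project_ops
//     }
//   )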

File diff suppressed because it is too large

View File

@@ -0,0 +1,491 @@
// @ts-check
import OError from '@overleaf/o-error'
import DMP from 'diff-match-patch'
/**
* @import { DeleteOp, InsertOp, Op, Update } from './types'
*/
const MAX_TIME_BETWEEN_UPDATES = 60 * 1000 // one minute
const MAX_UPDATE_SIZE = 2 * 1024 * 1024 // 2 MB
const ADDED = 1
const REMOVED = -1
const UNCHANGED = 0
const strInject = (s1, pos, s2) => s1.slice(0, pos) + s2 + s1.slice(pos)
const strRemove = (s1, pos, length) => s1.slice(0, pos) + s1.slice(pos + length)
const dmp = new DMP()
dmp.Diff_Timeout = 0.1 // prevent the diff algorithm from searching too hard for changes in unrelated content
const cloneWithOp = function (update, op) {
// to improve performance, shallow clone the update
// and its meta property (also an object), then
// overwrite the op property directly.
update = Object.assign({}, update)
update.meta = Object.assign({}, update.meta)
update.op = op
return update
}
const mergeUpdatesWithOp = function (firstUpdate, secondUpdate, op) {
// We want to take doc_length and ts from the firstUpdate, v and doc_hash from the second
const update = cloneWithOp(firstUpdate, op)
if (secondUpdate.v != null) {
update.v = secondUpdate.v
}
if (secondUpdate.meta.doc_hash != null) {
update.meta.doc_hash = secondUpdate.meta.doc_hash
} else {
delete update.meta.doc_hash
}
return update
}
/**
* Adjust the given length to account for the given op
*
* The resulting length is the new length of the doc after the op is applied.
*
* @param {number} length
* @param {Op} op
* @param {object} opts
* @param {boolean} [opts.tracked] - whether or not the update is a tracked change
* @returns {number} the adjusted length
*/
function adjustLengthByOp(length, op, opts = {}) {
if ('i' in op && op.i != null) {
if (op.trackedDeleteRejection) {
// Tracked delete rejection: will be translated into a retain
return length
} else {
return length + op.i.length
}
} else if ('d' in op && op.d != null) {
if (opts.tracked) {
// Tracked delete: will be translated into a retain, except where it overlaps tracked inserts.
for (const change of op.trackedChanges ?? []) {
if (change.type === 'insert') {
length -= change.length
}
}
return length
} else {
return length - op.d.length
}
} else if ('r' in op && op.r != null) {
return length
} else if ('c' in op && op.c != null) {
return length
} else {
throw new OError('unexpected op type')
}
}
/**
* Updates come from the doc updater in format
* {
* op: [ { ... op1 ... }, { ... op2 ... } ]
* meta: { ts: ..., user_id: ... }
* }
* but it's easier to work with one op per update, so convert these updates to
* our compressed format
* [{
* op: op1
* meta: { ts: ..., user_id: ... }
* }, {
* op: op2
* meta: { ts: ..., user_id: ... }
* }]
*
* @param {Update[]} updates
* @returns {Update[]} single op updates
*/
export function convertToSingleOpUpdates(updates) {
const splitUpdates = []
for (const update of updates) {
if (!('op' in update)) {
// Not a text op, likely a project structure op
splitUpdates.push(update)
continue
}
const ops = update.op
let docLength = update.meta.history_doc_length ?? update.meta.doc_length
// Temporary fix for document-updater sending a length of -1 for empty
// documents. This can be removed after all queues have been flushed.
if (docLength === -1) {
docLength = 0
}
const docHash = update.meta.doc_hash
for (const op of ops) {
const splitUpdate = cloneWithOp(update, op)
// Only the last update will keep the doc_hash property
delete splitUpdate.meta.doc_hash
if (docLength != null) {
splitUpdate.meta.doc_length = docLength
docLength = adjustLengthByOp(docLength, op, {
tracked: update.meta.tc != null,
})
delete splitUpdate.meta.history_doc_length
}
splitUpdates.push(splitUpdate)
}
if (docHash != null && splitUpdates.length > 0) {
splitUpdates[splitUpdates.length - 1].meta.doc_hash = docHash
}
}
return splitUpdates
}
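// Illustrative example of the split performed above (comments only, not
// executed; values are placeholders). Note how doc_length is carried forward
// and adjusted op by op:
//
//   convertToSingleOpUpdates([
//     {
//       op: [
//         { i: 'foo', p: 0 },
//         { d: 'bar', p: 10 },
//       ],
//       meta: { ts: 1000, user_id: 'u1', doc_length: 20 },
//     },
//   ])
//   // => [
//   //   { op: { i: 'foo', p: 0 }, meta: { ts: 1000, user_id: 'u1', doc_length: 20 } },
//   //   { op: { d: 'bar', p: 10 }, meta: { ts: 1000, user_id: 'u1', doc_length: 23 } },
//   // ]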
export function filterBlankUpdates(updates) {
// Diffing an insert and delete can return blank inserts and deletes
// which the OL history service doesn't have an equivalent for.
//
// NOTE: this relies on the updates only containing either op.i or op.d entries
// but not both, which is the case because diffAsShareJsOps emits ops that
// each contain only one of them
return updates.filter(
update => !(update.op && (update.op.i === '' || update.op.d === ''))
)
}
export function concatUpdatesWithSameVersion(updates) {
const concattedUpdates = []
for (let update of updates) {
if (update.op != null) {
update = cloneWithOp(update, [update.op])
const lastUpdate = concattedUpdates[concattedUpdates.length - 1]
if (
lastUpdate != null &&
lastUpdate.op != null &&
lastUpdate.v === update.v &&
lastUpdate.doc === update.doc &&
lastUpdate.pathname === update.pathname
) {
lastUpdate.op = lastUpdate.op.concat(update.op)
if (update.meta.doc_hash == null) {
delete lastUpdate.meta.doc_hash
} else {
lastUpdate.meta.doc_hash = update.meta.doc_hash
}
} else {
concattedUpdates.push(update)
}
} else {
concattedUpdates.push(update)
}
}
return concattedUpdates
}
export function compressRawUpdates(rawUpdates) {
let updates = convertToSingleOpUpdates(rawUpdates)
updates = compressUpdates(updates)
updates = filterBlankUpdates(updates)
updates = concatUpdatesWithSameVersion(updates)
return updates
}
export function compressUpdates(updates) {
if (updates.length === 0) {
return []
}
let compressedUpdates = [updates.shift()]
for (const update of updates) {
const lastCompressedUpdate = compressedUpdates.pop()
if (lastCompressedUpdate != null) {
const newCompressedUpdates = _concatTwoUpdates(
lastCompressedUpdate,
update
)
compressedUpdates = compressedUpdates.concat(newCompressedUpdates)
} else {
compressedUpdates.push(update)
}
}
return compressedUpdates
}
/**
* If possible, merge two updates into a single update that has the same effect.
*
* It's useful to do some of this work at this point while we're dealing with
* document-updater updates. The deletes, in particular include the deleted
* text. This allows us to find pieces of inserts and deletes that cancel each
* other out because they insert/delete the exact same text. This compression
* makes the diff smaller.
*/
function _concatTwoUpdates(firstUpdate, secondUpdate) {
// Previously we cloned firstUpdate and secondUpdate at this point, but we
// can skip that step because whenever they are returned in modified form a
// clone has already been made via mergeUpdatesWithOp.
if (firstUpdate.op == null || secondUpdate.op == null) {
// Project structure ops
return [firstUpdate, secondUpdate]
}
if (
firstUpdate.doc !== secondUpdate.doc ||
firstUpdate.pathname !== secondUpdate.pathname
) {
return [firstUpdate, secondUpdate]
}
if (firstUpdate.meta.user_id !== secondUpdate.meta.user_id) {
return [firstUpdate, secondUpdate]
}
if (
(firstUpdate.meta.type === 'external' &&
secondUpdate.meta.type !== 'external') ||
(firstUpdate.meta.type !== 'external' &&
secondUpdate.meta.type === 'external') ||
(firstUpdate.meta.type === 'external' &&
secondUpdate.meta.type === 'external' &&
firstUpdate.meta.source !== secondUpdate.meta.source)
) {
return [firstUpdate, secondUpdate]
}
if (secondUpdate.meta.ts - firstUpdate.meta.ts > MAX_TIME_BETWEEN_UPDATES) {
return [firstUpdate, secondUpdate]
}
if (
(firstUpdate.meta.tc == null && secondUpdate.meta.tc != null) ||
(firstUpdate.meta.tc != null && secondUpdate.meta.tc == null)
) {
// One update is tracking changes and the other isn't. Tracking changes
// results in different behaviour in the history, so we need to keep these
// two updates separate.
return [firstUpdate, secondUpdate]
}
if (Boolean(firstUpdate.op.u) !== Boolean(secondUpdate.op.u)) {
// One update is an undo and the other isn't. If we were to merge the two
// updates, we would have to choose one value for the flag, which would be
// partially incorrect. Moreover, a tracked delete that is also an undo is
// treated as a tracked insert rejection by the history, so these updates
// need to be well separated.
return [firstUpdate, secondUpdate]
}
if (
firstUpdate.op.trackedDeleteRejection ||
secondUpdate.op.trackedDeleteRejection
) {
// Do not merge tracked delete rejections. Each tracked delete rejection is
// a separate operation.
return [firstUpdate, secondUpdate]
}
if (
firstUpdate.op.trackedChanges != null ||
secondUpdate.op.trackedChanges != null
) {
// Do not merge ops that span tracked changes.
// TODO: This could theoretically be handled, but it would be complex. One
// would need to take tracked deletes into account when merging inserts and
// deletes together.
return [firstUpdate, secondUpdate]
}
const firstOp = firstUpdate.op
const secondOp = secondUpdate.op
const firstSize =
(firstOp.i && firstOp.i.length) || (firstOp.d && firstOp.d.length)
const secondSize =
(secondOp.i && secondOp.i.length) || (secondOp.d && secondOp.d.length)
const firstOpInsideSecondOp =
secondOp.p <= firstOp.p && firstOp.p <= secondOp.p + secondSize
const secondOpInsideFirstOp =
firstOp.p <= secondOp.p && secondOp.p <= firstOp.p + firstSize
const combinedLengthUnderLimit = firstSize + secondSize < MAX_UPDATE_SIZE
// Two inserts
if (
firstOp.i != null &&
secondOp.i != null &&
secondOpInsideFirstOp &&
combinedLengthUnderLimit &&
insertOpsInsideSameComments(firstOp, secondOp)
) {
return [
mergeUpdatesWithOp(firstUpdate, secondUpdate, {
...firstOp,
i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i),
}),
]
}
// Two deletes
if (
firstOp.d != null &&
secondOp.d != null &&
firstOpInsideSecondOp &&
combinedLengthUnderLimit &&
firstUpdate.meta.tc == null &&
secondUpdate.meta.tc == null
) {
return [
mergeUpdatesWithOp(firstUpdate, secondUpdate, {
...secondOp,
d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d),
}),
]
}
// An insert and then a delete
if (
firstOp.i != null &&
secondOp.d != null &&
secondOpInsideFirstOp &&
firstUpdate.meta.tc == null &&
secondUpdate.meta.tc == null
) {
const offset = secondOp.p - firstOp.p
const insertedText = firstOp.i.slice(offset, offset + secondOp.d.length)
// Only trim the insert when the delete is fully contained within it
if (insertedText === secondOp.d) {
const insert = strRemove(firstOp.i, offset, secondOp.d.length)
if (insert === '') {
return []
} else {
return [
mergeUpdatesWithOp(firstUpdate, secondUpdate, {
...firstOp,
i: insert,
}),
]
}
} else {
// This will only happen if the delete extends outside the insert
return [firstUpdate, secondUpdate]
}
}
// A delete then an insert at the same place, likely a copy-paste of a chunk of content
if (
firstOp.d != null &&
secondOp.i != null &&
firstOp.p === secondOp.p &&
firstUpdate.meta.tc == null &&
secondUpdate.meta.tc == null
) {
const offset = firstOp.p
const hoffset = firstOp.hpos
const diffUpdates = diffAsShareJsOps(firstOp.d, secondOp.i).map(
function (op) {
// diffAsShareJsOps() returns ops with positions relative to the position
// of the copy/paste. We need to adjust these positions so that they
// apply to the whole document instead.
const pos = op.p
op.p = pos + offset
if (hoffset != null) {
op.hpos = pos + hoffset
}
if (firstOp.u && secondOp.u) {
op.u = true
}
if ('i' in op && secondOp.commentIds != null) {
// Make sure that commentIds metadata is propagated to inserts
op.commentIds = secondOp.commentIds
}
const update = mergeUpdatesWithOp(firstUpdate, secondUpdate, op)
// Set the doc hash only on the last update
delete update.meta.doc_hash
return update
}
)
const docHash = secondUpdate.meta.doc_hash
if (docHash != null && diffUpdates.length > 0) {
diffUpdates[diffUpdates.length - 1].meta.doc_hash = docHash
}
// Doing a diff like this loses track of the doc lengths for each
// update, so recalculate them
let docLength =
firstUpdate.meta.history_doc_length ?? firstUpdate.meta.doc_length
for (const update of diffUpdates) {
update.meta.doc_length = docLength
docLength = adjustLengthByOp(docLength, update.op, {
tracked: update.meta.tc != null,
})
delete update.meta.history_doc_length
}
return diffUpdates
}
return [firstUpdate, secondUpdate]
}
/**
* Return the diff between two strings
*
* @param {string} before
* @param {string} after
* @returns {(InsertOp | DeleteOp)[]} the ops that generate that diff
*/
export function diffAsShareJsOps(before, after) {
const diffs = dmp.diff_main(before, after)
dmp.diff_cleanupSemantic(diffs)
const ops = []
let position = 0
for (const diff of diffs) {
const type = diff[0]
const content = diff[1]
if (type === ADDED) {
ops.push({
i: content,
p: position,
})
position += content.length
} else if (type === REMOVED) {
ops.push({
d: content,
p: position,
})
} else if (type === UNCHANGED) {
position += content.length
} else {
throw new Error('Unknown type')
}
}
return ops
}
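// Illustrative example (comments only, not executed): after semantic cleanup
// a simple word replacement typically diffs to a delete and an insert at the
// same position, relative to the "before" text.
//
//   diffAsShareJsOps('the quick brown fox', 'the quick red fox')
//   // => [ { d: 'brown', p: 10 }, { i: 'red', p: 10 } ]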
/**
* Checks if two insert ops are inside the same comments
*
* @param {InsertOp} op1
* @param {InsertOp} op2
* @returns {boolean}
*/
function insertOpsInsideSameComments(op1, op2) {
const commentIds1 = op1.commentIds
const commentIds2 = op2.commentIds
if (commentIds1 == null && commentIds2 == null) {
// None are inside comments
return true
}
if (
commentIds1 != null &&
commentIds2 != null &&
commentIds1.every(id => commentIds2.includes(id)) &&
commentIds2.every(id => commentIds1.includes(id))
) {
// Both are inside the same comments
return true
}
return false
}

View File

@@ -0,0 +1,487 @@
// @ts-check
import _ from 'lodash'
import Core from 'overleaf-editor-core'
import * as Errors from './Errors.js'
import * as OperationsCompressor from './OperationsCompressor.js'
import { isInsert, isRetain, isDelete, isComment } from './Utils.js'
/**
* @import { AddDocUpdate, AddFileUpdate, DeleteCommentUpdate, Op, RawScanOp } from './types'
* @import { RenameUpdate, TextUpdate, TrackingDirective, TrackingProps } from './types'
* @import { SetCommentStateUpdate, SetFileMetadataOperation, Update, UpdateWithBlob } from './types'
*/
/**
* Convert updates into history changes
*
* @param {string} projectId
* @param {UpdateWithBlob[]} updatesWithBlobs
* @returns {Array<Core.Change | null>}
*/
export function convertToChanges(projectId, updatesWithBlobs) {
return updatesWithBlobs.map(update => _convertToChange(projectId, update))
}
/**
* Convert an update into a history change
*
* @param {string} projectId
* @param {UpdateWithBlob} updateWithBlob
* @returns {Core.Change | null}
*/
function _convertToChange(projectId, updateWithBlob) {
let operations
const { update } = updateWithBlob
let projectVersion = null
const v2DocVersions = {}
if (_isRenameUpdate(update)) {
operations = [
{
pathname: _convertPathname(update.pathname),
newPathname: _convertPathname(update.new_pathname),
},
]
projectVersion = update.version
} else if (isAddUpdate(update)) {
const op = {
pathname: _convertPathname(update.pathname),
file: {
hash: updateWithBlob.blobHashes.file,
},
}
if (_isAddDocUpdate(update)) {
op.file.rangesHash = updateWithBlob.blobHashes.ranges
}
if (_isAddFileUpdate(update)) {
op.file.metadata = update.metadata
}
operations = [op]
projectVersion = update.version
} else if (isTextUpdate(update)) {
const docLength = update.meta.history_doc_length ?? update.meta.doc_length
let pathname = update.meta.pathname
pathname = _convertPathname(pathname)
const builder = new OperationsBuilder(docLength, pathname)
// convert ops
for (const op of update.op) {
builder.addOp(op, update)
}
// add doc hash if present
if (update.meta.doc_hash != null) {
// This will commit the text operation that the builder is currently
// building and set the contentHash property.
builder.commitTextOperation({ contentHash: update.meta.doc_hash })
}
operations = builder.finish()
// add doc version information if present
if (update.v != null) {
v2DocVersions[update.doc] = { pathname, v: update.v }
}
} else if (isSetCommentStateUpdate(update)) {
operations = [
{
pathname: _convertPathname(update.pathname),
commentId: update.commentId,
resolved: update.resolved,
},
]
} else if (isSetFileMetadataOperation(update)) {
operations = [
{
pathname: _convertPathname(update.pathname),
metadata: update.metadata,
},
]
} else if (isDeleteCommentUpdate(update)) {
operations = [
{
pathname: _convertPathname(update.pathname),
deleteComment: update.deleteComment,
},
]
} else {
const error = new Errors.UpdateWithUnknownFormatError(
'update with unknown format',
{ projectId, update }
)
throw error
}
let v2Authors
if (update.meta.user_id === 'anonymous-user') {
// history-v1 uses null to represent an anonymous author
v2Authors = [null]
} else {
// user_id is missing on resync operations that update the contents of a doc
v2Authors = _.compact([update.meta.user_id])
}
const rawChange = {
operations,
v2Authors,
timestamp: new Date(update.meta.ts).toISOString(),
projectVersion,
v2DocVersions: Object.keys(v2DocVersions).length ? v2DocVersions : null,
}
if (update.meta.origin) {
rawChange.origin = update.meta.origin
} else if (update.meta.type === 'external' && update.meta.source) {
rawChange.origin = { kind: update.meta.source }
}
const change = Core.Change.fromRaw(rawChange)
if (change != null) {
change.operations = OperationsCompressor.compressOperations(
change.operations
)
}
return change
}
/**
* @param {Update} update
* @returns {update is RenameUpdate}
*/
function _isRenameUpdate(update) {
return 'new_pathname' in update && update.new_pathname != null
}
/**
* @param {Update} update
* @returns {update is AddDocUpdate}
*/
function _isAddDocUpdate(update) {
return (
'doc' in update &&
update.doc != null &&
'docLines' in update &&
update.docLines != null
)
}
/**
* @param {Update} update
* @returns {update is AddFileUpdate}
*/
function _isAddFileUpdate(update) {
return (
'file' in update &&
update.file != null &&
(('createdBlob' in update && update.createdBlob) ||
('url' in update && update.url != null))
)
}
/**
* @param {Update} update
* @returns {update is TextUpdate}
*/
export function isTextUpdate(update) {
return (
'doc' in update &&
update.doc != null &&
'op' in update &&
update.op != null &&
'pathname' in update.meta &&
update.meta.pathname != null &&
'doc_length' in update.meta &&
update.meta.doc_length != null
)
}
export function isProjectStructureUpdate(update) {
return isAddUpdate(update) || _isRenameUpdate(update)
}
/**
* @param {Update} update
* @returns {update is AddDocUpdate | AddFileUpdate}
*/
export function isAddUpdate(update) {
return _isAddDocUpdate(update) || _isAddFileUpdate(update)
}
/**
* @param {Update} update
* @returns {update is SetCommentStateUpdate}
*/
export function isSetCommentStateUpdate(update) {
return 'commentId' in update && 'resolved' in update
}
/**
* @param {Update} update
* @returns {update is DeleteCommentUpdate}
*/
export function isDeleteCommentUpdate(update) {
return 'deleteComment' in update
}
/**
* @param {Update} update
* @returns {update is SetFileMetadataOperation}
*/
export function isSetFileMetadataOperation(update) {
return 'metadata' in update
}
export function _convertPathname(pathname) {
// Strip leading /
pathname = pathname.replace(/^\//, '')
// Replace \\ with _. Backslashes are no longer allowed
// in projects in web, but we have some which have gone through
// into history before this restriction was added. This makes
// them valid for the history store.
// See https://github.com/overleaf/write_latex/issues/4471
pathname = pathname.replace(/\\/g, '_')
// workaround for filenames containing asterisks; this will
// fail if a corresponding replacement file already exists, but it
// would fail anyway without this attempt to fix the pathname.
// See https://github.com/overleaf/internal/issues/900
pathname = pathname.replace(/\*/g, '__ASTERISK__')
// workaround for filenames beginning with spaces
// See https://github.com/overleaf/internal/issues/1404
// note: we have already stripped any leading slash above
pathname = pathname.replace(/^ /, '__SPACE__') // handle top-level
pathname = pathname.replace(/\/ /g, '/__SPACE__') // handle folders
return pathname
}
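// Illustrative examples of the conversions above (comments only, not
// executed):
//
//   _convertPathname('/main.tex') // => 'main.tex'
//   _convertPathname('/figs\\plot*.png') // => 'figs_plot__ASTERISK__.png'
//   _convertPathname('/ draft/ notes.tex') // => '__SPACE__draft/__SPACE__notes.tex'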
class OperationsBuilder {
/**
* @param {number} docLength
* @param {string} pathname
*/
constructor(docLength, pathname) {
/**
* List of operations being built
*/
this.operations = []
/**
* Currently built text operation
*
* @type {RawScanOp[]}
*/
this.textOperation = []
/**
* Cursor inside the current text operation
*/
this.cursor = 0
this.docLength = docLength
this.pathname = pathname
}
/**
* @param {Op} op
* @param {TextUpdate} update
* @returns {void}
*/
addOp(op, update) {
// We sometimes receive operations that operate at positions outside the
// docLength. Document updater coerces the position to the end of the
// document. We do the same here.
const pos = Math.min(op.hpos ?? op.p, this.docLength)
if (isComment(op)) {
// Commit the current text operation
this.commitTextOperation()
// Add a comment operation
const commentLength = op.hlen ?? op.c.length
const commentOp = {
pathname: this.pathname,
commentId: op.t,
ranges: commentLength > 0 ? [{ pos, length: commentLength }] : [],
}
if ('resolved' in op) {
commentOp.resolved = op.resolved
}
this.operations.push(commentOp)
return
}
if (!isInsert(op) && !isDelete(op) && !isRetain(op)) {
throw new Errors.UnexpectedOpTypeError('unexpected op type', { op })
}
if (pos < this.cursor) {
this.commitTextOperation()
// At this point, this.cursor === 0 and we can continue
}
if (pos > this.cursor) {
this.retain(pos - this.cursor)
}
if (isInsert(op)) {
if (op.trackedDeleteRejection) {
this.retain(op.i.length, {
tracking: { type: 'none' },
})
} else {
const opts = {}
if (update.meta.tc != null) {
opts.tracking = {
type: 'insert',
userId: update.meta.user_id,
ts: new Date(update.meta.ts).toISOString(),
}
}
if (op.commentIds != null) {
opts.commentIds = op.commentIds
}
this.insert(op.i, opts)
}
}
if (isRetain(op)) {
if (op.tracking) {
this.retain(op.r.length, { tracking: op.tracking })
} else {
this.retain(op.r.length)
}
}
if (isDelete(op)) {
const changes = op.trackedChanges ?? []
// Tracked changes should already be ordered by offset, but let's make
// sure they are.
changes.sort((a, b) => {
const posOrder = a.offset - b.offset
if (posOrder !== 0) {
return posOrder
} else if (a.type === 'insert' && b.type === 'delete') {
return 1
} else if (a.type === 'delete' && b.type === 'insert') {
return -1
} else {
return 0
}
})
let offset = 0
for (const change of changes) {
if (change.offset > offset) {
// Handle the portion before the tracked change
if (update.meta.tc != null) {
// This is a tracked delete
this.retain(change.offset - offset, {
tracking: {
type: 'delete',
userId: update.meta.user_id,
ts: new Date(update.meta.ts).toISOString(),
},
})
} else {
// This is a regular delete
this.delete(change.offset - offset)
}
offset = change.offset
}
// Now, handle the portion inside the tracked change
if (change.type === 'delete') {
// Tracked deletes are skipped over when deleting
this.retain(change.length)
} else if (change.type === 'insert') {
// Deletes inside tracked inserts are always regular deletes
this.delete(change.length)
offset += change.length
}
}
if (offset < op.d.length) {
// Handle the portion after the last tracked change
if (update.meta.tc != null) {
// This is a tracked delete
this.retain(op.d.length - offset, {
tracking: {
type: 'delete',
userId: update.meta.user_id,
ts: new Date(update.meta.ts).toISOString(),
},
})
} else {
// This is a regular delete
this.delete(op.d.length - offset)
}
}
}
}
/**
* @param {number} length
* @param {object} opts
* @param {TrackingDirective} [opts.tracking]
*/
retain(length, opts = {}) {
if (opts.tracking) {
this.textOperation.push({ r: length, ...opts })
} else {
this.textOperation.push(length)
}
this.cursor += length
}
/**
* @param {string} str
* @param {object} opts
* @param {TrackingProps} [opts.tracking]
* @param {string[]} [opts.commentIds]
*/
insert(str, opts = {}) {
if (opts.tracking || opts.commentIds) {
this.textOperation.push({ i: str, ...opts })
} else {
this.textOperation.push(str)
}
this.cursor += str.length
this.docLength += str.length
}
/**
* @param {number} length
* @param {object} opts
*/
delete(length, opts = {}) {
this.textOperation.push(-length)
this.docLength -= length
}
/**
* Finalize the current text operation and push it to the queue
*
* @param {object} [opts]
* @param {string} [opts.contentHash]
*/
commitTextOperation(opts = {}) {
if (this.textOperation.length > 0 && this.cursor < this.docLength) {
this.retain(this.docLength - this.cursor)
}
if (this.textOperation.length > 0) {
const operation = {
pathname: this.pathname,
textOperation: this.textOperation,
}
if (opts.contentHash != null) {
operation.contentHash = opts.contentHash
}
this.operations.push(operation)
this.textOperation = []
}
this.cursor = 0
}
finish() {
this.commitTextOperation()
return this.operations
}
}
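// Informal usage sketch of the builder above (names and values are
// illustrative): translating a single insert op { i: 'foo', p: 3 } against a
// 10-character doc proceeds roughly as
//
//   builder.retain(3)             // textOperation: [3]
//   builder.insert('foo', {})     // textOperation: [3, 'foo']
//   builder.commitTextOperation() // pads with retain(7) and pushes
//                                 // { pathname, textOperation: [3, 'foo', 7] }
//
// finish() commits any pending text operation and returns the accumulated
// operations.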

View File

@@ -0,0 +1,800 @@
import { promisify } from 'node:util'
import logger from '@overleaf/logger'
import async from 'async'
import metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as UpdateTranslator from './UpdateTranslator.js'
import * as BlobManager from './BlobManager.js'
import * as RedisManager from './RedisManager.js'
import * as ErrorRecorder from './ErrorRecorder.js'
import * as LockManager from './LockManager.js'
import * as UpdateCompressor from './UpdateCompressor.js'
import * as WebApiManager from './WebApiManager.js'
import * as SyncManager from './SyncManager.js'
import * as Versions from './Versions.js'
import * as Errors from './Errors.js'
import * as Metrics from './Metrics.js'
import * as RetryManager from './RetryManager.js'
import { Profiler } from './Profiler.js'
const keys = Settings.redis.lock.key_schema
export const REDIS_READ_BATCH_SIZE = 500
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
export function getRawUpdates(projectId, batchSize, callback) {
RedisManager.getRawUpdatesBatch(projectId, batchSize, (error, batch) => {
if (error != null) {
return callback(OError.tag(error))
}
let updates
try {
updates = RedisManager.parseDocUpdates(batch.rawUpdates)
} catch (error) {
return callback(OError.tag(error))
}
_getHistoryId(projectId, updates, (error, historyId) => {
if (error != null) {
return callback(OError.tag(error))
}
HistoryStoreManager.getMostRecentChunk(
projectId,
historyId,
(error, chunk) => {
if (error != null) {
return callback(OError.tag(error))
}
callback(null, { project_id: projectId, chunk, updates })
}
)
})
})
}
// Trigger a resync and start processing under the project lock, so that other
// operations cannot flush the resync updates in the meantime.
export function startResyncAndProcessUpdatesUnderLock(
projectId,
opts,
callback
) {
const startTimeMs = Date.now()
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(extendLock, releaseLock) => {
SyncManager.startResyncWithoutLock(projectId, opts, err => {
if (err) return callback(OError.tag(err))
extendLock(err => {
if (err) return callback(OError.tag(err))
_countAndProcessUpdates(
projectId,
extendLock,
REDIS_READ_BATCH_SIZE,
releaseLock
)
})
})
},
(flushError, queueSize) => {
if (flushError) {
OError.tag(flushError)
ErrorRecorder.record(projectId, queueSize, flushError, recordError => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
}
callback(flushError)
})
} else {
ErrorRecorder.clearError(projectId, clearError => {
if (clearError) {
logger.error(
{ err: clearError, projectId },
'failed to clear error'
)
}
callback()
})
}
if (queueSize > 0) {
const duration = (Date.now() - startTimeMs) / 1000
Metrics.historyFlushDurationSeconds.observe(duration)
Metrics.historyFlushQueueSize.observe(queueSize)
}
// clear the timestamp in the background if the queue is now empty
RedisManager.clearDanglingFirstOpTimestamp(projectId, () => {})
}
)
}
// Process all updates for a project, only check project-level information once
export function processUpdatesForProject(projectId, callback) {
const startTimeMs = Date.now()
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(extendLock, releaseLock) => {
_countAndProcessUpdates(
projectId,
extendLock,
REDIS_READ_BATCH_SIZE,
releaseLock
)
},
(flushError, queueSize) => {
if (flushError) {
OError.tag(flushError)
ErrorRecorder.record(
projectId,
queueSize,
flushError,
(recordError, failure) => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
callback(recordError)
} else if (
RetryManager.isFirstFailure(failure) &&
RetryManager.isHardFailure(failure)
) {
// This is the first failed flush since the last successful flush.
// Immediately attempt a resync.
logger.warn({ projectId }, 'Flush failed, attempting resync')
resyncProject(projectId, callback)
} else {
callback(flushError)
}
}
)
} else {
ErrorRecorder.clearError(projectId, clearError => {
if (clearError) {
logger.error(
{ err: clearError, projectId },
'failed to clear error'
)
}
callback()
})
}
if (queueSize > 0) {
const duration = (Date.now() - startTimeMs) / 1000
Metrics.historyFlushDurationSeconds.observe(duration)
Metrics.historyFlushQueueSize.observe(queueSize)
}
// clear the timestamp in the background if the queue is now empty
RedisManager.clearDanglingFirstOpTimestamp(projectId, () => {})
}
)
}
export function resyncProject(projectId, callback) {
SyncManager.startHardResync(projectId, {}, error => {
if (error != null) {
return callback(OError.tag(error))
}
// Flush the sync operations; this will not loop indefinitely
// because any failure won't be the first failure anymore.
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(extendLock, releaseLock) => {
_countAndProcessUpdates(
projectId,
extendLock,
REDIS_READ_BATCH_SIZE,
releaseLock
)
},
(flushError, queueSize) => {
if (flushError) {
ErrorRecorder.record(
projectId,
queueSize,
flushError,
(recordError, failure) => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
callback(OError.tag(recordError))
} else {
callback(OError.tag(flushError))
}
}
)
} else {
ErrorRecorder.clearError(projectId, clearError => {
if (clearError) {
logger.error(
{ err: clearError, projectId },
'failed to clear error'
)
}
callback()
})
}
}
)
})
}
export function processUpdatesForProjectUsingBisect(
projectId,
amountToProcess,
callback
) {
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(extendLock, releaseLock) => {
_countAndProcessUpdates(
projectId,
extendLock,
amountToProcess,
releaseLock
)
},
(flushError, queueSize) => {
if (amountToProcess === 0 || queueSize === 0) {
// no further processing possible
if (flushError != null) {
ErrorRecorder.record(
projectId,
queueSize,
OError.tag(flushError),
recordError => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
}
callback(flushError)
}
)
} else {
callback()
}
} else {
if (flushError != null) {
// decrease the batch size when we hit an error
processUpdatesForProjectUsingBisect(
projectId,
Math.floor(amountToProcess / 2),
callback
)
} else {
// otherwise continue processing with the same batch size
processUpdatesForProjectUsingBisect(
projectId,
amountToProcess,
callback
)
}
}
}
)
}
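// Informal example of the bisect strategy above: starting from a hypothetical
// amountToProcess of 500, repeated flush failures retry with batch sizes
// 500 -> 250 -> 125 -> ... -> 1 -> 0; once the batch size reaches 0 (or the
// queue is empty) the error is recorded and returned instead of halving again.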
export function processSingleUpdateForProject(projectId, callback) {
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(
extendLock,
releaseLock // set the batch size to 1 for single-stepping
) => {
_countAndProcessUpdates(projectId, extendLock, 1, releaseLock)
},
(flushError, queueSize) => {
// no need to clear the flush marker when single stepping
// it will be cleared up on the next background flush if
// the queue is empty
if (flushError) {
ErrorRecorder.record(projectId, queueSize, flushError, recordError => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
}
callback(flushError)
})
} else {
ErrorRecorder.clearError(projectId, clearError => {
if (clearError) {
logger.error(
{ err: clearError, projectId },
'failed to clear error'
)
}
callback()
})
}
}
)
}
_mocks._countAndProcessUpdates = (
projectId,
extendLock,
batchSize,
callback
) => {
RedisManager.countUnprocessedUpdates(projectId, (error, queueSize) => {
if (error != null) {
return callback(OError.tag(error))
}
if (queueSize > 0) {
logger.debug({ projectId, queueSize }, 'processing uncompressed updates')
RedisManager.getUpdatesInBatches(
projectId,
batchSize,
(updates, cb) => {
_processUpdatesBatch(projectId, updates, extendLock, cb)
},
error => {
// Unconventional callback signature. The caller needs the queue size
// even when an error is thrown in order to record the queue size in
// the projectHistoryFailures collection. We'll have to find another
// way to achieve this when we promisify.
callback(error, queueSize)
}
)
} else {
logger.debug({ projectId }, 'no updates to process')
callback(null, queueSize)
}
})
}
function _countAndProcessUpdates(...args) {
_mocks._countAndProcessUpdates(...args)
}
function _processUpdatesBatch(projectId, updates, extendLock, callback) {
// If the project doesn't have a history then we can bail out here
_getHistoryId(projectId, updates, (error, historyId) => {
if (error != null) {
return callback(OError.tag(error))
}
if (historyId == null) {
logger.debug(
{ projectId },
'discarding updates as project does not use history'
)
return callback()
}
_processUpdates(projectId, historyId, updates, extendLock, error => {
if (error != null) {
return callback(OError.tag(error))
}
callback()
})
})
}
export function _getHistoryId(projectId, updates, callback) {
let idFromUpdates = null
// check that all updates have the same history id
for (const update of updates) {
if (update.projectHistoryId != null) {
if (idFromUpdates == null) {
idFromUpdates = update.projectHistoryId.toString()
} else if (idFromUpdates !== update.projectHistoryId.toString()) {
metrics.inc('updates.batches.project-history-id.inconsistent-update')
return callback(
new OError('inconsistent project history id between updates', {
projectId,
idFromUpdates,
currentId: update.projectHistoryId,
})
)
}
}
}
WebApiManager.getHistoryId(projectId, (error, idFromWeb) => {
if (error != null && idFromUpdates != null) {
      // web errored (e.g. returned a 404) but all updates agree on a history
      // id, so fall back to the id from the updates
metrics.inc('updates.batches.project-history-id.from-updates')
return callback(null, idFromUpdates)
} else if (error != null) {
return callback(OError.tag(error))
}
if (idFromWeb == null && idFromUpdates == null) {
// present on neither web nor updates
callback(null, null)
} else if (idFromWeb != null && idFromUpdates == null) {
// present only on web
metrics.inc('updates.batches.project-history-id.from-web')
callback(null, idFromWeb)
} else if (idFromWeb == null && idFromUpdates != null) {
// present only on updates
metrics.inc('updates.batches.project-history-id.from-updates')
callback(null, idFromUpdates)
} else if (idFromWeb.toString() !== idFromUpdates.toString()) {
// inconsistent between web and updates
metrics.inc('updates.batches.project-history-id.inconsistent-with-web')
logger.warn(
{
projectId,
idFromWeb,
idFromUpdates,
updates,
},
'inconsistent project history id between updates and web'
)
callback(
new OError('inconsistent project history id between updates and web')
)
} else {
// the same on web and updates
metrics.inc('updates.batches.project-history-id.from-updates')
callback(null, idFromWeb)
}
})
}
function _handleOpsOutOfOrderError(projectId, projectHistoryId, err, ...rest) {
const adjustedLength = Math.max(rest.length, 1)
const results = rest.slice(0, adjustedLength - 1)
const callback = rest[adjustedLength - 1]
ErrorRecorder.getFailureRecord(projectId, (error, failureRecord) => {
if (error != null) {
return callback(error)
}
// Bypass ops-out-of-order errors in the stored chunk when in forceDebug mode
if (failureRecord != null && failureRecord.forceDebug === true) {
logger.warn(
{ err, projectId, projectHistoryId },
'ops out of order in chunk, forced continue'
)
callback(null, ...results) // return results without error
} else {
callback(err, ...results)
}
})
}
function _getMostRecentVersionWithDebug(projectId, projectHistoryId, callback) {
HistoryStoreManager.getMostRecentVersion(
projectId,
projectHistoryId,
(err, ...results) => {
if (err instanceof Errors.OpsOutOfOrderError) {
_handleOpsOutOfOrderError(
projectId,
projectHistoryId,
err,
...results,
callback
)
} else {
callback(err, ...results)
}
}
)
}
export function _processUpdates(
projectId,
projectHistoryId,
updates,
extendLock,
callback
) {
const profile = new Profiler('_processUpdates', {
project_id: projectId,
projectHistoryId,
})
  // skip updates first if we're in a sync; we might not need to do anything else
SyncManager.skipUpdatesDuringSync(
projectId,
updates,
(error, filteredUpdates, newSyncState) => {
profile.log('skipUpdatesDuringSync')
if (error != null) {
return callback(error)
}
if (filteredUpdates.length === 0) {
// return early if there are no updates to apply
return SyncManager.setResyncState(projectId, newSyncState, callback)
}
// only make request to history service if we have actual updates to process
_getMostRecentVersionWithDebug(
projectId,
projectHistoryId,
(
error,
baseVersion,
projectStructureAndDocVersions,
_lastChange,
mostRecentChunk
) => {
if (projectStructureAndDocVersions == null) {
projectStructureAndDocVersions = { project: null, docs: {} }
}
profile.log('getMostRecentVersion')
if (error != null) {
return callback(error)
}
async.waterfall(
[
cb => {
cb = profile.wrap('expandSyncUpdates', cb)
SyncManager.expandSyncUpdates(
projectId,
projectHistoryId,
mostRecentChunk,
filteredUpdates,
extendLock,
cb
)
},
(expandedUpdates, cb) => {
let unappliedUpdates
try {
unappliedUpdates = _skipAlreadyAppliedUpdates(
projectId,
expandedUpdates,
projectStructureAndDocVersions
)
} catch (err) {
return cb(err)
}
profile.log('skipAlreadyAppliedUpdates')
const compressedUpdates =
UpdateCompressor.compressRawUpdates(unappliedUpdates)
const timeTaken = profile
.log('compressRawUpdates')
.getTimeDelta()
if (timeTaken >= 1000) {
logger.debug(
{ projectId, updates: unappliedUpdates, timeTaken },
'slow compression of raw updates'
)
}
cb = profile.wrap('createBlobs', cb)
BlobManager.createBlobsForUpdates(
projectId,
projectHistoryId,
compressedUpdates,
extendLock,
cb
)
},
(updatesWithBlobs, cb) => {
let changes
try {
changes = UpdateTranslator.convertToChanges(
projectId,
updatesWithBlobs
).map(change => change.toRaw())
} catch (err) {
return cb(err)
} finally {
profile.log('convertToChanges')
}
cb(null, changes)
},
(changes, cb) => {
let change
const numChanges = changes.length
const byteLength = Buffer.byteLength(
JSON.stringify(changes),
'utf8'
)
let numOperations = 0
for (change of changes) {
if (change.operations != null) {
numOperations += change.operations.length
}
}
metrics.timing('history-store.request.changes', numChanges, 1)
metrics.timing('history-store.request.bytes', byteLength, 1)
metrics.timing(
'history-store.request.operations',
numOperations,
1
)
// thresholds taken from write_latex/main/lib/history_exporter.rb
if (numChanges > 1000) {
metrics.inc('history-store.request.exceeds-threshold.changes')
}
if (byteLength > Math.pow(1024, 2)) {
metrics.inc('history-store.request.exceeds-threshold.bytes')
const changeLengths = changes.map(change =>
Buffer.byteLength(JSON.stringify(change), 'utf8')
)
logger.warn(
{ projectId, byteLength, changeLengths },
'change size exceeds limit'
)
}
cb = profile.wrap('sendChanges', cb)
// this is usually the longest request, so extend the lock before starting it
extendLock(error => {
if (error != null) {
return cb(error)
}
if (changes.length === 0) {
return cb()
} // avoid unnecessary requests to history service
HistoryStoreManager.sendChanges(
projectId,
projectHistoryId,
changes,
baseVersion,
cb
)
})
},
cb => {
cb = profile.wrap('setResyncState', cb)
SyncManager.setResyncState(projectId, newSyncState, cb)
},
],
error => {
profile.end()
callback(error)
}
)
}
)
}
)
}
_mocks._skipAlreadyAppliedUpdates = (
projectId,
updates,
projectStructureAndDocVersions
) => {
function alreadySeenProjectVersion(previousProjectStructureVersion, update) {
return (
UpdateTranslator.isProjectStructureUpdate(update) &&
previousProjectStructureVersion != null &&
update.version != null &&
Versions.gte(previousProjectStructureVersion, update.version)
)
}
function alreadySeenDocVersion(previousDocVersions, update) {
if (UpdateTranslator.isTextUpdate(update) && update.v != null) {
const docId = update.doc
return (
previousDocVersions[docId] != null &&
previousDocVersions[docId].v != null &&
Versions.gte(previousDocVersions[docId].v, update.v)
)
} else {
return false
}
}
// check that the incoming updates are in the correct order (we do not
// want to send out of order updates to the history service)
let incomingProjectStructureVersion = null
const incomingDocVersions = {}
for (const update of updates) {
if (alreadySeenProjectVersion(incomingProjectStructureVersion, update)) {
logger.warn(
{ projectId, update, incomingProjectStructureVersion },
'incoming project structure updates are out of order'
)
throw new Errors.OpsOutOfOrderError(
'project structure version out of order on incoming updates'
)
} else if (alreadySeenDocVersion(incomingDocVersions, update)) {
logger.warn(
{ projectId, update, incomingDocVersions },
'incoming doc updates are out of order'
)
throw new Errors.OpsOutOfOrderError(
'doc version out of order on incoming updates'
)
}
// update the current project structure and doc versions
if (UpdateTranslator.isProjectStructureUpdate(update)) {
incomingProjectStructureVersion = update.version
} else if (UpdateTranslator.isTextUpdate(update)) {
incomingDocVersions[update.doc] = { v: update.v }
}
}
// discard updates already applied
const updatesToApply = []
const previousProjectStructureVersion = projectStructureAndDocVersions.project
const previousDocVersions = projectStructureAndDocVersions.docs
if (projectStructureAndDocVersions != null) {
const updateProjectVersions = []
for (const update of updates) {
if (update != null && update.version != null) {
updateProjectVersions.push(update.version)
}
}
logger.debug(
{ projectId, projectStructureAndDocVersions, updateProjectVersions },
'comparing updates with existing project versions'
)
}
for (const update of updates) {
if (alreadySeenProjectVersion(previousProjectStructureVersion, update)) {
metrics.inc('updates.discarded_project_structure_version')
logger.debug(
{ projectId, update, previousProjectStructureVersion },
'discarding previously applied project structure update'
)
continue
}
if (alreadySeenDocVersion(previousDocVersions, update)) {
metrics.inc('updates.discarded_doc_version')
logger.debug(
{ projectId, update, previousDocVersions },
'discarding previously applied doc update'
)
continue
}
// remove non-BMP characters from resync updates that have bypassed the normal docupdater flow
_sanitizeUpdate(update)
// if all checks above are ok then accept the update
updatesToApply.push(update)
}
return updatesToApply
}
export function _skipAlreadyAppliedUpdates(...args) {
return _mocks._skipAlreadyAppliedUpdates(...args)
}
function _sanitizeUpdate(update) {
  // adapted from docupdater's UpdateManager; ideally we would clean these in
  // docupdater too, but queues with this problem already exist, so we also
  // handle it here for robustness.
// Replace high and low surrogate characters with 'replacement character' (\uFFFD)
const removeBadChars = str => str.replace(/[\uD800-\uDFFF]/g, '\uFFFD')
// clean up any bad chars in resync diffs
if (update.op) {
for (const op of update.op) {
if (op.i != null) {
op.i = removeBadChars(op.i)
}
}
}
// clean up any bad chars in resync new docs
if (update.docLines != null) {
update.docLines = removeBadChars(update.docLines)
}
return update
}
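// Informal example of _sanitizeUpdate (input is illustrative): an unpaired
// surrogate in an insert op is replaced with U+FFFD, e.g.
//   _sanitizeUpdate({ op: [{ i: 'ab\uD800cd', p: 0 }] })
//   // => { op: [{ i: 'ab\uFFFDcd', p: 0 }] }
// Note that the regex matches every surrogate code unit, so well-formed
// surrogate pairs (astral characters) are replaced as well.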
export const promises = {
/** @type {(projectId: string) => Promise<number>} */
processUpdatesForProject: promisify(processUpdatesForProject),
/** @type {(projectId: string, opts: any) => Promise<number>} */
startResyncAndProcessUpdatesUnderLock: promisify(
startResyncAndProcessUpdatesUnderLock
),
}

View File

@@ -0,0 +1,37 @@
// @ts-check
/**
* @import { CommentOp, DeleteOp, InsertOp, Op, RetainOp } from './types'
*/
/**
* @param {Op} op
* @returns {op is InsertOp}
*/
export function isInsert(op) {
return 'i' in op && op.i != null
}
/**
* @param {Op} op
* @returns {op is RetainOp}
*/
export function isRetain(op) {
return 'r' in op && op.r != null
}
/**
* @param {Op} op
* @returns {op is DeleteOp}
*/
export function isDelete(op) {
return 'd' in op && op.d != null
}
/**
* @param {Op} op
* @returns {op is CommentOp}
*/
export function isComment(op) {
return 'c' in op && op.c != null && 't' in op && op.t != null
}
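// Usage sketch (the op value is illustrative): the guards narrow a raw Op so
// that op-specific fields can be accessed safely, e.g.
//   if (isInsert(op)) {
//     op.i.length // op is an InsertOp here
//   } else if (isComment(op)) {
//     op.t // comment thread id
//   }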

View File

@@ -0,0 +1,12 @@
import { celebrate, errors } from 'celebrate'
export { Joi } from 'celebrate'
export const errorMiddleware = errors()
/**
* Validation middleware
*/
export function validate(schema) {
return celebrate(schema, { allowUnknown: true })
}
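// Usage sketch (the route and schema are hypothetical, not part of this
// module):
//   app.post(
//     '/project/:project_id/flush',
//     validate({ params: Joi.object({ project_id: Joi.string().required() }) }),
//     handler
//   )
// errorMiddleware is mounted after the routes to turn celebrate validation
// failures into HTTP 400 responses.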

View File

@@ -0,0 +1,68 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// Compare Versions like 1.2 < 4.1
const convertToArray = v => v.split('.').map(x => parseInt(x, 10))
const cmp = function (v1, v2) {
// allow comparison to work with integers
if (typeof v1 === 'number' && typeof v2 === 'number') {
if (v1 > v2) {
return +1
}
if (v1 < v2) {
return -1
}
// otherwise equal
return 0
}
// comparison with strings
v1 = convertToArray(v1)
v2 = convertToArray(v2)
while (v1.length || v2.length) {
    const [x, y] = [v1.shift(), v2.shift()]
if (x > y) {
return +1
}
if (x < y) {
return -1
}
if (x != null && y == null) {
return +1
}
if (x == null && y != null) {
return -1
}
}
return 0
}
export function compare(v1, v2) {
return cmp(v1, v2)
}
export function gt(v1, v2) {
return cmp(v1, v2) > 0
}
export function lt(v1, v2) {
return cmp(v1, v2) < 0
}
export function gte(v1, v2) {
return cmp(v1, v2) >= 0
}
export function lte(v1, v2) {
return cmp(v1, v2) <= 0
}
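// Usage sketch (values are illustrative):
//   compare('1.2', '4.1')  // => -1
//   compare('1.10', '1.9') // => +1 (numeric, not lexicographic)
//   gte('2.0', '2')        // => true ('2.0' has the extra trailing component)
//   compare(3, 3)          // => 0  (plain integers are compared directly)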

View File

@@ -0,0 +1,112 @@
import { callbackify } from 'node:util'
import { setTimeout } from 'node:timers/promises'
import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import {
fetchNothing,
fetchJson,
RequestFailedError,
} from '@overleaf/fetch-utils'
import * as Errors from './Errors.js'
import * as RedisManager from './RedisManager.js'
let RETRY_TIMEOUT_MS = 5000
async function getHistoryId(projectId) {
Metrics.inc('history_id_cache_requests_total')
const cachedHistoryId =
await RedisManager.promises.getCachedHistoryId(projectId)
if (cachedHistoryId) {
Metrics.inc('history_id_cache_hits_total')
return cachedHistoryId
} else {
const project = await _getProjectDetails(projectId)
const historyId =
project.overleaf &&
project.overleaf.history &&
project.overleaf.history.id
if (historyId != null) {
await RedisManager.promises.setCachedHistoryId(projectId, historyId)
}
return historyId
}
}
async function requestResync(projectId, opts = {}) {
try {
const body = {}
if (opts.historyRangesMigration) {
body.historyRangesMigration = opts.historyRangesMigration
}
if (opts.resyncProjectStructureOnly) {
body.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
}
await fetchNothing(
`${Settings.apis.web.url}/project/${projectId}/history/resync`,
{
method: 'POST',
signal: AbortSignal.timeout(6 * 60000),
basicAuth: {
user: Settings.apis.web.user,
password: Settings.apis.web.pass,
},
json: body,
}
)
} catch (err) {
if (err instanceof RequestFailedError && err.response.status === 404) {
throw new Errors.NotFoundError('got a 404 from web api').withCause(err)
} else {
throw err
}
}
}
async function _getProjectDetails(projectId) {
logger.debug({ projectId }, 'getting project details from web')
let attempts = 0
while (true) {
attempts += 1
try {
return await fetchJson(
`${Settings.apis.web.url}/project/${projectId}/details`,
{
signal: AbortSignal.timeout(16000),
basicAuth: {
user: Settings.apis.web.user,
password: Settings.apis.web.pass,
},
}
)
} catch (err) {
if (err instanceof RequestFailedError && err.response.status === 404) {
throw new Errors.NotFoundError('got a 404 from web api').withCause(err)
} else if (attempts < 2) {
// retry after 5 seconds
await setTimeout(RETRY_TIMEOUT_MS)
} else {
throw err
}
}
}
}
/**
* Adjust the retry timeout in tests
*/
export async function setRetryTimeoutMs(timeoutMs) {
RETRY_TIMEOUT_MS = timeoutMs
}
// EXPORTS
const getHistoryIdCb = callbackify(getHistoryId)
const requestResyncCb = callbackify(requestResync)
export { getHistoryIdCb as getHistoryId, requestResyncCb as requestResync }
export const promises = {
getHistoryId,
requestResync,
}

View File

@@ -0,0 +1,22 @@
import { ObjectId } from 'mongodb-legacy'
export type ProjectHistoryFailure = {
_id: ObjectId
project_id: string
attempts: number
resyncAttempts: number
resyncStartedAt: Date
requestCount?: number
history: (ErrorRecord | SyncStartRecord)[]
} & ErrorRecord
type ErrorRecord = {
error: string
stack: string
queueSize: number
ts: Date
}
type SyncStartRecord = {
resyncStartedAt: Date
}

View File

@@ -0,0 +1,27 @@
import Metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import mongodb from 'mongodb-legacy'
const { MongoClient, ObjectId } = mongodb
/**
* @import { ProjectHistoryFailure } from './mongo-types.ts'
*/
export { ObjectId }
export const mongoClient = new MongoClient(
Settings.mongo.url,
Settings.mongo.options
)
const mongoDb = mongoClient.db()
Metrics.mongodb.monitor(mongoClient)
export const db = {
deletedProjects: mongoDb.collection('deletedProjects'),
projects: mongoDb.collection('projects'),
/** @type {mongodb.Collection<ProjectHistoryFailure>} */
projectHistoryFailures: mongoDb.collection('projectHistoryFailures'),
projectHistoryLabels: mongoDb.collection('projectHistoryLabels'),
projectHistorySyncState: mongoDb.collection('projectHistorySyncState'),
}

View File

@@ -0,0 +1,61 @@
import Metrics from '@overleaf/metrics'
import logger from '@overleaf/logger'
import express from 'express'
import bodyParser from 'body-parser'
import * as Errors from './Errors.js'
import * as Router from './Router.js'
import * as Validation from './Validation.js'
const HistoryLogger = logger.initialize('project-history').logger
Metrics.event_loop.monitor(logger)
Metrics.memory.monitor(logger)
Metrics.leaked_sockets.monitor(logger)
Metrics.open_sockets.monitor()
// log updates as truncated strings
function truncateFn(updates) {
return JSON.parse(
JSON.stringify(updates, function (key, value) {
let len
if (typeof value === 'string' && (len = value.length) > 80) {
return (
value.substr(0, 32) +
`...(message of length ${len} truncated)...` +
value.substr(-32)
)
} else {
return value
}
})
)
}
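// e.g. (illustrative) a 200-character docLines value is logged as
// '<first 32 chars>...(message of length 200 truncated)...<last 32 chars>'
// rather than in full.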
HistoryLogger.addSerializers({
rawUpdate: truncateFn,
rawUpdates: truncateFn,
newUpdates: truncateFn,
lastUpdate: truncateFn,
})
export const app = express()
app.use(bodyParser.json())
app.use(bodyParser.urlencoded({ extended: true }))
app.use(Metrics.http.monitor(logger))
Router.initialize(app)
Metrics.injectMetricsRoute(app)
app.use(Validation.errorMiddleware)
app.use(function (error, req, res, next) {
if (error instanceof Errors.NotFoundError) {
res.sendStatus(404)
} else if (error instanceof Errors.BadRequestError) {
res.sendStatus(400)
} else if (error instanceof Errors.InconsistentChunkError) {
res.sendStatus(422)
} else if (error instanceof Errors.TooManyRequestsError) {
res.status(429).set('Retry-After', 300).end()
} else {
logger.error({ err: error, req }, error.message)
res.status(500).json({ message: 'an internal error occurred' })
}
})

View File

@@ -0,0 +1,253 @@
import { HistoryRanges } from '../../../document-updater/app/js/types'
import { LinkedFileData, RawOrigin } from 'overleaf-editor-core/lib/types'
export type Update =
| TextUpdate
| AddDocUpdate
| AddFileUpdate
| RenameUpdate
| DeleteCommentUpdate
| SetCommentStateUpdate
| SetFileMetadataOperation
| ResyncProjectStructureUpdate
| ResyncDocContentUpdate
export type ProjectStructureUpdate =
| AddDocUpdate
| AddFileUpdate
| RenameUpdate
| SetFileMetadataOperation
export type UpdateMeta = {
user_id: string
ts: number
source?: string
type?: string
origin?: RawOrigin
tc?: string
resync?: boolean
}
export type TextUpdate = {
doc: string
op: Op[]
v: number
meta: UpdateMeta & {
pathname: string
doc_length: number
doc_hash?: string
history_doc_length?: number
}
}
export type SetCommentStateUpdate = {
pathname: string
commentId: string
resolved: boolean
meta: UpdateMeta
}
export type SetFileMetadataOperation = {
pathname: string
meta: UpdateMeta
metadata: LinkedFileData | object
}
export type DeleteCommentUpdate = {
pathname: string
deleteComment: string
meta: UpdateMeta
}
type ProjectUpdateBase = {
version: string
projectHistoryId: string
meta: UpdateMeta
doc: string
}
export type AddDocUpdate = ProjectUpdateBase & {
pathname: string
docLines: string
ranges?: HistoryRanges
}
export type AddFileUpdate = ProjectUpdateBase & {
pathname: string
file: string
url: string
hash: string
createdBlob?: boolean
metadata?: LinkedFileData
}
export type RenameUpdate = ProjectUpdateBase & {
pathname: string
new_pathname: string
}
export type ResyncProjectStructureUpdate = {
resyncProjectStructure: {
docs: Doc[]
files: File[]
}
projectHistoryId: string
meta: {
ts: string
}
// optional fields for resyncProjectStructureOnly=true
resyncProjectStructureOnly?: boolean
_raw: string
}
export type ResyncDocContentUpdate = {
resyncDocContent: {
content: string
version: number
ranges?: Ranges
resolvedCommentIds?: string[]
}
projectHistoryId: string
path: string
doc: string
meta: {
ts: string
}
}
export type Op = RetainOp | InsertOp | DeleteOp | CommentOp
export type RetainOp = {
r: string
p: number
hpos?: number
tracking?: TrackingDirective
}
export type InsertOp = {
i: string
p: number
u?: boolean
hpos?: number
trackedDeleteRejection?: boolean
commentIds?: string[]
}
export type DeleteOp = {
d: string
p: number
u?: boolean
hpos?: number
trackedChanges?: TrackedChangesInsideDelete[]
}
export type TrackedChangesInsideDelete = {
type: 'insert' | 'delete'
offset: number
length: number
}
export type CommentOp = {
c: string
p: number
t: string
hpos?: number
hlen?: number
resolved?: boolean
}
export type UpdateWithBlob<T extends Update = Update> = {
update: T
blobHashes: T extends AddDocUpdate | AddFileUpdate
? {
file: string
ranges?: string
}
: never
}
export type TrackingProps = {
type: 'insert' | 'delete'
userId: string
ts: string
}
export type TrackingDirective = TrackingProps | { type: 'none' }
export type TrackingType = 'insert' | 'delete' | 'none'
export type RawScanOp =
| number
| string
| { r: number; tracking?: TrackingDirective }
| { i: string; tracking?: TrackingProps; commentIds?: string[] }
| { d: number }
export type TrackedChangeSnapshot = {
op: {
p: number
} & ({ d: string } | { i: string })
metadata: {
ts: string
user_id: string
}
}
export type CommentSnapshot = {
op: {
p: number
t: string
c: string
resolved: boolean
}
}
export type RangesSnapshot = {
changes: TrackedChangeSnapshot[]
comments: CommentSnapshot[]
}
export type Doc = {
doc: string
path: string
}
export type File = {
file: string
url?: string
path: string
_hash?: string
createdBlob?: boolean
metadata?: LinkedFileData
}
export type Entity = Doc | File
export type Ranges = {
comments?: Comment[]
changes?: TrackedChange[]
}
export type Comment = {
id: string
op: CommentOp
metadata: {
user_id: string
ts: string
}
}
export type TrackedChange = {
id: string
op: InsertOp | DeleteOp
metadata: {
user_id: string
ts: string
}
}
export type TrackedChangeTransition = {
pos: number
tracking: TrackingDirective
stage: 'persisted' | 'expected'
}