first commit
This commit is contained in:
129
services/project-history/app/js/BlobManager.js
Normal file
129
services/project-history/app/js/BlobManager.js
Normal file
@@ -0,0 +1,129 @@
|
||||
import _ from 'lodash'
|
||||
import async from 'async'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
||||
import * as UpdateTranslator from './UpdateTranslator.js'
|
||||
|
||||
// avoid creating too many blobs at the same time
|
||||
const MAX_CONCURRENT_REQUESTS = 4
|
||||
// number of retry attempts for blob creation
|
||||
const RETRY_ATTEMPTS = 3
|
||||
// delay between retries
|
||||
const RETRY_INTERVAL = 100
|
||||
|
||||
export function createBlobsForUpdates(
|
||||
projectId,
|
||||
historyId,
|
||||
updates,
|
||||
extendLock,
|
||||
callback
|
||||
) {
|
||||
// async.mapLimit runs jobs in parallel and returns on the first error. It
|
||||
// doesn't wait for concurrent jobs to finish. We want to make sure all jobs
|
||||
// are wrapped within our lock so we collect the first error enountered here
|
||||
// and wait for all jobs to finish before returning the error.
|
||||
let firstBlobCreationError = null
|
||||
|
||||
function createBlobForUpdate(update, cb) {
|
||||
// For file additions we need to first create a blob in the history-store
|
||||
// with the contents of the file. Then we can create a change containing a
|
||||
// file addition operation which references the blob.
|
||||
//
|
||||
// To do this we decorate file creation updates with a blobHash
|
||||
if (!UpdateTranslator.isAddUpdate(update)) {
|
||||
return async.setImmediate(() => cb(null, { update }))
|
||||
}
|
||||
|
||||
let attempts = 0
|
||||
// Since we may be creating O(1000) blobs in an update, allow for the
|
||||
// occasional failure to prevent the whole update failing.
|
||||
let lastErr
|
||||
async.retry(
|
||||
{
|
||||
times: RETRY_ATTEMPTS,
|
||||
interval: RETRY_INTERVAL,
|
||||
},
|
||||
_cb => {
|
||||
attempts++
|
||||
if (attempts > 1) {
|
||||
logger.error(
|
||||
{
|
||||
err: lastErr,
|
||||
projectId,
|
||||
historyId,
|
||||
update: _.pick(
|
||||
update,
|
||||
'doc',
|
||||
'file',
|
||||
'hash',
|
||||
'createdBlob',
|
||||
'url'
|
||||
),
|
||||
attempts,
|
||||
},
|
||||
'previous createBlob attempt failed, retrying'
|
||||
)
|
||||
}
|
||||
// extend the lock for each file because large files may take a long time
|
||||
extendLock(err => {
|
||||
if (err) {
|
||||
lastErr = OError.tag(err)
|
||||
return _cb(lastErr)
|
||||
}
|
||||
HistoryStoreManager.createBlobForUpdate(
|
||||
projectId,
|
||||
historyId,
|
||||
update,
|
||||
(err, hashes) => {
|
||||
if (err) {
|
||||
lastErr = OError.tag(err, 'retry: error creating blob', {
|
||||
projectId,
|
||||
doc: update.doc,
|
||||
file: update.file,
|
||||
})
|
||||
_cb(lastErr)
|
||||
} else {
|
||||
_cb(null, hashes)
|
||||
}
|
||||
}
|
||||
)
|
||||
})
|
||||
},
|
||||
(error, blobHashes) => {
|
||||
if (error) {
|
||||
if (!firstBlobCreationError) {
|
||||
firstBlobCreationError = error
|
||||
}
|
||||
return cb(null, { update, blobHashes })
|
||||
}
|
||||
|
||||
extendLock(error => {
|
||||
if (error) {
|
||||
if (!firstBlobCreationError) {
|
||||
firstBlobCreationError = error
|
||||
}
|
||||
}
|
||||
cb(null, { update, blobHashes })
|
||||
})
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
async.mapLimit(
|
||||
updates,
|
||||
MAX_CONCURRENT_REQUESTS,
|
||||
createBlobForUpdate,
|
||||
(unusedError, updatesWithBlobs) => {
|
||||
// As indicated by the name this is unexpected, but changes in the future
|
||||
// could cause it to be set and ignoring it would be unexpected
|
||||
if (unusedError) {
|
||||
return callback(unusedError)
|
||||
}
|
||||
if (firstBlobCreationError) {
|
||||
return callback(firstBlobCreationError)
|
||||
}
|
||||
callback(null, updatesWithBlobs)
|
||||
}
|
||||
)
|
||||
}
|
626
services/project-history/app/js/ChunkTranslator.js
Normal file
626
services/project-history/app/js/ChunkTranslator.js
Normal file
@@ -0,0 +1,626 @@
|
||||
import _ from 'lodash'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
import * as Errors from './Errors.js'
|
||||
import {
|
||||
TextOperation,
|
||||
InsertOp,
|
||||
RemoveOp,
|
||||
RetainOp,
|
||||
Range,
|
||||
TrackedChangeList,
|
||||
} from 'overleaf-editor-core'
|
||||
|
||||
/**
|
||||
* @import { RawEditOperation, TrackedChangeRawData } from 'overleaf-editor-core/lib/types'
|
||||
*/
|
||||
|
||||
export function convertToSummarizedUpdates(chunk, callback) {
|
||||
const version = chunk.chunk.startVersion
|
||||
const { files } = chunk.chunk.history.snapshot
|
||||
const builder = new UpdateSetBuilder(version, files)
|
||||
|
||||
for (const change of chunk.chunk.history.changes) {
|
||||
try {
|
||||
builder.applyChange(change)
|
||||
} catch (error1) {
|
||||
const error = error1
|
||||
return callback(error)
|
||||
}
|
||||
}
|
||||
callback(null, builder.summarizedUpdates)
|
||||
}
|
||||
|
||||
export function convertToDiffUpdates(
|
||||
projectId,
|
||||
chunk,
|
||||
pathname,
|
||||
fromVersion,
|
||||
toVersion,
|
||||
callback
|
||||
) {
|
||||
let error
|
||||
let version = chunk.chunk.startVersion
|
||||
const { files } = chunk.chunk.history.snapshot
|
||||
const builder = new UpdateSetBuilder(version, files)
|
||||
|
||||
let file = null
|
||||
for (const change of chunk.chunk.history.changes) {
|
||||
// Because we're referencing by pathname, which can change, we
|
||||
// want to get the last file in the range fromVersion:toVersion
|
||||
// that has the pathname we want. Note that this might not exist yet
|
||||
// at fromVersion, so we'll just settle for the last existing one we find
|
||||
// after that.
|
||||
if (fromVersion <= version && version <= toVersion) {
|
||||
const currentFile = builder.getFile(pathname)
|
||||
if (currentFile) {
|
||||
file = currentFile
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
builder.applyChange(change)
|
||||
} catch (error1) {
|
||||
error = error1
|
||||
return callback(error)
|
||||
}
|
||||
version += 1
|
||||
}
|
||||
// Versions act as fence posts, with updates taking us from one to another,
|
||||
// so we also need to check after the final update, when we're at the last version.
|
||||
if (fromVersion <= version && version <= toVersion) {
|
||||
const currentFile = builder.getFile(pathname)
|
||||
if (currentFile) {
|
||||
file = currentFile
|
||||
}
|
||||
}
|
||||
|
||||
// return an empty diff if the file was flagged as missing with an explicit null
|
||||
if (builder.getFile(pathname) === null) {
|
||||
return callback(null, { initialContent: '', updates: [] })
|
||||
}
|
||||
|
||||
if (file == null) {
|
||||
error = new Errors.NotFoundError(
|
||||
`pathname '${pathname}' not found in range`
|
||||
)
|
||||
return callback(error)
|
||||
}
|
||||
|
||||
WebApiManager.getHistoryId(projectId, (err, historyId) => {
|
||||
if (err) {
|
||||
return callback(err)
|
||||
}
|
||||
file.getDiffUpdates(historyId, fromVersion, toVersion, callback)
|
||||
})
|
||||
}
|
||||
|
||||
class UpdateSetBuilder {
|
||||
constructor(startVersion, files) {
|
||||
this.version = startVersion
|
||||
this.summarizedUpdates = []
|
||||
|
||||
this.files = Object.create(null)
|
||||
for (const pathname in files) {
|
||||
// initialize file from snapshot
|
||||
const data = files[pathname]
|
||||
this.files[pathname] = new File(pathname, data, startVersion)
|
||||
}
|
||||
}
|
||||
|
||||
getFile(pathname) {
|
||||
return this.files[pathname]
|
||||
}
|
||||
|
||||
applyChange(change) {
|
||||
const timestamp = new Date(change.timestamp)
|
||||
let authors = _.map(change.authors, id => {
|
||||
if (id == null) {
|
||||
return null
|
||||
}
|
||||
return id
|
||||
})
|
||||
authors = authors.concat(change.v2Authors || [])
|
||||
this.currentUpdate = {
|
||||
meta: {
|
||||
users: authors,
|
||||
start_ts: timestamp.getTime(),
|
||||
end_ts: timestamp.getTime(),
|
||||
},
|
||||
v: this.version,
|
||||
pathnames: new Set([]),
|
||||
project_ops: [],
|
||||
}
|
||||
if (change.origin) {
|
||||
this.currentUpdate.meta.origin = change.origin
|
||||
}
|
||||
|
||||
for (const op of change.operations) {
|
||||
this.applyOperation(op, timestamp, authors, change.origin)
|
||||
}
|
||||
|
||||
this.currentUpdate.pathnames = Array.from(this.currentUpdate.pathnames)
|
||||
this.summarizedUpdates.push(this.currentUpdate)
|
||||
|
||||
this.version += 1
|
||||
}
|
||||
|
||||
applyOperation(op, timestamp, authors, origin) {
|
||||
if (UpdateSetBuilder._isTextOperation(op)) {
|
||||
this.applyTextOperation(op, timestamp, authors, origin)
|
||||
} else if (UpdateSetBuilder._isRenameOperation(op)) {
|
||||
this.applyRenameOperation(op, timestamp, authors)
|
||||
} else if (UpdateSetBuilder._isRemoveFileOperation(op)) {
|
||||
this.applyRemoveFileOperation(op, timestamp, authors)
|
||||
} else if (UpdateSetBuilder._isAddFileOperation(op)) {
|
||||
this.applyAddFileOperation(op, timestamp, authors)
|
||||
}
|
||||
}
|
||||
|
||||
applyTextOperation(operation, timestamp, authors, origin) {
|
||||
const { pathname } = operation
|
||||
if (pathname === '') {
|
||||
// this shouldn't happen, but we continue to allow the user to see the history
|
||||
logger.warn(
|
||||
{ operation, timestamp, authors },
|
||||
'pathname is empty for text operation'
|
||||
)
|
||||
return
|
||||
}
|
||||
|
||||
const file = this.files[pathname]
|
||||
if (file == null) {
|
||||
// this shouldn't happen, but we continue to allow the user to see the history
|
||||
logger.warn(
|
||||
{ operation, timestamp, authors },
|
||||
'file is missing for text operation'
|
||||
)
|
||||
this.files[pathname] = null // marker for a missing file
|
||||
return
|
||||
}
|
||||
|
||||
file.applyTextOperation(authors, timestamp, this.version, operation, origin)
|
||||
this.currentUpdate.pathnames.add(pathname)
|
||||
}
|
||||
|
||||
applyRenameOperation(operation, timestamp, authors) {
|
||||
const { pathname, newPathname } = operation
|
||||
const file = this.files[pathname]
|
||||
if (file == null) {
|
||||
// this shouldn't happen, but we continue to allow the user to see the history
|
||||
logger.warn(
|
||||
{ operation, timestamp, authors },
|
||||
'file is missing for rename operation'
|
||||
)
|
||||
this.files[pathname] = null // marker for a missing file
|
||||
return
|
||||
}
|
||||
|
||||
file.rename(newPathname)
|
||||
delete this.files[pathname]
|
||||
this.files[newPathname] = file
|
||||
|
||||
this.currentUpdate.project_ops.push({
|
||||
rename: { pathname, newPathname },
|
||||
})
|
||||
}
|
||||
|
||||
applyAddFileOperation(operation, timestamp, authors) {
|
||||
const { pathname } = operation
|
||||
// add file
|
||||
this.files[pathname] = new File(pathname, operation.file, this.version)
|
||||
|
||||
this.currentUpdate.project_ops.push({ add: { pathname } })
|
||||
}
|
||||
|
||||
applyRemoveFileOperation(operation, timestamp, authors) {
|
||||
const { pathname } = operation
|
||||
const file = this.files[pathname]
|
||||
if (file == null) {
|
||||
// this shouldn't happen, but we continue to allow the user to see the history
|
||||
logger.warn(
|
||||
{ operation, timestamp, authors },
|
||||
'pathname not found when removing file'
|
||||
)
|
||||
this.files[pathname] = null // marker for a missing file
|
||||
return
|
||||
}
|
||||
|
||||
delete this.files[pathname]
|
||||
|
||||
this.currentUpdate.project_ops.push({ remove: { pathname } })
|
||||
}
|
||||
|
||||
static _isTextOperation(op) {
|
||||
return Object.prototype.hasOwnProperty.call(op, 'textOperation')
|
||||
}
|
||||
|
||||
static _isRenameOperation(op) {
|
||||
return (
|
||||
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
|
||||
op.newPathname !== ''
|
||||
)
|
||||
}
|
||||
|
||||
static _isRemoveFileOperation(op) {
|
||||
return (
|
||||
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
|
||||
op.newPathname === ''
|
||||
)
|
||||
}
|
||||
|
||||
static _isAddFileOperation(op) {
|
||||
return Object.prototype.hasOwnProperty.call(op, 'file')
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} content
|
||||
* @param {TrackedChangeList} trackedChanges
|
||||
* @returns {string}
|
||||
*/
|
||||
function removeTrackedDeletesFromString(content, trackedChanges) {
|
||||
let result = ''
|
||||
let cursor = 0
|
||||
const trackedDeletes = trackedChanges
|
||||
.asSorted()
|
||||
.filter(tc => tc.tracking.type === 'delete')
|
||||
for (const trackedChange of trackedDeletes) {
|
||||
if (cursor < trackedChange.range.start) {
|
||||
result += content.slice(cursor, trackedChange.range.start)
|
||||
}
|
||||
// skip the tracked change itself
|
||||
cursor = trackedChange.range.end
|
||||
}
|
||||
result += content.slice(cursor)
|
||||
return result
|
||||
}
|
||||
|
||||
class File {
|
||||
constructor(pathname, snapshot, initialVersion) {
|
||||
this.pathname = pathname
|
||||
this.snapshot = snapshot
|
||||
this.initialVersion = initialVersion
|
||||
this.operations = []
|
||||
}
|
||||
|
||||
applyTextOperation(authors, timestamp, version, operation, origin) {
|
||||
this.operations.push({ authors, timestamp, version, operation, origin })
|
||||
}
|
||||
|
||||
rename(pathname) {
|
||||
this.pathname = pathname
|
||||
}
|
||||
|
||||
getDiffUpdates(historyId, fromVersion, toVersion, callback) {
|
||||
if (this.snapshot.stringLength == null) {
|
||||
// Binary file
|
||||
return callback(null, { binary: true })
|
||||
}
|
||||
this._loadContentAndRanges(historyId, (error, content, ranges) => {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
const trackedChanges = TrackedChangeList.fromRaw(
|
||||
ranges?.trackedChanges || []
|
||||
)
|
||||
/** @type {string | undefined} */
|
||||
let initialContent
|
||||
const updates = []
|
||||
|
||||
for (const operationInfo of this.operations) {
|
||||
if (!('textOperation' in operationInfo.operation)) {
|
||||
// We only care about text operations
|
||||
continue
|
||||
}
|
||||
const { authors, timestamp, version, operation } = operationInfo
|
||||
// Set the initialContent to the latest version we have before the diff
|
||||
// begins. 'version' here refers to the document version as we are
|
||||
// applying the updates. So we store the content *before* applying the
|
||||
// updates.
|
||||
if (version >= fromVersion && initialContent === undefined) {
|
||||
initialContent = removeTrackedDeletesFromString(
|
||||
content,
|
||||
trackedChanges
|
||||
)
|
||||
}
|
||||
|
||||
let ops
|
||||
;({ content, ops } = this._convertTextOperation(
|
||||
content,
|
||||
operation,
|
||||
trackedChanges
|
||||
))
|
||||
|
||||
// We only need to return the updates between fromVersion and toVersion
|
||||
if (fromVersion <= version && version < toVersion) {
|
||||
const update = {
|
||||
meta: {
|
||||
users: authors,
|
||||
start_ts: timestamp.getTime(),
|
||||
end_ts: timestamp.getTime(),
|
||||
},
|
||||
v: version,
|
||||
op: ops,
|
||||
}
|
||||
if (operationInfo.origin) {
|
||||
update.meta.origin = operationInfo.origin
|
||||
}
|
||||
updates.push(update)
|
||||
}
|
||||
}
|
||||
|
||||
if (initialContent === undefined) {
|
||||
initialContent = removeTrackedDeletesFromString(content, trackedChanges)
|
||||
}
|
||||
callback(null, { initialContent, updates })
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} initialContent
|
||||
* @param {RawEditOperation} operation
|
||||
* @param {TrackedChangeList} trackedChanges
|
||||
*/
|
||||
_convertTextOperation(initialContent, operation, trackedChanges) {
|
||||
const textOp = TextOperation.fromJSON(operation)
|
||||
const textUpdateBuilder = new TextUpdateBuilder(
|
||||
initialContent,
|
||||
trackedChanges
|
||||
)
|
||||
for (const op of textOp.ops) {
|
||||
textUpdateBuilder.applyOp(op)
|
||||
}
|
||||
textUpdateBuilder.finish()
|
||||
return {
|
||||
content: textUpdateBuilder.result,
|
||||
ops: textUpdateBuilder.changes,
|
||||
}
|
||||
}
|
||||
|
||||
_loadContentAndRanges(historyId, callback) {
|
||||
HistoryStoreManager.getProjectBlob(
|
||||
historyId,
|
||||
this.snapshot.hash,
|
||||
(err, content) => {
|
||||
if (err) {
|
||||
return callback(err)
|
||||
}
|
||||
if (this.snapshot.rangesHash) {
|
||||
HistoryStoreManager.getProjectBlob(
|
||||
historyId,
|
||||
this.snapshot.rangesHash,
|
||||
(err, ranges) => {
|
||||
if (err) {
|
||||
return callback(err)
|
||||
}
|
||||
return callback(null, content, JSON.parse(ranges))
|
||||
}
|
||||
)
|
||||
} else {
|
||||
return callback(null, content, undefined)
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
class TextUpdateBuilder {
|
||||
/**
|
||||
*
|
||||
* @param {string} source
|
||||
* @param {TrackedChangeList} ranges
|
||||
*/
|
||||
constructor(source, ranges) {
|
||||
this.trackedChanges = ranges
|
||||
this.source = source
|
||||
this.sourceCursor = 0
|
||||
this.result = ''
|
||||
/** @type {({i: string, p: number} | {d: string, p: number})[]} */
|
||||
this.changes = []
|
||||
}
|
||||
|
||||
applyOp(op) {
|
||||
if (op instanceof RetainOp) {
|
||||
const length = this.result.length
|
||||
this.applyRetain(op)
|
||||
this.trackedChanges.applyRetain(length, op.length, {
|
||||
tracking: op.tracking,
|
||||
})
|
||||
}
|
||||
|
||||
if (op instanceof InsertOp) {
|
||||
const length = this.result.length
|
||||
this.applyInsert(op)
|
||||
this.trackedChanges.applyInsert(length, op.insertion, {
|
||||
tracking: op.tracking,
|
||||
})
|
||||
}
|
||||
|
||||
if (op instanceof RemoveOp) {
|
||||
const length = this.result.length
|
||||
this.applyDelete(op)
|
||||
this.trackedChanges.applyDelete(length, op.length)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {RetainOp} retain
|
||||
*/
|
||||
applyRetain(retain) {
|
||||
const resultRetentionRange = new Range(this.result.length, retain.length)
|
||||
const sourceRetentionRange = new Range(this.sourceCursor, retain.length)
|
||||
|
||||
let scanCursor = this.result.length
|
||||
if (retain.tracking) {
|
||||
// We are modifying existing tracked deletes. We need to treat removal
|
||||
// (type insert/none) of a tracked delete as an insertion. Similarly, any
|
||||
// range we introduce as a tracked deletion must be reported as a deletion.
|
||||
const trackedDeletes = this.trackedChanges
|
||||
.asSorted()
|
||||
.filter(
|
||||
tc =>
|
||||
tc.tracking.type === 'delete' &&
|
||||
tc.range.overlaps(resultRetentionRange)
|
||||
)
|
||||
|
||||
const sourceOffset = this.sourceCursor - this.result.length
|
||||
for (const trackedDelete of trackedDeletes) {
|
||||
const resultTrackedDelete = trackedDelete.range
|
||||
const sourceTrackedDelete = trackedDelete.range.moveBy(sourceOffset)
|
||||
|
||||
if (scanCursor < resultTrackedDelete.start) {
|
||||
if (retain.tracking.type === 'delete') {
|
||||
this.changes.push({
|
||||
d: this.source.slice(
|
||||
this.sourceCursor,
|
||||
sourceTrackedDelete.start
|
||||
),
|
||||
p: this.result.length,
|
||||
})
|
||||
}
|
||||
this.result += this.source.slice(
|
||||
this.sourceCursor,
|
||||
sourceTrackedDelete.start
|
||||
)
|
||||
scanCursor = resultTrackedDelete.start
|
||||
this.sourceCursor = sourceTrackedDelete.start
|
||||
}
|
||||
const endOfInsertionResult = Math.min(
|
||||
resultTrackedDelete.end,
|
||||
resultRetentionRange.end
|
||||
)
|
||||
const endOfInsertionSource = Math.min(
|
||||
sourceTrackedDelete.end,
|
||||
sourceRetentionRange.end
|
||||
)
|
||||
const text = this.source.slice(this.sourceCursor, endOfInsertionSource)
|
||||
if (
|
||||
retain.tracking.type === 'none' ||
|
||||
retain.tracking.type === 'insert'
|
||||
) {
|
||||
this.changes.push({
|
||||
i: text,
|
||||
p: this.result.length,
|
||||
})
|
||||
}
|
||||
this.result += text
|
||||
// skip the tracked delete itself
|
||||
scanCursor = endOfInsertionResult
|
||||
this.sourceCursor = endOfInsertionSource
|
||||
|
||||
if (scanCursor >= resultRetentionRange.end) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if (scanCursor < resultRetentionRange.end) {
|
||||
// The last region is not a tracked delete. But we should still handle
|
||||
// a new tracked delete as a deletion.
|
||||
const text = this.source.slice(
|
||||
this.sourceCursor,
|
||||
sourceRetentionRange.end
|
||||
)
|
||||
if (retain.tracking?.type === 'delete') {
|
||||
this.changes.push({
|
||||
d: text,
|
||||
p: this.result.length,
|
||||
})
|
||||
}
|
||||
this.result += text
|
||||
}
|
||||
this.sourceCursor = sourceRetentionRange.end
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {InsertOp} insert
|
||||
*/
|
||||
applyInsert(insert) {
|
||||
if (insert.tracking?.type !== 'delete') {
|
||||
// Skip tracked deletions
|
||||
this.changes.push({
|
||||
i: insert.insertion,
|
||||
p: this.result.length,
|
||||
})
|
||||
}
|
||||
this.result += insert.insertion
|
||||
// The source cursor doesn't advance
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {RemoveOp} deletion
|
||||
*/
|
||||
applyDelete(deletion) {
|
||||
const sourceDeletionRange = new Range(this.sourceCursor, deletion.length)
|
||||
const resultDeletionRange = new Range(this.result.length, deletion.length)
|
||||
|
||||
const trackedDeletes = this.trackedChanges
|
||||
.asSorted()
|
||||
.filter(
|
||||
tc =>
|
||||
tc.tracking.type === 'delete' &&
|
||||
tc.range.overlaps(resultDeletionRange)
|
||||
)
|
||||
.sort((a, b) => a.range.start - b.range.start)
|
||||
|
||||
let scanCursor = this.result.length
|
||||
const sourceOffset = this.sourceCursor - this.result.length
|
||||
|
||||
for (const trackedDelete of trackedDeletes) {
|
||||
const resultTrackDeleteRange = trackedDelete.range
|
||||
const sourceTrackDeleteRange = trackedDelete.range.moveBy(sourceOffset)
|
||||
|
||||
if (scanCursor < resultTrackDeleteRange.start) {
|
||||
this.changes.push({
|
||||
d: this.source.slice(this.sourceCursor, sourceTrackDeleteRange.start),
|
||||
p: this.result.length,
|
||||
})
|
||||
}
|
||||
// skip the tracked delete itself
|
||||
scanCursor = Math.min(resultTrackDeleteRange.end, resultDeletionRange.end)
|
||||
this.sourceCursor = Math.min(
|
||||
sourceTrackDeleteRange.end,
|
||||
sourceDeletionRange.end
|
||||
)
|
||||
|
||||
if (scanCursor >= resultDeletionRange.end) {
|
||||
break
|
||||
}
|
||||
}
|
||||
if (scanCursor < resultDeletionRange.end) {
|
||||
this.changes.push({
|
||||
d: this.source.slice(this.sourceCursor, sourceDeletionRange.end),
|
||||
p: this.result.length,
|
||||
})
|
||||
}
|
||||
this.sourceCursor = sourceDeletionRange.end
|
||||
}
|
||||
|
||||
finish() {
|
||||
if (this.sourceCursor < this.source.length) {
|
||||
this.result += this.source.slice(this.sourceCursor)
|
||||
}
|
||||
for (const op of this.changes) {
|
||||
if ('p' in op && typeof op.p === 'number') {
|
||||
// Maybe we have to move the position of the deletion to account for
|
||||
// tracked changes that we're hiding in the UI.
|
||||
op.p -= this.trackedChanges
|
||||
.asSorted()
|
||||
.filter(tc => tc.tracking.type === 'delete' && tc.range.start < op.p)
|
||||
.map(tc => {
|
||||
if (tc.range.end < op.p) {
|
||||
return tc.range.length
|
||||
}
|
||||
return op.p - tc.range.start
|
||||
})
|
||||
.reduce((a, b) => a + b, 0)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
274
services/project-history/app/js/DiffGenerator.js
Normal file
274
services/project-history/app/js/DiffGenerator.js
Normal file
@@ -0,0 +1,274 @@
|
||||
import _ from 'lodash'
|
||||
import OError from '@overleaf/o-error'
|
||||
|
||||
export class ConsistencyError extends OError {}
|
||||
|
||||
/**
|
||||
* Container for functions that need to be mocked in tests
|
||||
*
|
||||
* TODO: Rewrite tests in terms of exported functions only
|
||||
*/
|
||||
export const _mocks = {}
|
||||
|
||||
export function buildDiff(initialContent, updates) {
|
||||
let diff = [{ u: initialContent }]
|
||||
for (const update of updates) {
|
||||
diff = applyUpdateToDiff(diff, update)
|
||||
}
|
||||
diff = compressDiff(diff)
|
||||
return diff
|
||||
}
|
||||
|
||||
_mocks.compressDiff = diff => {
|
||||
const newDiff = []
|
||||
for (const part of diff) {
|
||||
const users = part.meta?.users ?? []
|
||||
|
||||
if (part.meta?.origin?.kind === 'history-resync') {
|
||||
// Skip history resync updates. Inserts are converted to unchanged text
|
||||
// and deletes are skipped, so that they effectively don't appear in the
|
||||
// diff.
|
||||
if (part.u != null) {
|
||||
newDiff.push(part)
|
||||
} else if (part.i != null) {
|
||||
newDiff.push({ u: part.i })
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if (newDiff.length === 0) {
|
||||
// If we haven't seen other parts yet, we have nothing to merge.
|
||||
newDiff.push(part)
|
||||
continue
|
||||
}
|
||||
|
||||
const lastPart = newDiff[newDiff.length - 1]
|
||||
const lastUsers = lastPart.meta?.users ?? []
|
||||
const usersNotInBothParts = _.xor(users, lastUsers)
|
||||
|
||||
if (usersNotInBothParts.length > 0) {
|
||||
// If the set of users in the last part and this part are not the same, we
|
||||
// can't merge.
|
||||
newDiff.push(part)
|
||||
continue
|
||||
}
|
||||
|
||||
if (lastPart.i != null && part.i != null) {
|
||||
// Merge two inserts
|
||||
lastPart.i += part.i
|
||||
lastPart.meta.start_ts = Math.min(
|
||||
lastPart.meta.start_ts,
|
||||
part.meta.start_ts
|
||||
)
|
||||
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts)
|
||||
} else if (lastPart.d != null && part.d != null) {
|
||||
// Merge two deletes
|
||||
lastPart.d += part.d
|
||||
lastPart.meta.start_ts = Math.min(
|
||||
lastPart.meta.start_ts,
|
||||
part.meta.start_ts
|
||||
)
|
||||
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts)
|
||||
} else {
|
||||
newDiff.push(part)
|
||||
}
|
||||
}
|
||||
return newDiff
|
||||
}
|
||||
|
||||
export function compressDiff(...args) {
|
||||
return _mocks.compressDiff(...args)
|
||||
}
|
||||
|
||||
export function applyOpToDiff(diff, op, meta) {
|
||||
let consumedDiff
|
||||
|
||||
let remainingDiff = diff.slice()
|
||||
;({ consumedDiff, remainingDiff } = _consumeToOffset(remainingDiff, op.p))
|
||||
const newDiff = consumedDiff
|
||||
|
||||
if (op.i != null) {
|
||||
newDiff.push({
|
||||
i: op.i,
|
||||
meta,
|
||||
})
|
||||
} else if (op.d != null) {
|
||||
;({ consumedDiff, remainingDiff } = _consumeDiffAffectedByDeleteOp(
|
||||
remainingDiff,
|
||||
op,
|
||||
meta
|
||||
))
|
||||
newDiff.push(...(consumedDiff || []))
|
||||
}
|
||||
|
||||
newDiff.push(...(remainingDiff || []))
|
||||
|
||||
return newDiff
|
||||
}
|
||||
|
||||
_mocks.applyUpdateToDiff = (diff, update) => {
|
||||
for (const op of update.op) {
|
||||
if (op.broken !== true) {
|
||||
diff = applyOpToDiff(diff, op, update.meta)
|
||||
}
|
||||
}
|
||||
return diff
|
||||
}
|
||||
|
||||
export function applyUpdateToDiff(...args) {
|
||||
return _mocks.applyUpdateToDiff(...args)
|
||||
}
|
||||
|
||||
function _consumeToOffset(remainingDiff, totalOffset) {
|
||||
let part
|
||||
const consumedDiff = []
|
||||
let position = 0
|
||||
while ((part = remainingDiff.shift())) {
|
||||
const length = _getLengthOfDiffPart(part)
|
||||
if (part.d != null) {
|
||||
consumedDiff.push(part)
|
||||
} else if (position + length >= totalOffset) {
|
||||
const partOffset = totalOffset - position
|
||||
if (partOffset > 0) {
|
||||
consumedDiff.push(_slicePart(part, 0, partOffset))
|
||||
}
|
||||
if (partOffset < length) {
|
||||
remainingDiff.unshift(_slicePart(part, partOffset))
|
||||
}
|
||||
break
|
||||
} else {
|
||||
position += length
|
||||
consumedDiff.push(part)
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
consumedDiff,
|
||||
remainingDiff,
|
||||
}
|
||||
}
|
||||
|
||||
function _consumeDiffAffectedByDeleteOp(remainingDiff, deleteOp, meta) {
|
||||
const consumedDiff = []
|
||||
let remainingOp = deleteOp
|
||||
while (remainingOp && remainingDiff.length > 0) {
|
||||
let newPart
|
||||
;({ newPart, remainingDiff, remainingOp } = _consumeDeletedPart(
|
||||
remainingDiff,
|
||||
remainingOp,
|
||||
meta
|
||||
))
|
||||
if (newPart != null) {
|
||||
consumedDiff.push(newPart)
|
||||
}
|
||||
}
|
||||
return {
|
||||
consumedDiff,
|
||||
remainingDiff,
|
||||
}
|
||||
}
|
||||
|
||||
function _consumeDeletedPart(remainingDiff, op, meta) {
|
||||
let deletedContent, newPart, remainingOp
|
||||
const part = remainingDiff.shift()
|
||||
const partLength = _getLengthOfDiffPart(part)
|
||||
|
||||
if (part.d != null) {
|
||||
// Skip existing deletes
|
||||
remainingOp = op
|
||||
newPart = part
|
||||
} else if (partLength > op.d.length) {
|
||||
// Only the first bit of the part has been deleted
|
||||
const remainingPart = _slicePart(part, op.d.length)
|
||||
remainingDiff.unshift(remainingPart)
|
||||
|
||||
deletedContent = _getContentOfPart(part).slice(0, op.d.length)
|
||||
if (deletedContent !== op.d) {
|
||||
throw new ConsistencyError(
|
||||
`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`
|
||||
)
|
||||
}
|
||||
|
||||
if (part.u != null) {
|
||||
newPart = {
|
||||
d: op.d,
|
||||
meta,
|
||||
}
|
||||
} else if (part.i != null) {
|
||||
newPart = null
|
||||
}
|
||||
|
||||
remainingOp = null
|
||||
} else if (partLength === op.d.length) {
|
||||
// The entire part has been deleted, but it is the last part
|
||||
|
||||
deletedContent = _getContentOfPart(part)
|
||||
if (deletedContent !== op.d) {
|
||||
throw new ConsistencyError(
|
||||
`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`
|
||||
)
|
||||
}
|
||||
|
||||
if (part.u != null) {
|
||||
newPart = {
|
||||
d: op.d,
|
||||
meta,
|
||||
}
|
||||
} else if (part.i != null) {
|
||||
newPart = null
|
||||
}
|
||||
|
||||
remainingOp = null
|
||||
} else if (partLength < op.d.length) {
|
||||
// The entire part has been deleted and there is more
|
||||
|
||||
deletedContent = _getContentOfPart(part)
|
||||
const opContent = op.d.slice(0, deletedContent.length)
|
||||
if (deletedContent !== opContent) {
|
||||
throw new ConsistencyError(
|
||||
`deleted content, '${deletedContent}', does not match delete op, '${opContent}'`
|
||||
)
|
||||
}
|
||||
|
||||
if (part.u) {
|
||||
newPart = {
|
||||
d: part.u,
|
||||
meta,
|
||||
}
|
||||
} else if (part.i != null) {
|
||||
newPart = null
|
||||
}
|
||||
|
||||
remainingOp = {
|
||||
p: op.p,
|
||||
d: op.d.slice(_getLengthOfDiffPart(part)),
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
newPart,
|
||||
remainingDiff,
|
||||
remainingOp,
|
||||
}
|
||||
}
|
||||
|
||||
function _slicePart(basePart, from, to) {
|
||||
let part
|
||||
if (basePart.u != null) {
|
||||
part = { u: basePart.u.slice(from, to) }
|
||||
} else if (basePart.i != null) {
|
||||
part = { i: basePart.i.slice(from, to) }
|
||||
}
|
||||
if (basePart.meta != null) {
|
||||
part.meta = basePart.meta
|
||||
}
|
||||
return part
|
||||
}
|
||||
|
||||
function _getLengthOfDiffPart(part) {
|
||||
return (part.u || part.d || part.i || '').length
|
||||
}
|
||||
|
||||
function _getContentOfPart(part) {
|
||||
return part.u || part.d || part.i || ''
|
||||
}
|
240
services/project-history/app/js/DiffManager.js
Normal file
240
services/project-history/app/js/DiffManager.js
Normal file
@@ -0,0 +1,240 @@
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import async from 'async'
|
||||
import * as DiffGenerator from './DiffGenerator.js'
|
||||
import * as FileTreeDiffGenerator from './FileTreeDiffGenerator.js'
|
||||
import * as UpdatesProcessor from './UpdatesProcessor.js'
|
||||
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
import * as ChunkTranslator from './ChunkTranslator.js'
|
||||
import * as Errors from './Errors.js'
|
||||
|
||||
let MAX_CHUNK_REQUESTS = 10
|
||||
|
||||
/**
|
||||
* Container for functions that need to be mocked in tests
|
||||
*
|
||||
* TODO: Rewrite tests in terms of exported functions only
|
||||
*/
|
||||
export const _mocks = {}
|
||||
|
||||
export function getDiff(projectId, pathname, fromVersion, toVersion, callback) {
|
||||
UpdatesProcessor.processUpdatesForProject(projectId, error => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
_getProjectUpdatesBetweenVersions(
|
||||
projectId,
|
||||
pathname,
|
||||
fromVersion,
|
||||
toVersion,
|
||||
(error, result) => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
const { binary, initialContent, updates } = result
|
||||
let diff
|
||||
if (binary) {
|
||||
diff = { binary: true }
|
||||
} else {
|
||||
try {
|
||||
diff = DiffGenerator.buildDiff(initialContent, updates)
|
||||
} catch (err) {
|
||||
return callback(
|
||||
OError.tag(err, 'failed to build diff', {
|
||||
projectId,
|
||||
pathname,
|
||||
fromVersion,
|
||||
toVersion,
|
||||
})
|
||||
)
|
||||
}
|
||||
}
|
||||
callback(null, diff)
|
||||
}
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
export function getFileTreeDiff(projectId, fromVersion, toVersion, callback) {
|
||||
UpdatesProcessor.processUpdatesForProject(projectId, error => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
_getChunksAsSingleChunk(
|
||||
projectId,
|
||||
fromVersion,
|
||||
toVersion,
|
||||
(error, chunk) => {
|
||||
let diff
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
try {
|
||||
diff = FileTreeDiffGenerator.buildDiff(chunk, fromVersion, toVersion)
|
||||
} catch (error1) {
|
||||
error = error1
|
||||
if (error instanceof Errors.InconsistentChunkError) {
|
||||
return callback(error)
|
||||
} else {
|
||||
throw OError.tag(error)
|
||||
}
|
||||
}
|
||||
callback(null, diff)
|
||||
}
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
export function _getChunksAsSingleChunk(
|
||||
projectId,
|
||||
fromVersion,
|
||||
toVersion,
|
||||
callback
|
||||
) {
|
||||
logger.debug(
|
||||
{ projectId, fromVersion, toVersion },
|
||||
'[_getChunksAsSingleChunk] getting chunks'
|
||||
)
|
||||
_getChunks(projectId, fromVersion, toVersion, (error, chunks) => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
logger.debug(
|
||||
{ projectId, fromVersion, toVersion, chunks },
|
||||
'[_getChunksAsSingleChunk] got chunks'
|
||||
)
|
||||
const chunk = _concatChunks(chunks)
|
||||
callback(null, chunk)
|
||||
})
|
||||
}
|
||||
|
||||
_mocks._getProjectUpdatesBetweenVersions = (
|
||||
projectId,
|
||||
pathname,
|
||||
fromVersion,
|
||||
toVersion,
|
||||
callback
|
||||
) => {
|
||||
_getChunksAsSingleChunk(projectId, fromVersion, toVersion, (error, chunk) => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
logger.debug(
|
||||
{ projectId, pathname, fromVersion, toVersion, chunk },
|
||||
'[_getProjectUpdatesBetweenVersions] concatted chunk'
|
||||
)
|
||||
ChunkTranslator.convertToDiffUpdates(
|
||||
projectId,
|
||||
chunk,
|
||||
pathname,
|
||||
fromVersion,
|
||||
toVersion,
|
||||
callback
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
export function _getProjectUpdatesBetweenVersions(...args) {
|
||||
_mocks._getProjectUpdatesBetweenVersions(...args)
|
||||
}
|
||||
|
||||
_mocks._getChunks = (projectId, fromVersion, toVersion, callback) => {
|
||||
let chunksRequested = 0
|
||||
let lastChunkStartVersion = toVersion
|
||||
const chunks = []
|
||||
|
||||
function shouldRequestAnotherChunk(cb) {
|
||||
const stillUnderChunkLimit = chunksRequested < MAX_CHUNK_REQUESTS
|
||||
const stillNeedVersions = fromVersion < lastChunkStartVersion
|
||||
const stillSaneStartVersion = lastChunkStartVersion > 0
|
||||
logger.debug(
|
||||
{
|
||||
projectId,
|
||||
stillUnderChunkLimit,
|
||||
stillNeedVersions,
|
||||
stillSaneStartVersion,
|
||||
fromVersion,
|
||||
lastChunkStartVersion,
|
||||
chunksRequested,
|
||||
},
|
||||
'[_getChunks.shouldRequestAnotherChunk]'
|
||||
)
|
||||
return cb(
|
||||
null,
|
||||
stillUnderChunkLimit && stillNeedVersions && stillSaneStartVersion
|
||||
)
|
||||
}
|
||||
|
||||
function getNextChunk(cb) {
|
||||
logger.debug(
|
||||
{
|
||||
projectId,
|
||||
lastChunkStartVersion,
|
||||
},
|
||||
'[_getChunks.getNextChunk]'
|
||||
)
|
||||
WebApiManager.getHistoryId(projectId, (error, historyId) => {
|
||||
if (error) {
|
||||
return cb(OError.tag(error))
|
||||
}
|
||||
HistoryStoreManager.getChunkAtVersion(
|
||||
projectId,
|
||||
historyId,
|
||||
lastChunkStartVersion,
|
||||
(error, chunk) => {
|
||||
if (error) {
|
||||
return cb(OError.tag(error))
|
||||
}
|
||||
lastChunkStartVersion = chunk.chunk.startVersion
|
||||
chunksRequested += 1
|
||||
chunks.push(chunk)
|
||||
cb()
|
||||
}
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
getNextChunk(error => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
async.whilst(shouldRequestAnotherChunk, getNextChunk, error => {
|
||||
if (error) {
|
||||
return callback(error)
|
||||
}
|
||||
if (chunksRequested >= MAX_CHUNK_REQUESTS) {
|
||||
error = new Errors.BadRequestError('Diff spans too many chunks')
|
||||
callback(error)
|
||||
} else {
|
||||
callback(null, chunks)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export function _getChunks(...args) {
|
||||
_mocks._getChunks(...args)
|
||||
}
|
||||
|
||||
_mocks._concatChunks = chunks => {
|
||||
chunks.reverse()
|
||||
const chunk = chunks[0]
|
||||
// We will append all of the changes from the later
|
||||
// chunks onto the first one, to form one 'big' chunk.
|
||||
for (const nextChunk of chunks.slice(1)) {
|
||||
chunk.chunk.history.changes = chunk.chunk.history.changes.concat(
|
||||
nextChunk.chunk.history.changes
|
||||
)
|
||||
}
|
||||
return chunk
|
||||
}
|
||||
|
||||
function _concatChunks(...args) {
|
||||
return _mocks._concatChunks(...args)
|
||||
}
|
||||
|
||||
// for tests
|
||||
export function setMaxChunkRequests(value) {
|
||||
MAX_CHUNK_REQUESTS = value
|
||||
}
|
80
services/project-history/app/js/DocumentUpdaterManager.js
Normal file
80
services/project-history/app/js/DocumentUpdaterManager.js
Normal file
@@ -0,0 +1,80 @@
|
||||
/* eslint-disable
|
||||
no-unused-vars,
|
||||
*/
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import request from 'request'
|
||||
import logger from '@overleaf/logger'
|
||||
import Settings from '@overleaf/settings'
|
||||
import OError from '@overleaf/o-error'
|
||||
|
||||
export function getDocument(projectId, docId, callback) {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
const url = `${Settings.apis.documentupdater.url}/project/${projectId}/doc/${docId}`
|
||||
logger.debug({ projectId, docId }, 'getting doc from document updater')
|
||||
return request.get(url, function (error, res, body) {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
if (res.statusCode >= 200 && res.statusCode < 300) {
|
||||
try {
|
||||
body = JSON.parse(body)
|
||||
} catch (error1) {
|
||||
error = error1
|
||||
return callback(error)
|
||||
}
|
||||
logger.debug(
|
||||
{ projectId, docId, version: body.version },
|
||||
'got doc from document updater'
|
||||
)
|
||||
return callback(null, body.lines.join('\n'), body.version)
|
||||
} else {
|
||||
error = new OError(
|
||||
`doc updater returned a non-success status code: ${res.statusCode}`,
|
||||
{ project_id: projectId, doc_id: docId, url }
|
||||
)
|
||||
return callback(error)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
export function setDocument(projectId, docId, content, userId, callback) {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
const url = `${Settings.apis.documentupdater.url}/project/${projectId}/doc/${docId}`
|
||||
logger.debug({ projectId, docId }, 'setting doc in document updater')
|
||||
return request.post(
|
||||
{
|
||||
url,
|
||||
json: {
|
||||
lines: content.split('\n'),
|
||||
source: 'restore',
|
||||
user_id: userId,
|
||||
undoing: true,
|
||||
},
|
||||
},
|
||||
function (error, res, body) {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
if (res.statusCode >= 200 && res.statusCode < 300) {
|
||||
return callback(null)
|
||||
} else {
|
||||
error = new OError(
|
||||
`doc updater returned a non-success status code: ${res.statusCode}`,
|
||||
{ project_id: projectId, doc_id: docId, url }
|
||||
)
|
||||
return callback(error)
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
267
services/project-history/app/js/ErrorRecorder.js
Normal file
267
services/project-history/app/js/ErrorRecorder.js
Normal file
@@ -0,0 +1,267 @@
|
||||
// @ts-check
|
||||
|
||||
import { callbackify } from 'node:util'
|
||||
import logger from '@overleaf/logger'
|
||||
import metrics from '@overleaf/metrics'
|
||||
import OError from '@overleaf/o-error'
|
||||
import { db } from './mongodb.js'
|
||||
|
||||
/**
|
||||
* @import { ProjectHistoryFailure } from './mongo-types'
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {number} queueSize
|
||||
* @param {Error} error
|
||||
* @return {Promise<ProjectHistoryFailure>} the failure record
|
||||
*/
|
||||
async function record(projectId, queueSize, error) {
|
||||
const errorRecord = {
|
||||
queueSize,
|
||||
error: error.toString(),
|
||||
stack: error.stack ?? '',
|
||||
ts: new Date(),
|
||||
}
|
||||
logger.debug(
|
||||
{ projectId, errorRecord },
|
||||
'recording failed attempt to process updates'
|
||||
)
|
||||
const result = await db.projectHistoryFailures.findOneAndUpdate(
|
||||
{ project_id: projectId },
|
||||
{
|
||||
$set: errorRecord,
|
||||
$inc: { attempts: 1 },
|
||||
$push: {
|
||||
history: {
|
||||
$each: [errorRecord],
|
||||
$position: 0,
|
||||
// only keep recent failures
|
||||
$slice: 10,
|
||||
},
|
||||
},
|
||||
},
|
||||
{ upsert: true, returnDocument: 'after', includeResultMetadata: true }
|
||||
)
|
||||
if (result.value == null) {
|
||||
// Since we upsert, the result should always have a value
|
||||
throw new OError('no value returned when recording an error', { projectId })
|
||||
}
|
||||
return result.value
|
||||
}
|
||||
|
||||
async function clearError(projectId) {
|
||||
await db.projectHistoryFailures.deleteOne({ project_id: projectId })
|
||||
}
|
||||
|
||||
async function setForceDebug(projectId, state) {
|
||||
if (state == null) {
|
||||
state = true
|
||||
}
|
||||
logger.debug({ projectId, state }, 'setting forceDebug state for project')
|
||||
await db.projectHistoryFailures.updateOne(
|
||||
{ project_id: projectId },
|
||||
{ $set: { forceDebug: state } },
|
||||
{ upsert: true }
|
||||
)
|
||||
}
|
||||
|
||||
// we only record the sync start time, and not the end time, because the
|
||||
// record should be cleared on success.
|
||||
async function recordSyncStart(projectId) {
|
||||
await db.projectHistoryFailures.updateOne(
|
||||
{ project_id: projectId },
|
||||
{
|
||||
$currentDate: { resyncStartedAt: true },
|
||||
$inc: { resyncAttempts: 1 },
|
||||
$push: {
|
||||
history: {
|
||||
$each: [{ resyncStartedAt: new Date() }],
|
||||
$position: 0,
|
||||
$slice: 10,
|
||||
},
|
||||
},
|
||||
},
|
||||
{ upsert: true }
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param projectId
|
||||
*/
|
||||
async function getFailureRecord(projectId) {
|
||||
return await db.projectHistoryFailures.findOne({ project_id: projectId })
|
||||
}
|
||||
|
||||
async function getLastFailure(projectId) {
|
||||
const result = await db.projectHistoryFailures.findOneAndUpdate(
|
||||
{ project_id: projectId },
|
||||
{ $inc: { requestCount: 1 } }, // increment the request count every time we check the last failure
|
||||
{ projection: { error: 1, ts: 1 } }
|
||||
)
|
||||
return result && result.value
|
||||
}
|
||||
|
||||
async function getFailedProjects() {
|
||||
return await db.projectHistoryFailures.find({}).toArray()
|
||||
}
|
||||
|
||||
async function getFailuresByType() {
|
||||
const results = await db.projectHistoryFailures.find({}).toArray()
|
||||
const failureCounts = {}
|
||||
const failureAttempts = {}
|
||||
const failureRequests = {}
|
||||
const maxQueueSize = {}
|
||||
// count all the failures and number of attempts by type
|
||||
for (const result of results || []) {
|
||||
const failureType = result.error
|
||||
const attempts = result.attempts || 1 // allow for field to be absent
|
||||
const requests = result.requestCount || 0
|
||||
const queueSize = result.queueSize || 0
|
||||
if (failureCounts[failureType] > 0) {
|
||||
failureCounts[failureType]++
|
||||
failureAttempts[failureType] += attempts
|
||||
failureRequests[failureType] += requests
|
||||
maxQueueSize[failureType] = Math.max(queueSize, maxQueueSize[failureType])
|
||||
} else {
|
||||
failureCounts[failureType] = 1
|
||||
failureAttempts[failureType] = attempts
|
||||
failureRequests[failureType] = requests
|
||||
maxQueueSize[failureType] = queueSize
|
||||
}
|
||||
}
|
||||
|
||||
return { failureCounts, failureAttempts, failureRequests, maxQueueSize }
|
||||
}
|
||||
|
||||
async function getFailures() {
|
||||
const { failureCounts, failureAttempts, failureRequests, maxQueueSize } =
|
||||
await getFailuresByType()
|
||||
|
||||
let attempts, failureType, label, requests
|
||||
const shortNames = {
|
||||
'Error: bad response from filestore: 404': 'filestore-404',
|
||||
'Error: bad response from filestore: 500': 'filestore-500',
|
||||
'NotFoundError: got a 404 from web api': 'web-api-404',
|
||||
'OError: history store a non-success status code: 413': 'history-store-413',
|
||||
'OError: history store a non-success status code: 422': 'history-store-422',
|
||||
'OError: history store a non-success status code: 500': 'history-store-500',
|
||||
'OError: history store a non-success status code: 503': 'history-store-503',
|
||||
'Error: history store a non-success status code: 413': 'history-store-413',
|
||||
'Error: history store a non-success status code: 422': 'history-store-422',
|
||||
'Error: history store a non-success status code: 500': 'history-store-500',
|
||||
'Error: history store a non-success status code: 503': 'history-store-503',
|
||||
'Error: web returned a non-success status code: 500 (attempts: 2)':
|
||||
'web-500',
|
||||
'Error: ESOCKETTIMEDOUT': 'socket-timeout',
|
||||
'Error: no project found': 'no-project-found',
|
||||
'OpsOutOfOrderError: project structure version out of order on incoming updates':
|
||||
'incoming-project-version-out-of-order',
|
||||
'OpsOutOfOrderError: doc version out of order on incoming updates':
|
||||
'incoming-doc-version-out-of-order',
|
||||
'OpsOutOfOrderError: project structure version out of order':
|
||||
'chunk-project-version-out-of-order',
|
||||
'OpsOutOfOrderError: doc version out of order':
|
||||
'chunk-doc-version-out-of-order',
|
||||
'Error: failed to extend lock': 'lock-overrun',
|
||||
'Error: tried to release timed out lock': 'lock-overrun',
|
||||
'Error: Timeout': 'lock-overrun',
|
||||
'Error: sync ongoing': 'sync-ongoing',
|
||||
'SyncError: unexpected resyncProjectStructure update': 'sync-error',
|
||||
'[object Error]': 'unknown-error-object',
|
||||
'UpdateWithUnknownFormatError: update with unknown format':
|
||||
'unknown-format',
|
||||
'Error: update with unknown format': 'unknown-format',
|
||||
'TextOperationError: The base length of the second operation has to be the target length of the first operation':
|
||||
'text-op-error',
|
||||
'Error: ENOSPC: no space left on device, write': 'ENOSPC',
|
||||
'*': 'other',
|
||||
}
|
||||
|
||||
// set all the known errors to zero if not present (otherwise gauges stay on their last value)
|
||||
const summaryCounts = {}
|
||||
const summaryAttempts = {}
|
||||
const summaryRequests = {}
|
||||
const summaryMaxQueueSize = {}
|
||||
|
||||
for (failureType in shortNames) {
|
||||
label = shortNames[failureType]
|
||||
summaryCounts[label] = 0
|
||||
summaryAttempts[label] = 0
|
||||
summaryRequests[label] = 0
|
||||
summaryMaxQueueSize[label] = 0
|
||||
}
|
||||
|
||||
// record a metric for each type of failure
|
||||
for (failureType in failureCounts) {
|
||||
const failureCount = failureCounts[failureType]
|
||||
label = shortNames[failureType] || shortNames['*']
|
||||
summaryCounts[label] += failureCount
|
||||
summaryAttempts[label] += failureAttempts[failureType]
|
||||
summaryRequests[label] += failureRequests[failureType]
|
||||
summaryMaxQueueSize[label] = Math.max(
|
||||
maxQueueSize[failureType],
|
||||
summaryMaxQueueSize[label]
|
||||
)
|
||||
}
|
||||
|
||||
for (label in summaryCounts) {
|
||||
const count = summaryCounts[label]
|
||||
metrics.globalGauge('failed', count, 1, { status: label })
|
||||
}
|
||||
|
||||
for (label in summaryAttempts) {
|
||||
attempts = summaryAttempts[label]
|
||||
metrics.globalGauge('attempts', attempts, 1, { status: label })
|
||||
}
|
||||
|
||||
for (label in summaryRequests) {
|
||||
requests = summaryRequests[label]
|
||||
metrics.globalGauge('requests', requests, 1, { status: label })
|
||||
}
|
||||
|
||||
for (label in summaryMaxQueueSize) {
|
||||
const queueSize = summaryMaxQueueSize[label]
|
||||
metrics.globalGauge('max-queue-size', queueSize, 1, { status: label })
|
||||
}
|
||||
|
||||
return {
|
||||
counts: summaryCounts,
|
||||
attempts: summaryAttempts,
|
||||
requests: summaryRequests,
|
||||
maxQueueSize: summaryMaxQueueSize,
|
||||
}
|
||||
}
|
||||
|
||||
// EXPORTS
|
||||
|
||||
const getFailedProjectsCb = callbackify(getFailedProjects)
|
||||
const getFailureRecordCb = callbackify(getFailureRecord)
|
||||
const getFailuresCb = callbackify(getFailures)
|
||||
const getLastFailureCb = callbackify(getLastFailure)
|
||||
const recordCb = callbackify(record)
|
||||
const clearErrorCb = callbackify(clearError)
|
||||
const recordSyncStartCb = callbackify(recordSyncStart)
|
||||
const setForceDebugCb = callbackify(setForceDebug)
|
||||
|
||||
export {
|
||||
getFailedProjectsCb as getFailedProjects,
|
||||
getFailureRecordCb as getFailureRecord,
|
||||
getLastFailureCb as getLastFailure,
|
||||
getFailuresCb as getFailures,
|
||||
recordCb as record,
|
||||
clearErrorCb as clearError,
|
||||
recordSyncStartCb as recordSyncStart,
|
||||
setForceDebugCb as setForceDebug,
|
||||
}
|
||||
|
||||
export const promises = {
|
||||
getFailedProjects,
|
||||
getFailureRecord,
|
||||
getLastFailure,
|
||||
getFailures,
|
||||
record,
|
||||
clearError,
|
||||
recordSyncStart,
|
||||
setForceDebug,
|
||||
}
|
11
services/project-history/app/js/Errors.js
Normal file
11
services/project-history/app/js/Errors.js
Normal file
@@ -0,0 +1,11 @@
|
||||
import OError from '@overleaf/o-error'
|
||||
|
||||
export class NotFoundError extends OError {}
|
||||
export class BadRequestError extends OError {}
|
||||
export class SyncError extends OError {}
|
||||
export class OpsOutOfOrderError extends OError {}
|
||||
export class InconsistentChunkError extends OError {}
|
||||
export class UpdateWithUnknownFormatError extends OError {}
|
||||
export class UnexpectedOpTypeError extends OError {}
|
||||
export class TooManyRequestsError extends OError {}
|
||||
export class NeedFullProjectStructureResyncError extends OError {}
|
129
services/project-history/app/js/FileTreeDiffGenerator.js
Normal file
129
services/project-history/app/js/FileTreeDiffGenerator.js
Normal file
@@ -0,0 +1,129 @@
|
||||
/* eslint-disable
|
||||
no-return-assign,
|
||||
*/
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS101: Remove unnecessary use of Array.from
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import Core from 'overleaf-editor-core'
|
||||
import logger from '@overleaf/logger'
|
||||
import * as Errors from './Errors.js'
|
||||
|
||||
const { MoveFileOperation, AddFileOperation, EditFileOperation } = Core
|
||||
|
||||
export function buildDiff(chunk, fromVersion, toVersion) {
|
||||
chunk = Core.Chunk.fromRaw(chunk.chunk)
|
||||
const chunkStartVersion = chunk.getStartVersion()
|
||||
|
||||
const diff = _getInitialDiffSnapshot(chunk, fromVersion)
|
||||
|
||||
const changes = chunk
|
||||
.getChanges()
|
||||
.slice(fromVersion - chunkStartVersion, toVersion - chunkStartVersion)
|
||||
for (let i = 0; i < changes.length; i++) {
|
||||
const change = changes[i]
|
||||
for (const operation of Array.from(change.getOperations())) {
|
||||
if (operation.pathname === null || operation.pathname === '') {
|
||||
// skip operations for missing files
|
||||
logger.warn({ diff, operation }, 'invalid pathname in operation')
|
||||
} else if (operation instanceof EditFileOperation) {
|
||||
_applyEditFileToDiff(diff, operation)
|
||||
} else if (operation instanceof AddFileOperation) {
|
||||
_applyAddFileToDiff(diff, operation)
|
||||
} else if (operation instanceof MoveFileOperation) {
|
||||
if (operation.isRemoveFile()) {
|
||||
const deletedAtV = fromVersion + i
|
||||
_applyDeleteFileToDiff(diff, operation, deletedAtV)
|
||||
} else {
|
||||
_applyMoveFileToDiff(diff, operation)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return Object.values(diff)
|
||||
}
|
||||
|
||||
function _getInitialDiffSnapshot(chunk, fromVersion) {
|
||||
// Start with a 'diff' which is snapshot of the filetree at the beginning,
|
||||
// with nothing in the diff marked as changed.
|
||||
// Use a bare object to protect against reserved names.
|
||||
const diff = Object.create(null)
|
||||
const files = _getInitialFiles(chunk, fromVersion)
|
||||
for (const [pathname, file] of Object.entries(files)) {
|
||||
diff[pathname] = { pathname, editable: file.isEditable() }
|
||||
}
|
||||
return diff
|
||||
}
|
||||
|
||||
function _getInitialFiles(chunk, fromVersion) {
|
||||
const snapshot = chunk.getSnapshot()
|
||||
const changes = chunk
|
||||
.getChanges()
|
||||
.slice(0, fromVersion - chunk.getStartVersion())
|
||||
snapshot.applyAll(changes)
|
||||
return snapshot.fileMap.files
|
||||
}
|
||||
|
||||
function _applyAddFileToDiff(diff, operation) {
|
||||
return (diff[operation.pathname] = {
|
||||
pathname: operation.pathname,
|
||||
operation: 'added',
|
||||
editable: operation.file.isEditable(),
|
||||
})
|
||||
}
|
||||
|
||||
function _applyEditFileToDiff(diff, operation) {
|
||||
const change = diff[operation.pathname]
|
||||
if ((change != null ? change.operation : undefined) == null) {
|
||||
// avoid exception for non-existent change
|
||||
return (diff[operation.pathname] = {
|
||||
pathname: operation.pathname,
|
||||
operation: 'edited',
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
function _applyMoveFileToDiff(diff, operation) {
|
||||
if (
|
||||
diff[operation.newPathname] != null &&
|
||||
diff[operation.newPathname].operation !== 'removed'
|
||||
) {
|
||||
const err = new Errors.InconsistentChunkError(
|
||||
'trying to move to file that already exists',
|
||||
{ diff, operation }
|
||||
)
|
||||
throw err
|
||||
}
|
||||
const change = diff[operation.pathname]
|
||||
if (change == null) {
|
||||
logger.warn({ diff, operation }, 'tried to rename non-existent file')
|
||||
return
|
||||
}
|
||||
change.newPathname = operation.newPathname
|
||||
if (change.operation === 'added') {
|
||||
// If this file was added this time, just leave it as an add, but
|
||||
// at the new name.
|
||||
change.pathname = operation.newPathname
|
||||
delete change.newPathname
|
||||
} else {
|
||||
change.operation = 'renamed'
|
||||
}
|
||||
diff[operation.newPathname] = change
|
||||
return delete diff[operation.pathname]
|
||||
}
|
||||
|
||||
function _applyDeleteFileToDiff(diff, operation, deletedAtV) {
|
||||
// avoid exception for non-existent change
|
||||
if (diff[operation.pathname] != null) {
|
||||
diff[operation.pathname].operation = 'removed'
|
||||
}
|
||||
return diff[operation.pathname] != null
|
||||
? (diff[operation.pathname].deletedAtV = deletedAtV)
|
||||
: undefined
|
||||
}
|
142
services/project-history/app/js/FlushManager.js
Normal file
142
services/project-history/app/js/FlushManager.js
Normal file
@@ -0,0 +1,142 @@
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS101: Remove unnecessary use of Array.from
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import async from 'async'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import metrics from '@overleaf/metrics'
|
||||
import _ from 'lodash'
|
||||
import * as RedisManager from './RedisManager.js'
|
||||
import * as UpdatesProcessor from './UpdatesProcessor.js'
|
||||
import * as ErrorRecorder from './ErrorRecorder.js'
|
||||
|
||||
export function flushIfOld(projectId, cutoffTime, callback) {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
return RedisManager.getFirstOpTimestamp(
|
||||
projectId,
|
||||
function (err, firstOpTimestamp) {
|
||||
if (err != null) {
|
||||
return callback(OError.tag(err))
|
||||
}
|
||||
// In the normal case, the flush marker will be set with the
|
||||
// timestamp of the oldest operation in the queue by docupdater.
|
||||
// If the marker is not set for any reason, we flush it anyway
|
||||
// for safety.
|
||||
if (!firstOpTimestamp || firstOpTimestamp < cutoffTime) {
|
||||
logger.debug(
|
||||
{ projectId, firstOpTimestamp, cutoffTime },
|
||||
'flushing old project'
|
||||
)
|
||||
metrics.inc('flush-old-updates', 1, { status: 'flushed' })
|
||||
return UpdatesProcessor.processUpdatesForProject(projectId, callback)
|
||||
} else {
|
||||
metrics.inc('flush-old-updates', 1, { status: 'skipped' })
|
||||
return callback()
|
||||
}
|
||||
}
|
||||
)
|
||||
}
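// Usage sketch (assumed caller pattern, see flushOldOps below): flush a
// project only when its oldest queued op predates a cutoff, e.g.
//   const cutoff = new Date(Date.now() - 6 * 3600 * 1000)
//   flushIfOld(projectId, cutoff, err => { /* handle error */ })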
|
||||
|
||||
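// Flush projects whose queued history ops are older than options.maxAge
// (seconds, default 6 hours). Supports background mode, a per-run limit,
// an overall timeout and a per-project queueDelay; the callback receives
// { success, failure, failedProjects }.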
export function flushOldOps(options, callback) {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
logger.debug({ options }, 'starting flush of old ops')
|
||||
// allow running flush in background for cron jobs
|
||||
if (options.background) {
|
||||
// return immediate response to client, then discard callback
|
||||
callback(null, { message: 'running flush in background' })
|
||||
callback = function () {}
|
||||
}
|
||||
return RedisManager.getProjectIdsWithHistoryOps(
|
||||
null,
|
||||
function (error, projectIds) {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
return ErrorRecorder.getFailedProjects(
|
||||
function (error, projectHistoryFailures) {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
// exclude failed projects already in projectHistoryFailures
|
||||
const failedProjects = new Set()
|
||||
for (const entry of Array.from(projectHistoryFailures)) {
|
||||
failedProjects.add(entry.project_id)
|
||||
}
|
||||
// randomise order so we get different projects if there is a limit
|
||||
projectIds = _.shuffle(projectIds)
|
||||
const maxAge = options.maxAge || 6 * 3600 // default to 6 hours
|
||||
const cutoffTime = new Date(Date.now() - maxAge * 1000)
|
||||
const startTime = new Date()
|
||||
let count = 0
|
||||
const jobs = projectIds.map(
|
||||
projectId =>
|
||||
function (cb) {
|
||||
const timeTaken = new Date() - startTime
|
||||
count++
|
||||
if (
|
||||
(options != null ? options.timeout : undefined) &&
|
||||
timeTaken > options.timeout
|
||||
) {
|
||||
// finish early due to timeout, return an error to bail out of the async iteration
|
||||
logger.debug('background retries timed out')
|
||||
return cb(new OError('retries timed out'))
|
||||
}
|
||||
if (
|
||||
(options != null ? options.limit : undefined) &&
|
||||
count > options.limit
|
||||
) {
|
||||
// finish early due to reaching limit, return an error to bail out of the async iteration
|
||||
logger.debug({ count }, 'background retries hit limit')
|
||||
return cb(new OError('hit limit'))
|
||||
}
|
||||
if (failedProjects.has(projectId)) {
|
||||
// skip failed projects
|
||||
return setTimeout(cb, options.queueDelay || 100) // pause between flushes
|
||||
}
|
||||
return flushIfOld(projectId, cutoffTime, function (err) {
|
||||
if (err != null) {
|
||||
logger.warn(
|
||||
{ projectId, err },
|
||||
'error flushing old project'
|
||||
)
|
||||
}
|
||||
return setTimeout(cb, options.queueDelay || 100)
|
||||
})
|
||||
}
|
||||
) // pause between flushes
|
||||
return async.series(
|
||||
async.reflectAll(jobs),
|
||||
function (error, results) {
|
||||
const success = []
|
||||
const failure = []
|
||||
results.forEach((result, i) => {
|
||||
if (
|
||||
result.error != null &&
|
||||
!['retries timed out', 'hit limit'].includes(
|
||||
result?.error?.message
|
||||
)
|
||||
) {
|
||||
// record unexpected errors as failures; the 'retries timed out' and
// 'hit limit' bail-outs are expected and counted as success
|
||||
return failure.push(projectIds[i])
|
||||
} else {
|
||||
return success.push(projectIds[i])
|
||||
}
|
||||
})
|
||||
return callback(error, { success, failure, failedProjects })
|
||||
}
|
||||
)
|
||||
}
|
||||
)
|
||||
}
|
||||
)
|
||||
}
|
58
services/project-history/app/js/HashManager.js
Normal file
@@ -0,0 +1,58 @@
|
||||
/* eslint-disable
|
||||
no-undef,
|
||||
no-unused-vars,
|
||||
*/
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import { promisify } from 'node:util'
|
||||
import fs from 'node:fs'
|
||||
import crypto from 'node:crypto'
|
||||
import OError from '@overleaf/o-error'
|
||||
import { pipeline } from 'node:stream'
|
||||
|
||||
export function _getBlobHashFromString(string) {
|
||||
const byteLength = Buffer.byteLength(string)
|
||||
const hash = crypto.createHash('sha1')
|
||||
hash.setEncoding('hex')
|
||||
hash.update('blob ' + byteLength + '\x00')
|
||||
hash.update(string, 'utf8')
|
||||
hash.end()
|
||||
return hash.read()
|
||||
}
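// Note: this follows git's blob object hashing scheme,
// sha1('blob ' + byteLength + '\0' + content), so the hash should match
// `git hash-object` for the same bytes (assumption: content is utf8).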
|
||||
|
||||
export function _getBlobHash(fsPath, callback) {
|
||||
return fs.stat(fsPath, function (err, stats) {
|
||||
if (err != null) {
|
||||
OError.tag(err, 'failed to stat file in _getBlobHash', { fsPath })
|
||||
return callback(err)
|
||||
}
|
||||
const byteLength = stats.size
|
||||
const hash = crypto.createHash('sha1')
|
||||
hash.setEncoding('hex')
|
||||
hash.update('blob ' + byteLength + '\x00')
|
||||
|
||||
pipeline(fs.createReadStream(fsPath), hash, err => {
|
||||
if (err) {
|
||||
callback(
|
||||
OError.tag(err, 'error streaming file from disk', {
|
||||
fsPath,
|
||||
byteLength,
|
||||
})
|
||||
)
|
||||
} else {
|
||||
hash.end()
|
||||
callback(null, hash.read(), byteLength)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export const promises = {
|
||||
_getBlobHash: promisify(_getBlobHash),
|
||||
}
|
78
services/project-history/app/js/HealthChecker.js
Normal file
@@ -0,0 +1,78 @@
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import { ObjectId } from './mongodb.js'
|
||||
import request from 'request'
|
||||
import async from 'async'
|
||||
import settings from '@overleaf/settings'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as LockManager from './LockManager.js'
|
||||
|
||||
const { port } = settings.internal.history
|
||||
|
||||
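// Health check for the service: verify the lock endpoint responds, then
// flush the configured health-check project and fetch its updates via the
// local HTTP API.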
export function check(callback) {
|
||||
const projectId = new ObjectId(settings.history.healthCheck.project_id)
|
||||
const url = `http://127.0.0.1:${port}/project/${projectId}`
|
||||
logger.debug({ projectId }, 'running health check')
|
||||
const jobs = [
|
||||
cb =>
|
||||
request.get(
|
||||
{ url: `http://127.0.0.1:${port}/check_lock`, timeout: 3000 },
|
||||
function (err, res, body) {
|
||||
if (err != null) {
|
||||
OError.tag(err, 'error checking lock for health check', {
|
||||
project_id: projectId,
|
||||
})
|
||||
return cb(err)
|
||||
} else if ((res != null ? res.statusCode : undefined) !== 200) {
|
||||
return cb(new Error(`status code not 200, it's ${res.statusCode}`))
|
||||
} else {
|
||||
return cb()
|
||||
}
|
||||
}
|
||||
),
|
||||
cb =>
|
||||
request.post(
|
||||
{ url: `${url}/flush`, timeout: 10000 },
|
||||
function (err, res, body) {
|
||||
if (err != null) {
|
||||
OError.tag(err, 'error flushing for health check', {
|
||||
project_id: projectId,
|
||||
})
|
||||
return cb(err)
|
||||
} else if ((res != null ? res.statusCode : undefined) !== 204) {
|
||||
return cb(new Error(`status code not 204, it's ${res.statusCode}`))
|
||||
} else {
|
||||
return cb()
|
||||
}
|
||||
}
|
||||
),
|
||||
cb =>
|
||||
request.get(
|
||||
{ url: `${url}/updates`, timeout: 10000 },
|
||||
function (err, res, body) {
|
||||
if (err != null) {
|
||||
OError.tag(err, 'error getting updates for health check', {
|
||||
project_id: projectId,
|
||||
})
|
||||
return cb(err)
|
||||
} else if ((res != null ? res.statusCode : undefined) !== 200) {
|
||||
return cb(new Error(`status code not 200, it's ${res.statusCode}`))
|
||||
} else {
|
||||
return cb()
|
||||
}
|
||||
}
|
||||
),
|
||||
]
|
||||
return async.series(jobs, callback)
|
||||
}
|
||||
|
||||
export function checkLock(callback) {
|
||||
return LockManager.healthCheck(callback)
|
||||
}
|
22
services/project-history/app/js/HistoryApiManager.js
Normal file
@@ -0,0 +1,22 @@
|
||||
/* eslint-disable
|
||||
no-unused-vars,
|
||||
*/
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
import logger from '@overleaf/logger'
|
||||
|
||||
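// A project should use full project history if (and only if) web has a
// history id recorded for it.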
export function shouldUseProjectHistory(projectId, callback) {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
return WebApiManager.getHistoryId(projectId, (error, historyId) =>
|
||||
callback(error, historyId != null)
|
||||
)
|
||||
}
|
123
services/project-history/app/js/HistoryBlobTranslator.js
Normal file
@@ -0,0 +1,123 @@
|
||||
// @ts-check
|
||||
|
||||
import {
|
||||
Range,
|
||||
TrackedChange,
|
||||
TrackedChangeList,
|
||||
CommentList,
|
||||
Comment,
|
||||
TrackingProps,
|
||||
} from 'overleaf-editor-core'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
|
||||
/**
|
||||
* @import { AddDocUpdate } from './types'
|
||||
* @import { CommentRawData, TrackedChangeRawData } from 'overleaf-editor-core/lib/types'
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {AddDocUpdate} update
|
||||
* @returns {{trackedChanges: TrackedChangeRawData[], comments: CommentRawData[]} | undefined}
|
||||
*/
|
||||
export function createRangeBlobDataFromUpdate(update) {
|
||||
logger.debug({ update }, 'createBlobDataFromUpdate')
|
||||
|
||||
if (update.doc == null || update.docLines == null) {
|
||||
throw new OError('Not an AddFileUpdate')
|
||||
}
|
||||
if (
|
||||
!update.ranges ||
|
||||
(update.ranges.changes == null && update.ranges.comments == null)
|
||||
) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
if (
|
||||
(!update.ranges.changes || update.ranges.changes.length === 0) &&
|
||||
(!update.ranges.comments || update.ranges.comments.length === 0)
|
||||
) {
|
||||
return undefined
|
||||
}
|
||||
|
||||
const sortedRanges = [...(update.ranges.changes || [])].sort((a, b) => {
|
||||
if (a.op.p !== b.op.p) {
|
||||
return a.op.p - b.op.p
|
||||
}
|
||||
if ('i' in a.op && a.op.i != null && 'd' in b.op && b.op.d != null) {
|
||||
// Move deletes before inserts
|
||||
return 1
|
||||
}
|
||||
return -1
|
||||
})
|
||||
|
||||
const tcList = new TrackedChangeList([])
|
||||
|
||||
for (const change of sortedRanges) {
|
||||
if ('d' in change.op && change.op.d != null) {
|
||||
const length = change.op.d.length
|
||||
const range = new Range(change.op.hpos ?? change.op.p, length)
|
||||
tcList.add(
|
||||
new TrackedChange(
|
||||
range,
|
||||
new TrackingProps(
|
||||
'delete',
|
||||
change.metadata.user_id,
|
||||
new Date(change.metadata.ts)
|
||||
)
|
||||
)
|
||||
)
|
||||
} else if ('i' in change.op && change.op.i != null) {
|
||||
const length = change.op.i.length
|
||||
const range = new Range(change.op.hpos ?? change.op.p, length)
|
||||
tcList.add(
|
||||
new TrackedChange(
|
||||
range,
|
||||
new TrackingProps(
|
||||
'insert',
|
||||
change.metadata.user_id,
|
||||
new Date(change.metadata.ts)
|
||||
)
|
||||
)
|
||||
)
|
||||
}
|
||||
}
|
||||
const comments = [...(update.ranges.comments || [])].sort((a, b) => {
|
||||
return a.op.p - b.op.p
|
||||
})
|
||||
|
||||
/** @type {Map<string, {ranges: Range[], resolved: boolean}>} */
|
||||
const commentMap = new Map()
|
||||
for (const comment of comments) {
|
||||
const id = comment.op.t
|
||||
if (!commentMap.has(id)) {
|
||||
commentMap.set(id, {
|
||||
ranges: [],
|
||||
resolved: comment.op.resolved ?? false,
|
||||
})
|
||||
}
|
||||
const entry = commentMap.get(id)
|
||||
if (!entry) {
|
||||
throw new Error('Comment entry not found')
|
||||
}
|
||||
if (entry.resolved !== (comment.op.resolved ?? false)) {
|
||||
throw new Error('Mismatching resolved status for comment')
|
||||
}
|
||||
|
||||
const commentLength = comment.op.c.length
|
||||
if (commentLength > 0) {
|
||||
// Empty comments in operations are translated to detached comments
|
||||
const range = new Range(comment.op.hpos ?? comment.op.p, commentLength)
|
||||
entry.ranges.push(range)
|
||||
}
|
||||
}
|
||||
const commentList = new CommentList(
|
||||
[...commentMap.entries()].map(
|
||||
([id, commentObj]) =>
|
||||
new Comment(id, commentObj.ranges, commentObj.resolved)
|
||||
)
|
||||
)
|
||||
|
||||
return { trackedChanges: tcList.toRaw(), comments: commentList.toRaw() }
|
||||
}
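// Illustrative input shape (inferred from the property accesses above, not
// an authoritative schema): an AddDocUpdate carries ranges roughly like
//   update.ranges = {
//     changes: [{ op: { p: 5, i: 'inserted text', hpos: 5 },
//                 metadata: { user_id: '...', ts: '2024-01-01T00:00:00Z' } }],
//     comments: [{ op: { t: 'thread-id', p: 0, c: 'quoted text', resolved: false } }],
//   }
// where hpos, when present, overrides p as the history position.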
|
625
services/project-history/app/js/HistoryStoreManager.js
Normal file
@@ -0,0 +1,625 @@
|
||||
import { promisify } from 'node:util'
|
||||
import fs from 'node:fs'
|
||||
import request from 'request'
|
||||
import stream from 'node:stream'
|
||||
import logger from '@overleaf/logger'
|
||||
import _ from 'lodash'
|
||||
import { URL } from 'node:url'
|
||||
import OError from '@overleaf/o-error'
|
||||
import Settings from '@overleaf/settings'
|
||||
import {
|
||||
fetchStream,
|
||||
fetchNothing,
|
||||
RequestFailedError,
|
||||
} from '@overleaf/fetch-utils'
|
||||
import * as Versions from './Versions.js'
|
||||
import * as Errors from './Errors.js'
|
||||
import * as LocalFileWriter from './LocalFileWriter.js'
|
||||
import * as HashManager from './HashManager.js'
|
||||
import * as HistoryBlobTranslator from './HistoryBlobTranslator.js'
|
||||
import { promisifyMultiResult } from '@overleaf/promise-utils'
|
||||
|
||||
const HTTP_REQUEST_TIMEOUT = Settings.overleaf.history.requestTimeout
|
||||
|
||||
/**
|
||||
* Container for functions that need to be mocked in tests
|
||||
*
|
||||
* TODO: Rewrite tests in terms of exported functions only
|
||||
*/
|
||||
export const _mocks = {}
|
||||
|
||||
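// Minimal Readable used to expose an in-memory string as a stream: _read()
// is a no-op because callers feed it via push(data) followed by push(null)
// to signal EOF (see createBlobFromString below).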
class StringStream extends stream.Readable {
|
||||
_read() {}
|
||||
}
|
||||
|
||||
_mocks.getMostRecentChunk = (projectId, historyId, callback) => {
|
||||
const path = `projects/${historyId}/latest/history`
|
||||
logger.debug({ projectId, historyId }, 'getting chunk from history service')
|
||||
_requestChunk({ path, json: true }, callback)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Callback} callback
|
||||
*/
|
||||
export function getMostRecentChunk(projectId, historyId, callback) {
|
||||
_mocks.getMostRecentChunk(projectId, historyId, callback)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Callback} callback
|
||||
*/
|
||||
export function getChunkAtVersion(projectId, historyId, version, callback) {
|
||||
const path = `projects/${historyId}/versions/${version}/history`
|
||||
logger.debug(
|
||||
{ projectId, historyId, version },
|
||||
'getting chunk from history service for version'
|
||||
)
|
||||
_requestChunk({ path, json: true }, callback)
|
||||
}
|
||||
|
||||
export function getMostRecentVersion(projectId, historyId, callback) {
|
||||
getMostRecentChunk(projectId, historyId, (error, chunk) => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
const mostRecentVersion =
|
||||
chunk.chunk.startVersion + (chunk.chunk.history.changes || []).length
|
||||
const lastChange = _.last(
|
||||
_.sortBy(chunk.chunk.history.changes || [], x => x.timestamp)
|
||||
)
|
||||
// find the latest project and doc versions in the chunk
|
||||
_getLatestProjectVersion(projectId, chunk, (err1, projectVersion) =>
|
||||
_getLatestV2DocVersions(projectId, chunk, (err2, v2DocVersions) => {
|
||||
// return the project and doc versions
|
||||
const projectStructureAndDocVersions = {
|
||||
project: projectVersion,
|
||||
docs: v2DocVersions,
|
||||
}
|
||||
callback(
|
||||
err1 || err2,
|
||||
mostRecentVersion,
|
||||
projectStructureAndDocVersions,
|
||||
lastChange,
|
||||
chunk
|
||||
)
|
||||
})
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} historyId
|
||||
* @param {Object} opts
|
||||
* @param {boolean} [opts.readOnly]
|
||||
* @param {(error: Error, rawChunk?: { startVersion: number, endVersion: number, endTimestamp: Date}) => void} callback
|
||||
*/
|
||||
export function getMostRecentVersionRaw(projectId, historyId, opts, callback) {
|
||||
const path = `projects/${historyId}/latest/history/raw`
|
||||
logger.debug(
|
||||
{ projectId, historyId },
|
||||
'getting raw chunk from history service'
|
||||
)
|
||||
const qs = opts.readOnly ? { readOnly: true } : {}
|
||||
_requestHistoryService({ path, json: true, qs }, (err, body) => {
|
||||
if (err) return callback(OError.tag(err))
|
||||
const { startVersion, endVersion, endTimestamp } = body
|
||||
callback(null, {
|
||||
startVersion,
|
||||
endVersion,
|
||||
endTimestamp: new Date(endTimestamp),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
function _requestChunk(options, callback) {
|
||||
_requestHistoryService(options, (err, chunk) => {
|
||||
if (err) {
|
||||
return callback(OError.tag(err))
|
||||
}
|
||||
if (
|
||||
chunk == null ||
|
||||
chunk.chunk == null ||
|
||||
chunk.chunk.startVersion == null
|
||||
) {
|
||||
const { path } = options
|
||||
return callback(new OError('unexpected response', { path }))
|
||||
}
|
||||
callback(null, chunk)
|
||||
})
|
||||
}
|
||||
|
||||
function _getLatestProjectVersion(projectId, chunk, callback) {
|
||||
// find the initial project version
|
||||
const projectVersionInSnapshot = chunk.chunk.history.snapshot?.projectVersion
|
||||
let projectVersion = projectVersionInSnapshot
|
||||
const chunkStartVersion = chunk.chunk.startVersion
|
||||
// keep track of any first error
|
||||
let error = null
|
||||
// iterate over the changes in chunk to find the most recent project version
|
||||
for (const [changeIdx, change] of (
|
||||
chunk.chunk.history.changes || []
|
||||
).entries()) {
|
||||
const projectVersionInChange = change.projectVersion
|
||||
if (projectVersionInChange != null) {
|
||||
if (
|
||||
projectVersion != null &&
|
||||
Versions.lt(projectVersionInChange, projectVersion)
|
||||
) {
|
||||
if (!error) {
|
||||
error = new Errors.OpsOutOfOrderError(
|
||||
'project structure version out of order',
|
||||
{
|
||||
projectId,
|
||||
chunkStartVersion,
|
||||
projectVersionInSnapshot,
|
||||
changeIdx,
|
||||
projectVersion,
|
||||
projectVersionInChange,
|
||||
}
|
||||
)
|
||||
}
|
||||
} else {
|
||||
projectVersion = projectVersionInChange
|
||||
}
|
||||
}
|
||||
}
|
||||
callback(error, projectVersion)
|
||||
}
|
||||
|
||||
function _getLatestV2DocVersions(projectId, chunk, callback) {
|
||||
// find the initial doc versions (indexed by docId as this is immutable)
|
||||
const v2DocVersions =
|
||||
(chunk.chunk.history.snapshot &&
|
||||
chunk.chunk.history.snapshot.v2DocVersions) ||
|
||||
{}
|
||||
// keep track of any errors
|
||||
let error = null
|
||||
// iterate over the changes in the chunk to find the most recent doc versions
|
||||
for (const change of chunk.chunk.history.changes || []) {
|
||||
if (change.v2DocVersions != null) {
|
||||
for (const docId in change.v2DocVersions) {
|
||||
const docInfo = change.v2DocVersions[docId]
|
||||
const { v } = docInfo
|
||||
if (
|
||||
v2DocVersions[docId] &&
|
||||
v2DocVersions[docId].v != null &&
|
||||
Versions.lt(v, v2DocVersions[docId].v)
|
||||
) {
|
||||
if (!error) {
|
||||
logger.warn(
|
||||
{
|
||||
projectId,
|
||||
docId,
|
||||
changeVersion: docInfo,
|
||||
previousVersion: v2DocVersions[docId],
|
||||
},
|
||||
'doc version out of order in chunk'
|
||||
)
|
||||
error = new Errors.OpsOutOfOrderError('doc version out of order')
|
||||
}
|
||||
} else {
|
||||
v2DocVersions[docId] = docInfo
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
callback(error, v2DocVersions)
|
||||
}
|
||||
|
||||
export function getProjectBlob(historyId, blobHash, callback) {
|
||||
logger.debug({ historyId, blobHash }, 'getting blob from history service')
|
||||
_requestHistoryService(
|
||||
{ path: `projects/${historyId}/blobs/${blobHash}` },
|
||||
callback
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Callback} callback
|
||||
*/
|
||||
export function getProjectBlobStream(historyId, blobHash, callback) {
|
||||
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${blobHash}`
|
||||
logger.debug(
|
||||
{ historyId, blobHash },
|
||||
'getting blob stream from history service'
|
||||
)
|
||||
fetchStream(url, getHistoryFetchOptions())
|
||||
.then(stream => {
|
||||
callback(null, stream)
|
||||
})
|
||||
.catch(err => callback(OError.tag(err)))
|
||||
}
|
||||
|
||||
export function sendChanges(
|
||||
projectId,
|
||||
historyId,
|
||||
changes,
|
||||
endVersion,
|
||||
callback
|
||||
) {
|
||||
logger.debug(
|
||||
{ projectId, historyId, endVersion },
|
||||
'sending changes to history service'
|
||||
)
|
||||
_requestHistoryService(
|
||||
{
|
||||
path: `projects/${historyId}/legacy_changes`,
|
||||
qs: { end_version: endVersion },
|
||||
method: 'POST',
|
||||
json: changes,
|
||||
},
|
||||
error => {
|
||||
if (error) {
|
||||
OError.tag(error, 'failed to send changes to v1', {
|
||||
projectId,
|
||||
historyId,
|
||||
endVersion,
|
||||
errorCode: error.code,
|
||||
statusCode: error.statusCode,
|
||||
body: error.body,
|
||||
})
|
||||
return callback(error)
|
||||
}
|
||||
callback()
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
function createBlobFromString(historyId, data, fileId, callback) {
|
||||
const stringStream = new StringStream()
|
||||
stringStream.push(data)
|
||||
stringStream.push(null)
|
||||
LocalFileWriter.bufferOnDisk(
|
||||
stringStream,
|
||||
'',
|
||||
fileId,
|
||||
(fsPath, cb) => {
|
||||
_createBlob(historyId, fsPath, cb)
|
||||
},
|
||||
callback
|
||||
)
|
||||
}
|
||||
|
||||
function _checkBlobExists(historyId, hash, callback) {
|
||||
if (!hash) return callback(null, false)
|
||||
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${hash}`
|
||||
fetchNothing(url, {
|
||||
method: 'HEAD',
|
||||
...getHistoryFetchOptions(),
|
||||
})
|
||||
.then(res => {
|
||||
callback(null, true)
|
||||
})
|
||||
.catch(err => {
|
||||
if (err instanceof RequestFailedError && err.response.status === 404) {
|
||||
return callback(null, false)
|
||||
}
|
||||
callback(OError.tag(err), false)
|
||||
})
|
||||
}
|
||||
|
||||
function _rewriteFilestoreUrl(url, projectId, callback) {
|
||||
if (!url) {
|
||||
return { fileId: null, filestoreURL: null }
|
||||
}
|
||||
// Rewrite the filestore url to point to the location in the local
|
||||
// settings for this service (this avoids problems with cross-
|
||||
// datacentre requests when running filestore in multiple locations).
|
||||
const { pathname: fileStorePath } = new URL(url)
|
||||
const urlMatch = /^\/project\/([0-9a-f]{24})\/file\/([0-9a-f]{24})$/.exec(
|
||||
fileStorePath
|
||||
)
|
||||
if (urlMatch == null) {
|
||||
return callback(new OError('invalid file for blob creation'))
|
||||
}
|
||||
if (urlMatch[1] !== projectId) {
|
||||
return callback(new OError('invalid project for blob creation'))
|
||||
}
|
||||
|
||||
const fileId = urlMatch[2]
|
||||
const filestoreURL = `${Settings.apis.filestore.url}/project/${projectId}/file/${fileId}`
|
||||
return { filestoreURL, fileId }
|
||||
}
|
||||
|
||||
export function createBlobForUpdate(projectId, historyId, update, callback) {
|
||||
callback = _.once(callback)
|
||||
|
||||
if (update.doc != null && update.docLines != null) {
|
||||
let ranges
|
||||
try {
|
||||
ranges = HistoryBlobTranslator.createRangeBlobDataFromUpdate(update)
|
||||
} catch (error) {
|
||||
return callback(error)
|
||||
}
|
||||
createBlobFromString(
|
||||
historyId,
|
||||
update.docLines,
|
||||
`project-${projectId}-doc-${update.doc}`,
|
||||
(err, fileHash) => {
|
||||
if (err) {
|
||||
return callback(err)
|
||||
}
|
||||
if (ranges) {
|
||||
createBlobFromString(
|
||||
historyId,
|
||||
JSON.stringify(ranges),
|
||||
`project-${projectId}-doc-${update.doc}-ranges`,
|
||||
(err, rangesHash) => {
|
||||
if (err) {
|
||||
return callback(err)
|
||||
}
|
||||
logger.debug(
|
||||
{ fileHash, rangesHash },
|
||||
'created blobs for both ranges and content'
|
||||
)
|
||||
return callback(null, { file: fileHash, ranges: rangesHash })
|
||||
}
|
||||
)
|
||||
} else {
|
||||
logger.debug({ fileHash }, 'created blob for content')
|
||||
return callback(null, { file: fileHash })
|
||||
}
|
||||
}
|
||||
)
|
||||
} else if (
|
||||
update.file != null &&
|
||||
(update.url != null || update.createdBlob)
|
||||
) {
|
||||
const { fileId, filestoreURL } = _rewriteFilestoreUrl(
|
||||
update.url,
|
||||
projectId,
|
||||
callback
|
||||
)
|
||||
_checkBlobExists(historyId, update.hash, (err, blobExists) => {
|
||||
if (err) {
|
||||
return callback(
|
||||
new OError(
|
||||
'error checking whether blob exists',
|
||||
{ projectId, historyId, update },
|
||||
err
|
||||
)
|
||||
)
|
||||
} else if (blobExists) {
|
||||
logger.debug(
|
||||
{ projectId, fileId, update },
|
||||
'Skipping blob creation as it has already been created'
|
||||
)
|
||||
return callback(null, { file: update.hash })
|
||||
} else if (update.createdBlob) {
|
||||
logger.warn(
|
||||
{ projectId, fileId, update },
|
||||
'created blob does not exist, reading from filestore'
|
||||
)
|
||||
}
|
||||
|
||||
if (!filestoreURL) {
|
||||
return callback(
|
||||
new OError('no filestore URL provided and blob was not created')
|
||||
)
|
||||
}
|
||||
if (!Settings.apis.filestore.enabled) {
|
||||
return callback(new OError('blocking filestore read', { update }))
|
||||
}
|
||||
|
||||
fetchStream(filestoreURL, {
|
||||
signal: AbortSignal.timeout(HTTP_REQUEST_TIMEOUT),
|
||||
})
|
||||
.then(stream => {
|
||||
LocalFileWriter.bufferOnDisk(
|
||||
stream,
|
||||
filestoreURL,
|
||||
`project-${projectId}-file-${fileId}`,
|
||||
(fsPath, cb) => {
|
||||
_createBlob(historyId, fsPath, cb)
|
||||
},
|
||||
(err, fileHash) => {
|
||||
if (err) {
|
||||
return callback(err)
|
||||
}
|
||||
if (update.hash && update.hash !== fileHash) {
|
||||
logger.warn(
|
||||
{ projectId, fileId, webHash: update.hash, fileHash },
|
||||
'hash mismatch between web and project-history'
|
||||
)
|
||||
}
|
||||
logger.debug({ fileHash }, 'created blob for file')
|
||||
callback(null, { file: fileHash })
|
||||
}
|
||||
)
|
||||
})
|
||||
.catch(err => {
|
||||
if (
|
||||
err instanceof RequestFailedError &&
|
||||
err.response.status === 404
|
||||
) {
|
||||
logger.warn(
|
||||
{ projectId, historyId, filestoreURL },
|
||||
'File contents not found in filestore. Storing in history as an empty file'
|
||||
)
|
||||
const emptyStream = new StringStream()
|
||||
LocalFileWriter.bufferOnDisk(
|
||||
emptyStream,
|
||||
filestoreURL,
|
||||
`project-${projectId}-file-${fileId}`,
|
||||
(fsPath, cb) => {
|
||||
_createBlob(historyId, fsPath, cb)
|
||||
},
|
||||
(err, fileHash) => {
|
||||
if (err) {
|
||||
return callback(err)
|
||||
}
|
||||
logger.debug({ fileHash }, 'created empty blob for file')
|
||||
callback(null, { file: fileHash })
|
||||
}
|
||||
)
|
||||
emptyStream.push(null) // send an EOF signal
|
||||
} else {
|
||||
callback(OError.tag(err, 'error from filestore', { filestoreURL }))
|
||||
}
|
||||
})
|
||||
})
|
||||
} else {
|
||||
const error = new OError('invalid update for blob creation')
|
||||
callback(error)
|
||||
}
|
||||
}
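// Summary of the branches above: doc updates (docLines present) create a
// content blob plus an optional ranges blob; file updates reuse an existing
// blob when it is already in the history store, otherwise they stream the
// file from filestore (falling back to an empty blob on a 404). The callback
// receives { file: hash } and, when ranges exist, { ranges: hash } as well.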
|
||||
|
||||
function _createBlob(historyId, fsPath, _callback) {
|
||||
const callback = _.once(_callback)
|
||||
|
||||
HashManager._getBlobHash(fsPath, (error, hash, byteLength) => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
const outStream = fs.createReadStream(fsPath)
|
||||
|
||||
logger.debug(
|
||||
{ fsPath, historyId, hash, byteLength },
|
||||
'sending blob to history service'
|
||||
)
|
||||
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${hash}`
|
||||
fetchNothing(url, {
|
||||
method: 'PUT',
|
||||
body: outStream,
|
||||
headers: { 'Content-Length': byteLength }, // add the content length to work around problems with chunked encoding in node 18
|
||||
...getHistoryFetchOptions(),
|
||||
})
|
||||
.then(res => {
|
||||
callback(null, hash)
|
||||
})
|
||||
.catch(err => {
|
||||
callback(OError.tag(err))
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export function initializeProject(historyId, callback) {
|
||||
_requestHistoryService(
|
||||
{
|
||||
method: 'POST',
|
||||
path: 'projects',
|
||||
json: historyId == null ? true : { projectId: historyId },
|
||||
},
|
||||
(error, project) => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
|
||||
const id = project.projectId
|
||||
if (id == null) {
|
||||
error = new OError('history store did not return a project id', { id })
|
||||
return callback(error)
|
||||
}
|
||||
|
||||
callback(null, id)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function deleteProject(projectId, callback) {
|
||||
_requestHistoryService(
|
||||
{ method: 'DELETE', path: `projects/${projectId}` },
|
||||
callback
|
||||
)
|
||||
}
|
||||
|
||||
const getProjectBlobAsync = promisify(getProjectBlob)
|
||||
|
||||
class BlobStore {
|
||||
constructor(projectId) {
|
||||
this.projectId = projectId
|
||||
}
|
||||
|
||||
async getString(hash) {
|
||||
return await getProjectBlobAsync(this.projectId, hash)
|
||||
}
|
||||
|
||||
async getObject(hash) {
|
||||
const string = await this.getString(hash)
|
||||
return JSON.parse(string)
|
||||
}
|
||||
}
|
||||
|
||||
export function getBlobStore(projectId) {
|
||||
return new BlobStore(projectId)
|
||||
}
|
||||
|
||||
function _requestOptions(options) {
|
||||
const requestOptions = {
|
||||
method: options.method || 'GET',
|
||||
url: `${Settings.overleaf.history.host}/${options.path}`,
|
||||
timeout: HTTP_REQUEST_TIMEOUT,
|
||||
auth: {
|
||||
user: Settings.overleaf.history.user,
|
||||
pass: Settings.overleaf.history.pass,
|
||||
sendImmediately: true,
|
||||
},
|
||||
}
|
||||
|
||||
if (options.json != null) {
|
||||
requestOptions.json = options.json
|
||||
}
|
||||
|
||||
if (options.body != null) {
|
||||
requestOptions.body = options.body
|
||||
}
|
||||
|
||||
if (options.qs != null) {
|
||||
requestOptions.qs = options.qs
|
||||
}
|
||||
|
||||
return requestOptions
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {RequestInit}
|
||||
*/
|
||||
function getHistoryFetchOptions() {
|
||||
return {
|
||||
signal: AbortSignal.timeout(HTTP_REQUEST_TIMEOUT),
|
||||
basicAuth: {
|
||||
user: Settings.overleaf.history.user,
|
||||
password: Settings.overleaf.history.pass,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
function _requestHistoryService(options, callback) {
|
||||
const requestOptions = _requestOptions(options)
|
||||
request(requestOptions, (error, res, body) => {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
|
||||
if (res.statusCode >= 200 && res.statusCode < 300) {
|
||||
callback(null, body)
|
||||
} else {
|
||||
const { method, url, qs } = requestOptions
|
||||
error = new OError(
|
||||
`history store returned a non-success status code: ${res.statusCode}`,
|
||||
{ method, url, qs, statusCode: res.statusCode }
|
||||
)
|
||||
callback(error)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
export const promises = {
|
||||
/** @type {(projectId: string, historyId: string) => Promise<{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}>} */
|
||||
getMostRecentChunk: promisify(getMostRecentChunk),
|
||||
getChunkAtVersion: promisify(getChunkAtVersion),
|
||||
getMostRecentVersion: promisifyMultiResult(getMostRecentVersion, [
|
||||
'version',
|
||||
'projectStructureAndDocVersions',
|
||||
'lastChange',
|
||||
'mostRecentChunk',
|
||||
]),
|
||||
getMostRecentVersionRaw: promisify(getMostRecentVersionRaw),
|
||||
getProjectBlob: promisify(getProjectBlob),
|
||||
getProjectBlobStream: promisify(getProjectBlobStream),
|
||||
sendChanges: promisify(sendChanges),
|
||||
createBlobForUpdate: promisify(createBlobForUpdate),
|
||||
initializeProject: promisify(initializeProject),
|
||||
deleteProject: promisify(deleteProject),
|
||||
}
|
582
services/project-history/app/js/HttpController.js
Normal file
@@ -0,0 +1,582 @@
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import request from 'request'
|
||||
import * as UpdatesProcessor from './UpdatesProcessor.js'
|
||||
import * as SummarizedUpdatesManager from './SummarizedUpdatesManager.js'
|
||||
import * as DiffManager from './DiffManager.js'
|
||||
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
import * as SnapshotManager from './SnapshotManager.js'
|
||||
import * as HealthChecker from './HealthChecker.js'
|
||||
import * as SyncManager from './SyncManager.js'
|
||||
import * as ErrorRecorder from './ErrorRecorder.js'
|
||||
import * as RedisManager from './RedisManager.js'
|
||||
import * as LabelsManager from './LabelsManager.js'
|
||||
import * as HistoryApiManager from './HistoryApiManager.js'
|
||||
import * as RetryManager from './RetryManager.js'
|
||||
import * as FlushManager from './FlushManager.js'
|
||||
import { pipeline } from 'node:stream'
|
||||
import { RequestFailedError } from '@overleaf/fetch-utils'
|
||||
|
||||
const ONE_DAY_IN_SECONDS = 24 * 60 * 60
|
||||
|
||||
export function getProjectBlob(req, res, next) {
|
||||
const historyId = req.params.history_id
|
||||
const blobHash = req.params.hash
|
||||
HistoryStoreManager.getProjectBlobStream(
|
||||
historyId,
|
||||
blobHash,
|
||||
(err, stream) => {
|
||||
if (err != null) {
|
||||
if (err instanceof RequestFailedError && err.response.status === 404) {
|
||||
return res.status(404).end()
|
||||
}
|
||||
return next(OError.tag(err))
|
||||
}
|
||||
res.setHeader('Cache-Control', `private, max-age=${ONE_DAY_IN_SECONDS}`)
|
||||
pipeline(stream, res, err => {
|
||||
if (err) next(err)
|
||||
// res.end() is already called via 'end' event by pipeline.
|
||||
})
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function initializeProject(req, res, next) {
|
||||
const { historyId } = req.body
|
||||
HistoryStoreManager.initializeProject(historyId, (error, id) => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
res.json({ project: { id } })
|
||||
})
|
||||
}
|
||||
|
||||
export function flushProject(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
if (req.query.debug) {
|
||||
logger.debug(
|
||||
{ projectId },
|
||||
'compressing project history in single-step mode'
|
||||
)
|
||||
UpdatesProcessor.processSingleUpdateForProject(projectId, error => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
res.sendStatus(204)
|
||||
})
|
||||
} else if (req.query.bisect) {
|
||||
logger.debug({ projectId }, 'compressing project history in bisect mode')
|
||||
UpdatesProcessor.processUpdatesForProjectUsingBisect(
|
||||
projectId,
|
||||
UpdatesProcessor.REDIS_READ_BATCH_SIZE,
|
||||
error => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
res.sendStatus(204)
|
||||
}
|
||||
)
|
||||
} else {
|
||||
logger.debug({ projectId }, 'compressing project history')
|
||||
UpdatesProcessor.processUpdatesForProject(projectId, error => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
res.sendStatus(204)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
export function dumpProject(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
const batchSize = req.query.count || UpdatesProcessor.REDIS_READ_BATCH_SIZE
|
||||
logger.debug({ projectId }, 'retrieving raw updates')
|
||||
UpdatesProcessor.getRawUpdates(projectId, batchSize, (error, rawUpdates) => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
res.json(rawUpdates)
|
||||
})
|
||||
}
|
||||
|
||||
export function flushOld(req, res, next) {
|
||||
const { maxAge, queueDelay, limit, timeout, background } = req.query
|
||||
const options = { maxAge, queueDelay, limit, timeout, background }
|
||||
FlushManager.flushOldOps(options, (error, results) => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
res.send(results)
|
||||
})
|
||||
}
|
||||
|
||||
export function getDiff(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
const { pathname, from, to } = req.query
|
||||
if (pathname == null) {
|
||||
return res.sendStatus(400)
|
||||
}
|
||||
|
||||
logger.debug({ projectId, pathname, from, to }, 'getting diff')
|
||||
DiffManager.getDiff(projectId, pathname, from, to, (error, diff) => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
res.json({ diff })
|
||||
})
|
||||
}
|
||||
|
||||
export function getFileTreeDiff(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
const { to, from } = req.query
|
||||
|
||||
DiffManager.getFileTreeDiff(projectId, from, to, (error, diff) => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
res.json({ diff })
|
||||
})
|
||||
}
|
||||
|
||||
export function getUpdates(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
const { before, min_count: minCount } = req.query
|
||||
SummarizedUpdatesManager.getSummarizedProjectUpdates(
|
||||
projectId,
|
||||
{ before, min_count: minCount },
|
||||
(error, updates, nextBeforeTimestamp) => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
for (const update of updates) {
|
||||
// Sets don't JSONify, so convert to arrays
|
||||
update.pathnames = Array.from(update.pathnames || []).sort()
|
||||
}
|
||||
res.json({
|
||||
updates,
|
||||
nextBeforeTimestamp,
|
||||
})
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function latestVersion(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
logger.debug({ projectId }, 'compressing project history and getting version')
|
||||
UpdatesProcessor.processUpdatesForProject(projectId, error => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
WebApiManager.getHistoryId(projectId, (error, historyId) => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
HistoryStoreManager.getMostRecentVersion(
|
||||
projectId,
|
||||
historyId,
|
||||
(error, version, projectStructureAndDocVersions, lastChange) => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
res.json({
|
||||
version,
|
||||
timestamp: lastChange != null ? lastChange.timestamp : undefined,
|
||||
v2Authors: lastChange != null ? lastChange.v2Authors : undefined,
|
||||
})
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export function getFileSnapshot(req, res, next) {
|
||||
const { project_id: projectId, version, pathname } = req.params
|
||||
SnapshotManager.getFileSnapshotStream(
|
||||
projectId,
|
||||
version,
|
||||
pathname,
|
||||
(error, stream) => {
|
||||
if (error != null) {
|
||||
return next(OError.tag(error))
|
||||
}
|
||||
pipeline(stream, res, err => {
|
||||
if (err) next(err)
|
||||
// res.end() is already called via 'end' event by pipeline.
|
||||
})
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function getRangesSnapshot(req, res, next) {
|
||||
const { project_id: projectId, version, pathname } = req.params
|
||||
SnapshotManager.getRangesSnapshot(
|
||||
projectId,
|
||||
version,
|
||||
pathname,
|
||||
(err, ranges) => {
|
||||
if (err) {
|
||||
return next(OError.tag(err))
|
||||
}
|
||||
res.json(ranges)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function getFileMetadataSnapshot(req, res, next) {
|
||||
const { project_id: projectId, version, pathname } = req.params
|
||||
SnapshotManager.getFileMetadataSnapshot(
|
||||
projectId,
|
||||
version,
|
||||
pathname,
|
||||
(err, data) => {
|
||||
if (err) {
|
||||
return next(OError.tag(err))
|
||||
}
|
||||
res.json(data)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function getLatestSnapshot(req, res, next) {
|
||||
const { project_id: projectId } = req.params
|
||||
WebApiManager.getHistoryId(projectId, (error, historyId) => {
|
||||
if (error) return next(OError.tag(error))
|
||||
SnapshotManager.getLatestSnapshot(
|
||||
projectId,
|
||||
historyId,
|
||||
(error, details) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
const { snapshot, version } = details
|
||||
res.json({ snapshot: snapshot.toRaw(), version })
|
||||
}
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
export function getChangesInChunkSince(req, res, next) {
|
||||
const { project_id: projectId } = req.params
|
||||
const { since } = req.query
|
||||
WebApiManager.getHistoryId(projectId, (error, historyId) => {
|
||||
if (error) return next(OError.tag(error))
|
||||
SnapshotManager.getChangesInChunkSince(
|
||||
projectId,
|
||||
historyId,
|
||||
since,
|
||||
(error, details) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
const { latestStartVersion, changes } = details
|
||||
res.json({
|
||||
latestStartVersion,
|
||||
changes: changes.map(c => c.toRaw()),
|
||||
})
|
||||
}
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
export function getProjectSnapshot(req, res, next) {
|
||||
const { project_id: projectId, version } = req.params
|
||||
SnapshotManager.getProjectSnapshot(
|
||||
projectId,
|
||||
version,
|
||||
(error, snapshotData) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.json(snapshotData)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function getPathsAtVersion(req, res, next) {
|
||||
const { project_id: projectId, version } = req.params
|
||||
SnapshotManager.getPathsAtVersion(projectId, version, (error, result) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.json(result)
|
||||
})
|
||||
}
|
||||
|
||||
export function healthCheck(req, res) {
|
||||
HealthChecker.check(err => {
|
||||
if (err != null) {
|
||||
logger.err({ err }, 'error performing health check')
|
||||
res.sendStatus(500)
|
||||
} else {
|
||||
res.sendStatus(200)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
export function checkLock(req, res) {
|
||||
HealthChecker.checkLock(err => {
|
||||
if (err != null) {
|
||||
logger.err({ err }, 'error performing lock check')
|
||||
res.sendStatus(500)
|
||||
} else {
|
||||
res.sendStatus(200)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
export function resyncProject(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
const options = {}
|
||||
if (req.body.origin) {
|
||||
options.origin = req.body.origin
|
||||
}
|
||||
if (req.body.historyRangesMigration) {
|
||||
options.historyRangesMigration = req.body.historyRangesMigration
|
||||
}
|
||||
if (req.query.force || req.body.force) {
|
||||
// this will delete the queue and clear the sync state
|
||||
// use if the project is completely broken
|
||||
SyncManager.startHardResync(projectId, options, error => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
// flush the sync operations
|
||||
UpdatesProcessor.processUpdatesForProject(projectId, error => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.sendStatus(204)
|
||||
})
|
||||
})
|
||||
} else {
|
||||
SyncManager.startResync(projectId, options, error => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
// flush the sync operations
|
||||
UpdatesProcessor.processUpdatesForProject(projectId, error => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.sendStatus(204)
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
export function forceDebugProject(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
// set the debug flag to true unless we see ?clear=true
|
||||
const state = !req.query.clear
|
||||
ErrorRecorder.setForceDebug(projectId, state, error => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
// display the failure record to help debugging
|
||||
ErrorRecorder.getFailureRecord(projectId, (error, result) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.send(result)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export function getFailures(req, res, next) {
|
||||
ErrorRecorder.getFailures((error, result) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.send({ failures: result })
|
||||
})
|
||||
}
|
||||
|
||||
export function getQueueCounts(req, res, next) {
|
||||
RedisManager.getProjectIdsWithHistoryOpsCount((err, queuedProjectsCount) => {
|
||||
if (err != null) {
|
||||
return next(err)
|
||||
}
|
||||
res.send({ queuedProjects: queuedProjectsCount })
|
||||
})
|
||||
}
|
||||
|
||||
export function getLabels(req, res, next) {
|
||||
const projectId = req.params.project_id
|
||||
HistoryApiManager.shouldUseProjectHistory(
|
||||
projectId,
|
||||
(error, shouldUseProjectHistory) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
if (shouldUseProjectHistory) {
|
||||
LabelsManager.getLabels(projectId, (error, labels) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.json(labels)
|
||||
})
|
||||
} else {
|
||||
res.sendStatus(409)
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function createLabel(req, res, next) {
|
||||
const { project_id: projectId, user_id: userIdParam } = req.params
|
||||
const {
|
||||
version,
|
||||
comment,
|
||||
user_id: userIdBody,
|
||||
created_at: createdAt,
|
||||
validate_exists: validateExists,
|
||||
} = req.body
|
||||
|
||||
// Temporarily looking up both params and body while rolling out changes
|
||||
// in the router path - https://github.com/overleaf/internal/pull/20200
|
||||
const userId = userIdParam || userIdBody
|
||||
|
||||
HistoryApiManager.shouldUseProjectHistory(
|
||||
projectId,
|
||||
(error, shouldUseProjectHistory) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
if (shouldUseProjectHistory) {
|
||||
LabelsManager.createLabel(
|
||||
projectId,
|
||||
userId,
|
||||
version,
|
||||
comment,
|
||||
createdAt,
|
||||
validateExists,
|
||||
(error, label) => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.json(label)
|
||||
}
|
||||
)
|
||||
} else {
|
||||
logger.error(
|
||||
{
|
||||
projectId,
|
||||
userId,
|
||||
version,
|
||||
comment,
|
||||
createdAt,
|
||||
validateExists,
|
||||
},
|
||||
'not using v2 history'
|
||||
)
|
||||
res.sendStatus(409)
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* This will delete a label if it is owned by the current user. If you wish to
|
||||
* delete a label regardless of the current user, then use `deleteLabel` instead.
|
||||
*/
|
||||
export function deleteLabelForUser(req, res, next) {
|
||||
const {
|
||||
project_id: projectId,
|
||||
user_id: userId,
|
||||
label_id: labelId,
|
||||
} = req.params
|
||||
|
||||
LabelsManager.deleteLabelForUser(projectId, userId, labelId, error => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.sendStatus(204)
|
||||
})
|
||||
}
|
||||
|
||||
export function deleteLabel(req, res, next) {
|
||||
const { project_id: projectId, label_id: labelId } = req.params
|
||||
|
||||
LabelsManager.deleteLabel(projectId, labelId, error => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.sendStatus(204)
|
||||
})
|
||||
}
|
||||
|
||||
export function retryFailures(req, res, next) {
|
||||
const { failureType, timeout, limit, callbackUrl } = req.query
|
||||
if (callbackUrl) {
|
||||
// send response but run in background when callbackUrl provided
|
||||
res.send({ retryStatus: 'running retryFailures in background' })
|
||||
}
|
||||
RetryManager.retryFailures(
|
||||
{ failureType, timeout, limit },
|
||||
(error, result) => {
|
||||
if (callbackUrl) {
|
||||
// if present, notify the callbackUrl on success
|
||||
if (!error) {
|
||||
// Needs Node 12
|
||||
// const callbackHeaders = Object.fromEntries(Object.entries(req.headers || {}).filter(([k,v]) => k.match(/^X-CALLBACK-/i)))
|
||||
const callbackHeaders = {}
|
||||
for (const key of Object.getOwnPropertyNames(
|
||||
req.headers || {}
|
||||
).filter(key => key.match(/^X-CALLBACK-/i))) {
|
||||
const found = key.match(/^X-CALLBACK-(.*)/i)
|
||||
callbackHeaders[found[1]] = req.headers[key]
|
||||
}
|
||||
request({ url: callbackUrl, headers: callbackHeaders })
|
||||
}
|
||||
} else {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.send({ retryStatus: result })
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function transferLabels(req, res, next) {
|
||||
const { from_user: fromUser, to_user: toUser } = req.params
|
||||
LabelsManager.transferLabels(fromUser, toUser, error => {
|
||||
if (error != null) {
|
||||
return next(error)
|
||||
}
|
||||
res.sendStatus(204)
|
||||
})
|
||||
}
|
||||
|
||||
export function deleteProject(req, res, next) {
|
||||
const { project_id: projectId } = req.params
|
||||
// clear the timestamp before clearing the queue,
|
||||
// because the queue location is used in the migration
|
||||
RedisManager.clearFirstOpTimestamp(projectId, err => {
|
||||
if (err) {
|
||||
return next(err)
|
||||
}
|
||||
RedisManager.clearCachedHistoryId(projectId, err => {
|
||||
if (err) {
|
||||
return next(err)
|
||||
}
|
||||
RedisManager.destroyDocUpdatesQueue(projectId, err => {
|
||||
if (err) {
|
||||
return next(err)
|
||||
}
|
||||
SyncManager.clearResyncState(projectId, err => {
|
||||
if (err) {
|
||||
return next(err)
|
||||
}
|
||||
ErrorRecorder.clearError(projectId, err => {
|
||||
if (err) {
|
||||
return next(err)
|
||||
}
|
||||
res.sendStatus(204)
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
175
services/project-history/app/js/LabelsManager.js
Normal file
@@ -0,0 +1,175 @@
|
||||
import OError from '@overleaf/o-error'
|
||||
import { db, ObjectId } from './mongodb.js'
|
||||
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
||||
import * as UpdatesProcessor from './UpdatesProcessor.js'
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
|
||||
export function getLabels(projectId, callback) {
|
||||
_toObjectId(projectId, function (error, projectId) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
db.projectHistoryLabels
|
||||
.find({ project_id: new ObjectId(projectId) })
|
||||
.toArray(function (error, labels) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
const formattedLabels = labels.map(_formatLabel)
|
||||
callback(null, formattedLabels)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export function createLabel(
|
||||
projectId,
|
||||
userId,
|
||||
version,
|
||||
comment,
|
||||
createdAt,
|
||||
shouldValidateExists,
|
||||
callback
|
||||
) {
|
||||
const validateVersionExists = function (callback) {
|
||||
if (shouldValidateExists === false) {
|
||||
callback()
|
||||
} else {
|
||||
_validateChunkExistsForVersion(projectId.toString(), version, callback)
|
||||
}
|
||||
}
|
||||
|
||||
_toObjectId(projectId, userId, function (error, projectId, userId) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
validateVersionExists(function (error) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
|
||||
createdAt = createdAt != null ? new Date(createdAt) : new Date()
|
||||
|
||||
const label = {
|
||||
project_id: new ObjectId(projectId),
|
||||
comment,
|
||||
version,
|
||||
created_at: createdAt,
|
||||
}
|
||||
if (userId) {
|
||||
label.user_id = userId
|
||||
}
|
||||
db.projectHistoryLabels.insertOne(label, function (error, confirmation) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
label._id = confirmation.insertedId
|
||||
callback(null, _formatLabel(label))
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export function deleteLabelForUser(projectId, userId, labelId, callback) {
|
||||
_toObjectId(
|
||||
projectId,
|
||||
userId,
|
||||
labelId,
|
||||
function (error, projectId, userId, labelId) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
db.projectHistoryLabels.deleteOne(
|
||||
{
|
||||
_id: new ObjectId(labelId),
|
||||
project_id: new ObjectId(projectId),
|
||||
user_id: new ObjectId(userId),
|
||||
},
|
||||
callback
|
||||
)
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function deleteLabel(projectId, labelId, callback) {
|
||||
_toObjectId(projectId, labelId, function (error, projectId, labelId) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
db.projectHistoryLabels.deleteOne(
|
||||
{
|
||||
_id: new ObjectId(labelId),
|
||||
project_id: new ObjectId(projectId),
|
||||
},
|
||||
callback
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
export function transferLabels(fromUserId, toUserId, callback) {
|
||||
_toObjectId(fromUserId, toUserId, function (error, fromUserId, toUserId) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
db.projectHistoryLabels.updateMany(
|
||||
{
|
||||
user_id: fromUserId,
|
||||
},
|
||||
{
|
||||
$set: { user_id: toUserId },
|
||||
},
|
||||
callback
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
function _toObjectId(...args1) {
|
||||
const adjustedLength = Math.max(args1.length, 1)
|
||||
const args = args1.slice(0, adjustedLength - 1)
|
||||
const callback = args1[adjustedLength - 1]
|
||||
try {
|
||||
const ids = args.map(id => {
|
||||
if (id) {
|
||||
return new ObjectId(id)
|
||||
} else {
|
||||
return undefined
|
||||
}
|
||||
})
|
||||
callback(null, ...ids)
|
||||
} catch (error) {
|
||||
callback(error)
|
||||
}
|
||||
}
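// _toObjectId is variadic: it converts every leading id argument to an
// ObjectId (falsy ids pass through as undefined) and forwards them to the
// trailing callback, e.g. (a sketch)
//   _toObjectId(projectId, userId, (err, projectIdObj, userIdObj) => { ... })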
|
||||
|
||||
function _formatLabel(label) {
|
||||
return {
|
||||
id: label._id,
|
||||
comment: label.comment,
|
||||
version: label.version,
|
||||
user_id: label.user_id,
|
||||
created_at: label.created_at,
|
||||
}
|
||||
}
|
||||
|
||||
function _validateChunkExistsForVersion(projectId, version, callback) {
|
||||
UpdatesProcessor.processUpdatesForProject(projectId, function (error) {
|
||||
if (error) {
|
||||
return callback(error)
|
||||
}
|
||||
WebApiManager.getHistoryId(projectId, function (error, historyId) {
|
||||
if (error) {
|
||||
return callback(error)
|
||||
}
|
||||
HistoryStoreManager.getChunkAtVersion(
|
||||
projectId,
|
||||
historyId,
|
||||
version,
|
||||
function (error) {
|
||||
if (error) {
|
||||
return callback(error)
|
||||
}
|
||||
callback()
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
88
services/project-history/app/js/LargeFileManager.js
Normal file
@@ -0,0 +1,88 @@
|
||||
/* eslint-disable
|
||||
no-unused-vars,
|
||||
*/
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import fs from 'node:fs'
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import Path from 'node:path'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import metrics from '@overleaf/metrics'
|
||||
import Settings from '@overleaf/settings'
|
||||
import _ from 'lodash'
|
||||
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
||||
import * as HashManager from './HashManager.js'
|
||||
|
||||
export function createStub(fsPath, fileId, fileSize, fileHash, callback) {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
callback = _.once(callback)
|
||||
const newFsPath = Path.join(
|
||||
Settings.path.uploadFolder,
|
||||
randomUUID() + `-${fileId}-stub`
|
||||
)
|
||||
const writeStream = fs.createWriteStream(newFsPath)
|
||||
writeStream.on('error', function (error) {
|
||||
OError.tag(error, 'error writing stub file', { fsPath, newFsPath })
|
||||
return fs.unlink(newFsPath, () => callback(error))
|
||||
})
|
||||
writeStream.on('finish', function () {
|
||||
logger.debug(
|
||||
{ fsPath, fileId, fileSize, fileHash },
|
||||
'replaced large file with stub'
|
||||
)
|
||||
return callback(null, newFsPath)
|
||||
}) // let the consumer unlink the file
|
||||
const stubLines = [
|
||||
'FileTooLargeError v1',
|
||||
'File too large to be stored in history service',
|
||||
`id ${fileId}`,
|
||||
`size ${fileSize} bytes`,
|
||||
`hash ${fileHash}`,
|
||||
'\0', // null byte to make this a binary file
|
||||
]
|
||||
writeStream.write(stubLines.join('\n'))
|
||||
return writeStream.end()
|
||||
}
|
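// Illustrative only (not part of this commit): what the stub written above
// looks like on disk, with invented id/size/hash values. The trailing null
// byte marks the stub as a binary file.
const exampleStubContent = [
  'FileTooLargeError v1',
  'File too large to be stored in history service',
  'id 64a7f0c2e4b0a1b2c3d4e5f6',
  'size 104857600 bytes',
  'hash da39a3ee5e6b4b0d3255bfef95601890afd80709',
  '\0',
].join('\n')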
||||
|
||||
export function replaceWithStubIfNeeded(fsPath, fileId, fileSize, callback) {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
if (
|
||||
Settings.maxFileSizeInBytes != null &&
|
||||
fileSize > Settings.maxFileSizeInBytes
|
||||
) {
|
||||
logger.error(
|
||||
{ fsPath, fileId, maxFileSizeInBytes: Settings.maxFileSizeInBytes },
|
||||
'file too large, will use stub'
|
||||
)
|
||||
return HashManager._getBlobHash(fsPath, function (error, fileHash) {
|
||||
if (error != null) {
|
||||
return callback(error)
|
||||
}
|
||||
return createStub(
|
||||
fsPath,
|
||||
fileId,
|
||||
fileSize,
|
||||
fileHash,
|
||||
function (error, newFsPath) {
|
||||
if (error != null) {
|
||||
return callback(error)
|
||||
}
|
||||
return callback(null, newFsPath)
|
||||
}
|
||||
)
|
||||
})
|
||||
} else {
|
||||
return callback(null, fsPath)
|
||||
}
|
||||
}
|
114
services/project-history/app/js/LocalFileWriter.js
Normal file
@@ -0,0 +1,114 @@
|
||||
/* eslint-disable
|
||||
no-unused-vars,
|
||||
*/
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import fs from 'node:fs'
|
||||
import { pipeline } from 'node:stream'
|
||||
import { randomUUID } from 'node:crypto'
|
||||
import path from 'node:path'
|
||||
import _ from 'lodash'
|
||||
import logger from '@overleaf/logger'
|
||||
import metrics from '@overleaf/metrics'
|
||||
import Settings from '@overleaf/settings'
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as LargeFileManager from './LargeFileManager.js'
|
||||
|
||||
//
|
||||
// This method takes a stream and provides you a new stream which is now
|
||||
// reading from disk.
|
||||
//
|
||||
// This is useful if we're piping one network stream to another. If the stream
|
||||
// we're piping to can't consume data as quickly as the one we're consuming
|
||||
// from then large quantities of data may be held in memory. Instead the read
|
||||
// stream can be passed to this method, the data will then be held on disk
|
||||
// rather than in memory and will be cleaned up once it has been consumed.
|
||||
//
|
||||
export function bufferOnDisk(
|
||||
inStream,
|
||||
url,
|
||||
fileId,
|
||||
consumeOutStream,
|
||||
callback
|
||||
) {
|
||||
const timer = new metrics.Timer('LocalFileWriter.writeStream')
|
||||
|
||||
const fsPath = path.join(
|
||||
Settings.path.uploadFolder,
|
||||
randomUUID() + `-${fileId}`
|
||||
)
|
||||
|
||||
const cleanup = _.once((streamError, res) => {
|
||||
return deleteFile(fsPath, function (cleanupError) {
|
||||
if (streamError) {
|
||||
OError.tag(streamError, 'error deleting temporary file', {
|
||||
fsPath,
|
||||
url,
|
||||
})
|
||||
}
|
||||
if (cleanupError) {
|
||||
OError.tag(cleanupError)
|
||||
}
|
||||
if (streamError && cleanupError) {
|
||||
// logging the cleanup error in case only the stream error is sent to the callback
|
||||
logger.error(cleanupError)
|
||||
}
|
||||
return callback(streamError || cleanupError, res)
|
||||
})
|
||||
})
|
||||
|
||||
logger.debug({ fsPath, url }, 'writing file locally')
|
||||
|
||||
const writeStream = fs.createWriteStream(fsPath)
|
||||
pipeline(inStream, writeStream, err => {
|
||||
if (err) {
|
||||
OError.tag(err, 'problem writing file locally', {
|
||||
fsPath,
|
||||
url,
|
||||
})
|
||||
return cleanup(err)
|
||||
}
|
||||
timer.done()
|
||||
// in future check inStream.response.headers for hash value here
|
||||
logger.debug({ fsPath, url }, 'stream closed after writing file locally')
|
||||
const fileSize = writeStream.bytesWritten
|
||||
return LargeFileManager.replaceWithStubIfNeeded(
|
||||
fsPath,
|
||||
fileId,
|
||||
fileSize,
|
||||
function (err, newFsPath) {
|
||||
if (err != null) {
|
||||
OError.tag(err, 'problem in large file manager', {
|
||||
newFsPath,
|
||||
fsPath,
|
||||
fileId,
|
||||
fileSize,
|
||||
})
|
||||
return cleanup(err)
|
||||
}
|
||||
return consumeOutStream(newFsPath, cleanup)
|
||||
}
|
||||
)
|
||||
})
|
||||
}
|
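// Minimal usage sketch (not part of this commit). The input stream, URL and
// fileId are hypothetical placeholders; `fs` is the module imported above.
const exampleInStream = fs.createReadStream('/tmp/example-input') // stand-in for a network stream
bufferOnDisk(
  exampleInStream,
  'http://example.invalid/file', // only used for logging
  'hypothetical-file-id',
  (bufferedPath, done) => {
    // consume the buffered file, then call done(); the temp file is removed for us
    fs.stat(bufferedPath, (err, stats) => done(err, stats && stats.size))
  },
  (err, sizeInBytes) => {
    if (err) throw err
  }
)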
||||
|
||||
export function deleteFile(fsPath, callback) {
|
||||
if (fsPath == null || fsPath === '') {
|
||||
return callback()
|
||||
}
|
||||
logger.debug({ fsPath }, 'removing local temp file')
|
||||
return fs.unlink(fsPath, function (err) {
|
||||
if (err != null && err.code !== 'ENOENT') {
|
||||
// ignore errors deleting the file when it was never created
|
||||
return callback(OError.tag(err))
|
||||
} else {
|
||||
return callback()
|
||||
}
|
||||
})
|
||||
}
|
314
services/project-history/app/js/LockManager.js
Normal file
@@ -0,0 +1,314 @@
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS101: Remove unnecessary use of Array.from
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS207: Consider shorter variations of null checks
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import { promisify } from 'node:util'
|
||||
import async from 'async'
|
||||
import metrics from '@overleaf/metrics'
|
||||
import Settings from '@overleaf/settings'
|
||||
import redis from '@overleaf/redis-wrapper'
|
||||
import os from 'node:os'
|
||||
import crypto from 'node:crypto'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
|
||||
const LOCK_TEST_INTERVAL = 50 // 50ms between each test of the lock
|
||||
const MAX_LOCK_WAIT_TIME = 10000 // 10s maximum time to spend trying to get the lock
|
||||
export const LOCK_TTL = 360 // seconds
|
||||
export const MIN_LOCK_EXTENSION_INTERVAL = 1000 // 1s minimum interval when extending a lock
|
||||
|
||||
export const UNLOCK_SCRIPT =
|
||||
'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end'
|
||||
const EXTEND_SCRIPT =
|
||||
'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("expire", KEYS[1], ARGV[2]) else return 0 end'
|
||||
|
||||
const HOST = os.hostname()
|
||||
const PID = process.pid
|
||||
const RND = crypto.randomBytes(4).toString('hex')
|
||||
let COUNT = 0
|
||||
|
||||
const rclient = redis.createClient(Settings.redis.lock)
|
||||
|
||||
/**
|
||||
* Container for functions that need to be mocked in tests
|
||||
*
|
||||
* TODO: Rewrite tests in terms of exported functions only
|
||||
*/
|
||||
export const _mocks = {}
|
||||
|
||||
// Use a signed lock value as described in
|
||||
// http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
|
||||
// to prevent accidental unlocking by multiple processes
|
||||
_mocks.randomLock = () => {
|
||||
const time = Date.now()
|
||||
return `locked:host=${HOST}:pid=${PID}:random=${RND}:time=${time}:count=${COUNT++}`
|
||||
}
|
||||
|
||||
export function randomLock(...args) {
|
||||
return _mocks.randomLock(...args)
|
||||
}
|
||||
|
||||
_mocks.tryLock = (key, callback) => {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
const lockValue = randomLock()
|
||||
return rclient.set(
|
||||
key,
|
||||
lockValue,
|
||||
'EX',
|
||||
LOCK_TTL,
|
||||
'NX',
|
||||
function (err, gotLock) {
|
||||
if (err != null) {
|
||||
return callback(
|
||||
OError.tag(err, 'redis error trying to get lock', { key })
|
||||
)
|
||||
}
|
||||
if (gotLock === 'OK') {
|
||||
metrics.inc('lock.project.try.success')
|
||||
return callback(err, true, lockValue)
|
||||
} else {
|
||||
metrics.inc('lock.project.try.failed')
|
||||
return callback(err, false)
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function tryLock(...args) {
|
||||
_mocks.tryLock(...args)
|
||||
}
|
||||
|
||||
_mocks.extendLock = (key, lockValue, callback) => {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
return rclient.eval(
|
||||
EXTEND_SCRIPT,
|
||||
1,
|
||||
key,
|
||||
lockValue,
|
||||
LOCK_TTL,
|
||||
function (err, result) {
|
||||
if (err != null) {
|
||||
return callback(
|
||||
OError.tag(err, 'redis error trying to extend lock', { key })
|
||||
)
|
||||
}
|
||||
|
||||
if (result != null && result !== 1) {
|
||||
// a successful extension should update the expiry on exactly one key
|
||||
metrics.inc('lock.project.extend.failed')
|
||||
const error = new OError('failed to extend lock', {
|
||||
key,
|
||||
lockValue,
|
||||
result,
|
||||
})
|
||||
return callback(error)
|
||||
}
|
||||
|
||||
metrics.inc('lock.project.extend.success')
|
||||
return callback()
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
export function extendLock(...args) {
|
||||
_mocks.extendLock(...args)
|
||||
}
|
||||
|
||||
_mocks.getLock = (key, callback) => {
|
||||
let attempt
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
const startTime = Date.now()
|
||||
let attempts = 0
|
||||
return (attempt = function () {
|
||||
if (Date.now() - startTime > MAX_LOCK_WAIT_TIME) {
|
||||
metrics.inc('lock.project.get.failed')
|
||||
return callback(new OError('Timeout', { key }))
|
||||
}
|
||||
|
||||
attempts += 1
|
||||
return tryLock(key, function (error, gotLock, lockValue) {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
if (gotLock) {
|
||||
metrics.gauge('lock.project.get.success.tries', attempts)
|
||||
return callback(null, lockValue)
|
||||
} else {
|
||||
return setTimeout(attempt, LOCK_TEST_INTERVAL)
|
||||
}
|
||||
})
|
||||
})()
|
||||
}
|
||||
|
||||
export function getLock(...args) {
|
||||
_mocks.getLock(...args)
|
||||
}
|
||||
|
||||
export function checkLock(key, callback) {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
return rclient.exists(key, function (err, exists) {
|
||||
if (err != null) {
|
||||
return callback(OError.tag(err))
|
||||
}
|
||||
exists = parseInt(exists)
|
||||
if (exists === 1) {
|
||||
return callback(err, false)
|
||||
} else {
|
||||
return callback(err, true)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
_mocks.releaseLock = (key, lockValue, callback) => {
|
||||
return rclient.eval(UNLOCK_SCRIPT, 1, key, lockValue, function (err, result) {
|
||||
if (err != null) {
|
||||
return callback(OError.tag(err))
|
||||
}
|
||||
if (result != null && result !== 1) {
|
||||
// successful unlock should release exactly one key
|
||||
const error = new OError('tried to release timed out lock', {
|
||||
key,
|
||||
lockValue,
|
||||
redis_result: result,
|
||||
})
|
||||
return callback(error)
|
||||
}
|
||||
return callback(err, result)
|
||||
})
|
||||
}
|
||||
|
||||
export function releaseLock(...args) {
|
||||
_mocks.releaseLock(...args)
|
||||
}
|
||||
|
||||
export function runWithLock(key, runner, callback) {
|
||||
if (callback == null) {
|
||||
callback = function () {}
|
||||
}
|
||||
return getLock(key, function (error, lockValue) {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
|
||||
const lock = new Lock(key, lockValue)
|
||||
return runner(lock.extend.bind(lock), (error1, ...args) =>
|
||||
lock.release(function (error2) {
|
||||
error = error1 || error2
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error), ...Array.from(args))
|
||||
}
|
||||
return callback(null, ...Array.from(args))
|
||||
})
|
||||
)
|
||||
})
|
||||
}
|
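// Minimal usage sketch (not part of this commit); the lock key is a
// hypothetical placeholder. Call extendLock periodically during long-running
// work so the redis TTL does not expire while the lock is still held; the
// lock itself is released by runWithLock when the runner finishes.
runWithLock(
  'ProjectHistoryLock:{hypothetical-project-id}',
  (extendLock, done) => {
    // ... first slice of work ...
    extendLock(err => {
      if (err) return done(err)
      // ... remaining work ...
      done(null)
    })
  },
  err => {
    if (err) throw err
  }
)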
||||
|
||||
export function healthCheck(callback) {
|
||||
const action = (extendLock, releaseLock) => releaseLock()
|
||||
return runWithLock(
|
||||
`HistoryLock:HealthCheck:host=${HOST}:pid=${PID}:random=${RND}`,
|
||||
action,
|
||||
callback
|
||||
)
|
||||
}
|
||||
|
||||
export function close(callback) {
|
||||
rclient.quit()
|
||||
return rclient.once('end', callback)
|
||||
}
|
||||
|
||||
class Lock {
|
||||
constructor(key, value) {
|
||||
this.key = key
|
||||
this.value = value
|
||||
this.slowExecutionError = new OError('slow execution during lock')
|
||||
this.lockTakenAt = Date.now()
|
||||
this.timer = new metrics.Timer('lock.project')
|
||||
}
|
||||
|
||||
extend(callback) {
|
||||
const lockLength = Date.now() - this.lockTakenAt
|
||||
if (lockLength < MIN_LOCK_EXTENSION_INTERVAL) {
|
||||
return async.setImmediate(callback)
|
||||
}
|
||||
return extendLock(this.key, this.value, error => {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
this.lockTakenAt = Date.now()
|
||||
return callback()
|
||||
})
|
||||
}
|
||||
|
||||
release(callback) {
|
||||
// The lock can expire in redis but the process carries on. This check is
|
||||
// designed to log if this happens.
|
||||
const lockLength = Date.now() - this.lockTakenAt
|
||||
if (lockLength > LOCK_TTL * 1000) {
|
||||
metrics.inc('lock.project.exceeded_lock_timeout')
|
||||
logger.debug('exceeded lock timeout', {
|
||||
key: this.key,
|
||||
slowExecutionError: this.slowExecutionError,
|
||||
})
|
||||
}
|
||||
|
||||
return releaseLock(this.key, this.value, error => {
|
||||
this.timer.done()
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
return callback()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Promisified version of runWithLock.
|
||||
*
|
||||
* @param {string} key
|
||||
* @param {(extendLock: Function) => Promise<any>} runner
|
||||
*/
|
||||
async function runWithLockPromises(key, runner) {
|
||||
const runnerCb = (extendLock, callback) => {
|
||||
const extendLockPromises = promisify(extendLock)
|
||||
runner(extendLockPromises)
|
||||
.then(result => {
|
||||
callback(null, result)
|
||||
})
|
||||
.catch(err => {
|
||||
callback(err)
|
||||
})
|
||||
}
|
||||
|
||||
return await new Promise((resolve, reject) => {
|
||||
runWithLock(key, runnerCb, (err, result) => {
|
||||
if (err) {
|
||||
reject(err)
|
||||
} else {
|
||||
resolve(result)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
export const promises = {
|
||||
tryLock: promisify(tryLock),
|
||||
extendLock: promisify(extendLock),
|
||||
getLock: promisify(getLock),
|
||||
checkLock: promisify(checkLock),
|
||||
releaseLock: promisify(releaseLock),
|
||||
runWithLock: runWithLockPromises,
|
||||
}
|
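// Promise-based usage sketch (not part of this commit); the key is a
// hypothetical placeholder. The runner receives a promisified extendLock and
// its resolved value is returned by promises.runWithLock.
async function exampleWithLock() {
  return await promises.runWithLock(
    'ProjectHistoryLock:{hypothetical-project-id}',
    async extendLock => {
      // ... first slice of work ...
      await extendLock()
      // ... remaining work ...
      return 'done'
    }
  )
}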
15
services/project-history/app/js/Metrics.js
Normal file
@@ -0,0 +1,15 @@
|
||||
// @ts-check
|
||||
|
||||
import { prom } from '@overleaf/metrics'
|
||||
|
||||
export const historyFlushDurationSeconds = new prom.Histogram({
|
||||
name: 'history_flush_duration_seconds',
|
||||
help: 'Duration of a history flush in seconds',
|
||||
buckets: [0.05, 0.1, 0.2, 0.3, 0.5, 1, 2, 5, 10],
|
||||
})
|
||||
|
||||
export const historyFlushQueueSize = new prom.Histogram({
|
||||
name: 'history_flush_queue_size',
|
||||
help: 'Size of the queue during history flushes',
|
||||
buckets: prom.exponentialBuckets(1, 2, 10),
|
||||
})
|
20
services/project-history/app/js/OperationsCompressor.js
Normal file
@@ -0,0 +1,20 @@
|
||||
export function compressOperations(operations) {
|
||||
if (!operations.length) return []
|
||||
|
||||
const newOperations = []
|
||||
let currentOperation = operations[0]
|
||||
for (let operationId = 1; operationId < operations.length; operationId++) {
|
||||
const nextOperation = operations[operationId]
|
||||
if (currentOperation.canBeComposedWith(nextOperation)) {
|
||||
currentOperation = currentOperation.compose(nextOperation)
|
||||
} else {
|
||||
// currentOperation and nextOperation cannot be composed. Push the
|
||||
// currentOperation and start over with nextOperation.
|
||||
newOperations.push(currentOperation)
|
||||
currentOperation = nextOperation
|
||||
}
|
||||
}
|
||||
newOperations.push(currentOperation)
|
||||
|
||||
return newOperations
|
||||
}
|
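// Usage sketch (not part of this commit). The operations are assumed to be
// overleaf-editor-core TextOperation instances, which provide
// canBeComposedWith() and compose(); the import below is an assumption.
import { TextOperation } from 'overleaf-editor-core'

const opA = new TextOperation().retain(5).insert('foo') // 5 -> 8 chars
const opB = new TextOperation().retain(8).insert('bar') // 8 -> 11 chars
const compressed = compressOperations([opA, opB])
// compressed.length is 1 if opA and opB can be composed, otherwise 2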
80
services/project-history/app/js/Profiler.js
Normal file
@@ -0,0 +1,80 @@
|
||||
/* eslint-disable
|
||||
no-unused-vars,
|
||||
*/
|
||||
// TODO: This file was created by bulk-decaffeinate.
|
||||
// Fix any style issues and re-enable lint.
|
||||
/*
|
||||
* decaffeinate suggestions:
|
||||
* DS101: Remove unnecessary use of Array.from
|
||||
* DS102: Remove unnecessary code created because of implicit returns
|
||||
* DS206: Consider reworking classes to avoid initClass
|
||||
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
|
||||
*/
|
||||
import Settings from '@overleaf/settings'
|
||||
import logger from '@overleaf/logger'
|
||||
import metrics from '@overleaf/metrics'
|
||||
|
||||
const LOG_CUTOFF_TIME = 1000
|
||||
|
||||
const deltaMs = function (ta, tb) {
|
||||
const nanoSeconds = (ta[0] - tb[0]) * 1e9 + (ta[1] - tb[1])
|
||||
const milliSeconds = Math.floor(nanoSeconds * 1e-6)
|
||||
return milliSeconds
|
||||
}
|
||||
|
||||
export class Profiler {
|
||||
constructor(name, args) {
|
||||
this.name = name
|
||||
this.args = args
|
||||
this.t0 = this.t = process.hrtime()
|
||||
this.start = new Date()
|
||||
this.updateTimes = []
|
||||
}
|
||||
|
||||
log(label) {
|
||||
const t1 = process.hrtime()
|
||||
const dtMilliSec = deltaMs(t1, this.t)
|
||||
this.t = t1
|
||||
this.updateTimes.push([label, dtMilliSec]) // timings in ms
|
||||
return this // make it chainable
|
||||
}
|
||||
|
||||
end(message) {
|
||||
const totalTime = deltaMs(this.t, this.t0)
|
||||
// record the update times in metrics
|
||||
for (const update of Array.from(this.updateTimes)) {
|
||||
metrics.timing(`profile.${this.name}.${update[0]}`, update[1])
|
||||
}
|
||||
if (totalTime > LOG_CUTOFF_TIME) {
|
||||
// log anything greater than cutoff
|
||||
const args = {}
|
||||
for (const k in this.args) {
|
||||
const v = this.args[k]
|
||||
args[k] = v
|
||||
}
|
||||
args.updateTimes = this.updateTimes
|
||||
args.start = this.start
|
||||
args.end = new Date()
|
||||
logger.debug(args, this.name)
|
||||
}
|
||||
return totalTime
|
||||
}
|
||||
|
||||
getTimeDelta() {
|
||||
const lastIdx = this.updateTimes.length - 1
|
||||
if (lastIdx >= 0) {
|
||||
return this.updateTimes[lastIdx][1]
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
wrap(label, fn) {
|
||||
// create a wrapped function which calls profile.log(label) before continuing execution
|
||||
const newFn = (...args) => {
|
||||
this.log(label)
|
||||
return fn(...Array.from(args || []))
|
||||
}
|
||||
return newFn
|
||||
}
|
||||
}
|
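// Usage sketch (not part of this commit); the name and args are invented.
// Per-phase timings are sent to metrics, and the whole profile is logged only
// when the total time exceeds LOG_CUTOFF_TIME (1s).
const profile = new Profiler('flushProject', { projectId: 'hypothetical-id' })
// ... phase one work ...
profile.log('readUpdates')
// ... phase two work ...
profile.log('sendChanges')
const totalMs = profile.end()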
445
services/project-history/app/js/RedisManager.js
Normal file
@@ -0,0 +1,445 @@
|
||||
import { callbackify, promisify } from 'node:util'
|
||||
import { setTimeout } from 'node:timers/promises'
|
||||
import logger from '@overleaf/logger'
|
||||
import Settings from '@overleaf/settings'
|
||||
import redis from '@overleaf/redis-wrapper'
|
||||
import metrics from '@overleaf/metrics'
|
||||
import OError from '@overleaf/o-error'
|
||||
|
||||
/**
|
||||
* Maximum size taken from the redis queue, to prevent project history
|
||||
* consuming unbounded amounts of memory
|
||||
*/
|
||||
export const RAW_UPDATE_SIZE_THRESHOLD = 4 * 1024 * 1024
|
||||
|
||||
/**
|
||||
* Batch size when reading updates from Redis
|
||||
*/
|
||||
export const RAW_UPDATES_BATCH_SIZE = 50
|
||||
|
||||
/**
|
||||
* Maximum length of ops (insertion and deletions) to process in a single
|
||||
* iteration
|
||||
*/
|
||||
export const MAX_UPDATE_OP_LENGTH = 1024
|
||||
|
||||
/**
|
||||
* Warn if we exceed this raw update size; the final compressed updates we
|
||||
* send could be smaller than this
|
||||
*/
|
||||
const WARN_RAW_UPDATE_SIZE = 1024 * 1024
|
||||
|
||||
/**
|
||||
* Maximum number of new docs to process in a single iteration
|
||||
*/
|
||||
export const MAX_NEW_DOC_CONTENT_COUNT = 32
|
||||
|
||||
const CACHE_TTL_IN_SECONDS = 3600
|
||||
|
||||
const Keys = Settings.redis.project_history.key_schema
|
||||
const rclient = redis.createClient(Settings.redis.project_history)
|
||||
|
||||
async function countUnprocessedUpdates(projectId) {
|
||||
const key = Keys.projectHistoryOps({ project_id: projectId })
|
||||
const updates = await rclient.llen(key)
|
||||
return updates
|
||||
}
|
||||
|
||||
async function* getRawUpdates(projectId) {
|
||||
const key = Keys.projectHistoryOps({ project_id: projectId })
|
||||
let start = 0
|
||||
while (true) {
|
||||
const stop = start + RAW_UPDATES_BATCH_SIZE - 1
|
||||
const updates = await rclient.lrange(key, start, stop)
|
||||
for (const update of updates) {
|
||||
yield update
|
||||
}
|
||||
if (updates.length < RAW_UPDATES_BATCH_SIZE) {
|
||||
return
|
||||
}
|
||||
start += RAW_UPDATES_BATCH_SIZE
|
||||
}
|
||||
}
|
||||
|
||||
async function getRawUpdatesBatch(projectId, batchSize) {
|
||||
const rawUpdates = []
|
||||
let totalRawUpdatesSize = 0
|
||||
let hasMore = false
|
||||
for await (const rawUpdate of getRawUpdates(projectId)) {
|
||||
totalRawUpdatesSize += rawUpdate.length
|
||||
if (
|
||||
rawUpdates.length > 0 &&
|
||||
totalRawUpdatesSize > RAW_UPDATE_SIZE_THRESHOLD
|
||||
) {
|
||||
hasMore = true
|
||||
break
|
||||
}
|
||||
rawUpdates.push(rawUpdate)
|
||||
if (rawUpdates.length >= batchSize) {
|
||||
hasMore = true
|
||||
break
|
||||
}
|
||||
}
|
||||
metrics.timing('redis.incoming.bytes', totalRawUpdatesSize, 1)
|
||||
if (totalRawUpdatesSize > WARN_RAW_UPDATE_SIZE) {
|
||||
const rawUpdateSizes = rawUpdates.map(rawUpdate => rawUpdate.length)
|
||||
logger.warn(
|
||||
{
|
||||
projectId,
|
||||
totalRawUpdatesSize,
|
||||
rawUpdateSizes,
|
||||
},
|
||||
'large raw update size'
|
||||
)
|
||||
}
|
||||
return { rawUpdates, hasMore }
|
||||
}
|
||||
|
||||
export function parseDocUpdates(jsonUpdates) {
|
||||
return jsonUpdates.map(update => JSON.parse(update))
|
||||
}
|
||||
|
||||
async function getUpdatesInBatches(projectId, batchSize, runner) {
|
||||
let moreBatches = true
|
||||
while (moreBatches) {
|
||||
const redisBatch = await getRawUpdatesBatch(projectId, batchSize)
|
||||
if (redisBatch.rawUpdates.length === 0) {
|
||||
break
|
||||
}
|
||||
moreBatches = redisBatch.hasMore
|
||||
|
||||
const rawUpdates = []
|
||||
const updates = []
|
||||
let totalOpLength = 0
|
||||
let totalDocContentCount = 0
|
||||
for (const rawUpdate of redisBatch.rawUpdates) {
|
||||
let update
|
||||
try {
|
||||
update = JSON.parse(rawUpdate)
|
||||
} catch (error) {
|
||||
throw OError.tag(error, 'failed to parse update', {
|
||||
projectId,
|
||||
update,
|
||||
})
|
||||
}
|
||||
|
||||
totalOpLength += update?.op?.length || 1
|
||||
if (update.resyncDocContent) {
|
||||
totalDocContentCount += 1
|
||||
}
|
||||
|
||||
if (
|
||||
updates.length > 0 &&
|
||||
(totalOpLength > MAX_UPDATE_OP_LENGTH ||
|
||||
totalDocContentCount > MAX_NEW_DOC_CONTENT_COUNT)
|
||||
) {
|
||||
moreBatches = true
|
||||
break
|
||||
}
|
||||
if (update.resyncProjectStructureOnly) {
|
||||
update._raw = rawUpdate
|
||||
}
|
||||
|
||||
rawUpdates.push(rawUpdate)
|
||||
updates.push(update)
|
||||
}
|
||||
|
||||
await runner(updates)
|
||||
await deleteAppliedDocUpdates(projectId, rawUpdates)
|
||||
|
||||
if (batchSize === 1) {
|
||||
// Special case for single stepping, don't process more batches
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
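// Usage sketch (not part of this commit); the project id is a hypothetical
// placeholder. Each batch is passed to the runner and, once the runner
// resolves, the corresponding raw updates are removed from the redis queue.
async function exampleFlush(projectId) {
  await promises.getUpdatesInBatches(
    projectId,
    RAW_UPDATES_BATCH_SIZE,
    async updates => {
      // apply the parsed updates to the history store here
    }
  )
}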
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {ResyncProjectStructureUpdate} update
|
||||
* @return {Promise<void>}
|
||||
*/
|
||||
async function deleteAppliedDocUpdate(projectId, update) {
|
||||
const raw = update._raw
|
||||
// Delete the first occurrence of the update with LREM KEY COUNT
|
||||
// VALUE by setting COUNT to 1 which 'removes COUNT elements equal to
|
||||
// value moving from head to tail.'
|
||||
//
|
||||
// If COUNT is 0 the entire list would be searched which would block
|
||||
// redis since it would be an O(N) operation where N is the length of
|
||||
// the queue.
|
||||
metrics.summary('redis.projectHistoryOps', raw.length, {
|
||||
status: 'lrem',
|
||||
})
|
||||
await rclient.lrem(Keys.projectHistoryOps({ project_id: projectId }), 1, raw)
|
||||
}
|
||||
|
||||
async function deleteAppliedDocUpdates(projectId, updates) {
|
||||
const multi = rclient.multi()
|
||||
// Delete all the updates which have been applied (exact match)
|
||||
for (const update of updates) {
|
||||
// Delete the first occurrence of the update with LREM KEY COUNT
|
||||
// VALUE by setting COUNT to 1 which 'removes COUNT elements equal to
|
||||
// value moving from head to tail.'
|
||||
//
|
||||
// If COUNT is 0 the entire list would be searched which would block
|
||||
// redis since it would be an O(N) operation where N is the length of
|
||||
// the queue, in a multi of the batch size.
|
||||
metrics.summary('redis.projectHistoryOps', update.length, {
|
||||
status: 'lrem',
|
||||
})
|
||||
multi.lrem(Keys.projectHistoryOps({ project_id: projectId }), 1, update)
|
||||
}
|
||||
if (updates.length > 0) {
|
||||
multi.del(Keys.projectHistoryFirstOpTimestamp({ project_id: projectId }))
|
||||
}
|
||||
await multi.exec()
|
||||
}
|
||||
|
||||
/**
|
||||
* Deletes the entire queue - use with caution
|
||||
*/
|
||||
async function destroyDocUpdatesQueue(projectId) {
|
||||
await rclient.del(
|
||||
Keys.projectHistoryOps({ project_id: projectId }),
|
||||
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterate over keys asynchronously using redis scan (non-blocking)
|
||||
*
|
||||
* Handles all the cluster nodes, or a single redis server
|
||||
*/
|
||||
async function _getKeys(pattern, limit) {
|
||||
const nodes = rclient.nodes?.('master') || [rclient]
|
||||
const keysByNode = []
|
||||
for (const node of nodes) {
|
||||
const keys = await _getKeysFromNode(node, pattern, limit)
|
||||
keysByNode.push(keys)
|
||||
}
|
||||
return [].concat(...keysByNode)
|
||||
}
|
||||
|
||||
async function _getKeysFromNode(node, pattern, limit) {
|
||||
let cursor = 0 // redis iterator
|
||||
const keySet = new Set() // avoid duplicate results
|
||||
const batchSize = limit != null ? Math.min(limit, 1000) : 1000
|
||||
|
||||
// scan over all keys looking for pattern
|
||||
while (true) {
|
||||
const reply = await node.scan(cursor, 'MATCH', pattern, 'COUNT', batchSize)
|
||||
const [newCursor, keys] = reply
|
||||
cursor = newCursor
|
||||
|
||||
for (const key of keys) {
|
||||
keySet.add(key)
|
||||
}
|
||||
|
||||
const noResults = cursor === '0' // redis returns string results not numeric
|
||||
const limitReached = limit != null && keySet.size >= limit
|
||||
if (noResults || limitReached) {
|
||||
return Array.from(keySet)
|
||||
}
|
||||
|
||||
// avoid hitting redis too hard
|
||||
await setTimeout(10)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b
|
||||
* or DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b} (for redis cluster)
|
||||
*/
|
||||
function _extractIds(keyList) {
|
||||
return keyList.map(key => {
|
||||
const m = key.match(/:\{?([0-9a-f]{24})\}?/) // extract object id
|
||||
return m[1]
|
||||
})
|
||||
}
|
||||
|
||||
async function getProjectIdsWithHistoryOps(limit) {
|
||||
const projectKeys = await _getKeys(
|
||||
Keys.projectHistoryOps({ project_id: '*' }),
|
||||
limit
|
||||
)
|
||||
const projectIds = _extractIds(projectKeys)
|
||||
return projectIds
|
||||
}
|
||||
|
||||
async function getProjectIdsWithHistoryOpsCount() {
|
||||
const projectIds = await getProjectIdsWithHistoryOps()
|
||||
const queuedProjectsCount = projectIds.length
|
||||
metrics.globalGauge('queued-projects', queuedProjectsCount)
|
||||
return queuedProjectsCount
|
||||
}
|
||||
|
||||
async function setFirstOpTimestamp(projectId) {
|
||||
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
|
||||
// store current time as an integer (string)
|
||||
await rclient.setnx(key, Date.now())
|
||||
}
|
||||
|
||||
async function getFirstOpTimestamp(projectId) {
|
||||
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
|
||||
const result = await rclient.get(key)
|
||||
|
||||
// convert stored time back to a numeric timestamp
|
||||
const timestamp = parseInt(result, 10)
|
||||
|
||||
// check for invalid timestamp
|
||||
if (isNaN(timestamp)) {
|
||||
return null
|
||||
}
|
||||
|
||||
// convert numeric timestamp to a date object
|
||||
const firstOpTimestamp = new Date(timestamp)
|
||||
|
||||
return firstOpTimestamp
|
||||
}
|
||||
|
||||
async function getFirstOpTimestamps(projectIds) {
|
||||
const keys = projectIds.map(projectId =>
|
||||
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
|
||||
)
|
||||
const results = await rclient.mget(keys)
|
||||
const timestamps = results.map(result => {
|
||||
// convert stored time back to a numeric timestamp
|
||||
const timestamp = parseInt(result, 10)
|
||||
|
||||
// check for invalid timestamp
|
||||
if (isNaN(timestamp)) {
|
||||
return null
|
||||
}
|
||||
|
||||
// convert numeric timestamp to a date object
|
||||
return new Date(timestamp)
|
||||
})
|
||||
return timestamps
|
||||
}
|
||||
|
||||
async function clearFirstOpTimestamp(projectId) {
|
||||
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
|
||||
await rclient.del(key)
|
||||
}
|
||||
|
||||
async function getProjectIdsWithFirstOpTimestamps(limit) {
|
||||
const projectKeys = await _getKeys(
|
||||
Keys.projectHistoryFirstOpTimestamp({ project_id: '*' }),
|
||||
limit
|
||||
)
|
||||
const projectIds = _extractIds(projectKeys)
|
||||
return projectIds
|
||||
}
|
||||
|
||||
async function clearDanglingFirstOpTimestamp(projectId) {
|
||||
const count = await rclient.exists(
|
||||
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId }),
|
||||
Keys.projectHistoryOps({ project_id: projectId })
|
||||
)
|
||||
if (count === 2 || count === 0) {
|
||||
// both (or neither) keys are present, so don't delete the timestamp
|
||||
return 0
|
||||
}
|
||||
// only one key is present, which makes this a dangling record,
|
||||
// so delete the timestamp
|
||||
const cleared = await rclient.del(
|
||||
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
|
||||
)
|
||||
return cleared
|
||||
}
|
||||
|
||||
async function getCachedHistoryId(projectId) {
|
||||
const key = Keys.projectHistoryCachedHistoryId({ project_id: projectId })
|
||||
const historyId = await rclient.get(key)
|
||||
return historyId
|
||||
}
|
||||
|
||||
async function setCachedHistoryId(projectId, historyId) {
|
||||
const key = Keys.projectHistoryCachedHistoryId({ project_id: projectId })
|
||||
await rclient.setex(key, CACHE_TTL_IN_SECONDS, historyId)
|
||||
}
|
||||
|
||||
async function clearCachedHistoryId(projectId) {
|
||||
const key = Keys.projectHistoryCachedHistoryId({ project_id: projectId })
|
||||
await rclient.del(key)
|
||||
}
|
||||
|
||||
// EXPORTS
|
||||
|
||||
const countUnprocessedUpdatesCb = callbackify(countUnprocessedUpdates)
|
||||
const getRawUpdatesBatchCb = callbackify(getRawUpdatesBatch)
|
||||
const deleteAppliedDocUpdatesCb = callbackify(deleteAppliedDocUpdates)
|
||||
const destroyDocUpdatesQueueCb = callbackify(destroyDocUpdatesQueue)
|
||||
const getProjectIdsWithHistoryOpsCb = callbackify(getProjectIdsWithHistoryOps)
|
||||
const getProjectIdsWithHistoryOpsCountCb = callbackify(
|
||||
getProjectIdsWithHistoryOpsCount
|
||||
)
|
||||
const setFirstOpTimestampCb = callbackify(setFirstOpTimestamp)
|
||||
const getFirstOpTimestampCb = callbackify(getFirstOpTimestamp)
|
||||
const getFirstOpTimestampsCb = callbackify(getFirstOpTimestamps)
|
||||
const clearFirstOpTimestampCb = callbackify(clearFirstOpTimestamp)
|
||||
const getProjectIdsWithFirstOpTimestampsCb = callbackify(
|
||||
getProjectIdsWithFirstOpTimestamps
|
||||
)
|
||||
const clearDanglingFirstOpTimestampCb = callbackify(
|
||||
clearDanglingFirstOpTimestamp
|
||||
)
|
||||
const getCachedHistoryIdCb = callbackify(getCachedHistoryId)
|
||||
const setCachedHistoryIdCb = callbackify(setCachedHistoryId)
|
||||
const clearCachedHistoryIdCb = callbackify(clearCachedHistoryId)
|
||||
|
||||
const getUpdatesInBatchesCb = function (
|
||||
projectId,
|
||||
batchSize,
|
||||
runner,
|
||||
callback
|
||||
) {
|
||||
const runnerPromises = promisify(runner)
|
||||
getUpdatesInBatches(projectId, batchSize, runnerPromises)
|
||||
.then(result => {
|
||||
callback(null, result)
|
||||
})
|
||||
.catch(err => {
|
||||
callback(err)
|
||||
})
|
||||
}
|
||||
|
||||
export {
|
||||
countUnprocessedUpdatesCb as countUnprocessedUpdates,
|
||||
getRawUpdatesBatchCb as getRawUpdatesBatch,
|
||||
deleteAppliedDocUpdatesCb as deleteAppliedDocUpdates,
|
||||
destroyDocUpdatesQueueCb as destroyDocUpdatesQueue,
|
||||
getUpdatesInBatchesCb as getUpdatesInBatches,
|
||||
getProjectIdsWithHistoryOpsCb as getProjectIdsWithHistoryOps,
|
||||
getProjectIdsWithHistoryOpsCountCb as getProjectIdsWithHistoryOpsCount,
|
||||
setFirstOpTimestampCb as setFirstOpTimestamp,
|
||||
getFirstOpTimestampCb as getFirstOpTimestamp,
|
||||
getFirstOpTimestampsCb as getFirstOpTimestamps,
|
||||
clearFirstOpTimestampCb as clearFirstOpTimestamp,
|
||||
getProjectIdsWithFirstOpTimestampsCb as getProjectIdsWithFirstOpTimestamps,
|
||||
clearDanglingFirstOpTimestampCb as clearDanglingFirstOpTimestamp,
|
||||
getCachedHistoryIdCb as getCachedHistoryId,
|
||||
setCachedHistoryIdCb as setCachedHistoryId,
|
||||
clearCachedHistoryIdCb as clearCachedHistoryId,
|
||||
}
|
||||
|
||||
export const promises = {
|
||||
countUnprocessedUpdates,
|
||||
getRawUpdatesBatch,
|
||||
deleteAppliedDocUpdates,
|
||||
deleteAppliedDocUpdate,
|
||||
destroyDocUpdatesQueue,
|
||||
getUpdatesInBatches,
|
||||
getProjectIdsWithHistoryOps,
|
||||
getProjectIdsWithHistoryOpsCount,
|
||||
setFirstOpTimestamp,
|
||||
getFirstOpTimestamp,
|
||||
getFirstOpTimestamps,
|
||||
clearFirstOpTimestamp,
|
||||
getProjectIdsWithFirstOpTimestamps,
|
||||
clearDanglingFirstOpTimestamp,
|
||||
getCachedHistoryId,
|
||||
setCachedHistoryId,
|
||||
clearCachedHistoryId,
|
||||
}
|
194
services/project-history/app/js/RetryManager.js
Normal file
@@ -0,0 +1,194 @@
|
||||
import _ from 'lodash'
|
||||
import { promisify, callbackify } from 'node:util'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as UpdatesProcessor from './UpdatesProcessor.js'
|
||||
import * as SyncManager from './SyncManager.js'
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
import * as RedisManager from './RedisManager.js'
|
||||
import * as ErrorRecorder from './ErrorRecorder.js'
|
||||
|
||||
const sleep = promisify(setTimeout)
|
||||
|
||||
const TEMPORARY_FAILURES = [
|
||||
'Error: ENOSPC: no space left on device, write',
|
||||
'Error: ESOCKETTIMEDOUT',
|
||||
'Error: failed to extend lock',
|
||||
'Error: tried to release timed out lock',
|
||||
'Error: Timeout',
|
||||
]
|
||||
|
||||
const HARD_FAILURES = [
|
||||
'Error: history store a non-success status code: 422',
|
||||
'OError: history store a non-success status code: 422',
|
||||
'OpsOutOfOrderError: project structure version out of order',
|
||||
'OpsOutOfOrderError: project structure version out of order on incoming updates',
|
||||
'OpsOutOfOrderError: doc version out of order',
|
||||
'OpsOutOfOrderError: doc version out of order on incoming updates',
|
||||
]
|
||||
|
||||
const MAX_RESYNC_ATTEMPTS = 2
|
||||
const MAX_SOFT_RESYNC_ATTEMPTS = 1
|
||||
|
||||
export const promises = {}
|
||||
|
||||
promises.retryFailures = async (options = {}) => {
|
||||
const { failureType, timeout, limit } = options
|
||||
if (failureType === 'soft') {
|
||||
const batch = await getFailureBatch(softErrorSelector, limit)
|
||||
const result = await retryFailureBatch(batch, timeout, async failure => {
|
||||
await UpdatesProcessor.promises.processUpdatesForProject(
|
||||
failure.project_id
|
||||
)
|
||||
})
|
||||
return result
|
||||
} else if (failureType === 'hard') {
|
||||
const batch = await getFailureBatch(hardErrorSelector, limit)
|
||||
const result = await retryFailureBatch(batch, timeout, async failure => {
|
||||
await resyncProject(failure.project_id, {
|
||||
hard: failureRequiresHardResync(failure),
|
||||
})
|
||||
})
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
export const retryFailures = callbackify(promises.retryFailures)
|
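// Usage sketch (not part of this commit): retry up to 50 soft failures,
// giving up after 60 seconds. The timeout and limit values are invented.
async function exampleRetry() {
  const { succeeded, failed } = await promises.retryFailures({
    failureType: 'soft',
    timeout: 60 * 1000,
    limit: 50,
  })
  return { succeeded, failed }
}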
||||
|
||||
function softErrorSelector(failure) {
|
||||
return (
|
||||
(isTemporaryFailure(failure) && !isRepeatedFailure(failure)) ||
|
||||
(isFirstFailure(failure) && !isHardFailure(failure))
|
||||
)
|
||||
}
|
||||
|
||||
function hardErrorSelector(failure) {
|
||||
return (
|
||||
(isHardFailure(failure) || isRepeatedFailure(failure)) &&
|
||||
!isStuckFailure(failure)
|
||||
)
|
||||
}
|
||||
|
||||
function isTemporaryFailure(failure) {
|
||||
return TEMPORARY_FAILURES.includes(failure.error)
|
||||
}
|
||||
|
||||
export function isHardFailure(failure) {
|
||||
return HARD_FAILURES.includes(failure.error)
|
||||
}
|
||||
|
||||
export function isFirstFailure(failure) {
|
||||
return failure.attempts <= 1
|
||||
}
|
||||
|
||||
function isRepeatedFailure(failure) {
|
||||
return failure.attempts > 3
|
||||
}
|
||||
|
||||
function isStuckFailure(failure) {
|
||||
return (
|
||||
failure.resyncAttempts != null &&
|
||||
failure.resyncAttempts >= MAX_RESYNC_ATTEMPTS
|
||||
)
|
||||
}
|
||||
|
||||
function failureRequiresHardResync(failure) {
|
||||
return (
|
||||
failure.resyncAttempts != null &&
|
||||
failure.resyncAttempts >= MAX_SOFT_RESYNC_ATTEMPTS
|
||||
)
|
||||
}
|
||||
|
||||
async function getFailureBatch(selector, limit) {
|
||||
let failures = await ErrorRecorder.promises.getFailedProjects()
|
||||
failures = failures.filter(selector)
|
||||
// randomise order
|
||||
failures = _.shuffle(failures)
|
||||
|
||||
// put a limit on the number to retry
|
||||
const projectsToRetryCount = failures.length
|
||||
if (limit && projectsToRetryCount > limit) {
|
||||
failures = failures.slice(0, limit)
|
||||
}
|
||||
logger.debug({ projectsToRetryCount, limit }, 'retrying failed projects')
|
||||
return failures
|
||||
}
|
||||
|
||||
async function retryFailureBatch(failures, timeout, retryHandler) {
|
||||
const startTime = new Date()
|
||||
|
||||
// keep track of successes and failures
|
||||
const failed = []
|
||||
const succeeded = []
|
||||
for (const failure of failures) {
|
||||
const projectId = failure.project_id
|
||||
const timeTaken = new Date() - startTime
|
||||
if (timeout && timeTaken > timeout) {
|
||||
// finish early due to timeout
|
||||
logger.debug('background retries timed out')
|
||||
break
|
||||
}
|
||||
logger.debug(
|
||||
{ projectId, timeTaken },
|
||||
'retrying failed project in background'
|
||||
)
|
||||
try {
|
||||
await retryHandler(failure)
|
||||
succeeded.push(projectId)
|
||||
} catch (err) {
|
||||
failed.push(projectId)
|
||||
}
|
||||
}
|
||||
return { succeeded, failed }
|
||||
}
|
||||
|
||||
async function resyncProject(projectId, options = {}) {
|
||||
const { hard = false } = options
|
||||
try {
|
||||
if (!/^[0-9a-f]{24}$/.test(projectId)) {
|
||||
logger.debug({ projectId }, 'clearing bad project id')
|
||||
await ErrorRecorder.promises.clearError(projectId)
|
||||
return
|
||||
}
|
||||
|
||||
await checkProjectHasHistoryId(projectId)
|
||||
if (hard) {
|
||||
await SyncManager.promises.startHardResync(projectId)
|
||||
} else {
|
||||
await SyncManager.promises.startResync(projectId)
|
||||
}
|
||||
await waitUntilRedisQueueIsEmpty(projectId)
|
||||
await checkFailureRecordWasRemoved(projectId)
|
||||
} catch (err) {
|
||||
throw new OError({
|
||||
message: 'failed to resync project',
|
||||
info: { projectId, hard },
|
||||
}).withCause(err)
|
||||
}
|
||||
}
|
||||
|
||||
async function checkProjectHasHistoryId(projectId) {
|
||||
const historyId = await WebApiManager.promises.getHistoryId(projectId)
|
||||
if (historyId == null) {
|
||||
throw new OError('no history id')
|
||||
}
|
||||
}
|
||||
|
||||
async function waitUntilRedisQueueIsEmpty(projectId) {
|
||||
for (let attempts = 0; attempts < 30; attempts++) {
|
||||
const updatesCount =
|
||||
await RedisManager.promises.countUnprocessedUpdates(projectId)
|
||||
if (updatesCount === 0) {
|
||||
return
|
||||
}
|
||||
await sleep(1000)
|
||||
}
|
||||
throw new OError('queue not empty')
|
||||
}
|
||||
|
||||
async function checkFailureRecordWasRemoved(projectId) {
|
||||
const failureRecord = await ErrorRecorder.promises.getFailureRecord(projectId)
|
||||
if (failureRecord) {
|
||||
throw new OError('failure record still exists')
|
||||
}
|
||||
}
|
250
services/project-history/app/js/Router.js
Normal file
@@ -0,0 +1,250 @@
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as HttpController from './HttpController.js'
|
||||
import { Joi, validate } from './Validation.js'
|
||||
|
||||
export function initialize(app) {
|
||||
app.use(
|
||||
validate({
|
||||
params: Joi.object({
|
||||
project_id: Joi.string().regex(/^[0-9a-f]{24}$/),
|
||||
user_id: Joi.string().regex(/^[0-9a-f]{24}$/),
|
||||
label_id: Joi.string().regex(/^[0-9a-f]{24}$/),
|
||||
version: Joi.number().integer(),
|
||||
}),
|
||||
})
|
||||
)
|
||||
|
||||
// use an extended timeout on all endpoints, to allow for long requests to history-v1
|
||||
app.use(longerTimeout)
|
||||
|
||||
app.post('/project', HttpController.initializeProject)
|
||||
|
||||
app.delete('/project/:project_id', HttpController.deleteProject)
|
||||
|
||||
app.get('/project/:project_id/snapshot', HttpController.getLatestSnapshot)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/diff',
|
||||
validate({
|
||||
query: {
|
||||
pathname: Joi.string().required(),
|
||||
from: Joi.number().integer().required(),
|
||||
to: Joi.number().integer().required(),
|
||||
},
|
||||
}),
|
||||
HttpController.getDiff
|
||||
)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/filetree/diff',
|
||||
validate({
|
||||
query: {
|
||||
from: Joi.number().integer().required(),
|
||||
to: Joi.number().integer().required(),
|
||||
},
|
||||
}),
|
||||
HttpController.getFileTreeDiff
|
||||
)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/updates',
|
||||
validate({
|
||||
query: {
|
||||
before: Joi.number().integer(),
|
||||
min_count: Joi.number().integer(),
|
||||
},
|
||||
}),
|
||||
HttpController.getUpdates
|
||||
)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/changes-in-chunk',
|
||||
validate({
|
||||
query: {
|
||||
since: Joi.number().integer().min(0),
|
||||
},
|
||||
}),
|
||||
HttpController.getChangesInChunkSince
|
||||
)
|
||||
|
||||
app.get('/project/:project_id/version', HttpController.latestVersion)
|
||||
|
||||
app.post(
|
||||
'/project/:project_id/flush',
|
||||
validate({
|
||||
query: {
|
||||
debug: Joi.boolean().default(false),
|
||||
bisect: Joi.boolean().default(false),
|
||||
},
|
||||
}),
|
||||
HttpController.flushProject
|
||||
)
|
||||
|
||||
app.post(
|
||||
'/project/:project_id/resync',
|
||||
validate({
|
||||
query: {
|
||||
force: Joi.boolean().default(false),
|
||||
},
|
||||
body: {
|
||||
force: Joi.boolean().default(false),
|
||||
origin: Joi.object({
|
||||
kind: Joi.string().required(),
|
||||
}),
|
||||
historyRangesMigration: Joi.string()
|
||||
.optional()
|
||||
.valid('forwards', 'backwards'),
|
||||
},
|
||||
}),
|
||||
HttpController.resyncProject
|
||||
)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/dump',
|
||||
validate({
|
||||
query: {
|
||||
count: Joi.number().integer(),
|
||||
},
|
||||
}),
|
||||
HttpController.dumpProject
|
||||
)
|
||||
|
||||
app.get('/project/:project_id/labels', HttpController.getLabels)
|
||||
|
||||
app.post(
|
||||
'/project/:project_id/labels',
|
||||
validate({
|
||||
body: {
|
||||
version: Joi.number().integer().required(),
|
||||
comment: Joi.string().required(),
|
||||
created_at: Joi.string(),
|
||||
validate_exists: Joi.boolean().default(true),
|
||||
user_id: Joi.string().allow(null),
|
||||
},
|
||||
}),
|
||||
|
||||
HttpController.createLabel
|
||||
)
|
||||
|
||||
app.delete(
|
||||
'/project/:project_id/user/:user_id/labels/:label_id',
|
||||
validate({
|
||||
params: Joi.object({
|
||||
project_id: Joi.string().regex(/^[0-9a-f]{24}$/),
|
||||
user_id: Joi.string().regex(/^[0-9a-f]{24}$/),
|
||||
label_id: Joi.string().regex(/^[0-9a-f]{24}$/),
|
||||
}),
|
||||
}),
|
||||
HttpController.deleteLabelForUser
|
||||
)
|
||||
|
||||
app.delete(
|
||||
'/project/:project_id/labels/:label_id',
|
||||
validate({
|
||||
params: Joi.object({
|
||||
project_id: Joi.string().regex(/^[0-9a-f]{24}$/),
|
||||
label_id: Joi.string().regex(/^[0-9a-f]{24}$/),
|
||||
}),
|
||||
}),
|
||||
HttpController.deleteLabel
|
||||
)
|
||||
|
||||
app.post(
|
||||
'/user/:from_user/labels/transfer/:to_user',
|
||||
HttpController.transferLabels
|
||||
)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/version/:version/:pathname',
|
||||
HttpController.getFileSnapshot
|
||||
)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/ranges/version/:version/:pathname',
|
||||
HttpController.getRangesSnapshot
|
||||
)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/metadata/version/:version/:pathname',
|
||||
HttpController.getFileMetadataSnapshot
|
||||
)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/version/:version',
|
||||
HttpController.getProjectSnapshot
|
||||
)
|
||||
|
||||
app.get(
|
||||
'/project/:project_id/paths/version/:version',
|
||||
HttpController.getPathsAtVersion
|
||||
)
|
||||
|
||||
app.post(
|
||||
'/project/:project_id/force',
|
||||
validate({
|
||||
query: {
|
||||
clear: Joi.boolean().default(false),
|
||||
},
|
||||
}),
|
||||
HttpController.forceDebugProject
|
||||
)
|
||||
|
||||
app.get('/project/:history_id/blob/:hash', HttpController.getProjectBlob)
|
||||
|
||||
app.get('/status/failures', HttpController.getFailures)
|
||||
|
||||
app.get('/status/queue', HttpController.getQueueCounts)
|
||||
|
||||
app.post(
|
||||
'/retry/failures',
|
||||
validate({
|
||||
query: {
|
||||
failureType: Joi.string().valid('soft', 'hard'),
|
||||
// bail out after this time limit
|
||||
timeout: Joi.number().integer().default(300),
|
||||
// maximum number of projects to check
|
||||
limit: Joi.number().integer().default(100),
|
||||
callbackUrl: Joi.string(),
|
||||
},
|
||||
}),
|
||||
HttpController.retryFailures
|
||||
)
|
||||
|
||||
app.post(
|
||||
'/flush/old',
|
||||
validate({
|
||||
query: {
|
||||
// flush projects with queued ops older than this
|
||||
maxAge: Joi.number()
|
||||
.integer()
|
||||
.default(6 * 3600),
|
||||
// pause this amount of time between checking queues
|
||||
queueDelay: Joi.number().integer().default(100),
|
||||
// maximum number of queues to check
|
||||
limit: Joi.number().integer().default(1000),
|
||||
// maximum amount of time allowed
|
||||
timeout: Joi.number()
|
||||
.integer()
|
||||
.default(60 * 1000),
|
||||
// whether to run in the background
|
||||
background: Joi.boolean().falsy('0').truthy('1').default(false),
|
||||
},
|
||||
}),
|
||||
HttpController.flushOld
|
||||
)
|
||||
|
||||
app.get('/status', (req, res, next) => res.send('project-history is up'))
|
||||
|
||||
app.get('/oops', function (req, res, next) {
|
||||
throw new OError('dummy test error')
|
||||
})
|
||||
|
||||
app.get('/check_lock', HttpController.checkLock)
|
||||
|
||||
app.get('/health_check', HttpController.healthCheck)
|
||||
}
|
||||
|
||||
function longerTimeout(req, res, next) {
|
||||
res.setTimeout(6 * 60 * 1000)
|
||||
next()
|
||||
}
|
426
services/project-history/app/js/SnapshotManager.js
Normal file
@@ -0,0 +1,426 @@
|
||||
// @ts-check
|
||||
|
||||
import { callbackify } from 'node:util'
|
||||
import Core from 'overleaf-editor-core'
|
||||
import { Readable as StringStream } from 'node:stream'
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
import * as Errors from './Errors.js'
|
||||
import _ from 'lodash'
|
||||
|
||||
/**
|
||||
* @import { Snapshot } from 'overleaf-editor-core'
|
||||
* @import { RangesSnapshot } from './types'
|
||||
*/
|
||||
|
||||
StringStream.prototype._read = function () {}
|
||||
|
||||
const MAX_REQUESTS = 4 // maximum number of parallel requests to v1 history service
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} projectId
|
||||
* @param {number} version
|
||||
* @param {string} pathname
|
||||
*/
|
||||
async function getFileSnapshotStream(projectId, version, pathname) {
|
||||
const snapshot = await _getSnapshotAtVersion(projectId, version)
|
||||
|
||||
const file = snapshot.getFile(pathname)
|
||||
if (file == null) {
|
||||
throw new Errors.NotFoundError(`${pathname} not found`, {
|
||||
projectId,
|
||||
version,
|
||||
pathname,
|
||||
})
|
||||
}
|
||||
|
||||
const historyId = await WebApiManager.promises.getHistoryId(projectId)
|
||||
if (file.isEditable()) {
|
||||
await file.load('eager', HistoryStoreManager.getBlobStore(historyId))
|
||||
const stream = new StringStream()
|
||||
stream.push(file.getContent({ filterTrackedDeletes: true }))
|
||||
stream.push(null)
|
||||
return stream
|
||||
} else {
|
||||
return await HistoryStoreManager.promises.getProjectBlobStream(
|
||||
historyId,
|
||||
file.getHash()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructs a snapshot of the ranges in a document-updater compatible format.
|
||||
* Positions will be relative to a document where tracked deletes have been
|
||||
* removed from the string. This also means that if a tracked delete overlaps
|
||||
* a comment range, the comment range will be truncated.
|
||||
*
|
||||
* @param {string} projectId
|
||||
* @param {number} version
|
||||
* @param {string} pathname
|
||||
* @returns {Promise<RangesSnapshot>}
|
||||
*/
|
||||
async function getRangesSnapshot(projectId, version, pathname) {
|
||||
const snapshot = await _getSnapshotAtVersion(projectId, version)
|
||||
const file = snapshot.getFile(pathname)
|
||||
if (!file) {
|
||||
throw new Errors.NotFoundError(`${pathname} not found`, {
|
||||
projectId,
|
||||
version,
|
||||
pathname,
|
||||
})
|
||||
}
|
||||
if (!file.isEditable()) {
|
||||
// A binary file has no tracked changes or comments
|
||||
return {
|
||||
changes: [],
|
||||
comments: [],
|
||||
}
|
||||
}
|
||||
const historyId = await WebApiManager.promises.getHistoryId(projectId)
|
||||
await file.load('eager', HistoryStoreManager.getBlobStore(historyId))
|
||||
const content = file.getContent()
|
||||
if (content == null) {
|
||||
throw new Error('Unable to read file contents')
|
||||
}
|
||||
const trackedChanges = file.getTrackedChanges().asSorted()
|
||||
const comments = file.getComments().toArray()
|
||||
const docUpdaterCompatibleTrackedChanges = []
|
||||
|
||||
let trackedDeletionOffset = 0
|
||||
for (const trackedChange of trackedChanges) {
|
||||
const isTrackedDeletion = trackedChange.tracking.type === 'delete'
|
||||
const trackedChangeContent = content.slice(
|
||||
trackedChange.range.start,
|
||||
trackedChange.range.end
|
||||
)
|
||||
const tcContent = isTrackedDeletion
|
||||
? { d: trackedChangeContent }
|
||||
: { i: trackedChangeContent }
|
||||
docUpdaterCompatibleTrackedChanges.push({
|
||||
op: {
|
||||
p: trackedChange.range.start - trackedDeletionOffset,
|
||||
...tcContent,
|
||||
},
|
||||
metadata: {
|
||||
ts: trackedChange.tracking.ts.toISOString(),
|
||||
user_id: trackedChange.tracking.userId,
|
||||
},
|
||||
})
|
||||
if (isTrackedDeletion) {
|
||||
trackedDeletionOffset += trackedChange.range.length
|
||||
}
|
||||
}
|
||||
|
||||
// Comments are shifted left by the length of any previous tracked deletions.
|
||||
// If they overlap with a tracked deletion, they are truncated.
|
||||
//
|
||||
// Example:
|
||||
// { } comment
|
||||
// [ ] tracked deletion
|
||||
// the quic[k {b]rown [fox] jum[ps} ove]r the lazy dog
|
||||
// => rown jum
|
||||
// starting at position 8
|
||||
const trackedDeletions = trackedChanges.filter(
|
||||
tc => tc.tracking.type === 'delete'
|
||||
)
|
||||
const docUpdaterCompatibleComments = []
|
||||
for (const comment of comments) {
|
||||
let trackedDeletionIndex = 0
|
||||
if (comment.ranges.length === 0) {
|
||||
// Translate detached comments into zero length comments at position 0
|
||||
docUpdaterCompatibleComments.push({
|
||||
op: {
|
||||
p: 0,
|
||||
c: '',
|
||||
t: comment.id,
|
||||
resolved: comment.resolved,
|
||||
},
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
// Consider a multiple range comment as a single comment that joins all its
|
||||
// ranges
|
||||
const commentStart = comment.ranges[0].start
|
||||
const commentEnd = comment.ranges[comment.ranges.length - 1].end
|
||||
|
||||
let commentContent = ''
|
||||
// Docupdater position
|
||||
let position = commentStart
|
||||
while (trackedDeletions[trackedDeletionIndex]?.range.end <= commentStart) {
|
||||
// Skip over tracked deletions that are before the current comment range
|
||||
position -= trackedDeletions[trackedDeletionIndex].range.length
|
||||
trackedDeletionIndex++
|
||||
}
|
||||
|
||||
if (trackedDeletions[trackedDeletionIndex]?.range.start < commentStart) {
|
||||
// There's overlap with a tracked deletion, move the position left and
|
||||
// truncate the overlap
|
||||
position -=
|
||||
commentStart - trackedDeletions[trackedDeletionIndex].range.start
|
||||
}
|
||||
|
||||
// Cursor in the history content
|
||||
let cursor = commentStart
|
||||
while (cursor < commentEnd) {
|
||||
const trackedDeletion = trackedDeletions[trackedDeletionIndex]
|
||||
if (!trackedDeletion || trackedDeletion.range.start >= commentEnd) {
|
||||
// We've run out of relevant tracked changes
|
||||
commentContent += content.slice(cursor, commentEnd)
|
||||
break
|
||||
}
|
||||
if (trackedDeletion.range.start > cursor) {
|
||||
// There's a gap between the current cursor and the tracked deletion
|
||||
commentContent += content.slice(cursor, trackedDeletion.range.start)
|
||||
}
|
||||
|
||||
if (trackedDeletion.range.end <= commentEnd) {
|
||||
// Skip to the end of the tracked delete
|
||||
cursor = trackedDeletion.range.end
|
||||
trackedDeletionIndex++
|
||||
} else {
|
||||
// We're done with that comment
|
||||
break
|
||||
}
|
||||
}
|
||||
docUpdaterCompatibleComments.push({
|
||||
op: {
|
||||
p: position,
|
||||
c: commentContent,
|
||||
t: comment.id,
|
||||
resolved: comment.resolved,
|
||||
},
|
||||
id: comment.id,
|
||||
})
|
||||
}
|
||||
|
||||
return {
|
||||
changes: docUpdaterCompatibleTrackedChanges,
|
||||
comments: docUpdaterCompatibleComments,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the file metadata at a specific version.
|
||||
*
|
||||
* @param {string} projectId
|
||||
* @param {number} version
|
||||
* @param {string} pathname
|
||||
* @returns {Promise<{metadata: any}>}
|
||||
*/
|
||||
async function getFileMetadataSnapshot(projectId, version, pathname) {
|
||||
const snapshot = await _getSnapshotAtVersion(projectId, version)
|
||||
const file = snapshot.getFile(pathname)
|
||||
if (!file) {
|
||||
throw new Errors.NotFoundError(`${pathname} not found`, {
|
||||
projectId,
|
||||
version,
|
||||
pathname,
|
||||
})
|
||||
}
|
||||
const rawMetadata = file.getMetadata()
|
||||
const metadata = _.isEmpty(rawMetadata) ? undefined : rawMetadata
|
||||
|
||||
return { metadata }
|
||||
}
|
||||
|
||||
// Returns project snapshot containing the document content for files with
|
||||
// text operations in the relevant chunk, and hashes for unmodified/binary
|
||||
// files. Used by git bridge to get the state of the project.
|
||||
async function getProjectSnapshot(projectId, version) {
|
||||
const snapshot = await _getSnapshotAtVersion(projectId, version)
|
||||
const historyId = await WebApiManager.promises.getHistoryId(projectId)
|
||||
await _loadFilesLimit(
|
||||
snapshot,
|
||||
'eager',
|
||||
HistoryStoreManager.getBlobStore(historyId)
|
||||
)
|
||||
return {
|
||||
projectId,
|
||||
files: snapshot.getFileMap().map(file => {
|
||||
if (!file) {
|
||||
return null
|
||||
}
|
||||
const content = file.getContent({
|
||||
filterTrackedDeletes: true,
|
||||
})
|
||||
if (content === null) {
|
||||
return { data: { hash: file.getHash() } }
|
||||
}
|
||||
return { data: { content } }
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
async function getPathsAtVersion(projectId, version) {
|
||||
const snapshot = await _getSnapshotAtVersion(projectId, version)
|
||||
return {
|
||||
paths: snapshot.getFilePathnames(),
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param {string} projectId
|
||||
* @param {number} version
|
||||
*/
|
||||
async function _getSnapshotAtVersion(projectId, version) {
|
||||
const historyId = await WebApiManager.promises.getHistoryId(projectId)
|
||||
const data = await HistoryStoreManager.promises.getChunkAtVersion(
|
||||
projectId,
|
||||
historyId,
|
||||
version
|
||||
)
|
||||
const chunk = Core.Chunk.fromRaw(data.chunk)
|
||||
const snapshot = chunk.getSnapshot()
|
||||
const changes = chunk.getChanges().slice(0, version - chunk.getStartVersion())
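// For example, if the chunk starts at version 10 and we want the snapshot at
// version 15, the slice above keeps the first 5 changes, i.e. the changes
// that produced versions 11 through 15.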
|
||||
snapshot.applyAll(changes)
|
||||
return snapshot
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} historyId
|
||||
* @return {Promise<Record<string, import('overleaf-editor-core').File>>}
|
||||
*/
|
||||
async function getLatestSnapshotFiles(projectId, historyId) {
|
||||
const data = await HistoryStoreManager.promises.getMostRecentChunk(
|
||||
projectId,
|
||||
historyId
|
||||
)
|
||||
return await getLatestSnapshotFilesForChunk(historyId, data)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} historyId
|
||||
* @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} chunk
|
||||
* @return {Promise<Record<string, import('overleaf-editor-core').File>>}
|
||||
*/
|
||||
async function getLatestSnapshotFilesForChunk(historyId, chunk) {
|
||||
const { snapshot } = getLatestSnapshotFromChunk(chunk)
|
||||
const snapshotFiles = await snapshot.loadFiles(
|
||||
'lazy',
|
||||
HistoryStoreManager.getBlobStore(historyId)
|
||||
)
|
||||
return snapshotFiles
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} projectId
|
||||
* @param {string} historyId
|
||||
* @return {Promise<{version: number, snapshot: import('overleaf-editor-core').Snapshot}>}
|
||||
*/
|
||||
async function getLatestSnapshot(projectId, historyId) {
|
||||
const data = await HistoryStoreManager.promises.getMostRecentChunk(
|
||||
projectId,
|
||||
historyId
|
||||
)
|
||||
return getLatestSnapshotFromChunk(data)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} data
|
||||
* @return {{version: number, snapshot: import('overleaf-editor-core').Snapshot}}
|
||||
*/
|
||||
function getLatestSnapshotFromChunk(data) {
|
||||
if (data == null || data.chunk == null) {
|
||||
throw new OError('undefined chunk')
|
||||
}
|
||||
|
||||
// apply all the changes in the chunk to get the current snapshot
|
||||
const chunk = Core.Chunk.fromRaw(data.chunk)
|
||||
const snapshot = chunk.getSnapshot()
|
||||
const changes = chunk.getChanges()
|
||||
snapshot.applyAll(changes)
|
||||
return {
|
||||
snapshot,
|
||||
version: chunk.getEndVersion(),
|
||||
}
|
||||
}
|
||||
|
||||
async function getChangesInChunkSince(projectId, historyId, sinceVersion) {
|
||||
const latestChunk = Core.Chunk.fromRaw(
|
||||
(
|
||||
await HistoryStoreManager.promises.getMostRecentChunk(
|
||||
projectId,
|
||||
historyId
|
||||
)
|
||||
).chunk
|
||||
)
|
||||
if (sinceVersion > latestChunk.getEndVersion()) {
|
||||
throw new Errors.BadRequestError(
|
||||
'requested version past the end of the history'
|
||||
)
|
||||
}
|
||||
const latestStartVersion = latestChunk.getStartVersion()
|
||||
let chunk = latestChunk
|
||||
if (sinceVersion < latestStartVersion) {
|
||||
chunk = Core.Chunk.fromRaw(
|
||||
(
|
||||
await HistoryStoreManager.promises.getChunkAtVersion(
|
||||
projectId,
|
||||
historyId,
|
||||
sinceVersion
|
||||
)
|
||||
).chunk
|
||||
)
|
||||
}
|
||||
const changes = chunk
|
||||
.getChanges()
|
||||
.slice(sinceVersion - chunk.getStartVersion())
|
||||
return { latestStartVersion, changes }
|
||||
}
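// Illustrative example for getChangesInChunkSince above: if the latest chunk
// starts at version 100 and ends at version 150, requesting changes since
// version 120 returns the 30 changes that produced versions 121 through 150.
// If sinceVersion is below 100, the older chunk containing that version is
// fetched instead.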
|
||||
|
||||
async function _loadFilesLimit(snapshot, kind, blobStore) {
|
||||
await snapshot.fileMap.mapAsync(async file => {
|
||||
// Only load changed files or files with tracked changes; others can be
// dereferenced from their blobs (this method is only used by the git
// bridge, which understands how to load blobs).
|
||||
if (!file.isEditable() || (file.getHash() && !file.getRangesHash())) {
|
||||
return
|
||||
}
|
||||
await file.load(kind, blobStore)
|
||||
}, MAX_REQUESTS)
|
||||
}
|
||||
|
||||
// EXPORTS
|
||||
|
||||
const getChangesInChunkSinceCb = callbackify(getChangesInChunkSince)
|
||||
const getFileSnapshotStreamCb = callbackify(getFileSnapshotStream)
|
||||
const getProjectSnapshotCb = callbackify(getProjectSnapshot)
|
||||
const getLatestSnapshotCb = callbackify(getLatestSnapshot)
|
||||
const getLatestSnapshotFilesCb = callbackify(getLatestSnapshotFiles)
|
||||
const getLatestSnapshotFilesForChunkCb = callbackify(
|
||||
getLatestSnapshotFilesForChunk
|
||||
)
|
||||
const getRangesSnapshotCb = callbackify(getRangesSnapshot)
|
||||
const getFileMetadataSnapshotCb = callbackify(getFileMetadataSnapshot)
|
||||
const getPathsAtVersionCb = callbackify(getPathsAtVersion)
|
||||
|
||||
export {
|
||||
getLatestSnapshotFromChunk,
|
||||
getChangesInChunkSinceCb as getChangesInChunkSince,
|
||||
getFileSnapshotStreamCb as getFileSnapshotStream,
|
||||
getProjectSnapshotCb as getProjectSnapshot,
|
||||
getFileMetadataSnapshotCb as getFileMetadataSnapshot,
|
||||
getLatestSnapshotCb as getLatestSnapshot,
|
||||
getLatestSnapshotFilesCb as getLatestSnapshotFiles,
|
||||
getLatestSnapshotFilesForChunkCb as getLatestSnapshotFilesForChunk,
|
||||
getRangesSnapshotCb as getRangesSnapshot,
|
||||
getPathsAtVersionCb as getPathsAtVersion,
|
||||
}
|
||||
|
||||
export const promises = {
|
||||
getChangesInChunkSince,
|
||||
getFileSnapshotStream,
|
||||
getProjectSnapshot,
|
||||
getLatestSnapshot,
|
||||
getLatestSnapshotFiles,
|
||||
getLatestSnapshotFilesForChunk,
|
||||
getRangesSnapshot,
|
||||
getPathsAtVersion,
|
||||
getFileMetadataSnapshot,
|
||||
}
|
354
services/project-history/app/js/SummarizedUpdatesManager.js
Normal file
354
services/project-history/app/js/SummarizedUpdatesManager.js
Normal file
@@ -0,0 +1,354 @@
|
||||
import _ from 'lodash'
|
||||
import async from 'async'
|
||||
import logger from '@overleaf/logger'
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as ChunkTranslator from './ChunkTranslator.js'
|
||||
import * as HistoryApiManager from './HistoryApiManager.js'
|
||||
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
||||
import * as LabelsManager from './LabelsManager.js'
|
||||
import * as UpdatesProcessor from './UpdatesProcessor.js'
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
|
||||
const MAX_CHUNK_REQUESTS = 5
|
||||
const TIME_BETWEEN_DISTINCT_UPDATES = 5 * 60 * 1000 // five minutes
|
||||
|
||||
export function getSummarizedProjectUpdates(projectId, options, callback) {
|
||||
// Some notes on versions:
//
// Versions of the project are like the fenceposts between updates.
// An update applies to a certain version of the project, and gives us the
// next version.
//
// When we ask for updates 'before' a version, this includes the update
// that created the version equal to 'before'.
//
// A chunk in OL has a 'startVersion', which is the version of the project
// before any of the updates in it were applied. This is the same version as
// the last update in the previous chunk would have created.
//
// If we ask the OL history store for the chunk with a version that is the end of one
// chunk and the start of another, it will return the older chunk, i.e.
// the chunk with the updates that led up to that version.
//
// So once we read in the updates from a chunk and want to get the updates from
// the previous chunk, we ask OL for the chunk with the version equal to the
// 'startVersion' of the newer chunk we just read.
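// For example: a chunk with startVersion 10 whose updates produce versions
// 11..20 ends at version 20. Asking the history store for the chunk at
// version 20 returns that chunk, while asking for the chunk at version 10
// returns the previous chunk, whose updates led up to version 10.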
|
||||
|
||||
let nextVersionToRequest
|
||||
if (options == null) {
|
||||
options = {}
|
||||
}
|
||||
if (!options.min_count) {
|
||||
options.min_count = 25
|
||||
}
|
||||
if (options.before != null) {
|
||||
// The version is of the doc, so we want the updates before that version,
|
||||
// which includes the update that created that version.
|
||||
nextVersionToRequest = options.before
|
||||
} else {
|
||||
// Return the latest updates first if no nextVersionToRequest is set.
|
||||
nextVersionToRequest = null
|
||||
}
|
||||
|
||||
UpdatesProcessor.processUpdatesForProject(projectId, function (error) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
LabelsManager.getLabels(projectId, function (error, labels) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
|
||||
const labelsByVersion = {}
|
||||
for (const label of labels) {
|
||||
if (labelsByVersion[label.version] == null) {
|
||||
labelsByVersion[label.version] = []
|
||||
}
|
||||
labelsByVersion[label.version].push(label)
|
||||
}
|
||||
|
||||
WebApiManager.getHistoryId(projectId, function (error, historyId) {
|
||||
if (error) return callback(error)
|
||||
let chunksRequested = 0
|
||||
let summarizedUpdates = []
|
||||
let toV = null
|
||||
|
||||
const shouldRequestMoreUpdates = cb => {
|
||||
return cb(
|
||||
null,
|
||||
chunksRequested < MAX_CHUNK_REQUESTS &&
|
||||
(nextVersionToRequest == null || nextVersionToRequest > 0) &&
|
||||
summarizedUpdates.length < options.min_count
|
||||
)
|
||||
}
|
||||
|
||||
const getNextBatchOfUpdates = cb =>
|
||||
_getProjectUpdates(
|
||||
projectId,
|
||||
historyId,
|
||||
nextVersionToRequest,
|
||||
function (error, updateSet, startVersion) {
|
||||
if (error) {
|
||||
return cb(OError.tag(error))
|
||||
}
|
||||
// Updates are returned in time order, but we want to go back in time
|
||||
updateSet.reverse()
|
||||
updateSet = discardUnwantedUpdates(updateSet)
|
||||
;({ summarizedUpdates, toV } = _summarizeUpdates(
|
||||
updateSet,
|
||||
labelsByVersion,
|
||||
summarizedUpdates,
|
||||
toV
|
||||
))
|
||||
nextVersionToRequest = startVersion
|
||||
chunksRequested += 1
|
||||
cb()
|
||||
}
|
||||
)
|
||||
|
||||
function discardUnwantedUpdates(updateSet) {
|
||||
// We're getting whole chunks from the OL history store, but we might
|
||||
// only want updates from before a certain version
|
||||
if (options.before == null) {
|
||||
return updateSet
|
||||
} else {
|
||||
return updateSet.filter(u => u.v < options.before)
|
||||
}
|
||||
}
|
||||
|
||||
// If the project doesn't have a history then we can bail out here
|
||||
HistoryApiManager.shouldUseProjectHistory(
|
||||
projectId,
|
||||
function (error, shouldUseProjectHistory) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
if (shouldUseProjectHistory) {
|
||||
async.whilst(
|
||||
shouldRequestMoreUpdates,
|
||||
getNextBatchOfUpdates,
|
||||
function (error) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
callback(
|
||||
null,
|
||||
summarizedUpdates,
|
||||
nextVersionToRequest > 0 ? nextVersionToRequest : undefined
|
||||
)
|
||||
}
|
||||
)
|
||||
} else {
|
||||
logger.debug(
|
||||
{ projectId },
|
||||
'returning no updates as project does not use history'
|
||||
)
|
||||
callback(null, [])
|
||||
}
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
function _getProjectUpdates(projectId, historyId, version, callback) {
|
||||
function getChunk(cb) {
|
||||
if (version != null) {
|
||||
HistoryStoreManager.getChunkAtVersion(projectId, historyId, version, cb)
|
||||
} else {
|
||||
HistoryStoreManager.getMostRecentChunk(projectId, historyId, cb)
|
||||
}
|
||||
}
|
||||
|
||||
getChunk(function (error, chunk) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
const oldestVersion = chunk.chunk.startVersion
|
||||
ChunkTranslator.convertToSummarizedUpdates(
|
||||
chunk,
|
||||
function (error, updateSet) {
|
||||
if (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
callback(error, updateSet, oldestVersion)
|
||||
}
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
function _summarizeUpdates(updates, labels, existingSummarizedUpdates, toV) {
|
||||
if (existingSummarizedUpdates == null) {
|
||||
existingSummarizedUpdates = []
|
||||
}
|
||||
const summarizedUpdates = existingSummarizedUpdates.slice()
|
||||
for (const update of updates) {
|
||||
if (toV == null) {
|
||||
// This is the first update we've seen. Initialize toV.
|
||||
toV = update.v + 1
|
||||
}
|
||||
|
||||
// Skip empty updates (only record their version). Empty updates are
|
||||
// updates that only contain comment operations. We don't have a UI for
|
||||
// these yet.
|
||||
if (isUpdateEmpty(update)) {
|
||||
continue
|
||||
}
|
||||
|
||||
// The client needs to know the exact version that a delete happened, in order
|
||||
// to be able to restore. So even when summarizing, retain the version that each
|
||||
// projectOp happened at.
|
||||
for (const projectOp of update.project_ops) {
|
||||
projectOp.atV = update.v
|
||||
}
|
||||
|
||||
const summarizedUpdate = summarizedUpdates[summarizedUpdates.length - 1]
|
||||
const labelsForVersion = labels[update.v + 1] || []
|
||||
if (
|
||||
summarizedUpdate &&
|
||||
_shouldMergeUpdate(update, summarizedUpdate, labelsForVersion)
|
||||
) {
|
||||
_mergeUpdate(update, summarizedUpdate)
|
||||
} else {
|
||||
const newUpdate = {
|
||||
fromV: update.v,
|
||||
toV,
|
||||
meta: {
|
||||
users: update.meta.users,
|
||||
start_ts: update.meta.start_ts,
|
||||
end_ts: update.meta.end_ts,
|
||||
},
|
||||
labels: labelsForVersion,
|
||||
pathnames: new Set(update.pathnames),
|
||||
project_ops: update.project_ops.slice(), // Clone since we'll modify
|
||||
}
|
||||
if (update.meta.origin) {
|
||||
newUpdate.meta.origin = update.meta.origin
|
||||
}
|
||||
|
||||
summarizedUpdates.push(newUpdate)
|
||||
}
|
||||
toV = update.v
|
||||
}
|
||||
|
||||
return { summarizedUpdates, toV }
|
||||
}
|
||||
|
||||
/**
|
||||
* Given an update, the latest summarized update, and the labels that apply to
|
||||
* the update, figure out if we can merge the update into the summarized
|
||||
* update.
|
||||
*/
|
||||
function _shouldMergeUpdate(update, summarizedUpdate, labels) {
|
||||
// Split updates on labels
|
||||
if (labels.length > 0) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Split updates on origin
|
||||
if (update.meta.origin) {
|
||||
if (summarizedUpdate.meta.origin) {
|
||||
if (update.meta.origin.kind !== summarizedUpdate.meta.origin.kind) {
|
||||
return false
|
||||
}
|
||||
if (update.meta.origin.path !== summarizedUpdate.meta.origin.path) {
|
||||
return false
|
||||
}
|
||||
if (
|
||||
update.meta.origin.kind === 'file-restore' &&
|
||||
update.meta.origin.timestamp !== summarizedUpdate.meta.origin.timestamp
|
||||
) {
|
||||
return false
|
||||
}
|
||||
if (
|
||||
update.meta.origin.kind === 'project-restore' &&
|
||||
update.meta.origin.timestamp !== summarizedUpdate.meta.origin.timestamp
|
||||
) {
|
||||
return false
|
||||
}
|
||||
} else {
|
||||
return false
|
||||
}
|
||||
} else if (summarizedUpdate.meta.origin) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Split updates if it's been too long since the last update. We're going
|
||||
// backwards in time through the updates, so the update comes before the summarized update.
|
||||
if (
|
||||
summarizedUpdate.meta.end_ts - update.meta.start_ts >=
|
||||
TIME_BETWEEN_DISTINCT_UPDATES
|
||||
) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Do not merge text operations and file operations, except for history resyncs
|
||||
const updateHasTextOps = update.pathnames.length > 0
|
||||
const updateHasFileOps = update.project_ops.length > 0
|
||||
const summarizedUpdateHasTextOps = summarizedUpdate.pathnames.size > 0
|
||||
const summarizedUpdateHasFileOps = summarizedUpdate.project_ops.length > 0
|
||||
const isHistoryResync =
|
||||
update.meta.origin &&
|
||||
['history-resync', 'history-migration'].includes(update.meta.origin.kind)
|
||||
if (
|
||||
!isHistoryResync &&
|
||||
((updateHasTextOps && summarizedUpdateHasFileOps) ||
|
||||
(updateHasFileOps && summarizedUpdateHasTextOps))
|
||||
) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge an update into a summarized update.
|
||||
*
|
||||
* This mutates the summarized update.
|
||||
*/
|
||||
function _mergeUpdate(update, summarizedUpdate) {
|
||||
// check if the user in this update is already present in the earliest update,
|
||||
// if not, add them to the users list of the earliest update
|
||||
summarizedUpdate.meta.users = _.uniqBy(
|
||||
_.union(summarizedUpdate.meta.users, update.meta.users),
|
||||
function (user) {
|
||||
if (user == null) {
|
||||
return null
|
||||
}
|
||||
if (user.id == null) {
|
||||
return user
|
||||
}
|
||||
return user.id
|
||||
}
|
||||
)
|
||||
|
||||
summarizedUpdate.fromV = Math.min(summarizedUpdate.fromV, update.v)
|
||||
summarizedUpdate.toV = Math.max(summarizedUpdate.toV, update.v + 1)
|
||||
summarizedUpdate.meta.start_ts = Math.min(
|
||||
summarizedUpdate.meta.start_ts,
|
||||
update.meta.start_ts
|
||||
)
|
||||
summarizedUpdate.meta.end_ts = Math.max(
|
||||
summarizedUpdate.meta.end_ts,
|
||||
update.meta.end_ts
|
||||
)
|
||||
|
||||
// Add file operations
|
||||
for (const op of update.project_ops || []) {
|
||||
summarizedUpdate.project_ops.push(op)
|
||||
if (op.add) {
|
||||
// Merging a file creation. Remove any corresponding edit since that's redundant.
|
||||
summarizedUpdate.pathnames.delete(op.add.pathname)
|
||||
}
|
||||
}
|
||||
|
||||
// Add edit operations
|
||||
for (const pathname of update.pathnames || []) {
|
||||
summarizedUpdate.pathnames.add(pathname)
|
||||
}
|
||||
}
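// Illustrative example for _mergeUpdate above: merging an update with v: 7
// into a summarized update covering fromV: 8, toV: 10 widens the range to
// fromV: 7, toV: 10 and stretches start_ts/end_ts to cover both updates.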
|
||||
|
||||
function isUpdateEmpty(update) {
|
||||
return update.project_ops.length === 0 && update.pathnames.length === 0
|
||||
}
|
1219
services/project-history/app/js/SyncManager.js
Normal file
1219
services/project-history/app/js/SyncManager.js
Normal file
File diff suppressed because it is too large
491
services/project-history/app/js/UpdateCompressor.js
Normal file
491
services/project-history/app/js/UpdateCompressor.js
Normal file
@@ -0,0 +1,491 @@
|
||||
// @ts-check
|
||||
|
||||
import OError from '@overleaf/o-error'
|
||||
import DMP from 'diff-match-patch'
|
||||
|
||||
/**
|
||||
* @import { DeleteOp, InsertOp, Op, Update } from './types'
|
||||
*/
|
||||
|
||||
const MAX_TIME_BETWEEN_UPDATES = 60 * 1000 // one minute
|
||||
const MAX_UPDATE_SIZE = 2 * 1024 * 1024 // 2 MB
|
||||
const ADDED = 1
|
||||
const REMOVED = -1
|
||||
const UNCHANGED = 0
|
||||
|
||||
const strInject = (s1, pos, s2) => s1.slice(0, pos) + s2 + s1.slice(pos)
|
||||
const strRemove = (s1, pos, length) => s1.slice(0, pos) + s1.slice(pos + length)
|
||||
|
||||
const dmp = new DMP()
|
||||
dmp.Diff_Timeout = 0.1 // prevent the diff algorithm from searching too hard for changes in unrelated content
|
||||
|
||||
const cloneWithOp = function (update, op) {
|
||||
// to improve performance, shallow clone the update
|
||||
// and its meta property (also an object), then
|
||||
// overwrite the op property directly.
|
||||
update = Object.assign({}, update)
|
||||
update.meta = Object.assign({}, update.meta)
|
||||
update.op = op
|
||||
return update
|
||||
}
|
||||
const mergeUpdatesWithOp = function (firstUpdate, secondUpdate, op) {
|
||||
// We want to take doc_length and ts from the firstUpdate, v and doc_hash from the second
|
||||
const update = cloneWithOp(firstUpdate, op)
|
||||
if (secondUpdate.v != null) {
|
||||
update.v = secondUpdate.v
|
||||
}
|
||||
if (secondUpdate.meta.doc_hash != null) {
|
||||
update.meta.doc_hash = secondUpdate.meta.doc_hash
|
||||
} else {
|
||||
delete update.meta.doc_hash
|
||||
}
|
||||
return update
|
||||
}
|
||||
|
||||
/**
|
||||
* Adjust the given length to account for the given op
|
||||
*
|
||||
* The resulting length is the new length of the doc after the op is applied.
|
||||
*
|
||||
* @param {number} length
|
||||
* @param {Op} op
|
||||
* @param {object} opts
|
||||
* @param {boolean} [opts.tracked] - whether or not the update is a tracked change
|
||||
* @returns {number} the adjusted length
|
||||
*/
|
||||
function adjustLengthByOp(length, op, opts = {}) {
|
||||
if ('i' in op && op.i != null) {
|
||||
if (op.trackedDeleteRejection) {
|
||||
// Tracked delete rejection: will be translated into a retain
|
||||
return length
|
||||
} else {
|
||||
return length + op.i.length
|
||||
}
|
||||
} else if ('d' in op && op.d != null) {
|
||||
if (opts.tracked) {
|
||||
// Tracked delete: will be translated into a retain, except where it overlaps tracked inserts.
|
||||
for (const change of op.trackedChanges ?? []) {
|
||||
if (change.type === 'insert') {
|
||||
length -= change.length
|
||||
}
|
||||
}
|
||||
return length
|
||||
} else {
|
||||
return length - op.d.length
|
||||
}
|
||||
} else if ('r' in op && op.r != null) {
|
||||
return length
|
||||
} else if ('c' in op && op.c != null) {
|
||||
return length
|
||||
} else {
|
||||
throw new OError('unexpected op type')
|
||||
}
|
||||
}
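// Illustrative examples for adjustLengthByOp above:
//   adjustLengthByOp(10, { p: 2, i: 'abc' })                     // => 13
//   adjustLengthByOp(10, { p: 2, d: 'abc' })                     // => 7
//   adjustLengthByOp(10, { p: 2, d: 'abc' }, { tracked: true })  // => 10
// A tracked delete becomes a retain in history, so it leaves the length
// unchanged (minus any tracked inserts it overlaps).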
|
||||
|
||||
/**
* Updates come from the doc updater in format
* {
* op: [ { ... op1 ... }, { ... op2 ... } ]
* meta: { ts: ..., user_id: ... }
* }
* but it's easier to work with one op per update, so convert these updates to
* our compressed format
* [{
* op: op1
* meta: { ts: ..., user_id: ... }
* }, {
* op: op2
* meta: { ts: ..., user_id: ... }
* }]
*
* @param {Update[]} updates
* @returns {Update[]} single op updates
*/
|
||||
export function convertToSingleOpUpdates(updates) {
|
||||
const splitUpdates = []
|
||||
for (const update of updates) {
|
||||
if (!('op' in update)) {
|
||||
// Not a text op, likely a project structure op
|
||||
splitUpdates.push(update)
|
||||
continue
|
||||
}
|
||||
const ops = update.op
|
||||
|
||||
let docLength = update.meta.history_doc_length ?? update.meta.doc_length
|
||||
// Temporary fix for document-updater sending a length of -1 for empty
|
||||
// documents. This can be removed after all queues have been flushed.
|
||||
if (docLength === -1) {
|
||||
docLength = 0
|
||||
}
|
||||
const docHash = update.meta.doc_hash
|
||||
for (const op of ops) {
|
||||
const splitUpdate = cloneWithOp(update, op)
|
||||
// Only the last update will keep the doc_hash property
|
||||
delete splitUpdate.meta.doc_hash
|
||||
if (docLength != null) {
|
||||
splitUpdate.meta.doc_length = docLength
|
||||
docLength = adjustLengthByOp(docLength, op, {
|
||||
tracked: update.meta.tc != null,
|
||||
})
|
||||
delete splitUpdate.meta.history_doc_length
|
||||
}
|
||||
splitUpdates.push(splitUpdate)
|
||||
}
|
||||
if (docHash != null && splitUpdates.length > 0) {
|
||||
splitUpdates[splitUpdates.length - 1].meta.doc_hash = docHash
|
||||
}
|
||||
}
|
||||
return splitUpdates
|
||||
}
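// Illustrative example for convertToSingleOpUpdates above: an update
//   { doc: 'doc-id', v: 42, op: [{ i: 'a', p: 0 }, { d: 'b', p: 5 }],
//     meta: { ts: 1000, user_id: 'u1', doc_length: 10 } }
// is split into two single-op updates, both with v: 42:
//   { ..., op: { i: 'a', p: 0 }, meta: { ..., doc_length: 10 } }
//   { ..., op: { d: 'b', p: 5 }, meta: { ..., doc_length: 11 } }
// doc_length is adjusted between ops so that each op sees the length of the
// document at the point where it applies.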
|
||||
|
||||
export function filterBlankUpdates(updates) {
|
||||
// Diffing an insert and delete can return blank inserts and deletes
|
||||
// which the OL history service doesn't have an equivalent for.
|
||||
//
|
||||
// NOTE: this relies on the updates only containing either op.i or op.d entries
|
||||
// but not both, which is the case because diffAsShareJsOps does this
|
||||
return updates.filter(
|
||||
update => !(update.op && (update.op.i === '' || update.op.d === ''))
|
||||
)
|
||||
}
|
||||
|
||||
export function concatUpdatesWithSameVersion(updates) {
|
||||
const concattedUpdates = []
|
||||
for (let update of updates) {
|
||||
if (update.op != null) {
|
||||
update = cloneWithOp(update, [update.op])
|
||||
|
||||
const lastUpdate = concattedUpdates[concattedUpdates.length - 1]
|
||||
if (
|
||||
lastUpdate != null &&
|
||||
lastUpdate.op != null &&
|
||||
lastUpdate.v === update.v &&
|
||||
lastUpdate.doc === update.doc &&
|
||||
lastUpdate.pathname === update.pathname
|
||||
) {
|
||||
lastUpdate.op = lastUpdate.op.concat(update.op)
|
||||
if (update.meta.doc_hash == null) {
|
||||
delete lastUpdate.meta.doc_hash
|
||||
} else {
|
||||
lastUpdate.meta.doc_hash = update.meta.doc_hash
|
||||
}
|
||||
} else {
|
||||
concattedUpdates.push(update)
|
||||
}
|
||||
} else {
|
||||
concattedUpdates.push(update)
|
||||
}
|
||||
}
|
||||
return concattedUpdates
|
||||
}
|
||||
|
||||
export function compressRawUpdates(rawUpdates) {
|
||||
let updates = convertToSingleOpUpdates(rawUpdates)
|
||||
updates = compressUpdates(updates)
|
||||
updates = filterBlankUpdates(updates)
|
||||
updates = concatUpdatesWithSameVersion(updates)
|
||||
return updates
|
||||
}
|
||||
|
||||
export function compressUpdates(updates) {
|
||||
if (updates.length === 0) {
|
||||
return []
|
||||
}
|
||||
|
||||
let compressedUpdates = [updates.shift()]
|
||||
for (const update of updates) {
|
||||
const lastCompressedUpdate = compressedUpdates.pop()
|
||||
if (lastCompressedUpdate != null) {
|
||||
const newCompressedUpdates = _concatTwoUpdates(
|
||||
lastCompressedUpdate,
|
||||
update
|
||||
)
|
||||
|
||||
compressedUpdates = compressedUpdates.concat(newCompressedUpdates)
|
||||
} else {
|
||||
compressedUpdates.push(update)
|
||||
}
|
||||
}
|
||||
|
||||
return compressedUpdates
|
||||
}
|
||||
|
||||
/**
* If possible, merge two updates into a single update that has the same effect.
*
* It's useful to do some of this work at this point while we're dealing with
* document-updater updates. The deletes, in particular, include the deleted
* text. This allows us to find pieces of inserts and deletes that cancel each
* other out because they insert/delete the exact same text. This compression
* makes the diff smaller.
*/
|
||||
function _concatTwoUpdates(firstUpdate, secondUpdate) {
|
||||
// Previously we cloned firstUpdate and secondUpdate at this point but we
|
||||
// can skip this step because whenever they are returned with
|
||||
// modification there is always a clone at that point via
|
||||
// mergeUpdatesWithOp.
|
||||
|
||||
if (firstUpdate.op == null || secondUpdate.op == null) {
|
||||
// Project structure ops
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
if (
|
||||
firstUpdate.doc !== secondUpdate.doc ||
|
||||
firstUpdate.pathname !== secondUpdate.pathname
|
||||
) {
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
if (firstUpdate.meta.user_id !== secondUpdate.meta.user_id) {
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
if (
|
||||
(firstUpdate.meta.type === 'external' &&
|
||||
secondUpdate.meta.type !== 'external') ||
|
||||
(firstUpdate.meta.type !== 'external' &&
|
||||
secondUpdate.meta.type === 'external') ||
|
||||
(firstUpdate.meta.type === 'external' &&
|
||||
secondUpdate.meta.type === 'external' &&
|
||||
firstUpdate.meta.source !== secondUpdate.meta.source)
|
||||
) {
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
if (secondUpdate.meta.ts - firstUpdate.meta.ts > MAX_TIME_BETWEEN_UPDATES) {
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
if (
|
||||
(firstUpdate.meta.tc == null && secondUpdate.meta.tc != null) ||
|
||||
(firstUpdate.meta.tc != null && secondUpdate.meta.tc == null)
|
||||
) {
|
||||
// One update is tracking changes and the other isn't. Tracking changes
|
||||
// results in different behaviour in the history, so we need to keep these
|
||||
// two updates separate.
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
if (Boolean(firstUpdate.op.u) !== Boolean(secondUpdate.op.u)) {
|
||||
// One update is an undo and the other isn't. If we were to merge the two
|
||||
// updates, we would have to choose one value for the flag, which would be
|
||||
// partially incorrect. Moreover, a tracked delete that is also an undo is
|
||||
// treated as a tracked insert rejection by the history, so these updates
|
||||
// need to be well separated.
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
if (
|
||||
firstUpdate.op.trackedDeleteRejection ||
|
||||
secondUpdate.op.trackedDeleteRejection
|
||||
) {
|
||||
// Do not merge tracked delete rejections. Each tracked delete rejection is
|
||||
// a separate operation.
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
if (
|
||||
firstUpdate.op.trackedChanges != null ||
|
||||
secondUpdate.op.trackedChanges != null
|
||||
) {
|
||||
// Do not merge ops that span tracked changes.
|
||||
// TODO: This could theoretically be handled, but it would be complex. One
|
||||
// would need to take tracked deletes into account when merging inserts and
|
||||
// deletes together.
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
const firstOp = firstUpdate.op
|
||||
const secondOp = secondUpdate.op
|
||||
const firstSize =
|
||||
(firstOp.i && firstOp.i.length) || (firstOp.d && firstOp.d.length)
|
||||
const secondSize =
|
||||
(secondOp.i && secondOp.i.length) || (secondOp.d && secondOp.d.length)
|
||||
const firstOpInsideSecondOp =
|
||||
secondOp.p <= firstOp.p && firstOp.p <= secondOp.p + secondSize
|
||||
const secondOpInsideFirstOp =
|
||||
firstOp.p <= secondOp.p && secondOp.p <= firstOp.p + firstSize
|
||||
const combinedLengthUnderLimit = firstSize + secondSize < MAX_UPDATE_SIZE
|
||||
|
||||
// Two inserts
|
||||
if (
|
||||
firstOp.i != null &&
|
||||
secondOp.i != null &&
|
||||
secondOpInsideFirstOp &&
|
||||
combinedLengthUnderLimit &&
|
||||
insertOpsInsideSameComments(firstOp, secondOp)
|
||||
) {
|
||||
return [
|
||||
mergeUpdatesWithOp(firstUpdate, secondUpdate, {
|
||||
...firstOp,
|
||||
i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i),
|
||||
}),
|
||||
]
|
||||
}
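// Illustrative example for the two-insert merge above: firstOp
// { i: 'hello', p: 10 } followed by secondOp { i: ' world', p: 15 } becomes a
// single op { i: 'hello world', p: 10 }, since the second insert lands inside
// (here, at the end of) the first one.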
|
||||
|
||||
// Two deletes
|
||||
if (
|
||||
firstOp.d != null &&
|
||||
secondOp.d != null &&
|
||||
firstOpInsideSecondOp &&
|
||||
combinedLengthUnderLimit &&
|
||||
firstUpdate.meta.tc == null &&
|
||||
secondUpdate.meta.tc == null
|
||||
) {
|
||||
return [
|
||||
mergeUpdatesWithOp(firstUpdate, secondUpdate, {
|
||||
...secondOp,
|
||||
d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d),
|
||||
}),
|
||||
]
|
||||
}
|
||||
|
||||
// An insert and then a delete
|
||||
if (
|
||||
firstOp.i != null &&
|
||||
secondOp.d != null &&
|
||||
secondOpInsideFirstOp &&
|
||||
firstUpdate.meta.tc == null &&
|
||||
secondUpdate.meta.tc == null
|
||||
) {
|
||||
const offset = secondOp.p - firstOp.p
|
||||
const insertedText = firstOp.i.slice(offset, offset + secondOp.d.length)
|
||||
// Only trim the insert when the delete is fully contained within it
|
||||
if (insertedText === secondOp.d) {
|
||||
const insert = strRemove(firstOp.i, offset, secondOp.d.length)
|
||||
if (insert === '') {
|
||||
return []
|
||||
} else {
|
||||
return [
|
||||
mergeUpdatesWithOp(firstUpdate, secondUpdate, {
|
||||
...firstOp,
|
||||
i: insert,
|
||||
}),
|
||||
]
|
||||
}
|
||||
} else {
|
||||
// This will only happen if the delete extends outside the insert
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
}
|
||||
|
||||
// A delete then an insert at the same place, likely a copy-paste of a chunk of content
|
||||
if (
|
||||
firstOp.d != null &&
|
||||
secondOp.i != null &&
|
||||
firstOp.p === secondOp.p &&
|
||||
firstUpdate.meta.tc == null &&
|
||||
secondUpdate.meta.tc == null
|
||||
) {
|
||||
const offset = firstOp.p
|
||||
const hoffset = firstOp.hpos
|
||||
const diffUpdates = diffAsShareJsOps(firstOp.d, secondOp.i).map(
|
||||
function (op) {
|
||||
// diffAsShareJsOps() returns ops with positions relative to the position
|
||||
// of the copy/paste. We need to adjust these positions so that they
|
||||
// apply to the whole document instead.
|
||||
const pos = op.p
|
||||
op.p = pos + offset
|
||||
if (hoffset != null) {
|
||||
op.hpos = pos + hoffset
|
||||
}
|
||||
if (firstOp.u && secondOp.u) {
|
||||
op.u = true
|
||||
}
|
||||
if ('i' in op && secondOp.commentIds != null) {
|
||||
// Make sure that commentIds metadata is propagated to inserts
|
||||
op.commentIds = secondOp.commentIds
|
||||
}
|
||||
const update = mergeUpdatesWithOp(firstUpdate, secondUpdate, op)
|
||||
// Set the doc hash only on the last update
|
||||
delete update.meta.doc_hash
|
||||
return update
|
||||
}
|
||||
)
|
||||
const docHash = secondUpdate.meta.doc_hash
|
||||
if (docHash != null && diffUpdates.length > 0) {
|
||||
diffUpdates[diffUpdates.length - 1].meta.doc_hash = docHash
|
||||
}
|
||||
|
||||
// Doing a diff like this loses track of the doc lengths for each
|
||||
// update, so recalculate them
|
||||
let docLength =
|
||||
firstUpdate.meta.history_doc_length ?? firstUpdate.meta.doc_length
|
||||
for (const update of diffUpdates) {
|
||||
update.meta.doc_length = docLength
|
||||
docLength = adjustLengthByOp(docLength, update.op, {
|
||||
tracked: update.meta.tc != null,
|
||||
})
|
||||
delete update.meta.history_doc_length
|
||||
}
|
||||
|
||||
return diffUpdates
|
||||
}
|
||||
|
||||
return [firstUpdate, secondUpdate]
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the diff between two strings
|
||||
*
|
||||
* @param {string} before
|
||||
* @param {string} after
|
||||
* @returns {(InsertOp | DeleteOp)[]} the ops that generate that diff
|
||||
*/
|
||||
export function diffAsShareJsOps(before, after) {
|
||||
const diffs = dmp.diff_main(before, after)
|
||||
dmp.diff_cleanupSemantic(diffs)
|
||||
|
||||
const ops = []
|
||||
let position = 0
|
||||
for (const diff of diffs) {
|
||||
const type = diff[0]
|
||||
const content = diff[1]
|
||||
if (type === ADDED) {
|
||||
ops.push({
|
||||
i: content,
|
||||
p: position,
|
||||
})
|
||||
position += content.length
|
||||
} else if (type === REMOVED) {
|
||||
ops.push({
|
||||
d: content,
|
||||
p: position,
|
||||
})
|
||||
} else if (type === UNCHANGED) {
|
||||
position += content.length
|
||||
} else {
|
||||
throw new Error('Unknown type')
|
||||
}
|
||||
}
|
||||
return ops
|
||||
}
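// Illustrative example for diffAsShareJsOps above:
//   diffAsShareJsOps('the quick brown fox', 'the quick red fox')
// would typically return
//   [ { d: 'brown', p: 10 }, { i: 'red', p: 10 } ]
// (the exact split depends on diff-match-patch's semantic cleanup).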
|
||||
|
||||
/**
|
||||
* Checks if two insert ops are inside the same comments
|
||||
*
|
||||
* @param {InsertOp} op1
|
||||
* @param {InsertOp} op2
|
||||
* @returns {boolean}
|
||||
*/
|
||||
function insertOpsInsideSameComments(op1, op2) {
|
||||
const commentIds1 = op1.commentIds
|
||||
const commentIds2 = op2.commentIds
|
||||
if (commentIds1 == null && commentIds2 == null) {
|
||||
// None are inside comments
|
||||
return true
|
||||
}
|
||||
|
||||
if (
|
||||
commentIds1 != null &&
|
||||
commentIds2 != null &&
|
||||
commentIds1.every(id => commentIds2.includes(id)) &&
|
||||
commentIds2.every(id => commentIds1.includes(id))
|
||||
) {
|
||||
// Both are inside the same comments
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
487
services/project-history/app/js/UpdateTranslator.js
Normal file
487
services/project-history/app/js/UpdateTranslator.js
Normal file
@@ -0,0 +1,487 @@
|
||||
// @ts-check
|
||||
|
||||
import _ from 'lodash'
|
||||
import Core from 'overleaf-editor-core'
|
||||
import * as Errors from './Errors.js'
|
||||
import * as OperationsCompressor from './OperationsCompressor.js'
|
||||
import { isInsert, isRetain, isDelete, isComment } from './Utils.js'
|
||||
|
||||
/**
|
||||
* @import { AddDocUpdate, AddFileUpdate, DeleteCommentUpdate, Op, RawScanOp } from './types'
|
||||
* @import { RenameUpdate, TextUpdate, TrackingDirective, TrackingProps } from './types'
|
||||
* @import { SetCommentStateUpdate, SetFileMetadataOperation, Update, UpdateWithBlob } from './types'
|
||||
*/
|
||||
|
||||
/**
|
||||
* Convert updates into history changes
|
||||
*
|
||||
* @param {string} projectId
|
||||
* @param {UpdateWithBlob[]} updatesWithBlobs
|
||||
* @returns {Array<Core.Change | null>}
|
||||
*/
|
||||
export function convertToChanges(projectId, updatesWithBlobs) {
|
||||
return updatesWithBlobs.map(update => _convertToChange(projectId, update))
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an update into a history change
|
||||
*
|
||||
* @param {string} projectId
|
||||
* @param {UpdateWithBlob} updateWithBlob
|
||||
* @returns {Core.Change | null}
|
||||
*/
|
||||
function _convertToChange(projectId, updateWithBlob) {
|
||||
let operations
|
||||
const { update } = updateWithBlob
|
||||
|
||||
let projectVersion = null
|
||||
const v2DocVersions = {}
|
||||
|
||||
if (_isRenameUpdate(update)) {
|
||||
operations = [
|
||||
{
|
||||
pathname: _convertPathname(update.pathname),
|
||||
newPathname: _convertPathname(update.new_pathname),
|
||||
},
|
||||
]
|
||||
projectVersion = update.version
|
||||
} else if (isAddUpdate(update)) {
|
||||
const op = {
|
||||
pathname: _convertPathname(update.pathname),
|
||||
file: {
|
||||
hash: updateWithBlob.blobHashes.file,
|
||||
},
|
||||
}
|
||||
if (_isAddDocUpdate(update)) {
|
||||
op.file.rangesHash = updateWithBlob.blobHashes.ranges
|
||||
}
|
||||
if (_isAddFileUpdate(update)) {
|
||||
op.file.metadata = update.metadata
|
||||
}
|
||||
operations = [op]
|
||||
projectVersion = update.version
|
||||
} else if (isTextUpdate(update)) {
|
||||
const docLength = update.meta.history_doc_length ?? update.meta.doc_length
|
||||
let pathname = update.meta.pathname
|
||||
|
||||
pathname = _convertPathname(pathname)
|
||||
const builder = new OperationsBuilder(docLength, pathname)
|
||||
// convert ops
|
||||
for (const op of update.op) {
|
||||
builder.addOp(op, update)
|
||||
}
|
||||
// add doc hash if present
|
||||
if (update.meta.doc_hash != null) {
|
||||
// This will commit the text operation that the builder is currently
|
||||
// building and set the contentHash property.
|
||||
builder.commitTextOperation({ contentHash: update.meta.doc_hash })
|
||||
}
|
||||
operations = builder.finish()
|
||||
// add doc version information if present
|
||||
if (update.v != null) {
|
||||
v2DocVersions[update.doc] = { pathname, v: update.v }
|
||||
}
|
||||
} else if (isSetCommentStateUpdate(update)) {
|
||||
operations = [
|
||||
{
|
||||
pathname: _convertPathname(update.pathname),
|
||||
commentId: update.commentId,
|
||||
resolved: update.resolved,
|
||||
},
|
||||
]
|
||||
} else if (isSetFileMetadataOperation(update)) {
|
||||
operations = [
|
||||
{
|
||||
pathname: _convertPathname(update.pathname),
|
||||
metadata: update.metadata,
|
||||
},
|
||||
]
|
||||
} else if (isDeleteCommentUpdate(update)) {
|
||||
operations = [
|
||||
{
|
||||
pathname: _convertPathname(update.pathname),
|
||||
deleteComment: update.deleteComment,
|
||||
},
|
||||
]
|
||||
} else {
|
||||
const error = new Errors.UpdateWithUnknownFormatError(
|
||||
'update with unknown format',
|
||||
{ projectId, update }
|
||||
)
|
||||
throw error
|
||||
}
|
||||
|
||||
let v2Authors
|
||||
if (update.meta.user_id === 'anonymous-user') {
|
||||
// history-v1 uses null to represent an anonymous author
|
||||
v2Authors = [null]
|
||||
} else {
|
||||
// user_id is missing on resync operations that update the contents of a doc
|
||||
v2Authors = _.compact([update.meta.user_id])
|
||||
}
|
||||
|
||||
const rawChange = {
|
||||
operations,
|
||||
v2Authors,
|
||||
timestamp: new Date(update.meta.ts).toISOString(),
|
||||
projectVersion,
|
||||
v2DocVersions: Object.keys(v2DocVersions).length ? v2DocVersions : null,
|
||||
}
|
||||
if (update.meta.origin) {
|
||||
rawChange.origin = update.meta.origin
|
||||
} else if (update.meta.type === 'external' && update.meta.source) {
|
||||
rawChange.origin = { kind: update.meta.source }
|
||||
}
|
||||
const change = Core.Change.fromRaw(rawChange)
|
||||
|
||||
if (change != null) {
|
||||
change.operations = OperationsCompressor.compressOperations(
|
||||
change.operations
|
||||
)
|
||||
}
|
||||
|
||||
return change
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Update} update
|
||||
* @returns {update is RenameUpdate}
|
||||
*/
|
||||
function _isRenameUpdate(update) {
|
||||
return 'new_pathname' in update && update.new_pathname != null
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Update} update
|
||||
* @returns {update is AddDocUpdate}
|
||||
*/
|
||||
function _isAddDocUpdate(update) {
|
||||
return (
|
||||
'doc' in update &&
|
||||
update.doc != null &&
|
||||
'docLines' in update &&
|
||||
update.docLines != null
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Update} update
|
||||
* @returns {update is AddFileUpdate}
|
||||
*/
|
||||
function _isAddFileUpdate(update) {
|
||||
return (
|
||||
'file' in update &&
|
||||
update.file != null &&
|
||||
(('createdBlob' in update && update.createdBlob) ||
|
||||
('url' in update && update.url != null))
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Update} update
|
||||
* @returns {update is TextUpdate}
|
||||
*/
|
||||
export function isTextUpdate(update) {
|
||||
return (
|
||||
'doc' in update &&
|
||||
update.doc != null &&
|
||||
'op' in update &&
|
||||
update.op != null &&
|
||||
'pathname' in update.meta &&
|
||||
update.meta.pathname != null &&
|
||||
'doc_length' in update.meta &&
|
||||
update.meta.doc_length != null
|
||||
)
|
||||
}
|
||||
|
||||
export function isProjectStructureUpdate(update) {
|
||||
return isAddUpdate(update) || _isRenameUpdate(update)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Update} update
|
||||
* @returns {update is AddDocUpdate | AddFileUpdate}
|
||||
*/
|
||||
export function isAddUpdate(update) {
|
||||
return _isAddDocUpdate(update) || _isAddFileUpdate(update)
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Update} update
|
||||
* @returns {update is SetCommentStateUpdate}
|
||||
*/
|
||||
export function isSetCommentStateUpdate(update) {
|
||||
return 'commentId' in update && 'resolved' in update
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Update} update
|
||||
* @returns {update is DeleteCommentUpdate}
|
||||
*/
|
||||
export function isDeleteCommentUpdate(update) {
|
||||
return 'deleteComment' in update
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Update} update
|
||||
* @returns {update is SetFileMetadataOperation}
|
||||
*/
|
||||
export function isSetFileMetadataOperation(update) {
|
||||
return 'metadata' in update
|
||||
}
|
||||
|
||||
export function _convertPathname(pathname) {
|
||||
// Strip leading /
|
||||
pathname = pathname.replace(/^\//, '')
|
||||
// Replace \\ with _. Backslashes are no longer allowed in projects in web,
// but some pathnames containing them made it into the history before this
// restriction was added. This replacement keeps those pathnames valid for
// the history store.
|
||||
// See https://github.com/overleaf/write_latex/issues/4471
|
||||
pathname = pathname.replace(/\\/g, '_')
|
||||
// workaround for filenames containing asterisks, this will
|
||||
// fail if a corresponding replacement file already exists but it
|
||||
// would fail anyway without this attempt to fix the pathname.
|
||||
// See https://github.com/overleaf/internal/issues/900
|
||||
pathname = pathname.replace(/\*/g, '__ASTERISK__')
|
||||
// workaround for filenames beginning with spaces
|
||||
// See https://github.com/overleaf/internal/issues/1404
|
||||
// note: we have already stripped any leading slash above
|
||||
pathname = pathname.replace(/^ /, '__SPACE__') // handle top-level
|
||||
pathname = pathname.replace(/\/ /g, '/__SPACE__') // handle folders
|
||||
return pathname
|
||||
}
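// Illustrative example for _convertPathname above:
//   _convertPathname('/foo\\bar/ figure*.png')
// returns 'foo_bar/__SPACE__figure__ASTERISK__.png'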
|
||||
|
||||
class OperationsBuilder {
|
||||
/**
|
||||
* @param {number} docLength
|
||||
* @param {string} pathname
|
||||
*/
|
||||
constructor(docLength, pathname) {
|
||||
/**
|
||||
* List of operations being built
|
||||
*/
|
||||
this.operations = []
|
||||
|
||||
/**
|
||||
* Currently built text operation
|
||||
*
|
||||
* @type {RawScanOp[]}
|
||||
*/
|
||||
this.textOperation = []
|
||||
|
||||
/**
|
||||
* Cursor inside the current text operation
|
||||
*/
|
||||
this.cursor = 0
|
||||
|
||||
this.docLength = docLength
|
||||
this.pathname = pathname
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {Op} op
|
||||
* @param {TextUpdate} update
|
||||
* @returns {void}
|
||||
*/
|
||||
addOp(op, update) {
|
||||
// We sometimes receive operations that operate at positions outside the
|
||||
// docLength. Document updater coerces the position to the end of the
|
||||
// document. We do the same here.
|
||||
const pos = Math.min(op.hpos ?? op.p, this.docLength)
|
||||
|
||||
if (isComment(op)) {
|
||||
// Commit the current text operation
|
||||
this.commitTextOperation()
|
||||
|
||||
// Add a comment operation
|
||||
const commentLength = op.hlen ?? op.c.length
|
||||
const commentOp = {
|
||||
pathname: this.pathname,
|
||||
commentId: op.t,
|
||||
ranges: commentLength > 0 ? [{ pos, length: commentLength }] : [],
|
||||
}
|
||||
if ('resolved' in op) {
|
||||
commentOp.resolved = op.resolved
|
||||
}
|
||||
this.operations.push(commentOp)
|
||||
return
|
||||
}
|
||||
|
||||
if (!isInsert(op) && !isDelete(op) && !isRetain(op)) {
|
||||
throw new Errors.UnexpectedOpTypeError('unexpected op type', { op })
|
||||
}
|
||||
|
||||
if (pos < this.cursor) {
|
||||
this.commitTextOperation()
|
||||
// At this point, this.cursor === 0 and we can continue
|
||||
}
|
||||
|
||||
if (pos > this.cursor) {
|
||||
this.retain(pos - this.cursor)
|
||||
}
|
||||
|
||||
if (isInsert(op)) {
|
||||
if (op.trackedDeleteRejection) {
|
||||
this.retain(op.i.length, {
|
||||
tracking: { type: 'none' },
|
||||
})
|
||||
} else {
|
||||
const opts = {}
|
||||
if (update.meta.tc != null) {
|
||||
opts.tracking = {
|
||||
type: 'insert',
|
||||
userId: update.meta.user_id,
|
||||
ts: new Date(update.meta.ts).toISOString(),
|
||||
}
|
||||
}
|
||||
if (op.commentIds != null) {
|
||||
opts.commentIds = op.commentIds
|
||||
}
|
||||
this.insert(op.i, opts)
|
||||
}
|
||||
}
|
||||
|
||||
if (isRetain(op)) {
|
||||
if (op.tracking) {
|
||||
this.retain(op.r.length, { tracking: op.tracking })
|
||||
} else {
|
||||
this.retain(op.r.length)
|
||||
}
|
||||
}
|
||||
|
||||
if (isDelete(op)) {
|
||||
const changes = op.trackedChanges ?? []
|
||||
|
||||
// Tracked changes should already be ordered by offset, but let's make
|
||||
// sure they are.
|
||||
changes.sort((a, b) => {
|
||||
const posOrder = a.offset - b.offset
|
||||
if (posOrder !== 0) {
|
||||
return posOrder
|
||||
} else if (a.type === 'insert' && b.type === 'delete') {
|
||||
return 1
|
||||
} else if (a.type === 'delete' && b.type === 'insert') {
|
||||
return -1
|
||||
} else {
|
||||
return 0
|
||||
}
|
||||
})
|
||||
|
||||
let offset = 0
|
||||
for (const change of changes) {
|
||||
if (change.offset > offset) {
|
||||
// Handle the portion before the tracked change
|
||||
if (update.meta.tc != null) {
|
||||
// This is a tracked delete
|
||||
this.retain(change.offset - offset, {
|
||||
tracking: {
|
||||
type: 'delete',
|
||||
userId: update.meta.user_id,
|
||||
ts: new Date(update.meta.ts).toISOString(),
|
||||
},
|
||||
})
|
||||
} else {
|
||||
// This is a regular delete
|
||||
this.delete(change.offset - offset)
|
||||
}
|
||||
offset = change.offset
|
||||
}
|
||||
|
||||
// Now, handle the portion inside the tracked change
|
||||
if (change.type === 'delete') {
|
||||
// Tracked deletes are skipped over when deleting
|
||||
this.retain(change.length)
|
||||
} else if (change.type === 'insert') {
|
||||
// Deletes inside tracked inserts are always regular deletes
|
||||
this.delete(change.length)
|
||||
offset += change.length
|
||||
}
|
||||
}
|
||||
if (offset < op.d.length) {
|
||||
// Handle the portion after the last tracked change
|
||||
if (update.meta.tc != null) {
|
||||
// This is a tracked delete
|
||||
this.retain(op.d.length - offset, {
|
||||
tracking: {
|
||||
type: 'delete',
|
||||
userId: update.meta.user_id,
|
||||
ts: new Date(update.meta.ts).toISOString(),
|
||||
},
|
||||
})
|
||||
} else {
|
||||
// This is a regular delete
|
||||
this.delete(op.d.length - offset)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} length
|
||||
* @param {object} opts
|
||||
* @param {TrackingDirective} [opts.tracking]
|
||||
*/
|
||||
retain(length, opts = {}) {
|
||||
if (opts.tracking) {
|
||||
this.textOperation.push({ r: length, ...opts })
|
||||
} else {
|
||||
this.textOperation.push(length)
|
||||
}
|
||||
this.cursor += length
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} str
|
||||
* @param {object} opts
|
||||
* @param {TrackingProps} [opts.tracking]
|
||||
* @param {string[]} [opts.commentIds]
|
||||
*/
|
||||
insert(str, opts = {}) {
|
||||
if (opts.tracking || opts.commentIds) {
|
||||
this.textOperation.push({ i: str, ...opts })
|
||||
} else {
|
||||
this.textOperation.push(str)
|
||||
}
|
||||
this.cursor += str.length
|
||||
this.docLength += str.length
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {number} length
|
||||
* @param {object} opts
|
||||
*/
|
||||
delete(length, opts = {}) {
|
||||
this.textOperation.push(-length)
|
||||
this.docLength -= length
|
||||
}
|
||||
|
||||
/**
|
||||
* Finalize the current text operation and push it to the queue
|
||||
*
|
||||
* @param {object} [opts]
|
||||
* @param {string} [opts.contentHash]
|
||||
*/
|
||||
commitTextOperation(opts = {}) {
|
||||
if (this.textOperation.length > 0 && this.cursor < this.docLength) {
|
||||
this.retain(this.docLength - this.cursor)
|
||||
}
|
||||
if (this.textOperation.length > 0) {
|
||||
const operation = {
|
||||
pathname: this.pathname,
|
||||
textOperation: this.textOperation,
|
||||
}
|
||||
if (opts.contentHash != null) {
|
||||
operation.contentHash = opts.contentHash
|
||||
}
|
||||
this.operations.push(operation)
|
||||
this.textOperation = []
|
||||
}
|
||||
this.cursor = 0
|
||||
}
|
||||
|
||||
finish() {
|
||||
this.commitTextOperation()
|
||||
return this.operations
|
||||
}
|
||||
}
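// Illustrative example for OperationsBuilder above: for a doc of length 10,
//   const builder = new OperationsBuilder(10, 'main.tex')
//   builder.addOp({ i: 'foo', p: 4 }, update) // an ordinary untracked update
//   builder.finish()
// returns [{ pathname: 'main.tex', textOperation: [4, 'foo', 6] }],
// i.e. retain 4 characters, insert 'foo', retain the remaining 6.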
|
800
services/project-history/app/js/UpdatesProcessor.js
Normal file
800
services/project-history/app/js/UpdatesProcessor.js
Normal file
@@ -0,0 +1,800 @@
|
||||
import { promisify } from 'node:util'
|
||||
import logger from '@overleaf/logger'
|
||||
import async from 'async'
|
||||
import metrics from '@overleaf/metrics'
|
||||
import Settings from '@overleaf/settings'
|
||||
import OError from '@overleaf/o-error'
|
||||
import * as HistoryStoreManager from './HistoryStoreManager.js'
|
||||
import * as UpdateTranslator from './UpdateTranslator.js'
|
||||
import * as BlobManager from './BlobManager.js'
|
||||
import * as RedisManager from './RedisManager.js'
|
||||
import * as ErrorRecorder from './ErrorRecorder.js'
|
||||
import * as LockManager from './LockManager.js'
|
||||
import * as UpdateCompressor from './UpdateCompressor.js'
|
||||
import * as WebApiManager from './WebApiManager.js'
|
||||
import * as SyncManager from './SyncManager.js'
|
||||
import * as Versions from './Versions.js'
|
||||
import * as Errors from './Errors.js'
|
||||
import * as Metrics from './Metrics.js'
|
||||
import * as RetryManager from './RetryManager.js'
|
||||
import { Profiler } from './Profiler.js'
|
||||
|
||||
const keys = Settings.redis.lock.key_schema
|
||||
|
||||
export const REDIS_READ_BATCH_SIZE = 500
|
||||
|
||||
/**
|
||||
* Container for functions that need to be mocked in tests
|
||||
*
|
||||
* TODO: Rewrite tests in terms of exported functions only
|
||||
*/
|
||||
export const _mocks = {}
|
||||
|
||||
export function getRawUpdates(projectId, batchSize, callback) {
|
||||
RedisManager.getRawUpdatesBatch(projectId, batchSize, (error, batch) => {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
|
||||
let updates
|
||||
try {
|
||||
updates = RedisManager.parseDocUpdates(batch.rawUpdates)
|
||||
} catch (error) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
|
||||
_getHistoryId(projectId, updates, (error, historyId) => {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
HistoryStoreManager.getMostRecentChunk(
|
||||
projectId,
|
||||
historyId,
|
||||
(error, chunk) => {
|
||||
if (error != null) {
|
||||
return callback(OError.tag(error))
|
||||
}
|
||||
callback(null, { project_id: projectId, chunk, updates })
|
||||
}
|
||||
)
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
// Trigger a resync and start processing under the project lock so that other
// operations cannot flush the resync updates first.
|
||||
export function startResyncAndProcessUpdatesUnderLock(
|
||||
projectId,
|
||||
opts,
|
||||
callback
|
||||
) {
|
||||
const startTimeMs = Date.now()
|
||||
LockManager.runWithLock(
|
||||
keys.projectHistoryLock({ project_id: projectId }),
|
||||
(extendLock, releaseLock) => {
|
||||
SyncManager.startResyncWithoutLock(projectId, opts, err => {
|
||||
if (err) return callback(OError.tag(err))
|
||||
extendLock(err => {
|
||||
if (err) return callback(OError.tag(err))
|
||||
_countAndProcessUpdates(
|
||||
projectId,
|
||||
extendLock,
|
||||
REDIS_READ_BATCH_SIZE,
|
||||
releaseLock
|
||||
)
|
||||
})
|
||||
})
|
||||
},
|
||||
(flushError, queueSize) => {
|
||||
if (flushError) {
|
||||
OError.tag(flushError)
|
||||
ErrorRecorder.record(projectId, queueSize, flushError, recordError => {
|
||||
if (recordError) {
|
||||
logger.error(
|
||||
{ err: recordError, projectId },
|
||||
'failed to record error'
|
||||
)
|
||||
}
|
||||
callback(flushError)
|
||||
})
|
||||
} else {
|
||||
ErrorRecorder.clearError(projectId, clearError => {
|
||||
if (clearError) {
|
||||
logger.error(
|
||||
{ err: clearError, projectId },
|
||||
'failed to clear error'
|
||||
)
|
||||
}
|
||||
callback()
|
||||
})
|
||||
}
|
||||
if (queueSize > 0) {
|
||||
const duration = (Date.now() - startTimeMs) / 1000
|
||||
Metrics.historyFlushDurationSeconds.observe(duration)
|
||||
Metrics.historyFlushQueueSize.observe(queueSize)
|
||||
}
|
||||
// clear the timestamp in the background if the queue is now empty
|
||||
RedisManager.clearDanglingFirstOpTimestamp(projectId, () => {})
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
// Process all updates for a project, checking project-level information only once
export function processUpdatesForProject(projectId, callback) {
  const startTimeMs = Date.now()
  LockManager.runWithLock(
    keys.projectHistoryLock({ project_id: projectId }),
    (extendLock, releaseLock) => {
      _countAndProcessUpdates(
        projectId,
        extendLock,
        REDIS_READ_BATCH_SIZE,
        releaseLock
      )
    },
    (flushError, queueSize) => {
      if (flushError) {
        OError.tag(flushError)
        ErrorRecorder.record(
          projectId,
          queueSize,
          flushError,
          (recordError, failure) => {
            if (recordError) {
              logger.error(
                { err: recordError, projectId },
                'failed to record error'
              )
              callback(recordError)
            } else if (
              RetryManager.isFirstFailure(failure) &&
              RetryManager.isHardFailure(failure)
            ) {
              // This is the first failed flush since the last successful flush.
              // Immediately attempt a resync.
              logger.warn({ projectId }, 'Flush failed, attempting resync')
              resyncProject(projectId, callback)
            } else {
              callback(flushError)
            }
          }
        )
      } else {
        ErrorRecorder.clearError(projectId, clearError => {
          if (clearError) {
            logger.error(
              { err: clearError, projectId },
              'failed to clear error'
            )
          }
          callback()
        })
      }
      if (queueSize > 0) {
        const duration = (Date.now() - startTimeMs) / 1000
        Metrics.historyFlushDurationSeconds.observe(duration)
        Metrics.historyFlushQueueSize.observe(queueSize)
      }
      // clear the timestamp in the background if the queue is now empty
      RedisManager.clearDanglingFirstOpTimestamp(projectId, () => {})
    }
  )
}

export function resyncProject(projectId, callback) {
  SyncManager.startHardResync(projectId, {}, error => {
    if (error != null) {
      return callback(OError.tag(error))
    }
    // Flush the sync operations; this will not loop indefinitely
    // because any failure won't be the first failure anymore.
    LockManager.runWithLock(
      keys.projectHistoryLock({ project_id: projectId }),
      (extendLock, releaseLock) => {
        _countAndProcessUpdates(
          projectId,
          extendLock,
          REDIS_READ_BATCH_SIZE,
          releaseLock
        )
      },
      (flushError, queueSize) => {
        if (flushError) {
          ErrorRecorder.record(
            projectId,
            queueSize,
            flushError,
            (recordError, failure) => {
              if (recordError) {
                logger.error(
                  { err: recordError, projectId },
                  'failed to record error'
                )
                callback(OError.tag(recordError))
              } else {
                callback(OError.tag(flushError))
              }
            }
          )
        } else {
          ErrorRecorder.clearError(projectId, clearError => {
            if (clearError) {
              logger.error(
                { err: clearError, projectId },
                'failed to clear error'
              )
            }
            callback()
          })
        }
      }
    )
  })
}

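// Process the queue in batches, halving the batch size whenever a batch
// fails, until a batch succeeds or no further processing is possible.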
export function processUpdatesForProjectUsingBisect(
  projectId,
  amountToProcess,
  callback
) {
  LockManager.runWithLock(
    keys.projectHistoryLock({ project_id: projectId }),
    (extendLock, releaseLock) => {
      _countAndProcessUpdates(
        projectId,
        extendLock,
        amountToProcess,
        releaseLock
      )
    },
    (flushError, queueSize) => {
      if (amountToProcess === 0 || queueSize === 0) {
        // no further processing possible
        if (flushError != null) {
          ErrorRecorder.record(
            projectId,
            queueSize,
            OError.tag(flushError),
            recordError => {
              if (recordError) {
                logger.error(
                  { err: recordError, projectId },
                  'failed to record error'
                )
              }
              callback(flushError)
            }
          )
        } else {
          callback()
        }
      } else {
        if (flushError != null) {
          // decrease the batch size when we hit an error
          processUpdatesForProjectUsingBisect(
            projectId,
            Math.floor(amountToProcess / 2),
            callback
          )
        } else {
          // otherwise continue processing with the same batch size
          processUpdatesForProjectUsingBisect(
            projectId,
            amountToProcess,
            callback
          )
        }
      }
    }
  )
}

export function processSingleUpdateForProject(projectId, callback) {
  LockManager.runWithLock(
    keys.projectHistoryLock({ project_id: projectId }),
    (
      extendLock,
      releaseLock // set the batch size to 1 for single-stepping
    ) => {
      _countAndProcessUpdates(projectId, extendLock, 1, releaseLock)
    },
    (flushError, queueSize) => {
      // no need to clear the flush marker when single stepping
      // it will be cleared up on the next background flush if
      // the queue is empty
      if (flushError) {
        ErrorRecorder.record(projectId, queueSize, flushError, recordError => {
          if (recordError) {
            logger.error(
              { err: recordError, projectId },
              'failed to record error'
            )
          }
          callback(flushError)
        })
      } else {
        ErrorRecorder.clearError(projectId, clearError => {
          if (clearError) {
            logger.error(
              { err: clearError, projectId },
              'failed to clear error'
            )
          }
          callback()
        })
      }
    }
  )
}

_mocks._countAndProcessUpdates = (
  projectId,
  extendLock,
  batchSize,
  callback
) => {
  RedisManager.countUnprocessedUpdates(projectId, (error, queueSize) => {
    if (error != null) {
      return callback(OError.tag(error))
    }
    if (queueSize > 0) {
      logger.debug({ projectId, queueSize }, 'processing uncompressed updates')
      RedisManager.getUpdatesInBatches(
        projectId,
        batchSize,
        (updates, cb) => {
          _processUpdatesBatch(projectId, updates, extendLock, cb)
        },
        error => {
          // Unconventional callback signature. The caller needs the queue size
          // even when an error is thrown in order to record the queue size in
          // the projectHistoryFailures collection. We'll have to find another
          // way to achieve this when we promisify.
          callback(error, queueSize)
        }
      )
    } else {
      logger.debug({ projectId }, 'no updates to process')
      callback(null, queueSize)
    }
  })
}

function _countAndProcessUpdates(...args) {
  _mocks._countAndProcessUpdates(...args)
}

function _processUpdatesBatch(projectId, updates, extendLock, callback) {
  // If the project doesn't have a history then we can bail out here
  _getHistoryId(projectId, updates, (error, historyId) => {
    if (error != null) {
      return callback(OError.tag(error))
    }

    if (historyId == null) {
      logger.debug(
        { projectId },
        'discarding updates as project does not use history'
      )
      return callback()
    }

    _processUpdates(projectId, historyId, updates, extendLock, error => {
      if (error != null) {
        return callback(OError.tag(error))
      }
      callback()
    })
  })
}

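// Determine the history id for a project by comparing the id carried on the
// queued updates with the id reported by the web API: use whichever is
// available, and fail if the two disagree.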
export function _getHistoryId(projectId, updates, callback) {
  let idFromUpdates = null

  // check that all updates have the same history id
  for (const update of updates) {
    if (update.projectHistoryId != null) {
      if (idFromUpdates == null) {
        idFromUpdates = update.projectHistoryId.toString()
      } else if (idFromUpdates !== update.projectHistoryId.toString()) {
        metrics.inc('updates.batches.project-history-id.inconsistent-update')
        return callback(
          new OError('inconsistent project history id between updates', {
            projectId,
            idFromUpdates,
            currentId: update.projectHistoryId,
          })
        )
      }
    }
  }

  WebApiManager.getHistoryId(projectId, (error, idFromWeb) => {
    if (error != null && idFromUpdates != null) {
      // present only on updates
      // 404s from web are an error
      metrics.inc('updates.batches.project-history-id.from-updates')
      return callback(null, idFromUpdates)
    } else if (error != null) {
      return callback(OError.tag(error))
    }

    if (idFromWeb == null && idFromUpdates == null) {
      // present on neither web nor updates
      callback(null, null)
    } else if (idFromWeb != null && idFromUpdates == null) {
      // present only on web
      metrics.inc('updates.batches.project-history-id.from-web')
      callback(null, idFromWeb)
    } else if (idFromWeb == null && idFromUpdates != null) {
      // present only on updates
      metrics.inc('updates.batches.project-history-id.from-updates')
      callback(null, idFromUpdates)
    } else if (idFromWeb.toString() !== idFromUpdates.toString()) {
      // inconsistent between web and updates
      metrics.inc('updates.batches.project-history-id.inconsistent-with-web')
      logger.warn(
        {
          projectId,
          idFromWeb,
          idFromUpdates,
          updates,
        },
        'inconsistent project history id between updates and web'
      )
      callback(
        new OError('inconsistent project history id between updates and web')
      )
    } else {
      // the same on web and updates
      metrics.inc('updates.batches.project-history-id.from-updates')
      callback(null, idFromWeb)
    }
  })
}

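// `rest` holds any results from the history store followed by the callback as
// its final element.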
function _handleOpsOutOfOrderError(projectId, projectHistoryId, err, ...rest) {
  const adjustedLength = Math.max(rest.length, 1)
  const results = rest.slice(0, adjustedLength - 1)
  const callback = rest[adjustedLength - 1]
  ErrorRecorder.getFailureRecord(projectId, (error, failureRecord) => {
    if (error != null) {
      return callback(error)
    }
    // Bypass ops-out-of-order errors in the stored chunk when in forceDebug mode
    if (failureRecord != null && failureRecord.forceDebug === true) {
      logger.warn(
        { err, projectId, projectHistoryId },
        'ops out of order in chunk, forced continue'
      )
      callback(null, ...results) // return results without error
    } else {
      callback(err, ...results)
    }
  })
}

function _getMostRecentVersionWithDebug(projectId, projectHistoryId, callback) {
  HistoryStoreManager.getMostRecentVersion(
    projectId,
    projectHistoryId,
    (err, ...results) => {
      if (err instanceof Errors.OpsOutOfOrderError) {
        _handleOpsOutOfOrderError(
          projectId,
          projectHistoryId,
          err,
          ...results,
          callback
        )
      } else {
        callback(err, ...results)
      }
    }
  )
}

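// Pipeline for a batch of updates: expand resync updates, drop updates that
// were already applied, compress the rest, create blobs for file additions,
// convert the result to changes for the history store, send them, and finally
// persist the new resync state.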
export function _processUpdates(
  projectId,
  projectHistoryId,
  updates,
  extendLock,
  callback
) {
  const profile = new Profiler('_processUpdates', {
    project_id: projectId,
    projectHistoryId,
  })
  // skip updates first if we're in a sync, we might not need to do anything else
  SyncManager.skipUpdatesDuringSync(
    projectId,
    updates,
    (error, filteredUpdates, newSyncState) => {
      profile.log('skipUpdatesDuringSync')
      if (error != null) {
        return callback(error)
      }
      if (filteredUpdates.length === 0) {
        // return early if there are no updates to apply
        return SyncManager.setResyncState(projectId, newSyncState, callback)
      }
      // only make request to history service if we have actual updates to process
      _getMostRecentVersionWithDebug(
        projectId,
        projectHistoryId,
        (
          error,
          baseVersion,
          projectStructureAndDocVersions,
          _lastChange,
          mostRecentChunk
        ) => {
          if (projectStructureAndDocVersions == null) {
            projectStructureAndDocVersions = { project: null, docs: {} }
          }
          profile.log('getMostRecentVersion')
          if (error != null) {
            return callback(error)
          }
          async.waterfall(
            [
              cb => {
                cb = profile.wrap('expandSyncUpdates', cb)
                SyncManager.expandSyncUpdates(
                  projectId,
                  projectHistoryId,
                  mostRecentChunk,
                  filteredUpdates,
                  extendLock,
                  cb
                )
              },
              (expandedUpdates, cb) => {
                let unappliedUpdates
                try {
                  unappliedUpdates = _skipAlreadyAppliedUpdates(
                    projectId,
                    expandedUpdates,
                    projectStructureAndDocVersions
                  )
                } catch (err) {
                  return cb(err)
                }
                profile.log('skipAlreadyAppliedUpdates')
                const compressedUpdates =
                  UpdateCompressor.compressRawUpdates(unappliedUpdates)
                const timeTaken = profile
                  .log('compressRawUpdates')
                  .getTimeDelta()
                if (timeTaken >= 1000) {
                  logger.debug(
                    { projectId, updates: unappliedUpdates, timeTaken },
                    'slow compression of raw updates'
                  )
                }
                cb = profile.wrap('createBlobs', cb)
                BlobManager.createBlobsForUpdates(
                  projectId,
                  projectHistoryId,
                  compressedUpdates,
                  extendLock,
                  cb
                )
              },
              (updatesWithBlobs, cb) => {
                let changes
                try {
                  changes = UpdateTranslator.convertToChanges(
                    projectId,
                    updatesWithBlobs
                  ).map(change => change.toRaw())
                } catch (err) {
                  return cb(err)
                } finally {
                  profile.log('convertToChanges')
                }
                cb(null, changes)
              },
              (changes, cb) => {
                let change
                const numChanges = changes.length
                const byteLength = Buffer.byteLength(
                  JSON.stringify(changes),
                  'utf8'
                )
                let numOperations = 0
                for (change of changes) {
                  if (change.operations != null) {
                    numOperations += change.operations.length
                  }
                }

                metrics.timing('history-store.request.changes', numChanges, 1)
                metrics.timing('history-store.request.bytes', byteLength, 1)
                metrics.timing(
                  'history-store.request.operations',
                  numOperations,
                  1
                )

                // thresholds taken from write_latex/main/lib/history_exporter.rb
                if (numChanges > 1000) {
                  metrics.inc('history-store.request.exceeds-threshold.changes')
                }
                if (byteLength > Math.pow(1024, 2)) {
                  metrics.inc('history-store.request.exceeds-threshold.bytes')
                  const changeLengths = changes.map(change =>
                    Buffer.byteLength(JSON.stringify(change), 'utf8')
                  )
                  logger.warn(
                    { projectId, byteLength, changeLengths },
                    'change size exceeds limit'
                  )
                }

                cb = profile.wrap('sendChanges', cb)
                // this is usually the longest request, so extend the lock before starting it
                extendLock(error => {
                  if (error != null) {
                    return cb(error)
                  }
                  if (changes.length === 0) {
                    return cb()
                  } // avoid unnecessary requests to history service
                  HistoryStoreManager.sendChanges(
                    projectId,
                    projectHistoryId,
                    changes,
                    baseVersion,
                    cb
                  )
                })
              },
              cb => {
                cb = profile.wrap('setResyncState', cb)
                SyncManager.setResyncState(projectId, newSyncState, cb)
              },
            ],
            error => {
              profile.end()
              callback(error)
            }
          )
        }
      )
    }
  )
}

_mocks._skipAlreadyAppliedUpdates = (
  projectId,
  updates,
  projectStructureAndDocVersions
) => {
  function alreadySeenProjectVersion(previousProjectStructureVersion, update) {
    return (
      UpdateTranslator.isProjectStructureUpdate(update) &&
      previousProjectStructureVersion != null &&
      update.version != null &&
      Versions.gte(previousProjectStructureVersion, update.version)
    )
  }

  function alreadySeenDocVersion(previousDocVersions, update) {
    if (UpdateTranslator.isTextUpdate(update) && update.v != null) {
      const docId = update.doc
      return (
        previousDocVersions[docId] != null &&
        previousDocVersions[docId].v != null &&
        Versions.gte(previousDocVersions[docId].v, update.v)
      )
    } else {
      return false
    }
  }

  // check that the incoming updates are in the correct order (we do not
  // want to send out of order updates to the history service)
  let incomingProjectStructureVersion = null
  const incomingDocVersions = {}
  for (const update of updates) {
    if (alreadySeenProjectVersion(incomingProjectStructureVersion, update)) {
      logger.warn(
        { projectId, update, incomingProjectStructureVersion },
        'incoming project structure updates are out of order'
      )
      throw new Errors.OpsOutOfOrderError(
        'project structure version out of order on incoming updates'
      )
    } else if (alreadySeenDocVersion(incomingDocVersions, update)) {
      logger.warn(
        { projectId, update, incomingDocVersions },
        'incoming doc updates are out of order'
      )
      throw new Errors.OpsOutOfOrderError(
        'doc version out of order on incoming updates'
      )
    }
    // update the current project structure and doc versions
    if (UpdateTranslator.isProjectStructureUpdate(update)) {
      incomingProjectStructureVersion = update.version
    } else if (UpdateTranslator.isTextUpdate(update)) {
      incomingDocVersions[update.doc] = { v: update.v }
    }
  }

  // discard updates already applied
  const updatesToApply = []
  const previousProjectStructureVersion = projectStructureAndDocVersions.project
  const previousDocVersions = projectStructureAndDocVersions.docs
  if (projectStructureAndDocVersions != null) {
    const updateProjectVersions = []
    for (const update of updates) {
      if (update != null && update.version != null) {
        updateProjectVersions.push(update.version)
      }
    }
    logger.debug(
      { projectId, projectStructureAndDocVersions, updateProjectVersions },
      'comparing updates with existing project versions'
    )
  }
  for (const update of updates) {
    if (alreadySeenProjectVersion(previousProjectStructureVersion, update)) {
      metrics.inc('updates.discarded_project_structure_version')
      logger.debug(
        { projectId, update, previousProjectStructureVersion },
        'discarding previously applied project structure update'
      )
      continue
    }
    if (alreadySeenDocVersion(previousDocVersions, update)) {
      metrics.inc('updates.discarded_doc_version')
      logger.debug(
        { projectId, update, previousDocVersions },
        'discarding previously applied doc update'
      )
      continue
    }
    // remove non-BMP characters from resync updates that have bypassed the normal docupdater flow
    _sanitizeUpdate(update)
    // if all checks above are ok then accept the update
    updatesToApply.push(update)
  }

  return updatesToApply
}

export function _skipAlreadyAppliedUpdates(...args) {
  return _mocks._skipAlreadyAppliedUpdates(...args)
}

function _sanitizeUpdate(update) {
  // Adapted from docupdater's UpdateManager. We should clean these in
  // docupdater too, but since we already have queues with this problem we also
  // handle it here for robustness.
  // Replace high and low surrogate characters with 'replacement character' (\uFFFD)
  const removeBadChars = str => str.replace(/[\uD800-\uDFFF]/g, '\uFFFD')
  // clean up any bad chars in resync diffs
  if (update.op) {
    for (const op of update.op) {
      if (op.i != null) {
        op.i = removeBadChars(op.i)
      }
    }
  }
  // clean up any bad chars in resync new docs
  if (update.docLines != null) {
    update.docLines = removeBadChars(update.docLines)
  }
  return update
}

export const promises = {
  /** @type {(projectId: string) => Promise<number>} */
  processUpdatesForProject: promisify(processUpdatesForProject),
  /** @type {(projectId: string, opts: any) => Promise<number>} */
  startResyncAndProcessUpdatesUnderLock: promisify(
    startResyncAndProcessUpdatesUnderLock
  ),
}
37
services/project-history/app/js/Utils.js
Normal file
37
services/project-history/app/js/Utils.js
Normal file
@@ -0,0 +1,37 @@
// @ts-check

/**
 * @import { CommentOp, DeleteOp, InsertOp, Op, RetainOp } from './types'
 */

/**
 * @param {Op} op
 * @returns {op is InsertOp}
 */
export function isInsert(op) {
  return 'i' in op && op.i != null
}

/**
 * @param {Op} op
 * @returns {op is RetainOp}
 */
export function isRetain(op) {
  return 'r' in op && op.r != null
}

/**
 * @param {Op} op
 * @returns {op is DeleteOp}
 */
export function isDelete(op) {
  return 'd' in op && op.d != null
}

/**
 * @param {Op} op
 * @returns {op is CommentOp}
 */
export function isComment(op) {
  return 'c' in op && op.c != null && 't' in op && op.t != null
}
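
// Example (illustrative) of narrowing an Op before reading its payload:
//   if (isInsert(op)) {
//     doSomethingWith(op.i, op.p)
//   }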
12
services/project-history/app/js/Validation.js
Normal file
12
services/project-history/app/js/Validation.js
Normal file
@@ -0,0 +1,12 @@
import { celebrate, errors } from 'celebrate'

export { Joi } from 'celebrate'

export const errorMiddleware = errors()

/**
 * Validation middleware
 */
export function validate(schema) {
  return celebrate(schema, { allowUnknown: true })
}
68
services/project-history/app/js/Versions.js
Normal file
68
services/project-history/app/js/Versions.js
Normal file
@@ -0,0 +1,68 @@
/* eslint-disable
    no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
 * decaffeinate suggestions:
 * DS101: Remove unnecessary use of Array.from
 * DS102: Remove unnecessary code created because of implicit returns
 * DS207: Consider shorter variations of null checks
 * Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
 */
// Compare Versions like 1.2 < 4.1
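// e.g. (illustrative) gte('1.10', '1.9') is true because components are
// compared numerically ([1, 10] vs [1, 9]); plain integers take the fast
// path, so gte(3, 2) is also true.
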
const convertToArray = v => Array.from(v.split('.')).map(x => parseInt(x, 10))

const cmp = function (v1, v2) {
  // allow comparison to work with integers
  if (typeof v1 === 'number' && typeof v2 === 'number') {
    if (v1 > v2) {
      return +1
    }
    if (v1 < v2) {
      return -1
    }
    // otherwise equal
    return 0
  }
  // comparison with strings
  v1 = convertToArray(v1)
  v2 = convertToArray(v2)
  while (v1.length || v2.length) {
    const [x, y] = Array.from([v1.shift(), v2.shift()])
    if (x > y) {
      return +1
    }
    if (x < y) {
      return -1
    }
    if (x != null && y == null) {
      return +1
    }
    if (x == null && y != null) {
      return -1
    }
  }
  return 0
}

export function compare(v1, v2) {
  return cmp(v1, v2)
}

export function gt(v1, v2) {
  return cmp(v1, v2) > 0
}

export function lt(v1, v2) {
  return cmp(v1, v2) < 0
}

export function gte(v1, v2) {
  return cmp(v1, v2) >= 0
}

export function lte(v1, v2) {
  return cmp(v1, v2) <= 0
}
112
services/project-history/app/js/WebApiManager.js
Normal file
112
services/project-history/app/js/WebApiManager.js
Normal file
@@ -0,0 +1,112 @@
import { callbackify } from 'node:util'
import { setTimeout } from 'node:timers/promises'
import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import {
  fetchNothing,
  fetchJson,
  RequestFailedError,
} from '@overleaf/fetch-utils'
import * as Errors from './Errors.js'
import * as RedisManager from './RedisManager.js'

let RETRY_TIMEOUT_MS = 5000

async function getHistoryId(projectId) {
  Metrics.inc('history_id_cache_requests_total')
  const cachedHistoryId =
    await RedisManager.promises.getCachedHistoryId(projectId)
  if (cachedHistoryId) {
    Metrics.inc('history_id_cache_hits_total')
    return cachedHistoryId
  } else {
    const project = await _getProjectDetails(projectId)
    const historyId =
      project.overleaf &&
      project.overleaf.history &&
      project.overleaf.history.id
    if (historyId != null) {
      await RedisManager.promises.setCachedHistoryId(projectId, historyId)
    }
    return historyId
  }
}

async function requestResync(projectId, opts = {}) {
  try {
    const body = {}
    if (opts.historyRangesMigration) {
      body.historyRangesMigration = opts.historyRangesMigration
    }
    if (opts.resyncProjectStructureOnly) {
      body.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
    }
    await fetchNothing(
      `${Settings.apis.web.url}/project/${projectId}/history/resync`,
      {
        method: 'POST',
        signal: AbortSignal.timeout(6 * 60000),
        basicAuth: {
          user: Settings.apis.web.user,
          password: Settings.apis.web.pass,
        },
        json: body,
      }
    )
  } catch (err) {
    if (err instanceof RequestFailedError && err.response.status === 404) {
      throw new Errors.NotFoundError('got a 404 from web api').withCause(err)
    } else {
      throw err
    }
  }
}

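// Fetch project details from web, retrying once after RETRY_TIMEOUT_MS; a 404
// is converted to a NotFoundError straight away.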
async function _getProjectDetails(projectId, callback) {
  logger.debug({ projectId }, 'getting project details from web')
  let attempts = 0
  while (true) {
    attempts += 1
    try {
      return await fetchJson(
        `${Settings.apis.web.url}/project/${projectId}/details`,
        {
          signal: AbortSignal.timeout(16000),
          basicAuth: {
            user: Settings.apis.web.user,
            password: Settings.apis.web.pass,
          },
        }
      )
    } catch (err) {
      if (err instanceof RequestFailedError && err.response.status === 404) {
        throw new Errors.NotFoundError('got a 404 from web api').withCause(err)
      } else if (attempts < 2) {
        // retry after 5 seconds
        await setTimeout(RETRY_TIMEOUT_MS)
      } else {
        throw err
      }
    }
  }
}

/**
 * Adjust the retry timeout in tests
 */
export async function setRetryTimeoutMs(timeoutMs) {
  RETRY_TIMEOUT_MS = timeoutMs
}

// EXPORTS

const getHistoryIdCb = callbackify(getHistoryId)
const requestResyncCb = callbackify(requestResync)

export { getHistoryIdCb as getHistoryId, requestResyncCb as requestResync }

export const promises = {
  getHistoryId,
  requestResync,
}
22
services/project-history/app/js/mongo-types.ts
Normal file
22
services/project-history/app/js/mongo-types.ts
Normal file
@@ -0,0 +1,22 @@
import { ObjectId } from 'mongodb-legacy'

export type ProjectHistoryFailure = {
  _id: ObjectId
  project_id: string
  attempts: number
  resyncAttempts: number
  resyncStartedAt: Date
  requestCount?: number
  history: (ErrorRecord | SyncStartRecord)[]
} & ErrorRecord

type ErrorRecord = {
  error: string
  stack: string
  queueSize: number
  ts: Date
}

type SyncStartRecord = {
  resyncStartedAt: Date
}
27
services/project-history/app/js/mongodb.js
Normal file
27
services/project-history/app/js/mongodb.js
Normal file
@@ -0,0 +1,27 @@
import Metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import mongodb from 'mongodb-legacy'
const { MongoClient, ObjectId } = mongodb

/**
 * @import { ProjectHistoryFailure } from './mongo-types.ts'
 */

export { ObjectId }

export const mongoClient = new MongoClient(
  Settings.mongo.url,
  Settings.mongo.options
)
const mongoDb = mongoClient.db()

Metrics.mongodb.monitor(mongoClient)

export const db = {
  deletedProjects: mongoDb.collection('deletedProjects'),
  projects: mongoDb.collection('projects'),
  /** @type {mongodb.Collection<ProjectHistoryFailure>} */
  projectHistoryFailures: mongoDb.collection('projectHistoryFailures'),
  projectHistoryLabels: mongoDb.collection('projectHistoryLabels'),
  projectHistorySyncState: mongoDb.collection('projectHistorySyncState'),
}
61
services/project-history/app/js/server.js
Normal file
61
services/project-history/app/js/server.js
Normal file
@@ -0,0 +1,61 @@
import Metrics from '@overleaf/metrics'
import logger from '@overleaf/logger'
import express from 'express'
import bodyParser from 'body-parser'
import * as Errors from './Errors.js'
import * as Router from './Router.js'
import * as Validation from './Validation.js'

const HistoryLogger = logger.initialize('project-history').logger

Metrics.event_loop.monitor(logger)
Metrics.memory.monitor(logger)
Metrics.leaked_sockets.monitor(logger)
Metrics.open_sockets.monitor()

// log updates as truncated strings
function truncateFn(updates) {
  return JSON.parse(
    JSON.stringify(updates, function (key, value) {
      let len
      if (typeof value === 'string' && (len = value.length) > 80) {
        return (
          value.substr(0, 32) +
          `...(message of length ${len} truncated)...` +
          value.substr(-32)
        )
      } else {
        return value
      }
    })
  )
}

HistoryLogger.addSerializers({
  rawUpdate: truncateFn,
  rawUpdates: truncateFn,
  newUpdates: truncateFn,
  lastUpdate: truncateFn,
})

export const app = express()
app.use(bodyParser.json())
app.use(bodyParser.urlencoded({ extended: true }))
app.use(Metrics.http.monitor(logger))
Router.initialize(app)
Metrics.injectMetricsRoute(app)
app.use(Validation.errorMiddleware)
app.use(function (error, req, res, next) {
  if (error instanceof Errors.NotFoundError) {
    res.sendStatus(404)
  } else if (error instanceof Errors.BadRequestError) {
    res.sendStatus(400)
  } else if (error instanceof Errors.InconsistentChunkError) {
    res.sendStatus(422)
  } else if (error instanceof Errors.TooManyRequestsError) {
    res.status(429).set('Retry-After', 300).end()
  } else {
    logger.error({ err: error, req }, error.message)
    res.status(500).json({ message: 'an internal error occurred' })
  }
})
253
services/project-history/app/js/types.ts
Normal file
253
services/project-history/app/js/types.ts
Normal file
@@ -0,0 +1,253 @@
import { HistoryRanges } from '../../../document-updater/app/js/types'
import { LinkedFileData, RawOrigin } from 'overleaf-editor-core/lib/types'

export type Update =
  | TextUpdate
  | AddDocUpdate
  | AddFileUpdate
  | RenameUpdate
  | DeleteCommentUpdate
  | SetCommentStateUpdate
  | SetFileMetadataOperation
  | ResyncProjectStructureUpdate
  | ResyncDocContentUpdate

export type ProjectStructureUpdate =
  | AddDocUpdate
  | AddFileUpdate
  | RenameUpdate
  | SetFileMetadataOperation

export type UpdateMeta = {
  user_id: string
  ts: number
  source?: string
  type?: string
  origin?: RawOrigin
  tc?: string
  resync?: boolean
}

export type TextUpdate = {
  doc: string
  op: Op[]
  v: number
  meta: UpdateMeta & {
    pathname: string
    doc_length: number
    doc_hash?: string
    history_doc_length?: number
  }
}

export type SetCommentStateUpdate = {
  pathname: string
  commentId: string
  resolved: boolean
  meta: UpdateMeta
}

export type SetFileMetadataOperation = {
  pathname: string
  meta: UpdateMeta
  metadata: LinkedFileData | object
}

export type DeleteCommentUpdate = {
  pathname: string
  deleteComment: string
  meta: UpdateMeta
}

type ProjectUpdateBase = {
  version: string
  projectHistoryId: string
  meta: UpdateMeta
  doc: string
}

export type AddDocUpdate = ProjectUpdateBase & {
  pathname: string
  docLines: string
  ranges?: HistoryRanges
}

export type AddFileUpdate = ProjectUpdateBase & {
  pathname: string
  file: string
  url: string
  hash: string
  createdBlob?: boolean
  metadata?: LinkedFileData
}

export type RenameUpdate = ProjectUpdateBase & {
  pathname: string
  new_pathname: string
}

export type ResyncProjectStructureUpdate = {
  resyncProjectStructure: {
    docs: Doc[]
    files: File[]
  }
  projectHistoryId: string
  meta: {
    ts: string
  }
  // optional fields for resyncProjectStructureOnly=true
  resyncProjectStructureOnly?: boolean
  _raw: string
}

export type ResyncDocContentUpdate = {
  resyncDocContent: {
    content: string
    version: number
    ranges?: Ranges
    resolvedCommentIds?: string[]
  }
  projectHistoryId: string
  path: string
  doc: string
  meta: {
    ts: string
  }
}

export type Op = RetainOp | InsertOp | DeleteOp | CommentOp

export type RetainOp = {
  r: string
  p: number
  hpos?: number
  tracking?: TrackingDirective
}

export type InsertOp = {
  i: string
  p: number
  u?: boolean
  hpos?: number
  trackedDeleteRejection?: boolean
  commentIds?: string[]
}

export type DeleteOp = {
  d: string
  p: number
  u?: boolean
  hpos?: number
  trackedChanges?: TrackedChangesInsideDelete[]
}

export type TrackedChangesInsideDelete = {
  type: 'insert' | 'delete'
  offset: number
  length: number
}

export type CommentOp = {
  c: string
  p: number
  t: string
  hpos?: number
  hlen?: number
  resolved?: boolean
}

export type UpdateWithBlob<T extends Update = Update> = {
  update: T
  blobHashes: T extends AddDocUpdate | AddFileUpdate
    ? {
        file: string
        ranges?: string
      }
    : never
}

export type TrackingProps = {
  type: 'insert' | 'delete'
  userId: string
  ts: string
}

export type TrackingDirective = TrackingProps | { type: 'none' }

export type TrackingType = 'insert' | 'delete' | 'none'

export type RawScanOp =
  | number
  | string
  | { r: number; tracking?: TrackingDirective }
  | { i: string; tracking?: TrackingProps; commentIds?: string[] }
  | { d: number }

export type TrackedChangeSnapshot = {
  op: {
    p: number
  } & ({ d: string } | { i: string })
  metadata: {
    ts: string
    user_id: string
  }
}

export type CommentSnapshot = {
  op: {
    p: number
    t: string
    c: string
    resolved: boolean
  }
}

export type RangesSnapshot = {
  changes: TrackedChangeSnapshot[]
  comments: CommentSnapshot[]
}

export type Doc = {
  doc: string
  path: string
}

export type File = {
  file: string
  url?: string
  path: string
  _hash?: string
  createdBlob?: boolean
  metadata?: LinkedFileData
}

export type Entity = Doc | File

export type Ranges = {
  comments?: Comment[]
  changes?: TrackedChange[]
}

export type Comment = {
  id: string
  op: CommentOp
  metadata: {
    user_id: string
    ts: string
  }
}

export type TrackedChange = {
  id: string
  op: InsertOp | DeleteOp
  metadata: {
    user_id: string
    ts: string
  }
}

export type TrackedChangeTransition = {
  pos: number
  tracking: TrackingDirective
  stage: 'persisted' | 'expected'
}