first commit

2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,145 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let DeleteQueueManager
const Settings = require('@overleaf/settings')
const RedisManager = require('./RedisManager')
const ProjectManager = require('./ProjectManager')
const logger = require('@overleaf/logger')
const metrics = require('./Metrics')
// Maintain a sorted set of project flushAndDelete requests, ordered by timestamp
// (ZADD), and process them from oldest to newest. A flushAndDelete request comes
// from real-time and is triggered when a user leaves a project.
//
// The aim is to remove the project from redis 5 minutes after the last request
// if there has been no activity (document updates) in that time. If there is
// activity we can expect a further flushAndDelete request when the editing user
// leaves the project.
//
// If a new flushAndDelete request comes in while an existing request is already
// in the queue we update the timestamp as we can postpone flushing further.
//
// Documents are processed by checking the queue, seeing if the first entry is
// older than 5 minutes, and popping it from the queue in that case.
module.exports = DeleteQueueManager = {
flushAndDeleteOldProjects(options, callback) {
const startTime = Date.now()
const cutoffTime =
startTime - options.min_delete_age + 100 * (Math.random() - 0.5)
let count = 0
const flushProjectIfNotModified = (projectId, flushTimestamp, cb) =>
ProjectManager.getProjectDocsTimestamps(
projectId,
function (err, timestamps) {
if (err != null) {
return callback(err)
}
if (timestamps.length === 0) {
logger.debug(
{ projectId },
'skipping flush of queued project - no timestamps'
)
return cb()
}
// are any of the timestamps newer than the time the project was flushed?
for (const timestamp of Array.from(timestamps)) {
if (timestamp > flushTimestamp) {
metrics.inc('queued-delete-skipped')
logger.debug(
{ projectId, timestamps, flushTimestamp },
'found newer timestamp, will skip delete'
)
return cb()
}
}
logger.debug({ projectId, flushTimestamp }, 'flushing queued project')
return ProjectManager.flushAndDeleteProjectWithLocks(
projectId,
{ skip_history_flush: false },
function (err) {
if (err != null) {
logger.err({ projectId, err }, 'error flushing queued project')
}
metrics.inc('queued-delete-completed')
return cb(null, true)
}
)
}
)
function flushNextProject() {
const now = Date.now()
if (now - startTime > options.timeout) {
logger.debug('hit time limit on flushing old projects')
return callback(null, count)
}
if (count > options.limit) {
logger.debug('hit count limit on flushing old projects')
return callback(null, count)
}
return RedisManager.getNextProjectToFlushAndDelete(
cutoffTime,
function (err, projectId, flushTimestamp, queueLength) {
if (err != null) {
return callback(err, count)
}
if (projectId == null) {
return callback(null, count)
}
logger.debug({ projectId, queueLength }, 'flushing queued project')
metrics.globalGauge('queued-flush-backlog', queueLength)
return flushProjectIfNotModified(
projectId,
flushTimestamp,
function (err, flushed) {
if (err) {
// Do not stop processing the queue in case the flush fails.
// Slowing down the processing can fill up redis.
metrics.inc('queued-delete-error')
}
if (flushed) {
count++
}
return flushNextProject()
}
)
}
)
}
return flushNextProject()
},
startBackgroundFlush() {
const SHORT_DELAY = 10
const LONG_DELAY = 1000
function doFlush() {
if (Settings.shuttingDown) {
logger.info('discontinuing background flush due to shutdown')
return
}
return DeleteQueueManager.flushAndDeleteOldProjects(
{
timeout: 1000,
min_delete_age: 3 * 60 * 1000,
limit: 1000, // high value, to ensure we always flush enough projects
},
(_err, flushed) =>
setTimeout(doFlush, flushed > 10 ? SHORT_DELAY : LONG_DELAY)
)
}
return doFlush()
},
}
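// Usage sketch (illustrative, not part of the original module; shown as it
// might appear in a separate caller such as service startup). The option
// values below are example numbers, not prescribed defaults.
const DeleteQueueManager = require('./DeleteQueueManager')

// continuous background processing of the flushAndDelete queue
DeleteQueueManager.startBackgroundFlush()

// one-off sweep: flush projects whose last flushAndDelete request is older
// than 5 minutes, giving up after 60 seconds or 1000 projects
DeleteQueueManager.flushAndDeleteOldProjects(
  { min_delete_age: 5 * 60 * 1000, timeout: 60 * 1000, limit: 1000 },
  (err, flushedCount) => {
    if (err) throw err
    console.log(`flushed ${flushedCount} queued projects`)
  }
)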

View File

@@ -0,0 +1,40 @@
const DMP = require('diff-match-patch')
const dmp = new DMP()
// Do not attempt to produce a diff for more than 100ms
dmp.Diff_Timeout = 0.1
module.exports = {
ADDED: 1,
REMOVED: -1,
UNCHANGED: 0,
diffAsShareJsOp(before, after) {
const diffs = dmp.diff_main(before.join('\n'), after.join('\n'))
dmp.diff_cleanupSemantic(diffs)
const ops = []
let position = 0
for (const diff of diffs) {
const type = diff[0]
const content = diff[1]
if (type === this.ADDED) {
ops.push({
i: content,
p: position,
})
position += content.length
} else if (type === this.REMOVED) {
ops.push({
d: content,
p: position,
})
} else if (type === this.UNCHANGED) {
position += content.length
} else {
throw new Error('Unknown type')
}
}
return ops
},
}
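// Usage sketch (illustrative, not part of the original module): documents are
// arrays of lines; the resulting ShareJS ops use character offsets into the
// joined "before" text.
const DiffCodec = require('./DiffCodec')

const before = ['hello world']
const after = ['hello brave new world']
const ops = DiffCodec.diffAsShareJsOp(before, after)
// => [{ i: 'brave new ', p: 6 }]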

View File

@@ -0,0 +1,112 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS202: Simplify dynamic range loops
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let DispatchManager
const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
const Keys = require('./UpdateKeys')
const redis = require('@overleaf/redis-wrapper')
const Errors = require('./Errors')
const _ = require('lodash')
const UpdateManager = require('./UpdateManager')
const Metrics = require('./Metrics')
const RateLimitManager = require('./RateLimitManager')
module.exports = DispatchManager = {
createDispatcher(RateLimiter, queueShardNumber) {
let pendingListKey
if (queueShardNumber === 0) {
pendingListKey = 'pending-updates-list'
} else {
pendingListKey = `pending-updates-list-${queueShardNumber}`
}
const client = redis.createClient(Settings.redis.documentupdater)
const worker = {
client,
_waitForUpdateThenDispatchWorker(callback) {
if (callback == null) {
callback = function () {}
}
const timer = new Metrics.Timer('worker.waiting')
return worker.client.blpop(pendingListKey, 0, function (error, result) {
logger.debug(`getting ${queueShardNumber}`, error, result)
timer.done()
if (error != null) {
return callback(error)
}
if (result == null) {
return callback()
}
const [listName, docKey] = Array.from(result)
const [projectId, docId] = Array.from(
Keys.splitProjectIdAndDocId(docKey)
)
// Dispatch this in the background
const backgroundTask = cb =>
UpdateManager.processOutstandingUpdatesWithLock(
projectId,
docId,
function (error) {
// log everything except OpRangeNotAvailable errors; these are normal
if (error != null) {
// downgrade OpRangeNotAvailable and "Delete component" errors so they are not sent to sentry
const logAsDebug =
error instanceof Errors.OpRangeNotAvailableError ||
error instanceof Errors.DeleteMismatchError
if (logAsDebug) {
logger.debug(
{ err: error, projectId, docId },
'error processing update'
)
} else {
logger.error(
{ err: error, projectId, docId },
'error processing update'
)
}
}
return cb()
}
)
return RateLimiter.run(backgroundTask, callback)
})
},
run() {
if (Settings.shuttingDown) {
return
}
return worker._waitForUpdateThenDispatchWorker(error => {
if (error != null) {
logger.error({ err: error }, 'Error in worker process')
throw error
} else {
return worker.run()
}
})
},
}
return worker
},
createAndStartDispatchers(number) {
const RateLimiter = new RateLimitManager(number)
_.times(number, function (shardNumber) {
return DispatchManager.createDispatcher(RateLimiter, shardNumber).run()
})
},
}
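// Usage sketch (illustrative, not part of the original module): at startup a
// fixed number of dispatcher workers is created, one per queue shard. The
// shard count of 10 is an example value.
const DispatchManager = require('./DispatchManager')

DispatchManager.createAndStartDispatchers(10)
// shard 0 blocks on "pending-updates-list",
// shards 1-9 block on "pending-updates-list-1" ... "pending-updates-list-9"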

View File

@@ -0,0 +1,708 @@
const { callbackifyAll } = require('@overleaf/promise-utils')
const RedisManager = require('./RedisManager')
const ProjectHistoryRedisManager = require('./ProjectHistoryRedisManager')
const PersistenceManager = require('./PersistenceManager')
const DiffCodec = require('./DiffCodec')
const logger = require('@overleaf/logger')
const Metrics = require('./Metrics')
const HistoryManager = require('./HistoryManager')
const Errors = require('./Errors')
const RangesManager = require('./RangesManager')
const { extractOriginOrSource } = require('./Utils')
const { getTotalSizeOfLines } = require('./Limits')
const Settings = require('@overleaf/settings')
const MAX_UNFLUSHED_AGE = 300 * 1000 // 5 mins, document should be flushed to mongo within this time after a change
const DocumentManager = {
async getDoc(projectId, docId) {
const {
lines,
version,
ranges,
resolvedCommentIds,
pathname,
projectHistoryId,
unflushedTime,
historyRangesSupport,
} = await RedisManager.promises.getDoc(projectId, docId)
if (lines == null || version == null) {
logger.debug(
{ projectId, docId },
'doc not in redis so getting from persistence API'
)
const {
lines,
version,
ranges,
resolvedCommentIds,
pathname,
projectHistoryId,
historyRangesSupport,
} = await PersistenceManager.promises.getDoc(projectId, docId)
logger.debug(
{
projectId,
docId,
lines,
ranges,
resolvedCommentIds,
version,
pathname,
projectHistoryId,
historyRangesSupport,
},
'got doc from persistence API'
)
await RedisManager.promises.putDocInMemory(
projectId,
docId,
lines,
version,
ranges,
resolvedCommentIds,
pathname,
projectHistoryId,
historyRangesSupport
)
return {
lines,
version,
ranges: ranges || {},
resolvedCommentIds,
pathname,
projectHistoryId,
unflushedTime: null,
alreadyLoaded: false,
historyRangesSupport,
}
} else {
return {
lines,
version,
ranges,
pathname,
projectHistoryId,
resolvedCommentIds,
unflushedTime,
alreadyLoaded: true,
historyRangesSupport,
}
}
},
async getDocAndRecentOps(projectId, docId, fromVersion) {
const { lines, version, ranges, pathname, projectHistoryId } =
await DocumentManager.getDoc(projectId, docId)
if (fromVersion === -1) {
return { lines, version, ops: [], ranges, pathname, projectHistoryId }
} else {
const ops = await RedisManager.promises.getPreviousDocOps(
docId,
fromVersion,
version
)
return {
lines,
version,
ops,
ranges,
pathname,
projectHistoryId,
}
}
},
async appendToDoc(projectId, docId, linesToAppend, originOrSource, userId) {
const { lines: currentLines } = await DocumentManager.getDoc(
projectId,
docId
)
const currentLineSize = getTotalSizeOfLines(currentLines)
const addedSize = getTotalSizeOfLines(linesToAppend)
const newlineSize = '\n'.length
if (currentLineSize + newlineSize + addedSize > Settings.max_doc_length) {
throw new Errors.FileTooLargeError(
'doc would become too large if appending this text'
)
}
return await DocumentManager.setDoc(
projectId,
docId,
currentLines.concat(linesToAppend),
originOrSource,
userId,
false,
false
)
},
async setDoc(
projectId,
docId,
newLines,
originOrSource,
userId,
undoing,
external
) {
if (newLines == null) {
throw new Error('No lines were provided to setDoc')
}
const UpdateManager = require('./UpdateManager')
const {
lines: oldLines,
version,
alreadyLoaded,
} = await DocumentManager.getDoc(projectId, docId)
if (oldLines != null && oldLines.length > 0 && oldLines[0].text != null) {
logger.debug(
{ docId, projectId, oldLines, newLines },
'document is JSON so not updating'
)
return
}
logger.debug(
{ docId, projectId, oldLines, newLines },
'setting a document via http'
)
const op = DiffCodec.diffAsShareJsOp(oldLines, newLines)
if (undoing) {
for (const o of op || []) {
o.u = true
} // Turn on undo flag for each op for track changes
}
const { origin, source } = extractOriginOrSource(originOrSource)
const update = {
doc: docId,
op,
v: version,
meta: {
user_id: userId,
},
}
if (external) {
update.meta.type = 'external'
}
if (origin) {
update.meta.origin = origin
} else if (source) {
update.meta.source = source
}
// Keep track of external updates, whether they are for live documents
// (flush) or unloaded documents (evict), and whether the update is a no-op.
Metrics.inc('external-update', 1, {
status: op.length > 0 ? 'diff' : 'noop',
method: alreadyLoaded ? 'flush' : 'evict',
path: source,
})
// Do not notify the frontend about a noop update.
// We still want to execute the code below
// to evict the doc if we loaded it into redis for
// this update, otherwise the doc would never be
// removed from redis.
if (op.length > 0) {
await UpdateManager.promises.applyUpdate(projectId, docId, update)
}
// If the document was loaded already, then someone has it open
// in a project, and the usual flushing mechanism will happen.
// Otherwise we should remove it immediately since nothing else
// is using it.
if (alreadyLoaded) {
return await DocumentManager.flushDocIfLoaded(projectId, docId)
} else {
try {
return await DocumentManager.flushAndDeleteDoc(projectId, docId, {})
} finally {
// There is no harm in flushing project history if the previous
// call failed and sometimes it is required
HistoryManager.flushProjectChangesAsync(projectId)
}
}
},
async flushDocIfLoaded(projectId, docId) {
const {
lines,
version,
ranges,
unflushedTime,
lastUpdatedAt,
lastUpdatedBy,
} = await RedisManager.promises.getDoc(projectId, docId)
if (lines == null || version == null) {
Metrics.inc('flush-doc-if-loaded', 1, { status: 'not-loaded' })
logger.debug({ projectId, docId }, 'doc is not loaded so not flushing')
// TODO: return a flag to bail out, as we go on to remove doc from memory?
return
} else if (unflushedTime == null) {
Metrics.inc('flush-doc-if-loaded', 1, { status: 'unmodified' })
logger.debug({ projectId, docId }, 'doc is not modified so not flushing')
return
}
logger.debug({ projectId, docId, version }, 'flushing doc')
Metrics.inc('flush-doc-if-loaded', 1, { status: 'modified' })
const result = await PersistenceManager.promises.setDoc(
projectId,
docId,
lines,
version,
ranges,
lastUpdatedAt,
lastUpdatedBy || null
)
await RedisManager.promises.clearUnflushedTime(docId)
return result
},
async flushAndDeleteDoc(projectId, docId, options) {
let result
try {
result = await DocumentManager.flushDocIfLoaded(projectId, docId)
} catch (error) {
if (options.ignoreFlushErrors) {
logger.warn(
{ projectId, docId, err: error },
'ignoring flush error while deleting document'
)
} else {
throw error
}
}
await RedisManager.promises.removeDocFromMemory(projectId, docId)
return result
},
async acceptChanges(projectId, docId, changeIds) {
if (changeIds == null) {
changeIds = []
}
const {
lines,
version,
ranges,
pathname,
projectHistoryId,
historyRangesSupport,
} = await DocumentManager.getDoc(projectId, docId)
if (lines == null || version == null) {
throw new Errors.NotFoundError(`document not found: ${docId}`)
}
const newRanges = RangesManager.acceptChanges(
projectId,
docId,
changeIds,
ranges,
lines
)
await RedisManager.promises.updateDocument(
projectId,
docId,
lines,
version,
[],
newRanges,
{}
)
if (historyRangesSupport) {
const historyUpdates = RangesManager.getHistoryUpdatesForAcceptedChanges({
docId,
acceptedChangeIds: changeIds,
changes: ranges.changes || [],
lines,
pathname,
projectHistoryId,
})
if (historyUpdates.length === 0) {
return
}
await ProjectHistoryRedisManager.promises.queueOps(
projectId,
...historyUpdates.map(op => JSON.stringify(op))
)
}
},
async updateCommentState(projectId, docId, commentId, userId, resolved) {
const { lines, version, pathname, historyRangesSupport } =
await DocumentManager.getDoc(projectId, docId)
if (lines == null || version == null) {
throw new Errors.NotFoundError(`document not found: ${docId}`)
}
if (historyRangesSupport) {
await RedisManager.promises.updateCommentState(docId, commentId, resolved)
await ProjectHistoryRedisManager.promises.queueOps(
projectId,
JSON.stringify({
pathname,
commentId,
resolved,
meta: {
ts: new Date(),
user_id: userId,
},
})
)
}
},
async getComment(projectId, docId, commentId) {
const { ranges } = await DocumentManager.getDoc(projectId, docId)
const comment = ranges?.comments?.find(comment => comment.id === commentId)
if (!comment) {
throw new Errors.NotFoundError({
message: 'comment not found',
info: { commentId },
})
}
return { comment }
},
async deleteComment(projectId, docId, commentId, userId) {
const { lines, version, ranges, pathname, historyRangesSupport } =
await DocumentManager.getDoc(projectId, docId)
if (lines == null || version == null) {
throw new Errors.NotFoundError(`document not found: ${docId}`)
}
const newRanges = RangesManager.deleteComment(commentId, ranges)
await RedisManager.promises.updateDocument(
projectId,
docId,
lines,
version,
[],
newRanges,
{}
)
if (historyRangesSupport) {
await RedisManager.promises.updateCommentState(docId, commentId, false)
await ProjectHistoryRedisManager.promises.queueOps(
projectId,
JSON.stringify({
pathname,
deleteComment: commentId,
meta: {
ts: new Date(),
user_id: userId,
},
})
)
}
},
async renameDoc(projectId, docId, userId, update, projectHistoryId) {
await RedisManager.promises.renameDoc(
projectId,
docId,
userId,
update,
projectHistoryId
)
},
async getDocAndFlushIfOld(projectId, docId) {
const { lines, version, unflushedTime, alreadyLoaded } =
await DocumentManager.getDoc(projectId, docId)
// if doc was already loaded see if it needs to be flushed
if (
alreadyLoaded &&
unflushedTime != null &&
Date.now() - unflushedTime > MAX_UNFLUSHED_AGE
) {
await DocumentManager.flushDocIfLoaded(projectId, docId)
}
return { lines, version }
},
async resyncDocContents(projectId, docId, path, opts = {}) {
logger.debug({ projectId, docId, path }, 'start resyncing doc contents')
let {
lines,
ranges,
resolvedCommentIds,
version,
projectHistoryId,
historyRangesSupport,
} = await RedisManager.promises.getDoc(projectId, docId)
// To avoid issues where the same docId appears with different paths,
// we use the path from the resyncProjectStructure update. If we used
// the path from the getDoc call to web then the two occurrences of the
// docId would map to the same path, and this would be rejected by
// project-history as an unexpected resyncDocContent update.
if (lines == null || version == null) {
logger.debug(
{ projectId, docId },
'resyncing doc contents - not found in redis - retrieving from web'
)
;({
lines,
ranges,
resolvedCommentIds,
version,
projectHistoryId,
historyRangesSupport,
} = await PersistenceManager.promises.getDoc(projectId, docId, {
peek: true,
}))
} else {
logger.debug(
{ projectId, docId },
'resyncing doc contents - doc in redis - will queue in redis'
)
}
if (opts.historyRangesMigration) {
historyRangesSupport = opts.historyRangesMigration === 'forwards'
}
await ProjectHistoryRedisManager.promises.queueResyncDocContent(
projectId,
projectHistoryId,
docId,
lines,
ranges ?? {},
resolvedCommentIds,
version,
// use the path from the resyncProjectStructure update
path,
historyRangesSupport
)
if (opts.historyRangesMigration) {
await RedisManager.promises.setHistoryRangesSupportFlag(
docId,
historyRangesSupport
)
}
},
async getDocWithLock(projectId, docId) {
const UpdateManager = require('./UpdateManager')
return await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.getDoc,
projectId,
docId
)
},
async getCommentWithLock(projectId, docId, commentId) {
const UpdateManager = require('./UpdateManager')
return await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.getComment,
projectId,
docId,
commentId
)
},
async getDocAndRecentOpsWithLock(projectId, docId, fromVersion) {
const UpdateManager = require('./UpdateManager')
return await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.getDocAndRecentOps,
projectId,
docId,
fromVersion
)
},
async getDocAndFlushIfOldWithLock(projectId, docId) {
const UpdateManager = require('./UpdateManager')
return await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.getDocAndFlushIfOld,
projectId,
docId
)
},
async setDocWithLock(
projectId,
docId,
lines,
source,
userId,
undoing,
external
) {
const UpdateManager = require('./UpdateManager')
return await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.setDoc,
projectId,
docId,
lines,
source,
userId,
undoing,
external
)
},
async appendToDocWithLock(projectId, docId, lines, source, userId) {
const UpdateManager = require('./UpdateManager')
return await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.appendToDoc,
projectId,
docId,
lines,
source,
userId
)
},
async flushDocIfLoadedWithLock(projectId, docId) {
const UpdateManager = require('./UpdateManager')
return await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.flushDocIfLoaded,
projectId,
docId
)
},
async flushAndDeleteDocWithLock(projectId, docId, options) {
const UpdateManager = require('./UpdateManager')
return await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.flushAndDeleteDoc,
projectId,
docId,
options
)
},
async acceptChangesWithLock(projectId, docId, changeIds) {
const UpdateManager = require('./UpdateManager')
await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.acceptChanges,
projectId,
docId,
changeIds
)
},
async updateCommentStateWithLock(
projectId,
docId,
threadId,
userId,
resolved
) {
const UpdateManager = require('./UpdateManager')
await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.updateCommentState,
projectId,
docId,
threadId,
userId,
resolved
)
},
async deleteCommentWithLock(projectId, docId, threadId, userId) {
const UpdateManager = require('./UpdateManager')
await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.deleteComment,
projectId,
docId,
threadId,
userId
)
},
async renameDocWithLock(projectId, docId, userId, update, projectHistoryId) {
const UpdateManager = require('./UpdateManager')
await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.renameDoc,
projectId,
docId,
userId,
update,
projectHistoryId
)
},
async resyncDocContentsWithLock(projectId, docId, path, opts) {
const UpdateManager = require('./UpdateManager')
await UpdateManager.promises.lockUpdatesAndDo(
DocumentManager.resyncDocContents,
projectId,
docId,
path,
opts
)
},
}
module.exports = {
...callbackifyAll(DocumentManager, {
multiResult: {
getDoc: [
'lines',
'version',
'ranges',
'pathname',
'projectHistoryId',
'unflushedTime',
'alreadyLoaded',
'historyRangesSupport',
],
getDocWithLock: [
'lines',
'version',
'ranges',
'pathname',
'projectHistoryId',
'unflushedTime',
'alreadyLoaded',
'historyRangesSupport',
],
getDocAndFlushIfOld: ['lines', 'version'],
getDocAndFlushIfOldWithLock: ['lines', 'version'],
getDocAndRecentOps: [
'lines',
'version',
'ops',
'ranges',
'pathname',
'projectHistoryId',
],
getDocAndRecentOpsWithLock: [
'lines',
'version',
'ops',
'ranges',
'pathname',
'projectHistoryId',
],
getCommentWithLock: ['comment'],
},
}),
promises: DocumentManager,
}
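// Usage sketch (illustrative, not part of the original module): callbackifyAll
// exposes a callback API whose multi-result callbacks spread the named fields
// positionally, while the original async functions remain under `promises`.
const DocumentManager = require('./DocumentManager')

// promise style
async function readDoc(projectId, docId) {
  const { lines, version, pathname } =
    await DocumentManager.promises.getDocWithLock(projectId, docId)
  return { lines, version, pathname }
}

// callback style: fields arrive in the order listed in multiResult above
DocumentManager.getDocWithLock(
  'project-id',
  'doc-id',
  (err, lines, version, ranges, pathname) => {
    if (err) throw err
    console.log(version, pathname)
  }
)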

View File

@@ -0,0 +1,15 @@
const OError = require('@overleaf/o-error')
class NotFoundError extends OError {}
class OpRangeNotAvailableError extends OError {}
class ProjectStateChangedError extends OError {}
class DeleteMismatchError extends OError {}
class FileTooLargeError extends OError {}
module.exports = {
NotFoundError,
OpRangeNotAvailableError,
ProjectStateChangedError,
DeleteMismatchError,
FileTooLargeError,
}

View File

@@ -0,0 +1,179 @@
// @ts-check
const _ = require('lodash')
const { isDelete } = require('./Utils')
/**
* @import { Comment, HistoryComment, HistoryRanges, HistoryTrackedChange } from './types'
* @import { Ranges, TrackedChange } from './types'
*/
/**
* Convert editor ranges to history ranges
*
* @param {Ranges} ranges
* @return {HistoryRanges}
*/
function toHistoryRanges(ranges) {
const changes = ranges.changes ?? []
const comments = (ranges.comments ?? []).slice()
// Changes are assumed to be sorted, but not comments
comments.sort((a, b) => a.op.p - b.op.p)
/**
* This will allow us to go through comments at a different pace as we loop
* through tracked changes
*/
const commentsIterator = new CommentsIterator(comments)
/**
* Current offset between editor pos and history pos
*/
let offset = 0
/**
* History comments that might overlap with the tracked change considered
*
* @type {HistoryComment[]}
*/
let pendingComments = []
/**
* The final history comments generated
*
* @type {HistoryComment[]}
*/
const historyComments = []
/**
* The final history tracked changes generated
*
* @type {HistoryTrackedChange[]}
*/
const historyChanges = []
for (const change of changes) {
historyChanges.push(toHistoryChange(change, offset))
// After this point, we're only interested in tracked deletes
if (!isDelete(change.op)) {
continue
}
// Fill pendingComments with new comments that start before this tracked
// delete and might overlap
for (const comment of commentsIterator.nextComments(change.op.p)) {
pendingComments.push(toHistoryComment(comment, offset))
}
// Save comments that are fully before this tracked delete
const newPendingComments = []
for (const historyComment of pendingComments) {
const commentEnd = historyComment.op.p + historyComment.op.c.length
if (commentEnd <= change.op.p) {
historyComments.push(historyComment)
} else {
newPendingComments.push(historyComment)
}
}
pendingComments = newPendingComments
// The rest of pending comments overlap with this tracked change. Adjust
// their history length.
for (const historyComment of pendingComments) {
historyComment.op.hlen =
(historyComment.op.hlen ?? historyComment.op.c.length) +
change.op.d.length
}
// Adjust the offset
offset += change.op.d.length
}
// Save the last pending comments
for (const historyComment of pendingComments) {
historyComments.push(historyComment)
}
// Save any comments that came after the last tracked change
for (const comment of commentsIterator.nextComments()) {
historyComments.push(toHistoryComment(comment, offset))
}
const historyRanges = {}
if (historyComments.length > 0) {
historyRanges.comments = historyComments
}
if (historyChanges.length > 0) {
historyRanges.changes = historyChanges
}
return historyRanges
}
class CommentsIterator {
/**
* Build a CommentsIterator
*
* @param {Comment[]} comments
*/
constructor(comments) {
this.comments = comments
this.currentIndex = 0
}
/**
* Generator that returns the next comments to consider
*
* @param {number} beforePos - only return comments that start before this position
* @return {Iterable<Comment>}
*/
*nextComments(beforePos = Infinity) {
while (this.currentIndex < this.comments.length) {
const comment = this.comments[this.currentIndex]
if (comment.op.p < beforePos) {
yield comment
this.currentIndex += 1
} else {
return
}
}
}
}
/**
* Convert an editor tracked change into a history tracked change
*
* @param {TrackedChange} change
* @param {number} offset - how much the history change is ahead of the
* editor change
* @return {HistoryTrackedChange}
*/
function toHistoryChange(change, offset) {
/** @type {HistoryTrackedChange} */
const historyChange = _.cloneDeep(change)
if (offset > 0) {
historyChange.op.hpos = change.op.p + offset
}
return historyChange
}
/**
* Convert an editor comment into a history comment
*
* @param {Comment} comment
* @param {number} offset - how much the history comment is ahead of the
* editor comment
* @return {HistoryComment}
*/
function toHistoryComment(comment, offset) {
/** @type {HistoryComment} */
const historyComment = _.cloneDeep(comment)
if (offset > 0) {
historyComment.op.hpos = comment.op.p + offset
}
return historyComment
}
module.exports = {
toHistoryRanges,
}
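// Worked example (illustrative, not part of the original module): a tracked
// delete that sits before a comment shifts the comment forward in history
// coordinates, because the deleted text is still present in the history
// document. Field shapes here are simplified.
const { toHistoryRanges } = require('./HistoryConversions')

const ranges = {
  // tracked delete of "abc" at editor position 5
  changes: [{ id: 'change-1', op: { d: 'abc', p: 5 }, metadata: {} }],
  // comment on "text" starting at editor position 10
  comments: [{ id: 'comment-1', op: { c: 'text', p: 10, t: 'thread-1' } }],
}
const historyRanges = toHistoryRanges(ranges)
// historyRanges.changes[0].op stays at p: 5 (no tracked delete before it)
// historyRanges.comments[0].op gains hpos: 13 (10 + 3 deleted characters)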

View File

@@ -0,0 +1,143 @@
const async = require('async')
const logger = require('@overleaf/logger')
const { promisifyAll } = require('@overleaf/promise-utils')
const request = require('request')
const Settings = require('@overleaf/settings')
const ProjectHistoryRedisManager = require('./ProjectHistoryRedisManager')
const metrics = require('./Metrics')
const HistoryManager = {
// flush changes in the background
flushProjectChangesAsync(projectId) {
HistoryManager.flushProjectChanges(
projectId,
{ background: true },
function () {}
)
},
// flush changes and callback (for when we need to know the queue is flushed)
flushProjectChanges(projectId, options, callback) {
if (callback == null) {
callback = function () {}
}
if (options.skip_history_flush) {
logger.debug({ projectId }, 'skipping flush of project history')
return callback()
}
metrics.inc('history-flush', 1, { status: 'project-history' })
const url = `${Settings.apis.project_history.url}/project/${projectId}/flush`
const qs = {}
if (options.background) {
qs.background = true
} // pass on the background flush option if present
logger.debug({ projectId, url, qs }, 'flushing doc in project history api')
request.post({ url, qs }, function (error, res, body) {
if (error) {
logger.error({ error, projectId }, 'project history api request failed')
callback(error)
} else if (res.statusCode < 200 || res.statusCode >= 300) {
logger.error(
{ projectId },
`project history api returned a failure status code: ${res.statusCode}`
)
callback(
new Error(
`project history api returned a failure status code: ${res.statusCode}`
)
)
} else {
callback()
}
})
},
FLUSH_DOC_EVERY_N_OPS: 100,
FLUSH_PROJECT_EVERY_N_OPS: 500,
recordAndFlushHistoryOps(projectId, ops, projectOpsLength) {
if (ops == null) {
ops = []
}
if (ops.length === 0) {
return
}
// record updates for project history
if (
HistoryManager.shouldFlushHistoryOps(
projectOpsLength,
ops.length,
HistoryManager.FLUSH_PROJECT_EVERY_N_OPS
)
) {
// Do this in the background since it uses HTTP and so may be too
// slow to wait for when processing a doc update.
logger.debug(
{ projectOpsLength, projectId },
'flushing project history api'
)
HistoryManager.flushProjectChangesAsync(projectId)
}
},
shouldFlushHistoryOps(length, opsLength, threshold) {
if (!length) {
return false
} // don't flush unless we know the length
// We want to flush every 100 ops, i.e. 100, 200, 300, etc
// Find out which 'block' (i.e. 0-99, 100-199) we were in before and after pushing these
// ops. If we've changed, then we've gone over a multiple of 100 and should flush.
// (Most of the time, we will only hit 100 and then flushing will put us back to 0)
const previousLength = length - opsLength
const prevBlock = Math.floor(previousLength / threshold)
const newBlock = Math.floor(length / threshold)
return newBlock !== prevBlock
},
MAX_PARALLEL_REQUESTS: 4,
resyncProjectHistory(
projectId,
projectHistoryId,
docs,
files,
opts,
callback
) {
ProjectHistoryRedisManager.queueResyncProjectStructure(
projectId,
projectHistoryId,
docs,
files,
opts,
function (error) {
if (error) {
return callback(error)
}
if (opts.resyncProjectStructureOnly) return callback()
const DocumentManager = require('./DocumentManager')
const resyncDoc = (doc, cb) => {
DocumentManager.resyncDocContentsWithLock(
projectId,
doc.doc,
doc.path,
opts,
cb
)
}
async.eachLimit(
docs,
HistoryManager.MAX_PARALLEL_REQUESTS,
resyncDoc,
callback
)
}
)
},
}
module.exports = HistoryManager
module.exports.promises = promisifyAll(HistoryManager, {
without: [
'flushProjectChangesAsync',
'recordAndFlushHistoryOps',
'shouldFlushHistoryOps',
],
})
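// Worked example (illustrative, not part of the original module): a flush is
// triggered when appending ops crosses a multiple-of-threshold boundary.
const HistoryManager = require('./HistoryManager')

// queue grew from 498 to 502 ops: block 0 -> block 1, so flush
HistoryManager.shouldFlushHistoryOps(
  502,
  4,
  HistoryManager.FLUSH_PROJECT_EVERY_N_OPS
) // => true

// queue grew from 120 to 130 ops: still in block 0, no flush
HistoryManager.shouldFlushHistoryOps(
  130,
  10,
  HistoryManager.FLUSH_PROJECT_EVERY_N_OPS
) // => false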

View File

@@ -0,0 +1,559 @@
const DocumentManager = require('./DocumentManager')
const HistoryManager = require('./HistoryManager')
const ProjectManager = require('./ProjectManager')
const RedisManager = require('./RedisManager')
const Errors = require('./Errors')
const logger = require('@overleaf/logger')
const Settings = require('@overleaf/settings')
const Metrics = require('./Metrics')
const DeleteQueueManager = require('./DeleteQueueManager')
const { getTotalSizeOfLines } = require('./Limits')
const async = require('async')
function getDoc(req, res, next) {
let fromVersion
const docId = req.params.doc_id
const projectId = req.params.project_id
logger.debug({ projectId, docId }, 'getting doc via http')
const timer = new Metrics.Timer('http.getDoc')
if (req.query.fromVersion != null) {
fromVersion = parseInt(req.query.fromVersion, 10)
} else {
fromVersion = -1
}
DocumentManager.getDocAndRecentOpsWithLock(
projectId,
docId,
fromVersion,
(error, lines, version, ops, ranges, pathname) => {
timer.done()
if (error) {
return next(error)
}
logger.debug({ projectId, docId }, 'got doc via http')
if (lines == null || version == null) {
return next(new Errors.NotFoundError('document not found'))
}
res.json({
id: docId,
lines,
version,
ops,
ranges,
pathname,
ttlInS: RedisManager.DOC_OPS_TTL,
})
}
)
}
function getComment(req, res, next) {
const docId = req.params.doc_id
const projectId = req.params.project_id
const commentId = req.params.comment_id
logger.debug({ projectId, docId, commentId }, 'getting comment via http')
DocumentManager.getCommentWithLock(
projectId,
docId,
commentId,
(error, comment) => {
if (error) {
return next(error)
}
if (comment == null) {
return next(new Errors.NotFoundError('comment not found'))
}
res.json(comment)
}
)
}
// return the doc from redis if present, but don't load it from mongo
function peekDoc(req, res, next) {
const docId = req.params.doc_id
const projectId = req.params.project_id
logger.debug({ projectId, docId }, 'peeking at doc via http')
RedisManager.getDoc(projectId, docId, function (error, lines, version) {
if (error) {
return next(error)
}
if (lines == null || version == null) {
return next(new Errors.NotFoundError('document not found'))
}
res.json({ id: docId, lines, version })
})
}
function getProjectDocsAndFlushIfOld(req, res, next) {
const projectId = req.params.project_id
const projectStateHash = req.query.state
// exclude is string of existing docs "id:version,id:version,..."
const excludeItems =
req.query.exclude != null ? req.query.exclude.split(',') : []
logger.debug({ projectId, exclude: excludeItems }, 'getting docs via http')
const timer = new Metrics.Timer('http.getAllDocs')
const excludeVersions = {}
for (const item of excludeItems) {
const [id, version] = item.split(':')
excludeVersions[id] = version
}
logger.debug(
{ projectId, projectStateHash, excludeVersions },
'excluding versions'
)
ProjectManager.getProjectDocsAndFlushIfOld(
projectId,
projectStateHash,
excludeVersions,
(error, result) => {
timer.done()
if (error instanceof Errors.ProjectStateChangedError) {
res.sendStatus(409) // conflict
} else if (error) {
next(error)
} else {
logger.debug(
{
projectId,
result: result.map(doc => `${doc._id}:${doc.v}`),
},
'got docs via http'
)
res.send(result)
}
}
)
}
function getProjectLastUpdatedAt(req, res, next) {
const projectId = req.params.project_id
ProjectManager.getProjectDocsTimestamps(projectId, (err, timestamps) => {
if (err) return next(err)
// Filter out nulls. This can happen when
// - docs get flushed between the listing and getting the individual docs ts
// - a doc flush failed half way (doc keys removed, project tracking not updated)
timestamps = timestamps.filter(ts => !!ts)
timestamps = timestamps.map(ts => parseInt(ts, 10))
timestamps.sort((a, b) => (a > b ? 1 : -1))
res.json({ lastUpdatedAt: timestamps.pop() })
})
}
function clearProjectState(req, res, next) {
const projectId = req.params.project_id
const timer = new Metrics.Timer('http.clearProjectState')
logger.debug({ projectId }, 'clearing project state via http')
ProjectManager.clearProjectState(projectId, error => {
timer.done()
if (error) {
next(error)
} else {
res.sendStatus(200)
}
})
}
function setDoc(req, res, next) {
const docId = req.params.doc_id
const projectId = req.params.project_id
const { lines, source, user_id: userId, undoing } = req.body
const lineSize = getTotalSizeOfLines(lines)
if (lineSize > Settings.max_doc_length) {
logger.warn(
{ projectId, docId, source, lineSize, userId },
'document too large, returning 406 response'
)
return res.sendStatus(406)
}
logger.debug(
{ projectId, docId, lines, source, userId, undoing },
'setting doc via http'
)
const timer = new Metrics.Timer('http.setDoc')
DocumentManager.setDocWithLock(
projectId,
docId,
lines,
source,
userId,
undoing,
true,
(error, result) => {
timer.done()
if (error) {
return next(error)
}
logger.debug({ projectId, docId }, 'set doc via http')
res.json(result)
}
)
}
function appendToDoc(req, res, next) {
const docId = req.params.doc_id
const projectId = req.params.project_id
const { lines, source, user_id: userId } = req.body
const timer = new Metrics.Timer('http.appendToDoc')
DocumentManager.appendToDocWithLock(
projectId,
docId,
lines,
source,
userId,
(error, result) => {
timer.done()
if (error instanceof Errors.FileTooLargeError) {
logger.warn('refusing to append to file, it would become too large')
return res.sendStatus(422)
}
if (error) {
return next(error)
}
logger.debug(
{ projectId, docId, lines, source, userId },
'appending to doc via http'
)
res.json(result)
}
)
}
function flushDocIfLoaded(req, res, next) {
const docId = req.params.doc_id
const projectId = req.params.project_id
logger.debug({ projectId, docId }, 'flushing doc via http')
const timer = new Metrics.Timer('http.flushDoc')
DocumentManager.flushDocIfLoadedWithLock(projectId, docId, error => {
timer.done()
if (error) {
return next(error)
}
logger.debug({ projectId, docId }, 'flushed doc via http')
res.sendStatus(204) // No Content
})
}
function deleteDoc(req, res, next) {
const docId = req.params.doc_id
const projectId = req.params.project_id
const ignoreFlushErrors = req.query.ignore_flush_errors === 'true'
const timer = new Metrics.Timer('http.deleteDoc')
logger.debug({ projectId, docId }, 'deleting doc via http')
DocumentManager.flushAndDeleteDocWithLock(
projectId,
docId,
{ ignoreFlushErrors },
error => {
timer.done()
// There is no harm in flushing project history if the previous call
// failed and sometimes it is required
HistoryManager.flushProjectChangesAsync(projectId)
if (error) {
return next(error)
}
logger.debug({ projectId, docId }, 'deleted doc via http')
res.sendStatus(204) // No Content
}
)
}
function flushProject(req, res, next) {
const projectId = req.params.project_id
logger.debug({ projectId }, 'flushing project via http')
const timer = new Metrics.Timer('http.flushProject')
ProjectManager.flushProjectWithLocks(projectId, error => {
timer.done()
if (error) {
return next(error)
}
logger.debug({ projectId }, 'flushed project via http')
res.sendStatus(204) // No Content
})
}
function deleteProject(req, res, next) {
const projectId = req.params.project_id
logger.debug({ projectId }, 'deleting project via http')
const options = {}
if (req.query.background) {
options.background = true
} // allow non-urgent flushes to be queued
if (req.query.shutdown) {
options.skip_history_flush = true
} // don't flush history when realtime shuts down
if (req.query.background) {
ProjectManager.queueFlushAndDeleteProject(projectId, error => {
if (error) {
return next(error)
}
logger.debug({ projectId }, 'queue delete of project via http')
res.sendStatus(204)
}) // No Content
} else {
const timer = new Metrics.Timer('http.deleteProject')
ProjectManager.flushAndDeleteProjectWithLocks(projectId, options, error => {
timer.done()
if (error) {
return next(error)
}
logger.debug({ projectId }, 'deleted project via http')
res.sendStatus(204) // No Content
})
}
}
function deleteMultipleProjects(req, res, next) {
const projectIds = req.body.project_ids || []
logger.debug({ projectIds }, 'deleting multiple projects via http')
async.eachSeries(
projectIds,
(projectId, cb) => {
logger.debug({ projectId }, 'queue delete of project via http')
ProjectManager.queueFlushAndDeleteProject(projectId, cb)
},
error => {
if (error) {
return next(error)
}
res.sendStatus(204) // No Content
}
)
}
function acceptChanges(req, res, next) {
const { project_id: projectId, doc_id: docId } = req.params
let changeIds = req.body.change_ids
if (changeIds == null) {
changeIds = [req.params.change_id]
}
logger.debug(
{ projectId, docId },
`accepting ${changeIds.length} changes via http`
)
const timer = new Metrics.Timer('http.acceptChanges')
DocumentManager.acceptChangesWithLock(projectId, docId, changeIds, error => {
timer.done()
if (error) {
return next(error)
}
logger.debug(
{ projectId, docId },
`accepted ${changeIds.length} changes via http`
)
res.sendStatus(204) // No Content
})
}
function resolveComment(req, res, next) {
const {
project_id: projectId,
doc_id: docId,
comment_id: commentId,
} = req.params
const userId = req.body.user_id
logger.debug({ projectId, docId, commentId }, 'resolving comment via http')
DocumentManager.updateCommentStateWithLock(
projectId,
docId,
commentId,
userId,
true,
error => {
if (error) {
return next(error)
}
logger.debug({ projectId, docId, commentId }, 'resolved comment via http')
res.sendStatus(204) // No Content
}
)
}
function reopenComment(req, res, next) {
const {
project_id: projectId,
doc_id: docId,
comment_id: commentId,
} = req.params
const userId = req.body.user_id
logger.debug({ projectId, docId, commentId }, 'reopening comment via http')
DocumentManager.updateCommentStateWithLock(
projectId,
docId,
commentId,
userId,
false,
error => {
if (error) {
return next(error)
}
logger.debug({ projectId, docId, commentId }, 'reopened comment via http')
res.sendStatus(204) // No Content
}
)
}
function deleteComment(req, res, next) {
const {
project_id: projectId,
doc_id: docId,
comment_id: commentId,
} = req.params
const userId = req.body.user_id
logger.debug({ projectId, docId, commentId }, 'deleting comment via http')
const timer = new Metrics.Timer('http.deleteComment')
DocumentManager.deleteCommentWithLock(
projectId,
docId,
commentId,
userId,
error => {
timer.done()
if (error) {
return next(error)
}
logger.debug({ projectId, docId, commentId }, 'deleted comment via http')
res.sendStatus(204) // No Content
}
)
}
function updateProject(req, res, next) {
const timer = new Metrics.Timer('http.updateProject')
const projectId = req.params.project_id
const { projectHistoryId, userId, updates = [], version, source } = req.body
logger.debug({ projectId, updates, version }, 'updating project via http')
ProjectManager.updateProjectWithLocks(
projectId,
projectHistoryId,
userId,
updates,
version,
source,
error => {
timer.done()
if (error) {
return next(error)
}
logger.debug({ projectId }, 'updated project via http')
res.sendStatus(204) // No Content
}
)
}
function resyncProjectHistory(req, res, next) {
const projectId = req.params.project_id
const {
projectHistoryId,
docs,
files,
historyRangesMigration,
resyncProjectStructureOnly,
} = req.body
logger.debug(
{ projectId, docs, files },
'queuing project history resync via http'
)
const opts = {}
if (historyRangesMigration) {
opts.historyRangesMigration = historyRangesMigration
}
if (resyncProjectStructureOnly) {
opts.resyncProjectStructureOnly = resyncProjectStructureOnly
}
HistoryManager.resyncProjectHistory(
projectId,
projectHistoryId,
docs,
files,
opts,
error => {
if (error) {
return next(error)
}
logger.debug({ projectId }, 'queued project history resync via http')
res.sendStatus(204)
}
)
}
function flushQueuedProjects(req, res, next) {
res.setTimeout(10 * 60 * 1000)
const options = {
limit: req.query.limit || 1000,
timeout: 5 * 60 * 1000,
min_delete_age: req.query.min_delete_age || 5 * 60 * 1000,
}
DeleteQueueManager.flushAndDeleteOldProjects(options, (err, flushed) => {
if (err) {
logger.err({ err }, 'error flushing old projects')
res.sendStatus(500)
} else {
logger.info({ flushed }, 'flush of queued projects completed')
res.send({ flushed })
}
})
}
/**
* Block a project from getting loaded in docupdater
*
* The project is blocked only if it's not already loaded in docupdater. The
* response indicates whether the project has been blocked or not.
*/
function blockProject(req, res, next) {
const projectId = req.params.project_id
RedisManager.blockProject(projectId, (err, blocked) => {
if (err) {
return next(err)
}
res.json({ blocked })
})
}
/**
* Unblock a project
*/
function unblockProject(req, res, next) {
const projectId = req.params.project_id
RedisManager.unblockProject(projectId, (err, wasBlocked) => {
if (err) {
return next(err)
}
res.json({ wasBlocked })
})
}
module.exports = {
getDoc,
peekDoc,
getProjectDocsAndFlushIfOld,
getProjectLastUpdatedAt,
clearProjectState,
appendToDoc,
setDoc,
flushDocIfLoaded,
deleteDoc,
flushProject,
deleteProject,
deleteMultipleProjects,
acceptChanges,
resolveComment,
reopenComment,
deleteComment,
updateProject,
resyncProjectHistory,
flushQueuedProjects,
blockProject,
unblockProject,
getComment,
}
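// Wiring sketch (illustrative; the real route definitions live elsewhere in
// the service, and the paths below are assumptions inferred from the
// req.params names used by the handlers).
const express = require('express')
const HttpController = require('./HttpController')

const app = express()
app.use(express.json())
app.get('/project/:project_id/doc/:doc_id', HttpController.getDoc)
app.post('/project/:project_id/doc/:doc_id', HttpController.setDoc)
app.post('/project/:project_id/doc/:doc_id/flush', HttpController.flushDocIfLoaded)
app.delete('/project/:project_id/doc/:doc_id', HttpController.deleteDoc)
app.delete('/project/:project_id', HttpController.deleteProject)
app.post(
  '/project/:project_id/doc/:doc_id/comment/:comment_id/resolve',
  HttpController.resolveComment
)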

View File

@@ -0,0 +1,31 @@
module.exports = {
// compute the total size of the document in characters, including newlines
getTotalSizeOfLines(lines) {
let size = 0
for (const line of lines) {
size += line.length + 1 // include the newline
}
return size
},
// check whether the total size of the document in characters exceeds the
// maxDocLength.
//
// The estimated size should be an upper bound on the true size, typically
// it will be the size of the JSON.stringified array of lines. If the
// estimated size is less than the maxDocLength then we know that the total
// size of lines will also be less than maxDocLength.
docIsTooLarge(estimatedSize, lines, maxDocLength) {
if (estimatedSize <= maxDocLength) {
return false // definitely under the limit, no need to calculate the total size
}
// calculate the total size, bailing out early if the size limit is reached
let size = 0
for (const line of lines) {
size += line.length + 1 // include the newline
if (size > maxDocLength) return true
}
// since we didn't hit the limit in the loop, the document is within the allowed length
return false
},
}
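// Worked example (illustrative, not part of the original module): the
// estimated size passed to docIsTooLarge is typically
// JSON.stringify(lines).length, which always over-counts the true size, so
// accepting early when the estimate is under the limit is safe.
const { getTotalSizeOfLines, docIsTooLarge } = require('./Limits')

const lines = ['hello', 'world']
getTotalSizeOfLines(lines) // => 12 (5 + 1 + 5 + 1, newlines included)

const estimatedSize = JSON.stringify(lines).length // => 17
docIsTooLarge(estimatedSize, lines, 20) // => false, estimate already under limit
docIsTooLarge(estimatedSize, lines, 10) // => true, true size 12 exceeds 10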

View File

@@ -0,0 +1,18 @@
const Settings = require('@overleaf/settings')
const redis = require('@overleaf/redis-wrapper')
const rclient = redis.createClient(Settings.redis.lock)
const keys = Settings.redis.lock.key_schema
const RedisLocker = require('@overleaf/redis-wrapper/RedisLocker')
module.exports = new RedisLocker({
rclient,
getKey(docId) {
return keys.blockingKey({ doc_id: docId })
},
wrapTimeoutError(err, docId) {
err.doc_id = docId
return err
},
metricsPrefix: 'doc',
lockTTLSeconds: Settings.redisLockTTLSeconds,
})

View File

@@ -0,0 +1,51 @@
/* eslint-disable
no-return-assign,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const _ = require('lodash')
const showLength = function (thing) {
if (thing != null ? thing.length : undefined) {
return thing.length
} else {
return thing
}
}
const showUpdateLength = function (update) {
if ((update != null ? update.op : undefined) instanceof Array) {
const copy = _.cloneDeep(update)
copy.op.forEach(function (element, index) {
if (element?.i?.length != null) {
copy.op[index].i = element.i.length
}
if (element?.d?.length != null) {
copy.op[index].d = element.d.length
}
if (element?.c?.length != null) {
return (copy.op[index].c = element.c.length)
}
})
return copy
} else {
return update
}
}
module.exports = {
// replace long values with their length
lines: showLength,
oldLines: showLength,
newLines: showLength,
docLines: showLength,
newDocLines: showLength,
ranges: showLength,
update: showUpdateLength,
}
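// Example (illustrative, not part of the original module): these serializers
// are intended to be registered with the logger so large payloads are logged
// as lengths instead of full content.
const serializers = require('./LoggerSerializers')

serializers.lines(['aaa', 'bbb', 'ccc']) // => 3
serializers.update({ doc: 'doc-id', op: [{ i: 'inserted text', p: 0 }], v: 42 })
// => { doc: 'doc-id', op: [{ i: 13, p: 0 }], v: 42 }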

View File

@@ -0,0 +1,3 @@
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
module.exports = require('@overleaf/metrics')

View File

@@ -0,0 +1,196 @@
const { promisify } = require('node:util')
const { promisifyMultiResult } = require('@overleaf/promise-utils')
const Settings = require('@overleaf/settings')
const Errors = require('./Errors')
const Metrics = require('./Metrics')
const logger = require('@overleaf/logger')
const request = require('requestretry').defaults({
maxAttempts: 2,
retryDelay: 10,
})
// We have to be quick with HTTP calls because we're holding a lock that
// expires after 30 seconds. We can't let any errors in the rest of the stack
// hold us up, and need to bail out quickly if there is a problem.
const MAX_HTTP_REQUEST_LENGTH = 5000 // 5 seconds
function updateMetric(method, error, response) {
// find the status, with special handling for connection timeouts
// https://github.com/request/request#timeouts
let status
if (error && error.connect === true) {
status = `${error.code} (connect)`
} else if (error) {
status = error.code
} else if (response) {
status = response.statusCode
}
Metrics.inc(method, 1, { status })
if (error && error.attempts > 1) {
Metrics.inc(`${method}-retries`, 1, { status: 'error' })
}
if (response && response.attempts > 1) {
Metrics.inc(`${method}-retries`, 1, { status: 'success' })
}
}
function getDoc(projectId, docId, options = {}, _callback) {
const timer = new Metrics.Timer('persistenceManager.getDoc')
if (typeof options === 'function') {
_callback = options
options = {}
}
const callback = function (...args) {
timer.done()
_callback(...args)
}
const urlPath = `/project/${projectId}/doc/${docId}`
const requestParams = {
url: `${Settings.apis.web.url}${urlPath}`,
method: 'GET',
headers: {
accept: 'application/json',
},
auth: {
user: Settings.apis.web.user,
pass: Settings.apis.web.pass,
sendImmediately: true,
},
jar: false,
timeout: MAX_HTTP_REQUEST_LENGTH,
}
if (options.peek) {
requestParams.qs = { peek: 'true' }
}
request(requestParams, (error, res, body) => {
updateMetric('getDoc', error, res)
if (error) {
logger.error({ err: error, projectId, docId }, 'web API request failed')
return callback(new Error('error connecting to web API'))
}
if (res.statusCode >= 200 && res.statusCode < 300) {
try {
body = JSON.parse(body)
} catch (e) {
return callback(e)
}
if (body.lines == null) {
return callback(new Error('web API response had no doc lines'))
}
if (body.version == null) {
return callback(new Error('web API response had no valid doc version'))
}
if (body.pathname == null) {
return callback(new Error('web API response had no valid doc pathname'))
}
if (!body.pathname) {
logger.warn(
{ projectId, docId },
'missing pathname in PersistenceManager getDoc'
)
Metrics.inc('pathname', 1, {
path: 'PersistenceManager.getDoc',
status: body.pathname === '' ? 'zero-length' : 'undefined',
})
}
callback(
null,
body.lines,
body.version,
body.ranges,
body.pathname,
body.projectHistoryId?.toString(),
body.historyRangesSupport || false,
body.resolvedCommentIds || []
)
} else if (res.statusCode === 404) {
callback(new Errors.NotFoundError(`doc not found: ${urlPath}`))
} else if (res.statusCode === 413) {
callback(
new Errors.FileTooLargeError(`doc exceeds maximum size: ${urlPath}`)
)
} else {
callback(
new Error(`error accessing web API: ${urlPath} ${res.statusCode}`)
)
}
})
}
function setDoc(
projectId,
docId,
lines,
version,
ranges,
lastUpdatedAt,
lastUpdatedBy,
_callback
) {
const timer = new Metrics.Timer('persistenceManager.setDoc')
const callback = function (...args) {
timer.done()
_callback(...args)
}
const urlPath = `/project/${projectId}/doc/${docId}`
request(
{
url: `${Settings.apis.web.url}${urlPath}`,
method: 'POST',
json: {
lines,
ranges,
version,
lastUpdatedBy,
lastUpdatedAt,
},
auth: {
user: Settings.apis.web.user,
pass: Settings.apis.web.pass,
sendImmediately: true,
},
jar: false,
timeout: MAX_HTTP_REQUEST_LENGTH,
},
(error, res, body) => {
updateMetric('setDoc', error, res)
if (error) {
logger.error({ err: error, projectId, docId }, 'web API request failed')
return callback(new Error('error connecting to web API'))
}
if (res.statusCode >= 200 && res.statusCode < 300) {
callback(null, body)
} else if (res.statusCode === 404) {
callback(new Errors.NotFoundError(`doc not found: ${urlPath}`))
} else if (res.statusCode === 413) {
callback(
new Errors.FileTooLargeError(`doc exceeds maximum size: ${urlPath}`)
)
} else {
callback(
new Error(`error accessing web API: ${urlPath} ${res.statusCode}`)
)
}
}
)
}
module.exports = {
getDoc,
setDoc,
promises: {
getDoc: promisifyMultiResult(getDoc, [
'lines',
'version',
'ranges',
'pathname',
'projectHistoryId',
'historyRangesSupport',
'resolvedCommentIds',
]),
setDoc: promisify(setDoc),
},
}
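// Usage sketch (illustrative, not part of the original module): the promise
// wrapper maps the positional callback results onto the named fields defined
// in the promisifyMultiResult call above.
const PersistenceManager = require('./PersistenceManager')

async function loadFromWeb(projectId, docId) {
  const { lines, version, pathname, projectHistoryId } =
    await PersistenceManager.promises.getDoc(projectId, docId, { peek: true })
  return { lines, version, pathname, projectHistoryId }
}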

View File

@@ -0,0 +1,68 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS206: Consider reworking classes to avoid initClass
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let Profiler
const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
const deltaMs = function (ta, tb) {
const nanoSeconds = (ta[0] - tb[0]) * 1e9 + (ta[1] - tb[1])
const milliSeconds = Math.floor(nanoSeconds * 1e-6)
return milliSeconds
}
module.exports = Profiler = (function () {
Profiler = class Profiler {
static initClass() {
this.prototype.LOG_CUTOFF_TIME = 15 * 1000
this.prototype.LOG_SYNC_CUTOFF_TIME = 1000
}
constructor(name, args) {
this.name = name
this.args = args
this.t0 = this.t = process.hrtime()
this.start = new Date()
this.updateTimes = []
this.totalSyncTime = 0
}
log(label, options = {}) {
const t1 = process.hrtime()
const dtMilliSec = deltaMs(t1, this.t)
this.t = t1
this.totalSyncTime += options.sync ? dtMilliSec : 0
this.updateTimes.push([label, dtMilliSec]) // timings in ms
return this // make it chainable
}
end(message) {
const totalTime = deltaMs(this.t, this.t0)
const exceedsCutoff = totalTime > this.LOG_CUTOFF_TIME
const exceedsSyncCutoff = this.totalSyncTime > this.LOG_SYNC_CUTOFF_TIME
if (exceedsCutoff || exceedsSyncCutoff) {
// log anything greater than cutoffs
const args = {}
for (const k in this.args) {
const v = this.args[k]
args[k] = v
}
args.updateTimes = this.updateTimes
args.start = this.start
args.end = new Date()
args.status = { exceedsCutoff, exceedsSyncCutoff }
logger.warn(args, this.name)
}
return totalTime
}
}
Profiler.initClass()
return Profiler
})()
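// Usage sketch (illustrative, not part of the original module): one profiler
// per operation, checkpoints via log(), and end() only emits a warning when
// the total or synchronous time exceeds the cutoffs defined above.
const Profiler = require('./Profiler')

const profile = new Profiler('applyUpdate', { project_id: 'project-id' })
JSON.parse(JSON.stringify({ some: 'payload' })) // stand-in for real work
profile.log('deserialize', { sync: true })
const totalTimeMs = profile.end('applyUpdate finished')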

View File

@@ -0,0 +1,139 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const request = require('request')
const Settings = require('@overleaf/settings')
const RedisManager = require('./RedisManager')
const { rclient } = RedisManager
const docUpdaterKeys = Settings.redis.documentupdater.key_schema
const async = require('async')
const ProjectManager = require('./ProjectManager')
const _ = require('lodash')
const logger = require('@overleaf/logger')
const { promisifyAll } = require('@overleaf/promise-utils')
const ProjectFlusher = {
// iterate over keys asynchronously using redis scan (non-blocking)
// handle all the cluster nodes or single redis server
_getKeys(pattern, limit, callback) {
const nodes = (typeof rclient.nodes === 'function'
? rclient.nodes('master')
: undefined) || [rclient]
const doKeyLookupForNode = (node, cb) =>
ProjectFlusher._getKeysFromNode(node, pattern, limit, cb)
return async.concatSeries(nodes, doKeyLookupForNode, callback)
},
_getKeysFromNode(node, pattern, limit, callback) {
if (limit == null) {
limit = 1000
}
let cursor = 0 // redis iterator
const keySet = {} // use hash to avoid duplicate results
const batchSize = limit != null ? Math.min(limit, 1000) : 1000
// scan over all keys looking for pattern
const doIteration = (
cb // avoid hitting redis too hard
) =>
node.scan(
cursor,
'MATCH',
pattern,
'COUNT',
batchSize,
function (error, reply) {
let keys
if (error != null) {
return callback(error)
}
;[cursor, keys] = Array.from(reply)
for (const key of Array.from(keys)) {
keySet[key] = true
}
keys = Object.keys(keySet)
const noResults = cursor === '0' // redis returns string results not numeric
const limitReached = limit != null && keys.length >= limit
if (noResults || limitReached) {
return callback(null, keys)
} else {
return setTimeout(doIteration, 10)
}
}
)
return doIteration()
},
// extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b
// or docsInProject:{57fd0b1f53a8396d22b2c24b} (for redis cluster)
_extractIds(keyList) {
const ids = (() => {
const result = []
for (const key of Array.from(keyList)) {
const m = key.match(/:\{?([0-9a-f]{24})\}?/) // extract object id
result.push(m[1])
}
return result
})()
return ids
},
flushAllProjects(options, callback) {
logger.info({ options }, 'flushing all projects')
return ProjectFlusher._getKeys(
docUpdaterKeys.docsInProject({ project_id: '*' }),
options.limit,
function (error, projectKeys) {
if (error != null) {
logger.err({ err: error }, 'error getting keys for flushing')
return callback(error)
}
const projectIds = ProjectFlusher._extractIds(projectKeys)
if (options.dryRun) {
return callback(null, projectIds)
}
const jobs = _.map(
projectIds,
projectId => cb =>
ProjectManager.flushAndDeleteProjectWithLocks(
projectId,
{ background: true },
cb
)
)
return async.parallelLimit(
async.reflectAll(jobs),
options.concurrency,
function (error, results) {
const success = []
const failure = []
_.each(results, function (result, i) {
if (result.error != null) {
return failure.push(projectIds[i])
} else {
return success.push(projectIds[i])
}
})
logger.info(
{ successCount: success.length, failureCount: failure.length },
'finished flushing all projects'
)
return callback(error, { success, failure })
}
)
}
)
},
}
module.exports = ProjectFlusher
module.exports.promises = promisifyAll(ProjectFlusher)
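// Illustrative sketch (not part of the original module): how a maintenance
// script might call flushAllProjects. The option values below (limit,
// concurrency, dryRun) are assumptions chosen for the example; only the
// option names are taken from the code above.
function exampleFlushAllProjects() {
  ProjectFlusher.flushAllProjects(
    { limit: 1000, concurrency: 5, dryRun: false },
    (error, result) => {
      if (error) {
        return logger.err({ err: error }, 'example flush failed')
      }
      // with dryRun: false, result is { success: [...projectIds], failure: [...projectIds] }
      // with dryRun: true, the callback instead receives the matching project ids
      logger.info(result, 'example flush finished')
    }
  )
}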

View File

@@ -0,0 +1,245 @@
// @ts-check
const Settings = require('@overleaf/settings')
const { callbackifyAll } = require('@overleaf/promise-utils')
const projectHistoryKeys = Settings.redis?.project_history?.key_schema
const rclient = require('@overleaf/redis-wrapper').createClient(
Settings.redis.project_history
)
const logger = require('@overleaf/logger')
const metrics = require('./Metrics')
const { docIsTooLarge } = require('./Limits')
const { addTrackedDeletesToContent, extractOriginOrSource } = require('./Utils')
const HistoryConversions = require('./HistoryConversions')
const OError = require('@overleaf/o-error')
/**
* @import { Ranges } from './types'
*/
const ProjectHistoryRedisManager = {
async queueOps(projectId, ...ops) {
// Record metric for ops pushed onto queue
for (const op of ops) {
metrics.summary('redis.projectHistoryOps', op.length, { status: 'push' })
}
// Make sure that this MULTI operation only operates on project
// specific keys, i.e. keys that have the project id in curly braces.
// The curly braces identify a hash key for Redis and ensure that
// the MULTI's operations are all done on the same node in a
// cluster environment.
const multi = rclient.multi()
// Push the ops onto the project history queue
multi.rpush(
projectHistoryKeys.projectHistoryOps({ project_id: projectId }),
...ops
)
// To record the age of the oldest op on the queue, set a timestamp if not
// already present (SETNX).
multi.setnx(
projectHistoryKeys.projectHistoryFirstOpTimestamp({
project_id: projectId,
}),
Date.now()
)
const result = await multi.exec()
return result[0]
},
async queueRenameEntity(
projectId,
projectHistoryId,
entityType,
entityId,
userId,
projectUpdate,
originOrSource
) {
projectUpdate = {
pathname: projectUpdate.pathname,
new_pathname: projectUpdate.newPathname,
meta: {
user_id: userId,
ts: new Date(),
},
version: projectUpdate.version,
projectHistoryId,
}
projectUpdate[entityType] = entityId
const { origin, source } = extractOriginOrSource(originOrSource)
if (origin != null) {
projectUpdate.meta.origin = origin
if (origin.kind !== 'editor') {
projectUpdate.meta.type = 'external'
}
} else if (source != null) {
projectUpdate.meta.source = source
if (source !== 'editor') {
projectUpdate.meta.type = 'external'
}
}
logger.debug(
{ projectId, projectUpdate },
'queue rename operation to project-history'
)
const jsonUpdate = JSON.stringify(projectUpdate)
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
},
async queueAddEntity(
projectId,
projectHistoryId,
entityType,
entityId,
userId,
projectUpdate,
originOrSource
) {
let docLines = projectUpdate.docLines
let ranges
if (projectUpdate.historyRangesSupport && projectUpdate.ranges) {
docLines = addTrackedDeletesToContent(
docLines,
projectUpdate.ranges.changes ?? []
)
ranges = HistoryConversions.toHistoryRanges(projectUpdate.ranges)
}
projectUpdate = {
pathname: projectUpdate.pathname,
docLines,
url: projectUpdate.url,
meta: {
user_id: userId,
ts: new Date(),
},
version: projectUpdate.version,
hash: projectUpdate.hash,
metadata: projectUpdate.metadata,
projectHistoryId,
createdBlob: projectUpdate.createdBlob ?? false,
}
if (ranges) {
projectUpdate.ranges = ranges
}
projectUpdate[entityType] = entityId
const { origin, source } = extractOriginOrSource(originOrSource)
if (origin != null) {
projectUpdate.meta.origin = origin
if (origin.kind !== 'editor') {
projectUpdate.meta.type = 'external'
}
} else if (source != null) {
projectUpdate.meta.source = source
if (source !== 'editor') {
projectUpdate.meta.type = 'external'
}
}
logger.debug(
{ projectId, projectUpdate },
'queue add operation to project-history'
)
const jsonUpdate = JSON.stringify(projectUpdate)
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
},
async queueResyncProjectStructure(
projectId,
projectHistoryId,
docs,
files,
opts
) {
logger.debug({ projectId, docs, files }, 'queue project structure resync')
const projectUpdate = {
resyncProjectStructure: { docs, files },
projectHistoryId,
meta: {
ts: new Date(),
},
}
if (opts.resyncProjectStructureOnly) {
projectUpdate.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
}
const jsonUpdate = JSON.stringify(projectUpdate)
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
},
/**
* Add a resync doc update to the project-history queue
*
* @param {string} projectId
* @param {string} projectHistoryId
* @param {string} docId
* @param {string[]} lines
* @param {Ranges} ranges
* @param {string[]} resolvedCommentIds
* @param {number} version
* @param {string} pathname
* @param {boolean} historyRangesSupport
* @return {Promise<number>} the number of ops added
*/
async queueResyncDocContent(
projectId,
projectHistoryId,
docId,
lines,
ranges,
resolvedCommentIds,
version,
pathname,
historyRangesSupport
) {
logger.debug(
{ projectId, docId, lines, version, pathname },
'queue doc content resync'
)
let content = lines.join('\n')
if (historyRangesSupport) {
content = addTrackedDeletesToContent(content, ranges.changes ?? [])
}
const projectUpdate = {
resyncDocContent: { content, version },
projectHistoryId,
path: pathname,
doc: docId,
meta: {
ts: new Date(),
},
}
if (historyRangesSupport) {
projectUpdate.resyncDocContent.ranges =
HistoryConversions.toHistoryRanges(ranges)
projectUpdate.resyncDocContent.resolvedCommentIds = resolvedCommentIds
}
const jsonUpdate = JSON.stringify(projectUpdate)
// Do an optimised size check on the docLines using the serialised
// project update length as an upper bound
const sizeBound = jsonUpdate.length
if (docIsTooLarge(sizeBound, lines, Settings.max_doc_length)) {
throw new OError(
'blocking resync doc content insert into project history queue: doc is too large',
{ projectId, docId, docSize: sizeBound }
)
}
return await ProjectHistoryRedisManager.queueOps(projectId, jsonUpdate)
},
}
module.exports = {
...callbackifyAll(ProjectHistoryRedisManager),
promises: ProjectHistoryRedisManager,
}
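// Illustrative sketch (not part of the original module): pushing a single
// raw update onto the project-history queue via the async API declared above.
// The ids and payload below are invented for the example; queueOps resolves
// with the new queue length returned by RPUSH.
async function exampleQueueOp() {
  const projectId = '507f1f77bcf86cd799439011' // hypothetical project id
  const update = JSON.stringify({
    pathname: '/main.tex',
    new_pathname: '/chapters/main.tex',
    meta: { user_id: '507f191e810c19729de860ea', ts: new Date() },
    doc: '507f191e810c19729de860eb',
  })
  const queueLength = await ProjectHistoryRedisManager.queueOps(
    projectId,
    update
  )
  logger.debug({ projectId, queueLength }, 'example op queued')
}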

View File

@@ -0,0 +1,341 @@
const RedisManager = require('./RedisManager')
const ProjectHistoryRedisManager = require('./ProjectHistoryRedisManager')
const DocumentManager = require('./DocumentManager')
const HistoryManager = require('./HistoryManager')
const async = require('async')
const logger = require('@overleaf/logger')
const Metrics = require('./Metrics')
const Errors = require('./Errors')
const { promisifyAll } = require('@overleaf/promise-utils')
function flushProjectWithLocks(projectId, _callback) {
const timer = new Metrics.Timer('projectManager.flushProjectWithLocks')
const callback = function (...args) {
timer.done()
_callback(...args)
}
RedisManager.getDocIdsInProject(projectId, (error, docIds) => {
if (error) {
return callback(error)
}
const errors = []
const jobs = docIds.map(docId => callback => {
DocumentManager.flushDocIfLoadedWithLock(projectId, docId, error => {
if (error instanceof Errors.NotFoundError) {
logger.warn(
{ err: error, projectId, docId },
'found deleted doc when flushing'
)
callback()
} else if (error) {
logger.error({ err: error, projectId, docId }, 'error flushing doc')
errors.push(error)
callback()
} else {
callback()
}
})
})
logger.debug({ projectId, docIds }, 'flushing docs')
async.series(jobs, () => {
if (errors.length > 0) {
callback(new Error('Errors flushing docs. See log for details'))
} else {
callback(null)
}
})
})
}
function flushAndDeleteProjectWithLocks(projectId, options, _callback) {
const timer = new Metrics.Timer(
'projectManager.flushAndDeleteProjectWithLocks'
)
const callback = function (...args) {
timer.done()
_callback(...args)
}
RedisManager.getDocIdsInProject(projectId, (error, docIds) => {
if (error) {
return callback(error)
}
const errors = []
const jobs = docIds.map(docId => callback => {
DocumentManager.flushAndDeleteDocWithLock(projectId, docId, {}, error => {
if (error) {
logger.error({ err: error, projectId, docId }, 'error deleting doc')
errors.push(error)
}
callback()
})
})
logger.debug({ projectId, docIds }, 'deleting docs')
async.series(jobs, () =>
// When deleting the project here we want to ensure that project
// history is completely flushed because the project may be
// deleted in web after this call completes, and so further
// attempts to flush would fail after that.
HistoryManager.flushProjectChanges(projectId, options, error => {
if (errors.length > 0) {
callback(new Error('Errors deleting docs. See log for details'))
} else if (error) {
callback(error)
} else {
callback(null)
}
})
)
})
}
function queueFlushAndDeleteProject(projectId, callback) {
RedisManager.queueFlushAndDeleteProject(projectId, error => {
if (error) {
logger.error(
{ projectId, error },
'error adding project to flush and delete queue'
)
return callback(error)
}
Metrics.inc('queued-delete')
callback()
})
}
function getProjectDocsTimestamps(projectId, callback) {
RedisManager.getDocIdsInProject(projectId, (error, docIds) => {
if (error) {
return callback(error)
}
if (docIds.length === 0) {
return callback(null, [])
}
RedisManager.getDocTimestamps(docIds, (error, timestamps) => {
if (error) {
return callback(error)
}
callback(null, timestamps)
})
})
}
function getProjectDocsAndFlushIfOld(
projectId,
projectStateHash,
excludeVersions,
_callback
) {
const timer = new Metrics.Timer('projectManager.getProjectDocsAndFlushIfOld')
const callback = function (...args) {
timer.done()
_callback(...args)
}
RedisManager.checkOrSetProjectState(
projectId,
projectStateHash,
(error, projectStateChanged) => {
if (error) {
logger.error(
{ err: error, projectId },
'error getting/setting project state in getProjectDocsAndFlushIfOld'
)
return callback(error)
}
// we can't return docs if project structure has changed
if (projectStateChanged) {
return callback(
new Errors.ProjectStateChangedError('project state changed')
)
}
// project structure hasn't changed, return doc content from redis
RedisManager.getDocIdsInProject(projectId, (error, docIds) => {
if (error) {
logger.error(
{ err: error, projectId },
'error getting doc ids in getProjectDocs'
)
return callback(error)
}
// get the doc lines from redis
const jobs = docIds.map(docId => cb => {
DocumentManager.getDocAndFlushIfOldWithLock(
projectId,
docId,
(err, lines, version) => {
if (err) {
logger.error(
{ err, projectId, docId },
'error getting project doc lines in getProjectDocsAndFlushIfOld'
)
return cb(err)
}
const doc = { _id: docId, lines, v: version } // create a doc object to return
cb(null, doc)
}
)
})
async.series(jobs, (error, docs) => {
if (error) {
return callback(error)
}
callback(null, docs)
})
})
}
)
}
function clearProjectState(projectId, callback) {
RedisManager.clearProjectState(projectId, callback)
}
function updateProjectWithLocks(
projectId,
projectHistoryId,
userId,
updates,
projectVersion,
source,
_callback
) {
const timer = new Metrics.Timer('projectManager.updateProject')
const callback = function (...args) {
timer.done()
_callback(...args)
}
let projectSubversion = 0 // project versions can have multiple operations
let projectOpsLength = 0
function handleUpdate(update, cb) {
update.version = `${projectVersion}.${projectSubversion++}`
switch (update.type) {
case 'add-doc':
ProjectHistoryRedisManager.queueAddEntity(
projectId,
projectHistoryId,
'doc',
update.id,
userId,
update,
source,
(error, count) => {
projectOpsLength = count
cb(error)
}
)
break
case 'rename-doc':
if (!update.newPathname) {
// an empty newPathname signifies a delete, so there is no need to
// update the pathname in redis
ProjectHistoryRedisManager.queueRenameEntity(
projectId,
projectHistoryId,
'doc',
update.id,
userId,
update,
source,
(error, count) => {
projectOpsLength = count
cb(error)
}
)
} else {
// rename the doc in redis before queuing the update
DocumentManager.renameDocWithLock(
projectId,
update.id,
userId,
update,
projectHistoryId,
error => {
if (error) {
return cb(error)
}
ProjectHistoryRedisManager.queueRenameEntity(
projectId,
projectHistoryId,
'doc',
update.id,
userId,
update,
source,
(error, count) => {
projectOpsLength = count
cb(error)
}
)
}
)
}
break
case 'add-file':
ProjectHistoryRedisManager.queueAddEntity(
projectId,
projectHistoryId,
'file',
update.id,
userId,
update,
source,
(error, count) => {
projectOpsLength = count
cb(error)
}
)
break
case 'rename-file':
ProjectHistoryRedisManager.queueRenameEntity(
projectId,
projectHistoryId,
'file',
update.id,
userId,
update,
source,
(error, count) => {
projectOpsLength = count
cb(error)
}
)
break
default:
cb(new Error(`Unknown update type: ${update.type}`))
}
}
async.eachSeries(updates, handleUpdate, error => {
if (error) {
return callback(error)
}
if (
HistoryManager.shouldFlushHistoryOps(
projectOpsLength,
updates.length,
HistoryManager.FLUSH_PROJECT_EVERY_N_OPS
)
) {
HistoryManager.flushProjectChangesAsync(projectId)
}
callback()
})
}
module.exports = {
flushProjectWithLocks,
flushAndDeleteProjectWithLocks,
queueFlushAndDeleteProject,
getProjectDocsTimestamps,
getProjectDocsAndFlushIfOld,
clearProjectState,
updateProjectWithLocks,
}
module.exports.promises = promisifyAll(module.exports)
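// Illustrative sketch (not part of the original module): deferring a project
// flush via the delete queue, as real-time does when the last user leaves a
// project. The project id is invented for the example.
function exampleQueueProjectForFlush() {
  const projectId = '507f1f77bcf86cd799439011' // hypothetical project id
  queueFlushAndDeleteProject(projectId, error => {
    if (error) {
      return logger.error({ projectId, error }, 'example queueing failed')
    }
    logger.debug({ projectId }, 'example project queued for flush and delete')
  })
}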

View File

@@ -0,0 +1,577 @@
// @ts-check
const RangesTracker = require('@overleaf/ranges-tracker')
const logger = require('@overleaf/logger')
const OError = require('@overleaf/o-error')
const Metrics = require('./Metrics')
const _ = require('lodash')
const { isInsert, isDelete, isComment, getDocLength } = require('./Utils')
/**
* @import { Comment, CommentOp, InsertOp, DeleteOp, HistoryOp, Op } from './types'
* @import { HistoryCommentOp, HistoryDeleteOp, HistoryInsertOp, HistoryRetainOp } from './types'
* @import { HistoryDeleteTrackedChange, HistoryUpdate, Ranges, TrackedChange, Update } from './types'
*/
const RANGE_DELTA_BUCKETS = [0, 1, 2, 3, 4, 5, 10, 20, 50]
const RangesManager = {
MAX_COMMENTS: 500,
MAX_CHANGES: 2000,
/**
* Apply an update to the given doc (lines and ranges) and return new ranges
*
* @param {string} projectId
* @param {string} docId
* @param {Ranges} ranges - ranges before the updates were applied
* @param {Update[]} updates
* @param {string[]} newDocLines - the document lines after the updates were applied
* @param {object} opts
* @param {boolean} [opts.historyRangesSupport] - whether history ranges support is enabled
* @returns {{ newRanges: Ranges, rangesWereCollapsed: boolean, historyUpdates: HistoryUpdate[] }}
*/
applyUpdate(projectId, docId, ranges, updates, newDocLines, opts = {}) {
if (ranges == null) {
ranges = {}
}
if (updates == null) {
updates = []
}
const { changes, comments } = _.cloneDeep(ranges)
const rangesTracker = new RangesTracker(changes, comments)
const [emptyRangeCountBefore, totalRangeCountBefore] =
RangesManager._emptyRangesCount(rangesTracker)
const historyUpdates = []
for (const update of updates) {
const trackingChanges = Boolean(update.meta?.tc)
rangesTracker.track_changes = trackingChanges
if (update.meta?.tc) {
rangesTracker.setIdSeed(update.meta.tc)
}
const historyOps = []
for (const op of update.op) {
let croppedCommentOps = []
if (opts.historyRangesSupport) {
historyOps.push(
getHistoryOp(op, rangesTracker.comments, rangesTracker.changes)
)
if (isDelete(op) && trackingChanges) {
// If a tracked delete overlaps a comment, the comment must be
// cropped. The extent of the cropping is calculated before the
// delete is applied, but the cropping operations are applied
// later, after the delete is applied.
croppedCommentOps = getCroppedCommentOps(op, rangesTracker.comments)
}
} else if (isInsert(op) || isDelete(op)) {
historyOps.push(op)
}
rangesTracker.applyOp(op, { user_id: update.meta?.user_id })
if (croppedCommentOps.length > 0) {
historyOps.push(
...croppedCommentOps.map(op =>
getHistoryOpForComment(op, rangesTracker.changes)
)
)
}
}
if (historyOps.length > 0) {
historyUpdates.push({ ...update, op: historyOps })
}
}
if (
rangesTracker.changes?.length > RangesManager.MAX_CHANGES ||
rangesTracker.comments?.length > RangesManager.MAX_COMMENTS
) {
throw new Error('too many comments or tracked changes')
}
try {
// This is a consistency check that all of our ranges and
// comments still match the corresponding text
rangesTracker.validate(newDocLines.join('\n'))
} catch (err) {
logger.error(
{ err, projectId, docId, newDocLines, updates },
'error validating ranges'
)
throw err
}
const [emptyRangeCountAfter, totalRangeCountAfter] =
RangesManager._emptyRangesCount(rangesTracker)
const rangesWereCollapsed =
emptyRangeCountAfter > emptyRangeCountBefore ||
totalRangeCountAfter + 1 < totalRangeCountBefore // also include the case where multiple ranges were removed
// monitor the change in range count, we may want to snapshot before large decreases
if (totalRangeCountAfter < totalRangeCountBefore) {
Metrics.histogram(
'range-delta',
totalRangeCountBefore - totalRangeCountAfter,
RANGE_DELTA_BUCKETS,
{ status_code: rangesWereCollapsed ? 'saved' : 'unsaved' }
)
}
const newRanges = RangesManager._getRanges(rangesTracker)
logger.debug(
{
projectId,
docId,
changesCount: newRanges.changes?.length,
commentsCount: newRanges.comments?.length,
rangesWereCollapsed,
},
'applied updates to ranges'
)
return { newRanges, rangesWereCollapsed, historyUpdates }
},
acceptChanges(projectId, docId, changeIds, ranges, lines) {
const { changes, comments } = ranges
logger.debug(`accepting ${changeIds.length} changes in ranges`)
const rangesTracker = new RangesTracker(changes, comments)
rangesTracker.removeChangeIds(changeIds)
const newRanges = RangesManager._getRanges(rangesTracker)
return newRanges
},
deleteComment(commentId, ranges) {
const { changes, comments } = ranges
logger.debug({ commentId }, 'deleting comment in ranges')
const rangesTracker = new RangesTracker(changes, comments)
rangesTracker.removeCommentId(commentId)
const newRanges = RangesManager._getRanges(rangesTracker)
return newRanges
},
/**
*
* @param {object} args
* @param {string} args.docId
* @param {string[]} args.acceptedChangeIds
* @param {TrackedChange[]} args.changes
* @param {string} args.pathname
* @param {string} args.projectHistoryId
* @param {string[]} args.lines
*/
getHistoryUpdatesForAcceptedChanges({
docId,
acceptedChangeIds,
changes,
pathname,
projectHistoryId,
lines,
}) {
/** @type {(change: TrackedChange) => boolean} */
const isAccepted = change => acceptedChangeIds.includes(change.id)
const historyOps = []
// Keep ops in order of offset, with deletes before inserts
const sortedChanges = changes.slice().sort(function (c1, c2) {
const result = c1.op.p - c2.op.p
if (result !== 0) {
return result
} else if (isInsert(c1.op) && isDelete(c2.op)) {
return 1
} else if (isDelete(c1.op) && isInsert(c2.op)) {
return -1
} else {
return 0
}
})
const docLength = getDocLength(lines)
let historyDocLength = docLength
for (const change of sortedChanges) {
if (isDelete(change.op)) {
historyDocLength += change.op.d.length
}
}
let unacceptedDeletes = 0
for (const change of sortedChanges) {
/** @type {HistoryOp | undefined} */
let op
if (isDelete(change.op)) {
if (isAccepted(change)) {
op = {
p: change.op.p,
d: change.op.d,
}
if (unacceptedDeletes > 0) {
op.hpos = op.p + unacceptedDeletes
}
} else {
unacceptedDeletes += change.op.d.length
}
} else if (isInsert(change.op)) {
if (isAccepted(change)) {
op = {
p: change.op.p,
r: change.op.i,
tracking: { type: 'none' },
}
if (unacceptedDeletes > 0) {
op.hpos = op.p + unacceptedDeletes
}
}
}
if (!op) {
continue
}
/** @type {HistoryUpdate} */
const historyOp = {
doc: docId,
op: [op],
meta: {
...change.metadata,
ts: Date.now(),
doc_length: docLength,
pathname,
},
}
if (projectHistoryId) {
historyOp.projectHistoryId = projectHistoryId
}
if (historyOp.meta && historyDocLength !== docLength) {
historyOp.meta.history_doc_length = historyDocLength
}
historyOps.push(historyOp)
if (isDelete(change.op) && isAccepted(change)) {
historyDocLength -= change.op.d.length
}
}
return historyOps
},
_getRanges(rangesTracker) {
// Return the minimal data structure needed, since most documents won't have any
// changes or comments
const response = {}
if (rangesTracker.changes != null && rangesTracker.changes.length > 0) {
response.changes = rangesTracker.changes
}
if (rangesTracker.comments != null && rangesTracker.comments.length > 0) {
response.comments = rangesTracker.comments
}
return response
},
_emptyRangesCount(ranges) {
let emptyCount = 0
let totalCount = 0
for (const comment of ranges.comments || []) {
totalCount++
if (comment.op.c === '') {
emptyCount++
}
}
for (const change of ranges.changes || []) {
totalCount++
if (change.op.i != null) {
if (change.op.i === '') {
emptyCount++
}
}
}
return [emptyCount, totalCount]
},
}
/**
* Calculate ops to be sent to the history system.
*
 * @param {Op} op - the editor op
 * @param {Comment[]} comments - the list of comments in the document before
 *   the op is applied
 * @param {TrackedChange[]} changes - the list of tracked changes in the
 *   document before the op is applied. That list, coming from
 *   RangesTracker, is ordered by position.
* @returns {HistoryOp}
*/
function getHistoryOp(op, comments, changes, opts = {}) {
if (isInsert(op)) {
return getHistoryOpForInsert(op, comments, changes)
} else if (isDelete(op)) {
return getHistoryOpForDelete(op, changes)
} else if (isComment(op)) {
return getHistoryOpForComment(op, changes)
} else {
throw new OError('Unrecognized op', { op })
}
}
/**
* Calculate history ops for an insert
*
* Inserts are moved forward by tracked deletes placed strictly before the
* op. When an insert is made at the same position as a tracked delete, the
* insert is placed before the tracked delete.
*
* We also add a commentIds property when inserts are made inside a comment.
* The current behaviour is to include the insert in the comment only if the
* insert is made strictly inside the comment. Inserts made at the edges are
* not included in the comment.
*
* @param {InsertOp} op
* @param {Comment[]} comments
* @param {TrackedChange[]} changes
* @returns {HistoryInsertOp}
*/
function getHistoryOpForInsert(op, comments, changes) {
let hpos = op.p
let trackedDeleteRejection = false
const commentIds = new Set()
for (const comment of comments) {
if (comment.op.p < op.p && op.p < comment.op.p + comment.op.c.length) {
// Insert is inside the comment; add the comment id
commentIds.add(comment.op.t)
}
}
// If it's determined that the op is a tracked delete rejection, we have to
// calculate its proper history position. If multiple tracked deletes are
// found at the same position as the insert, the tracked deletes that come
// before the tracked delete that was actually rejected offset the history
// position.
let trackedDeleteRejectionOffset = 0
for (const change of changes) {
if (!isDelete(change.op)) {
// We're only interested in tracked deletes
continue
}
if (change.op.p < op.p) {
// Tracked delete is before the op. Move the op forward.
hpos += change.op.d.length
} else if (change.op.p === op.p) {
// Tracked delete is at the same position as the op.
if (op.u && change.op.d.startsWith(op.i)) {
// We're undoing and the insert matches the start of the tracked
// delete. RangesManager treats this as a tracked delete rejection. We
// will note this in the op so that project-history can take the
// appropriate action.
trackedDeleteRejection = true
// The history must be updated to take into account all preceding
// tracked deletes at the same position
hpos += trackedDeleteRejectionOffset
// No need to continue. All subsequent tracked deletes are after the
// insert.
break
} else {
// This tracked delete does not match the insert. Note its length in
// case we find a tracked delete that matches later.
trackedDeleteRejectionOffset += change.op.d.length
}
} else {
// Tracked delete is after the insert. Tracked deletes are ordered, so
// we know that all subsequent tracked deletes will be after the insert
// and we can bail out.
break
}
}
/** @type {HistoryInsertOp} */
const historyOp = { ...op }
if (commentIds.size > 0) {
historyOp.commentIds = Array.from(commentIds)
}
if (hpos !== op.p) {
historyOp.hpos = hpos
}
if (trackedDeleteRejection) {
historyOp.trackedDeleteRejection = true
}
return historyOp
}
/**
* Calculate history op for a delete
*
* Deletes are moved forward by tracked deletes placed before or at the position of the
* op. If a tracked delete is inside the delete, the delete is split in parts
* so that characters are deleted around the tracked delete, but the tracked
* delete itself is not deleted.
*
* @param {DeleteOp} op
* @param {TrackedChange[]} changes
* @returns {HistoryDeleteOp}
*/
function getHistoryOpForDelete(op, changes, opts = {}) {
let hpos = op.p
const opEnd = op.p + op.d.length
/** @type HistoryDeleteTrackedChange[] */
const changesInsideDelete = []
for (const change of changes) {
if (change.op.p <= op.p) {
if (isDelete(change.op)) {
// Tracked delete is before or at the position of the incoming delete.
// Move the op forward.
hpos += change.op.d.length
} else if (isInsert(change.op)) {
const changeEnd = change.op.p + change.op.i.length
const endPos = Math.min(changeEnd, opEnd)
if (endPos > op.p) {
// Part of the tracked insert is inside the delete
changesInsideDelete.push({
type: 'insert',
offset: 0,
length: endPos - op.p,
})
}
}
} else if (change.op.p < op.p + op.d.length) {
// Tracked change inside the deleted text. Record it for the history system.
if (isDelete(change.op)) {
changesInsideDelete.push({
type: 'delete',
offset: change.op.p - op.p,
length: change.op.d.length,
})
} else if (isInsert(change.op)) {
changesInsideDelete.push({
type: 'insert',
offset: change.op.p - op.p,
length: Math.min(change.op.i.length, opEnd - change.op.p),
})
}
} else {
// We've seen all tracked changes before or inside the delete
break
}
}
/** @type {HistoryDeleteOp} */
const historyOp = { ...op }
if (hpos !== op.p) {
historyOp.hpos = hpos
}
if (changesInsideDelete.length > 0) {
historyOp.trackedChanges = changesInsideDelete
}
return historyOp
}
/**
* Calculate history ops for a comment
*
* Comments are moved forward by tracked deletes placed before or at the
* position of the op. If a tracked delete is inside the comment, the length of
* the comment is extended to include the tracked delete.
*
* @param {CommentOp} op
* @param {TrackedChange[]} changes
* @returns {HistoryCommentOp}
*/
function getHistoryOpForComment(op, changes) {
let hpos = op.p
let hlen = op.c.length
for (const change of changes) {
if (!isDelete(change.op)) {
// We're only interested in tracked deletes
continue
}
if (change.op.p <= op.p) {
// Tracked delete is before or at the position of the incoming comment.
// Move the op forward.
hpos += change.op.d.length
} else if (change.op.p < op.p + op.c.length) {
// Tracked delete inside the comment. Extend the length
hlen += change.op.d.length
} else {
// We've seen all tracked deletes before or inside the comment
break
}
}
/** @type {HistoryCommentOp} */
const historyOp = { ...op }
if (hpos !== op.p) {
historyOp.hpos = hpos
}
if (hlen !== op.c.length) {
historyOp.hlen = hlen
}
return historyOp
}
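// Worked example for the adjustment above (illustrative values only): with a
// tracked delete { p: 5, d: 'abc' } before a comment op { p: 10, c: 'hello' },
// the comment's history position becomes hpos = 10 + 3 = 13. If instead the
// tracked delete started at p = 12 (inside the comment), the history length
// would become hlen = 5 + 3 = 8.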
/**
* Return the ops necessary to properly crop comments when a tracked delete is
* received
*
* The editor treats a tracked delete as a proper delete and updates the
* comment range accordingly. The history doesn't do that and remembers the
* extent of the comment in the tracked delete. In order to keep the history
* consistent with the editor, we'll send ops that will crop the comment in
* the history.
*
* @param {DeleteOp} op
* @param {Comment[]} comments
* @returns {CommentOp[]}
*/
function getCroppedCommentOps(op, comments) {
const deleteStart = op.p
const deleteLength = op.d.length
const deleteEnd = deleteStart + deleteLength
/** @type {HistoryCommentOp[]} */
const historyCommentOps = []
for (const comment of comments) {
const commentStart = comment.op.p
const commentLength = comment.op.c.length
const commentEnd = commentStart + commentLength
if (deleteStart <= commentStart && deleteEnd > commentStart) {
// The delete overlaps the start of the comment, or covers all of it.
const overlapLength = Math.min(deleteEnd, commentEnd) - commentStart
/** @type {CommentOp} */
const commentOp = {
p: deleteStart,
c: comment.op.c.slice(overlapLength),
t: comment.op.t,
}
if (comment.op.resolved) {
commentOp.resolved = true
}
historyCommentOps.push(commentOp)
} else if (
deleteStart > commentStart &&
deleteStart < commentEnd &&
deleteEnd >= commentEnd
) {
// The delete overlaps the end of the comment.
const overlapLength = commentEnd - deleteStart
/** @type {CommentOp} */
const commentOp = {
p: commentStart,
c: comment.op.c.slice(0, -overlapLength),
t: comment.op.t,
}
if (comment.op.resolved) {
commentOp.resolved = true
}
historyCommentOps.push(commentOp)
}
}
return historyCommentOps
}
module.exports = RangesManager
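// Illustrative sketch (not part of the original module): applying a plain
// insert with no existing ranges. All ids and values are invented; with no
// tracked changes or comments the returned newRanges is an empty object and
// historyUpdates carries the insert through unchanged.
function exampleApplyUpdate() {
  const update = {
    op: [{ i: 'Hello ', p: 0 }],
    v: 42,
    meta: { user_id: '507f191e810c19729de860ea' },
  }
  const { newRanges, rangesWereCollapsed, historyUpdates } =
    RangesManager.applyUpdate(
      '507f1f77bcf86cd799439011', // hypothetical project id
      '507f191e810c19729de860eb', // hypothetical doc id
      {}, // ranges before the update
      [update],
      ['Hello world'], // doc lines after the update
      {}
    )
  logger.debug(
    { newRanges, rangesWereCollapsed, historyUpdates },
    'example ranges update applied'
  )
}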

View File

@@ -0,0 +1,85 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let RateLimiter
const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
const Metrics = require('./Metrics')
module.exports = RateLimiter = class RateLimiter {
constructor(number) {
if (number == null) {
number = 10
}
this.ActiveWorkerCount = 0
this.CurrentWorkerLimit = number
this.BaseWorkerCount = number
}
_adjustLimitUp() {
this.CurrentWorkerLimit += 0.1 // allow target worker limit to increase gradually
return Metrics.gauge('currentLimit', Math.ceil(this.CurrentWorkerLimit))
}
_adjustLimitDown() {
this.CurrentWorkerLimit = Math.max(
this.BaseWorkerCount,
this.CurrentWorkerLimit * 0.9
)
logger.debug(
{ currentLimit: Math.ceil(this.CurrentWorkerLimit) },
'reducing rate limit'
)
return Metrics.gauge('currentLimit', Math.ceil(this.CurrentWorkerLimit))
}
_trackAndRun(task, callback) {
if (callback == null) {
callback = function () {}
}
this.ActiveWorkerCount++
Metrics.gauge('processingUpdates', this.ActiveWorkerCount)
return task(err => {
this.ActiveWorkerCount--
Metrics.gauge('processingUpdates', this.ActiveWorkerCount)
return callback(err)
})
}
run(task, callback) {
if (this.ActiveWorkerCount < this.CurrentWorkerLimit) {
// below the limit, just put the task in the background
this._trackAndRun(task, err => {
if (err) {
logger.error({ err }, 'error in background task')
}
})
callback() // return immediately
if (this.CurrentWorkerLimit > this.BaseWorkerCount) {
return this._adjustLimitDown()
}
} else {
logger.debug(
{
active: this.ActiveWorkerCount,
currentLimit: Math.ceil(this.CurrentWorkerLimit),
},
'hit rate limit'
)
return this._trackAndRun(task, err => {
if (err == null) {
this._adjustLimitUp()
} // don't increment rate limit if there was an error
return callback(err)
}) // only return after task completes
}
}
}
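// Illustrative sketch (not part of the original module): running a task
// through the limiter. With fewer than 10 active workers the callback fires
// immediately and the task continues in the background; above the limit the
// callback waits for the task to finish. The task below is a hypothetical
// unit of work.
function exampleRateLimitedTask() {
  const limiter = new RateLimiter(10)
  const task = cb => setTimeout(cb, 100) // hypothetical unit of work
  limiter.run(task, err => {
    if (err) {
      logger.error({ err }, 'example task failed')
    }
  })
}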

View File

@@ -0,0 +1,136 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const Settings = require('@overleaf/settings')
const { promisifyAll } = require('@overleaf/promise-utils')
const rclient = require('@overleaf/redis-wrapper').createClient(
Settings.redis.documentupdater
)
const pubsubClient = require('@overleaf/redis-wrapper').createClient(
Settings.redis.pubsub
)
const Keys = Settings.redis.documentupdater.key_schema
const logger = require('@overleaf/logger')
const os = require('node:os')
const crypto = require('node:crypto')
const metrics = require('./Metrics')
const HOST = os.hostname()
const RND = crypto.randomBytes(4).toString('hex') // generate a random key for this process
let COUNT = 0
const MAX_OPS_PER_ITERATION = 8 // process a limited number of ops for safety
const RealTimeRedisManager = {
getPendingUpdatesForDoc(docId, callback) {
// Make sure that this MULTI operation only operates on doc
// specific keys, i.e. keys that have the doc id in curly braces.
// The curly braces identify a hash key for Redis and ensure that
// the MULTI's operations are all done on the same node in a
// cluster environment.
const multi = rclient.multi()
multi.llen(Keys.pendingUpdates({ doc_id: docId }))
multi.lrange(
Keys.pendingUpdates({ doc_id: docId }),
0,
MAX_OPS_PER_ITERATION - 1
)
multi.ltrim(
Keys.pendingUpdates({ doc_id: docId }),
MAX_OPS_PER_ITERATION,
-1
)
return multi.exec(function (error, replys) {
if (error != null) {
return callback(error)
}
const [llen, jsonUpdates, _trimResult] = replys
metrics.histogram(
'redis.pendingUpdates.llen',
llen,
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 50, 75, 100]
)
for (const jsonUpdate of jsonUpdates) {
// record metric for each update removed from queue
metrics.summary('redis.pendingUpdates', jsonUpdate.length, {
status: 'pop',
})
}
const updates = []
for (const jsonUpdate of jsonUpdates) {
let update
try {
update = JSON.parse(jsonUpdate)
} catch (e) {
return callback(e)
}
updates.push(update)
}
return callback(error, updates)
})
},
getUpdatesLength(docId, callback) {
return rclient.llen(Keys.pendingUpdates({ doc_id: docId }), callback)
},
sendCanaryAppliedOp({ projectId, docId, op }) {
const ack = JSON.stringify({ v: op.v, doc: docId }).length
// Updates with op.dup === true will not get sent to other clients; they only get acked.
const broadcast = op.dup ? 0 : JSON.stringify(op).length
const payload = JSON.stringify({
message: 'canary-applied-op',
payload: {
ack,
broadcast,
docId,
projectId,
source: op.meta.source,
},
})
// Publish on the editor-events channel of the project as real-time already listens to that before completing the connection startup.
// publish on separate channels for individual projects and docs when
// configured (needs realtime to be configured for this too).
if (Settings.publishOnIndividualChannels) {
return pubsubClient.publish(`editor-events:${projectId}`, payload)
} else {
return pubsubClient.publish('editor-events', payload)
}
},
sendData(data) {
// create a unique message id using a counter
const messageId = `doc:${HOST}:${RND}-${COUNT++}`
if (data != null) {
data._id = messageId
}
const blob = JSON.stringify(data)
metrics.summary('redis.publish.applied-ops', blob.length)
// publish on separate channels for individual projects and docs when
// configured (needs realtime to be configured for this too).
if (Settings.publishOnIndividualChannels) {
return pubsubClient.publish(`applied-ops:${data.doc_id}`, blob)
} else {
return pubsubClient.publish('applied-ops', blob)
}
},
}
module.exports = RealTimeRedisManager
module.exports.promises = promisifyAll(RealTimeRedisManager, {
without: ['sendData'],
})
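// Illustrative sketch (not part of the original module): draining up to
// MAX_OPS_PER_ITERATION pending updates for a doc. The doc id is invented for
// the example; the callback receives the parsed update objects.
function exampleGetPendingUpdates() {
  const docId = '507f191e810c19729de860eb' // hypothetical doc id
  RealTimeRedisManager.getPendingUpdatesForDoc(docId, (error, updates) => {
    if (error) {
      return logger.err({ err: error, docId }, 'example fetch failed')
    }
    logger.debug({ docId, count: updates.length }, 'example updates fetched')
  })
}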

View File

@@ -0,0 +1,796 @@
const Settings = require('@overleaf/settings')
const rclient = require('@overleaf/redis-wrapper').createClient(
Settings.redis.documentupdater
)
const logger = require('@overleaf/logger')
const OError = require('@overleaf/o-error')
const { promisifyAll } = require('@overleaf/promise-utils')
const metrics = require('./Metrics')
const Errors = require('./Errors')
const crypto = require('node:crypto')
const async = require('async')
const { docIsTooLarge } = require('./Limits')
// Sometimes Redis calls take an unexpectedly long time. We have to be
// quick with Redis calls because we're holding a lock that expires
// after 30 seconds. We can't let any errors in the rest of the stack
// hold us up, and need to bail out quickly if there is a problem.
const MAX_REDIS_REQUEST_LENGTH = 5000 // 5 seconds
const PROJECT_BLOCK_TTL_SECS = 30
// Make times easy to read
const minutes = 60 // seconds for Redis expire
const logHashReadErrors = Settings.documentupdater?.logHashErrors?.read
const MEGABYTES = 1024 * 1024
const MAX_RANGES_SIZE = 3 * MEGABYTES
const keys = Settings.redis.documentupdater.key_schema
const RedisManager = {
rclient,
putDocInMemory(
projectId,
docId,
docLines,
version,
ranges,
resolvedCommentIds,
pathname,
projectHistoryId,
historyRangesSupport,
_callback
) {
const timer = new metrics.Timer('redis.put-doc')
const callback = error => {
timer.done()
_callback(error)
}
const docLinesArray = docLines
docLines = JSON.stringify(docLines)
if (docLines.indexOf('\u0000') !== -1) {
const error = new Error('null bytes found in doc lines')
// this check was added to catch memory corruption in JSON.stringify.
// It sometimes returned null bytes at the end of the string.
logger.error({ err: error, docId, docLines }, error.message)
return callback(error)
}
// Do an optimised size check on the docLines using the serialised
// length as an upper bound
const sizeBound = docLines.length
if (docIsTooLarge(sizeBound, docLinesArray, Settings.max_doc_length)) {
const docSize = docLines.length
const err = new Error('blocking doc insert into redis: doc is too large')
logger.error({ projectId, docId, err, docSize }, err.message)
return callback(err)
}
const docHash = RedisManager._computeHash(docLines)
// record bytes sent to redis
metrics.summary('redis.docLines', docLines.length, { status: 'set' })
logger.debug(
{ projectId, docId, version, docHash, pathname, projectHistoryId },
'putting doc in redis'
)
RedisManager._serializeRanges(ranges, (error, ranges) => {
if (error) {
logger.error({ err: error, docId, projectId }, error.message)
return callback(error)
}
// update docsInProject set before writing doc contents
const multi = rclient.multi()
multi.exists(keys.projectBlock({ project_id: projectId }))
multi.sadd(keys.docsInProject({ project_id: projectId }), docId)
multi.exec((err, reply) => {
if (err) {
return callback(err)
}
const projectBlocked = reply[0] === 1
if (projectBlocked) {
// We don't clean up the spurious docId added in the docsInProject
// set. There is a risk that the docId was successfully added by a
// concurrent process. This set is used when unloading projects. An
// extra docId will not prevent the project from being unloaded, but
// a missing docId means that the doc might stay in Redis forever.
return callback(
new OError('Project blocked from loading docs', { projectId })
)
}
RedisManager.setHistoryRangesSupportFlag(
docId,
historyRangesSupport,
err => {
if (err) {
return callback(err)
}
if (!pathname) {
metrics.inc('pathname', 1, {
path: 'RedisManager.setDoc',
status: pathname === '' ? 'zero-length' : 'undefined',
})
}
// Make sure that this MULTI operation only operates on doc
// specific keys, i.e. keys that have the doc id in curly braces.
// The curly braces identify a hash key for Redis and ensure that
// the MULTI's operations are all done on the same node in a
// cluster environment.
const multi = rclient.multi()
multi.mset({
[keys.docLines({ doc_id: docId })]: docLines,
[keys.projectKey({ doc_id: docId })]: projectId,
[keys.docVersion({ doc_id: docId })]: version,
[keys.docHash({ doc_id: docId })]: docHash,
[keys.ranges({ doc_id: docId })]: ranges,
[keys.pathname({ doc_id: docId })]: pathname,
[keys.projectHistoryId({ doc_id: docId })]: projectHistoryId,
})
if (historyRangesSupport) {
multi.del(keys.resolvedCommentIds({ doc_id: docId }))
if (resolvedCommentIds.length > 0) {
multi.sadd(
keys.resolvedCommentIds({ doc_id: docId }),
...resolvedCommentIds
)
}
}
multi.exec(err => {
if (err) {
callback(
OError.tag(err, 'failed to write doc to Redis in MULTI', {
previousErrors: err.previousErrors.map(e => ({
name: e.name,
message: e.message,
command: e.command,
})),
})
)
} else {
callback()
}
})
}
)
})
})
},
removeDocFromMemory(projectId, docId, _callback) {
logger.debug({ projectId, docId }, 'removing doc from redis')
const callback = err => {
if (err) {
logger.err({ projectId, docId, err }, 'error removing doc from redis')
_callback(err)
} else {
logger.debug({ projectId, docId }, 'removed doc from redis')
_callback()
}
}
// Make sure that this MULTI operation only operates on doc
// specific keys, i.e. keys that have the doc id in curly braces.
// The curly braces identify a hash key for Redis and ensure that
// the MULTI's operations are all done on the same node in a
// cluster environment.
let multi = rclient.multi()
multi.strlen(keys.docLines({ doc_id: docId }))
multi.del(
keys.docLines({ doc_id: docId }),
keys.projectKey({ doc_id: docId }),
keys.docVersion({ doc_id: docId }),
keys.docHash({ doc_id: docId }),
keys.ranges({ doc_id: docId }),
keys.pathname({ doc_id: docId }),
keys.projectHistoryId({ doc_id: docId }),
keys.unflushedTime({ doc_id: docId }),
keys.lastUpdatedAt({ doc_id: docId }),
keys.lastUpdatedBy({ doc_id: docId }),
keys.resolvedCommentIds({ doc_id: docId })
)
multi.exec((error, response) => {
if (error) {
return callback(error)
}
const length = response?.[0]
if (length > 0) {
// record bytes freed in redis
metrics.summary('redis.docLines', length, { status: 'del' })
}
// Make sure that this MULTI operation only operates on project
// specific keys, i.e. keys that have the project id in curly braces.
// The curly braces identify a hash key for Redis and ensure that
// the MULTI's operations are all done on the same node in a
// cluster environment.
multi = rclient.multi()
multi.srem(keys.docsInProject({ project_id: projectId }), docId)
multi.del(keys.projectState({ project_id: projectId }))
multi.exec(err => {
if (err) {
return callback(err)
}
rclient.srem(keys.historyRangesSupport(), docId, callback)
})
})
},
checkOrSetProjectState(projectId, newState, callback) {
// Make sure that this MULTI operation only operates on project
// specific keys, i.e. keys that have the project id in curly braces.
// The curly braces identify a hash key for Redis and ensure that
// the MULTI's operations are all done on the same node in a
// cluster environment.
const multi = rclient.multi()
multi.getset(keys.projectState({ project_id: projectId }), newState)
multi.expire(keys.projectState({ project_id: projectId }), 30 * minutes)
multi.exec((error, response) => {
if (error) {
return callback(error)
}
logger.debug(
{ projectId, newState, oldState: response[0] },
'checking project state'
)
callback(null, response[0] !== newState)
})
},
clearProjectState(projectId, callback) {
rclient.del(keys.projectState({ project_id: projectId }), callback)
},
getDoc(projectId, docId, callback) {
const timer = new metrics.Timer('redis.get-doc')
const collectKeys = [
keys.docLines({ doc_id: docId }),
keys.docVersion({ doc_id: docId }),
keys.docHash({ doc_id: docId }),
keys.projectKey({ doc_id: docId }),
keys.ranges({ doc_id: docId }),
keys.pathname({ doc_id: docId }),
keys.projectHistoryId({ doc_id: docId }),
keys.unflushedTime({ doc_id: docId }),
keys.lastUpdatedAt({ doc_id: docId }),
keys.lastUpdatedBy({ doc_id: docId }),
]
rclient.mget(...collectKeys, (error, result) => {
if (error) {
return callback(error)
}
let [
docLines,
version,
storedHash,
docProjectId,
ranges,
pathname,
projectHistoryId,
unflushedTime,
lastUpdatedAt,
lastUpdatedBy,
] = result
rclient.sismember(keys.historyRangesSupport(), docId, (error, result) => {
if (error) {
return callback(error)
}
rclient.smembers(
keys.resolvedCommentIds({ doc_id: docId }),
(error, resolvedCommentIds) => {
if (error) {
return callback(error)
}
const historyRangesSupport = result === 1
const timeSpan = timer.done()
// check if request took too long and bail out. only do this for
// get, because it is the first call in each update, so if this
// passes we'll assume others have a reasonable chance to succeed.
if (timeSpan > MAX_REDIS_REQUEST_LENGTH) {
error = new Error('redis getDoc exceeded timeout')
return callback(error)
}
// record bytes loaded from redis
if (docLines != null) {
metrics.summary('redis.docLines', docLines.length, {
status: 'get',
})
}
// check sha1 hash value if present
if (docLines != null && storedHash != null) {
const computedHash = RedisManager._computeHash(docLines)
if (logHashReadErrors && computedHash !== storedHash) {
logger.error(
{
projectId,
docId,
docProjectId,
computedHash,
storedHash,
docLines,
},
'hash mismatch on retrieved document'
)
}
}
try {
docLines = JSON.parse(docLines)
ranges = RedisManager._deserializeRanges(ranges)
} catch (e) {
return callback(e)
}
version = parseInt(version || 0, 10)
// check doc is in requested project
if (docProjectId != null && docProjectId !== projectId) {
logger.error(
{ projectId, docId, docProjectId },
'doc not in project'
)
return callback(new Errors.NotFoundError('document not found'))
}
if (docLines && version && !pathname) {
metrics.inc('pathname', 1, {
path: 'RedisManager.getDoc',
status: pathname === '' ? 'zero-length' : 'undefined',
})
}
callback(
null,
docLines,
version,
ranges,
pathname,
projectHistoryId,
unflushedTime,
lastUpdatedAt,
lastUpdatedBy,
historyRangesSupport,
resolvedCommentIds
)
}
)
})
})
},
getDocVersion(docId, callback) {
rclient.mget(keys.docVersion({ doc_id: docId }), (error, result) => {
if (error) {
return callback(error)
}
let [version] = result || []
version = parseInt(version, 10)
callback(null, version)
})
},
getDocLines(docId, callback) {
rclient.get(keys.docLines({ doc_id: docId }), (error, docLines) => {
if (error) {
return callback(error)
}
callback(null, docLines)
})
},
getPreviousDocOps(docId, start, end, callback) {
const timer = new metrics.Timer('redis.get-prev-docops')
rclient.llen(keys.docOps({ doc_id: docId }), (error, length) => {
if (error) {
return callback(error)
}
rclient.get(keys.docVersion({ doc_id: docId }), (error, version) => {
if (error) {
return callback(error)
}
version = parseInt(version, 10)
const firstVersionInRedis = version - length
if (start < firstVersionInRedis || end > version) {
error = new Errors.OpRangeNotAvailableError(
'doc ops range is not loaded in redis',
{ firstVersionInRedis, version, ttlInS: RedisManager.DOC_OPS_TTL }
)
logger.debug(
{ err: error, docId, length, version, start, end },
'doc ops range is not loaded in redis'
)
return callback(error)
}
start = start - firstVersionInRedis
if (end > -1) {
end = end - firstVersionInRedis
}
if (isNaN(start) || isNaN(end)) {
error = new Error('inconsistent version or lengths')
logger.error(
{ err: error, docId, length, version, start, end },
'inconsistent version or length'
)
return callback(error)
}
rclient.lrange(
keys.docOps({ doc_id: docId }),
start,
end,
(error, jsonOps) => {
let ops
if (error) {
return callback(error)
}
try {
ops = jsonOps.map(jsonOp => JSON.parse(jsonOp))
} catch (e) {
return callback(e)
}
const timeSpan = timer.done()
if (timeSpan > MAX_REDIS_REQUEST_LENGTH) {
error = new Error('redis getPreviousDocOps exceeded timeout')
return callback(error)
}
callback(null, ops)
}
)
})
})
},
DOC_OPS_TTL: 60 * minutes,
DOC_OPS_MAX_LENGTH: 100,
updateDocument(
projectId,
docId,
docLines,
newVersion,
appliedOps,
ranges,
updateMeta,
callback
) {
if (appliedOps == null) {
appliedOps = []
}
RedisManager.getDocVersion(docId, (error, currentVersion) => {
if (error) {
return callback(error)
}
if (currentVersion + appliedOps.length !== newVersion) {
error = new Error(`Version mismatch. '${docId}' is corrupted.`)
logger.error(
{
err: error,
docId,
currentVersion,
newVersion,
opsLength: appliedOps.length,
},
'version mismatch'
)
return callback(error)
}
const jsonOps = appliedOps.map(op => JSON.stringify(op))
for (const op of jsonOps) {
if (op.indexOf('\u0000') !== -1) {
error = new Error('null bytes found in jsonOps')
// this check was added to catch memory corruption in JSON.stringify
logger.error({ err: error, docId, jsonOps }, error.message)
return callback(error)
}
}
const newDocLines = JSON.stringify(docLines)
if (newDocLines.indexOf('\u0000') !== -1) {
error = new Error('null bytes found in doc lines')
// this check was added to catch memory corruption in JSON.stringify
logger.error({ err: error, docId, newDocLines }, error.message)
return callback(error)
}
// Do an optimised size check on the docLines using the serialised
// length as an upper bound
const sizeBound = newDocLines.length
if (docIsTooLarge(sizeBound, docLines, Settings.max_doc_length)) {
const err = new Error('blocking doc update: doc is too large')
const docSize = newDocLines.length
logger.error({ projectId, docId, err, docSize }, err.message)
return callback(err)
}
const newHash = RedisManager._computeHash(newDocLines)
const opVersions = appliedOps.map(op => op?.v)
logger.debug(
{
docId,
version: newVersion,
hash: newHash,
opVersions,
},
'updating doc in redis'
)
// record bytes sent to redis in update
metrics.summary('redis.docLines', newDocLines.length, {
status: 'update',
})
RedisManager._serializeRanges(ranges, (error, ranges) => {
if (error) {
logger.error({ err: error, docId }, error.message)
return callback(error)
}
if (ranges && ranges.indexOf('\u0000') !== -1) {
error = new Error('null bytes found in ranges')
// this check was added to catch memory corruption in JSON.stringify
logger.error({ err: error, docId, ranges }, error.message)
return callback(error)
}
// Make sure that this MULTI operation only operates on doc
// specific keys, i.e. keys that have the doc id in curly braces.
// The curly braces identify a hash key for Redis and ensure that
// the MULTI's operations are all done on the same node in a
// cluster environment.
const multi = rclient.multi()
multi.mset({
[keys.docLines({ doc_id: docId })]: newDocLines,
[keys.docVersion({ doc_id: docId })]: newVersion,
[keys.docHash({ doc_id: docId })]: newHash,
[keys.ranges({ doc_id: docId })]: ranges,
[keys.lastUpdatedAt({ doc_id: docId })]: Date.now(),
[keys.lastUpdatedBy({ doc_id: docId })]:
updateMeta && updateMeta.user_id,
})
multi.ltrim(
keys.docOps({ doc_id: docId }),
-RedisManager.DOC_OPS_MAX_LENGTH,
-1
) // index 3
// push the ops last so we can get the lengths at fixed index position 7
if (jsonOps.length > 0) {
multi.rpush(keys.docOps({ doc_id: docId }), ...jsonOps) // index 5
// expire must come after rpush since before it will be a no-op if the list is empty
multi.expire(keys.docOps({ doc_id: docId }), RedisManager.DOC_OPS_TTL) // index 6
}
// Set the unflushed timestamp to the current time if not set ("NX" flag).
multi.set(keys.unflushedTime({ doc_id: docId }), Date.now(), 'NX')
multi.exec((error, result) => {
if (error) {
return callback(error)
}
callback()
})
})
})
},
renameDoc(projectId, docId, userId, update, projectHistoryId, callback) {
RedisManager.getDoc(projectId, docId, (error, lines, version) => {
if (error) {
return callback(error)
}
if (lines != null && version != null) {
if (!update.newPathname) {
logger.warn(
{ projectId, docId, update },
'missing pathname in RedisManager.renameDoc'
)
metrics.inc('pathname', 1, {
path: 'RedisManager.renameDoc',
status: update.newPathname === '' ? 'zero-length' : 'undefined',
})
}
rclient.set(
keys.pathname({ doc_id: docId }),
update.newPathname,
callback
)
} else {
callback()
}
})
},
clearUnflushedTime(docId, callback) {
rclient.del(keys.unflushedTime({ doc_id: docId }), callback)
},
updateCommentState(docId, commentId, resolved, callback) {
if (resolved) {
rclient.sadd(
keys.resolvedCommentIds({ doc_id: docId }),
commentId,
callback
)
} else {
rclient.srem(
keys.resolvedCommentIds({ doc_id: docId }),
commentId,
callback
)
}
},
getDocIdsInProject(projectId, callback) {
rclient.smembers(keys.docsInProject({ project_id: projectId }), callback)
},
/**
* Get lastupdatedat timestamps for an array of docIds
*/
getDocTimestamps(docIds, callback) {
async.mapSeries(
docIds,
(docId, cb) => rclient.get(keys.lastUpdatedAt({ doc_id: docId }), cb),
callback
)
},
/**
* Store the project id in a sorted set ordered by time with a random offset
* to smooth out spikes
*/
queueFlushAndDeleteProject(projectId, callback) {
const SMOOTHING_OFFSET =
Settings.smoothingOffset > 0
? Math.round(Settings.smoothingOffset * Math.random())
: 0
rclient.zadd(
keys.flushAndDeleteQueue(),
Date.now() + SMOOTHING_OFFSET,
projectId,
callback
)
},
/**
* Find the oldest queued flush that is before the cutoff time
*/
getNextProjectToFlushAndDelete(cutoffTime, callback) {
rclient.zrangebyscore(
keys.flushAndDeleteQueue(),
0,
cutoffTime,
'WITHSCORES',
'LIMIT',
0,
1,
(err, reply) => {
if (err) {
return callback(err)
}
// return if no projects ready to be processed
if (!reply || reply.length === 0) {
return callback()
}
// pop the oldest entry (get and remove in a multi)
const multi = rclient.multi()
// Poor man's version of ZPOPMIN, which is only available in Redis 5.
multi.zrange(keys.flushAndDeleteQueue(), 0, 0, 'WITHSCORES')
multi.zremrangebyrank(keys.flushAndDeleteQueue(), 0, 0)
multi.zcard(keys.flushAndDeleteQueue()) // the total length of the queue (for metrics)
multi.exec((err, reply) => {
if (err) {
return callback(err)
}
if (!reply || reply.length === 0) {
return callback()
}
const [key, timestamp] = reply[0]
const queueLength = reply[2]
callback(null, key, timestamp, queueLength)
})
}
)
},
setHistoryRangesSupportFlag(docId, historyRangesSupport, callback) {
if (historyRangesSupport) {
rclient.sadd(keys.historyRangesSupport(), docId, callback)
} else {
rclient.srem(keys.historyRangesSupport(), docId, callback)
}
},
blockProject(projectId, callback) {
// Make sure that this MULTI operation only operates on project
// specific keys, i.e. keys that have the project id in curly braces.
// The curly braces identify a hash key for Redis and ensure that
// the MULTI's operations are all done on the same node in a
// cluster environment.
const multi = rclient.multi()
multi.setex(
keys.projectBlock({ project_id: projectId }),
PROJECT_BLOCK_TTL_SECS,
'1'
)
multi.scard(keys.docsInProject({ project_id: projectId }))
multi.exec((err, reply) => {
if (err) {
return callback(err)
}
const docsInProject = reply[1]
if (docsInProject > 0) {
// Too late to lock the project
rclient.del(keys.projectBlock({ project_id: projectId }), err => {
if (err) {
return callback(err)
}
callback(null, false)
})
} else {
callback(null, true)
}
})
},
unblockProject(projectId, callback) {
rclient.del(keys.projectBlock({ project_id: projectId }), (err, reply) => {
if (err) {
return callback(err)
}
const wasBlocked = reply === 1
callback(null, wasBlocked)
})
},
_serializeRanges(ranges, callback) {
let jsonRanges = JSON.stringify(ranges)
if (jsonRanges && jsonRanges.length > MAX_RANGES_SIZE) {
return callback(new Error('ranges are too large'))
}
if (jsonRanges === '{}') {
// Most docs will have empty ranges, so don't fill redis with lots of '{}' keys
jsonRanges = null
}
callback(null, jsonRanges)
},
_deserializeRanges(ranges) {
if (ranges == null || ranges === '') {
return {}
} else {
return JSON.parse(ranges)
}
},
_computeHash(docLines) {
// use sha1 checksum of doclines to detect data corruption.
//
// note: must specify 'utf8' encoding explicitly, as the default is
// binary in node < v5
return crypto.createHash('sha1').update(docLines, 'utf8').digest('hex')
},
}
module.exports = RedisManager
module.exports.promises = promisifyAll(RedisManager, {
without: ['_deserializeRanges', '_computeHash'],
multiResult: {
getDoc: [
'lines',
'version',
'ranges',
'pathname',
'projectHistoryId',
'unflushedTime',
'lastUpdatedAt',
'lastUpdatedBy',
'historyRangesSupport',
'resolvedCommentIds',
],
getNextProjectToFlushAndDelete: [
'projectId',
'flushTimestamp',
'queueLength',
],
},
})
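// Illustrative sketch (not part of the original module): reading a doc back
// through the promisified API declared above. The ids are invented; the
// multiResult mapping turns the positional callback arguments into named
// fields on the resolved object.
async function exampleGetDoc() {
  const projectId = '507f1f77bcf86cd799439011' // hypothetical project id
  const docId = '507f191e810c19729de860eb' // hypothetical doc id
  const { lines, version, ranges, pathname, unflushedTime } =
    await RedisManager.promises.getDoc(projectId, docId)
  logger.debug(
    { projectId, docId, version, pathname, unflushedTime, lineCount: lines.length, ranges },
    'example doc fetched from redis'
  )
}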

View File

@@ -0,0 +1,147 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
let ShareJsDB
const logger = require('@overleaf/logger')
const Metrics = require('@overleaf/metrics')
const Keys = require('./UpdateKeys')
const RedisManager = require('./RedisManager')
const Errors = require('./Errors')
const TRANSFORM_UPDATES_COUNT_BUCKETS = [
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 15, 20, 25, 50, 75, 100,
// prepare buckets for full-project history/larger buffer experiments
150, 200, 300, 400,
]
module.exports = ShareJsDB = class ShareJsDB {
constructor(projectId, docId, lines, version) {
this.project_id = projectId
this.doc_id = docId
this.lines = lines
this.version = version
this.appliedOps = {}
// ShareJS calls this detached from the instance, so we need to bind it to
// keep our context so that it can access this.appliedOps
this.writeOp = this._writeOp.bind(this)
this.startTimeShareJsDB = performance.now()
}
getOps(docKey, start, end, callback) {
if (start === end || (start === this.version && end === null)) {
const status = 'is-up-to-date'
Metrics.inc('transform-updates', 1, {
status,
path: 'sharejs',
})
Metrics.histogram(
'transform-updates.count',
0,
TRANSFORM_UPDATES_COUNT_BUCKETS,
{ path: 'sharejs', status }
)
return callback(null, [])
}
// In redis, lrange values are inclusive.
if (end != null) {
end--
} else {
end = -1
}
const [projectId, docId] = Array.from(Keys.splitProjectIdAndDocId(docKey))
const timer = new Metrics.Timer(
'transform-updates.timing',
1,
{ path: 'sharejs' },
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 50, 100, 200, 500, 1000]
)
RedisManager.getPreviousDocOps(docId, start, end, (err, ops) => {
let status
if (err) {
if (err instanceof Errors.OpRangeNotAvailableError) {
status = 'out-of-range'
} else {
status = 'error'
}
} else {
if (ops.length === 0) {
status = 'fetched-zero'
// The sharejs processing is happening under a lock.
// In case there are no other ops available, something bypassed the lock (or we overran it).
logger.warn(
{
projectId,
docId,
start,
end,
timeSinceShareJsDBInit:
performance.now() - this.startTimeShareJsDB,
},
'found zero docOps while transforming update'
)
} else {
status = 'fetched'
}
Metrics.histogram(
'transform-updates.count',
ops.length,
TRANSFORM_UPDATES_COUNT_BUCKETS,
{ path: 'sharejs', status }
)
}
timer.done({ status })
Metrics.inc('transform-updates', 1, { status, path: 'sharejs' })
callback(err, ops)
})
}
_writeOp(docKey, opData, callback) {
if (this.appliedOps[docKey] == null) {
this.appliedOps[docKey] = []
}
this.appliedOps[docKey].push(opData)
return callback()
}
getSnapshot(docKey, callback) {
if (
docKey !== Keys.combineProjectIdAndDocId(this.project_id, this.doc_id)
) {
return callback(
new Errors.NotFoundError(
`unexpected doc_key ${docKey}, expected ${Keys.combineProjectIdAndDocId(
this.project_id,
this.doc_id
)}`
)
)
} else {
return callback(null, {
snapshot: this.lines.join('\n'),
v: parseInt(this.version, 10),
type: 'text',
})
}
}
// To be able to remove a doc from the ShareJS memory
  // we need to call Model::delete, which calls this
// method on the database. However, we will handle removing
// it from Redis ourselves
delete(docName, dbMeta, callback) {
return callback()
}
}
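// getOps above receives a half-open [start, end) op range from ShareJS (end may be
// null, meaning "up to the latest op"), while redis LRANGE uses inclusive indices -
// hence the `end--` / `end = -1` adjustment. The same conversion in isolation, as a
// small sketch with made-up values:
if (require.main === module) {
  const toRedisRange = (start, end) => {
    if (end != null) {
      end-- // [start, end) -> inclusive [start, end - 1]
    } else {
      end = -1 // "until the latest op" -> redis index -1 (the last element)
    }
    return { start, end }
  }
  console.log(toRedisRange(5, 8)) // { start: 5, end: 7 }   ops 5, 6 and 7
  console.log(toRedisRange(5, null)) // { start: 5, end: -1 }  ops 5 up to the latest
}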

View File

@@ -0,0 +1,158 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const ShareJsModel = require('./sharejs/server/model')
const ShareJsDB = require('./ShareJsDB')
const logger = require('@overleaf/logger')
const Settings = require('@overleaf/settings')
const { promisifyAll } = require('@overleaf/promise-utils')
const Keys = require('./UpdateKeys')
const { EventEmitter } = require('node:events')
const util = require('node:util')
const RealTimeRedisManager = require('./RealTimeRedisManager')
const crypto = require('node:crypto')
const metrics = require('./Metrics')
const Errors = require('./Errors')
ShareJsModel.prototype = {}
util.inherits(ShareJsModel, EventEmitter)
const MAX_AGE_OF_OP = 80
const ShareJsUpdateManager = {
getNewShareJsModel(projectId, docId, lines, version) {
const db = new ShareJsDB(projectId, docId, lines, version)
const model = new ShareJsModel(db, {
maxDocLength: Settings.max_doc_length,
maximumAge: MAX_AGE_OF_OP,
})
model.db = db
return model
},
applyUpdate(projectId, docId, update, lines, version, callback) {
if (callback == null) {
callback = function () {}
}
logger.debug({ projectId, docId, update }, 'applying sharejs updates')
const jobs = []
// record the update version before it is modified
const incomingUpdateVersion = update.v
// We could use a global model for all docs, but we're hitting issues with the
// internal state of ShareJS not being accessible for clearing caches, and
// getting stuck due to queued callbacks (line 260 of sharejs/server/model.coffee)
// This adds a small but hopefully acceptable overhead (~12ms per 1000 updates on
// my 2009 MBP).
const model = this.getNewShareJsModel(projectId, docId, lines, version)
this._listenForOps(model)
const docKey = Keys.combineProjectIdAndDocId(projectId, docId)
return model.applyOp(docKey, update, function (error) {
if (error != null) {
if (error === 'Op already submitted') {
metrics.inc('sharejs.already-submitted')
logger.debug(
{ projectId, docId, update },
'op has already been submitted'
)
update.dup = true
ShareJsUpdateManager._sendOp(projectId, docId, update)
} else if (/^Delete component/.test(error)) {
metrics.inc('sharejs.delete-mismatch')
logger.debug(
{ projectId, docId, update, shareJsErr: error },
'sharejs delete does not match'
)
error = new Errors.DeleteMismatchError(
'Delete component does not match'
)
return callback(error)
} else {
metrics.inc('sharejs.other-error')
return callback(error)
}
}
logger.debug({ projectId, docId, error }, 'applied update')
return model.getSnapshot(docKey, (error, data) => {
if (error != null) {
return callback(error)
}
const docSizeAfter = data.snapshot.length
if (docSizeAfter > Settings.max_doc_length) {
const docSizeBefore = lines.join('\n').length
const err = new Error(
'blocking persistence of ShareJs update: doc size exceeds limits'
)
logger.error(
{ projectId, docId, err, docSizeBefore, docSizeAfter },
err.message
)
metrics.inc('sharejs.other-error')
const publicError = 'Update takes doc over max doc size'
return callback(publicError)
}
// only check hash when present and no other updates have been applied
if (update.hash != null && incomingUpdateVersion === version) {
const ourHash = ShareJsUpdateManager._computeHash(data.snapshot)
if (ourHash !== update.hash) {
metrics.inc('sharejs.hash-fail')
return callback(new Error('Invalid hash'))
} else {
metrics.inc('sharejs.hash-pass', 0.001)
}
}
const docLines = data.snapshot.split(/\r\n|\n|\r/)
return callback(
null,
docLines,
data.v,
model.db.appliedOps[docKey] || []
)
})
})
},
_listenForOps(model) {
return model.on('applyOp', function (docKey, opData) {
const [projectId, docId] = Array.from(Keys.splitProjectIdAndDocId(docKey))
return ShareJsUpdateManager._sendOp(projectId, docId, opData)
})
},
_sendOp(projectId, docId, op) {
RealTimeRedisManager.sendData({
project_id: projectId,
doc_id: docId,
op,
})
RealTimeRedisManager.sendCanaryAppliedOp({
projectId,
docId,
op,
})
},
_computeHash(content) {
return crypto
.createHash('sha1')
.update('blob ' + content.length + '\x00')
.update(content, 'utf8')
.digest('hex')
},
}
module.exports = ShareJsUpdateManager
module.exports.promises = promisifyAll(ShareJsUpdateManager, {
without: ['getNewShareJsModel', '_listenForOps', '_sendOp', '_computeHash'],
multiResult: {
applyUpdate: ['updatedDocLines', 'version', 'appliedOps'],
},
})
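// _computeHash above uses the git blob construction: SHA-1 over 'blob <length>\0'
// followed by the content, so for ASCII content the digest matches what
// `git hash-object --stdin` prints for the same bytes (for multi-byte characters the
// JS string length differs from the UTF-8 byte length, so the match only holds for
// ASCII). A quick sketch to check it by hand:
if (require.main === module) {
  const digest = ShareJsUpdateManager._computeHash('hello\n')
  // compare with: printf 'hello\n' | git hash-object --stdin
  console.log(digest)
}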

View File

@@ -0,0 +1,83 @@
/* eslint-disable
no-return-assign,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const { promisifyAll } = require('@overleaf/promise-utils')
const { db, ObjectId } = require('./mongodb')
const SnapshotManager = {
recordSnapshot(projectId, docId, version, pathname, lines, ranges, callback) {
try {
projectId = new ObjectId(projectId)
docId = new ObjectId(docId)
} catch (error) {
return callback(error)
}
db.docSnapshots.insertOne(
{
project_id: projectId,
doc_id: docId,
version,
lines,
pathname,
ranges: SnapshotManager.jsonRangesToMongo(ranges),
ts: new Date(),
},
callback
)
},
// Suggested indexes:
// db.docSnapshots.createIndex({project_id:1, doc_id:1})
  // db.docSnapshots.createIndex({ts:1},{expireAfterSeconds: 30*24*3600}) # expires after 30 days
jsonRangesToMongo(ranges) {
if (ranges == null) {
return null
}
const updateMetadata = function (metadata) {
if ((metadata != null ? metadata.ts : undefined) != null) {
metadata.ts = new Date(metadata.ts)
}
if ((metadata != null ? metadata.user_id : undefined) != null) {
return (metadata.user_id = SnapshotManager._safeObjectId(
metadata.user_id
))
}
}
for (const change of Array.from(ranges.changes || [])) {
change.id = SnapshotManager._safeObjectId(change.id)
updateMetadata(change.metadata)
}
for (const comment of Array.from(ranges.comments || [])) {
comment.id = SnapshotManager._safeObjectId(comment.id)
if ((comment.op != null ? comment.op.t : undefined) != null) {
comment.op.t = SnapshotManager._safeObjectId(comment.op.t)
}
updateMetadata(comment.metadata)
}
return ranges
},
_safeObjectId(data) {
try {
return new ObjectId(data)
} catch (error) {
return data
}
},
}
module.exports = SnapshotManager
module.exports.promises = promisifyAll(SnapshotManager, {
without: ['jsonRangesToMongo', '_safeObjectId'],
})
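// jsonRangesToMongo mutates the ranges object in place, converting string ids and
// timestamps into their BSON equivalents before insertion. A small before/after
// sketch (the ids below are made-up 24-character hex strings):
if (require.main === module) {
  const example = {
    changes: [
      {
        id: '64b7f00000000000000000aa',
        op: { i: 'hello', p: 0 },
        metadata: {
          user_id: '64b7f00000000000000000bb',
          ts: '2024-01-01T00:00:00.000Z',
        },
      },
    ],
    comments: [],
  }
  const converted = SnapshotManager.jsonRangesToMongo(example)
  console.log(converted.changes[0].id instanceof ObjectId) // true
  console.log(converted.changes[0].metadata.ts instanceof Date) // true
}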

View File

@@ -0,0 +1,10 @@
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
module.exports = {
combineProjectIdAndDocId(projectId, docId) {
return `${projectId}:${docId}`
},
splitProjectIdAndDocId(projectAndDocId) {
return projectAndDocId.split(':')
},
}

View File

@@ -0,0 +1,378 @@
// @ts-check
const { callbackifyAll } = require('@overleaf/promise-utils')
const LockManager = require('./LockManager')
const RedisManager = require('./RedisManager')
const ProjectHistoryRedisManager = require('./ProjectHistoryRedisManager')
const RealTimeRedisManager = require('./RealTimeRedisManager')
const ShareJsUpdateManager = require('./ShareJsUpdateManager')
const HistoryManager = require('./HistoryManager')
const logger = require('@overleaf/logger')
const Metrics = require('./Metrics')
const Errors = require('./Errors')
const DocumentManager = require('./DocumentManager')
const RangesManager = require('./RangesManager')
const SnapshotManager = require('./SnapshotManager')
const Profiler = require('./Profiler')
const { isInsert, isDelete, getDocLength, computeDocHash } = require('./Utils')
/**
* @import { DeleteOp, InsertOp, Op, Ranges, Update, HistoryUpdate } from "./types"
*/
const UpdateManager = {
async processOutstandingUpdates(projectId, docId) {
const timer = new Metrics.Timer('updateManager.processOutstandingUpdates')
try {
await UpdateManager.fetchAndApplyUpdates(projectId, docId)
timer.done({ status: 'success' })
} catch (err) {
timer.done({ status: 'error' })
throw err
}
},
async processOutstandingUpdatesWithLock(projectId, docId) {
const profile = new Profiler('processOutstandingUpdatesWithLock', {
project_id: projectId,
doc_id: docId,
})
const lockValue = await LockManager.promises.tryLock(docId)
if (lockValue == null) {
return
}
profile.log('tryLock')
try {
await UpdateManager.processOutstandingUpdates(projectId, docId)
profile.log('processOutstandingUpdates')
} finally {
await LockManager.promises.releaseLock(docId, lockValue)
profile.log('releaseLock').end()
}
await UpdateManager.continueProcessingUpdatesWithLock(projectId, docId)
},
async continueProcessingUpdatesWithLock(projectId, docId) {
const length = await RealTimeRedisManager.promises.getUpdatesLength(docId)
if (length > 0) {
await UpdateManager.processOutstandingUpdatesWithLock(projectId, docId)
}
},
async fetchAndApplyUpdates(projectId, docId) {
const profile = new Profiler('fetchAndApplyUpdates', {
project_id: projectId,
doc_id: docId,
})
const updates =
await RealTimeRedisManager.promises.getPendingUpdatesForDoc(docId)
logger.debug(
{ projectId, docId, count: updates.length },
'processing updates'
)
if (updates.length === 0) {
return
}
profile.log('getPendingUpdatesForDoc')
for (const update of updates) {
await UpdateManager.applyUpdate(projectId, docId, update)
profile.log('applyUpdate')
}
profile.log('async done').end()
},
/**
* Apply an update to the given document
*
* @param {string} projectId
* @param {string} docId
* @param {Update} update
*/
async applyUpdate(projectId, docId, update) {
const profile = new Profiler('applyUpdate', {
project_id: projectId,
doc_id: docId,
})
UpdateManager._sanitizeUpdate(update)
profile.log('sanitizeUpdate', { sync: true })
try {
let {
lines,
version,
ranges,
pathname,
projectHistoryId,
historyRangesSupport,
} = await DocumentManager.promises.getDoc(projectId, docId)
profile.log('getDoc')
if (lines == null || version == null) {
throw new Errors.NotFoundError(`document not found: ${docId}`)
}
const previousVersion = version
const incomingUpdateVersion = update.v
let updatedDocLines, appliedOps
;({ updatedDocLines, version, appliedOps } =
await ShareJsUpdateManager.promises.applyUpdate(
projectId,
docId,
update,
lines,
version
))
profile.log('sharejs.applyUpdate', {
// only synchronous when the update applies directly to the
// doc version, otherwise getPreviousDocOps is called.
sync: incomingUpdateVersion === previousVersion,
})
const { newRanges, rangesWereCollapsed, historyUpdates } =
RangesManager.applyUpdate(
projectId,
docId,
ranges,
appliedOps,
updatedDocLines,
{ historyRangesSupport }
)
profile.log('RangesManager.applyUpdate', { sync: true })
await RedisManager.promises.updateDocument(
projectId,
docId,
updatedDocLines,
version,
appliedOps,
newRanges,
update.meta
)
profile.log('RedisManager.updateDocument')
UpdateManager._adjustHistoryUpdatesMetadata(
historyUpdates,
pathname,
projectHistoryId,
lines,
ranges,
updatedDocLines,
historyRangesSupport
)
if (historyUpdates.length > 0) {
Metrics.inc('history-queue', 1, { status: 'project-history' })
try {
const projectOpsLength =
await ProjectHistoryRedisManager.promises.queueOps(
projectId,
...historyUpdates.map(op => JSON.stringify(op))
)
HistoryManager.recordAndFlushHistoryOps(
projectId,
historyUpdates,
projectOpsLength
)
profile.log('recordAndFlushHistoryOps')
} catch (err) {
// The full project history can re-sync a project in case
// updates went missing.
// Just record the error here and acknowledge the write-op.
Metrics.inc('history-queue-error')
}
}
if (rangesWereCollapsed) {
Metrics.inc('doc-snapshot')
logger.debug(
{
projectId,
docId,
previousVersion,
lines,
ranges,
update,
},
'update collapsed some ranges, snapshotting previous content'
)
// Do this last, since it's a mongo call, and so potentially longest running
// If it overruns the lock, it's ok, since all of our redis work is done
await SnapshotManager.promises.recordSnapshot(
projectId,
docId,
previousVersion,
pathname,
lines,
ranges
)
}
} catch (error) {
RealTimeRedisManager.sendData({
project_id: projectId,
doc_id: docId,
error: error instanceof Error ? error.message : error,
})
profile.log('sendData')
throw error
} finally {
profile.end()
}
},
async lockUpdatesAndDo(method, projectId, docId, ...args) {
const profile = new Profiler('lockUpdatesAndDo', {
project_id: projectId,
doc_id: docId,
})
const lockValue = await LockManager.promises.getLock(docId)
profile.log('getLock')
let result
try {
await UpdateManager.processOutstandingUpdates(projectId, docId)
profile.log('processOutstandingUpdates')
result = await method(projectId, docId, ...args)
profile.log('method')
} finally {
await LockManager.promises.releaseLock(docId, lockValue)
profile.log('releaseLock').end()
}
// We held the lock for a while so updates might have queued up
UpdateManager.continueProcessingUpdatesWithLock(projectId, docId).catch(
err => {
// The processing may fail for invalid user updates.
// This can be very noisy, put them on level DEBUG
// and record a metric.
Metrics.inc('background-processing-updates-error')
logger.debug(
{ err, projectId, docId },
'error processing updates in background'
)
}
)
return result
},
_sanitizeUpdate(update) {
// In Javascript, characters are 16-bits wide. It does not understand surrogates as characters.
//
    // From Wikipedia (http://en.wikipedia.org/wiki/Plane_(Unicode)#Basic_Multilingual_Plane):
    // "The High Surrogates (U+D800-U+DBFF) and Low Surrogates (U+DC00-U+DFFF) codes are reserved
    // for encoding non-BMP characters in UTF-16 by using a pair of 16-bit codes: one High Surrogate
    // and one Low Surrogate. A single surrogate code point will never be assigned a character."
//
// The main offender seems to be \uD835 as a stand alone character, which would be the first
// 16-bit character of a blackboard bold character (http://www.fileformat.info/info/unicode/char/1d400/index.htm).
// Something must be going on client side that is screwing up the encoding and splitting the
// two 16-bit characters so that \uD835 is standalone.
for (const op of update.op || []) {
if (op.i != null) {
// Replace high and low surrogate characters with 'replacement character' (\uFFFD)
op.i = op.i.replace(/[\uD800-\uDFFF]/g, '\uFFFD')
}
}
return update
},
/**
* Add metadata that will be useful to project history
*
* @param {HistoryUpdate[]} updates
* @param {string} pathname
* @param {string} projectHistoryId
* @param {string[]} lines - document lines before updates were applied
* @param {Ranges} ranges - ranges before updates were applied
* @param {string[]} newLines - document lines after updates were applied
* @param {boolean} historyRangesSupport
*/
_adjustHistoryUpdatesMetadata(
updates,
pathname,
projectHistoryId,
lines,
ranges,
newLines,
historyRangesSupport
) {
let docLength = getDocLength(lines)
let historyDocLength = docLength
for (const change of ranges.changes ?? []) {
if ('d' in change.op) {
historyDocLength += change.op.d.length
}
}
for (const update of updates) {
update.projectHistoryId = projectHistoryId
if (!update.meta) {
update.meta = {}
}
update.meta.pathname = pathname
update.meta.doc_length = docLength
if (historyRangesSupport && historyDocLength !== docLength) {
update.meta.history_doc_length = historyDocLength
}
      // Each update may contain multiple ops, i.e.
      // [{
      //   op: [{i: "foo", p: 4}, {d: "bar", p:8}]
      // }, {
      //   op: [{d: "baz", p: 40}, {i: "qux", p:8}]
      // }]
      // We want to include the doc_length at the start of each update,
      // before its ops are applied. However, we need to track any
// changes to it for the next update.
for (const op of update.op) {
if (isInsert(op)) {
docLength += op.i.length
if (!op.trackedDeleteRejection) {
// Tracked delete rejections end up retaining characters rather
// than inserting
historyDocLength += op.i.length
}
}
if (isDelete(op)) {
docLength -= op.d.length
if (update.meta.tc) {
// This is a tracked delete. It will be translated into a retain in
// history, except any enclosed tracked inserts, which will be
// translated into regular deletes.
for (const change of op.trackedChanges ?? []) {
if (change.type === 'insert') {
historyDocLength -= change.length
}
}
} else {
// This is a regular delete. It will be translated to a delete in
// history.
historyDocLength -= op.d.length
}
}
}
if (!historyRangesSupport) {
// Prevent project-history from processing tracked changes
delete update.meta.tc
}
}
if (historyRangesSupport && updates.length > 0) {
const lastUpdate = updates[updates.length - 1]
lastUpdate.meta ??= {}
lastUpdate.meta.doc_hash = computeDocHash(newLines)
}
},
}
module.exports = { ...callbackifyAll(UpdateManager), promises: UpdateManager }
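// The doc_length bookkeeping in _adjustHistoryUpdatesMetadata is easier to see with
// concrete numbers. A worked sketch (hypothetical updates, plain ops, no tracked
// changes or history ranges) that mirrors the loop above:
if (require.main === module) {
  const exampleLines = ['hello', 'world']
  let docLength = getDocLength(exampleLines) // 5 + 5 + 1 joining newline = 11
  const exampleUpdates = [
    { op: [{ i: '!!', p: 11 }] }, // should record doc_length 11
    { op: [{ d: 'world', p: 6 }] }, // should record doc_length 13 (shifted by the insert above)
  ]
  for (const update of exampleUpdates) {
    // each update records the document length *before* its own ops are applied
    update.meta = { doc_length: docLength }
    for (const op of update.op) {
      if (isInsert(op)) docLength += op.i.length
      if (isDelete(op)) docLength -= op.d.length
    }
  }
  console.log(exampleUpdates.map(u => u.meta.doc_length)) // [ 11, 13 ]
}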

View File

@@ -0,0 +1,129 @@
// @ts-check
const { createHash } = require('node:crypto')
const _ = require('lodash')
/**
* @import { CommentOp, DeleteOp, InsertOp, Op, TrackedChange } from './types'
*/
/**
* Returns true if the op is an insert
*
* @param {Op} op
* @returns {op is InsertOp}
*/
function isInsert(op) {
return 'i' in op && op.i != null
}
/**
* Returns true if the op is an insert
*
* @param {Op} op
* @returns {op is DeleteOp}
*/
function isDelete(op) {
return 'd' in op && op.d != null
}
/**
* Returns true if the op is a comment
*
* @param {Op} op
* @returns {op is CommentOp}
*/
function isComment(op) {
return 'c' in op && op.c != null
}
/**
* Get the length of a document from its lines
*
* @param {string[]} lines
* @returns {number}
*/
function getDocLength(lines) {
let docLength = _.reduce(lines, (chars, line) => chars + line.length, 0)
// Add newline characters. Lines are joined by newlines, but the last line
// doesn't include a newline. We must make a special case for an empty list
// so that it doesn't report a doc length of -1.
docLength += Math.max(lines.length - 1, 0)
return docLength
}
/**
* Adds given tracked deletes to the given content.
*
* The history system includes tracked deletes in the document content.
*
* @param {string} content
* @param {TrackedChange[]} trackedChanges
* @return {string} content for the history service
*/
function addTrackedDeletesToContent(content, trackedChanges) {
let cursor = 0
let result = ''
for (const change of trackedChanges) {
if (isDelete(change.op)) {
// Add the content before the tracked delete
result += content.slice(cursor, change.op.p)
cursor = change.op.p
// Add the content of the tracked delete
result += change.op.d
}
}
// Add the content after all tracked deletes
result += content.slice(cursor)
return result
}
/**
* Compute the content hash for a doc
*
* This hash is sent to the history to validate updates.
*
* @param {string[]} lines
* @return {string} the doc hash
*/
function computeDocHash(lines) {
const hash = createHash('sha1')
if (lines.length > 0) {
for (const line of lines.slice(0, lines.length - 1)) {
hash.update(line)
hash.update('\n')
}
// The last line doesn't end with a newline
hash.update(lines[lines.length - 1])
}
return hash.digest('hex')
}
/**
* checks if the given originOrSource should be treated as a source or origin
* TODO: remove this hack and remove all "source" references
*/
function extractOriginOrSource(originOrSource) {
let source = null
let origin = null
if (typeof originOrSource === 'string') {
source = originOrSource
} else if (originOrSource && typeof originOrSource === 'object') {
origin = originOrSource
}
return { source, origin }
}
module.exports = {
isInsert,
isDelete,
isComment,
addTrackedDeletesToContent,
getDocLength,
computeDocHash,
extractOriginOrSource,
}
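// A couple of concrete values for the helpers above (a minimal sketch; the tracked
// change shape matches what addTrackedDeletesToContent reads - an op with `d`/`p`):
if (require.main === module) {
  console.log(getDocLength(['hello', 'world'])) // 11 (5 + 5 + one joining newline)
  console.log(getDocLength([])) // 0 (special-cased so it is not -1)
  // Re-inserting a tracked delete of ' cruel ' at position 5 for the history system:
  console.log(
    addTrackedDeletesToContent('helloworld', [{ op: { d: ' cruel ', p: 5 } }])
  ) // -> 'hello cruel world'
}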

View File

@@ -0,0 +1,28 @@
const Metrics = require('@overleaf/metrics')
const Settings = require('@overleaf/settings')
const { MongoClient, ObjectId } = require('mongodb-legacy')
const mongoClient = new MongoClient(Settings.mongo.url, Settings.mongo.options)
const mongoDb = mongoClient.db()
const db = {
docs: mongoDb.collection('docs'),
docSnapshots: mongoDb.collection('docSnapshots'),
projects: mongoDb.collection('projects'),
}
async function healthCheck() {
const res = await mongoDb.command({ ping: 1 })
if (!res.ok) {
throw new Error('failed mongo ping')
}
}
Metrics.mongodb.monitor(mongoClient)
module.exports = {
db,
ObjectId,
mongoClient,
healthCheck: require('node:util').callbackify(healthCheck),
}

View File

@@ -0,0 +1,22 @@
Licensed under the standard MIT license:
Copyright 2011 Joseph Gentle.
Copyright 2012-2024 Overleaf.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@@ -0,0 +1,6 @@
This folder contains a modified version of the ShareJS source code, forked from [v0.5.0](https://github.com/josephg/ShareJS/tree/v0.5.0/).
The original CoffeeScript code has been decaffeinated to JavaScript, and further modified. Some folders have been removed. See https://github.com/josephg/ShareJS/blob/v0.5.0/src/types/README.md for the original README.
The original code, and the current modified code in this directory, are published under the MIT license.

View File

@@ -0,0 +1,895 @@
/* eslint-disable
no-console,
no-return-assign,
n/no-callback-literal,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS103: Rewrite code to no longer use __guard__
* DS104: Avoid inline assignments
* DS204: Change includes calls to have a more natural evaluation order
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// The model of all the ops. Responsible for applying & transforming remote deltas
// and managing the storage layer.
//
// Actual storage is handled by the database wrappers in db/*, wrapped by DocCache
let Model
const { EventEmitter } = require('node:events')
const queue = require('./syncqueue')
const types = require('../types')
const Profiler = require('../../Profiler')
const isArray = o => Object.prototype.toString.call(o) === '[object Array]'
// This constructor creates a new Model object. There will be one model object
// per server context.
//
// The model object is responsible for a lot of things:
//
// - It manages the interactions with the database
// - It maintains (in memory) a set of all active documents
// - It calls out to the OT functions when necessary
//
// The model is an event emitter. It emits the following events:
//
// create(docName, data): A document has been created with the specified name & data
module.exports = Model = function (db, options) {
  // db can be null if the user doesn't want persistence.
let getOps
if (!(this instanceof Model)) {
return new Model(db, options)
}
const model = this
if (options == null) {
options = {}
}
// This is a cache of 'live' documents.
//
// The cache is a map from docName -> {
// ops:[{op, meta}]
// snapshot
// type
// v
// meta
// eventEmitter
// reapTimer
// committedVersion: v
// snapshotWriteLock: bool to make sure writeSnapshot isn't re-entrant
// dbMeta: database specific data
// opQueue: syncQueue for processing ops
// }
//
// The ops list contains the document's last options.numCachedOps ops. (Or all
// of them if we're using a memory store).
//
// Documents are stored in this set so long as the document has been accessed in
// the last few seconds (options.reapTime) OR at least one client has the document
// open. I don't know if I should keep open (but not being edited) documents live -
// maybe if a client has a document open but the document isn't being edited, I should
// flush it from the cache.
//
// In any case, the API to model is designed such that if we want to change that later
// it should be pretty easy to do so without any external-to-the-model code changes.
const docs = {}
// This is a map from docName -> [callback]. It is used when a document hasn't been
// cached and multiple getSnapshot() / getVersion() requests come in. All requests
// are added to the callback list and called when db.getSnapshot() returns.
//
// callback(error, snapshot data)
const awaitingGetSnapshot = {}
// The time that documents which no clients have open will stay in the cache.
// Should be > 0.
if (options.reapTime == null) {
options.reapTime = 3000
}
// The number of operations the cache holds before reusing the space
if (options.numCachedOps == null) {
options.numCachedOps = 10
}
// This option forces documents to be reaped, even when there's no database backend.
  // This is useful when you don't care about persistence and don't want to gradually
// fill memory.
//
// You might want to set reapTime to a day or something.
if (options.forceReaping == null) {
options.forceReaping = false
}
// Until I come up with a better strategy, we'll save a copy of the document snapshot
// to the database every ~20 submitted ops.
if (options.opsBeforeCommit == null) {
options.opsBeforeCommit = 20
}
// It takes some processing time to transform client ops. The server will punt ops back to the
// client to transform if they're too old.
if (options.maximumAge == null) {
options.maximumAge = 40
}
// **** Cache API methods
  // It's important that all ops are applied in order. This helper method creates the op submission queue
// for a single document. This contains the logic for transforming & applying ops.
const makeOpQueue = (docName, doc) =>
queue(function (opData, callback) {
if (!(opData.v >= 0)) {
return callback('Version missing')
}
if (opData.v > doc.v) {
return callback('Op at future version')
}
// Punt the transforming work back to the client if the op is too old.
if (opData.v + options.maximumAge < doc.v) {
return callback('Op too old')
}
if (!opData.meta) {
opData.meta = {}
}
opData.meta.ts = Date.now()
// We'll need to transform the op to the current version of the document. This
// calls the callback immediately if opVersion == doc.v.
return getOps(docName, opData.v, doc.v, function (error, ops) {
let snapshot
if (error) {
return callback(error)
}
if (doc.v - opData.v !== ops.length) {
// This should never happen. It indicates that we didn't get all the ops we
          // asked for. It's important that the submitted op is correctly transformed.
console.error(
`Could not get old ops in model for document ${docName}`
)
console.error(
`Expected ops ${opData.v} to ${doc.v} and got ${ops.length} ops`
)
return callback('Internal error')
}
if (ops.length > 0) {
try {
const profile = new Profiler('model.transform')
// If there's enough ops, it might be worth spinning this out into a webworker thread.
for (const oldOp of Array.from(ops)) {
// Dup detection works by sending the id(s) the op has been submitted with previously.
// If the id matches, we reject it. The client can also detect the op has been submitted
// already if it sees its own previous id in the ops it sees when it does catchup.
if (
oldOp.meta.source &&
opData.dupIfSource &&
Array.from(opData.dupIfSource).includes(oldOp.meta.source)
) {
return callback('Op already submitted')
}
opData.op = doc.type.transform(opData.op, oldOp.op, 'left')
opData.v++
}
profile.log('transform', { sync: true }).end()
} catch (error1) {
error = error1
return callback(error.message)
}
}
try {
const profile = new Profiler('model.apply')
snapshot = doc.type.apply(doc.snapshot, opData.op)
profile.log('model.apply', { sync: true }).end()
} catch (error2) {
error = error2
return callback(error.message)
}
if (
options.maxDocLength != null &&
doc.snapshot.length > options.maxDocLength
) {
return callback('Update takes doc over max doc size')
}
// The op data should be at the current version, and the new document data should be at
// the next version.
//
        // This should never happen in practice, but it's a nice little check to make sure everything
// is hunky-dory.
if (opData.v !== doc.v) {
// This should never happen.
console.error(
'Version mismatch detected in model. File a ticket - this is a bug.'
)
console.error(`Expecting ${opData.v} == ${doc.v}`)
return callback('Internal error')
}
// newDocData = {snapshot, type:type.name, v:opVersion + 1, meta:docData.meta}
const writeOp =
(db != null ? db.writeOp : undefined) ||
((docName, newOpData, callback) => callback())
return writeOp(docName, opData, function (error) {
if (error) {
// The user should probably know about this.
console.warn(`Error writing ops to database: ${error}`)
return callback(error)
}
__guardMethod__(options.stats, 'writeOp', o => o.writeOp())
// This is needed when we emit the 'change' event, below.
const oldSnapshot = doc.snapshot
// All the heavy lifting is now done. Finally, we'll update the cache with the new data
// and (maybe!) save a new document snapshot to the database.
doc.v = opData.v + 1
doc.snapshot = snapshot
doc.ops.push(opData)
if (db && doc.ops.length > options.numCachedOps) {
doc.ops.shift()
}
model.emit('applyOp', docName, opData, snapshot, oldSnapshot)
doc.eventEmitter.emit('op', opData, snapshot, oldSnapshot)
// The callback is called with the version of the document at which the op was applied.
          // This is the op.v after transformation, and it's doc.v - 1.
callback(null, opData.v)
// I need a decent strategy here for deciding whether or not to save the snapshot.
//
// The 'right' strategy looks something like "Store the snapshot whenever the snapshot
// is smaller than the accumulated op data". For now, I'll just store it every 20
// ops or something. (Configurable with doc.committedVersion)
if (
!doc.snapshotWriteLock &&
doc.committedVersion + options.opsBeforeCommit <= doc.v
) {
return tryWriteSnapshot(docName, function (error) {
if (error) {
return console.warn(
`Error writing snapshot ${error}. This is nonfatal`
)
}
})
}
})
})
})
// Add the data for the given docName to the cache. The named document shouldn't already
// exist in the doc set.
//
// Returns the new doc.
const add = function (docName, error, data, committedVersion, ops, dbMeta) {
let callback, doc
const callbacks = awaitingGetSnapshot[docName]
delete awaitingGetSnapshot[docName]
if (error) {
if (callbacks) {
for (callback of Array.from(callbacks)) {
callback(error)
}
}
} else {
doc = docs[docName] = {
snapshot: data.snapshot,
v: data.v,
type: data.type,
meta: data.meta,
// Cache of ops
ops: ops || [],
eventEmitter: new EventEmitter(),
// Timer before the document will be invalidated from the cache (if the document has no
// listeners)
reapTimer: null,
        // Version of the snapshot that's in the database
committedVersion: committedVersion != null ? committedVersion : data.v,
snapshotWriteLock: false,
dbMeta,
}
doc.opQueue = makeOpQueue(docName, doc)
refreshReapingTimeout(docName)
model.emit('add', docName, data)
if (callbacks) {
for (callback of Array.from(callbacks)) {
callback(null, doc)
}
}
}
return doc
}
// This is a little helper wrapper around db.getOps. It does two things:
//
// - If there's no database set, it returns an error to the callback
// - It adds version numbers to each op returned from the database
  // (These can be inferred from context so the DB doesn't store them, but it's useful to have them).
const getOpsInternal = function (docName, start, end, callback) {
if (!db) {
return typeof callback === 'function'
? callback('Document does not exist')
: undefined
}
return db.getOps(docName, start, end, function (error, ops) {
if (error) {
return typeof callback === 'function' ? callback(error) : undefined
}
let v = start
for (const op of Array.from(ops)) {
op.v = v++
}
return typeof callback === 'function' ? callback(null, ops) : undefined
})
}
// Load the named document into the cache. This function is re-entrant.
//
// The callback is called with (error, doc)
const load = function (docName, callback) {
if (docs[docName]) {
// The document is already loaded. Return immediately.
__guardMethod__(options.stats, 'cacheHit', o => o.cacheHit('getSnapshot'))
return callback(null, docs[docName])
}
// We're a memory store. If we don't have it, nobody does.
if (!db) {
return callback('Document does not exist')
}
const callbacks = awaitingGetSnapshot[docName]
// The document is being loaded already. Add ourselves as a callback.
if (callbacks) {
return callbacks.push(callback)
}
__guardMethod__(options.stats, 'cacheMiss', o1 =>
o1.cacheMiss('getSnapshot')
)
// The document isn't loaded and isn't being loaded. Load it.
awaitingGetSnapshot[docName] = [callback]
return db.getSnapshot(docName, function (error, data, dbMeta) {
if (error) {
return add(docName, error)
}
const type = types[data.type]
if (!type) {
console.warn(`Type '${data.type}' missing`)
return callback('Type not found')
}
data.type = type
const committedVersion = data.v
// The server can close without saving the most recent document snapshot.
// In this case, there are extra ops which need to be applied before
// returning the snapshot.
return getOpsInternal(docName, data.v, null, function (error, ops) {
if (error) {
return callback(error)
}
if (ops.length > 0) {
console.log(`Catchup ${docName} ${data.v} -> ${data.v + ops.length}`)
try {
for (const op of Array.from(ops)) {
data.snapshot = type.apply(data.snapshot, op.op)
data.v++
}
} catch (e) {
            // This should never happen - it indicates that what's in the
// database is invalid.
console.error(`Op data invalid for ${docName}: ${e.stack}`)
return callback('Op data invalid')
}
}
model.emit('load', docName, data)
return add(docName, error, data, committedVersion, ops, dbMeta)
})
})
}
// This makes sure the cache contains a document. If the doc cache doesn't contain
// a document, it is loaded from the database and stored.
//
// Documents are stored so long as either:
// - They have been accessed within the past #{PERIOD}
// - At least one client has the document open
function refreshReapingTimeout(docName) {
const doc = docs[docName]
if (!doc) {
return
}
// I want to let the clients list be updated before this is called.
return process.nextTick(function () {
// This is an awkward way to find out the number of clients on a document. If this
// causes performance issues, add a numClients field to the document.
//
      // The first check is because it's possible that between refreshReapingTimeout being called and this
// event being fired, someone called delete() on the document and hence the doc is something else now.
if (
doc === docs[docName] &&
doc.eventEmitter.listeners('op').length === 0 &&
(db || options.forceReaping) &&
doc.opQueue.busy === false
) {
let reapTimer
clearTimeout(doc.reapTimer)
return (doc.reapTimer = reapTimer =
setTimeout(
() =>
tryWriteSnapshot(docName, function () {
// If the reaping timeout has been refreshed while we're writing the snapshot, or if we're
// in the middle of applying an operation, don't reap.
if (
docs[docName].reapTimer === reapTimer &&
doc.opQueue.busy === false
) {
return delete docs[docName]
}
}),
options.reapTime
))
}
})
}
function tryWriteSnapshot(docName, callback) {
if (!db) {
return typeof callback === 'function' ? callback() : undefined
}
const doc = docs[docName]
// The doc is closed
if (!doc) {
return typeof callback === 'function' ? callback() : undefined
}
// The document is already saved.
if (doc.committedVersion === doc.v) {
return typeof callback === 'function' ? callback() : undefined
}
if (doc.snapshotWriteLock) {
return typeof callback === 'function'
? callback('Another snapshot write is in progress')
: undefined
}
doc.snapshotWriteLock = true
__guardMethod__(options.stats, 'writeSnapshot', o => o.writeSnapshot())
const writeSnapshot =
(db != null ? db.writeSnapshot : undefined) ||
((docName, docData, dbMeta, callback) => callback())
const data = {
v: doc.v,
meta: doc.meta,
snapshot: doc.snapshot,
// The database doesn't know about object types.
type: doc.type.name,
}
// Commit snapshot.
return writeSnapshot(docName, data, doc.dbMeta, function (error, dbMeta) {
doc.snapshotWriteLock = false
// We have to use data.v here because the version in the doc could
// have been updated between the call to writeSnapshot() and now.
doc.committedVersion = data.v
doc.dbMeta = dbMeta
return typeof callback === 'function' ? callback(error) : undefined
})
}
// *** Model interface methods
// Create a new document.
//
// data should be {snapshot, type, [meta]}. The version of a new document is 0.
this.create = function (docName, type, meta, callback) {
if (typeof meta === 'function') {
;[meta, callback] = Array.from([{}, meta])
}
if (docName.match(/\//)) {
return typeof callback === 'function'
? callback('Invalid document name')
: undefined
}
if (docs[docName]) {
return typeof callback === 'function'
? callback('Document already exists')
: undefined
}
if (typeof type === 'string') {
type = types[type]
}
if (!type) {
return typeof callback === 'function'
? callback('Type not found')
: undefined
}
const data = {
snapshot: type.create(),
type: type.name,
meta: meta || {},
v: 0,
}
const done = function (error, dbMeta) {
// dbMeta can be used to cache extra state needed by the database to access the document, like an ID or something.
if (error) {
return typeof callback === 'function' ? callback(error) : undefined
}
// From here on we'll store the object version of the type name.
data.type = type
add(docName, null, data, 0, [], dbMeta)
model.emit('create', docName, data)
return typeof callback === 'function' ? callback() : undefined
}
if (db) {
return db.create(docName, data, done)
} else {
return done()
}
}
  // Permanently deletes the specified document.
// If listeners are attached, they are removed.
//
// The callback is called with (error) if there was an error. If error is null / undefined, the
// document was deleted.
//
// WARNING: This isn't well supported throughout the code. (Eg, streaming clients aren't told about the
// deletion. Subsequent op submissions will fail).
this.delete = function (docName, callback) {
const doc = docs[docName]
if (doc) {
clearTimeout(doc.reapTimer)
delete docs[docName]
}
const done = function (error) {
if (!error) {
model.emit('delete', docName)
}
return typeof callback === 'function' ? callback(error) : undefined
}
if (db) {
return db.delete(docName, doc != null ? doc.dbMeta : undefined, done)
} else {
return done(!doc ? 'Document does not exist' : undefined)
}
}
  // This gets all operations from [start...end]. (That is, it's not inclusive.)
//
// end can be null. This means 'get me all ops from start'.
//
// Each op returned is in the form {op:o, meta:m, v:version}.
//
// Callback is called with (error, [ops])
//
  // If the document does not exist, getOps doesn't necessarily return an error. This is because
  // it's awkward to figure out whether or not the document exists for things
  // like the redis database backend. I guess it's a bit gross having this inconsistent
  // with the other DB calls, but it's certainly convenient.
//
  // Use getVersion() to determine if a document actually exists, if that's what you're
// after.
this.getOps = getOps = function (docName, start, end, callback) {
    // getOps will only use the op cache if it's there. It won't fill the op cache in.
if (!(start >= 0)) {
throw new Error('start must be 0+')
}
if (typeof end === 'function') {
;[end, callback] = Array.from([null, end])
}
const ops = docs[docName] != null ? docs[docName].ops : undefined
if (ops) {
const version = docs[docName].v
// Ops contains an array of ops. The last op in the list is the last op applied
if (end == null) {
end = version
}
start = Math.min(start, end)
if (start === end) {
return callback(null, [])
}
// Base is the version number of the oldest op we have cached
const base = version - ops.length
      // If the database is null, we'll trim to the ops we do have and hope that's enough.
if (start >= base || db === null) {
refreshReapingTimeout(docName)
if (options.stats != null) {
options.stats.cacheHit('getOps')
}
return callback(null, ops.slice(start - base, end - base))
}
}
if (options.stats != null) {
options.stats.cacheMiss('getOps')
}
return getOpsInternal(docName, start, end, callback)
}
// Gets the snapshot data for the specified document.
// getSnapshot(docName, callback)
// Callback is called with (error, {v: <version>, type: <type>, snapshot: <snapshot>, meta: <meta>})
this.getSnapshot = (docName, callback) =>
load(docName, (error, doc) =>
callback(
error,
doc
? { v: doc.v, type: doc.type, snapshot: doc.snapshot, meta: doc.meta }
: undefined
)
)
// Gets the latest version # of the document.
// getVersion(docName, callback)
// callback is called with (error, version).
this.getVersion = (docName, callback) =>
load(docName, (error, doc) =>
callback(error, doc != null ? doc.v : undefined)
)
// Apply an op to the specified document.
// The callback is passed (error, applied version #)
// opData = {op:op, v:v, meta:metadata}
//
// Ops are queued before being applied so that the following code applies op C before op B:
// model.applyOp 'doc', OPA, -> model.applyOp 'doc', OPB
// model.applyOp 'doc', OPC
this.applyOp = (
docName,
opData,
callback // All the logic for this is in makeOpQueue, above.
) =>
load(docName, function (error, doc) {
if (error) {
return callback(error)
}
return process.nextTick(() =>
doc.opQueue(opData, function (error, newVersion) {
refreshReapingTimeout(docName)
return typeof callback === 'function'
? callback(error, newVersion)
: undefined
})
)
})
// TODO: store (some) metadata in DB
  // TODO: op and meta should be combinable in the op that gets sent
this.applyMetaOp = function (docName, metaOpData, callback) {
const { path, value } = metaOpData.meta
if (!isArray(path)) {
return typeof callback === 'function'
? callback('path should be an array')
: undefined
}
return load(docName, function (error, doc) {
if (error != null) {
return typeof callback === 'function' ? callback(error) : undefined
} else {
let applied = false
switch (path[0]) {
case 'shout':
doc.eventEmitter.emit('op', metaOpData)
applied = true
break
}
if (applied) {
model.emit('applyMetaOp', docName, path, value)
}
return typeof callback === 'function'
? callback(null, doc.v)
: undefined
}
})
}
// Listen to all ops from the specified version. If version is in the past, all
// ops since that version are sent immediately to the listener.
//
// The callback is called once the listener is attached, but before any ops have been passed
// to the listener.
//
// This will _not_ edit the document metadata.
//
// If there are any listeners, we don't purge the document from the cache. But be aware, this behaviour
// might change in a future version.
//
// version is the document version at which the document is opened. It can be left out if you want to open
// the document at the most recent version.
//
// listener is called with (opData) each time an op is applied.
//
// callback(error, openedVersion)
this.listen = function (docName, version, listener, callback) {
if (typeof version === 'function') {
;[version, listener, callback] = Array.from([null, version, listener])
}
return load(docName, function (error, doc) {
if (error) {
return typeof callback === 'function' ? callback(error) : undefined
}
clearTimeout(doc.reapTimer)
if (version != null) {
return getOps(docName, version, null, function (error, data) {
if (error) {
return typeof callback === 'function' ? callback(error) : undefined
}
doc.eventEmitter.on('op', listener)
if (typeof callback === 'function') {
callback(null, version)
}
return (() => {
const result = []
for (const op of Array.from(data)) {
let needle
listener(op)
// The listener may well remove itself during the catchup phase. If this happens, break early.
// This is done in a quite inefficient way. (O(n) where n = #listeners on doc)
if (
((needle = listener),
!Array.from(doc.eventEmitter.listeners('op')).includes(needle))
) {
break
} else {
result.push(undefined)
}
}
return result
})()
})
} else {
// Version is null / undefined. Just add the listener.
doc.eventEmitter.on('op', listener)
return typeof callback === 'function'
? callback(null, doc.v)
: undefined
}
})
}
// Remove a listener for a particular document.
//
// removeListener(docName, listener)
//
// This is synchronous.
this.removeListener = function (docName, listener) {
// The document should already be loaded.
const doc = docs[docName]
if (!doc) {
throw new Error('removeListener called but document not loaded')
}
doc.eventEmitter.removeListener('op', listener)
return refreshReapingTimeout(docName)
}
// Flush saves all snapshot data to the database. I'm not sure whether or not this is actually needed -
// sharejs will happily replay uncommitted ops when documents are re-opened anyway.
this.flush = function (callback) {
if (!db) {
return typeof callback === 'function' ? callback() : undefined
}
let pendingWrites = 0
for (const docName in docs) {
const doc = docs[docName]
if (doc.committedVersion < doc.v) {
pendingWrites++
// I'm hoping writeSnapshot will always happen in another thread.
tryWriteSnapshot(docName, () =>
process.nextTick(function () {
pendingWrites--
if (pendingWrites === 0) {
return typeof callback === 'function' ? callback() : undefined
}
})
)
}
}
// If nothing was queued, terminate immediately.
if (pendingWrites === 0) {
return typeof callback === 'function' ? callback() : undefined
}
}
// Close the database connection. This is needed so nodejs can shut down cleanly.
this.closeDb = function () {
__guardMethod__(db, 'close', o => o.close())
return (db = null)
}
}
// Model inherits from EventEmitter.
Model.prototype = new EventEmitter()
function __guardMethod__(obj, methodName, transform) {
if (
typeof obj !== 'undefined' &&
obj !== null &&
typeof obj[methodName] === 'function'
) {
return transform(obj, methodName)
} else {
return undefined
}
}
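// A minimal sketch of driving the model directly with no database (db = null) and the
// standard 'text' type; the `{ i, p }` op component is the sharejs text-type insert,
// and the document name and contents below are made up:
if (require.main === module) {
  const model = new Model(null)
  model.create('example-doc', 'text', {}, function (error) {
    if (error) return console.error(error)
    // opData = { op, v, meta }; v is the version the op was generated against
    model.applyOp(
      'example-doc',
      { op: [{ i: 'hello world', p: 0 }], v: 0 },
      function (error, appliedVersion) {
        if (error) return console.error(error)
        console.log('applied at version', appliedVersion) // 0
        model.getSnapshot('example-doc', function (error, data) {
          if (error) return console.error(error)
          console.log(data.v, data.snapshot) // 1 'hello world'
        })
      }
    )
  })
}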

View File

@@ -0,0 +1,60 @@
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// A synchronous processing queue. The queue calls process on the arguments,
// ensuring that process() is only executing once at a time.
//
// process(data, callback) _MUST_ eventually call its callback.
//
// Example:
//
// queue = require 'syncqueue'
//
// fn = queue (data, callback) ->
// asyncthing data, ->
// callback(321)
//
// fn(1)
// fn(2)
// fn(3, (result) -> console.log(result))
//
// ^--- async thing will only be running once at any time.
module.exports = function (process) {
if (typeof process !== 'function') {
throw new Error('process is not a function')
}
const queue = []
const enqueue = function (data, callback) {
queue.push([data, callback])
return flush()
}
enqueue.busy = false
function flush() {
if (enqueue.busy || queue.length === 0) {
return
}
enqueue.busy = true
const [data, callback] = Array.from(queue.shift())
return process(data, function (...result) {
// TODO: Make this not use varargs - varargs are really slow.
enqueue.busy = false
// This is called after busy = false so a user can check if enqueue.busy is set in the callback.
if (callback) {
callback.apply(null, result)
}
return flush()
})
}
return enqueue
}
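// The example in the header comment is still the original CoffeeScript; the same
// usage in JavaScript (a small sketch, setTimeout stands in for the async work):
if (require.main === module) {
  const makeQueue = module.exports
  const fn = makeQueue(function (data, callback) {
    setTimeout(function () {
      callback(data * 2)
    }, 10)
  })
  fn(1)
  fn(2)
  fn(3, function (result) {
    // printed last: items are processed strictly one at a time, in order
    console.log(result) // 6
  })
}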

View File

@@ -0,0 +1,48 @@
This directory contains all the operational transform code. Each file defines a type.
Most of the types in here are for testing or demonstration. The only types which are sent to the webclient
are `text` and `json`.
# An OT type
All OT types have the following fields:
`name`: _(string)_ Name of the type. Should match the filename.
`create() -> snapshot`: Function which creates and returns a new document snapshot
`apply(snapshot, op) -> snapshot`: A function which creates a new document snapshot with the op applied
`transform(op1, op2, side) -> op1'`: OT transform function.
Given op1, op2, `apply(apply(s, op1), transform(op2, op1, 'right')) == apply(apply(s, op2), transform(op1, op2, 'left'))`.
Transform and apply must never modify their arguments.
Optional properties:
`tp2`: _(bool)_ True if the transform function supports TP2. This allows p2p architectures to work.
`compose(op1, op2) -> op`: Create and return a new op which has the same effect as op1 + op2.
`serialize(snapshot) -> JSON object`: Serialize a document to something we can JSON.stringify()
`deserialize(object) -> snapshot`: Deserialize a JSON object into the document's internal snapshot format
`prune(op1', op2, side) -> op1`: Inverse transform function. Only required for TP2 types.
`normalize(op) -> op`: Fix up an op to make it valid. Eg, remove skips of size zero.
`api`: _(object)_ Set of helper methods which will be mixed in to the client document object for manipulating documents. See below.
# Examples
`count` and `simple` are two trivial OT type definitions if you want to take a look. JSON defines
the ot-for-JSON type (see the wiki for documentation) and all the text types define different text
implementations. (I still have no idea which one I like the most, and they're fun to write!)
# API
Types can also define API functions. These methods are mixed into the client's Doc object when a document is created.
You can use them to help construct ops programmatically (so users don't need to understand how ops are structured).
For example, the three text types defined here (text, text-composable and text-tp2) all provide the text API, supplying
`.insert()`, `.del()`, `.getLength` and `.getText` methods.
See text-api.coffee for an example.
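For instance, the transform property above can be checked by hand with the `count` type defined in this directory (a small sketch; both application orders converge on the same snapshot):

    // count ops are [expectedSnapshot, increment]
    const count = require('./count')
    const s = count.create() // 1
    const op1 = [1, 2]
    const op2 = [1, 3]
    const left = count.apply(count.apply(s, op1), count.transform(op2, op1, 'right'))
    const right = count.apply(count.apply(s, op2), count.transform(op1, op2, 'left'))
    console.log(left, right) // 6 6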

View File

@@ -0,0 +1,37 @@
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// This is a simple type used for testing other OT code. Each op is [expectedSnapshot, increment]
exports.name = 'count'
exports.create = () => 1
exports.apply = function (snapshot, op) {
const [v, inc] = Array.from(op)
if (snapshot !== v) {
throw new Error(`Op ${v} != snapshot ${snapshot}`)
}
return snapshot + inc
}
// transform op1 by op2. Return transformed version of op1.
exports.transform = function (op1, op2) {
if (op1[0] !== op2[0]) {
throw new Error(`Op1 ${op1[0]} != op2 ${op2[0]}`)
}
return [op1[0] + op2[1], op1[1]]
}
exports.compose = function (op1, op2) {
if (op1[0] + op1[1] !== op2[0]) {
throw new Error(`Op1 ${op1} + 1 != op2 ${op2}`)
}
return [op1[0], op1[1] + op2[1]]
}
exports.generateRandomOp = doc => [[doc, 1], doc + 1]

View File

@@ -0,0 +1,116 @@
/* eslint-disable
no-return-assign,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// These methods let you build a transform function from a transformComponent function
// for OT types like text and JSON in which operations are lists of components
// and transforming them requires N^2 work.
// Add transform and transformX functions for an OT type which has transformComponent defined.
// transformComponent(destination array, component, other component, side)
let bootstrapTransform
exports._bt = bootstrapTransform = function (
type,
transformComponent,
checkValidOp,
append
) {
let transformX
const transformComponentX = function (left, right, destLeft, destRight) {
transformComponent(destLeft, left, right, 'left')
return transformComponent(destRight, right, left, 'right')
}
  // Transforms leftOp and rightOp against each other. Returns [leftOp', rightOp']
type.transformX =
type.transformX =
transformX =
function (leftOp, rightOp) {
checkValidOp(leftOp)
checkValidOp(rightOp)
const newRightOp = []
for (let rightComponent of Array.from(rightOp)) {
// Generate newLeftOp by composing leftOp by rightComponent
const newLeftOp = []
let k = 0
while (k < leftOp.length) {
let l
const nextC = []
transformComponentX(leftOp[k], rightComponent, newLeftOp, nextC)
k++
if (nextC.length === 1) {
rightComponent = nextC[0]
} else if (nextC.length === 0) {
for (l of Array.from(leftOp.slice(k))) {
append(newLeftOp, l)
}
rightComponent = null
break
} else {
// Recurse.
const [l_, r_] = Array.from(transformX(leftOp.slice(k), nextC))
for (l of Array.from(l_)) {
append(newLeftOp, l)
}
for (const r of Array.from(r_)) {
append(newRightOp, r)
}
rightComponent = null
break
}
}
if (rightComponent != null) {
append(newRightOp, rightComponent)
}
leftOp = newLeftOp
}
return [leftOp, newRightOp]
}
// Transforms op with specified type ('left' or 'right') by otherOp.
return (type.transform = type.transform =
function (op, otherOp, type) {
let _
if (type !== 'left' && type !== 'right') {
throw new Error("type must be 'left' or 'right'")
}
if (otherOp.length === 0) {
return op
}
// TODO: Benchmark with and without this line. I _think_ it'll make a big difference...?
if (op.length === 1 && otherOp.length === 1) {
return transformComponent([], op[0], otherOp[0], type)
}
if (type === 'left') {
let left
;[left, _] = Array.from(transformX(op, otherOp))
return left
} else {
let right
;[_, right] = Array.from(transformX(otherOp, op))
return right
}
})
}
if (typeof WEB === 'undefined') {
exports.bootstrapTransform = bootstrapTransform
}

View File

@@ -0,0 +1,25 @@
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
const register = function (file) {
const type = require(file)
exports[type.name] = type
try {
return require(`${file}-api`)
} catch (error) {}
}
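// Illustrative note: register('./text') stores the 'text' type on exports.text and,
// if a sibling './text-api' module exists, requiring it mixes in the editing API as a side effect.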
// Import all the built-in types.
register('./simple')
register('./count')
register('./text')
register('./text-composable')
register('./text-tp2')
register('./json')

View File

@@ -0,0 +1,356 @@
/* eslint-disable
no-undef,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// API for JSON OT
let json
if (typeof WEB === 'undefined') {
json = require('./json')
}
if (typeof WEB !== 'undefined' && WEB !== null) {
const { extendDoc } = exports
exports.extendDoc = function (name, fn) {
SubDoc.prototype[name] = fn
return extendDoc(name, fn)
}
}
const depath = function (path) {
if (path.length === 1 && path[0].constructor === Array) {
return path[0]
} else {
return path
}
}
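// Note: depath makes at('users', 0) and at(['users', 0]) equivalent by
// unwrapping a single array argument into a plain path.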
class SubDoc {
constructor(doc, path) {
this.doc = doc
this.path = path
}
at(...path) {
return this.doc.at(this.path.concat(depath(path)))
}
get() {
return this.doc.getAt(this.path)
}
// for objects and lists
set(value, cb) {
return this.doc.setAt(this.path, value, cb)
}
// for strings and lists.
insert(pos, value, cb) {
return this.doc.insertAt(this.path, pos, value, cb)
}
// for strings
del(pos, length, cb) {
return this.doc.deleteTextAt(this.path, length, pos, cb)
}
// for objects and lists
remove(cb) {
return this.doc.removeAt(this.path, cb)
}
push(value, cb) {
return this.insert(this.get().length, value, cb)
}
move(from, to, cb) {
return this.doc.moveAt(this.path, from, to, cb)
}
add(amount, cb) {
return this.doc.addAt(this.path, amount, cb)
}
on(event, cb) {
return this.doc.addListener(this.path, event, cb)
}
removeListener(l) {
return this.doc.removeListener(l)
}
// text API compatibility
getLength() {
return this.get().length
}
getText() {
return this.get()
}
}
const traverse = function (snapshot, path) {
const container = { data: snapshot }
let key = 'data'
let elem = container
for (const p of Array.from(path)) {
elem = elem[key]
key = p
if (typeof elem === 'undefined') {
throw new Error('bad path')
}
}
return { elem, key }
}
const pathEquals = function (p1, p2) {
if (p1.length !== p2.length) {
return false
}
for (let i = 0; i < p1.length; i++) {
const e = p1[i]
if (e !== p2[i]) {
return false
}
}
return true
}
json.api = {
provides: { json: true },
at(...path) {
return new SubDoc(this, depath(path))
},
get() {
return this.snapshot
},
set(value, cb) {
return this.setAt([], value, cb)
},
getAt(path) {
const { elem, key } = traverse(this.snapshot, path)
return elem[key]
},
setAt(path, value, cb) {
const { elem, key } = traverse(this.snapshot, path)
const op = { p: path }
if (elem.constructor === Array) {
op.li = value
if (typeof elem[key] !== 'undefined') {
op.ld = elem[key]
}
} else if (typeof elem === 'object') {
op.oi = value
if (typeof elem[key] !== 'undefined') {
op.od = elem[key]
}
} else {
throw new Error('bad path')
}
return this.submitOp([op], cb)
},
removeAt(path, cb) {
const { elem, key } = traverse(this.snapshot, path)
if (typeof elem[key] === 'undefined') {
throw new Error('no element at that path')
}
const op = { p: path }
if (elem.constructor === Array) {
op.ld = elem[key]
} else if (typeof elem === 'object') {
op.od = elem[key]
} else {
throw new Error('bad path')
}
return this.submitOp([op], cb)
},
insertAt(path, pos, value, cb) {
const { elem, key } = traverse(this.snapshot, path)
const op = { p: path.concat(pos) }
if (elem[key].constructor === Array) {
op.li = value
} else if (typeof elem[key] === 'string') {
op.si = value
}
return this.submitOp([op], cb)
},
moveAt(path, from, to, cb) {
const op = [{ p: path.concat(from), lm: to }]
return this.submitOp(op, cb)
},
addAt(path, amount, cb) {
const op = [{ p: path, na: amount }]
return this.submitOp(op, cb)
},
deleteTextAt(path, length, pos, cb) {
const { elem, key } = traverse(this.snapshot, path)
const op = [{ p: path.concat(pos), sd: elem[key].slice(pos, pos + length) }]
return this.submitOp(op, cb)
},
addListener(path, event, cb) {
const l = { path, event, cb }
this._listeners.push(l)
return l
},
removeListener(l) {
const i = this._listeners.indexOf(l)
if (i < 0) {
return false
}
this._listeners.splice(i, 1)
return true
},
_register() {
this._listeners = []
this.on('change', function (op) {
return (() => {
const result = []
for (const c of Array.from(op)) {
let i
if (c.na !== undefined || c.si !== undefined || c.sd !== undefined) {
// no change to structure
continue
}
const toRemove = []
for (i = 0; i < this._listeners.length; i++) {
// Transform a dummy op by the incoming op to work out what
// should happen to the listener.
const l = this._listeners[i]
const dummy = { p: l.path, na: 0 }
const xformed = this.type.transformComponent([], dummy, c, 'left')
if (xformed.length === 0) {
// The op was transformed to noop, so we should delete the listener.
toRemove.push(i)
} else if (xformed.length === 1) {
// The op remained, so grab its new path into the listener.
l.path = xformed[0].p
} else {
throw new Error(
"Bad assumption in json-api: xforming an 'si' op will always result in 0 or 1 components."
)
}
}
toRemove.sort((a, b) => b - a)
result.push(
(() => {
const result1 = []
for (i of Array.from(toRemove)) {
result1.push(this._listeners.splice(i, 1))
}
return result1
})()
)
}
return result
})()
})
return this.on('remoteop', function (op) {
return (() => {
const result = []
for (const c of Array.from(op)) {
const matchPath =
c.na === undefined ? c.p.slice(0, c.p.length - 1) : c.p
result.push(
(() => {
const result1 = []
for (const { path, event, cb } of Array.from(this._listeners)) {
let common
if (pathEquals(path, matchPath)) {
switch (event) {
case 'insert':
if (c.li !== undefined && c.ld === undefined) {
result1.push(cb(c.p[c.p.length - 1], c.li))
} else if (c.oi !== undefined && c.od === undefined) {
result1.push(cb(c.p[c.p.length - 1], c.oi))
} else if (c.si !== undefined) {
result1.push(cb(c.p[c.p.length - 1], c.si))
} else {
result1.push(undefined)
}
break
case 'delete':
if (c.li === undefined && c.ld !== undefined) {
result1.push(cb(c.p[c.p.length - 1], c.ld))
} else if (c.oi === undefined && c.od !== undefined) {
result1.push(cb(c.p[c.p.length - 1], c.od))
} else if (c.sd !== undefined) {
result1.push(cb(c.p[c.p.length - 1], c.sd))
} else {
result1.push(undefined)
}
break
case 'replace':
if (c.li !== undefined && c.ld !== undefined) {
result1.push(cb(c.p[c.p.length - 1], c.ld, c.li))
} else if (c.oi !== undefined && c.od !== undefined) {
result1.push(cb(c.p[c.p.length - 1], c.od, c.oi))
} else {
result1.push(undefined)
}
break
case 'move':
if (c.lm !== undefined) {
result1.push(cb(c.p[c.p.length - 1], c.lm))
} else {
result1.push(undefined)
}
break
case 'add':
if (c.na !== undefined) {
result1.push(cb(c.na))
} else {
result1.push(undefined)
}
break
default:
result1.push(undefined)
}
} else if (
(common = this.type.commonPath(matchPath, path)) != null
) {
if (event === 'child op') {
if (
matchPath.length === path.length &&
path.length === common
) {
throw new Error(
"paths match length and have commonality, but aren't equal?"
)
}
const childPath = c.p.slice(common + 1)
result1.push(cb(childPath, c))
} else {
result1.push(undefined)
}
} else {
result1.push(undefined)
}
}
return result1
})()
)
}
return result
})()
})
},
}

View File

@@ -0,0 +1,630 @@
/* eslint-disable
no-return-assign,
no-undef,
no-useless-catch,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// This is the implementation of the JSON OT type.
//
// Spec is here: https://github.com/josephg/ShareJS/wiki/JSON-Operations
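// Illustrative component shapes (they correspond to the apply() cases below):
//   {p:['name'], oi:'fred'}   - object insert at key 'name'
//   {p:[2], li:'x'}           - list insert at index 2
//   {p:[0, 3], si:'abc'}      - string insert at offset 3 inside element 0
//   {p:['count'], na:10}      - add 10 to a number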
let text
if (typeof WEB !== 'undefined' && WEB !== null) {
;({ text } = exports.types)
} else {
text = require('./text')
}
const json = {}
json.name = 'json'
json.create = () => null
json.invertComponent = function (c) {
const c_ = { p: c.p }
if (c.si !== undefined) {
c_.sd = c.si
}
if (c.sd !== undefined) {
c_.si = c.sd
}
if (c.oi !== undefined) {
c_.od = c.oi
}
if (c.od !== undefined) {
c_.oi = c.od
}
if (c.li !== undefined) {
c_.ld = c.li
}
if (c.ld !== undefined) {
c_.li = c.ld
}
if (c.na !== undefined) {
c_.na = -c.na
}
if (c.lm !== undefined) {
c_.lm = c.p[c.p.length - 1]
c_.p = c.p.slice(0, c.p.length - 1).concat([c.lm])
}
return c_
}
json.invert = op =>
Array.from(op.slice().reverse()).map(c => json.invertComponent(c))
json.checkValidOp = function (op) {}
const isArray = o => Object.prototype.toString.call(o) === '[object Array]'
json.checkList = function (elem) {
if (!isArray(elem)) {
throw new Error('Referenced element not a list')
}
}
json.checkObj = function (elem) {
if (elem.constructor !== Object) {
throw new Error(
`Referenced element not an object (it was ${JSON.stringify(elem)})`
)
}
}
json.apply = function (snapshot, op) {
json.checkValidOp(op)
op = clone(op)
const container = { data: clone(snapshot) }
try {
for (let i = 0; i < op.length; i++) {
const c = op[i]
let parent = null
let parentkey = null
let elem = container
let key = 'data'
for (const p of Array.from(c.p)) {
parent = elem
parentkey = key
elem = elem[key]
key = p
if (parent == null) {
throw new Error('Path invalid')
}
}
if (c.na !== undefined) {
// Number add
if (typeof elem[key] !== 'number') {
throw new Error('Referenced element not a number')
}
elem[key] += c.na
} else if (c.si !== undefined) {
// String insert
if (typeof elem !== 'string') {
throw new Error(
`Referenced element not a string (it was ${JSON.stringify(elem)})`
)
}
parent[parentkey] = elem.slice(0, key) + c.si + elem.slice(key)
} else if (c.sd !== undefined) {
// String delete
if (typeof elem !== 'string') {
throw new Error('Referenced element not a string')
}
if (elem.slice(key, key + c.sd.length) !== c.sd) {
throw new Error('Deleted string does not match')
}
parent[parentkey] = elem.slice(0, key) + elem.slice(key + c.sd.length)
} else if (c.li !== undefined && c.ld !== undefined) {
// List replace
json.checkList(elem)
// Should check the list element matches c.ld
elem[key] = c.li
} else if (c.li !== undefined) {
// List insert
json.checkList(elem)
elem.splice(key, 0, c.li)
} else if (c.ld !== undefined) {
// List delete
json.checkList(elem)
// Should check the list element matches c.ld here too.
elem.splice(key, 1)
} else if (c.lm !== undefined) {
// List move
json.checkList(elem)
if (c.lm !== key) {
const e = elem[key]
// Remove it...
elem.splice(key, 1)
// And insert it back.
elem.splice(c.lm, 0, e)
}
} else if (c.oi !== undefined) {
// Object insert / replace
json.checkObj(elem)
// Should check that elem[key] == c.od
elem[key] = c.oi
} else if (c.od !== undefined) {
// Object delete
json.checkObj(elem)
// Should check that elem[key] == c.od
delete elem[key]
} else {
throw new Error('invalid / missing instruction in op')
}
}
} catch (error) {
// TODO: Roll back all already applied changes. Write tests before implementing this code.
throw error
}
return container.data
}
// Checks if two paths, p1 and p2 match.
json.pathMatches = function (p1, p2, ignoreLast) {
if (p1.length !== p2.length) {
return false
}
for (let i = 0; i < p1.length; i++) {
const p = p1[i]
if (p !== p2[i] && (!ignoreLast || i !== p1.length - 1)) {
return false
}
}
return true
}
json.append = function (dest, c) {
let last
c = clone(c)
if (
dest.length !== 0 &&
json.pathMatches(c.p, (last = dest[dest.length - 1]).p)
) {
if (last.na !== undefined && c.na !== undefined) {
return (dest[dest.length - 1] = { p: last.p, na: last.na + c.na })
} else if (
last.li !== undefined &&
c.li === undefined &&
c.ld === last.li
) {
// insert immediately followed by delete becomes a noop.
if (last.ld !== undefined) {
// leave the delete part of the replace
return delete last.li
} else {
return dest.pop()
}
} else if (
last.od !== undefined &&
last.oi === undefined &&
c.oi !== undefined &&
c.od === undefined
) {
return (last.oi = c.oi)
} else if (c.lm !== undefined && c.p[c.p.length - 1] === c.lm) {
return null // don't do anything
} else {
return dest.push(c)
}
} else {
return dest.push(c)
}
}
json.compose = function (op1, op2) {
json.checkValidOp(op1)
json.checkValidOp(op2)
const newOp = clone(op1)
for (const c of Array.from(op2)) {
json.append(newOp, c)
}
return newOp
}
json.normalize = function (op) {
const newOp = []
if (!isArray(op)) {
op = [op]
}
for (const c of Array.from(op)) {
if (c.p == null) {
c.p = []
}
json.append(newOp, c)
}
return newOp
}
// hax, copied from test/types/json. Apparently this is still the fastest way to deep clone an object, assuming
// we have browser support for JSON.
// http://jsperf.com/cloning-an-object/12
const clone = o => JSON.parse(JSON.stringify(o))
json.commonPath = function (p1, p2) {
p1 = p1.slice()
p2 = p2.slice()
p1.unshift('data')
p2.unshift('data')
p1 = p1.slice(0, p1.length - 1)
p2 = p2.slice(0, p2.length - 1)
if (p2.length === 0) {
return -1
}
let i = 0
while (p1[i] === p2[i] && i < p1.length) {
i++
if (i === p2.length) {
return i - 1
}
}
}
// transform c so it applies to a document with otherC applied.
json.transformComponent = function (dest, c, otherC, type) {
let oc
c = clone(c)
if (c.na !== undefined) {
c.p.push(0)
}
if (otherC.na !== undefined) {
otherC.p.push(0)
}
const common = json.commonPath(c.p, otherC.p)
const common2 = json.commonPath(otherC.p, c.p)
const cplength = c.p.length
const otherCplength = otherC.p.length
if (c.na !== undefined) {
c.p.pop()
} // hax
if (otherC.na !== undefined) {
otherC.p.pop()
}
if (otherC.na) {
if (
common2 != null &&
otherCplength >= cplength &&
otherC.p[common2] === c.p[common2]
) {
if (c.ld !== undefined) {
oc = clone(otherC)
oc.p = oc.p.slice(cplength)
c.ld = json.apply(clone(c.ld), [oc])
} else if (c.od !== undefined) {
oc = clone(otherC)
oc.p = oc.p.slice(cplength)
c.od = json.apply(clone(c.od), [oc])
}
}
json.append(dest, c)
return dest
}
if (
common2 != null &&
otherCplength > cplength &&
c.p[common2] === otherC.p[common2]
) {
// transform based on c
if (c.ld !== undefined) {
oc = clone(otherC)
oc.p = oc.p.slice(cplength)
c.ld = json.apply(clone(c.ld), [oc])
} else if (c.od !== undefined) {
oc = clone(otherC)
oc.p = oc.p.slice(cplength)
c.od = json.apply(clone(c.od), [oc])
}
}
if (common != null) {
let from, p, to
const commonOperand = cplength === otherCplength
// transform based on otherC
if (otherC.na !== undefined) {
// this case is handled above due to icky path hax
} else if (otherC.si !== undefined || otherC.sd !== undefined) {
// String op vs string op - pass through to text type
if (c.si !== undefined || c.sd !== undefined) {
if (!commonOperand) {
throw new Error('must be a string?')
}
// Convert an op component to a text op component
const convert = function (component) {
const newC = { p: component.p[component.p.length - 1] }
if (component.si) {
newC.i = component.si
} else {
newC.d = component.sd
}
return newC
}
const tc1 = convert(c)
const tc2 = convert(otherC)
const res = []
text._tc(res, tc1, tc2, type)
for (const tc of Array.from(res)) {
const jc = { p: c.p.slice(0, common) }
jc.p.push(tc.p)
if (tc.i != null) {
jc.si = tc.i
}
if (tc.d != null) {
jc.sd = tc.d
}
json.append(dest, jc)
}
return dest
}
} else if (otherC.li !== undefined && otherC.ld !== undefined) {
if (otherC.p[common] === c.p[common]) {
// noop
if (!commonOperand) {
// we're below the deleted element, so -> noop
return dest
} else if (c.ld !== undefined) {
// we're trying to delete the same element, -> noop
if (c.li !== undefined && type === 'left') {
// we're both replacing one element with another. only one can
// survive!
c.ld = clone(otherC.li)
} else {
return dest
}
}
}
} else if (otherC.li !== undefined) {
if (
c.li !== undefined &&
c.ld === undefined &&
commonOperand &&
c.p[common] === otherC.p[common]
) {
// in li vs. li, left wins.
if (type === 'right') {
c.p[common]++
}
} else if (otherC.p[common] <= c.p[common]) {
c.p[common]++
}
if (c.lm !== undefined) {
if (commonOperand) {
// otherC edits the same list we edit
if (otherC.p[common] <= c.lm) {
c.lm++
}
}
}
// changing c.from is handled above.
} else if (otherC.ld !== undefined) {
if (c.lm !== undefined) {
if (commonOperand) {
if (otherC.p[common] === c.p[common]) {
// they deleted the thing we're trying to move
return dest
}
// otherC edits the same list we edit
p = otherC.p[common]
from = c.p[common]
to = c.lm
if (p < to || (p === to && from < to)) {
c.lm--
}
}
}
if (otherC.p[common] < c.p[common]) {
c.p[common]--
} else if (otherC.p[common] === c.p[common]) {
if (otherCplength < cplength) {
// we're below the deleted element, so -> noop
return dest
} else if (c.ld !== undefined) {
if (c.li !== undefined) {
// we're replacing, they're deleting. we become an insert.
delete c.ld
} else {
// we're trying to delete the same element, -> noop
return dest
}
}
}
} else if (otherC.lm !== undefined) {
if (c.lm !== undefined && cplength === otherCplength) {
// lm vs lm, here we go!
from = c.p[common]
to = c.lm
const otherFrom = otherC.p[common]
const otherTo = otherC.lm
if (otherFrom !== otherTo) {
// if otherFrom == otherTo, we don't need to change our op.
// where did my thing go?
if (from === otherFrom) {
// they moved it! tie break.
if (type === 'left') {
c.p[common] = otherTo
if (from === to) {
// ugh
c.lm = otherTo
}
} else {
return dest
}
} else {
// they moved around it
if (from > otherFrom) {
c.p[common]--
}
if (from > otherTo) {
c.p[common]++
} else if (from === otherTo) {
if (otherFrom > otherTo) {
c.p[common]++
if (from === to) {
// ugh, again
c.lm++
}
}
}
// step 2: where am i going to put it?
if (to > otherFrom) {
c.lm--
} else if (to === otherFrom) {
if (to > from) {
c.lm--
}
}
if (to > otherTo) {
c.lm++
} else if (to === otherTo) {
// if we're both moving in the same direction, tie break
if (
(otherTo > otherFrom && to > from) ||
(otherTo < otherFrom && to < from)
) {
if (type === 'right') {
c.lm++
}
} else {
if (to > from) {
c.lm++
} else if (to === otherFrom) {
c.lm--
}
}
}
}
}
} else if (c.li !== undefined && c.ld === undefined && commonOperand) {
// li
from = otherC.p[common]
to = otherC.lm
p = c.p[common]
if (p > from) {
c.p[common]--
}
if (p > to) {
c.p[common]++
}
} else {
// ld, ld+li, si, sd, na, oi, od, oi+od, any li on an element beneath
// the lm
//
// i.e. things care about where their item is after the move.
from = otherC.p[common]
to = otherC.lm
p = c.p[common]
if (p === from) {
c.p[common] = to
} else {
if (p > from) {
c.p[common]--
}
if (p > to) {
c.p[common]++
} else if (p === to) {
if (from > to) {
c.p[common]++
}
}
}
}
} else if (otherC.oi !== undefined && otherC.od !== undefined) {
if (c.p[common] === otherC.p[common]) {
if (c.oi !== undefined && commonOperand) {
// we inserted where someone else replaced
if (type === 'right') {
// left wins
return dest
} else {
// we win, make our op replace what they inserted
c.od = otherC.oi
}
} else {
// -> noop if the other component is deleting the same object (or any
// parent)
return dest
}
}
} else if (otherC.oi !== undefined) {
if (c.oi !== undefined && c.p[common] === otherC.p[common]) {
// left wins if we try to insert at the same place
if (type === 'left') {
json.append(dest, { p: c.p, od: otherC.oi })
} else {
return dest
}
}
} else if (otherC.od !== undefined) {
if (c.p[common] === otherC.p[common]) {
if (!commonOperand) {
return dest
}
if (c.oi !== undefined) {
delete c.od
} else {
return dest
}
}
}
}
json.append(dest, c)
return dest
}
if (typeof WEB !== 'undefined' && WEB !== null) {
if (!exports.types) {
exports.types = {}
}
// This is kind of awful - come up with a better way to hook this helper code up.
exports._bt(json, json.transformComponent, json.checkValidOp, json.append)
// [] is used to prevent closure from renaming types.text
exports.types.json = json
} else {
module.exports = json
require('./helpers').bootstrapTransform(
json,
json.transformComponent,
json.checkValidOp,
json.append
)
}

View File

@@ -0,0 +1,882 @@
/* eslint-disable
no-console,
no-return-assign,
n/no-callback-literal,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS103: Rewrite code to no longer use __guard__
* DS104: Avoid inline assignments
* DS204: Change includes calls to have a more natural evaluation order
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// The model of all the ops. Responsible for applying & transforming remote deltas
// and managing the storage layer.
//
// Actual storage is handled by the database wrappers in db/*, wrapped by DocCache
let Model
const { EventEmitter } = require('node:events')
const queue = require('./syncqueue')
const types = require('../types')
const isArray = o => Object.prototype.toString.call(o) === '[object Array]'
// This constructor creates a new Model object. There will be one model object
// per server context.
//
// The model object is responsible for a lot of things:
//
// - It manages the interactions with the database
// - It maintains (in memory) a set of all active documents
// - It calls out to the OT functions when necessary
//
// The model is an event emitter. It emits the following events:
//
// create(docName, data): A document has been created with the specified name & data
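// Illustrative usage (the document name and options here are assumptions, not API docs):
//   const model = new Model(null, { reapTime: 5000 }) // null db -> in-memory only
//   model.create('doc', 'text', err => {
//     model.applyOp('doc', { v: 0, op: [{ p: 0, i: 'hi' }] }, (err, v) => {})
//   })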
module.exports = Model = function (db, options) {
  // db can be null if the user doesn't want persistence.
let getOps
if (!(this instanceof Model)) {
return new Model(db, options)
}
const model = this
if (options == null) {
options = {}
}
// This is a cache of 'live' documents.
//
// The cache is a map from docName -> {
// ops:[{op, meta}]
// snapshot
// type
// v
// meta
// eventEmitter
// reapTimer
// committedVersion: v
// snapshotWriteLock: bool to make sure writeSnapshot isn't re-entrant
// dbMeta: database specific data
// opQueue: syncQueue for processing ops
// }
//
// The ops list contains the document's last options.numCachedOps ops. (Or all
// of them if we're using a memory store).
//
// Documents are stored in this set so long as the document has been accessed in
// the last few seconds (options.reapTime) OR at least one client has the document
// open. I don't know if I should keep open (but not being edited) documents live -
// maybe if a client has a document open but the document isn't being edited, I should
// flush it from the cache.
//
// In any case, the API to model is designed such that if we want to change that later
// it should be pretty easy to do so without any external-to-the-model code changes.
const docs = {}
// This is a map from docName -> [callback]. It is used when a document hasn't been
// cached and multiple getSnapshot() / getVersion() requests come in. All requests
// are added to the callback list and called when db.getSnapshot() returns.
//
// callback(error, snapshot data)
const awaitingGetSnapshot = {}
// The time that documents which no clients have open will stay in the cache.
// Should be > 0.
if (options.reapTime == null) {
options.reapTime = 3000
}
// The number of operations the cache holds before reusing the space
if (options.numCachedOps == null) {
options.numCachedOps = 10
}
// This option forces documents to be reaped, even when there's no database backend.
  // This is useful when you don't care about persistence and don't want to gradually
// fill memory.
//
// You might want to set reapTime to a day or something.
if (options.forceReaping == null) {
options.forceReaping = false
}
// Until I come up with a better strategy, we'll save a copy of the document snapshot
// to the database every ~20 submitted ops.
if (options.opsBeforeCommit == null) {
options.opsBeforeCommit = 20
}
// It takes some processing time to transform client ops. The server will punt ops back to the
// client to transform if they're too old.
if (options.maximumAge == null) {
options.maximumAge = 40
}
// **** Cache API methods
  // It's important that all ops are applied in order. This helper method creates the op submission queue
// for a single document. This contains the logic for transforming & applying ops.
const makeOpQueue = (docName, doc) =>
queue(function (opData, callback) {
if (!(opData.v >= 0)) {
return callback('Version missing')
}
if (opData.v > doc.v) {
return callback('Op at future version')
}
// Punt the transforming work back to the client if the op is too old.
if (opData.v + options.maximumAge < doc.v) {
return callback('Op too old')
}
if (!opData.meta) {
opData.meta = {}
}
opData.meta.ts = Date.now()
// We'll need to transform the op to the current version of the document. This
// calls the callback immediately if opVersion == doc.v.
return getOps(docName, opData.v, doc.v, function (error, ops) {
let snapshot
if (error) {
return callback(error)
}
if (doc.v - opData.v !== ops.length) {
// This should never happen. It indicates that we didn't get all the ops we
// asked for. Its important that the submitted op is correctly transformed.
console.error(
`Could not get old ops in model for document ${docName}`
)
console.error(
`Expected ops ${opData.v} to ${doc.v} and got ${ops.length} ops`
)
return callback('Internal error')
}
if (ops.length > 0) {
try {
// If there's enough ops, it might be worth spinning this out into a webworker thread.
for (const oldOp of Array.from(ops)) {
// Dup detection works by sending the id(s) the op has been submitted with previously.
// If the id matches, we reject it. The client can also detect the op has been submitted
// already if it sees its own previous id in the ops it sees when it does catchup.
if (
oldOp.meta.source &&
opData.dupIfSource &&
Array.from(opData.dupIfSource).includes(oldOp.meta.source)
) {
return callback('Op already submitted')
}
opData.op = doc.type.transform(opData.op, oldOp.op, 'left')
opData.v++
}
} catch (error1) {
error = error1
return callback(error.message)
}
}
try {
snapshot = doc.type.apply(doc.snapshot, opData.op)
} catch (error2) {
error = error2
return callback(error.message)
}
// The op data should be at the current version, and the new document data should be at
// the next version.
//
// This should never happen in practice, but its a nice little check to make sure everything
// is hunky-dory.
if (opData.v !== doc.v) {
// This should never happen.
console.error(
'Version mismatch detected in model. File a ticket - this is a bug.'
)
console.error(`Expecting ${opData.v} == ${doc.v}`)
return callback('Internal error')
}
// newDocData = {snapshot, type:type.name, v:opVersion + 1, meta:docData.meta}
const writeOp =
(db != null ? db.writeOp : undefined) ||
((docName, newOpData, callback) => callback())
return writeOp(docName, opData, function (error) {
if (error) {
// The user should probably know about this.
console.warn(`Error writing ops to database: ${error}`)
return callback(error)
}
__guardMethod__(options.stats, 'writeOp', o => o.writeOp())
// This is needed when we emit the 'change' event, below.
const oldSnapshot = doc.snapshot
// All the heavy lifting is now done. Finally, we'll update the cache with the new data
// and (maybe!) save a new document snapshot to the database.
doc.v = opData.v + 1
doc.snapshot = snapshot
doc.ops.push(opData)
if (db && doc.ops.length > options.numCachedOps) {
doc.ops.shift()
}
model.emit('applyOp', docName, opData, snapshot, oldSnapshot)
doc.eventEmitter.emit('op', opData, snapshot, oldSnapshot)
// The callback is called with the version of the document at which the op was applied.
// This is the op.v after transformation, and its doc.v - 1.
callback(null, opData.v)
// I need a decent strategy here for deciding whether or not to save the snapshot.
//
// The 'right' strategy looks something like "Store the snapshot whenever the snapshot
// is smaller than the accumulated op data". For now, I'll just store it every 20
// ops or something. (Configurable with doc.committedVersion)
if (
!doc.snapshotWriteLock &&
doc.committedVersion + options.opsBeforeCommit <= doc.v
) {
return tryWriteSnapshot(docName, function (error) {
if (error) {
return console.warn(
`Error writing snapshot ${error}. This is nonfatal`
)
}
})
}
})
})
})
// Add the data for the given docName to the cache. The named document shouldn't already
// exist in the doc set.
//
// Returns the new doc.
const add = function (docName, error, data, committedVersion, ops, dbMeta) {
let callback, doc
const callbacks = awaitingGetSnapshot[docName]
delete awaitingGetSnapshot[docName]
if (error) {
if (callbacks) {
for (callback of Array.from(callbacks)) {
callback(error)
}
}
} else {
doc = docs[docName] = {
snapshot: data.snapshot,
v: data.v,
type: data.type,
meta: data.meta,
// Cache of ops
ops: ops || [],
eventEmitter: new EventEmitter(),
// Timer before the document will be invalidated from the cache (if the document has no
// listeners)
reapTimer: null,
// Version of the snapshot thats in the database
committedVersion: committedVersion != null ? committedVersion : data.v,
snapshotWriteLock: false,
dbMeta,
}
doc.opQueue = makeOpQueue(docName, doc)
refreshReapingTimeout(docName)
model.emit('add', docName, data)
if (callbacks) {
for (callback of Array.from(callbacks)) {
callback(null, doc)
}
}
}
return doc
}
// This is a little helper wrapper around db.getOps. It does two things:
//
// - If there's no database set, it returns an error to the callback
// - It adds version numbers to each op returned from the database
// (These can be inferred from context so the DB doesn't store them, but its useful to have them).
const getOpsInternal = function (docName, start, end, callback) {
if (!db) {
return typeof callback === 'function'
? callback('Document does not exist')
: undefined
}
return db.getOps(docName, start, end, function (error, ops) {
if (error) {
return typeof callback === 'function' ? callback(error) : undefined
}
let v = start
for (const op of Array.from(ops)) {
op.v = v++
}
return typeof callback === 'function' ? callback(null, ops) : undefined
})
}
// Load the named document into the cache. This function is re-entrant.
//
// The callback is called with (error, doc)
const load = function (docName, callback) {
if (docs[docName]) {
// The document is already loaded. Return immediately.
__guardMethod__(options.stats, 'cacheHit', o => o.cacheHit('getSnapshot'))
return callback(null, docs[docName])
}
// We're a memory store. If we don't have it, nobody does.
if (!db) {
return callback('Document does not exist')
}
const callbacks = awaitingGetSnapshot[docName]
// The document is being loaded already. Add ourselves as a callback.
if (callbacks) {
return callbacks.push(callback)
}
__guardMethod__(options.stats, 'cacheMiss', o1 =>
o1.cacheMiss('getSnapshot')
)
// The document isn't loaded and isn't being loaded. Load it.
awaitingGetSnapshot[docName] = [callback]
return db.getSnapshot(docName, function (error, data, dbMeta) {
if (error) {
return add(docName, error)
}
const type = types[data.type]
if (!type) {
console.warn(`Type '${data.type}' missing`)
return callback('Type not found')
}
data.type = type
const committedVersion = data.v
// The server can close without saving the most recent document snapshot.
// In this case, there are extra ops which need to be applied before
// returning the snapshot.
return getOpsInternal(docName, data.v, null, function (error, ops) {
if (error) {
return callback(error)
}
if (ops.length > 0) {
console.log(`Catchup ${docName} ${data.v} -> ${data.v + ops.length}`)
try {
for (const op of Array.from(ops)) {
data.snapshot = type.apply(data.snapshot, op.op)
data.v++
}
} catch (e) {
// This should never happen - it indicates that whats in the
// database is invalid.
console.error(`Op data invalid for ${docName}: ${e.stack}`)
return callback('Op data invalid')
}
}
model.emit('load', docName, data)
return add(docName, error, data, committedVersion, ops, dbMeta)
})
})
}
// This makes sure the cache contains a document. If the doc cache doesn't contain
// a document, it is loaded from the database and stored.
//
// Documents are stored so long as either:
// - They have been accessed within the past #{PERIOD}
// - At least one client has the document open
function refreshReapingTimeout(docName) {
const doc = docs[docName]
if (!doc) {
return
}
// I want to let the clients list be updated before this is called.
return process.nextTick(function () {
// This is an awkward way to find out the number of clients on a document. If this
// causes performance issues, add a numClients field to the document.
//
// The first check is because its possible that between refreshReapingTimeout being called and this
// event being fired, someone called delete() on the document and hence the doc is something else now.
if (
doc === docs[docName] &&
doc.eventEmitter.listeners('op').length === 0 &&
(db || options.forceReaping) &&
doc.opQueue.busy === false
) {
let reapTimer
clearTimeout(doc.reapTimer)
return (doc.reapTimer = reapTimer =
setTimeout(
() =>
tryWriteSnapshot(docName, function () {
// If the reaping timeout has been refreshed while we're writing the snapshot, or if we're
// in the middle of applying an operation, don't reap.
if (
docs[docName].reapTimer === reapTimer &&
doc.opQueue.busy === false
) {
return delete docs[docName]
}
}),
options.reapTime
))
}
})
}
function tryWriteSnapshot(docName, callback) {
if (!db) {
return typeof callback === 'function' ? callback() : undefined
}
const doc = docs[docName]
// The doc is closed
if (!doc) {
return typeof callback === 'function' ? callback() : undefined
}
// The document is already saved.
if (doc.committedVersion === doc.v) {
return typeof callback === 'function' ? callback() : undefined
}
if (doc.snapshotWriteLock) {
return typeof callback === 'function'
? callback('Another snapshot write is in progress')
: undefined
}
doc.snapshotWriteLock = true
__guardMethod__(options.stats, 'writeSnapshot', o => o.writeSnapshot())
const writeSnapshot =
(db != null ? db.writeSnapshot : undefined) ||
((docName, docData, dbMeta, callback) => callback())
const data = {
v: doc.v,
meta: doc.meta,
snapshot: doc.snapshot,
// The database doesn't know about object types.
type: doc.type.name,
}
// Commit snapshot.
return writeSnapshot(docName, data, doc.dbMeta, function (error, dbMeta) {
doc.snapshotWriteLock = false
// We have to use data.v here because the version in the doc could
// have been updated between the call to writeSnapshot() and now.
doc.committedVersion = data.v
doc.dbMeta = dbMeta
return typeof callback === 'function' ? callback(error) : undefined
})
}
// *** Model interface methods
// Create a new document.
//
// data should be {snapshot, type, [meta]}. The version of a new document is 0.
this.create = function (docName, type, meta, callback) {
if (typeof meta === 'function') {
;[meta, callback] = Array.from([{}, meta])
}
if (docName.match(/\//)) {
return typeof callback === 'function'
? callback('Invalid document name')
: undefined
}
if (docs[docName]) {
return typeof callback === 'function'
? callback('Document already exists')
: undefined
}
if (typeof type === 'string') {
type = types[type]
}
if (!type) {
return typeof callback === 'function'
? callback('Type not found')
: undefined
}
const data = {
snapshot: type.create(),
type: type.name,
meta: meta || {},
v: 0,
}
const done = function (error, dbMeta) {
// dbMeta can be used to cache extra state needed by the database to access the document, like an ID or something.
if (error) {
return typeof callback === 'function' ? callback(error) : undefined
}
// From here on we'll store the object version of the type name.
data.type = type
add(docName, null, data, 0, [], dbMeta)
model.emit('create', docName, data)
return typeof callback === 'function' ? callback() : undefined
}
if (db) {
return db.create(docName, data, done)
} else {
return done()
}
}
  // Permanently deletes the specified document.
// If listeners are attached, they are removed.
//
// The callback is called with (error) if there was an error. If error is null / undefined, the
// document was deleted.
//
// WARNING: This isn't well supported throughout the code. (Eg, streaming clients aren't told about the
// deletion. Subsequent op submissions will fail).
this.delete = function (docName, callback) {
const doc = docs[docName]
if (doc) {
clearTimeout(doc.reapTimer)
delete docs[docName]
}
const done = function (error) {
if (!error) {
model.emit('delete', docName)
}
return typeof callback === 'function' ? callback(error) : undefined
}
if (db) {
return db.delete(docName, doc != null ? doc.dbMeta : undefined, done)
} else {
return done(!doc ? 'Document does not exist' : undefined)
}
}
  // This gets all operations in the range [start, end). (That is, end is not inclusive.)
//
// end can be null. This means 'get me all ops from start'.
//
// Each op returned is in the form {op:o, meta:m, v:version}.
//
// Callback is called with (error, [ops])
//
  // If the document does not exist, getOps doesn't necessarily return an error. This is because
  // it's awkward to figure out whether or not the document exists for things
  // like the redis database backend. I guess it's a bit gross having this inconsistent
  // with the other DB calls, but it's certainly convenient.
  //
  // Use getVersion() to determine if a document actually exists, if that's what you're
  // after.
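  // For example, getOps('doc', 0, 3, cb) calls back with the ops at versions 0, 1 and 2.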
this.getOps = getOps = function (docName, start, end, callback) {
    // getOps will only use the op cache if it's there. It won't fill the op cache in.
if (!(start >= 0)) {
throw new Error('start must be 0+')
}
if (typeof end === 'function') {
;[end, callback] = Array.from([null, end])
}
const ops = docs[docName] != null ? docs[docName].ops : undefined
if (ops) {
const version = docs[docName].v
// Ops contains an array of ops. The last op in the list is the last op applied
if (end == null) {
end = version
}
start = Math.min(start, end)
if (start === end) {
return callback(null, [])
}
// Base is the version number of the oldest op we have cached
const base = version - ops.length
      // If the database is null, we'll trim to the ops we do have and hope that's enough.
if (start >= base || db === null) {
refreshReapingTimeout(docName)
if (options.stats != null) {
options.stats.cacheHit('getOps')
}
return callback(null, ops.slice(start - base, end - base))
}
}
if (options.stats != null) {
options.stats.cacheMiss('getOps')
}
return getOpsInternal(docName, start, end, callback)
}
// Gets the snapshot data for the specified document.
// getSnapshot(docName, callback)
// Callback is called with (error, {v: <version>, type: <type>, snapshot: <snapshot>, meta: <meta>})
this.getSnapshot = (docName, callback) =>
load(docName, (error, doc) =>
callback(
error,
doc
? { v: doc.v, type: doc.type, snapshot: doc.snapshot, meta: doc.meta }
: undefined
)
)
// Gets the latest version # of the document.
// getVersion(docName, callback)
// callback is called with (error, version).
this.getVersion = (docName, callback) =>
load(docName, (error, doc) =>
callback(error, doc != null ? doc.v : undefined)
)
// Apply an op to the specified document.
// The callback is passed (error, applied version #)
// opData = {op:op, v:v, meta:metadata}
//
// Ops are queued before being applied so that the following code applies op C before op B:
// model.applyOp 'doc', OPA, -> model.applyOp 'doc', OPB
// model.applyOp 'doc', OPC
this.applyOp = (
docName,
opData,
callback // All the logic for this is in makeOpQueue, above.
) =>
load(docName, function (error, doc) {
if (error) {
return callback(error)
}
return process.nextTick(() =>
doc.opQueue(opData, function (error, newVersion) {
refreshReapingTimeout(docName)
return typeof callback === 'function'
? callback(error, newVersion)
: undefined
})
)
})
// TODO: store (some) metadata in DB
  // TODO: op and meta should be combinable in the op that gets sent
this.applyMetaOp = function (docName, metaOpData, callback) {
const { path, value } = metaOpData.meta
if (!isArray(path)) {
return typeof callback === 'function'
? callback('path should be an array')
: undefined
}
return load(docName, function (error, doc) {
if (error != null) {
return typeof callback === 'function' ? callback(error) : undefined
} else {
let applied = false
switch (path[0]) {
case 'shout':
doc.eventEmitter.emit('op', metaOpData)
applied = true
break
}
if (applied) {
model.emit('applyMetaOp', docName, path, value)
}
return typeof callback === 'function'
? callback(null, doc.v)
: undefined
}
})
}
// Listen to all ops from the specified version. If version is in the past, all
// ops since that version are sent immediately to the listener.
//
// The callback is called once the listener is attached, but before any ops have been passed
// to the listener.
//
// This will _not_ edit the document metadata.
//
// If there are any listeners, we don't purge the document from the cache. But be aware, this behaviour
// might change in a future version.
//
// version is the document version at which the document is opened. It can be left out if you want to open
// the document at the most recent version.
//
// listener is called with (opData) each time an op is applied.
//
// callback(error, openedVersion)
this.listen = function (docName, version, listener, callback) {
if (typeof version === 'function') {
;[version, listener, callback] = Array.from([null, version, listener])
}
return load(docName, function (error, doc) {
if (error) {
return typeof callback === 'function' ? callback(error) : undefined
}
clearTimeout(doc.reapTimer)
if (version != null) {
return getOps(docName, version, null, function (error, data) {
if (error) {
return typeof callback === 'function' ? callback(error) : undefined
}
doc.eventEmitter.on('op', listener)
if (typeof callback === 'function') {
callback(null, version)
}
return (() => {
const result = []
for (const op of Array.from(data)) {
let needle
listener(op)
// The listener may well remove itself during the catchup phase. If this happens, break early.
// This is done in a quite inefficient way. (O(n) where n = #listeners on doc)
if (
((needle = listener),
!Array.from(doc.eventEmitter.listeners('op')).includes(needle))
) {
break
} else {
result.push(undefined)
}
}
return result
})()
})
} else {
// Version is null / undefined. Just add the listener.
doc.eventEmitter.on('op', listener)
return typeof callback === 'function'
? callback(null, doc.v)
: undefined
}
})
}
// Remove a listener for a particular document.
//
// removeListener(docName, listener)
//
// This is synchronous.
this.removeListener = function (docName, listener) {
// The document should already be loaded.
const doc = docs[docName]
if (!doc) {
throw new Error('removeListener called but document not loaded')
}
doc.eventEmitter.removeListener('op', listener)
return refreshReapingTimeout(docName)
}
// Flush saves all snapshot data to the database. I'm not sure whether or not this is actually needed -
// sharejs will happily replay uncommitted ops when documents are re-opened anyway.
this.flush = function (callback) {
if (!db) {
return typeof callback === 'function' ? callback() : undefined
}
let pendingWrites = 0
for (const docName in docs) {
const doc = docs[docName]
if (doc.committedVersion < doc.v) {
pendingWrites++
// I'm hoping writeSnapshot will always happen in another thread.
tryWriteSnapshot(docName, () =>
process.nextTick(function () {
pendingWrites--
if (pendingWrites === 0) {
return typeof callback === 'function' ? callback() : undefined
}
})
)
}
}
// If nothing was queued, terminate immediately.
if (pendingWrites === 0) {
return typeof callback === 'function' ? callback() : undefined
}
}
// Close the database connection. This is needed so nodejs can shut down cleanly.
this.closeDb = function () {
__guardMethod__(db, 'close', o => o.close())
return (db = null)
}
}
// Model inherits from EventEmitter.
Model.prototype = new EventEmitter()
function __guardMethod__(obj, methodName, transform) {
if (
typeof obj !== 'undefined' &&
obj !== null &&
typeof obj[methodName] === 'function'
) {
return transform(obj, methodName)
} else {
return undefined
}
}

View File

@@ -0,0 +1,54 @@
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// This is a really simple OT type. It's not compiled with the web client, but it could be.
//
// It's mostly included for demonstration purposes and is used in a lot of unit tests.
//
// This defines a really simple text OT type which only allows inserts. (No deletes).
//
// Ops look like:
// {position:#, text:"asdf"}
//
// Document snapshots look like:
// {str:string}
module.exports = {
// The name of the OT type. The type is stored in types[type.name]. The name can be
// used in place of the actual type in all the API methods.
name: 'simple',
// Create a new document snapshot
create() {
return { str: '' }
},
// Apply the given op to the document snapshot. Returns the new snapshot.
//
// The original snapshot should not be modified.
apply(snapshot, op) {
if (!(op.position >= 0 && op.position <= snapshot.str.length)) {
throw new Error('Invalid position')
}
let { str } = snapshot
str = str.slice(0, op.position) + op.text + str.slice(op.position)
return { str }
},
// transform op1 by op2. Return transformed version of op1.
  // sym describes the symmetry of the op. It's 'left' or 'right' depending on whether the
// op being transformed comes from the client or the server.
transform(op1, op2, sym) {
let pos = op1.position
if (op2.position < pos || (op2.position === pos && sym === 'left')) {
pos += op2.text.length
}
return { position: pos, text: op1.text }
},
}
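// Example (illustrative):
//   apply({str: 'abc'}, {position: 1, text: 'XY'})                          -> {str: 'aXYbc'}
//   transform({position: 5, text: 'x'}, {position: 2, text: 'ab'}, 'right') -> {position: 7, text: 'x'}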

View File

@@ -0,0 +1,60 @@
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// A synchronous processing queue. The queue calls process on the arguments,
// ensuring that process() is only executing once at a time.
//
// process(data, callback) _MUST_ eventually call its callback.
//
// Example:
//
//   const queue = require('syncqueue')
//
//   const fn = queue((data, callback) =>
//     asyncthing(data, () => callback(321))
//   )
//
//   fn(1)
//   fn(2)
//   fn(3, result => console.log(result))
//
// ^--- the async thing will only be running once at any time.
module.exports = function (process) {
if (typeof process !== 'function') {
throw new Error('process is not a function')
}
const queue = []
const enqueue = function (data, callback) {
queue.push([data, callback])
return flush()
}
enqueue.busy = false
function flush() {
if (enqueue.busy || queue.length === 0) {
return
}
enqueue.busy = true
const [data, callback] = Array.from(queue.shift())
return process(data, function (...result) {
// TODO: Make this not use varargs - varargs are really slow.
enqueue.busy = false
// This is called after busy = false so a user can check if enqueue.busy is set in the callback.
if (callback) {
callback.apply(null, result)
}
return flush()
})
}
return enqueue
}

View File

@@ -0,0 +1,52 @@
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// Text document API for text
let text
if (typeof WEB === 'undefined') {
text = require('./text')
}
text.api = {
provides: { text: true },
// The number of characters in the string
getLength() {
return this.snapshot.length
},
// Get the text contents of a document
getText() {
return this.snapshot
},
insert(pos, text, callback) {
const op = [{ p: pos, i: text }]
this.submitOp(op, callback)
return op
},
del(pos, length, callback) {
const op = [{ p: pos, d: this.snapshot.slice(pos, pos + length) }]
this.submitOp(op, callback)
return op
},
_register() {
return this.on('remoteop', function (op) {
return Array.from(op).map(component =>
component.i !== undefined
? this.emit('insert', component.p, component.i)
: this.emit('delete', component.p, component.d)
)
})
},
}
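// Illustrative usage on an open document object that mixes in this API (the doc name is an assumption):
//   doc.insert(0, 'hello', cb)  // submits [{ p: 0, i: 'hello' }]
//   doc.del(0, 5, cb)           // submits [{ p: 0, d: 'hello' }]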

View File

@@ -0,0 +1,76 @@
/* eslint-disable
no-undef,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// Text document API for text
let type
if (typeof WEB !== 'undefined' && WEB !== null) {
type = exports.types['text-composable']
} else {
type = require('./text-composable')
}
type.api = {
provides: { text: true },
// The number of characters in the string
getLength() {
return this.snapshot.length
},
// Get the text contents of a document
getText() {
return this.snapshot
},
insert(pos, text, callback) {
const op = type.normalize([pos, { i: text }, this.snapshot.length - pos])
this.submitOp(op, callback)
return op
},
del(pos, length, callback) {
const op = type.normalize([
pos,
{ d: this.snapshot.slice(pos, pos + length) },
this.snapshot.length - pos - length,
])
this.submitOp(op, callback)
return op
},
_register() {
return this.on('remoteop', function (op) {
let pos = 0
return (() => {
const result = []
for (const component of Array.from(op)) {
if (typeof component === 'number') {
result.push((pos += component))
} else if (component.i !== undefined) {
this.emit('insert', pos, component.i)
result.push((pos += component.i.length))
} else {
// delete
result.push(this.emit('delete', pos, component.d))
}
}
return result
})()
})
},
}
// We don't increment pos, because the position
// specified is after the delete has happened.

View File

@@ -0,0 +1,400 @@
/* eslint-disable
no-cond-assign,
no-return-assign,
no-undef,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// An alternate composable implementation for text. This is much closer
// to the implementation used by google wave.
//
// Ops are lists of components which iterate over the whole document.
// Components are either:
// A number N: Skip N characters in the original document
// {i:'str'}: Insert 'str' at the current position in the document
// {d:'str'}: Delete 'str', which appears at the current position in the document
//
// Eg: [3, {i:'hi'}, 5, {d:'internet'}]
//
// Snapshots are strings.
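// Example (illustrative): apply('abcdefgh', [3, {i:'hi'}, 5]) === 'abchidefgh'
// (an op must traverse the whole document, so its skips and deletes add up to the document length).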
let makeAppend
const p = function () {} // require('util').debug
const i = function () {} // require('util').inspect
const moduleExport =
typeof WEB !== 'undefined' && WEB !== null ? {} : module.exports
moduleExport.name = 'text-composable'
moduleExport.create = () => ''
// -------- Utility methods
const checkOp = function (op) {
if (!Array.isArray(op)) {
throw new Error('Op must be an array of components')
}
let last = null
return (() => {
const result = []
for (const c of Array.from(op)) {
if (typeof c === 'object') {
if (
(c.i == null || !(c.i.length > 0)) &&
(c.d == null || !(c.d.length > 0))
) {
throw new Error(`Invalid op component: ${i(c)}`)
}
} else {
if (typeof c !== 'number') {
throw new Error('Op components must be objects or numbers')
}
if (!(c > 0)) {
throw new Error('Skip components must be a positive number')
}
if (typeof last === 'number') {
throw new Error('Adjacent skip components should be added')
}
}
result.push((last = c))
}
return result
})()
}
// Makes a function for appending components to a given op.
// Exported for the randomOpGenerator.
moduleExport._makeAppend = makeAppend = op =>
function (component) {
if (component === 0 || component.i === '' || component.d === '') {
return
}
if (op.length === 0) {
return op.push(component)
} else if (
typeof component === 'number' &&
typeof op[op.length - 1] === 'number'
) {
return (op[op.length - 1] += component)
} else if (component.i != null && op[op.length - 1].i != null) {
return (op[op.length - 1].i += component.i)
} else if (component.d != null && op[op.length - 1].d != null) {
return (op[op.length - 1].d += component.d)
} else {
return op.push(component)
}
}
// checkOp op
// Makes 2 functions for taking components from the start of an op, and for peeking
// at the next op that could be taken.
const makeTake = function (op) {
// The index of the next component to take
let idx = 0
// The offset into the component
let offset = 0
// Take up to length n from the front of op. If n is null, take the next
// op component. If indivisableField == 'd', delete components won't be separated.
// If indivisableField == 'i', insert components won't be separated.
const take = function (n, indivisableField) {
let c
if (idx === op.length) {
return null
}
// assert.notStrictEqual op.length, i, 'The op is too short to traverse the document'
if (typeof op[idx] === 'number') {
if (n == null || op[idx] - offset <= n) {
c = op[idx] - offset
++idx
offset = 0
return c
} else {
offset += n
return n
}
} else {
// Take from the string
const field = op[idx].i ? 'i' : 'd'
c = {}
if (
n == null ||
op[idx][field].length - offset <= n ||
field === indivisableField
) {
c[field] = op[idx][field].slice(offset)
++idx
offset = 0
} else {
c[field] = op[idx][field].slice(offset, offset + n)
offset += n
}
return c
}
}
const peekType = () => op[idx]
return [take, peekType]
}
// Find and return the length of an op component
const componentLength = function (component) {
if (typeof component === 'number') {
return component
} else if (component.i != null) {
return component.i.length
} else {
return component.d.length
}
}
// Normalize an op, removing all empty skips and empty inserts / deletes. Concatenate
// adjacent inserts and deletes.
moduleExport.normalize = function (op) {
const newOp = []
const append = makeAppend(newOp)
for (const component of Array.from(op)) {
append(component)
}
return newOp
}
// Apply the op to the string. Returns the new string.
moduleExport.apply = function (str, op) {
p(`Applying ${i(op)} to '${str}'`)
if (typeof str !== 'string') {
throw new Error('Snapshot should be a string')
}
checkOp(op)
const pos = 0
const newDoc = []
for (const component of Array.from(op)) {
if (typeof component === 'number') {
if (component > str.length) {
throw new Error('The op is too long for this document')
}
newDoc.push(str.slice(0, component))
str = str.slice(component)
} else if (component.i != null) {
newDoc.push(component.i)
} else {
if (component.d !== str.slice(0, component.d.length)) {
throw new Error(
`The deleted text '${
component.d
}' doesn't match the next characters in the document '${str.slice(
0,
component.d.length
)}'`
)
}
str = str.slice(component.d.length)
}
}
if (str !== '') {
throw new Error("The applied op doesn't traverse the entire document")
}
return newDoc.join('')
}
// transform op1 by op2. Return transformed version of op1.
// op1 and op2 are unchanged by transform.
moduleExport.transform = function (op, otherOp, side) {
  let component
  if (side !== 'left' && side !== 'right') {
    throw new Error(`side (${side}) must be 'left' or 'right'`)
  }
checkOp(op)
checkOp(otherOp)
const newOp = []
const append = makeAppend(newOp)
const [take, peek] = Array.from(makeTake(op))
for (component of Array.from(otherOp)) {
let chunk, length
if (typeof component === 'number') {
// Skip
length = component
while (length > 0) {
chunk = take(length, 'i')
if (chunk === null) {
throw new Error(
'The op traverses more elements than the document has'
)
}
append(chunk)
if (typeof chunk !== 'object' || chunk.i == null) {
length -= componentLength(chunk)
}
}
} else if (component.i != null) {
// Insert
if (side === 'left') {
// The left insert should go first.
const o = peek()
if (o != null ? o.i : undefined) {
append(take())
}
}
// Otherwise, skip the inserted text.
append(component.i.length)
} else {
// Delete.
// assert.ok component.d
;({ length } = component.d)
while (length > 0) {
chunk = take(length, 'i')
if (chunk === null) {
throw new Error(
'The op traverses more elements than the document has'
)
}
if (typeof chunk === 'number') {
length -= chunk
} else if (chunk.i != null) {
append(chunk)
} else {
// assert.ok chunk.d
// The delete is unnecessary now.
length -= chunk.d.length
}
}
}
}
// Append extras from op1
while ((component = take())) {
if ((component != null ? component.i : undefined) == null) {
throw new Error(`Remaining fragments in the op: ${i(component)}`)
}
append(component)
}
return newOp
}
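// Eg, over a 3-character document: transforming op1 = [1, {i:'X'}, 2]
// (insert 'X' at position 1) by op2 = [2, {d:'c'}] (delete the last
// character) gives [1, {i:'X'}, 1] - the insert keeps its position and the
// trailing skip shrinks by the deleted character.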
// Compose 2 ops into 1 op.
moduleExport.compose = function (op1, op2) {
p(`COMPOSE ${i(op1)} + ${i(op2)}`)
checkOp(op1)
checkOp(op2)
const result = []
const append = makeAppend(result)
const [take, _] = Array.from(makeTake(op1))
let component
for (component of Array.from(op2)) {
let chunk, length
if (typeof component === 'number') {
// Skip
length = component
while (length > 0) {
chunk = take(length, 'd')
if (chunk === null) {
throw new Error(
'The op traverses more elements than the document has'
)
}
append(chunk)
if (typeof chunk !== 'object' || chunk.d == null) {
length -= componentLength(chunk)
}
}
} else if (component.i != null) {
// Insert
append({ i: component.i })
} else {
// Delete
let offset = 0
while (offset < component.d.length) {
chunk = take(component.d.length - offset, 'd')
if (chunk === null) {
throw new Error(
'The op traverses more elements than the document has'
)
}
// If it's a delete, append it. If it's a skip, drop it and decrease length. If it's an insert, check the strings match, drop it and decrease length.
if (typeof chunk === 'number') {
append({ d: component.d.slice(offset, offset + chunk) })
offset += chunk
} else if (chunk.i != null) {
if (component.d.slice(offset, offset + chunk.i.length) !== chunk.i) {
throw new Error("The deleted text doesn't match the inserted text")
}
offset += chunk.i.length
// The ops cancel each other out.
} else {
// Delete
append(chunk)
}
}
}
}
// Append extras from op1
while ((component = take())) {
if ((component != null ? component.d : undefined) == null) {
throw new Error(`Trailing stuff in op1 ${i(component)}`)
}
append(component)
}
return result
}
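// Eg: compose([{i:'abc'}], [1, {d:'b'}, 1]) gives [{i:'ac'}] - op2's delete
// of 'b' cancels against part of op1's insert.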
const invertComponent = function (c) {
if (typeof c === 'number') {
return c
} else if (c.i != null) {
return { d: c.i }
} else {
return { i: c.d }
}
}
// Invert an op
moduleExport.invert = function (op) {
const result = []
const append = makeAppend(result)
for (const component of Array.from(op)) {
append(invertComponent(component))
}
return result
}
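// Eg: moduleExport.invert([2, {i:'hi'}, {d:'x'}]) gives
// [2, {d:'hi'}, {i:'x'}] - inserts become deletes and vice versa, skips are
// unchanged.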
if (typeof window !== 'undefined' && window !== null) {
if (!window.ot) {
window.ot = {}
}
if (!window.ot.types) {
window.ot.types = {}
}
window.ot.types.text = moduleExport
}

View File

@@ -0,0 +1,133 @@
/* eslint-disable
no-undef,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// Text document API for text-tp2
let type
if (typeof WEB !== 'undefined' && WEB !== null) {
type = exports.types['text-tp2']
} else {
type = require('./text-tp2')
}
const { _takeDoc: takeDoc, _append: append } = type
const appendSkipChars = (op, doc, pos, maxlength) => {
while (
(maxlength === undefined || maxlength > 0) &&
pos.index < doc.data.length
) {
const part = takeDoc(doc, pos, maxlength, true)
if (maxlength !== undefined && typeof part === 'string') {
maxlength -= part.length
}
append(op, part.length || part)
}
}
type.api = {
provides: { text: true },
// The number of characters in the string
getLength() {
return this.snapshot.charLength
},
// Flatten a document into a string
getText() {
const strings = Array.from(this.snapshot.data).filter(
elem => typeof elem === 'string'
)
return strings.join('')
},
insert(pos, text, callback) {
if (pos === undefined) {
pos = 0
}
const op = []
const docPos = { index: 0, offset: 0 }
appendSkipChars(op, this.snapshot, docPos, pos)
append(op, { i: text })
appendSkipChars(op, this.snapshot, docPos)
this.submitOp(op, callback)
return op
},
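// Eg: on a document whose snapshot data is ['Hello ', 5, 'world']
// (charLength 11, totalLength 16), insert(2, 'hi') builds and submits the
// op [2, {i:'hi'}, 14] - skip 2 visible characters, insert 'hi', then skip
// the remaining 14 positions (4 characters + 5 tombstones + 5 characters).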
del(pos, length, callback) {
const op = []
const docPos = { index: 0, offset: 0 }
appendSkipChars(op, this.snapshot, docPos, pos)
while (length > 0) {
const part = takeDoc(this.snapshot, docPos, length, true)
if (typeof part === 'string') {
append(op, { d: part.length })
length -= part.length
} else {
append(op, part)
}
}
appendSkipChars(op, this.snapshot, docPos)
this.submitOp(op, callback)
return op
},
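// Eg: on the same ['Hello ', 5, 'world'] snapshot, del(0, 2) builds
// [{d:2}, 14] - tombstone the first two characters, then skip the rest of
// the document.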
_register() {
// Interpret received ops and generate more detailed events for them
return this.on('remoteop', function (op, snapshot) {
let textPos = 0
const docPos = { index: 0, offset: 0 }
for (const component of Array.from(op)) {
let part, remainder
if (typeof component === 'number') {
// Skip
remainder = component
while (remainder > 0) {
part = takeDoc(snapshot, docPos, remainder)
if (typeof part === 'string') {
textPos += part.length
}
remainder -= part.length || part
}
} else if (component.i !== undefined) {
// Insert
if (typeof component.i === 'string') {
this.emit('insert', textPos, component.i)
textPos += component.i.length
}
} else {
// Delete
remainder = component.d
while (remainder > 0) {
part = takeDoc(snapshot, docPos, remainder)
if (typeof part === 'string') {
this.emit('delete', textPos, part)
}
remainder -= part.length || part
}
}
}
})
},
}

View File

@@ -0,0 +1,499 @@
/* eslint-disable
no-cond-assign,
no-return-assign,
no-undef,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS103: Rewrite code to no longer use __guard__
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// A TP2 implementation of text, following this spec:
// http://code.google.com/p/lightwave/source/browse/trunk/experimental/ot/README
//
// A document is made up of a string and a set of tombstones inserted throughout
// the string. For example, 'some ', (2 tombstones), 'string'.
//
// This is encoded in a document as: {s:'some string', t:[5, -2, 6]}
//
// Ops are lists of components which iterate over the whole document.
// Components are either:
// N: Skip N characters in the original document
// {i:'str'}: Insert 'str' at the current position in the document
// {i:N}: Insert N tombstones at the current position in the document
// {d:N}: Delete (tombstone) N characters at the current position in the document
//
// Eg: [3, {i:'hi'}, 5, {d:8}]
//
// Snapshots are lists with characters and tombstones. Characters are stored in strings
// and adjacent tombstones are flattened into numbers.
//
// Eg, the document: 'Hello .....world' ('.' denotes tombstoned (deleted) characters)
// would be represented by a document snapshot of ['Hello ', 5, 'world']
let append, appendDoc, takeDoc
const type = {
name: 'text-tp2',
tp2: true,
create() {
return { charLength: 0, totalLength: 0, positionCache: [], data: [] }
},
serialize(doc) {
if (!doc.data) {
throw new Error('invalid doc snapshot')
}
return doc.data
},
deserialize(data) {
const doc = type.create()
doc.data = data
for (const component of Array.from(data)) {
if (typeof component === 'string') {
doc.charLength += component.length
doc.totalLength += component.length
} else {
doc.totalLength += component
}
}
return doc
},
}
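// Eg: type.deserialize(['Hello ', 5, 'world']) gives a snapshot with
// charLength 11 (visible characters only) and totalLength 16 (characters
// plus the 5 tombstones).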
const checkOp = function (op) {
if (!Array.isArray(op)) {
throw new Error('Op must be an array of components')
}
let last = null
return (() => {
const result = []
for (const c of Array.from(op)) {
if (typeof c === 'object') {
if (c.i !== undefined) {
if (
(typeof c.i !== 'string' || !(c.i.length > 0)) &&
(typeof c.i !== 'number' || !(c.i > 0))
) {
throw new Error('Inserts must insert a string or a +ive number')
}
} else if (c.d !== undefined) {
if (typeof c.d !== 'number' || !(c.d > 0)) {
throw new Error('Deletes must be a +ive number')
}
} else {
throw new Error('Operation component must define .i or .d')
}
} else {
if (typeof c !== 'number') {
throw new Error('Op components must be objects or numbers')
}
if (!(c > 0)) {
throw new Error('Skip components must be a positive number')
}
if (typeof last === 'number') {
throw new Error('Adjacent skip components should be combined')
}
}
result.push((last = c))
}
return result
})()
}
// Take the next part from the specified position in a document snapshot.
// position = {index, offset}. It will be updated.
type._takeDoc = takeDoc = function (
doc,
position,
maxlength,
tombsIndivisible
) {
if (position.index >= doc.data.length) {
throw new Error('Operation goes past the end of the document')
}
const part = doc.data[position.index]
// peel off data[0]
const result =
typeof part === 'string'
? maxlength !== undefined
? part.slice(position.offset, position.offset + maxlength)
: part.slice(position.offset)
: maxlength === undefined || tombsIndivisible
? part - position.offset
: Math.min(maxlength, part - position.offset)
const resultLen = result.length || result
if ((part.length || part) - position.offset > resultLen) {
position.offset += resultLen
} else {
position.index++
position.offset = 0
}
return result
}
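// Eg: with doc.data = ['Hello ', 5, 'world'] and position = {index:0,
// offset:0}, takeDoc(doc, position, 3) returns 'Hel' and advances position
// to {index:0, offset:3}; a further takeDoc(doc, position) returns 'lo '
// and moves on to {index:1, offset:0}.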
// Append a part to the end of a document
type._appendDoc = appendDoc = function (doc, p) {
if (p === 0 || p === '') {
return
}
if (typeof p === 'string') {
doc.charLength += p.length
doc.totalLength += p.length
} else {
doc.totalLength += p
}
const { data } = doc
if (data.length === 0) {
data.push(p)
} else if (typeof data[data.length - 1] === typeof p) {
data[data.length - 1] += p
} else {
data.push(p)
}
}
// Apply the op to the document. The document is not modified in the process.
type.apply = function (doc, op) {
if (
doc.totalLength === undefined ||
doc.charLength === undefined ||
doc.data.length === undefined
) {
throw new Error('Snapshot is invalid')
}
checkOp(op)
const newDoc = type.create()
const position = { index: 0, offset: 0 }
for (const component of Array.from(op)) {
let part, remainder
if (typeof component === 'number') {
remainder = component
while (remainder > 0) {
part = takeDoc(doc, position, remainder)
appendDoc(newDoc, part)
remainder -= part.length || part
}
} else if (component.i !== undefined) {
appendDoc(newDoc, component.i)
} else if (component.d !== undefined) {
remainder = component.d
while (remainder > 0) {
part = takeDoc(doc, position, remainder)
remainder -= part.length || part
}
appendDoc(newDoc, component.d)
}
}
return newDoc
}
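// Eg: applying [1, {d:1}, 1] to a snapshot with data ['abc'] keeps 'a',
// tombstones 'b' and keeps 'c', giving data ['a', 1, 'c'] with charLength 2
// and totalLength 3 - deletes never shrink totalLength.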
// Append an op component to the end of the specified op.
// Exported for the randomOpGenerator.
type._append = append = function (op, component) {
if (
component === 0 ||
component.i === '' ||
component.i === 0 ||
component.d === 0
) {
return
}
if (op.length === 0) {
return op.push(component)
} else {
const last = op[op.length - 1]
if (typeof component === 'number' && typeof last === 'number') {
return (op[op.length - 1] += component)
} else if (
component.i !== undefined &&
last.i != null &&
typeof last.i === typeof component.i
) {
return (last.i += component.i)
} else if (component.d !== undefined && last.d != null) {
return (last.d += component.d)
} else {
return op.push(component)
}
}
}
// Makes 2 functions for taking components from the start of an op, and for peeking
// at the next component that could be taken.
const makeTake = function (op) {
// The index of the next component to take
let index = 0
// The offset into the component
let offset = 0
// Take up to length maxlength from the op. If maxlength is not defined, there is no max.
// If insertsIndivisible is true, inserts (& insert tombstones) won't be separated.
//
// Returns null when op is fully consumed.
const take = function (maxlength, insertsIndivisible) {
let current
if (index === op.length) {
return null
}
const e = op[index]
// Work out the length of a skip, a tombstone insert ({i:N}) or a delete
// ({d:N}). String inserts fall through to the else branch below.
if (typeof e === 'number') {
current = e
} else if (typeof e.i === 'number') {
current = e.i
} else {
current = e.d
}
if (current !== undefined) {
let c
if (
maxlength == null ||
current - offset <= maxlength ||
(insertsIndivisible && e.i !== undefined)
) {
// Return the rest of the current element.
c = current - offset
++index
offset = 0
} else {
offset += maxlength
c = maxlength
}
if (e.i !== undefined) {
return { i: c }
} else if (e.d !== undefined) {
return { d: c }
} else {
return c
}
} else {
// Take from the inserted string
let result
if (
maxlength == null ||
e.i.length - offset <= maxlength ||
insertsIndivisible
) {
result = { i: e.i.slice(offset) }
++index
offset = 0
} else {
result = { i: e.i.slice(offset, offset + maxlength) }
offset += maxlength
}
return result
}
}
const peekType = () => op[index]
return [take, peekType]
}
// Find and return the length of an op component
const componentLength = function (component) {
if (typeof component === 'number') {
return component
} else if (typeof component.i === 'string') {
return component.i.length
} else {
// This should work because c.d and c.i must be +ive.
return component.d || component.i
}
}
// Normalize an op, removing all empty skips and empty inserts / deletes. Concatenate
// adjacent inserts and deletes.
type.normalize = function (op) {
const newOp = []
for (const component of Array.from(op)) {
append(newOp, component)
}
return newOp
}
// This is a helper method to transform and prune. goForwards is true for transform, false for prune.
const transformer = function (op, otherOp, goForwards, side) {
let component
checkOp(op)
checkOp(otherOp)
const newOp = []
const [take, peek] = Array.from(makeTake(op))
for (component of Array.from(otherOp)) {
let chunk
let length = componentLength(component)
if (component.i !== undefined) {
// Insert text or tombs
if (goForwards) {
// transform - insert skips over inserted parts
if (side === 'left') {
// The left insert should go first.
while (__guard__(peek(), x => x.i) !== undefined) {
append(newOp, take())
}
}
// In any case, skip the inserted text.
append(newOp, length)
} else {
// Prune. Remove skips for inserts.
while (length > 0) {
chunk = take(length, true)
if (chunk === null) {
throw new Error('The transformed op is invalid')
}
if (chunk.d !== undefined) {
throw new Error(
'The transformed op deletes locally inserted characters - it cannot be purged of the insert.'
)
}
if (typeof chunk === 'number') {
length -= chunk
} else {
append(newOp, chunk)
}
}
}
} else {
// Skip or delete
while (length > 0) {
chunk = take(length, true)
if (chunk === null) {
throw new Error(
'The op traverses more elements than the document has'
)
}
append(newOp, chunk)
if (!chunk.i) {
length -= componentLength(chunk)
}
}
}
}
// Append extras from op1
while ((component = take())) {
if (component.i === undefined) {
throw new Error(`Remaining fragments in the op: ${component}`)
}
append(newOp, component)
}
return newOp
}
// transform op1 by op2. Return transformed version of op1.
// op1 and op2 are unchanged by transform.
// side should be 'left' or 'right', depending on if op1.id <> op2.id. 'left' == client op.
type.transform = function (op, otherOp, side) {
if (side !== 'left' && side !== 'right') {
throw new Error(`side (${side}) should be 'left' or 'right'`)
}
return transformer(op, otherOp, true, side)
}
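// Eg, over a 3-element document: transforming op1 = [{i:'x'}, 3] by
// op2 = [3, {i:'y'}] with side 'left' gives [{i:'x'}, 4] - op1 now also
// skips over the element inserted by op2.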
// Prune is the inverse of transform.
type.prune = (op, otherOp) => transformer(op, otherOp, false)
// Compose 2 ops into 1 op.
type.compose = function (op1, op2) {
let component
if (op1 === null || op1 === undefined) {
return op2
}
checkOp(op1)
checkOp(op2)
const result = []
const [take, _] = Array.from(makeTake(op1))
for (component of Array.from(op2)) {
let chunk, length
if (typeof component === 'number') {
// Skip
// Just copy from op1.
length = component
while (length > 0) {
chunk = take(length)
if (chunk === null) {
throw new Error(
'The op traverses more elements than the document has'
)
}
append(result, chunk)
length -= componentLength(chunk)
}
} else if (component.i !== undefined) {
// Insert
append(result, { i: component.i })
} else {
// Delete
length = component.d
while (length > 0) {
chunk = take(length)
if (chunk === null) {
throw new Error(
'The op traverses more elements than the document has'
)
}
const chunkLength = componentLength(chunk)
if (chunk.i !== undefined) {
append(result, { i: chunkLength })
} else {
append(result, { d: chunkLength })
}
length -= chunkLength
}
}
}
// Append extras from op1
while ((component = take())) {
if (component.i === undefined) {
throw new Error(`Remaining fragments in op1: ${component}`)
}
append(result, component)
}
return result
}
if (typeof WEB !== 'undefined' && WEB !== null) {
exports.types['text-tp2'] = type
} else {
module.exports = type
}
function __guard__(value, transform) {
return typeof value !== 'undefined' && value !== null
? transform(value)
: undefined
}

View File

@@ -0,0 +1,387 @@
/* eslint-disable
no-return-assign,
no-undef,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// A simple text implementation
//
// Operations are lists of components.
// Each component either inserts or deletes at a specified position in the document.
//
// Components are either:
// {i:'str', p:100}: Insert 'str' at position 100 in the document
// {d:'str', p:100}: Delete 'str' at position 100 in the document
//
// Components in an operation are executed sequentially, so the position of components
// assumes previous components have already executed.
//
// Eg: This op:
// [{i:'abc', p:0}]
// is equivalent to this op:
// [{i:'a', p:0}, {i:'b', p:1}, {i:'c', p:2}]
// NOTE: The global scope here is shared with other sharejs files when built with closure.
// Be careful what ends up in your namespace.
let append, transformComponent
const text = {}
text.name = 'text'
text.create = () => ''
const strInject = (s1, pos, s2) => s1.slice(0, pos) + s2 + s1.slice(pos)
const checkValidComponent = function (c) {
if (typeof c.p !== 'number') {
throw new Error('component missing position field')
}
const iType = typeof c.i
const dType = typeof c.d
const cType = typeof c.c
if (!((iType === 'string') ^ (dType === 'string') ^ (cType === 'string'))) {
throw new Error('component needs an i, d or c field')
}
if (!(c.p >= 0)) {
throw new Error('position cannot be negative')
}
}
const checkValidOp = function (op) {
for (const c of Array.from(op)) {
checkValidComponent(c)
}
return true
}
text.apply = function (snapshot, op) {
checkValidOp(op)
for (const component of Array.from(op)) {
if (component.i != null) {
snapshot = strInject(snapshot, component.p, component.i)
} else if (component.d != null) {
const deleted = snapshot.slice(
component.p,
component.p + component.d.length
)
if (component.d !== deleted) {
throw new Error(
`Delete component '${component.d}' does not match deleted text '${deleted}'`
)
}
snapshot =
snapshot.slice(0, component.p) +
snapshot.slice(component.p + component.d.length)
} else if (component.c != null) {
const comment = snapshot.slice(
component.p,
component.p + component.c.length
)
if (component.c !== comment) {
throw new Error(
`Comment component '${component.c}' does not match commented text '${comment}'`
)
}
} else {
throw new Error('Unknown op type')
}
}
return snapshot
}
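// Eg: text.apply('abc', [{i:'X', p:1}, {d:'c', p:3}]) inserts 'X' after 'a'
// and then deletes the trailing 'c', returning 'aXb'. Note that the second
// component's position already accounts for the first insert.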
// Exported for use by the random op generator.
//
// For simplicity, this version of append does not compress adjacent inserts and deletes of
// the same text. It would be nice to change that at some stage.
text._append = append = function (newOp, c) {
if (c.i === '' || c.d === '') {
return
}
if (newOp.length === 0) {
return newOp.push(c)
} else {
const last = newOp[newOp.length - 1]
// Compose the insert into the previous insert if possible
if (
last.i != null &&
c.i != null &&
last.p <= c.p &&
c.p <= last.p + last.i.length
) {
return (newOp[newOp.length - 1] = {
i: strInject(last.i, c.p - last.p, c.i),
p: last.p,
})
} else if (
last.d != null &&
c.d != null &&
c.p <= last.p &&
last.p <= c.p + c.d.length
) {
return (newOp[newOp.length - 1] = {
d: strInject(c.d, last.p - c.p, last.d),
p: c.p,
})
} else {
return newOp.push(c)
}
}
}
text.compose = function (op1, op2) {
checkValidOp(op1)
checkValidOp(op2)
const newOp = op1.slice()
for (const c of Array.from(op2)) {
append(newOp, c)
}
return newOp
}
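// Eg: text.compose([{i:'a', p:0}], [{i:'b', p:1}]) merges the adjacent
// inserts into [{i:'ab', p:0}].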
// Attempt to compress the op components together 'as much as possible'.
// This implementation preserves order and preserves create/delete pairs.
text.compress = op => text.compose([], op)
text.normalize = function (op) {
const newOp = []
// Normalize should allow ops which are a single (unwrapped) component:
// {i:'asdf', p:23}.
// There's no good way to test if something is an array:
// http://perfectionkills.com/instanceof-considered-harmful-or-how-to-write-a-robust-isarray/
// so this is probably the least bad solution.
if (op.i != null || op.p != null) {
op = [op]
}
for (const c of Array.from(op)) {
if (c.p == null) {
c.p = 0
}
append(newOp, c)
}
return newOp
}
// This helper method transforms a position by an op component.
//
// If c is an insert, insertAfter specifies whether the transform
// is pushed after the insert (true) or before it (false).
//
// insertAfter is optional for deletes.
const transformPosition = function (pos, c, insertAfter) {
if (c.i != null) {
if (c.p < pos || (c.p === pos && insertAfter)) {
return pos + c.i.length
} else {
return pos
}
} else if (c.d != null) {
// I think this could also be written as: Math.min(c.p, Math.min(c.p - otherC.p, otherC.d.length))
// but I think it's harder to read that way, and it compiles using ternary operators anyway
// so it's no slower written like this.
if (pos <= c.p) {
return pos
} else if (pos <= c.p + c.d.length) {
return c.p
} else {
return pos - c.d.length
}
} else if (c.c != null) {
return pos
} else {
throw new Error('unknown op type')
}
}
// Helper method to transform a cursor position as a result of an op.
//
// Like transformPosition above, if c is an insert, insertAfter specifies whether the cursor position
// is pushed after an insert (true) or before it (false).
text.transformCursor = function (position, op, side) {
const insertAfter = side === 'right'
for (const c of Array.from(op)) {
position = transformPosition(position, c, insertAfter)
}
return position
}
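// Eg: text.transformCursor(5, [{i:'abc', p:2}], 'right') returns 8 - the
// cursor is pushed right by the 3 inserted characters - while
// text.transformCursor(2, [{i:'abc', p:2}], 'left') stays at 2, because the
// cursor sits exactly at the insert position and side is 'left'.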
// Transform an op component by another op component. Asymmetric.
// The result will be appended to destination.
//
// exported for use in JSON type
text._tc = transformComponent = function (dest, c, otherC, side) {
let cIntersect, intersectEnd, intersectStart, newC, otherIntersect
checkValidOp([c])
checkValidOp([otherC])
if (c.i != null) {
append(dest, {
i: c.i,
p: transformPosition(c.p, otherC, side === 'right'),
})
} else if (c.d != null) {
// Delete
if (otherC.i != null) {
// delete vs insert
let s = c.d
if (c.p < otherC.p) {
append(dest, { d: s.slice(0, otherC.p - c.p), p: c.p })
s = s.slice(otherC.p - c.p)
}
if (s !== '') {
append(dest, { d: s, p: c.p + otherC.i.length })
}
} else if (otherC.d != null) {
// Delete vs delete
if (c.p >= otherC.p + otherC.d.length) {
append(dest, { d: c.d, p: c.p - otherC.d.length })
} else if (c.p + c.d.length <= otherC.p) {
append(dest, c)
} else {
// They overlap somewhere.
newC = { d: '', p: c.p }
if (c.p < otherC.p) {
newC.d = c.d.slice(0, otherC.p - c.p)
}
if (c.p + c.d.length > otherC.p + otherC.d.length) {
newC.d += c.d.slice(otherC.p + otherC.d.length - c.p)
}
// This is entirely optional - just for a check that the deleted
// text in the two ops matches
intersectStart = Math.max(c.p, otherC.p)
intersectEnd = Math.min(c.p + c.d.length, otherC.p + otherC.d.length)
cIntersect = c.d.slice(intersectStart - c.p, intersectEnd - c.p)
otherIntersect = otherC.d.slice(
intersectStart - otherC.p,
intersectEnd - otherC.p
)
if (cIntersect !== otherIntersect) {
throw new Error(
'Delete ops delete different text in the same region of the document'
)
}
if (newC.d !== '') {
// This could be rewritten similarly to insert v delete, above.
newC.p = transformPosition(newC.p, otherC)
append(dest, newC)
}
}
} else if (otherC.c != null) {
append(dest, c)
} else {
throw new Error('unknown op type')
}
} else if (c.c != null) {
// Comment
if (otherC.i != null) {
if (c.p < otherC.p && otherC.p < c.p + c.c.length) {
const offset = otherC.p - c.p
const newC =
c.c.slice(0, offset) + otherC.i + c.c.slice(offset)
append(dest, { c: newC, p: c.p, t: c.t })
} else {
append(dest, {
c: c.c,
p: transformPosition(c.p, otherC, true),
t: c.t,
})
}
} else if (otherC.d != null) {
if (c.p >= otherC.p + otherC.d.length) {
append(dest, { c: c.c, p: c.p - otherC.d.length, t: c.t })
} else if (c.p + c.c.length <= otherC.p) {
append(dest, c)
} else {
// Delete overlaps comment
// They overlap somewhere.
newC = { c: '', p: c.p, t: c.t }
if (c.p < otherC.p) {
newC.c = c.c.slice(0, otherC.p - c.p)
}
if (c.p + c.c.length > otherC.p + otherC.d.length) {
newC.c += c.c.slice(otherC.p + otherC.d.length - c.p)
}
// This is entirely optional - just for a check that the deleted
// text in the two ops matches
intersectStart = Math.max(c.p, otherC.p)
intersectEnd = Math.min(c.p + c.c.length, otherC.p + otherC.d.length)
cIntersect = c.c.slice(intersectStart - c.p, intersectEnd - c.p)
otherIntersect = otherC.d.slice(
intersectStart - otherC.p,
intersectEnd - otherC.p
)
if (cIntersect !== otherIntersect) {
throw new Error(
'Delete ops delete different text in the same region of the document'
)
}
newC.p = transformPosition(newC.p, otherC)
append(dest, newC)
}
} else if (otherC.c != null) {
append(dest, c)
} else {
throw new Error('unknown op type')
}
}
return dest
}
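// Eg: transforming the delete {d:'bc', p:1} against the insert {i:'X', p:2}
// appends [{d:'b', p:1}, {d:'c', p:2}] to dest - the delete is split around
// the concurrently inserted character.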
const invertComponent = function (c) {
if (c.i != null) {
return { d: c.i, p: c.p }
} else {
return { i: c.d, p: c.p }
}
}
// No need to use append for invert, because the components won't be able to
// cancel with one another.
text.invert = op =>
Array.from(op.slice().reverse()).map(c => invertComponent(c))
if (typeof WEB !== 'undefined' && WEB !== null) {
if (!exports.types) {
exports.types = {}
}
// This is kind of awful - come up with a better way to hook this helper code up.
bootstrapTransform(text, transformComponent, checkValidOp, append)
// [] is used to prevent closure from renaming types.text
exports.types.text = text
} else {
module.exports = text
// The text type really shouldn't need this - it should be possible to define
// an efficient transform function by making a sort of transform map and passing each
// op component through it.
require('./helpers').bootstrapTransform(
text,
transformComponent,
checkValidOp,
append
)
}

View File

@@ -0,0 +1,14 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
// This is included at the top of each compiled type file for the web.
/**
@const
@type {boolean}
*/
const WEB = true
const exports = window.sharejs

View File

@@ -0,0 +1,136 @@
import {
TrackingPropsRawData,
ClearTrackingPropsRawData,
} from 'overleaf-editor-core/lib/types'
/**
* An update coming from the editor
*/
export type Update = {
doc: string
op: Op[]
v: number
meta?: {
tc?: boolean
user_id?: string
ts?: number
}
projectHistoryId?: string
}
export type Op = InsertOp | DeleteOp | CommentOp | RetainOp
export type InsertOp = {
i: string
p: number
u?: boolean
}
export type RetainOp = {
r: string
p: number
}
export type DeleteOp = {
d: string
p: number
u?: boolean
}
export type CommentOp = {
c: string
p: number
t: string
u?: boolean
// Used by project-history when restoring CommentSnapshots
resolved?: boolean
}
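// For example (illustrative values only), a single-character replacement
// coming from the editor could be represented as:
//
//   {
//     doc: 'doc-id',
//     op: [
//       { i: 'a', p: 10 },
//       { d: 'b', p: 11 },
//     ],
//     v: 42,
//     meta: { user_id: 'user-id', ts: 1234567890 },
//   }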
/**
* Ranges record on a document
*/
export type Ranges = {
comments?: Comment[]
changes?: TrackedChange[]
}
export type Comment = {
id: string
op: CommentOp
metadata?: {
user_id: string
ts: string
}
}
export type TrackedChange = {
id: string
op: InsertOp | DeleteOp
metadata: {
user_id: string
ts: string
}
}
/**
* Updates sent to project-history
*/
export type HistoryUpdate = {
op: HistoryOp[]
doc: string
v?: number
meta?: {
ts?: number
pathname?: string
doc_length?: number
history_doc_length?: number
doc_hash?: string
tc?: boolean
user_id?: string
}
projectHistoryId?: string
}
export type HistoryOp =
| HistoryInsertOp
| HistoryDeleteOp
| HistoryCommentOp
| HistoryRetainOp
export type HistoryInsertOp = InsertOp & {
commentIds?: string[]
hpos?: number
trackedDeleteRejection?: boolean
}
export type HistoryRetainOp = RetainOp & {
hpos?: number
tracking?: TrackingPropsRawData | ClearTrackingPropsRawData
}
export type HistoryDeleteOp = DeleteOp & {
hpos?: number
trackedChanges?: HistoryDeleteTrackedChange[]
}
export type HistoryDeleteTrackedChange = {
type: 'insert' | 'delete'
offset: number
length: number
}
export type HistoryCommentOp = CommentOp & {
hpos?: number
hlen?: number
}
export type HistoryRanges = {
comments?: HistoryComment[]
changes?: HistoryTrackedChange[]
}
export type HistoryComment = Comment & { op: HistoryCommentOp }
export type HistoryTrackedChange = TrackedChange & {
op: HistoryInsertOp | HistoryDeleteOp
}