first commit

commit ff9c54d5e4
2025-04-24 13:11:28 +08:00
5960 changed files with 834111 additions and 0 deletions


@@ -0,0 +1 @@
app/lib/*.js

services/project-history/.gitignore

@@ -0,0 +1,8 @@
**.swp
node_modules/
forever/
.config
.npm
# managed by dev-environment$ bin/update_build_scripts
.npmrc


@@ -0,0 +1,3 @@
{
"require": "test/setup.js"
}


@@ -0,0 +1 @@
20.18.2


@@ -0,0 +1,27 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
FROM node:20.18.2 AS base
WORKDIR /overleaf/services/project-history
# Google Cloud Storage needs a writable $HOME/.config for resumable uploads
# (see https://googleapis.dev/nodejs/storage/latest/File.html#createWriteStream)
RUN mkdir /home/node/.config && chown node:node /home/node/.config
FROM base AS app
COPY package.json package-lock.json /overleaf/
COPY services/project-history/package.json /overleaf/services/project-history/
COPY libraries/ /overleaf/libraries/
COPY patches/ /overleaf/patches/
RUN cd /overleaf && npm ci --quiet
COPY services/project-history/ /overleaf/services/project-history/
FROM app
USER node
CMD ["node", "--expose-gc", "app.js"]


@@ -0,0 +1,156 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
BUILD_NUMBER ?= local
BRANCH_NAME ?= $(shell git rev-parse --abbrev-ref HEAD)
PROJECT_NAME = project-history
BUILD_DIR_NAME = $(shell pwd | xargs basename | tr -cd '[a-zA-Z0-9_.\-]')
DOCKER_COMPOSE_FLAGS ?= -f docker-compose.yml
DOCKER_COMPOSE := BUILD_NUMBER=$(BUILD_NUMBER) \
BRANCH_NAME=$(BRANCH_NAME) \
PROJECT_NAME=$(PROJECT_NAME) \
MOCHA_GREP=${MOCHA_GREP} \
docker compose ${DOCKER_COMPOSE_FLAGS}
COMPOSE_PROJECT_NAME_TEST_ACCEPTANCE ?= test_acceptance_$(BUILD_DIR_NAME)
DOCKER_COMPOSE_TEST_ACCEPTANCE = \
COMPOSE_PROJECT_NAME=$(COMPOSE_PROJECT_NAME_TEST_ACCEPTANCE) $(DOCKER_COMPOSE)
COMPOSE_PROJECT_NAME_TEST_UNIT ?= test_unit_$(BUILD_DIR_NAME)
DOCKER_COMPOSE_TEST_UNIT = \
COMPOSE_PROJECT_NAME=$(COMPOSE_PROJECT_NAME_TEST_UNIT) $(DOCKER_COMPOSE)
clean:
-docker rmi ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
-docker rmi us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
-$(DOCKER_COMPOSE_TEST_UNIT) down --rmi local
-$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down --rmi local
HERE=$(shell pwd)
MONOREPO=$(shell cd ../../ && pwd)
# Run the linting commands in the scope of the monorepo.
# Eslint and prettier (plus some configs) are on the root.
RUN_LINTING = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(HERE) node:20.18.2 npm run --silent
RUN_LINTING_CI = docker run --rm --volume $(MONOREPO)/.editorconfig:/overleaf/.editorconfig --volume $(MONOREPO)/.eslintignore:/overleaf/.eslintignore --volume $(MONOREPO)/.eslintrc:/overleaf/.eslintrc --volume $(MONOREPO)/.prettierignore:/overleaf/.prettierignore --volume $(MONOREPO)/.prettierrc:/overleaf/.prettierrc --volume $(MONOREPO)/tsconfig.backend.json:/overleaf/tsconfig.backend.json --volume $(MONOREPO)/services/document-updater/app/js/types.ts:/overleaf/services/document-updater/app/js/types.ts ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) npm run --silent
# Same but from the top of the monorepo
RUN_LINTING_MONOREPO = docker run --rm -v $(MONOREPO):$(MONOREPO) -w $(MONOREPO) node:20.18.2 npm run --silent
SHELLCHECK_OPTS = \
--shell=bash \
--external-sources
SHELLCHECK_COLOR := $(if $(CI),--color=never,--color)
SHELLCHECK_FILES := { git ls-files "*.sh" -z; git grep -Plz "\A\#\!.*bash"; } | sort -zu
shellcheck:
@$(SHELLCHECK_FILES) | xargs -0 -r docker run --rm -v $(HERE):/mnt -w /mnt \
koalaman/shellcheck:stable $(SHELLCHECK_OPTS) $(SHELLCHECK_COLOR)
shellcheck_fix:
@$(SHELLCHECK_FILES) | while IFS= read -r -d '' file; do \
diff=$$(docker run --rm -v $(HERE):/mnt -w /mnt koalaman/shellcheck:stable $(SHELLCHECK_OPTS) --format=diff "$$file" 2>/dev/null); \
if [ -n "$$diff" ] && ! echo "$$diff" | patch -p1 >/dev/null 2>&1; then echo "\033[31m$$file\033[0m"; \
elif [ -n "$$diff" ]; then echo "$$file"; \
else echo "\033[2m$$file\033[0m"; fi \
done
format:
$(RUN_LINTING) format
format_ci:
$(RUN_LINTING_CI) format
format_fix:
$(RUN_LINTING) format:fix
lint:
$(RUN_LINTING) lint
lint_ci:
$(RUN_LINTING_CI) lint
lint_fix:
$(RUN_LINTING) lint:fix
typecheck:
$(RUN_LINTING) types:check
typecheck_ci:
$(RUN_LINTING_CI) types:check
test: format lint typecheck shellcheck test_unit test_acceptance
test_unit:
ifneq (,$(wildcard test/unit))
$(DOCKER_COMPOSE_TEST_UNIT) run --rm test_unit
$(MAKE) test_unit_clean
endif
test_clean: test_unit_clean
test_unit_clean:
ifneq (,$(wildcard test/unit))
$(DOCKER_COMPOSE_TEST_UNIT) down -v -t 0
endif
test_acceptance: test_acceptance_clean test_acceptance_pre_run test_acceptance_run
$(MAKE) test_acceptance_clean
test_acceptance_debug: test_acceptance_clean test_acceptance_pre_run test_acceptance_run_debug
$(MAKE) test_acceptance_clean
test_acceptance_run:
ifneq (,$(wildcard test/acceptance))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance
endif
test_acceptance_run_debug:
ifneq (,$(wildcard test/acceptance))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run -p 127.0.0.9:19999:19999 --rm test_acceptance npm run test:acceptance -- --inspect=0.0.0.0:19999 --inspect-brk
endif
test_clean: test_acceptance_clean
test_acceptance_clean:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) down -v -t 0
test_acceptance_pre_run:
ifneq (,$(wildcard test/acceptance/js/scripts/pre-run))
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance test/acceptance/js/scripts/pre-run
endif
benchmarks:
$(DOCKER_COMPOSE_TEST_ACCEPTANCE) run --rm test_acceptance npm run benchmarks
build:
docker build \
--pull \
--build-arg BUILDKIT_INLINE_CACHE=1 \
--tag ci/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
--tag us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER) \
--tag us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME) \
--cache-from us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):$(BRANCH_NAME) \
--cache-from us-east1-docker.pkg.dev/overleaf-ops/ol-docker/$(PROJECT_NAME):main \
--file Dockerfile \
../..
tar:
$(DOCKER_COMPOSE) up tar
publish:
docker push $(DOCKER_REPO)/$(PROJECT_NAME):$(BRANCH_NAME)-$(BUILD_NUMBER)
.PHONY: clean \
format format_fix \
lint lint_fix \
build_types typecheck \
lint_ci format_ci typecheck_ci \
shellcheck shellcheck_fix \
test test_clean test_unit test_unit_clean \
test_acceptance test_acceptance_debug test_acceptance_pre_run \
test_acceptance_run test_acceptance_run_debug test_acceptance_clean \
benchmarks \
build tar publish \


@@ -0,0 +1,71 @@
@overleaf/project-history
==========================
An API for converting raw editor updates into a compressed and browseable history.
Running project-history
-----------------------
The app runs natively using npm and Node on the local system:
```
npm install
npm run start
```
Unit Tests
----------
The test suites run in Docker.
Unit tests can be run in the `test_unit` container defined in `docker-compose.tests.yml`.
The makefile contains a shortcut to run these:
```
make install # Only needs running once, or when npm packages are updated
make test_unit
```
During development it is often useful to only run a subset of tests, which can be configured with arguments to the mocha CLI:
```
make test_unit MOCHA_ARGS='--grep=AuthorizationManager'
```
Acceptance Tests
----------------
Acceptance tests are run against a live service, which runs in the `acceptance_test` container defined in `docker-compose.tests.yml`.
To run the tests out-of-the-box, the makefile defines:
```
make install # Only needs running once, or when npm packages are updated
make test_acceptance
```
However, during development it is often useful to leave the service running for rapid iteration on the acceptance tests. This can be done with:
```
make test_acceptance_start_service
make test_acceptance_run # Run as many times as needed during development
make test_acceptance_stop_service
```
`make test_acceptance` just runs these three commands in sequence.
During development it is often useful to only run a subset of tests, which can be configured with arguments to the mocha CLI:
```
make test_acceptance_run MOCHA_ARGS='--grep=AuthorizationManager'
```
Makefile and npm scripts
------------------------
The commands used to compile the app and tests, to run the mocha tests, and to run the app are all in `package.json`. These commands call out to `coffee`, `mocha`, etc., which are available to `npm` in the local `node_modules/.bin` directory, using the local versions. Normally, these commands should not be run directly, but should instead be run in Docker via make.
The makefile contains a collection of shortcuts for running the npm scripts inside the appropriate docker containers, using the `docker-compose` files in the project.
Copyright (c) Overleaf, 2017-2021.


@@ -0,0 +1,28 @@
// Metrics must be initialized before importing anything else
import '@overleaf/metrics/initialize.js'
import Settings from '@overleaf/settings'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import { mongoClient } from './app/js/mongodb.js'
import { app } from './app/js/server.js'
const host = Settings.internal.history.host
const port = Settings.internal.history.port
mongoClient
.connect()
.then(() => {
app.listen(port, host, error => {
if (error) {
error = OError.tag(error, 'could not start history server')
logger.error({ error }, error.message)
} else {
logger.debug({}, `history starting up, listening on ${host}:${port}`)
}
})
})
.catch(err => {
logger.fatal({ err }, 'Cannot connect to mongo. Exiting.')
process.exit(1)
})


@@ -0,0 +1,129 @@
import _ from 'lodash'
import async from 'async'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as UpdateTranslator from './UpdateTranslator.js'
// avoid creating too many blobs at the same time
const MAX_CONCURRENT_REQUESTS = 4
// number of retry attempts for blob creation
const RETRY_ATTEMPTS = 3
// delay between retries
const RETRY_INTERVAL = 100
export function createBlobsForUpdates(
projectId,
historyId,
updates,
extendLock,
callback
) {
// async.mapLimit runs jobs in parallel and returns on the first error. It
// doesn't wait for concurrent jobs to finish. We want to make sure all jobs
// are wrapped within our lock so we collect the first error encountered here
// and wait for all jobs to finish before returning the error.
let firstBlobCreationError = null
function createBlobForUpdate(update, cb) {
// For file additions we need to first create a blob in the history-store
// with the contents of the file. Then we can create a change containing a
// file addition operation which references the blob.
//
// To do this we decorate file creation updates with a blobHash
if (!UpdateTranslator.isAddUpdate(update)) {
return async.setImmediate(() => cb(null, { update }))
}
let attempts = 0
// Since we may be creating O(1000) blobs in an update, allow for the
// occasional failure to prevent the whole update from failing.
let lastErr
async.retry(
{
times: RETRY_ATTEMPTS,
interval: RETRY_INTERVAL,
},
_cb => {
attempts++
if (attempts > 1) {
logger.error(
{
err: lastErr,
projectId,
historyId,
update: _.pick(
update,
'doc',
'file',
'hash',
'createdBlob',
'url'
),
attempts,
},
'previous createBlob attempt failed, retrying'
)
}
// extend the lock for each file because large files may take a long time
extendLock(err => {
if (err) {
lastErr = OError.tag(err)
return _cb(lastErr)
}
HistoryStoreManager.createBlobForUpdate(
projectId,
historyId,
update,
(err, hashes) => {
if (err) {
lastErr = OError.tag(err, 'retry: error creating blob', {
projectId,
doc: update.doc,
file: update.file,
})
_cb(lastErr)
} else {
_cb(null, hashes)
}
}
)
})
},
(error, blobHashes) => {
if (error) {
if (!firstBlobCreationError) {
firstBlobCreationError = error
}
return cb(null, { update, blobHashes })
}
extendLock(error => {
if (error) {
if (!firstBlobCreationError) {
firstBlobCreationError = error
}
}
cb(null, { update, blobHashes })
})
}
)
}
async.mapLimit(
updates,
MAX_CONCURRENT_REQUESTS,
createBlobForUpdate,
(unusedError, updatesWithBlobs) => {
// As indicated by the name this is unexpected, but future changes could
// cause it to be set, and silently ignoring it would be surprising
if (unusedError) {
return callback(unusedError)
}
if (firstBlobCreationError) {
return callback(firstBlobCreationError)
}
callback(null, updatesWithBlobs)
}
)
}
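
As an aside, a minimal sketch of the error-collection pattern described in the comment above, using only the `async` library; `runAllThenFail` and the job functions are hypothetical names, not part of this service:

```
import async from 'async'

// jobs: array of functions taking a node-style callback. We stash the first
// error but always report success to mapLimit, so every job runs to
// completion before the collected error is surfaced.
function runAllThenFail(jobs, callback) {
  let firstError = null
  async.mapLimit(
    jobs,
    4, // mirrors MAX_CONCURRENT_REQUESTS above
    (job, cb) => {
      job(err => {
        if (err && !firstError) {
          firstError = err
        }
        cb(null) // never fail the map itself, so the remaining jobs still run
      })
    },
    () => callback(firstError)
  )
}

// e.g. runAllThenFail([cb => cb(), cb => cb(new Error('boom'))], err => console.log(err))
// logs the 'boom' error only after both jobs have finished.
```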


@@ -0,0 +1,626 @@
import _ from 'lodash'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as Errors from './Errors.js'
import {
TextOperation,
InsertOp,
RemoveOp,
RetainOp,
Range,
TrackedChangeList,
} from 'overleaf-editor-core'
/**
* @import { RawEditOperation, TrackedChangeRawData } from 'overleaf-editor-core/lib/types'
*/
export function convertToSummarizedUpdates(chunk, callback) {
const version = chunk.chunk.startVersion
const { files } = chunk.chunk.history.snapshot
const builder = new UpdateSetBuilder(version, files)
for (const change of chunk.chunk.history.changes) {
try {
builder.applyChange(change)
} catch (error) {
return callback(error)
}
}
callback(null, builder.summarizedUpdates)
}
export function convertToDiffUpdates(
projectId,
chunk,
pathname,
fromVersion,
toVersion,
callback
) {
let error
let version = chunk.chunk.startVersion
const { files } = chunk.chunk.history.snapshot
const builder = new UpdateSetBuilder(version, files)
let file = null
for (const change of chunk.chunk.history.changes) {
// Because we're referencing by pathname, which can change, we
// want to get the last file in the range fromVersion:toVersion
// that has the pathname we want. Note that this might not exist yet
// at fromVersion, so we'll just settle for the last existing one we find
// after that.
if (fromVersion <= version && version <= toVersion) {
const currentFile = builder.getFile(pathname)
if (currentFile) {
file = currentFile
}
}
try {
builder.applyChange(change)
} catch (error1) {
error = error1
return callback(error)
}
version += 1
}
// Versions act as fence posts, with updates taking us from one to another,
// so we also need to check after the final update, when we're at the last version.
if (fromVersion <= version && version <= toVersion) {
const currentFile = builder.getFile(pathname)
if (currentFile) {
file = currentFile
}
}
// return an empty diff if the file was flagged as missing with an explicit null
if (builder.getFile(pathname) === null) {
return callback(null, { initialContent: '', updates: [] })
}
if (file == null) {
error = new Errors.NotFoundError(
`pathname '${pathname}' not found in range`
)
return callback(error)
}
WebApiManager.getHistoryId(projectId, (err, historyId) => {
if (err) {
return callback(err)
}
file.getDiffUpdates(historyId, fromVersion, toVersion, callback)
})
}
class UpdateSetBuilder {
constructor(startVersion, files) {
this.version = startVersion
this.summarizedUpdates = []
this.files = Object.create(null)
for (const pathname in files) {
// initialize file from snapshot
const data = files[pathname]
this.files[pathname] = new File(pathname, data, startVersion)
}
}
getFile(pathname) {
return this.files[pathname]
}
applyChange(change) {
const timestamp = new Date(change.timestamp)
let authors = _.map(change.authors, id => {
if (id == null) {
return null
}
return id
})
authors = authors.concat(change.v2Authors || [])
this.currentUpdate = {
meta: {
users: authors,
start_ts: timestamp.getTime(),
end_ts: timestamp.getTime(),
},
v: this.version,
pathnames: new Set([]),
project_ops: [],
}
if (change.origin) {
this.currentUpdate.meta.origin = change.origin
}
for (const op of change.operations) {
this.applyOperation(op, timestamp, authors, change.origin)
}
this.currentUpdate.pathnames = Array.from(this.currentUpdate.pathnames)
this.summarizedUpdates.push(this.currentUpdate)
this.version += 1
}
applyOperation(op, timestamp, authors, origin) {
if (UpdateSetBuilder._isTextOperation(op)) {
this.applyTextOperation(op, timestamp, authors, origin)
} else if (UpdateSetBuilder._isRenameOperation(op)) {
this.applyRenameOperation(op, timestamp, authors)
} else if (UpdateSetBuilder._isRemoveFileOperation(op)) {
this.applyRemoveFileOperation(op, timestamp, authors)
} else if (UpdateSetBuilder._isAddFileOperation(op)) {
this.applyAddFileOperation(op, timestamp, authors)
}
}
applyTextOperation(operation, timestamp, authors, origin) {
const { pathname } = operation
if (pathname === '') {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'pathname is empty for text operation'
)
return
}
const file = this.files[pathname]
if (file == null) {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'file is missing for text operation'
)
this.files[pathname] = null // marker for a missing file
return
}
file.applyTextOperation(authors, timestamp, this.version, operation, origin)
this.currentUpdate.pathnames.add(pathname)
}
applyRenameOperation(operation, timestamp, authors) {
const { pathname, newPathname } = operation
const file = this.files[pathname]
if (file == null) {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'file is missing for rename operation'
)
this.files[pathname] = null // marker for a missing file
return
}
file.rename(newPathname)
delete this.files[pathname]
this.files[newPathname] = file
this.currentUpdate.project_ops.push({
rename: { pathname, newPathname },
})
}
applyAddFileOperation(operation, timestamp, authors) {
const { pathname } = operation
// add file
this.files[pathname] = new File(pathname, operation.file, this.version)
this.currentUpdate.project_ops.push({ add: { pathname } })
}
applyRemoveFileOperation(operation, timestamp, authors) {
const { pathname } = operation
const file = this.files[pathname]
if (file == null) {
// this shouldn't happen, but we continue to allow the user to see the history
logger.warn(
{ operation, timestamp, authors },
'pathname not found when removing file'
)
this.files[pathname] = null // marker for a missing file
return
}
delete this.files[pathname]
this.currentUpdate.project_ops.push({ remove: { pathname } })
}
static _isTextOperation(op) {
return Object.prototype.hasOwnProperty.call(op, 'textOperation')
}
static _isRenameOperation(op) {
return (
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
op.newPathname !== ''
)
}
static _isRemoveFileOperation(op) {
return (
Object.prototype.hasOwnProperty.call(op, 'newPathname') &&
op.newPathname === ''
)
}
static _isAddFileOperation(op) {
return Object.prototype.hasOwnProperty.call(op, 'file')
}
}
/**
* @param {string} content
* @param {TrackedChangeList} trackedChanges
* @returns {string}
*/
function removeTrackedDeletesFromString(content, trackedChanges) {
let result = ''
let cursor = 0
const trackedDeletes = trackedChanges
.asSorted()
.filter(tc => tc.tracking.type === 'delete')
for (const trackedChange of trackedDeletes) {
if (cursor < trackedChange.range.start) {
result += content.slice(cursor, trackedChange.range.start)
}
// skip the tracked change itself
cursor = trackedChange.range.end
}
result += content.slice(cursor)
return result
}
class File {
constructor(pathname, snapshot, initialVersion) {
this.pathname = pathname
this.snapshot = snapshot
this.initialVersion = initialVersion
this.operations = []
}
applyTextOperation(authors, timestamp, version, operation, origin) {
this.operations.push({ authors, timestamp, version, operation, origin })
}
rename(pathname) {
this.pathname = pathname
}
getDiffUpdates(historyId, fromVersion, toVersion, callback) {
if (this.snapshot.stringLength == null) {
// Binary file
return callback(null, { binary: true })
}
this._loadContentAndRanges(historyId, (error, content, ranges) => {
if (error != null) {
return callback(OError.tag(error))
}
const trackedChanges = TrackedChangeList.fromRaw(
ranges?.trackedChanges || []
)
/** @type {string | undefined} */
let initialContent
const updates = []
for (const operationInfo of this.operations) {
if (!('textOperation' in operationInfo.operation)) {
// We only care about text operations
continue
}
const { authors, timestamp, version, operation } = operationInfo
// Set the initialContent to the latest version we have before the diff
// begins. 'version' here refers to the document version as we are
// applying the updates. So we store the content *before* applying the
// updates.
if (version >= fromVersion && initialContent === undefined) {
initialContent = removeTrackedDeletesFromString(
content,
trackedChanges
)
}
let ops
;({ content, ops } = this._convertTextOperation(
content,
operation,
trackedChanges
))
// We only need to return the updates between fromVersion and toVersion
if (fromVersion <= version && version < toVersion) {
const update = {
meta: {
users: authors,
start_ts: timestamp.getTime(),
end_ts: timestamp.getTime(),
},
v: version,
op: ops,
}
if (operationInfo.origin) {
update.meta.origin = operationInfo.origin
}
updates.push(update)
}
}
if (initialContent === undefined) {
initialContent = removeTrackedDeletesFromString(content, trackedChanges)
}
callback(null, { initialContent, updates })
})
}
/**
*
* @param {string} initialContent
* @param {RawEditOperation} operation
* @param {TrackedChangeList} trackedChanges
*/
_convertTextOperation(initialContent, operation, trackedChanges) {
const textOp = TextOperation.fromJSON(operation)
const textUpdateBuilder = new TextUpdateBuilder(
initialContent,
trackedChanges
)
for (const op of textOp.ops) {
textUpdateBuilder.applyOp(op)
}
textUpdateBuilder.finish()
return {
content: textUpdateBuilder.result,
ops: textUpdateBuilder.changes,
}
}
_loadContentAndRanges(historyId, callback) {
HistoryStoreManager.getProjectBlob(
historyId,
this.snapshot.hash,
(err, content) => {
if (err) {
return callback(err)
}
if (this.snapshot.rangesHash) {
HistoryStoreManager.getProjectBlob(
historyId,
this.snapshot.rangesHash,
(err, ranges) => {
if (err) {
return callback(err)
}
return callback(null, content, JSON.parse(ranges))
}
)
} else {
return callback(null, content, undefined)
}
}
)
}
}
class TextUpdateBuilder {
/**
*
* @param {string} source
* @param {TrackedChangeList} ranges
*/
constructor(source, ranges) {
this.trackedChanges = ranges
this.source = source
this.sourceCursor = 0
this.result = ''
/** @type {({i: string, p: number} | {d: string, p: number})[]} */
this.changes = []
}
applyOp(op) {
if (op instanceof RetainOp) {
const length = this.result.length
this.applyRetain(op)
this.trackedChanges.applyRetain(length, op.length, {
tracking: op.tracking,
})
}
if (op instanceof InsertOp) {
const length = this.result.length
this.applyInsert(op)
this.trackedChanges.applyInsert(length, op.insertion, {
tracking: op.tracking,
})
}
if (op instanceof RemoveOp) {
const length = this.result.length
this.applyDelete(op)
this.trackedChanges.applyDelete(length, op.length)
}
}
/**
*
* @param {RetainOp} retain
*/
applyRetain(retain) {
const resultRetentionRange = new Range(this.result.length, retain.length)
const sourceRetentionRange = new Range(this.sourceCursor, retain.length)
let scanCursor = this.result.length
if (retain.tracking) {
// We are modifying existing tracked deletes. We need to treat removal
// (type insert/none) of a tracked delete as an insertion. Similarly, any
// range we introduce as a tracked deletion must be reported as a deletion.
const trackedDeletes = this.trackedChanges
.asSorted()
.filter(
tc =>
tc.tracking.type === 'delete' &&
tc.range.overlaps(resultRetentionRange)
)
const sourceOffset = this.sourceCursor - this.result.length
for (const trackedDelete of trackedDeletes) {
const resultTrackedDelete = trackedDelete.range
const sourceTrackedDelete = trackedDelete.range.moveBy(sourceOffset)
if (scanCursor < resultTrackedDelete.start) {
if (retain.tracking.type === 'delete') {
this.changes.push({
d: this.source.slice(
this.sourceCursor,
sourceTrackedDelete.start
),
p: this.result.length,
})
}
this.result += this.source.slice(
this.sourceCursor,
sourceTrackedDelete.start
)
scanCursor = resultTrackedDelete.start
this.sourceCursor = sourceTrackedDelete.start
}
const endOfInsertionResult = Math.min(
resultTrackedDelete.end,
resultRetentionRange.end
)
const endOfInsertionSource = Math.min(
sourceTrackedDelete.end,
sourceRetentionRange.end
)
const text = this.source.slice(this.sourceCursor, endOfInsertionSource)
if (
retain.tracking.type === 'none' ||
retain.tracking.type === 'insert'
) {
this.changes.push({
i: text,
p: this.result.length,
})
}
this.result += text
// skip the tracked delete itself
scanCursor = endOfInsertionResult
this.sourceCursor = endOfInsertionSource
if (scanCursor >= resultRetentionRange.end) {
break
}
}
}
if (scanCursor < resultRetentionRange.end) {
// The last region is not a tracked delete. But we should still handle
// a new tracked delete as a deletion.
const text = this.source.slice(
this.sourceCursor,
sourceRetentionRange.end
)
if (retain.tracking?.type === 'delete') {
this.changes.push({
d: text,
p: this.result.length,
})
}
this.result += text
}
this.sourceCursor = sourceRetentionRange.end
}
/**
*
* @param {InsertOp} insert
*/
applyInsert(insert) {
if (insert.tracking?.type !== 'delete') {
// Skip tracked deletions
this.changes.push({
i: insert.insertion,
p: this.result.length,
})
}
this.result += insert.insertion
// The source cursor doesn't advance
}
/**
*
* @param {RemoveOp} deletion
*/
applyDelete(deletion) {
const sourceDeletionRange = new Range(this.sourceCursor, deletion.length)
const resultDeletionRange = new Range(this.result.length, deletion.length)
const trackedDeletes = this.trackedChanges
.asSorted()
.filter(
tc =>
tc.tracking.type === 'delete' &&
tc.range.overlaps(resultDeletionRange)
)
.sort((a, b) => a.range.start - b.range.start)
let scanCursor = this.result.length
const sourceOffset = this.sourceCursor - this.result.length
for (const trackedDelete of trackedDeletes) {
const resultTrackDeleteRange = trackedDelete.range
const sourceTrackDeleteRange = trackedDelete.range.moveBy(sourceOffset)
if (scanCursor < resultTrackDeleteRange.start) {
this.changes.push({
d: this.source.slice(this.sourceCursor, sourceTrackDeleteRange.start),
p: this.result.length,
})
}
// skip the tracked delete itself
scanCursor = Math.min(resultTrackDeleteRange.end, resultDeletionRange.end)
this.sourceCursor = Math.min(
sourceTrackDeleteRange.end,
sourceDeletionRange.end
)
if (scanCursor >= resultDeletionRange.end) {
break
}
}
if (scanCursor < resultDeletionRange.end) {
this.changes.push({
d: this.source.slice(this.sourceCursor, sourceDeletionRange.end),
p: this.result.length,
})
}
this.sourceCursor = sourceDeletionRange.end
}
finish() {
if (this.sourceCursor < this.source.length) {
this.result += this.source.slice(this.sourceCursor)
}
for (const op of this.changes) {
if ('p' in op && typeof op.p === 'number') {
// Maybe we have to move the position of the deletion to account for
// tracked changes that we're hiding in the UI.
op.p -= this.trackedChanges
.asSorted()
.filter(tc => tc.tracking.type === 'delete' && tc.range.start < op.p)
.map(tc => {
if (tc.range.end < op.p) {
return tc.range.length
}
return op.p - tc.range.start
})
.reduce((a, b) => a + b, 0)
}
}
}
}
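
For orientation, a small sketch of the fence-post versioning used by `convertToDiffUpdates` above; the versions and change names are made up:

```
// A chunk starting at version 10 with three changes covers versions 10..13:
// the change applied at v10 takes the project to v11, and so on.
const startVersion = 10
const changeDescriptions = ['change A', 'change B', 'change C'] // placeholders

let version = startVersion
for (const description of changeDescriptions) {
  console.log(`${description}: v${version} -> v${version + 1}`)
  version += 1
}
// version === 13 here. For a diff from fromVersion=11 to toVersion=13, the
// updates with 11 <= v < 13 are returned, and the file content is taken from
// the last version <= 13 at which the pathname exists.
```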


@@ -0,0 +1,274 @@
import _ from 'lodash'
import OError from '@overleaf/o-error'
export class ConsistencyError extends OError {}
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
export function buildDiff(initialContent, updates) {
let diff = [{ u: initialContent }]
for (const update of updates) {
diff = applyUpdateToDiff(diff, update)
}
diff = compressDiff(diff)
return diff
}
_mocks.compressDiff = diff => {
const newDiff = []
for (const part of diff) {
const users = part.meta?.users ?? []
if (part.meta?.origin?.kind === 'history-resync') {
// Skip history resync updates. Inserts are converted to unchanged text
// and deletes are skipped, so that they effectively don't appear in the
// diff.
if (part.u != null) {
newDiff.push(part)
} else if (part.i != null) {
newDiff.push({ u: part.i })
}
continue
}
if (newDiff.length === 0) {
// If we haven't seen other parts yet, we have nothing to merge.
newDiff.push(part)
continue
}
const lastPart = newDiff[newDiff.length - 1]
const lastUsers = lastPart.meta?.users ?? []
const usersNotInBothParts = _.xor(users, lastUsers)
if (usersNotInBothParts.length > 0) {
// If the set of users in the last part and this part are not the same, we
// can't merge.
newDiff.push(part)
continue
}
if (lastPart.i != null && part.i != null) {
// Merge two inserts
lastPart.i += part.i
lastPart.meta.start_ts = Math.min(
lastPart.meta.start_ts,
part.meta.start_ts
)
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts)
} else if (lastPart.d != null && part.d != null) {
// Merge two deletes
lastPart.d += part.d
lastPart.meta.start_ts = Math.min(
lastPart.meta.start_ts,
part.meta.start_ts
)
lastPart.meta.end_ts = Math.max(lastPart.meta.end_ts, part.meta.end_ts)
} else {
newDiff.push(part)
}
}
return newDiff
}
export function compressDiff(...args) {
return _mocks.compressDiff(...args)
}
export function applyOpToDiff(diff, op, meta) {
let consumedDiff
let remainingDiff = diff.slice()
;({ consumedDiff, remainingDiff } = _consumeToOffset(remainingDiff, op.p))
const newDiff = consumedDiff
if (op.i != null) {
newDiff.push({
i: op.i,
meta,
})
} else if (op.d != null) {
;({ consumedDiff, remainingDiff } = _consumeDiffAffectedByDeleteOp(
remainingDiff,
op,
meta
))
newDiff.push(...(consumedDiff || []))
}
newDiff.push(...(remainingDiff || []))
return newDiff
}
_mocks.applyUpdateToDiff = (diff, update) => {
for (const op of update.op) {
if (op.broken !== true) {
diff = applyOpToDiff(diff, op, update.meta)
}
}
return diff
}
export function applyUpdateToDiff(...args) {
return _mocks.applyUpdateToDiff(...args)
}
function _consumeToOffset(remainingDiff, totalOffset) {
let part
const consumedDiff = []
let position = 0
while ((part = remainingDiff.shift())) {
const length = _getLengthOfDiffPart(part)
if (part.d != null) {
consumedDiff.push(part)
} else if (position + length >= totalOffset) {
const partOffset = totalOffset - position
if (partOffset > 0) {
consumedDiff.push(_slicePart(part, 0, partOffset))
}
if (partOffset < length) {
remainingDiff.unshift(_slicePart(part, partOffset))
}
break
} else {
position += length
consumedDiff.push(part)
}
}
return {
consumedDiff,
remainingDiff,
}
}
function _consumeDiffAffectedByDeleteOp(remainingDiff, deleteOp, meta) {
const consumedDiff = []
let remainingOp = deleteOp
while (remainingOp && remainingDiff.length > 0) {
let newPart
;({ newPart, remainingDiff, remainingOp } = _consumeDeletedPart(
remainingDiff,
remainingOp,
meta
))
if (newPart != null) {
consumedDiff.push(newPart)
}
}
return {
consumedDiff,
remainingDiff,
}
}
function _consumeDeletedPart(remainingDiff, op, meta) {
let deletedContent, newPart, remainingOp
const part = remainingDiff.shift()
const partLength = _getLengthOfDiffPart(part)
if (part.d != null) {
// Skip existing deletes
remainingOp = op
newPart = part
} else if (partLength > op.d.length) {
// Only the first bit of the part has been deleted
const remainingPart = _slicePart(part, op.d.length)
remainingDiff.unshift(remainingPart)
deletedContent = _getContentOfPart(part).slice(0, op.d.length)
if (deletedContent !== op.d) {
throw new ConsistencyError(
`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`
)
}
if (part.u != null) {
newPart = {
d: op.d,
meta,
}
} else if (part.i != null) {
newPart = null
}
remainingOp = null
} else if (partLength === op.d.length) {
// The entire part has been deleted, but it is the last part
deletedContent = _getContentOfPart(part)
if (deletedContent !== op.d) {
throw new ConsistencyError(
`deleted content, '${deletedContent}', does not match delete op, '${op.d}'`
)
}
if (part.u != null) {
newPart = {
d: op.d,
meta,
}
} else if (part.i != null) {
newPart = null
}
remainingOp = null
} else if (partLength < op.d.length) {
// The entire part has been deleted and there is more
deletedContent = _getContentOfPart(part)
const opContent = op.d.slice(0, deletedContent.length)
if (deletedContent !== opContent) {
throw new ConsistencyError(
`deleted content, '${deletedContent}', does not match delete op, '${opContent}'`
)
}
if (part.u) {
newPart = {
d: part.u,
meta,
}
} else if (part.i != null) {
newPart = null
}
remainingOp = {
p: op.p,
d: op.d.slice(_getLengthOfDiffPart(part)),
}
}
return {
newPart,
remainingDiff,
remainingOp,
}
}
function _slicePart(basePart, from, to) {
let part
if (basePart.u != null) {
part = { u: basePart.u.slice(from, to) }
} else if (basePart.i != null) {
part = { i: basePart.i.slice(from, to) }
}
if (basePart.meta != null) {
part.meta = basePart.meta
}
return part
}
function _getLengthOfDiffPart(part) {
return (part.u || part.d || part.i || '').length
}
function _getContentOfPart(part) {
return part.u || part.d || part.i || ''
}
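
A small usage sketch of `buildDiff` from the module above (imported elsewhere as `DiffGenerator`); the user id and timestamps are placeholders:

```
import { buildDiff } from './DiffGenerator.js'

// One update inserting ' world' at offset 5 of 'hello'. The meta shape
// mirrors what ChunkTranslator attaches to each update.
const updates = [
  {
    op: [{ i: ' world', p: 5 }],
    meta: { users: ['someone'], start_ts: 0, end_ts: 0 },
  },
]

const diff = buildDiff('hello', updates)
// diff is roughly:
// [ { u: 'hello' },
//   { i: ' world', meta: { users: ['someone'], start_ts: 0, end_ts: 0 } } ]
```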


@@ -0,0 +1,240 @@
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import async from 'async'
import * as DiffGenerator from './DiffGenerator.js'
import * as FileTreeDiffGenerator from './FileTreeDiffGenerator.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as ChunkTranslator from './ChunkTranslator.js'
import * as Errors from './Errors.js'
let MAX_CHUNK_REQUESTS = 10
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
export function getDiff(projectId, pathname, fromVersion, toVersion, callback) {
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error) {
return callback(OError.tag(error))
}
_getProjectUpdatesBetweenVersions(
projectId,
pathname,
fromVersion,
toVersion,
(error, result) => {
if (error) {
return callback(OError.tag(error))
}
const { binary, initialContent, updates } = result
let diff
if (binary) {
diff = { binary: true }
} else {
try {
diff = DiffGenerator.buildDiff(initialContent, updates)
} catch (err) {
return callback(
OError.tag(err, 'failed to build diff', {
projectId,
pathname,
fromVersion,
toVersion,
})
)
}
}
callback(null, diff)
}
)
})
}
export function getFileTreeDiff(projectId, fromVersion, toVersion, callback) {
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error) {
return callback(OError.tag(error))
}
_getChunksAsSingleChunk(
projectId,
fromVersion,
toVersion,
(error, chunk) => {
let diff
if (error) {
return callback(OError.tag(error))
}
try {
diff = FileTreeDiffGenerator.buildDiff(chunk, fromVersion, toVersion)
} catch (error1) {
error = error1
if (error instanceof Errors.InconsistentChunkError) {
return callback(error)
} else {
throw OError.tag(error)
}
}
callback(null, diff)
}
)
})
}
export function _getChunksAsSingleChunk(
projectId,
fromVersion,
toVersion,
callback
) {
logger.debug(
{ projectId, fromVersion, toVersion },
'[_getChunksAsSingleChunk] getting chunks'
)
_getChunks(projectId, fromVersion, toVersion, (error, chunks) => {
if (error) {
return callback(OError.tag(error))
}
logger.debug(
{ projectId, fromVersion, toVersion, chunks },
'[_getChunksAsSingleChunk] got chunks'
)
const chunk = _concatChunks(chunks)
callback(null, chunk)
})
}
_mocks._getProjectUpdatesBetweenVersions = (
projectId,
pathname,
fromVersion,
toVersion,
callback
) => {
_getChunksAsSingleChunk(projectId, fromVersion, toVersion, (error, chunk) => {
if (error) {
return callback(OError.tag(error))
}
logger.debug(
{ projectId, pathname, fromVersion, toVersion, chunk },
'[_getProjectUpdatesBetweenVersions] concatted chunk'
)
ChunkTranslator.convertToDiffUpdates(
projectId,
chunk,
pathname,
fromVersion,
toVersion,
callback
)
})
}
export function _getProjectUpdatesBetweenVersions(...args) {
_mocks._getProjectUpdatesBetweenVersions(...args)
}
_mocks._getChunks = (projectId, fromVersion, toVersion, callback) => {
let chunksRequested = 0
let lastChunkStartVersion = toVersion
const chunks = []
function shouldRequestAnotherChunk(cb) {
const stillUnderChunkLimit = chunksRequested < MAX_CHUNK_REQUESTS
const stillNeedVersions = fromVersion < lastChunkStartVersion
const stillSaneStartVersion = lastChunkStartVersion > 0
logger.debug(
{
projectId,
stillUnderChunkLimit,
stillNeedVersions,
stillSaneStartVersion,
fromVersion,
lastChunkStartVersion,
chunksRequested,
},
'[_getChunks.shouldRequestAnotherChunk]'
)
return cb(
null,
stillUnderChunkLimit && stillNeedVersions && stillSaneStartVersion
)
}
function getNextChunk(cb) {
logger.debug(
{
projectId,
lastChunkStartVersion,
},
'[_getChunks.getNextChunk]'
)
WebApiManager.getHistoryId(projectId, (error, historyId) => {
if (error) {
return cb(OError.tag(error))
}
HistoryStoreManager.getChunkAtVersion(
projectId,
historyId,
lastChunkStartVersion,
(error, chunk) => {
if (error) {
return cb(OError.tag(error))
}
lastChunkStartVersion = chunk.chunk.startVersion
chunksRequested += 1
chunks.push(chunk)
cb()
}
)
})
}
getNextChunk(error => {
if (error) {
return callback(OError.tag(error))
}
async.whilst(shouldRequestAnotherChunk, getNextChunk, error => {
if (error) {
return callback(error)
}
if (chunksRequested >= MAX_CHUNK_REQUESTS) {
error = new Errors.BadRequestError('Diff spans too many chunks')
callback(error)
} else {
callback(null, chunks)
}
})
})
}
export function _getChunks(...args) {
_mocks._getChunks(...args)
}
_mocks._concatChunks = chunks => {
chunks.reverse()
const chunk = chunks[0]
// We will append all of the changes from the later
// chunks onto the first one, to form one 'big' chunk.
for (const nextChunk of chunks.slice(1)) {
chunk.chunk.history.changes = chunk.chunk.history.changes.concat(
nextChunk.chunk.history.changes
)
}
return chunk
}
function _concatChunks(...args) {
return _mocks._concatChunks(...args)
}
// for tests
export function setMaxChunkRequests(value) {
MAX_CHUNK_REQUESTS = value
}
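
The chunk walk in `_getChunks` and the merge in `_concatChunks` can be pictured with two toy chunks; only the fields the code touches are shown, and the values are invented:

```
// Chunks are fetched newest-first (walking back from toVersion), reversed,
// and their change lists appended onto the oldest chunk.
const chunks = [
  { chunk: { startVersion: 20, history: { changes: ['c20', 'c21'] } } }, // fetched first
  { chunk: { startVersion: 18, history: { changes: ['c18', 'c19'] } } }, // fetched second
]

chunks.reverse() // oldest chunk first
const merged = chunks[0]
for (const nextChunk of chunks.slice(1)) {
  merged.chunk.history.changes = merged.chunk.history.changes.concat(
    nextChunk.chunk.history.changes
  )
}
// merged.chunk.startVersion === 18
// merged.chunk.history.changes === ['c18', 'c19', 'c20', 'c21']
```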


@@ -0,0 +1,80 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import request from 'request'
import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import OError from '@overleaf/o-error'
export function getDocument(projectId, docId, callback) {
if (callback == null) {
callback = function () {}
}
const url = `${Settings.apis.documentupdater.url}/project/${projectId}/doc/${docId}`
logger.debug({ projectId, docId }, 'getting doc from document updater')
return request.get(url, function (error, res, body) {
if (error != null) {
return callback(OError.tag(error))
}
if (res.statusCode >= 200 && res.statusCode < 300) {
try {
body = JSON.parse(body)
} catch (error1) {
error = error1
return callback(error)
}
logger.debug(
{ projectId, docId, version: body.version },
'got doc from document updater'
)
return callback(null, body.lines.join('\n'), body.version)
} else {
error = new OError(
`doc updater returned a non-success status code: ${res.statusCode}`,
{ project_id: projectId, doc_id: docId, url }
)
return callback(error)
}
})
}
export function setDocument(projectId, docId, content, userId, callback) {
if (callback == null) {
callback = function () {}
}
const url = `${Settings.apis.documentupdater.url}/project/${projectId}/doc/${docId}`
logger.debug({ projectId, docId }, 'setting doc in document updater')
return request.post(
{
url,
json: {
lines: content.split('\n'),
source: 'restore',
user_id: userId,
undoing: true,
},
},
function (error, res, body) {
if (error != null) {
return callback(OError.tag(error))
}
if (res.statusCode >= 200 && res.statusCode < 300) {
return callback(null)
} else {
error = new OError(
`doc updater returned a non-success status code: ${res.statusCode}`,
{ project_id: projectId, doc_id: docId, url }
)
return callback(error)
}
}
)
}
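
A hypothetical caller of `getDocument`, showing the callback signature used above (content as a single string plus the document version); the ids are placeholders and the import path is assumed, not taken from this commit:

```
import * as DocumentUpdaterManager from './DocumentUpdaterManager.js' // assumed path

const projectId = 'a-project-id' // placeholder
const docId = 'a-doc-id' // placeholder

DocumentUpdaterManager.getDocument(projectId, docId, (err, content, version) => {
  if (err) {
    console.error(err)
    return
  }
  // content is the document as one string (lines joined with '\n')
  console.log(`doc at version ${version}, ${content.length} characters`)
})
```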


@@ -0,0 +1,267 @@
// @ts-check
import { callbackify } from 'node:util'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
import OError from '@overleaf/o-error'
import { db } from './mongodb.js'
/**
* @import { ProjectHistoryFailure } from './mongo-types'
*/
/**
* @param {string} projectId
* @param {number} queueSize
* @param {Error} error
* @return {Promise<ProjectHistoryFailure>} the failure record
*/
async function record(projectId, queueSize, error) {
const errorRecord = {
queueSize,
error: error.toString(),
stack: error.stack ?? '',
ts: new Date(),
}
logger.debug(
{ projectId, errorRecord },
'recording failed attempt to process updates'
)
const result = await db.projectHistoryFailures.findOneAndUpdate(
{ project_id: projectId },
{
$set: errorRecord,
$inc: { attempts: 1 },
$push: {
history: {
$each: [errorRecord],
$position: 0,
// only keep recent failures
$slice: 10,
},
},
},
{ upsert: true, returnDocument: 'after', includeResultMetadata: true }
)
if (result.value == null) {
// Since we upsert, the result should always have a value
throw new OError('no value returned when recording an error', { projectId })
}
return result.value
}
async function clearError(projectId) {
await db.projectHistoryFailures.deleteOne({ project_id: projectId })
}
async function setForceDebug(projectId, state) {
if (state == null) {
state = true
}
logger.debug({ projectId, state }, 'setting forceDebug state for project')
await db.projectHistoryFailures.updateOne(
{ project_id: projectId },
{ $set: { forceDebug: state } },
{ upsert: true }
)
}
// we only record the sync start time, and not the end time, because the
// record should be cleared on success.
async function recordSyncStart(projectId) {
await db.projectHistoryFailures.updateOne(
{ project_id: projectId },
{
$currentDate: { resyncStartedAt: true },
$inc: { resyncAttempts: 1 },
$push: {
history: {
$each: [{ resyncStartedAt: new Date() }],
$position: 0,
$slice: 10,
},
},
},
{ upsert: true }
)
}
/**
* @param projectId
*/
async function getFailureRecord(projectId) {
return await db.projectHistoryFailures.findOne({ project_id: projectId })
}
async function getLastFailure(projectId) {
const result = await db.projectHistoryFailures.findOneAndUpdate(
{ project_id: projectId },
{ $inc: { requestCount: 1 } }, // increment the request count every time we check the last failure
{ projection: { error: 1, ts: 1 } }
)
return result && result.value
}
async function getFailedProjects() {
return await db.projectHistoryFailures.find({}).toArray()
}
async function getFailuresByType() {
const results = await db.projectHistoryFailures.find({}).toArray()
const failureCounts = {}
const failureAttempts = {}
const failureRequests = {}
const maxQueueSize = {}
// count all the failures and number of attempts by type
for (const result of results || []) {
const failureType = result.error
const attempts = result.attempts || 1 // allow for field to be absent
const requests = result.requestCount || 0
const queueSize = result.queueSize || 0
if (failureCounts[failureType] > 0) {
failureCounts[failureType]++
failureAttempts[failureType] += attempts
failureRequests[failureType] += requests
maxQueueSize[failureType] = Math.max(queueSize, maxQueueSize[failureType])
} else {
failureCounts[failureType] = 1
failureAttempts[failureType] = attempts
failureRequests[failureType] = requests
maxQueueSize[failureType] = queueSize
}
}
return { failureCounts, failureAttempts, failureRequests, maxQueueSize }
}
async function getFailures() {
const { failureCounts, failureAttempts, failureRequests, maxQueueSize } =
await getFailuresByType()
let attempts, failureType, label, requests
const shortNames = {
'Error: bad response from filestore: 404': 'filestore-404',
'Error: bad response from filestore: 500': 'filestore-500',
'NotFoundError: got a 404 from web api': 'web-api-404',
'OError: history store a non-success status code: 413': 'history-store-413',
'OError: history store a non-success status code: 422': 'history-store-422',
'OError: history store a non-success status code: 500': 'history-store-500',
'OError: history store a non-success status code: 503': 'history-store-503',
'Error: history store a non-success status code: 413': 'history-store-413',
'Error: history store a non-success status code: 422': 'history-store-422',
'Error: history store a non-success status code: 500': 'history-store-500',
'Error: history store a non-success status code: 503': 'history-store-503',
'Error: web returned a non-success status code: 500 (attempts: 2)':
'web-500',
'Error: ESOCKETTIMEDOUT': 'socket-timeout',
'Error: no project found': 'no-project-found',
'OpsOutOfOrderError: project structure version out of order on incoming updates':
'incoming-project-version-out-of-order',
'OpsOutOfOrderError: doc version out of order on incoming updates':
'incoming-doc-version-out-of-order',
'OpsOutOfOrderError: project structure version out of order':
'chunk-project-version-out-of-order',
'OpsOutOfOrderError: doc version out of order':
'chunk-doc-version-out-of-order',
'Error: failed to extend lock': 'lock-overrun',
'Error: tried to release timed out lock': 'lock-overrun',
'Error: Timeout': 'lock-overrun',
'Error: sync ongoing': 'sync-ongoing',
'SyncError: unexpected resyncProjectStructure update': 'sync-error',
'[object Error]': 'unknown-error-object',
'UpdateWithUnknownFormatError: update with unknown format':
'unknown-format',
'Error: update with unknown format': 'unknown-format',
'TextOperationError: The base length of the second operation has to be the target length of the first operation':
'text-op-error',
'Error: ENOSPC: no space left on device, write': 'ENOSPC',
'*': 'other',
}
// set all the known errors to zero if not present (otherwise gauges stay on their last value)
const summaryCounts = {}
const summaryAttempts = {}
const summaryRequests = {}
const summaryMaxQueueSize = {}
for (failureType in shortNames) {
label = shortNames[failureType]
summaryCounts[label] = 0
summaryAttempts[label] = 0
summaryRequests[label] = 0
summaryMaxQueueSize[label] = 0
}
// record a metric for each type of failure
for (failureType in failureCounts) {
const failureCount = failureCounts[failureType]
label = shortNames[failureType] || shortNames['*']
summaryCounts[label] += failureCount
summaryAttempts[label] += failureAttempts[failureType]
summaryRequests[label] += failureRequests[failureType]
summaryMaxQueueSize[label] = Math.max(
maxQueueSize[failureType],
summaryMaxQueueSize[label]
)
}
for (label in summaryCounts) {
const count = summaryCounts[label]
metrics.globalGauge('failed', count, 1, { status: label })
}
for (label in summaryAttempts) {
attempts = summaryAttempts[label]
metrics.globalGauge('attempts', attempts, 1, { status: label })
}
for (label in summaryRequests) {
requests = summaryRequests[label]
metrics.globalGauge('requests', requests, 1, { status: label })
}
for (label in summaryMaxQueueSize) {
const queueSize = summaryMaxQueueSize[label]
metrics.globalGauge('max-queue-size', queueSize, 1, { status: label })
}
return {
counts: summaryCounts,
attempts: summaryAttempts,
requests: summaryRequests,
maxQueueSize: summaryMaxQueueSize,
}
}
// EXPORTS
const getFailedProjectsCb = callbackify(getFailedProjects)
const getFailureRecordCb = callbackify(getFailureRecord)
const getFailuresCb = callbackify(getFailures)
const getLastFailureCb = callbackify(getLastFailure)
const recordCb = callbackify(record)
const clearErrorCb = callbackify(clearError)
const recordSyncStartCb = callbackify(recordSyncStart)
const setForceDebugCb = callbackify(setForceDebug)
export {
getFailedProjectsCb as getFailedProjects,
getFailureRecordCb as getFailureRecord,
getLastFailureCb as getLastFailure,
getFailuresCb as getFailures,
recordCb as record,
clearErrorCb as clearError,
recordSyncStartCb as recordSyncStart,
setForceDebugCb as setForceDebug,
}
export const promises = {
getFailedProjects,
getFailureRecord,
getLastFailure,
getFailures,
record,
clearError,
recordSyncStart,
setForceDebug,
}
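
For reference, roughly what a `projectHistoryFailures` document looks like after `record` has run a couple of times; the field names follow the update above, while the values are invented:

```
const exampleFailureRecord = {
  project_id: 'a-project-id',
  queueSize: 42,
  error: 'Error: bad response from filestore: 500',
  stack: 'Error: bad response from filestore: 500\n    at ...',
  ts: new Date(),
  attempts: 2,
  history: [
    // most recent first ($position: 0), capped at 10 entries ($slice: 10)
    { queueSize: 42, error: 'Error: ...', stack: '...', ts: new Date() },
    { queueSize: 17, error: 'Error: ...', stack: '...', ts: new Date() },
  ],
}
```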


@@ -0,0 +1,11 @@
import OError from '@overleaf/o-error'
export class NotFoundError extends OError {}
export class BadRequestError extends OError {}
export class SyncError extends OError {}
export class OpsOutOfOrderError extends OError {}
export class InconsistentChunkError extends OError {}
export class UpdateWithUnknownFormatError extends OError {}
export class UnexpectedOpTypeError extends OError {}
export class TooManyRequestsError extends OError {}
export class NeedFullProjectStructureResyncError extends OError {}


@@ -0,0 +1,129 @@
/* eslint-disable
no-return-assign,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import Core from 'overleaf-editor-core'
import logger from '@overleaf/logger'
import * as Errors from './Errors.js'
const { MoveFileOperation, AddFileOperation, EditFileOperation } = Core
export function buildDiff(chunk, fromVersion, toVersion) {
chunk = Core.Chunk.fromRaw(chunk.chunk)
const chunkStartVersion = chunk.getStartVersion()
const diff = _getInitialDiffSnapshot(chunk, fromVersion)
const changes = chunk
.getChanges()
.slice(fromVersion - chunkStartVersion, toVersion - chunkStartVersion)
for (let i = 0; i < changes.length; i++) {
const change = changes[i]
for (const operation of Array.from(change.getOperations())) {
if (operation.pathname === null || operation.pathname === '') {
// skip operations for missing files
logger.warn({ diff, operation }, 'invalid pathname in operation')
} else if (operation instanceof EditFileOperation) {
_applyEditFileToDiff(diff, operation)
} else if (operation instanceof AddFileOperation) {
_applyAddFileToDiff(diff, operation)
} else if (operation instanceof MoveFileOperation) {
if (operation.isRemoveFile()) {
const deletedAtV = fromVersion + i
_applyDeleteFileToDiff(diff, operation, deletedAtV)
} else {
_applyMoveFileToDiff(diff, operation)
}
}
}
}
return Object.values(diff)
}
function _getInitialDiffSnapshot(chunk, fromVersion) {
// Start with a 'diff' which is snapshot of the filetree at the beginning,
// with nothing in the diff marked as changed.
// Use a bare object to protect against reserved names.
const diff = Object.create(null)
const files = _getInitialFiles(chunk, fromVersion)
for (const [pathname, file] of Object.entries(files)) {
diff[pathname] = { pathname, editable: file.isEditable() }
}
return diff
}
function _getInitialFiles(chunk, fromVersion) {
const snapshot = chunk.getSnapshot()
const changes = chunk
.getChanges()
.slice(0, fromVersion - chunk.getStartVersion())
snapshot.applyAll(changes)
return snapshot.fileMap.files
}
function _applyAddFileToDiff(diff, operation) {
return (diff[operation.pathname] = {
pathname: operation.pathname,
operation: 'added',
editable: operation.file.isEditable(),
})
}
function _applyEditFileToDiff(diff, operation) {
const change = diff[operation.pathname]
if ((change != null ? change.operation : undefined) == null) {
// avoid exception for non-existent change
return (diff[operation.pathname] = {
pathname: operation.pathname,
operation: 'edited',
})
}
}
function _applyMoveFileToDiff(diff, operation) {
if (
diff[operation.newPathname] != null &&
diff[operation.newPathname].operation !== 'removed'
) {
const err = new Errors.InconsistentChunkError(
'trying to move to file that already exists',
{ diff, operation }
)
throw err
}
const change = diff[operation.pathname]
if (change == null) {
logger.warn({ diff, operation }, 'tried to rename non-existent file')
return
}
change.newPathname = operation.newPathname
if (change.operation === 'added') {
// If this file was added this time, just leave it as an add, but
// at the new name.
change.pathname = operation.newPathname
delete change.newPathname
} else {
change.operation = 'renamed'
}
diff[operation.newPathname] = change
return delete diff[operation.pathname]
}
function _applyDeleteFileToDiff(diff, operation, deletedAtV) {
// avoid exception for non-existent change
if (diff[operation.pathname] != null) {
diff[operation.pathname].operation = 'removed'
}
return diff[operation.pathname] != null
? (diff[operation.pathname].deletedAtV = deletedAtV)
: undefined
}
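
An illustrative result of `buildDiff` in this module, with invented paths and versions; each entry describes what happened to one path between the two versions:

```
const exampleFileTreeDiff = [
  { pathname: 'main.tex', editable: true }, // present at fromVersion, untouched
  { pathname: 'chapters/intro.tex', operation: 'added', editable: true },
  { pathname: 'refs.bib', operation: 'edited' },
  {
    pathname: 'figures/plot1.png', // original name from the starting snapshot
    editable: false,
    newPathname: 'figures/plot2.png',
    operation: 'renamed',
  },
  { pathname: 'old.tex', editable: true, operation: 'removed', deletedAtV: 7 },
]
```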


@@ -0,0 +1,142 @@
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import async from 'async'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import metrics from '@overleaf/metrics'
import _ from 'lodash'
import * as RedisManager from './RedisManager.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as ErrorRecorder from './ErrorRecorder.js'
export function flushIfOld(projectId, cutoffTime, callback) {
if (callback == null) {
callback = function () {}
}
return RedisManager.getFirstOpTimestamp(
projectId,
function (err, firstOpTimestamp) {
if (err != null) {
return callback(OError.tag(err))
}
// In the normal case, the flush marker will be set with the
// timestamp of the oldest operation in the queue by docupdater.
// If the marker is not set for any reason, we flush it anyway
// for safety.
if (!firstOpTimestamp || firstOpTimestamp < cutoffTime) {
logger.debug(
{ projectId, firstOpTimestamp, cutoffTime },
'flushing old project'
)
metrics.inc('flush-old-updates', 1, { status: 'flushed' })
return UpdatesProcessor.processUpdatesForProject(projectId, callback)
} else {
metrics.inc('flush-old-updates', 1, { status: 'skipped' })
return callback()
}
}
)
}
export function flushOldOps(options, callback) {
if (callback == null) {
callback = function () {}
}
logger.debug({ options }, 'starting flush of old ops')
// allow running flush in background for cron jobs
if (options.background) {
// return immediate response to client, then discard callback
callback(null, { message: 'running flush in background' })
callback = function () {}
}
return RedisManager.getProjectIdsWithHistoryOps(
null,
function (error, projectIds) {
if (error != null) {
return callback(OError.tag(error))
}
return ErrorRecorder.getFailedProjects(
function (error, projectHistoryFailures) {
if (error != null) {
return callback(OError.tag(error))
}
// exclude failed projects already in projectHistoryFailures
const failedProjects = new Set()
for (const entry of Array.from(projectHistoryFailures)) {
failedProjects.add(entry.project_id)
}
// randomise order so we get different projects if there is a limit
projectIds = _.shuffle(projectIds)
const maxAge = options.maxAge || 6 * 3600 // default to 6 hours
const cutoffTime = new Date(Date.now() - maxAge * 1000)
const startTime = new Date()
let count = 0
const jobs = projectIds.map(
projectId =>
function (cb) {
const timeTaken = new Date() - startTime
count++
if (
(options != null ? options.timeout : undefined) &&
timeTaken > options.timeout
) {
// finish early due to timeout, return an error to bail out of the async iteration
logger.debug('background retries timed out')
return cb(new OError('retries timed out'))
}
if (
(options != null ? options.limit : undefined) &&
count > options.limit
) {
// finish early due to reaching limit, return an error to bail out of the async iteration
logger.debug({ count }, 'background retries hit limit')
return cb(new OError('hit limit'))
}
if (failedProjects.has(projectId)) {
// skip failed projects
return setTimeout(cb, options.queueDelay || 100) // pause between flushes
}
return flushIfOld(projectId, cutoffTime, function (err) {
if (err != null) {
logger.warn(
{ projectId, err },
'error flushing old project'
)
}
return setTimeout(cb, options.queueDelay || 100)
})
}
)
return async.series(
async.reflectAll(jobs),
function (error, results) {
const success = []
const failure = []
results.forEach((result, i) => {
if (
result.error != null &&
!['retries timed out', 'hit limit'].includes(
result?.error?.message
)
) {
// ignore expected errors
return failure.push(projectIds[i])
} else {
return success.push(projectIds[i])
}
})
return callback(error, { success, failure, failedProjects })
}
)
}
)
}
)
}

View File

@@ -0,0 +1,58 @@
/* eslint-disable
no-undef,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { promisify } from 'node:util'
import fs from 'node:fs'
import crypto from 'node:crypto'
import OError from '@overleaf/o-error'
import { pipeline } from 'node:stream'
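// Blob hashes follow the git convention: SHA-1 over the header
// "blob <byte length>\0" followed by the content, hex encoded.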
export function _getBlobHashFromString(string) {
const byteLength = Buffer.byteLength(string)
const hash = crypto.createHash('sha1')
hash.setEncoding('hex')
hash.update('blob ' + byteLength + '\x00')
hash.update(string, 'utf8')
hash.end()
return hash.read()
}
export function _getBlobHash(fsPath, callback) {
return fs.stat(fsPath, function (err, stats) {
if (err != null) {
OError.tag(err, 'failed to stat file in _getBlobHash', { fsPath })
return callback(err)
}
const byteLength = stats.size
const hash = crypto.createHash('sha1')
hash.setEncoding('hex')
hash.update('blob ' + byteLength + '\x00')
pipeline(fs.createReadStream(fsPath), hash, err => {
if (err) {
callback(
OError.tag(err, 'error streaming file from disk', {
fsPath,
byteLength,
})
)
} else {
hash.end()
callback(null, hash.read(), byteLength)
}
})
})
}
export const promises = {
_getBlobHash: promisify(_getBlobHash),
}

View File

@@ -0,0 +1,78 @@
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { ObjectId } from './mongodb.js'
import request from 'request'
import async from 'async'
import settings from '@overleaf/settings'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as LockManager from './LockManager.js'
const { port } = settings.internal.history
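/**
 * Run the service health check against the configured health-check project:
 * verify the Redis lock (`/check_lock`), flush the project and fetch its
 * updates, failing if any request errors or returns an unexpected status.
 */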
export function check(callback) {
const projectId = new ObjectId(settings.history.healthCheck.project_id)
const url = `http://127.0.0.1:${port}/project/${projectId}`
logger.debug({ projectId }, 'running health check')
const jobs = [
cb =>
request.get(
{ url: `http://127.0.0.1:${port}/check_lock`, timeout: 3000 },
function (err, res, body) {
if (err != null) {
OError.tag(err, 'error checking lock for health check', {
project_id: projectId,
})
return cb(err)
} else if ((res != null ? res.statusCode : undefined) !== 200) {
return cb(new Error(`status code not 200, it's ${res.statusCode}`))
} else {
return cb()
}
}
),
cb =>
request.post(
{ url: `${url}/flush`, timeout: 10000 },
function (err, res, body) {
if (err != null) {
OError.tag(err, 'error flushing for health check', {
project_id: projectId,
})
return cb(err)
} else if ((res != null ? res.statusCode : undefined) !== 204) {
return cb(new Error(`status code not 204, it's ${res.statusCode}`))
} else {
return cb()
}
}
),
cb =>
request.get(
{ url: `${url}/updates`, timeout: 10000 },
function (err, res, body) {
if (err != null) {
OError.tag(err, 'error getting updates for health check', {
project_id: projectId,
})
return cb(err)
} else if ((res != null ? res.statusCode : undefined) !== 200) {
return cb(new Error(`status code not 200, it's ${res.statusCode}`))
} else {
return cb()
}
}
),
]
return async.series(jobs, callback)
}
export function checkLock(callback) {
return LockManager.healthCheck(callback)
}

View File

@@ -0,0 +1,22 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import * as WebApiManager from './WebApiManager.js'
import logger from '@overleaf/logger'
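// A project should use (v2) project history when web can resolve a history id
// for it.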
export function shouldUseProjectHistory(projectId, callback) {
if (callback == null) {
callback = function () {}
}
return WebApiManager.getHistoryId(projectId, (error, historyId) =>
callback(error, historyId != null)
)
}

View File

@@ -0,0 +1,123 @@
// @ts-check
import {
Range,
TrackedChange,
TrackedChangeList,
CommentList,
Comment,
TrackingProps,
} from 'overleaf-editor-core'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
/**
* @import { AddDocUpdate } from './types'
* @import { CommentRawData, TrackedChangeRawData } from 'overleaf-editor-core/lib/types'
*/
/**
*
* @param {AddDocUpdate} update
* @returns {{trackedChanges: TrackedChangeRawData[], comments: CommentRawData[]} | undefined}
*/
export function createRangeBlobDataFromUpdate(update) {
logger.debug({ update }, 'createRangeBlobDataFromUpdate')
if (update.doc == null || update.docLines == null) {
throw new OError('Not an AddFileUpdate')
}
if (
!update.ranges ||
(update.ranges.changes == null && update.ranges.comments == null)
) {
return undefined
}
if (
(!update.ranges.changes || update.ranges.changes.length === 0) &&
(!update.ranges.comments || update.ranges.comments.length === 0)
) {
return undefined
}
const sortedRanges = [...(update.ranges.changes || [])].sort((a, b) => {
if (a.op.p !== b.op.p) {
return a.op.p - b.op.p
}
if ('i' in a.op && a.op.i != null && 'd' in b.op && b.op.d != null) {
// Move deletes before inserts
return 1
}
return -1
})
const tcList = new TrackedChangeList([])
for (const change of sortedRanges) {
if ('d' in change.op && change.op.d != null) {
const length = change.op.d.length
const range = new Range(change.op.hpos ?? change.op.p, length)
tcList.add(
new TrackedChange(
range,
new TrackingProps(
'delete',
change.metadata.user_id,
new Date(change.metadata.ts)
)
)
)
} else if ('i' in change.op && change.op.i != null) {
const length = change.op.i.length
const range = new Range(change.op.hpos ?? change.op.p, length)
tcList.add(
new TrackedChange(
range,
new TrackingProps(
'insert',
change.metadata.user_id,
new Date(change.metadata.ts)
)
)
)
}
}
const comments = [...(update.ranges.comments || [])].sort((a, b) => {
return a.op.p - b.op.p
})
/** @type {Map<string, {ranges: Range[], resolved: boolean}>} */
const commentMap = new Map()
for (const comment of comments) {
const id = comment.op.t
if (!commentMap.has(id)) {
commentMap.set(id, {
ranges: [],
resolved: comment.op.resolved ?? false,
})
}
const entry = commentMap.get(id)
if (!entry) {
throw new Error('Comment entry not found')
}
if (entry.resolved !== (comment.op.resolved ?? false)) {
throw new Error('Mismatching resolved status for comment')
}
const commentLength = comment.op.c.length
if (commentLength > 0) {
// Empty comments in operations are translated to detached comments
const range = new Range(comment.op.hpos ?? comment.op.p, commentLength)
entry.ranges.push(range)
}
}
const commentList = new CommentList(
[...commentMap.entries()].map(
([id, commentObj]) =>
new Comment(id, commentObj.ranges, commentObj.resolved)
)
)
return { trackedChanges: tcList.toRaw(), comments: commentList.toRaw() }
}

View File

@@ -0,0 +1,625 @@
import { promisify } from 'node:util'
import fs from 'node:fs'
import request from 'request'
import stream from 'node:stream'
import logger from '@overleaf/logger'
import _ from 'lodash'
import { URL } from 'node:url'
import OError from '@overleaf/o-error'
import Settings from '@overleaf/settings'
import {
fetchStream,
fetchNothing,
RequestFailedError,
} from '@overleaf/fetch-utils'
import * as Versions from './Versions.js'
import * as Errors from './Errors.js'
import * as LocalFileWriter from './LocalFileWriter.js'
import * as HashManager from './HashManager.js'
import * as HistoryBlobTranslator from './HistoryBlobTranslator.js'
import { promisifyMultiResult } from '@overleaf/promise-utils'
const HTTP_REQUEST_TIMEOUT = Settings.overleaf.history.requestTimeout
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
class StringStream extends stream.Readable {
_read() {}
}
_mocks.getMostRecentChunk = (projectId, historyId, callback) => {
const path = `projects/${historyId}/latest/history`
logger.debug({ projectId, historyId }, 'getting chunk from history service')
_requestChunk({ path, json: true }, callback)
}
/**
* @param {Callback} callback
*/
export function getMostRecentChunk(projectId, historyId, callback) {
_mocks.getMostRecentChunk(projectId, historyId, callback)
}
/**
* @param {Callback} callback
*/
export function getChunkAtVersion(projectId, historyId, version, callback) {
const path = `projects/${historyId}/versions/${version}/history`
logger.debug(
{ projectId, historyId, version },
'getting chunk from history service for version'
)
_requestChunk({ path, json: true }, callback)
}
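/**
 * Fetch the most recent chunk and derive from it the latest history version,
 * the latest project structure and per-doc versions, and the last change.
 *
 * @param {Callback} callback - called with
 *   (error, version, projectStructureAndDocVersions, lastChange, chunk)
 */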
export function getMostRecentVersion(projectId, historyId, callback) {
getMostRecentChunk(projectId, historyId, (error, chunk) => {
if (error) {
return callback(OError.tag(error))
}
const mostRecentVersion =
chunk.chunk.startVersion + (chunk.chunk.history.changes || []).length
const lastChange = _.last(
_.sortBy(chunk.chunk.history.changes || [], x => x.timestamp)
)
// find the latest project and doc versions in the chunk
_getLatestProjectVersion(projectId, chunk, (err1, projectVersion) =>
_getLatestV2DocVersions(projectId, chunk, (err2, v2DocVersions) => {
// return the project and doc versions
const projectStructureAndDocVersions = {
project: projectVersion,
docs: v2DocVersions,
}
callback(
err1 || err2,
mostRecentVersion,
projectStructureAndDocVersions,
lastChange,
chunk
)
})
)
})
}
/**
* @param {string} projectId
* @param {string} historyId
* @param {Object} opts
* @param {boolean} [opts.readOnly]
* @param {(error: Error, rawChunk?: { startVersion: number, endVersion: number, endTimestamp: Date}) => void} callback
*/
export function getMostRecentVersionRaw(projectId, historyId, opts, callback) {
const path = `projects/${historyId}/latest/history/raw`
logger.debug(
{ projectId, historyId },
'getting raw chunk from history service'
)
const qs = opts.readOnly ? { readOnly: true } : {}
_requestHistoryService({ path, json: true, qs }, (err, body) => {
if (err) return callback(OError.tag(err))
const { startVersion, endVersion, endTimestamp } = body
callback(null, {
startVersion,
endVersion,
endTimestamp: new Date(endTimestamp),
})
})
}
function _requestChunk(options, callback) {
_requestHistoryService(options, (err, chunk) => {
if (err) {
return callback(OError.tag(err))
}
if (
chunk == null ||
chunk.chunk == null ||
chunk.chunk.startVersion == null
) {
const { path } = options
return callback(new OError('unexpected response', { path }))
}
callback(null, chunk)
})
}
function _getLatestProjectVersion(projectId, chunk, callback) {
// find the initial project version
const projectVersionInSnapshot = chunk.chunk.history.snapshot?.projectVersion
let projectVersion = projectVersionInSnapshot
const chunkStartVersion = chunk.chunk.startVersion
// keep track of any first error
let error = null
// iterate over the changes in chunk to find the most recent project version
for (const [changeIdx, change] of (
chunk.chunk.history.changes || []
).entries()) {
const projectVersionInChange = change.projectVersion
if (projectVersionInChange != null) {
if (
projectVersion != null &&
Versions.lt(projectVersionInChange, projectVersion)
) {
if (!error) {
error = new Errors.OpsOutOfOrderError(
'project structure version out of order',
{
projectId,
chunkStartVersion,
projectVersionInSnapshot,
changeIdx,
projectVersion,
projectVersionInChange,
}
)
}
} else {
projectVersion = projectVersionInChange
}
}
}
callback(error, projectVersion)
}
function _getLatestV2DocVersions(projectId, chunk, callback) {
// find the initial doc versions (indexed by docId as this is immutable)
const v2DocVersions =
(chunk.chunk.history.snapshot &&
chunk.chunk.history.snapshot.v2DocVersions) ||
{}
// keep track of any errors
let error = null
// iterate over the changes in the chunk to find the most recent doc versions
for (const change of chunk.chunk.history.changes || []) {
if (change.v2DocVersions != null) {
for (const docId in change.v2DocVersions) {
const docInfo = change.v2DocVersions[docId]
const { v } = docInfo
if (
v2DocVersions[docId] &&
v2DocVersions[docId].v != null &&
Versions.lt(v, v2DocVersions[docId].v)
) {
if (!error) {
logger.warn(
{
projectId,
docId,
changeVersion: docInfo,
previousVersion: v2DocVersions[docId],
},
'doc version out of order in chunk'
)
error = new Errors.OpsOutOfOrderError('doc version out of order')
}
} else {
v2DocVersions[docId] = docInfo
}
}
}
}
callback(error, v2DocVersions)
}
export function getProjectBlob(historyId, blobHash, callback) {
logger.debug({ historyId, blobHash }, 'getting blob from history service')
_requestHistoryService(
{ path: `projects/${historyId}/blobs/${blobHash}` },
callback
)
}
/**
* @param {Callback} callback
*/
export function getProjectBlobStream(historyId, blobHash, callback) {
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${blobHash}`
logger.debug(
{ historyId, blobHash },
'getting blob stream from history service'
)
fetchStream(url, getHistoryFetchOptions())
.then(stream => {
callback(null, stream)
})
.catch(err => callback(OError.tag(err)))
}
export function sendChanges(
projectId,
historyId,
changes,
endVersion,
callback
) {
logger.debug(
{ projectId, historyId, endVersion },
'sending changes to history service'
)
_requestHistoryService(
{
path: `projects/${historyId}/legacy_changes`,
qs: { end_version: endVersion },
method: 'POST',
json: changes,
},
error => {
if (error) {
OError.tag(error, 'failed to send changes to v1', {
projectId,
historyId,
endVersion,
errorCode: error.code,
statusCode: error.statusCode,
body: error.body,
})
return callback(error)
}
callback()
}
)
}
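// Write an in-memory string to a temporary file on disk and upload it to the
// history store as a blob; `fileId` is only used to name the temporary file.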
function createBlobFromString(historyId, data, fileId, callback) {
const stringStream = new StringStream()
stringStream.push(data)
stringStream.push(null)
LocalFileWriter.bufferOnDisk(
stringStream,
'',
fileId,
(fsPath, cb) => {
_createBlob(historyId, fsPath, cb)
},
callback
)
}
function _checkBlobExists(historyId, hash, callback) {
if (!hash) return callback(null, false)
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${hash}`
fetchNothing(url, {
method: 'HEAD',
...getHistoryFetchOptions(),
})
.then(res => {
callback(null, true)
})
.catch(err => {
if (err instanceof RequestFailedError && err.response.status === 404) {
return callback(null, false)
}
callback(OError.tag(err), false)
})
}
function _rewriteFilestoreUrl(url, projectId, callback) {
if (!url) {
return { fileId: null, filestoreURL: null }
}
// Rewrite the filestore url to point to the location in the local
// settings for this service (this avoids problems with cross-
// datacentre requests when running filestore in multiple locations).
const { pathname: fileStorePath } = new URL(url)
const urlMatch = /^\/project\/([0-9a-f]{24})\/file\/([0-9a-f]{24})$/.exec(
fileStorePath
)
if (urlMatch == null) {
return callback(new OError('invalid file for blob creation'))
}
if (urlMatch[1] !== projectId) {
return callback(new OError('invalid project for blob creation'))
}
const fileId = urlMatch[2]
const filestoreURL = `${Settings.apis.filestore.url}/project/${projectId}/file/${fileId}`
return { filestoreURL, fileId }
}
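/**
 * Create the history blob(s) for a single update. Doc updates (`doc` +
 * `docLines`) are uploaded from memory, together with an optional ranges
 * blob; file updates are either reused via their existing hash or streamed
 * from the filestore and uploaded from a temporary file on disk.
 */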
export function createBlobForUpdate(projectId, historyId, update, callback) {
callback = _.once(callback)
if (update.doc != null && update.docLines != null) {
let ranges
try {
ranges = HistoryBlobTranslator.createRangeBlobDataFromUpdate(update)
} catch (error) {
return callback(error)
}
createBlobFromString(
historyId,
update.docLines,
`project-${projectId}-doc-${update.doc}`,
(err, fileHash) => {
if (err) {
return callback(err)
}
if (ranges) {
createBlobFromString(
historyId,
JSON.stringify(ranges),
`project-${projectId}-doc-${update.doc}-ranges`,
(err, rangesHash) => {
if (err) {
return callback(err)
}
logger.debug(
{ fileHash, rangesHash },
'created blobs for both ranges and content'
)
return callback(null, { file: fileHash, ranges: rangesHash })
}
)
} else {
logger.debug({ fileHash }, 'created blob for content')
return callback(null, { file: fileHash })
}
}
)
} else if (
update.file != null &&
(update.url != null || update.createdBlob)
) {
const { fileId, filestoreURL } = _rewriteFilestoreUrl(
update.url,
projectId,
callback
)
_checkBlobExists(historyId, update.hash, (err, blobExists) => {
if (err) {
return callback(
new OError(
'error checking whether blob exists',
{ projectId, historyId, update },
err
)
)
} else if (blobExists) {
logger.debug(
{ projectId, fileId, update },
'Skipping blob creation as it has already been created'
)
return callback(null, { file: update.hash })
} else if (update.createdBlob) {
logger.warn(
{ projectId, fileId, update },
'created blob does not exist, reading from filestore'
)
}
if (!filestoreURL) {
return callback(
new OError('no filestore URL provided and blob was not created')
)
}
if (!Settings.apis.filestore.enabled) {
return callback(new OError('blocking filestore read', { update }))
}
fetchStream(filestoreURL, {
signal: AbortSignal.timeout(HTTP_REQUEST_TIMEOUT),
})
.then(stream => {
LocalFileWriter.bufferOnDisk(
stream,
filestoreURL,
`project-${projectId}-file-${fileId}`,
(fsPath, cb) => {
_createBlob(historyId, fsPath, cb)
},
(err, fileHash) => {
if (err) {
return callback(err)
}
if (update.hash && update.hash !== fileHash) {
logger.warn(
{ projectId, fileId, webHash: update.hash, fileHash },
'hash mismatch between web and project-history'
)
}
logger.debug({ fileHash }, 'created blob for file')
callback(null, { file: fileHash })
}
)
})
.catch(err => {
if (
err instanceof RequestFailedError &&
err.response.status === 404
) {
logger.warn(
{ projectId, historyId, filestoreURL },
'File contents not found in filestore. Storing in history as an empty file'
)
const emptyStream = new StringStream()
LocalFileWriter.bufferOnDisk(
emptyStream,
filestoreURL,
`project-${projectId}-file-${fileId}`,
(fsPath, cb) => {
_createBlob(historyId, fsPath, cb)
},
(err, fileHash) => {
if (err) {
return callback(err)
}
logger.debug({ fileHash }, 'created empty blob for file')
callback(null, { file: fileHash })
}
)
emptyStream.push(null) // send an EOF signal
} else {
callback(OError.tag(err, 'error from filestore', { filestoreURL }))
}
})
})
} else {
const error = new OError('invalid update for blob creation')
callback(error)
}
}
function _createBlob(historyId, fsPath, _callback) {
const callback = _.once(_callback)
HashManager._getBlobHash(fsPath, (error, hash, byteLength) => {
if (error) {
return callback(OError.tag(error))
}
const outStream = fs.createReadStream(fsPath)
logger.debug(
{ fsPath, historyId, hash, byteLength },
'sending blob to history service'
)
const url = `${Settings.overleaf.history.host}/projects/${historyId}/blobs/${hash}`
fetchNothing(url, {
method: 'PUT',
body: outStream,
headers: { 'Content-Length': byteLength }, // add the content length to work around problems with chunked encoding in node 18
...getHistoryFetchOptions(),
})
.then(res => {
callback(null, hash)
})
.catch(err => {
callback(OError.tag(err))
})
})
}
export function initializeProject(historyId, callback) {
_requestHistoryService(
{
method: 'POST',
path: 'projects',
json: historyId == null ? true : { projectId: historyId },
},
(error, project) => {
if (error) {
return callback(OError.tag(error))
}
const id = project.projectId
if (id == null) {
error = new OError('history store did not return a project id', { id })
return callback(error)
}
callback(null, id)
}
)
}
export function deleteProject(projectId, callback) {
_requestHistoryService(
{ method: 'DELETE', path: `projects/${projectId}` },
callback
)
}
const getProjectBlobAsync = promisify(getProjectBlob)
class BlobStore {
constructor(projectId) {
this.projectId = projectId
}
async getString(hash) {
return await getProjectBlobAsync(this.projectId, hash)
}
async getObject(hash) {
const string = await this.getString(hash)
return JSON.parse(string)
}
}
export function getBlobStore(projectId) {
return new BlobStore(projectId)
}
function _requestOptions(options) {
const requestOptions = {
method: options.method || 'GET',
url: `${Settings.overleaf.history.host}/${options.path}`,
timeout: HTTP_REQUEST_TIMEOUT,
auth: {
user: Settings.overleaf.history.user,
pass: Settings.overleaf.history.pass,
sendImmediately: true,
},
}
if (options.json != null) {
requestOptions.json = options.json
}
if (options.body != null) {
requestOptions.body = options.body
}
if (options.qs != null) {
requestOptions.qs = options.qs
}
return requestOptions
}
/**
* @return {RequestInit}
*/
function getHistoryFetchOptions() {
return {
signal: AbortSignal.timeout(HTTP_REQUEST_TIMEOUT),
basicAuth: {
user: Settings.overleaf.history.user,
password: Settings.overleaf.history.pass,
},
}
}
function _requestHistoryService(options, callback) {
const requestOptions = _requestOptions(options)
request(requestOptions, (error, res, body) => {
if (error) {
return callback(OError.tag(error))
}
if (res.statusCode >= 200 && res.statusCode < 300) {
callback(null, body)
} else {
const { method, url, qs } = requestOptions
error = new OError(
`history store returned a non-success status code: ${res.statusCode}`,
{ method, url, qs, statusCode: res.statusCode }
)
callback(error)
}
})
}
export const promises = {
/** @type {(projectId: string, historyId: string) => Promise<{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}>} */
getMostRecentChunk: promisify(getMostRecentChunk),
getChunkAtVersion: promisify(getChunkAtVersion),
getMostRecentVersion: promisifyMultiResult(getMostRecentVersion, [
'version',
'projectStructureAndDocVersions',
'lastChange',
'mostRecentChunk',
]),
getMostRecentVersionRaw: promisify(getMostRecentVersionRaw),
getProjectBlob: promisify(getProjectBlob),
getProjectBlobStream: promisify(getProjectBlobStream),
sendChanges: promisify(sendChanges),
createBlobForUpdate: promisify(createBlobForUpdate),
initializeProject: promisify(initializeProject),
deleteProject: promisify(deleteProject),
}

View File

@@ -0,0 +1,582 @@
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import request from 'request'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as SummarizedUpdatesManager from './SummarizedUpdatesManager.js'
import * as DiffManager from './DiffManager.js'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as SnapshotManager from './SnapshotManager.js'
import * as HealthChecker from './HealthChecker.js'
import * as SyncManager from './SyncManager.js'
import * as ErrorRecorder from './ErrorRecorder.js'
import * as RedisManager from './RedisManager.js'
import * as LabelsManager from './LabelsManager.js'
import * as HistoryApiManager from './HistoryApiManager.js'
import * as RetryManager from './RetryManager.js'
import * as FlushManager from './FlushManager.js'
import { pipeline } from 'node:stream'
import { RequestFailedError } from '@overleaf/fetch-utils'
const ONE_DAY_IN_SECONDS = 24 * 60 * 60
export function getProjectBlob(req, res, next) {
const historyId = req.params.history_id
const blobHash = req.params.hash
HistoryStoreManager.getProjectBlobStream(
historyId,
blobHash,
(err, stream) => {
if (err != null) {
if (err instanceof RequestFailedError && err.response.status === 404) {
return res.status(404).end()
}
return next(OError.tag(err))
}
res.setHeader('Cache-Control', `private, max-age=${ONE_DAY_IN_SECONDS}`)
pipeline(stream, res, err => {
if (err) next(err)
// res.end() is already called via 'end' event by pipeline.
})
}
)
}
export function initializeProject(req, res, next) {
const { historyId } = req.body
HistoryStoreManager.initializeProject(historyId, (error, id) => {
if (error != null) {
return next(OError.tag(error))
}
res.json({ project: { id } })
})
}
export function flushProject(req, res, next) {
const projectId = req.params.project_id
if (req.query.debug) {
logger.debug(
{ projectId },
'compressing project history in single-step mode'
)
UpdatesProcessor.processSingleUpdateForProject(projectId, error => {
if (error != null) {
return next(OError.tag(error))
}
res.sendStatus(204)
})
} else if (req.query.bisect) {
logger.debug({ projectId }, 'compressing project history in bisect mode')
UpdatesProcessor.processUpdatesForProjectUsingBisect(
projectId,
UpdatesProcessor.REDIS_READ_BATCH_SIZE,
error => {
if (error != null) {
return next(OError.tag(error))
}
res.sendStatus(204)
}
)
} else {
logger.debug({ projectId }, 'compressing project history')
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error != null) {
return next(OError.tag(error))
}
res.sendStatus(204)
})
}
}
export function dumpProject(req, res, next) {
const projectId = req.params.project_id
const batchSize = req.query.count || UpdatesProcessor.REDIS_READ_BATCH_SIZE
logger.debug({ projectId }, 'retrieving raw updates')
UpdatesProcessor.getRawUpdates(projectId, batchSize, (error, rawUpdates) => {
if (error != null) {
return next(OError.tag(error))
}
res.json(rawUpdates)
})
}
export function flushOld(req, res, next) {
const { maxAge, queueDelay, limit, timeout, background } = req.query
const options = { maxAge, queueDelay, limit, timeout, background }
FlushManager.flushOldOps(options, (error, results) => {
if (error != null) {
return next(OError.tag(error))
}
res.send(results)
})
}
export function getDiff(req, res, next) {
const projectId = req.params.project_id
const { pathname, from, to } = req.query
if (pathname == null) {
return res.sendStatus(400)
}
logger.debug({ projectId, pathname, from, to }, 'getting diff')
DiffManager.getDiff(projectId, pathname, from, to, (error, diff) => {
if (error != null) {
return next(OError.tag(error))
}
res.json({ diff })
})
}
export function getFileTreeDiff(req, res, next) {
const projectId = req.params.project_id
const { to, from } = req.query
DiffManager.getFileTreeDiff(projectId, from, to, (error, diff) => {
if (error != null) {
return next(OError.tag(error))
}
res.json({ diff })
})
}
export function getUpdates(req, res, next) {
const projectId = req.params.project_id
const { before, min_count: minCount } = req.query
SummarizedUpdatesManager.getSummarizedProjectUpdates(
projectId,
{ before, min_count: minCount },
(error, updates, nextBeforeTimestamp) => {
if (error != null) {
return next(OError.tag(error))
}
for (const update of updates) {
// Sets don't JSONify, so convert to arrays
update.pathnames = Array.from(update.pathnames || []).sort()
}
res.json({
updates,
nextBeforeTimestamp,
})
}
)
}
export function latestVersion(req, res, next) {
const projectId = req.params.project_id
logger.debug({ projectId }, 'compressing project history and getting version')
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error != null) {
return next(OError.tag(error))
}
WebApiManager.getHistoryId(projectId, (error, historyId) => {
if (error != null) {
return next(OError.tag(error))
}
HistoryStoreManager.getMostRecentVersion(
projectId,
historyId,
(error, version, projectStructureAndDocVersions, lastChange) => {
if (error != null) {
return next(OError.tag(error))
}
res.json({
version,
timestamp: lastChange != null ? lastChange.timestamp : undefined,
v2Authors: lastChange != null ? lastChange.v2Authors : undefined,
})
}
)
})
})
}
export function getFileSnapshot(req, res, next) {
const { project_id: projectId, version, pathname } = req.params
SnapshotManager.getFileSnapshotStream(
projectId,
version,
pathname,
(error, stream) => {
if (error != null) {
return next(OError.tag(error))
}
pipeline(stream, res, err => {
if (err) next(err)
// res.end() is already called via 'end' event by pipeline.
})
}
)
}
export function getRangesSnapshot(req, res, next) {
const { project_id: projectId, version, pathname } = req.params
SnapshotManager.getRangesSnapshot(
projectId,
version,
pathname,
(err, ranges) => {
if (err) {
return next(OError.tag(err))
}
res.json(ranges)
}
)
}
export function getFileMetadataSnapshot(req, res, next) {
const { project_id: projectId, version, pathname } = req.params
SnapshotManager.getFileMetadataSnapshot(
projectId,
version,
pathname,
(err, data) => {
if (err) {
return next(OError.tag(err))
}
res.json(data)
}
)
}
export function getLatestSnapshot(req, res, next) {
const { project_id: projectId } = req.params
WebApiManager.getHistoryId(projectId, (error, historyId) => {
if (error) return next(OError.tag(error))
SnapshotManager.getLatestSnapshot(
projectId,
historyId,
(error, details) => {
if (error != null) {
return next(error)
}
const { snapshot, version } = details
res.json({ snapshot: snapshot.toRaw(), version })
}
)
})
}
export function getChangesInChunkSince(req, res, next) {
const { project_id: projectId } = req.params
const { since } = req.query
WebApiManager.getHistoryId(projectId, (error, historyId) => {
if (error) return next(OError.tag(error))
SnapshotManager.getChangesInChunkSince(
projectId,
historyId,
since,
(error, details) => {
if (error != null) {
return next(error)
}
const { latestStartVersion, changes } = details
res.json({
latestStartVersion,
changes: changes.map(c => c.toRaw()),
})
}
)
})
}
export function getProjectSnapshot(req, res, next) {
const { project_id: projectId, version } = req.params
SnapshotManager.getProjectSnapshot(
projectId,
version,
(error, snapshotData) => {
if (error != null) {
return next(error)
}
res.json(snapshotData)
}
)
}
export function getPathsAtVersion(req, res, next) {
const { project_id: projectId, version } = req.params
SnapshotManager.getPathsAtVersion(projectId, version, (error, result) => {
if (error != null) {
return next(error)
}
res.json(result)
})
}
export function healthCheck(req, res) {
HealthChecker.check(err => {
if (err != null) {
logger.err({ err }, 'error performing health check')
res.sendStatus(500)
} else {
res.sendStatus(200)
}
})
}
export function checkLock(req, res) {
HealthChecker.checkLock(err => {
if (err != null) {
logger.err({ err }, 'error performing lock check')
res.sendStatus(500)
} else {
res.sendStatus(200)
}
})
}
export function resyncProject(req, res, next) {
const projectId = req.params.project_id
const options = {}
if (req.body.origin) {
options.origin = req.body.origin
}
if (req.body.historyRangesMigration) {
options.historyRangesMigration = req.body.historyRangesMigration
}
if (req.query.force || req.body.force) {
// this will delete the queue and clear the sync state
// use if the project is completely broken
SyncManager.startHardResync(projectId, options, error => {
if (error != null) {
return next(error)
}
// flush the sync operations
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
})
} else {
SyncManager.startResync(projectId, options, error => {
if (error != null) {
return next(error)
}
// flush the sync operations
UpdatesProcessor.processUpdatesForProject(projectId, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
})
}
}
export function forceDebugProject(req, res, next) {
const projectId = req.params.project_id
// set the debug flag to true unless we see ?clear=true
const state = !req.query.clear
ErrorRecorder.setForceDebug(projectId, state, error => {
if (error != null) {
return next(error)
}
// display the failure record to help debugging
ErrorRecorder.getFailureRecord(projectId, (error, result) => {
if (error != null) {
return next(error)
}
res.send(result)
})
})
}
export function getFailures(req, res, next) {
ErrorRecorder.getFailures((error, result) => {
if (error != null) {
return next(error)
}
res.send({ failures: result })
})
}
export function getQueueCounts(req, res, next) {
RedisManager.getProjectIdsWithHistoryOpsCount((err, queuedProjectsCount) => {
if (err != null) {
return next(err)
}
res.send({ queuedProjects: queuedProjectsCount })
})
}
export function getLabels(req, res, next) {
const projectId = req.params.project_id
HistoryApiManager.shouldUseProjectHistory(
projectId,
(error, shouldUseProjectHistory) => {
if (error != null) {
return next(error)
}
if (shouldUseProjectHistory) {
LabelsManager.getLabels(projectId, (error, labels) => {
if (error != null) {
return next(error)
}
res.json(labels)
})
} else {
res.sendStatus(409)
}
}
)
}
export function createLabel(req, res, next) {
const { project_id: projectId, user_id: userIdParam } = req.params
const {
version,
comment,
user_id: userIdBody,
created_at: createdAt,
validate_exists: validateExists,
} = req.body
// Temporarily looking up both params and body while rolling out changes
// in the router path - https://github.com/overleaf/internal/pull/20200
const userId = userIdParam || userIdBody
HistoryApiManager.shouldUseProjectHistory(
projectId,
(error, shouldUseProjectHistory) => {
if (error != null) {
return next(error)
}
if (shouldUseProjectHistory) {
LabelsManager.createLabel(
projectId,
userId,
version,
comment,
createdAt,
validateExists,
(error, label) => {
if (error != null) {
return next(error)
}
res.json(label)
}
)
} else {
logger.error(
{
projectId,
userId,
version,
comment,
createdAt,
validateExists,
},
'not using v2 history'
)
res.sendStatus(409)
}
}
)
}
/**
* This will delete a label if it is owned by the current user. If you wish to
* delete a label regardless of the current user, then use `deleteLabel` instead.
*/
export function deleteLabelForUser(req, res, next) {
const {
project_id: projectId,
user_id: userId,
label_id: labelId,
} = req.params
LabelsManager.deleteLabelForUser(projectId, userId, labelId, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
}
export function deleteLabel(req, res, next) {
const { project_id: projectId, label_id: labelId } = req.params
LabelsManager.deleteLabel(projectId, labelId, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
}
export function retryFailures(req, res, next) {
const { failureType, timeout, limit, callbackUrl } = req.query
if (callbackUrl) {
// send response but run in background when callbackUrl provided
res.send({ retryStatus: 'running retryFailures in background' })
}
RetryManager.retryFailures(
{ failureType, timeout, limit },
(error, result) => {
if (callbackUrl) {
// if present, notify the callbackUrl on success
if (!error) {
// Needs Node 12
// const callbackHeaders = Object.fromEntries(Object.entries(req.headers || {}).filter(([k,v]) => k.match(/^X-CALLBACK-/i)))
const callbackHeaders = {}
for (const key of Object.getOwnPropertyNames(
req.headers || {}
).filter(key => key.match(/^X-CALLBACK-/i))) {
const found = key.match(/^X-CALLBACK-(.*)/i)
callbackHeaders[found[1]] = req.headers[key]
}
request({ url: callbackUrl, headers: callbackHeaders })
}
} else {
if (error != null) {
return next(error)
}
res.send({ retryStatus: result })
}
}
)
}
export function transferLabels(req, res, next) {
const { from_user: fromUser, to_user: toUser } = req.params
LabelsManager.transferLabels(fromUser, toUser, error => {
if (error != null) {
return next(error)
}
res.sendStatus(204)
})
}
export function deleteProject(req, res, next) {
const { project_id: projectId } = req.params
// clear the timestamp before clearing the queue,
// because the queue location is used in the migration
RedisManager.clearFirstOpTimestamp(projectId, err => {
if (err) {
return next(err)
}
RedisManager.clearCachedHistoryId(projectId, err => {
if (err) {
return next(err)
}
RedisManager.destroyDocUpdatesQueue(projectId, err => {
if (err) {
return next(err)
}
SyncManager.clearResyncState(projectId, err => {
if (err) {
return next(err)
}
ErrorRecorder.clearError(projectId, err => {
if (err) {
return next(err)
}
res.sendStatus(204)
})
})
})
})
})
}

View File

@@ -0,0 +1,175 @@
import OError from '@overleaf/o-error'
import { db, ObjectId } from './mongodb.js'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as WebApiManager from './WebApiManager.js'
export function getLabels(projectId, callback) {
_toObjectId(projectId, function (error, projectId) {
if (error) {
return callback(OError.tag(error))
}
db.projectHistoryLabels
.find({ project_id: new ObjectId(projectId) })
.toArray(function (error, labels) {
if (error) {
return callback(OError.tag(error))
}
const formattedLabels = labels.map(_formatLabel)
callback(null, formattedLabels)
})
})
}
export function createLabel(
projectId,
userId,
version,
comment,
createdAt,
shouldValidateExists,
callback
) {
const validateVersionExists = function (callback) {
if (shouldValidateExists === false) {
callback()
} else {
_validateChunkExistsForVersion(projectId.toString(), version, callback)
}
}
_toObjectId(projectId, userId, function (error, projectId, userId) {
if (error) {
return callback(OError.tag(error))
}
validateVersionExists(function (error) {
if (error) {
return callback(OError.tag(error))
}
createdAt = createdAt != null ? new Date(createdAt) : new Date()
const label = {
project_id: new ObjectId(projectId),
comment,
version,
created_at: createdAt,
}
if (userId) {
label.user_id = userId
}
db.projectHistoryLabels.insertOne(label, function (error, confirmation) {
if (error) {
return callback(OError.tag(error))
}
label._id = confirmation.insertedId
callback(null, _formatLabel(label))
})
})
})
}
export function deleteLabelForUser(projectId, userId, labelId, callback) {
_toObjectId(
projectId,
userId,
labelId,
function (error, projectId, userId, labelId) {
if (error) {
return callback(OError.tag(error))
}
db.projectHistoryLabels.deleteOne(
{
_id: new ObjectId(labelId),
project_id: new ObjectId(projectId),
user_id: new ObjectId(userId),
},
callback
)
}
)
}
export function deleteLabel(projectId, labelId, callback) {
_toObjectId(projectId, labelId, function (error, projectId, labelId) {
if (error) {
return callback(OError.tag(error))
}
db.projectHistoryLabels.deleteOne(
{
_id: new ObjectId(labelId),
project_id: new ObjectId(projectId),
},
callback
)
})
}
export function transferLabels(fromUserId, toUserId, callback) {
_toObjectId(fromUserId, toUserId, function (error, fromUserId, toUserId) {
if (error) {
return callback(OError.tag(error))
}
db.projectHistoryLabels.updateMany(
{
user_id: fromUserId,
},
{
$set: { user_id: toUserId },
},
callback
)
})
}
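// Convert a variable number of id arguments to ObjectIds (falsy values pass
// through as undefined); the last argument is always the callback.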
function _toObjectId(...args1) {
const adjustedLength = Math.max(args1.length, 1)
const args = args1.slice(0, adjustedLength - 1)
const callback = args1[adjustedLength - 1]
try {
const ids = args.map(id => {
if (id) {
return new ObjectId(id)
} else {
return undefined
}
})
callback(null, ...ids)
} catch (error) {
callback(error)
}
}
function _formatLabel(label) {
return {
id: label._id,
comment: label.comment,
version: label.version,
user_id: label.user_id,
created_at: label.created_at,
}
}
function _validateChunkExistsForVersion(projectId, version, callback) {
UpdatesProcessor.processUpdatesForProject(projectId, function (error) {
if (error) {
return callback(error)
}
WebApiManager.getHistoryId(projectId, function (error, historyId) {
if (error) {
return callback(error)
}
HistoryStoreManager.getChunkAtVersion(
projectId,
historyId,
version,
function (error) {
if (error) {
return callback(error)
}
callback()
}
)
})
})
}

View File

@@ -0,0 +1,88 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import fs from 'node:fs'
import { randomUUID } from 'node:crypto'
import Path from 'node:path'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import _ from 'lodash'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as HashManager from './HashManager.js'
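// Replace a file that is too large for the history service with a small
// plain-text stub recording its id, size and hash. The stub is written to a
// new path in the upload folder; the caller is responsible for unlinking it.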
export function createStub(fsPath, fileId, fileSize, fileHash, callback) {
if (callback == null) {
callback = function () {}
}
callback = _.once(callback)
const newFsPath = Path.join(
Settings.path.uploadFolder,
randomUUID() + `-${fileId}-stub`
)
const writeStream = fs.createWriteStream(newFsPath)
writeStream.on('error', function (error) {
OError.tag(error, 'error writing stub file', { fsPath, newFsPath })
return fs.unlink(newFsPath, () => callback(error))
})
writeStream.on('finish', function () {
logger.debug(
{ fsPath, fileId, fileSize, fileHash },
'replaced large file with stub'
)
return callback(null, newFsPath)
}) // let the consumer unlink the file
const stubLines = [
'FileTooLargeError v1',
'File too large to be stored in history service',
`id ${fileId}`,
`size ${fileSize} bytes`,
`hash ${fileHash}`,
'\0', // null byte to make this a binary file
]
writeStream.write(stubLines.join('\n'))
return writeStream.end()
}
export function replaceWithStubIfNeeded(fsPath, fileId, fileSize, callback) {
if (callback == null) {
callback = function () {}
}
if (
Settings.maxFileSizeInBytes != null &&
fileSize > Settings.maxFileSizeInBytes
) {
logger.error(
{ fsPath, fileId, maxFileSizeInBytes: Settings.maxFileSizeInBytes },
'file too large, will use stub'
)
return HashManager._getBlobHash(fsPath, function (error, fileHash) {
if (error != null) {
return callback(error)
}
return createStub(
fsPath,
fileId,
fileSize,
fileHash,
function (error, newFsPath) {
if (error != null) {
return callback(error)
}
return callback(null, newFsPath)
}
)
})
} else {
return callback(null, fsPath)
}
}

View File

@@ -0,0 +1,114 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import fs from 'node:fs'
import { pipeline } from 'node:stream'
import { randomUUID } from 'node:crypto'
import path from 'node:path'
import _ from 'lodash'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import OError from '@overleaf/o-error'
import * as LargeFileManager from './LargeFileManager.js'
//
// This method takes a stream and provides you with a new stream which reads
// from disk.
//
// This is useful if we're piping one network stream to another. If the stream
// we're piping to can't consume data as quickly as the one we're consuming
// from, then large quantities of data may be held in memory. Instead, the
// read stream can be passed to this method; the data will then be held on
// disk rather than in memory and cleaned up once it has been consumed.
//
export function bufferOnDisk(
inStream,
url,
fileId,
consumeOutStream,
callback
) {
const timer = new metrics.Timer('LocalFileWriter.writeStream')
const fsPath = path.join(
Settings.path.uploadFolder,
randomUUID() + `-${fileId}`
)
const cleanup = _.once((streamError, res) => {
return deleteFile(fsPath, function (cleanupError) {
if (streamError) {
OError.tag(streamError, 'error deleting temporary file', {
fsPath,
url,
})
}
if (cleanupError) {
OError.tag(cleanupError)
}
if (streamError && cleanupError) {
// logging the cleanup error in case only the stream error is sent to the callback
logger.error(cleanupError)
}
return callback(streamError || cleanupError, res)
})
})
logger.debug({ fsPath, url }, 'writing file locally')
const writeStream = fs.createWriteStream(fsPath)
pipeline(inStream, writeStream, err => {
if (err) {
OError.tag(err, 'problem writing file locally', {
fsPath,
url,
})
return cleanup(err)
}
timer.done()
// in future check inStream.response.headers for hash value here
logger.debug({ fsPath, url }, 'stream closed after writing file locally')
const fileSize = writeStream.bytesWritten
return LargeFileManager.replaceWithStubIfNeeded(
fsPath,
fileId,
fileSize,
function (err, newFsPath) {
if (err != null) {
OError.tag(err, 'problem in large file manager', {
newFsPath,
fsPath,
fileId,
fileSize,
})
return cleanup(err)
}
return consumeOutStream(newFsPath, cleanup)
}
)
})
}
export function deleteFile(fsPath, callback) {
if (fsPath == null || fsPath === '') {
return callback()
}
logger.debug({ fsPath }, 'removing local temp file')
return fs.unlink(fsPath, function (err) {
if (err != null && err.code !== 'ENOENT') {
// ignore errors deleting the file when it was never created
return callback(OError.tag(err))
} else {
return callback()
}
})
}

View File

@@ -0,0 +1,314 @@
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { promisify } from 'node:util'
import async from 'async'
import metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import redis from '@overleaf/redis-wrapper'
import os from 'node:os'
import crypto from 'node:crypto'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
const LOCK_TEST_INTERVAL = 50 // 50ms between each test of the lock
const MAX_LOCK_WAIT_TIME = 10000 // 10s maximum time to spend trying to get the lock
export const LOCK_TTL = 360 // seconds
export const MIN_LOCK_EXTENSION_INTERVAL = 1000 // 1s minimum interval when extending a lock
export const UNLOCK_SCRIPT =
'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("del", KEYS[1]) else return 0 end'
const EXTEND_SCRIPT =
'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("expire", KEYS[1], ARGV[2]) else return 0 end'
const HOST = os.hostname()
const PID = process.pid
const RND = crypto.randomBytes(4).toString('hex')
let COUNT = 0
const rclient = redis.createClient(Settings.redis.lock)
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
// Use a signed lock value as described in
// http://redis.io/topics/distlock#correct-implementation-with-a-single-instance
// to prevent accidental unlocking by multiple processes
_mocks.randomLock = () => {
const time = Date.now()
return `locked:host=${HOST}:pid=${PID}:random=${RND}:time=${time}:count=${COUNT++}`
}
export function randomLock(...args) {
return _mocks.randomLock(...args)
}
_mocks.tryLock = (key, callback) => {
if (callback == null) {
callback = function () {}
}
const lockValue = randomLock()
return rclient.set(
key,
lockValue,
'EX',
LOCK_TTL,
'NX',
function (err, gotLock) {
if (err != null) {
return callback(
OError.tag(err, 'redis error trying to get lock', { key })
)
}
if (gotLock === 'OK') {
metrics.inc('lock.project.try.success')
return callback(err, true, lockValue)
} else {
metrics.inc('lock.project.try.failed')
return callback(err, false)
}
}
)
}
export function tryLock(...args) {
_mocks.tryLock(...args)
}
_mocks.extendLock = (key, lockValue, callback) => {
if (callback == null) {
callback = function () {}
}
return rclient.eval(
EXTEND_SCRIPT,
1,
key,
lockValue,
LOCK_TTL,
function (err, result) {
if (err != null) {
return callback(
OError.tag(err, 'redis error trying to extend lock', { key })
)
}
if (result != null && result !== 1) {
// a successful extension should update exactly one key
metrics.inc('lock.project.extend.failed')
const error = new OError('failed to extend lock', {
key,
lockValue,
result,
})
return callback(error)
}
metrics.inc('lock.project.extend.success')
return callback()
}
)
}
export function extendLock(...args) {
_mocks.extendLock(...args)
}
_mocks.getLock = (key, callback) => {
let attempt
if (callback == null) {
callback = function () {}
}
const startTime = Date.now()
let attempts = 0
return (attempt = function () {
if (Date.now() - startTime > MAX_LOCK_WAIT_TIME) {
metrics.inc('lock.project.get.failed')
return callback(new OError('Timeout', { key }))
}
attempts += 1
return tryLock(key, function (error, gotLock, lockValue) {
if (error != null) {
return callback(OError.tag(error))
}
if (gotLock) {
metrics.gauge('lock.project.get.success.tries', attempts)
return callback(null, lockValue)
} else {
return setTimeout(attempt, LOCK_TEST_INTERVAL)
}
})
})()
}
export function getLock(...args) {
_mocks.getLock(...args)
}
export function checkLock(key, callback) {
if (callback == null) {
callback = function () {}
}
return rclient.exists(key, function (err, exists) {
if (err != null) {
return callback(OError.tag(err))
}
exists = parseInt(exists)
if (exists === 1) {
return callback(err, false)
} else {
return callback(err, true)
}
})
}
_mocks.releaseLock = (key, lockValue, callback) => {
return rclient.eval(UNLOCK_SCRIPT, 1, key, lockValue, function (err, result) {
if (err != null) {
return callback(OError.tag(err))
}
if (result != null && result !== 1) {
// successful unlock should release exactly one key
const error = new OError('tried to release timed out lock', {
key,
lockValue,
redis_result: result,
})
return callback(error)
}
return callback(err, result)
})
}
export function releaseLock(...args) {
_mocks.releaseLock(...args)
}
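/**
 * Acquire the lock for `key`, run `runner(extend, done)` while holding it and
 * release the lock when the runner's `done` callback fires. Long-running jobs
 * can call `extend` periodically to push back the lock's TTL.
 */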
export function runWithLock(key, runner, callback) {
if (callback == null) {
callback = function () {}
}
return getLock(key, function (error, lockValue) {
if (error != null) {
return callback(OError.tag(error))
}
const lock = new Lock(key, lockValue)
return runner(lock.extend.bind(lock), (error1, ...args) =>
lock.release(function (error2) {
error = error1 || error2
if (error != null) {
return callback(OError.tag(error), ...Array.from(args))
}
return callback(null, ...Array.from(args))
})
)
})
}
export function healthCheck(callback) {
const action = (extendLock, releaseLock) => releaseLock()
return runWithLock(
`HistoryLock:HealthCheck:host=${HOST}:pid=${PID}:random=${RND}`,
action,
callback
)
}
export function close(callback) {
rclient.quit()
return rclient.once('end', callback)
}
class Lock {
constructor(key, value) {
this.key = key
this.value = value
this.slowExecutionError = new OError('slow execution during lock')
this.lockTakenAt = Date.now()
this.timer = new metrics.Timer('lock.project')
}
extend(callback) {
const lockLength = Date.now() - this.lockTakenAt
if (lockLength < MIN_LOCK_EXTENSION_INTERVAL) {
return async.setImmediate(callback)
}
return extendLock(this.key, this.value, error => {
if (error != null) {
return callback(OError.tag(error))
}
this.lockTakenAt = Date.now()
return callback()
})
}
release(callback) {
// The lock can expire in redis but the process may carry on. This check of
// the elapsed time is designed to log if this happens.
const lockLength = Date.now() - this.lockTakenAt
if (lockLength > LOCK_TTL * 1000) {
metrics.inc('lock.project.exceeded_lock_timeout')
logger.debug('exceeded lock timeout', {
key: this.key,
slowExecutionError: this.slowExecutionError,
})
}
return releaseLock(this.key, this.value, error => {
this.timer.done()
if (error != null) {
return callback(OError.tag(error))
}
return callback()
})
}
}
/**
* Promisified version of runWithLock.
*
* @param {string} key
* @param {(extendLock: Function) => Promise<any>} runner
*/
async function runWithLockPromises(key, runner) {
const runnerCb = (extendLock, callback) => {
const extendLockPromises = promisify(extendLock)
runner(extendLockPromises)
.then(result => {
callback(null, result)
})
.catch(err => {
callback(err)
})
}
return await new Promise((resolve, reject) => {
runWithLock(key, runnerCb, (err, result) => {
if (err) {
reject(err)
} else {
resolve(result)
}
})
})
}
export const promises = {
tryLock: promisify(tryLock),
extendLock: promisify(extendLock),
getLock: promisify(getLock),
checkLock: promisify(checkLock),
releaseLock: promisify(releaseLock),
runWithLock: runWithLockPromises,
}

View File

@@ -0,0 +1,15 @@
// @ts-check
import { prom } from '@overleaf/metrics'
export const historyFlushDurationSeconds = new prom.Histogram({
name: 'history_flush_duration_seconds',
help: 'Duration of a history flush in seconds',
buckets: [0.05, 0.1, 0.2, 0.3, 0.5, 1, 2, 5, 10],
})
export const historyFlushQueueSize = new prom.Histogram({
name: 'history_flush_queue_size',
help: 'Size of the queue during history flushes',
buckets: prom.exponentialBuckets(1, 2, 10),
})

View File

@@ -0,0 +1,20 @@
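/**
 * Compose adjacent operations where possible, reducing a list of operations
 * to a shorter, equivalent list. For example (illustrative), if `a` can be
 * composed with `b` but the result cannot be composed with `c`, then
 * compressOperations([a, b, c]) returns [a.compose(b), c].
 */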
export function compressOperations(operations) {
if (!operations.length) return []
const newOperations = []
let currentOperation = operations[0]
for (let operationId = 1; operationId < operations.length; operationId++) {
const nextOperation = operations[operationId]
if (currentOperation.canBeComposedWith(nextOperation)) {
currentOperation = currentOperation.compose(nextOperation)
} else {
// currentOperation and nextOperation cannot be composed. Push the
// currentOperation and start over with nextOperation.
newOperations.push(currentOperation)
currentOperation = nextOperation
}
}
newOperations.push(currentOperation)
return newOperations
}

View File

@@ -0,0 +1,80 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS206: Consider reworking classes to avoid initClass
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import Settings from '@overleaf/settings'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
const LOG_CUTOFF_TIME = 1000
const deltaMs = function (ta, tb) {
const nanoSeconds = (ta[0] - tb[0]) * 1e9 + (ta[1] - tb[1])
const milliSeconds = Math.floor(nanoSeconds * 1e-6)
return milliSeconds
}
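// Lightweight profiler: call `log(label)` after each step to record the time
// since the previous step, and `end()` to emit the timings as metrics and log
// the breakdown when the total exceeds LOG_CUTOFF_TIME (1s).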
export class Profiler {
constructor(name, args) {
this.name = name
this.args = args
this.t0 = this.t = process.hrtime()
this.start = new Date()
this.updateTimes = []
}
log(label) {
const t1 = process.hrtime()
const dtMilliSec = deltaMs(t1, this.t)
this.t = t1
this.updateTimes.push([label, dtMilliSec]) // timings in ms
return this // make it chainable
}
end(message) {
const totalTime = deltaMs(this.t, this.t0)
// record the update times in metrics
for (const update of Array.from(this.updateTimes)) {
metrics.timing(`profile.${this.name}.${update[0]}`, update[1])
}
if (totalTime > LOG_CUTOFF_TIME) {
// log anything greater than cutoff
const args = {}
for (const k in this.args) {
const v = this.args[k]
args[k] = v
}
args.updateTimes = this.updateTimes
args.start = this.start
args.end = new Date()
logger.debug(args, this.name)
}
return totalTime
}
getTimeDelta() {
const lastIdx = this.updateTimes.length - 1
if (lastIdx >= 0) {
return this.updateTimes[lastIdx][1]
} else {
return 0
}
}
wrap(label, fn) {
// create a wrapped function which calls profile.log(label) before continuing execution
const newFn = (...args) => {
this.log(label)
return fn(...Array.from(args || []))
}
return newFn
}
}

View File

@@ -0,0 +1,445 @@
import { callbackify, promisify } from 'node:util'
import { setTimeout } from 'node:timers/promises'
import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import redis from '@overleaf/redis-wrapper'
import metrics from '@overleaf/metrics'
import OError from '@overleaf/o-error'
/**
* Maximum size taken from the redis queue, to prevent project history
* consuming unbounded amounts of memory
*/
export const RAW_UPDATE_SIZE_THRESHOLD = 4 * 1024 * 1024
/**
* Batch size when reading updates from Redis
*/
export const RAW_UPDATES_BATCH_SIZE = 50
/**
* Maximum length of ops (insertion and deletions) to process in a single
* iteration
*/
export const MAX_UPDATE_OP_LENGTH = 1024
/**
* Warn if we exceed this raw update size, the final compressed updates we
* send could be smaller than this
*/
const WARN_RAW_UPDATE_SIZE = 1024 * 1024
/**
* Maximum number of new docs to process in a single iteration
*/
export const MAX_NEW_DOC_CONTENT_COUNT = 32
const CACHE_TTL_IN_SECONDS = 3600
const Keys = Settings.redis.project_history.key_schema
const rclient = redis.createClient(Settings.redis.project_history)
async function countUnprocessedUpdates(projectId) {
const key = Keys.projectHistoryOps({ project_id: projectId })
const updates = await rclient.llen(key)
return updates
}
async function* getRawUpdates(projectId) {
const key = Keys.projectHistoryOps({ project_id: projectId })
let start = 0
while (true) {
const stop = start + RAW_UPDATES_BATCH_SIZE - 1
const updates = await rclient.lrange(key, start, stop)
for (const update of updates) {
yield update
}
if (updates.length < RAW_UPDATES_BATCH_SIZE) {
return
}
start += RAW_UPDATES_BATCH_SIZE
}
}
async function getRawUpdatesBatch(projectId, batchSize) {
const rawUpdates = []
let totalRawUpdatesSize = 0
let hasMore = false
for await (const rawUpdate of getRawUpdates(projectId)) {
totalRawUpdatesSize += rawUpdate.length
if (
rawUpdates.length > 0 &&
totalRawUpdatesSize > RAW_UPDATE_SIZE_THRESHOLD
) {
hasMore = true
break
}
rawUpdates.push(rawUpdate)
if (rawUpdates.length >= batchSize) {
hasMore = true
break
}
}
metrics.timing('redis.incoming.bytes', totalRawUpdatesSize, 1)
if (totalRawUpdatesSize > WARN_RAW_UPDATE_SIZE) {
const rawUpdateSizes = rawUpdates.map(rawUpdate => rawUpdate.length)
logger.warn(
{
projectId,
totalRawUpdatesSize,
rawUpdateSizes,
},
'large raw update size'
)
}
return { rawUpdates, hasMore }
}
export function parseDocUpdates(jsonUpdates) {
return jsonUpdates.map(update => JSON.parse(update))
}
async function getUpdatesInBatches(projectId, batchSize, runner) {
let moreBatches = true
while (moreBatches) {
const redisBatch = await getRawUpdatesBatch(projectId, batchSize)
if (redisBatch.rawUpdates.length === 0) {
break
}
moreBatches = redisBatch.hasMore
const rawUpdates = []
const updates = []
let totalOpLength = 0
let totalDocContentCount = 0
for (const rawUpdate of redisBatch.rawUpdates) {
let update
try {
update = JSON.parse(rawUpdate)
} catch (error) {
throw OError.tag(error, 'failed to parse update', {
projectId,
update: rawUpdate,
})
}
totalOpLength += update?.op?.length || 1
if (update.resyncDocContent) {
totalDocContentCount += 1
}
if (
updates.length > 0 &&
(totalOpLength > MAX_UPDATE_OP_LENGTH ||
totalDocContentCount > MAX_NEW_DOC_CONTENT_COUNT)
) {
moreBatches = true
break
}
if (update.resyncProjectStructureOnly) {
update._raw = rawUpdate
}
rawUpdates.push(rawUpdate)
updates.push(update)
}
await runner(updates)
await deleteAppliedDocUpdates(projectId, rawUpdates)
if (batchSize === 1) {
// Special case for single stepping, don't process more batches
break
}
}
}
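// Illustrative sketch (not part of the original module): draining a project's
// queue in batches of 100 updates with getUpdatesInBatches(). The projectId
// and the batch size are example values.
// eslint-disable-next-line no-unused-vars
async function exampleDrainQueue(projectId) {
  let processed = 0
  await getUpdatesInBatches(projectId, 100, async updates => {
    // Each batch arrives already JSON-parsed; once the runner resolves, the
    // corresponding raw updates are removed from Redis.
    processed += updates.length
  })
  return processed
}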
/**
* @param {string} projectId
* @param {ResyncProjectStructureUpdate} update
* @return {Promise<void>}
*/
async function deleteAppliedDocUpdate(projectId, update) {
const raw = update._raw
// Delete the first occurrence of the update with LREM KEY COUNT VALUE,
// setting COUNT to 1, which 'removes COUNT elements equal to value,
// moving from head to tail'.
//
// If COUNT were 0 the entire list would be searched, which would block
// redis since it would be an O(N) operation where N is the length of
// the queue.
metrics.summary('redis.projectHistoryOps', raw.length, {
status: 'lrem',
})
await rclient.lrem(Keys.projectHistoryOps({ project_id: projectId }), 1, raw)
}
async function deleteAppliedDocUpdates(projectId, updates) {
const multi = rclient.multi()
// Delete all the updates which have been applied (exact match)
for (const update of updates) {
// Delete the first occurrence of the update with LREM KEY COUNT VALUE,
// setting COUNT to 1, which 'removes COUNT elements equal to value,
// moving from head to tail'.
//
// If COUNT were 0 the entire list would be searched, which would block
// redis since it would be an O(N) operation where N is the length of
// the queue, multiplied by the batch size inside this MULTI.
metrics.summary('redis.projectHistoryOps', update.length, {
status: 'lrem',
})
multi.lrem(Keys.projectHistoryOps({ project_id: projectId }), 1, update)
}
if (updates.length > 0) {
multi.del(Keys.projectHistoryFirstOpTimestamp({ project_id: projectId }))
}
await multi.exec()
}
/**
* Deletes the entire queue - use with caution
*/
async function destroyDocUpdatesQueue(projectId) {
await rclient.del(
Keys.projectHistoryOps({ project_id: projectId }),
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
)
}
/**
* Iterate over keys asynchronously using redis scan (non-blocking)
*
* Handles all cluster nodes, or a single redis server
*/
async function _getKeys(pattern, limit) {
const nodes = rclient.nodes?.('master') || [rclient]
const keysByNode = []
for (const node of nodes) {
const keys = await _getKeysFromNode(node, pattern, limit)
keysByNode.push(keys)
}
return [].concat(...keysByNode)
}
async function _getKeysFromNode(node, pattern, limit) {
let cursor = 0 // redis iterator
const keySet = new Set() // avoid duplicate results
const batchSize = limit != null ? Math.min(limit, 1000) : 1000
// scan over all keys looking for pattern
while (true) {
const reply = await node.scan(cursor, 'MATCH', pattern, 'COUNT', batchSize)
const [newCursor, keys] = reply
cursor = newCursor
for (const key of keys) {
keySet.add(key)
}
const noResults = cursor === '0' // redis returns string results not numeric
const limitReached = limit != null && keySet.size >= limit
if (noResults || limitReached) {
return Array.from(keySet)
}
// avoid hitting redis too hard
await setTimeout(10)
}
}
/**
* Extract ids from keys like DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b
* or DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b} (for redis cluster)
*/
function _extractIds(keyList) {
return keyList.map(key => {
const m = key.match(/:\{?([0-9a-f]{24})\}?/) // extract object id
return m[1]
})
}
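// Illustrative sketch (not part of the original module): the two key shapes
// _extractIds() handles, taken from the comment above. The object id is made
// up.
// eslint-disable-next-line no-unused-vars
function exampleExtractIds() {
  return _extractIds([
    'DocsWithHistoryOps:57fd0b1f53a8396d22b2c24b', // single redis server
    'DocsWithHistoryOps:{57fd0b1f53a8396d22b2c24b}', // redis cluster hash tag
  ])
  // => ['57fd0b1f53a8396d22b2c24b', '57fd0b1f53a8396d22b2c24b']
}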
async function getProjectIdsWithHistoryOps(limit) {
const projectKeys = await _getKeys(
Keys.projectHistoryOps({ project_id: '*' }),
limit
)
const projectIds = _extractIds(projectKeys)
return projectIds
}
async function getProjectIdsWithHistoryOpsCount() {
const projectIds = await getProjectIdsWithHistoryOps()
const queuedProjectsCount = projectIds.length
metrics.globalGauge('queued-projects', queuedProjectsCount)
return queuedProjectsCount
}
async function setFirstOpTimestamp(projectId) {
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
// store current time as an integer (string)
await rclient.setnx(key, Date.now())
}
async function getFirstOpTimestamp(projectId) {
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
const result = await rclient.get(key)
// convert stored time back to a numeric timestamp
const timestamp = parseInt(result, 10)
// check for invalid timestamp
if (isNaN(timestamp)) {
return null
}
// convert numeric timestamp to a date object
const firstOpTimestamp = new Date(timestamp)
return firstOpTimestamp
}
async function getFirstOpTimestamps(projectIds) {
const keys = projectIds.map(projectId =>
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
)
const results = await rclient.mget(keys)
const timestamps = results.map(result => {
// convert stored time back to a numeric timestamp
const timestamp = parseInt(result, 10)
// check for invalid timestamp
if (isNaN(timestamp)) {
return null
}
// convert numeric timestamp to a date object
return new Date(timestamp)
})
return timestamps
}
async function clearFirstOpTimestamp(projectId) {
const key = Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
await rclient.del(key)
}
async function getProjectIdsWithFirstOpTimestamps(limit) {
const projectKeys = await _getKeys(
Keys.projectHistoryFirstOpTimestamp({ project_id: '*' }),
limit
)
const projectIds = _extractIds(projectKeys)
return projectIds
}
async function clearDanglingFirstOpTimestamp(projectId) {
const count = await rclient.exists(
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId }),
Keys.projectHistoryOps({ project_id: projectId })
)
if (count === 2 || count === 0) {
// both (or neither) keys are present, so don't delete the timestamp
return 0
}
// only one key is present, which makes this a dangling record,
// so delete the timestamp
const cleared = await rclient.del(
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId })
)
return cleared
}
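// Illustrative sketch (not part of the original module): a cleanup pass that
// removes first-op timestamps whose ops queue no longer exists. The limit of
// 1000 projects is an example value.
// eslint-disable-next-line no-unused-vars
async function exampleClearDanglingTimestamps() {
  const projectIds = await getProjectIdsWithFirstOpTimestamps(1000)
  let cleared = 0
  for (const projectId of projectIds) {
    // clearDanglingFirstOpTimestamp() only deletes the timestamp when exactly
    // one of the two keys (timestamp or ops queue) is present.
    cleared += await clearDanglingFirstOpTimestamp(projectId)
  }
  return cleared
}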
async function getCachedHistoryId(projectId) {
const key = Keys.projectHistoryCachedHistoryId({ project_id: projectId })
const historyId = await rclient.get(key)
return historyId
}
async function setCachedHistoryId(projectId, historyId) {
const key = Keys.projectHistoryCachedHistoryId({ project_id: projectId })
await rclient.setex(key, CACHE_TTL_IN_SECONDS, historyId)
}
async function clearCachedHistoryId(projectId) {
const key = Keys.projectHistoryCachedHistoryId({ project_id: projectId })
await rclient.del(key)
}
// EXPORTS
const countUnprocessedUpdatesCb = callbackify(countUnprocessedUpdates)
const getRawUpdatesBatchCb = callbackify(getRawUpdatesBatch)
const deleteAppliedDocUpdatesCb = callbackify(deleteAppliedDocUpdates)
const destroyDocUpdatesQueueCb = callbackify(destroyDocUpdatesQueue)
const getProjectIdsWithHistoryOpsCb = callbackify(getProjectIdsWithHistoryOps)
const getProjectIdsWithHistoryOpsCountCb = callbackify(
getProjectIdsWithHistoryOpsCount
)
const setFirstOpTimestampCb = callbackify(setFirstOpTimestamp)
const getFirstOpTimestampCb = callbackify(getFirstOpTimestamp)
const getFirstOpTimestampsCb = callbackify(getFirstOpTimestamps)
const clearFirstOpTimestampCb = callbackify(clearFirstOpTimestamp)
const getProjectIdsWithFirstOpTimestampsCb = callbackify(
getProjectIdsWithFirstOpTimestamps
)
const clearDanglingFirstOpTimestampCb = callbackify(
clearDanglingFirstOpTimestamp
)
const getCachedHistoryIdCb = callbackify(getCachedHistoryId)
const setCachedHistoryIdCb = callbackify(setCachedHistoryId)
const clearCachedHistoryIdCb = callbackify(clearCachedHistoryId)
const getUpdatesInBatchesCb = function (
projectId,
batchSize,
runner,
callback
) {
const runnerPromises = promisify(runner)
getUpdatesInBatches(projectId, batchSize, runnerPromises)
.then(result => {
callback(null, result)
})
.catch(err => {
callback(err)
})
}
export {
countUnprocessedUpdatesCb as countUnprocessedUpdates,
getRawUpdatesBatchCb as getRawUpdatesBatch,
deleteAppliedDocUpdatesCb as deleteAppliedDocUpdates,
destroyDocUpdatesQueueCb as destroyDocUpdatesQueue,
getUpdatesInBatchesCb as getUpdatesInBatches,
getProjectIdsWithHistoryOpsCb as getProjectIdsWithHistoryOps,
getProjectIdsWithHistoryOpsCountCb as getProjectIdsWithHistoryOpsCount,
setFirstOpTimestampCb as setFirstOpTimestamp,
getFirstOpTimestampCb as getFirstOpTimestamp,
getFirstOpTimestampsCb as getFirstOpTimestamps,
clearFirstOpTimestampCb as clearFirstOpTimestamp,
getProjectIdsWithFirstOpTimestampsCb as getProjectIdsWithFirstOpTimestamps,
clearDanglingFirstOpTimestampCb as clearDanglingFirstOpTimestamp,
getCachedHistoryIdCb as getCachedHistoryId,
setCachedHistoryIdCb as setCachedHistoryId,
clearCachedHistoryIdCb as clearCachedHistoryId,
}
export const promises = {
countUnprocessedUpdates,
getRawUpdatesBatch,
deleteAppliedDocUpdates,
deleteAppliedDocUpdate,
destroyDocUpdatesQueue,
getUpdatesInBatches,
getProjectIdsWithHistoryOps,
getProjectIdsWithHistoryOpsCount,
setFirstOpTimestamp,
getFirstOpTimestamp,
getFirstOpTimestamps,
clearFirstOpTimestamp,
getProjectIdsWithFirstOpTimestamps,
clearDanglingFirstOpTimestamp,
getCachedHistoryId,
setCachedHistoryId,
clearCachedHistoryId,
}

View File

@@ -0,0 +1,194 @@
import _ from 'lodash'
import { promisify, callbackify } from 'node:util'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as SyncManager from './SyncManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as RedisManager from './RedisManager.js'
import * as ErrorRecorder from './ErrorRecorder.js'
const sleep = promisify(setTimeout)
const TEMPORARY_FAILURES = [
'Error: ENOSPC: no space left on device, write',
'Error: ESOCKETTIMEDOUT',
'Error: failed to extend lock',
'Error: tried to release timed out lock',
'Error: Timeout',
]
const HARD_FAILURES = [
'Error: history store a non-success status code: 422',
'OError: history store a non-success status code: 422',
'OpsOutOfOrderError: project structure version out of order',
'OpsOutOfOrderError: project structure version out of order on incoming updates',
'OpsOutOfOrderError: doc version out of order',
'OpsOutOfOrderError: doc version out of order on incoming updates',
]
const MAX_RESYNC_ATTEMPTS = 2
const MAX_SOFT_RESYNC_ATTEMPTS = 1
export const promises = {}
promises.retryFailures = async (options = {}) => {
const { failureType, timeout, limit } = options
if (failureType === 'soft') {
const batch = await getFailureBatch(softErrorSelector, limit)
const result = await retryFailureBatch(batch, timeout, async failure => {
await UpdatesProcessor.promises.processUpdatesForProject(
failure.project_id
)
})
return result
} else if (failureType === 'hard') {
const batch = await getFailureBatch(hardErrorSelector, limit)
const result = await retryFailureBatch(batch, timeout, async failure => {
await resyncProject(failure.project_id, {
hard: failureRequiresHardResync(failure),
})
})
return result
}
}
export const retryFailures = callbackify(promises.retryFailures)
function softErrorSelector(failure) {
return (
(isTemporaryFailure(failure) && !isRepeatedFailure(failure)) ||
(isFirstFailure(failure) && !isHardFailure(failure))
)
}
function hardErrorSelector(failure) {
return (
(isHardFailure(failure) || isRepeatedFailure(failure)) &&
!isStuckFailure(failure)
)
}
function isTemporaryFailure(failure) {
return TEMPORARY_FAILURES.includes(failure.error)
}
export function isHardFailure(failure) {
return HARD_FAILURES.includes(failure.error)
}
export function isFirstFailure(failure) {
return failure.attempts <= 1
}
function isRepeatedFailure(failure) {
return failure.attempts > 3
}
function isStuckFailure(failure) {
return (
failure.resyncAttempts != null &&
failure.resyncAttempts >= MAX_RESYNC_ATTEMPTS
)
}
function failureRequiresHardResync(failure) {
return (
failure.resyncAttempts != null &&
failure.resyncAttempts >= MAX_SOFT_RESYNC_ATTEMPTS
)
}
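// Illustrative sketch (not part of the original module): how the selectors
// above classify two failure records. The records are made up.
// eslint-disable-next-line no-unused-vars
function exampleClassifyFailures() {
  const timeoutOnce = { error: 'Error: Timeout', attempts: 1 }
  const badDocVersion = {
    error: 'OpsOutOfOrderError: doc version out of order',
    attempts: 1,
    resyncAttempts: 0,
  }
  return {
    // temporary error, not repeated -> retried with a plain flush
    retryAsSoft: softErrorSelector(timeoutOnce), // true
    // hard error, not yet stuck -> retried with a resync
    retryAsHard: hardErrorSelector(badDocVersion), // true
  }
}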
async function getFailureBatch(selector, limit) {
let failures = await ErrorRecorder.promises.getFailedProjects()
failures = failures.filter(selector)
// randomise order
failures = _.shuffle(failures)
// put a limit on the number to retry
const projectsToRetryCount = failures.length
if (limit && projectsToRetryCount > limit) {
failures = failures.slice(0, limit)
}
logger.debug({ projectsToRetryCount, limit }, 'retrying failed projects')
return failures
}
async function retryFailureBatch(failures, timeout, retryHandler) {
const startTime = new Date()
// keep track of successes and failures
const failed = []
const succeeded = []
for (const failure of failures) {
const projectId = failure.project_id
const timeTaken = new Date() - startTime
if (timeout && timeTaken > timeout) {
// finish early due to timeout
logger.debug('background retries timed out')
break
}
logger.debug(
{ projectId, timeTaken },
'retrying failed project in background'
)
try {
await retryHandler(failure)
succeeded.push(projectId)
} catch (err) {
failed.push(projectId)
}
}
return { succeeded, failed }
}
async function resyncProject(projectId, options = {}) {
const { hard = false } = options
try {
if (!/^[0-9a-f]{24}$/.test(projectId)) {
logger.debug({ projectId }, 'clearing bad project id')
await ErrorRecorder.promises.clearError(projectId)
return
}
await checkProjectHasHistoryId(projectId)
if (hard) {
await SyncManager.promises.startHardResync(projectId)
} else {
await SyncManager.promises.startResync(projectId)
}
await waitUntilRedisQueueIsEmpty(projectId)
await checkFailureRecordWasRemoved(projectId)
} catch (err) {
throw new OError({
message: 'failed to resync project',
info: { projectId, hard },
}).withCause(err)
}
}
async function checkProjectHasHistoryId(projectId) {
const historyId = await WebApiManager.promises.getHistoryId(projectId)
if (historyId == null) {
throw new OError('no history id')
}
}
async function waitUntilRedisQueueIsEmpty(projectId) {
for (let attempts = 0; attempts < 30; attempts++) {
const updatesCount =
await RedisManager.promises.countUnprocessedUpdates(projectId)
if (updatesCount === 0) {
return
}
await sleep(1000)
}
throw new OError('queue not empty')
}
async function checkFailureRecordWasRemoved(projectId) {
const failureRecord = await ErrorRecorder.promises.getFailureRecord(projectId)
if (failureRecord) {
throw new OError('failure record still exists')
}
}

View File

@@ -0,0 +1,250 @@
import OError from '@overleaf/o-error'
import * as HttpController from './HttpController.js'
import { Joi, validate } from './Validation.js'
export function initialize(app) {
app.use(
validate({
params: Joi.object({
project_id: Joi.string().regex(/^[0-9a-f]{24}$/),
user_id: Joi.string().regex(/^[0-9a-f]{24}$/),
label_id: Joi.string().regex(/^[0-9a-f]{24}$/),
version: Joi.number().integer(),
}),
})
)
// use an extended timeout on all endpoints, to allow for long requests to history-v1
app.use(longerTimeout)
app.post('/project', HttpController.initializeProject)
app.delete('/project/:project_id', HttpController.deleteProject)
app.get('/project/:project_id/snapshot', HttpController.getLatestSnapshot)
app.get(
'/project/:project_id/diff',
validate({
query: {
pathname: Joi.string().required(),
from: Joi.number().integer().required(),
to: Joi.number().integer().required(),
},
}),
HttpController.getDiff
)
app.get(
'/project/:project_id/filetree/diff',
validate({
query: {
from: Joi.number().integer().required(),
to: Joi.number().integer().required(),
},
}),
HttpController.getFileTreeDiff
)
app.get(
'/project/:project_id/updates',
validate({
query: {
before: Joi.number().integer(),
min_count: Joi.number().integer(),
},
}),
HttpController.getUpdates
)
app.get(
'/project/:project_id/changes-in-chunk',
validate({
query: {
since: Joi.number().integer().min(0),
},
}),
HttpController.getChangesInChunkSince
)
app.get('/project/:project_id/version', HttpController.latestVersion)
app.post(
'/project/:project_id/flush',
validate({
query: {
debug: Joi.boolean().default(false),
bisect: Joi.boolean().default(false),
},
}),
HttpController.flushProject
)
app.post(
'/project/:project_id/resync',
validate({
query: {
force: Joi.boolean().default(false),
},
body: {
force: Joi.boolean().default(false),
origin: Joi.object({
kind: Joi.string().required(),
}),
historyRangesMigration: Joi.string()
.optional()
.valid('forwards', 'backwards'),
},
}),
HttpController.resyncProject
)
app.get(
'/project/:project_id/dump',
validate({
query: {
count: Joi.number().integer(),
},
}),
HttpController.dumpProject
)
app.get('/project/:project_id/labels', HttpController.getLabels)
app.post(
'/project/:project_id/labels',
validate({
body: {
version: Joi.number().integer().required(),
comment: Joi.string().required(),
created_at: Joi.string(),
validate_exists: Joi.boolean().default(true),
user_id: Joi.string().allow(null),
},
}),
HttpController.createLabel
)
app.delete(
'/project/:project_id/user/:user_id/labels/:label_id',
validate({
params: Joi.object({
project_id: Joi.string().regex(/^[0-9a-f]{24}$/),
user_id: Joi.string().regex(/^[0-9a-f]{24}$/),
label_id: Joi.string().regex(/^[0-9a-f]{24}$/),
}),
}),
HttpController.deleteLabelForUser
)
app.delete(
'/project/:project_id/labels/:label_id',
validate({
params: Joi.object({
project_id: Joi.string().regex(/^[0-9a-f]{24}$/),
label_id: Joi.string().regex(/^[0-9a-f]{24}$/),
}),
}),
HttpController.deleteLabel
)
app.post(
'/user/:from_user/labels/transfer/:to_user',
HttpController.transferLabels
)
app.get(
'/project/:project_id/version/:version/:pathname',
HttpController.getFileSnapshot
)
app.get(
'/project/:project_id/ranges/version/:version/:pathname',
HttpController.getRangesSnapshot
)
app.get(
'/project/:project_id/metadata/version/:version/:pathname',
HttpController.getFileMetadataSnapshot
)
app.get(
'/project/:project_id/version/:version',
HttpController.getProjectSnapshot
)
app.get(
'/project/:project_id/paths/version/:version',
HttpController.getPathsAtVersion
)
app.post(
'/project/:project_id/force',
validate({
query: {
clear: Joi.boolean().default(false),
},
}),
HttpController.forceDebugProject
)
app.get('/project/:history_id/blob/:hash', HttpController.getProjectBlob)
app.get('/status/failures', HttpController.getFailures)
app.get('/status/queue', HttpController.getQueueCounts)
app.post(
'/retry/failures',
validate({
query: {
failureType: Joi.string().valid('soft', 'hard'),
// bail out after this time limit
timeout: Joi.number().integer().default(300),
// maximum number of projects to check
limit: Joi.number().integer().default(100),
callbackUrl: Joi.string(),
},
}),
HttpController.retryFailures
)
app.post(
'/flush/old',
validate({
query: {
// flush projects with queued ops older than this
maxAge: Joi.number()
.integer()
.default(6 * 3600),
// pause this amount of time between checking queues
queueDelay: Joi.number().integer().default(100),
// maximum number of queues to check
limit: Joi.number().integer().default(1000),
// maximum amount of time allowed
timeout: Joi.number()
.integer()
.default(60 * 1000),
// whether to run in the background
background: Joi.boolean().falsy('0').truthy('1').default(false),
},
}),
HttpController.flushOld
)
app.get('/status', (req, res, next) => res.send('project-history is up'))
app.get('/oops', function (req, res, next) {
throw new OError('dummy test error')
})
app.get('/check_lock', HttpController.checkLock)
app.get('/health_check', HttpController.healthCheck)
}
function longerTimeout(req, res, next) {
res.setTimeout(6 * 60 * 1000)
next()
}
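// Illustrative sketch (not part of the original module): how an entry point
// could mount these routes on an Express app. The express import and the
// JSON body parser are assumptions about the surrounding application.
// eslint-disable-next-line no-unused-vars
async function exampleCreateApp() {
  const { default: express } = await import('express')
  const app = express()
  app.use(express.json())
  initialize(app)
  return app // the caller would then call app.listen() on its configured port
}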

View File

@@ -0,0 +1,426 @@
// @ts-check
import { callbackify } from 'node:util'
import Core from 'overleaf-editor-core'
import { Readable as StringStream } from 'node:stream'
import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as WebApiManager from './WebApiManager.js'
import * as Errors from './Errors.js'
import _ from 'lodash'
/**
* @import { Snapshot } from 'overleaf-editor-core'
* @import { RangesSnapshot } from './types'
*/
StringStream.prototype._read = function () {}
const MAX_REQUESTS = 4 // maximum number of parallel requests to v1 history service
/**
*
* @param {string} projectId
* @param {number} version
* @param {string} pathname
*/
async function getFileSnapshotStream(projectId, version, pathname) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
const file = snapshot.getFile(pathname)
if (file == null) {
throw new Errors.NotFoundError(`${pathname} not found`, {
projectId,
version,
pathname,
})
}
const historyId = await WebApiManager.promises.getHistoryId(projectId)
if (file.isEditable()) {
await file.load('eager', HistoryStoreManager.getBlobStore(historyId))
const stream = new StringStream()
stream.push(file.getContent({ filterTrackedDeletes: true }))
stream.push(null)
return stream
} else {
return await HistoryStoreManager.promises.getProjectBlobStream(
historyId,
file.getHash()
)
}
}
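// Illustrative sketch (not part of the original module): buffering a file
// snapshot stream into a string. The arguments are hypothetical values.
// eslint-disable-next-line no-unused-vars
async function exampleReadFileAtVersion(projectId, version, pathname) {
  const stream = await getFileSnapshotStream(projectId, version, pathname)
  const chunks = []
  for await (const chunk of stream) {
    chunks.push(Buffer.from(chunk))
  }
  return Buffer.concat(chunks).toString('utf8')
}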
/**
* Constructs a snapshot of the ranges in a document-updater compatible format.
* Positions will be relative to a document where tracked deletes have been
* removed from the string. This also means that if a tracked delete overlaps
* a comment range, the comment range will be truncated.
*
* @param {string} projectId
* @param {number} version
* @param {string} pathname
* @returns {Promise<RangesSnapshot>}
*/
async function getRangesSnapshot(projectId, version, pathname) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
const file = snapshot.getFile(pathname)
if (!file) {
throw new Errors.NotFoundError(`${pathname} not found`, {
projectId,
version,
pathname,
})
}
if (!file.isEditable()) {
// A binary file has no tracked changes or comments
return {
changes: [],
comments: [],
}
}
const historyId = await WebApiManager.promises.getHistoryId(projectId)
await file.load('eager', HistoryStoreManager.getBlobStore(historyId))
const content = file.getContent()
if (content == null) {
throw new Error('Unable to read file contents')
}
const trackedChanges = file.getTrackedChanges().asSorted()
const comments = file.getComments().toArray()
const docUpdaterCompatibleTrackedChanges = []
let trackedDeletionOffset = 0
for (const trackedChange of trackedChanges) {
const isTrackedDeletion = trackedChange.tracking.type === 'delete'
const trackedChangeContent = content.slice(
trackedChange.range.start,
trackedChange.range.end
)
const tcContent = isTrackedDeletion
? { d: trackedChangeContent }
: { i: trackedChangeContent }
docUpdaterCompatibleTrackedChanges.push({
op: {
p: trackedChange.range.start - trackedDeletionOffset,
...tcContent,
},
metadata: {
ts: trackedChange.tracking.ts.toISOString(),
user_id: trackedChange.tracking.userId,
},
})
if (isTrackedDeletion) {
trackedDeletionOffset += trackedChange.range.length
}
}
// Comments are shifted left by the length of any previous tracked deletions.
// If they overlap with a tracked deletion, they are truncated.
//
// Example:
// { } comment
// [ ] tracked deletion
// the quic[k {b]rown [fox] jum[ps} ove]r the lazy dog
// => rown jum
// starting at position 8
const trackedDeletions = trackedChanges.filter(
tc => tc.tracking.type === 'delete'
)
const docUpdaterCompatibleComments = []
for (const comment of comments) {
let trackedDeletionIndex = 0
if (comment.ranges.length === 0) {
// Translate detached comments into zero length comments at position 0
docUpdaterCompatibleComments.push({
op: {
p: 0,
c: '',
t: comment.id,
resolved: comment.resolved,
},
})
continue
}
// Consider a multiple range comment as a single comment that joins all its
// ranges
const commentStart = comment.ranges[0].start
const commentEnd = comment.ranges[comment.ranges.length - 1].end
let commentContent = ''
// Docupdater position
let position = commentStart
while (trackedDeletions[trackedDeletionIndex]?.range.end <= commentStart) {
// Skip over tracked deletions that are before the current comment range
position -= trackedDeletions[trackedDeletionIndex].range.length
trackedDeletionIndex++
}
if (trackedDeletions[trackedDeletionIndex]?.range.start < commentStart) {
// There's overlap with a tracked deletion, move the position left and
// truncate the overlap
position -=
commentStart - trackedDeletions[trackedDeletionIndex].range.start
}
// Cursor in the history content
let cursor = commentStart
while (cursor < commentEnd) {
const trackedDeletion = trackedDeletions[trackedDeletionIndex]
if (!trackedDeletion || trackedDeletion.range.start >= commentEnd) {
// We've run out of relevant tracked changes
commentContent += content.slice(cursor, commentEnd)
break
}
if (trackedDeletion.range.start > cursor) {
// There's a gap between the current cursor and the tracked deletion
commentContent += content.slice(cursor, trackedDeletion.range.start)
}
if (trackedDeletion.range.end <= commentEnd) {
// Skip to the end of the tracked delete
cursor = trackedDeletion.range.end
trackedDeletionIndex++
} else {
// We're done with that comment
break
}
}
docUpdaterCompatibleComments.push({
op: {
p: position,
c: commentContent,
t: comment.id,
resolved: comment.resolved,
},
id: comment.id,
})
}
return {
changes: docUpdaterCompatibleTrackedChanges,
comments: docUpdaterCompatibleComments,
}
}
/**
* Gets the file metadata at a specific version.
*
* @param {string} projectId
* @param {number} version
* @param {string} pathname
* @returns {Promise<{metadata: any}>}
*/
async function getFileMetadataSnapshot(projectId, version, pathname) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
const file = snapshot.getFile(pathname)
if (!file) {
throw new Errors.NotFoundError(`${pathname} not found`, {
projectId,
version,
pathname,
})
}
const rawMetadata = file.getMetadata()
const metadata = _.isEmpty(rawMetadata) ? undefined : rawMetadata
return { metadata }
}
// Returns a project snapshot containing the document content for files with
// text operations in the relevant chunk, and hashes for unmodified/binary
// files. Used by the git bridge to get the state of the project.
async function getProjectSnapshot(projectId, version) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
const historyId = await WebApiManager.promises.getHistoryId(projectId)
await _loadFilesLimit(
snapshot,
'eager',
HistoryStoreManager.getBlobStore(historyId)
)
return {
projectId,
files: snapshot.getFileMap().map(file => {
if (!file) {
return null
}
const content = file.getContent({
filterTrackedDeletes: true,
})
if (content === null) {
return { data: { hash: file.getHash() } }
}
return { data: { content } }
}),
}
}
async function getPathsAtVersion(projectId, version) {
const snapshot = await _getSnapshotAtVersion(projectId, version)
return {
paths: snapshot.getFilePathnames(),
}
}
/**
*
* @param {string} projectId
* @param {number} version
*/
async function _getSnapshotAtVersion(projectId, version) {
const historyId = await WebApiManager.promises.getHistoryId(projectId)
const data = await HistoryStoreManager.promises.getChunkAtVersion(
projectId,
historyId,
version
)
const chunk = Core.Chunk.fromRaw(data.chunk)
const snapshot = chunk.getSnapshot()
const changes = chunk.getChanges().slice(0, version - chunk.getStartVersion())
snapshot.applyAll(changes)
return snapshot
}
/**
* @param {string} projectId
* @param {string} historyId
* @return {Promise<Record<string, import('overleaf-editor-core').File>>}
*/
async function getLatestSnapshotFiles(projectId, historyId) {
const data = await HistoryStoreManager.promises.getMostRecentChunk(
projectId,
historyId
)
return await getLatestSnapshotFilesForChunk(historyId, data)
}
/**
* @param {string} historyId
* @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} chunk
* @return {Promise<Record<string, import('overleaf-editor-core').File>>}
*/
async function getLatestSnapshotFilesForChunk(historyId, chunk) {
const { snapshot } = getLatestSnapshotFromChunk(chunk)
const snapshotFiles = await snapshot.loadFiles(
'lazy',
HistoryStoreManager.getBlobStore(historyId)
)
return snapshotFiles
}
/**
* @param {string} projectId
* @param {string} historyId
* @return {Promise<{version: number, snapshot: import('overleaf-editor-core').Snapshot}>}
*/
async function getLatestSnapshot(projectId, historyId) {
const data = await HistoryStoreManager.promises.getMostRecentChunk(
projectId,
historyId
)
return getLatestSnapshotFromChunk(data)
}
/**
* @param {{chunk: import('overleaf-editor-core/lib/types.js').RawChunk}} data
* @return {{version: number, snapshot: import('overleaf-editor-core').Snapshot}}
*/
function getLatestSnapshotFromChunk(data) {
if (data == null || data.chunk == null) {
throw new OError('undefined chunk')
}
// apply all the changes in the chunk to get the current snapshot
const chunk = Core.Chunk.fromRaw(data.chunk)
const snapshot = chunk.getSnapshot()
const changes = chunk.getChanges()
snapshot.applyAll(changes)
return {
snapshot,
version: chunk.getEndVersion(),
}
}
async function getChangesInChunkSince(projectId, historyId, sinceVersion) {
const latestChunk = Core.Chunk.fromRaw(
(
await HistoryStoreManager.promises.getMostRecentChunk(
projectId,
historyId
)
).chunk
)
if (sinceVersion > latestChunk.getEndVersion()) {
throw new Errors.BadRequestError(
'requested version past the end of the history'
)
}
const latestStartVersion = latestChunk.getStartVersion()
let chunk = latestChunk
if (sinceVersion < latestStartVersion) {
chunk = Core.Chunk.fromRaw(
(
await HistoryStoreManager.promises.getChunkAtVersion(
projectId,
historyId,
sinceVersion
)
).chunk
)
}
const changes = chunk
.getChanges()
.slice(sinceVersion - chunk.getStartVersion())
return { latestStartVersion, changes }
}
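// Illustrative sketch (not part of the original module): fetching the changes
// applied since a known version. The arguments are hypothetical values.
// eslint-disable-next-line no-unused-vars
async function exampleChangesSince(projectId, historyId, sinceVersion) {
  const { latestStartVersion, changes } = await getChangesInChunkSince(
    projectId,
    historyId,
    sinceVersion
  )
  // If sinceVersion predates the latest chunk, the changes come from an older
  // chunk; latestStartVersion tells the caller where the latest chunk starts,
  // so it can request the remainder with a follow-up call.
  return { latestStartVersion, changeCount: changes.length }
}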
async function _loadFilesLimit(snapshot, kind, blobStore) {
await snapshot.fileMap.mapAsync(async file => {
// Only load changed files or files with tracked changes; others can be
// dereferenced from their blobs (this method is only used by the git
// bridge, which understands how to load blobs).
if (!file.isEditable() || (file.getHash() && !file.getRangesHash())) {
return
}
await file.load(kind, blobStore)
}, MAX_REQUESTS)
}
// EXPORTS
const getChangesInChunkSinceCb = callbackify(getChangesInChunkSince)
const getFileSnapshotStreamCb = callbackify(getFileSnapshotStream)
const getProjectSnapshotCb = callbackify(getProjectSnapshot)
const getLatestSnapshotCb = callbackify(getLatestSnapshot)
const getLatestSnapshotFilesCb = callbackify(getLatestSnapshotFiles)
const getLatestSnapshotFilesForChunkCb = callbackify(
getLatestSnapshotFilesForChunk
)
const getRangesSnapshotCb = callbackify(getRangesSnapshot)
const getFileMetadataSnapshotCb = callbackify(getFileMetadataSnapshot)
const getPathsAtVersionCb = callbackify(getPathsAtVersion)
export {
getLatestSnapshotFromChunk,
getChangesInChunkSinceCb as getChangesInChunkSince,
getFileSnapshotStreamCb as getFileSnapshotStream,
getProjectSnapshotCb as getProjectSnapshot,
getFileMetadataSnapshotCb as getFileMetadataSnapshot,
getLatestSnapshotCb as getLatestSnapshot,
getLatestSnapshotFilesCb as getLatestSnapshotFiles,
getLatestSnapshotFilesForChunkCb as getLatestSnapshotFilesForChunk,
getRangesSnapshotCb as getRangesSnapshot,
getPathsAtVersionCb as getPathsAtVersion,
}
export const promises = {
getChangesInChunkSince,
getFileSnapshotStream,
getProjectSnapshot,
getLatestSnapshot,
getLatestSnapshotFiles,
getLatestSnapshotFilesForChunk,
getRangesSnapshot,
getPathsAtVersion,
getFileMetadataSnapshot,
}

View File

@@ -0,0 +1,354 @@
import _ from 'lodash'
import async from 'async'
import logger from '@overleaf/logger'
import OError from '@overleaf/o-error'
import * as ChunkTranslator from './ChunkTranslator.js'
import * as HistoryApiManager from './HistoryApiManager.js'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as LabelsManager from './LabelsManager.js'
import * as UpdatesProcessor from './UpdatesProcessor.js'
import * as WebApiManager from './WebApiManager.js'
const MAX_CHUNK_REQUESTS = 5
const TIME_BETWEEN_DISTINCT_UPDATES = 5 * 60 * 1000 // five minutes
export function getSummarizedProjectUpdates(projectId, options, callback) {
// Some notes on versions:
//
// Versions of the project are like the fenceposts between updates.
// An update applies to a certain version of the project, and gives us the
// next version.
//
// When we ask for updates 'before' a version, this includes the update
// that created the version equal to 'before'.
//
// A chunk in OL has a 'startVersion', which is the version of the project
// before any of the updates in it were applied. This is the same version as
// the last update in the previous chunk would have created.
//
// If we ask the OL history store for the chunk with version that is the end of one
// chunk and the start of another, it will return the older chunk, i.e.
// the chunk with the updates that led up to that version.
//
// So once we read in the updates from a chunk, and want to get the updates from
// the previous chunk, we ask OL for the chunk with the version equal to the
// 'startVersion' of the newer chunk we just read.
let nextVersionToRequest
if (options == null) {
options = {}
}
if (!options.min_count) {
options.min_count = 25
}
if (options.before != null) {
// The version is of the doc, so we want the updates before that version,
// which includes the update that created that version.
nextVersionToRequest = options.before
} else {
// Return the latest updates first if no nextVersionToRequest is set.
nextVersionToRequest = null
}
UpdatesProcessor.processUpdatesForProject(projectId, function (error) {
if (error) {
return callback(OError.tag(error))
}
LabelsManager.getLabels(projectId, function (error, labels) {
if (error) {
return callback(OError.tag(error))
}
const labelsByVersion = {}
for (const label of labels) {
if (labelsByVersion[label.version] == null) {
labelsByVersion[label.version] = []
}
labelsByVersion[label.version].push(label)
}
WebApiManager.getHistoryId(projectId, function (error, historyId) {
if (error) return callback(error)
let chunksRequested = 0
let summarizedUpdates = []
let toV = null
const shouldRequestMoreUpdates = cb => {
return cb(
null,
chunksRequested < MAX_CHUNK_REQUESTS &&
(nextVersionToRequest == null || nextVersionToRequest > 0) &&
summarizedUpdates.length < options.min_count
)
}
const getNextBatchOfUpdates = cb =>
_getProjectUpdates(
projectId,
historyId,
nextVersionToRequest,
function (error, updateSet, startVersion) {
if (error) {
return cb(OError.tag(error))
}
// Updates are returned in time order, but we want to go back in time
updateSet.reverse()
updateSet = discardUnwantedUpdates(updateSet)
;({ summarizedUpdates, toV } = _summarizeUpdates(
updateSet,
labelsByVersion,
summarizedUpdates,
toV
))
nextVersionToRequest = startVersion
chunksRequested += 1
cb()
}
)
function discardUnwantedUpdates(updateSet) {
// We're getting whole chunks from the OL history store, but we might
// only want updates from before a certain version
if (options.before == null) {
return updateSet
} else {
return updateSet.filter(u => u.v < options.before)
}
}
// If the project doesn't have a history then we can bail out here
HistoryApiManager.shouldUseProjectHistory(
projectId,
function (error, shouldUseProjectHistory) {
if (error) {
return callback(OError.tag(error))
}
if (shouldUseProjectHistory) {
async.whilst(
shouldRequestMoreUpdates,
getNextBatchOfUpdates,
function (error) {
if (error) {
return callback(OError.tag(error))
}
callback(
null,
summarizedUpdates,
nextVersionToRequest > 0 ? nextVersionToRequest : undefined
)
}
)
} else {
logger.debug(
{ projectId },
'returning no updates as project does not use history'
)
callback(null, [])
}
}
)
})
})
})
}
function _getProjectUpdates(projectId, historyId, version, callback) {
function getChunk(cb) {
if (version != null) {
HistoryStoreManager.getChunkAtVersion(projectId, historyId, version, cb)
} else {
HistoryStoreManager.getMostRecentChunk(projectId, historyId, cb)
}
}
getChunk(function (error, chunk) {
if (error) {
return callback(OError.tag(error))
}
const oldestVersion = chunk.chunk.startVersion
ChunkTranslator.convertToSummarizedUpdates(
chunk,
function (error, updateSet) {
if (error) {
return callback(OError.tag(error))
}
callback(error, updateSet, oldestVersion)
}
)
})
}
function _summarizeUpdates(updates, labels, existingSummarizedUpdates, toV) {
if (existingSummarizedUpdates == null) {
existingSummarizedUpdates = []
}
const summarizedUpdates = existingSummarizedUpdates.slice()
for (const update of updates) {
if (toV == null) {
// This is the first update we've seen. Initialize toV.
toV = update.v + 1
}
// Skip empty updates (only record their version). Empty updates are
// updates that only contain comment operations. We don't have a UI for
// these yet.
if (isUpdateEmpty(update)) {
continue
}
// The client needs to know the exact version at which a delete happened,
// in order to be able to restore it. So even when summarizing, retain the
// version at which each projectOp happened.
for (const projectOp of update.project_ops) {
projectOp.atV = update.v
}
const summarizedUpdate = summarizedUpdates[summarizedUpdates.length - 1]
const labelsForVersion = labels[update.v + 1] || []
if (
summarizedUpdate &&
_shouldMergeUpdate(update, summarizedUpdate, labelsForVersion)
) {
_mergeUpdate(update, summarizedUpdate)
} else {
const newUpdate = {
fromV: update.v,
toV,
meta: {
users: update.meta.users,
start_ts: update.meta.start_ts,
end_ts: update.meta.end_ts,
},
labels: labelsForVersion,
pathnames: new Set(update.pathnames),
project_ops: update.project_ops.slice(), // Clone since we'll modify
}
if (update.meta.origin) {
newUpdate.meta.origin = update.meta.origin
}
summarizedUpdates.push(newUpdate)
}
toV = update.v
}
return { summarizedUpdates, toV }
}
/**
* Given an update, the latest summarized update, and the labels that apply to
* the update, figure out if we can merge the update into the summarized
* update.
*/
function _shouldMergeUpdate(update, summarizedUpdate, labels) {
// Split updates on labels
if (labels.length > 0) {
return false
}
// Split updates on origin
if (update.meta.origin) {
if (summarizedUpdate.meta.origin) {
if (update.meta.origin.kind !== summarizedUpdate.meta.origin.kind) {
return false
}
if (update.meta.origin.path !== summarizedUpdate.meta.origin.path) {
return false
}
if (
update.meta.origin.kind === 'file-restore' &&
update.meta.origin.timestamp !== summarizedUpdate.meta.origin.timestamp
) {
return false
}
if (
update.meta.origin.kind === 'project-restore' &&
update.meta.origin.timestamp !== summarizedUpdate.meta.origin.timestamp
) {
return false
}
} else {
return false
}
} else if (summarizedUpdate.meta.origin) {
return false
}
// Split updates if it's been too long since the last update. We're going
// backwards in time through the updates, so the update comes before the summarized update.
if (
summarizedUpdate.meta.end_ts - update.meta.start_ts >=
TIME_BETWEEN_DISTINCT_UPDATES
) {
return false
}
// Do not merge text operations and file operations, except for history resyncs
const updateHasTextOps = update.pathnames.length > 0
const updateHasFileOps = update.project_ops.length > 0
const summarizedUpdateHasTextOps = summarizedUpdate.pathnames.size > 0
const summarizedUpdateHasFileOps = summarizedUpdate.project_ops.length > 0
const isHistoryResync =
update.meta.origin &&
['history-resync', 'history-migration'].includes(update.meta.origin.kind)
if (
!isHistoryResync &&
((updateHasTextOps && summarizedUpdateHasFileOps) ||
(updateHasFileOps && summarizedUpdateHasTextOps))
) {
return false
}
return true
}
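// Illustrative sketch (not part of the original module): two updates by the
// same user merge when they are close together in time, but split once the
// gap reaches TIME_BETWEEN_DISTINCT_UPDATES. All field values are made up.
// eslint-disable-next-line no-unused-vars
function exampleShouldMerge() {
  const summarized = {
    meta: { end_ts: 10 * 60 * 1000, users: [] },
    pathnames: new Set(['main.tex']),
    project_ops: [],
  }
  const closeUpdate = {
    v: 41,
    meta: { start_ts: 7 * 60 * 1000, end_ts: 8 * 60 * 1000 },
    pathnames: ['main.tex'],
    project_ops: [],
  }
  const distantUpdate = {
    v: 40,
    meta: { start_ts: 2 * 60 * 1000, end_ts: 3 * 60 * 1000 },
    pathnames: ['main.tex'],
    project_ops: [],
  }
  return {
    close: _shouldMergeUpdate(closeUpdate, summarized, []), // true (3 min gap)
    distant: _shouldMergeUpdate(distantUpdate, summarized, []), // false (8 min gap)
  }
}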
/**
* Merge an update into a summarized update.
*
* This mutates the summarized update.
*/
function _mergeUpdate(update, summarizedUpdate) {
// check whether the users in this update are already present in the earliest
// (summarized) update; if not, add them to its users list
summarizedUpdate.meta.users = _.uniqBy(
_.union(summarizedUpdate.meta.users, update.meta.users),
function (user) {
if (user == null) {
return null
}
if (user.id == null) {
return user
}
return user.id
}
)
summarizedUpdate.fromV = Math.min(summarizedUpdate.fromV, update.v)
summarizedUpdate.toV = Math.max(summarizedUpdate.toV, update.v + 1)
summarizedUpdate.meta.start_ts = Math.min(
summarizedUpdate.meta.start_ts,
update.meta.start_ts
)
summarizedUpdate.meta.end_ts = Math.max(
summarizedUpdate.meta.end_ts,
update.meta.end_ts
)
// Add file operations
for (const op of update.project_ops || []) {
summarizedUpdate.project_ops.push(op)
if (op.add) {
// Merging a file creation. Remove any corresponding edit since that's redundant.
summarizedUpdate.pathnames.delete(op.add.pathname)
}
}
// Add edit operations
for (const pathname of update.pathnames || []) {
summarizedUpdate.pathnames.add(pathname)
}
}
function isUpdateEmpty(update) {
return update.project_ops.length === 0 && update.pathnames.length === 0
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,491 @@
// @ts-check
import OError from '@overleaf/o-error'
import DMP from 'diff-match-patch'
/**
* @import { DeleteOp, InsertOp, Op, Update } from './types'
*/
const MAX_TIME_BETWEEN_UPDATES = 60 * 1000 // one minute
const MAX_UPDATE_SIZE = 2 * 1024 * 1024 // 2 MB
const ADDED = 1
const REMOVED = -1
const UNCHANGED = 0
const strInject = (s1, pos, s2) => s1.slice(0, pos) + s2 + s1.slice(pos)
const strRemove = (s1, pos, length) => s1.slice(0, pos) + s1.slice(pos + length)
const dmp = new DMP()
dmp.Diff_Timeout = 0.1 // prevent the diff algorithm from searching too hard for changes in unrelated content
const cloneWithOp = function (update, op) {
// to improve performance, shallow clone the update
// and its meta property (also an object), then
// overwrite the op property directly.
update = Object.assign({}, update)
update.meta = Object.assign({}, update.meta)
update.op = op
return update
}
const mergeUpdatesWithOp = function (firstUpdate, secondUpdate, op) {
// We want to take doc_length and ts from the firstUpdate, v and doc_hash from the second
const update = cloneWithOp(firstUpdate, op)
if (secondUpdate.v != null) {
update.v = secondUpdate.v
}
if (secondUpdate.meta.doc_hash != null) {
update.meta.doc_hash = secondUpdate.meta.doc_hash
} else {
delete update.meta.doc_hash
}
return update
}
/**
* Adjust the given length to account for the given op
*
* The resulting length is the new length of the doc after the op is applied.
*
* @param {number} length
* @param {Op} op
* @param {object} opts
* @param {boolean} [opts.tracked] - whether or not the update is a tracked change
* @returns {number} the adjusted length
*/
function adjustLengthByOp(length, op, opts = {}) {
if ('i' in op && op.i != null) {
if (op.trackedDeleteRejection) {
// Tracked delete rejection: will be translated into a retain
return length
} else {
return length + op.i.length
}
} else if ('d' in op && op.d != null) {
if (opts.tracked) {
// Tracked delete: will be translated into a retain, except where it overlaps tracked inserts.
for (const change of op.trackedChanges ?? []) {
if (change.type === 'insert') {
length -= change.length
}
}
return length
} else {
return length - op.d.length
}
} else if ('r' in op && op.r != null) {
return length
} else if ('c' in op && op.c != null) {
return length
} else {
throw new OError('unexpected op type')
}
}
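// Illustrative sketch (not part of the original module): how the running doc
// length changes for a plain insert, a plain delete, and a tracked delete.
// The ops and lengths are made up.
// eslint-disable-next-line no-unused-vars
function exampleAdjustLength() {
  return {
    insert: adjustLengthByOp(10, { i: 'abc', p: 0 }), // 13
    delete: adjustLengthByOp(10, { d: 'abc', p: 0 }), // 7
    // A tracked delete leaves the text in the document, so the length is
    // unchanged.
    trackedDelete: adjustLengthByOp(10, { d: 'abc', p: 0 }, { tracked: true }), // 10
  }
}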
/**
* Updates come from the doc updater in format
* {
* op: [ { ... op1 ... }, { ... op2 ... } ]
* meta: { ts: ..., user_id: ... }
* }
* but it's easier to work with one op per update, so convert these updates to
* our compressed format
* [{
* op: op1
* meta: { ts: ..., user_id: ... }
* }, {
* op: op2
* meta: { ts: ..., user_id: ... }
* }]
*
* @param {Update[]} updates
* @returns {Update[]} single op updates
*/
export function convertToSingleOpUpdates(updates) {
const splitUpdates = []
for (const update of updates) {
if (!('op' in update)) {
// Not a text op, likely a project structure op
splitUpdates.push(update)
continue
}
const ops = update.op
let docLength = update.meta.history_doc_length ?? update.meta.doc_length
// Temporary fix for document-updater sending a length of -1 for empty
// documents. This can be removed after all queues have been flushed.
if (docLength === -1) {
docLength = 0
}
const docHash = update.meta.doc_hash
for (const op of ops) {
const splitUpdate = cloneWithOp(update, op)
// Only the last update will keep the doc_hash property
delete splitUpdate.meta.doc_hash
if (docLength != null) {
splitUpdate.meta.doc_length = docLength
docLength = adjustLengthByOp(docLength, op, {
tracked: update.meta.tc != null,
})
delete splitUpdate.meta.history_doc_length
}
splitUpdates.push(splitUpdate)
}
if (docHash != null && splitUpdates.length > 0) {
splitUpdates[splitUpdates.length - 1].meta.doc_hash = docHash
}
}
return splitUpdates
}
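// Illustrative sketch (not part of the original module): a two-op
// document-updater update split into two single-op updates, each carrying the
// doc length before its own op is applied. Field values are made up.
// eslint-disable-next-line no-unused-vars
function exampleSplitUpdate() {
  return convertToSingleOpUpdates([
    {
      doc: 'doc-id',
      op: [
        { i: 'foo', p: 0 },
        { d: 'bar', p: 10 },
      ],
      meta: { ts: 1, user_id: 'user-id', doc_length: 20 },
      v: 42,
    },
  ])
  // => two updates: the first with meta.doc_length 20, the second with
  //    meta.doc_length 23 (20 + 'foo'.length); both keep v 42.
}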
export function filterBlankUpdates(updates) {
// Diffing an insert and delete can return blank inserts and deletes
// which the OL history service doesn't have an equivalent for.
//
// NOTE: this relies on the updates only containing either op.i or op.d entries
// but not both, which diffAsShareJsOps guarantees
return updates.filter(
update => !(update.op && (update.op.i === '' || update.op.d === ''))
)
}
export function concatUpdatesWithSameVersion(updates) {
const concattedUpdates = []
for (let update of updates) {
if (update.op != null) {
update = cloneWithOp(update, [update.op])
const lastUpdate = concattedUpdates[concattedUpdates.length - 1]
if (
lastUpdate != null &&
lastUpdate.op != null &&
lastUpdate.v === update.v &&
lastUpdate.doc === update.doc &&
lastUpdate.pathname === update.pathname
) {
lastUpdate.op = lastUpdate.op.concat(update.op)
if (update.meta.doc_hash == null) {
delete lastUpdate.meta.doc_hash
} else {
lastUpdate.meta.doc_hash = update.meta.doc_hash
}
} else {
concattedUpdates.push(update)
}
} else {
concattedUpdates.push(update)
}
}
return concattedUpdates
}
export function compressRawUpdates(rawUpdates) {
let updates = convertToSingleOpUpdates(rawUpdates)
updates = compressUpdates(updates)
updates = filterBlankUpdates(updates)
updates = concatUpdatesWithSameVersion(updates)
return updates
}
export function compressUpdates(updates) {
if (updates.length === 0) {
return []
}
let compressedUpdates = [updates.shift()]
for (const update of updates) {
const lastCompressedUpdate = compressedUpdates.pop()
if (lastCompressedUpdate != null) {
const newCompressedUpdates = _concatTwoUpdates(
lastCompressedUpdate,
update
)
compressedUpdates = compressedUpdates.concat(newCompressedUpdates)
} else {
compressedUpdates.push(update)
}
}
return compressedUpdates
}
/**
* If possible, merge two updates into a single update that has the same effect.
*
* It's useful to do some of this work at this point while we're dealing with
* document-updater updates. The deletes, in particular include the deleted
* text. This allows us to find pieces of inserts and deletes that cancel each
* other out because they insert/delete the exact same text. This compression
* makes the diff smaller.
*/
function _concatTwoUpdates(firstUpdate, secondUpdate) {
// Previously we cloned firstUpdate and secondUpdate at this point but we
// can skip this step because whenever they are returned with
// modification there is always a clone at that point via
// mergeUpdatesWithOp.
if (firstUpdate.op == null || secondUpdate.op == null) {
// Project structure ops
return [firstUpdate, secondUpdate]
}
if (
firstUpdate.doc !== secondUpdate.doc ||
firstUpdate.pathname !== secondUpdate.pathname
) {
return [firstUpdate, secondUpdate]
}
if (firstUpdate.meta.user_id !== secondUpdate.meta.user_id) {
return [firstUpdate, secondUpdate]
}
if (
(firstUpdate.meta.type === 'external' &&
secondUpdate.meta.type !== 'external') ||
(firstUpdate.meta.type !== 'external' &&
secondUpdate.meta.type === 'external') ||
(firstUpdate.meta.type === 'external' &&
secondUpdate.meta.type === 'external' &&
firstUpdate.meta.source !== secondUpdate.meta.source)
) {
return [firstUpdate, secondUpdate]
}
if (secondUpdate.meta.ts - firstUpdate.meta.ts > MAX_TIME_BETWEEN_UPDATES) {
return [firstUpdate, secondUpdate]
}
if (
(firstUpdate.meta.tc == null && secondUpdate.meta.tc != null) ||
(firstUpdate.meta.tc != null && secondUpdate.meta.tc == null)
) {
// One update is tracking changes and the other isn't. Tracking changes
// results in different behaviour in the history, so we need to keep these
// two updates separate.
return [firstUpdate, secondUpdate]
}
if (Boolean(firstUpdate.op.u) !== Boolean(secondUpdate.op.u)) {
// One update is an undo and the other isn't. If we were to merge the two
// updates, we would have to choose one value for the flag, which would be
// partially incorrect. Moreover, a tracked delete that is also an undo is
// treated as a tracked insert rejection by the history, so these updates
// need to be well separated.
return [firstUpdate, secondUpdate]
}
if (
firstUpdate.op.trackedDeleteRejection ||
secondUpdate.op.trackedDeleteRejection
) {
// Do not merge tracked delete rejections. Each tracked delete rejection is
// a separate operation.
return [firstUpdate, secondUpdate]
}
if (
firstUpdate.op.trackedChanges != null ||
secondUpdate.op.trackedChanges != null
) {
// Do not merge ops that span tracked changes.
// TODO: This could theoretically be handled, but it would be complex. One
// would need to take tracked deletes into account when merging inserts and
// deletes together.
return [firstUpdate, secondUpdate]
}
const firstOp = firstUpdate.op
const secondOp = secondUpdate.op
const firstSize =
(firstOp.i && firstOp.i.length) || (firstOp.d && firstOp.d.length)
const secondSize =
(secondOp.i && secondOp.i.length) || (secondOp.d && secondOp.d.length)
const firstOpInsideSecondOp =
secondOp.p <= firstOp.p && firstOp.p <= secondOp.p + secondSize
const secondOpInsideFirstOp =
firstOp.p <= secondOp.p && secondOp.p <= firstOp.p + firstSize
const combinedLengthUnderLimit = firstSize + secondSize < MAX_UPDATE_SIZE
// Two inserts
if (
firstOp.i != null &&
secondOp.i != null &&
secondOpInsideFirstOp &&
combinedLengthUnderLimit &&
insertOpsInsideSameComments(firstOp, secondOp)
) {
return [
mergeUpdatesWithOp(firstUpdate, secondUpdate, {
...firstOp,
i: strInject(firstOp.i, secondOp.p - firstOp.p, secondOp.i),
}),
]
}
// Two deletes
if (
firstOp.d != null &&
secondOp.d != null &&
firstOpInsideSecondOp &&
combinedLengthUnderLimit &&
firstUpdate.meta.tc == null &&
secondUpdate.meta.tc == null
) {
return [
mergeUpdatesWithOp(firstUpdate, secondUpdate, {
...secondOp,
d: strInject(secondOp.d, firstOp.p - secondOp.p, firstOp.d),
}),
]
}
// An insert and then a delete
if (
firstOp.i != null &&
secondOp.d != null &&
secondOpInsideFirstOp &&
firstUpdate.meta.tc == null &&
secondUpdate.meta.tc == null
) {
const offset = secondOp.p - firstOp.p
const insertedText = firstOp.i.slice(offset, offset + secondOp.d.length)
// Only trim the insert when the delete is fully contained within it
if (insertedText === secondOp.d) {
const insert = strRemove(firstOp.i, offset, secondOp.d.length)
if (insert === '') {
return []
} else {
return [
mergeUpdatesWithOp(firstUpdate, secondUpdate, {
...firstOp,
i: insert,
}),
]
}
} else {
// This will only happen if the delete extends outside the insert
return [firstUpdate, secondUpdate]
}
}
// A delete then an insert at the same place, likely a copy-paste of a chunk of content
if (
firstOp.d != null &&
secondOp.i != null &&
firstOp.p === secondOp.p &&
firstUpdate.meta.tc == null &&
secondUpdate.meta.tc == null
) {
const offset = firstOp.p
const hoffset = firstOp.hpos
const diffUpdates = diffAsShareJsOps(firstOp.d, secondOp.i).map(
function (op) {
// diffAsShareJsOps() returns ops with positions relative to the position
// of the copy/paste. We need to adjust these positions so that they
// apply to the whole document instead.
const pos = op.p
op.p = pos + offset
if (hoffset != null) {
op.hpos = pos + hoffset
}
if (firstOp.u && secondOp.u) {
op.u = true
}
if ('i' in op && secondOp.commentIds != null) {
// Make sure that commentIds metadata is propagated to inserts
op.commentIds = secondOp.commentIds
}
const update = mergeUpdatesWithOp(firstUpdate, secondUpdate, op)
// Set the doc hash only on the last update
delete update.meta.doc_hash
return update
}
)
const docHash = secondUpdate.meta.doc_hash
if (docHash != null && diffUpdates.length > 0) {
diffUpdates[diffUpdates.length - 1].meta.doc_hash = docHash
}
// Doing a diff like this loses track of the doc lengths for each
// update, so recalculate them
let docLength =
firstUpdate.meta.history_doc_length ?? firstUpdate.meta.doc_length
for (const update of diffUpdates) {
update.meta.doc_length = docLength
docLength = adjustLengthByOp(docLength, update.op, {
tracked: update.meta.tc != null,
})
delete update.meta.history_doc_length
}
return diffUpdates
}
return [firstUpdate, secondUpdate]
}
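// Illustrative sketch (not part of the original module): an insert followed
// by a delete of the same text at the same position cancels out entirely when
// the two updates can be merged. Field values are made up.
// eslint-disable-next-line no-unused-vars
function exampleCancelInsertDelete() {
  const base = {
    doc: 'doc-id',
    pathname: '/main.tex',
    meta: { user_id: 'user-id', ts: 1, doc_length: 20 },
  }
  const insert = { ...base, op: { i: 'typo', p: 5 }, v: 1 }
  const remove = {
    ...base,
    op: { d: 'typo', p: 5 },
    meta: { ...base.meta, ts: 2 },
    v: 2,
  }
  return _concatTwoUpdates(insert, remove) // => [] (the edits cancel out)
}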
/**
* Return the diff between two strings
*
* @param {string} before
* @param {string} after
* @returns {(InsertOp | DeleteOp)[]} the ops that generate that diff
*/
export function diffAsShareJsOps(before, after) {
const diffs = dmp.diff_main(before, after)
dmp.diff_cleanupSemantic(diffs)
const ops = []
let position = 0
for (const diff of diffs) {
const type = diff[0]
const content = diff[1]
if (type === ADDED) {
ops.push({
i: content,
p: position,
})
position += content.length
} else if (type === REMOVED) {
ops.push({
d: content,
p: position,
})
} else if (type === UNCHANGED) {
position += content.length
} else {
throw new Error('Unknown type')
}
}
return ops
}
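// Illustrative sketch (not part of the original module): the diff between two
// short strings expressed as ShareJS-style ops. The strings are made up.
// eslint-disable-next-line no-unused-vars
function exampleDiff() {
  return diffAsShareJsOps('the quick fox', 'the slow fox')
  // => typically [{ d: 'quick', p: 4 }, { i: 'slow', p: 4 }], though the
  //    exact grouping depends on diff-match-patch's semantic cleanup.
}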
/**
* Checks if two insert ops are inside the same comments
*
* @param {InsertOp} op1
* @param {InsertOp} op2
* @returns {boolean}
*/
function insertOpsInsideSameComments(op1, op2) {
const commentIds1 = op1.commentIds
const commentIds2 = op2.commentIds
if (commentIds1 == null && commentIds2 == null) {
// None are inside comments
return true
}
if (
commentIds1 != null &&
commentIds2 != null &&
commentIds1.every(id => commentIds2.includes(id)) &&
commentIds2.every(id => commentIds1.includes(id))
) {
// Both are inside the same comments
return true
}
return false
}

View File

@@ -0,0 +1,487 @@
// @ts-check
import _ from 'lodash'
import Core from 'overleaf-editor-core'
import * as Errors from './Errors.js'
import * as OperationsCompressor from './OperationsCompressor.js'
import { isInsert, isRetain, isDelete, isComment } from './Utils.js'
/**
* @import { AddDocUpdate, AddFileUpdate, DeleteCommentUpdate, Op, RawScanOp } from './types'
* @import { RenameUpdate, TextUpdate, TrackingDirective, TrackingProps } from './types'
* @import { SetCommentStateUpdate, SetFileMetadataOperation, Update, UpdateWithBlob } from './types'
*/
/**
* Convert updates into history changes
*
* @param {string} projectId
* @param {UpdateWithBlob[]} updatesWithBlobs
* @returns {Array<Core.Change | null>}
*/
export function convertToChanges(projectId, updatesWithBlobs) {
return updatesWithBlobs.map(update => _convertToChange(projectId, update))
}
/**
* Convert an update into a history change
*
* @param {string} projectId
* @param {UpdateWithBlob} updateWithBlob
* @returns {Core.Change | null}
*/
function _convertToChange(projectId, updateWithBlob) {
let operations
const { update } = updateWithBlob
let projectVersion = null
const v2DocVersions = {}
if (_isRenameUpdate(update)) {
operations = [
{
pathname: _convertPathname(update.pathname),
newPathname: _convertPathname(update.new_pathname),
},
]
projectVersion = update.version
} else if (isAddUpdate(update)) {
const op = {
pathname: _convertPathname(update.pathname),
file: {
hash: updateWithBlob.blobHashes.file,
},
}
if (_isAddDocUpdate(update)) {
op.file.rangesHash = updateWithBlob.blobHashes.ranges
}
if (_isAddFileUpdate(update)) {
op.file.metadata = update.metadata
}
operations = [op]
projectVersion = update.version
} else if (isTextUpdate(update)) {
const docLength = update.meta.history_doc_length ?? update.meta.doc_length
let pathname = update.meta.pathname
pathname = _convertPathname(pathname)
const builder = new OperationsBuilder(docLength, pathname)
// convert ops
for (const op of update.op) {
builder.addOp(op, update)
}
// add doc hash if present
if (update.meta.doc_hash != null) {
// This will commit the text operation that the builder is currently
// building and set the contentHash property.
builder.commitTextOperation({ contentHash: update.meta.doc_hash })
}
operations = builder.finish()
// add doc version information if present
if (update.v != null) {
v2DocVersions[update.doc] = { pathname, v: update.v }
}
} else if (isSetCommentStateUpdate(update)) {
operations = [
{
pathname: _convertPathname(update.pathname),
commentId: update.commentId,
resolved: update.resolved,
},
]
} else if (isSetFileMetadataOperation(update)) {
operations = [
{
pathname: _convertPathname(update.pathname),
metadata: update.metadata,
},
]
} else if (isDeleteCommentUpdate(update)) {
operations = [
{
pathname: _convertPathname(update.pathname),
deleteComment: update.deleteComment,
},
]
} else {
const error = new Errors.UpdateWithUnknownFormatError(
'update with unknown format',
{ projectId, update }
)
throw error
}
let v2Authors
if (update.meta.user_id === 'anonymous-user') {
// history-v1 uses null to represent an anonymous author
v2Authors = [null]
} else {
// user_id is missing on resync operations that update the contents of a doc
v2Authors = _.compact([update.meta.user_id])
}
const rawChange = {
operations,
v2Authors,
timestamp: new Date(update.meta.ts).toISOString(),
projectVersion,
v2DocVersions: Object.keys(v2DocVersions).length ? v2DocVersions : null,
}
if (update.meta.origin) {
rawChange.origin = update.meta.origin
} else if (update.meta.type === 'external' && update.meta.source) {
rawChange.origin = { kind: update.meta.source }
}
const change = Core.Change.fromRaw(rawChange)
if (change != null) {
change.operations = OperationsCompressor.compressOperations(
change.operations
)
}
return change
}
/**
* @param {Update} update
* @returns {update is RenameUpdate}
*/
function _isRenameUpdate(update) {
return 'new_pathname' in update && update.new_pathname != null
}
/**
* @param {Update} update
* @returns {update is AddDocUpdate}
*/
function _isAddDocUpdate(update) {
return (
'doc' in update &&
update.doc != null &&
'docLines' in update &&
update.docLines != null
)
}
/**
* @param {Update} update
* @returns {update is AddFileUpdate}
*/
function _isAddFileUpdate(update) {
return (
'file' in update &&
update.file != null &&
(('createdBlob' in update && update.createdBlob) ||
('url' in update && update.url != null))
)
}
/**
* @param {Update} update
* @returns {update is TextUpdate}
*/
export function isTextUpdate(update) {
return (
'doc' in update &&
update.doc != null &&
'op' in update &&
update.op != null &&
'pathname' in update.meta &&
update.meta.pathname != null &&
'doc_length' in update.meta &&
update.meta.doc_length != null
)
}
export function isProjectStructureUpdate(update) {
return isAddUpdate(update) || _isRenameUpdate(update)
}
/**
* @param {Update} update
* @returns {update is AddDocUpdate | AddFileUpdate}
*/
export function isAddUpdate(update) {
return _isAddDocUpdate(update) || _isAddFileUpdate(update)
}
/**
* @param {Update} update
* @returns {update is SetCommentStateUpdate}
*/
export function isSetCommentStateUpdate(update) {
return 'commentId' in update && 'resolved' in update
}
/**
* @param {Update} update
* @returns {update is DeleteCommentUpdate}
*/
export function isDeleteCommentUpdate(update) {
return 'deleteComment' in update
}
/**
* @param {Update} update
* @returns {update is SetFileMetadataOperation}
*/
export function isSetFileMetadataOperation(update) {
return 'metadata' in update
}
export function _convertPathname(pathname) {
// Strip leading /
pathname = pathname.replace(/^\//, '')
  // Replace \\ with _. Backslashes are no longer allowed in projects in web,
  // but some made it into history before this restriction was added. This
  // makes them valid for the history store.
// See https://github.com/overleaf/write_latex/issues/4471
pathname = pathname.replace(/\\/g, '_')
// workaround for filenames containing asterisks, this will
// fail if a corresponding replacement file already exists but it
// would fail anyway without this attempt to fix the pathname.
// See https://github.com/overleaf/internal/issues/900
pathname = pathname.replace(/\*/g, '__ASTERISK__')
// workaround for filenames beginning with spaces
// See https://github.com/overleaf/internal/issues/1404
// note: we have already stripped any leading slash above
pathname = pathname.replace(/^ /, '__SPACE__') // handle top-level
pathname = pathname.replace(/\/ /g, '/__SPACE__') // handle folders
return pathname
}
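// Illustrative example: _convertPathname('/foo\\bar/ draft*.tex') should
// return 'foo_bar/__SPACE__draft__ASTERISK__.tex'.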
class OperationsBuilder {
/**
* @param {number} docLength
* @param {string} pathname
*/
constructor(docLength, pathname) {
/**
* List of operations being built
*/
this.operations = []
/**
* Currently built text operation
*
* @type {RawScanOp[]}
*/
this.textOperation = []
/**
* Cursor inside the current text operation
*/
this.cursor = 0
this.docLength = docLength
this.pathname = pathname
}
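  // Illustrative behaviour: for a 10-character doc,
  //   builder.addOp({ i: 'foo', p: 2 }, update)
  //   builder.finish()
  // should yield [{ pathname, textOperation: [2, 'foo', 8] }], i.e. retain 2,
  // insert 'foo', then retain the remaining 8 characters.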
/**
* @param {Op} op
* @param {TextUpdate} update
* @returns {void}
*/
addOp(op, update) {
// We sometimes receive operations that operate at positions outside the
// docLength. Document updater coerces the position to the end of the
// document. We do the same here.
const pos = Math.min(op.hpos ?? op.p, this.docLength)
if (isComment(op)) {
// Commit the current text operation
this.commitTextOperation()
// Add a comment operation
const commentLength = op.hlen ?? op.c.length
const commentOp = {
pathname: this.pathname,
commentId: op.t,
ranges: commentLength > 0 ? [{ pos, length: commentLength }] : [],
}
if ('resolved' in op) {
commentOp.resolved = op.resolved
}
this.operations.push(commentOp)
return
}
if (!isInsert(op) && !isDelete(op) && !isRetain(op)) {
throw new Errors.UnexpectedOpTypeError('unexpected op type', { op })
}
if (pos < this.cursor) {
this.commitTextOperation()
// At this point, this.cursor === 0 and we can continue
}
if (pos > this.cursor) {
this.retain(pos - this.cursor)
}
if (isInsert(op)) {
if (op.trackedDeleteRejection) {
this.retain(op.i.length, {
tracking: { type: 'none' },
})
} else {
const opts = {}
if (update.meta.tc != null) {
opts.tracking = {
type: 'insert',
userId: update.meta.user_id,
ts: new Date(update.meta.ts).toISOString(),
}
}
if (op.commentIds != null) {
opts.commentIds = op.commentIds
}
this.insert(op.i, opts)
}
}
if (isRetain(op)) {
if (op.tracking) {
this.retain(op.r.length, { tracking: op.tracking })
} else {
this.retain(op.r.length)
}
}
if (isDelete(op)) {
const changes = op.trackedChanges ?? []
// Tracked changes should already be ordered by offset, but let's make
// sure they are.
changes.sort((a, b) => {
const posOrder = a.offset - b.offset
if (posOrder !== 0) {
return posOrder
} else if (a.type === 'insert' && b.type === 'delete') {
return 1
} else if (a.type === 'delete' && b.type === 'insert') {
return -1
} else {
return 0
}
})
let offset = 0
for (const change of changes) {
if (change.offset > offset) {
// Handle the portion before the tracked change
if (update.meta.tc != null) {
// This is a tracked delete
this.retain(change.offset - offset, {
tracking: {
type: 'delete',
userId: update.meta.user_id,
ts: new Date(update.meta.ts).toISOString(),
},
})
} else {
// This is a regular delete
this.delete(change.offset - offset)
}
offset = change.offset
}
// Now, handle the portion inside the tracked change
if (change.type === 'delete') {
// Tracked deletes are skipped over when deleting
this.retain(change.length)
} else if (change.type === 'insert') {
// Deletes inside tracked inserts are always regular deletes
this.delete(change.length)
offset += change.length
}
}
if (offset < op.d.length) {
// Handle the portion after the last tracked change
if (update.meta.tc != null) {
// This is a tracked delete
this.retain(op.d.length - offset, {
tracking: {
type: 'delete',
userId: update.meta.user_id,
ts: new Date(update.meta.ts).toISOString(),
},
})
} else {
// This is a regular delete
this.delete(op.d.length - offset)
}
}
}
}
/**
* @param {number} length
* @param {object} opts
* @param {TrackingDirective} [opts.tracking]
*/
retain(length, opts = {}) {
if (opts.tracking) {
this.textOperation.push({ r: length, ...opts })
} else {
this.textOperation.push(length)
}
this.cursor += length
}
/**
* @param {string} str
* @param {object} opts
* @param {TrackingProps} [opts.tracking]
* @param {string[]} [opts.commentIds]
*/
insert(str, opts = {}) {
if (opts.tracking || opts.commentIds) {
this.textOperation.push({ i: str, ...opts })
} else {
this.textOperation.push(str)
}
this.cursor += str.length
this.docLength += str.length
}
/**
* @param {number} length
* @param {object} opts
*/
delete(length, opts = {}) {
this.textOperation.push(-length)
this.docLength -= length
}
/**
* Finalize the current text operation and push it to the queue
*
* @param {object} [opts]
* @param {string} [opts.contentHash]
*/
commitTextOperation(opts = {}) {
if (this.textOperation.length > 0 && this.cursor < this.docLength) {
this.retain(this.docLength - this.cursor)
}
if (this.textOperation.length > 0) {
const operation = {
pathname: this.pathname,
textOperation: this.textOperation,
}
if (opts.contentHash != null) {
operation.contentHash = opts.contentHash
}
this.operations.push(operation)
this.textOperation = []
}
this.cursor = 0
}
finish() {
this.commitTextOperation()
return this.operations
}
}

View File

@@ -0,0 +1,800 @@
import { promisify } from 'node:util'
import logger from '@overleaf/logger'
import async from 'async'
import metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import OError from '@overleaf/o-error'
import * as HistoryStoreManager from './HistoryStoreManager.js'
import * as UpdateTranslator from './UpdateTranslator.js'
import * as BlobManager from './BlobManager.js'
import * as RedisManager from './RedisManager.js'
import * as ErrorRecorder from './ErrorRecorder.js'
import * as LockManager from './LockManager.js'
import * as UpdateCompressor from './UpdateCompressor.js'
import * as WebApiManager from './WebApiManager.js'
import * as SyncManager from './SyncManager.js'
import * as Versions from './Versions.js'
import * as Errors from './Errors.js'
import * as Metrics from './Metrics.js'
import * as RetryManager from './RetryManager.js'
import { Profiler } from './Profiler.js'
const keys = Settings.redis.lock.key_schema
export const REDIS_READ_BATCH_SIZE = 500
/**
* Container for functions that need to be mocked in tests
*
* TODO: Rewrite tests in terms of exported functions only
*/
export const _mocks = {}
export function getRawUpdates(projectId, batchSize, callback) {
RedisManager.getRawUpdatesBatch(projectId, batchSize, (error, batch) => {
if (error != null) {
return callback(OError.tag(error))
}
let updates
try {
updates = RedisManager.parseDocUpdates(batch.rawUpdates)
} catch (error) {
return callback(OError.tag(error))
}
_getHistoryId(projectId, updates, (error, historyId) => {
if (error != null) {
return callback(OError.tag(error))
}
HistoryStoreManager.getMostRecentChunk(
projectId,
historyId,
(error, chunk) => {
if (error != null) {
return callback(OError.tag(error))
}
callback(null, { project_id: projectId, chunk, updates })
}
)
})
})
}
// Trigger resync and start processing under lock to prevent other operations
// from flushing the resync updates.
export function startResyncAndProcessUpdatesUnderLock(
projectId,
opts,
callback
) {
const startTimeMs = Date.now()
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(extendLock, releaseLock) => {
SyncManager.startResyncWithoutLock(projectId, opts, err => {
if (err) return callback(OError.tag(err))
extendLock(err => {
if (err) return callback(OError.tag(err))
_countAndProcessUpdates(
projectId,
extendLock,
REDIS_READ_BATCH_SIZE,
releaseLock
)
})
})
},
(flushError, queueSize) => {
if (flushError) {
OError.tag(flushError)
ErrorRecorder.record(projectId, queueSize, flushError, recordError => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
}
callback(flushError)
})
} else {
ErrorRecorder.clearError(projectId, clearError => {
if (clearError) {
logger.error(
{ err: clearError, projectId },
'failed to clear error'
)
}
callback()
})
}
if (queueSize > 0) {
const duration = (Date.now() - startTimeMs) / 1000
Metrics.historyFlushDurationSeconds.observe(duration)
Metrics.historyFlushQueueSize.observe(queueSize)
}
// clear the timestamp in the background if the queue is now empty
RedisManager.clearDanglingFirstOpTimestamp(projectId, () => {})
}
)
}
// Process all updates for a project, only check project-level information once
export function processUpdatesForProject(projectId, callback) {
const startTimeMs = Date.now()
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(extendLock, releaseLock) => {
_countAndProcessUpdates(
projectId,
extendLock,
REDIS_READ_BATCH_SIZE,
releaseLock
)
},
(flushError, queueSize) => {
if (flushError) {
OError.tag(flushError)
ErrorRecorder.record(
projectId,
queueSize,
flushError,
(recordError, failure) => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
callback(recordError)
} else if (
RetryManager.isFirstFailure(failure) &&
RetryManager.isHardFailure(failure)
) {
// This is the first failed flush since the last successful flush.
// Immediately attempt a resync.
logger.warn({ projectId }, 'Flush failed, attempting resync')
resyncProject(projectId, callback)
} else {
callback(flushError)
}
}
)
} else {
ErrorRecorder.clearError(projectId, clearError => {
if (clearError) {
logger.error(
{ err: clearError, projectId },
'failed to clear error'
)
}
callback()
})
}
if (queueSize > 0) {
const duration = (Date.now() - startTimeMs) / 1000
Metrics.historyFlushDurationSeconds.observe(duration)
Metrics.historyFlushQueueSize.observe(queueSize)
}
// clear the timestamp in the background if the queue is now empty
RedisManager.clearDanglingFirstOpTimestamp(projectId, () => {})
}
)
}
export function resyncProject(projectId, callback) {
SyncManager.startHardResync(projectId, {}, error => {
if (error != null) {
return callback(OError.tag(error))
}
// Flush the sync operations; this will not loop indefinitely
// because any failure won't be the first failure anymore.
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(extendLock, releaseLock) => {
_countAndProcessUpdates(
projectId,
extendLock,
REDIS_READ_BATCH_SIZE,
releaseLock
)
},
(flushError, queueSize) => {
if (flushError) {
ErrorRecorder.record(
projectId,
queueSize,
flushError,
(recordError, failure) => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
callback(OError.tag(recordError))
} else {
callback(OError.tag(flushError))
}
}
)
} else {
ErrorRecorder.clearError(projectId, clearError => {
if (clearError) {
logger.error(
{ err: clearError, projectId },
'failed to clear error'
)
}
callback()
})
}
}
)
})
}
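// Flush a project's queue in batches, halving the batch size after each
// failed flush, until the queue is empty or the batch size reaches zero.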
export function processUpdatesForProjectUsingBisect(
projectId,
amountToProcess,
callback
) {
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(extendLock, releaseLock) => {
_countAndProcessUpdates(
projectId,
extendLock,
amountToProcess,
releaseLock
)
},
(flushError, queueSize) => {
if (amountToProcess === 0 || queueSize === 0) {
// no further processing possible
if (flushError != null) {
ErrorRecorder.record(
projectId,
queueSize,
OError.tag(flushError),
recordError => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
}
callback(flushError)
}
)
} else {
callback()
}
} else {
if (flushError != null) {
// decrease the batch size when we hit an error
processUpdatesForProjectUsingBisect(
projectId,
Math.floor(amountToProcess / 2),
callback
)
} else {
// otherwise continue processing with the same batch size
processUpdatesForProjectUsingBisect(
projectId,
amountToProcess,
callback
)
}
}
}
)
}
export function processSingleUpdateForProject(projectId, callback) {
LockManager.runWithLock(
keys.projectHistoryLock({ project_id: projectId }),
(
extendLock,
releaseLock // set the batch size to 1 for single-stepping
) => {
_countAndProcessUpdates(projectId, extendLock, 1, releaseLock)
},
(flushError, queueSize) => {
// no need to clear the flush marker when single stepping
// it will be cleared up on the next background flush if
// the queue is empty
if (flushError) {
ErrorRecorder.record(projectId, queueSize, flushError, recordError => {
if (recordError) {
logger.error(
{ err: recordError, projectId },
'failed to record error'
)
}
callback(flushError)
})
} else {
ErrorRecorder.clearError(projectId, clearError => {
if (clearError) {
logger.error(
{ err: clearError, projectId },
'failed to clear error'
)
}
callback()
})
}
}
)
}
_mocks._countAndProcessUpdates = (
projectId,
extendLock,
batchSize,
callback
) => {
RedisManager.countUnprocessedUpdates(projectId, (error, queueSize) => {
if (error != null) {
return callback(OError.tag(error))
}
if (queueSize > 0) {
logger.debug({ projectId, queueSize }, 'processing uncompressed updates')
RedisManager.getUpdatesInBatches(
projectId,
batchSize,
(updates, cb) => {
_processUpdatesBatch(projectId, updates, extendLock, cb)
},
error => {
// Unconventional callback signature. The caller needs the queue size
// even when an error is thrown in order to record the queue size in
// the projectHistoryFailures collection. We'll have to find another
// way to achieve this when we promisify.
callback(error, queueSize)
}
)
} else {
logger.debug({ projectId }, 'no updates to process')
callback(null, queueSize)
}
})
}
function _countAndProcessUpdates(...args) {
_mocks._countAndProcessUpdates(...args)
}
function _processUpdatesBatch(projectId, updates, extendLock, callback) {
// If the project doesn't have a history then we can bail out here
_getHistoryId(projectId, updates, (error, historyId) => {
if (error != null) {
return callback(OError.tag(error))
}
if (historyId == null) {
logger.debug(
{ projectId },
'discarding updates as project does not use history'
)
return callback()
}
_processUpdates(projectId, historyId, updates, extendLock, error => {
if (error != null) {
return callback(OError.tag(error))
}
callback()
})
})
}
export function _getHistoryId(projectId, updates, callback) {
let idFromUpdates = null
// check that all updates have the same history id
for (const update of updates) {
if (update.projectHistoryId != null) {
if (idFromUpdates == null) {
idFromUpdates = update.projectHistoryId.toString()
} else if (idFromUpdates !== update.projectHistoryId.toString()) {
metrics.inc('updates.batches.project-history-id.inconsistent-update')
return callback(
new OError('inconsistent project history id between updates', {
projectId,
idFromUpdates,
currentId: update.projectHistoryId,
})
)
}
}
}
WebApiManager.getHistoryId(projectId, (error, idFromWeb) => {
if (error != null && idFromUpdates != null) {
      // web request failed (404s from web count as errors here), but the id
      // is present on the updates, so use it
metrics.inc('updates.batches.project-history-id.from-updates')
return callback(null, idFromUpdates)
} else if (error != null) {
return callback(OError.tag(error))
}
if (idFromWeb == null && idFromUpdates == null) {
// present on neither web nor updates
callback(null, null)
} else if (idFromWeb != null && idFromUpdates == null) {
// present only on web
metrics.inc('updates.batches.project-history-id.from-web')
callback(null, idFromWeb)
} else if (idFromWeb == null && idFromUpdates != null) {
// present only on updates
metrics.inc('updates.batches.project-history-id.from-updates')
callback(null, idFromUpdates)
} else if (idFromWeb.toString() !== idFromUpdates.toString()) {
// inconsistent between web and updates
metrics.inc('updates.batches.project-history-id.inconsistent-with-web')
logger.warn(
{
projectId,
idFromWeb,
idFromUpdates,
updates,
},
'inconsistent project history id between updates and web'
)
callback(
new OError('inconsistent project history id between updates and web')
)
} else {
// the same on web and updates
metrics.inc('updates.batches.project-history-id.from-updates')
callback(null, idFromWeb)
}
})
}
function _handleOpsOutOfOrderError(projectId, projectHistoryId, err, ...rest) {
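  // `rest` contains any partial results followed by the callback; peel the
  // callback off the end and pass the results through unchanged.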
const adjustedLength = Math.max(rest.length, 1)
const results = rest.slice(0, adjustedLength - 1)
const callback = rest[adjustedLength - 1]
ErrorRecorder.getFailureRecord(projectId, (error, failureRecord) => {
if (error != null) {
return callback(error)
}
// Bypass ops-out-of-order errors in the stored chunk when in forceDebug mode
if (failureRecord != null && failureRecord.forceDebug === true) {
logger.warn(
{ err, projectId, projectHistoryId },
'ops out of order in chunk, forced continue'
)
callback(null, ...results) // return results without error
} else {
callback(err, ...results)
}
})
}
function _getMostRecentVersionWithDebug(projectId, projectHistoryId, callback) {
HistoryStoreManager.getMostRecentVersion(
projectId,
projectHistoryId,
(err, ...results) => {
if (err instanceof Errors.OpsOutOfOrderError) {
_handleOpsOutOfOrderError(
projectId,
projectHistoryId,
err,
...results,
callback
)
} else {
callback(err, ...results)
}
}
)
}
export function _processUpdates(
projectId,
projectHistoryId,
updates,
extendLock,
callback
) {
const profile = new Profiler('_processUpdates', {
project_id: projectId,
projectHistoryId,
})
  // skip updates first if we're in a sync; we might not need to do anything else
SyncManager.skipUpdatesDuringSync(
projectId,
updates,
(error, filteredUpdates, newSyncState) => {
profile.log('skipUpdatesDuringSync')
if (error != null) {
return callback(error)
}
if (filteredUpdates.length === 0) {
// return early if there are no updates to apply
return SyncManager.setResyncState(projectId, newSyncState, callback)
}
// only make request to history service if we have actual updates to process
_getMostRecentVersionWithDebug(
projectId,
projectHistoryId,
(
error,
baseVersion,
projectStructureAndDocVersions,
_lastChange,
mostRecentChunk
) => {
if (projectStructureAndDocVersions == null) {
projectStructureAndDocVersions = { project: null, docs: {} }
}
profile.log('getMostRecentVersion')
if (error != null) {
return callback(error)
}
async.waterfall(
[
cb => {
cb = profile.wrap('expandSyncUpdates', cb)
SyncManager.expandSyncUpdates(
projectId,
projectHistoryId,
mostRecentChunk,
filteredUpdates,
extendLock,
cb
)
},
(expandedUpdates, cb) => {
let unappliedUpdates
try {
unappliedUpdates = _skipAlreadyAppliedUpdates(
projectId,
expandedUpdates,
projectStructureAndDocVersions
)
} catch (err) {
return cb(err)
}
profile.log('skipAlreadyAppliedUpdates')
const compressedUpdates =
UpdateCompressor.compressRawUpdates(unappliedUpdates)
const timeTaken = profile
.log('compressRawUpdates')
.getTimeDelta()
if (timeTaken >= 1000) {
logger.debug(
{ projectId, updates: unappliedUpdates, timeTaken },
'slow compression of raw updates'
)
}
cb = profile.wrap('createBlobs', cb)
BlobManager.createBlobsForUpdates(
projectId,
projectHistoryId,
compressedUpdates,
extendLock,
cb
)
},
(updatesWithBlobs, cb) => {
let changes
try {
changes = UpdateTranslator.convertToChanges(
projectId,
updatesWithBlobs
).map(change => change.toRaw())
} catch (err) {
return cb(err)
} finally {
profile.log('convertToChanges')
}
cb(null, changes)
},
(changes, cb) => {
let change
const numChanges = changes.length
const byteLength = Buffer.byteLength(
JSON.stringify(changes),
'utf8'
)
let numOperations = 0
for (change of changes) {
if (change.operations != null) {
numOperations += change.operations.length
}
}
metrics.timing('history-store.request.changes', numChanges, 1)
metrics.timing('history-store.request.bytes', byteLength, 1)
metrics.timing(
'history-store.request.operations',
numOperations,
1
)
// thresholds taken from write_latex/main/lib/history_exporter.rb
if (numChanges > 1000) {
metrics.inc('history-store.request.exceeds-threshold.changes')
}
if (byteLength > Math.pow(1024, 2)) {
metrics.inc('history-store.request.exceeds-threshold.bytes')
const changeLengths = changes.map(change =>
Buffer.byteLength(JSON.stringify(change), 'utf8')
)
logger.warn(
{ projectId, byteLength, changeLengths },
'change size exceeds limit'
)
}
cb = profile.wrap('sendChanges', cb)
// this is usually the longest request, so extend the lock before starting it
extendLock(error => {
if (error != null) {
return cb(error)
}
if (changes.length === 0) {
return cb()
} // avoid unnecessary requests to history service
HistoryStoreManager.sendChanges(
projectId,
projectHistoryId,
changes,
baseVersion,
cb
)
})
},
cb => {
cb = profile.wrap('setResyncState', cb)
SyncManager.setResyncState(projectId, newSyncState, cb)
},
],
error => {
profile.end()
callback(error)
}
)
}
)
}
)
}
_mocks._skipAlreadyAppliedUpdates = (
projectId,
updates,
projectStructureAndDocVersions
) => {
function alreadySeenProjectVersion(previousProjectStructureVersion, update) {
return (
UpdateTranslator.isProjectStructureUpdate(update) &&
previousProjectStructureVersion != null &&
update.version != null &&
Versions.gte(previousProjectStructureVersion, update.version)
)
}
function alreadySeenDocVersion(previousDocVersions, update) {
if (UpdateTranslator.isTextUpdate(update) && update.v != null) {
const docId = update.doc
return (
previousDocVersions[docId] != null &&
previousDocVersions[docId].v != null &&
Versions.gte(previousDocVersions[docId].v, update.v)
)
} else {
return false
}
}
// check that the incoming updates are in the correct order (we do not
// want to send out of order updates to the history service)
let incomingProjectStructureVersion = null
const incomingDocVersions = {}
for (const update of updates) {
if (alreadySeenProjectVersion(incomingProjectStructureVersion, update)) {
logger.warn(
{ projectId, update, incomingProjectStructureVersion },
'incoming project structure updates are out of order'
)
throw new Errors.OpsOutOfOrderError(
'project structure version out of order on incoming updates'
)
} else if (alreadySeenDocVersion(incomingDocVersions, update)) {
logger.warn(
{ projectId, update, incomingDocVersions },
'incoming doc updates are out of order'
)
throw new Errors.OpsOutOfOrderError(
'doc version out of order on incoming updates'
)
}
// update the current project structure and doc versions
if (UpdateTranslator.isProjectStructureUpdate(update)) {
incomingProjectStructureVersion = update.version
} else if (UpdateTranslator.isTextUpdate(update)) {
incomingDocVersions[update.doc] = { v: update.v }
}
}
// discard updates already applied
const updatesToApply = []
const previousProjectStructureVersion = projectStructureAndDocVersions.project
const previousDocVersions = projectStructureAndDocVersions.docs
if (projectStructureAndDocVersions != null) {
const updateProjectVersions = []
for (const update of updates) {
if (update != null && update.version != null) {
updateProjectVersions.push(update.version)
}
}
logger.debug(
{ projectId, projectStructureAndDocVersions, updateProjectVersions },
'comparing updates with existing project versions'
)
}
for (const update of updates) {
if (alreadySeenProjectVersion(previousProjectStructureVersion, update)) {
metrics.inc('updates.discarded_project_structure_version')
logger.debug(
{ projectId, update, previousProjectStructureVersion },
'discarding previously applied project structure update'
)
continue
}
if (alreadySeenDocVersion(previousDocVersions, update)) {
metrics.inc('updates.discarded_doc_version')
logger.debug(
{ projectId, update, previousDocVersions },
'discarding previously applied doc update'
)
continue
}
// remove non-BMP characters from resync updates that have bypassed the normal docupdater flow
_sanitizeUpdate(update)
// if all checks above are ok then accept the update
updatesToApply.push(update)
}
return updatesToApply
}
export function _skipAlreadyAppliedUpdates(...args) {
return _mocks._skipAlreadyAppliedUpdates(...args)
}
function _sanitizeUpdate(update) {
  // Adapted from docupdater's UpdateManager. We should clean these in
  // docupdater too, but we already have queues with this problem, so we also
  // handle it here for robustness.
// Replace high and low surrogate characters with 'replacement character' (\uFFFD)
const removeBadChars = str => str.replace(/[\uD800-\uDFFF]/g, '\uFFFD')
// clean up any bad chars in resync diffs
if (update.op) {
for (const op of update.op) {
if (op.i != null) {
op.i = removeBadChars(op.i)
}
}
}
// clean up any bad chars in resync new docs
if (update.docLines != null) {
update.docLines = removeBadChars(update.docLines)
}
return update
}
export const promises = {
/** @type {(projectId: string) => Promise<number>} */
processUpdatesForProject: promisify(processUpdatesForProject),
/** @type {(projectId: string, opts: any) => Promise<number>} */
startResyncAndProcessUpdatesUnderLock: promisify(
startResyncAndProcessUpdatesUnderLock
),
}

View File

@@ -0,0 +1,37 @@
// @ts-check
/**
* @import { CommentOp, DeleteOp, InsertOp, Op, RetainOp } from './types'
*/
/**
* @param {Op} op
* @returns {op is InsertOp}
*/
export function isInsert(op) {
return 'i' in op && op.i != null
}
/**
* @param {Op} op
* @returns {op is RetainOp}
*/
export function isRetain(op) {
return 'r' in op && op.r != null
}
/**
* @param {Op} op
* @returns {op is DeleteOp}
*/
export function isDelete(op) {
return 'd' in op && op.d != null
}
/**
* @param {Op} op
* @returns {op is CommentOp}
*/
export function isComment(op) {
return 'c' in op && op.c != null && 't' in op && op.t != null
}

View File

@@ -0,0 +1,12 @@
import { celebrate, errors } from 'celebrate'
export { Joi } from 'celebrate'
export const errorMiddleware = errors()
/**
* Validation middleware
*/
export function validate(schema) {
return celebrate(schema, { allowUnknown: true })
}
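// Typical usage sketch (route and schema names are illustrative):
//   app.get(
//     '/project/:project_id/version',
//     validate({ params: Joi.object({ project_id: Joi.string().required() }) }),
//     handler
//   )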

View File

@@ -0,0 +1,68 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
// Compare Versions like 1.2 < 4.1
const convertToArray = v => Array.from(v.split('.')).map(x => parseInt(x, 10))
const cmp = function (v1, v2) {
// allow comparison to work with integers
if (typeof v1 === 'number' && typeof v2 === 'number') {
if (v1 > v2) {
return +1
}
if (v1 < v2) {
return -1
}
// otherwise equal
return 0
}
// comparison with strings
v1 = convertToArray(v1)
v2 = convertToArray(v2)
while (v1.length || v2.length) {
const [x, y] = Array.from([v1.shift(), v2.shift()])
if (x > y) {
return +1
}
if (x < y) {
return -1
}
if (x != null && y == null) {
return +1
}
if (x == null && y != null) {
return -1
}
}
return 0
}
export function compare(v1, v2) {
return cmp(v1, v2)
}
export function gt(v1, v2) {
return cmp(v1, v2) > 0
}
export function lt(v1, v2) {
return cmp(v1, v2) < 0
}
export function gte(v1, v2) {
return cmp(v1, v2) >= 0
}
export function lte(v1, v2) {
return cmp(v1, v2) <= 0
}
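// Examples of the comparison semantics above:
//   compare('1.10', '1.9') === 1
//   lt('4.1', '4.10') === true
//   gte(3, 3) === true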

View File

@@ -0,0 +1,112 @@
import { callbackify } from 'node:util'
import { setTimeout } from 'node:timers/promises'
import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import {
fetchNothing,
fetchJson,
RequestFailedError,
} from '@overleaf/fetch-utils'
import * as Errors from './Errors.js'
import * as RedisManager from './RedisManager.js'
let RETRY_TIMEOUT_MS = 5000
async function getHistoryId(projectId) {
Metrics.inc('history_id_cache_requests_total')
const cachedHistoryId =
await RedisManager.promises.getCachedHistoryId(projectId)
if (cachedHistoryId) {
Metrics.inc('history_id_cache_hits_total')
return cachedHistoryId
} else {
const project = await _getProjectDetails(projectId)
const historyId =
project.overleaf &&
project.overleaf.history &&
project.overleaf.history.id
if (historyId != null) {
await RedisManager.promises.setCachedHistoryId(projectId, historyId)
}
return historyId
}
}
async function requestResync(projectId, opts = {}) {
try {
const body = {}
if (opts.historyRangesMigration) {
body.historyRangesMigration = opts.historyRangesMigration
}
if (opts.resyncProjectStructureOnly) {
body.resyncProjectStructureOnly = opts.resyncProjectStructureOnly
}
await fetchNothing(
`${Settings.apis.web.url}/project/${projectId}/history/resync`,
{
method: 'POST',
signal: AbortSignal.timeout(6 * 60000),
basicAuth: {
user: Settings.apis.web.user,
password: Settings.apis.web.pass,
},
json: body,
}
)
} catch (err) {
if (err instanceof RequestFailedError && err.response.status === 404) {
throw new Errors.NotFoundError('got a 404 from web api').withCause(err)
} else {
throw err
}
}
}
async function _getProjectDetails(projectId) {
logger.debug({ projectId }, 'getting project details from web')
let attempts = 0
while (true) {
attempts += 1
try {
return await fetchJson(
`${Settings.apis.web.url}/project/${projectId}/details`,
{
signal: AbortSignal.timeout(16000),
basicAuth: {
user: Settings.apis.web.user,
password: Settings.apis.web.pass,
},
}
)
} catch (err) {
if (err instanceof RequestFailedError && err.response.status === 404) {
throw new Errors.NotFoundError('got a 404 from web api').withCause(err)
} else if (attempts < 2) {
        // retry after RETRY_TIMEOUT_MS (5 seconds by default)
await setTimeout(RETRY_TIMEOUT_MS)
} else {
throw err
}
}
}
}
/**
* Adjust the retry timeout in tests
*/
export async function setRetryTimeoutMs(timeoutMs) {
RETRY_TIMEOUT_MS = timeoutMs
}
// EXPORTS
const getHistoryIdCb = callbackify(getHistoryId)
const requestResyncCb = callbackify(requestResync)
export { getHistoryIdCb as getHistoryId, requestResyncCb as requestResync }
export const promises = {
getHistoryId,
requestResync,
}

View File

@@ -0,0 +1,22 @@
import { ObjectId } from 'mongodb-legacy'
export type ProjectHistoryFailure = {
_id: ObjectId
project_id: string
attempts: number
resyncAttempts: number
resyncStartedAt: Date
requestCount?: number
history: (ErrorRecord | SyncStartRecord)[]
} & ErrorRecord
type ErrorRecord = {
error: string
stack: string
queueSize: number
ts: Date
}
type SyncStartRecord = {
resyncStartedAt: Date
}

View File

@@ -0,0 +1,27 @@
import Metrics from '@overleaf/metrics'
import Settings from '@overleaf/settings'
import mongodb from 'mongodb-legacy'
const { MongoClient, ObjectId } = mongodb
/**
* @import { ProjectHistoryFailure } from './mongo-types.ts'
*/
export { ObjectId }
export const mongoClient = new MongoClient(
Settings.mongo.url,
Settings.mongo.options
)
const mongoDb = mongoClient.db()
Metrics.mongodb.monitor(mongoClient)
export const db = {
deletedProjects: mongoDb.collection('deletedProjects'),
projects: mongoDb.collection('projects'),
/** @type {mongodb.Collection<ProjectHistoryFailure>} */
projectHistoryFailures: mongoDb.collection('projectHistoryFailures'),
projectHistoryLabels: mongoDb.collection('projectHistoryLabels'),
projectHistorySyncState: mongoDb.collection('projectHistorySyncState'),
}

View File

@@ -0,0 +1,61 @@
import Metrics from '@overleaf/metrics'
import logger from '@overleaf/logger'
import express from 'express'
import bodyParser from 'body-parser'
import * as Errors from './Errors.js'
import * as Router from './Router.js'
import * as Validation from './Validation.js'
const HistoryLogger = logger.initialize('project-history').logger
Metrics.event_loop.monitor(logger)
Metrics.memory.monitor(logger)
Metrics.leaked_sockets.monitor(logger)
Metrics.open_sockets.monitor()
// log updates as truncated strings
function truncateFn(updates) {
return JSON.parse(
JSON.stringify(updates, function (key, value) {
let len
if (typeof value === 'string' && (len = value.length) > 80) {
return (
value.substr(0, 32) +
`...(message of length ${len} truncated)...` +
value.substr(-32)
)
} else {
return value
}
})
)
}
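// e.g. a 200-character string value is logged as its first 32 characters,
// '...(message of length 200 truncated)...', then its last 32 characters.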
HistoryLogger.addSerializers({
rawUpdate: truncateFn,
rawUpdates: truncateFn,
newUpdates: truncateFn,
lastUpdate: truncateFn,
})
export const app = express()
app.use(bodyParser.json())
app.use(bodyParser.urlencoded({ extended: true }))
app.use(Metrics.http.monitor(logger))
Router.initialize(app)
Metrics.injectMetricsRoute(app)
app.use(Validation.errorMiddleware)
app.use(function (error, req, res, next) {
if (error instanceof Errors.NotFoundError) {
res.sendStatus(404)
} else if (error instanceof Errors.BadRequestError) {
res.sendStatus(400)
} else if (error instanceof Errors.InconsistentChunkError) {
res.sendStatus(422)
} else if (error instanceof Errors.TooManyRequestsError) {
res.status(429).set('Retry-After', 300).end()
} else {
logger.error({ err: error, req }, error.message)
res.status(500).json({ message: 'an internal error occurred' })
}
})

View File

@@ -0,0 +1,253 @@
import { HistoryRanges } from '../../../document-updater/app/js/types'
import { LinkedFileData, RawOrigin } from 'overleaf-editor-core/lib/types'
export type Update =
| TextUpdate
| AddDocUpdate
| AddFileUpdate
| RenameUpdate
| DeleteCommentUpdate
| SetCommentStateUpdate
| SetFileMetadataOperation
| ResyncProjectStructureUpdate
| ResyncDocContentUpdate
export type ProjectStructureUpdate =
| AddDocUpdate
| AddFileUpdate
| RenameUpdate
| SetFileMetadataOperation
export type UpdateMeta = {
user_id: string
ts: number
source?: string
type?: string
origin?: RawOrigin
tc?: string
resync?: boolean
}
export type TextUpdate = {
doc: string
op: Op[]
v: number
meta: UpdateMeta & {
pathname: string
doc_length: number
doc_hash?: string
history_doc_length?: number
}
}
export type SetCommentStateUpdate = {
pathname: string
commentId: string
resolved: boolean
meta: UpdateMeta
}
export type SetFileMetadataOperation = {
pathname: string
meta: UpdateMeta
metadata: LinkedFileData | object
}
export type DeleteCommentUpdate = {
pathname: string
deleteComment: string
meta: UpdateMeta
}
type ProjectUpdateBase = {
version: string
projectHistoryId: string
meta: UpdateMeta
doc: string
}
export type AddDocUpdate = ProjectUpdateBase & {
pathname: string
docLines: string
ranges?: HistoryRanges
}
export type AddFileUpdate = ProjectUpdateBase & {
pathname: string
file: string
url: string
hash: string
createdBlob?: boolean
metadata?: LinkedFileData
}
export type RenameUpdate = ProjectUpdateBase & {
pathname: string
new_pathname: string
}
export type ResyncProjectStructureUpdate = {
resyncProjectStructure: {
docs: Doc[]
files: File[]
}
projectHistoryId: string
meta: {
ts: string
}
// optional fields for resyncProjectStructureOnly=true
resyncProjectStructureOnly?: boolean
_raw: string
}
export type ResyncDocContentUpdate = {
resyncDocContent: {
content: string
version: number
ranges?: Ranges
resolvedCommentIds?: string[]
}
projectHistoryId: string
path: string
doc: string
meta: {
ts: string
}
}
export type Op = RetainOp | InsertOp | DeleteOp | CommentOp
export type RetainOp = {
r: string
p: number
hpos?: number
tracking?: TrackingDirective
}
export type InsertOp = {
i: string
p: number
u?: boolean
hpos?: number
trackedDeleteRejection?: boolean
commentIds?: string[]
}
export type DeleteOp = {
d: string
p: number
u?: boolean
hpos?: number
trackedChanges?: TrackedChangesInsideDelete[]
}
export type TrackedChangesInsideDelete = {
type: 'insert' | 'delete'
offset: number
length: number
}
export type CommentOp = {
c: string
p: number
t: string
hpos?: number
hlen?: number
resolved?: boolean
}
export type UpdateWithBlob<T extends Update = Update> = {
update: T
blobHashes: T extends AddDocUpdate | AddFileUpdate
? {
file: string
ranges?: string
}
: never
}
export type TrackingProps = {
type: 'insert' | 'delete'
userId: string
ts: string
}
export type TrackingDirective = TrackingProps | { type: 'none' }
export type TrackingType = 'insert' | 'delete' | 'none'
export type RawScanOp =
| number
| string
| { r: number; tracking?: TrackingDirective }
| { i: string; tracking?: TrackingProps; commentIds?: string[] }
| { d: number }
export type TrackedChangeSnapshot = {
op: {
p: number
} & ({ d: string } | { i: string })
metadata: {
ts: string
user_id: string
}
}
export type CommentSnapshot = {
op: {
p: number
t: string
c: string
resolved: boolean
}
}
export type RangesSnapshot = {
changes: TrackedChangeSnapshot[]
comments: CommentSnapshot[]
}
export type Doc = {
doc: string
path: string
}
export type File = {
file: string
url?: string
path: string
_hash?: string
createdBlob?: boolean
metadata?: LinkedFileData
}
export type Entity = Doc | File
export type Ranges = {
comments?: Comment[]
changes?: TrackedChange[]
}
export type Comment = {
id: string
op: CommentOp
metadata: {
user_id: string
ts: string
}
}
export type TrackedChange = {
id: string
op: InsertOp | DeleteOp
metadata: {
user_id: string
ts: string
}
}
export type TrackedChangeTransition = {
pos: number
tracking: TrackingDirective
stage: 'persisted' | 'expected'
}

View File

@@ -0,0 +1,9 @@
project-history
--dependencies=mongo,redis
--docker-repos=us-east1-docker.pkg.dev/overleaf-ops/ol-docker
--env-add=
--env-pass-through=
--esmock-loader=True
--node-version=20.18.2
--public-repo=False
--script-version=4.7.0

View File

@@ -0,0 +1,109 @@
const http = require('node:http')
const https = require('node:https')
http.globalAgent.keepAlive = false
https.globalAgent.keepAlive = false
module.exports = {
mongo: {
url:
process.env.MONGO_CONNECTION_STRING ||
`mongodb://${process.env.MONGO_HOST || '127.0.0.1'}/sharelatex`,
options: {
monitorCommands: true,
},
},
internal: {
history: {
port: 3054,
host: process.env.LISTEN_ADDRESS || '127.0.0.1',
},
},
apis: {
documentupdater: {
url: `http://${process.env.DOCUPDATER_HOST || '127.0.0.1'}:3003`,
},
docstore: {
url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016`,
},
filestore: {
enabled: process.env.FILESTORE_ENABLED !== 'false',
url: `http://${process.env.FILESTORE_HOST || '127.0.0.1'}:3009`,
},
web: {
url: `http://${
process.env.WEB_API_HOST || process.env.WEB_HOST || '127.0.0.1'
}:${process.env.WEB_PORT || 3000}`,
user: process.env.WEB_API_USER || 'overleaf',
pass: process.env.WEB_API_PASSWORD || 'password',
historyIdCacheSize: parseInt(
process.env.HISTORY_ID_CACHE_SIZE || '10000',
10
),
},
project_history: {
url: `http://${process.env.PROJECT_HISTORY_HOST || '127.0.0.1'}:3054`,
},
},
redis: {
lock: {
host: process.env.REDIS_HOST || '127.0.0.1',
password: process.env.REDIS_PASSWORD,
port: process.env.REDIS_PORT || 6379,
key_schema: {
projectHistoryLock({ project_id: projectId }) {
return `ProjectHistoryLock:{${projectId}}`
},
},
},
project_history: {
host:
process.env.HISTORY_REDIS_HOST || process.env.REDIS_HOST || '127.0.0.1',
port: process.env.HISTORY_REDIS_PORT || process.env.REDIS_PORT || 6379,
password:
process.env.HISTORY_REDIS_PASSWORD || process.env.REDIS_PASSWORD,
key_schema: {
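        // The curly braces form a Redis Cluster hash tag, so all of a
        // project's history keys map to the same slot.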
projectHistoryOps({ project_id: projectId }) {
return `ProjectHistory:Ops:{${projectId}}`
},
projectHistoryFirstOpTimestamp({ project_id: projectId }) {
return `ProjectHistory:FirstOpTimestamp:{${projectId}}`
},
projectHistoryCachedHistoryId({ project_id: projectId }) {
return `ProjectHistory:CachedHistoryId:{${projectId}}`
},
},
},
},
history: {
healthCheck: {
project_id: process.env.HEALTH_CHECK_PROJECT_ID || '',
},
},
overleaf: {
history: {
host:
process.env.V1_HISTORY_FULL_HOST ||
`http://${
process.env.V1_HISTORY_HOST ||
process.env.HISTORY_V1_HOST ||
'127.0.0.1'
}:3100/api`,
user: process.env.V1_HISTORY_USER || 'staging',
pass: process.env.V1_HISTORY_PASSWORD || 'password',
sync: {
retries_max: 30,
interval: 2,
},
requestTimeout: parseInt(process.env.V1_REQUEST_TIMEOUT || '300000', 10),
},
},
path: {
uploadFolder: process.env.UPLOAD_FOLDER || '/tmp/',
},
maxFileSizeInBytes: 100 * 1024 * 1024, // 100 megabytes
}

View File

@@ -0,0 +1,65 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
version: "2.3"
services:
test_unit:
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
user: node
command: npm run test:unit:_run
environment:
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
test_acceptance:
build: .
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
environment:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
HISTORY_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
depends_on:
mongo:
condition: service_started
redis:
condition: service_healthy
user: node
command: npm run test:acceptance
tar:
build: .
image: ci/$PROJECT_NAME:$BRANCH_NAME-$BUILD_NUMBER
volumes:
- ./:/tmp/build/
command: tar -czf /tmp/build/build.tar.gz --exclude=build.tar.gz --exclude-vcs .
user: root
redis:
image: redis
healthcheck:
test: ping="$$(redis-cli ping)" && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
mongo:
image: mongo:6.0.13
command: --replSet overleaf
volumes:
- ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
environment:
MONGO_INITDB_DATABASE: sharelatex
extra_hosts:
# Required when using the automatic database setup for initializing the
# replica set. This override is not needed when running the setup after
# starting up mongo.
- mongo:127.0.0.1

View File

@@ -0,0 +1,69 @@
# This file was auto-generated, do not edit it directly.
# Instead run bin/update_build_scripts from
# https://github.com/overleaf/internal/
version: "2.3"
services:
test_unit:
image: node:20.18.2
volumes:
- .:/overleaf/services/project-history
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
working_dir: /overleaf/services/project-history
environment:
MOCHA_GREP: ${MOCHA_GREP}
LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
command: npm run --silent test:unit
user: node
test_acceptance:
image: node:20.18.2
volumes:
- .:/overleaf/services/project-history
- ../../node_modules:/overleaf/node_modules
- ../../libraries:/overleaf/libraries
working_dir: /overleaf/services/project-history
environment:
ELASTIC_SEARCH_DSN: es:9200
REDIS_HOST: redis
HISTORY_REDIS_HOST: redis
QUEUES_REDIS_HOST: redis
ANALYTICS_QUEUES_REDIS_HOST: redis
MONGO_HOST: mongo
POSTGRES_HOST: postgres
MOCHA_GREP: ${MOCHA_GREP}
LOG_LEVEL: ${LOG_LEVEL:-}
NODE_ENV: test
NODE_OPTIONS: "--unhandled-rejections=strict"
user: node
depends_on:
mongo:
condition: service_started
redis:
condition: service_healthy
command: npm run --silent test:acceptance
redis:
image: redis
healthcheck:
test: ping=$$(redis-cli ping) && [ "$$ping" = 'PONG' ]
interval: 1s
retries: 20
mongo:
image: mongo:6.0.13
command: --replSet overleaf
volumes:
- ../../bin/shared/mongodb-init-replica-set.js:/docker-entrypoint-initdb.d/mongodb-init-replica-set.js
environment:
MONGO_INITDB_DATABASE: sharelatex
extra_hosts:
# Required when using the automatic database setup for initializing the
# replica set. This override is not needed when running the setup after
# starting up mongo.
- mongo:127.0.0.1

View File

@@ -0,0 +1,53 @@
{
"name": "@overleaf/project-history",
"description": "An API for saving and compressing individual document updates into a browseable history",
"private": true,
"main": "app.js",
"type": "module",
"scripts": {
"test:acceptance": "npm run test:acceptance:_run -- --grep=$MOCHA_GREP",
"test:unit": "npm run test:unit:_run -- --grep=$MOCHA_GREP",
"start": "node app.js",
"nodemon": "node --watch app.js",
"test:acceptance:_run": "LOG_LEVEL=fatal mocha --loader=esmock --recursive --reporter spec --timeout 15000 --exit $@ test/acceptance/js",
"test:unit:_run": "LOG_LEVEL=fatal mocha --loader=esmock --recursive --reporter spec $@ test/unit/js",
"lint": "eslint --max-warnings 0 --format unix .",
"format": "prettier --list-different $PWD/'**/*.*js'",
"format:fix": "prettier --write $PWD/'**/*.*js'",
"lint:fix": "eslint --fix .",
"types:check": "tsc --noEmit"
},
"dependencies": {
"@overleaf/fetch-utils": "*",
"@overleaf/logger": "*",
"@overleaf/metrics": "*",
"@overleaf/o-error": "*",
"@overleaf/promise-utils": "*",
"@overleaf/redis-wrapper": "*",
"@overleaf/settings": "*",
"async": "^3.2.5",
"aws-sdk": "^2.650.0",
"body-parser": "^1.20.3",
"bunyan": "^1.8.15",
"celebrate": "^15.0.3",
"diff-match-patch": "overleaf/diff-match-patch#89805f9c671a77a263fc53461acd62aa7498f688",
"esmock": "^2.6.3",
"express": "^4.21.2",
"lodash": "^4.17.20",
"minimist": "^1.2.8",
"mongodb-legacy": "6.1.3",
"overleaf-editor-core": "*",
"p-queue": "^8.1.0",
"request": "^2.88.2"
},
"devDependencies": {
"chai": "^4.3.6",
"chai-as-promised": "^7.1.1",
"mocha": "^11.1.0",
"nock": "^13.5.3",
"sinon": "~9.0.1",
"sinon-chai": "^3.7.0",
"timekeeper": "2.2.0",
"typescript": "^5.0.4"
}
}

View File

@@ -0,0 +1,21 @@
/* eslint-env mongo */
// add a TTL index to expire entries for completed resyncs in the
// projectHistorySyncState collection. The entries should only be expired if
// resyncProjectStructure is false and resyncDocContents is a zero-length array.
const now = Date.now()
const inTheFuture = now + 24 * 3600 * 1000
db.projectHistorySyncState.ensureIndex(
{ expiresAt: 1 },
{ expireAfterSeconds: 0, background: true }
)
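// With expireAfterSeconds: 0 the TTL monitor removes each document once its
// expiresAt time has passed (checked roughly once a minute).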
db.projectHistorySyncState.updateMany(
{
resyncProjectStructure: false,
resyncDocContents: [],
expiresAt: { $exists: false },
},
{ $set: { expiresAt: new Date(inTheFuture) } }
)

View File

@@ -0,0 +1,328 @@
// @ts-check
import Events from 'node:events'
import { setTimeout } from 'node:timers/promises'
import readline from 'node:readline'
import fs from 'node:fs'
import minimist from 'minimist'
import { ObjectId } from 'mongodb'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import logger from '@overleaf/logger'
import Metrics from '@overleaf/metrics'
import OError from '@overleaf/o-error'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
import { db, mongoClient } from '../app/js/mongodb.js'
import * as HistoryStoreManager from '../app/js/HistoryStoreManager.js'
import * as RedisManager from '../app/js/RedisManager.js'
import * as SyncManager from '../app/js/SyncManager.js'
import * as UpdatesProcessor from '../app/js/UpdatesProcessor.js'
import { NeedFullProjectStructureResyncError } from '../app/js/Errors.js'
import * as ErrorRecorder from '../app/js/ErrorRecorder.js'
// Silence warning.
Events.setMaxListeners(20)
// Enable caching for ObjectId.toString()
ObjectId.cacheHexString = true
const READ_CONCURRENCY = parseInt(process.env.READ_CONCURRENCY || '100', 10)
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY || '10', 10)
const FLUSH_RETRIES = parseInt(process.env.FLUSH_RETRIES || '20', 10)
// Relevant dates:
// - 2024-12-19, start of event-hold removal in filestore bucket -> objects older than 24h are (soft-)deleted.
// - 2024-12-23, copy operation skipped in filestore when cloning project -> objects not created on clone.
// - 2025-01-24, no more filestore reads allowed in project-history -> no more empty files in history for 404s
const FILESTORE_SOFT_DELETE_START = new Date('2024-12-19T00:00:00Z')
const FILESTORE_READ_OFF = new Date('2025-01-24T15:00:00Z')
const argv = minimist(process.argv.slice(2), {
string: ['logs', 'log-latency'],
})
const LOG_LATENCY = argv['log-latency'] === 'true'
let gracefulShutdownInitiated = false
process.on('SIGINT', handleSignal)
process.on('SIGTERM', handleSignal)
function handleSignal() {
gracefulShutdownInitiated = true
console.warn('graceful shutdown initiated, draining queue')
}
const STATS = {
processedLines: 0,
success: 0,
changed: 0,
failure: 0,
skipped: 0,
checkFailure: 0,
}
function logStats() {
console.log(
JSON.stringify({
time: new Date(),
gracefulShutdownInitiated,
...STATS,
})
)
}
const logInterval = setInterval(logStats, 10_000)
/**
* @typedef {Object} FileRef
* @property {ObjectId} _id
* @property {any} linkedFileData
*/
/**
* @typedef {Object} Folder
* @property {Array<Folder>} folders
* @property {Array<FileRef>} fileRefs
*/
/**
* @typedef {Object} Project
* @property {ObjectId} _id
* @property {Date} lastUpdated
* @property {Array<Folder>} rootFolder
* @property {{history: {id: (number|string)}}} overleaf
*/
/**
* @param {Folder} folder
* @return {boolean}
*/
function checkFileTreeNeedsResync(folder) {
if (!folder) return false
if (Array.isArray(folder.fileRefs)) {
for (const fileRef of folder.fileRefs) {
if (fileRef.linkedFileData) return true
if (fileRef._id.getTimestamp() > FILESTORE_SOFT_DELETE_START) return true
}
}
if (Array.isArray(folder.folders)) {
for (const child of folder.folders) {
if (checkFileTreeNeedsResync(child)) return true
}
}
return false
}
/**
* @param {string} projectId
* @param {string} historyId
* @return {Promise<Date>}
*/
async function getLastEndTimestamp(projectId, historyId) {
const raw = await HistoryStoreManager.promises.getMostRecentVersionRaw(
projectId,
historyId,
{ readOnly: true }
)
if (!raw) throw new Error('bug: history not initialized')
return raw.endTimestamp
}
/** @type {Record<string, (project: Project) => Promise<boolean>>} */
const conditions = {
// cheap: in-memory mongo lookup
'updated after filestore soft-delete': async function (project) {
return project.lastUpdated > FILESTORE_SOFT_DELETE_START
},
// cheap: in-memory mongo lookup
'file-tree requires re-sync': async function (project) {
return checkFileTreeNeedsResync(project.rootFolder?.[0])
},
// moderate: GET from Redis
'has pending operations': async function (project) {
const n = await RedisManager.promises.countUnprocessedUpdates(
project._id.toString()
)
return n > 0
},
// expensive: GET from Mongo/Postgres via history-v1 HTTP API call
'has been flushed after filestore soft-delete': async function (project) {
// Resyncs started after soft-deleting can trigger 404s and result in empty files.
const endTimestamp = await getLastEndTimestamp(
project._id.toString(),
project.overleaf.history.id.toString()
)
return endTimestamp > FILESTORE_SOFT_DELETE_START
},
}
/**
* @param {Project} project
* @return {Promise<{projectId: string, historyId: string} | null>}
*/
async function checkProject(project) {
if (gracefulShutdownInitiated) return null
if (project._id.getTimestamp() > FILESTORE_READ_OFF) {
STATS.skipped++ // Project created after all bugs were fixed.
return null
}
const projectId = project._id.toString()
const historyId = project.overleaf.history.id.toString()
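  // Conditions are checked in declaration order, cheapest first; the first
  // one that matches selects the project for processing.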
for (const [condition, check] of Object.entries(conditions)) {
try {
if (await check(project)) return { projectId, historyId }
} catch (err) {
logger.err({ projectId, condition, err }, 'failed to check project')
STATS.checkFailure++
return null
}
}
STATS.skipped++
return null
}
/**
* @param {string} projectId
* @param {string} historyId
* @return {Promise<void>}
*/
async function processProject(projectId, historyId) {
if (gracefulShutdownInitiated) return
const t0 = performance.now()
try {
await tryProcessProject(projectId, historyId)
const latency = performance.now() - t0
if (LOG_LATENCY) {
logger.info({ projectId, historyId, latency }, 'processed project')
}
STATS.success++
} catch (err) {
logger.err({ err, projectId, historyId }, 'failed to process project')
STATS.failure++
}
}
/**
* @param {string} projectId
* @return {Promise<void>}
*/
async function flushWithRetries(projectId) {
for (let attempt = 0; attempt < FLUSH_RETRIES; attempt++) {
try {
await UpdatesProcessor.promises.processUpdatesForProject(projectId)
return
} catch (err) {
logger.warn(
{ projectId, err, attempt },
'failed to flush updates, trying again'
)
if (gracefulShutdownInitiated) throw err
}
}
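  // Final attempt outside the retry loop: if it still fails, wrap the error
  // so callers get the flush context.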
try {
await UpdatesProcessor.promises.processUpdatesForProject(projectId)
} catch (err) {
// @ts-ignore err is Error
throw new OError('failed to flush updates', {}, err)
}
}
/**
* @param {string} projectId
* @param {string} historyId
* @return {Promise<void>}
*/
async function tryProcessProject(projectId, historyId) {
await flushWithRetries(projectId)
const start = new Date()
let needsFullSync = false
try {
await UpdatesProcessor.promises.startResyncAndProcessUpdatesUnderLock(
projectId,
{ resyncProjectStructureOnly: true }
)
} catch (err) {
if (err instanceof NeedFullProjectStructureResyncError) {
needsFullSync = true
} else {
throw err
}
}
if (needsFullSync) {
logger.warn(
{ projectId, historyId },
'structure only resync not sufficient, doing full soft resync'
)
await SyncManager.promises.startResync(projectId, {})
await UpdatesProcessor.promises.processUpdatesForProject(projectId)
STATS.changed++
} else {
const after = await getLastEndTimestamp(projectId, historyId)
if (after > start) {
STATS.changed++
}
}
// Keep db.projectHistorySyncState from growing for each project we resync.
// MongoDB collections cannot shrink on their own. In case of success, purge
// the db entry created by this script right away.
await SyncManager.promises.clearResyncStateIfAllAfter(projectId, start)
}
async function processBatch(projects) {
const projectIds = (
await promiseMapWithLimit(READ_CONCURRENCY, projects, checkProject)
).filter(id => !!id)
await promiseMapWithLimit(WRITE_CONCURRENCY, projectIds, ids =>
processProject(ids.projectId, ids.historyId)
)
if (gracefulShutdownInitiated) throw new Error('graceful shutdown triggered')
}
async function processProjectsFromLog() {
const rl = readline.createInterface({
input: fs.createReadStream(argv.logs),
})
for await (const line of rl) {
if (gracefulShutdownInitiated) break
STATS.processedLines++
if (!line.startsWith('{')) continue
const { projectId, historyId, msg } = JSON.parse(line)
if (msg !== 'failed to process project') continue
await processProject(projectId, historyId) // does try/catch with logging
}
}
async function main() {
if (argv.logs) {
await processProjectsFromLog()
return
}
await batchedUpdate(db.projects, {}, processBatch, {
_id: 1,
lastUpdated: 1,
'overleaf.history': 1,
rootFolder: 1,
})
}
try {
try {
await main()
} finally {
clearInterval(logInterval)
logStats()
Metrics.close()
await mongoClient.close()
// TODO(das7pad): graceful shutdown for redis. Refactor process.exit when done.
}
console.log('Done.')
await setTimeout(1_000)
if (STATS.failure) {
process.exit(Math.min(STATS.failure, 99))
} else {
process.exit(0)
}
} catch (err) {
logger.err({ err }, 'fatal error')
await setTimeout(1_000)
process.exit(100)
}

View File

@@ -0,0 +1,43 @@
#!/usr/bin/env node
// Clear timestamps which don't have any corresponding history ops
// usage: scripts/flush_all.js <limit>
import logger from '@overleaf/logger'
import * as RedisManager from '../app/js/RedisManager.js'
const argv = process.argv.slice(2)
const limit = parseInt(argv[0], 10) || null
// find all dangling timestamps and clear them
async function main() {
logger.info(
{ limit },
'running redis scan for project timestamps, this may take a while'
)
const projectIdsWithFirstOpTimestamps =
await RedisManager.promises.getProjectIdsWithFirstOpTimestamps(limit)
const totalTimestamps = projectIdsWithFirstOpTimestamps.length
logger.info(
{ totalTimestamps },
'scan completed, now clearing dangling timestamps'
)
let clearedTimestamps = 0
let processed = 0
for (const projectId of projectIdsWithFirstOpTimestamps) {
const result =
await RedisManager.promises.clearDanglingFirstOpTimestamp(projectId)
processed++
clearedTimestamps += result
if (processed % 1000 === 0) {
logger.info(
{ processed, totalTimestamps, clearedTimestamps },
'clearing timestamps'
)
}
}
logger.info({ processed, totalTimestamps, clearedTimestamps }, 'completed')
process.exit(0)
}
main()

View File

@@ -0,0 +1,136 @@
#!/usr/bin/env node
import async from 'async'
import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import redis from '@overleaf/redis-wrapper'
import { db, ObjectId } from '../app/js/mongodb.js'
logger.logger.level('fatal')
const rclient = redis.createClient(Settings.redis.project_history)
const Keys = Settings.redis.project_history.key_schema
const argv = process.argv.slice(2)
const limit = parseInt(argv[0], 10) || null
const force = argv[1] === 'force' || false
let delay = 0
function checkAndClear(project, callback) {
const projectId = project.project_id
function checkDeleted(cb) {
db.projects.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { _id: 1 } },
(err, result) => {
if (err) {
cb(err)
} else if (!result) {
// project not found, but we still need to look at deletedProjects
cb()
} else {
console.log(`Project ${projectId} found in projects`)
cb(new Error('error: project still exists'))
}
}
)
}
function checkRecoverable(cb) {
db.deletedProjects.findOne(
{
// this condition makes use of the index
'deleterData.deletedProjectId': new ObjectId(projectId),
// this condition checks if the deleted project has expired
'project._id': new ObjectId(projectId),
},
{ projection: { _id: 1 } },
(err, result) => {
if (err) {
cb(err)
} else if (!result) {
console.log(
`project ${projectId} has been deleted - safe to clear queue`
)
cb()
} else {
console.log(`Project ${projectId} found in deletedProjects`)
cb(new Error('error: project still exists'))
}
}
)
}
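// Expire the Redis queue key rather than deleting it outright; the per-project
// delay is incremented so keys expire gradually instead of all at once.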
function clearRedisQueue(cb) {
const key = Keys.projectHistoryOps({ project_id: projectId })
delay++
if (force) {
console.log('setting redis key', key, 'to expire in', delay, 'seconds')
// use expire to allow redis to delete the key in the background
rclient.expire(key, delay, err => {
cb(err)
})
} else {
console.log(
'dry run, would set key',
key,
'to expire in',
delay,
'seconds'
)
cb()
}
}
function clearMongoEntry(cb) {
if (force) {
console.log('deleting key in mongo projectHistoryFailures', projectId)
db.projectHistoryFailures.deleteOne({ project_id: projectId }, cb)
} else {
console.log('would delete failure record for', projectId, 'from mongo')
cb()
}
}
// do the checks and deletions
async.waterfall(
[checkDeleted, checkRecoverable, clearRedisQueue, clearMongoEntry],
err => {
if (!err || err.message === 'error: project still exists') {
callback()
} else {
console.log('error:', err)
callback(err)
}
}
)
}
// find all the broken projects from the failure records
async function main() {
const results = await db.projectHistoryFailures.find({}).toArray()
processFailures(results)
}
main().catch(error => {
console.error(error)
process.exit(1)
})
function processFailures(results) {
if (argv.length === 0) {
console.log(`
Usage: node clear_deleted.js [QUEUES] [FORCE]
where
QUEUES is the number of queues to process
FORCE is the string "force" when we're ready to delete the queues. Without it, this script does a dry-run
`)
}
console.log('number of stuck projects', results.length)
// now check if the project is truly deleted in mongo
async.eachSeries(results.slice(0, limit), checkAndClear, err => {
console.log('DONE', err)
process.exit()
})
}

View File

@@ -0,0 +1,175 @@
#!/usr/bin/env node
// To run in dev:
//
// docker compose run --rm project-history scripts/clear_deleted.js
//
// In production:
//
// docker run --rm $(docker ps -lq) scripts/clear_deleted.js
import async from 'async'
import logger from '@overleaf/logger'
import Settings from '@overleaf/settings'
import redis from '@overleaf/redis-wrapper'
import { db, ObjectId } from '../app/js/mongodb.js'
logger.logger.level('fatal')
const rclient = redis.createClient(Settings.redis.project_history)
const Keys = Settings.redis.project_history.key_schema
const argv = process.argv.slice(2)
const limit = parseInt(argv[0], 10) || null
const force = argv[1] === 'force' || false
let projectNotFoundErrors = 0
let projectImportedFromV1Errors = 0
const projectsNotFound = []
const projectsImportedFromV1 = []
let projectWithHistoryIdErrors = 0
const projectsWithHistoryId = []
function checkAndClear(project, callback) {
const projectId = project.project_id
console.log('checking project', projectId)
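// A queue is only cleared when the project still exists in mongo, was never
// imported from v1 (no overleaf.id), has no active history id, and its history
// has already been soft-deleted (overleaf.history.deleted_id is set).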
function checkDeleted(cb) {
db.projects.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { overleaf: true } },
(err, result) => {
console.log(
'1. looking in mongo projects collection: err',
err,
'result',
JSON.stringify(result)
)
if (err) {
return cb(err)
}
if (!result) {
return cb(new Error('project not found in mongo'))
}
if (
result &&
result.overleaf &&
!result.overleaf.id &&
result.overleaf.history &&
!result.overleaf.history.id &&
result.overleaf.history.deleted_id
) {
console.log(
' - project is not imported from v1 and has a deleted_id - ok to clear'
)
return cb()
} else if (result && result.overleaf && result.overleaf.id) {
console.log(' - project is imported from v1')
return cb(
new Error('project is imported from v1 - will not clear it')
)
} else if (
result &&
result.overleaf &&
result.overleaf.history &&
result.overleaf.history.id
) {
console.log(' - project has a history id')
return cb(new Error('project has a history id - will not clear it'))
} else {
console.log(' - project state not recognised')
return cb(new Error('project state not recognised'))
}
}
)
}
function clearRedisQueue(cb) {
const key = Keys.projectHistoryOps({ project_id: projectId })
if (force) {
console.log('deleting redis key', key)
rclient.del(key, err => {
cb(err)
})
} else {
console.log('dry run, would delete key', key)
cb()
}
}
function clearMongoEntry(cb) {
if (force) {
console.log('deleting key in mongo projectHistoryFailures', projectId)
db.projectHistoryFailures.deleteOne(
{ project_id: projectId },
(err, result) => {
console.log('got result from remove', err, result)
cb(err)
}
)
} else {
console.log('would delete failure record for', projectId, 'from mongo')
cb()
}
}
// do the checks and deletions
async.waterfall([checkDeleted, clearRedisQueue, clearMongoEntry], err => {
if (!err) {
if (force) {
return setTimeout(callback, 100) // include a short delay between projects
}
return callback()
} else if (err.message === 'project not found in mongo') {
projectNotFoundErrors++
projectsNotFound.push(projectId)
return callback()
} else if (err.message === 'project has a history id - will not clear it') {
projectWithHistoryIdErrors++
projectsWithHistoryId.push(projectId)
return callback()
} else if (
err.message === 'project is imported from v1 - will not clear it'
) {
projectImportedFromV1Errors++
projectsImportedFromV1.push(projectId)
return callback()
} else {
console.log('error:', err)
return callback(err)
}
})
}
// find all the broken projects from the failure records
async function main() {
const results = await db.projectHistoryFailures
.find({ error: /history store a non-success status code: 422/ })
.toArray()
console.log('number of queues with history store 422 errors =', results.length)
// now check if the project is truly deleted in mongo
async.eachSeries(results.slice(0, limit), checkAndClear, err => {
console.log('Final error status', err)
console.log(
'Project not found errors',
projectNotFoundErrors,
projectsNotFound
)
console.log(
'Project with history id errors',
projectWithHistoryIdErrors,
projectsWithHistoryId
)
console.log(
'Project imported from V1 errors',
projectImportedFromV1Errors,
projectsImportedFromV1
)
process.exit()
})
}
main().catch(error => {
console.error(error)
process.exit(1)
})

View File

@@ -0,0 +1,204 @@
#!/usr/bin/env node
// To run in dev:
//
// docker compose run --rm project-history scripts/clear_deleted.js
//
// In production:
//
// docker run --rm $(docker ps -lq) scripts/clear_deleted.js
import async from 'async'
import logger from '@overleaf/logger'
import request from 'request'
import Settings from '@overleaf/settings'
import redis from '@overleaf/redis-wrapper'
import { db, ObjectId } from '../app/js/mongodb.js'
logger.logger.level('fatal')
const rclient = redis.createClient(Settings.redis.project_history)
const Keys = Settings.redis.project_history.key_schema
const argv = process.argv.slice(2)
const limit = parseInt(argv[0], 10) || null
const force = argv[1] === 'force' || false
let projectNotFoundErrors = 0
let projectImportedFromV1Errors = 0
const projectsNotFound = []
const projectsImportedFromV1 = []
function checkAndClear(project, callback) {
const projectId = project.project_id
console.log('checking project', projectId)
// These can probably also be reset and their overleaf.history.id unset
// (unless they are v1 projects).
function checkNotV1Project(cb) {
db.projects.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { overleaf: true } },
(err, result) => {
console.log(
'1. looking in mongo projects collection: err',
err,
'result',
JSON.stringify(result)
)
if (err) {
return cb(err)
}
if (!result) {
return cb(new Error('project not found in mongo'))
}
if (result && result.overleaf && !result.overleaf.id) {
console.log(' - project is not imported from v1 - ok to clear')
cb()
} else {
cb(new Error('project is imported from v1 - will not clear it'))
}
}
)
}
function clearProjectHistoryInMongo(cb) {
if (force) {
console.log('2. deleting overleaf.history.id in mongo project', projectId)
// Accessing mongo projects collection directly - BE CAREFUL!
db.projects.updateOne(
{ _id: new ObjectId(projectId) },
{ $unset: { 'overleaf.history.id': '' } },
(err, result) => {
console.log(' - got result from remove', err, result)
if (err) {
return cb(err)
}
if (
result &&
(result.modifiedCount === 1 || result.modifiedCount === 0)
) {
return cb()
} else {
return cb(
new Error('error: problem trying to unset overleaf.history.id')
)
}
}
)
} else {
console.log(
'2. would delete overleaf.history.id for',
projectId,
'from mongo'
)
cb()
}
}
function clearDocUpdaterCache(cb) {
const url = Settings.apis.documentupdater.url + '/project/' + projectId
if (force) {
console.log('3. making request to clear docupdater', url)
request.delete(url, (err, response, body) => {
console.log(
' - result of request',
err,
response && response.statusCode,
body
)
cb(err)
})
} else {
console.log('3. dry run, would request DELETE on url', url)
cb()
}
}
function clearRedisQueue(cb) {
const key = Keys.projectHistoryOps({ project_id: projectId })
if (force) {
console.log('4. deleting redis queue key', key)
rclient.del(key, err => {
cb(err)
})
} else {
console.log('4. dry run, would delete redis key', key)
cb()
}
}
function clearMongoEntry(cb) {
if (force) {
console.log('5. deleting key in mongo projectHistoryFailures', projectId)
db.projectHistoryFailures.deleteOne(
{ project_id: projectId },
(err, result) => {
console.log(' - got result from remove', err, result)
cb(err)
}
)
} else {
console.log('5. would delete failure record for', projectId, 'from mongo')
cb()
}
}
// do the checks and deletions
async.waterfall(
[
checkNotV1Project,
clearProjectHistoryInMongo,
clearDocUpdaterCache,
clearRedisQueue,
clearMongoEntry,
],
err => {
if (!err) {
return setTimeout(callback, 1000) // include a 1 second delay
} else if (err.message === 'project not found in mongo') {
projectNotFoundErrors++
projectsNotFound.push(projectId)
return callback()
} else if (
err.message === 'project is imported from v1 - will not clear it'
) {
projectImportedFromV1Errors++
projectsImportedFromV1.push(projectId)
return callback()
} else {
console.log('error:', err)
return callback(err)
}
}
)
}
// find all the broken projects from the failure records
async function main() {
const results = await db.projectHistoryFailures
.find({ error: 'Error: bad response from filestore: 404' })
.toArray()
console.log('number of queues with filestore 404 errors =', results.length)
// now check each project and clear its history state if it is safe to do so
async.eachSeries(results.slice(0, limit), checkAndClear, err => {
console.log('Final error status', err)
console.log(
'Project not found errors',
projectNotFoundErrors,
projectsNotFound
)
console.log(
'Project imported from V1 errors',
projectImportedFromV1Errors,
projectsImportedFromV1
)
process.exit()
})
}
main().catch(error => {
console.error(error)
process.exit(1)
})

View File

@@ -0,0 +1,260 @@
#!/usr/bin/env node
// To run in dev:
//
// docker compose run --rm project-history scripts/clear_deleted.js
//
// In production:
//
// docker run --rm $(docker ps -lq) scripts/clear_deleted.js
import async from 'async'
import logger from '@overleaf/logger'
import request from 'request'
import Settings from '@overleaf/settings'
import redis from '@overleaf/redis-wrapper'
import { db, ObjectId } from '../app/js/mongodb.js'
logger.logger.level('fatal')
const rclient = redis.createClient(Settings.redis.project_history)
const Keys = Settings.redis.project_history.key_schema
const argv = process.argv.slice(2)
const limit = parseInt(argv[0], 10) || null
const force = argv[1] === 'force' || false
let projectNotFoundErrors = 0
let projectImportedFromV1Errors = 0
const projectsNotFound = []
const projectsImportedFromV1 = []
let projectHasV2HistoryErrors = 0
const projectsV2HistoryInUse = []
function checkAndClear(project, callback) {
const projectId = project.project_id
console.log('checking project', projectId)
// These can probably also be reset and their overleaf.history.id unset
// (unless they are v1 projects).
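// Only projects that have a history id but are not v1 imports, are not
// displaying v2 history, and do not already have a deleted_id are considered
// safe to clear.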
function checkNotV1Project(cb) {
db.projects.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { overleaf: true } },
(err, result) => {
console.log(
'1. looking in mongo projects collection: err',
err,
'result',
JSON.stringify(result)
)
if (err) {
return cb(err)
}
if (!result) {
return cb(new Error('project not found in mongo'))
}
const isV1Project = result && result.overleaf && result.overleaf.id
const hasHistoryId =
result &&
result.overleaf &&
result.overleaf.history &&
result.overleaf.history.id
const hasV2HistoryInUse =
result &&
result.overleaf &&
result.overleaf.history &&
result.overleaf.history.display
const hasExistingDeletedHistory =
result &&
result.overleaf.history &&
result.overleaf.history.deleted_id
if (
hasHistoryId &&
!(isV1Project || hasV2HistoryInUse || hasExistingDeletedHistory)
) {
console.log(
' - project is not imported from v1 and v2 history is not in use - ok to clear'
)
return cb()
} else if (hasHistoryId && hasExistingDeletedHistory) {
console.log(' - project already has deleted_id')
return cb(
new Error('project already has deleted_id - will not clear it')
)
} else if (hasHistoryId && isV1Project) {
console.log(' - project is imported from v1')
return cb(
new Error('project is imported from v1 - will not clear it')
)
} else if (hasHistoryId && hasV2HistoryInUse) {
console.log(' - project is displaying v2 history')
return cb(
new Error('project is displaying v2 history - will not clear it')
)
} else {
console.log(' - project state not recognised')
return cb(new Error('project state not recognised'))
}
}
)
}
function clearProjectHistoryInMongo(cb) {
if (force) {
console.log('2. renaming overleaf.history.id to deleted_id in mongo project', projectId)
// Accessing mongo projects collection directly - BE CAREFUL!
db.projects.updateOne(
{ _id: new ObjectId(projectId) },
{ $rename: { 'overleaf.history.id': 'overleaf.history.deleted_id' } },
(err, result) => {
console.log(' - got result from remove', err, result)
if (err) {
return cb(err)
}
if (
result &&
(result.modifiedCount === 1 || result.modifiedCount === 0)
) {
return cb()
} else {
return cb(
new Error('error: problem trying to rename overleaf.history.id')
)
}
}
)
} else {
console.log(
'2. would delete overleaf.history.id for',
projectId,
'from mongo'
)
cb()
}
}
function clearDocUpdaterCache(cb) {
const url = Settings.apis.documentupdater.url + '/project/' + projectId
if (force) {
console.log('3. making request to clear docupdater', url)
request.delete(url, (err, response, body) => {
console.log(
' - result of request',
err,
response && response.statusCode,
body
)
cb(err)
})
} else {
console.log('3. dry run, would request DELETE on url', url)
cb()
}
}
function clearRedisQueue(cb) {
const key = Keys.projectHistoryOps({ project_id: projectId })
if (force) {
console.log('4. deleting redis queue key', key)
rclient.del(key, err => {
cb(err)
})
} else {
console.log('4. dry run, would delete redis key', key)
cb()
}
}
function clearMongoEntry(cb) {
if (force) {
console.log('5. deleting key in mongo projectHistoryFailures', projectId)
db.projectHistoryFailures.deleteOne(
{ project_id: projectId },
(err, result) => {
console.log(' - got result from remove', err, result)
cb(err)
}
)
} else {
console.log('5. would delete failure record for', projectId, 'from mongo')
cb()
}
}
// do the checks and deletions
async.waterfall(
[
checkNotV1Project,
clearProjectHistoryInMongo,
clearDocUpdaterCache,
clearRedisQueue,
clearMongoEntry,
],
err => {
if (!err) {
return setTimeout(callback, 100) // include a delay
} else if (err.message === 'project not found in mongo') {
projectNotFoundErrors++
projectsNotFound.push(projectId)
return callback()
} else if (
err.message === 'project is imported from v1 - will not clear it'
) {
projectImportedFromV1Errors++
projectsImportedFromV1.push(projectId)
return callback()
} else if (
err.message === 'project is displaying v2 history - will not clear it'
) {
projectHasV2HistoryErrors++
projectsV2HistoryInUse.push(projectId)
return callback()
} else {
console.log('error:', err)
return callback(err)
}
}
)
}
// find all the broken projects from the failure records
async function main() {
const results = await db.projectHistoryFailures
.find({
error:
'OpsOutOfOrderError: project structure version out of order on incoming updates',
})
.toArray()
console.log(
'number of queues with project structure version out of order on incoming updates =',
results.length
)
// now clear the projects
async.eachSeries(results.slice(0, limit), checkAndClear, err => {
console.log('Final error status', err)
console.log(
'Project not found errors',
projectNotFoundErrors,
projectsNotFound
)
console.log(
'Project imported from V1 errors',
projectImportedFromV1Errors,
projectsImportedFromV1
)
console.log(
'Project has V2 history in use',
projectHasV2HistoryErrors,
projectsV2HistoryInUse
)
process.exit()
})
}
main().catch(error => {
console.error(error)
process.exit(1)
})

View File

@@ -0,0 +1,74 @@
#!/usr/bin/env node
/**
* This script takes a dump file, obtained via the /project/:project_id/dump
* endpoint and feeds it to the update translator to see how updates are transformed
* into changes sent to v1 history.
*/
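// Usage: node scripts/debug_translate_updates.js DUMP_FILE
// Prints the resulting changes (or the expanded resync updates) as JSON.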
import fs from 'node:fs'
import * as UpdateTranslator from '../app/js/UpdateTranslator.js'
import * as SyncManager from '../app/js/SyncManager.js'
import * as HistoryStoreManager from '../app/js/HistoryStoreManager.js'
const { filename } = parseArgs()
const { projectId, updates, chunk } = parseDumpFile(filename)
function expandResyncProjectStructure(chunk, update) {
HistoryStoreManager._mocks.getMostRecentChunk = function (
projectId,
projectHistoryId,
callback
) {
callback(null, chunk)
}
SyncManager.expandSyncUpdates(
projectId,
99999, // dummy history id
chunk,
[update],
cb => cb(), // extend lock
(err, result) => {
console.log('err', err, 'result', JSON.stringify(result, null, 2))
process.exit()
}
)
}
function expandUpdates(updates) {
const wrappedUpdates = updates.map(update => ({ update }))
let changes
try {
changes = UpdateTranslator.convertToChanges(projectId, wrappedUpdates)
} catch (err) {
error(err)
}
console.log(JSON.stringify(changes, null, 2))
}
if (updates[0].resyncProjectStructure) {
expandResyncProjectStructure(chunk, updates[0])
} else {
expandUpdates(updates)
}
function parseArgs() {
const args = process.argv.slice(2)
if (args.length !== 1) {
console.log('Usage: debug_translate_updates.js DUMP_FILE')
process.exit(1)
}
const filename = args[0]
return { filename }
}
function parseDumpFile(filename) {
const json = fs.readFileSync(filename)
const { project_id: projectId, updates, chunk } = JSON.parse(json)
return { projectId, updates, chunk }
}
function error(err) {
console.error(err)
process.exit(1)
}

View File

@@ -0,0 +1,93 @@
#!/usr/bin/env node
// To run in dev:
//
// docker compose run --rm project-history scripts/flush_all.js <limit>
//
// In production:
//
// docker run --rm $(docker ps -lq) scripts/flush_all.js <limit>
import _ from 'lodash'
import async from 'async'
import logger from '@overleaf/logger'
import * as RedisManager from '../app/js/RedisManager.js'
import * as UpdatesProcessor from '../app/js/UpdatesProcessor.js'
logger.logger.level('fatal')
const argv = process.argv.slice(2)
const limit = parseInt(argv[0], 10) || null
const parallelism = Math.min(parseInt(argv[1], 10) || 1, 10)
// flush all outstanding changes
RedisManager.getProjectIdsWithHistoryOps(limit, flushProjects)
function flushProjects(error, projectIds) {
if (error) {
throw error
}
let ts = new Date()
console.log(
'found projects',
JSON.stringify({ project_ids: projectIds.length, limit, ts })
)
projectIds = _.shuffle(projectIds) // randomise to avoid hitting same projects each time
if (limit > 0) {
projectIds = projectIds.slice(0, limit)
}
let succeededProjects = 0
let failedProjects = 0
let attempts = 0
async.eachLimit(
projectIds,
parallelism,
function (projectId, cb) {
attempts++
UpdatesProcessor.processUpdatesForProject(
projectId,
function (err, queueSize) {
const progress = attempts + '/' + projectIds.length
ts = new Date()
if (err) {
failedProjects++
console.log(
'failed',
progress,
JSON.stringify({
projectId,
queueSize,
ts,
err: err.toString(),
})
)
} else {
succeededProjects++
console.log(
'succeeded',
progress,
JSON.stringify({
projectId,
queueSize,
ts,
})
)
}
return cb()
}
)
},
function () {
console.log(
'total',
JSON.stringify({
succeededProjects,
failedProjects,
})
)
process.exit(0)
}
)
}

View File

@@ -0,0 +1,191 @@
#!/usr/bin/env node
import Settings from '@overleaf/settings'
import minimist from 'minimist'
import logger from '@overleaf/logger'
import PQueue from 'p-queue'
import * as RedisManager from '../app/js/RedisManager.js'
import * as ErrorRecorder from '../app/js/ErrorRecorder.js'
logger.logger.level('fatal')
function usage() {
console.log(`
Usage: flush_old.js [options]
Options:
-b, --batch-size <size> Number of projects to process in each batch (default: 100)
-a, --max-age <seconds> Maximum age of projects to keep (default: 3600)
-i, --interval <seconds> Interval to spread the processing over (default: 300)
-c, --concurrency <number> Number of concurrent jobs (default: 10)
-u, --buffer <seconds> Buffer time in seconds to reserve at end (default: 15)
-n, --dry-run Show what would be done without making changes
-h, --help Show this help message
Examples:
# Flush projects older than 24 hours with 5 concurrent jobs
flush_old.js --batch-size 100 --max-age 86400 -c 5
# Dry run to see what would be flushed
flush_old.js --max-age 3600 --dry-run
`)
process.exit(0)
}
const argv = minimist(process.argv.slice(2), {
boolean: ['dry-run', 'help'],
alias: {
b: 'batch-size',
a: 'max-age',
i: 'interval',
c: 'concurrency',
n: 'dry-run',
u: 'buffer',
h: 'help',
},
default: {
'batch-size': 100,
'max-age': 3600,
interval: 300,
concurrency: 10,
'dry-run': false,
buffer: 15,
help: false,
},
})
if (argv.help || process.argv.length === 2) {
usage()
}
const batchSize = parseInt(argv['batch-size'], 10)
const maxAge = argv['max-age'] ? parseInt(argv['max-age'], 10) : null
const interval = parseInt(argv.interval, 10) || 300
const concurrency = parseInt(argv.concurrency, 10) || 10
const bufferTime = parseInt(argv.buffer, 10) || 15
const dryRun = argv['dry-run']
/**
* Generator function that yields batches of items from an array
* @param {Array} array - The array to batch
* @param {number} size - The size of each batch
* @yields {Array} A batch of items
*/
function* getBatches(array, size) {
for (let i = 0; i < array.length; i += size) {
yield array.slice(i, i + size)
}
}
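// Example: [...getBatches([1, 2, 3, 4, 5], 2)] yields [1, 2], [3, 4] and [5]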
let flushCount = 0
async function flushProject({ projectId, timestamp }) {
const url = `${Settings.apis.project_history.url}/project/${projectId}/flush`
if (dryRun) {
console.log(`[DRY RUN] would flush project ${projectId}`)
return
}
const response = await fetch(url, {
method: 'POST',
})
flushCount++
if (flushCount % 100 === 0) {
console.log('flushed', flushCount, 'projects, up to', timestamp)
}
if (!response.ok) {
throw new Error(`failed to flush project ${projectId}`)
}
}
const SCRIPT_START_TIME = Date.now() // current time in milliseconds from start of script
function olderThan(maxAge, timestamp) {
const age = (SCRIPT_START_TIME - timestamp) / 1000
return age > maxAge
}
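// e.g. with --max-age 3600, a first-op timestamp recorded more than an hour
// before the script started counts as old enough to flush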
async function main() {
const projectIds = await RedisManager.promises.getProjectIdsWithHistoryOps()
const failedProjects = await ErrorRecorder.promises.getFailedProjects()
const failedProjectIds = new Set(failedProjects.map(p => p.project_id))
const projectIdsToProcess = projectIds.filter(p => !failedProjectIds.has(p))
console.log('number of projects with history ops', projectIds.length)
console.log(
'number of failed projects to exclude',
projectIds.length - projectIdsToProcess.length
)
const collectedProjects = []
let nullCount = 0
// iterate over the project ids in batches of doing a redis MGET to retrieve the first op timestamps
for (const batch of getBatches(projectIdsToProcess, batchSize)) {
const timestamps = await RedisManager.promises.getFirstOpTimestamps(batch)
const newProjects = batch
.map((projectId, idx) => {
return { projectId, timestamp: timestamps[idx] }
})
.filter(({ timestamp }) => {
if (!timestamp) {
nullCount++
}
return timestamp ? olderThan(maxAge, timestamp) : true
})
collectedProjects.push(...newProjects)
}
// sort the collected projects by ascending timestamp
collectedProjects.sort((a, b) => a.timestamp - b.timestamp)
console.log('number of projects to flush', collectedProjects.length)
console.log('number with null timestamps', nullCount)
const elapsedTime = Math.floor((Date.now() - SCRIPT_START_TIME) / 1000)
console.log('elapsed time', elapsedTime, 'seconds, buffer time', bufferTime)
const remainingTime = Math.max(interval - elapsedTime - bufferTime, 0)
console.log('remaining time', remainingTime, 'seconds')
const jobsPerSecond = Math.max(
Math.ceil(collectedProjects.length / Math.max(remainingTime, 60)),
1
)
console.log('interval', interval, 'seconds')
console.log('jobs per second', jobsPerSecond)
console.log('concurrency', concurrency)
const queue = new PQueue({
concurrency,
interval: 1000,
intervalCap: jobsPerSecond,
})
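// The queue runs at most `concurrency` flushes at a time and starts no more
// than `jobsPerSecond` new flushes per one-second interval, spreading the
// work across the remaining time window.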
const taskFns = collectedProjects.map(project => {
return async () => {
try {
await flushProject(project)
return { status: 'fulfilled', value: project }
} catch (error) {
return { status: 'rejected', reason: error, project }
}
}
})
const results = await queue.addAll(taskFns)
console.log(
'finished after',
Math.floor((Date.now() - SCRIPT_START_TIME) / 1000),
'seconds'
)
// count the number of successful and failed flushes
const success = results.filter(r => r.status === 'fulfilled').length
const failed = results.filter(r => r.status === 'rejected').length
console.log('completed', { success, failed })
}
main()
.then(() => {
process.exit(0)
})
.catch(err => {
console.error(err)
process.exit(1)
})

View File

@@ -0,0 +1,233 @@
#!/usr/bin/env node
// To run in dev:
//
// docker compose run --rm project-history scripts/clear_deleted.js
//
// In production:
//
// docker run --rm $(docker ps -lq) scripts/clear_deleted.js
import async from 'async'
import Settings from '@overleaf/settings'
import redis from '@overleaf/redis-wrapper'
import { db, ObjectId } from '../app/js/mongodb.js'
import * as SyncManager from '../app/js/SyncManager.js'
import * as UpdatesProcessor from '../app/js/UpdatesProcessor.js'
const rclient = redis.createClient(Settings.redis.project_history)
const Keys = Settings.redis.project_history.key_schema
const argv = process.argv.slice(2)
const limit = parseInt(argv[0], 10) || null
const force = argv[1] === 'force' || false
let projectNotFoundErrors = 0
let projectImportedFromV1Errors = 0
const projectsNotFound = []
const projectsImportedFromV1 = []
let projectNoHistoryIdErrors = 0
let projectsFailedErrors = 0
const projectsFailed = []
let projectsBrokenSyncErrors = 0
const projectsBrokenSync = []
function checkAndClear(project, callback) {
const projectId = project.project_id
console.log('checking project', projectId)
// These can probably also be reset and their overleaf.history.id unset
// (unless they are v1 projects).
function checkNotV1Project(cb) {
db.projects.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { overleaf: true } },
(err, result) => {
console.log(
'1. looking in mongo projects collection: err',
err,
'result',
JSON.stringify(result)
)
if (err) {
return cb(err)
}
if (!result) {
return cb(new Error('project not found in mongo'))
}
if (result && result.overleaf && !result.overleaf.id) {
if (result.overleaf.history.id) {
console.log(
' - project is not imported from v1 and has a history id - ok to resync'
)
return cb()
} else {
console.log(
' - project is not imported from v1 but does not have a history id'
)
return cb(new Error('no history id'))
}
} else {
cb(new Error('project is imported from v1 - will not resync it'))
}
}
)
}
function startResync(cb) {
if (force) {
console.log('2. starting resync for', projectId)
SyncManager.startHardResync(projectId, err => {
if (err) {
console.log('ERR', JSON.stringify(err.message))
return cb(err)
}
setTimeout(cb, 3000) // include a delay to allow the request to be processed
})
} else {
console.log('2. dry run, would start resync for', projectId)
cb()
}
}
function forceFlush(cb) {
if (force) {
console.log('3. forcing a flush for', projectId)
UpdatesProcessor.processUpdatesForProject(projectId, err => {
console.log('err', err)
return cb(err)
})
} else {
console.log('3. dry run, would force a flush for', projectId)
cb()
}
}
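// After the resync flush, poll the Redis queue length (up to 30 attempts, one
// second apart) and only continue once it has drained to zero.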
function watchRedisQueue(cb) {
const key = Keys.projectHistoryOps({ project_id: projectId })
function checkQueueEmpty(_callback) {
rclient.llen(key, (err, result) => {
console.log('LLEN', projectId, err, result)
if (err) {
return _callback(err)
}
if (result === 0) {
_callback()
} else {
_callback(new Error('queue not empty'))
}
})
}
if (force) {
console.log('4. checking redis queue key', key)
async.retry({ times: 30, interval: 1000 }, checkQueueEmpty, err => {
cb(err)
})
} else {
console.log('4. dry run, would check redis key', key)
cb()
}
}
function checkMongoFailureEntry(cb) {
if (force) {
console.log('5. checking key in mongo projectHistoryFailures', projectId)
db.projectHistoryFailures.findOne(
{ project_id: projectId },
{ projection: { _id: 1 } },
(err, result) => {
console.log('got result', err, result)
if (err) {
return cb(err)
}
if (result) {
return cb(new Error('failure record still exists'))
}
return cb()
}
)
} else {
console.log('5. would check failure record for', projectId, 'in mongo')
cb()
}
}
// do the checks and deletions
async.waterfall(
[
checkNotV1Project,
startResync,
forceFlush,
watchRedisQueue,
checkMongoFailureEntry,
],
err => {
if (!err) {
return setTimeout(callback, 1000) // include a 1 second delay
} else if (err.message === 'project not found in mongo') {
projectNotFoundErrors++
projectsNotFound.push(projectId)
return callback()
} else if (err.message === 'no history id') {
projectNoHistoryIdErrors++
return callback()
} else if (
err.message === 'project is imported from v1 - will not resync it'
) {
projectImportedFromV1Errors++
projectsImportedFromV1.push(projectId)
return callback()
} else if (
err.message === 'history store a non-success status code: 422'
) {
projectsFailedErrors++
projectsFailed.push(projectId)
return callback()
} else if (err.message === 'sync ongoing') {
projectsBrokenSyncErrors++
projectsBrokenSync.push(projectId)
return callback()
} else {
console.log('error:', err)
return callback()
}
}
)
}
async function main() {
const results = await db.projectHistoryFailures.find().toArray()
console.log('number of project history failure records =', results.length)
// now try to resync each failed project
async.eachSeries(results.slice(0, limit), checkAndClear, err => {
console.log('Final error status', err)
console.log(
'Project flush failed again errors',
projectsFailedErrors,
projectsFailed
)
console.log(
'Project flush ongoing errors',
projectsBrokenSyncErrors,
projectsBrokenSync
)
console.log(
'Project not found errors',
projectNotFoundErrors,
projectsNotFound
)
console.log('Project without history_id errors', projectNoHistoryIdErrors)
console.log(
'Project imported from V1 errors',
projectImportedFromV1Errors,
projectsImportedFromV1
)
process.exit()
})
}
main().catch(error => {
console.error(error)
process.exit(1)
})

View File

@@ -0,0 +1,404 @@
% Choose pra, prb, prc, prd, pre, prl, prstab, or rmp for journal
% Add 'draft' option to mark overfull boxes with black boxes
% Add 'showpacs' option to make PACS codes appear
% for review and submission
%\documentclass[aps,preprint,showpacs,superscriptaddress,groupedaddress]{revtex4} % for double-spaced preprint
% needed for figures
% needed for some tables
% for math
% for math
% for crossing out text
% for coloring text
%\input{tcilatex}
\documentclass[aps,prl,twocolumn,showpacs,superscriptaddress,groupedaddress]{revtex4}
\usepackage{graphicx}
\usepackage{dcolumn}
\usepackage{bm}
\usepackage{amssymb}
\usepackage{soul}
\usepackage{color}
%TCIDATA{OutputFilter=LATEX.DLL}
%TCIDATA{Version=5.50.0.2960}
%TCIDATA{<META NAME="SaveForMode" CONTENT="1">}
%TCIDATA{BibliographyScheme=BibTeX}
%TCIDATA{LastRevised=Tuesday, May 20, 2014 03:06:00}
%TCIDATA{<META NAME="GraphicsSave" CONTENT="32">}
\hyphenation{ALPGEN}
\hyphenation{EVTGEN}
\hyphenation{PYTHIA}
\def\be{\begin{equation}}
\def\ee{\end{equation}}
\def\bea{\begin{eqnarray}}
\def\eea{\end{eqnarray}}
%\input{tcilatex}
\begin{document}
\title{Transport measurements of the spin wave gap of Mn}
\input author_list.tex
\date{\today}
\begin{abstract}
Temperature dependent transport measurements on ultrathin antiferromagnetic
Mn films reveal a heretofore unknown non-universal weak localization
correction to the conductivity which extends to disorder strengths greater than
100~k$\Omega$ per square. The inelastic scattering of electrons off of
gapped antiferromagnetic spin waves gives rise to an inelastic scattering
length which is short enough to place the system in the 3D regime. The
extracted fitting parameters provide estimates of the energy gap ($\Delta
\approx$~16~K) and exchange energy ($\bar{J} \approx$~320~K). %\st{which are in
%agreement with values obtained with other techniques}.
\end{abstract}
\pacs{75}
\maketitle
Hello world
Thin-film transition metal ferromagnets (Fe, Co, Ni, Gd) and
antiferromagnets (Mn, Cr) and their alloys are not only ubiquitous in
present day technologies but are also expected to play an important role in
future developments~\cite{thompson_2008}. Understanding magnetism in these
materials, especially when the films are thin enough so that disorder plays
an important role, is complicated by the long standing controversy about the
relative importance of itinerant and local moments~\cite%
{slater_1936,van_vleck_1953,aharoni_2000}. For the itinerant transition
metal magnets, a related fundamental issue centers on the question of how
itinerancy is compromised by disorder. Clearly with sufficient disorder the
charge carriers become localized, but questions arise as to what happens to
the spins and associated spin waves and whether the outcome depends on the
ferro/antiferro alignment of spins in the itinerant parent. Ferromagnets
which have magnetization as the order parameter are fundamentally different
than antiferromagnets which have staggered magnetization (i.e., difference
between the magnetization on each sublattice) as the order parameter~\cite%
{blundell_2001}. Ferromagnetism thus distinguishes itself by having soft
modes at zero wave number whereas antiferromagnets have soft modes at finite
wave number~\cite{belitz_2005}. Accordingly, the respective spin wave
spectrums are radically different. These distinctions are particularly
important when comparing quantum corrections to the conductivity near
quantum critical points for ferromagnets~\cite{paul_2005} and
antiferromagnets~\cite{syzranov_2012}.
Surprisingly, although there have been systematic studies of the effect of
disorder on the longitudinal $\sigma_{xx}$ and transverse $\sigma_{xy}$
conductivity of ferromagnetic films~\cite%
{bergmann_1978,bergmann_1991,mitra_2007,misra_2009,kurzweil_2009}, there
have been few if any such studies on antiferromagnetic films. In this paper
we remedy this situation by presenting transport data on systematically
disordered Mn films that are sputter deposited in a custom designed vacuum
chamber and then transferred without exposure to air into an adjacent
cryostat for transport studies to low temperature. The experimental
procedures are similar to those reported previously: disorder, characterized
by the sheet resistance $R_0$ measured at $T=$~5~K, can be changed either by
growing separate samples or by gentle annealing of a given sample through
incremental stages of disorder~\cite{misra_2011}. Using these same procedures our results for
antiferromagnets however are decidedly different. The data are well
described over a large range of disorder strengths by a non-universal three
dimensional (3d) quantum correction that applies only to spin wave gapped
antiferromagnets. This finding implies the presence of strong inelastic
electron scattering off of antiferromagnetic spin waves. The theory is
validated not only by good fits to the data but also by extraction from the
fitting parameters of a value for the spin wave gap $\Delta$ that is in
agreement with the value expected for Mn. On the other hand, the
exchange energy $\bar{J}$ could be sensitive to the high disorder in our
ultra thin films, and it turns out to be much smaller compared to the known values.
In previous work the inelastic scattering of electrons off of spin waves has
been an essential ingredient in understanding disordered ferromagnets. For
example, to explain the occurrence of weak-localization corrections to the
anomalous Hall effect in polycrystalline Fe films~\cite{mitra_2007}, it was
necessary to invoke a contribution to the inelastic phase breaking rate $%
\tau_{\varphi}^{-1}$ due to spin-conserving inelastic scattering off
spin-wave excitations. This phase breaking rate, anticipated by theory~\cite%
{tatara_2004} and seen experimentally in spin polarized electron energy loss
spectroscopy (SPEELS) measurements of ultrathin Fe films~\cite%
{plihal_1999,zhang_2010}, is linear in temperature and significantly larger
than the phase breaking rate due to electron-electron interactions, thus
allowing a wide temperature range to observe weak localization corrections~%
\cite{mitra_2007}. The effect of a high $\tau_{\varphi}^{-1}$ due to
inelastic scattering off spin-wave excitations is also seen in Gd films
where in addition to a localizing log($T$) quantum correction to the
conductance, a localizing linear-in-$T$ quantum correction is present and is
interpreted as a spin-wave mediated Altshuler-Aronov type correction to the
conductivity~\cite{misra_2009}.
Interestingly, this high rate of inelastic spin rate scattering becomes even
more important for the thinnest films as shown in theoretical calculations
on Fe and Ni which point to extremely short spin-dependent inelastic mean
free paths~\cite{hong_2000} and in spin-polarized electron energy-loss
spectroscopy (SPEELS) measurements on few monolayer-thick Fe/W(110) films in
which a strong nonmonotonic enhancement of localized spin wave energies is
found on the thinnest films~\cite{zhang_2010}.
Inelastic spin wave scattering in highly disordered ferromagnetic films can
be strong enough to assure that the associated $T$-dependent dephasing
length $L_{\varphi }(T)=\sqrt{D\tau _{\varphi }}$ (with $D$ the diffusion
constant)~\cite{lee_1985} is less than the film thickness $t$, thus putting
thin films into the 3d limit where a metal-insulator
transition is observed~\cite{misra_2011}. Recognizing that similarly high
inelastic scattering rates must apply to highly disordered antiferromagnetic
films, we first proceed with a theoretical approach that takes into account
the scattering of antiferromagnetic spin waves on the phase relaxation rate
and find a heretofore unrecognized non-universal 3d weak localization
correction to the conductivity that allows an interpretation of our experimental
results.
We mention in passing that the 3d interaction-induced quantum correction
found to be dominant in the case of ferromagnetic Gd
films which undergo a metal-insulator transition\cite{misra_2011} is
found to be much smaller in the present case and will not be considered further (for an estimate of this contribution see \cite{muttalib_unpub}).
As discussed in detail in Ref.~[\onlinecite{wm10}], the phase relaxation
time $\tau _{\varphi }$ limits the phase coherence in a particle-particle
diffusion propagator $C(q,\omega )$ (Cooperon) in the form
\begin{equation}
C(q,\omega _{l})=\frac{1}{2\pi N_{0}\tau ^{2}}\frac{1}{Dq^{2}+|\omega
_{l}|+1/\tau _{\varphi }}.
\end{equation}
where $N_{0}$ is the density of states at the Fermi level, $\tau $ is the
elastic scattering time and $\omega _{l}=2\pi lT$ is the Matsubara
frequency. Labeling the Cooperon propagator in the absence of interactions
as $C_{0}$, we can write
\begin{equation}
\frac{1}{\tau _{\varphi }}=\frac{1}{2\pi N_{0}\tau ^{2}}[C^{-1}-C_{0}^{-1}].
\end{equation}
In general, $C(q,\omega )$ can be evaluated diagrammatically in the presence
of interactions and disorder in a ladder approximation \cite{fa} that can be
symbolically written as $C=C_{0}+C_{0}KC$ where the interaction vertex $K$
contains self energy as well as vertex corrections due to both interactions
and disorder. It then follows that $1/\tau _{\varphi }$ is given by
\begin{equation}
\frac{1}{\tau _{\varphi }}=-\frac{1}{2\pi N_{0}\tau ^{2}}K.
\end{equation}%
In Ref.~[\onlinecite{wm10}], the leading temperature and disorder dependence
of the inelastic diffusion propagator was evaluated diagrammatically, in the
presence of ferromagnetic spin-wave mediated electron-electron interactions.
Here we consider the antiferromagnetic case. We only consider large
spin-wave gap where the damping can be ignored. Using the antiferromagnetic
dispersion relation $\omega _{q}=\Delta +Aq$, where $A$ is the spin
stiffness, the inelastic lifetime is given by
\be
\frac{\hbar }{\tau _{\varphi }}=\frac{4}{\pi \hbar }nJ^{2}\int_{0}^{1/l}%
\frac{q^{d-1}dq}{\sinh \beta \omega _{q}}\frac{Dq^{2}+1/\tau _{\varphi }}{%
(Dq^{2}+1/\tau _{\varphi })^{2}+\omega _{q}^{2}}
\ee%
where $n=k_{F}^{3}/3\pi ^{2}$ is the 3d density, $J$ is the effective
spin-exchange interaction and $\beta =1/k_{B}T$. Here we will consider the
limit $\hbar /\tau _{\varphi }\ll \Delta $, relevant for our experiment on
Mn. In this limit we can neglect the $1/\tau _{\varphi }$ terms inside the
integral. The upper limit should be restricted to $\Delta /A$ in the limit $%
\Delta /A<1/l$. For large disorder, we expect the parameter $x\equiv
\hbar Dk_{F}^{2}\Delta / \bar{J}^{2}\ll 1$, where the spin-exchange energy
is given by $\bar{J}=Ak_{F}$. In this limit, $L_{\varphi }$ can be
simplified as
\be
k_{F}L_{\varphi }\approx \left( \frac{\bar{J}}{\Delta }\right) ^{3/2}\left(
\frac{5\sinh \frac{\Delta }{T}}{12\pi }\right) ^{1/2},\;\;\;x\ll 1
\label{L-phi-3d}
\ee%
which is independent of $x$, and therefore, independent of disorder.
Given the inelastic lifetime, the weak localization correction in 3d is
usually given by \cite{lee_1985} $\delta \sigma _{3d}=\frac{e^{2}}{\hbar \pi
^{3}}\frac{1}{L_{\varphi }},$ where the prefactor to the inverse inelastic
length is a universal number, independent of disorder. However, at large
enough disorder, we show that there exists a disorder dependent correction,
due to the scale dependent diffusion coefficient near the Anderson
metal-insulator transition. In fact, the diffusion coefficient obeys the
self consistent equation \cite{WV}
\begin{equation}
\frac{D_{0}}{D(\omega )}=1+\frac{k_{F}^{2-d}}{\pi m}\int_{0}^{1/l}dQ\frac{%
Q^{d-1}}{-i\omega +D(\omega )Q^{2}}
\end{equation}%
where $D_{0}=v_{F}l/d$ is the diffusion coefficient at weak disorder. While
the significance of the prefactor to the integral is not clear, the above
equation remains qualitatively accurate over a wide range near the Anderson
transition. Setting $\omega =i/\tau _{\varphi }$ and doing the $Q$-integral
in 3d,
\bea
\frac{D_{0}}{D} &\approx & 1+\frac{1}{\pi mk_{F}}\int_{1/L_{\phi }}^{1/l}dQ\frac{%
Q^{2}}{DQ^{2}}\cr
&=& 1+\frac{D_{0}}{D}\frac{3}{\pi k_{F}^{2}l^{2}}-\delta
\left( \frac{D_{0}}{D}\right) ,
\label{delta}
\eea%
where
\bea
\delta \equiv \frac{D_{0}}{D}\frac{3}{\pi k_{F}^{2}l^{2}}\frac{l}{%
L_{\varphi }}
\eea
is assumed to be a small correction, and Eq.~(\ref{delta})
should not be solved self-consistently. This follows from the fact that the
diffusion coefficient of electrons at fixed energy entering the Cooperon
expression is that of non-interacting electrons, and is given by the limit $%
T\rightarrow 0$, $L_{\varphi }\rightarrow \infty $ and therefore $\delta
\rightarrow 0$. Then the correction at finite $T$ is given by
\bea
\frac{D}{D_{0}} &=& \frac{1}{\left( \frac{D_{0}}{D}\right) _{0}-\delta \left(
\frac{D_{0}}{D}\right) }\cr
&\approx & \left( \frac{D}{D_{0}}\right) _{0}+\left( \frac{D}{D_{0}}\right) _{0}
\frac{3}{\pi k_{F}^{2}l^{2}}\frac{l}{L_{\varphi }}%
\eea%
where
\be
\lim_{T\rightarrow 0}\frac{D}{D_{0}}\equiv \left( \frac{D}{D_{0}}\right)
_{0}.
\ee%
Using the relation $\sigma _{3d}=(e^{2}/\hbar )nD$ where the longitudinal
sheet conductance $\sigma _{\square }=\sigma _{3d}t$, with $t$ being the
film thickness, we finally get the temperature dependent weak localization
correction term
\bea
\frac{\delta \sigma _{\square }}{L_{00}} &=& \left( \frac{D}{D_{0}}\right) _{0}%
\frac{2}{\pi }\frac{t}{L_{\varphi }}\cr
\left( \frac{D}{D_{0}}\right)_{0} &\approx &\frac{2}{1+\sqrt{1+\frac{4R_{0}^{2}}{a^{2}}}}
\label{WL}
\eea%
where $R_{0}=L_{00}/\sigma _{\square }(T$=$0)$, $L_{00}=e^{2}/\pi h$, $%
a=3\pi/2k_{F}tb_{0}$, $b_{0}$ is a number of order unity and we
have solved the self-consistent equation for $D$ in order to express $D_{0%
\text{ }}$in terms of $D$ and finally $R_{0}$. Thus in this case, the weak
localization correction has a prefactor which is not universal. While this
reduces to the well-known universal result at weak disorder $R_{0}\ll a$, it
becomes dependent on disorder characterized by the sheet resistance $R_{0}$
at strong disorder and at the same time substantially extends the 3d regime
near the transition.
Using the expression for $L_{\varphi }$ (Eq.~(\ref{L-phi-3d})) into Eq.~(\ref%
{WL}), we finally obtain the total conductivity, including the quantum
correction to the conductivity due to weak localization in 3d arising from
scattering of electrons off antiferromagnetic spin waves in Mn,
\begin{equation}
\frac{\sigma _{\square }}{L_{00}}=A+\frac{B}{\sqrt{\sinh [\Delta /T]}},
\label{sigmaWL}
\end{equation}%
where the parameter $A$ is temperature independent and the parameter
\bea
B &\equiv & \left( \frac{D}{D_{0}}\right) _{0}\frac{2}{\pi ^{2}}\left( \frac{%
12\pi }{5}\right) ^{1/2}\left( \frac{\Delta }{\bar{J}}\right) ^{3/2}tk_{F}\cr%
&=&\frac{2c}{1+\sqrt{1+\frac{4R_{0}^{2}}{a^{2}}}},
\label{BFit}
\eea%
where
\be
c\equiv \left( \frac{\Delta }{\bar{J}}\right) ^{3/2}\left( \frac{%
48t^{2}k_{F}^{2}}{5\pi}\right) ^{1/2}.
\label{cFit}
\ee
The data presented here is for a single film prepared with an initial $R_0
\approx$~6~k$\Omega$. Disorder was subsequently increased in incremental
stages up to 180~k$\Omega$ by annealing at approximately 280~K~\cite%
{misra_2011}. Additional samples were grown at intermediate disorder and
measured to check reproducibility.
Figure~\ref{fig:cond} shows the conductivity data for two samples with
disorder $R_{0}=$~17573~$\Omega $ and 63903~$\Omega $ with corresponding
fittings to the expression (\ref{sigmaWL}) where $A$ and $B$ are taken as
fitting parameters and $\Delta =$~16~K is the spin wave gap. The fits are
sensitive to the parameters $A$ and $B$ but relatively insensitive to $%
\Delta $. We find that $\Delta =$~16~$\pm $~4~K provides good fittings in
the whole range of disorder (from 6 to 180~k$\Omega $).
\begin{figure}[tbp]
\begin{center}
\includegraphics[width=9cm]{fig_1_16.eps}
\end{center}
\caption{The temperature-dependent normalized conductivity (open squares)
for two samples with the indicated disorder strengths of $R_0 =$~17573~$%
\Omega$ and 63903~$\Omega$ show good agreement with theory (solid lines).
The fitting parameters $A$ and $B$ are indicated for each curve with the
error in the least significant digit indicated in parentheses.}
\label{fig:cond}
\end{figure}
Figure~\ref{fig:parb} shows the dependence of the parameter $B$ on the
disorder strength $R_0$ (open squares) and a theoretical fit (solid line)
using Eq.~(\ref{BFit}), where $c$ and $a$ are fitting parameters. The solid
line for this two-parameter fit is drawn for the best-fit values $c=0.67 \pm
0.04$ and $a= 28 \pm 3$~k$\Omega$. We note that the fit is of reasonable
quality over most of the disorder range except for the film with the least
disorder ($R_0 = 6$~k$\Omega$) where $B = 0.77$,
somewhat below the saturated value
$B = c = 0.67$ evaluated from Eq.~(\ref{BFit}) at $R_0 = 0$. Using higher
values of $c$ (e.g., $c=0.8$) and lower values of $a$ (e.g., $a = 22$~k$\Omega$)
improves the fit at low disorder strengths but
increases the discrepancy at higher disorder strengths.
%L_phi/t = 2/pi*2/(1+sqrt(1+16))/0.5, 2/pi*2/(1+sqrt(1+1))/0.25
%http://hyperphysics.phy-astr.gsu.edu/hbase/tables/fermi.html , k_F = sqrt(2*m_e*(10.9 eV))/(hbar) = 1.7E10 1/m
% (bar(J) / \Delta) ^ 3/2 = (48*(2e-9)^2*(2.7e9)^2/5/pi/(0.65)^2) ^0.5 = 8360 = 20 ^ 3
%A = \bar{J} / k_F , \bar{J} = nJ
Substituting the Fermi energy for bulk Mn~\cite{ashcroft_1976},
a thickness $t=2$~nm known to 20\% accuracy, together with the best-fit
value for $c$ into Eq.~(\ref{cFit}), we calculate the value $\bar{J} =$~320~$%
\pm$~93~K. Gao et al.~\cite{gao_2008} performed inelastic scanning tunneling
spectroscopy (ISTS) on thin Mn films and reported $\Delta$ in the range from
30 to 60~K and $\bar{J}=vk_F=$~3150~$\pm$~200~K. The agreement of energy gaps is
good; however our significantly lower value of $\bar{J}$ is probably due to the
high disorder in our ultra thin films.
Since the temperature-dependent correction $B/\sqrt{\sinh (\Delta /T)}$ of
Eq.~\ref{sigmaWL} is small compared to the parameter $A$, we can write
$\sigma_{\square} \approx 1/R_0$ so that Eq.~\ref{sigmaWL} reduces to the
expression $A \approx 1/L_{00}R_0$. The logarithmic plot derived by taking the
logarithm of both sides of this approximation is shown in the inset of
Fig.~\ref{fig:parb}. The slope of -1 confirms the linear dependence of $A$ on
$1/R_0$ and the intercept of 5.01 (10$^{5.01}\approx $~102~k$\Omega$) is
within 20\% of the expected theoretical value $L_{00}=$~81~k$\Omega $,
for the normalization constant. Accordingly, the conductivity corrections in
Eq.~\ref{sigmaWL} are small compared to the zero temperature conductivity and
the normalization constant $L_{00}$ for the conductivity is close to the
expected theoretical value.
Using Eq.~(\ref{WL}) and the obtained value for $a\approx $~28~k$\Omega $ we can
compare the dephasing length ($L_{\varphi }$) with the thickness ($t\approx $%
~2~nm) at 16~K. For the sample with $R_{0}=$~63903~$\Omega $ the ratio $%
L_{\varphi }/t\approx $~0.5 and for the sample with $R_{0}=$~17573~$\Omega $
$L_{\varphi }/t\approx $~2. The latter estimate assumes no spin
polarization, while a full polarization would imply $L_{\varphi }/t\approx $%
~1. Thus $L_{\varphi }$ is smaller than or close to the thickness of the
film, which keeps the film in the three-dimensional regime for almost all
temperatures and disorder strengths considered.
\begin{figure}[tbp]
\begin{center}
\includegraphics[width=9cm]{fig_2_16.eps}
\end{center}
\caption{Dependence of the fitting parameters $B$ and $A$ (inset) on
disorder $R_0$ for $\Delta=$~16~K. The fitting parameters are indicated for
each curve with the error in the least significant digit indicated in
parentheses.}
\label{fig:parb}
\end{figure}
In conclusion, we have performed \textit{in situ} transport measurements on
ultra thin Mn films, systematically varying the disorder ($R_{0}=R_{xx}$($T=$%
~5~K)). The obtained data were analyzed within a weak localization theory in
3d generalized to strong disorder. In the temperature range considered
inelastic scattering off spin waves is found to be strong giving rise to a
dephasing length shorter than the film thickness, which places these systems
into the 3d regime. The obtained value for the spin wave gap was close to
the one measured by Gao et al.~\cite{gao_2008} using ISTS, while the
exchange energy was much smaller.
This work has been supported by the NSF under Grant No 1305783 (AFH).
PW thanks A.\ M.\ Finkel'stein for useful discussions and acknowledges
partial support through the DFG research unit ``Quantum phase transitions''.
\bibliographystyle{apsrev}
\bibliography{bibl}
\end{document}

View File

@@ -0,0 +1,3 @@
Hello world
One two three

View File

@@ -0,0 +1,5 @@
Hello world
One two three
Four five six

View File

@@ -0,0 +1,7 @@
Hello world
One two three
Four five six
Seven eight nine

View File

@@ -0,0 +1,404 @@
% Choose pra, prb, prc, prd, pre, prl, prstab, or rmp for journal
% Add 'draft' option to mark overfull boxes with black boxes
% Add 'showpacs' option to make PACS codes appear
% for review and submission
%\documentclass[aps,preprint,showpacs,superscriptaddress,groupedaddress]{revtex4} % for double-spaced preprint
% needed for figures
% needed for some tables
% for math
% for math
% for crossing out text
% for coloring text
%\input{tcilatex}
\documentclass[aps,prl,twocolumn,showpacs,superscriptaddress,groupedaddress]{revtex4}
\usepackage{graphicx}
\usepackage{dcolumn}
\usepackage{bm}
\usepackage{amssymb}
\usepackage{soul}
\usepackage{color}
%TCIDATA{OutputFilter=LATEX.DLL}
%TCIDATA{Version=5.50.0.2960}
%TCIDATA{<META NAME="SaveForMode" CONTENT="1">}
%TCIDATA{BibliographyScheme=BibTeX}
%TCIDATA{LastRevised=Tuesday, May 20, 2014 03:06:00}
%TCIDATA{<META NAME="GraphicsSave" CONTENT="32">}
\hyphenation{ALPGEN}
\hyphenation{EVTGEN}
\hyphenation{PYTHIA}
\def\be{\begin{equation}}
\def\ee{\end{equation}}
\def\bea{\begin{eqnarray}}
\def\eea{\end{eqnarray}}
%\input{tcilatex}
\begin{document}
\title{Transport measurements of the spin wave gap of Mn}
\input author_list.tex
\date{\today}
\begin{abstract}
Temperature dependent transport measurements on ultrathin antiferromagnetic
Mn films reveal a heretofore unknown non-universal weak localization
correction to the conductivity which extends to disorder strengths greater than
100~k$\Omega$ per square. The inelastic scattering of electrons off of
gapped antiferromagnetic spin waves gives rise to an inelastic scattering
length which is short enough to place the system in the 3D regime. The
extracted fitting parameters provide estimates of the energy gap ($\Delta
\approx$~16~K) and exchange energy ($\bar{J} \approx$~320~K). %\st{which are in
%agreement with values obtained with other techniques}.
\end{abstract}
\pacs{75}
\maketitle
Thin-film transition metal ferromagnets (Fe, Co, Ni, Gd) and
antiferromagnets (Mn, Cr) and their alloys are not only ubiquitous in
present day technologies but are also expected to play an important role in
future developments~\cite{thompson_2008}. Understanding magnetism in these
materials, especially when the films are thin enough so that disorder plays
an important role, is complicated by the long-standing controversy about the
relative importance of itinerant and local moments~\cite%
{slater_1936,van_vleck_1953,aharoni_2000}. For the itinerant transition
metal magnets, a related fundamental issue centers on the question of how
itinerancy is compromised by disorder. Clearly with sufficient disorder the
charge carriers become localized, but questions arise as to what happens to
the spins and associated spin waves and whether the outcome depends on the
ferro/antiferro alignment of spins in the itinerant parent. Ferromagnets
which have magnetization as the order parameter are fundamentally different
than antiferromagnets which have staggered magnetization (i.e., difference
between the magnetization on each sublattice) as the order parameter~\cite%
{blundell_2001}. Ferromagnetism thus distinguishes itself by having soft
modes at zero wave number whereas antiferromagnets have soft modes at finite
wave number~\cite{belitz_2005}. Accordingly, the respective spin wave
spectra are radically different. These distinctions are particularly
important when comparing quantum corrections to the conductivity near
quantum critical points for ferromagnets~\cite{paul_2005} and
antiferromagnets~\cite{syzranov_2012}.
Surprisingly, although there have been systematic studies of the effect of
disorder on the longitudinal $\sigma_{xx}$ and transverse $\sigma_{xy}$
conductivity of ferromagnetic films~\cite%
{bergmann_1978,bergmann_1991,mitra_2007,misra_2009,kurzweil_2009}, there
have been few if any such studies on antiferromagnetic films. In this paper
we remedy this situation by presenting transport data on systematically
disordered Mn films that are sputter deposited in a custom designed vacuum
chamber and then transferred without exposure to air into an adjacent
cryostat for transport studies to low temperature. The experimental
procedures are similar to those reported previously: disorder, characterized
by the sheet resistance $R_0$ measured at $T=$~5~K, can be changed either by
growing separate samples or by gentle annealing of a given sample through
incremental stages of disorder~\cite{misra_2011}. Using these same procedures, our results for
antiferromagnets are, however, decidedly different. The data are well
described over a large range of disorder strengths by a non-universal three
dimensional (3d) quantum correction that applies only to spin wave gapped
antiferromagnets. This finding implies the presence of strong inelastic
electron scattering off of antiferromagnetic spin waves. The theory is
validated not only by good fits to the data but also by extraction from the
fitting parameters of a value for the spin wave gap $\Delta$ that is in
agreement with the value expected for Mn. On the other hand, the
exchange energy $\bar{J}$ could be sensitive to the high disorder in our
ultrathin films, and it turns out to be much smaller than the known values.
In previous work the inelastic scattering of electrons off of spin waves has
been an essential ingredient in understanding disordered ferromagnets. For
example, to explain the occurrence of weak-localization corrections to the
anomalous Hall effect in polycrystalline Fe films~\cite{mitra_2007}, it was
necessary to invoke a contribution to the inelastic phase breaking rate $%
\tau_{\varphi}^{-1}$ due to spin-conserving inelastic scattering off
spin-wave excitations. This phase breaking rate, anticipated by theory~\cite%
{tatara_2004} and seen experimentally in spin polarized electron energy loss
spectroscopy (SPEELS) measurements of ultrathin Fe films~\cite%
{plihal_1999,zhang_2010}, is linear in temperature and significantly larger
than the phase breaking rate due to electron-electron interactions, thus
allowing a wide temperature range to observe weak localization corrections~%
\cite{mitra_2007}. The effect of a high $\tau_{\varphi}^{-1}$ due to
inelastic scattering off spin-wave excitations is also seen in Gd films
where in addition to a localizing log($T$) quantum correction to the
conductance, a localizing linear-in-$T$ quantum correction is present and is
interpreted as a spin-wave mediated Altshuler-Aronov type correction to the
conductivity~\cite{misra_2009}.
Interestingly, this high rate of inelastic spin-wave scattering becomes even
more important for the thinnest films as shown in theoretical calculations
on Fe and Ni which point to extremely short spin-dependent inelastic mean
free paths~\cite{hong_2000} and in spin-polarized electron energy-loss
spectroscopy (SPEELS) measurements on few monolayer-thick Fe/W(110) films in
which a strong nonmonotonic enhancement of localized spin wave energies is
found on the thinnest films~\cite{zhang_2010}.
Inelastic spin wave scattering in highly disordered ferromagnetic films can
be strong enough to assure that the associated $T$-dependent dephasing
length $L_{\varphi }(T)=\sqrt{D\tau _{\varphi }}$ (with $D$ the diffusion
constant)~\cite{lee_1985} is less than the film thickness $t$, thus putting
thin films into the 3d limit where a metal-insulator
transition is observed~\cite{misra_2011}. Recognizing that similarly high
inelastic scattering rates must apply to highly disordered antiferromagnetic
films, we first proceed with a theoretical approach that takes into account
the effect of scattering off antiferromagnetic spin waves on the phase relaxation rate
and find a heretofore unrecognized non-universal 3d weak localization
correction to the conductivity that allows an interpretation of our experimental
results.
We mention in passing that the 3d interaction-induced quantum correction,
which was found to be dominant in the case of ferromagnetic Gd
films that undergo a metal-insulator transition~\cite{misra_2011}, is
much smaller in the present case and will not be considered further (for an estimate of this contribution see Ref.~\cite{muttalib_unpub}).
As discussed in detail in Ref.~[\onlinecite{wm10}], the phase relaxation
time $\tau _{\varphi }$ limits the phase coherence in a particle-particle
diffusion propagator $C(q,\omega )$ (Cooperon) in the form
\begin{equation}
C(q,\omega _{l})=\frac{1}{2\pi N_{0}\tau ^{2}}\frac{1}{Dq^{2}+|\omega
_{l}|+1/\tau _{\varphi }}.
\end{equation}
where $N_{0}$ is the density of states at the Fermi level, $\tau $ is the
elastic scattering time and $\omega _{l}=2\pi lT$ is the Matsubara
frequency. Labeling the Cooperon propagator in the absence of interactions
as $C_{0}$, we can write
\begin{equation}
\frac{1}{\tau _{\varphi }}=\frac{1}{2\pi N_{0}\tau ^{2}}[C^{-1}-C_{0}^{-1}].
\end{equation}
In general, $C(q,\omega )$ can be evaluated diagrammatically in the presence
of interactions and disorder in a ladder approximation \cite{fa} that can be
symbolically written as $C=C_{0}+C_{0}KC$, where the interaction vertex $K$
contains self-energy as well as vertex corrections due to both interactions
and disorder. Since this implies $C^{-1}=C_{0}^{-1}-K$, it follows that $1/\tau _{\varphi }$ is given by
\begin{equation}
\frac{1}{\tau _{\varphi }}=-\frac{1}{2\pi N_{0}\tau ^{2}}K.
\end{equation}%
In Ref.~[\onlinecite{wm10}], the leading temperature and disorder dependence
of the inelastic diffusion propagator was evaluated diagrammatically, in the
presence of ferromagnetic spin-wave mediated electron-electron interactions.
Here we consider the antiferromagnetic case. We only consider a large
spin-wave gap, where the damping can be ignored. Using the antiferromagnetic
dispersion relation $\omega _{q}=\Delta +Aq$, where $A$ is the spin
stiffness, the inelastic lifetime is given by
\be
\frac{\hbar }{\tau _{\varphi }}=\frac{4}{\pi \hbar }nJ^{2}\int_{0}^{1/l}%
\frac{q^{d-1}dq}{\sinh \beta \omega _{q}}\frac{Dq^{2}+1/\tau _{\varphi }}{%
(Dq^{2}+1/\tau _{\varphi })^{2}+\omega _{q}^{2}}
\ee%
where $n=k_{F}^{3}/3\pi ^{2}$ is the 3d density, $J$ is the effective
spin-exchange interaction and $\beta =1/k_{B}T$. Here we will consider the
limit $\hbar /\tau _{\varphi }\ll \Delta $, relevant for our experiment on
Mn. In this limit we can neglect the $1/\tau _{\varphi }$ terms inside the
integral. The upper limit should be restricted to $\Delta /A$ in the limit $%
\Delta /A<1/l$. For large disorder, we expect the parameter $x\equiv
\hbar Dk_{F}^{2}\Delta / \bar{J}^{2}\ll 1$, where the spin-exchange energy
is given by $\bar{J}=Ak_{F}$. In this limit, $L_{\varphi }$ can be
simplified as
\be
k_{F}L_{\varphi }\approx \left( \frac{\bar{J}}{\Delta }\right) ^{3/2}\left(
\frac{5\sinh \frac{\Delta }{T}}{12\pi }\right) ^{1/2},\;\;\;x\ll 1
\label{L-phi-3d}
\ee%
which is independent of $x$, and therefore, independent of disorder.
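As an illustrative estimate (using the values extracted from the fits below,
$\Delta \approx $~16~K and $\bar{J}\approx $~320~K, and taking $k_{F}\approx
$~1.7$\times $10$^{10}$~m$^{-1}$ as implied by the bulk Mn Fermi energy),
Eq.~(\ref{L-phi-3d}) gives at $T=\Delta $
\be
k_{F}L_{\varphi }\approx \left( \frac{\bar{J}}{\Delta }\right) ^{3/2}\left(
\frac{5\sinh 1}{12\pi }\right) ^{1/2}\approx 20^{3/2}\times 0.4\approx 35,
\ee
i.e.\ $L_{\varphi }\approx $~2~nm, comparable to the film thickness, in line
with the estimates given below.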
Given the inelastic lifetime, the weak localization correction in 3d is
usually given by \cite{lee_1985} $\delta \sigma _{3d}=\frac{e^{2}}{\hbar \pi
^{3}}\frac{1}{L_{\varphi }},$ where the prefactor to the inverse inelastic
length is a universal number, independent of disorder. However, at large
enough disorder, we show that there exists a disorder dependent correction,
due to the scale dependent diffusion coefficient near the Anderson
metal-insulator transition. In fact, the diffusion coefficient obeys the
self consistent equation \cite{WV}
\begin{equation}
\frac{D_{0}}{D(\omega )}=1+\frac{k_{F}^{2-d}}{\pi m}\int_{0}^{1/l}dQ\frac{%
Q^{d-1}}{-i\omega +D(\omega )Q^{2}}
\end{equation}%
where $D_{0}=v_{F}l/d$ is the diffusion coefficient at weak disorder. While
the significance of the prefactor to the integral is not clear, the above
equation remains qualitatively accurate over a wide range near the Anderson
transition. Setting $\omega =i/\tau _{\varphi }$ and doing the $Q$-integral
in 3d,
\bea
\frac{D_{0}}{D} &\approx & 1+\frac{1}{\pi mk_{F}}\int_{1/L_{\phi }}^{1/l}dQ\frac{%
Q^{2}}{DQ^{2}}\cr
&=& 1+\frac{D_{0}}{D}\frac{3}{\pi k_{F}^{2}l^{2}}-\delta
\left( \frac{D_{0}}{D}\right) ,
\label{delta}
\eea%
where
\bea
\delta \equiv \frac{D_{0}}{D}\frac{3}{\pi k_{F}^{2}l^{2}}\frac{l}{%
L_{\varphi }}
\eea
is assumed to be a small correction, and Eq.~(\ref{delta})
should not be solved self-consistently. This follows from the fact that the
diffusion coefficient of electrons at fixed energy entering the Cooperon
expression is that of non-interacting electrons, and is given by the limit $%
T\rightarrow 0$, $L_{\varphi }\rightarrow \infty $ and therefore $\delta
\rightarrow 0$. Then the correction at finite $T$ is given by
\bea
\frac{D}{D_{0}} &=& \frac{1}{\left( \frac{D_{0}}{D}\right) _{0}-\delta \left(
\frac{D_{0}}{D}\right) }\cr
&\approx & \left( \frac{D}{D_{0}}\right) _{0}+\left( \frac{D}{D_{0}}\right) _{0}
\frac{3}{\pi k_{F}^{2}l^{2}}\frac{l}{L_{\varphi }}%
\eea%
where
\be
\lim_{T\rightarrow 0}\frac{D}{D_{0}}\equiv \left( \frac{D}{D_{0}}\right)
_{0}.
\ee%
Using the relation $\sigma _{3d}=(e^{2}/\hbar )nD$ where the longitudinal
sheet conductance $\sigma _{\square }=\sigma _{3d}t$, with $t$ being the
film thickness, we finally get the temperature dependent weak localization
correction term
\bea
\frac{\delta \sigma _{\square }}{L_{00}} &=& \left( \frac{D}{D_{0}}\right) _{0}%
\frac{2}{\pi }\frac{t}{L_{\varphi }}\cr
\left( \frac{D}{D_{0}}\right)_{0} &\approx &\frac{2}{1+\sqrt{1+\frac{4R_{0}^{2}}{a^{2}}}}
\label{WL}
\eea%
where $R_{0}=L_{00}/\sigma _{\square }(T$=$0)$, $L_{00}=e^{2}/\pi h$, $%
a=3\pi/2k_{F}tb_{0}$, $b_{0}$ is a number of order unity and we
have solved the self-consistent equation for $D$ in order to express $D_{0%
\text{ }}$in terms of $D$ and finally $R_{0}$. Thus in this case, the weak
localization correction has a prefactor which is not universal. While this
reduces to the well-known universal result at weak disorder $R_{0}\ll a$, it
becomes dependent on disorder characterized by the sheet resistance $R_{0}$
at strong disorder and at the same time substantially extends the 3d regime
near the transition.
Inserting the expression for $L_{\varphi }$ (Eq.~(\ref{L-phi-3d})) into Eq.~(\ref%
{WL}), we finally obtain the total conductivity, including the quantum
correction to the conductivity due to weak localization in 3d arising from
scattering of electrons off antiferromagnetic spin waves in Mn,
\begin{equation}
\frac{\sigma _{\square }}{L_{00}}=A+\frac{B}{\sqrt{\sinh [\Delta /T]}},
\label{sigmaWL}
\end{equation}%
where the parameter $A$ is temperature independent and the parameter
\bea
B &\equiv & \left( \frac{D}{D_{0}}\right) _{0}\frac{2}{\pi ^{2}}\left( \frac{%
12\pi }{5}\right) ^{1/2}\left( \frac{\Delta }{\bar{J}}\right) ^{3/2}tk_{F}\cr%
&=&\frac{2c}{1+\sqrt{1+\frac{4R_{0}^{2}}{a^{2}}}},
\label{BFit}
\eea%
where
\be
c\equiv \left( \frac{\Delta }{\bar{J}}\right) ^{3/2}\left( \frac{%
48t^{2}k_{F}^{2}}{5\pi}\right) ^{1/2}.
\label{cFit}
\ee
The data presented here are for a single film prepared with an initial $R_0
\approx$~6~k$\Omega$. Disorder was subsequently increased in incremental
stages up to 180~k$\Omega$ by annealing at approximately 280~K~\cite%
{misra_2011}. Additional samples were grown at intermediate disorder and
measured to check reproducibility.
Figure~\ref{fig:cond} shows the conductivity data for two samples with
disorder $R_{0}=$~17573~$\Omega $ and 63903~$\Omega $, together with corresponding
fits to Eq.~(\ref{sigmaWL}), where $A$ and $B$ are taken as
fitting parameters and $\Delta =$~16~K is the spin wave gap. The fits are
sensitive to the parameters $A$ and $B$ but relatively insensitive to $%
\Delta $. We find that $\Delta =$~16~$\pm $~4~K provides good fits over
the whole range of disorder (from 6 to 180~k$\Omega $).
\begin{figure}[tbp]
\begin{center}
\includegraphics[width=9cm]{fig_1_16.eps}
\end{center}
\caption{The temperature-dependent normalized conductivity (open squares)
for two samples with the indicated disorder strengths of $R_0 =$~17573~$%
\Omega$ and 63903~$\Omega$ show good agreement with theory (solid lines).
The fitting parameters $A$ and $B$ are indicated for each curve with the
error in the least significant digit indicated in parentheses.}
\label{fig:cond}
\end{figure}
Figure~\ref{fig:parb} shows the dependence of the parameter $B$ on the
disorder strength $R_0$ (open squares) and a theoretical fit (solid line)
using Eq.~(\ref{BFit}), where $c$ and $a$ are fitting parameters. The solid
line for this two-parameter fit is drawn for the best-fit values $c=0.67 \pm
0.04$ and $a= 28 \pm 3$~k$\Omega$. We note that the fit is of reasonable
quality over most of the disorder range except for the film with the least
disorder ($R_0 = 6$~k$\Omega$), where $B = 0.77$,
somewhat above the saturation value
$B = c = 0.67$ that Eq.~(\ref{BFit}) reaches at $R_0 = 0$. Using higher
values of $c$ (e.g., $c=0.8$) and lower values of $a$ (e.g., $a = 22$~k$\Omega$)
improves the fit at low disorder strengths but
increases the discrepancy at higher disorder strengths.
%L_phi/t = 2/pi*2/(1+sqrt(1+16))/0.5, 2/pi*2/(1+sqrt(1+1))/0.25
%http://hyperphysics.phy-astr.gsu.edu/hbase/tables/fermi.html , k_F = sqrt(2*m_e*(10.9 eV))/(hbar) = 1.7E10 1/m
% (bar(J) / \Delta) ^ 3/2 = (48*(2e-9)^2*(2.7e9)^2/5/pi/(0.65)^2) ^0.5 = 8360 = 20 ^ 3
%A = \bar{J} / k_F , \bar{J} = nJ
Substituting the Fermi energy for bulk Mn~\cite{ashcroft_1976},
a thickness $t=2$~nm known to 20\% accuracy, together with the best-fit
value for $c$ into Eq.~(\ref{cFit}), we calculate the value $\bar{J} =$~320~$%
\pm$~93~K. Gao et al.~\cite{gao_2008} performed inelastic scanning tunneling
spectroscopy (ISTS) on thin Mn films and reported $\Delta$ in the range from
30 to 60~K and $\bar{J}=vk_F=$~3150~$\pm$~200~K. The agreement of the energy gaps is
good; however, our significantly lower value of $\bar{J}$ is probably due to the
high disorder in our ultrathin films.
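For orientation, this value of $\bar{J}$ follows from inverting
Eq.~(\ref{cFit}) with the parameters quoted above ($t\approx $~2~nm and
$k_{F}\approx $~1.7$\times $10$^{10}$~m$^{-1}$ from the bulk Mn Fermi energy,
so that $k_{F}t\approx $~34) and the best-fit $c\approx $~0.67:
\be
\bar{J}=\Delta \left( \frac{48t^{2}k_{F}^{2}}{5\pi c^{2}}\right) ^{1/3}\approx
16~\mathrm{K}\times 20\approx 320~\mathrm{K}.
\ee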
Since the temperature-dependent correction $B/\sqrt{\sinh (\Delta /T)}$ in
Eq.~(\ref{sigmaWL}) is small compared to the parameter $A$, we can write
$\sigma_{\square} \approx 1/R_0$, so that Eq.~(\ref{sigmaWL}) reduces to
$A \approx 1/L_{00}R_0$, i.e., $\log_{10}A=-\log_{10}R_0-\log_{10}L_{00}$.
The corresponding logarithmic plot is shown in the inset of
Fig.~\ref{fig:parb}. The slope of $-1$ confirms the linear dependence of $A$ on
$1/R_0$, and the intercept of 5.01 ($10^{5.01}$~$\Omega \approx$~102~k$\Omega$) is
within 20\% of the expected theoretical value $L_{00}^{-1}=$~81~k$\Omega $
for the normalization constant. Accordingly, the conductivity corrections in
Eq.~(\ref{sigmaWL}) are small compared to the zero-temperature conductivity, and
the normalization constant $L_{00}$ for the conductivity is close to its
expected theoretical value.
Using Eq.~(\ref{WL}) and the obtained value for $a\approx $~28~k$\Omega $ we can
compare the dephasing length ($L_{\varphi }$) with the thickness ($t\approx $%
~2~nm) at 16~K. For the sample with $R_{0}=$~63903~$\Omega $ the ratio $%
L_{\varphi }/t\approx $~0.5 and for the sample with $R_{0}=$~17573~$\Omega $
$L_{\varphi }/t\approx $~2. The latter estimate assumes no spin
polarization, while a full polarization would imply $L_{\varphi }/t\approx $%
~1. Thus $L_{\varphi }$ is smaller than or close to the thickness of the
film, which keeps the film in the three-dimensional regime for almost all
temperatures and disorder strengths considered.
\begin{figure}[tbp]
\begin{center}
\includegraphics[width=9cm]{fig_2_16.eps}
\end{center}
\caption{Dependence of the fitting parameters $B$ and $A$ (inset) on
disorder $R_0$ for $\Delta=$~16~K. The fitting parameters are indicated for
each curve with the error in the least significant digit indicated in
parentheses.}
\label{fig:parb}
\end{figure}
In conclusion, we have performed \textit{in situ} transport measurements on
ultrathin Mn films, systematically varying the disorder ($R_{0}=R_{xx}$($T=$%
~5~K)). The obtained data were analyzed within a weak localization theory in
3d generalized to strong disorder. In the temperature range considered,
inelastic scattering off spin waves is found to be strong, giving rise to a
dephasing length shorter than the film thickness, which places these systems
in the 3d regime. The obtained value for the spin wave gap was close to
the one measured by Gao et al.~\cite{gao_2008} using ISTS, while the
exchange energy was much smaller.
This work has been supported by the NSF under Grant No.~1305783 (AFH).
PW thanks A.~M.~Finkel'stein for useful discussions and acknowledges
partial support through the DFG research unit ``Quantum phase transitions''.
\bibliographystyle{apsrev}
\bibliography{bibl}
\end{document}

View File

@@ -0,0 +1,74 @@
{
"chunk": {
"history": {
"snapshot": {
"files": {
"bar.tex": {
"hash": "4f785a4c192155b240e3042b3a7388b47603f423",
"stringLength": 26
},
"main.tex": {
"hash": "f28571f561d198b87c24cc6a98b78e87b665e22d",
"stringLength": 20638,
"metadata": {
"main": true
}
}
}
},
"changes": [
{
"operations": [
{
"pathname": "main.tex",
"textOperation": [
1912,
"Hello world",
18726
]
}
],
"timestamp": "2017-12-04T10:23:35.633Z",
"authors": [
31
]
},
{
"operations": [
{
"pathname": "bar.tex",
"newPathname": "foo.tex"
}
],
"timestamp": "2017-12-04T10:27:26.874Z",
"authors": [
31
]
},
{
"operations": [
{
"pathname": "foo.tex",
"textOperation": [
26,
"\n\nFour five six"
]
}
],
"timestamp": "2017-12-04T10:28:33.724Z",
"authors": [
31
]
}
]
},
"startVersion": 0
},
"authors": [
{
"id": 31,
"email": "james.allen@overleaf.com",
"name": "James"
}
]
}

View File

@@ -0,0 +1,74 @@
{
"chunk": {
"history": {
"snapshot": {
"files": {
"main.tex": {
"hash": "35c9bd86574d61dcadbce2fdd3d4a0684272c6ea",
"stringLength": 20649,
"metadata": {
"main": true
}
},
"foo.tex": {
"hash": "c6654ea913979e13e22022653d284444f284a172",
"stringLength": 41
}
}
},
"changes": [
{
"operations": [
{
"pathname": "foo.tex",
"textOperation": [
41,
"\n\nSeven eight nince"
]
}
],
"timestamp": "2017-12-04T10:29:17.786Z",
"authors": [
31
]
},
{
"operations": [
{
"pathname": "foo.tex",
"textOperation": [
58,
-1,
1
]
}
],
"timestamp": "2017-12-04T10:29:22.905Z",
"authors": [
31
]
},
{
"operations": [
{
"pathname": "foo.tex",
"newPathname": "bar.tex"
}
],
"timestamp": "2017-12-04T10:29:26.120Z",
"authors": [
31
]
}
]
},
"startVersion": 3
},
"authors": [
{
"id": 31,
"email": "james.allen@overleaf.com",
"name": "James"
}
]
}

View File

@@ -0,0 +1,63 @@
{
"chunk": {
"history": {
"snapshot": {
"files": {
"main.tex": {
"hash": "35c9bd86574d61dcadbce2fdd3d4a0684272c6ea",
"stringLength": 20649,
"metadata": {
"main": true
}
},
"bar.tex": {
"hash": "e13c315d53aaef3aa34550a86b09cff091ace220",
"stringLength": 59
}
}
},
"changes": [
{
"operations": [
{
"pathname": "main.tex",
"textOperation": [
1923,
" also updated",
18726
]
}
],
"timestamp": "2017-12-04T10:32:47.277Z",
"authors": [
31
]
},
{
"operations": [
{
"pathname": "bar.tex",
"textOperation": [
28,
-15,
16
]
}
],
"timestamp": "2017-12-04T10:32:52.877Z",
"v2Authors": [
"5a5637efdac84e81b71014c4"
]
}
]
},
"startVersion": 6
},
"authors": [
{
"id": 31,
"email": "james.allen@overleaf.com",
"name": "James"
}
]
}

View File

@@ -0,0 +1,83 @@
import { expect } from 'chai'
import nock from 'nock'
import mongodb from 'mongodb-legacy'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
const fixture = path => new URL(`../fixtures/${path}`, import.meta.url)
describe('Deleting project', function () {
beforeEach(function (done) {
this.projectId = new ObjectId().toString()
this.historyId = new ObjectId().toString()
MockWeb()
.get(`/project/${this.projectId}/details`)
.reply(200, {
name: 'Test Project',
overleaf: { history: { id: this.historyId } },
})
MockHistoryStore()
.get(`/api/projects/${this.historyId}/latest/history`)
.replyWithFile(200, fixture('chunks/0-3.json'))
MockHistoryStore().delete(`/api/projects/${this.historyId}`).reply(204)
ProjectHistoryApp.ensureRunning(done)
})
describe('when the project has no pending updates', function () {
it('successfully deletes the project', function (done) {
ProjectHistoryClient.deleteProject(this.projectId, done)
})
})
describe('when the project has pending updates', function () {
beforeEach(function (done) {
ProjectHistoryClient.pushRawUpdate(
this.projectId,
{
pathname: '/main.tex',
docLines: 'hello',
doc: this.docId,
meta: { userId: this.userId, ts: new Date() },
},
err => {
if (err) {
return done(err)
}
ProjectHistoryClient.setFirstOpTimestamp(
this.projectId,
Date.now(),
err => {
if (err) {
return done(err)
}
ProjectHistoryClient.deleteProject(this.projectId, done)
}
)
}
)
})
it('clears pending updates', function (done) {
ProjectHistoryClient.getDump(this.projectId, (err, dump) => {
if (err) {
return done(err)
}
expect(dump.updates).to.deep.equal([])
done()
})
})
it('clears the first op timestamp', function (done) {
ProjectHistoryClient.getFirstOpTimestamp(this.projectId, (err, ts) => {
if (err) {
return done(err)
}
expect(ts).to.be.null
done()
})
})
})
})

View File

@@ -0,0 +1,415 @@
import { expect } from 'chai'
import request from 'request'
import crypto from 'node:crypto'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
function createMockBlob(historyId, content) {
const sha = crypto.createHash('sha1').update(content).digest('hex')
MockHistoryStore()
.get(`/api/projects/${historyId}/blobs/${sha}`)
.reply(200, content)
.persist()
return sha
}
describe('Diffs', function () {
beforeEach(function (done) {
ProjectHistoryApp.ensureRunning(error => {
if (error) {
throw error
}
this.historyId = new ObjectId().toString()
this.projectId = new ObjectId().toString()
MockHistoryStore().post('/api/projects').reply(200, {
projectId: this.historyId,
})
MockWeb()
.get(`/project/${this.projectId}/details`)
.reply(200, {
name: 'Test Project',
overleaf: { history: { id: this.historyId } },
})
ProjectHistoryClient.initializeProject(this.historyId, error => {
if (error) {
return done(error)
}
done()
})
})
})
afterEach(function () {
nock.cleanAll()
})
it('should return a diff of the updates to a doc from a single chunk', function (done) {
this.blob = 'one two three five'
this.sha = createMockBlob(this.historyId, this.blob)
this.v2AuthorId = '123456789'
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/6/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'foo.tex': {
hash: this.sha,
stringLength: this.blob.length,
},
},
},
changes: [
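// Each textOperation below is an OT op list: a positive integer retains that
// many characters, a string inserts it, and a negative integer deletes that
// many. Applied in order to 'one two three five' they produce
// 'one three four five six', which is what the expected diff reflects.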
{
operations: [
{
pathname: 'foo.tex',
textOperation: [13, ' four', 5],
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'foo.tex',
textOperation: [4, -4, 15],
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
{
operations: [
{
pathname: 'foo.tex',
textOperation: [19, ' six'],
},
],
timestamp: '2017-12-04T10:29:26.120Z',
v2Authors: [this.v2AuthorId],
},
],
},
startVersion: 3,
},
authors: [31],
})
ProjectHistoryClient.getDiff(
this.projectId,
'foo.tex',
3,
6,
(error, diff) => {
if (error) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
u: 'one ',
},
{
d: 'two ',
meta: {
users: [31],
start_ts: 1512383362905,
end_ts: 1512383362905,
},
},
{
u: 'three',
},
{
i: ' four',
meta: {
users: [31],
start_ts: 1512383357786,
end_ts: 1512383357786,
},
},
{
u: ' five',
},
{
i: ' six',
meta: {
users: [this.v2AuthorId],
start_ts: 1512383366120,
end_ts: 1512383366120,
},
},
],
})
done()
}
)
})
it('should return a diff of the updates to a doc across multiple chunks', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'foo.tex': {
hash: createMockBlob(this.historyId, 'one two three five'),
stringLength: 'one two three five'.length,
},
},
},
changes: [
{
operations: [
{
pathname: 'foo.tex',
textOperation: [13, ' four', 5],
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'foo.tex',
textOperation: [4, -4, 15],
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/6/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'foo.tex': {
hash: createMockBlob(this.historyId, 'one three four five'),
stringLength: 'one three four five'.length,
},
},
},
changes: [
{
operations: [
{
pathname: 'foo.tex',
textOperation: [19, ' six'],
},
],
timestamp: '2017-12-04T10:29:26.120Z',
authors: [31],
},
{
operations: [
{
pathname: 'foo.tex',
textOperation: [23, ' seven'],
},
],
timestamp: '2017-12-04T10:29:26.120Z',
authors: [31],
},
],
},
startVersion: 5,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
ProjectHistoryClient.getDiff(
this.projectId,
'foo.tex',
4,
6,
(error, diff) => {
if (error) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
u: 'one ',
},
{
d: 'two ',
meta: {
users: [31],
start_ts: 1512383362905,
end_ts: 1512383362905,
},
},
{
u: 'three four five',
},
{
i: ' six',
meta: {
users: [31],
start_ts: 1512383366120,
end_ts: 1512383366120,
},
},
],
})
done()
}
)
})
it('should return a 404 when there are no changes for the file in the range', function (done) {
this.blob = 'one two three five'
this.sha = createMockBlob(this.historyId, this.blob)
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/6/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'foo.tex': {
hash: this.sha,
stringLength: this.blob.length,
},
},
},
changes: [
{
operations: [
{
pathname: 'foo.tex',
textOperation: [13, ' four', 5],
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [31],
})
request.get(
{
url: `http://127.0.0.1:3054/project/${this.projectId}/diff`,
qs: {
pathname: 'not_here.tex',
from: 3,
to: 6,
},
json: true,
},
(error, res, body) => {
if (error) {
throw error
}
expect(res.statusCode).to.equal(404)
done()
}
)
})
it('should return a binary flag with a diff of a binary file', function (done) {
this.blob = 'one two three five'
this.sha = createMockBlob(this.historyId, this.blob)
this.binaryBlob = Buffer.from([1, 2, 3, 4])
this.binarySha = createMockBlob(this.historyId, this.binaryBlob)
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/6/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'binary.tex': {
hash: this.binarySha,
byteLength: this.binaryBlob.length, // Indicates binary
},
'foo.tex': {
hash: this.sha,
stringLength: this.blob.length, // Indicates a text file
},
},
},
changes: [
{
operations: [
{
pathname: 'foo.tex',
textOperation: [13, ' four', 5],
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'foo.tex',
textOperation: [4, -4, 15],
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
{
operations: [
{
pathname: 'foo.tex',
textOperation: [19, ' six'],
},
],
timestamp: '2017-12-04T10:29:26.120Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
ProjectHistoryClient.getDiff(
this.projectId,
'binary.tex',
3,
6,
(error, diff) => {
if (error) {
throw error
}
expect(diff).to.deep.equal({
diff: {
binary: true,
},
})
done()
}
)
})
})

View File

@@ -0,0 +1,73 @@
/* eslint-disable
no-undef,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import async from 'async'
import sinon from 'sinon'
import { expect } from 'chai'
import Settings from '@overleaf/settings'
import assert from 'node:assert'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
describe('DiscardingUpdates', function () {
beforeEach(function (done) {
this.timestamp = new Date()
return ProjectHistoryApp.ensureRunning(error => {
if (error != null) {
throw error
}
this.user_id = new ObjectId().toString()
this.project_id = new ObjectId().toString()
this.doc_id = new ObjectId().toString()
MockHistoryStore().post('/api/projects').reply(200, {
projectId: 0,
})
MockWeb()
.get(`/project/${this.project_id}/details`)
.reply(200, { name: 'Test Project' })
return ProjectHistoryClient.initializeProject(this.project_id, done)
})
})
return it('should discard updates', function (done) {
return async.series(
[
cb => {
const update = {
pathname: '/main.tex',
docLines: 'a\nb',
doc: this.doc_id,
meta: { user_id: this.user_id, ts: new Date() },
}
return ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb)
},
cb => {
return ProjectHistoryClient.flushProject(this.project_id, cb)
},
],
error => {
if (error != null) {
throw error
}
return done()
}
)
})
})

View File

@@ -0,0 +1,880 @@
/* eslint-disable
no-undef,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import sinon from 'sinon'
import { expect } from 'chai'
import Settings from '@overleaf/settings'
import request from 'request'
import assert from 'node:assert'
import Path from 'node:path'
import crypto from 'node:crypto'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
import * as HistoryId from './helpers/HistoryId.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockFileStore = () => nock('http://127.0.0.1:3009')
const MockWeb = () => nock('http://127.0.0.1:3000')
const sha = data => crypto.createHash('sha1').update(data).digest('hex')
describe('FileTree Diffs', function () {
beforeEach(function (done) {
return ProjectHistoryApp.ensureRunning(error => {
if (error != null) {
throw error
}
this.historyId = new ObjectId().toString()
this.projectId = new ObjectId().toString()
MockHistoryStore().post('/api/projects').reply(200, {
projectId: this.historyId,
})
MockWeb()
.get(`/project/${this.projectId}/details`)
.reply(200, {
name: 'Test Project',
overleaf: { history: { id: this.historyId } },
})
return ProjectHistoryClient.initializeProject(
this.historyId,
(error, olProject) => {
if (error != null) {
throw error
}
return done()
}
)
})
})
afterEach(function () {
return nock.cleanAll()
})
it('should return a diff of the updates to a doc from a single chunk', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/7/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'foo.tex': {
hash: sha('mock-sha-foo'),
stringLength: 42,
},
'renamed.tex': {
hash: sha('mock-sha-renamed'),
stringLength: 42,
},
'deleted.tex': {
hash: sha('mock-sha-deleted'),
stringLength: 42,
},
},
},
changes: [
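// In these changes a newPathname renames a file (an empty newPathname
// removes it), a `file` entry adds one, and a textOperation edits one;
// the expected file-tree diff below maps them to 'renamed', 'removed',
// 'added' and 'edited'.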
{
operations: [
{
pathname: 'renamed.tex',
newPathname: 'newName.tex',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'foo.tex',
textOperation: ['lorem ipsum'],
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'deleted.tex',
newPathname: '',
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
{
operations: [
{
file: {
hash: sha('new-sha'),
stringLength: 42,
},
pathname: 'added.tex',
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
3,
7,
(error, diff) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
pathname: 'foo.tex',
operation: 'edited',
},
{
pathname: 'deleted.tex',
operation: 'removed',
deletedAtV: 5,
editable: true,
},
{
newPathname: 'newName.tex',
pathname: 'renamed.tex',
operation: 'renamed',
editable: true,
},
{
pathname: 'added.tex',
operation: 'added',
editable: true,
},
],
})
return done()
}
)
})
it('should return a diff of the updates to a doc across multiple chunks', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'foo.tex': {
// Updated in this chunk
hash: sha('mock-sha-foo'),
stringLength: 42,
},
'bar.tex': {
// Updated in the next chunk
hash: sha('mock-sha-bar'),
stringLength: 42,
},
'baz.tex': {
// Not updated
hash: sha('mock-sha-bar'),
stringLength: 42,
},
'renamed.tex': {
hash: sha('mock-sha-renamed'),
stringLength: 42,
},
'deleted.tex': {
hash: sha('mock-sha-deleted'),
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'renamed.tex',
newPathname: 'newName.tex',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'foo.tex',
textOperation: ['lorem ipsum'],
},
],
timestamp: '2017-12-04T10:29:19.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'deleted.tex',
newPathname: '',
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
],
},
startVersion: 2,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/7/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'foo.tex': {
hash: sha('mock-sha-foo'),
stringLength: 42,
},
'baz.tex': {
hash: sha('mock-sha-bar'),
stringLength: 42,
},
'newName.tex': {
hash: sha('mock-sha-renamed'),
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
file: {
hash: sha('new-sha'),
stringLength: 42,
},
pathname: 'added.tex',
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
{
operations: [
{
pathname: 'bar.tex',
textOperation: ['lorem ipsum'],
},
],
timestamp: '2017-12-04T10:29:23.786Z',
authors: [31],
},
],
},
startVersion: 5,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
2,
7,
(error, diff) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
pathname: 'foo.tex',
operation: 'edited',
},
{
pathname: 'bar.tex',
operation: 'edited',
},
{
pathname: 'baz.tex',
editable: true,
},
{
pathname: 'deleted.tex',
operation: 'removed',
deletedAtV: 4,
editable: true,
},
{
newPathname: 'newName.tex',
pathname: 'renamed.tex',
operation: 'renamed',
editable: true,
},
{
pathname: 'added.tex',
operation: 'added',
editable: true,
},
],
})
return done()
}
)
})
it('should return a diff that includes multiple renames', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'one.tex': {
hash: sha('mock-sha'),
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'one.tex',
newPathname: 'two.tex',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'two.tex',
newPathname: 'three.tex',
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
3,
5,
(error, diff) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
newPathname: 'three.tex',
pathname: 'one.tex',
operation: 'renamed',
editable: true,
},
],
})
return done()
}
)
})
it('should handle deleting then re-adding a file', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'one.tex': {
hash: sha('mock-sha'),
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'one.tex',
newPathname: '',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'one.tex',
file: {
hash: sha('mock-sha'),
},
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
3,
5,
(error, diff) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
pathname: 'one.tex',
operation: 'added',
editable: null,
},
],
})
return done()
}
)
})
it('should handle deleting then renaming a file to the same place', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'one.tex': {
hash: sha('mock-sha-one'),
stringLength: 42,
},
'two.tex': {
hash: sha('mock-sha-two'),
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'one.tex',
newPathname: '',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'two.tex',
newPathname: 'one.tex',
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
3,
5,
(error, diff) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
pathname: 'two.tex',
newPathname: 'one.tex',
operation: 'renamed',
editable: true,
},
],
})
return done()
}
)
})
it('should handle adding then renaming a file', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {},
},
changes: [
{
operations: [
{
pathname: 'one.tex',
file: {
hash: sha('mock-sha'),
stringLength: 42,
},
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: 'one.tex',
newPathname: 'two.tex',
},
],
timestamp: '2017-12-04T10:29:22.905Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
3,
5,
(error, diff) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
pathname: 'two.tex',
operation: 'added',
editable: true,
},
],
})
return done()
}
)
})
it('should return 422 with a chunk with an invalid rename', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/6/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'foo.tex': {
hash: sha('mock-sha-foo'),
stringLength: 42,
},
'bar.tex': {
hash: sha('mock-sha-bar'),
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
pathname: 'foo.tex',
newPathname: 'bar.tex',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
],
},
startVersion: 5,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
5,
6,
(error, diff, statusCode) => {
if (error != null) {
throw error
}
expect(statusCode).to.equal(422)
return done()
}
)
})
it('should return 200 with a chunk with an invalid add', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/6/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
'foo.tex': {
hash: sha('mock-sha-foo'),
stringLength: 42,
},
},
},
changes: [
{
operations: [
{
file: {
hash: sha('new-sha'),
},
pathname: 'foo.tex',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
],
},
startVersion: 5,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
5,
6,
(error, diff, statusCode) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
pathname: 'foo.tex',
operation: 'added',
editable: null,
},
],
})
expect(statusCode).to.equal(200)
return done()
}
)
})
it('should handle edits of missing/invalid files ', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {},
},
changes: [
{
operations: [
{
pathname: 'new.tex',
textOperation: ['lorem ipsum'],
},
],
timestamp: '2017-12-04T10:29:18.786Z',
authors: [31],
},
{
operations: [
{
pathname: '',
textOperation: ['lorem ipsum'],
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
3,
5,
(error, diff) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [
{
operation: 'edited',
pathname: 'new.tex',
},
],
})
return done()
}
)
})
it('should handle deletions of missing/invalid files ', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {},
},
changes: [
{
operations: [
{
pathname: 'missing.tex',
newPathname: '',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: '',
newPathname: '',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
3,
5,
(error, diff) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [],
})
return done()
}
)
})
return it('should handle renames of missing/invalid files ', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {},
},
changes: [
{
operations: [
{
pathname: 'missing.tex',
newPathname: 'missing-renamed.tex',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
{
operations: [
{
pathname: '',
newPathname: 'missing-renamed-other.tex',
},
],
timestamp: '2017-12-04T10:29:17.786Z',
authors: [31],
},
],
},
startVersion: 3,
},
authors: [{ id: 31, email: 'james.allen@overleaf.com', name: 'James' }],
})
return ProjectHistoryClient.getFileTreeDiff(
this.projectId,
3,
5,
(error, diff) => {
if (error != null) {
throw error
}
expect(diff).to.deep.equal({
diff: [],
})
return done()
}
)
})
})

View File

@@ -0,0 +1,242 @@
import async from 'async'
import nock from 'nock'
import { expect } from 'chai'
import request from 'request'
import assert from 'node:assert'
import mongodb from 'mongodb-legacy'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
describe('Flushing old queues', function () {
const historyId = new ObjectId().toString()
beforeEach(function (done) {
this.timestamp = new Date()
ProjectHistoryApp.ensureRunning(error => {
if (error) {
throw error
}
this.projectId = new ObjectId().toString()
this.docId = new ObjectId().toString()
this.fileId = new ObjectId().toString()
MockHistoryStore().post('/api/projects').reply(200, {
projectId: historyId,
})
MockWeb()
.get(`/project/${this.projectId}/details`)
.reply(200, {
name: 'Test Project',
overleaf: {
history: {
id: historyId,
},
},
})
MockHistoryStore()
.get(`/api/projects/${historyId}/latest/history`)
.reply(200, {
chunk: {
startVersion: 0,
history: {
changes: [],
},
},
})
ProjectHistoryClient.initializeProject(historyId, done)
})
})
afterEach(function () {
nock.cleanAll()
})
describe('retrying an unflushed project', function () {
describe('when the update is older than the cutoff', function () {
beforeEach(function (done) {
this.flushCall = MockHistoryStore()
.put(
`/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
)
.reply(201)
.post(`/api/projects/${historyId}/legacy_changes?end_version=0`)
.reply(200)
const update = {
pathname: '/main.tex',
docLines: 'a\nb',
doc: this.docId,
meta: { user_id: this.user_id, ts: new Date() },
}
async.series(
[
cb =>
ProjectHistoryClient.pushRawUpdate(this.projectId, update, cb),
cb =>
ProjectHistoryClient.setFirstOpTimestamp(
this.projectId,
Date.now() - 24 * 3600 * 1000,
cb
),
],
done
)
})
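// The queued update's first-op timestamp is set 24 hours in the past,
// well beyond the 3 hour cutoff (maxAge=10800 seconds) used below, so
// the flush endpoint is expected to push the change to the history store.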
it('flushes the project history queue', function (done) {
request.post(
{
url: 'http://127.0.0.1:3054/flush/old?maxAge=10800',
},
(error, res, body) => {
if (error) {
return done(error)
}
expect(res.statusCode).to.equal(200)
assert(
this.flushCall.isDone(),
'made calls to history service to store updates'
)
done()
}
)
})
it('flushes the project history queue in the background when requested', function (done) {
request.post(
{
url: 'http://127.0.0.1:3054/flush/old?maxAge=10800&background=1',
},
(error, res, body) => {
if (error) {
return done(error)
}
expect(res.statusCode).to.equal(200)
expect(body).to.equal('{"message":"running flush in background"}')
assert(
!this.flushCall.isDone(),
'did not make calls to history service to store updates in the foreground'
)
setTimeout(() => {
assert(
this.flushCall.isDone(),
'made calls to history service to store updates in the background'
)
done()
}, 100)
}
)
})
})
describe('when the update is newer than the cutoff', function () {
beforeEach(function (done) {
this.flushCall = MockHistoryStore()
.put(
`/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
)
.reply(201)
.post(`/api/projects/${historyId}/legacy_changes?end_version=0`)
.reply(200)
const update = {
pathname: '/main.tex',
docLines: 'a\nb',
doc: this.docId,
meta: { user_id: this.user_id, ts: new Date() },
}
async.series(
[
cb =>
ProjectHistoryClient.pushRawUpdate(this.projectId, update, cb),
cb =>
ProjectHistoryClient.setFirstOpTimestamp(
this.projectId,
Date.now() - 60 * 1000,
cb
),
],
done
)
})
it('does not flush the project history queue', function (done) {
request.post(
{
url: `http://127.0.0.1:3054/flush/old?maxAge=${3 * 3600}`,
},
(error, res, body) => {
if (error) {
return done(error)
}
expect(res.statusCode).to.equal(200)
assert(
!this.flushCall.isDone(),
'did not make calls to history service to store updates'
)
done()
}
)
})
})
describe('when the update does not have a timestamp', function () {
beforeEach(function (done) {
this.flushCall = MockHistoryStore()
.put(
`/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
)
.reply(201)
.post(`/api/projects/${historyId}/legacy_changes?end_version=0`)
.reply(200)
const update = {
pathname: '/main.tex',
docLines: 'a\nb',
doc: this.docId,
meta: { user_id: this.user_id, ts: new Date() },
}
this.startDate = Date.now()
async.series(
[
cb =>
ProjectHistoryClient.pushRawUpdate(this.projectId, update, cb),
cb =>
ProjectHistoryClient.clearFirstOpTimestamp(this.projectId, cb),
],
done
)
})
it('flushes the project history queue anyway', function (done) {
request.post(
{
url: `http://127.0.0.1:3054/flush/old?maxAge=${3 * 3600}`,
},
(error, res, body) => {
if (error) {
return done(error)
}
expect(res.statusCode).to.equal(200)
assert(
this.flushCall.isDone(),
'made calls to history service to store updates'
)
ProjectHistoryClient.getFirstOpTimestamp(
this.projectId,
(err, result) => {
if (err) {
return done(err)
}
expect(result).to.be.null
done()
}
)
}
)
})
})
})
})

View File

@@ -0,0 +1,158 @@
import { expect } from 'chai'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import Core from 'overleaf-editor-core'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
import latestChunk from '../fixtures/chunks/7-8.json' with { type: 'json' }
import previousChunk from '../fixtures/chunks/4-6.json' with { type: 'json' }
import firstChunk from '../fixtures/chunks/0-3.json' with { type: 'json' }
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
const fixture = path => new URL(`../fixtures/${path}`, import.meta.url)
describe('GetChangesInChunkSince', function () {
let projectId, historyId
beforeEach(function (done) {
projectId = new ObjectId().toString()
historyId = new ObjectId().toString()
ProjectHistoryApp.ensureRunning(error => {
if (error) throw error
MockHistoryStore().post('/api/projects').reply(200, {
projectId: historyId,
})
ProjectHistoryClient.initializeProject(historyId, (error, olProject) => {
if (error) throw error
MockWeb()
.get(`/project/${projectId}/details`)
.reply(200, {
name: 'Test Project',
overleaf: { history: { id: olProject.id } },
})
MockHistoryStore()
.get(`/api/projects/${historyId}/latest/history`)
.replyWithFile(200, fixture('chunks/7-8.json'))
MockHistoryStore()
.get(`/api/projects/${historyId}/versions/7/history`)
.replyWithFile(200, fixture('chunks/7-8.json'))
MockHistoryStore()
.get(`/api/projects/${historyId}/versions/6/history`)
.replyWithFile(200, fixture('chunks/7-8.json'))
MockHistoryStore()
.get(`/api/projects/${historyId}/versions/5/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
MockHistoryStore()
.get(`/api/projects/${historyId}/versions/4/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
MockHistoryStore()
.get(`/api/projects/${historyId}/versions/3/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
MockHistoryStore()
.get(`/api/projects/${historyId}/versions/2/history`)
.replyWithFile(200, fixture('chunks/0-3.json'))
MockHistoryStore()
.get(`/api/projects/${historyId}/versions/1/history`)
.replyWithFile(200, fixture('chunks/0-3.json'))
MockHistoryStore()
.get(`/api/projects/${historyId}/versions/0/history`)
.replyWithFile(200, fixture('chunks/0-3.json'))
done()
})
})
})
afterEach(function () {
nock.cleanAll()
})
function expectChangesSince(version, n, changes, done) {
ProjectHistoryClient.getChangesInChunkSince(
projectId,
version,
{},
(error, got) => {
if (error) throw error
expect(got.latestStartVersion).to.equal(6)
expect(got.changes).to.have.length(n)
expect(got.changes.map(c => Core.Change.fromRaw(c))).to.deep.equal(
changes.map(c => Core.Change.fromRaw(c))
)
done()
}
)
}
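// The mocked history is split into three chunks (fixtures 0-3, 4-6 and 7-8)
// whose start versions are 0, 3 and 6. Each case below requests the changes
// since a given version and should get back only the changes from the chunk
// containing that version.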
const cases = {
8: {
name: 'when up-to-date, return zero changes',
n: 0,
changes: [],
},
7: {
name: 'when one version behind, return one change',
n: 1,
changes: latestChunk.chunk.history.changes.slice(1),
},
6: {
name: 'when at current chunk boundary, return latest chunk in full',
n: 2,
changes: latestChunk.chunk.history.changes,
},
5: {
name: 'when one version behind last chunk, return one change',
n: 1,
changes: previousChunk.chunk.history.changes.slice(2),
},
4: {
name: 'when in last chunk, return two changes',
n: 2,
changes: previousChunk.chunk.history.changes.slice(1),
},
3: {
name: 'when at previous chunk boundary, return just the previous chunk',
n: 3,
changes: previousChunk.chunk.history.changes,
},
2: {
name: 'when at end of first chunk, return one change',
n: 1,
changes: firstChunk.chunk.history.changes.slice(2),
},
1: {
name: 'when in first chunk, return two changes',
n: 2,
changes: firstChunk.chunk.history.changes.slice(1),
},
0: {
name: 'when from zero, return just the first chunk',
n: 3,
changes: firstChunk.chunk.history.changes,
},
}
for (const [since, { name, n, changes }] of Object.entries(cases)) {
it(name, function (done) {
expectChangesSince(since, n, changes, done)
})
}
it('should return an error when past the end version', function (done) {
ProjectHistoryClient.getChangesInChunkSince(
projectId,
9,
{ allowErrors: true },
(error, _body, statusCode) => {
if (error) throw error
expect(statusCode).to.equal(400)
done()
}
)
})
})

View File

@@ -0,0 +1,76 @@
/* eslint-disable
no-undef,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { expect } from 'chai'
import settings from '@overleaf/settings'
import request from 'request'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
describe('Health Check', function () {
beforeEach(function (done) {
const projectId = new ObjectId()
const historyId = new ObjectId().toString()
settings.history.healthCheck = { project_id: projectId }
return ProjectHistoryApp.ensureRunning(error => {
if (error != null) {
throw error
}
MockHistoryStore().post('/api/projects').reply(200, {
projectId: historyId,
})
MockHistoryStore()
.get(`/api/projects/${historyId}/latest/history`)
.reply(200, {
chunk: {
startVersion: 0,
history: {
snapshot: {},
changes: [],
},
},
})
MockWeb()
.get(`/project/${projectId}/details`)
.reply(200, {
name: 'Test Project',
overleaf: {
history: {
id: historyId,
},
},
})
return ProjectHistoryClient.initializeProject(historyId, done)
})
})
return it('should respond to the health check', function (done) {
return request.get(
{
url: 'http://127.0.0.1:3054/health_check',
},
(error, res, body) => {
if (error != null) {
return done(error)
}
expect(res.statusCode).to.equal(200)
return done()
}
)
})
})

View File

@@ -0,0 +1,282 @@
import { expect } from 'chai'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
const fixture = path => new URL(`../fixtures/${path}`, import.meta.url)
describe('Labels', function () {
beforeEach(function (done) {
ProjectHistoryApp.ensureRunning(error => {
if (error != null) {
throw error
}
this.historyId = new ObjectId().toString()
MockHistoryStore().post('/api/projects').reply(200, {
projectId: this.historyId,
})
ProjectHistoryClient.initializeProject(
this.historyId,
(error, olProject) => {
if (error != null) {
throw error
}
this.project_id = new ObjectId().toString()
MockWeb()
.get(`/project/${this.project_id}/details`)
.reply(200, {
name: 'Test Project',
overleaf: { history: { id: olProject.id } },
})
MockHistoryStore()
.get(`/api/projects/${this.historyId}/latest/history`)
.replyWithFile(200, fixture('chunks/7-8.json'))
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/7/history`)
.replyWithFile(200, fixture('chunks/7-8.json'))
.persist()
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/8/history`)
.replyWithFile(200, fixture('chunks/7-8.json'))
.persist()
this.comment = 'a saved version comment'
this.comment2 = 'another saved version comment'
this.user_id = new ObjectId().toString()
this.created_at = new Date(1)
done()
}
)
})
})
afterEach(function () {
nock.cleanAll()
})
it('can create and get labels', function (done) {
ProjectHistoryClient.createLabel(
this.project_id,
this.user_id,
7,
this.comment,
this.created_at,
(error, label) => {
if (error != null) {
throw error
}
ProjectHistoryClient.getLabels(this.project_id, (error, labels) => {
if (error != null) {
throw error
}
expect(labels).to.deep.equal([label])
done()
})
}
)
})
it('can create and get labels with no user id', function (done) {
const userId = undefined
ProjectHistoryClient.createLabel(
this.project_id,
userId,
7,
this.comment,
this.created_at,
(error, label) => {
if (error != null) {
throw error
}
ProjectHistoryClient.getLabels(this.project_id, (error, labels) => {
if (error != null) {
throw error
}
expect(labels).to.deep.equal([label])
done()
})
}
)
})
it('can delete labels', function (done) {
ProjectHistoryClient.createLabel(
this.project_id,
this.user_id,
7,
this.comment,
this.created_at,
(error, label) => {
if (error != null) {
throw error
}
ProjectHistoryClient.deleteLabel(this.project_id, label.id, error => {
if (error != null) {
throw error
}
ProjectHistoryClient.getLabels(this.project_id, (error, labels) => {
if (error != null) {
throw error
}
expect(labels).to.deep.equal([])
done()
})
})
}
)
})
it('can delete labels for the current user', function (done) {
ProjectHistoryClient.createLabel(
this.project_id,
this.user_id,
7,
this.comment,
this.created_at,
(error, label) => {
if (error != null) {
throw error
}
ProjectHistoryClient.deleteLabelForUser(
this.project_id,
this.user_id,
label.id,
error => {
if (error != null) {
throw error
}
ProjectHistoryClient.getLabels(this.project_id, (error, labels) => {
if (error != null) {
throw error
}
expect(labels).to.deep.equal([])
done()
})
}
)
}
)
})
it('can transfer ownership of labels', function (done) {
const fromUser = new ObjectId().toString()
const toUser = new ObjectId().toString()
ProjectHistoryClient.createLabel(
this.project_id,
fromUser,
7,
this.comment,
this.created_at,
(error, label) => {
if (error != null) {
throw error
}
ProjectHistoryClient.createLabel(
this.project_id,
fromUser,
7,
this.comment2,
this.created_at,
(error, label2) => {
if (error != null) {
throw error
}
ProjectHistoryClient.transferLabelOwnership(
fromUser,
toUser,
error => {
if (error != null) {
throw error
}
ProjectHistoryClient.getLabels(
this.project_id,
(error, labels) => {
if (error != null) {
throw error
}
expect(labels).to.deep.equal([
{
id: label.id,
comment: label.comment,
version: label.version,
created_at: label.created_at,
user_id: toUser,
},
{
id: label2.id,
comment: label2.comment,
version: label2.version,
created_at: label2.created_at,
user_id: toUser,
},
])
done()
}
)
}
)
}
)
}
)
})
it('should return labels with summarized updates', function (done) {
ProjectHistoryClient.createLabel(
this.project_id,
this.user_id,
8,
this.comment,
this.created_at,
(error, label) => {
if (error != null) {
throw error
}
ProjectHistoryClient.getSummarizedUpdates(
this.project_id,
{ min_count: 1 },
(error, updates) => {
if (error != null) {
throw error
}
expect(updates).to.deep.equal({
nextBeforeTimestamp: 6,
updates: [
{
fromV: 6,
toV: 8,
meta: {
users: ['5a5637efdac84e81b71014c4', 31],
start_ts: 1512383567277,
end_ts: 1512383572877,
},
pathnames: ['bar.tex', 'main.tex'],
project_ops: [],
labels: [
{
id: label.id.toString(),
comment: this.comment,
version: 8,
user_id: this.user_id,
created_at: this.created_at.toISOString(),
},
],
},
],
})
done()
}
)
}
)
})
})

View File

@@ -0,0 +1,78 @@
import { expect } from 'chai'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
const fixture = path => new URL(`../fixtures/${path}`, import.meta.url)
describe('LatestSnapshot', function () {
beforeEach(function (done) {
ProjectHistoryApp.ensureRunning(error => {
if (error) {
throw error
}
this.historyId = new ObjectId().toString()
MockHistoryStore().post('/api/projects').reply(200, {
projectId: this.historyId,
})
ProjectHistoryClient.initializeProject(
this.historyId,
(error, v1Project) => {
if (error) {
throw error
}
this.projectId = new ObjectId().toString()
MockWeb()
.get(`/project/${this.projectId}/details`)
.reply(200, {
name: 'Test Project',
overleaf: { history: { id: v1Project.id } },
})
done()
}
)
})
})
afterEach(function () {
nock.cleanAll()
})
it('should return the snapshot with applied changes and metadata, but without the full content', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/latest/history`)
.replyWithFile(200, fixture('chunks/0-3.json'))
ProjectHistoryClient.getLatestSnapshot(this.projectId, (error, body) => {
if (error) {
throw error
}
expect(body).to.deep.equal({
snapshot: {
files: {
'main.tex': {
hash: 'f28571f561d198b87c24cc6a98b78e87b665e22d',
stringLength: 20649,
operations: [{ textOperation: [1912, 'Hello world', 18726] }],
metadata: { main: true },
},
'foo.tex': {
hash: '4f785a4c192155b240e3042b3a7388b47603f423',
stringLength: 41,
operations: [{ textOperation: [26, '\n\nFour five six'] }],
},
},
},
version: 3,
})
done()
})
})
})

View File

@@ -0,0 +1,298 @@
import { expect } from 'chai'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
const fixture = path => new URL(`../fixtures/${path}`, import.meta.url)
describe('ReadSnapshot', function () {
beforeEach(function (done) {
ProjectHistoryApp.ensureRunning(error => {
if (error) {
throw error
}
this.historyId = new ObjectId().toString()
MockHistoryStore().post('/api/projects').reply(200, {
projectId: this.historyId,
})
ProjectHistoryClient.initializeProject(
this.historyId,
(error, v1Project) => {
if (error) {
throw error
}
this.projectId = new ObjectId().toString()
MockWeb()
.get(`/project/${this.projectId}/details`)
.reply(200, {
name: 'Test Project',
overleaf: { history: { id: v1Project.id } },
})
done()
}
)
})
})
afterEach(function () {
nock.cleanAll()
})
describe('of a text file', function () {
it('should return the snapshot of a doc at the given version', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
MockHistoryStore()
.get(
`/api/projects/${this.historyId}/blobs/c6654ea913979e13e22022653d284444f284a172`
)
.replyWithFile(
200,
fixture('blobs/c6654ea913979e13e22022653d284444f284a172')
)
ProjectHistoryClient.getSnapshot(
this.projectId,
'foo.tex',
5,
(error, body) => {
if (error) {
throw error
}
expect(body).to.deep.equal(
`\
Hello world
One two three
Four five six
Seven eight nine\
`.replace(/^\t/g, '')
)
done()
}
)
})
it('should return the snapshot of a doc at a different version', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/4/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
MockHistoryStore()
.get(
`/api/projects/${this.historyId}/blobs/c6654ea913979e13e22022653d284444f284a172`
)
.replyWithFile(
200,
fixture('blobs/c6654ea913979e13e22022653d284444f284a172')
)
ProjectHistoryClient.getSnapshot(
this.projectId,
'foo.tex',
4,
(error, body) => {
if (error) {
throw error
}
expect(body).to.deep.equal(
`\
Hello world
One two three
Four five six
Seven eight nince\
`.replace(/^\t/g, '')
)
done()
}
)
})
it('should return the snapshot of a doc after a rename', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/6/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
MockHistoryStore()
.get(
`/api/projects/${this.historyId}/blobs/c6654ea913979e13e22022653d284444f284a172`
)
.replyWithFile(
200,
fixture('blobs/c6654ea913979e13e22022653d284444f284a172')
)
ProjectHistoryClient.getSnapshot(
this.projectId,
'bar.tex',
6,
(error, body) => {
if (error) {
throw error
}
expect(body).to.deep.equal(
`\
Hello world
One two three
Four five six
Seven eight nine\
`.replace(/^\t/g, '')
)
done()
}
)
})
})
describe('of a binary file', function () {
beforeEach(function () {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/4/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
binary_file: {
hash: 'c6654ea913979e13e22022653d284444f284a172',
byteLength: 41,
},
},
},
changes: [],
},
startVersion: 3,
},
authors: [],
})
})
it('should return the snapshot of the file at the given version', function (done) {
MockHistoryStore()
.get(
`/api/projects/${this.historyId}/blobs/c6654ea913979e13e22022653d284444f284a172`
)
.replyWithFile(
200,
fixture('blobs/c6654ea913979e13e22022653d284444f284a172')
)
ProjectHistoryClient.getSnapshot(
this.projectId,
'binary_file',
4,
(error, body) => {
if (error) {
throw error
}
expect(body).to.deep.equal(
`\
Hello world
One two three
Four five six\
`.replace(/^\t/g, '')
)
done()
}
)
})
it("should return an error when the blob doesn't exist", function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/4/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
binary_file: {
hash: 'c6654ea913979e13e22022653d284444f284a172',
byteLength: 41,
},
},
},
changes: [],
},
startVersion: 3,
},
authors: [],
})
MockHistoryStore()
.get(
`/api/projects/${this.historyId}/blobs/c6654ea913979e13e22022653d284444f284a172`
)
.reply(404)
ProjectHistoryClient.getSnapshot(
this.projectId,
'binary_file',
4,
{ allowErrors: true },
(error, body, statusCode) => {
if (error) {
throw error
}
expect(statusCode).to.equal(500)
done()
}
)
})
it('should return an error when the blob request errors', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/4/history`)
.reply(200, {
chunk: {
history: {
snapshot: {
files: {
binary_file: {
hash: 'c6654ea913979e13e22022653d284444f284a172',
byteLength: 41,
},
},
},
changes: [],
},
startVersion: 3,
},
authors: [],
})
MockHistoryStore()
.get(
`/api/projects/${this.historyId}/blobs/c6654ea913979e13e22022653d284444f284a172`
)
.replyWithError('oh no!')
ProjectHistoryClient.getSnapshot(
this.projectId,
'binary_file',
4,
{ allowErrors: true },
(error, body, statusCode) => {
if (error) {
throw error
}
expect(statusCode).to.equal(500)
done()
}
)
})
})
})

View File

@@ -0,0 +1,194 @@
import async from 'async'
import nock from 'nock'
import { expect } from 'chai'
import request from 'request'
import assert from 'node:assert'
import mongodb from 'mongodb-legacy'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockWeb = () => nock('http://127.0.0.1:3000')
const MockCallback = () => nock('http://127.0.0.1')
describe('Retrying failed projects', function () {
const historyId = new ObjectId().toString()
beforeEach(function (done) {
this.timestamp = new Date()
ProjectHistoryApp.ensureRunning(error => {
if (error) {
throw error
}
this.project_id = new ObjectId().toString()
this.doc_id = new ObjectId().toString()
this.file_id = new ObjectId().toString()
MockHistoryStore().post('/api/projects').reply(200, {
projectId: historyId,
})
MockWeb()
.get(`/project/${this.project_id}/details`)
.reply(200, {
name: 'Test Project',
overleaf: {
history: {
id: historyId,
},
},
})
MockHistoryStore()
.get(`/api/projects/${historyId}/latest/history`)
.reply(200, {
chunk: {
startVersion: 0,
history: {
changes: [],
},
},
})
ProjectHistoryClient.initializeProject(historyId, done)
})
})
afterEach(function () {
nock.cleanAll()
})
describe('retrying project history', function () {
describe('when there is a soft failure', function () {
beforeEach(function (done) {
this.flushCall = MockHistoryStore()
.put(
`/api/projects/${historyId}/blobs/0a207c060e61f3b88eaee0a8cd0696f46fb155eb`
)
.reply(201)
.post(`/api/projects/${historyId}/legacy_changes?end_version=0`)
.reply(200)
const update = {
pathname: '/main.tex',
docLines: 'a\nb',
doc: this.doc_id,
meta: { user_id: this.user_id, ts: new Date() },
}
async.series(
[
cb =>
ProjectHistoryClient.pushRawUpdate(this.project_id, update, cb),
cb =>
ProjectHistoryClient.setFailure(
{
project_id: this.project_id,
attempts: 1,
error: 'soft-error',
},
cb
),
],
done
)
})
it('flushes the project history queue', function (done) {
request.post(
{
url: 'http://127.0.0.1:3054/retry/failures?failureType=soft&limit=1&timeout=10000',
},
(error, res, body) => {
if (error) {
return done(error)
}
expect(res.statusCode).to.equal(200)
assert(
this.flushCall.isDone(),
'made calls to history service to store updates'
)
done()
}
)
})
it('retries in the background when requested', function (done) {
this.callback = MockCallback()
.matchHeader('Authorization', '123')
.get('/ping')
.reply(200)
request.post(
{
url: 'http://127.0.0.1:3054/retry/failures?failureType=soft&limit=1&timeout=10000&callbackUrl=http%3A%2F%2F127.0.0.1%2Fping',
headers: {
'X-CALLBACK-Authorization': '123',
},
},
(error, res, body) => {
if (error) {
return done(error)
}
expect(res.statusCode).to.equal(200)
expect(body).to.equal(
'{"retryStatus":"running retryFailures in background"}'
)
assert(
!this.flushCall.isDone(),
'did not make calls to history service to store updates in the foreground'
)
setTimeout(() => {
assert(
this.flushCall.isDone(),
'made calls to history service to store updates in the background'
)
assert(this.callback.isDone(), 'hit the callback url')
done()
}, 100)
}
)
})
})
describe('when there is a hard failure', function () {
beforeEach(function (done) {
MockWeb()
.get(`/project/${this.project_id}/details`)
.reply(200, {
name: 'Test Project',
overleaf: {
history: {
id: historyId,
},
},
})
ProjectHistoryClient.setFailure(
{
project_id: this.project_id,
attempts: 100,
error: 'hard-error',
},
done
)
})
it('calls web to resync the project', function (done) {
const resyncCall = MockWeb()
.post(`/project/${this.project_id}/history/resync`)
.reply(200)
request.post(
{
url: 'http://127.0.0.1:3054/retry/failures?failureType=hard&limit=1&timeout=10000',
},
(error, res, body) => {
if (error) {
return done(error)
}
expect(res.statusCode).to.equal(200)
assert(resyncCall.isDone(), 'made a call to web to resync project')
done()
}
)
})
})
})
})

File diff suppressed because it is too large

View File

@@ -0,0 +1,249 @@
/* eslint-disable
no-undef,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import sinon from 'sinon'
import { expect } from 'chai'
import Settings from '@overleaf/settings'
import request from 'request'
import assert from 'node:assert'
import mongodb from 'mongodb-legacy'
import nock from 'nock'
import * as ProjectHistoryClient from './helpers/ProjectHistoryClient.js'
import * as ProjectHistoryApp from './helpers/ProjectHistoryApp.js'
const { ObjectId } = mongodb
const MockHistoryStore = () => nock('http://127.0.0.1:3100')
const MockFileStore = () => nock('http://127.0.0.1:3009')
const MockWeb = () => nock('http://127.0.0.1:3000')
const fixture = path => new URL(`../fixtures/${path}`, import.meta.url)
describe('Summarized updates', function () {
beforeEach(function (done) {
this.projectId = new ObjectId().toString()
this.historyId = new ObjectId().toString()
return ProjectHistoryApp.ensureRunning(error => {
if (error != null) {
throw error
}
MockHistoryStore().post('/api/projects').reply(200, {
projectId: this.historyId,
})
return ProjectHistoryClient.initializeProject(
this.historyId,
(error, olProject) => {
if (error != null) {
throw error
}
MockWeb()
.get(`/project/${this.projectId}/details`)
.reply(200, {
name: 'Test Project',
overleaf: { history: { id: olProject.id } },
})
MockHistoryStore()
.get(`/api/projects/${this.historyId}/latest/history`)
.replyWithFile(200, fixture('chunks/7-8.json'))
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/6/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/3/history`)
.replyWithFile(200, fixture('chunks/0-3.json'))
return done()
}
)
})
})
afterEach(function () {
return nock.cleanAll()
})
it('should return the latest summarized updates from a single chunk', function (done) {
return ProjectHistoryClient.getSummarizedUpdates(
this.projectId,
{ min_count: 1 },
(error, updates) => {
if (error != null) {
throw error
}
expect(updates).to.deep.equal({
nextBeforeTimestamp: 6,
updates: [
{
fromV: 6,
toV: 8,
meta: {
users: ['5a5637efdac84e81b71014c4', 31],
start_ts: 1512383567277,
end_ts: 1512383572877,
},
pathnames: ['bar.tex', 'main.tex'],
project_ops: [],
labels: [],
},
],
})
return done()
}
)
})
it('should return the latest summarized updates, with min_count spanning multiple chunks', function (done) {
return ProjectHistoryClient.getSummarizedUpdates(
this.projectId,
{ min_count: 5 },
(error, updates) => {
if (error != null) {
throw error
}
expect(updates).to.deep.equal({
updates: [
{
fromV: 6,
toV: 8,
meta: {
users: ['5a5637efdac84e81b71014c4', 31],
start_ts: 1512383567277,
end_ts: 1512383572877,
},
pathnames: ['bar.tex', 'main.tex'],
project_ops: [],
labels: [],
},
{
fromV: 5,
toV: 6,
meta: {
users: [31],
start_ts: 1512383366120,
end_ts: 1512383366120,
},
pathnames: [],
project_ops: [
{
atV: 5,
rename: {
pathname: 'foo.tex',
newPathname: 'bar.tex',
},
},
],
labels: [],
},
{
fromV: 2,
toV: 5,
meta: {
users: [31],
start_ts: 1512383313724,
end_ts: 1512383362905,
},
pathnames: ['foo.tex'],
project_ops: [],
labels: [],
},
{
fromV: 1,
toV: 2,
meta: {
users: [31],
start_ts: 1512383246874,
end_ts: 1512383246874,
},
pathnames: [],
project_ops: [
{
atV: 1,
rename: {
pathname: 'bar.tex',
newPathname: 'foo.tex',
},
},
],
labels: [],
},
{
fromV: 0,
toV: 1,
meta: {
users: [31],
start_ts: 1512383015633,
end_ts: 1512383015633,
},
pathnames: ['main.tex'],
project_ops: [],
labels: [],
},
],
})
return done()
}
)
})
it('should return the summarized updates from a before version at the start of a chunk', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/4/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
return ProjectHistoryClient.getSummarizedUpdates(
this.projectId,
{ before: 4 },
(error, updates) => {
if (error != null) {
throw error
}
expect(updates.updates[0].toV).to.equal(4)
return done()
}
)
})
it('should return the summarized updates from a before version in the middle of a chunk', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/5/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
return ProjectHistoryClient.getSummarizedUpdates(
this.projectId,
{ before: 5 },
(error, updates) => {
if (error != null) {
throw error
}
expect(updates.updates[0].toV).to.equal(5)
return done()
}
)
})
return it('should return the summarized updates from a before version at the end of a chunk', function (done) {
MockHistoryStore()
.get(`/api/projects/${this.historyId}/versions/6/history`)
.replyWithFile(200, fixture('chunks/4-6.json'))
return ProjectHistoryClient.getSummarizedUpdates(
this.projectId,
{ before: 6 },
(error, updates) => {
if (error != null) {
throw error
}
expect(updates.updates[0].toV).to.equal(6)
return done()
}
)
})
})

File diff suppressed because it is too large

View File

@@ -0,0 +1,7 @@
// TODO: This file was created by bulk-decaffeinate.
// Sanity-check the conversion and remove this comment.
let id = 0
export function nextId() {
return id++
}

View File

@@ -0,0 +1,41 @@
/* eslint-disable
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { expect } from 'chai'
import request from 'request'
import Settings from '@overleaf/settings'
export function getLatestContent(olProjectId, callback) {
if (callback == null) {
callback = function () {}
}
return request.get(
{
url: `${Settings.overleaf.history.host}/projects/${olProjectId}/latest/content`,
auth: {
user: Settings.overleaf.history.user,
pass: Settings.overleaf.history.pass,
sendImmediately: true,
},
},
(error, res, body) => {
if (error) {
return callback(error)
}
if (res.statusCode < 200 || res.statusCode >= 300) {
return callback(
new Error(
`history store returned a non-success status code: ${res.statusCode}`
)
)
}
return callback(null, JSON.parse(body))
}
)
}

View File

@@ -0,0 +1,41 @@
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS101: Remove unnecessary use of Array.from
* DS102: Remove unnecessary code created because of implicit returns
* DS205: Consider reworking code to avoid use of IIFEs
* DS207: Consider shorter variations of null checks
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import { app } from '../../../../app/js/server.js'
let running = false
let initing = false
const callbacks = []
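// Start the app once on port 3054; callers that arrive while it is still
// booting are queued and notified once the server is listening.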
export function ensureRunning(callback) {
if (callback == null) {
callback = function () {}
}
if (running) {
return callback()
} else if (initing) {
return callbacks.push(callback)
}
initing = true
callbacks.push(callback)
app.listen(3054, '127.0.0.1', error => {
if (error != null) {
throw error
}
running = true
for (const cb of callbacks) {
cb()
}
})
}

View File

@@ -0,0 +1,354 @@
import { expect } from 'chai'
import request from 'request'
import Settings from '@overleaf/settings'
import RedisWrapper from '@overleaf/redis-wrapper'
import { db } from '../../../../app/js/mongodb.js'
const rclient = RedisWrapper.createClient(Settings.redis.project_history)
const Keys = Settings.redis.project_history.key_schema
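// Flush the project-history Redis database so each test starts with empty queues.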
export function resetDatabase(callback) {
rclient.flushdb(callback)
}
export function initializeProject(historyId, callback) {
request.post(
{
url: 'http://127.0.0.1:3054/project',
json: { historyId },
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(200)
callback(null, body.project)
}
)
}
export function flushProject(projectId, options, callback) {
if (typeof options === 'function') {
callback = options
options = null
}
if (!options) {
options = { allowErrors: false }
}
request.post(
{
url: `http://127.0.0.1:3054/project/${projectId}/flush`,
},
(error, res, body) => {
if (error) {
return callback(error)
}
if (!options.allowErrors) {
expect(res.statusCode).to.equal(204)
}
callback(error, res)
}
)
}
export function getSummarizedUpdates(projectId, query, callback) {
request.get(
{
url: `http://127.0.0.1:3054/project/${projectId}/updates`,
qs: query,
json: true,
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(200)
callback(error, body)
}
)
}
export function getDiff(projectId, pathname, from, to, callback) {
request.get(
{
url: `http://127.0.0.1:3054/project/${projectId}/diff`,
qs: {
pathname,
from,
to,
},
json: true,
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(200)
callback(error, body)
}
)
}
export function getFileTreeDiff(projectId, from, to, callback) {
request.get(
{
url: `http://127.0.0.1:3054/project/${projectId}/filetree/diff`,
qs: {
from,
to,
},
json: true,
},
(error, res, body) => {
if (error) {
return callback(error)
}
callback(error, body, res.statusCode)
}
)
}
export function getChangesInChunkSince(projectId, since, options, callback) {
request.get(
{
url: `http://127.0.0.1:3054/project/${projectId}/changes-in-chunk`,
qs: {
since,
},
json: true,
},
(error, res, body) => {
if (error) return callback(error)
if (!options.allowErrors) {
expect(res.statusCode).to.equal(200)
}
callback(null, body, res.statusCode)
}
)
}
export function getLatestSnapshot(projectId, callback) {
request.get(
{
url: `http://127.0.0.1:3054/project/${projectId}/snapshot`,
json: true,
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(200)
callback(null, body)
}
)
}
export function getSnapshot(projectId, pathname, version, options, callback) {
if (typeof options === 'function') {
callback = options
options = null
}
if (!options) {
options = { allowErrors: false }
}
request.get(
{
url: `http://127.0.0.1:3054/project/${projectId}/version/${version}/${encodeURIComponent(
pathname
)}`,
},
(error, res, body) => {
if (error) {
return callback(error)
}
if (!options.allowErrors) {
expect(res.statusCode).to.equal(200)
}
callback(error, body, res.statusCode)
}
)
}
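// Push a raw update JSON onto the project's history ops queue in Redis.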
export function pushRawUpdate(projectId, update, callback) {
rclient.rpush(
Keys.projectHistoryOps({ project_id: projectId }),
JSON.stringify(update),
callback
)
}
export function setFirstOpTimestamp(projectId, timestamp, callback) {
rclient.set(
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId }),
timestamp,
callback
)
}
export function getFirstOpTimestamp(projectId, callback) {
rclient.get(
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId }),
callback
)
}
export function clearFirstOpTimestamp(projectId, callback) {
rclient.del(
Keys.projectHistoryFirstOpTimestamp({ project_id: projectId }),
callback
)
}
export function getQueueLength(projectId, callback) {
rclient.llen(Keys.projectHistoryOps({ project_id: projectId }), callback)
}
export function getQueueCounts(callback) {
return request.get(
{
url: 'http://127.0.0.1:3054/status/queue',
json: true,
},
callback
)
}
export function resyncHistory(projectId, callback) {
request.post(
{
url: `http://127.0.0.1:3054/project/${projectId}/resync`,
json: true,
body: { origin: { kind: 'test-origin' } },
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(204)
callback(error)
}
)
}
export function createLabel(
projectId,
userId,
version,
comment,
createdAt,
callback
) {
request.post(
{
url: `http://127.0.0.1:3054/project/${projectId}/labels`,
json: { comment, version, created_at: createdAt, user_id: userId },
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(200)
callback(null, body)
}
)
}
export function getLabels(projectId, callback) {
request.get(
{
url: `http://127.0.0.1:3054/project/${projectId}/labels`,
json: true,
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(200)
callback(null, body)
}
)
}
export function deleteLabelForUser(projectId, userId, labelId, callback) {
request.delete(
{
url: `http://127.0.0.1:3054/project/${projectId}/user/${userId}/labels/${labelId}`,
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(204)
callback(null, body)
}
)
}
export function deleteLabel(projectId, labelId, callback) {
request.delete(
{
url: `http://127.0.0.1:3054/project/${projectId}/labels/${labelId}`,
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(204)
callback(null, body)
}
)
}
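// Remove any existing failure record and insert the supplied one in its place.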
export function setFailure(failureEntry, callback) {
db.projectHistoryFailures.deleteOne(
{ project_id: { $exists: true } },
(err, result) => {
if (err) {
return callback(err)
}
db.projectHistoryFailures.insertOne(failureEntry, callback)
}
)
}
export function getFailure(projectId, callback) {
db.projectHistoryFailures.findOne({ project_id: projectId }, callback)
}
export function transferLabelOwnership(fromUser, toUser, callback) {
request.post(
{
url: `http://127.0.0.1:3054/user/${fromUser}/labels/transfer/${toUser}`,
},
(error, res, body) => {
if (error) {
return callback(error)
}
expect(res.statusCode).to.equal(204)
callback(null, body)
}
)
}
export function getDump(projectId, callback) {
request.get(
`http://127.0.0.1:3054/project/${projectId}/dump`,
(err, res, body) => {
if (err) {
return callback(err)
}
expect(res.statusCode).to.equal(200)
callback(null, JSON.parse(body))
}
)
}
export function deleteProject(projectId, callback) {
request.delete(`http://127.0.0.1:3054/project/${projectId}`, (err, res) => {
if (err) {
return callback(err)
}
expect(res.statusCode).to.equal(204)
callback()
})
}

View File

@@ -0,0 +1,13 @@
import chai from 'chai'
import sinonChai from 'sinon-chai'
import chaiAsPromised from 'chai-as-promised'
import mongodb from 'mongodb-legacy'
const { ObjectId } = mongodb
// ensure every ObjectId has the id string as a property for correct comparisons
ObjectId.cacheHexString = true
// Chai configuration
chai.should()
chai.use(sinonChai)
chai.use(chaiAsPromised)

View File

@@ -0,0 +1,160 @@
import sinon from 'sinon'
import { strict as esmock } from 'esmock'
const MODULE_PATH = '../../../../app/js/BlobManager.js'
describe('BlobManager', function () {
beforeEach(async function () {
this.callback = sinon.stub()
this.extendLock = sinon.stub().yields()
this.project_id = 'project-1'
this.historyId = 12345
this.HistoryStoreManager = {
createBlobForUpdate: sinon.stub(),
}
this.UpdateTranslator = {
isAddUpdate: sinon.stub().returns(false),
}
this.BlobManager = await esmock(MODULE_PATH, {
'../../../../app/js/HistoryStoreManager.js': this.HistoryStoreManager,
'../../../../app/js/UpdateTranslator.js': this.UpdateTranslator,
})
this.updates = ['update-1', 'update-2']
})
describe('createBlobsForUpdates', function () {
describe('when there are no blobs to create', function () {
beforeEach(function (done) {
this.BlobManager.createBlobsForUpdates(
this.project_id,
this.historyId,
this.updates,
this.extendLock,
(error, updatesWithBlobs) => {
this.callback(error, updatesWithBlobs)
done()
}
)
})
it('should not create any blobs', function () {
this.HistoryStoreManager.createBlobForUpdate.called.should.equal(false)
})
it('should call the callback with the updates', function () {
const updatesWithBlobs = this.updates.map(update => ({
update,
}))
this.callback.calledWith(null, updatesWithBlobs).should.equal(true)
})
})
describe('when there are blobs to create', function () {
beforeEach(function (done) {
this.UpdateTranslator.isAddUpdate.returns(true)
this.blobHash = 'test hash'
this.HistoryStoreManager.createBlobForUpdate.yields(null, {
file: this.blobHash,
})
this.BlobManager.createBlobsForUpdates(
this.project_id,
this.historyId,
this.updates,
this.extendLock,
(error, updatesWithBlobs) => {
this.callback(error, updatesWithBlobs)
done()
}
)
})
it('should create blobs', function () {
this.HistoryStoreManager.createBlobForUpdate
.calledWith(this.project_id, this.historyId, this.updates[0])
.should.equal(true)
})
it('should extend the lock', function () {
this.extendLock.called.should.equal(true)
})
it('should call the callback with the updates', function () {
const updatesWithBlobs = this.updates.map(update => ({
update,
blobHashes: { file: this.blobHash },
}))
this.callback.calledWith(null, updatesWithBlobs).should.equal(true)
})
})
describe('when there are blobs to create and there is a single network error', function () {
beforeEach(function (done) {
this.UpdateTranslator.isAddUpdate.returns(true)
this.blobHash = 'test hash'
this.HistoryStoreManager.createBlobForUpdate
.onFirstCall()
.yields(new Error('random failure'))
this.HistoryStoreManager.createBlobForUpdate.yields(null, {
file: this.blobHash,
})
this.BlobManager.createBlobsForUpdates(
this.project_id,
this.historyId,
this.updates,
this.extendLock,
(error, updatesWithBlobs) => {
this.callback(error, updatesWithBlobs)
done()
}
)
})
it('should create blobs', function () {
this.HistoryStoreManager.createBlobForUpdate
.calledWith(this.project_id, this.historyId, this.updates[0])
.should.equal(true)
})
it('should extend the lock', function () {
this.extendLock.called.should.equal(true)
})
it('should call the callback with the updates', function () {
const updatesWithBlobs = this.updates.map(update => ({
update,
blobHashes: { file: this.blobHash },
}))
this.callback.calledWith(null, updatesWithBlobs).should.equal(true)
})
})
describe('when there are blobs to create and there are multiple network errors', function () {
beforeEach(function (done) {
this.UpdateTranslator.isAddUpdate.returns(true)
this.blobHash = 'test hash'
this.error = new Error('random failure')
this.HistoryStoreManager.createBlobForUpdate.yields(this.error)
this.BlobManager.createBlobsForUpdates(
this.project_id,
this.historyId,
this.updates,
this.extendLock,
(error, updatesWithBlobs) => {
this.callback(error, updatesWithBlobs)
done()
}
)
})
it('should try to create blobs', function () {
this.HistoryStoreManager.createBlobForUpdate
.calledWith(this.project_id, this.historyId, this.updates[0])
.should.equal(true)
})
it('should call the callback with an error', function () {
this.callback.calledWith(this.error).should.equal(true)
})
})
})
})

File diff suppressed because it is too large

View File

@@ -0,0 +1,395 @@
import sinon from 'sinon'
import { expect } from 'chai'
import { strict as esmock } from 'esmock'
const MODULE_PATH = '../../../../app/js/DiffGenerator.js'
describe('DiffGenerator', function () {
beforeEach(async function () {
this.DiffGenerator = await esmock(MODULE_PATH, {})
this.ts = Date.now()
this.user_id = 'mock-user-id'
this.user_id_2 = 'mock-user-id-2'
this.meta = {
start_ts: this.ts,
end_ts: this.ts,
user_id: this.user_id,
}
})
describe('buildDiff', function () {
beforeEach(function () {
this.diff = [{ u: 'mock-diff' }]
this.content = 'Hello world'
this.updates = [
{ i: 'mock-update-1' },
{ i: 'mock-update-2' },
{ i: 'mock-update-3' },
]
this.DiffGenerator._mocks.applyUpdateToDiff = sinon
.stub()
.returns(this.diff)
this.DiffGenerator._mocks.compressDiff = sinon.stub().returns(this.diff)
this.result = this.DiffGenerator.buildDiff(this.content, this.updates)
})
it('should return the diff', function () {
this.result.should.deep.equal(this.diff)
})
it('should build the content into an initial diff', function () {
this.DiffGenerator._mocks.applyUpdateToDiff
.calledWith(
[
{
u: this.content,
},
],
this.updates[0]
)
.should.equal(true)
})
it('should apply each update', function () {
this.updates.map(update =>
this.DiffGenerator._mocks.applyUpdateToDiff
.calledWith(sinon.match.any, update)
.should.equal(true)
)
})
it('should compress the diff', function () {
this.DiffGenerator._mocks.compressDiff
.calledWith(this.diff)
.should.equal(true)
})
})
describe('compressDiff', function () {
describe('with adjacent inserts with the same user id', function () {
it('should create one update with combined meta data and min/max timestamps', function () {
const diff = this.DiffGenerator.compressDiff([
{
i: 'foo',
meta: { start_ts: 10, end_ts: 20, users: [this.user_id] },
},
{
i: 'bar',
meta: { start_ts: 5, end_ts: 15, users: [this.user_id] },
},
])
expect(diff).to.deep.equal([
{
i: 'foobar',
meta: { start_ts: 5, end_ts: 20, users: [this.user_id] },
},
])
})
})
describe('with adjacent inserts with different user ids', function () {
it('should leave the inserts unchanged', function () {
const input = [
{
i: 'foo',
meta: { start_ts: 10, end_ts: 20, users: [this.user_id] },
},
{
i: 'bar',
meta: { start_ts: 5, end_ts: 15, users: [this.user_id_2] },
},
]
const output = this.DiffGenerator.compressDiff(input)
expect(output).to.deep.equal(input)
})
})
describe('with adjacent deletes with the same user id', function () {
it('should create one update with combined meta data and min/max timestamps', function () {
const diff = this.DiffGenerator.compressDiff([
{
d: 'foo',
meta: { start_ts: 10, end_ts: 20, users: [this.user_id] },
},
{
d: 'bar',
meta: { start_ts: 5, end_ts: 15, users: [this.user_id] },
},
])
expect(diff).to.deep.equal([
{
d: 'foobar',
meta: { start_ts: 5, end_ts: 20, users: [this.user_id] },
},
])
})
})
describe('with adjacent deletes with different user ids', function () {
it('should leave the deletes unchanged', function () {
const input = [
{
d: 'foo',
meta: { start_ts: 10, end_ts: 20, users: [this.user_id] },
},
{
d: 'bar',
meta: { start_ts: 5, end_ts: 15, users: [this.user_id_2] },
},
]
const output = this.DiffGenerator.compressDiff(input)
expect(output).to.deep.equal(input)
})
})
describe('with history resync updates', function () {
it('should keep only inserts and mark them as unchanged text', function () {
const input = [
{ u: 'untracked text' },
{
i: 'inserted anonymously',
meta: { origin: { kind: 'history-resync' } },
},
{
d: 'deleted anonymously',
meta: { origin: { kind: 'history-resync' } },
},
]
const output = this.DiffGenerator.compressDiff(input)
expect(output).to.deep.equal([
{ u: 'untracked text' },
{ u: 'inserted anonymously' },
])
})
})
})
describe('applyUpdateToDiff', function () {
describe('an insert', function () {
it('should insert into the middle of (u)nchanged text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff([{ u: 'foobar' }], {
op: [{ p: 3, i: 'baz' }],
meta: this.meta,
})
expect(diff).to.deep.equal([
{ u: 'foo' },
{ i: 'baz', meta: this.meta },
{ u: 'bar' },
])
})
it('should insert into the start of (u)nchanged text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff([{ u: 'foobar' }], {
op: [{ p: 0, i: 'baz' }],
meta: this.meta,
})
expect(diff).to.deep.equal([
{ i: 'baz', meta: this.meta },
{ u: 'foobar' },
])
})
it('should insert into the end of (u)nchanged text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff([{ u: 'foobar' }], {
op: [{ p: 6, i: 'baz' }],
meta: this.meta,
})
expect(diff).to.deep.equal([
{ u: 'foobar' },
{ i: 'baz', meta: this.meta },
])
})
it('should insert into the middle of (i)nserted text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ i: 'foobar', meta: this.meta }],
{ op: [{ p: 3, i: 'baz' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ i: 'foo', meta: this.meta },
{ i: 'baz', meta: this.meta },
{ i: 'bar', meta: this.meta },
])
})
it('should not count deletes in the running length total', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ d: 'deleted', meta: this.meta }, { u: 'foobar' }],
{ op: [{ p: 3, i: 'baz' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ d: 'deleted', meta: this.meta },
{ u: 'foo' },
{ i: 'baz', meta: this.meta },
{ u: 'bar' },
])
})
})
describe('a delete', function () {
describe('deleting unchanged text', function () {
it('should delete from the middle of (u)nchanged text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ u: 'foobazbar' }],
{ op: [{ p: 3, d: 'baz' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ u: 'foo' },
{ d: 'baz', meta: this.meta },
{ u: 'bar' },
])
})
it('should delete from the start of (u)nchanged text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ u: 'foobazbar' }],
{ op: [{ p: 0, d: 'foo' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ d: 'foo', meta: this.meta },
{ u: 'bazbar' },
])
})
it('should delete from the end of (u)nchanged text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ u: 'foobazbar' }],
{ op: [{ p: 6, d: 'bar' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ u: 'foobaz' },
{ d: 'bar', meta: this.meta },
])
})
it('should delete across multiple (u)nchanged text parts', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ u: 'foo' }, { u: 'baz' }, { u: 'bar' }],
{ op: [{ p: 2, d: 'obazb' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ u: 'fo' },
{ d: 'o', meta: this.meta },
{ d: 'baz', meta: this.meta },
{ d: 'b', meta: this.meta },
{ u: 'ar' },
])
})
})
describe('deleting inserts', function () {
it('should delete from the middle of (i)nserted text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ i: 'foobazbar', meta: this.meta }],
{ op: [{ p: 3, d: 'baz' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ i: 'foo', meta: this.meta },
{ i: 'bar', meta: this.meta },
])
})
it('should delete from the start of (i)nserted text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ i: 'foobazbar', meta: this.meta }],
{ op: [{ p: 0, d: 'foo' }], meta: this.meta }
)
expect(diff).to.deep.equal([{ i: 'bazbar', meta: this.meta }])
})
it('should delete from the end of (i)nserted text', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ i: 'foobazbar', meta: this.meta }],
{ op: [{ p: 6, d: 'bar' }], meta: this.meta }
)
expect(diff).to.deep.equal([{ i: 'foobaz', meta: this.meta }])
})
it('should delete across multiple (u)nchanged and (i)nserted text parts', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ u: 'foo' }, { i: 'baz', meta: this.meta }, { u: 'bar' }],
{ op: [{ p: 2, d: 'obazb' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ u: 'fo' },
{ d: 'o', meta: this.meta },
{ d: 'b', meta: this.meta },
{ u: 'ar' },
])
})
})
describe('deleting over existing deletes', function () {
it('should delete across multiple (u)nchanged and (d)eleted text parts', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ u: 'foo' }, { d: 'baz', meta: this.meta }, { u: 'bar' }],
{ op: [{ p: 2, d: 'ob' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ u: 'fo' },
{ d: 'o', meta: this.meta },
{ d: 'baz', meta: this.meta },
{ d: 'b', meta: this.meta },
{ u: 'ar' },
])
})
})
describe("deleting when the text doesn't match", function () {
it('should throw an error when deleting from the middle of (u)nchanged text', function () {
expect(() =>
this.DiffGenerator.applyUpdateToDiff([{ u: 'foobazbar' }], {
op: [{ p: 3, d: 'xxx' }],
meta: this.meta,
})
).to.throw(this.DiffGenerator.ConsistencyError)
})
it('should throw an error when deleting from the start of (u)nchanged text', function () {
expect(() =>
this.DiffGenerator.applyUpdateToDiff([{ u: 'foobazbar' }], {
op: [{ p: 0, d: 'xxx' }],
meta: this.meta,
})
).to.throw(this.DiffGenerator.ConsistencyError)
})
it('should throw an error when deleting from the end of (u)nchanged text', function () {
expect(() =>
this.DiffGenerator.applyUpdateToDiff([{ u: 'foobazbar' }], {
op: [{ p: 6, d: 'xxx' }],
meta: this.meta,
})
).to.throw(this.DiffGenerator.ConsistencyError)
})
})
describe('when the last update in the existing diff is a delete', function () {
it('should insert the new update before the delete', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ u: 'foo' }, { d: 'bar', meta: this.meta }],
{ op: [{ p: 3, i: 'baz' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ u: 'foo' },
{ i: 'baz', meta: this.meta },
{ d: 'bar', meta: this.meta },
])
})
})
describe('when the only update in the existing diff is a delete', function () {
it('should insert the new update after the delete', function () {
const diff = this.DiffGenerator.applyUpdateToDiff(
[{ d: 'bar', meta: this.meta }],
{ op: [{ p: 0, i: 'baz' }], meta: this.meta }
)
expect(diff).to.deep.equal([
{ d: 'bar', meta: this.meta },
{ i: 'baz', meta: this.meta },
])
})
})
})
})
})

View File

@@ -0,0 +1,523 @@
import sinon from 'sinon'
import { expect } from 'chai'
import { strict as esmock } from 'esmock'
const MODULE_PATH = '../../../../app/js/DiffManager.js'
describe('DiffManager', function () {
beforeEach(async function () {
this.DocumentUpdaterManager = {}
this.DiffGenerator = {
buildDiff: sinon.stub(),
}
this.UpdatesProcessor = {
processUpdatesForProject: sinon.stub(),
}
this.HistoryStoreManager = {
getChunkAtVersion: sinon.stub(),
}
this.WebApiManager = {
getHistoryId: sinon.stub(),
}
this.ChunkTranslator = {
convertToDiffUpdates: sinon.stub(),
}
this.FileTreeDiffGenerator = {}
this.DiffManager = await esmock(MODULE_PATH, {
'../../../../app/js/DocumentUpdaterManager.js':
this.DocumentUpdaterManager,
'../../../../app/js/DiffGenerator.js': this.DiffGenerator,
'../../../../app/js/UpdatesProcessor.js': this.UpdatesProcessor,
'../../../../app/js/HistoryStoreManager.js': this.HistoryStoreManager,
'../../../../app/js/WebApiManager.js': this.WebApiManager,
'../../../../app/js/ChunkTranslator.js': this.ChunkTranslator,
'../../../../app/js/FileTreeDiffGenerator.js': this.FileTreeDiffGenerator,
})
this.projectId = 'mock-project-id'
this.callback = sinon.stub()
})
describe('getDiff', function () {
beforeEach(function () {
this.pathname = 'main.tex'
this.fromVersion = 4
this.toVersion = 8
this.initialContent = 'foo bar baz'
this.updates = ['mock-updates']
this.diff = { mock: 'dif' }
this.UpdatesProcessor.processUpdatesForProject
.withArgs(this.projectId)
.yields()
this.DiffGenerator.buildDiff
.withArgs(this.initialContent, this.updates)
.returns(this.diff)
})
describe('with a text file', function () {
beforeEach(function () {
this.DiffManager._mocks._getProjectUpdatesBetweenVersions = sinon.stub()
this.DiffManager._mocks._getProjectUpdatesBetweenVersions
.withArgs(
this.projectId,
this.pathname,
this.fromVersion,
this.toVersion
)
.yields(null, {
initialContent: this.initialContent,
updates: this.updates,
})
this.DiffManager.getDiff(
this.projectId,
this.pathname,
this.fromVersion,
this.toVersion,
this.callback
)
})
it('should make sure all pending updates have been processed', function () {
this.UpdatesProcessor.processUpdatesForProject
.calledWith(this.projectId)
.should.equal(true)
})
it('should get the updates from the history backend', function () {
this.DiffManager._mocks._getProjectUpdatesBetweenVersions
.calledWith(
this.projectId,
this.pathname,
this.fromVersion,
this.toVersion
)
.should.equal(true)
})
it('should convert the updates to a diff', function () {
this.DiffGenerator.buildDiff
.calledWith(this.initialContent, this.updates)
.should.equal(true)
})
it('should return the diff', function () {
this.callback.calledWith(null, this.diff).should.equal(true)
})
})
describe('with a binary file', function () {
beforeEach(function () {
this.DiffManager._mocks._getProjectUpdatesBetweenVersions = sinon.stub()
this.DiffManager._mocks._getProjectUpdatesBetweenVersions
.withArgs(
this.projectId,
this.pathname,
this.fromVersion,
this.toVersion
)
.yields(null, { binary: true })
this.DiffManager.getDiff(
this.projectId,
this.pathname,
this.fromVersion,
this.toVersion,
this.callback
)
})
it('should make sure all pending updates have been processed', function () {
this.UpdatesProcessor.processUpdatesForProject
.calledWith(this.projectId)
.should.equal(true)
})
it('should get the updates from the history backend', function () {
this.DiffManager._mocks._getProjectUpdatesBetweenVersions
.calledWith(
this.projectId,
this.pathname,
this.fromVersion,
this.toVersion
)
.should.equal(true)
})
it('should not try to convert any updates to a diff', function () {
this.DiffGenerator.buildDiff.called.should.equal(false)
})
it('should return the binary diff', function () {
this.callback.calledWith(null, { binary: true }).should.equal(true)
})
})
})
describe('_getProjectUpdatesBetweenVersions', function () {
beforeEach(function () {
this.pathname = 'main.tex'
this.fromVersion = 4
this.toVersion = 8
this.chunks = ['mock-chunk-1', 'mock-chunk-2']
this.concatted_chunk = 'mock-chunk'
this.DiffManager._mocks._concatChunks = sinon.stub()
this.DiffManager._mocks._concatChunks
.withArgs(this.chunks)
.returns(this.concatted_chunk)
this.updates = ['mock-updates']
this.initialContent = 'foo bar baz'
this.ChunkTranslator.convertToDiffUpdates
.withArgs(
this.projectId,
this.concatted_chunk,
this.pathname,
this.fromVersion,
this.toVersion
)
.yields(null, {
initialContent: this.initialContent,
updates: this.updates,
})
})
describe('for the normal case', function () {
beforeEach(function () {
this.DiffManager._mocks._getChunks = sinon.stub()
this.DiffManager._mocks._getChunks
.withArgs(this.projectId, this.fromVersion, this.toVersion)
.yields(null, this.chunks)
this.DiffManager._getProjectUpdatesBetweenVersions(
this.projectId,
this.pathname,
this.fromVersion,
this.toVersion,
this.callback
)
})
it('should get the relevant chunks', function () {
this.DiffManager._mocks._getChunks
.calledWith(this.projectId, this.fromVersion, this.toVersion)
.should.equal(true)
})
it('should concat the chunks', function () {
this.DiffManager._mocks._concatChunks
.calledWith(this.chunks)
.should.equal(true)
})
it('should convert the chunks to an initial version and updates', function () {
this.ChunkTranslator.convertToDiffUpdates
.calledWith(
this.projectId,
this.concatted_chunk,
this.pathname,
this.fromVersion,
this.toVersion
)
.should.equal(true)
})
it('should return the initialContent and updates', function () {
this.callback
.calledWith(null, {
initialContent: this.initialContent,
updates: this.updates,
})
.should.equal(true)
})
})
describe('for the error case', function () {
beforeEach(function () {
this.DiffManager._mocks._getChunks = sinon.stub()
this.DiffManager._mocks._getChunks
.withArgs(this.projectId, this.fromVersion, this.toVersion)
.yields(new Error('failed to load chunk'))
this.DiffManager._getProjectUpdatesBetweenVersions(
this.projectId,
this.pathname,
this.fromVersion,
this.toVersion,
this.callback
)
})
it('should call the callback with an error', function () {
this.callback
.calledWith(sinon.match.instanceOf(Error))
.should.equal(true)
})
})
})
describe('_getChunks', function () {
beforeEach(function () {
this.historyId = 'mock-overleaf-id'
this.WebApiManager.getHistoryId.yields(null, this.historyId)
})
describe('where only one chunk is needed', function () {
beforeEach(function (done) {
this.fromVersion = 4
this.toVersion = 8
this.chunk = {
chunk: {
startVersion: 2,
}, // before fromVersion
}
this.HistoryStoreManager.getChunkAtVersion
.withArgs(this.projectId, this.historyId, this.toVersion)
.yields(null, this.chunk)
this.DiffManager._getChunks(
this.projectId,
this.fromVersion,
this.toVersion,
(error, chunks) => {
this.error = error
this.chunks = chunks
done()
}
)
})
it("should the project's overleaf id", function () {
this.WebApiManager.getHistoryId
.calledWith(this.projectId)
.should.equal(true)
})
it('should request the first chunk', function () {
this.HistoryStoreManager.getChunkAtVersion
.calledWith(this.projectId, this.historyId, this.toVersion)
.should.equal(true)
})
it('should return an array of chunks', function () {
expect(this.chunks).to.deep.equal([this.chunk])
})
})
describe('where multiple chunks are needed', function () {
beforeEach(function (done) {
this.fromVersion = 4
this.toVersion = 8
this.chunk1 = {
chunk: {
startVersion: 6,
},
}
this.chunk2 = {
chunk: {
startVersion: 2,
},
}
this.HistoryStoreManager.getChunkAtVersion
.withArgs(this.projectId, this.historyId, this.toVersion)
.yields(null, this.chunk1)
this.HistoryStoreManager.getChunkAtVersion
.withArgs(
this.projectId,
this.historyId,
this.chunk1.chunk.startVersion
)
.yields(null, this.chunk2)
this.DiffManager._mocks._getChunks(
this.projectId,
this.fromVersion,
this.toVersion,
(error, chunks) => {
this.error = error
this.chunks = chunks
done()
}
)
})
it('should request the first chunk', function () {
this.HistoryStoreManager.getChunkAtVersion
.calledWith(this.projectId, this.historyId, this.toVersion)
.should.equal(true)
})
it('should request the second chunk, from where the first one started', function () {
this.HistoryStoreManager.getChunkAtVersion
.calledWith(
this.projectId,
this.historyId,
this.chunk1.chunk.startVersion
)
.should.equal(true)
})
it('should return an array of chunks', function () {
expect(this.chunks).to.deep.equal([this.chunk1, this.chunk2])
})
})
describe('where more than MAX_CHUNKS are requested', function () {
beforeEach(function (done) {
this.fromVersion = 0
this.toVersion = 8
this.chunk1 = {
chunk: {
startVersion: 6,
},
}
this.chunk2 = {
chunk: {
startVersion: 4,
},
}
this.chunk3 = {
chunk: {
startVersion: 2,
},
}
this.DiffManager.setMaxChunkRequests(2)
this.HistoryStoreManager.getChunkAtVersion
.withArgs(this.projectId, this.historyId, this.toVersion)
.yields(null, this.chunk1)
this.HistoryStoreManager.getChunkAtVersion
.withArgs(
this.projectId,
this.historyId,
this.chunk1.chunk.startVersion
)
.yields(null, this.chunk2)
this.DiffManager._mocks._getChunks(
this.projectId,
this.fromVersion,
this.toVersion,
(error, chunks) => {
this.error = error
this.chunks = chunks
done()
}
)
})
it('should request the first chunk', function () {
this.HistoryStoreManager.getChunkAtVersion
.calledWith(this.projectId, this.historyId, this.toVersion)
.should.equal(true)
})
it('should request the second chunk, from where the first one started', function () {
this.HistoryStoreManager.getChunkAtVersion
.calledWith(
this.projectId,
this.historyId,
this.chunk1.chunk.startVersion
)
.should.equal(true)
})
it('should not request the third chunk', function () {
this.HistoryStoreManager.getChunkAtVersion
.calledWith(
this.projectId,
this.historyId,
this.chunk2.chunk.startVersion
)
.should.equal(false)
})
it('should return an error', function () {
expect(this.error).to.exist
expect(this.error.message).to.equal('Diff spans too many chunks')
expect(this.error.name).to.equal('BadRequestError')
})
})
describe('where fromVersion == toVersion', function () {
beforeEach(function (done) {
this.fromVersion = 4
this.toVersion = 4
this.chunk = {
chunk: {
startVersion: 2,
}, // before fromVersion
}
this.HistoryStoreManager.getChunkAtVersion
.withArgs(this.projectId, this.historyId, this.toVersion)
.yields(null, this.chunk)
this.DiffManager._mocks._getChunks(
this.projectId,
this.fromVersion,
this.toVersion,
(error, chunks) => {
this.error = error
this.chunks = chunks
done()
}
)
})
it('should still request the first chunk (because we need the file contents)', function () {
this.HistoryStoreManager.getChunkAtVersion
.calledWith(this.projectId, this.historyId, this.toVersion)
.should.equal(true)
})
it('should return an array of chunks', function () {
expect(this.chunks).to.deep.equal([this.chunk])
})
})
})
describe('_concatChunks', function () {
it('should concat the chunks in reverse order', function () {
const result = this.DiffManager._mocks._concatChunks([
{
chunk: {
history: {
snapshot: {
files: {
mock: 'files-updated-2',
},
},
changes: [7, 8, 9],
},
},
},
{
chunk: {
history: {
snapshot: {
files: {
mock: 'files-updated',
},
},
changes: [4, 5, 6],
},
},
},
{
chunk: {
history: {
snapshot: {
files: {
mock: 'files-original',
},
},
changes: [1, 2, 3],
},
},
},
])
expect(result).to.deep.equal({
chunk: {
history: {
snapshot: {
files: {
mock: 'files-original',
},
},
changes: [1, 2, 3, 4, 5, 6, 7, 8, 9],
},
},
})
})
})
})
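The tests above pin down the chunk-fetching contract: starting from toVersion, _getChunks walks backwards through HistoryStoreManager.getChunkAtVersion until it reaches a chunk whose startVersion is at or before fromVersion, gives up with a BadRequestError ('Diff spans too many chunks') once the configured maximum number of requests is hit, and _concatChunks then stitches the newest-first list back together oldest-first. A minimal sketch of that walk, assuming a callback-style historyStore.getChunkAtVersion(projectId, historyId, version, cb) as stubbed in these tests; getChunksSketch and its parameters are illustrative, not the actual DiffManager implementation:

// Illustrative sketch only: fetch chunks from newest to oldest until the
// requested range is covered, mirroring the behaviour asserted above.
function getChunksSketch(historyStore, projectId, historyId, fromVersion, toVersion, maxChunks, cb) {
  const chunks = []
  let nextVersion = toVersion
  function fetchNext() {
    if (chunks.length >= maxChunks) {
      const error = new Error('Diff spans too many chunks')
      error.name = 'BadRequestError'
      return cb(error)
    }
    historyStore.getChunkAtVersion(projectId, historyId, nextVersion, (error, chunk) => {
      if (error) return cb(error)
      chunks.push(chunk)
      if (chunk.chunk.startVersion <= fromVersion) {
        return cb(null, chunks) // newest-first, as _concatChunks expects
      }
      nextVersion = chunk.chunk.startVersion
      fetchNext()
    })
  }
  fetchNext()
}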

View File

@@ -0,0 +1,184 @@
/* eslint-disable
no-return-assign,
no-undef,
no-unused-vars,
*/
// TODO: This file was created by bulk-decaffeinate.
// Fix any style issues and re-enable lint.
/*
* decaffeinate suggestions:
* DS102: Remove unnecessary code created because of implicit returns
* Full docs: https://github.com/decaffeinate/decaffeinate/blob/master/docs/suggestions.md
*/
import sinon from 'sinon'
import { expect } from 'chai'
import { strict as esmock } from 'esmock'
const MODULE_PATH = '../../../../app/js/DocumentUpdaterManager.js'
describe('DocumentUpdaterManager', function () {
beforeEach(async function () {
this.settings = {
apis: { documentupdater: { url: 'http://example.com' } },
}
this.request = {
get: sinon.stub(),
post: sinon.stub(),
}
this.DocumentUpdaterManager = await esmock(MODULE_PATH, {
request: this.request,
'@overleaf/settings': this.settings,
})
this.callback = sinon.stub()
this.lines = ['one', 'two', 'three']
this.version = 42
// Ids used to build URLs in these tests. They were previously left
// undefined, so the URL assertions compared 'undefined' with 'undefined';
// the values here are illustrative placeholders.
this.project_id = 'project-id-123'
this.doc_id = 'doc-id-123'
})
describe('getDocument', function () {
describe('successfully', function () {
beforeEach(function () {
this.body = JSON.stringify({
lines: this.lines,
version: this.version,
ops: [],
})
this.request.get.yields(null, { statusCode: 200 }, this.body)
return this.DocumentUpdaterManager.getDocument(
this.project_id,
this.doc_id,
this.callback
)
})
it('should get the document from the document updater', function () {
const url = `${this.settings.apis.documentupdater.url}/project/${this.project_id}/doc/${this.doc_id}`
return this.request.get.calledWith(url).should.equal(true)
})
return it('should call the callback with the content and version', function () {
return this.callback
.calledWith(null, this.lines.join('\n'), this.version)
.should.equal(true)
})
})
describe('when the document updater API returns an error', function () {
beforeEach(function () {
this.error = new Error('something went wrong')
this.request.get.yields(this.error, null, null)
return this.DocumentUpdaterManager.getDocument(
this.project_id,
this.doc_id,
this.callback
)
})
return it('should return an error to the callback', function () {
return this.callback.calledWith(this.error).should.equal(true)
})
})
return describe('when the document updater returns a failure error code', function () {
beforeEach(function () {
this.request.get.yields(null, { statusCode: 500 }, '')
return this.DocumentUpdaterManager.getDocument(
this.project_id,
this.doc_id,
this.callback
)
})
return it('should return the callback with an error', function () {
return this.callback
.calledWith(
sinon.match.has(
'message',
'doc updater returned a non-success status code: 500'
)
)
.should.equal(true)
})
})
})
return describe('setDocument', function () {
beforeEach(function () {
this.content = 'mock content'
return (this.user_id = 'user-id-123')
})
describe('successfully', function () {
beforeEach(function () {
this.request.post.yields(null, { statusCode: 200 })
return this.DocumentUpdaterManager.setDocument(
this.project_id,
this.doc_id,
this.content,
this.user_id,
this.callback
)
})
it('should set the document in the document updater', function () {
const url = `${this.settings.apis.documentupdater.url}/project/${this.project_id}/doc/${this.doc_id}`
return this.request.post
.calledWith({
url,
json: {
lines: this.content.split('\n'),
source: 'restore',
user_id: this.user_id,
undoing: true,
},
})
.should.equal(true)
})
return it('should call the callback', function () {
return this.callback.calledWith(null).should.equal(true)
})
})
describe('when the document updater API returns an error', function () {
beforeEach(function () {
this.error = new Error('something went wrong')
this.request.post.yields(this.error, null, null)
return this.DocumentUpdaterManager.setDocument(
this.project_id,
this.doc_id,
this.content,
this.user_id,
this.callback
)
})
return it('should return an error to the callback', function () {
return this.callback.calledWith(this.error).should.equal(true)
})
})
return describe('when the document updater returns a failure error code', function () {
beforeEach(function () {
this.request.post.yields(null, { statusCode: 500 }, '')
return this.DocumentUpdaterManager.setDocument(
this.project_id,
this.doc_id,
this.content,
this.user_id,
this.callback
)
})
return it('should return the callback with an error', function () {
return this.callback
.calledWith(
sinon.match.has(
'message',
'doc updater returned a non-success status code: 500'
)
)
.should.equal(true)
})
})
})
})
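Taken together, these tests document the callback API the rest of the service relies on: getDocument(projectId, docId, cb) issues a GET to <documentupdater.url>/project/:project_id/doc/:doc_id and yields the joined lines plus the version, while setDocument(projectId, docId, content, userId, cb) POSTs { lines, source: 'restore', user_id, undoing: true } to the same path. A hedged usage sketch, with placeholder ids:

// Illustrative usage only; project, doc and user ids are placeholders.
DocumentUpdaterManager.getDocument('project-id', 'doc-id', (error, content, version) => {
  if (error) throw error
  // content is the document as a single string; version is the doc-updater version
  DocumentUpdaterManager.setDocument('project-id', 'doc-id', content, 'user-id', error => {
    if (error) throw error
    // restore complete
  })
})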

View File

@@ -0,0 +1,96 @@
import sinon from 'sinon'
import { strict as esmock } from 'esmock'
import tk from 'timekeeper'
const MODULE_PATH = '../../../../app/js/ErrorRecorder.js'
describe('ErrorRecorder', function () {
beforeEach(async function () {
this.now = new Date()
tk.freeze(this.now)
this.db = {
projectHistoryFailures: {
deleteOne: sinon.stub().resolves(),
findOneAndUpdate: sinon
.stub()
.resolves({ value: { failure: 'record' } }),
},
}
this.mongodb = { db: this.db }
this.metrics = { gauge: sinon.stub() }
this.ErrorRecorder = await esmock(MODULE_PATH, {
'../../../../app/js/mongodb.js': this.mongodb,
'@overleaf/metrics': this.metrics,
})
this.project_id = 'project-id-123'
this.queueSize = 445
})
afterEach(function () {
tk.reset()
})
describe('record', function () {
beforeEach(async function () {
this.error = new Error('something bad')
await this.ErrorRecorder.promises.record(
this.project_id,
this.queueSize,
this.error
)
})
it('should record the error to mongo', function () {
this.db.projectHistoryFailures.findOneAndUpdate
.calledWithMatch(
{
project_id: this.project_id,
},
{
$set: {
queueSize: this.queueSize,
error: this.error.toString(),
stack: this.error.stack,
ts: this.now,
},
$inc: {
attempts: 1,
},
$push: {
history: {
$each: [
{
queueSize: this.queueSize,
error: this.error.toString(),
stack: this.error.stack,
ts: this.now,
},
],
$position: 0,
$slice: 10,
},
},
},
{
upsert: true,
}
)
.should.equal(true)
})
})
describe('clearError', function () {
beforeEach(async function () {
this.result = await this.ErrorRecorder.promises.clearError(
this.project_id
)
})
it('should remove any error from mongo', function () {
this.db.projectHistoryFailures.deleteOne
.calledWithMatch({ project_id: this.project_id })
.should.equal(true)
})
})
})
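The record test spells out the persistence shape: failures are upserted into projectHistoryFailures keyed by project_id, with the latest error in $set, an attempts counter in $inc, and a capped history of the last ten failures via $push with $position: 0 and $slice: 10. A minimal sketch of an equivalent upsert, assuming a mongodb collection handle; recordFailureSketch is illustrative, not the module's own code:

// Illustrative sketch of the upsert shape asserted above.
async function recordFailureSketch(collection, projectId, queueSize, error) {
  const entry = {
    queueSize,
    error: error.toString(),
    stack: error.stack,
    ts: new Date(),
  }
  await collection.findOneAndUpdate(
    { project_id: projectId },
    {
      $set: entry,
      $inc: { attempts: 1 },
      $push: { history: { $each: [entry], $position: 0, $slice: 10 } },
    },
    { upsert: true }
  )
}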

View File

@@ -0,0 +1,497 @@
import { expect } from 'chai'
import { createRangeBlobDataFromUpdate } from '../../../../app/js/HistoryBlobTranslator.js'
/**
* @import { AddDocUpdate } from "../../../../app/js/types"
*/
/**
*
* @param {string} pathname
* @param {string} docLines
* @param {AddDocUpdate["ranges"]} ranges
* @returns {AddDocUpdate}
*/
const update = (pathname, docLines, ranges) => {
return {
pathname,
docLines,
ranges,
version: 'version-1',
projectHistoryId: 'project-id',
doc: 'doc',
meta: {
user_id: 'user-id',
ts: 0,
},
}
}
describe('HistoryBlobTranslator', function () {
describe('createRangeBlobDataFromUpdate', function () {
beforeEach(function () {
this.text = 'the quick brown fox jumps over the lazy dog'
})
describe('for update with no ranges', function () {
beforeEach(function () {
this.result = createRangeBlobDataFromUpdate(
update('pathname', this.text, undefined)
)
})
it('should not return ranges', function () {
expect(this.result).to.be.undefined
})
})
describe('for update with empty ranges object', function () {
beforeEach(function () {
this.result = createRangeBlobDataFromUpdate(
update('pathname', this.text, {})
)
})
it('should not return ranges', function () {
expect(this.result).to.be.undefined
})
})
describe('for update with ranges object with empty lists', function () {
beforeEach(function () {
this.result = createRangeBlobDataFromUpdate(
update('pathname', this.text, { changes: [], comments: [] })
)
})
it('should not return ranges', function () {
expect(this.result).to.be.undefined
})
})
describe('for update with zero length comments', function () {
beforeEach(function () {
this.result = createRangeBlobDataFromUpdate(
update('pathname', this.text, {
changes: [],
comments: [
{ op: { c: '', p: 4, t: 'comment-1', resolved: false } },
],
})
)
})
it('should treat them as detached comments', function () {
expect(this.result).to.deep.equal({
comments: [{ id: 'comment-1', ranges: [] }],
trackedChanges: [],
})
})
})
describe('for update with ranges object with only comments', function () {
it('should return unmoved ranges', function () {
const result = createRangeBlobDataFromUpdate(
update('pathname', this.text, {
comments: [
{
op: { c: 'quick', p: 4, t: 'comment-1', resolved: false },
},
],
})
)
expect(result).to.deep.equal({
comments: [
{
id: 'comment-1',
ranges: [{ pos: 4, length: 5 }],
},
],
trackedChanges: [],
})
})
it('should merge comments ranges into a single comment by id', function () {
const result = createRangeBlobDataFromUpdate(
update('pathname', this.text, {
comments: [
{
op: { c: 'quick', p: 4, t: 'comment-1', resolved: false },
},
{
op: { c: 'jumps', p: 20, t: 'comment-1', resolved: false },
},
],
})
)
expect(result).to.deep.equal({
comments: [
{
id: 'comment-1',
ranges: [
{ pos: 4, length: 5 },
{ pos: 20, length: 5 },
],
},
],
trackedChanges: [],
})
})
it('should not merge ranges into a single comment if id differs', function () {
const result = createRangeBlobDataFromUpdate(
update('pathname', this.text, {
comments: [
{
op: { c: 'quick', p: 4, t: 'comment-1', resolved: false },
},
{
op: { c: 'jumps', p: 20, t: 'comment-2', resolved: false },
},
],
})
)
expect(result).to.deep.equal({
comments: [
{
id: 'comment-1',
ranges: [{ pos: 4, length: 5 }],
},
{
id: 'comment-2',
ranges: [{ pos: 20, length: 5 }],
},
],
trackedChanges: [],
})
})
})
describe('for update with ranges object with only tracked insertions', function () {
it('should translate into history tracked insertions', function () {
const result = createRangeBlobDataFromUpdate(
update('pathname', this.text, {
changes: [
{
op: { p: 4, i: 'quick' },
metadata: {
ts: '2024-01-01T00:00:00.000Z',
user_id: 'user-1',
},
},
{
op: { p: 10, i: 'brown' },
metadata: {
ts: '2023-01-01T00:00:00.000Z',
user_id: 'user-2',
},
},
],
})
)
expect(result).to.deep.equal({
comments: [],
trackedChanges: [
{
range: { pos: 4, length: 5 },
tracking: {
type: 'insert',
userId: 'user-1',
ts: '2024-01-01T00:00:00.000Z',
},
},
{
range: { pos: 10, length: 5 },
tracking: {
type: 'insert',
userId: 'user-2',
ts: '2023-01-01T00:00:00.000Z',
},
},
],
})
})
})
describe('for update with ranges object with mixed tracked changes', function () {
describe('with tracked deletions before insertions', function () {
it('should insert tracked deletions before insertions', function () {
const text = 'the quickrapid brown fox jumps over the lazy dog'
const result = createRangeBlobDataFromUpdate(
update('pathname', text, {
changes: [
{
op: { p: 4, d: 'quick' },
metadata: {
ts: '2024-01-01T00:00:00.000Z',
user_id: 'user-1',
},
},
{
op: { p: 4, hpos: 9, i: 'rapid' },
metadata: {
ts: '2023-01-01T00:00:00.000Z',
user_id: 'user-2',
},
},
],
})
)
expect(result).to.deep.equal({
comments: [],
trackedChanges: [
{
range: { pos: 4, length: 5 },
tracking: {
type: 'delete',
userId: 'user-1',
ts: '2024-01-01T00:00:00.000Z',
},
},
{
range: { pos: 9, length: 5 },
tracking: {
type: 'insert',
userId: 'user-2',
ts: '2023-01-01T00:00:00.000Z',
},
},
],
})
})
})
describe('with tracked insertions before deletions', function () {
it('should insert tracked deletions before insertions', function () {
const text = 'the quickrapid brown fox jumps over the lazy dog'
const result = createRangeBlobDataFromUpdate(
update('pathname', text, {
changes: [
{
op: { p: 4, hpos: 9, i: 'rapid' },
metadata: {
ts: '2023-01-01T00:00:00.000Z',
user_id: 'user-2',
},
},
{
op: { p: 4, d: 'quick' },
metadata: {
ts: '2024-01-01T00:00:00.000Z',
user_id: 'user-1',
},
},
],
})
)
expect(result).to.deep.equal({
comments: [],
trackedChanges: [
{
range: { pos: 4, length: 5 },
tracking: {
type: 'delete',
userId: 'user-1',
ts: '2024-01-01T00:00:00.000Z',
},
},
{
range: { pos: 9, length: 5 },
tracking: {
type: 'insert',
userId: 'user-2',
ts: '2023-01-01T00:00:00.000Z',
},
},
],
})
})
})
it('should adjust positions', function () {
const text = 'the quick brown fox jumps over the lazy dog'
const result = createRangeBlobDataFromUpdate(
update('pathname', text, {
changes: [
{
op: { p: 4, i: 'quick' },
metadata: {
ts: '2024-01-01T00:00:00.000Z',
user_id: 'user-1',
},
},
{
op: { p: 10, d: 'brown' },
metadata: {
ts: '2023-01-01T00:00:00.000Z',
user_id: 'user-2',
},
},
{
op: { p: 30, hpos: 35, i: 'lazy' },
metadata: {
ts: '2022-01-01T00:00:00.000Z',
user_id: 'user-2',
},
},
],
})
)
expect(result).to.deep.equal({
comments: [],
trackedChanges: [
{
range: { pos: 4, length: 5 },
tracking: {
type: 'insert',
userId: 'user-1',
ts: '2024-01-01T00:00:00.000Z',
},
},
{
range: { pos: 10, length: 5 },
tracking: {
type: 'delete',
userId: 'user-2',
ts: '2023-01-01T00:00:00.000Z',
},
},
{
range: { pos: 35, length: 4 },
tracking: {
type: 'insert',
userId: 'user-2',
ts: '2022-01-01T00:00:00.000Z',
},
},
],
})
})
})
describe('for update with ranges object with mixed tracked changes and comments', function () {
it('should adjust positions', function () {
const text = 'the quick brown fox jumps over the lazy dog'
const result = createRangeBlobDataFromUpdate(
update('pathname', text, {
comments: [
{
op: { c: 'quick', p: 4, t: 'comment-1', resolved: false },
},
{
op: {
c: 'fox',
p: 11,
hpos: 16,
t: 'comment-2',
resolved: false,
},
},
],
changes: [
{
op: { p: 4, i: 'quick' },
metadata: {
ts: '2024-01-01T00:00:00.000Z',
user_id: 'user-1',
},
},
{
op: { p: 10, d: 'brown' },
metadata: {
ts: '2023-01-01T00:00:00.000Z',
user_id: 'user-2',
},
},
{
op: { p: 30, hpos: 35, i: 'lazy' },
metadata: {
ts: '2022-01-01T00:00:00.000Z',
user_id: 'user-2',
},
},
],
})
)
expect(result).to.deep.equal({
comments: [
{
ranges: [{ pos: 4, length: 5 }],
id: 'comment-1',
},
{
ranges: [{ pos: 16, length: 3 }],
id: 'comment-2',
},
],
trackedChanges: [
{
range: { pos: 4, length: 5 },
tracking: {
type: 'insert',
userId: 'user-1',
ts: '2024-01-01T00:00:00.000Z',
},
},
{
range: { pos: 10, length: 5 },
tracking: {
type: 'delete',
userId: 'user-2',
ts: '2023-01-01T00:00:00.000Z',
},
},
{
range: { pos: 35, length: 4 },
tracking: {
type: 'insert',
userId: 'user-2',
ts: '2022-01-01T00:00:00.000Z',
},
},
],
})
})
it('should adjust comment length', function () {
const text = 'the quick brown fox jumps over the lazy dog'
const result = createRangeBlobDataFromUpdate(
update('pathname', text, {
comments: [
{
op: { c: 'quick fox', p: 4, t: 'comment-1', resolved: false },
},
],
changes: [
{
op: { p: 10, d: 'brown ' },
metadata: {
ts: '2023-01-01T00:00:00.000Z',
user_id: 'user-2',
},
},
],
})
)
expect(result).to.deep.equal({
comments: [
{
ranges: [{ pos: 4, length: 9 }],
id: 'comment-1',
},
],
trackedChanges: [
{
range: { pos: 10, length: 6 },
tracking: {
type: 'delete',
userId: 'user-2',
ts: '2023-01-01T00:00:00.000Z',
},
},
],
})
})
})
})
})
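These cases fix the translation contract of createRangeBlobDataFromUpdate: it returns undefined when an update carries no ranges, merges comment ops sharing a thread id into one comments entry, converts tracked i/d ops into trackedChanges with insert/delete tracking metadata, and honours hpos when the history position differs from the editor position. A small hedged example of calling it directly; the docLines, positions and ids are placeholders taken from the shapes used above:

// Illustrative call; docLines, positions and ids are placeholders.
const blobData = createRangeBlobDataFromUpdate({
  pathname: 'main.tex',
  docLines: 'the quick brown fox',
  version: 'version-1',
  projectHistoryId: 'project-id',
  doc: 'doc',
  meta: { user_id: 'user-id', ts: 0 },
  ranges: {
    comments: [{ op: { c: 'quick', p: 4, t: 'comment-1', resolved: false } }],
    changes: [
      {
        op: { p: 4, i: 'quick' },
        metadata: { ts: '2024-01-01T00:00:00.000Z', user_id: 'user-1' },
      },
    ],
  },
})
// blobData => { comments: [{ id: 'comment-1', ranges: [{ pos: 4, length: 5 }] }],
//               trackedChanges: [{ range: { pos: 4, length: 5 },
//                                  tracking: { type: 'insert', userId: 'user-1',
//                                              ts: '2024-01-01T00:00:00.000Z' } }] }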

View File

@@ -0,0 +1,727 @@
import sinon from 'sinon'
import { expect } from 'chai'
import { strict as esmock } from 'esmock'
import EventEmitter from 'node:events'
import { RequestFailedError } from '@overleaf/fetch-utils'
import * as Errors from '../../../../app/js/Errors.js'
const MODULE_PATH = '../../../../app/js/HistoryStoreManager.js'
describe('HistoryStoreManager', function () {
beforeEach(async function () {
this.projectId = '123456789012345678901234'
this.historyId = 'mock-ol-project-id'
this.settings = {
overleaf: {
history: {
host: 'http://example.com',
user: 'overleaf',
pass: 'password',
requestTimeout: 123,
},
},
apis: {
filestore: {
enabled: true,
url: 'http://filestore.overleaf.production',
},
},
}
this.latestChunkRequestArgs = sinon.match({
method: 'GET',
url: `${this.settings.overleaf.history.host}/projects/${this.historyId}/latest/history`,
json: true,
auth: {
user: this.settings.overleaf.history.user,
pass: this.settings.overleaf.history.pass,
sendImmediately: true,
},
})
this.callback = sinon.stub()
this.LocalFileWriter = {
bufferOnDisk: sinon.stub(),
}
this.WebApiManager = {
getHistoryId: sinon.stub(),
}
this.WebApiManager.getHistoryId
.withArgs(this.projectId)
.yields(null, this.historyId)
this.FetchUtils = {
fetchStream: sinon.stub(),
fetchNothing: sinon.stub().resolves(),
RequestFailedError,
}
this.request = sinon.stub()
this.logger = {
debug: sinon.stub(),
warn: sinon.stub(),
}
this.HistoryStoreManager = await esmock(MODULE_PATH, {
'@overleaf/fetch-utils': this.FetchUtils,
request: this.request,
'@overleaf/settings': this.settings,
'../../../../app/js/LocalFileWriter.js': this.LocalFileWriter,
'../../../../app/js/WebApiManager.js': this.WebApiManager,
'../../../../app/js/Errors.js': Errors,
'@overleaf/logger': this.logger,
})
})
describe('getMostRecentChunk', function () {
describe('successfully', function () {
beforeEach(function () {
this.chunk = {
chunk: {
startVersion: 0,
history: {
snapshot: {
files: {},
},
changes: [],
},
},
}
this.request
.withArgs(this.latestChunkRequestArgs)
.yields(null, { statusCode: 200 }, this.chunk)
this.HistoryStoreManager.getMostRecentChunk(
this.projectId,
this.historyId,
this.callback
)
})
it('should call the callback with the chunk', function () {
expect(this.callback).to.have.been.calledWith(null, this.chunk)
})
})
})
describe('getMostRecentVersion', function () {
describe('successfully', function () {
beforeEach(function () {
this.chunk = {
chunk: {
startVersion: 5,
history: {
snapshot: {
files: {},
},
changes: [
{ v2Authors: ['5678'], timestamp: '2017-10-17T10:44:40.227Z' },
{ v2Authors: ['1234'], timestamp: '2017-10-16T10:44:40.227Z' },
],
},
},
}
this.request
.withArgs(this.latestChunkRequestArgs)
.yields(null, { statusCode: 200 }, this.chunk)
this.HistoryStoreManager.getMostRecentVersion(
this.projectId,
this.historyId,
this.callback
)
})
it('should call the callback with the latest version information', function () {
expect(this.callback).to.have.been.calledWith(
null,
7,
{ project: undefined, docs: {} },
{ v2Authors: ['5678'], timestamp: '2017-10-17T10:44:40.227Z' }
)
})
})
describe('out of order doc ops', function () {
beforeEach(function () {
this.chunk = {
chunk: {
startVersion: 5,
history: {
snapshot: {
v2DocVersions: {
mock_doc_id: {
pathname: '/main.tex',
v: 2,
},
},
},
changes: [
{
operations: [],
v2DocVersions: {
mock_doc_id: {
pathname: '/main.tex',
v: 1,
},
},
},
],
},
},
}
this.request
.withArgs(this.latestChunkRequestArgs)
.yields(null, { statusCode: 200 }, this.chunk)
this.HistoryStoreManager.getMostRecentVersion(
this.projectId,
this.historyId,
this.callback
)
})
it('should return an error', function () {
expect(this.callback).to.have.been.calledWith(
sinon.match
.instanceOf(Errors.OpsOutOfOrderError)
.and(sinon.match.has('message', 'doc version out of order'))
)
})
it('should call the callback with the latest version information', function () {
expect(this.callback).to.have.been.calledWith(
sinon.match.instanceOf(Errors.OpsOutOfOrderError),
6,
{
project: undefined,
docs: { mock_doc_id: { pathname: '/main.tex', v: 2 } },
},
this.chunk.chunk.history.changes[0]
)
})
})
describe('out of order project structure versions', function () {
beforeEach(function () {
this.chunk = {
chunk: {
startVersion: 5,
history: {
snapshot: {
projectVersion: 2,
},
changes: [
{
operations: [{ pathname: 'main.tex', newPathname: '' }],
projectVersion: 1,
},
],
},
},
}
this.request
.withArgs(this.latestChunkRequestArgs)
.yields(null, { statusCode: 200 }, this.chunk)
this.HistoryStoreManager.getMostRecentVersion(
this.projectId,
this.historyId,
this.callback
)
})
it('should return an error', function () {
expect(this.callback).to.have.been.calledWith(
sinon.match
.instanceOf(Errors.OpsOutOfOrderError)
.and(
sinon.match.has(
'message',
'project structure version out of order'
)
)
)
})
it('should call the callback with the latest version information', function () {
expect(this.callback).to.have.been.calledWith(
sinon.match.instanceOf(Errors.OpsOutOfOrderError),
6,
{ project: 2, docs: {} },
this.chunk.chunk.history.changes[0]
)
})
})
describe('out of order project structure and doc versions', function () {
beforeEach(function () {
this.chunk = {
chunk: {
startVersion: 5,
history: {
snapshot: {
projectVersion: 1,
},
changes: [
{
operations: [{ pathname: 'main.tex', newPathname: '' }],
projectVersion: 1,
},
{
operations: [{ pathname: 'main.tex', newPathname: '' }],
projectVersion: 2,
},
{
operations: [{ pathname: 'main.tex', newPathname: '' }],
projectVersion: 3,
},
{
operations: [{ pathname: 'main.tex', newPathname: '' }],
projectVersion: 1,
},
{
operations: [],
v2DocVersions: {
mock_doc_id: {
pathname: '/main.tex',
v: 1,
},
},
},
{
operations: [],
v2DocVersions: {
mock_doc_id: {
pathname: '/main.tex',
v: 2,
},
},
},
{
operations: [],
v2DocVersions: {
mock_doc_id: {
pathname: '/main.tex',
v: 1,
},
},
},
],
},
},
}
this.request
.withArgs(this.latestChunkRequestArgs)
.yields(null, { statusCode: 200 }, this.chunk)
this.HistoryStoreManager.getMostRecentVersion(
this.projectId,
this.historyId,
this.callback
)
})
it('should return an error', function () {
expect(this.callback).to.have.been.calledWith(
sinon.match
.instanceOf(Errors.OpsOutOfOrderError)
.and(
sinon.match.has(
'message',
'project structure version out of order'
)
)
)
})
it('should call the callback with the latest version information', function () {
expect(this.callback).to.have.been.calledWith(
sinon.match.instanceOf(Errors.OpsOutOfOrderError),
12,
{
project: 3,
docs: { mock_doc_id: { pathname: '/main.tex', v: 2 } },
},
this.chunk.chunk.history.changes[6]
)
})
})
describe('with an unexpected response', function () {
beforeEach(function () {
this.badChunk = {
chunk: {
foo: 123, // valid chunk should have startVersion property
bar: 456,
},
}
this.request
.withArgs(this.latestChunkRequestArgs)
.yields(null, { statusCode: 200 }, this.badChunk)
this.HistoryStoreManager.getMostRecentVersion(
this.projectId,
this.historyId,
this.callback
)
})
it('should return an error', function () {
expect(this.callback).to.have.been.calledWith(
sinon.match
.instanceOf(Error)
.and(sinon.match.has('message', 'unexpected response'))
)
})
})
})
describe('createBlobForUpdate', function () {
beforeEach(function () {
this.fileStream = {}
this.hash = 'random-hash'
this.LocalFileWriter.bufferOnDisk.callsArgWith(4, null, this.hash)
this.FetchUtils.fetchNothing.rejects(
new RequestFailedError('', {}, { status: 404 })
)
this.FetchUtils.fetchStream.resolves(this.fileStream)
})
describe('for a file update with any filestore location', function () {
beforeEach(function (done) {
this.file_id = '012345678901234567890123'
this.update = {
file: true,
url: `http://filestore.other.cloud.provider/project/${this.projectId}/file/${this.file_id}`,
hash: this.hash,
}
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
this.update,
(err, { file: hash }) => {
if (err) {
return done(err)
}
this.actualHash = hash
done()
}
)
})
it('should not log any warnings', function () {
expect(this.logger.warn).to.not.have.been.called
})
it('should request the file from the filestore in settings', function () {
expect(this.FetchUtils.fetchStream).to.have.been.calledWithMatch(
`${this.settings.apis.filestore.url}/project/${this.projectId}/file/${this.file_id}`
)
})
it('should call the callback with the blob', function () {
expect(this.actualHash).to.equal(this.hash)
})
})
describe('with filestore disabled', function () {
beforeEach(function (done) {
this.settings.apis.filestore.enabled = false
this.file_id = '012345678901234567890123'
this.update = {
file: true,
url: `http://filestore.other.cloud.provider/project/${this.projectId}/file/${this.file_id}`,
hash: this.hash,
}
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
this.update,
err => {
expect(err).to.match(/blocking filestore read/)
done()
}
)
})
it('should not request the file', function () {
expect(this.FetchUtils.fetchStream).to.not.have.been.called
})
})
describe('for a file update with an invalid filestore location', function () {
beforeEach(function (done) {
this.invalid_id = '000000000000000000000000'
this.file_id = '012345678901234567890123'
this.update = {
file: true,
url: `http://filestore.other.cloud.provider/project/${this.invalid_id}/file/${this.file_id}`,
hash: this.hash,
}
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
this.update,
err => {
expect(err).to.exist
done()
}
)
})
it('should not request the file from the filestore', function () {
expect(this.FetchUtils.fetchStream).to.not.have.been.called
})
})
describe('when the hash mismatches', function () {
beforeEach(function (done) {
this.file_id = '012345678901234567890123'
this.update = {
file: true,
url: `http://filestore.other.cloud.provider/project/${this.projectId}/file/${this.file_id}`,
hash: 'another-hash-from-web',
}
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
this.update,
(err, { file: hash }) => {
if (err) {
return done(err)
}
this.actualHash = hash
done()
}
)
})
it('should log a warning', function () {
expect(this.logger.warn).to.have.been.calledWith(
{
projectId: this.projectId,
fileId: this.file_id,
webHash: 'another-hash-from-web',
fileHash: this.hash,
},
'hash mismatch between web and project-history'
)
})
it('should request the file from the filestore in settings', function () {
expect(this.FetchUtils.fetchStream).to.have.been.calledWithMatch(
`${this.settings.apis.filestore.url}/project/${this.projectId}/file/${this.file_id}`
)
})
it('should call the callback with the blob', function () {
expect(this.actualHash).to.equal(this.hash)
})
})
describe('when the createdBlob flag is set on the update', function () {
beforeEach(function () {
this.file_id = '012345678901234567890123'
this.update = {
file: true,
createdBlob: true,
url: `http://filestore.other.cloud.provider/project/${this.projectId}/file/${this.file_id}`,
hash: this.hash,
}
})
describe('when history-v1 confirms that the blob exists', function () {
beforeEach(function (done) {
this.FetchUtils.fetchNothing.resolves()
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
this.update,
(err, { file: hash }) => {
if (err) {
return done(err)
}
this.actualHash = hash
done()
}
)
})
it('should call the callback with the existing hash', function () {
expect(this.actualHash).to.equal(this.hash)
})
it('should not request the file from the filestore', function () {
expect(this.FetchUtils.fetchStream).to.not.have.been.called
})
it('should log a debug level message', function () {
expect(this.logger.debug).to.have.been.calledWith(
{
projectId: this.projectId,
fileId: this.file_id,
update: this.update,
},
'Skipping blob creation as it has already been created'
)
})
})
describe('when history-v1 does not confirm that the blob exists', function () {
beforeEach(function (done) {
this.FetchUtils.fetchNothing.rejects(
new RequestFailedError(
`${this.settings.overleaf.history.host}/project/${this.projectId}/file/${this.file_id}`,
{ method: 'HEAD' },
{ status: 404 }
)
)
this.HistoryStoreManager.createBlobForUpdate(
this.projectId,
this.historyId,
this.update,
(err, { file: hash }) => {
if (err) {
return done(err)
}
this.actualHash = hash
done()
}
)
})
it('should warn that we will use the filestore', function () {
expect(this.logger.warn).to.have.been.calledWithMatch(
{
fileId: this.file_id,
projectId: this.projectId,
update: this.update,
},
'created blob does not exist, reading from filestore'
)
})
it('should request the file from the filestore in settings', function () {
expect(this.FetchUtils.fetchStream).to.have.been.calledWithMatch(
`${this.settings.apis.filestore.url}/project/${this.projectId}/file/${this.file_id}`
)
})
it('should call the callback with the blob', function () {
expect(this.actualHash).to.equal(this.hash)
})
})
})
})
describe('getProjectBlob', function () {
describe('successfully', function () {
beforeEach(function () {
this.blobContent = 'test content'
this.blobHash = 'test hash'
this.request.yields(null, { statusCode: 200 }, this.blobContent)
this.HistoryStoreManager.getProjectBlob(
this.historyId,
this.blobHash,
this.callback
)
})
it('should get the blob from the overleaf history service', function () {
expect(this.request).to.have.been.calledWithMatch({
method: 'GET',
url: `${this.settings.overleaf.history.host}/projects/${this.historyId}/blobs/${this.blobHash}`,
auth: {
user: this.settings.overleaf.history.user,
pass: this.settings.overleaf.history.pass,
sendImmediately: true,
},
})
})
it('should call the callback with the blob', function () {
expect(this.callback).to.have.been.calledWith(null, this.blobContent)
})
})
})
describe('getProjectBlobStream', function () {
describe('successfully', function () {
beforeEach(function (done) {
this.historyResponse = new EventEmitter()
this.blobHash = 'test hash'
this.FetchUtils.fetchStream.resolves(this.historyResponse)
this.HistoryStoreManager.getProjectBlobStream(
this.historyId,
this.blobHash,
(err, stream) => {
if (err) {
return done(err)
}
this.stream = stream
done()
}
)
})
it('should get the blob from the overleaf history service', function () {
expect(this.FetchUtils.fetchStream).to.have.been.calledWithMatch(
`${this.settings.overleaf.history.host}/projects/${this.historyId}/blobs/${this.blobHash}`
)
})
it('should return a stream of the blob contents', function () {
expect(this.stream).to.equal(this.historyResponse)
})
})
})
describe('initializeProject', function () {
describe('successfully', function () {
beforeEach(function () {
this.response_body = { projectId: this.historyId }
this.request.callsArgWith(
1,
null,
{ statusCode: 200 },
this.response_body
)
this.HistoryStoreManager.initializeProject(
this.historyId,
this.callback
)
})
it('should send the change to the history store', function () {
expect(this.request).to.have.been.calledWithMatch({
method: 'POST',
url: `${this.settings.overleaf.history.host}/projects`,
auth: {
user: this.settings.overleaf.history.user,
pass: this.settings.overleaf.history.pass,
sendImmediately: true,
},
json: { projectId: this.historyId },
})
})
it('should call the callback with the new overleaf id', function () {
expect(this.callback).to.have.been.calledWith(null, this.historyId)
})
})
})
describe('deleteProject', function () {
beforeEach(function (done) {
this.request.yields(null, { statusCode: 204 }, '')
this.HistoryStoreManager.deleteProject(this.historyId, done)
})
it('should ask the history store to delete the project', function () {
expect(this.request).to.have.been.calledWithMatch({
method: 'DELETE',
url: `${this.settings.overleaf.history.host}/projects/${this.historyId}`,
})
})
})
})
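A recurring detail asserted above is how the latest version is derived in getMostRecentVersion: it is the chunk's startVersion plus the number of changes in the chunk (startVersion 5 with two changes yields version 7), returned alongside the latest project/doc versions recovered from the snapshot and changes. A hedged sketch of that arithmetic only; the real implementation also validates ordering and reports OpsOutOfOrderError:

// Illustrative sketch: derive the latest version number from a chunk,
// mirroring the expectations in the getMostRecentVersion tests above.
function latestVersionFromChunk(chunk) {
  const { startVersion, history } = chunk.chunk
  return startVersion + history.changes.length // e.g. 5 + 2 === 7
}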

View File

@@ -0,0 +1,573 @@
import sinon from 'sinon'
import { strict as esmock } from 'esmock'
import mongodb from 'mongodb-legacy'
const { ObjectId } = mongodb
const MODULE_PATH = '../../../../app/js/HttpController.js'
describe('HttpController', function () {
beforeEach(async function () {
this.UpdatesProcessor = {
processUpdatesForProject: sinon.stub().yields(),
}
this.SummarizedUpdatesManager = {
getSummarizedProjectUpdates: sinon.stub(),
}
this.DiffManager = {
getDiff: sinon.stub(),
}
this.HistoryStoreManager = {
deleteProject: sinon.stub().yields(),
getMostRecentVersion: sinon.stub(),
getProjectBlobStream: sinon.stub(),
initializeProject: sinon.stub(),
}
this.SnapshotManager = {
getFileSnapshotStream: sinon.stub(),
getProjectSnapshot: sinon.stub(),
}
this.HealthChecker = {}
this.SyncManager = {
clearResyncState: sinon.stub().yields(),
startResync: sinon.stub().yields(),
}
this.WebApiManager = {
getHistoryId: sinon.stub(),
}
this.RedisManager = {
destroyDocUpdatesQueue: sinon.stub().yields(),
clearFirstOpTimestamp: sinon.stub().yields(),
clearCachedHistoryId: sinon.stub().yields(),
}
this.ErrorRecorder = {
clearError: sinon.stub().yields(),
}
this.LabelsManager = {
createLabel: sinon.stub(),
deleteLabel: sinon.stub().yields(),
deleteLabelForUser: sinon.stub().yields(),
getLabels: sinon.stub(),
}
this.HistoryApiManager = {
shouldUseProjectHistory: sinon.stub(),
}
this.RetryManager = {}
this.FlushManager = {}
this.request = {}
this.pipeline = sinon.stub()
this.HttpController = await esmock(MODULE_PATH, {
request: this.request,
stream: { pipeline: this.pipeline },
'../../../../app/js/UpdatesProcessor.js': this.UpdatesProcessor,
'../../../../app/js/SummarizedUpdatesManager.js':
this.SummarizedUpdatesManager,
'../../../../app/js/DiffManager.js': this.DiffManager,
'../../../../app/js/HistoryStoreManager.js': this.HistoryStoreManager,
'../../../../app/js/SnapshotManager.js': this.SnapshotManager,
'../../../../app/js/HealthChecker.js': this.HealthChecker,
'../../../../app/js/SyncManager.js': this.SyncManager,
'../../../../app/js/WebApiManager.js': this.WebApiManager,
'../../../../app/js/RedisManager.js': this.RedisManager,
'../../../../app/js/ErrorRecorder.js': this.ErrorRecorder,
'../../../../app/js/LabelsManager.js': this.LabelsManager,
'../../../../app/js/HistoryApiManager.js': this.HistoryApiManager,
'../../../../app/js/RetryManager.js': this.RetryManager,
'../../../../app/js/FlushManager.js': this.FlushManager,
})
this.pathname = 'doc-id-123'
this.projectId = new ObjectId().toString()
this.projectOwnerId = new ObjectId().toString()
this.next = sinon.stub()
this.userId = new ObjectId().toString()
this.now = Date.now()
this.res = {
json: sinon.stub(),
send: sinon.stub(),
sendStatus: sinon.stub(),
setHeader: sinon.stub(),
}
})
describe('getProjectBlob', function () {
beforeEach(function () {
this.blobHash = 'abcd'
this.stream = {}
this.historyId = 1337
this.HistoryStoreManager.getProjectBlobStream.yields(null, this.stream)
this.HttpController.getProjectBlob(
{ params: { history_id: this.historyId, hash: this.blobHash } },
this.res,
this.next
)
})
it('should get a blob stream', function () {
this.HistoryStoreManager.getProjectBlobStream
.calledWith(this.historyId, this.blobHash)
.should.equal(true)
this.pipeline.should.have.been.calledWith(this.stream, this.res)
})
it('should set caching header', function () {
this.res.setHeader.should.have.been.calledWith(
'Cache-Control',
'private, max-age=86400'
)
})
})
describe('initializeProject', function () {
beforeEach(function () {
this.historyId = new ObjectId().toString()
this.req = { body: { historyId: this.historyId } }
this.HistoryStoreManager.initializeProject.yields(null, this.historyId)
this.HttpController.initializeProject(this.req, this.res, this.next)
})
it('should initialize the project', function () {
this.HistoryStoreManager.initializeProject.calledWith().should.equal(true)
})
it('should return the new overleaf id', function () {
this.res.json
.calledWith({ project: { id: this.historyId } })
.should.equal(true)
})
})
describe('flushProject', function () {
beforeEach(function () {
this.req = {
params: {
project_id: this.projectId,
},
query: {},
}
this.HttpController.flushProject(this.req, this.res, this.next)
})
it('should process the updates', function () {
this.UpdatesProcessor.processUpdatesForProject
.calledWith(this.projectId)
.should.equal(true)
})
it('should return a success code', function () {
this.res.sendStatus.calledWith(204).should.equal(true)
})
})
describe('getDiff', function () {
beforeEach(function () {
this.from = 42
this.to = 45
this.req = {
params: {
project_id: this.projectId,
},
query: {
pathname: this.pathname,
from: this.from,
to: this.to,
},
}
this.diff = [{ u: 'mock-diff' }]
this.DiffManager.getDiff.yields(null, this.diff)
this.HttpController.getDiff(this.req, this.res, this.next)
})
it('should get the diff', function () {
this.DiffManager.getDiff.should.have.been.calledWith(
this.projectId,
this.pathname,
this.from,
this.to
)
})
it('should return the diff', function () {
this.res.json.calledWith({ diff: this.diff }).should.equal(true)
})
})
describe('getUpdates', function () {
beforeEach(function () {
this.before = Date.now()
this.nextBeforeTimestamp = this.before - 100
this.min_count = 10
this.req = {
params: {
project_id: this.projectId,
},
query: {
before: this.before,
min_count: this.min_count,
},
}
this.updates = [{ i: 'mock-summarized-updates', p: 10 }]
this.SummarizedUpdatesManager.getSummarizedProjectUpdates.yields(
null,
this.updates,
this.nextBeforeTimestamp
)
this.HttpController.getUpdates(this.req, this.res, this.next)
})
it('should get the updates', function () {
this.SummarizedUpdatesManager.getSummarizedProjectUpdates.should.have.been.calledWith(
this.projectId,
{
before: this.before,
min_count: this.min_count,
}
)
})
it('should return the formatted updates', function () {
this.res.json.should.have.been.calledWith({
updates: this.updates,
nextBeforeTimestamp: this.nextBeforeTimestamp,
})
})
})
describe('latestVersion', function () {
beforeEach(function () {
this.historyId = 1234
this.req = {
params: {
project_id: this.projectId,
},
}
this.version = 99
this.lastChange = {
v2Authors: ['1234'],
timestamp: '2016-08-16T10:44:40.227Z',
}
this.versionInfo = {
version: this.version,
v2Authors: ['1234'],
timestamp: '2016-08-16T10:44:40.227Z',
}
this.WebApiManager.getHistoryId.yields(null, this.historyId)
this.HistoryStoreManager.getMostRecentVersion.yields(
null,
this.version,
{},
this.lastChange
)
this.HttpController.latestVersion(this.req, this.res, this.next)
})
it('should process the updates', function () {
this.UpdatesProcessor.processUpdatesForProject
.calledWith(this.projectId)
.should.equal(true)
})
it('should get the ol project id', function () {
this.WebApiManager.getHistoryId
.calledWith(this.projectId)
.should.equal(true)
})
it('should get the latest version', function () {
this.HistoryStoreManager.getMostRecentVersion
.calledWith(this.projectId, this.historyId)
.should.equal(true)
})
it('should return version number', function () {
this.res.json.calledWith(this.versionInfo).should.equal(true)
})
})
describe('resyncProject', function () {
beforeEach(function () {
this.req = {
params: {
project_id: this.projectId,
},
query: {},
body: {},
}
this.HttpController.resyncProject(this.req, this.res, this.next)
})
it('should resync the project', function () {
this.SyncManager.startResync.calledWith(this.projectId).should.equal(true)
})
it('should flush the queue', function () {
this.UpdatesProcessor.processUpdatesForProject
.calledWith(this.projectId)
.should.equal(true)
})
it('should return 204', function () {
this.res.sendStatus.calledWith(204).should.equal(true)
})
})
describe('getFileSnapshot', function () {
beforeEach(function () {
this.version = 42
this.pathname = 'foo.tex'
this.req = {
params: {
project_id: this.projectId,
version: this.version,
pathname: this.pathname,
},
}
this.res = { mock: 'res' }
this.stream = {}
this.SnapshotManager.getFileSnapshotStream.yields(null, this.stream)
this.HttpController.getFileSnapshot(this.req, this.res, this.next)
})
it('should get the snapshot', function () {
this.SnapshotManager.getFileSnapshotStream.should.have.been.calledWith(
this.projectId,
this.version,
this.pathname
)
})
it('should pipe the returned stream into the response', function () {
this.pipeline.should.have.been.calledWith(this.stream, this.res)
})
})
describe('getProjectSnapshot', function () {
beforeEach(function () {
this.version = 42
this.req = {
params: {
project_id: this.projectId,
version: this.version,
},
}
this.res = { json: sinon.stub() }
this.snapshotData = { one: 1 }
this.SnapshotManager.getProjectSnapshot.yields(null, this.snapshotData)
this.HttpController.getProjectSnapshot(this.req, this.res, this.next)
})
it('should get the snapshot', function () {
this.SnapshotManager.getProjectSnapshot.should.have.been.calledWith(
this.projectId,
this.version
)
})
it('should send json response', function () {
this.res.json.calledWith(this.snapshotData).should.equal(true)
})
})
describe('getLabels', function () {
beforeEach(function () {
this.req = {
params: {
project_id: this.projectId,
},
}
this.labels = ['label-1', 'label-2']
this.LabelsManager.getLabels.yields(null, this.labels)
})
describe('project history is enabled', function () {
beforeEach(function () {
this.HistoryApiManager.shouldUseProjectHistory.yields(null, true)
this.HttpController.getLabels(this.req, this.res, this.next)
})
it('should get the labels for a project', function () {
this.LabelsManager.getLabels
.calledWith(this.projectId)
.should.equal(true)
})
it('should return the labels', function () {
this.res.json.calledWith(this.labels).should.equal(true)
})
})
describe('project history is not enabled', function () {
beforeEach(function () {
this.HistoryApiManager.shouldUseProjectHistory.yields(null, false)
this.HttpController.getLabels(this.req, this.res, this.next)
})
it('should return 409', function () {
this.res.sendStatus.calledWith(409).should.equal(true)
})
})
})
describe('createLabel', function () {
beforeEach(function () {
this.req = {
params: {
project_id: this.projectId,
},
body: {
version: (this.version = 'label-1'),
comment: (this.comment = 'a comment'),
created_at: (this.created_at = Date.now().toString()),
validate_exists: true,
user_id: this.userId,
},
}
this.label = { _id: new ObjectId() }
this.LabelsManager.createLabel.yields(null, this.label)
})
describe('project history is enabled', function () {
beforeEach(function () {
this.HistoryApiManager.shouldUseProjectHistory.yields(null, true)
this.HttpController.createLabel(this.req, this.res, this.next)
})
it('should create a label for a project', function () {
this.LabelsManager.createLabel.should.have.been.calledWith(
this.projectId,
this.userId,
this.version,
this.comment,
this.created_at,
true
)
})
it('should return the label', function () {
this.res.json.calledWith(this.label).should.equal(true)
})
})
describe('validate_exists = false is passed', function () {
beforeEach(function () {
this.req.body.validate_exists = false
this.HistoryApiManager.shouldUseProjectHistory.yields(null, true)
this.HttpController.createLabel(this.req, this.res, this.next)
})
it('should create a label for a project', function () {
this.LabelsManager.createLabel
.calledWith(
this.projectId,
this.userId,
this.version,
this.comment,
this.created_at,
false
)
.should.equal(true)
})
it('should return the label', function () {
this.res.json.calledWith(this.label).should.equal(true)
})
})
describe('project history is not enabled', function () {
beforeEach(function () {
this.HistoryApiManager.shouldUseProjectHistory.yields(null, false)
this.HttpController.createLabel(this.req, this.res, this.next)
})
it('should return 409', function () {
this.res.sendStatus.calledWith(409).should.equal(true)
})
})
})
describe('deleteLabelForUser', function () {
beforeEach(function () {
this.req = {
params: {
project_id: this.projectId,
user_id: this.userId,
label_id: (this.label_id = new ObjectId()),
},
}
this.HttpController.deleteLabelForUser(this.req, this.res, this.next)
})
it('should delete a label for a project', function () {
this.LabelsManager.deleteLabelForUser
.calledWith(this.projectId, this.userId, this.label_id)
.should.equal(true)
})
it('should return 204', function () {
this.res.sendStatus.calledWith(204).should.equal(true)
})
})
describe('deleteLabel', function () {
beforeEach(function () {
this.req = {
params: {
project_id: this.projectId,
label_id: (this.label_id = new ObjectId()),
},
}
this.HttpController.deleteLabel(this.req, this.res, this.next)
})
it('should delete a label for a project', function () {
this.LabelsManager.deleteLabel
.calledWith(this.projectId, this.label_id)
.should.equal(true)
})
it('should return 204', function () {
this.res.sendStatus.calledWith(204).should.equal(true)
})
})
describe('deleteProject', function () {
beforeEach(function () {
this.req = {
params: {
project_id: this.projectId,
},
}
this.WebApiManager.getHistoryId
.withArgs(this.projectId)
.yields(null, this.historyId)
this.HttpController.deleteProject(this.req, this.res, this.next)
})
it('should delete the updates queue', function () {
this.RedisManager.destroyDocUpdatesQueue.should.have.been.calledWith(
this.projectId
)
})
it('should clear the first op timestamp', function () {
this.RedisManager.clearFirstOpTimestamp.should.have.been.calledWith(
this.projectId
)
})
it('should clear the cached history id', function () {
this.RedisManager.clearCachedHistoryId.should.have.been.calledWith(
this.projectId
)
})
it('should clear the resync state', function () {
this.SyncManager.clearResyncState.should.have.been.calledWith(
this.projectId
)
})
it('should clear any failure record', function () {
this.ErrorRecorder.clearError.should.have.been.calledWith(this.projectId)
})
})
})
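Every handler exercised above follows the Express (req, res, next) signature, pulling ids from req.params, options from req.query or req.body, and answering with res.json(...) or res.sendStatus(...). A hedged sketch of how such handlers could be mounted; the route paths and import path are assumptions for illustration, not the service's actual router:

// Illustrative wiring only; paths are assumptions, not the service's actual routes.
import express from 'express'
import * as HttpController from '../../../../app/js/HttpController.js'

const app = express()
app.post('/project/:project_id/flush', HttpController.flushProject)
app.get('/project/:project_id/diff', HttpController.getDiff)
app.get('/project/:project_id/labels', HttpController.getLabels)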

View File

@@ -0,0 +1,293 @@
import sinon from 'sinon'
import { expect } from 'chai'
import mongodb from 'mongodb-legacy'
import tk from 'timekeeper'
import { strict as esmock } from 'esmock'
const { ObjectId } = mongodb
const MODULE_PATH = '../../../../app/js/LabelsManager.js'
describe('LabelsManager', function () {
beforeEach(async function () {
this.now = new Date()
tk.freeze(this.now)
this.db = {
projectHistoryLabels: {
deleteOne: sinon.stub(),
find: sinon.stub(),
insertOne: sinon.stub(),
},
}
this.mongodb = {
ObjectId,
db: this.db,
}
this.HistoryStoreManager = {
getChunkAtVersion: sinon.stub().yields(),
}
this.UpdatesProcessor = {
processUpdatesForProject: sinon.stub().yields(),
}
this.WebApiManager = {
getHistoryId: sinon.stub(),
}
this.LabelsManager = await esmock(MODULE_PATH, {
'../../../../app/js/mongodb.js': this.mongodb,
'../../../../app/js/HistoryStoreManager.js': this.HistoryStoreManager,
'../../../../app/js/UpdatesProcessor.js': this.UpdatesProcessor,
'../../../../app/js/WebApiManager.js': this.WebApiManager,
})
this.project_id = new ObjectId().toString()
this.historyId = 123
this.user_id = new ObjectId().toString()
this.label_id = new ObjectId().toString()
this.callback = sinon.stub()
})
afterEach(function () {
tk.reset()
})
describe('getLabels', function () {
beforeEach(function () {
this.label = {
_id: new ObjectId(),
comment: 'some comment',
version: 123,
user_id: new ObjectId(),
created_at: new Date(),
}
this.db.projectHistoryLabels.find.returns({
toArray: sinon.stub().yields(null, [this.label]),
})
})
describe('with valid project id', function () {
beforeEach(function () {
this.LabelsManager.getLabels(this.project_id, this.callback)
})
it('gets the labels state from mongo', function () {
expect(this.db.projectHistoryLabels.find).to.have.been.calledWith({
project_id: new ObjectId(this.project_id),
})
})
it('returns formatted labels', function () {
expect(this.callback).to.have.been.calledWith(null, [
sinon.match({
id: this.label._id,
comment: this.label.comment,
version: this.label.version,
user_id: this.label.user_id,
created_at: this.label.created_at,
}),
])
})
})
describe('with invalid project id', function () {
it('returns an error', function (done) {
this.LabelsManager.getLabels('invalid id', error => {
expect(error).to.exist
done()
})
})
})
})
describe('createLabel', function () {
beforeEach(function () {
this.version = 123
this.comment = 'a comment'
this.WebApiManager.getHistoryId.yields(null, this.historyId)
})
describe('with createdAt', function () {
beforeEach(function () {
this.createdAt = new Date(1)
this.db.projectHistoryLabels.insertOne.yields(null, {
insertedId: new ObjectId(this.label_id),
})
this.LabelsManager.createLabel(
this.project_id,
this.user_id,
this.version,
this.comment,
this.createdAt,
true,
this.callback
)
})
it('flushes unprocessed updates', function () {
expect(
this.UpdatesProcessor.processUpdatesForProject
).to.have.been.calledWith(this.project_id)
})
it('finds the V1 project id', function () {
expect(this.WebApiManager.getHistoryId).to.have.been.calledWith(
this.project_id
)
})
it('checks there is a chunk for the project + version', function () {
expect(
this.HistoryStoreManager.getChunkAtVersion
).to.have.been.calledWith(this.project_id, this.historyId, this.version)
})
it('creates the label in mongo', function () {
expect(this.db.projectHistoryLabels.insertOne).to.have.been.calledWith(
sinon.match({
project_id: new ObjectId(this.project_id),
comment: this.comment,
version: this.version,
user_id: new ObjectId(this.user_id),
created_at: this.createdAt,
}),
sinon.match.any
)
})
it('returns the label', function () {
expect(this.callback).to.have.been.calledWith(null, {
id: new ObjectId(this.label_id),
comment: this.comment,
version: this.version,
user_id: new ObjectId(this.user_id),
created_at: this.createdAt,
})
})
})
describe('without createdAt', function () {
beforeEach(function () {
this.db.projectHistoryLabels.insertOne.yields(null, {
insertedId: new ObjectId(this.label_id),
})
this.LabelsManager.createLabel(
this.project_id,
this.user_id,
this.version,
this.comment,
undefined,
true,
this.callback
)
})
it('creates the label with the current date', function () {
expect(this.db.projectHistoryLabels.insertOne).to.have.been.calledWith(
sinon.match({
project_id: new ObjectId(this.project_id),
comment: this.comment,
version: this.version,
user_id: new ObjectId(this.user_id),
created_at: this.now,
})
)
})
})
describe('with shouldValidateExists = false', function () {
beforeEach(function () {
this.createdAt = new Date(1)
this.db.projectHistoryLabels.insertOne.yields(null, {
insertedId: new ObjectId(this.label_id),
})
this.LabelsManager.createLabel(
this.project_id,
this.user_id,
this.version,
this.comment,
this.createdAt,
false,
this.callback
)
})
it('does not check that there is a chunk for the project + version', function () {
expect(this.HistoryStoreManager.getChunkAtVersion).to.not.have.been
.called
})
})
describe('with no userId', function () {
beforeEach(function () {
this.db.projectHistoryLabels.insertOne.yields(null, {
insertedId: new ObjectId(this.label_id),
})
const userId = undefined
this.LabelsManager.createLabel(
this.project_id,
userId,
this.version,
this.comment,
this.createdAt,
false,
this.callback
)
})
it('creates the label without user_id', function () {
expect(this.db.projectHistoryLabels.insertOne).to.have.been.calledWith(
sinon.match({
project_id: new ObjectId(this.project_id),
comment: this.comment,
version: this.version,
user_id: undefined,
created_at: this.now,
})
)
})
})
})
describe('deleteLabelForUser', function () {
beforeEach(function () {
this.db.projectHistoryLabels.deleteOne.yields()
this.LabelsManager.deleteLabelForUser(
this.project_id,
this.user_id,
this.label_id,
this.callback
)
})
it('removes the label from the database', function () {
expect(this.db.projectHistoryLabels.deleteOne).to.have.been.calledWith(
{
_id: new ObjectId(this.label_id),
project_id: new ObjectId(this.project_id),
user_id: new ObjectId(this.user_id),
},
this.callback
)
})
})
describe('deleteLabel', function () {
beforeEach(function () {
this.db.projectHistoryLabels.deleteOne.yields()
this.LabelsManager.deleteLabel(
this.project_id,
this.label_id,
this.callback
)
})
it('removes the label from the database', function () {
expect(this.db.projectHistoryLabels.deleteOne).to.have.been.calledWith(
{
_id: new ObjectId(this.label_id),
project_id: new ObjectId(this.project_id),
},
this.callback
)
})
})
})
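The createLabel tests above establish the call order (flush pending updates, resolve the v1 history id, optionally verify that a chunk exists at the requested version, then insert) and the shape yielded to the callback. A hedged usage example; the ids, version and comment are placeholders:

// Illustrative usage; ids, version and comment are placeholders.
LabelsManager.createLabel(
  'project-id',          // project_id
  'user-id',             // user_id (may be undefined)
  123,                   // version to label
  'a comment',           // label comment
  new Date(),            // created_at (defaults to now when omitted)
  true,                  // validate that a chunk exists at this version
  (error, label) => {
    if (error) throw error
    // label => { id, comment, version, user_id, created_at }
  }
)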

Some files were not shown because too many files have changed in this diff.