first commit

2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions


@@ -0,0 +1,2 @@
*
!.gitignore


@@ -0,0 +1,195 @@
const config = require('config')
const fetch = require('node-fetch')
const sinon = require('sinon')
const { expect } = require('chai')
const cleanup = require('../storage/support/cleanup')
const expectResponse = require('./support/expect_response')
const fixtures = require('../storage/support/fixtures')
const HTTPStatus = require('http-status')
const testServer = require('./support/test_server')
describe('auth', function () {
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
beforeEach('Set up stubs', function () {
sinon.stub(config, 'has').callThrough()
sinon.stub(config, 'get').callThrough()
})
afterEach(sinon.restore)
it('renders 401 on swagger docs endpoint without auth', async function () {
const response = await fetch(testServer.url('/docs'))
expect(response.status).to.equal(HTTPStatus.UNAUTHORIZED)
expect(response.headers.get('www-authenticate')).to.match(/^Basic/)
})
it('renders swagger docs endpoint with auth', async function () {
const response = await fetch(testServer.url('/docs'), {
headers: {
Authorization: testServer.basicAuthHeader,
},
})
expect(response.ok).to.be.true
})
it('takes an old basic auth password during a password change', async function () {
setMockConfig('basicHttpAuth.oldPassword', 'foo')
// The primary password should still work.
const response1 = await fetch(testServer.url('/docs'), {
headers: {
Authorization: testServer.basicAuthHeader,
},
})
expect(response1.ok).to.be.true
// Old password should also work.
const response2 = await fetch(testServer.url('/docs'), {
headers: {
Authorization: 'Basic ' + Buffer.from('staging:foo').toString('base64'),
},
})
expect(response2.ok).to.be.true
// Incorrect password should not work.
const response3 = await fetch(testServer.url('/docs'), {
headers: {
Authorization: 'Basic ' + Buffer.from('staging:bar').toString('base64'),
},
})
expect(response3.status).to.equal(HTTPStatus.UNAUTHORIZED)
})
it('renders 401 on ProjectImport endpoints', async function () {
const unauthenticatedClient = testServer.client
try {
await unauthenticatedClient.apis.ProjectImport.importSnapshot1({
project_id: '1',
snapshot: { files: {} },
})
expect.fail()
} catch (err) {
expectResponse.unauthorized(err)
expect(err.response.headers['www-authenticate']).to.match(/^Basic/)
}
// Check that the snapshot was not persisted, even though the response was a 401.
const projectClient = await testServer.createClientForProject('1')
try {
await projectClient.apis.Project.getLatestHistory({ project_id: '1' })
expect.fail()
} catch (err) {
expectResponse.notFound(err)
}
})
it('renders 401 for JWT endpoints', function () {
return testServer.client.apis.Project.getLatestHistory({
project_id: '10000',
})
.then(() => {
expect.fail()
})
.catch(err => {
expectResponse.unauthorized(err)
expect(err.response.headers['www-authenticate']).to.equal('Bearer')
})
})
it('accepts basic auth in place of JWT (for now)', function () {
const projectId = fixtures.docs.initializedProject.id
return testServer.pseudoJwtBasicAuthClient.apis.Project.getLatestHistory({
project_id: projectId,
}).then(response => {
expect(response.obj.chunk).to.exist
})
})
it('uses JWT', function () {
const projectId = fixtures.docs.initializedProject.id
return testServer
.createClientForProject(projectId)
.then(client => {
return client.apis.Project.getLatestHistory({
project_id: projectId,
})
})
.then(response => {
expect(response.obj.chunk).to.exist
})
})
it('checks for project id', function () {
return testServer
.createClientForProject('1')
.then(client => {
return client.apis.Project.getLatestHistory({
project_id: '2',
})
})
.then(() => {
expect.fail()
})
.catch(expectResponse.forbidden)
})
it('does not accept jwt for ProjectUpdate endpoints', function () {
return testServer.createClientForProject('1').then(client => {
return client.apis.ProjectImport.importSnapshot1({
project_id: '1',
snapshot: {},
})
.then(() => {
expect.fail()
})
.catch(expectResponse.unauthorized)
})
})
describe('when an old JWT key is defined', function () {
beforeEach(function () {
setMockConfig('jwtAuth.oldKey', 'old-secret')
})
it('accepts the old key', async function () {
const projectId = fixtures.docs.initializedProject.id
const client = await testServer.createClientForProject(projectId, {
jwtKey: 'old-secret',
})
const response = await client.apis.Project.getLatestHistory({
project_id: projectId,
})
expect(response.obj.chunk).to.exist
})
it('accepts the new key', async function () {
const projectId = fixtures.docs.initializedProject.id
const client = await testServer.createClientForProject(projectId)
const response = await client.apis.Project.getLatestHistory({
project_id: projectId,
})
expect(response.obj.chunk).to.exist
})
it('rejects other keys', async function () {
const projectId = fixtures.docs.initializedProject.id
const client = await testServer.createClientForProject(projectId, {
jwtKey: 'bad-secret',
})
try {
await client.apis.Project.getLatestHistory({
project_id: projectId,
})
expect.fail()
} catch (err) {
expectResponse.unauthorized(err)
}
})
})
})
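/**
* Make config.has/config.get (stubbed with sinon in the beforeEach hooks above)
* report the given config path as present and set to the given value.
*/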
function setMockConfig(path, value) {
config.has.withArgs(path).returns(true)
config.get.withArgs(path).returns(value)
}


@@ -0,0 +1,244 @@
// @ts-check
import cleanup from '../storage/support/cleanup.js'
import fetch from 'node-fetch'
import testServer from './support/test_backup_deletion_server.mjs'
import { expect } from 'chai'
import testProjects from './support/test_projects.js'
import { db } from '../../../../storage/lib/mongodb.js'
import { ObjectId } from 'mongodb'
import {
backupPersistor,
projectBlobsBucket,
chunksBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { makeProjectKey } from '../../../../storage/lib/blob_store/index.js'
import config from 'config'
import Stream from 'stream'
import projectKey from '../../../../storage/lib/project_key.js'
/**
* @typedef {import("node-fetch").Response} Response
*/
const { deksBucket } = config.get('backupStore')
const deletedProjectsCollection = db.collection('deletedProjects')
/**
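* List the object keys under a prefix in an S3 bucket, using the backup
* persistor's internal client.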
* @param {string} bucket
* @param {string} prefix
* @return {Promise<Array<string>>}
*/
async function listS3Bucket(bucket, prefix) {
// @ts-ignore access to internal library helper
const client = backupPersistor._getClientForBucket(bucket)
const response = await client
.listObjectsV2({ Bucket: bucket, Prefix: prefix })
.promise()
return (response.Contents || []).map(item => item.Key || '')
}
/**
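* Call the backup deletion endpoint for a project.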
* @param {ObjectId} projectId
* @return {Promise<Response>}
*/
async function deleteProject(projectId) {
return await fetch(testServer.testUrl(`/project/${projectId}/backup`), {
method: 'DELETE',
headers: { Authorization: testServer.basicAuthHeader },
})
}
/**
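* Assert that the project's backup prefix contains one DEK, two chunks and two blobs.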
* @param {number|ObjectId} historyId
* @return {Promise<void>}
*/
async function expectToHaveBackup(historyId) {
const prefix = projectKey.format(historyId.toString()) + '/'
expect(await listS3Bucket(deksBucket, prefix)).to.have.length(1)
expect(await listS3Bucket(chunksBucket, prefix)).to.have.length(2)
expect(await listS3Bucket(projectBlobsBucket, prefix)).to.have.length(2)
}
/**
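* Assert that nothing remains under the project's backup prefix (no DEKs, chunks or blobs).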
* @param {number|ObjectId} historyId
* @return {Promise<void>}
*/
async function expectToHaveNoBackup(historyId) {
const prefix = projectKey.format(historyId.toString()) + '/'
expect(await listS3Bucket(deksBucket, prefix)).to.have.length(0)
expect(await listS3Bucket(chunksBucket, prefix)).to.have.length(0)
expect(await listS3Bucket(projectBlobsBucket, prefix)).to.have.length(0)
}
describe('backupDeletion', function () {
beforeEach(cleanup.everything)
beforeEach('create health check projects', async function () {
await testProjects.createEmptyProject('42')
await testProjects.createEmptyProject('000000000000000000000042')
})
beforeEach(testServer.listenOnRandomPort)
it('renders 200 on /status', async function () {
const response = await fetch(testServer.testUrl('/status'))
expect(response.status).to.equal(200)
})
it('renders 200 on /health_check', async function () {
const response = await fetch(testServer.testUrl('/health_check'))
expect(response.status).to.equal(200)
})
describe('DELETE /project/:projectId', function () {
const postgresHistoryId = 1
const projectIdPostgres = new ObjectId('000000000000000000000001')
const projectIdMongoDB = new ObjectId('000000000000000000000002')
const projectIdNonDeleted = new ObjectId('000000000000000000000003')
const projectIdNonExpired = new ObjectId('000000000000000000000004')
const projectIdWithChunks = new ObjectId('000000000000000000000005')
const projectIdNoHistoryId = new ObjectId('000000000000000000000006')
beforeEach('populate mongo', async function () {
await deletedProjectsCollection.insertMany([
{
_id: new ObjectId(),
deleterData: {
deletedProjectId: projectIdPostgres,
deletedAt: new Date('2024-01-01T00:00:00Z'),
deletedProjectOverleafHistoryId: postgresHistoryId,
},
},
{
_id: new ObjectId(),
deleterData: {
deletedProjectId: projectIdNonExpired,
deletedAt: new Date(),
deletedProjectOverleafHistoryId: projectIdNonExpired.toString(),
},
},
{
_id: new ObjectId(),
deleterData: {
deletedProjectId: projectIdNoHistoryId,
deletedAt: new Date('2024-01-01T00:00:00Z'),
},
},
...[projectIdMongoDB, projectIdWithChunks].map(projectId => {
return {
_id: new ObjectId(),
deleterData: {
deletedProjectId: projectId,
deletedAt: new Date('2024-01-01T00:00:00Z'),
deletedProjectOverleafHistoryId: projectId.toString(),
},
}
}),
])
})
beforeEach('initialize history', async function () {
await testProjects.createEmptyProject(projectIdWithChunks.toString())
})
beforeEach('create files in s3', async function () {
const historyIds = [
postgresHistoryId,
projectIdMongoDB,
projectIdNonDeleted,
projectIdNonExpired,
projectIdWithChunks,
projectIdNoHistoryId,
]
const jobs = []
for (const historyId of historyIds) {
jobs.push(
backupPersistor.sendStream(
projectBlobsBucket,
makeProjectKey(historyId, 'a'.repeat(40)),
Stream.Readable.from(['blob a']),
{ contentLength: 6 }
)
)
jobs.push(
backupPersistor.sendStream(
projectBlobsBucket,
makeProjectKey(historyId, 'b'.repeat(40)),
Stream.Readable.from(['blob b']),
{ contentLength: 6 }
)
)
jobs.push(
backupPersistor.sendStream(
chunksBucket,
projectKey.format(historyId) + '/111',
Stream.Readable.from(['chunk 1']),
{ contentLength: 7 }
)
)
jobs.push(
backupPersistor.sendStream(
chunksBucket,
projectKey.format(historyId) + '/222',
Stream.Readable.from(['chunk 2']),
{ contentLength: 7 }
)
)
}
await Promise.all(jobs)
})
it('renders 401 without auth', async function () {
const response = await fetch(
testServer.testUrl('/project/000000000000000000000042/backup'),
{ method: 'DELETE' }
)
expect(response.status).to.equal(401)
expect(response.headers.get('www-authenticate')).to.match(/^Basic/)
})
it('returns 422 when not deleted', async function () {
const response = await deleteProject(projectIdNonDeleted)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal(
'refusing to delete non-deleted project'
)
await expectToHaveBackup(projectIdNonDeleted)
})
it('returns 422 when not expired', async function () {
const response = await deleteProject(projectIdNonExpired)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal(
'refusing to delete non-expired project'
)
await expectToHaveBackup(projectIdNonExpired)
})
it('returns 422 when live-history not deleted', async function () {
const response = await deleteProject(projectIdWithChunks)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal(
'refusing to delete project with remaining chunks'
)
await expectToHaveBackup(projectIdWithChunks)
})
it('returns 422 when historyId is unknown', async function () {
const response = await deleteProject(projectIdNoHistoryId)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal(
'refusing to delete project with unknown historyId'
)
await expectToHaveBackup(projectIdNoHistoryId)
})
it('should successfully delete postgres id', async function () {
await expectToHaveBackup(postgresHistoryId)
const response = await deleteProject(projectIdPostgres)
expect(response.status).to.equal(204)
await expectToHaveNoBackup(postgresHistoryId)
})
it('should successfully delete mongo id', async function () {
await expectToHaveBackup(projectIdMongoDB)
const response = await deleteProject(projectIdMongoDB)
expect(response.status).to.equal(204)
await expectToHaveNoBackup(projectIdMongoDB)
})
})
})


@@ -0,0 +1,375 @@
// @ts-check
import cleanup from '../storage/support/cleanup.js'
import fetch from 'node-fetch'
import testServer from './support/test_backup_verifier_server.mjs'
import { expect } from 'chai'
import testProjects from './support/test_projects.js'
import {
backupPersistor,
chunksBucket,
projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import {
BlobStore,
makeProjectKey,
} from '../../../../storage/lib/blob_store/index.js'
import Stream from 'node:stream'
import * as zlib from 'node:zlib'
import { promisify } from 'node:util'
import { execFile } from 'node:child_process'
import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
import { chunkStore } from '../../../../storage/index.js'
import { Change, File, Operation } from 'overleaf-editor-core'
import Crypto from 'node:crypto'
import path from 'node:path'
import projectKey from '../../../../storage/lib/project_key.js'
import { historyStore } from '../../../../storage/lib/history_store.js'
/**
* @typedef {import("node-fetch").Response} Response
* @typedef {import("overleaf-editor-core").Blob} Blob
*/
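/**
* Run storage/scripts/verify_project.mjs for a history id and capture its exit status and output.
* @param {string} historyId
* @param {boolean} [expectFail] - when false, log details of unexpected failures
* @return {Promise<{status: number, stdout: string, stderr: string}>}
*/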
async function verifyProjectScript(historyId, expectFail = true) {
try {
const result = await promisify(execFile)(
process.argv0,
['storage/scripts/verify_project.mjs', `--historyId=${historyId}`],
{
encoding: 'utf-8',
timeout: 5_000,
env: {
...process.env,
LOG_LEVEL: 'warn',
},
}
)
return { status: 0, stdout: result.stdout, stderr: result.stderr }
} catch (err) {
if (
err &&
typeof err === 'object' &&
'stdout' in err &&
'code' in err &&
'stderr' in err
) {
if (!expectFail) {
console.log(err)
}
return {
stdout: typeof err.stdout === 'string' ? err.stdout : '',
status: typeof err.code === 'number' ? err.code : -1,
stderr: typeof err.stderr === 'string' ? err.stderr : '',
}
}
throw err
}
}
/**
* @param {string} historyId
* @param {string} hash
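* @param {boolean} [expectFail] - when false, log details of unexpected failures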
* @return {Promise<{stdout: string, status:number }>}
*/
async function verifyBlobScript(historyId, hash, expectFail = true) {
try {
const result = await promisify(execFile)(
process.argv0,
[
'storage/scripts/verify_backup_blob.mjs',
`--historyId=${historyId}`,
hash,
],
{
encoding: 'utf-8',
timeout: 5_000,
env: {
...process.env,
LOG_LEVEL: 'warn',
},
}
)
return { status: 0, stdout: result.stdout }
} catch (err) {
if (err && typeof err === 'object' && 'stdout' in err && 'code' in err) {
if (!expectFail) {
console.log(err)
}
return {
stdout: typeof err.stdout === 'string' ? err.stdout : '',
status: typeof err.code === 'number' ? err.code : -1,
}
}
throw err
}
}
/**
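* Call the backup verifier server's blob verification endpoint over HTTP.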
* @param {string} historyId
* @param {string} hash
* @return {Promise<Response>}
*/
async function verifyBlobHTTP(historyId, hash) {
return await fetch(
testServer.testUrl(`/history/${historyId}/blob/${hash}/verify`),
{ method: 'GET' }
)
}
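/**
* Back up the latest chunk of a project's history by copying its raw buffer
* into the backup chunks bucket under the project's key prefix.
* @param {string} historyId
*/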
async function backupChunk(historyId) {
const newChunk = await chunkStore.loadLatestRaw(historyId)
const { buffer: chunkBuffer } = await historyStore.loadRawWithBuffer(
historyId,
newChunk.id
)
const md5 = Crypto.createHash('md5').update(chunkBuffer)
await backupPersistor.sendStream(
chunksBucket,
path.join(
projectKey.format(historyId),
projectKey.pad(newChunk.startVersion)
),
Stream.Readable.from([chunkBuffer]),
{
contentType: 'application/json',
contentEncoding: 'gzip',
contentLength: chunkBuffer.byteLength,
sourceMd5: md5.digest('hex'),
}
)
}
const FIFTEEN_MINUTES_IN_MS = 900_000
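/**
* Push a change that adds a text file to the project's latest chunk.
* The creation date can be backdated so the change looks older than it is.
*/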
async function addFileInNewChunk(
fileContents,
filePath,
historyId,
{ creationDate = new Date() }
) {
const chunk = await chunkStore.loadLatest(historyId)
const operation = Operation.addFile(
filePath,
File.fromString(fileContents)
)
const changes = [new Change([operation], creationDate, [])]
chunk.pushChanges(changes)
await chunkStore.update(historyId, 0, chunk)
}
/**
* @param {string} historyId
* @param {{shouldBackupBlob?: boolean, shouldBackupChunk?: boolean, shouldCreateChunk?: boolean}} [options]
* @return {Promise<string>} hash of the blob stored for the project
*/
async function prepareProjectAndBlob(
historyId,
{ shouldBackupBlob, shouldBackupChunk, shouldCreateChunk } = {
shouldBackupBlob: true,
shouldBackupChunk: true,
shouldCreateChunk: true,
}
) {
await testProjects.createEmptyProject(historyId)
const blobStore = new BlobStore(historyId)
const fileContents = historyId
const blob = await blobStore.putString(fileContents)
if (shouldCreateChunk) {
await addFileInNewChunk(fileContents, `${historyId}.txt`, historyId, {
creationDate: new Date(new Date().getTime() - FIFTEEN_MINUTES_IN_MS),
})
}
if (shouldBackupBlob) {
const gzipped = zlib.gzipSync(Buffer.from(historyId))
await backupPersistor.sendStream(
projectBlobsBucket,
makeProjectKey(historyId, blob.getHash()),
Stream.Readable.from([gzipped]),
{ contentLength: gzipped.byteLength, contentEncoding: 'gzip' }
)
await checkDEKExists(historyId)
}
if (shouldCreateChunk && shouldBackupChunk) {
await backupChunk(historyId)
}
return blob.getHash()
}
/**
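* Resolve if a backup DEK exists for the project; reject with NotFoundError otherwise.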
* @param {string} historyId
* @return {Promise<void>}
*/
async function checkDEKExists(historyId) {
await backupPersistor.forProjectRO(
projectBlobsBucket,
makeProjectKey(historyId, '')
)
}
describe('backupVerifier', function () {
const historyIdPostgres = '42'
const historyIdMongo = '000000000000000000000042'
let blobHashPG, blobHashMongo, blobPathPG
beforeEach(cleanup.everything)
beforeEach('create health check projects', async function () {
;[blobHashPG, blobHashMongo] = await Promise.all([
prepareProjectAndBlob('42'),
prepareProjectAndBlob('000000000000000000000042'),
])
blobPathPG = makeProjectKey(historyIdPostgres, blobHashPG)
})
beforeEach(testServer.listenOnRandomPort)
it('renders 200 on /status', async function () {
const response = await fetch(testServer.testUrl('/status'))
expect(response.status).to.equal(200)
})
it('renders 200 on /health_check', async function () {
const response = await fetch(testServer.testUrl('/health_check'))
expect(response.status).to.equal(200)
})
describe('storage/scripts/verify_project.mjs', function () {
describe('when the project is appropriately backed up', function () {
it('should return 0', async function () {
const response = await verifyProjectScript(historyIdPostgres, false)
expect(response.status).to.equal(0)
})
})
describe('when the project chunk is not backed up', function () {
let response
beforeEach(async function () {
await prepareProjectAndBlob('000000000000000000000043', {
shouldBackupChunk: false,
shouldBackupBlob: true,
shouldCreateChunk: true,
})
response = await verifyProjectScript('000000000000000000000043')
})
it('should return 1', async function () {
expect(response.status).to.equal(1)
})
it('should emit an error message referring to a missing chunk', async function () {
const stderr = response.stderr
expect(stderr).to.include('BackupRPOViolationChunkNotBackedUpError')
})
})
describe('when a project blob is not backed up', function () {
let response
beforeEach(async function () {
await prepareProjectAndBlob('43', {
shouldBackupChunk: true,
shouldBackupBlob: false,
shouldCreateChunk: true,
})
response = await verifyProjectScript('43')
})
it('should return 1', function () {
expect(response.status).to.equal(1)
})
it('includes a BackupCorruptedError in stderr', function () {
expect(response.stderr).to.include(
'BackupCorruptedMissingBlobError: missing blob'
)
})
})
})
describe('storage/scripts/verify_backup_blob.mjs', function () {
it('throws and does not create DEK if missing', async function () {
const historyId = '404'
const hash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
const response = await verifyBlobScript(historyId, hash)
expect(response.status).to.equal(1)
expect(response.stdout).to.include('dek does not exist')
await expect(checkDEKExists(historyId)).to.be.rejectedWith(NotFoundError)
})
it('throws when deleted in db', async function () {
const blobStore = new BlobStore(historyIdPostgres)
await blobStore.deleteBlobs()
const response = await verifyBlobScript(historyIdPostgres, blobHashPG)
expect(response.status).to.equal(1)
expect(response.stdout).to.include(`blob ${blobHashPG} not found`)
})
it('throws when not existing', async function () {
await backupPersistor.deleteObject(projectBlobsBucket, blobPathPG)
const result = await verifyBlobScript(historyIdPostgres, blobHashPG)
expect(result.status).to.equal(1)
expect(result.stdout).to.include('missing blob')
})
it('throws when corrupted', async function () {
await backupPersistor.sendStream(
projectBlobsBucket,
blobPathPG,
Stream.Readable.from(['something else']),
{ contentLength: 14 }
)
const result = await verifyBlobScript(historyIdPostgres, blobHashPG)
expect(result.status).to.equal(1)
expect(result.stdout).to.include('hash mismatch for backed up blob')
})
it('should successfully verify from postgres', async function () {
const result = await verifyBlobScript(
historyIdPostgres,
blobHashPG,
false
)
expect(result.status).to.equal(0)
expect(result.stdout.split('\n')).to.include('OK')
})
it('should successfully verify from mongo', async function () {
const result = await verifyBlobScript(
historyIdMongo,
blobHashMongo,
false
)
expect(result.status).to.equal(0)
expect(result.stdout.split('\n')).to.include('OK')
})
})
describe('GET /history/:historyId/blob/:hash/verify', function () {
it('returns 404 when deleted in db', async function () {
const blobStore = new BlobStore(historyIdPostgres)
await blobStore.deleteBlobs()
const response = await verifyBlobHTTP(historyIdPostgres, blobHashPG)
expect(response.status).to.equal(404)
expect(await response.text()).to.equal(`blob ${blobHashPG} not found`)
})
it('returns 422 and does not create DEK if missing', async function () {
const historyId = '404'
const hash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
const response = await verifyBlobHTTP(historyId, hash)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal('dek does not exist')
await expect(checkDEKExists(historyId)).to.be.rejectedWith(NotFoundError)
})
it('returns 422 when not existing', async function () {
await backupPersistor.deleteObject(projectBlobsBucket, blobPathPG)
const response = await verifyBlobHTTP(historyIdPostgres, blobHashPG)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal('missing blob')
})
it('returns 422 when corrupted', async function () {
await backupPersistor.sendStream(
projectBlobsBucket,
blobPathPG,
Stream.Readable.from(['something else']),
{ contentLength: 14 }
)
const response = await verifyBlobHTTP(historyIdPostgres, blobHashPG)
expect(response.status).to.equal(422)
expect(await response.text()).to.equal('hash mismatch for backed up blob')
})
it('should successfully verify from postgres', async function () {
const response = await verifyBlobHTTP(historyIdPostgres, blobHashPG)
expect(response.status).to.equal(200)
})
it('should successfully verify from mongo', async function () {
const response = await verifyBlobHTTP(historyIdMongo, blobHashMongo)
expect(response.status).to.equal(200)
})
})
})


@@ -0,0 +1,396 @@
'use strict'
const BPromise = require('bluebird')
const { expect } = require('chai')
const HTTPStatus = require('http-status')
const fetch = require('node-fetch')
const fs = BPromise.promisifyAll(require('node:fs'))
const cleanup = require('../storage/support/cleanup')
const fixtures = require('../storage/support/fixtures')
const testFiles = require('../storage/support/test_files')
const testProjects = require('./support/test_projects')
const testServer = require('./support/test_server')
const core = require('overleaf-editor-core')
const Change = core.Change
const ChunkResponse = core.ChunkResponse
const File = core.File
const Operation = core.Operation
const Snapshot = core.Snapshot
const TextOperation = core.TextOperation
const blobHash = require('../../../../storage').blobHash
describe('overleaf ot', function () {
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
this.timeout(10000) // it takes a while on Docker for Mac
it('can use API', function () {
let client, downloadZipClient
const basicAuthClient = testServer.basicAuthClient
return (
testProjects
.createEmptyProject()
.then(projectId => {
return testServer
.createClientForProject(projectId)
.then(clientForProject => {
client = clientForProject
return testServer.createClientForDownloadZip(projectId)
})
.then(clientForProject => {
downloadZipClient = clientForProject
return projectId
})
})
// the project is currently empty
.then(projectId => {
return client.apis.Project.getLatestContent({
project_id: projectId,
}).then(response => {
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(0)
return projectId
})
})
// upload a blob and add two files using it
.then(projectId => {
return fetch(
testServer.url(
`/api/projects/${projectId}/blobs/${testFiles.GRAPH_PNG_HASH}`,
{ qs: { pathname: 'graph_1.png' } }
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('graph.png')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
const testFile = File.fromHash(testFiles.GRAPH_PNG_HASH)
const change = new Change(
[
Operation.addFile('graph_1.png', testFile),
Operation.addFile('graph_2.png', testFile),
],
new Date()
)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: projectId,
end_version: 0,
return_snapshot: 'hashed',
changes: [change.toRaw()],
})
})
.then(() => projectId)
})
// get the new project state
.then(projectId => {
return client.apis.Project.getLatestContent({
project_id: projectId,
}).then(response => {
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(2)
const file0 = snapshot.getFile('graph_1.png')
expect(file0.getHash()).to.equal(testFiles.GRAPH_PNG_HASH)
const file1 = snapshot.getFile('graph_2.png')
expect(file1.getHash()).to.equal(testFiles.GRAPH_PNG_HASH)
return projectId
})
})
// get the history
.then(projectId => {
return client.apis.Project.getLatestHistory({
project_id: projectId,
}).then(response => {
const chunk = ChunkResponse.fromRaw(response.obj).getChunk()
const changes = chunk.getChanges()
expect(changes.length).to.equal(1)
const change0Timestamp = changes[0].getTimestamp().getTime()
expect(change0Timestamp).to.be.closeTo(Date.now(), 1e4)
return projectId
})
})
// upload an empty file
.then(projectId => {
return fetch(
testServer.url(
`/api/projects/${projectId}/blobs/${File.EMPTY_FILE_HASH}`,
{ qs: { pathname: 'main.tex' } }
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('empty.tex')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
const testFile = File.fromHash(File.EMPTY_FILE_HASH)
const change = new Change(
[Operation.addFile('main.tex', testFile)],
new Date()
)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: projectId,
end_version: 1,
return_snapshot: 'hashed',
changes: [change.toRaw()],
})
})
.then(() => projectId)
})
.then(projectId => {
// Fetch empty file blob
return client.apis.Project.getProjectBlob({
project_id: projectId,
hash: File.EMPTY_FILE_HASH,
})
.then(response => {
expect(response.headers['content-type']).to.equal(
'application/octet-stream'
)
return response.data.arrayBuffer()
})
.then(buffer => {
expect(buffer).to.deep.equal(new ArrayBuffer(0))
return projectId
})
})
// get the history
.then(projectId => {
return client.apis.Project.getLatestHistory({
project_id: projectId,
}).then(response => {
const chunk = ChunkResponse.fromRaw(response.obj).getChunk()
const changes = chunk.getChanges()
expect(changes.length).to.equal(2)
return projectId
})
})
// get the new project state
.then(projectId => {
return client.apis.Project.getLatestContent({
project_id: projectId,
}).then(response => {
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(3)
expect(snapshot.getFile('graph_1.png').getHash()).to.equal(
testFiles.GRAPH_PNG_HASH
)
expect(snapshot.getFile('graph_2.png').getHash()).to.equal(
testFiles.GRAPH_PNG_HASH
)
expect(snapshot.getFile('main.tex').getContent()).to.equal('')
return projectId
})
})
// edit the main file
.then(projectId => {
const change = new Change(
[
Operation.editFile(
'main.tex',
TextOperation.fromJSON({ textOperation: ['hello'] })
),
],
new Date()
)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: projectId,
changes: [change.toRaw()],
end_version: 2,
return_snapshot: 'hashed',
}).then(response => {
expect(response.status).to.equal(HTTPStatus.CREATED)
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(3)
expect(snapshot.getFile('graph_1.png').getHash()).to.equal(
testFiles.GRAPH_PNG_HASH
)
expect(snapshot.getFile('graph_2.png').getHash()).to.equal(
testFiles.GRAPH_PNG_HASH
)
expect(snapshot.getFile('main.tex').getHash()).to.equal(
blobHash.fromString('hello')
)
return projectId
})
})
// get the new project state
.then(projectId => {
return client.apis.Project.getLatestContent({
project_id: projectId,
}).then(response => {
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(3)
expect(snapshot.getFile('graph_1.png').getHash()).to.equal(
testFiles.GRAPH_PNG_HASH
)
expect(snapshot.getFile('graph_2.png').getHash()).to.equal(
testFiles.GRAPH_PNG_HASH
)
const mainFile = snapshot.getFile('main.tex')
expect(mainFile.getHash()).to.be.null
expect(mainFile.getContent()).to.equal('hello')
return projectId
})
})
// edit the main file again
.then(projectId => {
const change = new Change(
[
Operation.editFile(
'main.tex',
TextOperation.fromJSON({ textOperation: [1, -4, 'i world'] })
),
],
new Date()
)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: projectId,
changes: [change.toRaw()],
end_version: 3,
return_snapshot: 'hashed',
}).then(response => {
expect(response.status).to.equal(HTTPStatus.CREATED)
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(3)
expect(snapshot.getFile('main.tex').getHash()).to.equal(
blobHash.fromString('hi world')
)
return projectId
})
})
// get the new project state
.then(projectId => {
return client.apis.Project.getLatestContent({
project_id: projectId,
}).then(response => {
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(3)
expect(snapshot.getFile('graph_1.png')).to.exist
expect(snapshot.getFile('graph_2.png')).to.exist
const mainFile = snapshot.getFile('main.tex')
expect(mainFile.getHash()).to.be.null
expect(mainFile.getContent()).to.equal('hi world')
return projectId
})
})
// rename the text file
.then(projectId => {
const change = new Change(
[Operation.moveFile('main.tex', 'intro.tex')],
new Date()
)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: projectId,
changes: [change.toRaw()],
end_version: 4,
return_snapshot: 'hashed',
}).then(response => {
expect(response.status).to.equal(HTTPStatus.CREATED)
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(3)
expect(snapshot.getFile('intro.tex').getHash()).to.equal(
blobHash.fromString('hi world')
)
return projectId
})
})
// get the new project state
.then(projectId => {
return client.apis.Project.getLatestContent({
project_id: projectId,
}).then(response => {
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(3)
expect(snapshot.getFile('graph_1.png')).to.exist
expect(snapshot.getFile('graph_2.png')).to.exist
const mainFile = snapshot.getFile('intro.tex')
expect(mainFile.getHash()).to.be.null
expect(mainFile.getContent()).to.equal('hi world')
return projectId
})
})
// remove a graph
.then(projectId => {
const change = new Change(
[Operation.removeFile('graph_1.png')],
new Date()
)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: projectId,
changes: [change.toRaw()],
end_version: 5,
return_snapshot: 'hashed',
}).then(response => {
expect(response.status).to.equal(HTTPStatus.CREATED)
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(2)
return projectId
})
})
// get the new project state
.then(projectId => {
return client.apis.Project.getLatestContent({
project_id: projectId,
}).then(response => {
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(2)
expect(snapshot.getFile('graph_2.png')).to.exist
const mainFile = snapshot.getFile('intro.tex')
expect(mainFile.getHash()).to.be.null
expect(mainFile.getContent()).to.equal('hi world')
return projectId
})
})
// download zip with project content
.then(projectId => {
return downloadZipClient.apis.Project.getZip({
project_id: projectId,
version: 6,
}).then(response => {
expect(response.status).to.equal(HTTPStatus.OK)
const headers = response.headers
expect(headers['content-type']).to.equal('application/octet-stream')
expect(headers['content-disposition']).to.equal(
'attachment; filename=project.zip'
)
})
})
)
})
})


@@ -0,0 +1,251 @@
const { expect } = require('chai')
const config = require('config')
const fs = require('node:fs')
const fetch = require('node-fetch')
const HTTPStatus = require('http-status')
const cleanup = require('../storage/support/cleanup')
const fixtures = require('../storage/support/fixtures')
const testFiles = require('../storage/support/test_files')
const testServer = require('./support/test_server')
const { expectHttpError } = require('./support/expect_response')
const { globalBlobs } = require('../../../../storage/lib/mongodb.js')
const {
loadGlobalBlobs,
} = require('../../../../storage/lib/blob_store/index.js')
describe('Project blobs API', function () {
const projectId = '123'
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
let client
let token
before(async function () {
client = await testServer.createClientForProject(projectId)
token = testServer.createTokenForProject(projectId)
})
it('returns 404 if the blob is not found', async function () {
const testHash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
await expectHttpError(
client.apis.Project.getProjectBlob({
project_id: projectId,
hash: testHash,
}),
HTTPStatus.NOT_FOUND
)
})
it('checks if file hash matches the hash parameter', async function () {
const testHash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
const response = await fetch(
testServer.url(`/api/projects/${projectId}/blobs/${testHash}`),
{
method: 'PUT',
headers: { Authorization: `Bearer ${token}` },
body: fs.createReadStream(testFiles.path('hello.txt')),
}
)
expect(response.status).to.equal(HTTPStatus.CONFLICT)
// check that it did not store the file
await expectHttpError(
client.apis.Project.getProjectBlob({
project_id: projectId,
hash: testFiles.HELLO_TXT_HASH,
}),
HTTPStatus.NOT_FOUND
)
})
it('rejects oversized files', async function () {
const testHash = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
const buffer = Buffer.alloc(
parseInt(config.get('maxFileUploadSize'), 10) + 1
)
const response = await fetch(
testServer.url(`/api/projects/${projectId}/blobs/${testHash}`),
{
method: 'PUT',
headers: { Authorization: `Bearer ${token}` },
body: buffer,
}
)
expect(response.status).to.equal(HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
})
describe('with an existing blob', function () {
let fileContents
beforeEach(async function () {
fileContents = await fs.promises.readFile(testFiles.path('hello.txt'))
const response = await fetch(
testServer.url(
`/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}`
),
{
method: 'PUT',
headers: { Authorization: `Bearer ${token}` },
body: fileContents,
}
)
expect(response.ok).to.be.true
})
it('fulfills a request with a JWT header', async function () {
const response = await client.apis.Project.getProjectBlob({
project_id: projectId,
hash: testFiles.HELLO_TXT_HASH,
})
const responseText = await response.data.text()
expect(responseText).to.equal(fileContents.toString())
})
it('fulfills a request with a token parameter', async function () {
const url = new URL(
testServer.url(
`/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}`
)
)
url.searchParams.append('token', token)
const response = await fetch(url)
const payload = await response.text()
expect(payload).to.equal(fileContents.toString())
})
it('supports range request', async function () {
const url = new URL(
testServer.url(
`/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}`
)
)
url.searchParams.append('token', token)
const response = await fetch(url, { headers: { Range: 'bytes=0-4' } })
const payload = await response.text()
expect(payload).to.equal(fileContents.toString().slice(0, 4))
})
it('supports HEAD request', async function () {
const url = new URL(
testServer.url(
`/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}`
)
)
url.searchParams.append('token', token)
const response = await fetch(url, { method: 'HEAD' })
expect(response.headers.get('Content-Length')).to.equal(
testFiles.HELLO_TXT_BYTE_LENGTH.toString()
)
const payload = await response.text()
expect(payload).to.have.length(0)
})
it('rejects an unauthorized request', async function () {
const response = await fetch(
testServer.url(
`/api/projects/${projectId}/blobs/${testFiles.HELLO_TXT_HASH}`
)
)
expect(response.status).to.equal(HTTPStatus.UNAUTHORIZED)
})
it('copies the blob to another project', async function () {
const targetProjectId = '456'
const targetClient =
await testServer.createClientForProject(targetProjectId)
const targetToken = testServer.createTokenForProject(targetProjectId)
const url = new URL(
testServer.url(
`/api/projects/${targetProjectId}/blobs/${testFiles.HELLO_TXT_HASH}`
)
)
url.searchParams.append('copyFrom', projectId)
const response = await fetch(url, {
method: 'POST',
headers: { Authorization: `Bearer ${targetToken}` },
})
expect(response.status).to.equal(HTTPStatus.CREATED)
const newBlobResponse = await targetClient.apis.Project.getProjectBlob({
project_id: targetProjectId,
hash: testFiles.HELLO_TXT_HASH,
})
const newBlobResponseText = await newBlobResponse.data.text()
expect(newBlobResponseText).to.equal(fileContents.toString())
})
it('skips copying a blob to another project if it already exists', async function () {
const targetProjectId = '456'
const targetClient =
await testServer.createClientForProject(targetProjectId)
const targetToken = testServer.createTokenForProject(targetProjectId)
const fileContents = await fs.promises.readFile(
testFiles.path('hello.txt')
)
const uploadResponse = await fetch(
testServer.url(
`/api/projects/${targetProjectId}/blobs/${testFiles.HELLO_TXT_HASH}`
),
{
method: 'PUT',
headers: { Authorization: `Bearer ${targetToken}` },
body: fileContents,
}
)
expect(uploadResponse.ok).to.be.true
const url = new URL(
testServer.url(
`/api/projects/${targetProjectId}/blobs/${testFiles.HELLO_TXT_HASH}`
)
)
url.searchParams.append('copyFrom', projectId)
const response = await fetch(url, {
method: 'POST',
headers: { Authorization: `Bearer ${targetToken}` },
})
expect(response.status).to.equal(HTTPStatus.NO_CONTENT)
const newBlobResponse = await targetClient.apis.Project.getProjectBlob({
project_id: targetProjectId,
hash: testFiles.HELLO_TXT_HASH,
})
const newBlobResponseText = await newBlobResponse.data.text()
expect(newBlobResponseText).to.equal(fileContents.toString())
})
})
describe('with a global blob', function () {
before(async function () {
await globalBlobs.insertOne({
_id: testFiles.STRING_A_HASH,
byteLength: 1,
stringLength: 1,
})
await loadGlobalBlobs()
})
it('does not copy global blobs', async function () {
const targetProjectId = '456'
const targetToken = testServer.createTokenForProject(targetProjectId)
const url = new URL(
testServer.url(
`/api/projects/${targetProjectId}/blobs/${testFiles.STRING_A_HASH}`
)
)
url.searchParams.append('copyFrom', projectId)
const response = await fetch(url, {
method: 'POST',
headers: { Authorization: `Bearer ${targetToken}` },
})
expect(response.status).to.equal(HTTPStatus.NO_CONTENT)
})
})
})


@@ -0,0 +1,57 @@
'use strict'
const BPromise = require('bluebird')
const { expect } = require('chai')
const HTTPStatus = require('http-status')
const fetch = require('node-fetch')
const fs = BPromise.promisifyAll(require('node:fs'))
const cleanup = require('../storage/support/cleanup')
const fixtures = require('../storage/support/fixtures')
const testFiles = require('../storage/support/test_files')
const testProjects = require('./support/test_projects')
const testServer = require('./support/test_server')
const { Change, File, Operation } = require('overleaf-editor-core')
describe('project import', function () {
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
it('skips generating the snapshot by default', async function () {
const basicAuthClient = testServer.basicAuthClient
const projectId = await testProjects.createEmptyProject()
// upload an empty file
const response = await fetch(
testServer.url(
`/api/projects/${projectId}/blobs/${File.EMPTY_FILE_HASH}`,
{ qs: { pathname: 'main.tex' } }
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('empty.tex')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
expect(response.ok).to.be.true
const testFile = File.fromHash(File.EMPTY_FILE_HASH)
const testChange = new Change(
[Operation.addFile('main.tex', testFile)],
new Date()
)
const importResponse =
await basicAuthClient.apis.ProjectImport.importChanges1({
project_id: projectId,
end_version: 0,
changes: [testChange.toRaw()],
})
expect(importResponse.status).to.equal(HTTPStatus.CREATED)
expect(importResponse.obj).to.deep.equal({})
})
})


@@ -0,0 +1,853 @@
const BPromise = require('bluebird')
const { expect } = require('chai')
const fs = BPromise.promisifyAll(require('node:fs'))
const HTTPStatus = require('http-status')
const fetch = require('node-fetch')
const cleanup = require('../storage/support/cleanup')
const fixtures = require('../storage/support/fixtures')
const testFiles = require('../storage/support/test_files')
const expectResponse = require('./support/expect_response')
const testServer = require('./support/test_server')
const core = require('overleaf-editor-core')
const testProjects = require('./support/test_projects')
const Change = core.Change
const ChunkResponse = core.ChunkResponse
const File = core.File
const Operation = core.Operation
const Origin = core.Origin
const Snapshot = core.Snapshot
const TextOperation = core.TextOperation
const V2DocVersions = core.V2DocVersions
const knex = require('../../../../storage').knex
describe('history import', function () {
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
function changeToRaw(change) {
return change.toRaw()
}
function makeChange(operation) {
return new Change([operation], new Date(), [])
}
let basicAuthClient
let pseudoJwtBasicAuthClient
let clientForProject
before(async function () {
basicAuthClient = testServer.basicAuthClient
pseudoJwtBasicAuthClient = testServer.pseudoJwtBasicAuthClient
clientForProject = await testServer.createClientForProject('1')
})
it('creates blobs and then imports a snapshot and history', function () {
// We need to be able to set the projectId to match an existing doc ID.
const testProjectId = '1'
const testFilePathname = 'main.tex'
const testAuthors = [123, null]
const testTextOperation0 = TextOperation.fromJSON({ textOperation: ['a'] })
const testTextOperation1 = TextOperation.fromJSON({
textOperation: [1, 'b'],
})
let testSnapshot
return fetch(
testServer.url(
`/api/projects/${testProjectId}/blobs/${File.EMPTY_FILE_HASH}`
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('empty.tex')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
// Import project
testSnapshot = new Snapshot()
testSnapshot.addFile(
testFilePathname,
File.fromHash(File.EMPTY_FILE_HASH)
)
return basicAuthClient.apis.ProjectImport.importSnapshot1({
project_id: testProjectId,
snapshot: testSnapshot.toRaw(),
})
})
.then(response => {
// Check project is valid
expect(response.obj.projectId).to.equal(testProjectId)
})
.then(() => {
// Try importing the project again
return basicAuthClient.apis.ProjectImport.importSnapshot1({
project_id: testProjectId,
snapshot: testSnapshot.toRaw(),
})
})
.then(() => {
// Check that importing a duplicate fails
expect.fail()
})
.catch(expectResponse.conflict)
.then(() => {
// Get project history
return clientForProject.apis.Project.getLatestHistory({
project_id: testProjectId,
})
})
.then(response => {
// Check that the imported history is valid
const chunk = ChunkResponse.fromRaw(response.obj).getChunk()
const snapshot = chunk.getSnapshot()
expect(snapshot.countFiles()).to.equal(1)
const file = snapshot.getFile(testFilePathname)
expect(file.getHash()).to.eql(File.EMPTY_FILE_HASH)
expect(chunk.getChanges().length).to.equal(0)
expect(chunk.getEndVersion()).to.equal(0)
})
.then(() => {
// Import changes with an end version
const changes = [
makeChange(Operation.editFile(testFilePathname, testTextOperation0)),
makeChange(Operation.editFile(testFilePathname, testTextOperation1)),
]
changes[0].setAuthors(testAuthors)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
changes: changes.map(changeToRaw),
end_version: 0,
return_snapshot: 'hashed',
})
})
.then(response => {
expect(response.status).to.equal(HTTPStatus.CREATED)
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(1)
expect(snapshot.getFile('main.tex').getHash()).to.equal(
testFiles.STRING_AB_HASH
)
})
.then(() => {
// Get project history
return clientForProject.apis.Project.getLatestHistory({
project_id: testProjectId,
})
})
.then(response => {
// Check that the history is valid
const chunkResponse = ChunkResponse.fromRaw(response.obj)
const chunk = chunkResponse.getChunk()
const snapshot = chunk.getSnapshot()
expect(snapshot.countFiles()).to.equal(1)
const file = snapshot.getFile(testFilePathname)
expect(file.getHash()).to.equal(File.EMPTY_FILE_HASH)
expect(chunk.getChanges().length).to.equal(2)
const changeWithAuthors = chunk.getChanges()[0]
expect(changeWithAuthors.getAuthors().length).to.equal(2)
expect(changeWithAuthors.getAuthors()).to.deep.equal(testAuthors)
expect(chunk.getStartVersion()).to.equal(0)
expect(chunk.getEndVersion()).to.equal(2)
})
.then(() => {
return clientForProject.apis.Project.getLatestHistory({
project_id: testProjectId,
})
})
.then(response => {
// it should retrieve the same chunk
const chunkResponse = ChunkResponse.fromRaw(response.obj)
const chunk = chunkResponse.getChunk()
expect(chunk.getChanges().length).to.equal(2)
expect(chunk.getStartVersion()).to.equal(0)
expect(chunk.getEndVersion()).to.equal(2)
})
.then(() => {
// Get project's latest content
return clientForProject.apis.Project.getLatestContent({
project_id: testProjectId,
})
})
.then(response => {
// Check that the content is valid
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(1)
const file = snapshot.getFile(testFilePathname)
expect(file.getContent()).to.equal('ab')
})
})
it('rejects invalid changes in history', function () {
const testProjectId = '1'
const testFilePathname = 'main.tex'
const testTextOperation = TextOperation.fromJSON({
textOperation: ['a', 10],
})
let testSnapshot
return fetch(
testServer.url(
`/api/projects/${testProjectId}/blobs/${File.EMPTY_FILE_HASH}`
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('empty.tex')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
// Import project
testSnapshot = new Snapshot()
testSnapshot.addFile(
testFilePathname,
File.fromHash(File.EMPTY_FILE_HASH)
)
return basicAuthClient.apis.ProjectImport.importSnapshot1({
project_id: testProjectId,
snapshot: testSnapshot.toRaw(),
})
})
.then(response => {
// Check project is valid
expect(response.obj.projectId).to.equal(testProjectId)
})
.then(() => {
// Import invalid changes
const changes = [
makeChange(Operation.editFile(testFilePathname, testTextOperation)),
]
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
return_snapshot: 'hashed',
changes: changes.map(changeToRaw),
})
})
.then(() => {
// Check that this fails
expect.fail()
})
.catch(expectResponse.unprocessableEntity)
.then(() => {
// Get the latest content
return clientForProject.apis.Project.getLatestContent({
project_id: testProjectId,
})
})
.then(response => {
// Check that no changes have been stored
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(1)
const file = snapshot.getFile(testFilePathname)
expect(file.getContent()).to.equal('')
})
.then(() => {
// Send a change with a wrong end version that does not conflict
// with the latest snapshot
const changes = [makeChange(Operation.removeFile(testFilePathname))]
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 10000,
changes: changes.map(changeToRaw),
})
})
.then(() => {
// Check that this fails
expect.fail()
})
.catch(expectResponse.unprocessableEntity)
.then(() => {
// Get the latest project history
return clientForProject.apis.Project.getLatestHistory({
project_id: testProjectId,
})
})
.then(response => {
// Check that no changes have been stored
const chunkResponse = ChunkResponse.fromRaw(response.obj)
const changes = chunkResponse.getChunk().getChanges()
expect(changes).to.have.length(0)
})
})
it('creates and edits a file using changes', function () {
const testProjectId = '1'
const mainFilePathname = 'main.tex'
const testFilePathname = 'test.tex'
const testTextOperation = TextOperation.fromJSON({ textOperation: ['a'] })
const inexistentAuthors = [1234, 5678]
const projectVersion = '12345.0'
const v2DocVersions = new V2DocVersions({
'random-doc-id': { pathname: 'doc-path.tex', v: 123 },
})
const testLabelOrigin = Origin.fromRaw({
kind: 'saved ver',
})
const testRestoreOrigin = Origin.fromRaw({
kind: 'restore',
timestamp: '2016-01-01T00:00:00',
version: 1,
})
let testSnapshot
return fetch(
testServer.url(
`/api/projects/${testProjectId}/blobs/${File.EMPTY_FILE_HASH}`
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('empty.tex')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
// Import a project
testSnapshot = new Snapshot()
testSnapshot.addFile(
mainFilePathname,
File.fromHash(File.EMPTY_FILE_HASH)
)
return basicAuthClient.apis.ProjectImport.importSnapshot1({
project_id: testProjectId,
snapshot: testSnapshot.toRaw(),
})
})
.then(response => {
// Check that the project is valid
expect(response.obj.projectId).to.equal(testProjectId)
})
.then(() => {
// Import changes
const testFile = File.fromHash(File.EMPTY_FILE_HASH)
const changes = [
makeChange(Operation.addFile(testFilePathname, testFile)),
makeChange(Operation.editFile(testFilePathname, testTextOperation)),
]
changes[0].setProjectVersion(projectVersion)
changes[1].setAuthors(inexistentAuthors)
changes[1].setV2DocVersions(v2DocVersions)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
return_snapshot: 'hashed',
changes: changes.map(changeToRaw),
})
})
.then(response => {
expect(response.status).to.equal(HTTPStatus.CREATED)
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(2)
expect(snapshot.getFile('main.tex').getHash()).to.equal(
File.EMPTY_FILE_HASH
)
expect(snapshot.getFile('test.tex').getHash()).to.equal(
testFiles.STRING_A_HASH
)
})
.then(() => {
// Get the project history
return clientForProject.apis.Project.getLatestHistory({
project_id: testProjectId,
})
})
.then(response => {
// It should not fail when some of the authors no longer exist
const chunkResponse = ChunkResponse.fromRaw(response.obj)
const changes = chunkResponse.getChunk().getChanges()
expect(changes.length).to.equal(2)
const changeWithAuthor = changes[1]
expect(changeWithAuthor.getAuthors()).to.deep.equal(inexistentAuthors)
})
.then(() => {
// It should retrieve the latest snapshot when the change set is empty
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
return_snapshot: 'hashed',
changes: [],
})
})
.then(response => {
// Check latest snapshot
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(2)
expect(snapshot.getFile('main.tex').getHash()).to.equal(
File.EMPTY_FILE_HASH
)
expect(snapshot.getFile('test.tex').getHash()).to.equal(
testFiles.STRING_A_HASH
)
expect(snapshot.getProjectVersion()).to.equal(projectVersion)
expect(snapshot.getV2DocVersions()).to.deep.equal(v2DocVersions)
})
.then(() => {
// Import changes with origin
const testFile = File.fromHash(File.EMPTY_FILE_HASH)
const changes = [
makeChange(Operation.removeFile(testFilePathname)),
makeChange(Operation.addFile(testFilePathname, testFile)),
]
changes[0].setOrigin(testLabelOrigin)
changes[1].setOrigin(testRestoreOrigin)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
changes: changes.map(changeToRaw),
})
})
.then(() => {
// Get the latest history
return clientForProject.apis.Project.getLatestHistory({
project_id: testProjectId,
})
})
.then(response => {
// Check that the origin is stored
const chunkResponse = ChunkResponse.fromRaw(response.obj)
const changes = chunkResponse.getChunk().getChanges()
expect(changes).to.have.length(4)
expect(changes[2].getOrigin()).to.deep.equal(testLabelOrigin)
expect(changes[3].getOrigin()).to.deep.equal(testRestoreOrigin)
})
.then(() => {
// Import invalid changes
const testFile = File.fromHash(File.EMPTY_FILE_HASH)
const changes = [makeChange(Operation.addFile('../../a.tex', testFile))]
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
changes: changes.map(changeToRaw),
})
})
.then(() => {
// Check that this fails and returns a 422
expect.fail()
})
.catch(expectResponse.unprocessableEntity)
})
it('rejects text operations on binary files', function () {
const testProjectId = '1'
const testFilePathname = 'main.tex'
const testTextOperation = TextOperation.fromJSON({ textOperation: ['bb'] })
let testSnapshot
return fetch(
testServer.url(
`/api/projects/${testProjectId}/blobs/${testFiles.NON_BMP_TXT_HASH}`
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('non_bmp.txt')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
// Import a project
testSnapshot = new Snapshot()
testSnapshot.addFile(
testFilePathname,
File.fromHash(testFiles.NON_BMP_TXT_HASH)
)
return basicAuthClient.apis.ProjectImport.importSnapshot1({
project_id: testProjectId,
snapshot: testSnapshot.toRaw(),
})
})
.then(response => {
// Check that the project is valid
expect(response.obj.projectId).to.equal(testProjectId)
})
.then(() => {
// Import invalid changes
const changes = [
makeChange(Operation.editFile(testFilePathname, testTextOperation)),
]
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
changes: changes.map(changeToRaw),
})
})
.then(() => {
// Expect invalid changes to fail
expect.fail()
})
.catch(expectResponse.unprocessableEntity)
.then(() => {
// Get latest content
return clientForProject.apis.Project.getLatestContent({
project_id: testProjectId,
})
})
.then(response => {
// Check that no changes were stored
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(1)
expect(snapshot.getFile(testFilePathname).getHash()).to.equal(
testFiles.NON_BMP_TXT_HASH
)
})
})
it('accepts text operations on files with null characters if stringLength is present', function () {
const testProjectId = '1'
const mainFilePathname = 'main.tex'
const testTextOperation = TextOperation.fromJSON({
textOperation: [3, 'a'],
})
let testSnapshot
function importChanges() {
const changes = [
makeChange(Operation.editFile(mainFilePathname, testTextOperation)),
]
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
changes: changes.map(changeToRaw),
})
}
function getLatestContent() {
return clientForProject.apis.Project.getLatestContent({
project_id: testProjectId,
})
}
return fetch(
testServer.url(
`/api/projects/${testProjectId}/blobs/${testFiles.NULL_CHARACTERS_TXT_HASH}`
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('null_characters.txt')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
// Import project
testSnapshot = new Snapshot()
testSnapshot.addFile(
mainFilePathname,
File.fromHash(testFiles.NULL_CHARACTERS_TXT_HASH)
)
return basicAuthClient.apis.ProjectImport.importSnapshot1({
project_id: testProjectId,
snapshot: testSnapshot.toRaw(),
})
})
.then(importChanges)
.then(() => {
// Expect invalid changes to fail
expect.fail()
})
.catch(expectResponse.unprocessableEntity)
.then(getLatestContent)
.then(response => {
// Check that no changes were made
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(1)
expect(snapshot.getFile(mainFilePathname).getHash()).to.equal(
testFiles.NULL_CHARACTERS_TXT_HASH
)
})
.then(() => {
// Set string length
return knex('project_blobs').update(
'string_length',
testFiles.NULL_CHARACTERS_TXT_BYTE_LENGTH
)
})
.then(importChanges)
.then(getLatestContent)
.then(response => {
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(1)
expect(snapshot.getFile(mainFilePathname).getContent()).to.equal(
'\x00\x00\x00a'
)
})
})
it('returns 404 when chunk is not found in bucket', function () {
const testProjectId = '1'
const fooChange = makeChange(Operation.removeFile('foo.tex'))
return knex('chunks')
.insert({
doc_id: testProjectId,
start_version: 0,
end_version: 100,
end_timestamp: null,
})
.then(() => {
// Import changes
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 100,
changes: [fooChange.toRaw()],
})
})
.then(() => {
// Expect the import to fail because the chunk data is missing
expect.fail()
})
.catch(expectResponse.notFound)
})
it('creates and returns changes with v2 author ids', function () {
const testFilePathname = 'test.tex'
const testTextOperation = TextOperation.fromJSON({ textOperation: ['a'] })
const v2Authors = ['5a296963ad5e82432674c839', null]
let testProjectId
return testProjects
.createEmptyProject()
.then(projectId => {
testProjectId = projectId
expect(testProjectId).to.be.a('string')
})
.then(() => {
return fetch(
testServer.url(
`/api/projects/${testProjectId}/blobs/${File.EMPTY_FILE_HASH}`
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('empty.tex')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
})
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
// Import changes
const testFile = File.fromHash(File.EMPTY_FILE_HASH)
const changes = [
makeChange(Operation.addFile(testFilePathname, testFile)),
makeChange(Operation.editFile(testFilePathname, testTextOperation)),
]
changes[1].setV2Authors(v2Authors)
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
return_snapshot: 'hashed',
changes: changes.map(changeToRaw),
})
})
.then(response => {
expect(response.status).to.equal(HTTPStatus.CREATED)
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(1)
expect(snapshot.getFile('test.tex').getHash()).to.equal(
testFiles.STRING_A_HASH
)
})
.then(() => {
// Get project history
return pseudoJwtBasicAuthClient.apis.Project.getLatestHistory({
project_id: testProjectId,
})
})
.then(response => {
// It should not fail when some of the authors no longer exist
const chunkResponse = ChunkResponse.fromRaw(response.obj)
const changes = chunkResponse.getChunk().getChanges()
expect(changes.length).to.equal(2)
const changeWithAuthor = changes[1]
expect(changeWithAuthor.getV2Authors()).to.deep.equal(v2Authors)
})
})
it('should reject invalid v2 author ids', function () {
const testFilePathname = 'test.tex'
const v2Authors = ['not-a-v2-id']
let testProjectId
return testProjects
.createEmptyProject()
.then(projectId => {
testProjectId = projectId
expect(testProjectId).to.be.a('string')
})
.then(() => {
return fetch(
testServer.url(
`/api/projects/${testProjectId}/blobs/${File.EMPTY_FILE_HASH}`
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('empty.tex')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
})
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
// Import changes
const testFile = File.fromHash(File.EMPTY_FILE_HASH)
const changes = [
makeChange(Operation.addFile(testFilePathname, testFile)),
]
changes[0].v2Authors = v2Authors
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
changes: changes.map(changeToRaw),
})
})
.then(() => {
// Check that invalid changes fail
expect.fail()
})
.catch(expectResponse.unprocessableEntity)
})
it('should reject changes with both v1 and v2 author ids', function () {
const testFilePathname = 'test.tex'
const v1Authors = [456]
const v2Authors = ['5a296963ad5e82432674c839', null]
let testProjectId
return testProjects
.createEmptyProject()
.then(projectId => {
testProjectId = projectId
expect(testProjectId).to.be.a('string')
})
.then(() => {
return fetch(
testServer.url(
`/api/projects/${testProjectId}/blobs/${File.EMPTY_FILE_HASH}`
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('empty.tex')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
})
.then(response => {
expect(response.ok).to.be.true
})
.then(() => {
// Import changes
const testFile = File.fromHash(File.EMPTY_FILE_HASH)
const changes = [
makeChange(Operation.addFile(testFilePathname, testFile)),
]
changes[0].authors = v1Authors
changes[0].v2Authors = v2Authors
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: testProjectId,
end_version: 0,
changes: changes.map(changeToRaw),
})
})
.then(() => {
// Check that invalid changes fail
expect.fail()
})
.catch(expectResponse.unprocessableEntity)
})
it("returns unprocessable if end_version isn't provided", function () {
return testProjects
.createEmptyProject()
.then(projectId => {
expect(projectId).to.be.a('string')
return projectId
})
.then(projectId => {
// Import changes
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: projectId,
changes: [],
})
})
.then(() => {
// Check that invalid changes fail
expect.fail()
})
.catch(error => {
expect(error.message).to.equal(
'Required parameter end_version is not provided'
)
})
})
it('returns unprocessable if return_snapshot is invalid', function () {
return testProjects
.createEmptyProject()
.then(projectId => {
// Import changes
return basicAuthClient.apis.ProjectImport.importChanges1({
project_id: projectId,
changes: [],
end_version: 0,
return_snapshot: 'not_a_valid_value',
})
})
.then(() => {
// Check that invalid changes fail
expect.fail()
})
.catch(error => {
expect(error.status).to.equal(HTTPStatus.UNPROCESSABLE_ENTITY)
expect(error.response.body.message).to.equal(
'invalid enum value: return_snapshot'
)
})
})
})

View File

@@ -0,0 +1,333 @@
'use strict'
const { expect } = require('chai')
const fs = require('node:fs')
const HTTPStatus = require('http-status')
const fetch = require('node-fetch')
const sinon = require('sinon')
const cleanup = require('../storage/support/cleanup')
const fixtures = require('../storage/support/fixtures')
const testFiles = require('../storage/support/test_files')
const { zipStore, persistChanges } = require('../../../../storage')
const { expectHttpError } = require('./support/expect_response')
const testServer = require('./support/test_server')
const { createEmptyProject } = require('./support/test_projects')
const {
File,
Snapshot,
Change,
AddFileOperation,
EditFileOperation,
TextOperation,
} = require('overleaf-editor-core')
const testProjects = require('./support/test_projects')
describe('project controller', function () {
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
describe('initializeProject', function () {
it('can initialize a new project', async function () {
const projectId = await testProjects.createEmptyProject()
expect(projectId).to.be.a('string')
})
})
describe('createZip', function () {
let importSnapshot
let createZip
before(function () {
importSnapshot =
testServer.basicAuthClient.apis.ProjectImport.importSnapshot1
createZip = testServer.basicAuthClient.apis.Project.createZip
})
beforeEach(function () {
// Don't start the work in the background in this test --- it is flaky.
sinon.stub(zipStore, 'storeZip').resolves()
})
afterEach(function () {
zipStore.storeZip.restore()
})
it('creates a URL to a zip file', async function () {
// Create a test blob.
const testProjectId = fixtures.docs.uninitializedProject.id
const response = await fetch(
testServer.url(
`/api/projects/${testProjectId}/blobs/${testFiles.HELLO_TXT_HASH}`
),
{
method: 'PUT',
body: fs.createReadStream(testFiles.path('hello.txt')),
headers: {
Authorization: testServer.basicAuthHeader,
},
}
)
expect(response.ok).to.be.true
// Import a project with the test blob.
const testFilePathname = 'hello.txt'
const testSnapshot = new Snapshot()
testSnapshot.addFile(
testFilePathname,
File.fromHash(testFiles.HELLO_TXT_HASH)
)
const importResponse = await importSnapshot({
project_id: testProjectId,
snapshot: testSnapshot.toRaw(),
})
expect(importResponse.obj.projectId).to.equal(testProjectId)
const createZipResponse = await createZip({
project_id: testProjectId,
version: 0,
})
expect(createZipResponse.status).to.equal(HTTPStatus.OK)
const zipInfo = createZipResponse.obj
expect(zipInfo.zipUrl).to.match(
/^http:\/\/gcs:9090\/download\/storage\/v1\/b\/overleaf-test-zips/
)
expect(zipStore.storeZip.calledOnce).to.be.true
})
})
// eslint-disable-next-line mocha/no-skipped-tests
describe.skip('getLatestContent', function () {
// TODO: remove this endpoint entirely, see
// https://github.com/overleaf/write_latex/pull/5120#discussion_r244291862
})
describe('project with changes', function () {
let projectId
beforeEach(async function () {
// used to provide a limit which forces us to persist all of the changes.
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
const limits = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const changes = [
new Change(
[new AddFileOperation('test.tex', File.fromString('ab'))],
new Date(),
[]
),
new Change(
[new AddFileOperation('other.tex', File.fromString('hello'))],
new Date(),
[]
),
]
projectId = await createEmptyProject()
await persistChanges(projectId, changes, limits, 0)
})
describe('getLatestHashedContent', function () {
it('returns a snapshot', async function () {
const response =
await testServer.basicAuthClient.apis.Project.getLatestHashedContent({
project_id: projectId,
})
expect(response.status).to.equal(HTTPStatus.OK)
const snapshot = Snapshot.fromRaw(response.obj)
expect(snapshot.countFiles()).to.equal(2)
expect(snapshot.getFile('test.tex').getHash()).to.equal(
testFiles.STRING_AB_HASH
)
})
})
describe('getChanges', function () {
it('returns all changes when not given a limit', async function () {
const response =
await testServer.basicAuthClient.apis.Project.getChanges({
project_id: projectId,
})
expect(response.status).to.equal(HTTPStatus.OK)
const changes = response.obj
expect(changes.length).to.equal(2)
const filenames = changes
.flatMap(change => change.operations)
.map(operation => operation.pathname)
expect(filenames).to.deep.equal(['test.tex', 'other.tex'])
})
it('returns only requested changes', async function () {
const response =
await testServer.basicAuthClient.apis.Project.getChanges({
project_id: projectId,
since: 1,
})
expect(response.status).to.equal(HTTPStatus.OK)
const changes = response.obj
expect(changes.length).to.equal(1)
const filenames = changes
.flatMap(change => change.operations)
.map(operation => operation.pathname)
expect(filenames).to.deep.equal(['other.tex'])
})
it('rejects negative versions', async function () {
await expect(
testServer.basicAuthClient.apis.Project.getChanges({
project_id: projectId,
since: -1,
})
).to.be.rejectedWith('Bad Request')
})
it('rejects out of bounds versions', async function () {
await expect(
testServer.basicAuthClient.apis.Project.getChanges({
project_id: projectId,
since: 20,
})
).to.be.rejectedWith('Bad Request')
})
})
})
describe('project with many chunks', function () {
let projectId
beforeEach(async function () {
// used to provide a limit which forces us to persist all of the changes.
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
const limits = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
maxChunkChanges: 5,
}
const changes = [
new Change(
[new AddFileOperation('test.tex', File.fromString(''))],
new Date(),
[]
),
]
for (let i = 0; i < 20; i++) {
const textOperation = new TextOperation()
textOperation.retain(i)
textOperation.insert('x')
changes.push(
new Change(
[new EditFileOperation('test.tex', textOperation)],
new Date(),
[]
)
)
}
projectId = await createEmptyProject()
await persistChanges(projectId, changes, limits, 0)
})
it('returns all changes when not given a limit', async function () {
const response = await testServer.basicAuthClient.apis.Project.getChanges(
{
project_id: projectId,
}
)
expect(response.status).to.equal(HTTPStatus.OK)
const changes = response.obj
expect(changes.length).to.equal(21)
expect(changes[10].operations[0].textOperation).to.deep.equal([9, 'x'])
})
it('returns only requested changes', async function () {
const response = await testServer.basicAuthClient.apis.Project.getChanges(
{
project_id: projectId,
since: 10,
}
)
expect(response.status).to.equal(HTTPStatus.OK)
const changes = response.obj
expect(changes.length).to.equal(11)
expect(changes[2].operations[0].textOperation).to.deep.equal([11, 'x'])
})
})
describe('getLatestHistoryRaw', function () {
it('should handle read-only requests', async function () {
const projectId = fixtures.docs.initializedProject.id
const response =
await testServer.pseudoJwtBasicAuthClient.apis.Project.getLatestHistoryRaw(
{
project_id: projectId,
readOnly: 'true',
}
)
expect(response.body).to.deep.equal({
startVersion: 0,
endVersion: 1,
endTimestamp: '2032-01-01T00:00:00.000Z',
})
})
})
describe('deleteProject', function () {
it('deletes the project chunks', async function () {
const projectId = fixtures.docs.initializedProject.id
const historyResponse =
await testServer.pseudoJwtBasicAuthClient.apis.Project.getLatestHistory(
{
project_id: projectId,
}
)
expect(historyResponse.status).to.equal(HTTPStatus.OK)
expect(historyResponse.body).to.have.property('chunk')
const deleteResponse =
await testServer.basicAuthClient.apis.Project.deleteProject({
project_id: projectId,
})
expect(deleteResponse.status).to.equal(HTTPStatus.NO_CONTENT)
await expectHttpError(
testServer.pseudoJwtBasicAuthClient.apis.Project.getLatestHistory({
project_id: projectId,
}),
HTTPStatus.NOT_FOUND
)
})
it('deletes the project blobs', async function () {
const projectId = fixtures.docs.initializedProject.id
const token = testServer.createTokenForProject(projectId)
const authHeaders = { Authorization: `Bearer ${token}` }
const hash = testFiles.HELLO_TXT_HASH
const fileContents = await fs.promises.readFile(
testFiles.path('hello.txt')
)
const blobUrl = testServer.url(`/api/projects/${projectId}/blobs/${hash}`)
const response1 = await fetch(blobUrl, {
method: 'PUT',
headers: authHeaders,
body: fileContents,
})
expect(response1.ok).to.be.true
const response2 = await fetch(blobUrl, { headers: authHeaders })
const payload = await response2.text()
expect(payload).to.equal(fileContents.toString())
const deleteResponse =
await testServer.basicAuthClient.apis.Project.deleteProject({
project_id: projectId,
})
expect(deleteResponse.status).to.equal(HTTPStatus.NO_CONTENT)
const response3 = await fetch(blobUrl, { headers: authHeaders })
expect(response3.status).to.equal(HTTPStatus.NOT_FOUND)
})
})
})

View File

@@ -0,0 +1,53 @@
'use strict'
const { expect } = require('chai')
const HTTPStatus = require('http-status')
function expectStatus(err, expected) {
const httpStatus = err.status || err.statusCode
if (httpStatus === undefined) {
throw err
} else {
expect(httpStatus).to.equal(expected)
}
}
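// Clients surface the HTTP status under different property names (`status` or
// `statusCode`); both are checked, and errors with neither are rethrown as-is.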
async function expectHttpError(promise, expectedStatusCode) {
try {
await promise
} catch (err) {
const statusCode = err.status || err.statusCode
if (statusCode === undefined) {
throw err
} else {
expect(statusCode).to.equal(expectedStatusCode)
return
}
}
expect.fail('expected HTTP request to return with an error response')
}
exports.expectHttpError = expectHttpError
exports.notFound = function (err) {
expectStatus(err, HTTPStatus.NOT_FOUND)
}
exports.unprocessableEntity = function (err) {
expectStatus(err, HTTPStatus.UNPROCESSABLE_ENTITY)
}
exports.conflict = function (err) {
expectStatus(err, HTTPStatus.CONFLICT)
}
exports.unauthorized = function (err) {
expectStatus(err, HTTPStatus.UNAUTHORIZED)
}
exports.forbidden = function (err) {
expectStatus(err, HTTPStatus.FORBIDDEN)
}
exports.requestEntityTooLarge = function (err) {
expectStatus(err, HTTPStatus.REQUEST_ENTITY_TOO_LARGE)
}

View File

@@ -0,0 +1,51 @@
// @ts-check
import config from 'config'
import { startApp } from '../../../../../backup-deletion-app.mjs'
/** @type {import("http").Server} */
let server
/**
* @param {string} pathname
* @return {string}
*/
function testUrl(pathname) {
const url = new URL('http://127.0.0.1')
const addr = server.address()
if (addr && typeof addr === 'object') {
url.port = addr.port.toString()
}
url.pathname = pathname
return url.toString()
}
const basicAuthHeader =
'Basic ' +
Buffer.from(`staging:${config.get('basicHttpAuth.password')}`).toString(
'base64'
)
async function listenOnRandomPort() {
if (server) return // already running
for (let i = 0; i < 10; i++) {
try {
server = await startApp(0)
return
} catch {}
}
server = await startApp(0)
}
after('close server', function (done) {
if (server) {
server.close(done)
} else {
done()
}
})
export default {
testUrl,
basicAuthHeader,
listenOnRandomPort,
}

View File

@@ -0,0 +1,43 @@
// @ts-check
import { startApp } from '../../../../../backup-verifier-app.mjs'
/** @type {import("http").Server} */
let server
/**
* @param {string} pathname
* @return {string}
*/
function testUrl(pathname) {
const url = new URL('http://127.0.0.1')
const addr = server.address()
if (addr && typeof addr === 'object') {
url.port = addr.port.toString()
}
url.pathname = pathname
return url.toString()
}
async function listenOnRandomPort() {
if (server) return // already running
for (let i = 0; i < 10; i++) {
try {
server = await startApp(0)
return
} catch {}
}
server = await startApp(0, false)
}
after('close server', function (done) {
if (server) {
server.close(done)
} else {
done()
}
})
export default {
testUrl,
listenOnRandomPort,
}

View File

@@ -0,0 +1,26 @@
const BPromise = require('bluebird')
const { expect } = require('chai')
const HTTPStatus = require('http-status')
const assert = require('../../../../../storage/lib/assert')
const testServer = require('./test_server')
/**
* Without a provided history id, a new one will get generated.
* The history id could either be a mongo id, or a postgres id.
*
* @param {string} [existingHistoryId]
* @return {Promise<string>}
*/
exports.createEmptyProject = function (existingHistoryId) {
return BPromise.resolve(
testServer.basicAuthClient.apis.Project.initializeProject({
body: { projectId: existingHistoryId },
})
).then(response => {
expect(response.status).to.equal(HTTPStatus.OK)
const { projectId } = response.obj
assert.projectId(projectId, 'bad projectId')
return projectId
})
}
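// Illustrative usage (not part of the original helper):
//   testProjects.createEmptyProject().then(projectId => { /* ... */ })
//   testProjects.createEmptyProject('42') // reuse an existing history id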

View File

@@ -0,0 +1,133 @@
/**
* @file
* Create a test server. For performance reasons, there is only one test server,
* and it is shared between all of the tests.
*
* This uses the mocha's "root-level hooks" to start and clean up the server.
*/
const BPromise = require('bluebird')
const config = require('config')
const http = require('node:http')
const jwt = require('jsonwebtoken')
const Swagger = require('swagger-client')
const app = require('../../../../../app')
function testUrl(pathname, opts = {}) {
const url = new URL('http://127.0.0.1')
url.port = exports.server.address().port
url.pathname = pathname
if (opts.qs) {
// URL#searchParams is a read-only accessor, so set the query string via url.search.
url.search = new URLSearchParams(opts.qs).toString()
}
return url.toString()
}
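// Illustrative example: testUrl('/api-docs', { qs: { token: 't' } }) yields
// something like 'http://127.0.0.1:<port>/api-docs?token=t'.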
exports.url = testUrl
function createClient(options) {
// The Swagger client returns native Promises; we use Bluebird promises. Just
// wrapping the client creation is enough in many (but not all) cases to
// get Bluebird into the chain.
return BPromise.resolve(new Swagger(testUrl('/api-docs'), options))
}
function createTokenForProject(projectId, opts = {}) {
const jwtKey = opts.jwtKey || config.get('jwtAuth.key')
const jwtAlgorithm = config.get('jwtAuth.algorithm')
return jwt.sign({ project_id: projectId }, jwtKey, {
algorithm: jwtAlgorithm,
})
}
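// The resulting token is sent as a bearer credential, e.g.
// `Authorization: Bearer <token>` (see createClientForProject below).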
exports.createTokenForProject = createTokenForProject
function createClientForProject(projectId, opts = {}) {
const token = createTokenForProject(projectId, opts)
return createClient({ authorizations: { jwt: `Bearer ${token}` } })
}
exports.createClientForProject = createClientForProject
function createClientForDownloadZip(projectId) {
const token = createTokenForProject(projectId)
return createClient({ authorizations: { token } })
}
exports.createClientForDownloadZip = createClientForDownloadZip
function createBasicAuthClient() {
return createClient({
authorizations: {
basic: {
username: 'staging',
password: config.get('basicHttpAuth.password'),
},
},
})
}
function createPseudoJwtBasicAuthClient() {
// HACK: The history service will accept HTTP basic auth for any endpoint that
// is expecting a JWT. If / when we fix that, we will need to fix this.
const jwt =
'Basic ' +
Buffer.from(`staging:${config.get('basicHttpAuth.password')}`).toString(
'base64'
)
return createClient({ authorizations: { jwt } })
}
exports.basicAuthHeader =
'Basic ' +
Buffer.from(`staging:${config.get('basicHttpAuth.password')}`).toString(
'base64'
)
function createServer() {
const server = http.createServer(app)
return app.setup().then(() => {
exports.server = server
return server
})
}
function createDefaultUnauthenticatedClient() {
return createClient().then(client => {
exports.client = client
})
}
function createDefaultBasicAuthClient() {
return createBasicAuthClient().then(client => {
exports.basicAuthClient = client
})
}
function createDefaultPseudoJwtBasicAuthClient() {
return createPseudoJwtBasicAuthClient().then(client => {
exports.pseudoJwtBasicAuthClient = client
})
}
before(function () {
function listenOnRandomPort(server) {
const listen = BPromise.promisify(server.listen, { context: server })
return listen(0).catch(err => {
if (err.code !== 'EADDRINUSE' && err.code !== 'EACCES') throw err
return listenOnRandomPort(server)
})
}
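// The server listens on an OS-assigned ephemeral port (listen(0)); only
// EADDRINUSE/EACCES trigger a retry above, any other error is rethrown.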
return createServer()
.then(listenOnRandomPort)
.then(createDefaultUnauthenticatedClient)
.then(createDefaultBasicAuthClient)
.then(createDefaultPseudoJwtBasicAuthClient)
})
after(function () {
exports.server.close()
})

View File

@@ -0,0 +1,248 @@
'use strict'
const OError = require('@overleaf/o-error')
const { expect } = require('chai')
const assert = require('../../../../storage/lib/assert')
describe('assert', function () {
describe('blobHash', function () {
it('should not throw for valid blob hashes', function () {
expect(() =>
assert.blobHash(
'aad321caf77ca6c5ab09e6c638c237705f93b001',
'should be a blob hash'
)
).to.not.throw()
})
it('should throw for invalid blob hashes', function () {
try {
assert.blobHash('invalid-hash', 'should be a blob hash')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a blob hash')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-hash' })
}
})
it('should throw for string integer blob hashes', function () {
try {
assert.blobHash('123', 'should be a blob hash')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a blob hash')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: '123' })
}
})
})
describe('projectId', function () {
it('should not throw for valid mongo project ids', function () {
expect(() =>
assert.projectId('507f1f77bcf86cd799439011', 'should be a project id')
).to.not.throw()
})
it('should not throw for valid postgres project ids', function () {
expect(() =>
assert.projectId('123456789', 'should be a project id')
).to.not.throw()
})
it('should throw for invalid project ids', function () {
try {
assert.projectId('invalid-id', 'should be a project id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a project id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' })
}
})
it('should throw for non-numeric project ids', function () {
try {
assert.projectId('12345x', 'should be a project id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a project id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345x' })
}
})
it('should throw for postgres ids starting with 0', function () {
try {
assert.projectId('0123456', 'should be a project id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a project id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: '0123456' })
}
})
})
describe('chunkId', function () {
it('should not throw for valid mongo chunk ids', function () {
expect(() =>
assert.chunkId('507f1f77bcf86cd799439011', 'should be a chunk id')
).to.not.throw()
})
it('should not throw for valid postgres chunk ids', function () {
expect(() =>
assert.chunkId('123456789', 'should be a chunk id')
).to.not.throw()
})
it('should throw for invalid chunk ids', function () {
try {
assert.chunkId('invalid-id', 'should be a chunk id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a chunk id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' })
}
})
it('should throw for integer chunk ids', function () {
try {
assert.chunkId(12345, 'should be a chunk id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a chunk id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: 12345 })
}
})
})
describe('mongoId', function () {
it('should not throw for valid mongo ids', function () {
expect(() =>
assert.mongoId('507f1f77bcf86cd799439011', 'should be a mongo id')
).to.not.throw()
})
it('should throw for invalid mongo ids', function () {
try {
assert.mongoId('invalid-id', 'should be a mongo id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a mongo id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' })
}
})
it('should throw for numeric mongo ids', function () {
try {
assert.mongoId('12345', 'should be a mongo id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a mongo id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345' })
}
})
it('should throw for mongo ids that are too short', function () {
try {
assert.mongoId('507f1f77bcf86cd79943901', 'should be a mongo id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a mongo id')
expect(OError.getFullInfo(error)).to.deep.equal({
arg: '507f1f77bcf86cd79943901',
})
}
})
it('should throw for mongo ids that are too long', function () {
try {
assert.mongoId('507f1f77bcf86cd7994390111', 'should be a mongo id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a mongo id')
expect(OError.getFullInfo(error)).to.deep.equal({
arg: '507f1f77bcf86cd7994390111',
})
}
})
})
describe('postgresId', function () {
it('should not throw for valid postgres ids', function () {
expect(() =>
assert.postgresId('123456789', 'should be a postgres id')
).to.not.throw()
expect(() =>
assert.postgresId('1', 'should be a postgres id')
).to.not.throw()
})
it('should throw for invalid postgres ids', function () {
try {
assert.postgresId('invalid-id', 'should be a postgres id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a postgres id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: 'invalid-id' })
}
})
it('should throw for postgres ids starting with 0', function () {
try {
assert.postgresId('0123456', 'should be a postgres id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a postgres id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: '0123456' })
}
})
it('should throw for postgres ids that are too long', function () {
try {
assert.postgresId('12345678901', 'should be a postgres id')
expect.fail()
} catch (error) {
expect(error).to.be.instanceOf(TypeError)
expect(error.message).to.equal('should be a postgres id')
expect(OError.getFullInfo(error)).to.deep.equal({ arg: '12345678901' })
}
})
})
describe('regex constants', function () {
it('MONGO_ID_REGEXP should match valid mongo ids', function () {
expect('507f1f77bcf86cd799439011').to.match(assert.MONGO_ID_REGEXP)
expect('abcdef0123456789abcdef01').to.match(assert.MONGO_ID_REGEXP)
})
it('MONGO_ID_REGEXP should not match invalid mongo ids', function () {
expect('invalid-id').to.not.match(assert.MONGO_ID_REGEXP)
expect('507f1f77bcf86cd79943901').to.not.match(assert.MONGO_ID_REGEXP) // too short
expect('507f1f77bcf86cd7994390111').to.not.match(assert.MONGO_ID_REGEXP) // too long
expect('507F1F77BCF86CD799439011').to.not.match(assert.MONGO_ID_REGEXP) // uppercase
})
it('POSTGRES_ID_REGEXP should match valid postgres ids', function () {
expect('123456789').to.match(assert.POSTGRES_ID_REGEXP)
expect('1').to.match(assert.POSTGRES_ID_REGEXP)
})
it('POSTGRES_ID_REGEXP should not match invalid postgres ids', function () {
expect('invalid-id').to.not.match(assert.POSTGRES_ID_REGEXP)
expect('0123456').to.not.match(assert.POSTGRES_ID_REGEXP) // starts with 0
expect('12345678901').to.not.match(assert.POSTGRES_ID_REGEXP) // too long (> 10 digits)
})
})
})

File diff suppressed because it is too large

View File

@@ -0,0 +1,818 @@
import fs from 'node:fs'
import Crypto from 'node:crypto'
import Stream from 'node:stream'
import { promisify } from 'node:util'
import { Binary, ObjectId } from 'mongodb'
import { Blob } from 'overleaf-editor-core'
import { backedUpBlobs, blobs, db } from '../../../../storage/lib/mongodb.js'
import cleanup from './support/cleanup.js'
import testProjects from '../api/support/test_projects.js'
import { execFile } from 'node:child_process'
import chai, { expect } from 'chai'
import chaiExclude from 'chai-exclude'
import config from 'config'
import { WritableBuffer } from '@overleaf/stream-utils'
import {
backupPersistor,
projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import projectKey from '../../../../storage/lib/project_key.js'
import {
BlobStore,
makeProjectKey,
} from '../../../../storage/lib/blob_store/index.js'
import ObjectPersistor from '@overleaf/object-persistor'
chai.use(chaiExclude)
const TIMEOUT = 20 * 1_000
const { deksBucket } = config.get('backupStore')
const { tieringStorageClass } = config.get('backupPersistor')
const projectsCollection = db.collection('projects')
const deletedProjectsCollection = db.collection('deletedProjects')
const FILESTORE_PERSISTOR = ObjectPersistor({
backend: 'gcs',
gcs: {
endpoint: {
apiEndpoint: process.env.GCS_API_ENDPOINT,
projectId: process.env.GCS_PROJECT_ID,
},
},
})
/**
* @param {ObjectId} objectId
* @return {string}
*/
function gitBlobHash(objectId) {
return gitBlobHashBuffer(Buffer.from(objectId.toString()))
}
/**
* @param {Buffer} buf
* @return {string}
*/
function gitBlobHashBuffer(buf) {
const sha = Crypto.createHash('sha1')
sha.update(`blob ${buf.byteLength}\x00`)
sha.update(buf)
return sha.digest('hex')
}
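// This mirrors git's blob object hashing: sha1 of "blob <byte length>\0"
// followed by the content, i.e. the value `git hash-object` would report.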
/**
* @param {string} gitBlobHash
* @return {Binary}
*/
function binaryForGitBlobHash(gitBlobHash) {
return new Binary(Buffer.from(gitBlobHash, 'hex'))
}
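// backedUpBlobs stores hashes as BSON Binary values, so the expected entries in
// the mongo assertions below are built the same way.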
async function listS3Bucket(bucket, wantStorageClass) {
const client = backupPersistor._getClientForBucket(bucket)
const response = await client.listObjectsV2({ Bucket: bucket }).promise()
for (const object of response.Contents || []) {
expect(object).to.have.property('StorageClass', wantStorageClass)
}
return (response.Contents || []).map(item => item.Key || '')
}
function objectIdFromTime(timestamp) {
return ObjectId.createFromTime(new Date(timestamp).getTime() / 1000)
}
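// ObjectId.createFromTime encodes only the timestamp (the remaining bytes are
// zero), so these test ids are deterministic for a given date.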
const PRINT_IDS_AND_HASHES_FOR_DEBUGGING = false
describe('back_fill_file_hash_fix_up script', function () {
this.timeout(TIMEOUT)
const USER_FILES_BUCKET_NAME = 'fake-user-files-gcs'
const projectId0 = objectIdFromTime('2017-01-01T00:00:00Z')
const projectIdDeleted0 = objectIdFromTime('2017-01-01T00:04:00Z')
const historyId0 = 42 // stored as a number in mongo
const historyIdDeleted0 = projectIdDeleted0.toString()
const fileIdWithDifferentHashFound = objectIdFromTime('2017-02-01T00:00:00Z')
const fileIdInGoodState = objectIdFromTime('2017-02-01T00:01:00Z')
const fileIdBlobExistsInGCS0 = objectIdFromTime('2017-02-01T00:02:00Z')
const fileIdWithDifferentHashNotFound0 = objectIdFromTime(
'2017-02-01T00:03:00Z'
)
const fileIdWithDifferentHashNotFound1 = objectIdFromTime(
'2017-02-01T00:04:00Z'
)
const fileIdBlobExistsInGCSCorrupted = objectIdFromTime(
'2017-02-01T00:05:00Z'
)
const fileIdMissing0 = objectIdFromTime('2024-02-01T00:06:00Z')
const fileIdMissing1 = objectIdFromTime('2017-02-01T00:07:00Z')
const fileIdWithDifferentHashRestore = objectIdFromTime(
'2017-02-01T00:08:00Z'
)
const fileIdBlobExistsInGCS1 = objectIdFromTime('2017-02-01T00:09:00Z')
const fileIdRestoreFromFilestore0 = objectIdFromTime('2017-02-01T00:10:00Z')
const fileIdRestoreFromFilestore1 = objectIdFromTime('2017-02-01T00:11:00Z')
const fileIdMissing2 = objectIdFromTime('2017-02-01T00:12:00Z')
const fileIdHashMissing0 = objectIdFromTime('2017-02-01T00:13:00Z')
const fileIdHashMissing1 = objectIdFromTime('2017-02-01T00:14:00Z')
const contentCorruptedBlob = 'string that produces another hash'
const contentDoesNotExistAsBlob = 'does not exist as blob'
const hashDoesNotExistAsBlob = gitBlobHashBuffer(
Buffer.from(contentDoesNotExistAsBlob)
)
const deleteProjectsRecordId0 = new ObjectId()
const writtenBlobs = [
{
projectId: projectId0,
historyId: historyId0,
fileId: fileIdBlobExistsInGCS0,
},
{
projectId: projectId0,
historyId: historyId0,
fileId: fileIdBlobExistsInGCS1,
},
{
projectId: projectId0,
historyId: historyId0,
fileId: fileIdWithDifferentHashNotFound0,
},
{
projectId: projectId0,
historyId: historyId0,
fileId: fileIdRestoreFromFilestore0,
},
{
projectId: projectId0,
historyId: historyId0,
fileId: fileIdRestoreFromFilestore1,
},
{
projectId: projectId0,
historyId: historyId0,
fileId: fileIdHashMissing0,
},
{
projectId: projectId0,
historyId: historyId0,
fileId: fileIdHashMissing1,
},
{
projectId: projectIdDeleted0,
historyId: historyIdDeleted0,
fileId: fileIdWithDifferentHashNotFound1,
},
]
const logs = [
{
projectId: projectId0,
fileId: fileIdWithDifferentHashFound,
err: { message: 'OError: hash mismatch' },
hash: gitBlobHash(fileIdMissing0), // does not matter
entry: {
ctx: { historyId: historyId0.toString() },
hash: gitBlobHash(fileIdInGoodState),
},
msg: 'failed to process file',
},
{
projectId: projectId0,
fileId: fileIdWithDifferentHashRestore,
err: { message: 'OError: hash mismatch' },
hash: hashDoesNotExistAsBlob,
entry: {
ctx: { historyId: historyId0.toString() },
hash: gitBlobHash(fileIdMissing0), // does not matter
},
msg: 'failed to process file',
},
{
projectId: projectId0,
fileId: fileIdWithDifferentHashNotFound0,
err: { message: 'OError: hash mismatch' },
hash: gitBlobHash(fileIdWithDifferentHashNotFound0),
entry: {
ctx: { historyId: historyId0.toString() },
hash: hashDoesNotExistAsBlob,
},
msg: 'failed to process file',
},
{
projectId: projectId0,
fileId: fileIdRestoreFromFilestore0,
err: { message: 'OError: hash mismatch' },
hash: gitBlobHash(fileIdRestoreFromFilestore0),
entry: {
ctx: { historyId: historyId0.toString() },
hash: hashDoesNotExistAsBlob,
},
msg: 'failed to process file',
},
{
projectId: projectIdDeleted0,
fileId: fileIdWithDifferentHashNotFound1,
err: { message: 'OError: hash mismatch' },
hash: gitBlobHash(fileIdWithDifferentHashNotFound1),
entry: {
ctx: { historyId: historyIdDeleted0.toString() },
hash: hashDoesNotExistAsBlob,
},
msg: 'failed to process file',
},
{
projectId: projectId0,
fileId: fileIdMissing0,
bucketName: USER_FILES_BUCKET_NAME,
err: { message: 'NotFoundError' },
msg: 'failed to process file',
},
{
projectId: projectId0,
fileId: fileIdMissing2,
bucketName: USER_FILES_BUCKET_NAME,
err: { message: 'NotFoundError' },
msg: 'failed to process file',
},
{
projectId: projectId0,
fileId: fileIdBlobExistsInGCS0,
hash: gitBlobHash(fileIdBlobExistsInGCS0),
err: { message: 'storage.objects.delete' },
msg: 'failed to process file',
},
{
projectId: projectId0,
fileId: fileIdBlobExistsInGCSCorrupted,
hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
err: { message: 'storage.objects.delete' },
msg: 'failed to process file',
},
{
projectId: projectId0,
fileId: fileIdBlobExistsInGCS1,
hash: gitBlobHash(fileIdBlobExistsInGCS1),
err: { message: 'storage.objects.delete' },
msg: 'failed to process file',
},
{
projectId: projectId0,
fileId: fileIdRestoreFromFilestore1,
err: { message: 'storage.objects.delete' },
msg: 'failed to process file',
},
{
projectId: projectIdDeleted0,
fileId: fileIdMissing1,
bucketName: USER_FILES_BUCKET_NAME,
err: { message: 'NotFoundError' },
msg: 'failed to process file',
},
{
err: { message: 'spurious error' },
msg: 'failed to process file, trying again',
},
{
err: { message: 'some other error' },
msg: 'failed to process file',
},
// from find_malformed_filetrees.mjs
{
projectId: projectId0,
_id: fileIdHashMissing0,
reason: 'bad file hash',
msg: 'bad file-tree path',
},
{
projectId: projectId0,
_id: fileIdHashMissing1,
reason: 'bad file hash',
msg: 'bad file-tree path',
},
]
if (PRINT_IDS_AND_HASHES_FOR_DEBUGGING) {
const fileIds = {
fileIdWithDifferentHashFound,
fileIdInGoodState,
fileIdBlobExistsInGCS0,
fileIdBlobExistsInGCS1,
fileIdWithDifferentHashNotFound0,
fileIdWithDifferentHashNotFound1,
fileIdBlobExistsInGCSCorrupted,
fileIdMissing0,
fileIdMissing1,
fileIdMissing2,
fileIdWithDifferentHashRestore,
fileIdRestoreFromFilestore0,
fileIdRestoreFromFilestore1,
fileIdHashMissing0,
fileIdHashMissing1,
}
console.log({
projectId0,
projectIdDeleted0,
historyId0,
historyIdDeleted0,
...fileIds,
hashDoesNotExistAsBlob,
})
for (const [name, v] of Object.entries(fileIds)) {
console.log(
name,
gitBlobHash(v),
Array.from(binaryForGitBlobHash(gitBlobHash(v)).value())
)
}
}
before(cleanup.everything)
before('populate blobs/GCS', async function () {
await FILESTORE_PERSISTOR.sendStream(
USER_FILES_BUCKET_NAME,
`${projectId0}/${fileIdRestoreFromFilestore0}`,
Stream.Readable.from([fileIdRestoreFromFilestore0.toString()])
)
await FILESTORE_PERSISTOR.sendStream(
USER_FILES_BUCKET_NAME,
`${projectId0}/${fileIdRestoreFromFilestore1}`,
Stream.Readable.from([fileIdRestoreFromFilestore1.toString()])
)
await FILESTORE_PERSISTOR.sendStream(
USER_FILES_BUCKET_NAME,
`${projectId0}/${fileIdHashMissing0}`,
Stream.Readable.from([fileIdHashMissing0.toString()])
)
await FILESTORE_PERSISTOR.sendStream(
USER_FILES_BUCKET_NAME,
`${projectId0}/${fileIdHashMissing1}`,
Stream.Readable.from([fileIdHashMissing1.toString()])
)
await new BlobStore(historyId0.toString()).putString(
fileIdHashMissing1.toString() // partially processed
)
await new BlobStore(historyId0.toString()).putString(
fileIdBlobExistsInGCS0.toString()
)
await new BlobStore(historyId0.toString()).putString(
fileIdBlobExistsInGCS1.toString()
)
await new BlobStore(historyId0.toString()).putString(
fileIdRestoreFromFilestore1.toString()
)
const path = '/tmp/test-blob-corrupted'
try {
await fs.promises.writeFile(path, contentCorruptedBlob)
await new BlobStore(historyId0.toString()).putBlob(
path,
new Blob(gitBlobHash(fileIdBlobExistsInGCSCorrupted), 42)
)
} finally {
await fs.promises.rm(path, { force: true })
}
await cleanup.postgres()
await cleanup.mongo()
await Promise.all([
testProjects.createEmptyProject(historyId0.toString()),
testProjects.createEmptyProject(historyIdDeleted0),
])
await new BlobStore(historyId0.toString()).putString(
fileIdWithDifferentHashNotFound0.toString()
)
await new BlobStore(historyIdDeleted0.toString()).putString(
fileIdWithDifferentHashNotFound1.toString()
)
await new BlobStore(historyId0.toString()).putString(
fileIdInGoodState.toString()
)
})
before('populate mongo', async function () {
await projectsCollection.insertMany([
{
_id: projectId0,
rootFolder: [
{
fileRefs: [
{ _id: fileIdMissing0 },
{ _id: fileIdMissing0 }, // bad file-tree, duplicated fileRef.
{ _id: fileIdMissing2 },
{ _id: fileIdHashMissing0 },
{ _id: fileIdHashMissing1 },
{
_id: fileIdWithDifferentHashFound,
hash: gitBlobHash(fileIdInGoodState),
},
{
_id: fileIdWithDifferentHashRestore,
hash: gitBlobHash(fileIdMissing0),
},
],
folders: [
{
docs: [],
},
null,
{
fileRefs: [
null,
{
_id: fileIdInGoodState,
hash: gitBlobHash(fileIdInGoodState),
},
{
_id: fileIdWithDifferentHashNotFound0,
hash: hashDoesNotExistAsBlob,
},
{
_id: fileIdRestoreFromFilestore0,
hash: hashDoesNotExistAsBlob,
},
{
_id: fileIdRestoreFromFilestore1,
},
{
_id: fileIdBlobExistsInGCS0,
hash: gitBlobHash(fileIdBlobExistsInGCS0),
},
{
_id: fileIdBlobExistsInGCSCorrupted,
hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
},
{ _id: fileIdBlobExistsInGCS1 },
],
folders: [],
},
],
},
],
overleaf: { history: { id: historyId0 } },
version: 0,
},
])
await deletedProjectsCollection.insertMany([
{
_id: deleteProjectsRecordId0,
project: {
_id: projectIdDeleted0,
rootFolder: [
{
fileRefs: [
{
_id: fileIdWithDifferentHashNotFound1,
hash: hashDoesNotExistAsBlob,
},
],
folders: [
{
fileRefs: [],
folders: [
{ fileRefs: [{ _id: fileIdMissing1 }], folders: [] },
],
},
],
},
],
overleaf: { history: { id: historyIdDeleted0 } },
version: 100,
},
deleterData: {
deletedProjectId: projectIdDeleted0,
},
},
])
})
/**
* @param {Array<string>} args
* @param {Record<string, string>} env
* @return {Promise<{ stdout: string, stderr: string, status: number }>}
*/
async function tryRunScript(args = [], env = {}) {
let result
try {
result = await promisify(execFile)(
process.argv0,
['storage/scripts/back_fill_file_hash_fix_up.mjs', ...args],
{
encoding: 'utf-8',
timeout: TIMEOUT - 500,
env: {
...process.env,
USER_FILES_BUCKET_NAME,
SLEEP_BEFORE_EXIT: '1',
...env,
LOG_LEVEL: 'warn', // Override LOG_LEVEL of acceptance tests
},
}
)
result.status = 0
} catch (err) {
const { stdout, stderr, code } = err
if (typeof code !== 'number') {
console.log(err)
}
result = { stdout, stderr, status: code }
}
expect((await fs.promises.readdir('/tmp')).join(';')).to.not.match(
/back_fill_file_hash/
)
return result
}
async function runScriptWithLogs() {
const logsPath = '/tmp/test-script-logs'
let result
try {
await fs.promises.writeFile(
logsPath,
logs.map(e => JSON.stringify(e)).join('\n')
)
result = await tryRunScript([`--logs=${logsPath}`])
} finally {
await fs.promises.rm(logsPath, { force: true })
}
const stats = JSON.parse(result.stdout.trim().split('\n').pop())
return {
result,
stats,
}
}
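// Assumption: the fix-up script prints its summary stats as the last JSON line
// on stdout, which is what the parsing above relies on.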
let result, stats
before(async function () {
;({ result, stats } = await runScriptWithLogs())
})
it('should print stats', function () {
expect(stats).to.contain({
processedLines: 16,
success: 11,
alreadyProcessed: 0,
fileDeleted: 0,
skipped: 0,
failed: 3,
unmatched: 1,
})
})
it('should handle re-run on same logs', async function () {
;({ stats } = await runScriptWithLogs())
expect(stats).to.contain({
processedLines: 16,
success: 0,
alreadyProcessed: 8,
fileDeleted: 3,
skipped: 0,
failed: 3,
unmatched: 1,
})
})
it('should flag the unknown fatal error', function () {
const unknown = result.stdout
.split('\n')
.filter(l => l.includes('unknown fatal error'))
expect(unknown).to.have.length(1)
const [line] = unknown
expect(line).to.exist
expect(line).to.include('some other error')
})
it('should flag the unexpected blob on mismatched hash', function () {
const line = result.stdout
.split('\n')
.find(l => l.includes('found blob with computed filestore object hash'))
expect(line).to.exist
expect(line).to.include(projectId0.toString())
expect(line).to.include(fileIdWithDifferentHashFound.toString())
expect(line).to.include(gitBlobHash(fileIdInGoodState))
})
it('should flag the need to restore', function () {
const line = result.stdout
.split('\n')
.find(l => l.includes('missing blob, need to restore filestore file'))
expect(line).to.exist
expect(line).to.include(projectId0.toString())
expect(line).to.include(fileIdWithDifferentHashRestore.toString())
expect(line).to.include(hashDoesNotExistAsBlob)
})
it('should flag the corrupted blob', function () {
const line = result.stdout
.split('\n')
.find(l => l.includes('blob corrupted'))
expect(line).to.exist
expect(line).to.include(projectId0.toString())
expect(line).to.include(fileIdBlobExistsInGCSCorrupted.toString())
expect(line).to.include(
gitBlobHashBuffer(Buffer.from(contentCorruptedBlob))
)
expect(line).to.include(gitBlobHash(fileIdBlobExistsInGCSCorrupted))
})
it('should update mongo', async function () {
expect(await projectsCollection.find({}).toArray())
.excludingEvery([
'currentEndTimestamp',
'currentEndVersion',
'updatedAt',
'backup',
])
.to.deep.equal([
{
_id: projectId0,
rootFolder: [
{
fileRefs: [
// Removed
// { _id: fileIdMissing0 },
// Removed
// { _id: fileIdMissing2 },
// Added hash
{
_id: fileIdHashMissing0,
hash: gitBlobHash(fileIdHashMissing0),
},
// Added hash
{
_id: fileIdHashMissing1,
hash: gitBlobHash(fileIdHashMissing1),
},
// No change, should warn about the find.
{
_id: fileIdWithDifferentHashFound,
hash: gitBlobHash(fileIdInGoodState),
},
// No change, should warn about the need to restore.
{
_id: fileIdWithDifferentHashRestore,
hash: gitBlobHash(fileIdMissing0),
},
],
folders: [
{
docs: [],
},
null,
{
fileRefs: [
null,
// No change
{
_id: fileIdInGoodState,
hash: gitBlobHash(fileIdInGoodState),
},
// Updated hash
{
_id: fileIdWithDifferentHashNotFound0,
hash: gitBlobHash(fileIdWithDifferentHashNotFound0),
},
// Updated hash
{
_id: fileIdRestoreFromFilestore0,
hash: gitBlobHash(fileIdRestoreFromFilestore0),
},
// Added hash
{
_id: fileIdRestoreFromFilestore1,
hash: gitBlobHash(fileIdRestoreFromFilestore1),
},
// No change, blob created
{
_id: fileIdBlobExistsInGCS0,
hash: gitBlobHash(fileIdBlobExistsInGCS0),
},
// No change, flagged
{
_id: fileIdBlobExistsInGCSCorrupted,
hash: gitBlobHash(fileIdBlobExistsInGCSCorrupted),
},
// Added hash
{
_id: fileIdBlobExistsInGCS1,
hash: gitBlobHash(fileIdBlobExistsInGCS1),
},
],
folders: [],
},
],
},
],
overleaf: { history: { id: historyId0 } },
// Incremented when removing file/updating hash
version: 8,
},
])
expect(await deletedProjectsCollection.find({}).toArray()).to.deep.equal([
{
_id: deleteProjectsRecordId0,
project: {
_id: projectIdDeleted0,
rootFolder: [
{
fileRefs: [
// Updated hash
{
_id: fileIdWithDifferentHashNotFound1,
hash: gitBlobHash(fileIdWithDifferentHashNotFound1),
},
],
folders: [
{
fileRefs: [],
folders: [
{
fileRefs: [
// Removed
// { _id: fileIdMissing1 },
],
folders: [],
},
],
},
],
},
],
overleaf: { history: { id: historyIdDeleted0 } },
// Incremented when removing file/updating hash
version: 102,
},
deleterData: {
deletedProjectId: projectIdDeleted0,
},
},
])
const writtenBlobsByProject = new Map()
for (const { projectId, fileId } of writtenBlobs) {
writtenBlobsByProject.set(
projectId,
(writtenBlobsByProject.get(projectId) || []).concat([fileId])
)
}
expect(
(await backedUpBlobs.find({}, { sort: { _id: 1 } }).toArray()).map(
entry => {
// blobs are pushed unordered into mongo. Sort the list for consistency.
entry.blobs.sort()
return entry
}
)
).to.deep.equal(
Array.from(writtenBlobsByProject.entries()).map(
([projectId, fileIds]) => {
return {
_id: projectId,
blobs: fileIds
.map(fileId => binaryForGitBlobHash(gitBlobHash(fileId)))
.sort(),
}
}
)
)
})
it('should have backed up all the files', async function () {
expect(tieringStorageClass).to.exist
const objects = await listS3Bucket(projectBlobsBucket, tieringStorageClass)
expect(objects.sort()).to.deep.equal(
writtenBlobs
.map(({ historyId, fileId, hash }) =>
makeProjectKey(historyId, hash || gitBlobHash(fileId))
)
.sort()
)
for (let { historyId, fileId } of writtenBlobs) {
const hash = gitBlobHash(fileId.toString())
const s = await backupPersistor.getObjectStream(
projectBlobsBucket,
makeProjectKey(historyId, hash),
{ autoGunzip: true }
)
const buf = new WritableBuffer()
await Stream.promises.pipeline(s, buf)
expect(gitBlobHashBuffer(buf.getContents())).to.equal(hash)
const id = buf.getContents().toString('utf-8')
expect(id).to.equal(fileId.toString())
// double check we are not comparing 'undefined' or '[object Object]' above
expect(id).to.match(/^[a-f0-9]{24}$/)
}
const deks = await listS3Bucket(deksBucket, 'STANDARD')
expect(deks.sort()).to.deep.equal(
Array.from(
new Set(
writtenBlobs.map(
({ historyId }) => projectKey.format(historyId) + '/dek'
)
)
).sort()
)
})
it('should have written the back filled files to history v1', async function () {
for (const { historyId, fileId } of writtenBlobs) {
const blobStore = new BlobStore(historyId.toString())
const hash = gitBlobHash(fileId.toString())
const blob = await blobStore.getBlob(hash)
expect(blob).to.exist
expect(blob.getByteLength()).to.equal(24)
const id = await blobStore.getString(hash)
expect(id).to.equal(fileId.toString())
// double check we are not comparing 'undefined' or '[object Object]' above
expect(id).to.match(/^[a-f0-9]{24}$/)
}
})
})

View File

@@ -0,0 +1,682 @@
import config from 'config'
import { ObjectId } from 'mongodb'
import { expect } from 'chai'
import {
backedUpBlobs,
client,
globalBlobs,
} from '../../../../storage/lib/mongodb.js'
import persistor from '../../../../storage/lib/persistor.js'
import {
loadGlobalBlobs,
BlobStore,
makeProjectKey,
} from '../../../../storage/lib/blob_store/index.js'
import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
import projectKey from '../../../../storage/lib/project_key.js'
import { getBackupStatus } from '../../../../storage/lib/backup_store/index.js'
import { text, buffer } from 'node:stream/consumers'
import { createGunzip } from 'node:zlib'
import { Change, Operation, File, TextOperation } from 'overleaf-editor-core'
import ChunkStore from '../../../../storage/lib/chunk_store/index.js'
import persistChanges from '../../../../storage/lib/persist_changes.js'
import { historyStore } from '../../../../storage/lib/history_store.js'
import { execFile } from 'node:child_process'
import { promisify } from 'node:util'
import testFiles from '../storage/support/test_files.js'
import fs from 'node:fs'
import {
backupBlob,
storeBlobBackup,
} from '../../../../storage/lib/backupBlob.mjs'
import {
backupPersistor,
projectBlobsBucket,
chunksBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { Readable } from 'node:stream'
const projectsCollection = client.db().collection('projects')
/**
* @param {ObjectId} projectId
* @param {number} version
* @return {string}
*/
function makeChunkKey(projectId, version) {
return projectKey.format(projectId) + '/' + projectKey.pad(version)
}
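// Backed-up chunks live in the chunks bucket under '<project key>/<padded start
// version>'; this helper reproduces that key for the assertions below.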
describe('backup script', function () {
let project
let projectId, historyId
let limitsToPersistImmediately
before(function () {
// Used to provide a limit which forces us to persist all of the changes
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
limitsToPersistImmediately = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
maxChanges: 10,
maxChunkChanges: 10,
}
})
beforeEach(async function () {
// Set up test projects with proper history metadata
projectId = new ObjectId()
historyId = projectId.toString()
project = {
_id: projectId,
overleaf: {
history: {
id: historyId,
currentEndVersion: 0, // Will be updated as changes are made
currentEndTimestamp: new Date(), // Will be updated as changes are made
},
backup: {
// Start with no backup state
},
},
}
// Pre-load the global blobs
await loadGlobalBlobs()
// Clean up any pre-existing test data
await projectsCollection.deleteMany({
_id: projectId,
})
await backedUpBlobs.deleteMany({}) // Clear any existing backedUpBlobs entries
})
describe('with simple project content', function () {
const contentString = 'hello world'
const newContentString = 'hello world more'
const graphPngPath = testFiles.path('graph.png')
const graphPngBuf = fs.readFileSync(graphPngPath)
const graphPngHash = testFiles.GRAPH_PNG_HASH
const nonBmpPath = testFiles.path('non_bmp.txt')
const DUMMY_HASH = '1111111111111111111111111111111111111111'
beforeEach(async function () {
// Create initial project
await projectsCollection.insertOne(project)
// Initialize project in chunk store
await ChunkStore.initializeProject(historyId)
const blobStore = new BlobStore(historyId)
// Create the blobs and then back them up using backupBlob
const graphPngBlob = await blobStore.putFile(graphPngPath)
await backupBlob(historyId, graphPngBlob, graphPngPath)
// Add initial content using persistChanges
const file = File.fromString(contentString)
const addFileOp = Operation.addFile('main.tex', file)
const addGraphFileOp = Operation.addFile(
'graph.png',
File.fromHash(testFiles.GRAPH_PNG_HASH)
)
const change1 = new Change([addFileOp, addGraphFileOp], new Date(), [])
await persistChanges(historyId, [change1], limitsToPersistImmediately, 0)
// Add a second change with a proper TextOperation
// For text operation: first number is how many chars to retain, then the text to insert
const textOp = TextOperation.fromJSON({
textOperation: [contentString.length, ' more'], // Keep existing content, append ' more'
})
const editOp = Operation.editFile('main.tex', textOp)
const change2 = new Change([editOp], new Date(), [])
// store an unrelated hash in the backedUpBlobs collection,
// so we can test that only the backed up hashes are cleared.
await storeBlobBackup(historyId, DUMMY_HASH)
await persistChanges(historyId, [change2], limitsToPersistImmediately, 1)
})
it('should perform an initial backup', async function () {
// Run backup script for initial version
const { stdout } = await runBackupScript(['--projectId', projectId])
expect(stdout).to.not.include(
'warning: persistor not passed to backupBlob'
)
// Verify backup state
const result = await getBackupStatus(projectId)
expect(result.backupStatus.lastBackedUpVersion).to.equal(2)
expect(result.backupStatus.lastBackedUpAt).to.be.an.instanceOf(Date)
expect(result.currentEndTimestamp).to.be.an.instanceOf(Date)
expect(result.backupStatus.pendingChangeAt).to.be.undefined
// Verify graph.png blob was backed up
const graphBlobStream = await backupPersistor.getObjectStream(
projectBlobsBucket,
makeProjectKey(historyId, graphPngHash),
{ autoGunzip: true }
)
const graphBlobContent = await buffer(graphBlobStream)
expect(graphBlobContent.equals(graphPngBuf)).to.be.true
// Verify chunk was backed up
const chunkStream = await backupPersistor.getObjectStream(
chunksBucket,
makeChunkKey(historyId, 0)
)
const chunkContent = await text(chunkStream.pipe(createGunzip()))
const chunk = await ChunkStore.loadLatestRaw(historyId)
const rawHistory = await historyStore.loadRaw(historyId, chunk.id)
expect(JSON.parse(chunkContent)).to.deep.equal(rawHistory)
// Unrelated entries from backedUpBlobs should not be cleared
const backedUpBlobsDoc = await backedUpBlobs.findOne({
_id: project._id,
})
expect(backedUpBlobsDoc).not.to.be.null
expect(backedUpBlobsDoc.blobs).to.have.length(1)
expect(backedUpBlobsDoc.blobs[0].toString('hex')).to.equal(DUMMY_HASH)
})
it('should perform an incremental backup', async function () {
// Backup first version
const { stdout: stdout1 } = await runBackupScript([
'--projectId',
projectId,
])
expect(stdout1).to.not.include(
'warning: persistor not passed to backupBlob'
)
// Verify first backup
const result1 = await getBackupStatus(projectId)
expect(result1.backupStatus.lastBackedUpVersion).to.equal(2)
// Persist additional changes
const additionalTextOp = TextOperation.fromJSON({
textOperation: [newContentString.length, ' even more'], // Keep existing content, append ' even more'
})
const additionalEditOp = Operation.editFile('main.tex', additionalTextOp)
const firstTimestamp = new Date()
const additionalChange = new Change(
[additionalEditOp],
firstTimestamp,
[]
)
// Add the non-BMP file
const blobStore = new BlobStore(historyId)
const nonBmpBlob = await blobStore.putFile(nonBmpPath)
await backupBlob(historyId, nonBmpBlob, nonBmpPath)
// Verify that the non-BMP file was backed up when the file was added
const newBackedUpBlobs = await backedUpBlobs.findOne({
_id: project._id,
})
expect(newBackedUpBlobs).not.to.be.null
expect(newBackedUpBlobs.blobs).to.have.length(2)
expect(
newBackedUpBlobs.blobs.map(b => b.toString('hex'))
).to.have.members([testFiles.NON_BMP_TXT_HASH, DUMMY_HASH])
const addNonBmpFileOp = Operation.addFile(
'non_bmp.txt',
File.fromHash(testFiles.NON_BMP_TXT_HASH)
)
const secondTimestamp = new Date()
const additionalChange2 = new Change(
[addNonBmpFileOp],
secondTimestamp,
[]
)
await persistChanges(
historyId,
[additionalChange, additionalChange2],
limitsToPersistImmediately,
2
)
const afterChangeResult = await getBackupStatus(projectId)
// Verify that the currentEndVersion and currentEndTimestamp are updated
expect(afterChangeResult.currentEndVersion).to.equal(4)
expect(afterChangeResult.currentEndTimestamp)
.to.be.an.instanceOf(Date)
.and.to.be.greaterThan(result1.currentEndTimestamp)
// Persisting a change should not modify the backup version and timestamp
expect(afterChangeResult.backupStatus.lastBackedUpVersion).to.equal(2)
expect(afterChangeResult.backupStatus.lastBackedUpAt)
.to.be.an.instanceOf(Date)
.and.to.deep.equal(result1.backupStatus.lastBackedUpAt)
// but it should update the pendingChangeAt timestamp to the timestamp of the
// first change which modified the project
expect(afterChangeResult.backupStatus.pendingChangeAt)
.to.be.an.instanceOf(Date)
.and.to.deep.equal(firstTimestamp)
// Second backup
const { stdout: stdout2 } = await runBackupScript([
'--projectId',
projectId,
])
expect(stdout2).to.not.include(
'warning: persistor not passed to backupBlob'
)
// Verify incremental backup
const result2 = await getBackupStatus(projectId)
// The backup version and timestamp should be updated
expect(result2.backupStatus.lastBackedUpVersion).to.equal(4)
expect(result2.backupStatus.lastBackedUpAt)
.to.be.an.instanceOf(Date)
.and.to.be.greaterThan(result1.backupStatus.lastBackedUpAt)
// The currentEndVersion and currentEndTimestamp should not be modified
expect(result2.currentEndVersion).to.equal(4)
expect(result2.currentEndTimestamp)
.to.be.an.instanceOf(Date)
.and.to.deep.equal(afterChangeResult.currentEndTimestamp)
// The pendingChangeAt timestamp should be cleared when the backup is complete
expect(result2.backupStatus.pendingChangeAt).to.be.undefined
// Verify additional blob was backed up
const newBlobStream = await backupPersistor.getObjectStream(
projectBlobsBucket,
makeProjectKey(historyId, testFiles.NON_BMP_TXT_HASH),
{ autoGunzip: true }
)
const newBlobContent = await buffer(newBlobStream)
expect(newBlobContent).to.deep.equal(
fs.readFileSync(testFiles.path('non_bmp.txt'))
)
// Check chunk was backed up
const chunkStream = await backupPersistor.getObjectStream(
chunksBucket,
makeChunkKey(historyId, 0)
)
const chunkContent = await text(chunkStream.pipe(createGunzip()))
const chunk = await ChunkStore.loadLatestRaw(historyId)
const rawHistory = await historyStore.loadRaw(historyId, chunk.id)
expect(JSON.parse(chunkContent)).to.deep.equal(rawHistory)
// Unrelated entries in backedUpBlobs should not be cleared
const backedUpBlobsDoc = await backedUpBlobs.findOne({
_id: project._id,
})
expect(backedUpBlobsDoc).not.to.be.null
expect(backedUpBlobsDoc.blobs).to.have.length(1)
expect(backedUpBlobsDoc.blobs[0].toString('hex')).to.equal(DUMMY_HASH)
})
it('should not backup global blobs', async function () {
const globalBlobString = 'a'
const globalBlobHash = testFiles.STRING_A_HASH
await globalBlobs.insertOne({
_id: globalBlobHash,
byteLength: globalBlobString.length,
stringLength: globalBlobString.length,
})
const bucket = config.get('blobStore.globalBucket')
for (const { key, content } of [
{
key: '2e/65/efe2a145dda7ee51d1741299f848e5bf752e',
content: globalBlobString,
},
]) {
const stream = Readable.from([content])
await persistor.sendStream(bucket, key, stream)
}
await loadGlobalBlobs()
// Create a change using the global blob
const addFileOp = Operation.addFile(
'global.tex',
File.fromHash(globalBlobHash)
)
const change = new Change([addFileOp], new Date(), [])
await persistChanges(historyId, [change], limitsToPersistImmediately, 2)
// Run backup
await runBackupScript(['--projectId', projectId])
// Verify global blob wasn't backed up
try {
await backupPersistor.getObjectStream(
projectBlobsBucket,
makeProjectKey(historyId, globalBlobHash),
{ autoGunzip: true }
)
expect.fail('Should not find global blob in project blobs')
} catch (err) {
expect(err).to.be.an.instanceOf(NotFoundError)
}
})
it('should back up global blobs if they are demoted', async function () {
const demotedBlobString = 'ab'
const demotedBlobHash = testFiles.STRING_AB_HASH
await globalBlobs.insertOne({
_id: demotedBlobHash,
byteLength: demotedBlobString.length,
stringLength: demotedBlobString.length,
demoted: true,
})
const bucket = config.get('blobStore.globalBucket')
for (const { key, content } of [
{
key: '9a/e9/e86b7bd6cb1472d9373702d8249973da0832',
content: demotedBlobString,
},
]) {
const stream = Readable.from([content])
await persistor.sendStream(bucket, key, stream)
}
await loadGlobalBlobs()
// Create a change using the global blob
const addFileOp = Operation.addFile(
'demoted.tex',
File.fromHash(demotedBlobHash)
)
const change = new Change([addFileOp], new Date(), [])
await persistChanges(historyId, [change], limitsToPersistImmediately, 2)
// Run backup
const { stdout } = await runBackupScript(['--projectId', projectId])
expect(stdout).to.not.include(
'warning: persistor not passed to backupBlob'
)
// Check chunk was backed up
const chunkStream = await backupPersistor.getObjectStream(
chunksBucket,
makeChunkKey(historyId, 0)
)
const chunkContent = await text(chunkStream.pipe(createGunzip()))
const chunk = await ChunkStore.loadLatestRaw(historyId)
const rawHistory = await historyStore.loadRaw(historyId, chunk.id)
expect(JSON.parse(chunkContent)).to.deep.equal(rawHistory)
// Verify that the demoted global blob was backed up
try {
const demotedBlobStream = await backupPersistor.getObjectStream(
projectBlobsBucket,
makeProjectKey(historyId, demotedBlobHash),
{
autoGunzip: true,
}
)
const demotedBlobContent = await buffer(demotedBlobStream)
expect(demotedBlobContent).to.deep.equal(Buffer.from(demotedBlobString))
} catch (err) {
expect.fail('Should find demoted global blob in project blobs')
}
})
})
describe('with complex project content', function () {
let beforeInitializationTimestamp
let afterInitializationTimestamp
beforeEach(async function () {
// Create initial project
await projectsCollection.insertOne(project)
// Initialize project in chunk store
// bracket the initialisation with two timestamps to check the pendingChangeAt field
beforeInitializationTimestamp = new Date()
await ChunkStore.initializeProject(historyId)
afterInitializationTimestamp = new Date()
const blobStore = new BlobStore(historyId)
// Set up test files with varying content
const testFilesData = {
mainTex: { name: 'main.tex', content: 'Initial content' },
chapter1: { name: 'chapter1.tex', content: 'Chapter 1 content' },
chapter2: { name: 'chapter2.tex', content: 'Chapter 2 content' },
bibliography: {
name: 'bibliography.bib',
content: '@article{key1,\n title={Title1}\n}',
newContent: '@article{key2,\n title={Title2}\n}',
},
graph: {
name: 'graph.png',
path: testFiles.path('graph.png'),
hash: testFiles.GRAPH_PNG_HASH,
},
unicodeFile: {
name: 'unicodeFile.tex',
path: testFiles.path('non_bmp.txt'),
hash: testFiles.NON_BMP_TXT_HASH,
},
}
const textFiles = [
testFilesData.mainTex,
testFilesData.chapter1,
testFilesData.chapter2,
testFilesData.bibliography,
]
const binaryFiles = [testFilesData.graph, testFilesData.unicodeFile]
// Add binary files first
await Promise.all(binaryFiles.map(file => blobStore.putFile(file.path)))
// Back up the binary files
await Promise.all(
binaryFiles.map(async file => {
await backupBlob(
historyId,
await blobStore.putFile(file.path),
file.path
)
})
)
// Create operations to add all files initially
const addFileOperations = Object.values(testFilesData).map(file => {
if (file.path) {
return Operation.addFile(file.name, File.fromHash(file.hash))
}
return Operation.addFile(file.name, File.fromString(file.content))
})
// Initial change adding all files
const initialChange = new Change(addFileOperations, new Date(), [])
await persistChanges(
historyId,
[initialChange],
limitsToPersistImmediately,
0
)
// Generate a series of edit operations for each text file
const editOperations = []
for (let i = 0; i < 50; i++) {
const targetFile = textFiles[i % textFiles.length]
if (!targetFile.path) {
// Only text files are edited here; binary and non-BMP files are skipped
const appendText = `\n\nEdit ${i + 1}`
targetFile.content += appendText
const textOp = TextOperation.fromJSON({
textOperation: [
targetFile.content.length - appendText.length,
appendText,
],
})
const editOp = Operation.editFile(targetFile.name, textOp)
editOperations.push(new Change([editOp], new Date(), []))
}
}
// Add a delete operation
const deleteChange = new Change(
[Operation.removeFile(testFilesData.bibliography.name)],
new Date(),
[]
)
editOperations.push(deleteChange)
// Add the file back with different content
const addBackChange = new Change(
[
Operation.addFile(
testFilesData.bibliography.name,
File.fromString(testFilesData.bibliography.newContent)
),
],
new Date(),
[]
)
editOperations.push(addBackChange)
// Persist all changes
await persistChanges(
historyId,
editOperations,
limitsToPersistImmediately,
1
)
})
it('persistChanges should set the pendingChangeAt field to the time of snapshot initialisation', async function () {
const result = await getBackupStatus(projectId)
expect(result.backupStatus.pendingChangeAt).to.be.an.instanceOf(Date)
expect(result.backupStatus.pendingChangeAt)
.to.be.greaterThan(beforeInitializationTimestamp)
.and.to.be.lessThan(afterInitializationTimestamp)
})
it('should backup all chunks and blobs from a complex project history', async function () {
// Run backup script
const { stdout } = await runBackupScript(['--projectId', projectId])
expect(stdout).to.not.include(
'warning: persistor not passed to backupBlob'
)
// Verify backup state
const result = await getBackupStatus(projectId)
expect(result.backupStatus.lastBackedUpVersion).to.equal(53) // 1 initial change + 50 edits + 1 delete + 1 add back
expect(result.backupStatus.lastBackedUpAt).to.be.an.instanceOf(Date)
expect(result.currentEndTimestamp).to.be.an.instanceOf(Date)
expect(result.backupStatus.pendingChangeAt).to.be.undefined
// Verify that binary files were backed up
for (const hash of [
testFiles.GRAPH_PNG_HASH,
testFiles.NON_BMP_TXT_HASH,
]) {
const blobStream = await backupPersistor.getObjectStream(
projectBlobsBucket,
makeProjectKey(historyId, hash),
{ autoGunzip: true }
)
expect(blobStream).to.exist
}
// Get all chunks and verify they were backed up
const listing = await backupPersistor
._getClientForBucket(chunksBucket)
.listObjectsV2({
Bucket: chunksBucket,
Prefix: projectKey.format(historyId) + '/',
})
.promise()
const chunkKeys = listing.Contents.map(item => item.Key)
expect(chunkKeys.length).to.equal(6) // Should have multiple chunks
const localChunks = await ChunkStore.getProjectChunks(historyId)
const chunksByStartVersion = new Map()
for (const chunkRecord of localChunks) {
chunksByStartVersion.set(chunkRecord.startVersion, chunkRecord)
}
// Verify the content of each chunk matches what's in the history store
for (const chunkKey of chunkKeys) {
const chunkStream = await backupPersistor.getObjectStream(
chunksBucket,
chunkKey
)
const chunkContent = await text(chunkStream.pipe(createGunzip()))
const startVersion = parseInt(chunkKey.split('/').pop(), 10)
const chunk = chunksByStartVersion.get(startVersion)
const rawHistory = await historyStore.loadRaw(historyId, chunk.id)
expect(JSON.parse(chunkContent)).to.deep.equal(rawHistory)
}
})
it('should throw an error if downloading a blob fails', async function () {
const blobStore = new BlobStore(historyId)
const blob = await blobStore.putFile(
testFiles.path('null_characters.txt')
)
const change = new Change(
[Operation.addFile('broken-file', File.fromHash(blob.getHash()))],
new Date(),
[]
)
// Persist all changes
await persistChanges(historyId, [change], limitsToPersistImmediately, 53)
// Delete the blob from the underlying storage to simulate a failure
const bucket = config.get('blobStore.projectBucket')
const key = makeProjectKey(historyId, blob.getHash())
await persistor.deleteObject(bucket, key)
// Run backup script - it should fail because the blob is missing
let result
try {
result = await runBackupScript(['--projectId', projectId])
expect.fail('Backup script should have failed')
} catch (err) {
expect(err).to.exist
expect(result).to.not.exist
}
// Verify that backup did not complete
const newBackupStatus = await getBackupStatus(projectId)
expect(newBackupStatus.backupStatus.lastBackedUpVersion).to.equal(50) // backup fails on final chunk
expect(newBackupStatus.currentEndVersion).to.equal(54) // backup is incomplete due to missing blob
})
})
})
/**
* Run the backup script with given arguments
* @param {string[]} args
*/
async function runBackupScript(args) {
const TIMEOUT = 20 * 1000
let result
try {
result = await promisify(execFile)(
'node',
['storage/scripts/backup.mjs', ...args],
{
encoding: 'utf-8',
timeout: TIMEOUT,
env: {
...process.env,
LOG_LEVEL: 'debug', // Override LOG_LEVEL of acceptance tests
},
}
)
result.status = 0
} catch (err) {
const { stdout, stderr, code } = err
if (typeof code !== 'number') {
console.log(err)
}
result = { stdout, stderr, status: code }
}
if (result.status !== 0) {
throw new Error('backup failed')
}
return result
}
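// Illustrative sketch, not part of the test suite: the changes persisted in
// these tests repeatedly use the pattern
// TextOperation.fromJSON({ textOperation: [retainCount, insertedText] }),
// which keeps the existing file contents and appends new text. A hypothetical
// helper wrapping that pattern (using the same TextOperation, Operation and
// Change imports already used above) could look like this; it is unused and
// purely for illustration.
function makeAppendChange(fileName, currentContent, appendText) {
  // Retain every existing character, then insert the appended text at the end.
  const textOp = TextOperation.fromJSON({
    textOperation: [currentContent.length, appendText],
  })
  return new Change([Operation.editFile(fileName, textOp)], new Date(), [])
}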

View File

@@ -0,0 +1,278 @@
import { expect } from 'chai'
import Crypto from 'node:crypto'
import Stream from 'node:stream'
import {
makeBlobForFile,
getStringLengthOfFile,
makeProjectKey,
BlobStore,
} from '../../../../storage/lib/blob_store/index.js'
import { Blob } from 'overleaf-editor-core'
import { insertBlob } from '../../../../storage/lib/blob_store/mongo.js'
import {
backupBlob,
downloadBlobToDir,
} from '../../../../storage/lib/backupBlob.mjs'
import fs from 'node:fs'
import path from 'node:path'
import os from 'node:os'
import fsExtra from 'fs-extra'
import { backedUpBlobs, projects } from '../../../../storage/lib/mongodb.js'
import { Binary, ObjectId } from 'mongodb'
import {
backupPersistor,
projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { WritableBuffer } from '@overleaf/stream-utils'
import cleanup from './support/cleanup.js'
async function listS3BucketRaw(bucket) {
const client = backupPersistor._getClientForBucket(bucket)
return await client.listObjectsV2({ Bucket: bucket }).promise()
}
async function listS3Bucket(bucket, wantStorageClass) {
const response = await listS3BucketRaw(bucket)
for (const object of response.Contents || []) {
if (wantStorageClass) {
expect(object).to.have.property('StorageClass', wantStorageClass)
}
}
return (response.Contents || []).map(item => item.Key || '')
}
describe('backupBlob', function () {
let filePath
let tmpDir
before(async function () {
tmpDir = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'temp-test-'))
filePath = path.join(tmpDir, 'test.txt')
await fs.promises.writeFile(filePath, 'test')
})
after(async function () {
try {
await fsExtra.remove(tmpDir)
} catch (err) {
if (err.code !== 'ENOENT') {
console.log('failed to delete temporary file')
}
}
})
beforeEach(cleanup.everything)
describe('when the blob is already backed up', function () {
let blob
let historyId
beforeEach(async function () {
blob = await makeBlobForFile(filePath)
historyId = 'abc123def456abc789def123'
await backedUpBlobs.updateOne(
{
_id: new ObjectId(historyId),
},
{
$set: { blobs: [new Binary(Buffer.from(blob.getHash(), 'hex'))] },
},
{ upsert: true }
)
await backupBlob(historyId, blob, filePath)
})
it('does not upload the blob', async function () {
const bucketContents = await listS3Bucket(projectBlobsBucket)
expect(bucketContents).to.have.lengthOf(0)
})
})
describe('when the historyId is for a postgres project', function () {
let blob
let historyId
const projectId = new ObjectId()
beforeEach(async function () {
blob = await makeBlobForFile(filePath)
historyId = '123'
await projects.insertOne({
_id: projectId,
overleaf: { history: { id: 123 } },
})
await backupBlob(historyId, blob, filePath)
})
afterEach(async function () {
await projects.deleteOne({
_id: projectId,
})
})
it('uploads the blob to the backup', async function () {
const bucketContents = await listS3Bucket(projectBlobsBucket)
expect(bucketContents).to.have.lengthOf(1)
})
it('stores the backup', async function () {
expect(
await backedUpBlobs.findOne({
_id: projectId,
blobs: {
$elemMatch: { $eq: new Binary(Buffer.from(blob.getHash(), 'hex')) },
},
})
).to.exist
})
})
describe('when the blob is not already backed up', function () {
let blob
let historyId
beforeEach(async function () {
blob = await makeBlobForFile(filePath)
historyId = 'abc123def456abc789def123'
await backupBlob(historyId, blob, filePath)
})
it('uploads the blob to the backup', async function () {
const bucketContents = await listS3Bucket(projectBlobsBucket)
expect(bucketContents).to.have.lengthOf(1)
})
it('stores the backup', async function () {
expect(
await backedUpBlobs.findOne({
_id: new ObjectId(historyId),
blobs: {
$elemMatch: { $eq: new Binary(Buffer.from(blob.getHash(), 'hex')) },
},
})
).to.exist
})
})
const cases = [
{
name: 'text file',
content: Buffer.from('x'.repeat(1000)),
storedSize: 29, // zlib.gzipSync(content).byteLength
},
{
name: 'large text file',
// 'ü' is a 2-byte utf-8 character -> 4MB.
content: Buffer.from('ü'.repeat(2 * 1024 * 1024)),
storedSize: 4101, // zlib.gzipSync(content).byteLength
},
{
name: 'binary file',
content: Buffer.from([0, 1, 2, 3]),
storedSize: 4,
},
{
name: 'large binary file',
content: Crypto.randomBytes(10 * 1024 * 1024),
storedSize: 10 * 1024 * 1024,
},
]
for (const { name, content, storedSize } of cases) {
describe(name, function () {
let blob
let key
let historyId
beforeEach(async function () {
historyId = 'abc123def456abc789def123'
await fs.promises.writeFile(filePath, content)
blob = await makeBlobForFile(filePath)
blob.setStringLength(
await getStringLengthOfFile(blob.getByteLength(), filePath)
)
key = makeProjectKey(historyId, blob.getHash())
await backupBlob(historyId, blob, filePath)
})
it('should upload the blob', async function () {
const response = await listS3BucketRaw(projectBlobsBucket)
expect(response.Contents).to.have.length(1)
expect(response.Contents[0].Key).to.equal(key)
expect(response.Contents[0].Size).to.equal(storedSize)
})
it('should read back the same content', async function () {
const buf = new WritableBuffer()
await Stream.promises.pipeline(
await backupPersistor.getObjectStream(projectBlobsBucket, key, {
autoGunzip: true,
}),
buf
)
expect(buf.getContents()).to.deep.equal(content)
})
})
}
})
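// Illustrative sketch, not part of the tests above: the `storedSize` values in
// the table-driven cases are annotated as zlib.gzipSync(content).byteLength for
// text content, while binary content appears to be stored at its raw size.
// Under that assumption, the expected stored size could be recomputed as below;
// the helper name is hypothetical and the function is unused.
import zlib from 'node:zlib'

function expectedStoredSize(content, isTextBlob) {
  // Text blobs are assumed to be gzipped before upload; binary blobs kept as-is.
  return isTextBlob ? zlib.gzipSync(content).byteLength : content.byteLength
}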
describe('downloadBlobToDir', function () {
let tmpDirDownload
const historyId = 'abc123def456abc789def123'
before(async function () {
tmpDirDownload = await fs.promises.mkdtemp(
path.join(os.tmpdir(), 'downloadBlobTest-')
)
})
after(async function () {
await fs.promises.rm(tmpDirDownload, { recursive: true, force: true })
})
it('should download the blob successfully', async function () {
const data = 'hello world'
// Use putString instead of writing a source file and using makeBlobForFile
const blobStore = new BlobStore(historyId)
const blob = await blobStore.putString(data)
// Now call downloadBlobToDir which will use blobStore.getStream internally
const downloadedFilePath = await downloadBlobToDir(
historyId,
blob,
tmpDirDownload
)
const contents = await fs.promises.readFile(downloadedFilePath, 'utf8')
expect(contents).to.equal(data)
})
it('should delete the file on error (if file already exists)', async function () {
const data = 'data that will not be written'
const blobStore = new BlobStore(historyId)
const blob = await blobStore.putString(data)
const hash = blob.getHash()
const fileName = `${historyId}-${hash}`
// Pre-create the destination file to trigger a failure due to an existing file
const downloadedFilePath = path.join(tmpDirDownload, fileName)
await fs.promises.writeFile(downloadedFilePath, 'preexisting content')
try {
await downloadBlobToDir(historyId, blob, tmpDirDownload)
expect.fail('should not reach here')
} catch (error) {
// Check that the file was deleted
await expect(fs.promises.access(downloadedFilePath)).to.be.rejected
}
})
it('should not leave an empty file if download fails', async function () {
// Create a blob with a hash that does not exist in the blob store
const hash = '0000000000000000000000000000000000000000'
const blob = new Blob(hash, 12, 12)
await insertBlob(historyId, blob)
const fileName = `${historyId}-${hash}`
try {
await downloadBlobToDir(historyId, blob, tmpDirDownload)
expect.fail('should not reach here')
} catch (error) {
expect(error).to.be.instanceOf(Blob.NotFoundError)
const downloadedFilePath = path.join(tmpDirDownload, fileName)
// Check that the file was deleted
await expect(fs.promises.access(downloadedFilePath)).to.be.rejected
}
})
})
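// Illustrative note, not taken from the implementation: the assertions above
// assume that downloadBlobToDir writes to a file named `${historyId}-${hash}`
// inside the target directory. A sketch of that destination path (the helper
// name is hypothetical and the function is unused):
function sketchDownloadDestination(targetDir, historyId, blob) {
  return path.join(targetDir, `${historyId}-${blob.getHash()}`)
}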

View File

@@ -0,0 +1,51 @@
import {
pathToProjectFolder,
projectBlobsBucket,
} from '../../../../storage/lib/backupPersistor.mjs'
import { expect } from 'chai'
describe('backupPersistor', () => {
describe('pathToProjectFolder', () => {
it('handles postgres and mongo-ids', function () {
expect(pathToProjectFolder(projectBlobsBucket, '100/000/000')).to.equal(
'100/000/000/'
)
expect(pathToProjectFolder(projectBlobsBucket, '100/000/000/')).to.equal(
'100/000/000/'
)
expect(
pathToProjectFolder(projectBlobsBucket, '100/000/000/foo')
).to.equal('100/000/000/')
expect(pathToProjectFolder(projectBlobsBucket, '210/000/000')).to.equal(
'210/000/000/'
)
expect(pathToProjectFolder(projectBlobsBucket, '987/654/321')).to.equal(
'987/654/321/'
)
expect(pathToProjectFolder(projectBlobsBucket, '987/654/3219')).to.equal(
'987/654/3219/'
)
expect(
pathToProjectFolder(projectBlobsBucket, 'fed/cba/987654321000000000')
).to.equal('fed/cba/987654321000000000/')
expect(
pathToProjectFolder(projectBlobsBucket, 'fed/cba/987654321000000000/')
).to.equal('fed/cba/987654321000000000/')
expect(
pathToProjectFolder(
projectBlobsBucket,
'fed/cba/987654321000000000/foo'
)
).to.equal('fed/cba/987654321000000000/')
})
it('rejects invalid input', function () {
const cases = ['', '//', '1/2/3', '123/456/78', 'abc/d/e', 'abc/def/012']
for (const key of cases) {
expect(() => {
pathToProjectFolder(projectBlobsBucket, key)
}, key).to.throw('invalid project folder')
}
})
})
})
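// Minimal sketch, not the actual implementation in backupPersistor.mjs: the
// keys accepted above are either a postgres-style numeric id split as
// three/three/three-or-more digits, or a mongo-style id split as
// three/three/eighteen hex characters; anything after that first project
// folder is dropped. One way to express the same rule (unused, illustrative
// only):
function sketchPathToProjectFolder(key) {
  const match = key.match(
    /^(\d{3}\/\d{3}\/\d{3,}|[0-9a-f]{3}\/[0-9a-f]{3}\/[0-9a-f]{18})(\/.*)?$/
  )
  if (!match) throw new Error('invalid project folder')
  return `${match[1]}/`
}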

View File

@@ -0,0 +1,338 @@
import { expect } from 'chai'
import { backupGenerator } from '../../../../storage/lib/backupGenerator.mjs'
import ChunkStore from '../../../../storage/lib/chunk_store/index.js'
import persistChanges from '../../../../storage/lib/persist_changes.js'
import {
Change,
Operation,
TextOperation,
AddFileOperation,
File,
} from 'overleaf-editor-core'
import { ObjectId } from 'mongodb'
import testFiles from './support/test_files.js'
import { BlobStore } from '../../../../storage/lib/blob_store/index.js'
import fs from 'node:fs'
import blobHash from '../../../../storage/lib/blob_hash.js'
const scenarios = [
{
description: 'Postgres history',
createProject: ChunkStore.initializeProject,
},
{
description: 'Mongo history',
createProject: () =>
ChunkStore.initializeProject(new ObjectId().toString()),
},
]
for (const scenario of scenarios) {
describe(`backupGenerator with ${scenario.description}`, function () {
let projectId
let limitsToPersistImmediately
let blobStore
const NUM_CHUNKS = 3
const FINAL_VERSION = 24
before(function () {
// used to provide a limit which forces us to persist all of the changes
const farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
limitsToPersistImmediately = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
maxChunkChanges: 10,
}
})
beforeEach(async function () {
projectId = await scenario.createProject()
blobStore = new BlobStore(projectId)
// Add test files first
await Promise.all([
blobStore.putFile(testFiles.path('graph.png')),
blobStore.putFile(testFiles.path('non_bmp.txt')),
])
const HELLO_TXT = fs.readFileSync(testFiles.path('hello.txt')).toString()
// Create a sample project history for testing, with a chunk size of 10
//
// 1. Add a text file main.tex with contents from hello.txt
// 2. Add a binary file image.png with contents from graph.png
// 3. Add a text file other.tex with empty contents
// 4. Apply 10 changes that append characters to the end of other.tex giving 'aaaaaaaaaa'
// In applying the 10 changes we hit the first chunk boundary and create a new chunk.
// The first chunk contains the 3 file operations and 7 changes
// to other.tex which is now "aaaaaaa" (7 characters)
// snapshot: {}
// changes: add main.tex, add image.png, add other.tex, 7 changes to other.tex
// The second chunk has a snapshot with the existing files
// snapshot: main.tex, image.png, other.tex="aaaaaaa" (7 characters)
// changes: 3 changes to other.tex, each appending 'a'
// 5. Now we add a new file non_bmp.txt with non-BMP characters
// 6. Finally we apply 10 more changes to other.tex, each appending another 'a' to give 'aaaaaaaaaaaaaaaaaaaa' (20 characters)
// In applying the 10 changes we hit another chunk boundary and create a third chunk.
// The final state of the second chunk is
// snapshot: main.tex, image.png, other.tex="aaaaaaa" (7 characters)
// changes:
// 3 changes to other.tex, each appending 'a'
// add file non_bmp.txt,
// 6 changes to other.tex, each appending 'a'
// The third chunk will contain the last 4 changes to other.tex
// snapshot: main.tex, image.png, non_bmp.txt, other.tex="aaaaaaaaaaaaaaaa" (16 characters)
// changes: 4 changes to other.tex, each appending 'a'
const textChange = new Change(
[new AddFileOperation('main.tex', File.fromString(HELLO_TXT))],
new Date(),
[]
)
const binaryChange = new Change(
[
new AddFileOperation(
'image.png',
File.fromHash(testFiles.GRAPH_PNG_HASH)
),
],
new Date(),
[]
)
const otherChange = new Change(
[new AddFileOperation('other.tex', File.fromString(''))],
new Date(),
[]
)
// now append characters to the end of the contents of other.tex
const otherEdits = Array.from(
{ length: 10 },
(_, i) =>
new Change(
[
Operation.editFile(
'other.tex',
TextOperation.fromJSON({
textOperation: i === 0 ? ['a'] : [i, 'a'],
})
),
],
new Date(),
[]
)
)
const newFile = new Change(
[
new AddFileOperation(
'non_bmp.txt',
File.fromHash(testFiles.NON_BMP_TXT_HASH)
),
],
new Date(),
[]
)
const moreOtherEdits = Array.from(
{ length: 10 },
(_, i) =>
new Change(
[
Operation.editFile(
'other.tex',
TextOperation.fromJSON({ textOperation: [i + 10, 'a'] })
),
],
new Date(),
[]
)
)
await persistChanges(
projectId,
[
textChange,
binaryChange,
otherChange,
...otherEdits,
newFile,
...moreOtherEdits,
],
limitsToPersistImmediately,
0
)
})
it('should yield correct data for an initial backup', async function () {
const results = []
for await (const result of backupGenerator(projectId)) {
results.push(result)
}
// There should be 3 chunks
expect(results).to.have.length(NUM_CHUNKS)
// First chunk
expect(results[0].chunkRecord.startVersion).to.equal(0)
expect(results[0].chunkRecord.endVersion).to.equal(10)
expect(results[0].blobsToBackup).to.have.deep.members([
{
hash: testFiles.HELLO_TXT_HASH,
byteLength: testFiles.HELLO_TXT_BYTE_LENGTH,
stringLength: testFiles.HELLO_TXT_UTF8_LENGTH,
},
{
hash: testFiles.GRAPH_PNG_HASH,
byteLength: testFiles.GRAPH_PNG_BYTE_LENGTH,
stringLength: null,
},
{
hash: File.EMPTY_FILE_HASH,
byteLength: 0,
stringLength: 0,
},
])
// Second chunk
expect(results[1].chunkRecord.startVersion).to.equal(10)
expect(results[1].chunkRecord.endVersion).to.equal(20)
expect(results[1].blobsToBackup).to.have.deep.members([
{
hash: blobHash.fromString('a'.repeat(7)),
byteLength: 7,
stringLength: 7,
},
{
hash: testFiles.NON_BMP_TXT_HASH,
byteLength: testFiles.NON_BMP_TXT_BYTE_LENGTH,
stringLength: null,
},
])
// Third chunk
expect(results[2].chunkRecord.startVersion).to.equal(20)
expect(results[2].chunkRecord.endVersion).to.equal(24)
expect(results[2].blobsToBackup).to.have.deep.members([
{
hash: blobHash.fromString('a'.repeat(16)),
byteLength: 16,
stringLength: 16,
},
])
})
for (
let lastBackedUpVersion = 0;
lastBackedUpVersion <= FINAL_VERSION;
lastBackedUpVersion++
) {
it(`should yield the expected data when the last backed up version was ${lastBackedUpVersion}`, async function () {
const results = []
for await (const result of backupGenerator(
projectId,
lastBackedUpVersion
)) {
results.push(result)
}
const chunkDefinitions = [
{
chunk: { startVersion: 0, endVersion: 10 },
blobs: [
{
version: 1,
blob: {
hash: testFiles.HELLO_TXT_HASH,
byteLength: testFiles.HELLO_TXT_BYTE_LENGTH,
stringLength: testFiles.HELLO_TXT_UTF8_LENGTH,
},
},
{
version: 2,
blob: {
hash: testFiles.GRAPH_PNG_HASH,
byteLength: testFiles.GRAPH_PNG_BYTE_LENGTH,
stringLength: null,
},
},
{
version: 3,
blob: {
hash: File.EMPTY_FILE_HASH,
byteLength: 0,
stringLength: 0,
},
},
],
},
{
chunk: { startVersion: 10, endVersion: 20 },
blobs: [
{
version: 11,
blob: {
hash: blobHash.fromString('a'.repeat(7)),
byteLength: 7,
stringLength: 7,
},
},
{
version: 14,
blob: {
hash: testFiles.NON_BMP_TXT_HASH,
byteLength: testFiles.NON_BMP_TXT_BYTE_LENGTH,
stringLength: null,
},
},
],
},
{
chunk: { startVersion: 20, endVersion: 24 },
blobs: [
{
version: 21,
blob: {
hash: blobHash.fromString('a'.repeat(16)),
byteLength: 16,
stringLength: 16,
},
},
],
},
]
const expectedChunks = chunkDefinitions
.filter(({ chunk }) => lastBackedUpVersion < chunk.endVersion)
.map(({ chunk }) => chunk)
const expectedBlobs = chunkDefinitions
.filter(({ chunk }) => lastBackedUpVersion < chunk.endVersion)
.map(({ blobs }) =>
blobs
.filter(({ version }) => lastBackedUpVersion < version)
.map(({ blob }) => blob)
)
expect(results).to.have.length(expectedChunks.length)
expect(results).to.have.length(expectedBlobs.length)
results.forEach((result, i) => {
expect(result.chunkRecord).to.deep.include(expectedChunks[i])
expect(result.blobsToBackup).to.have.deep.members(expectedBlobs[i])
})
})
}
it(`should not back up blobs that have already been backed up in previous chunks`, async function () {
const results = []
for await (const result of backupGenerator(projectId)) {
results.push(result)
}
const seenBlobs = new Set()
for (const result of results) {
for (const blob of result.blobsToBackup) {
expect(seenBlobs).to.not.include(blob.hash)
seenBlobs.add(blob.hash)
}
}
})
})
}
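// Illustrative sketch, not part of the tests above: with maxChunkChanges set to
// 10 and FINAL_VERSION equal to 24, the chunk boundaries asserted in these
// tests follow from cutting the change list into consecutive windows of at most
// ten changes. The helper name below is hypothetical and the function is
// unused.
function sketchChunkBoundaries(finalVersion, maxChunkChanges) {
  const boundaries = []
  for (let start = 0; start < finalVersion; start += maxChunkChanges) {
    boundaries.push({
      startVersion: start,
      endVersion: Math.min(start + maxChunkChanges, finalVersion),
    })
  }
  // For (24, 10) this yields 0-10, 10-20 and 20-24, matching the three chunks.
  return boundaries
}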

View File

@@ -0,0 +1,52 @@
'use strict'
const { expect } = require('chai')
const cleanup = require('./support/cleanup')
const testFiles = require('./support/test_files')
const core = require('overleaf-editor-core')
const File = core.File
const storage = require('../../../../storage')
const BatchBlobStore = storage.BatchBlobStore
const BlobStore = storage.BlobStore
const projectId = '123'
const blobStore = new BlobStore(projectId)
const batchBlobStore = new BatchBlobStore(blobStore)
describe('BatchBlobStore', function () {
beforeEach(cleanup.everything)
it('can preload and batch getBlob calls', async function () {
// Add some test files
await Promise.all([
blobStore.putFile(testFiles.path('graph.png')),
blobStore.putFile(testFiles.path('hello.txt')),
])
// Cache some blobs (one that exists and another that doesn't)
await batchBlobStore.preload([
testFiles.GRAPH_PNG_HASH,
File.EMPTY_FILE_HASH, // not found
])
expect(batchBlobStore.blobs.size).to.equal(1)
const [cached, notCachedExists, notCachedNotExists, duplicate] =
await Promise.all([
batchBlobStore.getBlob(testFiles.GRAPH_PNG_HASH), // cached
batchBlobStore.getBlob(testFiles.HELLO_TXT_HASH), // not cached; exists
batchBlobStore.getBlob(File.EMPTY_FILE_HASH), // not cached; not exists
batchBlobStore.getBlob(testFiles.GRAPH_PNG_HASH), // duplicate
])
expect(cached.getHash()).to.equal(testFiles.GRAPH_PNG_HASH)
expect(notCachedExists.getHash()).to.equal(testFiles.HELLO_TXT_HASH)
expect(notCachedNotExists).to.be.undefined
expect(duplicate.getHash()).to.equal(testFiles.GRAPH_PNG_HASH)
// We should get exactly the object from the cache.
expect(cached).to.equal(batchBlobStore.blobs.get(testFiles.GRAPH_PNG_HASH))
})
})
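// Minimal sketch, not the real BatchBlobStore class, of the preload-then-get
// caching pattern exercised above: preload() fetches a set of hashes once and
// caches the blobs that exist, and getBlob() serves cached blobs, falling back
// to the underlying store on a miss. Unused, illustrative only.
class SketchBatchBlobStore {
  constructor(store) {
    this.store = store
    this.blobs = new Map()
  }

  async preload(hashes) {
    // Only hashes that exist in the store end up in the cache.
    for (const blob of await this.store.getBlobs(hashes)) {
      this.blobs.set(blob.getHash(), blob)
    }
  }

  async getBlob(hash) {
    // Return the exact cached object when available, otherwise hit the store.
    return this.blobs.get(hash) || (await this.store.getBlob(hash))
  }
}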

View File

@@ -0,0 +1,15 @@
'use strict'
const { expect } = require('chai')
const storage = require('../../../../storage')
const blobHash = storage.blobHash
describe('blobHash', function () {
it('can hash non-ASCII strings', function () {
// checked with git hash-object
const testString = 'å\n'
const testHash = 'aad321caf77ca6c5ab09e6c638c237705f93b001'
expect(blobHash.fromString(testString)).to.equal(testHash)
})
})
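// Illustrative sketch, not the library implementation: the comment above notes
// that the expected hash was checked with `git hash-object`, i.e. the SHA-1 of
// "blob <byte length>\0<content>". A sketch recomputing it (unused, helper name
// hypothetical):
const Crypto = require('node:crypto')

function sketchGitBlobHash(string) {
  const content = Buffer.from(string, 'utf8')
  return Crypto.createHash('sha1')
    .update(`blob ${content.byteLength}\u0000`)
    .update(content)
    .digest('hex') // expected to match the hash asserted above for 'å\n'
}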

View File

@@ -0,0 +1,580 @@
'use strict'
const _ = require('lodash')
const { expect } = require('chai')
const config = require('config')
const fs = require('node:fs')
const path = require('node:path')
const { Readable } = require('node:stream')
const temp = require('temp').track()
const { promisify } = require('node:util')
const cleanup = require('./support/cleanup')
const testFiles = require('./support/test_files')
const { Blob, TextOperation } = require('overleaf-editor-core')
const {
BlobStore,
loadGlobalBlobs,
mongodb,
persistor,
streams,
} = require('../../../../storage')
const mongoBackend = require('../../../../storage/lib/blob_store/mongo')
const postgresBackend = require('../../../../storage/lib/blob_store/postgres')
const { getProjectBlobsBatch } = require('../../../../storage/lib/blob_store')
const mkTmpDir = promisify(temp.mkdir)
describe('BlobStore', function () {
const helloWorldString = 'Hello World'
const helloWorldHash = '5e1c309dae7f45e0f39b1bf3ac3cd9db12e7d689'
const globalBlobString = 'a'
const globalBlobHash = testFiles.STRING_A_HASH
const demotedBlobString = 'ab'
const demotedBlobHash = testFiles.STRING_AB_HASH
beforeEach(cleanup.everything)
beforeEach('install a global blob', async function () {
await mongodb.globalBlobs.insertOne({
_id: globalBlobHash,
byteLength: globalBlobString.length,
stringLength: globalBlobString.length,
})
await mongodb.globalBlobs.insertOne({
_id: demotedBlobHash,
byteLength: demotedBlobString.length,
stringLength: demotedBlobString.length,
demoted: true,
})
const bucket = config.get('blobStore.globalBucket')
for (const { key, content } of [
{
key: '2e/65/efe2a145dda7ee51d1741299f848e5bf752e',
content: globalBlobString,
},
{
key: '9a/e9/e86b7bd6cb1472d9373702d8249973da0832',
content: demotedBlobString,
},
]) {
const stream = Readable.from([content])
await persistor.sendStream(bucket, key, stream)
}
await loadGlobalBlobs()
})
const scenarios = [
{
description: 'Postgres backend',
projectId: '123',
projectId2: '456',
backend: postgresBackend,
},
{
description: 'Mongo backend',
projectId: '63725f84b2bdd246ec8c0000',
projectId2: '63725f84b2bdd246ec8c1234',
backend: mongoBackend,
},
]
for (const scenario of scenarios) {
describe(scenario.description, function () {
const blobStore = new BlobStore(scenario.projectId)
const blobStore2 = new BlobStore(scenario.projectId2)
beforeEach('initialize the blob stores', async function () {
await blobStore.initialize()
await blobStore2.initialize()
})
it('can initialize a project again without throwing an error', async function () {
await blobStore.initialize()
await blobStore2.initialize()
})
it('can store and fetch string content', async function () {
function checkBlob(blob) {
expect(blob.getHash()).to.equal(helloWorldHash)
expect(blob.getByteLength()).to.equal(helloWorldString.length)
expect(blob.getStringLength()).to.equal(helloWorldString.length)
}
const insertedBlob = await blobStore.putString(helloWorldString)
checkBlob(insertedBlob)
const fetchedBlob = await blobStore.getBlob(helloWorldHash)
checkBlob(fetchedBlob)
const content = await blobStore.getString(helloWorldHash)
expect(content).to.equal(helloWorldString)
})
it('can store and fetch utf-8 files', async function () {
const testFile = 'hello.txt'
function checkBlob(blob) {
expect(blob.getHash()).to.equal(testFiles.HELLO_TXT_HASH)
expect(blob.getByteLength()).to.equal(testFiles.HELLO_TXT_BYTE_LENGTH)
expect(blob.getStringLength()).to.equal(
testFiles.HELLO_TXT_UTF8_LENGTH
)
}
const insertedBlob = await blobStore.putFile(testFiles.path(testFile))
checkBlob(insertedBlob)
const fetchedBlob = await blobStore.getBlob(testFiles.HELLO_TXT_HASH)
checkBlob(fetchedBlob)
const content = await blobStore.getString(testFiles.HELLO_TXT_HASH)
expect(content).to.equal('Olá mundo\n')
})
it('can store and fetch a large text file', async function () {
const testString = _.repeat('a', 1000000)
const testHash = 'de1fbf0c2f34f67f01f355f31ed0cf7319643c5e'
function checkBlob(blob) {
expect(blob.getHash()).to.equal(testHash)
expect(blob.getByteLength()).to.equal(testString.length)
expect(blob.getStringLength()).to.equal(testString.length)
}
const dir = await mkTmpDir('blobStore')
const pathname = path.join(dir, 'a.txt')
fs.writeFileSync(pathname, testString)
const insertedBlob = await blobStore.putFile(pathname)
checkBlob(insertedBlob)
const fetchedBlob = await blobStore.getBlob(testHash)
checkBlob(fetchedBlob)
const content = await blobStore.getString(testHash)
expect(content).to.equal(testString)
})
it('stores overlarge text files as binary', async function () {
const testString = _.repeat('a', TextOperation.MAX_STRING_LENGTH + 1)
const dir = await mkTmpDir('blobStore')
const pathname = path.join(dir, 'a.txt')
fs.writeFileSync(pathname, testString)
const blob = await blobStore.putFile(pathname)
expect(blob.getByteLength()).to.equal(testString.length)
expect(blob.getStringLength()).not.to.exist
})
it('can store and fetch binary files', async function () {
const testFile = 'graph.png'
function checkBlob(blob) {
expect(blob.getHash()).to.equal(testFiles.GRAPH_PNG_HASH)
expect(blob.getByteLength()).to.equal(testFiles.GRAPH_PNG_BYTE_LENGTH)
expect(blob.getStringLength()).to.be.null
}
const insertedBlob = await blobStore.putFile(testFiles.path(testFile))
checkBlob(insertedBlob)
const fetchedBlob = await blobStore.getBlob(testFiles.GRAPH_PNG_HASH)
checkBlob(fetchedBlob)
const stream = await blobStore.getStream(testFiles.GRAPH_PNG_HASH)
const buffer = await streams.readStreamToBuffer(stream)
expect(buffer.length).to.equal(testFiles.GRAPH_PNG_BYTE_LENGTH)
expect(buffer.toString('hex', 0, 8)).to.equal(
testFiles.PNG_MAGIC_NUMBER
)
})
const missingHash = 'deadbeef00000000000000000000000000000000'
it('fails to get a missing key as a string', async function () {
try {
await blobStore.getString(missingHash)
} catch (err) {
expect(err).to.be.an.instanceof(Blob.NotFoundError)
expect(err.hash).to.equal(missingHash)
return
}
expect.fail('expected NotFoundError')
})
it('fails to get a missing key as a stream', async function () {
try {
await blobStore.getStream(missingHash)
} catch (err) {
expect(err).to.be.an.instanceof(Blob.NotFoundError)
return
}
expect.fail('expected NotFoundError')
})
it('reads invalid utf-8 as utf-8', async function () {
// We shouldn't do this, but we need to know what happens if we do.
// TODO: We should throw an error instead, but this function doesn't have
// an easy way of checking the content type.
const testFile = 'graph.png'
await blobStore.putFile(testFiles.path(testFile))
const content = await blobStore.getString(testFiles.GRAPH_PNG_HASH)
expect(content.length).to.equal(12902)
})
it('checks for non BMP characters', async function () {
const testFile = 'non_bmp.txt'
await blobStore.putFile(testFiles.path(testFile))
const blob = await blobStore.getBlob(testFiles.NON_BMP_TXT_HASH)
expect(blob.getStringLength()).to.be.null
expect(blob.getByteLength()).to.equal(testFiles.NON_BMP_TXT_BYTE_LENGTH)
})
it('can fetch metadata for multiple blobs at once', async function () {
await blobStore.putFile(testFiles.path('graph.png'))
const blobs = await blobStore.getBlobs([
testFiles.GRAPH_PNG_HASH,
testFiles.HELLO_TXT_HASH, // not found
testFiles.GRAPH_PNG_HASH, // requested twice
])
const hashes = blobs.map(blob => blob.getHash())
expect(hashes).to.deep.equal([testFiles.GRAPH_PNG_HASH])
})
describe('multiple blobs in the same project', function () {
beforeEach(async function () {
await blobStore.putString(helloWorldString)
await blobStore.putFile(testFiles.path('graph.png'))
await blobStore.putFile(testFiles.path('hello.txt'))
})
it('getBlob() returns each blob', async function () {
const helloBlob = await blobStore.getBlob(testFiles.HELLO_TXT_HASH)
const graphBlob = await blobStore.getBlob(testFiles.GRAPH_PNG_HASH)
const helloWorldBlob = await blobStore.getBlob(helloWorldHash)
expect(helloBlob.hash).to.equal(testFiles.HELLO_TXT_HASH)
expect(graphBlob.hash).to.equal(testFiles.GRAPH_PNG_HASH)
expect(helloWorldBlob.hash).to.equal(helloWorldHash)
})
it('getBlobs() returns all blobs', async function () {
const blobs = await blobStore.getBlobs([
testFiles.HELLO_TXT_HASH,
testFiles.GRAPH_PNG_HASH,
testFiles.NON_BMP_TXT_HASH, // not in blob store
])
const actualHashes = blobs.map(blob => blob.hash)
expect(actualHashes).to.have.members([
testFiles.HELLO_TXT_HASH,
testFiles.GRAPH_PNG_HASH,
])
})
it('getProjectBlobs() returns all blobs in the project', async function () {
const blobs = await blobStore.getProjectBlobs()
const hashes = blobs.map(blob => blob.getHash())
expect(hashes).to.have.members([
testFiles.HELLO_TXT_HASH,
testFiles.GRAPH_PNG_HASH,
helloWorldHash,
])
})
})
describe('two blob stores on different projects', function () {
beforeEach(async function () {
await blobStore.putString(helloWorldString)
await blobStore2.putFile(testFiles.path('graph.png'))
})
it('separates blobs when calling getBlob()', async function () {
const blobFromStore1 = await blobStore.getBlob(helloWorldHash)
const blobFromStore2 = await blobStore2.getBlob(helloWorldHash)
expect(blobFromStore1).to.exist
expect(blobFromStore2).not.to.exist
})
it('separates blobs when calling getBlobs()', async function () {
const blobsFromStore1 = await blobStore.getBlobs([
helloWorldHash,
testFiles.GRAPH_PNG_HASH,
])
const blobsFromStore2 = await blobStore2.getBlobs([
helloWorldHash,
testFiles.GRAPH_PNG_HASH,
])
expect(blobsFromStore1.map(blob => blob.getHash())).to.deep.equal([
helloWorldHash,
])
expect(blobsFromStore2.map(blob => blob.getHash())).to.deep.equal([
testFiles.GRAPH_PNG_HASH,
])
})
it('separates blobs when calling getStream()', async function () {
await blobStore2.getStream(testFiles.GRAPH_PNG_HASH)
try {
await blobStore.getStream(testFiles.GRAPH_PNG_HASH)
} catch (err) {
expect(err).to.be.an.instanceof(Blob.NotFoundError)
return
}
expect.fail(
'expected Blob.NotFoundError when calling blobStore.getStream()'
)
})
it('separates blobs when calling getString()', async function () {
const content = await blobStore.getString(helloWorldHash)
expect(content).to.equal(helloWorldString)
try {
await blobStore2.getString(helloWorldHash)
} catch (err) {
expect(err).to.be.an.instanceof(Blob.NotFoundError)
return
}
expect.fail(
'expected Blob.NotFoundError when calling blobStore2.getString()'
)
})
if (scenario.backend !== mongoBackend) {
// mongo backend has its own test for this, covering sharding
it('getProjectBlobsBatch() returns blobs per project', async function () {
const projects = [
parseInt(scenario.projectId, 10),
parseInt(scenario.projectId2, 10),
]
const { nBlobs, blobs } =
await postgresBackend.getProjectBlobsBatch(projects)
expect(nBlobs).to.equal(2)
expect(Object.fromEntries(blobs.entries())).to.deep.equal({
[parseInt(scenario.projectId, 10)]: [
new Blob(helloWorldHash, 11, 11),
],
[parseInt(scenario.projectId2, 10)]: [
new Blob(
testFiles.GRAPH_PNG_HASH,
testFiles.GRAPH_PNG_BYTE_LENGTH,
null
),
],
})
})
}
})
describe('a global blob', function () {
it('is available through getBlob()', async function () {
const blob = await blobStore.getBlob(globalBlobHash)
expect(blob.getHash()).to.equal(globalBlobHash)
})
it('is available through getBlobs()', async function () {
await blobStore.putString(helloWorldString)
const requestedHashes = [globalBlobHash, helloWorldHash]
const blobs = await blobStore.getBlobs(requestedHashes)
const hashes = blobs.map(blob => blob.getHash())
expect(hashes).to.have.members(requestedHashes)
})
it('is available through getString()', async function () {
const content = await blobStore.getString(globalBlobHash)
expect(content).to.equal('a')
})
it('is available through getStream()', async function () {
const stream = await blobStore.getStream(globalBlobHash)
const buffer = await streams.readStreamToBuffer(stream)
expect(buffer.toString()).to.equal(globalBlobString)
})
it("doesn't prevent putString() from adding the same blob", async function () {
const blob = await blobStore.putString(globalBlobString)
expect(blob.getHash()).to.equal(globalBlobHash)
const projectBlob = await scenario.backend.findBlob(
scenario.projectId,
globalBlobHash
)
expect(projectBlob).not.to.exist
})
it("doesn't prevent putFile() from adding the same blob", async function () {
const dir = await mkTmpDir('blobStore')
const pathname = path.join(dir, 'blob.txt')
fs.writeFileSync(pathname, globalBlobString)
const blob = await blobStore.putFile(pathname)
expect(blob.getHash()).to.equal(globalBlobHash)
const projectBlob = await scenario.backend.findBlob(
scenario.projectId,
globalBlobHash
)
expect(projectBlob).not.to.exist
})
})
describe('a demoted global blob', function () {
it('is available through getBlob()', async function () {
const blob = await blobStore.getBlob(demotedBlobHash)
expect(blob.getHash()).to.equal(demotedBlobHash)
})
it('is available through getBlobs()', async function () {
await blobStore.putString(helloWorldString)
const requestedHashes = [demotedBlobHash, helloWorldHash]
const blobs = await blobStore.getBlobs(requestedHashes)
const hashes = blobs.map(blob => blob.getHash())
expect(hashes).to.have.members(requestedHashes)
})
it('is available through getString()', async function () {
const content = await blobStore.getString(demotedBlobHash)
expect(content).to.equal(demotedBlobString)
})
it('is available through getStream()', async function () {
const stream = await blobStore.getStream(demotedBlobHash)
const buffer = await streams.readStreamToBuffer(stream)
expect(buffer.toString()).to.equal(demotedBlobString)
})
it("doesn't prevent putString() from creating a project blob", async function () {
const blob = await blobStore.putString(demotedBlobString)
expect(blob.getHash()).to.equal(demotedBlobHash)
const projectBlob = await scenario.backend.findBlob(
scenario.projectId,
demotedBlobHash
)
expect(projectBlob).to.exist
})
it("doesn't prevent putFile() from creating a project blob", async function () {
const dir = await mkTmpDir('blobStore')
const pathname = path.join(dir, 'blob.txt')
fs.writeFileSync(pathname, demotedBlobString)
const blob = await blobStore.putFile(pathname)
expect(blob.getHash()).to.equal(demotedBlobHash)
const projectBlob = await scenario.backend.findBlob(
scenario.projectId,
demotedBlobHash
)
expect(projectBlob).to.exist
})
})
describe('deleting blobs', function () {
beforeEach('install a project blob', async function () {
await blobStore.putString(helloWorldString)
const blob = await blobStore.getBlob(helloWorldHash)
expect(blob).to.exist
})
beforeEach('delete project blobs', async function () {
await blobStore.deleteBlobs()
})
it('deletes project blobs', async function () {
try {
await blobStore.getString(helloWorldHash)
expect.fail('expected NotFoundError')
} catch (err) {
expect(err).to.be.an.instanceof(Blob.NotFoundError)
}
})
it('retains global blobs', async function () {
const content = await blobStore.getString(globalBlobHash)
expect(content).to.equal(globalBlobString)
})
})
describe('copyBlob method', function () {
it('copies a binary blob to another project in the same backend', async function () {
const testFile = 'graph.png'
const originalHash = testFiles.GRAPH_PNG_HASH
const insertedBlob = await blobStore.putFile(testFiles.path(testFile))
await blobStore.copyBlob(insertedBlob, scenario.projectId2)
const copiedBlob = await blobStore2.getBlob(originalHash)
expect(copiedBlob.getHash()).to.equal(originalHash)
expect(copiedBlob.getByteLength()).to.equal(
insertedBlob.getByteLength()
)
expect(copiedBlob.getStringLength()).to.be.null
})
it('copies a text blob to another project in the same backend', async function () {
const insertedBlob = await blobStore.putString(helloWorldString)
await blobStore.copyBlob(insertedBlob, scenario.projectId2)
const copiedBlob = await blobStore2.getBlob(helloWorldHash)
expect(copiedBlob.getHash()).to.equal(helloWorldHash)
const content = await blobStore2.getString(helloWorldHash)
expect(content).to.equal(helloWorldString)
})
})
describe('copyBlob method with different backends', function () {
const otherScenario = scenarios.find(
s => s.backend !== scenario.backend
)
const otherBlobStore = new BlobStore(otherScenario.projectId2)
beforeEach(async function () {
await otherBlobStore.initialize()
})
it('copies a binary blob to another project in a different backend', async function () {
const testFile = 'graph.png'
const originalHash = testFiles.GRAPH_PNG_HASH
const insertedBlob = await blobStore.putFile(testFiles.path(testFile))
await blobStore.copyBlob(insertedBlob, otherScenario.projectId2)
const copiedBlob = await otherBlobStore.getBlob(originalHash)
expect(copiedBlob).to.exist
expect(copiedBlob.getHash()).to.equal(originalHash)
expect(copiedBlob.getByteLength()).to.equal(
insertedBlob.getByteLength()
)
expect(copiedBlob.getStringLength()).to.be.null
})
it('copies a text blob to another project in a different backend', async function () {
const insertedBlob = await blobStore.putString(helloWorldString)
await blobStore.copyBlob(insertedBlob, otherScenario.projectId2)
const copiedBlob = await otherBlobStore.getBlob(helloWorldHash)
expect(copiedBlob).to.exist
expect(copiedBlob.getHash()).to.equal(helloWorldHash)
const content = await otherBlobStore.getString(helloWorldHash)
expect(content).to.equal(helloWorldString)
})
})
})
}
it('getProjectBlobsBatch() with mixed projects', async function () {
for (const scenario of scenarios) {
const blobStore = new BlobStore(scenario.projectId)
const blobStore2 = new BlobStore(scenario.projectId2)
await blobStore.initialize()
await blobStore.putString(helloWorldString)
await blobStore2.initialize()
await blobStore2.putFile(testFiles.path('graph.png'))
}
const projects = [
parseInt(scenarios[0].projectId, 10),
scenarios[1].projectId,
parseInt(scenarios[0].projectId2, 10),
scenarios[1].projectId2,
]
const { nBlobs, blobs } = await getProjectBlobsBatch(projects)
expect(nBlobs).to.equal(4)
expect(Object.fromEntries(blobs.entries())).to.deep.equal({
[scenarios[0].projectId]: [new Blob(helloWorldHash, 11, 11)],
[scenarios[1].projectId]: [new Blob(helloWorldHash, 11, 11)],
[scenarios[0].projectId2]: [
new Blob(
testFiles.GRAPH_PNG_HASH,
testFiles.GRAPH_PNG_BYTE_LENGTH,
null
),
],
[scenarios[1].projectId2]: [
new Blob(
testFiles.GRAPH_PNG_HASH,
testFiles.GRAPH_PNG_BYTE_LENGTH,
null
),
],
})
})
})
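// Illustrative sketch, not taken from the implementation: the global blob keys
// installed in the beforeEach hook ('2e/65/efe2a145...', '9a/e9/e86b...') are
// the blob hashes split as two/two/thirty-six hex characters. The helper name
// is hypothetical and the function is unused.
function sketchGlobalBlobKey(hash) {
  return `${hash.slice(0, 2)}/${hash.slice(2, 4)}/${hash.slice(4)}`
}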

View File

@@ -0,0 +1,172 @@
const { expect } = require('chai')
const { ObjectId, Binary } = require('mongodb')
const { Blob } = require('overleaf-editor-core')
const cleanup = require('./support/cleanup')
const mongoBackend = require('../../../../storage/lib/blob_store/mongo')
const mongodb = require('../../../../storage/lib/mongodb')
describe('BlobStore Mongo backend', function () {
const projectId = new ObjectId().toString()
const hashes = {
abcd: [
'abcd000000000000000000000000000000000000',
'abcd111111111111111111111111111111111111',
'abcd222222222222222222222222222222222222',
'abcd333333333333333333333333333333333333',
'abcd444444444444444444444444444444444444',
'abcd555555555555555555555555555555555555',
'abcd666666666666666666666666666666666666',
'abcd777777777777777777777777777777777777',
'abcd888888888888888888888888888888888888',
'abcd999999999999999999999999999999999999',
'abcdaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa',
],
1234: ['1234000000000000000000000000000000000000'],
1337: ['1337000000000000000000000000000000000000'],
}
beforeEach('clean up', cleanup.everything)
beforeEach('initialize the project', async function () {
await mongoBackend.initialize(projectId)
})
describe('insertBlob', function () {
it('writes blobs to the projectHistoryBlobs collection', async function () {
for (const hash of hashes.abcd
.slice(0, 2)
.concat(hashes[1234].slice(0, 1))) {
const blob = new Blob(hash, 123, 99)
await mongoBackend.insertBlob(projectId, blob)
}
const record = await mongodb.blobs.findOne(new ObjectId(projectId), {
promoteBuffers: true,
})
expect(record.blobs).to.deep.equal({
abc: hashes.abcd.slice(0, 2).map(hash => ({
h: Buffer.from(hash, 'hex'),
b: 123,
s: 99,
})),
123: [{ h: Buffer.from(hashes[1234][0], 'hex'), b: 123, s: 99 }],
})
})
it('writes excess blobs to the projectHistoryShardedBlobs collection', async function () {
for (const hash of hashes.abcd.concat(hashes[1234])) {
const blob = new Blob(hash, 123, 99)
await mongoBackend.insertBlob(projectId, blob)
}
const record = await mongodb.blobs.findOne(new ObjectId(projectId), {
promoteBuffers: true,
})
expect(record.blobs).to.deep.equal({
abc: hashes.abcd
.slice(0, 8)
.map(hash => ({ h: Buffer.from(hash, 'hex'), b: 123, s: 99 })),
123: [{ h: Buffer.from(hashes[1234][0], 'hex'), b: 123, s: 99 }],
})
const shardedRecord = await mongodb.shardedBlobs.findOne(
{ _id: new Binary(Buffer.from(`${projectId}0a`, 'hex')) },
{ promoteBuffers: true }
)
expect(shardedRecord.blobs).to.deep.equal({
bcd: hashes.abcd
.slice(8)
.map(hash => ({ h: Buffer.from(hash, 'hex'), b: 123, s: 99 })),
})
})
})
describe('getProjectBlobsBatch', function () {
it('finds all the blobs', async function () {
const projectId0 = new ObjectId().toString()
const hashesProject0 = hashes[1234].concat(hashes.abcd)
const projectId1 = new ObjectId().toString()
const hashesProject1 = hashes[1337].concat(hashes.abcd)
const projectId2 = new ObjectId().toString()
const hashesProject2 = [] // no hashes
const projectId3 = new ObjectId().toString()
const hashesProject3 = hashes[1337]
const projectBlobs = {
[projectId0]: hashesProject0,
[projectId1]: hashesProject1,
[projectId2]: hashesProject2,
[projectId3]: hashesProject3,
}
for (const [projectId, hashes] of Object.entries(projectBlobs)) {
for (const hash of hashes) {
const blob = new Blob(hash, 123, 99)
await mongoBackend.insertBlob(projectId, blob)
}
}
const projects = [projectId0, projectId1, projectId2, projectId3]
const { nBlobs, blobs } =
await mongoBackend.getProjectBlobsBatch(projects)
expect(nBlobs).to.equal(
hashesProject0.length + hashesProject1.length + hashesProject3.length
)
expect(Object.fromEntries(blobs.entries())).to.deep.equal({
[projectId0]: hashesProject0.map(hash => new Blob(hash, 123, 99)),
[projectId1]: hashesProject1.map(hash => new Blob(hash, 123, 99)),
[projectId3]: hashesProject3.map(hash => new Blob(hash, 123, 99)),
})
})
})
describe('with existing blobs', function () {
beforeEach(async function () {
for (const hash of hashes.abcd.concat(hashes[1234])) {
const blob = new Blob(hash, 123, 99)
await mongoBackend.insertBlob(projectId, blob)
}
})
describe('findBlob', function () {
it('finds blobs in the projectHistoryBlobs collection', async function () {
const blob = await mongoBackend.findBlob(projectId, hashes.abcd[0])
expect(blob.getHash()).to.equal(hashes.abcd[0])
})
it('finds blobs in the projectHistoryShardedBlobs collection', async function () {
const blob = await mongoBackend.findBlob(projectId, hashes.abcd[9])
expect(blob.getHash()).to.equal(hashes.abcd[9])
})
})
describe('findBlobs', function () {
it('finds blobs in the projectHistoryBlobs collection', async function () {
const requestedHashes = hashes.abcd.slice(0, 3).concat(hashes[1234])
const blobs = await mongoBackend.findBlobs(projectId, requestedHashes)
const obtainedHashes = blobs.map(blob => blob.getHash())
expect(obtainedHashes).to.have.members(requestedHashes)
})
it('finds blobs in the projectHistoryShardedBlobs collection', async function () {
const requestedHashes = [1, 3, 5, 8, 9].map(idx => hashes.abcd[idx])
const blobs = await mongoBackend.findBlobs(projectId, requestedHashes)
const obtainedHashes = blobs.map(blob => blob.getHash())
expect(obtainedHashes).to.have.members(requestedHashes)
})
})
describe('getProjectBlobs', function () {
it('returns all blobs for a given project', async function () {
const blobs = await mongoBackend.getProjectBlobs(projectId)
const obtainedHashes = blobs.map(blob => blob.getHash())
const expectedHashes = hashes.abcd.concat(hashes[1234])
expect(obtainedHashes).to.have.members(expectedHashes)
})
})
describe('deleteBlobs', function () {
it('deletes all blobs for a given project', async function () {
await mongoBackend.deleteBlobs(projectId)
const recordCount = await mongodb.blobs.count()
const shardedRecordCount = await mongodb.shardedBlobs.count()
expect(recordCount).to.equal(0)
expect(shardedRecordCount).to.equal(0)
})
})
})
})
})
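// Illustrative sketch, inferred only from the assertions above and not from the
// backend implementation: blobs appear to be bucketed in the projectHistoryBlobs
// record by the first three hex characters of their hash, with overflow beyond
// eight entries per bucket spilling into projectHistoryShardedBlobs. The helper
// name is hypothetical and the function is unused.
function sketchPrimaryBucketKey(hash) {
  return hash.slice(0, 3)
}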

View File

@@ -0,0 +1,42 @@
const postgresBackend = require('../../../../storage/lib/blob_store/postgres')
const { ObjectId } = require('mongodb')
const { expect } = require('chai')
describe('BlobStore postgres backend', function () {
describe('projectId validation', function () {
it('insertBlob rejects when called with bad projectId', async function () {
const projectId = new ObjectId().toString()
await expect(
postgresBackend.insertBlob(projectId, 'hash', 123, 99)
).to.be.rejectedWith('bad projectId')
})
it('deleteBlobs rejects when called with bad projectId', async function () {
const projectId = new ObjectId().toString()
await expect(postgresBackend.deleteBlobs(projectId)).to.be.rejectedWith(
'bad projectId'
)
})
it('findBlobs rejects when called with bad projectId', async function () {
const projectId = new ObjectId().toString()
await expect(postgresBackend.findBlobs(projectId)).to.be.rejectedWith(
'bad projectId'
)
})
it('findBlob rejects when called with bad projectId', async function () {
const projectId = new ObjectId().toString()
await expect(
postgresBackend.findBlob(projectId, 'hash')
).to.be.rejectedWith('bad projectId')
})
it('getProjectBlobs rejects when called with bad projectId', async function () {
const projectId = new ObjectId().toString()
await expect(
postgresBackend.getProjectBlobs(projectId)
).to.be.rejectedWith('bad projectId')
})
})
})
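
These rejections suggest each backend only accepts its own id format. The guard below is a hypothetical routing helper, not part of the storage library; the regexes are assumptions inferred from the ids used in these tests (decimal strings for Postgres, 24-character hex ObjectIds for Mongo).

const mongoBackend = require('../../../../storage/lib/blob_store/mongo')
const postgresBackend = require('../../../../storage/lib/blob_store/postgres')

// Pick a backend from the shape of the project id; anything else is rejected,
// mirroring the 'bad projectId' errors asserted above.
function backendForProject(projectId) {
  if (/^[0-9]+$/.test(projectId)) return postgresBackend
  if (/^[0-9a-f]{24}$/.test(projectId)) return mongoBackend
  throw new Error('bad projectId')
}

async function findBlobAnyBackend(projectId, hash) {
  return await backendForProject(projectId).findBlob(projectId, hash)
}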

View File

@@ -0,0 +1,210 @@
'use strict'
const { expect } = require('chai')
const sinon = require('sinon')
const {
Chunk,
Snapshot,
History,
File,
AddFileOperation,
Change,
} = require('overleaf-editor-core')
const cleanup = require('./support/cleanup')
const fixtures = require('./support/fixtures')
const chunkBuffer = require('../../../../storage/lib/chunk_buffer')
const chunkStore = require('../../../../storage/lib/chunk_store')
const redisBackend = require('../../../../storage/lib/chunk_store/redis')
const metrics = require('@overleaf/metrics')
describe('chunk buffer', function () {
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
beforeEach(function () {
sinon.spy(metrics, 'inc')
})
afterEach(function () {
metrics.inc.restore()
})
const projectId = '123456'
describe('loadLatest', function () {
// Initialize project and create a test chunk
beforeEach(async function () {
// Initialize project in chunk store
await chunkStore.initializeProject(projectId)
})
describe('with an existing chunk', function () {
beforeEach(async function () {
// Create a sample chunk with some content
const snapshot = new Snapshot()
const changes = [
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello World'))],
new Date(),
[]
),
]
const history = new History(snapshot, changes)
const chunk = new Chunk(history, 1) // startVersion 1
// Store the chunk directly in the chunk store using create method
// which internally calls uploadChunk
await chunkStore.create(projectId, chunk)
// Clear any existing cache
await redisBackend.clearCache(projectId)
})
it('should load from chunk store and update cache on first access (cache miss)', async function () {
// First access should load from chunk store and populate cache
const firstResult = await chunkBuffer.loadLatest(projectId)
// Verify the chunk is correct
expect(firstResult).to.not.be.null
expect(firstResult.getStartVersion()).to.equal(1)
expect(firstResult.getEndVersion()).to.equal(2)
// Verify that we got a cache miss metric
expect(
metrics.inc.calledWith('chunk_buffer.loadLatest', 1, {
status: 'cache-miss',
})
).to.be.true
// Reset the metrics spy
metrics.inc.resetHistory()
// Second access should hit the cache
const secondResult = await chunkBuffer.loadLatest(projectId)
// Verify we got the same chunk
expect(secondResult).to.not.be.null
expect(secondResult.getStartVersion()).to.equal(1)
expect(secondResult.getEndVersion()).to.equal(2)
// Verify that we got a cache hit metric
expect(
metrics.inc.calledWith('chunk_buffer.loadLatest', 1, {
status: 'cache-hit',
})
).to.be.true
// Verify both chunks are equivalent
expect(secondResult.getStartVersion()).to.equal(
firstResult.getStartVersion()
)
expect(secondResult.getEndVersion()).to.equal(
firstResult.getEndVersion()
)
})
it('should refresh the cache when chunk changes in the store', async function () {
// First access to load into cache
const firstResult = await chunkBuffer.loadLatest(projectId)
expect(firstResult.getStartVersion()).to.equal(1)
// Reset metrics spy
metrics.inc.resetHistory()
// Create a new chunk with different content
const newSnapshot = new Snapshot()
const newChanges = [
new Change(
[
new AddFileOperation(
'updated.tex',
File.fromString('Updated content')
),
],
new Date(),
[]
),
]
const newHistory = new History(newSnapshot, newChanges)
const newChunk = new Chunk(newHistory, 2) // Different start version
// Store the new chunk directly in the chunk store
await chunkStore.create(projectId, newChunk)
// Access again - should detect the change and refresh cache
const secondResult = await chunkBuffer.loadLatest(projectId)
// Verify we got the updated chunk
expect(secondResult.getStartVersion()).to.equal(2)
expect(secondResult.getEndVersion()).to.equal(3)
// Verify that we got a cache miss metric (since the cached chunk was invalidated)
expect(
metrics.inc.calledWith('chunk_buffer.loadLatest', 1, {
status: 'cache-miss',
})
).to.be.true
})
it('should continue using cache when chunk in store has not changed', async function () {
// First access to load into cache
await chunkBuffer.loadLatest(projectId)
// Reset metrics spy
metrics.inc.resetHistory()
// Access again without changing the underlying chunk
const result = await chunkBuffer.loadLatest(projectId)
// Verify we got the same chunk
expect(result.getStartVersion()).to.equal(1)
expect(result.getEndVersion()).to.equal(2)
// Verify that we got a cache hit metric
expect(
metrics.inc.calledWith('chunk_buffer.loadLatest', 1, {
status: 'cache-hit',
})
).to.be.true
})
})
describe('with an empty project', function () {
it('should handle a case with empty chunks (no changes)', async function () {
// Clear the cache
await redisBackend.clearCache(projectId)
// Load the initial empty chunk via buffer
const result = await chunkBuffer.loadLatest(projectId)
// Verify we got the empty chunk
expect(result.getStartVersion()).to.equal(0)
expect(result.getEndVersion()).to.equal(0) // Start equals end for empty chunks
expect(result.history.changes.length).to.equal(0)
// Verify cache miss metric
expect(
metrics.inc.calledWith('chunk_buffer.loadLatest', 1, {
status: 'cache-miss',
})
).to.be.true
// Reset metrics
metrics.inc.resetHistory()
// Second access should hit the cache
const secondResult = await chunkBuffer.loadLatest(projectId)
// Verify we got the same empty chunk
expect(secondResult.getStartVersion()).to.equal(0)
expect(secondResult.getEndVersion()).to.equal(0)
expect(secondResult.history.changes.length).to.equal(0)
// Verify cache hit metric
expect(
metrics.inc.calledWith('chunk_buffer.loadLatest', 1, {
status: 'cache-hit',
})
).to.be.true
})
})
})
})
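
The assertions above describe a cache-aside read path. The sketch below is one way to get that behaviour using only functions exercised in these tests; it is not the chunk_buffer implementation, and the freshness check via loadLatestRaw is an assumption.

const chunkStore = require('../../../../storage/lib/chunk_store')
const redisBackend = require('../../../../storage/lib/chunk_store/redis')
const metrics = require('@overleaf/metrics')

async function loadLatestSketch(projectId) {
  const cached = await redisBackend.getCurrentChunk(projectId)
  const latest = await chunkStore.loadLatestRaw(projectId, { readOnly: true })
  // Reuse the cached chunk only when its version range matches the store.
  const cacheIsFresh =
    cached != null &&
    cached.getStartVersion() === latest.startVersion &&
    cached.getEndVersion() === latest.endVersion
  if (cacheIsFresh) {
    metrics.inc('chunk_buffer.loadLatest', 1, { status: 'cache-hit' })
    return cached
  }
  const chunk = await chunkStore.loadLatest(projectId)
  await redisBackend.setCurrentChunk(projectId, chunk)
  metrics.inc('chunk_buffer.loadLatest', 1, { status: 'cache-miss' })
  return chunk
}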

View File

@@ -0,0 +1,544 @@
'use strict'
const cleanup = require('./support/cleanup')
const fixtures = require('./support/fixtures')
const { expect } = require('chai')
const sinon = require('sinon')
const { ObjectId } = require('mongodb')
const { projects } = require('../../../../storage/lib/mongodb')
const {
Chunk,
Snapshot,
Change,
History,
File,
Operation,
AddFileOperation,
EditFileOperation,
TextOperation,
} = require('overleaf-editor-core')
const { chunkStore, historyStore } = require('../../../../storage')
describe('chunkStore', function () {
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
const scenarios = [
{
description: 'Postgres backend',
createProject: chunkStore.initializeProject,
idMapping: id => parseInt(id, 10),
},
{
description: 'Mongo backend',
createProject: () =>
chunkStore.initializeProject(new ObjectId().toString()),
idMapping: id => id,
},
]
for (const scenario of scenarios) {
describe(scenario.description, function () {
let projectId
let projectRecord
beforeEach(async function () {
projectId = await scenario.createProject()
// create a record in the mongo projects collection
projectRecord = await projects.insertOne({
overleaf: { history: { id: scenario.idMapping(projectId) } },
})
})
it('loads empty latest chunk for a new project', async function () {
const chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getSnapshot().countFiles()).to.equal(0)
expect(chunk.getChanges().length).to.equal(0)
expect(chunk.getEndTimestamp()).not.to.exist
})
describe('creating a chunk', function () {
const pendingChangeTimestamp = new Date('2014-01-01T00:00:00')
const lastChangeTimestamp = new Date('2015-01-01T00:00:00')
beforeEach(async function () {
const chunk = makeChunk(
[
makeChange(
Operation.addFile('main.tex', File.fromString('abc')),
lastChangeTimestamp
),
],
1
)
await chunkStore.create(projectId, chunk, pendingChangeTimestamp)
})
it('creates a chunk and inserts the pending change timestamp', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.history.currentEndVersion).to.equal(2)
expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
lastChangeTimestamp
)
expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
pendingChangeTimestamp
)
})
})
describe('adding and editing a blank file', function () {
const testPathname = 'foo.txt'
const testTextOperation = TextOperation.fromJSON({
textOperation: ['a'],
}) // insert an a
let lastChangeTimestamp
const pendingChangeTimestamp = new Date()
beforeEach(async function () {
const chunk = await chunkStore.loadLatest(projectId)
const oldEndVersion = chunk.getEndVersion()
const changes = [
makeChange(Operation.addFile(testPathname, File.fromString(''))),
makeChange(Operation.editFile(testPathname, testTextOperation)),
]
lastChangeTimestamp = changes[1].getTimestamp()
chunk.pushChanges(changes)
await chunkStore.update(
projectId,
oldEndVersion,
chunk,
pendingChangeTimestamp
)
})
it('records the correct metadata in db readOnly=false', async function () {
const raw = await chunkStore.loadLatestRaw(projectId)
expect(raw).to.deep.include({
startVersion: 0,
endVersion: 2,
endTimestamp: lastChangeTimestamp,
})
})
it('records the correct metadata in db readOnly=true', async function () {
const raw = await chunkStore.loadLatestRaw(projectId, {
readOnly: true,
})
expect(raw).to.deep.include({
startVersion: 0,
endVersion: 2,
endTimestamp: lastChangeTimestamp,
})
})
it('records the correct timestamp', async function () {
const chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getEndTimestamp()).to.deep.equal(lastChangeTimestamp)
})
it('records changes', async function () {
const chunk = await chunkStore.loadLatest(projectId)
const history = chunk.getHistory()
expect(history.getSnapshot().countFiles()).to.equal(0)
expect(history.getChanges().length).to.equal(2)
const addChange = history.getChanges()[0]
expect(addChange.getOperations().length).to.equal(1)
const addFile = addChange.getOperations()[0]
expect(addFile).to.be.an.instanceof(AddFileOperation)
expect(addFile.getPathname()).to.equal(testPathname)
const file = addFile.getFile()
expect(file.getHash()).to.equal(File.EMPTY_FILE_HASH)
expect(file.getByteLength()).to.equal(0)
expect(file.getStringLength()).to.equal(0)
const editChange = history.getChanges()[1]
expect(editChange.getOperations().length).to.equal(1)
const editFile = editChange.getOperations()[0]
expect(editFile).to.be.an.instanceof(EditFileOperation)
expect(editFile.getPathname()).to.equal(testPathname)
})
it('updates the project record with the current version and timestamps', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.history.currentEndVersion).to.equal(2)
expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
lastChangeTimestamp
)
expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
pendingChangeTimestamp
)
})
})
describe('multiple chunks', function () {
// Two chunks are 1 year apart
const pendingChangeTimestamp = new Date('2014-01-01T00:00:00')
const firstChunkTimestamp = new Date('2015-01-01T00:00:00')
const secondChunkTimestamp = new Date('2016-01-01T00:00:00')
const thirdChunkTimestamp = new Date('2017-01-01T00:00:00')
let firstChunk, secondChunk, thirdChunk
beforeEach(async function () {
firstChunk = makeChunk(
[
makeChange(
Operation.addFile('foo.tex', File.fromString('')),
new Date(firstChunkTimestamp - 5000)
),
makeChange(
Operation.addFile('bar.tex', File.fromString('')),
firstChunkTimestamp
),
],
0
)
await chunkStore.update(
projectId,
0,
firstChunk,
pendingChangeTimestamp
)
firstChunk = await chunkStore.loadLatest(projectId)
secondChunk = makeChunk(
[
makeChange(
Operation.addFile('baz.tex', File.fromString('')),
new Date(secondChunkTimestamp - 5000)
),
makeChange(
Operation.addFile('qux.tex', File.fromString('')),
secondChunkTimestamp
),
],
2
)
await chunkStore.create(projectId, secondChunk)
secondChunk = await chunkStore.loadLatest(projectId)
thirdChunk = makeChunk(
[
makeChange(
Operation.addFile('quux.tex', File.fromString('')),
thirdChunkTimestamp
),
],
4
)
await chunkStore.create(projectId, thirdChunk)
thirdChunk = await chunkStore.loadLatest(projectId)
})
it('returns the second chunk when querying for a version between the start and end version', async function () {
const chunk = await chunkStore.loadAtVersion(projectId, 3)
expect(chunk).to.deep.equal(secondChunk)
// Check file lazy loading
const history = chunk.getHistory()
expect(history.getSnapshot().countFiles()).to.equal(0)
expect(history.getChanges().length).to.equal(2)
const change = history.getChanges()[0]
expect(change.getOperations().length).to.equal(1)
const addFile = change.getOperations()[0]
expect(addFile).to.be.an.instanceof(AddFileOperation)
expect(addFile.getPathname()).to.equal('baz.tex')
const file = addFile.getFile()
expect(file.getHash()).to.equal(File.EMPTY_FILE_HASH)
expect(file.getByteLength()).to.equal(0)
expect(file.getStringLength()).to.equal(0)
})
it('returns the first chunk when querying for the end version of the chunk', async function () {
const chunk = await chunkStore.loadAtVersion(projectId, 2)
expect(chunk).to.deep.equal(firstChunk)
})
it('returns the second chunk when querying for a timestamp between the second and third chunk', async function () {
const searchTimestamp = new Date('2015-07-01T00:00:00')
const chunk = await chunkStore.loadAtTimestamp(
projectId,
searchTimestamp
)
expect(chunk).to.deep.equal(secondChunk)
// Check file lazy loading
const history = chunk.getHistory()
expect(history.getSnapshot().countFiles()).to.equal(0)
expect(history.getChanges().length).to.equal(2)
const change = history.getChanges()[0]
expect(change.getOperations().length).to.equal(1)
const addFile = change.getOperations()[0]
expect(addFile).to.be.an.instanceof(AddFileOperation)
expect(addFile.getPathname()).to.equal('baz.tex')
const file = addFile.getFile()
expect(file.getHash()).to.equal(File.EMPTY_FILE_HASH)
expect(file.getByteLength()).to.equal(0)
expect(file.getStringLength()).to.equal(0)
})
it('returns the third chunk when querying for a timestamp past the latest chunk', async function () {
const searchTimestampPastLatestChunk = new Date('2018-01-01T00:00:00')
const chunk = await chunkStore.loadAtTimestamp(
projectId,
searchTimestampPastLatestChunk
)
// Check that we found the third chunk
expect(chunk).to.deep.equal(thirdChunk)
})
it('updates the project record to match the last chunk', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.history.currentEndVersion).to.equal(5)
expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
thirdChunkTimestamp
)
})
it('updates the pending change timestamp to match the first chunk', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
pendingChangeTimestamp
)
})
describe('after updating the last chunk', function () {
let newChunk
beforeEach(async function () {
newChunk = makeChunk(
[
...thirdChunk.getChanges(),
makeChange(
Operation.addFile('onemore.tex', File.fromString('')),
thirdChunkTimestamp
),
],
4
)
await chunkStore.update(projectId, 5, newChunk)
newChunk = await chunkStore.loadLatest(projectId)
})
it('replaces the latest chunk', function () {
expect(newChunk.getChanges()).to.have.length(2)
})
it('returns the right chunk when querying by version', async function () {
const chunk = await chunkStore.loadAtVersion(projectId, 5)
expect(chunk).to.deep.equal(newChunk)
})
it('returns the right chunk when querying by timestamp', async function () {
const chunk = await chunkStore.loadAtTimestamp(
projectId,
thirdChunkTimestamp
)
expect(chunk).to.deep.equal(newChunk)
})
it('updates the project record to match the latest version and timestamp', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.history.currentEndVersion).to.equal(6)
expect(project.overleaf.history.currentEndTimestamp).to.deep.equal(
thirdChunkTimestamp
)
})
it('does not modify the existing pending change timestamp in the project record', async function () {
const project = await projects.findOne({
_id: new ObjectId(projectRecord.insertedId),
})
expect(project.overleaf.backup.pendingChangeAt).to.deep.equal(
pendingChangeTimestamp
)
})
})
describe('when iterating the chunks with getProjectChunksFromVersion', function () {
// The first chunk has startVersion:0 and endVersion:2
for (let startVersion = 0; startVersion <= 2; startVersion++) {
it(`returns all chunk records when starting from version ${startVersion}`, async function () {
const chunkRecords = []
for await (const chunk of chunkStore.getProjectChunksFromVersion(
projectId,
startVersion
)) {
chunkRecords.push(chunk)
}
const expectedChunks = [firstChunk, secondChunk, thirdChunk]
expect(chunkRecords).to.have.length(expectedChunks.length)
chunkRecords.forEach((chunkRecord, index) => {
expect(chunkRecord.startVersion).to.deep.equal(
expectedChunks[index].getStartVersion()
)
expect(chunkRecord.endVersion).to.deep.equal(
expectedChunks[index].getEndVersion()
)
})
})
}
// The second chunk has startVersion:2 and endVersion:4
for (let startVersion = 3; startVersion <= 4; startVersion++) {
it(`returns two chunk records when starting from version ${startVersion}`, async function () {
const chunkRecords = []
for await (const chunk of chunkStore.getProjectChunksFromVersion(
projectId,
startVersion
)) {
chunkRecords.push(chunk)
}
const expectedChunks = [secondChunk, thirdChunk]
expect(chunkRecords).to.have.length(expectedChunks.length)
chunkRecords.forEach((chunkRecord, index) => {
expect(chunkRecord.startVersion).to.deep.equal(
expectedChunks[index].getStartVersion()
)
expect(chunkRecord.endVersion).to.deep.equal(
expectedChunks[index].getEndVersion()
)
})
})
}
// The third chunk has startVersion:4 and endVersion:5
for (let startVersion = 5; startVersion <= 5; startVersion++) {
it(`returns one chunk record when starting from version ${startVersion}`, async function () {
const chunkRecords = []
for await (const chunk of chunkStore.getProjectChunksFromVersion(
projectId,
startVersion
)) {
chunkRecords.push(chunk)
}
const expectedChunks = [thirdChunk]
expect(chunkRecords).to.have.length(expectedChunks.length)
chunkRecords.forEach((chunkRecord, index) => {
expect(chunkRecord.startVersion).to.deep.equal(
expectedChunks[index].getStartVersion()
)
expect(chunkRecord.endVersion).to.deep.equal(
expectedChunks[index].getEndVersion()
)
})
})
}
it('returns no chunk records when starting from a version after the last chunk', async function () {
const chunkRecords = []
for await (const chunk of chunkStore.getProjectChunksFromVersion(
projectId,
6
)) {
chunkRecords.push(chunk)
}
expect(chunkRecords).to.have.length(0)
})
})
})
describe('when saving to object storage fails', function () {
beforeEach(function () {
sinon.stub(historyStore, 'storeRaw').rejects(new Error('S3 Error'))
})
afterEach(function () {
historyStore.storeRaw.restore()
})
it('does not create chunks', async function () {
const oldEndVersion = 0
const testPathname = 'foo.txt'
const testTextOperation = TextOperation.fromJSON({
textOperation: ['a'],
}) // insert an a
let chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getEndVersion()).to.equal(oldEndVersion)
const changes = [
makeChange(Operation.addFile(testPathname, File.fromString(''))),
makeChange(Operation.editFile(testPathname, testTextOperation)),
]
chunk.pushChanges(changes)
await expect(
chunkStore.update(projectId, oldEndVersion, chunk)
).to.be.rejectedWith('S3 Error')
chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getEndVersion()).to.equal(oldEndVersion)
})
})
describe('version checks', function () {
beforeEach(async function () {
// Create a chunk with start version 0, end version 3
const chunk = makeChunk(
[
makeChange(Operation.addFile('main.tex', File.fromString('abc'))),
makeChange(
Operation.editFile(
'main.tex',
TextOperation.fromJSON({ textOperation: [3, 'def'] })
)
),
makeChange(
Operation.editFile(
'main.tex',
TextOperation.fromJSON({ textOperation: [6, 'ghi'] })
)
),
],
0
)
await chunkStore.update(projectId, 0, chunk)
})
it('refuses to create a chunk with the same start version', async function () {
const chunk = makeChunk(
[makeChange(Operation.addFile('main.tex', File.fromString('abc')))],
0
)
await expect(chunkStore.create(projectId, chunk)).to.be.rejectedWith(
chunkStore.ChunkVersionConflictError
)
})
it("allows creating chunks that don't have version conflicts", async function () {
const chunk = makeChunk(
[makeChange(Operation.addFile('main.tex', File.fromString('abc')))],
3
)
await chunkStore.create(projectId, chunk)
})
})
})
}
})
function makeChange(operation, date = new Date()) {
return new Change([operation], date, [])
}
function makeChunk(changes, versionNumber) {
const snapshot = Snapshot.fromRaw({ files: {} })
const history = new History(snapshot, [])
const chunk = new Chunk(history, versionNumber)
chunk.pushChanges(changes)
return chunk
}
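
As a usage note on the iterator covered in the getProjectChunksFromVersion tests above: the records it yields carry just the version range, which makes it convenient for walking a project's history from a given version. A sketch with a made-up helper name, using only the record fields asserted in those tests.

const { chunkStore } = require('../../../../storage')

async function listChunkRanges(projectId, fromVersion) {
  const ranges = []
  for await (const record of chunkStore.getProjectChunksFromVersion(
    projectId,
    fromVersion
  )) {
    // Each record exposes startVersion and endVersion, as asserted above.
    ranges.push({ start: record.startVersion, end: record.endVersion })
  }
  return ranges
}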

View File

@@ -0,0 +1,135 @@
const { expect } = require('chai')
const { ObjectId } = require('mongodb')
const {
Chunk,
Snapshot,
History,
Change,
AddFileOperation,
File,
} = require('overleaf-editor-core')
const cleanup = require('./support/cleanup')
const backend = require('../../../../storage/lib/chunk_store/mongo')
const { ChunkVersionConflictError } = require('../../../../storage')
describe('chunk store Mongo backend', function () {
beforeEach(cleanup.everything)
describe('garbage collection', function () {
it('deletes pending and deleted chunks', async function () {
const projectId = new ObjectId().toString()
// Create a pending chunk
const pendingChunk = makeChunk([], 0)
const pendingChunkId = await backend.insertPendingChunk(
projectId,
pendingChunk
)
// Create a deleted chunk
const deletedChunk = makeChunk([], 0)
const deletedChunkId = await backend.insertPendingChunk(
projectId,
deletedChunk
)
await backend.confirmCreate(projectId, deletedChunk, deletedChunkId)
await backend.deleteChunk(projectId, deletedChunkId)
// Check that both chunks are ready to be deleted
let oldChunks = await backend.getOldChunksBatch(100, 0)
expect(oldChunks).to.have.deep.members([
{ projectId, chunkId: pendingChunkId },
{ projectId, chunkId: deletedChunkId },
])
// Delete old chunks
await backend.deleteOldChunks(oldChunks.map(chunk => chunk.chunkId))
// Check that there are no more chunks to be deleted
oldChunks = await backend.getOldChunksBatch(100, 0)
expect(oldChunks).to.deep.equal([])
})
})
describe('concurrency handling', function () {
it('prevents chunks from being created with the same start version', async function () {
const projectId = new ObjectId().toString()
const chunks = [makeChunk([], 10), makeChunk([], 10)]
const chunkIds = []
for (const chunk of chunks) {
const chunkId = await backend.insertPendingChunk(projectId, chunk)
chunkIds.push(chunkId)
}
await backend.confirmCreate(projectId, chunks[0], chunkIds[0])
await expect(
backend.confirmCreate(projectId, chunks[1], chunkIds[1])
).to.be.rejectedWith(ChunkVersionConflictError)
})
describe('conflicts between chunk extension and chunk creation', function () {
let projectId,
baseChunkId,
updatedChunkId,
newChunkId,
updatedChunk,
newChunk
beforeEach(async function () {
projectId = new ObjectId().toString()
const baseChunk = makeChunk([], 0)
baseChunkId = await backend.insertPendingChunk(projectId, baseChunk)
await backend.confirmCreate(projectId, baseChunk, baseChunkId)
const change = new Change(
[new AddFileOperation('main.tex', File.fromString('hello'))],
new Date()
)
updatedChunk = makeChunk([change], 0)
updatedChunkId = await backend.insertPendingChunk(
projectId,
updatedChunk
)
newChunk = makeChunk([change], 1)
newChunkId = await backend.insertPendingChunk(projectId, newChunk)
})
it('prevents creation after extension', async function () {
await backend.confirmUpdate(
projectId,
baseChunkId,
updatedChunk,
updatedChunkId
)
await expect(
backend.confirmCreate(projectId, newChunk, newChunkId, {
oldChunkId: baseChunkId,
})
).to.be.rejectedWith(ChunkVersionConflictError)
})
it('prevents extension after creation', async function () {
await backend.confirmCreate(projectId, newChunk, newChunkId, {
oldChunkId: baseChunkId,
})
await expect(
backend.confirmUpdate(
projectId,
baseChunkId,
updatedChunk,
updatedChunkId
)
).to.be.rejectedWith(ChunkVersionConflictError)
})
})
})
})
function makeChunk(changes, versionNumber) {
const snapshot = Snapshot.fromRaw({ files: {} })
const history = new History(snapshot, changes)
const chunk = new Chunk(history, versionNumber)
return chunk
}
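
The garbage collection and concurrency tests above both rely on a two-phase write: a chunk is inserted as pending and only becomes current once confirmed. A hedged sketch of that flow with a made-up helper name; the API calls are the ones exercised above.

const backend = require('../../../../storage/lib/chunk_store/mongo')
const { ChunkVersionConflictError } = require('../../../../storage')

async function tryCreateChunk(projectId, chunk) {
  // Phase 1: store the chunk as pending; it is not yet the current chunk.
  const chunkId = await backend.insertPendingChunk(projectId, chunk)
  try {
    // Phase 2: confirm it. Conflicts on the start version are rejected.
    await backend.confirmCreate(projectId, chunk, chunkId)
    return chunkId
  } catch (err) {
    if (err instanceof ChunkVersionConflictError) {
      // The pending record is left behind and later swept up by
      // getOldChunksBatch()/deleteOldChunks(), as the GC test shows.
      return null
    }
    throw err
  }
}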

View File

@@ -0,0 +1,110 @@
const { expect } = require('chai')
const { ObjectId } = require('mongodb')
const {
Chunk,
Snapshot,
History,
Change,
AddFileOperation,
File,
} = require('overleaf-editor-core')
const cleanup = require('./support/cleanup')
const { ChunkVersionConflictError } = require('../../../../storage')
const backend = require('../../../../storage/lib/chunk_store/postgres')
describe('chunk store Postgres backend', function () {
beforeEach(cleanup.everything)
it('should reject ObjectId strings as project IDs', async function () {
const invalidProjectId = new ObjectId().toString()
await expect(backend.getLatestChunk(invalidProjectId)).to.be.rejectedWith(
'bad projectId'
)
await expect(
backend.getChunkForVersion(invalidProjectId, 1)
).to.be.rejectedWith('bad projectId')
await expect(
backend.getChunkForTimestamp(invalidProjectId, new Date())
).to.be.rejectedWith('bad projectId')
await expect(
backend.getProjectChunkIds(invalidProjectId)
).to.be.rejectedWith('bad projectId')
await expect(
backend.insertPendingChunk(invalidProjectId, makeChunk([], 0))
).to.be.rejectedWith('bad projectId')
await expect(
backend.confirmCreate(invalidProjectId, makeChunk([], 0), 1)
).to.be.rejectedWith('bad projectId')
await expect(
backend.confirmUpdate(invalidProjectId, 1, makeChunk([], 0), 2)
).to.be.rejectedWith('bad projectId')
await expect(backend.deleteChunk(invalidProjectId, 1)).to.be.rejectedWith(
'bad projectId'
)
await expect(
backend.deleteProjectChunks(invalidProjectId)
).to.be.rejectedWith('bad projectId')
})
describe('conflicts between chunk extension and chunk creation', function () {
let projectId,
baseChunkId,
updatedChunkId,
newChunkId,
updatedChunk,
newChunk
beforeEach(async function () {
projectId = '1234'
const baseChunk = makeChunk([], 0)
baseChunkId = await backend.insertPendingChunk(projectId, baseChunk)
await backend.confirmCreate(projectId, baseChunk, baseChunkId)
const change = new Change(
[new AddFileOperation('main.tex', File.fromString('hello'))],
new Date()
)
updatedChunk = makeChunk([change], 0)
updatedChunkId = await backend.insertPendingChunk(projectId, updatedChunk)
newChunk = makeChunk([change], 1)
newChunkId = await backend.insertPendingChunk(projectId, newChunk)
})
it('prevents creation after extension', async function () {
await backend.confirmUpdate(
projectId,
baseChunkId,
updatedChunk,
updatedChunkId
)
await expect(
backend.confirmCreate(projectId, newChunk, newChunkId, {
oldChunkId: baseChunkId,
})
).to.be.rejectedWith(ChunkVersionConflictError)
})
it('prevents extension after creation', async function () {
await backend.confirmCreate(projectId, newChunk, newChunkId, {
oldChunkId: baseChunkId,
})
await expect(
backend.confirmUpdate(
projectId,
baseChunkId,
updatedChunk,
updatedChunkId
)
).to.be.rejectedWith(ChunkVersionConflictError)
})
})
})
function makeChunk(changes, versionNumber) {
const snapshot = Snapshot.fromRaw({ files: {} })
const history = new History(snapshot, changes)
const chunk = new Chunk(history, versionNumber)
return chunk
}
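
A related sketch for the replacement path these conflict tests exercise: when a new chunk supersedes an existing one, confirmCreate is given the old chunk id so that a concurrent confirmUpdate of that same chunk cannot also succeed. The helper name is made up; the calls are the ones used above.

const backend = require('../../../../storage/lib/chunk_store/postgres')

async function replaceLatestChunk(projectId, oldChunkId, newChunk) {
  const newChunkId = await backend.insertPendingChunk(projectId, newChunk)
  // Passing oldChunkId ties the creation to the chunk being replaced; if that
  // chunk was extended in the meantime, a ChunkVersionConflictError is thrown.
  await backend.confirmCreate(projectId, newChunk, newChunkId, { oldChunkId })
  return newChunkId
}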

View File

@@ -0,0 +1,606 @@
'use strict'
const { expect } = require('chai')
const {
Chunk,
Snapshot,
History,
File,
AddFileOperation,
Origin,
Change,
V2DocVersions,
} = require('overleaf-editor-core')
const cleanup = require('./support/cleanup')
const redisBackend = require('../../../../storage/lib/chunk_store/redis')
describe('chunk store Redis backend', function () {
beforeEach(cleanup.everything)
const projectId = '123456'
describe('getCurrentChunk', function () {
it('should return null on cache miss', async function () {
const chunk = await redisBackend.getCurrentChunk(projectId)
expect(chunk).to.be.null
})
it('should return the cached chunk', async function () {
// Create a sample chunk
const snapshot = new Snapshot()
const changes = [
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello World'))],
new Date(),
[]
),
]
const history = new History(snapshot, changes)
const chunk = new Chunk(history, 5) // startVersion 5
// Cache the chunk
await redisBackend.setCurrentChunk(projectId, chunk)
// Retrieve the cached chunk
const cachedChunk = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunk).to.not.be.null
expect(cachedChunk.getStartVersion()).to.equal(5)
expect(cachedChunk.getEndVersion()).to.equal(6)
expect(cachedChunk).to.deep.equal(chunk)
})
})
describe('setCurrentChunk', function () {
it('should successfully cache a chunk', async function () {
// Create a sample chunk
const snapshot = new Snapshot()
const changes = [
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello World'))],
new Date(),
[]
),
]
const history = new History(snapshot, changes)
const chunk = new Chunk(history, 5) // startVersion 5
// Cache the chunk
await redisBackend.setCurrentChunk(projectId, chunk)
// Verify the chunk was cached correctly by retrieving it
const cachedChunk = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunk).to.not.be.null
expect(cachedChunk.getStartVersion()).to.equal(5)
expect(cachedChunk.getEndVersion()).to.equal(6)
expect(cachedChunk).to.deep.equal(chunk)
// Verify that the chunk was stored correctly using the chunk metadata
const chunkMetadata =
await redisBackend.getCurrentChunkMetadata(projectId)
expect(chunkMetadata).to.not.be.null
expect(chunkMetadata.startVersion).to.equal(5)
expect(chunkMetadata.changesCount).to.equal(1)
})
it('should correctly handle a chunk with zero changes', async function () {
// Create a sample chunk with no changes
const snapshot = new Snapshot()
const changes = []
const history = new History(snapshot, changes)
const chunk = new Chunk(history, 10) // startVersion 10
// Cache the chunk
await redisBackend.setCurrentChunk(projectId, chunk)
// Retrieve the cached chunk
const cachedChunk = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunk).to.not.be.null
expect(cachedChunk.getStartVersion()).to.equal(10)
expect(cachedChunk.getEndVersion()).to.equal(10) // End version should equal start version with no changes
expect(cachedChunk.history.changes.length).to.equal(0)
expect(cachedChunk).to.deep.equal(chunk)
})
})
describe('updating already cached chunks', function () {
it('should replace a chunk with a longer chunk', async function () {
// Set initial chunk with one change
const snapshotA = new Snapshot()
const changesA = [
new Change(
[
new AddFileOperation(
'test.tex',
File.fromString('Initial content')
),
],
new Date(),
[]
),
]
const historyA = new History(snapshotA, changesA)
const chunkA = new Chunk(historyA, 10)
await redisBackend.setCurrentChunk(projectId, chunkA)
// Verify the initial chunk was cached
const cachedChunkA = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunkA.getStartVersion()).to.equal(10)
expect(cachedChunkA.getEndVersion()).to.equal(11)
expect(cachedChunkA.history.changes.length).to.equal(1)
// Create a longer chunk (with more changes)
const snapshotB = new Snapshot()
const changesB = [
new Change(
[new AddFileOperation('test1.tex', File.fromString('Content 1'))],
new Date(),
[]
),
new Change(
[new AddFileOperation('test2.tex', File.fromString('Content 2'))],
new Date(),
[]
),
new Change(
[new AddFileOperation('test3.tex', File.fromString('Content 3'))],
new Date(),
[]
),
]
const historyB = new History(snapshotB, changesB)
const chunkB = new Chunk(historyB, 15)
// Replace the cached chunk
await redisBackend.setCurrentChunk(projectId, chunkB)
// Verify the new chunk replaced the old one
const cachedChunkB = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunkB).to.not.be.null
expect(cachedChunkB.getStartVersion()).to.equal(15)
expect(cachedChunkB.getEndVersion()).to.equal(18)
expect(cachedChunkB.history.changes.length).to.equal(3)
expect(cachedChunkB).to.deep.equal(chunkB)
// Verify the metadata was updated
const updatedMetadata =
await redisBackend.getCurrentChunkMetadata(projectId)
expect(updatedMetadata.startVersion).to.equal(15)
expect(updatedMetadata.changesCount).to.equal(3)
})
it('should replace a chunk with a shorter chunk', async function () {
// Set initial chunk with three changes
const snapshotA = new Snapshot()
const changesA = [
new Change(
[new AddFileOperation('file1.tex', File.fromString('Content 1'))],
new Date(),
[]
),
new Change(
[new AddFileOperation('file2.tex', File.fromString('Content 2'))],
new Date(),
[]
),
new Change(
[new AddFileOperation('file3.tex', File.fromString('Content 3'))],
new Date(),
[]
),
]
const historyA = new History(snapshotA, changesA)
const chunkA = new Chunk(historyA, 20)
await redisBackend.setCurrentChunk(projectId, chunkA)
// Verify the initial chunk was cached
const cachedChunkA = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunkA.getStartVersion()).to.equal(20)
expect(cachedChunkA.getEndVersion()).to.equal(23)
expect(cachedChunkA.history.changes.length).to.equal(3)
// Create a shorter chunk (with fewer changes)
const snapshotB = new Snapshot()
const changesB = [
new Change(
[new AddFileOperation('new.tex', File.fromString('New content'))],
new Date(),
[]
),
]
const historyB = new History(snapshotB, changesB)
const chunkB = new Chunk(historyB, 30)
// Replace the cached chunk
await redisBackend.setCurrentChunk(projectId, chunkB)
// Verify the new chunk replaced the old one
const cachedChunkB = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunkB).to.not.be.null
expect(cachedChunkB.getStartVersion()).to.equal(30)
expect(cachedChunkB.getEndVersion()).to.equal(31)
expect(cachedChunkB.history.changes.length).to.equal(1)
expect(cachedChunkB).to.deep.equal(chunkB)
// Verify the metadata was updated
const updatedMetadata =
await redisBackend.getCurrentChunkMetadata(projectId)
expect(updatedMetadata.startVersion).to.equal(30)
expect(updatedMetadata.changesCount).to.equal(1)
})
it('should replace a chunk with a zero-length chunk', async function () {
// Set initial chunk with changes
const snapshotA = new Snapshot()
const changesA = [
new Change(
[new AddFileOperation('file1.tex', File.fromString('Content 1'))],
new Date(),
[]
),
new Change(
[new AddFileOperation('file2.tex', File.fromString('Content 2'))],
new Date(),
[]
),
]
const historyA = new History(snapshotA, changesA)
const chunkA = new Chunk(historyA, 25)
await redisBackend.setCurrentChunk(projectId, chunkA)
// Verify the initial chunk was cached
const cachedChunkA = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunkA.getStartVersion()).to.equal(25)
expect(cachedChunkA.getEndVersion()).to.equal(27)
expect(cachedChunkA.history.changes.length).to.equal(2)
// Create a zero-length chunk (with no changes)
const snapshotB = new Snapshot()
const changesB = []
const historyB = new History(snapshotB, changesB)
const chunkB = new Chunk(historyB, 40)
// Replace the cached chunk
await redisBackend.setCurrentChunk(projectId, chunkB)
// Verify the new chunk replaced the old one
const cachedChunkB = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunkB).to.not.be.null
expect(cachedChunkB.getStartVersion()).to.equal(40)
expect(cachedChunkB.getEndVersion()).to.equal(40) // Start version equals end version with no changes
expect(cachedChunkB.history.changes.length).to.equal(0)
expect(cachedChunkB).to.deep.equal(chunkB)
// Verify the metadata was updated
const updatedMetadata =
await redisBackend.getCurrentChunkMetadata(projectId)
expect(updatedMetadata.startVersion).to.equal(40)
expect(updatedMetadata.changesCount).to.equal(0)
})
it('should replace a zero-length chunk with a non-empty chunk', async function () {
// Set initial empty chunk
const snapshotA = new Snapshot()
const changesA = []
const historyA = new History(snapshotA, changesA)
const chunkA = new Chunk(historyA, 50)
await redisBackend.setCurrentChunk(projectId, chunkA)
// Verify the initial chunk was cached
const cachedChunkA = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunkA.getStartVersion()).to.equal(50)
expect(cachedChunkA.getEndVersion()).to.equal(50)
expect(cachedChunkA.history.changes.length).to.equal(0)
// Create a non-empty chunk
const snapshotB = new Snapshot()
const changesB = [
new Change(
[new AddFileOperation('newfile.tex', File.fromString('New content'))],
new Date(),
[]
),
new Change(
[
new AddFileOperation(
'another.tex',
File.fromString('Another file')
),
],
new Date(),
[]
),
]
const historyB = new History(snapshotB, changesB)
const chunkB = new Chunk(historyB, 60)
// Replace the cached chunk
await redisBackend.setCurrentChunk(projectId, chunkB)
// Verify the new chunk replaced the old one
const cachedChunkB = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunkB).to.not.be.null
expect(cachedChunkB.getStartVersion()).to.equal(60)
expect(cachedChunkB.getEndVersion()).to.equal(62)
expect(cachedChunkB.history.changes.length).to.equal(2)
expect(cachedChunkB).to.deep.equal(chunkB)
// Verify the metadata was updated
const updatedMetadata =
await redisBackend.getCurrentChunkMetadata(projectId)
expect(updatedMetadata.startVersion).to.equal(60)
expect(updatedMetadata.changesCount).to.equal(2)
})
})
describe('checkCacheValidity', function () {
it('should return true when versions match', function () {
const snapshotA = new Snapshot()
const historyA = new History(snapshotA, [])
const chunkA = new Chunk(historyA, 10)
chunkA.pushChanges([
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello'))],
new Date(),
[]
),
])
const snapshotB = new Snapshot()
const historyB = new History(snapshotB, [])
const chunkB = new Chunk(historyB, 10)
chunkB.pushChanges([
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello'))],
new Date(),
[]
),
])
const isValid = redisBackend.checkCacheValidity(chunkA, chunkB)
expect(isValid).to.be.true
})
it('should return false when start versions differ', function () {
const snapshotA = new Snapshot()
const historyA = new History(snapshotA, [])
const chunkA = new Chunk(historyA, 10)
const snapshotB = new Snapshot()
const historyB = new History(snapshotB, [])
const chunkB = new Chunk(historyB, 11)
const isValid = redisBackend.checkCacheValidity(chunkA, chunkB)
expect(isValid).to.be.false
})
it('should return false when end versions differ', function () {
const snapshotA = new Snapshot()
const historyA = new History(snapshotA, [])
const chunkA = new Chunk(historyA, 10)
chunkA.pushChanges([
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello'))],
new Date(),
[]
),
])
const snapshotB = new Snapshot()
const historyB = new History(snapshotB, [])
const chunkB = new Chunk(historyB, 10)
chunkB.pushChanges([
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello'))],
new Date(),
[]
),
new Change(
[new AddFileOperation('other.tex', File.fromString('World'))],
new Date(),
[]
),
])
const isValid = redisBackend.checkCacheValidity(chunkA, chunkB)
expect(isValid).to.be.false
})
it('should return false when cached chunk is null', function () {
const snapshotB = new Snapshot()
const historyB = new History(snapshotB, [])
const chunkB = new Chunk(historyB, 10)
const isValid = redisBackend.checkCacheValidity(null, chunkB)
expect(isValid).to.be.false
})
})
describe('compareChunks', function () {
it('should return true when chunks are identical', function () {
// Create two identical chunks
const snapshot = new Snapshot()
const changes = [
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello World'))],
new Date('2025-04-10T12:00:00Z'), // Using fixed date for consistent comparison
[]
),
]
const history1 = new History(snapshot, changes)
const chunk1 = new Chunk(history1, 5)
// Create a separate but identical chunk
const snapshot2 = new Snapshot()
const changes2 = [
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello World'))],
new Date('2025-04-10T12:00:00Z'), // Using same fixed date
[]
),
]
const history2 = new History(snapshot2, changes2)
const chunk2 = new Chunk(history2, 5)
const result = redisBackend.compareChunks(projectId, chunk1, chunk2)
expect(result).to.be.true
})
it('should return false when chunks differ', function () {
// Create first chunk
const snapshot1 = new Snapshot()
const changes1 = [
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello World'))],
new Date('2025-04-10T12:00:00Z'),
[]
),
]
const history1 = new History(snapshot1, changes1)
const chunk1 = new Chunk(history1, 5)
// Create a different chunk (different content)
const snapshot2 = new Snapshot()
const changes2 = [
new Change(
[
new AddFileOperation(
'test.tex',
File.fromString('Different content')
),
],
new Date('2025-04-10T12:00:00Z'),
[]
),
]
const history2 = new History(snapshot2, changes2)
const chunk2 = new Chunk(history2, 5)
const result = redisBackend.compareChunks(projectId, chunk1, chunk2)
expect(result).to.be.false
})
it('should return false when one chunk is null', function () {
// Create a chunk
const snapshot = new Snapshot()
const changes = [
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello World'))],
new Date('2025-04-10T12:00:00Z'),
[]
),
]
const history = new History(snapshot, changes)
const chunk = new Chunk(history, 5)
const resultWithNullCached = redisBackend.compareChunks(
projectId,
null,
chunk
)
expect(resultWithNullCached).to.be.false
const resultWithNullCurrent = redisBackend.compareChunks(
projectId,
chunk,
null
)
expect(resultWithNullCurrent).to.be.false
})
it('should return false when chunks have different start versions', function () {
// Create first chunk with start version 5
const snapshot1 = new Snapshot()
const changes1 = [
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello World'))],
new Date('2025-04-10T12:00:00Z'),
[]
),
]
const history1 = new History(snapshot1, changes1)
const chunk1 = new Chunk(history1, 5)
// Create second chunk with identical content but different start version (10)
const snapshot2 = new Snapshot()
const changes2 = [
new Change(
[new AddFileOperation('test.tex', File.fromString('Hello World'))],
new Date('2025-04-10T12:00:00Z'),
[]
),
]
const history2 = new History(snapshot2, changes2)
const chunk2 = new Chunk(history2, 10)
const result = redisBackend.compareChunks(projectId, chunk1, chunk2)
expect(result).to.be.false
})
})
describe('integration with redis', function () {
it('should store and retrieve complex chunks correctly', async function () {
// Create a more complex chunk
const snapshot = new Snapshot()
const changes = [
new Change(
[new AddFileOperation('file1.tex', File.fromString('Content 1'))],
new Date(),
[1234]
),
new Change(
[new AddFileOperation('file2.tex', File.fromString('Content 2'))],
new Date(),
null,
new Origin('test-origin'),
['5a296963ad5e82432674c839', null],
'123.4',
new V2DocVersions({
'random-doc-id': { pathname: 'file2.tex', v: 123 },
})
),
new Change(
[new AddFileOperation('file3.tex', File.fromString('Content 3'))],
new Date(),
[]
),
]
const history = new History(snapshot, changes)
const chunk = new Chunk(history, 20)
// Cache the chunk
await redisBackend.setCurrentChunk(projectId, chunk)
// Retrieve the cached chunk
const cachedChunk = await redisBackend.getCurrentChunk(projectId)
expect(cachedChunk.getStartVersion()).to.equal(20)
expect(cachedChunk.getEndVersion()).to.equal(23)
expect(cachedChunk).to.deep.equal(chunk)
expect(cachedChunk.history.changes.length).to.equal(3)
// Check that the operations were preserved correctly
const retrievedChanges = cachedChunk.history.changes
expect(retrievedChanges[0].getOperations()[0].getPathname()).to.equal(
'file1.tex'
)
expect(retrievedChanges[1].getOperations()[0].getPathname()).to.equal(
'file2.tex'
)
expect(retrievedChanges[2].getOperations()[0].getPathname()).to.equal(
'file3.tex'
)
// Check that the chunk was stored correctly using the chunk metadata
const chunkMetadata =
await redisBackend.getCurrentChunkMetadata(projectId)
expect(chunkMetadata).to.not.be.null
expect(chunkMetadata.startVersion).to.equal(20)
expect(chunkMetadata.changesCount).to.equal(3)
})
})
})
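
One way to express the validity check asserted in the checkCacheValidity tests: a sketch derived from the expectations above, not necessarily the backend's implementation.

function checkCacheValiditySketch(cachedChunk, currentChunk) {
  // A cached chunk is only reusable when both its start and end versions
  // match the current chunk's; a missing cache entry is never valid.
  return Boolean(
    cachedChunk &&
      cachedChunk.getStartVersion() === currentChunk.getStartVersion() &&
      cachedChunk.getEndVersion() === currentChunk.getEndVersion()
  )
}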

Binary file not shown.


View File

@@ -0,0 +1 @@
Olá mundo

View File

@@ -0,0 +1 @@
􏰁􏰁􏰁􏰁􏰁􏰁􏰁􏰁􏰁􏰁􏰁􏰁􏰁􏰁

View File

@@ -0,0 +1,21 @@
'use strict'
const DocFixtures = require('./docs').docs
exports.chunks = {
chunkOne: {
id: 1000000,
doc_id: DocFixtures.initializedProject.id,
start_version: 0,
end_version: 1,
end_timestamp: new Date('2032-01-01'),
},
}
exports.histories = {
chunkOne: {
projectId: DocFixtures.initializedProject.id,
chunkId: '1000000',
json: { snapshot: { files: {} }, changes: [] },
},
}

View File

@@ -0,0 +1,9 @@
'use strict'
// Test docs are no longer inserted in the database. Only their ids are now
// relevant as they are used in history chunks.
exports.docs = {
uninitializedProject: { id: '1000000' },
initializedProject: { id: '1000001' },
}

View File

@@ -0,0 +1,7 @@
'use strict'
exports.dbSpecs = {
chunks: Object.values(require('./chunks').chunks),
histories: Object.values(require('./chunks').histories),
docs: Object.values(require('./docs').docs),
}

View File

@@ -0,0 +1,249 @@
'use strict'
const { createHash } = require('node:crypto')
const { expect } = require('chai')
const cleanup = require('./support/cleanup')
const fixtures = require('./support/fixtures')
const storage = require('../../../../storage')
const chunkStore = storage.chunkStore
const persistChanges = storage.persistChanges
const core = require('overleaf-editor-core')
const AddFileOperation = core.AddFileOperation
const EditFileOperation = core.EditFileOperation
const TextOperation = core.TextOperation
const Change = core.Change
const Chunk = core.Chunk
const File = core.File
const History = core.History
const Snapshot = core.Snapshot
describe('persistChanges', function () {
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
let farFuture
before(function () {
// used to provide a limit which forces us to persist all of the changes.
farFuture = new Date()
farFuture.setTime(farFuture.getTime() + 7 * 24 * 3600 * 1000)
})
it('persists changes', async function () {
const limitsToPersistImmediately = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const projectId = fixtures.docs.uninitializedProject.id
const change = new Change(
[new AddFileOperation('test.tex', File.fromString(''))],
new Date(),
[]
)
const changes = [change]
await chunkStore.initializeProject(projectId)
const result = await persistChanges(
projectId,
changes,
limitsToPersistImmediately,
0
)
const history = new History(new Snapshot(), changes)
const currentChunk = new Chunk(history, 0)
expect(result).to.deep.equal({
numberOfChangesPersisted: 1,
originalEndVersion: 0,
currentChunk,
})
const chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getStartVersion()).to.equal(0)
expect(chunk.getEndVersion()).to.equal(1)
expect(chunk.getChanges().length).to.equal(1)
})
it('persists changes in two chunks', async function () {
const limitsToPersistImmediately = {
maxChunkChanges: 1,
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const projectId = fixtures.docs.uninitializedProject.id
const firstChange = new Change(
[new AddFileOperation('a.tex', File.fromString(''))],
new Date(),
[]
)
const secondChange = new Change(
[new AddFileOperation('b.tex', File.fromString(''))],
new Date(),
[]
)
const changes = [firstChange, secondChange]
await chunkStore.initializeProject(projectId)
const result = await persistChanges(
projectId,
changes,
limitsToPersistImmediately,
0
)
const snapshot = Snapshot.fromRaw({
files: {
'a.tex': {
content: '',
},
},
})
const history = new History(snapshot, [secondChange])
const currentChunk = new Chunk(history, 1)
expect(result).to.deep.equal({
numberOfChangesPersisted: 2,
originalEndVersion: 0,
currentChunk,
})
const chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getStartVersion()).to.equal(1)
expect(chunk.getEndVersion()).to.equal(2)
expect(chunk.getChanges().length).to.equal(1)
})
it('persists the snapshot at the start of the chunk', async function () {
const limitsToPersistImmediately = {
maxChunkChanges: 2,
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const projectId = fixtures.docs.uninitializedProject.id
const firstChange = new Change(
[new AddFileOperation('a.tex', File.fromString(''))],
new Date(),
[]
)
const secondChange = new Change(
[new AddFileOperation('b.tex', File.fromString(''))],
new Date(),
[]
)
const changes = [firstChange, secondChange]
await chunkStore.initializeProject(projectId)
const result = await persistChanges(
projectId,
changes,
limitsToPersistImmediately,
0
)
const history = new History(new Snapshot(), changes)
const currentChunk = new Chunk(history, 0)
expect(result).to.deep.equal({
numberOfChangesPersisted: 2,
originalEndVersion: 0,
currentChunk,
})
const chunk = await chunkStore.loadLatest(projectId)
expect(chunk.getStartVersion()).to.equal(0)
expect(chunk.getEndVersion()).to.equal(2)
expect(chunk.getChanges().length).to.equal(2)
})
it("errors if the version doesn't match the latest chunk", async function () {
const limitsToPersistImmediately = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const projectId = fixtures.docs.uninitializedProject.id
const firstChange = new Change(
[new AddFileOperation('a.tex', File.fromString(''))],
new Date(),
[]
)
const secondChange = new Change(
[new AddFileOperation('b.tex', File.fromString(''))],
new Date(),
[]
)
const changes = [firstChange, secondChange]
await chunkStore.initializeProject(projectId)
await expect(
persistChanges(projectId, changes, limitsToPersistImmediately, 1)
).to.be.rejectedWith(
'client sent updates with end_version 1 but latest chunk has end_version 0'
)
})
describe('content hash validation', function () {
it('accepts a change with a valid hash', async function () {
const limitsToPersistImmediately = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const projectId = fixtures.docs.uninitializedProject.id
await chunkStore.initializeProject(projectId)
const textOperation = new TextOperation()
textOperation.insert('hello ')
textOperation.retain(5)
textOperation.contentHash = hashString('hello world')
const change = new Change(
[
new AddFileOperation('a.tex', File.fromString('world')),
new EditFileOperation('a.tex', textOperation),
],
new Date(),
[]
)
const changes = [change]
const result = await persistChanges(
projectId,
changes,
limitsToPersistImmediately,
0
)
expect(result.numberOfChangesPersisted).to.equal(1)
})
it('rejects a change with an invalid hash', async function () {
const limitsToPersistImmediately = {
minChangeTimestamp: farFuture,
maxChangeTimestamp: farFuture,
}
const projectId = fixtures.docs.uninitializedProject.id
await chunkStore.initializeProject(projectId)
const textOperation = new TextOperation()
textOperation.insert('hello ')
textOperation.retain(5)
textOperation.contentHash = hashString('bad hash')
const change = new Change(
[
new AddFileOperation('a.tex', File.fromString('world')),
new EditFileOperation('a.tex', textOperation),
],
new Date(),
[]
)
const changes = [change]
await expect(
persistChanges(projectId, changes, limitsToPersistImmediately, 0)
).to.be.rejectedWith(storage.InvalidChangeError)
})
})
})
function hashString(s) {
const hash = createHash('sha-1')
hash.update(s)
return hash.digest('hex')
}
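
For context on the content hash tests: the client-side pattern they model is to compute the SHA-1 of the document content after the edit has been applied and attach it to the text operation. The sketch below is hypothetical and reuses the hashString helper defined above; the classes and the contentHash property are the ones used in the tests.

const {
  AddFileOperation,
  Change,
  EditFileOperation,
  File,
  TextOperation,
} = require('overleaf-editor-core')

function makeEditWithContentHash(pathname, baseContent, insertion) {
  const textOperation = new TextOperation()
  textOperation.insert(insertion)
  textOperation.retain(baseContent.length)
  // Hash of the content *after* applying the operation, e.g. 'hello world'
  // for the accepted change above.
  textOperation.contentHash = hashString(insertion + baseContent)
  return new Change(
    [
      new AddFileOperation(pathname, File.fromString(baseContent)),
      new EditFileOperation(pathname, textOperation),
    ],
    new Date(),
    []
  )
}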

View File

@@ -0,0 +1,208 @@
'use strict'
const _ = require('lodash')
const BPromise = require('bluebird')
const { expect } = require('chai')
const fs = BPromise.promisifyAll(require('node:fs'))
const sinon = require('sinon')
const stream = require('node:stream')
const temp = require('temp')
const cleanup = require('./support/cleanup')
const fixtures = require('./support/fixtures')
const testFiles = require('./support/test_files')
const unzip = require('./support/unzip')
const core = require('overleaf-editor-core')
const File = core.File
const Snapshot = core.Snapshot
const storage = require('../../../../storage')
const BlobStore = storage.BlobStore
const ProjectArchive = storage.ProjectArchive
describe('ProjectArchive', function () {
beforeEach(cleanup.everything)
beforeEach(fixtures.create)
const projectId = '123'
const blobStore = new BlobStore(projectId)
let zipFilePath
beforeEach(function () {
zipFilePath = temp.path({ suffix: '.zip' })
})
afterEach(function () {
return fs.unlinkAsync(zipFilePath).catch(() => {})
})
function makeMixedTestSnapshot(rounds) {
const snapshot = new Snapshot()
return blobStore.putFile(testFiles.path('graph.png')).then(() => {
_.times(rounds, i => {
snapshot.addFile('test' + i + '.txt', File.fromString('test'))
snapshot.addFile(
'graph' + i + '.png',
File.fromHash(testFiles.GRAPH_PNG_HASH)
)
})
return snapshot
})
}
function makeTextTestSnapshot(rounds) {
const snapshot = new Snapshot()
_.times(rounds, i => {
snapshot.addFile('test' + i + '.txt', File.fromString('test'))
})
return snapshot
}
it('archives a small snapshot with binary and text data', function () {
return makeMixedTestSnapshot(1)
.then(snapshot => {
const projectArchive = new ProjectArchive(snapshot)
return projectArchive.writeZip(blobStore, zipFilePath)
})
.then(() => {
return unzip.getZipEntries(zipFilePath)
})
.then(zipEntries => {
expect(zipEntries).to.have.length(2)
zipEntries = _.sortBy(zipEntries, 'fileName')
expect(zipEntries[0].fileName).to.equal('graph0.png')
expect(zipEntries[0].uncompressedSize).to.equal(
testFiles.GRAPH_PNG_BYTE_LENGTH
)
expect(zipEntries[1].fileName).to.equal('test0.txt')
expect(zipEntries[1].uncompressedSize).to.equal(4)
})
})
it('archives a larger snapshot with binary and text data', function () {
return makeMixedTestSnapshot(10)
.then(snapshot => {
const projectArchive = new ProjectArchive(snapshot)
return projectArchive.writeZip(blobStore, zipFilePath)
})
.then(() => {
return unzip.getZipEntries(zipFilePath)
})
.then(zipEntries => {
expect(zipEntries).to.have.length(20)
})
})
it('archives empty files', function () {
const snapshot = new Snapshot()
snapshot.addFile('test0', File.fromString(''))
snapshot.addFile('test1', File.fromHash(File.EMPTY_FILE_HASH))
return blobStore
.putString('')
.then(() => {
const projectArchive = new ProjectArchive(snapshot)
return projectArchive.writeZip(blobStore, zipFilePath)
})
.then(() => {
return unzip.getZipEntries(zipFilePath)
})
.then(zipEntries => {
zipEntries = _.sortBy(zipEntries, 'fileName')
expect(zipEntries[0].fileName).to.equal('test0')
expect(zipEntries[0].uncompressedSize).to.equal(0)
expect(zipEntries[1].fileName).to.equal('test1')
expect(zipEntries[1].uncompressedSize).to.equal(0)
})
})
describe('with a blob stream download error', function () {
beforeEach(function () {
const testStream = new stream.Readable({
read: function () {
testStream.destroy(new Error('test read error'))
},
})
sinon.stub(blobStore, 'getStream').resolves(testStream)
})
afterEach(function () {
blobStore.getStream.restore()
})
it('rejects with the error', function () {
return makeMixedTestSnapshot(1)
.then(snapshot => {
const projectArchive = new ProjectArchive(snapshot)
return projectArchive.writeZip(blobStore, zipFilePath)
})
.then(() => {
expect.fail()
})
.catch(err => {
let message = err.message
if (err instanceof ProjectArchive.DownloadError) {
message = err.cause.message
}
expect(message).to.match(/test read error/)
})
})
})
describe('with zip write error', function () {
beforeEach(function () {
sinon.stub(fs, 'createWriteStream').callsFake(path => {
const testStream = new stream.Writable({
write: function (chunk, encoding, callback) {
callback(new Error('test write error'))
},
})
return testStream
})
})
afterEach(function () {
fs.createWriteStream.restore()
})
it('rejects with the error', function () {
return makeMixedTestSnapshot(1)
.then(snapshot => {
const projectArchive = new ProjectArchive(snapshot)
return projectArchive.writeZip(blobStore, zipFilePath)
})
.then(() => {
expect.fail()
})
.catch(err => {
expect(err.message).to.equal('test write error')
})
})
})
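
// File.prototype.load is delayed by 200ms while the ProjectArchive is given
// what looks like a 100ms limit, so writing the zip should fail with an
// ArchiveTimeout error.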
describe('with a delayed file load', function () {
beforeEach(function () {
sinon.stub(File.prototype, 'load').callsFake(function () {
return BPromise.delay(200).thenReturn(this)
})
})
afterEach(function () {
File.prototype.load.restore()
})
it('times out', function () {
const snapshot = makeTextTestSnapshot(10)
const projectArchive = new ProjectArchive(snapshot, 100)
return projectArchive
.writeZip(blobStore, zipFilePath)
.then(() => {
expect.fail()
})
.catch(err => {
expect(err.name).to.equal('ArchiveTimeout')
})
})
})
})

View File

@@ -0,0 +1,21 @@
'use strict'

const { expect } = require('chai')

const { format, pad } = require('../../../../storage/lib/project_key')

describe('projectKey', function () {
  it('reverses padded keys', function () {
    expect(format(1)).to.equal('100/000/000')
    expect(format(12)).to.equal('210/000/000')
    expect(format(123456789)).to.equal('987/654/321')
    expect(format(9123456789)).to.equal('987/654/3219')
  })

  it('pads numbers with zeros to length 9', function () {
    expect(pad(1)).to.equal('000000001')
    expect(pad(10)).to.equal('000000010')
    expect(pad(100000000)).to.equal('100000000')
    expect(pad(1000000000)).to.equal('1000000000')
  })
})
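
// For reference, a minimal sketch of the behaviour these assertions assume.
// The real implementation lives in storage/lib/project_key.js and may differ
// in detail; the function bodies below are illustrative only.
function padSketch(id) {
  // Left-pad the decimal id with zeros to at least nine digits.
  return id.toString().padStart(9, '0')
}

function formatSketch(id) {
  // Reverse the padded id and group the leading digits in threes; any
  // overflow digits stay attached to the last group (e.g. '987/654/3219').
  const reversed = padSketch(id).split('').reverse().join('')
  return reversed.replace(/^(\d{3})(\d{3})(\d+)$/, '$1/$2/$3')
}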

View File

@@ -0,0 +1,101 @@
const config = require('config')
const { knex, persistor, mongodb, redis } = require('../../../../../storage')
const { S3Persistor } = require('@overleaf/object-persistor/src/S3Persistor')
const POSTGRES_TABLES = [
'chunks',
'project_blobs',
'old_chunks',
'pending_chunks',
]
const MONGO_COLLECTIONS = [
'projectHistoryGlobalBlobs',
'projectHistoryBlobs',
'projectHistoryShardedBlobs',
'projectHistoryChunks',
// back_fill_file_hash.test.mjs
'deletedFiles',
'deletedProjects',
'projects',
'projectHistoryBackedUpBlobs',
]
// make sure we don't delete the wrong data by accident
if (process.env.NODE_ENV !== 'test') {
throw new Error('test cleanup can only be loaded in a test environment')
}
async function cleanupPostgres() {
for (const table of POSTGRES_TABLES) {
await knex(table).del()
}
}
async function cleanupMongo() {
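// listCollections() returns a cursor; map() projects each entry to its name
// and the for await loop below drains it lazily.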
const collections = await mongodb.db.listCollections().map(c => c.name)
for await (const collection of collections) {
if (MONGO_COLLECTIONS.includes(collection)) {
await mongodb.db.collection(collection).deleteMany({})
}
}
}
async function cleanupRedis() {
await redis.rclientHistory.flushdb()
await redis.rclientLock.flushdb()
}
async function cleanupPersistor() {
await Promise.all([
clearBucket(config.get('blobStore.globalBucket')),
clearBucket(config.get('blobStore.projectBucket')),
clearBucket(config.get('chunkStore.bucket')),
clearBucket(config.get('zipStore.bucket')),
])
}
async function clearBucket(name) {
await persistor.deleteDirectory(name, '')
}
let s3PersistorForBackupCleanup
async function cleanupBackup() {
// The backupPersistor refuses to delete short prefixes. Use a low-level S3 persistor.
if (!s3PersistorForBackupCleanup) {
const { backupPersistor } = await import(
'../../../../../storage/lib/backupPersistor.mjs'
)
s3PersistorForBackupCleanup = new S3Persistor(backupPersistor.settings)
}
await Promise.all(
Object.values(config.get('backupStore')).map(name =>
s3PersistorForBackupCleanup.deleteDirectory(name, '')
)
)
}
async function cleanupEverything() {
// Set the timeout when called in a Mocha test. This function is also called
// in benchmarks where it is not passed a Mocha context.
this.timeout?.(5000)
await Promise.all([
cleanupPostgres(),
cleanupMongo(),
cleanupPersistor(),
cleanupBackup(),
cleanupRedis(),
])
}
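
// Typical usage, as in the acceptance tests in this commit: register one of
// these helpers as a Mocha hook, for example
//
//   const cleanup = require('./support/cleanup')
//   beforeEach(cleanup.everything)
//
// (the relative require path depends on where the test file lives).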
module.exports = {
postgres: cleanupPostgres,
mongo: cleanupMongo,
persistor: cleanupPersistor,
backup: cleanupBackup,
redis: cleanupRedis,
everything: cleanupEverything,
}

View File

@@ -0,0 +1,6 @@
const BPromise = require('bluebird')
const fetch = require('node-fetch')
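
// Overriding fetch.Promise makes node-fetch return Bluebird promises, so the
// BPromise helpers used elsewhere in these tests work on fetch results.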
fetch.Promise = BPromise
module.exports = fetch

View File

@@ -0,0 +1,20 @@
'use strict'
const BPromise = require('bluebird')
const dbSpecs = require('../fixtures').dbSpecs
const knex = require('../../../../../storage').knex
const historyStore = require('../../../../../storage').historyStore
function createFixtures() {
return knex('chunks')
.insert(dbSpecs.chunks)
.then(() => {
return BPromise.mapSeries(dbSpecs.histories, history =>
historyStore.storeRaw(history.projectId, history.chunkId, history.json)
)
})
}
exports.create = createFixtures
exports.chunks = require('../fixtures/chunks').chunks
exports.docs = require('../fixtures/docs').docs

View File

@@ -0,0 +1,27 @@
const path = require('node:path')

exports.path = function (pathname) {
  return path.join(__dirname, '..', 'files', pathname)
}

exports.GRAPH_PNG_HASH = '81dac49dc128aa0a7d0263d24c0d1ce14de554a8'
exports.GRAPH_PNG_BYTE_LENGTH = 13476

exports.HELLO_TXT_HASH = '80dc915a94d134320281f2a139c018facce4b670'
exports.HELLO_TXT_BYTE_LENGTH = 11
exports.HELLO_TXT_UTF8_LENGTH = 10

// file is UTF-8 encoded and contains non-BMP characters
exports.NON_BMP_TXT_HASH = '323ec6325a14288a81e15bc0bbee0c0a35f38049'
exports.NON_BMP_TXT_BYTE_LENGTH = 57

// file contains null characters
exports.NULL_CHARACTERS_TXT_HASH = '4227ca4e8736af63036e7457e2db376ddf7e5795'
exports.NULL_CHARACTERS_TXT_BYTE_LENGTH = 3

// git hashes of some short strings for testing
exports.STRING_A_HASH = '2e65efe2a145dda7ee51d1741299f848e5bf752e'
exports.STRING_AB_HASH = '9ae9e86b7bd6cb1472d9373702d8249973da0832'

// From https://en.wikipedia.org/wiki/Portable_Network_Graphics
exports.PNG_MAGIC_NUMBER = '89504e470d0a1a0a'

View File

@@ -0,0 +1,22 @@
'use strict'

const BPromise = require('bluebird')
const yauzl = BPromise.promisifyAll(require('yauzl'))

function getZipEntries(pathname) {
  function readEntries(zip) {
    return new BPromise((resolve, reject) => {
      const entries = []
      zip.on('entry', entry => {
        entries.push(entry)
      })
      zip.on('error', reject)
      zip.on('end', () => {
        resolve(entries)
      })
    })
  }

  return yauzl.openAsync(pathname).then(readEntries)
}

exports.getZipEntries = getZipEntries
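
// Usage, as in the ProjectArchive and zipStore tests: the resolved yauzl
// entries expose fileName and uncompressedSize, for example
//
//   const entries = await getZipEntries('/tmp/project.zip')
//   console.log(entries[0].fileName, entries[0].uncompressedSize)
//
// ('/tmp/project.zip' is just an illustrative path).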

View File

@@ -0,0 +1,115 @@
'use strict'
const { ObjectId } = require('mongodb')
const { expect } = require('chai')
const config = require('config')
const tasks = require('../../../../storage/tasks')
const {
persistor,
historyStore,
knex,
mongodb,
} = require('../../../../storage')
const cleanup = require('./support/cleanup')
const CHUNK_STORE_BUCKET = config.get('chunkStore.bucket')
const postgresProjectId = 1
const mongoProjectId = new ObjectId('abcdefabcdefabcdefabcdef')
describe('tasks', function () {
beforeEach(cleanup.everything)
const options = {
batchSize: 3,
timeout: 3000,
minAgeSecs: 3600,
maxBatches: 1000,
}
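
// Old chunks are tracked both in Postgres (integer project ids) and in Mongo
// (ObjectId project ids); the deletion task should clean up both backends.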
it('deletes old chunks', async function () {
const postgresChunks = []
const mongoChunks = []
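// Chunks 1-25 were soft-deleted a day ago, well past minAgeSecs; chunks 26-30
// were deleted just now, so only the first 25 should be removed.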
for (let i = 1; i <= 25; i++) {
const deletedAt = new Date(Date.now() - 86400000)
const startVersion = (i - 1) * 10
const endVersion = i * 10
postgresChunks.push({
chunk_id: i,
doc_id: postgresProjectId,
start_version: startVersion,
end_version: endVersion,
deleted_at: deletedAt,
})
mongoChunks.push({
_id: new ObjectId(i.toString().padStart(24, '0')),
projectId: mongoProjectId,
startVersion,
endVersion,
state: 'deleted',
updatedAt: deletedAt,
})
}
for (let i = 26; i <= 30; i++) {
const deletedAt = new Date()
const startVersion = (i - 1) * 10
const endVersion = i * 10
postgresChunks.push({
chunk_id: i,
doc_id: postgresProjectId,
start_version: startVersion,
end_version: endVersion,
deleted_at: deletedAt,
})
mongoChunks.push({
_id: new ObjectId(i.toString().padStart(24, '0')),
projectId: mongoProjectId,
startVersion,
endVersion,
state: 'deleted',
updatedAt: deletedAt,
})
}
await knex('old_chunks').insert(postgresChunks)
await mongodb.chunks.insertMany(mongoChunks)
await Promise.all([
...postgresChunks.map(chunk =>
historyStore.storeRaw(
postgresProjectId.toString(),
chunk.chunk_id.toString(),
{
history: 'raw history',
}
)
),
...mongoChunks.map(chunk =>
historyStore.storeRaw(mongoProjectId.toString(), chunk._id.toString(), {
history: 'raw history',
})
),
])
await expectChunksExist(1, 30, true)
await tasks.deleteOldChunks(options)
await expectChunksExist(1, 25, false)
await expectChunksExist(26, 30, true)
})
})
async function expectChunksExist(minChunkId, maxChunkId, expected) {
const keys = []
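// These keys appear to follow the project_key layout: the reversed, padded
// project id ('100/000/000' for project 1, 'fed/cba/...' for the Mongo id)
// followed by the zero-padded chunk id.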
for (let i = minChunkId; i <= maxChunkId; i++) {
keys.push(`100/000/000/${i.toString().padStart(9, '0')}`)
keys.push(`fed/cba/fedcbafedcbafedcba/${i.toString().padStart(24, '0')}`)
}
return await Promise.all(
keys.map(async key => {
const exists = await persistor.checkIfObjectExists(
CHUNK_STORE_BUCKET,
key
)
expect(exists).to.equal(expected)
})
)
}

View File

@@ -0,0 +1,101 @@
'use strict'
const BPromise = require('bluebird')
const { expect } = require('chai')
const fs = BPromise.promisifyAll(require('node:fs'))
const temp = require('temp')
const cleanup = require('./support/cleanup')
const fetch = require('./support/fetch')
const fixtures = require('./support/fixtures')
const { getZipEntries } = require('./support/unzip')
const { Snapshot, File } = require('overleaf-editor-core')
const { zipStore } = require('../../../../storage')
describe('zipStore', function () {
beforeEach(cleanup.persistor)
let zipFilePath
beforeEach(function () {
zipFilePath = temp.path({ suffix: '.zip' })
})
afterEach(async function () {
try {
await fs.unlinkAsync(zipFilePath)
} catch (_error) {
// Ignore.
}
})
it('stores a snapshot in a zip file', async function () {
const projectId = fixtures.docs.uninitializedProject.id
const version = 1
const testSnapshot = new Snapshot()
testSnapshot.addFile('hello.txt', File.fromString('hello world'))
const zipUrl = await zipStore.getSignedUrl(projectId, version)
// Initially, there is no zip file; we should get a 404.
const preZipResponse = await fetch(zipUrl)
expect(preZipResponse.status).to.equal(404)
// Build the zip file.
await zipStore.storeZip(projectId, version, testSnapshot)
// Now we should be able to fetch it.
const postZipResponse = await fetch(zipUrl)
expect(postZipResponse.status).to.equal(200)
const zipBuffer = await postZipResponse.buffer()
await fs.writeFileAsync(zipFilePath, zipBuffer)
const entries = await getZipEntries(zipFilePath)
expect(entries.length).to.equal(1)
expect(entries[0].fileName).to.equal('hello.txt')
})
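
// The two tracked-delete ranges cover 'quick ' and 'lazy ', so the archived
// file should contain only 'the brown fox jumps over the dog'.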
it('filters out tracked deletes', async function () {
const projectId = fixtures.docs.uninitializedProject.id
const version = 1
const testSnapshot = new Snapshot()
testSnapshot.addFile(
'test.tex',
File.fromRaw({
content: 'the quick brown fox jumps over the lazy dog',
trackedChanges: [
{
range: { pos: 4, length: 6 },
tracking: {
type: 'delete',
ts: '2024-01-01T00:00:00.000Z',
userId: 'user1',
},
},
{
range: { pos: 35, length: 5 },
tracking: {
type: 'delete',
ts: '2023-01-01T00:00:00.000Z',
userId: 'user2',
},
},
],
})
)
const zipUrl = await zipStore.getSignedUrl(projectId, version)
// Build the zip file.
await zipStore.storeZip(projectId, version, testSnapshot)
// Now we should be able to fetch it.
const postZipResponse = await fetch(zipUrl)
expect(postZipResponse.status).to.equal(200)
const zipBuffer = await postZipResponse.buffer()
await fs.writeFileAsync(zipFilePath, zipBuffer)
const entries = await getZipEntries(zipFilePath)
expect(entries.length).to.equal(1)
expect(entries[0].fileName).to.equal('test.tex')
expect(entries[0].uncompressedSize).to.equal(
'the brown fox jumps over the dog'.length
)
})
})

View File

@@ -0,0 +1,2 @@
CREATE USER read_only PASSWORD 'password';
ALTER DEFAULT PRIVILEGES FOR USER overleaf IN SCHEMA public GRANT SELECT ON TABLES TO read_only;

View File

@@ -0,0 +1,67 @@
const chai = require('chai')
const chaiAsPromised = require('chai-as-promised')
const config = require('config')
const fetch = require('node-fetch')
const { knex, mongodb, redis } = require('../storage')
// ensure every ObjectId has the id string as a property for correct comparisons
require('mongodb').ObjectId.cacheHexString = true
chai.use(chaiAsPromised)
chai.config.truncateThreshold = 0
async function setupPostgresDatabase() {
this.timeout(60_000)
await knex.migrate.latest()
}
async function setupMongoDatabase() {
this.timeout(60_000)
await mongodb.db.collection('projectHistoryChunks').createIndexes([
{
key: { projectId: 1, startVersion: 1 },
name: 'projectId_1_startVersion_1',
partialFilterExpression: { state: { $in: ['active', 'closed'] } },
unique: true,
},
{
key: { state: 1 },
name: 'state_1',
partialFilterExpression: { state: 'deleted' },
},
])
}
async function createGcsBuckets() {
this.timeout(60_000)
for (const bucket of [
config.get('blobStore.globalBucket'),
config.get('blobStore.projectBucket'),
config.get('chunkStore.bucket'),
config.get('zipStore.bucket'),
'fake-user-files-gcs',
]) {
await fetch('http://gcs:9090/storage/v1/b', {
method: 'POST',
body: JSON.stringify({ name: bucket }),
headers: { 'Content-Type': 'application/json' },
})
}
}
// Tear down the connection pool after all the tests have run, so the process
// can exit.
async function tearDownConnectionPool() {
await knex.destroy()
await redis.disconnect()
}
module.exports = {
setupPostgresDatabase,
createGcsBuckets,
tearDownConnectionPool,
mochaHooks: {
beforeAll: [setupPostgresDatabase, setupMongoDatabase, createGcsBuckets],
afterAll: [tearDownConnectionPool],
},
}
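
// These exports are intended to be loaded as a Mocha root hook plugin (for
// example via a --require entry in the test configuration, which is not shown
// here): the Postgres migrations, Mongo indexes and GCS buckets are prepared
// once before the suite and the connection pools are torn down afterwards.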