// @ts-check
import OError from '@overleaf/o-error'
import chunkStore from '../lib/chunk_store/index.js'
import {
  backupPersistor,
  chunksBucket,
  projectBlobsBucket,
} from './backupPersistor.mjs'
import { Blob, Chunk, History } from 'overleaf-editor-core'
import { BlobStore, GLOBAL_BLOBS, makeProjectKey } from './blob_store/index.js'
import blobHash from './blob_hash.js'
import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'
import logger from '@overleaf/logger'
import path from 'node:path'
import projectKey from './project_key.js'
import streams from './streams.js'
import objectPersistor from '@overleaf/object-persistor'
import { getEndDateForRPO } from '../../backupVerifier/utils.mjs'

/**
 * @typedef {import("@overleaf/object-persistor/src/PerProjectEncryptedS3Persistor.js").CachedPerProjectEncryptedS3Persistor} CachedPerProjectEncryptedS3Persistor
 */

/**
 * @param {string} historyId
 * @param {string} hash
 */
export async function verifyBlob(historyId, hash) {
  return await verifyBlobs(historyId, [hash])
}

/**
 * @param {string} historyId
 * @return {Promise<CachedPerProjectEncryptedS3Persistor>}
 */
async function getProjectPersistor(historyId) {
  try {
    return await backupPersistor.forProjectRO(
      projectBlobsBucket,
      makeProjectKey(historyId, '')
    )
  } catch (err) {
    if (err instanceof NotFoundError) {
      throw new BackupCorruptedError('dek does not exist', {}, err)
    }
    throw err
  }
}

/**
 * @param {string} historyId
 * @param {Array<string>} hashes
 * @param {CachedPerProjectEncryptedS3Persistor} [projectCache]
 */
export async function verifyBlobs(historyId, hashes, projectCache) {
  if (hashes.length === 0) throw new Error('bug: empty hashes')
  if (!projectCache) {
    projectCache = await getProjectPersistor(historyId)
  }
  const blobStore = new BlobStore(historyId)
  for (const hash of hashes) {
    const path = makeProjectKey(historyId, hash)
    const blob = await blobStore.getBlob(hash)
    if (!blob) throw new Blob.NotFoundError(hash)
    let stream
    try {
      stream = await projectCache.getObjectStream(projectBlobsBucket, path, {
        autoGunzip: true,
      })
    } catch (err) {
      if (err instanceof NotFoundError) {
        throw new BackupCorruptedMissingBlobError('missing blob', {
          path,
          hash,
        })
      }
      throw err
    }
    const backupHash = await blobHash.fromStream(blob.getByteLength(), stream)
    if (backupHash !== hash) {
      throw new BackupCorruptedInvalidBlobError(
        'hash mismatch for backed up blob',
        {
          path,
          hash,
          backupHash,
        }
      )
    }
  }
}

/**
 * @param {string} historyId
 * @param {Date} [endTimestamp]
 */
export async function verifyProjectWithErrorContext(
  historyId,
  endTimestamp = getEndDateForRPO()
) {
  try {
    await verifyProject(historyId, endTimestamp)
  } catch (err) {
    // @ts-ignore err is Error instance
    throw OError.tag(err, 'verifyProject', { historyId, endTimestamp })
  }
}

/**
 * @param {string} historyId
 * @param {number} startVersion
 * @param {CachedPerProjectEncryptedS3Persistor} backupPersistorForProject
 * @return {Promise}
 */
async function loadChunk(historyId, startVersion, backupPersistorForProject) {
  const key = path.join(
    projectKey.format(historyId),
    projectKey.pad(startVersion)
  )
  try {
    const buf = await streams.gunzipStreamToBuffer(
      await backupPersistorForProject.getObjectStream(chunksBucket, key)
    )
    return JSON.parse(buf.toString('utf-8'))
  } catch (err) {
    if (err instanceof objectPersistor.Errors.NotFoundError) {
      throw new Chunk.NotPersistedError(historyId)
    }
    if (err instanceof Error) {
      throw OError.tag(err, 'Failed to load chunk', { historyId, startVersion })
    }
    throw err
  }
}
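/*
 * Usage sketch for the blob-level entry points above (hypothetical caller,
 * not part of this module; the import path and the `historyId`/`hash` values
 * are illustrative assumptions):
 *
 *   import { verifyBlob, BackupCorruptedError } from './backupVerifier.mjs'
 *
 *   try {
 *     await verifyBlob('someHistoryId', 'someBlobSha1Hash')
 *   } catch (err) {
 *     if (err instanceof BackupCorruptedError) {
 *       // the backed-up blob is missing or its content hash does not match
 *     }
 *     throw err
 *   }
 */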
/**
 * @param {string} historyId
 * @param {Date} endTimestamp
 */
export async function verifyProject(historyId, endTimestamp) {
  const backend = chunkStore.getBackend(historyId)
  const [first, last] = await Promise.all([
    backend.getFirstChunkBeforeTimestamp(historyId, endTimestamp),
    backend.getLastActiveChunkBeforeTimestamp(historyId, endTimestamp),
  ])

  const chunksRecordsToVerify = [
    {
      chunkId: first.id,
      chunkLabel: 'first',
      // startVersion is needed by loadChunk below to build the backup key
      startVersion: first.startVersion,
    },
  ]
  if (first.startVersion !== last.startVersion) {
    chunksRecordsToVerify.push({
      chunkId: last.id,
      chunkLabel: 'last before RPO',
      startVersion: last.startVersion,
    })
  }

  const projectCache = await getProjectPersistor(historyId)

  const chunks = await Promise.all(
    chunksRecordsToVerify.map(async chunk => {
      try {
        return History.fromRaw(
          await loadChunk(historyId, chunk.startVersion, projectCache)
        )
      } catch (err) {
        if (err instanceof Chunk.NotPersistedError) {
          throw new BackupRPOViolationChunkNotBackedUpError(
            'BackupRPOviolation: chunk not backed up',
            chunk
          )
        }
        throw err
      }
    })
  )

  const seenBlobs = new Set()
  const blobsToVerify = []
  for (const chunk of chunks) {
    /** @type {Set<string>} */
    const chunkBlobs = new Set()
    chunk.findBlobHashes(chunkBlobs)
    let hasAddedBlobFromThisChunk = false
    for (const blobHash of chunkBlobs) {
      if (seenBlobs.has(blobHash)) continue // old blob
      if (GLOBAL_BLOBS.has(blobHash)) continue // global blob
      seenBlobs.add(blobHash)
      // sample at most one new blob per chunk to keep verification cheap
      if (!hasAddedBlobFromThisChunk) {
        blobsToVerify.push(blobHash)
        hasAddedBlobFromThisChunk = true
      }
    }
  }
  if (blobsToVerify.length === 0) {
    logger.debug(
      {
        historyId,
        chunksRecordsToVerify: chunksRecordsToVerify.map(c => c.chunkId),
      },
      'chunks contain no blobs to verify'
    )
    return
  }
  await verifyBlobs(historyId, blobsToVerify, projectCache)
}

export class BackupCorruptedError extends OError {}
export class BackupRPOViolationError extends OError {}
export class BackupCorruptedMissingBlobError extends BackupCorruptedError {}
export class BackupCorruptedInvalidBlobError extends BackupCorruptedError {}
export class BackupRPOViolationChunkNotBackedUpError extends OError {}
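/*
 * A minimal sketch of how a caller might branch on the error hierarchy above,
 * assuming this module is importable as './backupVerifier.mjs' (the path is
 * illustrative):
 *
 *   try {
 *     await verifyProjectWithErrorContext(historyId)
 *   } catch (err) {
 *     if (err instanceof BackupRPOViolationChunkNotBackedUpError) {
 *       // a chunk was not backed up within the RPO window
 *     } else if (err instanceof BackupCorruptedError) {
 *       // covers BackupCorruptedMissingBlobError and
 *       // BackupCorruptedInvalidBlobError, plus a missing DEK
 *     }
 *     throw err
 *   }
 */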