// @ts-check
import logger from '@overleaf/logger'
import commandLineArgs from 'command-line-args'
import { Chunk, History, Snapshot } from 'overleaf-editor-core'
import {
  getProjectChunks,
  loadLatestRaw,
  create,
} from '../lib/chunk_store/index.js'
import { client } from '../lib/mongodb.js'
import redis from '../lib/redis.js'
import knex from '../lib/knex.js'
import { historyStore } from '../lib/history_store.js'
import pLimit from 'p-limit'
import {
  GLOBAL_BLOBS,
  loadGlobalBlobs,
  makeProjectKey,
  BlobStore,
} from '../lib/blob_store/index.js'
import {
  listPendingBackups,
  getBackupStatus,
  setBackupVersion,
  updateCurrentMetadataIfNotSet,
  updatePendingChangeTimestamp,
  getBackedUpBlobHashes,
  unsetBackedUpBlobHashes,
} from '../lib/backup_store/index.js'
import { backupBlob, downloadBlobToDir } from '../lib/backupBlob.mjs'
import {
  backupPersistor,
  chunksBucket,
  projectBlobsBucket,
} from '../lib/backupPersistor.mjs'
import { backupGenerator } from '../lib/backupGenerator.mjs'
import { promises as fs, createWriteStream } from 'node:fs'
import os from 'node:os'
import path from 'node:path'
import projectKey from '../lib/project_key.js'
import Crypto from 'node:crypto'
import Stream from 'node:stream'
import { EventEmitter } from 'node:events'
import {
  objectIdFromInput,
  batchedUpdate,
  READ_PREFERENCE_SECONDARY,
} from '@overleaf/mongo-utils/batchedUpdate.js'
import { createGunzip } from 'node:zlib'
import { text } from 'node:stream/consumers'
import { fromStream as blobHashFromStream } from '../lib/blob_hash.js'
import { NotFoundError } from '@overleaf/object-persistor/src/Errors.js'

// Create a singleton promise that loads global blobs once
let globalBlobsPromise = null
function ensureGlobalBlobsLoaded() {
  if (!globalBlobsPromise) {
    globalBlobsPromise = loadGlobalBlobs()
  }
  return globalBlobsPromise
}

EventEmitter.defaultMaxListeners = 20

logger.initialize('history-v1-backup')

// Settings shared between command-line and module usage
let DRY_RUN = false
let RETRY_LIMIT = 3
const RETRY_DELAY = 1000
let CONCURRENCY = 4
let BATCH_CONCURRENCY = 1
let BLOB_LIMITER = pLimit(CONCURRENCY)
let USE_SECONDARY = false

/**
 * Configure backup settings
 * @param {Object} options Backup configuration options
 */
export function configureBackup(options = {}) {
  DRY_RUN = options.dryRun || false
  RETRY_LIMIT = options.retries || 3
  CONCURRENCY = options.concurrency || 1
  BATCH_CONCURRENCY = options.batchConcurrency || 1
  BLOB_LIMITER = pLimit(CONCURRENCY)
  USE_SECONDARY = options.useSecondary || false
}

let gracefulShutdownInitiated = false

process.on('SIGINT', handleSignal)
process.on('SIGTERM', handleSignal)

function handleSignal() {
  gracefulShutdownInitiated = true
  logger.info({}, 'graceful shutdown initiated, draining queue')
}

async function retry(fn, times, delayMs) {
  let attempts = times
  while (attempts > 0) {
    try {
      const result = await fn()
      return result
    } catch (err) {
      attempts--
      if (attempts === 0) throw err
      await new Promise(resolve => setTimeout(resolve, delayMs))
    }
  }
}

function wrapWithRetry(fn, retries, delayMs) {
  return async (...args) => {
    const result = await retry(() => fn(...args), retries, delayMs)
    return result
  }
}

const downloadWithRetry = wrapWithRetry(
  downloadBlobToDir,
  RETRY_LIMIT,
  RETRY_DELAY
)

// FIXME: this creates a new backupPersistor for each blob
// so there is no caching of the DEK
const backupWithRetry = wrapWithRetry(backupBlob, RETRY_LIMIT, RETRY_DELAY)
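/**
 * Filter the given blobs down to those that still need backing up,
 * skipping blobs already recorded as backed up for this project and
 * global blobs that have not been demoted.
 */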
async function findNewBlobs(projectId, blobs) {
  const newBlobs = []
  const existingBackedUpBlobHashes = await getBackedUpBlobHashes(projectId)
  for (const blob of blobs) {
    const hash = blob.getHash()
    if (existingBackedUpBlobHashes.has(blob.getHash())) {
      logger.debug({ projectId, hash }, 'Blob is already backed up, skipping')
      continue
    }
    const globalBlob = GLOBAL_BLOBS.get(hash)
    if (globalBlob && !globalBlob.demoted) {
      logger.debug(
        { projectId, hash },
        'Blob is a global blob and not demoted, skipping'
      )
      continue
    }
    newBlobs.push(blob)
  }
  return newBlobs
}

async function cleanBackedUpBlobs(projectId, blobs) {
  const hashes = blobs.map(blob => blob.getHash())
  if (DRY_RUN) {
    console.log(
      'Would remove blobs',
      hashes.join(' '),
      'from project',
      projectId
    )
    return
  }
  await unsetBackedUpBlobHashes(projectId, hashes)
}

async function backupSingleBlob(projectId, historyId, blob, tmpDir, persistor) {
  if (DRY_RUN) {
    console.log(
      'Would back up blob',
      JSON.stringify(blob),
      'in history',
      historyId,
      'for project',
      projectId
    )
    return
  }
  logger.debug({ blob, historyId }, 'backing up blob')
  const blobPath = await downloadWithRetry(historyId, blob, tmpDir)
  await backupWithRetry(historyId, blob, blobPath, persistor)
}

async function backupBlobs(projectId, historyId, blobs, limiter, persistor) {
  let tmpDir
  try {
    tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), 'blob-backup-'))
    const blobBackupOperations = blobs.map(blob =>
      limiter(backupSingleBlob, projectId, historyId, blob, tmpDir, persistor)
    )
    // Reject if any blob backup fails
    await Promise.all(blobBackupOperations)
  } finally {
    if (tmpDir) {
      await fs.rm(tmpDir, { recursive: true, force: true })
    }
  }
}

async function backupChunk(
  projectId,
  historyId,
  chunkBackupPersistorForProject,
  chunkToBackup,
  chunkRecord,
  chunkBuffer
) {
  if (DRY_RUN) {
    console.log(
      'Would back up chunk',
      JSON.stringify(chunkRecord),
      'in history',
      historyId,
      'for project',
      projectId,
      'key',
      makeChunkKey(historyId, chunkToBackup.startVersion)
    )
    return
  }
  const key = makeChunkKey(historyId, chunkToBackup.startVersion)
  logger.debug({ chunkRecord, historyId, projectId, key }, 'backing up chunk')
  const md5 = Crypto.createHash('md5').update(chunkBuffer)
  await chunkBackupPersistorForProject.sendStream(
    chunksBucket,
    makeChunkKey(historyId, chunkToBackup.startVersion),
    Stream.Readable.from([chunkBuffer]),
    {
      contentType: 'application/json',
      contentEncoding: 'gzip',
      contentLength: chunkBuffer.byteLength,
      sourceMd5: md5.digest('hex'),
    }
  )
}
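/**
 * Persist the new backupVersion and lastBackedUpTimestamp for a project
 * after a chunk has been backed up.
 */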
async function updateBackupStatus(
  projectId,
  lastBackedUpVersion,
  chunkRecord,
  startOfBackupTime
) {
  if (DRY_RUN) {
    console.log(
      'Would set backup version to',
      chunkRecord.endVersion,
      'with lastBackedUpTimestamp',
      startOfBackupTime
    )
    return
  }
  logger.debug(
    { projectId, chunkRecord, startOfBackupTime },
    'setting backupVersion and lastBackedUpTimestamp'
  )
  await setBackupVersion(
    projectId,
    lastBackedUpVersion,
    chunkRecord.endVersion,
    startOfBackupTime
  )
}

// Define command-line options
const optionDefinitions = [
  {
    name: 'projectId',
    alias: 'p',
    type: String,
    description: 'The ID of the project to backup',
    defaultOption: true,
  },
  {
    name: 'help',
    alias: 'h',
    type: Boolean,
    description: 'Display this usage guide.',
  },
  {
    name: 'status',
    alias: 's',
    type: Boolean,
    description: 'Display project status.',
  },
  {
    name: 'list',
    alias: 'l',
    type: Boolean,
    description: 'List projects that need to be backed up',
  },
  {
    name: 'dry-run',
    alias: 'n',
    type: Boolean,
    description: 'Perform a dry run without making any changes.',
  },
  {
    name: 'retries',
    alias: 'r',
    type: Number,
    description: 'Number of retries, default is 3.',
  },
  {
    name: 'concurrency',
    alias: 'c',
    type: Number,
    description: 'Number of concurrent blob downloads (default: 1)',
  },
  {
    name: 'batch-concurrency',
    alias: 'b',
    type: Number,
    description: 'Number of concurrent project operations (default: 1)',
  },
  {
    name: 'pending',
    alias: 'P',
    type: Boolean,
    description: 'Backup all pending projects.',
  },
  {
    name: 'interval',
    alias: 'i',
    type: Number,
    description: 'Time interval in seconds for pending backups (default: 3600)',
    defaultValue: 3600,
  },
  {
    name: 'fix',
    type: Number,
    description: 'Fix projects without chunks',
  },
  {
    name: 'init',
    alias: 'I',
    type: Boolean,
    description: 'Initialize backups for all projects.',
  },
  { name: 'output', alias: 'o', type: String, description: 'Output file' },
  {
    name: 'start-date',
    type: String,
    description: 'Start date for initialization (ISO format)',
  },
  {
    name: 'end-date',
    type: String,
    description: 'End date for initialization (ISO format)',
  },
  {
    name: 'use-secondary',
    type: Boolean,
    description: 'Use secondary read preference for backup status',
  },
  {
    name: 'compare',
    alias: 'C',
    type: Boolean,
    description:
      'Compare backup with original chunks. With --start-date and --end-date compares all projects in range.',
  },
]

function handleOptions() {
  const options = commandLineArgs(optionDefinitions)
  if (options.help) {
    console.log('Usage:')
    optionDefinitions.forEach(option => {
      console.log(` --${option.name}, -${option.alias}: ${option.description}`)
    })
    process.exit(0)
  }
  const projectIdRequired =
    !options.list &&
    !options.pending &&
    !options.init &&
    !(options.fix >= 0) &&
    !(options.compare && options['start-date'] && options['end-date'])
  if (projectIdRequired && !options.projectId) {
    console.error('Error: projectId is required')
    process.exit(1)
  }
  if (options.pending && options.projectId) {
    console.error('Error: --pending cannot be specified with projectId')
    process.exit(1)
  }
  if (options.pending && (options.list || options.status)) {
    console.error('Error: --pending is exclusive with --list and --status')
    process.exit(1)
  }
  if (options.init && options.pending) {
    console.error('Error: --init cannot be specified with --pending')
    process.exit(1)
  }
  if (
    (options['start-date'] || options['end-date']) &&
    !options.init &&
    !options.compare
  ) {
    console.error(
      'Error: date options can only be used with --init or --compare'
    )
    process.exit(1)
  }
  if (options['use-secondary']) {
    USE_SECONDARY = true
  }
  if (
    options.compare &&
    !options.projectId &&
    !(options['start-date'] && options['end-date'])
  ) {
    console.error(
      'Error: --compare requires either projectId or both --start-date and --end-date'
    )
    process.exit(1)
  }
  DRY_RUN = options['dry-run'] || false
  RETRY_LIMIT = options.retries || 3
  CONCURRENCY = options.concurrency || 1
  BATCH_CONCURRENCY = options['batch-concurrency'] || 1
  BLOB_LIMITER = pLimit(CONCURRENCY)
  return options
}

async function displayBackupStatus(projectId) {
  const result = await analyseBackupStatus(projectId)
  console.log('Backup status:', JSON.stringify(result))
}
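/**
 * Compare a project's backup status record with the latest chunk metadata
 * and return the versions needed to decide whether a backup is required.
 */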
async function analyseBackupStatus(projectId) {
  const { backupStatus, historyId, currentEndVersion, currentEndTimestamp } =
    await getBackupStatus(projectId)
  // TODO: when we have confidence that the latestChunkMetadata always matches
  // the values from the backupStatus we can skip loading it here
  const latestChunkMetadata = await loadLatestRaw(historyId, {
    readOnly: Boolean(USE_SECONDARY),
  })
  if (
    currentEndVersion &&
    currentEndVersion !== latestChunkMetadata.endVersion
  ) {
    // compare the current end version with the latest chunk metadata to check that
    // the updates to the project collection are reliable
    // expect some failures due to the time window between getBackupStatus and
    // loadLatestRaw where the project is being actively edited.
    logger.warn(
      {
        projectId,
        historyId,
        currentEndVersion,
        currentEndTimestamp,
        latestChunkMetadata,
      },
      'currentEndVersion does not match latest chunk metadata'
    )
  }
  if (DRY_RUN) {
    console.log('Project:', projectId)
    console.log('History ID:', historyId)
    console.log('Latest Chunk Metadata:', JSON.stringify(latestChunkMetadata))
    console.log('Current end version:', currentEndVersion)
    console.log('Current end timestamp:', currentEndTimestamp)
    console.log('Backup status:', backupStatus ?? 'none')
  }
  if (!backupStatus) {
    if (DRY_RUN) {
      console.log('No backup status found - doing full backup')
    }
  }
  const lastBackedUpVersion = backupStatus?.lastBackedUpVersion
  const endVersion = latestChunkMetadata.endVersion
  if (endVersion >= 0 && endVersion === lastBackedUpVersion) {
    if (DRY_RUN) {
      console.log(
        'Project is up to date, last backed up at version',
        lastBackedUpVersion
      )
    }
  } else if (endVersion < lastBackedUpVersion) {
    throw new Error('backup is ahead of project')
  } else {
    if (DRY_RUN) {
      console.log(
        'Project needs to be backed up from',
        lastBackedUpVersion,
        'to',
        endVersion
      )
    }
  }

  return {
    historyId,
    lastBackedUpVersion,
    currentVersion: latestChunkMetadata.endVersion || 0,
    upToDate: endVersion >= 0 && lastBackedUpVersion === endVersion,
    pendingChangeAt: backupStatus?.pendingChangeAt,
    currentEndVersion,
    currentEndTimestamp,
    latestChunkMetadata,
  }
}

async function displayPendingBackups(options) {
  const intervalMs = options.interval * 1000
  for await (const project of listPendingBackups(intervalMs)) {
    console.log(
      'Project:',
      project._id.toHexString(),
      'backup status:',
      JSON.stringify(project.overleaf.backup),
      'history status:',
      JSON.stringify(project.overleaf.history, [
        'currentEndVersion',
        'currentEndTimestamp',
      ])
    )
  }
}

function makeChunkKey(projectId, startVersion) {
  return path.join(projectKey.format(projectId), projectKey.pad(startVersion))
}
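/**
 * Back up a single project: copy any new blobs and chunks to the backup
 * bucket and record the backed-up version in the project's backup status.
 */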
export async function backupProject(projectId, options) {
  if (gracefulShutdownInitiated) {
    return
  }
  await ensureGlobalBlobsLoaded()
  // FIXME: flush the project first!
  // Let's assume the flush happens externally and triggers this backup
  const backupStartTime = new Date()

  // find the last backed up version
  const {
    historyId,
    lastBackedUpVersion,
    currentVersion,
    upToDate,
    pendingChangeAt,
    currentEndVersion,
    latestChunkMetadata,
  } = await analyseBackupStatus(projectId)

  if (upToDate) {
    logger.debug(
      {
        projectId,
        historyId,
        lastBackedUpVersion,
        currentVersion,
        pendingChangeAt,
      },
      'backup is up to date'
    )

    if (
      currentEndVersion === undefined &&
      latestChunkMetadata.endVersion >= 0
    ) {
      if (DRY_RUN) {
        console.log('Would update current metadata to', latestChunkMetadata)
      } else {
        await updateCurrentMetadataIfNotSet(projectId, latestChunkMetadata)
      }
    }

    // clear the pending changes timestamp if the backup is complete
    if (pendingChangeAt) {
      if (DRY_RUN) {
        console.log(
          'Would update or clear pending changes timestamp',
          backupStartTime
        )
      } else {
        await updatePendingChangeTimestamp(projectId, backupStartTime)
      }
    }

    return
  }

  logger.debug(
    {
      projectId,
      historyId,
      lastBackedUpVersion,
      currentVersion,
      pendingChangeAt,
    },
    'backing up project'
  )

  // this persistor works for both the chunks and blobs buckets,
  // because they use the same DEK
  const backupPersistorForProject = await backupPersistor.forProject(
    chunksBucket,
    makeProjectKey(historyId, '')
  )

  let previousBackedUpVersion = lastBackedUpVersion
  const backupVersions = [previousBackedUpVersion]

  for await (const {
    blobsToBackup,
    chunkToBackup,
    chunkRecord,
    chunkBuffer,
  } of backupGenerator(historyId, lastBackedUpVersion)) {
    // backup the blobs first
    // this can be done in parallel but must fail if any blob cannot be backed up
    // if the blob already exists in the backup then that is allowed
    const newBlobs = await findNewBlobs(projectId, blobsToBackup)
    await backupBlobs(
      projectId,
      historyId,
      newBlobs,
      BLOB_LIMITER,
      backupPersistorForProject
    )

    // then backup the original compressed chunk using the startVersion as the key
    await backupChunk(
      projectId,
      historyId,
      backupPersistorForProject,
      chunkToBackup,
      chunkRecord,
      chunkBuffer
    )

    // persist the backup status in mongo for the current chunk
    try {
      await updateBackupStatus(
        projectId,
        previousBackedUpVersion,
        chunkRecord,
        backupStartTime
      )
    } catch (err) {
      logger.error(
        { projectId, chunkRecord, err, backupVersions },
        'error updating backup status'
      )
      throw err
    }

    previousBackedUpVersion = chunkRecord.endVersion
    backupVersions.push(previousBackedUpVersion)

    await cleanBackedUpBlobs(projectId, blobsToBackup)
  }

  // update the current end version and timestamp if they are not set
  if (currentEndVersion === undefined && latestChunkMetadata.endVersion >= 0) {
    if (DRY_RUN) {
      console.log('Would update current metadata to', latestChunkMetadata)
    } else {
      await updateCurrentMetadataIfNotSet(projectId, latestChunkMetadata)
    }
  }

  // clear the pending changes timestamp if the backup is complete, otherwise set it to the time
  // when the backup started (to pick up the new changes on the next backup)
  if (DRY_RUN) {
    console.log(
      'Would update or clear pending changes timestamp',
      backupStartTime
    )
  } else {
    await updatePendingChangeTimestamp(projectId, backupStartTime)
  }
}

function convertToISODate(dateStr) {
  // Expecting YYYY-MM-DD format
  if (!/^\d{4}-\d{2}-\d{2}$/.test(dateStr)) {
    throw new Error('Date must be in YYYY-MM-DD format')
  }
  return new Date(dateStr + 'T00:00:00.000Z').toISOString()
}
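/**
 * Find projects with a history id and no backed-up version, and create an
 * initial empty chunk for any project that has no chunks at all.
 */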
export async function fixProjectsWithoutChunks(options) {
  const limit = options.fix || 1
  const query = {
    'overleaf.history.id': { $exists: true },
    'overleaf.backup.lastBackedUpVersion': { $in: [null] },
  }
  const cursor = client
    .db()
    .collection('projects')
    .find(query, {
      projection: { _id: 1, 'overleaf.history.id': 1 },
      readPreference: READ_PREFERENCE_SECONDARY,
    })
    .limit(limit)
  for await (const project of cursor) {
    const historyId = project.overleaf.history.id.toString()
    const chunks = await getProjectChunks(historyId)
    if (chunks.length > 0) {
      continue
    }
    if (DRY_RUN) {
      console.log(
        'Would create new chunk for Project ID:',
        project._id.toHexString(),
        'History ID:',
        historyId,
        'Chunks:',
        chunks
      )
    } else {
      console.log(
        'Creating new chunk for Project ID:',
        project._id.toHexString(),
        'History ID:',
        historyId,
        'Chunks:',
        chunks
      )
      const snapshot = new Snapshot()
      const history = new History(snapshot, [])
      const chunk = new Chunk(history, 0)
      await create(historyId, chunk)
      const newChunks = await getProjectChunks(historyId)
      console.log('New chunk:', newChunks)
    }
  }
}

export async function initializeProjects(options) {
  await ensureGlobalBlobsLoaded()
  let totalErrors = 0
  let totalProjects = 0

  const query = {
    'overleaf.backup.lastBackedUpVersion': { $in: [null] },
  }

  if (options['start-date'] && options['end-date']) {
    query._id = {
      $gte: objectIdFromInput(convertToISODate(options['start-date'])),
      $lt: objectIdFromInput(convertToISODate(options['end-date'])),
    }
  }

  const cursor = client
    .db()
    .collection('projects')
    .find(query, {
      projection: { _id: 1 },
      readPreference: READ_PREFERENCE_SECONDARY,
    })

  if (options.output) {
    console.log("Writing project IDs to file: '" + options.output + "'")
    const output = createWriteStream(options.output)
    for await (const project of cursor) {
      output.write(project._id.toHexString() + '\n')
      totalProjects++
    }
    output.end()
    console.log('Wrote ' + totalProjects + ' project IDs to file')
    return
  }

  for await (const project of cursor) {
    if (gracefulShutdownInitiated) {
      console.warn('graceful shutdown: stopping project initialization')
      break
    }
    totalProjects++
    const projectId = project._id.toHexString()
    try {
      await backupProject(projectId, options)
    } catch (err) {
      totalErrors++
      logger.error({ projectId, err }, 'error backing up project')
    }
  }

  return { errors: totalErrors, projects: totalProjects }
}

async function backupPendingProjects(options) {
  const intervalMs = options.interval * 1000
  for await (const project of listPendingBackups(intervalMs)) {
    if (gracefulShutdownInitiated) {
      console.warn('graceful shutdown: stopping pending project backups')
      break
    }
    const projectId = project._id.toHexString()
    console.log(`Backing up pending project with ID: ${projectId}`)
    await backupProject(projectId, options)
  }
}

class BlobComparator {
  constructor(backupPersistorForProject) {
    this.cache = new Map()
    this.backupPersistorForProject = backupPersistorForProject
  }

  async compareBlob(historyId, blob) {
    let computedHash = this.cache.get(blob.hash)
    const fromCache = !!computedHash
    if (!computedHash) {
      const blobKey = makeProjectKey(historyId, blob.hash)
      const backupBlobStream =
        await this.backupPersistorForProject.getObjectStream(
          projectBlobsBucket,
          blobKey,
          { autoGunzip: true }
        )
      computedHash = await blobHashFromStream(blob.byteLength, backupBlobStream)
      this.cache.set(blob.hash, computedHash)
    }
    const matches = computedHash === blob.hash
    return {
      matches,
      computedHash,
      fromCache,
    }
  }
}
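/**
 * Compare a project's chunks and blobs with the backed-up copies, printing a
 * per-chunk and per-blob report and throwing if any mismatch is found.
 */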
async function compareBackups(projectId, options) {
  console.log(`Comparing backups for project ${projectId}`)
  const { historyId } = await getBackupStatus(projectId)
  const chunks = await getProjectChunks(historyId)
  const blobStore = new BlobStore(historyId)
  const backupPersistorForProject = await backupPersistor.forProject(
    chunksBucket,
    makeProjectKey(historyId, '')
  )

  let totalChunkMatches = 0
  let totalChunkMismatches = 0
  let totalChunksNotFound = 0
  let totalBlobMatches = 0
  let totalBlobMismatches = 0
  let totalBlobsNotFound = 0
  const errors = []
  const blobComparator = new BlobComparator(backupPersistorForProject)

  for (const chunk of chunks) {
    try {
      // Compare chunk content
      const originalChunk = await historyStore.loadRaw(historyId, chunk.id)
      const key = makeChunkKey(historyId, chunk.startVersion)
      try {
        const backupChunkStream =
          await backupPersistorForProject.getObjectStream(chunksBucket, key)
        const backupStr = await text(backupChunkStream.pipe(createGunzip()))
        const originalStr = JSON.stringify(originalChunk)
        const backupChunk = JSON.parse(backupStr)
        const backupStartVersion = chunk.startVersion
        const backupEndVersion = chunk.startVersion + backupChunk.changes.length
        if (originalStr === backupStr) {
          console.log(
            `✓ Chunk ${chunk.id} (v${chunk.startVersion}-v${chunk.endVersion}) matches`
          )
          totalChunkMatches++
        } else if (originalStr === JSON.stringify(JSON.parse(backupStr))) {
          console.log(
            `✓ Chunk ${chunk.id} (v${chunk.startVersion}-v${chunk.endVersion}) matches (after normalisation)`
          )
          totalChunkMatches++
        } else if (backupEndVersion < chunk.endVersion) {
          console.log(
            `✗ Chunk ${chunk.id} is ahead of backup (v${chunk.startVersion}-v${chunk.endVersion} vs v${backupStartVersion}-v${backupEndVersion})`
          )
          totalChunkMismatches++
          errors.push({ chunkId: chunk.id, error: 'Chunk ahead of backup' })
        } else {
          console.log(
            `✗ Chunk ${chunk.id} (v${chunk.startVersion}-v${chunk.endVersion}) MISMATCH`
          )
          totalChunkMismatches++
          errors.push({ chunkId: chunk.id, error: 'Chunk mismatch' })
        }
      } catch (err) {
        if (err instanceof NotFoundError) {
          console.log(`✗ Chunk ${chunk.id} not found in backup`, err.cause)
          totalChunksNotFound++
          errors.push({ chunkId: chunk.id, error: `Chunk not found` })
        } else {
          throw err
        }
      }

      const history = History.fromRaw(originalChunk)
      // Compare blobs in chunk
      const blobHashes = new Set()
      history.findBlobHashes(blobHashes)
      const blobs = await blobStore.getBlobs(Array.from(blobHashes))
      for (const blob of blobs) {
        if (GLOBAL_BLOBS.has(blob.hash)) {
          const globalBlob = GLOBAL_BLOBS.get(blob.hash)
          console.log(
            ` ✓ Blob ${blob.hash} is a global blob`,
            globalBlob.demoted ? '(demoted)' : ''
          )
          continue
        }
        try {
          const { matches, computedHash, fromCache } =
            await blobComparator.compareBlob(historyId, blob)
          if (matches) {
            console.log(
              ` ✓ Blob ${blob.hash} hash matches (${blob.byteLength} bytes)` +
                (fromCache ? ' (from cache)' : '')
            )
            totalBlobMatches++
          } else {
            console.log(
              ` ✗ Blob ${blob.hash} hash mismatch (original: ${blob.hash}, backup: ${computedHash}) (${blob.byteLength} bytes, ${blob.stringLength} string length)` +
                (fromCache ? ' (from cache)' : '')
            )
            totalBlobMismatches++
            errors.push({
              chunkId: chunk.id,
              error: `Blob ${blob.hash} hash mismatch`,
            })
          }
        } catch (err) {
          if (err instanceof NotFoundError) {
            console.log(` ✗ Blob ${blob.hash} not found in backup`, err.cause)
            totalBlobsNotFound++
            errors.push({
              chunkId: chunk.id,
              error: `Blob ${blob.hash} not found`,
            })
          } else {
            throw err
          }
        }
      }
    } catch (err) {
      console.error(`Error comparing chunk ${chunk.id}:`, err)
      errors.push({ chunkId: chunk.id, error: err })
    }
  }

  // Print summary
  console.log('\nComparison Summary:')
  console.log('==================')
  console.log(`Total chunks: ${chunks.length}`)
  console.log(`Chunk matches: ${totalChunkMatches}`)
  console.log(`Chunk mismatches: ${totalChunkMismatches}`)
  console.log(`Chunk not found: ${totalChunksNotFound}`)
  console.log(`Blob matches: ${totalBlobMatches}`)
  console.log(`Blob mismatches: ${totalBlobMismatches}`)
  console.log(`Blob not found: ${totalBlobsNotFound}`)
  console.log(`Errors: ${errors.length}`)
  if (errors.length > 0) {
    console.log('\nErrors:')
    errors.forEach(({ chunkId, error }) => {
      console.log(` Chunk ${chunkId}: ${error}`)
    })
    throw new Error('Backup comparison FAILED')
  } else {
    console.log('Backup comparison successful')
  }
}
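/**
 * Run compareBackups over every backed-up project in the given date range,
 * processing projects in batches with limited concurrency.
 */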
async function compareAllProjects(options) {
  const limiter = pLimit(BATCH_CONCURRENCY)
  let totalErrors = 0
  let totalProjects = 0

  async function processBatch(batch) {
    if (gracefulShutdownInitiated) {
      throw new Error('graceful shutdown')
    }
    const batchOperations = batch.map(project =>
      limiter(async () => {
        const projectId = project._id.toHexString()
        totalProjects++
        try {
          console.log(`\nComparing project ${projectId} (${totalProjects})`)
          await compareBackups(projectId, options)
        } catch (err) {
          totalErrors++
          console.error(`Failed to compare project ${projectId}:`, err)
        }
      })
    )
    await Promise.allSettled(batchOperations)
  }

  const query = {
    'overleaf.history.id': { $exists: true },
    'overleaf.backup.lastBackedUpVersion': { $exists: true },
  }

  await batchedUpdate(
    client.db().collection('projects'),
    query,
    processBatch,
    {
      _id: 1,
      'overleaf.history': 1,
      'overleaf.backup': 1,
    },
    { readPreference: 'secondary' },
    {
      BATCH_RANGE_START: convertToISODate(options['start-date']),
      BATCH_RANGE_END: convertToISODate(options['end-date']),
    }
  )

  console.log('\nComparison Summary:')
  console.log('==================')
  console.log(`Total projects processed: ${totalProjects}`)
  console.log(`Projects with errors: ${totalErrors}`)
  if (totalErrors > 0) {
    throw new Error('Some project comparisons failed')
  }
}
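/**
 * Command-line entry point: dispatch to the status, list, fix, pending,
 * init, compare or single-project backup actions based on the options.
 */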
async function main() {
  const options = handleOptions()
  await ensureGlobalBlobsLoaded()
  const projectId = options.projectId
  if (options.status) {
    await displayBackupStatus(projectId)
  } else if (options.list) {
    await displayPendingBackups(options)
  } else if (options.fix !== undefined) {
    await fixProjectsWithoutChunks(options)
  } else if (options.pending) {
    await backupPendingProjects(options)
  } else if (options.init) {
    await initializeProjects(options)
  } else if (options.compare) {
    if (options['start-date'] && options['end-date']) {
      await compareAllProjects(options)
    } else {
      await compareBackups(projectId, options)
    }
  } else {
    await backupProject(projectId, options)
  }
}

// Only run command-line interface when script is run directly
if (import.meta.url === `file://${process.argv[1]}`) {
  main()
    .then(() => {
      console.log(
        gracefulShutdownInitiated ? 'Exited - graceful shutdown' : 'Completed'
      )
    })
    .catch(err => {
      console.error('Error backing up project:', err)
      process.exit(1)
    })
    .finally(() => {
      knex
        .destroy()
        .then(() => {
          console.log('Postgres connection closed')
        })
        .catch(err => {
          console.error('Error closing Postgres connection:', err)
        })
      client
        .close()
        .then(() => {
          console.log('MongoDB connection closed')
        })
        .catch(err => {
          console.error('Error closing MongoDB connection:', err)
        })
      redis
        .disconnect()
        .then(() => {
          console.log('Redis connection closed')
        })
        .catch(err => {
          console.error('Error closing Redis connection:', err)
        })
    })
}