first commit

This commit is contained in:
2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,33 @@
import Metrics from '@overleaf/metrics'
import { objectIdFromDate } from './utils.mjs'
import { db } from '../storage/lib/mongodb.js'
// Handle to the `projects` collection; both metric queries in this module
// count documents in it.
const projectsCollection = db.collection('projects')
/**
 * Counts the projects whose `overleaf.backup.pendingChangeAt` is earlier than
 * `beforeTime` and publishes that count as the
 * `backup_verification_pending_changes` gauge.
 *
 * @param {Date} beforeTime - exclusive upper bound for `pendingChangeAt`
 * @return {Promise<void>}
 */
export async function measurePendingChangesBeforeTime(beforeTime) {
  const stalePendingChangeFilter = {
    'overleaf.backup.pendingChangeAt': { $lt: beforeTime },
  }
  const staleCount = await projectsCollection.countDocuments(
    stalePendingChangeFilter
  )
  Metrics.gauge('backup_verification_pending_changes', staleCount)
}
/**
 * Counts the projects that have no `overleaf.backup.lastBackedUpVersion` and
 * whose `_id` is lower than the ObjectId derived from `graceTime`, then
 * publishes that count as the `backup_verification_never_backed_up` gauge.
 *
 * @param {Date} graceTime - projects with an `_id` at or after the ObjectId
 *   for this time are not counted
 * @return {Promise<void>}
 */
export async function measureNeverBackedUpProjects(graceTime) {
  const neverBackedUpFilter = {
    'overleaf.backup.lastBackedUpVersion': null,
    _id: { $lt: objectIdFromDate(graceTime) },
  }
  const missingBackupCount =
    await projectsCollection.countDocuments(neverBackedUpFilter)
  Metrics.gauge('backup_verification_never_backed_up', missingBackupCount)
}

View File

@@ -0,0 +1,79 @@
// @ts-check
import { objectIdFromDate } from './utils.mjs'
import { db } from '../storage/lib/mongodb.js'
import config from 'config'
// Handle to the `projects` collection that the sampling pipelines run against.
const projectsCollection = db.collection('projects')
// True only when the `hasProjectsWithoutHistory` config value is the string
// 'true'. When set, sampled projects without an `overleaf.history.id` are
// skipped by getSampleProjectsCursor instead of being yielded.
const HAS_PROJECTS_WITHOUT_HISTORY =
config.get('hasProjectsWithoutHistory') === 'true'
/**
 * Samples up to `N` projects whose `_id` falls in the ObjectId range derived
 * from `(start, end]` and yields their history ids.
 *
 * @param {Date} start - exclusive lower bound
 * @param {Date} end - inclusive upper bound
 * @param {number} N - sample size
 * @yields {string}
 */
export async function* getProjectsCreatedInDateRangeCursor(start, end, N) {
  const createdInRangeStage = {
    $match: {
      _id: { $gt: objectIdFromDate(start), $lte: objectIdFromDate(end) },
    },
  }
  yield* getSampleProjectsCursor(N, [createdInRangeStage])
}
/**
 * Samples up to `N` projects whose `overleaf.history.updatedAt` falls in
 * `(start, end]` and yields their history ids.
 *
 * @param {Date} start - exclusive lower bound
 * @param {Date} end - inclusive upper bound
 * @param {number} N - sample size
 * @yields {string}
 */
export async function* getProjectsUpdatedInDateRangeCursor(start, end, N) {
  const updatedInRangeStage = {
    $match: {
      'overleaf.history.updatedAt': { $gt: start, $lte: end },
    },
  }
  yield* getSampleProjectsCursor(N, [updatedInRangeStage])
}
/**
* @typedef {import('mongodb').Document} Document
*/
/**
 * Randomly samples up to `N` projects (after applying any pre-sample stages)
 * and yields the history id of each one as a string.
 *
 * When HAS_PROJECTS_WITHOUT_HISTORY is set, projects without an
 * `overleaf.history.id` are skipped. If a whole sample consisted only of such
 * projects, a fresh sample is drawn; the number of re-draws is not bounded.
 *
 * @generator
 * @param {number} N - sample size passed to `$sample`
 * @param {Array<Document>} preSampleAggregationStages - stages run before
 *   `$sample`, e.g. a `$match`
 * @yields {string}
 */
export async function* getSampleProjectsCursor(
  N,
  preSampleAggregationStages = []
) {
  for (;;) {
    const sample = projectsCollection.aggregate([
      ...preSampleAggregationStages,
      { $sample: { size: N } },
      { $project: { 'overleaf.history.id': 1 } },
    ])
    let yieldedAny = false
    let skippedAny = false
    for await (const project of sample) {
      const lacksHistoryId = !project.overleaf?.history?.id
      if (HAS_PROJECTS_WITHOUT_HISTORY && lacksHistoryId) {
        skippedAny = true
      } else {
        yieldedAny = true
        yield project.overleaf.history.id.toString()
      }
    }
    // Only draw again when every sampled project had to be skipped.
    if (yieldedAny || !skippedAny) {
      return
    }
  }
}

View File

@@ -0,0 +1,320 @@
// @ts-check
import { verifyProjectWithErrorContext } from '../storage/lib/backupVerifier.mjs'
import { promiseMapSettledWithLimit } from '@overleaf/promise-utils'
import logger from '@overleaf/logger'
import metrics from '@overleaf/metrics'
import {
getSampleProjectsCursor,
getProjectsCreatedInDateRangeCursor,
getProjectsUpdatedInDateRangeCursor,
} from './ProjectSampler.mjs'
import OError from '@overleaf/o-error'
import { setTimeout } from 'node:timers/promises'
// 30 days in milliseconds; the default width of each date-range job in
// verifyProjectsCreatedInDateRange.
const MS_PER_30_DAYS = 30 * 24 * 60 * 60 * 1000
// Counts failed project verifications, labelled with the error name.
const failureCounter = new metrics.prom.Counter({
name: 'backup_project_verification_failed',
help: 'Number of projects that failed verification',
labelNames: ['name'],
})
// Counts successful project verifications.
const successCounter = new metrics.prom.Counter({
name: 'backup_project_verification_succeeded',
help: 'Number of projects that succeeded verification',
})
// The counters above are only incremented while this flag is true; it is
// changed through setWriteMetrics().
let WRITE_METRICS = false
/**
* @typedef {import('node:events').EventEmitter} EventEmitter
*/
/**
 * Switches the verification success/failure counters on or off for this
 * module. While the flag is false, verification results are still logged and
 * returned, but no counter is incremented.
 *
 * @param {Boolean} writeMetrics - true to record counters, false to skip them
 */
export function setWriteMetrics(writeMetrics) {
  WRITE_METRICS = writeMetrics
}
/**
 * Logs a failed project verification and, when metrics are enabled, counts it
 * under the error's name.
 *
 * @param {Error|unknown} error - whatever the verification threw
 * @param {string} historyId - history id of the project being verified
 * @return {string} the error's `name`, or 'UnknownError' for non-Error values
 */
function handleVerificationError(error, historyId) {
  let name = 'UnknownError'
  if (error instanceof Error) {
    name = error.name
  }
  logger.error({ historyId, error, name }, 'error verifying project backup')
  if (WRITE_METRICS) {
    failureCounter.inc({ name })
  }
  return name
}
/**
 * Splits the period `[startDate, endDate)` into consecutive jobs that are at
 * most `interval` milliseconds wide. The final job is shortened so that it
 * ends exactly at `endDate`.
 *
 * Returns an empty array when the period is empty, or when `interval` cannot
 * advance a Date (anything below one millisecond, including zero, negative
 * values and NaN). Without that guard the loop would never terminate.
 *
 * @param {Date} startDate - start of the first job
 * @param {Date} endDate - end of the last job
 * @param {number} interval - maximum job width in milliseconds
 * @returns {Array<VerificationJobSpecification>}
 */
function splitJobs(startDate, endDate, interval) {
  /** @type {Array<VerificationJobSpecification>} */
  const jobs = []
  // Dates have millisecond resolution, so a step below 1ms makes no progress.
  if (!(interval >= 1)) {
    return jobs
  }
  const endTime = endDate.getTime()
  let windowStart = startDate
  while (windowStart < endDate) {
    const windowEnd = new Date(
      Math.min(windowStart.getTime() + interval, endTime)
    )
    jobs.push({ startDate: windowStart, endDate: windowEnd })
    windowStart = windowEnd
  }
  return jobs
}
/**
 * Verifies the backup of every history id produced by `historyIdCursor`, one
 * at a time, and summarises the outcome.
 *
 * If `eventEmitter` emits 'shutdown', the loop stops before starting the next
 * project. The shutdown listener is removed again before returning, so that
 * repeated calls sharing one emitter do not accumulate listeners.
 *
 * @param {AsyncGenerator<string>} historyIdCursor
 * @param {EventEmitter} [eventEmitter]
 * @param {number} [delay] - Allows a delay (in ms) between each verification
 * @return {Promise<{verified: number, total: number, errorTypes: Array<string>, hasFailure: boolean}>}
 */
async function verifyProjectsFromCursor(
  historyIdCursor,
  eventEmitter,
  delay = 0
) {
  const errorTypes = []
  let verified = 0
  let total = 0
  let receivedShutdownSignal = false
  const onShutdown = () => {
    receivedShutdownSignal = true
  }
  if (eventEmitter) {
    eventEmitter.once('shutdown', onShutdown)
  }
  try {
    for await (const historyId of historyIdCursor) {
      if (receivedShutdownSignal) {
        break
      }
      total++
      try {
        await verifyProjectWithErrorContext(historyId)
        logger.debug({ historyId }, 'verified project backup successfully')
        WRITE_METRICS && successCounter.inc()
        verified++
      } catch (error) {
        // A failed verification is recorded and the run carries on.
        const errorType = handleVerificationError(error, historyId)
        errorTypes.push(errorType)
      }
      if (delay > 0) {
        await setTimeout(delay)
      }
    }
  } finally {
    // Also runs when the cursor throws; a no-op if 'shutdown' already fired.
    if (eventEmitter) {
      eventEmitter.off('shutdown', onShutdown)
    }
  }
  return {
    verified,
    total,
    errorTypes,
    hasFailure: errorTypes.length > 0,
  }
}
/**
 * Draws a random sample of projects and verifies the backup of each one.
 *
 * @param {number} nProjectsToSample - sample size
 * @param {EventEmitter} [signal] - emits 'shutdown' to stop early
 * @param {number} [delay] - pause in ms between verifications
 * @return {Promise<VerificationJobStatus>}
 */
export async function verifyRandomProjectSample(
  nProjectsToSample,
  signal,
  delay = 0
) {
  // The sampler is an async generator: calling it returns the iterator
  // directly, so there is nothing to await here.
  const historyIds = getSampleProjectsCursor(nProjectsToSample)
  return await verifyProjectsFromCursor(historyIds, signal, delay)
}
/**
 * Samples projects created between the given dates and verifies their
 * backups, returning the verification summary tagged with the range.
 *
 * @param {Date} startDate
 * @param {Date} endDate
 * @param {number} projectsPerRange - how many projects to sample
 * @param {EventEmitter} [signal] - emits 'shutdown' to stop early
 * @return {Promise<VerificationJobStatus>}
 */
async function verifyRange(startDate, endDate, projectsPerRange, signal) {
  logger.info({ startDate, endDate }, 'verifying range')
  const historyIds = getProjectsCreatedInDateRangeCursor(
    startDate,
    endDate,
    projectsPerRange
  )
  const results = await verifyProjectsFromCursor(historyIds, signal)
  const nothingSampled = results.total === 0
  if (nothingSampled) {
    logger.debug(
      { start: startDate, end: endDate },
      'No projects found in range'
    )
  }
  const jobStatus = { ...results, startDate, endDate }
  // Only the log line is de-duplicated; the returned status keeps every entry.
  const distinctErrorTypes = [...new Set(jobStatus.errorTypes)]
  logger.debug({ ...jobStatus, errorTypes: distinctErrorTypes }, 'Verified range')
  return jobStatus
}
/**
* @typedef {Object} VerificationJobSpecification
* @property {Date} startDate
* @property {Date} endDate
*/
/**
* @typedef {import('./types.d.ts').VerificationJobStatus} VerificationJobStatus
*/
/**
* @typedef {Object} VerifyDateRangeOptions
* @property {Date} startDate
* @property {Date} endDate
* @property {number} [interval]
* @property {number} [projectsPerRange]
* @property {number} [concurrency]
* @property {EventEmitter} [signal]
*/
/**
 * Splits `[startDate, endDate)` into jobs of `interval` milliseconds, samples
 * and verifies `projectsPerRange` projects created in each job's range, and
 * merges the per-range results into a single status.
 *
 * A range whose job rejects is logged and left out of the totals.
 * NOTE(review): such a range does not set `hasFailure` — confirm that callers
 * do not treat `hasFailure: false` as "every range was checked".
 * NOTE(review): the `concurrency` default of 0 is forwarded unchanged to
 * promiseMapSettledWithLimit — confirm that helper accepts 0.
 *
 * @param {VerifyDateRangeOptions} options
 * @return {Promise<VerificationJobStatus>}
 * @throws {OError} when the date range yields no jobs
 */
export async function verifyProjectsCreatedInDateRange({
  concurrency = 0,
  projectsPerRange = 10,
  startDate,
  endDate,
  interval = MS_PER_30_DAYS,
  signal,
}) {
  const jobs = splitJobs(startDate, endDate, interval)
  if (jobs.length === 0) {
    throw new OError('Time range could not be split into jobs', {
      start: startDate,
      end: endDate,
      interval,
    })
  }
  const settlements = await promiseMapSettledWithLimit(
    concurrency,
    jobs,
    ({ startDate, endDate }) =>
      verifyRange(startDate, endDate, projectsPerRange, signal)
  )
  /** @type {VerificationJobStatus} */
  const summary = {
    startDate,
    endDate,
    verified: 0,
    total: 0,
    hasFailure: false,
    errorTypes: [],
  }
  for (const settlement of settlements) {
    if (settlement.status === 'rejected') {
      // Pass the error itself: spreading an Error copies only its enumerable
      // own properties and so loses `message` and `stack`.
      logger.error({ error: settlement.reason }, 'Error processing range')
      continue
    }
    const rangeStatus = settlement.value
    if (rangeStatus.hasFailure) {
      summary.hasFailure = true
    }
    summary.total += rangeStatus.total
    summary.verified += rangeStatus.verified
    summary.errorTypes = summary.errorTypes.concat(rangeStatus.errorTypes)
  }
  return summary
}
/**
 * Samples up to `nProjects` projects whose history was updated between the
 * given dates and verifies their backups, returning the verification summary
 * tagged with the range.
 *
 * @param {Date} startDate
 * @param {Date} endDate
 * @param {number} nProjects - how many projects to sample
 * @param {EventEmitter} [signal] - emits 'shutdown' to stop early
 * @return {Promise<VerificationJobStatus>}
 */
export async function verifyProjectsUpdatedInDateRange(
  startDate,
  endDate,
  nProjects,
  signal
) {
  logger.debug(
    { startDate, endDate, nProjects },
    'Sampling projects updated in date range'
  )
  const historyIds = getProjectsUpdatedInDateRangeCursor(
    startDate,
    endDate,
    nProjects
  )
  const results = await verifyProjectsFromCursor(historyIds, signal)
  const nothingSampled = results.total === 0
  if (nothingSampled) {
    logger.debug(
      { start: startDate, end: endDate },
      'No projects updated recently'
    )
  }
  const jobStatus = { ...results, startDate, endDate }
  // Only the log line is de-duplicated; the returned status keeps every entry.
  const distinctErrorTypes = [...new Set(jobStatus.errorTypes)]
  logger.debug(
    { ...jobStatus, errorTypes: distinctErrorTypes },
    'Verified recently updated projects'
  )
  return jobStatus
}
/**
 * Starts a background loop that repeatedly verifies a random sample of 100
 * projects, pausing 2 seconds between projects, until `signal` emits
 * 'shutdown'. Errors from a sample run are logged and the loop continues.
 *
 * The function returns immediately; the loop's promise is not awaited.
 *
 * @param {EventEmitter} signal
 * @return {void}
 */
export function loopRandomProjects(signal) {
  let shutdown = false
  signal.on('shutdown', () => {
    shutdown = true
  })
  const runUntilShutdown = async () => {
    for (;;) {
      try {
        const result = await verifyRandomProjectSample(100, signal, 2_000)
        logger.debug({ result }, 'verified random project sample')
      } catch (error) {
        logger.error({ error }, 'error verifying random project sample')
      }
      // Checked after each run so at least one sample is always attempted.
      if (shutdown) {
        break
      }
    }
  }
  runUntilShutdown()
}

View File

@@ -0,0 +1,32 @@
import config from 'config'
import { verifyProjectWithErrorContext } from '../storage/lib/backupVerifier.mjs'
import {
measureNeverBackedUpProjects,
measurePendingChangesBeforeTime,
} from './ProjectMetrics.mjs'
import { getEndDateForRPO, RPO } from './utils.mjs'
/**
 * History ids verified by healthCheck(), parsed once at module load from the
 * JSON-encoded `healthCheckProjects` config value. JSON.parse throws here if
 * that value is not valid JSON; the parsed shape is validated in healthCheck().
 *
 * @type {Array<string>}
 */
const HEALTH_CHECK_PROJECTS = JSON.parse(config.get('healthCheckProjects'))
/**
 * Validates the configured health-check projects, verifies the backup of each
 * one in order, then refreshes the pending-changes and never-backed-up gauges
 * using a cut-off of twice the RPO.
 *
 * The configuration must be an array of exactly two ids: one of length 24
 * (reported as the mongo id) and one shorter (reported as the postgres id).
 *
 * @return {Promise<void>}
 * @throws {Error} when the configuration is invalid or a verification fails
 */
export async function healthCheck() {
  // Evaluated lazily and in order, so later checks can rely on earlier ones.
  const expectations = [
    [ids => Array.isArray(ids), 'expected healthCheckProjects to be an array'],
    [ids => ids.length === 2, 'expected 2 healthCheckProjects'],
    [
      ids => ids.some(id => id.length === 24),
      'expected mongo id in healthCheckProjects',
    ],
    [
      ids => ids.some(id => id.length < 24),
      'expected postgres id in healthCheckProjects',
    ],
  ]
  for (const [holds, message] of expectations) {
    if (!holds(HEALTH_CHECK_PROJECTS)) {
      throw new Error(message)
    }
  }
  for (const historyId of HEALTH_CHECK_PROJECTS) {
    await verifyProjectWithErrorContext(historyId)
  }
  await measurePendingChangesBeforeTime(getEndDateForRPO(2))
  await measureNeverBackedUpProjects(getEndDateForRPO(2))
}

View File

@@ -0,0 +1,8 @@
/**
 * Summary of a verification run over a set of projects.
 */
export type VerificationJobStatus = {
/** Number of projects whose verification completed without an error. */
verified: number
/** Number of projects for which verification was attempted. */
total: number
/** Start of the date range covered; absent for runs not tied to a range. */
startDate?: Date
/** End of the date range covered; absent for runs not tied to a range. */
endDate?: Date
/** True when at least one project failed verification. */
hasFailure: boolean
/** Error name of each failed verification, one entry per failure. */
errorTypes: Array<string>
}

View File

@@ -0,0 +1,35 @@
import { ObjectId } from 'mongodb'
import config from 'config'
// RPO (presumably the backup recovery point objective — confirm) in
// milliseconds, parsed from the `backupRPOInMS` config value. parseInt yields
// NaN when the value is not numeric, which makes the dates derived below invalid.
export const RPO = parseInt(config.get('backupRPOInMS'), 10)
/**
 * Builds the ObjectId that corresponds to the given time, for use as a bound
 * in `_id` range queries.
 *
 * ObjectId timestamps have one-second resolution, so the time is rounded down
 * to whole seconds explicitly rather than handing the driver a fractional
 * value and depending on how it truncates.
 *
 * @param {Date} time
 * @return {ObjectId}
 */
export function objectIdFromDate(time) {
  const wholeSeconds = Math.floor(time.getTime() / 1000)
  return ObjectId.createFromTime(wholeSeconds)
}
/**
 * Returns the point in time that lies `factor` times the RPO before now.
 *
 * @param {number} [factor] - Multiply RPO by this factor, default is 1
 * @return {Date}
 */
export function getEndDateForRPO(factor = 1) {
  const lookbackMs = RPO * factor
  return new Date(Date.now() - lookbackMs)
}
/**
 * Builds the window of `offset` seconds that ends exactly one RPO before now.
 *
 * @param {number} offset - How many seconds the window should span
 * @return {{endDate: Date, startDate: Date}}
 */
export function getDatesBeforeRPO(offset) {
  const endDate = new Date(Date.now() - RPO)
  const windowMs = offset * 1000
  const startDate = new Date(endDate.getTime() - windowMs)
  return { endDate, startDate }
}