first commit
This commit is contained in:
21
services/project-history/scripts/add_index_for_sync_state.js
Normal file
21
services/project-history/scripts/add_index_for_sync_state.js
Normal file
@@ -0,0 +1,21 @@
|
||||
/* eslint-env mongo */
|
||||
|
||||
// add a TTL index to expire entries for completed resyncs in the
|
||||
// projectHistorySyncState collection. The entries should only be expired if
|
||||
// resyncProjectStructure is false and resyncDocContents is a zero-length array.
|
||||
|
||||
// Entries that already qualify for expiry get a deadline one day from now
// rather than an immediate one.
const ONE_DAY_MS = 24 * 3600 * 1000
const expiresAt = new Date(Date.now() + ONE_DAY_MS)

// TTL index on expiresAt. With expireAfterSeconds set to 0 a document becomes
// eligible for removal at the time stored in its expiresAt field.
// createIndex() is used instead of the deprecated ensureIndex() helper; the
// index key and options are unchanged.
db.projectHistorySyncState.createIndex(
  { expiresAt: 1 },
  { expireAfterSeconds: 0, background: true }
)

// Backfill: only completed resyncs (no pending structure resync, no pending
// doc content resync) that do not carry an expiry yet are given one.
db.projectHistorySyncState.updateMany(
  {
    resyncProjectStructure: false,
    resyncDocContents: [],
    expiresAt: { $exists: false },
  },
  { $set: { expiresAt } }
)
|
328
services/project-history/scripts/bulk_resync_file_fix_up.mjs
Normal file
328
services/project-history/scripts/bulk_resync_file_fix_up.mjs
Normal file
@@ -0,0 +1,328 @@
|
||||
// @ts-check
|
||||
import Events from 'node:events'
|
||||
import { setTimeout } from 'node:timers/promises'
|
||||
import readline from 'node:readline'
|
||||
import fs from 'node:fs'
|
||||
import minimist from 'minimist'
|
||||
import { ObjectId } from 'mongodb'
|
||||
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
|
||||
import logger from '@overleaf/logger'
|
||||
import Metrics from '@overleaf/metrics'
|
||||
import OError from '@overleaf/o-error'
|
||||
import { promiseMapWithLimit } from '@overleaf/promise-utils'
|
||||
import { db, mongoClient } from '../app/js/mongodb.js'
|
||||
import * as HistoryStoreManager from '../app/js/HistoryStoreManager.js'
|
||||
import * as RedisManager from '../app/js/RedisManager.js'
|
||||
import * as SyncManager from '../app/js/SyncManager.js'
|
||||
import * as UpdatesProcessor from '../app/js/UpdatesProcessor.js'
|
||||
import { NeedFullProjectStructureResyncError } from '../app/js/Errors.js'
|
||||
import * as ErrorRecorder from '../app/js/ErrorRecorder.js'
|
||||
|
||||
// Silence warning.
|
||||
Events.setMaxListeners(20)
|
||||
|
||||
// Enable caching for ObjectId.toString()
|
||||
ObjectId.cacheHexString = true
|
||||
|
||||
/**
 * Read an integer tunable from the environment.
 *
 * @param {string} name - environment variable name
 * @param {string} fallback - value used when the variable is unset or empty
 * @return {number}
 */
function intFromEnv(name, fallback) {
  return parseInt(process.env[name] || fallback, 10)
}

// Limits handed to promiseMapWithLimit for the check and process phases.
const READ_CONCURRENCY = intFromEnv('READ_CONCURRENCY', '100')
const WRITE_CONCURRENCY = intFromEnv('WRITE_CONCURRENCY', '10')
// Number of flush attempts made in the retry loop of flushWithRetries.
const FLUSH_RETRIES = intFromEnv('FLUSH_RETRIES', '20')

// Relevant dates:
// - 2024-12-19, start of event-hold removal in filestore bucket -> objects older than 24h are (soft-)deleted.
// - 2024-12-23, copy operation skipped in filestore when cloning project -> objects not created on clone.
// - 2025-01-24, no more filestore reads allowed in project-history -> no more empty files in history for 404s
const FILESTORE_SOFT_DELETE_START = new Date('2024-12-19T00:00:00Z')
const FILESTORE_READ_OFF = new Date('2025-01-24T15:00:00Z')
|
||||
|
||||
// Command line flags. Both --logs and --log-latency are parsed as strings.
const cliOptions = { string: ['logs', 'log-latency'] }
const argv = minimist(process.argv.slice(2), cliOptions)
// Per-project latency logging is only enabled by the exact value "true".
const LOG_LATENCY = argv['log-latency'] === 'true'
|
||||
|
||||
// Flipped once by the signal handler; the processing loops poll it and stop
// picking up new work when it is set.
let gracefulShutdownInitiated = false

for (const signal of ['SIGINT', 'SIGTERM']) {
  process.on(signal, handleSignal)
}

/**
 * Signal handler: request a graceful shutdown and tell the operator.
 */
function handleSignal() {
  gracefulShutdownInitiated = true
  console.warn('graceful shutdown initiated, draining queue')
}
|
||||
|
||||
// Running counters for this script run; they are printed by logStats().
const STATS = {
  processedLines: 0,
  success: 0,
  changed: 0,
  failure: 0,
  skipped: 0,
  checkFailure: 0,
}

/**
 * Print the current counters as a single JSON line on stdout, together with
 * the current time and the shutdown flag.
 */
function logStats() {
  const snapshot = {
    time: new Date(),
    gracefulShutdownInitiated,
    ...STATS,
  }
  console.log(JSON.stringify(snapshot))
}

// Emit a progress line every 10 seconds.
const logInterval = setInterval(logStats, 10_000)
|
||||
|
||||
/**
|
||||
* @typedef {Object} FileRef
|
||||
* @property {ObjectId} _id
|
||||
* @property {any} linkedFileData
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} Folder
|
||||
* @property {Array<Folder>} folders
|
||||
* @property {Array<FileRef>} fileRefs
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} Project
|
||||
* @property {ObjectId} _id
|
||||
* @property {Date} lastUpdated
|
||||
* @property {Array<Folder>} rootFolder
|
||||
* @property {{history: {id: (number|string)}}} overleaf
|
||||
*/
|
||||
|
||||
/**
 * Walk a folder and its sub-folders looking for a file that warrants a resync.
 *
 * A file qualifies when it carries linkedFileData or when the timestamp
 * embedded in its _id is later than FILESTORE_SOFT_DELETE_START. The files of
 * a folder are inspected before its sub-folders.
 *
 * @param {Folder} folder
 * @return {boolean}
 */
function checkFileTreeNeedsResync(folder) {
  if (!folder) return false

  const files = Array.isArray(folder.fileRefs) ? folder.fileRefs : []
  const hasSuspectFile = files.some(
    file =>
      Boolean(file.linkedFileData) ||
      file._id.getTimestamp() > FILESTORE_SOFT_DELETE_START
  )
  if (hasSuspectFile) return true

  const subFolders = Array.isArray(folder.folders) ? folder.folders : []
  return subFolders.some(subFolder => checkFileTreeNeedsResync(subFolder))
}
|
||||
|
||||
/**
 * Fetch the endTimestamp of the most recent raw version of a project's
 * history, using a read-only lookup.
 *
 * @param {string} projectId
 * @param {string} historyId
 * @return {Promise<Date>}
 * @throws {Error} when the lookup yields no version
 */
async function getLastEndTimestamp(projectId, historyId) {
  const lookupOptions = { readOnly: true }
  const latest = await HistoryStoreManager.promises.getMostRecentVersionRaw(
    projectId,
    historyId,
    lookupOptions
  )
  if (latest) return latest.endTimestamp
  throw new Error('bug: history not initialized')
}
|
||||
|
||||
/**
 * Named checks that decide whether a project gets resynced. checkProject()
 * walks them in declaration order and stops at the first one that resolves to
 * true, so the cheapest checks come first.
 *
 * @type {Record<string, (project: Project) => Promise<boolean>>}
 */
const conditions = {
  // cheap: in-memory mongo lookup
  'updated after filestore soft-delete': async project =>
    project.lastUpdated > FILESTORE_SOFT_DELETE_START,

  // cheap: in-memory mongo lookup
  'file-tree requires re-sync': async project =>
    checkFileTreeNeedsResync(project.rootFolder?.[0]),

  // moderate: GET from Redis
  'has pending operations': async project => {
    const pending = await RedisManager.promises.countUnprocessedUpdates(
      project._id.toString()
    )
    return pending > 0
  },

  // expensive: GET from Mongo/Postgres via history-v1 HTTP API call
  'has been flushed after filestore soft-delete': async project => {
    // Resyncs started after soft-deleting can trigger 404s and result in empty files.
    const lastFlush = await getLastEndTimestamp(
      project._id.toString(),
      project.overleaf.history.id.toString()
    )
    return lastFlush > FILESTORE_SOFT_DELETE_START
  },
}
|
||||
|
||||
/**
 * Decide whether a project should be resynced.
 *
 * Resolves to the project's ids as soon as one entry of `conditions` matches.
 * Resolves to null when a shutdown was requested, when the project was
 * created after FILESTORE_READ_OFF, when no condition matches, or when a
 * condition check throws (the error is logged and counted in
 * STATS.checkFailure).
 *
 * @param {Project} project
 * @return {Promise<{projectId: string, historyId: string} | null>}
 */
async function checkProject(project) {
  if (gracefulShutdownInitiated) return null

  const createdAt = project._id.getTimestamp()
  if (createdAt > FILESTORE_READ_OFF) {
    STATS.skipped++ // Project created after all bugs were fixed.
    return null
  }

  const ids = {
    projectId: project._id.toString(),
    historyId: project.overleaf.history.id.toString(),
  }

  for (const [condition, check] of Object.entries(conditions)) {
    let matched
    try {
      matched = await check(project)
    } catch (err) {
      logger.err(
        { projectId: ids.projectId, condition, err },
        'failed to check project'
      )
      STATS.checkFailure++
      return null
    }
    if (matched) return ids
  }

  STATS.skipped++
  return null
}
|
||||
|
||||
/**
 * Resync a single project and record the outcome in STATS.
 *
 * Never rejects because of a processing failure: errors from
 * tryProcessProject are logged and counted in STATS.failure. Does nothing
 * once a graceful shutdown has been requested.
 *
 * @param {string} projectId
 * @param {string} historyId
 * @return {Promise<void>}
 */
async function processProject(projectId, historyId) {
  if (gracefulShutdownInitiated) return

  const startedAt = performance.now()
  try {
    await tryProcessProject(projectId, historyId)
    const latency = performance.now() - startedAt
    if (LOG_LATENCY) {
      logger.info({ projectId, historyId, latency }, 'processed project')
    }
    STATS.success += 1
  } catch (err) {
    logger.err({ err, projectId, historyId }, 'failed to process project')
    STATS.failure += 1
  }
}
|
||||
|
||||
/**
 * Flush pending updates for a project, retrying on failure.
 *
 * Up to FLUSH_RETRIES attempts are made in a loop; each failure is logged as
 * a warning. If a graceful shutdown has been requested, the failure of the
 * current attempt is rethrown as is. After the loop one last attempt is made,
 * and its failure is wrapped in an OError.
 *
 * @param {string} projectId
 * @return {Promise<void>}
 */
async function flushWithRetries(projectId) {
  let attempt = 0
  while (attempt < FLUSH_RETRIES) {
    try {
      await UpdatesProcessor.promises.processUpdatesForProject(projectId)
      return
    } catch (err) {
      logger.warn(
        { projectId, err, attempt },
        'failed to flush updates, trying again'
      )
      if (gracefulShutdownInitiated) throw err
    }
    attempt += 1
  }

  // Final attempt: this failure is surfaced to the caller.
  try {
    await UpdatesProcessor.promises.processUpdatesForProject(projectId)
  } catch (err) {
    // @ts-ignore err is Error
    throw new OError('failed to flush updates', {}, err)
  }
}
|
||||
|
||||
/**
 * Flush and resync one project, letting any failure propagate.
 *
 * A structure-only resync is tried first. If it fails with
 * NeedFullProjectStructureResyncError, a full soft resync is started and
 * processed instead. STATS.changed is incremented after a full resync, or
 * when the history's last endTimestamp is later than the time the resync was
 * started.
 *
 * @param {string} projectId
 * @param {string} historyId
 * @return {Promise<void>}
 */
async function tryProcessProject(projectId, historyId) {
  await flushWithRetries(projectId)
  const start = new Date()

  let structureOnlySufficed = true
  try {
    await UpdatesProcessor.promises.startResyncAndProcessUpdatesUnderLock(
      projectId,
      { resyncProjectStructureOnly: true }
    )
  } catch (err) {
    if (!(err instanceof NeedFullProjectStructureResyncError)) throw err
    structureOnlySufficed = false
  }

  if (structureOnlySufficed) {
    const lastEnd = await getLastEndTimestamp(projectId, historyId)
    if (lastEnd > start) {
      STATS.changed++
    }
  } else {
    logger.warn(
      { projectId, historyId },
      'structure only resync not sufficient, doing full soft resync'
    )
    await SyncManager.promises.startResync(projectId, {})
    await UpdatesProcessor.promises.processUpdatesForProject(projectId)
    STATS.changed++
  }

  // Avoid db.projectHistorySyncState from growing for each project we resynced.
  // MongoDB collections cannot shrink on their own. In case of success, purge
  // the db entry created by this script right away.
  await SyncManager.promises.clearResyncStateIfAllAfter(projectId, start)
}
|
||||
|
||||
/**
 * Handle one batch of projects: run checkProject on each (at most
 * READ_CONCURRENCY at a time), then run processProject on the ones that
 * qualified (at most WRITE_CONCURRENCY at a time).
 *
 * @param {Array<Project>} projects
 * @return {Promise<void>}
 * @throws {Error} after the batch when a graceful shutdown was requested
 */
async function processBatch(projects) {
  const checked = await promiseMapWithLimit(
    READ_CONCURRENCY,
    projects,
    checkProject
  )
  // checkProject resolves to null for projects that should be left alone.
  const candidates = checked.filter(Boolean)

  await promiseMapWithLimit(
    WRITE_CONCURRENCY,
    candidates,
    ({ projectId, historyId }) => processProject(projectId, historyId)
  )

  if (gracefulShutdownInitiated) throw new Error('graceful shutdown triggered')
}
|
||||
|
||||
/**
 * Re-run projects that failed in an earlier run.
 *
 * Reads the file named by --logs line by line. Every line counts towards
 * STATS.processedLines; only lines that start with "{" are parsed as JSON,
 * and only entries whose msg is "failed to process project" are processed
 * again, one at a time. Stops reading once a graceful shutdown is requested.
 *
 * @return {Promise<void>}
 */
async function processProjectsFromLog() {
  const input = fs.createReadStream(argv.logs)
  const lines = readline.createInterface({ input })

  for await (const line of lines) {
    if (gracefulShutdownInitiated) break
    STATS.processedLines++
    if (!line.startsWith('{')) continue

    const entry = JSON.parse(line)
    if (entry.msg !== 'failed to process project') continue

    // processProject does its own try/catch with logging
    await processProject(entry.projectId, entry.historyId)
  }
}
|
||||
|
||||
/**
 * Entry point: replay failures from a log file when --logs is given,
 * otherwise walk db.projects in batches via batchedUpdate.
 *
 * @return {Promise<void>}
 */
async function main() {
  if (!argv.logs) {
    // Only the fields read by checkProject and its conditions are fetched.
    const projection = {
      _id: 1,
      lastUpdated: 1,
      'overleaf.history': 1,
      rootFolder: 1,
    }
    await batchedUpdate(db.projects, {}, processBatch, projection)
    return
  }
  await processProjectsFromLog()
}
|
||||
|
||||
// Run the script, then exit with a code that reflects the outcome:
//   0      no project failed
//   1..99  number of failed projects (capped at 99)
//   100    fatal error outside of per-project processing
let exitCode
try {
  try {
    await main()
  } finally {
    // Always print the final counters and release resources, even when main() threw.
    clearInterval(logInterval)
    logStats()
    Metrics.close()
    await mongoClient.close()
    // TODO(das7pad): graceful shutdown for redis. Refactor process.exit when done.
  }
  console.log('Done.')
  await setTimeout(1_000)
  exitCode = STATS.failure ? Math.min(STATS.failure, 99) : 0
} catch (err) {
  logger.err({ err }, 'fatal error')
  await setTimeout(1_000)
  exitCode = 100
}
process.exit(exitCode)
|
@@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// Clear timestamps which don't have any corresponding history ops
|
||||
// usage: scripts/flush_all.js <limit>
|
||||
|
||||
import logger from '@overleaf/logger'
|
||||
import * as RedisManager from '../app/js/RedisManager.js'
|
||||
|
||||
const argv = process.argv.slice(2)
// Optional first argument: limit passed to the redis scan. A missing or
// non-numeric value (and 0) becomes null.
const limit = parseInt(argv[0], 10) || null

/**
 * Find all dangling timestamps and clear them.
 *
 * Collects the project ids that have a first-op timestamp, then calls
 * clearDanglingFirstOpTimestamp for each of them in turn, logging progress
 * every 1000 projects. Exits the process with status 0 when done.
 *
 * @return {Promise<void>}
 */
async function main() {
  logger.info(
    { limit },
    'running redis scan for project timestamps, this may take a while'
  )
  const projectIdsWithFirstOpTimestamps =
    await RedisManager.promises.getProjectIdsWithFirstOpTimestamps(limit)
  const totalTimestamps = projectIdsWithFirstOpTimestamps.length
  logger.info(
    { totalTimestamps },
    'scan completed, now clearing dangling timestamps'
  )
  let clearedTimestamps = 0
  let processed = 0
  for (const projectId of projectIdsWithFirstOpTimestamps) {
    const result =
      await RedisManager.promises.clearDanglingFirstOpTimestamp(projectId)
    processed++
    // result is added to the running total of cleared timestamps
    clearedTimestamps += result
    if (processed % 1000 === 0) {
      logger.info(
        { processed, totalTimestamps, clearedTimestamps },
        'clearing timestamps'
      )
    }
  }
  logger.info({ processed, totalTimestamps, clearedTimestamps }, 'completed')
  process.exit(0)
}

// Do not leave the promise floating: report the failure and exit non-zero.
main().catch(error => {
  console.error(error)
  process.exit(1)
})
|
136
services/project-history/scripts/clear_deleted.js
Executable file
136
services/project-history/scripts/clear_deleted.js
Executable file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import async from 'async'
|
||||
import logger from '@overleaf/logger'
|
||||
import Settings from '@overleaf/settings'
|
||||
import redis from '@overleaf/redis-wrapper'
|
||||
import { db, ObjectId } from '../app/js/mongodb.js'
|
||||
|
||||
logger.logger.level('fatal')
|
||||
|
||||
const rclient = redis.createClient(Settings.redis.project_history)
|
||||
const Keys = Settings.redis.project_history.key_schema
|
||||
|
||||
// Positional arguments: [QUEUES] [FORCE]
const argv = process.argv.slice(2)
// QUEUES: a missing or non-numeric value (and 0) becomes null.
const limit = Number.parseInt(argv[0], 10) || null
// FORCE: only the literal string "force" turns off dry-run mode.
const force = argv[1] === 'force'
// Expiry in seconds for the next redis key; clearRedisQueue increments it
// before each use.
let delay = 0
|
||||
|
||||
/**
 * Check one failure record and, if its project is gone for good, clear the
 * project's redis queue and the failure record itself.
 *
 * The steps run in sequence and stop at the first error:
 *   1. the project must not exist in db.projects
 *   2. the project must not exist in db.deletedProjects
 *   3. set the redis ops key to expire (only logged unless `force`)
 *   4. delete the failure record (only logged unless `force`)
 *
 * A project that still exists is not an error for the caller: the record is
 * skipped and `callback` is called without arguments.
 *
 * @param {{project_id: string}} project - entry from db.projectHistoryFailures
 * @param {Function} callback - called with an error, or with no arguments
 */
function checkAndClear(project, callback) {
  const projectId = project.project_id

  const checkDeleted = next => {
    db.projects.findOne(
      { _id: new ObjectId(projectId) },
      { projection: { _id: 1 } },
      (err, found) => {
        if (err) return next(err)
        if (found) {
          console.log(`Project ${projectId} found in projects`)
          return next(new Error('error: project still exists'))
        }
        // project not found, but we still need to look at deletedProjects
        next()
      }
    )
  }

  const checkRecoverable = next => {
    const query = {
      // this condition makes use of the index
      'deleterData.deletedProjectId': new ObjectId(projectId),
      // this condition checks if the deleted project has expired
      'project._id': new ObjectId(projectId),
    }
    db.deletedProjects.findOne(
      query,
      { projection: { _id: 1 } },
      (err, found) => {
        if (err) return next(err)
        if (found) {
          console.log(`Project ${projectId} found in deletedProjects`)
          return next(new Error('error: project still exists'))
        }
        console.log(
          `project ${projectId} has been deleted - safe to clear queue`
        )
        next()
      }
    )
  }

  const clearRedisQueue = next => {
    const key = Keys.projectHistoryOps({ project_id: projectId })
    // The counter advances on dry runs too.
    delay++
    if (!force) {
      console.log(
        'dry run, would set key',
        key,
        'to expire in',
        delay,
        'seconds'
      )
      return next()
    }
    console.log('setting redis key', key, 'to expire in', delay, 'seconds')
    // use expire to allow redis to delete the key in the background
    rclient.expire(key, delay, err => {
      next(err)
    })
  }

  const clearMongoEntry = next => {
    if (!force) {
      console.log('would delete failure record for', projectId, 'from mongo')
      return next()
    }
    console.log('deleting key in mongo projectHistoryFailures', projectId)
    db.projectHistoryFailures.deleteOne({ project_id: projectId }, next)
  }

  // do the checks and deletions
  const steps = [checkDeleted, checkRecoverable, clearRedisQueue, clearMongoEntry]
  async.waterfall(steps, err => {
    const stillExists =
      Boolean(err) && err.message === 'error: project still exists'
    if (err && !stillExists) {
      console.log('error:', err)
      return callback(err)
    }
    callback()
  })
}
|
||||
|
||||
/**
 * Find all the broken projects from the failure records and hand them to
 * processFailures().
 *
 * @return {Promise<void>}
 */
async function main() {
  const failures = await db.projectHistoryFailures.find({}).toArray()
  processFailures(failures)
}

// Report a failed lookup and exit with a non-zero status.
main().catch(error => {
  console.error(error)
  process.exit(1)
})
|
||||
|
||||
/**
 * Run checkAndClear over the failure records, one at a time, then exit.
 *
 * When the script was started without arguments the usage text is printed
 * first. Only the first `limit` records are processed; note that a null
 * limit makes slice(0, null) return an empty array, so nothing is processed
 * in that case.
 *
 * @param {Array<{project_id: string}>} results - failure records from mongo
 */
function processFailures(results) {
  const calledWithoutArguments = argv.length === 0
  if (calledWithoutArguments) {
    console.log(`
Usage: node clear_deleted.js [QUEUES] [FORCE]

where
QUEUES is the number of queues to process
FORCE is the string "force" when we're ready to delete the queues. Without it, this script does a dry-run
`)
  }
  console.log('number of stuck projects', results.length)

  // now check if the project is truly deleted in mongo
  const selected = results.slice(0, limit)
  async.eachSeries(selected, checkAndClear, err => {
    console.log('DONE', err)
    process.exit()
  })
}
|
175
services/project-history/scripts/clear_deleted_history.js
Executable file
175
services/project-history/scripts/clear_deleted_history.js
Executable file
@@ -0,0 +1,175 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// To run in dev:
|
||||
//
|
||||
// docker compose run --rm project-history scripts/clear_deleted_history.js
|
||||
//
|
||||
// In production:
|
||||
//
|
||||
// docker run --rm $(docker ps -lq) scripts/clear_deleted_history.js
|
||||
|
||||
import async from 'async'
|
||||
import logger from '@overleaf/logger'
|
||||
import Settings from '@overleaf/settings'
|
||||
import redis from '@overleaf/redis-wrapper'
|
||||
import { db, ObjectId } from '../app/js/mongodb.js'
|
||||
|
||||
logger.logger.level('fatal')
|
||||
|
||||
const rclient = redis.createClient(Settings.redis.project_history)
|
||||
const Keys = Settings.redis.project_history.key_schema
|
||||
|
||||
// Positional arguments: [QUEUES] [FORCE]
const argv = process.argv.slice(2)
// QUEUES: a missing or non-numeric value (and 0) becomes null.
const limit = Number.parseInt(argv[0], 10) || null
// FORCE: only the literal string "force" turns off dry-run mode.
const force = argv[1] === 'force'

// Tallies of skipped projects, grouped by the reason they were skipped.
// They are filled by checkAndClear and printed at the end of main.
let projectNotFoundErrors = 0
const projectsNotFound = []
let projectImportedFromV1Errors = 0
const projectsImportedFromV1 = []
let projectWithHistoryIdErrors = 0
const projectsWithHistoryId = []
|
||||
|
||||
/**
 * Check one failure record and clear its redis queue and failure record when
 * the project is not imported from v1, has no history id and carries a
 * history deleted_id.
 *
 * Projects that are missing from mongo, are imported from v1 or still have a
 * history id are tallied in the module-level counters and skipped without
 * reporting an error. Any other failure is passed to `callback`.
 * Without `force` the deletions are only logged.
 *
 * @param {{project_id: string}} project - entry from db.projectHistoryFailures
 * @param {Function} callback - called with an error, or with no arguments
 */
function checkAndClear(project, callback) {
  const projectId = project.project_id
  console.log('checking project', projectId)

  const checkDeleted = next => {
    db.projects.findOne(
      { _id: new ObjectId(projectId) },
      { projection: { overleaf: true } },
      (err, result) => {
        console.log(
          '1. looking in mongo projects collection: err',
          err,
          'result',
          JSON.stringify(result)
        )
        if (err) return next(err)
        if (!result) return next(new Error('project not found in mongo'))

        const overleaf = result.overleaf
        const history = overleaf && overleaf.history

        if (
          overleaf &&
          !overleaf.id &&
          history &&
          !history.id &&
          history.deleted_id
        ) {
          console.log(
            ' - project is not imported from v1 and has a deleted_id - ok to clear'
          )
          return next()
        }
        if (overleaf && overleaf.id) {
          console.log(' - project is imported from v1')
          return next(
            new Error('project is imported from v1 - will not clear it')
          )
        }
        if (history && history.id) {
          console.log(' - project has a history id')
          return next(new Error('project has a history id - will not clear it'))
        }
        console.log(' - project state not recognised')
        return next(new Error('project state not recognised'))
      }
    )
  }

  const clearRedisQueue = next => {
    const key = Keys.projectHistoryOps({ project_id: projectId })
    if (!force) {
      console.log('dry run, would deleted key', key)
      return next()
    }
    console.log('deleting redis key', key)
    rclient.del(key, err => {
      next(err)
    })
  }

  const clearMongoEntry = next => {
    if (!force) {
      console.log('would delete failure record for', projectId, 'from mongo')
      return next()
    }
    console.log('deleting key in mongo projectHistoryFailures', projectId)
    db.projectHistoryFailures.deleteOne(
      { project_id: projectId },
      (err, result) => {
        console.log('got result from remove', err, result)
        next(err)
      }
    )
  }

  // do the checks and deletions
  async.waterfall([checkDeleted, clearRedisQueue, clearMongoEntry], err => {
    if (!err) {
      // after a real deletion, wait 100 ms before moving on
      return force ? setTimeout(callback, 100) : callback()
    }
    switch (err.message) {
      case 'project not found in mongo':
        projectNotFoundErrors++
        projectsNotFound.push(projectId)
        return callback()
      case 'project has a history id - will not clear it':
        projectWithHistoryIdErrors++
        projectsWithHistoryId.push(projectId)
        return callback()
      case 'project is imported from v1 - will not clear it':
        projectImportedFromV1Errors++
        projectsImportedFromV1.push(projectId)
        return callback()
      default:
        console.log('error:', err)
        return callback(err)
    }
  })
}
|
||||
|
||||
/**
 * Find all the broken projects from the failure records whose error mentions
 * a 422 status code from the history store, run checkAndClear over the first
 * `limit` of them one at a time, print the tallies and exit.
 *
 * Note that a null limit makes slice(0, null) return an empty array, so
 * nothing is processed in that case.
 *
 * @return {Promise<void>}
 */
async function main() {
  const results = await db.projectHistoryFailures
    .find({ error: /history store a non-success status code: 422/ })
    .toArray()

  // The status code in this message now matches the 422 used in the query
  // above (it used to read 442).
  console.log('number of queues without history store 422 =', results.length)
  // now check if the project is truly deleted in mongo
  async.eachSeries(results.slice(0, limit), checkAndClear, err => {
    console.log('Final error status', err)
    console.log(
      'Project not found errors',
      projectNotFoundErrors,
      projectsNotFound
    )
    console.log(
      'Project with history id errors',
      projectWithHistoryIdErrors,
      projectsWithHistoryId
    )
    console.log(
      'Project imported from V1 errors',
      projectImportedFromV1Errors,
      projectsImportedFromV1
    )
    process.exit()
  })
}

// Report a failed lookup and exit with a non-zero status.
main().catch(error => {
  console.error(error)
  process.exit(1)
})
|
204
services/project-history/scripts/clear_filestore_404.js
Executable file
204
services/project-history/scripts/clear_filestore_404.js
Executable file
@@ -0,0 +1,204 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// To run in dev:
|
||||
//
|
||||
// docker compose run --rm project-history scripts/clear_filestore_404.js
|
||||
//
|
||||
// In production:
|
||||
//
|
||||
// docker run --rm $(docker ps -lq) scripts/clear_filestore_404.js
|
||||
|
||||
import async from 'async'
|
||||
import logger from '@overleaf/logger'
|
||||
import request from 'request'
|
||||
import Settings from '@overleaf/settings'
|
||||
import redis from '@overleaf/redis-wrapper'
|
||||
import { db, ObjectId } from '../app/js/mongodb.js'
|
||||
|
||||
logger.logger.level('fatal')
|
||||
|
||||
const rclient = redis.createClient(Settings.redis.project_history)
|
||||
const Keys = Settings.redis.project_history.key_schema
|
||||
|
||||
// Positional arguments: [QUEUES] [FORCE]
const argv = process.argv.slice(2)
// QUEUES: a missing or non-numeric value (and 0) becomes null.
const limit = Number.parseInt(argv[0], 10) || null
// FORCE: only the literal string "force" turns off dry-run mode.
const force = argv[1] === 'force'

// Tallies of skipped projects, grouped by the reason they were skipped.
// They are filled by checkAndClear and printed at the end of main.
let projectNotFoundErrors = 0
const projectsNotFound = []
let projectImportedFromV1Errors = 0
const projectsImportedFromV1 = []
|
||||
|
||||
/**
 * Reset the history of one project listed in a failure record.
 *
 * The steps run in sequence and stop at the first error:
 *   1. the project must exist and must not be imported from v1
 *   2. unset overleaf.history.id on the project
 *   3. ask the document updater to drop the project
 *   4. delete the redis ops queue key
 *   5. delete the failure record
 * Without `force`, steps 2-5 are only logged.
 *
 * Projects that are missing from mongo or are imported from v1 are tallied
 * in the module-level counters and skipped without reporting an error. Any
 * other failure is passed to `callback`.
 *
 * @param {{project_id: string}} project - entry from db.projectHistoryFailures
 * @param {Function} callback - called with an error, or with no arguments
 */
function checkAndClear(project, callback) {
  const projectId = project.project_id
  console.log('checking project', projectId)

  // These can probably also be reset and their overleaf.history.id unset
  // (unless they are v1 projects).

  function checkNotV1Project(cb) {
    db.projects.findOne(
      { _id: new ObjectId(projectId) },
      { projection: { overleaf: true } },
      (err, result) => {
        console.log(
          '1. looking in mongo projects collection: err',
          err,
          'result',
          JSON.stringify(result)
        )
        if (err) {
          return cb(err)
        }
        if (!result) {
          return cb(new Error('project not found in mongo'))
        }
        if (result && result.overleaf && !result.overleaf.id) {
          console.log(' - project is not imported from v1 - ok to clear')
          cb()
        } else {
          cb(new Error('project is imported from v1 - will not clear it'))
        }
      }
    )
  }

  function clearProjectHistoryInMongo(cb) {
    if (force) {
      console.log('2. deleting overleaf.history.id in mongo project', projectId)
      // Accessing mongo projects collection directly - BE CAREFUL!
      db.projects.updateOne(
        { _id: new ObjectId(projectId) },
        { $unset: { 'overleaf.history.id': '' } },
        (err, result) => {
          console.log(' - got result from remove', err, result)
          if (err) {
            // Hand the error to the waterfall. A bare `return err` here would
            // never call cb, so the run would stall on this project.
            return cb(err)
          }
          if (
            result &&
            (result.modifiedCount === 1 || result.modifiedCount === 0)
          ) {
            return cb()
          } else {
            return cb(
              new Error('error: problem trying to unset overleaf.history.id')
            )
          }
        }
      )
    } else {
      console.log(
        '2. would delete overleaf.history.id for',
        projectId,
        'from mongo'
      )
      cb()
    }
  }

  function clearDocUpdaterCache(cb) {
    const url = Settings.apis.documentupdater.url + '/project/' + projectId
    if (force) {
      console.log('3. making request to clear docupdater', url)
      request.delete(url, (err, response, body) => {
        console.log(
          ' - result of request',
          err,
          response && response.statusCode,
          body
        )
        cb(err)
      })
    } else {
      console.log('3. dry run, would request DELETE on url', url)
      cb()
    }
  }

  function clearRedisQueue(cb) {
    const key = Keys.projectHistoryOps({ project_id: projectId })
    if (force) {
      console.log('4. deleting redis queue key', key)
      rclient.del(key, err => {
        cb(err)
      })
    } else {
      console.log('4. dry run, would delete redis key', key)
      cb()
    }
  }

  function clearMongoEntry(cb) {
    if (force) {
      console.log('5. deleting key in mongo projectHistoryFailures', projectId)
      db.projectHistoryFailures.deleteOne(
        { project_id: projectId },
        (err, result) => {
          console.log(' - got result from remove', err, result)
          cb(err)
        }
      )
    } else {
      console.log('5. would delete failure record for', projectId, 'from mongo')
      cb()
    }
  }

  // do the checks and deletions
  async.waterfall(
    [
      checkNotV1Project,
      clearProjectHistoryInMongo,
      clearDocUpdaterCache,
      clearRedisQueue,
      clearMongoEntry,
    ],
    err => {
      if (!err) {
        return setTimeout(callback, 1000) // include a 1 second delay
      } else if (err.message === 'project not found in mongo') {
        projectNotFoundErrors++
        projectsNotFound.push(projectId)
        return callback()
      } else if (
        err.message === 'project is imported from v1 - will not clear it'
      ) {
        projectImportedFromV1Errors++
        projectsImportedFromV1.push(projectId)
        return callback()
      } else {
        console.log('error:', err)
        return callback(err)
      }
    }
  )
}
|
||||
|
||||
/**
 * Find all the broken projects from the failure records whose error is
 * exactly "Error: bad response from filestore: 404", run checkAndClear over
 * the first `limit` of them one at a time, print the tallies and exit.
 *
 * Note that a null limit makes slice(0, null) return an empty array, so
 * nothing is processed in that case.
 *
 * @return {Promise<void>}
 */
async function main() {
  const query = { error: 'Error: bad response from filestore: 404' }
  const failures = await db.projectHistoryFailures.find(query).toArray()

  console.log('number of queues without filestore 404 =', failures.length)

  // now check if the project is truly deleted in mongo
  const selected = failures.slice(0, limit)
  async.eachSeries(selected, checkAndClear, err => {
    console.log('Final error status', err)
    console.log(
      'Project not found errors',
      projectNotFoundErrors,
      projectsNotFound
    )
    console.log(
      'Project imported from V1 errors',
      projectImportedFromV1Errors,
      projectsImportedFromV1
    )
    process.exit()
  })
}

// Report a failed lookup and exit with a non-zero status.
main().catch(error => {
  console.error(error)
  process.exit(1)
})
|
260
services/project-history/scripts/clear_project_version_out_of_order.js
Executable file
260
services/project-history/scripts/clear_project_version_out_of_order.js
Executable file
@@ -0,0 +1,260 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// To run in dev:
|
||||
//
|
||||
// docker compose run --rm project-history scripts/clear_project_version_out_of_order.js
|
||||
//
|
||||
// In production:
|
||||
//
|
||||
// docker run --rm $(docker ps -lq) scripts/clear_project_version_out_of_order.js
|
||||
|
||||
import async from 'async'
|
||||
import logger from '@overleaf/logger'
|
||||
import request from 'request'
|
||||
import Settings from '@overleaf/settings'
|
||||
import redis from '@overleaf/redis-wrapper'
|
||||
import { db, ObjectId } from '../app/js/mongodb.js'
|
||||
|
||||
logger.logger.level('fatal')
|
||||
|
||||
const rclient = redis.createClient(Settings.redis.project_history)
|
||||
const Keys = Settings.redis.project_history.key_schema
|
||||
|
||||
// Positional arguments: [QUEUES] [FORCE]
const argv = process.argv.slice(2)
// QUEUES: a missing or non-numeric value (and 0) becomes null.
const limit = Number.parseInt(argv[0], 10) || null
// FORCE: only the literal string "force" turns off dry-run mode.
const force = argv[1] === 'force'

// Tallies of skipped projects, grouped by the reason they were skipped.
let projectNotFoundErrors = 0
const projectsNotFound = []
let projectImportedFromV1Errors = 0
const projectsImportedFromV1 = []
let projectHasV2HistoryErrors = 0
const projectsV2HistoryInUse = []
|
||||
|
||||
/**
 * Check the project behind one failure record and, when it is safe, clear its
 * history state: rename overleaf.history.id to deleted_id in mongo, clear the
 * docupdater cache, delete the redis queue and remove the failure record.
 *
 * Nothing is modified unless the script was started with `force`; without it
 * each step only logs what it would do.
 *
 * @param {{project_id: string}} project - a record from projectHistoryFailures
 * @param {Function} callback - called with an error only for unexpected
 *   failures; the known "will not clear" cases are tallied and skipped
 */
function checkAndClear(project, callback) {
  const projectId = project.project_id
  console.log('checking project', projectId)

  // These can probably also be reset and their overleaf.history.id unset
  // (unless they are v1 projects).

  // Step 1: only projects that have a history id, are not imported from v1,
  // are not displaying v2 history and have no previous deleted_id may be
  // cleared.
  function checkNotV1Project(cb) {
    db.projects.findOne(
      { _id: new ObjectId(projectId) },
      { projection: { overleaf: true } },
      (err, result) => {
        console.log(
          '1. looking in mongo projects collection: err',
          err,
          'result',
          JSON.stringify(result)
        )
        if (err) {
          return cb(err)
        }
        if (!result) {
          return cb(new Error('project not found in mongo'))
        }

        // Optional chaining: a project document may have no `overleaf`
        // field at all, which previously threw a TypeError when reading
        // overleaf.history for the deleted_id check.
        const history = result.overleaf?.history
        const isV1Project = result.overleaf?.id
        const hasHistoryId = history?.id
        const hasV2HistoryInUse = history?.display
        const hasExistingDeletedHistory = history?.deleted_id

        if (!hasHistoryId) {
          console.log(' - project state not recognised')
          return cb(new Error('project state not recognised'))
        }
        if (hasExistingDeletedHistory) {
          console.log(' - project already has deleted_id')
          return cb(
            new Error('project already has deleted_id - will not clear it')
          )
        }
        if (isV1Project) {
          console.log(' - project is imported from v1')
          return cb(
            new Error('project is imported from v1 - will not clear it')
          )
        }
        if (hasV2HistoryInUse) {
          console.log(' - project is displaying v2 history')
          return cb(
            new Error('project is displaying v2 history - will not clear it')
          )
        }
        console.log(
          ' - project is not imported from v1 and v2 history is not in use - ok to clear'
        )
        return cb()
      }
    )
  }

  // Step 2: move overleaf.history.id out of the way (kept as deleted_id).
  function clearProjectHistoryInMongo(cb) {
    if (force) {
      console.log('2. deleting overleaf.history.id in mongo project', projectId)
      // Accessing mongo projects collection directly - BE CAREFUL!
      db.projects.updateOne(
        { _id: new ObjectId(projectId) },
        { $rename: { 'overleaf.history.id': 'overleaf.history.deleted_id' } },
        (err, result) => {
          console.log(' - got result from remove', err, result)
          if (err) {
            // Must go through cb: returning the error here would leave the
            // waterfall waiting forever.
            return cb(err)
          }
          if (
            result &&
            (result.modifiedCount === 1 || result.modifiedCount === 0)
          ) {
            return cb()
          } else {
            return cb(
              new Error('error: problem trying to unset overleaf.history.id')
            )
          }
        }
      )
    } else {
      console.log(
        '2. would delete overleaf.history.id for',
        projectId,
        'from mongo'
      )
      cb()
    }
  }

  // Step 3: ask the document updater to drop the project.
  function clearDocUpdaterCache(cb) {
    const url = Settings.apis.documentupdater.url + '/project/' + projectId
    if (force) {
      console.log('3. making request to clear docupdater', url)
      request.delete(url, (err, response, body) => {
        console.log(
          ' - result of request',
          err,
          response && response.statusCode,
          body
        )
        cb(err)
      })
    } else {
      console.log('3. dry run, would request DELETE on url', url)
      cb()
    }
  }

  // Step 4: delete the queued history ops in redis.
  function clearRedisQueue(cb) {
    const key = Keys.projectHistoryOps({ project_id: projectId })
    if (force) {
      console.log('4. deleting redis queue key', key)
      rclient.del(key, err => {
        cb(err)
      })
    } else {
      console.log('4. dry run, would delete redis key', key)
      cb()
    }
  }

  // Step 5: remove the failure record that brought us here.
  function clearMongoEntry(cb) {
    if (force) {
      console.log('5. deleting key in mongo projectHistoryFailures', projectId)
      db.projectHistoryFailures.deleteOne(
        { project_id: projectId },
        (err, result) => {
          console.log(' - got result from remove', err, result)
          cb(err)
        }
      )
    } else {
      console.log('5. would delete failure record for', projectId, 'from mongo')
      cb()
    }
  }

  // do the checks and deletions
  async.waterfall(
    [
      checkNotV1Project,
      clearProjectHistoryInMongo,
      clearDocUpdaterCache,
      clearRedisQueue,
      clearMongoEntry,
    ],
    err => {
      if (!err) {
        return setTimeout(callback, 100) // include a delay
      } else if (err.message === 'project not found in mongo') {
        projectNotFoundErrors++
        projectsNotFound.push(projectId)
        return callback()
      } else if (
        err.message === 'project is imported from v1 - will not clear it'
      ) {
        projectImportedFromV1Errors++
        projectsImportedFromV1.push(projectId)
        return callback()
      } else if (
        err.message === 'project is displaying v2 history - will not clear it'
      ) {
        projectHasV2HistoryErrors++
        projectsV2HistoryInUse.push(projectId)
        return callback()
      } else {
        // Anything else is unexpected and stops the whole series.
        console.log('error:', err)
        return callback(err)
      }
    }
  )
}
|
||||
|
||||
// find all the broken projects from the failure records
|
||||
/**
 * Load every failure record caused by an out-of-order project structure
 * version, run checkAndClear on each of them in series (up to `limit` when a
 * positive limit was given), print the tallies and exit.
 */
async function main() {
  const results = await db.projectHistoryFailures
    .find({
      error:
        'OpsOutOfOrderError: project structure version out of order on incoming updates',
    })
    .toArray()

  console.log(
    'number of queues with project structure version out of order on incoming updates=',
    results.length
  )

  // `limit` is null when no numeric limit was passed. slice(0, null) treats
  // null as 0 and returns an empty array, so only slice for a positive limit.
  const projects = limit > 0 ? results.slice(0, limit) : results

  // now clear the projects
  async.eachSeries(projects, checkAndClear, err => {
    console.log('Final error status', err)
    console.log(
      'Project not found errors',
      projectNotFoundErrors,
      projectsNotFound
    )
    console.log(
      'Project imported from V1 errors',
      projectImportedFromV1Errors,
      projectsImportedFromV1
    )
    console.log(
      'Project has V2 history in use',
      projectHasV2HistoryErrors,
      projectsV2HistoryInUse
    )
    // Report an aborted series through the exit status as well as the log.
    process.exit(err ? 1 : 0)
  })
}
|
||||
|
||||
// Start the run; a rejection from main() itself is printed and turned into a
// non-zero exit status.
main().catch(function onFatalError(error) {
  console.error(error)
  process.exit(1)
})
|
74
services/project-history/scripts/debug_translate_updates.js
Executable file
74
services/project-history/scripts/debug_translate_updates.js
Executable file
@@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
/**
|
||||
* This script takes a dump file, obtained via the /project/:project_id/dump
|
||||
 * endpoint and feeds it to the update translator to show how updates are transformed
|
||||
* into changes sent to v1 history.
|
||||
*/
|
||||
import fs from 'node:fs'
|
||||
import * as UpdateTranslator from '../app/js/UpdateTranslator.js'
|
||||
import * as SyncManager from '../app/js/SyncManager.js'
|
||||
import * as HistoryStoreManager from '../app/js/HistoryStoreManager.js'
|
||||
|
||||
// Read the dump file named on the command line. parseArgs and parseDumpFile
// are function declarations further down, so they are available here.
const { filename } = parseArgs()
const { projectId, updates, chunk } = parseDumpFile(filename)
|
||||
|
||||
/**
 * Expand a single resync update against the chunk taken from the dump, print
 * the outcome and exit.
 *
 * @param {object} chunk - the chunk stored in the dump file
 * @param {object} update - the resyncProjectStructure update to expand
 */
function expandResyncProjectStructure(chunk, update) {
  // Hand back the dumped chunk whenever the most recent chunk is requested.
  function getMostRecentChunkFromDump(projectId, projectHistoryId, callback) {
    callback(null, chunk)
  }
  HistoryStoreManager._mocks.getMostRecentChunk = getMostRecentChunkFromDump

  const dummyHistoryId = 99999
  const extendLock = cb => cb()

  function printAndExit(err, result) {
    console.log('err', err, 'result', JSON.stringify(result, null, 2))
    process.exit()
  }

  SyncManager.expandSyncUpdates(
    projectId,
    dummyHistoryId,
    chunk,
    [update],
    extendLock,
    printAndExit
  )
}
|
||||
|
||||
/**
 * Convert the dumped updates to changes with the update translator and print
 * the result as JSON. A translation failure is reported through error().
 *
 * @param {Array<object>} updates - raw updates from the dump file
 */
function expandUpdates(updates) {
  // The translator is given each raw update wrapped as { update }.
  const wrappedUpdates = updates.map(function wrap(update) {
    return { update }
  })

  let changes
  try {
    changes = UpdateTranslator.convertToChanges(projectId, wrappedUpdates)
  } catch (err) {
    error(err)
  }

  const rendered = JSON.stringify(changes, null, 2)
  console.log(rendered)
}
|
||||
|
||||
// A dump whose first update is a project-structure resync is expanded through
// the sync manager; anything else goes straight to the update translator.
const firstUpdate = updates[0]
if (firstUpdate.resyncProjectStructure) {
  expandResyncProjectStructure(chunk, firstUpdate)
} else {
  expandUpdates(updates)
}
|
||||
|
||||
/**
 * Read the single DUMP_FILE argument from the command line.
 * Prints the usage line and exits with status 1 unless exactly one argument
 * was supplied.
 *
 * @returns {{filename: string}}
 */
function parseArgs() {
  const [filename, ...extraArgs] = process.argv.slice(2)
  const hasExactlyOneArg = filename !== undefined && extraArgs.length === 0
  if (!hasExactlyOneArg) {
    console.log('Usage: debug_translate_updates.js DUMP_FILE')
    process.exit(1)
  }
  return { filename }
}
|
||||
|
||||
/**
 * Load a dump file and pick out the fields this script uses.
 *
 * @param {string} filename - path to the JSON dump
 * @returns {{projectId: *, updates: *, chunk: *}} the dump's `project_id`
 *   (renamed to projectId), `updates` and `chunk` values
 */
function parseDumpFile(filename) {
  const dump = JSON.parse(fs.readFileSync(filename))
  return {
    projectId: dump.project_id,
    updates: dump.updates,
    chunk: dump.chunk,
  }
}
|
||||
|
||||
/**
 * Print an error to stderr and terminate the script with exit status 1.
 *
 * @param {*} err - the value to report
 */
function error(err) {
  const FAILURE_EXIT_CODE = 1
  console.error(err)
  process.exit(FAILURE_EXIT_CODE)
}
|
93
services/project-history/scripts/flush_all.js
Executable file
93
services/project-history/scripts/flush_all.js
Executable file
@@ -0,0 +1,93 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// To run in dev:
|
||||
//
|
||||
// docker compose run --rm project-history scripts/flush_all.js <limit>
|
||||
//
|
||||
// In production:
|
||||
//
|
||||
// docker run --rm $(docker ps -lq) scripts/flush_all.js <limit>
|
||||
|
||||
import _ from 'lodash'
|
||||
import async from 'async'
|
||||
import logger from '@overleaf/logger'
|
||||
import * as RedisManager from '../app/js/RedisManager.js'
|
||||
import * as UpdatesProcessor from '../app/js/UpdatesProcessor.js'
|
||||
|
||||
// Raise the logger threshold to 'fatal'; progress is reported via console.log.
logger.logger.level('fatal')

// Command line: <limit> [parallelism]
//   limit       - maximum number of projects to flush (null when not a number)
//   parallelism - concurrent flushes, defaulting to 1 and capped at 10
const argv = process.argv.slice(2)
const limit = parseInt(argv[0], 10) || null
const requestedParallelism = parseInt(argv[1], 10) || 1
const parallelism = Math.min(requestedParallelism, 10)

// flush all outstanding changes
RedisManager.getProjectIdsWithHistoryOps(limit, flushProjects)
|
||||
|
||||
/**
 * Callback for RedisManager.getProjectIdsWithHistoryOps: flush the listed
 * projects (in random order, at most `limit` of them, `parallelism` at a
 * time), log one line per project and a final total, then exit with status 0.
 *
 * @param {Error|null} error - lookup failure; rethrown
 * @param {Array<string>} projectIds - projects with queued history ops
 */
function flushProjects(error, projectIds) {
  if (error) {
    throw error
  }
  console.log(
    'found projects',
    JSON.stringify({ project_ids: projectIds.length, limit, ts: new Date() })
  )

  // randomise to avoid hitting same projects each time
  const shuffled = _.shuffle(projectIds)
  const selected = limit > 0 ? shuffled.slice(0, limit) : shuffled

  const totals = { succeededProjects: 0, failedProjects: 0 }
  let attempts = 0

  // Flush one project. A failure is logged and counted but never passed on,
  // so one bad project does not stop the rest.
  function flushOne(projectId, cb) {
    attempts++
    UpdatesProcessor.processUpdatesForProject(projectId, (err, queueSize) => {
      const progress = attempts + '/' + selected.length
      const details = { projectId, queueSize, ts: new Date() }
      if (err) {
        totals.failedProjects++
        details.err = err.toString()
        console.log('failed', progress, JSON.stringify(details))
      } else {
        totals.succeededProjects++
        console.log('succeeded', progress, JSON.stringify(details))
      }
      return cb()
    })
  }

  async.eachLimit(selected, parallelism, flushOne, () => {
    console.log('total', JSON.stringify(totals))
    process.exit(0)
  })
}
|
191
services/project-history/scripts/flush_old.js
Normal file
191
services/project-history/scripts/flush_old.js
Normal file
@@ -0,0 +1,191 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
import Settings from '@overleaf/settings'
|
||||
import minimist from 'minimist'
|
||||
import logger from '@overleaf/logger'
|
||||
import PQueue from 'p-queue'
|
||||
import * as RedisManager from '../app/js/RedisManager.js'
|
||||
import * as ErrorRecorder from '../app/js/ErrorRecorder.js'
|
||||
|
||||
logger.logger.level('fatal')
|
||||
|
||||
/**
 * Print the command line help for flush_old.js and exit with status 0.
 */
function usage() {
  const helpText = `
Usage: flush_old.js [options]

Options:
-b, --batch-size <size> Number of projects to process in each batch (default: 100)
-a, --max-age <seconds> Maximum age of projects to keep (default: 3600)
-i, --interval <seconds> Interval to spread the processing over (default: 300)
-c, --concurrency <number> Number of concurrent jobs (default: 10)
-u, --buffer <seconds> Buffer time in seconds to reserve at end (default: 15)
-n, --dry-run Show what would be done without making changes
-h, --help Show this help message

Examples:
# Flush projects older than 24 hours with 5 concurrent jobs
flush_old.js --batch-size 100 --max-age 86400 -c 5

# Dry run to see what would be flushed
flush_old.js --max-age 3600 --dry-run
`
  console.log(helpText)
  process.exit(0)
}
|
||||
|
||||
// Parse the command line. Every long option has a one-letter alias and a
// default matching the values shown in usage().
const argv = minimist(process.argv.slice(2), {
  boolean: ['dry-run', 'help'],
  alias: {
    a: 'max-age',
    b: 'batch-size',
    c: 'concurrency',
    h: 'help',
    i: 'interval',
    n: 'dry-run',
    u: 'buffer',
  },
  default: {
    'batch-size': 100,
    'max-age': 3600,
    interval: 300,
    concurrency: 10,
    buffer: 15,
    'dry-run': false,
    help: false,
  },
})
|
||||
|
||||
// Show the help text when asked for it, or when the script is started with
// no arguments at all.
const startedWithoutArguments = process.argv.length === 2
if (argv.help || startedWithoutArguments) {
  usage()
}

// Numeric options. interval, concurrency and buffer fall back to their
// defaults when the supplied value does not parse to a non-zero number.
const batchSize = parseInt(argv['batch-size'], 10)
const maxAge = argv['max-age'] ? parseInt(argv['max-age'], 10) : null
const interval = parseInt(argv.interval, 10) || 300
const concurrency = parseInt(argv.concurrency, 10) || 10
const bufferTime = parseInt(argv.buffer, 10) || 15
const dryRun = argv['dry-run']
|
||||
|
||||
/**
 * Generator function that yields consecutive batches of items from an array.
 * The last batch may be shorter than `size`.
 *
 * @param {Array} array - The array to batch
 * @param {number} size - The size of each batch; must be a positive integer
 * @yields {Array} A batch of items
 * @throws {RangeError} when `size` is not a positive integer. The error is
 *   raised on the first iteration step, since this is a generator.
 */
function* getBatches(array, size) {
  // A size of 0 or a negative size would never advance past the end of the
  // array (endless loop), and NaN would yield a single empty batch.
  if (!Number.isInteger(size) || size <= 0) {
    throw new RangeError(`batch size must be a positive integer, got ${size}`)
  }
  for (let i = 0; i < array.length; i += size) {
    yield array.slice(i, i + size)
  }
}
|
||||
|
||||
// Number of flush requests completed so far (used for progress logging).
let flushCount = 0

/**
 * Ask the project-history service to flush one project by POSTing to its
 * /project/:id/flush endpoint. In dry-run mode only a log line is printed.
 *
 * @param {{projectId: string, timestamp: *}} project - project to flush;
 *   `timestamp` is only used in the progress log line
 * @throws {Error} when the service answers with a non-OK status
 */
async function flushProject({ projectId, timestamp }) {
  const url = `${Settings.apis.project_history.url}/project/${projectId}/flush`
  if (dryRun) {
    console.log(`[DRY RUN] would flush project ${projectId}`)
    return
  }
  const response = await fetch(url, {
    method: 'POST',
  })
  // The body is not needed. Cancel it so the underlying connection is
  // released promptly instead of waiting for garbage collection.
  await response.body?.cancel()
  flushCount++
  if (flushCount % 100 === 0) {
    console.log('flushed', flushCount, 'projects, up to', timestamp)
  }
  if (!response.ok) {
    throw new Error(`failed to flush project ${projectId}`)
  }
}
|
||||
|
||||
// Wall-clock time (ms since the epoch) captured when the script starts; all
// age calculations are made relative to this instant.
const SCRIPT_START_TIME = Date.now()

/**
 * Tell whether a timestamp lies more than `maxAge` seconds before the start
 * of the script.
 *
 * @param {number|null} maxAge - age threshold in seconds
 * @param {number|string} timestamp - time in milliseconds since the epoch
 * @returns {boolean} true when the age is strictly greater than maxAge
 */
function olderThan(maxAge, timestamp) {
  const elapsedMs = SCRIPT_START_TIME - timestamp
  const ageInSeconds = elapsedMs / 1000
  if (ageInSeconds > maxAge) {
    return true
  }
  return false
}
|
||||
|
||||
/**
 * Flush the projects whose oldest queued history op is older than `maxAge`.
 *
 * Projects with a recorded failure are left out. The remaining projects are
 * looked up in batches to get their first-op timestamps, sorted oldest first
 * and flushed through a rate-limited queue so that the work is spread over
 * the time left in `interval`, minus `bufferTime`.
 */
async function main() {
  const projectIds = await RedisManager.promises.getProjectIdsWithHistoryOps()
  const failedProjects = await ErrorRecorder.promises.getFailedProjects()
  const failedProjectIds = new Set(failedProjects.map(p => p.project_id))

  const projectIdsToProcess = projectIds.filter(
    id => !failedProjectIds.has(id)
  )
  console.log('number of projects with history ops', projectIds.length)
  console.log(
    'number of failed projects to exclude',
    projectIds.length - projectIdsToProcess.length
  )

  const collectedProjects = []
  let nullCount = 0
  // iterate over the project ids in batches of doing a redis MGET to retrieve the first op timestamps
  for (const batch of getBatches(projectIdsToProcess, batchSize)) {
    const timestamps = await RedisManager.promises.getFirstOpTimestamps(batch)
    batch.forEach((projectId, idx) => {
      const timestamp = timestamps[idx]
      if (!timestamp) {
        // No timestamp available: count it and flush the project anyway.
        nullCount++
        collectedProjects.push({ projectId, timestamp })
      } else if (olderThan(maxAge, timestamp)) {
        collectedProjects.push({ projectId, timestamp })
      }
    })
  }

  // sort the collected projects by ascending timestamp
  collectedProjects.sort((a, b) => a.timestamp - b.timestamp)

  console.log('number of projects to flush', collectedProjects.length)
  console.log('number with null timestamps', nullCount)

  const elapsedTime = Math.floor((Date.now() - SCRIPT_START_TIME) / 1000)
  console.log('elapsed time', elapsedTime, 'seconds, buffer time', bufferTime)
  const remainingTime = Math.max(interval - elapsedTime - bufferTime, 0)
  console.log('remaining time', remainingTime, 'seconds')

  // Spread the jobs over the remaining time, treating it as at least 60
  // seconds and running at least one job per second.
  const spreadSeconds = Math.max(remainingTime, 60)
  const jobsPerSecond = Math.max(
    Math.ceil(collectedProjects.length / spreadSeconds),
    1
  )
  console.log('interval', interval, 'seconds')
  console.log('jobs per second', jobsPerSecond)
  console.log('concurrency', concurrency)

  const queue = new PQueue({
    concurrency,
    interval: 1000,
    intervalCap: jobsPerSecond,
  })

  // Each task resolves to a settled-style record and never rejects, so one
  // failed flush cannot abort the rest of the queue.
  const taskFns = collectedProjects.map(project => async () => {
    try {
      await flushProject(project)
    } catch (error) {
      return { status: 'rejected', reason: error, project }
    }
    return { status: 'fulfilled', value: project }
  })

  const results = await queue.addAll(taskFns)

  console.log(
    'finished after',
    Math.floor((Date.now() - SCRIPT_START_TIME) / 1000),
    'seconds'
  )

  // count the number of successful and failed flushes
  let success = 0
  let failed = 0
  for (const { status } of results) {
    if (status === 'fulfilled') {
      success++
    } else if (status === 'rejected') {
      failed++
    }
  }
  console.log('completed', { success, failed })
}
|
||||
|
||||
// Run the script: exit 0 once main() resolves; print the error and exit 1
// when it rejects.
main()
  .then(function onDone() {
    process.exit(0)
  })
  .catch(function onFatalError(err) {
    console.error(err)
    process.exit(1)
  })
|
233
services/project-history/scripts/force_resync.js
Executable file
233
services/project-history/scripts/force_resync.js
Executable file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
// To run in dev:
|
||||
//
|
||||
// docker compose run --rm project-history scripts/force_resync.js <limit> [force]
|
||||
//
|
||||
// In production:
|
||||
//
|
||||
// docker run --rm $(docker ps -lq) scripts/force_resync.js <limit> [force]
|
||||
|
||||
import async from 'async'
|
||||
import Settings from '@overleaf/settings'
|
||||
import redis from '@overleaf/redis-wrapper'
|
||||
import { db, ObjectId } from '../app/js/mongodb.js'
|
||||
import * as SyncManager from '../app/js/SyncManager.js'
|
||||
import * as UpdatesProcessor from '../app/js/UpdatesProcessor.js'
|
||||
|
||||
// Redis client and key schema for the project-history queues.
const rclient = redis.createClient(Settings.redis.project_history)
const Keys = Settings.redis.project_history.key_schema

// Command line: <limit> [force]
//   limit - maximum number of failure records to handle (null when not a number)
//   force - the literal word "force" enables the resync steps
const argv = process.argv.slice(2)
const limit = parseInt(argv[0], 10) || null
const force = argv[1] === 'force'

// Tallies (count + project ids) printed at the end of the run.
let projectNotFoundErrors = 0
const projectsNotFound = []
let projectImportedFromV1Errors = 0
const projectsImportedFromV1 = []
let projectNoHistoryIdErrors = 0
let projectsFailedErrors = 0
const projectsFailed = []
let projectsBrokenSyncErrors = 0
const projectsBrokenSync = []
|
||||
|
||||
/**
 * Try to repair one failed project: check that it can be resynced, start a
 * hard resync, force a flush, wait for its redis queue to drain and check
 * that its failure record is gone.
 *
 * Nothing is modified unless the script was started with `force`; without it
 * steps 2-5 only log what they would do.
 *
 * @param {{project_id: string}} project - a record from projectHistoryFailures
 * @param {Function} callback - always called without an error; failures are
 *   tallied or logged so the series continues with the next project
 */
function checkAndClear(project, callback) {
  const projectId = project.project_id
  console.log('checking project', projectId)

  // These can probably also be reset and their overleaf.history.id unset
  // (unless they are v1 projects).

  // Step 1: only projects that are not imported from v1 and that have a
  // history id are resynced.
  function checkNotV1Project(cb) {
    db.projects.findOne(
      { _id: new ObjectId(projectId) },
      { projection: { overleaf: true } },
      (err, result) => {
        console.log(
          '1. looking in mongo projects collection: err',
          err,
          'result',
          JSON.stringify(result)
        )
        if (err) {
          return cb(err)
        }
        if (!result) {
          return cb(new Error('project not found in mongo'))
        }
        if (result.overleaf?.id) {
          return cb(new Error('project is imported from v1 - will not resync it'))
        }
        // Optional chaining: `overleaf` or `overleaf.history` may be missing
        // altogether. Such a project has no history id; it is not a v1
        // import, and reading history.id must not throw.
        if (result.overleaf?.history?.id) {
          console.log(
            ' - project is not imported from v1 and has a history id - ok to resync'
          )
          return cb()
        }
        console.log(
          ' - project is not imported from v1 but does not have a history id'
        )
        return cb(new Error('no history id'))
      }
    )
  }

  // Step 2: request a hard resync.
  function startResync(cb) {
    if (force) {
      console.log('2. starting resync for', projectId)
      SyncManager.startHardResync(projectId, err => {
        if (err) {
          console.log('ERR', JSON.stringify(err.message))
          return cb(err)
        }
        setTimeout(cb, 3000) // include a delay to allow the request to be processed
      })
    } else {
      console.log('2. dry run, would start resync for', projectId)
      cb()
    }
  }

  // Step 3: process the queued updates now.
  function forceFlush(cb) {
    if (force) {
      console.log('3. forcing a flush for', projectId)
      UpdatesProcessor.processUpdatesForProject(projectId, err => {
        console.log('err', err)
        return cb(err)
      })
    } else {
      console.log('3. dry run, would force a flush for', projectId)
      cb()
    }
  }

  // Step 4: poll the redis queue (up to 30 times, one second apart) until
  // it is empty.
  function watchRedisQueue(cb) {
    const key = Keys.projectHistoryOps({ project_id: projectId })
    function checkQueueEmpty(_callback) {
      rclient.llen(key, (err, result) => {
        console.log('LLEN', projectId, err, result)
        if (err) {
          // Return here: falling through would invoke the callback a
          // second time with 'queue not empty'.
          return _callback(err)
        }
        if (result === 0) {
          _callback()
        } else {
          _callback(new Error('queue not empty'))
        }
      })
    }
    if (force) {
      console.log('4. checking redis queue key', key)
      async.retry({ times: 30, interval: 1000 }, checkQueueEmpty, err => {
        cb(err)
      })
    } else {
      console.log('4. dry run, would check redis key', key)
      cb()
    }
  }

  // Step 5: the resync worked only if the failure record has gone.
  function checkMongoFailureEntry(cb) {
    if (force) {
      console.log('5. checking key in mongo projectHistoryFailures', projectId)
      db.projectHistoryFailures.findOne(
        { project_id: projectId },
        { projection: { _id: 1 } },
        (err, result) => {
          console.log('got result', err, result)
          if (err) {
            return cb(err)
          }
          if (result) {
            return cb(new Error('failure record still exists'))
          }
          return cb()
        }
      )
    } else {
      console.log('5. would check failure record for', projectId, 'in mongo')
      cb()
    }
  }

  // do the checks and the resync
  async.waterfall(
    [
      checkNotV1Project,
      startResync,
      forceFlush,
      watchRedisQueue,
      checkMongoFailureEntry,
    ],
    err => {
      if (!err) {
        return setTimeout(callback, 1000) // include a 1 second delay
      } else if (err.message === 'project not found in mongo') {
        projectNotFoundErrors++
        projectsNotFound.push(projectId)
        return callback()
      } else if (err.message === 'no history id') {
        projectNoHistoryIdErrors++
        return callback()
      } else if (
        err.message === 'project is imported from v1 - will not resync it'
      ) {
        projectImportedFromV1Errors++
        projectsImportedFromV1.push(projectId)
        return callback()
      } else if (
        err.message === 'history store a non-success status code: 422'
      ) {
        projectsFailedErrors++
        projectsFailed.push(projectId)
        return callback()
      } else if (err.message === 'sync ongoing') {
        projectsBrokenSyncErrors++
        projectsBrokenSync.push(projectId)
        return callback()
      } else {
        // Unknown errors are logged and the series moves on.
        console.log('error:', err)
        return callback()
      }
    }
  )
}
|
||||
|
||||
/**
 * Load every record in projectHistoryFailures, run checkAndClear on each of
 * them in series (up to `limit` when a positive limit was given), print the
 * tallies and exit.
 */
async function main() {
  const results = await db.projectHistoryFailures.find().toArray()

  console.log('number of queues without history store 442 =', results.length)

  // `limit` is null when no numeric limit was passed. slice(0, null) treats
  // null as 0 and returns an empty array, so only slice for a positive limit.
  const projects = limit > 0 ? results.slice(0, limit) : results

  // now try to resync each failed project in turn
  async.eachSeries(projects, checkAndClear, err => {
    console.log('Final error status', err)
    console.log(
      'Project flush failed again errors',
      projectsFailedErrors,
      projectsFailed
    )
    console.log(
      'Project flush ongoing errors',
      projectsBrokenSyncErrors,
      projectsBrokenSync
    )
    console.log(
      'Project not found errors',
      projectNotFoundErrors,
      projectsNotFound
    )
    console.log('Project without history_id errors', projectNoHistoryIdErrors)
    console.log(
      'Project imported from V1 errors',
      projectImportedFromV1Errors,
      projectsImportedFromV1
    )
    // Report an aborted series through the exit status as well as the log.
    process.exit(err ? 1 : 0)
  })
}
|
||||
|
||||
// Start the run; a rejection from main() itself is printed and turned into a
// non-zero exit status.
main().catch(function onFatalError(error) {
  console.error(error)
  process.exit(1)
})
|
Reference in New Issue
Block a user