first commit
@@ -0,0 +1,425 @@
const fs = require('node:fs')
const Path = require('node:path')
const _ = require('lodash')
const logger = require('@overleaf/logger')
const OError = require('@overleaf/o-error')
const Errors = require('../app/js/Errors')
const LockManager = require('../app/js/LockManager')
const PersistenceManager = require('../app/js/PersistenceManager')
const ProjectFlusher = require('../app/js/ProjectFlusher')
const ProjectManager = require('../app/js/ProjectManager')
const RedisManager = require('../app/js/RedisManager')
const Settings = require('@overleaf/settings')
const request = require('requestretry').defaults({
  maxAttempts: 2,
  retryDelay: 10,
})

const AUTO_FIX_VERSION_MISMATCH =
  process.env.AUTO_FIX_VERSION_MISMATCH === 'true'
const AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA =
  process.env.AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA === 'true'
const SCRIPT_LOG_LEVEL = process.env.SCRIPT_LOG_LEVEL || 'warn'
const FLUSH_IN_SYNC_PROJECTS = process.env.FLUSH_IN_SYNC_PROJECTS === 'true'
const FOLDER =
  process.env.FOLDER || '/tmp/overleaf-check-redis-mongo-sync-state'
const LIMIT = parseInt(process.env.LIMIT || '1000', 10)
const RETRIES = parseInt(process.env.RETRIES || '5', 10)
const WRITE_CONTENT = process.env.WRITE_CONTENT === 'true'

process.env.LOG_LEVEL = SCRIPT_LOG_LEVEL
logger.initialize('check-redis-mongo-sync-state')
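
// Example invocation, assuming the script is saved alongside the other
// scripts in services/document-updater/scripts/ (the exact filename is an
// assumption, not part of this diff):
//   WRITE_CONTENT=true AUTO_FIX_VERSION_MISMATCH=true \
//     node scripts/check_redis_mongo_sync_state.js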

const COMPARE_AND_SET =
  'if redis.call("get", KEYS[1]) == ARGV[1] then return redis.call("set", KEYS[1], ARGV[2]) else return 0 end'
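// This Lua script runs atomically inside Redis: the doc version key is
// overwritten only if it still holds the value we read (ARGV[1]). EVAL
// returns 'OK' on success and 0 when a concurrent update won the race.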

/**
 * @typedef {Object} Doc
 * @property {number} version
 * @property {Array<string>} lines
 * @property {string} pathname
 * @property {Object} ranges
 * @property {boolean} [partiallyDeleted]
 */

class TryAgainError extends Error {}
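
// updateDocVersionInRedis below takes the doc lock to hold off the background
// flusher, and additionally uses compare-and-set on the version key, so an
// editor update that slips in between our read and write surfaces as a
// TryAgainError instead of being silently overwritten.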

/**
 * @param {string} docId
 * @param {Doc} redisDoc
 * @param {Doc} mongoDoc
 * @return {Promise<void>}
 */
async function updateDocVersionInRedis(docId, redisDoc, mongoDoc) {
  const lockValue = await LockManager.promises.getLock(docId)
  try {
    const key = Settings.redis.documentupdater.key_schema.docVersion({
      doc_id: docId,
    })
    const numberOfKeys = 1
    const ok = await RedisManager.rclient.eval(
      COMPARE_AND_SET,
      numberOfKeys,
      key,
      redisDoc.version,
      mongoDoc.version
    )
    if (!ok) {
      throw new TryAgainError(
        'document has been updated, aborting overwrite. Try again.'
      )
    }
  } finally {
    await LockManager.promises.releaseLock(docId, lockValue)
  }
}

async function fixPartiallyDeletedDocMetadata(projectId, docId, pathname) {
  await new Promise((resolve, reject) => {
    request(
      {
        method: 'PATCH',
        url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016/project/${projectId}/doc/${docId}`,
        timeout: 60 * 1000,
        json: {
          name: Path.basename(pathname),
          deleted: true,
          deletedAt: new Date(),
        },
      },
      (err, res, body) => {
        if (err) return reject(err)
        const { statusCode } = res
        if (statusCode !== 204) {
          return reject(
            new OError('patch request to docstore failed', {
              statusCode,
              body,
            })
          )
        }
        resolve()
      }
    )
  })
}

async function getDocFromMongo(projectId, docId) {
  try {
    return await PersistenceManager.promises.getDoc(projectId, docId)
  } catch (err) {
    if (!(err instanceof Errors.NotFoundError)) {
      throw err
    }
  }
  const docstoreDoc = await new Promise((resolve, reject) => {
    request(
      {
        url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016/project/${projectId}/doc/${docId}/peek`,
        timeout: 60 * 1000,
        json: true,
      },
      (err, res, body) => {
        if (err) return reject(err)
        const { statusCode } = res
        if (statusCode !== 200) {
          return reject(
            new OError('fallback request to docstore failed', {
              statusCode,
              body,
            })
          )
        }
        resolve(body)
      }
    )
  })
  const deletedDocName = await new Promise((resolve, reject) => {
    request(
      {
        url: `http://${process.env.DOCSTORE_HOST || '127.0.0.1'}:3016/project/${projectId}/doc-deleted`,
        timeout: 60 * 1000,
        json: true,
      },
      (err, res, body) => {
        if (err) return reject(err)
        const { statusCode } = res
        if (statusCode !== 200) {
          return reject(
            new OError('list deleted docs request to docstore failed', {
              statusCode,
              body,
            })
          )
        }
        resolve(body.find(doc => doc._id === docId)?.name)
      }
    )
  })
  if (docstoreDoc.deleted && deletedDocName) {
    return {
      ...docstoreDoc,
      pathname: deletedDocName,
    }
  }
  return {
    ...docstoreDoc,
    pathname: `/partially-deleted-doc-with-unknown-name-and-id-${docId}.txt`,
    partiallyDeleted: true,
  }
}
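
// If the doc is missing from mongo, getDocFromMongo falls back to docstore's
// peek endpoint and the project's deleted-docs listing; when no deleted name
// can be recovered, the doc is flagged partiallyDeleted with a placeholder
// pathname so processDoc can report it (and optionally repair the metadata).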

/**
 * @param {string} projectId
 * @param {string} docId
 * @return {Promise<boolean>}
 */
async function processDoc(projectId, docId) {
  const redisDoc = /** @type Doc */ await RedisManager.promises.getDoc(
    projectId,
    docId
  )
  const mongoDoc = /** @type Doc */ await getDocFromMongo(projectId, docId)

  if (mongoDoc.partiallyDeleted) {
    if (AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA) {
      console.log(
        `Found partially deleted doc ${docId} in project ${projectId}: fixing metadata`
      )
      await fixPartiallyDeletedDocMetadata(projectId, docId, redisDoc.pathname)
    } else {
      console.log(
        `Found partially deleted doc ${docId} in project ${projectId}: use AUTO_FIX_PARTIALLY_DELETED_DOC_METADATA=true to fix metadata`
      )
    }
  }

  if (mongoDoc.version < redisDoc.version) {
    // mongo is behind, we can flush to mongo when all docs are processed.
    return false
  }

  mongoDoc.snapshot = mongoDoc.lines.join('\n')
  redisDoc.snapshot = redisDoc.lines.join('\n')
  if (!mongoDoc.ranges) mongoDoc.ranges = {}
  if (!redisDoc.ranges) redisDoc.ranges = {}

  const sameLines = mongoDoc.snapshot === redisDoc.snapshot
  const sameRanges = _.isEqual(mongoDoc.ranges, redisDoc.ranges)
  if (sameLines && sameRanges) {
    if (mongoDoc.version > redisDoc.version) {
      // mongo is ahead, technically out of sync, but practically the content is identical
      if (AUTO_FIX_VERSION_MISMATCH) {
        console.log(
          `Fixing out of sync doc version for doc ${docId} in project ${projectId}: mongo=${mongoDoc.version} > redis=${redisDoc.version}`
        )
        await updateDocVersionInRedis(docId, redisDoc, mongoDoc)
        return false
      } else {
        console.error(
          `Detected out of sync redis and mongo version for doc ${docId} in project ${projectId}, auto-fixable via AUTO_FIX_VERSION_MISMATCH=true`
        )
        return true
      }
    } else {
      // same lines, same ranges, same version
      return false
    }
  }

  const dir = Path.join(FOLDER, projectId, docId)
  console.error(
    `Detected out of sync redis and mongo content for doc ${docId} in project ${projectId}`
  )
  if (!WRITE_CONTENT) return true

  console.log(`pathname: ${mongoDoc.pathname}`)
  if (mongoDoc.pathname !== redisDoc.pathname) {
    console.log(`pathname redis: ${redisDoc.pathname}`)
  }
  console.log(`mongo version: ${mongoDoc.version}`)
  console.log(`redis version: ${redisDoc.version}`)

  await fs.promises.mkdir(dir, { recursive: true })

  if (sameLines) {
    console.log('mongo lines match redis lines')
  } else {
    console.log(
      `mongo lines and redis lines out of sync, writing content into ${dir}`
    )
    await fs.promises.writeFile(
      Path.join(dir, 'mongo-snapshot.txt'),
      mongoDoc.snapshot
    )
    await fs.promises.writeFile(
      Path.join(dir, 'redis-snapshot.txt'),
      redisDoc.snapshot
    )
  }
  if (sameRanges) {
    console.log('mongo ranges match redis ranges')
  } else {
    console.log(
      `mongo ranges and redis ranges out of sync, writing content into ${dir}`
    )
    await fs.promises.writeFile(
      Path.join(dir, 'mongo-ranges.json'),
      JSON.stringify(mongoDoc.ranges)
    )
    await fs.promises.writeFile(
      Path.join(dir, 'redis-ranges.json'),
      JSON.stringify(redisDoc.ranges)
    )
  }
  console.log('---')
  return true
}

/**
 * @param {string} projectId
 * @return {Promise<number>}
 */
async function processProject(projectId) {
  const docIds = await RedisManager.promises.getDocIdsInProject(projectId)

  let outOfSync = 0
  for (const docId of docIds) {
    let lastErr
    for (let i = 0; i <= RETRIES; i++) {
      try {
        if (await processDoc(projectId, docId)) {
          outOfSync++
        }
        lastErr = undefined // clear the error from any failed earlier attempt
        break
      } catch (err) {
        lastErr = err
      }
    }
    if (lastErr) {
      throw OError.tag(lastErr, 'process doc', { docId })
    }
  }
  if (outOfSync === 0 && FLUSH_IN_SYNC_PROJECTS) {
    try {
      await ProjectManager.promises.flushAndDeleteProjectWithLocks(
        projectId,
        {}
      )
    } catch (err) {
      throw OError.tag(err, 'flush project with only in-sync docs')
    }
  }
  return outOfSync
}
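
// Flushing fully in-sync projects (FLUSH_IN_SYNC_PROJECTS=true) removes them
// from redis, shrinking the working set so that subsequent scan iterations
// can make progress past the LIMIT.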

/**
 * @param {Set<string>} processed
 * @param {Set<string>} outOfSync
 * @return {Promise<{perIterationOutOfSync: number, done: boolean}>}
 */
async function scanOnce(processed, outOfSync) {
  const projectIds = await ProjectFlusher.promises.flushAllProjects({
    limit: LIMIT,
    dryRun: true,
  })

  let perIterationOutOfSync = 0
  for (const projectId of projectIds) {
    if (processed.has(projectId)) continue
    processed.add(projectId)

    let perProjectOutOfSync = 0
    try {
      perProjectOutOfSync = await processProject(projectId)
    } catch (err) {
      throw OError.tag(err, 'process project', { projectId })
    }
    perIterationOutOfSync += perProjectOutOfSync
    if (perProjectOutOfSync > 0) {
      outOfSync.add(projectId)
    }
  }

  return { perIterationOutOfSync, done: projectIds.length < LIMIT }
}
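
// flushAllProjects in dry-run mode is used purely as an enumerator here: it
// returns the ids of projects currently held in redis without flushing them.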

/**
 * @return {Promise<number>}
 */
async function main() {
  if (!WRITE_CONTENT) {
    console.warn()
    console.warn(
      ` Use WRITE_CONTENT=true to write the content of out of sync docs to FOLDER=${FOLDER}`
    )
    console.warn()
  } else {
    console.log(
      `Writing content for projects with out of sync docs into FOLDER=${FOLDER}`
    )
    await fs.promises.mkdir(FOLDER, { recursive: true })
    const existing = await fs.promises.readdir(FOLDER)
    if (existing.length > 0) {
      console.warn()
      console.warn(
        ` Found existing entries in FOLDER=${FOLDER}. Please delete or move these before running the script again.`
      )
      console.warn()
      return 101
    }
  }
  if (LIMIT < 100) {
    console.warn()
    console.warn(
      ` Using small LIMIT=${LIMIT}, this can take a while to SCAN in a large redis database.`
    )
    console.warn()
  }

  const processed = new Set()
  const outOfSyncProjects = new Set()
  let totalOutOfSyncDocs = 0
  while (true) {
    const before = processed.size
    const { perIterationOutOfSync, done } = await scanOnce(
      processed,
      outOfSyncProjects
    )
    totalOutOfSyncDocs += perIterationOutOfSync
    console.log(`Processed ${processed.size} projects`)
    console.log(
      `Found ${
        outOfSyncProjects.size
      } projects with ${totalOutOfSyncDocs} out of sync docs: ${JSON.stringify(
        Array.from(outOfSyncProjects)
      )}`
    )
    if (done) {
      console.log('Finished iterating all projects in redis')
      break
    }
    if (processed.size === before) {
      console.error(
        `Found too many un-flushed projects (LIMIT=${LIMIT}). Please fix the reported projects first, then try again.`
      )
      if (!FLUSH_IN_SYNC_PROJECTS) {
        console.error(
          'Use FLUSH_IN_SYNC_PROJECTS=true to flush projects that have been checked.'
        )
      }
      return 2
    }
  }
  return totalOutOfSyncDocs > 0 ? 1 : 0
}
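
// Exit codes: 0 = everything in sync, 1 = out-of-sync docs found,
// 2 = scan stopped making progress (too many un-flushed projects),
// 101 = FOLDER already contains entries from a previous run.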

main()
  .then(code => {
    process.exit(code)
  })
  .catch(error => {
    console.error(OError.getFullStack(error))
    console.error(OError.getFullInfo(error))
    process.exit(1)
  })
services/document-updater/scripts/expire_docops.js (new file, 65 lines)
@@ -0,0 +1,65 @@

const Settings = require('@overleaf/settings')
const rclient = require('@overleaf/redis-wrapper').createClient(
  Settings.redis.documentupdater
)
const keys = Settings.redis.documentupdater.key_schema
const async = require('async')
const RedisManager = require('../app/js/RedisManager')

const getKeysFromNode = function (node, pattern, callback) {
  let cursor = 0 // redis iterator
  const keySet = {} // use hash to avoid duplicate results
  // scan over all keys looking for pattern
  const doIteration = () =>
    node.scan(cursor, 'MATCH', pattern, 'COUNT', 1000, function (error, reply) {
      if (error) {
        return callback(error)
      }
      // use a local variable for the scan results so the module-level
      // `keys` (the redis key schema) is not clobbered
      let scannedKeys
      ;[cursor, scannedKeys] = reply
      console.log('SCAN', scannedKeys.length)
      for (const key of scannedKeys) {
        keySet[key] = true
      }
      if (cursor === '0') {
        // note redis returns string result not numeric
        return callback(null, Object.keys(keySet))
      } else {
        return doIteration()
      }
    })
  return doIteration()
}

const getKeys = function (pattern, callback) {
  const nodes = (typeof rclient.nodes === 'function'
    ? rclient.nodes('master')
    : undefined) || [rclient]
  console.log('GOT NODES', nodes.length)
  const doKeyLookupForNode = (node, cb) => getKeysFromNode(node, pattern, cb)
  return async.concatSeries(nodes, doKeyLookupForNode, callback)
}
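
// When connected to a Redis Cluster, the ioredis client exposes .nodes() and
// each master node must be scanned separately; for a single-node client we
// fall back to scanning rclient itself.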

const expireDocOps = callback =>
  getKeys(keys.docOps({ doc_id: '*' }), (error, docOpsKeys) => {
    if (error) return callback(error)
    async.mapSeries(
      docOpsKeys,
      function (key, cb) {
        console.log(`EXPIRE ${key} ${RedisManager.DOC_OPS_TTL}`)
        return rclient.expire(key, RedisManager.DOC_OPS_TTL, cb)
      },
      callback
    )
  })
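
// Assumed one-shot maintenance usage:
//   node services/document-updater/scripts/expire_docops.js
// This applies RedisManager.DOC_OPS_TTL to every existing docOps key.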

setTimeout(
  () =>
    // Give redis a chance to connect
    expireDocOps(function (error) {
      if (error) {
        throw error
      }
      return process.exit()
    }),
  1000
)
@@ -0,0 +1,79 @@

const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
const rclient = require('@overleaf/redis-wrapper').createClient(
  Settings.redis.documentupdater
)
const keys = Settings.redis.documentupdater.key_schema
const ProjectFlusher = require('../app/js/ProjectFlusher')
const DocumentManager = require('../app/js/DocumentManager')
const util = require('node:util')
const flushAndDeleteDocWithLock = util.promisify(
  DocumentManager.flushAndDeleteDocWithLock
)

async function flushAndDeleteDocs(dockeys, options) {
  const docIds = ProjectFlusher._extractIds(dockeys)
  for (const docId of docIds) {
    const pathname = await rclient.get(keys.pathname({ doc_id: docId }))
    if (!pathname) {
      const projectId = await rclient.get(keys.projectKey({ doc_id: docId }))
      if (!projectId) {
        // await deleteDanglingDoc(projectId, docId, pathname, options)
        logger.info(
          { projectId, docId, pathname },
          'skipping doc with empty pathname and project id'
        )
      } else {
        await flushAndDeleteDoc(projectId, docId, pathname, options)
      }
    }
  }
}

async function flushAndDeleteDoc(projectId, docId, pathname, options) {
  if (options.dryRun) {
    logger.info(
      { projectId, docId, pathname },
      'dry run mode - would flush doc with empty pathname'
    )
    return
  }
  logger.info(
    { projectId, docId, pathname },
    'flushing doc with empty pathname'
  )
  try {
    await flushAndDeleteDocWithLock(projectId, docId, {})
  } catch (err) {
    logger.error(
      { projectId, docId, pathname, err },
      'error flushing and deleting doc without pathname'
    )
  }
}

async function cleanUpDocs(options) {
  logger.info({ options }, 'cleaning up docs without pathnames')
  let cursor = 0
  do {
    const [newCursor, doclinesKeys] = await rclient.scan(
      cursor,
      'MATCH',
      keys.docLines({ doc_id: '*' }),
      'COUNT',
      options.limit
    )
    await flushAndDeleteDocs(doclinesKeys, options)
    cursor = newCursor
  } while (cursor !== '0')
}
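
// The run below is dry-run by default; set DRY_RUN=false to actually flush
// and delete the affected docs.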

cleanUpDocs({ limit: 1000, dryRun: process.env.DRY_RUN !== 'false' })
  .then(result => {
    rclient.quit()
    console.log('DONE')
  })
  .catch(function (error) {
    console.error(error)
    process.exit(1)
  })
@@ -0,0 +1,87 @@

const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
const rclient = require('@overleaf/redis-wrapper').createClient(
  Settings.redis.documentupdater
)
const keys = Settings.redis.documentupdater.key_schema
const ProjectFlusher = require('../app/js/ProjectFlusher')
const DocumentManager = require('../app/js/DocumentManager')
const { mongoClient, db, ObjectId } = require('../app/js/mongodb')
const util = require('node:util')
const flushAndDeleteDocWithLock = util.promisify(
  DocumentManager.flushAndDeleteDocWithLock
)

async function fixDocsWithMissingProjectIds(dockeys, options) {
  const docIds = ProjectFlusher._extractIds(dockeys)
  for (const docId of docIds) {
    const projectId = await rclient.get(keys.projectKey({ doc_id: docId }))
    logger.debug({ docId, projectId }, 'checking doc')
    if (!projectId) {
      try {
        await insertMissingProjectId(docId, options)
      } catch (err) {
        logger.error({ docId, err }, 'error fixing doc without project id')
      }
    }
  }
}
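
// insertMissingProjectId recovers the doc's project id from the mongo docs
// collection, writes it back to the redis projectKey, and then flushes the
// doc so it leaves redis in a consistent state.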

async function insertMissingProjectId(docId, options) {
  const doc = await db.docs.findOne({ _id: new ObjectId(docId) })
  if (!doc) {
    logger.warn({ docId }, 'doc not found in mongo')
    return
  }
  if (!doc.project_id) {
    logger.error({ docId }, 'doc does not have project id in mongo')
    return
  }
  logger.debug({ docId, doc }, 'found doc')
  const projectIdFromMongo = doc.project_id.toString()
  if (options.dryRun) {
    logger.info(
      { projectIdFromMongo, docId },
      'dry run mode - would insert project id in redis'
    )
    return
  }
  // set the project id for this doc
  await rclient.set(keys.projectKey({ doc_id: docId }), projectIdFromMongo)
  logger.debug({ docId, projectIdFromMongo }, 'inserted project id in redis')
  if (projectIdFromMongo) {
    await flushAndDeleteDocWithLock(projectIdFromMongo, docId, {})
    logger.info(
      { docId, projectIdFromMongo },
      'fixed doc with empty project id'
    )
  }
  return projectIdFromMongo
}

async function findAndProcessDocs(options) {
  logger.info({ options }, 'fixing docs with missing project id')
  let cursor = 0
  do {
    const [newCursor, doclinesKeys] = await rclient.scan(
      cursor,
      'MATCH',
      keys.docLines({ doc_id: '*' }),
      'COUNT',
      options.limit
    )
    await fixDocsWithMissingProjectIds(doclinesKeys, options)
    cursor = newCursor
  } while (cursor !== '0')
}

findAndProcessDocs({ limit: 1000, dryRun: process.env.DRY_RUN !== 'false' })
  .then(result => {
    rclient.quit()
    mongoClient.close()
    console.log('DONE')
  })
  .catch(function (error) {
    console.error(error)
    process.exit(1)
  })
services/document-updater/scripts/flush_all.js (new file, 54 lines)
@@ -0,0 +1,54 @@

const ProjectFlusher = require('../app/js/ProjectFlusher')
const minimist = require('minimist')

async function main() {
  const argv = minimist(process.argv.slice(2), {
    default: {
      limit: 100000,
      concurrency: 5,
      'dry-run': false,
    },
    boolean: ['dry-run', 'help'],
    alias: { h: 'help', n: 'dry-run', j: 'concurrency' },
  })

  if (argv.help) {
    console.log(`
Usage: node scripts/flush_all.js [options]

Options:
  --limit             Number of projects to flush (default: 100000)
  --concurrency, -j   Number of concurrent flush operations (default: 5)
  --dry-run, -n       Perform a dry run without making any changes (default: false)
  --help, -h          Show this help message
`)
    process.exit(0)
  }

  const options = {
    limit: argv.limit,
    concurrency: argv.concurrency,
    dryRun: argv['dry-run'],
  }
  console.log('Flushing all projects with options:', options)

  return await new Promise((resolve, reject) => {
    ProjectFlusher.flushAllProjects(options, err => {
      if (err) {
        reject(err)
      } else {
        resolve()
      }
    })
  })
}
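
// ProjectFlusher.flushAllProjects is callback-style, hence the Promise
// wrapper above; the dry-run flag is passed through as options.dryRun.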

main()
  .then(() => {
    console.log('Done flushing all projects')
    process.exit(0)
  })
  .catch(error => {
    console.error('There was an error flushing all projects', { error })
    process.exit(1)
  })
services/document-updater/scripts/remove_deleted_docs.js (new file, 161 lines)
@@ -0,0 +1,161 @@

const Settings = require('@overleaf/settings')
const logger = require('@overleaf/logger')
const rclient = require('@overleaf/redis-wrapper').createClient(
  Settings.redis.documentupdater
)
const keys = Settings.redis.documentupdater.key_schema
const ProjectFlusher = require('../app/js/ProjectFlusher')
const RedisManager = require('../app/js/RedisManager')
const { mongoClient, db, ObjectId } = require('../app/js/mongodb')
const util = require('node:util')
const getDoc = util.promisify((projectId, docId, cb) =>
  RedisManager.getDoc(projectId, docId, (err, ...args) => cb(err, args))
)
const removeDocFromMemory = util.promisify(RedisManager.removeDocFromMemory)

const summary = { totalDocs: 0, deletedDocs: 0, skippedDocs: 0 }
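
// Strategy: iterate the docLines keys in redis and, for each doc that no
// longer exists in the mongo docs collection, remove it from redis; docs
// whose project still exists are skipped (removeDeletedDoc refuses them).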

async function removeDeletedDocs(dockeys, options) {
  const docIds = ProjectFlusher._extractIds(dockeys)
  for (const docId of docIds) {
    summary.totalDocs++
    const docCount = await db.docs.find({ _id: new ObjectId(docId) }).count()
    if (!docCount) {
      try {
        await removeDeletedDoc(docId, options)
      } catch (err) {
        logger.error({ docId, err }, 'error removing deleted doc')
      }
    }
  }
}

async function removeDeletedDoc(docId, options) {
  const projectId = await rclient.get(keys.projectKey({ doc_id: docId }))

  const [
    docLines,
    version,
    ranges,
    pathname,
    projectHistoryId,
    unflushedTime,
    lastUpdatedAt,
    lastUpdatedBy,
  ] = await getDoc(projectId, docId)

  const project = await db.projects.findOne({ _id: new ObjectId(projectId) })

  let status

  if (project) {
    const projectJSON = JSON.stringify(project.rootFolder)
    const containsDoc = projectJSON.indexOf(docId) !== -1
    if (containsDoc) {
      logger.warn(
        {
          projectId,
          docId,
          docLinesBytes: docLines && docLines.length,
          version,
          rangesBytes: ranges && ranges.length,
          pathname,
          projectHistoryId,
          unflushedTime,
          lastUpdatedAt,
          lastUpdatedBy,
        },
        'refusing to delete doc, project contains docId'
      )
      summary.skippedDocs++
      return
    } else {
      logger.warn(
        {
          projectId,
          docId,
          docLinesBytes: docLines && docLines.length,
          version,
          rangesBytes: ranges && ranges.length,
          pathname,
          projectHistoryId,
          unflushedTime,
          lastUpdatedAt,
          lastUpdatedBy,
        },
        'refusing to delete doc, project still exists'
      )
      summary.skippedDocs++
      return
    }
  } else {
    status = 'projectDeleted'
  }
  summary.deletedDocs++
  if (options.dryRun) {
    logger.info(
      {
        projectId,
        docId,
        docLinesBytes: docLines && docLines.length,
        version,
        rangesBytes: ranges && ranges.length,
        pathname,
        projectHistoryId,
        unflushedTime,
        lastUpdatedAt,
        lastUpdatedBy,
        status,
        summary,
      },
      'dry run mode - would remove doc from redis'
    )
    return
  }
  await removeDocFromMemory(projectId, docId)
  logger.info(
    {
      projectId,
      docId,
      docLinesBytes: docLines && docLines.length,
      version,
      rangesBytes: ranges && ranges.length,
      pathname,
      projectHistoryId,
      unflushedTime,
      lastUpdatedAt,
      lastUpdatedBy,
      status,
      summary,
    },
    'removed doc from redis'
  )
}

async function findAndProcessDocs(options) {
  logger.info({ options }, 'removing deleted docs')
  let cursor = 0
  do {
    const [newCursor, doclinesKeys] = await rclient.scan(
      cursor,
      'MATCH',
      keys.docLines({ doc_id: '*' }),
      'COUNT',
      options.limit
    )
    await removeDeletedDocs(doclinesKeys, options)
    cursor = newCursor
  } while (cursor !== '0')
}

findAndProcessDocs({ limit: 1000, dryRun: process.env.DRY_RUN !== 'false' })
  .then(result => {
    rclient.quit()
    mongoClient.close()
    console.log('DONE')
    process.exit(0)
  })
  .catch(function (error) {
    console.error(error)
    process.exit(1)
  })