2025-04-24 13:11:28 +08:00

222 lines
5.5 KiB
JavaScript

// @ts-check
import minimist from 'minimist'
import PQueue from 'p-queue'
import {
db,
ObjectId,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.js'
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
import { NotFoundError } from '../app/src/Features/Errors/Errors.js'
// Command-line options, parsed once at module load and read by the functions
// below. parseArgs is a hoisted function declaration, so calling it ahead of
// its definition is fine.
const OPTS = parseArgs()
/**
 * Parse and validate the command-line flags.
 *
 * Prints a message and terminates the process when `--help` is given (exit
 * code 0) or when the flags are unusable (exit code 1): neither report is
 * selected, `--project-modified-since` is not a parseable date, or
 * `--concurrency` is not a positive integer.
 *
 * @returns {{
 *   minProjectId: string | null,
 *   maxProjectId: string | null,
 *   projectModifiedSince: Date | null,
 *   danglingComments: boolean,
 *   trackedChanges: boolean,
 *   concurrency: number,
 * }}
 */
function parseArgs() {
  const args = minimist(process.argv.slice(2), {
    string: ['min-project-id', 'max-project-id', 'project-modified-since'],
    boolean: ['help', 'dangling-comments', 'tracked-changes'],
  })

  if (args.help) {
    usage()
    process.exit(0)
  }

  const danglingComments = Boolean(args['dangling-comments'])
  const trackedChanges = Boolean(args['tracked-changes'])
  if (!danglingComments && !trackedChanges) {
    console.log(
      'At least one of --dangling-comments or --tracked-changes must be enabled'
    )
    process.exit(1)
  }

  const projectModifiedSince = args['project-modified-since']
    ? new Date(args['project-modified-since'])
    : null
  // `new Date(...)` does not throw on bad input; it yields an Invalid Date
  // that would otherwise end up in the projects query unnoticed.
  if (
    projectModifiedSince != null &&
    Number.isNaN(projectModifiedSince.getTime())
  ) {
    console.log(
      `Invalid --project-modified-since date: ${args['project-modified-since']}`
    )
    process.exit(1)
  }

  // `--concurrency` without a value, or with a non-numeric one, parses to NaN.
  const concurrency = parseInt(args.concurrency ?? '1', 10)
  if (!Number.isInteger(concurrency) || concurrency < 1) {
    console.log(
      `--concurrency must be a positive integer, got: ${args.concurrency}`
    )
    process.exit(1)
  }

  return {
    minProjectId: args['min-project-id'] ?? null,
    maxProjectId: args['max-project-id'] ?? null,
    projectModifiedSince,
    danglingComments,
    trackedChanges,
    concurrency,
  }
}
/**
 * Print the command-line help text with a single console.log call.
 */
function usage() {
  const helpLines = [
    'Usage: find_dangling_comments.mjs [OPTS]',
    'Options:',
    '--min-project-id Start scanning at this project id',
    '--max-project-id Stop scanning at this project id',
    '--project-modified-since Only consider projects that were modified after the given date',
    'Example: 2020-01-01',
    '--dangling-comments Report projects with dangling comments',
    '--tracked-changes Report projects with tracked changes',
    '--concurrency How many projects can be processed in parallel',
    // Trailing empty entry keeps the final newline of the help text.
    '',
  ]
  console.log(helpLines.join('\n'))
}
/**
 * Scan the selected projects and report those with dangling comment threads
 * and/or tracked changes (depending on OPTS), then print summary counts.
 *
 * Projects are processed through a queue limited to `OPTS.concurrency`
 * parallel tasks. If processing any project fails, no further projects are
 * scheduled, the tasks already running are allowed to finish, and the first
 * error is rethrown so the caller sees the failure.
 *
 * @returns {Promise<void>}
 * @throws the first error raised while processing a project
 */
async function main() {
  const queue = new PQueue({ concurrency: OPTS.concurrency })
  let projectsProcessed = 0
  let danglingCommentsFound = 0
  let trackedChangesFound = 0
  // Errors are collected inside each task. The promise returned by
  // queue.add() is not awaited, so letting it reject would only produce an
  // unhandled rejection instead of failing main().
  const failures = []

  const processProject = async projectId => {
    const docs = await getDocs(projectId)
    if (OPTS.danglingComments) {
      const danglingThreadIds = await findDanglingThreadIds(projectId, docs)
      if (danglingThreadIds.length > 0) {
        console.log(
          `Project ${projectId} has dangling threads: ${danglingThreadIds.join(', ')}`
        )
        danglingCommentsFound += 1
      }
    }
    if (OPTS.trackedChanges) {
      if (docsHaveTrackedChanges(docs)) {
        console.log(`Project ${projectId} has tracked changes`)
        trackedChangesFound += 1
      }
    }
    projectsProcessed += 1
    if (projectsProcessed % 100000 === 0) {
      console.log(
        `${projectsProcessed} projects processed - Last project: ${projectId}`
      )
    }
  }

  for await (const projectId of getProjectIds()) {
    // Back-pressure: wait until nothing is waiting in the queue before
    // pulling and scheduling the next project id.
    await queue.onEmpty()
    if (failures.length > 0) {
      break
    }
    queue.add(async () => {
      try {
        await processProject(projectId)
      } catch (err) {
        failures.push(err)
      }
    })
  }
  await queue.onIdle()

  if (failures.length > 0) {
    throw failures[0]
  }

  if (OPTS.danglingComments) {
    console.log(
      `${danglingCommentsFound} projects with dangling comments found`
    )
  }
  if (OPTS.trackedChanges) {
    console.log(`${trackedChangesFound} projects with tracked changes found`)
  }
}
/**
 * Query the ids of the projects to scan, sorted by ascending `_id` and read
 * with the secondary read preference.
 *
 * The filter is assembled from OPTS: an inclusive lower and/or upper bound on
 * `_id`, and a minimum `lastUpdated` date. With no options set, every project
 * matches.
 *
 * @returns the result of mapping the find cursor to project ids as strings
 */
function getProjectIds() {
  const { minProjectId, maxProjectId, projectModifiedSince } = OPTS

  const conditions = []
  if (minProjectId != null) {
    conditions.push({ _id: { $gte: new ObjectId(minProjectId) } })
  }
  if (maxProjectId != null) {
    conditions.push({ _id: { $lte: new ObjectId(maxProjectId) } })
  }
  if (projectModifiedSince) {
    conditions.push({ lastUpdated: { $gte: projectModifiedSince } })
  }

  const filter = conditions.length === 0 ? {} : { $and: conditions }
  const cursor = db.projects.find(filter, {
    projection: { _id: 1 },
    readPreference: READ_PREFERENCE_SECONDARY,
    sort: { _id: 1 },
  })
  return cursor.map(project => project._id.toString())
}
/**
 * Load the ranges of every doc of a project that is not flagged as deleted.
 *
 * Docs flagged `inS3` have their ranges fetched through
 * DocstoreManager.promises.getDoc with `{ peek: true }`; all other docs use
 * the ranges stored on the Mongo record. A doc whose fetch fails with
 * NotFoundError is logged with console.warn and left out of the result.
 *
 * @param {string} projectId
 * @returns {Promise<Array<{ id: string, ranges: any }>>}
 * @throws any error from the fetch other than NotFoundError
 */
async function getDocs(projectId) {
  const filter = {
    project_id: new ObjectId(projectId),
    deleted: { $ne: true },
  }
  const findOptions = {
    projection: { ranges: 1, inS3: 1 },
    readPreference: READ_PREFERENCE_SECONDARY,
  }
  const cursor = db.docs.find(filter, findOptions)

  const collected = []
  for await (const record of cursor) {
    if (!record.inS3) {
      collected.push({ id: record._id.toString(), ranges: record.ranges })
      continue
    }

    try {
      const archived = await DocstoreManager.promises.getDoc(
        projectId,
        record._id,
        { peek: true }
      )
      collected.push({ id: record._id.toString(), ranges: archived.ranges })
    } catch (err) {
      if (!(err instanceof NotFoundError)) {
        throw err
      }
      console.warn(`Doc ${record._id} in project ${projectId} not found`)
    }
  }
  return collected
}
/**
 * Find the comment thread ids used in a project's docs that have no matching
 * room record.
 *
 * Every `op.t` of every comment range is collected first; the ids that also
 * appear as `thread_id` on one of the project's rooms are then crossed off.
 * Whatever remains is dangling. The rooms query is skipped entirely when the
 * docs contain no comments, and iteration stops as soon as every id has been
 * matched.
 *
 * @param {string} projectId
 * @param {Array<{ id: string, ranges: any }>} docs
 * @returns {Promise<string[]>} the thread ids without a room
 */
async function findDanglingThreadIds(projectId, docs) {
  const unmatched = new Set()
  for (const { ranges } of docs) {
    for (const { op } of ranges?.comments ?? []) {
      unmatched.add(op.t.toString())
    }
  }
  if (unmatched.size === 0) {
    return []
  }

  const filter = {
    project_id: new ObjectId(projectId),
    thread_id: { $exists: true },
  }
  const cursor = await db.rooms.find(filter, {
    readPreference: READ_PREFERENCE_SECONDARY,
  })
  for await (const { thread_id: threadId } of cursor) {
    unmatched.delete(threadId.toString())
    if (unmatched.size === 0) {
      break
    }
  }
  return [...unmatched]
}
/**
 * Tell whether at least one doc carries a non-empty `ranges.changes` list.
 *
 * @param {Array<{ id: string, ranges: any }>} docs
 * @returns {boolean}
 */
function docsHaveTrackedChanges(docs) {
  return docs.some(doc => {
    const changes = doc.ranges?.changes ?? []
    return changes.length > 0
  })
}
// Script entry point: run the scan and always terminate through
// process.exit, with code 0 on success or code 1 after logging the error.
// NOTE(review): the explicit exit is presumably needed because open handles
// (e.g. the database connection) would keep the process alive; confirm.
let exitCode = 0
try {
  await main()
} catch (err) {
  console.error(err)
  exitCode = 1
}
process.exit(exitCode)