// Listing metadata: 222 lines, 5.5 KiB, JavaScript
// @ts-check
|
|
|
|
import minimist from 'minimist'
|
|
import PQueue from 'p-queue'
|
|
import {
|
|
db,
|
|
ObjectId,
|
|
READ_PREFERENCE_SECONDARY,
|
|
} from '../app/src/infrastructure/mongodb.js'
|
|
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
|
|
import { NotFoundError } from '../app/src/Features/Errors/Errors.js'
|
|
|
|
// Command-line options, parsed once at module load; read by main() and
// getProjectIds(). parseArgs is a hoisted function declaration, so calling it
// before its definition is fine.
const OPTS = parseArgs()
|
|
|
|
/**
 * Parse and validate the command-line options of this script.
 *
 * Side effects: prints the usage text and exits with status 0 when --help is
 * given; prints a message to stderr and exits with status 1 when neither
 * --dangling-comments nor --tracked-changes is enabled, when
 * --project-modified-since cannot be parsed as a date, or when --concurrency
 * is not a positive integer.
 *
 * @returns {{
 *   minProjectId: string | null,
 *   maxProjectId: string | null,
 *   projectModifiedSince: Date | null,
 *   danglingComments: boolean,
 *   trackedChanges: boolean,
 *   concurrency: number,
 * }}
 */
function parseArgs() {
  const args = minimist(process.argv.slice(2), {
    string: ['min-project-id', 'max-project-id', 'project-modified-since'],
    boolean: ['help', 'dangling-comments', 'tracked-changes'],
  })

  if (args.help) {
    usage()
    process.exit(0)
  }

  // Report a validation problem on stderr (stdout carries the scan report)
  // and stop the script with a failure status.
  const fail = message => {
    console.error(message)
    process.exit(1)
  }

  const danglingComments = Boolean(args['dangling-comments'])
  const trackedChanges = Boolean(args['tracked-changes'])
  if (!danglingComments && !trackedChanges) {
    fail(
      'At least one of --dangling-comments or --tracked-changes must be enabled'
    )
  }

  const rawModifiedSince = args['project-modified-since']
  const projectModifiedSince = rawModifiedSince
    ? new Date(rawModifiedSince)
    : null
  // An unparseable string yields an Invalid Date whose timestamp is NaN;
  // reject it here instead of passing it on to the project query.
  if (
    projectModifiedSince != null &&
    Number.isNaN(projectModifiedSince.getTime())
  ) {
    fail(`Invalid date for --project-modified-since: ${rawModifiedSince}`)
  }

  // --concurrency is not declared as a string option, so it may arrive as a
  // number, a string or `true` (flag without a value); normalise via String().
  const concurrency = Number.parseInt(String(args.concurrency ?? '1'), 10)
  if (!Number.isInteger(concurrency) || concurrency < 1) {
    fail(`--concurrency must be a positive integer, got: ${args.concurrency}`)
  }

  return {
    minProjectId: args['min-project-id'] ?? null,
    maxProjectId: args['max-project-id'] ?? null,
    projectModifiedSince,
    danglingComments,
    trackedChanges,
    concurrency,
  }
}
|
|
|
|
/**
 * Print the command-line help text of this script to stdout.
 */
function usage() {
  console.log(`Usage: find_dangling_comments.mjs [OPTS]

Options:

    --min-project-id           Start scanning at this project id
    --max-project-id           Stop scanning at this project id
    --project-modified-since   Only consider projects that were modified after the given date
                               Example: 2020-01-01
    --dangling-comments        Report projects with dangling comments
    --tracked-changes          Report projects with tracked changes
    --concurrency              How many projects can be processed in parallel
`)
}
|
|
|
|
/**
 * Scan the projects selected by getProjectIds() and report, per OPTS, the
 * projects that have dangling comment threads and/or tracked changes, then
 * print the totals.
 *
 * Projects are processed through a PQueue limited to OPTS.concurrency. A
 * progress line is printed every 100000 processed projects.
 *
 * @returns {Promise<void>}
 * @throws the first error raised while processing a project; no further
 *   projects are scheduled once a failure has been recorded.
 */
async function main() {
  const queue = new PQueue({ concurrency: OPTS.concurrency })
  let projectsProcessed = 0
  let danglingCommentsFound = 0
  let trackedChangesFound = 0
  // Errors thrown by queued tasks. They are collected here instead of being
  // left as unhandled rejections on the promise returned by queue.add().
  const failures = []

  for await (const projectId of getProjectIds()) {
    // Wait for the queue's backlog to clear before scheduling another
    // project, so ids are not pulled much faster than they are processed.
    await queue.onEmpty()
    if (failures.length > 0) {
      break
    }

    queue
      .add(async () => {
        const docs = await getDocs(projectId)

        if (OPTS.danglingComments) {
          const danglingThreadIds = await findDanglingThreadIds(
            projectId,
            docs
          )
          if (danglingThreadIds.length > 0) {
            console.log(
              `Project ${projectId} has dangling threads: ${danglingThreadIds.join(', ')}`
            )
            danglingCommentsFound += 1
          }
        }

        if (OPTS.trackedChanges && docsHaveTrackedChanges(docs)) {
          console.log(`Project ${projectId} has tracked changes`)
          trackedChangesFound += 1
        }

        projectsProcessed += 1
        if (projectsProcessed % 100000 === 0) {
          console.log(
            `${projectsProcessed} projects processed - Last project: ${projectId}`
          )
        }
      })
      .catch(err => {
        failures.push(err)
      })
  }

  // Let the tasks that are still running finish before reporting.
  await queue.onIdle()

  if (failures.length > 0) {
    throw failures[0]
  }

  if (OPTS.danglingComments) {
    console.log(
      `${danglingCommentsFound} projects with dangling comments found`
    )
  }

  if (OPTS.trackedChanges) {
    console.log(`${trackedChangesFound} projects with tracked changes found`)
  }
}
|
|
|
|
/**
 * Build the project query from OPTS and return the matching project ids.
 *
 * The query combines, with $and, an optional lower bound and upper bound on
 * `_id` (both inclusive) and an optional lower bound on `lastUpdated`. With no
 * option set, every project matches. Results are read with
 * READ_PREFERENCE_SECONDARY and sorted by ascending `_id`.
 *
 * @returns the result of `db.projects.find(...).map(...)`; main() consumes it
 *   with `for await`, each item being a project id converted to a string.
 */
function getProjectIds() {
  const clauses = []

  if (OPTS.minProjectId != null) {
    clauses.push({ _id: { $gte: new ObjectId(OPTS.minProjectId) } })
  }

  if (OPTS.maxProjectId != null) {
    clauses.push({ _id: { $lte: new ObjectId(OPTS.maxProjectId) } })
  }

  if (OPTS.projectModifiedSince) {
    clauses.push({ lastUpdated: { $gte: OPTS.projectModifiedSince } })
  }

  const query = clauses.length > 0 ? { $and: clauses } : {}
  return db.projects
    .find(query, {
      projection: { _id: 1 },
      readPreference: READ_PREFERENCE_SECONDARY,
      sort: { _id: 1 },
    })
    .map(project => project._id.toString())
}
|
|
|
|
/**
 * Load the ranges of every non-deleted doc of a project.
 *
 * Docs are read from `db.docs` with READ_PREFERENCE_SECONDARY. For a doc
 * flagged `inS3`, the ranges are taken from
 * `DocstoreManager.promises.getDoc(projectId, docId, { peek: true })` instead
 * of the mongo record (presumably because the doc content is archived —
 * confirm against docstore). If that lookup fails with NotFoundError, a
 * warning is logged and the doc is left out of the result.
 *
 * @param {string} projectId
 * @returns {Promise<Array<{ id: string, ranges: any }>>}
 * @throws any error from DocstoreManager other than NotFoundError
 */
async function getDocs(projectId) {
  const mongoDocs = db.docs.find(
    {
      project_id: new ObjectId(projectId),
      deleted: { $ne: true },
    },
    {
      projection: { ranges: 1, inS3: 1 },
      readPreference: READ_PREFERENCE_SECONDARY,
    }
  )

  const docs = []
  for await (const mongoDoc of mongoDocs) {
    let ranges = mongoDoc.ranges
    if (mongoDoc.inS3) {
      try {
        const archivedDoc = await DocstoreManager.promises.getDoc(
          projectId,
          mongoDoc._id,
          { peek: true }
        )
        ranges = archivedDoc.ranges
      } catch (err) {
        if (err instanceof NotFoundError) {
          console.warn(`Doc ${mongoDoc._id} in project ${projectId} not found`)
          // Skip this doc; the rest of the project is still inspected.
          continue
        }
        throw err
      }
    }
    docs.push({ id: mongoDoc._id.toString(), ranges })
  }

  return docs
}
|
|
|
|
/**
 * Find the comment thread ids used by a project's docs that have no matching
 * entry in `db.rooms`.
 *
 * Thread ids are collected from `doc.ranges.comments[].op.t`. Every room of
 * the project that has a `thread_id` removes that id from the collected set;
 * whatever remains is reported as dangling. The rooms collection is not
 * queried at all when the docs contain no comments.
 *
 * @param {string} projectId
 * @param {Array<{ ranges?: any }>} docs - docs as returned by getDocs()
 * @returns {Promise<string[]>} dangling thread ids, as strings
 */
async function findDanglingThreadIds(projectId, docs) {
  const threadIds = new Set()
  for (const doc of docs) {
    for (const comment of doc.ranges?.comments ?? []) {
      threadIds.add(comment.op.t.toString())
    }
  }

  if (threadIds.size === 0) {
    return []
  }

  // find() hands back a cursor that is consumed with `for await` below, so
  // there is nothing to await on the call itself.
  const rooms = db.rooms.find(
    { project_id: new ObjectId(projectId), thread_id: { $exists: true } },
    { readPreference: READ_PREFERENCE_SECONDARY }
  )
  for await (const room of rooms) {
    threadIds.delete(room.thread_id.toString())
    if (threadIds.size === 0) {
      // Every thread is accounted for; no need to read further rooms.
      break
    }
  }

  return Array.from(threadIds)
}
|
|
|
|
/**
 * Tell whether at least one of the given docs has a non-empty
 * `ranges.changes` list.
 *
 * @param {Array<{ ranges?: any }>} docs - docs as returned by getDocs()
 * @returns {boolean}
 */
function docsHaveTrackedChanges(docs) {
  return docs.some(doc => (doc.ranges?.changes ?? []).length > 0)
}
|
|
|
|
// Script entry point: run main() and exit explicitly, with status 0 on
// success, or after logging the error with status 1 on failure.
try {
  await main()
  process.exit(0)
} catch (err) {
  console.error(err)
  process.exit(1)
}
|