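// Script to check the integrity of a project's filetree against the docstore
// and filestore. For each project id given on the command line it verifies
// that every doc and file in the filetree can be fetched, reports duplicate
// paths, and lists docs present in the docstore but missing from the
// filetree. With --fix, missing docs are restored into a recovery folder.
//
// Usage:
//   node services/web/scripts/check_project_docs.js [--verbose] [--fix] <projectId>...
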
const Path = require('path')
const DocstoreManager = require('../app/src/Features/Docstore/DocstoreManager')
const DocumentUpdaterHandler = require('../app/src/Features/DocumentUpdater/DocumentUpdaterHandler')
const FileStoreHandler = require('../app/src/Features/FileStore/FileStoreHandler')
const ProjectGetter = require('../app/src/Features/Project/ProjectGetter')
const ProjectEntityMongoUpdateHandler = require('../app/src/Features/Project/ProjectEntityMongoUpdateHandler')
const { waitForDb, db, ObjectId } = require('../app/src/infrastructure/mongodb')
const logger = require('@overleaf/logger').logger

const args = require('minimist')(process.argv.slice(2), {
  boolean: ['verbose', 'fix'],
})
const verbose = args.verbose

if (!verbose) {
  logger.level('error')
}

// no remaining arguments, print usage
if (args._.length === 0) {
  console.log(
    'Usage: node services/web/scripts/check_project_docs.js [--verbose] [--fix] <projectId>...'
  )
  process.exit(1)
}

function logDoc(projectId, path, doc, message = '') {
  console.log(
    'projectId:',
    projectId,
    'doc:',
    JSON.stringify({
      _id: doc._id,
      name: doc.name,
      lines: doc.lines ? doc.lines.join('\n').length : 0,
      rev: doc.rev,
      version: doc.version,
      ranges: typeof doc.ranges,
    }),
    path,
    message
  )
}

function logFile(projectId, path, file, message = '') {
  console.log(
    'projectId:',
    projectId,
    'file:',
    JSON.stringify({
      _id: file._id,
      name: file.name,
      linkedFileData: file.linkedFileData,
      hash: file.hash,
      size: file.size,
    }),
    path,
    message
  )
}
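
// Count how many filetree entries (docs and files) share each path, so that
// duplicate paths can be reported.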
function findPathCounts(projectId, docEntries, fileEntries) {
  const pathCounts = new Map()
  const docPaths = docEntries.map(({ path }) => path)
  const filePaths = fileEntries.map(({ path }) => path)
  const allPaths = docPaths.concat(filePaths)
  for (const path of allPaths) {
    pathCounts.set(path, (pathCounts.get(path) || 0) + 1)
  }
  return pathCounts
}
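
// Recursively walk a folder and collect { doc, path } and { file, path }
// entries for every doc and fileRef, skipping null or id-less entries.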
// copied from services/web/app/src/Features/Project/ProjectDuplicator.js
function _getFolderEntries(folder, folderPath = '/') {
  const docEntries = []
  const fileEntries = []
  const docs = folder.docs || []
  const files = folder.fileRefs || []
  const subfolders = folder.folders || []

  for (const doc of docs) {
    if (doc == null || doc._id == null) {
      continue
    }
    const path = Path.join(folderPath, doc.name)
    docEntries.push({ doc, path })
  }

  for (const file of files) {
    if (file == null || file._id == null) {
      continue
    }
    const path = Path.join(folderPath, file.name)
    fileEntries.push({ file, path })
  }

  for (const subfolder of subfolders) {
    if (subfolder == null || subfolder._id == null) {
      continue
    }
    const subfolderPath = Path.join(folderPath, subfolder.name)
    const subfolderEntries = _getFolderEntries(subfolder, subfolderPath)
    for (const docEntry of subfolderEntries.docEntries) {
      docEntries.push(docEntry)
    }
    for (const fileEntry of subfolderEntries.fileEntries) {
      fileEntries.push(fileEntry)
    }
  }
  return { docEntries, fileEntries }
}
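
// Fetch all non-deleted docs for the project directly from the docs collection.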
async function getDocsInMongo(projectId) {
  return await db.docs
    .find({ project_id: new ObjectId(projectId), deleted: { $ne: true } })
    .toArray()
}

function getDocIdsInFileTree(docEntries) {
  return docEntries.map(({ doc }) => doc._id.toString())
}
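
// Find docs that exist in the docstore but are not referenced anywhere in the
// project filetree.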
function findMissingDocs(docsInMongo, docIdsInFileTree) {
  const missingDocs = []
  for (const doc of docsInMongo) {
    const docId = doc._id.toString()
    if (!docIdsInFileTree.includes(docId)) {
      console.log(`Found doc in docstore not in project filetree:`, docId)
      missingDocs.push(doc)
    }
  }
  return missingDocs
}
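
// Create a timestamped "recovered-<timestamp>" folder in the project to hold
// any docs restored by --fix.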
async function createRecoveryFolder(projectId) {
  const recoveryFolder = `recovered-${Date.now()}`
  const { folder } = await ProjectEntityMongoUpdateHandler.promises.mkdirp(
    new ObjectId(projectId),
    recoveryFolder,
    null // unset lastUpdatedBy
  )
  console.log('Created recovery folder:', folder._id.toString())
  return folder
}
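
// Add each missing doc back to the project filetree inside the recovery
// folder, giving unnamed docs a placeholder name.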
async function restoreMissingDocs(projectId, folder, missingDocs) {
  for (const doc of missingDocs) {
    doc.name = doc.name || `unknown-file-${doc._id.toString()}`
    try {
      await ProjectEntityMongoUpdateHandler.promises.addDoc(
        new ObjectId(projectId),
        folder._id,
        doc,
        null // unset lastUpdatedBy
      )
      console.log('Restored doc to filetree:', doc._id.toString())
    } catch (err) {
      console.log(`Error adding doc to filetree:`, err)
    }
  }
}
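
// Check a single project: flush pending edits to mongo, walk the filetree,
// verify each doc against the docstore and each file against the filestore,
// report duplicate paths, and optionally (--fix) restore docs that are in the
// docstore but missing from the filetree.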
async function checkProject(projectId) {
  // flush any pending edits so mongo has the latest doc content
  try {
    await DocumentUpdaterHandler.promises.flushProjectToMongo(projectId)
  } catch (err) {
    console.log(`Error flushing project ${projectId} to mongo: ${err}`)
  }
  const project = await ProjectGetter.promises.getProject(projectId, {
    rootFolder: true,
    rootDoc_id: true,
  })
  if (verbose) {
    console.log(`project: ${JSON.stringify(project)}`)
  }
  const { docEntries, fileEntries } = _getFolderEntries(project.rootFolder[0])
  console.log(
    `Found ${docEntries.length} docEntries and ${fileEntries.length} fileEntries`
  )
  const pathCounts = findPathCounts(projectId, docEntries, fileEntries)

  for (const [path, count] of pathCounts) {
    if (count > 1) {
      console.log(`Found duplicate path: ${path}`)
    }
  }

  let errors = 0
  // check that every doc in the filetree can be fetched from the docstore
  for (const { doc, path } of docEntries) {
    try {
      const { lines, rev, version, ranges } =
        await DocstoreManager.promises.getDoc(projectId, doc._id)
      if (!lines) {
        throw new Error('no doclines')
      }
      if (pathCounts.get(path) > 1) {
        logDoc(
          projectId,
          path,
          { ...doc, lines, rev, version, ranges },
          'duplicate path'
        )
        errors++
      } else if (verbose) {
        logDoc(projectId, path, { ...doc, lines, rev, version, ranges })
      }
    } catch (err) {
      logDoc(projectId, path, doc, err)
      errors++
    }
  }
  // check that every file in the filetree can be found in the filestore
  for (const { file, path } of fileEntries) {
    try {
      const fileSize = await FileStoreHandler.promises.getFileSize(
        projectId,
        file._id
      )
      if (pathCounts.get(path) > 1) {
        logFile(projectId, path, { ...file, fileSize }, 'duplicate path')
        errors++
      } else if (verbose) {
        logFile(projectId, path, { ...file, fileSize })
      }
    } catch (err) {
      logFile(projectId, path, file, err)
      errors++
    }
  }

  // now look for docs in the docstore that are not in the project filetree
  const docsInMongo = await getDocsInMongo(projectId)
  const docIdsInFileTree = getDocIdsInFileTree(docEntries)
  const missingDocs = findMissingDocs(docsInMongo, docIdsInFileTree)

  if (args.fix && missingDocs.length > 0) {
    console.log('Restoring missing docs to filetree...')
    const folder = await createRecoveryFolder(projectId)
    await restoreMissingDocs(projectId, folder, missingDocs)
  }

  if (errors > 0) {
    console.log(`Errors found in project: ${projectId}`)
  } else {
    console.log(`No errors found in project: ${projectId}`)
  }
}
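
// Wait for the database connection, then check each project id passed on the
// command line.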
async function main() {
  await waitForDb()
  for (const projectId of args._) {
    await checkProject(projectId)
  }
}

main()
  .then(() => {
    console.log('DONE')
    process.exit(0)
  })
  .catch(err => {
    console.error(err)
    process.exit(1)
  })