154 lines
4.2 KiB
JavaScript
154 lines
4.2 KiB
JavaScript
// @ts-check
|
|
import { ObjectId } from 'mongodb'
|
|
import { READ_PREFERENCE_SECONDARY } from '@overleaf/mongo-utils/batchedUpdate.js'
|
|
import { db, client } from '../lib/mongodb.js'
|
|
|
|
// Let ObjectId instances keep their hex string once computed, so repeated
// toString() calls do not redo the conversion.
ObjectId.cacheHexString = true

// Handle on the `projects` collection; every query in this script goes
// through it.
const projectsCollection = db.collection('projects')
|
|
|
|
// Configuration, read from optional positional CLI arguments:
//   argv[2]  number of projects requested per sampling iteration
//            (default: 10000)
//   argv[3]  target margin of error, in percent (default: 5.0)
const SAMPLE_SIZE_PER_ITERATION = process.argv[2]
  ? parseInt(process.argv[2], 10)
  : 10000
const TARGET_ERROR_PERCENTAGE = process.argv[3]
  ? parseFloat(process.argv[3])
  : 5.0

// Fail fast on unusable values. A NaN target makes every
// `currentError > TARGET_ERROR_PERCENTAGE` comparison false, so the sampling
// loop would be skipped silently; a NaN or non-positive sample size would be
// handed to `$sample` as-is.
if (
  !Number.isInteger(SAMPLE_SIZE_PER_ITERATION) ||
  SAMPLE_SIZE_PER_ITERATION < 1
) {
  throw new Error(
    `Invalid sample size per iteration: ${JSON.stringify(process.argv[2])} (expected a positive integer)`
  )
}
if (!Number.isFinite(TARGET_ERROR_PERCENTAGE) || TARGET_ERROR_PERCENTAGE < 0) {
  throw new Error(
    `Invalid target margin of error: ${JSON.stringify(process.argv[3])} (expected a non-negative number)`
  )
}
|
|
|
|
// Set once a termination signal has been received; the sampling loop in
// main() checks it before starting another iteration.
let gracefulShutdownInitiated = false

/**
 * Signal handler: records that a shutdown was requested and logs a warning.
 * It does not exit the process itself.
 */
function handleSignal() {
  gracefulShutdownInitiated = true
  console.warn('graceful shutdown initiated')
}

// Both interrupt and terminate requests are handled the same way.
for (const signalName of ['SIGINT', 'SIGTERM']) {
  process.on(signalName, handleSignal)
}
|
|
|
|
/**
 * Draw one random sample of projects (read from a secondary) and count how
 * many of them have `overleaf.backup.lastBackedUpVersion` set.
 *
 * The number of sampled projects is counted from what `$sample` actually
 * returned rather than assumed to equal `sampleSize`: when the collection
 * holds fewer documents than requested, `$sample` returns fewer, and assuming
 * the requested size would understate the backed-up proportion.
 *
 * @param {number} sampleSize - number of projects to request from `$sample`
 * @returns {Promise<{ totalSampled: number, backedUp: number }>} documents
 *   actually sampled and how many of them had the field; both are 0 when the
 *   sample came back empty
 */
async function takeSample(sampleSize) {
  const results = await projectsCollection
    .aggregate(
      [
        { $sample: { size: sampleSize } },
        {
          $group: {
            _id: null,
            totalSampled: { $sum: 1 },
            backedUp: {
              $sum: {
                $cond: [
                  {
                    // `$type` yields 'missing' for an absent field, so this
                    // is the expression form of `{ $exists: true }` (a
                    // stored null still counts as present).
                    $ne: [
                      { $type: '$overleaf.backup.lastBackedUpVersion' },
                      'missing',
                    ],
                  },
                  1,
                  0,
                ],
              },
            },
          },
        },
      ],
      { readPreference: READ_PREFERENCE_SECONDARY }
    )
    .toArray()

  // `$group` emits no document at all when the sample is empty.
  const [summary] = results
  return {
    totalSampled: summary?.totalSampled ?? 0,
    backedUp: summary?.backedUp ?? 0,
  }
}
|
|
|
|
/**
 * Estimate the backed-up proportion from the cumulative sample, together with
 * its 95% margin of error (normal approximation with a finite population
 * correction).
 *
 * Edge cases are handled explicitly instead of leaking NaN/Infinity:
 * - nothing sampled yet: proportion 0 and, for a non-empty population, a
 *   margin of error of 100% (no information);
 * - sample at least as large as the population estimate: the correction
 *   factor is clamped to 0, i.e. no sampling error is reported;
 * - the reported confidence bounds are clamped to the 0–100% range.
 *
 * @param {number} cumulativeSampled - projects sampled so far
 * @param {number} cumulativeBackedUp - sampled projects counted as backed up
 * @param {number} totalPopulation - (estimated) total number of projects
 * @returns {{
 *   proportion: number,
 *   percentage: string,
 *   marginOfError: number,
 *   errorPercentage: string,
 *   lowerBound: string,
 *   upperBound: string,
 *   sampleSize: number,
 *   populationSize: number
 * }} `proportion` and `marginOfError` are fractions; the string fields are
 *   percentages formatted with two decimals
 */
function calculateStatistics(
  cumulativeSampled,
  cumulativeBackedUp,
  totalPopulation
) {
  const formatPercent = fraction => (fraction * 100).toFixed(2)
  const hasSample = cumulativeSampled > 0

  // The hit count is floored at 1 — presumably so that a sample without any
  // hit does not produce a zero standard error (TODO confirm intent). The
  // upper clamp keeps p * (1 - p) non-negative for inconsistent inputs.
  const proportion = hasSample
    ? Math.min(1, Math.max(1, cumulativeBackedUp) / cumulativeSampled)
    : 0

  let marginOfError
  if (!hasSample) {
    // Without data the estimate is unconstrained, unless there is nothing to
    // estimate in the first place.
    marginOfError = totalPopulation > 0 ? 1 : 0
  } else {
    // Standard error with finite population correction. Once the sample
    // covers the whole population estimate the radicand would go negative,
    // so the factor is clamped to 0. Below that point totalPopulation > 1
    // holds, so the denominator is positive.
    const fpc =
      cumulativeSampled >= totalPopulation
        ? 0
        : Math.sqrt(
            (totalPopulation - cumulativeSampled) / (totalPopulation - 1)
          )
    const stdError =
      Math.sqrt((proportion * (1 - proportion)) / cumulativeSampled) * fpc

    // 95% confidence interval is approximately ±1.96 standard errors
    marginOfError = 1.96 * stdError
  }

  return {
    proportion,
    percentage: formatPercent(proportion),
    marginOfError,
    errorPercentage: formatPercent(marginOfError),
    lowerBound: formatPercent(Math.max(0, proportion - marginOfError)),
    upperBound: formatPercent(Math.min(1, proportion + marginOfError)),
    sampleSize: cumulativeSampled,
    populationSize: totalPopulation,
  }
}
|
|
|
|
/**
 * Estimate the share of projects that have a `lastBackedUpVersion` by
 * sampling repeatedly until the margin of error reported by
 * calculateStatistics() is no longer above TARGET_ERROR_PERCENTAGE, a
 * shutdown signal has been received, or a sample comes back empty.
 *
 * Prints one table row per iteration and a summary at the end. When nothing
 * was sampled at all, the statistical summary is skipped, because there is
 * nothing to estimate from.
 *
 * @returns {Promise<void>}
 */
async function main() {
  console.log('Date:', new Date().toISOString())
  const totalCount = await projectsCollection.estimatedDocumentCount({
    readPreference: READ_PREFERENCE_SECONDARY,
  })
  console.log(
    `Total projects in collection (estimated): ${totalCount.toLocaleString()}`
  )
  console.log(`Target margin of error: ${TARGET_ERROR_PERCENTAGE}%`)

  let cumulativeSampled = 0
  let cumulativeBackedUp = 0
  let currentError = Infinity
  let iteration = 0

  console.log('Iteration | Total Sampled | % Backed Up | Margin of Error')
  console.log('----------|---------------|-------------|----------------')

  while (currentError > TARGET_ERROR_PERCENTAGE) {
    if (gracefulShutdownInitiated) {
      console.log('Graceful shutdown initiated. Exiting sampling loop.')
      break
    }

    iteration++
    const { totalSampled, backedUp } = await takeSample(
      SAMPLE_SIZE_PER_ITERATION
    )
    if (totalSampled === 0) {
      // An empty sample adds no information; sampling again would not help.
      console.warn('Sample returned no projects. Exiting sampling loop.')
      break
    }
    cumulativeSampled += totalSampled
    cumulativeBackedUp += backedUp

    const stats = calculateStatistics(
      cumulativeSampled,
      cumulativeBackedUp,
      totalCount
    )
    currentError = parseFloat(stats.errorPercentage)

    console.log(
      `${iteration.toString().padStart(9)} | ` +
        `${cumulativeSampled.toString().padStart(13)} | ` +
        `${stats.percentage.padStart(10)}% | ` +
        `\u00B1${stats.errorPercentage}%`
    )

    // Small delay between iterations
    await new Promise(resolve => setTimeout(resolve, 100))
  }

  console.log(
    `Projects sampled: ${cumulativeSampled.toLocaleString()} out of ${totalCount.toLocaleString()}`
  )

  if (cumulativeSampled === 0) {
    // Shutdown before the first sample, or an empty sample: statistics over
    // zero observations are undefined, so do not print any.
    console.log('No projects were sampled; no estimate available.')
    return
  }

  const finalStats = calculateStatistics(
    cumulativeSampled,
    cumulativeBackedUp,
    totalCount
  )

  console.log(
    `Estimated percentage with lastBackedUpVersion: ${finalStats.percentage}%`
  )
  console.log(
    `95% Confidence Interval: ${finalStats.lowerBound}% - ${finalStats.upperBound}%`
  )
  console.log(`Final Margin of Error: \u00B1${finalStats.errorPercentage}%`)
}
|
|
|
|
/**
 * Script entry point: runs main(), reports success or failure, and closes the
 * MongoDB client in either case. A failure is logged and turned into a
 * non-zero exit code instead of being rethrown.
 */
async function runAndCleanUp() {
  try {
    await main()
    console.log('Done.')
  } catch (err) {
    console.error('Error:', err)
    process.exitCode = 1
  } finally {
    // Closing is not awaited; a failure to close is only logged.
    client.close().catch(err => console.error('Error closing MongoDB:', err))
  }
}

runAndCleanUp()
|