first commit

2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

@@ -0,0 +1,182 @@
// Script to add feature overrides
//
// A feature override is appended to the user's featuresOverrides list if they do
// not already have the feature. The features are refreshed after adding the
// override.
//
// If the script detects that the user would have the feature just by refreshing,
// then it skips adding the override and simply refreshes the user's features --
// this is to minimise the creation of unnecessary overrides.
//
// Usage:
//
// $ node scripts/add_feature_override.mjs --commit --note 'text description' --expires 2022-01-01 --override JSONFILE --ids IDFILE
//
// --commit do the update, remove this option for dry-run testing
// --note text description [optional]
// --expires expiry date for override [optional]
// --skip-existing don't create the override for users who already have the feature (e.g. via a subscription)
//
// IDFILE: file containing list of user ids, one per line
// JSONFILE: file containing JSON of the desired feature overrides e.g. {"symbolPalette": true}
//
// The feature override is specified with JSON to allow types to be set as string/number/boolean.
// It is contained in a file to avoid any issues with shell quoting.
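//
// Example (hypothetical file contents and user ids; add --commit to apply, otherwise
// the script performs a dry run):
//
//   $ cat override.json
//   {"symbolPalette": true}
//   $ cat user_ids.txt
//   507f1f77bcf86cd799439011
//   507f1f77bcf86cd799439012
//   $ node scripts/add_feature_override.mjs --note 'symbol palette rollout' \
//       --expires 2025-01-01 --override override.json --ids user_ids.txt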
import minimist from 'minimist'
import fs from 'node:fs'
import { ObjectId } from '../app/src/infrastructure/mongodb.js'
import pLimit from 'p-limit'
import FeaturesUpdater from '../app/src/Features/Subscription/FeaturesUpdater.js'
import FeaturesHelper from '../app/src/Features/Subscription/FeaturesHelper.js'
import UserFeaturesUpdater from '../app/src/Features/Subscription/UserFeaturesUpdater.js'
import UserGetter from '../app/src/Features/User/UserGetter.js'
const processLogger = {
failed: [],
success: [],
skipped: [],
printSummary: () => {
console.log(
{
success: processLogger.success,
failed: processLogger.failed,
skipped: processLogger.skipped,
},
`\nDONE. ${processLogger.success.length} successful. ${processLogger.skipped.length} skipped. ${processLogger.failed.length} failed to update.`
)
},
}
function _validateUserIdList(userIds) {
userIds.forEach(userId => {
if (!ObjectId.isValid(userId))
throw new Error(`user ID not valid: ${userId}`)
})
}
async function _handleUser(userId) {
console.log('updating user', userId)
const user = await UserGetter.promises.getUser(userId, {
features: 1,
featuresOverrides: 1,
})
if (!user) {
console.log(userId, 'does not exist, failed')
processLogger.failed.push(userId)
return
}
const desiredFeatures = OVERRIDE.features
// Does the user have the requested features already?
if (
SKIP_EXISTING &&
FeaturesHelper.isFeatureSetBetter(user.features, desiredFeatures)
) {
console.log(
userId,
`already has ${JSON.stringify(desiredFeatures)}, skipping`
)
processLogger.skipped.push(userId)
return
}
// Would the user have the requested feature if the features were refreshed?
const freshFeatures = await FeaturesUpdater.promises.computeFeatures(userId)
if (
SKIP_EXISTING &&
FeaturesHelper.isFeatureSetBetter(freshFeatures, desiredFeatures)
) {
console.log(
userId,
`would have ${JSON.stringify(
desiredFeatures
)} if refreshed, skipping override`
)
} else {
// create the override (if not in dry-run mode)
if (COMMIT) {
await UserFeaturesUpdater.promises.createFeaturesOverride(
userId,
OVERRIDE
)
}
}
if (!COMMIT) {
// not saving features; nothing else to do
return
}
const refreshResult = await FeaturesUpdater.promises.refreshFeatures(
userId,
'add-feature-override-script'
)
const featureSetIncludesNewFeatures = FeaturesHelper.isFeatureSetBetter(
refreshResult.features,
desiredFeatures
)
if (featureSetIncludesNewFeatures) {
// features added successfully
processLogger.success.push(userId)
} else {
console.log('FEATURE NOT ADDED', refreshResult)
processLogger.failed.push(userId)
}
}
const argv = minimist(process.argv.slice(2))
const CONCURRENCY = argv.async ? argv.async : 10
const overridesFilename = argv.override
const expires = argv.expires
const note = argv.note
const SKIP_EXISTING = argv['skip-existing'] || false
const COMMIT = argv.commit !== undefined
if (!COMMIT) {
console.warn('Doing dry run without --commit')
}
const idsFilename = argv.ids
if (!idsFilename) throw new Error('missing ids list filename')
const usersFile = fs.readFileSync(idsFilename, 'utf8')
const userIds = usersFile
.trim()
.split('\n')
.map(id => id.trim())
const overridesFile = fs.readFileSync(overridesFilename, 'utf8')
const features = JSON.parse(overridesFile)
const OVERRIDE = { features }
if (note) {
OVERRIDE.note = note
}
if (expires) {
OVERRIDE.expiresAt = new Date(expires)
}
async function processUsers(userIds) {
console.log('---Starting add feature override script---')
console.log('Will update users to have', OVERRIDE)
console.log(
SKIP_EXISTING
? 'Users with this feature already will be skipped'
: 'Every user in file will get feature override'
)
_validateUserIdList(userIds)
console.log(`---Starting to process ${userIds.length} users---`)
const limit = pLimit(CONCURRENCY)
const results = await Promise.allSettled(
userIds.map(userId => limit(() => _handleUser(new ObjectId(userId))))
)
results.forEach((result, idx) => {
if (result.status !== 'fulfilled') {
console.log(userIds[idx], 'failed', result.reason)
processLogger.failed.push(userIds[idx])
}
})
processLogger.printSummary()
process.exit()
}
await processUsers(userIds)

@@ -0,0 +1,204 @@
import fs from 'node:fs'
import minimist from 'minimist'
import { parse } from 'csv'
import Stream from 'node:stream/promises'
import { ObjectId } from '../app/src/infrastructure/mongodb.js'
import { Subscription } from '../app/src/models/Subscription.js'
function usage() {
console.log(
'Usage: node add_salesforce_data_to_subscriptions.mjs -f <filename> [options]'
)
console.log(
'Updates the subscriptions collection with external IDs for determining the Salesforce account that goes with the subscription. The file should be a CSV and have columns account_id, v1_id and subscription_id. The account_id column is the Salesforce account ID, the v1_id column is the V1 account ID, and the subscription_id column is the subscription ID.'
)
console.log('Options:')
console.log(
' --commit, -c Commit changes to the database'
)
console.log(
' --emptyFieldValue <value> The value to treat as an empty field (default: NA)'
)
console.log(
' -f, --filename <filename> The path to the file to read data from'
)
console.log(' -h, --help Show this help message')
console.log(' -v, --verbose Produces more detailed logs')
process.exit(0)
}
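// Example input CSV (hypothetical values; by default 'NA' marks an empty field, see
// --emptyFieldValue):
//
//   account_id,v1_id,subscription_id
//   001A000001AbCdE,12345,607f1f77bcf86cd799439011
//   001A000001FgHiJ,NA,607f1f77bcf86cd799439012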
const { commit, emptyFieldValue, filename, help, verbose } = minimist(
process.argv.slice(2),
{
string: ['emptyFieldValue', 'filename'],
boolean: ['commit', 'help', 'verbose'],
alias: {
commit: 'c',
filename: 'f',
help: 'h',
verbose: 'v',
},
default: {
commit: false,
emptyFieldValue: 'NA',
help: false,
verbose: false,
},
}
)
const SUBSCRIPTION_ID_FIELD = 'subscription_id'
const SALESFORCE_ID_FIELD = 'account_id'
const V1_ID_FIELD = 'v1_id'
if (help) {
usage()
process.exit(0)
}
if (!filename) {
console.error('No filename provided')
usage()
process.exit(1)
}
const stats = {
totalRows: 0,
subscriptionIDMissing: 0,
usedV1ID: 0,
usedSalesforceID: 0,
processedRows: 0,
db: {
errors: 0,
matched: 0,
updateAttempted: 0,
updated: 0,
},
}
function showStats() {
console.log('Stats:')
console.log(` Total rows: ${stats.totalRows}`)
console.log(` Processed rows: ${stats.processedRows}`)
console.log(` Skipped (no subscription ID): ${stats.subscriptionIDMissing}`)
console.log(` Used V1 ID: ${stats.usedV1ID}`)
console.log(` Used Salesforce ID: ${stats.usedSalesforceID}`)
if (commit) {
console.log('Database operations:')
console.log(` Errors: ${stats.db.errors}`)
console.log(` Matched: ${stats.db.matched}`)
console.log(` Updated: ${stats.db.updated}`)
console.log(` Update attempted: ${stats.db.updateAttempted}`)
}
}
function pickRelevantColumns(row) {
const newRow = {
salesforceId: row[SALESFORCE_ID_FIELD],
}
if (row[V1_ID_FIELD] && row[V1_ID_FIELD] !== emptyFieldValue) {
newRow.v1Id = row[V1_ID_FIELD]
}
if (
row[SUBSCRIPTION_ID_FIELD] &&
row[SUBSCRIPTION_ID_FIELD] !== emptyFieldValue
) {
newRow.subscriptionId = row[SUBSCRIPTION_ID_FIELD]
}
return newRow
}
async function processRows(rows) {
for await (const row of rows) {
const { v1Id, salesforceId, subscriptionId } = row
const update = {}
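// Prefer the V1 account id when the row has one; otherwise fall back to the Salesforce account id.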
if (v1Id) {
stats.usedV1ID++
update.v1_id = v1Id
} else {
stats.usedSalesforceID++
update.salesforce_id = salesforceId
}
// Useful for logging later.
const updateString = Object.entries(update).flatMap(([k, v]) => `${k}=${v}`)
if (commit) {
try {
const result = await Subscription.updateOne(
{ _id: new ObjectId(subscriptionId) },
update,
{ upsert: false }
)
if (result.matchedCount) {
stats.db.matched++
}
if (result.modifiedCount) {
stats.db.updated++
if (verbose) {
console.log(
`Updated subscription ${subscriptionId} to set ${updateString}`
)
}
}
} catch (error) {
stats.db.errors++
if (verbose) {
console.error(
`Error updating subscription ${subscriptionId}: ${error}`
)
}
} finally {
stats.db.updateAttempted++
}
} else if (verbose) {
console.log(`Would set ${updateString} on subscription ${subscriptionId}`)
}
}
}
async function main() {
await Stream.pipeline(
fs.createReadStream(filename),
parse({
columns: true,
cast: function (value, context) {
if (context.column === V1_ID_FIELD && value !== emptyFieldValue) {
return parseInt(value)
}
return value
},
on_record: function (record, context) {
stats.totalRows++
const row = pickRelevantColumns(record)
// Cannot process records without a Subscription ID
if (!row.subscriptionId) {
if (verbose) {
console.log(
`No subscription id found for ${row.salesforceId}, skipping...`
)
}
stats.subscriptionIDMissing++
return null
}
stats.processedRows++
return row
},
}),
processRows
)
}
if (!commit) {
console.log('Dry run')
} else {
console.log('Committing changes to the database')
}
await main()
showStats()
process.exit()

@@ -0,0 +1,95 @@
// Usage: node scripts/add_user_count_to_csv.mjs [OPTS] [INPUT-FILE]
// Looks up the number of users for each domain in the input csv file and adds
// columns for the number of users in the domain, subdomains, and total.
import fs from 'node:fs'
// https://github.com/import-js/eslint-plugin-import/issues/1810
// eslint-disable-next-line import/no-unresolved
import * as csv from 'csv/sync'
import minimist from 'minimist'
import UserGetter from '../app/src/Features/User/UserGetter.js'
import { db } from '../app/src/infrastructure/mongodb.js'
import _ from 'lodash'
const argv = minimist(process.argv.slice(2), {
string: ['domain', 'output'],
boolean: ['help'],
alias: {
domain: 'd',
output: 'o',
},
default: {
domain: 'Email domain',
output: '/dev/stdout',
},
})
if (argv.help || argv._.length > 1) {
console.error(`Usage: node scripts/add_user_count_to_csv.mjs [OPTS] [INPUT-FILE]
Looks up the number of users for each domain in the input file and adds
columns for the number of users in the domain, subdomains, and total.
Options:
--domain name of the csv column containing the email domain (default: "Email domain")
--output output file (default: /dev/stdout)
`)
process.exit(1)
}
const input = fs.readFileSync(argv._[0], 'utf8')
const records = csv.parse(input, { columns: true })
if (records.length === 0) {
console.error('No records in input file')
process.exit(1)
}
async function main() {
for (const record of records) {
const domain = record[argv.domain]
const { domainUserCount, subdomainUserCount } = await getUserCount(domain, {
_id: 1,
})
record['Domain Users'] = domainUserCount
record['Subdomain Users'] = subdomainUserCount
record['Total Users'] = domainUserCount + subdomainUserCount
}
const output = csv.stringify(records, { header: true })
fs.writeFileSync(argv.output, output)
}
async function getUserCount(domain) {
const domainUsers = await UserGetter.promises.getUsersByHostname(domain, {
_id: 1,
})
const subdomainUsers = await getUsersByHostnameWithSubdomain(domain, {
_id: 1,
})
return {
domainUserCount: domainUsers.length,
subdomainUserCount: subdomainUsers.length,
}
}
async function getUsersByHostnameWithSubdomain(domain, projection) {
const reversedDomain = domain.trim().split('').reverse().join('')
const reversedDomainRegex = _.escapeRegExp(reversedDomain)
const query = {
emails: { $exists: true },
// look for users in subdomains of a domain, but not the domain itself
// e.g. for domain 'foo.edu', match 'cs.foo.edu' but not 'foo.edu'
// we use the reversed hostname index to do this efficiently
// we need to escape the domain name to prevent '.' from matching any character
'emails.reversedHostname': { $regex: '^' + reversedDomainRegex + '\\.' },
}
return await db.users.find(query, { projection }).toArray()
}
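// Worked example of the reversed-hostname match: for domain 'foo.edu' the reversed form
// is 'ude.oof', so the regex '^ude\.oof\.' matches a user email on 'cs.foo.edu'
// (stored reversed as 'ude.oof.sc') but not one on 'foo.edu' itself ('ude.oof').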
try {
await main()
console.log('Done')
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

@@ -0,0 +1,120 @@
/**
* This script backfills the account mapping for subscriptions that are active and have a group plan.
*
* The mapping joins a recurlySubscription_id to a subscription _id in BigQuery.
*
* This script assumes it is being run from a clean slate; it will create duplicate
* mappings if run multiple times. The Analytics team expects that this table may need
* to be deduplicated, as it is an event-sourcing record.
*
* Call it with `--commit` to actually register the mappings.
* Call it with `--verbose` to see debug logs.
* Call it with `--endDate=<ISO date>` to stop processing at a certain date
*/
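// Example invocation (hypothetical date; substitute the path this script is saved at):
//   node <this-script>.mjs --verbose --endDate=2024-01-01
// Without --commit this is a dry run: mappings are generated and logged, but not registered.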
import logger from '@overleaf/logger'
import minimist from 'minimist'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import { db } from '../../app/src/infrastructure/mongodb.js'
import AccountMappingHelper from '../../app/src/Features/Analytics/AccountMappingHelper.js'
import { registerAccountMapping } from '../../app/src/Features/Analytics/AnalyticsManager.js'
import { triggerGracefulShutdown } from '../../app/src/infrastructure/GracefulShutdown.js'
import Validation from '../../app/src/infrastructure/Validation.js'
const paramsSchema = Validation.Joi.object({
endDate: Validation.Joi.string().isoDate(),
commit: Validation.Joi.boolean().default(false),
verbose: Validation.Joi.boolean().default(false),
}).unknown(true)
let mapped = 0
let subscriptionCount = 0
const now = new Date().toISOString() // use the same timestamp for all mappings
const seenSubscriptions = new Set()
function registerMapping(subscription) {
if (seenSubscriptions.has(subscription._id)) {
logger.warn({ subscription }, 'duplicate subscription found, skipping')
return
}
seenSubscriptions.add(subscription._id)
subscriptionCount++
const mapping = AccountMappingHelper.generateSubscriptionToRecurlyMapping(
subscription._id,
subscription.recurlySubscription_id,
now
)
logger.debug(
{
recurly: subscription.recurlySubscription_id,
mapping,
},
`processing subscription ${subscription._id}`
)
if (commit) {
registerAccountMapping(mapping)
mapped++
}
}
async function main() {
const additionalBatchedUpdateOptions = {}
if (endDate) {
additionalBatchedUpdateOptions.BATCH_RANGE_END = endDate
}
await batchedUpdate(
db.subscriptions,
{
'recurlyStatus.state': 'active',
groupPlan: true,
},
subscriptions => subscriptions.forEach(registerMapping),
{
_id: 1,
recurlySubscription_id: 1,
},
{
readPreference: 'secondaryPreferred',
},
{
verboseLogging: verbose,
...additionalBatchedUpdateOptions,
}
)
logger.debug({}, `${subscriptionCount} subscriptions processed`)
if (commit) {
logger.debug({}, `${mapped} mappings registered`)
}
}
const {
error,
value: { commit, endDate, verbose },
} = paramsSchema.validate(
minimist(process.argv.slice(2), {
boolean: ['commit', 'verbose'],
string: ['endDate'],
})
)
logger.logger.level(verbose ? 'debug' : 'info')
if (error) {
logger.error({ error }, 'error with parameters')
triggerGracefulShutdown(done => done(1))
} else {
logger.info({ verbose, commit, endDate }, commit ? 'COMMITTING' : 'DRY RUN')
await main()
triggerGracefulShutdown({
close(done) {
logger.info({}, 'shutting down')
done()
},
})
}

@@ -0,0 +1,30 @@
import { BigQuery as GoogleBigQuery } from '@google-cloud/bigquery'
let dataset = null
function getDataset() {
if (!dataset) {
console.log(
'Connecting to BigQuery dataset: ',
process.env.BQ_PROJECT_ID,
process.env.BQ_DATASET_V2
)
dataset = new GoogleBigQuery({
projectId: process.env.BQ_PROJECT_ID,
keyFilename: process.env.GCS_KEY_FILE,
}).dataset(process.env.BQ_DATASET_V2)
}
return dataset
}
async function query(query, params = {}) {
const [job] = await getDataset().createQueryJob({ query, params })
const [rows] = await job.getQueryResults()
return rows
}
export default {
query,
}
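// Example usage (a hedged sketch; the table and parameter names below are hypothetical):
//
//   import GoogleBigQueryHelper from './helpers/GoogleBigQueryHelper.mjs'
//   const rows = await GoogleBigQueryHelper.query(
//     'SELECT user_id FROM some_table WHERE group_id IN UNNEST(@groupIds)',
//     { groupIds: ['507f1f77bcf86cd799439011'] }
//   )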

@@ -0,0 +1,281 @@
import GoogleBigQueryHelper from './helpers/GoogleBigQueryHelper.mjs'
import { Subscription } from '../../app/src/models/Subscription.js'
import AnalyticsManager from '../../app/src/Features/Analytics/AnalyticsManager.js'
import { DeletedSubscription } from '../../app/src/models/DeletedSubscription.js'
import minimist from 'minimist'
import _ from 'lodash'
import mongodb from 'mongodb-legacy'
const { ObjectId } = mongodb
let BATCH_SIZE, COMMIT, VERBOSE
async function main() {
console.log('## Syncing group subscription memberships...')
const subscriptionsCount = await Subscription.countDocuments({
groupPlan: true,
})
const deletedSubscriptionsCount = await DeletedSubscription.countDocuments({
'subscription.groupPlan': true,
})
console.log(
`## Going to synchronize ${subscriptionsCount} subscriptions and ${deletedSubscriptionsCount} deleted subscriptions`
)
await checkActiveSubscriptions()
await checkDeletedSubscriptions()
}
async function checkActiveSubscriptions() {
let subscriptions
const processedSubscriptionIds = new Set()
const cursor = Subscription.find(
{ groupPlan: true },
{ recurlySubscription_id: 1, member_ids: 1 }
)
.sort('_id')
.cursor()
do {
subscriptions = []
while (subscriptions.length <= BATCH_SIZE) {
const next = await cursor.next()
if (!next) {
break
}
subscriptions.push(next)
}
if (subscriptions.length) {
const groupIds = subscriptions.map(sub => sub._id)
const bigQueryGroupMemberships =
await fetchBigQueryMembershipStatuses(groupIds)
const membershipsByGroupId = _.groupBy(
bigQueryGroupMemberships,
'group_id'
)
for (const subscription of subscriptions) {
const subscriptionId = subscription._id.toString()
if (!processedSubscriptionIds.has(subscriptionId)) {
await checkSubscriptionMemberships(
subscription,
membershipsByGroupId[subscriptionId] || []
)
processedSubscriptionIds.add(subscriptionId)
}
}
}
} while (subscriptions.length > 0)
}
async function checkDeletedSubscriptions() {
let deletedSubscriptions
const processedSubscriptionIds = new Set()
const cursor = DeletedSubscription.find(
{ 'subscription.groupPlan': true },
{ subscription: 1 }
).cursor()
do {
deletedSubscriptions = []
while (deletedSubscriptions.length <= BATCH_SIZE) {
const next = await cursor.next()
if (!next) {
break
}
deletedSubscriptions.push(next.toObject().subscription)
}
if (deletedSubscriptions.length) {
const groupIds = deletedSubscriptions.map(sub => sub._id.toString())
const bigQueryGroupMemberships =
await fetchBigQueryMembershipStatuses(groupIds)
const membershipsByGroupId = _.groupBy(
bigQueryGroupMemberships,
'group_id'
)
for (const deletedSubscription of deletedSubscriptions) {
const subscriptionId = deletedSubscription._id.toString()
if (!processedSubscriptionIds.has(subscriptionId)) {
await checkDeletedSubscriptionMemberships(
deletedSubscription,
membershipsByGroupId[subscriptionId] || []
)
processedSubscriptionIds.add(subscriptionId)
}
}
}
} while (deletedSubscriptions.length > 0)
}
async function checkSubscriptionMemberships(subscription, membershipStatuses) {
if (VERBOSE) {
console.log(
'\n###########################################################################################',
'\n# Subscription (mongo): ',
'\n# _id: \t\t\t\t',
subscription._id.toString(),
'\n# member_ids: \t\t\t',
subscription.member_ids.map(_id => _id.toString()),
'\n# recurlySubscription_id: \t',
subscription.recurlySubscription_id
)
console.log('#\n# Membership statuses found in BigQuery: ')
console.table(membershipStatuses)
}
// create missing `joined` events when membership status is missing
for (const memberId of subscription.member_ids) {
if (
!_.find(membershipStatuses, {
user_id: memberId.toString(),
is_member: true,
})
) {
await sendCorrectiveEvent(
memberId,
'group-subscription-joined',
subscription
)
}
}
// create missing `left` events if user is not a member of the group anymore
for (const { user_id: userId, is_member: isMember } of membershipStatuses) {
if (
isMember &&
!subscription.member_ids.some(id => id.toString() === userId)
) {
await sendCorrectiveEvent(userId, 'group-subscription-left', subscription)
}
}
}
async function checkDeletedSubscriptionMemberships(
subscription,
membershipStatuses
) {
if (VERBOSE) {
console.log(
'\n###########################################################################################',
'\n# Deleted subscription (mongo): ',
'\n# _id: \t\t\t\t',
subscription._id.toString(),
'\n# member_ids: \t\t\t',
subscription.member_ids.map(_id => _id.toString()),
'\n# recurlySubscription_id: \t',
subscription.recurlySubscription_id
)
console.log('#\n# Membership statuses found in BigQuery: ')
console.table(membershipStatuses)
}
const updatedUserIds = new Set()
// create missing `left` events if user was a member of the group in BQ and status is not up-to-date
for (const memberId of subscription.member_ids.map(id => id.toString())) {
if (
_.find(membershipStatuses, {
user_id: memberId,
is_member: true,
})
) {
await sendCorrectiveEvent(
memberId,
'group-subscription-left',
subscription
)
updatedUserIds.add(memberId)
}
}
// for cases where the user has been removed from the subscription before it was deleted and status is not up-to-date
for (const { user_id: userId, is_member: isMember } of membershipStatuses) {
if (isMember && !updatedUserIds.has(userId)) {
await sendCorrectiveEvent(userId, 'group-subscription-left', subscription)
updatedUserIds.add(userId)
}
}
}
async function sendCorrectiveEvent(userId, event, subscription) {
if (!ObjectId.isValid(userId)) {
console.warn(`Skipping '${event}' for user ${userId}: invalid user ID`)
return
}
const segmentation = {
groupId: subscription._id.toString(),
subscriptionId: subscription.recurlySubscription_id,
source: 'sync',
}
if (COMMIT) {
console.log(
`Sending event '${event}' for user ${userId} with segmentation: ${JSON.stringify(
segmentation
)}`
)
await AnalyticsManager.recordEventForUser(userId, event, segmentation)
} else {
console.log(
`Dry run - would send event '${event}' for user ${userId} with segmentation: ${JSON.stringify(
segmentation
)}`
)
}
}
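// fetchBigQueryMembershipStatuses (below) returns the latest membership status per
// (group_id, user_id) pair, resolving analytics ids to user ids via INT_user_aliases
// where a mapping exists.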
/**
* @param {Array<ObjectId>} groupIds
* @return {Promise<*>}
*/
async function fetchBigQueryMembershipStatuses(groupIds) {
const query = `\
WITH user_memberships AS (
SELECT
group_id,
COALESCE(user_aliases.user_id, ugm.user_id) AS user_id,
is_member,
ugm.created_at
FROM INT_user_group_memberships ugm
LEFT JOIN INT_user_aliases user_aliases ON ugm.user_id = user_aliases.analytics_id
WHERE ugm.group_id IN UNNEST(@groupIds)
),
ordered_status AS (
SELECT *,
ROW_NUMBER() OVER(PARTITION BY group_id, user_id ORDER BY created_at DESC) AS row_number
FROM user_memberships
)
SELECT group_id, user_id, is_member, created_at FROM ordered_status
WHERE row_number = 1;
`
return await GoogleBigQueryHelper.query(query, {
groupIds: groupIds.map(id => id.toString()),
})
}
const setup = () => {
const argv = minimist(process.argv.slice(2))
BATCH_SIZE = argv.batchSize ? parseInt(argv.batchSize, 10) : 100
COMMIT = argv.commit !== undefined
VERBOSE = argv.debug !== undefined
if (!COMMIT) {
console.warn('Doing dry run without --commit')
}
if (VERBOSE) {
console.log('Running in verbose mode')
}
}
setup()
try {
await main()
console.error('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}

@@ -0,0 +1,146 @@
// @ts-check
import minimist from 'minimist'
import process from 'node:process'
import ChatApiHandler from '../app/src/Features/Chat/ChatApiHandler.js'
import DocumentUpdaterHandler from '../app/src/Features/DocumentUpdater/DocumentUpdaterHandler.js'
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
import HistoryManager from '../app/src/Features/History/HistoryManager.js'
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
const OPTS = parseArgs()
function usage() {
console.error('Attach dangling threads to the beginning of a document')
console.error('')
console.error('Usage: node attach_dangling_comments_to_doc.mjs')
console.error(' --project PROJECT_ID')
console.error(' --doc DOC_ID')
console.error(' [--commit]')
}
function parseArgs() {
const args = minimist(process.argv.slice(2), {
boolean: ['commit'],
string: ['project', 'doc'],
})
const projectId = args.project
const docId = args.doc
if (!projectId || !docId) {
usage()
process.exit(0)
}
return { projectId, docId, commit: args.commit }
}
/**
* @typedef {{ id: string, content: string, timestamp: number, user_id: string }} Message
* @typedef {{ id: string, messages: Message[] }} Thread
*/
/**
* @param {string} projectId
* @returns {Promise<Thread[]>}
*/
async function getDanglingThreads(projectId) {
const docRanges = await DocstoreManager.promises.getAllRanges(projectId)
const threads = await ChatApiHandler.promises.getThreads(projectId)
const threadsInDoc = new Set()
for (const doc of docRanges) {
for (const comment of doc.ranges?.comments ?? []) {
threadsInDoc.add(comment.op.t)
}
}
const danglingThreads = Object.keys(threads)
.filter(threadId => !threadsInDoc.has(threadId))
.map(id => ({ ...threads[id], id }))
console.log(`Found:`)
console.log(` - ${Object.keys(threads).length} threads`)
console.log(` - ${threadsInDoc.size} threads in docRanges`)
console.log(` - ${danglingThreads.length} dangling threads`)
return danglingThreads
}
const ensureDocExists = async (projectId, docId) => {
const doc = await DocstoreManager.promises.getDoc(projectId, docId)
if (!doc) {
console.error(`Document ${docId} not found`)
process.exit(1)
}
}
/**
* @param {Thread[]} threads
*/
const ensureThreadsHaveMessages = async threads => {
const threadsWithoutMessages = threads.filter(
thread => !thread.messages || thread.messages.length === 0
)
if (threadsWithoutMessages.length > 0) {
console.error(`The following threads have no messages:`)
console.error(threadsWithoutMessages.join(','))
process.exit(1)
}
}
/**
* @param {string} projectId
* @param {string} docId
*/
async function processProject(projectId, docId) {
console.log(`Processing project ${projectId}`)
await DocumentUpdaterHandler.promises.flushProjectToMongoAndDelete(projectId)
const danglingThreads = await getDanglingThreads(projectId)
await ensureDocExists(projectId, docId)
await ensureThreadsHaveMessages(danglingThreads)
for (const thread of danglingThreads) {
const firstMessage = thread.messages[0]
if (!firstMessage) {
console.error(`Thread ${thread.id} has no messages`)
continue
}
const rangeComment = newRangeComment(thread, firstMessage)
console.log(`Attaching thread ${thread.id} to doc ${docId}`)
if (OPTS.commit) {
await db.docs.updateOne(
{ _id: new ObjectId(docId) },
{ $push: { 'ranges.comments': rangeComment } }
)
}
}
if (OPTS.commit) {
console.log(`Resyncing history for project ${projectId}`)
await HistoryManager.promises.resyncProject(projectId)
}
}
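// newRangeComment (below) builds a zero-length comment range at position 0
// (op: { p: 0, c: '' }), so the dangling thread is re-attached at the very
// beginning of the document, matching the script's stated purpose.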
/**
* @param {Thread} thread
* @param {Message} message
*/
const newRangeComment = (thread, message) => ({
id: new ObjectId(thread.id),
op: { t: new ObjectId(thread.id), p: 0, c: '' },
metadata: {
user_id: new ObjectId(message.user_id),
ts: new Date(message.timestamp),
},
})
await processProject(OPTS.projectId, OPTS.docId)
if (!OPTS.commit) {
console.log('This was a dry run. Rerun with --commit to apply changes')
}
process.exit(0)

@@ -0,0 +1,133 @@
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import { promiseMapWithLimit, promisify } from '@overleaf/promise-utils'
import { db } from '../app/src/infrastructure/mongodb.js'
import _ from 'lodash'
import { fileURLToPath } from 'node:url'
const sleep = promisify(setTimeout)
async function main(options) {
if (!options) {
options = {}
}
_.defaults(options, {
writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10,
performCleanup: process.argv.includes('--perform-cleanup'),
fixPartialInserts: process.argv.includes('--fix-partial-inserts'),
letUserDoubleCheckInputsFor: parseInt(
process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
10
),
})
await letUserDoubleCheckInputs(options)
await batchedUpdate(
db.projects,
// array is not empty ~ the array has at least one item
{ 'deletedFiles.0': { $exists: true } },
async projects => {
await processBatch(projects, options)
},
{ _id: 1, deletedFiles: 1 }
)
}
async function processBatch(projects, options) {
await promiseMapWithLimit(
options.writeConcurrency,
projects,
async project => {
await processProject(project, options)
}
)
}
async function processProject(project, options) {
await backFillFiles(project, options)
if (options.performCleanup) {
await cleanupProject(project)
}
}
async function backFillFiles(project, options) {
const projectId = project._id
filterDuplicatesInPlace(project)
project.deletedFiles.forEach(file => {
file.projectId = projectId
})
if (options.fixPartialInserts) {
await fixPartialInserts(project)
} else {
await db.deletedFiles.insertMany(project.deletedFiles)
}
}
function filterDuplicatesInPlace(project) {
const fileIds = new Set()
project.deletedFiles = project.deletedFiles.filter(file => {
const id = file._id.toString()
if (fileIds.has(id)) return false
fileIds.add(id)
return true
})
}
async function fixPartialInserts(project) {
const seenFileIds = new Set(
(
await db.deletedFiles
.find(
{ _id: { $in: project.deletedFiles.map(file => file._id) } },
{ projection: { _id: 1 } }
)
.toArray()
).map(file => file._id.toString())
)
project.deletedFiles = project.deletedFiles.filter(file => {
const id = file._id.toString()
if (seenFileIds.has(id)) return false
seenFileIds.add(id)
return true
})
if (project.deletedFiles.length > 0) {
await db.deletedFiles.insertMany(project.deletedFiles)
}
}
async function cleanupProject(project) {
await db.projects.updateOne(
{ _id: project._id },
{ $set: { deletedFiles: [] } }
)
}
async function letUserDoubleCheckInputs(options) {
if (options.performCleanup) {
console.error('BACK FILLING AND PERFORMING CLEANUP')
} else {
console.error(
'BACK FILLING ONLY - You will need to rerun with --perform-cleanup'
)
}
console.error(
'Waiting for you to double check inputs for',
options.letUserDoubleCheckInputsFor,
'ms'
)
await sleep(options.letUserDoubleCheckInputsFor)
}
export default main
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main()
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}
}

@@ -0,0 +1,92 @@
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import { promiseMapWithLimit, promisify } from '@overleaf/promise-utils'
import { db } from '../app/src/infrastructure/mongodb.js'
import { fileURLToPath } from 'node:url'
import _ from 'lodash'
const sleep = promisify(setTimeout)
async function main(options) {
if (!options) {
options = {}
}
_.defaults(options, {
writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10,
performCleanup: process.argv.pop() === '--perform-cleanup',
letUserDoubleCheckInputsFor: parseInt(
process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
10
),
})
await letUserDoubleCheckInputs(options)
await batchedUpdate(
db.projects,
// array is not empty ~ the array has at least one item
{ 'deletedDocs.0': { $exists: true } },
async projects => {
await processBatch(projects, options)
},
{ _id: 1, deletedDocs: 1 }
)
}
async function processBatch(projects, options) {
await promiseMapWithLimit(
options.writeConcurrency,
projects,
async project => {
await processProject(project, options)
}
)
}
async function processProject(project, options) {
for (const doc of project.deletedDocs) {
await backFillDoc(doc)
}
if (options.performCleanup) {
await cleanupProject(project)
}
}
async function backFillDoc(doc) {
const { name, deletedAt } = doc
await db.docs.updateOne({ _id: doc._id }, { $set: { name, deletedAt } })
}
async function cleanupProject(project) {
await db.projects.updateOne(
{ _id: project._id },
{ $set: { deletedDocs: [] } }
)
}
async function letUserDoubleCheckInputs(options) {
if (options.performCleanup) {
console.error('BACK FILLING AND PERFORMING CLEANUP')
} else {
console.error(
'BACK FILLING ONLY - You will need to rerun with --perform-cleanup'
)
}
console.error(
'Waiting for you to double check inputs for',
options.letUserDoubleCheckInputsFor,
'ms'
)
await sleep(options.letUserDoubleCheckInputsFor)
}
export default main
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main()
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}
}

@@ -0,0 +1,59 @@
import { db } from '../app/src/infrastructure/mongodb.js'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import { fileURLToPath } from 'node:url'
const DRY_RUN = !process.argv.includes('--dry-run=false')
const LOG_EVERY_IN_S = parseInt(process.env.LOG_EVERY_IN_S, 10) || 5
async function main(DRY_RUN) {
let processed = 0
let deleted = 0
let lastLog = 0
function logProgress() {
console.log(`rev missing ${processed} | deleted=true ${deleted}`)
}
await batchedUpdate(
db.docs,
{ rev: { $exists: false } },
async docs => {
if (!DRY_RUN) {
await db.docs.updateMany(
{
_id: { $in: docs.map(doc => doc._id) },
rev: { $exists: false },
},
{ $set: { rev: 1 } }
)
}
processed += docs.length
deleted += docs.filter(doc => doc.deleted).length
if (Date.now() - lastLog >= LOG_EVERY_IN_S * 1000) {
logProgress()
lastLog = Date.now()
}
},
{
_id: 1,
deleted: true,
}
)
logProgress()
}
export default main
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main(DRY_RUN)
console.log('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}
}

@@ -0,0 +1,161 @@
import { promisify } from 'node:util'
import mongodb from 'mongodb-legacy'
import {
db,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.js'
import _ from 'lodash'
import LRUCache from 'lru-cache'
import { fileURLToPath } from 'node:url'
const { ObjectId } = mongodb
const sleep = promisify(setTimeout)
const NOW_IN_S = Date.now() / 1000
const ONE_WEEK_IN_S = 60 * 60 * 24 * 7
const TEN_SECONDS = 10 * 1000
const DUMMY_NAME = 'unknown.tex'
const DUMMY_TIME = new Date('2021-04-12T00:00:00.000Z')
let deletedProjectsCache = null
function getSecondsFromObjectId(id) {
return id.getTimestamp().getTime() / 1000
}
async function main(options) {
if (!options) {
options = {}
}
_.defaults(options, {
dryRun: process.env.DRY_RUN === 'true',
cacheSize: parseInt(process.env.CACHE_SIZE, 10) || 100,
// Only wrap FIRST_PROJECT_ID when it is set, so the "missing id" check below can fire.
firstProjectId: process.env.FIRST_PROJECT_ID
? new ObjectId(process.env.FIRST_PROJECT_ID)
: null,
incrementByS: parseInt(process.env.INCREMENT_BY_S, 10) || ONE_WEEK_IN_S,
batchSize: parseInt(process.env.BATCH_SIZE, 10) || 1000,
stopAtS: parseInt(process.env.STOP_AT_S, 10) || NOW_IN_S,
letUserDoubleCheckInputsFor:
parseInt(process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR, 10) || TEN_SECONDS,
})
if (!options.firstProjectId) {
console.error('Set FIRST_PROJECT_ID and re-run.')
process.exit(1)
}
deletedProjectsCache = new LRUCache({
max: options.cacheSize,
})
await letUserDoubleCheckInputs(options)
let startId = options.firstProjectId
let nProcessed = 0
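// Scan forward in time: project ids encode their creation timestamp, so we walk
// [startId, endId) windows of `incrementByS` seconds until `stopAtS` is reached.
// When a window returns a full batch, the next window restarts at the last
// project_id seen so that no docs are skipped.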
while (getSecondsFromObjectId(startId) <= options.stopAtS) {
const end = getSecondsFromObjectId(startId) + options.incrementByS
let endId = ObjectId.createFromTime(end)
const query = {
project_id: {
// include edge
$gte: startId,
// exclude edge
$lt: endId,
},
deleted: true,
name: {
$exists: false,
},
}
const docs = await db.docs
.find(query, { readPreference: READ_PREFERENCE_SECONDARY })
.project({ _id: 1, project_id: 1 })
.limit(options.batchSize)
.toArray()
if (docs.length) {
const docIds = docs.map(doc => doc._id)
console.log('Back filling dummy meta data for', JSON.stringify(docIds))
await processBatch(docs, options)
nProcessed += docIds.length
if (docs.length === options.batchSize) {
endId = docs[docs.length - 1].project_id
}
}
console.error('Processed %d until %s', nProcessed, endId)
startId = endId
}
}
async function getDeletedProject(projectId) {
const cacheKey = projectId.toString()
if (deletedProjectsCache.has(cacheKey)) {
return deletedProjectsCache.get(cacheKey)
}
const deletedProject = await db.deletedProjects.findOne(
{ 'deleterData.deletedProjectId': projectId },
{
projection: {
_id: 1,
'project.deletedDocs': 1,
},
}
)
deletedProjectsCache.set(cacheKey, deletedProject)
return deletedProject
}
async function processBatch(docs, options) {
for (const doc of docs) {
const { _id: docId, project_id: projectId } = doc
const deletedProject = await getDeletedProject(projectId)
let name = DUMMY_NAME
let deletedAt = DUMMY_TIME
if (deletedProject) {
const project = deletedProject.project
if (project) {
const deletedDoc =
project.deletedDocs &&
project.deletedDocs.find(deletedDoc => docId.equals(deletedDoc._id))
if (deletedDoc) {
console.log('Found deletedDoc for %s', docId)
name = deletedDoc.name
deletedAt = deletedDoc.deletedAt
} else {
console.log('Missing deletedDoc for %s', docId)
}
} else {
console.log('Orphaned deleted doc %s (failed hard deletion)', docId)
}
} else {
console.log('Orphaned deleted doc %s (no deletedProjects entry)', docId)
}
if (options.dryRun) continue
await db.docs.updateOne({ _id: docId }, { $set: { name, deletedAt } })
}
}
async function letUserDoubleCheckInputs(options) {
console.error('Options:', JSON.stringify(options, null, 2))
console.error(
'Waiting for you to double check inputs for',
options.letUserDoubleCheckInputsFor,
'ms'
)
await sleep(options.letUserDoubleCheckInputsFor)
}
export default main
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main()
console.error('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}
}

@@ -0,0 +1,93 @@
import {
db,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.js'
import UserSessionsManager from '../app/src/Features/User/UserSessionsManager.js'
const COMMIT = process.argv.includes('--commit')
const KEEP_SESSIONS = process.argv.includes('--keep-sessions')
const FULL_STAFF_ACCESS = {
publisherMetrics: true,
publisherManagement: true,
institutionMetrics: true,
institutionManagement: true,
groupMetrics: true,
groupManagement: true,
adminMetrics: true,
splitTestMetrics: true,
splitTestManagement: true,
}
function doesNotHaveFullStaffAccess(user) {
if (!user.staffAccess) {
return true
}
for (const field of Object.keys(FULL_STAFF_ACCESS)) {
if (!user.staffAccess[field]) {
return true
}
}
return false
}
function formatUser(user) {
user = Object.assign({}, user, user.staffAccess)
delete user.staffAccess
return user
}
async function main() {
const adminUsers = await db.users
.find(
{ isAdmin: true },
{
projection: {
_id: 1,
email: 1,
staffAccess: 1,
},
readPreference: READ_PREFERENCE_SECONDARY,
}
)
.toArray()
console.log('All Admin users:')
console.table(adminUsers.map(formatUser))
const incompleteUsers = adminUsers.filter(doesNotHaveFullStaffAccess)
if (incompleteUsers.length === 0) {
console.warn('All Admin users have full staff access.')
return
}
console.log()
console.log('Incomplete staff access:')
console.table(incompleteUsers.map(formatUser))
if (COMMIT) {
for (const user of incompleteUsers) {
console.error(
`Granting ${user.email} (${user._id.toString()}) full staff access`
)
await db.users.updateOne(
{ _id: user._id, isAdmin: true },
{ $set: { staffAccess: FULL_STAFF_ACCESS } }
)
if (!KEEP_SESSIONS) {
await UserSessionsManager.promises.removeSessionsFromRedis(user)
}
}
} else {
console.warn('Use --commit to grant missing staff access.')
}
}
try {
await main()
console.error('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}

@@ -0,0 +1,70 @@
import NotificationsBuilder from '../app/src/Features/Notifications/NotificationsBuilder.js'
import { db } from '../app/src/infrastructure/mongodb.js'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
const DRY_RUN = !process.argv.includes('--dry-run=false')
if (DRY_RUN) {
console.log('Doing dry run')
}
async function processBatch(groupSubscriptionsBatch) {
console.log('\n')
console.log('----- Batch computation started -----')
const flattenedMemberIds = groupSubscriptionsBatch
.map(sub => sub.member_ids)
.flatMap(memberId => memberId)
const uniqueFlattenedMemberIds = [...new Set(flattenedMemberIds)]
const userWithIndividualAndGroupSubscriptions = await db.subscriptions
.find({
groupPlan: false,
'recurlyStatus.state': 'active',
admin_id: { $in: uniqueFlattenedMemberIds },
})
.toArray()
console.log(
`Found ${userWithIndividualAndGroupSubscriptions.length} affected users in this batch`
)
if (DRY_RUN) {
console.error('---')
console.error('Dry-run enabled, use --dry-run=false to commit changes')
console.error('---')
} else {
if (userWithIndividualAndGroupSubscriptions.length > 0) {
console.log(
`Notifying ${userWithIndividualAndGroupSubscriptions.length} users`
)
for (const notif of userWithIndividualAndGroupSubscriptions) {
await NotificationsBuilder.promises
.personalAndGroupSubscriptions(notif.admin_id.toString())
.create()
}
console.log(
`${userWithIndividualAndGroupSubscriptions.length} users successfully notified in this batch`
)
} else {
console.log(
'No users currently subscribe to both individual and group subscription in this batch'
)
}
}
}
async function main() {
await batchedUpdate(db.subscriptions, { groupPlan: true }, processBatch, {
member_ids: 1,
})
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

@@ -0,0 +1,118 @@
// @ts-check
import '../app/src/models/User.js'
import { batchedUpdateWithResultHandling } from '@overleaf/mongo-utils/batchedUpdate.js'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
import { getQueue } from '../app/src/infrastructure/Queues.js'
import SubscriptionLocator from '../app/src/Features/Subscription/SubscriptionLocator.js'
import PlansLocator from '../app/src/Features/Subscription/PlansLocator.js'
import FeaturesHelper from '../app/src/Features/Subscription/FeaturesHelper.js'
import { db } from '../app/src/infrastructure/mongodb.js'
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY || '10', 10)
const mixpanelSinkQueue = getQueue('analytics-mixpanel-sink')
async function processUser(user) {
const analyticsId = user.analyticsId || user._id
await _sendPropertyToQueue(analyticsId, 'user-id', user._id)
await _sendPropertyToQueue(analyticsId, 'analytics-id', analyticsId)
await _sendPropertyToQueue(analyticsId, 'created-at', user.signUpDate)
if (user.alphaProgram !== undefined) {
await _sendPropertyToQueue(analyticsId, 'alpha-program', user.alphaProgram)
}
if (user.betaProgram !== undefined) {
await _sendPropertyToQueue(analyticsId, 'beta-program', user.betaProgram)
}
const groupSubscriptionPlanCode = await _getGroupSubscriptionPlanCode(
user._id
)
if (groupSubscriptionPlanCode) {
await _sendPropertyToQueue(
analyticsId,
'group-subscription-plan-code',
groupSubscriptionPlanCode
)
}
const matchedFeatureSet = FeaturesHelper.getMatchedFeatureSet(user.features)
if (matchedFeatureSet !== 'personal') {
await _sendPropertyToQueue(analyticsId, 'feature-set', matchedFeatureSet)
}
if (user.splitTests) {
for (const splitTestName of Object.keys(user.splitTests)) {
const assignments = user.splitTests[splitTestName]
if (Array.isArray(assignments)) {
for (const assignment of assignments) {
await _sendPropertyToQueue(
analyticsId,
`split-test-${splitTestName}-${assignment.versionNumber}`,
`${assignment.variantName}`
)
}
}
}
}
}
async function _getGroupSubscriptionPlanCode(userId) {
const subscriptions =
await SubscriptionLocator.promises.getMemberSubscriptions(userId)
let bestPlanCode = null
let bestFeatures = {}
for (const subscription of subscriptions) {
const plan = PlansLocator.findLocalPlanInSettings(subscription.planCode)
if (
plan &&
FeaturesHelper.isFeatureSetBetter(plan.features, bestFeatures)
) {
bestPlanCode = plan.planCode
bestFeatures = plan.features
}
}
return bestPlanCode
}
async function _sendPropertyToQueue(
analyticsId,
propertyName,
propertyValue,
createdAt = new Date()
) {
if (propertyValue == null) {
return
}
await mixpanelSinkQueue.add('user-property', {
analyticsId,
propertyName,
propertyValue,
createdAt,
})
}
async function processBatch(_, users) {
await promiseMapWithLimit(WRITE_CONCURRENCY, users, async user => {
await processUser(user)
})
}
batchedUpdateWithResultHandling(
db.users,
{
$nor: [
{ thirdPartyIdentifiers: { $exists: false } },
{ thirdPartyIdentifiers: { $size: 0 } },
],
},
processBatch,
{
_id: true,
analyticsId: true,
signUpDate: true,
splitTests: true,
alphaProgram: true,
betaProgram: true,
}
)

@@ -0,0 +1,56 @@
// @ts-check
import minimist from 'minimist'
import { batchedUpdateWithResultHandling } from '@overleaf/mongo-utils/batchedUpdate.js'
import { db } from '../app/src/infrastructure/mongodb.js'
const argv = minimist(process.argv.slice(2))
const commit = argv.commit !== undefined
let imageName = argv._[0]
function usage() {
console.log(
'Usage: node backfill_project_image_name.mjs --commit <texlive_docker_image>'
)
console.log(
'Argument <texlive_docker_image> is not required when TEX_LIVE_DOCKER_IMAGE is set.'
)
console.log(
'Environment variable ALL_TEX_LIVE_DOCKER_IMAGES must contain <texlive_docker_image>.'
)
}
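// Example (hypothetical image names):
//   ALL_TEX_LIVE_DOCKER_IMAGES='texlive-full:2022.1,texlive-full:2023.1' \
//     node backfill_project_image_name.mjs --commit texlive-full:2023.1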
if (!imageName && process.env.TEX_LIVE_DOCKER_IMAGE) {
imageName = process.env.TEX_LIVE_DOCKER_IMAGE
}
if (!imageName) {
usage()
process.exit(1)
}
if (!process.env.ALL_TEX_LIVE_DOCKER_IMAGES) {
console.error(
'Error: environment variable ALL_TEX_LIVE_DOCKER_IMAGES is not defined.'
)
usage()
process.exit(1)
}
if (!process.env.ALL_TEX_LIVE_DOCKER_IMAGES.split(',').includes(imageName)) {
console.error(
`Error: ALL_TEX_LIVE_DOCKER_IMAGES doesn't contain ${imageName}`
)
usage()
process.exit(1)
}
if (!commit) {
console.error('DOING DRY RUN. TO SAVE CHANGES PASS --commit')
process.exit(1)
}
batchedUpdateWithResultHandling(
db.projects,
{ imageName: null },
{ $set: { imageName } }
)

@@ -0,0 +1,79 @@
import { db } from '../app/src/infrastructure/mongodb.js'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import minimist from 'minimist'
import CollaboratorsInviteHelper from '../app/src/Features/Collaborators/CollaboratorsInviteHelper.js'
import { fileURLToPath } from 'node:url'
const argv = minimist(process.argv.slice(2), {
boolean: ['dry-run', 'help'],
default: {
'dry-run': true,
},
})
const DRY_RUN = argv['dry-run']
async function addTokenHmacField(DRY_RUN) {
const query = { tokenHmac: { $exists: false } }
await batchedUpdate(
db.projectInvites,
query,
async invites => {
for (const invite of invites) {
console.log(
`=> Missing "tokenHmac" token in invitation: ${invite._id.toString()}`
)
if (DRY_RUN) {
console.log(
`=> DRY RUN - would add "tokenHmac" token to invitation ${invite._id.toString()}`
)
continue
}
const tokenHmac = CollaboratorsInviteHelper.hashInviteToken(
invite.token
)
await db.projectInvites.updateOne(
{ _id: invite._id },
{ $set: { tokenHmac } }
)
console.log(
`=> Added "tokenHmac" token to invitation ${invite._id.toString()}`
)
}
},
{ token: 1 }
)
}
async function main(DRY_RUN) {
await addTokenHmacField(DRY_RUN)
}
export default main
if (fileURLToPath(import.meta.url) === process.argv[1]) {
if (argv.help || argv._.length > 1) {
console.error(`Usage: node scripts/backfill_project_invites_token_hmac.mjs
Adds a "tokenHmac" field (which is a hashed version of the token) to each project invite record.
Options:
--dry-run finds invitations without HMAC token but does not do any updates
`)
process.exit(1)
}
try {
await main(DRY_RUN)
console.error('Done')
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}
}

@@ -0,0 +1,61 @@
import { batchedUpdateWithResultHandling } from '@overleaf/mongo-utils/batchedUpdate.js'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
import SubscriptionLocator from '../app/src/Features/Subscription/SubscriptionLocator.js'
import PlansLocator from '../app/src/Features/Subscription/PlansLocator.js'
import FeaturesHelper from '../app/src/Features/Subscription/FeaturesHelper.js'
import AnalyticsManager from '../app/src/Features/Analytics/AnalyticsManager.js'
import { db } from '../app/src/infrastructure/mongodb.js'
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
async function getGroupSubscriptionPlanCode(userId) {
const subscriptions =
await SubscriptionLocator.promises.getMemberSubscriptions(userId)
let bestPlanCode = null
let bestFeatures = {}
for (const subscription of subscriptions) {
const plan = PlansLocator.findLocalPlanInSettings(subscription.planCode)
if (
plan &&
FeaturesHelper.isFeatureSetBetter(plan.features, bestFeatures)
) {
bestPlanCode = plan.planCode
bestFeatures = plan.features
}
}
return bestPlanCode
}
async function processUser(user) {
const analyticsId = user.analyticsId || user._id
const groupSubscriptionPlanCode = await getGroupSubscriptionPlanCode(user._id)
if (groupSubscriptionPlanCode) {
await AnalyticsManager.setUserPropertyForAnalyticsId(
analyticsId,
'group-subscription-plan-code',
groupSubscriptionPlanCode
)
}
const matchedFeatureSet = FeaturesHelper.getMatchedFeatureSet(user.features)
if (matchedFeatureSet !== 'personal') {
await AnalyticsManager.setUserPropertyForAnalyticsId(
analyticsId,
'feature-set',
matchedFeatureSet
)
}
}
async function processBatch(users) {
await promiseMapWithLimit(WRITE_CONCURRENCY, users, async user => {
await processUser(user)
})
}
batchedUpdateWithResultHandling(db.users, {}, processBatch, {
_id: true,
analyticsId: true,
features: true,
})

@@ -0,0 +1,33 @@
import SAMLUserIdAttributeBatchHandler from '../modules/saas-authentication/app/src/SAML/SAMLUserIdAttributeBatchHandler.mjs'
const startInstitutionId = parseInt(process.argv[2])
const endInstitutionId = parseInt(process.argv[3])
process.env.LOG_LEVEL = 'info'
process.env.MONGO_CONNECTION_STRING =
process.env.READ_ONLY_MONGO_CONNECTION_STRING
console.log('Checking users at institutions')
console.log(
'Start institution ID:',
startInstitutionId ||
'none provided, will start at beginning of ordered list.'
)
console.log(
'End institution ID:',
endInstitutionId || 'none provided, will go to end of ordered list.'
)
try {
const result = await SAMLUserIdAttributeBatchHandler.check(
startInstitutionId,
endInstitutionId
)
console.log(result)
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

@@ -0,0 +1,122 @@
import minimist from 'minimist'
import { promisify } from 'node:util'
import bcrypt from 'bcrypt'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
// https://github.com/import-js/eslint-plugin-import/issues/1810
// eslint-disable-next-line import/no-unresolved
import * as csv from 'csv/sync'
const bcryptCompare = promisify(bcrypt.compare)
const bcryptGenSalt = promisify(bcrypt.genSalt)
const bcryptHash = promisify(bcrypt.hash)
const argv = minimist(process.argv.slice(2), {
string: ['major', 'minor', 'concurrency', 'samples', 'password'],
bool: ['hash', 'compare', 'verbose', 'table', 'csv'],
default: {
major: '12,13,14,15',
minor: 'a',
concurrency: '1,2,4,10,20',
samples: 100,
password: 'x'.repeat(72),
hash: true,
compare: true,
verbose: true,
table: true,
csv: true,
},
})
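// Example (hypothetical values): to benchmark hashing only, at 12 and 13 rounds with 50
// samples per data point, pass: --major=12,13 --concurrency=1,4 --samples=50 --no-compare --no-csv
// (minimist interprets --no-<flag> as setting that boolean flag to false).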
const SAMPLES = parseInt(argv.samples, 10)
const STATS = []
function asListOfInt(s) {
return s.split(',').map(x => parseInt(x, 10))
}
async function computeHash(rounds, minor) {
const salt = await bcryptGenSalt(rounds, minor)
return await bcryptHash(argv.password, salt)
}
async function sample(concurrency, fn) {
const stats = await promiseMapWithLimit(
concurrency,
new Array(SAMPLES).fill(0),
async () => {
const t0 = process.hrtime.bigint()
await fn()
const t1 = process.hrtime.bigint()
return Number(t1 - t0) / 1e6
}
)
const sum = stats.reduce((a, b) => a + b, 0)
const avg = sum / SAMPLES
stats.sort((a, b) => a - b)
const median = stats[Math.ceil(SAMPLES / 2)]
const p95 = stats[Math.ceil(SAMPLES * 0.95)]
const min = stats[0]
const max = stats[stats.length - 1]
return Object.fromEntries(
Object.entries({
min,
avg,
median,
p95,
max,
}).map(([key, value]) => [key, Math.ceil(value) + 'ms'])
)
}
async function run(rounds, minor, concurrency) {
if (argv.hash) {
const stats = await sample(concurrency, async () => {
await computeHash(rounds, minor)
})
STATS.push({
kind: 'hash',
rounds,
concurrency,
...stats,
})
if (argv.verbose) console.log(STATS[STATS.length - 1])
}
if (argv.compare) {
const hashedPassword = await computeHash(rounds, minor)
const stats = await sample(concurrency, async () => {
await bcryptCompare(argv.password, hashedPassword)
})
STATS.push({
kind: 'compare',
rounds,
concurrency,
...stats,
})
if (argv.verbose) console.log(STATS[STATS.length - 1])
}
}
async function main() {
for (const rounds of asListOfInt(argv.major)) {
for (const minor of argv.minor.split(',')) {
for (const concurrency of asListOfInt(argv.concurrency)) {
await run(rounds, minor, concurrency)
}
}
}
STATS.forEach(s => {
s.samples = SAMPLES
})
if (argv.table) console.table(STATS)
if (argv.csv) console.log(csv.stringify(STATS, { header: true }))
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

@@ -0,0 +1,221 @@
// @ts-check
import minimist from 'minimist'
import PQueue from 'p-queue'
import {
db,
ObjectId,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.js'
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
import { NotFoundError } from '../app/src/Features/Errors/Errors.js'
const OPTS = parseArgs()
function parseArgs() {
const args = minimist(process.argv.slice(2), {
string: ['min-project-id', 'max-project-id', 'project-modified-since'],
boolean: ['help', 'dangling-comments', 'tracked-changes'],
})
if (args.help) {
usage()
process.exit(0)
}
const danglingComments = Boolean(args['dangling-comments'])
const trackedChanges = Boolean(args['tracked-changes'])
if (!danglingComments && !trackedChanges) {
console.log(
'At least one of --dangling-comments or --tracked-changes must be enabled'
)
process.exit(1)
}
return {
minProjectId: args['min-project-id'] ?? null,
maxProjectId: args['max-project-id'] ?? null,
projectModifiedSince: args['project-modified-since']
? new Date(args['project-modified-since'])
: null,
danglingComments,
trackedChanges,
concurrency: parseInt(args.concurrency ?? '1', 10),
}
}
function usage() {
console.log(`Usage: find_dangling_comments.mjs [OPTS]
Options:
--min-project-id Start scanning at this project id
--max-project-id Stop scanning at this project id
--project-modified-since Only consider projects that were modified after the given date
Example: 2020-01-01
--dangling-comments Report projects with dangling comments
--tracked-changes Report projects with tracked changes
--concurrency How many projects can be processed in parallel
`)
}
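// Example (hypothetical date): scan recently modified projects for both issues:
//   node find_dangling_comments.mjs --dangling-comments --tracked-changes \
//     --project-modified-since=2024-01-01 --concurrency=4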
async function main() {
const queue = new PQueue({ concurrency: OPTS.concurrency })
let projectsProcessed = 0
let danglingCommentsFound = 0
let trackedChangesFound = 0
for await (const projectId of getProjectIds()) {
await queue.onEmpty()
queue.add(async () => {
const docs = await getDocs(projectId)
if (OPTS.danglingComments) {
const danglingThreadIds = await findDanglingThreadIds(projectId, docs)
if (danglingThreadIds.length > 0) {
console.log(
`Project ${projectId} has dangling threads: ${danglingThreadIds.join(', ')}`
)
danglingCommentsFound += 1
}
}
if (OPTS.trackedChanges) {
if (docsHaveTrackedChanges(docs)) {
console.log(`Project ${projectId} has tracked changes`)
trackedChangesFound += 1
}
}
projectsProcessed += 1
if (projectsProcessed % 100000 === 0) {
console.log(
`${projectsProcessed} projects processed - Last project: ${projectId}`
)
}
})
}
await queue.onIdle()
if (OPTS.danglingComments) {
console.log(
`${danglingCommentsFound} projects with dangling comments found`
)
}
if (OPTS.trackedChanges) {
console.log(`${trackedChangesFound} projects with tracked changes found`)
}
}
function getProjectIds() {
const clauses = []
if (OPTS.minProjectId != null) {
clauses.push({ _id: { $gte: new ObjectId(OPTS.minProjectId) } })
}
if (OPTS.maxProjectId != null) {
clauses.push({ _id: { $lte: new ObjectId(OPTS.maxProjectId) } })
}
if (OPTS.projectModifiedSince) {
clauses.push({ lastUpdated: { $gte: OPTS.projectModifiedSince } })
}
const query = clauses.length > 0 ? { $and: clauses } : {}
return db.projects
.find(query, {
projection: { _id: 1 },
readPreference: READ_PREFERENCE_SECONDARY,
sort: { _id: 1 },
})
.map(x => x._id.toString())
}
async function getDocs(projectId) {
const mongoDocs = db.docs.find(
{
project_id: new ObjectId(projectId),
deleted: { $ne: true },
},
{
projection: { ranges: 1, inS3: 1 },
readPreference: READ_PREFERENCE_SECONDARY,
}
)
const docs = []
for await (const mongoDoc of mongoDocs) {
if (mongoDoc.inS3) {
try {
const archivedDoc = await DocstoreManager.promises.getDoc(
projectId,
mongoDoc._id,
{ peek: true }
)
docs.push({
id: mongoDoc._id.toString(),
ranges: archivedDoc.ranges,
})
} catch (err) {
if (err instanceof NotFoundError) {
console.warn(`Doc ${mongoDoc._id} in project ${projectId} not found`)
} else {
throw err
}
}
} else {
docs.push({
id: mongoDoc._id.toString(),
ranges: mongoDoc.ranges,
})
}
}
return docs
}
async function findDanglingThreadIds(projectId, docs) {
const threadIds = new Set()
for (const doc of docs) {
const comments = doc.ranges?.comments ?? []
for (const comment of comments) {
threadIds.add(comment.op.t.toString())
}
}
if (threadIds.size === 0) {
return []
}
const rooms = await db.rooms.find(
{ project_id: new ObjectId(projectId), thread_id: { $exists: true } },
{ readPreference: READ_PREFERENCE_SECONDARY }
)
for await (const room of rooms) {
threadIds.delete(room.thread_id.toString())
if (threadIds.size === 0) {
break
}
}
return Array.from(threadIds)
}
function docsHaveTrackedChanges(docs) {
for (const doc of docs) {
const changes = doc.ranges?.changes ?? []
if (changes.length > 0) {
return true
}
}
return false
}
try {
await main()
process.exit(0)
} catch (err) {
console.error(err)
process.exit(1)
}

View File

@@ -0,0 +1,25 @@
import InstitutionsManager from '../app/src/Features/Institutions/InstitutionsManager.js'
import { ensureRunningOnMongoSecondaryWithTimeout } from './helpers/env_variable_helper.mjs'
ensureRunningOnMongoSecondaryWithTimeout(300000)
const institutionId = parseInt(process.argv[2])
if (isNaN(institutionId)) throw new Error('No institution id')
console.log('Checking users of institution', institutionId)
const emitNonProUserIds = process.argv.includes('--emit-non-pro-user-ids')
async function main() {
const usersSummary = await InstitutionsManager.promises.checkInstitutionUsers(
institutionId,
emitNonProUserIds
)
console.log(usersSummary)
process.exit()
}
try {
await main()
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,26 @@
import CE_CONFIG from '../config/settings.defaults.js'
import PRO_CONFIG from '../config/settings.overrides.server-pro.js'
import SAAS_CONFIG from '../config/settings.webpack.js'
function getOverleafModuleImports(settings) {
return Object.keys(settings.overleafModuleImports).sort().join(',')
}
function main() {
const CE = getOverleafModuleImports(CE_CONFIG)
const PRO = getOverleafModuleImports(CE_CONFIG.mergeWith(PRO_CONFIG))
const SAAS = getOverleafModuleImports(CE_CONFIG.mergeWith(SAAS_CONFIG))
if (CE !== PRO) {
throw new Error(
'settings.defaults is missing overleafModuleImports defined in settings.overrides.server-pro'
)
}
if (CE !== SAAS) {
throw new Error(
'settings.defaults is missing overleafModuleImports defined in settings.webpack'
)
}
}
main()

View File

@@ -0,0 +1,263 @@
const Path = require('path')
const DocstoreManager = require('../app/src/Features/Docstore/DocstoreManager')
const DocumentUpdaterHandler = require('../app/src/Features/DocumentUpdater/DocumentUpdaterHandler')
const FileStoreHandler = require('../app/src/Features/FileStore/FileStoreHandler')
const ProjectGetter = require('../app/src/Features/Project/ProjectGetter')
const ProjectEntityMongoUpdateHandler = require('../app/src/Features/Project/ProjectEntityMongoUpdateHandler')
const { waitForDb, db, ObjectId } = require('../app/src/infrastructure/mongodb')
const logger = require('@overleaf/logger').logger
const args = require('minimist')(process.argv.slice(2), {
boolean: ['verbose', 'fix'],
})
const verbose = args.verbose
if (!verbose) {
logger.level('error')
}
// no remaining arguments, print usage
if (args._.length === 0) {
console.log(
'Usage: node services/web/scripts/check_project_docs.js [--verbose] [--fix] <projectId>...'
)
process.exit(1)
}
function logDoc(projectId, path, doc, message = '') {
console.log(
'projectId:',
projectId,
'doc:',
JSON.stringify({
_id: doc._id,
name: doc.name,
lines: doc.lines ? doc.lines.join('\n').length : 0,
rev: doc.rev,
version: doc.version,
ranges: typeof doc.ranges,
}),
path,
message
)
}
function logFile(projectId, path, file, message = '') {
console.log(
'projectId:',
projectId,
'file:',
JSON.stringify({
_id: file._id,
name: file.name,
linkedFileData: file.linkedFileData,
hash: file.hash,
size: file.size,
}),
path,
message
)
}
function findPathCounts(projectId, docEntries, fileEntries) {
const pathCounts = new Map()
const docPaths = docEntries.map(({ path }) => path)
const filePaths = fileEntries.map(({ path }) => path)
const allPaths = docPaths.concat(filePaths)
for (const path of allPaths) {
pathCounts.set(path, (pathCounts.get(path) || 0) + 1)
}
return pathCounts
}
// copied from services/web/app/src/Features/Project/ProjectDuplicator.js
function _getFolderEntries(folder, folderPath = '/') {
const docEntries = []
const fileEntries = []
const docs = folder.docs || []
const files = folder.fileRefs || []
const subfolders = folder.folders || []
for (const doc of docs) {
if (doc == null || doc._id == null) {
continue
}
const path = Path.join(folderPath, doc.name)
docEntries.push({ doc, path })
}
for (const file of files) {
if (file == null || file._id == null) {
continue
}
const path = Path.join(folderPath, file.name)
fileEntries.push({ file, path })
}
for (const subfolder of subfolders) {
if (subfolder == null || subfolder._id == null) {
continue
}
const subfolderPath = Path.join(folderPath, subfolder.name)
const subfolderEntries = _getFolderEntries(subfolder, subfolderPath)
for (const docEntry of subfolderEntries.docEntries) {
docEntries.push(docEntry)
}
for (const fileEntry of subfolderEntries.fileEntries) {
fileEntries.push(fileEntry)
}
}
return { docEntries, fileEntries }
}
async function getDocsInMongo(projectId) {
return await db.docs
.find({ project_id: new ObjectId(projectId), deleted: { $ne: true } })
.toArray()
}
function getDocIdsInFileTree(docEntries) {
return docEntries.map(({ doc }) => doc._id.toString())
}
function findMissingDocs(docsInMongo, docIdsInFileTree) {
const missingDocs = []
for (const doc of docsInMongo) {
const docId = doc._id.toString()
if (!docIdsInFileTree.includes(docId)) {
console.log(`Found doc in docstore not in project filetree:`, docId)
missingDocs.push(doc)
}
}
return missingDocs
}
async function createRecoveryFolder(projectId) {
const recoveryFolder = `recovered-${Date.now()}`
const { folder } = await ProjectEntityMongoUpdateHandler.promises.mkdirp(
new ObjectId(projectId),
recoveryFolder,
null // unset lastUpdatedBy
)
console.log('Created recovery folder:', folder._id.toString())
return folder
}
async function restoreMissingDocs(projectId, folder, missingDocs) {
for (const doc of missingDocs) {
doc.name = doc.name || `unknown-file-${doc._id.toString()}`
try {
await ProjectEntityMongoUpdateHandler.promises.addDoc(
new ObjectId(projectId),
folder._id,
doc,
null // unset lastUpdatedBy
)
console.log('Restored doc to filetree:', doc._id.toString())
} catch (err) {
console.log(`Error adding doc to filetree:`, err)
}
}
}
async function checkProject(projectId) {
try {
await DocumentUpdaterHandler.promises.flushProjectToMongo(projectId)
} catch (err) {
console.log(`Error flushing project ${projectId} to mongo: ${err}`)
}
const project = await ProjectGetter.promises.getProject(projectId, {
rootFolder: true,
rootDoc_id: true,
})
if (verbose) {
console.log(`project: ${JSON.stringify(project)}`)
}
const { docEntries, fileEntries } = _getFolderEntries(project.rootFolder[0])
console.log(
`Found ${docEntries.length} docEntries and ${fileEntries.length} fileEntries`
)
const pathCounts = findPathCounts(projectId, docEntries, fileEntries)
for (const [path, count] of pathCounts) {
if (count > 1) {
console.log(`Found duplicate path: ${path}`)
}
}
let errors = 0
for (const { doc, path } of docEntries) {
try {
const { lines, rev, version, ranges } =
await DocstoreManager.promises.getDoc(projectId, doc._id)
if (!lines) {
throw new Error('no doclines')
}
if (pathCounts.get(path) > 1) {
logDoc(
projectId,
path,
{ ...doc, lines, rev, version, ranges },
'duplicate path'
)
errors++
} else if (verbose) {
logDoc(projectId, path, { ...doc, lines, rev, version, ranges })
}
} catch (err) {
logDoc(projectId, path, doc, err)
errors++
}
}
for (const { file, path } of fileEntries) {
try {
const fileSize = await FileStoreHandler.promises.getFileSize(
projectId,
file._id
)
if (pathCounts.get(path) > 1) {
logFile(projectId, path, { ...file, fileSize }, 'duplicate path')
errors++
} else if (verbose) {
logFile(projectId, path, { ...file, fileSize })
}
} catch (err) {
logFile(projectId, path, file, err)
errors++
}
}
// now look for docs in the docstore that are not in the project filetree
const docsInMongo = await getDocsInMongo(projectId)
const docIdsInFileTree = getDocIdsInFileTree(docEntries)
const missingDocs = findMissingDocs(docsInMongo, docIdsInFileTree)
if (args.fix && missingDocs.length > 0) {
console.log('Restoring missing docs to filetree...')
const folder = await createRecoveryFolder(projectId)
await restoreMissingDocs(projectId, folder, missingDocs)
}
if (errors > 0) {
console.log(`Errors found in project: ${projectId}`)
} else {
console.log(`No errors found in project: ${projectId}`)
}
}
async function main() {
await waitForDb()
for (const projectId of args._) {
await checkProject(projectId)
}
}
main()
.then(() => {
console.log('DONE')
process.exit(0)
})
.catch(err => {
console.error(err)
process.exit(1)
})

View File

@@ -0,0 +1,19 @@
import SAMLEmailBatchCheck from '../modules/saas-authentication/app/src/SAML/SAMLEmailBatchCheck.mjs'
import { ensureRunningOnMongoSecondaryWithTimeout } from './helpers/env_variable_helper.mjs'
ensureRunningOnMongoSecondaryWithTimeout(300000)
const startInstitutionId = parseInt(process.argv[2])
const emitDetailedData = process.argv.includes('--detailed-data')
try {
const result = await SAMLEmailBatchCheck.promises.checkEmails(
startInstitutionId,
emitDetailedData
)
console.table(result)
process.exit()
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,67 @@
import {
db,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.js'
import UserSessionsManager from '../app/src/Features/User/UserSessionsManager.js'
const COMMIT = process.argv.includes('--commit')
const LOG_SESSIONS = !process.argv.includes('--log-sessions=false')
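// Flags: --commit actually clears the sessions in Redis (the default is a dry run
// that only lists admin users), and --log-sessions=false skips fetching each
// user's current session list before clearing.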
async function main() {
const adminUsers = await db.users
.find(
{ isAdmin: true },
{
projection: {
_id: 1,
email: 1,
},
readPreference: READ_PREFERENCE_SECONDARY,
}
)
.toArray()
if (LOG_SESSIONS) {
for (const user of adminUsers) {
user.sessions = JSON.stringify(
await UserSessionsManager.promises.getAllUserSessions(user, [])
)
}
}
console.log('All Admin users before clearing:')
console.table(adminUsers)
if (COMMIT) {
let anyFailed = false
for (const user of adminUsers) {
console.error(
`Clearing sessions for ${user.email} (${user._id.toString()})`
)
user.clearedSessions = 0
try {
user.clearedSessions =
await UserSessionsManager.promises.removeSessionsFromRedis(user)
} catch (err) {
anyFailed = true
console.error(err)
}
}
console.log('All Admin users after clearing:')
console.table(adminUsers)
if (anyFailed) {
throw new Error('failed to clear some sessions, see above for details')
}
} else {
console.warn('Use --commit to clear sessions.')
}
}
try {
await main()
console.error('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}

View File

@@ -0,0 +1,41 @@
/* Clear feedback collection before a cutoff date
*
* Usage
* node scripts/clear_feedback_collection.mjs 2022-11-01 # dry run mode
* DRY_RUN=false node scripts/clear_feedback_collection.mjs 2022-11-01 # deletion mode
*/
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
import { fileURLToPath } from 'node:url'
const runScript = async (timestamp, dryRun) => {
const t = new Date(timestamp)
if (isNaN(t)) {
throw new Error('invalid date ' + timestamp)
}
const cutoffId = ObjectId.createFromTime(t / 1000)
console.log('deleting all feedback entries before', t, '=>', cutoffId)
const cursor = db.feedbacks.find({ _id: { $lt: cutoffId } })
for await (const entry of cursor) {
console.log('deleting', entry._id)
if (dryRun) {
console.log('skipping in dry run mode')
continue
}
await db.feedbacks.deleteOne({ _id: entry._id })
}
}
if (fileURLToPath(import.meta.url) === process.argv[1]) {
// we are in the root module, which means that we're running as a script
const timestamp = process.env.CUTOFF_TIMESTAMP || process.argv[2]
const dryRun = process.env.DRY_RUN !== 'false'
runScript(timestamp, dryRun)
.then(() => process.exit())
.catch(err => {
console.error(err)
process.exit(1)
})
}
export default runScript

View File

@@ -0,0 +1,49 @@
import { promisify } from 'node:util'
import InstitutionsManager from '../app/src/Features/Institutions/InstitutionsManager.js'
import { fileURLToPath } from 'node:url'
const sleep = promisify(setTimeout)
async function main() {
const institutionId = parseInt(process.argv[2])
if (isNaN(institutionId)) throw new Error('No institution id')
const dryRun = process.argv.includes('--dry-run')
console.log('Deleting notifications of institution', institutionId)
const preview =
await InstitutionsManager.promises.clearInstitutionNotifications(
institutionId,
true
)
console.log('--- Preview ---')
console.log(JSON.stringify(preview, null, 4))
console.log('---------------')
if (dryRun) {
console.log('Exiting early due to --dry-run flag')
return
}
console.log('Exit in the next 10s in case these numbers are off.')
await sleep(10 * 1000)
const cleared =
await InstitutionsManager.promises.clearInstitutionNotifications(
institutionId,
false
)
console.log('--- Cleared ---')
console.log(JSON.stringify(cleared, null, 4))
console.log('---------------')
}
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main()
console.log('Done.')
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}
}

View File

@@ -0,0 +1,28 @@
import ProjectDetailsHandler from '../app/src/Features/Project/ProjectDetailsHandler.js'
const projectId = process.argv[2]
if (!/^(?=[a-f\d]{24}$)(\d+[a-f]|[a-f]+\d)/.test(projectId)) {
console.error('Usage: node clear_project_tokens.js projectId')
process.exit(1)
}
function main() {
ProjectDetailsHandler.clearTokens(projectId, err => {
if (err) {
console.error(
`Error clearing project tokens from project ${projectId}`,
err
)
process.exit(1)
}
console.log(`Successfully cleared project tokens from project ${projectId}`)
process.exit(0)
})
}
try {
await main()
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,83 @@
import { promisify, promiseMapWithLimit } from '@overleaf/promise-utils'
import UserSessionsRedis from '../app/src/Features/User/UserSessionsRedis.js'
import minimist from 'minimist'
const rClient = UserSessionsRedis.client()
const args = minimist(process.argv.slice(2))
const CURSOR = args.cursor
const COMMIT = args.commit === 'true'
const CONCURRENCY = parseInt(args.concurrency, 10) || 50
const LOG_EVERY_IN_S = parseInt(args['log-every-in-s'], 10) || 5
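// Arguments (all optional): --commit=true deletes the matching sessions instead
// of doing a dry run, --concurrency limits parallel Redis lookups (default 50),
// --cursor resumes a previous SCAN from the given cursor, and --log-every-in-s
// controls how often progress is logged (default 5 seconds).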
function shouldDelete(session) {
if (session.twoFactorAuthenticationPendingUser) {
// twoFactorAuthenticationPendingUserId migration
return true
}
// default: keep
return false
}
async function processSession(key) {
if (!key || !key.startsWith('sess:')) {
throw new Error(`unexpected session key: ${key}`)
}
const blob = await rClient.get(key)
if (!blob) return false // expired or deleted
const session = JSON.parse(blob)
if (shouldDelete(session)) {
const deleteLabel = COMMIT ? 'delete' : 'would delete'
console.warn(deleteLabel, key)
if (COMMIT) {
await rClient.del(key)
}
return true
}
return false
}
async function main() {
console.warn({ COMMIT, CONCURRENCY, CURSOR, LOG_EVERY_IN_S })
console.warn('starting in 10s')
await promisify(setTimeout)(10_000)
let processed = 0
let deleted = 0
function logProgress() {
const deletedLabel = COMMIT ? 'deleted' : 'would have deleted'
console.log(
`processed ${processed} | ${deletedLabel} ${deleted} | cursor ${cursor}`
)
}
let cursor = CURSOR
let lastLog = 0
while (cursor !== '0') {
let keys
;[cursor, keys] = await rClient.scan(cursor || 0, 'MATCH', 'sess:*')
const results = await promiseMapWithLimit(CONCURRENCY, keys, processSession)
processed += keys.length
for (const r of results) {
if (r) deleted++
}
if (Date.now() - lastLog >= LOG_EVERY_IN_S * 1000) {
logProgress()
lastLog = Date.now()
}
}
logProgress()
console.log('Done.')
await rClient.disconnect()
}
try {
await main()
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,93 @@
import fs from 'node:fs'
import { ObjectId } from '../app/src/infrastructure/mongodb.js'
import UserUpdater from '../app/src/Features/User/UserUpdater.js'
import UserSessionsManager from '../app/src/Features/User/UserSessionsManager.js'
import UserAuditLogHandler from '../app/src/Features/User/UserAuditLogHandler.js'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
const ASYNC_LIMIT = 10
const processLogger = {
failedClear: [],
failedSet: [],
success: [],
printSummary: () => {
console.log(
{
success: processLogger.success,
failedClear: processLogger.failedClear,
failedSet: processLogger.failedSet,
},
`\nDONE. ${processLogger.success.length} successful. ${processLogger.failedClear.length} failed to clear sessions. ${processLogger.failedSet.length} failed to set must_reconfirm.`
)
},
}
function _validateUserIdList(userIds) {
if (!Array.isArray(userIds)) throw new Error('users is not an array')
userIds.forEach(userId => {
if (!ObjectId.isValid(userId)) throw new Error('user ID not valid')
})
}
async function _handleUser(userId) {
try {
await UserUpdater.promises.updateUser(userId, {
$set: { must_reconfirm: true },
})
} catch (error) {
console.log(`Failed to set must_reconfirm ${userId}`, error)
processLogger.failedSet.push(userId)
return
}
try {
await UserAuditLogHandler.promises.addEntry(
userId,
'must-reset-password-set',
undefined,
undefined,
{ script: true }
)
} catch (error) {
console.log(`Failed to create audit log for ${userId}`, error)
// don't block the process if audit log fails
}
try {
await UserSessionsManager.promises.removeSessionsFromRedis(
{ _id: userId },
null
)
} catch (error) {
console.log(`Failed to clear sessions for ${userId}`, error)
processLogger.failedClear.push(userId)
return
}
processLogger.success.push(userId)
}
async function _loopUsers(userIds) {
return promiseMapWithLimit(ASYNC_LIMIT, userIds, _handleUser)
}
const fileName = process.argv[2]
if (!fileName) throw new Error('missing filename')
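// The input file is expected to contain one user id (a 24 character hex string)
// per line; each id is validated below before any users are processed.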
const usersFile = fs.readFileSync(fileName, 'utf8')
const userIds = usersFile
.trim()
.split('\n')
.map(id => id.trim())
async function processUsers(userIds) {
console.log('---Starting set_must_reconfirm script---')
_validateUserIdList(userIds)
console.log(`---Starting to process ${userIds.length} users---`)
await _loopUsers(userIds)
processLogger.printSummary()
process.exit()
}
processUsers(userIds)

View File

@@ -0,0 +1,53 @@
import { db } from '../app/src/infrastructure/mongodb.js'
import { fileURLToPath } from 'node:url'
async function updateStringDates() {
const users = await db.users.aggregate([
{ $unwind: { path: '$emails' } },
{
$match: { 'emails.confirmedAt': { $exists: true, $type: 'string' } },
},
{
$project: {
_id: 1,
'emails.email': 1,
'emails.confirmedAt': 1,
},
},
])
let user
let count = 0
while ((user = await users.next())) {
count += 1
if (count % 10000 === 0) {
console.log(`processed ${count} users`)
}
const confirmedAt = user.emails.confirmedAt
const dateConfirmedAt = new Date(confirmedAt.replace(/ UTC$/, ''))
await db.users.updateOne(
{
_id: user._id,
'emails.email': user.emails.email,
},
{
$set: {
'emails.$.confirmedAt': dateConfirmedAt,
},
}
)
}
console.log(`Updated ${count} confirmedAt strings to dates!`)
}
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await updateStringDates()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}
}
export default updateStringDates

View File

@@ -0,0 +1,93 @@
import _ from 'lodash'
import { db } from '../app/src/infrastructure/mongodb.js'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
import { fileURLToPath } from 'node:url'
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
// $ node scripts/convert_archived_state.mjs FIRST,SECOND
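// The FIRST stage rewrites `archived: false` / `trashed: false` to empty arrays;
// the SECOND stage rewrites `archived: true` / `trashed: true` to arrays of the
// user ids collected from the project's ownership and collaborator fields. Run
// one stage or both, e.g. FIRST,SECOND.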
async function main(STAGE) {
for (const FIELD of ['archived', 'trashed']) {
if (STAGE.includes('FIRST')) {
await batchedUpdate(
db.projects,
{ [FIELD]: false },
{
$set: { [FIELD]: [] },
}
)
console.error('Done, with first part for field:', FIELD)
}
if (STAGE.includes('SECOND')) {
await batchedUpdate(
db.projects,
{ [FIELD]: true },
async function performUpdate(nextBatch) {
await promiseMapWithLimit(
WRITE_CONCURRENCY,
nextBatch,
async project => {
try {
await upgradeFieldToArray({ project, FIELD })
} catch (err) {
console.error(project._id, err)
throw err
}
}
)
},
{
_id: 1,
owner_ref: 1,
collaberator_refs: 1,
readOnly_refs: 1,
tokenAccessReadAndWrite_refs: 1,
tokenAccessReadOnly_refs: 1,
}
)
console.error('Done, with second part for field:', FIELD)
}
}
}
async function upgradeFieldToArray({ project, FIELD }) {
return db.projects.updateOne(
{ _id: project._id },
{
$set: { [FIELD]: getAllUserIds(project) },
}
)
}
function getAllUserIds(project) {
return _.unionWith(
[project.owner_ref],
project.collaberator_refs,
project.readOnly_refs,
project.tokenAccessReadAndWrite_refs,
project.tokenAccessReadOnly_refs,
_objectIdEquals
)
}
function _objectIdEquals(firstVal, secondVal) {
// For use as a comparator for unionWith
return firstVal.toString() === secondVal.toString()
}
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main(process.argv.pop())
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}
}
export default main

View File

@@ -0,0 +1,44 @@
import minimist from 'minimist'
import { ObjectId } from '../app/src/infrastructure/mongodb.js'
import ProjectEntityUpdateHandler from '../app/src/Features/Project/ProjectEntityUpdateHandler.js'
import Errors from '../app/src/Features/Errors/Errors.js'
async function main() {
const argv = minimist(process.argv.slice(2))
const projectId = argv['project-id']
const docId = argv['doc-id']
const userId = argv['user-id']
if ([projectId, docId, userId].some(it => !it || !ObjectId.isValid(it))) {
throw new Error(
'provide a valid object id as --project-id, --doc-id and --user-id'
)
}
console.log(`Converting doc ${projectId}/${docId} as user ${userId}`)
try {
await ProjectEntityUpdateHandler.promises.convertDocToFile(
projectId,
docId,
userId,
null
)
} catch (err) {
if (err instanceof Errors.NotFoundError) {
throw new Error('Document not found')
} else if (err instanceof Errors.DocHasRangesError) {
throw new Error('Document has comments or tracked changes')
} else {
throw err
}
}
}
try {
await main()
console.log('Done.')
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,73 @@
import {
db,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.js'
import _ from 'lodash'
import { formatTokenUsageStats } from '@overleaf/access-token-encryptor/scripts/helpers/format-usage-stats.js'
import { ensureMongoTimeout } from './helpers/env_variable_helper.mjs'
if (!process.env.MONGO_SOCKET_TIMEOUT) {
const TEN_MINUTES = 1000 * 60 * 10
ensureMongoTimeout(TEN_MINUTES)
}
const CASES = {
users: {
dropbox: 'dropbox.access_token_oauth2.encrypted',
zotero: 'refProviders.zotero.encrypted',
mendeley: 'refProviders.mendeley.encrypted',
},
githubSyncUserCredentials: {
github: 'auth_token_encrypted',
},
}
async function count(collectionName, paths) {
const collection = db[collectionName]
const stats = {}
const projection = { _id: 0 }
for (const path of Object.values(paths)) {
projection[path] = 1
}
const cursor = collection.find(
{},
{
readPreference: READ_PREFERENCE_SECONDARY,
projection,
}
)
for await (const doc of cursor) {
for (const [name, path] of Object.entries(paths)) {
const blob = _.get(doc, path)
if (!blob) continue
// Schema: LABEL-VERSION:SALT:CIPHERTEXT:IV
const [label] = blob.split(':')
let [, version] = label.split('-')
version = version || 'v2'
const key = [name, version, collectionName, path, label].join(':')
stats[key] = (stats[key] || 0) + 1
}
}
return stats
}
async function main() {
const STATS = {}
for (const [collectionName, paths] of Object.entries(CASES)) {
const stats = await count(collectionName, paths)
Object.assign(STATS, stats)
}
formatTokenUsageStats(STATS)
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,42 @@
import readline from 'node:readline'
import ProjectEntityHandler from '../app/src/Features/Project/ProjectEntityHandler.js'
import ProjectGetter from '../app/src/Features/Project/ProjectGetter.js'
import Errors from '../app/src/Features/Errors/Errors.js'
async function countFiles() {
const rl = readline.createInterface({
input: process.stdin,
})
for await (const projectId of rl) {
try {
const project = await ProjectGetter.promises.getProject(projectId)
if (!project) {
throw new Errors.NotFoundError('project not found')
}
const { files, docs } =
ProjectEntityHandler.getAllEntitiesFromProject(project)
console.error(
projectId,
files.length,
(project.deletedFiles && project.deletedFiles.length) || 0,
docs.length,
(project.deletedDocs && project.deletedDocs.length) || 0
)
} catch (err) {
if (err instanceof Errors.NotFoundError) {
console.error(projectId, 'NOTFOUND')
} else {
console.log(projectId, 'ERROR', err.name, err.message)
}
}
}
}
try {
await countFiles()
process.exit(0)
} catch (error) {
console.log('Aiee, something went wrong!', error)
process.exit(1)
}

View File

@@ -0,0 +1,79 @@
import {
db,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.js'
import { extname } from 'node:path'
const FILE_TYPES = [
'.jpg',
'.jpeg',
'.png',
'.bmp',
'.webp',
'.svg',
'.pdf',
'.eps',
'.gif',
'.ico',
'.tiff',
]
const longestFileType = Math.max(...FILE_TYPES.map(fileType => fileType.length))
async function main() {
const projects = db.projects.find(
{},
{
projection: { rootFolder: 1 },
readPreference: READ_PREFERENCE_SECONDARY,
}
)
let projectsProcessed = 0
const result = new Map(FILE_TYPES.map(fileType => [fileType, 0]))
for await (const project of projects) {
projectsProcessed += 1
if (projectsProcessed % 100000 === 0) {
console.log(projectsProcessed, 'projects processed')
}
countFiles(project.rootFolder[0], result)
}
const sortedResults = [...result.entries()].sort(
([, countA], [, countB]) => countB - countA
)
sortedResults.forEach(([fileType, count]) => {
console.log(
`${fileType.padStart(longestFileType, ' ')}: ${count
.toString()
.padStart(7, ' ')}`
)
})
}
function countFiles(folder, result) {
if (folder.folders) {
for (const subfolder of folder.folders) {
countFiles(subfolder, result)
}
}
if (folder.fileRefs) {
for (const file of folder.fileRefs) {
const fileType = extname(file.name).toLowerCase()
const current = result.get(fileType)
if (current !== undefined) {
result.set(fileType, current + 1)
}
}
}
return result
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,135 @@
import readline from 'node:readline'
import { ObjectId, db } from '../app/src/infrastructure/mongodb.js'
import ProjectEntityHandler from '../app/src/Features/Project/ProjectEntityHandler.js'
import ProjectGetter from '../app/src/Features/Project/ProjectGetter.js'
import Errors from '../app/src/Features/Errors/Errors.js'
import FileStoreHandler from '../app/src/Features/FileStore/FileStoreHandler.js'
// Handles a list of project IDs from stdin, one per line, and outputs the count of files and docs
// in the project, along with the aggregated size in bytes for all files and docs.
//
// It writes its output to stderr so that the logging junk on stdout can be discarded - e.g., running like:
// node scripts/count_project_size.mjs < /tmp/project_ids.txt > /dev/null 2> /tmp/output.txt
//
// The output format is line-per-project with data separated by a single space, containing:
// - projectId
// - file count
// - deleted files count
// - doc count
// - deleted docs count
// - total size in bytes of (non deleted) files
// - total size in bytes of (non deleted) docs
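// A hypothetical output line (all values below are illustrative only) would look
// like:
//
//   5babb6f864c952737a9a4c32 3 0 5 1 204800 5120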
async function countProjectFiles() {
const rl = readline.createInterface({
input: process.stdin,
})
for await (const projectId of rl) {
try {
const project = await ProjectGetter.promises.getProject(projectId)
if (!project) {
throw new Errors.NotFoundError('project not found')
}
const { files, docs } =
ProjectEntityHandler.getAllEntitiesFromProject(project)
const [fileSize, docSize] = await Promise.all([
countFilesSize(files, projectId),
countDocsSizes(docs),
])
console.error(
projectId,
files.length,
(project.deletedFiles && project.deletedFiles.length) || 0,
docs.length,
(project.deletedDocs && project.deletedDocs.length) || 0,
fileSize,
docSize
)
} catch (err) {
if (err instanceof Errors.NotFoundError) {
console.error(projectId, 'NOTFOUND')
} else {
console.log(projectId, 'ERROR', err.name, err.message)
}
}
}
}
async function countFilesSize(files, projectId) {
if (!files?.length) {
return 0
}
const ids = files.map(fileObject => fileObject.file._id)
let totalFileSize = 0
for (const fileId of ids) {
const contentLength = await FileStoreHandler.promises.getFileSize(
projectId,
fileId
)
const size = parseInt(contentLength, 10)
if (isNaN(size)) {
throw new Error(
`Unable to fetch file size for fileId=${fileId} and projectId=${projectId}`
)
}
totalFileSize += size
}
return totalFileSize
}
async function countDocsSizes(docs) {
if (!docs?.length) {
return 0
}
const ids = docs.map(docObject => docObject.doc._id)
let totalDocSize = 0
for (const docId of ids) {
const result = await db.docs.aggregate([
{
$match: { _id: new ObjectId(docId) },
},
{
$project: {
lineSizeInBytes: {
$reduce: {
input: { $ifNull: ['$lines', []] },
initialValue: 0,
in: {
$add: ['$$value', { $strLenBytes: '$$this' }],
},
},
},
},
},
])
const { lineSizeInBytes } = await result.next()
if (isNaN(lineSizeInBytes)) {
throw new Error(`Unable to fetch 'lineSizeInBytes' for docId=${docId}`)
}
totalDocSize += lineSizeInBytes
}
return totalDocSize
}
try {
await countProjectFiles()
process.exit(0)
} catch (error) {
console.log('Aiee, something went wrong!', error)
process.exit(1)
}

View File

@@ -0,0 +1,31 @@
// Script to create a Personal Access Token for a given user
// Example:
// node scripts/create_oauth_personal_access_token.mjs --user-id=643e5b240dc50c83b5bf1127
import parseArgs from 'minimist'
import OAuthPersonalAccessTokenManager from '../modules/oauth2-server/app/src/OAuthPersonalAccessTokenManager.mjs'
const argv = parseArgs(process.argv.slice(2), {
string: ['user-id'],
})
const userId = argv['user-id']
if (!userId) {
console.error('Missing --user-id argument')
process.exit(1)
}
async function createPersonalAccessToken() {
const accessToken = await OAuthPersonalAccessTokenManager.createToken(userId)
console.log('Personal Access Token: ' + accessToken)
}
try {
await createPersonalAccessToken()
process.exit()
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,238 @@
// Script to create projects with sharelatex history for testing
// Example:
// node scripts/create_project.mjs --user-id=5dca84e11e71ae002ff73bd4 --name="My Test Project" --random-content
import fs from 'node:fs'
import path from 'node:path'
import _ from 'lodash'
import parseArgs from 'minimist'
import OError from '@overleaf/o-error'
import { User } from '../app/src/models/User.js'
import ProjectCreationHandler from '../app/src/Features/Project/ProjectCreationHandler.js'
import ProjectEntityUpdateHandler from '../app/src/Features/Project/ProjectEntityUpdateHandler.js'
import ProjectEntityHandler from '../app/src/Features/Project/ProjectEntityHandler.js'
import EditorController from '../app/src/Features/Editor/EditorController.js'
import { fileURLToPath } from 'node:url'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const argv = parseArgs(process.argv.slice(2), {
string: ['user-id', 'name', 'random-operations', 'extend-project-id'],
boolean: ['random-content'],
unknown: function (arg) {
console.error('unrecognised argument', arg)
process.exit(1)
},
})
console.log('argv', argv)
const userId = argv['user-id']
const projectName = argv.name || `Test Project ${new Date().toISOString()}`
let randomOperations = 0
if (argv['random-content'] === true || argv['random-operations']) {
randomOperations = parseInt(argv['random-operations'] || '1000', 10)
}
const extendProjectId = argv['extend-project-id']
console.log('userId', userId)
async function _createRootDoc(project, ownerId, docLines) {
try {
const { doc } = await ProjectEntityUpdateHandler.promises.addDoc(
project._id,
project.rootFolder[0]._id,
'main.tex',
docLines,
ownerId,
'create-project-script'
)
await ProjectEntityUpdateHandler.promises.setRootDoc(project._id, doc._id)
} catch (error) {
throw OError.tag(error, 'error adding root doc when creating project')
}
}
async function _addDefaultExampleProjectFiles(ownerId, projectName, project) {
const mainDocLines = await _buildTemplate(
'example-project/main.tex',
ownerId,
projectName
)
await _createRootDoc(project, ownerId, mainDocLines)
const bibDocLines = await _buildTemplate(
'example-project/sample.bib',
ownerId,
projectName
)
await ProjectEntityUpdateHandler.promises.addDoc(
project._id,
project.rootFolder[0]._id,
'sample.bib',
bibDocLines,
ownerId,
'create-project-script'
)
const frogPath = path.join(
__dirname,
'/../app/templates/project_files/example-project/frog.jpg'
)
await ProjectEntityUpdateHandler.promises.addFile(
project._id,
project.rootFolder[0]._id,
'frog.jpg',
frogPath,
null,
ownerId,
'create-project-script'
)
}
async function _buildTemplate(templateName, userId, projectName) {
const user = await User.findById(userId, 'first_name last_name')
const templatePath = path.join(
__dirname,
`/../app/templates/project_files/${templateName}`
)
const template = fs.readFileSync(templatePath)
const data = {
project_name: projectName,
user,
year: new Date().getUTCFullYear(),
month: new Date().getUTCMonth(),
}
const output = _.template(template.toString())(data)
return output.split('\n')
}
// Create a project with some random content and file operations for testing history migrations
// Unfortunately we cannot easily change the timestamps of the history entries, so everything
// will be created at the same time.
async function _pickRandomDoc(projectId) {
const result = await ProjectEntityHandler.promises.getAllDocs(projectId)
const keys = Object.keys(result)
if (keys.length === 0) {
return null
}
const filepath = _.sample(keys)
result[filepath].path = filepath
return result[filepath]
}
let COUNTER = 0
// format counter as a 6 digit zero padded number
function nextId() {
return ('000000' + COUNTER++).slice(-6)
}
async function _applyRandomDocUpdate(ownerId, projectId) {
const action = _.sample(['create', 'edit', 'delete', 'rename'])
switch (action) {
case 'create': // create a new doc
await EditorController.promises.upsertDocWithPath(
projectId,
`subdir/new-doc-${nextId()}.tex`,
[`This is a new doc ${new Date().toISOString()}`],
'create-project-script',
ownerId
)
break
case 'edit': {
// edit an existing doc
const doc = await _pickRandomDoc(projectId)
if (!doc) {
return
}
// pick a random line and either insert or delete a character
const lines = doc.lines
const index = _.random(0, lines.length - 1)
let thisLine = lines[index]
const pos = _.random(0, thisLine.length - 1)
if (Math.random() > 0.5) {
// insert a character
thisLine = thisLine.slice(0, pos) + 'x' + thisLine.slice(pos)
} else {
// delete a character
thisLine = thisLine.slice(0, pos) + thisLine.slice(pos + 1)
}
lines[index] = thisLine
await EditorController.promises.upsertDocWithPath(
projectId,
doc.path,
lines,
'create-project-script',
ownerId
)
break
}
case 'delete': {
// delete an existing doc (but not the root doc)
const doc = await _pickRandomDoc(projectId)
if (!doc || doc.path === '/main.tex') {
return
}
await EditorController.promises.deleteEntityWithPath(
projectId,
doc.path,
'create-project-script',
ownerId
)
break
}
case 'rename': {
// rename an existing doc (but not the root doc)
const doc = await _pickRandomDoc(projectId)
if (!doc || doc.path === '/main.tex') {
return
}
const newName = `renamed-${nextId()}.tex`
await EditorController.promises.renameEntity(
projectId,
doc._id,
'doc',
newName,
ownerId,
'create-project-script'
)
break
}
}
}
async function createProject() {
const user = await User.findById(userId)
console.log('Will create project')
console.log('user_id:', userId, '=>', user.email)
let projectId
if (extendProjectId) {
console.log('extending existing project', extendProjectId)
projectId = extendProjectId
} else {
console.log('project name:', projectName)
const project = await ProjectCreationHandler.promises.createBlankProject(
userId,
projectName
)
await _addDefaultExampleProjectFiles(userId, projectName, project)
projectId = project._id
}
for (let i = 0; i < randomOperations; i++) {
await _applyRandomDocUpdate(userId, projectId)
}
return projectId
}
try {
const projectId = await createProject()
console.log('Created project', projectId)
process.exit()
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,91 @@
import { db } from '../../app/src/infrastructure/mongodb.js'
import minimist from 'minimist'
const argv = minimist(process.argv.slice(2))
const commit = argv.commit !== undefined
if (!commit) {
console.log('DOING DRY RUN. TO SAVE CHANGES PASS --commit')
}
async function getDupes(commit) {
const entries = await db.splittests.aggregate([
{
$match: {
archived: { $eq: true },
},
},
{ $unwind: '$versions' },
{
$group: {
// Group by fields to match on (a,b)
_id: {
_id: '$_id',
name: '$name',
createdAt: '$versions.createdAt',
},
// Count number of matching docs for the group
count: { $sum: 1 },
// Save the _id for matching docs
docs: { $push: '$_id' },
},
},
// Limit results to duplicates (more than 1 match)
{
$match: {
count: { $gt: 1 },
},
},
])
let entry
const removed = []
while ((entry = await entries.next())) {
const name = entry._id.name
const test = await db.splittests.findOne({ name })
if (hasArchiveDupe(test.versions)) {
removed.push(test.name)
await removeLastVersion(test, commit)
}
}
const message = commit
? `removed dupes from ${removed.length} feature flags`
: `planning to remove dupes from ${removed.length} feature flags`
console.info(message, removed)
console.log('DONE')
process.exit()
}
function hasArchiveDupe(versions) {
const last = versions.length - 1
// guard in case we somehow get something with only one version here flagged as having a dupe
if (last < 2) return false
// compare the dates as strings, otherwise we would be comparing Date objects, which are only equal by reference
return (
versions[last].createdAt.toString() ===
versions[last - 1].createdAt.toString()
)
}
async function removeLastVersion(test, commit) {
const name = test.name
const numVersions = test.versions.length
if (name && numVersions > 1) {
const lastVersion = test.versions[numVersions - 1].versionNumber
console.log(`removing test ${test.name} version ${lastVersion}`)
if (commit) {
await db.splittests.updateOne(
{ name },
{ $pull: { versions: { versionNumber: lastVersion } } }
)
}
}
}
getDupes(commit)

View File

@@ -0,0 +1,3 @@
*.csv
*.csv.gz
node_modules

View File

@@ -0,0 +1,75 @@
# Delete Orphaned Docs
Because of the large number of documents and projects, it is necessary to detect
orphaned docs using bulk exports of the raw data.
## Exporting Data Files
Follow the directions in `google-ops/README.md` for exporting data from mongo
and copying the files to your local machine.
### Exporting docs
Run the following doc export command to export all doc ids and their associated
project ids in batches of 10,000,000.
```
mongoexport --uri $READ_ONLY_MONGO_CONNECTION_STRING --collection docs --fields '_id,project_id' --skip 0 --limit 10000000 --type=csv --out docs.00000000.csv
```
This will produce files like:
```
_id,project_id
ObjectId(5babb6f864c952737a9a4c32),ObjectId(5b98bba5e2f38b7c88f6a625)
ObjectId(4eecaffcbffa66588e000007),ObjectId(4eecaffcbffa66588e00000d)
```
Concatenate these into a single file: `cat docs.*csv > all-docs-doc_id-project_id.csv`
For object ids the script will accept either plain hex strings or the `ObjectId(...)`
format used by mongoexport.
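For example, both of the following input lines would be accepted (the ids are the
ones from the sample output above, shown once in each format):
```
ObjectId(5babb6f864c952737a9a4c32),ObjectId(5b98bba5e2f38b7c88f6a625)
4eecaffcbffa66588e000007,4eecaffcbffa66588e00000d
```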
### Exporting Projects
Export project ids from all `projects` and `deletedProjects`
```
mongoexport --uri $READ_ONLY_MONGO_CONNECTION_STRING --collection projects --fields '_id' --type=csv --out projects.csv
mongoexport --uri $READ_ONLY_MONGO_CONNECTION_STRING --collection deletedProjects --fields 'project._id' --type=csv --out deleted-projects.csv
```
Concatenate these: `cat projects.csv deleted-projects.csv > all-projects-project_id.csv`
## Processing Exported Data
### Create a unique sorted list of project ids from docs
```
cut -d, -f 2 all-docs-doc_id-project_id.csv | sort | uniq > all-docs-project_ids.sorted.uniq.csv
```
### Create a unique sorted list of project ids from projects
```
sort all-projects-project_id.csv | uniq > all-projects-project_id.sorted.uniq.csv
```
### Create list of project ids in docs but not in projects
```
comm --check-order -23 all-docs-project_ids.sorted.uniq.csv all-projects-project_id.sorted.uniq.csv > orphaned-doc-project_ids.csv
```
### Create list of doc ids with project ids not in projects
```
grep -F -f orphaned-doc-project_ids.csv all-docs-doc_id-project_id.csv > orphaned-doc-doc_id-project_id.csv
```
## Run doc deleter
```
node delete-orphaned-docs orphaned-doc-doc_id-project_id.csv
```
### Commit Changes
By default the script will only print the list of project ids and doc ids to be
deleted. In order to actually delete docs, run with the `--commit` argument.
### Selecting Input Lines to Process
The `--limit` and `--offset` arguments can be used to specify which lines to
process. There is one doc per line, so a single project will often have multiple
lines. Deletion is based on project id, so if one doc for a project is deleted,
all of that project's docs are deleted, even if not all of the input lines are
processed.
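For example, a dry run over the first 1,000,000 input lines, followed by the same
run with deletion enabled, might look like this (the limit value is illustrative):
```
node delete-orphaned-docs orphaned-doc-doc_id-project_id.csv --limit 1000000
node delete-orphaned-docs orphaned-doc-doc_id-project_id.csv --limit 1000000 --commit
```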

View File

@@ -0,0 +1,119 @@
import fs from 'node:fs'
import minimist from 'minimist'
import readline from 'node:readline'
import { db, ObjectId } from '../../app/src/infrastructure/mongodb.js'
import DocstoreManagerModule from '../../app/src/Features/Docstore/DocstoreManager.js'
const { promises: DocstoreManager } = DocstoreManagerModule
const argv = minimist(process.argv.slice(2))
const commit = argv.commit !== undefined
const offset = parseInt(argv.offset) || 0
const limit = parseInt(argv.limit) || 0
if (!commit) {
console.log('DOING DRY RUN. TO SAVE CHANGES PASS --commit')
}
const input = fs.createReadStream(argv._[0])
const rl = readline.createInterface({
crlfDelay: Infinity,
input,
})
const orphanedDocs = {}
console.log('Loading Data')
let idx = 0
let processed = 0
rl.on('line', async line => {
if (offset && idx++ < offset) {
return
}
if (limit && processed++ >= limit) {
return
}
let [docId, projectId] = line.split(',')
docId = docId.replace(/^ObjectId\(/, '').replace(/\)$/, '')
projectId = projectId.replace(/^ObjectId\(/, '').replace(/\)$/, '')
try {
docId = new ObjectId(docId).toString()
projectId = new ObjectId(projectId).toString()
} catch (err) {
console.error(`Invalid id: ${docId}, ${projectId}`)
return
}
if (!orphanedDocs[projectId]) {
orphanedDocs[projectId] = []
}
orphanedDocs[projectId].push(docId)
})
rl.on('close', async () => {
const docCount = Object.values(orphanedDocs).reduce((i, v) => i + v.length, 0)
const projectCount = Object.keys(orphanedDocs).length
console.log(`Loaded Data for ${docCount} docs in ${projectCount} Projects`)
for (const projectId of Object.keys(orphanedDocs)) {
await deleteOrphanedDocs(projectId, orphanedDocs[projectId])
}
console.log('DONE')
process.exit()
})
async function deleteOrphanedDocs(projectId, docIds) {
try {
if (await projectIdExists(projectId)) {
console.error(`Project id exists: ${projectId}`)
return
}
} catch (err) {
console.error(`Error checking if project exists: ${projectId}`, err.stack)
return
}
console.log(`Delete docs ${docIds.join(', ')} for project ${projectId}`)
if (!commit) {
return
}
try {
await DocstoreManager.destroyProject(projectId)
} catch (err) {
console.error(`Error deleting project ${projectId}`, err)
}
}
async function projectIdExists(projectId) {
// check both projects and deletedProjects to see if project id exists
const [project, deletedProject] = await Promise.all([
findProject(projectId),
findDeletedProject(projectId),
])
return project !== null || deletedProject !== null
}
async function findProject(projectId) {
return db.projects.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { _id: 1 } }
)
}
async function findDeletedProject(projectId) {
return db.deletedProjects.findOne(
{ 'project._id': new ObjectId(projectId) },
{ projection: { _id: 1 } }
)
}

View File

@@ -0,0 +1,85 @@
// @ts-check
import minimist from 'minimist'
import ChatApiHandler from '../app/src/Features/Chat/ChatApiHandler.js'
import DocumentUpdaterHandler from '../app/src/Features/DocumentUpdater/DocumentUpdaterHandler.js'
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
import HistoryManager from '../app/src/Features/History/HistoryManager.js'
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
const OPTS = parseArgs()
function usage() {
console.error(
'Usage: node delete_dangling_comments.mjs [--commit] PROJECT_ID...'
)
}
function parseArgs() {
const args = minimist(process.argv.slice(2), {
boolean: ['commit'],
})
if (args._.length === 0) {
usage()
process.exit(0)
}
return {
projectIds: args._,
commit: args.commit,
}
}
async function processProject(projectId) {
console.log(`Processing project ${projectId}...`)
await DocumentUpdaterHandler.promises.flushProjectToMongoAndDelete(projectId)
const docRanges = await DocstoreManager.promises.getAllRanges(projectId)
const threads = await ChatApiHandler.promises.getThreads(projectId)
const threadIds = new Set(Object.keys(threads))
let commentsDeleted = 0
for (const doc of docRanges) {
const commentsDeletedInDoc = await processDoc(projectId, doc, threadIds)
commentsDeleted += commentsDeletedInDoc
}
if (OPTS.commit) {
console.log(`${commentsDeleted} comments deleted`)
if (commentsDeleted > 0) {
console.log(`Resyncing history for project ${projectId}`)
await HistoryManager.promises.resyncProject(projectId)
}
}
}
async function processDoc(projectId, doc, threadIds) {
let commentsDeleted = 0
for (const comment of doc.ranges?.comments ?? []) {
const threadId = comment.op.t
if (!threadIds.has(threadId)) {
if (OPTS.commit) {
console.log(`Deleting dangling comment ${comment.op.t}...`)
await deleteComment(doc._id, threadId)
commentsDeleted += 1
} else {
console.log(`Would delete dangling comment ${comment.op.t}...`)
}
}
}
return commentsDeleted
}
async function deleteComment(docId, threadId) {
await db.docs.updateOne(
{ _id: new ObjectId(docId) },
{
$pull: { 'ranges.comments': { 'op.t': new ObjectId(threadId) } },
}
)
}
// Main loop
for (const projectId of OPTS.projectIds) {
await processProject(projectId)
}
if (!OPTS.commit) {
console.log('This was a dry run. Rerun with --commit to apply changes')
}
process.exit(0)

View File

@@ -0,0 +1,131 @@
/**
* This script deletes dangling doc and file refs in projects
*/
import minimist from 'minimist'
import mongodb from 'mongodb-legacy'
import { db } from '../app/src/infrastructure/mongodb.js'
import Errors from '../app/src/Features/Errors/Errors.js'
import FileStoreHandler from '../app/src/Features/FileStore/FileStoreHandler.js'
import ProjectEntityMongoUpdateHandler from '../app/src/Features/Project/ProjectEntityMongoUpdateHandler.js'
import { iterablePaths } from '../app/src/Features/Project/IterablePath.js'
const { ObjectId } = mongodb
const OPTIONS = parseArgs()
function parseArgs() {
const argv = minimist(process.argv.slice(2), {
boolean: ['dry-run'],
default: { 'dry-run': true },
})
const dryRun = argv['dry-run']
const projectIds = argv._
if (projectIds.length === 0) {
console.log(`Usage: ${process.argv[1]} [--no-dry-run] PROJECT_ID ...`)
process.exit(0)
}
return { projectIds, dryRun }
}
async function main() {
const projects = await getProjects()
for (const project of projects) {
await processProject(project)
}
if (OPTIONS.dryRun) {
console.log(
'\nThis was a dry run. Re-run with --no-dry-run to delete broken refs.'
)
}
}
async function getProjects() {
const projectIds = OPTIONS.projectIds.map(id => new ObjectId(id))
const projects = await db.projects
.find(
{ _id: { $in: projectIds } },
{ projection: { _id: 1, rootFolder: 1 } }
)
.toArray()
return projects
}
async function processProject(project) {
console.log(`Processing project ${project._id}`)
const { docIds, fileIds } = findRefsInFolder(project.rootFolder[0])
for (const docId of docIds) {
if (!(await docExists(docId))) {
await deleteDoc(project._id, docId)
}
}
for (const fileId of fileIds) {
if (!(await fileExists(project._id, fileId))) {
await deleteFile(project._id, fileId)
}
}
}
function findRefsInFolder(folder) {
let docIds = folder.docs.map(doc => doc._id)
let fileIds = folder.fileRefs.map(file => file._id)
for (const subfolder of iterablePaths(folder, 'folders')) {
const subrefs = findRefsInFolder(subfolder)
docIds = docIds.concat(subrefs.docIds)
fileIds = fileIds.concat(subrefs.fileIds)
}
return { docIds, fileIds }
}
async function docExists(docId) {
const doc = await db.docs.findOne({ _id: docId })
return doc != null
}
async function fileExists(projectId, fileId) {
try {
// Getting the file size to avoid downloading the whole file
await FileStoreHandler.promises.getFileSize(projectId, fileId)
} catch (err) {
if (err instanceof Errors.NotFoundError) {
return false
}
throw err
}
return true
}
async function deleteDoc(projectId, docId) {
console.log(` * Deleting bad doc ref ${docId}`)
if (!OPTIONS.dryRun) {
await ProjectEntityMongoUpdateHandler.promises.deleteEntity(
projectId,
docId,
'doc',
null // unset lastUpdatedBy
)
}
}
async function deleteFile(projectId, fileId) {
console.log(` * Deleting bad file ref ${fileId}`)
if (!OPTIONS.dryRun) {
await ProjectEntityMongoUpdateHandler.promises.deleteEntity(
projectId,
fileId,
'file',
null // unset lastUpdatedBy
)
}
}
try {
await main()
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}

View File

@@ -0,0 +1,102 @@
import mongodb from 'mongodb-legacy'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import ChatApiHandler from '../app/src/Features/Chat/ChatApiHandler.js'
import DeleteOrphanedDataHelper from './delete_orphaned_data_helper.mjs'
import { ensureMongoTimeout } from './helpers/env_variable_helper.mjs'
import { db } from '../app/src/infrastructure/mongodb.js'
const { ObjectId } = mongodb
const { getHardDeletedProjectIds } = DeleteOrphanedDataHelper
const READ_CONCURRENCY_SECONDARY =
parseInt(process.env.READ_CONCURRENCY_SECONDARY, 10) || 1000
const READ_CONCURRENCY_PRIMARY =
parseInt(process.env.READ_CONCURRENCY_PRIMARY, 10) || 500
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 100
const DRY_RUN = process.env.DRY_RUN !== 'false'
const MAX_CHATS_TO_DESTROY =
parseInt(process.env.MAX_CHATS_TO_DESTROY, 10) || false
// persist fallback in order to keep batchedUpdate in-sync
process.env.BATCH_SIZE = BATCH_SIZE
// ensure set mongo timeout to 10mins if otherwise unspecified
if (!process.env.MONGO_SOCKET_TIMEOUT) {
ensureMongoTimeout(600000)
}
console.log({
DRY_RUN,
WRITE_CONCURRENCY,
BATCH_SIZE,
MAX_CHATS_TO_DESTROY,
})
const RESULT = {
DRY_RUN,
projectChatsDestroyed: 0,
continueFrom: null,
}
async function processBatch(rooms) {
if (rooms.length && rooms[0]._id) {
RESULT.continueFrom = rooms[0]._id
}
const projectIds = Array.from(
new Set(rooms.map(room => room.project_id.toString()))
).map(id => new ObjectId(id))
console.log(
`Checking projects (${projectIds.length})`,
JSON.stringify(projectIds)
)
const projectsWithOrphanedChat = await getHardDeletedProjectIds({
projectIds,
READ_CONCURRENCY_PRIMARY,
READ_CONCURRENCY_SECONDARY,
})
console.log(
`Destroying chat for projects (${projectsWithOrphanedChat.length})`,
JSON.stringify(projectsWithOrphanedChat)
)
if (!DRY_RUN) {
await promiseMapWithLimit(
WRITE_CONCURRENCY,
projectsWithOrphanedChat,
ChatApiHandler.promises.destroyProject
)
}
RESULT.projectChatsDestroyed += projectsWithOrphanedChat.length
console.log(RESULT)
if (
MAX_CHATS_TO_DESTROY &&
RESULT.projectChatsDestroyed >= MAX_CHATS_TO_DESTROY
) {
console.log(
`MAX_CHATS_TO_DESTROY limit (${MAX_CHATS_TO_DESTROY}) reached. Stopping.`
)
process.exit(0)
}
}
async function main() {
const projection = {
_id: 1,
project_id: 1,
}
await batchedUpdate(db.rooms, {}, processBatch, projection)
console.log('Final')
console.log(RESULT)
}
main()
.then(() => {
console.log('Done.')
process.exit(0)
})
.catch(error => {
console.error({ error })
process.exit(1)
})

View File

@@ -0,0 +1,113 @@
import {
db,
READ_PREFERENCE_PRIMARY,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.js'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
async function getDeletedProject(projectId, readPreference) {
return await db.deletedProjects.findOne(
{ 'deleterData.deletedProjectId': projectId },
{
// There is no index on .project. Pull down something small.
projection: { 'project._id': 1 },
readPreference,
}
)
}
async function getProject(projectId, readPreference) {
return await db.projects.findOne(
{ _id: projectId },
{
// Pulling down an empty object is fine for differentiating with null.
projection: { _id: 0 },
readPreference,
}
)
}
async function checkProjectExistsWithReadPreference(projectId, readPreference) {
// NOTE: Possible race conditions!
// There are two processes which are racing with our queries:
// 1. project deletion
// 2. project restoring
// For 1. we check the projects collection before deletedProjects.
// If a project were to be deleted at this very moment, we should see the
// soft-deleted entry which is created before deleting the projects entry.
// For 2. we check the projects collection after deletedProjects again.
// If a project were to be restored at this very moment, we are very likely
// to see the projects entry again.
// Unlikely edge case: Restore+Deletion in rapid succession.
// We could add locking to the ProjectDeleter to rule that out.
if (await getProject(projectId, readPreference)) {
// The project is live.
return true
}
const deletedProject = await getDeletedProject(projectId, readPreference)
if (deletedProject && deletedProject.project) {
// The project is registered for hard-deletion.
return true
}
if (await getProject(projectId, readPreference)) {
// The project was just restored.
return true
}
// The project does not exist.
return false
}
async function checkProjectExistsOnPrimary(projectId) {
return await checkProjectExistsWithReadPreference(
projectId,
READ_PREFERENCE_PRIMARY
)
}
async function checkProjectExistsOnSecondary(projectId) {
return await checkProjectExistsWithReadPreference(
projectId,
READ_PREFERENCE_SECONDARY
)
}
async function getHardDeletedProjectIds({
projectIds,
READ_CONCURRENCY_PRIMARY,
READ_CONCURRENCY_SECONDARY,
}) {
const doubleCheckProjectIdsOnPrimary = []
async function checkProjectOnSecondary(projectId) {
if (await checkProjectExistsOnSecondary(projectId)) {
// Finding a project with secondary confidence is sufficient.
return
}
// At this point, the secondaries deem this project as having orphaned docs.
doubleCheckProjectIdsOnPrimary.push(projectId)
}
const hardDeletedProjectIds = []
async function checkProjectOnPrimary(projectId) {
if (await checkProjectExistsOnPrimary(projectId)) {
// The project is actually live.
return
}
hardDeletedProjectIds.push(projectId)
}
await promiseMapWithLimit(
READ_CONCURRENCY_SECONDARY,
projectIds,
checkProjectOnSecondary
)
await promiseMapWithLimit(
READ_CONCURRENCY_PRIMARY,
doubleCheckProjectIdsOnPrimary,
checkProjectOnPrimary
)
return hardDeletedProjectIds
}
export default {
getHardDeletedProjectIds,
}

View File

@@ -0,0 +1,52 @@
import minimist from 'minimist'
import ChatApiHandler from '../app/src/Features/Chat/ChatApiHandler.js'
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
import DocumentUpdaterHandler from '../app/src/Features/DocumentUpdater/DocumentUpdaterHandler.js'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
/**
 * Remove doc comment ranges that are "orphaned" as they do not have matching chat
* threads. This can happen when adding comments and the HTTP request fails, but
* the ShareJS op succeeded (eventually). See https://github.com/overleaf/internal/issues/3425
* for more detail.
*/
async function main() {
const argv = minimist(process.argv.slice(2))
const { projectId, docId } = argv
const threads = await ChatApiHandler.promises.getThreads(projectId)
const threadIds = Object.keys(threads)
const doc = await DocstoreManager.promises.getDoc(projectId, docId)
const comments = doc.ranges.comments
const orphanedComments = comments.filter(comment => {
const commentThreadId = comment.op.t
return !threadIds.includes(commentThreadId)
})
await promiseMapWithLimit(
WRITE_CONCURRENCY,
orphanedComments,
async comment => {
await DocumentUpdaterHandler.promises.deleteThread(
projectId,
docId,
comment.op.t
)
}
)
await DocumentUpdaterHandler.promises.flushDocToMongo(projectId, docId)
}
try {
await main()
console.log('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}

View File

@@ -0,0 +1,179 @@
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
import { promisify } from 'node:util'
import mongodb from 'mongodb-legacy'
import {
db,
READ_PREFERENCE_PRIMARY,
READ_PREFERENCE_SECONDARY,
} from '../app/src/infrastructure/mongodb.js'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
import DeleteOrphanedDataHelper from './delete_orphaned_data_helper.mjs'
const { ObjectId } = mongodb
const sleep = promisify(setTimeout)
const { getHardDeletedProjectIds } = DeleteOrphanedDataHelper
const NOW_IN_S = Date.now() / 1000
const ONE_WEEK_IN_S = 60 * 60 * 24 * 7
const TEN_SECONDS = 10 * 1000
const DRY_RUN = process.env.DRY_RUN === 'true'
if (!process.env.BATCH_LAST_ID) {
console.error('Set BATCH_LAST_ID and re-run.')
process.exit(1)
}
const BATCH_LAST_ID = new ObjectId(process.env.BATCH_LAST_ID)
const INCREMENT_BY_S = parseInt(process.env.INCREMENT_BY_S, 10) || ONE_WEEK_IN_S
const BATCH_SIZE = parseInt(process.env.BATCH_SIZE, 10) || 1000
const READ_CONCURRENCY_SECONDARY =
parseInt(process.env.READ_CONCURRENCY_SECONDARY, 10) || 1000
const READ_CONCURRENCY_PRIMARY =
parseInt(process.env.READ_CONCURRENCY_PRIMARY, 10) || 500
const STOP_AT_S = parseInt(process.env.STOP_AT_S, 10) || NOW_IN_S
const WRITE_CONCURRENCY = parseInt(process.env.WRITE_CONCURRENCY, 10) || 10
const LET_USER_DOUBLE_CHECK_INPUTS_FOR =
parseInt(process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR, 10) || TEN_SECONDS
function getSecondsFromObjectId(id) {
return id.getTimestamp().getTime() / 1000
}
async function main() {
await letUserDoubleCheckInputs()
let lowerProjectId = BATCH_LAST_ID
let nProjectsProcessedTotal = 0
let nProjectsWithOrphanedDocsTotal = 0
let nDeletedDocsTotal = 0
while (getSecondsFromObjectId(lowerProjectId) <= STOP_AT_S) {
const upperTime = getSecondsFromObjectId(lowerProjectId) + INCREMENT_BY_S
let upperProjectId = ObjectId.createFromTime(upperTime)
const query = {
project_id: {
// exclude edge
$gt: lowerProjectId,
// include edge
$lte: upperProjectId,
},
}
const docs = await db.docs
.find(query, { readPreference: READ_PREFERENCE_SECONDARY })
.project({ project_id: 1 })
.sort({ project_id: 1 })
.limit(BATCH_SIZE)
.toArray()
if (docs.length) {
const projectIds = Array.from(
new Set(docs.map(doc => doc.project_id.toString()))
).map(id => new ObjectId(id))
console.log('Checking projects', JSON.stringify(projectIds))
const { nProjectsWithOrphanedDocs, nDeletedDocs } =
await processBatch(projectIds)
nProjectsProcessedTotal += projectIds.length
nProjectsWithOrphanedDocsTotal += nProjectsWithOrphanedDocs
nDeletedDocsTotal += nDeletedDocs
if (docs.length === BATCH_SIZE) {
// The last project in this batch may have more docs beyond BATCH_SIZE.
const lastDoc = docs[docs.length - 1]
// Resume from after this projectId.
upperProjectId = lastDoc.project_id
}
}
console.error(
'Processed %d projects ' +
'(%d projects with orphaned docs/%d docs deleted) ' +
'until %s',
nProjectsProcessedTotal,
nProjectsWithOrphanedDocsTotal,
nDeletedDocsTotal,
upperProjectId
)
lowerProjectId = upperProjectId
}
}
async function getProjectDocs(projectId) {
return await db.docs
.find(
{ project_id: projectId },
{
projection: { _id: 1 },
readPreference: READ_PREFERENCE_PRIMARY,
}
)
.toArray()
}
async function processBatch(projectIds) {
const projectsWithOrphanedDocs = await getHardDeletedProjectIds({
projectIds,
READ_CONCURRENCY_PRIMARY,
READ_CONCURRENCY_SECONDARY,
})
let nDeletedDocs = 0
async function countOrphanedDocs(projectId) {
const docs = await getProjectDocs(projectId)
nDeletedDocs += docs.length
console.log(
'Deleted project %s has %s orphaned docs: %s',
projectId,
docs.length,
JSON.stringify(docs.map(doc => doc._id))
)
}
await promiseMapWithLimit(
READ_CONCURRENCY_PRIMARY,
projectsWithOrphanedDocs,
countOrphanedDocs
)
if (!DRY_RUN) {
await promiseMapWithLimit(
WRITE_CONCURRENCY,
projectsWithOrphanedDocs,
DocstoreManager.promises.destroyProject
)
}
const nProjectsWithOrphanedDocs = projectsWithOrphanedDocs.length
return { nProjectsWithOrphanedDocs, nDeletedDocs }
}
async function letUserDoubleCheckInputs() {
console.error(
'Options:',
JSON.stringify(
{
BATCH_LAST_ID,
BATCH_SIZE,
DRY_RUN,
INCREMENT_BY_S,
STOP_AT_S,
READ_CONCURRENCY_SECONDARY,
READ_CONCURRENCY_PRIMARY,
WRITE_CONCURRENCY,
LET_USER_DOUBLE_CHECK_INPUTS_FOR,
},
null,
2
)
)
console.error(
'Waiting for you to double check inputs for',
LET_USER_DOUBLE_CHECK_INPUTS_FOR,
'ms'
)
await sleep(LET_USER_DOUBLE_CHECK_INPUTS_FOR)
}
try {
await main()
console.error('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}

View File

@@ -0,0 +1,47 @@
import { Subscription } from '../app/src/models/Subscription.js'
import SubscriptionUpdater from '../app/src/Features/Subscription/SubscriptionUpdater.js'
import minimist from 'minimist'
import mongodb from 'mongodb-legacy'
const { ObjectId } = mongodb
const run = async () => {
for (const id of ids) {
console.log('id', id)
const subscription = await Subscription.findOne({ _id: new ObjectId(id) })
await SubscriptionUpdater.promises.deleteSubscription(
subscription,
deleterData
)
console.log('Deleted subscription', id)
}
}
let ids, deleterData
const setup = () => {
const argv = minimist(process.argv.slice(2))
ids = argv.ids
if (!ids) {
console.error('No ids given')
process.exit(1)
}
ids = ids.split(',')
const deleterId = argv.deleterId
if (!deleterId) {
console.error('No deleterId given')
process.exit(1)
}
deleterData = { id: new ObjectId(deleterId) }
}
setup()
try {
await run()
process.exit(0)
} catch (err) {
console.error('Aiee, something went wrong!', err)
process.exit(1)
}

View File

@@ -0,0 +1,70 @@
import { promisify } from 'node:util'
import Settings from '@overleaf/settings'
import AdminController from '../app/src/Features/ServerAdmin/AdminController.js'
import minimist from 'minimist'
import { fileURLToPath } from 'node:url'
const args = minimist(process.argv.slice(2), {
string: ['confirm-site-url', 'delay-in-seconds'],
default: {
'delay-in-seconds': 10,
'confirm-site-url': '',
},
})
const sleep = promisify(setTimeout)
async function main() {
if (args.help) {
console.error()
console.error(
' usage: node disconnect_all_users.mjs [--delay-in-seconds=10] --confirm-site-url=https://www....\n'
)
process.exit(1)
}
const isSaaS = Boolean(Settings.overleaf)
if (isSaaS && args['confirm-site-url'] !== Settings.siteUrl) {
console.error()
console.error(
'Please confirm the environment you want to disconnect ALL USERS from by specifying the site URL aka PUBLIC_URL, e.g. --confirm-site-url=https://www.dev-overleaf.com for the dev-env'
)
console.error()
console.error(
`!!! --confirm-site-url=${
args['confirm-site-url'] || "''"
} does not match the PUBLIC_URL in this environment.`
)
console.error()
console.error(' Are you running this script in the correct environment?')
process.exit(1)
}
const delay = parseInt(args['delay-in-seconds'] || '10', 10)
if (!(delay >= 0)) {
console.error(
`--delay-in-seconds='${args['delay-in-seconds']}' should be a number >=0`
)
process.exit(1)
}
console.log()
console.log(
`Disconnect all users from ${args['confirm-site-url']}, with delay ${delay}`
)
if (isSaaS) {
console.error(' Use CTRL+C in the next 5s to abort.')
await sleep(5 * 1000)
}
await AdminController._sendDisconnectAllUsersMessage(delay)
}
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main()
console.error('Done.')
process.exit(0)
} catch (error) {
console.error('Error', error)
process.exit(1)
}
}

View File

@@ -0,0 +1,169 @@
import fs from 'node:fs'
import Path from 'node:path'
import { fileURLToPath } from 'node:url'
import { promiseMapWithLimit } from '@overleaf/promise-utils'
import Settings from '@overleaf/settings'
import { db } from '../app/src/infrastructure/mongodb.js'
import GracefulShutdown from '../app/src/infrastructure/GracefulShutdown.js'
import ProjectDeleter from '../app/src/Features/Project/ProjectDeleter.js'
import SplitTestManager from '../app/src/Features/SplitTests/SplitTestManager.js'
import UserDeleter from '../app/src/Features/User/UserDeleter.js'
import UserRegistrationHandler from '../app/src/Features/User/UserRegistrationHandler.js'
const MONOREPO = Path.dirname(
Path.dirname(Path.dirname(Path.dirname(fileURLToPath(import.meta.url))))
)
/**
* @param {string} email
* @return {Promise<void>}
*/
async function createUser(email) {
const user = await UserRegistrationHandler.promises.registerNewUser({
email,
password: process.env.CYPRESS_DEFAULT_PASSWORD,
})
const features = email.startsWith('free@')
? Settings.defaultFeatures
: Settings.features.professional
await db.users.updateOne(
{ _id: user._id },
{
$set: {
// Set admin flag.
isAdmin: email.startsWith('admin@'),
// Disable spell-checking for performance and flakiness reasons.
'ace.spellCheckLanguage': '',
// Override features.
features,
featuresOverrides: [{ features }],
},
}
)
}
/**
* @param {string} email
* @return {Promise<void>}
*/
async function deleteUser(email) {
const user = await db.users.findOne({ email })
if (!user) return
// Soft delete the user.
await UserDeleter.promises.deleteUser(user._id, {
force: true,
ipAddress: '0.0.0.0',
})
// Hard-delete the user's projects.
const projects = await db.deletedProjects
.find(
{ deletedProjectOwnerId: user._id },
{ projection: { deletedProjectId: 1 } }
)
.toArray()
await promiseMapWithLimit(
10,
projects.map(p => p.deletedProjectId),
ProjectDeleter.promises.expireDeletedProject
)
// Hard-delete the user.
await UserDeleter.promises.expireDeletedUser(user._id)
}
/**
* @param {string} email
* @return {Promise<void>}
*/
async function provisionUser(email) {
await deleteUser(email)
await createUser(email)
}
async function provisionUsers() {
const emails = Settings.recaptcha.trustedUsers
console.log(`> Provisioning ${emails.length} E2E users.`)
await promiseMapWithLimit(3, emails, provisionUser)
}
async function purgeNewUsers() {
const users = await db.users
.find(
{ email: Settings.recaptcha.trustedUsersRegex },
{ projection: { email: 1 } }
)
.toArray()
console.log(`> Deleting ${users.length} newly created E2E users.`)
await promiseMapWithLimit(
3,
users.map(user => user.email),
deleteUser
)
}
const SPLIT_TEST_OVERRIDES = [
// Disable Writefull: OAuth registration does not work in dev-env and its banners hide our buttons.
{
name: 'writefull-auto-account-creation',
versions: [
{
versionNumber: 1,
phase: 'release',
active: true,
analyticsEnabled: false,
variants: [{ name: 'enabled', rolloutPercent: 0, rolloutStripes: [] }],
createdAt: new Date(),
},
],
},
]
async function provisionSplitTests() {
const backup = Path.join(
MONOREPO,
'backup',
'split-tests',
new Date().toISOString() + '.json'
)
console.log(
`> Backing up previous split-tests into ${backup}. You can import them again on https://www.dev-overleaf.com/admin/split-test via the [Import] button.`
)
const splitTests = await SplitTestManager.getRuntimeTests()
await fs.promises.mkdir(Path.dirname(backup), { recursive: true })
await fs.promises.writeFile(
backup,
JSON.stringify(splitTests.sort((a, b) => (a.name > b.name ? 1 : -1)))
)
// Imported from production via https://www.overleaf.com/admin/split-test -> "Copy all split tests" -> "Copy for E2E test setup"
const SPLIT_TESTS = JSON.parse(
await fs.promises.readFile(
Path.join(MONOREPO, 'tools/saas-e2e/split-tests.json')
)
)
console.log(`> Importing ${SPLIT_TESTS.length} split-tests from production.`)
await SplitTestManager.replaceSplitTests(SPLIT_TESTS)
console.log(
`> Importing ${SPLIT_TEST_OVERRIDES.length} split-tests for test compatibility.`
)
await SplitTestManager.mergeSplitTests(SPLIT_TEST_OVERRIDES, true)
}
async function main() {
if (process.env.NODE_ENV !== 'development') {
throw new Error('only available in dev-env')
}
await purgeNewUsers()
await provisionUsers()
await provisionSplitTests()
}
await main()
await GracefulShutdown.gracefulShutdown(
{
close(cb) {
cb()
},
},
'SIGTERM'
)

View File

@@ -0,0 +1,52 @@
import { User } from '../app/src/models/User.js'
import UserController from '../app/src/Features/User/UserController.js'
import Logger from '@overleaf/logger'
import pLimit from 'p-limit'
Logger.logger.level('error')
const CONCURRENCY = 10
const failure = []
const success = []
console.log('Starting ensure affiliations')
const query = {
'emails.affiliationUnchecked': true,
}
async function _handleEnsureAffiliation(user) {
try {
await UserController.ensureAffiliation(user)
console.log(`${user._id}`)
success.push(user._id)
} catch (error) {
failure.push(user._id)
console.log(`ERROR: ${user._id}`, error)
}
}
async function getUsers() {
return User.find(query, { emails: 1 }).exec()
}
async function run() {
const limit = pLimit(CONCURRENCY)
const users = await getUsers()
console.log(`Found ${users.length} users`)
await Promise.all(
users.map(user => limit(() => _handleEnsureAffiliation(user)))
)
console.log(`${success.length} successes`)
console.log(`${failure.length} failures`)
if (failure.length > 0) {
console.log('Failed to update:', failure)
}
}
try {
await run()
process.exit()
} catch (error) {
console.log(error)
process.exit(1)
}

View File

@@ -0,0 +1,259 @@
import fs from 'node:fs'
import path from 'node:path'
import minimist from 'minimist'
const APP_CODE_PATH = ['app', 'modules', 'migrations', 'scripts', 'test']
const {
_: args,
files,
help,
json,
} = minimist(process.argv.slice(2), {
boolean: ['files', 'help', 'json'],
alias: {
files: 'f',
help: 'h',
json: 'j',
},
default: {
files: false,
help: false,
json: false,
},
})
const paths = args.length > 0 ? args : APP_CODE_PATH
function usage() {
console.error(`Usage: node check-esm-migration.js [OPTS...] dir1 dir2
node check-esm-migration.js file
Usage with directories
----------------------
When the arguments are a list of directories, it prints the status of the ES Modules migration within those directories.
When no directory is provided, it checks the default backend paths (app/, modules/, migrations/, scripts/, test/).
With the --files (-f) option, it prints the list of JS files that:
- Are not migrated to ESM
- Are not required by any file that has not yet been migrated to ESM (in the entire codebase)
These files should be the most immediate candidates to be migrated.
WARNING: please note that this script only looks up literals in require() statements, so paths
built dynamically (such as those in infrastructure/Modules.js) are not taken into account.
Usage with a JS file
--------------------
When the argument is a JS file, the script outputs the files that depend on it and have not yet been
converted to ES Modules.
The files in this list must be converted to ES Modules before the given file can be converted.
Example:
node scripts/check-esm-migration.js --files modules/admin-panel
node scripts/check-esm-migration.js app/src/router.js
Options:
--files Prints the files that are not imported by app code via CommonJS
--json Prints the result in JSON format, including the list of files from --files
--help Prints this help
`)
}
function resolveImportPaths(dir, file) {
const absolutePath = path.resolve(dir, file)
if (fs.existsSync(absolutePath)) {
return absolutePath
} else if (fs.existsSync(absolutePath + '.js')) {
return absolutePath + '.js'
} else if (fs.existsSync(absolutePath + '.mjs')) {
return absolutePath + '.mjs'
} else {
return null
}
}
function collectJsFiles(dir, files = []) {
const items = fs.readdirSync(dir)
items.forEach(item => {
const fullPath = path.join(dir, item)
const stat = fs.statSync(fullPath)
if (stat.isDirectory()) {
const basename = path.basename(fullPath)
// skipping directories from search
if (!['frontend', 'node_modules'].includes(basename)) {
collectJsFiles(fullPath, files)
}
} else if (
stat.isFile() &&
(fullPath.endsWith('.js') || fullPath.endsWith('.mjs'))
) {
files.push(fullPath)
}
})
return files
}
function extractImports(filePath) {
const fileContent = fs.readFileSync(filePath, 'utf-8')
// not 100% compliant (string escaping, etc.) but does the work here
const contentWithoutComments = fileContent.replace(
/\/\/.*|\/\*[\s\S]*?\*\//g,
''
)
const requireRegex = /require\s*\(\s*['"](.+?)['"]\s*\)/g
const dependencies = []
while (true) {
const match = requireRegex.exec(contentWithoutComments)
if (!match) {
break
}
dependencies.push(match[1])
}
// build absolute path for the imported file
return dependencies
.map(depPath => resolveImportPaths(path.dirname(filePath), depPath))
.filter(path => path !== null)
}
// Main function to process a list of directories and create the Map of dependencies
function findJSAndImports(directories) {
const fileDependenciesMap = new Map()
directories.forEach(dir => {
if (fs.existsSync(dir)) {
const jsFiles = collectJsFiles(dir)
jsFiles.forEach(filePath => {
const imports = extractImports(filePath)
fileDependenciesMap.set(filePath, imports)
})
} else {
console.error(`Directory not found: ${dir}`)
process.exit(1)
}
})
return fileDependenciesMap
}
function printDirectoriesReport(allFilesAndImports) {
// collect all files that are imported via CommonJS in the entire backend codebase
const filesImportedViaCjs = new Set()
allFilesAndImports.forEach((imports, file) => {
if (!file.endsWith('.mjs')) {
imports.forEach(imprt => filesImportedViaCjs.add(imprt))
}
})
// collect js files from the selected paths
const selectedFiles = Array.from(
findJSAndImports(paths.map(dir => path.resolve(dir))).keys()
).filter(file => !file.endsWith('settings.test.js'))
const nonMigratedFiles = selectedFiles.filter(file => !file.endsWith('.mjs'))
const migratedFileCount = selectedFiles.filter(file =>
file.endsWith('.mjs')
).length
// collect files in the selected paths that are not imported via CommonJS in the entire backend codebase
const filesNotImportedViaCjs = nonMigratedFiles.filter(
file => !filesImportedViaCjs.has(file)
)
if (json) {
console.log(
JSON.stringify(
{
fileCount: selectedFiles.length,
migratedFileCount,
filesNotImportedViaCjs,
},
null,
2
)
)
} else {
console.log(`Found ${selectedFiles.length} files in ${paths}:
- ${migratedFileCount} have been migrated to ES Modules (progress=${((migratedFileCount / selectedFiles.length) * 100).toFixed(2)}%)
- ${filesNotImportedViaCjs.length} are ready to migrate (these are not imported via CommonJS in the entire codebase)
`)
if (files) {
console.log(`Files that are ready to migrate:`)
filesNotImportedViaCjs.forEach(file =>
console.log(` - ${file.replace(process.cwd() + '/', '')}`)
)
}
}
}
function printFileReport(allFilesAndImports) {
const filePath = path.resolve(paths[0])
if (filePath.endsWith('.mjs')) {
console.log(`${filePath} is already migrated to ESM`)
return
}
const filePathWithoutExtension = filePath.replace('.js', '')
const importingFiles = []
allFilesAndImports.forEach((imports, file) => {
if (file.endsWith('.mjs')) {
return
}
if (
imports.some(
imprt => imprt === filePath || imprt === filePathWithoutExtension
)
) {
importingFiles.push(file)
}
})
if (json) {
console.log(
JSON.stringify(
{
importingFiles,
},
null,
2
)
)
} else {
console.log(`${filePath} is required by ${importingFiles.length} CJS file(s)`)
importingFiles.forEach(file =>
console.log(` - ${file.replace(process.cwd() + '/', '')}`)
)
}
}
function main() {
if (help) {
usage()
process.exit(0)
}
// collect all the js files in the entire backend codebase (APP_CODE_PATH) along with their imports
const allFilesAndImports = findJSAndImports(
APP_CODE_PATH.map(dir => path.resolve(dir))
)
const entryPoint = fs.existsSync('app.js') ? 'app.js' : 'app.mjs'
allFilesAndImports.set(path.resolve(entryPoint), extractImports(entryPoint))
const isFileReport = fs.statSync(paths[0]).isFile()
if (isFileReport) {
printFileReport(allFilesAndImports)
} else {
printDirectoriesReport(allFilesAndImports)
}
}
main()

View File

@@ -0,0 +1,30 @@
/*
* Example script for a migration:
*
* This script demonstrates how to write a script that is runnable either via
* the CLI, or via a migration. The related migration is `script_example`
* in the migrations directory.
*/
import { User } from '../../app/src/models/User.js'
import { fileURLToPath } from 'node:url'
// const somePackage = require('some-package')
const runScript = async () => {
const user = await User.findOne({}, { first_name: 1 }).exec()
const name = user ? user.first_name : 'World'
console.log(`Hello ${name}!`)
}
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await runScript()
process.exit()
} catch (error) {
console.error(error)
process.exit(1)
}
}
export default runScript

View File

@@ -0,0 +1,31 @@
// Import the script runner utility (adjust the path as needed)
import { scriptRunner } from '../lib/ScriptRunner.mjs'
const subJobs = 30
/**
* Your script's main work goes here.
* It must be an async function and accept `trackProgress`.
* @param {(message: string) => Promise<void>} trackProgress - Call this to log progress.
*/
async function main(trackProgress) {
for (let i = 0; i < subJobs; i++) {
await new Promise(resolve => setTimeout(() => resolve(), 1000))
await trackProgress(`Job in progress ${i + 1}/${subJobs}`)
}
await trackProgress('Job finished')
}
// Define any variables your script needs (optional)
const scriptVariables = {
subJobs,
}
// --- Execute the script using the runner with async/await ---
try {
await scriptRunner(main, scriptVariables)
process.exit()
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,96 @@
const csv = require('csv')
const fs = require('fs')
const {
OnboardingDataCollection,
} = require('../app/src/models/OnboardingDataCollection')
/**
* This script extracts the OnboardingDataCollection collection from the database
* and writes it to a CSV file.
*
* Usage:
* - Locally:
* - docker compose exec web bash
* - node services/web/scripts/extract_onboardingdatacollection_csv.js
* - On the server:
* - rake connect:app[staging,web]
* - node web/scripts/extract_onboardingdatacollection_csv.js
* - exit
* - kubectl cp web-standalone-prod-XXXXX:/tmp/onboardingDataCollection.csv ~/onboardingDataCollection.csv
*
*/
const mapFields = doc => {
return {
primaryOccupation: doc.primaryOccupation,
usedLatex: doc.usedLatex,
companyDivisionDepartment: doc.companyDivisionDepartment,
companyJobTitle: doc.companyJobTitle,
governmentJobTitle: doc.governmentJobTitle,
institutionName: doc.institutionName,
otherJobTitle: doc.otherJobTitle,
nonprofitDivisionDepartment: doc.nonprofitDivisionDepartment,
nonprofitJobTitle: doc.nonprofitJobTitle,
role: doc.role,
subjectArea: doc.subjectArea,
updatedAt: new Date(doc.updatedAt).toISOString(),
userId: doc._id.toString(), // _id is set as the userId
firstName: Boolean(doc.firstName).toString(),
lastName: Boolean(doc.lastName).toString(),
}
}
const runScript = async () => {
console.time('CSV Writing Duration')
console.log('Starting to write to csv file...')
const cursor = OnboardingDataCollection.find().cursor()
const csvWriter = csv.stringify({
header: true,
columns: [
'primaryOccupation',
'usedLatex',
'companyDivisionDepartment',
'companyJobTitle',
'governmentJobTitle',
'institutionName',
'otherJobTitle',
'nonprofitDivisionDepartment',
'nonprofitJobTitle',
'role',
'subjectArea',
'updatedAt',
'userId',
'firstName',
'lastName',
],
})
const writeStream = fs.createWriteStream('/tmp/onboardingDataCollection.csv')
csvWriter.pipe(writeStream)
let lineCount = 0
for (let doc = await cursor.next(); doc != null; doc = await cursor.next()) {
lineCount++
csvWriter.write(mapFields(doc))
}
csvWriter.end()
writeStream.on('finish', () => {
console.log(`Done writing to csv file. Total lines written: ${lineCount}`)
console.timeEnd('CSV Writing Duration')
process.exit()
})
writeStream.on('error', err => console.error('Write Stream Error:', err))
csvWriter.on('error', err => console.error('CSV Writer Error:', err))
}
runScript().catch(err => {
console.error(err)
process.exit(1)
})

View File

@@ -0,0 +1,202 @@
const csv = require('csv')
const fs = require('fs')
const minimist = require('minimist')
const {
OnboardingDataCollection,
} = require('../app/src/models/OnboardingDataCollection')
const { User } = require('../app/src/models/User')
const SubscriptionLocator = require('../app/src/Features/Subscription/SubscriptionLocator')
const Settings = require('@overleaf/settings')
const { fetchJson } = require('@overleaf/fetch-utils')
/**
* This script extracts ODC data with some extra fields, and filters on registration date and LaTeX experience
*
* It will:
* — filter for used_latex=never
* — augment rows with user registered date and email addresses
* — filter on users registered after a certain date
* — export updated CSV
*
* Usage:
* - Locally:
* - docker compose exec web bash
* - node scripts/extract_onboardingdatacollection_never_used_latex.js
* - On the server:
* - rake connect:app[staging,web]
* - node scripts/extract_onboardingdatacollection_never_used_latex.js
* - exit
* - kubectl cp web-standalone-prod-XXXXX:/tmp/odc_neverUsedLatex.csv ~/odc_neverUsedLatex.csv
*/
function usage() {
console.log(
`
Onboarding Data Collection extraction, outputs to /tmp/odc_neverUsedLatex.csv
Usage:
node scripts/extract_onboardingdatacollection_never_used_latex.js [--registeredBefore=<date>] [--studentsOnly] [--includeSignUpDate] [--includeCountry] [--includePlanCode]
Options:
--help Show this screen
--registeredBefore=<date> Limit to users registered before ISO 8601 date (eg. 2024-08-01)
--studentsOnly Only include users whose primary occupation is 'university' or 'school'
--includeSignUpDate Include signUpDate column
--includeCountry Include countryCode column (inferred from institution and possibly missing)
--includePlanCode Include planCode column
`
)
}
function parseArgs() {
const argv = minimist(process.argv.slice(2), {
string: ['registeredBefore'],
bool: [
'help',
'studentsOnly',
'includeSignUpDate',
'includeCountry',
'includePlanCode',
],
default: {
help: false,
studentsOnly: false,
includeSignUpDate: false,
includeCountry: false,
includePlanCode: false,
registeredBefore: '2024-02-18',
},
})
if (argv.help) {
usage()
process.exit(0)
}
return argv
}
async function getEmails(userIds, { registeredBefore }) {
const userEmails = await User.find(
{ _id: { $in: userIds }, signUpDate: { $lte: new Date(registeredBefore) } },
{ email: 1, signUpDate: 1 }
).exec()
return userEmails.map(({ email, signUpDate }) => ({
email,
signUpDate: new Date(signUpDate).toISOString(),
}))
}
async function getUsers({ studentsOnly }) {
const odcCriteria = { usedLatex: 'never' }
if (studentsOnly) {
odcCriteria.primaryOccupation = 'university'
}
const cursor = OnboardingDataCollection.find(odcCriteria).cursor()
const userIds = []
const institutionNames = []
for (let doc = await cursor.next(); doc != null; doc = await cursor.next()) {
userIds.push(doc._id.toString())
institutionNames.push(doc.institutionName)
}
return { userIds, institutionNames }
}
async function getUserPlanCodes(users) {
const planCodes = []
for (const user of users) {
const subscription =
await SubscriptionLocator.promises.getUsersSubscription(user)
planCodes.push(subscription?.planCode || 'free')
}
return planCodes
}
// inferred from institution so will not always be available or accurate
async function getUserCountries(institutions) {
const countryCodes = []
// cache any institutions we lookup to avoid making duplicate calls
const institutionLookups = {}
for (const inst of institutions) {
if (!inst) {
countryCodes.push(undefined)
continue
}
if (institutionLookups[inst]) {
countryCodes.push(institutionLookups[inst])
continue
}
try {
const url = `${Settings.apis.web.url}/institutions/search?search=${encodeURIComponent(inst)}&max_results=1`
const response = await fetchJson(url)
countryCodes.push(response[0]?.country_code)
institutionLookups[inst] = response[0]?.country_code
} catch (e) {
// if institution search fails just move on
console.log(`Error when looking up institution ${inst}: ${e.message}`)
countryCodes.push(undefined)
}
}
return countryCodes
}
async function runScript() {
const columns = ['email']
const args = parseArgs()
if (args.includeSignUpDate) {
columns.push('signUpDate')
}
const users = await getUsers(args)
let userEmails = await getEmails(users.userIds, args)
if (args.includePlanCode) {
columns.push('planCode')
const planCodes = await getUserPlanCodes(users.userIds)
userEmails = userEmails.map((user, index) => {
user.planCode = planCodes[index]
return user
})
}
if (args.includeCountry) {
columns.push('country')
const countryCodes = await getUserCountries(users.institutionNames)
userEmails = userEmails.map((user, index) => {
user.country = countryCodes[index]
return user
})
}
console.log('Starting to write to csv file...')
csv.stringify(
userEmails,
{
header: true,
columns,
},
function (err, output) {
if (err) {
console.log('error writing csv output: ', err)
process.exit(1)
}
fs.writeFileSync('/tmp/odc_neverUsedLatex.csv', output)
process.exit()
}
)
}
runScript().catch(err => {
console.error(err)
process.exit(1)
})

View File

@@ -0,0 +1,169 @@
// @ts-check
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
/**
* @typedef {Object} Doc
* @property {ObjectId} _id
* @property {string} name
*/
/**
* @typedef {Object} FileRef
* @property {ObjectId} _id
* @property {string} name
* @property {string} hash
*/
/**
* @typedef {Object} Folder
* @property {ObjectId} _id
* @property {string} name
* @property {Array<Doc>} docs
* @property {Array<Folder>} folders
* @property {Array<FileRef>} fileRefs
*/
/**
* @typedef {Object} Project
* @property {ObjectId} _id
* @property {Array<Folder>} rootFolder
*/
async function main() {
let projectsProcessed = 0
await batchedUpdate(
db.projects,
{},
/**
* @param {Array<Project>} projects
* @return {Promise<void>}
*/
async function projects(projects) {
for (const project of projects) {
projectsProcessed += 1
if (projectsProcessed % 100000 === 0) {
console.log(projectsProcessed, 'projects processed')
}
const projectId = project._id.toString()
for (const { reason, path, _id } of processProject(project)) {
console.log(
JSON.stringify({
msg: 'bad file-tree path',
projectId,
reason,
path,
_id,
})
)
}
}
},
{ _id: 1, rootFolder: 1 }
)
}
/**
* @param {Project} project
* @return {Generator<{path: string, reason: string, _id: any}, void, *>}
*/
function* processProject(project) {
if (!project.rootFolder || !Array.isArray(project.rootFolder)) {
yield { reason: 'bad rootFolder', path: 'rootFolder', _id: null }
} else if (!project.rootFolder[0]) {
yield { reason: 'missing rootFolder', path: 'rootFolder.0', _id: null }
} else {
for (const { path, reason, _id } of findBadPaths(project.rootFolder[0])) {
yield { reason, path: `rootFolder.0${path}`, _id }
}
}
}
/**
* @param {Folder} folder
* @return {Generator<{path: string, reason: string, _id: any}, void, *>}
*/
function* findBadPaths(folder) {
const folderId = folder._id
if (!(folderId instanceof ObjectId)) {
yield { path: '._id', reason: 'bad folder id', _id: folderId }
}
if (typeof folder.name !== 'string' || !folder.name) {
yield { path: '.name', reason: 'bad folder name', _id: folderId }
}
if (folder.folders && Array.isArray(folder.folders)) {
for (const [i, subfolder] of folder.folders.entries()) {
if (!subfolder || typeof subfolder !== 'object') {
yield { path: `.folders.${i}`, reason: 'bad folder', _id: folderId }
continue
}
for (const { path, reason, _id } of findBadPaths(subfolder)) {
yield { path: `.folders.${i}${path}`, reason, _id }
}
}
} else {
yield { path: '.folders', reason: 'missing .folders', _id: folderId }
}
if (folder.docs && Array.isArray(folder.docs)) {
for (const [i, doc] of folder.docs.entries()) {
if (!doc || typeof doc !== 'object') {
yield { path: `.docs.${i}`, reason: 'bad doc', _id: folderId }
continue
}
const docId = doc._id
if (!(docId instanceof ObjectId)) {
yield { path: `.docs.${i}._id`, reason: 'bad doc id', _id: docId }
// no need to check further: this doc can be deleted
continue
}
if (typeof doc.name !== 'string' || !doc.name) {
yield { path: `.docs.${i}.name`, reason: 'bad doc name', _id: docId }
}
}
} else {
yield { path: '.docs', reason: 'missing .docs', _id: folderId }
}
if (folder.fileRefs && Array.isArray(folder.fileRefs)) {
for (const [i, file] of folder.fileRefs.entries()) {
if (!file || typeof file !== 'object') {
yield { path: `.fileRefs.${i}`, reason: 'bad file', _id: folderId }
continue
}
const fileId = file._id
if (!(fileId instanceof ObjectId)) {
yield { path: `.fileRefs.${i}._id`, reason: 'bad file id', _id: fileId }
// no need to check further: this file can be deleted
continue
}
if (typeof file.name !== 'string' || !file.name) {
yield {
path: `.fileRefs.${i}.name`,
reason: 'bad file name',
_id: fileId,
}
}
if (typeof file.hash !== 'string' || !file.hash) {
yield {
path: `.fileRefs.${i}.hash`,
reason: 'bad file hash',
_id: fileId,
}
}
}
} else {
yield { path: '.fileRefs', reason: 'missing .fileRefs', _id: folderId }
}
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,80 @@
// @ts-check
import minimist from 'minimist'
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
const OPTS = parseArgs()
function usage() {
console.error('Usage: node fix_comment_id.mjs [--commit] PROJECT_ID...')
}
function parseArgs() {
const args = minimist(process.argv.slice(2), {
boolean: ['commit'],
})
if (args._.length === 0) {
usage()
process.exit(0)
}
return {
projectIds: args._,
commit: args.commit,
}
}
async function processProject(projectId) {
console.log(`Processing project ${projectId}...`)
const docRanges = await DocstoreManager.promises.getAllRanges(projectId)
let commentsUpdated = 0
for (const doc of docRanges) {
const updateCommentsInDoc = await processDoc(doc)
commentsUpdated += updateCommentsInDoc
}
if (OPTS.commit) {
console.log(`${commentsUpdated} comments updated`)
}
}
async function processDoc(doc) {
let commentsUpdated = 0
for (const comment of doc.ranges.comments ?? []) {
if (comment.op.t !== comment.id) {
console.log(
`updating comment id ${comment.id} to ${comment.op.t} in doc ${doc._id} ...`
)
if (OPTS.commit) {
await db.docs.updateOne(
{ _id: new ObjectId(doc._id) },
{
$set: {
'ranges.comments.$[element].id': new ObjectId(comment.op.t),
},
},
{
arrayFilters: [
{ 'element.op.t': { $eq: new ObjectId(comment.op.t) } },
],
}
)
commentsUpdated += 1
} else {
console.log(
`Would update comment id ${comment.id} to ${comment.op.t} (dry run)`
)
}
}
}
return commentsUpdated
}
// Main loop
for (const projectId of OPTS.projectIds) {
await processProject(projectId)
}
if (!OPTS.commit) {
console.log('This was a dry run. Rerun with --commit to apply changes')
}
process.exit(0)

View File

@@ -0,0 +1,67 @@
import { db } from '../app/src/infrastructure/mongodb.js'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
const DRY_RUN = process.env.DRY_RUN !== 'false'
console.log({
DRY_RUN,
})
function anyInviteEmailHasUppercaseChars(subscription) {
return subscription.teamInvites.some(invite => {
return /[A-Z]/.test(invite.email)
})
}
async function processBatch(subscriptions) {
for (const subscription of subscriptions) {
if (anyInviteEmailHasUppercaseChars(subscription)) {
console.log('fixing emails in group invites for', subscription._id)
if (!DRY_RUN) {
await db.subscriptions.updateOne({ _id: subscription._id }, [
{
$set: {
teamInvites: {
$map: {
input: '$teamInvites',
in: {
$mergeObjects: [
'$$this',
{
email: {
$toLower: '$$this.email',
},
},
],
},
},
},
},
},
])
}
}
}
}
async function main() {
const projection = {
_id: 1,
teamInvites: 1,
}
const query = {
'teamInvites.0': {
$exists: true,
},
}
await batchedUpdate(db.subscriptions, query, processBatch, projection)
}
try {
await main()
console.error('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}

View File

@@ -0,0 +1,306 @@
/**
* This script fixes problems found by the find_malformed_filetrees.js script.
*
* The script takes a single argument --logs pointing at the output of a
* previous run of the find_malformed_filetrees.js script.
*
* Alternatively, use an adhoc file: --logs=<(echo '{"projectId":"...","path":"..."}')
*/
import mongodb from 'mongodb-legacy'
import { db } from '../app/src/infrastructure/mongodb.js'
import ProjectLocator, {
findDeep,
} from '../app/src/Features/Project/ProjectLocator.js'
import minimist from 'minimist'
import readline from 'node:readline'
import fs from 'node:fs'
import logger from '@overleaf/logger'
const { ObjectId } = mongodb
const lastUpdated = new Date()
const argv = minimist(process.argv.slice(2), {
string: ['logs'],
})
let gracefulShutdownInitiated = false
process.on('SIGINT', handleSignal)
process.on('SIGTERM', handleSignal)
function handleSignal() {
gracefulShutdownInitiated = true
console.warn('graceful shutdown initiated, draining queue')
}
const STATS = {
processedLines: 0,
success: 0,
alreadyProcessed: 0,
hash: 0,
failed: 0,
unmatched: 0,
}
function logStats() {
console.log(
JSON.stringify({
time: new Date(),
gracefulShutdownInitiated,
...STATS,
})
)
}
setInterval(logStats, 10_000)
async function main() {
const rl = readline.createInterface({
input: fs.createReadStream(argv.logs),
})
for await (const line of rl) {
if (gracefulShutdownInitiated) break
STATS.processedLines++
if (!line.startsWith('{')) continue
try {
const { projectId, path, _id } = JSON.parse(line)
await processBadPath(projectId, path, _id)
} catch (err) {
STATS.failed++
logger.err({ line, err }, 'failed to fix tree')
}
}
}
async function processBadPath(projectId, mongoPath, _id) {
let modifiedCount
if (isRootFolder(mongoPath)) {
modifiedCount = await fixRootFolder(projectId)
} else if (isArrayElement(mongoPath)) {
modifiedCount = await removeNulls(projectId, _id)
} else if (isArray(mongoPath)) {
modifiedCount = await fixArray(projectId, mongoPath)
} else if (isFolderId(mongoPath)) {
modifiedCount = await fixFolderId(projectId, mongoPath)
} else if (isDocOrFileId(mongoPath)) {
modifiedCount = await removeElementsWithoutIds(
projectId,
parentPath(parentPath(mongoPath))
)
} else if (isName(mongoPath)) {
modifiedCount = await fixName(projectId, _id)
} else if (isHash(mongoPath)) {
console.error(`Missing file hash: ${projectId}/${_id} (${mongoPath})`)
console.error('SaaS: likely needs filestore restore')
console.error('Server Pro: please reach out to support')
STATS.hash++
return
} else {
console.error(`Unexpected mongo path: ${mongoPath}`)
STATS.unmatched++
return
}
if (modifiedCount === 0) {
STATS.alreadyProcessed++
} else {
STATS.success++
}
}
function isRootFolder(path) {
return path === 'rootFolder.0'
}
function isArray(path) {
return /\.(docs|folders|fileRefs)$/.test(path)
}
function isArrayElement(path) {
return /\.\d+$/.test(path)
}
function isFolderId(path) {
return /\.folders\.\d+\._id$/.test(path)
}
function isDocOrFileId(path) {
return /\.(docs|fileRefs)\.\d+\._id$/.test(path)
}
function isName(path) {
return /\.name$/.test(path)
}
function isHash(path) {
return /\.hash$/.test(path)
}
function parentPath(path) {
return path.slice(0, path.lastIndexOf('.'))
}
/**
* If the root folder structure is missing, set it up
*/
async function fixRootFolder(projectId) {
const result = await db.projects.updateOne(
{
_id: new ObjectId(projectId),
rootFolder: { $size: 0 },
},
{
$set: {
rootFolder: [
{
_id: new ObjectId(),
name: 'rootFolder',
folders: [],
docs: [],
fileRefs: [],
},
],
lastUpdated,
lastUpdatedBy: null, // unset lastUpdatedBy
},
}
)
return result.modifiedCount
}
/**
* Remove all nulls from the given docs/files/folders array
*/
async function removeNulls(projectId, _id) {
if (!_id) {
throw new Error('missing _id')
}
const project = await db.projects.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { rootFolder: 1 } }
)
const foundResult = findDeep(project, obj => obj?._id?.toString() === _id)
if (!foundResult) return
const { path } = foundResult
const result = await db.projects.updateOne(
{ _id: new ObjectId(projectId) },
{
$pull: {
[`${path}.folders`]: null,
[`${path}.docs`]: null,
[`${path}.fileRefs`]: null,
},
$set: {
lastUpdated,
lastUpdatedBy: null, // unset lastUpdatedBy
},
}
)
return result.modifiedCount
}
/**
* If the element at the given path is not an array, set it to an empty array
*/
async function fixArray(projectId, path) {
const result = await db.projects.updateOne(
{ _id: new ObjectId(projectId), [path]: { $not: { $type: 'array' } } },
{ $set: { [path]: [], lastUpdated, lastUpdatedBy: null } }
)
return result.modifiedCount
}
/**
* Generate a missing id for a folder
*/
async function fixFolderId(projectId, path) {
const result = await db.projects.updateOne(
{ _id: new ObjectId(projectId), [path]: { $exists: false } },
{
$set: {
[path]: new ObjectId(),
lastUpdated,
lastUpdatedBy: null, // unset lastUpdatedBy
},
}
)
return result.modifiedCount
}
/**
* Remove elements that don't have ids in the array at the given path
*/
async function removeElementsWithoutIds(projectId, path) {
const result = await db.projects.updateOne(
{ _id: new ObjectId(projectId), [path]: { $type: 'array' } },
{
$pull: { [path]: { _id: null } },
$set: {
lastUpdated,
lastUpdatedBy: null, // unset lastUpdatedBy
},
}
)
return result.modifiedCount
}
/**
* Give a name to a file/doc/folder that doesn't have one
*/
async function fixName(projectId, _id) {
if (!_id) {
throw new Error('missing _id')
}
const project = await db.projects.findOne(
{ _id: new ObjectId(projectId) },
{ projection: { rootFolder: 1 } }
)
const foundResult = findDeep(project, obj => obj?._id?.toString() === _id)
if (!foundResult) return
const { path } = foundResult
const array = ProjectLocator.findElementByMongoPath(project, parentPath(path))
const name =
path === 'rootFolder.0'
? 'rootFolder'
: findUniqueName(new Set(array.map(x => x?.name)))
const pathToName = `${path}.name`
const result = await db.projects.updateOne(
{ _id: new ObjectId(projectId), [pathToName]: { $in: [null, ''] } },
{
$set: {
[pathToName]: name,
lastUpdated,
lastUpdatedBy: null, // unset lastUpdatedBy
},
}
)
return result.modifiedCount
}
function findUniqueName(existingFilenames) {
let index = 0
let filename = 'untitled'
while (existingFilenames.has(filename)) {
index += 1
filename = `untitled-${index}`
}
return filename
}
try {
try {
await main()
} finally {
logStats()
}
if (STATS.failed > 0) {
process.exit(Math.min(STATS.failed, 99))
} else if (STATS.hash > 0) {
process.exit(100)
} else if (STATS.unmatched > 0) {
process.exit(101)
} else {
process.exit(0)
}
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,163 @@
import fs from 'node:fs'
import minimist from 'minimist'
import { ObjectId } from '../app/src/infrastructure/mongodb.js'
import DocstoreManager from '../app/src/Features/Docstore/DocstoreManager.js'
import FileStoreHandler from '../app/src/Features/FileStore/FileStoreHandler.js'
import FileWriter from '../app/src/infrastructure/FileWriter.js'
import ProjectEntityMongoUpdateHandler from '../app/src/Features/Project/ProjectEntityMongoUpdateHandler.js'
import ProjectLocator from '../app/src/Features/Project/ProjectLocator.js'
import RedisWrapper from '@overleaf/redis-wrapper'
import Settings from '@overleaf/settings'
const opts = parseArgs()
const redis = RedisWrapper.createClient(Settings.redis.web)
function parseArgs() {
const args = minimist(process.argv.slice(2), {
boolean: ['commit', 'ignore-ranges'],
})
const projectIds = args._
if (projectIds.length === 0) {
console.log(`Usage: ${process.argv[1]} [OPTS] PROJECT_ID
Options:
--commit Actually convert oversized docs to binary files
--max-doc-size Size over which docs are converted to binary files
--ignore-ranges Convert docs even if they contain ranges
`)
process.exit(0)
}
const commit = args.commit
const ignoreRanges = args['ignore-ranges']
const maxDocSize = args['max-doc-size']
? parseInt(args['max-doc-size'], 10)
: 2 * 1024 * 1024
return { projectIds, commit, ignoreRanges, maxDocSize }
}
async function main() {
for (const projectId of opts.projectIds) {
await processProject(projectId)
}
if (!opts.commit) {
console.log('This was a dry run. Re-run with --commit to apply changes')
}
}
async function processProject(projectId) {
const docIds = await getDocIds(projectId)
for (const docId of docIds) {
await processDoc(projectId, docId)
}
}
async function processDoc(projectId, docId) {
const doc = await getDoc(projectId, docId)
const size = doc.lines.reduce((sum, line) => sum + line.length + 1, 0)
if (size > opts.maxDocSize) {
if (
!opts.ignoreRanges &&
((doc.ranges.comments && doc.ranges.comments.length > 0) ||
(doc.ranges.changes && doc.ranges.changes.length > 0))
) {
console.log(
`Skipping doc ${doc.path} in project ${projectId} because it has ranges`
)
return
}
console.log(
`Converting doc ${doc.path} in project ${projectId} to binary (${size} bytes)`
)
if (opts.commit) {
const fileRef = await sendDocToFilestore(projectId, doc)
await ProjectEntityMongoUpdateHandler.promises.replaceDocWithFile(
new ObjectId(projectId),
new ObjectId(docId),
fileRef,
null // unset lastUpdatedBy
)
await deleteDocFromMongo(projectId, doc)
await deleteDocFromRedis(projectId, docId)
}
}
}
async function getDocIds(projectId) {
const docIds = await redis.smembers(`DocsIn:{${projectId}}`)
return docIds
}
async function getDoc(projectId, docId) {
const lines = await redis.get(`doclines:{${docId}}`)
const ranges = await redis.get(`Ranges:{${docId}}`)
// Also fetch the doc version; it is needed below to set the file rev.
const version = await redis.get(`DocVersion:{${docId}}`)
const { path } = await ProjectLocator.promises.findElement({
project_id: projectId,
element_id: docId,
type: 'doc',
})
return {
id: docId,
lines: JSON.parse(lines),
ranges: ranges ? JSON.parse(ranges) : {},
version: parseInt(version, 10) || 0,
path: path.fileSystem,
}
}
async function sendDocToFilestore(projectId, doc) {
const basename = doc.path.split('/').pop()
const tmpFilePath = await FileWriter.promises.writeLinesToDisk(
projectId,
doc.lines
)
try {
const { fileRef } = await FileStoreHandler.promises.uploadFileFromDisk(
projectId,
{ name: basename, rev: doc.version + 1 },
tmpFilePath
)
return fileRef
} finally {
await fs.promises.unlink(tmpFilePath)
}
}
async function deleteDocFromMongo(projectId, doc) {
const basename = doc.path.split('/').pop()
const deletedAt = new Date()
await DocstoreManager.promises.deleteDoc(
projectId,
doc.id,
basename,
deletedAt
)
}
async function deleteDocFromRedis(projectId, docId) {
await redis.del(
`Blocking:{${docId}}`,
`doclines:{${docId}}`,
`DocOps:{${docId}}`,
`DocVersion:{${docId}}`,
`DocHash:{${docId}}`,
`ProjectId:{${docId}}`,
`Ranges:{${docId}}`,
`UnflushedTime:{${docId}}`,
`Pathname:{${docId}}`,
`ProjectHistoryId:{${docId}}`,
`PendingUpdates:{${docId}}`,
`lastUpdatedAt:{${docId}}`,
`lastUpdatedBy:{${docId}}`
)
await redis.srem(`DocsIn:{${projectId}}`, docId)
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,76 @@
import mongodb from 'mongodb-legacy'
import { db } from '../app/src/infrastructure/mongodb.js'
import DocumentUpdaterHandler from '../app/src/Features/DocumentUpdater/DocumentUpdaterHandler.js'
const { ObjectId } = mongodb
const PROJECT_ID = process.env.PROJECT_ID
const DOC_ID = process.env.DOC_ID
const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true'
const DRY_RUN = process.env.DRY_RUN !== 'false'
console.log({
PROJECT_ID,
DOC_ID,
VERBOSE_LOGGING,
DRY_RUN,
})
async function main() {
const { lines, version, ranges } = await getDocument()
const size = lines.reduce((size, line) => size + line.length + 1, 0)
console.log('doc stats:', {
lineCount: lines.length,
size,
version,
})
if (!DRY_RUN) {
console.log(`updating doc ${DOC_ID} in mongo for project ${PROJECT_ID}`)
const result = await db.docs.updateOne(
{ _id: new ObjectId(DOC_ID), project_id: new ObjectId(PROJECT_ID) },
{
$set: { lines, version, ranges },
$inc: { rev: 1 }, // maintain same behaviour as Docstore upsertIntoDocCollection
$unset: {
inS3: true,
},
}
)
console.log('mongo result', result)
if (
result.matchedCount !== 1 ||
result.modifiedCount !== 1 ||
!result.acknowledged
) {
throw new Error('unexpected result from mongo update')
}
console.log(`deleting doc ${DOC_ID} from redis for project ${PROJECT_ID}`)
await DocumentUpdaterHandler.promises.deleteDoc(PROJECT_ID, DOC_ID, true)
}
}
function getDocument() {
return new Promise((resolve, reject) => {
DocumentUpdaterHandler.getDocument(
PROJECT_ID,
DOC_ID,
-1,
(error, lines, version, ranges) => {
if (error) {
reject(error)
} else {
resolve({ lines, version, ranges })
}
}
)
})
}
try {
await main()
console.error('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}

View File

@@ -0,0 +1,12 @@
const CHUNK_SIZE = 1000
// Split an array into chunks of at most `size` elements (default 1000)
export function chunkArray(array, size = CHUNK_SIZE) {
const result = []
for (let i = 0; i < array.length; i += size) {
result.push(array.slice(i, i + size))
}
return result
}
export default { chunkArray }
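// A hypothetical usage sketch (illustration only; the import path and batch
// handler below are assumptions, not part of this helper):
//
// import { chunkArray } from './chunkArray.mjs'
//
// for (const batch of chunkArray(userIds, 500)) {
//   await processBatch(batch) // hypothetical per-batch worker
// }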

View File

@@ -0,0 +1,32 @@
/**
 * Ensures that the MongoDB socket timeout (MONGO_SOCKET_TIMEOUT) is set to the given value.
*
* @param {number} timeoutInMS
* @returns {void}
*/
export function ensureMongoTimeout(timeoutInMS) {
if (process.env.MONGO_SOCKET_TIMEOUT !== timeoutInMS.toString()) {
throw new Error(
`must run with higher mongo timeout: MONGO_SOCKET_TIMEOUT=${timeoutInMS} node ${process.argv[1]}`
)
}
}
/**
 * Ensures MongoDB queries run against the secondary and that at least the given socket timeout is set.
*
* @param {number} timeoutInMS
* @returns {void}
*/
export function ensureRunningOnMongoSecondaryWithTimeout(timeoutInMS) {
const timeout = parseInt(process.env.MONGO_SOCKET_TIMEOUT, 10) || 0
if (
timeout < timeoutInMS ||
process.env.MONGO_CONNECTION_STRING !==
process.env.READ_ONLY_MONGO_CONNECTION_STRING
) {
throw new Error(
`must run on secondary with higher mongo timeout: MONGO_SOCKET_TIMEOUT=${timeoutInMS} MONGO_CONNECTION_STRING="$READ_ONLY_MONGO_CONNECTION_STRING" node ${process.argv[1]}`
)
}
}
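// Hypothetical usage sketch (the script name and timeout value below are
// assumptions for illustration): call one of these helpers at the top of a
// maintenance script, before opening any cursors.
//
// import { ensureRunningOnMongoSecondaryWithTimeout } from './helpers/env_variable_helper.mjs'
// ensureRunningOnMongoSecondaryWithTimeout(300000)
//
// and invoke it as:
// MONGO_SOCKET_TIMEOUT=300000 MONGO_CONNECTION_STRING="$READ_ONLY_MONGO_CONNECTION_STRING" node scripts/my_script.mjs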

View File

@@ -0,0 +1,73 @@
import { db } from '../../app/src/infrastructure/mongodb.js'
import { ensureMongoTimeout } from '../helpers/env_variable_helper.mjs'
// Ensure the default mongo socket timeout has been increased
if (!process.env.MONGO_SOCKET_TIMEOUT) {
ensureMongoTimeout(360000)
}
async function main() {
await checkAllProjectsAreMigrated()
await setAllowDowngradeToFalse()
await deleteHistoryCollections()
console.log('Legacy history data cleaned up successfully')
process.exit(0)
}
async function checkAllProjectsAreMigrated() {
console.log('checking all projects are migrated to Full Project History')
const count = await db.projects.countDocuments({
'overleaf.history.display': { $ne: true },
})
if (count === 0) {
console.log('All projects are migrated to Full Project History')
} else {
console.error(
`There are ${count} projects that are not migrated to Full Project History.` +
' Please complete the migration before running this script again.'
)
process.exit(1)
}
}
async function setAllowDowngradeToFalse() {
console.log('unsetting `allowDowngrade` flag in all projects')
await db.projects.updateMany(
{
'overleaf.history.id': { $exists: true },
'overleaf.history.allowDowngrade': true,
},
{ $unset: { 'overleaf.history.allowDowngrade': 1 } }
)
console.log('unsetting `allowDowngrade` flag in all projects - Done')
}
async function deleteHistoryCollections() {
await gracefullyDropCollection(db.docHistory)
await gracefullyDropCollection(db.docHistoryIndex)
await gracefullyDropCollection(db.projectHistoryMetaData)
}
async function gracefullyDropCollection(collection) {
const collectionName = collection.collectionName
console.log(`removing \`${collectionName}\` data`)
try {
await collection.drop()
} catch (err) {
if (err.code === 26) {
// collection already deleted
console.log(`removing \`${collectionName}\` data - Already removed`)
} else {
throw err
}
}
console.log(`removing \`${collectionName}\` data - Done`)
}
try {
await main()
} catch (err) {
console.error(err)
process.exit(1)
}

View File

@@ -0,0 +1,119 @@
import HistoryRangesSupportMigration from '../../app/src/Features/History/HistoryRangesSupportMigration.mjs'
import minimist from 'minimist'
async function main() {
const {
projectIds,
ownerIds,
minId,
maxId,
maxCount,
direction,
force,
stopOnError,
quickOnly,
concurrency,
} = parseArgs()
await HistoryRangesSupportMigration.promises.migrateProjects({
projectIds,
ownerIds,
minId,
maxId,
maxCount,
direction,
force,
stopOnError,
quickOnly,
concurrency,
})
}
function usage() {
console.error(`Usage: migrate_ranges_support.mjs [OPTIONS]
Options:
--help Print this help
--owner-id Migrate all projects owned by this owner
--project-id Migrate this project
--min-id Migrate projects from this id
--max-id Migrate projects to this id
--max-count Migrate at most this number of projects
--all Migrate all projects
--backwards Disable history ranges support for selected project ids
--force Migrate projects even if they were already migrated
--stop-on-error Stop after first migration error
--quick-only Do not try a resync migration if quick migration fails
--concurrency How many jobs to run in parallel
`)
}
function parseArgs() {
const args = minimist(process.argv.slice(2), {
boolean: ['backwards', 'help', 'all', 'force', 'quick-only'],
string: ['owner-id', 'project-id', 'min-id', 'max-id'],
})
if (args.help) {
usage()
process.exit(0)
}
const direction = args.backwards ? 'backwards' : 'forwards'
const ownerIds = arrayOpt(args['owner-id'])
const projectIds = arrayOpt(args['project-id'])
const minId = args['min-id']
const maxId = args['max-id']
const maxCount = args['max-count']
const force = args.force
const stopOnError = args['stop-on-error']
const quickOnly = args['quick-only']
const concurrency = args.concurrency ?? 1
const all = args.all
if (
!all &&
ownerIds == null &&
projectIds == null &&
minId == null &&
maxId == null &&
maxCount == null
) {
console.error(
'Please specify at least one filter, or --all to process all projects\n'
)
usage()
process.exit(1)
}
return {
ownerIds,
projectIds,
minId,
maxId,
maxCount,
direction,
force,
stopOnError,
quickOnly,
concurrency,
}
}
function arrayOpt(value) {
if (typeof value === 'string') {
return [value]
} else if (Array.isArray(value)) {
return value
} else {
return undefined
}
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,38 @@
/*
* Creates the HTML for the institution in the institution table on /for/universities
*/
const name = process.argv[2]
const href = process.argv[3]
const image = process.argv[4]
function create() {
if (!name) {
return console.log('Error: Institution name is required')
}
const eventLabel = name.replace(/ /g, '-').replace(/\(|\)/g, '')
if (!href) {
return console.log('Error: Institution portal href is required')
}
let result = ` <div class="row">`
result += `\n <div class="col-sm-2 col-xs-3 text-center">`
if (image) {
result += `\n <img alt="${name}" class="uni-logo" src="${image}">`
}
result += `\n </div>`
result += `\n <div class="col-sm-8 col-xs-5">
<p>
<strong>${name}</strong>
</p>`
result += `\n </div>`
result += `\n <div class="col-sm-2 col-xs-4 university-claim-btn">
<a class="btn btn-primary" href="${href}" event-tracking-ga="For-Pages" event-tracking="Universities-Click-Edu" event-tracking-label="View-${eventLabel}" event-tracking-trigger="click">VIEW</a>
</div>`
result += '\n </div>'
console.log(result)
}
create()

View File

@@ -0,0 +1,52 @@
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
import minimist from 'minimist'
const argv = minimist(process.argv.slice(2))
const commit = argv.commit !== undefined
const projectIds = argv._.map(x => {
return new ObjectId(x)
})
if (!commit) {
console.log('Doing dry run without --commit')
}
console.log('checking', projectIds.length, 'projects')
const affectedProjects = await db.projects
.find(
{ _id: { $in: projectIds } },
{
projection: {
_id: 1,
owner_ref: 1,
tokenAccessReadOnly_refs: 1,
tokenAccessReadAndWrite_refs: 1,
},
}
)
.toArray()
console.log('Found ' + affectedProjects.length + ' affected projects')
affectedProjects.forEach(project => {
console.log(JSON.stringify(project))
})
if (!commit) {
console.log('dry run, not updating')
process.exit(0)
} else {
try {
const result = await db.projects.updateMany(
{ _id: { $in: affectedProjects.map(project => project._id) } },
{
$set: {
publicAccesLevel: 'private', // note the spelling in the db is publicAccesLevel (with one 's')
tokenAccessReadOnly_refs: [],
tokenAccessReadAndWrite_refs: [],
},
}
)
console.log('result', JSON.stringify(result))
process.exit(0)
} catch (err) {
console.error('err', err)
process.exit(1)
}
}

View File

@@ -0,0 +1,32 @@
# Usage
```
node scripts/learn/checkSanitize/index.mjs https://LEARN_WIKI
```
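The style handling in `checkSanitizeOptions.mjs` can be tuned with environment variables; for example, to dump the extracted CSS into the local `data/dumpFolder` directory and keep `<style>` blocks in the comparison (this flag combination is just one possibility):
```
EXTRACT_STYLES=true OMIT_STYLE=false node scripts/learn/checkSanitize/index.mjs https://LEARN_WIKI
```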
## Bulk export
There is a bulk export for MediaWiki pages, but it produces different
HTML escaping compared to the regular parse API we use in web.
In particular, the bulk export does not escape all of the placeholder HTML-like
elements, such as `<project-id` or `<document goes here>`.
## Example output
Here is how a missing tag gets flagged:
```
---
page : MediaWiki markup for the Overleaf support team
title : MediaWiki markup for the Overleaf support team
match : false
toText : false
text : "Overleaf</strong></td>\n </tr>\n <tr><td>Kb/<strong>TITLE_SLUG</strong></td><td><nowiki>https://www.overleaf.com/learn/how-to/</nowiki><strong>TITLE_SLUG</strong></td>\n </"
sanitized : "Overleaf</strong></td>\n </tr>\n <tr><td>Kb/<strong>TITLE_SLUG</strong></td><td>&lt;nowiki&gt;https://www.overleaf.com/learn/how-to/&lt;/nowiki&gt;<strong>TITLE_SLUG</strong></td>\n "
textToText : " \n \n \n \n MediaWiki page\n Maps to on Overleaf\n \n Kb/TITLE_SLUGhttps://www.overleaf.com/learn/how-to/TITLE_SLUG\n "
sanitizedToText: " \n \n \n \n MediaWiki page\n Maps to on Overleaf\n \n Kb/TITLE_SLUG<nowiki>https://www.overleaf.com/learn/how-to/</nowiki>TITLE"
```
Note the hidden/escaped `<nowiki>` element.
In addition to the side-by-side comparison of HTML you will see a plain-text diff.

View File

@@ -0,0 +1,118 @@
import crypto from 'node:crypto'
import fs from 'node:fs'
import Path from 'node:path'
import cheerio from 'cheerio'
// checkSanitizeOptions is only used in dev env
// eslint-disable-next-line import/no-extraneous-dependencies
import prettier from 'prettier'
import sanitizeHtml from 'sanitize-html'
import { sanitizeOptions } from '../../../modules/learn/app/src/sanitizeOptions.js'
import { fileURLToPath } from 'node:url'
const __dirname = Path.dirname(fileURLToPath(import.meta.url))
const EXTRACT_STYLE = process.env.EXTRACT_STYLES === 'true'
const OMIT_STYLE = process.env.OMIT_STYLE !== 'false'
const DUMP_CSS_IN = Path.join(
Path.dirname(Path.dirname(Path.dirname(__dirname))),
'data',
'dumpFolder'
)
function hash(blob) {
return crypto.createHash('sha1').update(blob).digest('hex')
}
function normalize(blob, title) {
// styles are dropped in web and kept in wiki pages for previewing there.
blob = blob.replace(/<style>(.+?)<\/style>/gs, (_, match) => {
if (EXTRACT_STYLE) {
// normalize css with prettier
const css = prettier.format(match, { parser: 'css' })
fs.writeFileSync(
Path.join(DUMP_CSS_IN, `${hash(css)}-${encodeURIComponent(title)}.css`),
`/* title: ${title} */\n\n${css}`
)
}
if (OMIT_STYLE) {
return ''
}
return match
})
// strip comments:
// - comment at the bottom of each page
blob = blob.replace(/<!-- \nNewPP limit report.+/s, '')
// - annotation of math characters
blob = blob.replace(/<!-- . -->/g, '')
// wrap for consistent rendering
if (blob.indexOf('<html><head>') !== 0) {
blob = `<html><head>${blob}</head></html>`
}
// normalize inline style:
// - drop trailing ;
blob = blob.replace(/style="([^"]+);"/g, (_, style) => `style="${style}"`)
// - normalize whitespace
blob = blob.replace(
/style="([^"]+)"/g,
(_, style) => `style="${style.trim().replace(/([:;])\s+/g, '$1')}"`
)
// let cheerio do another pass
return cheerio.load(blob).html()
}
function toText(blob) {
return cheerio.load(blob).text()
}
const zoomOut = 50
function peak(content, offset) {
// show some more content before/after the mismatch
if (offset > zoomOut) {
offset -= zoomOut
}
// wrap in JSON to escape new line characters
return JSON.stringify(content.slice(offset, offset + chunkSize + 2 * zoomOut))
}
const chunkSize = 100
function findFirstMismatch(a, b) {
if (a === b) return a.length
let i = 0
while (
a.length > chunkSize &&
b.length > chunkSize &&
a.slice(0, chunkSize) === b.slice(0, chunkSize)
) {
i++
a = a.slice(chunkSize)
b = b.slice(chunkSize)
}
return i * chunkSize
}
function checkSanitizeOptions(page, title, text) {
text = normalize(text, title)
const sanitized = normalize(sanitizeHtml(text, sanitizeOptions))
if (text === sanitized) return
const offset = findFirstMismatch(text, sanitized)
const textToText = toText(text)
const sanitizedToText = toText(sanitized)
const offsetText = findFirstMismatch(textToText, sanitizedToText)
console.error('---')
console.error('page :', page)
console.error('title :', title)
console.error('match :', text === sanitized)
console.error('toText :', toText(text) === toText(sanitized))
console.error('text :', peak(text, offset))
console.error('sanitized :', peak(sanitized, offset))
console.error('textToText :', peak(textToText, offsetText))
console.error('sanitizedToText:', peak(sanitizedToText, offsetText))
}
export default checkSanitizeOptions

View File

@@ -0,0 +1,41 @@
import checkSanitizeOptions from './checkSanitizeOptions.mjs'
import Scrape from './scrape.mjs'
import { fileURLToPath } from 'node:url'
const { getAllPagesAndCache, scrapeAndCachePage } = Scrape
async function main() {
const BASE_URL = process.argv.pop()
if (!BASE_URL.startsWith('http')) {
throw new Error(
'Usage: node scripts/learn/checkSanitize/index.mjs https://LEARN_WIKI'
)
}
const pages = await getAllPagesAndCache(BASE_URL)
for (const page of pages) {
try {
const parsed = await scrapeAndCachePage(BASE_URL, page)
const title = parsed.title
const text = parsed.text ? parsed.text['*'] : ''
checkSanitizeOptions(page, title, text)
} catch (e) {
console.error('---')
console.error(page, e)
throw e
}
}
}
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}
}

View File

@@ -0,0 +1,130 @@
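// Helpers for fetching MediaWiki pages from the learn wiki via its api.php
// endpoint. Page lists and parsed pages are cached on disk under
// data/learnPages, so repeated runs only hit the wiki for uncached pages;
// delete that folder to force a re-fetch.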
import Path from 'node:path'
import fs from 'node:fs'
import {
fetchString,
fetchJson,
RequestFailedError,
} from '@overleaf/fetch-utils'
import crypto from 'node:crypto'
import { fileURLToPath } from 'node:url'
const __dirname = Path.dirname(fileURLToPath(import.meta.url))
const CACHE_IN = Path.join(
Path.dirname(Path.dirname(Path.dirname(__dirname))),
'data',
'learnPages'
)
async function scrape(baseUrl, page) {
const uri = new URL(baseUrl + '/learn-scripts/api.php')
uri.search = new URLSearchParams({
page,
action: 'parse',
format: 'json',
redirects: true,
}).toString()
try {
return await fetchString(uri)
} catch (err) {
if (err instanceof RequestFailedError) {
console.error(err.response.status, page, err.response)
} else {
console.error(err)
}
}
}
function hash(blob) {
return crypto.createHash('sha1').update(blob).digest('hex')
}
function getName(page) {
let enc = encodeURIComponent(page)
// Some MediaWiki titles are VERY long.
// Percent-encoding can push them past the filesystem's filename length limit,
// so truncate and append a hash of the full title.
if (enc.length > 100) {
enc = enc.slice(0, 100) + hash(page)
}
return enc
}
async function scrapeAndCachePage(baseUrl, page) {
const path = Path.join(CACHE_IN, getName(page) + '.json')
try {
return JSON.parse(await fs.promises.readFile(path, 'utf-8'))
} catch (e) {
const blob = await scrape(baseUrl, page)
const parsed = JSON.parse(blob).parse
if (!parsed) {
console.error(page, blob)
throw new Error('bad contents')
}
await fs.promises.mkdir(CACHE_IN, { recursive: true })
await fs.promises.writeFile(path, JSON.stringify(parsed, null, 2), 'utf-8')
return parsed
}
}
async function getAllPagesFrom(baseUrl, continueFrom) {
// https://learn.overleaf.com/learn/Special:ApiSandbox#action=query&format=json&generator=allpages&gapfilterredir=nonredirects
const uri = new URL(baseUrl + '/learn-scripts/api.php')
uri.search = new URLSearchParams({
action: 'query',
format: 'json',
generator: 'allpages',
// Ignore pages with redirects. We do not want to check page content twice.
gapfilterredir: 'nonredirects',
// Bump the default page size from 10 up to 100.
gaplimit: 100,
...continueFrom,
}).toString()
let blob
try {
blob = await fetchJson(uri)
} catch (err) {
if (err instanceof RequestFailedError) {
console.error(err.response.status, continueFrom, err.response)
} else {
console.error(err)
throw err
}
}
const nextContinueFrom = blob && blob.continue
const pagesRaw = (blob && blob.query && blob.query.pages) || {}
const pages = Object.values(pagesRaw).map(page => page.title)
return { nextContinueFrom, pages }
}
async function getAllPages(baseUrl) {
let continueFrom = {}
let allPages = []
while (true) {
const { nextContinueFrom, pages } = await getAllPagesFrom(
baseUrl,
continueFrom
)
allPages = allPages.concat(pages)
if (!nextContinueFrom) break
continueFrom = nextContinueFrom
}
return allPages.sort()
}
async function getAllPagesAndCache(baseUrl) {
const path = Path.join(CACHE_IN, 'allPages.txt')
try {
return JSON.parse(await fs.promises.readFile(path, 'utf-8'))
} catch (e) {
const allPages = await getAllPages(baseUrl)
await fs.promises.mkdir(CACHE_IN, { recursive: true })
await fs.promises.writeFile(path, JSON.stringify(allPages), 'utf-8')
return allPages
}
}
export default {
getAllPagesAndCache,
scrapeAndCachePage,
}

View File

@@ -0,0 +1,66 @@
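// Benchmarks the strict lezer-latex parser: parses every example document in
// test/unit/src/LezerLatex/examples repeatedly and reports the average parse time.
//
// Options: --ops N  parses per file (default 100)
//          --csv    emit comma-separated output (name,avgTimeMs,docLength)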
import { parser } from '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
import * as fs from 'node:fs'
import * as path from 'node:path'
import { fileURLToPath } from 'node:url'
import minimist from 'minimist'
const argv = minimist(process.argv.slice(2))
const NUMBER_OF_OPS = argv.ops || 100
const CSV_OUTPUT = argv.csv || false
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const examplesDir = path.join(
__dirname,
'../../test/unit/src/LezerLatex/examples'
)
const strictParser = parser.configure({ strict: true }) // throw exception for invalid documents
if (!fs.existsSync(examplesDir)) {
console.error('No examples directory')
process.exit()
}
function dumpParserStats(parser) {
console.log('Parser size:')
console.dir({
states: parser.states.length,
data: parser.data.length,
goto: parser.goto.length,
})
}
dumpParserStats(strictParser)
const folder = examplesDir
for (const file of fs.readdirSync(folder).sort()) {
if (!/\.tex$/.test(file)) continue
const name = /^[^.]*/.exec(file)[0]
const content = fs.readFileSync(path.join(folder, file), 'utf8')
benchmark(name, content)
}
function benchmark(name, content) {
let timeSum = 0
try {
for (let i = 0; i < NUMBER_OF_OPS; ++i) {
const startTime = performance.now()
strictParser.parse(content)
const endTime = performance.now()
timeSum += endTime - startTime
}
const avgTime = timeSum / NUMBER_OF_OPS
if (CSV_OUTPUT) {
console.log(`${name},${avgTime.toFixed(2)},${content.length}`)
} else {
console.log(
`${name.padEnd(20)} time to run (ms):\t ${avgTime.toFixed(2)}`
)
}
} catch (error) {
console.error(`${name.padEnd(20)} ${error}`)
}
}

View File

@@ -0,0 +1,69 @@
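// Compiles the lezer-latex and lezer-bibtex grammars into ES-module parser and
// term files alongside their .grammar sources (paths listed in `grammars` below).
// Runs all compilations when executed directly, and also exports
// { compile, grammars } for use by other build scripts.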
const { buildParserFile } = require('@lezer/generator')
const { writeFileSync, readFileSync } = require('fs')
const path = require('path')
const grammars = [
{
grammarPath: path.resolve(
__dirname,
'../../frontend/js/features/source-editor/lezer-latex/latex.grammar'
),
parserOutputPath: path.resolve(
__dirname,
'../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
),
termsOutputPath: path.resolve(
__dirname,
'../../frontend/js/features/source-editor/lezer-latex/latex.terms.mjs'
),
},
{
grammarPath: path.resolve(
__dirname,
'../../frontend/js/features/source-editor/lezer-bibtex/bibtex.grammar'
),
parserOutputPath: path.resolve(
__dirname,
'../../frontend/js/features/source-editor/lezer-bibtex/bibtex.mjs'
),
termsOutputPath: path.resolve(
__dirname,
'../../frontend/js/features/source-editor/lezer-bibtex/bibtex.terms.mjs'
),
},
]
function compile(grammar) {
const { grammarPath, termsOutputPath, parserOutputPath } = grammar
const moduleStyle = 'es'
console.info(`Compiling ${grammarPath}`)
const grammarText = readFileSync(grammarPath, 'utf8')
console.info(`Loaded grammar from ${grammarPath}`)
const { parser, terms } = buildParserFile(grammarText, {
fileName: grammarPath,
moduleStyle,
})
console.info(`Built parser`)
writeFileSync(parserOutputPath, parser)
console.info(`Wrote parser to ${parserOutputPath}`)
writeFileSync(termsOutputPath, terms)
console.info(`Wrote terms to ${termsOutputPath}`)
console.info('Done!')
}
module.exports = { compile, grammars }
if (require.main === module) {
try {
grammars.forEach(compile)
process.exit(0)
} catch (err) {
console.error(err)
process.exit(1)
}
}

View File

@@ -0,0 +1,215 @@
// from https://gist.github.com/msteen/e4828fbf25d6efef73576fc43ac479d2
// https://discuss.codemirror.net/t/whats-the-best-to-test-and-debug-grammars/2542/5
// MIT License
//
// Copyright (c) 2021 Matthijs Steen
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
import { Text } from '@codemirror/state'
import { Tree, TreeCursor } from '@lezer/common'
class StringInput {
constructor(input) {
this.input = input
this.lineChunks = false
}
get length() {
return this.input.length
}
chunk(from) {
return this.input.slice(from)
}
read(from, to) {
return this.input.slice(from, to)
}
}
function cursorNode({ type, from, to }, isLeaf = false) {
return { type, from, to, isLeaf }
}
function traverseTree(
cursor,
{
from = -Infinity,
to = Infinity,
includeParents = false,
beforeEnter,
onEnter,
onLeave,
}
) {
if (!(cursor instanceof TreeCursor))
cursor = cursor instanceof Tree ? cursor.cursor() : cursor.cursor()
for (;;) {
let node = cursorNode(cursor)
let leave = false
if (node.from <= to && node.to >= from) {
const enter =
!node.type.isAnonymous &&
(includeParents || (node.from >= from && node.to <= to))
if (enter && beforeEnter) beforeEnter(cursor)
node.isLeaf = !cursor.firstChild()
if (enter) {
leave = true
if (onEnter(node) === false) return
}
if (!node.isLeaf) continue
}
for (;;) {
node = cursorNode(cursor, node.isLeaf)
if (leave && onLeave) if (onLeave(node) === false) return
leave = cursor.type.isAnonymous
node.isLeaf = false
if (cursor.nextSibling()) break
if (!cursor.parent()) return
leave = true
}
}
}
function isChildOf(child, parent) {
return (
child.from >= parent.from &&
child.from <= parent.to &&
child.to <= parent.to &&
child.to >= parent.from
)
}
function validatorTraversal(input, { fullMatch = true } = {}) {
if (typeof input === 'string') input = new StringInput(input)
const state = {
valid: true,
parentNodes: [],
lastLeafTo: 0,
}
return {
state,
traversal: {
onEnter(node) {
state.valid = true
if (!node.isLeaf) state.parentNodes.unshift(node)
if (node.from > node.to || node.from < state.lastLeafTo) {
state.valid = false
} else if (node.isLeaf) {
if (
state.parentNodes.length &&
!isChildOf(node, state.parentNodes[0])
)
state.valid = false
state.lastLeafTo = node.to
} else {
if (state.parentNodes.length) {
if (!isChildOf(node, state.parentNodes[0])) state.valid = false
} else if (
fullMatch &&
(node.from !== 0 || node.to !== input.length)
) {
state.valid = false
}
}
},
onLeave(node) {
if (!node.isLeaf) state.parentNodes.shift()
},
},
}
}
let Color
;(function (Color) {
Color[(Color.Red = 31)] = 'Red'
Color[(Color.Green = 32)] = 'Green'
Color[(Color.Yellow = 33)] = 'Yellow'
})(Color || (Color = {}))
function colorize(value, color) {
return '\u001b[' + color + 'm' + String(value) + '\u001b[39m'
}
function printTree(
cursor,
input,
{ from, to, start = 0, includeParents } = {}
) {
const inp = typeof input === 'string' ? new StringInput(input) : input
const text = Text.of(inp.read(0, inp.length).split('\n'))
const state = {
output: '',
prefixes: [],
hasNextSibling: false,
}
const validator = validatorTraversal(inp)
traverseTree(cursor, {
from,
to,
includeParents,
beforeEnter(cursor) {
state.hasNextSibling = cursor.nextSibling() && cursor.prevSibling()
},
onEnter(node) {
validator.traversal.onEnter(node)
const isTop = state.output === ''
const hasPrefix = !isTop || node.from > 0
if (hasPrefix) {
state.output += (!isTop ? '\n' : '') + state.prefixes.join('')
if (state.hasNextSibling) {
state.output += ' ├─ '
state.prefixes.push(' │ ')
} else {
state.output += ' └─ '
state.prefixes.push(' ')
}
}
const hasRange = node.from !== node.to
state.output +=
(node.type.isError || !validator.state.valid
? colorize('ERROR ' + node.type.name, Color.Red)
: node.type.name) +
' ' +
(hasRange
? '[' +
colorize(locAt(text, start + node.from), Color.Yellow) +
'..' +
colorize(locAt(text, start + node.to), Color.Yellow) +
']'
: colorize(locAt(text, start + node.from), Color.Yellow))
if (hasRange && node.isLeaf) {
state.output +=
': ' +
colorize(JSON.stringify(inp.read(node.from, node.to)), Color.Green)
}
},
onLeave(node) {
validator.traversal.onLeave(node)
state.prefixes.pop()
},
})
return state.output
}
function locAt(text, pos) {
const line = text.lineAt(pos)
return line.number + ':' + (pos - line.from)
}
export function logTree(tree, input, options) {
console.warn(printTree(tree, input, options))
}

View File

@@ -0,0 +1,19 @@
// Super quick and dirty LCG PRNG
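// Recurrence: X_{n+1} = (a * X_n + c) mod m. seed() makes runs reproducible and
// random() returns a float in [0, 1); used to pick reproducible edit positions
// in the parser performance tests.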
const m = 0xffffffff
let X = Math.floor(Math.random() * (m - 1))
const a = 16807
const c = 0
// The seed should be a large-ish positive integer
export function seed(i) {
if (i <= 0) {
throw new Error('Seed must be a positive integer')
}
X = i & m
}
export function random() {
X = (a * X + c) % m
return X / m
}

View File

@@ -0,0 +1,79 @@
import { readFileSync } from 'node:fs'
import { logTree } from './print-tree.mjs'
import { parser as LaTeXParser } from '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
import { parser as BibTeXParser } from '../../frontend/js/features/source-editor/lezer-bibtex/bibtex.mjs'
// Runs the lezer-latex or lezer-bibtex parser on a supplied file, and prints the resulting
// parse tree to stdout
//
// show parse tree: lezer-latex-run.js test/unit/src/LezerLatex/examples/amsmath.tex
// lezer-latex-run.js test/unit/src/LezerLatex/examples/overleaf.bib
// show error summary: lezer-latex-run.js coverage test/unit/src/LezerLatex/examples/amsmath.tex
let files = process.argv.slice(2)
if (!files.length) {
files = ['test/unit/src/LezerLatex/examples/demo.tex']
}
let coverage = false
if (files[0] === 'coverage') {
// count errors
coverage = true
files.shift()
}
function reportErrorCounts(output) {
if (coverage) process.stdout.write(output)
}
function parseFile(filename) {
const text = readFileSync(filename).toString()
const t0 = process.hrtime()
const parser = filename.endsWith('.bib') ? BibTeXParser : LaTeXParser
const tree = parser.parse(text)
const dt = process.hrtime(t0)
const timeTaken = dt[0] + dt[1] * 1e-9
let errorCount = 0
let nodeCount = 0
tree.iterate({
enter: syntaxNodeRef => {
nodeCount++
if (syntaxNodeRef.type.isError) {
errorCount++
}
},
})
if (!coverage) logTree(tree, text)
return { nodeCount, errorCount, timeTaken, bytes: text.length }
}
let totalErrors = 0
let totalTime = 0
let totalBytes = 0
for (const file of files) {
const { nodeCount, errorCount, timeTaken, bytes } = parseFile(file)
const errorRate = Math.round((100 * errorCount) / nodeCount)
totalErrors += errorCount
totalTime += timeTaken
totalBytes += bytes
reportErrorCounts(
`${errorCount} errors`.padStart(12) +
`${nodeCount} nodes`.padStart(12) +
`(${errorRate}%)`.padStart(6) +
`${(1000 * timeTaken).toFixed(1)} ms`.padStart(8) +
`${(bytes / 1024).toFixed(1)} KB`.padStart(8) +
` ${file}\n`
)
}
const timeInMilliseconds = 1000 * totalTime
const hundredKBs = totalBytes / (100 * 1024)
reportErrorCounts(
`\ntotal errors ${totalErrors}, performance ${(
timeInMilliseconds / hundredKBs
).toFixed(1)} ms/100KB \n`
)
if (totalErrors > 0) {
process.exit(1) // return non-zero exit status for tests
}

View File

@@ -0,0 +1,163 @@
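// Measures incremental re-parse performance of the lezer-latex parser: applies
// single-character insertions and deletions to each example document, re-parsing
// after every edit while reusing TreeFragments, and reports the average parse
// time per edit for each scenario.
//
// Options: --ops N   edits per scenario (default 1000)
//          --csv     emit comma-separated output
//          --seed S  seed the PRNG used to pick random edit positions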
import { parser } from '../../frontend/js/features/source-editor/lezer-latex/latex.mjs'
import * as fs from 'node:fs'
import * as path from 'node:path'
import { fileURLToPath } from 'node:url'
import { TreeFragment } from '@lezer/common'
import minimist from 'minimist'
import { seed, random } from './random.mjs'
const argv = minimist(process.argv.slice(2))
const NUMBER_OF_OPS = argv.ops || 1000
const CSV_OUTPUT = argv.csv || false
const SEED = argv.seed
if (SEED) {
seed(SEED)
}
const __dirname = path.dirname(fileURLToPath(import.meta.url))
const examplesDir = path.join(
__dirname,
'../../test/unit/src/LezerLatex/examples'
)
const folder = examplesDir
for (const file of fs.readdirSync(folder).sort()) {
if (!/\.tex$/.test(file)) continue
const name = /^[^.]*/.exec(file)[0]
const content = fs.readFileSync(path.join(folder, file), 'utf8')
runPerformanceTests(name, content)
}
function runPerformanceTests(name, content) {
const insertEnd = writeTextAt(
content,
content.length,
content.substring(0, NUMBER_OF_OPS)
)
const insertBeginning = writeTextAt(
content,
0,
content.substring(0, NUMBER_OF_OPS)
)
const insertMiddle = writeTextAt(
content,
Math.floor(content.length / 2),
content.substring(0, NUMBER_OF_OPS)
)
const randomDelete = randomDeletions(content, NUMBER_OF_OPS)
const middleDelete = deletionsFromMiddle(content, NUMBER_OF_OPS)
const randomInsert = randomInsertions(content, NUMBER_OF_OPS)
if (CSV_OUTPUT) {
console.log(
[
name,
insertBeginning.average,
insertMiddle.average,
insertEnd.average,
randomInsert.average,
randomDelete.average,
middleDelete.average,
content.length,
].join(',')
)
} else {
console.log({
name,
insertAtEnd: insertEnd.average,
insertAtBeginning: insertBeginning.average,
insertAtMiddle: insertMiddle.average,
randomDelete: randomDelete.average,
middleDelete: middleDelete.average,
randomInsert: randomInsert.average,
docLength: content.length,
})
}
}
function timedChanges(document, changes, changeFn) {
let totalParseTime = 0
// Do a fresh parse to get TreeFragments
const initialTree = parser.parse(document)
let fragments = TreeFragment.addTree(initialTree)
let currentDoc = document
for (let i = 0; i < changes; ++i) {
const change = changeFn(currentDoc, i)
currentDoc = change.text
// Do a timed parse
const start = performance.now()
fragments = TreeFragment.applyChanges(fragments, [change.range])
const tree = parser.parse(currentDoc, fragments)
fragments = TreeFragment.addTree(tree, fragments)
const end = performance.now()
totalParseTime += end - start
}
return {
total: totalParseTime,
average: totalParseTime / changes,
ops: changes,
fragments: fragments.length,
}
}
// Write and parse after every character insertion
function writeTextAt(document, position, text) {
return timedChanges(document, text.length, (currentDoc, index) =>
insertAt(currentDoc, position + index, text[index])
)
}
function randomInsertions(document, num) {
return timedChanges(document, num, currentDoc =>
insertAt(currentDoc, Math.floor(random() * currentDoc.length), 'a')
)
}
function randomDeletions(document, num) {
return timedChanges(document, num, currentDoc =>
deleteAt(currentDoc, Math.floor(random() * currentDoc.length), 1)
)
}
function deletionsFromMiddle(document, num) {
const deletionPoint = Math.floor(document.length / 2)
const deletions = Math.min(num, deletionPoint - 1)
return timedChanges(document, deletions, (currentDoc, index) =>
deleteAt(currentDoc, deletionPoint - index, 1)
)
}
function insertAt(document, position, text) {
const start = document.substring(0, position)
const end = document.substring(position)
return {
text: start + text + end,
range: {
fromA: position,
toA: position,
fromB: position,
toB: position + text.length,
},
}
}
function deleteAt(document, position, length = 1) {
const start = document.substring(0, position)
const end = document.substring(position + length)
return {
text: start + end,
range: {
fromA: position,
toA: position + length,
fromB: position,
toB: position,
},
}
}

View File

@@ -0,0 +1,55 @@
# Script Runner
## Overview
The Script Runner wraps your script's main logic to automatically handle logging, status tracking (success/error), and progress updates. Script execution status can be viewed on the "Script Logs" portal page.
## Features
- Automatically logs the start and end of your script.
- Records the final status ('success' or 'error').
- Provides a simple function (`trackProgress`) to your script for logging custom progress steps.
- Captures script parameters and basic environment details.
## Usage
1. **Import `scriptRunner`**.
2. **Define your script's main logic** as an `async` function that accepts `trackProgress` as its argument (you can ignore `trackProgress` if you don't need to report progress).
3. **Call `scriptRunner`**, passing your function and any variables it needs.
4. **Check script execution status** by visiting the "Script Logs" portal page using the URL printed in the console output.
**Example:**
```javascript
// Import the script runner utility (adjust the path as needed)
import { scriptRunner } from './lib/ScriptRunner.mjs'
const subJobs = 30
/**
* Your script's main work goes here.
* It must be an async function and accept `trackProgress`.
* @param {(message: string) => Promise<void>} trackProgress - Call this to log progress.
*/
async function main(trackProgress) {
for (let i = 0; i < subJobs; i++) {
await new Promise(resolve => setTimeout(() => resolve(), 1000))
await trackProgress(`Job in progress ${i + 1}/${subJobs}`)
}
await trackProgress('Job finished')
}
// Define any variables your script needs (optional)
const scriptVariables = {
subJobs,
}
// --- Execute the script using the runner with async/await ---
try {
await scriptRunner(main, scriptVariables)
process.exit()
} catch (error) {
console.error(error)
process.exit(1)
}
```

View File

@@ -0,0 +1,75 @@
import { ScriptLog } from '../../app/src/models/ScriptLog.mjs'
import Settings from '@overleaf/settings'
async function beforeScriptExecution(canonicalName, vars, scriptPath) {
let log = new ScriptLog({
canonicalName,
filePathAtVersion: scriptPath,
podName: process.env.OL_POD_NAME,
username: process.env.OL_USERNAME,
imageVersion: process.env.OL_IMAGE_VERSION,
vars,
})
log = await log.save()
console.log(
'\n==================================' +
'\n✨ Your script is running!' +
'\n📊 Track progress at:' +
`\n${Settings.adminUrl}/admin/script-log/${log._id}` +
'\n==================================\n'
)
return log._id
}
async function afterScriptExecution(logId, status) {
await ScriptLog.findByIdAndUpdate(logId, { status, endTime: new Date() })
}
/**
* @param {(trackProgress: (progress: string) => Promise<void>) => Promise<any>} main - Main function for the script
* @param {Object} vars - Variables to be used in the script
* @param {string} canonicalName - The canonical name of the script, defaults to the filename
* @param {string} scriptPath - The file path of the script, defaults to process.argv[1]
* @returns {Promise<void>}
* @async
*/
export async function scriptRunner(
main,
vars = {},
canonicalName = process.argv[1].split('/').pop().split('.')[0],
scriptPath = process.argv[1]
) {
const isSaaS = Boolean(Settings.overleaf)
if (!isSaaS) {
await main(async message => {
console.warn(message)
})
return
}
const logId = await beforeScriptExecution(canonicalName, vars, scriptPath)
async function trackProgress(message) {
try {
console.warn(message)
await ScriptLog.findByIdAndUpdate(logId, {
$push: {
progressLogs: {
timestamp: new Date(),
message,
},
},
})
} catch (error) {
console.error('Error tracking progress:', error)
}
}
try {
await main(trackProgress)
} catch (error) {
await afterScriptExecution(logId, 'error')
throw error
}
await afterScriptExecution(logId, 'success')
}

View File

@@ -0,0 +1,120 @@
import { db } from '../app/src/infrastructure/mongodb.js'
import minimist from 'minimist'
import UserGetter from '../app/src/Features/User/UserGetter.js'
import fs from 'node:fs'
function usage() {
console.log(
'Usage: node lowercase_institution_user_ids.mjs -i <institution-id>'
)
console.log(
'Converts external user IDs to lowercase for all users in an institution'
)
console.log('Options:')
console.log(
' --institution-id, -i Institution ID to update'
)
console.log(
' --dry-run, -d Finds users with non-lowercase id but does not do any updates'
)
console.log(
' --file, -f A file that contains external user ids to be updated'
)
console.log(
' If not provided, the script will update all the users within the institution that have upper case external user id'
)
console.log(
' -h, --help Show this help message'
)
process.exit(0)
}
const {
'dry-run': dryRun,
'institution-id': providerId,
help,
file,
} = minimist(process.argv.slice(2), {
string: ['institution-id', 'file'],
boolean: ['dry-run', 'help'],
alias: {
'institution-id': 'i',
'dry-run': 'd',
help: 'h',
file: 'f',
},
default: {
'dry-run': true,
},
})
async function main() {
if (help || !providerId) {
usage()
}
let externalUserIdsToUpdate = null
if (file) {
const lines = fs.readFileSync(file, 'utf8').split('\n')
externalUserIdsToUpdate = new Set(lines)
}
const users = await UserGetter.promises.getSsoUsersAtInstitution(providerId, {
_id: 1,
samlIdentifiers: 1,
})
let userToUpdate = 0
let userUpdated = 0
for (const user of users) {
const matchingIdentifier = user.samlIdentifiers.find(
u => u.providerId === providerId
)
const lowercaseId = matchingIdentifier.externalUserId.toLowerCase()
// skip if external user id is already in lower case
if (lowercaseId === matchingIdentifier.externalUserId) {
continue
}
// skip if an id file is provided but current external user id is not in the file
if (externalUserIdsToUpdate && !externalUserIdsToUpdate.has(lowercaseId)) {
continue
}
userToUpdate = userToUpdate + 1
console.log(
`${user._id},${matchingIdentifier.externalUserId},${lowercaseId}`
)
if (dryRun) {
continue
}
try {
await db.users.updateOne(
{ _id: user._id, 'samlIdentifiers.providerId': providerId },
{ $set: { 'samlIdentifiers.$.externalUserId': lowercaseId } }
)
userUpdated = userUpdated + 1
} catch (error) {
console.error(error)
}
}
if (dryRun) {
console.log(`DRY RUN: ${userToUpdate} users will be updated`)
} else {
console.log(`UPDATED: ${userUpdated}/${userToUpdate} users successfully`)
}
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,70 @@
import Adapter from '../migrations/lib/adapter.mjs'
import { promises as fs } from 'node:fs'
import { join, dirname } from 'node:path'
import { fileURLToPath } from 'node:url'
const __dirname = dirname(fileURLToPath(import.meta.url))
async function main(args) {
if (
!args ||
args.length === 0 ||
args.includes('help') ||
args.includes('--help') ||
args.includes('-h')
) {
console.log('')
console.log('usage: node ./scripts/mark_migration.mjs migration state')
console.log('')
console.log(' migration: name of migration file')
console.log(' state: executed | unexecuted')
console.log('')
return
}
const migration = args[0]
if (!migration) {
throw new Error('Error: migration must be supplied')
}
const state = args[1]
if (!state) {
throw new Error('Error: migration state must be supplied')
}
try {
await fs.access(join(__dirname, '../migrations', `${migration}.mjs`))
} catch (err) {
throw new Error(
`Error: migration ${migration} does not exist on disk: ${err}`
)
}
console.log(`Marking ${migration} as ${state}`)
process.env.SKIP_TAG_CHECK = 'true'
const adapter = new Adapter()
await adapter.connect()
switch (state) {
case 'executed':
await adapter.markExecuted(migration)
break
case 'unexecuted':
await adapter.unmarkExecuted(migration)
break
default:
throw new Error(`invalid state "${state}"`)
}
console.log('Done')
}
if (fileURLToPath(import.meta.url) === process.argv[1]) {
const args = process.argv.slice(2)
main(args)
.then(() => {
process.exit(0)
})
.catch(err => {
console.error(err)
process.exit(1)
})
}

View File

@@ -0,0 +1,102 @@
// dry run:
// node scripts/merge_group_subscription_members \
// --target [targetSubscriptionId] --source [sourceSubscriptionId]
//
// commit changes:
// node scripts/merge_group_subscription_members \
// --target [targetSubscriptionId] --source [sourceSubscriptionId] --commit
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
import SubscriptionUpdater from '../app/src/Features/Subscription/SubscriptionUpdater.js'
import minimist from 'minimist'
const argv = minimist(process.argv.slice(2), {
string: ['target', 'source'],
boolean: ['commit'],
})
const { target, source, commit } = argv
async function getSubscription(subscriptionId) {
const projection = {
member_ids: 1,
membersLimit: 1,
groupPlan: 1,
teamName: 1,
}
return await db.subscriptions.findOne(
{
_id: subscriptionId,
},
{ projection }
)
}
async function main() {
if (!target) {
throw new Error('missing --target argument')
}
if (!source) {
throw new Error('missing --source argument')
}
if (!commit) {
console.log('Doing dry run without --commit')
}
const targetSubscription = await getSubscription(new ObjectId(target))
const sourceSubscription = await getSubscription(new ObjectId(source))
if (!targetSubscription) {
throw new Error('could not find target (to) subscription')
}
if (!sourceSubscription) {
throw new Error('could not find source (from) subscription')
}
console.log(
`\nTarget/destination subscription (${targetSubscription.member_ids.length} members) is:`,
targetSubscription
)
console.log(
`\nSource subscription (${sourceSubscription.member_ids.length} members) is:`,
sourceSubscription
)
if (!targetSubscription.groupPlan || !sourceSubscription.groupPlan) {
throw new Error('both subscriptions must be group subscriptions')
}
let addCount = 0
for (const member of sourceSubscription.member_ids) {
const exists = targetSubscription.member_ids.find(m => {
return m.toString() === member.toString()
})
if (!exists) {
console.log(`adding ${member} to target ${targetSubscription._id}`)
addCount += 1
if (commit) {
await SubscriptionUpdater.promises.addUserToGroup(
targetSubscription._id,
member
)
}
} else {
console.log(`skipping ${member}, already exists in target`)
}
}
console.log(`Added ${addCount} users to target subscription`)
if (!commit) {
console.log('Run again with --commit to make the above changes')
}
}
try {
await main()
console.error('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}

View File

@@ -0,0 +1,162 @@
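// Moves embedded `auditLog` arrays off the `users` and `projects` collections and
// into the dedicated `userAuditLogEntries` / `projectAuditLogEntries` collections.
//
// Configured via environment variables (see the defaults in main()):
//   DRY_RUN (default true), PROJECT_ID, USER_ID, SKIP_USERS_MIGRATION,
//   WRITE_CONCURRENCY, LET_USER_DOUBLE_CHECK_INPUTS_FOR, plus the batchedUpdate()
//   variables echoed by letUserDoubleCheckInputs().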
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import { promiseMapWithLimit, promisify } from '@overleaf/promise-utils'
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
import _ from 'lodash'
import { fileURLToPath } from 'node:url'
const sleep = promisify(setTimeout)
async function main(options) {
if (!options) {
options = {}
}
_.defaults(options, {
dryRun: process.env.DRY_RUN !== 'false',
projectId: process.env.PROJECT_ID,
userId: process.env.USER_ID,
skipUsersMigration: process.env.SKIP_USERS_MIGRATION === 'true',
writeConcurrency: parseInt(process.env.WRITE_CONCURRENCY, 10) || 10,
letUserDoubleCheckInputsFor: parseInt(
process.env.LET_USER_DOUBLE_CHECK_INPUTS_FOR || 10 * 1000,
10
),
})
await letUserDoubleCheckInputs(options)
if (options.projectId) {
console.log('migrating projectId=' + options.projectId)
const project = await db.projects.findOne(
{ _id: new ObjectId(options.projectId) },
{ _id: 1, auditLog: 1 }
)
if (!project || !project.auditLog) {
console.error('unable to process project', project)
return
}
await processProjectsBatch([project], options)
} else if (options.userId) {
console.log('migrating userId=' + options.userId)
const user = await db.users.findOne(
{ _id: new ObjectId(options.userId) },
{ _id: 1, auditLog: 1 }
)
if (!user || !user.auditLog) {
console.error('unable to process user', user)
return
}
await processUsersBatch([user], options)
} else {
if (!options.skipUsersMigration) {
await batchedUpdate(
db.users,
{ auditLog: { $exists: true } },
async users => {
await processUsersBatch(users, options)
},
{ _id: 1, auditLog: 1 }
)
}
// Most projects are processed right after their owner, but only users with an
// existing `auditLog` are taken into consideration, which leaves some projects
// orphaned. This batched update processes all remaining projects.
await batchedUpdate(
db.projects,
{ auditLog: { $exists: true } },
async projects => {
await processProjectsBatch(projects, options)
},
{ _id: 1, auditLog: 1 }
)
}
}
async function processUsersBatch(users, options) {
if (!users || users.length <= 0) {
return
}
const entries = users
.map(user => user.auditLog.map(log => ({ ...log, userId: user._id })))
.flat()
if (!options.dryRun && entries?.length > 0) {
await db.userAuditLogEntries.insertMany(entries)
}
if (!options.dryRun) {
const userIds = users.map(user => user._id)
await db.users.updateMany(
{ _id: { $in: userIds } },
{ $unset: { auditLog: 1 } }
)
}
await promiseMapWithLimit(options.writeConcurrency, users, async user => {
const projects = await db.projects
.find(
{ owner_ref: user._id, auditLog: { $exists: true } },
{ _id: 1, auditLog: 1 }
)
.toArray()
await processProjectsBatch(projects, options)
})
}
async function processProjectsBatch(projects, options) {
if (!projects || projects.length <= 0) {
return
}
const entries = projects
.map(project =>
project.auditLog.map(log => ({ ...log, projectId: project._id }))
)
.flat()
if (!options.dryRun && entries?.length > 0) {
await db.projectAuditLogEntries.insertMany(entries)
}
if (!options.dryRun) {
const projectIds = projects.map(project => project._id)
await db.projects.updateMany(
{ _id: { $in: projectIds } },
{ $unset: { auditLog: 1 } }
)
}
}
async function letUserDoubleCheckInputs(options) {
const allOptions = {
...options,
// batchedUpdate() environment variables
BATCH_DESCENDING: process.env.BATCH_DESCENDING,
BATCH_SIZE: process.env.BATCH_SIZE,
VERBOSE_LOGGING: process.env.VERBOSE_LOGGING,
BATCH_LAST_ID: process.env.BATCH_LAST_ID,
BATCH_RANGE_END: process.env.BATCH_RANGE_END,
SKIP_USERS_MIGRATION: process.env.SKIP_USERS_MIGRATION,
}
console.error('Options:', JSON.stringify(allOptions, null, 2))
console.error(
'Waiting for you to double check inputs for',
options.letUserDoubleCheckInputsFor,
'ms'
)
await sleep(options.letUserDoubleCheckInputsFor)
}
export default main
if (fileURLToPath(import.meta.url) === process.argv[1]) {
try {
await main()
console.log('Done.')
process.exit(0)
} catch (error) {
console.error({ error })
process.exit(1)
}
}

View File

@@ -0,0 +1,43 @@
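// One-off migration that hashes any plaintext OAuth client secrets, access and
// refresh tokens, and authorization codes in place. Values already prefixed with
// "v1." (the hashed format) are skipped by the query in hashSecrets().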
import {
db,
READ_PREFERENCE_SECONDARY,
} from '../../app/src/infrastructure/mongodb.js'
import { hashSecret } from '../../modules/oauth2-server/app/src/SecretsHelper.js'
async function main() {
console.log('Hashing client secrets...')
await hashSecrets(db.oauthApplications, 'clientSecret')
console.log('Hashing access tokens...')
await hashSecrets(db.oauthAccessTokens, 'accessToken')
console.log('Hashing refresh tokens...')
await hashSecrets(db.oauthAccessTokens, 'refreshToken')
console.log('Hashing authorization codes...')
await hashSecrets(db.oauthAuthorizationCodes, 'authorizationCode')
}
async function hashSecrets(collection, field) {
const cursor = collection.find(
{
[field]: /^(?!v1\.)/,
},
{
projection: { _id: 1, [field]: 1 },
readPreference: READ_PREFERENCE_SECONDARY,
}
)
let hashedCount = 0
for await (const doc of cursor) {
const hash = hashSecret(doc[field])
await collection.updateOne({ _id: doc._id }, { $set: { [field]: hash } })
hashedCount++
}
console.log(`${hashedCount} secrets hashed`)
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,96 @@
import minimist from 'minimist'
import { db } from '../../app/src/infrastructure/mongodb.js'
import { hashSecret } from '../../modules/oauth2-server/app/src/SecretsHelper.js'
async function main() {
const opts = parseArgs()
if (opts.accessToken == null) {
console.error('Missing --token option')
process.exit(1)
}
if (opts.refreshToken == null) {
console.error('Missing --refresh-token option')
process.exit(1)
}
if (opts.oauthApplication_id == null) {
console.error('Missing --application-id option')
process.exit(1)
}
if (opts.user_id == null) {
console.error('Missing --user-id option')
process.exit(1)
}
if (opts.scope == null) {
console.error('Missing --scope option')
process.exit(1)
}
if (opts.accessTokenExpiresAt == null) {
console.error('Missing --expiry-date option')
process.exit(1)
}
await insertToken(opts)
}
async function insertToken(opts) {
const token = {
...opts,
accessToken: hashSecret(opts.accessToken),
refreshToken: hashSecret(opts.refreshToken),
accessTokenExpiresAt: new Date(opts.accessTokenExpiresAt),
createdAt: new Date(),
}
await db.oauthAccessTokens.insertOne(token)
}
function parseArgs() {
const args = minimist(process.argv.slice(2), {
boolean: ['help'],
})
if (args.help) {
usage()
process.exit(0)
}
if (args._.length !== 0) {
usage()
process.exit(1)
}
return {
accessToken: args.token,
oauthApplication_id: args['application-id'],
refreshToken: args['refresh-token'],
user_id: args['user-id'],
scope: args.scope,
accessTokenExpiresAt: args['expiry-date'],
}
}
function usage() {
console.error(`Usage: create_token.js [OPTS...]
Creates an OAuth access token
Options:
--application-id ID for the OAuth application
--user-id ID of the user this token belongs to
--token Access token
--refresh-token Refresh token
--scope Accepted scope
--expiry-date Token expiry date
`)
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,129 @@
import minimist from 'minimist'
import mongodb from 'mongodb-legacy'
import { db } from '../../app/src/infrastructure/mongodb.js'
import { hashSecret } from '../../modules/oauth2-server/app/src/SecretsHelper.js'
const { ObjectId } = mongodb
async function main() {
const opts = parseArgs()
const application = await getApplication(opts.id)
if (application == null) {
console.log(
`Application ${opts.id} is not registered. Creating a new configuration.`
)
if (opts.name == null) {
console.error('Missing --name option')
process.exit(1)
}
if (opts.secret == null) {
console.error('Missing --secret option')
process.exit(1)
}
} else {
console.log(`Updating configuration for client: ${application.name}`)
if (opts.mongoId != null) {
console.error('Cannot change Mongo ID for an existing client')
process.exit(1)
}
}
await upsertApplication(opts)
}
async function getApplication(clientId) {
return await db.oauthApplications.findOne({ id: clientId })
}
async function upsertApplication(opts) {
const key = { id: opts.id }
const defaults = {}
const updates = {}
if (opts.name != null) {
updates.name = opts.name
}
if (opts.secret != null) {
updates.clientSecret = hashSecret(opts.secret)
}
if (opts.grants != null) {
updates.grants = opts.grants
} else {
defaults.grants = []
}
if (opts.scopes != null) {
updates.scopes = opts.scopes
} else {
defaults.scopes = []
}
if (opts.redirectUris != null) {
updates.redirectUris = opts.redirectUris
} else {
defaults.redirectUris = []
}
if (opts.mongoId != null) {
defaults._id = new ObjectId(opts.mongoId)
}
await db.oauthApplications.updateOne(
key,
{
$setOnInsert: { ...key, ...defaults },
$set: updates,
},
{ upsert: true }
)
}
function parseArgs() {
const args = minimist(process.argv.slice(2), {
boolean: ['help'],
})
if (args.help) {
usage()
process.exit(0)
}
if (args._.length !== 1) {
usage()
process.exit(1)
}
return {
id: args._[0],
mongoId: args['mongo-id'],
name: args.name,
secret: args.secret,
scopes: toArray(args.scope),
grants: toArray(args.grant),
redirectUris: toArray(args['redirect-uri']),
}
}
function usage() {
console.error(`Usage: register_client.js [OPTS...] CLIENT_ID
Creates or updates an OAuth client configuration
Options:
--name Descriptive name for the OAuth client (required for creation)
--secret Client secret (required for creation)
--scope Accepted scope (can be given more than once)
--grant Accepted grant type (can be given more than once)
--redirect-uri Accepted redirect URI (can be given more than once)
--mongo-id Mongo ID to use if the configuration is created (optional)
`)
}
function toArray(value) {
if (value != null && !Array.isArray(value)) {
return [value]
} else {
return value
}
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,121 @@
import minimist from 'minimist'
import {
db,
READ_PREFERENCE_SECONDARY,
} from '../../app/src/infrastructure/mongodb.js'
async function main() {
const opts = parseArgs()
const application = await getApplication(opts.clientId)
if (application == null) {
console.error(`Client configuration not found: ${opts.clientId}`)
process.exit(1)
}
if (opts.commit) {
console.log(
`Preparing to remove OAuth client configuration: ${application.name}.`
)
const deletedAccessTokens = await deleteAccessTokens(application._id)
console.log(`Deleted ${deletedAccessTokens} access tokens`)
const deletedAuthorizationCodes = await deleteAuthorizationCodes(
application._id
)
console.log(`Deleted ${deletedAuthorizationCodes} authorization codes`)
await deleteApplication(application._id)
console.log('Deleted OAuth client configuration')
} else {
console.log(
`Preparing to remove OAuth client configuration (dry run): ${application.name}.`
)
const accessTokenCount = await countAccessTokens(application._id)
const authorizationCodeCount = await countAuthorizationCodes(
application._id
)
console.log(
`This would delete ${accessTokenCount} access tokens and ${authorizationCodeCount} authorization codes.`
)
console.log('This was a dry run. Rerun with --commit to proceed.')
}
}
async function getApplication(clientId) {
return await db.oauthApplications.findOne({ id: clientId })
}
async function countAccessTokens(applicationId) {
return await db.oauthAccessTokens.count(
{
oauthApplication_id: applicationId,
},
{ readPreference: READ_PREFERENCE_SECONDARY }
)
}
async function countAuthorizationCodes(applicationId) {
return await db.oauthAuthorizationCodes.count(
{
oauthApplication_id: applicationId,
},
{ readPreference: READ_PREFERENCE_SECONDARY }
)
}
async function deleteAccessTokens(applicationId) {
const res = await db.oauthAccessTokens.deleteMany({
oauthApplication_id: applicationId,
})
return res.deletedCount
}
async function deleteAuthorizationCodes(applicationId) {
const res = await db.oauthAuthorizationCodes.deleteMany({
oauthApplication_id: applicationId,
})
return res.deletedCount
}
async function deleteApplication(applicationId) {
await db.oauthApplications.deleteOne({ _id: applicationId })
}
function parseArgs() {
const args = minimist(process.argv.slice(2), {
boolean: ['help', 'commit'],
})
if (args.help) {
usage()
process.exit(0)
}
if (args._.length !== 1) {
usage()
process.exit(1)
}
return {
clientId: args._[0],
commit: args.commit,
}
}
function usage() {
console.error(`Usage: remove_client.js [OPTS...] CLIENT_ID
Removes an OAuth client configuration and all associated tokens and
authorization codes
Options:
--commit Really delete the OAuth application (will do a dry run by default)
`)
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}

View File

@@ -0,0 +1,2 @@
node_modules
output

View File

@@ -0,0 +1,17 @@
A Node.js tool for reading plan prices from CSV files and creating JSON objects.
Run `npm install` to install the dependencies.
The scripts write their output into the `output` folder.
### Create localized and group plan pricing
_Command_ `node plans.mjs -f fileName -o outputdir` - generates two JSON files:
- `localizedPlanPricing.json` for `/services/web/config/settings.overrides.saas.js`
- `groups.json` for `/services/web/app/templates/plans/groups.json`
The input file can be in `.csv` or `.json` format:
- `.csv` - a CSV price sheet
- `.json` - the JSON output of the `recurly_prices.mjs --download` script (see the example below)
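For example, to generate both files from a Recurly JSON download (the input file name here is illustrative):
```
node plans.mjs -f recurly_prices_output.json -o output
```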

View File

@@ -0,0 +1,546 @@
{
"educational": {
"professional": {
"2": {
"AUD": 321,
"BRL": 699,
"CAD": 314,
"CHF": 279,
"CLP": 168693,
"COP": 552930,
"DKK": 1665,
"EUR": 258,
"GBP": 223,
"INR": 6719,
"MXN": 4129,
"NOK": 2008,
"NZD": 321,
"PEN": 671,
"SEK": 2008,
"SGD": 363,
"USD": 279
},
"3": {
"AUD": 321,
"BRL": 699,
"CAD": 314,
"CHF": 279,
"CLP": 168693,
"COP": 552930,
"DKK": 1665,
"EUR": 258,
"GBP": 223,
"INR": 6719,
"MXN": 4129,
"NOK": 2008,
"NZD": 321,
"PEN": 671,
"SEK": 2008,
"SGD": 363,
"USD": 279
},
"4": {
"AUD": 321,
"BRL": 699,
"CAD": 314,
"CHF": 279,
"CLP": 168693,
"COP": 552930,
"DKK": 1665,
"EUR": 258,
"GBP": 223,
"INR": 6719,
"MXN": 4129,
"NOK": 2008,
"NZD": 321,
"PEN": 671,
"SEK": 2008,
"SGD": 363,
"USD": 279
},
"5": {
"AUD": 321,
"BRL": 699,
"CAD": 314,
"CHF": 279,
"CLP": 168693,
"COP": 552930,
"DKK": 1665,
"EUR": 258,
"GBP": 223,
"INR": 6719,
"MXN": 4129,
"NOK": 2008,
"NZD": 321,
"PEN": 671,
"SEK": 2008,
"SGD": 363,
"USD": 279
},
"10": {
"AUD": 179,
"BRL": 389,
"CAD": 175,
"CHF": 155,
"CLP": 93986,
"COP": 308061,
"DKK": 927,
"EUR": 143,
"GBP": 124,
"INR": 3743,
"MXN": 2300,
"NOK": 1118,
"NZD": 179,
"PEN": 374,
"SEK": 1118,
"SGD": 202,
"USD": 155
},
"20": {
"AUD": 165,
"BRL": 359,
"CAD": 161,
"CHF": 143,
"CLP": 86756,
"COP": 284364,
"DKK": 856,
"EUR": 132,
"GBP": 114,
"INR": 3455,
"MXN": 2123,
"NOK": 1032,
"NZD": 165,
"PEN": 345,
"SEK": 1032,
"SGD": 186,
"USD": 143
},
"50": {
"AUD": 151,
"BRL": 329,
"CAD": 148,
"CHF": 131,
"CLP": 79526,
"COP": 260667,
"DKK": 785,
"EUR": 121,
"GBP": 105,
"INR": 3167,
"MXN": 1946,
"NOK": 946,
"NZD": 151,
"PEN": 316,
"SEK": 946,
"SGD": 171,
"USD": 131
}
},
"collaborator": {
"2": {
"AUD": 167,
"BRL": 349,
"CAD": 160,
"CHF": 139,
"CLP": 77693,
"COP": 272930,
"DKK": 839,
"EUR": 125,
"GBP": 111,
"INR": 3219,
"MXN": 2029,
"NOK": 1014,
"NZD": 167,
"PEN": 321,
"SEK": 1014,
"SGD": 181,
"USD": 139
},
"3": {
"AUD": 167,
"BRL": 349,
"CAD": 160,
"CHF": 139,
"CLP": 77693,
"COP": 272930,
"DKK": 839,
"EUR": 125,
"GBP": 111,
"INR": 3219,
"MXN": 2029,
"NOK": 1014,
"NZD": 167,
"PEN": 321,
"SEK": 1014,
"SGD": 181,
"USD": 139
},
"4": {
"AUD": 167,
"BRL": 349,
"CAD": 160,
"CHF": 139,
"CLP": 77693,
"COP": 272930,
"DKK": 839,
"EUR": 125,
"GBP": 111,
"INR": 3219,
"MXN": 2029,
"NOK": 1014,
"NZD": 167,
"PEN": 321,
"SEK": 1014,
"SGD": 181,
"USD": 139
},
"5": {
"AUD": 167,
"BRL": 349,
"CAD": 160,
"CHF": 139,
"CLP": 77693,
"COP": 272930,
"DKK": 839,
"EUR": 125,
"GBP": 111,
"INR": 3219,
"MXN": 2029,
"NOK": 1014,
"NZD": 167,
"PEN": 321,
"SEK": 1014,
"SGD": 181,
"USD": 139
},
"10": {
"AUD": 93,
"BRL": 194,
"CAD": 89,
"CHF": 77,
"CLP": 43286,
"COP": 152061,
"DKK": 467,
"EUR": 69,
"GBP": 62,
"INR": 1793,
"MXN": 1130,
"NOK": 565,
"NZD": 93,
"PEN": 179,
"SEK": 565,
"SGD": 101,
"USD": 77
},
"20": {
"AUD": 86,
"BRL": 179,
"CAD": 82,
"CHF": 71,
"CLP": 39956,
"COP": 140364,
"DKK": 431,
"EUR": 64,
"GBP": 57,
"INR": 1655,
"MXN": 1043,
"NOK": 521,
"NZD": 86,
"PEN": 165,
"SEK": 521,
"SGD": 93,
"USD": 71
},
"50": {
"AUD": 78,
"BRL": 164,
"CAD": 75,
"CHF": 65,
"CLP": 36626,
"COP": 128667,
"DKK": 395,
"EUR": 59,
"GBP": 52,
"INR": 1517,
"MXN": 956,
"NOK": 478,
"NZD": 78,
"PEN": 151,
"SEK": 478,
"SGD": 85,
"USD": 65
}
}
},
"enterprise": {
"professional": {
"2": {
"AUD": 321,
"BRL": 699,
"CAD": 314,
"CHF": 499,
"CLP": 168693,
"COP": 552930,
"DKK": 1665,
"EUR": 258,
"GBP": 223,
"INR": 6719,
"MXN": 4129,
"NOK": 2008,
"NZD": 321,
"PEN": 671,
"SEK": 2008,
"SGD": 363,
"USD": 279
},
"3": {
"AUD": 321,
"BRL": 699,
"CAD": 314,
"CHF": 499,
"CLP": 168693,
"COP": 552930,
"DKK": 1665,
"EUR": 258,
"GBP": 223,
"INR": 6719,
"MXN": 4129,
"NOK": 2008,
"NZD": 321,
"PEN": 671,
"SEK": 2008,
"SGD": 363,
"USD": 279
},
"4": {
"AUD": 321,
"BRL": 699,
"CAD": 314,
"CHF": 499,
"CLP": 168693,
"COP": 552930,
"DKK": 1665,
"EUR": 258,
"GBP": 223,
"INR": 6719,
"MXN": 4129,
"NOK": 2008,
"NZD": 321,
"PEN": 671,
"SEK": 2008,
"SGD": 363,
"USD": 279
},
"5": {
"AUD": 321,
"BRL": 699,
"CAD": 314,
"CHF": 499,
"CLP": 168693,
"COP": 552930,
"DKK": 1665,
"EUR": 258,
"GBP": 223,
"INR": 6719,
"MXN": 4129,
"NOK": 2008,
"NZD": 321,
"PEN": 671,
"SEK": 2008,
"SGD": 363,
"USD": 279
},
"10": {
"AUD": 298,
"BRL": 649,
"CAD": 291,
"CHF": 259,
"CLP": 156643,
"COP": 513435,
"DKK": 1546,
"EUR": 239,
"GBP": 207,
"INR": 6239,
"MXN": 3834,
"NOK": 1864,
"NZD": 298,
"PEN": 623,
"SEK": 1864,
"SGD": 337,
"USD": 259
},
"20": {
"AUD": 275,
"BRL": 599,
"CAD": 269,
"CHF": 239,
"CLP": 144594,
"COP": 473940,
"DKK": 1427,
"EUR": 221,
"GBP": 191,
"INR": 5759,
"MXN": 3539,
"NOK": 1721,
"NZD": 275,
"PEN": 575,
"SEK": 1721,
"SGD": 311,
"USD": 239
},
"50": {
"AUD": 252,
"BRL": 549,
"CAD": 246,
"CHF": 219,
"CLP": 132544,
"COP": 400000,
"DKK": 1308,
"EUR": 202,
"GBP": 175,
"INR": 5279,
"MXN": 3244,
"NOK": 1577,
"NZD": 252,
"PEN": 527,
"SEK": 1577,
"SGD": 285,
"USD": 219
}
},
"collaborator": {
"2": {
"AUD": 167,
"BRL": 349,
"CAD": 160,
"CHF": 139,
"CLP": 77693,
"COP": 272930,
"DKK": 839,
"EUR": 125,
"GBP": 111,
"INR": 3219,
"MXN": 2029,
"NOK": 1014,
"NZD": 167,
"PEN": 321,
"SEK": 1014,
"SGD": 181,
"USD": 139
},
"3": {
"AUD": 167,
"BRL": 349,
"CAD": 160,
"CHF": 139,
"CLP": 77693,
"COP": 272930,
"DKK": 839,
"EUR": 125,
"GBP": 111,
"INR": 3219,
"MXN": 2029,
"NOK": 1014,
"NZD": 167,
"PEN": 321,
"SEK": 1014,
"SGD": 181,
"USD": 139
},
"4": {
"AUD": 167,
"BRL": 349,
"CAD": 160,
"CHF": 139,
"CLP": 77693,
"COP": 272930,
"DKK": 839,
"EUR": 125,
"GBP": 111,
"INR": 3219,
"MXN": 2029,
"NOK": 1014,
"NZD": 167,
"PEN": 321,
"SEK": 1014,
"SGD": 181,
"USD": 139
},
"5": {
"AUD": 167,
"BRL": 349,
"CAD": 160,
"CHF": 139,
"CLP": 77693,
"COP": 272930,
"DKK": 839,
"EUR": 125,
"GBP": 111,
"INR": 3219,
"MXN": 2029,
"NOK": 1014,
"NZD": 167,
"PEN": 321,
"SEK": 1014,
"SGD": 181,
"USD": 139
},
"10": {
"AUD": 155,
"BRL": 324,
"CAD": 148,
"CHF": 129,
"CLP": 72143,
"COP": 253435,
"DKK": 779,
"EUR": 116,
"GBP": 103,
"INR": 2989,
"MXN": 1884,
"NOK": 941,
"NZD": 155,
"PEN": 298,
"SEK": 941,
"SGD": 168,
"USD": 129
},
"20": {
"AUD": 143,
"BRL": 299,
"CAD": 137,
"CHF": 119,
"CLP": 66594,
"COP": 233940,
"DKK": 719,
"EUR": 107,
"GBP": 95,
"INR": 2759,
"MXN": 1739,
"NOK": 869,
"NZD": 143,
"PEN": 275,
"SEK": 869,
"SGD": 155,
"USD": 119
},
"50": {
"AUD": 131,
"BRL": 274,
"CAD": 125,
"CHF": 109,
"CLP": 61044,
"COP": 214445,
"DKK": 659,
"EUR": 98,
"GBP": 87,
"INR": 2529,
"MXN": 1594,
"NOK": 796,
"NZD": 131,
"PEN": 252,
"SEK": 796,
"SGD": 142,
"USD": 109
}
}
}
}

View File

@@ -0,0 +1,65 @@
{
"name": "prices",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "prices",
"version": "1.0.0",
"license": "ISC",
"devDependencies": {
"csv": "^6.2.10",
"minimist": "^1.2.8"
}
},
"node_modules/csv": {
"version": "6.2.10",
"resolved": "https://registry.npmjs.org/csv/-/csv-6.2.10.tgz",
"integrity": "sha512-aO1dkeMlzWHvtKOdiTeqt7G4SwF/JtJ2fYNOMtlrGiKERD+ASq+QZelGqpFCzHGvZSIhzDtwqRVEgPMkme2BQg==",
"dev": true,
"dependencies": {
"csv-generate": "^4.2.4",
"csv-parse": "^5.3.8",
"csv-stringify": "^6.3.2",
"stream-transform": "^3.2.4"
},
"engines": {
"node": ">= 0.1.90"
}
},
"node_modules/csv-generate": {
"version": "4.2.4",
"resolved": "https://registry.npmjs.org/csv-generate/-/csv-generate-4.2.4.tgz",
"integrity": "sha512-PvEwuRksnW30I1DlZnVuCVMOiff7ZoUXOCMQJ1c0DPKXQkIC87hWvqJ4ztO70ceQMQER1hp/Lajo8KIy7at1PA==",
"dev": true
},
"node_modules/csv-parse": {
"version": "5.3.8",
"resolved": "https://registry.npmjs.org/csv-parse/-/csv-parse-5.3.8.tgz",
"integrity": "sha512-ird8lzMv9I64oqIVIHdaTbT7Yr55n2C/Nv6m1LxO7nddLEeI67468VQ9Ik+r6lwYbK9kTE1oSqAVcVKc/Uqx6g==",
"dev": true
},
"node_modules/csv-stringify": {
"version": "6.3.2",
"resolved": "https://registry.npmjs.org/csv-stringify/-/csv-stringify-6.3.2.tgz",
"integrity": "sha512-dD9gfbxNKa5v90NHiE2Qd6F9I52GtJjGTfowwzqiNDZD/+NPW3h19d2Nvv311a8QUW11rYRobco27nvVAnCrLw==",
"dev": true
},
"node_modules/minimist": {
"version": "1.2.8",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz",
"integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==",
"dev": true,
"funding": {
"url": "https://github.com/sponsors/ljharb"
}
},
"node_modules/stream-transform": {
"version": "3.2.4",
"resolved": "https://registry.npmjs.org/stream-transform/-/stream-transform-3.2.4.tgz",
"integrity": "sha512-YoZm/eoh6f/MH7uHkq+NK3fx3JkyXbck7FcTpJavwEUg0aMINqMPkDj5uNW0CoRy7c/2NSJm0HvoyFv6dVauPA==",
"dev": true
}
}
}


@@ -0,0 +1,15 @@
{
"name": "prices",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC",
"devDependencies": {
"csv": "^6.2.10",
"minimist": "^1.2.8"
}
}


@@ -0,0 +1,235 @@
// Creates data for localizedPlanPricing object in settings.overrides.saas.js
// and group plans object in app/templates/plans/groups.json
// https://github.com/import-js/eslint-plugin-import/issues/1810
// eslint-disable-next-line import/no-unresolved
import * as csv from 'csv/sync'
import fs from 'node:fs'
import path from 'node:path'
import minimist from 'minimist'
import { fileURLToPath } from 'node:url'
const __dirname = path.dirname(fileURLToPath(import.meta.url))
function readCSVFile(fileName) {
// Read and parse the CSV file
const filePath = path.resolve(__dirname, fileName)
const input = fs.readFileSync(filePath, 'utf8')
const rawRecords = csv.parse(input, { columns: true })
return rawRecords
}
function readJSONFile(fileName) {
const filePath = path.resolve(__dirname, fileName)
const file = fs.readFileSync(filePath)
const plans = JSON.parse(file)
// convert the plans JSON from recurly to an array of
// objects matching the spreadsheet format
const result = []
for (const plan of plans) {
const newRow = { plan_code: plan.code }
for (const price of plan.currencies) {
newRow[price.currency] = price.unitAmount
}
result.push(newRow)
}
return result
}
// Mapping of [output_keys]:[actual_keys]
const plansMap = {
student: 'student',
personal: 'paid-personal',
collaborator: 'collaborator',
professional: 'professional',
}
const currencies = [
'AUD',
'BRL',
'CAD',
'CHF',
'CLP',
'COP',
'DKK',
'EUR',
'GBP',
'INR',
'MXN',
'NOK',
'NZD',
'PEN',
'SEK',
'SGD',
'USD',
]
/**
* This is duplicated in:
* - services/web/app/src/Features/Subscription/SubscriptionHelper.js
* - services/web/modules/subscriptions/frontend/js/pages/plans-new-design/group-member-picker/group-plan-pricing.js
*/
function roundUpToNearest5Cents(number) {
return Math.ceil(number * 20) / 20
}
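// Illustrative examples of the rounding (made-up inputs, not real prices):
//   roundUpToNearest5Cents(131 / 12) // => 10.95 (10.9166... rounded up to 5 cents)
//   roundUpToNearest5Cents(9.99)     // => 10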
function generatePlans(workSheetJSON) {
// localizedPlanPricing object for settings.overrides.saas.js
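// The result has one entry per currency; the values below are illustrative
// only, not real prices:
//   {
//     GBP: {
//       free: { monthly: 0, annual: 0 },
//       personal: { monthly: 10, monthlyTimesTwelve: 120, annual: 99, annualDividedByTwelve: 8.25 },
//       collaborator: { ... }, professional: { ... }, student: { ... }
//     },
//     ...
//   }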
const localizedPlanPricing = {}
for (const currency of currencies) {
localizedPlanPricing[currency] = {
free: {
monthly: 0,
annual: 0,
},
}
for (const [outputKey, actualKey] of Object.entries(plansMap)) {
const monthlyPlan = workSheetJSON.find(
data => data.plan_code === actualKey
)
if (!monthlyPlan) throw new Error(`Missing plan: ${actualKey}`)
if (!(currency in monthlyPlan))
throw new Error(
`Missing currency "${currency}" for plan "${actualKey}"`
)
const actualKeyAnnual = `${actualKey}-annual`
const annualPlan = workSheetJSON.find(
data => data.plan_code === actualKeyAnnual
)
if (!annualPlan) throw new Error(`Missing plan: ${actualKeyAnnual}`)
if (!(currency in annualPlan))
throw new Error(
`Missing currency "${currency}" for plan "${actualKeyAnnual}"`
)
const monthly = Number(monthlyPlan[currency])
const monthlyTimesTwelve = Number(monthlyPlan[currency] * 12)
const annual = Number(annualPlan[currency])
const annualDividedByTwelve = Number(
roundUpToNearest5Cents(annualPlan[currency] / 12)
)
localizedPlanPricing[currency] = {
...localizedPlanPricing[currency],
[outputKey]: {
monthly,
monthlyTimesTwelve,
annual,
annualDividedByTwelve,
},
}
}
}
return localizedPlanPricing
}
function generateGroupPlans(workSheetJSON) {
// group plans object for app/templates/plans/groups.json
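// Output shape (currency and price values below are illustrative only; the
// legacy field is only present when a legacy add-on price exists):
//   {
//     educational: {
//       professional: {
//         USD: {
//           "2": { price_in_cents: 27800 },
//           "10": { price_in_cents: 25800, additional_license_legacy_price_in_cents: 2580 },
//           ...
//         },
//         ...
//       },
//       collaborator: { ... }
//     },
//     enterprise: { ... }
//   }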
const groupPlans = workSheetJSON.filter(data =>
data.plan_code.startsWith('group')
)
const sizes = ['2', '3', '4', '5', '10', '20', '50']
const additionalLicenseAddOnLegacyPricesFilePath = path.resolve(
__dirname,
'additional-license-add-on-legacy-prices.json'
)
const additionalLicenseAddOnLegacyPricesFile = fs.readFileSync(
additionalLicenseAddOnLegacyPricesFilePath
)
const additionalLicenseAddOnLegacyPrices = JSON.parse(
additionalLicenseAddOnLegacyPricesFile
)
const result = {}
for (const type1 of ['educational', 'enterprise']) {
result[type1] = {}
for (const type2 of ['professional', 'collaborator']) {
result[type1][type2] = {}
for (const currency of currencies) {
result[type1][type2][currency] = {}
for (const size of sizes) {
const planCode = `group_${type2}_${size}_${type1}`
const plan = groupPlans.find(data => data.plan_code === planCode)
if (!plan) throw new Error(`Missing plan: ${planCode}`)
result[type1][type2][currency][size] = {
price_in_cents: plan[currency] * 100,
}
const additionalLicenseAddOnLegacyPrice =
additionalLicenseAddOnLegacyPrices[type1][type2][size]?.[currency]
if (additionalLicenseAddOnLegacyPrice) {
Object.assign(result[type1][type2][currency][size], {
additional_license_legacy_price_in_cents:
additionalLicenseAddOnLegacyPrice * 100,
})
}
}
}
}
}
return result
}
const argv = minimist(process.argv.slice(2), {
string: ['output', 'file'],
alias: { o: 'output', f: 'file' },
})
let input
if (argv.file) {
const ext = path.extname(argv.file)
switch (ext) {
case '.csv':
input = readCSVFile(argv.file)
break
case '.json':
input = readJSONFile(argv.file)
break
default:
console.log('Invalid file type: must be csv or json')
process.exit(1)
}
} else {
console.log('usage: node plans.mjs -f <file.csv|file.json> -o <dir>')
process.exit(1)
}
// removes quotes from object keys
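// e.g. '"GBP": { "free": ... }' becomes 'GBP: { free: ... }', so the output can
// be pasted into settings.overrides.saas.js as a JS object literal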
const formatJS = obj =>
JSON.stringify(obj, null, 2).replace(/"([^"]+)":/g, '$1:')
const formatJSON = obj => JSON.stringify(obj, null, 2)
function writeFile(outputFile, data) {
console.log(`Writing ${outputFile}`)
fs.writeFileSync(outputFile, data)
}
const localizedPlanPricing = generatePlans(input)
const groupPlans = generateGroupPlans(input)
if (argv.output) {
const dir = argv.output
// check if output directory exists
if (!fs.existsSync(dir)) {
console.log(`Creating output directory ${dir}`)
fs.mkdirSync(dir)
}
// check if output directory is a directory and report error if not
if (!fs.lstatSync(dir).isDirectory()) {
console.error(`Error: output dir ${dir} is not a directory`)
process.exit(1)
}
writeFile(`${dir}/localizedPlanPricing.json`, formatJS(localizedPlanPricing))
writeFile(`${dir}/groups.json`, formatJSON(groupPlans))
} else {
console.log('LOCALIZED', localizedPlanPricing)
console.log('GROUP PLANS', JSON.stringify(groupPlans, null, 2))
}
console.log('Completed!')


@@ -0,0 +1,9 @@
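// One-shot script: process lapsed institutional email reconfirmations via
// InstitutionsReconfirmationHandler.processLapsed(), exiting non-zero on error.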
import InstitutionsReconfirmationHandler from '../modules/institutions/app/src/InstitutionsReconfirmationHandler.mjs'
try {
await InstitutionsReconfirmationHandler.processLapsed()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}


@@ -0,0 +1,101 @@
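// Purge sessions that do not belong to a logged-in user from Redis.
//
// The script scans keys matching sess:* in batches (--count, default 1000),
// fetches each batch with mget and deletes the sessions for which
// SessionManager.isUserLoggedIn() returns false. Pass --dry-run to only report
// what would be deleted.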
import RedisWrapper from '@overleaf/redis-wrapper'
import Settings from '@overleaf/settings'
import SessionManager from '../app/src/Features/Authentication/SessionManager.js'
import minimist from 'minimist'
const redis = RedisWrapper.createClient(Settings.redis.websessions)
const argv = minimist(process.argv.slice(2), {
string: ['count'],
boolean: ['dry-run', 'help'],
alias: {
count: 'c',
'dry-run': 'n',
help: 'h',
},
})
if (argv.help) {
console.log(
`Usage: node purge_non_logged_in_sessions.js [--count <count>] [--dry-run]
--count <count> the number of keys to scan on each iteration (default 1000)
--dry-run to not delete any keys
--help to show this help
Note: use --count=10000 to delete faster (this will impact redis performance,
so use with caution)`
)
process.exit()
}
const scanCount = argv.count ? parseInt(argv.count, 10) : 1000
const dryRun = argv['dry-run']
console.log(`Scan count set to ${scanCount}`)
if (dryRun) {
console.log('Dry run, not deleting any keys')
}
// iterate over all redis keys matching sess:*, fetch each batch with mget and
// delete the sessions that do not belong to a logged-in user
async function scanAndPurge() {
let totalSessions = 0
let totalDeletedSessions = 0
const stream = redis.scanStream({
match: 'sess:*',
count: scanCount,
})
console.log('Starting scan...')
for await (const resultKeys of stream) {
if (resultKeys.length === 0) {
continue // SCAN may return zero elements; this does not mean the iteration is complete
}
console.log(`Keys found, count: ${resultKeys.length}`)
totalSessions += resultKeys.length
const sessions = await redis.mget(resultKeys)
const toDelete = []
for (let i = 0; i < sessions.length; i++) {
const resultKey = resultKeys[i]
const session = sessions[i]
if (!session) {
continue
}
try {
const sessionObject = JSON.parse(session)
if (!SessionManager.isUserLoggedIn(sessionObject)) {
totalDeletedSessions++
toDelete.push(resultKey)
}
} catch (error) {
console.error(`Error parsing session ${resultKeys[i]}: ${error}`)
}
}
if (toDelete.length === 0) {
continue
}
if (dryRun) {
console.log(`Would delete ${toDelete.length} keys`)
} else {
await redis.del(toDelete)
console.log(`Keys deleted so far: ${totalDeletedSessions}`)
}
}
if (dryRun) {
console.log(
`Dry run: ${totalSessions} sessions checked, ${totalDeletedSessions} would have been deleted`
)
} else {
console.log(
`All ${totalSessions} sessions have been checked, ${totalDeletedSessions} deleted`
)
}
redis.quit()
}
try {
await scanAndPurge()
} catch (error) {
console.error(error)
process.exit(1)
}


@@ -0,0 +1,181 @@
// recover docs from redis where there is no doc in mongo but the project exists
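//
// The script scans doclines:{*} keys in redis and restores docs that are
// missing from mongo into their project, when the project still exists.
//
// Flags (inferred from parseArgs() below):
//
//   --commit            apply the changes; without it the script is a dry run
//   --delete-oversized  delete docs larger than maxDocSize (2 MB) from redis
//                       instead of skipping them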
import minimist from 'minimist'
import { db, ObjectId } from '../app/src/infrastructure/mongodb.js'
import ProjectEntityUpdateHandler from '../app/src/Features/Project/ProjectEntityUpdateHandler.js'
import ProjectEntityRestoreHandler from '../app/src/Features/Project/ProjectEntityRestoreHandler.js'
import RedisWrapper from '@overleaf/redis-wrapper'
import Settings from '@overleaf/settings'
import logger from '@overleaf/logger'
const opts = parseArgs()
const redis = RedisWrapper.createClient(Settings.redis.web)
function parseArgs() {
const args = minimist(process.argv.slice(2), {
boolean: ['commit'],
})
const commit = args.commit
const deleteOversized = args['delete-oversized']
return {
commit,
deleteOversized,
maxDocSize: 2 * 1024 * 1024,
}
}
function extractObjectId(s) {
const m = s.match(/:\{?([0-9a-f]{24})\}?/)
return m[1]
}
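// e.g. (made-up id):
//   extractObjectId('doclines:{507f1f77bcf86cd799439011}') => '507f1f77bcf86cd799439011'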
async function main() {
logger.info({ opts }, 'removing deleted docs')
let cursor = 0
do {
const [newCursor, doclinesKeys] = await redis.scan(
cursor,
'MATCH',
'doclines:{*}',
'COUNT',
1000
)
const docIds = doclinesKeys.map(extractObjectId)
for (const docId of docIds) {
await processDoc(docId)
}
cursor = newCursor
} while (cursor !== '0')
if (!opts.commit) {
console.log('This was a dry run. Re-run with --commit to apply changes')
}
}
async function processDoc(docId) {
// check if the doc is in mongo.. if so ignore it
const docCount = await db.docs.find({ _id: new ObjectId(docId) }).count()
if (docCount > 0) {
logger.debug({ docId }, 'doc is present in mongo - no recovery needed')
return
}
// get the doc from redis and check if it has a project id
const doc = await getDoc(docId)
const projectId = doc.projectId
if (!projectId) {
logger.warn(
{ docId, doc },
'projectId not available in redis, cannot restore - skipping'
)
// we could delete the document in redis here since we have no way to recover it
return
}
// check that the project is in mongo, if not delete the doc
const project = await db.projects.findOne({ _id: new ObjectId(projectId) })
if (!project) {
logger.warn(
{ docId },
'project not present in mongo - could remove doc from redis'
)
return
}
// if the doc is too big we will need to convert it to a file, skip it for now
// or delete the doc if the --delete-oversized option is used
const size = doc.lines.reduce((sum, line) => sum + line.length + 1, 0)
if (size > opts.maxDocSize) {
logger.warn(
{ docId, projectId, size },
'doc exceeds max size, cannot restore'
)
if (opts.deleteOversized) {
logger.info(
{ docId, projectId, size, commit: opts.commit },
'forcing delete of large doc'
)
if (opts.commit) {
try {
await deleteDocFromRedis(projectId, docId)
} catch (err) {
logger.error(
{ docId, projectId, deleteErr: err },
'error deleting doc from redis'
)
}
}
}
return
}
// now we have the doc content from redis, in a project where the doc has been deleted
const restoredName = ProjectEntityRestoreHandler.generateRestoredName(
doc.name || 'unknown',
'recovered'
)
logger.info(
{ docId, projectId, restoredName, commit: opts.commit },
'recovering doc from redis to mongo'
)
if (opts.commit) {
const folderId = project.rootFolder[0]._id
try {
await ProjectEntityUpdateHandler.promises.addDocWithRanges(
projectId,
folderId,
restoredName,
doc.lines,
doc.ranges,
null,
'recovery-script'
)
await deleteDocFromRedis(projectId, docId)
} catch (err) {
logger.error(
{ docId, projectId, restoreErr: err },
'error restoring doc from redis to mongo'
)
}
}
}
async function getDoc(docId) {
const [projectId, lines, ranges, pathname] = await redis.mget(
`ProjectId:{${docId}}`,
`doclines:{${docId}}`,
`Ranges:{${docId}}`,
`Pathname:{${docId}}`
)
const name = pathname?.split('/').pop()
return {
projectId,
id: docId,
lines: JSON.parse(lines),
ranges: ranges ? JSON.parse(ranges) : {},
name: name || 'unnamed',
}
}
async function deleteDocFromRedis(projectId, docId) {
await redis.del(
`Blocking:{${docId}}`,
`doclines:{${docId}}`,
`DocOps:{${docId}}`,
`DocVersion:{${docId}}`,
`DocHash:{${docId}}`,
`ProjectId:{${docId}}`,
`Ranges:{${docId}}`,
`UnflushedTime:{${docId}}`,
`Pathname:{${docId}}`,
`ProjectHistoryId:{${docId}}`,
`PendingUpdates:{${docId}}`,
`lastUpdatedAt:{${docId}}`,
`lastUpdatedBy:{${docId}}`
)
)
await redis.srem(`DocsIn:{${projectId}}`, docId)
}
try {
await main()
process.exit(0)
} catch (error) {
console.error(error)
process.exit(1)
}


@@ -0,0 +1,4 @@
source 'https://rubygems.org'
gem 'recurly'
gem 'json'

Some files were not shown because too many files have changed in this diff.