first commit

This commit is contained in:
2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,120 @@
/**
* This script backfills the account mapping for subscriptions that are active and have a group plan.
*
* The mapping joins a recurlySubscription_id to a subscription _id in BigQuery.
*
* This script has an assumption that it is being run in a clean slate condition, it will create some
* duplicate mappings if run multiple times. The Analytics team will have the expectation
* that this table may need to be deduplicated as it is an event sourcing record.
*
* Call it with `--commit` to actually register the mappings.
* Call it with `--verbose` to see debug logs.
* Call it with `--endDate=<ISO date>` to stop processing at a certain date
*/
import logger from '@overleaf/logger'
import minimist from 'minimist'
import { batchedUpdate } from '@overleaf/mongo-utils/batchedUpdate.js'
import { db } from '../../app/src/infrastructure/mongodb.js'
import AccountMappingHelper from '../../app/src/Features/Analytics/AccountMappingHelper.js'
import { registerAccountMapping } from '../../app/src/Features/Analytics/AnalyticsManager.js'
import { triggerGracefulShutdown } from '../../app/src/infrastructure/GracefulShutdown.js'
import Validation from '../../app/src/infrastructure/Validation.js'
// Schema for the CLI flags; unrecognised flags are tolerated (.unknown(true)).
const { Joi } = Validation
const paramsSchema = Joi.object({
  endDate: Joi.string().isoDate(),
  commit: Joi.boolean().default(false),
  verbose: Joi.boolean().default(false),
}).unknown(true)

// Counters for the run summary logged at the end.
let mapped = 0
let subscriptionCount = 0

// One shared timestamp so every mapping created by this run sorts together.
const now = new Date().toISOString()

// Tracks subscriptions already handled this run, to skip duplicates.
const seenSubscriptions = new Set()
/**
 * Register the subscription→Recurly account mapping for one subscription doc.
 *
 * Skips documents whose _id was already seen during this run, and only sends
 * the mapping to analytics when --commit was passed; otherwise this is a dry
 * run that just logs what would happen.
 *
 * @param {{_id: Object, recurlySubscription_id: string}} subscription
 */
function registerMapping(subscription) {
  // Mongo _ids are ObjectId instances, which a Set compares by reference, so
  // keying on the raw _id would never detect a duplicate. Key on the string
  // form instead (a no-op if _id is already a string).
  const subscriptionId = subscription._id.toString()
  if (seenSubscriptions.has(subscriptionId)) {
    logger.warn({ subscription }, 'duplicate subscription found, skipping')
    return
  }
  seenSubscriptions.add(subscriptionId)
  subscriptionCount++
  const mapping = AccountMappingHelper.generateSubscriptionToRecurlyMapping(
    subscription._id,
    subscription.recurlySubscription_id,
    now
  )
  logger.debug(
    {
      recurly: subscription.recurlySubscription_id,
      mapping,
    },
    `processing subscription ${subscription._id}`
  )
  if (commit) {
    registerAccountMapping(mapping)
    mapped++
  }
}
/**
 * Stream all active group-plan subscriptions through batchedUpdate and
 * register an account mapping for each (see registerMapping). Honours the
 * --endDate flag by capping the batch range.
 */
async function main() {
  const extraBatchOptions = {}
  if (endDate) {
    extraBatchOptions.BATCH_RANGE_END = endDate
  }

  const query = {
    'recurlyStatus.state': 'active',
    groupPlan: true,
  }
  // Only the fields the mapping needs.
  const projection = {
    _id: 1,
    recurlySubscription_id: 1,
  }

  await batchedUpdate(
    db.subscriptions,
    query,
    batch => batch.forEach(registerMapping),
    projection,
    { readPreference: 'secondaryPreferred' },
    { verboseLogging: verbose, ...extraBatchOptions }
  )

  logger.debug({}, `${subscriptionCount} subscriptions processed`)
  if (commit) {
    logger.debug({}, `${mapped} mappings registered`)
  }
}
// Parse and validate the CLI arguments; Joi supplies the boolean defaults.
const {
  error,
  value: { commit, endDate, verbose },
} = paramsSchema.validate(
  minimist(process.argv.slice(2), {
    boolean: ['commit', 'verbose'],
    string: ['endDate'],
  })
)
// Raise log verbosity before any other work happens.
logger.logger.level(verbose ? 'debug' : 'info')
if (error) {
  logger.error({ error }, 'error with parameters')
  // Exit non-zero so callers can detect the bad invocation.
  triggerGracefulShutdown(done => done(1))
} else {
  logger.info({ verbose, commit, endDate }, commit ? 'COMMITTING' : 'DRY RUN')
  // Top-level await: this file is an ES module.
  await main()
  triggerGracefulShutdown({
    close(done) {
      logger.info({}, 'shutting down')
      done()
    },
  })
}

View File

@@ -0,0 +1,30 @@
import { BigQuery as GoogleBigQuery } from '@google-cloud/bigquery'
// Lazily-initialised BigQuery dataset handle (module-level singleton).
let dataset = null

/**
 * Return the BigQuery dataset, connecting on first use.
 * Connection settings come from the BQ_PROJECT_ID, GCS_KEY_FILE and
 * BQ_DATASET_V2 environment variables.
 */
function getDataset() {
  if (dataset) {
    return dataset
  }
  console.log(
    'Connecting to BigQuery dataset: ',
    process.env.BQ_PROJECT_ID,
    process.env.BQ_DATASET_V2
  )
  const client = new GoogleBigQuery({
    projectId: process.env.BQ_PROJECT_ID,
    keyFilename: process.env.GCS_KEY_FILE,
  })
  dataset = client.dataset(process.env.BQ_DATASET_V2)
  return dataset
}
/**
 * Run a parameterised query against the dataset and return all result rows.
 * @param {string} query - BigQuery SQL text
 * @param {Object} [params] - named query parameters
 * @returns {Promise<Array<Object>>} the fully-materialised result rows
 */
async function query(query, params = {}) {
  const ds = getDataset()
  const [job] = await ds.createQueryJob({ query, params })
  const [rows] = await job.getQueryResults()
  return rows
}
// Public API: a thin query helper over the lazily-created dataset.
export default {
  query,
}

View File

@@ -0,0 +1,281 @@
import GoogleBigQueryHelper from './helpers/GoogleBigQueryHelper.mjs'
import { Subscription } from '../../app/src/models/Subscription.js'
import AnalyticsManager from '../../app/src/Features/Analytics/AnalyticsManager.js'
import { DeletedSubscription } from '../../app/src/models/DeletedSubscription.js'
import minimist from 'minimist'
import _ from 'lodash'
import mongodb from 'mongodb-legacy'
const { ObjectId } = mongodb
// CLI-configured globals, populated by setup() before main() runs.
let BATCH_SIZE, COMMIT, VERBOSE
/**
 * Entry point: log the totals, then reconcile active and deleted group
 * subscriptions against the membership statuses recorded in BigQuery.
 */
async function main() {
  console.log('## Syncing group subscription memberships...')

  const activeCount = await Subscription.countDocuments({ groupPlan: true })
  const deletedCount = await DeletedSubscription.countDocuments({
    'subscription.groupPlan': true,
  })
  console.log(
    `## Going to synchronize ${activeCount} subscriptions and ${deletedCount} deleted subscriptions`
  )

  await checkActiveSubscriptions()
  await checkDeletedSubscriptions()
}
/**
 * Walk every group subscription in batches, fetch the corresponding BigQuery
 * membership statuses for the batch in one query, and reconcile each
 * subscription's member list (see checkSubscriptionMemberships).
 */
async function checkActiveSubscriptions() {
  let subscriptions
  // Guards against reprocessing a doc the cursor yields twice.
  const processedSubscriptionIds = new Set()
  const cursor = Subscription.find(
    { groupPlan: true },
    { recurlySubscription_id: 1, member_ids: 1 }
  )
    .sort('_id')
    .cursor()
  do {
    subscriptions = []
    // `<` (not `<=`): with `<=` each batch held BATCH_SIZE + 1 documents.
    while (subscriptions.length < BATCH_SIZE) {
      const next = await cursor.next()
      if (!next) {
        break
      }
      subscriptions.push(next)
    }
    if (subscriptions.length) {
      const groupIds = subscriptions.map(sub => sub._id)
      // One BigQuery round-trip for the whole batch.
      const bigQueryGroupMemberships =
        await fetchBigQueryMembershipStatuses(groupIds)
      const membershipsByGroupId = _.groupBy(
        bigQueryGroupMemberships,
        'group_id'
      )
      for (const subscription of subscriptions) {
        const subscriptionId = subscription._id.toString()
        if (!processedSubscriptionIds.has(subscriptionId)) {
          await checkSubscriptionMemberships(
            subscription,
            membershipsByGroupId[subscriptionId] || []
          )
          processedSubscriptionIds.add(subscriptionId)
        }
      }
    }
  } while (subscriptions.length > 0)
}
/**
 * Same reconciliation as checkActiveSubscriptions, but over deleted group
 * subscriptions: members recorded as active in BigQuery must get `left`
 * events (see checkDeletedSubscriptionMemberships).
 */
async function checkDeletedSubscriptions() {
  let deletedSubscriptions
  // Guards against reprocessing a doc the cursor yields twice.
  const processedSubscriptionIds = new Set()
  const cursor = DeletedSubscription.find(
    { 'subscription.groupPlan': true },
    { subscription: 1 }
  ).cursor()
  do {
    deletedSubscriptions = []
    // `<` (not `<=`): with `<=` each batch held BATCH_SIZE + 1 documents.
    while (deletedSubscriptions.length < BATCH_SIZE) {
      const next = await cursor.next()
      if (!next) {
        break
      }
      deletedSubscriptions.push(next.toObject().subscription)
    }
    if (deletedSubscriptions.length) {
      const groupIds = deletedSubscriptions.map(sub => sub._id.toString())
      // One BigQuery round-trip for the whole batch.
      const bigQueryGroupMemberships =
        await fetchBigQueryMembershipStatuses(groupIds)
      const membershipsByGroupId = _.groupBy(
        bigQueryGroupMemberships,
        'group_id'
      )
      for (const deletedSubscription of deletedSubscriptions) {
        const subscriptionId = deletedSubscription._id.toString()
        if (!processedSubscriptionIds.has(subscriptionId)) {
          await checkDeletedSubscriptionMemberships(
            deletedSubscription,
            membershipsByGroupId[subscriptionId] || []
          )
          processedSubscriptionIds.add(subscriptionId)
        }
      }
    }
  } while (deletedSubscriptions.length > 0)
}
/**
 * Reconcile one active subscription's member list against its BigQuery
 * membership statuses: emit `joined` for members missing from BigQuery and
 * `left` for BigQuery members no longer in the subscription.
 */
async function checkSubscriptionMemberships(subscription, membershipStatuses) {
  if (VERBOSE) {
    console.log(
      '\n###########################################################################################',
      '\n# Subscription (mongo): ',
      '\n# _id: \t\t\t\t',
      subscription._id.toString(),
      '\n# member_ids: \t\t\t',
      subscription.member_ids.map(_id => _id.toString()),
      '\n# recurlySubscription_id: \t',
      subscription.recurlySubscription_id
    )
    console.log('#\n# Membership statuses found in BigQuery: ')
    console.table(membershipStatuses)
  }

  // create missing `joined` events when membership status is missing
  for (const memberId of subscription.member_ids) {
    const idString = memberId.toString()
    const knownAsMember = membershipStatuses.some(
      status => status.user_id === idString && status.is_member === true
    )
    if (!knownAsMember) {
      await sendCorrectiveEvent(
        memberId,
        'group-subscription-joined',
        subscription
      )
    }
  }

  // create missing `left` events if user is not a member of the group anymore
  const memberIdStrings = subscription.member_ids.map(id => id.toString())
  for (const status of membershipStatuses) {
    if (status.is_member && !memberIdStrings.includes(status.user_id)) {
      await sendCorrectiveEvent(
        status.user_id,
        'group-subscription-left',
        subscription
      )
    }
  }
}
/**
 * Reconcile one deleted subscription: every user BigQuery still records as a
 * member must receive a `left` event, whether they appear in the deleted
 * subscription's member list or were removed before the deletion.
 */
async function checkDeletedSubscriptionMemberships(
  subscription,
  membershipStatuses
) {
  if (VERBOSE) {
    console.log(
      '\n###########################################################################################',
      '\n# Deleted subscription (mongo): ',
      '\n# _id: \t\t\t\t',
      subscription._id.toString(),
      '\n# member_ids: \t\t\t',
      subscription.member_ids.map(_id => _id.toString()),
      '\n# recurlySubscription_id: \t',
      subscription.recurlySubscription_id
    )
    console.log('#\n# Membership statuses found in BigQuery: ')
    console.table(membershipStatuses)
  }

  const updatedUserIds = new Set()

  // create missing `left` events if user was a member of the group in BQ and status is not up-to-date
  for (const id of subscription.member_ids) {
    const memberId = id.toString()
    const stillRecordedAsMember = membershipStatuses.some(
      status => status.user_id === memberId && status.is_member === true
    )
    if (stillRecordedAsMember) {
      await sendCorrectiveEvent(
        memberId,
        'group-subscription-left',
        subscription
      )
      updatedUserIds.add(memberId)
    }
  }

  // for cases where the user has been removed from the subscription before it was deleted and status is not up-to-date
  for (const { user_id: userId, is_member: isMember } of membershipStatuses) {
    if (isMember && !updatedUserIds.has(userId)) {
      await sendCorrectiveEvent(userId, 'group-subscription-left', subscription)
      updatedUserIds.add(userId)
    }
  }
}
/**
 * Emit a corrective analytics event for a user — or, without --commit, just
 * log the event that a real run would send.
 */
async function sendCorrectiveEvent(userId, event, subscription) {
  // Guard: BigQuery rows can carry ids that are not valid mongo ObjectIds.
  if (!ObjectId.isValid(userId)) {
    console.warn(`Skipping '${event}' for user ${userId}: invalid user ID`)
    return
  }
  const segmentation = {
    groupId: subscription._id.toString(),
    subscriptionId: subscription.recurlySubscription_id,
    source: 'sync',
  }
  const details = `event '${event}' for user ${userId} with segmentation: ${JSON.stringify(
    segmentation
  )}`
  if (COMMIT) {
    console.log(`Sending ${details}`)
    await AnalyticsManager.recordEventForUser(userId, event, segmentation)
  } else {
    console.log(`Dry run - would send ${details}`)
  }
}
/**
 * Fetch the latest membership status per (group, user) from BigQuery for a
 * batch of group ids.
 *
 * The query resolves analytics-id aliases back to user ids, then keeps only
 * the most recent row per (group_id, user_id) via ROW_NUMBER().
 *
 * @param {Array<ObjectId>} groupIds
 * @return {Promise<*>} rows of { group_id, user_id, is_member, created_at }
 */
async function fetchBigQueryMembershipStatuses(groupIds) {
  const query = `\
WITH user_memberships AS (
  SELECT
    group_id,
    COALESCE(user_aliases.user_id, ugm.user_id) AS user_id,
    is_member,
    ugm.created_at
  FROM INT_user_group_memberships ugm
  LEFT JOIN INT_user_aliases user_aliases ON ugm.user_id = user_aliases.analytics_id
  WHERE ugm.group_id IN UNNEST(@groupIds)
),
ordered_status AS (
  SELECT *,
         ROW_NUMBER() OVER(PARTITION BY group_id, user_id ORDER BY created_at DESC) AS row_number
  FROM user_memberships
)
SELECT group_id, user_id, is_member, created_at FROM ordered_status
WHERE row_number = 1;
  `
  // ObjectIds must be stringified for the BigQuery parameter binding.
  return await GoogleBigQueryHelper.query(query, {
    groupIds: groupIds.map(id => id.toString()),
  })
}
/**
 * Parse CLI flags into the module-level globals:
 * --batchSize=<n> (default 100), --commit to apply changes, --debug for
 * verbose output. Without --commit the script is a dry run.
 */
const setup = () => {
  const { batchSize, commit, debug } = minimist(process.argv.slice(2))
  BATCH_SIZE = batchSize ? parseInt(batchSize, 10) : 100
  COMMIT = commit !== undefined
  VERBOSE = debug !== undefined
  if (!COMMIT) {
    console.warn('Doing dry run without --commit')
  }
  if (VERBOSE) {
    console.log('Running in verbose mode')
  }
}
// Script bootstrap: parse flags, run the sync, then exit explicitly so open
// mongo connections do not keep the process alive.
setup()
try {
  await main()
  // Success message belongs on stdout, not stderr (was console.error).
  console.log('Done.')
  process.exit(0)
} catch (error) {
  console.error({ error })
  process.exit(1)
}