first commit

This commit is contained in:
2025-04-24 13:11:28 +08:00
commit ff9c54d5e4
5960 changed files with 834111 additions and 0 deletions

View File

@@ -0,0 +1,40 @@
# Sync User Entitlements
Entitlement information for institutional (university) SSO users is stored in
both the mongo users collection and the postgres v2_user_universities table.
The mongo users collection is authoritative, but the two need to be in sync for
everything to work properly.
This script takes exports from both mongo and postgres, finds mismatches, and
then corrects the data in postgres so that it matches mongo.
## Exporting users data from mongo
Follow the directions in `google-ops/README.md` for exporting data from mongo
and copying the files to your local machine.
Run the following user export command.
```
mongoexport --uri $READ_ONLY_MONGO_CONNECTION_STRING --collection users --fields '_id,email,emails,samlIdentifiers' --query '{"samlIdentifiers.providerId": {"$exists": 1}}' --out user-entitlements.json
```
**Note: this file contains PII and caution must be exercised to ensure that it
is never transferred or stored insecurely and that it is deleted ASAP**
## Exporting data from postgres
Connect to postgres by running `heroku psql -a electric-leaf-4093`
Run the following v2_user_universities export command.
```
\copy (select uu.user_id, uu.email, uu.cached_entitlement, ud.university_id from v2_user_universities uu LEFT JOIN university_domains ud ON uu.university_domain_id = ud.id WHERE uu.removed_at IS NULL) to 'cached-entitlements.csv' with csv;
```
**Note: this file contains PII and caution must be exercised to ensure that it
is never transferred or stored insecurely and that it is deleted ASAP**
## Run sync
```
node scripts/sync-user-entitlements/sync-user-entitlements --user-entitlements user-entitlements.json --cached-entitlements cached-entitlements.csv --commit
```

View File

@@ -0,0 +1,196 @@
import fs from 'node:fs'
import minimist from 'minimist'
import InstitutionsAPIModule from '../../app/src/Features/Institutions/InstitutionsAPI.js'
const { promises: InstitutionsAPI } = InstitutionsAPIModule
// Parse CLI flags. `commit` and `ignore-nulls` are declared as booleans so
// that minimist normalises negated or explicit forms (`--no-commit`,
// `--commit=false`) to `false`.
const argv = minimist(process.argv.slice(2), {
  boolean: ['commit', 'ignore-nulls'],
})
// Writes only happen when --commit is explicitly enabled. A bare
// `!== undefined` check would treat `--no-commit` (parsed as `false`) as a
// request to commit, which is the opposite of what the operator asked for.
const commit = argv.commit === true
// When set, a cached entitlement that is neither 't' nor 'f' is loaded as
// `false` rather than `null` (see loadCachedEntitlements).
const ignoreNulls = argv['ignore-nulls'] === true
if (!commit) {
  console.log('DOING DRY RUN. TO SAVE CHANGES PASS --commit')
}
// Both exports are read eagerly at start-up, before any comparison is made.
const userEntitlements = loadUserEntitlements(argv['user-entitlements'])
const cachedEntitlements = loadCachedEntitlements(argv['cached-entitlements'])
/**
 * Compare the mongo export with the postgres export and push every
 * difference to `syncUserEntitlement`, logging each finding.
 *
 * Pass 1 walks the mongo users and checks each SSO-linked email against the
 * cached (postgres) record for that email. Pass 2 walks the cached records
 * that grant an entitlement and checks that mongo agrees.
 *
 * @param {Object} userEntitlements - mongo records keyed by user id; each
 *   record carries `userId`, `emails` and `samlIdentifiers`
 * @param {Object} cachedEntitlements - postgres records keyed by email; each
 *   record carries `userId`, `email`, `providerId` and `hasEntitlement`
 * @returns {Promise<void>}
 */
async function syncUserEntitlements(userEntitlements, cachedEntitlements) {
  // pass 1: entitlements known to mongo that are missing or wrong in postgres
  for (const mongoUser of Object.values(userEntitlements)) {
    if (!mongoUser) {
      continue
    }

    // only emails linked through sso are relevant
    for (const emailEntry of mongoUser.emails) {
      if (!emailEntry.samlProviderId) {
        continue
      }

      const identifier = mongoUser.samlIdentifiers.find(
        candidate => candidate.providerId === emailEntry.samlProviderId
      )

      // an email without a matching identifier is an inconsistency inside
      // mongo itself; report it and move on
      if (!identifier) {
        console.log(`missing samlIdentifier for user ${mongoUser.userId}`)
        continue
      }

      const cachedRecord = cachedEntitlements[emailEntry.email]

      // postgres has no record at all for this email
      if (!cachedRecord) {
        console.log(
          `missing cached entitlement for user ${mongoUser.userId}`
        )
        await syncUserEntitlement(
          mongoUser.userId,
          emailEntry.email,
          identifier.hasEntitlement
        )
        continue
      }

      // postgres has a record; make sure it carries the same value
      if (cachedRecord.hasEntitlement !== identifier.hasEntitlement) {
        console.log(
          `cached entitlement mismatch for user ${mongoUser.userId} mongo(${identifier.hasEntitlement}) postgres(${cachedRecord.hasEntitlement})`
        )
        await syncUserEntitlement(
          mongoUser.userId,
          emailEntry.email,
          identifier.hasEntitlement
        )
      }
    }

    // report identifiers that no email entry points at (log only)
    for (const identifier of mongoUser.samlIdentifiers) {
      const linkedEmail = mongoUser.emails.find(
        entry => entry.samlProviderId === identifier.providerId
      )
      if (!linkedEmail) {
        console.log(
          `missing email entry for samlIdentifier for user ${mongoUser.userId}`
        )
      }
    }
  }

  // pass 2: entitlements granted in postgres that mongo does not back up
  for (const cachedRecord of Object.values(cachedEntitlements)) {
    // records without an entitlement need no confirmation from mongo
    if (!cachedRecord || !cachedRecord.hasEntitlement) {
      continue
    }

    const mongoUser = userEntitlements[cachedRecord.userId]

    // a user absent from the export is probably one without entitlements,
    // since those were not exported
    if (!mongoUser) {
      console.log(
        `missing cached entitlement in mongo for user ${cachedRecord.userId}`
      )
      continue
    }

    const identifier = mongoUser.samlIdentifiers.find(
      candidate => candidate.providerId === cachedRecord.providerId
    )
    if (identifier && identifier.hasEntitlement) {
      continue
    }

    console.log(
      `cached entitlement mismatch for user ${mongoUser.userId} mongo(false) postgres(true)`
    )
    await syncUserEntitlement(mongoUser.userId, cachedRecord.email, false)
  }
}
/**
 * Apply a single entitlement correction through InstitutionsAPI.
 *
 * Does nothing on a dry run (when `commit` is falsy). Failures from the API
 * are logged and not rethrown, so one bad record does not stop the run.
 *
 * @param {string} userId - id of the user whose entitlement is corrected
 * @param {string} email - email the entitlement is attached to
 * @param {boolean} hasEntitlement - truthy to add the entitlement, falsy to
 *   remove it
 * @returns {Promise<void>}
 */
async function syncUserEntitlement(userId, email, hasEntitlement) {
  if (commit) {
    const operation = hasEntitlement ? 'addEntitlement' : 'removeEntitlement'
    try {
      await InstitutionsAPI[operation](userId, email)
    } catch (err) {
      console.error(
        `error setting entitlement: ${userId}, ${email}, ${hasEntitlement} - ${err.message}`
      )
    }
  }
}
/**
 * Read the mongo users export (one JSON document per line) into a lookup
 * keyed by user id.
 *
 * Each document's `_id.$oid` becomes both the lookup key and the record's
 * `userId` property; the `_id` property itself is dropped. Empty lines are
 * skipped.
 *
 * @param {string} userEntitlementsFilename - path of the export file
 * @returns {Object} records keyed by user id
 */
function loadUserEntitlements(userEntitlementsFilename) {
  const contents = fs.readFileSync(userEntitlementsFilename, {
    encoding: 'utf8',
  })
  const userEntitlements = {}
  contents
    .split('\n')
    .filter(line => line)
    .forEach(line => {
      const { _id, ...fields } = JSON.parse(line)
      const userId = _id.$oid
      userEntitlements[userId] = { ...fields, userId }
    })
  return userEntitlements
}
/**
 * Read the postgres CSV export into a lookup keyed by email.
 *
 * Each non-empty line is split into `userId, email, hasEntitlement,
 * providerId`. The entitlement column is mapped from 't'/'f' to
 * `true`/`false`; any other value becomes `null`, or `false` when the
 * module-level `ignoreNulls` flag is set.
 *
 * Blank lines (such as the one produced by the export's trailing newline)
 * are skipped so they cannot create a bogus record keyed "undefined", and
 * both LF and CRLF line endings are accepted so a stray `\r` never ends up
 * in `providerId`.
 *
 * @param {string} cachedEntitlementsFilename - path of the CSV export
 * @returns {Object} records `{ email, hasEntitlement, providerId, userId }`
 *   keyed by email
 */
function loadCachedEntitlements(cachedEntitlementsFilename) {
  const cachedEntitlementsData = fs
    .readFileSync(cachedEntitlementsFilename, {
      encoding: 'utf8',
    })
    .split(/\r?\n/)
  const cachedEntitlements = {}
  for (const cachedEntitlementLine of cachedEntitlementsData) {
    if (!cachedEntitlementLine) {
      continue
    }
    // this is safe because comma is not an allowed value for any column
    const [userId, email, hasEntitlement, providerId] =
      cachedEntitlementLine.split(',')
    let hasEntitlementBoolean
    if (ignoreNulls) {
      hasEntitlementBoolean = hasEntitlement === 't'
    } else if (hasEntitlement === 't') {
      hasEntitlementBoolean = true
    } else if (hasEntitlement === 'f') {
      hasEntitlementBoolean = false
    } else {
      // neither 't' nor 'f': keep the "unknown" state distinct from false
      hasEntitlementBoolean = null
    }
    cachedEntitlements[email] = {
      email,
      hasEntitlement: hasEntitlementBoolean,
      providerId,
      userId,
    }
  }
  return cachedEntitlements
}
// Run the sync over the two loaded exports. An unexpected failure is logged
// with its stack and reported through a non-zero exit status, so shells and
// automation do not mistake a crashed run for a successful one.
try {
  await syncUserEntitlements(userEntitlements, cachedEntitlements)
} catch (error) {
  console.error(error.stack)
  process.exitCode = 1
}
// Exit explicitly (presumably because imported application modules may keep
// the event loop alive). With no argument, process.exit() uses
// process.exitCode when it has been set.
process.exit()