first commit
external/cmapscompress/README.md (vendored, new file, 171 lines)
@@ -0,0 +1,171 @@
# Quick notes about binary CMap format (bcmap)

The format is designed to package the information from the CMap files located at external/cmap. Note that, for size-optimization reasons, the original information blocks can be changed (split or joined) and items in the blocks can be swapped.

The data is stored in binary format in network byte order (big-endian).

# Data primitives

The following primitives are used when encoding the file:

- byte (B) – a byte; bits are numbered from 0 (least significant) to 7 (most significant)
- bytes block (B[n]) – a sequence of n bytes
- unsigned number (UN) – the number is encoded as a sequence of bytes; bit 7 of each byte is a flag to continue decoding with the next byte, and bits 6-0 store the number information. For example, the bytes 0x81 0x84 0x07 represent 16903 (0x4207). Limited to 32 bits.
- signed number (SN) – the number is encoded as a sequence of bytes like UN, but is transformed before encoding: if n < 0, n is encoded as (-2*n-1) using UN encoding, otherwise as (2*n). The lowest bit of the transformed number therefore indicates the sign of the initial number.
- unsigned fixed number (UB[n]) – similar to UN, but represents an unsigned number that is stored in B[n]
- signed fixed number (SB[n]) – similar to SN, but represents a signed number that is stored in B[n]
- string (S) – the string is encoded as a sequence of bytes: first the length in characters encoded as UN, then each UTF-16 character code encoded as UN.
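As a concrete illustration of the UN and SN primitives, here is a minimal sketch; the helper names (`encodeUN` etc.) are made up for this example, the real implementation lives in compress.mjs:

```javascript
// Encode an unsigned number as a UN byte array: bits 6-0 carry data,
// bit 7 is set on every byte except the last one.
function encodeUN(n) {
  const bytes = [n & 0x7f];
  n = Math.floor(n / 128);
  while (n > 0) {
    bytes.unshift((n & 0x7f) | 0x80);
    n = Math.floor(n / 128);
  }
  return bytes;
}

// Decode a UN byte array back into a number.
function decodeUN(bytes) {
  let n = 0;
  for (const b of bytes) {
    n = n * 128 + (b & 0x7f);
  }
  return n;
}

// SN maps negative n to (-2*n-1) and non-negative n to (2*n) before UN
// encoding, so the lowest bit of the encoded value carries the sign.
function encodeSN(n) {
  return encodeUN(n < 0 ? -2 * n - 1 : 2 * n);
}

function decodeSN(bytes) {
  const n = decodeUN(bytes);
  return n & 1 ? -(n + 1) / 2 : n / 2;
}
```

For example, `encodeUN(16903)` yields the bytes `[0x81, 0x84, 0x07]` from the description above.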

# File structure

The first byte is a header:

- bits 2-1 – indicate the CMapType. Valid values are 1 and 2.
- bit 0 – indicates the WMode. Valid values are 0 and 1.

Then records follow. Each record starts with a record header encoded as B, where bits 7-5 indicate the record type (see the description of the other bits below):

- 0 – codespacerange
- 1 – notdefrange
- 2 – cidchar
- 3 – cidrange
- 4 – bfchar
- 5 – bfrange
- 6 – reserved
- 7 – metadata

## Metadata record

Bits 4-0 of the metadata record header contain the id of the metadata:

- 0 – comment, the body of the record is the encoded comment string (S)
- 1 – usecmap, the body of the record is the usecmap id string (S)
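The bit layouts above can be unpacked with plain shifts and masks; a small illustrative sketch (the function names are made up for this example):

```javascript
// Unpack the one-byte file header: bits 2-1 hold the CMapType, bit 0 the WMode.
function parseFileHeader(byte) {
  return { type: byte >> 1, wmode: byte & 1 };
}

// Unpack a record header: bits 7-5 hold the record type; for a metadata
// record (type 7), bits 4-0 hold the metadata id (0 = comment, 1 = usecmap).
function parseRecordHeader(byte) {
  const type = byte >> 5;
  return type === 7 ? { type, metadataId: byte & 0x1f } : { type };
}
```

For example, the byte 0xe1 decodes as a metadata record carrying a usecmap string.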

## Data records

Records of types 0 – 5 have the following fields in the header:

- bit 4 – indicates that the char or start/end entries in this block are stored as a contiguous sequence
- bits 3-0 – contain the byte length of the data items in this block, minus 1 (dataSize)

The number of entries, encoded as UN, follows the header. The item records then follow (see below).
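A data record header (types 0 – 5) can be unpacked the same way; an illustrative sketch (the name is made up for this example):

```javascript
// Unpack a data record header: bits 7-5 record type, bit 4 sequence flag,
// bits 3-0 the item byte length minus 1 (dataSize).
function parseDataRecordHeader(byte) {
  return {
    type: byte >> 5,
    sequence: (byte & 0x10) !== 0,
    dataSize: byte & 0x0f, // items occupy (dataSize + 1) bytes each
  };
}
```

For example, 0x71 decodes as a cidrange (type 3) sequence block with 2-byte items.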

### codespacerange (0)

Represents the following CMap block:

    n begincodespacerange
    <start> <end>
    endcodespacerange

The format of the first record is:

- start as B[dataSize]
- endDelta as UB[dataSize], end is calculated as (start + endDelta)

The format of each subsequent record is:

- startDelta as UB[dataSize], start = end + startDelta + 1
- endDelta as UB[dataSize], end = start + endDelta
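Because start/end values are arbitrary-width byte blocks, the delta arithmetic above has to be done digit by digit on hex strings. A minimal sketch of such helpers, similar in spirit to the ones in compress.mjs:

```javascript
// Add two equal-length hex strings with carry, e.g. end = start + endDelta.
function addHex(a, b) {
  let carry = 0,
    s = "";
  for (let i = a.length - 1; i >= 0; i--) {
    const sum = parseInt(a[i], 16) + parseInt(b[i], 16) + carry;
    s = (sum & 15).toString(16) + s;
    carry = sum >> 4;
  }
  return s;
}

// Increment a hex string by one (used for the implicit "+ 1" steps
// between adjacent records).
function incHex(a) {
  return addHex(a, "0".repeat(a.length - 1) + "1");
}
```

For example, with 2-byte items, `addHex("00ff", "0001")` gives `"0100"`.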

### notdefrange (1)

Represents the following CMap block:

    n beginnotdefrange
    <start> <end> code
    endnotdefrange

The format of the first record is:

- start as B[dataSize]
- endDelta as UB[dataSize], end is calculated as (start + endDelta)
- code as UN

The format of each subsequent record is:

- startDelta as UB[dataSize], start = end + startDelta + 1
- endDelta as UB[dataSize], end = start + endDelta
- code as UN

### cidchar (2)

Represents the following CMap block:

    n begincidchar
    <char> code
    endcidchar

The format of the first record is:

- char as B[dataSize]
- code as UN

The format of each subsequent record is:

- if sequence = 0, charDelta as UB[dataSize], char = char + charDelta + 1
- if sequence = 1, char = char + 1
- codeDelta as SN, code = code + codeDelta + 1
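Putting the cidchar rules together, a simplified decoder step might look like the following sketch (using plain numbers instead of hex blocks, purely for illustration):

```javascript
// Compute the next cidchar entry from the previous one.
// charDelta is the decoded UB value (unused in sequence mode) and
// codeDelta is the decoded SN value.
function nextCidChar(prev, sequence, charDelta, codeDelta) {
  return {
    char: sequence ? prev.char + 1 : prev.char + charDelta + 1,
    code: prev.code + 1 + codeDelta,
  };
}
```

In sequence mode no char delta is stored at all; chars simply advance by one, and only the signed code delta is read.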

### cidrange (3)

Represents the following CMap block:

    n begincidrange
    <start> <end> code
    endcidrange

The format of the first record is:

- start as B[dataSize]
- endDelta as UB[dataSize], end is calculated as (start + endDelta)
- code as UN

The format of each subsequent record is:

- if sequence = 0, startDelta as UB[dataSize], start = end + startDelta + 1
- if sequence = 1, start = end + 1
- endDelta as UB[dataSize], end = start + endDelta
- code as UN

### bfchar (4)

Represents the following CMap block:

    n beginbfchar
    <char> <code>
    endbfchar

The format of the first record is:

- char as B[ucs2Size], where ucs2Size = 2 (here and below)
- code as B[dataSize]

The format of each subsequent record is:

- if sequence = 0, charDelta as UB[ucs2Size], char = char + charDelta + 1
- if sequence = 1, char = char + 1
- codeDelta as SB[dataSize], code = code + codeDelta + 1

### bfrange (5)

Represents the following CMap block:

    n beginbfrange
    <start> <end> <code>
    endbfrange

The format of the first record is:

- start as B[ucs2Size]
- endDelta as UB[ucs2Size], end is calculated as (start + endDelta)
- code as B[dataSize]

The format of each subsequent record is:

- if sequence = 0, startDelta as UB[ucs2Size], start = end + startDelta + 1
- if sequence = 1, start = end + 1
- endDelta as UB[ucs2Size], end = start + endDelta
- code as B[dataSize]
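Tying the pieces together, this sketch emits the file header plus a comment metadata record as hex text, mirroring how compress.mjs builds its output string (the wrapper function name is made up for this example):

```javascript
// Emit one byte as two hex digits.
function writeByte(b) {
  return b.toString(16).padStart(2, "0");
}

// UN encoding of a non-negative number (bit 7 = continuation flag).
function writeNumber(n) {
  let s = writeByte(n & 0x7f);
  n = Math.floor(n / 128);
  while (n > 0) {
    s = writeByte((n & 0x7f) | 0x80) + s;
    n = Math.floor(n / 128);
  }
  return s;
}

// S primitive: length as UN, then each UTF-16 code unit as UN.
function writeString(s) {
  let t = writeNumber(s.length);
  for (let i = 0; i < s.length; i++) {
    t += writeNumber(s.charCodeAt(i));
  }
  return t;
}

// File header for CMapType 1 / WMode 0, followed by a comment record:
// record type 7 in bits 7-5 with metadata id 0 gives the byte 0xe0.
function makeHeaderWithComment(comment) {
  return writeByte((1 << 1) | 0) + writeByte(0xe0) + writeString(comment);
}
```

For example, `makeHeaderWithComment("Hi")` produces `"02e0024869"`: header 0x02, record byte 0xe0, string length 2, then the code units for "H" and "i".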

external/cmapscompress/compress.mjs (vendored, new file, 494 lines)
@@ -0,0 +1,494 @@
/* Copyright 2014 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import fs from "fs";
import { optimizeCMap } from "./optimize.mjs";
import { parseAdobeCMap } from "./parse.mjs";
import path from "path";

function compressCmap(srcPath, destPath, verify) {
  const content = fs.readFileSync(srcPath).toString();
  const inputData = parseAdobeCMap(content);
  optimizeCMap(inputData);

  let out = writeByte((inputData.type << 1) | inputData.wmode);
  if (inputData.comment) {
    out += writeByte(0xe0) + writeString(inputData.comment);
  }
  if (inputData.usecmap) {
    out += writeByte(0xe1) + writeString(inputData.usecmap);
  }
  let i = 0;
  while (i < inputData.body.length) {
    const item = inputData.body[i++],
      subitems = item.items;
    const first = item.items[0];
    const sequence = item.sequence === true;
    const flags = (item.type << 5) | (sequence ? 0x10 : 0);
    let nextStart, nextCode;
    switch (item.type) {
      case 0:
        out +=
          writeByte(flags | getHexSize(first.start)) +
          writeNumber(subitems.length);
        out += first.start + writeNumber(subHex(first.end, first.start));
        nextStart = incHex(first.end);
        for (let j = 1; j < subitems.length; j++) {
          out +=
            writeNumber(subHex(subitems[j].start, nextStart)) +
            writeNumber(subHex(subitems[j].end, subitems[j].start));
          nextStart = incHex(subitems[j].end);
        }
        break;
      case 1:
        out +=
          writeByte(flags | getHexSize(first.start)) +
          writeNumber(subitems.length);
        out +=
          first.start +
          writeNumber(subHex(first.end, first.start)) +
          writeNumber(first.code);
        nextStart = incHex(first.end);
        for (let j = 1; j < subitems.length; j++) {
          out +=
            writeNumber(subHex(subitems[j].start, nextStart)) +
            writeNumber(subHex(subitems[j].end, subitems[j].start)) +
            writeNumber(subitems[j].code);
          nextStart = incHex(subitems[j].end);
        }
        break;
      case 2:
        out +=
          writeByte(flags | getHexSize(first.char)) +
          writeNumber(subitems.length);
        out += first.char + writeNumber(first.code);
        nextStart = incHex(first.char);
        nextCode = first.code + 1;
        for (let j = 1; j < subitems.length; j++) {
          out +=
            (sequence ? "" : writeNumber(subHex(subitems[j].char, nextStart))) +
            writeSigned(subitems[j].code - nextCode);
          nextStart = incHex(subitems[j].char);
          nextCode = item.items[j].code + 1;
        }
        break;
      case 3:
        out +=
          writeByte(flags | getHexSize(first.start)) +
          writeNumber(subitems.length);
        out +=
          first.start +
          writeNumber(subHex(first.end, first.start)) +
          writeNumber(first.code);
        nextStart = incHex(first.end);
        for (let j = 1; j < subitems.length; j++) {
          out +=
            (sequence
              ? ""
              : writeNumber(subHex(subitems[j].start, nextStart))) +
            writeNumber(subHex(subitems[j].end, subitems[j].start)) +
            writeNumber(subitems[j].code);
          nextStart = incHex(subitems[j].end);
        }
        break;
      case 4:
        out +=
          writeByte(flags | getHexSize(first.code)) +
          writeNumber(subitems.length);
        out += first.char + first.code;
        nextStart = incHex(first.char);
        nextCode = incHex(first.code);
        for (let j = 1; j < subitems.length; j++) {
          out +=
            (sequence ? "" : writeNumber(subHex(subitems[j].char, nextStart))) +
            writeSigned(subHex(subitems[j].code, nextCode));
          nextStart = incHex(subitems[j].char);
          nextCode = incHex(subitems[j].code);
        }
        break;
      case 5:
        out +=
          writeByte(flags | getHexSize(first.code)) +
          writeNumber(subitems.length);
        out +=
          first.start +
          writeNumber(subHex(first.end, first.start)) +
          first.code;
        nextStart = incHex(first.end);
        for (let j = 1; j < subitems.length; j++) {
          out +=
            (sequence
              ? ""
              : writeNumber(subHex(subitems[j].start, nextStart))) +
            writeNumber(subHex(subitems[j].end, subitems[j].start)) +
            subitems[j].code;
          nextStart = incHex(subitems[j].end);
        }
        break;
    }
  }

  fs.writeFileSync(destPath, Buffer.from(out, "hex"));

  if (verify) {
    const result2 = parseCMap(out);
    const isGood = JSON.stringify(inputData) === JSON.stringify(result2);
    if (!isGood) {
      throw new Error("Extracted data does not match the expected result");
    }
  }

  return {
    orig: fs.statSync(srcPath).size,
    packed: out.length >> 1,
  };
}

function parseCMap(binaryData) {
  const reader = {
    buffer: binaryData,
    pos: 0,
    end: binaryData.length,
    readByte() {
      if (this.pos >= this.end) {
        return -1;
      }
      const d1 = fromHexDigit(this.buffer[this.pos]);
      const d2 = fromHexDigit(this.buffer[this.pos + 1]);
      this.pos += 2;
      return (d1 << 4) | d2;
    },
    readNumber() {
      let n = 0;
      let last;
      do {
        const b = this.readByte();
        last = !(b & 0x80);
        n = (n << 7) | (b & 0x7f);
      } while (!last);
      return n;
    },
    readSigned() {
      const n = this.readNumber();
      return n & 1 ? -(n >>> 1) - 1 : n >>> 1;
    },
    readHex(size) {
      const lengthInChars = (size + 1) << 1;
      const s = this.buffer.substring(this.pos, this.pos + lengthInChars);
      this.pos += lengthInChars;
      return s;
    },
    readHexNumber(size) {
      const lengthInChars = (size + 1) << 1,
        stack = [];
      let last;
      do {
        const b = this.readByte();
        last = !(b & 0x80);
        stack.push(b & 0x7f);
      } while (!last);
      let s = "",
        buffer = 0,
        bufferSize = 0;
      while (s.length < lengthInChars) {
        while (bufferSize < 4 && stack.length > 0) {
          buffer |= stack.pop() << bufferSize;
          bufferSize += 7;
        }
        s = toHexDigit(buffer & 15) + s;
        buffer >>= 4;
        bufferSize -= 4;
      }
      return s;
    },
    readHexSigned(size) {
      const num = this.readHexNumber(size);
      const sign = fromHexDigit(num.at(-1)) & 1 ? 15 : 0;
      let c = 0;
      let result = "";
      for (const digit of num) {
        c = (c << 4) | fromHexDigit(digit);
        result += toHexDigit(sign ? (c >> 1) ^ sign : c >> 1);
        c &= 1;
      }
      return result;
    },
    readString() {
      const len = this.readNumber();
      let s = "";
      for (let i = 0; i < len; i++) {
        s += String.fromCharCode(this.readNumber());
      }
      return s;
    },
  };

  const header = reader.readByte();
  const result = {
    type: header >> 1,
    wmode: header & 1,
    comment: null,
    usecmap: null,
    body: [],
  };

  let b;
  while ((b = reader.readByte()) >= 0) {
    const type = b >> 5;
    if (type === 7) {
      switch (b & 0x1f) {
        case 0:
          result.comment = reader.readString();
          break;
        case 1:
          result.usecmap = reader.readString();
          break;
      }
      continue;
    }
    const sequence = !!(b & 0x10);
    const dataSize = b & 15;
    const subitems = [];
    const item = {
      type,
      items: subitems,
    };
    if (sequence) {
      item.sequence = true;
    }
    const ucs2DataSize = 1;
    const subitemsCount = reader.readNumber();
    let start, end, code, char;
    switch (type) {
      case 0:
        start = reader.readHex(dataSize);
        end = addHex(reader.readHexNumber(dataSize), start);
        subitems.push({ start, end });
        for (let i = 1; i < subitemsCount; i++) {
          start = addHex(reader.readHexNumber(dataSize), incHex(end));
          end = addHex(reader.readHexNumber(dataSize), start);
          subitems.push({ start, end });
        }
        break;
      case 1:
        start = reader.readHex(dataSize);
        end = addHex(reader.readHexNumber(dataSize), start);
        code = reader.readNumber();
        subitems.push({ start, end, code });
        for (let i = 1; i < subitemsCount; i++) {
          start = addHex(reader.readHexNumber(dataSize), incHex(end));
          end = addHex(reader.readHexNumber(dataSize), start);
          code = reader.readNumber();
          subitems.push({ start, end, code });
        }
        break;
      case 2:
        char = reader.readHex(dataSize);
        code = reader.readNumber();
        subitems.push({ char, code });
        for (let i = 1; i < subitemsCount; i++) {
          char = sequence
            ? incHex(char)
            : addHex(reader.readHexNumber(dataSize), incHex(char));
          code = reader.readSigned() + (code + 1);
          subitems.push({ char, code });
        }
        break;
      case 3:
        start = reader.readHex(dataSize);
        end = addHex(reader.readHexNumber(dataSize), start);
        code = reader.readNumber();
        subitems.push({ start, end, code });
        for (let i = 1; i < subitemsCount; i++) {
          start = sequence
            ? incHex(end)
            : addHex(reader.readHexNumber(dataSize), incHex(end));
          end = addHex(reader.readHexNumber(dataSize), start);
          code = reader.readNumber();
          subitems.push({ start, end, code });
        }
        break;
      case 4:
        char = reader.readHex(ucs2DataSize);
        code = reader.readHex(dataSize);
        subitems.push({ char, code });
        for (let i = 1; i < subitemsCount; i++) {
          char = sequence
            ? incHex(char)
            : addHex(reader.readHexNumber(ucs2DataSize), incHex(char));
          code = addHex(reader.readHexSigned(dataSize), incHex(code));
          subitems.push({ char, code });
        }
        break;
      case 5:
        start = reader.readHex(ucs2DataSize);
        end = addHex(reader.readHexNumber(ucs2DataSize), start);
        code = reader.readHex(dataSize);
        subitems.push({ start, end, code });
        for (let i = 1; i < subitemsCount; i++) {
          start = sequence
            ? incHex(end)
            : addHex(reader.readHexNumber(ucs2DataSize), incHex(end));
          end = addHex(reader.readHexNumber(ucs2DataSize), start);
          code = reader.readHex(dataSize);
          subitems.push({ start, end, code });
        }
        break;
      default:
        throw new Error("Unknown type: " + type);
    }
    result.body.push(item);
  }

  return result;
}

function toHexDigit(n) {
  return n.toString(16);
}
function fromHexDigit(s) {
  return parseInt(s, 16);
}
function getHexSize(s) {
  return (s.length >> 1) - 1;
}
function writeByte(b) {
  return toHexDigit((b >> 4) & 15) + toHexDigit(b & 15);
}
function writeNumber(n) {
  if (typeof n === "string") {
    let s = "",
      buffer = 0,
      bufferSize = 0;
    let i = n.length;
    while (i > 0) {
      --i;
      buffer |= fromHexDigit(n[i]) << bufferSize;
      bufferSize += 4;
      if (bufferSize >= 7) {
        s = writeByte((buffer & 0x7f) | (s.length > 0 ? 0x80 : 0)) + s;
        buffer >>>= 7;
        bufferSize -= 7;
      }
    }
    if (buffer > 0) {
      s = writeByte((buffer & 0x7f) | (s.length > 0 ? 0x80 : 0)) + s;
    }
    while (s.indexOf("80") === 0) {
      s = s.substring(2);
    }
    return s;
  }
  let s = writeByte(n & 0x7f);
  n >>>= 7;
  while (n > 0) {
    s = writeByte((n & 0x7f) | 0x80) + s;
    n >>>= 7;
  }
  return s;
}
function writeSigned(n) {
  if (typeof n === "string") {
    let t = "";
    let c = fromHexDigit(n[0]);
    const neg = c >= 8;
    c = neg ? c ^ 15 : c;
    for (let i = 1; i < n.length; i++) {
      const d = fromHexDigit(n[i]);
      c = (c << 4) | (neg ? d ^ 15 : d);
      t += toHexDigit(c >> 3);
      c &= 7;
    }
    t += toHexDigit((c << 1) | (neg ? 1 : 0));
    return writeNumber(t);
  }
  return n < 0 ? writeNumber(-2 * n - 1) : writeNumber(2 * n);
}
function writeString(s) {
  let t = writeNumber(s.length);
  for (let i = 0; i < s.length; i++) {
    t += writeNumber(s.charCodeAt(i));
  }
  return t;
}
function addHex(a, b) {
  let c = 0,
    s = "";
  for (let i = a.length - 1; i >= 0; i--) {
    c += fromHexDigit(a[i]) + fromHexDigit(b[i]);
    if (c >= 16) {
      s = toHexDigit(c - 16) + s;
      c = 1;
    } else {
      s = toHexDigit(c) + s;
      c = 0;
    }
  }
  return s;
}
function subHex(a, b) {
  let c = 0,
    s = "";
  for (let i = a.length - 1; i >= 0; i--) {
    c += fromHexDigit(a[i]) - fromHexDigit(b[i]);
    if (c < 0) {
      s = toHexDigit(c + 16) + s;
      c = -1;
    } else {
      s = toHexDigit(c) + s;
      c = 0;
    }
  }
  return s;
}
function incHex(a) {
  let c = 1,
    s = "";
  for (let i = a.length - 1; i >= 0; i--) {
    c += fromHexDigit(a[i]);
    if (c >= 16) {
      s = toHexDigit(c - 16) + s;
      c = 1;
    } else {
      s = toHexDigit(c) + s;
      c = 0;
    }
  }
  return s;
}

function compressCmaps(src, dest, verify) {
  const files = fs.readdirSync(src).filter(function (fn) {
    return !fn.includes("."); // skip files that have an extension
  });
  files.forEach(function (fn) {
    const srcPath = path.join(src, fn);
    const destPath = path.join(dest, fn + ".bcmap");
    const stats = compressCmap(srcPath, destPath, verify);
    console.log(
      "Compressing " +
        fn +
        ": " +
        stats.orig +
        " vs " +
        stats.packed +
        " " +
        ((stats.packed / stats.orig) * 100).toFixed(1) +
        "%"
    );
  });
}

export { compressCmaps };

external/cmapscompress/optimize.mjs (vendored, new file, 227 lines)
@@ -0,0 +1,227 @@
/* Copyright 2014 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

function optimizeCMap(data) {
  let i = 1;
  while (i < data.body.length) {
    if (data.body[i - 1].type === data.body[i].type) {
      data.body[i - 1].items = data.body[i - 1].items.concat(
        data.body[i].items
      );
      data.body.splice(i, 1);
    } else {
      i++;
    }
  }
  // split into groups with different lengths
  i = 0;
  while (i < data.body.length) {
    const item = data.body[i];
    const keys = Object.keys(item.items[0]).filter(function (val) {
      return typeof item.items[0][val] === "string";
    });
    let j = 1;
    while (j < item.items.length) {
      let different = false;
      for (let q = 0; q < keys.length && !different; q++) {
        different =
          item.items[j - 1][keys[q]].length !== item.items[j][keys[q]].length;
      }
      if (different) {
        break;
      }
      j++;
    }
    if (j < item.items.length) {
      data.body.splice(i + 1, 0, {
        type: item.type,
        items: item.items.splice(j, item.items.length - j),
      });
    }
    i++;
  }
  // find sequences of single char ranges
  i = 0;
  while (i < data.body.length) {
    const item = data.body[i];
    if (item.type === 3 || item.type === 5) {
      let j = 0;
      while (j < item.items.length) {
        const q = j;
        while (
          j < item.items.length &&
          item.items[j].start === item.items[j].end
        ) {
          j++;
        }
        if (j - q >= 9) {
          if (j < item.items.length) {
            data.body.splice(i + 1, 0, {
              type: item.type,
              items: item.items.splice(j, item.items.length - j),
            });
          }
          if (q > 0) {
            data.body.splice(i + 1, 0, {
              type: item.type - 1,
              items: item.items.splice(q, j - q).map(function (val) {
                return { char: val.start, code: val.code };
              }),
            });
            i++;
          } else {
            item.type -= 1;
            item.items = item.items.map(function (val) {
              return { char: val.start, code: val.code };
            });
          }
          continue;
        }
        j++;
      }
    }
    i++;
  }

  // find sequences of increasing code/ranges order
  i = 0;
  while (i < data.body.length) {
    const item = data.body[i];
    if (item.type >= 2 && item.type <= 5) {
      let j = 1;
      const startProp = item.type === 2 || item.type === 4 ? "char" : "start";
      const endProp = item.type === 2 || item.type === 4 ? "char" : "end";
      while (j < item.items.length) {
        const q = j - 1;
        while (
          j < item.items.length &&
          incHex(item.items[j - 1][endProp]) === item.items[j][startProp]
        ) {
          j++;
        }
        if (j - q >= 9) {
          if (j < item.items.length) {
            data.body.splice(i + 1, 0, {
              type: item.type,
              items: item.items.splice(j, item.items.length - j),
            });
          }
          if (q > 0) {
            data.body.splice(i + 1, 0, {
              type: item.type,
              items: item.items.splice(q, j - q),
              sequence: true,
            });
            i++;
          } else {
            item.sequence = true;
          }
          continue;
        }
        j++;
      }
    }
    i++;
  }

  // split non-sequences into groups where codes are close
  i = 0;
  while (i < data.body.length) {
    const item = data.body[i];
    if (!item.sequence && (item.type === 2 || item.type === 3)) {
      const subitems = item.items;
      const codes = subitems.map(function (val) {
        return val.code;
      });
      codes.sort(function (a, b) {
        return a - b;
      });
      const maxDistance = 100,
        minItems = 10,
        itemsPerBucket = 50;
      if (subitems.length > minItems && codes.at(-1) - codes[0] > maxDistance) {
        const gapsCount = Math.max(2, (subitems.length / itemsPerBucket) | 0);
        const gaps = [];
        for (let q = 0; q < gapsCount; q++) {
          gaps.push({ length: 0 });
        }
        for (let j = 1; j < codes.length; j++) {
          const gapLength = codes[j] - codes[j - 1];
          let q = 0;
          while (q < gaps.length && gaps[q].length > gapLength) {
            q++;
          }
          if (q >= gaps.length) {
            continue;
          }
          let q0 = q;
          while (q < gaps.length) {
            if (gaps[q].length < gaps[q0].length) {
              q0 = q;
            }
            q++;
          }
          gaps[q0] = { length: gapLength, boundary: codes[j] };
        }
        const groups = gaps
          .filter(function (g) {
            return g.length >= maxDistance;
          })
          .map(function (g) {
            return g.boundary;
          });
        groups.sort(function (a, b) {
          return a - b;
        });
        if (groups.length > 1) {
          const buckets = [(item.items = [])];
          for (let j = 0; j < groups.length; j++) {
            const newItem = { type: item.type, items: [] };
            buckets.push(newItem.items);
            i++;
            data.body.splice(i, 0, newItem);
          }
          for (const subitem of subitems) {
            const { code } = subitem;
            let q = 0;
            while (q < groups.length && groups[q] <= code) {
              q++;
            }
            buckets[q].push(subitem);
          }
        }
      }
    }
    i++;
  }
}

function incHex(a) {
  let c = 1,
    s = "";
  for (let i = a.length - 1; i >= 0; i--) {
    c += parseInt(a[i], 16);
    if (c >= 16) {
      s = "0" + s;
      c = 1;
    } else {
      s = c.toString(16) + s;
      c = 0;
    }
  }
  return s;
}

export { optimizeCMap };

external/cmapscompress/parse.mjs (vendored, new file, 105 lines)
@@ -0,0 +1,105 @@
/* Copyright 2014 Mozilla Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

function parseAdobeCMap(content) {
  let m = /(\bbegincmap\b[\s\S]*?)\bendcmap\b/.exec(content);
  if (!m) {
    throw new Error("cmap was not found");
  }

  const body = m[1].replaceAll(/\r\n?/g, "\n");
  const result = {
    type: 1,
    wmode: 0,
    comment:
      "Copyright 1990-2009 Adobe Systems Incorporated.\nAll rights reserved.\nSee ./LICENSE",
    usecmap: null,
    body: [],
  };
  m = /\/CMapType\s+(\d+)\s+def\b/.exec(body);
  result.type = +m[1];
  m = /\/WMode\s+(\d+)\s+def\b/.exec(body);
  result.wmode = +m[1];
  m = /\/([\w-]+)\s+usecmap\b/.exec(body);
  if (m) {
    result.usecmap = m[1];
  }
  const re =
    /(\d+)\s+(begincodespacerange|beginnotdefrange|begincidchar|begincidrange|beginbfchar|beginbfrange)\n([\s\S]*?)\n(endcodespacerange|endnotdefrange|endcidchar|endcidrange|endbfchar|endbfrange)/g;
  while ((m = re.exec(body))) {
    const lines = m[3].toLowerCase().split("\n");

    switch (m[2]) {
      case "begincodespacerange":
        result.body.push({
          type: 0,
          items: lines.map(function (line) {
            const m2 = /<(\w+)>\s+<(\w+)>/.exec(line);
            return { start: m2[1], end: m2[2] };
          }),
        });
        break;
      case "beginnotdefrange":
        result.body.push({
          type: 1,
          items: lines.map(function (line) {
            const m2 = /<(\w+)>\s+<(\w+)>\s+(\d+)/.exec(line);
            return { start: m2[1], end: m2[2], code: +m2[3] };
          }),
        });
        break;
      case "begincidchar":
        result.body.push({
          type: 2,
          items: lines.map(function (line) {
            const m2 = /<(\w+)>\s+(\d+)/.exec(line);
            return { char: m2[1], code: +m2[2] };
          }),
        });
        break;
      case "begincidrange":
        result.body.push({
          type: 3,
          items: lines.map(function (line) {
            const m2 = /<(\w+)>\s+<(\w+)>\s+(\d+)/.exec(line);
            return { start: m2[1], end: m2[2], code: +m2[3] };
          }),
        });
        break;
      case "beginbfchar":
        result.body.push({
          type: 4,
          items: lines.map(function (line) {
            const m2 = /<(\w+)>\s+<(\w+)>/.exec(line);
            return { char: m2[1], code: m2[2] };
          }),
        });
        break;
      case "beginbfrange":
        result.body.push({
          type: 5,
          items: lines.map(function (line) {
            const m2 = /<(\w+)>\s+<(\w+)>\s+<(\w+)>/.exec(line);
            return { start: m2[1], end: m2[2], code: m2[3] };
          }),
        });
        break;
    }
  }

  return result;
}

export { parseAdobeCMap };