first commit

2025-10-03 22:20:19 +08:00
commit 44db9807a1
2172 changed files with 526822 additions and 0 deletions
--- a/web/autolinker.js
+++ b/web/autolinker.js
@@ -0,0 +1,180 @@
+/* Copyright 2025 Mozilla Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import { AnnotationType, createValidAbsoluteUrl, Util } from "pdfjs-lib";
+import { getOriginalIndex, normalize } from "./pdf_find_controller.js";
+
+function DOMRectToPDF({ width, height, left, top }, pdfPageView) {
+  if (width === 0 || height === 0) {
+    return null;
+  }
+
+  const pageBox = pdfPageView.textLayer.div.getBoundingClientRect();
+  const bottomLeft = pdfPageView.getPagePoint(
+    left - pageBox.left,
+    top - pageBox.top
+  );
+  const topRight = pdfPageView.getPagePoint(
+    left - pageBox.left + width,
+    top - pageBox.top + height
+  );
+
+  return Util.normalizeRect([
+    bottomLeft[0],
+    bottomLeft[1],
+    topRight[0],
+    topRight[1],
+  ]);
+}
+
+function calculateLinkPosition(range, pdfPageView) {
+  const rangeRects = range.getClientRects();
+  if (rangeRects.length === 1) {
+    return { rect: DOMRectToPDF(rangeRects[0], pdfPageView) };
+  }
+
+  const rect = [Infinity, Infinity, -Infinity, -Infinity];
+  const quadPoints = [];
+  let i = 0;
+  for (const domRect of rangeRects) {
+    const normalized = DOMRectToPDF(domRect, pdfPageView);
+    if (normalized === null) {
+      continue;
+    }
+
+    quadPoints[i] = quadPoints[i + 4] = normalized[0];
+    quadPoints[i + 1] = quadPoints[i + 3] = normalized[3];
+    quadPoints[i + 2] = quadPoints[i + 6] = normalized[2];
+    quadPoints[i + 5] = quadPoints[i + 7] = normalized[1];
+
+    Util.rectBoundingBox(...normalized, rect);
+    i += 8;
+  }
+  return { quadPoints, rect };
+}
+
+/**
+ * Given a DOM node `container` and an index `offset` into its text contents,
+ * returns a pair consisting of the text node that `offset` actually points
+ * to, together with the offset relative to that text node.
+ * When the offset points at the boundary between two nodes, the result will
+ * point to the first text node in depth-first traversal order.
+ *
+ * For example, given this DOM:
+ * <p>abc<span>def</span>ghi</p>
+ *
+ * textPosition(p, 0) -> [#text "abc", 0] (before `a`)
+ * textPosition(p, 2) -> [#text "abc", 2] (between `b` and `c`)
+ * textPosition(p, 3) -> [#text "abc", 3] (after `c`)
+ * textPosition(p, 5) -> [#text "def", 2] (between `e` and `f`)
+ * textPosition(p, 6) -> [#text "def", 3] (after `f`)
+ *
+ * The walk is iterative. Starting at `container`, it descends into the first
+ * child of every non-text node. For each text node it either returns (when
+ * the remaining `offset` fits within that node's length) or subtracts the
+ * node's length from `offset`. It then advances to the next sibling, climbing
+ * through parents when a node has none; the climb stops at `container`, so
+ * the traversal never leaves the subtree.
+ *
+ * @param {Node} container - Root of the subtree to search; it may itself be a
+ *   text node.
+ * @param {number} offset - Index into the concatenated text of `container`.
+ * @returns {Array} A `[textNode, offsetInTextNode]` pair.
+ * @throws {Error} When the walk gets back to `container` without a text node
+ *   having absorbed `offset`: the offset exceeds the total text length, or
+ *   the subtree contains no text node at all.
+ */
+function textPosition(container, offset) {
+  let currentContainer = container;
+  do {
+    if (currentContainer.nodeType === Node.TEXT_NODE) {
+      const currentLength = currentContainer.textContent.length;
+      if (offset <= currentLength) {
+        return [currentContainer, offset];
+      }
+      offset -= currentLength;
+    } else if (currentContainer.firstChild) {
+      currentContainer = currentContainer.firstChild;
+      continue;
+    }
+
+    while (!currentContainer.nextSibling && currentContainer !== container) {
+      currentContainer = currentContainer.parentNode;
+    }
+    if (currentContainer !== container) {
+      currentContainer = currentContainer.nextSibling;
+    }
+  } while (currentContainer !== container);
+  throw new Error("Offset is bigger than container's contents length.");
+}
+
+function createLinkAnnotation({ url, index, length }, pdfPageView, id) {
+  const highlighter = pdfPageView._textHighlighter;
+  const [{ begin, end }] = highlighter._convertMatches([index], [length]);
+
+  const range = new Range();
+  range.setStart(
+    ...textPosition(highlighter.textDivs[begin.divIdx], begin.offset)
+  );
+  range.setEnd(...textPosition(highlighter.textDivs[end.divIdx], end.offset));
+
+  return {
+    id: `inferred_link_${id}`,
+    unsafeUrl: url,
+    url,
+    annotationType: AnnotationType.LINK,
+    rotation: 0,
+    ...calculateLinkPosition(range, pdfPageView),
+    // Populated in the annotationLayer to avoid unnecessary object creation,
+    // since most inferred links overlap existing LinkAnnotations:
+    borderStyle: null,
+  };
+}
+
+class Autolinker {
+  static #index = 0;
+
+  static #regex;
+
+  static findLinks(text) {
+    // Regex can be tested and verified at https://regex101.com/r/rXoLiT/2.
+    this.#regex ??=
+      /\b(?:https?:\/\/|mailto:|www\.)(?:[\S--[\p{P}<>]]|\/|[\S--[\[\]]]+[\S--[\p{P}<>]])+|\b[\S--[@\p{Ps}\p{Pe}<>]]+@([\S--[\p{P}<>]]+(?:\.[\S--[\p{P}<>]]+)+)/gmv;
+
+    const [normalizedText, diffs] = normalize(text, { ignoreDashEOL: true });
+    const matches = normalizedText.matchAll(this.#regex);
+    const links = [];
+    for (const match of matches) {
+      const [url, emailDomain] = match;
+      let raw;
+      if (
+        url.startsWith("www.") ||
+        url.startsWith("http://") ||
+        url.startsWith("https://")
+      ) {
+        raw = url;
+      } else if (URL.canParse(`http://${emailDomain}`)) {
+        raw = url.startsWith("mailto:") ? url : `mailto:${url}`;
+      } else {
+        continue;
+      }
+      const absoluteURL = createValidAbsoluteUrl(raw, null, {
+        addDefaultProtocol: true,
+      });
+      if (absoluteURL) {
+        const [index, length] = getOriginalIndex(
+          diffs,
+          match.index,
+          url.length
+        );
+        links.push({ url: absoluteURL.href, index, length });
+      }
+    }
+    return links;
+  }
+
+  static processLinks(pdfPageView) {
+    return this.findLinks(
+      pdfPageView._textHighlighter.textContentItemsStr.join("\n")
+    ).map(link => createLinkAnnotation(link, pdfPageView, this.#index++));
+  }
+}
+
+export { Autolinker };