Some checks failed
		
		
	
	Types tests / Test (lts/*) (push) Has been cancelled
				
			Lint / Lint (lts/*) (push) Has been cancelled
				
			CodeQL / Analyze (javascript) (push) Has been cancelled
				
			CI / Test (20) (push) Has been cancelled
				
			CI / Test (22) (push) Has been cancelled
				
			CI / Test (24) (push) Has been cancelled
				
			
		
			
				
	
	
		
			158 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
			
		
		
	
	
			158 lines
		
	
	
		
			5.1 KiB
		
	
	
	
		
			JavaScript
		
	
	
	
	
	
/* Copyright 2018 Mozilla Foundation
 | 
						||
 *
 | 
						||
 * Licensed under the Apache License, Version 2.0 (the "License");
 | 
						||
 * you may not use this file except in compliance with the License.
 | 
						||
 * You may obtain a copy of the License at
 | 
						||
 *
 | 
						||
 *     http://www.apache.org/licenses/LICENSE-2.0
 | 
						||
 *
 | 
						||
 * Unless required by applicable law or agreed to in writing, software
 | 
						||
 * distributed under the License is distributed on an "AS IS" BASIS,
 | 
						||
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						||
 * See the License for the specific language governing permissions and
 | 
						||
 * limitations under the License.
 | 
						||
 */
 | 
						||
 | 
						||
const CharacterType = {
 | 
						||
  SPACE: 0,
 | 
						||
  ALPHA_LETTER: 1,
 | 
						||
  PUNCT: 2,
 | 
						||
  HAN_LETTER: 3,
 | 
						||
  KATAKANA_LETTER: 4,
 | 
						||
  HIRAGANA_LETTER: 5,
 | 
						||
  HALFWIDTH_KATAKANA_LETTER: 6,
 | 
						||
  THAI_LETTER: 7,
 | 
						||
};
 | 
						||
 | 
						||
function isAlphabeticalScript(charCode) {
 | 
						||
  return charCode < 0x2e80;
 | 
						||
}
 | 
						||
 | 
						||
function isAscii(charCode) {
 | 
						||
  return (charCode & 0xff80) === 0;
 | 
						||
}
 | 
						||
 | 
						||
function isAsciiAlpha(charCode) {
 | 
						||
  return (
 | 
						||
    (charCode >= /* a = */ 0x61 && charCode <= /* z = */ 0x7a) ||
 | 
						||
    (charCode >= /* A = */ 0x41 && charCode <= /* Z = */ 0x5a)
 | 
						||
  );
 | 
						||
}
 | 
						||
 | 
						||
function isAsciiDigit(charCode) {
 | 
						||
  return charCode >= /* 0 = */ 0x30 && charCode <= /* 9 = */ 0x39;
 | 
						||
}
 | 
						||
 | 
						||
function isAsciiSpace(charCode) {
 | 
						||
  return (
 | 
						||
    charCode === /* SPACE = */ 0x20 ||
 | 
						||
    charCode === /* TAB = */ 0x09 ||
 | 
						||
    charCode === /* CR = */ 0x0d ||
 | 
						||
    charCode === /* LF = */ 0x0a
 | 
						||
  );
 | 
						||
}
 | 
						||
 | 
						||
function isHan(charCode) {
 | 
						||
  return (
 | 
						||
    (charCode >= 0x3400 && charCode <= 0x9fff) ||
 | 
						||
    (charCode >= 0xf900 && charCode <= 0xfaff)
 | 
						||
  );
 | 
						||
}
 | 
						||
 | 
						||
function isKatakana(charCode) {
 | 
						||
  return charCode >= 0x30a0 && charCode <= 0x30ff;
 | 
						||
}
 | 
						||
 | 
						||
function isHiragana(charCode) {
 | 
						||
  return charCode >= 0x3040 && charCode <= 0x309f;
 | 
						||
}
 | 
						||
 | 
						||
function isHalfwidthKatakana(charCode) {
 | 
						||
  return charCode >= 0xff60 && charCode <= 0xff9f;
 | 
						||
}
 | 
						||
 | 
						||
function isThai(charCode) {
 | 
						||
  return (charCode & 0xff80) === 0x0e00;
 | 
						||
}
 | 
						||
 | 
						||
/**
 | 
						||
 * This function is based on the word-break detection implemented in:
 | 
						||
 * https://hg.mozilla.org/mozilla-central/file/tip/intl/lwbrk/WordBreaker.cpp
 | 
						||
 */
 | 
						||
function getCharacterType(charCode) {
 | 
						||
  if (isAlphabeticalScript(charCode)) {
 | 
						||
    if (isAscii(charCode)) {
 | 
						||
      if (isAsciiSpace(charCode)) {
 | 
						||
        return CharacterType.SPACE;
 | 
						||
      } else if (
 | 
						||
        isAsciiAlpha(charCode) ||
 | 
						||
        isAsciiDigit(charCode) ||
 | 
						||
        charCode === /* UNDERSCORE = */ 0x5f
 | 
						||
      ) {
 | 
						||
        return CharacterType.ALPHA_LETTER;
 | 
						||
      }
 | 
						||
      return CharacterType.PUNCT;
 | 
						||
    } else if (isThai(charCode)) {
 | 
						||
      return CharacterType.THAI_LETTER;
 | 
						||
    } else if (charCode === /* NBSP = */ 0xa0) {
 | 
						||
      return CharacterType.SPACE;
 | 
						||
    }
 | 
						||
    return CharacterType.ALPHA_LETTER;
 | 
						||
  }
 | 
						||
 | 
						||
  if (isHan(charCode)) {
 | 
						||
    return CharacterType.HAN_LETTER;
 | 
						||
  } else if (isKatakana(charCode)) {
 | 
						||
    return CharacterType.KATAKANA_LETTER;
 | 
						||
  } else if (isHiragana(charCode)) {
 | 
						||
    return CharacterType.HIRAGANA_LETTER;
 | 
						||
  } else if (isHalfwidthKatakana(charCode)) {
 | 
						||
    return CharacterType.HALFWIDTH_KATAKANA_LETTER;
 | 
						||
  }
 | 
						||
  return CharacterType.ALPHA_LETTER;
 | 
						||
}
 | 
						||
 | 
						||
let NormalizeWithNFKC;
 | 
						||
function getNormalizeWithNFKC() {
 | 
						||
  /* eslint-disable no-irregular-whitespace */
 | 
						||
  NormalizeWithNFKC ||= ` ¨ª¯²-µ¸-º¼-¾IJ-ijĿ-ŀʼnſDŽ-njDZ-dzʰ-ʸ˘-˝ˠ-ˤʹͺ;΄-΅·ϐ-ϖϰ-ϲϴ-ϵϹևٵ-ٸक़-य़ড়-ঢ়য়ਲ਼ਸ਼ਖ਼-ਜ਼ਫ਼ଡ଼-ଢ଼ำຳໜ-ໝ༌གྷཌྷདྷབྷཛྷཀྵჼᴬ-ᴮᴰ-ᴺᴼ-ᵍᵏ-ᵪᵸᶛ-ᶿẚ-ẛάέήίόύώΆ᾽-῁ΈΉ῍-῏ΐΊ῝-῟ΰΎ῭-`ΌΏ´-῾ - ‑‗․-… ″-‴‶-‷‼‾⁇-⁉⁗ ⁰-ⁱ⁴-₎ₐ-ₜ₨℀-℃℅-ℇ℉-ℓℕ-№ℙ-ℝ℠-™ℤΩℨK-ℭℯ-ℱℳ-ℹ℻-⅀ⅅ-ⅉ⅐-ⅿ↉∬-∭∯-∰〈-〉①-⓪⨌⩴-⩶⫝̸ⱼ-ⱽⵯ⺟⻳⼀-⿕ 〶〸-〺゛-゜ゟヿㄱ-ㆎ㆒-㆟㈀-㈞㈠-㉇㉐-㉾㊀-㏿ꚜ-ꚝꝰꟲ-ꟴꟸ-ꟹꭜ-ꭟꭩ豈-嗀塚晴凞-羽蘒諸逸-都飯-舘並-龎ff-stﬓ-ﬗיִײַ-זּטּ-לּמּנּ-סּףּ-פּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-﷼︐-︙︰-﹄﹇-﹒﹔-﹦﹨-﹫ﹰ-ﹲﹴﹶ-ﻼ!-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ¢-₩`;
 | 
						||
 | 
						||
  if (typeof PDFJSDev === "undefined" || PDFJSDev.test("TESTING")) {
 | 
						||
    const ranges = [];
 | 
						||
    const range = [];
 | 
						||
    const diacriticsRegex = /^\p{M}$/u;
 | 
						||
    // Some chars must be replaced by their NFKC counterpart during a search.
 | 
						||
    for (let i = 0; i < 65536; i++) {
 | 
						||
      const c = String.fromCharCode(i);
 | 
						||
      if (c.normalize("NFKC") !== c && !diacriticsRegex.test(c)) {
 | 
						||
        if (range.length !== 2) {
 | 
						||
          range[0] = range[1] = i;
 | 
						||
          continue;
 | 
						||
        }
 | 
						||
        if (range[1] + 1 !== i) {
 | 
						||
          if (range[0] === range[1]) {
 | 
						||
            ranges.push(String.fromCharCode(range[0]));
 | 
						||
          } else {
 | 
						||
            ranges.push(
 | 
						||
              `${String.fromCharCode(range[0])}-${String.fromCharCode(
 | 
						||
                range[1]
 | 
						||
              )}`
 | 
						||
            );
 | 
						||
          }
 | 
						||
          range[0] = range[1] = i;
 | 
						||
        } else {
 | 
						||
          range[1] = i;
 | 
						||
        }
 | 
						||
      }
 | 
						||
    }
 | 
						||
    if (ranges.join("") !== NormalizeWithNFKC) {
 | 
						||
      throw new Error(
 | 
						||
        "getNormalizeWithNFKC - update the `NormalizeWithNFKC` string."
 | 
						||
      );
 | 
						||
    }
 | 
						||
  }
 | 
						||
  return NormalizeWithNFKC;
 | 
						||
}
 | 
						||
 | 
						||
export { CharacterType, getCharacterType, getNormalizeWithNFKC };
 |