first commit
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
dist-newstyle/
|
||||||
|
.DS_Store
|
||||||
7
.vscode/settings.json
vendored
Normal file
7
.vscode/settings.json
vendored
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
{
|
||||||
|
"haskell.toolchain": {
|
||||||
|
"hls": "recommended",
|
||||||
|
"cabal": "recommended",
|
||||||
|
"stack": null
|
||||||
|
}
|
||||||
|
}
|
||||||
5
CHANGELOG.md
Normal file
5
CHANGELOG.md
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
# Revision history for panguFilter
|
||||||
|
|
||||||
|
## 0.1.0.0 -- YYYY-mm-dd
|
||||||
|
|
||||||
|
* First version. Released on an unsuspecting world.
|
||||||
29
LICENSE
Normal file
29
LICENSE
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
Copyright (c) 2026, Yu Cong
|
||||||
|
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
* Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following
|
||||||
|
disclaimer in the documentation and/or other materials provided
|
||||||
|
with the distribution.
|
||||||
|
|
||||||
|
* Neither the name of the copyright holder nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
191
pangu.py
Normal file
191
pangu.py
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
#!/usr/bin/env python
|
||||||
|
# coding: utf-8
|
||||||
|
"""
|
||||||
|
Paranoid text spacing for good readability, to automatically insert whitespace between CJK (Chinese, Japanese, Korean) and half-width characters (alphabetical letters, numerical digits and symbols).
|
||||||
|
|
||||||
|
>>> import pangu
|
||||||
|
>>> new_text = pangu.spacing_text('當你凝視著bug,bug也凝視著你')
|
||||||
|
>>> print(new_text)
|
||||||
|
'當你凝視著 bug,bug 也凝視著你'
|
||||||
|
>>> new_content = pangu.spacing_file('path/to/file.txt')
|
||||||
|
>>> print(new_content)
|
||||||
|
'與 PM 戰鬥的人,應當小心自己不要成為 PM'
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
__version__ = '4.0.6.1'
|
||||||
|
__all__ = ['spacing_text', 'spacing_file', 'spacing', 'cli']
|
||||||
|
|
||||||
|
CJK = r'\u2e80-\u2eff\u2f00-\u2fdf\u3040-\u309f\u30a0-\u30fa\u30fc-\u30ff\u3100-\u312f\u3200-\u32ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff'
|
||||||
|
|
||||||
|
ANY_CJK = re.compile(r'[{CJK}]'.format(CJK=CJK))
|
||||||
|
|
||||||
|
CONVERT_TO_FULLWIDTH_CJK_SYMBOLS_CJK = re.compile('([{CJK}])([ ]*(?:[\\:]+|\\.)[ ]*)([{CJK}])'.format(CJK=CJK)) # there is an extra non-capturing group compared to JavaScript version
|
||||||
|
CONVERT_TO_FULLWIDTH_CJK_SYMBOLS = re.compile('([{CJK}])[ ]*([~\\!;,\\?]+)[ ]*'.format(CJK=CJK))
|
||||||
|
DOTS_CJK = re.compile('([\\.]{{2,}}|\u2026)([{CJK}])'.format(CJK=CJK)) # need to escape { }
|
||||||
|
FIX_CJK_COLON_ANS = re.compile('([{CJK}])\\:([A-Z0-9\\(\\)])'.format(CJK=CJK))
|
||||||
|
|
||||||
|
CJK_QUOTE = re.compile('([{CJK}])([`"\u05f4])'.format(CJK=CJK)) # no need to escape `
|
||||||
|
QUOTE_CJK = re.compile('([`"\u05f4])([{CJK}])'.format(CJK=CJK)) # no need to escape `
|
||||||
|
FIX_QUOTE_ANY_QUOTE = re.compile(r'([`"\u05f4]+)(\s*)(.+?)(\s*)([`"\u05f4]+)')
|
||||||
|
|
||||||
|
CJK_SINGLE_QUOTE_BUT_POSSESSIVE = re.compile("([{CJK}])('[^s])".format(CJK=CJK))
|
||||||
|
SINGLE_QUOTE_CJK = re.compile("(')([{CJK}])".format(CJK=CJK))
|
||||||
|
FIX_POSSESSIVE_SINGLE_QUOTE = re.compile("([{CJK}A-Za-z0-9])( )('s)".format(CJK=CJK))
|
||||||
|
|
||||||
|
HASH_ANS_CJK_HASH = re.compile('([{CJK}])(#)([{CJK}]+)(#)([{CJK}])'.format(CJK=CJK))
|
||||||
|
CJK_HASH = re.compile('([{CJK}])(#([^ ]))'.format(CJK=CJK))
|
||||||
|
HASH_CJK = re.compile('(([^ ])#)([{CJK}])'.format(CJK=CJK))
|
||||||
|
|
||||||
|
CJK_OPERATOR_ANS = re.compile('([{CJK}])([\\+\\-\\*\\/=&\\|<>])([A-Za-z0-9])'.format(CJK=CJK))
|
||||||
|
ANS_OPERATOR_CJK = re.compile('([A-Za-z0-9])([\\+\\-\\*\\/=&\\|<>])([{CJK}])'.format(CJK=CJK))
|
||||||
|
|
||||||
|
FIX_SLASH_AS = re.compile(r'([/]) ([a-z\-_\./]+)')
|
||||||
|
FIX_SLASH_AS_SLASH = re.compile(r'([/\.])([A-Za-z\-_\./]+) ([/])')
|
||||||
|
|
||||||
|
CJK_LEFT_BRACKET = re.compile('([{CJK}])([\\(\\[\\{{<>\u201c])'.format(CJK=CJK)) # need to escape {
|
||||||
|
RIGHT_BRACKET_CJK = re.compile('([\\)\\]\\}}<>\u201d])([{CJK}])'.format(CJK=CJK)) # need to escape }
|
||||||
|
FIX_LEFT_BRACKET_ANY_RIGHT_BRACKET = re.compile(r'([\(\[\{<\u201c]+)(\s*)(.+?)(\s*)([\)\]\}>\u201d]+)') # need to escape { }
|
||||||
|
ANS_CJK_LEFT_BRACKET_ANY_RIGHT_BRACKET = re.compile('([A-Za-z0-9{CJK}])[ ]*([\u201c])([A-Za-z0-9{CJK}\\-_ ]+)([\u201d])'.format(CJK=CJK))
|
||||||
|
LEFT_BRACKET_ANY_RIGHT_BRACKET_ANS_CJK = re.compile('([\u201c])([A-Za-z0-9{CJK}\\-_ ]+)([\u201d])[ ]*([A-Za-z0-9{CJK}])'.format(CJK=CJK))
|
||||||
|
|
||||||
|
AN_LEFT_BRACKET = re.compile(r'([A-Za-z0-9])([\(\[\{])')
|
||||||
|
RIGHT_BRACKET_AN = re.compile(r'([\)\]\}])([A-Za-z0-9])')
|
||||||
|
|
||||||
|
CJK_ANS = re.compile('([{CJK}])([A-Za-z\u0370-\u03ff0-9@\\$%\\^&\\*\\-\\+\\\\=\\|/\u00a1-\u00ff\u2150-\u218f\u2700—\u27bf])'.format(CJK=CJK))
|
||||||
|
ANS_CJK = re.compile('([A-Za-z\u0370-\u03ff0-9~\\!\\$%\\^&\\*\\-\\+\\\\=\\|;:,\\./\\?\u00a1-\u00ff\u2150-\u218f\u2700—\u27bf])([{CJK}])'.format(CJK=CJK))
|
||||||
|
|
||||||
|
S_A = re.compile(r'(%)([A-Za-z])')
|
||||||
|
|
||||||
|
MIDDLE_DOT = re.compile(r'([ ]*)([\u00b7\u2022\u2027])([ ]*)')
|
||||||
|
|
||||||
|
# Python version only
|
||||||
|
TILDES = re.compile(r'~+')
|
||||||
|
EXCLAMATION_MARKS = re.compile(r'!+')
|
||||||
|
SEMICOLONS = re.compile(r';+')
|
||||||
|
COLONS = re.compile(r':+')
|
||||||
|
COMMAS = re.compile(r',+')
|
||||||
|
PERIODS = re.compile(r'\.+')
|
||||||
|
QUESTION_MARKS = re.compile(r'\?+')
|
||||||
|
|
||||||
|
|
||||||
|
# Half-width punctuation mapped to its full-width counterpart. The targets are
# written as escapes so they cannot be silently folded back to ASCII by an
# editor or a Unicode-normalizing tool.
_FULLWIDTH_FORMS = {
    '~': '\uff5e',
    '!': '\uff01',
    ';': '\uff1b',
    ':': '\uff1a',
    ',': '\uff0c',
    '.': '\u3002',
    '?': '\uff1f',
}

# A run of one repeated half-width punctuation mark, e.g. "!!" or "...".
_HALFWIDTH_RUN = re.compile(r'([~!;:,.?])\1*')


def convert_to_fullwidth(symbols):
    """
    Replace half-width punctuation in `symbols` with full-width forms.

    Each run of the same mark (``~ ! ; : , . ?``) collapses to a single
    full-width character, and surrounding whitespace is stripped from the
    result. Other characters are left untouched.

    The output must not contain the original half-width marks: `spacing()`
    searches again after every conversion and relies on the converted span no
    longer matching.
    """
    return _HALFWIDTH_RUN.sub(lambda run: _FULLWIDTH_FORMS[run.group(1)], symbols).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def spacing(text):
    """
    Perform paranoid text spacing on text.

    Returns `text` unchanged when it is at most one character long or holds
    no CJK character. Otherwise the substitutions below are applied in a fixed
    order (later rules clean up after earlier ones) and the result is returned
    with leading and trailing whitespace stripped.
    """
    if len(text) <= 1 or not ANY_CJK.search(text):
        return text

    new_text = text

    # TODO: refactoring
    # Punctuation between two CJK characters. Searching restarts from the
    # beginning each time so overlapping occurrences are all handled.
    matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS_CJK.search(new_text)
    while matched:
        start, end = matched.span()
        converted = ''.join((new_text[:start + 1], convert_to_fullwidth(new_text[start + 1:end - 1]), new_text[end - 1:]))
        if converted == new_text:
            # No progress: the same span would match again forever.
            break
        new_text = converted
        matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS_CJK.search(new_text)

    # Punctuation that follows a CJK character.
    matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS.search(new_text)
    while matched:
        start, end = matched.span()
        converted = ''.join((new_text[:start + 1].strip(), convert_to_fullwidth(new_text[start + 1:end]), new_text[end:].strip()))
        if converted == new_text:
            # No progress: the same span would match again forever.
            break
        new_text = converted
        matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS.search(new_text)

    new_text = DOTS_CJK.sub(r'\1 \2', new_text)
    # CJK + ":" + capital/digit/parenthesis gets a full-width colon (U+FF1A);
    # substituting the ASCII colon back in would be a no-op.
    new_text = FIX_CJK_COLON_ANS.sub('\\1\uff1a\\2', new_text)

    new_text = CJK_QUOTE.sub(r'\1 \2', new_text)
    new_text = QUOTE_CJK.sub(r'\1 \2', new_text)
    # Drop the whitespace just inside a quoted span.
    new_text = FIX_QUOTE_ANY_QUOTE.sub(r'\1\3\5', new_text)

    new_text = CJK_SINGLE_QUOTE_BUT_POSSESSIVE.sub(r'\1 \2', new_text)
    new_text = SINGLE_QUOTE_CJK.sub(r'\1 \2', new_text)
    # Re-attach a possessive 's that ended up separated by a space.
    new_text = FIX_POSSESSIVE_SINGLE_QUOTE.sub(r"\1's", new_text)

    new_text = HASH_ANS_CJK_HASH.sub(r'\1 \2\3\4 \5', new_text)
    new_text = CJK_HASH.sub(r'\1 \2', new_text)
    new_text = HASH_CJK.sub(r'\1 \3', new_text)

    new_text = CJK_OPERATOR_ANS.sub(r'\1 \2 \3', new_text)
    new_text = ANS_OPERATOR_CJK.sub(r'\1 \2 \3', new_text)

    # Undo spaces the operator rules put around slashes in path-like runs.
    new_text = FIX_SLASH_AS.sub(r'\1\2', new_text)
    new_text = FIX_SLASH_AS_SLASH.sub(r'\1\2\3', new_text)

    new_text = CJK_LEFT_BRACKET.sub(r'\1 \2', new_text)
    new_text = RIGHT_BRACKET_CJK.sub(r'\1 \2', new_text)
    # Drop the whitespace just inside a bracketed span.
    new_text = FIX_LEFT_BRACKET_ANY_RIGHT_BRACKET.sub(r'\1\3\5', new_text)
    new_text = ANS_CJK_LEFT_BRACKET_ANY_RIGHT_BRACKET.sub(r'\1 \2\3\4', new_text)
    new_text = LEFT_BRACKET_ANY_RIGHT_BRACKET_ANS_CJK.sub(r'\1\2\3 \4', new_text)

    new_text = AN_LEFT_BRACKET.sub(r'\1 \2', new_text)
    new_text = RIGHT_BRACKET_AN.sub(r'\1 \2', new_text)

    new_text = CJK_ANS.sub(r'\1 \2', new_text)
    new_text = ANS_CJK.sub(r'\1 \2', new_text)

    new_text = S_A.sub(r'\1 \2', new_text)

    # Normalize middle-dot variants, together with any spaces around them.
    new_text = MIDDLE_DOT.sub('・', new_text)

    return new_text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def spacing_text(text):
    """
    Apply paranoid text spacing to a string.

    This is a thin alias that forwards to `spacing()` and returns its result.
    """
    spaced = spacing(text)
    return spaced
|
||||||
|
|
||||||
|
|
||||||
|
def spacing_file(path):
    """
    Read the file at `path` and return its content with paranoid text
    spacing applied.

    The path is made absolute before opening, and the whole file is read in
    one go.
    """
    # TODO: read line by line
    absolute_path = os.path.abspath(path)
    with open(absolute_path) as source:
        content = source.read()
    return spacing_text(content)
|
||||||
|
|
||||||
|
|
||||||
|
def cli(args=None):
    """
    Command-line entry point.

    `args` is the argument list to parse; when it is empty or None,
    `sys.argv[1:]` is used. If stdin is not a TTY, its content is spaced and
    printed and the arguments are not parsed at all. Otherwise the positional
    value is treated as a file path only when `-f` is given without `-t`; in
    every other case it is treated as text.
    """
    if not args:
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(
        prog='pangu',
        description='pangu.py -- Paranoid text spacing for good readability, to automatically insert whitespace between CJK and half-width characters (alphabetical letters, numerical digits and symbols).',
    )
    parser.add_argument('-v', '--version', action='version', version=__version__)
    parser.add_argument('-t', '--text', action='store_true', dest='is_text', required=False, help='specify the input value is a text')
    parser.add_argument('-f', '--file', action='store_true', dest='is_file', required=False, help='specify the input value is a file path')
    parser.add_argument('text_or_path', action='store', type=str, help='the text or file path to apply spacing')

    # Piped input wins over anything given on the command line.
    if not sys.stdin.isatty():
        print(spacing_text(sys.stdin.read()))  # noqa: T003
        return

    options = parser.parse_args(args)
    # -t takes precedence over -f; with neither flag the value is text.
    if options.is_file and not options.is_text:
        result = spacing_file(options.text_or_path)
    else:
        result = spacing_text(options.text_or_path)
    print(result)  # noqa: T003
|
||||||
|
|
||||||
|
|
||||||
|
# Run the command-line interface when this file is executed as a script.
if __name__ == '__main__':
    cli()
|
||||||
689
pangu.simple.js
Normal file
689
pangu.simple.js
Normal file
@@ -0,0 +1,689 @@
|
|||||||
|
/*!
|
||||||
|
* pangu.simple.js
|
||||||
|
* --------
|
||||||
|
* @version: 1.0.5
|
||||||
|
* @homepage: https://github.com/backrunner/pangu.simple.js
|
||||||
|
* @license: MIT
|
||||||
|
* @author: BackRunner
|
||||||
|
*/
|
||||||
|
(function webpackUniversalModuleDefinition(root, factory) {
|
||||||
|
if(typeof exports === 'object' && typeof module === 'object')
|
||||||
|
module.exports = factory();
|
||||||
|
else if(typeof define === 'function' && define.amd)
|
||||||
|
define("pangu", [], factory);
|
||||||
|
else if(typeof exports === 'object')
|
||||||
|
exports["pangu"] = factory();
|
||||||
|
else
|
||||||
|
root["pangu"] = factory();
|
||||||
|
})(window, function() {
|
||||||
|
return
|
||||||
|
/******/ (function(modules) { // webpackBootstrap
|
||||||
|
/******/ // The module cache
|
||||||
|
/******/ var installedModules = {};
|
||||||
|
/******/
|
||||||
|
/******/ // The require function
|
||||||
|
/******/ function __webpack_require__(moduleId) {
|
||||||
|
/******/
|
||||||
|
/******/ // Check if module is in cache
|
||||||
|
/******/ if(installedModules[moduleId]) {
|
||||||
|
/******/ return installedModules[moduleId].exports;
|
||||||
|
/******/ }
|
||||||
|
/******/ // Create a new module (and put it into the cache)
|
||||||
|
/******/ var module = installedModules[moduleId] = {
|
||||||
|
/******/ i: moduleId,
|
||||||
|
/******/ l: false,
|
||||||
|
/******/ exports: {}
|
||||||
|
/******/ };
|
||||||
|
/******/
|
||||||
|
/******/ // Execute the module function
|
||||||
|
/******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__);
|
||||||
|
/******/
|
||||||
|
/******/ // Flag the module as loaded
|
||||||
|
/******/ module.l = true;
|
||||||
|
/******/
|
||||||
|
/******/ // Return the exports of the module
|
||||||
|
/******/ return module.exports;
|
||||||
|
/******/ }
|
||||||
|
/******/
|
||||||
|
/******/
|
||||||
|
/******/ // expose the modules object (__webpack_modules__)
|
||||||
|
/******/ __webpack_require__.m = modules;
|
||||||
|
/******/
|
||||||
|
/******/ // expose the module cache
|
||||||
|
/******/ __webpack_require__.c = installedModules;
|
||||||
|
/******/
|
||||||
|
/******/ // define getter function for harmony exports
|
||||||
|
/******/ __webpack_require__.d = function(exports, name, getter) {
|
||||||
|
/******/ if(!__webpack_require__.o(exports, name)) {
|
||||||
|
/******/ Object.defineProperty(exports, name, { enumerable: true, get: getter });
|
||||||
|
/******/ }
|
||||||
|
/******/ };
|
||||||
|
/******/
|
||||||
|
/******/ // define __esModule on exports
|
||||||
|
/******/ __webpack_require__.r = function(exports) {
|
||||||
|
/******/ if(typeof Symbol !== 'undefined' && Symbol.toStringTag) {
|
||||||
|
/******/ Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
|
||||||
|
/******/ }
|
||||||
|
/******/ Object.defineProperty(exports, '__esModule', { value: true });
|
||||||
|
/******/ };
|
||||||
|
/******/
|
||||||
|
/******/ // create a fake namespace object
|
||||||
|
/******/ // mode & 1: value is a module id, require it
|
||||||
|
/******/ // mode & 2: merge all properties of value into the ns
|
||||||
|
/******/ // mode & 4: return value when already ns object
|
||||||
|
/******/ // mode & 8|1: behave like require
|
||||||
|
/******/ __webpack_require__.t = function(value, mode) {
|
||||||
|
/******/ if(mode & 1) value = __webpack_require__(value);
|
||||||
|
/******/ if(mode & 8) return value;
|
||||||
|
/******/ if((mode & 4) && typeof value === 'object' && value && value.__esModule) return value;
|
||||||
|
/******/ var ns = Object.create(null);
|
||||||
|
/******/ __webpack_require__.r(ns);
|
||||||
|
/******/ Object.defineProperty(ns, 'default', { enumerable: true, value: value });
|
||||||
|
/******/ if(mode & 2 && typeof value != 'string') for(var key in value) __webpack_require__.d(ns, key, function(key) { return value[key]; }.bind(null, key));
|
||||||
|
/******/ return ns;
|
||||||
|
/******/ };
|
||||||
|
/******/
|
||||||
|
/******/ // getDefaultExport function for compatibility with non-harmony modules
|
||||||
|
/******/ __webpack_require__.n = function(module) {
|
||||||
|
/******/ var getter = module && module.__esModule ?
|
||||||
|
/******/ function getDefault() { return module['default']; } :
|
||||||
|
/******/ function getModuleExports() { return module; };
|
||||||
|
/******/ __webpack_require__.d(getter, 'a', getter);
|
||||||
|
/******/ return getter;
|
||||||
|
/******/ };
|
||||||
|
/******/
|
||||||
|
/******/ // Object.prototype.hasOwnProperty.call
|
||||||
|
/******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); };
|
||||||
|
/******/
|
||||||
|
/******/ // __webpack_public_path__
|
||||||
|
/******/ __webpack_require__.p = "";
|
||||||
|
/******/
|
||||||
|
/******/
|
||||||
|
/******/ // Load entry module and return exports
|
||||||
|
/******/ return __webpack_require__(__webpack_require__.s = 0);
|
||||||
|
/******/ })
|
||||||
|
/************************************************************************/
|
||||||
|
/******/ ([
|
||||||
|
/* 0 */
|
||||||
|
/***/ (function(module, exports, __webpack_require__) {
|
||||||
|
|
||||||
|
var __WEBPACK_AMD_DEFINE_FACTORY__, __WEBPACK_AMD_DEFINE_ARRAY__, __WEBPACK_AMD_DEFINE_RESULT__;function _typeof(obj) { "@babel/helpers - typeof"; if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; } return _typeof(obj); }
|
||||||
|
|
||||||
|
(function (global, factory) {
|
||||||
|
if (true) {
|
||||||
|
!(__WEBPACK_AMD_DEFINE_ARRAY__ = [], __WEBPACK_AMD_DEFINE_FACTORY__ = (factory),
|
||||||
|
__WEBPACK_AMD_DEFINE_RESULT__ = (typeof __WEBPACK_AMD_DEFINE_FACTORY__ === 'function' ?
|
||||||
|
(__WEBPACK_AMD_DEFINE_FACTORY__.apply(exports, __WEBPACK_AMD_DEFINE_ARRAY__)) : __WEBPACK_AMD_DEFINE_FACTORY__),
|
||||||
|
__WEBPACK_AMD_DEFINE_RESULT__ !== undefined && (module.exports = __WEBPACK_AMD_DEFINE_RESULT__));
|
||||||
|
} else { var mod; }
|
||||||
|
})(typeof globalThis !== "undefined" ? globalThis : typeof self !== "undefined" ? self : this, function () {
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
|
||||||
|
|
||||||
|
function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }
|
||||||
|
|
||||||
|
function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }
|
||||||
|
|
||||||
|
function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); }
|
||||||
|
|
||||||
|
function _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); }
|
||||||
|
|
||||||
|
function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }
|
||||||
|
|
||||||
|
function _possibleConstructorReturn(self, call) { if (call && (_typeof(call) === "object" || typeof call === "function")) { return call; } return _assertThisInitialized(self); }
|
||||||
|
|
||||||
|
function _assertThisInitialized(self) { if (self === void 0) { throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); } return self; }
|
||||||
|
|
||||||
|
function _isNativeReflectConstruct() { if (typeof Reflect === "undefined" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === "function") return true; try { Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function () {})); return true; } catch (e) { return false; } }
|
||||||
|
|
||||||
|
function _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); }
|
||||||
|
|
||||||
|
var _require = __webpack_require__(1),
|
||||||
|
Pangu = _require.Pangu;
|
||||||
|
|
||||||
|
function once(func) {
|
||||||
|
var _arguments = arguments,
|
||||||
|
_this = this;
|
||||||
|
|
||||||
|
var executed = false;
|
||||||
|
return function () {
|
||||||
|
if (executed) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
var self = _this;
|
||||||
|
executed = true;
|
||||||
|
func.apply(self, _arguments);
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function debounce(func, delay, mustRunDelay) {
|
||||||
|
var _arguments2 = arguments,
|
||||||
|
_this2 = this;
|
||||||
|
|
||||||
|
var timer = null;
|
||||||
|
var startTime = null;
|
||||||
|
return function () {
|
||||||
|
var self = _this2;
|
||||||
|
var args = _arguments2;
|
||||||
|
var currentTime = +new Date();
|
||||||
|
clearTimeout(timer);
|
||||||
|
|
||||||
|
if (!startTime) {
|
||||||
|
startTime = currentTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentTime - startTime >= mustRunDelay) {
|
||||||
|
func.apply(self, args);
|
||||||
|
startTime = currentTime;
|
||||||
|
} else {
|
||||||
|
timer = setTimeout(function () {
|
||||||
|
func.apply(self, args);
|
||||||
|
}, delay);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
var BrowserPangu = function (_Pangu) {
|
||||||
|
_inherits(BrowserPangu, _Pangu);
|
||||||
|
|
||||||
|
var _super = _createSuper(BrowserPangu);
|
||||||
|
|
||||||
|
function BrowserPangu() {
|
||||||
|
var _this3;
|
||||||
|
|
||||||
|
_classCallCheck(this, BrowserPangu);
|
||||||
|
|
||||||
|
_this3 = _super.call(this);
|
||||||
|
_this3.blockTags = /^(div|p|h1|h2|h3|h4|h5|h6)$/i;
|
||||||
|
_this3.ignoredTags = /^(script|code|pre|textarea)$/i;
|
||||||
|
_this3.presentationalTags = /^(b|code|del|em|i|s|strong|kbd)$/i;
|
||||||
|
_this3.spaceLikeTags = /^(br|hr|i|img|pangu)$/i;
|
||||||
|
_this3.spaceSensitiveTags = /^(a|del|pre|s|strike|u)$/i;
|
||||||
|
_this3.isAutoSpacingPageExecuted = false;
|
||||||
|
return _this3;
|
||||||
|
}
|
||||||
|
|
||||||
|
_createClass(BrowserPangu, [{
|
||||||
|
key: "isContentEditable",
|
||||||
|
value: function isContentEditable(node) {
|
||||||
|
return node.isContentEditable || node.getAttribute && node.getAttribute('g_editable') === 'true';
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "isSpecificTag",
|
||||||
|
value: function isSpecificTag(node, tagRegex) {
|
||||||
|
return node && node.nodeName && node.nodeName.search(tagRegex) >= 0;
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "isInsideSpecificTag",
|
||||||
|
value: function isInsideSpecificTag(node, tagRegex) {
|
||||||
|
var checkCurrent = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
|
||||||
|
var currentNode = node;
|
||||||
|
|
||||||
|
if (checkCurrent) {
|
||||||
|
if (this.isSpecificTag(currentNode, tagRegex)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
while (currentNode.parentNode) {
|
||||||
|
currentNode = currentNode.parentNode;
|
||||||
|
|
||||||
|
if (this.isSpecificTag(currentNode, tagRegex)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "canIgnoreNode",
|
||||||
|
value: function canIgnoreNode(node) {
|
||||||
|
var currentNode = node;
|
||||||
|
|
||||||
|
if (currentNode && (this.isSpecificTag(currentNode, this.ignoredTags) || this.isContentEditable(currentNode))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
while (currentNode.parentNode) {
|
||||||
|
currentNode = currentNode.parentNode;
|
||||||
|
|
||||||
|
if (currentNode && (this.isSpecificTag(currentNode, this.ignoredTags) || this.isContentEditable(currentNode))) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "isFirstTextChild",
|
||||||
|
value: function isFirstTextChild(parentNode, targetNode) {
|
||||||
|
var childNodes = parentNode.childNodes;
|
||||||
|
|
||||||
|
for (var i = 0; i < childNodes.length; i++) {
|
||||||
|
var childNode = childNodes[i];
|
||||||
|
|
||||||
|
if (childNode.nodeType !== Node.COMMENT_NODE && childNode.textContent) {
|
||||||
|
return childNode === targetNode;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "isLastTextChild",
|
||||||
|
value: function isLastTextChild(parentNode, targetNode) {
|
||||||
|
var childNodes = parentNode.childNodes;
|
||||||
|
|
||||||
|
for (var i = childNodes.length - 1; i > -1; i--) {
|
||||||
|
var childNode = childNodes[i];
|
||||||
|
|
||||||
|
if (childNode.nodeType !== Node.COMMENT_NODE && childNode.textContent) {
|
||||||
|
return childNode === targetNode;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "spacingNodeByXPath",
|
||||||
|
value: function spacingNodeByXPath(xPathQuery, contextNode) {
|
||||||
|
if (!(contextNode instanceof Node) || contextNode instanceof DocumentFragment) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
var textNodes = document.evaluate(xPathQuery, contextNode, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||||
|
var currentTextNode;
|
||||||
|
var nextTextNode;
|
||||||
|
|
||||||
|
for (var i = textNodes.snapshotLength - 1; i > -1; --i) {
|
||||||
|
currentTextNode = textNodes.snapshotItem(i);
|
||||||
|
|
||||||
|
if (this.isSpecificTag(currentTextNode.parentNode, this.presentationalTags) && !this.isInsideSpecificTag(currentTextNode.parentNode, this.ignoredTags)) {
|
||||||
|
var elementNode = currentTextNode.parentNode;
|
||||||
|
|
||||||
|
if (elementNode.previousSibling) {
|
||||||
|
var previousSibling = elementNode.previousSibling;
|
||||||
|
|
||||||
|
if (previousSibling.nodeType === Node.TEXT_NODE) {
|
||||||
|
var testText = previousSibling.data.substr(-1) + currentTextNode.data.toString().charAt(0);
|
||||||
|
var testNewText = this.spacing(testText);
|
||||||
|
|
||||||
|
if (testText !== testNewText) {
|
||||||
|
previousSibling.data = "".concat(previousSibling.data, " ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (elementNode.nextSibling) {
|
||||||
|
var nextSibling = elementNode.nextSibling;
|
||||||
|
|
||||||
|
if (nextSibling.nodeType === Node.TEXT_NODE) {
|
||||||
|
var _testText = currentTextNode.data.substr(-1) + nextSibling.data.toString().charAt(0);
|
||||||
|
|
||||||
|
var _testNewText = this.spacing(_testText);
|
||||||
|
|
||||||
|
if (_testText !== _testNewText) {
|
||||||
|
nextSibling.data = " ".concat(nextSibling.data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.canIgnoreNode(currentTextNode)) {
|
||||||
|
nextTextNode = currentTextNode;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var newText = this.spacing(currentTextNode.data);
|
||||||
|
|
||||||
|
if (currentTextNode.data !== newText) {
|
||||||
|
currentTextNode.data = newText;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nextTextNode) {
|
||||||
|
if (currentTextNode.nextSibling && currentTextNode.nextSibling.nodeName.search(this.spaceLikeTags) >= 0) {
|
||||||
|
nextTextNode = currentTextNode;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
var _testText2 = currentTextNode.data.toString().substr(-1) + nextTextNode.data.toString().substr(0, 1);
|
||||||
|
|
||||||
|
var _testNewText2 = this.spacing(_testText2);
|
||||||
|
|
||||||
|
if (_testNewText2 !== _testText2) {
|
||||||
|
var nextNode = nextTextNode;
|
||||||
|
|
||||||
|
while (nextNode.parentNode && nextNode.nodeName.search(this.spaceSensitiveTags) === -1 && this.isFirstTextChild(nextNode.parentNode, nextNode)) {
|
||||||
|
nextNode = nextNode.parentNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
var currentNode = currentTextNode;
|
||||||
|
|
||||||
|
while (currentNode.parentNode && currentNode.nodeName.search(this.spaceSensitiveTags) === -1 && this.isLastTextChild(currentNode.parentNode, currentNode)) {
|
||||||
|
currentNode = currentNode.parentNode;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentNode.nextSibling) {
|
||||||
|
if (currentNode.nextSibling.nodeName.search(this.spaceLikeTags) >= 0) {
|
||||||
|
nextTextNode = currentTextNode;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (currentNode.nodeName.search(this.blockTags) === -1) {
|
||||||
|
if (nextNode.nodeName.search(this.spaceSensitiveTags) === -1) {
|
||||||
|
if (nextNode.nodeName.search(this.ignoredTags) === -1 && nextNode.nodeName.search(this.blockTags) === -1) {
|
||||||
|
if (nextTextNode.previousSibling) {
|
||||||
|
if (nextTextNode.previousSibling.nodeName.search(this.spaceLikeTags) === -1) {
|
||||||
|
nextTextNode.data = " ".concat(nextTextNode.data);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!this.canIgnoreNode(nextTextNode)) {
|
||||||
|
nextTextNode.data = " ".concat(nextTextNode.data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if (currentNode.nodeName.search(this.spaceSensitiveTags) === -1) {
|
||||||
|
currentTextNode.data = "".concat(currentTextNode.data, " ");
|
||||||
|
} else {
|
||||||
|
var panguSpace = document.createElement('pangu');
|
||||||
|
panguSpace.innerHTML = ' ';
|
||||||
|
|
||||||
|
if (nextNode.previousSibling) {
|
||||||
|
if (nextNode.previousSibling.nodeName.search(this.spaceLikeTags) === -1) {
|
||||||
|
nextNode.parentNode.insertBefore(panguSpace, nextNode);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
nextNode.parentNode.insertBefore(panguSpace, nextNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!panguSpace.previousElementSibling) {
|
||||||
|
if (panguSpace.parentNode) {
|
||||||
|
panguSpace.parentNode.removeChild(panguSpace);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nextTextNode = currentTextNode;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "spacingNode",
|
||||||
|
value: function spacingNode(contextNode) {
|
||||||
|
var xPathQuery = './/*/text()[normalize-space(.)]';
|
||||||
|
|
||||||
|
if (contextNode.children && contextNode.children.length === 0) {
|
||||||
|
xPathQuery = './/text()[normalize-space(.)]';
|
||||||
|
}
|
||||||
|
|
||||||
|
this.spacingNodeByXPath(xPathQuery, contextNode);
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "spacingElementById",
|
||||||
|
value: function spacingElementById(idName) {
|
||||||
|
var xPathQuery = "id(\"".concat(idName, "\")//text()");
|
||||||
|
this.spacingNodeByXPath(xPathQuery, document);
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "spacingElementByClassName",
|
||||||
|
value: function spacingElementByClassName(className) {
|
||||||
|
var xPathQuery = "//*[contains(concat(\" \", normalize-space(@class), \" \"), \"".concat(className, "\")]//text()");
|
||||||
|
this.spacingNodeByXPath(xPathQuery, document);
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "spacingElementByTagName",
|
||||||
|
value: function spacingElementByTagName(tagName) {
|
||||||
|
var xPathQuery = "//".concat(tagName, "//text()");
|
||||||
|
this.spacingNodeByXPath(xPathQuery, document);
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "spacingPageTitle",
|
||||||
|
value: function spacingPageTitle() {
|
||||||
|
var xPathQuery = '/html/head/title/text()';
|
||||||
|
this.spacingNodeByXPath(xPathQuery, document);
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "spacingPageBody",
|
||||||
|
value: function spacingPageBody() {
|
||||||
|
var xPathQuery = '/html/body//*/text()[normalize-space(.)]';
|
||||||
|
['script', 'style', 'textarea'].forEach(function (tag) {
|
||||||
|
xPathQuery = "".concat(xPathQuery, "[translate(name(..),\"ABCDEFGHIJKLMNOPQRSTUVWXYZ\",\"abcdefghijklmnopqrstuvwxyz\")!=\"").concat(tag, "\"]");
|
||||||
|
});
|
||||||
|
this.spacingNodeByXPath(xPathQuery, document);
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "spacingPage",
|
||||||
|
value: function spacingPage() {
|
||||||
|
this.spacingPageTitle();
|
||||||
|
this.spacingPageBody();
|
||||||
|
}
|
||||||
|
}, {
|
||||||
|
key: "autoSpacingPage",
|
||||||
|
value: function autoSpacingPage() {
|
||||||
|
var pageDelay = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1000;
|
||||||
|
var nodeDelay = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 500;
|
||||||
|
var nodeMaxWait = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 2000;
|
||||||
|
|
||||||
|
if (!(document.body instanceof Node)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.isAutoSpacingPageExecuted) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
this.isAutoSpacingPageExecuted = true;
|
||||||
|
var self = this;
|
||||||
|
var onceSpacingPage = once(function () {
|
||||||
|
self.spacingPage();
|
||||||
|
});
|
||||||
|
var videos = document.getElementsByTagName('video');
|
||||||
|
|
||||||
|
if (videos.length === 0) {
|
||||||
|
setTimeout(function () {
|
||||||
|
onceSpacingPage();
|
||||||
|
}, pageDelay);
|
||||||
|
} else {
|
||||||
|
for (var i = 0; i < videos.length; i++) {
|
||||||
|
var video = videos[i];
|
||||||
|
|
||||||
|
if (video.readyState === 4) {
|
||||||
|
setTimeout(function () {
|
||||||
|
onceSpacingPage();
|
||||||
|
}, 3000);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
video.addEventListener('loadeddata', function () {
|
||||||
|
setTimeout(function () {
|
||||||
|
onceSpacingPage();
|
||||||
|
}, 4000);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var queue = [];
|
||||||
|
var debouncedSpacingNodes = debounce(function () {
|
||||||
|
while (queue.length) {
|
||||||
|
var node = queue.shift();
|
||||||
|
|
||||||
|
if (node) {
|
||||||
|
self.spacingNode(node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, nodeDelay, {
|
||||||
|
'maxWait': nodeMaxWait
|
||||||
|
});
|
||||||
|
var mutationObserver = new MutationObserver(function (mutations, observer) {
|
||||||
|
mutations.forEach(function (mutation) {
|
||||||
|
switch (mutation.type) {
|
||||||
|
case 'childList':
|
||||||
|
mutation.addedNodes.forEach(function (node) {
|
||||||
|
if (node.nodeType === Node.ELEMENT_NODE) {
|
||||||
|
queue.push(node);
|
||||||
|
} else if (node.nodeType === Node.TEXT_NODE) {
|
||||||
|
queue.push(node.parentNode);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
break;
|
||||||
|
|
||||||
|
case 'characterData':
|
||||||
|
var node = mutation.target;
|
||||||
|
|
||||||
|
if (node.nodeType === Node.TEXT_NODE) {
|
||||||
|
queue.push(node.parentNode);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
debouncedSpacingNodes();
|
||||||
|
});
|
||||||
|
mutationObserver.observe(document.body, {
|
||||||
|
characterData: true,
|
||||||
|
childList: true,
|
||||||
|
subtree: true
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}]);
|
||||||
|
|
||||||
|
return BrowserPangu;
|
||||||
|
}(Pangu);
|
||||||
|
|
||||||
|
// Export one shared BrowserPangu instance. The instance also exposes itself
// as `default` and the class as `Pangu`.
var pangu = new BrowserPangu();
pangu["default"] = pangu;
pangu.Pangu = BrowserPangu;
module.exports = pangu;
|
||||||
|
});
|
||||||
|
|
||||||
|
/***/ }),
|
||||||
|
/* 1 */
|
||||||
|
/***/ (function(module, exports, __webpack_require__) {
|
||||||
|
|
||||||
|
var __WEBPACK_AMD_DEFINE_FACTORY__, __WEBPACK_AMD_DEFINE_ARRAY__, __WEBPACK_AMD_DEFINE_RESULT__;(function (global, factory) {
|
||||||
|
if (true) {
|
||||||
|
!(__WEBPACK_AMD_DEFINE_ARRAY__ = [], __WEBPACK_AMD_DEFINE_FACTORY__ = (factory),
|
||||||
|
__WEBPACK_AMD_DEFINE_RESULT__ = (typeof __WEBPACK_AMD_DEFINE_FACTORY__ === 'function' ?
|
||||||
|
(__WEBPACK_AMD_DEFINE_FACTORY__.apply(exports, __WEBPACK_AMD_DEFINE_ARRAY__)) : __WEBPACK_AMD_DEFINE_FACTORY__),
|
||||||
|
__WEBPACK_AMD_DEFINE_RESULT__ !== undefined && (module.exports = __WEBPACK_AMD_DEFINE_RESULT__));
|
||||||
|
} else { var mod; }
|
||||||
|
})(typeof globalThis !== "undefined" ? globalThis : typeof self !== "undefined" ? self : this, function () {
|
||||||
|
"use strict";
|
||||||
|
|
||||||
|
// Babel helper: `typeof` that also reports "symbol" for Symbol values when
// Symbol is not a native primitive. On first call it replaces itself with
// the variant suited to the runtime, then delegates to it.
function _typeof(obj) {
  "@babel/helpers - typeof";

  var symbolsAreNative = typeof Symbol === "function" && typeof Symbol.iterator === "symbol";

  if (symbolsAreNative) {
    _typeof = function _typeof(obj) {
      return typeof obj;
    };
  } else {
    _typeof = function _typeof(obj) {
      return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj;
    };
  }

  return _typeof(obj);
}
|
||||||
|
|
||||||
|
// Babel helper: throws a TypeError unless `instance` was created from
// `Constructor`, i.e. the class was invoked with `new`.
function _classCallCheck(instance, Constructor) {
  if (instance instanceof Constructor) {
    return;
  }

  throw new TypeError("Cannot call a class as a function");
}
|
||||||
|
|
||||||
|
// Babel helper: defines each descriptor in `props` on `target` under
// `descriptor.key`. Descriptors are mutated in place: `enumerable` defaults
// to false, `configurable` is forced to true, and data descriptors (those
// with a `value`) are made writable.
function _defineProperties(target, props) {
  for (var index = 0; index < props.length; index++) {
    var desc = props[index];
    desc.enumerable = desc.enumerable || false;
    desc.configurable = true;

    if ("value" in desc) {
      desc.writable = true;
    }

    Object.defineProperty(target, desc.key, desc);
  }
}
|
||||||
|
|
||||||
|
// Babel helper: installs `protoProps` on `Constructor.prototype` and
// `staticProps` on `Constructor` itself (either may be omitted), then
// returns `Constructor`.
function _createClass(Constructor, protoProps, staticProps) {
  if (protoProps) {
    _defineProperties(Constructor.prototype, protoProps);
  }

  if (staticProps) {
    _defineProperties(Constructor, staticProps);
  }

  return Constructor;
}
|
||||||
|
|
||||||
|
// Character ranges treated as CJK, written as the inside of a regex
// character class.
var CJK = "\u2E80-\u2EFF\u2F00-\u2FDF\u3040-\u309F\u30A0-\u30FA\u30FC-\u30FF\u3100-\u312F\u3200-\u32FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF";
var ANY_CJK = new RegExp("[".concat(CJK, "]"));

// Symbol sets, also written as the inside of a character class (hence the
// escaped `-`, `]` and `\`).
// NOTE(review): `~`, `%` and `|` each appear twice in these sets as written;
// presumably the second occurrences were meant to be different (e.g.
// full-width) characters - confirm against the upstream source.
var SYMBOL_WIDE = '`~!@#$%*^&()/\\-+=<>?:"{}|,.;\'[\\]·~¥%——|\\\\';
var SYMBOL = '`~!@#$%^&()/\\-+=<>?:"{}|,.;\'[\\]·~¥%——|\\\\';
var SYMBOL_LEFT = '`~!@#$%^&(/\\-+=<>?:"{|,.;\'[·~¥%——|\\\\';
var SYMBOL_RIGHT = '`~!@#$%^&)/\\-+=<>?:"}|,.;\'\\]·~¥%——|\\\\';
var SYMBOL_SAFE = '`~!#$%^&/+=<>?:"|,;\'·~¥%——|\\\\';

// Letter/digit directly next to a CJK run.
var ALPHA_CJK = new RegExp("([A-Za-z_])([".concat(CJK, "]+)"), 'g');
var CJK_ALPHA = new RegExp("([".concat(CJK, "]+)([A-Za-z_])"), 'g');
var NUMBER_CJK = new RegExp("([0-9_])([".concat(CJK, "]+)"), 'g');
var CJK_NUMBER = new RegExp("([".concat(CJK, "]+)([0-9_])"), 'g');

// `&` between a letter and a CJK run.
var CJK_AND_ALPHA = new RegExp("([".concat(CJK, "]+)(&)([A-Za-z_])"), 'g');
var ALPHA_AND_CJK = new RegExp("([A-Za-z_])(&)([".concat(CJK, "]+)"), 'g');

// One symbol between a letter/digit and a CJK character.
var ALPHA_SYMBOL_CJK = new RegExp("([A-Za-z_])([".concat(SYMBOL_RIGHT, "])([").concat(CJK, "])"), 'g');
var CJK_SYMBOL_ALPHA = new RegExp("([".concat(CJK, "])([").concat(SYMBOL_LEFT, "])([A-Za-z_])"), 'g');
var NUMBER_SYMBOL_CJK = new RegExp("([0-9_])([".concat(SYMBOL, "])([").concat(CJK, "])"), 'g');
var CJK_SYMBOL_NUMBER = new RegExp("([".concat(CJK, "])([").concat(SYMBOL, "])([0-9_])"), 'g');

// Brackets next to a CJK character.
var CJK_BRACKET = new RegExp("([".concat(CJK, "])([<\\[{\\(])"), 'g');
var BRACKET_CJK = new RegExp("([>\\]\\)}])([".concat(CJK, "])"), 'g');

var ALPHA_NUMBER_CJK = new RegExp("([A-Za-z_])([0-9_])([".concat(CJK, "])"), 'g');

// Two symbols next to a CJK character.
var CJK_SYMBOL_SYMBOL = new RegExp("([".concat(CJK, "])([").concat(SYMBOL_WIDE, "])([").concat(SYMBOL_WIDE, "])"), 'g');
var SYMBOL_SYMBOL_CJK = new RegExp("([".concat(SYMBOL_WIDE, "])([").concat(SYMBOL_WIDE, "])([").concat(CJK, "])"), 'g');

// Symbols between CJK characters.
var CJK_SYMBOL_CJK_SYMBOL_CJK = new RegExp("([".concat(CJK, "])([").concat(SYMBOL_SAFE, "])([").concat(CJK, "])([").concat(SYMBOL_SAFE, "])([").concat(CJK, "])"), 'g');
var CJK_SYMBOL_CJK = new RegExp("([".concat(CJK, "])([").concat(SYMBOL_SAFE, "])([").concat(CJK, "])"), 'g');

// CJK, "@account", CJK run, word, CJK - with optional whitespace in between.
// The word classes are `A-Za-z0-9_`; the previous `A-za-z` spelled a single
// range from "A" to "z", which also admits `[`, `\`, `]`, `^` and backtick.
// NOTE(review): unlike every other pattern here this one has no 'g' flag, so
// String#replace only rewrites its first match - confirm that is intended.
var CJK_ACCOUNT_CJK = new RegExp("([".concat(CJK, "])(\\s*)(@[A-Za-z0-9_]*)(\\s*)([").concat(CJK, "]+)(\\s*)([A-Za-z0-9_]+)(\\s*)([").concat(CJK, "])"));
|
||||||
|
|
||||||
|
// Core spacing engine: inserts spaces between CJK text and adjacent
// letters, digits and symbols according to the pattern constants above.
var Pangu = function () {
  function Pangu() {
    _classCallCheck(this, Pangu);

    this.version = '1.0.0';
  }

  _createClass(Pangu, [{
    // spacing(text): returns `text` with spaces inserted. Non-string input
    // is returned unchanged after a console warning; strings of length <= 1
    // or without any CJK character are returned unchanged.
    key: "spacing",
    value: function spacing(text) {
      if (typeof text !== 'string') {
        console.warn("spacing(text) only accepts string but got ".concat(_typeof(text)));
        return text;
      }

      if (text.length <= 1 || !ANY_CJK.test(text)) {
        return text;
      }

      // [pattern, replacement] pairs, applied in exactly this order; later
      // rules see the output of earlier ones.
      var rules = [
        [ALPHA_NUMBER_CJK, '$1$2 $3'],
        [ALPHA_CJK, '$1 $2'],
        [CJK_ALPHA, '$1 $2'],
        [NUMBER_CJK, '$1 $2'],
        [CJK_NUMBER, '$1 $2'],
        [CJK_AND_ALPHA, '$1 $2 $3'],
        [ALPHA_AND_CJK, '$1 $2 $3'],
        [ALPHA_SYMBOL_CJK, '$1$2 $3'],
        [CJK_SYMBOL_ALPHA, '$1 $2$3'],
        [NUMBER_SYMBOL_CJK, '$1$2 $3'],
        [CJK_SYMBOL_NUMBER, '$1 $2$3'],
        [CJK_SYMBOL_SYMBOL, '$1 $2$3'],
        [SYMBOL_SYMBOL_CJK, '$1$2 $3'],
        [BRACKET_CJK, '$1 $2'],
        [CJK_BRACKET, '$1 $2'],
        [CJK_SYMBOL_CJK_SYMBOL_CJK, '$1 $2 $3 $4 $5'],
        [CJK_SYMBOL_CJK, '$1 $2 $3'],
        [CJK_ACCOUNT_CJK, '$1 $3$5$7 $9']
      ];

      var spaced = text;

      for (var i = 0; i < rules.length; i++) {
        spaced = spaced.replace(rules[i][0], rules[i][1]);
      }

      return spaced;
    }
  }, {
    // spacingText(text, callback): node-style wrapper around spacing().
    // Calls callback(err) if spacing() throws, otherwise
    // callback(null, result). The callback defaults to a no-op.
    key: "spacingText",
    value: function spacingText(text) {
      var callback = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : function () {};
      var spaced;

      try {
        spaced = this.spacing(text);
      } catch (err) {
        callback(err);
        return;
      }

      // Kept outside the try block so an exception thrown by the callback
      // itself is not routed back into the callback.
      callback(null, spaced);
    }
  }, {
    // spacingTextSync(text): synchronous alias of spacing().
    key: "spacingTextSync",
    value: function spacingTextSync(text) {
      return this.spacing(text);
    }
  }]);

  return Pangu;
}();
|
||||||
|
|
||||||
|
// Export one shared Pangu instance. The instance also exposes itself as
// `default` and the class as `Pangu`.
var pangu = new Pangu();
pangu["default"] = pangu;
pangu.Pangu = Pangu;
module.exports = pangu;
|
||||||
|
});
|
||||||
|
|
||||||
|
/***/ })
|
||||||
|
/******/ ]);
|
||||||
|
});
|
||||||
109
panguFilter.cabal
Normal file
109
panguFilter.cabal
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
cabal-version: 3.0
|
||||||
|
-- The cabal-version field refers to the version of the .cabal specification,
|
||||||
|
-- and can be different from the cabal-install (the tool) version and the
|
||||||
|
-- Cabal (the library) version you are using. As such, the Cabal (the library)
|
||||||
|
-- version used must be equal or greater than the version stated in this field.
|
||||||
|
-- Starting from the specification version 2.2, the cabal-version field must be
|
||||||
|
-- the first thing in the cabal file.
|
||||||
|
|
||||||
|
-- Initial package description 'panguFilter' generated by
|
||||||
|
-- 'cabal init'. For further documentation, see:
|
||||||
|
-- http://haskell.org/cabal/users-guide/
|
||||||
|
--
|
||||||
|
-- The name of the package.
|
||||||
|
name: panguFilter
|
||||||
|
|
||||||
|
-- The package version.
|
||||||
|
-- See the Haskell package versioning policy (PVP) for standards
|
||||||
|
-- guiding when and how versions should be incremented.
|
||||||
|
-- https://pvp.haskell.org
|
||||||
|
-- PVP summary: +-+------- breaking API changes
|
||||||
|
-- | | +----- non-breaking API additions
|
||||||
|
-- | | | +--- code changes with no API change
|
||||||
|
version: 0.1.0.0
|
||||||
|
|
||||||
|
-- A short (one-line) description of the package.
|
||||||
|
-- synopsis:
|
||||||
|
|
||||||
|
-- A longer description of the package.
|
||||||
|
-- description:
|
||||||
|
|
||||||
|
-- The license under which the package is released.
|
||||||
|
license: BSD-3-Clause
|
||||||
|
|
||||||
|
-- The file containing the license text.
|
||||||
|
license-file: LICENSE
|
||||||
|
|
||||||
|
-- The package author(s).
|
||||||
|
author: Yu Cong
|
||||||
|
|
||||||
|
-- An email address to which users can send suggestions, bug reports, and patches.
|
||||||
|
maintainer: sxlxcsxlxc@gmail.com
|
||||||
|
|
||||||
|
-- A copyright notice.
|
||||||
|
-- copyright:
|
||||||
|
category: Text
|
||||||
|
build-type: Simple
|
||||||
|
|
||||||
|
-- Extra doc files to be distributed with the package, such as a CHANGELOG or a README.
|
||||||
|
extra-doc-files: CHANGELOG.md
|
||||||
|
|
||||||
|
-- Extra source files to be distributed with the package, such as examples, or a tutorial module.
|
||||||
|
-- extra-source-files:
|
||||||
|
|
||||||
|
common warnings
|
||||||
|
ghc-options: -Wall
|
||||||
|
|
||||||
|
library
|
||||||
|
-- Import common warning flags.
|
||||||
|
import: warnings
|
||||||
|
|
||||||
|
-- Modules exported by the library.
|
||||||
|
exposed-modules: MyLib
|
||||||
|
|
||||||
|
-- Modules included in this library but not exported.
|
||||||
|
-- other-modules:
|
||||||
|
|
||||||
|
-- LANGUAGE extensions used by modules in this package.
|
||||||
|
-- other-extensions:
|
||||||
|
|
||||||
|
-- Other library packages from which modules are imported.
|
||||||
|
build-depends:
|
||||||
|
base ^>=4.18.3.0,
|
||||||
|
text,
|
||||||
|
megaparsec,
|
||||||
|
replace-megaparsec
|
||||||
|
|
||||||
|
-- Directories containing source files.
|
||||||
|
hs-source-dirs: src
|
||||||
|
|
||||||
|
-- Base language which the package is written in.
|
||||||
|
default-language: Haskell2010
|
||||||
|
|
||||||
|
test-suite panguFilter-test
|
||||||
|
-- Import common warning flags.
|
||||||
|
import: warnings
|
||||||
|
|
||||||
|
-- Base language which the package is written in.
|
||||||
|
default-language: Haskell2010
|
||||||
|
|
||||||
|
-- Modules included in this executable, other than Main.
|
||||||
|
-- other-modules:
|
||||||
|
|
||||||
|
-- LANGUAGE extensions used by modules in this package.
|
||||||
|
-- other-extensions:
|
||||||
|
|
||||||
|
-- The interface type and version of the test suite.
|
||||||
|
type: exitcode-stdio-1.0
|
||||||
|
|
||||||
|
-- Directories containing source files.
|
||||||
|
hs-source-dirs: test
|
||||||
|
|
||||||
|
-- The entrypoint to the test suite.
|
||||||
|
main-is: Main.hs
|
||||||
|
|
||||||
|
-- Test dependencies.
|
||||||
|
build-depends:
|
||||||
|
base ^>=4.18.3.0,
|
||||||
|
panguFilter,
|
||||||
|
hspec
|
||||||
89
src/MyLib.hs
Normal file
89
src/MyLib.hs
Normal file
@@ -0,0 +1,89 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module MyLib where
|
||||||
|
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Data.Void (Void)
|
||||||
|
import Replace.Megaparsec (streamEdit)
|
||||||
|
import Text.Megaparsec
|
||||||
|
import Text.Megaparsec.Char
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
-- | Parser over 'Text' input with no custom error component.
type Parser = Parsec Void Text

-- | A rewrite rule: a parser that recognises a piece of the input and
-- yields the text that should stand in its place.
type Rule = Parser Text

-- | Rules to try at each input position, in list order.
type RuleSet = [Rule]
|
||||||
|
|
||||||
|
-- | Rewrite the input with the given rules.
--
-- Wherever one of the rules matches, the matched text is replaced by the
-- rule's result; everything else is left as is. With an empty rule set the
-- input is returned unchanged.
--
-- Each rule is wrapped in 'try' before being combined with 'choice':
-- without it, a rule that consumes some input and then fails (for example
-- 'emailAtRule' reading a word that is not followed by @\'\@\'@) would make
-- 'choice' fail outright instead of moving on to the next rule.
applyRules :: RuleSet -> Text -> Text
applyRules [] input = input
applyRules rules input = streamEdit (choice (map try rules)) id input
|
||||||
|
|
||||||
|
-- TEST RULES
|
||||||
|
-- | Test rule: matches the literal text @apple@ and yields @orange@.
appleToOrange :: Rule
appleToOrange = string "apple" *> pure "orange"
|
||||||
|
|
||||||
|
-- | Test rule: matches @local\@domain@ and yields @local[at]domain@.
--
-- The local part is one or more alphanumerics or any of @._%+-@; the domain
-- part is one or more alphanumerics, dots or hyphens.
emailAtRule :: Rule
emailAtRule = obfuscate <$> some localChar <* char '@' <*> some domainChar
  where
    localChar = alphaNumChar <|> oneOf ("._%+-" :: String)
    domainChar = alphaNumChar <|> oneOf (".-" :: String)
    obfuscate before after = T.pack before <> "[at]" <> T.pack after
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
-- rules for pangu
|
||||||
|
|
||||||
|
-- | 'True' when the character lies in one of the CJK code-point ranges
-- listed below (bounds inclusive).
isCJK :: Char -> Bool
isCJK c = or [lo <= c && c <= hi | (lo, hi) <- cjkBlocks]
  where
    cjkBlocks =
      [ ('\x2e80', '\x2eff')
      , ('\x2f00', '\x2fdf')
      , ('\x3040', '\x309f')
      , ('\x30a0', '\x30fa')
      , ('\x30fc', '\x30ff')
      , ('\x3100', '\x312f')
      , ('\x3200', '\x32ff')
      , ('\x3400', '\x4dbf')
      , ('\x4e00', '\x9fff')
      , ('\xf900', '\xfaff')
      ]
|
||||||
|
|
||||||
|
-- | Map an ASCII punctuation mark to its full-width CJK counterpart.
--
-- Characters without a mapping are returned unchanged. The targets are
-- written as numeric escapes so they cannot be silently folded back to
-- their ASCII look-alikes by editors or text normalisation.
convertToFullwidth :: Char -> Char
convertToFullwidth c = case c of
  ':' -> '\xff1a' -- FULLWIDTH COLON
  '.' -> '\x3002' -- IDEOGRAPHIC FULL STOP
  '~' -> '\xff5e' -- FULLWIDTH TILDE
  '!' -> '\xff01' -- FULLWIDTH EXCLAMATION MARK
  '?' -> '\xff1f' -- FULLWIDTH QUESTION MARK
  ',' -> '\xff0c' -- FULLWIDTH COMMA
  ';' -> '\xff1b' -- FULLWIDTH SEMICOLON
  _ -> c
|
||||||
|
|
||||||
|
-- | Parse exactly one character for which 'isCJK' holds.
cjkChar :: Parser Char
cjkChar = satisfy isCJK
|
||||||
|
|
||||||
|
-- | Convert punctuation that sits between two CJK characters.
--
-- Matches a CJK character followed by one or more of space, @:@ or @.@,
-- provided another CJK character comes next. The result is the leading
-- character followed by the run with each symbol passed through
-- 'convertToFullwidth' (spaces are kept as they are).
--
-- The trailing CJK character is only checked with 'lookAhead', not
-- consumed, so it is still available as the leading character of the next
-- match. Consuming it would leave the second symbol run in input such as
-- @A:B:C@ unconverted, because matching would resume after @B@.
fullWidthSymbolRule :: Rule
fullWidthSymbolRule = do
  leading <- cjkChar
  symbols <- some (oneOf (" :." :: String))
  _ <- lookAhead cjkChar
  pure $ T.cons leading (T.pack (map convertToFullwidth symbols))
|
||||||
|
|
||||||
|
|
||||||
|
-- | The default rule set, tried in this order at every input position.
--
-- 'emailAtRule' and 'fullWidthSymbolRule' can both consume input before
-- failing, so each is wrapped in 'try'; otherwise a partial match of one
-- would stop the rules after it from being attempted at that position.
-- 'appleToOrange' is a single @chunk@-style match and fails without
-- consuming, so it needs no wrapper.
myRules :: RuleSet
myRules = [appleToOrange, try emailAtRule, try fullWidthSymbolRule]
|
||||||
19
test/Main.hs
Normal file
19
test/Main.hs
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
module Main (main) where
|
||||||
|
import MyLib
|
||||||
|
import Test.Hspec
|
||||||
|
|
||||||
|
|
||||||
|
-- | Run the example-based checks for the rules in "MyLib".
main :: IO ()
main = hspec $ do
  describe "MyLib.mapemail" $
    it "maps @ to [at] in emails" $
      applyRules myRules "aaa@a.com" `shouldBe` "aaa[at]a.com"

  describe "MyLib.mapfruits" $
    it "maps apple to orange" $
      applyRules myRules "apple" `shouldBe` "orange"

  describe "MyLib.fullWidthSymbolRule" $
    -- NOTE(review): as written here the input and the expected output are
    -- the same string, so this only checks that the text is left unchanged.
    -- If the rule is meant to turn the colon into a full-width colon, the
    -- expectation should spell that character explicitly - confirm.
    it "你:好" $
      applyRules myRules "你:好" `shouldBe` "你:好"
|
||||||
Reference in New Issue
Block a user