first commit

This commit is contained in:
2026-01-02 15:26:19 +08:00
commit 272b1a9a77
9 changed files with 1140 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@@ -0,0 +1,2 @@
dist-newstyle/
.DS_Store

7
.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,7 @@
{
"haskell.toolchain": {
"hls": "recommended",
"cabal": "recommended",
"stack": null
}
}

5
CHANGELOG.md Normal file
View File

@@ -0,0 +1,5 @@
# Revision history for panguFilter
## 0.1.0.0 -- 2026-01-02
* First version. Released on an unsuspecting world.

29
LICENSE Normal file
View File

@@ -0,0 +1,29 @@
Copyright (c) 2026, Yu Cong
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

191
pangu.py Normal file
View File

@@ -0,0 +1,191 @@
#!/usr/bin/env python
# coding: utf-8
"""
Paranoid text spacing for good readability, to automatically insert whitespace between CJK (Chinese, Japanese, Korean) and half-width characters (alphabetical letters, numerical digits and symbols).
>>> import pangu
>>> new_text = pangu.spacing_text('當你凝視著bugbug也凝視著你')
>>> print(new_text)
'當你凝視著 bugbug 也凝視著你'
>>> new_content = pangu.spacing_file('path/to/file.txt')
>>> print(new_content)
'與 PM 戰鬥的人,應當小心自己不要成為 PM'
"""
import argparse
import os
import re
import sys
# Version string of this module; reported by the command-line `--version` flag.
__version__ = '4.0.6.1'
# Public names exported by a star-import of this module.
__all__ = ['spacing_text', 'spacing_file', 'spacing', 'cli']
# Character-class body (no surrounding brackets) listing the code point ranges
# treated as CJK. It is spliced into the patterns below via str.format().
CJK = r'\u2e80-\u2eff\u2f00-\u2fdf\u3040-\u309f\u30a0-\u30fa\u30fc-\u30ff\u3100-\u312f\u3200-\u32ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff'

# Matches any single CJK character; used as a cheap "is there anything to do" test.
ANY_CJK = re.compile(r'[{CJK}]'.format(CJK=CJK))

# Half-width punctuation adjacent to CJK text that should become full-width.
CONVERT_TO_FULLWIDTH_CJK_SYMBOLS_CJK = re.compile('([{CJK}])([ ]*(?:[\\:]+|\\.)[ ]*)([{CJK}])'.format(CJK=CJK))  # there is an extra non-capturing group compared to JavaScript version
CONVERT_TO_FULLWIDTH_CJK_SYMBOLS = re.compile('([{CJK}])[ ]*([~\\!;,\\?]+)[ ]*'.format(CJK=CJK))
DOTS_CJK = re.compile('([\\.]{{2,}}|\u2026)([{CJK}])'.format(CJK=CJK))  # need to escape { }
FIX_CJK_COLON_ANS = re.compile('([{CJK}])\\:([A-Z0-9\\(\\)])'.format(CJK=CJK))

# Quotes next to CJK text.
CJK_QUOTE = re.compile('([{CJK}])([`"\u05f4])'.format(CJK=CJK))  # no need to escape `
QUOTE_CJK = re.compile('([`"\u05f4])([{CJK}])'.format(CJK=CJK))  # no need to escape `
FIX_QUOTE_ANY_QUOTE = re.compile(r'([`"\u05f4]+)(\s*)(.+?)(\s*)([`"\u05f4]+)')

CJK_SINGLE_QUOTE_BUT_POSSESSIVE = re.compile("([{CJK}])('[^s])".format(CJK=CJK))
SINGLE_QUOTE_CJK = re.compile("(')([{CJK}])".format(CJK=CJK))
FIX_POSSESSIVE_SINGLE_QUOTE = re.compile("([{CJK}A-Za-z0-9])( )('s)".format(CJK=CJK))

# Hash signs next to CJK text.
HASH_ANS_CJK_HASH = re.compile('([{CJK}])(#)([{CJK}]+)(#)([{CJK}])'.format(CJK=CJK))
CJK_HASH = re.compile('([{CJK}])(#([^ ]))'.format(CJK=CJK))
HASH_CJK = re.compile('(([^ ])#)([{CJK}])'.format(CJK=CJK))

# Operators between CJK and alphanumerics.
CJK_OPERATOR_ANS = re.compile('([{CJK}])([\\+\\-\\*\\/=&\\|<>])([A-Za-z0-9])'.format(CJK=CJK))
ANS_OPERATOR_CJK = re.compile('([A-Za-z0-9])([\\+\\-\\*\\/=&\\|<>])([{CJK}])'.format(CJK=CJK))

FIX_SLASH_AS = re.compile(r'([/]) ([a-z\-_\./]+)')
FIX_SLASH_AS_SLASH = re.compile(r'([/\.])([A-Za-z\-_\./]+) ([/])')

# Brackets next to CJK text or alphanumerics.
CJK_LEFT_BRACKET = re.compile('([{CJK}])([\\(\\[\\{{<>\u201c])'.format(CJK=CJK))  # need to escape {
RIGHT_BRACKET_CJK = re.compile('([\\)\\]\\}}<>\u201d])([{CJK}])'.format(CJK=CJK))  # need to escape }
FIX_LEFT_BRACKET_ANY_RIGHT_BRACKET = re.compile(r'([\(\[\{<\u201c]+)(\s*)(.+?)(\s*)([\)\]\}>\u201d]+)')  # need to escape { }
ANS_CJK_LEFT_BRACKET_ANY_RIGHT_BRACKET = re.compile('([A-Za-z0-9{CJK}])[ ]*([\u201c])([A-Za-z0-9{CJK}\\-_ ]+)([\u201d])'.format(CJK=CJK))
LEFT_BRACKET_ANY_RIGHT_BRACKET_ANS_CJK = re.compile('([\u201c])([A-Za-z0-9{CJK}\\-_ ]+)([\u201d])[ ]*([A-Za-z0-9{CJK}])'.format(CJK=CJK))

AN_LEFT_BRACKET = re.compile(r'([A-Za-z0-9])([\(\[\{])')
RIGHT_BRACKET_AN = re.compile(r'([\)\]\}])([A-Za-z0-9])')

# CJK directly followed / preceded by a half-width letter, digit or symbol.
# The trailing class entry is the range \u2700-\u27bf (Dingbats), written as a
# range like the neighbouring \u00a1-\u00ff and \u2150-\u218f entries; without
# the hyphen only the two end points U+2700 and U+27BF would match.
CJK_ANS = re.compile('([{CJK}])([A-Za-z\u0370-\u03ff0-9@\\$%\\^&\\*\\-\\+\\\\=\\|/\u00a1-\u00ff\u2150-\u218f\u2700-\u27bf])'.format(CJK=CJK))
ANS_CJK = re.compile('([A-Za-z\u0370-\u03ff0-9~\\!\\$%\\^&\\*\\-\\+\\\\=\\|;:,\\./\\?\u00a1-\u00ff\u2150-\u218f\u2700-\u27bf])([{CJK}])'.format(CJK=CJK))

# Percent sign directly followed by a letter.
S_A = re.compile(r'(%)([A-Za-z])')

# Middle-dot variants together with any surrounding spaces.
MIDDLE_DOT = re.compile(r'([ ]*)([\u00b7\u2022\u2027])([ ]*)')
# Python version only
# Each pattern matches a run of one half-width punctuation mark.
TILDES = re.compile(r'~+')
EXCLAMATION_MARKS = re.compile(r'!+')
SEMICOLONS = re.compile(r';+')
COLONS = re.compile(r':+')
COMMAS = re.compile(r',+')
PERIODS = re.compile(r'\.+')
QUESTION_MARKS = re.compile(r'\?+')


def convert_to_fullwidth(symbols):
    """
    Convert half-width punctuation in `symbols` to its full-width form.

    Every run of the same half-width mark (e.g. ``!!!``) collapses into a
    single full-width character, and surrounding whitespace is stripped from
    the result.

    :param symbols: text fragment containing half-width punctuation.
    :return: the fragment with punctuation converted and whitespace stripped.
    """
    # The replacements are written as escapes so the full-width characters
    # cannot be silently lost by an encoding-unaware tool (an empty
    # replacement would delete the punctuation instead of converting it).
    replacements = (
        (TILDES, '\uff5e'),             # fullwidth tilde
        (EXCLAMATION_MARKS, '\uff01'),  # fullwidth exclamation mark
        (SEMICOLONS, '\uff1b'),         # fullwidth semicolon
        (COLONS, '\uff1a'),             # fullwidth colon
        (COMMAS, '\uff0c'),             # fullwidth comma
        (PERIODS, '\u3002'),            # ideographic full stop
        (QUESTION_MARKS, '\uff1f'),     # fullwidth question mark
    )
    for pattern, fullwidth in replacements:
        symbols = pattern.sub(fullwidth, symbols)
    return symbols.strip()
def spacing(text):
    """
    Perform paranoid text spacing on text.

    Text of length <= 1, or text containing no CJK character, is returned
    unchanged. Otherwise half-width punctuation next to CJK characters is
    converted to full-width, the spacing rules are applied in a fixed order,
    and the stripped result is returned.

    :param text: the string to process.
    :return: the spaced string.
    """
    if len(text) <= 1 or not ANY_CJK.search(text):
        return text

    new_text = text

    # TODO: refactoring
    # CJK <symbols> CJK: keep both CJK characters, convert what is between them.
    matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS_CJK.search(new_text)
    while matched:
        start, end = matched.span()
        new_text = ''.join((new_text[:start + 1], convert_to_fullwidth(new_text[start + 1:end - 1]), new_text[end - 1:]))
        matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS_CJK.search(new_text)

    # CJK <symbols>: convert everything after the leading CJK character.
    matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS.search(new_text)
    while matched:
        start, end = matched.span()
        new_text = ''.join((new_text[:start + 1].strip(), convert_to_fullwidth(new_text[start + 1:end]), new_text[end:].strip()))
        matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS.search(new_text)

    new_text = DOTS_CJK.sub(r'\1 \2', new_text)
    # The half-width colon matched by the pattern is replaced with a
    # full-width colon (U+FF1A); a bare '\1\2' would drop the colon entirely.
    new_text = FIX_CJK_COLON_ANS.sub('\\1\uff1a\\2', new_text)

    new_text = CJK_QUOTE.sub(r'\1 \2', new_text)
    new_text = QUOTE_CJK.sub(r'\1 \2', new_text)
    new_text = FIX_QUOTE_ANY_QUOTE.sub(r'\1\3\5', new_text)

    new_text = CJK_SINGLE_QUOTE_BUT_POSSESSIVE.sub(r'\1 \2', new_text)
    new_text = SINGLE_QUOTE_CJK.sub(r'\1 \2', new_text)
    new_text = FIX_POSSESSIVE_SINGLE_QUOTE.sub(r"\1's", new_text)

    new_text = HASH_ANS_CJK_HASH.sub(r'\1 \2\3\4 \5', new_text)
    new_text = CJK_HASH.sub(r'\1 \2', new_text)
    new_text = HASH_CJK.sub(r'\1 \3', new_text)

    new_text = CJK_OPERATOR_ANS.sub(r'\1 \2 \3', new_text)
    new_text = ANS_OPERATOR_CJK.sub(r'\1 \2 \3', new_text)

    new_text = FIX_SLASH_AS.sub(r'\1\2', new_text)
    new_text = FIX_SLASH_AS_SLASH.sub(r'\1\2\3', new_text)

    new_text = CJK_LEFT_BRACKET.sub(r'\1 \2', new_text)
    new_text = RIGHT_BRACKET_CJK.sub(r'\1 \2', new_text)
    new_text = FIX_LEFT_BRACKET_ANY_RIGHT_BRACKET.sub(r'\1\3\5', new_text)
    new_text = ANS_CJK_LEFT_BRACKET_ANY_RIGHT_BRACKET.sub(r'\1 \2\3\4', new_text)
    new_text = LEFT_BRACKET_ANY_RIGHT_BRACKET_ANS_CJK.sub(r'\1\2\3 \4', new_text)

    new_text = AN_LEFT_BRACKET.sub(r'\1 \2', new_text)
    new_text = RIGHT_BRACKET_AN.sub(r'\1 \2', new_text)

    new_text = CJK_ANS.sub(r'\1 \2', new_text)
    new_text = ANS_CJK.sub(r'\1 \2', new_text)

    new_text = S_A.sub(r'\1 \2', new_text)

    # Normalise middle-dot variants (and the spaces around them) to the
    # katakana middle dot U+30FB instead of deleting them.
    new_text = MIDDLE_DOT.sub('\u30fb', new_text)

    return new_text.strip()
def spacing_text(text):
    """
    Apply paranoid text spacing to a string.

    This is a thin alias that forwards to `spacing()` and returns its result.
    """
    spaced = spacing(text)
    return spaced
def spacing_file(path, encoding='utf-8'):
    """
    Perform paranoid text spacing from file.

    :param path: path of the text file to read; resolved with
        `os.path.abspath()` before opening.
    :param encoding: text encoding used to decode the file. Defaults to
        UTF-8 so CJK content is decoded the same way on every platform
        instead of depending on the locale's default encoding.
    :return: the spaced content of the whole file.
    """
    # TODO: read line by line
    with open(os.path.abspath(path), encoding=encoding) as f:
        return spacing_text(f.read())
def cli(args=None):
    """
    Command-line entry point.

    When standard input is not a TTY, everything read from it is spaced and
    printed and the command-line arguments are not parsed. Otherwise the
    positional argument is treated as a file path only when `-f` is given
    without `-t`; in every other case it is treated as literal text.

    :param args: argument list; falls back to `sys.argv[1:]` when empty/None.
    """
    argv = args if args else sys.argv[1:]

    parser = argparse.ArgumentParser(
        prog='pangu',
        description='pangu.py -- Paranoid text spacing for good readability, to automatically insert whitespace between CJK and half-width characters (alphabetical letters, numerical digits and symbols).',
    )
    parser.add_argument('-v', '--version', action='version', version=__version__)
    parser.add_argument('-t', '--text', action='store_true', dest='is_text', required=False, help='specify the input value is a text')
    parser.add_argument('-f', '--file', action='store_true', dest='is_file', required=False, help='specify the input value is a file path')
    parser.add_argument('text_or_path', action='store', type=str, help='the text or file path to apply spacing')

    # Piped input takes precedence over any command-line arguments.
    if not sys.stdin.isatty():
        print(spacing_text(sys.stdin.read()))  # noqa: T003
        return

    options = parser.parse_args(argv)
    # `-t` wins over `-f`; with neither flag the value is treated as text.
    read_from_file = options.is_file and not options.is_text
    handler = spacing_file if read_from_file else spacing_text
    print(handler(options.text_or_path))  # noqa: T003


if __name__ == '__main__':
    cli()

689
pangu.simple.js Normal file
View File

@@ -0,0 +1,689 @@
/*!
* pangu.simple.js
* --------
* @version: 1.0.5
* @homepage: https://github.com/backrunner/pangu.simple.js
* @license: MIT
* @author: BackRunner
*/
(function webpackUniversalModuleDefinition(root, factory) {
if(typeof exports === 'object' && typeof module === 'object')
module.exports = factory();
else if(typeof define === 'function' && define.amd)
define("pangu", [], factory);
else if(typeof exports === 'object')
exports["pangu"] = factory();
else
root["pangu"] = factory();
})(window, function() {
return
/******/ (function(modules) { // webpackBootstrap
/******/ // The module cache
/******/ var installedModules = {};
/******/
/******/ // The require function
/******/ function __webpack_require__(moduleId) {
/******/
/******/ // Check if module is in cache
/******/ if(installedModules[moduleId]) {
/******/ return installedModules[moduleId].exports;
/******/ }
/******/ // Create a new module (and put it into the cache)
/******/ var module = installedModules[moduleId] = {
/******/ i: moduleId,
/******/ l: false,
/******/ exports: {}
/******/ };
/******/
/******/ // Execute the module function
/******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__);
/******/
/******/ // Flag the module as loaded
/******/ module.l = true;
/******/
/******/ // Return the exports of the module
/******/ return module.exports;
/******/ }
/******/
/******/
/******/ // expose the modules object (__webpack_modules__)
/******/ __webpack_require__.m = modules;
/******/
/******/ // expose the module cache
/******/ __webpack_require__.c = installedModules;
/******/
/******/ // define getter function for harmony exports
/******/ __webpack_require__.d = function(exports, name, getter) {
/******/ if(!__webpack_require__.o(exports, name)) {
/******/ Object.defineProperty(exports, name, { enumerable: true, get: getter });
/******/ }
/******/ };
/******/
/******/ // define __esModule on exports
/******/ __webpack_require__.r = function(exports) {
/******/ if(typeof Symbol !== 'undefined' && Symbol.toStringTag) {
/******/ Object.defineProperty(exports, Symbol.toStringTag, { value: 'Module' });
/******/ }
/******/ Object.defineProperty(exports, '__esModule', { value: true });
/******/ };
/******/
/******/ // create a fake namespace object
/******/ // mode & 1: value is a module id, require it
/******/ // mode & 2: merge all properties of value into the ns
/******/ // mode & 4: return value when already ns object
/******/ // mode & 8|1: behave like require
/******/ __webpack_require__.t = function(value, mode) {
/******/ if(mode & 1) value = __webpack_require__(value);
/******/ if(mode & 8) return value;
/******/ if((mode & 4) && typeof value === 'object' && value && value.__esModule) return value;
/******/ var ns = Object.create(null);
/******/ __webpack_require__.r(ns);
/******/ Object.defineProperty(ns, 'default', { enumerable: true, value: value });
/******/ if(mode & 2 && typeof value != 'string') for(var key in value) __webpack_require__.d(ns, key, function(key) { return value[key]; }.bind(null, key));
/******/ return ns;
/******/ };
/******/
/******/ // getDefaultExport function for compatibility with non-harmony modules
/******/ __webpack_require__.n = function(module) {
/******/ var getter = module && module.__esModule ?
/******/ function getDefault() { return module['default']; } :
/******/ function getModuleExports() { return module; };
/******/ __webpack_require__.d(getter, 'a', getter);
/******/ return getter;
/******/ };
/******/
/******/ // Object.prototype.hasOwnProperty.call
/******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); };
/******/
/******/ // __webpack_public_path__
/******/ __webpack_require__.p = "";
/******/
/******/
/******/ // Load entry module and return exports
/******/ return __webpack_require__(__webpack_require__.s = 0);
/******/ })
/************************************************************************/
/******/ ([
/* 0 */
/***/ (function(module, exports, __webpack_require__) {
var __WEBPACK_AMD_DEFINE_FACTORY__, __WEBPACK_AMD_DEFINE_ARRAY__, __WEBPACK_AMD_DEFINE_RESULT__;

// `typeof` replacement that also reports "symbol" for Symbol values created by
// a Symbol implementation whose `typeof Symbol.iterator` is not "symbol".
function _typeof(obj) {
  "@babel/helpers - typeof";

  var symbolsAreNative = typeof Symbol === "function" && typeof Symbol.iterator === "symbol";

  if (symbolsAreNative) {
    return typeof obj;
  }

  var looksLikeSymbol = obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype;
  return looksLikeSymbol ? "symbol" : typeof obj;
}
// AMD-style shim: calls `factory` (when it is a function) with `exports` as
// `this` and no arguments; a result other than `undefined` becomes
// `module.exports`. The `global` parameter is not used in this function.
(function (global, factory) {
if (true) {
!(__WEBPACK_AMD_DEFINE_ARRAY__ = [], __WEBPACK_AMD_DEFINE_FACTORY__ = (factory),
__WEBPACK_AMD_DEFINE_RESULT__ = (typeof __WEBPACK_AMD_DEFINE_FACTORY__ === 'function' ?
(__WEBPACK_AMD_DEFINE_FACTORY__.apply(exports, __WEBPACK_AMD_DEFINE_ARRAY__)) : __WEBPACK_AMD_DEFINE_FACTORY__),
__WEBPACK_AMD_DEFINE_RESULT__ !== undefined && (module.exports = __WEBPACK_AMD_DEFINE_RESULT__));
} else { var mod; }
})(typeof globalThis !== "undefined" ? globalThis : typeof self !== "undefined" ? self : this, function () {
"use strict";
// Throws a TypeError when `instance` is not an instance of `Constructor`.
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
// Defines each descriptor in `props` on `target`; non-enumerable unless flagged, always configurable.
function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }
// Installs prototype members and static members on `Constructor` and returns it.
function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }
// Wires `subClass` to inherit from `superClass` (which must be a function or null).
function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); }
// Object.setPrototypeOf with a `__proto__` fallback; rebinds itself to the chosen implementation on first call.
function _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); }
// Returns a function that invokes the parent constructor of `Derived`, via Reflect.construct when usable, otherwise via apply.
function _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }
// Returns `call` when it is an object or function, otherwise the (checked) `self`.
function _possibleConstructorReturn(self, call) { if (call && (_typeof(call) === "object" || typeof call === "function")) { return call; } return _assertThisInitialized(self); }
// Throws a ReferenceError when `self` is undefined; otherwise returns it.
function _assertThisInitialized(self) { if (self === void 0) { throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); } return self; }
// Feature test: true when Reflect.construct exists, is not a sham, and either Proxy exists or a trial construction succeeds.
function _isNativeReflectConstruct() { if (typeof Reflect === "undefined" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === "function") return true; try { Boolean.prototype.valueOf.call(Reflect.construct(Boolean, [], function () {})); return true; } catch (e) { return false; } }
// Object.getPrototypeOf with a `__proto__` fallback; rebinds itself to the chosen implementation on first call.
function _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); }
// Load bundled module 1 and take its `Pangu` export as the base class.
var _require = __webpack_require__(1),
Pangu = _require.Pangu;
/**
 * Wrap `func` so that it runs at most once.
 *
 * The first call of the returned wrapper invokes `func` with the wrapper's
 * own `this` and arguments; every later call is a no-op. The wrapper always
 * returns `undefined`.
 *
 * @param {Function} func - function to guard.
 * @returns {Function} the run-once wrapper.
 */
function once(func) {
  var executed = false;
  return function () {
    if (executed) {
      return;
    }
    executed = true;
    // Forward the wrapper's own call context instead of the `arguments`/`this`
    // captured from the enclosing `once()` call, which would hand `func`
    // itself to the callback as its first argument.
    func.apply(this, arguments);
  };
}
/**
 * Debounce `func`, with an optional upper bound on how long it can be postponed.
 *
 * Each call of the returned wrapper cancels the pending timer. If at least
 * `mustRunDelay` milliseconds have passed since the reference time, `func`
 * runs immediately and the reference time is reset; otherwise `func` is
 * scheduled to run after `delay` milliseconds.
 *
 * @param {Function} func - function to debounce.
 * @param {number} delay - quiet period in milliseconds before `func` runs.
 * @param {number|{maxWait: number}} [mustRunDelay] - maximum postponement in
 *   milliseconds, given either as a number or as an options object with a
 *   `maxWait` property. When omitted, only the debounce delay applies.
 * @returns {Function} the debounced wrapper.
 */
function debounce(func, delay, mustRunDelay) {
  // Accept the `{ maxWait: ms }` object form as well as a plain number;
  // comparing against the object itself would never be true.
  var maxWait = mustRunDelay !== null && typeof mustRunDelay === 'object' ? mustRunDelay.maxWait : mustRunDelay;
  var timer = null;
  var startTime = null;
  return function () {
    // Use the wrapper's own call context, not the one captured from the
    // enclosing `debounce()` call.
    var self = this;
    var args = arguments;
    var currentTime = +new Date();
    clearTimeout(timer);
    if (!startTime) {
      startTime = currentTime;
    }
    if (currentTime - startTime >= maxWait) {
      func.apply(self, args);
      startTime = currentTime;
    } else {
      timer = setTimeout(function () {
        func.apply(self, args);
      }, delay);
    }
  };
}
var BrowserPangu = function (_Pangu) {
_inherits(BrowserPangu, _Pangu);
var _super = _createSuper(BrowserPangu);
// Constructor: runs the parent constructor, then installs the tag-name
// patterns consulted by the DOM spacing methods and the run-once flag used
// by autoSpacingPage().
function BrowserPangu() {
  _classCallCheck(this, BrowserPangu);

  var instance = _super.call(this);

  instance.blockTags = /^(div|p|h1|h2|h3|h4|h5|h6)$/i;
  instance.ignoredTags = /^(script|code|pre|textarea)$/i;
  instance.presentationalTags = /^(b|code|del|em|i|s|strong|kbd)$/i;
  instance.spaceLikeTags = /^(br|hr|i|img|pangu)$/i;
  instance.spaceSensitiveTags = /^(a|del|pre|s|strike|u)$/i;
  instance.isAutoSpacingPageExecuted = false;

  return instance;
}
_createClass(BrowserPangu, [{
key: "isContentEditable",
value: function isContentEditable(node) {
return node.isContentEditable || node.getAttribute && node.getAttribute('g_editable') === 'true';
}
}, {
key: "isSpecificTag",
value: function isSpecificTag(node, tagRegex) {
return node && node.nodeName && node.nodeName.search(tagRegex) >= 0;
}
}, {
key: "isInsideSpecificTag",
value: function isInsideSpecificTag(node, tagRegex) {
var checkCurrent = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
var currentNode = node;
if (checkCurrent) {
if (this.isSpecificTag(currentNode, tagRegex)) {
return true;
}
}
while (currentNode.parentNode) {
currentNode = currentNode.parentNode;
if (this.isSpecificTag(currentNode, tagRegex)) {
return true;
}
}
return false;
}
}, {
key: "canIgnoreNode",
value: function canIgnoreNode(node) {
var currentNode = node;
if (currentNode && (this.isSpecificTag(currentNode, this.ignoredTags) || this.isContentEditable(currentNode))) {
return true;
}
while (currentNode.parentNode) {
currentNode = currentNode.parentNode;
if (currentNode && (this.isSpecificTag(currentNode, this.ignoredTags) || this.isContentEditable(currentNode))) {
return true;
}
}
return false;
}
}, {
key: "isFirstTextChild",
value: function isFirstTextChild(parentNode, targetNode) {
var childNodes = parentNode.childNodes;
for (var i = 0; i < childNodes.length; i++) {
var childNode = childNodes[i];
if (childNode.nodeType !== Node.COMMENT_NODE && childNode.textContent) {
return childNode === targetNode;
}
}
return false;
}
}, {
key: "isLastTextChild",
value: function isLastTextChild(parentNode, targetNode) {
var childNodes = parentNode.childNodes;
for (var i = childNodes.length - 1; i > -1; i--) {
var childNode = childNodes[i];
if (childNode.nodeType !== Node.COMMENT_NODE && childNode.textContent) {
return childNode === targetNode;
}
}
return false;
}
}, {
key: "spacingNodeByXPath",
// Core DOM pass. Evaluates `xPathQuery` against `contextNode`, then walks the
// resulting text nodes from last to first, applying spacing() to each node's
// text and inserting a space at the boundary between neighbouring text nodes
// when spacing() of the two adjacent characters differs from the raw pair.
// Does nothing when `contextNode` is not a Node or is a DocumentFragment.
value: function spacingNodeByXPath(xPathQuery, contextNode) {
if (!(contextNode instanceof Node) || contextNode instanceof DocumentFragment) {
return;
}
var textNodes = document.evaluate(xPathQuery, contextNode, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
var currentTextNode;
// The text node handled in the previous iteration, i.e. the one that follows
// `currentTextNode` in snapshot order (the loop runs backwards).
var nextTextNode;
for (var i = textNodes.snapshotLength - 1; i > -1; --i) {
currentTextNode = textNodes.snapshotItem(i);
// Text inside a presentational element (and not inside an ignored tag):
// check the boundary with the element's sibling text nodes.
if (this.isSpecificTag(currentTextNode.parentNode, this.presentationalTags) && !this.isInsideSpecificTag(currentTextNode.parentNode, this.ignoredTags)) {
var elementNode = currentTextNode.parentNode;
if (elementNode.previousSibling) {
var previousSibling = elementNode.previousSibling;
if (previousSibling.nodeType === Node.TEXT_NODE) {
// Last character before the element + first character inside it.
var testText = previousSibling.data.substr(-1) + currentTextNode.data.toString().charAt(0);
var testNewText = this.spacing(testText);
if (testText !== testNewText) {
previousSibling.data = "".concat(previousSibling.data, " ");
}
}
}
if (elementNode.nextSibling) {
var nextSibling = elementNode.nextSibling;
if (nextSibling.nodeType === Node.TEXT_NODE) {
// Last character inside the element + first character after it.
var _testText = currentTextNode.data.substr(-1) + nextSibling.data.toString().charAt(0);
var _testNewText = this.spacing(_testText);
if (_testText !== _testNewText) {
nextSibling.data = " ".concat(nextSibling.data);
}
}
}
}
// Ignored nodes are left untouched but still remembered as the neighbour
// for the next iteration.
if (this.canIgnoreNode(currentTextNode)) {
nextTextNode = currentTextNode;
continue;
}
// Space the node's own text; only write back when it actually changed.
var newText = this.spacing(currentTextNode.data);
if (currentTextNode.data !== newText) {
currentTextNode.data = newText;
}
if (nextTextNode) {
// A space-like element directly after this node already separates the two.
if (currentTextNode.nextSibling && currentTextNode.nextSibling.nodeName.search(this.spaceLikeTags) >= 0) {
nextTextNode = currentTextNode;
continue;
}
// Last character of this node + first character of the following text node.
var _testText2 = currentTextNode.data.toString().substr(-1) + nextTextNode.data.toString().substr(0, 1);
var _testNewText2 = this.spacing(_testText2);
if (_testNewText2 !== _testText2) {
// Climb from the following text node while it is the first text child of
// its parent and not a space-sensitive tag.
var nextNode = nextTextNode;
while (nextNode.parentNode && nextNode.nodeName.search(this.spaceSensitiveTags) === -1 && this.isFirstTextChild(nextNode.parentNode, nextNode)) {
nextNode = nextNode.parentNode;
}
// Climb from the current text node while it is the last text child of its
// parent and not a space-sensitive tag.
var currentNode = currentTextNode;
while (currentNode.parentNode && currentNode.nodeName.search(this.spaceSensitiveTags) === -1 && this.isLastTextChild(currentNode.parentNode, currentNode)) {
currentNode = currentNode.parentNode;
}
if (currentNode.nextSibling) {
if (currentNode.nextSibling.nodeName.search(this.spaceLikeTags) >= 0) {
nextTextNode = currentTextNode;
continue;
}
}
// No space is added when the current side climbed up to a block tag.
if (currentNode.nodeName.search(this.blockTags) === -1) {
if (nextNode.nodeName.search(this.spaceSensitiveTags) === -1) {
// Following side is not space-sensitive: prepend the space to its text,
// unless it is an ignored/block tag or already preceded by a space-like tag.
if (nextNode.nodeName.search(this.ignoredTags) === -1 && nextNode.nodeName.search(this.blockTags) === -1) {
if (nextTextNode.previousSibling) {
if (nextTextNode.previousSibling.nodeName.search(this.spaceLikeTags) === -1) {
nextTextNode.data = " ".concat(nextTextNode.data);
}
} else {
if (!this.canIgnoreNode(nextTextNode)) {
nextTextNode.data = " ".concat(nextTextNode.data);
}
}
}
} else if (currentNode.nodeName.search(this.spaceSensitiveTags) === -1) {
// Only the following side is space-sensitive: append the space to this node.
currentTextNode.data = "".concat(currentTextNode.data, " ");
} else {
// Both sides are space-sensitive: insert a standalone <pangu> element
// holding the space before the following node.
var panguSpace = document.createElement('pangu');
panguSpace.innerHTML = ' ';
if (nextNode.previousSibling) {
if (nextNode.previousSibling.nodeName.search(this.spaceLikeTags) === -1) {
nextNode.parentNode.insertBefore(panguSpace, nextNode);
}
} else {
nextNode.parentNode.insertBefore(panguSpace, nextNode);
}
// Remove the spacer again when no element precedes it.
if (!panguSpace.previousElementSibling) {
if (panguSpace.parentNode) {
panguSpace.parentNode.removeChild(panguSpace);
}
}
}
}
}
}
nextTextNode = currentTextNode;
}
}
}, {
key: "spacingNode",
value: function spacingNode(contextNode) {
var xPathQuery = './/*/text()[normalize-space(.)]';
if (contextNode.children && contextNode.children.length === 0) {
xPathQuery = './/text()[normalize-space(.)]';
}
this.spacingNodeByXPath(xPathQuery, contextNode);
}
}, {
key: "spacingElementById",
value: function spacingElementById(idName) {
var xPathQuery = "id(\"".concat(idName, "\")//text()");
this.spacingNodeByXPath(xPathQuery, document);
}
}, {
key: "spacingElementByClassName",
value: function spacingElementByClassName(className) {
var xPathQuery = "//*[contains(concat(\" \", normalize-space(@class), \" \"), \"".concat(className, "\")]//text()");
this.spacingNodeByXPath(xPathQuery, document);
}
}, {
key: "spacingElementByTagName",
value: function spacingElementByTagName(tagName) {
var xPathQuery = "//".concat(tagName, "//text()");
this.spacingNodeByXPath(xPathQuery, document);
}
}, {
key: "spacingPageTitle",
value: function spacingPageTitle() {
var xPathQuery = '/html/head/title/text()';
this.spacingNodeByXPath(xPathQuery, document);
}
}, {
key: "spacingPageBody",
value: function spacingPageBody() {
var xPathQuery = '/html/body//*/text()[normalize-space(.)]';
['script', 'style', 'textarea'].forEach(function (tag) {
xPathQuery = "".concat(xPathQuery, "[translate(name(..),\"ABCDEFGHIJKLMNOPQRSTUVWXYZ\",\"abcdefghijklmnopqrstuvwxyz\")!=\"").concat(tag, "\"]");
});
this.spacingNodeByXPath(xPathQuery, document);
}
}, {
key: "spacingPage",
value: function spacingPage() {
this.spacingPageTitle();
this.spacingPageBody();
}
}, {
key: "autoSpacingPage",
value: function autoSpacingPage() {
var pageDelay = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1000;
var nodeDelay = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 500;
var nodeMaxWait = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 2000;
if (!(document.body instanceof Node)) {
return;
}
if (this.isAutoSpacingPageExecuted) {
return;
}
this.isAutoSpacingPageExecuted = true;
var self = this;
var onceSpacingPage = once(function () {
self.spacingPage();
});
var videos = document.getElementsByTagName('video');
if (videos.length === 0) {
setTimeout(function () {
onceSpacingPage();
}, pageDelay);
} else {
for (var i = 0; i < videos.length; i++) {
var video = videos[i];
if (video.readyState === 4) {
setTimeout(function () {
onceSpacingPage();
}, 3000);
break;
}
video.addEventListener('loadeddata', function () {
setTimeout(function () {
onceSpacingPage();
}, 4000);
});
}
}
var queue = [];
var debouncedSpacingNodes = debounce(function () {
while (queue.length) {
var node = queue.shift();
if (node) {
self.spacingNode(node);
}
}
}, nodeDelay, {
'maxWait': nodeMaxWait
});
var mutationObserver = new MutationObserver(function (mutations, observer) {
mutations.forEach(function (mutation) {
switch (mutation.type) {
case 'childList':
mutation.addedNodes.forEach(function (node) {
if (node.nodeType === Node.ELEMENT_NODE) {
queue.push(node);
} else if (node.nodeType === Node.TEXT_NODE) {
queue.push(node.parentNode);
}
});
break;
case 'characterData':
var node = mutation.target;
if (node.nodeType === Node.TEXT_NODE) {
queue.push(node.parentNode);
}
break;
default:
break;
}
});
debouncedSpacingNodes();
});
mutationObserver.observe(document.body, {
characterData: true,
childList: true,
subtree: true
});
}
}]);
return BrowserPangu;
}(Pangu);
// Export a shared BrowserPangu instance as the module itself and as
// `default`, and expose the class as `Pangu`.
var pangu = new BrowserPangu();
module.exports = pangu;
module.exports["default"] = pangu;
module.exports.Pangu = BrowserPangu;
});
/***/ }),
/* 1 */
/***/ (function(module, exports, __webpack_require__) {
// AMD-style shim: calls `factory` (when it is a function) with `exports` as
// `this` and no arguments; a result other than `undefined` becomes
// `module.exports`. The `global` parameter is not used in this function.
var __WEBPACK_AMD_DEFINE_FACTORY__, __WEBPACK_AMD_DEFINE_ARRAY__, __WEBPACK_AMD_DEFINE_RESULT__;(function (global, factory) {
if (true) {
!(__WEBPACK_AMD_DEFINE_ARRAY__ = [], __WEBPACK_AMD_DEFINE_FACTORY__ = (factory),
__WEBPACK_AMD_DEFINE_RESULT__ = (typeof __WEBPACK_AMD_DEFINE_FACTORY__ === 'function' ?
(__WEBPACK_AMD_DEFINE_FACTORY__.apply(exports, __WEBPACK_AMD_DEFINE_ARRAY__)) : __WEBPACK_AMD_DEFINE_FACTORY__),
__WEBPACK_AMD_DEFINE_RESULT__ !== undefined && (module.exports = __WEBPACK_AMD_DEFINE_RESULT__));
} else { var mod; }
})(typeof globalThis !== "undefined" ? globalThis : typeof self !== "undefined" ? self : this, function () {
"use strict";
// `typeof` replacement that reports "symbol" for Symbol values even when `typeof Symbol.iterator` is not "symbol"; rebinds itself to the chosen implementation on first call.
function _typeof(obj) { "@babel/helpers - typeof"; if (typeof Symbol === "function" && typeof Symbol.iterator === "symbol") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; } return _typeof(obj); }
// Throws a TypeError when `instance` is not an instance of `Constructor`.
function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } }
// Defines each descriptor in `props` on `target`; non-enumerable unless flagged, always configurable.
function _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }
// Installs prototype members and static members on `Constructor` and returns it.
function _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }
// Character-class body (no brackets) listing the code point ranges treated as CJK.
var CJK = "\u2E80-\u2EFF\u2F00-\u2FDF\u3040-\u309F\u30A0-\u30FA\u30FC-\u30FF\u3100-\u312F\u3200-\u32FF\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF";
// Matches any single CJK character (no `g` flag, so .test() keeps no state).
var ANY_CJK = new RegExp("[".concat(CJK, "]"));

// Character-class bodies for the symbol sets used by the rules below.
var SYMBOL_WIDE = '`~!@#$%*^&()/\\-+=<>?:"{}|,.;\'[\\]·~¥%——|\\\\';
var SYMBOL = '`~!@#$%^&()/\\-+=<>?:"{}|,.;\'[\\]·~¥%——|\\\\';
var SYMBOL_LEFT = '`~!@#$%^&(/\\-+=<>?:"{|,.;\'[·~¥%——|\\\\';
var SYMBOL_RIGHT = '`~!@#$%^&)/\\-+=<>?:"}|,.;\'\\]·~¥%——|\\\\';
var SYMBOL_SAFE = '`~!#$%^&/+=<>?:"|,;\'·~¥%——|\\\\';

// Letters / digits directly next to CJK text.
var ALPHA_CJK = new RegExp("([A-Za-z_])([".concat(CJK, "]+)"), 'g');
var CJK_ALPHA = new RegExp("([".concat(CJK, "]+)([A-Za-z_])"), 'g');
var NUMBER_CJK = new RegExp("([0-9_])([".concat(CJK, "]+)"), 'g');
var CJK_NUMBER = new RegExp("([".concat(CJK, "]+)([0-9_])"), 'g');
var CJK_AND_ALPHA = new RegExp("([".concat(CJK, "]+)(&)([A-Za-z_])"), 'g');
var ALPHA_AND_CJK = new RegExp("([A-Za-z_])(&)([".concat(CJK, "]+)"), 'g');

// A single symbol between a letter/digit and CJK text.
var ALPHA_SYMBOL_CJK = new RegExp("([A-Za-z_])([".concat(SYMBOL_RIGHT, "])([").concat(CJK, "])"), 'g');
var CJK_SYMBOL_ALPHA = new RegExp("([".concat(CJK, "])([").concat(SYMBOL_LEFT, "])([A-Za-z_])"), 'g');
var NUMBER_SYMBOL_CJK = new RegExp("([0-9_])([".concat(SYMBOL, "])([").concat(CJK, "])"), 'g');
var CJK_SYMBOL_NUMBER = new RegExp("([".concat(CJK, "])([").concat(SYMBOL, "])([0-9_])"), 'g');

// Brackets next to CJK text.
var CJK_BRACKET = new RegExp("([".concat(CJK, "])([<\\[{\\(])"), 'g');
var BRACKET_CJK = new RegExp("([>\\]\\)}])([".concat(CJK, "])"), 'g');

var ALPHA_NUMBER_CJK = new RegExp("([A-Za-z_])([0-9_])([".concat(CJK, "])"), 'g');

// Symbol pairs and symbols embedded between CJK characters.
var CJK_SYMBOL_SYMBOL = new RegExp("([".concat(CJK, "])([").concat(SYMBOL_WIDE, "])([").concat(SYMBOL_WIDE, "])"), 'g');
var SYMBOL_SYMBOL_CJK = new RegExp("([".concat(SYMBOL_WIDE, "])([").concat(SYMBOL_WIDE, "])([").concat(CJK, "])"), 'g');
var CJK_SYMBOL_CJK_SYMBOL_CJK = new RegExp("([".concat(CJK, "])([").concat(SYMBOL_SAFE, "])([").concat(CJK, "])([").concat(SYMBOL_SAFE, "])([").concat(CJK, "])"), 'g');
var CJK_SYMBOL_CJK = new RegExp("([".concat(CJK, "])([").concat(SYMBOL_SAFE, "])([").concat(CJK, "])"), 'g');

// CJK, "@account", CJK run, alphanumeric run, CJK, with optional whitespace
// between the parts. The letter ranges are spelled A-Za-z: the range A-z
// would also accept the punctuation between "Z" and "a" ([ \ ] ^ _ `).
// NOTE(review): unlike the other rules this pattern has no `g` flag, so only
// the first occurrence is rewritten - confirm whether that is intended.
var CJK_ACCOUNT_CJK = new RegExp("([".concat(CJK, "])(\\s*)(@[A-Za-z0-9_]*)(\\s*)([").concat(CJK, "]+)(\\s*)([A-Za-z0-9_]+)(\\s*)([").concat(CJK, "])"));
// Text-only spacing engine: inserts spaces between CJK characters and
// half-width letters, digits and symbols using the regular expressions
// defined above.
var Pangu = function () {
  function Pangu() {
    _classCallCheck(this, Pangu);

    this.version = '1.0.0';
  }

  _createClass(Pangu, [{
    key: "spacing",

    /**
     * Apply the spacing rules to `text`.
     *
     * Non-string input is returned unchanged after a console warning.
     * Strings of length <= 1, or without any CJK character, are returned
     * unchanged.
     *
     * @param {string} text - text to process.
     * @returns {string} the spaced text.
     */
    value: function spacing(text) {
      if (typeof text !== 'string') {
        console.warn("spacing(text) only accepts string but got ".concat(_typeof(text)));
        return text;
      }

      if (text.length <= 1 || !ANY_CJK.test(text)) {
        return text;
      }

      var newText = text;

      // The rules are order-dependent: later rules see the spaces inserted
      // by earlier ones.
      newText = newText.replace(ALPHA_NUMBER_CJK, '$1$2 $3');
      newText = newText.replace(ALPHA_CJK, '$1 $2');
      newText = newText.replace(CJK_ALPHA, '$1 $2');
      newText = newText.replace(NUMBER_CJK, '$1 $2');
      newText = newText.replace(CJK_NUMBER, '$1 $2');
      newText = newText.replace(CJK_AND_ALPHA, '$1 $2 $3');
      newText = newText.replace(ALPHA_AND_CJK, '$1 $2 $3');
      newText = newText.replace(ALPHA_SYMBOL_CJK, '$1$2 $3');
      newText = newText.replace(CJK_SYMBOL_ALPHA, '$1 $2$3');
      newText = newText.replace(NUMBER_SYMBOL_CJK, '$1$2 $3');
      newText = newText.replace(CJK_SYMBOL_NUMBER, '$1 $2$3');
      newText = newText.replace(CJK_SYMBOL_SYMBOL, '$1 $2$3');
      newText = newText.replace(SYMBOL_SYMBOL_CJK, '$1$2 $3');
      newText = newText.replace(BRACKET_CJK, '$1 $2');
      newText = newText.replace(CJK_BRACKET, '$1 $2');
      newText = newText.replace(CJK_SYMBOL_CJK_SYMBOL_CJK, '$1 $2 $3 $4 $5');
      newText = newText.replace(CJK_SYMBOL_CJK, '$1 $2 $3');
      newText = newText.replace(CJK_ACCOUNT_CJK, '$1 $3$5$7 $9');
      return newText;
    }
  }, {
    key: "spacingText",

    /**
     * Callback flavour of spacing().
     *
     * Calls `callback(err)` when spacing() throws, otherwise
     * `callback(null, newText)`. The callback defaults to a no-op.
     *
     * @param {string} text - text to process.
     * @param {Function} [callback] - node-style `(err, newText)` callback.
     */
    value: function spacingText(text) {
      var callback = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : function () {};
      var newText;

      try {
        newText = this.spacing(text);
      } catch (err) {
        callback(err);
        return;
      }

      callback(null, newText);
    }
  }, {
    key: "spacingTextSync",

    /**
     * Synchronous alias of spacing().
     *
     * @param {string} text - text to process.
     * @returns {string} the spaced text.
     */
    value: function spacingTextSync(text) {
      return this.spacing(text);
    }
  }]);

  return Pangu;
}();
// Export one shared Pangu instance as the module itself. The instance also
// carries a "default" self-reference and the Pangu constructor as properties.
var pangu = new Pangu();
pangu["default"] = pangu;
pangu.Pangu = Pangu;
module.exports = pangu;
});
/***/ })
/******/ ]);
});

109
panguFilter.cabal Normal file
View File

@@ -0,0 +1,109 @@
cabal-version: 3.0
-- The cabal-version field refers to the version of the .cabal specification,
-- and can be different from the cabal-install (the tool) version and the
-- Cabal (the library) version you are using. As such, the Cabal (the library)
-- version used must be equal or greater than the version stated in this field.
-- Starting from the specification version 2.2, the cabal-version field must be
-- the first thing in the cabal file.
-- Initial package description 'panguFilter' generated by
-- 'cabal init'. For further documentation, see:
-- http://haskell.org/cabal/users-guide/
--
-- The name of the package.
name: panguFilter
-- The package version.
-- See the Haskell package versioning policy (PVP) for standards
-- guiding when and how versions should be incremented.
-- https://pvp.haskell.org
-- PVP summary:     +-+------- breaking API changes
--                  | | +----- non-breaking API additions
--                  | | | +--- code changes with no API change
version:            0.1.0.0
-- A short (one-line) description of the package.
-- synopsis:
-- A longer description of the package.
-- description:
-- The license under which the package is released.
license: BSD-3-Clause
-- The file containing the license text.
license-file: LICENSE
-- The package author(s).
author: Yu Cong
-- An email address to which users can send suggestions, bug reports, and patches.
maintainer: sxlxcsxlxc@gmail.com
-- A copyright notice.
-- copyright:
category: Text
build-type: Simple
-- Extra doc files to be distributed with the package, such as a CHANGELOG or a README.
extra-doc-files: CHANGELOG.md
-- Extra source files to be distributed with the package, such as examples, or a tutorial module.
-- extra-source-files:
common warnings
ghc-options: -Wall
library
-- Import common warning flags.
import: warnings
-- Modules exported by the library.
exposed-modules: MyLib
-- Modules included in this library but not exported.
-- other-modules:
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
-- Other library packages from which modules are imported.
build-depends:
base ^>=4.18.3.0,
text,
megaparsec,
replace-megaparsec
-- Directories containing source files.
hs-source-dirs: src
-- Base language which the package is written in.
default-language: Haskell2010
test-suite panguFilter-test
-- Import common warning flags.
import: warnings
-- Base language which the package is written in.
default-language: Haskell2010
-- Modules included in this executable, other than Main.
-- other-modules:
-- LANGUAGE extensions used by modules in this package.
-- other-extensions:
-- The interface type and version of the test suite.
type: exitcode-stdio-1.0
-- Directories containing source files.
hs-source-dirs: test
-- The entrypoint to the test suite.
main-is: Main.hs
-- Test dependencies.
build-depends:
base ^>=4.18.3.0,
panguFilter,
hspec

89
src/MyLib.hs Normal file
View File

@@ -0,0 +1,89 @@
{-# LANGUAGE OverloadedStrings #-}
module MyLib where
import Data.Text (Text)
import qualified Data.Text as T
import Data.Void (Void)
import Replace.Megaparsec (streamEdit)
import Text.Megaparsec
import Text.Megaparsec.Char
-------------------------------------------------------------------------------
-- | Parser over 'Text' input with no custom error component ('Void').
type Parser = Parsec Void Text
-- | A rewrite rule: a parser that recognises a span of input and yields the
-- text that should replace it.
type Rule = Parser Text
-- | A list of rules, as consumed by 'applyRules'.
type RuleSet = [Rule]
-- | Rewrite the input by replacing every span matched by one of the rules
-- with that rule's result; unmatched text is passed through unchanged.
--
-- Rules are tried in list order. Each rule is wrapped in 'try' before being
-- handed to 'choice': a rule that consumes input and then fails (for example
-- one that reads a run of letters and then does not find the separator it
-- needs) would otherwise make 'choice' fail on the spot, and no later rule
-- would ever be attempted at that position.
--
-- An empty rule set returns the input untouched.
applyRules :: RuleSet -> Text -> Text
applyRules [] input = input
applyRules rules input = streamEdit (choice (map try rules)) id input
-- TEST RULES
-- | Sample rule: recognise the literal @apple@ and replace it with @orange@.
appleToOrange :: Rule
appleToOrange = do
  _ <- chunk "apple"
  pure "orange"
-- | Sample rule: rewrite @user\@host@ as @user[at]host@.
--
-- The part before the @\@@ is one or more alphanumerics or any of @._%+-@;
-- the part after it is one or more alphanumerics or any of @.-@.
emailAtRule :: Rule
emailAtRule = obfuscate <$> some userChar <* char '@' <*> some hostChar
  where
    userChar, hostChar :: Parser Char
    userChar = alphaNumChar <|> oneOf ("._%+-" :: String)
    hostChar = alphaNumChar <|> oneOf (".-" :: String)

    obfuscate :: String -> String -> Text
    obfuscate user host = T.pack user <> "[at]" <> T.pack host
-------------------------------------------------------------------------------
-- rules for pangu
-- | 'True' when the character lies in one of the CJK code point ranges
-- listed below (bounds inclusive).
isCJK :: Char -> Bool
isCJK c = or [lo <= c && c <= hi | (lo, hi) <- blocks]
  where
    blocks :: [(Char, Char)]
    blocks =
      [ ('\x2e80', '\x2eff') -- CJK Radicals Supplement
      , ('\x2f00', '\x2fdf') -- Kangxi Radicals
      , ('\x3040', '\x309f') -- Hiragana
      , ('\x30a0', '\x30fa') -- Katakana, up to U+30FA
      , ('\x30fc', '\x30ff') -- Katakana, from U+30FC (U+30FB is left out)
      , ('\x3100', '\x312f') -- Bopomofo
      , ('\x3200', '\x32ff') -- Enclosed CJK Letters and Months
      , ('\x3400', '\x4dbf') -- CJK Unified Ideographs Extension A
      , ('\x4e00', '\x9fff') -- CJK Unified Ideographs
      , ('\xf900', '\xfaff') -- CJK Compatibility Ideographs
      ]
-- | Map an ASCII punctuation mark to its full-width CJK counterpart.
--
-- Characters without a mapping are returned unchanged. The targets are
-- written as numeric escapes so the table survives tools that strip or
-- normalise full-width code points.
convertToFullwidth :: Char -> Char
convertToFullwidth c = case c of
  ':' -> '\xff1a' -- FULLWIDTH COLON
  '.' -> '\x3002' -- IDEOGRAPHIC FULL STOP
  '~' -> '\xff5e' -- FULLWIDTH TILDE
  '!' -> '\xff01' -- FULLWIDTH EXCLAMATION MARK
  '?' -> '\xff1f' -- FULLWIDTH QUESTION MARK
  ',' -> '\xff0c' -- FULLWIDTH COMMA
  ';' -> '\xff1b' -- FULLWIDTH SEMICOLON
  _ -> c
-- | Parser that consumes exactly one character for which 'isCJK' holds and
-- returns it; it fails without consuming input on any other character.
cjkChar :: Parser Char
cjkChar = satisfy isCJK
-- | Rewrite the punctuation that sits between two CJK characters.
--
-- Matches one CJK character followed by a non-empty run of spaces, colons and
-- full stops, provided another CJK character comes right after the run. Every
-- character of the run is passed through 'convertToFullwidth'.
--
-- The trailing CJK character is only peeked at with 'lookAhead' and is left
-- in the input, so it can open the next match. Consuming it would mean that
-- in a chain such as CJK @:@ CJK @:@ CJK only the first separator is seen.
--
-- The rule consumes input before it can fail, so wrap it in 'try' when it is
-- used as one alternative among several.
fullWidthSymbolRule :: Rule
fullWidthSymbolRule = do
  leading <- cjkChar
  mid <- some (oneOf (" :." :: String))
  _ <- lookAhead cjkChar
  pure $ T.singleton leading <> T.pack (map convertToFullwidth mid)
-- | The rules shipped with this module, in the order they are tried.
myRules :: RuleSet
myRules =
  [ appleToOrange
  , emailAtRule
  , try fullWidthSymbolRule
  ]

19
test/Main.hs Normal file
View File

@@ -0,0 +1,19 @@
{-# LANGUAGE OverloadedStrings #-}
module Main (main) where
import MyLib
import Test.Hspec
-- | Run the example-based checks for "MyLib".
main :: IO ()
main = hspec spec

-- | One example per rule in 'myRules', each driven through 'applyRules'.
spec :: Spec
spec = do
  describe "MyLib.mapemail" $
    it "maps @ to [at] in emails" $
      applyRules myRules "aaa@a.com" `shouldBe` "aaa[at]a.com"
  describe "MyLib.mapfruits" $
    it "maps apple to orange" $
      applyRules myRules "apple" `shouldBe` "orange"
  describe "MyLib.fullWidthSymbolRule" $
    -- NOTE(review): as written the expected text is identical to the input.
    -- If this example is meant to show the colon being converted, the
    -- expected string should contain U+FF1A -- confirm against the original.
    it "你:好" $
      applyRules myRules "你:好" `shouldBe` "你:好"