Compare commits
4 Commits
792bbc80c6
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
| 984069530e | |||
| 211df1191f | |||
| 0418e29edf | |||
| 5ba00b7fc9 |
@@ -1,5 +0,0 @@
|
|||||||
# Revision history for panguFilter
|
|
||||||
|
|
||||||
## 0.1.0.0 -- YYYY-mm-dd
|
|
||||||
|
|
||||||
* First version. Released on an unsuspecting world.
|
|
||||||
11
README.md
Normal file
11
README.md
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
# pangu.hs
|
||||||
|
|
||||||
|
Insert whitespace between CJK and half-width characters.
|
||||||
|
|
||||||
|
This haskell version implements a subset of spacing rules in [pangu.py](https://github.com/vinta/pangu.py).
|
||||||
|
|
||||||
|
## test
|
||||||
|
|
||||||
|
```sh
|
||||||
|
cabal test
|
||||||
|
```
|
||||||
191
pangu.py
191
pangu.py
@@ -1,191 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# coding: utf-8
|
|
||||||
"""
|
|
||||||
Paranoid text spacing for good readability, to automatically insert whitespace between CJK (Chinese, Japanese, Korean) and half-width characters (alphabetical letters, numerical digits and symbols).
|
|
||||||
|
|
||||||
>>> import pangu
|
|
||||||
>>> new_text = pangu.spacing_text('當你凝視著bug,bug也凝視著你')
>>> print(new_text)
'當你凝視著 bug,bug 也凝視著你'
>>> new_content = pangu.spacing_file('path/to/file.txt')
>>> print(new_content)
'與 PM 戰鬥的人,應當小心自己不要成為 PM'
|
|
||||||
'與 PM 戰鬥的人,應當小心自己不要成為 PM'
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import sys
|
|
||||||
|
|
||||||
__version__ = '4.0.6.1'
|
|
||||||
__all__ = ['spacing_text', 'spacing_file', 'spacing', 'cli']
|
|
||||||
|
|
||||||
CJK = r'\u2e80-\u2eff\u2f00-\u2fdf\u3040-\u309f\u30a0-\u30fa\u30fc-\u30ff\u3100-\u312f\u3200-\u32ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff'
|
|
||||||
|
|
||||||
ANY_CJK = re.compile(r'[{CJK}]'.format(CJK=CJK))
|
|
||||||
|
|
||||||
CONVERT_TO_FULLWIDTH_CJK_SYMBOLS_CJK = re.compile('([{CJK}])([ ]*(?:[\\:]+|\\.)[ ]*)([{CJK}])'.format(CJK=CJK)) # there is an extra non-capturing group compared to JavaScript version
|
|
||||||
CONVERT_TO_FULLWIDTH_CJK_SYMBOLS = re.compile('([{CJK}])[ ]*([~\\!;,\\?]+)[ ]*'.format(CJK=CJK))
|
|
||||||
DOTS_CJK = re.compile('([\\.]{{2,}}|\u2026)([{CJK}])'.format(CJK=CJK)) # need to escape { }
|
|
||||||
FIX_CJK_COLON_ANS = re.compile('([{CJK}])\\:([A-Z0-9\\(\\)])'.format(CJK=CJK))
|
|
||||||
|
|
||||||
CJK_QUOTE = re.compile('([{CJK}])([`"\u05f4])'.format(CJK=CJK)) # no need to escape `
|
|
||||||
QUOTE_CJK = re.compile('([`"\u05f4])([{CJK}])'.format(CJK=CJK)) # no need to escape `
|
|
||||||
FIX_QUOTE_ANY_QUOTE = re.compile(r'([`"\u05f4]+)(\s*)(.+?)(\s*)([`"\u05f4]+)')
|
|
||||||
|
|
||||||
CJK_SINGLE_QUOTE_BUT_POSSESSIVE = re.compile("([{CJK}])('[^s])".format(CJK=CJK))
|
|
||||||
SINGLE_QUOTE_CJK = re.compile("(')([{CJK}])".format(CJK=CJK))
|
|
||||||
FIX_POSSESSIVE_SINGLE_QUOTE = re.compile("([{CJK}A-Za-z0-9])( )('s)".format(CJK=CJK))
|
|
||||||
|
|
||||||
HASH_ANS_CJK_HASH = re.compile('([{CJK}])(#)([{CJK}]+)(#)([{CJK}])'.format(CJK=CJK))
|
|
||||||
CJK_HASH = re.compile('([{CJK}])(#([^ ]))'.format(CJK=CJK))
|
|
||||||
HASH_CJK = re.compile('(([^ ])#)([{CJK}])'.format(CJK=CJK))
|
|
||||||
|
|
||||||
CJK_OPERATOR_ANS = re.compile('([{CJK}])([\\+\\-\\*\\/=&\\|<>])([A-Za-z0-9])'.format(CJK=CJK))
|
|
||||||
ANS_OPERATOR_CJK = re.compile('([A-Za-z0-9])([\\+\\-\\*\\/=&\\|<>])([{CJK}])'.format(CJK=CJK))
|
|
||||||
|
|
||||||
FIX_SLASH_AS = re.compile(r'([/]) ([a-z\-_\./]+)')
|
|
||||||
FIX_SLASH_AS_SLASH = re.compile(r'([/\.])([A-Za-z\-_\./]+) ([/])')
|
|
||||||
|
|
||||||
CJK_LEFT_BRACKET = re.compile('([{CJK}])([\\(\\[\\{{<>\u201c])'.format(CJK=CJK)) # need to escape {
|
|
||||||
RIGHT_BRACKET_CJK = re.compile('([\\)\\]\\}}<>\u201d])([{CJK}])'.format(CJK=CJK)) # need to escape }
|
|
||||||
FIX_LEFT_BRACKET_ANY_RIGHT_BRACKET = re.compile(r'([\(\[\{<\u201c]+)(\s*)(.+?)(\s*)([\)\]\}>\u201d]+)') # need to escape { }
|
|
||||||
ANS_CJK_LEFT_BRACKET_ANY_RIGHT_BRACKET = re.compile('([A-Za-z0-9{CJK}])[ ]*([\u201c])([A-Za-z0-9{CJK}\\-_ ]+)([\u201d])'.format(CJK=CJK))
|
|
||||||
LEFT_BRACKET_ANY_RIGHT_BRACKET_ANS_CJK = re.compile('([\u201c])([A-Za-z0-9{CJK}\\-_ ]+)([\u201d])[ ]*([A-Za-z0-9{CJK}])'.format(CJK=CJK))
|
|
||||||
|
|
||||||
AN_LEFT_BRACKET = re.compile(r'([A-Za-z0-9])([\(\[\{])')
|
|
||||||
RIGHT_BRACKET_AN = re.compile(r'([\)\]\}])([A-Za-z0-9])')
|
|
||||||
|
|
||||||
CJK_ANS = re.compile('([{CJK}])([A-Za-z\u0370-\u03ff0-9@\\$%\\^&\\*\\-\\+\\\\=\\|/\u00a1-\u00ff\u2150-\u218f\u2700—\u27bf])'.format(CJK=CJK))
|
|
||||||
ANS_CJK = re.compile('([A-Za-z\u0370-\u03ff0-9~\\!\\$%\\^&\\*\\-\\+\\\\=\\|;:,\\./\\?\u00a1-\u00ff\u2150-\u218f\u2700—\u27bf])([{CJK}])'.format(CJK=CJK))
|
|
||||||
|
|
||||||
S_A = re.compile(r'(%)([A-Za-z])')
|
|
||||||
|
|
||||||
MIDDLE_DOT = re.compile(r'([ ]*)([\u00b7\u2022\u2027])([ ]*)')
|
|
||||||
|
|
||||||
# Python version only
|
|
||||||
TILDES = re.compile(r'~+')
|
|
||||||
EXCLAMATION_MARKS = re.compile(r'!+')
|
|
||||||
SEMICOLONS = re.compile(r';+')
|
|
||||||
COLONS = re.compile(r':+')
|
|
||||||
COMMAS = re.compile(r',+')
|
|
||||||
PERIODS = re.compile(r'\.+')
|
|
||||||
QUESTION_MARKS = re.compile(r'\?+')
|
|
||||||
|
|
||||||
|
|
||||||
def convert_to_fullwidth(symbols):
    """
    Collapse each run of half-width punctuation in `symbols` to a single
    full-width character and strip surrounding whitespace.
    """
    # Each pattern matches a run of one repeated half-width symbol; the
    # replacement is its full-width counterpart.
    conversions = (
        (TILDES, '~'),
        (EXCLAMATION_MARKS, '!'),
        (SEMICOLONS, ';'),
        (COLONS, ':'),
        (COMMAS, ','),
        (PERIODS, '。'),
        (QUESTION_MARKS, '?'),
    )
    for pattern, fullwidth in conversions:
        symbols = pattern.sub(fullwidth, symbols)
    return symbols.strip()
|
|
||||||
|
|
||||||
|
|
||||||
def spacing(text):
    """
    Perform paranoid text spacing on text.
    """
    # Nothing to do for trivial input or input without any CJK character.
    if len(text) <= 1 or not ANY_CJK.search(text):
        return text

    new_text = text

    # TODO: refactoring
    # Repeatedly convert half-width ':'/'.' runs squeezed between two CJK
    # characters to full-width; re-search after each rewrite because a
    # replacement can create a new match.
    matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS_CJK.search(new_text)
    while matched:
        start, end = matched.span()
        new_text = ''.join((new_text[:start + 1], convert_to_fullwidth(new_text[start + 1:end - 1]), new_text[end - 1:]))
        matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS_CJK.search(new_text)

    # Same loop for '~ ! ; , ?' runs that directly follow a CJK character.
    matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS.search(new_text)
    while matched:
        start, end = matched.span()
        new_text = ''.join((new_text[:start + 1].strip(), convert_to_fullwidth(new_text[start + 1:end]), new_text[end:].strip()))
        matched = CONVERT_TO_FULLWIDTH_CJK_SYMBOLS.search(new_text)

    # NOTE(review): from here on the order of substitutions matters —
    # later patterns rely on the spacing produced by earlier ones.
    new_text = DOTS_CJK.sub(r'\1 \2', new_text)
    new_text = FIX_CJK_COLON_ANS.sub(r'\1:\2', new_text)

    # Quotes.
    new_text = CJK_QUOTE.sub(r'\1 \2', new_text)
    new_text = QUOTE_CJK.sub(r'\1 \2', new_text)
    new_text = FIX_QUOTE_ANY_QUOTE.sub(r'\1\3\5', new_text)

    # Single quotes and English possessives ('s).
    new_text = CJK_SINGLE_QUOTE_BUT_POSSESSIVE.sub(r'\1 \2', new_text)
    new_text = SINGLE_QUOTE_CJK.sub(r'\1 \2', new_text)
    new_text = FIX_POSSESSIVE_SINGLE_QUOTE.sub(r"\1's", new_text)

    # Hashtags (e.g. 微博-style #tag#).
    new_text = HASH_ANS_CJK_HASH.sub(r'\1 \2\3\4 \5', new_text)
    new_text = CJK_HASH.sub(r'\1 \2', new_text)
    new_text = HASH_CJK.sub(r'\1 \3', new_text)

    # Operators between CJK and alphanumerics.
    new_text = CJK_OPERATOR_ANS.sub(r'\1 \2 \3', new_text)
    new_text = ANS_OPERATOR_CJK.sub(r'\1 \2 \3', new_text)

    # Remove spaces inside path-like '/' sequences that the operator
    # rules above may have introduced.
    new_text = FIX_SLASH_AS.sub(r'\1\2', new_text)
    new_text = FIX_SLASH_AS_SLASH.sub(r'\1\2\3', new_text)

    # Brackets.
    new_text = CJK_LEFT_BRACKET.sub(r'\1 \2', new_text)
    new_text = RIGHT_BRACKET_CJK.sub(r'\1 \2', new_text)
    new_text = FIX_LEFT_BRACKET_ANY_RIGHT_BRACKET.sub(r'\1\3\5', new_text)
    new_text = ANS_CJK_LEFT_BRACKET_ANY_RIGHT_BRACKET.sub(r'\1 \2\3\4', new_text)
    new_text = LEFT_BRACKET_ANY_RIGHT_BRACKET_ANS_CJK.sub(r'\1\2\3 \4', new_text)

    new_text = AN_LEFT_BRACKET.sub(r'\1 \2', new_text)
    new_text = RIGHT_BRACKET_AN.sub(r'\1 \2', new_text)

    # Generic CJK <-> alphanumeric/symbol spacing.
    new_text = CJK_ANS.sub(r'\1 \2', new_text)
    new_text = ANS_CJK.sub(r'\1 \2', new_text)

    new_text = S_A.sub(r'\1 \2', new_text)

    # Normalise middle dots to '・' (U+30FB) and drop surrounding spaces.
    new_text = MIDDLE_DOT.sub('・', new_text)

    return new_text.strip()
|
|
||||||
|
|
||||||
|
|
||||||
def spacing_text(text):
    """
    Perform paranoid text spacing on text. An alias of `spacing()`.
    """
    # Thin public alias; both names are exported via __all__.
    return spacing(text)
|
|
||||||
|
|
||||||
|
|
||||||
def spacing_file(path):
    """
    Perform paranoid text spacing on the contents of the file at `path`
    and return the spaced text.

    The file is decoded as UTF-8 explicitly: relying on the locale's
    default encoding breaks on platforms whose default is not UTF-8,
    which matters for the CJK text this tool processes.
    """
    # TODO: read line by line
    with open(os.path.abspath(path), encoding='utf-8') as f:
        return spacing_text(f.read())
|
|
||||||
|
|
||||||
|
|
||||||
def cli(args=None):
    """
    Command-line entry point.

    If stdin is piped, its entire contents are spaced and printed and the
    arguments are ignored. Otherwise `args` (defaulting to
    ``sys.argv[1:]``) is parsed and the positional value is treated as
    raw text or, with ``-f``, as a file path.
    """
    if args is None:
        # Only fall back to the real command line when no argument list
        # was supplied; an explicitly passed empty list is parsed as-is
        # rather than being silently replaced (the original `if not args`
        # conflated the two).
        args = sys.argv[1:]

    parser = argparse.ArgumentParser(
        prog='pangu',
        description='pangu.py -- Paranoid text spacing for good readability, to automatically insert whitespace between CJK and half-width characters (alphabetical letters, numerical digits and symbols).',
    )
    parser.add_argument('-v', '--version', action='version', version=__version__)
    parser.add_argument('-t', '--text', action='store_true', dest='is_text', required=False, help='specify the input value is a text')
    parser.add_argument('-f', '--file', action='store_true', dest='is_file', required=False, help='specify the input value is a file path')
    parser.add_argument('text_or_path', action='store', type=str, help='the text or file path to apply spacing')

    if not sys.stdin.isatty():
        # Piped input takes precedence over positional arguments.
        print(spacing_text(sys.stdin.read()))  # noqa: T003
    else:
        parsed = parser.parse_args(args)
        if parsed.is_text:
            print(spacing_text(parsed.text_or_path))  # noqa: T003
        elif parsed.is_file:
            print(spacing_file(parsed.text_or_path))  # noqa: T003
        else:
            # Neither flag given: treat the positional value as raw text.
            print(spacing_text(parsed.text_or_path))  # noqa: T003
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: delegate to the CLI when executed directly.
if __name__ == '__main__':
    cli()
|
|
||||||
@@ -46,7 +46,7 @@ category: Text
|
|||||||
build-type: Simple
|
build-type: Simple
|
||||||
|
|
||||||
-- Extra doc files to be distributed with the package, such as a CHANGELOG or a README.
|
-- Extra doc files to be distributed with the package, such as a CHANGELOG or a README.
|
||||||
extra-doc-files: CHANGELOG.md
|
-- extra-doc-files: CHANGELOG.md
|
||||||
|
|
||||||
-- Extra source files to be distributed with the package, such as examples, or a tutorial module.
|
-- Extra source files to be distributed with the package, such as examples, or a tutorial module.
|
||||||
-- extra-source-files:
|
-- extra-source-files:
|
||||||
@@ -59,7 +59,7 @@ library
|
|||||||
import: warnings
|
import: warnings
|
||||||
|
|
||||||
-- Modules exported by the library.
|
-- Modules exported by the library.
|
||||||
exposed-modules: MyLib
|
exposed-modules: Pangu
|
||||||
|
|
||||||
-- Modules included in this library but not exported.
|
-- Modules included in this library but not exported.
|
||||||
-- other-modules:
|
-- other-modules:
|
||||||
|
|||||||
138
src/MyLib.hs
138
src/MyLib.hs
@@ -1,138 +0,0 @@
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
|
||||||
|
|
||||||
module MyLib where
|
|
||||||
|
|
||||||
import Data.Function (fix)
|
|
||||||
import Data.Text (Text)
|
|
||||||
import qualified Data.Text as T
|
|
||||||
import Data.Void (Void)
|
|
||||||
import Replace.Megaparsec (streamEdit)
|
|
||||||
import Text.Megaparsec
|
|
||||||
import Text.Megaparsec.Char
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
type Parser = Parsec Void Text
|
|
||||||
|
|
||||||
type Rule = Parser Text
|
|
||||||
|
|
||||||
type RuleSet = [Rule]
|
|
||||||
|
|
||||||
applyUntilFixed :: Rule -> Text -> Text
|
|
||||||
applyUntilFixed rule =
|
|
||||||
fix
|
|
||||||
( \loop current ->
|
|
||||||
let next = streamEdit rule id current
|
|
||||||
in if next == current then next else loop next
|
|
||||||
)
|
|
||||||
|
|
||||||
applyRules :: RuleSet -> Text -> Text
|
|
||||||
applyRules rules input = foldl (flip applyUntilFixed) input rules
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
-- rules for pangu
|
|
||||||
|
|
||||||
-- | Check if a character falls within the CJK ranges provided
|
|
||||||
isCJK :: Char -> Bool
|
|
||||||
isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
|
|
||||||
where
|
|
||||||
cjkRanges =
|
|
||||||
[ ('\x2e80', '\x2eff'),
|
|
||||||
('\x2f00', '\x2fdf'),
|
|
||||||
('\x3040', '\x309f'),
|
|
||||||
('\x30a0', '\x30fa'),
|
|
||||||
('\x30fc', '\x30ff'),
|
|
||||||
('\x3100', '\x312f'),
|
|
||||||
('\x3200', '\x32ff'),
|
|
||||||
('\x3400', '\x4dbf'),
|
|
||||||
('\x4e00', '\x9fff'),
|
|
||||||
('\xf900', '\xfaff')
|
|
||||||
]
|
|
||||||
|
|
||||||
convertToFullwidth :: Char -> Char
|
|
||||||
convertToFullwidth c =
|
|
||||||
case c of
|
|
||||||
':' -> ':'
|
|
||||||
'.' -> '。'
|
|
||||||
'~' -> '~'
|
|
||||||
'!' -> '!'
|
|
||||||
'?' -> '?'
|
|
||||||
',' -> ','
|
|
||||||
';' -> ';'
|
|
||||||
'\"' -> '”'
|
|
||||||
'\'' -> '’'
|
|
||||||
_ -> c
|
|
||||||
|
|
||||||
-- A parser that matches a single CJK character
|
|
||||||
cjkChar :: Parser Char
|
|
||||||
cjkChar = satisfy isCJK
|
|
||||||
|
|
||||||
-- use pangu.py as reference for these rules
|
|
||||||
|
|
||||||
fullwidthCJKsymCJK :: Rule
|
|
||||||
fullwidthCJKsymCJK = do
|
|
||||||
lcjk <- cjkChar
|
|
||||||
_ <- many (char ' ')
|
|
||||||
sym <- try (some (char ':')) <|> count 1 (char '.')
|
|
||||||
_ <- many (char ' ')
|
|
||||||
rcjk <- cjkChar
|
|
||||||
let transformedsym = map convertToFullwidth sym
|
|
||||||
return $ T.pack $ [lcjk] ++ transformedsym ++ [rcjk]
|
|
||||||
|
|
||||||
fullwidthCJKsym :: Rule
|
|
||||||
fullwidthCJKsym = do
|
|
||||||
cjk <- cjkChar
|
|
||||||
_ <- many (char ' ')
|
|
||||||
sym <- some $ oneOf ("~!?,;" :: [Char])
|
|
||||||
_ <- many (char ' ')
|
|
||||||
let transformedsym = T.pack $ map convertToFullwidth sym
|
|
||||||
return $ T.pack [cjk] <> transformedsym
|
|
||||||
|
|
||||||
dotsCJK :: Rule
|
|
||||||
dotsCJK = do
|
|
||||||
dots <- chunk "..." <|> chunk "…"
|
|
||||||
cjk <- cjkChar
|
|
||||||
return $ dots <> T.pack (" " ++ [cjk])
|
|
||||||
|
|
||||||
fixCJKcolAN :: Rule
|
|
||||||
fixCJKcolAN = do
|
|
||||||
cjk <- cjkChar
|
|
||||||
_ <- chunk ":"
|
|
||||||
an <- alphaNumChar
|
|
||||||
return $ T.pack $ [cjk] ++ ":" ++ [an]
|
|
||||||
|
|
||||||
-- quotes
|
|
||||||
-- seems confusing ...
|
|
||||||
quotesym :: [Char]
|
|
||||||
quotesym = "\x05f4\"\'`"
|
|
||||||
|
|
||||||
cjkquote :: Rule
|
|
||||||
cjkquote = do
|
|
||||||
cjk <- cjkChar
|
|
||||||
quote <- oneOf quotesym
|
|
||||||
return $ T.pack $ [cjk] ++ " " ++ [quote]
|
|
||||||
|
|
||||||
quoteCJK :: Rule
|
|
||||||
quoteCJK = do
|
|
||||||
quote <- oneOf quotesym
|
|
||||||
cjk <- cjkChar
|
|
||||||
return $ T.pack $ [quote] ++ " " ++ [cjk]
|
|
||||||
|
|
||||||
fixQuote :: Rule
|
|
||||||
fixQuote = do
|
|
||||||
openQuotes <- T.pack <$> some (oneOf quotesym)
|
|
||||||
_ <- many spaceChar
|
|
||||||
content <- T.pack <$> someTill anySingle (lookAhead $ some (oneOf quotesym))
|
|
||||||
closeQuotes <- T.pack <$> some (oneOf quotesym)
|
|
||||||
return $ openQuotes <> T.strip content <> closeQuotes
|
|
||||||
|
|
||||||
-- the rule set
|
|
||||||
myRules :: RuleSet
|
|
||||||
myRules =
|
|
||||||
[ fullwidthCJKsymCJK,
|
|
||||||
fullwidthCJKsym,
|
|
||||||
dotsCJK,
|
|
||||||
fixCJKcolAN,
|
|
||||||
cjkquote,
|
|
||||||
quoteCJK,
|
|
||||||
fixQuote
|
|
||||||
]
|
|
||||||
242
src/Pangu.hs
Normal file
242
src/Pangu.hs
Normal file
@@ -0,0 +1,242 @@
|
|||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Pangu (pangu, isCJK) where
|
||||||
|
|
||||||
|
import Data.Function (fix)
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Data.Void (Void)
|
||||||
|
import Replace.Megaparsec (streamEdit)
|
||||||
|
import Text.Megaparsec
|
||||||
|
import Text.Megaparsec.Char
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
type Parser = Parsec Void Text
|
||||||
|
|
||||||
|
type Rule = Parser Text
|
||||||
|
|
||||||
|
type RuleSet = [Rule]
|
||||||
|
|
||||||
|
-- | Repeatedly apply one rewrite rule over the whole text until it
-- reaches a fixed point (a pass that changes nothing). The loop is
-- expressed with 'fix'; each pass runs 'streamEdit', and wrapping the
-- rule in 'try' lets the parser backtrack cleanly on partial matches.
applyUntilFixed :: Rule -> Text -> Text
applyUntilFixed rule =
  fix
    ( \loop current ->
        let next = streamEdit (try rule) id current
         in if next == current then next else loop next
    )
|
||||||
|
|
||||||
|
applyRulesRecursively :: RuleSet -> Text -> Text
|
||||||
|
applyRulesRecursively rules input = foldl (flip applyUntilFixed) input rules
|
||||||
|
|
||||||
|
-- | Apply each rule exactly once, left to right, threading the text
-- through (contrast with 'applyRulesRecursively', which iterates each
-- rule to a fixed point).
applyRules :: RuleSet -> Text -> Text
applyRules rules input = foldl (flip applyOnce) input rules
  where
    -- One streamEdit pass per rule; 'try' enables backtracking.
    applyOnce rule = streamEdit (try rule) id
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
-- rules for pangu
|
||||||
|
|
||||||
|
-- alphaNumChar from megaparsec matches CJK chars...
|
||||||
|
-- need to implement a new one
|
||||||
|
alphanumericChar :: Parser Char
|
||||||
|
alphanumericChar = satisfy $ \c ->
|
||||||
|
(c >= 'a' && c <= 'z')
|
||||||
|
|| (c >= 'A' && c <= 'Z')
|
||||||
|
|| (c >= '0' && c <= '9')
|
||||||
|
|
||||||
|
-- | Check if a character falls within the CJK ranges provided
|
||||||
|
isCJK :: Char -> Bool
|
||||||
|
isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
|
||||||
|
where
|
||||||
|
cjkRanges =
|
||||||
|
[ ('\x2e80', '\x2eff'),
|
||||||
|
('\x2f00', '\x2fdf'),
|
||||||
|
('\x3040', '\x309f'),
|
||||||
|
('\x30a0', '\x30fa'),
|
||||||
|
('\x30fc', '\x30ff'),
|
||||||
|
('\x3100', '\x312f'),
|
||||||
|
('\x3200', '\x32ff'),
|
||||||
|
('\x3400', '\x4dbf'),
|
||||||
|
('\x4e00', '\x9fff'),
|
||||||
|
('\xf900', '\xfaff')
|
||||||
|
]
|
||||||
|
|
||||||
|
-- | Map a half-width punctuation character to its full-width
-- counterpart; any other character is returned unchanged.
convertToFullwidth :: Char -> Char
convertToFullwidth c =
  case c of
    ':' -> ':'
    '.' -> '。'
    '~' -> '~'
    '!' -> '!'
    '?' -> '?'
    ',' -> ','
    ';' -> ';'
    '\"' -> '”'
    '\'' -> '’'
    _ -> c
|
||||||
|
|
||||||
|
-- A parser that matches a single CJK character
|
||||||
|
cjkChar :: Parser Char
|
||||||
|
cjkChar = satisfy isCJK
|
||||||
|
|
||||||
|
-- use pangu.py as reference for these rules
|
||||||
|
|
||||||
|
fullwidthCJKsymCJK :: Rule
|
||||||
|
fullwidthCJKsymCJK = do
|
||||||
|
lcjk <- cjkChar
|
||||||
|
_ <- many (char ' ')
|
||||||
|
sym <- try (some (char ':')) <|> count 1 (char '.')
|
||||||
|
_ <- many (char ' ')
|
||||||
|
rcjk <- cjkChar
|
||||||
|
let transformedsym = map convertToFullwidth sym
|
||||||
|
return $ T.pack $ [lcjk] ++ transformedsym ++ [rcjk]
|
||||||
|
|
||||||
|
fullwidthCJKsym :: Rule
|
||||||
|
fullwidthCJKsym = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
_ <- many (char ' ')
|
||||||
|
sym <- some $ oneOf ("~!?,;" :: [Char])
|
||||||
|
_ <- many (char ' ')
|
||||||
|
let transformedsym = T.pack $ map convertToFullwidth sym
|
||||||
|
return $ T.pack [cjk] <> transformedsym
|
||||||
|
|
||||||
|
dotsCJK :: Rule
|
||||||
|
dotsCJK = do
|
||||||
|
dots <- chunk "..." <|> chunk "…"
|
||||||
|
cjk <- cjkChar
|
||||||
|
return $ dots <> T.pack (" " ++ [cjk])
|
||||||
|
|
||||||
|
fixCJKcolAN :: Rule
|
||||||
|
fixCJKcolAN = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
_ <- char ':'
|
||||||
|
an <- alphanumericChar
|
||||||
|
return $ T.pack $ [cjk] ++ ":" ++ [an]
|
||||||
|
|
||||||
|
-- quotes
|
||||||
|
-- seems confusing ...
|
||||||
|
quotesym :: [Char]
|
||||||
|
quotesym = "'`\x05f4\""
|
||||||
|
|
||||||
|
cjkquote :: Rule
|
||||||
|
cjkquote = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
quote <- oneOf quotesym
|
||||||
|
return $ T.pack $ [cjk] ++ " " ++ [quote]
|
||||||
|
|
||||||
|
quoteCJK :: Rule
|
||||||
|
quoteCJK = do
|
||||||
|
quote <- oneOf quotesym
|
||||||
|
cjk <- cjkChar
|
||||||
|
return $ T.pack $ [quote] ++ " " ++ [cjk]
|
||||||
|
|
||||||
|
fixQuote :: Rule
|
||||||
|
fixQuote = do
|
||||||
|
openQuotes <- T.pack <$> some (oneOf quotesym)
|
||||||
|
_ <- many spaceChar
|
||||||
|
content <- T.pack <$> someTill anySingle (lookAhead $ some (oneOf quotesym))
|
||||||
|
closeQuotes <- T.pack <$> some (oneOf quotesym)
|
||||||
|
return $ openQuotes <> T.strip content <> closeQuotes
|
||||||
|
|
||||||
|
cjkpossessivequote :: Rule
|
||||||
|
cjkpossessivequote = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
_ <- char '\''
|
||||||
|
_ <- lookAhead $ anySingleBut 's'
|
||||||
|
return $ T.pack $ cjk : " '"
|
||||||
|
|
||||||
|
-- This singlequoteCJK rule will turn '你好' into ' 你好'
|
||||||
|
-- which seems not desirable...
|
||||||
|
-- however, the behavior is aligned with python version
|
||||||
|
singlequoteCJK :: Rule
|
||||||
|
singlequoteCJK = do
|
||||||
|
_ <- char '\''
|
||||||
|
cjk <- cjkChar
|
||||||
|
return $ T.pack $ "' " ++ [cjk]
|
||||||
|
|
||||||
|
fixPossessivequote :: Rule
|
||||||
|
fixPossessivequote = do
|
||||||
|
pre <- cjkChar <|> alphanumericChar
|
||||||
|
_ <- some spaceChar
|
||||||
|
_ <- chunk "'s"
|
||||||
|
return $ T.pack $ pre : "'s"
|
||||||
|
|
||||||
|
-- hash
|
||||||
|
hashANSCJKhash :: Rule
|
||||||
|
hashANSCJKhash = do
|
||||||
|
cjk1 <- cjkChar
|
||||||
|
_ <- char '#'
|
||||||
|
mid <- some cjkChar
|
||||||
|
_ <- char '#'
|
||||||
|
cjk2 <- cjkChar
|
||||||
|
return $ T.pack $ [cjk1] ++ " #" ++ mid ++ "# " ++ [cjk2]
|
||||||
|
|
||||||
|
cjkhash :: Rule
|
||||||
|
cjkhash = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
_ <- char '#'
|
||||||
|
_ <- lookAhead $ anySingleBut ' '
|
||||||
|
return $ T.pack $ cjk : " #"
|
||||||
|
|
||||||
|
hashcjk :: Rule
|
||||||
|
hashcjk = do
|
||||||
|
_ <- char '#'
|
||||||
|
_ <- lookAhead $ anySingleBut ' '
|
||||||
|
cjk <- cjkChar
|
||||||
|
return $ T.pack $ "# " ++ [cjk]
|
||||||
|
|
||||||
|
-- operators
|
||||||
|
cjkOPTan :: Rule
|
||||||
|
cjkOPTan = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
opt <- oneOf ("+-=*/&|<>%" :: [Char])
|
||||||
|
an <- alphanumericChar
|
||||||
|
return $ T.pack [cjk, ' ', opt, ' ', an]
|
||||||
|
|
||||||
|
anOPTcjk :: Rule
|
||||||
|
anOPTcjk = do
|
||||||
|
an <- alphanumericChar
|
||||||
|
opt <- oneOf ("+-=*/&|<>%" :: [Char])
|
||||||
|
cjk <- cjkChar
|
||||||
|
return $ T.pack [an, ' ', opt, ' ', cjk]
|
||||||
|
|
||||||
|
-- slash/bracket rules are not implemented
|
||||||
|
|
||||||
|
-- CJK and alphanumeric without space
|
||||||
|
|
||||||
|
cjkans :: Rule
|
||||||
|
cjkans = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
_ <- lookAhead (alphanumericChar <|> oneOf ("@$%^&*-+\\=|/" :: [Char]))
|
||||||
|
return $ T.pack [cjk, ' ']
|
||||||
|
|
||||||
|
anscjk :: Rule
|
||||||
|
anscjk = do
|
||||||
|
an <- alphanumericChar <|> oneOf ("~!$%^&*-+\\=|;:,./?" :: [Char])
|
||||||
|
_ <- lookAhead cjkChar
|
||||||
|
return $ T.pack [an, ' ']
|
||||||
|
|
||||||
|
-- rule set, the order matters
|
||||||
|
recursiveRules :: RuleSet
|
||||||
|
recursiveRules = [fullwidthCJKsymCJK, fullwidthCJKsym]
|
||||||
|
|
||||||
|
onepassRules :: RuleSet
|
||||||
|
onepassRules =
|
||||||
|
[ dotsCJK,
|
||||||
|
fixCJKcolAN,
|
||||||
|
cjkquote,
|
||||||
|
quoteCJK,
|
||||||
|
fixQuote,
|
||||||
|
cjkpossessivequote,
|
||||||
|
-- singlequoteCJK,
|
||||||
|
fixPossessivequote,
|
||||||
|
hashANSCJKhash,
|
||||||
|
cjkhash,
|
||||||
|
-- hashcjk,
|
||||||
|
anscjk,
|
||||||
|
cjkans,
|
||||||
|
empty -- a dummy rule
|
||||||
|
]
|
||||||
|
|
||||||
|
pangu :: Text -> Text
|
||||||
|
pangu input = applyRules onepassRules $ applyRulesRecursively recursiveRules input
|
||||||
33
test/Main.hs
33
test/Main.hs
@@ -1,21 +1,28 @@
|
|||||||
{-# LANGUAGE OverloadedStrings #-}
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
module Main (main) where
|
module Main (main) where
|
||||||
import MyLib
|
import Pangu
|
||||||
import Test.Hspec
|
import Test.Hspec
|
||||||
|
|
||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = hspec $ do
|
main = hspec $ do
|
||||||
describe "MyLib.cjksym(cjk)" $ do
|
describe "Pangu.cjksym(cjk)" $ do
|
||||||
it "converts symbols to fullwidth" $ do
|
it "converts symbols to fullwidth" $ do
|
||||||
applyRules myRules "你 : 好" `shouldBe` "你:好"
|
pangu "你 : 好" `shouldBe` "你:好"
|
||||||
applyRules myRules "你.好" `shouldBe` "你。好"
|
pangu "你.好" `shouldBe` "你。好"
|
||||||
applyRules myRules "你:好:他" `shouldBe` "你:好:他"
|
pangu "你:好:他" `shouldBe` "你:好:他"
|
||||||
applyRules myRules "你 ? 好" `shouldBe` "你?好"
|
pangu "你 ? 好" `shouldBe` "你?好"
|
||||||
applyRules myRules "你…好" `shouldBe` "你… 好"
|
pangu "你…好" `shouldBe` "你… 好"
|
||||||
applyRules myRules "你...好" `shouldBe` "你... 好"
|
pangu "你...好" `shouldBe` "你... 好"
|
||||||
applyRules myRules "你:0" `shouldBe` "你:0"
|
pangu "你:0" `shouldBe` "你:0"
|
||||||
applyRules myRules "我说:\" 他说:\'你好\'\"" `shouldBe` "我说:\"他说:\'你好\'\""
|
it "fixes quotes" $ do
|
||||||
it "adds spaces" $ do
|
pangu "我说:\" 他说:'你好'\"" `shouldBe` "我说:\"他说:' 你好 '\""
|
||||||
applyRules myRules "\'你好\'" `shouldBe` "\'你好\'"
|
-- pangu "'你好'" `shouldBe` "' 你好'" -- strange behavior
|
||||||
applyRules myRules "你\'hello\'" `shouldBe` "你 \'hello\'"
|
pangu "你'hello'" `shouldBe` "你 'hello'"
|
||||||
|
pangu "我 's " `shouldBe` "我's "
|
||||||
|
it "fixes hash" $ do
|
||||||
|
pangu "你好#测试#世界" `shouldBe` "你好 #测试# 世界"
|
||||||
|
it "add spaces" $ do
|
||||||
|
pangu "你好and世界" `shouldBe` "你好 and 世界"
|
||||||
|
pangu "當你凝視著bug,bug也凝視著你" `shouldBe` "當你凝視著 bug,bug 也凝視著你"
|
||||||
|
pangu "與PM戰鬥的人,應當小心自己不要成為PM" `shouldBe` "與 PM 戰鬥的人,應當小心自己不要成為 PM"
|
||||||
Reference in New Issue
Block a user