Compare commits

...

2 Commits

Author SHA1 Message Date
792bbc80c6 quotes 2026-01-03 03:01:25 +08:00
a2ea14dde2 z 2026-01-02 22:25:32 +08:00
2 changed files with 90 additions and 28 deletions

View File

@@ -2,6 +2,7 @@
module MyLib where module MyLib where
import Data.Function (fix)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Void (Void) import Data.Void (Void)
@@ -16,20 +17,16 @@ type Rule = Parser Text
type RuleSet = [Rule] type RuleSet = [Rule]
applyUntilFixed :: Rule -> Text -> Text
applyUntilFixed rule =
fix
( \loop current ->
let next = streamEdit rule id current
in if next == current then next else loop next
)
applyRules :: RuleSet -> Text -> Text applyRules :: RuleSet -> Text -> Text
applyRules [] input = input applyRules rules input = foldl (flip applyUntilFixed) input rules
applyRules rules input = streamEdit (choice rules) id input
-- -- TEST RULES
-- appleToOrange :: Rule
-- appleToOrange = "orange" <$ chunk "apple"
-- emailAtRule :: Rule
-- emailAtRule = do
-- prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
-- _ <- char '@'
-- suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
-- return $ T.pack prefix <> "[at]" <> T.pack suffix
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
-- rules for pangu -- rules for pangu
@@ -61,24 +58,81 @@ convertToFullwidth c =
'?' -> '' '?' -> ''
',' -> '' ',' -> ''
';' -> '' ';' -> ''
'\"' -> '”'
'\'' -> ''
_ -> c _ -> c
-- A parser that matches a single CJK character -- A parser that matches a single CJK character
cjkChar :: Parser Char cjkChar :: Parser Char
cjkChar = satisfy isCJK cjkChar = satisfy isCJK
cjksymcjk :: Rule -- use python.py as reference for these rules
cjksymcjk = do
c1 <- cjkChar fullwidthCJKsymCJK :: Rule
mid <- do fullwidthCJKsymCJK = do
_ <- many (char ' ') -- leading spaces lcjk <- cjkChar
core <- some $ oneOf (":.~!?,;" :: [Char]) _ <- many (char ' ')
_ <- many (char ' ') -- trailing spaces sym <- try (some (char ':')) <|> count 1 (char '.')
return $ T.pack core _ <- many (char ' ')
c2 <- cjkChar rcjk <- cjkChar
let transformedMid = T.pack $ map convertToFullwidth (T.unpack mid) let transformedsym = map convertToFullwidth sym
return $ T.singleton c1 <> transformedMid <> T.singleton c2 return $ T.pack $ [lcjk] ++ transformedsym ++ [rcjk]
fullwidthCJKsym :: Rule
fullwidthCJKsym = do
cjk <- cjkChar
_ <- many (char ' ')
sym <- some $ oneOf ("~!?,;" :: [Char])
_ <- many (char ' ')
let transformedsym = T.pack $ map convertToFullwidth sym
return $ T.pack [cjk] <> transformedsym
dotsCJK :: Rule
dotsCJK = do
dots <- chunk "..." <|> chunk ""
cjk <- cjkChar
return $ dots <> T.pack (" " ++ [cjk])
fixCJKcolAN :: Rule
fixCJKcolAN = do
cjk <- cjkChar
_ <- chunk ":"
an <- alphaNumChar
return $ T.pack $ [cjk] ++ "" ++ [an]
-- quotes
-- seems confusing ...
quotesym :: [Char]
quotesym = "\x05f4\"\'`"
cjkquote :: Rule
cjkquote = do
cjk <- cjkChar
quote <- oneOf quotesym
return $ T.pack $ [cjk] ++ " " ++ [quote]
quoteCJK :: Rule
quoteCJK = do
quote <- oneOf quotesym
cjk <- cjkChar
return $ T.pack $ [quote] ++ " " ++ [cjk]
fixQuote :: Rule
fixQuote = do
openQuotes <- T.pack <$> some (oneOf quotesym)
_ <- many spaceChar
content <- T.pack <$> someTill anySingle (lookAhead $ some (oneOf quotesym))
closeQuotes <- T.pack <$> some (oneOf quotesym)
return $ openQuotes <> T.strip content <> closeQuotes
-- the rule set -- the rule set
myRules :: RuleSet myRules :: RuleSet
myRules = [cjksymcjk] myRules =
[ fullwidthCJKsymCJK,
fullwidthCJKsym,
dotsCJK,
fixCJKcolAN,
cjkquote,
quoteCJK,
fixQuote
]

View File

@@ -6,8 +6,16 @@ import Test.Hspec
main :: IO () main :: IO ()
main = hspec $ do main = hspec $ do
describe "MyLib.cjksymcjk" $ do describe "MyLib.cjksym(cjk)" $ do
it "converts symbols between CJK characters to fullwidth" $ do it "converts symbols to fullwidth" $ do
applyRules myRules "你 : 好" `shouldBe` "你:好" applyRules myRules "你 : 好" `shouldBe` "你:好"
applyRules myRules "你.好" `shouldBe` "你。好" applyRules myRules "你.好" `shouldBe` "你。好"
applyRules myRules "你:好:他" `shouldBe` "你:好:他"
applyRules myRules "你 ? 好" `shouldBe` "你?好" applyRules myRules "你 ? 好" `shouldBe` "你?好"
applyRules myRules "你…好" `shouldBe` "你… 好"
applyRules myRules "你...好" `shouldBe` "你... 好"
applyRules myRules "你:0" `shouldBe` "0"
applyRules myRules "我说:\" 他说:\'你好\'\"" `shouldBe` "我说:\"他说:\'你好\'\""
it "adds spaces" $ do
applyRules myRules "\'你好\'" `shouldBe` "\'你好\'"
applyRules myRules "\'hello\'" `shouldBe` "\'hello\'"