From a2ea14dde22f32f50b058b6ccfb3a995b609c79e Mon Sep 17 00:00:00 2001 From: Yu Cong Date: Fri, 2 Jan 2026 22:25:32 +0800 Subject: [PATCH] z --- src/MyLib.hs | 91 +++++++++++++++++++++++++++++++++++++--------------- test/Main.hs | 13 ++++++-- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/src/MyLib.hs b/src/MyLib.hs index 68f33f8..569b59b 100644 --- a/src/MyLib.hs +++ b/src/MyLib.hs @@ -2,6 +2,7 @@ module MyLib where +import Data.Function (fix) import Data.Text (Text) import qualified Data.Text as T import Data.Void (Void) @@ -16,20 +17,16 @@ type Rule = Parser Text type RuleSet = [Rule] +applyUntilFixed :: Rule -> Text -> Text +applyUntilFixed rule = + fix + ( \loop current -> + let next = streamEdit rule id current + in if next == current then next else loop next + ) + applyRules :: RuleSet -> Text -> Text -applyRules [] input = input -applyRules rules input = streamEdit (choice rules) id input - --- -- TEST RULES --- appleToOrange :: Rule --- appleToOrange = "orange" <$ chunk "apple" - --- emailAtRule :: Rule --- emailAtRule = do --- prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String)) --- _ <- char '@' --- suffix <- some (alphaNumChar <|> oneOf (".-" :: String)) --- return $ T.pack prefix <> "[at]" <> T.pack suffix +applyRules rules input = foldl (flip applyUntilFixed) input rules ------------------------------------------------------------------------------- -- rules for pangu @@ -61,24 +58,68 @@ convertToFullwidth c = '?' -> '?' ',' -> ',' ';' -> ';' + '\"' -> '”' + '\'' -> '’' _ -> c -- A parser that matches a single CJK character cjkChar :: Parser Char cjkChar = satisfy isCJK -cjksymcjk :: Rule -cjksymcjk = do - c1 <- cjkChar - mid <- do - _ <- many (char ' ') -- leading spaces - core <- some $ oneOf (":.~!?,;" :: [Char]) - _ <- many (char ' ') -- trailing spaces - return $ T.pack core - c2 <- cjkChar - let transformedMid = T.pack $ map convertToFullwidth (T.unpack mid) - return $ T.singleton c1 <> transformedMid <> T.singleton c2 +-- use python.py as reference for these rules + +fullwidthCJKsymCJK :: Rule +fullwidthCJKsymCJK = do + lcjk <- cjkChar + _ <- many (char ' ') + sym <- fmap T.unpack (chunk ".") <|> some (oneOf (":" :: [Char])) + _ <- many (char ' ') + rcjk <- cjkChar + + let transformedsym = T.pack $ map convertToFullwidth sym + return $ T.pack [lcjk] <> transformedsym <> T.pack [rcjk] + +fullwidthCJKsym :: Rule +fullwidthCJKsym = do + cjk <- cjkChar + _ <- many (char ' ') + sym <- some $ oneOf ("~!?,;" :: [Char]) + _ <- many (char ' ') + let transformedsym = T.pack $ map convertToFullwidth sym + return $ T.pack [cjk] <> transformedsym + +dotsCJK :: Rule +dotsCJK = do + dots <- chunk "..." <|> chunk "…" + cjk <- cjkChar + return $ dots <> T.pack (" " ++ [cjk]) + +fixCJKcolAN :: Rule +fixCJKcolAN = do + cjk <- cjkChar + _ <- chunk ":" + an <- alphaNumChar + return $ T.pack $ [cjk] ++ ":" ++ [an] + +cjkquote :: Rule +cjkquote = do + cjk <- cjkChar + quote <- oneOf ("\x05f4\"\'" :: [Char]) + return $ T.pack $ [cjk] ++ " " ++ [quote] + +quoteCJK :: Rule +quoteCJK = do + quote <- oneOf ("\x05f4\"\'" :: [Char]) + cjk <- cjkChar + return $ T.pack $ [quote] ++ " " ++ [cjk] -- the rule set myRules :: RuleSet -myRules = [cjksymcjk] \ No newline at end of file +myRules = + [ fullwidthCJKsymCJK, + fullwidthCJKsym, + dotsCJK, + fixCJKcolAN, + cjkquote, + quoteCJK + ] \ No newline at end of file diff --git a/test/Main.hs b/test/Main.hs index 996a92a..e4b9132 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -6,8 +6,15 @@ import Test.Hspec main :: IO () main = hspec $ do - describe "MyLib.cjksymcjk" $ do - it "converts symbols between CJK characters to fullwidth" $ do + describe "MyLib.cjksym(cjk)" $ do + it "converts symbols to fullwidth" $ do applyRules myRules "你 : 好" `shouldBe` "你:好" applyRules myRules "你.好" `shouldBe` "你。好" - applyRules myRules "你 ? 好" `shouldBe` "你?好" \ No newline at end of file + applyRules myRules "你:好:他" `shouldBe` "你:好:他" + applyRules myRules "你 ? 好" `shouldBe` "你?好" + applyRules myRules "你…好" `shouldBe` "你… 好" + applyRules myRules "你...好" `shouldBe` "你... 好" + applyRules myRules "你:0" `shouldBe` "你:0" + -- applyRules myRules "我说:\" 他说:\'你好\'\"" `shouldBe` "我说:“他说:‘你好’”" + it "adds spaces" $ do + applyRules myRules "\'你好\'" `shouldBe` "\' 你好 \'" \ No newline at end of file