From 0f8ce47fa1a6a08c49bf6d3a2aabd2e31deffc8c Mon Sep 17 00:00:00 2001
From: Yu Cong
Date: Fri, 2 Jan 2026 18:50:01 +0800
Subject: [PATCH] cjksymcjk

---
 src/MyLib.hs | 65 ++++++++++++++++++++++++----------------------------
 test/Main.hs | 16 ++++---------
 2 files changed, 35 insertions(+), 46 deletions(-)

diff --git a/src/MyLib.hs b/src/MyLib.hs
index 202df12..68f33f8 100644
--- a/src/MyLib.hs
+++ b/src/MyLib.hs
@@ -20,16 +20,16 @@ applyRules :: RuleSet -> Text -> Text
 applyRules [] input = input
 applyRules rules input = streamEdit (choice rules) id input
 
--- TEST RULES
-appleToOrange :: Rule
-appleToOrange = "orange" <$ chunk "apple"
+-- -- TEST RULES
+-- appleToOrange :: Rule
+-- appleToOrange = "orange" <$ chunk "apple"
 
-emailAtRule :: Rule
-emailAtRule = do
-  prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
-  _ <- char '@'
-  suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
-  return $ T.pack prefix <> "[at]" <> T.pack suffix
+-- emailAtRule :: Rule
+-- emailAtRule = do
+--   prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
+--   _ <- char '@'
+--   suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
+--   return $ T.pack prefix <> "[at]" <> T.pack suffix
 
 -------------------------------------------------------------------------------
 -- rules for pangu
@@ -52,38 +52,33 @@ isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
       ]
 
 convertToFullwidth :: Char -> Char
-convertToFullwidth c = case c of
-  ':' -> ':'
-  '.' -> '。'
-  '~' -> '~'
-  '!' -> '!'
-  '?' -> '?'
-  ',' -> ','
-  ';' -> ';'
-  _ -> c
+convertToFullwidth c =
+  case c of
+    ':' -> ':'
+    '.' -> '。'
+    '~' -> '~'
+    '!' -> '!'
+    '?' -> '?'
+    ',' -> ','
+    ';' -> ';'
+    _ -> c
 
 -- A parser that matches a single CJK character
 cjkChar :: Parser Char
 cjkChar = satisfy isCJK
 
-fullWidthSymbolRule :: Rule
-fullWidthSymbolRule = do
-  c1 <- cjkChar -- First CJK
-  mid <-
-    some $
-      choice -- The "middle" symbol part
-        [ char ' ',
-          char ':',
-          char '.'
-        ]
-  c2 <- cjkChar -- Second CJK
-
-  -- In Haskell, we can actually process the 'mid' string logic here.
-  -- For now, let's assume we want to turn ":" into ":" and "." into "。"
-  let transformedMid = T.pack $ map convertToFullwidth mid
+cjksymcjk :: Rule
+cjksymcjk = do
+  c1 <- cjkChar
+  mid <- do
+    _ <- many (char ' ') -- leading spaces
+    core <- some $ oneOf (":.~!?,;" :: [Char])
+    _ <- many (char ' ') -- trailing spaces
+    return $ T.pack core
+  c2 <- cjkChar
+  let transformedMid = T.pack $ map convertToFullwidth (T.unpack mid)
   return $ T.singleton c1 <> transformedMid <> T.singleton c2
 
-
 -- the rule set
 myRules :: RuleSet
-myRules = [appleToOrange, emailAtRule, try fullWidthSymbolRule]
\ No newline at end of file
+myRules = [cjksymcjk]
\ No newline at end of file
diff --git a/test/Main.hs b/test/Main.hs
index 89752fe..996a92a 100644
--- a/test/Main.hs
+++ b/test/Main.hs
@@ -6,14 +6,8 @@ import Test.Hspec
 
 main :: IO ()
 main = hspec $ do
-  describe "MyLib.mapemail" $ do
-    it "maps @ to [at] in emails" $ do
-      applyRules myRules "aaa@a.com" `shouldBe` "aaa[at]a.com"
-
-  describe "MyLib.mapfruits" $ do
-    it "maps apple to orange" $ do
-      applyRules myRules "apple" `shouldBe` "orange"
-
-  describe "MyLib.fullWidthSymbolRule" $ do
-    it "你:好" $ do
-      applyRules myRules "你:好" `shouldBe` "你:好"
\ No newline at end of file
+  describe "MyLib.cjksymcjk" $ do
+    it "converts symbols between CJK characters to fullwidth" $ do
+      applyRules myRules "你 : 好" `shouldBe` "你:好"
+      applyRules myRules "你.好" `shouldBe` "你。好"
+      applyRules myRules "你 ? 好" `shouldBe` "你?好"
\ No newline at end of file