cjksymcjk

This commit is contained in:
2026-01-02 18:50:01 +08:00
parent b4987b24f9
commit 0f8ce47fa1
2 changed files with 35 additions and 46 deletions

View File

@@ -20,16 +20,16 @@ applyRules :: RuleSet -> Text -> Text
applyRules [] input = input applyRules [] input = input
applyRules rules input = streamEdit (choice rules) id input applyRules rules input = streamEdit (choice rules) id input
-- TEST RULES -- -- TEST RULES
appleToOrange :: Rule -- appleToOrange :: Rule
appleToOrange = "orange" <$ chunk "apple" -- appleToOrange = "orange" <$ chunk "apple"
emailAtRule :: Rule -- emailAtRule :: Rule
emailAtRule = do -- emailAtRule = do
prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String)) -- prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
_ <- char '@' -- _ <- char '@'
suffix <- some (alphaNumChar <|> oneOf (".-" :: String)) -- suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
return $ T.pack prefix <> "[at]" <> T.pack suffix -- return $ T.pack prefix <> "[at]" <> T.pack suffix
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
-- rules for pangu -- rules for pangu
@@ -52,38 +52,33 @@ isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
] ]
convertToFullwidth :: Char -> Char convertToFullwidth :: Char -> Char
convertToFullwidth c = case c of convertToFullwidth c =
':' -> ':' case c of
'.' -> '。' ':' -> ':'
'~' -> '~' '.' -> '。'
'!' -> '!' '~' -> '~'
'?' -> '?' '!' -> '!'
',' -> ',' '?' -> '?'
';' -> ';' ',' -> ','
_ -> c ';' -> ';'
_ -> c
-- A parser that matches a single CJK character -- A parser that matches a single CJK character
cjkChar :: Parser Char cjkChar :: Parser Char
cjkChar = satisfy isCJK cjkChar = satisfy isCJK
fullWidthSymbolRule :: Rule cjksymcjk :: Rule
fullWidthSymbolRule = do cjksymcjk = do
c1 <- cjkChar -- First CJK c1 <- cjkChar
mid <- mid <- do
some $ _ <- many (char ' ') -- leading spaces
choice -- The "middle" symbol part core <- some $ oneOf (":.~!?,;" :: [Char])
[ char ' ', _ <- many (char ' ') -- trailing spaces
char ':', return $ T.pack core
char '.' c2 <- cjkChar
] let transformedMid = T.pack $ map convertToFullwidth (T.unpack mid)
c2 <- cjkChar -- Second CJK
-- In Haskell, we can actually process the 'mid' string logic here.
-- For now, let's assume we want to turn ":" into ":" and "." into "。"
let transformedMid = T.pack $ map convertToFullwidth mid
return $ T.singleton c1 <> transformedMid <> T.singleton c2 return $ T.singleton c1 <> transformedMid <> T.singleton c2
-- the rule set -- the rule set
myRules :: RuleSet myRules :: RuleSet
myRules = [appleToOrange, emailAtRule, try fullWidthSymbolRule] myRules = [cjksymcjk]

View File

@@ -6,14 +6,8 @@ import Test.Hspec
main :: IO () main :: IO ()
main = hspec $ do main = hspec $ do
describe "MyLib.mapemail" $ do describe "MyLib.cjksymcjk" $ do
it "maps @ to [at] in emails" $ do it "converts symbols between CJK characters to fullwidth" $ do
applyRules myRules "aaa@a.com" `shouldBe` "aaa[at]a.com" applyRules myRules "你 : 好" `shouldBe` "你:好"
applyRules myRules "你.好" `shouldBe` "你。好"
describe "MyLib.mapfruits" $ do applyRules myRules "你 ? 好" `shouldBe` "你?好"
it "maps apple to orange" $ do
applyRules myRules "apple" `shouldBe` "orange"
describe "MyLib.fullWidthSymbolRule" $ do
it "你:好" $ do
applyRules myRules "你:好" `shouldBe` "你:好"