cjksymcjk
This commit is contained in:
49
src/MyLib.hs
49
src/MyLib.hs
@@ -20,16 +20,16 @@ applyRules :: RuleSet -> Text -> Text
|
|||||||
applyRules [] input = input
|
applyRules [] input = input
|
||||||
applyRules rules input = streamEdit (choice rules) id input
|
applyRules rules input = streamEdit (choice rules) id input
|
||||||
|
|
||||||
-- TEST RULES
|
-- -- TEST RULES
|
||||||
appleToOrange :: Rule
|
-- appleToOrange :: Rule
|
||||||
appleToOrange = "orange" <$ chunk "apple"
|
-- appleToOrange = "orange" <$ chunk "apple"
|
||||||
|
|
||||||
emailAtRule :: Rule
|
-- emailAtRule :: Rule
|
||||||
emailAtRule = do
|
-- emailAtRule = do
|
||||||
prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
|
-- prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
|
||||||
_ <- char '@'
|
-- _ <- char '@'
|
||||||
suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
|
-- suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
|
||||||
return $ T.pack prefix <> "[at]" <> T.pack suffix
|
-- return $ T.pack prefix <> "[at]" <> T.pack suffix
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
-- rules for pangu
|
-- rules for pangu
|
||||||
@@ -52,7 +52,8 @@ isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
|
|||||||
]
|
]
|
||||||
|
|
||||||
convertToFullwidth :: Char -> Char
|
convertToFullwidth :: Char -> Char
|
||||||
convertToFullwidth c = case c of
|
convertToFullwidth c =
|
||||||
|
case c of
|
||||||
':' -> ':'
|
':' -> ':'
|
||||||
'.' -> '。'
|
'.' -> '。'
|
||||||
'~' -> '~'
|
'~' -> '~'
|
||||||
@@ -66,24 +67,18 @@ convertToFullwidth c = case c of
|
|||||||
cjkChar :: Parser Char
|
cjkChar :: Parser Char
|
||||||
cjkChar = satisfy isCJK
|
cjkChar = satisfy isCJK
|
||||||
|
|
||||||
fullWidthSymbolRule :: Rule
|
cjksymcjk :: Rule
|
||||||
fullWidthSymbolRule = do
|
cjksymcjk = do
|
||||||
c1 <- cjkChar -- First CJK
|
c1 <- cjkChar
|
||||||
mid <-
|
mid <- do
|
||||||
some $
|
_ <- many (char ' ') -- leading spaces
|
||||||
choice -- The "middle" symbol part
|
core <- some $ oneOf (":.~!?,;" :: [Char])
|
||||||
[ char ' ',
|
_ <- many (char ' ') -- trailing spaces
|
||||||
char ':',
|
return $ T.pack core
|
||||||
char '.'
|
c2 <- cjkChar
|
||||||
]
|
let transformedMid = T.pack $ map convertToFullwidth (T.unpack mid)
|
||||||
c2 <- cjkChar -- Second CJK
|
|
||||||
|
|
||||||
-- In Haskell, we can actually process the 'mid' string logic here.
|
|
||||||
-- For now, let's assume we want to turn ":" into ":" and "." into "。"
|
|
||||||
let transformedMid = T.pack $ map convertToFullwidth mid
|
|
||||||
return $ T.singleton c1 <> transformedMid <> T.singleton c2
|
return $ T.singleton c1 <> transformedMid <> T.singleton c2
|
||||||
|
|
||||||
|
|
||||||
-- the rule set
|
-- the rule set
|
||||||
myRules :: RuleSet
|
myRules :: RuleSet
|
||||||
myRules = [appleToOrange, emailAtRule, try fullWidthSymbolRule]
|
myRules = [cjksymcjk]
|
||||||
16
test/Main.hs
16
test/Main.hs
@@ -6,14 +6,8 @@ import Test.Hspec
|
|||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = hspec $ do
|
main = hspec $ do
|
||||||
describe "MyLib.mapemail" $ do
|
describe "MyLib.cjksymcjk" $ do
|
||||||
it "maps @ to [at] in emails" $ do
|
it "converts symbols between CJK characters to fullwidth" $ do
|
||||||
applyRules myRules "aaa@a.com" `shouldBe` "aaa[at]a.com"
|
applyRules myRules "你 : 好" `shouldBe` "你:好"
|
||||||
|
applyRules myRules "你.好" `shouldBe` "你。好"
|
||||||
describe "MyLib.mapfruits" $ do
|
applyRules myRules "你 ? 好" `shouldBe` "你?好"
|
||||||
it "maps apple to orange" $ do
|
|
||||||
applyRules myRules "apple" `shouldBe` "orange"
|
|
||||||
|
|
||||||
describe "MyLib.fullWidthSymbolRule" $ do
|
|
||||||
it "你:好" $ do
|
|
||||||
applyRules myRules "你:好" `shouldBe` "你:好"
|
|
||||||
Reference in New Issue
Block a user