cjksymcjk
This commit is contained in:
49
src/MyLib.hs
49
src/MyLib.hs
@@ -20,16 +20,16 @@ applyRules :: RuleSet -> Text -> Text
|
||||
-- Apply every rule in the set to the input text, replacing each matched span
-- with the text the matching rule produces. Unmatched text passes through
-- unchanged (the edit function given to streamEdit is `id`).
--
-- An empty rule set returns the input untouched.
applyRules [] input = input
-- Each rule is wrapped in `try`: `choice` only falls through to the next
-- alternative when the previous one failed WITHOUT consuming input, so a rule
-- that reads a few characters before failing would otherwise prevent every
-- later rule from being attempted at that position.
applyRules rules input = streamEdit (choice (map try rules)) id input
|
||||
|
||||
-- TEST RULES
|
||||
-- | Test rule: match the literal chunk "apple" and yield "orange" in its place.
appleToOrange :: Rule
appleToOrange = do
  _ <- chunk "apple"
  pure "orange"
|
||||
-- -- TEST RULES
|
||||
-- appleToOrange :: Rule
|
||||
-- appleToOrange = "orange" <$ chunk "apple"
|
||||
|
||||
-- | Rewrite an email-like token by replacing its '@' with "[at]".
--
-- Matches one or more characters drawn from alphanumerics and "._%+-",
-- then a single '@', then one or more characters drawn from alphanumerics
-- and ".-". The text on both sides of the '@' is reproduced as matched.
emailAtRule :: Rule
emailAtRule = do
  localPart <- some localChar
  _ <- char '@'
  domainPart <- some domainChar
  pure (T.pack localPart <> "[at]" <> T.pack domainPart)
  where
    -- Characters accepted before the '@'.
    localChar = alphaNumChar <|> oneOf ("._%+-" :: String)
    -- Characters accepted after the '@'.
    domainChar = alphaNumChar <|> oneOf (".-" :: String)
|
||||
-- emailAtRule :: Rule
|
||||
-- emailAtRule = do
|
||||
-- prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
|
||||
-- _ <- char '@'
|
||||
-- suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
|
||||
-- return $ T.pack prefix <> "[at]" <> T.pack suffix
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- rules for pangu
|
||||
@@ -52,7 +52,8 @@ isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
|
||||
]
|
||||
|
||||
convertToFullwidth :: Char -> Char
|
||||
convertToFullwidth c = case c of
|
||||
convertToFullwidth c =
|
||||
case c of
|
||||
':' -> ':'
|
||||
'.' -> '。'
|
||||
'~' -> '~'
|
||||
@@ -66,24 +67,18 @@ convertToFullwidth c = case c of
|
||||
-- | Parse a single character for which 'isCJK' holds; fails on any other
-- character.
cjkChar :: Parser Char
|
||||
cjkChar = satisfy isCJK
|
||||
|
||||
fullWidthSymbolRule :: Rule
|
||||
fullWidthSymbolRule = do
|
||||
c1 <- cjkChar -- First CJK
|
||||
mid <-
|
||||
some $
|
||||
choice -- The "middle" symbol part
|
||||
[ char ' ',
|
||||
char ':',
|
||||
char '.'
|
||||
]
|
||||
c2 <- cjkChar -- Second CJK
|
||||
|
||||
-- In Haskell, we can actually process the 'mid' string logic here.
|
||||
-- For now, let's assume we want to turn ":" into ":" and "." into "。"
|
||||
let transformedMid = T.pack $ map convertToFullwidth mid
|
||||
-- | Convert a run of punctuation sitting between two CJK characters.
--
-- Matches: one CJK character, optional spaces, one or more symbols from
-- ":.~!?,;", optional spaces, one CJK character. The surrounding spaces are
-- dropped and every symbol is passed through 'convertToFullwidth'; the two
-- CJK characters are emitted unchanged on either side.
--
-- NOTE(review): the trailing CJK character is consumed as part of the match,
-- so in a chain such as CJK-symbol-CJK-symbol-CJK the second symbol run
-- presumably cannot reuse the middle character as its left neighbour —
-- confirm whether chained input needs to be handled.
cjksymcjk :: Rule
cjksymcjk = do
  leftChar <- cjkChar
  symbols <- padding *> some (oneOf (":.~!?,;" :: [Char])) <* padding
  rightChar <- cjkChar
  let converted = T.pack (map convertToFullwidth symbols)
  pure (T.singleton leftChar <> converted <> T.singleton rightChar)
  where
    -- Zero or more ASCII spaces around the symbol run; matched and discarded.
    padding = many (char ' ')
|
||||
|
||||
|
||||
-- the rule set
|
||||
myRules :: RuleSet
|
||||
myRules = [appleToOrange, emailAtRule, try fullWidthSymbolRule]
|
||||
myRules = [cjksymcjk]
|
||||
14
test/Main.hs
14
test/Main.hs
@@ -6,14 +6,8 @@ import Test.Hspec
|
||||
|
||||
main :: IO ()
|
||||
main = hspec $ do
|
||||
describe "MyLib.mapemail" $ do
|
||||
it "maps @ to [at] in emails" $ do
|
||||
applyRules myRules "aaa@a.com" `shouldBe` "aaa[at]a.com"
|
||||
|
||||
describe "MyLib.mapfruits" $ do
|
||||
it "maps apple to orange" $ do
|
||||
applyRules myRules "apple" `shouldBe` "orange"
|
||||
|
||||
describe "MyLib.fullWidthSymbolRule" $ do
|
||||
it "你:好" $ do
|
||||
describe "MyLib.cjksymcjk" $ do
|
||||
it "converts symbols between CJK characters to fullwidth" $ do
|
||||
applyRules myRules "你 : 好" `shouldBe` "你:好"
|
||||
applyRules myRules "你.好" `shouldBe` "你。好"
|
||||
applyRules myRules "你 ? 好" `shouldBe` "你?好"
|
||||
Reference in New Issue
Block a user