z
This commit is contained in:
97
src/MyLib.hs
97
src/MyLib.hs
@@ -21,16 +21,29 @@ applyUntilFixed :: Rule -> Text -> Text
|
|||||||
applyUntilFixed rule =
|
applyUntilFixed rule =
|
||||||
fix
|
fix
|
||||||
( \loop current ->
|
( \loop current ->
|
||||||
let next = streamEdit rule id current
|
let next = streamEdit (try rule) id current
|
||||||
in if next == current then next else loop next
|
in if next == current then next else loop next
|
||||||
)
|
)
|
||||||
|
|
||||||
|
applyRulesRecursively :: RuleSet -> Text -> Text
|
||||||
|
applyRulesRecursively rules input = foldl (flip applyUntilFixed) input rules
|
||||||
|
|
||||||
applyRules :: RuleSet -> Text -> Text
|
applyRules :: RuleSet -> Text -> Text
|
||||||
applyRules rules input = foldl (flip applyUntilFixed) input rules
|
applyRules rules input = foldl (flip applyOnce) input rules
|
||||||
|
where
|
||||||
|
applyOnce rule = streamEdit (try rule) id
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
-- rules for pangu
|
-- rules for pangu
|
||||||
|
|
||||||
|
-- alphaNumChar from megaparsec matches CJK chars...
|
||||||
|
-- need to implement a new one
|
||||||
|
alphanumericChar :: Parser Char
|
||||||
|
alphanumericChar = satisfy $ \c ->
|
||||||
|
(c >= 'a' && c <= 'z')
|
||||||
|
|| (c >= 'A' && c <= 'Z')
|
||||||
|
|| (c >= '0' && c <= '9')
|
||||||
|
|
||||||
-- | Check if a character falls within the CJK ranges provided
|
-- | Check if a character falls within the CJK ranges provided
|
||||||
isCJK :: Char -> Bool
|
isCJK :: Char -> Bool
|
||||||
isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
|
isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
|
||||||
@@ -97,7 +110,7 @@ fixCJKcolAN :: Rule
|
|||||||
fixCJKcolAN = do
|
fixCJKcolAN = do
|
||||||
cjk <- cjkChar
|
cjk <- cjkChar
|
||||||
_ <- char ':'
|
_ <- char ':'
|
||||||
an <- alphaNumChar
|
an <- alphanumericChar
|
||||||
return $ T.pack $ [cjk] ++ ":" ++ [an]
|
return $ T.pack $ [cjk] ++ ":" ++ [an]
|
||||||
|
|
||||||
-- quotes
|
-- quotes
|
||||||
@@ -143,24 +156,76 @@ singlequoteCJK = do
|
|||||||
|
|
||||||
fixPossessivequote :: Rule
|
fixPossessivequote :: Rule
|
||||||
fixPossessivequote = do
|
fixPossessivequote = do
|
||||||
pre <- cjkChar <|> alphaNumChar
|
pre <- cjkChar <|> alphanumericChar
|
||||||
_ <- some spaceChar
|
_ <- some spaceChar
|
||||||
_ <- chunk "'s"
|
_ <- chunk "'s"
|
||||||
return $ T.pack $ pre : "'s"
|
return $ T.pack $ pre : "'s"
|
||||||
|
|
||||||
-- hash
|
-- hash
|
||||||
-- hashANSCJKhash :: Rule
|
hashANSCJKhash :: Rule
|
||||||
-- hashANSCJKhash = do
|
hashANSCJKhash = do
|
||||||
-- cjk1 <- cjkChar
|
cjk1 <- cjkChar
|
||||||
-- _ <- char '#'
|
_ <- char '#'
|
||||||
|
mid <- some cjkChar
|
||||||
|
_ <- char '#'
|
||||||
|
cjk2 <- cjkChar
|
||||||
|
return $ T.pack $ [cjk1] ++ " #" ++ mid ++ "# " ++ [cjk2]
|
||||||
|
|
||||||
|
cjkhash :: Rule
|
||||||
|
cjkhash = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
_ <- char '#'
|
||||||
|
_ <- lookAhead $ anySingleBut ' '
|
||||||
|
return $ T.pack $ cjk : " #"
|
||||||
|
|
||||||
|
hashcjk :: Rule
|
||||||
|
hashcjk = do
|
||||||
|
_ <- char '#'
|
||||||
|
_ <- lookAhead $ anySingleBut ' '
|
||||||
|
cjk <- cjkChar
|
||||||
|
return $ T.pack $ "# " ++ [cjk]
|
||||||
|
|
||||||
|
-- operators
|
||||||
|
cjkOPTan :: Rule
|
||||||
|
cjkOPTan = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
opt <- oneOf ("+-=*/&|<>%" :: [Char])
|
||||||
|
an <- alphanumericChar
|
||||||
|
return $ T.pack [cjk, ' ', opt, ' ', an]
|
||||||
|
|
||||||
|
anOPTcjk :: Rule
|
||||||
|
anOPTcjk = do
|
||||||
|
an <- alphanumericChar
|
||||||
|
opt <- oneOf ("+-=*/&|<>%" :: [Char])
|
||||||
|
cjk <- cjkChar
|
||||||
|
return $ T.pack [an, ' ', opt, ' ', cjk]
|
||||||
|
|
||||||
|
-- slash/bracket rules are not implemented
|
||||||
|
|
||||||
|
-- CJK and alphanumeric without space
|
||||||
|
|
||||||
|
cjkans :: Rule
|
||||||
|
cjkans = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
_ <- lookAhead (alphanumericChar <|> oneOf ("@$%^&*-+\\=|/" :: [Char]))
|
||||||
|
return $ T.pack [cjk, ' ']
|
||||||
|
|
||||||
|
anscjk :: Rule
|
||||||
|
anscjk = do
|
||||||
|
an <- alphanumericChar <|> oneOf ("~!$%^&*-+\\=|;:,./?" :: [Char])
|
||||||
|
_ <- lookAhead cjkChar
|
||||||
|
return $ T.pack [an, ' ']
|
||||||
|
|
||||||
-- rule set, the order matters
|
-- rule set, the order matters
|
||||||
myRules :: RuleSet
|
recursiveRules :: RuleSet
|
||||||
myRules =
|
recursiveRules =
|
||||||
[ fullwidthCJKsymCJK,
|
[ fullwidthCJKsymCJK,
|
||||||
fullwidthCJKsym,
|
fullwidthCJKsym
|
||||||
dotsCJK,
|
]
|
||||||
|
|
||||||
|
onepassRules :: RuleSet
|
||||||
|
onepassRules =
|
||||||
|
[ dotsCJK,
|
||||||
fixCJKcolAN,
|
fixCJKcolAN,
|
||||||
cjkquote,
|
cjkquote,
|
||||||
quoteCJK,
|
quoteCJK,
|
||||||
@@ -168,5 +233,13 @@ myRules =
|
|||||||
cjkpossessivequote,
|
cjkpossessivequote,
|
||||||
-- singlequoteCJK,
|
-- singlequoteCJK,
|
||||||
fixPossessivequote,
|
fixPossessivequote,
|
||||||
|
hashANSCJKhash,
|
||||||
|
cjkhash,
|
||||||
|
-- hashcjk,
|
||||||
|
anscjk,
|
||||||
|
cjkans,
|
||||||
empty -- a dummy rule
|
empty -- a dummy rule
|
||||||
]
|
]
|
||||||
|
|
||||||
|
pangu :: Text -> Text
|
||||||
|
pangu input = applyRules onepassRules $ applyRulesRecursively recursiveRules input
|
||||||
28
test/Main.hs
28
test/Main.hs
@@ -8,15 +8,21 @@ main :: IO ()
|
|||||||
main = hspec $ do
|
main = hspec $ do
|
||||||
describe "MyLib.cjksym(cjk)" $ do
|
describe "MyLib.cjksym(cjk)" $ do
|
||||||
it "converts symbols to fullwidth" $ do
|
it "converts symbols to fullwidth" $ do
|
||||||
applyRules myRules "你 : 好" `shouldBe` "你:好"
|
pangu "你 : 好" `shouldBe` "你:好"
|
||||||
applyRules myRules "你.好" `shouldBe` "你。好"
|
pangu "你.好" `shouldBe` "你。好"
|
||||||
applyRules myRules "你:好:他" `shouldBe` "你:好:他"
|
pangu "你:好:他" `shouldBe` "你:好:他"
|
||||||
applyRules myRules "你 ? 好" `shouldBe` "你?好"
|
pangu "你 ? 好" `shouldBe` "你?好"
|
||||||
applyRules myRules "你…好" `shouldBe` "你… 好"
|
pangu "你…好" `shouldBe` "你… 好"
|
||||||
applyRules myRules "你...好" `shouldBe` "你... 好"
|
pangu "你...好" `shouldBe` "你... 好"
|
||||||
applyRules myRules "你:0" `shouldBe` "你:0"
|
pangu "你:0" `shouldBe` "你:0"
|
||||||
it "fixes quotes" $ do
|
it "fixes quotes" $ do
|
||||||
applyRules myRules "我说:\" 他说:'你好'\"" `shouldBe` "我说:\"他说:' 你好 '\""
|
pangu "我说:\" 他说:'你好'\"" `shouldBe` "我说:\"他说:' 你好 '\""
|
||||||
-- applyRules myRules "'你好'" `shouldBe` "' 你好'" -- strange behavior
|
-- pangu "'你好'" `shouldBe` "' 你好'" -- strange behavior
|
||||||
applyRules myRules "你'hello'" `shouldBe` "你 'hello'"
|
pangu "你'hello'" `shouldBe` "你 'hello'"
|
||||||
applyRules myRules "我 's " `shouldBe` "我's "
|
pangu "我 's " `shouldBe` "我's "
|
||||||
|
it "fixes hash" $ do
|
||||||
|
pangu "你好#测试#世界" `shouldBe` "你好 #测试# 世界"
|
||||||
|
it "add spaces" $ do
|
||||||
|
pangu "你好and世界" `shouldBe` "你好 and 世界"
|
||||||
|
pangu "當你凝視著bug,bug也凝視著你" `shouldBe` "當你凝視著 bug,bug 也凝視著你"
|
||||||
|
pangu "與PM戰鬥的人,應當小心自己不要成為PM" `shouldBe` "與 PM 戰鬥的人,應當小心自己不要成為 PM"
|
||||||
Reference in New Issue
Block a user