Refactor quote handling and add new rules for possessive quotes
This commit is contained in:
42
src/MyLib.hs
42
src/MyLib.hs
@@ -96,14 +96,14 @@ dotsCJK = do
|
|||||||
fixCJKcolAN :: Rule
|
fixCJKcolAN :: Rule
|
||||||
fixCJKcolAN = do
|
fixCJKcolAN = do
|
||||||
cjk <- cjkChar
|
cjk <- cjkChar
|
||||||
_ <- chunk ":"
|
_ <- char ':'
|
||||||
an <- alphaNumChar
|
an <- alphaNumChar
|
||||||
return $ T.pack $ [cjk] ++ ":" ++ [an]
|
return $ T.pack $ [cjk] ++ ":" ++ [an]
|
||||||
|
|
||||||
-- quotes
|
-- quotes
|
||||||
-- seems confusing ...
|
-- seems confusing ...
|
||||||
quotesym :: [Char]
|
quotesym :: [Char]
|
||||||
quotesym = "\x05f4\"\'`"
|
quotesym = "'`\x05f4\""
|
||||||
|
|
||||||
cjkquote :: Rule
|
cjkquote :: Rule
|
||||||
cjkquote = do
|
cjkquote = do
|
||||||
@@ -125,7 +125,37 @@ fixQuote = do
|
|||||||
closeQuotes <- T.pack <$> some (oneOf quotesym)
|
closeQuotes <- T.pack <$> some (oneOf quotesym)
|
||||||
return $ openQuotes <> T.strip content <> closeQuotes
|
return $ openQuotes <> T.strip content <> closeQuotes
|
||||||
|
|
||||||
-- the rule set
|
-- Put a single space between a character accepted by 'cjkChar' and a single
-- quote that directly follows it, unless that quote starts a possessive "'s".
-- The character after the quote is only peeked at (via 'lookAhead'), never
-- consumed, so the rule needs at least one character after the quote.
cjkpossessivequote :: Rule
cjkpossessivequote =
  spaced <$> cjkChar <* char '\'' <* lookAhead (anySingleBut 's')
  where
    -- Rebuild the match as: leading character, space, quote.
    spaced c = T.pack [c, ' ', '\'']
|
||||||
|
|
||||||
|
-- Put a single space between a single quote and the character accepted by
-- 'cjkChar' that directly follows it.
-- Caveat: applied to '你好' this rule produces ' 你好' (a space is added
-- after the opening quote), which does not look desirable. It is kept anyway
-- because the behavior is aligned with the Python version.
singlequoteCJK :: Rule
singlequoteCJK = spaced <$> (char '\'' *> cjkChar)
  where
    -- Rebuild the match as: quote, space, following character.
    spaced c = T.pack ['\'', ' ', c]
|
||||||
|
|
||||||
|
-- Remove the whitespace that separates a character from a possessive "'s"
-- following it, so that e.g. "我 's" is rewritten to "我's".
-- The leading character is whatever 'cjkChar' or 'alphaNumChar' accepts; one
-- or more characters matched by 'spaceChar' are consumed and dropped.
fixPossessivequote :: Rule
fixPossessivequote =
  glue <$> (cjkChar <|> alphaNumChar) <* some spaceChar <* chunk "'s"
  where
    -- Reattach the possessive suffix directly to the leading character.
    glue c = T.pack (c : "'s")
|
||||||
|
|
||||||
|
-- hash
|
||||||
|
-- hashANSCJKhash :: Rule
|
||||||
|
-- hashANSCJKhash = do
|
||||||
|
-- cjk1 <- cjkChar
|
||||||
|
-- _ <- char '#'
|
||||||
|
|
||||||
|
|
||||||
|
-- rule set, the order matters
|
||||||
myRules :: RuleSet
|
myRules :: RuleSet
|
||||||
myRules =
|
myRules =
|
||||||
[ fullwidthCJKsymCJK,
|
[ fullwidthCJKsymCJK,
|
||||||
@@ -134,5 +164,9 @@ myRules =
|
|||||||
fixCJKcolAN,
|
fixCJKcolAN,
|
||||||
cjkquote,
|
cjkquote,
|
||||||
quoteCJK,
|
quoteCJK,
|
||||||
fixQuote
|
fixQuote,
|
||||||
|
cjkpossessivequote,
|
||||||
|
-- singlequoteCJK,
|
||||||
|
fixPossessivequote,
|
||||||
|
empty -- a dummy rule
|
||||||
]
|
]
|
||||||
@@ -15,7 +15,8 @@ main = hspec $ do
|
|||||||
applyRules myRules "你…好" `shouldBe` "你… 好"
|
applyRules myRules "你…好" `shouldBe` "你… 好"
|
||||||
applyRules myRules "你...好" `shouldBe` "你... 好"
|
applyRules myRules "你...好" `shouldBe` "你... 好"
|
||||||
applyRules myRules "你:0" `shouldBe` "你:0"
|
applyRules myRules "你:0" `shouldBe` "你:0"
|
||||||
applyRules myRules "我说:\" 他说:\'你好\'\"" `shouldBe` "我说:\"他说:\'你好\'\""
|
it "fixes quotes" $ do
|
||||||
it "adds spaces" $ do
|
applyRules myRules "我说:\" 他说:'你好'\"" `shouldBe` "我说:\"他说:' 你好 '\""
|
||||||
applyRules myRules "\'你好\'" `shouldBe` "\'你好\'"
|
-- applyRules myRules "'你好'" `shouldBe` "' 你好'" -- strange behavior
|
||||||
applyRules myRules "你\'hello\'" `shouldBe` "你 \'hello\'"
|
applyRules myRules "你'hello'" `shouldBe` "你 'hello'"
|
||||||
|
applyRules myRules "我 's " `shouldBe` "我's "
|
||||||
Reference in New Issue
Block a user