Refactor quote handling and add new rules for possessive quotes
This commit is contained in:
42
src/MyLib.hs
42
src/MyLib.hs
@@ -96,14 +96,14 @@ dotsCJK = do
|
||||
-- | Rule for a colon sitting between a CJK character and an alphanumeric
-- character.
--
-- Matches one 'cjkChar', exactly one @':'@, then one 'alphaNumChar', and
-- re-emits the two characters with the colon literal from the final
-- expression between them.
--
-- NOTE(review): the colon that is matched and the colon that is emitted
-- render identically here; confirm whether one of them is meant to be the
-- fullwidth form.
fixCJKcolAN :: Rule
fixCJKcolAN = do
  cjk <- cjkChar
  -- The colon must be consumed once only: parsing it twice (the old
  -- @chunk ":"@ followed by @char ':'@) would require "::" in the input
  -- and the rule could never fire on a single colon.
  _ <- char ':'
  an <- alphaNumChar
  return $ T.pack $ [cjk] ++ ":" ++ [an]
|
||||
|
||||
-- quotes
|
||||
-- seems confusing ...
|
||||
-- | Characters treated as quotation marks by the quote rules: apostrophe,
-- backtick, U+05F4 (Hebrew punctuation gershayim) and the ASCII double
-- quote.
--
-- Defined exactly once; a second equation for the same name would be
-- rejected as a duplicate definition. The order of the characters is
-- irrelevant to membership tests such as @oneOf quotesym@.
quotesym :: [Char]
quotesym = "'`\x05f4\""
|
||||
|
||||
cjkquote :: Rule
|
||||
cjkquote = do
|
||||
@@ -125,7 +125,37 @@ fixQuote = do
|
||||
closeQuotes <- T.pack <$> some (oneOf quotesym)
|
||||
return $ openQuotes <> T.strip content <> closeQuotes
|
||||
|
||||
-- the rule set
|
||||
-- | Insert a space between a CJK character and a single quote that
-- directly follows it, as long as the quote does not start @'s@.
--
-- The character after the quote is only inspected, never consumed. Because
-- the look-ahead needs an actual character to inspect, the rule does not
-- match when the quote is the last character of the input.
cjkpossessivequote :: Rule
cjkpossessivequote = do
  han <- cjkChar <* char '\''
  -- Peek at the next character; anything other than 's' lets the rule fire.
  _ <- lookAhead (anySingleBut 's')
  pure (T.pack (han : " '"))
|
||||
|
||||
-- | Put a space between a single quote and the CJK character that
-- immediately follows it.
--
-- Only a quote *before* a CJK character is handled, so @'你好'@ comes out
-- as @' 你好'@. That lopsided result is not really desirable, but it is
-- kept because it matches the behaviour of the Python version.
singlequoteCJK :: Rule
singlequoteCJK = spaced <$> (char '\'' *> cjkChar)
  where
    -- Re-emit the quote, one space, then the matched character.
    spaced han = T.pack ("' " ++ [han])
|
||||
|
||||
-- | Remove the whitespace that separates a CJK or alphanumeric character
-- from a following possessive @'s@.
--
-- One or more whitespace characters must be present for the rule to match;
-- the output is the leading character immediately followed by @'s@.
fixPossessivequote :: Rule
fixPossessivequote = do
  owner <- cjkChar <|> alphaNumChar
  -- Skip the gap and the literal "'s", then emit the tightened form.
  T.pack (owner : "'s") <$ (some spaceChar *> chunk "'s")
|
||||
|
||||
-- hash
|
||||
-- hashANSCJKhash :: Rule
|
||||
-- hashANSCJKhash = do
|
||||
-- cjk1 <- cjkChar
|
||||
-- _ <- char '#'
|
||||
|
||||
|
||||
-- rule set, the order matters
|
||||
myRules :: RuleSet
|
||||
myRules =
|
||||
[ fullwidthCJKsymCJK,
|
||||
@@ -134,5 +164,9 @@ myRules =
|
||||
fixCJKcolAN,
|
||||
cjkquote,
|
||||
quoteCJK,
|
||||
fixQuote
|
||||
fixQuote,
|
||||
cjkpossessivequote,
|
||||
-- singlequoteCJK,
|
||||
fixPossessivequote,
|
||||
empty -- a dummy rule
|
||||
]
|
||||
@@ -15,7 +15,8 @@ main = hspec $ do
|
||||
applyRules myRules "你…好" `shouldBe` "你… 好"
|
||||
applyRules myRules "你...好" `shouldBe` "你... 好"
|
||||
applyRules myRules "你:0" `shouldBe` "你:0"
|
||||
applyRules myRules "我说:\" 他说:\'你好\'\"" `shouldBe` "我说:\"他说:\'你好\'\""
|
||||
it "adds spaces" $ do
|
||||
applyRules myRules "\'你好\'" `shouldBe` "\'你好\'"
|
||||
applyRules myRules "你\'hello\'" `shouldBe` "你 \'hello\'"
|
||||
it "fixes quotes" $ do
|
||||
applyRules myRules "我说:\" 他说:'你好'\"" `shouldBe` "我说:\"他说:' 你好 '\""
|
||||
-- applyRules myRules "'你好'" `shouldBe` "' 你好'" -- strange behavior
|
||||
applyRules myRules "你'hello'" `shouldBe` "你 'hello'"
|
||||
applyRules myRules "我 's " `shouldBe` "我's "
|
||||
Reference in New Issue
Block a user