Compare commits
2 Commits
0f8ce47fa1
...
792bbc80c6
| Author | SHA1 | Date | |
|---|---|---|---|
| 792bbc80c6 | |||
| a2ea14dde2 |
104
src/MyLib.hs
104
src/MyLib.hs
@@ -2,6 +2,7 @@
|
||||
|
||||
module MyLib where
|
||||
|
||||
import Data.Function (fix)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Data.Void (Void)
|
||||
@@ -16,20 +17,16 @@ type Rule = Parser Text
|
||||
|
||||
type RuleSet = [Rule]
|
||||
|
||||
applyUntilFixed :: Rule -> Text -> Text
|
||||
applyUntilFixed rule =
|
||||
fix
|
||||
( \loop current ->
|
||||
let next = streamEdit rule id current
|
||||
in if next == current then next else loop next
|
||||
)
|
||||
|
||||
applyRules :: RuleSet -> Text -> Text
|
||||
applyRules [] input = input
|
||||
applyRules rules input = streamEdit (choice rules) id input
|
||||
|
||||
-- -- TEST RULES
|
||||
-- appleToOrange :: Rule
|
||||
-- appleToOrange = "orange" <$ chunk "apple"
|
||||
|
||||
-- emailAtRule :: Rule
|
||||
-- emailAtRule = do
|
||||
-- prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
|
||||
-- _ <- char '@'
|
||||
-- suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
|
||||
-- return $ T.pack prefix <> "[at]" <> T.pack suffix
|
||||
applyRules rules input = foldl (flip applyUntilFixed) input rules
|
||||
|
||||
-------------------------------------------------------------------------------
|
||||
-- rules for pangu
|
||||
@@ -61,24 +58,81 @@ convertToFullwidth c =
|
||||
'?' -> '?'
|
||||
',' -> ','
|
||||
';' -> ';'
|
||||
'\"' -> '”'
|
||||
'\'' -> '’'
|
||||
_ -> c
|
||||
|
||||
-- A parser that matches a single CJK character
|
||||
cjkChar :: Parser Char
|
||||
cjkChar = satisfy isCJK
|
||||
|
||||
cjksymcjk :: Rule
|
||||
cjksymcjk = do
|
||||
c1 <- cjkChar
|
||||
mid <- do
|
||||
_ <- many (char ' ') -- leading spaces
|
||||
core <- some $ oneOf (":.~!?,;" :: [Char])
|
||||
_ <- many (char ' ') -- trailing spaces
|
||||
return $ T.pack core
|
||||
c2 <- cjkChar
|
||||
let transformedMid = T.pack $ map convertToFullwidth (T.unpack mid)
|
||||
return $ T.singleton c1 <> transformedMid <> T.singleton c2
|
||||
-- use python.py as reference for these rules
|
||||
|
||||
fullwidthCJKsymCJK :: Rule
|
||||
fullwidthCJKsymCJK = do
|
||||
lcjk <- cjkChar
|
||||
_ <- many (char ' ')
|
||||
sym <- try (some (char ':')) <|> count 1 (char '.')
|
||||
_ <- many (char ' ')
|
||||
rcjk <- cjkChar
|
||||
let transformedsym = map convertToFullwidth sym
|
||||
return $ T.pack $ [lcjk] ++ transformedsym ++ [rcjk]
|
||||
|
||||
fullwidthCJKsym :: Rule
|
||||
fullwidthCJKsym = do
|
||||
cjk <- cjkChar
|
||||
_ <- many (char ' ')
|
||||
sym <- some $ oneOf ("~!?,;" :: [Char])
|
||||
_ <- many (char ' ')
|
||||
let transformedsym = T.pack $ map convertToFullwidth sym
|
||||
return $ T.pack [cjk] <> transformedsym
|
||||
|
||||
dotsCJK :: Rule
|
||||
dotsCJK = do
|
||||
dots <- chunk "..." <|> chunk "…"
|
||||
cjk <- cjkChar
|
||||
return $ dots <> T.pack (" " ++ [cjk])
|
||||
|
||||
fixCJKcolAN :: Rule
|
||||
fixCJKcolAN = do
|
||||
cjk <- cjkChar
|
||||
_ <- chunk ":"
|
||||
an <- alphaNumChar
|
||||
return $ T.pack $ [cjk] ++ ":" ++ [an]
|
||||
|
||||
-- quotes
|
||||
-- seems confusing ...
|
||||
quotesym :: [Char]
|
||||
quotesym = "\x05f4\"\'`"
|
||||
|
||||
cjkquote :: Rule
|
||||
cjkquote = do
|
||||
cjk <- cjkChar
|
||||
quote <- oneOf quotesym
|
||||
return $ T.pack $ [cjk] ++ " " ++ [quote]
|
||||
|
||||
quoteCJK :: Rule
|
||||
quoteCJK = do
|
||||
quote <- oneOf quotesym
|
||||
cjk <- cjkChar
|
||||
return $ T.pack $ [quote] ++ " " ++ [cjk]
|
||||
|
||||
fixQuote :: Rule
|
||||
fixQuote = do
|
||||
openQuotes <- T.pack <$> some (oneOf quotesym)
|
||||
_ <- many spaceChar
|
||||
content <- T.pack <$> someTill anySingle (lookAhead $ some (oneOf quotesym))
|
||||
closeQuotes <- T.pack <$> some (oneOf quotesym)
|
||||
return $ openQuotes <> T.strip content <> closeQuotes
|
||||
|
||||
-- the rule set
|
||||
myRules :: RuleSet
|
||||
myRules = [cjksymcjk]
|
||||
myRules =
|
||||
[ fullwidthCJKsymCJK,
|
||||
fullwidthCJKsym,
|
||||
dotsCJK,
|
||||
fixCJKcolAN,
|
||||
cjkquote,
|
||||
quoteCJK,
|
||||
fixQuote
|
||||
]
|
||||
14
test/Main.hs
14
test/Main.hs
@@ -6,8 +6,16 @@ import Test.Hspec
|
||||
|
||||
main :: IO ()
|
||||
main = hspec $ do
|
||||
describe "MyLib.cjksymcjk" $ do
|
||||
it "converts symbols between CJK characters to fullwidth" $ do
|
||||
describe "MyLib.cjksym(cjk)" $ do
|
||||
it "converts symbols to fullwidth" $ do
|
||||
applyRules myRules "你 : 好" `shouldBe` "你:好"
|
||||
applyRules myRules "你.好" `shouldBe` "你。好"
|
||||
applyRules myRules "你 ? 好" `shouldBe` "你?好"
|
||||
applyRules myRules "你:好:他" `shouldBe` "你:好:他"
|
||||
applyRules myRules "你 ? 好" `shouldBe` "你?好"
|
||||
applyRules myRules "你…好" `shouldBe` "你… 好"
|
||||
applyRules myRules "你...好" `shouldBe` "你... 好"
|
||||
applyRules myRules "你:0" `shouldBe` "你:0"
|
||||
applyRules myRules "我说:\" 他说:\'你好\'\"" `shouldBe` "我说:\"他说:\'你好\'\""
|
||||
it "adds spaces" $ do
|
||||
applyRules myRules "\'你好\'" `shouldBe` "\'你好\'"
|
||||
applyRules myRules "你\'hello\'" `shouldBe` "你 \'hello\'"
|
||||
Reference in New Issue
Block a user