z
This commit is contained in:
91
src/MyLib.hs
91
src/MyLib.hs
@@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
module MyLib where
|
module MyLib where
|
||||||
|
|
||||||
|
import Data.Function (fix)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Data.Void (Void)
|
import Data.Void (Void)
|
||||||
@@ -16,20 +17,16 @@ type Rule = Parser Text
|
|||||||
|
|
||||||
type RuleSet = [Rule]
|
type RuleSet = [Rule]
|
||||||
|
|
||||||
|
applyUntilFixed :: Rule -> Text -> Text
|
||||||
|
applyUntilFixed rule =
|
||||||
|
fix
|
||||||
|
( \loop current ->
|
||||||
|
let next = streamEdit rule id current
|
||||||
|
in if next == current then next else loop next
|
||||||
|
)
|
||||||
|
|
||||||
applyRules :: RuleSet -> Text -> Text
|
applyRules :: RuleSet -> Text -> Text
|
||||||
applyRules [] input = input
|
applyRules rules input = foldl (flip applyUntilFixed) input rules
|
||||||
applyRules rules input = streamEdit (choice rules) id input
|
|
||||||
|
|
||||||
-- -- TEST RULES
|
|
||||||
-- appleToOrange :: Rule
|
|
||||||
-- appleToOrange = "orange" <$ chunk "apple"
|
|
||||||
|
|
||||||
-- emailAtRule :: Rule
|
|
||||||
-- emailAtRule = do
|
|
||||||
-- prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
|
|
||||||
-- _ <- char '@'
|
|
||||||
-- suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
|
|
||||||
-- return $ T.pack prefix <> "[at]" <> T.pack suffix
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
-------------------------------------------------------------------------------
|
||||||
-- rules for pangu
|
-- rules for pangu
|
||||||
@@ -61,24 +58,68 @@ convertToFullwidth c =
|
|||||||
'?' -> '?'
|
'?' -> '?'
|
||||||
',' -> ','
|
',' -> ','
|
||||||
';' -> ';'
|
';' -> ';'
|
||||||
|
'\"' -> '”'
|
||||||
|
'\'' -> '’'
|
||||||
_ -> c
|
_ -> c
|
||||||
|
|
||||||
-- A parser that matches a single CJK character
|
-- A parser that matches a single CJK character
|
||||||
cjkChar :: Parser Char
|
cjkChar :: Parser Char
|
||||||
cjkChar = satisfy isCJK
|
cjkChar = satisfy isCJK
|
||||||
|
|
||||||
cjksymcjk :: Rule
|
-- use python.py as reference for these rules
|
||||||
cjksymcjk = do
|
|
||||||
c1 <- cjkChar
|
fullwidthCJKsymCJK :: Rule
|
||||||
mid <- do
|
fullwidthCJKsymCJK = do
|
||||||
_ <- many (char ' ') -- leading spaces
|
lcjk <- cjkChar
|
||||||
core <- some $ oneOf (":.~!?,;" :: [Char])
|
_ <- many (char ' ')
|
||||||
_ <- many (char ' ') -- trailing spaces
|
sym <- fmap T.unpack (chunk ".") <|> some (oneOf (":" :: [Char]))
|
||||||
return $ T.pack core
|
_ <- many (char ' ')
|
||||||
c2 <- cjkChar
|
rcjk <- cjkChar
|
||||||
let transformedMid = T.pack $ map convertToFullwidth (T.unpack mid)
|
|
||||||
return $ T.singleton c1 <> transformedMid <> T.singleton c2
|
let transformedsym = T.pack $ map convertToFullwidth sym
|
||||||
|
return $ T.pack [lcjk] <> transformedsym <> T.pack [rcjk]
|
||||||
|
|
||||||
|
fullwidthCJKsym :: Rule
|
||||||
|
fullwidthCJKsym = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
_ <- many (char ' ')
|
||||||
|
sym <- some $ oneOf ("~!?,;" :: [Char])
|
||||||
|
_ <- many (char ' ')
|
||||||
|
let transformedsym = T.pack $ map convertToFullwidth sym
|
||||||
|
return $ T.pack [cjk] <> transformedsym
|
||||||
|
|
||||||
|
dotsCJK :: Rule
|
||||||
|
dotsCJK = do
|
||||||
|
dots <- chunk "..." <|> chunk "…"
|
||||||
|
cjk <- cjkChar
|
||||||
|
return $ dots <> T.pack (" " ++ [cjk])
|
||||||
|
|
||||||
|
fixCJKcolAN :: Rule
|
||||||
|
fixCJKcolAN = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
_ <- chunk ":"
|
||||||
|
an <- alphaNumChar
|
||||||
|
return $ T.pack $ [cjk] ++ ":" ++ [an]
|
||||||
|
|
||||||
|
cjkquote :: Rule
|
||||||
|
cjkquote = do
|
||||||
|
cjk <- cjkChar
|
||||||
|
quote <- oneOf ("\x05f4\"\'" :: [Char])
|
||||||
|
return $ T.pack $ [cjk] ++ " " ++ [quote]
|
||||||
|
|
||||||
|
quoteCJK :: Rule
|
||||||
|
quoteCJK = do
|
||||||
|
quote <- oneOf ("\x05f4\"\'" :: [Char])
|
||||||
|
cjk <- cjkChar
|
||||||
|
return $ T.pack $ [quote] ++ " " ++ [cjk]
|
||||||
|
|
||||||
-- the rule set
|
-- the rule set
|
||||||
myRules :: RuleSet
|
myRules :: RuleSet
|
||||||
myRules = [cjksymcjk]
|
myRules =
|
||||||
|
[ fullwidthCJKsymCJK,
|
||||||
|
fullwidthCJKsym,
|
||||||
|
dotsCJK,
|
||||||
|
fixCJKcolAN,
|
||||||
|
cjkquote,
|
||||||
|
quoteCJK
|
||||||
|
]
|
||||||
13
test/Main.hs
13
test/Main.hs
@@ -6,8 +6,15 @@ import Test.Hspec
|
|||||||
|
|
||||||
main :: IO ()
|
main :: IO ()
|
||||||
main = hspec $ do
|
main = hspec $ do
|
||||||
describe "MyLib.cjksymcjk" $ do
|
describe "MyLib.cjksym(cjk)" $ do
|
||||||
it "converts symbols between CJK characters to fullwidth" $ do
|
it "converts symbols to fullwidth" $ do
|
||||||
applyRules myRules "你 : 好" `shouldBe` "你:好"
|
applyRules myRules "你 : 好" `shouldBe` "你:好"
|
||||||
applyRules myRules "你.好" `shouldBe` "你。好"
|
applyRules myRules "你.好" `shouldBe` "你。好"
|
||||||
applyRules myRules "你 ? 好" `shouldBe` "你?好"
|
applyRules myRules "你:好:他" `shouldBe` "你:好:他"
|
||||||
|
applyRules myRules "你 ? 好" `shouldBe` "你?好"
|
||||||
|
applyRules myRules "你…好" `shouldBe` "你… 好"
|
||||||
|
applyRules myRules "你...好" `shouldBe` "你... 好"
|
||||||
|
applyRules myRules "你:0" `shouldBe` "你:0"
|
||||||
|
-- applyRules myRules "我说:\" 他说:\'你好\'\"" `shouldBe` "我说:“他说:‘你好’”"
|
||||||
|
it "adds spaces" $ do
|
||||||
|
applyRules myRules "\'你好\'" `shouldBe` "\' 你好 \'"
|
||||||
Reference in New Issue
Block a user