This commit is contained in:
2026-01-02 22:25:32 +08:00
parent 0f8ce47fa1
commit a2ea14dde2
2 changed files with 76 additions and 28 deletions

View File

@@ -2,6 +2,7 @@
module MyLib where module MyLib where
import Data.Function (fix)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Void (Void) import Data.Void (Void)
@@ -16,20 +17,16 @@ type Rule = Parser Text
type RuleSet = [Rule] type RuleSet = [Rule]
applyUntilFixed :: Rule -> Text -> Text
applyUntilFixed rule =
fix
( \loop current ->
let next = streamEdit rule id current
in if next == current then next else loop next
)
applyRules :: RuleSet -> Text -> Text applyRules :: RuleSet -> Text -> Text
applyRules [] input = input applyRules rules input = foldl (flip applyUntilFixed) input rules
applyRules rules input = streamEdit (choice rules) id input
-- -- TEST RULES
-- appleToOrange :: Rule
-- appleToOrange = "orange" <$ chunk "apple"
-- emailAtRule :: Rule
-- emailAtRule = do
-- prefix <- some (alphaNumChar <|> oneOf ("._%+-" :: String))
-- _ <- char '@'
-- suffix <- some (alphaNumChar <|> oneOf (".-" :: String))
-- return $ T.pack prefix <> "[at]" <> T.pack suffix
------------------------------------------------------------------------------- -------------------------------------------------------------------------------
-- rules for pangu -- rules for pangu
@@ -61,24 +58,68 @@ convertToFullwidth c =
'?' -> '' '?' -> ''
',' -> '' ',' -> ''
';' -> '' ';' -> ''
'\"' -> '”'
'\'' -> ''
_ -> c _ -> c
-- A parser that matches a single CJK character -- A parser that matches a single CJK character
cjkChar :: Parser Char cjkChar :: Parser Char
cjkChar = satisfy isCJK cjkChar = satisfy isCJK
cjksymcjk :: Rule -- use python.py as reference for these rules
cjksymcjk = do
c1 <- cjkChar fullwidthCJKsymCJK :: Rule
mid <- do fullwidthCJKsymCJK = do
_ <- many (char ' ') -- leading spaces lcjk <- cjkChar
core <- some $ oneOf (":.~!?,;" :: [Char]) _ <- many (char ' ')
_ <- many (char ' ') -- trailing spaces sym <- fmap T.unpack (chunk ".") <|> some (oneOf (":" :: [Char]))
return $ T.pack core _ <- many (char ' ')
c2 <- cjkChar rcjk <- cjkChar
let transformedMid = T.pack $ map convertToFullwidth (T.unpack mid)
return $ T.singleton c1 <> transformedMid <> T.singleton c2 let transformedsym = T.pack $ map convertToFullwidth sym
return $ T.pack [lcjk] <> transformedsym <> T.pack [rcjk]
fullwidthCJKsym :: Rule
fullwidthCJKsym = do
cjk <- cjkChar
_ <- many (char ' ')
sym <- some $ oneOf ("~!?,;" :: [Char])
_ <- many (char ' ')
let transformedsym = T.pack $ map convertToFullwidth sym
return $ T.pack [cjk] <> transformedsym
dotsCJK :: Rule
dotsCJK = do
dots <- chunk "..." <|> chunk ""
cjk <- cjkChar
return $ dots <> T.pack (" " ++ [cjk])
fixCJKcolAN :: Rule
fixCJKcolAN = do
cjk <- cjkChar
_ <- chunk ":"
an <- alphaNumChar
return $ T.pack $ [cjk] ++ "" ++ [an]
cjkquote :: Rule
cjkquote = do
cjk <- cjkChar
quote <- oneOf ("\x05f4\"\'" :: [Char])
return $ T.pack $ [cjk] ++ " " ++ [quote]
quoteCJK :: Rule
quoteCJK = do
quote <- oneOf ("\x05f4\"\'" :: [Char])
cjk <- cjkChar
return $ T.pack $ [quote] ++ " " ++ [cjk]
-- the rule set -- the rule set
myRules :: RuleSet myRules :: RuleSet
myRules = [cjksymcjk] myRules =
[ fullwidthCJKsymCJK,
fullwidthCJKsym,
dotsCJK,
fixCJKcolAN,
cjkquote,
quoteCJK
]

View File

@@ -6,8 +6,15 @@ import Test.Hspec
main :: IO () main :: IO ()
main = hspec $ do main = hspec $ do
describe "MyLib.cjksymcjk" $ do describe "MyLib.cjksym(cjk)" $ do
it "converts symbols between CJK characters to fullwidth" $ do it "converts symbols to fullwidth" $ do
applyRules myRules "你 : 好" `shouldBe` "你:好" applyRules myRules "你 : 好" `shouldBe` "你:好"
applyRules myRules "你.好" `shouldBe` "你。好" applyRules myRules "你.好" `shouldBe` "你。好"
applyRules myRules "你:好:他" `shouldBe` "你:好:他"
applyRules myRules "你 ? 好" `shouldBe` "你?好" applyRules myRules "你 ? 好" `shouldBe` "你?好"
applyRules myRules "你…好" `shouldBe` "你… 好"
applyRules myRules "你...好" `shouldBe` "你... 好"
applyRules myRules "你:0" `shouldBe` "0"
-- applyRules myRules "我说:\" 他说:\'你好\'\"" `shouldBe` "我说:“他说:‘你好’”"
it "adds spaces" $ do
applyRules myRules "\'你好\'" `shouldBe` "\' 你好 \'"