first commit

This commit is contained in:
2026-03-20 13:04:46 +08:00
commit c65e9722a5
41 changed files with 3082 additions and 0 deletions
+607
View File
@@ -0,0 +1,607 @@
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneKindSignatures #-}
module ChaoDoc (chaoDocRead, chaoDocWrite, chaoDocPandocCompiler, chaoDocCompiler) where
import Control.Monad.State
import Data.Char (isAlphaNum, isSpace)
import Data.Kind (Type)
import Data.Either
import Data.Functor
import Data.List (intersect, stripPrefix)
import qualified Data.Map as M
import Data.Maybe
import Data.Text (Text, pack)
import qualified Data.Text as T
import Hakyll
import Pangu (isCJK, pangu)
import SideNoteHTML (usingSideNotesHTML)
import System.IO.Unsafe
import Text.Pandoc
-- import Text.Pandoc.Builder
import Text.Pandoc.Citeproc
import Text.Pandoc.Walk (query, walk, walkM)
-- setMeta key val (Pandoc (Meta ms) bs) = Pandoc (Meta $ M.insert key val ms) bs
-- On mac, please do `export LANG=C` before using this thing
chaoDocRead :: ReaderOptions
chaoDocRead =
def
{ readerExtensions =
enableExtension Ext_lists_without_preceding_blankline $
enableExtension Ext_wikilinks_title_after_pipe $
enableExtension Ext_tex_math_double_backslash $
enableExtension Ext_tex_math_single_backslash $
enableExtension Ext_latex_macros $
enableExtension Ext_raw_tex $
disableExtension Ext_blank_before_header pandocExtensions
}
chaoDocWrite :: WriterOptions
chaoDocWrite =
def
{ writerHTMLMathMethod = MathML,
-- writerHtml5 = True,
-- writerHighlightStyle = Just syntaxHighlightingStyle,
writerNumberSections = True,
writerTableOfContents = True,
writerTOCDepth = 2
}
-- getInline :: Inline -> [Inline]
-- getInline x = [x]
pandocToInline :: Pandoc -> [Inline]
pandocToInline (Pandoc _ blocks) = go (reverse blocks)
where
go (Plain inlines : _) = inlines
go (Para inlines : _) = inlines
go (_ : xs) = go xs
go [] = []
incrementalBlock :: [Text]
incrementalBlock =
[ "Theorem",
"Conjecture",
"Definition",
"Example",
"Lemma",
"Problem",
"Proposition",
"Corollary",
"Observation",
"Claim",
"定理",
"猜想",
"定义",
"",
"引理",
"问题",
"命题",
"推论",
"观察"
]
otherBlock :: [Text]
otherBlock = ["Proof", "Remark", "证明", "备注"]
theoremClasses :: [Text]
theoremClasses = incrementalBlock ++ otherBlock
canonicalTheoremType :: Text -> Maybe Text
canonicalTheoremType raw =
listToMaybe
[ cls
| cls <- theoremClasses,
T.toCaseFold cls == T.toCaseFold (T.strip raw)
]
-- create a filter for theorems
getClass :: Attr -> [Text]
getClass (_, c, _) = c
addClass :: Attr -> Text -> Attr
addClass (a, b, c) d = (a, d : b, c)
addAttr :: Attr -> Text -> Text -> Attr
addAttr (a, b, c) x y = (a, b, (x, y) : c)
-- For each theorem, add a number, and also add add class theorem
preprocessTheorems :: Block -> State Int Block
preprocessTheorems (Div attr xs)
| isIncremental = do
curId <- get
put (curId + 1)
return $ Div (addAttr attr' "index" (pack $ show curId)) xs
| isOtherBlock = return $ Div attr' xs
| otherwise = return (Div attr xs)
where
isIncremental = getClass attr `intersect` incrementalBlock /= []
isOtherBlock = getClass attr `intersect` otherBlock /= []
theoremType = head (getClass attr `intersect` theoremClasses)
attr' = addAttr attr "type" theoremType
preprocessTheorems x = return x
theoremFilter :: Pandoc -> Pandoc
theoremFilter doc =
walk makeTheorem $
autorefFilter $
evalState (walkM preprocessTheorems normalizedDoc) 1
where
normalizedDoc = obsidianTheoremFilter doc
-- [idx, type, maybe index]
theoremIndex :: Block -> [(Text, (Text, Maybe Text))]
theoremIndex (Div attr _)
| isNothing t = []
| T.null idx = []
| otherwise = [(idx, (fromJust t, index))]
where
(idx, _, parm) = attr
t = lookup "type" parm
index = lookup "index" parm
theoremIndex _ = []
theoremLink :: [(Text, (Text, Maybe Text))] -> Text -> Maybe Inline
theoremLink refs blockId = do
(theoremType, index) <- lookup blockId refs
let num = fromMaybe "" index
linkTitle
| T.null num = theoremType
| otherwise = theoremType <> " " <> num
return $ Link nullAttr [Str linkTitle] ("#" <> blockId, linkTitle)
autoref :: [(Text, (Text, Maybe Text))] -> Inline -> Inline
autoref x (Cite citations inlines)
| Just link <- theoremLink x citeid = link
| otherwise = Cite citations inlines
where
citeid = citationId $ head citations
autoref x (Link attr inlines (target, title))
| Just blockId <- T.stripPrefix "#^" target,
Just link <- theoremLink x blockId =
link
| otherwise = Link attr inlines (target, title)
autoref _ y = y
autorefFilter :: Pandoc -> Pandoc
autorefFilter x = walk (autoref links) x
where
links = query theoremIndex x
-- processCitations works on AST. If you want to use citations in theorem name,
-- then you need to convert citations there to AST as well and then use processCitations\
-- Thus one need to apply the theorem filter first.
-- autoref still does not work.
mathMacros :: Text
mathMacros = unsafePerformIO (pack <$> readFile "math-macros.md")
{-# NOINLINE mathMacros #-}
prependMacros :: Text -> Text -> Text
prependMacros macros body = macros <> "\n\n" <> body
prependMathMacros :: Text -> Text
prependMathMacros = prependMacros mathMacros
thmNamePandoc :: Text -> Pandoc
thmNamePandoc x =
fromRight (Pandoc nullMeta []) . runPure $
readMarkdown chaoDocRead (prependMathMacros x)
obsidianTheoremFilter :: Pandoc -> Pandoc
obsidianTheoremFilter = attachStandaloneLabels . walk rewriteObsidianBlockQuote
rewriteObsidianBlockQuote :: Block -> Block
rewriteObsidianBlockQuote block@(BlockQuote quoteBlocks) = fromMaybe block do
(theoremType, title, bodyBlocks0) <- parseObsidianTheorem quoteBlocks
let (bodyBlocks, label) = stripTrailingLabel bodyBlocks0
attrs :: [(Text, Text)]
attrs = maybe [] (\x -> [("title", x)]) title
return $ Div (fromMaybe "" label, [theoremType], attrs) bodyBlocks
rewriteObsidianBlockQuote block = block
attachStandaloneLabels :: Pandoc -> Pandoc
attachStandaloneLabels (Pandoc meta blocks) = Pandoc meta (attachLabels blocks)
attachLabels :: [Block] -> [Block]
attachLabels (Div attr xs : next : rest)
| isTheoremAttr attr,
T.null blockId =
case blockLabel next of
Just label -> Div (setAttrId attr label) xs : attachLabels rest
Nothing -> Div attr xs : attachLabels (next : rest)
where
(blockId, _, _) = attr
attachLabels (x : xs) = x : attachLabels xs
attachLabels [] = []
isTheoremAttr :: Attr -> Bool
isTheoremAttr attr = getClass attr `intersect` theoremClasses /= []
setAttrId :: Attr -> Text -> Attr
setAttrId (_, classes, attrs) blockId = (blockId, classes, attrs)
parseObsidianTheorem :: [Block] -> Maybe (Text, Maybe Text, [Block])
parseObsidianTheorem [] = Nothing
parseObsidianTheorem (x : xs) = do
(theoremType, title, bodyStart) <- parseCalloutHeader x
return (theoremType, title, maybeToList bodyStart ++ xs)
parseCalloutHeader :: Block -> Maybe (Text, Maybe Text, Maybe Block)
parseCalloutHeader (Para inlines) = parseCalloutHeaderWith Para inlines
parseCalloutHeader (Plain inlines) = parseCalloutHeaderWith Plain inlines
parseCalloutHeader _ = Nothing
parseCalloutHeaderWith :: ([Inline] -> Block) -> [Inline] -> Maybe (Text, Maybe Text, Maybe Block)
parseCalloutHeaderWith mkBlock inlines = do
(theoremType, rest) <- parseCalloutPrefix inlines
let (titleInlines, bodyInlines) = splitAtFirstBreak rest
titleMarkdown = inlineMarkdown $ trimInlineSpaces titleInlines
bodyBlock = nonEmptyBlock mkBlock $ trimInlineSpaces bodyInlines
title =
if T.null titleMarkdown
then Nothing
else Just titleMarkdown
return (theoremType, title, bodyBlock)
parseCalloutPrefix :: [Inline] -> Maybe (Text, [Inline])
parseCalloutPrefix (Str marker : rest) = do
theoremType <- canonicalTheoremType =<< (T.stripPrefix "[!" marker >>= T.stripSuffix "]")
return (theoremType, dropLeadingInlineSpaces rest)
parseCalloutPrefix _ = Nothing
stripTrailingLabel :: [Block] -> ([Block], Maybe Text)
stripTrailingLabel [] = ([], Nothing)
stripTrailingLabel blocks = case unsnoc blocks of
Nothing -> (blocks, Nothing)
Just (prefix, lastBlock) -> case splitBlockLabel lastBlock of
Nothing -> (blocks, Nothing)
Just (cleaned, label) -> (prefix ++ maybeToList cleaned, Just label)
blockLabel :: Block -> Maybe Text
blockLabel block = case splitBlockLabel block of
Just (Nothing, label) -> Just label
_ -> Nothing
splitBlockLabel :: Block -> Maybe (Maybe Block, Text)
splitBlockLabel (Para inlines) = splitBlockLabelWith Para inlines
splitBlockLabel (Plain inlines) = splitBlockLabelWith Plain inlines
splitBlockLabel (BlockQuote blocks) = splitBlockLabelWithBlocks BlockQuote blocks
splitBlockLabel (Div attr blocks) = splitBlockLabelWithBlocks (Div attr) blocks
splitBlockLabel (BulletList items) = do
(items', label) <- splitBlockLabelItems items
return (nonEmptyListBlock BulletList items', label)
splitBlockLabel (OrderedList attrs items) = do
(items', label) <- splitBlockLabelItems items
return (nonEmptyListBlock (OrderedList attrs) items', label)
splitBlockLabel _ = Nothing
splitBlockLabelWith :: ([Inline] -> Block) -> [Inline] -> Maybe (Maybe Block, Text)
splitBlockLabelWith mkBlock inlines = do
(prefix, label) <- splitTrailingLabelLine inlines
return (nonEmptyBlock mkBlock prefix, label)
splitBlockLabelWithBlocks :: ([Block] -> Block) -> [Block] -> Maybe (Maybe Block, Text)
splitBlockLabelWithBlocks mkBlock blocks = do
let (blocks', label) = stripTrailingLabel blocks
label' <- label
return (nonEmptyBlocks mkBlock blocks', label')
splitBlockLabelItems :: [[Block]] -> Maybe ([[Block]], Text)
splitBlockLabelItems items = do
(prefixItems, lastItem) <- unsnoc items
let (lastItem', label) = stripTrailingLabel lastItem
label' <- label
return (prefixItems ++ maybeToList (nonEmptyListItem lastItem'), label')
splitTrailingLabelLine :: [Inline] -> Maybe ([Inline], Text)
splitTrailingLabelLine inlines = do
(prefixLines, lastLine) <- unsnoc (inlineLines inlines)
label <- labelFromInlines lastLine
return (joinInlineLines prefixLines, label)
labelFromInlines :: [Inline] -> Maybe Text
labelFromInlines [Str s] = labelFromText s
labelFromInlines _ = Nothing
labelFromText :: Text -> Maybe Text
labelFromText s = do
label <- T.stripPrefix "^" (T.strip s)
if T.null label then Nothing else Just label
inlineLines :: [Inline] -> [[Inline]]
inlineLines = foldr step [[]]
where
step SoftBreak acc = [] : acc
step LineBreak acc = [] : acc
step x (line : rest) = (x : line) : rest
step _ [] = [[]]
joinInlineLines :: [[Inline]] -> [Inline]
joinInlineLines [] = []
joinInlineLines (x : xs) = x ++ concatMap (SoftBreak :) xs
inlineMarkdownWrite :: WriterOptions
inlineMarkdownWrite =
def
{ writerExtensions = readerExtensions chaoDocRead
}
inlineMarkdown :: [Inline] -> Text
inlineMarkdown inlines =
T.strip $
fromRight "" $
runPure $
writeMarkdown inlineMarkdownWrite (Pandoc nullMeta [Plain inlines])
splitAtFirstBreak :: [Inline] -> ([Inline], [Inline])
splitAtFirstBreak = go []
where
go acc [] = (reverse acc, [])
go acc (SoftBreak : xs) = (reverse acc, xs)
go acc (LineBreak : xs) = (reverse acc, xs)
go acc (x : xs) = go (x : acc) xs
trimInlineSpaces :: [Inline] -> [Inline]
trimInlineSpaces = dropTrailingInlineSpaces . dropLeadingInlineSpaces
dropLeadingInlineSpaces :: [Inline] -> [Inline]
dropLeadingInlineSpaces = dropWhile isInlineSpace
dropTrailingInlineSpaces :: [Inline] -> [Inline]
dropTrailingInlineSpaces = reverse . dropWhile isInlineSpace . reverse
isInlineSpace :: Inline -> Bool
isInlineSpace Space = True
isInlineSpace _ = False
nonEmptyBlock :: ([Inline] -> Block) -> [Inline] -> Maybe Block
nonEmptyBlock mkBlock inlines
| (not . all isWhitespaceInline) inlines = Just (mkBlock inlines)
| otherwise = Nothing
nonEmptyBlocks :: ([Block] -> Block) -> [Block] -> Maybe Block
nonEmptyBlocks mkBlock blocks
| null blocks = Nothing
| otherwise = Just (mkBlock blocks)
nonEmptyListBlock :: ([[Block]] -> Block) -> [[Block]] -> Maybe Block
nonEmptyListBlock mkBlock items
| null items = Nothing
| otherwise = Just (mkBlock items)
nonEmptyListItem :: [Block] -> Maybe [Block]
nonEmptyListItem []
= Nothing
nonEmptyListItem blocks = Just blocks
isWhitespaceInline :: Inline -> Bool
isWhitespaceInline Space = True
isWhitespaceInline SoftBreak = True
isWhitespaceInline LineBreak = True
isWhitespaceInline _ = False
unsnoc :: [a] -> Maybe ([a], a)
unsnoc [] = Nothing
unsnoc [x] = Just ([], x)
unsnoc (x : xs) = do
(prefix, lastElem) <- unsnoc xs
return (x : prefix, lastElem)
makeTheorem :: Block -> Block
makeTheorem (Div attr xs)
| isNothing t = Div attr xs
| otherwise = Div (addClass attr "theorem-environment") (Plain [header] : xs)
where
(_, _, parm) = attr
t = lookup "type" parm
name = lookup "title" parm
index = lookup "index" parm
header = Span (addClass nullAttr "theorem-header") [typetext, indextext, nametext]
typetext = Span (addClass nullAttr "type") [Str $ fromJust t]
indextext =
if isNothing index
then Str ""
else Span (addClass nullAttr "index") [Str $ fromJust index]
nametext =
if isNothing name
then Str ""
else Span (addClass nullAttr "name") (pandocToInline $ thmNamePandoc $ fromJust name)
makeTheorem x = x
-- bib from https://github.com/chaoxu/chaoxu.github.io/tree/develop
cslFile :: String
cslFile = "bib_style.csl"
bibFile :: String
bibFile = "reference.bib"
chaoDocPandocCompiler :: Compiler (Item Pandoc)
chaoDocPandocCompiler = do
macros <- T.pack <$> loadBody "math-macros.md"
body <- getResourceBody
let bodyWithMacros =
fmap (T.unpack . prependMacros macros . T.pack) body
myReadPandocBiblio chaoDocRead (T.pack cslFile) (T.pack bibFile) myFilter bodyWithMacros
chaoDocCompiler :: Compiler (Item String)
chaoDocCompiler = chaoDocPandocCompiler <&> writePandocWith chaoDocWrite
addMeta :: T.Text -> MetaValue -> Pandoc -> Pandoc
addMeta name value (Pandoc meta a) =
let prevMap = unMeta meta
newMap = M.insert name value prevMap
newMeta = Meta newMap
in Pandoc newMeta a
myReadPandocBiblio ::
ReaderOptions ->
T.Text -> -- csl file name
T.Text ->
(Pandoc -> Pandoc) -> -- apply a filter before citeproc
Item String ->
Compiler (Item Pandoc)
myReadPandocBiblio ropt csl biblio pdfilter item = do
-- Parse CSL file, if given
-- style <- unsafeCompiler $ CSL.readCSLFile Nothing . toFilePath . itemIdentifier $ csl
-- We need to know the citation keys, add then *before* actually parsing the
-- actual page. If we don't do this, pandoc won't even consider them
-- citations!
-- let Biblio refs = itemBody biblio
pandoc <- itemBody <$> readPandocWith ropt item
let pandoc' =
fromRight pandoc $
unsafePerformIO $
runIO $
processCitations $
addMeta "bibliography" (MetaList [MetaString biblio]) $
addMeta "csl" (MetaString csl) $
addMeta "link-citations" (MetaBool True) $
addMeta "reference-section-title" (MetaInlines [Str "References"]) $
pdfilter pandoc -- here's the change
-- let a x = itemSetBody (pandoc' x)
return $ fmap (const pandoc') item
myFilter :: Pandoc -> Pandoc
myFilter = usingSideNotesHTML chaoDocWrite . theoremFilter . panguFilter . displayMathFilter
-- pangu filter
lastChar :: Inline -> Maybe Char
lastChar e = case e of
Str s -> if null (T.unpack s) then Nothing else Just (last (T.unpack s))
Emph is -> lastCharList is
Strong is -> lastCharList is
Strikeout is -> lastCharList is
Link _ is _ -> lastCharList is
Span _ is -> lastCharList is
Quoted _ is -> lastCharList is
_ -> Nothing
where
lastCharList [] = Nothing
lastCharList is = lastChar (last is)
firstChar :: Inline -> Maybe Char
firstChar e = case e of
Str s -> if null (T.unpack s) then Nothing else Just (head (T.unpack s))
Emph is -> firstCharList is
Strong is -> firstCharList is
Strikeout is -> firstCharList is
Link _ is _ -> firstCharList is
Span _ is -> firstCharList is
Quoted _ is -> firstCharList is
_ -> Nothing
where
firstCharList [] = Nothing
firstCharList is = firstChar (head is)
panguInline :: Inline -> Inline
panguInline e = case e of
Str s -> Str (pangu s)
Emph is -> Emph (panguInlines is)
Strong is -> Strong (panguInlines is)
Strikeout is -> Strikeout (panguInlines is)
Link at is tg -> Link at (panguInlines is) tg
Span at is -> Span at (panguInlines is)
Quoted qt is -> Quoted qt (panguInlines is)
_ -> e
panguInlines :: [Inline] -> [Inline]
panguInlines = foldr (addSpace . panguInline) []
where
addSpace x [] = [x]
addSpace x (y : ys)
| shouldSpace x y = x : Space : y : ys
| otherwise = x : y : ys
shouldSpace x y = case (lastChar x, firstChar y) of
(Just lc, Just fc) -> isCJK lc /= isCJK fc
_ -> False
panguFilter :: Pandoc -> Pandoc
panguFilter = walk transformBlocks
where
transformBlocks :: Block -> Block
transformBlocks (Para inlines) = Para (panguInlines inlines)
transformBlocks x = x
type MathTag :: Type
data MathTag = MathTag
{ mathTagStarred :: Bool,
mathTagBody :: Text
}
extractMathTag :: Text -> Maybe (Text, MathTag)
extractMathTag source = do
(mathBody, starred, tagBody) <- findTrailingMathTag (T.unpack source)
return
( T.stripEnd (T.pack mathBody),
MathTag starred (T.strip (T.pack tagBody))
)
findTrailingMathTag :: String -> Maybe (String, Bool, String)
findTrailingMathTag = go [] Nothing
where
go :: String -> Maybe (String, Bool, String) -> String -> Maybe (String, Bool, String)
go _ best [] = best
go prefix best rest@(c : cs) =
go (c : prefix) best' cs
where
best' = case parseMathTagPrefix rest of
Just (starred, tagBody, suffix)
| all isSpace suffix -> Just (reverse prefix, starred, tagBody)
_ -> best
parseMathTagPrefix :: String -> Maybe (Bool, String, String)
parseMathTagPrefix s
| Just rest <- stripPrefix "\\tag*{" s = do
(tagBody, suffix) <- parseBalancedBraces 1 [] rest
return (True, tagBody, suffix)
| Just rest <- stripPrefix "\\tag{" s = do
(tagBody, suffix) <- parseBalancedBraces 1 [] rest
return (False, tagBody, suffix)
| otherwise = Nothing
parseBalancedBraces :: Int -> String -> String -> Maybe (String, String)
parseBalancedBraces _ _ [] = Nothing
parseBalancedBraces depth acc ('\\' : x : xs)
| x `elem` ['{', '}', '\\'] = parseBalancedBraces depth (x : '\\' : acc) xs
parseBalancedBraces depth acc ('{' : xs) =
parseBalancedBraces (depth + 1) ('{' : acc) xs
parseBalancedBraces depth acc ('}' : xs)
| depth == 1 = Just (reverse acc, xs)
| otherwise = parseBalancedBraces (depth - 1) ('}' : acc) xs
parseBalancedBraces depth acc (x : xs) =
parseBalancedBraces depth (x : acc) xs
isPlainMathTag :: Text -> Bool
isPlainMathTag =
T.all \c ->
isAlphaNum c || c `elem` ("*-.:()[] " :: String)
mathTagInlines :: MathTag -> [Inline]
mathTagInlines tag
| mathTagStarred tag = tagBody
| otherwise = [Str "("] ++ tagBody ++ [Str ")"]
where
tagText = mathTagBody tag
tagBody
| isPlainMathTag tagText = [Str tagText]
| otherwise = [Math InlineMath tagText]
-- display math wrapper for MathML
displayMathFilter :: Pandoc -> Pandoc
displayMathFilter = walk wrapDisplayMath
where
wrapDisplayMath (Math DisplayMath source)
| Just (mathBody, tag) <- extractMathTag source =
Span
("", ["math-container", "math-container-tagged"], [])
[ Span ("", ["math-tag-spacer"], []) [],
Span ("", ["math-equation"], []) [Math DisplayMath mathBody],
Span ("", ["math-tag"], []) (mathTagInlines tag)
]
| otherwise =
Span ("", ["math-container"], []) [Math DisplayMath source]
wrapDisplayMath x = x
+227
View File
@@ -0,0 +1,227 @@
{-# LANGUAGE OverloadedStrings #-}
module Pangu (pangu, isCJK) where
import Data.Function (fix)
import Data.Text (Text)
import qualified Data.Text as T
import Data.Void (Void)
import Replace.Megaparsec (streamEdit)
import Text.Megaparsec
import Text.Megaparsec.Char
-------------------------------------------------------------------------------
type Parser = Parsec Void Text
type Rule = Parser Text
type RuleSet = [Rule]
applyUntilFixed :: Rule -> Text -> Text
applyUntilFixed rule =
fix
( \loop current ->
let next = streamEdit (try rule) id current
in if next == current then next else loop next
)
applyRulesRecursively :: RuleSet -> Text -> Text
applyRulesRecursively rules input = foldl (flip applyUntilFixed) input rules
applyRules :: RuleSet -> Text -> Text
applyRules rules input = foldl (flip applyOnce) input rules
where
applyOnce rule = streamEdit (try rule) id
-------------------------------------------------------------------------------
-- rules for pangu
-- alphaNumChar from megaparsec matches CJK chars...
-- need to implement a new one
alphanumericChar :: Parser Char
alphanumericChar = satisfy $ \c ->
(c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| (c >= '0' && c <= '9')
-- | Check if a character falls within the CJK ranges provided
isCJK :: Char -> Bool
isCJK c = any (\(start, end) -> c >= start && c <= end) cjkRanges
where
cjkRanges =
[ ('\x2e80', '\x2eff'),
('\x2f00', '\x2fdf'),
('\x3040', '\x309f'),
('\x30a0', '\x30fa'),
('\x30fc', '\x30ff'),
('\x3100', '\x312f'),
('\x3200', '\x32ff'),
('\x3400', '\x4dbf'),
('\x4e00', '\x9fff'),
('\xf900', '\xfaff')
]
convertToFullwidth :: Char -> Char
convertToFullwidth c =
case c of
':' -> ''
'.' -> '。'
'~' -> ''
'!' -> ''
'?' -> ''
',' -> ''
';' -> ''
'\"' -> '”'
'\'' -> ''
_ -> c
-- A parser that matches a single CJK character
cjkChar :: Parser Char
cjkChar = satisfy isCJK
-- use python.py as reference for these rules
fullwidthCJKsymCJK :: Rule
fullwidthCJKsymCJK = do
lcjk <- cjkChar
_ <- many (char ' ')
sym <- try (some (char ':')) <|> count 1 (char '.')
_ <- many (char ' ')
rcjk <- cjkChar
let transformedsym = map convertToFullwidth sym
return $ T.pack $ [lcjk] ++ transformedsym ++ [rcjk]
fullwidthCJKsym :: Rule
fullwidthCJKsym = do
cjk <- cjkChar
_ <- many (char ' ')
sym <- some $ oneOf ("~!?,;" :: [Char])
_ <- many (char ' ')
let transformedsym = T.pack $ map convertToFullwidth sym
return $ T.pack [cjk] <> transformedsym
dotsCJK :: Rule
dotsCJK = do
dots <- chunk "..." <|> chunk ""
cjk <- cjkChar
return $ dots <> T.pack (" " ++ [cjk])
fixCJKcolAN :: Rule
fixCJKcolAN = do
cjk <- cjkChar
_ <- char ':'
an <- alphanumericChar
return $ T.pack $ [cjk] ++ "" ++ [an]
-- quotes
-- seems confusing ...
quotesym :: [Char]
quotesym = "'`\x05f4\""
cjkquote :: Rule
cjkquote = do
cjk <- cjkChar
quote <- oneOf quotesym
return $ T.pack $ [cjk] ++ " " ++ [quote]
quoteCJK :: Rule
quoteCJK = do
quote <- oneOf quotesym
cjk <- cjkChar
return $ T.pack $ [quote] ++ " " ++ [cjk]
fixQuote :: Rule
fixQuote = do
openQuotes <- T.pack <$> some (oneOf quotesym)
_ <- many spaceChar
content <- T.pack <$> someTill anySingle (lookAhead $ some (oneOf quotesym))
closeQuotes <- T.pack <$> some (oneOf quotesym)
return $ openQuotes <> T.strip content <> closeQuotes
cjkpossessivequote :: Rule
cjkpossessivequote = do
cjk <- cjkChar
_ <- char '\''
_ <- lookAhead $ anySingleBut 's'
return $ T.pack $ cjk : " '"
-- This singlequoteCJK rule will turn '你好' into ' 你好'
-- which seems not desirable...
-- however, the behavior is aligned with python version
singlequoteCJK :: Rule
singlequoteCJK = do
_ <- char '\''
cjk <- cjkChar
return $ T.pack $ "' " ++ [cjk]
fixPossessivequote :: Rule
fixPossessivequote = do
pre <- cjkChar <|> alphanumericChar
_ <- some spaceChar
_ <- chunk "'s"
return $ T.pack $ pre : "'s"
-- hash
hashANSCJKhash :: Rule
hashANSCJKhash = do
cjk1 <- cjkChar
_ <- char '#'
mid <- some cjkChar
_ <- char '#'
cjk2 <- cjkChar
return $ T.pack $ [cjk1] ++ " #" ++ mid ++ "# " ++ [cjk2]
cjkhash :: Rule
cjkhash = do
cjk <- cjkChar
_ <- char '#'
_ <- lookAhead $ anySingleBut ' '
return $ T.pack $ cjk : " #"
hashcjk :: Rule
hashcjk = do
_ <- char '#'
_ <- lookAhead $ anySingleBut ' '
cjk <- cjkChar
return $ T.pack $ "# " ++ [cjk]
-- operators
cjkOPTan :: Rule
cjkOPTan = do
cjk <- cjkChar
opt <- oneOf ("+-=*/&|<>%" :: [Char])
an <- alphanumericChar
return $ T.pack [cjk, ' ', opt, ' ', an]
anOPTcjk :: Rule
anOPTcjk = do
an <- alphanumericChar
opt <- oneOf ("+-=*/&|<>%" :: [Char])
cjk <- cjkChar
return $ T.pack [an, ' ', opt, ' ', cjk]
-- slash/bracket rules are not implemented
-- CJK and alphanumeric without space
cjkans :: Rule
cjkans = do
cjk <- cjkChar
_ <- lookAhead (alphanumericChar <|> oneOf ("@$%^&*-+\\=|/" :: [Char]))
return $ T.pack [cjk, ' ']
anscjk :: Rule
anscjk = do
an <- alphanumericChar <|> oneOf ("~!$%^&*-+\\=|;:,./?" :: [Char])
_ <- lookAhead cjkChar
return $ T.pack [an, ' ']
-- rule set, the order matters
recursiveRules :: RuleSet
recursiveRules = [fullwidthCJKsymCJK, fullwidthCJKsym]
onepassRules :: RuleSet
onepassRules = [anscjk, cjkans]
pangu :: Text -> Text
pangu input = applyRules onepassRules $ applyRulesRecursively recursiveRules input
+161
View File
@@ -0,0 +1,161 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.SideNoteHTML
Description : Convert pandoc footnotes to sidenotes
Copyright : (c) Tony Zorman 2023
License : MIT
Maintainer : Tony Zorman <soliditsallgood@mailbox.org>
Stability : experimental
Portability : non-portable
-}
module SideNoteHTML (usingSideNotesHTML) where
import Control.Monad (foldM)
import Control.Monad.State (State, get, modify', runState)
import Data.Text (Text)
import Text.Pandoc (runPure, writeHtml5String)
import Text.Pandoc.Definition (Block (..), Inline (..), Pandoc (..))
import Text.Pandoc.Options (WriterOptions)
import Text.Pandoc.Shared (tshow)
import Text.Pandoc.Walk (walkM)
import qualified Data.Text as T
-- type NoteType :: Type
data NoteType = Sidenote | Marginnote
deriving stock (Show, Eq)
-- type SidenoteState :: Type
data SidenoteState = SNS
{ _writer :: !WriterOptions
, counter :: !Int
}
-- type Sidenote :: Type -> Type
type Sidenote = State SidenoteState
-- | Like 'Text.Pandoc.SideNote.usingSideNotes', but immediately
-- pre-render the sidenotes. This has the advantage that sidenotes may
-- be wrapped in a @<div>@ (instead of a 'Span'), which allows arbitrary
-- blocks to be nested in them. The disadvantage is that one now has to
-- specify the 'WriterOptions' for the current document, meaning this is
-- meant to be used as a module and is unlikely to be useful as a
-- standalone application.
--
-- ==== __Example__
--
-- Using this function with <https://jaspervdj.be/hakyll/ hakyll> could
-- look something like the following, defining an equivalent to the
-- default @pandocCompiler@.
--
-- > myPandocCompiler :: Compiler (Item String)
-- > myPandocCompiler =
-- > pandocCompilerWithTransformM
-- > defaultHakyllReaderOptions
-- > defaultHakyllWriterOptions
-- > (usingSideNotesHTML defaultHakyllWriterOptions)
--
usingSideNotesHTML :: WriterOptions -> Pandoc -> Pandoc
usingSideNotesHTML writer (Pandoc meta blocks) =
-- Drop a superfluous paragraph at the start of the document.
Pandoc meta . someStart . walkBlocks (SNS writer 0) $ blocks
where
someStart :: [Block] -> [Block]
someStart = \case
(Para [Str ""] : bs) -> bs
bs -> bs
walkBlocks :: SidenoteState -> [Block] -> [Block]
walkBlocks sns = \case
[] -> []
(b : bs) -> b' <> walkBlocks s' bs
where (b', s') = walkM mkSidenote [b] `runState` sns
-- Sidenotes can probably appear in more places; this should be
-- filled-in at some point.
mkSidenote :: [Block] -> Sidenote [Block]
mkSidenote = foldM (\acc b -> (acc <>) <$> single b) []
where
-- Try to find and render a sidenote in a single block.
single :: Block -> Sidenote [Block]
single = \case
-- Simulate a paragraph by inserting a dummy block; this is needed
-- in case two consecutive paragraphs have sidenotes, or a paragraph
-- doesn't have one at all.
Para inlines -> (Para [Str ""] :) <$> renderSidenote [] inlines
Plain inlines -> renderSidenote [] inlines
OrderedList attrs bs -> (:[]) . OrderedList attrs <$> traverse mkSidenote bs
BulletList bs -> (:[]) . BulletList <$> traverse mkSidenote bs
block -> pure [block]
renderSidenote :: [Inline] -> [Inline] -> Sidenote [Block]
renderSidenote !inlines = \case
[] -> pure [plain inlines]
Note bs : xs -> do block <- go bs
mappend [ -- Start gluing before, see [Note Comment].
plain (RawInline "html" commentStart : inlines)
, block
]
<$> renderSidenote
[RawInline "html" commentEnd] -- End gluing after
xs
b : xs -> renderSidenote (b : inlines) xs
where
go :: [Block] -> Sidenote Block
go blocks = do
SNS w i <- get <* modify' (\sns -> sns{ counter = 1 + counter sns })
let (typ, noteText) = getNoteType (render w blocks)
pure . RawBlock "html" $
mconcat [ commentEnd -- End gluing before
, label typ i <> input i <> note typ noteText
, commentStart -- Start gluing after
]
-- The '{-}' symbol differentiates between margin note and side note.
getNoteType :: Text -> (NoteType, Text)
getNoteType t
| "{-} " `T.isPrefixOf` t = (Marginnote, T.drop 4 t)
| otherwise = (Sidenote , t)
render :: WriterOptions -> [Block] -> Text
render w bs = case runPure (writeHtml5String w (Pandoc mempty bs)) of
Left err -> error $ "Text.Pandoc.SideNoteHTML.writePandocWith: " ++ show err
Right txt -> T.drop 1 (T.dropWhile (/= '\n') txt)
commentEnd :: T.Text
commentEnd = "-->"
commentStart :: T.Text
commentStart = "<!--"
plain :: [Inline] -> Block
plain = Plain . reverse
label :: NoteType -> Int -> Text
label nt i = "<label for=\"sn-" <> tshow i <> "\" class=\"margin-toggle" <> sidenoteNumber <> "\">" <> altSymbol <> "</label>"
where
sidenoteNumber :: Text = case nt of
Sidenote -> " sidenote-number"
Marginnote -> ""
altSymbol :: Text = case nt of
Sidenote -> ""
Marginnote -> "&#8853;"
input :: Int -> Text
input i = "<input type=\"checkbox\" id=\"sn-" <> tshow i <> "\" class=\"margin-toggle\"/>"
note :: NoteType -> Text -> Text
note nt body = "<div class=\"" <> T.toLower (tshow nt) <> "\">" <> body <> "</div>"
{- [Note Comment]
This is obviously horrible, but we have to do this in order for the
blocks (which are now not inline elements anymore!) immediately before
and after the sidenote to be "glued" to the sidenote itself. In this
way, the number indicating the sidenote does not have an extra space
associated to it on either side, which otherwise would be the case.
-}
+255
View File
@@ -0,0 +1,255 @@
{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
import ChaoDoc
import Data.List (sortOn)
import qualified Data.Text as T
import Hakyll
import System.FilePath
import Text.Pandoc
--------------------------------------------------------------------------------
-- https://www.rohanjain.in/hakyll-clean-urls/
cleanRoute :: Routes
cleanRoute = customRoute createIndexRoute
where
createIndexRoute ident = takeDirectory p </> takeBaseName p </> "index.html"
where
p = toFilePath ident
cleanIndexHtmls :: Item String -> Compiler (Item String)
cleanIndexHtmls = return . fmap (replaceAll pattern replacement)
where
pattern :: String = "/index.html"
replacement :: String -> String = const "/"
--------------------------------------------------------------------------------
config :: Configuration
config =
defaultConfiguration
{ ignoreFile = \path ->
ignoreFile defaultConfiguration path
|| ".git" `elem` splitDirectories (normalise path)
}
main :: IO ()
main = hakyllWith config $ do
match "images/**" $ do
route idRoute
compile copyFileCompiler
match "math-macros.md" $ compile getResourceBody
match "fonts/*.woff2" $ do
route idRoute
compile copyFileCompiler
match "favicon.ico" $ do
route idRoute
compile copyFileCompiler
-- match "404.html" $ do
-- route cleanRoute
-- compile copyFileCompiler
match "css/*" $ do
route idRoute
compile compressCssCompiler
-- match "about.md" $ do
-- route cleanRoute
-- compile $
-- chaoDocCompiler
-- >>= loadAndApplyTemplate "templates/about.html" defaultContext
-- >>= relativizeUrls
-- -- build up tags
-- tags <- buildTags "posts/*" (fromCapture "tags/*.html")
-- tagsRules tags $ \tag pattern -> do
-- let title = "Posts tagged \"" ++ tag ++ "\""
-- route cleanRoute
-- compile $ do
-- posts <- recentFirst =<< loadAll pattern
-- let ctx =
-- constField "title" title
-- `mappend` listField "posts" (postCtxWithTags tags) (return posts)
-- `mappend` defaultContext
-- makeItem ""
-- >>= loadAndApplyTemplate "templates/tag.html" ctx
-- >>= loadAndApplyTemplate "templates/default.html" ctx
-- >>= relativizeUrls
-- create ["tags.html"] $ do
-- route cleanRoute
-- compile $ do
-- makeItem ""
-- >>= loadAndApplyTemplate "templates/tags.html" (defaultCtxWithTags tags)
-- >>= loadAndApplyTemplate "templates/default.html" (defaultCtxWithTags tags)
-- match "posts/*" $ do
-- route cleanRoute
-- compile $ do
-- tocCtx <- getTocCtx (postCtxWithTags tags)
-- chaoDocCompiler
-- >>= loadAndApplyTemplate "templates/post.html" tocCtx
-- >>= loadAndApplyTemplate "templates/default.html" tocCtx
-- >>= relativizeUrls
-- -- >>= katexFilter
match "notes/*" $ do
route cleanRoute
compile $ do
tocCtx <- getTocCtx defaultContext
chaoDocCompiler
>>= loadAndApplyTemplate "templates/note.html" tocCtx
>>= relativizeUrls
create ["index.html"] $ do
route idRoute
compile $ do
notes <- sortOn (toFilePath . itemIdentifier) <$> loadAll "notes/*"
let notesCtx =
listField "posts" defaultContext (return notes)
`mappend` constField "title" "Notes"
`mappend` defaultContext
makeItem ""
>>= loadAndApplyTemplate "templates/notes.html" notesCtx
>>= loadAndApplyTemplate "templates/index.html" notesCtx
>>= relativizeUrls
>>= cleanIndexHtmls
-- create ["archive.html"] $ do
-- route cleanRoute
-- compile $ do
-- posts <- recentFirst =<< loadAll "posts/*"
-- let archiveCtx =
-- listField "posts" postCtx (return posts)
-- `mappend` constField "title" "Archives"
-- `mappend` defaultContext
-- makeItem ""
-- >>= loadAndApplyTemplate "templates/archive.html" archiveCtx
-- >>= loadAndApplyTemplate "templates/index.html" archiveCtx
-- >>= relativizeUrls
-- >>= cleanIndexHtmls
-- create ["draft.html"] $ do
-- route cleanRoute
-- compile $ do
-- posts <- recentFirst =<< loadAll "posts/*"
-- let draftCtx =
-- listField "posts" postCtx (return posts)
-- `mappend` constField "title" "Drafts"
-- `mappend` defaultContext
-- makeItem ""
-- >>= loadAndApplyTemplate "templates/draft.html" draftCtx
-- >>= loadAndApplyTemplate "templates/index.html" draftCtx
-- >>= relativizeUrls
-- >>= cleanIndexHtmls
-- match "index.html" $ do
-- route idRoute
-- compile $ do
-- posts <- fmap (take 25) . recentFirst =<< loadAll "posts/*"
-- let indexCtx =
-- listField "posts" postCtx (return posts)
-- `mappend` defaultContext
-- getResourceBody
-- >>= applyAsTemplate indexCtx
-- >>= loadAndApplyTemplate "templates/index.html" indexCtx
-- >>= relativizeUrls
-- >>= cleanIndexHtmls
match "templates/*" $ compile templateBodyCompiler
-- https://robertwpearce.com/hakyll-pt-2-generating-a-sitemap-xml-file.html
-- create ["sitemap.xml"] $ do
-- route idRoute
-- compile $ do
-- posts <- recentFirst =<< loadAll "posts/*"
-- singlePages <- loadAll (fromList ["about.md"])
-- let pages = posts <> singlePages
-- sitemapCtx =
-- constField "root" root
-- <> listField "pages" postCtx (return pages) -- here
-- makeItem ""
-- >>= loadAndApplyTemplate "templates/sitemap.xml" sitemapCtx
--------------------------------------------------------------------------------
-- isZhField :: Context String
-- isZhField = boolFieldM "isZh" isZh
-- where
-- isZh :: Item String -> Compiler Bool
-- isZh item = do
-- maybeLang <- getMetadataField (itemIdentifier item) "lang"
-- return (maybeLang == Just "zh")
-- postCtx :: Context String
-- postCtx =
-- dateField "date" "%B %e, %Y"
-- <> dateField "date" "%Y-%m-%d"
-- <> isZhField
-- <> defaultContext
-- postCtxWithTags :: Tags -> Context String
-- postCtxWithTags tags = tagsField "tags" tags `mappend` postCtx
-- defaultCtxWithTags :: Tags -> Context String
-- defaultCtxWithTags tags = listField "tags" tagsCtx getAllTags <> defaultContext
-- where
-- getAllTags :: Compiler [Item (String, [Identifier])]
-- getAllTags = pure . map mkItem $ tagsMap tags
-- where
-- mkItem :: (String, [Identifier]) -> Item (String, [Identifier])
-- mkItem x@(t, _) = Item (tagsMakeId tags t) x
-- tagsCtx =
-- listFieldWith "posts" (postCtxWithTags tags) getPosts
-- <> metadataField
-- <> urlField "url"
-- <> pathField "path"
-- <> titleField "title"
-- <> missingField
-- where
-- getPosts ::
-- Item (String, [Identifier]) ->
-- Compiler [Item String]
-- getPosts (itemBody -> (_, is)) = mapM load is
-- toc from https://github.com/slotThe/slotThe.github.io
getTocCtx :: Context a -> Compiler (Context a)
getTocCtx ctx = do
noToc <- (Just "true" ==) <$> (getUnderlying >>= (`getMetadataField` "no-toc"))
writerOpts <- mkTocWriter defaultHakyllWriterOptions
toc <- writePandocWith writerOpts <$> chaoDocPandocCompiler
pure $
mconcat
[ ctx,
constField "toc" $ killLinkIds (itemBody toc),
if noToc then boolField "no-toc" (pure noToc) else mempty
]
where
mkTocWriter :: WriterOptions -> Compiler WriterOptions
mkTocWriter writerOpts = do
tmpl <- either (const Nothing) Just <$> unsafeCompiler (compileTemplate "" "$toc$")
pure $
writerOpts
{ writerTableOfContents = True,
writerTOCDepth = 2,
writerTemplate = tmpl,
writerHTMLMathMethod = MathML
}
asTxt :: (T.Text -> T.Text) -> String -> String
asTxt f = T.unpack . f . T.pack
killLinkIds :: String -> String
killLinkIds = asTxt (mconcat . go . T.splitOn "id=\"toc-")
where
go :: [T.Text] -> [T.Text]
go = \case
[] -> []
x : xs -> x : map (T.drop 1 . T.dropWhile (/= '\"')) xs