Files
Hakysidian/src/ChaoDoc.hs
T
sxlxc 6daea71f00 add a workaround for mathrlap{/}{\backslash}
problems are not labeled in obsidian. so remove problem number
2026-03-21 12:33:22 +08:00

608 lines
20 KiB
Haskell

{-# LANGUAGE BlockArguments #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE StandaloneKindSignatures #-}
module ChaoDoc (chaoDocRead, chaoDocWrite, chaoDocPandocCompiler, chaoDocCompiler) where
import Control.Monad.State
import Data.Char (isAlphaNum, isSpace)
import Data.Kind (Type)
import Data.Either
import Data.Functor
import Data.List (intersect, stripPrefix)
import qualified Data.Map as M
import Data.Maybe
import Data.Text (Text, pack)
import qualified Data.Text as T
import Hakyll
import Pangu (isCJK, pangu)
import SideNoteHTML (usingSideNotesHTML)
import System.IO.Unsafe
import Text.Pandoc
-- import Text.Pandoc.Builder
import Text.Pandoc.Citeproc
import Text.Pandoc.Walk (query, walk, walkM)
-- setMeta key val (Pandoc (Meta ms) bs) = Pandoc (Meta $ M.insert key val ms) bs
-- On mac, please do `export LANG=C` before using this thing
chaoDocRead :: ReaderOptions
chaoDocRead =
def
{ readerExtensions =
enableExtension Ext_lists_without_preceding_blankline $
enableExtension Ext_wikilinks_title_after_pipe $
enableExtension Ext_tex_math_double_backslash $
enableExtension Ext_tex_math_single_backslash $
enableExtension Ext_latex_macros $
enableExtension Ext_raw_tex $
disableExtension Ext_blank_before_header pandocExtensions
}
chaoDocWrite :: WriterOptions
chaoDocWrite =
def
{ writerHTMLMathMethod = MathML,
-- writerHtml5 = True,
-- writerHighlightStyle = Just syntaxHighlightingStyle,
writerNumberSections = True,
writerTableOfContents = True,
writerTOCDepth = 2
}
-- getInline :: Inline -> [Inline]
-- getInline x = [x]
pandocToInline :: Pandoc -> [Inline]
pandocToInline (Pandoc _ blocks) = go (reverse blocks)
where
go (Plain inlines : _) = inlines
go (Para inlines : _) = inlines
go (_ : xs) = go xs
go [] = []
incrementalBlock :: [Text]
incrementalBlock =
[ "Theorem",
"Conjecture",
"Definition",
"Example",
"Lemma",
-- "Problem",
"Proposition",
"Corollary",
"Observation",
"Claim",
"定理",
"猜想",
"定义",
"",
"引理",
-- "问题",
"命题",
"推论",
"观察"
]
otherBlock :: [Text]
otherBlock = ["Problem", "Proof", "Remark", "问题", "证明", "备注"]
theoremClasses :: [Text]
theoremClasses = incrementalBlock ++ otherBlock
canonicalTheoremType :: Text -> Maybe Text
canonicalTheoremType raw =
listToMaybe
[ cls
| cls <- theoremClasses,
T.toCaseFold cls == T.toCaseFold (T.strip raw)
]
-- create a filter for theorems
getClass :: Attr -> [Text]
getClass (_, c, _) = c
addClass :: Attr -> Text -> Attr
addClass (a, b, c) d = (a, d : b, c)
addAttr :: Attr -> Text -> Text -> Attr
addAttr (a, b, c) x y = (a, b, (x, y) : c)
-- For each theorem, add a number, and also add add class theorem
preprocessTheorems :: Block -> State Int Block
preprocessTheorems (Div attr xs)
| isIncremental = do
curId <- get
put (curId + 1)
return $ Div (addAttr attr' "index" (pack $ show curId)) xs
| isOtherBlock = return $ Div attr' xs
| otherwise = return (Div attr xs)
where
isIncremental = getClass attr `intersect` incrementalBlock /= []
isOtherBlock = getClass attr `intersect` otherBlock /= []
theoremType = head (getClass attr `intersect` theoremClasses)
attr' = addAttr attr "type" theoremType
preprocessTheorems x = return x
theoremFilter :: Pandoc -> Pandoc
theoremFilter doc =
walk makeTheorem $
autorefFilter $
evalState (walkM preprocessTheorems normalizedDoc) 1
where
normalizedDoc = obsidianTheoremFilter doc
-- [idx, type, maybe index]
theoremIndex :: Block -> [(Text, (Text, Maybe Text))]
theoremIndex (Div attr _)
| isNothing t = []
| T.null idx = []
| otherwise = [(idx, (fromJust t, index))]
where
(idx, _, parm) = attr
t = lookup "type" parm
index = lookup "index" parm
theoremIndex _ = []
theoremLink :: [(Text, (Text, Maybe Text))] -> Text -> Maybe Inline
theoremLink refs blockId = do
(theoremType, index) <- lookup blockId refs
let num = fromMaybe "" index
linkTitle
| T.null num = theoremType
| otherwise = theoremType <> " " <> num
return $ Link nullAttr [Str linkTitle] ("#" <> blockId, linkTitle)
autoref :: [(Text, (Text, Maybe Text))] -> Inline -> Inline
autoref x (Cite citations inlines)
| Just link <- theoremLink x citeid = link
| otherwise = Cite citations inlines
where
citeid = citationId $ head citations
autoref x (Link attr inlines (target, title))
| Just blockId <- T.stripPrefix "#^" target,
Just link <- theoremLink x blockId =
link
| otherwise = Link attr inlines (target, title)
autoref _ y = y
autorefFilter :: Pandoc -> Pandoc
autorefFilter x = walk (autoref links) x
where
links = query theoremIndex x
-- processCitations works on AST. If you want to use citations in theorem name,
-- then you need to convert citations there to AST as well and then use processCitations\
-- Thus one need to apply the theorem filter first.
-- autoref still does not work.
mathMacros :: Text
mathMacros = unsafePerformIO (pack <$> readFile "math-macros.md")
{-# NOINLINE mathMacros #-}
prependMacros :: Text -> Text -> Text
prependMacros macros body = macros <> "\n\n" <> body
prependMathMacros :: Text -> Text
prependMathMacros = prependMacros mathMacros
thmNamePandoc :: Text -> Pandoc
thmNamePandoc x =
fromRight (Pandoc nullMeta []) . runPure $
readMarkdown chaoDocRead (prependMathMacros x)
obsidianTheoremFilter :: Pandoc -> Pandoc
obsidianTheoremFilter = attachStandaloneLabels . walk rewriteObsidianBlockQuote
rewriteObsidianBlockQuote :: Block -> Block
rewriteObsidianBlockQuote block@(BlockQuote quoteBlocks) = fromMaybe block do
(theoremType, title, bodyBlocks0) <- parseObsidianTheorem quoteBlocks
let (bodyBlocks, label) = stripTrailingLabel bodyBlocks0
attrs :: [(Text, Text)]
attrs = maybe [] (\x -> [("title", x)]) title
return $ Div (fromMaybe "" label, [theoremType], attrs) bodyBlocks
rewriteObsidianBlockQuote block = block
attachStandaloneLabels :: Pandoc -> Pandoc
attachStandaloneLabels (Pandoc meta blocks) = Pandoc meta (attachLabels blocks)
attachLabels :: [Block] -> [Block]
attachLabels (Div attr xs : next : rest)
| isTheoremAttr attr,
T.null blockId =
case blockLabel next of
Just label -> Div (setAttrId attr label) xs : attachLabels rest
Nothing -> Div attr xs : attachLabels (next : rest)
where
(blockId, _, _) = attr
attachLabels (x : xs) = x : attachLabels xs
attachLabels [] = []
isTheoremAttr :: Attr -> Bool
isTheoremAttr attr = getClass attr `intersect` theoremClasses /= []
setAttrId :: Attr -> Text -> Attr
setAttrId (_, classes, attrs) blockId = (blockId, classes, attrs)
parseObsidianTheorem :: [Block] -> Maybe (Text, Maybe Text, [Block])
parseObsidianTheorem [] = Nothing
parseObsidianTheorem (x : xs) = do
(theoremType, title, bodyStart) <- parseCalloutHeader x
return (theoremType, title, maybeToList bodyStart ++ xs)
parseCalloutHeader :: Block -> Maybe (Text, Maybe Text, Maybe Block)
parseCalloutHeader (Para inlines) = parseCalloutHeaderWith Para inlines
parseCalloutHeader (Plain inlines) = parseCalloutHeaderWith Plain inlines
parseCalloutHeader _ = Nothing
parseCalloutHeaderWith :: ([Inline] -> Block) -> [Inline] -> Maybe (Text, Maybe Text, Maybe Block)
parseCalloutHeaderWith mkBlock inlines = do
(theoremType, rest) <- parseCalloutPrefix inlines
let (titleInlines, bodyInlines) = splitAtFirstBreak rest
titleMarkdown = inlineMarkdown $ trimInlineSpaces titleInlines
bodyBlock = nonEmptyBlock mkBlock $ trimInlineSpaces bodyInlines
title =
if T.null titleMarkdown
then Nothing
else Just titleMarkdown
return (theoremType, title, bodyBlock)
parseCalloutPrefix :: [Inline] -> Maybe (Text, [Inline])
parseCalloutPrefix (Str marker : rest) = do
theoremType <- canonicalTheoremType =<< (T.stripPrefix "[!" marker >>= T.stripSuffix "]")
return (theoremType, dropLeadingInlineSpaces rest)
parseCalloutPrefix _ = Nothing
stripTrailingLabel :: [Block] -> ([Block], Maybe Text)
stripTrailingLabel [] = ([], Nothing)
stripTrailingLabel blocks = case unsnoc blocks of
Nothing -> (blocks, Nothing)
Just (prefix, lastBlock) -> case splitBlockLabel lastBlock of
Nothing -> (blocks, Nothing)
Just (cleaned, label) -> (prefix ++ maybeToList cleaned, Just label)
blockLabel :: Block -> Maybe Text
blockLabel block = case splitBlockLabel block of
Just (Nothing, label) -> Just label
_ -> Nothing
splitBlockLabel :: Block -> Maybe (Maybe Block, Text)
splitBlockLabel (Para inlines) = splitBlockLabelWith Para inlines
splitBlockLabel (Plain inlines) = splitBlockLabelWith Plain inlines
splitBlockLabel (BlockQuote blocks) = splitBlockLabelWithBlocks BlockQuote blocks
splitBlockLabel (Div attr blocks) = splitBlockLabelWithBlocks (Div attr) blocks
splitBlockLabel (BulletList items) = do
(items', label) <- splitBlockLabelItems items
return (nonEmptyListBlock BulletList items', label)
splitBlockLabel (OrderedList attrs items) = do
(items', label) <- splitBlockLabelItems items
return (nonEmptyListBlock (OrderedList attrs) items', label)
splitBlockLabel _ = Nothing
splitBlockLabelWith :: ([Inline] -> Block) -> [Inline] -> Maybe (Maybe Block, Text)
splitBlockLabelWith mkBlock inlines = do
(prefix, label) <- splitTrailingLabelLine inlines
return (nonEmptyBlock mkBlock prefix, label)
splitBlockLabelWithBlocks :: ([Block] -> Block) -> [Block] -> Maybe (Maybe Block, Text)
splitBlockLabelWithBlocks mkBlock blocks = do
let (blocks', label) = stripTrailingLabel blocks
label' <- label
return (nonEmptyBlocks mkBlock blocks', label')
splitBlockLabelItems :: [[Block]] -> Maybe ([[Block]], Text)
splitBlockLabelItems items = do
(prefixItems, lastItem) <- unsnoc items
let (lastItem', label) = stripTrailingLabel lastItem
label' <- label
return (prefixItems ++ maybeToList (nonEmptyListItem lastItem'), label')
splitTrailingLabelLine :: [Inline] -> Maybe ([Inline], Text)
splitTrailingLabelLine inlines = do
(prefixLines, lastLine) <- unsnoc (inlineLines inlines)
label <- labelFromInlines lastLine
return (joinInlineLines prefixLines, label)
labelFromInlines :: [Inline] -> Maybe Text
labelFromInlines [Str s] = labelFromText s
labelFromInlines _ = Nothing
labelFromText :: Text -> Maybe Text
labelFromText s = do
label <- T.stripPrefix "^" (T.strip s)
if T.null label then Nothing else Just label
inlineLines :: [Inline] -> [[Inline]]
inlineLines = foldr step [[]]
where
step SoftBreak acc = [] : acc
step LineBreak acc = [] : acc
step x (line : rest) = (x : line) : rest
step _ [] = [[]]
joinInlineLines :: [[Inline]] -> [Inline]
joinInlineLines [] = []
joinInlineLines (x : xs) = x ++ concatMap (SoftBreak :) xs
inlineMarkdownWrite :: WriterOptions
inlineMarkdownWrite =
def
{ writerExtensions = readerExtensions chaoDocRead
}
inlineMarkdown :: [Inline] -> Text
inlineMarkdown inlines =
T.strip $
fromRight "" $
runPure $
writeMarkdown inlineMarkdownWrite (Pandoc nullMeta [Plain inlines])
splitAtFirstBreak :: [Inline] -> ([Inline], [Inline])
splitAtFirstBreak = go []
where
go acc [] = (reverse acc, [])
go acc (SoftBreak : xs) = (reverse acc, xs)
go acc (LineBreak : xs) = (reverse acc, xs)
go acc (x : xs) = go (x : acc) xs
trimInlineSpaces :: [Inline] -> [Inline]
trimInlineSpaces = dropTrailingInlineSpaces . dropLeadingInlineSpaces
dropLeadingInlineSpaces :: [Inline] -> [Inline]
dropLeadingInlineSpaces = dropWhile isInlineSpace
dropTrailingInlineSpaces :: [Inline] -> [Inline]
dropTrailingInlineSpaces = reverse . dropWhile isInlineSpace . reverse
isInlineSpace :: Inline -> Bool
isInlineSpace Space = True
isInlineSpace _ = False
nonEmptyBlock :: ([Inline] -> Block) -> [Inline] -> Maybe Block
nonEmptyBlock mkBlock inlines
| (not . all isWhitespaceInline) inlines = Just (mkBlock inlines)
| otherwise = Nothing
nonEmptyBlocks :: ([Block] -> Block) -> [Block] -> Maybe Block
nonEmptyBlocks mkBlock blocks
| null blocks = Nothing
| otherwise = Just (mkBlock blocks)
nonEmptyListBlock :: ([[Block]] -> Block) -> [[Block]] -> Maybe Block
nonEmptyListBlock mkBlock items
| null items = Nothing
| otherwise = Just (mkBlock items)
nonEmptyListItem :: [Block] -> Maybe [Block]
nonEmptyListItem []
= Nothing
nonEmptyListItem blocks = Just blocks
isWhitespaceInline :: Inline -> Bool
isWhitespaceInline Space = True
isWhitespaceInline SoftBreak = True
isWhitespaceInline LineBreak = True
isWhitespaceInline _ = False
unsnoc :: [a] -> Maybe ([a], a)
unsnoc [] = Nothing
unsnoc [x] = Just ([], x)
unsnoc (x : xs) = do
(prefix, lastElem) <- unsnoc xs
return (x : prefix, lastElem)
makeTheorem :: Block -> Block
makeTheorem (Div attr xs)
| isNothing t = Div attr xs
| otherwise = Div (addClass attr "theorem-environment") (Plain [header] : xs)
where
(_, _, parm) = attr
t = lookup "type" parm
name = lookup "title" parm
index = lookup "index" parm
header = Span (addClass nullAttr "theorem-header") [typetext, indextext, nametext]
typetext = Span (addClass nullAttr "type") [Str $ fromJust t]
indextext =
if isNothing index
then Str ""
else Span (addClass nullAttr "index") [Str $ fromJust index]
nametext =
if isNothing name
then Str ""
else Span (addClass nullAttr "name") (pandocToInline $ thmNamePandoc $ fromJust name)
makeTheorem x = x
-- bib from https://github.com/chaoxu/chaoxu.github.io/tree/develop
cslFile :: String
cslFile = "bib_style.csl"
bibFile :: String
bibFile = "reference.bib"
chaoDocPandocCompiler :: Compiler (Item Pandoc)
chaoDocPandocCompiler = do
macros <- T.pack <$> loadBody "math-macros.md"
body <- getResourceBody
let bodyWithMacros =
fmap (T.unpack . prependMacros macros . T.pack) body
myReadPandocBiblio chaoDocRead (T.pack cslFile) (T.pack bibFile) myFilter bodyWithMacros
chaoDocCompiler :: Compiler (Item String)
chaoDocCompiler = chaoDocPandocCompiler <&> writePandocWith chaoDocWrite
addMeta :: T.Text -> MetaValue -> Pandoc -> Pandoc
addMeta name value (Pandoc meta a) =
let prevMap = unMeta meta
newMap = M.insert name value prevMap
newMeta = Meta newMap
in Pandoc newMeta a
myReadPandocBiblio ::
ReaderOptions ->
T.Text -> -- csl file name
T.Text ->
(Pandoc -> Pandoc) -> -- apply a filter before citeproc
Item String ->
Compiler (Item Pandoc)
myReadPandocBiblio ropt csl biblio pdfilter item = do
-- Parse CSL file, if given
-- style <- unsafeCompiler $ CSL.readCSLFile Nothing . toFilePath . itemIdentifier $ csl
-- We need to know the citation keys, add then *before* actually parsing the
-- actual page. If we don't do this, pandoc won't even consider them
-- citations!
-- let Biblio refs = itemBody biblio
pandoc <- itemBody <$> readPandocWith ropt item
let pandoc' =
fromRight pandoc $
unsafePerformIO $
runIO $
processCitations $
addMeta "bibliography" (MetaList [MetaString biblio]) $
addMeta "csl" (MetaString csl) $
addMeta "link-citations" (MetaBool True) $
addMeta "reference-section-title" (MetaInlines [Str "References"]) $
pdfilter pandoc -- here's the change
-- let a x = itemSetBody (pandoc' x)
return $ fmap (const pandoc') item
myFilter :: Pandoc -> Pandoc
myFilter = usingSideNotesHTML chaoDocWrite . theoremFilter . panguFilter . displayMathFilter
-- pangu filter
lastChar :: Inline -> Maybe Char
lastChar e = case e of
Str s -> if null (T.unpack s) then Nothing else Just (last (T.unpack s))
Emph is -> lastCharList is
Strong is -> lastCharList is
Strikeout is -> lastCharList is
Link _ is _ -> lastCharList is
Span _ is -> lastCharList is
Quoted _ is -> lastCharList is
_ -> Nothing
where
lastCharList [] = Nothing
lastCharList is = lastChar (last is)
firstChar :: Inline -> Maybe Char
firstChar e = case e of
Str s -> if null (T.unpack s) then Nothing else Just (head (T.unpack s))
Emph is -> firstCharList is
Strong is -> firstCharList is
Strikeout is -> firstCharList is
Link _ is _ -> firstCharList is
Span _ is -> firstCharList is
Quoted _ is -> firstCharList is
_ -> Nothing
where
firstCharList [] = Nothing
firstCharList is = firstChar (head is)
panguInline :: Inline -> Inline
panguInline e = case e of
Str s -> Str (pangu s)
Emph is -> Emph (panguInlines is)
Strong is -> Strong (panguInlines is)
Strikeout is -> Strikeout (panguInlines is)
Link at is tg -> Link at (panguInlines is) tg
Span at is -> Span at (panguInlines is)
Quoted qt is -> Quoted qt (panguInlines is)
_ -> e
panguInlines :: [Inline] -> [Inline]
panguInlines = foldr (addSpace . panguInline) []
where
addSpace x [] = [x]
addSpace x (y : ys)
| shouldSpace x y = x : Space : y : ys
| otherwise = x : y : ys
shouldSpace x y = case (lastChar x, firstChar y) of
(Just lc, Just fc) -> isCJK lc /= isCJK fc
_ -> False
panguFilter :: Pandoc -> Pandoc
panguFilter = walk transformBlocks
where
transformBlocks :: Block -> Block
transformBlocks (Para inlines) = Para (panguInlines inlines)
transformBlocks x = x
type MathTag :: Type
data MathTag = MathTag
{ mathTagStarred :: Bool,
mathTagBody :: Text
}
extractMathTag :: Text -> Maybe (Text, MathTag)
extractMathTag source = do
(mathBody, starred, tagBody) <- findTrailingMathTag (T.unpack source)
return
( T.stripEnd (T.pack mathBody),
MathTag starred (T.strip (T.pack tagBody))
)
findTrailingMathTag :: String -> Maybe (String, Bool, String)
findTrailingMathTag = go [] Nothing
where
go :: String -> Maybe (String, Bool, String) -> String -> Maybe (String, Bool, String)
go _ best [] = best
go prefix best rest@(c : cs) =
go (c : prefix) best' cs
where
best' = case parseMathTagPrefix rest of
Just (starred, tagBody, suffix)
| all isSpace suffix -> Just (reverse prefix, starred, tagBody)
_ -> best
parseMathTagPrefix :: String -> Maybe (Bool, String, String)
parseMathTagPrefix s
| Just rest <- stripPrefix "\\tag*{" s = do
(tagBody, suffix) <- parseBalancedBraces 1 [] rest
return (True, tagBody, suffix)
| Just rest <- stripPrefix "\\tag{" s = do
(tagBody, suffix) <- parseBalancedBraces 1 [] rest
return (False, tagBody, suffix)
| otherwise = Nothing
parseBalancedBraces :: Int -> String -> String -> Maybe (String, String)
parseBalancedBraces _ _ [] = Nothing
parseBalancedBraces depth acc ('\\' : x : xs)
| x `elem` ['{', '}', '\\'] = parseBalancedBraces depth (x : '\\' : acc) xs
parseBalancedBraces depth acc ('{' : xs) =
parseBalancedBraces (depth + 1) ('{' : acc) xs
parseBalancedBraces depth acc ('}' : xs)
| depth == 1 = Just (reverse acc, xs)
| otherwise = parseBalancedBraces (depth - 1) ('}' : acc) xs
parseBalancedBraces depth acc (x : xs) =
parseBalancedBraces depth (x : acc) xs
isPlainMathTag :: Text -> Bool
isPlainMathTag =
T.all \c ->
isAlphaNum c || c `elem` ("*-.:()[] " :: String)
mathTagInlines :: MathTag -> [Inline]
mathTagInlines tag
| mathTagStarred tag = tagBody
| otherwise = [Str "("] ++ tagBody ++ [Str ")"]
where
tagText = mathTagBody tag
tagBody
| isPlainMathTag tagText = [Str tagText]
| otherwise = [Math InlineMath tagText]
-- display math wrapper for MathML
displayMathFilter :: Pandoc -> Pandoc
displayMathFilter = walk wrapDisplayMath
where
wrapDisplayMath (Math DisplayMath source)
| Just (mathBody, tag) <- extractMathTag source =
Span
("", ["math-container", "math-container-tagged"], [])
[ Span ("", ["math-tag-spacer"], []) [],
Span ("", ["math-equation"], []) [Math DisplayMath mathBody],
Span ("", ["math-tag"], []) (mathTagInlines tag)
]
| otherwise =
Span ("", ["math-container"], []) [Math DisplayMath source]
wrapDisplayMath x = x