From 1eca7a902cf9fd25a4aa03474ef818f0eb202c8f Mon Sep 17 00:00:00 2001 From: Yu Cong Date: Sun, 12 Oct 2025 23:09:00 +0800 Subject: [PATCH] use a bipartite graph --- filter.lua | 88 +++++++++++++++++++++++++++++++++++++---------------- input.md | 33 +++++++++++++------- output.html | 22 ++++++++------ 3 files changed, 96 insertions(+), 47 deletions(-) diff --git a/filter.lua b/filter.lua index d8ab7fa..492096e 100644 --- a/filter.lua +++ b/filter.lua @@ -1,11 +1,18 @@ - -local label_map = {} -- label has to be unique! - -- helper functions for debugging local function show(s) io.stderr:write("[Debug] " .. s .. "\n") end +------------------------------------------------ +local label_map = {} -- label has to be unique! +local adj = {} -- graph +local function add_edge(u, v) + if not adj[u] then + adj[u] = {} + end + table.insert(adj[u], v) +end + local function collect_labels(blk) if blk.identifier and blk.identifier ~= "" then label_map[blk.identifier] = blk:clone() @@ -13,27 +20,6 @@ local function collect_labels(blk) return nil end -local function dfs(blk, stack) -- depth first search on a top level blk - -- check for identifier and build label_map - if blk.identifier and blk.identifier ~= "" then - label_map[blk.identifier] = blk:clone() - table.insert(stack, blk.identifier) - end - - -- recurse into child blocks - -- type matters. see https://hackage-content.haskell.org/package/pandoc-types-1.23.1/docs/Text-Pandoc-Definition.html#t:Block - -- fortunately, we only need to recurse on divs. 
- if blk.t == 'Div' then - for _, inner in ipairs(blk.content) do - dfs(inner, stack) - end - end - -- pop - if blk.identifier and blk.identifier ~= "" then - table.remove(stack) -- pop - end -end - local function words(s) local res = {} for part in s:gmatch("[^,]+") do -- split by commas @@ -45,6 +31,49 @@ local function words(s) return res end +local function dfs(blk, stack) -- depth first search on a top level blk + -- check for identifier and build label_map + -- look for 2 types of AST node: divs with include attr and divs with labels + local labelled = false + local include = false + if blk.attributes and blk.attributes["include"] then -- this must be a leaf node + include = true + -- labels in include may appear later in the dfs than this include-node + -- but we assume every label will be there and build the graph now + -- This is a directed bipartite graph. + -- one side for labeled nodes and one side for include-nodes + for _, l in ipairs(words(blk.attributes["include"])) do + -- insert edges + -- what's the identifier of this include-node?... + -- well... you must write a label for each include-node... + -- this can be done using another filter + add_edge(blk.identifier, l) + end + -- insert more edges + for _, l in ipairs(stack) do + add_edge(l, blk.identifier) + end + elseif blk.identifier and blk.identifier ~= "" then + -- collect labelled nodes & maintain the stack + labelled = true + label_map[blk.identifier] = blk:clone() + table.insert(stack, blk.identifier) + end + + -- recurse into child blocks + -- type matters. see https://hackage-content.haskell.org/package/pandoc-types-1.23.1/docs/Text-Pandoc-Definition.html#t:Block + -- fortunately, we only need to recurse on divs. 
+ if not include and blk.t == 'Div' then + for _, inner in ipairs(blk.content) do + dfs(inner, stack) + end + end + -- pop + if labelled then + table.remove(stack) + end +end + local function replace(e) local include = e.attributes["include"] if include then @@ -66,9 +95,16 @@ end return { -- traverse = 'topdown', Pandoc = function(doc) - -- collect labels + -- collect labels & build the graph for _, blk in ipairs(doc.blocks) do - dfs(blk,{}) + dfs(blk, {}) + end + + show("edges:") + for u, vs in pairs(adj) do + for _, v in ipairs(vs) do + show(u .. "->" .. v) + end end -- replace diff --git a/input.md b/input.md index 19c4558..def3579 100644 --- a/input.md +++ b/input.md @@ -1,31 +1,42 @@ -::: {#lorem} -Lorem ipsum dolor sit amet -::: +::: {#structure} +inc (need thm2 and a fake label) -:::{include="lorem, thm2 , fakelabel , ,inthm1 ," #inc} -::: +thm1 - inthm1 + +thm2 - inthm2 (need thm1) + +:::::::::::::::: + +:::{include="thm2 , fakelabel" #inc} +This line will be ignored +::::::::::::::::::::::::::::::::::::::::::::::::::::::: -::: Theorem +::: {.Theorem #thm1} :::::::::::::::::::::: test thm1 ::: {.Definition #inthm1} ::: something ::::::::::::::::::::::::::::: -::: +::::::::::::::::::::::::::::::::::::::::::: -:::{.Theorem #thm2} +:::{.Theorem #thm2}::::::::::::::::::::: test thm2 -::: + +::::{#inthm2 include="thm1"} +i need theorem 1 +:::::::::::::::::::::::::::: + +:::::::::::::::::::::::::::::::::::::::: :::{class="Theorem" id="thm3"} test thm3 -::: +:::::::::::::::::::::::::::::: ::::: {#special .sidebar} Here is a paragraph. And another. -::::: \ No newline at end of file +::::::::::::::::::::::::: \ No newline at end of file diff --git a/output.html b/output.html index c51fa86..cbdddac 100644 --- a/output.html +++ b/output.html @@ -1,18 +1,17 @@ -
-

Lorem ipsum dolor sit amet

-
-
-
-

Lorem ipsum dolor sit amet

+
+

inc (need thm2 and a fake label)

+

thm1 - inthm1

+

thm2 - inthm2 (need thm1)

+

test thm2

-
-
-

something

+
+

i need theorem 1

-
+
+

test thm1

something

@@ -20,6 +19,9 @@

test thm2

+
+

i need theorem 1

+

test thm3