-- ***************************************************************
--
-- Format a parsed HTML document into text.
-- Copyright 2020 by Sean Conner. All Rights Reserved.
--
-- This library is free software; you can redistribute it and/or modify it
-- under the terms of the GNU Lesser General Public License as published by
-- the Free Software Foundation; either version 3 of the License, or (at your
-- option) any later version.
--
-- This library is distributed in the hope that it will be useful, but
-- WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
-- License for more details.
--
-- You should have received a copy of the GNU Lesser General Public License
-- along with this library; if not, see
end
return res
end
-- <br>: returns the two bytes 0xC2 0x85, i.e. the UTF-8 encoding of
-- U+0085 (NEXT LINE).  NOTE(review): presumably a later stage turns this
-- marker into a hard line break -- confirm against the wrapping code.
function formats.br()
  return "\194\133"
end
-- <script>: contributes nothing to the text output.
function formats.script()
  return ""
end
-- <q>: render the children inline and surround them with the strings
-- stored in ENTITIES.ldquo and ENTITIES.rdquo.
function formats.q(node,state)
  local opening = ENTITIES.ldquo
  local quoted  = run_inline(node,state)
  return opening .. quoted .. ENTITIES.rdquo
end
-- <sub>: the element and its children are dropped (empty string returned).
function formats.sub()
  return ""
end
-- <sup>: render the children inline, prefixed with a caret.
function formats.sup(node,state)
  local raised = run_inline(node,state)
  return "^" .. raised
end
-- <span>: render the children inline.  The text is wrapped in underscores
-- when the element carries a lang attribute, or when its class attribute
-- contains "booktitle"; otherwise it is returned unchanged.
function formats.span(node,state)
  local text  = run_inline(node,state)
  local attrs = node.attributes
  local class = attrs.class

  if attrs.lang or (class and class:match "booktitle") then
    return "_" .. text .. "_"
  end

  return text
end
-- <bdo>: the element and its children are dropped (empty string returned).
function formats.bdo()
  return ""
end
-- <map>: contributes nothing to the text output.
function formats.map()
  return ""
end
-- <area>: contributes nothing to the text output.
function formats.area()
  return ""
end
-- <object>: the element and its children are dropped (empty string
-- returned).
function formats.object()
  return ""
end
-- <param>: contributes nothing to the text output.
function formats.param()
  return ""
end
-- <input>: form controls contribute nothing to the text output.
function formats.input()
  return ""
end
-- <select>: the element and its children are dropped (empty string
-- returned).
function formats.select()
  return ""
end
-- <textarea>: the element and its children are dropped (empty string
-- returned).
function formats.textarea()
  return ""
end
-- <label>: the element and its children are dropped (empty string
-- returned).
function formats.label()
  return ""
end
-- <button>: the element and its children are dropped (empty string
-- returned).
function formats.button()
  return ""
end
-- <optgroup>: the element and its children are dropped (empty string
-- returned).
function formats.optgroup()
  return ""
end
-- <option>: the element and its children are dropped (empty string
-- returned).
function formats.option()
  return ""
end
-- <u>: render the children inline and wrap the result in underscores.
function formats.u(node,state)
  local underlined = run_inline(node,state)
  return "_" .. underlined .. "_"
end
-- <font>: no decoration of its own; the result is whatever run_inline()
-- produces for the children.  Kept as a tail call so the results of
-- run_inline() are passed through untouched.
function formats.font(node,state)
  return run_inline(node,state)
end
-- *************************************************************************
-- BLOCK
-- *************************************************************************
-- <p>: render the children inline and hand the text to wrap_text().
--
-- While state.dl_screenplay is set, two classes get extra decoration:
--   class 'setting'   -> "[Setting: <text>]"
--   class 'direction' -> "[<text>]"
-- Any other class (or none) leaves the text as is.
function formats.p(node,state)
  local text  = run_inline(node,state)
  local class = node.attributes.class

  if state.dl_screenplay then
    if class == 'setting' then
      text = '[Setting: ' .. text .. ']'
    elseif class == 'direction' then
      text = '[' .. text .. ']'
    end
  end

  return wrap_text(text,state)
end
-- fixline: LPeg substitution pattern that prefixes every line of a string
-- with extra argument #1 of the match call.  The prefix is emitted once at
-- the very start, and again after each newline that is followed by at least
-- one more character (so a trailing newline does not get a dangling prefix).
local fixline do
  local any      = lpeg.P(1)
  local prefix   = lpeg.Carg(1)
  -- a newline with more input after it: keep the newline, then add the prefix
  local inner_nl = ((lpeg.P"\n" * #any) / '\n') * prefix

  fixline = lpeg.Cs(prefix * (inner_nl + any)^0)
end
-- <pre>: preformatted text.  String children are taken verbatim (no
-- wrapping); element children are rendered through their own entry in
-- formats[].  The assembled text is then run through fixline so every line
-- starts with the current nesting prefix (top of state.nest).
--
-- The pieces are collected in a buffer and joined once with table.concat()
-- rather than grown with repeated `..`, which copies the whole accumulated
-- string on every iteration.
function formats.pre(node,state)
  local buf = {}

  for _,item in ipairs(node) do
    if type(item) == 'table' then
      buf[#buf + 1] = formats[item.tag](item,state)
    else
      buf[#buf + 1] = item
    end
  end

  return fixline:match(table.concat(buf),1,state.nest[#state.nest])
end
-- <blockquote>: render the children as flow content with "| " appended to
-- both the first-line prefix (state.initial) and the continuation prefix
-- (state.nest).  Both stacks are popped again before returning.
function formats.blockquote(node,state)
  local initial = state.initial
  local nest    = state.nest

  initial[#initial + 1] = initial[#initial] .. "| "
  nest[#nest + 1]       = nest[#nest] .. "| "

  local quoted = run_flow(node,state)

  table.remove(state.nest)
  table.remove(state.initial)
  return quoted
end
-- <hr>: a row of asterisks on its own line, preceded by the current
-- nesting prefix.
function formats.hr(_,state)
  local nest = state.nest
  return nest[#nest] .. "* * * * *\n"
end
-- <address>: the element and its children are dropped (empty string
-- returned).
function formats.address()
  return ""
end
-- <h1>: headings at this level are dropped (empty string returned).
function formats.h1()
  return ""
end
-- <h2>: headings at this level are dropped (empty string returned).
function formats.h2()
  return ""
end
-- <h3>: headings at this level are dropped (empty string returned).
function formats.h3()
  return ""
end
-- <h4>: the only heading level that produces output here; the children
-- are rendered inline and the text is passed through wrap_text().
function formats.h4(node,state)
  local heading = run_inline(node,state)
  return wrap_text(heading,state)
end
-- <h5>: headings at this level are dropped (empty string returned).
function formats.h5()
  return ""
end
-- <h6>: headings at this level are dropped (empty string returned).
function formats.h6()
  return ""
end
-- <div>: render the children as flow content.  While they are rendered,
-- state.div_pf tells whether this <div> has the class 'pf' (exact match).
--
-- On exit the flag goes back to the value it had on entry (normalised to
-- false when it was unset).  Unconditionally forcing it to false, as was
-- done before, meant a <div> nested inside a 'pf' <div> switched the flag
-- off for the remainder of the outer <div>.
function formats.div(node,state)
  local outer = state.div_pf or false

  state.div_pf = node.attributes.class and node.attributes.class == 'pf'
  local res = run_flow(node,state)
  state.div_pf = outer

  return res
end
-- ***************
-- DEFINITION LIST
-- ***************
-- <dl>: render the children as block content.
--
-- When the element has a class attribute, state.dl_header and
-- state.dl_screenplay are set from it (a class containing "header" or
-- "screenplay" respectively); they steer how <dt> and <p> are rendered.
-- Without a class attribute the flags already in effect are left alone.
--
-- On exit both flags go back to the values they had on entry (normalised
-- to false when unset).  Forcing them to false, as was done before, meant
-- a <dl> nested inside a header/screenplay <dl> switched the mode off for
-- the rest of the outer list.
function formats.dl(node,state)
  local outer_header     = state.dl_header     or false
  local outer_screenplay = state.dl_screenplay or false
  local class            = node.attributes.class

  if class then
    state.dl_header     = class:match "header"
    state.dl_screenplay = class:match "screenplay"
  end

  local res = run_block(node,state)

  state.dl_header     = outer_header
  state.dl_screenplay = outer_screenplay
  return res
end
-- <dt>: render the term and push one level onto state.initial and
-- state.nest for the definition that follows.  state.dt is set so that
-- formats.dd() knows there is a level to pop.
--
-- Three layouts, chosen by the flags set in formats.dl():
--   dl_header     - "term:" plus ENTITIES.nbsp becomes the first-line
--                   prefix of the definition; nothing is returned here.
--   dl_screenplay - "term: " becomes the first-line prefix and
--                   continuation lines are padded by its byte length;
--                   nothing is returned here.
--   otherwise     - the term is returned on a line of its own and the
--                   definition is nested one step deeper.
function formats.dt(node,state)
  local term = run_inline(node,state)
  state.dt = true

  local initial = state.initial
  local nest    = state.nest
  local indent  = nest[#nest]

  if state.dl_header then
    local lead = indent .. term .. ":" .. ENTITIES.nbsp
    table.insert(initial,initial[#initial] .. lead)
    table.insert(nest,indent .. " ")
    return ""
  end

  if state.dl_screenplay then
    local lead = indent .. term .. ": "
    table.insert(initial,initial[#initial] .. lead)
    table.insert(nest,indent .. string.rep(" ",#lead))
    return ""
  end

  local line = indent .. term .. '\n'
  table.insert(initial,initial[#initial] .. ' ')
  table.insert(nest,indent .. ' ')
  return line
end
-- <dd>: render the definition as flow content, then pop the prefix level
-- that the preceding <dt> pushed onto state.nest / state.initial.
--
-- state.dt is sampled before the children are rendered and cleared once
-- the level has been popped.  Previously the flag stayed set forever, so a
-- second <dd> following the same <dt> popped a level it did not own and
-- unbalanced both stacks.  Sampling first (rather than testing afterwards)
-- keeps a <dl> nested inside this <dd> from hiding the fact that this <dd>
-- still has its own level to pop.
function formats.dd(node,state)
  local opened = state.dt
  local res    = run_flow(node,state)

  if opened then
    table.remove(state.nest)
    table.remove(state.initial)
    state.dt = false
  end

  return res
end
-- ********
-- LISTS
-- ********
-- <ul>: push an unordered-list record onto state.list, render the
-- children as flow content, then pop the record again.
function formats.ul(node,state)
  local lists = state.list
  lists[#lists + 1] = { type = 'u' }

  local items = run_flow(node,state)

  table.remove(state.list)
  return items
end
-- <ol>: push an ordered-list record (numbering starts at 1) onto
-- state.list, render the children as flow content, then pop the record.
--
-- state.ol_footnote is set while the children are rendered when the class
-- attribute is exactly 'footnote'; formats.li() uses it to pick the
-- "[  n] " label style.  The value in effect on entry is restored on exit:
-- before, an <ol> nested inside a footnote list overwrote the flag and the
-- remaining items of the outer list lost their footnote style.
function formats.ol(node,state)
  local outer_footnote = state.ol_footnote

  table.insert(state.list, { type = 'o' , idx = 1 })
  state.ol_footnote = node.attributes.class and node.attributes.class == 'footnote'

  local res = run_flow(node,state)

  table.remove(state.list)
  state.ol_footnote = outer_footnote
  return res
end
-- <li>: render one list item as flow content.
--
-- The label depends on the innermost record in state.list:
--   unordered           "* "
--   ordered             "%3d. "   (the counter in the record is advanced)
--   ordered + footnote  "[%3d] "  (when state.ol_footnote is set)
--
-- The label is appended to the first-line prefix (state.initial); the
-- continuation prefix (state.nest) gets blanks of the same byte length so
-- wrapped lines line up under the item text.  The padding is derived from
-- the label itself so it cannot drift out of step with the format strings
-- and still fits once a counter needs more than three digits.
function formats.li(node,state)
  local info = state.list[#state.list]
  local hdr

  if info.type == 'u' then
    hdr = "* "
  else
    local i = info.idx
    info.idx = i + 1
    if state.ol_footnote then
      hdr = string.format("[%3d] ",i)
    else
      hdr = string.format("%3d. ",i)
    end
  end

  local pad = string.rep(" ",#hdr)

  table.insert(state.initial,state.initial[#state.initial] .. hdr)
  table.insert(state.nest,state.nest[#state.nest] .. pad)
  local res = run_flow(node,state)
  table.remove(state.nest)
  table.remove(state.initial)
  return res
end
-- <noscript>: the element and its children are dropped (empty string
-- returned).
function formats.noscript()
  return ""
end
-- <form>: the element and its children are dropped (empty string
-- returned).
function formats.form()
  return ""
end
-- <fieldset>: the element and its children are dropped (empty string
-- returned).
function formats.fieldset()
  return ""
end
-- <legend>: the element and its children are dropped (empty string
-- returned).
function formats.legend()
  return ""
end
-- **********
-- TABLE
-- **********
-- <table>: render the children as block content and append the footer
-- that formats.tfoot() parked in state.tfoot (if there was one), so the
-- footer ends up after the body rows.
--
-- state.tfoot is cleared before the children are rendered and put back to
-- its entry value afterwards.  Previously it was never reset, so the footer
-- of one table was appended again to every later table that had no <tfoot>
-- of its own; saving the entry value also keeps a nested table from
-- clobbering the footer of the table around it.
function formats.table(node,state)
  local outer = state.tfoot
  state.tfoot = nil

  local body = run_block(node,state)
  local foot = state.tfoot or ""

  state.tfoot = outer
  return body .. foot
end
-- <caption>: one line reading "Table: <caption text>", preceded by the
-- current nesting prefix.
function formats.caption(node,state)
  local nest   = state.nest
  local indent = nest[#nest]
  local title  = run_inline(node,state)
  return indent .. "Table: " .. title .. '\n'
end
-- <col>: contributes nothing to the text output.
function formats.col()
  return ""
end
-- <colgroup>: the element and its children are dropped (empty string
-- returned).
function formats.colgroup()
  return ""
end
-- <thead>: the header rows followed by a rule of dashes; the rule line
-- starts with the current nesting prefix.
function formats.thead(node,state)
  local rows = run_block(node,state)
  local nest = state.nest
  return rows .. nest[#nest] .. "------------------------------\n"
end
-- <tfoot>: nothing is emitted in place.  The footer (a rule of dashes
-- followed by the footer rows) is stored in state.tfoot, from where
-- formats.table() appends it after the rest of the table.
function formats.tfoot(node,state)
  local nest = state.nest
  local rule = nest[#nest] .. "------------------------------\n"
  state.tfoot = rule .. run_block(node,state)
  return ""
end
-- <tbody>: no decoration of its own; the result is whatever run_block()
-- produces for the rows.  Kept as a tail call so the results of
-- run_block() are passed through untouched.
function formats.tbody(node,state)
  return run_block(node,state)
end
-- <tr>: reset state.tr_sep to the empty string, then return the row's
-- cells preceded by the current nesting prefix.
-- NOTE(review): tr_sep is presumably consumed by the cell formatters,
-- which are not visible here -- confirm.
function formats.tr(node,state)
  state.tr_sep = ""
  local nest   = state.nest
  local indent = nest[#nest]
  return indent .. run_block(node,state)
end
-- -------------------------------------------------------------------------
--