💾 Archived View for gemini.conman.org › extensions › GLV-1 › handlers › blog › format.lua captured on 2023-11-04 at 14:37:36.

View Raw

More Information

⬅️ Previous capture (2023-01-29)

➡️ Next capture (2023-12-28)

🚧 View Differences

-=-=-=-=-=-=-

-- ***********************************************************************
--
-- Copyright 2020 by Sean Conner.
--
-- This program is free software: you can redistribute it and/or modify it
-- under the terms of the GNU General Public License as published by the
-- Free Software Foundation, either version 3 of the License, or (at your
-- option) any later version.
--
-- This program is distributed in the hope that it will be useful, but
-- WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
-- Public License for more details.
--
-- You should have received a copy of the GNU General Public License along
-- with this program.  If not, see <http://www.gnu.org/licenses/>.
--
-- Comments, questions and criticisms can be sent to: sean@conman.org
--
-- =======================================================================
--
-- Code to format an HTML document into Gemini
--
-- ***********************************************************************

-- luacheck: ignore 611

local ENTITIES = require "org.conman.const.entity"
local lpeg     = require "lpeg"
local ipairs   = ipairs

local formats = {}

-- *************************************************************************

-- Prefix TEXT with the innermost nesting prefix (the last entry of
-- state.nest) and terminate it with a newline.
local function wrap_text(text,state)
  local nest   = state.nest
  local prefix = nest[#nest]
  return prefix .. text .. '\n'
end

-- *************************************************************************

-- Render the children of NODE as inline content and return the resulting
-- string.  String children are copied verbatim, comment nodes are skipped,
-- and every other table child is rendered by the formatter registered under
-- its tag.
--
-- The pieces are collected in a buffer and joined once at the end, rather
-- than growing a string with `..` on every iteration.
local function run_inline(node,state)
  local buf = {}

  for _,item in ipairs(node) do
    if type(item) ~= 'table' then
      buf[#buf + 1] = item
    elseif not item.comment then
      buf[#buf + 1] = formats[item.tag](item,state)
    end
  end

  return table.concat(buf)
end

-- *************************************************************************

-- Render the children of NODE the same way run_inline() does, then append
-- the current nesting prefix (last entry of state.nest) unless NODE is a
-- <THEAD>, <TFOOT>, <TBODY> or <BLOCKQUOTE>.
--
-- The pieces are collected in a buffer and joined once at the end, rather
-- than growing a string with `..` on every iteration.
local function run_block(node,state)
  local buf = {}

  for _,item in ipairs(node) do
    if type(item) ~= 'table' then
      buf[#buf + 1] = item
    elseif not item.comment then
      buf[#buf + 1] = formats[item.tag](item,state)
    end
  end

  local tag = node.tag
  if  tag ~= 'thead'
  and tag ~= 'tfoot'
  and tag ~= 'tbody'
  and tag ~= 'blockquote' then
    -- read the prefix only after the children ran, as they may touch nest
    buf[#buf + 1] = state.nest[#state.nest]
  end

  return table.concat(buf)
end

-- *************************************************************************

-- Render the children of NODE as flow content (a mix of inline and block
-- items).  Runs of strings and inline items are gathered into one pending
-- line which is emitted through wrap_text(); each block item first flushes
-- the pending line and is then emitted on its own.  Comment nodes produce
-- nothing.
local function run_flow(node,state)
  local out     = {}
  local pending = ""

  -- emit the gathered inline text, if any, as one prefixed line
  local function flush()
    if pending ~= "" then
      table.insert(out,wrap_text(pending,state))
      pending = ""
    end
  end

  for _,item in ipairs(node) do
    if type(item) ~= 'table' then
      pending = pending .. item
    elseif item.inline then
      pending = pending .. formats[item.tag](item,state)
    elseif not item.comment then
      assert(item.block)
      flush()

      local chunk = formats[item.tag](item,state)
      local tag   = item.tag

      -- <LI>, <TABLE> and <DL> output is used as is; every other block
      -- gets the current nesting prefix in front
      if not (tag == 'li' or tag == 'table' or tag == 'dl') then
        chunk = state.nest[#state.nest] .. chunk
      end

      table.insert(out,chunk)
    end
  end

  flush()
  return table.concat(out)
end

-- *************************************************************************
-- INLINE
-- *************************************************************************

-- <TT>: the inline content wrapped in a pair of backticks.
function formats.tt(node,state)
  local text = run_inline(node,state)
  return "`" .. text .. "`"
end

-- <I>: the inline content with U+0332 (combining low line) appended to
-- every character.  Returns a single string.
function formats.i(node,state)
  local text = run_inline(node,state)

  -- Walk the text one UTF-8 character at a time (not one byte at a time, as
  -- "%C" would) so the combining mark is never inserted in the middle of a
  -- multi-byte sequence.  Control characters and the NEL line-break marker
  -- ("\194\133", emitted for <BR>) are left untouched.  The parentheses
  -- drop gsub()'s substitution count.
  return (text:gsub(utf8.charpattern,function(c)
    if c ~= "\194\133" and not c:find("^%c") then
      return c .. "\u{332}"
    end
  end))
end

-- <B>: the inline content wrapped in a pair of asterisks.
function formats.b(node,state)
  local text = run_inline(node,state)
  return "*" .. text .. "*"
end

-- <BIG> and <SMALL> get no decoration of their own: their children are
-- rendered by run_inline() directly.
formats.big  = run_inline
formats.small = run_inline

-- <EM>: the inline content with U+0332 (combining low line) appended to
-- every character.  Returns a single string.
function formats.em(node,state)
  local text = run_inline(node,state)

  -- Walk the text one UTF-8 character at a time (not one byte at a time, as
  -- "%C" would) so the combining mark is never inserted in the middle of a
  -- multi-byte sequence.  Control characters and the NEL line-break marker
  -- ("\194\133", emitted for <BR>) are left untouched.  The parentheses
  -- drop gsub()'s substitution count.
  return (text:gsub(utf8.charpattern,function(c)
    if c ~= "\194\133" and not c:find("^%c") then
      return c .. "\u{332}"
    end
  end))
end

-- <U>: the inline content with U+0332 (combining low line) appended to
-- every character.  Returns a single string.
function formats.u(node,state)
  local text = run_inline(node,state)

  -- Walk the text one UTF-8 character at a time (not one byte at a time, as
  -- "%C" would) so the combining mark is never inserted in the middle of a
  -- multi-byte sequence.  Control characters and the NEL line-break marker
  -- ("\194\133", emitted for <BR>) are left untouched.  The parentheses
  -- drop gsub()'s substitution count.
  return (text:gsub(utf8.charpattern,function(c)
    if c ~= "\194\133" and not c:find("^%c") then
      return c .. "\u{332}"
    end
  end))
end

-- Aliases: <STRONG> is rendered like <B>, <DFN> is passed through
-- undecorated, and <CODE>, <SAMP>, <KBD> and <VAR> are rendered like <TT>.
formats.strong = formats.b
formats.dfn    = run_inline
formats.code   = formats.tt
formats.samp   = formats.tt
formats.kbd    = formats.tt
formats.var    = formats.tt

-- <CITE>: the inline content between the ldquo and rdquo entities.
function formats.cite(node,state)
  local open   = ENTITIES.ldquo
  local quoted = run_inline(node,state)
  return open .. quoted .. ENTITIES.rdquo
end

-- <ABBR>/<ACRONYM>: the inline content, followed by the TITLE attribute in
-- parentheses the first time a given abbreviation is expanded in the
-- document (tracked in state.abbr).
--
-- An abbreviation without a (non-empty) TITLE is returned as is and is not
-- marked as seen, so it no longer renders as "(nil)" and a later occurrence
-- that does carry a TITLE still gets its expansion.
function formats.abbr(node,state)
  local res   = run_inline(node,state)
  local title = node.attributes.title

  if title and title ~= "" and not state.abbr[res] then
    state.abbr[res] = true
    res = string.format("%s (%s)",res,title)
  end

  return res
end

formats.acronym = formats.abbr

-- <A>: the link text, followed by " (TITLE)" when a TITLE attribute is
-- present and by " [n]" when an HREF is present, where n is the position
-- of the HREF just appended to state.links.
function formats.a(node,state)
  local label = run_inline(node,state)
  local attr  = node.attributes
  local title = attr.title
  local href  = attr.href

  if title then
    label = string.format("%s (%s)",label,title)
  end

  if href then
    local links = state.links
    table.insert(links,href)
    label = string.format("%s [%d]",label,#links)
  end

  return label
end

-- <IMG>: a textual stand-in for the image followed by a link reference.
--
-- The SRC attribute, when present, is appended to state.links and shown as
-- " [n]".  The text is chosen as follows:
--      no ALT                  -> TITLE, or nothing
--      non-empty ALT           -> ALT
--      empty ALT, with TITLE   -> "[TITLE]"
--      empty ALT, no TITLE     -> nothing
-- Inside a <DIV CLASS="pf"> (state.div_pf) a <BR> marker is appended.
function formats.img(node,state)
  local attr = node.attributes

  local ref = ""
  if attr.src then
    table.insert(state.links,attr.src)
    ref = string.format(" [%d]",#state.links)
  end

  local alt   = attr.alt
  local title = attr.title
  local verbiage

  if not alt then
    verbiage = title or ""
  elseif alt ~= "" then
    verbiage = alt
  elseif title and title ~= "" then
    verbiage = "[" .. title .. "]"
  else
    verbiage = ""
  end

  if state.div_pf then
    return verbiage .. ref .. "\194\133" -- <BR>
  end

  return verbiage .. ref
end

-- <BR>: the line-break marker "\194\133" (UTF-8 for U+0085) followed by
-- the current nesting prefix.
function formats.br(_,state)
  local nest = state.nest
  return "\194\133" .. nest[#nest]
end

-- <SCRIPT>: always yields the empty string; the element produces no output.
formats.script = function()
  return ""
end

-- <Q>: the inline content between the ldquo and rdquo entities.
function formats.q(node,state)
  local open   = ENTITIES.ldquo
  local quoted = run_inline(node,state)
  return open .. quoted .. ENTITIES.rdquo
end

-- <SUB>: always yields the empty string; the element produces no output.
-- NOTE(review): unlike <SUP>, the subscript text itself is discarded --
-- confirm this is intended.
formats.sub = function()
  return ""
end

-- <SUP>: the inline content preceded by a caret.
function formats.sup(node,state)
  local raised = run_inline(node,state)
  return "^" .. raised
end

-- <SPAN>: the inline content.  When the span carries a LANG attribute, or
-- its CLASS contains "booktitle", U+0332 (combining low line) is appended
-- to every character.
function formats.span(node,state)
  local res   = run_inline(node,state)
  local attr  = node.attributes
  local class = attr.class

  if attr.lang or (class and class:match "booktitle") then
    -- Walk the text one UTF-8 character at a time (not one byte at a time,
    -- as "%C" would) so the combining mark is never inserted in the middle
    -- of a multi-byte sequence.  Control characters and the NEL line-break
    -- marker ("\194\133", emitted for <BR>) are left untouched.
    res = res:gsub(utf8.charpattern,function(c)
      if c ~= "\194\133" and not c:find("^%c") then
        return c .. "\u{332}"
      end
    end)
  end

  return res
end

-- <BDO>: always yields the empty string; the element produces no output.
formats.bdo = function()
  return ""
end

-- <MAP>: always yields the empty string; the element produces no output.
formats.map = function()
  return ""
end

-- <AREA>: always yields the empty string; the element produces no output.
formats.area = function()
  return ""
end

-- <OBJECT>: always yields the empty string; the element produces no output.
formats.object = function()
  return ""
end

-- <PARAM>: always yields the empty string; the element produces no output.
formats.param = function()
  return ""
end

-- <INPUT>: always yields the empty string; the element produces no output.
formats.input = function()
  return ""
end

-- <SELECT>: always yields the empty string; the element produces no output.
formats.select = function()
  return ""
end

-- <TEXTAREA>: always yields the empty string; the element produces no
-- output.
formats.textarea = function()
  return ""
end

-- <LABEL>: always yields the empty string; the element produces no output.
formats.label = function()
  return ""
end

-- <BUTTON>: always yields the empty string; the element produces no output.
formats.button = function()
  return ""
end

-- <OPTGROUP>: always yields the empty string; the element produces no
-- output.
formats.optgroup = function()
  return ""
end

-- <OPTION>: always yields the empty string; the element produces no output.
formats.option = function()
  return ""
end

-- <U>: the inline content with U+0332 (combining low line) appended to
-- every character.  Returns a single string.
--
-- NOTE: formats.u is also defined earlier in this file; this later
-- definition is the one that ends up registered.
function formats.u(node,state)
  local text = run_inline(node,state)

  -- Walk the text one UTF-8 character at a time (not one byte at a time, as
  -- "%C" would) so the combining mark is never inserted in the middle of a
  -- multi-byte sequence.  Control characters and the NEL line-break marker
  -- ("\194\133", emitted for <BR>) are left untouched.  The parentheses
  -- drop gsub()'s substitution count.
  return (text:gsub(utf8.charpattern,function(c)
    if c ~= "\194\133" and not c:find("^%c") then
      return c .. "\u{332}"
    end
  end))
end

-- <FONT>: no decoration, just the inline content.
function formats.font(node,state)
  local text = run_inline(node,state)
  return text
end

-- *************************************************************************
-- BLOCK
-- *************************************************************************

-- <P>: the inline content, a newline, the current nesting prefix and
-- another newline.  Inside a screenplay <DL> (state.dl_screenplay) a
-- paragraph of class 'setting' is wrapped as "[Setting: ...]" and one of
-- class 'direction' as "[...]".
function formats.p(node,state)
  local text  = run_inline(node,state)
  local class = node.attributes.class

  if class and state.dl_screenplay then
    if class == 'setting' then
      text = '[Setting: ' .. text .. ']'
    elseif class == 'direction' then
      text = '[' .. text .. ']'
    end
  end

  return text .. '\n' .. state.nest[#state.nest] .. '\n'
end

-- fixline is an LPeg substitution pattern that takes one extra match
-- argument (a line prefix).  It inserts that prefix at the very start of
-- the subject and again after every newline that is followed by at least
-- one more character; a newline ending the subject gets no prefix.
local fixline do
  -- '*' and '/' share precedence and associate to the left, so this reads
  -- as ((P"\n" * #P(1)) / '\n') * Carg(1): the newline (with a lookahead for
  -- one more character) is replaced by itself, then the zero-width Carg(1)
  -- capture contributes the prefix.  Any other character is copied as is.
  local char = lpeg.P"\n" * #lpeg.P(1) / '\n' * lpeg.Carg(1)
             + lpeg.P(1)
  -- the leading Carg(1) puts the prefix in front of the first line
  fixline    = lpeg.Cs(lpeg.Carg(1) * char^0)
end

-- <PRE>: the content between "```" fence lines, with the current nesting
-- prefix inserted at the start of every line by fixline.
--
-- The children are rendered through run_inline() so that comment nodes are
-- skipped here as they are everywhere else, instead of being dispatched to
-- a formatter that does not exist.
function formats.pre(node,state)
  local res = "\n```\n" .. run_inline(node,state) .. "\n```\n\n"
  return fixline:match(res,1,state.nest[#state.nest])
end

-- <BLOCKQUOTE>: the flow content rendered with "> " added to the nesting
-- prefix, followed by a newline.  The prefix is popped again afterwards.
function formats.blockquote(node,state)
  local nest   = state.nest
  local deeper = nest[#nest] .. "> "

  table.insert(nest,deeper)
  local quoted = run_flow(node,state)
  table.remove(nest)

  return quoted .. '\n'
end

-- <HR>: a row of five asterisks after the current nesting prefix.
function formats.hr(_,state)
  local nest = state.nest
  return nest[#nest] .. " * * * * *\n"
end

-- <ADDRESS>: always yields the empty string; the element produces no
-- output.
formats.address = function()
  return ""
end

-- <H1>: the inline content after a '# ' marker.
-- NOTE(review): unlike <H2>..<H4> no newline is appended here -- confirm
-- this is intended.
function formats.h1(node,state)
  local heading = run_inline(node,state)
  return '# ' .. heading
end

-- <H2>: the inline content after a '## ' marker, followed by a blank line.
function formats.h2(node,state)
  local heading = run_inline(node,state)
  return '## ' .. heading .. '\n\n'
end

-- <H3>: the inline content after a '## ' marker (the same marker <H2>
-- uses), followed by a blank line.
function formats.h3(node,state)
  local heading = run_inline(node,state)
  return '## ' .. heading .. '\n\n'
end

-- <H4>: the inline content after a '## ' marker (the same marker <H2>
-- uses), followed by a blank line.
function formats.h4(node,state)
  local heading = run_inline(node,state)
  return '## ' .. heading .. '\n\n'
end

-- <H5> and <H6> share the <H1> formatter, so they are emitted with its
-- '# ' marker.
-- NOTE(review): the lowest heading levels mapping to the top-level marker
-- looks surprising -- confirm this is intended.
formats.h5 = formats.h1
formats.h6 = formats.h1

-- <DIV>: the flow content.  While the content is rendered state.div_pf
-- tells whether this <DIV> has CLASS="pf" (formats.img appends a <BR>
-- marker when it is set).
--
-- The previous value of the flag is restored afterwards rather than being
-- forced to false, so a <DIV> nested inside a "pf" <DIV> no longer switches
-- the flag off for the remainder of the outer one.
function formats.div(node,state)
  local outer  = state.div_pf
  state.div_pf = node.attributes.class == 'pf'
  local res    = run_flow(node,state)
  state.div_pf = outer
  return res
end

        -- ***************
        -- DICTIONARY LIST
        -- ***************
        
-- <DL>: the block content followed by a newline and the current nesting
-- prefix.  A CLASS containing "header" or "screenplay" sets
-- state.dl_header / state.dl_screenplay while the list is rendered; <DT>,
-- <DD> and <P> consult those flags.
--
-- The flags are restored to their previous values afterwards rather than
-- being forced to false, so a <DL> nested inside another one no longer
-- clears the flags for the remainder of the outer list.
function formats.dl(node,state)
  local outer_header     = state.dl_header
  local outer_screenplay = state.dl_screenplay
  local class            = node.attributes.class

  if class then
    state.dl_header     = class:match "header"
    state.dl_screenplay = class:match "screenplay"
  end

  local res = run_block(node,state)

  state.dl_header     = outer_header
  state.dl_screenplay = outer_screenplay

  return res .. "\n" .. state.nest[#state.nest]
end

-- <DT>: the current nesting prefix, the inline content and a terminator
-- that depends on the kind of list: ":" plus a non-breaking space in a
-- header list, ": " in a screenplay, and a blank line otherwise.
function formats.dt(node,state)
  local term = run_inline(node,state)
  local tail

  if state.dl_header then
    tail = ":" .. ENTITIES.nbsp
  elseif state.dl_screenplay then
    tail = ": "
  else
    tail = '\n\n'
  end

  return state.nest[#state.nest] .. term .. tail
end

-- <DD>: the flow content.  In a header list (state.dl_header) the
-- innermost nesting prefix is taken off state.nest while the content is
-- rendered and put back afterwards.
--
-- The prefix is only removed when another entry remains beneath it.
-- Removing the sole entry would leave state.nest empty, and wrap_text()
-- would then try to concatenate a nil prefix.
function formats.dd(node,state)
  if state.dl_header and #state.nest > 1 then
    local nest = table.remove(state.nest)
    local res  = run_flow(node,state)
    table.insert(state.nest,nest)
    return res
  end

  return run_flow(node,state)
end

        -- ********
        -- LISTS
        -- ********
        
-- <UL>: the flow content followed by a blank line.  An unordered-list
-- record sits on top of state.list while the items are rendered.
function formats.ul(node,state)
  local list = state.list

  table.insert(list, { type = 'u' })
  local items = run_flow(node,state)
  table.remove(list)

  return items .. '\n\n'
end

-- <OL>: the flow content followed by a blank line.  An ordered-list record
-- (counter starting at 1) sits on top of state.list while the items are
-- rendered, and state.ol_footnote / state.ol_outline tell whether the list
-- has CLASS="footnote" / CLASS="outline".
--
-- The two flags are restored to their previous values afterwards, so a
-- nested <OL> no longer changes how the remaining items of the enclosing
-- list are numbered.
function formats.ol(node,state)
  local class          = node.attributes.class
  local outer_footnote = state.ol_footnote
  local outer_outline  = state.ol_outline

  table.insert(state.list, { type = 'o' , idx = 1 })
  state.ol_footnote = class == 'footnote'
  state.ol_outline  = class == 'outline'

  local res = run_flow(node,state)

  table.remove(state.list)
  state.ol_footnote = outer_footnote
  state.ol_outline  = outer_outline

  return res .. '\n\n'
end

-- <LI>: the flow content rendered with the item marker added to the
-- nesting prefix.  The marker is "* " in an unordered list; in an ordered
-- list it carries the running number, as "* [  n] " for a footnote list
-- and "*   n. " otherwise, and the list counter is advanced.
function formats.li(node,state)
  local info = state.list[#state.list]
  local marker

  if info.type == 'u' then
    marker = "* "
  else
    local n  = info.idx
    info.idx = n + 1
    marker   = string.format(state.ol_footnote and "* [%3d] " or "* %3d. ",n)
  end

  local nest = state.nest
  table.insert(nest,nest[#nest] .. marker)
  local item = run_flow(node,state)
  table.remove(nest)

  return item
end

-- <NOSCRIPT>: always yields the empty string; the element produces no
-- output.
formats.noscript = function()
  return ""
end

-- <FORM>: always yields the empty string; the element produces no output.
formats.form = function()
  return ""
end

-- <FIELDSET>: always yields the empty string; the element produces no
-- output.
formats.fieldset = function()
  return ""
end

-- <LEGEND>: always yields the empty string; the element produces no output.
formats.legend = function()
  return ""
end

        -- **********
        -- TABLE
        -- **********
        
-- <TABLE>: the block content between "```" fence lines.  A footer stashed
-- in state.tfoot by formats.tfoot while the content was rendered is placed
-- after the rest of the content, and the stash is cleared.
function formats.table(node,state)
  local body = run_block(node,state)
  local foot = state.tfoot or ""

  state.tfoot = nil
  return "```\n" .. body .. foot .. "\n```\n\n"
end

-- <CAPTION>: "Table: " and the inline content on a line of its own, after
-- the current nesting prefix.
function formats.caption(node,state)
  local prefix = state.nest[#state.nest]
  local title  = run_inline(node,state)
  return prefix .. "Table: " .. title .. '\n'
end

-- <COL>: always yields the empty string; the element produces no output.
formats.col = function()
  return ""
end

-- <COLGROUP>: always yields the empty string; the element produces no
-- output.
formats.colgroup = function()
  return ""
end

-- <THEAD>: the header rows followed by a rule of dashes after the current
-- nesting prefix.
function formats.thead(node,state)
  local rows = run_block(node,state)
  local rule = state.nest[#state.nest] .. "------------------------------\n"
  return rows .. rule
end

-- <TFOOT>: produces no output in place.  A rule of dashes (after the
-- current nesting prefix) followed by the footer rows is stashed in
-- state.tfoot, which formats.table appends after the rest of the table.
function formats.tfoot(node,state)
  local rule = state.nest[#state.nest] .. "------------------------------\n"
  local rows = run_block(node,state)

  state.tfoot = rule .. rows
  return ""
end

-- <TBODY>: just its rows, rendered as block content.
function formats.tbody(node,state)
  local rows = run_block(node,state)
  return rows
end

-- <TR>: one line made of the current nesting prefix, the cells and a
-- newline.  The cell separator (state.tr_sep) is cleared first so the
-- first cell of the row is not preceded by one.
function formats.tr(node,state)
  state.tr_sep = ""

  local prefix = state.nest[#state.nest]
  local cells  = run_block(node,state)
  return prefix .. cells .. '\n'
end

        -- -------------------------------------------------------------------------
        -- <TH> and <TD> are flow types, but I don't use tables to format
        -- text, but to present data, so I'm using inline type here.
        -- -------------------------------------------------------------------------
        
-- <TH>: the pending cell separator followed by the inline content.  The
-- separator is a tab from the second cell of a row onwards.
function formats.th(node,state)
  local sep  = state.tr_sep
  local cell = run_inline(node,state)

  state.tr_sep = "\t"
  return sep .. cell
end

-- <TD>: the pending cell separator followed by the inline content.  The
-- separator is a tab from the second cell of a row onwards.
function formats.td(node,state)
  local sep  = state.tr_sep
  local cell = run_inline(node,state)

  state.tr_sep = "\t"
  return sep .. cell
end

-- *************************************************************************
-- INS/DEL
-- *************************************************************************

-- <INS>: undecorated content, rendered as inline content when the node is
-- flagged inline and as block content otherwise.
function formats.ins(node,state)
  local render = node.inline and run_inline or run_block
  return render(node,state)
end

-- <DEL>: the content with U+0336 (combining long stroke overlay) placed in
-- front of every character.  The content is rendered inline when the node
-- is flagged inline; otherwise it is rendered as a block and a newline is
-- appended.  Returns a single string.
--
-- NOTE(review): the combining mark is placed *before* each character (the
-- underline formatters place theirs after) -- confirm this is intended.
function formats.del(node,state)
  local inline = node.inline
  local text

  if inline then
    text = run_inline(node,state)
  else
    text = run_block(node,state)
  end

  -- Walk the text one UTF-8 character at a time (not one byte at a time, as
  -- "%C" would) so the combining mark is never inserted in the middle of a
  -- multi-byte sequence.  Control characters and the NEL line-break marker
  -- ("\194\133", emitted for <BR>) are left untouched.
  text = text:gsub(utf8.charpattern,function(c)
    if c ~= "\194\133" and not c:find("^%c") then
      return "\u{336}" .. c
    end
  end)

  if inline then
    return text
  end

  return text .. "\n"
end

-- *************************************************************************

-- Module entry point: render the parsed document DOC as flow content.
-- Returns the resulting text and the list of link targets collected along
-- the way (the "[n]" references in the text index into that list).  BASE,
-- WHAT and AFFILIATE are only stored in the formatting state here.
return function(doc,base,what,affiliate)
  local links = {}
  local state =
  {
    links     = links,
    abbr      = {},      -- abbreviations already expanded
    list      = {},      -- stack of open <UL>/<OL> records
    nest      = { "" },  -- stack of line prefixes
    pre       = false,
    base      = base,
    what      = what,
    affiliate = affiliate,
  }

  local text = run_flow(doc,state)
  return text,links
end

-- *************************************************************************