💾 Archived View for mederle.de › b › i › surfmap.org captured on 2022-01-08 at 13:51:56.
View Raw
More Information
⬅️ Previous capture (2021-12-03)
-=-=-=-=-=-=-
# surfmap.org -- 2021-03-06 15:37
#+Title:Surfmap for Germinal
#+STARTUP: indent
#+PROPERTY: header-args :tangle "surfmap.lisp"
Create a flat listing of all external pointers from all pages in a capsule. Sort order: by date, newest page (modification date) first, by position, last URL in page first.
- https://www.rosettacode.org/wiki/File_modification_time#Common_Lisp
#+begin_src lisp
(in-package :germinal-med)
#+end_src
#+begin_src lisp
(defun get-gmi-files (&optional (root *germinal-root*))
"Recursively get all pathnames for gmi files with their modification dates, mind blacklist"
(let (pathlist)
(fad:walk-directory root
(lambda (p)
(let ((type (pathname-type p)))
(when (and (string-equal type "gmi")
(not (germinal::path-blacklisted-p p)))
(setf pathlist (cons p pathlist))))))
pathlist))
#+end_src
- * Sort list of files chronologically
~file-write-date~ returns unix-millis.
#+begin_src lisp
(defun sort-files-chronologically (pathlist &key (sort-order :desc))
(loop for path in pathlist
collect (cons path (file-write-date path)) into dated
finally (if (eq :desc sort-order)
(return (mapcar #'car (sort dated #'> :key #'cdr)))
(return (mapcar #'car (sort dated #'< :key #'cdr))))))
#+end_src
- * Extract external URLs from file
An external URL is every line that starts with «=> » followed by a URL. If the host part does not match the supplied server name, the URL is external and to be listed. It will be listed exactly as on the originating page, whose main heading and URL will be used as subtitle und reference link.
#+begin_src lisp
(defun extract-ext-urls (page &key (server *germinal-server-name*) (root *germinal-root*) (title t))
"Extracts all external urls from page. If :title is nil, returns a list of the URL lines.
If :title is t, returns a string of page heading, page url, and rest URL lines."
(let ((listing (make-string-output-stream))
(heading)
(parent-url)
(urls))
(with-open-file (in page :direction :input)
(loop for line = (read-line in nil 'eof)
until (eq line 'eof)
do (when (> (length line) 3)
(cond ((and title
(string= "# " line :start2 0 :end2 2))
(setf heading (subseq line 2)))
((and (string= "=> " line :start2 0 :end2 3)
(search "://" line) ; otherwise local
(not (search (concatenate 'string "gemini://" server) line)))
(setf urls (cons line urls)))))))
(when (and title urls)
(setf parent-url (enough-namestring page (fad:pathname-as-directory root))))
(when urls
(when title
(format listing "~&## ~A~%=> ~A Parent URL: ~A~%" (or heading "Untitled") parent-url (or heading "Untitled")))
(format listing "~&~{~A~%~}~%" urls))
(get-output-stream-string listing)))
#+end_src
#+RESULTS:
: EXTRACT-EXT-URLS
#+begin_src lisp
(defun surfmap-view (request &optional name &rest junk)
(declare (ignore request name junk))
"A compilation of all the capsule's external links"
(make-response
20
"text/gemini"
(format nil "# Spacewalk~%~%~{~A~}~%"
(loop for page in (sort-files-chronologically (get-gmi-files)) collect (extract-ext-urls page)))))
#+end_src
#+RESULTS:
: SURFMAP-VIEW
- * COMMENT Local Definitions for Tests
#+begin_src lisp
(setf *germinal-routes*
'(("/hello/(.*)/?" . hello-world-view)
("/hello/?" . hello-world-view)
("/links" . surfmap-view)
(".*" . gemini-serve-file-or-directory)))
#+end_src
######################################################################
# Local variables:
# org-confirm-babel-evaluate: nil
# eval: (add-hook 'after-save-hook (lambda () (org-babel-tangle)) nil t)
# End: