💾 Archived View for zaibatsu.circumlunar.space › ~solderpunk › files › vf1.py captured on 2020-09-24 at 02:09:56.
-=-=-=-=-=-=-
#!/usr/bin/env python3
# VF-1 Gopher client
# (C) 2018 Solderpunk <solderpunk@sdf.org>
# With contributions from:
# - Alex Schroeder <alex@gnu.org>
# - Joseph Lyman <tfurrows@sdf.org>

import argparse
import cmd
import codecs
import collections
import fnmatch
import io
import mimetypes
import os.path
import random
import shlex
import shutil
import socket
import subprocess
import sys
import tempfile
import traceback
import urllib.parse
import ssl

# Use chardet if it's there, but don't depend on it
try:
    import chardet
    _HAS_CHARDET = True
except ImportError:
    _HAS_CHARDET = False

# Command abbreviations
_ABBREVS = {
    "a": "add",
    "b": "back",
    "bm": "bookmarks",
    "book": "bookmarks",
    "f": "fold",
    "fo": "forward",
    "g": "go",
    "h": "history",
    "hist": "history",
    "l": "less",
    "li": "links",
    "m": "mark",
    "n": "next",
    "p": "previous",
    "prev": "previous",
    "q": "quit",
    "r": "reload",
    "s": "save",
    "se": "search",
    "/": "search",
    "t": "tour",
    "u": "up",
    "v": "veronica",
}

# Programs to handle different item types
_ITEMTYPE_TO_MIME = {
    "1": "text/plain",
    "0": "text/plain",
    "h": "text/html",
    "g": "image/gif",
}

_MIME_HANDLERS = {
    "application/pdf": "xpdf %s",
    "audio/mpeg": "mpg123 %s",
    "audio/ogg": "ogg123 %s",
    "image/*": "feh %s",
    "text/html": "lynx -dump -force_html %s",
    "text/plain": "cat %s",
}

# Item type formatting stuff
_ITEMTYPE_TITLES = {
    "7": " <INP>",
    "8": " <TEL>",
    "9": " <BIN>",
    "g": " <IMG>",
    "h": " <HTM>",
    "s": " <SND>",
    "I": " <IMG>",
    "T": " <TEL>",
}

_ANSI_COLORS = {
    "red": "\x1b[0;31m",
    "green": "\x1b[0;32m",
    "yellow": "\x1b[0;33m",
    "blue": "\x1b[0;34m",
    "purple": "\x1b[0;35m",
    "cyan": "\x1b[0;36m",
    "white": "\x1b[0;37m",
    "black": "\x1b[0;30m",
}

_ITEMTYPE_COLORS = {
    "0": _ANSI_COLORS["green"],     # Text File
    "1": _ANSI_COLORS["blue"],      # Sub-menu
    "7": _ANSI_COLORS["red"],       # Search / Input
    "8": _ANSI_COLORS["purple"],    # Telnet
    "9": _ANSI_COLORS["cyan"],      # Binary
    "g": _ANSI_COLORS["blue"],      # Gif
    "h": _ANSI_COLORS["green"],     # HTML
    "s": _ANSI_COLORS["cyan"],      # Sound
    "I": _ANSI_COLORS["cyan"],      # Image
    "T": _ANSI_COLORS["purple"],    # Telnet
}

# Characters left unescaped when a selector is written into a URL.  "?" is
# kept readable because url_to_gopheritem() glues the query part back onto
# the selector; "#" and "%" are deliberately NOT listed so they get escaped
# and survive a round trip through urllib.parse.urlparse().
_URL_SAFE_CHARS = "/?=&:;,+@!$'()*~"

# Lightweight representation of an item in Gopherspace
GopherItem = collections.namedtuple("GopherItem",
        ("host", "port", "path", "itemtype", "name", "tls"))


def url_to_gopheritem(url):
    """Build a GopherItem from a gopher:// or gophers:// URL.

    The first character after the leading "/" of the URL path is taken as
    the item type and the remainder as the selector (RFC 4266, section
    2.1).  A URL with no such character gets item type "1" (menu).  The
    port defaults to 70 and ``tls`` is True only for the "gophers" scheme.

    Percent-escapes in the selector are decoded, and a "?..." part that
    urlparse() splits off as a query is re-attached, because a Gopher
    selector is an opaque string in which "?" has no special meaning.
    """
    u = urllib.parse.urlparse(url)
    # https://tools.ietf.org/html/rfc4266#section-2.1
    path = u.path
    if u.path and u.path[0] == '/' and len(u.path) > 1:
        itemtype = u.path[1]
        path = u.path[2:]
    else:
        # Use item type 1 for top-level selector
        itemtype = "1"
    if u.query:
        # Without this, everything after the first "?" was silently lost.
        path += "?" + u.query
    path = urllib.parse.unquote(path)
    return GopherItem(u.hostname, u.port or 70, path, str(itemtype),
                      "<direct URL>", u.scheme == "gophers")


def gopheritem_to_url(gi):
    """Return a URL string for *gi*.

    Items with a host become "gopher://" / "gophers://" URLs, with the
    selector percent-encoded so that url_to_gopheritem() can recover it.
    Items without a host (local files) are rendered as their bare path,
    and a false-y *gi* yields the empty string.
    """
    if gi and gi.host:
        return ("gopher%s://%s:%d/%s%s" % (
            "s" if gi.tls else "",
            gi.host, int(gi.port), gi.itemtype,
            urllib.parse.quote(gi.path or "", safe=_URL_SAFE_CHARS)))
    elif gi:
        return gi.path
    else:
        return ""


def gopheritem_from_line(line, tls):
    """Parse one tab-separated menu line into a GopherItem.

    Raises ValueError when the line does not have exactly four fields
    (after dropping a trailing Gopher+ "+" field) and IndexError when the
    display field is empty.  It is up to the caller to catch these.
    The port is kept as the string found in the line.
    """
    # Split on tabs.  Strip final element after splitting,
    # since if we strip first we lose empty elements.
    parts = line.split("\t")
    parts[-1] = parts[-1].strip()
    # Discard Gopher+ noise
    if parts[-1] == "+":
        parts = parts[:-1]
    # Attempt to assign variables.  This may fail.
    # It's up to the caller to catch the Exception.
    name, path, server, port = parts
    itemtype = name[0]
    name = name[1:]
    return GopherItem(server, port, path, itemtype, name, tls)


def gopheritem_to_line(gi, name=""):
    """Serialise *gi* as a menu line, optionally overriding its name."""
    # Prepend itemtype to name
    name = str(gi.itemtype) + (name or gi.name)
    return "\t".join((name, gi.path, gi.host or "", str(gi.port))) + "\n"


# Cheap and cheerful URL detector
def looks_like_url(word):
    """Return True for words containing a "." that start with a gopher scheme."""
    return "." in word and word.startswith(("gopher://", "gophers://"))

# Decorators
def needs_gi(inner):
    """Decorate a command method so it only runs once a location is loaded.

    When ``self.gi`` is false-y the wrapper prints a hint and returns None
    instead of calling *inner*.  The wrapped method's docstring is kept,
    because cmd.Cmd reads ``do_*.__doc__`` to answer ``help <command>``.
    """
    import functools

    @functools.wraps(inner)
    def outer(self, *args, **kwargs):
        if not self.gi:
            print("You need to 'go' somewhere, first")
            return None
        return inner(self, *args, **kwargs)
    return outer


class GopherClient(cmd.Cmd):
    """Interactive Gopher client built on cmd.Cmd."""

    def __init__(self, tls=False):
        """Initialise empty navigation state and default options."""
        cmd.Cmd.__init__(self)
        self.set_prompt(tls)
        self.tmp_filename = ""      # temp file holding the last fetched item
        self.idx_filename = ""      # temp file holding the last rendered menu
        self.index = []             # GopherItems of the current menu
        self.index_index = -1
        self.history = []
        self.hist_index = 0
        self.page_index = 0
        self.lookup = self.index    # list that numeric input indexes into
        self.gi = None              # item currently being viewed
        self.waypoints = []
        self.marks = {}
        self.mirrors = {}           # (host, port, path) -> [(host, port, path)]
        self.options = {
            "auto_page" : False,
            "auto_page_threshold" : 40,
            "color_menus" : False,
            "encoding" : "iso-8859-1",
        }

    def set_prompt(self, tls):
        """Record the TLS mode and colour the prompt accordingly."""
        self.tls = tls
        if self.tls:
            self.prompt = "\x1b[38;5;196m" + "VF-1" + "\x1b[38;5;255m" + "> " + "\x1b[0m"
        else:
            self.prompt = "\x1b[38;5;202m" + "VF-1" + "\x1b[38;5;255m" + "> " + "\x1b[0m"

    def _launch_session(self, gi):
        """Run telnet or ssh for *gi*; return False for any other item type.

        The argument vector is built as a list rather than formatted into
        a string and split again: host, port and path come straight from a
        remote menu, and whitespace in them must not become extra
        command-line arguments.
        """
        if gi.itemtype in ("8", "T"):
            argv = ["telnet"]
            if gi.path:
                argv += ["-l", gi.path]
            argv += [str(gi.host), str(gi.port)]
        elif gi.itemtype == "S":
            argv = ["ssh", "%s@%s" % (gi.path, gi.host), "-p", str(gi.port)]
        else:
            return False
        subprocess.call(argv)
        return True

    def _report_network_error(self, network_error):
        """Print the user-facing message for a recoverable network error."""
        if isinstance(network_error, socket.gaierror):
            print("ERROR: DNS error!")
        elif isinstance(network_error, ConnectionRefusedError):
            print("ERROR: Connection refused!")
        elif isinstance(network_error, ConnectionResetError):
            print("ERROR: Connection reset!")
        elif isinstance(network_error, TimeoutError):
            print("ERROR: Connection timed out!")
            if self.tls:
                print("Disable battloid mode using 'tls' to enter civilian territory.")
            else:
                print("Switch to battloid mode using 'tls' to enable encryption.")
        elif isinstance(network_error, socket.timeout):
            print("ERROR: This is taking too long.")
            if not self.tls:
                print("Switch to battloid mode using 'tls' to enable encryption.")

    def _go_to_gi(self, gi, update_hist=True, query_str=None):
        """Fetch *gi*, store it in a temp file and display it.

        Telnet ("8"/"T") and ssh ("S") items are handed to the external
        program.  Items without a host are read from the local file
        ``gi.path``.  Type "7" items prompt for a query unless *query_str*
        is given.  Menus ("1"/"7") go to _handle_index(); everything else
        is opened with the command chosen by get_handler_cmd().

        On a network error a registered mirror is tried, if there is one.
        ``self.gi`` and the history are only updated after a successful
        fetch (history only when *update_hist* is true).
        """
        # Telnet and ssh are completely separate things
        if self._launch_session(gi):
            if update_hist:
                self._update_history(gi)
            return

        # From here on in, it's gopher only

        # Hit the network
        try:
            # Is this a local file?
            if not gi.host:
                f = open(gi.path, "rb")
            # Is this a search point?
            elif gi.itemtype == "7":
                if not query_str:
                    query_str = input("Query term: ")
                # send_query / send_selector are defined elsewhere in the file
                f = send_query(gi.path, query_str, gi.host, gi.port or 70, self.tls)
            else:
                f = send_selector(gi.path, gi.host, gi.port or 70, self.tls)
        # Catch network exceptions, which may be recoverable if a redundant
        # mirror is specified
        except (socket.gaierror, ConnectionRefusedError,
                ConnectionResetError, TimeoutError, socket.timeout,
                ) as network_error:
            self._report_network_error(network_error)
            # Try to fall back on a redundant mirror
            new_gi = self._get_mirror_gi(gi)
            if new_gi:
                print("Trying redundant mirror %s..." % gopheritem_to_url(new_gi))
                self._go_to_gi(new_gi)
            return
        # ssl.SSLError is a subclass of OSError, so it has to be caught
        # BEFORE the generic OSError clause or this handler never runs.
        except ssl.SSLError as err:
            # reason may be missing or None; fall back to the full message
            reason = getattr(err, "reason", None) or str(err)
            print("ERROR: " + reason)
            if reason == "UNKNOWN_PROTOCOL":
                print(gopheritem_to_url(gi) + " is probably not encrypted.")
                print("In battloid mode, encryption is mandatory.")
                print("Use 'tls' to toggle battloid mode.")
            return
        # Catch non-network exceptions
        except OSError:
            print("ERROR: Operating system error... Recovery initiated...")
            print("Consider toggling battloid mode using 'tls' to adapt to the new situation.")
            return

        is_text = gi.itemtype in ("0", "1", "7", "h")
        try:
            if is_text:
                # Attempt to decode something that is supposed to be text
                try:
                    text_f = self._decode_text(f)
                except UnicodeError:
                    print("ERROR: Unknown text encoding!\n"
                          "If you know the correct encoding, use e.g. "
                          "'set encoding koi8-r' and try again.\n"
                          "Otherwise, install the 'chardet' library for "
                          "Python 3 to enable automatic encoding detection.")
                    return
                payload = text_f.read()
            else:
                text_f = None
                payload = f.read()
        finally:
            # The fetched stream has been fully read (or abandoned) here
            f.close()

        # Save the result in a temporary file
        ## Delete old file
        if self.tmp_filename:
            try:
                os.unlink(self.tmp_filename)
            except FileNotFoundError:
                # Already removed behind our back; nothing left to clean up
                pass
        ## Set file mode
        if is_text:
            mode = "w"
            encoding = "UTF-8"
        else:
            mode = "wb"
            encoding = None
        ## Write
        with tempfile.NamedTemporaryFile(mode, encoding=encoding, delete=False) as tmpf:
            tmpf.write(payload)
        self.tmp_filename = tmpf.name

        # Process that file handler depending upon itemtype
        if gi.itemtype in ("1", "7"):
            text_f.seek(0)
            self._handle_index(text_f)
        else:
            cmd_str = self.get_handler_cmd(gi)
            try:
                subprocess.call(shlex.split(cmd_str % tmpf.name))
            except FileNotFoundError:
                print("Handler program %s not found!" % shlex.split(cmd_str)[0])
                print("You can use the ! command to specify another handler program or pipeline.")

        # Update state
        self.gi = gi
        if update_hist:
            self._update_history(gi)

    def get_handler_cmd(self, gi):
        """Return the "%s"-style command template used to display *gi*.

        The MIME type comes from the item type when it is listed in
        _ITEMTYPE_TO_MIME, otherwise from the file name, otherwise from
        file(1) run on the current temp file.  Exact handler entries win
        over wildcard ones; "strings %s" is the last resort.
        """
        # First, get mimetype, either from itemtype or filename
        if gi.itemtype in _ITEMTYPE_TO_MIME:
            mimetype = _ITEMTYPE_TO_MIME[gi.itemtype]
        else:
            mimetype, encoding = mimetypes.guess_type(gi.path)
            if mimetype is None:
                # No idea what this is, try harder by looking at the
                # magic number using file(1)
                try:
                    out = subprocess.check_output(
                        ["file", "--brief", "--mime-type", self.tmp_filename])
                    mimetype = out.decode("UTF-8").strip()
                except (OSError, subprocess.CalledProcessError):
                    # file(1) is missing or failed: treat as opaque data so
                    # we still end up at the "strings" fallback below
                    mimetype = "application/octet-stream"
            # Don't permit file extensions to completely override the
            # vaguer imagetypes
            if gi.itemtype == "I" and not mimetype.startswith("image"):
                # The server declares this to be an image.
                # But it has a weird or missing file extension, so the
                # MIME type was guessed as something else.
                # We shall trust the server that it's an image.
                # Pretend it's a jpeg, because whatever handler the user has
                # set for jpegs probably has the best shot at handling this.
                mimetype = "image/jpeg"
            elif gi.itemtype == "s" and not mimetype.startswith("audio"):
                # As above, this is "weird audio".
                # Pretend it's an mp3?
                mimetype = "audio/mpeg"

        # Now look for a handler for this mimetype
        # Consider exact matches before wildcard matches.  sorted() is
        # stable, so entries keep their dict order within each group.
        candidates = sorted(_MIME_HANDLERS.items(),
                            key=lambda entry: "*" in entry[0])
        for handled_mime, cmd_str in candidates:
            if fnmatch.fnmatch(mimetype, handled_mime):
                return cmd_str
        # Use "strings" as a last resort.
        return "strings %s"

    def _decode_text(self, f):
        """Read all bytes from *f* and return them decoded in a StringIO.

        UTF-8 is tried first.  If that fails and chardet is installed, its
        guess is used when the confidence is above 0.5.  If chardet is not
        installed, is not confident, or its guess does not decode, the
        user-specified "encoding" option is used.  If that fails as well,
        UnicodeError propagates and it's up to the caller to handle it
        gracefully.
        """
        raw_bytes = f.read()
        # Try UTF-8 first:
        try:
            text = raw_bytes.decode("UTF-8")
        except UnicodeError:
            text = None
            # If we have chardet, try the magic
            if _HAS_CHARDET:
                autodetect = chardet.detect(raw_bytes)
                # Make sure we're vaguely certain
                if autodetect["encoding"] and autodetect["confidence"] > 0.5:
                    try:
                        text = raw_bytes.decode(autodetect["encoding"])
                    except (UnicodeError, LookupError):
                        text = None
            if text is None:
                # Try the user-specified encoding
                text = raw_bytes.decode(self.options["encoding"])
        return io.StringIO(text)

    def _handle_index(self, f):
        """Parse menu text from *f*, rebuild self.index and display it.

        Info lines ("i") and selectable items are written to a fresh index
        temp file.  "+" items are registered as mirrors of the previous
        item.  Lines that do not parse are ignored.  A "3" line is printed
        as a server error and abandons the menu, leaving no index file.
        """
        self.index = []
        if self.idx_filename:
            os.unlink(self.idx_filename)
        menu_lines = 0
        self.page_index = 0
        server_error = False
        # The with-block guarantees the temp file is closed on every path
        with tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) as tmpf:
            self.idx_filename = tmpf.name
            for line in f:
                if line.startswith("3"):
                    print("Error message from server:")
                    print(line[1:].split("\t")[0])
                    server_error = True
                    break
                elif line.startswith("i"):
                    tmpf.write(line[1:].split("\t")[0] + "\n")
                    menu_lines += 1
                else:
                    try:
                        gi = gopheritem_from_line(line, self.tls)
                    except (ValueError, IndexError):
                        # Silently ignore things which are not errors, information
                        # lines or things which look like valid menu items
                        continue
                    if gi.itemtype == "+":
                        self._register_redundant_server(gi)
                        continue
                    self.index.append(gi)
                    tmpf.write(self._format_gopheritem(len(self.index), gi) + "\n")
                    menu_lines += 1
                # Checked after every counted line, so the first page's item
                # count is recorded even if line number <threshold> happens
                # to be an info line.
                if self.options["auto_page"] and menu_lines == self.options["auto_page_threshold"]:
                    self.page_index = len(self.index)
        if server_error:
            os.unlink(self.idx_filename)
            self.idx_filename = ""
            return

        self.lookup = self.index
        self.index_index = -1

        if self.options["auto_page"]:
            subprocess.call(shlex.split("head -n %d %s" % (self.options["auto_page_threshold"], self.idx_filename)))
            if menu_lines > self.options["auto_page_threshold"]:
                print("...\n"
                      "(Long menu truncated. Use 'cat' or 'less' to view "
                      "whole menu, including informational messages.\n"
                      "Use 'ls', 'search' or blank line pagination to view "
                      "only menu entries.)")
        else:
            cmd_str = _MIME_HANDLERS["text/plain"]
            subprocess.call(shlex.split(cmd_str % self.idx_filename))

    def _register_redundant_server(self, gi):
        """Record *gi* as a mirror of the most recently indexed item."""
        if not self.index:
            # A "+" line before any real item has nothing to mirror
            return
        # This mirrors the last non-mirror item
        target = self.index[-1]
        target = (target.host, target.port, target.path)
        self.mirrors.setdefault(target, []).append((gi.host, gi.port, gi.path))

    def _get_mirror_gi(self, gi):
        """Return a GopherItem on a random mirror of *gi*, or None.

        A mirror entry applies when host and port match and ``gi.path``
        starts with the mirrored path.  Ports are compared as strings
        because items parsed from menus carry the port as text while items
        built from URLs carry an int.
        """
        # Search for a redundant mirror that matches this GI
        for (host, port, path_prefix), mirrors in self.mirrors.items():
            if (host == gi.host and str(port) == str(gi.port) and
                    gi.path.startswith(path_prefix)):
                break
        else:
            # If there are no mirrors, we're done
            return None
        # Pick a mirror at random and build a new GI for it
        mirror_host, mirror_port, mirror_path = random.choice(mirrors)
        new_gi = GopherItem(mirror_host, mirror_port,
                mirror_path + "/" + gi.path[len(path_prefix):],
                gi.itemtype, gi.name, gi.tls)
        return new_gi

    def _format_gopheritem(self, index, gi, name=True, url=False):
        """Return the display line "[index] name (url)" for *gi*.

        *name* and *url* select which parts are included; the line is
        wrapped in ANSI colour codes when the "color_menus" option is on
        and the item type has a colour assigned.
        """
        line = "[%d] " % index
        # Add item name, with itemtype indicator for non-text items
        if name:
            line += gi.name
            if gi.itemtype in _ITEMTYPE_TITLES:
                line += _ITEMTYPE_TITLES[gi.itemtype]
            elif gi.itemtype == "1":
                line += "/"
        # Add URL if requested
        if url:
            line += " (%s)" % gopheritem_to_url(gi)
        # Colourise
        if self.options["color_menus"] and gi.itemtype in _ITEMTYPE_COLORS:
            line = _ITEMTYPE_COLORS[gi.itemtype] + line + "\x1b[0m"
        return line

    def show_lookup(self, offset=0, end=None, name=True, url=False, reverse=False):
        """Print entries of self.lookup, numbered from ``offset + 1``.

        Normally the slice ``lookup[offset:end]`` is printed; with
        *reverse* the slice ``lookup[end:offset:-1]`` is used instead.
        NOTE(review): in reverse mode the printed numbers still count
        upwards from offset + 1 -- confirm against callers that this is
        what they expect.
        """
        if reverse:
            iterator = enumerate(self.lookup[end:offset:-1])
        else:
            iterator = enumerate(self.lookup[offset:end])
        for n, gi in iterator:
            print(self._format_gopheritem(n+offset+1, gi, name, url))

    def _update_history(self, gi):
        """Append *gi* after the current history position.

        Anything beyond the current position is discarded first.  Nothing
        happens when *gi* equals the entry at the current position.
        """
        # Don't duplicate
        if self.history and self.history[self.hist_index] == gi:
            return
        self.history = self.history[0:self.hist_index+1]
        self.history.append(gi)
        self.hist_index = len(self.history) - 1

_get_active_tmpfile(self): return self.idx_filename if self.gi.itemtype == "1" else self.tmp_filename # Cmd implementation follows def default(self, line): if line.strip() == "EOF": return self.onecmd("quit") elif line.strip() == "..": return self.do_up() elif line.startswith("/"): return self.do_search(line[1:]) # Expand abbreviated commands first_word = line.split()[0].strip() if first_word in _ABBREVS: full_cmd = _ABBREVS[first_word] expanded = line.replace(first_word, full_cmd, 1) return self.onecmd(expanded) # Try to parse numerical index for lookup table try: n = int(line.strip()) except ValueError: print("What?") return try: gi = self.lookup[n-1] except IndexError: print ("Index too high!") return self.index_index = n self._go_to_gi(gi) ### Settings def do_set(self, line): """View or set various options.""" if not line.strip(): # Show all current settings for option in sorted(self.options.keys()): print("%s %s" % (option, self.options[option])) elif len(line.split()) == 1: option = line.strip() if option in self.options: print("%s %s" % (option, self.options[option])) else: print("Unrecognised option %s" % option) else: option, value = line.split(" ", 1) if option not in self.options: print("Unrecognised option %s" % option) return elif option == "encoding": try: codecs.lookup(value) except LookupError: print("Unknown encoding %s" % value) return elif value.isnumeric(): value = int(value) elif value.lower() == "false": value = False elif value.lower() == "true": value = True self.options[option] = value if "auto_page" in option: print("""******************** WARNING!!!