[ANN] A Gemini crawler, for statistics about the geminispace



> On Dec 16, 2020, at 16:05, Stephane Bortzmeyer <stephane at sources.org> wrote:
> 
> I'm running a Gemini crawler, which gathers metadata about the
> geminispace.

Along those lines, a couple of one-liners to gather various host & content information:


# IP address(es)
# dig +short mozz.us
174.138.124.169


# geolocation
# curl --silent https://tools.keycdn.com/geo.json?host=174.138.124.169 | jq | gron
json = {};
json.data = {};
json.data.geo = {};
json.data.geo.asn = 14061;
json.data.geo.city = "North Bergen";
json.data.geo.continent_code = "NA";
json.data.geo.continent_name = "North America";
json.data.geo.country_code = "US";
json.data.geo.country_name = "United States";
json.data.geo.datetime = "2020-12-18 09:04:57";
json.data.geo.host = "174.138.124.169";
json.data.geo.ip = "174.138.124.169";
json.data.geo.isp = "DIGITALOCEAN-ASN";
json.data.geo.latitude = 40.793;
json.data.geo.longitude = -74.0247;
json.data.geo.metro_code = 501;
json.data.geo.postal_code = "07047";
json.data.geo.rdns = "174.138.124.169";
json.data.geo.region_code = "NJ";
json.data.geo.region_name = "New Jersey";
json.data.geo.timezone = "America/New_York";
json.description = "Data successfully received.";
json.status = "success";


# certificate info
# cfssl certinfo -domain mozz.us | jq | gron
json = {};
json.authority_key_id = "A8:4A:6A:63:04:7D:DD:BA:E6:D1:39:B7:A6:45:65:EF:F3:A8:EC:A1";
json.issuer = {};
json.issuer.common_name = "Let's Encrypt Authority X3";
json.issuer.country = "US";
json.issuer.names = [];
json.issuer.names[0] = "US";
json.issuer.names[1] = "Let's Encrypt";
json.issuer.names[2] = "Let's Encrypt Authority X3";
json.issuer.organization = "Let's Encrypt";
json.not_after = "2021-01-21T01:36:54Z";
json.not_before = "2020-10-23T01:36:54Z";
json.pem = "-----BEGIN 
CERTIFICATE-----\nMIIGJzCCBQ+gAwIBAgISBAK7/ku/XjgmczVT7mmM1cEcMA0GCSqGSIb3D
QEBCwUA\nMEoxCzAJBgNVBAYTAlVTMRYwFAYDVQQKEw1MZXQncyBFbmNyeXB0MSMwIQYDVQQD\n
ExpMZXQncyBFbmNyeXB0IEF1dGhvcml0eSBYMzAeFw0yMDEwMjMwMTM2NTRaFw0y\nMTAxMjEwM
TM2NTRaMBIxEDAOBgNVBAMTB21venoudXMwggEiMA0GCSqGSIb3DQEB\nAQUAA4IBDwAwggEKAo
IBAQDZ4pi5q0QlIxAo8sKNBgInG1BGH584lRghCdnrBsZD\n68IuFlJ3V3wrnfsaNv8IZOHRkvx
N2uxDo/oVxCCSNug/Ne4b+Pqw7U8thB9zL46A\nMbrHVtAmloykToDRlOHv/OLp2YRQiW7cD57l
xot+9+TPlHsAuMccQXQDMbmhT6bf\nirO4m6F6gRf478YLLVOmpxkLd87dhHa7gO3NwmRroIB/D
MLdQRAVAMbdDGTjdCrA\nlToWeHOnPNBLKPmI6M9DCqEXoTbIa9OhpJmo+txlS85O8/RHzXu2fV
kgnEnBIcsE\n/ZEh5ytov1SogIXzNQgIJFesaWCqgBPLun4molEnfcq5AgMBAAGjggM9MIIDOTA
O\nBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMAwG\nA1UdEw
EB/wQCMAAwHQYDVR0OBBYEFI3x/VWfHHCG1IfE32kGHZPG4RC6MB8GA1Ud\nIwQYMBaAFKhKamM
Efd265tE5t6ZFZe/zqOyhMG8GCCsGAQUFBwEBBGMwYTAuBggr\nBgEFBQcwAYYiaHR0cDovL29j
c3AuaW50LXgzLmxldHNlbmNyeXB0Lm9yZzAvBggr\nBgEFBQcwAoYjaHR0cDovL2NlcnQuaW50L
XgzLmxldHNlbmNyeXB0Lm9yZy8wgfIG\nA1UdEQSB6jCB54ILYXBpLm1venoudXOCE2FzdHJvYm
90YW55Lm1venoudXOCDGNo\nYXQubW96ei51c4ILZGV2Lm1venoudXOCDmdlbWluaS5tb3p6LnV
zggtnaXQubW96\nei51c4IRZ29vZHZpYmVzLm1venoudXOCDmdvcGhlci5tb3p6LnVzghRtYWls
LWFy\nY2hpdmUubW96ei51c4IMbWFpbC5tb3p6LnVzgg9taWNoYWVsLm1venoudXOCB21v\neno
udXOCDnBvcnRhbC5tb3p6LnVzgg1wcm94eS5tb3p6LnVzggt3d3cubW96ei51\nczBMBgNVHSAE
RTBDMAgGBmeBDAECATA3BgsrBgEEAYLfEwEBATAoMCYGCCsGAQUF\nBwIBFhpodHRwOi8vY3BzL
mxldHNlbmNyeXB0Lm9yZzCCAQQGCisGAQQB1nkCBAIE\ngfUEgfIA8AB3AJQgvB6O1Y1siHMfgo
siLA3R2k1ebE+UPWHbTi9YTaLCAAABdVNQ\n7ygAAAQDAEgwRgIhALmUv4K/i3UcPYCIseckN2n
fpk8g+Gi4MZRq6Ybr8/JXAiEA\n00kRkd+19OB2j4VASwsoQatWKasN+yTMnkQWOf2YMbsAdQB9
PvL4j/+IVWgkwsDK\nnlKJeSvFDngJfy5ql2iZfiLw1wAAAXVTUO9TAAAEAwBGMEQCICOymh52O
gxx/wjJ\ngo5TEIgfEDtgXvKdfBsVtibLeZQWAiAyiUPq2MBPxn9+KJFhhxE8LRI9VIhpWnHV\n
5JlOp2dIYzANBgkqhkiG9w0BAQsFAAOCAQEARqt9QyY4Fq7SBindKcHyrsQ9JtqB\nvfZy5yDKz
FwuQZKmk2pxOzapCNRLNeyiEalfIFzrtHI11gr1ZEFHL1rA7pO3ud/j\nM2r0lmvNf8W+kUVf4G
ng0TqGyRRh28RDNDCaz8uaYeg5C6BPUIZtHbO6qJBNme2W\noS4Qp0fjjAUvSQwTKDEh5GKnZv4
AnJifMRqSXgZ+HgsamqydODRRTszwCMTMGBhO\naUOf+wF9l90T9N3MLDxSdixh4/qMuE0LpIsy
eLJJ08ZsmOvOPtar0zxUw8AXMtGG\n62wmZhlY+vXD4Nk6cKTepSCVEHmCLTtckbHfn518wCQEv
JZYYVApG0y1QQ==\n-----END CERTIFICATE-----\n";
json.sans = [];
json.sans[0] = "api.mozz.us";
json.sans[1] = "astrobotany.mozz.us";
json.sans[2] = "chat.mozz.us";
json.sans[3] = "dev.mozz.us";
json.sans[4] = "gemini.mozz.us";
json.sans[5] = "git.mozz.us";
json.sans[6] = "goodvibes.mozz.us";
json.sans[7] = "gopher.mozz.us";
json.sans[8] = "mail-archive.mozz.us";
json.sans[9] = "mail.mozz.us";
json.sans[10] = "michael.mozz.us";
json.sans[11] = "mozz.us";
json.sans[12] = "portal.mozz.us";
json.sans[13] = "proxy.mozz.us";
json.sans[14] = "www.mozz.us";
json.serial_number = "349379594475839169414317025618006180741404";
json.sigalg = "SHA256WithRSA";
json.subject = {};
json.subject.common_name = "mozz.us";
json.subject.names = [];
json.subject.names[0] = "mozz.us";
json.subject_key_id = "8D:F1:FD:55:9F:1C:70:86:D4:87:C4:DF:69:06:1D:93:C6:E1:10:BA";


# retrieve content type
# openssl s_client -quiet -crlf -connect mozz.us:1965 <<< gemini://mozz.us/ 2>/dev/null | head -1
20 text/gemini; lang=en


# double check content type
# openssl s_client -quiet -crlf -connect mozz.us:1965 <<< gemini://mozz.us/ 2>/dev/null | file --brief --mime-type --mime-encoding -
text/plain; charset=utf-8


# validate encoding
# openssl s_client -quiet -crlf -connect mozz.us:1965 <<< gemini://mozz.us/ 2>/dev/null | iconv -f utf-8 -t utf-8 > /dev/null; echo $?
0


# guess language
# echo $(openssl s_client -quiet -crlf -connect mozz.us:1965 <<< gemini://mozz.us/ 2>/dev/null ) | polyglot detect | cut -d' ' -f1 | uniq
English

---

Previous in thread (4 of 28): 🗣️ Luke Emmet (luke (a) marmaladefoo.com)

Next in thread (6 of 28): 🗣️ Stephane Bortzmeyer (stephane (a) sources.org)

View entire thread.