2020-12-22 Apache config file to block user agents

This is a long one… My “/etc/apache2/conf-enabled/blocklist.conf” has grown over the years. It all started with a blog post I had seen entitled “stop fake user agents with htaccess”. The URL is somewhere in the config below, if you’re interested.

RewriteEngine On

# Fediverse server software fetching link previews: refuse with 403.
# The User-Agent is matched case-insensitively, anywhere in the header.
RewriteCond %{HTTP_USER_AGENT} "Mastodon|Friendica|Pleroma" [NC]
# The exceptions below are currently disabled.
# NOTE(review): they place the ! outside the quotes; mod_rewrite may then take
# the quote characters as part of the pattern. Consider the "!pattern" form
# and verify before re-enabling.
#   - Epicyon on the fedi.alexschroeder.ch domain
# RewriteCond "%{HTTP_HOST}" !"fedi.alexschroeder.ch"
#   - Webfinger stuff
# RewriteCond "%{REQUEST_URI}" !"\.well-known/webfinger"
#   - Oddmuse ActivityPub
# RewriteCond "%{QUERY_STRING}" !"action=(webfinger|actor|inbox|outbox)"
RewriteRule "^(.*)$" - [F,L]

# Misbehaving bots, recognised by a case-insensitive substring of the
# User-Agent header; every request from them is answered with 403 Forbidden.
RewriteCond %{HTTP_USER_AGENT} "pcore|megaindex|semrushbot|wiederfrei" [NC]
RewriteRule "^(.*)$" - [F,L]

# https://www.winhelp.info/stop-fake-user-agents-with-htaccess.html

## Detect abnormal user-agents by winhelp.info
## Version 1.656, 2018-12-03
## Deviations from the upstream list are marked "Local change".
## Part 1 - basic rules
# How this rule is evaluated: the Google-SearchByImage exclusion below has no
# [OR] flag, so it must hold in addition to the list. Every following
# condition carries [OR] except the very last one ("TO-Browser/TOB"), so a
# single hit anywhere in the list is enough to trigger the RewriteRule at the
# end. When adding entries, keep [OR] on all of them but the last; a missing
# [OR] in the middle splits the list into two groups that must both match.
# Add your informative page to prevent redirection loops
# RewriteCond %{REQUEST_URI} !strangebrowser\.html
## Exclusions
# Covenant Eyes parental monitoring
# RewriteCond %{REMOTE_ADDR} !^69\.41\.14\.
# VirusTotal Cloud uses MSIE 9.0; Windows NT 9.0 user agent
# RewriteCond %{HTTP_USER_AGENT} !virustotalcloud\)$
# Google-SearchByImage uses mismatching version numbers
RewriteCond %{HTTP_USER_AGENT} !\ Google-SearchByImage\)
## UA blacklist
# Known bad bots ignoring or not reading robots.txt
RewriteCond %{HTTP_USER_AGENT} "centurybot" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "cognitiveseo\.com" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "DnyzBot/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "evc-batch/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Facebot\ Twitterbot/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Gluten\ Free\ Crawler" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Gowikibot/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "GrapeshotCrawler/" [OR]
RewriteCond %{HTTP_USER_AGENT} "IndeedBot\ " [OR]
RewriteCond %{HTTP_USER_AGENT} "linkdexbot" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MS\ Search\ 6\.0\ Robot" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "PaperLiBot/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "PowerMapper\.com" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "raventools\.com" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "RukiCrawler" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "SemrushBot" [OR]
RewriteCond %{HTTP_USER_AGENT} "SeoBotM6" [OR]
RewriteCond %{HTTP_USER_AGENT} "seocharger" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "SEOkicks-Robot" [OR]
RewriteCond %{HTTP_USER_AGENT} "SMTBot/" [OR]
RewriteCond %{HTTP_USER_AGENT} "XoviBot/" [OR]
RewriteCond %{HTTP_USER_AGENT} "ZoomBot" [OR]
# Known bad WordPress login page bot; Firefox does not use full version numbers anymore
RewriteCond %{HTTP_USER_AGENT} "rv:40\.0\)\ Gecko/20100101\ Firefox/40\.1$" [NC,OR]
# Known site analysis bots that try to reveal sensitive data about your server
RewriteCond %{HTTP_USER_AGENT} "Wappalyzer" [OR]
# Non-standard beginnings
# RewriteCond %{HTTP_USER_AGENT} ^-?$ [OR]
RewriteCond %{HTTP_USER_AGENT} "^\(" [OR]
RewriteCond %{HTTP_USER_AGENT} ^\' [OR]
RewriteCond %{HTTP_USER_AGENT} "^\ " [OR]
RewriteCond %{HTTP_USER_AGENT} ^\" [OR]
# RewriteCond %{HTTP_USER_AGENT} ^- [OR]
RewriteCond %{HTTP_USER_AGENT} ^= [OR]
RewriteCond %{HTTP_USER_AGENT} ^\.$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^\\$ [OR]
RewriteCond %{HTTP_USER_AGENT} ^\d [OR]
RewriteCond %{HTTP_USER_AGENT} "^Chrome" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Empty" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Firefox" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^IE" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Internet\ Explorer" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^MSIE" [NC,OR]
# Non-standard endings
RewriteCond %{HTTP_USER_AGENT} "Chrome$" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Firefox$" [NC,OR]
# Way too short user agent strings
RewriteCond %{HTTP_USER_AGENT} "^Mozilla$" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Mozilla/\d\.\d$" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Mozilla\ compatible$" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Mozilla/\d\.\d\ \(compatible\)$" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Mozilla/\d\.\d\ \(compatible;\)$" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Opera$" [NC,OR]
# Repeating same stuff
RewriteCond %{HTTP_USER_AGENT} "compatible.*compatible" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Gecko.*Gecko" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Mozilla.*Mozilla" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE.*MSIE" [NC,OR]
# Missing space after closing parenthesis
# Local change: was [a-z|A-Z]. Inside a character class the | is a literal
# pipe, so ")|" was blocked as well; NC already covers upper-case letters.
RewriteCond %{HTTP_USER_AGENT} "\)([a-z])" [NC,OR]
# Fake mixtures of browsers
RewriteCond %{HTTP_USER_AGENT} "Firefox.*Netscape" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Firefox.*Opera" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE.*Chrome/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE.*Firefox" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE.*Edge/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE.*rv:" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Opera.*Trident/" [NC,OR]
# Letters instead of version numbers
RewriteCond %{HTTP_USER_AGENT} "Chrome/([a-z]|[A-Z])\." [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Firefox/([a-z]|[A-Z])\." [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Gecko/([a-z]|[A-Z])\." [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ^Mozilla/([a-z]|[A-Z]) [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ ([a-z]|[A-Z])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Opera/([a-z]|[A-Z])\." [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "\ rv:([a-z]|[A-Z])\." [NC,OR]
# Impossible Mozilla versions
RewriteCond %{HTTP_USER_AGENT} "Mozilla/([0-3]|[6-9]|1[0-9]|2[0-9]|3[0-9]|4[0-9]|5[0-9]|6[0-9]|7[0-9]|8[0-9]|9[0-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Mozilla/\d\.([1-9])" [NC,OR]
# Impossible MSIE versions
RewriteCond %{HTTP_USER_AGENT} "MSIE\ \d\.([1-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ \d\d\.([1-9])" [NC,OR]
# Impossible MSIE versions on certain Windows versions
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 5\..*\ NT\ 6\." [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 5\..*\ NT\ 10" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 6\..*\ NT\ 6\." [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 6\..*\ NT\ 10" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 8\..*\ NT\ 6\.2" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 8\..*\ NT\ 6\.3" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 8\..*\ NT\ 10" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 9\..*\ NT\ 6\.2" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 9\..*\ NT\ 6\.3" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 9\..*\ NT\ 10" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 10\..*\ NT\ 6\.3" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 10\..*\ NT\ 10" [NC,OR]
# Impossible MSIE and Trident combinations
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 8\..*\ Trident/([0-3]|[5-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 9\..*\ Trident/([0-4]|[6-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ 10\..*\ Trident/([0-5]|[7-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ".*\ Trident/([0-6]|[8-9]);\ rv:11\.0" [NC,OR]
# Other impossible browser or engine version numbers
# Local change: disabled. Firefox 99 became a regular release in April 2022,
# so this condition now matches genuine browsers.
# RewriteCond %{HTTP_USER_AGENT} "Firefox/99" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\ \d\d\d" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^Opera/9\.99" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Opera/\d\d\d" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Presto/9" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Trident/\d\d" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Trident/([0-3]|[8-9])" [NC,OR]
# Mismatching Firefox and rv versions
RewriteCond %{HTTP_USER_AGENT} ".*rv:1\..*Firefox/(0|[2-9])" [NC,OR]
# Local change: was ([0-1]|[2-9]), i.e. any digit, which also blocked the
# matching rv:2/Firefox 2 pair; the neighbouring lines show the intended shape.
RewriteCond %{HTTP_USER_AGENT} ".*rv:2\..*Firefox/([0-1]|[3-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ".*rv:3\..*Firefox/([0-2]|[4-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} ".*rv:4\..*Firefox/([0-3]|[5-9])" [NC,OR]
# Fake user agents used while testing programs and apps
RewriteCond %{HTTP_USER_AGENT} "BUILDDATE" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^MyApp$" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Synapse\)" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Test\ Certificate\ Info" [NC,OR]
# Unblocking because of false positives like "powerpc64le-unknown-linux-gnu" and "Tiny Tiny RSS/UNKNOWN"
# RewriteCond %{HTTP_USER_AGENT} "Unknown" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "WinHTTP" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "WinHttpRequest" [NC,OR]
# Other standards-breaking fake user agents
RewriteCond %{HTTP_USER_AGENT} "\ \(Chrome\)" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "\(Win/$" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "compatible\ ;" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "CAIMEO\ Artificial\ Intelligence" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Gecko/\ " [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Gecko/20([2-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "-IE\d" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "\ IE\d" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Mozilla\ ([0-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Mozilla/\d\.\d\(" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Mozilla/\d\.\d\ \(\ " [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "MSIE\d" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "^QuickTime/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "SuperCleaner" [OR]
RewriteCond %{HTTP_USER_AGENT} "User\ Agent" [NC,OR]
# Fake Windows strings and versions
# Local change: "Windows/" was listed twice (here and in the section above);
# the redundant copy was dropped, this one still covers it.
RewriteCond %{HTTP_USER_AGENT} "Windows/" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Windows\ ([0-2]|[4-8])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Windows\ NT\)" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Windows\ NT\ 0" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Windows\ NT\ 1\." [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Windows\ NT\ ([2-3]|[7-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Windows\ NT\ 5\.([3-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Windows\ NT\ 6\.([4-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Windows\ NT\ 10\.([1-9])" [NC,OR]
RewriteCond %{HTTP_USER_AGENT} "Windows\ NT\ (1[1-9]|4[0-9]|5[0-9]|6[0-9])" [NC,OR]
# Suspicious user agents
# Last entry of the OR list: deliberately without [OR].
RewriteCond %{HTTP_USER_AGENT} "TO-Browser/TOB" [NC]
# Redirect during the testing period
# RewriteRule .* /strangebrowser.html? [R=307,L]
# Comment out the line above and remove the comment mark below to block fake browsers after the testing period
RewriteRule ^.*$ - [F,L]

## Part 2 - additional bad browser rules with conditions
# 1. "Trident/" appearing twice in the User-Agent (a pattern used by MSNBot).
# Exclusions, currently disabled:
#   the informative page, to prevent redirection loops
# RewriteCond %{REQUEST_URI} !strangebrowser\.html
#   an address-range exception
# RewriteCond %{REMOTE_ADDR} !^131\.253\.25\.
RewriteCond %{HTTP_USER_AGENT} Trident/.*Trident [nocase]
# Testing period: enable the redirect and disable the blocking rule below it.
# RewriteRule .* /strangebrowser.html? [R=307,L]
# After the testing period: answer 403 Forbidden.
RewriteRule "^.*$" - [F,L]

# 2. A full Firefox version number (two digits, dot, digit, dot) is blocked
#    unless the User-Agent also identifies itself as Waterfox, which uses them.
#    All three conditions must hold (no [OR] flags).
# Disabled exclusion for the informative page, to prevent redirection loops:
# RewriteCond %{REQUEST_URI} !strangebrowser\.html
RewriteCond %{HTTP_USER_AGENT} "!Waterfox/\d\d\."
RewriteCond %{HTTP_USER_AGENT} "!Waterfox\)"
RewriteCond %{HTTP_USER_AGENT} .*Firefox/\d\d\.\d\.
# Testing period: enable the redirect and disable the blocking rule below it.
# RewriteRule .* /strangebrowser.html? [R=307,L]
# After the testing period: answer 403 Forbidden.
RewriteRule "^.*$" - [F,L]

# 3. A User-Agent with "MSIE " followed later by "Windows" must also carry a
#    Trident version; when "Trident/" is absent the request is blocked.
# Disabled exclusion for the informative page, to prevent redirection loops:
# RewriteCond %{REQUEST_URI} !strangebrowser\.html
# Disabled exclusion: VirusTotal Cloud uses MSIE 9.0; Windows NT 9.0 user agent
# RewriteCond %{HTTP_USER_AGENT} !virustotalcloud\)$
## The rule (both conditions must hold)
RewriteCond %{HTTP_USER_AGENT} "!Trident/"
RewriteCond %{HTTP_USER_AGENT} "MSIE\ .*Windows"
# Testing period: enable the redirect and disable the blocking rule below it.
# RewriteRule .* /strangebrowser.html? [R=307,L]
# After the testing period: answer 403 Forbidden.
RewriteRule "^.*$" - [F,L]

# 4. Gecko tokens of the form Gecko/2011... to Gecko/2019... are blocked unless
#    the User-Agent contains "PaleMoon/", since PaleMoon has higher Gecko
#    version numbers than other browsers. Both conditions must hold.
# Disabled exclusion for the informative page, to prevent redirection loops:
# RewriteCond %{REQUEST_URI} !strangebrowser\.html
RewriteCond %{HTTP_USER_AGENT} "!PaleMoon/"
RewriteCond %{HTTP_USER_AGENT} Gecko/201([1-9])
# Further disabled exclusions:
# RewriteCond %{REQUEST_URI} !strangebrowser\.html
# RewriteCond %{REMOTE_ADDR} !^131\.253\.25\.
# Testing period: enable the redirect and disable the blocking rule below it.
# RewriteRule .* /strangebrowser.html? [R=307,L]
# After the testing period: answer 403 Forbidden.
RewriteRule "^.*$" - [F,L]

​#Administration ​#Apache ​#Web