import "github.com/temoto/robotstxt"
Package robotstxt implements the robots.txt Exclusion Protocol as specified in http://www.robotstxt.org/wc/robots.html with various extensions.
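A minimal usage sketch, assuming the robots.txt body is already available as a string; the rules and the "MyBot" agent name below are placeholders for illustration:

```go
package main

import (
	"fmt"

	"github.com/temoto/robotstxt"
)

func main() {
	// Parse a robots.txt body obtained elsewhere.
	robots, err := robotstxt.FromString("User-agent: *\nDisallow: /private/\n")
	if err != nil {
		// Parsing problems are reported as a ParseError wrapping individual errors.
		panic(err)
	}

	// TestAgent reports whether the given path may be fetched by the given agent.
	fmt.Println(robots.TestAgent("/private/page.html", "MyBot")) // false
	fmt.Println(robots.TestAgent("/public/page.html", "MyBot"))  // true
}
```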
var WhitespaceChars = []rune{' ', '\t', '\v'}
type Group struct {
    Agent      string
    CrawlDelay time.Duration
    // contains filtered or unexported fields
}
func (g *Group) Test(path string) bool
type ParseError struct {
    Errs []error
}
func (e ParseError) Error() string
type RobotsData struct {
    Host     string
    Sitemaps []string
    // contains filtered or unexported fields
}
func FromBytes(body []byte) (r *RobotsData, err error)
func FromResponse(res *http.Response) (*RobotsData, error)
func FromStatusAndBytes(statusCode int, body []byte) (*RobotsData, error)
func FromStatusAndString(statusCode int, body string) (*RobotsData, error)
func FromString(body string) (r *RobotsData, err error)
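When the robots.txt file is fetched over HTTP, FromResponse (or FromStatusAndBytes) lets the status code influence the result as well as the body. A sketch, assuming the target site is reachable; the example.com URL and MyBot agent are placeholders:

```go
package main

import (
	"log"
	"net/http"

	"github.com/temoto/robotstxt"
)

func fetchRobots(site string) (*robotstxt.RobotsData, error) {
	// The URL is a placeholder; any robots.txt endpoint works.
	resp, err := http.Get(site + "/robots.txt")
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// FromResponse reads the body and takes the HTTP status code into
	// account when building the RobotsData.
	return robotstxt.FromResponse(resp)
}

func main() {
	robots, err := fetchRobots("https://example.com")
	if err != nil {
		log.Fatal(err)
	}
	log.Println(robots.TestAgent("/", "MyBot"))
}
```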
func (r *RobotsData) FindGroup(agent string) (ret *Group)
FindGroup searches the blocks of declarations for the specified user-agent. From Google's spec: only one group of group-member records is valid for a particular crawler. The crawler must determine the correct group of records by finding the group with the most specific user-agent that still matches. All other groups of records are ignored by the crawler. User-agent matching is case-insensitive, and the order of the groups within the robots.txt file is irrelevant.
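For repeated checks on behalf of a single crawler, the group can be resolved once with FindGroup and then queried with Test. A sketch; the "FooBot" agent, rules, and paths are placeholders:

```go
package main

import (
	"fmt"

	"github.com/temoto/robotstxt"
)

func main() {
	robots, err := robotstxt.FromString(
		"User-agent: FooBot\nDisallow: /private/\nCrawl-delay: 5\n")
	if err != nil {
		panic(err)
	}

	// FindGroup picks the group with the most specific matching user-agent;
	// matching is case-insensitive.
	group := robots.FindGroup("FooBot/1.2")

	// Test checks individual paths against that group's rules.
	fmt.Println(group.Test("/private/data")) // false
	fmt.Println(group.Test("/index.html"))   // true

	// CrawlDelay, if declared for the group, is exposed as a time.Duration.
	fmt.Println(group.CrawlDelay) // e.g. 5s
}
```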
func (r *RobotsData) TestAgent(path, agent string) bool