"""One-off index migration: give every document a ``charset`` stored field.

Documents whose stored fields lack ``charset`` are deleted and re-added with
``charset`` set to ``"none"`` and ``domain`` recomputed from the document URL.
"""
from whoosh.fields import ID
from whoosh.index import open_dir
from whoosh.query import Every

from gus.lib.gemini import GeminiResource
from gus.lib.whoosh_extensions import UrlAnalyzer


def main(index_dir="index"):
    """Rewrite all documents in the index that have no stored ``charset``.

    Args:
        index_dir: Directory of the Whoosh index to migrate. Defaults to
            ``"index"``, the location the script has always used.

    Prints the number of documents that were rewritten.
    """
    ix = open_dir(index_dir)
    count = 0

    with ix.searcher() as searcher:
        results = searcher.search(Every(), limit=None)

        # Use ONE writer for the whole pass. ``result.docnum`` refers to the
        # searcher's snapshot of the index; committing after every document
        # (as a per-document writer does) can merge segments and renumber
        # documents, so later deletions by docnum could hit the wrong
        # document. A single writer opened against the same snapshot keeps
        # the numbering consistent and commits only once, on exit.
        with ix.writer() as writer:
            for result in results:
                stored = result.fields()
                if "charset" in stored:
                    continue

                count += 1
                writer.delete_document(result.docnum)
                writer.add_document(
                    url=stored["url"],
                    fetchable_url=stored["fetchable_url"],
                    domain=GeminiResource(stored["url"]).normalized_host,
                    content_type=stored["content_type"],
                    charset="none",
                    # Optional fields: absent ones are passed as None.
                    content=stored.get("content"),
                    regex=stored.get("regex"),
                    prompt=stored.get("prompt"),
                    indexed_at=stored["indexed_at"],
                )

    print("{} documents updated.".format(count))


if __name__ == "__main__":
    main()