💾 Archived View for gmn.clttr.info › sources › geminispace.git › tree › scripts › remove_domain.py.t… captured on 2022-06-11 at 23:40:47.

View Raw

More Information

⬅️ Previous capture (2021-12-03)

-=-=-=-=-=-=-

import sys

from whoosh.qparser import QueryParser
from whoosh.index import open_dir

def main():
    """Interactively remove every indexed document matching a domain.

    The domain is read from ``sys.argv[1]`` and parsed as a query against the
    ``domain`` field of the Whoosh index stored in the ``index`` directory
    (relative to the current working directory). The URLs of all matching
    documents are listed and nothing is deleted unless the user answers the
    confirmation prompt with something starting with "y" (case-insensitive).

    Returns:
        None. Progress and results are reported on standard output.
    """
    if len(sys.argv) < 2:
        print("Please specify a domain...")
        return

    ix = open_dir("index")
    with ix.searcher() as searcher:
        query_parser = QueryParser("domain", ix.schema)
        query = query_parser.parse(sys.argv[1])
        # limit=None: fetch every hit, not just the first page of results.
        results = searcher.search(query, limit=None)

        if len(results) == 0:
            print("No documents found for domain.")
            return

        # confirm removal before proceeding
        print("Documents facing removal")
        print("------------------------")
        for result in results:
            print(result["url"])
        try:
            answer = input("\nPlease confirm removal [y/n]:")
        except EOFError:
            # stdin closed (e.g. non-interactive run): treat as "no".
            answer = ""
        # startswith() instead of [0] so an empty answer (plain Enter) aborts
        # instead of raising IndexError.
        if not answer.strip().lower().startswith("y"):
            print("Aborting removal.")
            return
        # Collect the document numbers while the searcher is still open; the
        # writer below only needs these plain integers.
        docnums = [result.docnum for result in results]

    with ix.writer() as writer:
        for docnum in docnums:
            writer.delete_document(docnum)

    # Report from docnums rather than from `results`, whose searcher has
    # already been closed at this point.
    print("{} documents removed from index.".format(len(docnums)))


# Run the removal tool only when this file is executed as a script, so that
# importing the module does not trigger any index access or prompts.
if __name__ == "__main__":
    main()