Пример #1
0
    parser.add_argument("--save_linkgraph_domains",
                        default=False,
                        type=str,
                        help="Save a linkgraph domain file to this path")

    parser.add_argument("--profile",
                        action='store_true',
                        help="Profile Python usage")

    return parser.parse_args()


# Shared variables while indexing.
# They are built once at module level, so the functions in this module can
# read them as globals instead of receiving them as parameters.
args = get_args()  # parsed command-line options; note this runs at import time
indexer = Indexer()
urlclient = indexer.urlclient  # shorthand for the indexer's `urlclient` attribute


def list_warc_filenames():
    """ Return a list of all indexable WARC files """

    if args.warc_files:
        if args.warc_files.endswith(".txt"):
            with open(args.warc_files, "rb") as f:
                warc_files = [x.strip() for x in f.readlines()]
        else:
            warc_files = [x.strip() for x in args.warc_files.split(",")]

    else:
        warc_files = list_commoncrawl_warc_filenames(limit=args.warc_limit,
Пример #2
0
 def make_client(self):
     """ Build and return a new Indexer instance """
     client = Indexer()
     return client