Пример #1
0
def main():
    """Parse the command line and run a manual-control scrape.

    The module docstring is handed to docopt to parse the command line.
    The ``BASEURL`` entry is passed to the scraper positionally; every
    other entry is converted to a keyword argument for
    ``autoscrape.ManualControlScraper``. A ``quiet`` option, if present,
    is translated into the scraper's ``stdout`` keyword (``stdout`` is the
    negation of ``quiet``) instead of being forwarded as-is.
    """
    docopt_args = docopt(__doc__)

    base_url = docopt_args.pop("BASEURL")

    # Remaining keys are assumed to be "--some-option" style flags: drop the
    # two leading characters and turn dashes into underscores so they can be
    # used as keyword-argument names.
    args = {
        option[2:].replace("-", "_"): value
        for option, value in docopt_args.items()
    }

    # Configure stdout logging: on by default, switched off by a truthy
    # "quiet" option. The flag belongs in the scraper kwargs (args), not in
    # the raw docopt_args, which are only kept around for the debug log.
    args["stdout"] = not args.pop("quiet", False)

    scraper = autoscrape.ManualControlScraper(base_url, **args)

    # Lazy %-style arguments: the message is only formatted if debug
    # logging is actually enabled.
    logger.debug("AutoScrape starting with arguments: %s", docopt_args)
    scraper.run()
Пример #2
0
        crawl. This directory will be created if it does not
        currently exist.  This directory will have several
        sub-directories that contain the different types of pages
        found (i.e., search_pages, data_pages, screenshots).
        [default: autoscrape-data]
"""

from docopt import docopt

import autoscrape

if __name__ == "__main__":
    # Parse the command line using the module docstring as the usage text.
    cli_options = docopt(__doc__)

    # The BASEURL entry is handed to the scraper positionally, so take it
    # out before the rest is turned into keyword arguments.
    start_url = cli_options.pop("BASEURL")

    # Each remaining key loses its first two characters (the "--" prefix)
    # and has its dashes replaced by underscores to form a keyword name.
    scraper_kwargs = {
        flag[2:].replace('-', '_'): value
        for flag, value in cli_options.items()
    }

    scraper = autoscrape.ManualControlScraper(start_url, **scraper_kwargs)
    scraper.run()

    # elif args.scraper == "autoscrape-ml":
    #     kwargs["html_embeddings"] = args.html_embeddings or None
    #     kwargs["word_embeddings"] = args.word_embeddings or None
    #     autoscrape.MLAutoScraper(args.baseurl, **kwargs).run()

    # else:
    #     print("No scraper found for %s" % args.scraper)