crawl. This directory will be created if it does not currently exist.
This directory will have several sub-directories that contain the
different types of pages found (i.e., search_pages, data_pages,
screenshots). [default: autoscrape-data]
"""
import logging

from docopt import docopt

import autoscrape


logger = logging.getLogger(__name__)


def main():
    docopt_args = docopt(__doc__)
    BASEURL = docopt_args.pop("BASEURL")

    # strip the leading -- and convert - to _ so each option name is a
    # valid Python keyword argument (e.g. --some-option -> some_option)
    args = {}
    for option in docopt_args:
        args[option[2:].replace('-', '_')] = docopt_args[option]

    # configure stdout logging; --quiet disables it
    args["stdout"] = True
    if "quiet" in args:
        quiet = args.pop("quiet")
        args["stdout"] = not quiet

    scraper = autoscrape.ManualControlScraper(BASEURL, **args)
    logger.debug("AutoScrape starting with arguments: %s" % (docopt_args))
    scraper.run()

    # elif args.scraper == "autoscrape-ml":
    #     kwargs["html_embeddings"] = args.html_embeddings or None
    #     kwargs["word_embeddings"] = args.word_embeddings or None
    #     autoscrape.MLAutoScraper(args.baseurl, **kwargs).run()
    # else:
    #     print("No scraper found for %s" % args.scraper)


if __name__ == "__main__":
    main()
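# Usage sketch (an illustration, not taken from this file: the script
# filename and the option name below are assumptions). Invoked from the
# command line as
#
#     python scrape.py https://example.com --output-data-dir autoscrape-data
#
# the option munging in main() turns --output-data-dir into the keyword
# argument output_data_dir, so the call above is roughly equivalent to:
#
#     import autoscrape
#     scraper = autoscrape.ManualControlScraper(
#         "https://example.com",
#         output_data_dir="autoscrape-data",  # hypothetical option/kwarg name
#         stdout=True,                        # set by main() unless --quiet
#     )
#     scraper.run()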