def main(argv=None):
    """Command-line entry point (Guido van Rossum's main() pattern).

    Parses command-line options, configures logging, reads the settings
    file and then either initializes the scraper database or runs the
    scraper.

    Args:
        argv: Argument list; defaults to sys.argv when None.

    Returns:
        0 on success, 2 on a usage or configuration error.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, _ = getopt.getopt(
                argv[1:],
                "hirbl:u:d:n:",
                [
                    "help",
                    "init",
                    "reparse",
                    "debug",
                    "limit=",
                    "urls=",
                    "uuid=",
                    "numprocs=",
                ],
            )
        except getopt.error as msg:
            raise Usage(msg)

        init = False
        # !!! DEBUG default limit on number of articles to parse,
        # unless otherwise specified
        limit = 10
        reparse = False
        urls = None
        uuid = None
        numprocs = None
        debug = False

        def parse_int(i):
            """Return i converted to int, or None if it is not a valid integer."""
            try:
                # BUG FIX: was int(a), which silently ignored the parameter
                # and only worked because the closure captured the loop
                # variable 'a' at the call sites below
                return int(i)
            except ValueError:
                return None

        # Process options
        for o, a in opts:
            if o in ("-h", "--help"):
                print(__doc__)
                sys.exit(0)
            elif o in ("-i", "--init"):
                # Initialize database (without overwriting existing data)
                init = True
            elif o in ("-b", "--debug"):
                # Run in debug mode
                debug = True
            elif o in ("-r", "--reparse"):
                # Reparse already parsed articles, oldest first
                reparse = True
            elif o in ("-l", "--limit"):
                # Maximum number of articles to parse
                limit = parse_int(a)
            elif o in ("-u", "--urls"):
                # Text file with list of URLs
                urls = a
            elif o in ("-d", "--uuid"):
                # UUID of a single article to reparse
                uuid = a
            elif o in ("-n", "--numprocs"):
                # Max number of processes to fork when parsing
                # (default: use all CPU cores)
                numprocs = parse_int(a)

        # Set logging format
        logging.basicConfig(
            format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO
        )

        # Read the configuration settings file
        try:
            Settings.read("config/Greynir.conf")
            # Don't run the scraper in debug mode unless --debug is specified
            Settings.DEBUG = debug
        except ConfigError as e:
            print("Configuration error: {0}".format(e), file=sys.stderr)
            return 2

        if init:
            # Initialize the scraper database
            init_roots()
        else:
            # Run the scraper
            scrape_articles(
                reparse=reparse,
                limit=limit,
                urls=urls,
                uuid=uuid,
                numprocs=numprocs,
            )

    except Usage as err:
        print(err.msg, file=sys.stderr)
        print("For help use --help", file=sys.stderr)
        return 2

    finally:
        SessionContext.cleanup()
        Article.cleanup()

    # Completed with no error
    return 0
def main(argv=None):
    """Script entry point, following Guido van Rossum's main() pattern.

    Reads the command-line options and the configuration file, then
    either initializes the scraper database or runs the scraper.
    Returns 0 on success, 2 on a usage or configuration error.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, args = getopt.getopt(
                argv[1:],
                "hirl:u:d:",
                ["help", "init", "reparse", "limit=", "urls=", "uuid="],
            )
        except getopt.error as msg:
            raise Usage(msg)

        # Defaults for the command-line options
        init = False
        # !!! DEBUG default limit on the number of articles to parse,
        # unless otherwise specified on the command line
        limit = 10
        reparse = False
        urls = None
        uuid = None

        # Walk through the parsed options
        for option, value in opts:
            if option in ("-h", "--help"):
                print(__doc__)
                sys.exit(0)
            elif option in ("-i", "--init"):
                init = True
            elif option in ("-r", "--reparse"):
                reparse = True
            elif option in ("-l", "--limit"):
                # Maximum number of articles to parse;
                # a malformed number keeps the default
                try:
                    limit = int(value)
                except ValueError:
                    pass
            elif option in ("-u", "--urls"):
                # Text file containing a list of URLs
                urls = value
            elif option in ("-d", "--uuid"):
                # UUID of the article to reparse
                uuid = value

        # Positional arguments (args) are accepted but ignored

        # Set logging format
        logging.basicConfig(
            format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO
        )

        # Read the configuration settings file
        try:
            Settings.read("config/Greynir.conf")
            # Don't run the scraper in debug mode
            Settings.DEBUG = False
        except ConfigError as e:
            print("Configuration error: {0}".format(e), file=sys.stderr)
            return 2

        if init:
            # Initialize the scraper database
            init_roots()
        else:
            # Run the scraper
            scrape_articles(reparse=reparse, limit=limit, urls=urls, uuid=uuid)

    except Usage as usage_error:
        print(usage_error.msg, file=sys.stderr)
        print("For help use --help", file=sys.stderr)
        return 2

    finally:
        SessionContext.cleanup()
        Article.cleanup()

    # Completed with no error
    return 0
def main(argv=None):
    """Program entry point (Guido van Rossum's main() pattern).

    Handles option parsing and configuration loading, then either
    initializes the scraper database or runs the scraper itself.
    Returns 0 on success, 2 on a usage or configuration error.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            opts, args = getopt.getopt(
                argv[1:], "hirl:u:", ["help", "init", "reparse", "limit=", "urls="]
            )
        except getopt.error as msg:
            raise Usage(msg)

        # Option defaults
        init = False
        # !!! DEBUG default limit on the number of articles to parse,
        # unless otherwise specified on the command line
        limit = 10
        reparse = False
        urls = None

        # Interpret each option flag in turn
        for flag, value in opts:
            if flag in ("-h", "--help"):
                print(__doc__)
                sys.exit(0)
            elif flag in ("-i", "--init"):
                init = True
            elif flag in ("-r", "--reparse"):
                reparse = True
            elif flag in ("-l", "--limit"):
                # Maximum number of articles to parse;
                # non-numeric values leave the default in place
                try:
                    limit = int(value)
                except ValueError:
                    pass
            elif flag in ("-u", "--urls"):
                # Text file containing a list of URLs
                urls = value

        # Positional arguments (args) are accepted but ignored

        # Set logging format
        logging.basicConfig(
            format="%(asctime)s %(levelname)s:%(message)s", level=logging.INFO
        )

        # Read the configuration settings file
        try:
            Settings.read("config/Reynir.conf")
            # Don't run the scraper in debug mode
            Settings.DEBUG = False
        except ConfigError as e:
            print("Configuration error: {0}".format(e), file=sys.stderr)
            return 2

        if init:
            # Initialize the scraper database
            init_roots()
        else:
            # Run the scraper
            scrape_articles(reparse=reparse, limit=limit, urls=urls)

    except Usage as usage_error:
        print(usage_error.msg, file=sys.stderr)
        print("For help use --help", file=sys.stderr)
        return 2

    finally:
        SessionContext.cleanup()
        Article.cleanup()

    # Completed with no error
    return 0