def save_report(report):
    """Validate one scraped report, fetch its artifacts, and write its data.

    The report dict is first passed to ``preprocess_report`` (which, per the
    original inline comment, creates inferred fields and sets defaults) and
    then to ``validate_report``.

    Depending on options and report state:

    * ``dry_run`` option set: nothing is downloaded or extracted.
    * report has ``unreleased`` set to ``True``: nothing is downloaded or
      extracted.
    * otherwise: the report is downloaded and its text extracted.

    Args:
        report: dict describing the report; must contain at least the
            ``type``, ``published_on`` and ``report_id`` keys once validated.

    Returns:
        ``True`` when processing completed, ``False`` when the download
        step returned a falsy path.

    Raises:
        Exception: if ``validate_report`` returns anything other than
            ``True`` (its return value is included in the message).
    """
    options = utils.options()

    # create some inferred fields, set defaults
    preprocess_report(report)

    # validate_report will return True, or a string message describing the
    # problem, so only the literal True counts as success.
    validation = validate_report(report)
    if validation is not True:
        raise Exception("[%s][%s][%s] Invalid report: %s\n\n%s" % (
            report.get('type'), report.get('published_on'),
            report.get('report_id'), validation, str(report)))

    # logging.warning replaces the deprecated logging.warn alias; arguments
    # are passed lazily so the logger does the formatting.
    logging.warning("[%s][%s][%s]",
                    report['type'], report['published_on'], report['report_id'])

    if options.get('dry_run'):
        logging.warning('\tskipping download and extraction, dry_run == True')
    elif report.get('unreleased', False) is True:
        logging.warning('\tno download/extraction of unreleased report')
    else:
        report_path = download_report(report)
        if not report_path:
            logging.warning("\terror downloading report: sadly, skipping.")
            return False

        logging.warning("\treport: %s", report_path)

        text_path = extract_report(report)
        logging.warning("\ttext: %s", text_path)

    # NOTE(review): block structure was reconstructed from a collapsed
    # source line; the data file is assumed to be written for every report
    # that reaches this point (dry-run and unreleased included) - confirm.
    data_path = write_report(report)
    logging.warning("\tdata: %s", data_path)

    return True
def save_report(report):
    """Validate one scraped report, fetch its artifacts, and write its data.

    The calling scraper is identified from the file name of the caller's
    stack frame (base name without extension); that name is handed to
    ``check_uniqueness``, ``download_report`` and ``admin.log_report``.

    The report dict is passed to ``preprocess_report`` (which, per the
    original inline comment, creates inferred fields and sets defaults) and
    then to ``validate_report``.

    Depending on options and report state:

    * ``dry_run`` option set: nothing is downloaded or extracted; unless the
      ``quick`` option is set, a report with a ``url`` has that URL passed to
      ``utils.check_report_url``.
    * report has ``unreleased`` set to ``True``: nothing is downloaded or
      extracted.
    * otherwise: the report is downloaded, any extracted metadata is logged
      at debug level, and its text is extracted.

    Args:
        report: dict describing the report; must contain at least the
            ``inspector``, ``report_id``, ``year``, ``type`` and
            ``published_on`` keys once validated.

    Returns:
        ``True`` when processing completed, ``False`` when the download
        step returned a falsy path.

    Raises:
        Exception: if ``validate_report`` returns anything other than
            ``True`` (its return value is included in the message).
    """
    # Frame 1 is whoever called save_report; its file name names the scraper.
    caller_filename = inspect.stack()[1][1]
    caller_scraper = os.path.splitext(os.path.basename(caller_filename))[0]

    options = utils.options()

    # create some inferred fields, set defaults
    preprocess_report(report)

    # validate_report will return True, or a string message describing the
    # problem, so only the literal True counts as success.
    validation = validate_report(report)
    if validation is not True:
        raise Exception("[%s][%s][%s] Invalid report: %s\n\n%s" % (
            report.get('type'), report.get('published_on'),
            report.get('report_id'), validation, str(report)))

    check_uniqueness(report['inspector'], report['report_id'],
                     report['year'], caller_scraper)

    # logging.warning replaces the deprecated logging.warn alias; arguments
    # are passed lazily so the logger does the formatting.
    logging.warning("[%s][%s][%s]",
                    report['type'], report['published_on'], report['report_id'])

    if options.get('dry_run'):
        logging.warning('\tdry run: skipping download and extraction')
        if (not options.get('quick')) and report.get('url'):
            utils.check_report_url(report['url'])
    elif report.get('unreleased', False) is True:
        logging.warning('\tno download/extraction of unreleased report')
    else:
        report_path = download_report(report, caller_scraper=caller_scraper)
        if not report_path:
            logging.warning("\terror downloading report: sadly, skipping.")
            return False

        logging.warning("\treport: %s", report_path)

        metadata = extract_metadata(report)
        if metadata:
            for key, value in metadata.items():
                logging.debug("\t%s: %s", key, value)

        text_path = extract_report(report)
        logging.warning("\ttext: %s", text_path)

    # NOTE(review): block structure was reconstructed from a collapsed
    # source line; the data file and admin log entry are assumed to be
    # produced for every report that reaches this point (dry-run and
    # unreleased included) - confirm.
    data_path = write_report(report)
    logging.warning("\tdata: %s", data_path)

    admin.log_report(caller_scraper)

    return True
def save_report(report):
    """Validate one scraped report, fetch its artifacts, and write its data.

    The calling scraper's name is derived from the caller's source file
    (base name, extension stripped) and forwarded to ``check_uniqueness``,
    ``download_report`` and ``admin.log_report``.

    ``preprocess_report`` runs first (per the original inline comment it
    creates inferred fields and sets defaults), followed by
    ``validate_report``.

    Behaviour by mode:

    * ``dry_run`` option: no download or extraction. If the ``quick`` option
      is not set and the report has a ``url``, that URL is passed to
      ``utils.check_report_url``.
    * ``unreleased`` is ``True`` on the report: no download or extraction.
    * default: download the report, log extracted metadata at debug level,
      then extract its text.

    Args:
        report: dict describing the report; must contain at least the
            ``inspector``, ``report_id``, ``year``, ``type`` and
            ``published_on`` keys once validated.

    Returns:
        ``True`` when processing completed, ``False`` when
        ``download_report`` returned a falsy path.

    Raises:
        Exception: if ``validate_report`` returns anything other than
            ``True`` (its return value is included in the message).
    """
    # Index 1 of the stack is the direct caller; element 1 is its file name.
    caller_filename = inspect.stack()[1][1]
    caller_scraper = os.path.splitext(os.path.basename(caller_filename))[0]

    options = utils.options()

    # create some inferred fields, set defaults
    preprocess_report(report)

    # validate_report will return True, or a string message; anything that
    # is not the literal True is treated as a validation failure.
    validation = validate_report(report)
    if validation is not True:
        raise Exception("[%s][%s][%s] Invalid report: %s\n\n%s" % (
            report.get('type'), report.get('published_on'),
            report.get('report_id'), validation, str(report)))

    check_uniqueness(report['inspector'], report['report_id'],
                     report['year'], caller_scraper)

    # logging.warn is a deprecated alias; logging.warning is the supported
    # spelling. Lazy %-arguments leave formatting to the logger.
    logging.warning("[%s][%s][%s]",
                    report['type'], report['published_on'], report['report_id'])

    if options.get('dry_run'):
        logging.warning('\tdry run: skipping download and extraction')
        if (not options.get('quick')) and report.get('url'):
            utils.check_report_url(report['url'])
    elif report.get('unreleased', False) is True:
        logging.warning('\tno download/extraction of unreleased report')
    else:
        report_path = download_report(report, caller_scraper=caller_scraper)
        if not report_path:
            logging.warning("\terror downloading report: sadly, skipping.")
            return False

        logging.warning("\treport: %s", report_path)

        metadata = extract_metadata(report)
        if metadata:
            for key, value in metadata.items():
                logging.debug("\t%s: %s", key, value)

        text_path = extract_report(report)
        logging.warning("\ttext: %s", text_path)

    # NOTE(review): nesting was reconstructed from a collapsed source line;
    # write_report and admin.log_report are assumed to run for every report
    # that reaches this point, not only downloaded ones - confirm.
    data_path = write_report(report)
    logging.warning("\tdata: %s", data_path)

    admin.log_report(caller_scraper)

    return True
def save_report(report):
    """Validate one scraped report, fetch its artifacts, and write its data.

    ``preprocess_report`` runs first (per the original inline comment it
    creates inferred fields and sets defaults), followed by
    ``validate_report``.

    Behaviour by mode:

    * ``dry_run`` option: no download or extraction.
    * ``unreleased`` is ``True`` on the report: no download or extraction.
    * default: download the report, log extracted metadata at debug level,
      then extract its text.

    Args:
        report: dict describing the report; must contain at least the
            ``type``, ``published_on`` and ``report_id`` keys once validated.

    Returns:
        ``True`` when processing completed, ``False`` when
        ``download_report`` returned a falsy path.

    Raises:
        Exception: if ``validate_report`` returns anything other than
            ``True`` (its return value is included in the message).
    """
    options = utils.options()

    # create some inferred fields, set defaults
    preprocess_report(report)

    # validate_report will return True, or a string message; anything that
    # is not the literal True is treated as a validation failure.
    validation = validate_report(report)
    if validation is not True:
        raise Exception(
            "[%s][%s][%s] Invalid report: %s\n\n%s"
            % (
                report.get("type"),
                report.get("published_on"),
                report.get("report_id"),
                validation,
                str(report),
            )
        )

    # logging.warn is a deprecated alias; logging.warning is the supported
    # spelling. Lazy %-arguments leave formatting to the logger.
    logging.warning(
        "[%s][%s][%s]", report["type"], report["published_on"], report["report_id"]
    )

    if options.get("dry_run"):
        logging.warning("\tdry run: skipping download and extraction")
    elif report.get("unreleased", False) is True:
        logging.warning("\tno download/extraction of unreleased report")
    else:
        report_path = download_report(report)
        if not report_path:
            logging.warning("\terror downloading report: sadly, skipping.")
            return False

        logging.warning("\treport: %s", report_path)

        metadata = extract_metadata(report)
        if metadata:
            for key, value in metadata.items():
                logging.debug("\t%s: %s", key, value)

        text_path = extract_report(report)
        logging.warning("\ttext: %s", text_path)

    # NOTE(review): nesting was reconstructed from a collapsed source line;
    # write_report is assumed to run for every report that reaches this
    # point, not only downloaded ones - confirm.
    data_path = write_report(report)
    logging.warning("\tdata: %s", data_path)

    return True