示例#1
0
 def process_report(self, filename, sanitised_streams, raw_streams):
     """Sanitise one report and fan its entries out to three sinks.

     For every ``(sanitised_entry, raw_entry)`` pair yielded by
     ``Report.entries()``:

     * the sanitised entry is written as one JSON line to
       ``sanitised_streams``,
     * the raw entry is written as one JSON line to ``raw_streams``,
     * the sanitised entry is dumped as YAML to
       ``<dst_public>/reports-sanitised/yaml/<YYYY-MM-DD>/<basename>.gz``.

     A failure while writing a single entry is logged and the loop moves
     on to the next entry (best effort); it does not abort the report.

     :param filename: path/URI of the report to read; its basename (with
         ``.gz`` appended if missing) names the sanitised YAML output.
     :param sanitised_streams: writable object receiving sanitised JSON lines.
     :param raw_streams: writable object receiving raw JSON lines.
     """
     target = get_luigi_target(filename)

     sanitised_yaml_filename = os.path.basename(filename)
     if not sanitised_yaml_filename.endswith(".gz"):
         sanitised_yaml_filename += ".gz"

     sanitised_yaml_target = get_luigi_target(os.path.join(
         self.dst_public,
         "reports-sanitised",
         "yaml",
         self.date.strftime("%Y-%m-%d"),
         sanitised_yaml_filename
     ))

     # The YAML output is managed by a context manager so the handle is
     # released even when opening the input, building the Report, or
     # iterating its entries raises (previously it leaked in those cases).
     # NOTE(review): assumes the write handle returned by the target supports
     # the context-manager protocol like the read handle below - confirm for
     # the target types get_luigi_target can return.
     with sanitised_yaml_target.open('w') as sanitised_yaml:
         logger.info("Sanitising %s" % filename)
         with target.open('r') as in_file:
             report = Report(in_file, self.bridge_db, target.path)
             for sanitised_entry, raw_entry in report.entries():
                 try:
                     logger.debug("writing sanitised entry to stream")
                     sanitised_streams.write(json_dumps(sanitised_entry))
                     sanitised_streams.write("\n")
                     logger.debug("writing raw entry to stream")
                     raw_streams.write(json_dumps(raw_entry))
                     raw_streams.write("\n")
                     logger.debug("writing sanitised yaml file")
                     yaml_dump(sanitised_entry, sanitised_yaml)
                 except Exception:
                     # Deliberate best effort: one bad entry must not stop
                     # the remaining entries from being processed.
                     logger.error("error in dumping %s" % filename)
                     logger.error(traceback.format_exc())
示例#2
0
 def process_report(self, in_file):
     """Emit one record per sanitised entry of a report, then delete the file.

     Each emitted record is ``[report_id, record_type, json]`` where the
     first two values are read from the sanitised entry and ``json`` is the
     entry serialised with ``json_dumps``.

     :param in_file: open file object for the report; must expose ``name``.
         It is always closed before returning or raising. The file on disk
         is removed only when every entry was processed successfully.
     """
     try:
         report = Report(in_file)
         # The raw half of each pair is not used here.
         for sanitised_entry, _raw_entry in report.entries():
             report_id = sanitised_entry["report_id"]
             record_type = sanitised_entry["record_type"]
             s_report_data = json_dumps(sanitised_entry)
             self.emit([report_id, record_type, s_report_data])
     finally:
         # Release the handle even if parsing or emitting fails.
         in_file.close()
     # Only reached on success, so a failed input is left in place.
     os.remove(in_file.name)