def process_report(self, filename, sanitised_streams, raw_streams):
    """Sanitise one report and fan its entries out to three outputs.

    For every ``(sanitised_entry, raw_entry)`` pair yielded by
    ``Report.entries()``:

    * the sanitised entry is written to ``sanitised_streams`` as one JSON
      document followed by a newline;
    * the raw entry is written to ``raw_streams`` the same way;
    * the sanitised entry is dumped with ``yaml_dump`` into a per-report
      target located at
      ``<self.dst_public>/reports-sanitised/yaml/<YYYY-MM-DD>/<basename>``,
      where ``<basename>`` is the base name of ``filename`` with ``.gz``
      appended if it is not already present.

    An exception raised while writing a single entry is logged (message
    plus traceback) and processing continues with the next entry; outputs
    written for that entry before the failure are not rolled back.

    Args:
        filename: Location of the report to read; passed to
            ``get_luigi_target``.
        sanitised_streams: Writable object receiving sanitised JSON lines.
        raw_streams: Writable object receiving raw JSON lines.
    """
    target = get_luigi_target(filename)

    sanitised_yaml_filename = os.path.basename(filename)
    if not sanitised_yaml_filename.endswith(".gz"):
        sanitised_yaml_filename += ".gz"

    sanitised_yaml_target = get_luigi_target(os.path.join(
        self.dst_public,
        "reports-sanitised",
        "yaml",
        self.date.strftime("%Y-%m-%d"),
        sanitised_yaml_filename,
    ))

    # The YAML handle is managed by ``with`` so it is released even when
    # reading the report fails outside the per-entry ``try`` below
    # (previously such a failure skipped the trailing ``close()``).
    # NOTE(review): assumes the handle returned by ``.open('w')`` supports
    # the context-manager protocol like the ``.open('r')`` handle used
    # below - confirm for every target type ``get_luigi_target`` returns.
    with sanitised_yaml_target.open('w') as sanitised_yaml:
        logger.info("Sanitising %s", filename)
        with target.open('r') as in_file:
            report = Report(in_file, self.bridge_db, target.path)
            for sanitised_entry, raw_entry in report.entries():
                try:
                    logger.debug("writing sanitised entry to stream")
                    sanitised_streams.write(json_dumps(sanitised_entry))
                    sanitised_streams.write("\n")

                    logger.debug("writing raw entry to stream")
                    raw_streams.write(json_dumps(raw_entry))
                    raw_streams.write("\n")

                    logger.debug("writing sanitised yaml file")
                    yaml_dump(sanitised_entry, sanitised_yaml)
                except Exception:
                    # Best effort: one bad entry must not abort the report.
                    logger.error("error in dumping %s", filename)
                    logger.error(traceback.format_exc())
def process_report(self, in_file):
    """Emit one record per sanitised entry of a report, then delete the file.

    Each sanitised entry yielded by ``Report(in_file).entries()`` is passed
    to ``self.emit`` as the list
    ``[report_id, record_type, <JSON of the sanitised entry>]``; the raw
    entry of each pair is ignored.

    Args:
        in_file: Open file object for the report. It must expose ``name``
            (the path removed from disk once processing succeeds).

    Raises:
        KeyError: If a sanitised entry has no ``"report_id"`` or
            ``"record_type"`` key.

    The file object is always closed. The file on disk is removed only
    when every entry was emitted without error, so a failed run leaves the
    input in place.
    """
    try:
        report = Report(in_file)
        for sanitised_entry, _raw_entry in report.entries():
            self.emit([
                sanitised_entry["report_id"],
                sanitised_entry["record_type"],
                json_dumps(sanitised_entry),
            ])
    finally:
        # Release the handle even if parsing or emitting raised.
        in_file.close()
    os.remove(in_file.name)