import csv
import os
from shutil import copyfile

# Project-local helpers (dt, er, ft, ftff, unpack, EasySQL, SchemaProps, the
# summary_*/source_*/election_* writers) and constants such as REPORT_DIRECTORY,
# DIRECTORIES, SCHEMA_URL, and CONFIG_FILE are assumed to be defined elsewhere
# in this module.


def report_summary(vip_id, election_id, file_details, election_details, element_counts=None):
    dt.create_directory(REPORT_DIRECTORY)
    fname = "report_summary_" + file_details["file_timestamp"] + ".txt"
    if not vip_id:
        directory = REPORT_DIRECTORY + "unknown"
        dt.create_directory(directory)
        # write the report into the "unknown" directory, not the working directory
        with open(directory + "/" + fname, "w") as w:
            summary_header(file_details, w)
            file_summary(file_details, w)
            w.write("Missing source information, could not process feed")
    else:
        directory = REPORT_DIRECTORY + str(vip_id) + "/"
        dt.create_directory(directory)
        dt.create_directory(directory + "archives/")
        dt.clear_or_create(directory + "current/")
        with open(directory + "current/" + fname, "w") as w:
            summary_header(file_details, w)
            source_summary(vip_id, file_details, w)
            if not election_id:
                file_summary(file_details, w)
                w.write("Missing election information, could not process feed")
            else:
                election_summary(election_details, w)
                file_summary(file_details, w)
                if element_counts:
                    w.write("----------------------\nElement Counts\n----------------------\n\n")
                    for k, v in element_counts.iteritems():
                        # counts are numeric, so convert before concatenating
                        w.write(k + ":" + str(v) + "\n")
        copyfile(directory + "current/" + fname, directory + "archives/" + fname)
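
# A minimal usage sketch for report_summary; the ids, timestamps, and counts
# below are hypothetical placeholders, and the summary_*/source_*/election_*
# writers plus REPORT_DIRECTORY are assumed to be defined in this module.
def _example_report_summary():
    sample_file_details = {
        "file": "vipfeed-39.zip",             # hypothetical feed archive name
        "process_time": "0:02:13",            # hypothetical processing duration
        "file_timestamp": "2012-01-01_1200",  # becomes part of the report filename
    }
    sample_election_details = {
        "vip_id": "39",
        "election_date": "2012-11-06",
        "election_type": "general",
        "election_id": "1",
    }
    # Writes report_summary_2012-01-01_1200.txt under REPORT_DIRECTORY/39/current/
    # and copies it into REPORT_DIRECTORY/39/archives/.
    report_summary("39", "1", sample_file_details, sample_election_details,
                   element_counts={"source": 1, "election": 1})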

def report_setup(vip_id=None):
    dt.create_directory(REPORT_DIRECTORY)
    if not vip_id:
        dt.create_directory(REPORT_DIRECTORY + "unknown")
    else:
        # str() matches report_summary above, which accepts non-string ids
        dt.create_directory(REPORT_DIRECTORY + str(vip_id))
        dt.create_directory(REPORT_DIRECTORY + str(vip_id) + "/archives")
        dt.clear_or_create(REPORT_DIRECTORY + str(vip_id) + "/current")
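
# For reference, report_setup("39") should leave this layout in place,
# assuming dt.create_directory makes any missing directory and
# dt.clear_or_create empties an existing one:
#
#   REPORT_DIRECTORY/
#       39/
#           archives/   # past reports accumulate here
#           current/    # cleared each run; holds only the latest report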

def main():
    print "setting up directories..."
    dt.clear_or_create(DIRECTORIES["temp"])
    dt.create_directory(DIRECTORIES["archives"])
    print "done setting up directories"

    ftype = ft.get_type(unpack_file)

    print "unpacking and flattening files..."
    unpack.unpack(unpack_file, DIRECTORIES["temp"])
    unpack.flatten_folder(DIRECTORIES["temp"])
    # flatten_folder could return a list of the files in the directory, so
    # that we would not have to search through the directory every time for
    # specific files, since os.walk is slow on directories with large files
    print "done unpacking and flattening"

    sp = SchemaProps(SCHEMA_URL)
    file_details = {"file": unpack_file,
                    "process_time": process_time,
                    "file_timestamp": file_timestamp}
    election_details = {}
    vip_id = None
    election_id = None

    print "converting to db style flat files...."
    if dt.file_by_name(CONFIG_FILE, DIRECTORIES["temp"]):
        file_details.update(process_config(DIRECTORIES["temp"], DIRECTORIES["temp"] + CONFIG_FILE, sp))
    # files_by_extension returns a list, so test its length rather than
    # comparing the list itself to 0
    if len(dt.files_by_extension(".txt", DIRECTORIES["temp"])) > 0:
        file_details.update(process_flatfiles(DIRECTORIES["temp"], sp))
    print "processing xml files..."
    xml_files = dt.files_by_extension(".xml", DIRECTORIES["temp"])
    if len(xml_files) >= 1:
        ftff.feed_to_db_files(DIRECTORIES["temp"], xml_files[0], sp.full_header_data("db"), sp.version)
        os.remove(xml_files[0])
        if "valid_files" in file_details:
            file_details["valid_files"].append(xml_files[0])
        else:
            file_details["valid_files"] = [xml_files[0]]
    print "done processing xml files"

    print "getting feed details..."
    db = EasySQL("localhost", "vip", "username", "password")
    try:
        with open(DIRECTORIES["temp"] + "source.txt", "r") as f:
            reader = csv.DictReader(f)
            row = reader.next()
            vip_id = row["vip_id"]
            election_details["vip_id"] = vip_id
        with open(DIRECTORIES["temp"] + "election.txt", "r") as f:
            reader = csv.DictReader(f)
            row = reader.next()
            election_details["election_date"] = row["date"]
            election_details["election_type"] = row["election_type"]
    except:
        # source.txt or election.txt is missing or malformed; report what we
        # have and stop processing this feed
        er.report_summary(vip_id, election_id, file_details, election_details)
        return
    election_id = get_election_id(election_details, db)
    election_details["election_id"] = election_id
    print "done getting feed details"

    print "converting to full db files...."
    element_counts, error_data, warning_data = convert_to_db_files(vip_id, election_id, file_details["file_timestamp"], DIRECTORIES["temp"], sp)
    print "done converting to full db files"

    er.report_summary(vip_id, election_id, file_details, election_details, element_counts)
    if len(error_data) > 0:
        er.feed_issues(vip_id, file_details["file_timestamp"], error_data, "error")
    if len(warning_data) > 0:
        er.feed_issues(vip_id, file_details["file_timestamp"], warning_data, "warning")
    update_data(vip_id, election_id, file_details["file_timestamp"], db, element_counts, DIRECTORIES["temp"], DIRECTORIES["archives"])
    db_validations(vip_id, election_id, db, sp)
    generate_feed(file_details)
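
# Entry-point sketch: main() reads module-level globals (DIRECTORIES,
# SCHEMA_URL, CONFIG_FILE, unpack_file, process_time, file_timestamp) that
# are presumably set by whatever drives this script; the values sketched
# below are hypothetical placeholders, not the project's real configuration.
#
#   DIRECTORIES = {"temp": "/tmp/vip_feed/", "archives": "/data/vip/archives/"}
#   SCHEMA_URL = "http://example.com/vip_spec.xsd"
#   CONFIG_FILE = "vip.cfg"

if __name__ == "__main__":
    main()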