Пример #1
0
def main():
    """Run the feed-processing pipeline for the module-level ``unpack_file``.

    Steps, in order: prepare the temp and archive directories, unpack and
    flatten the feed into the temp directory, convert config/flat/XML inputs
    to db-style flat files, read the feed's ``vip_id`` and election details
    from ``source.txt`` and ``election.txt``, convert to full db files,
    report the summary plus any errors and warnings, update the database,
    run the database validations and generate the feed.

    Relies on names defined outside this function (``unpack_file``,
    ``process_time``, ``file_timestamp``, ``DIRECTORIES``, ``SCHEMA_URL``,
    ``CONFIG_FILE`` and the helper modules/functions it calls).

    Returns:
        None. If the feed details cannot be read, a summary without element
        counts is reported and the function returns early.
    """
    temp_dir = DIRECTORIES["temp"]
    archive_dir = DIRECTORIES["archives"]

    print("setting up directories...")

    dt.clear_or_create(temp_dir)
    dt.create_directory(archive_dir)

    print("done setting up directories")

    # The result is not used here; the call is kept in case get_type validates
    # the file or has side effects (not visible from this function).
    ft.get_type(unpack_file)

    print("unpacking and flattening files...")

    unpack.unpack(unpack_file, temp_dir)
    unpack.flatten_folder(temp_dir)
    # TODO: flatten_folder could return the list of files in the directory so
    # we would not have to search the directory again for specific files
    # (os.walk is slow on directories with large files).

    print("done unpacking and flattening")

    sp = SchemaProps(SCHEMA_URL)
    file_details = {
        "file": unpack_file,
        "process_time": process_time,
        "file_timestamp": file_timestamp,
    }
    election_details = {}
    vip_id = None
    election_id = None

    print("converting to db style flat files....")

    if dt.file_by_name(CONFIG_FILE, temp_dir):
        file_details.update(
            process_config(temp_dir, temp_dir + CONFIG_FILE, sp))
    # files_by_extension returns a list (it is indexed below), so test for
    # emptiness; comparing the list itself with 0 is always true on Python 2.
    if dt.files_by_extension(".txt", temp_dir):
        file_details.update(process_flatfiles(temp_dir, sp))

    print("processing xml files...")
    xml_files = dt.files_by_extension(".xml", temp_dir)
    if xml_files:
        # Only the first XML file found is converted.
        xml_file = xml_files[0]
        ftff.feed_to_db_files(temp_dir, xml_file,
                              sp.full_header_data("db"), sp.version)
        os.remove(xml_file)
        file_details.setdefault("valid_files", []).append(xml_file)

    print("done processing xml files")

    print("getting feed details...")
    # NOTE(review): connection settings are hard-coded here.
    db = EasySQL("localhost", "vip", "username", "password")
    try:
        with open(temp_dir + "source.txt", "r") as f:
            row = next(csv.DictReader(f))
            vip_id = row["vip_id"]
            election_details["vip_id"] = vip_id
        with open(temp_dir + "election.txt", "r") as f:
            row = next(csv.DictReader(f))
            election_details["election_date"] = row["date"]
            election_details["election_type"] = row["election_type"]
    except Exception:
        # Missing file, empty file or missing column: report what we have and
        # stop. KeyboardInterrupt/SystemExit are deliberately not caught.
        er.report_summary(vip_id, election_id, file_details, election_details)
        return

    election_id = get_election_id(election_details, db)
    election_details["election_id"] = election_id
    print("done getting feed details")

    timestamp = file_details["file_timestamp"]

    print("converting to full db files....")
    element_counts, error_data, warning_data = convert_to_db_files(
        vip_id, election_id, timestamp, temp_dir, sp)
    print("done converting to full db files")

    er.report_summary(vip_id, election_id, file_details, election_details,
                      element_counts)
    if len(error_data) > 0:
        er.feed_issues(vip_id, timestamp, error_data, "error")
    if len(warning_data) > 0:
        er.feed_issues(vip_id, timestamp, warning_data, "warning")

    update_data(vip_id, election_id, timestamp, db, element_counts,
                temp_dir, archive_dir)

    db_validations(vip_id, election_id, db, sp)

    generate_feed(file_details)
Пример #2
0
def main():
	"""Run the feed-processing pipeline for the module-level ``unpack_file``.

	Steps, in order: prepare the temp and archive directories, unpack and
	flatten the feed into the temp directory, convert config/flat/XML inputs
	to db-style flat files, read the feed's ``vip_id`` and election details
	from ``source.txt`` and ``election.txt``, convert to full db files,
	report the summary plus any errors and warnings, update the database,
	run the database validations and generate the feed.

	Relies on names defined outside this function (``unpack_file``,
	``process_time``, ``file_timestamp``, ``DIRECTORIES``, ``SCHEMA_URL``,
	``CONFIG_FILE`` and the helper modules/functions it calls).

	Returns:
		None. If the feed details cannot be read, a summary without element
		counts is reported and the function returns early.
	"""
	temp_dir = DIRECTORIES["temp"]
	archive_dir = DIRECTORIES["archives"]

	print("setting up directories...")

	dt.clear_or_create(temp_dir)
	dt.create_directory(archive_dir)

	print("done setting up directories")

	# The result is not used here; the call is kept in case get_type validates
	# the file or has side effects (not visible from this function).
	ft.get_type(unpack_file)

	print("unpacking and flattening files...")

	unpack.unpack(unpack_file, temp_dir)
	unpack.flatten_folder(temp_dir)
	# TODO: flatten_folder could return the list of files in the directory so
	# we would not have to search the directory again for specific files
	# (os.walk is slow on directories with large files).

	print("done unpacking and flattening")

	sp = SchemaProps(SCHEMA_URL)
	file_details = {
		"file": unpack_file,
		"process_time": process_time,
		"file_timestamp": file_timestamp,
	}
	election_details = {}
	vip_id = None
	election_id = None

	print("converting to db style flat files....")

	if dt.file_by_name(CONFIG_FILE, temp_dir):
		file_details.update(
			process_config(temp_dir, temp_dir + CONFIG_FILE, sp))
	# files_by_extension returns a list (it is indexed below), so test for
	# emptiness; comparing the list itself with 0 is always true on Python 2.
	if dt.files_by_extension(".txt", temp_dir):
		file_details.update(process_flatfiles(temp_dir, sp))

	print("processing xml files...")
	xml_files = dt.files_by_extension(".xml", temp_dir)
	if xml_files:
		# Only the first XML file found is converted.
		xml_file = xml_files[0]
		ftff.feed_to_db_files(temp_dir, xml_file,
			sp.full_header_data("db"), sp.version)
		os.remove(xml_file)
		file_details.setdefault("valid_files", []).append(xml_file)

	print("done processing xml files")

	print("getting feed details...")
	# NOTE(review): connection settings are hard-coded here.
	db = EasySQL("localhost","vip","username","password")
	try:
		with open(temp_dir + "source.txt", "r") as f:
			row = next(csv.DictReader(f))
			vip_id = row["vip_id"]
			election_details["vip_id"] = vip_id
		with open(temp_dir + "election.txt", "r") as f:
			row = next(csv.DictReader(f))
			election_details["election_date"] = row["date"]
			election_details["election_type"] = row["election_type"]
	except Exception:
		# Missing file, empty file or missing column: report what we have and
		# stop. KeyboardInterrupt/SystemExit are deliberately not caught.
		er.report_summary(vip_id, election_id, file_details, election_details)
		return

	election_id = get_election_id(election_details, db)
	election_details["election_id"] = election_id
	print("done getting feed details")

	timestamp = file_details["file_timestamp"]

	print("converting to full db files....")
	element_counts, error_data, warning_data = convert_to_db_files(
		vip_id, election_id, timestamp, temp_dir, sp)
	print("done converting to full db files")

	er.report_summary(vip_id, election_id, file_details, election_details,
		element_counts)
	if len(error_data) > 0:
		er.feed_issues(vip_id, timestamp, error_data, "error")
	if len(warning_data) > 0:
		er.feed_issues(vip_id, timestamp, warning_data, "warning")

	update_data(vip_id, election_id, timestamp, db, element_counts,
		temp_dir, archive_dir)

	db_validations(vip_id, election_id, db, sp)

	generate_feed(file_details)
def file_hash(fname):
	"""Return the hexadecimal MD5 digest of the whole file at *fname*.

	The file is opened in binary mode and read in 8192-byte chunks until EOF,
	so every byte contributes to the digest and large files are not loaded
	into memory at once.

	Args:
		fname: path of the file to hash.

	Returns:
		The MD5 digest as a hex string.

	Raises:
		IOError/OSError: if the file cannot be opened or read.
	"""
	m = md5()
	with open(fname, "rb") as fh:
		# iter() with a sentinel keeps calling read() until it returns b""
		# (EOF); a single read(8192) would only cover the first chunk.
		for chunk in iter(lambda: fh.read(8192), b""):
			m.update(chunk)
	return m.hexdigest()

# Open the tracking database and run its setup routine.
conn = sqlite3.connect(DB_LOC)
setup_db(conn)

# Scan every folder under FEED_DIR for "vipFeed*" files whose base name ends
# with "2012-11-06"; for each one that has_changed() reports as changed,
# rebuild its flat files and store them as a zip next to the feed.
folders = listdir(FEED_DIR)
for f in folders:
	feed_folder = FEED_DIR + f
	files = listdir(feed_folder)
	for fname in files:
		if not fname.startswith("vipFeed"):
			continue
		# Base name is everything before the first dot.
		feed_name = fname.split(".")[0]
		if not feed_name.endswith("2012-11-06"):
			continue
		fullpath = feed_folder + "/" + fname
		if not has_changed(conn, fullpath):
			continue

		archive_base = feed_name + "_flatfiles"
		flatfiledir = archive_base + "/"

		# Start from clean working directories, then unpack the feed.
		dt.clear_or_create(flatfiledir)
		dt.clear_or_create(TEMP_DIR)
		unpack.unpack(fullpath, TEMP_DIR)
		unpack.flatten_folder(TEMP_DIR)

		# Convert the XML feed and zip the resulting flat files.
		xml_file = dt.file_by_extension(".xml", TEMP_DIR)
		ftff.feed_to_db_files(flatfiledir, xml_file)
		make_archive(archive_base, "zip", flatfiledir)

		# Move the zip beside the source feed and remove the work directories.
		archive_name = archive_base + ".zip"
		move(archive_name, feed_folder + "/" + archive_name)
		rmtree(TEMP_DIR)
		rmtree(flatfiledir)