def test(cursor): cursor.execute("""SELECT distinct(source) FROM flows""") set_flow= set(cursor) print "distinct source in flow", len(set_flow) cursor.execute("""SELECT distinct(source) FROM exchange_rates""") set_ex = set(cursor) print "distinct source in exchange_rates", len(set_ex) cursor.execute("""SELECT slug FROM sources""") set_source = set(cursor) print "nb elem in source", len(set_source) missing_flow_source_list = set_flow - set_source writer = UnicodeWriter(open(os.path.join("../out_data", "missing_flow_source_list" + ".csv"), "wb")) writer.writerows(missing_flow_source_list) missing_ex_source_list = set_ex - set_source writer = UnicodeWriter(open(os.path.join("../out_data", "missing_ex_source_list" + ".csv"), "wb")) writer.writerows(missing_ex_source_list)
next_date = dates[i_date + 1] if current_date == next_date - 1: pass else: periods[-1] = "%s-%s" % ( periods[-1], current_date ) if periods[-1] != current_date else str(current_date) periods.append(next_date) else: # fin 2 : fin de la liste periods[-1] = "%s-%s" % ( periods[-1], current_date ) if periods[-1] != current_date else str(current_date) row[2] = ",".join(periods) if row[1] == None: row[1] = "champs vide" return table writer = UnicodeWriter( open(os.path.join("out_data", 'report_by_sources_and_period.csv'), "wb")) writer.writerow([description[0] for description in c.description]) data = dateByReportingBySource(c.fetchall()) for d in data: print d[2] writer.writerows(data)
print "-------------------------------------------------------------------------" print "cleaning done" conn.commit() print "commited" print "-------------------------------------------------------------------------" ################################################################################ ## Export all tables in csv files ################################################################################ tables = [ "sources", "entity_names", "RICentities", "exchange_rates", "currencies", "expimp_spegen", "RICentities_groups", "flows" ] for item in tables: c.execute("select * from " + item) writer = UnicodeWriter(open(os.path.join("out_data", item + ".csv"), "wb")) writer.writerow([description[0] for description in c.description]) # c.fetchall() writer.writerows(c) print "export " + item + ".csv done" print "-------------------------------------------------------------------------"
def test(cursor): # # Get distinct source in flows, exchnange_rates and sources # cursor.execute("""SELECT distinct(source) FROM flows""") set_flow= set(_ for _ in cursor) print "distinct source in flow", len(set_flow) cursor.execute("""SELECT distinct(source) FROM exchange_rates""") set_ex = set(_ for _ in cursor) print "distinct source in exchange_rates", len(set_ex) cursor.execute("""SELECT distinct(slug) FROM sources""") set_source = set(_ for _ in cursor) print "nb elem in source", len(set_source) # # output missing source in flows # missing_flow_source_list = set_flow - set_source print "flow sources missin in source table", len(missing_flow_source_list) with codecs.open(os.path.join("../out_data/logs", "missing_flow_source_list" + ".csv"), "wb","UTF8") as f: for s in missing_flow_source_list: f.write((s[0] if s[0] is not None else u"") +u"\n") # # output missing source in exchange_rates # missing_ex_source_list = set_ex - set_source print missing_ex_source_list print "Exchange rate missing in source table", len(missing_ex_source_list) with codecs.open(os.path.join("../out_data/logs", "missing_ex_source_list" + ".csv"), "wb","utf8") as f: for s in list(missing_ex_source_list): f.write((s[0] if s[0] is not None else u"") + u"\n") # # output missing source with id in flows # missing_flow_source_list_id =[] flow_matching = 0 for row in missing_flow_source_list: cursor.execute("""SELECT * FROM flows where source=?""",[row[0]]) table = [list(r) for r in cursor] flow_matching+=1 for row in table: missing_flow_source_list_id.append(row) unique_flow = [] for r in missing_flow_source_list_id: if r not in unique_flow: unique_flow.append(r) writer = UnicodeWriter(open(os.path.join("../out_data/logs", "missing_flow_source_list_id" + ".csv"), "wb")) writer.writerows(unique_flow) # # output missing source with id in exchange_rates # missing_ex_source_list_id =[] ex_matching = 0 for row in missing_ex_source_list: cursor.execute("""SELECT * FROM exchange_rates where source=?""",[row[0]]) table = [list(r) for r in cursor] ex_matching+=1 for row in table: # print row missing_ex_source_list_id.append(row) unique_ex = [] for r in missing_ex_source_list_id: if r not in unique_ex: unique_ex.append(r) writer = UnicodeWriter(open(os.path.join("../out_data/logs", "missing_ex_source_list_id" + ".csv"), "wb")) writer.writerows(unique_ex)
for i_date,current_date in enumerate(dates): if i_date<len(dates)-1: next_date=dates[i_date+1] if current_date==next_date-1: pass else: periods[-1]="%s-%s"%(periods[-1],current_date) if periods[-1]!=current_date else str(current_date) periods.append(next_date) else: # fin 2 : fin de la liste periods[-1]="%s-%s"%(periods[-1],current_date) if periods[-1]!=current_date else str(current_date) row[2] = ",".join(periods) if row[1] == None: row[1] = "champs vide" return table writer = UnicodeWriter(open(os.path.join("out_data", 'report_by_sources_and_period.csv'), "wb")) writer.writerow([description[0] for description in c.description]) data = dateByReportingBySource(c.fetchall()) for d in data: print d[2] writer.writerows(data)
current_source = row[0] next_source = table[i_source+1][0] if (current_source == next_source): newCSV.append(row); else: # csvTitle = unicode(current_source, 'utf-8') print newSource nameStats.append([current_source, len(newCSV)]) csvTitle = unicodedata.normalize('NFD', current_source).encode('ascii', 'ignore') csvTitle = csvTitle.replace(" ", "_") if len(csvTitle) > 255: csvTitle = csvTitle[:200] try: writer = UnicodeWriter(open(os.path.join("./out_data/sources", csvTitle +'.csv'), "w")) writer.writerow([description[0] for description in c.description]) writer.writerows(newCSV) newCSV = [] except IOError as e: print "I/O error({0}): {1}".format(e.errno, e.strerror) elem = csvTitle.encode('utf8') errors.append(elem) pass errorsNameFormat = open('./out_data/errors/errorsNameFormat.txt', 'w') for item in errors: print>>errorsNameFormat, item print "errorsNameFormat.txt done" sourceNameErrors = open('./out_data/errors/sourceNameErrors.txt', 'w') for item in set(nameProblem):