def test(): test_sqlite_filename = "out_data/RICardo.sqlite" conn = sqlite3.connect(test_sqlite_filename) c = conn.cursor() c.execute( """select name from sqlite_master where type='table' and name!="sqlite_sequence" """ ) sc = conn.cursor() tables = {} for t in c: t = t[0] sc.execute("""select count(*) from `%s`""" % t) tables[t] = sc.next()[0] if not os.path.exists("out_data/test_csvs"): os.makedirs("out_data/test_csvs") utils.sqlitedatabase2csv(test_sqlite_filename, "out_data/test_csvs") if os.path.exists("test.sqlite"): os.remove("test.sqlite") utils.csv2sqlite("out_data/test_csvs/*.csv", "test.sqlite", "RICardo_schema.sql") conn = sqlite3.connect("test.sqlite") c = conn.cursor() c.execute( """select name from sqlite_master where type='table' and name!="sqlite_sequence" """ ) sc = conn.cursor() test_passed = True for t in c: t = t[0] sc.execute("""select count(*) from `%s`""" % t) table_test_result = "ok" nb_rows = sc.next()[0] if t not in tables or tables[t] != nb_rows: table_test_result = "failed" test_passed = False print "test %s for table %s, exported/imported %s/%s rows" % ( table_test_result, t, tables[t], nb_rows) os.remove("out_data/test_csvs/%s.csv" % t) os.remove("test.sqlite") os.rmdir("out_data/test_csvs/") return test_passed
try : conf=json.load(open("config.json","r")) database_filename=os.path.join('out_data',conf["sqlite_viz"]) except : print "couldn't load config.json database" exit(1) try: if os.path.isfile(database_filename): os.remove(database_filename) except: print "couldn't delete target sqlite database file" exit(1) print "building sqlite database from CSV" utils.csv2sqlite("out_data/csv_data/*.csv",database_filename,conf["sqlite_schema"]) conn=sqlite3.connect(database_filename) c=conn.cursor() print "importing Frederico Tena from csv" FedericoTena.import_fredericotena(c) ################################################################################ ## Create table flow_joined ################################################################################ print "Create table flow_joined" print "-------------------------------------------------------------------------"
def deduplicate_flows(): try: with open("config.json", "r") as f_conf: conf = json.load(f_conf) database_filename = os.path.join('../sqlite_data', conf["sqlite_viz"]) except: print("couldn't load config.json database") exit(1) try: if os.path.isfile(database_filename): os.remove(database_filename) except: print("couldn't delete target sqlite database file") exit(1) print("building sqlite database from CSV") utils.csv2sqlite("../data/*.csv", database_filename, conf["sqlite_schema"]) conn = sqlite3.connect(database_filename) c = conn.cursor() ################################################################################ ## UPDATE OR CREATE RICentities slug ################################################################################ ricslug = lambda _: re.sub("[ ()/]", "", re.sub("&", "_", _)) ricnames = c.execute("""SELECT RICname FROM RICentities""") newricslugs = [(ricslug(ricname[0]), ricname[0]) for ricname in ricnames] c.executemany("""UPDATE RICentities SET slug = ? WHERE RICname = ? """, newricslugs) ################################################################################ ## Update every table with uniformed attributes # ################################################################################ # c.execute("""UPDATE flows SET source= UPPER(SUBSTR(source, 1, 1)) || SUBSTR(source, 2) """) # c.execute("""UPDATE sources SET slug = UPPER(SUBSTR(slug, 1, 1)) || SUBSTR(slug, 2) """) # c.execute("""UPDATE exchange_rates SET source= UPPER(SUBSTR(source, 1, 1)) || SUBSTR(source, 2) """) ################################################################################ ## Remove dup in entities_name table ################################################################################ # c.execute("""delete from entity_names # where original_name in ("Dutch new Guinea","Ionian islands","United states"); # """) ################################################################################ ## Create table flow_joined ################################################################################ print("Create table flow_joined") print( "-------------------------------------------------------------------------" ) c.execute("""DROP TABLE IF EXISTS flow_joined;""") c.execute("""CREATE TABLE IF NOT EXISTS flow_joined AS SELECT f.id, f.source, s.type, f.flow, f.year, f.unit as unit, eisg.modified_export_import as expimp, eisg.modified_special_general as spegen, rate.rate_to_pounds as rate, c.modified_currency as currency, r1.RICname as reporting, f.reporting as original_reporting, r2.slug as reporting_slug, CASE WHEN p2.RICname="World" and world_trade_type="total_estimated" THEN "Worldestimated" WHEN p2.RICname="World" and world_trade_type="total_reporting" THEN "Worldasreported" WHEN p2.RICname="World" and world_trade_type="total_subreporting" THEN "Worldasreported2" WHEN p2.RICname="World" and world_trade_type is null THEN "Worldundefined" ELSE p2.slug END as partner_slug, CASE WHEN p2.RICname="World" and world_trade_type="total_estimated" THEN "World estimated" WHEN p2.RICname="World" and world_trade_type="total_reporting" THEN "World as reported" WHEN p2.RICname="World" and world_trade_type="total_subreporting" THEN "World as reported2" WHEN p2.RICname="World" and world_trade_type is null THEN "World undefined" ELSE p2.RICname END as partner, f.partner as original_partner, r2.type as reporting_type, r2.continent as reporting_continent, r2.part_of_country as reporting_part_of_country, r2.GPH_code as reporting_GPH_code, p2.type as partner_type, p2.continent as partner_continent, p2.part_of_country as partner_part_of_country, p2.GPH_code as partner_GPH_code, transport_type, f.notes, species_bullions, ifnull(s.author,s.name) || ifnull(' ('||s.edition_date||')','') as source_label from flows as f LEFT OUTER JOIN currencies as c ON f.currency=c.currency AND f.year=c.year AND f.reporting = c.reporting LEFT OUTER JOIN exchange_rates as rate ON c.modified_currency=rate.modified_currency AND c.year=rate.year LEFT OUTER JOIN entity_names as r1 ON r1.original_name=f.reporting COLLATE NOCASE LEFT OUTER JOIN entity_names as p1 ON p1.original_name=f.partner COLLATE NOCASE LEFT OUTER JOIN RICentities as p2 ON p2.RICname=p1.RICname LEFT OUTER JOIN RICentities as r2 ON r2.RICname=r1.RICname LEFT OUTER JOIN expimp_spegen as eisg USING (export_import, special_general) LEFT OUTER JOIN sources as s ON s.slug=f.source WHERE expimp != "Re-exp" and partner is not null and partner_sum is null and f.flow is not null """) print("flow_joined created") print( "-------------------------------------------------------------------------" ) # taking care of Total_type flag to define the world partner # and ((`Total Trade Estimation` is null and partner != "World" )or(`Total Trade Estimation`=1 and partner = "World")) c.execute("""INSERT INTO RICentities (`RICname`,`type`,`continent`, `slug`) VALUES ("World estimated","geographical_area","World", "Worldestimated")""" ) print("World estimated added to RICentities") c.execute("""INSERT INTO RICentities (`RICname`,`type`,`continent`, `slug`) VALUES ("World as reported","geographical_area","World", "Worldasreported")""" ) print("World as reported added to RICentities") c.execute("""INSERT INTO RICentities (`RICname`,`type`,`continent`, `slug`) VALUES ("World as reported2","geographical_area","World", "Worldasreported2")""" ) print("World as reported2 added to RICentities") c.execute("""INSERT INTO RICentities (`RICname`,`type`,`continent`, `slug`) VALUES ("World undefined","geographical_area","World", "Worldundefined")""" ) print("World undefined added to RICentities") print( "-------------------------------------------------------------------------" ) ################################################################################ # merge duplicates from land and sea ################################################################################ c.execute( """SELECT count(*) as nb,group_concat(`flow`,'|'),group_concat(ID,'|'), group_concat(transport_type,'|') FROM `flow_joined` WHERE transport_type is not null GROUP BY year, expimp, reporting, partner HAVING count(*)>1 """) sub_c = conn.cursor() rows_grouped = 0 for n, flows, ids, land_seas in c: if n == 2: land_sea = ", ".join(set(land_seas.split("|"))) if len(set(land_seas.split("|"))) > 1: # if notes : # notes=", ".join(set(notes.split("|"))) sub_c.execute( """UPDATE `flow_joined` SET flow=%.1f,transport_type="%s" WHERE ID=%s""" % (sum(float(_) for _ in flows.split("|")), land_sea, ids.split("|")[0])) sub_c.execute("""DELETE FROM `flow_joined` WHERE ID=%s""" % ids.split("|")[1]) rows_grouped += 2 if rows_grouped > 0: print("removing %s land/seas duplicates by suming them" % rows_grouped) sub_c.close() print("merge duplicates from land and sea done") print( "-------------------------------------------------------------------------" ) ################################################################################ # remove 'valeurs officielles' when duplicates with 'Valeurs actuelles' # for France between 1847 and 1856 both included ################################################################################ c.execute( """SELECT count(*) as nb,group_concat(notes,'|'),group_concat(ID,'|'), group_concat(Source,'|') as notes_group FROM `flow_joined` WHERE `reporting`="France" and year >= 1847 AND year <= 1856 GROUP BY year,expimp,reporting,partner HAVING count(*)>1 """) ids_to_remove = [] for n, notes, ids, sources in c: if n == 2 and notes: i = notes.split("|").index("Valeur officielle") id = ids.split("|")[i] #print(sources.split("|")[i].encode("UTF8")) if sources.split( "|" )[i] == u"""TableauDécennalDuCommerceDeLaFranceAvecSesColoniesEtLesPuissancesÉtrangères_18471856_Vol1""": ids_to_remove.append(id) else: raise Exception("missing source Tableau décennal") # else: # raise Exception("exception ---> ", n) if len(ids_to_remove) > 0: print( "removing %s 'Valeur officielle' noted duplicates for France between 1847 1856" % len(ids_to_remove)) c.execute("DELETE FROM flow_joined WHERE id IN (%s)" % ",".join(ids_to_remove)) print( "remove 'valeurs officielles' when duplicates with 'Valeurs actuelles' done" ) print( "-------------------------------------------------------------------------" ) ################################################################################ # remove "species and billions" remove species flows when exists ################################################################################ c.execute("""SELECT * from (SELECT count(*) as nb, group_concat(species_bullions,'|') as sb, group_concat(ID,'|'), reporting, partner FROM `flow_joined` GROUP BY year,expimp,reporting,partner HAVING count(*)>1) WHERE sb="S|NS" """) # ids_to_remove = [] rps = [] for n, sb, ids, r, p in c: if n == 2: i = sb.split("|").index("S") id = ids.split("|")[i] ids_to_remove.append(id) rps.append('"%s"' % "|".join((r, p))) rps = set(rps) if len(ids_to_remove) > 0: print("""removing %s flows S duplicated with NS for reporting|partner couples %s""" % (len(ids_to_remove), ",".join(rps))) c.execute("DELETE FROM flow_joined WHERE id IN (%s)" % (",".join(ids_to_remove))) print("remove duplicates from double source primary and secondary") print( "-------------------------------------------------------------------------" ) ################################################################################ # remove duplicates from double source primary and secondary ################################################################################ # Y a t'il pour une même année deux sources primaires et secondaires pour un même reporting # Si oui il faut supprimer les flux de la source secondaire pour ce reporting pour cette année print( "Filtering duplicated sources which describes same reportings on same years..." ) # the first query gets all sources duplications by reporting and year # but duplications on total (World%) trade c.execute(""" SELECT reporting, year, group_concat(DISTINCT type) from flow_joined WHERE partner not LIKE 'World%' AND type != 'FedericoTena' GROUP by reporting, year HAVING count(DISTINCT source) >1 and count(DISTINCT type) > 1 """) sub_c = conn.cursor() primarysecondaryestimation_duplicates = {} for reporting, year, gtypes in c: types = sorted(gtypes.split(',')) removed = False # when a secondary source cooccurres with a primary if "secondary" in types and "primary" in types: # remove the secondary sub_c.execute( """DELETE FROM flow_joined WHERE type='secondary' AND reporting=? AND year=?""", (reporting, year)) removed = True if reporting in primarysecondaryestimation_duplicates: primarysecondaryestimation_duplicates[reporting].append( (year, sub_c.rowcount, 'secondary')) else: primarysecondaryestimation_duplicates[reporting] = [ (year, sub_c.rowcount, 'secondary') ] # when an estimation source cooccurres with a primary if "estimation" in types and ("secondary" in types or "primary" in types): # remove the estimation sub_c.execute( """DELETE FROM flow_joined WHERE type='estimation' AND partner!='World estimated' AND reporting=? AND year=?""", (reporting, year)) removed = True if reporting in primarysecondaryestimation_duplicates: primarysecondaryestimation_duplicates[reporting].append( (year, sub_c.rowcount, 'estimation')) else: primarysecondaryestimation_duplicates[reporting] = [ (year, sub_c.rowcount, 'estimation') ] if not removed: print("/!\ duplicates on more types %s %s %s" % (types, reporting, year)) # logging what was done for reporting, years in primarysecondaryestimation_duplicates.items(): nb_flows_secondary = sum(n for (y, n, t) in years if t == 'secondary') years_secondary = (y for (y, n, t) in years if t == 'secondary') years_secondary = ','.join( '-'.join(str(e) for e in p) for p in custom_exports.reduce_years_list_into_periods( years_secondary)) if nb_flows_secondary > 0: print("%s: %s secondary flows %s" % (reporting, nb_flows_secondary, years_secondary)) nb_flows_estimation = sum(n for (y, n, t) in years if t == 'estimation') years_estimation = (y for (y, n, t) in years if t == 'estimation') years_estimation = ','.join( '-'.join(str(e) for e in p) for p in custom_exports.reduce_years_list_into_periods( years_estimation)) if nb_flows_estimation > 0: print("%s: %s estimation flows %s" % (reporting, nb_flows_estimation, years_estimation)) # this second query gets duplicated 'World as reported' flows due to # different sources for the same reportingg / year. # Those are not targeted by the first select because some secondary sources only reports total trade. print( "\nFiltering duplicated sources which describes same reportings on same years..." ) c.execute(""" SELECT reporting, year, group_concat(DISTINCT type) from flow_joined WHERE partner = 'World as reported' GROUP by reporting, year HAVING count(DISTINCT source) >1 and count(DISTINCT type) > 1""") sub_c = conn.cursor() primarysecondaryestimation_duplicates = {} for reporting, year, gtypes in c: types = sorted(gtypes.split(',')) removed = False # when a secondary source cooccurres with a primary if "secondary" in types and "primary" in types: # remove the secondary sub_c.execute( """DELETE FROM flow_joined WHERE type='secondary' AND partner = 'World as reported' AND reporting=? AND year=?""", (reporting, year)) removed = True if reporting in primarysecondaryestimation_duplicates: primarysecondaryestimation_duplicates[reporting].append( (year, sub_c.rowcount, 'secondary')) else: primarysecondaryestimation_duplicates[reporting] = [ (year, sub_c.rowcount, 'secondary') ] # when an estimation source cooccurres with a primary if "estimation" in types and ("secondary" in types or "primary" in types): # remove the estimation sub_c.execute( """DELETE FROM flow_joined WHERE type='estimation' AND partner = 'World as reported' AND reporting=? AND year=?""", (reporting, year)) removed = True if reporting in primarysecondaryestimation_duplicates: primarysecondaryestimation_duplicates[reporting].append( (year, sub_c.rowcount, 'estimation')) else: primarysecondaryestimation_duplicates[reporting] = [ (year, sub_c.rowcount, 'estimation') ] if not removed: print("/!\ duplicates on more types %s %s %s" % (types, reporting, year)) # logging what was done for reporting, years in primarysecondaryestimation_duplicates.items(): nb_flows_secondary = sum(n for (y, n, t) in years if t == 'secondary') years_secondary = (y for (y, n, t) in years if t == 'secondary') years_secondary = ','.join( '-'.join(str(e) for e in p) for p in custom_exports.reduce_years_list_into_periods( years_secondary)) if nb_flows_secondary > 0: print("%s: %s secondary World as reported flows %s" % (reporting, nb_flows_secondary, years_secondary)) nb_flows_estimation = sum(n for (y, n, t) in years if t == 'estimation') years_estimation = (y for (y, n, t) in years if t == 'estimation') years_estimation = ','.join( '-'.join(str(e) for e in p) for p in custom_exports.reduce_years_list_into_periods( years_estimation)) if nb_flows_estimation > 0: print("%s: %s estimation World as reported flows %s" % (reporting, nb_flows_estimation, years_estimation)) sub_c.close() ################################################################################ # remove GEN flows when duplicates with SPE flows ################################################################################ c.execute("""SELECT count(*) as nb, group_concat(spegen,'|'), group_concat(species_bullions,'|') as sb, group_concat(ID,'|'), reporting, partner, year, expimp, group_concat(flow,'|') FROM `flow_joined` GROUP BY year,`expimp`,`reporting`,`partner` HAVING count(*)>1 """) lines = c.fetchall() ids_to_remove = {} gen_remove = 0 for n, spe_gens, sb, ids, reporting, partner, year, e_i, f in lines: local_ids_to_remove = [] dup_found = True if spe_gens and "Gen" in spe_gens.split( "|") and "Spe" in spe_gens.split("|"): spe_indeces = [ k for k, v in enumerate(spe_gens.split("|")) if v == "Spe" ] if len(spe_indeces) > 1 and sb != None: #if we have more than 1 Spe as dups speNS_indeces = [ k for k, v in enumerate(sb.split("|")) if v == "NS" and k in spe_indeces ] if len(speNS_indeces) > 1: #if we have more than 1 NS in Spe dups dup_found = False elif len(ids.split("|")) == len( sb.split("|")) and len(speNS_indeces) > 1: # keep only the Spe & NS flow when duplicate and if no nulls in sb # otherwise we can't figure out which ID to remove local_ids_to_remove = [ v for k, v in enumerate(ids.split("|")) if k != speNS_indeces[0] ] else: dup_found = False elif len(ids.split("|")) == len(spe_gens.split("|")): # remove the Gen flows which dups with one Spe flow and if no nulls in # spe_gens other wise we can't figure out which ID to remove local_ids_to_remove = [ v for k, v in enumerate(ids.split("|")) if k != spe_indeces[0] ] else: dup_found = False if len(local_ids_to_remove) > 0: if reporting in ids_to_remove.keys(): ids_to_remove[reporting] += local_ids_to_remove else: ids_to_remove[reporting] = local_ids_to_remove else: dup_found = False if not dup_found: # flows are dups but not on GEN/SPE distinction or some null values in the groupings gen_remove += 1 #print(gen_remove, ("duplicate found :%s flows for %s,%s,%s,%s,%s,%s"%(n,year,reporting,) # partner,e_i,spe_gens,sb)).encode("utf8") print( "-------------------------------------------------------------------------" ) if gen_remove > 0: print("We found %s duplicate flows but not on GEN/SPE distinction..." % gen_remove) if ids_to_remove: for r, ids in ids_to_remove.items(): print(("removing %s General or Special duplicates for %s" % (r, len(ids))).encode("utf8")) c.execute("DELETE FROM flow_joined WHERE id IN (%s)" % ",".join(ids)) print( "-------------------------------------------------------------------------" ) ################################################################################ ## Create the partner World as sum of partners ################################################################################ c.execute( """INSERT INTO RICentities (`RICname`, `type`, `continent`, `slug`) VALUES ("World sum partners", "geographical_area", "World", "Worldsumpartners")""" ) print("World sum partners added to RICentities") c.execute( """INSERT INTO flow_joined (flow, unit, reporting, reporting_slug, year, expimp, currency, partner, partner_slug, rate, source, source_label, type, reporting_type, reporting_continent, reporting_GPH_code) SELECT sum(flow*unit) as flow, 1 as unit, reporting, reporting_slug, year, expimp, currency, 'World sum partners' as partner, 'Worldsumpartners' as partner_slug, rate, source, source_label, type, reporting_type, reporting_continent, reporting_GPH_code from flow_joined WHERE partner not like 'world%' group by reporting, expimp, year """) print("World sum partners added to flow_joined") print( "-------------------------------------------------------------------------" ) # ################################################################################ # ## Create the partner World as best guess # ################################################################################ c.execute("""INSERT INTO RICentities (`RICname`,`type`,`continent`, `slug`) VALUES ("World best guess", "geographical_area", "World", "Worldbestguess")""" ) print("World as best guess added to RICentities") print( "-------------------------------------------------------------------------" ) c.execute( """SELECT year, expimp, partner, reporting, partner_slug, reporting_slug, flow, unit, currency, rate, source, source_label, type, reporting_type, reporting_continent, reporting_GPH_code from flow_joined WHERE partner LIKE "world%" """) data = list(c) data.sort(key=lambda _: (_[3], _[0], _[1])) world_best_guess_added = 0 for g, d in itertools.groupby(data, lambda _: (_[3], _[0], _[1])): dd = list(d) world_best_guess = [sd for sd in dd if sd[4] == u"Worldestimated"] if len(world_best_guess) == 0: world_best_guess = [sd for sd in dd if sd[4] == u"Worldasreported"] if len(world_best_guess) == 0: world_best_guess = [ sd for sd in dd if sd[4] == u"Worldsumpartners" ] if len(world_best_guess) == 0: pass else: world_best_guess = list(world_best_guess[0]) world_best_guess[2] = u"World_best_guess" world_best_guess[4] = u"Worldbestguess" c.execute( """INSERT INTO flow_joined (year, expimp, partner, reporting, partner_slug, reporting_slug, flow, unit, currency, rate, source, source_label, type, reporting_type, reporting_continent, reporting_GPH_code) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""", world_best_guess) world_best_guess_added += 1 print("World best guess added to flow_joined", world_best_guess_added) print( "-------------------------------------------------------------------------" ) ################################################################################ ## Create table metadata bilateral ################################################################################ # print("Create table metadata_bilateral") # print("-------------------------------------------------------------------------") c.execute("""DROP TABLE IF EXISTS metadata_bilateral;""") c.execute("""CREATE TABLE IF NOT EXISTS metadata_bilateral AS SELECT tot.reporting_id as reporting_id, tot.reporting as reporting, group_concat(tot.flow,"|") as flow, group_concat(tot.expimp,"|") as expimp, group_concat(tot.partner,"|") as partner, tot.year as year, group_concat(tot.type,"|") as sourcetype, group_concat(tot.source,"|") as source,count(distinct tot.source) as source_count, tot.reporting_continent as reporting_continent, tot.reporting_type as reporting_type,group_concat(mirror_partner,"|") as mirror_partner from ( SELECT reporting_id, reporting, flow, r.expimp as expimp, partner as partner, r.year as year, type, source, reporting_continent, reporting_type,(t1.reportings ||"+"|| t1.expimp) as mirror_partner FROM ( select t.reporting_slug as reporting_id,t.reporting as reporting, sum(t.flow) as flow, t.expimp as expimp, group_concat(t.partner_slug) as partner, t.year as year, t.reporting_continent as reporting_continent, t.reporting_type as reporting_type,group_concat(distinct t.type) as type,group_concat(distinct t.source) as source FROM ( SELECT reporting, reporting_slug, flow*Unit/ifnull(rate,1) as flow, (replace(partner_slug,",","")||"+"||partner_continent) as partner_slug, year, source_label as source, type,reporting_continent,reporting_type, expimp FROM flow_joined WHERE partner_slug NOT LIKE 'world%' AND flow*Unit/rate is not NULL AND partner_continent is not NULL GROUP BY reporting_slug, partner_slug,year,expimp ) t Group by t.reporting_slug, t.year, t.expimp ) r LEFT JOIN ( SELECT group_concat(distinct replace(reporting_slug,",","")) as reportings,partner_slug,year,expimp FROM flow_joined Where flow is not NULL GROUP BY partner_slug, year,expimp ) t1 ON r.reporting_id=t1.partner_slug and r.year =t1.year and r.expimp!=t1.expimp ) tot GROUP BY tot.reporting_id, tot.year """) print("metadata_bilateral created") print( "-------------------------------------------------------------------------" ) ################################################################################ ## Create table metadata world ################################################################################ # print("Create table metadata_world") # print("-------------------------------------------------------------------------") c.execute("""DROP TABLE IF EXISTS metadata_world;""") c.execute("""CREATE TABLE IF NOT EXISTS metadata_world AS SELECT reporting_slug,reporting as reporting, group_concat(flow,"|") as flow, group_concat(expimp,"|") as expimp, group_concat(partner,"|") as partner, year,group_concat(type,"|") as type,group_concat(source,"|")as source, count(distinct source)as source_count, reporting_continent, reporting_type From (SELECT reporting, reporting_slug, flow*Unit/ifnull(rate,1) as flow, group_concat(partner,"+") as partner, year,group_concat(source_label,"+") as source, group_concat(type,"+") as type, reporting_continent,reporting_type, expimp FROM flow_joined WHERE flow is not NULL AND(partner_slug like 'Worldestimated' OR partner_slug like 'Worldasreported' OR partner_slug like 'Worldsumpartners' OR partner_slug like 'WorldFedericoTena') GROUP BY reporting_slug,year,expimp) Group by reporting_slug, year """) print("metadata_world created") print( "-------------------------------------------------------------------------" ) conn.commit() print('Creating CSV exports') print('source.csv (...)') custom_exports.export_sources_csv(c, conf['sources_export_filename']) print('done') print('RICentities.csv (...)') custom_exports.export_RICentities_csv(c, conf['RICentities_export_filename']) print('done') print('flows_deduplicated.csv (...)') utils.sqlitetables2csv(database_filename, ['flow_joined']) shutil.move(os.path.join('out_data', 'flow_joined.csv'), 'RICardo_trade_flows_deduplicated.csv')