def gen_intermediate_data_for_both_way_linear_regression_plot(data,f1, f2, f3):
    """Write per-country-pair percentage files and the two index files.

    For every ordered pair of distinct countries in ``countries.countries`` a
    tab-separated file ``in/wtf/<exporter>/exports-to-<importer>.txt`` is
    written below ``rootdir``. Each row is
    ``year, export_percentage, import_percentage`` where

    * ``export_percentage`` is exporter->importer exports divided by the
      exporter's total exports for that year, times 100;
    * ``import_percentage`` is importer->exporter exports divided by the
      importer's total exports for that year, times 100.

    A percentage is written as 0 when its total is 0. Years in which both
    directional quantities are 0.0 are skipped. A pair whose file ends up
    with no rows has its file deleted and is not listed in the index files.

    Args:
        data: object providing ``export_data(year, exporter, importer)`` and
            ``total_exports(country, year)``.
        f1: path of the index file receiving one data-file path per line
            (relative to ``rootdir``).
        f2: path of the index file receiving the matching ``out/wtf/...``
            output name per line.
        f3: unused by this function; kept so existing callers keep working.
    """
    pair_count = 0
    # Context managers guarantee both index files are flushed and closed,
    # even if an exception interrupts the run.
    with open(f1, 'w') as out1, open(f2, 'w') as out2:
        for exporter in countries.countries:
            subdir = 'in/wtf/' + file_safe(exporter) + '/'
            for importer in countries.countries:
                if importer == exporter:
                    continue
                filepath = subdir + 'exports-to-' + file_safe(importer) + '.txt'
                if not os.path.exists(rootdir + subdir):
                    os.makedirs(rootdir + subdir)

                file_non_empty = False
                with open(rootdir + filepath, 'wb') as csvfile:
                    writer = csv.writer(csvfile, delimiter='\t')
                    for y in YEAR_COLUMNS:
                        year = column_to_year(y)
                        a1 = float(data.export_data(year, exporter, importer))
                        a2 = data.total_exports(exporter, year)
                        a3 = float(data.export_data(year, importer, exporter))
                        a4 = data.total_exports(importer, year)
                        print("In %d, %s to %s: %f/%f, %s to %s : %f/%f" % (
                            year, exporter, importer, a1, a2,
                            importer, exporter, a3, a4))
                        if a1 == 0.0 and a3 == 0.0:
                            # No trade in either direction this year.
                            continue
                        file_non_empty = True
                        export_percentage = 0 if a2 == 0 else a1 / a2 * 100
                        import_percentage = 0 if a4 == 0 else a3 / a4 * 100
                        writer.writerow([year, export_percentage, import_percentage])

                if not file_non_empty:
                    # Remove exactly the path that was opened above (the
                    # data file is already closed at this point).
                    os.remove(rootdir + filepath)
                    continue

                out1.write(filepath + "\n")
                out2.write(
                    "out/wtf/" + file_safe(exporter) + "/" + file_safe(exporter)
                    + "-export-to-" + file_safe(importer) + "\n")
                print(pair_count)
                pair_count += 1
def _load_row(self, exporter, importer, row_map, year_columns=YEAR_COLUMNS):
    """Load the yearly export quantities of one exporter/importer row.

    For each column in ``year_columns`` whose year is in ``self.all_years``:
    a value of ``'NaN'`` is passed to the NaN recorder, any other value is
    converted to ``float`` and stored as the exporter's export to the
    importer for that year. Both countries are added to the known-country
    list if absent, and a progress line is printed.

    After all columns are processed the known-country list is replaced by a
    sorted copy, with ``'USA'`` moved to the front when it is present.

    Args:
        exporter: exporting country name.
        importer: importing country name.
        row_map: mapping from year column name to the quantity string.
        year_columns: column names to read from ``row_map``.
    """
    for col in year_columns:
        quantity = row_map.get(col)
        year = column_to_year(col)
        if year in self.all_years:
            if quantity == 'NaN':
                self.__record_nan_data(year, exporter, importer)
            else:
                yearly_exports = self.__export_data_for_a_country(exporter, year)
                yearly_exports.set_export_to_country(importer, float(quantity))

            # Register both trading partners, exporter first.
            for country in (exporter, importer):
                if country not in self.__all_countries:
                    self.__all_countries.append(country)

            print("in " + str(year) + ", " + exporter + " exported " + quantity + " to " + importer)

    # Keep the country list alphabetical, but pin 'USA' to the first slot.
    remaining = list(self.__all_countries)
    has_usa = "USA" in remaining
    if has_usa:
        remaining.remove("USA")
    ordered = sorted(remaining)
    if has_usa:
        ordered = ['USA'] + ordered
    self.__all_countries = ordered
def _sql_string_literal(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so names such as ``Cote d'Ivoire``
    do not terminate the literal early.
    """
    return "'%s'" % ("%s" % (value,)).replace("'", "''")


def print_sql_inserts(csv_file, sql_file):
    """Convert the export CSV into SQL ``insert`` statements.

    For every CSV row this writes, in order:

    * one ``export_data_column_wise`` insert per column in ``YEAR_COLUMNS``
      (exporter, importer, ``'Y<year>'``, quantity);
    * one ``export_data_row_wise`` insert holding exporter, importer and all
      yearly quantities.

    A quantity of ``'NaN'`` or a missing cell is emitted as SQL ``NULL``.
    Other quantities are written verbatim as read from the CSV, so the input
    is expected to hold numeric text in the year columns. The zero-based row
    index is printed as a progress indicator.

    Args:
        csv_file: path of the input CSV with ``Exporter``, ``Importer`` and
            the year columns.
        sql_file: path of the SQL script to create/overwrite.
    """
    # Both files are closed on every exit path, including errors.
    with open(csv_file, 'rb') as csv_in, open(sql_file, 'w') as sql_out:
        reader = csv.DictReader(csv_in, skipinitialspace=True)
        for row_number, row in enumerate(reader):
            importer = _sql_string_literal(row.get('Importer'))
            exporter = _sql_string_literal(row.get('Exporter'))

            yearly_values = []
            for column in YEAR_COLUMNS:
                export_quantity = row.get(column)
                if export_quantity is None or export_quantity == 'NaN':
                    val = "NULL"
                else:
                    val = export_quantity
                sql_out.write(
                    "insert into export_data_column_wise values(%s,%s,'Y%d',%s);\n"
                    % (exporter, importer, column_to_year(column), val))
                yearly_values.append(val)

            print(row_number)
            sql_out.write(
                'insert into export_data_row_wise values('
                + ",".join([exporter, importer] + yearly_values)
                + ');\n')
def gen_intermediate_data_for_one_way_linear_regression_plot(data,input_file, f1, f2, f3):
    """Write one-way export-percentage files, index files and a MATLAB script.

    Each row of ``input_file`` describes one exporter/importer pair. Rows
    where importer equals exporter, where the exporter is ``'World'``, or
    where either side fails ``is_valid_country`` are skipped. For every
    remaining row a tab-separated file
    ``in/wtf/<exporter>/exports-to-<importer>.txt`` is written below
    ``rootdir`` with one ``year, percentage`` line per year column whose
    value is not ``'NaN'``; the percentage is the quantity divided by the
    exporter's total exports for that year, times 100 (0 when that total is
    0, matching the both-way generator).

    Args:
        data: object providing ``total_exports(country, year)``.
        input_file: CSV with ``Exporter``, ``Importer`` and year columns.
        f1: index file receiving one data-file path per processed row.
        f2: index file receiving the matching ``out/wtf/...`` output name.
        f3: MATLAB script file; receives ``mkdir`` commands for every country
            in ``countries.countries`` followed by the plotting loop.
    """
    # Body of the generated MATLAB loop, written verbatim after the header.
    matlab_loop = (
        "inputfile123=textread('input-files-percent.txt','%s',total)\n",
        "outputfile123=textread('output-files-percent.txt','%s',total)\n",
        "\n",
        "for i=1:total,\n",
        " data = load(inputfile123{i})\n",
        " datasize = size(data)\n",
        " isemptyfile = datasize(1) == 0\n",
        " if isemptyfile\n",
        " data = [0 0]\n",
        " end\n",
        " x = data(:,1)\n",
        " y = data(:,2)\n",
        " ylinearfit = polyval(polyfit(x,y,1),x)\n",
        " yquadfit = polyval(polyfit(x,y,2),x)\n",
        " plot(x,y,'k-s',x,ylinearfit,x,yquadfit)\n",
        " if isemptyfile\n",
        " xlabel('No data')\n",
        " ylabel('No data')\n",
        " else\n",
        " xlabel('Year')\n",
        " ylabel('Export Quantity')\n",
        " end\n",
        " saveas(gcf,outputfile123{i},'png')\n",
        " i\n",
        "end\n",
    )

    pair_count = 0
    # All four files are closed on every exit path, including errors.
    with open(input_file, 'rb') as csv_in, open(f1, 'w') as out1, \
            open(f2, 'w') as out2, open(f3, 'w') as out3:
        reader = csv.DictReader(csv_in, skipinitialspace=True)
        for row in reader:
            importer = row.get('Importer')
            exporter = row.get('Exporter')
            if importer == exporter or exporter == 'World':
                continue
            if not is_valid_country(importer) or not is_valid_country(exporter):
                continue

            subdir = 'in/wtf/' + file_safe(exporter) + '/'
            filepath = subdir + 'exports-to-' + file_safe(importer) + '.txt'
            if not os.path.exists(rootdir + subdir):
                os.makedirs(rootdir + subdir)

            with open(rootdir + filepath, 'wb') as csvfile:
                writer = csv.writer(csvfile, delimiter='\t')
                for column in YEAR_COLUMNS:
                    export_quantity = row.get(column)
                    if export_quantity == 'NaN':
                        continue
                    year = column_to_year(column)
                    total = data.total_exports(exporter, year)
                    print(exporter + ' ' + importer + ' ' + str(year) + ' '
                          + export_quantity + ' ' + str(total))
                    quantity = float(export_quantity)
                    # Guard against a zero total instead of raising
                    # ZeroDivisionError.
                    percentage = 0 if total == 0 else quantity / total * 100
                    writer.writerow([year, percentage])

            out1.write(filepath + "\n")
            out2.write(
                "out/wtf/" + file_safe(exporter) + "/" + file_safe(exporter)
                + "-export-to-" + file_safe(importer) + "\n")
            print(pair_count)
            pair_count += 1

        # MATLAB script: create the output directories, then plot each file.
        for c in countries.countries:
            out3.write("mkdir('out/wtf/" + file_safe(c) + "')\n")
        out3.write("clear\n")
        out3.write("total = " + str(pair_count) + "\n")
        out3.writelines(matlab_loop)