def truncate_serial_number_row(data):
    """Drop the first row of ``data`` when it looks like a serial-number row.

    Every cell after the first one in the first row is converted with
    ``utils.flt`` and compared with its 1-based position among those cells
    (1, 2, 3, ...). If the number of matching cells divided by
    ``min(5, len(first_row))`` is greater than 0.8, the first row is removed.

    Args:
        data: sequence of rows, each row being a sequence of cell values.

    Returns:
        ``data[1:]`` when the first row is judged to hold serial numbers,
        otherwise ``data`` itself. Empty ``data``, or ``data`` whose first
        row is empty, is returned unchanged.
    """
    # Nothing to inspect: guards both data[0] on an empty table and the
    # division below, whose denominator would be 0 for an empty first row.
    if not data or not data[0]:
        return data

    row = data[0]
    to_check = min(5, len(row))

    matched = 0
    for position, value in enumerate(row[1:], 1):
        try:
            v = utils.flt(value)
        except ValueError:
            # Cells that cannot be converted never count as a match.
            v = 0
        if v == utils.flt(position):
            matched += 1

    # NOTE(review): all cells after the first are scanned while the
    # denominator is capped at 5, so the ratio can exceed 1 on wide rows and
    # cannot exceed 0.8 on rows shorter than 6 cells - confirm this is intended.
    if float(matched) / to_check > 0.8:
        data = data[1:]

    return data
def truncate_serial_number_row(data):
    """Remove a leading serial-number row from ``data`` if one is detected.

    The cells of the first row, skipping the very first cell, are converted
    with ``utils.flt`` and compared against 1, 2, 3, ... in order. The count
    of equal cells is divided by ``min(5, len(first_row))``; a ratio above
    0.8 means the row is treated as serial numbers and dropped.

    Args:
        data: sequence of rows, each row being a sequence of cell values.

    Returns:
        ``data`` without its first row when that row is detected as serial
        numbers; otherwise ``data`` as passed in. An empty table, or a table
        whose first row is empty, is returned as is.
    """
    if not data:
        # The original indexed data[0] unconditionally (IndexError here).
        return data

    first_row = data[0]
    if not first_row:
        # min(5, 0) would be the divisor below (ZeroDivisionError).
        return data

    to_check = min(5, len(first_row))
    matched = 0
    for offset, cell in enumerate(first_row[1:]):
        try:
            number = utils.flt(cell)
        except ValueError:
            number = 0  # unconvertible cell: never equal to a serial number
        if number == utils.flt(offset + 1):
            matched += 1

    # NOTE(review): the loop visits every cell after the first, but the
    # divisor is capped at 5 - confirm that this asymmetry is intentional.
    if float(matched) / to_check > 0.8:
        return data[1:]
    return data
def get_chart_data(file_data, file_properties, chart_type):
    """Build the labels and coloured datasets describing a chart.

    The table is optionally transposed, passed through
    ``exclude_total_type_columns`` and reduced to its numeric part with
    ``get_start_row_and_column`` / ``get_numeric_dataset``. One dataset entry
    is produced per numeric row, each with a progressively brighter colour.

    Args:
        file_data: table as a sequence of rows.
        file_properties: mapping; a truthy ``"transpose"`` entry makes the
            table be transposed before processing.
        chart_type: ``"Line"`` or ``"Pie"``. Any other value yields no
            dataset entries.

    Returns:
        dict with keys ``"chart_type"``, ``"labels"`` (first-row cells from
        the start column on, each cut to 15 characters) and ``"datasets"``.
    """
    if file_properties.get("transpose"):
        # Built as a real list so it can be indexed and sliced below.
        map_dataset = [list(column) for column in zip(*file_data)]
    else:
        map_dataset = [list(row) for row in file_data]

    map_dataset = exclude_total_type_columns(map_dataset, 1)
    start_row, start_column = get_start_row_and_column(map_dataset)
    value_map_dataset = get_numeric_dataset(
        map_dataset, start_row, start_column)

    x_labels = [label[:15] for label in map_dataset[0][start_column:]]

    # Spread the 0-255 colour range over the rows; with no rows the loop
    # below does not run, so the step is irrelevant (and 255 / 0 is avoided).
    row_count = len(value_map_dataset)
    color_steps = int(255.0 / row_count) if row_count else 0

    data_sets = []
    i = 0
    for row in value_map_dataset:
        i += color_steps
        if chart_type == "Line":
            data_sets.append({
                "fillColor": "rgba(%i, %i, %i, %s)" % (i, i / 1.2, i / 4, .3),
                "strokeColor": "rgba(%i, %i, %i, %s)" % (i, i / 1.2, i / 4, 1),
                "pointColor": "rgba(%i, %i, %i, %s)" % (i, i, i / 1.2, 1),
                "pointStrokeColor": "#fff",
                "data": [utils.flt(val) for val in row],
            })
        elif chart_type == "Pie":
            data_sets.append({
                # Only the second cell of the numeric row is charted.
                "value": utils.flt(row[1]),
                "color": "rgba(%i, %i, %i, %s)" % (i, i / 1.2, i / 4, .3),
            })

    return {
        "chart_type": chart_type,
        "labels": x_labels,
        "datasets": data_sets,
    }
def get_chart_data(file_data, file_properties, chart_type):
    """Assemble chart labels and per-row datasets from tabular data.

    Steps: copy (or transpose) the table, run it through
    ``exclude_total_type_columns``, locate the numeric area with
    ``get_start_row_and_column`` and extract it with ``get_numeric_dataset``.
    Each numeric row becomes one dataset entry whose colour is derived from
    a running intensity value.

    Args:
        file_data: table as a sequence of rows.
        file_properties: mapping; the table is transposed when its
            ``"transpose"`` entry is truthy.
        chart_type: ``"Line"`` or ``"Pie"``; other values produce an empty
            ``"datasets"`` list.

    Returns:
        dict with ``"chart_type"``, ``"labels"`` (first-row cells from the
        start column on, truncated to 15 characters) and ``"datasets"``.
    """
    transpose = file_properties.get("transpose")
    source_rows = zip(*file_data) if transpose else file_data
    # Materialise every row as a list so the table can be indexed/sliced.
    map_dataset = [list(cells) for cells in source_rows]

    map_dataset = exclude_total_type_columns(map_dataset, 1)
    start_row, start_column = get_start_row_and_column(map_dataset)
    value_map_dataset = get_numeric_dataset(
        map_dataset, start_row, start_column)

    x_labels = [label[:15] for label in map_dataset[0][start_column:]]

    # An empty numeric dataset used to raise ZeroDivisionError here; the
    # step value is unused in that case because the loop body never runs.
    if len(value_map_dataset):
        color_steps = int(255.0 / len(value_map_dataset))
    else:
        color_steps = 0

    data_sets = []
    for index, row in enumerate(value_map_dataset):
        i = color_steps * (index + 1)
        if chart_type == "Line":
            data_sets.append({
                "fillColor": "rgba(%i, %i, %i, %s)" % (i, i / 1.2, i / 4, .3),
                "strokeColor": "rgba(%i, %i, %i, %s)" % (i, i / 1.2, i / 4, 1),
                "pointColor": "rgba(%i, %i, %i, %s)" % (i, i, i / 1.2, 1),
                "pointStrokeColor": "#fff",
                "data": [utils.flt(val) for val in row],
            })
        elif chart_type == "Pie":
            data_sets.append({
                # Only the second cell of the numeric row is charted.
                "value": utils.flt(row[1]),
                "color": "rgba(%i, %i, %i, %s)" % (i, i / 1.2, i / 4, .3),
            })

    chart_data = {
        "chart_type": chart_type,
        "labels": x_labels,
        "datasets": data_sets,
    }
    return chart_data
def start():
    """Import the World Bank India workbook into the database.

    Registers the "World Bank" source, converts the Excel workbook under
    ``data/worldbank`` to CSV with ``utils.convert_to_csv``, then:

    * reads the ``-sheet2`` CSV and inserts one dataset per row after the
      header row (columns 1, 2 and 3 give title, description and source
      info);
    * reads the ``-sheet1`` CSV, inserts the "India" region, one period per
      header cell from the third column on, and one data record for every
      cell from the third column on whose ``utils.flt`` value is truthy.

    Progress dots are written to stdout while the data sheet is processed.
    """
    # Call form works for a single string on both Python 2 and Python 3.
    print("importing worldbank data...")

    base_dir = os.path.join("data", "worldbank")

    db.insert("source", {"name": "World Bank"})
    utils.convert_to_csv(
        os.path.join(base_dir, "IND_Country_MetaData_en_EXCEL.xls"),
        base_dir)

    # import dataset
    dataset_csv = os.path.join(
        base_dir, "IND_Country_MetaData_en_EXCEL-sheet2.csv")
    with open(dataset_csv) as datafile:
        reader = csv.reader(datafile.read().splitlines())
        for i, row in enumerate(reader):
            if i == 0:
                # header row
                continue
            # "ignore" (previously misspelt "ingore") drops undecodable
            # bytes; the misspelt handler name raised LookupError as soon
            # as a decoding error was hit.
            row = [unicode(c, "utf-8", errors="ignore") for c in row]
            db.insert_dataset({
                "name": row[1][:150],
                "title": row[1],
                "description": row[2],
                "source_info": row[3],
                "source": "World Bank"
            })

    # import data
    data_csv = os.path.join(
        base_dir, "IND_Country_MetaData_en_EXCEL-sheet1.csv")
    with open(data_csv) as datafile:
        reader = csv.reader(datafile.read().splitlines())
        db.insert("region", {"name": "India"})
        for i, row in enumerate(reader):
            if i == 0:
                # Header row: the period names start at the third column.
                headers = row
                for year in row[2:]:
                    db.insert("period", {"name": year})
            else:
                for ci, value in enumerate(row):
                    # Skip the two leading columns and empty/zero cells.
                    if ci > 1 and utils.flt(value):
                        db.insert("data", {
                            "dataset": row[0],
                            "period": headers[ci],
                            "value": value,
                            "region": "India",
                        })
            # NOTE(review): progress dot every 100 rows; nesting level was
            # reconstructed from flattened source - confirm it is per row.
            if i % 100 == 0:
                sys.stdout.write(".")
                sys.stdout.flush()
def is_year(v):
    """Tell whether the text ``v`` looks like a year or a year range.

    Args:
        v: text to inspect.

    Returns:
        * ``False`` when ``v`` is longer than 30 characters.
        * For a 4-character value accepted by ``utils.is_number``: whether
          ``utils.flt(v)`` lies strictly between 1900 and 2050.
        * Otherwise the first regex match object found for a "19xx" or
          "20xx" followed by a non-digit, or a two-digit range such as
          "98-99"; ``None`` when nothing matches.
    """
    if len(v) > 30:
        return False

    if len(v) == 4 and utils.is_number(v):
        year = utils.flt(v)
        return 1900 < year < 2050

    year_patterns = (
        "19[0-9]{2}[^0-9]+",
        "20[0-9]{2}[^0-9]+",
        "[0189][0-9]-[0189][0-9]",
    )
    # Patterns are tried in order; the first hit is returned as-is.
    for pattern in year_patterns:
        found = re.search(pattern, v)
        if found:
            return found
    return None
def is_year(v):
    """Check whether ``v`` reads like a year ("1998") or year span ("98-99").

    Args:
        v: text to inspect.

    Returns:
        ``False`` for text longer than 30 characters. For 4-character text
        that ``utils.is_number`` accepts, a boolean saying whether its
        ``utils.flt`` value is above 1900 and below 2050. In every other
        case, the match object of the first matching year pattern, or
        ``None`` if no pattern matches.
    """
    too_long = len(v) > 30
    if too_long:
        return False

    if len(v) != 4 or not utils.is_number(v):
        # Free-form text: look for 19xx / 20xx followed by a non-digit,
        # then for a two-digit range; evaluated lazily, in this order.
        attempts = (
            re.search(expression, v)
            for expression in (
                "19[0-9]{2}[^0-9]+",
                "20[0-9]{2}[^0-9]+",
                "[0189][0-9]-[0189][0-9]",
            )
        )
        return next((hit for hit in attempts if hit), None)

    number = utils.flt(v)
    return number > 1900 and number < 2050
def get_chart_data(file_data, file_properties):
    """Serialise the chart description for a table to a JSON string.

    Args:
        file_data: table as a sequence of rows.
        file_properties: mapping read for
            ``"chart_type"`` (``"Line"`` or ``"Pie"``; falsy/missing means
            no chart), ``"data_index"`` (index from which both the rows and
            the cells within a row are treated as data), ``"x_axis"`` (index
            of the row providing the labels) and ``"transpose"`` (optional;
            when truthy the table is transposed first).

    Returns:
        ``None`` when no chart type is configured; otherwise a JSON string
        with ``"chart_type"``, ``"labels"`` and ``"datasets"``.
    """
    chart_type = file_properties.get("chart_type")
    if not chart_type:
        return None

    data_index = file_properties["data_index"]

    if file_properties.get("transpose"):
        # Built as a real list so it can be indexed and sliced below.
        map_dataset = [list(column) for column in zip(*file_data)]
    else:
        map_dataset = file_data

    # Labels are taken from the table actually being charted. Reading them
    # from the untransposed table first raised IndexError whenever "x_axis"
    # was only a valid row index after transposing.
    x_labels = map_dataset[file_properties["x_axis"]][data_index:]

    # A single-row table made the divisor 0; the step is irrelevant then
    # unless data_index is 0, in which case the one row is drawn with i == 0.
    series_slots = len(map_dataset) - 1
    color_steps = int(255.0 / series_slots) if series_slots else 0

    data_sets = []
    i = 0
    for d in map_dataset[data_index:]:
        i += color_steps
        if chart_type == "Line":
            data_sets.append({
                "fillColor": "rgba(%i, %i, %i, %s)" % (i, i / 1.2, i / 4, .3),
                "strokeColor": "rgba(%i, %i, %i, %s)" % (i, i / 1.2, i / 4, 1),
                "pointColor": "rgba(%i, %i, %i, %s)" % (i, i, i / 1.2, 1),
                "pointStrokeColor": "#fff",
                "data": [utils.flt(val) for val in d[data_index:]],
            })
        elif chart_type == "Pie":
            data_sets.append({
                # Only the first data cell of the row is charted.
                "value": utils.flt(d[data_index]),
                "color": "rgba(%i, %i, %i, %s)" % (i, i / 1.2, i / 4, .3),
            })

    chart_data = {
        "chart_type": chart_type,
        "labels": x_labels,
        "datasets": data_sets,
    }
    return json.dumps(chart_data)