def start():
    """Load the DevInfo India 2011 CSV export into the database.

    Registers the "DevInfo India" source, then for every row after the
    header inserts the dataset, region, period and data records. When the
    row's head column is non-empty, a head record and a data_head link to
    the just-inserted data row are added as well. A progress dot is written
    to stdout every 1000 rows.
    """
    print("reading devinfo file")

    # Positions of the fields used below within each CSV row.
    DATASET, UNIT, HEAD, REGION, PERIOD, VALUE = 0, 1, 2, 3, 5, 7

    csv_path = os.path.join("data", "devinfo", "DevInfo India_2011_en.csv")
    with open(csv_path, "r") as datafile:
        # The whole file is read up front, so the reader stays usable
        # after the file is closed.
        reader = csv.reader(datafile.read().splitlines())

    db.insert("source", {"name": "DevInfo India"})

    # Drop the header row; numbering continues from 1 so the progress
    # counter matches the row's position in the file.
    next(reader, None)
    for row_no, raw_cells in enumerate(reader, 1):
        # Bytes that cannot be decoded with the default codec are dropped.
        cells = [unicode(c, errors="ignore") for c in raw_cells]

        dataset_name = cells[DATASET]
        db.insert_dataset({
            "name": dataset_name,
            "title": dataset_name,
            "source": "DevInfo India"
        })

        region_name = cells[REGION]
        db.insert("region", {"name": region_name})

        period_name = cells[PERIOD]
        db.insert("period", {"name": period_name})

        db.insert("data", {
            "unit": cells[UNIT],
            "value": cells[VALUE],
            "dataset": dataset_name,
            "region": region_name,
            "period": period_name,
        })
        # Id of the data row inserted just above, used for the head link.
        data_id = db.sql("last_insert_id()")[0][0]

        head_name = cells[HEAD]
        if head_name:
            db.insert("head", {"name": head_name})
            db.insert("data_head", {"data": data_id, "head": head_name})

        if row_no % 1000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
def start():
    """Import the World Bank India workbook into the database.

    Steps:
      1. Register the "World Bank" source.
      2. Call ``utils.convert_to_csv`` on the Excel workbook (presumably
         this produces the ``-sheet1``/``-sheet2`` CSV files read below;
         confirm against ``utils``).
      3. Read sheet 2 and insert one dataset per row after the header.
      4. Read sheet 1: the header row supplies the period names (columns
         from index 2 onward); every later row supplies values for the
         dataset named in its first column, one data record per period.

    A progress dot is written to stdout every 100 rows of sheet 1.
    """
    print("importing worldbank data...")
    db.insert("source", {"name": "World Bank"})

    utils.convert_to_csv(
        os.path.join("data", "worldbank", "IND_Country_MetaData_en_EXCEL.xls"),
        os.path.join("data", "worldbank"))

    # import dataset
    with open(os.path.join(
            "data", "worldbank",
            "IND_Country_MetaData_en_EXCEL-sheet2.csv")) as datafile:
        reader = csv.reader(datafile.read().splitlines())

    for i, row in enumerate(reader):
        if i == 0:
            # header row
            continue
        # The error handler must be spelled "ignore": an unknown handler
        # name raises LookupError as soon as a cell contains bytes that
        # are not valid UTF-8, instead of dropping them.
        row = [unicode(c, "utf-8", errors="ignore") for c in row]
        db.insert_dataset({
            # name is truncated to 150 characters; the full text is kept
            # in title
            "name": row[1][:150],
            "title": row[1],
            "description": row[2],
            "source_info": row[3],
            "source": "World Bank"
        })

    # import data
    with open(os.path.join(
            "data", "worldbank",
            "IND_Country_MetaData_en_EXCEL-sheet1.csv")) as datafile:
        reader = csv.reader(datafile.read().splitlines())

    db.insert("region", {"name": "India"})

    for i, row in enumerate(reader):
        if i == 0:
            # The header row carries the period names from the third
            # column onward; keep it to label the values in later rows.
            headers = row
            for year in row[2:]:
                db.insert("period", {"name": year})
        else:
            for ci, value in enumerate(row):
                # Only columns from index 2 hold values. Cells for which
                # utils.flt() is falsy are skipped (presumably blank or
                # zero cells; confirm against utils.flt).
                if ci > 1 and utils.flt(value):
                    db.insert("data", {
                        "dataset": row[0],
                        "period": headers[ci],
                        "value": value,
                        "region": "India",
                    })

        if i % 100 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
def start():
    """Read the DevInfo India 2011 CSV and store its rows in the database.

    The "DevInfo India" source is registered first. Each row after the
    header then yields a dataset, a region, a period and a data record,
    plus a head record and a data_head link when the head column is not
    empty. One dot is written to stdout for every 1000th row as a
    progress indicator.
    """
    print("reading devinfo file")

    with open(os.path.join("data", "devinfo", "DevInfo India_2011_en.csv"), "r") as datafile:
        lines = datafile.read().splitlines()
    reader = csv.reader(lines)

    db.insert("source", {"name": "DevInfo India"})

    # Column name -> position in a CSV row, in file order.
    field_names = (
        "dataset", "unit", "head", "region", "region_id",
        "period", "source", "value", "footnotes",
    )
    position = dict((name, idx) for idx, name in enumerate(field_names))

    def cell(record, name):
        # Look a field up by column name.
        return record[position[name]]

    for line_no, raw in enumerate(reader):
        # Line 0 is the header; everything below applies to data rows only.
        if line_no:
            # Undecodable bytes are silently dropped.
            record = [unicode(c, errors="ignore") for c in raw]

            db.insert_dataset({
                "name": cell(record, "dataset"),
                "title": cell(record, "dataset"),
                "source": "DevInfo India",
            })
            db.insert("region", {"name": cell(record, "region")})
            db.insert("period", {"name": cell(record, "period")})

            data_record = {
                "unit": cell(record, "unit"),
                "value": cell(record, "value"),
                "dataset": cell(record, "dataset"),
                "region": cell(record, "region"),
                "period": cell(record, "period"),
            }
            db.insert("data", data_record)
            # Fetch the id of the data row that was just inserted.
            inserted = db.sql("last_insert_id()")
            d_id = inserted[0][0]

            if cell(record, "head"):
                db.insert("head", {"name": cell(record, "head")})
                db.insert("data_head", {"data": d_id, "head": cell(record, "head")})

            if not line_no % 1000:
                sys.stdout.write(".")
                sys.stdout.flush()
def process_file(fpath):
    """Import a single data file identified by ``fpath``.

    Writes a progress dot to stdout, then asks ``utils.get_file_data`` for
    the headers and data belonging to the file's base name. If either is
    empty nothing else happens; otherwise the dataset is registered with
    source "data.gov.in", the data is passed through ``clean_data`` and
    then handed to ``set_series`` and ``set_data``.
    """
    sys.stdout.write(".")
    sys.stdout.flush()

    file_name = os.path.basename(fpath)
    headers, data = utils.get_file_data(file_name)

    # Nothing to import unless both the headers and the data are present.
    if not (data and headers):
        return

    # dataset
    dataset_info = {
        "name": headers["title"],
        "description": headers["description"],
        "raw_filename": headers["file_name"],
        "url": headers["url"],
        "source": "data.gov.in"
    }
    db.insert_dataset(dataset_info)

    cleaned = clean_data(data)
    set_series(headers, cleaned)
    set_data(headers, cleaned)