示例#1
0
def start():
    """Load the DevInfo India 2011 CSV export into the database.

    Registers the "DevInfo India" source, then for every row after the
    header row inserts the dataset, region, period and data records, plus a
    head record and a data/head link when the row's head column is
    non-empty. A "." is written to stdout every 1000 rows as a progress
    marker.
    """
    print("reading devinfo file")
    csv_path = os.path.join("data", "devinfo", "DevInfo India_2011_en.csv")
    with open(csv_path, "r") as datafile:
        lines = datafile.read().splitlines()
    # The file content is fully in memory, so the reader can be used after
    # the handle is closed.
    reader = csv.reader(lines)

    db.insert("source", {"name": "DevInfo India"})

    # Positions of the columns that are imported. Columns 4 (region_id),
    # 6 (source) and 8 (footnotes) are present in the file but not used.
    dataset_col, unit_col, head_col, region_col = 0, 1, 2, 3
    period_col, value_col = 5, 7

    # The first row is the header row; numbering continues from 1 so the
    # progress marker fires on the same rows as a zero-based count would.
    next(reader, None)
    for row_no, raw_row in enumerate(reader, 1):
        # Decode with the default codec, silently dropping undecodable bytes.
        cells = [unicode(cell, errors="ignore") for cell in raw_row]

        # NOTE(review): these inserts run for every row, so db presumably
        # tolerates repeated dataset/region/period names - confirm.
        db.insert_dataset({
            "name": cells[dataset_col],
            "title": cells[dataset_col],
            "source": "DevInfo India",
        })
        db.insert("region", {"name": cells[region_col]})
        db.insert("period", {"name": cells[period_col]})

        db.insert("data", {
            "unit": cells[unit_col],
            "value": cells[value_col],
            "dataset": cells[dataset_col],
            "region": cells[region_col],
            "period": cells[period_col],
        })

        # Presumably the id of the "data" record inserted just above; it
        # must be read before any further insert - verify against db.sql.
        id_result = db.sql("last_insert_id()")
        data_id = id_result[0][0]

        head_name = cells[head_col]
        if head_name:
            db.insert("head", {"name": head_name})
            db.insert("data_head", {"data": data_id, "head": head_name})

        if not row_no % 1000:
            sys.stdout.write(".")
            sys.stdout.flush()
示例#2
0
def start():
    """Import the World Bank India workbook into the database.

    Steps, in order:

    1. Register the "World Bank" source and run ``utils.convert_to_csv`` on
       the Excel workbook (presumably this produces the ``-sheet1.csv`` and
       ``-sheet2.csv`` files read below - confirm against utils).
    2. Read sheet 2 and insert one dataset per row after the header row.
    3. Read sheet 1, insert one period per header cell from the third
       column on, and insert a data record for region "India" for every
       cell in those columns for which ``utils.flt`` returns a truthy value.

    A "." is written to stdout every 100 data rows as a progress marker.
    """
    print("importing worldbank data...")
    db.insert("source", {"name": "World Bank"})
    utils.convert_to_csv(
        os.path.join("data", "worldbank", "IND_Country_MetaData_en_EXCEL.xls"),
        os.path.join("data", "worldbank"))

    # import dataset
    with open(
            os.path.join(
                "data", "worldbank",
                "IND_Country_MetaData_en_EXCEL-sheet2.csv")) as datafile:
        reader = csv.reader(datafile.read().splitlines())

    for i, row in enumerate(reader):
        if i == 0:
            # Skip the header row.
            continue
        # The handler name was previously misspelled ("ingore"); the name is
        # only looked up when a decode error occurs, so any invalid UTF-8
        # byte raised LookupError instead of being dropped.
        row = [unicode(c, "utf-8", errors="ignore") for c in row]
        db.insert_dataset({
            # Name is cut to 150 characters; the title keeps the full text.
            "name": row[1][:150],
            "title": row[1],
            "description": row[2],
            "source_info": row[3],
            "source": "World Bank"
        })

    # import data
    with open(
            os.path.join(
                "data", "worldbank",
                "IND_Country_MetaData_en_EXCEL-sheet1.csv")) as datafile:
        reader = csv.reader(datafile.read().splitlines())

    db.insert("region", {"name": "India"})

    for i, row in enumerate(reader):
        if i == 0:
            # Header row: cells from the third column on are period names.
            headers = row
            for year in row[2:]:
                db.insert("period", {"name": year})

        else:
            for ci, value in enumerate(row):
                # Only the period columns carry values. Cells for which
                # utils.flt is falsy are skipped (presumably blank or zero
                # values - confirm against utils.flt).
                if ci > 1 and utils.flt(value):
                    db.insert(
                        "data", {
                            "dataset": row[0],
                            "period": headers[ci],
                            "value": value,
                            "region": "India",
                        })
            if i % 100 == 0:
                sys.stdout.write(".")
                sys.stdout.flush()
def start():
    """Import the DevInfo India 2011 CSV file.

    Inserts the "DevInfo India" source once, then walks the CSV rows
    (skipping the header row) and, per row, inserts a dataset, a region, a
    period and a data record. When the head column is non-empty a head
    record and a data_head link are inserted as well. Progress is shown by
    writing "." to stdout every 1000 rows.
    """
    print("reading devinfo file")
    source_file = os.path.join("data", "devinfo", "DevInfo India_2011_en.csv")
    with open(source_file, "r") as datafile:
        reader = csv.reader(datafile.read().splitlines())

    db.insert("source", {"name": "DevInfo India"})

    # Column name -> position. Only the columns that are actually read are
    # listed; positions 4, 6 and 8 of the file are not imported.
    col = {"dataset": 0, "unit": 1, "head": 2, "region": 3, "period": 5, "value": 7}

    for index, fields in enumerate(reader):
        if not index:
            # Header row carries no data.
            continue

        # Decode using the default codec; undecodable bytes are dropped.
        fields = [unicode(raw, errors="ignore") for raw in fields]

        dataset_record = {
            "name": fields[col["dataset"]],
            "title": fields[col["dataset"]],
            "source": "DevInfo India",
        }
        db.insert_dataset(dataset_record)

        region_record = {"name": fields[col["region"]]}
        db.insert("region", region_record)

        period_record = {"name": fields[col["period"]]}
        db.insert("period", period_record)

        data_record = {
            "unit": fields[col["unit"]],
            "value": fields[col["value"]],
            "dataset": fields[col["dataset"]],
            "region": fields[col["region"]],
            "period": fields[col["period"]],
        }
        db.insert("data", data_record)

        # NOTE(review): presumably yields the id of the data record just
        # inserted, so it has to run before the head inserts - confirm.
        d_id = db.sql("last_insert_id()")[0][0]

        head = fields[col["head"]]
        if head:
            db.insert("head", {"name": head})
            db.insert("data_head", {"data": d_id, "head": head})

        if index % 1000 == 0:
            sys.stdout.write(".")
            sys.stdout.flush()
def process_file(fpath):
	"""Import a single data.gov.in file.

	Writes a "." progress marker to stdout, loads the file's headers and
	data through ``utils.get_file_data`` (only the base name of ``fpath`` is
	passed on), and, when both are non-empty, inserts the dataset record and
	hands the cleaned data to ``set_series`` and ``set_data``. Nothing is
	inserted when either the headers or the data are empty.
	"""
	sys.stdout.write(".")
	sys.stdout.flush()

	meta, rows = utils.get_file_data(os.path.basename(fpath))
	if not (rows and meta):
		return

	# dataset
	dataset = {
		"name": meta["title"],
		"description": meta["description"],
		"raw_filename": meta["file_name"],
		"url": meta["url"],
		"source": "data.gov.in"
	}
	db.insert_dataset(dataset)

	# The dataset is registered before the data is cleaned and stored.
	rows = clean_data(rows)
	set_series(meta, rows)
	set_data(meta, rows)
示例#5
0
def process_file(fpath):
    """Load one data.gov.in file and store its dataset, series and data.

    A "." is written to stdout first as a progress marker. The file is
    looked up by base name via ``utils.get_file_data``; if that yields empty
    data or empty headers the function returns without inserting anything.
    Otherwise the dataset record is inserted, the data is passed through
    ``clean_data``, and the result is given to ``set_series`` and
    ``set_data`` together with the headers.
    """
    out = sys.stdout
    out.write(".")
    out.flush()

    file_name = os.path.basename(fpath)
    headers, data = utils.get_file_data(file_name)
    if not data or not headers:
        # Nothing usable was read for this file.
        return

    # dataset
    db.insert_dataset({
        "name": headers["title"],
        "description": headers["description"],
        "raw_filename": headers["file_name"],
        "url": headers["url"],
        "source": "data.gov.in",
    })

    cleaned = clean_data(data)
    set_series(headers, cleaned)
    set_data(headers, cleaned)