select = """
SELECT * FROM Demographic WHERE element_id BETWEEN 511 AND 603
"""
xs = np.ma.masked_less_equal(np.array(cursor.execute(select).fetchall()), 0)[:,1:]
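# Column 0 of the result is the id primary key, dropped by [:, 1:]; values <= 0
# are treated as missing and masked.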

# Calculate masked differences along the year (column) axis
diff_xs = np.ma.masked_all(np.shape(xs[:, 4:]))
diff_xs[:, 1:] = np.ma.diff(xs[:, 4:], axis=1)
diff_xs = np.ma.filled(diff_xs, -1)
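# For illustration (hypothetical values): a row of yearly counts [10, 12, --, 15]
# becomes [--, 2, --, --]; the first column stays masked because there is no
# prior year to difference against, and masked inputs propagate through the diff.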

# Stack id fields with net change values
xs = np.ma.filled(xs, -1)
ys = np.hstack((xs[:, :4], diff_xs))

# Convert ndarray to recarray
ys = ys.reshape(-1, ).view(get_dtype(connection, 'Demographic', remove_id=True))
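
# get_dtype is a project-local helper that is not shown here; a minimal sketch of
# what it plausibly does, assuming it builds a numpy dtype from PRAGMA table_info
# with every column widened to float so that a plain float view works:
def get_dtype_sketch(connection, table, remove_id=False):
    info = connection.execute("PRAGMA TABLE_INFO(%s)"%table).fetchall()
    names = [str(row[1]) for row in info] # row layout: (cid, name, type, notnull, dflt_value, pk)
    if remove_id:
        names = [name for name in names if name != 'id']
    return np.dtype([(name, '<f8') for name in names])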

# Replace element ids with the ids of their corresponding net-change elements (id + 100)
for element_id in [511, 512, 513, 551, 561, 571, 581, 591, 592, 593, 601, 602, 603]:
    np.put(ys['element_id'], np.where(ys['element_id']==element_id), element_id + 100)

# Get the last id value + 1 from the Demographic table for new primary key values
max_id, = np.array(cursor.execute("SELECT MAX(id) FROM Demographic").fetchall()).flatten() + 1
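# Equivalent without numpy (assuming the table is non-empty):
# max_id = cursor.execute("SELECT MAX(id) FROM Demographic").fetchone()[0] + 1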

# Insert the new data into the database with the sqlite_io module
import sqlite_io
sqlite_io.tosqlite(ys, max_id, DB, "Demographic", autoid=True, create=False)
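
# sqlite_io is a project-local module; a rough sketch of the insert step that
# tosqlite presumably performs (illustrative only; table creation, indexing,
# foreign keys and type handling are omitted):
def tosqlite_sketch(xs, start_id, db_path, table):
    import sqlite3
    conn = sqlite3.connect(db_path)
    placeholders = ",".join("?"*(len(xs.dtype.names) + 1)) # one extra slot for the id column
    rows = ([start_id + i] + list(row) for i, row in enumerate(xs))
    conn.executemany("INSERT INTO %s VALUES (%s)"%(table, placeholders), rows)
    conn.commit()
    conn.close()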

# Close the cursor and the connection
cursor.close()
connection.close()
# Resize insert array
insert_xs = np.resize(insert_xs, (count, len(names)))

# Fill masked values with -1
insert_xs = np.ma.filled(insert_xs, -1)

# Convert ndarray to recarray
insert_xs = insert_xs.view(ndtype).flatten()

# Replace element_ids with those for the per capita elements
np.put(insert_xs['element_id'], np.where(insert_xs['element_id']==100), 101) # per capita consumption
np.put(insert_xs['element_id'], np.where(insert_xs['element_id']==51), 52) # per capita production

# Replace unit_ids with those for the per capita elements
np.put(insert_xs['unit_id'], np.where(insert_xs['unit_id']==3), 17) # tonnes/1000 people
np.put(insert_xs['unit_id'], np.where(insert_xs['unit_id']==9), 18) # 1000 No/1000 people
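# Equivalent boolean-mask form of the np.put calls above:
# insert_xs['unit_id'][insert_xs['unit_id'] == 3] = 17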

# Replace source_ids with a new code for 'InnovoSoy Calculated'
insert_xs['source_id'] = 8

# Get the last id value + 1 from the Commodity table for new primary key values
max_id, = np.array(cursor.execute("SELECT MAX(id) FROM Commodity").fetchall()).flatten() + 1

# Insert the new data into the database with the sqlite_io module
sqlite_io.tosqlite(insert_xs, max_id, DB, "Commodity", autoid=True, create=False)

# Close the cursor and the connection
cursor.close()
connection.close()
TABLE_NAME = "Commodity_Raw_Data"

# Countries to merge; country_one is the one that remains in the database
country_one = 185
country_two = 228
country_name = "Russian_Federation" # name for the new merged table

# Query to merge (sum) the rows of the two countries
query = """
SELECT country_id, item_id, element_id, unit_id, source_id, %s
FROM %%s
WHERE country_id=%s OR country_id=%s
GROUP BY item_id, element_id, source_id
"""%(",".join("SUM(yr%s) AS yr%s"%(x, x) for x in xrange(1961, 2011)), country_one, country_two)

# Run the query through sqlite_io, creating a temporary table that is dropped when complete
xs = sqlite_io.fromsqlite(DB, query%TABLE_NAME, "tmp")
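# fromsqlite is the read-side counterpart in the same project module; from its
# usage it appears to run the query via a temporary table ("tmp") and return
# the result as a numpy recarray.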

# Extract the merged rows for the country that remains in the database
xs_merged = xs[xs['country_id']==country_one]

# Create a new table in the database for the merged country
count = 0 # starting primary key value
foreign_keys = {'country_id':'Country', 'element_id':'Element',
                'unit_id':'Unit', 'source_id':'Source'}
index = ['source_id', 'element_id', 'item_id', 'country_id'] #index in order
sqlite_io.tosqlite(xs_merged, count, DB, country_name, autoid=True,
    foreign_keys=foreign_keys, index=index)
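# The foreign_keys mapping and the ordered index list are presumably emitted as
# FOREIGN KEY(...) REFERENCES ... clauses and a CREATE INDEX statement when the
# table is created.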
                    trade_row = np.ma.masked_all(YEARS.size, dtype=float) # masked array with all year values
                    trade_country_id, element_id = column.split("_") # split column on _ separator
                    idx = y['year_id'] - MIN_YEAR # index value of years to include
                    item_lookup = find_key(aggregate_item_lookup, item_id)[0] # lookup new item code
                    trade_row[idx] = z # match up and assign values based on year values
                    trade_rows[count] = trade_row
                    id_rows[count] = np.array([country_id, element_id, item_lookup, trade_country_id, SOURCE_ID],
                        dtype=int)
                    count += 1
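# find_key is a project helper; a minimal sketch, assuming it returns the keys of
# a mapping whose value (here, a collection of raw item ids) contains the item:
# def find_key(dic, value):
#     return [key for key, values in dic.items() if value in values]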

# Resize arrays to match actual size of the data
trade_rows = np.ma.filled(np.ma.resize(trade_rows, (count,)), -1) # fill masked values with -1 for database
id_rows = np.ma.resize(id_rows, (count,))

# Merge and flatten the two arrays to keep the proper data types
from numpy.lib.recfunctions import merge_arrays
xs_rows = merge_arrays((id_rows, trade_rows), flatten=True) # values for the database
xs_rows = np.sort(xs_rows, order=["element_id", "item_id", "country_id", "trade_country_id"])
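# merge_arrays comes from numpy.lib.recfunctions and joins the input arrays
# field by field into one structured array; flatten=True keeps the result flat
# instead of nesting each input as a sub-array field.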

# Create new database table for Trade Relationships
import sqlite_io

DB = r".\GFIN_DB.db3"
TABLE_NAME = "Trade"
foreign_keys = {
    'country_id':'Country', 'element_id':'Element','unit_id':'Unit',
    'source_id':'Source', 'trade_country_id':'Country'
}
index = ['element_id', 'item_id', 'country_id'] # index in order
sqlite_io.tosqlite(xs_rows, 0, DB, TABLE_NAME, autoid=True,
    foreign_keys=foreign_keys, index=index, create=True) # create a new table in the database for trade matrix

# Example 5
top_splits = create.split(",")[1:]
for i, top in enumerate(top_splits):
    splits = top.split(" ")[:2]
    for j, split in enumerate(splits):
        splits[j] = split.replace("\n", "").replace(")", "").replace("FLOAT", "<f8").replace("INTEGER", "<f8")
    top_splits[i] = splits
ndtype = [tuple(split) for split in top_splits]
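# For a statement shaped like (hypothetical):
#   CREATE TABLE Datum (id INTEGER,
#   element_id INTEGER,
#   yr1961 FLOAT)
# split(",")[1:] drops the leading "CREATE TABLE Datum (id INTEGER" chunk, and
# the token cleanup yields ndtype = [('element_id', '<f8'), ('yr1961', '<f8')];
# INTEGER is deliberately mapped to '<f8' so whole rows can be viewed as float.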

# View combined data as a recarray
zs = zs.view(ndtype).flatten()

# Sort new array by index fields in order
zs = np.sort(zs, order=['element_id', 'item_id', 'country_id'])

# Insert data into the new table
import sqlite_io
sqlite_io.tosqlite(zs, 0, DB, "Datum", autoid=True, create=False)

# Add an index on the new Datum table
index = "CREATE INDEX Datum_index ON Datum (element_id, item_id, country_id)"
cursor.execute(index)
connection.commit()

# Drop the Demographic and Commodity tables
drops = ["DROP TABLE Demographic", "DROP TABLE Commodity"]
for drop in drops:
    cursor.execute(drop)
connection.commit()

# Close the cursor and the connection
cursor.close()
connection.close()

# Example 6
for statement in create_strs:
    new_cursor.execute(statement)
new_connection.commit()

# Insert data into each table
for table in copy_tables:
    is_autoid = table in ('SchemeColor', 'AreaGroup') # tables with id as primary key
    ndtype, names = get_dtype(connection, table, remove_id=is_autoid, nameReturn=True)

    # Get data from master database for copying
    xs = np.ma.array(cursor.execute("SELECT %s FROM %s"%(",".join(names), table)).fetchall(), ndtype)

    # Mask all None values and create primary keys
    autoid = is_autoid # let tosqlite assign primary keys for these tables
    primary_key = False if is_autoid else "%s_id"%table.lower() # explicit primary key column otherwise
    xs = mask_none_values(xs) # mask none values
    sqlite_io.tosqlite(xs, 0, NEW_DB, table, autoid=autoid,
        create=False, primary_key=primary_key)
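
# mask_none_values is a project helper; a plausible minimal version, masking
# entries that came back from SQLite as NULL:
# def mask_none_values(xs):
#     for name in xs.dtype.names:
#         xs[name] = np.ma.masked_object(xs[name], None)
#     return xs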

# Format the value tables, using -1 for missing values
for table in TABLES:
    (names, typestr) = zip(*(_[1:3] for _ in connection.execute("PRAGMA TABLE_INFO(%s)"%table).fetchall()))
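    # PRAGMA TABLE_INFO returns one row per column, laid out as
    # (cid, name, type, notnull, dflt_value, pk); the _[1:3] slice keeps (name, type)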
    names = ",".join([name.strip() for name in names if name.strip()!='id'])
    xs = sqlite_io.fromsqlite(DB, "SELECT %s FROM %s"%(names, table), "tmp_table")
    ndtype = xs.dtype
    xs = xs.view(float).reshape((-1, len(names.split(","))))
    xs = np.ma.masked_less_equal(xs, 0) # mask any value less than or equal to 0

    # Remove Commodity rows that have less than 5 values
    if table == 'Commodity':
        id_field_idx = 5 # number of leading foreign-key columns before the data values
        id_fields = xs[:,:id_field_idx] # foreign key fields
        value_fields = xs[:,id_field_idx:] # data value fields