from dumptruck import DumpTruck

def to_sqlite3():
    # Build /tmp/open-data.sqlite with one row per dataset; commit manually at the end.
    dt = DumpTruck('/tmp/open-data.sqlite', auto_commit = False)

    # Create the tables and indices up front from dummy rows.
    dummyrow = dict(zip(['software', 'catalog', 'identifier'], ['blah'] * 3))
    dt.create_table(dummyrow, 'datasets', if_not_exists = True)
    dt.create_index(['software', 'catalog', 'identifier'], 'datasets', if_not_exists = True, unique = True)
    for table in ['ckan', 'socrata']:
        dt.create_table({'catalog': 'blah', 'identifier': 'blah'}, table, if_not_exists = True)
        dt.create_index(['catalog', 'identifier'], table, if_not_exists = True, unique = True)
    dt.create_table({'view_id': 'abc', 'table_id': 123}, 'socrata_tables')
    dt.create_index(['view_id'], 'socrata_tables', if_not_exists = True, unique = True)
    dt.create_index(['table_id'], 'socrata_tables', if_not_exists = True)

    # datasets() and SOFTWARE_MAP are defined elsewhere in the script; SOFTWARE_MAP
    # maps each catalog software to the field that holds the dataset's identifier.
    for dataset in datasets():
        row = {
            'software': dataset['software'],
            'catalog': dataset['catalog'],
            'identifier': dataset[SOFTWARE_MAP['identifier'][dataset['software']]],
        }

        # Skip datasets that have already been loaded.
        sql = 'SELECT * FROM datasets WHERE software = ? AND catalog = ? AND identifier = ?'
        if dt.execute(sql, [row['software'], row['catalog'], row['identifier']]) != []:
            continue
        dt.upsert(row, 'datasets')

        if dataset['software'] == 'socrata':
            socrata_table = {
                'view_id': row['identifier'],
                'table_id': dataset['tableId'],
            }
            dt.upsert(socrata_table, 'socrata_tables')

        # The full record goes into the per-software table ('ckan' or 'socrata').
        dt.upsert(dataset, dataset['software'])

    dt.commit()
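# The function above expects a SOFTWARE_MAP constant that is not shown in this
# excerpt. A minimal sketch of the shape the lookup implies is below; the concrete
# field names ('id', 'name') are assumptions for illustration, not taken from the
# original source.
SOFTWARE_MAP = {
    'identifier': {
        'socrata': 'id',    # assumed: a Socrata view record is keyed by its 'id'
        'ckan': 'name',     # assumed: a CKAN package record is keyed by its 'name'
    },
}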
import os
import json

from dumptruck import DumpTruck

def users():
    dt = DumpTruck(dbname = '/tmp/socrata.db')
    dt.create_table({'id': 'abcd-efgh'}, 'user')
    dt.create_index(['id'], 'user', unique = True)

    _users = {}
    for portal in os.listdir('data'):
        for viewid in os.listdir(os.path.join('data', portal, 'views')):
            # Some downloaded view files are not valid JSON; skip those.
            with open(os.path.join('data', portal, 'views', viewid), 'r') as handle:
                try:
                    view = json.load(handle)
                except ValueError:
                    continue

            # Record the view against its owner.
            if view['owner']['id'] in _users:
                _users[view['owner']['id']]['views'].add(view['id'])
                try:
                    _users[view['owner']['id']]['publicationDates'].add((view['id'], view['publicationDate']))
                except KeyError:
                    # Views without a publicationDate are counted but not dated.
                    pass
            else:
                _users[view['owner']['id']] = view['owner']
                _users[view['owner']['id']]['views'] = {view['id']}
                _users[view['owner']['id']]['tables'] = set()
                _users[view['owner']['id']]['publicationDates'] = set()

            # Record the underlying table against its author.
            if view['tableAuthor']['id'] in _users:
                _users[view['tableAuthor']['id']]['tables'].add(view['tableId'])
            else:
                _users[view['tableAuthor']['id']] = view['tableAuthor']
                _users[view['tableAuthor']['id']]['views'] = set()
                _users[view['tableAuthor']['id']]['tables'] = {view['tableId']}
                _users[view['tableAuthor']['id']]['publicationDates'] = set()

    # Replace the views/rights/tables collections with their counts.
    for uid in _users.keys():
        for key in ['views', 'rights', 'tables']:
            if key in _users[uid]:
                _users[uid]['n_' + key] = len(_users[uid][key])
                del _users[uid][key]

    dt.insert(_users.values(), 'user')
    for uid, user in _users.items():
        for viewid, publicationDate in user['publicationDates']:
            dt.insert({'userid': user['id'], 'viewid': viewid, 'publicationDate': publicationDate}, 'publications', commit = False)
    dt.commit()

    return _users
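# Once users() has run, the user and publications tables can be inspected with the
# same DumpTruck connection. A minimal sketch, not part of the original script; the
# column names follow the inserts above.
dt = DumpTruck(dbname = '/tmp/socrata.db')
for row in dt.execute('''
    select userid, count(*) as n_publications
    from publications
    group by userid
    order by n_publications desc
    limit 10;'''):
    print('%s\t%d' % (row['userid'], row['n_publications']))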
from dumptruck import DumpTruck

dt = DumpTruck(dbname = 'cites.db')

# Yearly exports for the ten most frequently recorded taxonomic families.
top10 = dt.execute('''
select shipment_year, export_country_code, taxon_family, sum(quantity_1) as "export"
from cites
where taxon_family in (
    select taxon_family
    from cites
    group by taxon_family
    order by count(taxon_family) desc
    limit 10
)
group by shipment_year || export_country_code || taxon_family
order by export_country_code, taxon_family, shipment_year
;''')

# Compute year-over-year changes between consecutive rows of the same family.
for i in range(len(top10) - 1):
    try:
        if top10[i]['taxon_family'] == top10[i + 1]['taxon_family']:
            top10[i + 1]['change_since_last_year'] = top10[i + 1]['export'] - top10[i]['export']
            dt.insert(top10[i + 1], 'changes_tmp', commit = False)
    except:
        # Commit whatever has been inserted so far, then re-raise.
        dt.commit()
        raise

dt.insert(top10, 'changes')
from dumptruck import DumpTruck

def main():
    # table() is defined elsewhere and yields one dict per row of the source table.
    dt = DumpTruck(dbname = '/tmp/smart.db')
    for data in table():
        dt.insert(data, 'smart', commit = False)
    dt.commit()
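# If this is run as a standalone script, the usual entry-point guard applies;
# this is an assumption about how the script is invoked, not shown in the original.
if __name__ == '__main__':
    main()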