示例#1
0
def to_sqlite3():
    dt = DumpTruck('/tmp/open-data.sqlite', auto_commit = False)

    dummyrow = dict(zip(['software','catalog','identifier'], ['blah']*3))
    dt.create_table(dummyrow, 'datasets', if_not_exists = True)
    dt.create_index(['software','catalog','identifier'], 'datasets', if_not_exists = True, unique = True)

    for table in ['ckan','socrata']:
        dt.create_table({'catalog':'blah','identifier':'blah'}, table, if_not_exists = True)
        dt.create_index(['catalog','identifier'], table, if_not_exists = True, unique = True)

    dt.create_table({'view_id':'abc','table_id':123}, 'socrata_tables')
    dt.create_index(['view_id'], 'socrata_tables', if_not_exists = True, unique = True)
    dt.create_index(['table_id'], 'socrata_tables', if_not_exists = True)

    for dataset in datasets():
        row = {
            'software': dataset['software'],
            'catalog': dataset['catalog'],
            'identifier': dataset[SOFTWARE_MAP['identifier'][dataset['software']]],
        }
        sql = 'SELECT * FROM datasets WHERE software = ? AND catalog = ? AND identifier = ?'
        if dt.execute(sql, [row['software'],row['catalog'],row['identifier']]) != []:
            continue
        dt.upsert(row, 'datasets')
        if dataset['software'] == 'socrata':
            socrata_table = {
                'view_id': row['identifier'],
                'table_id': dataset['tableId'],
            }
            dt.upsert(socrata_table, 'socrata_tables')
        dt.upsert(dataset,dataset['software'])
        dt.commit()
示例#2
0
def users():
    dt = DumpTruck(dbname = '/tmp/socrata.db')
    dt.create_table({'id': 'abcd-efgh'}, 'user')
    dt.create_index(['id'], 'user', unique = True)

    _users = {}
    for portal in os.listdir('data'):
        for viewid in os.listdir(os.path.join('data', portal, 'views')):
            handle = open(os.path.join('data', portal, 'views', viewid), 'r')
            try:
                view = json.load(handle)
            except:
                # *cringe*
                continue
            handle.close()

            if view['owner']['id'] in _users:
                _users[view['owner']['id']]['views'].add(view['id'])
                try:
                    _users[view['owner']['id']]['publicationDates'].add((view['id'], view['publicationDate']))
                except:
                    return view
            else:
                _users[view['owner']['id']] = view['owner']
                _users[view['owner']['id']]['views'] = {view['id']}
                _users[view['owner']['id']]['tables'] = set()
                _users[view['owner']['id']]['publicationDates'] = set()

            if view['tableAuthor']['id'] in _users:
                _users[view['tableAuthor']['id']]['tables'].add(view['tableId'])
            else:
                _users[view['tableAuthor']['id']] = view['tableAuthor']
                _users[view['tableAuthor']['id']]['views'] = set()
                _users[view['tableAuthor']['id']]['tables'] = {view['tableId']}
                _users[view['tableAuthor']['id']]['publicationDates'] = set()

    for uid in _users.keys():
        for key in ['views', 'rights', 'tables']:
            if key in _users[uid]:
                _users[uid]['n_' + key] = len(_users[uid][key])
                del _users[uid][key]

    dt.insert(_users.values(), 'user')
    for uid, user in _users.items():
        for viewid, publicationDate in user['publicationDates']:
            dt.insert({'userid': user['id'], 'viewid': viewid, 'publicationDate': publicationDate}, 'publications', commit = False)
    dt.commit()
    return _users
示例#3
0
from dumptruck import DumpTruck

dt = DumpTruck(dbname = 'cites.db')
top10 = dt.execute('''
select
  shipment_year, export_country_code, taxon_family,
  sum(quantity_1) as "export"
from cites where taxon_family in (
  select taxon_family from cites
  group by taxon_family
  order by count(taxon_family)
  desc limit 10
)

group by shipment_year || export_country_code || taxon_family
order by export_country_code, taxon_family, shipment_year

;''')

for i in range(len(top10) - 1):
  try:
    if top10[i]['taxon_family'] == top10[i+1]['taxon_family']:
      top10[i+1]['change_since_last_year'] = top10[i+1]['export'] - top10[i]['export']
    dt.insert(top10[i+1], 'changes_tmp', commit = False)
  except:
    dt.commit()
    raise

dt.insert(top10, 'changes')
示例#4
0
def main():
    dt = DumpTruck(dbname = '/tmp/smart.db')
    for data in table():
        dt.insert(data, 'smart', commit = False)
    dt.commit()