Exemplo n.º 1
0
def datasets(softwares=('ckan', 'socrata')):
    """Yield every dataset of every catalog, tagged with its origin.

    For each name in ``softwares`` the catalogs returned by
    ``read.catalogs(software)`` are walked, and the reader attribute of the
    same name (``getattr(read, software)``) is called with each catalog.

    A catalog that ``SOCRATA_FIX`` explicitly maps to ``None`` is skipped;
    a catalog that is absent from ``SOCRATA_FIX`` is processed normally.

    Args:
        softwares: Iterable of software names. Each name must be an
            attribute of ``read`` that is callable with a catalog.

    Yields:
        Each dataset produced by the reader, after its ``'catalog'`` and
        ``'software'`` keys have been set in place.
    """
    for software in softwares:
        for catalog in read.catalogs(software):
            # The non-None default makes "not listed in SOCRATA_FIX" differ
            # from "listed with value None"; only the latter is skipped.
            if SOCRATA_FIX.get(catalog, 'this is a string, not None') is None:
                continue
            for dataset in getattr(read, software)(catalog):
                dataset['catalog'] = catalog
                dataset['software'] = software
                yield dataset
Exemplo n.º 2
0
def get_links(softwares=('ckan', 'socrata'), db_path='/tmp/open-data.sqlite'):
    """Check every catalog and upsert the resulting rows into ``links``.

    Opens a DumpTruck database at ``db_path``, makes sure the ``links``
    table and its unique index on (software, catalog, identifier) exist,
    then upserts every row yielded by ``_check_catalog(software, catalog)``.

    A catalog that ``SOCRATA_FIX`` explicitly maps to ``None`` is skipped;
    a catalog that is absent from ``SOCRATA_FIX`` is processed normally.

    Args:
        softwares: Iterable of software names passed to ``read.catalogs``
            and ``_check_catalog``.
        db_path: Path of the database file handed to ``DumpTruck``.

    Raises:
        Whatever ``_check_catalog`` or ``dt.upsert`` raises; the catalog's
        download path is printed first so the failure can be located.
    """
    dt = DumpTruck(db_path)

    # Sample row used only to create the table: three text-like columns,
    # one integer and two dict-valued columns.
    # NOTE(review): presumably DumpTruck infers column types from these
    # sample values -- confirm against the DumpTruck documentation.
    dummyrow = {
        'software': 'blah',
        'catalog': 'blah',
        'identifier': 'blah',
        'status_code': 234,
        'headers': {'a': 'b'},
        'error': {'a': 'b'},
    }
    dt.create_table(dummyrow, 'links', if_not_exists = True)
    dt.create_index(['software','catalog','identifier'], 'links', if_not_exists = True, unique = True)

    for software in softwares:
        for catalog in read.catalogs(software):
            # The non-None default makes "not listed in SOCRATA_FIX" differ
            # from "listed with value None"; only the latter is skipped.
            if SOCRATA_FIX.get(catalog, 'this is a string, not None') is None:
                continue
            try:
                for row in _check_catalog(software, catalog):
                    dt.upsert(row, 'links')
            except:
                # Deliberately bare: nothing is swallowed, the exception is
                # re-raised after reporting which catalog was in progress.
                print(os.path.join('downloads',software,catalog))
                raise