def test_find_shapefiles(data_dir):
    """Check that ``find_shapefiles`` discovers every shapefile in the archive.

    The ``geodata/roads-folders.zip`` fixture is expected to yield four
    entries named ``roads-NN/roads-NN``; each entry must expose exactly the
    ``dbf``, ``prj``, ``shp`` and ``shx`` members, and each member must start
    with the expected four bytes.

    :param data_dir: fixture pointing at the directory holding test data
    """
    archive = data_dir.join('geodata/roads-folders.zip')
    shapefiles = find_shapefiles(str(archive))

    expected_names = []
    for idx in xrange(4):
        expected_names.append('roads-{0:02d}/roads-{0:02d}'.format(idx))
    assert sorted(shapefiles.keys()) == expected_names

    # Leading four bytes expected for each member, in the order they
    # are checked.
    expected_headers = (
        ('shp', b'\x00\x00\x27\x0a'),
        ('shx', b'\x00\x00\x27\x0a'),
        ('dbf', b'\x03\x5f\x07\x1a'),
        ('prj', b'PROJ'),
    )

    for _name, members in shapefiles.iteritems():
        assert sorted(members.keys()) == ['dbf', 'prj', 'shp', 'shx']
        for extension, header in expected_headers:
            assert members[extension].open().read(4) == header
def test_find_shapefiles(data_dir):
    """Verify the shapefiles found inside ``geodata/roads-folders.zip``.

    Four ``roads-NN/roads-NN`` entries are expected. For each of them the
    set of member extensions is checked, then the first four bytes of
    every member are compared against the expected value.

    :param data_dir: fixture pointing at the directory holding test data
    """
    zip_path = str(data_dir.join('geodata/roads-folders.zip'))
    found = find_shapefiles(zip_path)

    wanted_names = [
        'roads-{0:02d}/roads-{0:02d}'.format(number)
        for number in xrange(4)
    ]
    found_names = sorted(found.keys())
    assert found_names == wanted_names

    for _name, parts in found.iteritems():

        def leading_bytes(extension):
            # Open the member afresh and return its first four bytes.
            return parts[extension].open().read(4)

        extensions = sorted(parts.keys())
        assert extensions == ['dbf', 'prj', 'shp', 'shx']

        assert leading_bytes('shp') == b'\x00\x00\x27\x0a'
        assert leading_bytes('shx') == b'\x00\x00\x27\x0a'
        assert leading_bytes('dbf') == b'\x03\x5f\x07\x1a'
        assert leading_bytes('prj') == b'PROJ'
def import_dataset_find_shapefiles(dataset_id, dataset_conf):
    """
    Find all the Shapefiles from archives listed as dataset resources
    and import them into the ``geodata_<dataset_id>`` table.

    Each resource is copied into a temporary directory and scanned with
    ``find_shapefiles``. Every match that includes a ``shp`` member is
    extracted to disk and converted to SQL through ``shp2pgsql``. The
    destination table is created from the first shapefile found, then the
    data of every shapefile is appended to it.

    :param dataset_id: The dataset id
    :param dataset_conf: The dataset configuration. Its ``resources`` item
        lists the archives to scan; a plain string entry is treated as
        the resource URL.
    :raises ValueError: if no shapefile was found in any of the resources
    """
    # Local import: only needed for the chunked copy below.
    import shutil

    destination_table = 'geodata_{0}'.format(dataset_id)
    create_table_sqls = []
    import_data_sqls = []

    with TemporaryDir() as tempdir:
        # First, copy zip files to temporary directory
        for resource in dataset_conf['resources']:
            # We assume the file is a zip, but we should double-check that!
            dest_file = os.path.join(tempdir, _random_file_name('zip'))

            if isinstance(resource, basestring):
                resource = {'url': resource}

            # Copy the resource to disk
            _copy_resource_to_file(resource, dest_file)

            # Let's look for shapefiles inside that thing..
            found = find_shapefiles(dest_file)
            for _basename, files in found.iteritems():
                if 'shp' not in files:
                    continue  # Bad match..

                # Export all the members of the shapefile to temporary
                # files sharing the same random base name.
                base_name = _random_file_name()
                for ext, item in files.iteritems():
                    dest = os.path.join(tempdir, base_name + '.' + ext)
                    with open(dest, 'wb') as fp:
                        # Stream in chunks rather than loading the whole
                        # member in memory.
                        # NOTE(review): the handle returned by item.open()
                        # is never closed explicitly -- confirm whether
                        # it needs to be.
                        shutil.copyfileobj(item.open(), fp)

                shp_full_path = os.path.join(tempdir, base_name + '.shp')

                create_table_sql = shp2pgsql(
                    shp_full_path,
                    table=destination_table,
                    create_table_only=True,
                    mode='create',
                    geometry_column='geom',
                    create_gist_index=True)

                # Use TEXT fields instead of varchar(XX)
                # todo: use a less-hackish way!!
                create_table_sql = re.sub(
                    r'varchar\([0-9]+\)', 'text', create_table_sql,
                    flags=re.IGNORECASE)

                import_data_sql = shp2pgsql(
                    shp_full_path,
                    table=destination_table,
                    mode='append',
                    geometry_column='geom',
                    create_gist_index=False)

                create_table_sqls.append(create_table_sql)
                import_data_sqls.append(import_data_sql)

    # Fail with a clear message, before touching the database, instead of
    # an IndexError on create_table_sqls[0] below.
    if not create_table_sqls:
        raise ValueError(
            'No shapefiles found in the resources of dataset {0!r}'
            .format(dataset_id))

    # Only the first CREATE TABLE statement is executed: all the
    # shapefiles are appended to the same destination table.
    with admin_db, admin_db.cursor() as cur:
        cur.execute(create_table_sqls[0])

    with db, db.cursor() as cur:
        for sql in import_data_sqls:
            cur.execute(sql)