Example #1
0
def get_bigquery_schema(filepath, layer_name=None, gdb_name=None):
    """
    Generate a BigQuery table schema from a geospatial file

        python -m geobeam.util get_bigquery_schema ...args

    Args:
        filepath (str): full path to the input file
        layer_name (str, optional): name of the layer, if file contains
            multiple layers
        gdb_name (str, optional): name of the geodatabase inside a zip
            archive; when given, filepath is read fully into memory and
            the layer is opened from the in-memory archive
    Returns:
        list: the schema fields, convertible to json by
            json.dumps(schema, indent=2)
    """

    import fiona
    from fiona.io import ZipMemoryFile
    from fiona import prop_type

    bq_schema = []

    # Open the dataset only long enough to grab its profile; context
    # managers release the file handles (the original leaked an open
    # file object and unclosed fiona collections).
    if layer_name is None:
        with fiona.open(filepath) as src:
            profile = src.profile
    elif gdb_name is None:
        with fiona.open(filepath, layer=layer_name) as src:
            profile = src.profile
    else:
        with open(filepath, 'rb') as f:
            with ZipMemoryFile(f.read()) as mem:
                with mem.open(gdb_name, layer=layer_name) as src:
                    profile = src.profile

    # Translate each property's fiona type into its BigQuery column type.
    for field_name, field_type in profile['schema']['properties'].items():
        fiona_type = prop_type(field_type)
        bq_type = BQ_FIELD_TYPES[fiona.schema.FIELD_TYPES_MAP_REV[fiona_type]]
        bq_schema.append({'name': field_name, 'type': bq_type})

    # The geometry always becomes a GEOGRAPHY column named 'geom'.
    bq_schema.append({
        'name':
        'geom',
        'type':
        'GEOGRAPHY',
        'description':
        '{} reprojected from {}. source: {}'.format(
            profile['schema']['geometry'], profile['crs']['init'],
            profile['driver'])
    })

    return bq_schema
Example #2
0
def test_open_closed_zip():
    """Opening a dataset on a closed ZipMemoryFile raises OSError."""
    zipped = ZipMemoryFile()
    zipped.close()
    assert zipped.closed
    with pytest.raises(OSError):
        zipped.open()
Example #3
0
    def read_records(self, file_name, range_tracker):
        """Read features from an in-memory zipped geodatabase as records.

        Maps the byte range owned by ``range_tracker`` onto feature
        indices by assuming every feature occupies an equal share of the
        estimated file size, then yields one record per claimed stride.

        Args:
            file_name: path of the zip archive to open via self.open_file
            range_tracker: Beam range tracker delimiting this split

        Yields:
            (properties, geometry) tuples; the geometry is reprojected to
            epsg:4326 unless ``self.skip_reproject`` is set.
        """
        from fiona import transform
        from fiona.io import ZipMemoryFile
        import json

        total_bytes = self.estimate_size()
        next_pos = range_tracker.start_position()

        def split_points_unclaimed(stop_pos):
            # 0 remaining split points once stop_pos falls at/behind the
            # current claim; otherwise unknown. NOTE(review): not invoked
            # in this view — presumably registered on the tracker by the
            # Beam source machinery; confirm against the caller.
            return 0 if stop_pos <= next_pos else iobase.RangeTracker.SPLIT_POINTS_UNKNOWN

        with self.open_file(file_name) as f, ZipMemoryFile(f.read()) as mem:
            collection = mem.open(self.gdb_name, layer=self.layer_name)
            src_crs = _GeoSourceUtils.validate_crs(collection.crs,
                                                   self.in_epsg)

            # Approximate bytes-per-feature so byte positions can be
            # converted into feature indices.
            num_features = len(collection)
            feature_bytes = math.floor(total_bytes / num_features)
            i = 0

            # XXX workaround due to https://github.com/Toblerity/Fiona/issues/996
            features = list(collection)

            logging.info(
                json.dumps({
                    'msg': 'read_records',
                    'file_name': file_name,
                    'profile': collection.profile,
                    'num_features': num_features,
                    'total_bytes': total_bytes
                }))

            # Advance one feature-sized stride per claim; stop when the
            # tracker refuses the position or the index runs off the end.
            while range_tracker.try_claim(next_pos):
                i = math.ceil(next_pos / feature_bytes)
                if i >= num_features:
                    break

                cur_feature = features[i]
                geom = cur_feature['geometry']
                props = cur_feature['properties']

                if not self.skip_reproject:
                    geom = transform.transform_geom(src_crs, 'epsg:4326', geom)

                yield (props, geom)

                next_pos = next_pos + feature_bytes
Example #4
0
def read_file(data, format):
    """Parse raw file bytes into a (Geo)DataFrame.

    Args:
        data (bytes): raw file contents
        format (str): one of 'csv', 'json', 'geojson', 'gpkg', or 'zip'
            (a zipped Shapefile)

    Returns:
        pandas.DataFrame or geopandas.GeoDataFrame

    Raises:
        ValueError: if format is not one of the supported values
    """
    if format == 'csv':
        return pd.read_csv(BytesIO(data))
    if format == 'json':
        return pd.DataFrame(json.loads(data))
    if format in ['geojson', 'gpkg']:
        with BytesCollection(data) as f:
            return gpd.GeoDataFrame.from_features(f, crs=f.crs)
    if format == 'zip':
        with ZipMemoryFile(data) as f:
            for layer in fiona.listlayers(f.name, vfs='zip://'):
                # Only reading the first layer of the Shapefile
                with f.open('{0}.shp'.format(layer)) as collection:
                    return gpd.GeoDataFrame.from_features(collection,
                                                          crs=collection.crs)
    else:
        # `raise <str>` is a TypeError in Python 3; raise a real exception.
        raise ValueError('Incompatible format')
Example #5
0
    def download(self, overwrite=False):
        """Fetch the USFS trail shapefile and cache it locally as GeoJSON.

        Args:
            overwrite: when True, re-download even if the archive exists.
        """
        url = 'https://www.fs.usda.gov/Internet/FSE_DOCUMENTS/stelprdb5332131.zip'
        archive = self.raw_dir / Path(url).name
        if overwrite or not archive.exists():
            urlretrieve(url, archive)

        # Read the zipped shapefile entirely in memory.
        with open(archive, 'rb') as f, ZipMemoryFile(f.read()) as z:
            with z.open('PacificCrestTrail.shp') as src:
                crs = src.crs
                features = list(src)

        # Build the frame and normalize to WGS84.
        gdf = gpd.GeoDataFrame.from_features(features, crs=crs)
        gdf = gdf.to_crs(epsg=4326)

        out_dir = self.data_dir / 'pct' / 'line' / 'usfs'
        out_dir.mkdir(parents=True, exist_ok=True)
        gdf.to_file(out_dir / 'trail.geojson', driver='GeoJSON')
def read_warnings(zipfile, start_date=None, end_date=None):
    """Load wwaVTEC warning records from a pickle or a zipped shapefile.

    Args:
        zipfile (str): path to a '.pic' pickle of raw records, or to a
            zip archive containing a shapefile of warnings
        start_date (optional): keep records with EXPIRED >= start_date
        end_date (optional): keep records with ISSUED <= end_date

    Returns:
        list: wwaVTEC records passing the date filter

    Raises:
        ValueError: if the zip archive contains no '.shp' member
    """
    # Fast path: pre-parsed records cached as a pickle.
    if zipfile.endswith('.pic'):
        with open(zipfile, 'rb') as f:
            records = pickle.load(f)
        return [wwaVTEC(r) for r in records]

    with open(zipfile, 'rb') as fid:
        names = ZipFile(fid).namelist()
        fid.seek(0)
        data = fid.read()

    # Locate the shapefile member inside the archive (last one wins,
    # matching the original scan order). Fail loudly if there is none
    # instead of hitting an unbound-name error below.
    shapefile = None
    for item in names:
        if item.endswith('.shp'):
            shapefile = item
    if shapefile is None:
        raise ValueError('no .shp member found in {}'.format(zipfile))

    def _in_range(wwa):
        # A record passes when it overlaps [start_date, end_date]; an
        # omitted bound is unbounded on that side. This collapses the
        # original four-way branch (whose last case OR'd the same clause
        # with itself) and uses `is not None` so falsy bounds still work.
        if start_date is not None and wwa.EXPIRED < start_date:
            return False
        if end_date is not None and wwa.ISSUED > end_date:
            return False
        return True

    records = []
    with ZipMemoryFile(data) as zipped:
        with zipped.open(shapefile) as collection:
            for record in collection:
                try:
                    wwa = wwaVTEC(record)
                except Exception:
                    # Skip malformed records rather than aborting the
                    # whole read (narrowed from a bare `except:`).
                    continue
                if _in_range(wwa):
                    records.append(wwa)

    return records
Example #7
0
def load_geodataframe(filepath: Path) -> GeoDataFrame:
    """Load a cached zipped shapefile from disk as a GeoDataFrame.

    The archive is read into memory with plain Python file I/O first,
    because opening zip files on disk directly with Fiona/GDAL can fail
    with cryptic and unpredictable errors.
    """
    # Pull the .shp member name out of the archive listing.
    with ZipFile(filepath, 'r') as archive:
        shp_name: str = next(filter(is_shp_file, archive.filelist)).filename

    # Feed the raw bytes to fiona via an in-memory zip.
    raw = filepath.read_bytes()
    with ZipMemoryFile(raw) as memfile:
        with memfile.open(shp_name) as features:
            # Load GeoDataFrame using NAD83 projection (EPSG 4269)
            frame = GeoDataFrame.from_features(features, crs='EPSG:4269')

    # The year is encoded in characters 3-6 of the .shp filename.
    frame['year'] = int(shp_name[3:7])

    return frame
def import_data(file_path, dataset):
    """Import a zipped shapefile archive or a GeoTiff into the database.

    For a zip archive, stores the raw bytes as a RawData/RawShapefile
    pair and delegates the member shapefiles to __import_shapefile; any
    other file is handed to __import_tiff.

    Args:
        file_path (String): file path on system
        dataset (Dataset): dataset in order to use existing dataset
    Returns:
        True for success, False for failure
        may be worth returning other info to debug?
    """
    if zipfile.is_zipfile(file_path):
        # Read the archive once; the context manager closes the handle
        # (the original `open(...).read()` leaked it).
        with open(file_path, 'rb') as fh:
            binary = fh.read()
        try:
            with ZipMemoryFile(binary) as zip_mem:
                zf = zipfile.ZipFile(io.BytesIO(binary))
                shapefile_locs = list(
                    filter(lambda v: v.endswith('.shp'), zf.namelist()))
                # Store the raw bytes under a random private-storage path.
                new_path = os.path.join(settings.PRIVATE_STORAGE_ROOT,
                                        "raw_files/", str(uuid.uuid4()))
                raw_shp_data = RawData.objects.create()
                raw_shp_data.name = f"{dataset.name}"
                raw_shp_data.ext = "zip"
                raw_shp_data.path.save(new_path, io.BytesIO(binary))
                raw_shp_data.save()
                raw_shp = RawShapefile.objects.create(rawshp=raw_shp_data,
                                                      dataset=dataset)
                raw_shp.save()
                return __import_shapefile(shapefile_locs,
                                          zip_mem,
                                          dataset,
                                          zip=zf)
        except zipfile.BadZipfile:
            return False
    else:
        dataset = __import_tiff(file_path, dataset)
        return dataset is not None
Example #9
0
def read_vector_file_to_df(
    uploaded_file: st.uploaded_file_manager.UploadedFile,
) -> Union[GeoDataFrame, None]:
    """Read an uploaded vector file into a GeoDataFrame.

    Supports KML, WKT, zipped Shapefile, and GeoJSON/JSON, chosen by the
    uploaded file's suffix.

    Args:
        uploaded_file: A single bytesIO like object

    Returns:
        Geopandas dataframe
    """
    suffix = Path(uploaded_file.name).suffix
    if suffix == ".kml":
        # KML support has to be enabled in fiona's driver table first.
        gpd.io.file.fiona.drvsupport.supported_drivers["KML"] = "rw"
        df = gpd.read_file(uploaded_file, driver="KML")
    elif suffix == ".wkt":
        # A WKT upload holds a single geometry as plain text.
        raw_wkt = uploaded_file.read().decode("utf-8")
        df = pd.DataFrame({"geometry": [raw_wkt]})
        df["geometry"] = df["geometry"].apply(shapely.wkt.loads)
        df = gpd.GeoDataFrame(df, geometry="geometry", crs=4326)
    elif suffix == ".zip":
        # Zipped Shapefile, read fully in memory via fiona.
        with ZipMemoryFile(uploaded_file) as memfile:
            with memfile.open() as src:
                df = gpd.GeoDataFrame.from_features(src, crs=src.crs)
                if df.crs is None:
                    st.error("The provided shapefile has no crs!")
                    st.stop()
    else:
        # Anything else is handed to geopandas as GeoJSON/JSON etc.
        df = gpd.read_file(uploaded_file)

    return df
Example #10
0
def test_zip_memoryfile_infer_layer_name(bytes_coutwildrnp_zip):
    """The default layer of an in-memory zipped Shapefile is readable."""
    with ZipMemoryFile(bytes_coutwildrnp_zip) as zipped:
        with zipped.open() as layer:
            assert 67 == len(layer)
Example #11
0
def test_zip_memoryfile(bytes_coutwildrnp_zip):
    """A named shapefile inside an in-memory zip can be opened and read."""
    with ZipMemoryFile(bytes_coutwildrnp_zip) as zipped:
        with zipped.open('coutwildrnp.shp') as layer:
            assert 67 == len(layer)