def test_tools_check_crop():
    """Cropping IN_CROP against IN_LIMITS is expected to give [5, 10, 25, 28]."""
    expected = [5, 10, 25, 28]
    actual = tools.check_crop(IN_CROP, IN_LIMITS)
    assert expected == actual
import pytest
from hypothesis import given
from hypothesis.strategies import integers, text, floats

import numpy as np

from segysak import tools

# Requested crop window and the limits it is checked against.
IN_CROP = [5, 10, 25, 35]
IN_LIMITS = [0, 20, 0, 28]


def test_tools_check_crop():
    """Cropping IN_CROP against IN_LIMITS is expected to give [5, 10, 25, 28]."""
    expected = [5, 10, 25, 28]
    actual = tools.check_crop(IN_CROP, IN_LIMITS)
    assert expected == actual


@given(floats(-10_000, 10_000), floats(-10_000, 10_000), integers(2, 1000))
def test_halfsample(a, b, n):
    """halfsample of n linspace points should match a (2n - 1)-point linspace."""
    coarse = np.linspace(a, b, n)
    expected = np.linspace(a, b, n * 2 - 1)
    resampled = tools.halfsample(coarse)
    assert np.allclose(resampled, expected)


if __name__ == "__main__":
    cropped = tools.check_crop(IN_CROP, IN_LIMITS)
    print(cropped)
def _loader_converter_header_handling(
    segyfile,
    cdp=None,
    iline=None,
    xline=None,
    cdpx=None,
    cdpy=None,
    offset=None,
    vert_domain="TWT",
    data_type="AMP",
    ix_crop=None,
    cdp_crop=None,
    xy_crop=None,
    z_crop=None,
    return_geometry=False,
    silent=False,
    extra_byte_fields=None,
    head_df=None,
    **segyio_kwargs,
):
    """Scrape, scale and crop the trace headers of a SEG-Y file.

    Args:
        segyfile (str): Input segy file path.
        cdp, iline, xline, cdpx, cdpy, offset: Header locations of the
            respective fields. Each non-None value is passed through
            ``_get_tf`` and the result is used as a column label of the
            header table. If ``cdp``, ``iline``, ``xline`` and ``offset`` are
            all None, ``what_geometry_am_i`` is asked to guess them.
        vert_domain, data_type, z_crop, return_geometry: Accepted but not
            used by this function.
        ix_crop (list, optional): ``[min_il, max_il, min_xl, max_xl]``;
            applied only when ``cdp`` is None.
        cdp_crop (list, optional): ``[min_cdp, max_cdp]``; applied only when
            ``cdp`` is given.
        xy_crop (list, optional): ``[min_x, max_x, min_y, max_y]``; applied
            only when ``cdp`` is None.
        silent (bool): Forwarded to ``segy_header_scrape``.
        extra_byte_fields (dict, optional): Mapping that is updated in place
            with the cdp_x / cdp_y header columns when both are known. When
            None, a throwaway dict is used instead.
        head_df (pandas.DataFrame, optional): Pre-scraped trace headers. When
            supplied the file headers are not scraped again; note that the
            coordinate columns of this frame are rescaled in place.
        **segyio_kwargs: Forwarded to the header scraping helpers.

    Returns:
        tuple: ``(head_df, head_bin, head_loc)`` - the (possibly cropped)
        trace header table, the scraped binary header and an ``AttrDict`` of
        the resolved header column names.
    """
    if extra_byte_fields is None:
        # The original subscripted None here; fall back to a local mapping so
        # callers that do not care about extra fields do not crash.
        extra_byte_fields = {}

    if head_df is None:
        # Start by scraping the headers.
        head_df = segy_header_scrape(segyfile, silent=silent, **segyio_kwargs)

    # The binary header is always returned, so it is always scraped.
    head_bin = segy_bin_scrape(segyfile, **segyio_kwargs)

    head_loc = AttrDict(
        dict(cdp=cdp, offset=offset, iline=iline, xline=xline, cdpx=cdpx, cdpy=cdpy)
    )

    if all(hint is None for hint in (cdp, iline, xline, offset)):
        # lets try and guess the data types if no hints given
        try:
            head_loc.update(what_geometry_am_i(head_df))
        except (KeyError, ValueError):
            print("Couldn't determine geometry, will load traces as flat 2D.")

    head_loc = AttrDict(
        {
            key: (_get_tf(val) if val is not None else None)
            for key, val in head_loc.items()
        }
    )

    if head_loc.cdpx is not None and head_loc.cdpy is not None:
        extra_byte_fields[CoordKeyField.cdp_x] = head_loc.cdpx
        extra_byte_fields[CoordKeyField.cdp_y] = head_loc.cdpy

        # Scale Coordinates: |scalar| ** sign(scalar) multiplies for a
        # positive scalar, divides for a negative one and is 1 for zero.
        coord_scalar = head_df.SourceGroupScalar.median()
        coord_scalar_mult = np.power(abs(coord_scalar), np.sign(coord_scalar))
        for coord_col in (head_loc.cdpx, head_loc.cdpy):
            head_df[coord_col] = head_df[coord_col].astype(np.float32)
            head_df[coord_col] = head_df[coord_col] * coord_scalar_mult * 1.0

    # TODO: might need to scale offsets as well?

    # Cropping
    if cdp_crop and cdp is not None:  # 2d cdp cropping
        crop_min, crop_max = check_crop(
            cdp_crop, [head_df[head_loc.cdp].min(), head_df[head_loc.cdp].max()]
        )
        # The column name must be interpolated into the expression; writing
        # "@head_loc.cdp" would compare the name string itself to the limits.
        query = " & ".join(
            [
                f"{head_loc.cdp} >= @crop_min",
                f"{head_loc.cdp} <= @crop_max",
            ]
        )
        head_df = head_df.query(query).copy(deep=True)

    if ix_crop is not None and cdp is None:  # 3d inline/xline cropping
        il_min, il_max, xl_min, xl_max = check_crop(
            ix_crop,
            [
                head_df[head_loc.iline].min(),
                head_df[head_loc.iline].max(),
                head_df[head_loc.xline].min(),
                head_df[head_loc.xline].max(),
            ],
        )
        query = " & ".join(
            [
                f"{head_loc.iline} >= @il_min",
                f"{head_loc.iline} <= @il_max",
                f"{head_loc.xline} >= @xl_min",
                f"{head_loc.xline} <= @xl_max",
            ]
        )
        head_df = head_df.query(query).copy(deep=True)

    # TODO: -> Could implement some cropping with a polygon here

    if xy_crop is not None and cdp is None:
        x_min, x_max, y_min, y_max = check_crop(
            xy_crop,
            [
                head_df[head_loc.cdpx].min(),
                head_df[head_loc.cdpx].max(),
                head_df[head_loc.cdpy].min(),
                head_df[head_loc.cdpy].max(),
            ],
        )
        query = " & ".join(
            [
                f"{head_loc.cdpx} >= @x_min",
                f"{head_loc.cdpx} <= @x_max",
                f"{head_loc.cdpy} >= @y_min",
                f"{head_loc.cdpy} <= @y_max",
            ]
        )
        head_df = head_df.query(query).copy(deep=True)

    return head_df, head_bin, head_loc
def segy_loader(
    segyfile,
    ncfile=None,
    cdp=None,
    iline=None,
    xline=None,
    cdpx=None,
    cdpy=None,
    offset=None,
    vert_domain="TWT",
    data_type="AMP",
    ix_crop=None,
    cdp_crop=None,
    xy_crop=None,
    z_crop=None,
    return_geometry=False,
    silent=False,
    extra_byte_fields=None,
    **segyio_kwargs,
):
    """Convert SEGY data to NetCDF4 File

    The output ncfile has the following structure
        Dimensions:
            d1 - CDP or Inline axis
            d2 - Xline axis
            d3 - The vertical axis
            d4 - Offset/Angle Axis
        Coordinates:
            iline - The inline numbering
            xline - The xline numbering
            cdp_x - Eastings
            cdp_y - Northings
            cdp - Trace Number for 2d
        Variables
            data - The data volume
        Attributes:
            TBC

    Args:
        segyfile (str): Input segy file path
        ncfile (str, optional): Output SEISNC file path. If none the loaded data will be
            returned in memory as an xarray.Dataset.
        cdp (int, optional): The CDP byte location, usually 21.
        iline (int, optional): Inline byte location, usually 189
        xline (int, optional): Cross-line byte location, usually 193
        cdpx (int, optional): CDP X byte location. Defaults to 181 when None.
        cdpy (int, optional): CDP Y byte location. Defaults to 185 when None.
        offset (int, optional): Offset header location. When given, an
            ``off_index`` level is added to the header index and the
            pre-stack dimension layout is used.
        vert_domain (str, optional): Vertical sampling domain. One of ['TWT', 'DEPTH']. Defaults to 'TWT'.
        data_type (str, optional): Data type ['AMP', 'VEL']. Defaults to 'AMP'.
        ix_crop (list, optional): List of minimum and maximum inline and crossline to output.
            Has the form '[min_il, max_il, min_xl, max_xl]'. Ignored for 2D data.
        cdp_crop (list, optional): List of minimum and maximum cmp values to output.
            Has the form '[min_cmp, max_cmp]'. Ignored for 3D data.
        xy_crop (list, optional): List of minimum and maximum cdp_x and cdp_y to output.
            Has the form '[min_x, max_x, min_y, max_y]'. Ignored for 2D data.
        z_crop (list, optional): List of minimum and maximum vertical samples to output.
            Has the form '[min, max]'.
        return_geometry (bool, optional): If true returns an xarray.dataset which doesn't contain data but mirrors
            the input volume header information.
        silent (bool): Disable progress bar.
        extra_byte_fields (list/mapping): A list of int or mapping of byte fields that should be returned as variables in the dataset.
        **segyio_kwargs: Extra keyword arguments for segyio.open

    Returns:
        xarray.Dataset: If ncfile keyword is specified returns open handle to disk netcdf4,
            otherwise the data in memory. If return_geometry is True does not load trace data and
            returns headers in geometry.

    Raises:
        ValueError: If cdp is combined with iline/xline, if only one of
            iline/xline is given, or if extra_byte_fields is neither a list,
            a dict nor None.
    """
    # Input sanity checks
    if cdp is not None and (iline is not None or xline is not None):
        raise ValueError("cdp cannot be defined with iline and xiline")

    if iline is None and xline is not None:
        raise ValueError("iline must be defined with xline")

    if xline is None and iline is not None:
        raise ValueError("xline must be defined with iline")

    # Normalise extra_byte_fields into a fresh {output name: header column}
    # mapping so the caller's object is never mutated.
    if isinstance(extra_byte_fields, list):
        extra_byte_fields = {
            _get_tf(field): _get_tf(field) for field in extra_byte_fields
        }
    elif isinstance(extra_byte_fields, dict):
        extra_byte_fields = {
            key: _get_tf(field) for key, field in extra_byte_fields.items()
        }
    elif extra_byte_fields is None:
        extra_byte_fields = dict()
    else:
        raise ValueError("Unknown type for extra_byte_fields")

    if cdpx is None:
        cdpx = 181  # Assume standard location if missing
    x_head_loc = _get_tf(cdpx)
    if cdpy is None:
        cdpy = 185  # Assume standard location if missing
    y_head_loc = _get_tf(cdpy)

    extra_byte_fields[CoordKeyField.cdp_x.value] = x_head_loc
    extra_byte_fields[CoordKeyField.cdp_y.value] = y_head_loc

    # Start by scraping the headers.
    head_df = segy_header_scrape(segyfile, silent=silent, **segyio_kwargs)
    head_bin = segy_bin_scrape(segyfile, **segyio_kwargs)

    # Scale Coordinates: |scalar| ** sign(scalar) multiplies for a positive
    # scalar, divides for a negative one and is 1 for zero.
    coord_scalar = head_df.SourceGroupScalar.median()
    coord_scalar_mult = np.power(abs(coord_scalar), np.sign(coord_scalar))
    head_df[x_head_loc] = head_df[x_head_loc].astype(float)
    head_df[y_head_loc] = head_df[y_head_loc].astype(float)
    head_df[x_head_loc] = head_df[x_head_loc] * coord_scalar_mult * 1.0
    head_df[y_head_loc] = head_df[y_head_loc] * coord_scalar_mult * 1.0

    # TODO: might need to scale offsets as well?

    # Cropping
    # NOTE: in DataFrame.query, "@name" substitutes the *value* of a local
    # variable. The header column names therefore have to be interpolated
    # into the expression text; "@il_head_loc >= @il_min" would compare the
    # column-name string with a number.
    if cdp_crop and cdp is not None:  # 2d cdp cropping
        cmp_head_loc = _get_tf(cdp)
        crop_min, crop_max = check_crop(
            cdp_crop, [head_df[cmp_head_loc].min(), head_df[cmp_head_loc].max()]
        )
        query = " & ".join(
            [
                f"{cmp_head_loc} >= @crop_min",
                f"{cmp_head_loc} <= @crop_max",
            ]
        )
        head_df = head_df.query(query)

    # The original tested the undefined name ``cmp`` here; ``cdp`` is meant.
    if ix_crop is not None and cdp is None:  # 3d inline/xline cropping
        il_head_loc = _get_tf(iline)
        xl_head_loc = _get_tf(xline)
        il_min, il_max, xl_min, xl_max = check_crop(
            ix_crop,
            [
                head_df[il_head_loc].min(),
                head_df[il_head_loc].max(),
                head_df[xl_head_loc].min(),
                head_df[xl_head_loc].max(),
            ],
        )
        query = " & ".join(
            [
                f"{il_head_loc} >= @il_min",
                f"{il_head_loc} <= @il_max",
                f"{xl_head_loc} >= @xl_min",
                f"{xl_head_loc} <= @xl_max",
            ]
        )
        head_df = head_df.query(query)

    # TODO: -> Could implement some cropping with a polygon here

    if xy_crop is not None and cdp is None:
        x_min, x_max, y_min, y_max = check_crop(
            xy_crop,
            [
                head_df[x_head_loc].min(),
                head_df[x_head_loc].max(),
                head_df[y_head_loc].min(),
                head_df[y_head_loc].max(),
            ],
        )
        query = " & ".join(
            [
                f"{x_head_loc} >= @x_min",
                f"{x_head_loc} <= @x_max",
                f"{y_head_loc} >= @y_min",
                f"{y_head_loc} <= @y_max",
            ]
        )
        head_df = head_df.query(query)

    common_kwargs = dict(
        zcrop=z_crop,
        ncfile=ncfile,
        offset=offset,
        vert_domain=vert_domain,
        data_type=data_type,
        return_geometry=return_geometry,
        silent=silent,
    )

    # 3d data needs iline and xline
    if iline is not None and xline is not None:
        ds = _3dsegy_loader(
            segyfile,
            head_df,
            head_bin,
            iline=iline,
            xline=xline,
            **common_kwargs,
            **segyio_kwargs,
        )
        indexer = ["il_index", "xl_index"]
        dims = (
            DimensionKeyField.threed_head.value
            if offset is None
            else DimensionKeyField.threed_ps_head.value
        )
    # 2d data
    elif cdp is not None:
        ds = _2dsegy_loader(
            segyfile, head_df, head_bin, cdp=cdp, **common_kwargs, **segyio_kwargs
        )
        indexer = ["cdp_index"]
        dims = (
            DimensionKeyField.twod_head.value
            if offset is None
            else DimensionKeyField.twod_ps_head.value
        )
    # fallback to just a 2d array of traces
    else:
        ds = _2dsegy_loader(
            segyfile, head_df, head_bin, **common_kwargs, **segyio_kwargs
        )
        indexer = []
        dims = DimensionKeyField.cdp_2d.value

    if offset is not None:
        indexer = indexer + ["off_index"]

    # we have some geometry to assign headers to
    if cdp is not None or iline is not None:
        head_ds = head_df.set_index(indexer).to_xarray()
        for key, field in extra_byte_fields.items():
            ds[key] = (dims, head_ds[field].values)
        ds = ds.set_coords([CoordKeyField.cdp_x.value, CoordKeyField.cdp_y.value])
    # geometry is not known
    else:
        for key, field in extra_byte_fields.items():
            ds[key] = (dims, head_df[field].values)

    return ds
def segy2ncdf(segyfile, ncfile, CMP=False, iline=189, xline=193, cdpx=181, cdpy=185,
              vert='TWT', units='AMP', crop=None, zcrop=None, silent=False):
    """Convert SEGY data to NetCDF4 File

    The output ncfile has the following structure
        Dimensions:
            vert - The vertical axis
            iline - Inline axis
            xline - Xline axis
        Variables:
            INLINE_3D - The inline numbering
            CROSSLINE_3D - The xline numbering
            CDP_X - Eastings
            CDP_Y - Northings
            CDP_TRACE - Trace Number
            data - The data volume
        Attributes:
            vert.units
            vert.data.units
            ns - Number of samples in vert
            ds - Sample rate

    Traces are buffered one inline at a time and written to the ``data``
    variable whenever the inline number changes, so the input is assumed to
    be sorted by inline.

    Args:
        segyfile (str): Input segy file path
        ncfile (str): Output SEISNC file path.
        CMP (bool): Not used by this function.
        iline (int): Inline byte location.
        xline (int): Cross-line byte location.
        cdpx (int): CDP X byte location.
        cdpy (int): CDP Y byte location.
        vert (str): Vertical sampling domain.
        units (str): Units of amplitude data. Not used by this function.
        crop (list): List of minimum and maximum inline and crossline to output.
            Has the form '[min_il, max_il, min_xl, max_xl]'.
        zcrop (list): List of minimum and maximum vertical samples to output.
            Has the form '[min, max]'.
        silent (bool): Disable progress bar.
    """
    head_df = segy_header_scrape(segyfile)
    head_bin = segy_bin_scrape(segyfile)

    # get names of columns where stuff we want is
    il_head_loc = str(segyio.TraceField(iline))
    xl_head_loc = str(segyio.TraceField(xline))
    x_head_loc = str(segyio.TraceField(cdpx))
    y_head_loc = str(segyio.TraceField(cdpy))

    # Per-trace inline/xline numbers in file order; the trace loop below
    # indexes them by trace position.
    il_values = head_df[il_head_loc].values
    xl_values = head_df[xl_head_loc].values

    # calculate vert, inline and crossline ranges/meshgrids
    il0 = head_df[il_head_loc].min()
    iln = head_df[il_head_loc].max()
    xl0 = head_df[xl_head_loc].min()
    xln = head_df[xl_head_loc].max()
    n0 = 0
    nsamp = head_df.TRACE_SAMPLE_COUNT.min()
    ns0 = head_df.DelayRecordingTime.min()

    # |scalar| ** sign(scalar): multiply for a positive scalar, divide for a
    # negative one. np.sign keeps a zero scalar at a factor of 1 instead of
    # producing NaN through 0 / 0.
    coord_scalar = head_df.SourceGroupScalar.median()
    coord_scalar_mult = np.power(abs(coord_scalar), np.sign(coord_scalar))

    # Line increments are taken as the largest step between consecutive traces.
    dil = np.max(il_values[1:] - il_values[:-1])
    dxl = np.max(xl_values[1:] - xl_values[:-1])

    if crop is not None:
        crop = check_crop(crop, [il0, iln, xl0, xln])
        il0, iln, xl0, xln = crop

    # first and last values
    ni = 1 + (iln - il0) // dil
    nx = 1 + (xln - xl0) // dxl

    # binary header translation
    ns = head_bin['Samples']
    ds = head_bin['Interval']
    msys = _SEGY_MEASUREMENT_SYSTEM[head_bin['MeasurementSystem']]

    if zcrop is not None:
        zcrop = check_zcrop(zcrop, [0, ns])
        n0, ns = zcrop
        ns0 = ds * n0
        nsamp = ns - n0 + 1

    create_empty_seisnc(ncfile, (ni, nx, nsamp))
    set_seisnc_dims(ncfile, first_sample=ns0, sample_rate=ds // 1000,
                    first_iline=il0, iline_step=dil,
                    first_xline=xl0, xline_step=dxl,
                    vert_domain=vert, measurement_system=msys)

    text = get_segy_texthead(segyfile)

    with segyio.open(segyfile, 'r', ignore_geometry=True, iline=iline, xline=xline) as segyf, \
            netCDF4.Dataset(ncfile, "a", format="NETCDF4") as seisnc:

        seisnc.text = text

        # assign CDPXY
        query = f"{il_head_loc} >= @il0 & {il_head_loc} <= @iln & {xl_head_loc} >= @xl0 and {xl_head_loc} <= @xln"
        cropped_heads = head_df.query(query)
        # Keyword arguments: pandas 2.x no longer accepts index/columns
        # positionally in DataFrame.pivot.
        cdpx_grid = cropped_heads[[il_head_loc, xl_head_loc, x_head_loc]].pivot(
            index=il_head_loc, columns=xl_head_loc).values
        cdpy_grid = cropped_heads[[il_head_loc, xl_head_loc, y_head_loc]].pivot(
            index=il_head_loc, columns=xl_head_loc).values

        seisnc['CDP_X'][:, :] = cdpx_grid * coord_scalar_mult
        seisnc['CDP_Y'][:, :] = cdpy_grid * coord_scalar_mult

        segyf.mmap()

        # load trace
        line_buffer = np.full((nx, nsamp), np.nan, float)
        cur_iline = None  # inline currently held in line_buffer
        pb = tqdm(total=segyf.tracecount, desc="Converting SEGY", disable=silent)
        for n, trc in enumerate(segyf.trace):
            cxl = xl_values[n]
            cil = il_values[n]
            if cxl < xl0 or cxl > xln or cil < il0 or cil > iln:
                pb.update()
                continue
            if cur_iline is not None and cil != cur_iline:
                # Inline changed: write the finished line to its own slot
                # *before* the new line's first trace touches the buffer.
                # Floor division keeps the index an integer.
                seisnc['data'][int((cur_iline - il0) // dil), :, :] = line_buffer
                line_buffer[:, :] = np.nan
            cur_iline = cil
            line_buffer[(cxl - xl0) // dxl, :] = trc[n0:ns + 1]
            pb.update()
        if cur_iline is not None:
            # The last inline has no following trace to trigger its flush.
            seisnc['data'][int((cur_iline - il0) // dil), :, :] = line_buffer
        pb.close()