Example #1
def _3dsegy_loader(
    segyfile,
    head_df,
    head_bin,
    ncfile=None,
    iline=189,
    xline=193,
    offset=None,
    vert_domain="TWT",
    data_type="AMP",
    crop=None,
    zcrop=None,
    silent=False,
    return_geometry=False,
    **segyio_kwargs,
):
    """Convert SEGY data to Xarray or Netcdf4

    This is a helper function for segy_loader. Users should use that function
    directly to load all segy data.

    """

    # get the names of the header columns we need
    il_head_loc = _get_tf(iline)
    xl_head_loc = _get_tf(xline)

    # get vertical sample ranges
    n0 = 0
    nsamp = head_bin["Samples"]
    ns0 = head_df.DelayRecordingTime.min()

    # unique inline/crossline numbers, sorted ascending
    ilines = head_df[il_head_loc].unique()
    xlines = head_df[xl_head_loc].unique()
    ilines = np.sort(ilines)
    xlines = np.sort(xlines)
    iline_index_map = {il: i for i, il in enumerate(ilines)}
    xline_index_map = {xl: i for i, xl in enumerate(xlines)}
    head_df["il_index"] = head_df[il_head_loc].replace(iline_index_map)
    head_df["xl_index"] = head_df[xl_head_loc].replace(xline_index_map)

    # binary header translation
    ns = head_bin["Samples"]
    ds = head_bin["Interval"] / 1000.0
    msys = _SEGY_MEASUREMENT_SYSTEM[head_bin["MeasurementSystem"]]

    # for offset
    if offset is not None:
        off_head_loc = _get_tf(offset)
        offsets = head_df[off_head_loc].unique()
        offsets = np.sort(offsets)
        offset_index_map = {off: i for i, off in enumerate(offsets)}
        head_df["off_index"] = head_df[off_head_loc].replace(offset_index_map)
    else:
        offsets = None

    if zcrop is not None:
        zcrop = check_zcrop(zcrop, [0, ns])
        n0, ns = zcrop
        ns0 = sample_rate * n0
        nsamp = ns - n0 + 1
    vert_samples = np.arange(ns0,
                             ns0 + sample_rate * nsamp,
                             sample_rate,
                             dtype=int)

    builder, domain = _dataset_coordinate_helper(vert_samples,
                                                 vert_domain,
                                                 iline=ilines,
                                                 xline=xlines,
                                                 offset=offsets)

    ds = create_seismic_dataset(**builder)

    # attach the text header to the dataset attributes
    text = get_segy_texthead(segyfile, **segyio_kwargs)
    ds.attrs[AttrKeyField.text.value] = text

    if ncfile is not None and not return_geometry:
        ds.seisio.to_netcdf(ncfile)
    elif return_geometry:
        # return the geometry only, i.e. don't process the segy traces
        return ds
    else:
        ncfile = ds

    segyio_kwargs.update(dict(ignore_geometry=True, iline=iline, xline=xline))

    # poststack data, filling the netcdf file on disk
    if offset is None and not isinstance(ncfile, xr.Dataset):
        ds = _segy3d_ncdf(
            segyfile,
            ncfile,
            segyio_kwargs,
            n0,
            ns,
            head_df,
            il_head_loc,
            xl_head_loc,
            vert_domain=vert_domain,
            silent=silent,
        )

    # poststack data, loading into memory
    if offset is None and isinstance(ncfile, xr.Dataset):
        ds = _segy3d_xr(
            segyfile,
            ncfile,
            segyio_kwargs,
            n0,
            ns,
            head_df,
            il_head_loc,
            xl_head_loc,
            vert_domain=vert_domain,
            silent=silent,
        )

    # prestack data, filling the netcdf file on disk
    if offset is not None and not isinstance(ncfile, xr.Dataset):
        ds = _segy3dps_ncdf(
            segyfile,
            ncfile,
            segyio_kwargs,
            n0,
            ns,
            head_df,
            il_head_loc,
            xl_head_loc,
            vert_domain=vert_domain,
            silent=silent,
        )

    # prestack data, loading into memory
    if offset is not None and isinstance(ncfile, xr.Dataset):
        ds = _segy3dps_xr(
            segyfile,
            ncfile,
            segyio_kwargs,
            n0,
            ns,
            head_df,
            il_head_loc,
            xl_head_loc,
            vert_domain=vert_domain,
            silent=silent,
        )

    return ds
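
A hedged usage sketch for the 3D path: as the docstring says, users reach this helper through segy_loader rather than calling it directly. The segy_loader entry point and its keyword names are assumptions based on the parameters shown above (iline=189, xline=193, ncfile), so treat the snippet as illustrative rather than as the library's documented API.

# Usage sketch (assumed public entry point and keywords; file names are hypothetical).
from segysak.segy import segy_loader

# Load a poststack 3D volume into memory as an xarray Dataset, using the
# default inline/crossline trace-header byte locations.
seismic = segy_loader("volume_3d.segy", iline=189, xline=193)
print(seismic)

# Alternatively, stream straight into a SEISNC/NetCDF4 file on disk by also
# passing a target path.
# seismic = segy_loader("volume_3d.segy", ncfile="volume_3d.seisnc",
#                       iline=189, xline=193)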
Example #2
def _2dsegy_loader(
    segyfile,
    head_df,
    head_bin,
    ncfile=None,
    cdp=None,
    offset=None,
    vert_domain="TWT",
    data_type="AMP",
    crop=None,
    zcrop=None,
    silent=False,
    return_geometry=False,
    **segyio_kwargs,
):
    """Convert SEGY data to Xarray or Netcdf4

    This is a helper function for segy_loader. Users should use that function
    directly to load all segy data.

    """

    # get the name of the cdp header column; if no cdp byte location was
    # given, default to byte 21 and number the traces sequentially
    if cdp is None:
        cdp = 21
        cdp_head_loc = _get_tf(cdp)
        head_df[cdp_head_loc] = head_df.index.values
    else:
        cdp_head_loc = _get_tf(cdp)

    # get vertical sample ranges
    n0 = 0
    nsamp = head_bin["Samples"]
    ns0 = head_df.DelayRecordingTime.min()

    # unique cdp numbers, sorted ascending
    cdps = head_df[cdp_head_loc].unique()
    cdps = np.sort(cdps)
    head_df["cdp_index"] = _header_to_index_mapping(head_df[cdp_head_loc])

    # binary header translation
    nsamp = head_bin["Samples"]
    sample_rate = head_bin["Interval"] / 1000.0
    msys = _SEGY_MEASUREMENT_SYSTEM[head_bin["MeasurementSystem"]]

    # for offset
    if offset is not None:
        off_head_loc = _get_tf(offset)
        offsets = head_df[off_head_loc].unique()
        offsets = np.sort(offsets)
        head_df["off_index"] = _header_to_index_mapping(head_df[off_head_loc])
    else:
        offsets = None

    if zcrop is not None:
        zcrop = check_zcrop(zcrop, [0, nsamp])
        n0, nsamp = zcrop
        ns0 = sample_rate * n0
        nsamp = nsamp - n0 + 1
    vert_samples = np.arange(ns0,
                             ns0 + sample_rate * nsamp,
                             sample_rate,
                             dtype=int)

    builder, domain = _dataset_coordinate_helper(vert_samples,
                                                 vert_domain,
                                                 cdp=cdps,
                                                 offset=offsets)

    ds = create_seismic_dataset(**builder)

    # attach the text header to the dataset attributes
    text = get_segy_texthead(segyfile, **segyio_kwargs)
    ds.attrs[AttrKeyField.text.value] = text

    if ncfile is not None and return_geometry:
        ds.seisio.to_netcdf(ncfile)
        return ds
    elif return_geometry:
        return ds

    segyio_kwargs.update(dict(ignore_geometry=True))

    # stacked data
    if offset is None:
        ds = _segy2d_xr(
            segyfile,
            ds,
            segyio_kwargs,
            n0,
            nsamp,
            head_df,
            cdp_head_loc,
            vert_domain=vert_domain,
            silent=silent,
        )

    # prestack data
    if offset is not None:
        ds = _segy2d_ps_xr(
            segyfile,
            ds,
            segyio_kwargs,
            n0,
            nsamp,
            head_df,
            cdp_head_loc,
            vert_domain=vert_domain,
            silent=silent,
        )

    if ncfile is not None:
        ds.seisio.to_netcdf(ncfile)

    return ds
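
A similar hedged sketch for the 2D path: the cdp keyword and the byte-21 default are taken from the function body above, while the segy_loader entry point and the offset byte location (conventionally byte 37 in the SEGY trace header) are assumptions.

# Usage sketch for 2D data (assumed entry point; file names are hypothetical).
from segysak.segy import segy_loader

# A stacked 2D line keyed on the CDP number stored at trace-header byte 21.
line = segy_loader("line_2d.segy", cdp=21)
print(line)

# Prestack 2D gathers: also pass the offset byte location so the loader builds
# an offset dimension (byte 37 is the conventional source-receiver offset slot).
# gathers = segy_loader("line_2d.segy", cdp=21, offset=37)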
Example #3
def segy2ncdf(segyfile,
              ncfile,
              CMP=False,
              iline=189,
              xline=193,
              cdpx=181,
              cdpy=185,
              vert='TWT',
              units='AMP',
              crop=None,
              zcrop=None,
              silent=False):
    """Convert SEGY data to NetCDF4 File

    The output ncfile has the following structure
        Dimensions:
            vert - The vertical axis
            iline - Inline axis
            xline - Xline axis
        Variables:
            INLINE_3D - The inline numbering
            CROSSLINE_3D - The xline numbering
            CDP_X - Eastings
            CDP_Y - Northings
            CDP_TRACE - Trace Number
            data - The data volume
        Attributes:
            vert.units
            vert.data.units
            ns - Number of samples in vert
            ds - Sample rate

    Args:
        segyfile (str): Input SEGY file path.
        ncfile (str): Output SEISNC file path.
        iline (int): Inline byte location.
        xline (int): Cross-line byte location.
        cdpx (int): CDP X (easting) byte location.
        cdpy (int): CDP Y (northing) byte location.
        vert (str): Vertical sampling domain.
        units (str): Units of the amplitude data.
        crop (list): Minimum and maximum inline and crossline to output,
            in the form [min_il, max_il, min_xl, max_xl].
        zcrop (list): Minimum and maximum vertical samples to output,
            in the form [min, max].
        silent (bool): Disable the progress bar.

    """
    head_df = segy_header_scrape(segyfile)
    head_bin = segy_bin_scrape(segyfile)

    # get the names of the header columns we need
    il_head_loc = str(segyio.TraceField(iline))
    xl_head_loc = str(segyio.TraceField(xline))
    x_head_loc = str(segyio.TraceField(cdpx))
    y_head_loc = str(segyio.TraceField(cdpy))

    # calculate vertical, inline and crossline ranges
    il0 = head_df[il_head_loc].min()
    iln = head_df[il_head_loc].max()
    xl0 = head_df[xl_head_loc].min()
    xln = head_df[xl_head_loc].max()
    n0 = 0
    nsamp = head_df.TRACE_SAMPLE_COUNT.min()
    ns0 = head_df.DelayRecordingTime.min()
    coord_scalar = head_df.SourceGroupScalar.median()
    if coord_scalar == 0:
        coord_scalar = 1  # a zero scalar means the coordinates are unscaled
    coord_scalar_sign = coord_scalar / abs(coord_scalar)
    coord_scalar_mult = np.power(abs(coord_scalar), coord_scalar_sign)

    dil = np.max(head_df[il_head_loc].values[1:] -
                 head_df[il_head_loc].values[:-1])
    dxl = np.max(head_df[xl_head_loc].values[1:] -
                 head_df[xl_head_loc].values[:-1])

    if crop is not None:
        crop = check_crop(crop, [il0, iln, xl0, xln])
        il0, iln, xl0, xln = crop

    # number of inlines and crosslines
    ni = 1 + (iln - il0) // dil
    nx = 1 + (xln - xl0) // dxl

    # binary header translation
    ns = head_bin['Samples']
    ds = head_bin['Interval']
    msys = _SEGY_MEASUREMENT_SYSTEM[head_bin['MeasurementSystem']]

    if zcrop is not None:
        zcrop = check_zcrop(zcrop, [0, ns])
        n0, ns = zcrop
        ns0 = n0 * ds / 1000  # Interval is in microseconds; convert to ms
        nsamp = ns - n0 + 1

    create_empty_seisnc(ncfile, (ni, nx, nsamp))
    set_seisnc_dims(ncfile,
                    first_sample=ns0,
                    sample_rate=ds // 1000,
                    first_iline=il0,
                    iline_step=dil,
                    first_xline=xl0,
                    xline_step=dxl,
                    vert_domain=vert,
                    measurement_system=msys)

    text = get_segy_texthead(segyfile)

    with segyio.open(segyfile, 'r', ignore_geometry=True, iline=iline, xline=xline) as segyf, \
      netCDF4.Dataset(ncfile, "a", format="NETCDF4") as seisnc:
        seisnc.text = text

        # assign CDP X/Y coordinates
        query = f"{il_head_loc} >= @il0 & {il_head_loc} <= @iln & {xl_head_loc} >= @xl0 & {xl_head_loc} <= @xln"
        cdpx = head_df.query(query)[[il_head_loc, xl_head_loc, x_head_loc]].pivot(
            index=il_head_loc, columns=xl_head_loc).values
        cdpy = head_df.query(query)[[il_head_loc, xl_head_loc, y_head_loc]].pivot(
            index=il_head_loc, columns=xl_head_loc).values
        seisnc['CDP_X'][:, :] = cdpx * coord_scalar_mult
        seisnc['CDP_Y'][:, :] = cdpy * coord_scalar_mult

        segyf.mmap()
        # buffer one inline of traces at a time
        temp_line = np.full((nx, nsamp), np.nan, float)
        cur_iline = head_df[il_head_loc][0]
        pb = tqdm(total=segyf.tracecount,
                  desc="Converting SEGY",
                  disable=silent)
        for n, trc in enumerate(segyf.trace):
            cxl = head_df[xl_head_loc][n]
            cil = head_df[il_head_loc][n]
            if cxl < xl0 or cxl > xln or cil < il0 or cil > iln:
                pb.update()
                continue
            # flush the completed inline buffer before starting a new inline
            if cil > cur_iline:
                seisnc['data'][(cur_iline - il0) // dil, :, :] = temp_line
                temp_line[:, :] = np.nan
                cur_iline = cil
            cur_xline = (cxl - xl0) // dxl
            temp_line[cur_xline, :] = trc[n0:ns + 1]
            pb.update()
        # write the final inline buffer
        seisnc['data'][(cur_iline - il0) // dil, :, :] = temp_line
        pb.close()
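
To close, a short sketch of driving segy2ncdf directly with the arguments documented above; the file names and crop values are placeholders, and reading the result back with netCDF4 is an optional check rather than part of the converter.

# Usage sketch: convert a SEGY cube to NetCDF4 with optional cropping.
segy2ncdf(
    "volume_3d.segy",
    "volume_3d.seisnc",
    iline=189,
    xline=193,
    crop=[1000, 1200, 2000, 2300],  # [min_il, max_il, min_xl, max_xl]
    zcrop=[0, 500],                 # [min_sample, max_sample]
)

# Optional read-back to confirm the geometry; the data variable is laid out
# as (iline, xline, vert) per the writes in the trace loop above.
import netCDF4
with netCDF4.Dataset("volume_3d.seisnc") as seisnc:
    print(seisnc["data"].shape)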