def get_xy(fname, xhdr, yhdr, xyscalerhdr, trlstevery=50000):
    """Collect the scaled X/Y coordinate of every trace header in a SEG-Y file.

    Parameters
    ----------
    fname : str
        SEG-Y file, opened read-only with ``ignore_geometry=True``.
    xhdr, yhdr
        Trace-header keys holding the X and Y coordinates.
    xyscalerhdr
        Trace-header key holding the coordinate scaler.
    trlstevery : int, optional
        Print a progress line every ``trlstevery`` traces. A falsy value
        (0 or None) disables the progress lines.

    Returns
    -------
    pandas.DataFrame
        Columns ``XC``, ``YC`` and ``TRNUM`` (0-based trace index), one row
        per trace.

    Notes
    -----
    The absolute value of the scaler is always used as a divisor and a
    scaler of 0 is treated as 1.
    NOTE(review): the SEG-Y standard uses a positive scaler as a multiplier;
    confirm the input files only carry negative (divisor) scalers.
    """
    xclst = []
    yclst = []
    with sg.open(fname, 'r', ignore_geometry=True) as srcp:
        ntraces = len(srcp.trace)
        print(f'Total # of Traces: {ntraces}')
        # Only headers are needed, so walk trace indices instead of iterating
        # srcp.trace (which would load every trace's samples).
        for trnum in range(ntraces):
            # Build the header object once and reuse it for all three keys.
            hdr = srcp.header[trnum]
            xysc = np.fabs(hdr[xyscalerhdr])
            if xysc == 0:
                xysc = 1.0
            xci = hdr[xhdr] / xysc
            yci = hdr[yhdr] / xysc
            xclst.append(xci)
            yclst.append(yci)
            if trlstevery and trnum % trlstevery == 0:
                print(f'Trace # {trnum:0d}  {xci:.2f}  {yci:.2f}')
    trcols = ['XC', 'YC', 'TRNUM']
    xytrcdf = pd.DataFrame(
        {
            'XC': np.array(xclst),
            'YC': np.array(yclst),
            'TRNUM': np.arange(len(xclst), dtype=int),
        },
        columns=trcols,
    )
    print(xytrcdf.head())
    return xytrcdf
Пример #2
0
def get_xy(fname, xhdr, yhdr, xyscalerhdr):
    """Return the scaled X and Y coordinates of every trace in a SEG-Y file.

    Parameters
    ----------
    fname : str
        SEG-Y file, opened read-only with ``ignore_geometry=True``.
    xhdr, yhdr
        Trace-header keys holding the X and Y coordinates.
    xyscalerhdr
        Trace-header key holding the coordinate scaler.

    Returns
    -------
    tuple of (list, list)
        ``(xclst, yclst)``, one entry per trace, in trace order.

    Notes
    -----
    The absolute value of the scaler is used as a divisor. A scaler of 0 is
    treated as 1 so that unset scalers do not produce inf/nan coordinates.
    """
    xclst = []
    yclst = []
    with sg.open(fname, 'r', ignore_geometry=True) as srcp:
        # Headers only: index the traces rather than loading their samples.
        for trnum in range(len(srcp.trace)):
            hdr = srcp.header[trnum]
            xysc = np.fabs(hdr[xyscalerhdr])
            if xysc == 0:
                xysc = 1.0
            xclst.append(hdr[xhdr] / xysc)
            yclst.append(hdr[yhdr] / xysc)
    return xclst, yclst
def seisattrib_atwell(sflist, swa):
    """Sample every SEG-Y attribute file at the well points listed in *swa*.

    Parameters
    ----------
    sflist : list of str
        SEG-Y attribute files. The sample rate is read from the first one.
    swa : pandas.DataFrame
        Well samples. The second column is divided by the sample rate to get
        the sample index, and a ``TRNUM`` column gives the trace to read.
        The frame is modified in place: one column per SEG-Y file (named
        after the file stem) is added and ``TRNUM`` is dropped.

    Returns
    -------
    pandas.DataFrame
        *swa* with the attribute columns added and the column originally at
        position 4 (after dropping the helper columns) moved to the end.
    """
    sr = get_samplerate(sflist[0])
    # Floor division by a float sample rate yields floats, which cannot be
    # used to index a trace array, so store integer sample indices.
    swa['SLICENUM'] = (swa.iloc[:, 1] // sr).astype(int)
    # Positional lists so the lookup does not depend on swa's index labels.
    trnums = swa['TRNUM'].astype(int).tolist()
    slicenums = swa['SLICENUM'].tolist()
    for sf in sflist:
        print(sf)
        fname = os.path.splitext(os.path.basename(sf))[0]
        with sg.open(sf, 'r', ignore_geometry=True) as srcp:
            swa[fname] = [
                srcp.trace[trnum][slicenum]
                for trnum, slicenum in zip(trnums, slicenums)
            ]
    swa.drop(['TRNUM', 'SLICENUM'], inplace=True, axis=1)
    colslst = swa.columns.tolist()
    # Move the column at position 4 to the end.
    colslst.append(colslst.pop(4))
    print(colslst)
    return swa[colslst]
Пример #4
0
def main():
    """Build a CatBoost model per slice and write a predicted pseudo-log volume.

    Steps, as implemented below:

    1. Read the wells CSV (``cmdl.wellscsv``). The vertical increment comes
       from the second column and the log name is the last column.
    2. Copy the first SEG-Y file of the list to the output file and zero it.
    3. For every slice number derived from ``cmdl.startendslice``: gather that
       slice from all SEG-Y files, scale it, join it with the wells at that
       time/depth, optionally generate extra samples, fit a
       ``CatBoostRegressor``, predict the whole slice, record QC numbers and
       optionally plot/save a cross-plot.
    4. Write the per-slice predictions CSV, the QC CSV, the predicted SEG-Y
       and the wells-with-prediction CSV, then plot the wells.

    NOTE(review): the SEG-Y section uses ``dz``, ``wfname``, ``logname``,
    ``wcols`` and ``outfw`` from the wells section, so running with a SEG-Y
    list but without ``cmdl.wellscsv`` raises NameError. Confirm the wells
    CSV is meant to be mandatory.
    """
    cmdl = getcommandline()
    if cmdl.wellscsv:
        allwells = pd.read_csv(cmdl.wellscsv)
        # Vertical sampling of the wells, taken from the second column.
        dz = np.diff(allwells[allwells.columns[1]])[2]
        print('Well Vertical increment {}'.format(dz))
        wdirsplit, wfextsplit = os.path.split(cmdl.wellscsv)
        wfname, wfextn = os.path.splitext(wfextsplit)
        wcols = allwells.columns.tolist()
        print(wcols)
        logname = wcols[-1]
        print('logname:', logname)
        lognamepred = logname + 'pred'
        wcols.append(lognamepred)

        if cmdl.outdir:
            outfw = os.path.join(cmdl.outdir, wfname) + "_pred.csv"
        else:
            outfw = os.path.join(wdirsplit, wfname) + "_pred.csv"

    if cmdl.segyfileslist:
        sflist = process_segylist(cmdl.segyfileslist)

        dirsplit, fextsplit = os.path.split(sflist[0])
        fname, fextn = os.path.splitext(fextsplit)
        if cmdl.outdir:
            outfsegy = os.path.join(cmdl.outdir,
                                    wfname) + "_p%s.sgy" % (logname)
        else:
            outfsegy = os.path.join(dirsplit, wfname) + "_p%s.sgy" % (logname)

        print('Copying file, please wait ........')
        start_copy = datetime.now()
        copyfile(sflist[0], outfsegy)
        end_copy = datetime.now()
        print('Duration of copying: {}'.format(end_copy - start_copy))

        sr = get_samplerate(outfsegy)
        print('Seismic Sample Rate: {}'.format(sr))

        print('Zeroing segy file, please wait ........')
        start_zero = datetime.now()
        zero_segy(outfsegy)
        end_zero = datetime.now()
        print('Duration of zeroing: {}'.format(end_zero - start_zero))

        # Use the full path of the first SEG-Y file; the bare file name only
        # resolves when the file happens to be in the working directory.
        xclst, yclst = get_xy(sflist[0], cmdl.segyxhdr, cmdl.segyyhdr,
                              cmdl.xyscalerhdr)
        xydf = pd.DataFrame({'XC': xclst, 'YC': yclst})
        preddf = xydf.copy()
        # One attribute column per SEG-Y file, named after the file stem.
        # dirsplit is left pointing at the directory of the LAST file, which
        # is the default output directory used below.
        scols = list()
        for f in sflist:
            dirsplit, fextsplit = os.path.split(f)
            fname, fextn = os.path.splitext(fextsplit)
            scols.append(fname)

        sfname = 'allattrib'
        sstart = int(cmdl.startendslice[0] // dz)
        send = int(cmdl.startendslice[1] // dz)
        start_process = datetime.now()
        slicelst = list()
        slicenumlst = list()
        wnlst = list()
        slicewnlst = list()
        coef0lst = list()
        coef1lst = list()
        r2lst = list()
        # Stays None only when the slice range is empty.
        allwellspred = None
        for slicenum in range(sstart, send):
            if cmdl.outdir:
                outfslice = os.path.join(cmdl.outdir,
                                         sfname) + "_slice%d.csv" % slicenum
            else:
                outfslice = os.path.join(dirsplit,
                                         sfname) + "_slice%d.csv" % slicenum
            zslice = slicenum * dz
            if cmdl.intime:
                wdf = allwells[allwells.TIME == zslice]
            else:
                wdf = allwells[allwells.DEPTH == zslice]
            c = wdf.columns[4]  # log name
            # Number of wells with a non-null log value on this slice.
            nw = wdf[c].count()
            if cmdl.intime:
                print('# of wells for time slice {} is {}'.format(zslice, nw))
            else:
                print('# of wells for depth slice {} is {}'.format(zslice, nw))

            slicefiles = list()
            for i in range(len(sflist)):
                slicefiles.append(get_slice(sflist[i], slicenum))
            slicear = np.array(slicefiles).T
            slicedf = pd.DataFrame(slicear, columns=scols)

            alldata = pd.concat((xydf, slicedf), axis=1)
            if cmdl.intime:
                print('Slice#: {} @ Time : {} ms'.format(slicenum, zslice))
            else:
                print('Slice#: {} @ Depth : {} ms'.format(slicenum, zslice))

            if cmdl.slicesout:
                alldata.to_csv(outfslice, index=False)
            alldatas = process_sscalecols(alldata, includexy=cmdl.includexy)
            wdfsa = process_seiswellattrib(alldatas, wdf, cmdl.intime)
            print(wdfsa.tail())
            # Columns 4..-2 are the predictors, the last column is the target.
            X = wdfsa.iloc[:, 4:-1]
            y = wdfsa.iloc[:, -1]
            # Number of real well samples, before any generated samples.
            inshape = y.size
            if y.size > 2 and cmdl.generatesamples:
                X, y = gensamples(X,
                                  y,
                                  nsamples=cmdl.generatensamples,
                                  ncomponents=cmdl.generatencomponents,
                                  kind='r',
                                  func='cbr')
            Xpred = alldatas.iloc[:, 2:]

            model = CatBoostRegressor(iterations=cmdl.cbriterations,
                                      learning_rate=cmdl.cbrlearningrate,
                                      depth=cmdl.cbrdepth,
                                      loss_function='RMSE',
                                      random_seed=42,
                                      logging_level='Silent')
            # Fit model
            model.fit(X, y)
            # Get predictions
            ypred = model.predict(X)
            # Calculating Mean Squared Error
            mse = np.mean((ypred - y)**2)
            print('Metrics on input data: ')
            print('MSE: %.4f' % (mse))
            r2 = r2_score(y, ypred)
            print('R2 : %10.3f' % r2)

            ccmdl = sts.pearsonr(y, ypred)
            if slicenum == sstart:
                wellsdf = wdfsa[wdfsa.columns[:4]].copy()
                wellsdf[logname] = wdfsa[wdfsa.columns[-1]].copy()
                if cmdl.generatesamples:
                    wellsdf[lognamepred] = ypred[:inshape]
                else:
                    wellsdf[lognamepred] = ypred
                # Keep the result defined when only one slice is processed.
                allwellspred = wellsdf
            else:
                wellsdf0 = wdfsa[wdfsa.columns[:4]].copy()
                wellsdf0[logname] = wdfsa[wdfsa.columns[-1]].copy()
                if cmdl.generatesamples:
                    wellsdf0[lognamepred] = ypred[:inshape]
                else:
                    wellsdf0[lognamepred] = ypred
                # DataFrame.append was removed in pandas 2.0; concat keeps
                # the original row labels exactly as append did.
                allwellspred = pd.concat((wellsdf, wellsdf0))
                wellsdf = allwellspred[wcols].copy()
                print(allwellspred.tail())
                print(allwellspred.shape)

            pred = model.predict(Xpred)
            alldatas[wdfsa.columns[4]] = pred
            slicestr = '{:.0f}'.format(zslice)
            preddf[slicestr] = pred

            # Straight-line fit of predicted vs. actual for the QC table/plot.
            qc0 = np.polyfit(y, ypred, 1)
            xrngmin, xrngmax = y.min(), y.max()
            xvi = np.linspace(xrngmin, xrngmax)
            yvi0 = np.polyval(qc0, xvi)

            if slicenum % cmdl.plotincrement == 0:
                slicedepth = slicenum * dz
                fig, ax = plt.subplots()

                plt.scatter(y,
                            ypred,
                            alpha=0.5,
                            c='b',
                            s=15,
                            label='Model Predicted')
                if cmdl.generatesamples:
                    ax.scatter(y[inshape:],
                               ypred[inshape:],
                               c='r',
                               marker='X',
                               s=25,
                               label='Generated Samples')

                plt.plot(xvi, yvi0, c='k', lw=2)

                ax.annotate('Model = %-.*f * Actual + %-.*f' %
                            (2, qc0[0], 2, qc0[1]),
                            xy=(xvi[0], yvi0[0]),
                            xytext=(0.14, 0.85),
                            textcoords='figure fraction',
                            fontsize=10)
                ax.annotate('Model Pearson cc = %-.*f   Pearson p = %-.*f' %
                            (2, ccmdl[0], 3, ccmdl[1]),
                            xy=(xvi[0], yvi0[0]),
                            xytext=(0.14, 0.81),
                            textcoords='figure fraction',
                            fontsize=10)
                ax.set_title(f'CBR Slice {slicedepth:.0f} Pseudo {logname}')
                ax.set_xlabel('Actual')
                ax.set_ylabel('Predicted')
                if not cmdl.hideplots:
                    plt.show()
                swfname = 'SWAttrib'
                if cmdl.outdir:
                    pdfcl = os.path.join(
                        cmdl.outdir, swfname
                    ) + f"{slicedepth:.0f}" + "_cbr%s.pdf" % (logname)
                    wsdf = os.path.join(
                        cmdl.outdir, swfname
                    ) + f"{slicedepth:.0f}" + "_cbr%s.csv" % (logname)
                else:
                    pdfcl = os.path.join(
                        dirsplit, swfname
                    ) + f"{slicedepth:.0f}" + "_cbr%s.pdf" % (logname)
                    wsdf = os.path.join(
                        dirsplit, swfname
                    ) + f"{slicedepth:.0f}" + "_cbr%s.csv" % (logname)
                fig.savefig(pdfcl)
                # Release the figure so open figures do not pile up.
                plt.close(fig)
                wdfsa.to_csv(wsdf, index=False)
                print(f'Successfully generated {wsdf}')

            slicelst.append(zslice)
            wnlst.append(nw)
            slicewnlst.append(wdfsa.shape[0])
            slicenumlst.append(slicenum)
            r2lst.append(r2)
            coef0lst.append(qc0[0])
            coef1lst.append(qc0[1])

        end_process = datetime.now()
        print('Duration of ML model building and prediction : {}'.format(
            end_process - start_process))

        qccols = [
            'SLICENUM', 'SLICEZ', 'WELLSFOUND', 'WELLSUSED', 'COEF0', 'COEF1',
            'R2'
        ]
        qcdf = pd.DataFrame({
            'SLICENUM': slicenumlst,
            'SLICEZ': slicelst,
            'WELLSFOUND': wnlst,
            'WELLSUSED': slicewnlst,
            'COEF0': coef0lst,
            'COEF1': coef1lst,
            'R2': r2lst
        })
        qcdf = qcdf[qccols].copy()

        if cmdl.outdir:
            outseispred = os.path.join(cmdl.outdir, wfname) + "_slices.csv"
            outqc = os.path.join(cmdl.outdir, wfname) + "_qc.csv"
        else:
            outseispred = os.path.join(dirsplit, wfname) + "_slices.csv"
            outqc = os.path.join(dirsplit, wfname) + "_qc.csv"

        preddf.to_csv(outseispred, index=False)
        print('Successfully generated {}'.format(outseispred))
        print('DataFrame size: ', preddf.shape)
        # Number of predicted slices (all columns except XC and YC).
        endsmpl = preddf.shape[1] - 2

        qcdf.to_csv(outqc, index=False)
        print('Successfully generated {}'.format(outqc))

        # Extract the prediction matrix once instead of slicing the
        # DataFrame row by row inside the trace loop.
        predvals = preddf.iloc[:, 2:].values
        with sg.open(outfsegy, "r+") as srcp:
            for trnum, tr in enumerate(srcp.trace):
                tr[sstart:(sstart + endsmpl)] = predvals[trnum]
                srcp.trace[trnum] = tr
        print('Successfully generated {}'.format(outfsegy))

        if allwellspred is None:
            print('No slices were processed; {} not written'.format(outfw))
        else:
            allwellspred.to_csv(outfw, index=False)
            print('Successfully generated {}'.format(outfw))
            plotwells(allwellspred, hideplots=cmdl.hideplots)
Пример #5
0
def get_samplerate(fname):
    """Return entry 39 of the second trace header, divided by 1000.

    The header of trace index 1 is enumerated in the order its ``items()``
    are produced and the value of the item at position 39 is used.
    NOTE(review): presumably that item is the trace sample interval in
    microseconds, making the result milliseconds; confirm against the
    segyio trace-field ordering.
    """
    with sg.open(fname, 'r', ignore_geometry=True) as segy:
        indexed_fields = dict(enumerate(segy.header[1].items()))
    _key, rawvalue = indexed_fields[39]
    return rawvalue / 1000
Пример #6
0
def zero_segy(fname):
    """Overwrite every sample of every trace in a SEG-Y file with zero.

    The file is opened in ``'r+'`` mode with ``ignore_geometry=True`` and
    modified in place; headers are not touched by this function.
    """
    with sg.open(fname, 'r+', ignore_geometry=True) as srcp:
        for trnum, tr in enumerate(srcp.trace):
            # Fill instead of multiplying by 0: NaN * 0 and inf * 0 are NaN,
            # so multiplication would leave bad samples in the "zeroed" file.
            tr.fill(0)
            srcp.trace[trnum] = tr
Пример #7
0
def get_slice(fname, slicenum):
    """Return sample number *slicenum* of every trace in *fname* as a list.

    The file is opened read-only with ``ignore_geometry=True`` and the
    values are returned in trace order.
    """
    with sg.open(fname, 'r', ignore_geometry=True) as segy:
        return [trace[slicenum] for trace in segy.trace]
Пример #8
0
def get_onetrace(fname, tracenum, sstart=None, send=None):
    """Yield samples ``[sstart:send]`` of one trace from one SEG-Y file.

    This is a generator that produces exactly one item. The file is opened
    only when the generator is first advanced and stays open until the
    generator is resumed or closed.
    """
    with sg.open(fname, 'r', ignore_geometry=True) as segy:
        yield segy.trace[tracenum][sstart:send]
def main():
    """Apply a previously saved model trace by trace and write a SEG-Y volume.

    Steps, as implemented below:

    1. Read the seismic-attributes-at-wells CSV (``cmdl.sattribwellscsv``).
       The vertical increment comes from the second column of the first
       well and the curve name is the last column.
    2. Copy the first SEG-Y file of the list to the output file and zero it.
    3. Load the model selected by ``cmdl.modeltype`` from
       ``cmdl.MLmodelname``.
    4. For every trace, collect the attribute samples in ``[sstart:send]``,
       predict, and write the prediction into the same sample range of the
       output trace.

    Raises
    ------
    ValueError
        If ``cmdl.modeltype`` is not one of the supported model types.
    """
    cmdl = getcommandline()
    # csv file generated from _build without prediction column
    allwdfsa = pd.read_csv(cmdl.sattribwellscsv)
    # need to extract by well to find depth increment
    wlst = allwdfsa.WELL.unique().tolist()
    wdf0 = allwdfsa[allwdfsa['WELL'] == wlst[0]]
    dz = np.diff(wdf0[wdf0.columns[1]])[2]
    print(f'Well Vertical increment {dz}')
    sstart = int(cmdl.startendinterval[0] // dz)
    send = int(cmdl.startendinterval[1] // dz)
    logname = allwdfsa.columns[-1]
    print(f'Curve Name: {logname} Sample start: {sstart}  Sample end: {send}')

    if not cmdl.segyfileslist:
        return

    sflist = process_segylist(cmdl.segyfileslist)

    # The output name is derived from the list file, not from a SEG-Y file.
    dirsplit, fextsplit = os.path.split(cmdl.segyfileslist)
    fname, fextn = os.path.splitext(fextsplit)
    if cmdl.outdir:
        outfsegy = os.path.join(cmdl.outdir, fname) + f"_p{logname}.sgy"
    else:
        outfsegy = os.path.join(dirsplit, fname) + f"_p{logname}.sgy"

    print('Copying file, please wait ........')
    start_copy = datetime.now()
    copyfile(sflist[0], outfsegy)
    end_copy = datetime.now()
    print(f'Duration of copying: {(end_copy - start_copy)}')

    sr = get_samplerate(outfsegy)
    print(f'Seismic Sample Rate: {sr}')

    print('Zeroing segy file, please wait ........')
    start_zero = datetime.now()
    zero_segy(outfsegy)
    end_zero = datetime.now()
    print(f'Duration of zeroing: {(end_zero - start_zero)}')

    start_process = datetime.now()
    if cmdl.modeltype == 'cbr':
        inmodel = CatBoostRegressor()
        inmodel.load_model(cmdl.MLmodelname)
    elif cmdl.modeltype in ('linreg', 'knn', 'svr', 'sgdr'):
        # NOTE: unpickling executes arbitrary code; only load model files
        # that come from a trusted source.
        with open(cmdl.MLmodelname, 'rb') as modelfile:
            inmodel = pickle.load(modelfile)
    elif cmdl.modeltype == 'ann':
        # Architecture comes from the JSON model file. The weights file is
        # named after the SEG-Y list file with an .h5 extension.
        anndirsplit, annfextsplit = os.path.split(cmdl.segyfileslist)
        annfname, annfextn = os.path.splitext(annfextsplit)
        annwtsfname = os.path.join(anndirsplit, annfname) + '.h5'
        with open(cmdl.MLmodelname, 'r') as json_file:
            loaded_model_json = json_file.read()
        inmodel = model_from_json(loaded_model_json)
        # load weights into new model
        inmodel.load_weights(annwtsfname)
        print("Loaded model from disk")
        inmodel.compile(loss='mean_squared_error', optimizer='adam')
    else:
        raise ValueError(f'Unsupported model type: {cmdl.modeltype!r}')

    with sg.open(outfsegy, "r+") as srcp:
        for trnum, tr in enumerate(srcp.trace):
            Xpred = collect_traces(sflist, trnum, sstart=sstart, send=send)
            trpred = modelpredict(inmodel,
                                  Xpred,
                                  scalelog=cmdl.donotscalelog,
                                  logmin=cmdl.logscalemm[0],
                                  logmax=cmdl.logscalemm[1])

            tr[sstart:send] = trpred
            srcp.trace[trnum] = tr
    print(f'Successfully generated {outfsegy}')
    end_process = datetime.now()
    print(f'Duration: {end_process - start_process}')