def get_xy(fname, xhdr, yhdr, xyscalerhdr, trlstevery=50000):
    """Read scaler-corrected X/Y coordinates for every trace of a SEG-Y file.

    Parameters
    ----------
    fname : str
        SEG-Y file path.
    xhdr, yhdr : int
        Trace-header byte positions of the X and Y coordinate fields.
    xyscalerhdr : int
        Trace-header byte position of the coordinate scaler field.
    trlstevery : int
        Progress-print interval in traces.

    Returns
    -------
    pandas.DataFrame with columns XC, YC, TRNUM.
    """
    xs, ys, trace_ids = [], [], []
    with sg.open(fname, 'r', ignore_geometry=True) as srcp:
        print(f'Total # of Traces: {len(srcp.trace)}')
        for tnum, _tr in enumerate(srcp.trace):
            hdr = srcp.header[tnum]
            # NOTE(review): the SEG-Y standard treats a negative scaler as a
            # divisor and a positive one as a multiplier; this code always
            # divides by |scaler| — confirm that is the intended convention.
            # A zero scaler is treated as "no scaling" (falls back to 1.0).
            scaler = np.fabs(hdr[xyscalerhdr]) or 1.0
            x_val = hdr[xhdr] / scaler
            y_val = hdr[yhdr] / scaler
            xs.append(x_val)
            ys.append(y_val)
            trace_ids.append(tnum)
            if tnum % trlstevery == 0:
                print(f'Trace # {tnum:0d} {x_val:.2f} {y_val:.2f}')
    xytrcdf = pd.DataFrame({'XC': np.array(xs),
                            'YC': np.array(ys),
                            'TRNUM': np.array(trace_ids, dtype=int)})
    xytrcdf = xytrcdf[['XC', 'YC', 'TRNUM']].copy()
    print(xytrcdf.head())
    return xytrcdf
def get_xy(fname, xhdr, yhdr, xyscalerhdr):
    """Return per-trace X/Y coordinate lists from a SEG-Y file.

    NOTE(review): this redefinition shadows the 5-argument get_xy defined
    just above (which also returns trace numbers as a DataFrame).

    Parameters
    ----------
    fname : str
        SEG-Y file path.
    xhdr, yhdr : int
        Trace-header byte positions of the X and Y coordinate fields.
    xyscalerhdr : int
        Trace-header byte position of the coordinate scaler field.

    Returns
    -------
    (list, list) : X coordinates, Y coordinates — one entry per trace.
    """
    xclst = list()
    yclst = list()
    with sg.open(fname, 'r', ignore_geometry=True) as srcp:
        for trnum, tr in enumerate(srcp.trace):
            xysc = np.fabs(srcp.header[trnum][xyscalerhdr])
            # BUG FIX: guard against a zero scaler, which previously produced
            # inf/NaN coordinates via division by zero.  Treat 0 as "no
            # scaling" for consistency with the other get_xy in this file.
            if xysc == 0:
                xysc = 1.0
            xclst.append(srcp.header[trnum][xhdr] / xysc)
            yclst.append(srcp.header[trnum][yhdr] / xysc)
    return xclst, yclst
def seisattrib_atwell(sflist, swa):
    """Sample every SEG-Y file in sflist at each well's trace/sample location.

    Parameters
    ----------
    sflist : list of str
        SEG-Y file names; all are assumed to share the sample rate of the
        first file.
    swa : pandas.DataFrame
        Well table; column 1 is the depth/time curve and a TRNUM column holds
        the trace number for each row.  Assumes a default RangeIndex so that
        .loc[i, ...] for i in 0..n-1 addresses every row — TODO confirm.

    Returns
    -------
    swa with one amplitude column appended per input file (named after the
    file, extension stripped), the TRNUM/SLICENUM helper columns dropped,
    and the column at position 4 moved to the end.
    """
    sr = get_samplerate(sflist[0])
    # BUG FIX: cast to int — float // float yields a float, which cannot be
    # used to index into a trace array.
    swa['SLICENUM'] = (swa.iloc[:, 1] // sr).astype(int)
    for sf in sflist:
        print(sf)
        dirsplit, fextsplit = os.path.split(sf)
        fname, fextn = os.path.splitext(fextsplit)
        tracesample = list()
        with sg.open(sf, 'r', ignore_geometry=True) as srcp:
            for i in range(swa.shape[0]):
                # One amplitude per well row: trace TRNUM at sample SLICENUM.
                tracesample.append(
                    srcp.trace[int(swa.loc[i, 'TRNUM'])][swa.loc[i, 'SLICENUM']])
        swa[fname] = tracesample
    swa.drop(['TRNUM', 'SLICENUM'], inplace=True, axis=1)
    # Move the column at position 4 to the end (the log curve goes last).
    colslst = swa.columns.tolist()
    colslst.append(colslst[4])
    colslst.pop(4)
    print(colslst)
    swa = swa[colslst]
    return swa
def main():
    """Build a CatBoost model per depth/time slice from well logs plus
    seismic attribute volumes, predict a pseudo-log everywhere, and write
    CSV, QC, SEG-Y and plot output.
    """
    cmdl = getcommandline()
    if cmdl.wellscsv:
        allwells = pd.read_csv(cmdl.wellscsv)
        # Vertical increment taken from column 1 (the depth/time curve).
        dz = np.diff(allwells[allwells.columns[1]])[2]
        print('Well Vertical increment {}'.format(dz))
        wdirsplit, wfextsplit = os.path.split(cmdl.wellscsv)
        wfname, wfextn = os.path.splitext(wfextsplit)
        wcols = allwells.columns.tolist()
        print(wcols)
        # The last column is the log curve to predict.
        logname = wcols[-1]
        print('logname:', logname)
        lognamepred = logname + 'pred'
        wcols.append(lognamepred)
        if cmdl.outdir:
            outfw = os.path.join(cmdl.outdir, wfname) + "_pred.csv"
        else:
            outfw = os.path.join(wdirsplit, wfname) + "_pred.csv"
    if cmdl.segyfileslist:
        sflist = process_segylist(cmdl.segyfileslist)
        dirsplit, fextsplit = os.path.split(sflist[0])
        fname, fextn = os.path.splitext(fextsplit)
        if cmdl.outdir:
            outfsegy = os.path.join(cmdl.outdir, wfname) + "_p%s.sgy" % (logname)
        else:
            outfsegy = os.path.join(dirsplit, wfname) + "_p%s.sgy" % (logname)
        print('Copying file, please wait ........')
        start_copy = datetime.now()
        copyfile(sflist[0], outfsegy)
        end_copy = datetime.now()
        print('Duration of copying: {}'.format(end_copy - start_copy))
        sr = get_samplerate(outfsegy)
        print('Seismic Sample Rate: {}'.format(sr))
        print('Zeroing segy file, please wait ........')
        start_zero = datetime.now()
        zero_segy(outfsegy)
        end_zero = datetime.now()
        print('Duration of zeroing: {}'.format(end_zero - start_zero))
        # BUG FIX: pass the full path (sflist[0]) rather than the bare file
        # name (fextsplit), which only worked when run from that directory.
        xclst, yclst = get_xy(sflist[0], cmdl.segyxhdr, cmdl.segyyhdr,
                              cmdl.xyscalerhdr)
        xydf = pd.DataFrame({'XC': xclst, 'YC': yclst})
        preddf = xydf.copy()
        # Attribute column names = seismic file names without extension.
        scols = list()
        for f in sflist:
            dirsplit, fextsplit = os.path.split(f)
            fname, fextn = os.path.splitext(fextsplit)
            scols.append(fname)
        sfname = 'allattrib'
        # Convert start/end depth (or time) to sample indices.
        sstart = int(cmdl.startendslice[0] // dz)
        send = int(cmdl.startendslice[1] // dz)
        start_process = datetime.now()
        slicelst = list()
        slicenumlst = list()
        wnlst = list()
        slicewnlst = list()
        coef0lst = list()
        coef1lst = list()
        r2lst = list()
        for slicenum in range(sstart, send):
            if cmdl.outdir:
                outfslice = os.path.join(cmdl.outdir, sfname) + "_slice%d.csv" % slicenum
            else:
                outfslice = os.path.join(dirsplit, sfname) + "_slice%d.csv" % slicenum
            zslice = slicenum * dz
            # Wells that have a sample exactly at this slice depth/time.
            if cmdl.intime:
                wdf = allwells[allwells.TIME == zslice]
            else:
                wdf = allwells[allwells.DEPTH == zslice]
            c = wdf.columns[4]  # log name
            nw = wdf[~wdf[c].isnull()].count()[4]
            if cmdl.intime:
                print('# of wells for time slice {} is {}'.format(zslice, nw))
            else:
                print('# of wells for depth slice {} is {}'.format(zslice, nw))
            # One horizontal slice per seismic attribute volume.
            slicefiles = list()
            for i in range(len(sflist)):
                slicefiles.append(get_slice(sflist[i], slicenum))
            slicear = np.array(slicefiles).T
            slicedf = pd.DataFrame(slicear, columns=scols)
            alldata = pd.concat((xydf, slicedf), axis=1)
            if cmdl.intime:
                print('Slice#: {} @ Time : {} ms'.format(slicenum, zslice))
            else:
                print('Slice#: {} @ Depth : {} ms'.format(slicenum, zslice))
            if cmdl.slicesout:
                alldata.to_csv(outfslice, index=False)
            alldatas = process_sscalecols(alldata, includexy=cmdl.includexy)
            wdfsa = process_seiswellattrib(alldatas, wdf, cmdl.intime)
            print(wdfsa.tail())
            # Features are the attribute columns; target is the last column.
            X = wdfsa.iloc[:, 4:-1]
            y = wdfsa.iloc[:, -1]
            inshape = y.size
            if y.size > 2 and cmdl.generatesamples:
                X, y = gensamples(X, y, nsamples=cmdl.generatensamples,
                                  ncomponents=cmdl.generatencomponents,
                                  kind='r', func='cbr')
            Xpred = alldatas.iloc[:, 2:]
            model = CatBoostRegressor(iterations=cmdl.cbriterations,
                                      learning_rate=cmdl.cbrlearningrate,
                                      depth=cmdl.cbrdepth,
                                      loss_function='RMSE',
                                      random_seed=42,
                                      logging_level='Silent')
            # Fit model, then score it on its own training data for QC.
            model.fit(X, y)
            ypred = model.predict(X)
            mse = np.mean((ypred - y)**2)
            print('Metrics on input data: ')
            print('MSE: %.4f' % (mse))
            r2 = r2_score(y, ypred)
            print('R2 : %10.3f' % r2)
            ccmdl = sts.pearsonr(y, ypred)
            # Accumulate per-well actual vs predicted values across slices.
            if slicenum == sstart:
                wellsdf = wdfsa[wdfsa.columns[:4]].copy()
                wellsdf[logname] = wdfsa[wdfsa.columns[-1]].copy()
                if cmdl.generatesamples:
                    wellsdf[lognamepred] = ypred[:inshape]
                else:
                    wellsdf[lognamepred] = ypred
                # BUG FIX: keep allwellspred defined even when the loop runs
                # for a single slice (the else branch is never taken).
                allwellspred = wellsdf
            else:
                wellsdf0 = wdfsa[wdfsa.columns[:4]].copy()
                wellsdf0[logname] = wdfsa[wdfsa.columns[-1]].copy()
                if cmdl.generatesamples:
                    wellsdf0[lognamepred] = ypred[:inshape]
                else:
                    wellsdf0[lognamepred] = ypred
                # BUG FIX: DataFrame.append was removed in pandas 2.0;
                # pd.concat is the supported equivalent.
                allwellspred = pd.concat([wellsdf, wellsdf0])
                wellsdf = allwellspred[wcols].copy()
                print(allwellspred.tail())
                print(allwellspred.shape)
            # Predict over the whole slice and store it as a column.
            pred = model.predict(Xpred)
            alldatas[wdfsa.columns[4]] = pred
            slicestr = '{:.0f}'.format(zslice)
            preddf[slicestr] = pred
            # Straight-line fit of predicted vs actual for QC annotation.
            qc0 = np.polyfit(y, ypred, 1)
            xrngmin, xrngmax = y.min(), y.max()
            xvi = np.linspace(xrngmin, xrngmax)
            yvi0 = np.polyval(qc0, xvi)
            if slicenum % cmdl.plotincrement == 0:
                slicedepth = slicenum * dz
                fig, ax = plt.subplots()
                plt.scatter(y, ypred, alpha=0.5, c='b', s=15,
                            label='Model Predicted')
                if cmdl.generatesamples:
                    ax.scatter(y[inshape:], ypred[inshape:], c='r', marker='X',
                               s=25, label='Generated Samples')
                plt.plot(xvi, yvi0, c='k', lw=2)
                ax.annotate('Model = %-.*f * Actual + %-.*f' % (2, qc0[0], 2, qc0[1]),
                            xy=(xvi[0], yvi0[0]), xytext=(0.14, 0.85),
                            textcoords='figure fraction', fontsize=10)
                ax.annotate('Model Pearson cc = %-.*f Pearson p = %-.*f' % (2, ccmdl[0], 3, ccmdl[1]),
                            xy=(xvi[0], yvi0[0]), xytext=(0.14, 0.81),
                            textcoords='figure fraction', fontsize=10)
                ax.set_title(f'CBR Slice {slicedepth:.0f} Pseudo {logname}')
                ax.set_xlabel('Actual')
                ax.set_ylabel('Predicted')
                if not cmdl.hideplots:
                    plt.show()
                swfname = 'SWAttrib'
                if cmdl.outdir:
                    pdfcl = os.path.join(cmdl.outdir, swfname) + f"{slicedepth:.0f}" + "_cbr%s.pdf" % (logname)
                    wsdf = os.path.join(cmdl.outdir, swfname) + f"{slicedepth:.0f}" + "_cbr%s.csv" % (logname)
                else:
                    pdfcl = os.path.join(dirsplit, swfname) + f"{slicedepth:.0f}" + "_cbr%s.pdf" % (logname)
                    wsdf = os.path.join(dirsplit, swfname) + f"{slicedepth:.0f}" + "_cbr%s.csv" % (logname)
                fig.savefig(pdfcl)
                wdfsa.to_csv(wsdf, index=False)
                print(f'Successfully generated {wsdf}')
            slicelst.append(zslice)
            wnlst.append(nw)
            slicewnlst.append(wdfsa.shape[0])
            slicenumlst.append(slicenum)
            r2lst.append(r2)
            coef0lst.append(qc0[0])
            coef1lst.append(qc0[1])
        end_process = datetime.now()
        print('Duration of ML model building and prediction : {}'.format(
            end_process - start_process))
        qccols = ['SLICENUM', 'SLICEZ', 'WELLSFOUND', 'WELLSUSED',
                  'COEF0', 'COEF1', 'R2']
        qcdf = pd.DataFrame({'SLICENUM': slicenumlst,
                             'SLICEZ': slicelst,
                             'WELLSFOUND': wnlst,
                             'WELLSUSED': slicewnlst,
                             'COEF0': coef0lst,
                             'COEF1': coef1lst,
                             'R2': r2lst})
        qcdf = qcdf[qccols].copy()
        if cmdl.outdir:
            outseispred = os.path.join(cmdl.outdir, wfname) + "_slices.csv"
            outqc = os.path.join(cmdl.outdir, wfname) + "_qc.csv"
        else:
            outseispred = os.path.join(dirsplit, wfname) + "_slices.csv"
            outqc = os.path.join(dirsplit, wfname) + "_qc.csv"
        preddf.to_csv(outseispred, index=False)
        print('Successfully generated {}'.format(outseispred))
        print('DataFrame size: ', preddf.shape)
        # Number of predicted slice columns (the first two are XC, YC).
        endsmpl = preddf.shape[1] - 2
        qcdf.to_csv(outqc, index=False)
        print('Successfully generated {}'.format(outqc))
        # Write the predicted slices back into the zeroed SEG-Y copy.
        with sg.open(outfsegy, "r+") as srcp:
            for trnum, tr in enumerate(srcp.trace):
                trplog = preddf.iloc[trnum, 2:].values
                tr[sstart:(sstart + endsmpl)] = trplog
                srcp.trace[trnum] = tr
        print('Successfully generated {}'.format(outfsegy))
        allwellspred.to_csv(outfw, index=False)
        print('Successfully generated {}'.format(outfw))
        plotwells(allwellspred, hideplots=cmdl.hideplots)
def get_samplerate(fname):
    """Return the sample interval of a SEG-Y file, in milliseconds.

    Reads trace header #1 and takes its 40th entry (index 39), which holds
    the sample interval in microseconds — assumes the standard segyio header
    field ordering; TODO confirm against the library version in use.
    """
    with sg.open(fname, 'r', ignore_geometry=True) as srcp:
        header_values = list(srcp.header[1].values())
    return header_values[39] / 1000
def zero_segy(fname):
    """Overwrite every trace of a SEG-Y file, in place, with zeros."""
    with sg.open(fname, 'r+', ignore_geometry=True) as srcp:
        for idx, trace in enumerate(srcp.trace):
            # zeros_like keeps the trace's shape and dtype intact.
            srcp.trace[idx] = np.zeros_like(trace)
def get_slice(fname, slicenum):
    """Return the amplitude at sample index *slicenum* from every trace."""
    with sg.open(fname, 'r', ignore_geometry=True) as srcp:
        return [trace[slicenum] for trace in srcp.trace]
def get_onetrace(fname, tracenum, sstart=None, send=None):
    """Get one trace from one file.

    Generator that yields exactly one value: the [sstart:send] window of
    trace *tracenum* (the full trace when both bounds are None).
    """
    with sg.open(fname, 'r', ignore_geometry=True) as srcp:
        full_trace = srcp.trace[tracenum]
        yield full_trace[sstart:send]
def main():
    """Apply a previously trained ML model to seismic attribute volumes and
    write the predicted pseudo-log into a copied, zeroed SEG-Y file.

    NOTE(review): this redefinition shadows the earlier main() in this file.
    """
    cmdl = getcommandline()
    # csv file generated from _build without prediction column
    allwdfsa = pd.read_csv(cmdl.sattribwellscsv)
    # Extract one well to measure the depth/time increment.
    wlst = allwdfsa.WELL.unique().tolist()
    wdf0 = allwdfsa[allwdfsa['WELL'] == wlst[0]]
    dz = np.diff(wdf0[wdf0.columns[1]])[2]
    print(f'Well Vertical increment {dz}')
    sstart = int(cmdl.startendinterval[0] // dz)
    send = int(cmdl.startendinterval[1] // dz)
    logname = allwdfsa.columns[-1]
    print(f'Curve Name: {logname} Sample start: {sstart} Sample end: {send}')
    if cmdl.segyfileslist:
        sflist = process_segylist(cmdl.segyfileslist)
        dirsplit, fextsplit = os.path.split(cmdl.segyfileslist)
        fname, fextn = os.path.splitext(fextsplit)
        if cmdl.outdir:
            outfsegy = os.path.join(cmdl.outdir, fname) + f"_p{logname}.sgy"
        else:
            outfsegy = os.path.join(dirsplit, fname) + f"_p{logname}.sgy"
        print('Copying file, please wait ........')
        start_copy = datetime.now()
        copyfile(sflist[0], outfsegy)
        end_copy = datetime.now()
        print(f'Duration of copying: {(end_copy - start_copy)}')
        sr = get_samplerate(outfsegy)
        print(f'Seismic Sample Rate: {sr}')
        print('Zeroing segy file, please wait ........')
        start_zero = datetime.now()
        zero_segy(outfsegy)
        end_zero = datetime.now()
        print(f'Duration of zeroing: {(end_zero - start_zero)}')
        scols = list()
        for f in sflist:
            dirsplit, fextsplit = os.path.split(f)
            fname, fextn = os.path.splitext(fextsplit)
            scols.append(fname)
        start_process = datetime.now()
        # Load the trained model for the requested type.
        # SECURITY NOTE: pickle.load executes arbitrary code from the model
        # file — only load model files from trusted sources.
        if cmdl.modeltype == 'cbr':
            inmodel = CatBoostRegressor()
            inmodel.load_model(cmdl.MLmodelname)
        elif cmdl.modeltype == 'linreg':
            with open(cmdl.MLmodelname, 'rb') as mfile:
                inmodel = pickle.load(mfile)
        elif cmdl.modeltype == 'knn':
            with open(cmdl.MLmodelname, 'rb') as mfile:
                inmodel = pickle.load(mfile)
        elif cmdl.modeltype == 'svr':
            with open(cmdl.MLmodelname, 'rb') as mfile:
                inmodel = pickle.load(mfile)
        elif cmdl.modeltype == 'ann':
            # BUG FIX: the Keras json/weights loading below previously lived
            # in the 'sgdr' branch, where annwtsfname is undefined (NameError)
            # and it clobbered the sgdr pickle model.  It belongs here: the
            # 'ann' branch computed annwtsfname but never loaded the model.
            anndirsplit, annfextsplit = os.path.split(cmdl.segyfileslist)
            annfname, annfextn = os.path.splitext(annfextsplit)
            annwtsfname = os.path.join(anndirsplit, annfname) + '.h5'
            with open(cmdl.MLmodelname, 'r') as json_file:
                loaded_model_json = json_file.read()
            inmodel = model_from_json(loaded_model_json)
            # load weights into new model
            inmodel.load_weights(annwtsfname)
            print("Loaded model from disk")
            inmodel.compile(loss='mean_squared_error', optimizer='adam')
        elif cmdl.modeltype == 'sgdr':
            with open(cmdl.MLmodelname, 'rb') as mfile:
                inmodel = pickle.load(mfile)
        # Predict trace by trace and write into the zeroed SEG-Y copy.
        with sg.open(outfsegy, "r+") as srcp:
            for trnum, tr in enumerate(srcp.trace):
                # Gather the matching sample window from every attribute file.
                Xpred = collect_traces(sflist, trnum, sstart=sstart, send=send)
                trpred = modelpredict(inmodel, Xpred,
                                      scalelog=cmdl.donotscalelog,
                                      logmin=cmdl.logscalemm[0],
                                      logmax=cmdl.logscalemm[1])
                tr[sstart:send] = trpred
                srcp.trace[trnum] = tr
        print(f'Successfully generated {outfsegy}')
        end_process = datetime.now()
        print(f'Duration: {end_process - start_process}')