def generate_all_data(random_model_repeat=c.random_model_repeat):
    """Model `random_model_repeat` random velocity realizations in parallel,
    then merge the per-worker RSF outputs into single training files.

    Workers are round-robined over GPUs 1-3 (GPU 0 is reserved for the
    network).  At most ~100 worker handles are kept live; once exceeded, the
    oldest 50 are joined before spawning more.  Restores the module-level
    CUDA_VISIBLE_DEVICES setting on exit.
    """
    K.clear_session()
    modeling_tic = time.time()
    workers = []
    for iShotBlock in range(random_model_repeat):
        # we run modeling on 1-3 GPUs, GPU 0 is for the network
        os.environ["CUDA_VISIBLE_DEVICES"] = str((iShotBlock % 3) + 1)
        worker = multiprocessing.Process(target=generate_rsf_data_multi,
                                         kwargs={'iShotBlock': iShotBlock})
        worker.start()
        workers.append(worker)
        # throttle: with >100 live handles, wait for the 50 oldest to finish
        if len(workers) > 100:
            for old in workers[:50]:
                old.join()
            workers = workers[50:]
    for worker in workers:
        worker.join()
    print(f"Time for modeling = {toc(modeling_tic)}")

    # concatenate every worker's output along shot (axis=4) / log (axis=3)
    merging_tic = time.time()
    cmd("sfcat data_*/shots_cmp.rsf axis=4 > shots_cmp_full.rsf")
    cmd("sfcat data_*/logs.rsf axis=3 > logs_full.rsf")
    print(f"Time for merging rsf files = {toc(merging_tic)}")

    # hand the GPUs back to the training process
    os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES
def train_model(prefix="multi", X_scaled=X_scaled_multi, T_scaled=T_scaled_multi, weights=None):
    """Build and train the CMP->log network on (X_scaled, T_scaled).

    Parameters
    ----------
    prefix : str
        Run tag (kept for interface compatibility; unused in this body).
    X_scaled, T_scaled : ndarray
        Scaled inputs / targets; default to the module-level multi-CMP arrays.
    weights : str or None
        Optional checkpoint path.  When given, the net starts from these
        weights and a much smaller initial learning rate (fine-tuning).

    Returns
    -------
    tuple
        (net, history): the trained Keras model and its fit history.
    """
    # drop the data-ready sentinel so batch_generator waits for fresh data
    cmd("rm new_data_ready")
    #cmd("ssh 10.109.66.7 'rm ~/log_estimation/data/new_data_ready'")

    lr_start = 0.001
    if weights is not None:
        # fine-tuning from a checkpoint: start with a tiny learning rate
        lr_start = 1e-5

    net = create_model(np.shape(X_scaled)[1:], np.shape(T_scaled)[1:])
    net.compile(loss=tv_loss, optimizer=keras.optimizers.Nadam(lr_start), metrics=[R2])
    #net.summary()
    if weights is not None:
        net.load_weights(weights)

    early_stopping = EarlyStopping(monitor='val_loss', patience=21)
    model_checkpoint = ModelCheckpoint("trained_net", monitor='val_loss',
                                       save_best_only=True, verbose=1, period=5)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=7,
                                  min_lr=1e-5, verbose=1)

    # NOTE(review): validation data is the training data itself, so val_loss
    # tracks training fit rather than generalization -- confirm intended.
    X_valid = X_scaled
    T_valid = T_scaled
    print(f"X validation data size = {np.shape(X_valid)}")

    # TRAINING
    batch_size = 128
    # we flip every batch, so, going through whole data needs twice as many batches
    steps_per_epoch = len(X_scaled) // batch_size
    print(f"Batch size = {batch_size}, batches per epoch = {steps_per_epoch}")
    # NOTE(review): shuffle=True has no effect with a plain generator input;
    # shuffling happens inside batch_generator itself.
    history = net.fit_generator(
        batch_generator(X_scaled, T_scaled, batch_size=batch_size),
        validation_data=(X_valid, T_valid),
        epochs=100,
        verbose=2,
        shuffle=True,
        max_queue_size=200,
        workers=10,
        use_multiprocessing=False,
        steps_per_epoch=steps_per_epoch,
        callbacks=[model_checkpoint, reduce_lr, early_stopping])
    print("Optimization Finished!")
    return net, history
def generate_rsf_data_multi(model_name="marm.rsf", central_freq=c.central_freq, dt=c.dt, nt=c.nt,
                            sxbeg=c.sxbeg, gxbeg=c.gxbeg, szbeg=c.szbeg,
                            jsx=c.jsx, jgx=c.jgx, jdt=c.jdt,
                            logs_out="logs.rsf", shots_out="shots_cmp.rsf", iShotBlock=None):
    """Worker entry point: draw one random velocity model and model its data.

    Sets up a per-worker scratch directory in shared memory for Madagascar
    binaries, seeds the RNG in the forked process, generates a randomized
    model in ./data_{iShotBlock}, and runs the forward modeling there.  The
    acquisition parameters are accepted for interface compatibility; modeling
    itself relies on the defaults of generate_rsf_data().
    """
    scratch_dir = f"/dev/shm/RSFTMP/data_{iShotBlock}"
    work_dir = f"data_{iShotBlock}"
    cmd(f"mkdir {scratch_dir}")
    cmd(f"chmod 777 {scratch_dir}")
    # Madagascar stores its binary payloads under DATAPATH (kept in RAM)
    os.environ["DATAPATH"] = scratch_dir
    cmd("echo $DATAPATH")
    cmd(f"mkdir {work_dir}")
    # re-seed the RNG so each forked worker draws a different model
    seed()
    with cd(work_dir):
        _model = generate_model(model_input=f"../{c.trmodel}",
                                random_state_number=(iShotBlock + randint(0, 1e7)))
        generate_rsf_data()
def batch_generator(X, T, T_scaler=T_scaler, batch_size=None):
    """Infinite batch generator with hot data reloading.

    Yields (X[batch], T[batch]) mini-batches of `batch_size` shuffled sample
    indices.  The sentinel file "new_data_ready" announces that freshly merged
    data is on disk: the generator first blocks until a sentinel appears, then
    on every pass either reloads shots_cmp_full.rsf / logs_full.rsf (consuming
    the sentinel and replacing the X/T arrays passed in) or keeps reusing the
    arrays already in memory.

    NOTE(review): with the default batch_size=None, len(batch) == batch_size
    can never hold, so nothing would ever be yielded -- callers must pass an
    explicit batch_size (train_model passes 128); confirm intended.
    """
    batch = []
    print(
        "generator restarted !!!!!!!!!!!!!!!!!!!!!!!!!!! waiting for new data")
    # block until the first modeled data set has been merged to disk
    while not os.path.exists("new_data_ready"):
        time.sleep(1)
    while True:
        # it might be a good idea to shuffle your data before each epoch
        if os.path.exists("new_data_ready"):
            # consume the sentinel and (re)load the merged data from disk
            cmd("rm new_data_ready")
            X_rsf, T_rsf = read_rsf_XT(shots_rsf='shots_cmp_full.rsf',
                                       logs_rsf='logs_full.rsf')
            X, T = prepare_XT(X_rsf, T_rsf, T_scaler)
            print("new data loaded")
        else:
            print("reusing the old data")
        # fresh random sample order for this pass over the data
        indices = np.arange(len(X))
        np.random.shuffle(indices)
        print("indices reshuffled")
        for i in indices:
            # abandon this pass as soon as newer data is announced
            if os.path.exists("new_data_ready"):
                break
            batch.append(i)
            if len(batch) == batch_size:
                yield X[batch], T[batch]
                batch = []
def generate_rsf_data(model_name="marm.rsf", central_freq=c.central_freq,
                      dt=c.dt, dx=c.dx, nt=c.nt,
                      sxbeg=c.sxbeg, gxbeg=c.gxbeg, szbeg=c.szbeg,
                      jsx=c.jsx, jgx=c.jgx, jdt=c.jdt,
                      logs_out="logs.rsf", shots_out="shots_cmp.rsf",
                      full_shots_out=None):
    """Model shot gathers for a velocity model and derive CMP gathers + logs.

    Runs Madagascar's sfgenshots over `model_name`, decimates/band-passes the
    shots, sorts them into CMP gathers (`shots_out`), and extracts smoothed
    velocity logs (`logs_out`) as the training targets.

    Parameters mirror the acquisition geometry from the module-level config
    `c`.  When `full_shots_out` is given, the raw (undecimated) shots are also
    copied there.  Returns 0 on completion.
    """
    # get size of the model
    model_orig = sf.Input(model_name)
    Nx = model_orig.int("n2")
    print(Nx)
    # shot count / receivers per shot derived from the geometry
    ns = (Nx - 2 * sxbeg) // jgx
    ng = 2 * (sxbeg - gxbeg) // jgx + 1
    print(f"Total number of shots = {ns}")
    t_start = time.time()
    # NOTE(review): source stepping uses jsx={jgx} (the receiver increment);
    # the jsx parameter only drives the log decimation below -- confirm.
    cmd((
        f"sfgenshots < {model_name} csdgather=y fm={central_freq} amp=1 dt={dt} ns={ns} ng={ng} nt={nt} "
        f"sxbeg={sxbeg} chk=n szbeg={szbeg} jsx={jgx} jsz=0 gxbeg={gxbeg} gzbeg={szbeg} jgx={jgx} jgz=0 > shots.rsf"
    ))
    print(f"Modeling time for {ns} shots = {time.time()-t_start}")
    if full_shots_out is not None:
        cmd(f"sfcp < shots.rsf > {full_shots_out}")

    # ## Analyze and filter the data set generated
    # correct header and reduce sampling in time jdt (usually 4) times
    cmd(f"sfput < shots.rsf d3={jgx*dx} | sfwindow j1={jdt} | sfbandpass flo=2 fhi=4 > shots_decimated.rsf"
        )
    cmd("sfrm shots.rsf")
    # sort into cmp gathers and discard odd cmps and not full cmps
    cmd(f"sfshot2cmp < shots_decimated.rsf half=n | sfwindow j3=2 f3={ng//2} n3={ns} > {shots_out}"
        )
    print(
        f"sfshot2cmp < shots_decimated.rsf half=n | sfwindow j3=2 f3={ng//2} n3={ns} > {shots_out}"
    )
    # cmd(f"sfrm shots_decimated.rsf")
    # create the logs -- training outputs
    cmd(f"sfsmooth < {model_name} rect2=2 | sfwindow f2={sxbeg} j2={jsx} n2={ns} > {logs_out}"
        )
    #cmd(f"sfin < {logs_out}")
    return 0
aug_flip, merge_dict, np_to_rsf, rsf_to_np, nrms, tf_random_flip_channels)
from myutils import const as c

seed()
# set up matplotlib
matplotlib.rc('image', cmap='RdBu_r')
seaborn.set_context('paper', font_scale=5)

# GPUs visible to this script; GPU 0 is excluded here (see generate_all_data)
CUDA_VISIBLE_DEVICES = "1,2,3"
os.environ["CUDA_VISIBLE_DEVICES"] = CUDA_VISIBLE_DEVICES
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# Madagascar binaries will be stored in DATAPATH (RAM on Linux recommended)
cmd("mkdir /dev/shm/RSFTMP")
cmd("chmod 777 /dev/shm/RSFTMP")
os.environ["DATAPATH"] = "/dev/shm/RSFTMP/"

#%%
# elastic_transform parameters for random model deformation
alpha_deform = 500
sigma_deform = 50


def generate_model(model_input=c.trmodel, model_output="marm.rsf", dx=c.dx, stretch_X=1, training_flag=False, random_state_number=c.random_state_number,
def test_on_model(folder="marmvel1D", net_dict=None, prefix="singleCMP", model_filename=None,
                  distort_flag=False, stretch_X=None, nCMP_max=nCMP, generate_rsf_data_flag=True,
                  jgx=jgx, sxbeg=sxbeg, gxbeg=gxbeg, X_scaler=X_scaler):
    """Evaluate the network ensemble on a benchmark velocity model.

    Expands the model from `model_filename` (defaults to "{folder}.hh"),
    optionally re-models the synthetic data in `folder`, predicts logs with
    every net in `net_dict` (each applied to the input and to its CMP-flipped
    copy), and saves figures of the input CMPs, the ensemble mean, its
    standard deviation, and the true model under ../latex/Fig/.

    NOTE(review): assumes net input axis 3 is the CMP-channel axis and that
    net_dict has a key "0" -- confirm against the training setup.
    """
    if model_filename==None:
        model_filename=f"{folder}.hh"
    fig_path = f"../latex/Fig/test_{prefix}_{folder}"
    # expand model
    model_output="vel_test.rsf"
    print(model_output)
    vel_test = generate_model(model_input=model_filename,
                              model_output=model_output,
                              stretch_X=stretch_X,
                              random_state_number=const.random_state_number,
                              distort_flag=distort_flag,
                              crop_flag=False)
    # model data
    if generate_rsf_data_flag:
        cmd(f"mkdir {folder}")
        cmd(f"cp {model_output} {folder}/{model_output}")
        # check stability (CFL-style bound on dt from grid step and max velocity)
        print(f"you chose dt = {dt}, dt < {dx/np.max(vel_test):.4f} should be chosen for stability \n")
        # force stability
        assert dt < dx/np.max(vel_test)
        generate_rsf_data(model_name=f"{folder}/vel_test.rsf",
                          shots_out=f"{folder}/shots_cmp_test.rsf",
                          logs_out=f"{folder}/logs_test.rsf")
    # read data
    X_data_test, T_data_test = read_rsf_to_np(shots_rsf=f"{folder}/shots_cmp_test.rsf",
                                              logs_rsf=f"{folder}/logs_test.rsf")
    # X_scaled
    X_scaled = scale_X_data(X_data_test, X_scaler)
    # number of CMP channels the loaded nets expect
    nCMP = int(net_dict["0"].input.shape[3])
    X_scaled, T_data_test = make_multi_CMP_inputs(X_scaled, T_data_test, nCMP_max)
    sample_reveal = nCMP_max+1
    # show the first and last CMP channel of one sample, mirrored back to back
    plt_nb_T(1e3*np.concatenate((np.squeeze(X_scaled[sample_reveal,:,:,-1]),
                                 np.flipud(np.squeeze(X_scaled[sample_reveal,:,:,0]))), axis=0),
             title="CMP first | CMP last",
             dx=200, dz=1e3*dt*jdt,
             origin_in_middle=True,
             ylabel="Time(s)",
             fname=f"{fig_path}_X_scaled",
             cbar_label = "")
    if nCMP == 1:
        # single-CMP nets see only the central channel
        X_scaled = X_scaled[:,:,:,nCMP_max//2:nCMP_max//2+1]
    # predict with all networks and save average
    T_pred_total = np.zeros_like(net_dict["0"].predict(X_scaled))  # only its shape is used
    T_pred_dict = np.zeros((2*len(net_dict), T_pred_total.shape[0], T_pred_total.shape[1]))
    iNet=0
    for net in net_dict.values():
        # each net contributes two predictions: as-is and CMP-axis flipped
        T_pred_tmp = net.predict(X_scaled)
        T_pred_tmp = T_scaler.inverse_transform(T_pred_tmp)
        T_pred_dict[iNet,:,:] = T_pred_tmp
        T_pred_tmp = net.predict(np.flip(X_scaled, axis=3))
        T_pred_tmp = T_scaler.inverse_transform(T_pred_tmp)
        T_pred_dict[iNet+1,:,:] = T_pred_tmp
        iNet += 2
    # ensemble mean and per-pixel variance across all 2*len(net_dict) predictions
    T_pred = np.mean(T_pred_dict, axis=0)
    variance = np.var(T_pred_dict, axis=0)
    plt_nb_T(np.sqrt(variance), title="Standard deviation",
             dx=jgx*dx, dz=jlogz*dx,
             fname=f"{fig_path}_inverted_std_dev",
             vmin=0.05, vmax=1)
    # plt_nb_T(T_pred-T_data_test, title="Pred-True",
    #          dx=jgx*dx, dz=jlogz*dx,
    #          fname=f"{fig_path}_inverted_std_dev",
    #          vmin=-1, vmax=1)
    plt_nb_T(T_pred,
             title=f"{prefix} estimate, NRMS={nrms(T_pred, T_data_test):.1f}%",
             dx=jgx*dx, dz=jlogz*dx,
             vmin=np.min(1e-3*T_data_test), vmax=np.max(1e-3*T_data_test),
             fname=f"{fig_path}_inverted")
    plt_nb_T(T_data_test, dx=jgx*dx, dz=jlogz*dx,
             fname=f"{fig_path}_true",
             title=f"True, R2 = {r2_score(T_pred.flatten(), T_data_test.flatten()):.2f}")
    print(np.shape(1e3*T_scaled[sample_reveal-(nCMP+1)//2:sample_reveal+(nCMP-1)//2:nCMP]))
#import styler from myutils import (cd, cmd, const, elastic_transform, plt_nb_T, toc, aug_flip, merge_dict, np_to_rsf, rsf_to_np, nrms, tf_random_flip_channels) seed() # set up matplotlib matplotlib.rc('image', cmap='RdBu_r') seaborn.set_context('paper', font_scale=5) CUDA_VISIBLE_DEVICES = "0,1,2,3" os.environ["CUDA_VISIBLE_DEVICES"]=CUDA_VISIBLE_DEVICES os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" # Madagascar binaries will be stored in DATAPATH (RAM on Linux recommended) cmd("mkdir /dev/shm/RSFTMP") cmd("chmod 777 /dev/shm/RSFTMP") os.environ["DATAPATH"]="/dev/shm/RSFTMP/" # execution flags generate_rsf_data_flag = False retrain_flag = False #(sys.argv[1] == "--retrain") print(f"retrain_flag = {retrain_flag}") print(type(retrain_flag)) random_model_repeat = 5000 stretch_X_train = 1 tic_total = time.time() #%% [markdown] # ## Introduction