def evaluate_trail_BDY_lr(multi_flag, eval_data_all, save_Simulator_Ypred=False, MSE_Simulator=False):
    """Sweep boundary strength and initial learning rate over the chosen datasets,
    evaluating the retrained model for every combination.

    :param multi_flag: forwarded to evaluate_from_model
    :param eval_data_all: forwarded to evaluate_from_model
    :param save_Simulator_Ypred: forwarded to evaluate_from_model
    :param MSE_Simulator: forwarded to evaluate_from_model
    """
    # Previously tried grids: lr [2, 1, 0.5, 0.1]; BDY [0.05, 0.01, 0.001];
    # datasets ["robotic_arm", "ballistics"]
    learning_rates = [0.5]
    boundary_strengths = [0.001]
    datasets = ["Chen"]
    for eval_model in datasets:
        for init_lr in learning_rates:
            for bdy in boundary_strengths:
                # The flags object is only used as a carrier for eval_model
                useless_flags = flag_reader.read_flag()
                useless_flags.eval_model = "retrain5" + eval_model
                evaluate_from_model(useless_flags.eval_model,
                                    multi_flag=multi_flag,
                                    eval_data_all=eval_data_all,
                                    save_Simulator_Ypred=save_Simulator_Ypred,
                                    MSE_Simulator=MSE_Simulator,
                                    init_lr=init_lr,
                                    BDY_strength=bdy)
def random_swipe():
    """Randomized hyper-parameter sweep: draw num_samples configurations and train each.

    Continuous lambdas are sampled uniformly from a range; the discrete noise
    scales are picked via the first element of a random permutation.
    """
    # Uniform sampling ranges [low, high]
    mse_range = [1, 1000]
    z_range = [1, 1000]
    rev_range = [1, 1000]
    # Discrete candidate values
    zeros_noise_choices = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]
    y_noise_choices = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]
    num_samples = 60
    for _ in range(num_samples):
        flags = flag_reader.read_flag()  # base configuration
        flags.lambda_mse = np.random.uniform(low=mse_range[0], high=mse_range[1])
        flags.lambda_z = np.random.uniform(low=z_range[0], high=z_range[1])
        flags.lambda_rev = np.random.uniform(low=rev_range[0], high=rev_range[1])
        flags.zeros_noise_scale = zeros_noise_choices[
            np.random.permutation(len(zeros_noise_choices))[0]]
        flags.y_noise_scale = y_noise_choices[
            np.random.permutation(len(y_noise_choices))[0]]
        flags.model_name = flags.data_set + 'lambda__mse_{:.2g}_z_{:.2g}_rev_{:.2g}_noise__zeros_{:.3g}_y_{:.3g}'.format(
            flags.lambda_mse, flags.lambda_z, flags.lambda_rev,
            flags.zeros_noise_scale, flags.y_noise_scale)
        training_from_flag(flags)
def hyperswipe():
    """Grid sweep over KL coefficient, encoder/decoder depth & width, dim_z and reg_scale."""
    reg_scales = [1e-4]  # previously [1e-3, 1e-4, 5e-3]
    kl_coeffs = [5e-2, 0.1, 1, 5]
    layer_sizes = [1000]
    dim_zs = [12, 20, 50]
    for kl_coeff in kl_coeffs:
        for layer_num in range(10, 15, 2):
            for layer_size in layer_sizes:
                for dim_z in dim_zs:
                    for reg_scale in reg_scales:
                        flags = flag_reader.read_flag()  # base case
                        flags.reg_scale = reg_scale
                        flags.dim_z = dim_z
                        flags.kl_coeff = kl_coeff
                        # Decoder architecture: (y, z) in, x out
                        linear_d = [layer_size] * layer_num
                        linear_d[0] = flags.dim_y + flags.dim_z
                        linear_d[-1] = flags.dim_x
                        # Encoder architecture: (y, x) in, 2*dim_z out
                        linear_e = [layer_size] * layer_num
                        linear_e[0] = flags.dim_y + flags.dim_x
                        linear_e[-1] = 2 * flags.dim_z
                        flags.linear_d = linear_d
                        flags.linear_e = linear_e
                        flags.model_name = (flags.data_set + '_kl_coeff_' + str(kl_coeff)
                                            + '_layer_num_' + str(layer_num)
                                            + '_unit_' + str(layer_size)
                                            + '_dim_z_' + str(dim_z)
                                            + '_reg_scale_' + str(flags.reg_scale))
                        training_from_flag(flags)
def hyperswipe(dataset, rep=1):
    """Linear hyper-parameter sweep over layer width/depth for the forward model.

    NOTE(review): ``dataset`` and ``rep`` are currently unused here; kept for
    interface compatibility with the sibling sweep helpers.
    """
    layer_size_list = [5000, 1000]
    #layer_size_list = [1750]
    #layer_size_list = [100, 250, 500]
    reg_scale_list = [1e-4]
    for reg_scale in reg_scale_list:
        for i in range(1):
            for layer_num in range(8, 25, 2):
                for layer_size in layer_size_list:
                    flags = flag_reader.read_flag()  # setting the base case
                    linear = [layer_size for j in range(layer_num)]  # the linear units
                    linear[0] = 10      # input dimension
                    linear[-1] = 1001   # output dimension
                    flags.lr = 1e-4
                    flags.linear = linear
                    flags.reg_scale = reg_scale
                    #flags.conv_kernel_size = [3, 3, 5]
                    #flags.conv_channel_out = [4, 4, 4]
                    #flags.conv_stride = [1, 1, 1]
                    flags.model_name = flags.data_set + 'no_conv_' + str(layer_size) + '_num_' + str(layer_num) + '_lr_' + str(flags.lr) + 'reg_scale_' + str(reg_scale) + 'trail_' + str(i)
                    #flags.model_name = flags.data_set + 'conv_444_335_111_linear_' + str(layer_size) + '_num_' + str(layer_num) + '_lr_' + str(flags.lr) + 'reg_scale_' + str(reg_scale) + 'trail_' + str(i)
                    training_from_flag(flags)
    # Fix: removed a stray trailing `dirs = os.listdir(spec_dir)` and a dangling
    # `"""` — `spec_dir` is undefined in this function (NameError); both appear
    # to be copy/paste residue from random_grid_search.
def evaluate_with_ratio(test_ratio):
    """Run evaluation at the given test ratio, then draw the spectrum/geometry comparison plot."""
    flags = flag_reader.read_flag()
    evaluatemain(flags, eval_forward=False, test_ratio=test_ratio, plot_histo=False)
    # 3x2 grid of comparison panels at figure size (13, 8)
    plotsAnalysis.SpectrumComparisonNGeometryComparison(3, 2, (13, 8),
                                                        flags.model_name,
                                                        flags.boundary)
def evaluate_different_dataset(multi_flag, eval_data_all, save_Simulator_Ypred=False, MSE_Simulator=False):
    """Evaluate the trained model of every dataset in the list with one call."""
    for eval_model in ["robotic_arm", "sine_wave"]:
        # flags object is only a carrier for the model name
        useless_flags = flag_reader.read_flag()
        useless_flags.eval_model = eval_model
        evaluate_from_model(useless_flags.eval_model,
                            multi_flag=multi_flag,
                            eval_data_all=eval_data_all,
                            save_Simulator_Ypred=save_Simulator_Ypred,
                            MSE_Simulator=MSE_Simulator)
def evaluate_different_dataset(multi_flag, eval_data_all):
    """Evaluate a fixed list of previously trained models (named by their sweep configs)."""
    model_names = [
        "robotic_armcouple_layer_num5dim_total4",
        "sine_wavecouple_layer_num6dim_total5",
    ]
    for eval_model in model_names:
        # Only eval_model on the flags is consumed downstream
        useless_flags = flag_reader.read_flag()
        useless_flags.eval_model = eval_model
        evaluate_from_model(useless_flags.eval_model,
                            multi_flag=multi_flag,
                            eval_data_all=eval_data_all)
def evaluate_different_dataset(multi_flag, eval_data_all):
    """Evaluate all four benchmark datasets' models with one call."""
    for eval_model in ["robotic_arm", "sine_wave", "ballistics", "meta_material"]:
        # flags are read only to obtain a mutable container for eval_model
        useless_flags = flag_reader.read_flag()
        useless_flags.eval_model = eval_model
        evaluate_from_model(useless_flags.eval_model,
                            multi_flag=multi_flag,
                            eval_data_all=eval_data_all)
def evaluate_different_dataset(multi_flag, eval_data_all):
    """Evaluate every benchmark dataset's model, printing progress as it goes."""
    #data_set_list = ['robotic_arm']
    datasets = ['sine_wave', 'ballistics', 'robotic_arm', 'meta_material']
    for eval_model in datasets:
        useless_flags = flag_reader.read_flag()
        useless_flags.eval_model = eval_model
        print("current evaluating ", eval_model)
        evaluate_from_model(useless_flags.eval_model,
                            multi_flag=multi_flag,
                            eval_data_all=eval_data_all)
def evaluate_different_dataset(multi_flag, eval_data_all, save_Simulator_Ypred=False, MSE_Simulator=False):
    """Evaluate the retrained model(s) for each dataset in the list."""
    for eval_model in ["Peurifoy"]:
        # Single retrain instance (index 0); widen the range for more instances
        for j in range(1):
            useless_flags = flag_reader.read_flag()
            useless_flags.eval_model = 'retrain' + str(j) + eval_model
            evaluate_from_model(useless_flags.eval_model,
                                multi_flag=multi_flag,
                                eval_data_all=eval_data_all,
                                save_Simulator_Ypred=save_Simulator_Ypred,
                                MSE_Simulator=MSE_Simulator)
def evaluate_different_dataset(multi_flag, eval_data_all):
    """Evaluate all ten retrain instances of each benchmark dataset's model."""
    datasets = ['sine_wave', 'ballistics', 'robotic_arm', 'meta_material']
    for eval_model in datasets:
        for j in range(10):  # one model per retrain index
            useless_flags = flag_reader.read_flag()
            useless_flags.eval_model = "retrain" + str(j) + eval_model
            evaluate_from_model(useless_flags.eval_model,
                                multi_flag=multi_flag,
                                eval_data_all=eval_data_all)
def evaluate_different_dataset(multi_flag, eval_data_all, save_Simulator_Ypred=False, MSE_Simulator=False):
    """Evaluate a hand-picked list of trained model directories."""
    #data_set_list = ["meta_materialreg0.0005trail_2_complexity_swipe_layer1000_num6"]
    model_names = [
        "robotic_armreg0.0005trail_0_backward_complexity_swipe_layer500_num6",
        "20200506_104444",
        "ballisticsreg0.0005trail_0_complexity_swipe_layer500_num5",
    ]
    for eval_model in model_names:
        useless_flags = flag_reader.read_flag()
        useless_flags.eval_model = eval_model
        evaluate_from_model(useless_flags.eval_model,
                            multi_flag=multi_flag,
                            eval_data_all=eval_data_all,
                            save_Simulator_Ypred=save_Simulator_Ypred,
                            MSE_Simulator=MSE_Simulator)
def evaluate_from_model(model_dir):
    """Evaluating interface.

    1. Retrieve the flags  2. get data  3. initialize network  4. eval
    :param model_dir: The folder to retrieve the model
    :return: None
    """
    # Fix: original read `def def evaluate_from_model(...)` — a syntax error.
    # Strip an optional leading "models/" so the path can be re-joined below
    if model_dir.startswith("models"):
        model_dir = model_dir[7:]
        print("after removing prefix models/, now model_dir is:", model_dir)
    print("Retrieving flag object for parameters")
    flags = flag_reader.load_flags(os.path.join("models", model_dir))
    flags.eval_model = model_dir  # Reset the eval mode
    # Get the data
    train_loader, test_loader = data_reader.read_data(flags)
    print("Making network now")
    # Make Network (inference mode loads the saved weights)
    ntwk = Network(Forward, flags, train_loader, test_loader,
                   inference_mode=True, saved_model=flags.eval_model)
    # Evaluation process
    print("Start eval now:")
    pred_file, truth_file = ntwk.evaluate()
    # Plot the MSE distribution
    plotMSELossDistrib(pred_file, truth_file, flags)
    print("Evaluation finished")


def evaluate_all(models_dir="models"):
    """Evaluate every model under models_dir that has a saved flags.obj.

    :return: None
    """
    for file in os.listdir(models_dir):
        if os.path.isfile(os.path.join(models_dir, file, 'flags.obj')):
            evaluate_from_model(os.path.join(models_dir, file))
    return None


if __name__ == '__main__':
    # Read the flag; only flags.eval_model is used, the rest are ignored
    useless_flags = flag_reader.read_flag()
    print(useless_flags.eval_model)
    # Call the evaluate function from model
    evaluate_from_model(useless_flags.eval_model)
def evaluate_different_dataset(multi_flag, eval_data_all):
    """Evaluate a fixed trio of complexity-sweep model directories."""
    #data_set_list = ["meta_materialreg0.0005trail_2_complexity_swipe_layer1000_num6"]
    for eval_model in ("robotic_armreg0.0005trail_0_backward_complexity_swipe_layer500_num6",
                       "sine_wavereg0.005trail_1_complexity_swipe_layer1000_num8",
                       "ballisticsreg0.0005trail_0_complexity_swipe_layer500_num5"):
        useless_flags = flag_reader.read_flag()
        useless_flags.eval_model = eval_model
        evaluate_from_model(useless_flags.eval_model,
                            multi_flag=multi_flag,
                            eval_data_all=eval_data_all)
def evaluate_different_dataset(multi_flag, eval_data_all):
    """Evaluate the selected couple-layer sweep models, one per dataset."""
    model_names = [
        "robotic_armcouple_layer_num6trail_0",
        "sine_wavecouple_layer_num8trail_0",
        "meta_materialcouple_layer_num5trail_1",
        "gaussian_mixturecouple_layer_num6trail_1",
    ]
    for eval_model in model_names:
        useless_flags = flag_reader.read_flag()
        useless_flags.eval_model = eval_model
        evaluate_from_model(useless_flags.eval_model,
                            multi_flag=multi_flag,
                            eval_data_all=eval_data_all)
def evaluate_different_dataset(multi_flag, eval_data_all):
    """Evaluate the best sweep model found for each dataset, printing progress."""
    model_names = [
        "gaussian_mixturekl_coeff0.04lr0.01reg0.005",
        "robotic_armlayer_num6unit_500reg0.005trail2",
        "meta_materialkl_coeff0.06lr0.001reg0.005",
        "sine_wavekl_coeff0.04lr0.001reg0.005",
    ]
    for eval_model in model_names:
        useless_flags = flag_reader.read_flag()
        useless_flags.eval_model = eval_model
        print("current evaluating ", eval_model)
        evaluate_from_model(useless_flags.eval_model,
                            multi_flag=multi_flag,
                            eval_data_all=eval_data_all)
def retrain_different_dataset(index):
    """Retrain the listed dataset(s) from scratch with per-dataset hyper-parameters.

    :param index: retrain instance index; becomes part of the model name
    """
    # Fix: removed an unused `from utils.helper_functions import load_flags`
    # import and an unused `model_dir` local; also removed the dead first
    # `flags.linear = [201, 1000, ...]` assignments in the Chen and Yang
    # branches, which were immediately overwritten.
    data_set_list = ["Peurifoy"]
    for eval_model in data_set_list:
        flags = flag_reader.read_flag()
        flags.data_set = eval_model
        flags.model_name = "retrain" + str(index) + eval_model
        flags.geoboundary = [
            -1, 1, -1, 1
        ]  # the geometry boundary of meta-material dataset is already normalized in current version
        flags.train_step = 300
        flags.batch_size = 1024
        flags.test_ratio = 0.2
        if 'Chen' in eval_model:
            flags.reg_scale = 0
            flags.lr_decay_rate = 0.4
            flags.linear = [256, 700, 700, 700, 700, 700, 700, 700, 700, 5]
            flags.conv_kernel_size = []
            flags.conv_stride = []
        elif 'Peurifoy' in eval_model:
            flags.reg_scale = 1e-4
            flags.lr = 1e-4
            flags.lr_decay_rate = 0.6
            flags.linear = [201] + 15 * [1700] + [8]
            flags.conv_kernel_size = []
            flags.conv_stride = []
        elif 'Yang' in eval_model:
            flags.reg_scale = 0
            flags.lr_decay_rate = 0.4
            flags.linear = [1990, 1000, 500, 14]
            flags.conv_kernel_size = [7, 5]
            flags.conv_stride = [1, 1]
        print(flags)
        training_from_flag(flags)
def evaluate_different_dataset(multi_flag, eval_data_all, save_Simulator_Ypred=False, MSE_Simulator=False):
    """Evaluate all ten retrain instances for each of the six datasets."""
    datasets = [
        'peurifoy', 'meta_material', 'chen',
        "robotic_arm", "sine_wave", "ballistics",
    ]
    for eval_model in datasets:
        for j in range(10):  # retrain instance index
            useless_flags = flag_reader.read_flag()
            useless_flags.eval_model = "retrain" + str(j) + eval_model
            evaluate_from_model(useless_flags.eval_model,
                                multi_flag=multi_flag,
                                eval_data_all=eval_data_all,
                                save_Simulator_Ypred=save_Simulator_Ypred,
                                MSE_Simulator=MSE_Simulator)
def hyperswipe():
    """Sweep couple-layer count, learning rate and reg_scale (3 trails each)."""
    reg_scales = [1e-4]  # also tried [1e-2, 1e-3, 1e-1]
    learning_rates = [1e-3]  # also tried [1e-1, 1e-2, 1e-3, 1e-4]
    for reg_scale in reg_scales:
        for couple_layer_num in range(14, 15):
            for lr in learning_rates:
                for trail in range(3):
                    flags = flag_reader.read_flag()  # base case
                    flags.couple_layer_num = couple_layer_num
                    flags.lr = lr
                    flags.reg_scale = reg_scale
                    flags.model_name = (flags.data_set + 'couple_layer_num'
                                        + str(couple_layer_num)
                                        + '_lr_' + str(flags.lr)
                                        + '_reg_scale_' + str(reg_scale)
                                        + '_trail_' + str(trail))
                    training_from_flag(flags)
def sim_one(dirx, dset, plot=False):
    """Run the simulator on one Xpred file and save the resulting Ypred.

    :param dirx: either a direct path to an Xpred .csv, or a directory prefix
                 from which the standard file names are derived
    :param dset: dataset name; also used to derive model/eval names
    :param plot: if True, plot prediction vs truth after simulating
    """
    flags = flag_reader.read_flag()
    flags.data_set = dset
    flags.model_name = flags.data_set.lower()
    flags.eval_model = flags.model_name
    if '.csv' in dirx:
        # Explicit Xpred file given; derive the Y file names from it
        fxp = dirx
        fyp = fxp.replace('Xpred', 'Ypred')
        fyt = fxp.replace('Xpred', 'Ytruth')
    else:
        # Directory prefix given; use the conventional best-model file names
        fxp = dirx + 'test_Xpred_' + flags.data_set + '_best_model.csv'
        fyp = dirx + 'test_Ypred_' + flags.data_set + '_best_model.csv'
        fyt = dirx + 'test_Ytruth_' + flags.data_set + '_best_model.csv'
    xmat = np.genfromtxt(fxp, delimiter=' ')
    ypred = simulator(flags.data_set, xmat)
    np.savetxt(fyp, ypred, delimiter=' ')
    if plot:
        pl(fyp, fyt, flags, save_dir=dirx)
def hyperswipe():
    """Sweep MDN hyper-parameters (reg_scale, depth, width, #gaussians).

    MDN training occasionally dies with a device-side assert inside
    mdn.sample; each failed config is retried up to 3 times.
    """
    reg_scale_list = [1e-4]
    #reg_scale_list = [1e-4, 5e-4, 5e-5, 0]
    layer_size_list = [1000]
    #layer_size_list = [500, 1000]
    num_gauss_list = [8]
    #num_gauss_list = [5, 10, 15, 20, 25, 30]
    for reg_scale in reg_scale_list:
        for layer_num in range(10, 17, 2):
            for layer_size in layer_size_list:
                for num_gaussian in num_gauss_list:
                    flags = flag_reader.read_flag()  # setting the base case
                    flags.reg_scale = reg_scale
                    linear = [layer_size for j in range(layer_num)]
                    linear[0] = 201   # input dimension
                    linear[-1] = 8    # output dimension
                    flags.linear = linear
                    flags.num_gaussian = num_gaussian
                    flags.model_name = flags.data_set + '_gaussian_' + str(
                        num_gaussian) + '_layer_num_' + str(
                            layer_num) + '_unit_' + str(
                                layer_size) + '_lr_' + str(
                                    flags.lr) + '_reg_scale_' + str(reg_scale)
                    try:
                        training_from_flag(flags)
                    except RuntimeError:
                        print(
                            "Failing the device-side assert for MDN mdn.sample function! doing 3 retries now:"
                        )
                        for j in range(3):
                            try:
                                print("trying number ", j)
                                training_from_flag(flags)
                                break
                            # Fix: was a bare `except:`, which also swallowed
                            # KeyboardInterrupt/SystemExit; narrowed to Exception.
                            except Exception:
                                print("Failing again! try again")
def hyperswipe():
    """Sweep dim_z, zero-padding width, couple-layer count and lambda_mse.

    NOTE(review): the model-name fragment 'labmda_mse' is a long-standing typo,
    kept as-is because saved model directories are named with it.
    """
    pad_widths = [10]
    mse_lambdas = [0.001, 0.0001]
    z_dims = [3, 5]
    for dim_z in z_dims:
        for dim_pad in pad_widths:
            for couple_layer_num in range(15, 17):
                for lambda_mse in mse_lambdas:
                    flags = flag_reader.read_flag()  # base case
                    flags.couple_layer_num = couple_layer_num
                    flags.lambda_mse = lambda_mse
                    flags.dim_z = dim_z
                    # Total width = observable + latent + zero padding
                    flags.dim_tot = flags.dim_y + flags.dim_z + dim_pad
                    #print("currently running flag", flags)
                    print(flags.data_set)
                    flags.model_name = (flags.data_set + 'couple_layer_num'
                                        + str(couple_layer_num)
                                        + 'labmda_mse' + str(lambda_mse)
                                        + '_lr_' + str(flags.lr)
                                        + '_dim_pad_' + str(dim_pad)
                                        + '_dim_z_' + str(flags.dim_z))
                    training_from_flag(flags)
def hyperswipe():
    """Sweep the backward model's depth and reg_scale (3 trails per config)."""
    reg_scales = [1e-4]  # also tried [0, 1e-5, 5e-5, 1e-4, 5e-4, 1e-3]
    layer_sizes = [1500]
    for reg_scale in reg_scales:
        for trail in range(3):
            for layer_num in range(12, 17):
                for layer_size in layer_sizes:
                    flags = flag_reader.read_flag()  # base case
                    # Backward network architecture (no conv front-end)
                    linear_b = [layer_size] * layer_num
                    linear_b[0] = 201
                    linear_b[-1] = 8
                    #flags.conv_out_channel_b = [4, 4, 4]
                    #flags.conv_kernel_size_b = [3,3,4]
                    #flags.conv_stride_b = [1,1,2]
                    flags.linear_b = linear_b
                    flags.reg_scale = reg_scale
                    flags.model_name = flags.data_set + '_Backward_no_conv_layer_num_' + str(layer_num) + '_unit_' + str(layer_size) + '_reg_scale_' + str(flags.reg_scale) + '_trail_' + str(trail)
                    #flags.model_name = flags.data_set + '_Backward_conv_444_334_112_layer_num_' + str(layer_num) + '_unit_' + str(layer_size) + '_reg_scale_' + str(flags.reg_scale) + '_trail_' + str(trail)
                    training_from_flag(flags)
def hyperswipe():
    """Sweep forward-model depth and width over the configured reg scales.

    NOTE(review): the model name says 'conv_444_335_111' but no conv flags are
    set here — presumably inherited naming; confirm before comparing runs.
    """
    # Fix: removed dead `reg_scale = [1e-4, 0]` — it was immediately shadowed
    # by the loop variable below and never read.
    reg_scale_list = [0]
    layer_size_list = [500, 1000]
    #layer_num = 7
    for reg_scale in reg_scale_list:
        for i in range(3):
            for layer_num in range(7, 10):
                for layer_size in layer_size_list:
                    flags = flag_reader.read_flag()  # setting the base case
                    linear = [layer_size for j in range(layer_num)]  # the linear units
                    linear[0] = 3      # input dimension
                    linear[-1] = 201   # output dimension
                    flags.linear = linear
                    flags.reg_scale = reg_scale
                    flags.model_name = flags.data_set + 'conv_444_335_111_linear_' + str(
                        layer_size) + '_num_' + str(layer_num) + '_lr_' + str(
                            flags.lr) + 'reg_scale_' + str(
                                reg_scale) + 'trail_' + str(i)
                    training_from_flag(flags)
Training interface. 1. Read data 2. initialize network 3. train network 4. record flags :param flag: The training flags read from command line or parameter.py :return: None """ # Get the data train_loader, test_loader = data_reader.read_data(flags) print("Making network now") # Make Network ntwk = Network(Discriminator, Generator, Spectra_encoder, Forward, flags, train_loader, test_loader) # Training process ntwk.train_forward() ntwk.train() # Do the house keeping, write the parameters and put into folder, also use pickle to save the flags obejct write_flags_and_BVE(flags, ntwk.best_validation_loss, ntwk.ckpt_dir) # put_param_into_folder(ntwk.ckpt_dir) if __name__ == '__main__': # Read the parameters to be set flags = flag_reader.read_flag() # Call the train from flag function training_from_flag(flags)
""" This .py file is to run train.py for hyper-parameter swipping in a linear fashion. """ import train #os.environ["CUDA_VISIBLE_DEVICE"] = "-1" #Uncomment this line if you want to use CPU only import numpy as np import flag_reader if __name__ == '__main__': # Setting the loop for setting the parameter for code in range(3, 15): flags = flag_reader.read_flag() #setting the base case # linear = [500 for j in range(i)] #Set the linear units # linear[0] = 8 # The start of linear # linear[-1] = 150 # The end of linear # flags.linear = linear flags.dim_code = code """ Calculation based hyper-parameter, no need to change """ flags.encoder_linear[-1] = code flags.decoder_linear[0] = code """ Calculation based hyper-parameter block end """ for j in range(3): flags.model_name = "trail_" + str(j) + "_dim_code_swipe" + str( code) train.training_from_flag(flags)
:return: None """ for file in os.listdir(models_dir): if os.path.isfile(os.path.join(models_dir, file, 'flags.obj')): evaluate_from_model(os.path.join(models_dir, file)) return None def evaluate_different_dataset(multi_flag, eval_data_all): """ This function is to evaluate all different datasets in the model with one function call """ data_set_list = ["robotic_arm","sine_wave","ballistics","meta_material"] for eval_model in data_set_list: useless_flags = flag_reader.read_flag() useless_flags.eval_model = eval_model evaluate_from_model(useless_flags.eval_model, multi_flag=multi_flag, eval_data_all=eval_data_all) if __name__ == '__main__': # Read the flag, however only the flags.eval_model is used and others are not used useless_flags = flag_reader.read_flag() print(useless_flags.eval_model) # Call the evaluate function from model #evaluate_from_model(useless_flags.eval_model) #evaluate_from_model(useless_flags.eval_model, multi_flag=True) #evaluate_from_model(useless_flags.eval_model, multi_flag=False, eval_data_all=True) evaluate_different_dataset(multi_flag=True, eval_data_all=False) #evaluate_all("models/MM")
def random_grid_search(dataset, rep=1):
    """Endless random search over per-dataset hyper-parameter grids.

    For each dataset folder this cleans up unfinished runs, then loops forever
    drawing random (depth, width, lr, reg, decay[, conv]) combinations and
    training up to `rep` instances of each distinct combination.

    NOTE: the `while True` loop never terminates on its own — the process is
    presumably stopped manually once enough configurations have been tried.

    :param dataset: iterable of dataset names ('Chen', 'Peurifoy', 'Yang')
    :param rep: number of training instances per hyper-parameter combination
    """
    for set in dataset:
        # Setup dataset folder
        spec_dir = os.path.join('models', set)
        if not os.path.exists(spec_dir):
            os.mkdir(spec_dir)
        dirs = os.listdir(spec_dir)
        # Clean up unfinished runs: a run without a "training time.txt" marker
        # is considered incomplete and its directory is deleted
        for run in dirs:
            d = os.path.join(spec_dir, run)
            for f in os.listdir(d):
                if f.find("training time.txt") != -1:
                    break
            else:
                shutil.rmtree(d)
        stride_vals = None
        kernel_vals = None
        if 'Chen' in set:
            reg_scale_list = [1e-5]  # [1e-4, 1e-3, 1e-2, 1e-1]
            layer_num = [13, 5, 8, 11]
            layer_size_list = [300, 1500, 700, 1100, 1900]  # [1900,1500,1100,900,700,500,300]
            lrate = [0.0001, 0.001]  # [1e-1,1e-2,1e-4]
            lr_decay = [0.1, 0.3]  # [0.1,0.3,0.5,0.7,0.9]
            ends = (256, 5)  # (input dim, output dim)
        elif 'Peurifoy' in set:
            reg_scale_list = [1e-3, 1e-4, 1e-5, 0]  # [1e-4, 1e-3, 1e-2, 1e-1]
            layer_num = [10, 13, 15]
            layer_size_list = [1500, 1700, 2000]  # [1900,1500,1100,900,700,500,300]
            lrate = [0.1, 1e-2, 1e-4]
            lr_decay = [0.1, 0.2, 0.3]
            ends = (201, 8)
        elif 'Yang' in set:
            reg_scale_list = [1e-3, 1e-4, 1e-5, 0]  # [1e-4, 1e-3, 1e-2, 1e-1]
            # Only the Yang branch also randomizes a conv front-end
            stride_vals = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
            kernel_vals = [2, 3, 4, 5, 6, 7, 8, 9, 10]
            layer_num = [17, 13, 5, 8, 11]
            layer_size_list = [300, 1500, 700, 1100, 1900]  # [1900,1500,1100,900,700,500,300]
            lrate = [0.1, 0.01, 0.001, 0.0001, 1e-5]  # [1e-1,1e-2,1e-4]
            lr_decay = [0.1, 0.3, 0.5, 0.7]
            ends = (2000, 14)
        else:
            return 0  # unknown dataset name: nothing to search
        stride = []
        kernel = []
        while (True):
            # Draw one random hyper-parameter combination
            ln = random.choice(layer_num)
            ls = random.choice(layer_size_list)
            lr = random.choice(lrate)
            reg_scale = random.choice(reg_scale_list)
            ld = random.choice(lr_decay)
            lin0 = ends[0]  # width of the first linear layer (shrinks through convs)
            conv_config = ''
            if stride_vals and kernel_vals:
                # Randomly draw 0-3 conv layers and their stride/kernel sizes
                num_convs = random.randrange(4)
                stride = []
                kernel = []
                if num_convs > 0:
                    for el in range(num_convs):
                        stride.append(random.choice(stride_vals))
                        kernel.append(random.choice(kernel_vals))
                    # Propagate the conv output length: L_out = 1 + (L_in - k)/s
                    for s, k in iter(zip(stride, kernel)):
                        lin0 = 1 + (lin0 - k) / s
                    # NOTE(review): indentation reconstructed — this reads as
                    # "if the conv stack yields a non-integer length, redraw";
                    # the `continue` restarts the while loop. Confirm against
                    # the original formatting.
                    if not lin0.is_integer():
                        continue
                    else:
                        lin0 = int(lin0)
                    mat = ['kernel'] + list(map(
                        str, kernel)) + ['stride'] + list(map(str, stride))
                    print(mat)
                    conv_config = '-'.join(mat)
            for i in range(rep):
                # If this combination has been tested before, name appropriately
                hyp_config = '_'.join(
                    map(str, (ln, ls, lr, reg_scale, ld, conv_config)))  # Name by hyperparameters
                num_configs = 0  # Count number of test instances
                for configs in dirs:
                    if hyp_config in configs:
                        num_configs += 1
                if num_configs >= rep:  # If # instances >= reps, make extra reps required or skip
                    continue
                name = '_'.join((hyp_config, str(num_configs)))
                # Model run
                flags = flag_reader.read_flag()
                flags.data_set = set  # Save info
                flags.model_name = os.path.join(set, name)
                flags.linear = [ls for j in range(ln)]  # Architecture
                flags.linear[-1] = ends[-1]
                flags.conv_stride = stride
                flags.conv_kernel_size = kernel
                flags.linear[0] = lin0
                flags.lr = lr  # Other params
                flags.lr_decay_rate = ld
                flags.reg_scale = reg_scale
                flags.batch_size = 1024
                flags.train_step = 300
                flags.normalize_input = True
                training_from_flag(flags)
            # Refresh the run listing so duplicate counting sees the new run
            dirs = os.listdir(spec_dir)
def hyperswipe(dataset, rep=1):
    """Exhaustive grid sweep over per-dataset hyper-parameter lists.

    For each dataset folder this cleans up unfinished runs, then trains `rep`
    instances of every (reg_scale, depth, width, lr, lr_decay) combination,
    skipping combinations that already have `rep` completed runs on disk.

    :param dataset: iterable of dataset names ('Chen', 'Peurifoy', 'Yang')
    :param rep: number of training instances per combination
    """
    for set in dataset:
        # Setup dataset folder
        spec_dir = os.path.join('models', set)
        if not os.path.exists(spec_dir):
            os.mkdir(spec_dir)
        dirs = os.listdir(spec_dir)
        # Clean up unfinished runs: no "training time.txt" marker => delete dir
        for run in dirs:
            d = os.path.join(spec_dir, run)
            for f in os.listdir(d):
                if f.find("training time.txt") != -1:
                    break
            else:
                shutil.rmtree(d)
        stride = []
        kernel = []
        if 'Chen' in set:
            # Faster drops helped, but did add instability. lr 0.1 was too large, reg 1e-5 reduced instability but not enough
            # reg 1e-4 combined with faster decay = 0.3 and lower starting lr = 0.001 has great, consistent results
            # ^ reg = 0 -> High instability but results similar, reg = 1e-5 -> instability is lower
            # reg 1e-5 shown to be better than 0 or 1e-4, can be improved if lr gets lower later though lr=e-4,lr_decay=.1
            reg_scale_list = [1e-5]  # [1e-4, 1e-3, 1e-2, 1e-1]
            layer_num = [13, 5, 8, 11]
            layer_size_list = [300, 1500, 700, 1100, 1900]  # [1900,1500,1100,900,700,500,300]
            lrate = [0.0001, 0.001, 0.01]  # [1e-1,1e-2,1e-4]
            lr_decay = [0.1, 0.3, 0.4]  # [0.1,0.3,0.5,0.7,0.9]
            ends = (256, 5)  # (input dim, output dim)
        elif 'Peurifoy' in set:
            reg_scale_list = [0]  # [1e-4, 1e-3, 1e-2, 1e-1]
            layer_num = [7]
            layer_size_list = [300]  # [1900,1500,1100,900,700,500,300]
            lrate = [0.1]  # [1e-1,1e-2,1e-4]
            lr_decay = [0.4]  # [0.1,0.3,0.5,0.7,0.9]
            ends = (201, 3)
        elif 'Yang' in set:
            reg_scale_list = [1e-4]  # [1e-4, 1e-3, 1e-2, 1e-1]
            # NOTE(review): stride/kernel are set to int 0 here, overriding the
            # empty lists above; downstream they land in flags.conv_stride /
            # flags.conv_kernel_size — confirm the consumer accepts 0 as
            # "no conv" before relying on this branch.
            stride = 0
            kernel = 0
            layer_num = [13, 5, 8, 11]
            layer_size_list = [300, 1500, 700, 1100, 1900]  # [1900,1500,1100,900,700,500,300]
            lrate = [0.001]  # [1e-1,1e-2,1e-4]
            lr_decay = [0.3]  # [0.1,0.3,0.5,0.7,0.9]
            ends = (2000, 14)
        else:
            return 0  # unknown dataset name: nothing to sweep
        for reg_scale in reg_scale_list:
            for ln in layer_num:
                for ls in layer_size_list:
                    for lr in lrate:
                        for ld in lr_decay:
                            for i in range(rep):
                                # If this combination has been tested before, name appropriately
                                hyp_config = '_'.join(
                                    map(str, (ln, ls, lr, reg_scale, ld)))  # Name by hyperparameters
                                num_configs = 0  # Count number of test instances
                                for configs in dirs:
                                    if hyp_config in configs:
                                        num_configs += 1
                                if num_configs >= rep:  # If # instances >= reps, make extra reps required or skip
                                    continue
                                name = '_'.join((hyp_config, str(num_configs)))
                                # Model run
                                flags = flag_reader.read_flag()
                                flags.data_set = set  # Save info
                                flags.model_name = os.path.join(set, name)
                                flags.linear = [ls for j in range(ln)
                                                ]  # Architecture
                                flags.linear[0] = ends[0]
                                flags.linear[-1] = ends[-1]
                                flags.conv_stride = stride
                                flags.conv_kernel_size = kernel
                                flags.lr = lr  # Other params
                                flags.lr_decay_rate = ld
                                flags.reg_scale = reg_scale
                                flags.batch_size = 1024
                                flags.train_step = 300
                                flags.normalize_input = True
                                training_from_flag(flags)
                                dirs = os.listdir(
                                    spec_dir
                                )  # Update dirs to include latest run