# Select a classifier from the CLI arguments and evaluate it on the
# waveform dataset.
classifier = io_args.classifier

# Load the train/test splits; the last column holds the label.
df_train = pd.read_csv('../data/wv_train.csv')
df_test = pd.read_csv('../data/wv_test.csv')

train_arr = np.array(df_train)
test_arr = np.array(df_test)
X, y = train_arr[:, :-1], train_arr[:, -1]
Xtest, ytest = test_arr[:, :-1], test_arr[:, -1]

# CLI name -> (banner, model factory). Factories are lazy so only the
# requested model is ever constructed.
model_options = {
    'rf': ("Running Random Forests",
           lambda: RandomForest(num_trees=15, max_depth=np.inf)),
    'nb': ("Running NaiveBayes",
           lambda: NaiveBayes()),
    'knn': ("Running KNN",
            lambda: KNN(k=3)),
    # Stacking RF, NB and KNN. Metaclassifier = DT
    'stack': ("Running Stacking Classifier",
              lambda: Stacking()),
}

if classifier in model_options:
    banner, factory = model_options[classifier]
    print(banner)
    model = factory()
    utils.evaluate_model(model, X, y.flatten(), Xtest, ytest.flatten())
def main(device=torch.device('cuda:0')):
    """Train the depth-estimation U-Net and plot its training curves.

    Restores the latest checkpoint if one exists, trains for a fixed
    number of epochs, checkpoints after every epoch, and finally plots
    the running train/validation loss and accuracy curves.

    Args:
        device: torch device used for the evaluation passes.
    """
    # CLI arguments
    parser = arg.ArgumentParser(description='We all know what we are doing. Fighting!')
    parser.add_argument("--datasize", "-d", default="small", type=str,
                        help="data size you want to use, small, medium, total")
    args = parser.parse_args()

    # Data loaders
    datasize = args.datasize
    pathname = "data/nyu.zip"
    tr_loader, va_loader, te_loader = getTrainingValidationTestingData(
        datasize, pathname, batch_size=config("unet.batch_size"))

    # Model
    model = Net()

    # Loss function and optimizer
    learning_rate = utils.config("unet.learning_rate")
    criterion = DepthLoss(0.1)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    number_of_epoches = 10

    # Attempt to restore the latest checkpoint if one exists.
    print("Loading unet...")
    model, start_epoch, stats = utils.restore_checkpoint(
        model, utils.config("unet.checkpoint"))

    running_va_loss = []
    running_va_acc = []
    running_tr_loss = []
    running_tr_acc = []

    # Evaluate the restored model once so the curves include a starting
    # point.
    # BUG FIX: tr_acc/tr_loss were appended below without ever being
    # computed (the train-split evaluation was commented out), which
    # raised a NameError at runtime.
    tr_acc, tr_loss = utils.evaluate_model(model, tr_loader, device)
    acc, loss = utils.evaluate_model(model, va_loader, device)
    running_va_acc.append(acc)
    running_va_loss.append(loss)
    running_tr_acc.append(tr_acc)
    running_tr_loss.append(tr_loss)

    # Loop over the entire dataset multiple times.
    epoch = start_epoch
    while epoch < number_of_epoches:
        # Train model
        utils.train_epoch(tr_loader, model, criterion, optimizer)

        # Evaluate on train and validation splits.
        tr_acc, tr_loss = utils.evaluate_model(model, tr_loader, device)
        va_acc, va_loss = utils.evaluate_model(model, va_loader, device)
        running_va_acc.append(va_acc)
        running_va_loss.append(va_loss)
        running_tr_acc.append(tr_acc)
        running_tr_loss.append(tr_loss)

        # Save model parameters after every epoch.
        utils.save_checkpoint(model, epoch + 1, utils.config("unet.checkpoint"), stats)
        epoch += 1

    print("Finished Training")
    utils.make_plot(running_tr_loss, running_tr_acc, running_va_loss, running_va_acc)
def main(device, tr_loader, va_loader, te_loader, modelSelection):
    """Train CNN and show training plots."""
    # Dispatch table: lowercase CLI name -> backbone constructor.
    builders = {
        'res50': Res50,
        'dense121': Dense121,
        'mobv2': Mob_v2,
        'dense169': Dense169,
        'mob': Net,
        'squeeze': Squeeze,
    }
    key = modelSelection.lower()
    assert key in builders, 'Wrong type of model selection string!'
    model = builders[key]().to(device)

    # Loss function and optimizer; hyper-parameters come from the
    # per-model section of the config.
    learning_rate = utils.config(modelSelection + ".learning_rate")
    criterion = DepthLoss(0.1).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    number_of_epoches = 10

    # Attempt to restore the latest checkpoint if one exists.
    print("Loading unet...")
    model, start_epoch, stats = utils.restore_checkpoint(
        model, utils.config(modelSelection + ".checkpoint"))

    # Resume running statistics from the checkpoint when present.
    running_va_loss = [] if 'va_loss' not in stats else stats['va_loss']
    running_va_acc = [] if 'va_err' not in stats else stats['va_err']
    running_tr_loss = [] if 'tr_loss' not in stats else stats['tr_loss']
    running_tr_acc = [] if 'tr_err' not in stats else stats['tr_err']

    # Evaluate the restored model once before training starts.
    tr_acc, tr_loss = utils.evaluate_model(model, tr_loader, device)
    acc, loss = utils.evaluate_model(model, va_loader, device)
    running_va_acc.append(acc)
    running_va_loss.append(loss)
    running_tr_acc.append(tr_acc)
    running_tr_loss.append(tr_loss)
    stats = {
        'va_err': running_va_acc,
        'va_loss': running_va_loss,
        'tr_err': running_tr_acc,
        'tr_loss': running_tr_loss,
    }

    # Loop over the entire dataset multiple times.
    epoch = start_epoch
    while epoch < number_of_epoches:
        # Train model
        utils.train_epoch(device, tr_loader, model, criterion, optimizer)

        # Save checkpoint
        utils.save_checkpoint(model, epoch + 1,
                              utils.config(modelSelection + ".checkpoint"), stats)

        # Evaluate model
        tr_acc, tr_loss = utils.evaluate_model(model, tr_loader, device)
        va_acc, va_loss = utils.evaluate_model(model, va_loader, device)
        running_va_acc.append(va_acc)
        running_va_loss.append(va_loss)
        running_tr_acc.append(tr_acc)
        running_tr_loss.append(tr_loss)
        epoch += 1

    print("Finished Training")
    utils.make_plot(running_tr_loss, running_tr_acc, running_va_loss, running_va_acc)
def train_main(cfg):
    """Main training entry point.

    Args:
        cfg: configuration object exposing train_cfg / dataset_cfg /
            model_cfg sections, the target device, a log file path and
            bookkeeping options (is_parallel, is_online_train, ...).
    """
    # Unpack config sections.
    train_cfg = cfg.train_cfg
    dataset_cfg = cfg.dataset_cfg
    model_cfg = cfg.model_cfg
    is_parallel = cfg.setdefault(key='is_parallel', default=False)
    device = cfg.device
    is_online_train = cfg.setdefault(key='is_online_train', default=False)

    # Configure the logger (append mode, timestamped lines).
    logging.basicConfig(filename=cfg.logfile, filemode='a', level=logging.INFO,
                        format='%(asctime)s\n%(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S')
    logger = logging.getLogger()

    # Build datasets; optionally carve the validation set out of the
    # training set instead of loading a separate directory.
    train_dataset = LandDataset(DIR_list=dataset_cfg.train_dir_list,
                                mode='train',
                                input_channel=dataset_cfg.input_channel,
                                transform=dataset_cfg.train_transform)
    split_val_from_train_ratio = dataset_cfg.setdefault(
        key='split_val_from_train_ratio', default=None)
    if split_val_from_train_ratio is None:
        val_dataset = LandDataset(DIR_list=dataset_cfg.val_dir_list,
                                  mode='val',
                                  input_channel=dataset_cfg.input_channel,
                                  transform=dataset_cfg.val_transform)
    else:
        val_size = int(len(train_dataset) * split_val_from_train_ratio)
        train_size = len(train_dataset) - val_size
        train_dataset, val_dataset = random_split(
            train_dataset, [train_size, val_size],
            generator=torch.manual_seed(cfg.random_seed))
        print(f"按照{split_val_from_train_ratio}切分训练集...")

    # Build dataloaders.
    # BUG FIX: worker_init_fn must be the callable itself; the original
    # passed `_init_fn()` (i.e. None), so dataloader workers were never
    # re-seeded. DataLoader calls the function with the worker id.
    def _init_fn(worker_id):
        np.random.seed(cfg.random_seed)

    train_dataloader = DataLoader(train_dataset,
                                  batch_size=train_cfg.batch_size,
                                  shuffle=True,
                                  num_workers=train_cfg.num_workers,
                                  drop_last=True,
                                  worker_init_fn=_init_fn)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=train_cfg.batch_size,
                                num_workers=train_cfg.num_workers,
                                shuffle=False,
                                drop_last=True,
                                worker_init_fn=_init_fn)

    # Build the model. map_location loads straight onto the target
    # device instead of defaulting to cuda:0 first.
    if train_cfg.is_swa:
        model = torch.load(train_cfg.check_point_file,
                           map_location=device).to(device)
        swa_model = torch.load(train_cfg.check_point_file,
                               map_location=device).to(device)
        if is_parallel:
            model = torch.nn.DataParallel(model)
            swa_model = torch.nn.DataParallel(swa_model)
        swa_n = 0
        parameters = swa_model.parameters()
    else:
        model = build_model(model_cfg).to(device)
        if is_parallel:
            model = torch.nn.DataParallel(model)
        parameters = model.parameters()

    # Optimizer selection.
    optimizer_cfg = train_cfg.optimizer_cfg
    lr_scheduler_cfg = train_cfg.lr_scheduler_cfg
    if optimizer_cfg.type == 'adam':
        optimizer = optim.Adam(params=parameters, lr=optimizer_cfg.lr,
                               weight_decay=optimizer_cfg.weight_decay)
    elif optimizer_cfg.type == 'adamw':
        optimizer = optim.AdamW(params=parameters, lr=optimizer_cfg.lr,
                                weight_decay=optimizer_cfg.weight_decay)
    elif optimizer_cfg.type == 'sgd':
        optimizer = optim.SGD(params=parameters, lr=optimizer_cfg.lr,
                              momentum=optimizer_cfg.momentum,
                              weight_decay=optimizer_cfg.weight_decay)
    elif optimizer_cfg.type == 'RMS':
        optimizer = optim.RMSprop(params=parameters, lr=optimizer_cfg.lr,
                                  weight_decay=optimizer_cfg.weight_decay)
    else:
        raise Exception('没有该优化器!')

    # Learning-rate scheduler selection.
    if not lr_scheduler_cfg:
        lr_scheduler = None
    elif lr_scheduler_cfg.policy == 'cos':
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer, lr_scheduler_cfg.T_0, lr_scheduler_cfg.T_mult,
            lr_scheduler_cfg.eta_min, last_epoch=lr_scheduler_cfg.last_epoch)
    elif lr_scheduler_cfg.policy == 'LambdaLR':
        import math
        lf = lambda x: (((1 + math.cos(x * math.pi / train_cfg.num_epochs)) / 2
                         ) ** 1.0) * 0.95 + 0.05  # cosine
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
        lr_scheduler.last_epoch = 0
    else:
        lr_scheduler = None

    # Loss: joint Dice + soft cross-entropy.
    DiceLoss_fn = DiceLoss(mode='multiclass')
    SoftCrossEntropy_fn = SoftCrossEntropyLoss(smooth_factor=0.1)
    loss_func = L.JointLoss(first=DiceLoss_fn, second=SoftCrossEntropy_fn,
                            first_weight=0.5, second_weight=0.5).cuda()

    # Make sure the checkpoint directory exists.
    check_point_dir = '/'.join(model_cfg.check_point_file.split('/')[:-1])
    if not os.path.exists(check_point_dir):
        os.mkdir(check_point_dir)

    # Start training.
    # BUG FIX: the default must be a list — `epoch in auto_save_epoch_list`
    # raises TypeError when the stored default is the bare int 5.
    auto_save_epoch_list = train_cfg.setdefault(key='auto_save_epoch_list',
                                                default=[5])
    train_loss_list = []
    val_loss_list = []
    val_loss_min = 999999
    best_epoch = 0
    best_miou = 0
    train_loss = 10  # initial placeholder value
    logger.info('开始在{}上训练{}模型...'.format(device, model_cfg.type))
    logger.info('补充信息:{}\n'.format(cfg.setdefault(key='info', default='None')))
    for epoch in range(train_cfg.num_epochs):
        print()
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        start_time = time.time()
        print(f"正在进行第{epoch}轮训练...")
        logger.info('*' * 10 + f"第{epoch}轮" + '*' * 10)

        # Train one epoch (SWA averages the swa_model into model).
        if train_cfg.is_swa:
            train_loss = train_epoch(swa_model, optimizer, lr_scheduler,
                                     loss_func, train_dataloader, epoch, device)
            moving_average(model, swa_model, 1.0 / (swa_n + 1))
            swa_n += 1
            bn_update(train_dataloader, model, device)
        else:
            train_loss = train_epoch(model, optimizer, lr_scheduler, loss_func,
                                     train_dataloader, epoch, device)

        # Evaluate on the validation split (offline training only).
        if not is_online_train:
            val_loss, val_miou = evaluate_model(model, val_dataloader,
                                                loss_func, device,
                                                cfg.num_classes)
        else:
            val_loss = 0
            val_miou = 0
        train_loss_list.append(train_loss)
        val_loss_list.append(val_loss)

        # Save models (offline training only): the best-so-far checkpoint
        # plus periodic per-epoch snapshots.
        # NOTE(review): the collapsed original did not make the nesting of
        # the periodic save unambiguous; it is kept under the offline
        # branch per the source order — confirm against the caller.
        if not is_online_train:
            if val_loss < val_loss_min:
                val_loss_min = val_loss
                best_epoch = epoch
                best_miou = val_miou
                if is_parallel:
                    torch.save(model.module, model_cfg.check_point_file)
                else:
                    torch.save(model, model_cfg.check_point_file)
            if epoch in auto_save_epoch_list:
                model_file = model_cfg.check_point_file.split(
                    '.pth')[0] + '-epoch{}.pth'.format(epoch)
                if is_parallel:
                    torch.save(model.module, model_file)
                else:
                    torch.save(model, model_file)

        # Report epoch results.
        end_time = time.time()
        run_time = int(end_time - start_time)
        m, s = divmod(run_time, 60)
        time_str = "{:02d}分{:02d}秒".format(m, s)
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        out_str = "第{}轮训练完成,耗时{},\t训练集上的loss={:.6f};\t验证集上的loss={:.4f},mIoU={:.6f}\t最好的结果是第{}轮,mIoU={:.6f}" \
            .format(epoch, time_str, train_loss, val_loss, val_miou, best_epoch,
                    best_miou)
        print(out_str)
        logger.info(out_str + '\n')
'DecisionTree': tree_model, 'RandomForest': rf_model, 'XGBoost': xgb_model, 'CatBoost': cat_model, 'LightGBM': lgb_model, 'Linear': lr_model, 'Lasso': lasso_model, 'Ridge': ridge_model } # %% [markdown] id="kCSEOF35MoSB" # ### Unscaled dataset # %% colab={"base_uri": "https://localhost:8080/", "height": 297} execution={"iopub.execute_input": "2020-10-15T12:54:28.081881Z", "iopub.status.busy": "2020-10-15T12:54:28.080854Z", "iopub.status.idle": "2020-10-15T12:55:11.188310Z", "shell.execute_reply": "2020-10-15T12:55:11.187312Z", "shell.execute_reply.started": "2020-10-15T12:54:28.081881Z"} executionInfo={"elapsed": 30383, "status": "ok", "timestamp": 1602559165523, "user": {"displayName": "Abdillah Fikri", "photoUrl": "", "userId": "04470220666512949031"}, "user_tz": -420} id="DgfsmUm-HqGG" outputId="857f512d-6910-4625-e01b-c6b587a9094c" # evaluasi model memakai function unscaled = evaluate_model(models, X_train_full, X_test_full, y_train, y_test) # %% [markdown] id="AodaQJBNMtob" # ### Scaled dataset # %% execution={"iopub.execute_input": "2020-10-15T12:55:11.191302Z", "iopub.status.busy": "2020-10-15T12:55:11.190305Z", "iopub.status.idle": "2020-10-15T12:55:11.236183Z", "shell.execute_reply": "2020-10-15T12:55:11.235184Z", "shell.execute_reply.started": "2020-10-15T12:55:11.191302Z"} executionInfo={"elapsed": 25276, "status": "ok", "timestamp": 1602559165525, "user": {"displayName": "Abdillah Fikri", "photoUrl": "", "userId": "04470220666512949031"}, "user_tz": -420} id="2lQZQbORMwYB" # Scaling data from sklearn.preprocessing import RobustScaler scaler = RobustScaler() scaler.fit(X_train_full) X_train_full_scaled = scaler.transform(X_train_full) X_test_full_scaled = scaler.transform(X_test_full) # %% colab={"base_uri": "https://localhost:8080/", "height": 297} execution={"iopub.execute_input": "2020-10-15T12:55:11.239174Z", "iopub.status.busy": "2020-10-15T12:55:11.238177Z", "iopub.status.idle": "2020-10-15T12:55:54.767071Z", "shell.execute_reply": 
"2020-10-15T12:55:54.767071Z", "shell.execute_reply.started": "2020-10-15T12:55:11.239174Z"} executionInfo={"elapsed": 54513, "status": "ok", "timestamp": 1602559195270, "user": {"displayName": "Abdillah Fikri", "photoUrl": "", "userId": "04470220666512949031"}, "user_tz": -420} id="58C87fQHNRII" outputId="06962bd1-1bb2-4c3e-bd1c-74e71a1d0ed5" # evaluasi model memakai function
def train_joint(args):
    """
    Simultaneously train Initialisation Policy and Baseline

    Arguments
    ---------
    args : dict
        Dictionary containing command line arguments
    """
    # Statistic buffers
    reward_init_buffer = []
    loss_init_buffer = []
    rewards_local_buffer = []
    loss_local_buffer = []
    mean_square_error_per_epoch = []
    mean_relative_distance_per_epoch = []

    generator = instance_generator(args.problem)

    # Initialisation policy and its optimiser
    init_policy = select_initialization_policy(args)
    init_opt = T.optim.Adam(init_policy.parameters(), lr=args.init_lr_rate)

    # Optional baseline network (used to reduce REINFORCE variance)
    if args.use_baseline:
        baseline_net = Baseline(args.dim_context, args.dim_hidden)
        opt_base = T.optim.Adam(baseline_net.parameters(), lr=1e-4)
        loss_base_fn = T.nn.MSELoss()

    # Local-move policy (A2C)
    local_move_policy = A2CLocalMovePolicy(
        args.dim_context, args.dim_problem, args.window_size,
        args.num_of_scenarios_in_state, gamma=args.gamma,
        beta_entropy=args.beta, num_local_move=args.num_local_move)

    # Train
    for epoch in range(1, args.epochs + 1):
        print("******************************************************")
        print(f"Epoch : {epoch}")

        # Fresh instance and environment each epoch
        instance = generator.generate_instance()
        context = instance.get_context()
        env = create_environment(args, instance)

        # REINFORCE step for the initialisation policy; when a baseline
        # is in use, the baseline network is updated towards the reward.
        if args.use_baseline:
            baseline_reward = baseline_net.forward(context)
            reward_init, loss_init, start_state = init_policy.REINFORCE(
                init_opt, env, context, baseline_reward, True)
            update_baseline_model(loss_base_fn, baseline_reward, reward_init,
                                  opt_base)
        else:
            reward_init, loss_init, start_state = init_policy.REINFORCE(
                init_opt, env, context)
        reward_init_buffer.append(reward_init)
        loss_init_buffer.append(loss_init)

        # A2C step for the local-move policy, starting from the state
        # produced by the initialisation policy.
        rewards_local, loss_local = local_move_policy.train(start_state, env)
        rewards_local_buffer.append(rewards_local)
        loss_local_buffer.append(loss_local)

        # Periodically evaluate and persist stats plus both policies.
        if epoch % 100 == 0:
            eval_stats = evaluate_model(args, env, generator,
                                        init_policy=init_policy,
                                        local_move_policy=local_move_policy)
            mean_square_error_per_epoch.append(eval_stats["mean_square_error"])
            mean_relative_distance_per_epoch.append(
                eval_stats["mean_relative_distance"])

            # Init policy stats
            save_stats_and_model(args, epoch, reward_init_buffer,
                                 loss_init_buffer,
                                 mean_square_error_per_epoch,
                                 mean_relative_distance_per_epoch,
                                 init_policy, INIT)
            # Local move policy stats
            save_stats_and_model(args, epoch, rewards_local_buffer,
                                 loss_local_buffer,
                                 mean_square_error_per_epoch,
                                 mean_relative_distance_per_epoch,
                                 local_move_policy, LOCAL)
vocab_size=vocab_size, output_size=output_size, bidirectional=False) model = model.to(device) loss_func = nn.CrossEntropyLoss() optimizer = optim.SGD(model.parameters(), lr=args.learning_rate) model = train_model(model=model, loss_func=loss_func, optimizer=optimizer, data_train=data_train, labels_train=labels_train, n_epochs=args.n_epochs, batch_size=args.batch_size, save_path=SAVE_PATH, device=device) else: """ File was not run with --train_from_scratch, so simply load the model from its saved path """ model = torch.load(SAVE_PATH) """ Whether we're training or just loading the pretrained model, we finish by evaluating the model on the testing set. """ evaluate_model(model=model, tok_to_ix=tok_to_ix, use_og_data_only=args.use_og_data_only, bs=args.batch_size, device=device)
from utils import prepare_data, prepare_model, plot_losses, evaluate_model
from keras.callbacks import ModelCheckpoint, TensorBoard
import random

# Reproducible training/val/testing splits
random.seed(9001)
X_train, X_val, X_test, y_train, y_val, y_test, input_output = prepare_data('./recordings/')

# CNN with the desired architecture
CNN_best_model = prepare_model(input_output, modeltype='CNN', dropout=False,
                               maxpooling=True, batch_n=False)

# Callbacks: keep only the best model (by val loss) and log to TensorBoard
callbacks = [
    ModelCheckpoint(filepath='models/cnn_best_model.h5', monitor='val_loss',
                    save_best_only=True),
    TensorBoard(log_dir='./Graph', histogram_freq=1, write_graph=False,
                write_images=False),
]

# Fit the model
history = CNN_best_model.fit(X_train, y_train, batch_size=32, epochs=50,
                             verbose=2, validation_data=[X_val, y_val],
                             callbacks=callbacks)

# Loss curves
plot_losses(history)

# Evaluate the saved best model on the testing set
evaluate_model('models/cnn_best_model.h5', X_test, y_test)
save_freq='epoch') # form callback list call_backs = [checkpoint, early_stop] # train the model with the scaled training images t0 = time.time() run_log = utils.train_model(model, scaled_train_images, train_labels, scaled_val_images, val_labels, num_epoch, batch_size, call_backs) t_train = round(time.time() - t0, 3) history_data = pd.DataFrame(run_log.history) print(history_data) # evaluate the model on scaled test images t0 = time.time() test_loss, test_accuracy = utils.evaluate_model(model, scaled_test_images, test_labels) t_test = round(time.time() - t0, 3) # print out loss and accuracy print(f'N = {n}') print(f'\tTest loss =\t{test_loss:.3f}') print(f'\tTest accuracy =\t{test_accuracy:.3f}') print(f'\tTrain time = {t_train}') print(f'\tTest time = {t_test}') print('\n') # record metrics metric_plot['layers'].append(n) metric_plot['test_acc'].append(round(test_accuracy, 4)) metric_plot['train_t'].append(t_train) metric_plot['test_t'].append(t_test)
def train_gerryfair_model(est, model_name, dataset, attributes, seed=42,
                          rdir='results'):
    """Sweep gamma for a GerryFair estimator and record train/test metrics.

    For each gamma a deep copy of `est` is trained; per-split metrics,
    the pareto-front hypervolume and a JSON performance dump are written
    under `rdir`. Returns (performance list, hypervolume DataFrame).
    """
    X_train, X_test, X_prime_train, X_prime_test, y_train, y_test, sens_cols = \
        setup_data(dataset, attributes, seed)
    print('model:', model_name)

    # 100 evenly spaced fairness budgets
    gamma_list = np.linspace(0.001, 0.999, 100)
    performance = []
    dataset_name = dataset.split('/')[-1].split('.')[0]

    t0 = time.process_time()
    for g in gamma_list:
        # Fresh copy per gamma so runs are independent
        model = copy.deepcopy(est)
        model.set_options(gamma=g)
        print('gamma:', model.gamma)

        # train
        error, fairness_violation = model.train(X_train, X_prime_train,
                                                y_train.values)

        # Predictions and probabilities for both splits
        train_predictions = model.predict(X_train, sample=True)
        test_predictions = model.predict(X_test, sample=True)
        train_probabilities = model.predict(X_train, sample=False)
        test_probabilities = model.predict(X_test, sample=False)

        train_perf = evaluate_model(X_train, X_prime_train, y_train,
                                    train_predictions, train_probabilities)
        # Keep the model's own final error/violation next to the metrics
        train_perf.update({
            'self_error': error[-1],
            'self_fairness_violation': fairness_violation[-1]
        })
        test_perf = evaluate_model(X_test, X_prime_test, y_test,
                                   test_predictions, test_probabilities)

        performance.append({
            'method': model_name,
            'dataset': dataset_name,
            'seed': seed,
            'model': model_name + ':g=' + str(g),
            'train': train_perf,
            'test': test_perf
        })

    runtime = time.process_time() - t0
    header = {
        'method': model_name,
        'dataset': dataset_name,
        'seed': seed,
        'time': runtime
    }

    # Hypervolume of the pareto front
    hv = get_hypervolumes(performance, dataset_name)
    hv = [{**header, **i} for i in hv]
    df_hv = pd.DataFrame.from_records(hv)
    df_hv.to_csv(rdir + '/hv_' + model_name + '_' + str(seed) + '_' +
                 dataset.split('/')[-1], index=False)

    with open(rdir + '/perf_' + model_name + '_' + dataset_name + '_' +
              str(seed) + '.json', 'w') as fp:
        json.dump(performance, fp, sort_keys=True, indent=4)
    return performance, df_hv
def train_feat_model(est, model_name, dataset, attributes, seed=42,
                     rdir='results'):
    """Fit a FEAT estimator and evaluate every model in its archive.

    Writes the pareto-front hypervolume CSV and a JSON performance dump
    under `rdir`; returns (performance list, hypervolume records).
    """
    X_train, X_test, X_prime_train, X_prime_test, \
        y_train, y_test, sens_cols = setup_data(dataset, attributes, seed)
    print('model:', model_name)

    # Flag which columns are protected and pass the metadata to the
    # estimator as encoded comma-separated strings.
    protected_groups = [1 if c in sens_cols else 0 for c in X_train.columns]
    est.protected_groups = ','.join(
        str(int(pg)) for pg in protected_groups).encode()
    est.feature_names = ','.join(list(X_train.columns)).encode()
    print('est protected_groups: ', est.protected_groups)

    dataset_name = dataset.split('/')[-1].split('.')[0]

    t0 = time.process_time()
    est.fit(X_train, y_train)

    # Predictions for every archived model
    print('archive size:', est.get_archive_size())
    train_predictions = est.predict_archive(X_train.values)
    test_predictions = est.predict_archive(X_test.values)

    print('getting probabilities')
    train_probs, test_probs = [], []
    for i in np.arange(est.get_archive_size()):
        train_probs.append(np.nan_to_num(
            est.predict_proba_archive(i, X_train.values).flatten()))
        test_probs.append(np.nan_to_num(
            est.predict_proba_archive(i, X_test.values).flatten()))

    print('getting performance')
    performance = []
    for i, (train_pred, test_pred, train_prob, test_prob) in enumerate(
            zip(train_predictions, test_predictions, train_probs, test_probs)):
        performance.append({
            'method': model_name,
            'model': model_name + ':archive(' + str(i) + ')',
            'dataset': dataset_name,
            'seed': seed,
            'train': evaluate_model(X_train, X_prime_train, y_train,
                                    train_pred, train_prob),
            'test': evaluate_model(X_test, X_prime_test, y_test,
                                   test_pred, test_prob)
        })

    # Hypervolume of the pareto front
    runtime = time.process_time() - t0
    header = {
        'method': model_name,
        'dataset': dataset_name,
        'seed': seed,
        'time': runtime
    }
    hv = get_hypervolumes(performance, dataset_name)
    hv = [{**header, **i} for i in hv]
    df_hv = pd.DataFrame.from_records(hv)
    df_hv.to_csv(rdir + '/hv_' + model_name + '_' + str(seed) + '_' +
                 dataset.split('/')[-1], index=False)

    with open(rdir + '/perf_' + model_name + '_' + dataset_name + '_' +
              str(seed) + '.json', 'w') as fp:
        json.dump(performance, fp, sort_keys=True, indent=4)
    return performance, hv
def neuralNetHyperparamTuning(
        hyperparam, hyperparam_vals, X, y, lr=1e-3, momentum=0.9, lammy=1e-5,
        batch_size=32, epochs=100, num_workers=4, err_type='squared',
        cross_val=False, valid_size=0.2, n_splits=5, save_fig=True):
    """Tune one NN hyperparameter over the values in hyperparam_vals.

    The tuned hyperparameter takes its values from `hyperparam_vals`;
    the corresponding keyword argument is ignored for that parameter.

    Arguments:
        hyperparam {str} -- hyper-parameter to tune, one of
            {'lr', 'momentum', 'lammy', 'batch_size'}
        hyperparam_vals {list-like, ndarray} -- values to try
        X {ndarray} -- X
        y {ndarray} -- y

    Keyword Arguments:
        lr {float} -- learning rate (default: 1e-3)
        momentum {float} -- momentum (default: 0.9)
        lammy {float} -- lambda for regularization (default: 1e-5)
        batch_size {int} -- mini batch size (default: 32)
        epochs {int} -- epochs (default: 100)
        num_workers {int} -- sub-processes for memory transfer (default: 4)
        err_type {str} -- evaluation error type: 'abs', 'squared', 'rmsle'
            (default: 'squared')
        cross_val {bool} -- use cross validation (default: False)
        valid_size {float} -- validation portion, 0.0-1.0 (default: 0.2)
        n_splits {int} -- folds for cross validation (default: 5)
        save_fig {bool} -- save the error-curve figure (default: True)

    Raises:
        NameError -- unknown hyperparam name

    Returns:
        {ndarray, ndarray} -- training errors, validation errors
    """
    _, num_features = X.shape
    num_divs = len(hyperparam_vals)

    # Validate up front instead of failing on the first loop iteration.
    tunable = ('lr', 'momentum', 'lammy', 'batch_size')
    if hyperparam not in tunable:
        raise NameError("Hyperparam not found")

    # init errors
    errs_tr = np.empty(num_divs)
    errs_va = np.empty(num_divs)

    # REFACTOR: the original repeated the full NeuralNetRegressor call in
    # four branches differing only in one kwarg; build a base kwargs dict
    # and override just the tuned parameter.
    base_kwargs = dict(lr=lr, momentum=momentum, lammy=lammy,
                       batch_size=batch_size)
    for i in range(num_divs):
        print("[{}] {} = {}".format(str(i), str(hyperparam),
                                    str(hyperparam_vals[i])))
        # Randomize the random_state seed for each trial.
        random_state = np.random.randint(0, high=100)
        print("random seed generated {}".format(random_state))

        kwargs = dict(base_kwargs)
        val = hyperparam_vals[i]
        # batch_size must be an int (matches the original int() cast).
        kwargs[hyperparam] = int(val) if hyperparam == 'batch_size' else val
        model = NeuralNetRegressor(
            num_features, gpu=True, epochs=epochs, num_workers=num_workers,
            verbose=False, **kwargs)

        errs_tr[i], errs_va[i] = evaluate_model(
            model, X, y, valid_size=valid_size, verbose=True,
            cross_val=cross_val, n_splits=n_splits,
            random_state=random_state, err_type=err_type)

    if save_fig:
        plt.figure()
        plt.plot(hyperparam_vals, errs_tr, label="training errors")
        plt.plot(hyperparam_vals, errs_va, label='validation errors')
        plt.xlabel('{}'.format(hyperparam))
        plt.ylabel('mean [{}] errors'.format(err_type))
        plt.legend()
        plt.grid()
        plt.title('Hyperparam tuning: {}'.format(hyperparam))
        fname = os.path.join('..', 'figs',
                             '{}_{}_err.png'.format(hyperparam, str(err_type)))
        plt.savefig(fname)

    return errs_tr, errs_va
def main():
    """Build the SRResNet graph, then either run benchmark validation
    (--is-val) or train indefinitely, validating and checkpointing every
    --log-freq iterations."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', type=str,
                        help='Checkpoint to load all weights from.')
    parser.add_argument('--load-gen', type=str,
                        help='Checkpoint to load generator weights only from.')
    parser.add_argument('--name', type=str, help='Name of experiment.')
    parser.add_argument('--overfit', action='store_true',
                        help='Overfit to a single image.')
    parser.add_argument('--batch-size', type=int, default=16,
                        help='Mini-batch size.')
    parser.add_argument(
        '--log-freq', type=int, default=10000,
        help='How many training iterations between validation/checkpoints.')
    parser.add_argument('--learning-rate', type=float, default=1e-4,
                        help='Learning rate for Adam.')
    parser.add_argument('--content-loss', type=str, default='mse',
                        choices=['mse', 'L1', 'edge_loss_mse', 'edge_loss_L1'],
                        help='Metric to use for content loss.')
    parser.add_argument(
        '--use-gan', action='store_true',
        help='Add adversarial loss term to generator and trains discriminator.'
    )
    parser.add_argument('--image-size', type=int, default=96,
                        help='Size of random crops used for training samples.')
    parser.add_argument('--vgg-weights', type=str, default='vgg_19.ckpt',
                        help='File containing VGG19 weights (tf.slim)')
    parser.add_argument('--train-dir', type=str,
                        help='Directory containing training images')
    parser.add_argument(
        '--validate-benchmarks', action='store_true',
        help=
        'If set, validates that the benchmarking metrics are correct for the images provided by the authors of the SRGAN paper.'
    )
    parser.add_argument('--gpu', type=str, default='0', help='Which GPU to use')
    parser.add_argument('--epoch', type=int, default='1000000',
                        help='How many iterations ')
    parser.add_argument('--is-val', action='store_true',
                        help='How many iterations ')
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Build the SRResNet graph: LR input -> SR prediction -> loss -> opt.
    srresnet_training = tf.placeholder(tf.bool, name='srresnet_training')
    srresnet_model = srresnet.Srresnet(training=srresnet_training,
                                       learning_rate=args.learning_rate,
                                       content_loss=args.content_loss)
    hr_y = tf.placeholder(tf.float32, [None, None, None, 3], name='HR_image')
    lr_x = tf.placeholder(tf.float32, [None, None, None, 3], name='LR_image')
    sr_pred = srresnet_model.forward(lr_x)
    sr_loss = srresnet_model.loss_function(hr_y, sr_pred)
    sr_opt = srresnet_model.optimize(sr_loss)

    benchmarks = [
        Benchmark('Benchmarks/Set5', name='Set5'),
        Benchmark('Benchmarks/Set14', name='Set14'),
        Benchmark('Benchmarks/BSD100', name='BSD100')
    ]
    if args.validate_benchmarks:
        for benchmark in benchmarks:
            benchmark.validate()

    # Create log folder
    if args.load and not args.name:
        log_path = os.path.dirname(args.load)
    else:
        log_path = build_log_dir(args, sys.argv)

    train_data_path = 'done_dataset\PreprocessedData.h5'
    val_data_path = 'done_dataset\PreprocessedData_val.h5'
    eval_data_path = 'done_dataset\PreprocessedData_eval.h5'

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        iteration = 0
        epoch = 0
        saver = tf.train.Saver()
        # Restore all weights when a checkpoint is given; the iteration
        # counter is encoded in the checkpoint filename suffix.
        if args.load:
            iteration = int(args.load.split('-')[-1])
            saver.restore(sess, args.load)
            print(saver)
            print("load_process_DEBUG")

        train_data_set = get_data_set(train_data_path, 'train')
        val_data_set = get_data_set(val_data_path, 'val')
        eval_data_set = get_data_set(eval_data_path, 'eval')

        val_error_li = []
        eval_error_li = []
        fig = plt.figure()

        if args.is_val:
            # BUG FIX: the original referenced the undefined name
            # `g_y_pred` (NameError) and called `benchmark.eval`; use the
            # graph output `sr_pred` and the same `benchmark.evaluate`
            # API the training branch uses.
            for benchmark in benchmarks:
                psnr, ssim, _, _ = benchmark.evaluate(sess, sr_pred, log_path,
                                                      iteration)
                print(' [%s] PSNR: %.2f, SSIM: %.4f' %
                      (benchmark.name, psnr, ssim), end='')
        else:
            while True:
                t = trange(0, len(train_data_set) - args.batch_size + 1,
                           args.batch_size, desc='Iterations')
                # One epoch
                for batch_idx in t:
                    t.set_description("Training... [Iterations: %s]" % iteration)
                    # Validate/checkpoint every log-freq iterations.
                    if iteration % args.log_freq == 0:
                        # BUG FIX: this inner loop used to reuse (and
                        # clobber) `batch_idx` from the training loop, so
                        # training resumed on the wrong batch after every
                        # validation pass; it also hard-coded slice size
                        # 16 instead of args.batch_size.
                        for val_batch_idx in range(
                                0,
                                len(val_data_set) - args.batch_size + 1,
                                args.batch_size):
                            val_error = evaluate_model(
                                sr_loss,
                                val_data_set[val_batch_idx:val_batch_idx +
                                             args.batch_size], sess, 119,
                                args.batch_size)
                            eval_error = evaluate_model(
                                sr_loss,
                                eval_data_set[val_batch_idx:val_batch_idx +
                                              args.batch_size], sess, 119,
                                args.batch_size)
                            val_error_li.append(val_error)
                            eval_error_li.append(eval_error)

                        # Log error curves
                        plt.plot(val_error_li)
                        plt.savefig('val_error.png')
                        plt.plot(eval_error_li)
                        plt.savefig('eval_error.png')
                        print('[%d] Test: %.7f, Train: %.7f' %
                              (iteration, val_error, eval_error), end='')

                        # Evaluate benchmarks
                        log_line = ''
                        for benchmark in benchmarks:
                            psnr, ssim, _, _ = benchmark.evaluate(
                                sess, sr_pred, log_path, iteration)
                            print(' [%s] PSNR: %.2f, SSIM: %.4f' %
                                  (benchmark.name, psnr, ssim), end='')
                            log_line += ',%.7f, %.7f' % (psnr, ssim)
                        print()

                        # Write to log
                        with open(log_path + '/loss.csv', 'a') as f:
                            f.write('%d, %.15f, %.15f%s\n' %
                                    (iteration, val_error, eval_error, log_line))
                        # Save checkpoint
                        saver.save(sess, os.path.join(log_path, 'weights'),
                                   global_step=iteration,
                                   write_meta_graph=False)

                    # Train SRResNet on one mini-batch.
                    batch_hr = train_data_set[batch_idx:batch_idx +
                                              args.batch_size]
                    batch_lr = downsample_batch(batch_hr, factor=4)
                    batch_lr, batch_hr = preprocess(batch_lr, batch_hr)
                    _, err = sess.run([sr_opt, sr_loss],
                                      feed_dict={srresnet_training: True,
                                                 lr_x: batch_lr,
                                                 hr_y: batch_hr})
                    iteration += 1
                print('__epoch__: %s' % epoch)
                epoch += 1
def main():
    """Train the DWT-based SRResnet derain model.

    Builds the TF1 graph (placeholders, model, loss, optimizer), then either
    runs benchmark evaluation only (--is-val) or an endless training loop with
    periodic validation, benchmark logging and checkpointing.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--load', type=str,
                        help='Checkpoint to load all weights from.')
    parser.add_argument('--load-gen', type=str,
                        help='Checkpoint to load generator weights only from.')
    parser.add_argument('--name', type=str, help='Name of experiment.')
    parser.add_argument('--overfit', action='store_true',
                        help='Overfit to a single image.')
    parser.add_argument('--batch-size', type=int, default=16,
                        help='Mini-batch size.')
    parser.add_argument('--log-freq', type=int, default=10000,
                        help='How many training iterations between validation/checkpoints.')
    parser.add_argument('--learning-rate', type=float, default=1e-4,
                        help='Learning rate for Adam.')
    parser.add_argument('--content-loss', type=str, default='mse',
                        choices=['mse', 'L1', 'edge_loss_mse', 'edge_loss_L1'],
                        help='Metric to use for content loss.')
    parser.add_argument('--use-gan', action='store_true',
                        help='Add adversarial loss term to generator and trains discriminator.')
    parser.add_argument('--image-size', type=int, default=96,
                        help='Size of random crops used for training samples.')
    parser.add_argument('--vgg-weights', type=str, default='vgg_19.ckpt',
                        help='File containing VGG19 weights (tf.slim)')
    parser.add_argument('--train-dir', type=str,
                        help='Directory containing training images')
    parser.add_argument('--validate-benchmarks', action='store_true',
                        help='If set, validates that the benchmarking metrics are correct for the images provided by the authors of the SRGAN paper.')
    parser.add_argument('--gpu', type=str, default='0', help='Which GPU to use')
    # NOTE: argparse applies `type` to string defaults, so '1000000' parses to int.
    # NOTE(review): args.epoch is never read — the training loop below is `while True`.
    parser.add_argument('--epoch', type=int, default='1000000',
                        help='How many iterations ')
    parser.add_argument('--is-val', action='store_true',
                        help='How many iterations ')
    parser.add_argument('--upSample', type=int, default='2',
                        help='How much scale ')
    args = parser.parse_args()
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # Boolean placeholder toggling train/inference behaviour inside the model.
    srresnet_training = tf.placeholder(tf.bool, name='srresnet_training')
    srresnet_model = srresnet.Srresnet(training=srresnet_training,
                                       learning_rate=args.learning_rate,
                                       content_loss=args.content_loss,
                                       num_upsamples=args.upSample)
    # Inputs: approximation (LL) band, 3 channels, and the 9 detail bands.
    lr_A = tf.placeholder(tf.float32, [None, None, None, 3], name='LR_DWT_A')
    lr_dwt_edge = tf.placeholder(tf.float32, [None, None, None, 9],
                                 name='LR_DWT_edge')
    hr_A = tf.placeholder(tf.float32, [None, None, None, 3], name='HR_image')
    # NOTE(review): `hr` is float64 while every other placeholder is float32 —
    # confirm this is intentional.
    hr = tf.placeholder(tf.float64, [None, None, None, 3], name='HR')
    hr_dwt_edge = tf.placeholder(tf.float32, [None, None, None, 9],
                                 name='HR_DWT_edge')
    sr_out_pred, sr_BCD_pred, sr_pred = srresnet_model.forward(
        lr_A, lr_dwt_edge)
    sr_loss = srresnet_model.loss_function(hr_A, sr_out_pred, hr_dwt_edge,
                                           sr_BCD_pred, hr, sr_pred)
    sr_opt = srresnet_model.optimize(sr_loss)

    # For validation: input and label images must be paired up.
    benchmarks = [
        Benchmark('Benchmarks\\Rain12\\input', 'Benchmarks\\Rain12\\label',
                  name='Rain12'),
        Benchmark('Benchmarks\\val\\input', 'Benchmarks\\val\\label',
                  name='Rain100H'),
    ]

    # Create log folder (reuse the checkpoint's folder when resuming unnamed).
    if args.load and not args.name:
        log_path = os.path.dirname(args.load)
    else:
        log_path = build_log_dir(args, sys.argv)

    # NOTE(review): backslashes here are literal (no valid escape sequences),
    # so these paths only resolve on Windows; prefer os.path.join.
    train_data_path = 'dataset\PreprocessedData.h5'
    val_data_path = 'dataset\PreprocessedData_val.h5'
    eval_data_path = 'dataset\PreprocessedData_eval.h5'

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        iteration = 0
        epoch = 0
        saver = tf.train.Saver(max_to_keep=100)
        # Load all weights; the iteration counter is encoded in the checkpoint
        # filename suffix ("weights-<iteration>").
        if args.load:
            iteration = int(args.load.split('-')[-1])
            saver.restore(sess, args.load)
            print(saver)
            print("load_process_DEBUG")
        # Dataset keys correspond to how the h5 files were produced.
        train_data_set = get_data_set(train_data_path, 'train')
        train_label_data_set = get_data_set(train_data_path, 'label')
        val_data_set = get_data_set(val_data_path, 'val')
        val_data_label_set = get_data_set(val_data_path, 'label')
        eval_data_set = get_data_set(eval_data_path, 'eval')
        eval_data_label_set = get_data_set(eval_data_path, 'label')
        if args.is_val:
            # Evaluation-only path (currently unused).
            benchmarks = [
                Benchmark('Benchmarks/Set5', name='Set5'),
                Benchmark('Benchmarks/Set14', name='Set14'),
                Benchmark('Benchmarks/BSD100', name='BSD100'),
                Benchmark('Benchmarks/UCMerced_LandUse',
                          name='UCMerced_LandUse'),
                Benchmark('Benchmarks/RSSCN7', name='RSSCN7')
            ]
            log_line = ''
            for benchmark in benchmarks:
                # NOTE(review): this call passes fewer arguments than the
                # benchmark.evaluate call in the training loop — confirm the
                # Benchmark API supports both signatures.
                psnr, ssim, _, _ = benchmark.evaluate(sess, sr_pred, log_path,
                                                      iteration)
                print(' [%s] PSNR: %.2f, SSIM: %.4f' %
                      (benchmark.name, psnr, ssim), end='')
                log_line += ',%.7f, %.7f' % (psnr, ssim)
            print()
            # Write to log
            with open(log_path + '/PSNR.csv', 'a') as f:
                f.write(
                    'iteration, set5_psnr, set5_ssim, set14_psnr, set14_ssim, bsd100_psnr, bsd100_ssim,UCMerced_LandUse_psnr, UCMerced_LandUse_ssim,RSSCN7_psnr, RSSCN7_ssim\n'
                )
                f.write('%d,%s\n' % (iteration, log_line))
        else:
            while True:
                t = trange(0, len(train_data_set) - args.batch_size + 1,
                           args.batch_size, desc='Iterations')
                # One epoch over the training set.
                for batch_idx in t:
                    t.set_description("Training... [Iterations: %s]" %
                                      iteration)
                    # Every log_freq iterations, evaluate and checkpoint.
                    if iteration % args.log_freq == 0:
                        # FIX: this loop previously reused `batch_idx`,
                        # clobbering the outer training index so the training
                        # batch picked on log iterations came from the wrong
                        # offset. Use a dedicated variable.
                        for val_batch_idx in range(
                                0, len(val_data_set) - args.batch_size + 1,
                                args.batch_size):
                            val_error = evaluate_model(
                                sr_loss,
                                val_data_set[val_batch_idx:val_batch_idx + 16],
                                val_data_label_set[val_batch_idx:val_batch_idx + 16],
                                sess, 124, args.batch_size)
                            eval_error = evaluate_model(
                                sr_loss,
                                eval_data_set[val_batch_idx:val_batch_idx + 16],
                                eval_data_label_set[val_batch_idx:val_batch_idx + 16],
                                sess, 124, args.batch_size)
                        # Evaluate benchmarks
                        log_line = ''
                        for benchmark in benchmarks:
                            psnr, ssim, _, _ = benchmark.evaluate(
                                sess, sr_out_pred, sr_BCD_pred, sr_pred,
                                log_path, iteration)
                            print(' [%s] PSNR: %.2f, SSIM: %.4f' %
                                  (benchmark.name, psnr, ssim), end='')
                            log_line += ',%.7f, %.7f' % (psnr, ssim)
                        # Write to log
                        with open(log_path + '/loss.csv', 'a') as f:
                            f.write('%d, %.15f, %.15f%s\n' %
                                    (iteration, val_error, eval_error,
                                     log_line))
                        # Save checkpoint
                        saver.save(sess, os.path.join(log_path, 'weights'),
                                   global_step=iteration,
                                   write_meta_graph=False)
                    # Train SRResnet on the next rainy/clean mini-batch.
                    batch_rain = train_data_set[batch_idx:batch_idx + 16]
                    batch_label = train_label_data_set[batch_idx:batch_idx + 16]
                    batch_rain = batch_bgr2rgb(batch_rain)
                    batch_label = batch_bgr2rgb(batch_label)
                    # Stationary wavelet transform: 12 channels per image
                    # (A,B,C,D bands for each of R,G,B).
                    batch_dwt_rain = batch_Swt(batch_rain)
                    batch_dwt_label = batch_Swt(batch_label)
                    # Approximation (LL) bands: channels 0, 4, 8.
                    batch_dwt_rain_A = np.stack([
                        batch_dwt_rain[:, :, :, 0], batch_dwt_rain[:, :, :, 4],
                        batch_dwt_rain[:, :, :, 8]
                    ], axis=-1)
                    batch_dwt_label_A = np.stack([
                        batch_dwt_label[:, :, :, 0],
                        batch_dwt_label[:, :, :, 4],
                        batch_dwt_label[:, :, :, 8]
                    ], axis=-1)
                    batch_dwt_rain_A /= 255.
                    batch_dwt_label_A /= 255.
                    # Detail (edge) bands: channels 1-3, 5-7, 9-11.
                    batch_rain_BCD = np.concatenate([
                        batch_dwt_rain[:, :, :, 1:4],
                        batch_dwt_rain[:, :, :, 5:8],
                        batch_dwt_rain[:, :, :, 9:12]
                    ], axis=-1)
                    batch_label_BCD = np.concatenate([
                        batch_dwt_label[:, :, :, 1:4],
                        batch_dwt_label[:, :, :, 5:8],
                        batch_dwt_label[:, :, :, 9:12]
                    ], axis=-1)
                    batch_rain_BCD = batch_rain_BCD / 255.
                    batch_label_BCD = batch_label_BCD / 255.
                    batch_label = batch_label / 255.
                    # NOTE(review): srresnet_training is fed as False here even
                    # though this is the training step (the sibling trainer in
                    # this repo feeds True) — confirm this is intentional.
                    _, err = sess.run([sr_opt, sr_loss],
                                      feed_dict={srresnet_training: False,
                                                 lr_A: batch_dwt_rain_A,
                                                 lr_dwt_edge: batch_rain_BCD,
                                                 hr_A: batch_dwt_label_A,
                                                 hr_dwt_edge: batch_label_BCD,
                                                 hr: batch_label,
                                                 })
                    iteration += 1
                print('__epoch__: %s' % epoch)
                epoch += 1
def train_init_policy(args):
    """Train the initialisation policy using REINFORCE.

    Arguments
    ---------
    args : dict
        Dictionary containing command line arguments
    """
    reward_history = []
    loss_history = []
    mse_history = []
    mrd_history = []

    generator = instance_generator(args.problem)

    # Build the initialisation policy and its optimiser.
    init_policy = select_initialization_policy(args)
    init_opt = T.optim.Adam(init_policy.parameters(), lr=args.init_lr_rate)

    # Optional baseline network for variance reduction.
    if args.use_baseline:
        baseline_net = Baseline(args.dim_context, args.dim_hidden)
        opt_base = T.optim.Adam(baseline_net.parameters(), lr=1e-4)
        loss_base_fn = T.nn.MSELoss()

    for epoch in range(1, args.epochs + 1):
        print("******************************************************")
        print(f"Epoch : {epoch}")

        # Fresh problem instance and environment for this epoch.
        instance = generator.generate_instance()
        context = instance.get_context()
        env = create_environment(args, instance)

        if args.use_baseline:
            # REINFORCE with baseline, then fit the baseline to the reward.
            baseline_reward = baseline_net.forward(context)
            reward_, loss_init_, start_state = init_policy.REINFORCE(
                init_opt, env, context, baseline_reward, True)
            update_baseline_model(loss_base_fn, baseline_reward, reward_,
                                  opt_base)
        else:
            # Plain REINFORCE without a baseline.
            reward_, loss_init_, start_state = init_policy.REINFORCE(
                init_opt, env, context)

        reward_history.append(reward_.item())
        loss_history.append(loss_init_.item())

        # Periodically evaluate and persist stats + model.
        if epoch % 50 == 0:
            eval_stats = evaluate_model(args, env, generator,
                                        init_policy=init_policy)
            mse_history.append(eval_stats["mean_square_error"])
            mrd_history.append(eval_stats["mean_relative_distance"])
            save_stats_and_model(args, epoch, reward_history, loss_history,
                                 mse_history, mrd_history, init_policy, INIT)
def main(args):
    """Entry point: train, explain, or predict with the document classifier.

    Parameters
    ----------
    args : argparse.Namespace
        Expects `dataset_path`, `mode` ('train' | 'explain' | 'predict'),
        `save_model_path`, and (for explain/predict) `input`, a
        comma-separated feature row.
    """
    # load data
    X, y = utils.load_dataset(args.dataset_path)
    # split into train and test sets (fixed seed for reproducibility)
    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2,
                                                        random_state=1)
    # Training mode
    if args.mode == 'train':
        print(f'Training data shape {X_train.shape}, {y_train.shape}')
        print(f'Test data shape {X_test.shape}, {y_test.shape}')
        print(f'Training class distrubution')
        class_counter = Counter(y_train)
        for k, v in class_counter.items():
            print(f' Class={k}, Count={v}')
        # preprocess data: missing data, date engineering (date column),
        # class imbalance, text data. Encoders are fit on X_train only.
        X_train_processed = utils.prepare_inputs(X_train, X_train)
        X_test_processed = utils.prepare_inputs(X_train, X_test)
        y_train_processed = utils.prepare_targets(y_train, y_train)
        y_test_processed = utils.prepare_targets(y_train, y_test)
        # Spot-check several candidate classifiers.
        print("Starting spot check")
        models, names = utils.get_models()
        results = list()
        for candidate, name in zip(models, names):
            # evaluate the model and store results
            scores = utils.evaluate_model(X_train_processed,
                                          y_train_processed, candidate)
            results.append(scores)
            # summarize performance
            print('>%s %.3f (%.3f)' % (name, np.mean(scores), np.std(scores)))
        print("End spot check")
        # Train the chosen model.
        print("Start model training")
        model = RandomForestClassifier(n_estimators=100)
        model.fit(X_train_processed, y_train_processed)
        # save model
        # FIX: use a context manager so the handle is closed promptly (the
        # previous bare open() was never closed).
        print("Saving model")
        with open(args.save_model_path, 'wb') as f:
            pickle.dump(model, f)
        # evaluate the model
        y_train_preds = model.predict(X_train_processed)
        y_test_preds = model.predict(X_test_processed)
        # precision, recall, f-score per category on the training split
        print("Evaluating training performance")
        utils.print_report(y_train_processed, y_train_preds, class_counter)
        # precision, recall, f-score per category on the test split
        print("Evaluating testing performance")
        utils.print_report(y_test_processed, y_test_preds, class_counter)
        # confusion matrix
        plot_confusion_matrix(model, X_test_processed, y_test_processed)
        plt.show()
        # feature importances
        utils.plot_feature_importance(X_train_processed.columns, model)
    # Explain mode: show how the classifier reaches its decision
    elif args.mode == 'explain':
        data = _input_to_frame(args.input, X_train.columns)
        # process test data
        data_processed = utils.prepare_inputs(X_train, data)
        # load model
        model = _load_model(args.save_model_path)
        # Extract and plot a single tree from the forest.
        estimator = model.estimators_[5]
        fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(4, 4), dpi=300)
        tree.plot_tree(estimator,
                       feature_names=data_processed.columns,
                       filled=True)
        plt.show()
        # Gather per-tree structure for the textual walk-through.
        n_nodes_ = [t.tree_.node_count for t in model.estimators_]
        children_left_ = [t.tree_.children_left for t in model.estimators_]
        children_right_ = [t.tree_.children_right for t in model.estimators_]
        feature_ = [t.tree_.feature for t in model.estimators_]
        threshold_ = [t.tree_.threshold for t in model.estimators_]
        for i, estimator_i in enumerate(model.estimators_):
            print("Tree %d\n" % i)
            sample_id = 0
            utils.explore_tree(estimator_i, n_nodes_[i], children_left_[i],
                               children_right_[i], feature_[i], threshold_[i],
                               data_processed.columns, data_processed,
                               sample_id=sample_id)
            prediction = estimator_i.predict(data_processed)
            # FIX: don't shadow the loop variable `i` in the comprehension.
            prediction = [int(p) for p in prediction]
            print(
                f'Prediction for sample {sample_id}: {utils.decode_targets(y_train, prediction)[sample_id]}'
            )
            print('\n' * 2)
    # Predict mode
    elif args.mode == 'predict':
        data = _input_to_frame(args.input, X_train.columns)
        # process test data
        data_processed = utils.prepare_inputs(X_train, data)
        # load model
        model = _load_model(args.save_model_path)
        # predict data
        prediction = model.predict(data_processed)
        print(f'prediction : {utils.decode_targets(y_train, prediction)[0]}')


def _input_to_frame(raw, columns):
    """Parse a comma-separated CLI row into a typed single-row DataFrame."""
    data = pd.DataFrame(data=[raw.split(',')], columns=columns)
    # These numeric columns arrive as strings from the command line.
    data['pagesCount'] = data.pagesCount.astype('int64')
    data['wordCount'] = data.wordCount.astype('int64')
    data['fileSize'] = data.fileSize.astype('int64')
    return data


def _load_model(path):
    """Load the pickled classifier, closing the file handle (was leaked)."""
    with open(path, 'rb') as f:
        return pickle.load(f)
def train_local_move_policy(args):
    """Train only the Local Move Policy (A2C).

    Arguments
    ---------
    args : dict
        Dictionary containing command line arguments
    """
    reward_history = []
    loss_history = []
    mse_history = []
    mrd_history = []

    # Problem-instance generator for the chosen problem class.
    generator = instance_generator(args.problem)

    # A2C policy that proposes local moves on a candidate solution.
    local_move_policy = A2CLocalMovePolicy(args.dim_context,
                                           args.dim_problem,
                                           args.window_size,
                                           args.num_of_scenarios_in_state,
                                           gamma=args.gamma,
                                           beta_entropy=args.beta,
                                           num_local_move=args.num_local_move,
                                           lr_a2c=args.lr_a2c)

    for epoch in range(1, args.epochs + 1):
        t_begin = time.time()
        print("******************************************************")
        print(f"Epoch : {epoch}")

        # Fresh instance and environment for this epoch.
        instance = generator.generate_instance()
        context = instance.get_context()
        env = create_environment(args, instance)
        start_state = generate_dummy_start_state(env, args.dim_problem)

        # Take local moves to improve the provided initial solution.
        rewards, loss = local_move_policy.train(start_state, env)
        reward_history.append(rewards)
        loss_history.append(loss)

        # Periodic evaluation and persistence of stats + model.
        if epoch % 100 == 0:
            eval_stats = evaluate_model(args, env, generator,
                                        local_move_policy=local_move_policy)
            mse_history.append(eval_stats["mean_square_error"])
            mrd_history.append(eval_stats["mean_relative_distance"])
            save_stats_and_model(args, epoch, reward_history, loss_history,
                                 mse_history, mrd_history, local_move_policy,
                                 LOCAL)

        print(
            f"Took {time.time() - t_begin} in epoch {epoch}/{args.epochs}")
def train(hparams):
    """Train a fault-detection model as configured by `hparams`.

    Data: 'cnn' networks consume image datasets from preprocess_images;
    all other network types use scaled tabular features split into
    train/dev/test. Then, depending on hparams["training_type"]:
      * 'DL' -> build a Keras model (lstm / cnn / fully_connected) and fit it.
      * 'ML' -> fit a classical model (NB / KNN / logistic_regression / SVM).
    Finally the model is evaluated and optionally saved
    (.h5 for DL, pickle for everything else).
    """
    input_size = hparams["input_size"]
    if hparams["network_type"] == "cnn":
        # Image pipeline with an 80/20 training/validation split.
        ds_train = preprocess_images(hparams, 'train', 'training', 0.2)
        ds_dev = preprocess_images(hparams, 'train', 'validation', 0.2)
        ds_test = preprocess_images(hparams, 'test')
    else:
        # Tabular pipeline: standardize, then carve out test and dev sets.
        x, y = prepare_data(hparams, 'train')
        x_scaled = preprocessing.scale(x)
        #x_dev, y_dev = prepare_data(hparams, 'dev')
        #x_test, y_test = prepare_data(hparams, 'test')
        # 15% held out for test, then 15% of the remainder for dev.
        x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, test_size=0.15, random_state=42)
        x_train, x_dev, y_train, y_dev = train_test_split(x_train, y_train, test_size=0.15, random_state=42)
    if hparams['training_type'] == 'DL':
        # Build the Keras graph for the requested architecture.
        if hparams["network_type"] == "lstm":
            inputs, outputs = lstm(hparams, input_size, hparams["network_config"]["lstm"])
        elif hparams["network_type"] == "cnn":
            inputs, outputs = cnn(hparams, input_size, hparams["network_config"]["cnn"])
        elif hparams["network_type"] == "fully_connected":
            inputs, outputs = fully_connected(hparams, input_size, hparams["network_config"]["fully_connected"])
        else:
            raise ValueError('Undefined {} network for DL'.format(hparams["network_type"]))
        model = tf.keras.Model(inputs=inputs, outputs=outputs, name="fault_detector")
        model.summary()
        model.compile(optimizer=tf.keras.optimizers.Adam(hparams["learning_rate"]), loss=hparams["loss"])
        if hparams["network_type"] == "cnn":
            model.fit(ds_train, verbose=1, epochs=hparams["epochs"], shuffle=True, validation_data=ds_dev)
        else:
            model.fit(x_train, y_train, verbose=1, shuffle=True, validation_data=(x_dev, y_dev), epochs=hparams["epochs"], batch_size=hparams["batch_size"])
    elif hparams["training_type"] == 'ML':
        # Classical models are fit on the tabular split.
        # NOTE(review): this branch uses x_train/y_train, which only exist when
        # network_type != 'cnn' — confirm 'cnn' always implies training_type 'DL'.
        if hparams["network_type"] == 'NB':
            model = NaiiveBayes(x_train, y_train, hparams["network_config"]["NB"]["type"])
        elif hparams["network_type"] == 'KNN':
            model = KNN(x_train, y_train, hparams["network_config"]["KNN"]["k"])
        elif hparams["network_type"] == 'logistic_regression':
            model = LR(x_train, y_train, hparams["network_config"]["logistic_regression"]["c"])
        elif hparams["network_type"] == 'SVM':
            model = SVM(x_train, y_train, hparams["network_config"]["SVM"]["c"], hparams["network_config"]["SVM"]["kernel"])
        else:
            raise ValueError('Undefined {} network type for ML'.format(hparams["network_type"]))
    # NOTE(review): if training_type is neither 'DL' nor 'ML', `model` is
    # unbound here and the lines below raise NameError — consider raising
    # earlier for unknown training types.
    if hparams["network_type"] == "cnn":
        model.evaluate(ds_test)
    else:
        evaluate_model(model, x_test, y_test, hparams["training_type"])
    if bool(hparams['save_model']):
        # DL models save as Keras .h5; everything else is pickled.
        if hparams['training_type']=='DL':
            model.save(hparams['model_path']+'.h5')
        else:
            with open(hparams['model_path']+'.pkl', 'wb') as file:
                pickle.dump(model, file)
def test_main(cfg):
    """Run prediction and/or evaluation according to `cfg`.

    Supports a single model, a weighted-average ensemble, or a boosted
    (adaBoost / XGBoost) ensemble, on either the validation or test dataset.

    Parameters
    ----------
    cfg : config object exposing `dataset_cfg`, `test_cfg`, `device`,
        `num_classes` and `random_seed`.
    """
    # config
    dataset_cfg = cfg.dataset_cfg
    test_cfg = cfg.test_cfg
    device = cfg.device

    # Select the dataset (validation or test) based on config.
    if test_cfg.dataset == 'val_dataset':
        dataset = LandDataset(DIR_list=dataset_cfg.val_dir_list,
                              mode='val',
                              input_channel=dataset_cfg.input_channel,
                              transform=dataset_cfg.val_transform)
    elif test_cfg.dataset == 'test_dataset':
        dataset = LandDataset(DIR_list=dataset_cfg.test_dir_list,
                              mode='test',
                              input_channel=dataset_cfg.input_channel,
                              transform=dataset_cfg.test_transform)
    else:
        raise Exception('没有配置数据集!')  # no dataset configured

    # FIX: worker_init_fn must be a callable taking the worker id; the old
    # code passed `_init_fn()` — i.e. the call's None result — so DataLoader
    # workers were never seeded. Seed the main process once as well to keep
    # the old call's side effect.
    np.random.seed(cfg.random_seed)

    def _init_fn(worker_id):
        # Seed each DataLoader worker deterministically.
        np.random.seed(cfg.random_seed)

    dataloader = DataLoader(dataset,
                            batch_size=test_cfg.batch_size,
                            shuffle=False,
                            num_workers=test_cfg.num_workers,
                            worker_init_fn=_init_fn)

    is_ensemble = test_cfg.setdefault(key='is_ensemble', default=False)
    boost_type = test_cfg.setdefault(key='boost_type', default=None)

    if not is_ensemble:  # single-model path (no ensembling)
        # Pass `device` so weights are not first loaded onto cuda:0 and then
        # moved to the target device.
        model = torch.load(test_cfg.check_point_file, map_location=device)

        # Prediction
        if test_cfg.is_predict:
            predict(model=model, dataset=dataset, out_dir=test_cfg.out_dir,
                    device=device, batch_size=test_cfg.batch_size)

        # Evaluation
        if test_cfg.is_evaluate:
            loss_func = nn.CrossEntropyLoss().to(device)
            evaluate_model(model, dataset, loss_func, device, cfg.num_classes,
                           num_workers=test_cfg.num_workers,
                           batch_size=test_cfg.batch_size)
    else:  # multi-model ensemble
        # Load every checkpoint in the configured list.
        models = []
        for ckpt in test_cfg.check_point_file:
            models.append(torch.load(ckpt, map_location=device))

        if boost_type is None:  # weighted-average ensemble
            # Default to uniform weights over the loaded models.
            ensemble_weight = test_cfg.setdefault(
                key='ensemble_weight',
                default=[1.0 / len(models)] * len(models))
            if len(ensemble_weight) != len(models):
                raise Exception('权重个数错误!')  # wrong number of weights
            if test_cfg.is_evaluate:  # evaluation mode
                miou = ensemble_evaluate(models=models,
                                         dataloader=dataloader,
                                         ensemble_weight=ensemble_weight,
                                         device=device,
                                         num_classes=cfg.num_classes)
                print('miou is : {:.4f}'.format(miou))
                return
            # prediction mode
            ensemble_predict(models=models,
                             ensemble_weight=ensemble_weight,
                             dataset=dataset,
                             out_dir=test_cfg.out_dir,
                             device=device,
                             batch_size=test_cfg.batch_size)
        else:  # boosted ensemble
            if boost_type == 'adaBoost':
                boost_model = joblib.load(test_cfg.boost_ckpt_file)
            elif boost_type == 'XGBoost':
                boost_model = xgboost.Booster(
                    model_file=test_cfg.boost_ckpt_file)
            # NOTE(review): an unrecognized boost_type leaves boost_model
            # unbound below — consider raising explicitly.
            if test_cfg.is_evaluate:
                miou = ensemble_boost_evaluate(models=models,
                                               dataloader=dataloader,
                                               device=device,
                                               num_classes=cfg.num_classes,
                                               boost_type=boost_type,
                                               boost_model=boost_model)
                print('miou is : {:.4f}'.format(miou))
                return
            # prediction mode
            ensemble_boost_predict(models=models,
                                   boost_model=boost_model,
                                   boost_type=boost_type,
                                   dataset=dataset,
                                   out_dir=test_cfg.out_dir,
                                   device=device,
                                   batch_size=test_cfg.batch_size)
xs_train = xs[vs != fold, :][:, features] xs_val = xs[vs == fold, :][:, features] ys_train = ys[vs != fold, :][:, targets] ys_val = ys[vs == fold, :][:, targets] print('Run {}/{}, split {}/{}'.format(r, len(runs), fold + 1, nfolds)) model.fit( xs_train, ys_train, batch_size=batch_size, epochs=epochs, verbose=1, ) # Validation results[fold], _ = utils.evaluate_model(model, xs_val, ys_val) del (model) K.clear_session() # Train and save final model print('Run {}/{}, final model training'.format(r, len(runs))) # TODO: make a create_model function, or make it so that this code doesn't get repeated model = Sequential() model.add( Dense(hidden_layers[0], input_dim=input_dim, bias_initializer="zeros", kernel_initializer="normal",
def main(device, tr_loader, va_loader, te_loader, modelSelection):
    """Train the selected depth-estimation CNN and plot training curves.

    Parameters
    ----------
    device : torch.device used for training/evaluation.
    tr_loader, va_loader : training and validation DataLoaders.
    te_loader : test DataLoader (currently unused in this function).
    modelSelection : str key choosing the architecture; also selects the
        config section for learning rate and checkpoint path.
    """
    # Model: dispatch on the (case-insensitive) selection string.
    if modelSelection.lower() == 'res50':
        model = Res50()
    elif modelSelection.lower() == 'dense121':
        model = Dense121()
    elif modelSelection.lower() == 'dense161':
        model = Dense161()
    elif modelSelection.lower() == 'mobv2':
        model = Mob_v2()
    elif modelSelection.lower() == 'dense169':
        model = Dense169()
    elif modelSelection.lower() == 'mob':
        model = Net()
    elif modelSelection.lower() == 'squeeze':
        model = Squeeze()
    else:
        # NOTE(review): assert is stripped under -O; a ValueError would be safer.
        assert False, 'Wrong type of model selection string!'
    model = model.to(device)
    # TODO: define loss function, and optimizer
    learning_rate = utils.config(modelSelection + ".learning_rate")
    criterion = DepthLoss(0.1).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    number_of_epoches = 10
    #
    # Attempts to restore the latest checkpoint if exists
    # NOTE(review): message says "unet" regardless of the selected model.
    print("Loading unet...")
    model, start_epoch, stats = utils.restore_checkpoint(model, utils.config(modelSelection + ".checkpoint"))
    # Resume running metric histories from the checkpoint when present.
    running_va_loss = [] if 'va_loss' not in stats else stats['va_loss']
    running_va_acc = [] if 'va_err' not in stats else stats['va_err']
    running_tr_loss = [] if 'tr_loss' not in stats else stats['tr_loss']
    running_tr_acc = [] if 'tr_err' not in stats else stats['tr_err']
    # Baseline evaluation before (re)starting training.
    tr_acc, tr_loss = utils.evaluate_model(model, tr_loader, device)
    acc, loss = utils.evaluate_model(model, va_loader, device)
    running_va_acc.append(acc)
    running_va_loss.append(loss)
    running_tr_acc.append(tr_acc)
    running_tr_loss.append(tr_loss)
    # The same list objects are appended to below, so this dict keeps
    # growing as training proceeds and is saved with each checkpoint.
    stats = {
        'va_err': running_va_acc,
        'va_loss': running_va_loss,
        'tr_err': running_tr_acc,
        'tr_loss': running_tr_loss,
    }
    # Loop over the entire dataset multiple times
    epoch = start_epoch
    while epoch < number_of_epoches:
        # Train model
        utils.train_epoch(device, tr_loader, model, criterion, optimizer)
        # Save checkpoint
        utils.save_checkpoint(model, epoch + 1, utils.config(modelSelection + ".checkpoint"), stats)
        # Evaluate model
        tr_acc, tr_loss = utils.evaluate_model(model, tr_loader, device)
        va_acc, va_loss = utils.evaluate_model(model, va_loader, device)
        running_va_acc.append(va_acc)
        running_va_loss.append(va_loss)
        running_tr_acc.append(tr_acc)
        running_tr_loss.append(tr_loss)
        epoch += 1
    print("Finished Training")
    utils.make_plot(running_tr_loss, running_tr_acc, running_va_loss, running_va_acc)
gaussian = lambda mu, sigma, xs: (1 / (sigma * np.sqrt(2 * np.pi)) * np.exp(-( xs - mu)**2 / (2 * sigma**2))) # Single-output analysis ys_pred = np.zeros(ys.shape) rs = np.zeros(ys.shape) fig, axs = plt.subplots(2, 3) axs = np.array(axs).flatten() for i, r in enumerate(runlist[0:6]): model = load_model(resultsfolder + '/run{}.h5'.format(r + 1)) features = runs[r]['features'] targets = runs[r]['targets'] xs_val = xs[:, features] ys_val = ys[:, targets] results, residues, prediction = utils.evaluate_model(model, xs_val, ys_val) ys_pred[:, i:i + 1] = prediction rs[:, i:i + 1] = residues print("\nRun {}".format(r)) utils.print_all_results(results, targets) # Residual histogram targetname = "Target whatever" # bins = axs[i].hist(residues, bins=100, normed=True)[1] # mu = residues.mean() # sigma = residues.std() # axs[i].plot(bins, gaussian(mu, sigma, bins), # linewidth=2, color='r') # # TODO: fit a lorentzian
"""Evaluate a trained image-captioning model on the Flickr8k test split."""
import utils as UT
import model
import argparse
import os
from tensorflow.keras.models import load_model
from train import *

ap = argparse.ArgumentParser()
# NOTE(review): required=True makes the default unreachable — drop one of the two.
ap.add_argument("-t", "--testPath", required=True, help="path to test file",
                default="flickr8k_text/Flickr_8k.testImages.txt")
args = vars(ap.parse_args())

# NOTE(review): this rebinding shadows the imported `model` module.
model = load_model('model-ep004-loss3.978-val_loss4.168.h5')

# FIX: the parsed arguments live in `args`; `arg` was an undefined name and
# raised NameError at runtime.
test = UT.load_identifiers(args['testPath'])
print('Dataset: %d' % len(test))
# descriptions
test_descriptions = UT.load_clean_desc('description.txt', test)
print('Descriptions: test=%d' % len(test_descriptions))
# photo features
test_features = UT.load_photo_features('features.pkl', test)
print('Photos: test=%d' % len(test_features))
# `tokens` and `max_length` are expected to come from `from train import *`.
UT.evaluate_model(model, test_descriptions, test_features, tokens, max_length)
# Fit the column transformer on the training split only, then apply it to both
# splits (prevents test-set leakage into the fitted encoders/scalers).
x_train = transformer.fit_transform(x_train)
x_test = transformer.transform(x_test)
# Expand the categorical feature names produced by the second transformer
# (index [1][1] — presumably the categorical encoder; verify against the
# ColumnTransformer definition).
cat_names = transformer.transformers_[1][1].get_feature_names(cat_names)
all_feature_names = list(num_names)
all_feature_names.extend(cat_names)
model = XGBClassifier(max_depth=5,
                      n_estimators=100,
                      min_child_weight=3,
                      colsample_bytree=0.68,
                      subsample=0.63)
# eval_set reports train and test metrics for each boosting round (verbose).
model.fit(x_train,
          y_train,
          eval_set=[(x_train, y_train), (x_test, y_test)],
          verbose=True)
# Score the model on the positive-class probabilities.
print(evaluate_model(y_test, model.predict_proba(x_test)[:, 1]))
if SAVE_MODELS:
    # Persist the model, the fitted transformer, and the expanded feature
    # names so downstream consumers can reconstruct the input layout.
    with open("models/model.pcl", "wb") as f:
        pickle.dump(model, f)
    with open("models/transformer.pcl", "wb") as f:
        pickle.dump(transformer, f)
    with open("data/processed/features.json", "w") as f:
        json.dump(all_feature_names, f)
# Move the model to GPU when one is available.
if device.type=='cuda':
    model = model.cuda()
# Adam with transformer-style betas/eps.
optim = torch.optim.Adam(model.parameters(), lr=args.learning_rate, betas=(0.9, 0.98), eps=1e-9)
print("Using", device.type)
# Load weight: restore model + optimizer state from a checkpoint if requested.
if args.restore_file is not None:
    if os.path.isfile(args.restore_file):
        print("Load model")
        state = torch.load(args.restore_file)
        model.load_state_dict(state['model'])
        optim.load_state_dict(state['optim'])
    else:
        raise Exception("Invalid weight path")
# Init weight dir
if not os.path.isdir(args.weight_dir):
    os.makedirs(args.weight_dir)
# Train model: one checkpoint per epoch, train/val loss logged each epoch.
print("Start training %d epochs" % args.num_epochs)
for e in range(1, args.num_epochs+1):
    logger.info("Epoch %02d/%02d" % (e, args.num_epochs))
    logger.info("Start training")
    print("\nEpoch %02d/%02d" % (e, args.num_epochs), flush=True)
    save_file = os.path.join(args.weight_dir, 'epoch_%02d.h5' % e)
    train_loss = train_model(model, optim, train_iter, src_pad_token, device=device, save_path=save_file)
    logger.info("End training")
    logger.info("train_loss = %.8f" % train_loss)
    # NOTE(review): val_loss is logged but not used for best-model selection
    # or early stopping — confirm this is intentional.
    val_loss = evaluate_model(model, val_iter, src_pad_token, device=device)
    logger.info("val_loss = %.8f\n" % val_loss)
def train_ddpg(seed):
    """Train a DDPG agent on the UAV environment and evaluate it.

    Sets up the environment with the given seed, trains a stable-baselines
    DDPG model for a fixed number of timesteps (logging to TensorBoard),
    evaluates it over 100 episodes, archives the gym monitor CSV under a
    per-seed path, and cleans up.

    :param seed: (int) random seed
    :return: result of evaluate_model on the trained agent
    """
    algo_name = 'DDPG'
    total_timesteps = 3000000
    env = set_up_env(seed)

    # Reset the globals consumed by the checkpointing callback.
    global best_mean_reward, n_steps
    best_mean_reward, n_steps = -np.inf, 0

    agent = DDPG(policy=DDPGMlpPolicy,
                 env=env,
                 gamma=0.99,
                 tau=0.001,
                 batch_size=128,
                 actor_lr=0.0001,
                 critic_lr=0.001,
                 critic_l2_reg=0.0,
                 clip_norm=None,
                 reward_scale=1.0,
                 memory_policy=None,
                 memory_limit=100,
                 eval_env=None,
                 nb_train_steps=50,
                 nb_rollout_steps=100,
                 nb_eval_steps=100,
                 param_noise=None,
                 action_noise=None,
                 param_noise_adaption_interval=50,
                 normalize_observations=False,
                 normalize_returns=False,
                 enable_popart=False,
                 observation_range=(-5.0, 5.0),
                 render=False,
                 render_eval=False,
                 verbose=0,
                 tensorboard_log="./logs/{}/tensorboard/{}/".format(
                     EXPERIMENT_NATURE, algo_name))

    agent.learn(total_timesteps=total_timesteps,
                callback=callback,
                seed=seed,
                log_interval=500,
                tb_log_name="seed_{}".format(seed))

    # model = DDPG.load(log_dir + 'best_model.pkl')
    result = evaluate_model(env, agent, 100)

    # Archive the monitor output under a per-seed CSV path.
    os.makedirs('./logs/{}/csv/{}/'.format(EXPERIMENT_NATURE, algo_name),
                exist_ok=True)
    os.rename(
        '/tmp/gym/monitor.csv',
        "./logs/{}/csv/{}/seed_{}.csv".format(EXPERIMENT_NATURE, algo_name,
                                              seed))

    env.close()
    del agent, env
    gc.collect()
    return result
def update_model(model='en_core_web_sm', output_dir='models/', n_iter=100):
    """Load (or create) a spaCy model, train its NER pipe, and save it.

    Trains on the module-level ``training_data``, checkpoints the pipeline
    after every epoch, optionally scores each epoch against the module-level
    ``val`` set, and writes the score history to ``history_pretrained_model.csv``.

    :param model: name of an existing spaCy model to fine-tune, or ``None``
        to start from a blank 'en' pipeline
    :param output_dir: directory receiving per-epoch checkpoints and the
        final saved model
    :param n_iter: number of training epochs
    :return: the trained ``nlp`` pipeline
    """
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    else:
        nlp = spacy.blank("en")  # create blank Language class
        print("Created blank 'en' model")

    # create the built-in pipeline components and add them to the pipeline
    # nlp.create_pipe works for built-ins that are registered with spaCy
    if "ner" not in nlp.pipe_names:
        ner = nlp.create_pipe("ner")
        nlp.add_pipe(ner, last=True)
    # otherwise, get it so we can add labels
    else:
        ner = nlp.get_pipe("ner")

    # add labels seen anywhere in the training annotations
    for _, annotations in training_data:
        for ent in annotations.get("entities"):
            ner.add_label(ent[2])

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
    with nlp.disable_pipes(*other_pipes):  # only train NER
        print("Training model...")
        # BUG FIX: begin_training() *returns* the optimizer; the original
        # discarded it, so `sgd=optimizer` below raised NameError whenever
        # training a blank model. Bind it in both branches.
        if model is None:
            optimizer = nlp.begin_training()
        else:
            optimizer = nlp.resume_training()
        history_pretrained = []
        for itn in range(n_iter):
            random.shuffle(training_data)
            losses = {}
            # batch up the examples using spaCy's minibatch
            batches = minibatch(training_data,
                                size=compounding(4.0, 32.0, 1.001))
            for batch in batches:
                texts, annotations = zip(*batch)
                nlp.update(
                    texts,        # batch of texts
                    annotations,  # batch of annotations
                    drop=0.2,     # dropout - make it harder to memorise data
                    sgd=optimizer,
                    losses=losses,
                )
            print("Losses", losses)
            # Checkpoint each epoch. Avoid spaCy's large base models here —
            # each checkpoint would occupy ~786 MB on disk.
            epoch_path = output_dir + 'model_pretrained/epoch_' + str(itn)
            nlp.to_disk(epoch_path)
            if val is not None:
                score_prf = evaluate_model(nlp, val)
                history_pretrained.append({
                    "Epoch": itn,
                    "losses": losses,
                    "Precision": score_prf['ents_p'],
                    "Recall": score_prf['ents_r'],
                    "F1-score": score_prf['ents_f'],
                })

    data = pd.DataFrame(history_pretrained)
    data.to_csv('history_pretrained_model.csv', index=False)
    save_model(nlp, output_dir)
    return nlp
# NOTE(review): fragment of a Keras experiment loop — `model`, `test_dir`,
# `model_dir`, `train_seq`, `val_seq`, `test_seq`, `params`, `epochs`,
# `workers`, `constructor`, `start_time`, `plot_history` and `evaluate_model`
# come from earlier in the file (outside this view). Reformatted only.
model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
model.summary()
plot_model(model, to_file=test_dir + 'model.png', show_shapes=True)

# Fit model: checkpoint weights after every epoch; stop early once the
# validation loss has not improved for 10 epochs.
checkpointer = ModelCheckpoint(test_dir + 'Checkpoints/weights_{epoch:d}.h5',
                               save_weights_only=True)
earlystopper = EarlyStopping(patience=10)
history = model.fit_generator(train_seq, epochs=epochs, verbose=1,
                              validation_data=val_seq,
                              callbacks=[checkpointer, earlystopper],
                              use_multiprocessing=True, workers=workers)
model.save_weights(test_dir + 'model.h5')
plot_history(history, outdir=test_dir)

# Append this run's hyper-parameters and best validation loss to the
# shared results CSV (semicolon-separated, one row per run).
with open(model_dir + 'results.csv', 'a') as f:
    f.write(';'.join([str(p) for p in params]) + ';'
            + str(min(history.history['val_loss'])) + '\n')

# Evaluate model
evaluate_model(model, test_seq, test_dir, constructor.classes)

# Destroy old model graph to avoid clutter
K.clear_session()
print('Total time taken: ' + str(time.time() - start_time))
def main():
    """Run the CIFAR-10 ResNet-18 pruning experiment end to end.

    Loads a pretrained model, reports its baseline accuracy / sparsity,
    runs one round of global iterative pruning + fine-tuning on a deep
    copy, reports the pruned metrics, and saves the pruned model.
    """
    num_classes = 10
    random_seed = 1
    l1_regularization_strength = 0
    l2_regularization_strength = 1e-4
    learning_rate = 1e-3
    learning_rate_decay = 1

    cuda_device = torch.device("cuda:0")

    model_dir = "saved_models"
    model_filename = "resnet18_cifar10.pt"
    model_filename_prefix = "pruned_model"
    pruned_model_filename = "resnet18_pruned_cifar10.pt"
    model_filepath = os.path.join(model_dir, model_filename)

    set_random_seeds(random_seed=random_seed)

    # Create an untrained model, then load the pretrained weights into it.
    model = create_model(num_classes=num_classes)
    model = load_model(model=model, model_filepath=model_filepath,
                       device=cuda_device)

    train_loader, test_loader, classes = prepare_dataloader(
        num_workers=8, train_batch_size=128, eval_batch_size=256)

    # Baseline metrics before any pruning.
    _, eval_accuracy = evaluate_model(model=model, test_loader=test_loader,
                                      device=cuda_device, criterion=None)
    classification_report = create_classification_report(
        model=model, test_loader=test_loader, device=cuda_device)
    num_zeros, num_elements, sparsity = measure_global_sparsity(model)

    print("Test Accuracy: {:.3f}".format(eval_accuracy))
    print("Classification Report:")
    print(classification_report)
    print("Global Sparsity:")
    print("{:.2f}".format(sparsity))

    print("Iterative Pruning + Fine-Tuning...")

    # Prune a deep copy so the baseline model stays intact for comparison.
    pruned_model = copy.deepcopy(model)

    # One aggressive pruning round: 98% of conv weights, grouped (global)
    # pruning, with 200 epochs of fine-tuning.
    iterative_pruning_finetuning(
        model=pruned_model,
        train_loader=train_loader,
        test_loader=test_loader,
        device=cuda_device,
        learning_rate=learning_rate,
        learning_rate_decay=learning_rate_decay,
        l1_regularization_strength=l1_regularization_strength,
        l2_regularization_strength=l2_regularization_strength,
        conv2d_prune_amount=0.98,
        linear_prune_amount=0,
        num_iterations=1,
        num_epochs_per_iteration=200,
        model_filename_prefix=model_filename_prefix,
        model_dir=model_dir,
        grouped_pruning=True)

    # Make pruning permanent: fold the masks into the weights and drop the
    # re-parametrization (torch.nn.utils.prune.remove semantics).
    remove_parameters(model=pruned_model)

    _, eval_accuracy = evaluate_model(model=pruned_model,
                                      test_loader=test_loader,
                                      device=cuda_device, criterion=None)
    classification_report = create_classification_report(
        model=pruned_model, test_loader=test_loader, device=cuda_device)
    num_zeros, num_elements, sparsity = measure_global_sparsity(pruned_model)

    print("Test Accuracy: {:.3f}".format(eval_accuracy))
    print("Classification Report:")
    print(classification_report)
    print("Global Sparsity:")
    print("{:.2f}".format(sparsity))

    # BUG FIX: the original re-saved the *unpruned* `model` under its own
    # filename and never used `pruned_model_filename` — persist the pruned
    # network under the pruned-model name instead.
    save_model(model=pruned_model, model_dir=model_dir,
               model_filename=pruned_model_filename)
train_y[:size], valid_x, valid_y, lr0, lrdecay, bs, epochs, 0, name, e0, rec, print_every=999999) else: print '\nno training' tr_acc = evaluate_model(model.predict_proba, train_x[:size], train_y[:size]) print 'train acc: {}'.format(tr_acc) va_acc = evaluate_model(model.predict_proba, valid_x, valid_y, n_mc=200) print 'valid acc: {}'.format(va_acc) te_acc = evaluate_model(model.predict_proba, test_x, test_y, n_mc=200) print 'test acc: {}'.format(te_acc) if args.totrain == 1: # report the best valid-model's test acc e0 = model.load(save_path) te_acc = evaluate_model(model.predict_proba, test_x, test_y, n_mc=200) print 'test acc (best valid): {}'.format(te_acc) if args.adv_eval == 1: