"""Evaluate an ELMo-based BiLSTM/MLP model on one split of the balanced dataset.

Fix: argparse, torch, utils and net were used below but never imported.
"""
import argparse

import torch
from sklearn.metrics import confusion_matrix, precision_recall_fscore_support

import data_loader
import net
import utils

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--data_set', default='val',
                        choices=['train', 'val', 'test'],
                        help='The data set you want to evaluate')
arg_parser.add_argument('--model', default='bilstm_mlp_elmo.pt',
                        help='Model name')

if __name__ == '__main__':
    args = arg_parser.parse_args()
    data_set = args.data_set
    model = args.model
    # Dataset-level defaults, overridden by the experiment's own params.json.
    params = utils.Params('data/balanced/dataset_params.json')
    params.update('experiments/elmo_model/params.json')
    dl = data_loader.DataLoader('data/averaged_elmo/', params)
    data = dl.load_elmo_data([data_set], 'data/averaged_elmo')
    # NOTE(review): this rebinds the imported `net` module name to the model
    # instance; kept as-is because the rest of the script (not visible here)
    # refers to the model as `net`.
    net = net.Network(params)
    net.load_state_dict(torch.load(model))
    # Evaluation
    val_data_iter = dl.elmo_iterator(data[data_set], params, shuffle=False)
    total_correct = 0
    predictions = torch.tensor([], dtype=torch.long)
x += tuple(d[f].flat_values for f in (EMT, DMT)) if group in (qs.QAS, qs.FIX): y = (d[OUT].to_tensor(), ) else: y = (d[TGT].to_tensor(), ) return x, y def dset_for(ps, root=None, group=None, adapter=adapter, count=None): ds = load(ps, root, group, count=count) ds = ds.map(lambda x: adapter(x, group), -1) return ds.shuffle(1000) if __name__ == '__main__': np.random.seed(12345) import utils as qu ps = dict( dim_batch=5, dim_pool=10, max_val=1000, num_samples=20, num_shards=3, ) ps = qu.Params(**ps) ss = [s for s in dump(ps)] ds = load(ps, shards=ss).map(adapter, -1) for i, _ in enumerate(ds): pass print(f'dumped {i + 1} batches of {ps.dim_batch} samples each')
return metrics_mean, AUROCs if __name__ == '__main__': """ Evaluates the model on the test set. """ # Load user arguments arguments = argument_parser.parse_args() # Load hyperparameters from JSON file json_path = os.path.join(arguments.model_dir, 'params.json') assert os.path.isfile( json_path), 'No json configuration file found at {}'.format(json_path) parameters = utils.Params(json_path) # Record whether GPU is available parameters.cuda = torch.cuda.is_available() # Set random seed for reproducible experiments torch.manual_seed(230) if parameters.cuda: torch.cuda.manual_seed(230) # Configure logger utils.set_logger(os.path.join(arguments.model_dir, 'evaluate_ensemble.log')) # Create data loaders for test data logging.info('Loading test dataset...') test_dataloader = data_loader.fetch_dataloader(
pre_result = pre_result.append( { 'example_id': int(example_id), 'tags': pred_tag, 'split_to_ori': s_to_o }, ignore_index=True) pre_result.to_csv(path_or_buf=params.params_path / f'{mode}_tags_pre.csv', encoding='utf-8', index=False) if __name__ == '__main__': args = parser.parse_args() params = utils.Params(args.pre_model_type, args.ex_index) # set type params.ds_encoder_type = args.ds_encoder_type # 设置模型使用的gpu torch.cuda.set_device(args.device_id) # 查看现在使用的设备 print('current device:', torch.cuda.current_device()) # 预测验证集还是测试集 mode = args.mode # Set the random seed for reproducible experiments random.seed(args.seed) torch.manual_seed(args.seed) params.seed = args.seed # Set the logger
net_classes = {'no-batch-norm': BiggerLeakyUnet, 'batch-norm': BiggerLeakyBNUnet} for normalization in normalizations: print(f'============== normalization: {normalization} ==============') self.params.normalization = normalization if normalization == 'batch-norm': self.params.learning_rate = .1 else: self.params.learning_rate = 1e-5 self.trainer = Trainer(params=self.params, net_class=net_classes[normalization], experiment_dir=self.experiment_dir, is_toy=self.is_toy, set_seed=self.set_seed) history = self.trainer.train() utils.save_history(history, self.trainer, param_name='normalization', name_modifier=name_modifier) if __name__ == '__main__': experiment_dir = Path('experiments/transf_learn_resnet_toy') params = utils.Params(experiment_dir / 'params.json') tuner = Tuner(params=params, net_class=FullUnetResnet, experiment_dir=experiment_dir, is_toy=True, set_seed=True) tuner.tune_lr(rates=(1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6))
# last_report_path = os.path.join(model_dir, f"report_{epoch}.txt")
# utils.save_report(report, last_report_path)

if __name__ == '__main__':
    # Resolve both configuration files from the CLI arguments and make sure
    # they exist before doing anything else.
    args = parser.parse_args()
    model_params_json_path = os.path.join(args.model_dir, 'params.json')
    data_params_json_path = os.path.join(args.data_dir, 'params.json')
    assert os.path.isfile(model_params_json_path), "No json configuration file found at {}".format(model_params_json_path)
    assert os.path.isfile(data_params_json_path), "No json configuration file found at {}".format(data_params_json_path)

    data_params = utils.DataParams.from_json(data_params_json_path)
    model_params = utils.Params(cuda=torch.cuda.is_available(),
                                src='en', trg='hu')
    model_params.update(model_params_json_path)

    # Fixed seed for reproducible experiments (CPU and, when present, GPU).
    torch.manual_seed(230)
    if model_params.cuda:
        torch.cuda.manual_seed(230)

    # Tensorboard writer and the file logger share the same directory.
    tb = SummaryWriter(args.tensorboard_dir)
    utils.set_logger(os.path.join(args.tensorboard_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")
def setup_and_train(args):
    """Configure one bbopt trial, build the multi-task model, and train it.

    The black-box optimizer (bbopt) supplies the learning rate for this
    trial; the final validation metric is reported back for maximization.
    """
    # Ask the optimizer for the next hyper-parameter suggestion.
    bb.run(alg="tree_structured_parzen_estimator")

    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Per-task loss functions and output mask derived from the params file.
    (params.loss_fns, params.mask,
     linear_output_size, binary_output_size) = net.create_lossfns_mask(params)
    print(params.loss_fns)
    print(params.mask)

    # use GPU if available
    params.cuda = torch.cuda.is_available()
    # Set the random seed for reproducible experiments
    torch.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    run_stamp = args.tensorboard_prefix + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_dir = os.path.join(args.model_dir, 'tensorboardLog', run_stamp)
    writer = SummaryWriter(tensorboard_dir)
    # Snapshot the config and the data directory next to the tensorboard logs.
    copy(json_path, tensorboard_dir)
    copy(args.data_dir, tensorboard_dir)
    logging.info("Tensorboard logging directory {}".format(tensorboard_dir))

    # Create the input data pipeline
    logging.info("Loading the datasets...")
    datasets = data_generator.fetch_dataloader_list(args.prefix,
                                                    ['train', 'val'],
                                                    args.data_dir, params)
    _, train_input_size, _ = datasets[0][0]['train']
    input_size = train_input_size
    logging.info("- done.")

    # Embedding trunk plus simple output heads.
    embedding_model = net.EmbeddingNet(
        net.ConvolutionBlock, input_size,
        out_channels_list=params.out_channels_list,
        FC_size_list=params.FC_size_list,
        embedding_size=params.embedding_size,
        kernel_sizes=params.kernel_sizes,
        strides=params.strides,
        dropout_rate=params.dropout_rate)
    outputs = net.outputLayer_simple(params.embedding_size,
                                     linear_output_size=linear_output_size,
                                     binary_output_size=binary_output_size)
    if params.cuda:
        embedding_model = embedding_model.cuda()
        outputs = outputs.cuda()

    # Learning rate is the bbopt-tunable knob for this trial.
    lr = bb.loguniform("lr", 10e-4, 10e-2)
    embedding_optimizer = optim.Adam(embedding_model.parameters(), lr=lr,
                                     weight_decay=params.weight_decay)
    outputs_optimizer = optim.Adam(outputs.parameters(), lr=lr,
                                   weight_decay=params.weight_decay)

    metrics = net.metrics

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    val_metrics = train_and_evaluate(embedding_model, outputs, datasets,
                                     embedding_optimizer, outputs_optimizer,
                                     metrics, params, args.model_dir,
                                     tensorboard_dir, args.restore_file)
    writer.close()

    # Report the trial result back to the optimizer.
    bb.remember(val_metrics)
    bb.maximize(val_metrics[params.best_model_metric])
model.save_weights(PREFIX + '_weights.hdf5') """ with open(PREFIX + '_trainhist.keras', 'wb') as f: pickle.dump(history.history, f) return model, history if __name__ == "__main__": from keras.backend import tensorflow_backend sess = tf.Session(config=tf.ConfigProto(log_device_placement=True)) if TRAIN: params = utils.Params("./configurations/example2.json") neural_network_2c(params) else: model = load_model(PREFIX + '_model.hdf5', custom_objects={"rmse": rmse}) with open("snp_X3k.keras", 'rb') as f: X = pickle.load(f) with open("snp_y3k.keras", 'rb') as f: y = pickle.load(f) y_pred = model.predict(X, batch_size=32) diff = y_pred - y mean_diff = np.mean(diff, axis=0) print(mean_diff)
def autosim(args, eng):
    """Set up directories and params, build the generator network, and run
    training plus device generation through the simulation engine `eng`.

    Fixes:
    - `utils.load_checkpoint` previously referenced the undefined name
      `restore_from` (NameError); it now uses `args.restore_from`.
    - `copyfile(args.output_dir)` passed a single argument to
      shutil.copyfile, which requires (src, dst) and raised TypeError on
      every call; it is disabled pending the intended destination.
    - The max_recorder was constructed twice; once is enough.
    """
    os.makedirs(args.output_dir, exist_ok=True)

    # Set the logger
    utils.set_logger(os.path.join(args.output_dir, 'train.log'))
    # TODO(review): shutil.copyfile requires (src, dst); the original
    # one-argument call always raised TypeError.  Restore with the intended
    # source/destination once known:
    # copyfile(args.output_dir)

    # Load parameters from json file
    json_path = os.path.join(args.output_dir, 'Params.json')
    assert os.path.isfile(json_path), "No json file found at {}".format(
        json_path)
    params = utils.Params(json_path)

    # Add run-time attributes (CLI values and int coercions) to params.
    params.output_dir = args.output_dir
    params.cuda = torch.cuda.is_available()
    params.restore_from = args.restore_from
    params.numIter = int(params.numIter)
    params.noise_dims = int(params.noise_dims)
    params.gkernlen = int(params.gkernlen)
    params.step_size = int(params.step_size)
    params.gen_ver = int(args.gen_ver)
    params.dime = 1
    if args.wavelength is not None:
        params.wavelength = int(args.wavelength)
    if args.angle is not None:
        params.angle = int(args.angle)

    # Build the best-device recorder and the tensorboard writer.
    max_recorder = utils.max_recorder()
    params.recorder = max_recorder
    writer = SummaryWriter(log_dir=r'./scan/runs')
    params.writer = writer

    # make directories for outputs, checkpoints and figures
    os.makedirs(args.output_dir + '/outputs', exist_ok=True)
    os.makedirs(args.output_dir + '/model', exist_ok=True)
    os.makedirs(args.output_dir + '/figures/histogram', exist_ok=True)
    os.makedirs(args.output_dir + '/figures/deviceSamples', exist_ok=True)
    os.makedirs(args.output_dir + '/figures/deviceSamples_max', exist_ok=True)
    os.makedirs(args.output_dir + '/deg{}_wl{}_gen_ver{}'.format(
        params.angle, params.wavelength, params.gen_ver), exist_ok=True)

    # Define the models
    if params.gen_ver == 0:
        generator = Generator0(params)
    else:
        generator = Generator(params)
    # Move to gpu if possible
    if params.cuda:
        generator.cuda()

    # Define the optimizer and its step-decay scheduler
    optimizer = torch.optim.Adam(generator.parameters(), lr=params.lr,
                                 betas=(params.beta1, params.beta2))
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=params.step_size,
                                                gamma=params.gamma)

    # Load model data (fix: was the undefined name `restore_from`)
    if args.restore_from is not None:
        params.checkpoint = utils.load_checkpoint(args.restore_from, generator,
                                                  optimizer, scheduler)
        logging.info('Model data loaded')

    # set the timer
    timer = utils.timer()

    # Train the model and save
    if params.numIter != 0:
        logging.info('Start training')
        train(generator, optimizer, scheduler, eng, params)

    # Generate images and save
    logging.info('Start generating devices')
    evaluate(generator, eng, numImgs=500, params=params)
    timer.out()
    writer.close()
import os
import utils
import matplotlib.pyplot as plt
import numpy as np
from YOLO import YOLO

# Run the YOLO detector on the first 32 raw GTSDB images (CPU only) and show
# the annotated outputs in a 4x8 grid.
params = utils.Params('experiment/params.json')
params.device = "cpu"

images = np.array([
    plt.imread('./data/raw_GTSDB/' + utils.get_image_name(idx))
    for idx in range(32)
])

yolo = YOLO(params)
output = yolo.predict(images)

for idx in range(output.shape[0]):
    plt.subplot(4, 8, idx + 1)
    plt.imshow(output[idx])
plt.show()
for i in torch.arange(x.shape[0]): include = np.delete(np.arange(x.shape[0]), i) # p = get_class_probs(out_z[i,:], c[include,:], l[include], out_w[include], params) p = get_class_probs(out_z[i, :], c[include, :], l[include], None, params) loss += loss_fn(p, l[i], params) print(", loss: {}".format(loss.item())) optimizer.zero_grad() loss.backward() optimizer.step() if __name__ == "__main__": # Load the parameters from json file args = parser.parse_args() params = utils.Params("params.json") model = Net(params) optimizer = optim.Adam(model.parameters(), params.lr) x, t = simulate_data(params) data = {"x": x, "target": t} storage = {} for epoch in range(params.epochs): train(data, model, optimizer, storage, args, params, epoch + 1)
def train_from_workspace(workspace_dir):
    """Train a model using `workspace_dir` as data source and model home.

    Copies the experiment's params.json into the workspace's model
    directory, trains, and returns the best validation accuracy.
    """
    global args, data_loader

    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")

    # Load the parameters from json file
    args = parser.parse_args()
    src_json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(src_json_path), "No json configuration file found at {}".format(src_json_path)

    trgt_json_path = os.path.join(model_dir, 'params.json')
    if os.path.exists(model_dir):
        print("Workspace Model Directory exists! ")
    else:
        print("Workspace Model Directory does not exist! Making directory {}".format(model_dir))
        os.mkdir(model_dir)
    shutil.copyfile(src_json_path, trgt_json_path)

    params = utils.Params(trgt_json_path)
    # Workspace paths take precedence over the CLI defaults.
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Set the logger
    utils.set_logger(os.path.join(params.model_dir, 'train.log'))

    # Create the input data pipeline
    logging.info("Loading the datasets...")
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_from_dir(['train', 'val'], params.data_dir)
    train_data, val_data = data['train'], data['val']
    # specify the train and val dataset sizes
    params.train_size = train_data['size']
    params.val_size = val_data['size']
    logging.info("- done.")

    # Model, optimizer, loss function and metrics.
    model = net.Net(params)
    if params.cuda:
        model = model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)
    loss_fn = net.loss_fn
    metrics = net.metrics

    # Train the model
    logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
    best_eval_acc = train_and_evaluate(model, train_data, val_data, optimizer,
                                       loss_fn, metrics, params,
                                       params.model_dir, args.restore_file)
    return best_eval_acc
def main():
    """Extract feature embeddings from a pretrained RotNet model for the
    train/val/test splits and build an LSH hash over them."""
    # Training settings
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, default='tire',
                        help='Dataset name (default: CIFAR10)')
    parser.add_argument(
        '--root_path',
        default=
        r'D:\2020\project_small_data\Tire_inspection\tire_inspection_cropped_data_final',
        help="Directory containing the dataset")
    parser.add_argument('--experiment_path', type=str, default='exp_1',
                        help='the name of the experiment (dir where all the \
        log files and trained weights of the experimnet will be saved)')
    parser.add_argument(
        '--restore_file',
        default='rotNet_tire_resnet-18_4rot_epoch0_lr_checkpoint.pth',
        help="name of the file in --experiment_name \
        containing weights to load")
    parser.add_argument('--seed', type=int, default=1,
                        help='Random seed (default: 1)')
    args = parser.parse_args()
    torch.manual_seed(args.seed)

    # NOTE(review): these hard-coded assignments overwrite the parsed
    # --experiment_path and --restore_file, so the CLI flags above have no
    # effect — presumably leftover debug overrides; confirm before relying
    # on the arguments.
    args.experiment_path = r'D:\2020\project_small_data\Small_Data\pretraining\experiment_dir\exp_1'
    args.restore_file = 'rotNet_tire_resnet-18_lr_best.pth'

    yaml_path = os.path.join(args.experiment_path, 'params.yaml')
    assert os.path.isfile(
        yaml_path), "No parameters config file found at {}".format(yaml_path)
    params = utils.Params(yaml_path)

    # CUDA is used only when both the config asks for it and it is available.
    use_cuda = params.use_cuda and torch.cuda.is_available()
    device = torch.device(
        "cuda:{}".format(params.cuda_num) if use_cuda else "cpu")
    params.use_cuda = use_cuda

    ## get the dataloaders
    params.root_path = args.root_path
    params.pretraining = None
    dloader_train, dloader_val, dloader_test = get_data(params)

    # Load the model
    params.num_classes = 4
    model = get_model(params,
                      os.path.join(args.experiment_path, args.restore_file))
    model = model.to(device)

    # Hook the average-pool layer so each forward pass records its output.
    layer_name = model.avg_pool
    sf = SaveFeatures(layer_name)  ## Output before the last FC layer

    # save the feature embeddings for every image
    train_feat_path = os.path.join(args.experiment_path,
                                   'train_features_dict.p')
    val_feat_path = os.path.join(args.experiment_path, 'val_features_dict.p')
    test_feat_path = os.path.join(args.experiment_path,
                                  'test_features_dict.p')
    img_names,features_dict = save_features_as_dict(model,dloader_train,sf,\
        save_path=train_feat_path,num_batch='all')
    img_names,features_dict = save_features_as_dict(model,dloader_val,sf,\
        save_path=val_feat_path,num_batch='all')
    img_names,features_dict = save_features_as_dict(model,dloader_test,sf,\
        save_path=test_feat_path,num_batch='all')

    # NOTE(review): img_names/features_dict are rebound by each call above,
    # so only the *test* split's features reach the hash below — confirm
    # that this is intended.
    hash_params = {'hash_size': 20, 'num_tables': 5, 'dim': 18432}
    hash_path = os.path.join(args.experiment_path, 'features_hash.p')
    save_embedding_hash(hash_params, hash_path, img_names, features_dict)
def main(args):
    """Train one model per cross-validation fold (or a single fold).

    Mutates `args` while iterating (`model_dir`, `model_dir_fold`,
    `load_params`), so after the first fold the remaining folds reload the
    params.json that the first fold produced.
    """
    if args.cv:
        folds = range(args.fold, args.nr_folds + 1)
    else:
        folds = [args.fold]
    for fold in folds:
        loop_restore_file = args.restore_file
        if args.load_params:
            # Reuse an existing params.json from the experiment directory.
            json_path = os.path.join(args.model_dir, 'params.json')
            assert os.path.isfile(
                json_path), "No json configuration file found at {}".format(
                    json_path)
            params = utils.Params(json_path)
            params.tensortype = torch.float32
            args.model_dir_fold = os.path.join(args.model_dir, 'fold%s/' % fold)
        else:
            # First run: derive params from the CLI and create the experiment
            # directory.
            params, exp_path = args_to_params(args)
            if params.tensortype == 'float32':
                params.tensortype = torch.float32
            args.model_dir_fold = os.path.join(exp_path, 'fold%s/' % fold)
            args.model_dir = exp_path
            if args.cv:
                # Make subsequent folds load the params just written.
                args.load_params = True

        # Set the random seed for reproducible experiments
        torch.manual_seed(SEED)
        if params.cuda:
            torch.cuda.manual_seed_all(SEED)
        #
        if not os.path.exists(args.model_dir_fold):
            os.makedirs(args.model_dir_fold)
        #
        # reset logger (a fresh file handler per fold)
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)
        # Set the logger
        utils.set_logger(os.path.join(args.model_dir_fold, 'train.log'))
        #
        # parent_dir = [folder for folder in args.model_dir_fold.split('/') if 'experiment' in folder][0]
        tb_dir = args.model_dir_fold  #args.model_dir_fold.replace(parent_dir, parent_dir + '/tb_logs').replace('/fold', '_fold')
        logging.info('Saving tensorboard logs to {}'.format(tb_dir))
        tb_writer = SummaryWriter(tb_dir)
        #
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        logging.info('using {}'.format(device))
        if args.gpu:
            assert device != 'cpu'

        # save model parameters before training
        if args.save_first:
            model = initialize_model(params, device=device)
            criterion, optimizer = initialize_loss_and_optimizer(params, model, device=device)
            utils.save_checkpoint(
                {
                    'epoch': 0,
                    'state_dict': model.state_dict(),
                    'optim_dict': optimizer.state_dict()
                },
                is_best=False,
                checkpoint=args.model_dir_fold,
                save_last=False,
                is_first=True)

        logging.info("Loading the datasets...")
        # getting training data in minibatches
        train_dataloader, val_dataloader = initialize_dataloader(params, fold)

        # initialize model torch.nn layer
        model = initialize_model(params, device=device)
        # initialize training criterion and optimizer
        criterion, optimizer = initialize_loss_and_optimizer(params, model, device=device)
        #
        logging.info('parameters: {}'.format(params.__dict__))  # log parameters
        #
        # Decide the checkpoint to resume from: none if --dont_continue,
        # otherwise 'last' when a last.pth.tar already exists in the fold dir.
        if args.dont_continue:
            loop_restore_file = None
        else:
            restore_path = os.path.join(args.model_dir_fold, 'last.pth.tar')
            if os.path.exists(restore_path):
                logging.info('Restoring from last.pth.tar')
                loop_restore_file = 'last'

        # Train the model
        logging.info("Starting training for {} epoch(s)".format(
            params.num_epochs))
        main_train_and_evaluate(model, train_dataloader, val_dataloader,
                                optimizer, criterion, params,
                                args.model_dir_fold, loop_restore_file,
                                tb_writer=tb_writer, device=device,
                                evol_val=True)
        logging.info("- done.")
def predict_from_workspace(workspace_dir, input_data):
    """ Evaluate the model on the test set. """
    global args, data_loader

    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")

    # Load the parameters
    args = parser.parse_args()
    trgt_json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        trgt_json_path), "No json configuration file found at {}".format(
            trgt_json_path)
    params = utils.Params(trgt_json_path)
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(params.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_for_predict(input_data)
    batch_sentences = data["predict"]["data"]

    # Pad every sentence to the longest one in the batch.  Pre-filling with
    # pad_ind (and labels with -1 elsewhere) keeps padding distinguishable
    # from real tokens.
    batch_max_len = max(len(sentence) for sentence in batch_sentences)
    batch_data = data_loader.pad_ind * np.ones(
        (len(batch_sentences), batch_max_len))
    for row, sentence in enumerate(batch_sentences):
        batch_data[row][:len(sentence)] = sentence
    logging.info("- done.")

    # Define the model
    model = net.Net(params)
    if params.cuda:
        model = model.cuda()

    logging.info("Starting prediction")
    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model)

    # Evaluate
    return predict(model, batch_data)
pre_result = pre_result.append( { 'example_id': int(example_id), 'tags': pre_bio_labels, 'split_to_ori': s_t_o[:act_len] }, ignore_index=True) pre_result.to_csv(path_or_buf=params.params_path / f'{mode}_tags_pre.csv', encoding='utf-8', index=False) if __name__ == '__main__': args = parser.parse_args() params = utils.Params(args.ex_index) # 设置模型使用的gpu torch.cuda.set_device(args.device_id) # 查看现在使用的设备 print('current device:', torch.cuda.current_device()) # 预测验证集还是测试集 mode = args.mode # Set the random seed for reproducible experiments random.seed(args.seed) torch.manual_seed(args.seed) params.seed = args.seed # Set the logger utils.set_logger()
# save histories to csv utils.save_metric_histories(train_histories, valid_histories, results_path) if __name__ == '__main__': args = parser.parse_args() # load json params params_path = args.experiment assert os.path.isfile( params_path), "No json configuration file found at {}".format( params_path) params = utils.Params() params.load(params_path) # load json features features_path = os.path.join(args.path_to_data, params['dataset'], 'features.json') assert os.path.isfile( features_path), "No json features file found at {}".format( features_path) features = utils.Features() features.load(features_path) # update params with features - needed for network construction params.update(features)
prompt: d.get(prompt)['predictions'] for prompt in d } return self if __name__ == '__main__': args = parser.parse_args() # Loading the evaluation dataset print("Loading dataset") data_params_json_path = os.path.join(args.data_dir, 'params.json') data_params = utils.DataParams.from_json(data_params_json_path) val_dataset = DoulingoDataset(data_params, split='val') # Loading the model print("Loading model...") checkpoint = os.path.join(args.model_dir, f"runs/{args.checkpoint}.pth.tar") config = utils.Params(cuda=torch.cuda.is_available(), src='en', trg='hu') model = Net(config) checkpoint = torch.load(checkpoint) model.load_state_dict(checkpoint['state_dict']) print("Finished Loading") # Evaluation ... print("Starting Evaluation..") if not os.path.exists(args.results_dir): os.mkdir(args.results_dir) metrics = evaluate_model(model.cuda(), val_dataset, args.results_dir) result_json = os.path.join(args.results_dir, 'metrics.json') utils.save_dict_to_json(metrics, result_json)
print('repeat: ', i) self._reset_params() max_test_acc, max_f1 = self._train(criterion, optimizer, max_test_acc_overall=max_test_acc_overall) print('max_test_acc: {0} max_f1: {1}'.format(max_test_acc, max_f1)) max_test_acc_overall = max(max_test_acc, max_test_acc_overall) max_f1_overall = max(max_f1, max_f1_overall) print('#' * 100) print("max_test_acc_overall:", max_test_acc_overall) print("max_f1_overall:", max_f1_overall) if __name__ == '__main__': args = parser.parse_args() json_path = os.path.join(args.model_dir, 'params.json') assert os.path.isfile(json_path), 'No json configuration file found at {}'.format(json_path) opt = utils.Params(json_path) model_classes = { 'base_model': Cabasc, 'cabasc': Cabasc } dataset_files = { 'twitter': { 'train': 'data/datasets/Twitter_Train.raw', 'test': 'data/datasets/Twitter_Test.raw' }, 'restaurant': { 'train': 'data/datasets/Restaurants_Train.xml.seg', 'test': 'data/datasets/Restaurants_Test.xml.seg' }, 'laptop': {
def mle_k(dataset_name, target_model, task='classification',
          sampled_number=10, without_wne=False, k=16, s=0, print_iter=10,
          debug=False):
    """Random-sample `sampled_number * k` hyper-parameter evaluations, then
    run `s` rounds of GP-guided (MLE) search; return the best params/result.

    NOTE(review): reconstructed from a whitespace-mangled source — the MLE
    loop is taken to be sequential after the sampling loop (so it reuses the
    last sampled graph's `wne`); confirm against the original layout.
    """
    X, y = [], []
    params = utils.Params(target_model)
    ps = params.arg_names
    total_t = 0.0
    info = []
    X_t, res_t = None, -1.0

    # GP with a structured kernel unless the whole-network embedding is off.
    if without_wne:
        gp = utils.GaussianProcessRegressor()
    else:
        K = utils.K(len(ps))
        gp = utils.GaussianProcessRegressor(K)

    # Phase 1: evaluate k random configurations on each sampled graph.
    for t in range(sampled_number):
        b_t = time.time()
        wne = get_wne(dataset_name, 'sampled/s{}'.format(t), cache=True)
        for v in range(k):
            kargs = params.random_args(ps)
            res = get_result(dataset_name, target_model, task, kargs,
                             'sampled/s{}'.format(t))
            feature = [kargs[p] for p in ps]
            if without_wne:
                X.append(feature)
            else:
                X.append(np.hstack((feature, wne)))
            if debug:
                print('sample {}, {}/{}, kargs: {}, res: {}, time: {:.4f}s'.
                      format(t, v, k, [kargs[p] for p in ps], res,
                             time.time() - b_t))
            y.append(res)

    # Phase 2: s rounds of fit-then-propose, tracking the best result seen.
    for t in range(s):
        b_t = time.time()
        gp.fit(np.vstack(X), y)
        X_temp, res_temp = _get_mle_result(gp, dataset_name, target_model,
                                           task, without_wne, params, ps, 0,
                                           X, y)
        if without_wne:
            X.append(X_temp)
        else:
            X.append(np.hstack((X_temp, wne)))
        y.append(res_temp)
        if res_t < res_temp:
            res_t = res_temp
            X_t = X_temp
        total_t += time.time() - b_t
        info.append([res_temp, total_t])
        print('iters: {}/{}, params: {}, res: {}, time: {:.4f}s'.format(
            t, s, X_temp, res_temp, total_t))

    if debug:
        return X_t, res_t, info
    return X_t, res_t
patience_counter += 1 else: patience_counter = 0 else: patience_counter += 1 # Early stopping and logging best f1 if (patience_counter > params.patience_num and epoch > params.min_epoch_num) or epoch == args.epoch_num: logging.info("Best val f1: {:05.2f}".format(best_val_f1)) break if __name__ == '__main__': args = parser.parse_args() params = utils.Params(ex_index=args.ex_index) utils.set_logger(log_path=os.path.join(params.params_path, 'train.log'), save=True) if args.multi_gpu: params.device = torch.device( "cuda" if torch.cuda.is_available() else "cpu") n_gpu = torch.cuda.device_count() params.n_gpu = n_gpu else: # 设置模型使用的gpu torch.cuda.set_device(3) # 查看现在使用的设备 print('current device:', torch.cuda.current_device()) n_gpu = 1 params.n_gpu = n_gpu
def main(stride):
    """Prepare stock data for the given stride, configure params, and run
    DeepAR training and evaluation.  Returns True when complete."""
    logger = logging.getLogger('DeepAR.Train')
    arg = {
        'model_name': f'base_stock_stride={stride}',
        'data_folder': 'data',
        'dataset': 'stock',
        'relative_metrics': 0,
        'sampling': 0,
        'restore_file': None,
        'save_best': 0,
        'generate_features': 0,
        'default_base': 1,
        'save_directory': 'stock',
        'stride_size': 8,
    }
    train_files, test_files = prepare_data_main(stride, arg)

    model_dir = os.path.join('experiments', arg['model_name'])
    json_path = os.path.join(model_dir, 'params.json')
    data_dir = os.path.join(arg['data_folder'], arg['dataset'])
    assert os.path.isfile(json_path), f'No json configuration file found at {json_path}'

    params = utils.Params(json_path)
    params.relative_metrics = arg['relative_metrics']
    params.sampling = arg['sampling']
    params.model_dir = model_dir
    params.plot_dir = os.path.join(model_dir, 'figures')

    # create missing directories
    try:
        os.mkdir(params.plot_dir)
    except FileExistsError:
        pass

    utils.set_logger(os.path.join(model_dir, 'train.log'))

    # use GPU if available
    cuda_exist = torch.cuda.is_available()
    if cuda_exist:
        params.device = torch.device('cuda')
        # torch.cuda.manual_seed(240)
        logger.info('Using Cuda...')
        model = net.Net(params).cuda()
    else:
        params.device = torch.device('cpu')
        # torch.manual_seed(230)
        logger.info('Not using cuda...')
        model = net.Net(params)

    # Fixed seeds for reproducible experiments.
    torch.manual_seed(777)
    torch.cuda.manual_seed(777)
    np.random.seed(777)

    logger.info('Loading the datasets...')
    train_set = TrainDataset(data_dir, arg['dataset'], params.num_class,
                             data=train_files[0], label=train_files[-1])
    test_set = TestDataset(data_dir, arg['dataset'], params.num_class,
                           data=test_files[0], v=test_files[1],
                           label=test_files[-1])
    # Use weighted sampler instead of random sampler
    sampler = WeightedSampler(data_dir, arg['dataset'], v=train_files[1])
    train_loader = DataLoader(train_set, batch_size=params.batch_size,
                              sampler=sampler, num_workers=4)
    test_loader = DataLoader(test_set, batch_size=params.predict_batch,
                             sampler=RandomSampler(test_set), num_workers=4)
    logger.info('Loading complete.')

    logger.info(f'Model: \n{str(model)}')
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)
    # fetch loss function
    loss_fn = net.loss_fn

    # Train the model
    logger.info('Starting training for {} epoch(s) with stride_size {}'.format(params.num_epochs, stride))
    train_and_evaluate(model, train_loader, test_loader, optimizer, loss_fn,
                       params, arg['restore_file'], arg)
    logger.info(f'Finished processing {stride}')
    return True
def set_params():
    """Parse CLI options for an adversarial-attack run and assemble its config.

    Loads the model's params.json, overlays the attack hyperparameters from
    the command line, creates the output folder, and persists the effective
    configuration to ``params.txt`` inside it.

    Returns:
        Tuple ``(params, model_dir, args, data_dir)``: merged Params object,
        experiment directory, raw argparse namespace, and dataset directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='elect', help='Name of the dataset')
    parser.add_argument('--data-folder', default='data', help='Parent dir of the dataset')
    parser.add_argument('--model-name', default='base_model', help='Directory containing params.json')
    parser.add_argument(
        '--relative-metrics', action='store_true',
        help='Whether to normalize the metrics by label scales')
    parser.add_argument(
        '--restore-file', default='best',
        help='Optional, name of the file in --model_dir containing weights to reload before \
training')  # 'best' or 'epoch_#'
    parser.add_argument('--output_folder', help='Output folder for plots')
    # Attack parameters
    parser.add_argument('--c', nargs='+', type=float, default=[0.01, 0.1, 1, 10, 100],
                        help='list of c coefficients (see Carlini et al.)')
    parser.add_argument('--lr', type=float, default=0.001, help='learning rate')
    # NOTE(review): nargs='+' makes a user-supplied value a *list* of ints,
    # but the default is the scalar 50, so params.batch_size has an
    # inconsistent type depending on whether the flag is passed — confirm
    # which form downstream consumers expect.
    parser.add_argument('--batch_size', nargs='+', type=int, default=50,
                        help='Batch size for perturbation generation')
    parser.add_argument('--n_iterations', type=int, default=1000,
                        help='Number of iterations for attack')
    parser.add_argument('--target', type=int, default=-7, help='Attacking output time')
    parser.add_argument('--tolerance', nargs='+', type=float, default=[0.01, 0.1, 1],
                        help='Max perturbation L2 norm')
    parser.add_argument('--debug', action="store_true", help='Debug mode')
    # Batching
    parser.add_argument('--batch_c', type=int, default=6,
                        help='Number of c values batched together')
    # Load the parameters
    args = parser.parse_args()
    model_dir = os.path.join('experiments', args.model_name)
    json_path = os.path.join(model_dir, 'params.json')
    data_dir = os.path.join(args.data_folder, args.dataset)
    assert os.path.isfile(
        json_path), 'No json configuration file found at {}'.format(json_path)
    # Overlay the attack settings on top of the model's stored hyperparameters.
    params = utils.Params(json_path)
    params.model_dir = model_dir
    params.plot_dir = os.path.join(model_dir, 'figures')
    params.c = args.c
    params.n_iterations = args.n_iterations
    params.tolerance = args.tolerance
    params.batch_size = args.batch_size
    params.learning_rate = args.lr
    # NOTE(review): --output_folder has no default, so args.output_folder is
    # None when omitted and os.path.join raises TypeError — the flag is
    # effectively required; consider required=True.
    params.output_folder = os.path.join("attack_logs", args.output_folder)
    params.batch_c = args.batch_c
    params.target = args.target
    if not os.path.exists(params.output_folder):
        os.makedirs(params.output_folder)
    # Persist the effective configuration alongside the attack outputs.
    with open(os.path.join(params.output_folder, "params.txt"), 'w') as param_file:
        json.dump(params.dict, param_file)
    return params, model_dir, args, data_dir
def params():
    """Load the base-model experiment hyperparameters from its JSON file."""
    config_path = '../experiments/base-model/params.json'
    return utils.Params(config_path)
def __init__(self, params=None, experiment_dir=Path('experiments/bigger_leaky_unet'), net_class=None, set_seed=False, is_toy=False ):
    """Assemble the training harness: network, data generators, optimizer,
    compiled model and Keras callbacks.

    Args:
        params: pre-built utils.Params; when falsy, loaded from
            ``experiment_dir / 'params.json'``.
        experiment_dir: directory holding params.json and saved weights.
        net_class: class building the network (called with params/set_seed).
        set_seed: forwarded to ``net_class`` for reproducibility.
        is_toy: when True, point at the small toy dataset directory.
    """
    tf.keras.backend.clear_session()

    # Hyperparameters: an explicit object wins over the on-disk JSON.
    self.params = params if params else utils.Params(experiment_dir / 'params.json')

    # Network and the Keras model it produces.
    self.net = net_class(params=self.params, set_seed=set_seed)
    self.model = self.net.get_model()

    # Data locations (the toy subset lives in a /toy subdirectory).
    self.is_toy = is_toy
    home = Path.home()
    self.data_dir = home / 'data/isic_2018/toy' if is_toy else home / 'data/isic_2018'
    self.experiment_dir = experiment_dir
    self.weight_file = self.experiment_dir / 'weights'

    # Train/validation generators.
    self.data_gen = SkinLesionDataGen(params=self.params, data_dir=self.data_dir)
    self.train_gen = self.data_gen.get_train_gen()
    self.val_gen = self.data_gen.get_val_gen()

    # Optimizer choice is driven by params; only 'adam' and 'sgd' are valid.
    opt_name = self.params.optimizer
    if opt_name == 'adam':
        self.optimizer = tf.keras.optimizers.Adam(lr=self.params.learning_rate)
    elif opt_name == 'sgd':
        self.optimizer = tf.keras.optimizers.SGD(learning_rate=self.params.learning_rate, momentum=.9, nesterov=True)
    else:
        raise ValueError

    # Jaccard coefficient as both the training loss and the reported metric.
    self.metrics = [utils.jaccard_coef]
    self.loss = utils.jaccard_coef_loss
    self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=self.metrics)

    # Checkpoint the best weights, shrink the LR on plateaus, and stop early
    # once val_loss stalls.
    self.callbacks = [
        tf.keras.callbacks.ModelCheckpoint(str(self.weight_file), save_weights_only=True,
                                           monitor='val_loss', save_best_only=True, verbose=1),
        tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.75,
                                             patience=5, min_lr=1e-6, verbose=1),
        tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=1e-3,
                                         patience=15, mode='min', verbose=1)
    ]
        y = pickle.load(f)
    # Fit with the callbacks configured above (early stopping / checkpointing
    # / custom metric — defined outside this view) and persist the history.
    history = model.fit(X, y, epochs=NUMEPOCHS, callbacks=[early_stop, check, metric])
    with open(PREFIX + '_trainhist.keras', 'wb') as f:
        pickle.dump(history.history, f)
    return model, history


if __name__ == "__main__":
    # Force TF to log which device each op lands on (debugging placement).
    from keras.backend import tensorflow_backend
    sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
    if TRAIN:
        # Training path: read the experiment configuration and train.
        params = utils.Params("./configurations/reverse.json")
        neural_network_2c(params)
    else:
        # Evaluation path: reload the saved model (rmse is a custom metric that
        # must be supplied to the deserializer) and score the pickled S&P data.
        model = load_model(PREFIX + '_model.hdf5', custom_objects={"rmse": rmse})
        with open("snp_X3k.keras", 'rb') as f:
            X = pickle.load(f)
        with open("snp_y3k.keras", 'rb') as f:
            y = pickle.load(f)
        y_pred = model.predict(X, batch_size=32)
        # Report the per-output mean prediction error and its overall mean.
        diff = y_pred - y
        mean_diff = np.mean(diff, axis=0)
        print(mean_diff)
        print(np.mean(mean_diff))
    Returns: (float) accuracy in [0,1]
    """
    # Predicted class = argmax over the class dimension; fraction correct.
    outputs = np.argmax(outputs, axis=1)
    return np.sum(outputs == labels) / float(labels.size)


# maintain all metrics required in this dictionary- these are used in the training and evaluation loops
metrics = {
    'accuracy': accuracy,
    # could add more metrics such as accuracy for each token type
}


if __name__ == '__main__':
    # Smoke test for class `LeNet5`: build the model from the CIFAR-10 params
    # and run a random 2-sample batch of 3x32x32 images through it.
    import torch
    import sys
    sys.path.append(".")
    import utils
    params = utils.Params('./experiments/cifar10_lenet5/params.json')
    model = LeNet5(params)
    print(model)
    x = torch.randn(2, 3, 32, 32)
    print(x)
    y = model(x)
    print(y)
    print(y.size())
def run(args=None):
    """Entry point for training/testing the recursive neural net on trees.

    In test mode (--test) it delegates to ``test`` and returns. Otherwise it
    creates a timestamped results directory, trains for up to
    ``params.num_epochs`` epochs with SGD, checkpoints whenever validation
    cost improves, early-stops after ``params.num_epochs_early_stop`` epochs
    without improvement, and writes per-epoch predictions and a summary CSV.

    Args:
        args: optional argv list forwarded to the option parser (defaults to
            sys.argv when None).
    """
    usage = "usage : %prog [options]"
    parser = optparse.OptionParser(usage = usage)
    parser.add_option("--test", action = "store_true", dest = "test", default = False)
    # Paramsfile includes hyperparameters for training
    parser.add_option('--params_file', dest = "params_file", default = './params/exp_params.json',
                      help = "Path to the file containing the training settings")
    parser.add_option('--data_dir', dest = "data_dir", default = './trees',
                      help = "Directory containing the trees")
    # Directory containing the model to test
    parser.add_option("--model_directory", dest = "test_dir", type = "string")
    parser.add_option("--data", dest = "data", type = "string", default = "train")
    (opts, args) = parser.parse_args(args)

    results_dir = "./results"
    if not opts.test:
        # Timestamped results directory; retry until we get a fresh name.
        results_dir_current_job = os.path.join(results_dir, utils.now_as_str_f())
        while os.path.isdir(results_dir_current_job):
            # generate a new timestamp if the current one already exists
            results_dir_current_job = os.path.join(results_dir, utils.now_as_str_f())
        os.makedirs(results_dir_current_job)

    # Load training settings (e.g. hyperparameters)
    params = utils.Params(opts.params_file)
    if not opts.test:
        # Copy the settings file into the results directory
        copyfile(opts.params_file, os.path.join(results_dir_current_job, os.path.basename(opts.params_file)))

    # Get the logger
    if opts.test:
        log_path = os.path.join(opts.test_dir, 'testing.log')
    else:
        log_path = os.path.join(results_dir_current_job, 'training.log')
    log_level = params.log_level if hasattr(params, 'log_level') else logging.DEBUG
    log = utils.get_logger(log_path, log_level)

    if opts.test:
        log.info("Testing directory: " + opts.test_dir)
        log.info("Dataset used for testing: " + opts.data)
    else:
        log.info("Results directory: " + results_dir_current_job)
        log.info("Minibatch: " + str(params.optimizer_settings['minibatch']))
        log.info("Optimizer: " + params.optimizer)
        log.info("Epsilon: " + str(params.optimizer_settings['epsilon']))
        log.info("Alpha: " + str(params.optimizer_settings['alpha']))
        log.info("Number of samples used: " + str(params.sample_size))

    # Testing
    if opts.test:
        test(opts.test_dir, opts.data)
        return

    log.info("Loading data...")
    # load training data
    trees = tr.loadTrees(sample_size = params.sample_size)
    params.numWords = len(tr.loadWordMap())

    overall_performance = pd.DataFrame()
    rnn = nnet.RNN(params.wvecDim, params.outputDim, params.numWords, params.optimizer_settings['minibatch'])
    rnn.initParams()
    sgd = optimizer.SGD(rnn, alpha = params.optimizer_settings['alpha'],
                        minibatch = params.optimizer_settings['minibatch'],
                        optimizer = params.optimizer,
                        epsilon = params.optimizer_settings['epsilon'])

    best_val_cost = float('inf')
    best_epoch = 0
    best_epoch_row = None  # guards the final append when no epoch ever runs
    for e in range(params.num_epochs):
        start = time.time()
        log.info("Running epoch %d" % e)
        df, updated_model, train_cost, train_acc = sgd.run(trees)
        end = time.time()
        log.info("Time per epoch : %f" % (end - start))
        log.info("Training accuracy : %f" % train_acc)

        # VALIDATION
        val_df, val_cost, val_acc = validate(updated_model, results_dir_current_job)
        if val_cost < best_val_cost:
            # best validation cost we have seen so far
            log.info("Validation score improved, saving model")
            best_val_cost = val_cost
            best_epoch = e
            best_epoch_row = {"epoch": e, "train_cost": train_cost, "val_cost": val_cost,
                              "train_acc": train_acc, "val_acc": val_acc}
            # BUGFIX: pickle requires a binary file handle; 'w' raises
            # TypeError in Python 3.
            with open(results_dir_current_job + "/checkpoint.bin", 'wb') as fid:
                pickle.dump(params, fid)
                pickle.dump(sgd.costt, fid)
                rnn.toFile(fid)
            # BUGFIX: removed the stray space that used to precede the epoch
            # number in the validation predictions filename.
            val_df.to_csv(results_dir_current_job + "/validation_preds_epoch_" + str(e) + ".csv",
                          header = True, index = False)
            df.to_csv(results_dir_current_job + "/training_preds_epoch_" + str(e) + ".csv",
                      header = True, index = False)

        row = {"epoch": e, "train_cost": train_cost, "val_cost": val_cost,
               "train_acc": train_acc, "val_acc": val_acc}
        overall_performance = overall_performance.append(row, ignore_index = True)

        # break if no val loss improvement in the last epochs
        if (e - best_epoch) >= params.num_epochs_early_stop:
            # BUGFIX: was log.tinfo(...), which raised AttributeError exactly
            # when early stopping triggered.
            log.info("No improvement in the last {num_epochs_early_stop} epochs, stop training.".format(num_epochs_early_stop=params.num_epochs_early_stop))
            break

    # Re-append the best epoch's row as the last line of the summary CSV.
    if best_epoch_row is not None:
        overall_performance = overall_performance.append(best_epoch_row, ignore_index = True)
    overall_performance.to_csv(results_dir_current_job + "/train_val_costs.csv", header = True, index = False)
    log.info("Experiment end")
if __name__ == '__main__':
    # Setup Slack notifications; the API token is optional and read from the
    # environment when present.
    sm = SlackManager(channel='#dl-model-progress')
    if 'SLACK_API_TOKEN' in os.environ:
        sm.setup(slack_api_token=os.environ['SLACK_API_TOKEN'])

    # Collect arguments from command-line options
    args = parser.parse_args()

    # Load the parameters from json file
    json_path = os.path.join(args.model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # Set the logger
    utils.set_logger(os.path.join(args.model_dir, 'test.log'))

    # Announce the test run on Slack.
    slack_message = "*Testing of {} started*".format(args.model_dir)
    sm.post_slack_message(slack_message)

    # Set variables
    data_dir = "./data/"
    model_name = params.model_name
    batch_size = params.test_batch_size
    num_workers = params.num_workers

    # Get the required input size of the network for resizing images
    input_size = mh.input_size_of_model(model_name)
    # Report mean/std of every dependent variable column of the design matrix.
    for var in dep_vars:
        print("mean (sd) {}: {:.3f} ({:.3f})".format(var, X[:, var2idx[var]].mean(), X[:, var2idx[var]].std()))
    return X, var2idx, idx2var


if __name__ == '__main__':
    # Load the parameters from json file
    args = parser.parse_args()
    # Load information from last setting if none provided:
    if args.setting == "" and Path('last-defaults.json').exists():
        print("using last default setting")
        last_defaults = utils.Params("last-defaults.json")
        args.setting = last_defaults.dict["setting"]
        for param, value in last_defaults.dict.items():
            print("{}: {}".format(param, value))
    else:
        # Remember this setting as the new default for future runs:
        # rewrite last-defaults.json in place with the updated "setting" key.
        with open("last-defaults.json", "r+") as jsonFile:
            defaults = json.load(jsonFile)
            tmp = defaults["setting"]
            defaults["setting"] = args.setting
            jsonFile.seek(0)  # rewind
            json.dump(defaults, jsonFile)
            jsonFile.truncate()
    # Resolve the setting directory and its configuration/data locations.
    setting_home = os.path.join(args.setting_dir, args.setting)
    setting = utils.Params(os.path.join(setting_home, "setting.json"))
    data_dir = os.path.join(setting_home, "data")