def evaluate_from_workspace(workspace_dir):
    """Evaluate the model on the test set."""
    global args, data_loader

    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")

    # Load the parameters
    args = parser.parse_args()
    json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(params.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # load data
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_from_dir(['test'], params.data_dir)
    test_data = data['test']

    # specify the test set size
    params.test_size = test_data['size']
    test_data_iterator = data_loader.data_iterator(test_data, params)
    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)

    loss_fn = net.loss_fn
    metrics = net.metrics

    logging.info("Starting evaluation")

    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model)

    # Evaluate
    num_steps = (params.test_size + 1) // params.batch_size
    test_metrics = evaluate(model, loss_fn, test_data_iterator, metrics,
                            params, num_steps)
    save_path = os.path.join(params.model_dir,
                             "metrics_test_{}.json".format(args.restore_file))
    utils.save_dict_to_json(test_metrics, save_path)
def runEvaluate(model_dir, data_dir, restore_file):
    """Evaluate the model on the training set."""
    # Load the parameters
    json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        json_path), "No json configuration file found at {}".format(json_path)
    params = utils.Params(json_path)

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(231)
    if params.cuda:
        torch.cuda.manual_seed(231)

    # Get the logger
    # utils.set_logger(os.path.join(model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # fetch dataloaders
    dataloaders = data_loader.fetch_dataloader(['train'], data_dir, params)
    train_dl = dataloaders['train']

    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)

    loss_fn = net.loss_fn
    metrics = net.metrics

    logging.info("Starting evaluation")

    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(model_dir, restore_file + '.pth.tar'), model)

    # Evaluate
    train_metrics = evaluate(model, loss_fn, train_dl, metrics, params)
    save_path = os.path.join(
        model_dir, "metrics_training_{}.json".format(restore_file))
    utils.save_dict_to_json(train_metrics, save_path)
def set_cuda(params, logger):
    # use GPU if available
    cuda_exist = torch.cuda.is_available()

    # Set random seeds for reproducible experiments if necessary
    if cuda_exist:
        params.device = torch.device('cuda')
        # torch.cuda.manual_seed(240)
        logger.info('Using Cuda...')
        model = net.Net(params).cuda()
    else:
        params.device = torch.device('cpu')
        # torch.manual_seed(230)
        logger.info('Not using cuda...')
        model = net.Net(params)
    return model
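# Example usage of set_cuda (a minimal sketch, not part of the original code;
# the params.json path and logger name are assumptions):
#
#     logger = logging.getLogger('DeepAR.Eval')
#     params = utils.Params('experiments/base_model/params.json')
#     model = set_cuda(params, logger)  # model now lives on params.device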
def load_model(device):
    '''Loads the model and puts it into evaluation (testing) mode

    Args:
        device: device for model
    '''
    model = net.Net()
    model.load_state_dict(torch.load("model.pth"))
    model.to(device)
    model.eval()
    return model
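# Example usage of load_model (a sketch; assumes "model.pth" holds a state
# dict saved with torch.save(model.state_dict(), "model.pth")):
#
#     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#     model = load_model(device)
#     with torch.no_grad():
#         outputs = model(inputs.to(device))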
def init_model():
    global MODEL
    global STYLES
    MODEL = net.Net(ngf=NGF)
    MODEL.collect_params().load(PARAMS_PATH, ctx=CTX)
    STYLES = []
    for style_name in STYLE_IMAGES:
        style_path = os.path.join(os.getcwd(), STYLE_FOLDER, style_name)
        style_image = utils.tensor_load_rgbimage(style_path, CTX,
                                                 size=STYLE_SIZE)
        style_image = utils.preprocess_batch(style_image)
        STYLES.append(style_image)
def main():
    args = parser.parse_args()

    # torch setting
    torch.random.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # os setting
    path = args.dataset_path
    train_path = os.path.join(path, "train/train.txt")
    test_path = os.path.join(path, "test/test.txt")
    params_path = os.path.join(args.model_dir, 'params.json')
    # checkpoint_path = os.path.join(FLAGS.checkpoint_path, "checkpoint.tar")
    checkpoint_dir = os.path.join(args.model_dir, 'checkpoint')
    tensorboard_log_dir = os.path.join(args.model_dir, 'log')

    entity2id, relation2id = data_loader.create_mappings(train_path)

    # params
    params = utils.Params(params_path)
    params.device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # dataset
    test_set = data_loader.FB15KDataset(test_path, entity2id, relation2id)
    test_generator = torch_data.DataLoader(
        test_set, batch_size=params.validation_batch_size)

    # model
    model = net.Net(entity_count=len(entity2id),
                    relation_count=len(relation2id),
                    dim=params.embedding_dim,
                    margin=params.margin,
                    device=params.device,
                    norm=params.norm)
    optimizer = optim.SGD(model.parameters(), lr=params.learning_rate)
    summary_writer = tensorboard.SummaryWriter(log_dir=tensorboard_log_dir)

    # Testing the best checkpoint on the test dataset
    utils.load_checkpoint(checkpoint_dir, model, optimizer)
    best_model = model.to(params.device)
    best_model.eval()
    scores = evaluate(model=best_model,
                      data_generator=test_generator,
                      entities_count=len(entity2id),
                      device=params.device,
                      summary_writer=summary_writer,
                      epoch_id=1,
                      metric_suffix="test")
    print("Test scores: \n hits@1: {} \n hits@3: {} \n hits@10: {} \n mrr: {}"
          .format(scores[0], scores[1], scores[2], scores[3]))
def main():
    args = parser.parse_args()

    # torch setting
    torch.random.manual_seed(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # os setting
    path = args.dataset_path
    train_path = os.path.join(path, "train/train.txt")
    validation_path = os.path.join(path, "valid/valid.txt")
    test_path = os.path.join(path, "test/test.txt")
    params_path = os.path.join(args.model_dir, 'params.json')
    checkpoint_dir = os.path.join(args.model_dir, 'checkpoint')
    tensorboard_log_dir = os.path.join(args.model_dir, 'log')
    utils.check_dir(tensorboard_log_dir)

    entity2id, relation2id = data_loader.create_mappings(train_path)

    # params
    params = utils.Params(params_path)
    params.device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # dataset
    train_set = data_loader.FB15KDataset(train_path, entity2id, relation2id)
    train_generator = torch_data.DataLoader(train_set,
                                            batch_size=params.batch_size)
    validation_set = data_loader.FB15KDataset(validation_path, entity2id,
                                              relation2id)
    validation_generator = torch_data.DataLoader(
        validation_set, batch_size=params.validation_batch_size)
    test_set = data_loader.FB15KDataset(test_path, entity2id, relation2id)
    test_generator = torch_data.DataLoader(
        test_set, batch_size=params.validation_batch_size)

    # model
    model = net.Net(entity_count=len(entity2id),
                    relation_count=len(relation2id),
                    dim=params.embedding_dim,
                    margin=params.margin,
                    device=params.device,
                    norm=params.norm)  # type: torch.nn.Module
    model = model.to(params.device)
    optimizer = optim.SGD(model.parameters(), lr=params.learning_rate)
    summary_writer = tensorboard.SummaryWriter(log_dir=tensorboard_log_dir)

    start_epoch_id = 1
    step = 0
    best_score = 0.0

    print("Training Dataset: entity: {} relation: {} triples: {}".format(
        len(entity2id), len(relation2id), len(train_set)))
    print("Validation Dataset: triples: {}".format(len(validation_set)))
    print("Test Dataset: triples: {}".format(len(test_set)))
    print(model)

    # Train
    for epoch_id in range(start_epoch_id, params.epochs + 1):
        print("Epoch {}/{}".format(epoch_id, params.epochs))

        loss_impacting_samples_count = 0
        samples_count = 0
        model.train()

        with tqdm(total=len(train_generator)) as t:
            for local_heads, local_relations, local_tails in train_generator:
                local_heads, local_relations, local_tails = (
                    local_heads.to(params.device),
                    local_relations.to(params.device),
                    local_tails.to(params.device))

                positive_triples = torch.stack(
                    (local_heads, local_relations, local_tails), dim=1)

                # Preparing negatives.
                # Generate binary tensor to decide whether to replace the head
                # or the tail: 1 means replace head, 0 means replace tail.
                head_or_tail = torch.randint(high=2,
                                             size=local_heads.size(),
                                             device=params.device)
                random_entities = torch.randint(high=len(entity2id),
                                                size=local_heads.size(),
                                                device=params.device)
                broken_heads = torch.where(head_or_tail == 1, random_entities,
                                           local_heads)
                broken_tails = torch.where(head_or_tail == 0, random_entities,
                                           local_tails)
                negative_triples = torch.stack(
                    (broken_heads, local_relations, broken_tails), dim=1)

                optimizer.zero_grad()
                loss, pd, nd = model(positive_triples, negative_triples)
                loss.mean().backward()

                summary_writer.add_scalar('Loss/train',
                                          loss.mean().data.cpu().numpy(),
                                          global_step=step)
                summary_writer.add_scalar('Distance/positive',
                                          pd.sum().data.cpu().numpy(),
                                          global_step=step)
                summary_writer.add_scalar('Distance/negative',
                                          nd.sum().data.cpu().numpy(),
                                          global_step=step)

                loss = loss.data.cpu()
                loss_impacting_samples_count += loss.nonzero().size()[0]
                samples_count += loss.size()[0]

                optimizer.step()
                step += 1

                t.set_postfix(loss=loss_impacting_samples_count /
                              samples_count * 100)
                t.update()

        summary_writer.add_scalar('Metrics/batch_loss',
                                  loss_impacting_samples_count /
                                  samples_count * 100,
                                  global_step=epoch_id)

        # validation
        if epoch_id % params.validation_freq == 0:
            model.eval()
            _, _, hits_at_10, _ = evaluate(
                model=model,
                data_generator=validation_generator,
                entities_count=len(entity2id),
                device=params.device,
                summary_writer=summary_writer,
                epoch_id=epoch_id,
                metric_suffix="val")
            score = hits_at_10
            if score > best_score:
                best_score = score
                utils.save_checkpoint(checkpoint_dir, model, optimizer,
                                      epoch_id, step, best_score)

    # Testing the best checkpoint on the test dataset
    utils.load_checkpoint(checkpoint_dir, model, optimizer)
    best_model = model.to(params.device)
    best_model.eval()
    scores = evaluate(model=best_model,
                      data_generator=test_generator,
                      entities_count=len(entity2id),
                      device=params.device,
                      summary_writer=summary_writer,
                      epoch_id=1,
                      metric_suffix="test")
    print("Test scores: \n hits@1: {} \n hits@3: {} \n hits@10: {} \n mrr: {}"
          .format(scores[0], scores[1], scores[2], scores[3]))

    eval_path = os.path.join(args.model_dir, 'eval.json')
    evals_params = utils.Params(eval_path)
    evals_params.hit_1 = scores[0]
    evals_params.hit_3 = scores[1]
    evals_params.hit_10 = scores[2]
    evals_params.mrr = scores[3]
    evals_params.best_score = best_score
    evals_params.save(eval_path)
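# Illustrative sketch (not part of the original code) of the link-prediction
# metrics the evaluate() calls above are expected to return: for each test
# triple, the ground-truth entity is ranked among all candidates, and the
# ranks are reduced to hits@N and MRR. The helper name ranks_to_metrics is
# hypothetical.
def ranks_to_metrics(ranks):
    # ranks: 1-based ranks of the ground-truth entities, one per test triple
    hits_at_1 = sum(1 for r in ranks if r <= 1) / len(ranks)
    hits_at_3 = sum(1 for r in ranks if r <= 3) / len(ranks)
    hits_at_10 = sum(1 for r in ranks if r <= 10) / len(ranks)
    mrr = sum(1.0 / r for r in ranks) / len(ranks)
    return hits_at_1, hits_at_3, hits_at_10, mrr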
""" Evaluate the model on the test set. """ # Load the parameters args = parser.parse_args() # fetch dataloaders dataloaders = data_loader.fetch_dataloader(data_dir=osp.join( os.environ['PWD'], args.data), batch_size=40, validation_split=0.2) test_dl = dataloaders['test'] # Define the model device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') model = net.Net(8, 3).to(device) #include puppi #model = net.Net(7, 3).to(device) #remove puppi optimizer = torch.optim.AdamW(model.parameters(), lr=0.001) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=500, threshold=0.05) loss_fn = net.loss_fn metrics = net.metrics model_dir = osp.join(os.environ['PWD'], args.ckpts) deltaR = 0.4 deltaR_dz = 0.3 # Reload weights from the saved file restore_ckpt = osp.join(model_dir, args.restore_file + '.pth.tar')
data_loader = DataLoader(args.data_dir, params)
data = data_loader.load_data(['train', 'val'], args.data_dir)
train_data = data['train']
val_data = data['val']

# specify the train and val dataset sizes
params.train_size = train_data['size']
params.val_size = val_data['size']

logging.info("- done.")

# Load embeddings
gen_emb = np.load(os.path.join(args.emb_dir, 'gen.npy'))
# domain_emb = np.load(os.path.join(args.emb_dir, 'domain.npy'))

# Define the model and optimizer
model = net.Net(params, gen_emb, None).cuda() if params.cuda else net.Net(
    params, gen_emb, None)
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                       lr=params.learning_rate)

# fetch loss function and metrics
loss_fn = net.loss_fn
metrics = net.metrics

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(model, train_data, val_data, optimizer, loss_fn, metrics,
                   params, args.model_dir, args.restore_file)
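# Illustrative sketch (an assumption about net.Net's internals, which are not
# shown here) of how the pre-trained gen_emb matrix loaded above is typically
# consumed: copied into an nn.Embedding and frozen, so that the requires_grad
# filter passed to the optimizer above skips it.
#
#     import torch.nn as nn
#     embedding = nn.Embedding.from_pretrained(
#         torch.from_numpy(gen_emb).float(), freeze=True)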
utils.set_logger(os.path.join(model_dir, 'eval.log'))
args.model_dir = model_dir
args.plot_dir = os.path.join(model_dir, 'figures')

# use GPU if available
cuda_exist = torch.cuda.is_available()
# Set random seeds for reproducible experiments if necessary
if cuda_exist:
    args.device = torch.device('cuda')
    # torch.cuda.manual_seed(240)
    logger.info('Using Cuda...')
    model = net.Net(args).cuda()
else:
    args.device = torch.device('cpu')
    # torch.manual_seed(230)
    logger.info('Not using cuda...')
    model = net.Net(args)

# Create the input data pipeline
logger.info('Loading the datasets...')
test_set = TestDataset(data_dir, args.dataset, args.num_class)
test_loader = DataLoader(test_set,
                         batch_size=args.predict_batch,
                         sampler=RandomSampler(test_set),
                         num_workers=4)
logger.info('- done.')
def predict_from_workspace(workspace_dir, input_data):
    """Run the model on input data from the workspace."""
    global args, data_loader

    data_dir = workspace_dir
    model_dir = os.path.join(data_dir, "model")

    # Load the parameters
    args = parser.parse_args()
    trgt_json_path = os.path.join(model_dir, 'params.json')
    assert os.path.isfile(
        trgt_json_path), "No json configuration file found at {}".format(
            trgt_json_path)
    params = utils.Params(trgt_json_path)
    params.data_dir = data_dir if data_dir else args.data_dir
    params.model_dir = model_dir if model_dir else args.model_dir

    # use GPU if available
    params.cuda = torch.cuda.is_available()

    # Set the random seed for reproducible experiments
    torch.manual_seed(230)
    if params.cuda:
        torch.cuda.manual_seed(230)

    # Get the logger
    utils.set_logger(os.path.join(params.model_dir, 'evaluate.log'))

    # Create the input data pipeline
    logging.info("Creating the dataset...")

    # load data
    data_loader = DataLoader(params.data_dir, params)
    data = data_loader.load_data_for_predict(input_data)
    batch_sentences = data["predict"]["data"]

    # compute length of longest sentence in batch
    batch_max_len = max([len(s) for s in batch_sentences])

    # prepare a numpy array with the data, initialising the data with pad_ind
    # and all labels with -1; initialising labels to -1 differentiates tokens
    # with tags from PADding tokens
    batch_data = data_loader.pad_ind * np.ones(
        (len(batch_sentences), batch_max_len))

    # copy the data to the numpy array
    for j in range(len(batch_sentences)):
        cur_len = len(batch_sentences[j])
        batch_data[j][:cur_len] = batch_sentences[j]

    logging.info("- done.")

    # Define the model
    model = net.Net(params).cuda() if params.cuda else net.Net(params)

    logging.info("Starting prediction")

    # Reload weights from the saved file
    utils.load_checkpoint(
        os.path.join(params.model_dir, args.restore_file + '.pth.tar'), model)

    # Predict
    results = predict(model, batch_data)

    return results
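# A compact restatement (illustrative only; the inline loop above is the
# original, and the helper name pad_batch is ours) of the padding scheme used
# in predict_from_workspace: fill a (batch, max_len) array with pad_ind, then
# copy each sentence over the front of its row.
import numpy as np

def pad_batch(batch_sentences, pad_ind):
    batch_max_len = max(len(s) for s in batch_sentences)
    batch_data = pad_ind * np.ones((len(batch_sentences), batch_max_len))
    for j, sentence in enumerate(batch_sentences):
        batch_data[j][:len(sentence)] = sentence
    return batch_data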
def Net(params):
    """Initialize a network object."""
    return net.Net(params)
# Set the random seed for reproducible experiments
torch.manual_seed(230)
if args.cuda:
    print('Training on GPU!')
    torch.cuda.manual_seed(230)

# Create the log file
utils.set_logger(os.path.join(args.model_dir, 'train.log'))
logging.info("Loading the datasets...")

# Fetch the data
dataloaders = data_loader.fetch_dataloader(['train', 'val'], args)
train_dl = dataloaders['train']
val_dl = dataloaders['val']
logging.info("- done.")

# Define the network structure and optimizer
model = net.Net().cuda() if args.cuda else net.Net()
optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

# Define the loss function and metrics
loss_fn = net.loss_fn
metrics = net.metrics

# Train and evaluate
logging.info("Starting training for {} epoch(s)".format(args.num_epochs))
train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, metrics, args)
    emb_path=args.embedding_file,
    emb_delimiter=' ',
    verbose=True)
metric_labels = data_loader.metric_labels  # relation labels to be evaluated

# Load data
train_data = data_loader.load_data('train')
# Due to the small dataset, the test data is used as validation data!
val_data = data_loader.load_data('test')

# Specify the train and val dataset sizes
params.train_size = train_data['size']
params.val_size = val_data['size']

logging.info("- done.")

# Define the model and optimizer
model = net.Net(data_loader, params)
if params.optim_method == 'sgd':
    optimizer = optim.SGD(model.parameters(),
                          lr=params.learning_rate,
                          momentum=0.9,
                          weight_decay=params.weight_decay)
elif params.optim_method == 'adam':
    optimizer = optim.Adam(model.parameters(),
                           lr=params.learning_rate,
                           betas=(0.9, 0.999),
                           weight_decay=params.weight_decay)
else:
    raise ValueError("Unknown optimizer, must be one of 'sgd'/'adam'.")
scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch))
    train_dl = data_loader.fetch_subset_dataloader('train', params)
    sub_train_dl = None
else:
    train_dl, sub_train_dl = data_loader.fetch_dataloader('train', params)
dev_dl, _ = data_loader.fetch_dataloader('dev', params)

logging.info("- done.")

"""Based on the model_version, determine model/optimizer and KD training mode
WideResNet and DenseNet were trained on multi-GPU; need to specify a dummy
nn.DataParallel module to correctly load the model parameters
"""
last_time_model = None
current_cycle = int(params.model_dir.split('_')[-1:][0])
if current_cycle != 1:
    last_time_model = net.Net(params).cuda() if params.cuda else net.Net(
        params)
    last_time_model_dir = '_'.join(
        params.model_dir.split('_')[:-1] + [f'{current_cycle - 1}'])
    utils.load_checkpoint(f'{last_time_model_dir}/best.pth.tar',
                          last_time_model)

if "distill" in params.model_version:
    # train a 5-layer CNN or an 18-layer ResNet with knowledge distillation
    if params.model_version == "cnn_distill":
        # model = mobilenetv3_small().cuda() if params.cuda else mobilenetv3_small()
        model = net.Net(params).cuda() if params.cuda else net.Net(params)
        optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)
        if current_cycle == 1:
            loss_fn_kd = net.loss_fn_kd
            print("loss_fn_kd")
# load train and validation data
train_dataset = DoulingoDataset(data_params)
weights = train_dataset.samples_weights(scale_factor=50)
sampler = WeightedRandomSampler(weights=weights, num_samples=len(weights))
train_data = DataLoader(train_dataset,
                        sampler=sampler,
                        batch_size=model_params.batch_size)
val_dataset = DoulingoDataset(data_params, split='val')
val_data = DataLoader(val_dataset, shuffle=False)
logging.info("- done.")

# Define the model and optimizer
model = net.Net(model_params)
model = nn.DataParallel(model)
if model_params.cuda:
    model = model.cuda()
# model._reset_parameters()
optimizer = AdamW(
    model.parameters(),
    lr=model_params.learning_rate,  # default is 5e-5, our notebook had 2e-5
    eps=model_params.adam_eps  # default is 1e-8
)

# Create the learning rate scheduler.
total_steps = len(train_data) * model_params.num_epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=1,  # Default value in run_glue.py
    num_training_steps=total_steps)
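# DoulingoDataset.samples_weights is not shown in this snippet; a common
# sketch (an assumption, not the repo's implementation) weights each sample
# by the inverse frequency of its class so that WeightedRandomSampler
# rebalances the classes during training:
import numpy as np

def inverse_frequency_weights(labels, scale_factor=1.0):
    # labels: integer class label per training sample
    counts = np.bincount(labels)          # samples per class
    return scale_factor / counts[labels]  # per-sample weight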
train_data = data['train']
val_data = data['val']

train_filename = 'subset.pkl'
val_filename = 'foo.py'

# specify the train and val dataset sizes
params.train_size = train_data['size']
params.val_size = val_data['size']

logging.info("- done.")

# Define the model and optimizer
phrase_model = net.Net(params, True).cuda() if params.cuda else net.Net(
    params, True)
vid_model = net.Net(params, False).cuda() if params.cuda else net.Net(
    params, False)
phrase_optimizer = optim.Adam(phrase_model.parameters(),
                              lr=params.learning_rate)
vid_optimizer = optim.Adam(vid_model.parameters(), lr=params.learning_rate)

# fetch loss function and metrics
loss_fn = torch.nn.modules.loss.TripletMarginLoss()
# metrics = net.metrics
metrics = None

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(phrase_model, vid_model, train_filename, val_filename,
    torch.cuda.manual_seed(230)

# dataset
data_path = os.path.join(args.dataset_path, 'questions.txt')
text, idx2word, word2idx, word_freqs, word_counts = create_corpus(data_path)
train_set = data_loader.DataLoader(text, word2idx, idx2word, word_freqs,
                                   word_counts)
dataloader = torch_data.DataLoader(train_set,
                                   batch_size=params.batch_size,
                                   shuffle=True)
print("corpus: {}".format(len(idx2word)))
del text

# model
model = net.Net(vocab_size=len(idx2word),
                emb_size=params.embedding_dim).to(params.device)
optimizer = optim.SGD(model.parameters(), lr=params.learning_rate)
summary_writer = tensorboard.SummaryWriter(log_dir=args.tensorboard_log_dir)

step = 0
start_epoch_id = 1
best_score = 0

print(model)

# training
for epoch_id in range(start_epoch_id, params.epochs + 1):
    print("Epoch {}/{}".format(epoch_id, params.epochs))
    loss = 0
    model.train()
    with tqdm(total=len(dataloader)) as t:
        for i, (input_labels, pos_labels,
import numpy as np
import torch
import torch.nn as nn

import model.net as net
import model.data_loader as data_loader

if __name__ == '__main__':
    test_loader = data_loader.fetch_dataloader('/scidata/fruits-360/Test',
                                               batch_size=128)
    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

    model = net.Net()
    model.load_state_dict(torch.load("model.pth"))
    model.to(device)
    model.eval()

    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    print('Accuracy on test data: {:.4f} %'.format(100 * correct / total))
assert os.path.isfile(
    json_path), "No json configuration file found at {}".format(json_path)
params = utils.Params(json_path)

if args.model == "resnet18":
    model = resnet.ResNet18()
    model_checkpoint = 'experiments/base_resnet18/best.pth.tar'
elif args.model == "wrn":
    model = wrn.wrn(depth=28, num_classes=10, widen_factor=10, dropRate=0.3)
    model_checkpoint = 'experiments/base_wrn/best.pth.tar'
elif args.model == "distill_resnext":
    model = resnet.ResNet18()
    model_checkpoint = 'experiments/resnet18_distill/resnext_teacher/best.pth.tar'
elif args.model == "distill_densenet":
    model = resnet.ResNet18()
    model_checkpoint = 'experiments/resnet18_distill/densenet_teacher/best.pth.tar'
elif args.model == "cnn":
    model = net.Net(params)
    model_checkpoint = 'experiments/cnn_distill/best.pth.tar'
else:
    raise ValueError("Unknown model: {}".format(args.model))

utils.load_checkpoint(model_checkpoint, model)

model_size = count_parameters(model)
print("Number of parameters in {} is: {}".format(args.model, model_size))
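# count_parameters is called above but not defined in this snippet; a
# standard sketch (an assumption about its behavior) counts only the
# trainable parameters of the model:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)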
utils.set_logger(os.path.join(args.model_dir, 'evaluate.log'))

# Create the input data pipeline
logging.info("Creating the dataset...")

# fetch dataloaders
data_dir = config.get('data paths', 'data_dir')
dataloaders = data_loader.fetch_dataloader(['test'], data_dir, params)
test_dl = dataloaders['test']

logging.info("- done.")

# Define the model
input_channels = int(config.get('settings', 'input_channels'))
output_channels = int(config.get('settings', 'output_channels'))
model = net.Net(input_channels,
                output_channels).cuda() if params.cuda else net.Net(
                    input_channels, output_channels)
loss_fn = nn.CrossEntropyLoss()
metrics = net.metrics

logging.info("Starting evaluation")

# Reload weights from the saved file
utils.load_checkpoint(
    os.path.join(args.model_dir, args.restore_file + '.pth.tar'), model)

# Evaluate
test_metrics = evaluate(model, loss_fn, test_dl, metrics, params)
save_path = os.path.join(args.model_dir,
                         "metrics_test_{}.json".format(args.restore_file))
data_loader = DataLoader(args.data_dir, params)
data = data_loader.load_data(['test'], args.data_dir)
test_data = data['test']

# specify the test set size
params.test_size = test_data['size']
test_data_iterator = data_loader.data_iterator(test_data, params)

logging.info("- done.")

# Load embeddings
gen_emb = np.load(os.path.join(args.emb_dir, 'gen.npy'))
domain_emb = np.load(os.path.join(args.emb_dir, 'domain.npy'))

# Define the model
model = net.Net(params, gen_emb,
                domain_emb).cuda() if params.cuda else net.Net(
                    params, gen_emb, domain_emb)

loss_fn = net.loss_fn
metrics = net.metrics

logging.info("Starting evaluation")

# Reload weights from the saved file
utils.load_checkpoint(
    os.path.join(args.model_dir, args.restore_file + '.pth.tar'), model)

# Evaluate
num_steps = (params.test_size + 1) // params.batch_size
test_metrics = evaluate(model, loss_fn, test_data_iterator, metrics, params,
                        num_steps)
save_path = os.path.join(args.model_dir,
                         "metrics_test_{}.json".format(args.restore_file))
utils.save_dict_to_json(test_metrics, save_path)
if __name__ == '__main__':
    args = parser.parse_args()

    dataloaders = data_loader.fetch_dataloader(
        data_dir=osp.join(os.environ['PWD'], args.data),
        batch_size=60,
        validation_split=0.5)
    train_dl = dataloaders['train']
    test_dl = dataloaders['test']
    print(len(train_dl), len(test_dl))

    # model = net.Net().to('cuda')
    # model = torch.jit.script(net.Net(7, 3)).to('cuda')
    # [px, py, pt, eta, d0, dz, mass], [pdgid, charge, fromPV]
    model = net.Net(8, 3).to('cuda')
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=5e-3)
    optimizer = torch.optim.AdamW(model.parameters(), lr=0.001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           factor=0.5,
                                                           patience=500,
                                                           threshold=0.05)
    first_epoch = 0
    best_validation_loss = 10e7
    deltaR = 0.4
    loss_fn = net.loss_fn
    metrics = net.metrics
    model_dir = osp.join(os.environ['PWD'], args.ckpts)
def main(stride):
    logger = logging.getLogger('DeepAR.Train')
    arg = {
        'model_name': f'base_stock_stride={stride}',
        'data_folder': 'data',
        'dataset': 'stock',
        'relative_metrics': 0,
        'sampling': 0,
        'restore_file': None,
        'save_best': 0,
        'generate_features': 0,
        'default_base': 1,
        'save_directory': 'stock',
        'stride_size': 8
    }
    train_files, test_files = prepare_data_main(stride, arg)

    model_dir = os.path.join('experiments', arg['model_name'])
    json_path = os.path.join(model_dir, 'params.json')
    data_dir = os.path.join(arg['data_folder'], arg['dataset'])
    assert os.path.isfile(json_path), \
        f'No json configuration file found at {json_path}'
    params = utils.Params(json_path)
    params.relative_metrics = arg['relative_metrics']
    params.sampling = arg['sampling']
    params.model_dir = model_dir
    params.plot_dir = os.path.join(model_dir, 'figures')

    # create missing directories
    try:
        os.mkdir(params.plot_dir)
    except FileExistsError:
        pass

    utils.set_logger(os.path.join(model_dir, 'train.log'))

    # use GPU if available
    cuda_exist = torch.cuda.is_available()
    # Set random seeds for reproducible experiments if necessary
    if cuda_exist:
        params.device = torch.device('cuda')
        # torch.cuda.manual_seed(240)
        logger.info('Using Cuda...')
        model = net.Net(params).cuda()
    else:
        params.device = torch.device('cpu')
        # torch.manual_seed(230)
        logger.info('Not using cuda...')
        model = net.Net(params)

    torch.manual_seed(777)
    torch.cuda.manual_seed(777)
    np.random.seed(777)

    logger.info('Loading the datasets...')
    train_set = TrainDataset(data_dir, arg['dataset'], params.num_class,
                             data=train_files[0], label=train_files[-1])
    test_set = TestDataset(data_dir, arg['dataset'], params.num_class,
                           data=test_files[0], v=test_files[1],
                           label=test_files[-1])
    # Use weighted sampler instead of random sampler
    sampler = WeightedSampler(data_dir, arg['dataset'], v=train_files[1])
    train_loader = DataLoader(train_set, batch_size=params.batch_size,
                              sampler=sampler, num_workers=4)
    test_loader = DataLoader(test_set, batch_size=params.predict_batch,
                             sampler=RandomSampler(test_set), num_workers=4)
    logger.info('Loading complete.')

    logger.info(f'Model: \n{str(model)}')
    optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

    # fetch loss function
    loss_fn = net.loss_fn

    # Train the model
    logger.info('Starting training for {} epoch(s) with stride_size {}'.format(
        params.num_epochs, stride))
    train_and_evaluate(model, train_loader, test_loader, optimizer, loss_fn,
                       params, arg['restore_file'], arg)
    logger.info(f'Finished processing {stride}')
    return True
# fetch dataloaders, considering full-set vs. sub-set scenarios
if params.subset_percent < 1.0:
    train_dl = data_loader.fetch_subset_dataloader('train', params)
else:
    train_dl = data_loader.fetch_dataloader('train', params,
                                            os.path.join(args.data_dir))

dev_dl = data_loader.fetch_dataloader('dev', params,
                                      os.path.join(args.data_dir))

logging.info("- done.")

"""Based on the model_version, determine model/optimizer and KD training mode
WideResNet and DenseNet were trained on multi-GPU; need to specify a dummy
nn.DataParallel module to correctly load the model parameters
"""
if args.model_student == '3CNN':
    model = net.Net_3CNN(params).cuda() if params.cuda else net.Net_3CNN(params)
    print("Model student 3CNN is selected")
elif args.model_student == '5CNN':
    model = net.Net_5CNN(params).cuda() if params.cuda else net.Net_5CNN(params)
    print("Model student 5CNN is selected")
elif args.model_student == '7CNN':
    model = net.Net_7CNN(params).cuda() if params.cuda else net.Net_7CNN(params)
    print("Model student 7CNN is selected")
else:
    model = net.Net_3CNN(params).cuda() if params.cuda else net.Net_3CNN(params)
    print("Model student 3CNN is selected by default")

optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

# fetch loss function and metrics definition in model files
loss_fn_kd = net.loss_fn_kd
metrics = net.metrics

if args.mode == 'B':
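# net.loss_fn_kd is fetched above but not shown; a sketch in the style of the
# standard Hinton et al. knowledge-distillation loss (an assumption, not
# necessarily this repo's exact implementation): soften student and teacher
# logits with temperature T, then mix a KL term with plain cross-entropy.
import torch.nn.functional as F

def loss_fn_kd_sketch(outputs, labels, teacher_outputs, T=4.0, alpha=0.9):
    kd_term = F.kl_div(F.log_softmax(outputs / T, dim=1),
                       F.softmax(teacher_outputs / T, dim=1),
                       reduction='batchmean') * (alpha * T * T)
    ce_term = F.cross_entropy(outputs, labels) * (1.0 - alpha)
    return kd_term + ce_term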
    for model_dir in params.integrate_model:
        model_paths.append(os.path.join('experiments', model_dir))
else:
    current_cycle = int(args.model_dir.split('_')[-1:][0])
    if not current_cycle == 1:
        for i in range(1, current_cycle):
            model_paths.append(
                '_'.join(args.model_dir.split('_')[:-1] + [f'{i}']))

model_paths.sort()
logging.info(f"model_paths: \n{model_paths}")

model_e_lambdas = {}
for model_path in model_paths:
    specified_params = utils.Params(os.path.join(model_path, 'params.json'))
    model = net.Net(specified_params).cuda() if params.cuda else net.Net(
        specified_params)
    utils.load_checkpoint(os.path.join(model_path, 'best.pth.tar'), model)
    model.eval()
    model_e_lambdas[model] = specified_params.e_lambda

logging.info("Starting evaluation...")
test_metrics = evaluate(model_e_lambdas, net.loss_fn, dev_dl, net.metrics,
                        params)

save_path = os.path.join(model_path, 'metrics_test_best.json')
for i in range(1, 6):
    save_path = os.path.join(model_path, f'metrics_test_best_{i}.json')
    if not os.path.exists(save_path):
        break
model_dir = os.path.join(path, 'result', data_set, 'forecasting_acorn',
                         'deepar', f'times_{times}')
if not os.path.exists(model_dir):
    os.makedirs(model_dir)
params.model_dir = os.path.join(
    model_dir, f'n_clusters_{n_clusters}_month_{month}.pth.tar')

# use GPU if available
cuda_exist = torch.cuda.is_available()
# Set random seeds for reproducible experiments if necessary
if cuda_exist:
    params.device = torch.device('cuda')
    # torch.cuda.manual_seed(240)
    model = net.Net(params).cuda()
else:
    params.device = torch.device('cpu')
    # torch.manual_seed(230)
    model = net.Net(params)

# dataset
train_set = TrainDataset(train_x_input, train_label)
test_set = TestDataset(test_x_input, test_v_input, test_label)
val_set = TestDataset(val_x_input, val_v_input, val_label)

# sampler
# Use weighted sampler instead of random sampler
train_sampler = WeightedSampler(train_v_input)
last_json_path = os.path.join(args.model_dir,
                              'metrics_test_last_weights.json')
utils.save_dict_to_json(test_metrics, last_json_path)


if __name__ == '__main__':
    # Load the parameters from json file
    args = parser.parse_args()
    model_dir = args.model_dir

    if args.use_gpu:
        torch.cuda.manual_seed(42)
        logger.info('Using Cuda...')
        model = net.Net(args.feature_dim, args.proj_dim, args.hidden_dim,
                        args.num_layers_lstm).cuda(args.gpu_num)
        args.device = torch.device(f'cuda:{args.gpu_num}')
    else:
        logger.info('Not using cuda...')
        model = net.Net(args.feature_dim, args.proj_dim, args.hidden_dim,
                        args.num_layers_lstm)
        args.device = torch.device('cpu')

    utils.set_logger(os.path.join(args.model_dir, 'train.log'))

    logger.info('Loading the datasets...')
    train_set = MyTrainDataset(args.train_data)
    test_set = MyTestDataset(args.val_data)
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              sampler=RandomSampler(train_set),
# Set the random seed for reproducible experiments
torch.manual_seed(230)
if params.cuda:
    torch.cuda.manual_seed(230)

# Set the logger
utils.set_logger(os.path.join(args.model_dir, 'train.log'))

# Create the input data pipeline
logging.info("Loading the datasets...")

# fetch dataloaders
dataloaders = data_loader.fetch_dataloader(['train', 'val'], args.data_dir,
                                           params)
train_dl = dataloaders['train']
val_dl = dataloaders['val']

logging.info("- done.")

# Define the model and optimizer
model = net.Net(params).cuda() if params.cuda else net.Net(params)
optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

# fetch loss function and metrics
loss_fn = net.loss_fn
metrics = net.metrics

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, metrics,
                   params, args.model_dir, args.restore_file)
# use GPU if available
params.cuda = torch.cuda.is_available()

# Set the random seed for reproducible experiments
torch.manual_seed(230)
if params.cuda:
    torch.cuda.manual_seed(230)

# Set the logger
utils.set_logger(os.path.join(args.model_dir, 'train.log'))

# Create the input data pipeline
logging.info("Loading the datasets...")

# fetch dataloaders
dataloaders = data_loader.fetch_dataloader(['train', 'val'], args.data_dir,
                                           params)
train_dl = dataloaders['train']
val_dl = dataloaders['val']
num_features = train_dl.dataset.num_input_features
logging.info("Training on {} examples, each with {} features.".format(
    len(train_dl.dataset), num_features))

# Define the model and optimizer
model = net.Net(params, num_features).cuda() if params.cuda else net.Net(
    params, num_features)
optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)

# fetch loss function and metrics
loss_fn = net.loss_fn
metrics = net.metrics

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(model, train_dl, val_dl, optimizer, loss_fn, metrics,
                   params, args.model_dir, args.restore_file)