def create_trainer():
    model = Baseline(bert_vocab_num=24000, emb_dim=300, hidden_dim=256,
                     output_dim=3).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    criterion = torch.nn.CrossEntropyLoss()
    # criterion = FocalLoss(num_classes=3)
    trainer = Trainer(model, optimizer, criterion, NUM_EPOCH, device)
    return trainer
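FocalLoss appears only in the commented-out alternative above and is not defined in this snippet. A minimal sketch of a multi-class focal loss with that constructor signature, assuming raw logits as input and an assumed gamma default (the project's own implementation may differ):

import torch
import torch.nn.functional as F

class FocalLoss(torch.nn.Module):
    # Hypothetical sketch; the gamma default is an assumption.
    def __init__(self, num_classes, gamma=2.0):
        super().__init__()
        self.num_classes = num_classes
        self.gamma = gamma

    def forward(self, logits, target):
        log_p = F.log_softmax(logits, dim=-1)
        ce = F.nll_loss(log_p, target, reduction='none')
        p_t = log_p.gather(1, target.unsqueeze(1)).squeeze(1).exp()
        return ((1.0 - p_t) ** self.gamma * ce).mean()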
def main(config):
    setup_logging(os.getcwd())
    logger = logging.getLogger('test')

    use_gpu = config['n_gpu'] > 0 and torch.cuda.is_available()
    device = torch.device('cuda:0' if use_gpu else 'cpu')

    datamanager = DataManger(config['data'], phase='test')

    model = Baseline(
        num_classes=datamanager.datasource.get_num_classes('train'),
        is_training=False)
    model = model.eval()

    logger.info('Loading checkpoint: {} ...'.format(config['resume']))
    checkpoint = torch.load(config['resume'], map_location='cpu')
    model.load_state_dict(checkpoint['state_dict'])

    if config['extract']:
        logger.info('Extract feature from query set...')
        query_feature, query_label = feature_extractor(
            model, datamanager.get_dataloader('query'), device)
        logger.info('Extract feature from gallery set...')
        gallery_feature, gallery_label = feature_extractor(
            model, datamanager.get_dataloader('gallery'), device)

        gallery_embeddings = (gallery_feature, gallery_label)
        query_embeddings = (query_feature, query_label)

        os.makedirs(config['testing']['ouput_dir'], exist_ok=True)
        with open(os.path.join(config['testing']['ouput_dir'],
                               'gallery_embeddings.pt'), 'wb') as f:
            torch.save(gallery_embeddings, f)
        with open(os.path.join(config['testing']['ouput_dir'],
                               'query_embeddings.pt'), 'wb') as f:
            torch.save(query_embeddings, f)

    gallery_feature, gallery_label = torch.load(
        os.path.join(config['testing']['ouput_dir'], 'gallery_embeddings.pt'),
        map_location='cpu')
    query_feature, query_label = torch.load(
        os.path.join(config['testing']['ouput_dir'], 'query_embeddings.pt'),
        map_location='cpu')

    distance = compute_distance_matrix(query_feature, gallery_feature)

    top1 = top_k(distance, output=gallery_label, target=query_label, k=1)
    top5 = top_k(distance, output=gallery_label, target=query_label, k=5)
    top10 = top_k(distance, output=gallery_label, target=query_label, k=10)
    m_ap = mAP(distance, output=gallery_label, target=query_label, k='all')

    logger.info(
        'Datasets: {}, without spatial-temporal: top1: {}, top5: {}, top10: {}, mAP: {}'.format(
            datamanager.datasource.get_name_dataset(), top1, top5, top10, m_ap))
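compute_distance_matrix, top_k and mAP come from the project's metric utilities and are not shown here. A minimal sketch of the first two, assuming Euclidean distances and label-match retrieval accuracy (the real helpers may differ; mAP would follow the same ranking logic):

import torch

def compute_distance_matrix(query_feature, gallery_feature):
    # Pairwise Euclidean distances, shape (num_query, num_gallery).
    return torch.cdist(query_feature, gallery_feature, p=2)

def top_k(distance, output, target, k=1):
    # Fraction of queries whose k nearest gallery items contain the query label.
    _, indices = torch.topk(distance, k=k, dim=1, largest=False)
    hits = (output[indices] == target.unsqueeze(1)).any(dim=1)
    return hits.float().mean().item()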
def get_model(opts): """ ---------------------------------------------------------------------------------------------------------------- Test(id=Baseline.SGD.CosineAnnealingLR.CIFAR10.1000.512.01, loss=1.6240, mA=0.8682) ✡ Test(id=SEBaseline.SGD.CosineAnnealingLR.CIFAR10.1000.512.01, loss=1.6408, mA=0.8717) ✡ ---------------------------------------------------------------------------------------------------------------- ✡ Test(id=Baseline.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.6121, mA=0.8697) ✡ Test(id=SEBaseline.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.6210, mA=0.8695) Test(id=AABaseline.SGD.OneCycleLR.CIFAR10.1000.2048.01, loss=1.6297, mA=0.8625) TODO Test(id=SASABaseline.AdamW.OneCycleLR.CIFAR10.300.128.001, loss=1.6930, mA=0.8438) ---------------------------------------------------------------------------------------------------------------- Test(id=SimpleResNet56.SGD.CosineAnnealingWarmRestarts.CIFAR10.1000.256.01, loss=1.5356, mA=0.9205) ✡ Test(id=SimpleSEResNet56.SGD.CosineAnnealingWarmRestarts.CIFAR10.1000.256.01, loss=1.5866, mA=0.9238) ✡ ---------------------------------------------------------------------------------------------------------------- Test(id=SimpleResNet56.SGD.CosineAnnealingLR.CIFAR10.1000.256.01, loss=1.5238, mA=0.9273) ✡ Test(id=SimpleSEResNet56.SGD.CosineAnnealingLR.CIFAR10.1000.512.01, loss=1.5145, mA=0.9353) ✡ Test(id=SimpleStdAAResNet56.SGD.CosineAnnealingLR.CIFAR10.1000.512.01, loss=1.5243, mA=0.9275) ---------------------------------------------------------------------------------------------------------------- Test(id=SimpleResNet56.SGD.OneCycleLR.CIFAR10.1000.256.01, loss=1.5239, mA=0.9254) ✡ Test(id=SimpleSEResNet56.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.5160, mA=0.9356) ✡ Test(id=SimpleOrigAAResNet56.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.5261, mA=0.9138) Test(id=SimpleStdAAResNet56.SGD.OneCycleLR.CIFAR10.1000.512.01, loss=1.5381, mA=0.9265) Test(id=SimpleSASAResNet56.AdamW.OneCycleLR.CIFAR10.300.256.01, loss=0.8838, mA=0.8457) ---------------------------------------------------------------------------------------------------------------- ✡ Test(id=SimpleResNet110.SGD.CosineAnnealingLR.CIFAR10.1000.1024.01, loss=1.5177, mA=0.9336) ✡ Test(id=SimpleSEResNet110.SGD.CosineAnnealingLR.CIFAR10.1000.1024.01, loss=1.5231, mA=0.9319) ---------------------------------------------------------------------------------------------------------------- Test(id=ResNet50.SGD.CosineAnnealingLR.CIFAR10.1000.128.01, loss=1.5777, mA=0.9244) ✡ Test(id=ResNet50.AdamW.OneCycleLR.CIFAR10.300.128.005, loss=1.5237, mA=0.9395) ✡ Test(id=SEResNet50.SGD.CosineAnnealingLR.CIFAR10.1000.128.01, loss=1.5243, mA=0.9156) Test(id=AAResNet50.SGD.OneCycleLR.CIFAR10.1000.256.01, loss=1.5223, mA=0.9072) ---------------------------------------------------------------------------------------------------------------- """ return { 'Baseline': lambda: Baseline(opts), 'SEBaseline': lambda: SEBaseline(opts), 'AABaseline': lambda: AABaseline(opts), 'SASABaseline': lambda: SASABaseline(stem=False), 'SASAStemBaseline': lambda: SASABaseline(stem=True), 'SimpleChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=True, mode='none'), 'ComplexChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=False, mode='none'), 'SkipSimpleChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=True, mode='skip'), 'SkipComplexChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=False, mode='skip'), 'ScaleSimpleChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=True, mode='scale'), 
'ScaleComplexChannelAttnBaseline': lambda: ChannelAttnBaseline(simple=False, mode='scale'), 'SimpleResNet56': lambda: SimpleResNet(n=9), 'SimpleResNet110': lambda: SimpleResNet(n=18), 'SimpleSEResNet56': lambda: SimpleSEResNet(n=9), 'SimpleSEResNet110': lambda: SimpleSEResNet(n=18), 'SimpleOrigAAResNet56': lambda: SimpleAAResNet(n=9, original=True), 'SimpleStdAAResNet56': lambda: SimpleAAResNet(n=9, original=False), 'SimpleSASAResNet56': lambda: SimpleSASAResNet(n=9, stem=False), 'SimpleStemSASAResNet56': lambda: SimpleSASAResNet(n=9, stem=True), 'ResNet50': lambda: ResNet(sizes=[3, 4, 6, 3]), 'SEResNet50': lambda: SEResNet(sizes=[3, 4, 6, 3]), 'AAResNet50': lambda: AAResNet(sizes=[3, 4, 6, 3]), 'ResNet101': lambda: ResNet(sizes=[3, 4, 23, 3]), 'SEResNet101': lambda: SEResNet(sizes=[3, 4, 23, 3]), }[opts.model_name]()
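get_model dispatches on opts.model_name. A hypothetical call, assuming opts is an argparse-style namespace that carries at least model_name (the remaining fields depend on the chosen architecture):

from argparse import Namespace

opts = Namespace(model_name='SimpleResNet56')  # illustrative only
model = get_model(opts)                        # -> SimpleResNet(n=9)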
def train_model(data_pack, num_epochs, learning_rate, num_words, dim_embedding, num_classes, model_name): train_X, train_y, valid_X, valid_y, test_X, test_y = data_pack if model_name == "Baseline-BoW": model = Bag_of_Words(num_words, num_classes) elif model_name == "Baseline-AvEmbedding": model = Baseline(num_words, dim_embedding, num_classes) elif model_name == "Shallow-CNN": n_filters = [40, 40] model = CNN(num_words, dim_embedding, num_classes, n_filters) elif model_name == "Deep-CNN": n_filters = [40, 48, 72, 48] model = CNN_Deep(num_words, dim_embedding, num_classes, n_filters) elif model_name == "Shallow-LSTM": memory_size = 100 model = LSTM(num_words, dim_embedding, num_classes, memory_size) elif model_name == "Deep-LSTM": memory_size = 100 model = LSTM_Deep(num_words, dim_embedding, num_classes, memory_size) elif model_name == "Shallow-CNN-CE": n_filters = [40, 40] model = CE_CNN(dim_embedding, num_classes, n_filters) elif model_name == "Deep-CNN-CE": n_filters = [40, 48, 72, 48] model = CE_CNN_Deep(dim_embedding, num_classes, n_filters) elif model_name == "Block-CNN-CE": n_filters = [64, 128, 256, 512] model = CE_CNN_Block(dim_embedding, num_classes, n_filters) elif model_name == "ResNet-CE": n_filters = [64, 128, 256, 512] model = CE_ResNet(dim_embedding, num_classes, n_filters) model.cuda() # n_filters = [15, 20, 40] # model = CNN_Deep(num_words, dim_embedding, num_classes, n_filters) max_train, max_val, max_test = 0, 0, 0 min_train, min_val, min_test = 10, 10, 10 model = torch.load(model_name + ".pt") model.cuda() optimizer = optim.Adam(model.parameters(), lr=learning_rate) model.eval() criterion = torch.nn.CrossEntropyLoss() a = [] batch_x_one = torch.FloatTensor(batch_size, test_X[0].shape[1], dim_embedding) t_acc, output_results = run_example_set(model, criterion, test_X, test_y, batch_x_one=batch_x_one) print(output_results) results = open("results/example_set_prediction.txt", "w") for e in output_results: results.write(str(e) + "\n") print(str(t_acc))
def run(dataset_train, dataset_dev, dataset_test, model_type, word_embed_size,
        hidden_size, batch_size, use_cuda, n_epochs):
    if model_type == 'base':
        model = Baseline(vocab=dataset_train.vocab,
                         word_embed_size=word_embed_size,
                         hidden_size=hidden_size,
                         use_cuda=use_cuda,
                         inference=False)
    else:
        raise NotImplementedError
    if use_cuda:
        model = model.cuda()

    optim_params = model.parameters()
    optimizer = optim.Adam(optim_params, lr=10**-3)

    print('start training')
    for epoch in range(n_epochs):
        train_loss, tokens, preds, golds = train(dataset_train, model,
                                                 optimizer, batch_size, epoch,
                                                 Phase.TRAIN, use_cuda)
        dev_loss, tokens, preds, golds = train(dataset_dev, model, optimizer,
                                               batch_size, epoch, Phase.DEV,
                                               use_cuda)
        logger = '\t'.join([
            'epoch {}'.format(epoch + 1),
            'TRAIN Loss: {:.9f}'.format(train_loss),
            'DEV Loss: {:.9f}'.format(dev_loss)
        ])
        print('\r' + logger, end='')

    test_loss, tokens, preds, golds = train(dataset_test, model, optimizer,
                                            batch_size, epoch, Phase.TEST,
                                            use_cuda)
    print('====', 'TEST', '=====')
    print_scores(preds, golds)
    output_results(tokens, preds, golds)
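Phase is referenced above (Phase.TRAIN, Phase.DEV, Phase.TEST) but not defined in this snippet. A minimal sketch of what it is assumed to be:

from enum import Enum

class Phase(Enum):
    TRAIN = 'train'
    DEV = 'dev'
    TEST = 'test'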
def embedding_baseline_test(args): chosen_params = dict(params) results = [] for rand_emb in [True, False]: chosen_params['rand_emb'] = rand_emb train_dataset, valid_dataset, test_dataset = data.load_dataset( args.train_batch_size, args.test_batch_size, min_freq=chosen_params['min_freq']) embedding = data.generate_embedding_matrix( train_dataset.dataset.text_vocab, rand=chosen_params['rand_emb'], freeze=chosen_params['freeze']) result = {} for m in ['baseline', 'rnn']: if m == 'rnn': model = RNN(embedding, chosen_params) else: model = Baseline(embedding) criterion = nn.BCEWithLogitsLoss() optimizer = torch.optim.Adam(model.parameters(), lr=1e-4) for epoch in range(args.epochs): print(f'******* epoch: {epoch} *******') train(model, train_dataset, optimizer, criterion, args) evaluate(model, valid_dataset, criterion, 'Validation') acc, f1 = evaluate(model, test_dataset, criterion, 'Test') result[m + '_acc_rand_emb' + str(rand_emb)] = acc result[m + '_f1_rand_emb' + str(rand_emb)] = f1 results.append(result) with open(os.path.join(SAVE_DIR, 'embedding_baseline.txt'), 'a') as f: for res in results: print(res, file=f)
def main(args): # Set up logging args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False) log = util.get_logger(args.save_dir, args.name) log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') device, gpu_ids = util.get_available_devices() args.batch_size *= max(1, len(gpu_ids)) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) char_vectors = util.torch_from_json(args.char_emb_file) # Get model log.info('Building model...') nbr_model = 0 if (args.load_path_baseline): model_baseline = Baseline(word_vectors=word_vectors, hidden_size=100) model_baseline = nn.DataParallel(model_baseline, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_baseline}...') model_baseline = util.load_model(model_baseline, args.load_path_baseline, gpu_ids, return_step=False) model_baseline = model_baseline.to(device) model_baseline.eval() nll_meter_baseline = util.AverageMeter() nbr_model += 1 save_prob_baseline_start = [] save_prob_baseline_end = [] if (args.load_path_bidaf): model_bidaf = BiDAF(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size) model_bidaf = nn.DataParallel(model_bidaf, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_bidaf}...') model_bidaf = util.load_model(model_bidaf, args.load_path_bidaf, gpu_ids, return_step=False) model_bidaf = model_bidaf.to(device) model_bidaf.eval() nll_meter_bidaf = util.AverageMeter() nbr_model += 1 save_prob_bidaf_start = [] save_prob_bidaf_end = [] if (args.load_path_bidaf_fusion): model_bidaf_fu = BiDAF_fus(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size) model_bidaf_fu = nn.DataParallel(model_bidaf_fu, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_bidaf_fusion}...') model_bidaf_fu = util.load_model(model_bidaf_fu, args.load_path_bidaf_fusion, gpu_ids, return_step=False) model_bidaf_fu = model_bidaf_fu.to(device) model_bidaf_fu.eval() nll_meter_bidaf_fu = util.AverageMeter() nbr_model += 1 save_prob_bidaf_fu_start = [] save_prob_bidaf_fu_end = [] if (args.load_path_qanet): model_qanet = QANet(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_heads=args.n_heads, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, n_emb_enc_blocks=args.n_emb_blocks, n_mod_enc_blocks=args.n_mod_blocks, divisor_dim_kqv=args.divisor_dim_kqv) model_qanet = nn.DataParallel(model_qanet, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_qanet}...') model_qanet = util.load_model(model_qanet, args.load_path_qanet, gpu_ids, return_step=False) model_qanet = model_qanet.to(device) model_qanet.eval() nll_meter_qanet = util.AverageMeter() nbr_model += 1 save_prob_qanet_start = [] save_prob_qanet_end = [] if (args.load_path_qanet_old): model_qanet_old = QANet_old(word_vectors=word_vectors, char_vectors=char_vectors, device=device, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_heads=args.n_heads, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, n_emb_enc_blocks=args.n_emb_blocks, n_mod_enc_blocks=args.n_mod_blocks) model_qanet_old = nn.DataParallel(model_qanet_old, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_qanet_old}...') model_qanet_old = util.load_model(model_qanet_old, args.load_path_qanet_old, gpu_ids, return_step=False) model_qanet_old = model_qanet_old.to(device) model_qanet_old.eval() nll_meter_qanet_old = 
util.AverageMeter() nbr_model += 1 save_prob_qanet_old_start = [] save_prob_qanet_old_end = [] if (args.load_path_qanet_inde): model_qanet_inde = QANet_independant_encoder( word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_heads=args.n_heads, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, n_emb_enc_blocks=args.n_emb_blocks, n_mod_enc_blocks=args.n_mod_blocks, divisor_dim_kqv=args.divisor_dim_kqv) model_qanet_inde = nn.DataParallel(model_qanet_inde, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_qanet_inde}...') model_qanet_inde = util.load_model(model_qanet_inde, args.load_path_qanet_inde, gpu_ids, return_step=False) model_qanet_inde = model_qanet_inde.to(device) model_qanet_inde.eval() nll_meter_qanet_inde = util.AverageMeter() nbr_model += 1 save_prob_qanet_inde_start = [] save_prob_qanet_inde_end = [] if (args.load_path_qanet_s_e): model_qanet_s_e = QANet_S_E(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_heads=args.n_heads, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, n_emb_enc_blocks=args.n_emb_blocks, n_mod_enc_blocks=args.n_mod_blocks, divisor_dim_kqv=args.divisor_dim_kqv) model_qanet_s_e = nn.DataParallel(model_qanet_s_e, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_qanet_s_e}...') model_qanet_s_e = util.load_model(model_qanet_s_e, args.load_path_qanet_s_e, gpu_ids, return_step=False) model_qanet_s_e = model_qanet_s_e.to(device) model_qanet_s_e.eval() nll_meter_qanet_s_e = util.AverageMeter() nbr_model += 1 save_prob_qanet_s_e_start = [] save_prob_qanet_s_e_end = [] # Get data loader log.info('Building dataset...') record_file = vars(args)[f'{args.split}_record_file'] dataset = SQuAD(record_file, args.use_squad_v2) data_loader = data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Evaluate log.info(f'Evaluating on {args.split} split...') pred_dict = {} # Predictions for TensorBoard sub_dict = {} # Predictions for submission eval_file = vars(args)[f'{args.split}_eval_file'] with open(eval_file, 'r') as fh: gold_dict = json_load(fh) with torch.no_grad(), \ tqdm(total=len(dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) cc_idxs = cc_idxs.to(device) qc_idxs = qc_idxs.to(device) batch_size = cw_idxs.size(0) y1, y2 = y1.to(device), y2.to(device) l_p1, l_p2 = [], [] # Forward if (args.load_path_baseline): log_p1_baseline, log_p2_baseline = model_baseline( cw_idxs, cc_idxs) loss_baseline = F.nll_loss(log_p1_baseline, y1) + F.nll_loss( log_p2_baseline, y2) nll_meter_baseline.update(loss_baseline.item(), batch_size) l_p1 += [log_p1_baseline.exp()] l_p2 += [log_p2_baseline.exp()] if (args.save_probabilities): save_prob_baseline_start += [ log_p1_baseline.exp().detach().cpu().numpy() ] save_prob_baseline_end += [ log_p2_baseline.exp().detach().cpu().numpy() ] if (args.load_path_qanet): log_p1_qanet, log_p2_qanet = model_qanet( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_qanet = F.nll_loss(log_p1_qanet, y1) + F.nll_loss( log_p2_qanet, y2) nll_meter_qanet.update(loss_qanet.item(), batch_size) # Get F1 and EM scores l_p1 += [log_p1_qanet.exp()] l_p2 += [log_p2_qanet.exp()] if (args.save_probabilities): save_prob_qanet_start += [ log_p1_qanet.exp().detach().cpu().numpy() ] save_prob_qanet_end += [ 
log_p2_qanet.exp().detach().cpu().numpy() ] if (args.load_path_qanet_old): log_p1_qanet_old, log_p2_qanet_old = model_qanet_old( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_qanet_old = F.nll_loss(log_p1_qanet_old, y1) + F.nll_loss( log_p2_qanet_old, y2) nll_meter_qanet_old.update(loss_qanet_old.item(), batch_size) # Get F1 and EM scores l_p1 += [log_p1_qanet_old.exp()] l_p2 += [log_p2_qanet_old.exp()] if (args.save_probabilities): save_prob_qanet_old_start += [ log_p1_qanet_old.exp().detach().cpu().numpy() ] save_prob_qanet_old_end += [ log_p2_qanet_old.exp().detach().cpu().numpy() ] if (args.load_path_qanet_inde): log_p1_qanet_inde, log_p2_qanet_inde = model_qanet_inde( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_qanet_inde = F.nll_loss( log_p1_qanet_inde, y1) + F.nll_loss(log_p2_qanet_inde, y2) nll_meter_qanet_inde.update(loss_qanet_inde.item(), batch_size) # Get F1 and EM scores l_p1 += [log_p1_qanet_inde.exp()] l_p2 += [log_p2_qanet_inde.exp()] if (args.save_probabilities): save_prob_qanet_inde_start += [ log_p1_qanet_inde.exp().detach().cpu().numpy() ] save_prob_qanet_inde_end += [ log_p2_qanet_inde.exp().detach().cpu().numpy() ] if (args.load_path_qanet_s_e): log_p1_qanet_s_e, log_p2_qanet_s_e = model_qanet_s_e( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_qanet_s_e = F.nll_loss(log_p1_qanet_s_e, y1) + F.nll_loss( log_p2_qanet_s_e, y2) nll_meter_qanet_s_e.update(loss_qanet_s_e.item(), batch_size) # Get F1 and EM scores l_p1 += [log_p1_qanet_s_e.exp()] l_p2 += [log_p2_qanet_s_e.exp()] if (args.save_probabilities): save_prob_qanet_s_e_start += [ log_p1_qanet_s_e.exp().detach().cpu().numpy() ] save_prob_qanet_s_e_end += [ log_p2_qanet_s_e.exp().detach().cpu().numpy() ] if (args.load_path_bidaf): log_p1_bidaf, log_p2_bidaf = model_bidaf( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_bidaf = F.nll_loss(log_p1_bidaf, y1) + F.nll_loss( log_p2_bidaf, y2) nll_meter_bidaf.update(loss_bidaf.item(), batch_size) l_p1 += [log_p1_bidaf.exp()] l_p2 += [log_p2_bidaf.exp()] if (args.save_probabilities): save_prob_bidaf_start += [ log_p1_bidaf.exp().detach().cpu().numpy() ] save_prob_bidaf_end += [ log_p2_bidaf.exp().detach().cpu().numpy() ] if (args.load_path_bidaf_fusion): log_p1_bidaf_fu, log_p2_bidaf_fu = model_bidaf_fu( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_bidaf_fu = F.nll_loss(log_p1_bidaf_fu, y1) + F.nll_loss( log_p2_bidaf_fu, y2) nll_meter_bidaf_fu.update(loss_bidaf_fu.item(), batch_size) l_p1 += [log_p1_bidaf_fu.exp()] l_p2 += [log_p2_bidaf_fu.exp()] if (args.save_probabilities): save_prob_bidaf_fu_start += [ log_p1_bidaf_fu.exp().detach().cpu().numpy() ] save_prob_bidaf_fu_end += [ log_p2_bidaf_fu.exp().detach().cpu().numpy() ] p1, p2 = l_p1[0], l_p2[0] for i in range(1, nbr_model): p1 += l_p1[i] p2 += l_p2[i] p1 /= nbr_model p2 /= nbr_model starts, ends = util.discretize(p1, p2, args.max_ans_len, args.use_squad_v2) # Log info progress_bar.update(batch_size) if args.split != 'test': # No labels for the test set, so NLL would be invalid if (args.load_path_qanet): progress_bar.set_postfix(NLL=nll_meter_qanet.avg) elif (args.load_path_bidaf): progress_bar.set_postfix(NLL=nll_meter_bidaf.avg) elif (args.load_path_bidaf_fusion): progress_bar.set_postfix(NLL=nll_meter_bidaf_fu.avg) elif (args.load_path_qanet_old): progress_bar.set_postfix(NLL=nll_meter_qanet_old.avg) elif (args.load_path_qanet_inde): progress_bar.set_postfix(NLL=nll_meter_qanet_inde.avg) elif (args.load_path_qanet_s_e): progress_bar.set_postfix(NLL=nll_meter_qanet_s_e.avg) else: progress_bar.set_postfix(NLL=nll_meter_baseline.avg) idx2pred, 
uuid2pred = util.convert_tokens(gold_dict, ids.tolist(), starts.tolist(), ends.tolist(), args.use_squad_v2) pred_dict.update(idx2pred) sub_dict.update(uuid2pred) if (args.save_probabilities): if (args.load_path_baseline): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_baseline_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_baseline_end, fp) if (args.load_path_bidaf): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_bidaf_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_bidaf_end, fp) if (args.load_path_bidaf_fusion): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_bidaf_fu_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_bidaf_fu_end, fp) if (args.load_path_qanet): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_end, fp) if (args.load_path_qanet_old): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_old_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_old_end, fp) if (args.load_path_qanet_inde): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_inde_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_inde_end, fp) if (args.load_path_qanet_s_e): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_s_e_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_s_e_end, fp) # Log results (except for test set, since it does not come with labels) if args.split != 'test': results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2) if (args.load_path_qanet): meter_avg = nll_meter_qanet.avg elif (args.load_path_bidaf): meter_avg = nll_meter_bidaf.avg elif (args.load_path_bidaf_fusion): meter_avg = nll_meter_bidaf_fu.avg elif (args.load_path_qanet_inde): meter_avg = nll_meter_qanet_inde.avg elif (args.load_path_qanet_s_e): meter_avg = nll_meter_qanet_s_e.avg elif (args.load_path_qanet_old): meter_avg = nll_meter_qanet_old.avg else: meter_avg = nll_meter_baseline.avg results_list = [('NLL', meter_avg), ('F1', results['F1']), ('EM', results['EM'])] if args.use_squad_v2: results_list.append(('AvNA', results['AvNA'])) results = OrderedDict(results_list) # Log to console results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items()) log.info(f'{args.split.title()} {results_str}') # Log to TensorBoard tbx = SummaryWriter(args.save_dir) util.visualize(tbx, pred_dict=pred_dict, eval_path=eval_file, step=0, split=args.split, num_visuals=args.num_visuals) # Write submission file sub_path = join(args.save_dir, args.split + '_' + args.sub_file) log.info(f'Writing submission file to {sub_path}...') with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh: csv_writer = csv.writer(csv_fh, delimiter=',') csv_writer.writerow(['Id', 'Predicted']) for uuid in sorted(sub_dict): csv_writer.writerow([uuid, sub_dict[uuid]])
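The per-model probability lists pickled above can also be ensembled offline. A hypothetical sketch, assuming two runs wrote probs_start/probs_end under different save directories and iterated the same unshuffled data loader (the paths here are illustrative):

import pickle

def load_probs(save_dir):
    # Matches the pickle layout written above.
    with open(save_dir + "/probs_start", "rb") as fp:
        starts = pickle.load(fp)
    with open(save_dir + "/probs_end", "rb") as fp:
        ends = pickle.load(fp)
    return starts, ends

s_a, e_a = load_probs("save/test/qanet-01")   # assumed paths
s_b, e_b = load_probs("save/test/bidaf-01")
# Average batch by batch, mirroring the in-loop ensemble above.
avg_start = [(a + b) / 2.0 for a, b in zip(s_a, s_b)]
avg_end = [(a + b) / 2.0 for a, b in zip(e_a, e_b)]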
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1024,
                                               num_workers=0,
                                               shuffle=True)
    return train_loader


if __name__ == "__main__":
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Assuming that we are on a CUDA machine, this should print a CUDA device:
    epochs = 100
    best_val_loss = 999999
    print(device)

    net = Baseline()
    net.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.005, momentum=0.9)

    for epoch in range(epochs):
        with tqdm(total=len(load_dataset(train_path))) as epoch_pbar:
            epoch_pbar.set_description(f'Epoch {epoch}')
            running_loss = 0.0
            running_val_loss = 0.0
            for i, data in enumerate(load_dataset(train_path)):
                # get the inputs; data is a list of [inputs, labels]
                inputs = data[0].to(device)
                labels = data[1].to(device)

                optimizer.zero_grad()
                outputs = net(inputs)
                loss = criterion(outputs, labels)
                # backward pass and parameter update
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
if __name__ == '__main__':
    parser = Flags()
    parser.set_arguments()
    FG = parser.parse_args()

    rf = str(FG.cur_fold)
    vis = Visdom(port=FG.vis_port, env=FG.model + '_fold' + rf)
    report = parser.report(end='<br>')
    vis.text(report, win='report f{}'.format(FG.cur_fold))

    torch.cuda.set_device(FG.devices[0])
    device = torch.device(FG.devices[0])

    net = Baseline(FG.ckpt_dir, len(FG.labels))
    # net = Baseline3D(FG.ckpt_dir, len(FG.labels))
    if len(FG.devices) > 1:
        net = torch.nn.DataParallel(net, device_ids=FG.devices)
        print(net.module)
    else:
        print(net)

    optimizer = Adam(net.parameters(), lr=FG.lr, weight_decay=FG.l2_decay)
    scheduler = ExponentialLR(optimizer, gamma=FG.lr_gamma)

    trainloader, testloader = get_dataloader(k=FG.fold,
                                             cur_fold=FG.cur_fold,
                                             modality=FG.modality,
                                             axis=FG.axis,
def main(args): MaxEpochs = args.epochs lr = args.lr batchsize = args.batch_size curr_model = args.model # 3.2 Processing of the data TEXT = data.Field(sequential=True, include_lengths=True, tokenize='spacy') LABEL = data.Field(sequential=False, use_vocab=False) train, val, test = data.TabularDataset.splits( path='/Users/jiyun/PycharmProjects/mie324/assign4', train='train.tsv', validation='validation.tsv', test='test.tsv', format='tsv', skip_header=True, fields=[('text', TEXT), ('label', LABEL)]) train_iter, val_iter, test_iter = data.BucketIterator.splits( datasets=(train, val, test), sort_key=lambda x: len(x.text), sort_within_batch=True, repeat=False, batch_sizes=(batchsize, batchsize, batchsize), device=-1) # train_iter, val_iter, test_iter = data.Iterator.splits(datasets=(train, val, test), # sort_key=lambda x: len(x.text), sort_within_batch=True, # repeat=False, # batch_sizes=(batchsize, batchsize, batchsize), # device=-1) TEXT.build_vocab(train) vocab = TEXT.vocab vocab.load_vectors(torchtext.vocab.GloVe(name='6B', dim=100)) # 5 Training and Evaluation loss_fnc = torch.nn.BCELoss() base_model = Baseline(100, vocab) rnn_model = RNN(100, vocab, 100) cnn_model = CNN(100, vocab, 50, [2, 4]) if curr_model == 'baseline': model = base_model elif curr_model == 'rnn': model = rnn_model elif curr_model == 'cnn': model = cnn_model optimizer = optim.Adam(model.parameters(), lr=lr) for epoch in range(MaxEpochs): accum_loss = 0 tot_corr = 0 for i, batch in enumerate(train_iter): label = batch.label feats, lengths = batch.text optimizer.zero_grad() predicts = model(feats, lengths) batch_loss = loss_fnc(input=predicts.squeeze(), target=label.float()) accum_loss += batch_loss batch_loss.backward() optimizer.step() corr = (predicts > 0.5).squeeze().long() == label tot_corr += int(corr.sum()) train_acc = float(tot_corr) / (batchsize * 100) train_loss = accum_loss / (batchsize * 100) valid_acc, valid_loss = evaluate(model, val_iter) print( "Epoch: {} | Train acc: {} | Train loss: {} | Valid acc: {} | Valid loss: {}" .format(epoch, train_acc, train_loss, valid_acc, valid_loss)) print('Finished Training') torch.save(model, "model_%s.pt" % (curr_model)) test_model = torch.load("model_%s.pt" % (curr_model)) test_acc, test_loss = evaluate(test_model, test_iter) # test_acc, test_loss = evaluate(model, test_iter) print('Test acc: {} | Test loss: {}'.format(test_acc, test_loss))
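evaluate is not defined in this snippet. A minimal sketch of what it is assumed to compute for this binary sentiment setup (accuracy and mean BCE loss over one pass of an iterator); the real helper may differ:

import torch

def evaluate(model, data_iter):
    # Hypothetical helper returning (accuracy, average loss).
    loss_fnc = torch.nn.BCELoss()
    total_loss, total_corr, total_seen = 0.0, 0, 0
    for batch in data_iter:
        feats, lengths = batch.text
        label = batch.label
        predicts = model(feats, lengths)
        total_loss += loss_fnc(input=predicts.squeeze(),
                               target=label.float()).item()
        total_corr += int(((predicts > 0.5).squeeze().long() == label).sum())
        total_seen += label.size(0)
    return float(total_corr) / total_seen, total_loss / len(data_iter)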
        goldLabels.append(label)
        s1 = " ".join(leaves(t1))
        s2 = " ".join(leaves(t2))
        modelPredict = model.predict(s1, s2)
        predictions.append(modelPredict)
        count += 1
    accuracy = accuracy_score(predictions, goldLabels)
    print("Accuracy on SICK %s set: %f" % (dataSet, accuracy))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="arguments for CioEntails system")
    parser.add_argument("--model", type=str, default="baseline",
                        help="Name of model to use for system")
    args = parser.parse_args()

    if args.model == "baseline":
        model = Baseline("cosineSimilarity", ["keyword_overlap"])
    elif args.model == "keyword":
        model = Keyword("cosineSimilarity", ["keyword_overlap"])
    elif args.model == "NB":
        model = NaiveBayes("cosineSimilarity", ["keyword_overlap"])

    start = time.time()
    evaluateModel(model, args.model, sick_dev_reader)
    print("Evaluation done in %f seconds" % (time.time() - start))
def run_fold(parser, vis): devices = parser.args.devices parser.args.ckpt_dir = os.path.join('checkpoint', parser.args.model, 'f' + str(parser.args.cur_fold)) FG = parser.load() FG.devices = devices print(FG) torch.cuda.set_device(FG.devices[0]) device = torch.device(FG.devices[0]) net = Baseline(FG.ckpt_dir, len(FG.labels)) performances = net.load(epoch=None, is_best=True) net = net.to(device) trainloader, testloader = get_dataloader(k=FG.fold, cur_fold=FG.cur_fold, modality=FG.modality, axis=FG.axis, labels=FG.labels, batch_size=FG.batch_size) evaluator = create_supervised_evaluator( net, device=device, non_blocking=True, prepare_batch=process_ninecrop_batch, metrics={ 'sensitivity': Recall(False, mean_over_ninecrop), 'precision': Precision(False, mean_over_ninecrop), 'specificity': Specificity(False, mean_over_ninecrop) }) class Tracker(object): def __init__(self): self.data = [] outputs = Tracker() targets = Tracker() @evaluator.on(Events.ITERATION_COMPLETED) def transform_ninecrop_output(engine): output, target = engine.state.output if output.size(0) != target.size(0): n = target.size(0) npatches = output.size(0) // n output = output.view(n, npatches, *output.shape[1:]) output = torch.mean(output, dim=1) outputs.data += [output] targets.data += [target] evaluator.run(testloader) string = 'Fold {}'.format(FG.cur_fold) + '<br>' string += 'Epoch {}'.format(performances.pop('epoch')) + '<br>' for k in sorted(performances.keys()): string += k + ': ' + '{:.4f}'.format(performances[k]) string += '<br>' string += 'pre : ' + str(evaluator.state.metrics['precision']) + '<br>' string += 'sen : ' + str(evaluator.state.metrics['sensitivity']) + '<br>' string += 'spe : ' + str(evaluator.state.metrics['specificity']) + '<br>' vis.text(string, win=FG.model + '_result_fold{}'.format(FG.cur_fold)) del net return outputs.data, targets.data
class Trainer(BaseTrainer): def __init__(self, config): super(Trainer, self).__init__(config) self.datamanager = DataManger(config["data"]) # model self.model = Baseline( num_classes=self.datamanager.datasource.get_num_classes("train") ) # summary model summary( self.model, input_size=(3, 256, 128), batch_size=config["data"]["batch_size"], device="cpu", ) # losses cfg_losses = config["losses"] self.criterion = Softmax_Triplet_loss( num_class=self.datamanager.datasource.get_num_classes("train"), margin=cfg_losses["margin"], epsilon=cfg_losses["epsilon"], use_gpu=self.use_gpu, ) self.center_loss = CenterLoss( num_classes=self.datamanager.datasource.get_num_classes("train"), feature_dim=2048, use_gpu=self.use_gpu, ) # optimizer cfg_optimizer = config["optimizer"] self.optimizer = torch.optim.Adam( self.model.parameters(), lr=cfg_optimizer["lr"], weight_decay=cfg_optimizer["weight_decay"], ) self.optimizer_centerloss = torch.optim.SGD( self.center_loss.parameters(), lr=0.5 ) # learing rate scheduler cfg_lr_scheduler = config["lr_scheduler"] self.lr_scheduler = WarmupMultiStepLR( self.optimizer, milestones=cfg_lr_scheduler["steps"], gamma=cfg_lr_scheduler["gamma"], warmup_factor=cfg_lr_scheduler["factor"], warmup_iters=cfg_lr_scheduler["iters"], warmup_method=cfg_lr_scheduler["method"], ) # track metric self.train_metrics = MetricTracker("loss", "accuracy") self.valid_metrics = MetricTracker("loss", "accuracy") # save best accuracy for function _save_checkpoint self.best_accuracy = None # send model to device self.model.to(self.device) self.scaler = GradScaler() # resume model from last checkpoint if config["resume"] != "": self._resume_checkpoint(config["resume"]) def train(self): for epoch in range(self.start_epoch, self.epochs + 1): result = self._train_epoch(epoch) if self.lr_scheduler is not None: self.lr_scheduler.step() result = self._valid_epoch(epoch) # add scalars to tensorboard self.writer.add_scalars( "Loss", { "Train": self.train_metrics.avg("loss"), "Val": self.valid_metrics.avg("loss"), }, global_step=epoch, ) self.writer.add_scalars( "Accuracy", { "Train": self.train_metrics.avg("accuracy"), "Val": self.valid_metrics.avg("accuracy"), }, global_step=epoch, ) # logging result to console log = {"epoch": epoch} log.update(result) for key, value in log.items(): self.logger.info(" {:15s}: {}".format(str(key), value)) # save model if ( self.best_accuracy == None or self.best_accuracy < self.valid_metrics.avg("accuracy") ): self.best_accuracy = self.valid_metrics.avg("accuracy") self._save_checkpoint(epoch, save_best=True) else: self._save_checkpoint(epoch, save_best=False) # save logs self._save_logs(epoch) def _train_epoch(self, epoch): """Training step""" self.model.train() self.train_metrics.reset() with tqdm(total=len(self.datamanager.get_dataloader("train"))) as epoch_pbar: epoch_pbar.set_description(f"Epoch {epoch}") for batch_idx, (data, labels, _) in enumerate( self.datamanager.get_dataloader("train") ): # push data to device data, labels = data.to(self.device), labels.to(self.device) # zero gradient self.optimizer.zero_grad() self.optimizer_centerloss.zero_grad() with autocast(): # forward batch score, feat = self.model(data) # calculate loss and accuracy loss = ( self.criterion(score, feat, labels) + self.center_loss(feat, labels) * self.config["losses"]["beta"] ) _, preds = torch.max(score.data, dim=1) # backward parameters # loss.backward() self.scaler.scale(loss).backward() # backward parameters for center_loss for param in self.center_loss.parameters(): param.grad.data *= 
1.0 / self.config["losses"]["beta"] # optimize # self.optimizer.step() self.scaler.step(self.optimizer) self.optimizer_centerloss.step() self.scaler.update() # update loss and accuracy in MetricTracker self.train_metrics.update("loss", loss.item()) self.train_metrics.update( "accuracy", torch.sum(preds == labels.data).double().item() / data.size(0), ) # update process bar epoch_pbar.set_postfix( { "train_loss": self.train_metrics.avg("loss"), "train_acc": self.train_metrics.avg("accuracy"), } ) epoch_pbar.update(1) return self.train_metrics.result() def _valid_epoch(self, epoch): """Validation step""" self.model.eval() self.valid_metrics.reset() with torch.no_grad(): with tqdm(total=len(self.datamanager.get_dataloader("val"))) as epoch_pbar: epoch_pbar.set_description(f"Epoch {epoch}") for batch_idx, (data, labels, _) in enumerate( self.datamanager.get_dataloader("val") ): # push data to device data, labels = data.to(self.device), labels.to(self.device) with autocast(): # forward batch score, feat = self.model(data) # calculate loss and accuracy loss = ( self.criterion(score, feat, labels) + self.center_loss(feat, labels) * self.config["losses"]["beta"] ) _, preds = torch.max(score.data, dim=1) # update loss and accuracy in MetricTracker self.valid_metrics.update("loss", loss.item()) self.valid_metrics.update( "accuracy", torch.sum(preds == labels.data).double().item() / data.size(0), ) # update process bar epoch_pbar.set_postfix( { "val_loss": self.valid_metrics.avg("loss"), "val_acc": self.valid_metrics.avg("accuracy"), } ) epoch_pbar.update(1) return self.valid_metrics.result() def _save_checkpoint(self, epoch, save_best=True): """save model to file""" state = { "epoch": epoch, "state_dict": self.model.state_dict(), "center_loss": self.center_loss.state_dict(), "optimizer": self.optimizer.state_dict(), "optimizer_centerloss": self.optimizer_centerloss.state_dict(), "lr_scheduler": self.lr_scheduler.state_dict(), "best_accuracy": self.best_accuracy, } filename = os.path.join(self.checkpoint_dir, "model_last.pth") self.logger.info("Saving last model: model_last.pth ...") torch.save(state, filename) if save_best: filename = os.path.join(self.checkpoint_dir, "model_best.pth") self.logger.info("Saving current best: model_best.pth ...") torch.save(state, filename) def _resume_checkpoint(self, resume_path): """Load model from checkpoint""" if not os.path.exists(resume_path): raise FileExistsError("Resume path not exist!") self.logger.info("Loading checkpoint: {} ...".format(resume_path)) checkpoint = torch.load(resume_path, map_location=self.map_location) self.start_epoch = checkpoint["epoch"] + 1 self.model.load_state_dict(checkpoint["state_dict"]) self.center_loss.load_state_dict(checkpoint["center_loss"]) self.optimizer.load_state_dict(checkpoint["optimizer"]) self.optimizer_centerloss.load_state_dict(checkpoint["optimizer_centerloss"]) self.lr_scheduler.load_state_dict(checkpoint["lr_scheduler"]) self.best_accuracy = checkpoint["best_accuracy"] self.logger.info( "Checkpoint loaded. Resume training from epoch {}".format(self.start_epoch) ) def _save_logs(self, epoch): """Save logs from google colab to google drive""" if os.path.isdir(self.logs_dir_saved): shutil.rmtree(self.logs_dir_saved) destination = shutil.copytree(self.logs_dir, self.logs_dir_saved)
def train_model(data_pack, num_epochs, learning_rate, num_words, dim_embedding, num_classes, model_name): train_X, train_y, valid_X, valid_y, test_X, test_y = data_pack if model_name == "Baseline-BoW": model = Bag_of_Words(num_words, num_classes) elif model_name == "Baseline-AvEmbedding": model = Baseline(num_words, dim_embedding, num_classes) elif model_name == "Shallow-CNN": n_filters = [40, 40] model = CNN(num_words, dim_embedding, num_classes, n_filters) elif model_name == "Deep-CNN": n_filters = [40, 48, 72, 48] model = CNN_Deep(num_words, dim_embedding, num_classes, n_filters) elif model_name == "Shallow-LSTM": memory_size = 100 model = LSTM(num_words, dim_embedding, num_classes, memory_size) elif model_name == "Deep-LSTM": memory_size = 100 model = LSTM_Deep(num_words, dim_embedding, num_classes, memory_size) elif model_name == "Shallow-CNN-CE": n_filters = [40, 40] model = CE_CNN(dim_embedding, num_classes, n_filters) elif model_name == "Deep-CNN-CE": n_filters = [40, 48, 72, 48] model = CE_CNN_Deep(dim_embedding, num_classes, n_filters) elif model_name == "Block-CNN-CE": n_filters = [64, 128, 256, 512] model = CE_CNN_Block(dim_embedding, num_classes, n_filters) elif model_name == "ResNet-CE": n_filters = [64, 128, 256, 512] model = CE_ResNet(dim_embedding, num_classes, n_filters) model.cuda() model = torch.load(model_name + ".pt") # n_filters = [15, 20, 40] # model = CNN_Deep(num_words, dim_embedding, num_classes, n_filters) max_train, max_val, max_test = 0, 0, 0 min_train, min_val, min_test = 10, 10, 10 optimizer = optim.Adam(model.parameters(), lr=learning_rate) model.train() criterion = torch.nn.CrossEntropyLoss() reduce_size = 0.2 a = [] batch_x_one = torch.FloatTensor(batch_size, train_X[0].shape[1], dim_embedding) tt_best = 0 epoch = 0 while epoch < num_epochs: break i = 0 s1 = np.random.choice(range(len(train_X)), int(reduce_size * len(train_X)), replace=False) t1 = time.time() while i < len(s1): # for each batch......... ???? optimizer.zero_grad() batch_x = train_X[s1[i]] batch_y = train_y[s1[i]] # print(s1[i]) # print(len(train_X)) # print(batch_x.shape) batch_x = torch.Tensor(batch_x).type('torch.LongTensor') if word_path[0:4] == "char": batch_x = torch.unsqueeze(batch_x, 2) batch_x_one.zero_() batch_x_one.scatter_(2, batch_x, 2) batch_x = batch_x_one batch_x = batch_x.to("cuda") output = model(batch_x) batch_y = torch.Tensor(batch_y).type('torch.LongTensor') batch_y = batch_y.to("cuda") loss = criterion(output, batch_y) loss.backward() i += 1 t2 = time.time() print(t2 - t1) model.eval() tt_loss, tt_acc = run_testing(model, criterion, test_X, test_y, batch_x_one=batch_x_one) results = open("results/" + model_name + ".txt", "w") for e in [min_train, min_test, min_val, max_train, max_val, max_test]: results.write(str(e) + ", ") results.close()
def __init__(self): self.log_dir = settings.log_dir self.model_dir = settings.model_dir ensure_dir(settings.log_dir) ensure_dir(settings.model_dir) logger.info('set log dir as %s' % settings.log_dir) logger.info('set model dir as %s' % settings.model_dir) ##################################### Import models ########################### self.feature_generator = Baseline( last_stride=1, model_path=settings.pretrained_model_path) self.feature_embedder_rgb = FeatureEmbedder(2048) self.feature_embedder_ir = FeatureEmbedder(2048) self.id_classifier = IdClassifier() if torch.cuda.is_available(): self.feature_generator.cuda() self.feature_embedder_rgb.cuda() self.feature_embedder_ir.cuda() self.id_classifier.cuda() self.feature_generator = nn.DataParallel(self.feature_generator, device_ids=range( settings.num_gpu)) self.feature_embedder_rgb = nn.DataParallel(self.feature_embedder_rgb, device_ids=range( settings.num_gpu)) self.feature_embedder_ir = nn.DataParallel(self.feature_embedder_ir, device_ids=range( settings.num_gpu)) self.id_classifier = nn.DataParallel(self.id_classifier, device_ids=range( settings.num_gpu)) ############################# Get Losses & Optimizers ######################### self.criterion_at = expATLoss() self.criterion_identity = CrossEntropyLabelSmoothLoss( settings.num_classes, epsilon=0.1) #torch.nn.CrossEntropyLoss() opt_models = [ self.feature_generator, self.feature_embedder_rgb, self.feature_embedder_ir, self.id_classifier ] def make_optimizer(opt_models): train_params = [] for opt_model in opt_models: for key, value in opt_model.named_parameters(): if not value.requires_grad: continue lr = settings.BASE_LR weight_decay = settings.WEIGHT_DECAY if "bias" in key: lr = settings.BASE_LR * settings.BIAS_LR_FACTOR weight_decay = settings.WEIGHT_DECAY_BIAS train_params += [{ "params": [value], "lr": lr, "weight_decay": weight_decay }] optimizer = torch.optim.Adam(train_params) return optimizer self.optimizer_G = make_optimizer(opt_models) self.epoch_count = 0 self.step = 0 self.save_steps = settings.save_steps self.num_workers = settings.num_workers self.writers = {} self.dataloaders = {} self.sche_G = solver.WarmupMultiStepLR(self.optimizer_G, milestones=settings.iter_sche, gamma=0.1) # default setting
def __init__(self, config): super(Trainer, self).__init__(config) self.datamanager = DataManger(config["data"]) # model self.model = Baseline( num_classes=self.datamanager.datasource.get_num_classes("train") ) # summary model summary( self.model, input_size=(3, 256, 128), batch_size=config["data"]["batch_size"], device="cpu", ) # losses cfg_losses = config["losses"] self.criterion = Softmax_Triplet_loss( num_class=self.datamanager.datasource.get_num_classes("train"), margin=cfg_losses["margin"], epsilon=cfg_losses["epsilon"], use_gpu=self.use_gpu, ) self.center_loss = CenterLoss( num_classes=self.datamanager.datasource.get_num_classes("train"), feature_dim=2048, use_gpu=self.use_gpu, ) # optimizer cfg_optimizer = config["optimizer"] self.optimizer = torch.optim.Adam( self.model.parameters(), lr=cfg_optimizer["lr"], weight_decay=cfg_optimizer["weight_decay"], ) self.optimizer_centerloss = torch.optim.SGD( self.center_loss.parameters(), lr=0.5 ) # learing rate scheduler cfg_lr_scheduler = config["lr_scheduler"] self.lr_scheduler = WarmupMultiStepLR( self.optimizer, milestones=cfg_lr_scheduler["steps"], gamma=cfg_lr_scheduler["gamma"], warmup_factor=cfg_lr_scheduler["factor"], warmup_iters=cfg_lr_scheduler["iters"], warmup_method=cfg_lr_scheduler["method"], ) # track metric self.train_metrics = MetricTracker("loss", "accuracy") self.valid_metrics = MetricTracker("loss", "accuracy") # save best accuracy for function _save_checkpoint self.best_accuracy = None # send model to device self.model.to(self.device) self.scaler = GradScaler() # resume model from last checkpoint if config["resume"] != "": self._resume_checkpoint(config["resume"])
class Session: def __init__(self): self.log_dir = settings.log_dir self.model_dir = settings.model_dir ensure_dir(settings.log_dir) ensure_dir(settings.model_dir) logger.info('set log dir as %s' % settings.log_dir) logger.info('set model dir as %s' % settings.model_dir) ##################################### Import models ########################### self.feature_generator = Baseline( last_stride=1, model_path=settings.pretrained_model_path) self.feature_embedder_rgb = FeatureEmbedder(2048) self.feature_embedder_ir = FeatureEmbedder(2048) self.id_classifier = IdClassifier() if torch.cuda.is_available(): self.feature_generator.cuda() self.feature_embedder_rgb.cuda() self.feature_embedder_ir.cuda() self.id_classifier.cuda() self.feature_generator = nn.DataParallel(self.feature_generator, device_ids=range( settings.num_gpu)) self.feature_embedder_rgb = nn.DataParallel(self.feature_embedder_rgb, device_ids=range( settings.num_gpu)) self.feature_embedder_ir = nn.DataParallel(self.feature_embedder_ir, device_ids=range( settings.num_gpu)) self.id_classifier = nn.DataParallel(self.id_classifier, device_ids=range( settings.num_gpu)) ############################# Get Losses & Optimizers ######################### self.criterion_at = expATLoss() self.criterion_identity = CrossEntropyLabelSmoothLoss( settings.num_classes, epsilon=0.1) #torch.nn.CrossEntropyLoss() opt_models = [ self.feature_generator, self.feature_embedder_rgb, self.feature_embedder_ir, self.id_classifier ] def make_optimizer(opt_models): train_params = [] for opt_model in opt_models: for key, value in opt_model.named_parameters(): if not value.requires_grad: continue lr = settings.BASE_LR weight_decay = settings.WEIGHT_DECAY if "bias" in key: lr = settings.BASE_LR * settings.BIAS_LR_FACTOR weight_decay = settings.WEIGHT_DECAY_BIAS train_params += [{ "params": [value], "lr": lr, "weight_decay": weight_decay }] optimizer = torch.optim.Adam(train_params) return optimizer self.optimizer_G = make_optimizer(opt_models) self.epoch_count = 0 self.step = 0 self.save_steps = settings.save_steps self.num_workers = settings.num_workers self.writers = {} self.dataloaders = {} self.sche_G = solver.WarmupMultiStepLR(self.optimizer_G, milestones=settings.iter_sche, gamma=0.1) # default setting def tensorboard(self, name): self.writers[name] = SummaryWriter( os.path.join(self.log_dir, name + '.events')) return self.writers[name] def write(self, name, out): for k, v in out.items(): self.writers[name].add_scalar(name + '/' + k, v, self.step) out['G_lr'] = self.optimizer_G.param_groups[0]['lr'] out['step'] = self.step out['eooch_count'] = self.epoch_count outputs = ["{}:{:.4g}".format(k, v) for k, v in out.items()] logger.info(name + '--' + ' '.join(outputs)) def save_checkpoints(self, name): ckp_path = os.path.join(self.model_dir, name) obj = { 'feature_generator': self.feature_generator.state_dict(), 'feature_embedder_rgb': self.feature_embedder_rgb.state_dict(), 'feature_embedder_ir': self.feature_embedder_ir.state_dict(), 'id_classifier': self.id_classifier.state_dict(), 'clock': self.step, 'epoch_count': self.epoch_count, 'opt_G': self.optimizer_G.state_dict(), } torch.save(obj, ckp_path) def load_checkpoints(self, name): ckp_path = os.path.join(self.model_dir, name) try: obj = torch.load(ckp_path) print('load checkpoint: %s' % ckp_path) except FileNotFoundError: return self.feature_generator.load_state_dict(obj['feature_generator']) self.feature_embedder_rgb.load_state_dict(obj['feature_embedder_rgb']) 
self.feature_embedder_ir.load_state_dict(obj['feature_embedder_ir']) self.id_classifier.load_state_dict(obj['id_classifier']) self.optimizer_G.load_state_dict(obj['opt_G']) self.step = obj['clock'] self.epoch_count = obj['epoch_count'] self.sche_G.last_epoch = self.step def load_checkpoints_delf_init(self, name): ckp_path = os.path.join(self.model_dir, name) obj = torch.load(ckp_path) self.backbone.load_state_dict(obj['backbone']) def cal_fea(self, x, domain_mode): feat = self.feature_generator(x) if domain_mode == 'rgb': return self.feature_embedder_rgb(feat) elif domain_mode == 'ir': return self.feature_embedder_ir(feat) def inf_batch(self, batch): alpha = settings.alpha beta = settings.beta anchor_rgb, positive_rgb, negative_rgb, anchor_ir, positive_ir, \ negative_ir, anchor_label, modality_rgb, modality_ir = batch if torch.cuda.is_available(): anchor_rgb = anchor_rgb.cuda() positive_rgb = positive_rgb.cuda() negative_rgb = negative_rgb.cuda() anchor_ir = anchor_ir.cuda() positive_ir = positive_ir.cuda() negative_ir = negative_ir.cuda() anchor_label = anchor_label.cuda() anchor_rgb_features = self.cal_fea(anchor_rgb, 'rgb') positive_rgb_features = self.cal_fea(positive_rgb, 'rgb') negative_rgb_features = self.cal_fea(negative_rgb, 'rgb') anchor_ir_features = self.cal_fea(anchor_ir, 'ir') positive_ir_features = self.cal_fea(positive_ir, 'ir') negative_ir_features = self.cal_fea(negative_ir, 'ir') at_loss_rgb = self.criterion_at.forward(anchor_rgb_features, positive_ir_features, negative_ir_features) at_loss_ir = self.criterion_at.forward(anchor_ir_features, positive_rgb_features, negative_rgb_features) at_loss = at_loss_rgb + at_loss_ir predicted_id_rgb = self.id_classifier(anchor_rgb_features) predicted_id_ir = self.id_classifier(anchor_ir_features) identity_loss = self.criterion_identity(predicted_id_rgb, anchor_label) + \ self.criterion_identity(predicted_id_ir, anchor_label) loss_G = alpha * at_loss + beta * identity_loss self.optimizer_G.zero_grad() loss_G.backward() self.optimizer_G.step() self.write('train_stats', { 'loss_G': loss_G, 'at_loss': at_loss, 'identity_loss': identity_loss })
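A hypothetical driver loop for the Session above, assuming a cross-modality dataloader that yields the nine-element batches inf_batch expects; the loop bounds and loader name are illustrative only:

sess = Session()
sess.load_checkpoints('latest.pth')           # resumes silently if the file exists
writer = sess.tensorboard('train_stats')      # registers the writer used by inf_batch
for epoch in range(num_epochs):               # assumed epoch count
    for batch in train_loader:                # assumed RGB/IR triplet loader
        sess.inf_batch(batch)
        sess.step += 1
        sess.sche_G.step()
        if sess.step % sess.save_steps == 0:
            sess.save_checkpoints('latest.pth')
    sess.epoch_count += 1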
valid_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=BATCH_SIZE,
                                           num_workers=4,
                                           collate_fn=collate_fn,
                                           sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=1,
                                          num_workers=4,
                                          shuffle=False)

config = {
    "epochs": 100,
    "device": get_device(),
    "sampling": True,
    "temperature": 1.0,
    "max_sentence_length": 18
}

embedding_dim = 256
hidden_dim = 512
vocab_size = len(vocab)

model = Baseline(embedding_dim, hidden_dim, vocab_size, vanilla=False)
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=5e-4)
model.cuda()

train(model, optimizer, criterion, train_loader, valid_loader, vocab, config)
test(model, criterion, test_loader, vocab, config)
def main(args): # Set up logging and devices args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True) log = util.get_logger(args.save_dir, args.name) tbx = SummaryWriter(args.save_dir) device, args.gpu_ids = util.get_available_devices() log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') args.batch_size *= max(1, len(args.gpu_ids)) # Set random seed log.info(f'Using random seed {args.seed}...') random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) char_vectors = util.torch_from_json(args.char_emb_file) # Get model log.info('Building model...') if (args.model == 'baseline'): model = Baseline(word_vectors=word_vectors, hidden_size=args.hidden_size, drop_prob=args.drop_prob) optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=args.l2_wd) elif (args.model == 'bidaf'): model = BiDAF(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, drop_prob=args.drop_prob) optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=args.l2_wd) elif (args.model == 'qanet'): model = QANet(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, drop_prob_word=0.1, drop_prob_char=0.05, kernel_size_emb_enc_block=7, kernel_size_mod_enc_block=7, n_heads=args.n_heads) optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(args.beta_1, args.beta_2), eps=args.epsilon, weight_decay=args.l2_wd) elif (args.model == 'qanet_out'): model = QANet(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, drop_prob_word=0.1, drop_prob_char=0.05, kernel_size_emb_enc_block=7, kernel_size_mod_enc_block=7, n_heads=args.n_heads) optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(args.beta_1, args.beta_2), eps=args.epsilon, weight_decay=args.l2_wd) model = nn.DataParallel(model, args.gpu_ids) if args.load_path: log.info(f'Loading checkpoint from {args.load_path}...') model, step = util.load_model(model, args.load_path, args.gpu_ids) else: step = 0 model = model.to(device) model.train() ema = util.EMA(model, args.ema_decay) # Get saver saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints, metric_name=args.metric_name, maximize_metric=args.maximize_metric, log=log) # Get optimizer and scheduler scheduler = sched.LambdaLR(optimizer, lambda s: 1.) 
# Constant LR # Get data loader log.info('Building dataset...') train_dataset = SQuAD(args.train_record_file, args.use_squad_v2) train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn) dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2) dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Train log.info('Training...') steps_till_eval = args.eval_steps epoch = step // len(train_dataset) while epoch != args.num_epochs: epoch += 1 log.info(f'Starting epoch {epoch}...') with torch.enable_grad(), \ tqdm(total=len(train_loader.dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) cc_idxs = cc_idxs.to(device) qc_idxs = qc_idxs.to(device) batch_size = cw_idxs.size(0) optimizer.zero_grad() # Forward log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) loss_val = loss.item() # Backward loss.backward() nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() scheduler.step(step // batch_size) ema(model, step // batch_size) # Log info step += batch_size progress_bar.update(batch_size) progress_bar.set_postfix(epoch=epoch, NLL=loss_val) tbx.add_scalar('train/NLL', loss_val, step) tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step) steps_till_eval -= batch_size if steps_till_eval <= 0: steps_till_eval = args.eval_steps # Evaluate and save checkpoint log.info(f'Evaluating at step {step}...') ema.assign(model) results, pred_dict = evaluate(model, dev_loader, device, args.dev_eval_file, args.max_ans_len, args.use_squad_v2) saver.save(step, model, results[args.metric_name], device) ema.resume(model) # Log to console results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items()) log.info(f'Dev {results_str}') # Log to TensorBoard log.info('Visualizing in TensorBoard...') for k, v in results.items(): tbx.add_scalar(f'dev/{k}', v, step) util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file, step=step, split='dev', num_visuals=args.num_visuals)
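util.EMA is used above (update per step, assign/resume around evaluation) but not shown. A minimal sketch with the same surface, based on the common exponential-moving-average pattern; the project's real class may differ:

class EMA:
    # Hypothetical sketch of the helper used above.
    def __init__(self, model, decay):
        self.decay = decay
        self.shadow = {n: p.data.clone()
                       for n, p in model.named_parameters() if p.requires_grad}
        self.backup = {}

    def __call__(self, model, num_updates):
        d = min(self.decay, (1.0 + num_updates) / (10.0 + num_updates))
        for n, p in model.named_parameters():
            if p.requires_grad:
                self.shadow[n] = (1.0 - d) * p.data + d * self.shadow[n]

    def assign(self, model):
        # Swap the averaged weights in for evaluation.
        for n, p in model.named_parameters():
            if p.requires_grad:
                self.backup[n] = p.data
                p.data = self.shadow[n]

    def resume(self, model):
        # Restore the original training weights.
        for n, p in model.named_parameters():
            if p.requires_grad:
                p.data = self.backup[n]
        self.backup = {}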
import numpy as np

if __name__ == '__main__':
    parser = Flags()
    parser.set_arguments()
    FG = parser.parse_args()

    c_code, axis, z_dim = FG.c_code, FG.axis, FG.z_dim
    device = torch.device(FG.devices[0])
    torch.cuda.set_device(FG.devices[0])

    nets = []
    for i in range(FG.fold):
        parser.configure('cur_fold', i)
        parser.configure('ckpt_dir')
        FG = parser.load()
        net = Baseline(FG.ckpt_dir, len(FG.labels))
        net.to(device)
        net.load(epoch=None, optimizer=None, is_best=True)
        net.eval()
        nets += [net]

    # G = Generator(FG)
    G = torch.nn.DataParallel(Generator(z_dim, c_code, axis))
    # state_dict = torch.load(os.path.join('BiGAN-info-c4-f', 'G.pth'), 'cpu')
    state_dict = torch.load(os.path.join('157-G8', 'G.pth'), 'cpu')
    G.load_state_dict(state_dict)
    G.to(device)
    G.eval()

    if axis == 1:
def main(_):
    # Load MNIST data
    mnist = load_mnist()
    pre_training = FLAGS.pre_train

    # Define the deep learning model
    if FLAGS.model == 'Base':
        pre_training = False
        kernlen = int(FLAGS.frame_size / 2)
        net = Baseline(directory=FLAGS.dir,
                       optimizer=FLAGS.optimizer,
                       learning_rate=FLAGS.learning_rate,
                       layer_sizes=FLAGS.arch,
                       num_features=FLAGS.num_features,
                       num_filters=FLAGS.num_filters,
                       frame_size=FLAGS.frame_size)
    elif FLAGS.model == 'Cat':
        kernlen = int(FLAGS.frame_size / 2)
        net = Cat_Net(layer_sizes=FLAGS.arch,
                      optimizer=FLAGS.optimizer,
                      num_filters=FLAGS.num_filters,
                      num_features=FLAGS.num_features,
                      num_samples=FLAGS.num_samples,
                      frame_size=FLAGS.frame_size,
                      num_cat=FLAGS.num_cat,
                      learning_rate=FLAGS.learning_rate,
                      feedback_distance=FLAGS.feedback_distance,
                      directory=FLAGS.dir)
    elif FLAGS.model == 'Gumbel':
        kernlen = int(FLAGS.frame_size / 2)
        net = Gumbel_Net(layer_sizes=FLAGS.arch,
                         optimizer=FLAGS.optimizer,
                         num_filters=FLAGS.num_filters,
                         num_features=FLAGS.num_features,
                         frame_size=FLAGS.frame_size,
                         num_cat=FLAGS.num_cat,
                         learning_rate=FLAGS.learning_rate,
                         feedback_distance=FLAGS.feedback_distance,
                         directory=FLAGS.dir,
                         second_conv=FLAGS.second_conv,
                         initial_tau=FLAGS.initial_tau,
                         tau_decay=FLAGS.tau_decay,
                         reg=FLAGS.reg)
    elif FLAGS.model == 'RawG':
        pre_training = False
        kernlen = 60
        net = Raw_Gumbel_Net(layer_sizes=FLAGS.arch,
                             optimizer=FLAGS.optimizer,
                             num_filters=FLAGS.num_filters,
                             num_features=FLAGS.frame_size**2,
                             frame_size=FLAGS.frame_size,
                             num_cat=FLAGS.num_cat,
                             learning_rate=FLAGS.learning_rate,
                             feedback_distance=FLAGS.feedback_distance,
                             directory=FLAGS.dir,
                             second_conv=FLAGS.second_conv,
                             initial_tau=FLAGS.initial_tau,
                             meta=None)
    elif FLAGS.model == 'RL':
        kernlen = int(FLAGS.frame_size / 2)
        net = Bernoulli_Net(layer_sizes=FLAGS.arch,
                            optimizer=FLAGS.optimizer,
                            num_filters=FLAGS.num_filters,
                            num_features=FLAGS.num_features,
                            num_samples=FLAGS.num_samples,
                            frame_size=FLAGS.frame_size,
                            learning_rate=FLAGS.learning_rate,
                            feedback_distance=FLAGS.feedback_distance,
                            directory=FLAGS.dir,
                            second_conv=FLAGS.second_conv)
    elif FLAGS.model == 'RawB':
        pre_training = True
        kernlen = 60
        net = Raw_Bernoulli_Net(layer_sizes=FLAGS.arch,
                                optimizer=FLAGS.optimizer,
                                num_filters=FLAGS.num_filters,
                                num_features=FLAGS.frame_size**2,
                                num_samples=FLAGS.num_samples,
                                frame_size=FLAGS.frame_size,
                                learning_rate=FLAGS.learning_rate,
                                feedback_distance=FLAGS.feedback_distance,
                                directory=FLAGS.dir,
                                second_conv=FLAGS.second_conv)

    # Build cluttered MNIST and the Gaussian attention targets for the patch centers
    X_train, train_coords = convertCluttered(
        mnist.train.images, finalImgSize=FLAGS.frame_size,
        number_patches=FLAGS.number_patches)
    y_train = mnist.train.labels
    train_coords = np.array(
        [gkern(coord[0], coord[1], kernlen=kernlen) for coord in train_coords])
    X_test, test_coords = convertCluttered(mnist.test.images,
                                           finalImgSize=FLAGS.frame_size,
                                           number_patches=FLAGS.number_patches)
    # test_coords = np.array([gkern(coord[0], coord[1], kernlen=20) for coord in test_coords])
    y_test = mnist.test.labels
    batch_size = FLAGS.batch_size

    if pre_training:
        print("Pre-training")
        for epoch in tqdm(range(FLAGS.epochs)):
            _x, _y = input_fn(X_test, y_test, batch_size=batch_size)
            net.evaluate(_x, _y, pre_trainining=True)
            # Regenerate the cluttered training set each epoch
            X_train, train_coords = convertCluttered(
                mnist.train.images, finalImgSize=FLAGS.frame_size,
                number_patches=FLAGS.number_patches)
            y_train = mnist.train.labels
            # print(net.confusion_matrix(_x, _y))
            net.save()
            X_train, y_train, train_coords = shuffle_in_unison(
                X_train, y_train, train_coords)
            for i in range(0, len(X_train), batch_size):
                _x, _y = input_fn(X_train[i:i + batch_size],
                                  y_train[i:i + batch_size],
                                  batch_size=batch_size)
                net.pre_train(_x, _y, dropout=0.8)

    print("Training")
    for epoch in tqdm(range(FLAGS.epochs)):
        X_train, y_train, train_coords = shuffle_in_unison(
            X_train, y_train, train_coords)
        _x, _y = input_fn(X_test, y_test, batch_size=batch_size)
        net.evaluate(_x, _y)
        X_train, train_coords = convertCluttered(
            mnist.train.images, finalImgSize=FLAGS.frame_size,
            number_patches=FLAGS.number_patches)
        y_train = mnist.train.labels
        # print(net.confusion_matrix(_x, _y))
        net.save()
        for i in range(0, len(X_train), batch_size):
            _x, _y = X_train[i:i + batch_size], y_train[i:i + batch_size]
            net.train(_x, _y, dropout=FLAGS.dropout)

    if FLAGS.model in ('RL', 'Gumbel', 'Cat', 'RawB', 'RawG'):
        print("Feedback Training")
        for epoch in tqdm(range(FLAGS.epochs)):
            _x, _y = input_fn(X_test, y_test, batch_size=batch_size)
            net.evaluate(_x, _y)
            X_train, train_coords = convertCluttered(
                mnist.train.images, finalImgSize=FLAGS.frame_size,
                number_patches=FLAGS.number_patches)
            y_train = mnist.train.labels
            train_coords = np.array([
                gkern(coord[0], coord[1], kernlen=kernlen)
                for coord in train_coords
            ])
            # print(net.confusion_matrix(_x, _y))
            net.save()
            X_train, y_train, train_coords = shuffle_in_unison(
                X_train, y_train, train_coords)
            for i in range(0, len(X_train), batch_size):
                _x, _y, _train_coords = input_fn(X_train, y_train, train_coords,
                                                 batch_size=batch_size)
                net.feedback_train(_x, _y, _train_coords, dropout=FLAGS.dropout)
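# --- Illustrative sketch (not part of the original project) ---
# The loops above call shuffle_in_unison to reshuffle images, labels, and attention
# targets together. The helper below is a plausible stand-in that applies one random
# permutation to every array; the name and signature are assumptions, and the other
# helpers (input_fn, gkern, convertCluttered) remain project-specific.
import numpy as np


def shuffle_in_unison_sketch(*arrays):
    """Shuffle several equal-length arrays with the same random permutation."""
    assert len({len(a) for a in arrays}) == 1, 'arrays must have equal length'
    perm = np.random.permutation(len(arrays[0]))
    return tuple(a[perm] for a in arrays)

# Example: X_train, y_train, train_coords = shuffle_in_unison_sketch(X_train, y_train, train_coords)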
def getDayBaseline(meter, channel, day, data_type):
    day = day.date()
    try:
        baseline = Baseline.objects.get(date=day, sensor=meter, channel=channel)
        created = False
    except Baseline.DoesNotExist:
        baseline = Baseline(date=day, sensor=meter, channel=channel, value=0.0)
        created = True
    logger.debug('getDayBaseline')
    # powerFactor = 60 * 60.0 / channel.reading_frequency

    # A cached baseline is valid if it was computed recently enough:
    # for today, within the last hour; for a past day, after that day ended.
    valid = False
    if not created:
        lastModifiedDay = baseline.last_modified.date()
        if day == date.today():
            if (datetime.now() - baseline.last_modified) < timedelta(hours=1):  # TODO: check me!
                valid = True
        else:  # day is not today
            if lastModifiedDay > day:
                valid = True
    logger.debug('valid: ' + str(valid))

    if valid:
        return baseline.value

    # Filter all energy readings of this meter and channel for the given day
    filter_energy_objects = SensorReading.objects.filter(
        sensor=meter, channel=channel
    ).filter(
        timestamp__gte=day
    ).filter(
        timestamp__lt=(day + timedelta(days=1))
    )
    logger.debug('filter_energy_objects.count(): ' + str(filter_energy_objects.count()))

    if filter_energy_objects.count() > 0:
        energy = [x.value for x in filter_energy_objects]
        # Hard-coded subset size for the moving-average calculation
        window_size = ALWAYS_ON_WINDOW_SIZE
        # mav = moving_average(energy, window_size)  # superseded by the explicit
        # rectangular-window convolution below, which computes the same average
        import numpy as np  # normally imported at module level
        window = (np.zeros(int(window_size)) + 1.0) / window_size
        mav = np.convolve(energy, window, 'valid')
        try:
            min_baseline = min(mav)
        except ValueError:
            min_baseline = 0
    else:
        min_baseline = 0

    baseline.value = min_baseline
    try:
        baseline.save()
    except IntegrityError:
        # Another request saved a baseline for the same key in the meantime
        b2 = Baseline.objects.get(date=day, sensor=meter, channel=channel)
        b2.value = min_baseline
        b2.save()
    return min_baseline
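# --- Illustrative sketch (not part of the original project) ---
# The commented-out moving_average helper above is consistent with the explicit
# rectangular-window convolution that follows it. A minimal version is sketched
# below; the name and exact boundary handling of the real helper are assumptions.
import numpy as np


def moving_average_sketch(values, window_size):
    """Rectangular-window moving average: mean of each window of `window_size` samples."""
    window = np.ones(int(window_size)) / float(window_size)
    return np.convolve(values, window, 'valid')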
def main():
    saver = utils.Saver(opt)

    # Pick a random seed, then fix it everywhere for reproducibility
    opt.manualSeed = random.randint(1, 10000)
    random.seed(opt.manualSeed)
    torch.manual_seed(opt.manualSeed)
    torch.cuda.manual_seed_all(opt.manualSeed)

    # Load data
    root = "data/modelnet40_ply_hdf5_2048/"  # "data/modelnet40_normal_resampled"
    use_cuda = torch.cuda.is_available()

    # Load transformations
    transforms_list = []
    random_permute = utils.Random_permute(opt.num_points, delta=opt.distance)
    if opt.random_input:
        print("random_input")
        transforms_list.append(random_permute)

    # Load dataset / data loader
    train_dataset = data.ModelNetDataset(root,
                                         train=True,
                                         sort=opt.sort,
                                         transform=transforms.Compose(transforms_list),
                                         distance=opt.distance,
                                         normal=opt.normal)
    train_loader = DataLoader(train_dataset,
                              batch_size=opt.batchSize,
                              shuffle=True,
                              num_workers=opt.workers)
    test_dataset = data.ModelNetDataset(root,
                                        train=False,
                                        sort=opt.sort,
                                        distance=opt.distance,
                                        normal=opt.normal)
    test_loader = DataLoader(test_dataset,
                             batch_size=opt.batchSize,
                             shuffle=False,
                             num_workers=opt.workers)

    # Define model
    ndim = 6 if opt.distance or opt.normal else 3
    if opt.model == 'lstm':
        model = Baseline(input_dim=ndim, maxout=opt.elem_max)
    elif opt.model == 'lstm_mlp':
        model = LSTM_mlp(input_dim=ndim,
                         maxout=opt.elem_max,
                         mlp=[64, 128, 256, 512],
                         fc=[512, 256, 40])
    elif opt.model == 'test':
        model = Test(input_dim=ndim, maxout=opt.elem_max)

    # Load a specified pre-trained model
    if opt.path != '':
        model.load_state_dict(torch.load(opt.path))

    # Define optimizer and loss function
    optimizer = optim.Adam(model.parameters(),
                           lr=opt.learning_rate,
                           weight_decay=1e-5)
    criterion = nn.CrossEntropyLoss()

    # Transfer model and criterion to CUDA if available
    if use_cuda:
        model = model.cuda()  # or nn.DataParallel(model).cuda() for multi-GPU
        criterion = criterion.cuda()

    best_model_wts = model.state_dict()
    early_stopping = utils.Early_stopping(opt.early_stopping, patience=15)
    saver.log_parameters(model.parameters())

    for epoch in range(opt.nepoch):
        adjust_learning_rate(optimizer, epoch, saver)
        train(model, optimizer, criterion, saver, train_loader, epoch)
        test_loss = test(model, criterion, saver, test_loader, epoch)
        early_stopping.update(test_loss)
        if early_stopping.stop():
            break
    saver.save_result()
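# --- Illustrative sketch (not part of the original project) ---
# utils.Early_stopping is used above as Early_stopping(enabled, patience=15) with
# update(test_loss) and stop(). A minimal class with that interface is sketched
# below; the internal state (best loss, bad-epoch counter, min_delta) is an
# assumption about how the real helper behaves.
class EarlyStoppingSketch:
    """Stop when the monitored loss has not improved for `patience` epochs."""

    def __init__(self, enabled=True, patience=15, min_delta=0.0):
        self.enabled = enabled
        self.patience = patience
        self.min_delta = min_delta
        self.best = float('inf')
        self.bad_epochs = 0

    def update(self, loss):
        # Count epochs since the last meaningful improvement
        if loss < self.best - self.min_delta:
            self.best = loss
            self.bad_epochs = 0
        else:
            self.bad_epochs += 1

    def stop(self):
        return self.enabled and self.bad_epochs >= self.patience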
#%%
data, label = load_data(data_path, label_path, 'indian_pines')
#%%
get_value_data(data, label)
#%%
DATA = pd.read_csv('datasets/Indian_pines.csv', header=None).values
data_D = DATA[:, :-1]
data_L = DATA[:, -1]
data_train, data_test, label_train, label_test = train_test_split(
    data_D, data_L, test_size=0.8)
#%%
train_set = GetLoader(data_train, label_train)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_set = GetLoader(data_test, label_test)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
#%%
data_p, label_p = next(iter(train_loader))
# print(data_p[:-1])
#%%
net = Baseline(INPUT_CHANNELS, CLASSES, dropout=False)
optimizer = optim.Adam(net.parameters(), lr=0.0001)
# Zero-weight class 0 so the unlabeled background does not contribute to the loss
weight = torch.ones(CLASSES)
weight[torch.LongTensor([0])] = 0.
w = weight.to(DEVICE)
criterion = nn.CrossEntropyLoss(weight=w)
#%%
train_loss, val_accuracy = train(net, optimizer, criterion, train_loader,
                                 val_loader, EPOCH, DEVICE)
plot_curve(train_loss)
plot_curve(val_accuracy)
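#%%
# --- Illustrative sketch (not part of the original project) ---
# GetLoader above wraps the pixel spectra and labels for DataLoader. A minimal
# map-style dataset with that role is sketched below; the class name, tensor
# dtypes, and constructor signature are assumptions.
import torch
from torch.utils.data import Dataset


class GetLoaderSketch(Dataset):
    """Wrap (features, labels) NumPy arrays as a map-style PyTorch dataset."""

    def __init__(self, data, labels):
        self.data = torch.as_tensor(data, dtype=torch.float32)
        self.labels = torch.as_tensor(labels, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]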
y = ['close']
wp = data_processor.WindowGenerator(input_width,
                                    label_width,
                                    shift,
                                    train_df=train,
                                    val_df=validate,
                                    test_df=test,
                                    label_columns=y)
print("wp:")
print(wp)
print("============")
for example_inputs, example_labels in wp.train.take(1):
    print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
    print(f'Labels shape (batch, time, features): {example_labels.shape}')

bl = Baseline()

dense = tf.keras.Sequential([
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1)
])

conv_model = tf.keras.Sequential([
    tf.keras.layers.Conv1D(filters=32, kernel_size=(3,), activation='relu'),
    tf.keras.layers.Dense(units=32, activation='relu'),
    tf.keras.layers.Dense(units=1),
])

mr = ModelRunner()
val_performance = {}
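# --- Illustrative sketch (not part of the original project) ---
# Baseline() above is compared against the dense and convolutional models. If it
# follows the persistence baseline from the TensorFlow time-series tutorial, it
# simply echoes the input (optionally a single label column) as the prediction.
# That is an assumption: the call above takes no arguments, so the label_index
# handling below may not match the project's own Baseline.
import tensorflow as tf


class BaselineSketch(tf.keras.Model):
    """Naive persistence model: predict that the next value equals the current one."""

    def __init__(self, label_index=None):
        super().__init__()
        self.label_index = label_index

    def call(self, inputs):
        if self.label_index is None:
            return inputs
        result = inputs[:, :, self.label_index]
        return result[:, :, tf.newaxis]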