Exemplo n.º 1
0
def evaluate_process(model, valid_loader, log_start_time, global_t, epoch,
                     logger, tb_writer, api):
    """Evaluate ``model`` on the validation loader and report the metrics.

    Puts the model in eval mode, runs ``evaluate`` while writing to a
    per-epoch text file, logs the ten returned metrics and, when
    ``args.visual`` is set, writes them (all but ``avg_len``) to TensorBoard.

    Args:
        model: model to evaluate; must provide ``eval()`` and ``embedder``.
        valid_loader: loader providing ``epoch_init(batch_size, shuffle=...)``.
        log_start_time: path component of the output directory.
        global_t: global step counter, used in the output file name.
        epoch: epoch number, used in the file name and as TensorBoard step.
        logger: object whose ``info`` method receives the summary line.
        tb_writer: object whose ``add_scalar`` method receives the metrics.
        api: object providing ``vocab`` and ``rev_vocab``.
    """
    model.eval()
    # batch size 1; each sample is repeated `repeat` times to compute BLEU
    valid_loader.epoch_init(1, shuffle=False)

    eval_path = "./output/{}/{}/eval_global_{}_epoch{}.txt".format(
        args.expname, log_start_time, global_t, epoch)
    repeat = 10

    # The context manager guarantees the results file is flushed and closed
    # even when evaluation raises.
    with open(eval_path, "w") as f_eval:
        # Evaluate the current model with freshly built metrics.
        metrics = Metrics(model.embedder)
        (recall_bleu, prec_bleu, bow_extrema, bow_avg, bow_greedy,
         intra_dist1, intra_dist2, avg_len, inter_dist1, inter_dist2) = evaluate(
            model, metrics, valid_loader, api.vocab, api.rev_vocab, f_eval,
            repeat)

    logger.info("Avg recall BLEU %f, avg precision BLEU %f, bow_extrema %f, bow_avg %f, bow_greedy %f, intra_dist1 %f,"
                " intra_dist2 %f, avg_len %f, \ninter_dist1 %f, inter_dist2 %f (only 1 ref, not final results)"
                % (recall_bleu, prec_bleu, bow_extrema, bow_avg, bow_greedy, intra_dist1, intra_dist2, avg_len,
                   inter_dist1, inter_dist2))

    if args.visual:
        # avg_len is logged above but intentionally not plotted, as before.
        scalars = (
            ('recall_bleu', recall_bleu),
            ('prec_bleu', prec_bleu),
            ('bow_extrema', bow_extrema),
            ('bow_avg', bow_avg),
            ('bow_greedy', bow_greedy),
            ('intra_dist1', intra_dist1),
            ('intra_dist2', intra_dist2),
            ('inter_dist1', inter_dist1),
            ('inter_dist2', inter_dist2),
        )
        for tag, value in scalars:
            tb_writer.add_scalar(tag, value, epoch)
            logger.info(log)

        itr += 1
        itr_global += 1

    if epoch % args.eval_every == 0:  # evaluate the model in the validation set
        model.eval()
        valid_loader.epoch_init(1, config['diaglen'], 1, shuffle=False)

        f_eval = open(
            "./output/{}/{}/{}/tmp_results/epoch{}.txt".format(
                args.model, args.expname, args.dataset, epoch), "w")
        repeat = 10

        recall_bleu, prec_bleu, bow_extrema, bow_avg, bow_greedy, intra_dist1, intra_dist2, avg_len, inter_dist1, inter_dist2\
             =evaluate(model, metrics, valid_loader, vocab, ivocab, f_eval, repeat)

        if args.visual:
            tb_writer.add_scalar('recall_bleu', recall_bleu, epoch)
            tb_writer.add_scalar('prec_bleu', prec_bleu, epoch)
            tb_writer.add_scalar('bow_extrema', bow_extrema, epoch)
            tb_writer.add_scalar('bow_avg', bow_avg, epoch)
            tb_writer.add_scalar('bow_greedy', bow_greedy, epoch)
            tb_writer.add_scalar('intra_dist1', intra_dist1, epoch)
            tb_writer.add_scalar('intra_dist2', intra_dist2, epoch)
            tb_writer.add_scalar('inter_dist1', inter_dist1, epoch)
            tb_writer.add_scalar('inter_dist2', inter_dist2, epoch)

    # end of epoch ----------------------------
    model.adjust_lr()
    save_model(model, epoch)  # save model after each epoch
Exemplo n.º 3
0
                                         itr_global)
            logger.info(log)

        itr += 1
        itr_global += 1

        if itr_global % args.eval_every == 0:  # evaluate the model in the develop set
            model.eval()
            valid_loader = torch.utils.data.DataLoader(dataset=valid_set,
                                                       batch_size=1,
                                                       shuffle=False,
                                                       num_workers=1)

            f_eval = open(
                "./output/{}/{}/tmp_results/iter{}.txt".format(
                    args.model, args.expname, itr_global), "w")
            repeat = 10

            recall_bleu, prec_bleu = evaluate(model, metrics, valid_loader,
                                              vocab_desc, vocab_api, f_eval,
                                              repeat)

            if args.visual:
                tb_writer.add_scalar('recall_bleu', recall_bleu, itr_global)
                tb_writer.add_scalar('prec_bleu', prec_bleu, itr_global)

            save_model(model, itr_global)  # save model after each epoch

    # end of epoch ----------------------------
    model.adjust_lr()
Exemplo n.º 4
0
def train(args):
    """Train the model named by ``args.model``.

    Sets up logging (and, when ``args.visual`` is set, a log file, a
    TensorBoard writer and an ``args.json`` dump) under a timestamped run
    directory, builds the datasets, model, AdamW optimizer and cosine
    schedule, then runs the training loop with periodic loss logging
    (``args.log_every``), validation (``args.valid_every``) and checkpoint +
    BLEU evaluation (``args.eval_every``).

    Args:
        args: namespace read for ``model``, ``expname``, ``visual``,
            ``gpu_id``, ``seed``, ``data_path``, ``reload_from``,
            ``log_every``, ``valid_every`` and ``eval_every``.
    """
    timestamp = datetime.now().strftime('%Y%m%d%H%M')
    run_dir = f'./output/{args.model}/{args.expname}/{timestamp}'

    # LOG #
    logger = logging.getLogger(__name__)
    logging.basicConfig(level=logging.DEBUG, format="%(message)s")
    tb_writer = None
    if args.visual:
        # make output directories if they don't already exist
        os.makedirs(f'{run_dir}/models', exist_ok=True)
        os.makedirs(f'{run_dir}/temp_results', exist_ok=True)
        # file handler so that log records are also written to disk
        fh = logging.FileHandler(f"{run_dir}/logs.txt")
        logger.addHandler(fh)
        tb_writer = SummaryWriter(f"{run_dir}/logs/")
        # save arguments; the context manager closes the file deterministically
        with open(f'{run_dir}/args.json', 'w') as f_args:
            json.dump(vars(args), f_args)

    # Device #
    if args.gpu_id < 0:
        device = torch.device("cuda")
    else:
        # gpu_id is non-negative here, so only CUDA availability matters
        device = torch.device(
            f"cuda:{args.gpu_id}" if torch.cuda.is_available() else "cpu")
    print(device)
    n_gpu = torch.cuda.device_count() if args.gpu_id < 0 else 1
    print(f"num of gpus:{n_gpu}")
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    def save_model(model, epoch, timestamp):
        """Save model parameters to checkpoint"""
        os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/models', exist_ok=True)
        ckpt_path = f'./output/{args.model}/{args.expname}/{timestamp}/models/model_epo{epoch}.pkl'
        print(f'Saving model parameters to {ckpt_path}')
        torch.save(model.state_dict(), ckpt_path)

    def load_model(model, epoch, timestamp):
        """Load parameters from checkpoint"""
        ckpt_path = f'./output/{args.model}/{args.expname}/{timestamp}/models/model_epo{epoch}.pkl'
        print(f'Loading model parameters from {ckpt_path}')
        # Previously referenced the undefined name `checkpoint` (NameError).
        model.load_state_dict(torch.load(ckpt_path))

    config = getattr(configs, 'config_' + args.model)()

    ###############################################################################
    # Load dataset
    ###############################################################################
    train_set = APIDataset(args.data_path + 'train.desc.h5', args.data_path + 'train.apiseq.h5', config['max_sent_len'])
    valid_set = APIDataset(args.data_path + 'test.desc.h5', args.data_path + 'test.apiseq.h5', config['max_sent_len'])
    train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=config['batch_size'], shuffle=True, num_workers=1)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_set, batch_size=config['batch_size'], shuffle=True, num_workers=1)
    print("Loaded dataset!")

    ###############################################################################
    # Define the models
    ###############################################################################
    model = getattr(models, args.model)(config)
    if args.reload_from >= 0:
        # load_model requires the timestamp argument (it was omitted before,
        # which raised TypeError).
        # NOTE(review): this resolves the checkpoint under the *current*
        # run's timestamp directory - confirm that is the intended location.
        load_model(model, args.reload_from, timestamp)
    model = model.to(device)

    ###############################################################################
    # Prepare the Optimizer
    ###############################################################################
    # Parameters whose name contains one of these substrings get no weight decay.
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=config['lr'], eps=config['adam_epsilon'])
    # The total step count depends on the dataset size; it is recomputed
    # from the loader length here.
    scheduler = get_cosine_schedule_with_warmup(
        optimizer, num_warmup_steps=config['warmup_steps'],
        num_training_steps=len(train_loader) * config['epochs'])

    ###############################################################################
    # Training
    ###############################################################################
    logger.info("Training...")
    itr_global = 1
    start_epoch = 1 if args.reload_from == -1 else args.reload_from + 1
    for epoch in range(start_epoch, config['epochs'] + 1):

        epoch_start_time = time.time()
        itr_start_time = time.time()

        for batch in train_loader:  # loop through all batches in training dataset
            model.train()
            batch_gpu = [tensor.to(device) for tensor in batch]
            loss = model(*batch_gpu)

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), config['clip'])
            optimizer.step()
            scheduler.step()
            model.zero_grad()

            if itr_global % args.log_every == 0:
                elapsed = time.time() - itr_start_time
                log = '%s-%s|@gpu%d epo:[%d/%d] iter:%d step_time:%ds loss:%f'\
                %(args.model, args.expname, args.gpu_id, epoch, config['epochs'],itr_global, elapsed, loss)
                if args.visual:
                    tb_writer.add_scalar('loss', loss, itr_global)
                logger.info(log)

                itr_start_time = time.time()

            if itr_global % args.valid_every == 0:
                model.eval()
                loss_records = {}

                for batch in valid_loader:
                    batch_gpu = [tensor.to(device) for tensor in batch]
                    with torch.no_grad():
                        valid_loss = model.valid(*batch_gpu)
                    for loss_name, loss_value in valid_loss.items():
                        loss_records.setdefault(loss_name, []).append(loss_value)

                log = 'Validation '
                for loss_name, loss_values in loss_records.items():
                    log = log + loss_name + ':%.4f  '%(np.mean(loss_values))
                    if args.visual:
                        tb_writer.add_scalar(loss_name, np.mean(loss_values), itr_global)
                logger.info(log)

            itr_global += 1

            if itr_global % args.eval_every == 0:  # evaluate the model in the develop set
                model.eval()
                save_model(model, itr_global, timestamp)  # checkpoint at every evaluation step

                # Use a dedicated loader: rebinding `valid_loader` here made
                # every later validation pass run on the batch_size=1,
                # unshuffled loader instead of the configured one.
                eval_loader = torch.utils.data.DataLoader(dataset=valid_set, batch_size=1, shuffle=False, num_workers=1)
                vocab_api = load_dict(args.data_path + 'vocab.apiseq.json')
                vocab_desc = load_dict(args.data_path + 'vocab.desc.json')
                metrics = Metrics()

                os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/temp_results', exist_ok=True)
                repeat = 1
                decode_mode = 'sample'
                # Close the results file once evaluation finishes (it leaked before).
                with open(f"./output/{args.model}/{args.expname}/{timestamp}/temp_results/iter{itr_global}.txt", "w") as f_eval:
                    recall_bleu, prec_bleu = evaluate(model, metrics, eval_loader, vocab_desc, vocab_api, repeat, decode_mode, f_eval)

                if args.visual:
                    tb_writer.add_scalar('recall_bleu', recall_bleu, itr_global)
                    tb_writer.add_scalar('prec_bleu', prec_bleu, itr_global)

        # end of epoch ----------------------------
        model.adjust_lr()
Exemplo n.º 5
0
def train(args):
    """Train the model named by ``args.model`` via its ``train_AE`` step.

    Sets up logging (and, when ``args.visual`` is set, a log file, a
    TensorBoard writer and an ``args.json`` dump) under a timestamped run
    directory, builds the datasets and the model, then runs the training
    loop with periodic loss logging (``args.log_every``), validation
    (``args.valid_every``) and checkpoint + BLEU evaluation
    (``args.eval_every``).

    Args:
        args: namespace read for ``model``, ``expname``, ``visual``,
            ``gpu_id``, ``seed``, ``data_path``, ``reload_from``,
            ``log_every``, ``valid_every`` and ``eval_every``.
    """
    timestamp = datetime.now().strftime('%Y%m%d%H%M')
    run_dir = f'./output/{args.model}/{args.expname}/{timestamp}'

    # LOG #
    logger = logging.getLogger(__name__)
    logging.basicConfig(level=logging.DEBUG, format="%(message)s")
    tb_writer = None
    if args.visual:
        # make output directories if they don't already exist
        os.makedirs(f'{run_dir}/models', exist_ok=True)
        os.makedirs(f'{run_dir}/temp_results', exist_ok=True)
        # file handler so that log records are also written to disk
        fh = logging.FileHandler(f"{run_dir}/logs.txt")
        logger.addHandler(fh)
        tb_writer = SummaryWriter(f"{run_dir}/logs/")
        # save arguments; the context manager closes the file deterministically
        with open(f'{run_dir}/args.json', 'w') as f_args:
            json.dump(vars(args), f_args)

    # Device #
    if args.gpu_id < 0:
        device = torch.device("cuda")
    else:
        # gpu_id is non-negative here, so only CUDA availability matters
        device = torch.device(
            f"cuda:{args.gpu_id}" if torch.cuda.is_available() else "cpu")
    print(device)
    n_gpu = torch.cuda.device_count() if args.gpu_id < 0 else 1
    print(f"num of gpus:{n_gpu}")
    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    def save_model(model, epoch, timestamp):
        """Save model parameters to checkpoint"""
        os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/models',
                    exist_ok=True)
        ckpt_path = f'./output/{args.model}/{args.expname}/{timestamp}/models/model_epo{epoch}.pkl'
        print(f'Saving model parameters to {ckpt_path}')
        torch.save(model.state_dict(), ckpt_path)

    def load_model(model, epoch, timestamp):
        """Load parameters from checkpoint"""
        ckpt_path = f'./output/{args.model}/{args.expname}/{timestamp}/models/model_epo{epoch}.pkl'
        print(f'Loading model parameters from {ckpt_path}')
        # Previously referenced the undefined name `checkpoint` (NameError).
        model.load_state_dict(torch.load(ckpt_path))

    config = getattr(configs, 'config_' + args.model)()

    ###############################################################################
    # Load data
    ###############################################################################
    train_set = APIDataset(args.data_path + 'train.desc.h5',
                           args.data_path + 'train.apiseq.h5',
                           config['max_sent_len'])
    valid_set = APIDataset(args.data_path + 'test.desc.h5',
                           args.data_path + 'test.apiseq.h5',
                           config['max_sent_len'])
    print("Loaded data!")

    ###############################################################################
    # Define the models
    ###############################################################################
    model = getattr(models, args.model)(config)
    if args.reload_from >= 0:
        # load_model requires the timestamp argument (it was omitted before,
        # which raised TypeError).
        # NOTE(review): this resolves the checkpoint under the *current*
        # run's timestamp directory - confirm that is the intended location.
        load_model(model, args.reload_from, timestamp)
    model = model.to(device)

    ###############################################################################
    # Training
    ###############################################################################
    logger.info("Training...")
    itr_global = 1
    start_epoch = 1 if args.reload_from == -1 else args.reload_from + 1
    for epoch in range(start_epoch, config['epochs'] + 1):

        epoch_start_time = time.time()
        itr_start_time = time.time()

        # shuffle (re-define) data between epochs
        train_loader = torch.utils.data.DataLoader(
            dataset=train_set,
            batch_size=config['batch_size'],
            shuffle=True,
            num_workers=1)
        train_data_iter = iter(train_loader)
        n_iters = len(train_loader)

        itr = 1
        while True:  # loop through all batches in training data
            model.train()
            try:
                # builtin next(): the `.next()` method is a Python 2 idiom
                # that current DataLoader iterators do not provide.
                descs, apiseqs, desc_lens, api_lens = next(train_data_iter)
            except StopIteration:  # end of epoch
                break
            # The model takes (descs, desc_lens, apiseqs, api_lens), which
            # differs from the order yielded by the loader.
            batch = [
                tensor.to(device)
                for tensor in [descs, desc_lens, apiseqs, api_lens]
            ]
            loss_AE = model.train_AE(*batch)

            if itr % args.log_every == 0:
                elapsed = time.time() - itr_start_time
                log = '%s-%s|@gpu%d epo:[%d/%d] iter:[%d/%d] step_time:%ds elapsed:%s \n                      '\
                %(args.model, args.expname, args.gpu_id, epoch, config['epochs'],
                         itr, n_iters, elapsed, timeSince(epoch_start_time,itr/n_iters))
                for loss_name, loss_value in loss_AE.items():
                    log = log + loss_name + ':%.4f ' % (loss_value)
                    if args.visual:
                        tb_writer.add_scalar(loss_name, loss_value, itr_global)
                logger.info(log)

                itr_start_time = time.time()

            if itr % args.valid_every == 0:
                valid_loader = torch.utils.data.DataLoader(
                    dataset=valid_set,
                    batch_size=config['batch_size'],
                    shuffle=True,
                    num_workers=1)
                model.eval()
                loss_records = {}

                for descs, apiseqs, desc_lens, api_lens in valid_loader:
                    batch = [
                        tensor.to(device)
                        for tensor in [descs, desc_lens, apiseqs, api_lens]
                    ]
                    valid_loss = model.valid(*batch)
                    for loss_name, loss_value in valid_loss.items():
                        loss_records.setdefault(loss_name, []).append(loss_value)

                log = 'Validation '
                for loss_name, loss_values in loss_records.items():
                    log = log + loss_name + ':%.4f  ' % (np.mean(loss_values))
                    if args.visual:
                        tb_writer.add_scalar(loss_name, np.mean(loss_values),
                                             itr_global)
                logger.info(log)

            itr += 1
            itr_global += 1

            if itr_global % args.eval_every == 0:  # evaluate the model in the develop set
                model.eval()
                # checkpoint at every evaluation step
                save_model(model, itr_global, timestamp)

                eval_loader = torch.utils.data.DataLoader(dataset=valid_set,
                                                          batch_size=1,
                                                          shuffle=False,
                                                          num_workers=1)
                vocab_api = load_dict(args.data_path + 'vocab.apiseq.json')
                vocab_desc = load_dict(args.data_path + 'vocab.desc.json')
                metrics = Metrics()

                os.makedirs(
                    f'./output/{args.model}/{args.expname}/{timestamp}/temp_results',
                    exist_ok=True)
                repeat = 1
                decode_mode = 'sample'
                # Close the results file once evaluation finishes (it leaked before).
                with open(
                        f"./output/{args.model}/{args.expname}/{timestamp}/temp_results/iter{itr_global}.txt",
                        "w") as f_eval:
                    recall_bleu, prec_bleu = evaluate(model, metrics,
                                                      eval_loader, vocab_desc,
                                                      vocab_api, repeat,
                                                      decode_mode, f_eval)

                if args.visual:
                    tb_writer.add_scalar('recall_bleu', recall_bleu,
                                         itr_global)
                    tb_writer.add_scalar('prec_bleu', prec_bleu, itr_global)

        # end of epoch ----------------------------
        model.adjust_lr()