Example #1
def main():
    # Set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # checkpoint
    if args.restore:  # path to an existing model checkpoint
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

    contentfile = os.path.join(config.data, "segged_content.txt")
    # word2id, id2word, word2count = load_vocab(args.vocab_file, args.vocab_size)
    vocab = Vocab(config.vocab, contentfile, config.vocab_size)

    # Load data
    start_time = time.time()
    use_gnn = (args.graph_model == 'GNN')
    dataloader = DataLoader(config, config.data, config.batch_size, vocab,
                            args.adj, use_gnn, args.model, args.notrain,
                            args.debug)
    print("DATA loaded!")

    torch.backends.cudnn.benchmark = True

    # data
    print('loading data...\n')
    print('loading time cost: %.3f' % (time.time() - start_time))

    # model
    print('building model...\n')
    # configure the model
    # Model and optimizer
    # Added models: graph2gru, graph2gru_noAtten
    if args.model == 'graph2seq':
        model = graph2seq(config, vocab, use_cuda, args.use_copy,
                          args.use_bert, args.word_level_model,
                          args.graph_model)
    elif args.model == 'graph2gru':
        model = graph2gru.graph2gru(config, vocab, use_cuda, args.use_copy,
                                    args.use_bert, args.word_level_model,
                                    args.graph_model)
    elif args.model == 'graph2gru_noAtten':
        model = graph2gru_noAtten.graph2gru_noAtten(config, vocab, use_cuda,
                                                    args.use_copy,
                                                    args.use_bert,
                                                    args.word_level_model,
                                                    args.graph_model)
    elif args.model == 'seq2seq':
        model = seq2seq(config, vocab, use_cuda, use_content=args.use_content)
    elif args.model == 'bow2seq':
        model = bow2seq(config, vocab, use_cuda)
    elif args.model == 'h_attention':
        model = hierarchical_attention(config, vocab, use_cuda)

    if args.restore:
        model.load_state_dict(checkpoints['model'])
    if use_cuda:
        model.cuda()
        # lm_model.cuda()
    if len(args.gpus) > 1:  # data parallelism across GPUs
        model = nn.DataParallel(model, device_ids=args.gpus, dim=1)
    logging(repr(model) + "\n\n")  # log the model architecture

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]

    logging('total number of parameters: %d\n\n' % param_count)

    # 'updates' counts update steps already completed, so training can resume after an interruption.
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    if args.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim,
                      config.learning_rate,
                      config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)

    # if opt.pretrain:
    # pretrain_lm(lm_model, vocab)
    optim.set_parameters(model.parameters())
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    else:
        scheduler = None
    print("nana...")
    if not args.notrain:
        max_bleu = train(model, vocab, dataloader, scheduler, optim, updates)
        logging("Best bleu score: %.2f\n" % (max_bleu))
    else:
        assert args.restore is not None
        eval(model, vocab, dataloader, 0, updates, do_test=False)
    print("nana```")
Example #2
    model.cuda()
if len(opt.gpus) > 1:
    model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

# optimizer
if 0 and opt.restore:  # restore branch deliberately disabled with 'if 0'
    optim = checkpoints['optim']
else:
    optim = Optim(config.optim,
                  config.learning_rate,
                  config.max_grad_norm,
                  lr_decay=config.learning_rate_decay,
                  start_decay_at=config.start_decay_at)

if config.use_center_loss:
    optim.set_parameters(
        list(model.parameters()) + list(center_loss.parameters()))
else:
    optim.set_parameters(list(model.parameters()))

if config.schedule:
    # scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    scheduler = L.StepLR(optim.optimizer, step_size=15, gamma=0.2)

# total number of parameters
param_count = 0
for param in model.parameters():
    param_count += param.view(-1).size()[0]

# logging module
if not os.path.exists(config.log):
    os.mkdir(config.log)
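Every example builds the optimizer through the same Optim wrapper: a constructor taking the method name, learning rate, and gradient-norm cap, a set_parameters() call, and an .optimizer attribute that the lr_scheduler classes (aliased as L) wrap. A minimal sketch of such a wrapper, assuming OpenNMT-style behavior; the real class may differ:

# Sketch of an Optim wrapper matching the interface used above (assumed).
import torch

class Optim:
    def __init__(self, method, lr, max_grad_norm,
                 lr_decay=1.0, start_decay_at=None):
        self.method = method
        self.lr = lr
        self.max_grad_norm = max_grad_norm
        self.lr_decay = lr_decay              # consumed by omitted decay logic
        self.start_decay_at = start_decay_at  # consumed by omitted decay logic

    def set_parameters(self, params):
        # Keep only trainable tensors and build the inner optimizer,
        # exposed as .optimizer so the schedulers can wrap it.
        self.params = [p for p in params if p.requires_grad]
        if self.method == 'sgd':
            self.optimizer = torch.optim.SGD(self.params, lr=self.lr)
        elif self.method == 'adam':
            self.optimizer = torch.optim.Adam(self.params, lr=self.lr)
        else:
            raise RuntimeError('invalid optim method: ' + self.method)

    def step(self):
        # Clip gradients before updating, as max_grad_norm suggests.
        if self.max_grad_norm:
            torch.nn.utils.clip_grad_norm_(self.params, self.max_grad_norm)
        self.optimizer.step()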
Example #3
    model.load_state_dict(checkpoints['model'])
if use_cuda:
    model.cuda()
if len(opt.gpus) > 1:
    model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

# optimizer
if opt.restore:
    optim = checkpoints['optim']
else:
    optim = Optim(config.optim,
                  config.learning_rate,
                  config.max_grad_norm,
                  lr_decay=config.learning_rate_decay,
                  start_decay_at=config.start_decay_at)
optim.set_parameters(model.parameters())
if config.schedule:
    scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

# total number of parameters
param_count = 0
for param in model.parameters():
    param_count += param.view(-1).size()[0]

if not os.path.exists(config.log):
    os.mkdir(config.log)
if opt.log == '':
    log_path = config.log + utils.format_time(time.localtime()) + '/'
else:
    log_path = config.log + opt.log + '/'
if not os.path.exists(log_path):
    os.mkdir(log_path)
if opt.restore:
    optimG = checkpoints['optimG']
    optimD = checkpoints['optimD']
else:
    optimG = Optim(config.optim,
                   config.learning_rate,
                   config.max_grad_norm,
                   lr_decay=config.learning_rate_decay,
                   start_decay_at=config.start_decay_at)
    optimD = Optim(config.optim,
                   config.learning_rate,
                   config.max_grad_norm,
                   lr_decay=config.learning_rate_decay,
                   start_decay_at=config.start_decay_at)

optimG.set_parameters(netG.parameters())
optimD.set_parameters(netD.parameters())

if config.schedule:
    schedulerG = L.CosineAnnealingLR(optimG.optimizer, T_max=config.epoch)
    schedulerD = L.CosineAnnealingLR(optimD.optimizer, T_max=config.epoch)

# total number of parameters

if not os.path.exists(config.log):
    os.makedirs(config.log)

if config.log.endswith('/'):
    log_path = config.log
else:
    log_path = config.log + '/'
def main(vocab, dataloader):
    # Set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # checkpoint
    if args.restore:  # path to an existing model checkpoint
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

    torch.backends.cudnn.benchmark = True

    # model
    print('building model...\n')
    # configure the model
    # Model and optimizer
    model = GLSTM(config, vocab)
    # model = hierarchical_attention(config, vocab)
    # model = SLSTM(config, vocab)
    # model = Transformer(config, vocab)
    if args.restore:
        model.load_state_dict(checkpoints['model'])
    if use_cuda:
        model.cuda()
    if len(args.gpus) > 1:  # data parallelism across GPUs
        model = nn.DataParallel(model, device_ids=args.gpus, dim=1)
    logging(repr(model) + "\n\n")  # log the model architecture

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]

    logging('total number of parameters: %d\n\n' % param_count)

    # 'updates' counts update steps already completed, so training can resume after an interruption.
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    if args.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim,
                      config.learning_rate,
                      config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)

    optim.set_parameters(model.parameters())
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    else:
        scheduler = None

    if not args.notrain:
        max_acc, test_acc = train(model, dataloader, scheduler, optim, updates)
        logging("Best accuracy: %.2f, test accuracy: %.2f\n" %
                (max_acc * 100, test_acc * 100))
        return test_acc
    else:
        assert args.restore is not None
        eval(model, vocab, dataloader, 0, updates, do_test=True)
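Note the dim=1 argument to nn.DataParallel throughout these examples: the models evidently process time-major batches of shape (seq_len, batch, ...), so the batch dimension to scatter across GPUs is index 1 rather than the default 0. An illustration with stand-in shapes:

# Requires at least two visible GPUs; shapes are stand-ins.
import torch
import torch.nn as nn

if torch.cuda.device_count() > 1:
    layer = nn.Linear(256, 256).cuda()
    wrapped = nn.DataParallel(layer, device_ids=[0, 1], dim=1)
    x = torch.randn(50, 32, 256).cuda()  # (seq_len, batch, hidden)
    y = wrapped(x)  # each GPU receives a (50, 16, 256) slice of the batch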
Example #6
def main():
    # Set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    # checkpoint
    if args.restore:  # path to an existing model checkpoint
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

    # word2id, id2word, word2count = load_vocab(args.vocab_file, args.vocab_size)
    vocab = Vocab(config.vocab, config.data, config.vocab_size)

    # Load data
    start_time = time.time()
    dataloader = DataLoader(config, args.task, config.has_dev,
                            config.batch_size, vocab, args.model,
                            args.use_depparse, args.notrain, args.debug)
    print("DATA loaded!")

    torch.backends.cudnn.benchmark = True

    # data
    print('loading data...\n')
    print('loading time cost: %.3f' % (time.time() - start_time))

    # model
    print('building model...\n')
    # configure the model
    # Model and optimizer
    if args.model == 'h_attention':
        model = hierarchical_attention(config, vocab, use_cuda)
    elif args.model == 'slstm':
        model = SLSTM(config, vocab, use_cuda)
    elif args.model == 'glstm':
        model = GLSTM(config, vocab, use_cuda)
    elif args.model == 'hglstm':
        model = HGLSTM(config, vocab, use_cuda)
    if args.restore:
        model.load_state_dict(checkpoints['model'])
    if use_cuda:
        model.cuda()
        # lm_model.cuda()
    if len(args.gpus) > 1:  # data parallelism across GPUs
        model = nn.DataParallel(model, device_ids=args.gpus, dim=1)
    logging(repr(model) + "\n\n")  # log the model architecture

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]

    logging('total number of parameters: %d\n\n' % param_count)

    # 'updates' counts update steps already completed, so training can resume after an interruption.
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    if args.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim,
                      config.learning_rate,
                      config.max_grad_norm,
                      lr_decay=config.learning_rate_decay,
                      start_decay_at=config.start_decay_at)

    # if opt.pretrain:
    # pretrain_lm(lm_model, vocab)
    optim.set_parameters(model.parameters())
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    else:
        scheduler = None

    if not args.notrain:
        max_acc, test_acc = train(model, vocab, dataloader, scheduler, optim,
                                  updates)
        logging("Best accuracy: %.2f, test accuracy: %.2f\n" %
                (max_acc * 100, test_acc * 100))
    else:
        assert args.restore is not None
        eval(model, vocab, dataloader, 0, updates, do_test=True)
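The parameter-counting loop that recurs in every example flattens each tensor just to read its length; p.numel() returns the same count directly. A self-contained equivalent:

# Equivalent, more idiomatic parameter count (numel avoids the view).
import torch.nn as nn

model = nn.Linear(8, 4)  # stand-in for any nn.Module
param_count = sum(p.numel() for p in model.parameters())
print('total number of parameters: %d' % param_count)  # 8*4 + 4 = 36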
Example #7
if config.restore:
    cnn_model.load_state_dict(check_point['model'])
if use_cuda:
    cnn_model = cnn_model.cuda()
if len(config.gpus) > 1:
    model = nn.DataParallel(model, device_ids=config.gpus, dim=0)
# optimizer
if config.restore:
    optim = check_point['optim']
    updates = check_point['updates']
else:
    optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                  lr_decay=config.learning_rate_decay, start_decay_at=config.start_decay_at)
    updates = 0
optim.set_parameters(cnn_model.parameters())
if config.schedule:
    scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

# total number of parameters
param_count = 0
for param in cnn_model.parameters():
    param_count += param.view(-1).size()[0]
print('total number of model parameters is %d' % param_count)


total_loss, start_time = 0, time.time()
report_total, report_correct = 0, 0
scores = [[] for metric in config.metric]
scores = collections.OrderedDict(zip(config.metric, scores))
# train model
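The two scores lines above build one empty result list per configured metric, keyed by metric name. With stand-in metric names the resulting structure looks like this:

# What the scores setup above produces, with stand-in metric names.
import collections

metric = ['bleu', 'rouge']  # stand-in for config.metric
scores = collections.OrderedDict(zip(metric, [[] for m in metric]))
scores['bleu'].append(27.4)  # one list of per-evaluation scores per metric
print(scores)  # OrderedDict([('bleu', [27.4]), ('rouge', [])])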
Example #8
if use_cuda:
    model.cuda()
if len(opt.gpus) > 1:
    model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

# optimizer
if 0 and opt.restore:
    optim = checkpoints['optim']
else:
    optim = Optim(config.optim,
                  config.learning_rate,
                  config.max_grad_norm,
                  lr_decay=config.learning_rate_decay,
                  start_decay_at=config.start_decay_at)

optim.set_parameters(list(model.parameters()))

if config.schedule:
    # scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    scheduler = L.StepLR(optim.optimizer, step_size=2, gamma=0.8)

# total number of parameters
param_count = 0
for param in model.parameters():
    param_count += param.view(-1).size()[0]

# logging module
if not os.path.exists(config.log):
    os.mkdir(config.log)
if opt.log == '':
    log_path = config.log + utils.format_time(time.localtime()) + '/'
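This variant swaps the cosine schedule for L.StepLR(step_size=2, gamma=0.8), which multiplies the learning rate by 0.8 every two epochs. A quick self-contained check of that decay, assuming an initial lr of 0.1:

# StepLR(step_size=2, gamma=0.8) decays the lr by 0.8 every 2 epochs.
import torch

opt = torch.optim.SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
sched = torch.optim.lr_scheduler.StepLR(opt, step_size=2, gamma=0.8)
for epoch in range(6):
    print(epoch, sched.get_last_lr())  # [0.1], [0.1], [0.08], [0.08], ...
    opt.step()
    sched.step()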
Example #9
if use_cuda:
    model.cuda()
if len(opt.gpus) > 1:
    model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

# optimizer
if 0 and opt.restore:
    optim = checkpoints['optim']
else:
    optim = Optim(config.optim,
                  config.learning_rate,
                  config.max_grad_norm,
                  lr_decay=config.learning_rate_decay,
                  start_decay_at=config.start_decay_at)

optim.set_parameters(model.parameters())
if config.is_dis:
    if 0 and opt.restore:
        optim_dis = checkpoints['optim_dis']
    else:
        optim_dis = Optim(config.optim,
                          config.learning_rate,
                          config.max_grad_norm,
                          lr_decay=config.learning_rate_decay,
                          start_decay_at=config.start_decay_at)
        optim_dis.set_parameters(model_dis.parameters())
    scheduler_dis = L.CosineAnnealingLR(optim_dis.optimizer,
                                        T_max=config.epoch)

if config.schedule:
    scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    # model.decoder.load_state_dict({dd.replace('decoder.',''): checkpoints['model'][dd] for dd in checkpoints['model'] if dd[:7]=='decoder'})
    # model.ss_model.load_state_dict({dd.replace('ss_model.',''): checkpoints['model'][dd] for dd in checkpoints['model'] if dd[:8]=='ss_model'})

if use_cuda:
    model.cuda()
if len(opt.gpus) > 1:
    model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

# optimizer
if 0 and opt.restore:
    optim = checkpoints['optim']
else:
    optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                  lr_decay=config.learning_rate_decay, start_decay_at=config.start_decay_at)

optim.set_parameters(list(model.module.second_ss_model.parameters()))
# optim.set_parameters(list(model.parameters()))

if config.schedule:
    # scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)
    scheduler = L.StepLR(optim.optimizer, step_size=15, gamma=0.2)

# total number of parameters
param_count = 0
for param in model.parameters():
    param_count += param.view(-1).size()[0]

# logging module
if not os.path.exists(config.log):
    os.mkdir(config.log)
if opt.log == '':
Example #11
    # for i, p in enumerate(model.parameters()):
    #     print(i)
    #     print(p.shape)

    if use_cuda:
        model.cuda()
    if len(opt.gpus) > 1:
        model = nn.DataParallel(model, device_ids=opt.gpus, dim=1)

    # optimizer
    if opt.restore:
        optim = checkpoints['optim']
    else:
        optim = Optim(config.optim, config.learning_rate, config.max_grad_norm,
                      lr_decay=config.learning_rate_decay, start_decay_at=config.start_decay_at)
    optim.set_parameters(filter(lambda p: p.requires_grad, model.parameters()))
    if config.schedule:
        scheduler = L.CosineAnnealingLR(optim.optimizer, T_max=config.epoch)

    # total number of parameters
    param_count = 0
    for param in model.parameters():
        param_count += param.view(-1).size()[0]



    logging_csv = utils.logging_csv(log_path + 'record.csv')
    for k, v in config.items():
        logging("%s:\t%s\n" % (str(k), str(v)))
    logging("\n")
    logging(repr(model) + "\n\n")
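Unlike the other examples, this one passes only trainable parameters to set_parameters via the requires_grad filter, which matters when part of the model (for example a pretrained embedding) is frozen. A minimal sketch of the pattern with a stand-in module:

# Frozen tensors must not reach the optimizer; the filter excludes them.
import torch.nn as nn

model = nn.Sequential(nn.Embedding(1000, 64), nn.Linear(64, 2))
model[0].weight.requires_grad = False  # freeze the embedding
trainable = [p for p in model.parameters() if p.requires_grad]
# Only the Linear weight and bias remain; these go to set_parameters.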
Example #12
def main():
    # Set the random seed
    torch.manual_seed(args.seed)
    if use_cuda:
        torch.cuda.manual_seed(args.seed)

    vocab = Vocab(config.vocab_file,
                  config.emb_size,
                  use_pre_emb=False,
                  vocab_size=config.vocab_size)
    print('vocab clear')

    torch.backends.cudnn.benchmark = True

    # model
    print('building model...\n')
    # configure the model
    # Model and optimizer

    if args.model == 'seq2seq':
        model_generate = seq2seq(config, vocab, use_cuda, pretrain=None)
    elif args.model == 'transformer':
        model_generate = Transformer(config, vocab, use_cuda, pretrain=None)
    elif args.model == 'transformer_gcn':
        model_generate = Transformer_gcn(config,
                                         vocab,
                                         use_cuda,
                                         pretrain=None)

    if args.restore:
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))
        model_generate.load_state_dict(checkpoints['model_generate'])
        if args.train_type == 'sample_rl':
            emb = model_generate.embedding
            model_sample = graph2seq_rl(config,
                                        vocab,
                                        use_cuda,
                                        emb,
                                        pretrain=None)
        elif args.train_type == 'generate':
            model_sample = graph2seq_rl(config,
                                        vocab,
                                        use_cuda,
                                        0,
                                        pretrain=None)
            model_sample.load_state_dict(checkpoints['model_sample'])
        else:
            print('err')
    else:
        #model_sample = graph2seq_rl(config, vocab, use_cuda, model_generate.embedding, pretrain=None)
        model_sample = graph2seq_rl(config, vocab, use_cuda, 0, pretrain=None)
    '''if args.restore:
        print('loading checkpoint...\n')
        checkpoints = torch.load(os.path.join(log_path, args.restore))

        model_sample.load_state_dict(checkpoints['model_sample'])
        model_generate.load_state_dict(checkpoints['model_generate'])'''

    if use_cuda:
        model_sample.cuda()
        model_generate.cuda()

    # if len(args.gpus) > 1:  # data parallelism across GPUs
    # model = nn.DataParallel(model, device_ids=args.gpus, dim=1)
    logging(repr(model_sample) + "\n\n")  # log the model architecture
    logging(repr(model_generate) + "\n\n")

    # total number of parameters
    sample_param_count = 0
    generate_param_count = 0

    for param in model_sample.parameters():
        sample_param_count += param.view(-1).size()[0]
    for param in model_generate.parameters():
        generate_param_count += param.view(-1).size()[0]

    logging('total number of sample parameters: %d\n\n' % sample_param_count)
    logging('total number of generate parameters: %d\n\n' %
            generate_param_count)

    print('# generator parameters:',
          sum(param.numel() for param in model_generate.parameters()))

    # 'updates' counts update steps already completed, so training can resume after an interruption.
    if args.restore:
        updates = checkpoints['updates']
        ori_updates = updates
    else:
        updates = 0

    # optimizer
    '''if args.restore:
        optim_sample = checkpoints['optim_sample']
        optim_generate = checkpoints['optim_generate']
    else:'''
    #optimizer = optim.Adam(self.params, lr=self.lr)
    optim_sample = Optim(config.optim,
                         config.learning_rate_sample,
                         config.max_grad_norm,
                         lr_decay=config.learning_rate_decay,
                         start_decay_at=config.start_decay_at)
    optim_generate = Optim(config.optim,
                           config.learning_rate,
                           config.max_grad_norm,
                           lr_decay=config.learning_rate_decay,
                           start_decay_at=config.start_decay_at)

    optim_sample.set_parameters(model_sample.parameters())
    optim_generate.set_parameters(model_generate.parameters())
    if config.schedule:
        # SetLR is presumably a project-specific scheduler; it is not part
        # of torch.optim.lr_scheduler.
        scheduler_sample = L.SetLR(optim_sample.optimizer)
        scheduler_generate = L.SetLR(optim_generate.optimizer)

    else:
        scheduler_sample = None
        scheduler_generate = None

    if args.type == 'train':

        start_time = time.time()

        #dataloader_train = get_dataloader(vocab, split='train', train_type='qk')
        dataloader_train = get_dataloader(vocab, split='train', train_type='k')
        dataloader_dev = get_dataloader(vocab, split='dev', train_type='k')
        dataloader_dev_qk = get_dataloader(vocab, split='dev', train_type='qk')

        print('loading data...\n')
        print('loading time cost: %.3f' % (time.time() - start_time))

        max_bleu = train(model_sample, model_generate, vocab, dataloader_train,
                         dataloader_dev, dataloader_dev_qk, scheduler_sample,
                         scheduler_generate, optim_sample, optim_generate,
                         updates)
        logging("Best bleu score: %.2f\n" % (max_bleu))

    elif args.type == 'eval':
        # Load data
        start_time = time.time()

        dataloader_test = get_dataloader(vocab, split='test', train_type='k')
        dataloader_test_qk = get_dataloader(vocab,
                                            split='test',
                                            train_type='qk')

        print('loading data...\n')
        print('loading time cost: %.3f' % (time.time() - start_time))
        assert args.restore is not None
        eval(model_sample, model_generate, vocab, dataloader_test,
             dataloader_test_qk, 0, updates)
    else:
        print('error')
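None of the examples show train() itself, but the interfaces above pin down its shape: zero the gradients, backpropagate, call Optim.step() (which clips before updating), and advance the scheduler once per epoch. A hedged sketch consistent with those interfaces; the assumption that the model's forward pass returns a scalar loss is mine:

# Hypothetical epoch loop consistent with the Optim/scheduler usage above.
def train_one_epoch(model, dataloader, optim, scheduler=None):
    model.train()
    for batch in dataloader:
        optim.optimizer.zero_grad()
        loss = model(batch)  # assumed: forward pass returns a scalar loss
        loss.backward()
        optim.step()  # clips gradients to max_grad_norm, then updates
    if scheduler is not None:
        scheduler.step()  # once per epoch, as T_max=config.epoch implies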