Example #1
def evaluate(args, model, tokenizer, processor, label_list, device, mode="test"):
    num_labels = len(label_list) + 1
    eval_data = load_examples(args, tokenizer, processor, label_list, mode)
    # Run prediction for full data
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler, batch_size=args.eval_batch_size)

    model.eval()
    y_true = []
    y_pred = []
    # labels are 1-indexed so that id 0 can serve as a padding/unknown fallback
    label_map = {i: label for i, label in enumerate(label_list, 1)}
    label_map[0] = 'unknown'
    nb_tr_examples, nb_tr_steps = 0, 0

    for batch in tqdm(eval_dataloader, desc="Evaluating"):
        batch = tuple(t.to(device) for t in batch)
        input_ids, input_mask, segment_ids, label_ids, valid_ids, label_mask, b_use_valid_filter,\
            adj_matrix, dep_matrix = batch

        with torch.no_grad():
            logits = model(input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask,
                           valid_ids=valid_ids, adjacency_matrix=adj_matrix)

        nb_tr_examples += input_ids.size(0)
        nb_tr_steps += 1

        logits = torch.argmax(F.log_softmax(logits, dim=2), dim=2)
        logits = logits.detach().cpu().numpy()
        label_ids = label_ids.detach().cpu().numpy()

        for i, label in enumerate(label_ids):
            temp_1 = []
            temp_2 = []
            for j, m in enumerate(label):
                if j == 0:
                    # position 0 is the [CLS] placeholder and carries no real label
                    continue
                elif label_ids[i][j] == num_labels - 1:
                    # the last label id marks the end of the sequence (assumed [SEP]): flush and stop
                    y_true.append(temp_1)
                    y_pred.append(temp_2)
                    break
                else:
                    temp_1.append(label_map[label_ids[i][j]])
                    temp_2.append(label_map[logits[i][j]])

    logger.info("nb_tr_examples: {}, nb_tr_steps: {}".format(nb_tr_examples, nb_tr_steps))

    result = evaluate_ote(y_true, y_pred)
    logger.info(result)

    return {
        "precision": result[0],
        "recall": result[1],
        "f1": result[2]
    }
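
# load_examples is not shown in this snippet. A minimal, hypothetical sketch of the
# TensorDataset it is assumed to return, matching the nine-tensor batch unpacking in
# evaluate(); the attribute names on `features` are illustrative, not the repository's.
import torch
from torch.utils.data import TensorDataset

def build_eval_dataset(features):
    return TensorDataset(
        torch.tensor([f.input_ids for f in features], dtype=torch.long),
        torch.tensor([f.input_mask for f in features], dtype=torch.long),
        torch.tensor([f.segment_ids for f in features], dtype=torch.long),
        torch.tensor([f.label_ids for f in features], dtype=torch.long),
        torch.tensor([f.valid_ids for f in features], dtype=torch.long),
        torch.tensor([f.label_mask for f in features], dtype=torch.long),
        torch.tensor([f.use_valid_filter for f in features], dtype=torch.bool),
        torch.tensor([f.adj_matrix for f in features], dtype=torch.float),
        torch.tensor([f.dep_matrix for f in features], dtype=torch.float),
    )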
Example #2
def train(data_dir='data/memes/',
          dim_proj=512,
          maxlen=30,
          batch_size=256,
          keep_ratio=1.,
          shuffle_data=True,
          learning_rate=0.001,
          global_steps=50000,
          disp_freq=100,
          save_freq=1000,
          test_freq=1000,
          saveto_file='params.npz',
          weight_decay=0.0005,
          reload_model=False,
          train=True):
    """
    Topo-LSTM model training.
    """
    options = locals().copy()
    saveto = data_dir + saveto_file

    # loads graph
    G, node_index = data_utils.load_graph(data_dir)
    print nx.info(G)
    options['n_words'] = len(node_index)

    print options

    # creates and initializes shared variables.
    print 'Initializing variables...'
    params = init_params(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(saveto, params)
    tparams = init_tparams(params)

    # builds Topo-LSTM model
    print 'Building model...'
    model = tprnn_model.build_model(tparams, options)

    print 'Loading test data...'
    test_examples = data_utils.load_examples(data_dir,
                                             dataset='test',
                                             node_index=node_index,
                                             maxlen=maxlen,
                                             G=G)
    test_loader = data_utils.Loader(test_examples, options=options)
    print 'Loaded %d test examples' % len(test_examples)

    if train:
        # prepares training data.
        print 'Loading train data...'
        train_examples = data_utils.load_examples(
            data_dir,
            dataset='train',
            keep_ratio=options['keep_ratio'],
            node_index=node_index,
            maxlen=maxlen,
            G=G)
        train_loader = data_utils.Loader(train_examples, options=options)
        print 'Loaded %d training examples.' % len(train_examples)

        # compiles updates.
        optimizer = downhill.build(algo='adam',
                                   loss=model['cost'],
                                   params=tparams.values(),
                                   inputs=model['data'])

        updates = optimizer.get_updates(max_gradient_elem=5.,
                                        learning_rate=learning_rate)

        f_update = theano.function(model['data'],
                                   model['cost'],
                                   updates=list(updates))

        # training loop.
        start_time = timeit.default_timer()

        # downhill.minimize(
        #     loss=cost,
        #     algo='adam',
        #     train=train_loader,
        #     # inputs=input_list + [labels],
        #     # params=tparams.values(),
        #     # patience=0,
        #     max_gradient_clip=1,
        #     # max_gradient_norm=1,
        #     learning_rate=learning_rate,
        #     monitors=[('cost', cost)],
        #     monitor_gradients=False)

        n_examples = len(train_examples)
        batches_per_epoch = n_examples // options['batch_size'] + 1
        n_epochs = global_steps // batches_per_epoch + 1

        global_step = 0
        cost_history = []
        for _ in range(n_epochs):
            for _ in range(batches_per_epoch):
                cost = f_update(*train_loader())
                cost_history += [cost]

                if global_step % disp_freq == 0:
                    print 'global step %d, cost: %f' % (global_step, cost)

                # dump model parameters.
                if global_step % save_freq == 0:
                    params = unzip(tparams)
                    np.savez(saveto, **params)
                    pickle.dump(options, open('%s.pkl' % saveto, 'wb'), -1)

                # evaluate on test data.
                if global_step % test_freq == 0:
                    scores = evaluate(model['f_prob'], test_loader)
                    print 'eval scores: ', scores
                    end_time = timeit.default_timer()
                    print 'time used: %d seconds.' % (end_time - start_time)

                global_step += 1

    scores = evaluate(model['f_prob'], test_loader)
    pprint.pprint(scores)
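
# The helpers init_tparams, unzip and load_params used above are not shown. A minimal
# sketch of them, assuming the common Theano convention of keeping shared variables in an
# OrderedDict keyed by parameter name; the repository's actual versions may differ.
from collections import OrderedDict
import numpy as np
import theano

def init_tparams_sketch(params):
    # wrap plain numpy arrays as named Theano shared variables
    return OrderedDict((k, theano.shared(v, name=k)) for k, v in params.items())

def unzip_sketch(tparams):
    # pull the current values back out as numpy arrays, ready for np.savez
    return OrderedDict((k, v.get_value()) for k, v in tparams.items())

def load_params_sketch(path, params):
    # overwrite entries of `params` in place with the arrays stored in an .npz file
    saved = np.load(path)
    for k in params:
        if k in saved.files:
            params[k] = saved[k]
    return params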
Example #3
def train_pet(args) -> List:
    # Load configs
    model_config, train_config, eval_config = load_pet_configs(args)

    # Load dataset
    train_data = load_examples(args.task_name, args.data_dir, TRAIN_SET,
                               num_examples=args.train_examples, split_examples_evenly=args.split_examples_evenly)
    eval_data = load_examples(args.task_name, args.data_dir, TEST_SET if args.eval_set == 'test' else DEV_SET,
                              num_examples=args.eval_examples, split_examples_evenly=args.split_examples_evenly)
    dev_data = load_examples(args.task_name, args.data_dir, DEV32_SET,
                             num_examples=args.dev_examples, split_examples_evenly=args.split_examples_evenly)

    set_seed(args.seed)

    # Record all evaluation results on dev & eval set
    all_result = []
    dev_result_all = defaultdict(lambda: defaultdict(list))
    eval_result_all = defaultdict(lambda: defaultdict(list))
    # For two-stage training, the stage-1 evaluations are recorded separately as well
    if args.do_train and args.do_eval and args.two_stage_train:
        dev_stage1_all = defaultdict(lambda: defaultdict(list))
        eval_stage1_all = defaultdict(lambda: defaultdict(list))

    # Iterates through all patterns
    for pattern_id in args.pattern_ids:
        # Repeat training
        for iteration in range(args.pet_repetitions):
            results_dict = {}
            model_config.pattern_id = pattern_id
            pattern_iter_output_dir = "{}/p{}-i{}".format(
                args.output_dir, pattern_id, iteration)

            if os.path.exists(pattern_iter_output_dir):
                logger.warning(
                    f"Path {pattern_iter_output_dir} already exists, skipping it...")
                continue
            os.makedirs(pattern_iter_output_dir)

            # Init wrapper model
            assert model_config.pattern_id is not None, 'A pattern_id must be set for initializing a new PET model'
            wrapper = TransformerModelWrapper(model_config)

            # Training
            logger.info('--- Start iteration %d ---' % iteration)
            if args.do_train:
                if not args.two_stage_train:
                    # Single stage training
                    logger.info('=== Start training ===')
                    results_dict.update(train_single_model(train_data, eval_data, dev_data, pattern_iter_output_dir,
                                                           wrapper, train_config, eval_config))
                    evaluate_single_model(pattern_id, pattern_iter_output_dir, eval_data,
                                          dev_data, eval_config, results_dict, dev_result_all, eval_result_all)
                    with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                        json.dump(results_dict, fh)
                else:
                    # Two stage training
                    # 1. Only train prompts and label tokens
                    logger.info('=== Start training stage 1 ===')
                    results_dict.update(train_single_model(train_data, eval_data, dev_data, pattern_iter_output_dir,
                                                           wrapper, train_config, eval_config, stage=1))
                    evaluate_single_model(pattern_id, pattern_iter_output_dir, eval_data,
                                          dev_data, eval_config, results_dict, dev_stage1_all, eval_stage1_all)
                    with open(os.path.join(pattern_iter_output_dir, 'results_stage1.json'), 'w') as fh:
                        json.dump(results_dict, fh)

                    # 2. Train full model
                    logger.info('=== Start training stage 2 ===')
                    results_dict.update(train_single_model(train_data, eval_data, dev_data, pattern_iter_output_dir,
                                                           wrapper, train_config, eval_config, stage=2))
                    evaluate_single_model(pattern_id, pattern_iter_output_dir, eval_data,
                                          dev_data, eval_config, results_dict, dev_result_all, eval_result_all)
                    with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                        json.dump(results_dict, fh)

                # Save configs
                train_config.save(os.path.join(
                    pattern_iter_output_dir, 'train_config.json'))
                eval_config.save(os.path.join(
                    pattern_iter_output_dir, 'eval_config.json'))
                logger.info("Saving complete")

            # Do evaluation only
            elif args.do_eval:
                evaluate_single_model(pattern_id, pattern_iter_output_dir, eval_data,
                                      dev_data, eval_config, results_dict, dev_result_all, eval_result_all)
                # Write overall results
                with open(os.path.join(pattern_iter_output_dir, 'results.json'), 'w') as fh:
                    json.dump(results_dict, fh)

            # Clear cache
            wrapper.model = None
            wrapper = None
            torch.cuda.empty_cache()

    # Aggregate and average results across all patterns and repetitions
    if args.do_eval:
        logger.info("=== OVERALL RESULTS ===")
        if args.do_train and args.do_eval and args.two_stage_train:
            # Store stage 1 results first
            all_result.extend(write_results(os.path.join(
                args.output_dir, 'result_stage1.txt'), dev_stage1_all, eval_stage1_all))
        all_result.extend(write_results(os.path.join(
            args.output_dir, 'result.txt'), dev_result_all, eval_result_all))

    return all_result
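
# write_results is not shown above. A hypothetical sketch of what it could do, assuming the
# nested defaultdicts map metric name -> pattern id -> list of per-repetition scores; the
# project's real layout and output format may differ.
import statistics

def write_results_sketch(path, dev_results, eval_results):
    lines = []
    for split_name, results in (('dev', dev_results), ('eval', eval_results)):
        for metric, per_pattern in results.items():
            for pattern_id, scores in per_pattern.items():
                avg = statistics.mean(scores)
                std = statistics.pstdev(scores) if len(scores) > 1 else 0.0
                lines.append('%s %s p%s: avg=%.4f std=%.4f'
                             % (split_name, metric, pattern_id, avg, std))
    with open(path, 'w') as fh:
        fh.write('\n'.join(lines) + '\n')
    return lines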
Example #4
def train(data_dir='data/memes/',
          dim_proj=256,
          dim_att=128,
          maxlen=30,
          batch_size=256,
          keep_ratio=1.,
          shuffle_data=True,
          learning_rate=0.001,
          global_steps=50000,
          disp_freq=100,
          save_freq=100,
          test_freq=100,
          saveto_file='params.npz',
          tmsaveto_file='timeparams.npz',
          weight_decay=0.0005,
          sigmasqr=1,
          tdim=1.,
          reload_model=False,
          train=True):
    """
    Topo-LSTM model training.
    tdim: scale time down by how many times
    """
    options = locals().copy()
    #savedstep = '0'
    saveto = data_dir + saveto_file
    tmsaveto = data_dir + tmsaveto_file

    # loads graph
    Gp, node_index = data_utils.load_graph(data_dir)
    #print nx.info(G)
    options['n_events'] = len(node_index)

    print options

    # creates and initializes shared variables.
    print 'Initializing variables...'
    params = init_params(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(saveto, params)
    tparams = init_tparams(params)

    timeparams = init_timeparams(options)
    if reload_model:
        print 'reusing saved model.'
        load_params(tmsaveto, timeparams)
    timetparams = init_tparams(timeparams)

    # builds Topo-LSTM model
    print 'Building model...'
    model = tpgru_model.build_model(tparams, timetparams, options)

    print 'Loading test data...'
    test_examples = data_utils.load_examples(data_dir,
                                             dataset='test',
                                             node_index=node_index,
                                             maxlen=maxlen,
                                             Gp=Gp)
    test_loader = data_utils.Loader(test_examples, options=options)
    print 'Loaded %d test examples' % len(test_examples)

    if train:
        # prepares training data.
        print 'Loading train data...'
        train_examples = data_utils.load_examples(
            data_dir,
            dataset='train',
            keep_ratio=options['keep_ratio'],
            node_index=node_index,
            maxlen=maxlen,
            Gp=Gp)
        train_loader = data_utils.Loader(train_examples, options=options)
        print 'Loaded %d training examples.' % len(train_examples)

        # compiles updates.
        optimizer = downhill.build(algo='adam',
                                   loss=model['cost'],
                                   params=tparams.values(),
                                   inputs=model['data'])

        updates = optimizer.get_updates(max_gradient_elem=5.,
                                        learning_rate=learning_rate)

        f_update = theano.function(model['data'],
                                   model['cost'],
                                   updates=list(updates))

        toptimizer = downhill.build(algo='adam',
                                    loss=model['timecost'],
                                    params=timetparams.values(),
                                    inputs=model['timedata'])

        tupdates = toptimizer.get_updates(max_gradient_elem=5.,
                                          learning_rate=0.005)

        f_t_update = theano.function(model['timedata'],
                                     model['timecost'],
                                     updates=list(tupdates))

        # training loop.
        start_time = timeit.default_timer()

        n_examples = len(train_examples)
        batches_per_epoch = n_examples // options['batch_size'] + 1
        n_epochs = global_steps // batches_per_epoch + 1

        global_step = 0
        #cost_history = []
        for _ in range(n_epochs):
            for _ in range(batches_per_epoch):
                batch_data = train_loader()
                # the loader yields a single tuple shared by both updates: the sequence cost
                # takes all but the last three entries plus the second-to-last as its target,
                # while the time cost takes all but the last two plus the final entry
                cost = f_update(*(batch_data[:-3] + (batch_data[-2], )))
                #cost_history += [cost]
                timecost = f_t_update(*(batch_data[:-2] + (batch_data[-1], )))

                if global_step % disp_freq == 0:
                    print 'global step %d, cost: %f' % (global_step, cost)
                    print 'timecost: %f' % (timecost)

                # dump model parameters.
                if global_step % save_freq == 0:
                    params = unzip(tparams)
                    np.savez(data_dir + saveto_file, **params)
                    pickle.dump(
                        options, open('%s.pkl' % (data_dir + saveto_file),
                                      'wb'), -1)
                    timeparams = unzip(timetparams)
                    np.savez(data_dir + tmsaveto_file, **timeparams)

                # evaluate on test data.
                if global_step % test_freq == 0:
                    scores = evaluate(model['f_prob'], test_loader,
                                      model['f_tprob'], options['tdim'])
                    print 'eval scores: ', scores
                    end_time = timeit.default_timer()
                    print 'time used: %d seconds.' % (end_time - start_time)

                global_step += 1

    scores = evaluate(model['f_prob'], test_loader, model['f_tprob'],
                      options['tdim'])
    pprint.pprint(scores)
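
# Example invocation with hypothetical settings: resume from previously saved parameters
# and scale timestamps down by a factor of 10; adjust data_dir to the actual dataset path.
if __name__ == '__main__':
    train(data_dir='data/memes/', reload_model=True, tdim=10., global_steps=20000)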
Example #5
def main_train(args, model, tokenizer, processor, label_list, device, n_gpu):
    train_examples = processor.get_train_examples(args.data_dir)
    num_train_optimization_steps = int(
        len(train_examples) / args.train_batch_size / args.gradient_accumulation_steps) * args.num_train_epochs
    if args.local_rank != -1:
        num_train_optimization_steps = num_train_optimization_steps // torch.distributed.get_world_size()

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
        {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
    ]
    if args.fp16:
        print("using fp16")
        try:
            from apex import amp
            from apex.optimizers import FusedAdam
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        optimizer = FusedAdam(optimizer_grouped_parameters,
                              lr=args.learning_rate,
                              bias_correction=False)

        if args.loss_scale == 0:
            model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False,
                                              loss_scale="dynamic")
        else:
            model, optimizer = amp.initialize(model, optimizer, opt_level="O2", keep_batchnorm_fp32=False,
                                              loss_scale=args.loss_scale)
        scheduler = LinearWarmUpScheduler(optimizer, warmup=args.warmup_proportion,
                                          total_steps=num_train_optimization_steps)
    else:
        print("using fp32")
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=args.learning_rate,
                             warmup=args.warmup_proportion,
                             t_total=num_train_optimization_steps)

    if args.local_rank != -1:
        try:
            from apex.parallel import DistributedDataParallel as DDP
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")

        model = DDP(model)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)


    global_step = 0
    nb_tr_steps = 0
    tr_loss = 0
    average_loss = 0

    print("data prep")
    train_data = load_examples(args, tokenizer, processor, label_list, "train")

    if args.local_rank == -1:
        train_sampler = RandomSampler(train_data)
    else:
        train_sampler = DistributedSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size)

    model.train()
    nb_tr_examples = 0
    for epoch_num in trange(int(args.num_train_epochs), desc="Epoch"):
        if args.max_steps > 0 and global_step > args.max_steps:
            break
        for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
            if args.max_steps > 0 and global_step > args.max_steps:
                break
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids, valid_ids, label_mask, b_use_valid_filter, \
            adj_matrix, dep_matrix = batch

            loss = model(input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask, labels=label_ids,
                         valid_ids=valid_ids, adjacency_matrix=adj_matrix)
            if n_gpu > 1:
                loss = loss.mean()  # mean() to average on multi-gpu.
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(loss, optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()

            tr_loss += loss.item()
            average_loss += loss.item()
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1
            if (step + 1) % args.gradient_accumulation_steps == 0:
                if args.fp16:
                    torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), 1.0)
                else:
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

                if args.fp16:
                    # FusedAdam does not apply BERT's warmup schedule itself, so step the scheduler manually
                    scheduler.step()

                optimizer.step()
                optimizer.zero_grad()
                global_step += 1

                logging.info("Global Steps:{} Final Loss = {}".format(global_step, average_loss))
                average_loss = 0

        if args.local_rank == -1 or torch.distributed.get_rank() == 0 or args.world_size <= 1:
            # Save model checkpoint
            output_dir = os.path.join(args.output_dir, "epoch-{}".format(epoch_num))
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            save_zen_model(output_dir, model, args)

    loss = tr_loss / nb_tr_steps if args.do_train else None
    return loss, global_step
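
# save_zen_model is not shown here. A minimal sketch of what such a checkpoint helper
# typically does (an assumption, not the repository's implementation): unwrap any
# DataParallel/DDP wrapper, then persist the weights and the training arguments.
import os
import torch

def save_zen_model_sketch(output_dir, model, args):
    model_to_save = model.module if hasattr(model, 'module') else model
    torch.save(model_to_save.state_dict(), os.path.join(output_dir, 'pytorch_model.bin'))
    torch.save(args, os.path.join(output_dir, 'training_args.bin'))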