Example #1
def train(train_queue, model, optimizer, scheduler, global_step, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.train()
    for step, (input, target) in enumerate(train_queue):
        input = utils.move_to_cuda(input)
        target = utils.move_to_cuda(target)
    
        #optimizer.zero_grad()
        model.zero_grad()
        logits = model(input)
        global_step += 1
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()
        scheduler.step()

        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)
    
        if (step+1) % 100 == 0:
            lr = scheduler.get_lr()[0]
            logging.info('train %03d lr %e loss %e top1 %f top5 %f', step+1, lr, objs.avg, top1.avg, top5.avg)

    return top1.avg, objs.avg, global_step
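
Every example on this page assumes a `utils.move_to_cuda` helper that transfers a batch to the GPU. Implementations differ across the repositories sampled here (some accept an explicit device, as in Examples #17, #21, and #23); a minimal sketch under those assumptions, not any one repo's actual helper:

import torch

def move_to_cuda(sample, device=None):
    # Recursively move tensors, possibly nested in dicts/lists/tuples, to the GPU.
    # Hypothetical sketch; real helpers add fp16 handling, pinned memory, etc.
    device = device if device is not None else torch.device('cuda')
    if torch.is_tensor(sample):
        return sample.to(device, non_blocking=True)
    if isinstance(sample, dict):
        return {k: move_to_cuda(v, device) for k, v in sample.items()}
    if isinstance(sample, (list, tuple)):
        return type(sample)(move_to_cuda(x, device) for x in sample)
    return sample  # leave non-tensor values (ints, strings, ...) unchanged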
Example #2
def controller_train(train_queue, model, optimizer):
    objs = utils.AvgrageMeter()
    mse = utils.AvgrageMeter()
    nll = utils.AvgrageMeter()
    model.train()
    for step, sample in enumerate(train_queue):
        encoder_input = utils.move_to_cuda(sample['encoder_input'])
        encoder_target = utils.move_to_cuda(sample['encoder_target'])
        decoder_input = utils.move_to_cuda(sample['decoder_input'])
        decoder_target = utils.move_to_cuda(sample['decoder_target'])

        optimizer.zero_grad()
        predict_value, log_prob, arch = model(encoder_input, decoder_input)
        loss_1 = F.mse_loss(predict_value.squeeze(), encoder_target.squeeze())
        loss_2 = F.nll_loss(log_prob.contiguous().view(-1, log_prob.size(-1)),
                            decoder_target.view(-1))
        loss = args.controller_trade_off * loss_1 + (
            1 - args.controller_trade_off) * loss_2
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       args.controller_grad_clip)
        optimizer.step()

        n = encoder_input.size(0)
        objs.update(loss.data, n)
        mse.update(loss_1.data, n)
        nll.update(loss_2.data, n)

    return objs.avg, mse.avg, nll.avg
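
Examples #2 and #3 track running averages with `utils.AvgrageMeter` (the misspelling comes from the NAO-style upstream code). A minimal sketch of the update/avg interface these snippets assume, not the exact upstream class:

class AvgrageMeter:
    # Running average over weighted updates.
    def __init__(self):
        self.reset()

    def reset(self):
        self.avg, self.sum, self.cnt = 0.0, 0.0, 0

    def update(self, val, n=1):
        self.sum += val * n
        self.cnt += n
        self.avg = self.sum / self.cnt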
Example #3
    def train_epoch(self, split):
        objs = utils.AvgrageMeter()
        mse = utils.AvgrageMeter()
        nll = utils.AvgrageMeter()
        queue = self.queues[split]
        self.train()
        for step, sample in enumerate(queue):
            encoder_input = utils.move_to_cuda(sample['encoder_input'])
            encoder_target = utils.move_to_cuda(sample['encoder_target'])
            decoder_input = utils.move_to_cuda(sample['decoder_input'])
            decoder_target = utils.move_to_cuda(sample['decoder_target'])

            self.optimizer.zero_grad()
            predict_value, log_prob, arch = self(encoder_input, decoder_input)
            loss_1 = F.mse_loss(predict_value.squeeze(), encoder_target.squeeze())
            loss_2 = F.nll_loss(log_prob.contiguous().view(-1, log_prob.size(-1)), decoder_target.view(-1))
            loss = self.trade_off * loss_1 + (1 - self.trade_off) * loss_2
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_norm)
            self.optimizer.step()

            n = encoder_input.size(0)
            objs.update(loss.data, n)
            mse.update(loss_1.data, n)
            nll.update(loss_2.data, n)
        return objs.avg, mse.avg, nll.avg
Example #4
def child_train(train_queue, model, optimizer, global_step, arch_pool, arch_pool_prob, criterion, log_interval=100):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    model.train()
    for step, (input, target) in enumerate(train_queue):
        input = utils.move_to_cuda(input)
        target = utils.move_to_cuda(target)

        optimizer.zero_grad()
        # sample an arch to train
        arch = utils.sample_arch(arch_pool, arch_pool_prob)
        logits = model(input, arch)
        loss = criterion(logits, target)
        loss.backward()
        optimizer.step()
        
        prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        objs.update(loss.data, n)
        top1.update(prec1.data, n)
        top5.update(prec5.data, n)

        global_step += 1
        
        if global_step % log_interval == 0:
            logging.info('Train %03d loss %e top1 %f top5 %f', global_step, objs.avg, top1.avg, top5.avg)
            logging.info('Arch: %s', ' '.join(map(str, arch)))
        
        if global_step >= args.max_num_updates:
            break

    return top1.avg, objs.avg, global_step
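
`utils.accuracy(logits, target, topk=(1, 5))` in Examples #1 and #4 matches the widely copied torchvision-style top-k helper; a sketch under that assumption:

def accuracy(output, target, topk=(1,)):
    # Percentage of samples whose target appears among the top-k predictions.
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res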
Example #5
    def __init__(self, config):
        """
        Args:
            config: A config dictionary.

        """

        self._config = config
        utils.move_to_cuda(self._config)

        # TODO This should not depend on rl_algorithm_config in the future
        self._episode_length = self._config['steps_per_episodes']
        self._reward_scale = 1.0  #self._config['rl_algorithm_config']['algo_params']['reward_scale']

        self._env_class = select_environment(self._config['env']['env_name'])
        self._env = evoenvs.HalfCheetahEnv(config=self._config)

        self._replay = EvoReplayLocalGlobalStart(
            self._env,
            max_replay_buffer_size_species=int(1e6),
            max_replay_buffer_size_population=int(1e7))

        self._rl_alg_class = select_rl_alg(self._config['rl_method'])

        self._networks = self._rl_alg_class.create_networks(env=self._env,
                                                            config=config)

        self._rl_alg = self._rl_alg_class(config=self._config,
                                          env=self._env,
                                          replay=self._replay,
                                          networks=self._networks)

        self._do_alg_class = select_design_opt_alg(
            self._config['design_optim_method'])
        self._do_alg = self._do_alg_class(config=self._config,
                                          replay=self._replay,
                                          env=self._env)

        # if self._config['use_cpu_for_rollout']:
        #     utils.move_to_cpu()
        # else:
        #     utils.move_to_cuda(self._config)
        # # TODO this is a temp fix - should be cleaned up, not so happy with it atm
        # self._policy_cpu = self._rl_alg_class.get_policy_network(SoftActorCritic.create_networks(env=self._env, config=config)['individual'])
        utils.move_to_cuda(self._config)

        self._last_single_iteration_time = 0
        self._design_counter = 0
        self._episode_counter = 0
        self._data_design_type = 'Initial'
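
Note that Examples #5, #10, and #15 call `utils.move_to_cuda(self._config)` with a config object and pair it with `utils.move_to_cpu()`, so in that codebase the helper evidently switches a process-wide default device rather than moving a batch. A hypothetical sketch of that variant, assuming a dict-like config with a made-up 'use_gpu' key and PyTorch >= 2.0:

import torch

def move_to_cuda(config):
    # Hypothetical: select the default device based on the experiment config.
    if config.get('use_gpu', True) and torch.cuda.is_available():
        torch.set_default_device('cuda')

def move_to_cpu():
    torch.set_default_device('cpu')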
Example #6
    def valid_step(self, sample):
        """Do forward pass in evaluation mode."""
        with torch.no_grad():
            self._model.eval()
            self.criterion.eval()
            if self.cuda:
                sample = utils.move_to_cuda(sample)

            results = self.criterion(self._model, sample)
            loss, nll_loss, ko_loss, sample_size, logging_output, to_print, \
            offset_print = results
            nkp_tokens = logging_output.get('nkp_tokens', 0)

        # update meters for validation
        ntokens = logging_output.get('ntokens', 0)
        self.meters['valid_nll_loss'].update(
            logging_output.get('nll_loss', 0) / ntokens, ntokens)
        self.meters['valid_total_loss'].update(
            logging_output.get('total_loss', 0) / ntokens, ntokens)
        self.meters['valid_offset_loss'].update(
            logging_output.get('kp_offset_loss', 0) / nkp_tokens, nkp_tokens)

        return logging_output, to_print, offset_print
Example #7
def validation_acc(model, dev_iters, epoch, epochs, node_dict, edge_dict,
                   max_nodes, cuda):
    """ Evaluate the model on dev set"""
    model.eval()
    eval_st = time.time()
    graphs, graph_corrects = 0, 0

    for i, dev_it in enumerate(dev_iters):
        if cuda:
            samples = move_to_cuda(dev_it)
        else:
            samples = dev_it

        _, _, _, batch_graph_correct = greedy_search(
            model, samples["src_graph"], samples["src_text"],
            samples["tgt_graph"], node_dict, edge_dict, max_nodes, cuda)
        graph_corrects += batch_graph_correct
        graphs += 1

    acc = graph_corrects / graphs
    eval_time = (time.time() - eval_st) / 60
    eval_info = "[  Eval {:02}/{:02}]: accuracy={:.4f}  elapse={:.4f} mins"
    print(eval_info.format(epoch + 1, epochs, acc, eval_time))

    model.train()
    return acc
Example #8
    def _prepare_sample(self, sample):
        if sample is None or len(sample) == 0:
            return None

        if self.cuda:
            sample = utils.move_to_cuda(sample)

        return sample
Example #9
    def infer(self, split, step, direction='+'):
        queue = self.queues[split]
        new_arch_list = []
        self.eval()
        for i, sample in enumerate(queue):
            encoder_input = utils.move_to_cuda(sample['encoder_input'])
            self.zero_grad()
            new_arch = self.generate_new_arch(encoder_input, step, direction=direction)
            new_arch_list.extend(new_arch.data.squeeze().tolist())
        return new_arch_list
Example #10
    def collect_training_experience(self):
        """ Collect training data.

        This function executes a single episode in the environment using the
        exploration strategy/mechanism and the policy.
        The data, i.e. state-action-reward-nextState, is stored in the replay
        buffer.

        """
        state = self._env.reset()
        nmbr_of_steps = 0
        done = False

        if self._episode_counter < self._config['initial_episodes']:
            policy_gpu_ind = self._rl_alg_class.get_policy_network(
                self._networks['population'])
        else:
            policy_gpu_ind = self._rl_alg_class.get_policy_network(
                self._networks['individual'])
        # self._policy_cpu = utils.copy_network(network_to=self._policy_cpu, network_from=policy_gpu_ind, config=self._config, force_cpu=self._config['use_cpu_for_rollout'])
        self._policy_cpu = policy_gpu_ind

        if self._config['use_cpu_for_rollout']:
            utils.move_to_cpu()
        else:
            utils.move_to_cuda(self._config)

        while not (done) and nmbr_of_steps <= self._episode_length:
            nmbr_of_steps += 1
            action, _ = self._policy_cpu.get_action(state)
            new_state, reward, done, info = self._env.step(action)
            # TODO this has to be fixed _variant_spec
            reward = reward * self._reward_scale
            terminal = np.array([done])
            reward = np.array([reward])
            self._replay.add_sample(observation=state,
                                    action=action,
                                    reward=reward,
                                    next_observation=new_state,
                                    terminal=terminal)
            state = new_state
        self._replay.terminate_episode()
        utils.move_to_cuda(self._config)
Example #11
def main():

    parser = get_inference_parser()
    args = parser.parse_args()

    vocab = Vocab(utils.DATA_DIR + "vocab.txt")

    test_dataset = ArgumentGenerationDataset(args=args,
                                             set_type="oracle_test.toy",
                                             vocab=vocab)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 collate_fn=test_dataset.collater)

    ckpt_path = utils.find_ckpt_path(args.exp_name, args.epoch_id)
    model = Candela.load_from_checkpoint(ckpt_path)
    model.eval()
    model.cuda()

    decoding_strategy = DecodingStrategy(model=model, vocab=vocab, args=args)
    fout = open(f"output/{args.exp_name}_epoch={args.epoch_id}.jsonl", "w")

    test_tqdm = tqdm(enumerate(test_dataloader),
                     total=len(test_dataset) / args.batch_size)
    for batch_ix, batch in test_tqdm:
        batch = utils.move_to_cuda(batch)
        batch_size = len(batch['id'])

        with torch.no_grad():
            output, stype_results, ph_sel_results = decoding_strategy.generate(
                batch)

        for b in range(batch_size):
            cur_tok_ids_raw = output[b][0]
            cur_tok_ids_no_special = [
                item for item in cur_tok_ids_raw
                if item not in vocab.special_token_idx
            ]
            cur_output_tokens_raw = vocab.decode(cur_tok_ids_raw)
            cur_output_str = " ".join(vocab.decode(cur_tok_ids_no_special))

            enc_src_len = batch['enc_src_len'][b]
            enc_src = batch['enc_src'][b][:enc_src_len]
            enc_src = vocab.decode(enc_src)

            output_obj = {
                "id": batch['id'][b],
                "op": " ".join(enc_src),
                "output_tokens": cur_output_tokens_raw,
                "output": cur_output_str,
                "sentence_types": stype_results[b],
                "phrase_selection": ph_sel_results[b],
            }
            fout.write(json.dumps(output_obj) + "\n")
    fout.close()
Example #12
def controller_infer(queue, model, step, direction='+'):
    new_arch_list = []
    model.eval()
    for i, sample in enumerate(queue):
        encoder_input = utils.move_to_cuda(sample['encoder_input'])
        model.zero_grad()
        new_arch = model.generate_new_arch(encoder_input,
                                           step,
                                           direction=direction)
        new_arch_list.extend(new_arch.data.squeeze().tolist())
    return new_arch_list
Example #13
def main():
    parser = get_parser("test")
    args = parser.parse_args()

    print(args)

    cuda = torch.cuda.is_available()

    node_dict, edge_dict, text_dict = load_dict(args)

    test_data = load_data(args, node_dict, edge_dict, text_dict,
                          stage="test")

    test_tgt_sizes = [test_data.item_size(i, -1) for i in range(len(test_data))]
    print(" [test]: {} examples".format(len(test_data)))

    test_iters = DataLoader(test_data,
                            batch_sampler=BatchSampler(torch.tensor(test_tgt_sizes), batch=args.batch_size),
                            collate_fn=test_data.collate_fn)

    model = GraphTrans(args, node_dict, edge_dict, text_dict)
    model.eval()
    if cuda:
        model.cuda()

    saved = load_model(args, model, inference=True)
    if not saved:
        raise FileNotFoundError("Checkpoint does not exist")

    edges_correct, edges_num, edges_pred = 0, 0, 0
    nodes_correct, nodes_num, nodes_pred = 0, 0, 0
    graphs, graph_corrects = 0, 0

    for i, test_it in enumerate(test_iters):
        if cuda:
            samples = move_to_cuda(test_it)
        else:
            samples = test_it

        batch_correct, batch_num, batch_pred, batch_graph_correct = greedy_search(
            model, samples["src_graph"], samples["src_text"], samples["tgt_graph"],
            node_dict, edge_dict, args.max_nodes, cuda)

        nodes_correct += batch_correct[0]
        nodes_num += batch_num[0]
        nodes_pred += batch_pred[0]
        edges_correct += batch_correct[1]
        edges_num += batch_num[1]
        edges_pred += batch_pred[1]
        graph_corrects += batch_graph_correct
        graphs += 1

    print("Node: Recall: {:.2f}({}/{}), Precision: {:.2f}({}/{}) ".format(nodes_correct/nodes_num * 100, nodes_correct, nodes_num, nodes_correct/nodes_pred * 100, nodes_correct, nodes_pred))
    print("Edge: Recall: {:.2f}({}/{}), Precision: {:.2f}({}/{}) ".format(edges_correct/edges_num * 100, edges_correct, edges_num, edges_correct/edges_pred * 100, edges_correct, edges_pred))
    print("Accuracy: {:.2f}({}/{})".format(graph_corrects/graphs * 100, graph_corrects, graphs))
Example #14
def child_valid(valid_queue, model, arch_pool, criterion, log_interval=1):
    valid_acc_list = []
    with torch.no_grad():
        model.eval()
        for i, arch in enumerate(arch_pool):
            # for step, (input, target) in enumerate(valid_queue):
            # each arch is evaluated on a single batch drawn fresh from the queue
            inputs, targets = next(iter(valid_queue))
            inputs = utils.move_to_cuda(inputs)
            targets = utils.move_to_cuda(targets)

            logits = model(inputs, arch, bn_train=True)
            loss = criterion(logits, targets)

            prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
            valid_acc_list.append(prec1.data / 100)

            if (i + 1) % log_interval == 0:
                logging.info('Valid arch %s\n loss %.2f top1 %f top5 %f',
                             ' '.join(map(str, arch)), loss, prec1, prec5)

    return valid_acc_list
Example #15
    def execute_policy(self):
        """ Evaluates the current deterministic policy.

        Evaluates the current policy in the environment by unrolling a single
        episode in the environment.
        The achieved cumulative reward is logged.

        """
        state = self._env.reset()
        done = False
        reward_ep = 0.0
        reward_original = 0.0
        action_cost = 0.0
        nmbr_of_steps = 0

        if self._episode_counter < self._config['initial_episodes']:
            policy_gpu_ind = self._rl_alg_class.get_policy_network(
                self._networks['population'])
        else:
            policy_gpu_ind = self._rl_alg_class.get_policy_network(
                self._networks['individual'])
        # self._policy_cpu = utils.copy_network(network_to=self._policy_cpu, network_from=policy_gpu_ind, config=self._config, force_cpu=self._config['use_cpu_for_rollout'])
        self._policy_cpu = policy_gpu_ind

        if self._config['use_cpu_for_rollout']:
            utils.move_to_cpu()
        else:
            utils.move_to_cuda(self._config)

        while not (done) and nmbr_of_steps <= self._episode_length:
            nmbr_of_steps += 1
            action, _ = self._policy_cpu.get_action(state, deterministic=True)
            new_state, reward, done, info = self._env.step(action)
            action_cost += info['orig_action_cost']
            reward_ep += float(reward)
            reward_original += float(info['orig_reward'])
            state = new_state
        utils.move_to_cuda(self._config)
        # Do something here to log the results
        self._data_rewards.append(reward_ep)
Example #16
def infer():
    parser = get_inference_config()
    args = parser.parse_args()

    ckpt_path = utils.get_latest_ckpt_path(args.ckpt_dir)
    print(f'Evaluating on {ckpt_path}')

    # load_from_checkpoint is a classmethod that returns the restored model
    model = XRef.load_from_checkpoint(ckpt_path)

    # model.freeze()
    model.cuda()

    fout = open('output/' + args.output_path, 'w')
    # mapping mentions to list of prediction results over all candidates
    results = {}

    neg_count, pos_count = 0, 0
    for batch in tqdm(model.test_dataloader()):
        net_input = utils.move_to_cuda(batch)
        _, output_probs, accuracy = model(net_input)
        output_probs = (output_probs[0] > 0.5).long().tolist()
        for ix, ins_id in enumerate(batch['id']):
            art_id, cmt_id, ment_id, cand_id = ins_id.split('_')
            cmt_text = batch['comment_text'][ix]
            cand_text = batch['cand_text'][ix]
            ment = batch['mention_tuple'][ix]
            label = batch['labels'][ix].item()

            ment_id = f'{art_id}_{cmt_id}_{ment_id}'
            if ment_id not in results:
                results[ment_id] = {
                    'comment': cmt_text,
                    'mention': ment,
                    'candidates': []
                }

            results[ment_id]['candidates'].append(
                (cand_text, output_probs[ix], int(label)))
            if output_probs[ix] == 1:
                pos_count += 1
            else:
                neg_count += 1
    for ment, rst in results.items():
        modified_output_obj = rst
        modified_output_obj['candidates'] = sorted(rst['candidates'],
                                                   key=lambda x: x[-1],
                                                   reverse=True)
        fout.write(json.dumps(modified_output_obj) + '\n')
    fout.close()
    print(pos_count)
    print(neg_count)
Example #17
def generate_batch(model, batch, beam_size, alpha, max_time_step):
    batch = move_to_cuda(batch, model.device)
    res = dict()
    token_batch, score_batch = [], []
    beams = model.work(batch, beam_size, max_time_step)
    for beam in beams:
        best_hyp = beam.get_k_best(1, alpha)[0]
        predicted_token = [token for token in best_hyp.seq[1:-1]]
        token_batch.append(predicted_token)
        score_batch.append(best_hyp.score)
    res['token'] = token_batch
    res['score'] = score_batch
    return res
Example #18
def child_valid(valid_queue, model, arch_pool, criterion, log_interval=1):
    valid_acc_list = []
    #top1 = utils.AverageMeter()
    with torch.no_grad():
        model.eval()
        for i, arch in enumerate(arch_pool):
            #top1.reset()         
            #for step, (input, target) in enumerate(valid_queue):
            # each arch is evaluated on a single batch drawn fresh from the queue
            inputs, targets = next(iter(valid_queue))
            inputs = utils.move_to_cuda(inputs)
            targets = utils.move_to_cuda(targets)
                
            logits = model(inputs, arch, bn_train=True)
            loss = criterion(logits, targets)
                
            prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
            #top1.update(prec1.item(), inputs.size(0))
            valid_acc_list.append(prec1.item()/100)
            
            logging.info('Valid %d arch %s\n loss %.2f top1 %f', i+1, ' '.join(map(str, arch)), loss, prec1.item())
        
    return valid_acc_list
Example #19
def generate_batched_itr(data_itr, strategy, model, task_dict):

    for sample in data_itr:
        s = utils.move_to_cuda(sample)

        with torch.no_grad():

            hypos, kp_offset_pred = strategy.generate(model, s)

            for batch in range(hypos.size(0)):
                example_id = s['id'][batch]
                src_ids = s['net_input']['input_ids'][batch].tolist()

                ret_obj = {'id': example_id}

                gtruth_kp_tgt = s['kp_target'][batch].tolist()
                ref_kp_tgt_len = s['kp_target_length'][batch].item()
                gtruth_kp_tgt = gtruth_kp_tgt[:ref_kp_tgt_len]
                ret_obj['gtruth_kp_tgt'] = gtruth_kp_tgt

                prompt_end = src_ids.index(task_dict.sep())
                ret_obj['prompt_ids'] = src_ids[:prompt_end]


                kp_src_end = src_ids.index(task_dict.pad()) if task_dict.pad() in src_ids else len(src_ids)
                kp_src_ids = src_ids[prompt_end + 1: kp_src_end]
                kp_tgt_ids = None
                ret_obj['kp_tgt_ids'] = kp_tgt_ids
                ret_obj['kp_src_ids'] = kp_src_ids

                hypo = hypos[batch].tolist()


                if task_dict.bok() in hypo:
                    kp_tgt_start = hypo.index(task_dict.bok())
                    kp_tgt_end = hypo.index(task_dict.eos()) if task_dict.eos() in hypo else len(hypo)
                    kp_tgt_ids = hypo[kp_tgt_start + 1: kp_tgt_end]
                    ret_obj['kp_tgt_ids'] = kp_tgt_ids

                # if task_dict.bos() in hypo:
                #     hypo_start = hypo.index(task_dict.bos())
                #     hypo_end = hypo.index(task_dict.eos())
                #     hypo = hypo[hypo_start:]
                #
                #     generated_tgt = hypo[: hypo_end]
                #     ret_obj['tgt'] = generated_tgt

                cur_kp_offset_pred = kp_offset_pred[batch]
                ret_obj['offset'] = cur_kp_offset_pred

                yield ret_obj
Example #20
def valid_epoch(model, valid_dataloader, args, vocab, tb_logger):
    total_losses = {
        "total": 0,
        "token_ppl": 0,
        "token_loss": 0,
        "sentence_type_loss": 0,
        "phrase_selection_loss": 0,
    }
    n_iters = 0
    for batch_ix, batch in tqdm(enumerate(valid_dataloader),
                                total=len(valid_dataloader.dataset) /
                                args.batch_size):
        batch = utils.move_to_cuda(batch)
        stype_logits, token_logits, ph_attn, _ = model(batch)

        losses = compute_losses(
            token_logits=token_logits,
            token_targets=batch["dec_out"],
            pad_token_id=vocab.pad_idx,
            sentence_type_logits=stype_logits,
            sentence_type_targets=batch["sent_types"],
            ph_bank_attn=ph_attn,
            ph_bank_len=batch["ph_bank_len_tensor"],
            ph_bank_sel_ind_targets=batch["ph_sel_ind_tensor"])

        model_loss = losses['token_loss'] + \
                     args.gamma * losses["sentence_type_loss"] + \
                     args.eta * losses["phrase_selection_loss"]

        for loss_type in losses:
            total_losses[loss_type] += losses[loss_type].item()
        total_losses["total"] += model_loss.item()
        n_iters += 1

    tb_logger.add_scalar("valid_loss_total", total_losses["total"] / n_iters,
                         model.global_steps)
    tb_logger.add_scalar("valid_loss_token",
                         total_losses["token_loss"] / n_iters,
                         model.global_steps)
    tb_logger.add_scalar("valid_loss_sentence_type",
                         total_losses["sentence_type_loss"] / n_iters,
                         model.global_steps)
    tb_logger.add_scalar("valid_loss_phrase_selection",
                         total_losses["phrase_selection_loss"] / n_iters,
                         model.global_steps)
    tb_logger.add_scalar("valid_PPL", total_losses["token_ppl"] / n_iters,
                         model.global_steps)
    return {
        loss_type: loss_val / n_iters
        for loss_type, loss_val in total_losses.items()
    }
Example #21
def multi_task():
    content = request.get_json(silent=True, force=True)
    response = OrderedDict()  # initialized up front so the final return cannot hit a NameError
    if "data" in content:
        length = content.get("length", 30)
        is_beam = content.get("beam", -1)
        task_type = content.get("type", 0)
        if task_type not in [0, 1, 2]:
            task_type = 0
        repeat = content.get("repeat", 1)
        begin_time = time.time()
        logger.error("user message...")
        text = content["data"]
        logger.error(text)
        with torch.no_grad():
            response["user-query"] = text
            context = convert_task_content(client.tokenizer, text, task_type)
            ids_length = context["input_tokens"].size(1)
            context = move_to_cuda(context, client.device)
            reply = []
            for i in range(repeat):
                out = client.generator(client.multi_task_model,
                                       client.tokenizer,
                                       length,
                                       context=context,
                                       temperature=1,
                                       top_k=5,
                                       device=client.device,
                                       sample=True)
                out = out[ids_length - 1:]
                out = client.tokenizer.convert_ids_to_text(out)
                out = out.replace("##", "")
                reply.append(out)
            if len(reply) == 1:
                reply = reply[0]

            beam_out = None
            if is_beam != -1:
                client.multi_task_beam.beam_size = is_beam
                client.multi_task_beam.max_lens = length
                beam_out = client.multi_task_beam.generate_response(context)
            response["sampling-response"] = reply
            if beam_out is not None:
                response[
                    "beam-response"] = client.tokenizer.convert_ids_to_text(
                        beam_out).replace("##", "")
        interval = time.time() - begin_time
        logger.error("elapsed time = %s", interval)
        response["interval"] = interval
    return json.dumps(response, ensure_ascii=False)
Example #22
def valid(valid_queue, model, criterion):
    objs = utils.AverageMeter()
    top1 = utils.AverageMeter()
    top5 = utils.AverageMeter()
    with torch.no_grad():
        model.eval()
        for step, (input, target) in enumerate(valid_queue):
            input = utils.move_to_cuda(input)
            target = utils.move_to_cuda(target)
        
            logits = model(input)
            loss = criterion(logits, target)
        
            prec1, prec5 = utils.accuracy(logits, target, topk=(1, 5))
            n = input.size(0)
            objs.update(loss.data, n)
            top1.update(prec1.data, n)
            top5.update(prec5.data, n)
        
            if (step+1) % 100 == 0:
                logging.info('valid %03d %e %f %f', step+1, objs.avg, top1.avg, top5.avg)

    return top1.avg, top5.avg, objs.avg
Example #23
def forward_step(data, model, tokenizer, criterion, args):
    """Forward step."""

    sample = move_to_cuda(data, torch.cuda.current_device())
    output, nsp, past = model(**sample["net_input"])
    nsp_labels = sample["nsp_labels"]
    target = sample["target"]
    nsp_loss = criterion(
        nsp.view(-1, 3).contiguous().float(),
        nsp_labels.view(-1).contiguous())
    losses = criterion(
        output.view(-1, tokenizer.num_tokens).contiguous().float(),
        target.contiguous().view(-1).contiguous())

    return losses, nsp_loss, sample["nsentences"], sample["ntokens"]
Example #24
def predict(args, model, eval_dataloader, device, fp16=False):
    if type(model) == list:
        model = [m.eval() for m in model]
    else:
        model.eval()

    if fp16:
        if type(model) == list:
            model = [m.half() for m in model]
        else:
            model.half()

    num_correct = 0.0
    num_total = 0.0
    for batch in tqdm(eval_dataloader):
        batch_to_feed = move_to_cuda(batch)
        if fp16:
            batch_to_feed = convert_to_half(batch_to_feed)
        with torch.no_grad():
            results = model(batch_to_feed)
            product = torch.mm(results["q"], results["c"].t())
            target = torch.arange(product.size(0)).to(product.device)
            prediction = product.argmax(-1)
            pred_res = prediction == target
            num_total += len(pred_res)
            num_correct += sum(pred_res)

    acc = num_correct / num_total
    print(f"evaluated {num_total} examples...")
    print(f"avg. Acc: {acc}")

    if fp16:
        model.float()
    model.train()

    return acc
Example #25
def predict(args, model, eval_dataloader, device, logger):
    model.eval()
    id2result = collections.defaultdict(list)
    for batch in tqdm(eval_dataloader):
        batch_to_feed = move_to_cuda(batch["net_inputs"])
        batch_qids = batch["qids"]
        batch_labels = batch["net_inputs"]["label"].view(-1).tolist()
        with torch.no_grad():
            scores = model(batch_to_feed)
            scores = scores.view(-1).tolist()
        for qid, label, score in zip(batch_qids, batch_labels, scores):
            id2result[qid].append((label, score))

    acc = []
    top_pred = {}
    for qid, res in id2result.items():
        res.sort(key=lambda x: x[1], reverse=True)
        acc.append(res[0][0] == 1)
    logger.info(f"evaluated {len(id2result)} questions...")
    logger.info(f'acc: {np.mean(acc)}')
    model.train()
    return np.mean(acc)
Example #26
def generate_synthetic_controller_data(model, exclude=[], maxn=1000):
    synthetic_input = []
    synthetic_target = []
    while len(synthetic_input) < maxn:
        synthetic_arch = utils.generate_arch(1, args.layers, args.num_ops)[0]
        if synthetic_arch not in exclude and synthetic_arch not in synthetic_input:
            synthetic_input.append(synthetic_arch)

    synthetic_dataset = utils.ControllerDataset(synthetic_input, None, False)
    synthetic_queue = torch.utils.data.DataLoader(
        synthetic_dataset,
        batch_size=len(synthetic_dataset),
        shuffle=False,
        pin_memory=True)

    with torch.no_grad():
        model.eval()
        for sample in synthetic_queue:
            input = utils.move_to_cuda(sample['encoder_input'])
            _, _, _, predict_value = model.encoder(input)
            synthetic_target += predict_value.data.squeeze().tolist()
    assert len(synthetic_input) == len(synthetic_target)
    return synthetic_input, synthetic_target
Example #27
    def train_step(self, sample):
        """Do forward, backward and parameter update."""
        self._set_seed()
        self._model.train()
        self.criterion.train()
        self.optimizer.zero_grad()

        # forward and backward pass
        if self.cuda:
            sample = utils.move_to_cuda(sample)

        # forward and backward
        results = self.criterion(self._model, sample)

        loss, nll_loss, ko_loss, sample_size, logging_output, to_print, \
        offset_print = results
        nkp_tokens = logging_output.get('nkp_tokens', 0)

        # backward pass, then clip grads before taking the optimizer step
        loss.backward()
        self.optimizer.clip_grad_norm(self.args.clip_norm)
        self.optimizer.step()
        self.set_num_updates(self.get_num_updates() + 1)

        # update meters
        ntokens = logging_output.get('ntokens', 0)

        self.meters['train_nll_loss'].update(
            logging_output.get('nll_loss', 0) / ntokens, ntokens)
        self.meters['train_total_loss'].update(
            logging_output.get('total_loss', 0) / ntokens, ntokens)
        self.meters['train_offset_loss'].update(
            logging_output.get('kp_offset_loss', 0) / nkp_tokens, nkp_tokens)
Example #28
def predict(args,
            model,
            eval_dataloader,
            device,
            fp16=False,
            is_query_embed=True):
    if type(model) == list:
        model = [m.eval() for m in model]
    else:
        model.eval()
    if fp16:
        if type(model) == list:
            model = [m.half() for m in model]
        else:
            model.half()

    embed_array = []
    for batch in tqdm(eval_dataloader):
        batch_to_feed = move_to_cuda(batch)
        with torch.no_grad():
            results = model.get_embed(batch_to_feed, is_query_embed)
            embed = results['embed']
            embed_array.append(embed)

    embed_array = torch.cat(embed_array)

    if fp16:
        model.float()

    model.train()
    return embed_array
Example #29
File: train.py Project: zhw12/ProQA
def predict(logger, args, model, eval_dataloader, device, fp16=False):
    model.eval()

    if fp16:
        model.half()

    qid2results = {}
    for batch in tqdm(eval_dataloader):
        batch_to_feed = move_to_cuda(batch["net_input"])
        if fp16:
            batch_to_feed = convert_to_half(batch_to_feed)
        with torch.no_grad():
            results = model(batch_to_feed)
            batch_start_logits = results["start_logits"]
            batch_end_logits = results["end_logits"]
            question_mask = batch_to_feed["paragraph_mask"].ne(1)
            outs = [
                o.float().masked_fill(question_mask, -1e10).type_as(o)
                for o in [batch_start_logits, batch_end_logits]
            ]

        span_scores = outs[0][:, :, None] + outs[1][:, None]
        max_answer_lens = 20
        max_seq_len = span_scores.size(1)
        span_mask = np.tril(np.triu(np.ones((max_seq_len, max_seq_len)), 0),
                            max_answer_lens)
        span_mask = span_scores.data.new(max_seq_len, max_seq_len).copy_(
            torch.from_numpy(span_mask))
        span_scores_masked = span_scores.float().masked_fill(
            (1 - span_mask[None].expand_as(span_scores)).bool(),
            -1e10).type_as(span_scores)

        start_position = span_scores_masked.max(dim=2)[0].max(dim=1)[1]
        end_position = span_scores_masked.max(dim=2)[1].gather(
            1, start_position.unsqueeze(1)).squeeze(1)

        para_offset = batch['para_offset']
        start_position_ = list(
            np.array(start_position.tolist()) - np.array(para_offset))
        end_position_ = list(
            np.array(end_position.tolist()) - np.array(para_offset))

        for idx, qid in enumerate(batch['id']):
            start = start_position_[idx]
            end = end_position_[idx]
            tok_to_orig_index = batch['tok_to_orig_index'][idx]
            doc_tokens = batch['doc_tokens'][idx]
            wp_tokens = batch['wp_tokens'][idx]
            orig_doc_start = tok_to_orig_index[start]
            orig_doc_end = tok_to_orig_index[end]
            orig_tokens = doc_tokens[orig_doc_start:(orig_doc_end + 1)]
            tok_tokens = wp_tokens[start:end + 1]
            tok_text = " ".join(tok_tokens)
            tok_text = tok_text.replace(" ##", "")
            tok_text = tok_text.replace("##", "")
            tok_text = tok_text.strip()
            tok_text = " ".join(tok_text.split())
            orig_text = " ".join(orig_tokens)
            final_text = get_final_text(tok_text,
                                        orig_text,
                                        logger,
                                        do_lower_case=args.do_lower_case,
                                        verbose_logging=False)
            qid2results[qid] = [final_text, batch['true_answers'][idx]]

    f1s = [
        metric_max_over_ground_truths(f1_score, item[0], item[1])
        for item in qid2results.values()
    ]
    ems = [
        metric_max_over_ground_truths(exact_match_score, item[0], item[1])
        for item in qid2results.values()
    ]

    print(f"evaluated {len(f1s)} examples...")
    if fp16:
        model.float()
    model.train()

    return (np.mean(f1s), np.mean(ems))
Example #30
    def generate(self, model, batch):

        net_input = utils.move_to_cuda(batch['net_input'])
        encoder_input_ids = net_input['input_ids']
        encoder_attn_mask = net_input['attention_mask']
        batch_size = encoder_input_ids.shape[0]

        encoder = model.get_encoder()
        encoder_outputs = encoder(encoder_input_ids,
                                  attention_mask=encoder_attn_mask)

        # create empty decoder_input_ids
        input_ids = torch.full(
            (batch_size, 1),
            self.decoder_bos_idx,
            dtype=torch.long,
            device=next(model.parameters()).device,
        )
        cur_len = 1
        probs = [[] for _ in range(batch_size)]

        unfinished_sents = input_ids.new(batch_size).fill_(1)

        past = encoder_outputs  # defined for encoder-decoder models, None for decoder-only models

        while cur_len < self.domain_to_max_len[self.domain]:
            model_inputs = self.prepare_inputs_for_generation(
                input_ids, past=past, attention_mask=encoder_attn_mask)

            outputs = model(**model_inputs)
            next_token_logits = outputs[0][:, -1, :]
            past = outputs[1]

            if self.do_sampling:
                # Temperature (higher temperature => more likely to sample low probability tokens)
                if self.temperature != 1.0:
                    next_token_logits = next_token_logits / self.temperature
                # Top-p/top-k filtering
                next_token_logits = top_k_top_p_filtering(next_token_logits,
                                                          top_k=self.topk,
                                                          top_p=self.topp)
                # Sample
                next_token_probs = F.softmax(next_token_logits, dim=-1)
                next_token = torch.multinomial(next_token_probs,
                                               num_samples=1).squeeze(1)
            else:
                # Greedy decoding
                next_token_probs = F.softmax(next_token_logits, dim=-1)
                next_token = torch.argmax(next_token_logits, dim=-1)

            chosen_token_probs = next_token_probs.gather(
                1, next_token.view(-1, 1))
            for b in range(batch_size):
                probs[b].append(chosen_token_probs[b, 0].item())

            # pad finished sentences if eos_token_id exist
            tokens_to_add = next_token * unfinished_sents + (self.pad_idx) * (
                1 - unfinished_sents)

            if not self.quiet:
                output_str = ''
                for b in range(batch_size):
                    w = self.tokenizer.convert_ids_to_tokens(
                        [tokens_to_add[b]])[0]
                    p = probs[b][-1]
                    output_str += '{:>12}({:.2f})|'.format(w, 100 * p)
                if cur_len == 1:
                    print('=' * 50)
                print('step={:<3d}|{}'.format(cur_len, output_str))

            input_ids = torch.cat(
                [input_ids, tokens_to_add.unsqueeze(-1)], dim=-1)
            eos_in_sents = tokens_to_add == self.eos_idx
            unfinished_sents.mul_((~eos_in_sents).long())

            # stop when there is a </s> in each sentence, or if we exceed the maximum length
            if unfinished_sents.max() == 0:
                break
            cur_len = cur_len + 1

        return input_ids, probs
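
The sampling branch of Example #30 depends on `top_k_top_p_filtering`, which in the older Hugging Face example code masks out logits outside the top-k set and outside the smallest nucleus whose cumulative probability exceeds top_p. A condensed sketch of that standard technique, not the exact upstream function:

import torch
import torch.nn.functional as F

def top_k_top_p_filtering(logits, top_k=0, top_p=1.0, filter_value=-float('inf')):
    # Top-k: keep only the k highest-scoring tokens.
    if top_k > 0:
        kth_best = torch.topk(logits, top_k)[0][..., -1, None]
        logits = logits.masked_fill(logits < kth_best, filter_value)
    # Top-p (nucleus): keep the smallest set of tokens with cumulative prob > top_p.
    if top_p < 1.0:
        sorted_logits, sorted_indices = torch.sort(logits, descending=True)
        cum_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
        sorted_remove = cum_probs > top_p
        sorted_remove[..., 1:] = sorted_remove[..., :-1].clone()  # always keep the top token
        sorted_remove[..., 0] = False
        remove = sorted_remove.scatter(-1, sorted_indices, sorted_remove)
        logits = logits.masked_fill(remove, filter_value)
    return logits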