def train(train_queue, model, optimizer, scheduler, global_step, criterion):
    """Train ``model`` for one pass over ``train_queue``.

    Every batch is moved with ``utils.move_to_cuda``, gradients are reset via
    ``model.zero_grad()``, and both the optimizer and the scheduler are
    stepped once per batch. Running averages are logged every 100 batches.

    Args:
        train_queue: Iterable yielding ``(input, target)`` pairs.
        model: Network being trained; called as ``model(input)``.
        optimizer: Optimizer stepped after each backward pass.
        scheduler: LR scheduler stepped after each optimizer step.
        global_step: Update counter on entry; incremented once per batch.
        criterion: Loss callable ``criterion(logits, target)``.

    Returns:
        Tuple ``(top1_avg, loss_avg, global_step)``.
    """
    loss_meter = utils.AverageMeter()
    top1_meter = utils.AverageMeter()
    top5_meter = utils.AverageMeter()

    model.train()
    for batch_no, (images, labels) in enumerate(train_queue, start=1):
        images = utils.move_to_cuda(images)
        labels = utils.move_to_cuda(labels)

        model.zero_grad()
        logits = model(images)
        global_step += 1
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()
        scheduler.step()

        prec1, prec5 = utils.accuracy(logits, labels, topk=(1, 5))
        batch_size = images.size(0)
        loss_meter.update(loss.data, batch_size)
        top1_meter.update(prec1.data, batch_size)
        top5_meter.update(prec5.data, batch_size)

        if batch_no % 100 == 0:
            logging.info('train %03d lr %e loss %e top1 %f top5 %f',
                         batch_no, scheduler.get_lr()[0], loss_meter.avg,
                         top1_meter.avg, top5_meter.avg)

    return top1_meter.avg, loss_meter.avg, global_step
def controller_train(train_queue, model, optimizer):
    """Train the controller for one pass over ``train_queue``.

    Each sample is a mapping providing ``encoder_input``, ``encoder_target``,
    ``decoder_input`` and ``decoder_target``. The loss is
    ``w * mse + (1 - w) * nll`` with ``w = args.controller_trade_off``, and
    gradients are clipped to ``args.controller_grad_clip`` before each step.

    Args:
        train_queue: Iterable of sample mappings.
        model: Controller; ``model(encoder_input, decoder_input)`` returns
            ``(predict_value, log_prob, arch)``.
        optimizer: Optimizer for the controller parameters.

    Returns:
        Tuple ``(loss_avg, mse_avg, nll_avg)`` from the running meters.
    """
    total_meter = utils.AvgrageMeter()
    mse_meter = utils.AvgrageMeter()
    nll_meter = utils.AvgrageMeter()

    model.train()
    for sample in train_queue:
        enc_in = utils.move_to_cuda(sample['encoder_input'])
        enc_tgt = utils.move_to_cuda(sample['encoder_target'])
        dec_in = utils.move_to_cuda(sample['decoder_input'])
        dec_tgt = utils.move_to_cuda(sample['decoder_target'])

        optimizer.zero_grad()
        predict_value, log_prob, _ = model(enc_in, dec_in)

        mse_loss = F.mse_loss(predict_value.squeeze(), enc_tgt.squeeze())
        # Flatten every leading dimension so each decoding position is one
        # classification example for the NLL term.
        flat_log_prob = log_prob.contiguous().view(-1, log_prob.size(-1))
        nll_loss = F.nll_loss(flat_log_prob, dec_tgt.view(-1))
        loss = (args.controller_trade_off * mse_loss
                + (1 - args.controller_trade_off) * nll_loss)

        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(),
                                       args.controller_grad_clip)
        optimizer.step()

        batch_size = enc_in.size(0)
        total_meter.update(loss.data, batch_size)
        mse_meter.update(mse_loss.data, batch_size)
        nll_meter.update(nll_loss.data, batch_size)

    return total_meter.avg, mse_meter.avg, nll_meter.avg
def train_epoch(self, split):
    """Run one training epoch over the data queue registered under ``split``.

    The loss is ``self.trade_off * mse + (1 - self.trade_off) * nll`` and
    gradients are clipped to ``self.clip_grad_norm`` before every
    ``self.optimizer`` step.

    Args:
        split: Key into ``self.queues`` selecting the queue to iterate.

    Returns:
        Tuple ``(loss_avg, mse_avg, nll_avg)`` from the running meters.
    """
    meters = {name: utils.AvgrageMeter() for name in ('loss', 'mse', 'nll')}
    queue = self.queues[split]
    field_names = ('encoder_input', 'encoder_target',
                   'decoder_input', 'decoder_target')

    self.train()
    for sample in queue:
        enc_in, enc_tgt, dec_in, dec_tgt = [
            utils.move_to_cuda(sample[name]) for name in field_names
        ]

        self.optimizer.zero_grad()
        predict_value, log_prob, _ = self(enc_in, dec_in)

        mse_term = F.mse_loss(predict_value.squeeze(), enc_tgt.squeeze())
        nll_term = F.nll_loss(
            log_prob.contiguous().view(-1, log_prob.size(-1)),
            dec_tgt.view(-1))
        loss = self.trade_off * mse_term + (1 - self.trade_off) * nll_term

        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.parameters(), self.clip_grad_norm)
        self.optimizer.step()

        batch_size = enc_in.size(0)
        meters['loss'].update(loss.data, batch_size)
        meters['mse'].update(mse_term.data, batch_size)
        meters['nll'].update(nll_term.data, batch_size)

    return meters['loss'].avg, meters['mse'].avg, meters['nll'].avg
def child_train(train_queue, model, optimizer, global_step, arch_pool,
                arch_pool_prob, criterion, log_interval=100):
    """Train the shared child model, sampling one architecture per batch.

    Args:
        train_queue: Iterable yielding ``(input, target)`` pairs.
        model: Child network; called as ``model(input, arch)``.
        optimizer: Optimizer stepped once per batch.
        global_step: Update counter on entry; incremented once per batch.
        arch_pool: Candidate architectures passed to ``utils.sample_arch``.
        arch_pool_prob: Sampling weights passed to ``utils.sample_arch``.
        criterion: Loss callable ``criterion(logits, target)``.
        log_interval: Log running averages whenever ``global_step`` is a
            multiple of this value.

    Returns:
        Tuple ``(top1_avg, loss_avg, global_step)``. Training stops early
        once ``global_step`` reaches ``args.max_num_updates``.
    """
    loss_meter = utils.AverageMeter()
    top1_meter = utils.AverageMeter()
    top5_meter = utils.AverageMeter()

    model.train()
    for images, labels in train_queue:
        images = utils.move_to_cuda(images)
        labels = utils.move_to_cuda(labels)

        optimizer.zero_grad()
        # Sample an arch to train on this batch.
        arch = utils.sample_arch(arch_pool, arch_pool_prob)
        logits = model(images, arch)
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        prec1, prec5 = utils.accuracy(logits, labels, topk=(1, 5))
        batch_size = images.size(0)
        loss_meter.update(loss.data, batch_size)
        top1_meter.update(prec1.data, batch_size)
        top5_meter.update(prec5.data, batch_size)

        global_step += 1
        if global_step % log_interval == 0:
            logging.info('Train %03d loss %e top1 %f top5 %f', global_step,
                         loss_meter.avg, top1_meter.avg, top5_meter.avg)
            logging.info('Arch: %s', ' '.join(str(op) for op in arch))

        if global_step >= args.max_num_updates:
            break

    return top1_meter.avg, loss_meter.avg, global_step
def __init__(self, config):
    """Wire up the environment, replay buffer, RL and design-opt algorithms.

    Args:
        config: A config dictionary. Keys read here are
            ``steps_per_episodes``, ``env.env_name``, ``rl_method`` and
            ``design_optim_method``; the whole dictionary is also forwarded
            to the objects created below.
    """
    self._config = config
    cfg = self._config
    utils.move_to_cuda(cfg)

    # TODO This should not depend on rl_algorithm_config in the future
    self._episode_length = cfg['steps_per_episodes']
    # Fixed scale; the config-driven value
    # (rl_algorithm_config/algo_params/reward_scale) is not used.
    self._reward_scale = 1.0

    # NOTE(review): the selected environment class is stored but the
    # environment instance is always a HalfCheetahEnv - confirm intent.
    self._env_class = select_environment(cfg['env']['env_name'])
    self._env = evoenvs.HalfCheetahEnv(config=cfg)

    self._replay = EvoReplayLocalGlobalStart(
        self._env,
        max_replay_buffer_size_species=int(1e6),
        max_replay_buffer_size_population=int(1e7))

    self._rl_alg_class = select_rl_alg(cfg['rl_method'])
    self._networks = self._rl_alg_class.create_networks(env=self._env,
                                                        config=config)
    self._rl_alg = self._rl_alg_class(config=cfg,
                                      env=self._env,
                                      replay=self._replay,
                                      networks=self._networks)

    self._do_alg_class = select_design_opt_alg(cfg['design_optim_method'])
    self._do_alg = self._do_alg_class(config=cfg,
                                      replay=self._replay,
                                      env=self._env)

    utils.move_to_cuda(cfg)

    # Bookkeeping counters and labels, all starting from a clean state.
    self._last_single_iteration_time = 0
    self._design_counter = 0
    self._episode_counter = 0
    self._data_design_type = 'Initial'
def valid_step(self, sample):
    """Do forward pass in evaluation mode and update validation meters.

    Args:
        sample: Batch passed to ``self.criterion(self._model, sample)``; it
            is moved with ``utils.move_to_cuda`` first when ``self.cuda`` is
            truthy.

    Returns:
        Tuple ``(logging_output, to_print, offset_print)`` taken from the
        criterion result.
    """
    with torch.no_grad():
        self._model.eval()
        self.criterion.eval()
        if self.cuda:
            sample = utils.move_to_cuda(sample)
        results = self.criterion(self._model, sample)

    # The criterion returns a 7-tuple; only the logging dict and the two
    # printable payloads are consumed here.
    _, _, _, _, logging_output, to_print, offset_print = results

    # Each meter is updated exactly once per batch, and only when its
    # denominator is positive: a missing or zero token count would otherwise
    # divide by zero.
    nkp_tokens = logging_output.get('nkp_tokens', 0)
    if nkp_tokens > 0:
        self.meters['valid_offset_loss'].update(
            logging_output.get('kp_offset_loss', 0) / nkp_tokens, nkp_tokens)

    ntokens = logging_output.get('ntokens', 0)
    if ntokens > 0:
        self.meters['valid_nll_loss'].update(
            logging_output.get('nll_loss', 0) / ntokens, ntokens)
        self.meters['valid_total_loss'].update(
            logging_output.get('total_loss', 0) / ntokens, ntokens)

    return logging_output, to_print, offset_print
def validation_acc(model, dev_iters, epoch, epochs, node_dict, edge_dict,
                   max_nodes, cuda):
    """Evaluate the model on the dev set and print a one-line summary.

    Args:
        model: Model handed to ``greedy_search``; switched to eval mode for
            the duration of the call and back to train mode afterwards.
        dev_iters: Iterable of dev batches, each indexable by ``src_graph``,
            ``src_text`` and ``tgt_graph``.
        epoch: Zero-based epoch index (printed as ``epoch + 1``).
        epochs: Total number of epochs, used only in the printed summary.
        node_dict: Forwarded to ``greedy_search``.
        edge_dict: Forwarded to ``greedy_search``.
        max_nodes: Forwarded to ``greedy_search``.
        cuda: When truthy, batches are moved with ``move_to_cuda`` first.

    Returns:
        Sum of the per-batch ``graph correct`` values divided by the number
        of batches.
    """
    model.eval()
    started_at = time.time()
    batch_count = 0
    correct_total = 0

    for dev_batch in dev_iters:
        samples = move_to_cuda(dev_batch) if cuda else dev_batch
        search_result = greedy_search(model, samples["src_graph"],
                                      samples["src_text"],
                                      samples["tgt_graph"], node_dict,
                                      edge_dict, max_nodes, cuda)
        _, _, _, batch_graph_correct = search_result
        correct_total += batch_graph_correct
        batch_count += 1

    acc = correct_total / batch_count
    elapsed_minutes = (time.time() - started_at) / 60
    summary = "[ Eval {:02}/{:02}]: accuracy={:.4f} elapse={:.4f} mins"
    print(summary.format(epoch + 1, epochs, acc, elapsed_minutes))

    model.train()
    return acc
def _prepare_sample(self, sample):
    """Return ``sample`` ready for the model, or ``None`` if it is empty.

    A sample that is ``None`` or has length zero yields ``None``. Otherwise
    it is moved with ``utils.move_to_cuda`` when ``self.cuda`` is truthy and
    returned unchanged when it is not.
    """
    is_usable = sample is not None and len(sample) != 0
    if not is_usable:
        return None
    return utils.move_to_cuda(sample) if self.cuda else sample
def infer(self, split, step, direction='+'):
    """Generate new architectures for every sample in ``self.queues[split]``.

    Args:
        split: Key into ``self.queues`` selecting the queue to iterate.
        step: Forwarded to ``self.generate_new_arch``.
        direction: Forwarded to ``self.generate_new_arch``; defaults to
            ``'+'``.

    Returns:
        A flat list built by extending with each batch's generated
        architectures converted through ``.data.squeeze().tolist()``.
    """
    queue = self.queues[split]
    generated = []

    self.eval()
    for sample in queue:
        encoder_input = utils.move_to_cuda(sample['encoder_input'])
        self.zero_grad()
        batch_archs = self.generate_new_arch(encoder_input, step,
                                             direction=direction)
        # NOTE(review): squeeze() drops every size-1 axis, so a batch holding
        # a single sample would be extended element-wise - confirm shapes.
        generated.extend(batch_archs.data.squeeze().tolist())

    return generated
def collect_training_experience(self):
    """Roll out one episode and store every transition in the replay buffer.

    The population policy is used while fewer than
    ``config['initial_episodes']`` episodes have been counted; afterwards the
    individual policy is used. Each step's observation, action, scaled reward,
    next observation and terminal flag are added to ``self._replay``, and the
    episode is terminated in the buffer once the loop ends.
    """
    state = self._env.reset()
    steps_taken = 0
    done = False

    in_warmup = self._episode_counter < self._config['initial_episodes']
    network_key = 'population' if in_warmup else 'individual'
    self._policy_cpu = self._rl_alg_class.get_policy_network(
        self._networks[network_key])

    if self._config['use_cpu_for_rollout']:
        utils.move_to_cpu()
    else:
        utils.move_to_cuda(self._config)

    while not done and steps_taken <= self._episode_length:
        steps_taken += 1
        action, _ = self._policy_cpu.get_action(state)
        next_state, raw_reward, done, info = self._env.step(action)
        # TODO this has to be fixed _variant_spec
        scaled_reward = raw_reward * self._reward_scale
        self._replay.add_sample(observation=state,
                                action=action,
                                reward=np.array([scaled_reward]),
                                next_observation=next_state,
                                terminal=np.array([done]))
        state = next_state

    self._replay.terminate_episode()
    utils.move_to_cuda(self._config)
def main():
    """Decode the ``oracle_test.toy`` split and write one JSON object per line.

    Loads the checkpoint located by ``utils.find_ckpt_path`` for
    ``args.exp_name`` / ``args.epoch_id``, runs ``DecodingStrategy.generate``
    on every batch, and writes the results to
    ``output/{exp_name}_epoch={epoch_id}.jsonl``.
    """
    parser = get_inference_parser()
    args = parser.parse_args()

    vocab = Vocab(utils.DATA_DIR + "vocab.txt")
    test_dataset = ArgumentGenerationDataset(args=args,
                                             set_type="oracle_test.toy",
                                             vocab=vocab)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 collate_fn=test_dataset.collater)

    ckpt_path = utils.find_ckpt_path(args.exp_name, args.epoch_id)
    model = Candela.load_from_checkpoint(ckpt_path)
    model.eval()
    model.cuda()

    decoding_strategy = DecodingStrategy(model=model, vocab=vocab, args=args)

    # len(dataloader) is the integer batch count; the previous
    # len(dataset) / batch_size handed tqdm a float total.
    test_tqdm = tqdm(enumerate(test_dataloader), total=len(test_dataloader))

    # The context manager closes the output file even when decoding raises.
    with open(f"output/{args.exp_name}_epoch={args.epoch_id}.jsonl",
              "w") as fout:
        for batch_ix, batch in test_tqdm:
            batch = utils.move_to_cuda(batch)
            batch_size = len(batch['id'])

            with torch.no_grad():
                output, stype_results, ph_sel_results = \
                    decoding_strategy.generate(batch)

            for b in range(batch_size):
                # First entry of each output row holds the token ids.
                cur_tok_ids_raw = output[b][0]
                cur_tok_ids_no_special = [
                    item for item in cur_tok_ids_raw
                    if item not in vocab.special_token_idx
                ]
                cur_output_tokens_raw = vocab.decode(cur_tok_ids_raw)
                cur_output_str = " ".join(
                    vocab.decode(cur_tok_ids_no_special))

                # Trim the encoder source to its recorded length.
                enc_src_len = batch['enc_src_len'][b]
                enc_src = vocab.decode(batch['enc_src'][b][:enc_src_len])

                output_obj = {
                    "id": batch['id'][b],
                    "op": " ".join(enc_src),
                    "output_tokens": cur_output_tokens_raw,
                    "output": cur_output_str,
                    "sentence_types": stype_results[b],
                    "phrase_selection": ph_sel_results[b],
                }
                fout.write(json.dumps(output_obj) + "\n")
def controller_infer(queue, model, step, direction='+'):
    """Generate new architectures from the controller for each sample.

    Args:
        queue: Iterable of mappings that provide ``encoder_input``.
        model: Controller exposing ``generate_new_arch``.
        step: Forwarded to ``model.generate_new_arch``.
        direction: Forwarded to ``model.generate_new_arch``; defaults to
            ``'+'``.

    Returns:
        A flat list built by extending with each batch's generated
        architectures converted through ``.data.squeeze().tolist()``.
    """
    collected = []
    model.eval()
    for sample in queue:
        encoder_input = utils.move_to_cuda(sample['encoder_input'])
        model.zero_grad()
        proposals = model.generate_new_arch(encoder_input, step,
                                            direction=direction)
        # NOTE(review): squeeze() also removes the batch axis when a batch
        # holds one sample - confirm the expected tensor shape.
        collected.extend(proposals.data.squeeze().tolist())
    return collected
def main():
    """Evaluate a saved GraphTrans checkpoint on the test set.

    Prints node and edge recall/precision plus the graph-level accuracy,
    all accumulated over batches by ``greedy_search``.

    Raises:
        FileNotFoundError: If ``load_model`` reports that no checkpoint was
            loaded.
    """
    args = get_parser("test").parse_args()
    print(args)
    cuda = torch.cuda.is_available()

    node_dict, edge_dict, text_dict = load_dict(args)
    test_data = load_data(args, node_dict, edge_dict, text_dict, stage="test")
    test_tgt_sizes = [
        test_data.item_size(idx, -1) for idx in range(len(test_data))
    ]
    print(" [test]: {} examples".format(len(test_data)))

    sampler = BatchSampler(torch.tensor(test_tgt_sizes), batch=args.batch_size)
    test_iters = DataLoader(test_data,
                            batch_sampler=sampler,
                            collate_fn=test_data.collate_fn)

    model = GraphTrans(args, node_dict, edge_dict, text_dict)
    model.eval()
    if cuda:
        model.cuda()

    if not load_model(args, model, inference=True):
        raise FileNotFoundError("Checkpoint does not exist")

    # Index 0 accumulates node statistics, index 1 edge statistics.
    correct = [0, 0]
    gold = [0, 0]
    predicted = [0, 0]
    graph_corrects = 0
    graphs = 0

    for test_batch in test_iters:
        samples = move_to_cuda(test_batch) if cuda else test_batch
        batch_correct, batch_num, batch_pred, batch_graph_correct = \
            greedy_search(model, samples["src_graph"], samples["src_text"],
                          samples["tgt_graph"], node_dict, edge_dict,
                          args.max_nodes, cuda)
        for kind in (0, 1):
            correct[kind] += batch_correct[kind]
            gold[kind] += batch_num[kind]
            predicted[kind] += batch_pred[kind]
        graph_corrects += batch_graph_correct
        graphs += 1

    print("Node: Recall: {:.2f}({}/{}), Precision: {:.2f}({}/{}) ".format(
        correct[0] / gold[0] * 100, correct[0], gold[0],
        correct[0] / predicted[0] * 100, correct[0], predicted[0]))
    print("Edge: Recall: {:.2f}({}/{}), Precision: {:.2f}({}/{}) ".format(
        correct[1] / gold[1] * 100, correct[1], gold[1],
        correct[1] / predicted[1] * 100, correct[1], predicted[1]))
    print("Accuracy: {:.2f}({}/{})".format(graph_corrects / graphs * 100,
                                           graph_corrects, graphs))
def child_valid(valid_queue, model, arch_pool, criterion, log_interval=1):
    """Score every architecture in ``arch_pool`` on one validation batch.

    For each architecture a fresh iterator is created over ``valid_queue``
    and only its first batch is evaluated, with the model called using
    ``bn_train=True``.

    Args:
        valid_queue: Iterable yielding ``(inputs, targets)`` pairs.
        model: Child network; called as ``model(inputs, arch, bn_train=True)``.
        arch_pool: Architectures to evaluate.
        criterion: Loss callable ``criterion(logits, targets)``.
        log_interval: Log every ``log_interval``-th architecture.

    Returns:
        List with one entry per architecture: top-1 precision divided by 100.
    """
    accuracies = []
    with torch.no_grad():
        model.eval()
        for position, arch in enumerate(arch_pool, start=1):
            inputs, targets = next(iter(valid_queue))
            inputs = utils.move_to_cuda(inputs)
            targets = utils.move_to_cuda(targets)

            logits = model(inputs, arch, bn_train=True)
            loss = criterion(logits, targets)
            prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))
            accuracies.append(prec1.data / 100)

            if position % log_interval == 0:
                logging.info('Valid arch %s\n loss %.2f top1 %f top5 %f',
                             ' '.join(str(op) for op in arch), loss, prec1,
                             prec5)
    return accuracies
def execute_policy(self):
    """Evaluate the current deterministic policy for a single episode.

    The population policy is used while fewer than
    ``config['initial_episodes']`` episodes have been counted; afterwards the
    individual policy is used. Actions are requested with
    ``deterministic=True`` and the cumulative episode reward is appended to
    ``self._data_rewards``.
    """
    state = self._env.reset()
    done = False
    episode_reward = 0.0
    original_reward = 0.0
    action_cost = 0.0
    steps_taken = 0

    in_warmup = self._episode_counter < self._config['initial_episodes']
    network_key = 'population' if in_warmup else 'individual'
    self._policy_cpu = self._rl_alg_class.get_policy_network(
        self._networks[network_key])

    if self._config['use_cpu_for_rollout']:
        utils.move_to_cpu()
    else:
        utils.move_to_cuda(self._config)

    while not done and steps_taken <= self._episode_length:
        steps_taken += 1
        action, _ = self._policy_cpu.get_action(state, deterministic=True)
        next_state, reward, done, info = self._env.step(action)
        # Accumulated alongside the reward but not logged yet (see below).
        action_cost += info['orig_action_cost']
        episode_reward += float(reward)
        original_reward += float(info['orig_reward'])
        state = next_state

    utils.move_to_cuda(self._config)

    # Do something here to log the results
    self._data_rewards.append(episode_reward)
def infer():
    """Run XRef over its test dataloader and dump per-mention predictions.

    Predictions are thresholded at 0.5, grouped by mention id
    (``{article}_{comment}_{mention}``), and written as one JSON object per
    mention to ``output/<args.output_path>``. The counts of positive and
    negative predictions are printed at the end.
    """
    parser = get_inference_config()
    args = parser.parse_args()

    ckpt_path = utils.get_latest_ckpt_path(args.ckpt_dir)
    print(f'Evaluating on {ckpt_path}')

    model = XRef(args)
    # NOTE(review): the return value is discarded. If this is PyTorch
    # Lightning's classmethod, the loaded weights live in the returned
    # object, not in `model` - confirm against the XRef implementation.
    model.load_from_checkpoint(ckpt_path)
    # model.freeze() was disabled upstream; the model is not put in eval mode.
    model.cuda()

    # Mapping mentions to list of prediction results over all candidates.
    results = dict()
    neg_count, pos_count = 0, 0

    # The context manager closes the output file even when inference raises.
    with open('output/' + args.output_path, 'w') as fout:
        for batch in tqdm(model.test_dataloader()):
            net_input = utils.move_to_cuda(batch)
            _, output_probs, accuracy = model(net_input)
            # Binarise the first element of the model's second output at 0.5.
            output_probs = (output_probs[0] > 0.5).long().tolist()

            for ix, ins_id in enumerate(batch['id']):
                art_id, cmt_id, ment_id, cand_id = ins_id.split('_')
                ment_id = f'{art_id}_{cmt_id}_{ment_id}'
                label = batch['labels'][ix].item()

                if ment_id not in results:
                    results[ment_id] = {
                        'comment': batch['comment_text'][ix],
                        'mention': batch['mention_tuple'][ix],
                        'candidates': []
                    }
                results[ment_id]['candidates'].append(
                    (batch['cand_text'][ix], output_probs[ix], int(label)))

                if output_probs[ix] == 1:
                    pos_count += 1
                else:
                    neg_count += 1

        for rst in results.values():
            # NOTE(review): candidates are ordered by the gold label (last
            # tuple field), not by the prediction - confirm this is intended.
            rst['candidates'] = sorted(rst['candidates'],
                                       key=lambda x: x[-1],
                                       reverse=True)
            fout.write(json.dumps(rst) + '\n')

    print(pos_count)
    print(neg_count)
def generate_batch(model, batch, beam_size, alpha, max_time_step):
    """Decode one batch with beam search and keep the best hypothesis each.

    Args:
        model: Model exposing ``device`` and ``work(batch, beam_size,
            max_time_step)``, which returns one beam per input.
        batch: Input batch; moved to ``model.device`` first.
        beam_size: Forwarded to ``model.work``.
        alpha: Forwarded to ``beam.get_k_best`` when picking the top
            hypothesis.
        max_time_step: Forwarded to ``model.work``.

    Returns:
        Dict with ``'token'`` (per input, the best sequence without its first
        and last element) and ``'score'`` (the matching hypothesis scores).
    """
    batch = move_to_cuda(batch, model.device)
    beams = model.work(batch, beam_size, max_time_step)

    tokens = []
    scores = []
    for beam in beams:
        top_hypothesis = beam.get_k_best(1, alpha)[0]
        # Slice off the first and last positions of the sequence.
        tokens.append(list(top_hypothesis.seq[1:-1]))
        scores.append(top_hypothesis.score)

    return {'token': tokens, 'score': scores}
def child_valid(valid_queue, model, arch_pool, criterion, log_interval=1):
    """Score every architecture in ``arch_pool`` on one validation batch.

    For each architecture a fresh iterator is created over ``valid_queue``
    and only its first batch is evaluated, with the model called using
    ``bn_train=True``. Every architecture is logged; ``log_interval`` is
    accepted but not used by this variant.

    Args:
        valid_queue: Iterable yielding ``(inputs, targets)`` pairs.
        model: Child network; called as ``model(inputs, arch, bn_train=True)``.
        arch_pool: Architectures to evaluate.
        criterion: Loss callable ``criterion(logits, targets)``.
        log_interval: Unused.

    Returns:
        List with one float per architecture: top-1 precision divided by 100.
    """
    accuracies = []
    with torch.no_grad():
        model.eval()
        for position, arch in enumerate(arch_pool, start=1):
            inputs, targets = next(iter(valid_queue))
            inputs = utils.move_to_cuda(inputs)
            targets = utils.move_to_cuda(targets)

            logits = model(inputs, arch, bn_train=True)
            loss = criterion(logits, targets)
            prec1, prec5 = utils.accuracy(logits, targets, topk=(1, 5))

            accuracies.append(prec1.item() / 100)
            logging.info('Valid %d arch %s\n loss %.2f top1 %f', position,
                         ' '.join(str(op) for op in arch), loss, prec1.item())
    return accuracies
def generate_batched_itr(data_itr, strategy, model, task_dict):
    """Yield one result dict per example for every batch in ``data_itr``.

    Args:
        data_itr: Iterable of batches with ``id``, ``net_input.input_ids``,
            ``kp_target`` and ``kp_target_length`` entries.
        strategy: Object whose ``generate(model, sample)`` returns
            ``(hypos, kp_offset_pred)``.
        model: Model forwarded to ``strategy.generate``.
        task_dict: Dictionary exposing the special ids ``sep()``, ``pad()``,
            ``bok()`` and ``eos()``.

    Yields:
        Dicts with keys ``id``, ``gtruth_kp_tgt``, ``prompt_ids``,
        ``kp_tgt_ids`` (``None`` when the hypothesis has no ``bok``),
        ``kp_src_ids`` and ``offset``.
    """
    for sample in data_itr:
        moved = utils.move_to_cuda(sample)
        with torch.no_grad():
            hypos, kp_offset_pred = strategy.generate(model, moved)

        for row in range(hypos.size(0)):
            src_ids = moved['net_input']['input_ids'][row].tolist()
            record = {'id': moved['id'][row]}

            # Reference keyphrase target, cut to its recorded length.
            reference = moved['kp_target'][row].tolist()
            reference_len = moved['kp_target_length'][row].item()
            record['gtruth_kp_tgt'] = reference[:reference_len]

            # The prompt is everything before the first separator.
            prompt_end = src_ids.index(task_dict.sep())
            record['prompt_ids'] = src_ids[:prompt_end]

            # Source keyphrases run from the separator to the first pad
            # (or to the end when there is no padding).
            if task_dict.pad() in src_ids:
                kp_src_end = src_ids.index(task_dict.pad())
            else:
                kp_src_end = len(src_ids)

            record['kp_tgt_ids'] = None
            record['kp_src_ids'] = src_ids[prompt_end + 1:kp_src_end]

            hypo = hypos[row].tolist()
            if task_dict.bok() in hypo:
                kp_tgt_start = hypo.index(task_dict.bok())
                if task_dict.eos() in hypo:
                    kp_tgt_end = hypo.index(task_dict.eos())
                else:
                    kp_tgt_end = len(hypo)
                record['kp_tgt_ids'] = hypo[kp_tgt_start + 1:kp_tgt_end]

            record['offset'] = kp_offset_pred[row]
            yield record
def valid_epoch(model, valid_dataloader, args, vocab, tb_logger):
    """Run one pass over the validation set and log the mean losses.

    Args:
        model: Model called as ``model(batch)``; also provides
            ``global_steps`` for the logger.
        valid_dataloader: Validation data loader.
        args: Provides ``batch_size``, ``gamma`` and ``eta``.
        vocab: Provides ``pad_idx`` for the token loss.
        tb_logger: Scalar logger exposing ``add_scalar(tag, value, step)``.

    Returns:
        Dict mapping each tracked loss name to its per-batch average.
    """
    total_losses = dict.fromkeys(
        ("total", "token_ppl", "token_loss", "sentence_type_loss",
         "phrase_selection_loss"), 0)
    n_iters = 0

    progress = tqdm(enumerate(valid_dataloader),
                    total=len(valid_dataloader.dataset) / args.batch_size)
    for _, batch in progress:
        batch = utils.move_to_cuda(batch)
        stype_logits, token_logits, ph_attn, _ = model(batch)
        losses = compute_losses(
            token_logits=token_logits,
            token_targets=batch["dec_out"],
            pad_token_id=vocab.pad_idx,
            sentence_type_logits=stype_logits,
            sentence_type_targets=batch["sent_types"],
            ph_bank_attn=ph_attn,
            ph_bank_len=batch["ph_bank_len_tensor"],
            ph_bank_sel_ind_targets=batch["ph_sel_ind_tensor"])

        # Same weighting as the training objective: token loss plus the
        # gamma- and eta-scaled auxiliary terms.
        model_loss = (losses['token_loss']
                      + args.gamma * losses["sentence_type_loss"]
                      + args.eta * losses["phrase_selection_loss"])

        for loss_type in losses:
            total_losses[loss_type] += losses[loss_type].item()
        total_losses["total"] += model_loss.item()
        n_iters += 1

    scalar_tags = (
        ("valid_loss_total", "total"),
        ("valid_loss_token", "token_loss"),
        ("valid_loss_sentence_type", "sentence_type_loss"),
        ("valid_loss_phrase_selection", "phrase_selection_loss"),
        ("valid_PPL", "token_ppl"),
    )
    for tag, loss_key in scalar_tags:
        tb_logger.add_scalar(tag, total_losses[loss_key] / n_iters,
                             model.global_steps)

    return {name: value / n_iters for name, value in total_losses.items()}
def multi_task():
    """Handle a multi-task generation request and return a JSON string.

    Reads the request JSON, which must contain ``data`` (the user text) and
    may contain ``length`` (default 30), ``beam`` (default -1, meaning no
    beam search), ``type`` (0, 1 or 2; anything else falls back to 0) and
    ``repeat`` (default 1). Produces ``repeat`` sampled replies and, when
    ``beam`` is not -1, one beam-search reply.

    Returns:
        JSON string with ``user-query``, ``sampling-response``, optionally
        ``beam-response``, and ``interval`` (elapsed seconds).

    NOTE(review): when ``data`` is absent nothing is returned, and
    ``get_json(silent=True)`` may yield ``None`` for an unparsable body,
    which would make the membership test raise - confirm desired handling.
    """
    content = request.get_json(silent=True, force=True)
    if "data" in content:
        length = content.get("length", 30)
        is_beam = content.get("beam", -1)
        task_type = content.get("type", 0)
        # Unknown task types silently fall back to task 0.
        if task_type not in [0, 1, 2]:
            task_type = 0
        repeat = content.get("repeat", 1)
        response = OrderedDict()
        begin_time = time.time()
        # NOTE(review): informational messages are emitted at ERROR level.
        logger.error("user message...")
        text = content["data"]
        logger.error(text)
        with torch.no_grad():
            response["user-query"] = text
            context = convert_task_content(client.tokenizer, text, task_type)
            # Prompt length, used below to strip the prompt from each sample.
            ids_length = context["input_tokens"].size(1)
            context = move_to_cuda(context, client.device)
            reply = []
            for i in range(repeat):
                out = client.generator(client.multi_task_model, client.tokenizer, length, context=context, temperature=1, top_k=5, device=client.device, sample=True)
                out = out[ids_length - 1:]
                out = client.tokenizer.convert_ids_to_text(out)
                # Drop the "##" markers left in the detokenised text.
                out = out.replace("##", "")
                reply.append(out)
            # A single sample is returned as a bare string, not a list.
            if len(reply) == 1:
                reply = reply[0]
            beam_out = None
            if is_beam != -1:
                # The requested beam width and length are written onto the
                # shared beam-search object before decoding.
                client.multi_task_beam.beam_size = is_beam
                client.multi_task_beam.max_lens = length
                beam_out = client.multi_task_beam.generate_response(context)
            response["sampling-response"] = reply
            if beam_out is not None:
                response[
                    "beam-response"] = client.tokenizer.convert_ids_to_text(
                        beam_out).replace("##", "")
        interval = time.time() - begin_time
        logger.error("elapsed time = %s", interval)
        response["interval"] = interval
        return json.dumps(response, ensure_ascii=False)
def valid(valid_queue, model, criterion):
    """Evaluate ``model`` on ``valid_queue`` without tracking gradients.

    Args:
        valid_queue: Iterable yielding ``(input, target)`` pairs.
        model: Network to evaluate; called as ``model(input)``.
        criterion: Loss callable ``criterion(logits, target)``.

    Returns:
        Tuple ``(top1_avg, top5_avg, loss_avg)`` from the running meters.
    """
    loss_meter = utils.AverageMeter()
    top1_meter = utils.AverageMeter()
    top5_meter = utils.AverageMeter()

    with torch.no_grad():
        model.eval()
        for batch_no, (images, labels) in enumerate(valid_queue, start=1):
            images = utils.move_to_cuda(images)
            labels = utils.move_to_cuda(labels)

            logits = model(images)
            loss = criterion(logits, labels)
            prec1, prec5 = utils.accuracy(logits, labels, topk=(1, 5))

            batch_size = images.size(0)
            loss_meter.update(loss.data, batch_size)
            top1_meter.update(prec1.data, batch_size)
            top5_meter.update(prec5.data, batch_size)

            if batch_no % 100 == 0:
                logging.info('valid %03d %e %f %f', batch_no, loss_meter.avg,
                             top1_meter.avg, top5_meter.avg)

    return top1_meter.avg, top5_meter.avg, loss_meter.avg
def forward_step(data, model, tokenizer, criterion, args):
    """Forward step: compute the token loss and the NSP loss for one batch.

    Args:
        data: Batch with ``net_input``, ``nsp_labels``, ``target``,
            ``nsentences`` and ``ntokens`` entries.
        model: Called as ``model(**net_input)``; returns
            ``(output, nsp, past)``.
        tokenizer: Provides ``num_tokens``, the width of the token logits.
        criterion: Loss callable applied to both heads.
        args: Unused here.

    Returns:
        Tuple ``(losses, nsp_loss, nsentences, ntokens)``.
    """
    sample = move_to_cuda(data, torch.cuda.current_device())
    output, nsp, _ = model(**sample["net_input"])

    nsp_labels = sample["nsp_labels"]
    target = sample["target"]

    # The NSP head is reshaped to three columns per example.
    nsp_logits = nsp.view(-1, 3).contiguous().float()
    nsp_loss = criterion(nsp_logits, nsp_labels.view(-1).contiguous())

    token_logits = output.view(-1, tokenizer.num_tokens).contiguous().float()
    flat_target = target.contiguous().view(-1).contiguous()
    losses = criterion(token_logits, flat_target)

    return losses, nsp_loss, sample["nsentences"], sample["ntokens"]
def predict(args, model, eval_dataloader, device, fp16=False):
    """Compute in-batch retrieval accuracy of ``model`` on ``eval_dataloader``.

    For each batch the query/context similarity matrix ``q @ c.T`` is built;
    a row counts as correct when its arg-max lies on the diagonal, i.e. the
    i-th query ranks the i-th context first.

    Args:
        args: Unused here.
        model: Model returning a dict with ``"q"`` and ``"c"`` matrices.
        eval_dataloader: Iterable of batches.
        device: Unused here; batches are moved with ``move_to_cuda``.
        fp16: When true, the model and batches are converted to half
            precision for evaluation and the model is restored afterwards.

    Returns:
        The accuracy when it is greater than zero, otherwise ``0``.
    """
    # NOTE(review): a list of models is switched to eval/half below, but the
    # forward call and the restore step treat `model` as a single module, so
    # the list path cannot complete - confirm whether it is still needed.
    is_ensemble = isinstance(model, list)
    if is_ensemble:
        model = [m.eval() for m in model]
    else:
        model.eval()
    if fp16:
        if is_ensemble:
            model = [m.half() for m in model]
        else:
            model.half()

    num_correct = 0.0
    num_total = 0.0
    for batch in tqdm(eval_dataloader):
        batch_to_feed = move_to_cuda(batch)
        if fp16:
            batch_to_feed = convert_to_half(batch_to_feed)
        with torch.no_grad():
            results = model(batch_to_feed)
            product = torch.mm(results["q"], results["c"].t())
            target = torch.arange(product.size(0)).to(product.device)
            prediction = product.argmax(-1)
            pred_res = prediction == target
            num_total += len(pred_res)
            # Tensor reduction instead of the builtin sum(), which iterates
            # the tensor one element at a time.
            num_correct += pred_res.sum()

    acc = num_correct / num_total
    best_acc = acc if acc > 0 else 0

    print(f"evaluated {num_total} examples...")
    print(f"avg. Acc: {acc}")

    if fp16:
        model.float()
    model.train()
    return best_acc
def predict(args, model, eval_dataloader, device, logger):
    """Compute top-1 ranking accuracy per question.

    Scores are grouped by question id; a question counts as correct when the
    label of its highest-scoring candidate equals 1.

    Args:
        args: Unused here.
        model: Called as ``model(net_inputs)``; returns one score per row.
        eval_dataloader: Iterable of batches with ``net_inputs`` (including
            ``label``) and ``qids``.
        device: Unused here; inputs are moved with ``move_to_cuda``.
        logger: Receives the summary lines.

    Returns:
        Mean of the per-question correctness flags.
    """
    model.eval()
    scored_by_question = collections.defaultdict(list)

    for batch in tqdm(eval_dataloader):
        net_inputs = move_to_cuda(batch["net_inputs"])
        question_ids = batch["qids"]
        labels = batch["net_inputs"]["label"].view(-1).tolist()
        with torch.no_grad():
            scores = model(net_inputs).view(-1).tolist()
        for qid, pair in zip(question_ids, zip(labels, scores)):
            scored_by_question[qid].append(pair)

    # max() keeps the first of equally scored candidates, matching a stable
    # descending sort followed by taking element 0.
    acc = [
        max(pairs, key=lambda pair: pair[1])[0] == 1
        for pairs in scored_by_question.values()
    ]

    logger.info(f"evaluated {len(scored_by_question)} questions...")
    logger.info(f'acc: {np.mean(acc)}')
    model.train()
    return np.mean(acc)
def generate_synthetic_controller_data(model, exclude=None, maxn=1000):
    """Sample unseen architectures and label them with the encoder.

    Architectures are drawn with ``utils.generate_arch`` until ``maxn``
    distinct ones outside ``exclude`` have been collected; the controller's
    encoder then predicts a value for each.

    Args:
        model: Controller whose ``encoder`` returns a 4-tuple with the
            predicted value last.
        exclude: Architectures that must not be sampled. ``None`` (the
            default) means nothing is excluded.
        maxn: Number of synthetic architectures to generate.

    Returns:
        Tuple ``(synthetic_input, synthetic_target)`` of equal length.
    """
    # A None default avoids sharing one mutable list across calls.
    excluded = [] if exclude is None else exclude
    if maxn <= 0:
        # Nothing to sample; a DataLoader with batch_size=0 would raise.
        return [], []

    synthetic_input = []
    synthetic_target = []
    while len(synthetic_input) < maxn:
        candidate = utils.generate_arch(1, args.layers, args.num_ops)[0]
        if candidate not in excluded and candidate not in synthetic_input:
            synthetic_input.append(candidate)

    synthetic_dataset = utils.ControllerDataset(synthetic_input, None, False)
    # One batch holding the whole synthetic set.
    synthetic_queue = torch.utils.data.DataLoader(
        synthetic_dataset,
        batch_size=len(synthetic_dataset),
        shuffle=False,
        pin_memory=True)

    with torch.no_grad():
        model.eval()
        for sample in synthetic_queue:
            encoder_input = utils.move_to_cuda(sample['encoder_input'])
            _, _, _, predict_value = model.encoder(encoder_input)
            values = predict_value.data.squeeze().tolist()
            # squeeze() collapses a single prediction to a scalar; wrap it so
            # the list extension below still works.
            if not isinstance(values, list):
                values = [values]
            synthetic_target += values

    assert len(synthetic_input) == len(synthetic_target)
    return synthetic_input, synthetic_target
def train_step(self, sample):
    """Do forward, backward and parameter update.

    Args:
        sample: Batch passed to ``self.criterion(self._model, sample)``; it
            is moved with ``utils.move_to_cuda`` first when ``self.cuda`` is
            truthy.

    The training meters (``train_offset_loss``, ``train_nll_loss``,
    ``train_total_loss``) are updated from the criterion's logging output,
    and the update counter is advanced by one.
    """
    self._set_seed()
    self._model.train()
    self.criterion.train()
    self.optimizer.zero_grad()

    if self.cuda:
        sample = utils.move_to_cuda(sample)

    # Forward pass; the criterion returns a 7-tuple of which only the loss
    # and the logging dict are needed here.
    results = self.criterion(self._model, sample)
    loss, _, _, _, logging_output, _, _ = results

    # Gradients must exist before they can be clipped: run backward first,
    # then clip, then step. Clipping before backward only ever sees the
    # gradients that zero_grad() just cleared.
    loss.backward()
    self.optimizer.clip_grad_norm(self.args.clip_norm)
    self.optimizer.step()
    self.set_num_updates(self.get_num_updates() + 1)

    # Each meter is updated exactly once per step, and only when its
    # denominator is positive, to avoid dividing by a zero token count.
    nkp_tokens = logging_output.get('nkp_tokens', 0)
    if nkp_tokens > 0:
        self.meters['train_offset_loss'].update(
            logging_output.get('kp_offset_loss', 0) / nkp_tokens, nkp_tokens)

    ntokens = logging_output.get('ntokens', 0)
    if ntokens > 0:
        self.meters['train_nll_loss'].update(
            logging_output.get('nll_loss', 0) / ntokens, ntokens)
        self.meters['train_total_loss'].update(
            logging_output.get('total_loss', 0) / ntokens, ntokens)
def predict(args, model, eval_dataloader, device, fp16=False,
            is_query_embed=True):
    """Embed every batch of ``eval_dataloader`` and concatenate the results.

    Args:
        args: Unused here.
        model: Model exposing ``get_embed(batch, is_query_embed)``, which
            returns a dict with an ``'embed'`` tensor.
        eval_dataloader: Iterable of batches.
        device: Unused here; batches are moved with ``move_to_cuda``.
        fp16: When true, the model runs in half precision and is restored to
            float afterwards.
        is_query_embed: Forwarded to ``model.get_embed``.

    Returns:
        All per-batch embeddings concatenated with ``torch.cat``.
    """
    given_list = type(model) == list
    if given_list:
        model = [member.eval() for member in model]
    else:
        model.eval()

    if fp16:
        if given_list:
            model = [member.half() for member in model]
        else:
            model.half()

    chunks = []
    for batch in tqdm(eval_dataloader):
        batch_to_feed = move_to_cuda(batch)
        with torch.no_grad():
            chunks.append(
                model.get_embed(batch_to_feed, is_query_embed)['embed'])

    embed_array = torch.cat(chunks)

    if fp16:
        model.float()
    model.train()
    return embed_array
def predict(logger, args, model, eval_dataloader, device, fp16=False):
    """Predict answer spans and score them with F1 and exact match.

    Args:
        logger: Forwarded to ``get_final_text``.
        args: Provides ``do_lower_case`` for ``get_final_text``.
        model: Returns a dict with ``start_logits`` and ``end_logits``.
        eval_dataloader: Iterable of batches with ``net_input``,
            ``para_offset``, ``id``, ``tok_to_orig_index``, ``doc_tokens``,
            ``wp_tokens`` and ``true_answers`` entries.
        device: Unused here; inputs are moved with ``move_to_cuda``.
        fp16: When true, the model and inputs run in half precision and the
            model is restored to float afterwards.

    Returns:
        Tuple ``(mean_f1, mean_em)`` over all evaluated question ids.
    """
    model.eval()
    all_results = []  # never read in this function
    if fp16:
        model.half()
    # qid -> [predicted answer text, list of gold answers]
    qid2results = {}
    for batch in tqdm(eval_dataloader):
        batch_to_feed = move_to_cuda(batch["net_input"])
        if fp16:
            batch_to_feed = convert_to_half(batch_to_feed)
        with torch.no_grad():
            results = model(batch_to_feed)
            batch_start_logits = results["start_logits"]
            batch_end_logits = results["end_logits"]
            # Positions whose paragraph_mask is not 1 get a large negative
            # score so they cannot be selected as span boundaries.
            question_mask = batch_to_feed["paragraph_mask"].ne(1)
            outs = [
                o.float().masked_fill(question_mask, -1e10).type_as(o)
                for o in [batch_start_logits, batch_end_logits]
            ]
            # Broadcast sum: entry [b, i, j] is start score i + end score j
            # (assumes logits are (batch, seq) - TODO confirm).
            span_scores = outs[0][:, :, None] + outs[1][:, None]
            max_answer_lens = 20
            max_seq_len = span_scores.size(1)
            # Band matrix keeping only pairs with 0 <= j - i <= max_answer_lens.
            span_mask = np.tril(np.triu(np.ones((max_seq_len, max_seq_len)), 0), max_answer_lens)
            # Copy the numpy mask into a tensor with span_scores' type/device.
            span_mask = span_scores.data.new(max_seq_len, max_seq_len).copy_(
                torch.from_numpy(span_mask))
            span_scores_masked = span_scores.float().masked_fill(
                (1 - span_mask[None].expand_as(span_scores)).bool(),
                -1e10).type_as(span_scores)
            # Best start = row with the highest best-end score; best end =
            # arg-max column within that row.
            start_position = span_scores_masked.max(dim=2)[0].max(dim=1)[1]
            end_position = span_scores_masked.max(dim=2)[1].gather(
                1, start_position.unsqueeze(1)).squeeze(1)
        # Shift positions by the per-example paragraph offset.
        para_offset = batch['para_offset']
        start_position_ = list(
            np.array(start_position.tolist()) - np.array(para_offset))
        end_position_ = list(
            np.array(end_position.tolist()) - np.array(para_offset))
        for idx, qid in enumerate(batch['id']):
            start = start_position_[idx]
            end = end_position_[idx]
            tok_to_orig_index = batch['tok_to_orig_index'][idx]
            doc_tokens = batch['doc_tokens'][idx]
            wp_tokens = batch['wp_tokens'][idx]
            # Map the span boundaries back to original document tokens.
            orig_doc_start = tok_to_orig_index[start]
            orig_doc_end = tok_to_orig_index[end]
            orig_tokens = doc_tokens[orig_doc_start:(orig_doc_end + 1)]
            tok_tokens = wp_tokens[start:end + 1]
            # Join the span tokens, remove "##" markers, normalise spaces.
            tok_text = " ".join(tok_tokens)
            tok_text = tok_text.replace(" ##", "")
            tok_text = tok_text.replace("##", "")
            tok_text = tok_text.strip()
            tok_text = " ".join(tok_text.split())
            orig_text = " ".join(orig_tokens)
            final_text = get_final_text(tok_text, orig_text, logger, do_lower_case=args.do_lower_case, verbose_logging=False)
            qid2results[qid] = [final_text, batch['true_answers'][idx]]
    f1s = [
        metric_max_over_ground_truths(f1_score, item[0], item[1])
        for item in qid2results.values()
    ]
    ems = [
        metric_max_over_ground_truths(exact_match_score, item[0], item[1])
        for item in qid2results.values()
    ]
    print(f"evaluated {len(f1s)} examples...")
    # Restore full precision and training mode for the caller.
    if fp16:
        model.float()
    model.train()
    return (np.mean(f1s), np.mean(ems))
def generate(self, model, batch):
    """Decode token by token with sampling or greedy search.

    The encoder runs once; the decoder input starts as a single
    ``self.decoder_bos_idx`` column and grows by one token per step until
    every sequence has produced ``self.eos_idx`` or the length limit
    ``self.domain_to_max_len[self.domain]`` is reached.

    Args:
        model: Encoder-decoder model exposing ``get_encoder()``; a forward
            call returns the logits first and the cached state second.
        batch: Mapping whose ``net_input`` holds ``input_ids`` and
            ``attention_mask``.

    Returns:
        Tuple ``(input_ids, probs)``: the generated ids including the
        leading BOS column, and for each sequence the list of probabilities
        of the tokens chosen at every step.
    """
    net_input = utils.move_to_cuda(batch['net_input'])
    encoder_input_ids = net_input['input_ids']
    encoder_attn_mask = net_input['attention_mask']
    batch_size = encoder_input_ids.shape[0]
    encoder = model.get_encoder()
    encoder_outputs = encoder(encoder_input_ids,
                              attention_mask=encoder_attn_mask)
    # create empty decoder_input_ids
    input_ids = torch.full(
        (batch_size, 1),
        self.decoder_bos_idx,
        dtype=torch.long,
        device=next(model.parameters()).device,
    )
    cur_len = 1
    # probs[b] collects the probability of each token chosen for sequence b.
    probs = [[] for _ in range(batch_size)]
    # 1 while a sequence is still generating, 0 once it has emitted EOS.
    unfinished_sents = input_ids.new(batch_size).fill_(1)
    past = encoder_outputs  # defined for encoder-decoder models, None for decoder-only models
    while cur_len < self.domain_to_max_len[self.domain]:
        model_inputs = self.prepare_inputs_for_generation(
            input_ids, past=past, attention_mask=encoder_attn_mask)
        outputs = model(**model_inputs)
        # Logits of the last position only.
        next_token_logits = outputs[0][:, -1, :]
        past = outputs[1]
        if self.do_sampling:
            # Temperature (higher temperature => more likely to sample low probability tokens)
            if self.temperature != 1.0:
                next_token_logits = next_token_logits / self.temperature
            # Top-p/top-k filtering
            next_token_logits = top_k_top_p_filtering(next_token_logits,
                                                      top_k=self.topk,
                                                      top_p=self.topp)
            # Sample
            next_token_probs = F.softmax(next_token_logits, dim=-1)
            next_token = torch.multinomial(next_token_probs,
                                           num_samples=1).squeeze(1)
        else:
            # Greedy decoding
            next_token_probs = F.softmax(next_token_logits, dim=-1)
            next_token = torch.argmax(next_token_logits, dim=-1)
        # Probability assigned to the token that was actually chosen.
        chosen_token_probs = next_token_probs.gather(
            1, next_token.view(-1, 1))
        for b in range(batch_size):
            probs[b].append(chosen_token_probs[b, 0].item())
        # pad finished sentences if eos_token_id exist
        tokens_to_add = next_token * unfinished_sents + (self.pad_idx) * (
            1 - unfinished_sents)
        if not self.quiet:
            # Print one line per step: each sequence's token and its
            # probability as a percentage.
            output_str = ''
            for b in range(batch_size):
                w = self.tokenizer.convert_ids_to_tokens(
                    [tokens_to_add[b]])[0]
                p = probs[b][-1]
                output_str += '{:>12}({:.2f})|'.format(w, 100 * p)
            if cur_len == 1:
                print('=' * 50)
            print('step={:<3d}|{}'.format(cur_len, output_str))
        input_ids = torch.cat(
            [input_ids, tokens_to_add.unsqueeze(-1)], dim=-1)
        # Mark sequences that just produced EOS as finished.
        eos_in_sents = tokens_to_add == self.eos_idx
        unfinished_sents.mul_((~eos_in_sents).long())
        # stop when there is a </s> in each sentence, or if we exceed the maximum length
        if unfinished_sents.max() == 0:
            break
        cur_len = cur_len + 1
    return input_ids, probs