import math
from argparse import Namespace
from typing import Iterable, List, Tuple

import torch
from torch import nn
from torch.autograd import Variable

# Project-internal names used below (Embedding, Prediction, Tokenizer, PECDataset,
# PECSample, TokenizedDataset, EncFeaturesDataset, NeuralPredictorState, ScrapedTactic,
# maybe_cuda, get_stem, optimize_checkpoints, serapi_instance, tokenizer) are assumed
# to be imported from the surrounding repository's modules.


def predictKTacticsWithLoss_batch(prediction_distributions : torch.FloatTensor,
                                  embedding : Embedding,
                                  k : int,
                                  correct_stems : List[str],
                                  criterion : nn.Module) \
        -> Tuple[List[List[Prediction]], float]:
    # Encode each correct stem, falling back to index 0 for stems the
    # embedding has never seen, and compute the batch loss.
    output_var = maybe_cuda(Variable(
        torch.LongTensor([embedding.encode_token(correct_stem)
                          if embedding.has_token(correct_stem)
                          else 0
                          for correct_stem in correct_stems])))
    loss = criterion(prediction_distributions, output_var).item()
    if k > embedding.num_tokens():
        k = embedding.num_tokens()
    # Take the top-k stems from each distribution in the batch.
    certainties_and_idxs_list = \
        [single_distribution.view(-1).topk(k)
         for single_distribution in list(prediction_distributions)]
    results = [[Prediction(embedding.decode_token(stem_idx.item()) + ".",
                           math.exp(certainty.item()))
                for certainty, stem_idx in zip(*certainties_and_idxs)]
               for certainties_and_idxs in certainties_and_idxs_list]
    return results, loss


def predictKTactics(prediction_distribution : torch.FloatTensor,
                    embedding : Embedding, k : int) \
        -> List[Prediction]:
    if k > embedding.num_tokens():
        k = embedding.num_tokens()
    certainties_and_idxs = prediction_distribution.view(-1).topk(k)
    # Use .item() (rather than indexing .data) so this works on the
    # zero-dimensional tensors that iterating over topk results yields.
    results = [Prediction(embedding.decode_token(stem_idx.item()) + ".",
                          math.exp(certainty.item()))
               for certainty, stem_idx in zip(*certainties_and_idxs)]
    return results


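# A minimal usage sketch (assuming a trained stem predictor; the names
# `stem_distribution` and `stem_embedding` are hypothetical stand-ins for a
# model's output distribution over tactic stems and its Embedding):
#
#   predictions = predictKTactics(stem_distribution, stem_embedding, k=5)
#   for prediction in predictions:
#       print(prediction.prediction, prediction.certainty)

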
def _encode_tokenized_data(self, data : TokenizedDataset, arg_values : Namespace,
                           tokenizer : Tokenizer, embedding : Embedding) \
        -> PECDataset:
    # Pair each goal with the stem of the previous tactic ("Proof" when the
    # proof has just started) as the prediction context.
    return PECDataset([PECSample(embedding.encode_token(
                                     get_stem(prev_tactics[-1])
                                     if len(prev_tactics) > 1 else "Proof"),
                                 goal, tactic)
                       for prev_tactics, goal, tactic in data])


def _optimize_checkpoints(self, encoded_data : EncFeaturesDataset, arg_values : Namespace,
                          tokenizer : Tokenizer, embedding : Embedding) \
        -> Iterable[NeuralPredictorState]:
    return optimize_checkpoints(self._data_tensors(encoded_data, arg_values),
                                arg_values,
                                self._get_model(arg_values, embedding.num_tokens(),
                                                tokenizer.numTokens()),
                                lambda batch_tensors, model:
                                self._getBatchPredictionLoss(batch_tensors, model))


def predictKTacticsWithLoss(prediction_distribution : torch.FloatTensor,
                            embedding : Embedding, k : int,
                            correct : str, criterion : nn.Module) \
        -> Tuple[List[Prediction], float]:
    if k > embedding.num_tokens():
        k = embedding.num_tokens()
    correct_stem = get_stem(correct)
    # Only compute a loss when the correct stem is in the embedding's
    # vocabulary; otherwise report zero loss for this example.
    if embedding.has_token(correct_stem):
        output_var = maybe_cuda(Variable(
            torch.LongTensor([embedding.encode_token(correct_stem)])))
        loss = criterion(prediction_distribution.view(1, -1), output_var).item()
    else:
        loss = 0

    certainties_and_idxs = prediction_distribution.view(-1).topk(k)
    results = [Prediction(embedding.decode_token(stem_idx.item()) + ".",
                          math.exp(certainty.item()))
               for certainty, stem_idx in zip(*certainties_and_idxs)]
    return results, loss


def get_stem_and_arg_idx(max_length: int, embedding: Embedding,
                         inter: ScrapedTactic) -> Tuple[int, int]:
    tactic_stem, tactic_rest = serapi_instance.split_tactic(inter.tactic)
    stem_idx = embedding.encode_token(tactic_stem)
    symbols = tokenizer.get_symbols(inter.context.focused_goal)
    arg = tactic_rest.split()[0].strip(".")
    assert arg in symbols, "tactic: {}, arg: {}, goal: {}, symbols: {}"\
        .format(inter.tactic, arg, inter.context.focused_goal, symbols)
    idx = symbols.index(arg)
    # Arguments past max_length are mapped to 0 ("no argument"); otherwise the
    # argument index is shifted by one to leave 0 free for that case.
    if idx >= max_length:
        return stem_idx, 0
    else:
        return stem_idx, idx + 1


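# A sketch of how the indices above might be consumed (the ScrapedTactic value
# `inter` and the width 30 are hypothetical):
#
#   stem_idx, arg_idx = get_stem_and_arg_idx(30, stem_embedding, inter)
#   # arg_idx == 0 means the tactic's argument fell outside the first 30 goal
#   # symbols; otherwise it is the 1-based position of the argument in the goal.

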
def print_subset_stem_distribution(probs: torch.FloatTensor, idxs: torch.LongTensor,
                                   embedding: Embedding):
    for idx, prob in zip(idxs, probs):
        print("{}: {:.2f}".format(embedding.decode_token(idx), prob))


def print_full_stem_distribution(stem_distribution: torch.FloatTensor,
                                 embedding: Embedding):
    for idx, prob in enumerate(stem_distribution):
        print("{}: {:.2f}".format(embedding.decode_token(idx), prob))
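

# Example (hypothetical): printing a model's stem distribution for a single goal,
# assuming `model` returns log-probabilities over tactic stems:
#
#   with torch.no_grad():
#       log_probs = model(goal_tensor).view(-1)
#   print_full_stem_distribution(log_probs.exp(), stem_embedding)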