import torch.nn as nn
from dpu_utils.mlutils import Vocabulary

# get_edit_keywords and MAX_VOCAB_SIZE are defined elsewhere in this project.
def __init__(self, nl_threshold, nl_embedding_size, nl_token_counter,
             code_threshold, code_embedding_size, code_token_counter,
             dropout_rate, load_pretrained_embeddings=False):
    """Keeps track of the NL and code vocabularies and embeddings."""
    super(EmbeddingStore, self).__init__()
    edit_keywords = get_edit_keywords()
    self.__nl_vocabulary = Vocabulary.create_vocabulary(tokens=edit_keywords,
                                                        max_size=MAX_VOCAB_SIZE,
                                                        count_threshold=1,
                                                        add_pad=True)
    self.__nl_vocabulary.update(nl_token_counter, MAX_VOCAB_SIZE, nl_threshold)
    self.__nl_embedding_layer = nn.Embedding(num_embeddings=len(self.__nl_vocabulary),
                                             embedding_dim=nl_embedding_size,
                                             padding_idx=self.__nl_vocabulary.get_id_or_unk(
                                                 Vocabulary.get_pad()))
    self.nl_embedding_dropout_layer = nn.Dropout(p=dropout_rate)

    self.__code_vocabulary = Vocabulary.create_vocabulary(tokens=edit_keywords,
                                                          max_size=MAX_VOCAB_SIZE,
                                                          count_threshold=1,
                                                          add_pad=True)
    self.__code_vocabulary.update(code_token_counter, MAX_VOCAB_SIZE, code_threshold)
    self.__code_embedding_layer = nn.Embedding(num_embeddings=len(self.__code_vocabulary),
                                               embedding_dim=code_embedding_size,
                                               padding_idx=self.__code_vocabulary.get_id_or_unk(
                                                   Vocabulary.get_pad()))
    self.code_embedding_dropout_layer = nn.Dropout(p=dropout_rate)

    print('NL vocabulary size: {}'.format(len(self.__nl_vocabulary)))
    print('Code vocabulary size: {}'.format(len(self.__code_vocabulary)))

    if load_pretrained_embeddings:
        self.initialize_embeddings()
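# A minimal construction sketch, not from the original source: EmbeddingStore
# and its project-level dependencies are assumed importable, and the counters
# and sizes below are made-up illustrative values.
from collections import Counter

nl_counter = Counter({'returns': 12, 'the': 40, 'index': 7})    # hypothetical NL token counts
code_counter = Counter({'def': 30, 'return': 25, 'self': 50})   # hypothetical code token counts
store = EmbeddingStore(nl_threshold=1, nl_embedding_size=64, nl_token_counter=nl_counter,
                       code_threshold=1, code_embedding_size=64, code_token_counter=code_counter,
                       dropout_rate=0.2)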
def get_padded_nl_ids(self, nl_sequence, pad_length):
    return self.__nl_vocabulary.get_id_or_unk_multiple(
        nl_sequence,
        pad_to_size=pad_length,
        padding_element=self.__nl_vocabulary.get_id_or_unk(
            Vocabulary.get_pad()),
    )
def pad_length(self, sequence, target_length):
    if len(sequence) >= target_length:
        return sequence[:target_length]
    else:
        return sequence + [
            self.__nl_vocabulary.get_id_or_unk(Vocabulary.get_pad())
            for _ in range(target_length - len(sequence))
        ]
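# Hedged illustration of the truncate-or-pad contract above (`store` is the
# EmbeddingStore sketched earlier; the token ids are made up):
ids = store.pad_length([5, 9, 2, 7], target_length=2)   # -> [5, 9] (truncated)
ids = store.pad_length([5, 9, 2], target_length=5)      # -> [5, 9, 2, pad, pad], where pad is the NL pad id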
def get_extended_padded_nl_ids(self, nl_sequence, pad_length, inp_ids, inp_tokens):
    # Derived from: https://github.com/microsoft/dpu-utils/blob/master/python/dpu_utils/mlutils/vocabulary.py
    nl_ids = []
    for token in nl_sequence:
        nl_id = self.get_nl_id(token)
        # Out-of-vocabulary tokens that occur in the input are mapped to the
        # input token's (extended) id so the decoder can copy them.
        if self.is_nl_unk(nl_id) and token in inp_tokens:
            copy_idx = inp_tokens.index(token)
            nl_id = inp_ids[copy_idx]
        nl_ids.append(nl_id)

    if len(nl_ids) > pad_length:
        return nl_ids[:pad_length]
    else:
        padding = [self.__nl_vocabulary.get_id_or_unk(Vocabulary.get_pad())] * (pad_length - len(nl_ids))
        return nl_ids + padding
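# Copy-mechanism sketch (ids are hypothetical): 'fooBar' is assumed to be
# out-of-vocabulary, but because it occurs in inp_tokens at position 3 it is
# mapped to inp_ids[3] (207) instead of the UNK id, letting a decoder with a
# copy/pointer head refer back to the source token.
ext_ids = store.get_extended_padded_nl_ids(
    ['returns', 'fooBar'],
    pad_length=4,
    inp_ids=[11, 12, 13, 207],
    inp_tokens=['def', 'get', 'foo', 'fooBar'])
# -> [<id of 'returns'>, 207, <pad id>, <pad id>]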
Example #5
from typing import Any, Dict, List

import keras
import numpy as np

# beam_search, SENTENCE_START_TOKEN, SENTENCE_END_TOKEN, _evaluate_f1 and
# visualise_beam_predictions_to_targets are defined elsewhere in this project.
def evaluate_f1(model: keras.Model,
                vocab: Vocabulary,
                input_method_body_subtokens: np.ndarray,
                target_method_names: np.ndarray,
                hyperparameters: Dict[str, Any],
                visualise_prediction=True):
    padding_id = vocab.get_id_or_unk(vocab.get_pad())
    begin_of_sentence_id = vocab.get_id_or_unk(SENTENCE_START_TOKEN)
    end_of_sentence_id = vocab.get_id_or_unk(SENTENCE_END_TOKEN)

    if input_method_body_subtokens.ndim != 3:
        # model prediction expects 3 dimensions, a single input won't have the batch dimension, manually add it
        input_method_body_subtokens = np.expand_dims(
            input_method_body_subtokens, 0)

    predictions = model.predict(input_method_body_subtokens, batch_size=1)

    best_predictions, best_predictions_probs = beam_search(
        predictions,
        padding_id,
        begin_of_sentence_id,
        end_of_sentence_id,
        hyperparameters['beam_width'],
        hyperparameters['beam_top_paths'],
    )
    f1_evaluation = _evaluate_f1(best_predictions, best_predictions_probs,
                                 vocab, target_method_names)
    if visualise_prediction:
        max_results = 10
        visualised_input = visualise_beam_predictions_to_targets(
            vocab, best_predictions[:max_results],
            best_predictions_probs[:max_results],
            input_method_body_subtokens[:max_results],
            target_method_names[:max_results])

        # return best_predictions, best_predictions_probs
        return f1_evaluation, visualised_input
    return f1_evaluation
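# Hedged usage sketch: `model`, `vocab`, and the two evaluation arrays would
# come from the surrounding training pipeline; the beam settings are
# illustrative, not the project's defaults.
hyperparameters = {'beam_width': 5, 'beam_top_paths': 3}
f1_scores, visualisation = evaluate_f1(
    model, vocab,
    input_method_body_subtokens,
    target_method_names,
    hyperparameters,
    visualise_prediction=True)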
Example #6
def translate_tokenized_array_to_list_words(vocab: Vocabulary, token: np.ndarray) -> List[str]:
    """Helper function to translate numpy array tokens back to words."""
    pad_id = vocab.get_id_or_unk(vocab.get_pad())
    return [vocab.get_name_for_id(n) for n in token[np.nonzero(token != pad_id)]]
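# Round-trip sketch (assumes `vocab` from above; the ids are hypothetical):
# padding positions are filtered out before the id-to-word lookup.
pad = vocab.get_id_or_unk(vocab.get_pad())
arr = np.array([42, 7, pad, pad])
translate_tokenized_array_to_list_words(vocab, arr)   # -> e.g. ['get', 'name']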
def get_code_pad_id(self):
    return self.__code_vocabulary.get_id_or_unk(Vocabulary.get_pad())