Exemplo n.º 1
0
    def _build_bert_input(self, input_sentences):
        """Turn raw sentences into a BERT ``input_fn`` plus a feature lookup.

        Args:
            input_sentences: Iterable of raw text sentences. Each one becomes
                a single-sentence ``InputExample`` whose ``unique_id`` is its
                position in the iteration order.

        Returns:
            A ``(input_fn, input_features_dict)`` tuple: the callable produced
            by ``input_fn_builder`` and a dict mapping every feature's
            ``unique_id`` to the feature object itself.
        """
        # Single-sentence examples (text_b is None): one embedding per input.
        examples = [
            InputExample(unique_id=position, text_a=text, text_b=None)
            for position, text in enumerate(input_sentences)
        ]

        # InputExample objects -> InputFeature objects.
        converted = convert_examples_to_features(
            examples=examples,
            seq_length=self.max_seq_length,
            tokenizer=self.tokenizer,
        )

        # The input_fn is what gets handed to the BERT model.
        bert_input_fn = input_fn_builder(
            features=converted,
            seq_length=self.max_seq_length,
        )

        # Keep the features addressable by unique_id for the predict loop.
        features_by_id = {item.unique_id: item for item in converted}

        return bert_input_fn, features_by_id
Exemplo n.º 2
0
    def extract(self, sentence):
        """Extract per-token layer activations for a single sentence.

        Args:
            sentence: Raw text, wrapped in one ``InputExample`` with
                ``unique_id=0`` and no second segment.

        Returns:
            An ``OrderedDict`` with a single ``"features"`` key holding one
            entry per token of the converted feature. Each entry has the
            ``"token"`` and a ``"layers"`` list; every layer item carries the
            layer ``"index"`` (taken from ``_layers``) and its ``"values"``
            rounded to 6 decimal places.

        Raises:
            IndexError: If feature conversion yields no features or the
                estimator yields no predictions.
        """
        example = [InputExample(unique_id=0, text_a=sentence, text_b=None)]
        features = convert_examples_to_features(examples=example,
                                                seq_length=_max_seq_length,
                                                tokenizer=self._tokenizer)
        input_fn = input_fn_builder(features=features,
                                    seq_length=_max_seq_length)

        # Only one example is submitted, so every prediction refers to the
        # first feature. Look it up once, outside the loop.
        feature = features[0]

        outputs = []
        for output in self._estimator.predict(input_fn):
            output_dict = collections.OrderedDict()
            all_features = []
            for (i, token) in enumerate(feature.tokens):
                all_layers = []
                for (j, layer_index) in enumerate(_layers):
                    layer_output = output["layer_output_%d" % j]
                    layers = collections.OrderedDict()
                    layers["index"] = layer_index
                    # Slice out row i and flatten it to plain Python floats.
                    layers["values"] = [
                        round(float(x), 6)
                        for x in layer_output[i:(i + 1)].flat
                    ]
                    all_layers.append(layers)
                # Named distinctly from `features`: rebinding that list here
                # would break `features[0]` on any further prediction.
                token_entry = collections.OrderedDict()
                token_entry["token"] = token
                token_entry["layers"] = all_layers
                all_features.append(token_entry)
            output_dict["features"] = all_features
            outputs.append(output_dict)

        return outputs[0]
Exemplo n.º 3
0
def extract_v1(sentence, estimator, tokenizer, sen_len=15):
    """Run one sentence through ``estimator`` and return its first prediction.

    Args:
        sentence: Raw text, wrapped in a single ``InputExample`` (id 0).
        estimator: Object whose ``predict(input_fn)`` yields predictions.
        tokenizer: Tokenizer forwarded to ``convert_examples_to_features_1``.
        sen_len: Sequence length passed to feature conversion and to
            ``input_fn_builder``. Defaults to 15.

    Returns:
        The first item yielded by ``estimator.predict``.

    Raises:
        IndexError: If the estimator yields no predictions.
    """
    single_example = InputExample(unique_id=0, text_a=sentence, text_b=None)
    converted = convert_examples_to_features_1(
        examples=[single_example],
        seq_length=sen_len,
        tokenizer=tokenizer,
    )
    predict_input_fn = input_fn_builder(features=converted, seq_length=sen_len)

    # Drain the whole prediction stream before picking the first item.
    predictions = list(estimator.predict(predict_input_fn))
    return predictions[0]
Exemplo n.º 4
0
    def extract_v1(self, sentence):
        """Return the estimator's first raw prediction for ``sentence``.

        Args:
            sentence: Raw text, wrapped in a single ``InputExample`` (id 0).

        Returns:
            The first item yielded by ``self._estimator.predict``.

        Raises:
            IndexError: If the estimator yields no predictions.
        """
        single_example = InputExample(unique_id=0, text_a=sentence, text_b=None)
        converted = convert_examples_to_features(
            examples=[single_example],
            seq_length=_max_seq_length,
            tokenizer=self._tokenizer,
        )
        predict_input_fn = input_fn_builder(
            features=converted,
            seq_length=_max_seq_length,
        )

        # Drain the whole prediction stream before picking the first item.
        predictions = list(self._estimator.predict(predict_input_fn))
        return predictions[0]
Exemplo n.º 5
0
def extracts_v1(sentences, estimator, tokenizer, sen_len=15):
    """Run a batch of sentences through ``estimator`` and collect predictions.

    Args:
        sentences: Iterable of raw text sentences. Each becomes an
            ``InputExample`` whose ``unique_id`` is its position.
        estimator: Object whose ``predict(input_fn)`` yields predictions.
        tokenizer: Tokenizer forwarded to ``convert_examples_to_features``.
        sen_len: Sequence length passed to feature conversion and to
            ``input_fn_builder``. Defaults to 15.

    Returns:
        A list of every item yielded by ``estimator.predict``, in order.
    """
    batch = [
        InputExample(unique_id=position, text_a=text, text_b=None)
        for position, text in enumerate(sentences)
    ]
    converted = convert_examples_to_features(
        examples=batch,
        seq_length=sen_len,
        tokenizer=tokenizer,
    )
    predict_input_fn = input_fn_builder(features=converted, seq_length=sen_len)

    return list(estimator.predict(predict_input_fn))
Exemplo n.º 6
0
def extracts_pad(add_n, estimator, select_layers, sen_len=15):
    """Predict on ``add_n`` synthetic padding features and merge their layers.

    Args:
        add_n: Number of padding ``InputFeatures`` to create.
        estimator: Object whose ``predict(input_fn)`` yields predictions.
        select_layers: Sized collection; only its length is used, as the
            second argument to ``concat_layers``.
        sen_len: Sequence length passed to ``input_fn_builder``. Defaults
            to 15.

    Returns:
        A list with ``concat_layers(prediction, len(select_layers))`` for
        every prediction the estimator yields.
    """
    # NOTE(review): every field is a length-1 list regardless of sen_len, and
    # unique_id is a list rather than an int; presumably input_fn_builder
    # tolerates this -- confirm against its implementation.
    padding = [
        InputFeatures(
            unique_id=[0],
            tokens='[PAD]',
            input_ids=[0],
            input_mask=[0],
            input_type_ids=[0],
        )
        for _ in range(add_n)
    ]

    pad_input_fn = input_fn_builder(features=padding, seq_length=sen_len)
    layer_count = len(select_layers)

    return [
        concat_layers(prediction, layer_count)
        for prediction in estimator.predict(pad_input_fn)
    ]
Exemplo n.º 7
0
    def _make_examples(self, texts):
        """Create BERT features for ``texts`` and an input_fn over them.

        Prints ``MAKING EXAMPLES`` to stdout before doing any work.

        Args:
            texts: List of strings. One example is created per string, with
                its list position as the unique ID.

        Returns:
            A ``(unique_id_to_feature, input_fn)`` tuple: a dict mapping each
            feature's ``unique_id`` to the feature, and the callable produced
            by ``extract_features.input_fn_builder``.
        """
        print('MAKING EXAMPLES')

        bert_examples = [
            extract_features.InputExample(position, content, None)
            for position, content in enumerate(texts)
        ]
        converted = extract_features.convert_examples_to_features(
            bert_examples, self._max_seq_len, self._tokenizer)

        # Build the lookup first, then the input_fn, as the original did.
        features_by_id = {item.unique_id: item for item in converted}
        bert_input_fn = extract_features.input_fn_builder(
            features=converted, seq_length=self._max_seq_len)

        return features_by_id, bert_input_fn