Example #1
def load_vocab(vocab_dir):
    """load vocabs"""
    word_vocab = Vocab.from_json(os.path.join(vocab_dir, "word_vocab.json"))
    rel_vocab = Vocab.from_json(os.path.join(vocab_dir, "rel_vocab.json"))
    feat_vocab_path = os.path.join(vocab_dir, "feat_vocab.json")
    if os.path.exists(feat_vocab_path):
        feat_vocab = Vocab.from_json(feat_vocab_path)
    else:
        feat_vocab = None
    return word_vocab, feat_vocab, rel_vocab
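
A minimal usage sketch, assuming Vocab here is paddlenlp.data.Vocab (the excerpt above does not show its imports) and that the hypothetical directory below holds the JSON files written by Vocab.to_json; feat_vocab comes back as None when feat_vocab.json is absent.

import os
from paddlenlp.data import Vocab  # assumed imports for the excerpt above

vocab_dir = "./vocab_dir"  # hypothetical directory holding the *_vocab.json files
word_vocab, feat_vocab, rel_vocab = load_vocab(vocab_dir)
print(len(word_vocab), len(rel_vocab), feat_vocab is None)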
Example #2
def test_json(self):
    # Serialize the vocab to a JSON string and rebuild it from that string;
    # every token in the copy must map to the same index as in the original.
    token_to_idx = {'一万七千多': 1, '一万七千余': 2, '一万万': 3}
    vocab = Vocab(
        counter=self.counter, unk_token='[UNK]', token_to_idx=token_to_idx)
    json_str = vocab.to_json()
    copied_vocab = Vocab.from_json(json_str)
    for key, value in copied_vocab.token_to_idx.items():
        self.check_output_equal(value, vocab[key])
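
The same round trip also works through a file; a small sketch assuming paddlenlp.data.Vocab, whose to_json accepts an optional output path and whose from_json accepts either a JSON string or a path to a JSON file (the file name below is hypothetical).

from paddlenlp.data import Vocab

# Build a small vocab from tokenized text and persist it as JSON.
vocab = Vocab.build_vocab([['一万七千多', '一万七千余', '一万万']], unk_token='[UNK]')
vocab.to_json('vocab.json')

# Reload it later, e.g. from a prediction script.
restored = Vocab.from_json('vocab.json')
assert restored['一万七千多'] == vocab['一万七千多']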
Example #3
            print(probs)
            idx = np.argmax(probs, axis=1)
            idx = idx.tolist()
            labels = [label_map[i] for i in idx]
            results.extend(labels)
        return results


if __name__ == "__main__":
    # Create the predictor used to run inference.
    predictor = Predictor(args.model_file, args.params_file, args.device,
                          args.max_seq_length)

    # First preprocess the prediction data, then run prediction.
    data = [
        '非常不错,服务很好,位于市中心区,交通方便,不过价格也高!',
        '怀着十分激动的心情放映,可是看着看着发现,在放映完毕后,出现一集米老鼠的动画片',
        '作为老的四星酒店,房间依然很整洁,相当不错。机场接机服务很好,可以在车上办理入住手续,节省时间。',
    ]
    vocab = Vocab.from_json(args.vocab_path)
    tokenizer = JiebaTokenizer(vocab)
    label_map = {0: 'negative', 1: 'positive'}

    results = predictor.predict(data,
                                tokenizer,
                                label_map,
                                batch_size=args.batch_size,
                                network=args.network)
    for idx, text in enumerate(data):
        print('Data: {} \t Label: {}'.format(text, results[idx]))
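
Before Predictor.predict runs the network, each text is segmented with jieba and mapped to token ids through the vocab. A minimal sketch of that step, assuming JiebaTokenizer is paddlenlp.data.JiebaTokenizer and that its encode method returns the id sequence for a sentence:

# Hedged sketch of the pre-processing applied to each input text above;
# words missing from the vocab are assumed to fall back to the [UNK] id.
sample = data[0]
token_ids = tokenizer.encode(sample)
print(sample, token_ids)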
Example #4
def main():
    # Load vocab.
    vocab = Vocab.from_json(args.vocab_path)
    label_map = {0: 'negative', 1: 'positive'}

    # Construct the network.
    network = args.network.lower()
    vocab_size = len(vocab)
    num_classes = len(label_map)
    pad_token_id = vocab.to_indices('[PAD]')
    if network == 'bow':
        model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'bigru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'bilstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='bidirect',
                          padding_idx=pad_token_id)
    elif network == 'bilstm_attn':
        lstm_hidden_size = 196
        attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
        model = BiLSTMAttentionModel(attention_layer=attention,
                                     vocab_size=vocab_size,
                                     lstm_hidden_size=lstm_hidden_size,
                                     num_classes=num_classes,
                                     padding_idx=pad_token_id)
    elif network == 'birnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='bidirect',
                         padding_idx=pad_token_id)
    elif network == 'cnn':
        model = CNNModel(vocab_size, num_classes, padding_idx=pad_token_id)
    elif network == 'gru':
        model = GRUModel(vocab_size,
                         num_classes,
                         direction='forward',
                         padding_idx=pad_token_id,
                         pooling_type='max')
    elif network == 'lstm':
        model = LSTMModel(vocab_size,
                          num_classes,
                          direction='forward',
                          padding_idx=pad_token_id,
                          pooling_type='max')
    elif network == 'rnn':
        model = RNNModel(vocab_size,
                         num_classes,
                         direction='forward',
                         padding_idx=pad_token_id,
                         pooling_type='max')
    else:
        raise ValueError(
            "Unknown network: %s, it must be one of bow, lstm, bilstm, cnn, gru, bigru, rnn, birnn and bilstm_attn."
            % network)

    # Load model parameters.
    state_dict = paddle.load(args.params_path)
    model.set_dict(state_dict)
    model.eval()

    inputs = [paddle.static.InputSpec(shape=[None, None], dtype="int64")]
    # Convert to static graph with specific input description
    if args.network in [
            "lstm", "bilstm", "gru", "bigru", "rnn", "birnn", "bilstm_attn"
    ]:
        inputs.append(paddle.static.InputSpec(shape=[None],
                                              dtype="int64"))  # seq_len

    model = paddle.jit.to_static(model, input_spec=inputs)
    # Save the static graph model.
    paddle.jit.save(model, args.output_path)
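
Once saved, the static-graph model can be reloaded for inference without the Python network definition; a minimal sketch using the same output prefix:

import paddle

# Reload the exported static-graph model and switch it to inference mode.
loaded_model = paddle.jit.load(args.output_path)
loaded_model.eval()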