示例#1
0
        X_train = [[split_words(sent) for sent in paragraph]
                   for paragraph in X_train]
        X_test = [[split_words(sent) for sent in paragraph]
                  for paragraph in X_test]

        W2V_corpus = W2V_corpus_iter(X_train)
        w2vModel = train_W2V(W2V_corpus, in_path + 'w2vModel')
        word2idx, embedMatrix = build_word2idx_embedMatrix(
            w2vModel)  # 制作word2idx和embedMatrix

        X_train_idx = make_X_train_idx(X_train, word2idx, MAX_SENT_NUM,
                                       MAX_SENT_LEN)
        X_test_idx = make_X_train_idx(X_test, word2idx, MAX_SENT_NUM,
                                      MAX_SENT_LEN)

        y_train_oneHot = make_y_train_oneHot(y_train, is_cate_dict=True)
        y_test_oneHot = make_y_train_oneHot(y_test, is_cate_dict=True)

        print(len(X_train_idx), len(X_test_idx), len(y_train_oneHot),
              len(y_test_oneHot))

        yelp_2014_data = {}
        yelp_2014_data['X_train_idx'] = X_train_idx
        yelp_2014_data['X_test_idx'] = X_test_idx
        yelp_2014_data['y_train_oneHot'] = y_train_oneHot
        yelp_2014_data['y_test_oneHot'] = y_test_oneHot
        yelp_2014_data['embedMatrix'] = embedMatrix

        pickle.dump(yelp_2014_data, open(in_path + 'yelp_2014_data', 'wb'))

    print('——————————————模型的训练和预测——————————————')
示例#2
0
    # Invert the vocabulary so that indices map back to words.
    imdb_idx2word = {idx: word for word, idx in imdb_word2idx.items()}
    # Decode every sequence: ids are looked up shifted down by 3, ids with
    # no entry become '?', and the first decoded token is dropped.
    X_all = [
        [imdb_idx2word.get(token_id - 3, '?') for token_id in sen][1:]
        for sen in X_all
    ]

    w2vModel = train_W2V(X_all, in_path + 'w2vModel')
    # Build word2idx and embedMatrix from the trained model.
    word2idx, embedMatrix = build_word2idx_embedMatrix(w2vModel)

    X_all_idx = make_X_train_idx(X_all, word2idx, MAX_SEQ_LEN)
    # Important: X_all and y_all must be np.array() instances, otherwise an
    # error is raised.
    y_all_idx = np.array(y_all)
    # 80/20 split with a fixed seed, stratified on the labels.
    X_tra_idx, X_val_idx, y_tra_idx, y_val_idx = train_test_split(
        X_all_idx, y_all_idx,
        test_size=0.2, random_state=0, stratify=y_all_idx)
    y_tra_oneHot = make_y_train_oneHot(y_tra_idx)
    y_val_oneHot = make_y_train_oneHot(y_val_idx)

    print('——————————————模型的训练和预测——————————————')
    start = time()
    model = textCNN_train_test(embedMatrix)
    # Original author's note: validation is unexpectedly very slow here even
    # though the Keras version is fast; the cause is unknown.
    model.train([X_tra_idx, y_tra_oneHot])
    y_pred_idx = model.test([X_val_idx, y_val_oneHot])

    print('——————————————结果评估——————————————')
    # Recover class indices from the one-hot rows (position of the first 1).
    y_val_idx = [list(row).index(1) for row in y_val_oneHot]
    evaluate_matrix(y_val_idx, y_pred_idx)

    # Seconds elapsed since training started.
    print(time() - start)
示例#3
0
    # load_data returns two input sequences and the labels per split
    # (the `_c` / `_t` suffixes are presumably context and target -- confirm
    # against load_data).
    X_tra_c, X_tra_t, y_tra = load_data(in_path + 'train.raw')
    X_test_c, X_test_t, y_test = load_data(in_path + 'test.raw')
    print(len(X_tra_c), len(X_tra_t), len(y_tra))
    print(len(X_test_c), len(X_test_t), len(y_test))

    embed_cache = in_path + 'embedMatrix.pkl'
    vocab_cache = in_path + 'word2idx.pkl'
    # Both cache files are required: if only one exists (e.g. a previous run
    # was interrupted between the two dumps) rebuild instead of crashing on
    # the missing file.
    if os.path.exists(embed_cache) and os.path.exists(vocab_cache):
        # NOTE: pickle must only be used on trusted files; these caches are
        # the ones written by the else-branch below.
        with open(embed_cache, 'rb') as cache_file:
            embedMatrix = pickle.load(cache_file)
        with open(vocab_cache, 'rb') as cache_file:
            word2idx = pickle.load(cache_file)
    else:
        # Collect the vocabulary of all four inputs so only the needed
        # vectors are loaded from the pretrained embedding file.
        all_sents = X_tra_c + X_tra_t + X_test_c + X_test_t
        word_set = {w for sent in all_sents for w in sent}
        w2vModel = load_W2V(in_path + 'glove.42B.300d.txt', word_set=word_set)
        word2idx, embedMatrix = build_word2idx_embedMatrix(w2vModel)
        # Context managers guarantee the caches are flushed and closed.
        with open(embed_cache, 'wb') as cache_file:
            pickle.dump(embedMatrix, cache_file)
        with open(vocab_cache, 'wb') as cache_file:
            pickle.dump(word2idx, cache_file)

    # All four inputs share one vocabulary and one maximum sequence length.
    X_tra_c_idx = make_X_train_idx(X_tra_c, word2idx, MAX_SEQ_LEN)
    X_tra_t_idx = make_X_train_idx(X_tra_t, word2idx, MAX_SEQ_LEN)
    X_test_c_idx = make_X_train_idx(X_test_c, word2idx, MAX_SEQ_LEN)
    X_test_t_idx = make_X_train_idx(X_test_t, word2idx, MAX_SEQ_LEN)

    y_tra_oneHot = make_y_train_oneHot(y_tra)
    y_test_oneHot = make_y_train_oneHot(y_test)

    print('——————————————train model——————————————')
    model = IAN_train_test(embedMatrix)

    model.train([X_tra_c_idx, X_tra_t_idx, y_tra_oneHot])
    y_pred = model.test([X_test_c_idx, X_test_t_idx, y_test_oneHot])
    print(y_pred)