def word2vec_test(): # 读入数据 pos_file_path = globe.file_pos neg_file_path = globe.file_neg tmp = data_processing.read_data(pos_file_path, neg_file_path) res = data_processing.data_split(tmp[0], tmp[1]) x_train = res[0] x_train = data_processing.text_clean(x_train) for i in x_train: for j in i: print j, n_dim = 200 min_count = 2 # model = gensim.models.Word2Vec(x_train, min_count=0, size=200, workers=4) model = word2vec_model(x_train, n_dim, min_count) # res = w2c_model.most_similar(positive=['纤维', '批次'], negative=['成分'], topn=1) # # w2c_model.doesnt_match("我 爱 中国".split()) # # var = w2c_model.similarity('纤维', '批次') # print var # res = w2c_model.most_similar("纤维") # for i in res: # print i[0], dd = model.most_similar("批次") for i in dd: print i[0],
def _data_read(pos_file_path, neg_file_path, w2c_model_path):
    """Read the corpora, load a word2vec model, and build document vectors.

    Args:
        pos_file_path: Positive corpus file path.
        neg_file_path: Negative corpus file path.
        w2c_model_path: Path of the saved word2vec model.

    Returns:
        A tuple ``(train_data_vecs, train_labels, test_data_vecs,
        test_labels)`` — document vectors and labels for both splits.

    Raises:
        IOError: If the word2vec model cannot be loaded from
            ``w2c_model_path``.
    """
    tmp = data_processing.read_data(pos_file_path, neg_file_path)
    res = data_processing.data_split(tmp[0], tmp[1])
    (train_data, test_data, train_labels, test_labels) = (res[0], res[1], res[2], res[3])

    train_data = data_processing.text_clean(train_data)
    test_data = data_processing.text_clean(test_data)

    # Word-vector dimensionality, shared project-wide via globe.
    n_dim = globe.n_dim

    # BUG FIX: the original wrapped the load in ``except IOError: pass``,
    # so a missing model file left doc_vecs empty and the function then
    # crashed with an unrelated IndexError on doc_vecs[0].  Let the
    # IOError propagate instead — that matches the documented contract
    # and surfaces the real cause to the caller.
    word2vec_model = Word2Vec.load(w2c_model_path)
    doc_vecs = word2vec_gensim_train.text_vecs(train_data, test_data, n_dim, word2vec_model)

    # Document vectors for the train / test splits.
    train_data_vecs = doc_vecs[0]
    test_data_vecs = doc_vecs[1]
    return train_data_vecs, train_labels, test_data_vecs, test_labels
# NOTE(review): a stray module-level duplicate of word2vec_test's tail
# (``dd = model.most_similar("批次") ...``) referenced the undefined name
# ``model`` and would raise NameError on import; it was removed along
# with the surrounding commented-out experiments.
if __name__ == "__main__":
    # Run the manual smoke test first: trains a throwaway model and
    # prints similar words.
    word2vec_test()

    # Then train a model on the cleaned training split and persist it.
    pos_file_path = globe.file_pos
    neg_file_path = globe.file_neg
    tmp = data_processing.read_data(pos_file_path, neg_file_path)
    res = data_processing.data_split(tmp[0], tmp[1])
    x_train = res[0]
    x_train = data_processing.text_clean(x_train)

    # Word-vector dimensionality and minimum token frequency.
    n_dim = 200
    min_count = 2
    model_path = globe.model_path
    mymodel = word2vec_model(x_train, n_dim, min_count)
    mymodel.save(model_path)