Python Word2Vec.load 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: word_embedding.word2vec_gensim

클래스/타입: Word2Vec

메소드/함수: load

hotexamples.com에서의 예제들: 5

Python Word2Vec.load - 5개의 예제를 찾았습니다. 아래는 오픈 소스 프로젝트에서 추출한, Python의 word_embedding.word2vec_gensim.Word2Vec.load 사용 예제 가운데 평가가 가장 높은 실제 코드들입니다. 각 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

load(5)

자주 사용되는 메소드들

load (5)

예제 #1

파일 보기

def test():
    """Train the sentiment classifier on the sample files and print predictions.

    Steps, in order: load the pretrained word2vec model, build a
    ``CrfTokenizer``, load the synonym dictionary, construct a
    ``BiDirectionalLSTMClassifier``, train it on the positive/negative sample
    files, classify four Vietnamese sentences and print the resulting labels.

    Every path is hard-coded; adjust them to your environment before running.
    """
    from tokenization.crf_tokenizer import CrfTokenizer
    from word_embedding.word2vec_gensim import Word2Vec

    embeddings = Word2Vec.load('../models/pretrained_word2vec.bin')

    # NOTE: config_root_path is an absolute path from the original author's
    # machine and must be changed to a valid local path.
    segmenter = CrfTokenizer(
        config_root_path='/Users/admin/Desktop/Projects/python/NLP/hactcore/hactcore/nlp/tokenization/',
        model_path='../models/pretrained_tokenizer.crfsuite',
    )
    synonyms = load_synonym_dict('../data/sentiment/synonym.txt')

    classifier = BiDirectionalLSTMClassifier(
        tokenizer=segmenter,
        word2vec=embeddings.wv,
        model_path='../models/sentiment_model.h5',
        max_length=10,
        n_epochs=10,
        sym_dict=synonyms,
    )

    # One sample file per class; the classifier's own file loader reads them.
    sample_files = [
        '../data/sentiment/samples/positive.txt',
        '../data/sentiment/samples/negative.txt',
    ]
    features, targets = classifier.load_data(
        sample_files, load_method=classifier.load_data_from_file)
    classifier.train(features, targets)

    # Maps class index to the human-readable label (positive / negative).
    id_to_label = {0: 'tích cực', 1: 'tiêu cực'}
    queries = ['Dở thế', 'Hay thế', 'phim chán thật', 'nhảm quá']
    predicted = classifier.classify(queries, label_dict=id_to_label)
    # Output reported by the original author:
    # ['tiêu cực', 'tích cực', 'tiêu cực', 'tiêu cực']
    print(predicted)

예제 #2

파일 보기

파일: demo.py 프로젝트: HongQuan0110/API_NLP

def test(sentences):
    """Classify a fixed set of Vietnamese sentences and print their labels.

    Loads the pretrained word2vec model, builds a ``LongMatchingTokenizer``
    and a ``BiDirectionalLSTMClassifier`` pointing at
    ``models/sentiment_model.h5``, then classifies the built-in sample
    sentences and prints the predicted labels.

    Args:
        sentences: Currently unused.
            NOTE(review): the function classifies its own hard-coded list
            instead of this argument -- confirm whether callers expect their
            input to be classified.

    Please give the correct paths before running: all paths are relative to
    the current working directory.
    """
    # Load word2vec model from file. If you want to train your own model,
    # please go to README or check word2vec_gensim.py
    word2vec_model = Word2Vec.load('models/pretrained_word2vec.bin')

    # Tokenizer used for word segmentation. If you want to train your own
    # model, please go to README or check crf_tokenizer.py
    tokenizer = LongMatchingTokenizer()
    sym_dict = load_synonym_dict('data/sentiment/synonym.txt')
    keras_text_classifier = BiDirectionalLSTMClassifier(
        tokenizer=tokenizer,
        word2vec=word2vec_model.wv,
        model_path='models/sentiment_model.h5',
        max_length=200,
        n_epochs=10,
        sym_dict=sym_dict)

    # Training is intentionally disabled here. To train, load and prepare the
    # data and fit the classifier first:
    #   X, y = keras_text_classifier.load_data(
    #       ['data/sentiment/samples/positive.txt',
    #        'data/sentiment/samples/negative.txt'],
    #       load_method=keras_text_classifier.load_data_from_file)
    #   keras_text_classifier.train(X, y)

    # Class index -> label (positive / negative / neutral).
    label_dict = {0: 'tích cực', 1: 'tiêu cực', 2: 'bình thường'}
    test_sentences = [
        "Dở thế", "Hay thế", "chán thật", "nhảm quá", "không ngon",
        "nhân viện phục vụ chậm",
        "nhân viên phục vụ tệ thế, nhưng phim xuất sắc", " không có tiền"
    ]
    labels = keras_text_classifier.classify(test_sentences,
                                            label_dict=label_dict)
    # Prints the result of classify() for the eight sentences above.
    print(labels)

예제 #3

파일 보기

def test():
    """Classify a fixed set of Vietnamese sentences and print their labels.

    Loads the pretrained word2vec model and the synonym dictionary from paths
    built from ``dir_path`` (a name defined outside this function), constructs
    a ``BiDirectionalLSTMClassifier`` pointing at ``sentiment_model.h5`` and
    prints the labels predicted for the built-in sample sentences.

    Please give the correct paths before running.
    """
    from tokenization.dict_models import LongMatchingTokenizer
    from word_embedding.word2vec_gensim import Word2Vec
    word2vec_model = Word2Vec.load(dir_path +
                                   '/../models/pretrained_word2vec.bin')
    tokenizer = LongMatchingTokenizer()
    sym_dict = load_synonym_dict(dir_path + '/../data/sentiment/synonym.txt')
    keras_text_classifier = BiDirectionalLSTMClassifier(
        tokenizer=tokenizer,
        word2vec=word2vec_model.wv,
        model_path=dir_path + '/../models/sentiment_model.h5',
        max_length=200,
        n_epochs=10,
        sym_dict=sym_dict)

    # Training is intentionally disabled here. To train on the SA2016 data,
    # load the three class files and fit the classifier first:
    #   X, y = keras_text_classifier.load_data(
    #       [dir_path + '/../data/sentiment/SA2016-training_data/positive.txt',
    #        dir_path + '/../data/sentiment/SA2016-training_data/negative.txt',
    #        dir_path + '/../data/sentiment/SA2016-training_data/neutral.txt'],
    #       load_method=keras_text_classifier.load_data_from_file)
    #   keras_text_classifier.train(X, y)

    # Class index -> label (positive / negative / neutral).
    label_dict = {0: 'tích cực', 1: 'tiêu cực', 2: 'bình thường'}
    test_sentences = [
        "Dở thế", "Hay thế", "chán thật", "nhảm quá", "không ngon",
        "nhân viện phục vụ chậm",
        "nhân viên phục vụ tệ thế, nhưng phim xuất sắc", " không có tiền"
    ]
    labels = keras_text_classifier.classify(test_sentences,
                                            label_dict=label_dict)
    # Prints the result of classify() for the eight sentences above.
    print(labels)

예제 #4

파일 보기

파일: app.py 프로젝트: hoho303/trolyaobyhoho303

from word_embedding.word2vec_gensim import Word2Vec
from text_classification.short_text_classifiers import BiDirectionalLSTMClassifier
from tokenization.crf_tokenizer import CrfTokenizer
import flask
import pandas as pd

# Everything below runs once at import time, so the models are loaded when
# the module is imported rather than per request.

# Pretrained word embeddings; only the `.wv` attribute is handed to the
# classifier below.
word2vec_model = Word2Vec.load('models/pretrained_word2vec.bin')

# Tokenizer configured from the local 'tokenization/' directory and the
# pretrained .crfsuite model file.
tokenizer = CrfTokenizer(
    config_root_path='tokenization/',
    model_path='models/pretrained_tokenizer.crfsuite',
)

# Three-class classifier pointing at 'models/app.h5'.
model = BiDirectionalLSTMClassifier(
    tokenizer=tokenizer,
    word2vec=word2vec_model.wv,
    model_path='models/app.h5',
    n_class=3,
)

# Class index -> label name.
label_dict = {
    0: 'mo_vnexpress',
    1: 'mo_dantri',
    2: 'mo_truyenfull',
}

# Flask application object used by the route decorators in this module.
app = flask.Flask(__name__)


@app.route("/predict", methods=["GET", "POST"])
def predict():
    """Handle ``/predict``: read request parameters and turn them into rows.

    Parameters are taken from the JSON body when one is present, otherwise
    from the URL query string. They are converted, via a pandas DataFrame
    built with ``orient='index'``, into a nested list of strings ``x``.

    NOTE(review): the visible code stops after building ``x``; no prediction
    is made and no response is returned, so this excerpt appears truncated.
    """
    data = {"success": False}

    # `flask.request.json` raises an HTTP error (415/400) in recent Flask
    # versions when the request is not JSON, which would make the query-string
    # fallback below unreachable for plain GET requests. `silent=True` returns
    # None instead, for a missing/incorrect content type or an unparsable body.
    params = flask.request.get_json(silent=True)
    if params is None:
        params = flask.request.args

    # if parameters are found, return a prediction
    if params is not None:
        x = pd.DataFrame.from_dict(
            params, orient='index').to_numpy(dtype=str).tolist()

예제 #5

파일 보기

from keras.models import load_model
from sklearn.model_selection import train_test_split
from statistics import *
from word_embedding.word2vec_gensim import Word2Vec
import keras
# Numeric settings; presumably consumed by the model-building code further
# down in this file (not visible here) -- confirm before changing.
LSTM_UNITS = 128
DENSE_HIDDEN_UNITS = 4 * LSTM_UNITS
BATCH_SIZE = 256

# Input files, given relative to the current working directory.
path_train = 'data/train.txt'
path_test = 'data/test.txt'
path_synonym = 'synonym.txt'

# Candidate embedding files; only the first entry is loaded.
EMBEDDING_FILES = ['pretrained_word2vec.bin']
word2vec_model = Word2Vec.load(EMBEDDING_FILES[0])


def build_model(input_dim):

    # model = Sequential()

    # model.add(Bidirectional(LSTM(32, return_sequences=True), input_shape=input_dim))
    # model.add(Dropout(0.1))
    # model.add(Bidirectional(LSTM(16)))
    # model.add(Dense(1, activation="softmax"))

    # model.compile(loss=['binary_crossentropy'], optimizer='adam',metrics=['accuracy'])