Python search 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: word2vec_boostpython

메소드/함수: search

hotexamples.com에서의 예제들: 4

Python search - 4개의 예제를 찾았습니다. 아래는 오픈 소스 프로젝트에서 추출한 Python word2vec_boostpython.search의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 개선하는 데 도움이 됩니다.

예제 #1

파일 보기

파일: visword2vec.py 프로젝트: afk314/mycorp

    def plot(self, query, nbest=15):
        if ', ' not in query:
            words = [query] + w2v.search(self.data, query)[:nbest]
        else:
            words = query.split(', ')
            print ', '.join(words)
        mat = w2v.get_vectors(self.data)
        word_indexes = [w2v.get_word_index(self.data, w) for w in words]
        if word_indexes == [-1]:
            print 'not in vocabulary'
            return

        # do PCA
        X = mat[word_indexes]
        pca = PCA(n_components=2)
        pca.fit(X)
        print pca.explained_variance_ratio_
        X = pca.transform(X)
        xs = X[:, 0]
        ys = X[:, 1]

        # draw
        plt.figure(figsize=(12, 8))
        plt.scatter(xs, ys, marker='o')
        for i, w in enumerate(words):
            plt.annotate(w.decode('utf-8', 'ignore'),
                         xy=(xs[i], ys[i]),
                         xytext=(3, 3),
                         textcoords='offset points',
                         ha='left',
                         va='top',
                         **self.TEXT_KW)

        plt.show()

예제 #2

파일 보기

TEXT_KW = dict(fontsize=FONT_SIZE, fontweight='bold', fontproperties=font)

filename = 'word2vec/jawiki.bin'
#filename = 'word2vec/orj.bin'
print 'loading'
data = w2v.load(filename)
print 'loaded'
nbest = 15

while True:
    query = raw_input('query: ')
    if query.startswith('nbest='):
        nbest = int(query[6:])
        continue
    if ', ' not in query:
        words = [query] + w2v.search(data, query)[:nbest]
    else:
        words = query.split(', ')
    print ', '.join(words)
    mat = w2v.get_vectors(data)
    word_indexes = [w2v.get_word_index(data, w) for w in words]
    if word_indexes == [-1]:
        print 'not in vocabulary'
        continue

    # do PCA
    X = mat[word_indexes]
    pca = PCA(n_components=2)
    pca.fit(X)
    print pca.explained_variance_ratio_
    X = pca.transform(X)

예제 #3

파일 보기

파일: vis.py 프로젝트: nishio/mycorpus

TEXT_KW = dict(fontsize=FONT_SIZE, fontweight="bold", fontproperties=font)

filename = "word2vec/jawiki.bin"
# filename = 'word2vec/orj.bin'
print "loading"
data = w2v.load(filename)
print "loaded"
nbest = 15

while True:
    query = raw_input("query: ")
    if query.startswith("nbest="):
        nbest = int(query[6:])
        continue
    if ", " not in query:
        words = [query] + w2v.search(data, query)[:nbest]
    else:
        words = query.split(", ")
    print ", ".join(words)
    mat = w2v.get_vectors(data)
    word_indexes = [w2v.get_word_index(data, w) for w in words]
    if word_indexes == [-1]:
        print "not in vocabulary"
        continue

    # do PCA
    X = mat[word_indexes]
    pca = PCA(n_components=2)
    pca.fit(X)
    print pca.explained_variance_ratio_
    X = pca.transform(X)

예제 #4

파일 보기

파일: test.py 프로젝트: hailiang-wang/word2vec-get-started

# Each test(...) call below passes a computed value first and the value it is
# checked against second.

# invec against its ten expected components.
test(
    invec,
    [
        -0.21694795787334442,
        0.38501447439193726,
        1.080917239189148,
        0.25622865557670593,
        -0.22400374710559845,
        0.7944273948669434,
        0.8532216548919678,
        -0.3066456913948059,
        -0.6160392761230469,
        -0.19778962433338165,
    ],
)

# Word count reported by the model, read before the corpus is loaded.
num_words = w2v.get_num_words_from_model()

# The loaded corpus must report the same number of words.
data = w2v.load("tiny_corpus.bin")
test(w2v.get_num_words(data), num_words)

# First five search results for the query "1".
test(w2v.search(data, "1")[:5], ["a", "A", "g", "G", "6"])

# Second load of the same file through load_without_normalize.
data2 = w2v.load_without_normalize("tiny_corpus.bin")

# Collect every word of data2 in index order.
words = []
for i in range(w2v.get_num_words(data2)):
    words.append(w2v.get_word(data2, i))

# Index 1 holds the word 'd'; its vector equals invec whether it is looked up
# by word or read as row 1 of the full vector matrix.
test(words[1], "d")
test(w2v.get_vector(data2, "d"), invec)
test(list(w2v.get_vectors(data2)[1]), invec)

# First five find_sub results for 'A' + 'b' - 'a'.
test(
    w2v.find_sub(data, ["A", "b"], ["a"])[:5],
    ["B", "2", "H", "h", "8"],
)