Python file_to_stream示例

编程语言: Python

命名空间/包名称: prepare_sentence_stream

方法/功能: file_to_stream

hotexamples.com的示例: 4

Python file_to_stream - 已找到4个示例。以下是从开源项目中提取的、评价最高的 prepare_sentence_stream.file_to_stream 真实 Python 代码示例。您可以对示例进行评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： train_with_bigrams.py 项目： kivi239/ML

import prepare_sentence_stream
import gensim

# Script: fit a gensim Phrases model on a text corpus, apply it to the
# sentences, train a Word2Vec model on the result and save the vectors.

# Disabled alternative: load pre-trained vectors instead of training new ones.
#model = gensim.models.Word2Vec.load_word2vec_format('../../Word2Vec/all.s200.w11.n1.v20.cbow.bin', binary=True, unicode_errors='ignore')
#print("Loaded model")

# Relative path of the input corpus.
file = '../text/ch.txt'
# NOTE(review): presumably an iterable of tokenized sentences -- confirm in
# prepare_sentence_stream. It is consumed twice below (by Phrases(...) and by
# bigram[...]), so it has to be re-iterable; a one-shot generator would be
# exhausted after the first pass.
sentence_stream = prepare_sentence_stream.file_to_stream(file)

# Debug output of whatever object file_to_stream returned.
print(sentence_stream)

# Fit the phrase (collocation) model on the corpus with min_count=5 and
# threshold=10; see the gensim Phrases documentation for their exact meaning.
bigram = gensim.models.phrases.Phrases(sentence_stream,
                                       min_count=5,
                                       threshold=10)
print(bigram)

# Apply the fitted phrase model to every sentence and materialize the result
# as a list so Word2Vec can iterate over it.
new_sentences = list(bigram[sentence_stream])

# do we need to convert all words to lower case?
# The bare string literal below is disabled code: as an expression statement
# it has no effect at runtime.
'''for l in new_sentences:
    for i in range(len(l)):
        l[i] = l[i].lower()
'''

print("Read sentences, building model...")

# NOTE(review): `size` is the keyword used by gensim releases before 4.0
# (later renamed `vector_size`) -- confirm the installed gensim version.
model = gensim.models.word2vec.Word2Vec(new_sentences, size=25)
# Output path for the vectors written below with binary=True.
file_name = 'word2vec/w2v.bin'
# NOTE(review): newer gensim releases expose this method on `model.wv`
# instead of on the model itself -- confirm against the installed version.
model.save_word2vec_format(file_name, binary=True)

# Path for a text output file; it is not used within this visible snippet.
text_file_name = 'word2vec/w.txt'

示例#2

显示文件

文件： transform_file.py 项目： kivi239/ML

import prepare_sentence_stream
import gensim

# Script: fit a gensim Phrases model on a corpus, apply it to a second pass
# over the same file, and write the resulting sentences to a new text file,
# one sentence per line.

# Passed to file_to_stream as `seps` / `trs` for the second pass.
# NOTE(review): presumably sentence separators and tokens to discard --
# confirm against prepare_sentence_stream.file_to_stream.
separators = ['...', '.', '?', '!']
trash = ['"', '--', '(', ')', ':', ',', ';']


# Relative path of the input corpus.
file = '../text/ch.txt'
# First pass (helper defaults) is used to fit the phrase model; the second
# pass (explicit seps/trs) is the text the fitted model is applied to.
sentence_stream = prepare_sentence_stream.file_to_stream(file)
text = prepare_sentence_stream.file_to_stream(file, seps=separators, trs=trash)

# Debug output of whatever object file_to_stream returned.
print(sentence_stream)

# Fit the phrase (collocation) model; see the gensim Phrases documentation
# for the exact meaning of min_count and threshold.
bigram = gensim.models.phrases.Phrases(sentence_stream, min_count=5, threshold=10)
print(bigram)

# Apply the fitted model to every sentence of `text` and materialize it.
new_sentences = list(bigram[text])

new_file = '../text/ch_bigrams.txt'
# The context manager guarantees the file is flushed and closed even if a
# write fails; the original left the handle open for the interpreter to
# clean up.
with open(new_file, 'w', encoding='utf-8') as f:
    for sentence in new_sentences:
        # Every word is followed by a single space (including the last one),
        # which keeps the output byte-identical to the original format.
        f.writelines(word + ' ' for word in sentence)
        f.write('\n')

示例#3

显示文件

文件： train_with_bigrams.py 项目： kivi239/ML

import prepare_sentence_stream
import gensim

# Script: fit a gensim Phrases model on a text corpus, apply it to the
# sentences, train a Word2Vec model on the result and save the vectors.

# Disabled alternative: load pre-trained vectors instead of training new ones.
#model = gensim.models.Word2Vec.load_word2vec_format('../../Word2Vec/all.s200.w11.n1.v20.cbow.bin', binary=True, unicode_errors='ignore')
#print("Loaded model")

# Relative path of the input corpus.
file = '../text/ch.txt'
# NOTE(review): presumably an iterable of tokenized sentences -- confirm in
# prepare_sentence_stream. It is consumed twice below (by Phrases(...) and by
# bigram[...]), so it has to be re-iterable; a one-shot generator would be
# exhausted after the first pass.
sentence_stream = prepare_sentence_stream.file_to_stream(file)

# Debug output of whatever object file_to_stream returned.
print(sentence_stream)

# Fit the phrase (collocation) model on the corpus with min_count=5 and
# threshold=10; see the gensim Phrases documentation for their exact meaning.
bigram = gensim.models.phrases.Phrases(sentence_stream, min_count=5, threshold=10)
print(bigram)

# Apply the fitted phrase model to every sentence and materialize the result
# as a list so Word2Vec can iterate over it.
new_sentences = list(bigram[sentence_stream])

# do we need to convert all words to lower case?
# The bare string literal below is disabled code: as an expression statement
# it has no effect at runtime.
'''for l in new_sentences:
    for i in range(len(l)):
        l[i] = l[i].lower()
'''

print("Read sentences, building model...")

# NOTE(review): `size` is the keyword used by gensim releases before 4.0
# (later renamed `vector_size`) -- confirm the installed gensim version.
model = gensim.models.word2vec.Word2Vec(new_sentences, size=25)
# Output path for the vectors written below with binary=True.
file_name = 'word2vec/w2v.bin'
# NOTE(review): newer gensim releases expose this method on `model.wv`
# instead of on the model itself -- confirm against the installed version.
model.save_word2vec_format(file_name, binary=True)

# Path of the text output file that is opened for writing next.
text_file_name = 'word2vec/w.txt'
f = open(text_file_name, 'w', encoding='utf-8')
for key in model.vocab.keys():

示例#4

显示文件

文件： transform_file.py 项目： kivi239/ML

import prepare_sentence_stream
import gensim

# Script: fit a gensim Phrases model on a corpus, apply it to a second pass
# over the same file, and write the resulting sentences to a new text file,
# one sentence per line.

# Passed to file_to_stream as `seps` / `trs` for the second pass.
# NOTE(review): presumably sentence separators and tokens to discard --
# confirm against prepare_sentence_stream.file_to_stream.
separators = ['...', '.', '?', '!']
trash = ['"', '--', '(', ')', ':', ',', ';']

# Relative path of the input corpus.
file = '../text/ch.txt'
# First pass (helper defaults) is used to fit the phrase model; the second
# pass (explicit seps/trs) is the text the fitted model is applied to.
sentence_stream = prepare_sentence_stream.file_to_stream(file)
text = prepare_sentence_stream.file_to_stream(file, seps=separators, trs=trash)

# Debug output of whatever object file_to_stream returned.
print(sentence_stream)

# Fit the phrase (collocation) model; see the gensim Phrases documentation
# for the exact meaning of min_count and threshold.
bigram = gensim.models.phrases.Phrases(sentence_stream,
                                       min_count=5,
                                       threshold=10)
print(bigram)

# Apply the fitted model to every sentence of `text` and materialize it.
new_sentences = list(bigram[text])

new_file = '../text/ch_bigrams.txt'
# The context manager guarantees the file is flushed and closed even if a
# write fails; the original left the handle open for the interpreter to
# clean up.
with open(new_file, 'w', encoding='utf-8') as f:
    for sentence in new_sentences:
        # Every word is followed by a single space (including the last one),
        # which keeps the output byte-identical to the original format.
        f.writelines(word + ' ' for word in sentence)
        f.write('\n')