Пример #1
0
class ReviewCorpus(object):
    """Stream bag-of-words vectors from a file with one JSON review per line.

    Every non-blank line of the file is parsed as a JSON document, handed to
    the tokenizer, and the resulting tokens are converted with
    ``dictionary.doc2bow``. The file is re-opened on each iteration, so one
    instance can be iterated any number of times.
    """

    def __init__(self, filename, dictionary):
        """Remember where the reviews live and how to vectorize them.

        Args:
            filename: Path of the file holding one JSON document per line.
            dictionary: Object exposing ``doc2bow(tokens)`` (presumably a
                gensim ``Dictionary`` -- confirm against callers).
        """
        self.filename = filename
        self.dictionary = dictionary
        self.tokenizer = SimpleTokenizer()

    def __iter__(self):
        """Yield ``dictionary.doc2bow(tokens)`` for each review in the file.

        Blank or whitespace-only lines are skipped.

        Raises:
            ValueError: If a non-blank line is not valid JSON.
        """
        # io.open takes ``encoding`` on both Python 2 and 3. JSON text is
        # UTF-8, so decode it explicitly instead of relying on the platform's
        # locale encoding (which is not UTF-8 on e.g. Windows).
        import io

        with io.open(self.filename, encoding="utf-8") as f:
            for line in f:
                # Tolerate blank separator/trailing lines rather than letting
                # json.loads abort the whole stream on them.
                if not line.strip():
                    continue
                review = json.loads(line)
                tokens = self.tokenizer.tokenize(review)
                yield self.dictionary.doc2bow(tokens)
Пример #2
0
def load_reviews(filename):
    """Lazily yield the tokenized form of each review stored in *filename*.

    The file is expected to hold one JSON document per line. Each non-blank
    line is parsed with ``json.loads`` and passed to
    ``SimpleTokenizer.tokenize``; whatever that returns is yielded.

    Args:
        filename: Path of the file holding one JSON document per line.

    Yields:
        The result of ``SimpleTokenizer().tokenize(review)`` for every review.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
    """
    # io.open takes ``encoding`` on both Python 2 and 3. JSON text is UTF-8,
    # so decode it explicitly instead of relying on the platform's locale
    # encoding (which is not UTF-8 on e.g. Windows).
    import io

    tokeniser = SimpleTokenizer()
    with io.open(filename, 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank separator/trailing lines rather than letting
            # json.loads abort the whole stream on them.
            if not line.strip():
                continue
            review = json.loads(line)
            yield tokeniser.tokenize(review)