Exemplo n.º 1
0
 def __iter__(self):
     """Yield training samples and, on the first pass, collect held-out ones.

     Each sample is a ``(content, is_acq)`` tuple built from a document
     returned by ``stream_reuters_documents()``; ``is_acq`` is the result of
     ``"acq" in document["topics"]``.

     The first ``num_train_samples`` documents are yielded. Every later
     document is appended to ``test_samples`` while ``self.first_iteration``
     is true. ``num_train_samples`` and ``test_samples`` are taken from the
     enclosing scope, which is not visible in this block.

     ``self.first_iteration`` is set to False once the generator has run to
     completion.
     """
     for index, document in enumerate(stream_reuters_documents()):
         # TODO: maybe try "usa" or "earn" as the target topic.
         sample = document["content"], "acq" in document["topics"]
         # Strict `<` on a zero-based index yields exactly num_train_samples
         # samples; the former `count > num_train_samples` test let one
         # extra document into the training split.
         if index < num_train_samples:
             yield sample
         elif self.first_iteration:
             test_samples.append(sample)
         else:
             # Past the training prefix on a repeat pass: the remaining
             # documents would be ignored anyway, so stop reading the stream.
             break
     self.first_iteration = False
Exemplo n.º 2
0
 def __iter__(self):
     """Yield training samples and, on the first pass, collect held-out ones.

     Each sample is a ``(content, is_acq)`` tuple built from a document
     returned by ``stream_reuters_documents()``; ``is_acq`` is the result of
     ``"acq" in document["topics"]``.

     The first ``num_train_samples`` documents are yielded. Every later
     document is appended to ``test_samples`` while ``self.first_iteration``
     is true. ``num_train_samples`` and ``test_samples`` are taken from the
     enclosing scope, which is not visible in this block.

     ``self.first_iteration`` is set to False once the generator has run to
     completion.
     """
     for index, document in enumerate(stream_reuters_documents()):
         # TODO: maybe try "usa" or "earn" as the target topic.
         sample = document["content"], "acq" in document["topics"]
         # Strict `<` on a zero-based index yields exactly num_train_samples
         # samples; the former `count > num_train_samples` test let one
         # extra document into the training split.
         if index < num_train_samples:
             yield sample
         elif self.first_iteration:
             test_samples.append(sample)
         else:
             # Past the training prefix on a repeat pass: the remaining
             # documents would be ignored anyway, so stop reading the stream.
             break
     self.first_iteration = False
Exemplo n.º 3
0
 def get_documents(self):
     """Lazily yield ``simple_preprocess`` applied to each document's content.

     Documents come from ``stream_reuters_documents()``; only the
     ``"content"`` entry of each one is used.
     """
     for entry in stream_reuters_documents():
         text = entry["content"]
         yield simple_preprocess(text)
Exemplo n.º 4
0
def get_topic_counts():
    """Count how often each topic occurs across the streamed documents.

    Returns:
        A ``defaultdict(int)`` mapping each topic found in a document's
        ``"topics"`` entry to its number of occurrences.
    """
    counts = defaultdict(int)
    # Flatten the per-document topic collections into one lazy stream.
    all_topics = (
        label
        for entry in stream_reuters_documents()
        for label in entry["topics"]
    )
    for label in all_topics:
        counts[label] += 1
    return counts
Exemplo n.º 5
0
 def get_documents(self):
     """Lazily yield ``simple_preprocess`` applied to each document's content.

     Documents come from ``stream_reuters_documents()``; only the
     ``"content"`` entry of each one is used.
     """
     for entry in stream_reuters_documents():
         text = entry["content"]
         yield simple_preprocess(text)
Exemplo n.º 6
0
def get_topic_counts():
    """Count how often each topic occurs across the streamed documents.

    Returns:
        A ``defaultdict(int)`` mapping each topic found in a document's
        ``"topics"`` entry to its number of occurrences.
    """
    counts = defaultdict(int)
    # Flatten the per-document topic collections into one lazy stream.
    all_topics = (
        label
        for entry in stream_reuters_documents()
        for label in entry["topics"]
    )
    for label in all_topics:
        counts[label] += 1
    return counts