Exemplo n.º 1
0
    def fill_example_queue(self):
        """Read (article, abstract) pairs and enqueue them as Example objects.

        Pairs are drawn from ``self.text_generator`` applied to
        ``data.example_generator(self._data_path, self._single_pass)``. Each
        abstract is split with ``data.abstract2sents``, every sentence is
        stripped, and the resulting ``Example`` is put on
        ``self._example_queue``.

        When the source is exhausted in single-pass mode,
        ``self._finished_reading`` is set to True and the method returns.

        Raises:
            Exception: if the source is exhausted while single-pass mode is off.
        """
        pair_stream = self.text_generator(
            data.example_generator(self._data_path, self._single_pass)
        )

        while True:
            try:
                # Both items of the pair are expected to be strings.
                article, abstract = next(pair_stream)
            except StopIteration:
                # Running dry is only acceptable when a single pass was requested.
                if not self._single_pass:
                    raise Exception(
                        "single_pass mode is off but the example generator is out of data; error."
                    )
                self._finished_reading = True
                return

            # abstract2sents uses the <s> and </s> tags to split the abstract
            # into sentences; surrounding whitespace is trimmed from each one.
            sentences = []
            for raw_sentence in data.abstract2sents(abstract):
                sentences.append(raw_sentence.strip())

            built = Example(article, sentences, self._vocab)
            self._example_queue.put(built)
Exemplo n.º 2
0
    def __getitem__(self, index):
        """Return the Example built from the article/abstract stored at ``index``.

        The article and abstract are looked up in ``self.db["articles"]`` and
        ``self.db["abstracts"]``. The abstract is split with
        ``data.abstract2sents`` and each sentence is stripped before being
        handed to ``Example`` together with ``self.vocabulary``.
        """
        source_text = self.db["articles"][index]
        raw_summary = self.db["abstracts"][index]

        # Reuse the Example type from the threaded Batcher code.
        # abstract2sents relies on the <s> and </s> tags to find sentences.
        summary_sentences = []
        for sentence in data.abstract2sents(raw_summary):
            summary_sentences.append(sentence.strip())

        return Example(source_text, summary_sentences, self.vocabulary)
Exemplo n.º 3
0
 def run(self):
     """Turn pairs from ``self.generator()`` into Examples on ``self.queue``.

     The generator is stored on ``self.example_generator`` and consumed while
     ``self.alive`` is truthy. For every (abstract, article) pair:

     * pairs whose abstract or article length is ``<= self.min_len`` are
       skipped;
     * otherwise the abstract is split with ``data.abstract2sents``, each
       sentence is stripped, and an ``Example`` is put on ``self.queue``.

     When the generator is exhausted, ``self.alive`` is set to False and the
     method returns. While the queue is full the loop waits briefly before
     checking again, rather than spinning without pause.
     """
     # Local import: the file's import block is not visible from this snippet.
     import time

     self.example_generator = self.generator()
     while self.alive:
         if self.queue.full():
             # Back off instead of busy-waiting at full CPU until a slot frees up.
             time.sleep(0.01)
             continue

         try:
             abstract, article = next(self.example_generator)
         except StopIteration:
             # No more data: mark this producer as finished.
             self.alive = False
             break
         except ValueError:
             # Best-effort: skip an item that cannot be produced or unpacked
             # into exactly two values.
             continue

         if len(abstract) <= self.min_len or len(article) <= self.min_len:
             continue

         abstract_sentences = [sent.strip() for sent in data.abstract2sents(abstract)]
         example = Example(article, abstract_sentences, self.vocab)
         self.queue.put(example)