def _load_corpus(self, corpus_dir):
    """Load every document stored under ``corpus_dir`` into ``self.documents``.

    ``self.documents`` is reset to an empty list first, so a failed or
    repeated call never leaves documents from a previous load behind.
    The directory tree is walked recursively; every file found is opened
    in binary mode and read record by record through ``RecordReader``
    until ``read()`` returns ``None``. Each record is parsed into a new
    ``Document`` created with ``self.model.num_topics``.

    Args:
        corpus_dir: Path of the directory that holds the corpus files.

    Returns:
        False if ``corpus_dir`` does not exist (an error is logged),
        True once the whole tree has been walked.
    """
    self.documents = []
    if not os.path.exists(corpus_dir):
        logging.error('The corpus directory %s does not exists.', corpus_dir)
        return False

    for root, _dirs, files in os.walk(corpus_dir):
        for name in files:
            filename = os.path.join(root, name)
            # The context manager closes the file even when reading or
            # parsing raises, so no descriptor is leaked per corpus file.
            with open(filename, 'rb') as fp:
                record_reader = RecordReader(fp)
                while True:
                    blob = record_reader.read()
                    if blob is None:
                        # No more records in this file.
                        break
                    document = Document(self.model.num_topics)
                    document.parse_from_string(blob)
                    self.documents.append(document)
    return True