def get_texts_from_dir(self, directory=REAL_TEST_FILES_DIR, for_parser=None):
    """Parse every usable file in *directory* and return the texts by date.

    The candidate files come from ``self.get_test_db_files(directory,
    for_parser)``. Each file is parsed with the parser that
    ``core.getParser`` returns for it, and a one-line summary (count,
    parser class, base file name) is printed per file.

    Args:
        directory: Directory whose files are parsed.
        for_parser: Passed through to ``get_test_db_files`` to restrict the
            files to those handled by one parser class; ``None`` means no
            restriction.

    Returns:
        A new list of all parsed texts, sorted ascending by their ``date``
        attribute.
    """
    files = self.get_test_db_files(directory, for_parser)
    texts = []
    for filename in files:
        parser = core.getParser(filename)
        new_texts = parser.parse(filename)
        # A single parenthesized argument prints the same under both the
        # Python 2 print statement and the Python 3 print function.
        print("(%d) from <%s> %s" % (len(new_texts), parser.__class__,
                                     os.path.basename(filename)))
        texts.extend(new_texts)
    print("sorting all %d texts by date" % len(texts))
    return sorted(texts, key=lambda text: text.date)
def get_texts_from_dir(self, directory=REAL_TEST_FILES_DIR, for_parser=None):
    """Collect the texts of all matching test files, ordered by date.

    Files are selected via ``self.get_test_db_files(directory, for_parser)``
    and parsed one by one with the parser ``core.getParser`` picks for each.
    A progress line is printed for every file, plus one line before sorting.

    Args:
        directory: Directory to read test files from.
        for_parser: Forwarded to ``get_test_db_files``; ``None`` applies no
            parser-class filter.

    Returns:
        List of every parsed text, sorted by each text's ``date`` attribute.
    """
    collected = []
    for path in self.get_test_db_files(directory, for_parser):
        file_parser = core.getParser(path)
        parsed = file_parser.parse(path)
        summary = (len(parsed), file_parser.__class__, os.path.basename(path))
        print("(%d) from <%s> %s" % summary)
        collected.extend(parsed)

    print("sorting all %d texts by date" % len(collected))
    # ``collected`` is local, so sorting it in place is equivalent to
    # returning a sorted copy.
    collected.sort(key=lambda item: item.date)
    return collected
def get_test_db_files(self, directory=REAL_TEST_FILES_DIR, for_parser=None):
    """Return the paths in *directory* for which a parser can be obtained.

    Every entry matched by ``<directory>/*`` is offered to
    ``core.getParser``. Entries for which that call raises are skipped
    silently (best effort); the rest are kept, optionally filtered by
    parser class.

    Args:
        directory: Directory to scan (non-recursive glob of ``*``).
        for_parser: If not ``None``, keep only files whose parser's class
            equals this class exactly. Subclasses do not match, because the
            comparison is on ``parser.__class__``.

    Returns:
        List of matching file paths, in the order ``glob.glob`` yields them.
    """
    outfiles = []
    for filename in glob.glob(os.path.join(directory, "*")):
        try:
            parser = core.getParser(filename)
        except Exception:
            # Best effort: a file no parser can handle is simply not a test
            # DB file. Catching Exception (not a bare except) still lets
            # KeyboardInterrupt and SystemExit propagate.
            continue
        if for_parser is None or parser.__class__ == for_parser:
            outfiles.append(filename)
    return outfiles