def setup_method(self, method):
    """Build the ``Defendant_Information`` fixture text for the test docket.

    Sets ``self.docket``, ``self.docket_to_test`` and ``self.birth_date``,
    runs the docket's test PDF through ``sectionize.parse`` and
    ``sectionize.stitch``, and stores the stripped text of the first
    ``Defendant_Information`` section in ``self.section_text``. The same
    text is also written to
    ``tests/testDocs/defendantSectionTexts/<docket>.txt``.

    Args:
        method: Not used by this body.
            NOTE(review): the signature looks like pytest's xunit-style
            per-test setup hook -- confirm against the enclosing class.

    Raises:
        IndexError: If the stitched XML contains no section named
            ``Defendant_Information``.
    """
    self.docket = self.docket_to_test = "CP-51-CR-0000001-2011"
    self.birth_date = "07/24/1964"

    path = "tests/testDocs/testPDFs/%s.pdf" % self.docket_to_test
    text = sectionize.parse(path)
    sections = etree.XML(sectionize.stitch(text))

    matches = sections.xpath("//section[@name='Defendant_Information']")
    self.section_text = matches[0].text.strip()

    # Paths are relative, so this relies on the current working directory.
    # The ``with`` block closes the file; no explicit close() is needed.
    out_path = "tests/testDocs/defendantSectionTexts/%s.txt" % self.docket_to_test
    with open(out_path, "w") as f:
        f.write(self.section_text)
def just_parse():
    """Time ``parse()`` over every PDF in ``./testDocs/test_two/pdfs``.

    This is the slow one. For 115 dockets, it took 80 seconds, or .76
    seconds per docket. This is the stumbling block in the whole thing.
    Why is this method so slow?!

    Configures the root logger to write to ``parse_timing.md``, logs a
    header row of timing column names, calls ``parse`` on each matching
    file, and prints the total and per-docket elapsed time. When no file
    matches the glob pattern, the per-docket figure is skipped instead of
    dividing by zero.
    """
    logging.basicConfig(filename="parse_timing.md", level=logging.DEBUG)
    logging.info("pdf2text_time, create_grammar_time, parse_grammar_time, node_visitor_time")
    print("Testing time of parse(), which includes pdf2text.")

    pattern = "./testDocs/test_two/pdfs/*.pdf"
    start = datetime.now()
    counter = 0
    for pdf_path in glob.iglob(pattern):
        parse(pdf_path)
        counter += 1
    end = datetime.now()
    print("Finished.")

    # total_seconds() keeps fractional seconds and any whole days, both of
    # which timedelta.seconds silently drops.
    duration = (end - start).total_seconds()
    print("Processed {} dockets in {} seconds.".format(counter, duration))
    if counter:
        print("{} seconds per docket.".format(duration / counter))
    else:
        # Nothing was parsed, so a per-docket average is undefined.
        print("No dockets matched {}.".format(pattern))
    print("Thanks for playing our game.")