def _transform(self, document):
    # Pull the lines of the section matching section_regex out of the document's
    # description, then strip bullet characters, clean, and word-tokenize each line.
    lines_from_section = section_extract(self.section_regex, document['description'])
    return [
        word_tokenize(clean_str(strip_bullets_from_line(line.text)))
        for line in lines_from_section
    ]
def _clean(self, document):
    return self.join_spaces([
        clean_str(document[field])
        for field in self.document_schema_fields
    ])
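# Hedged usage note (the field names below are illustrative, not from the source):
# with document_schema_fields = ['title', 'description'], _clean cleans each of
# those fields and joins the results into a single space-separated string via
# join_spaces.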
def test_deep_wrapper(self):
    assert clean_str([["macy's engineer / apply now", "engineer/apply now"],
                      ["engineer.", "python!"]]) == \
        [["macy s engineer apply now", "engineer apply now"],
         ["engineer ", "python "]]
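# Minimal sketch of the "deep" wrapping the test above exercises: clean_str is
# applied through arbitrarily nested lists, cleaning each string it reaches.
# The decorator and the clean_str body below are assumptions inferred from the
# expected output in the test, not the library's actual implementation.
import re

def deep(func):
    """Recursively apply `func` to every string inside nested lists."""
    def wrapped(value):
        if isinstance(value, list):
            return [wrapped(item) for item in value]
        return func(value)
    return wrapped

@deep
def clean_str_sketch(text):
    # Lowercase, turn punctuation into spaces, then collapse runs of whitespace,
    # e.g. "macy's engineer / apply now" -> "macy s engineer apply now".
    text = re.sub(r'[^a-z0-9 ]', ' ', text.lower())
    return re.sub(r'\s+', ' ', text)

assert clean_str_sketch([["engineer.", "python!"]]) == [["engineer ", "python "]]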