def clean(self, dictionaries):
    """Sort the words of ``self.raw_data`` into pass/fail/indeterminate buckets.

    Commas are replaced by spaces in a working copy of the raw data (the
    attribute itself is left untouched).  The text is then split into dates
    and non-dates by ``ptchk.check_for_dates``, the non-date portion is
    classified by ``ptchk.check_for_words``, and every detected date is
    appended to the not-allowed bucket.

    Args:
        dictionaries: passed straight through to ``ptchk.check_for_words``
            (presumably the word lists to check against -- confirm with
            that helper).

    Returns:
        tuple: ``(allowed, not_allowed, indeterminate)``, where
        ``not_allowed`` also contains the detected dates.
    """
    # Commas are only dropped from this working copy, not from raw_data.
    comma_free_text = self.raw_data.replace(",", " ")

    found_dates, remaining_text = ptchk.check_for_dates(comma_free_text)
    allowed, rejected, indeterminate = ptchk.check_for_words(
        remaining_text, dictionaries)

    # Dates are always treated as not allowed.
    return allowed, rejected + found_dates, indeterminate
def test_pattern(self):
    """Check that the pattern check classifies a few obvious names.

    Runs ``ptchk.check_for_words`` on the string
    ``"Aristotle Lewis Rajah Seth"`` with the dictionaries exported by a
    fresh ``ptchk.Dictionary`` and compares the three returned buckets
    with the expected ones.  On a mismatch, the expected and actual
    contents of every bucket are printed before the test fails.
    """
    dictionary = ptchk.Dictionary()
    allowed, not_allowed, indeterminate = ptchk.check_for_words(
        "Aristotle Lewis Rajah Seth", dictionary.export_dicts())

    # Apparently no one names their children Aristotle or Rajah in America.
    expected_allowed = ['Aristotle', 'Rajah']
    expected_not_allowed = ['Seth']
    expected_indeterminate = ['Lewis']

    # True only if every bucket matches its expected contents.
    pattern_pass = (allowed == expected_allowed and
                    not_allowed == expected_not_allowed and
                    indeterminate == expected_indeterminate)

    if not pattern_pass:
        # Each print takes one parenthesized expression, so the output is
        # the same whether print is a statement (Python 2) or a function
        # (Python 3).
        print("expected allowed:\n %s\n got\n %s"
              % (expected_allowed, allowed))
        print("expected not allowed:\n %s\n got\n %s"
              % (expected_not_allowed, not_allowed))
        print("expeted indeterminate:\n %s\n got\n %s"
              % (expected_indeterminate, indeterminate))

    # assertTrue replaces the deprecated failUnless alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(pattern_pass, "Unexpected pattern match")