def clean(self, dictionaries):
        """Sort the words of ``self.raw_data`` into three categories.

        Commas are swapped for spaces on a working copy, the result is handed
        to ``ptchk.check_for_dates``, and whatever that call returns as
        non-date content is classified by ``ptchk.check_for_words``.
        Everything reported as a date is appended to the not-allowed group.

        Args:
            dictionaries: passed through unchanged as the second argument of
                ``ptchk.check_for_words``.

        Returns:
            tuple: ``(allowed, not_allowed, indeterminate)``; the
            ``not_allowed`` entry is the checker's not-allowed result
            concatenated with the detected dates.

        """
        # Work on a comma-free copy; self.raw_data itself is not reassigned.
        comma_free = self.raw_data.replace(",", " ")
        found_dates, remainder = ptchk.check_for_dates(comma_free)
        word_groups = ptchk.check_for_words(remainder, dictionaries)
        allowed, rejected, indeterminate = word_groups
        # Dates are always reported together with the not-allowed words.
        return allowed, rejected + found_dates, indeterminate
Exemplo n.º 2
0
    def test_pattern(self):
        # Test that the pattern check finds obvious names: four names are fed
        # to ptchk.check_for_words together with the dictionaries exported by
        # a default ptchk.Dictionary(), and all three result lists are
        # compared against the expected categorisation.
        dictionary = ptchk.Dictionary()
        allowed, not_allowed, indeterminate = ptchk.check_for_words(
            "Aristotle Lewis Rajah Seth", dictionary.export_dicts())
        # Apparently no one names their children Aristotle or Rajah in
        # America, so those two are expected to come back as allowed.
        expected_allowed = ['Aristotle', 'Rajah']
        expected_not_allowed = ['Seth']
        expected_indeterminate = ['Lewis']
        # True only if every category matches its expected output exactly.
        pattern_pass = (allowed == expected_allowed and
                        not_allowed == expected_not_allowed and
                        indeterminate == expected_indeterminate)

        if not pattern_pass:
            # Show all three categories so the mismatching one is visible.
            # The single-argument print(...) form gives the same output
            # whether print is a statement (Python 2) or a function
            # (Python 3).
            print("expected allowed:\n %s\n got\n %s"
                  % (expected_allowed, allowed))
            print("expected not allowed:\n %s\n got\n %s"
                  % (expected_not_allowed, not_allowed))
            print("expected indeterminate:\n %s\n got\n %s"
                  % (expected_indeterminate, indeterminate))

        # assertTrue replaces the deprecated failUnless alias.
        self.assertTrue(pattern_pass, "Unexpected pattern match")