def test_phone(self):
        """Check ``replace_date`` on dates written with different separators.

        Each case pairs an input sentence with the text expected back from
        ``replace_date("1", sentence)``. The inputs use "/", " ", "." and
        "-" between the date parts. Every case runs in its own sub-test, so
        one failing format does not hide the others.

        NOTE(review): despite its name, this test exercises ``replace_date``
        rather than a phone helper, and the first input's year reads
        "20013" -- confirm both are intentional.
        """
        cases = (
            (u"Has a date 21/2/20013", u"Has a date 1"),
            (u"Has a date 21 2 2003", u"Has a date 1"),
            (u"Has a date 21.2.2013", u"Has a date 1"),
            (u"Has a date 21-2-2013", u"Has a date 1"),
        )
        for index, case in enumerate(cases):
            sentence, expected = case
            with self.subTest(i=index):
                self.assertEqual(replace_date("1", sentence), expected)

    # Bar codes in numbers
    # codes 10.3.4
示例#2
0
def word_count(text, lang=None):
    """Count the words in ``text`` after cleaning it up.

    The text is first stripped of tags via ``remove_tags``. It is then
    passed through the URL, e-mail, date, phone and money replacement
    helpers, each called with a short placeholder token ('url', 'email',
    'date', 'phone', 'money'), so that such elements are not broken up
    when counting.

    Args:
        text: The raw text to count words in.
        lang: Optional language identifier. When it is given and is not in
            ``asian_languages``, only ``word_count_aux`` of the cleaned
            text is returned.

    Returns:
        For a known non-Asian language, ``word_count_aux(cleaned_text)``.
        Otherwise (``lang`` is None or an Asian language), the
        ``word_count_aux`` result for the text after ``filter_jchars`` has
        been applied to every character, plus the sum of ``is_asian`` over
        every character of the cleaned text.
    """
    cleaned, _ = remove_tags(text)

    # Mask elements that would otherwise be split into several words.
    replacements = (
        (replace_url, u'url'),
        (replace_email, u'email'),
        (replace_date, u'date'),
        (replace_phone, u'phone'),
        (replace_money, u'money'),
    )
    for substitute, placeholder in replacements:
        cleaned = substitute(placeholder, cleaned)

    # Known non-Asian language: a plain word count is enough.
    if lang is not None and lang not in asian_languages:
        return word_count_aux(cleaned)

    # Unknown or Asian language: tally the characters flagged by is_asian,
    # then count words in the text after filter_jchars has processed each
    # character.
    asian_total = sum(is_asian(char) for char in cleaned)
    remainder = "".join(filter_jchars(char) for char in cleaned)
    return word_count_aux(remainder) + asian_total