def testNoPunct(self):
  """Checks HAS_NO_PUNCT_OR_SYMBOL on a plain word and a word with ';'."""
  # Each entry pairs an input token with the flag the op should return.
  cases = [
      (u"abc", True),
      (u"a;m".encode("utf-8"), False),
  ]
  tokens = [token for token, _ in cases]
  expected = [flag for _, flag in cases]
  actual = wordshape_ops.wordshape(
      tokens, wordshape_ops.WordShape.HAS_NO_PUNCT_OR_SYMBOL)
  self.assertAllEqual(actual, expected)
def testTrailingPunct(self):
  """Checks ENDS_WITH_PUNCT_OR_SYMBOL for leading vs. trailing punctuation."""
  pattern = wordshape_ops.WordShape.ENDS_WITH_PUNCT_OR_SYMBOL
  tokens = [
      u"abc",
      u";b",
      u"b;",
      u";,\u0f08".encode("utf-8"),
  ]
  expected = [False, False, True, True]
  actual = wordshape_ops.wordshape(tokens, pattern)
  self.assertAllEqual(actual, expected)
def testNoDigits(self):
  """Checks HAS_NO_DIGITS on a plain word and a word holding U+06F3."""
  cases = [
      (u"abc", True),
      (u"a\u06f3m".encode("utf-8"), False),
  ]
  tokens = [token for token, _ in cases]
  expected = [flag for _, flag in cases]
  actual = wordshape_ops.wordshape(tokens,
                                   wordshape_ops.WordShape.HAS_NO_DIGITS)
  self.assertAllEqual(actual, expected)
def testNumericValue(self):
  """Checks IS_NUMERIC_VALUE on decimal, signed and exponent notations."""
  pattern = wordshape_ops.WordShape.IS_NUMERIC_VALUE
  tokens = [u"98.6", u"-0.3", u"2.783E4", u"e4", u"1e10"]
  # Only the bare exponent "e4" is expected to be rejected.
  expected = [True, True, True, False, True]
  actual = wordshape_ops.wordshape(tokens, pattern)
  self.assertAllEqual(actual, expected)
def testEllipsis(self):
  """Checks ENDS_WITH_ELLIPSIS for "..." and U+2026 at the end of a token."""
  cases = [
      (u"abc", False),
      (u"abc...", True),
      (u"...abc", False),
      (u"abc\u2026".encode("utf-8"), True),
  ]
  tokens = [token for token, _ in cases]
  expected = [flag for _, flag in cases]
  actual = wordshape_ops.wordshape(
      tokens, wordshape_ops.WordShape.ENDS_WITH_ELLIPSIS)
  self.assertAllEqual(actual, expected)
def testNonShapePassedToShapeArg(self):
  """Passing a plain string as the shape argument must raise TypeError."""
  tokens = [u"abc", u"ABc", u"ABC"]
  not_a_shape = "This is not a Shape"
  with self.assertRaises(TypeError):
    wordshape_ops.wordshape(tokens, not_a_shape)
def testMathSymbol(self):
  """Checks HAS_MATH_SYMBOL: only the "<" token (U+003C) is expected to match."""
  pattern = wordshape_ops.WordShape.HAS_MATH_SYMBOL
  tokens = [
      u"''",
      u"\u003c",
      u"\uff07".encode("utf-8"),
  ]
  expected = [False, True, False]
  actual = wordshape_ops.wordshape(tokens, pattern)
  self.assertAllEqual(actual, expected)
def testIsEmoticon(self):
  """Checks IS_EMOTICON on emoticons and on emoticon-like tokens."""
  cases = [
      (u"abc", False),
      (u":-)", True),
      (u"O:)", False),
      (u"8)x", False),
      (u":\u3063C", True),
      (u"abc:-)", False),
  ]
  tokens = [token for token, _ in cases]
  expected = [flag for _, flag in cases]
  actual = wordshape_ops.wordshape(tokens,
                                   wordshape_ops.WordShape.IS_EMOTICON)
  self.assertAllEqual(actual, expected)
def testMixedCase(self):
  """Checks HAS_MIXED_CASE on lower, upper and mixed-case tokens."""
  pattern = wordshape_ops.WordShape.HAS_MIXED_CASE
  tokens = [u"abc", u"ABc", u"ABC", u"abC"]
  expected = [False, True, False, True]
  actual = wordshape_ops.wordshape(tokens, pattern)
  self.assertAllEqual(actual, expected)
def testMixedCaseLetters(self):
  """Checks IS_MIXED_CASE_LETTERS, including a mixed-case token ending in '.'."""
  cases = [
      (u"abc", False),
      (u"ABc", True),
      (u"ABC", False),
      (u"abC", True),
      (u"abC.", False),
  ]
  tokens = [token for token, _ in cases]
  expected = [flag for _, flag in cases]
  actual = wordshape_ops.wordshape(
      tokens, wordshape_ops.WordShape.IS_MIXED_CASE_LETTERS)
  self.assertAllEqual(actual, expected)
def testAllLowercase(self):
  """Checks IS_LOWERCASE on lower, mixed and upper-case tokens."""
  pattern = wordshape_ops.WordShape.IS_LOWERCASE
  tokens = [u"abc", u"ABc", u"ABC"]
  expected = [True, False, False]
  actual = wordshape_ops.wordshape(tokens, pattern)
  self.assertAllEqual(actual, expected)
def testExtendedEmojis(self):
  """Checks that HAS_EMOJI is reported as True for every listed emoji."""
  # Emojis written directly as string literals.
  literal_emojis = [
      "‼", "⁉", "ℹ", "↘", "↩", "⌚", "⌛", "⏳", "⌨", "⏏", "⏩", "⏺", "⏰",
      "⏱", "⏲", "🕰", "Ⓜ", "▪", "⬛", "⬜", "✂", "✅", "✈", "✉", "✊", "✊🏿",
      "✋", "✌", "🤘🏾", "🤞🏿", "✍", "✏", "✒", "✔", "✝", "✡", "✨", "✳", "✴",
      "❄", "❇", "❌", "❎", "❓", "❔", "❗", "❕", "❣", "❤", "➕", "➖", "➗",
      "⤴", "⤵", "⬅", "⭐", "⭕", "〰", "〽", "㊗", "🀄", "🃏", "🅰", "🅱", "🅾",
      "🅿", "🆎", "🆑", "🆒", "🆔", "🆗", "🆘", "🆙", "🆚", "🈁", "🈂", "🈚",
      "🈯", "🈴", "🈳", "🈺", "🉐", "🉑", "🌍", "🏔", "🍾", "🐯", "🐆", "🦇",
      "🦅", "🐝", "🦖", "🐉", "🦠", "🔎", "⚗", "🕯", "💡", "📽", "📡", "🧮",
      "🔋", "📲", "☎", "🥁", "🎧", "🎼", "🔊", "💍", "👗", "🕶", "🎭", "🔮",
      "🧬", "🔬", "🤹", "🚵", "🧗", "🧗🏼♀️", "🧗🏿♂️", "🥋", "🎳", "🏈", "🏅",
      "🎑", "🎉", "🎄", "🌊", "⚡", "🌖", "🚀", "🚠", "🛩", "🛴", "🏎", "🚅",
      "🌆", "🕌", "🕍", "⛪", "🗽", "🏘", "🍵", "🍫", "🦑", "🍱", "🥦", "🥑",
      "🌴", "🌼", "🦂", "🐬", "🥀", "🧖🏾", "🧕🏿", "🧔🏼", "🧒🏾", "🧛", "🧝🏻",
      "🧞", "🧟", "🧙🏾", "🧚🏻", "💃🏽", "👯", "🧘", "🦱", "👪", "👩👩👧👦",
      "👨🏿🤝👨🏻", "🕵️♀️", "🧑🚀", "👩✈️", "🧑🏿⚕️", "🧑🏾⚖️", "🧠", "👁️🗨️",
      "🙉", "🤗", "👏", "💏", "🧯", "🛒", "🧺", "🧷", "💊", "🧲", "⛓", "⚖",
      "🛡", "🏹", "🎣", "⚔", "🔨", "📌", "📊", "📈", "💹", "💸", "💵", "📜",
      "📚", "📆", "💼", "📝", "📬", "🔏", "🔓", "🔑", "🗃", "🚿", "🛏", "🗿",
      "🏧", "🚮", "🚰", "♿", "🚻", "🚾", "🛄", "⚠", "🚸", "⛔", "🚭", "☣",
      "🔃", "🔚", "🔚", "⚛", "♈", "🔆", "🎦", "⚕", "♻", "⚜", "💠", "🏁",
      "🚩", "🎌", "🏴☠️", "🇺🇸", "🇨🇭", "🇺🇦", "🇿🇼", "🇦🇴", "🇦🇨", "🇦🇶",
      "🇺🇳", "🇪🇺", "🇧🇿", "🇵🇲", "🇮🇴", "🇻🇮", "🇨🇽", "🏴", "🇧🇱",
  ]
  # Emojis spelled as code-point escapes and UTF-8 encoded explicitly.
  escaped_emojis = [
      u"\U0001fa70".encode("utf-8"),  # ballet shoes.
      u"\U0001fa7a".encode("utf-8"),  # stethoscope.
      u"\U0001fa80".encode("utf-8"),  # yo-yo.
      u"\U0001fa82".encode("utf-8"),  # parachute.
      u"\U0001fa86".encode("utf-8"),  # nesting dolls.
      u"\U0001fa90".encode("utf-8"),  # ringed planet.
      u"\U0001fa97".encode("utf-8"),  # accordion.
      u"\U0001fa99".encode("utf-8"),  # coin.
      u"\U0001fa9c".encode("utf-8"),  # ladder.
      u"\U0001fa9f".encode("utf-8"),  # window.
      u"\U0001faa1".encode("utf-8"),  # sewing needle.
      u"\U0001faa8".encode("utf-8"),  # rock.
      u"\U0001fab0".encode("utf-8"),  # fly.
      u"\U0001fab4".encode("utf-8"),  # potted plant.
      u"\U0001fab6".encode("utf-8"),  # feather.
      u"\U0001fac0".encode("utf-8"),  # anatomical heart.
      u"\U0001fac2".encode("utf-8"),  # people hugging.
      u"\U0001fad0".encode("utf-8"),  # blueberries.
      u"\U0001fad2".encode("utf-8"),  # olive.
      u"\U0001fad6".encode("utf-8"),  # teapot.
  ]
  # Same element order as a single combined list: literals first.
  test_string = literal_emojis + escaped_emojis
  expected = [True] * len(test_string)
  actual = wordshape_ops.wordshape(test_string,
                                   wordshape_ops.WordShape.HAS_EMOJI)
  self.assertAllEqual(actual, expected)
def testOnlyDigits(self):
  """Checks HAS_ONLY_DIGITS; "90" followed by U+06F3 is expected to match."""
  cases = [
      (u"abc", False),
      (u"a9b".encode("utf-8"), False),
      (u"90\u06f3".encode("utf-8"), True),
  ]
  tokens = [token for token, _ in cases]
  expected = [flag for _, flag in cases]
  actual = wordshape_ops.wordshape(tokens,
                                   wordshape_ops.WordShape.HAS_ONLY_DIGITS)
  self.assertAllEqual(actual, expected)
def testSentenceTerminal(self):
  """Checks ENDS_WITH_SENTENCE_TERMINAL for '.', ',', '!!!' and '?!' endings."""
  pattern = wordshape_ops.WordShape.ENDS_WITH_SENTENCE_TERMINAL
  tokens = [u"abc", u".b", u"b.", u"b,", u"b!!!", u"abc?!"]
  # A trailing comma is not expected to count as a sentence terminal.
  expected = [False, False, True, False, True, True]
  actual = wordshape_ops.wordshape(tokens, pattern)
  self.assertAllEqual(actual, expected)
def testCurrencySymbol(self):
  """Checks HAS_CURRENCY_SYMBOL on tokens with and without a '$'."""
  cases = [
      (u"''", False),
      (u"ABc$", True),
      (u"$\uff07".encode("utf-8"), True),
  ]
  tokens = [token for token, _ in cases]
  expected = [flag for _, flag in cases]
  actual = wordshape_ops.wordshape(
      tokens, wordshape_ops.WordShape.HAS_CURRENCY_SYMBOL)
  self.assertAllEqual(actual, expected)
def testMultipleTerminalPunct(self):
  """Checks ENDS_WITH_MULTIPLE_TERMINAL_PUNCT on single vs. repeated endings."""
  pattern = wordshape_ops.WordShape.ENDS_WITH_MULTIPLE_TERMINAL_PUNCT
  tokens = [u"abc", u".b", u"b.", u"b,,", u"b!!!", u"abc?!"]
  # A single trailing '.' is not expected to match; ",,", "!!!" and "?!" are.
  expected = [False, False, False, True, True, True]
  actual = wordshape_ops.wordshape(tokens, pattern)
  self.assertAllEqual(actual, expected)
def testCurrencySymbolAtBeginning(self):
  """Checks HAS_CURRENCY_SYMBOL with '$' at the end, start and middle."""
  cases = [
      (u"''", False),
      (u"ABc$", True),
      (u"$ABc", True),
      (u"A$Bc", True),
  ]
  tokens = [token for token, _ in cases]
  expected = [flag for _, flag in cases]
  actual = wordshape_ops.wordshape(
      tokens, wordshape_ops.WordShape.HAS_CURRENCY_SYMBOL)
  self.assertAllEqual(actual, expected)
def testAcronym(self):
  """Checks IS_ACRONYM_WITH_PERIODS; only "A.B." is expected to match."""
  pattern = wordshape_ops.WordShape.IS_ACRONYM_WITH_PERIODS
  tokens = [u"abc", u"A.B.", u"A.B.C.)", u"ABC"]
  expected = [False, True, False, False]
  actual = wordshape_ops.wordshape(tokens, pattern)
  self.assertAllEqual(actual, expected)
def testAllPunct(self):
  """Checks IS_PUNCT_OR_SYMBOL; only the all-punctuation token should match."""
  cases = [
      (u"abc", False),
      (u"a;b".encode("utf-8"), False),
      (u";,\u0f08".encode("utf-8"), True),
  ]
  tokens = [token for token, _ in cases]
  expected = [flag for _, flag in cases]
  actual = wordshape_ops.wordshape(
      tokens, wordshape_ops.WordShape.IS_PUNCT_OR_SYMBOL)
  self.assertAllEqual(actual, expected)