示例#1
0
 def testNoPunct(self):
     """Checks HAS_NO_PUNCT_OR_SYMBOL on a plain token and one containing ';'."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"abc", True),
         (u"a;m".encode("utf-8"), False),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.HAS_NO_PUNCT_OR_SYMBOL
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#2
0
 def testTrailingPunct(self):
     """Checks ENDS_WITH_PUNCT_OR_SYMBOL for leading vs. trailing punctuation."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"abc", False),
         (u";b", False),
         (u"b;", True),
         (u";,\u0f08".encode("utf-8"), True),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.ENDS_WITH_PUNCT_OR_SYMBOL
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#3
0
 def testNoDigits(self):
     """Checks HAS_NO_DIGITS on a letters-only token and one with a non-ASCII digit."""
     digit_free = u"abc"
     # U+06F3 is embedded between two ASCII letters and passed as UTF-8 bytes.
     with_digit = u"a\u06f3m".encode("utf-8")
     pattern = wordshape_ops.WordShape.HAS_NO_DIGITS
     actual = wordshape_ops.wordshape([digit_free, with_digit], pattern)
     self.assertAllEqual(actual, [True, False])
示例#4
0
 def testNumericValue(self):
     """Checks IS_NUMERIC_VALUE on decimal, signed and exponent-style tokens."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"98.6", True),
         (u"-0.3", True),
         (u"2.783E4", True),
         (u"e4", False),
         (u"1e10", True),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.IS_NUMERIC_VALUE
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#5
0
 def testEllipsis(self):
   """Checks ENDS_WITH_ELLIPSIS for "..." and U+2026 at the end of a token."""
   # Each case pairs an input token with the flag the op should return.
   cases = [
       (u"abc", False),
       (u"abc...", True),
       (u"...abc", False),
       (u"abc\u2026".encode("utf-8"), True),
   ]
   tokens = [token for token, _ in cases]
   expected = [flag for _, flag in cases]
   pattern = wordshape_ops.WordShape.ENDS_WITH_ELLIPSIS
   actual = wordshape_ops.wordshape(tokens, pattern)
   self.assertAllEqual(actual, expected)
示例#6
0
 def testNonShapePassedToShapeArg(self):
     """Expects TypeError when the pattern argument is a plain string."""
     tokens = [u"abc", u"ABc", u"ABC"]
     bogus_pattern = "This is not a Shape"
     with self.assertRaises(TypeError):
         wordshape_ops.wordshape(tokens, bogus_pattern)
示例#7
0
 def testMathSymbol(self):
     """Checks HAS_MATH_SYMBOL: only the "<" token (U+003C) should match."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"''", False),
         (u"\u003c", True),
         (u"\uff07".encode("utf-8"), False),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.HAS_MATH_SYMBOL
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#8
0
 def testIsEmoticon(self):
     """Checks IS_EMOTICON on emoticon-like tokens and near misses."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"abc", False),
         (u":-)", True),
         (u"O:)", False),
         (u"8)x", False),
         (u":\u3063C", True),
         (u"abc:-)", False),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.IS_EMOTICON
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#9
0
 def testMixedCase(self):
     """Checks HAS_MIXED_CASE on all-lower, all-upper and mixed tokens."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"abc", False),
         (u"ABc", True),
         (u"ABC", False),
         (u"abC", True),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.HAS_MIXED_CASE
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#10
0
 def testMixedCaseLetters(self):
     """Checks IS_MIXED_CASE_LETTERS, including a mixed token ending in '.'."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"abc", False),
         (u"ABc", True),
         (u"ABC", False),
         (u"abC", True),
         (u"abC.", False),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.IS_MIXED_CASE_LETTERS
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#11
0
 def testAllLowercase(self):
     """Checks IS_LOWERCASE: only the fully lowercase token should match."""
     tokens = [u"abc", u"ABc", u"ABC"]
     expected = [True, False, False]
     pattern = wordshape_ops.WordShape.IS_LOWERCASE
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#12
0
 def testExtendedEmojis(self):
     """Checks that HAS_EMOJI is reported for every entry in a long emoji list.

     The list mixes literal emoji strings (including multi-code-point
     sequences) with UTF-8 encoded bytes built from \\U escapes.
     """
     emoji_tokens = [
         "‼",
         "⁉",
         "ℹ",
         "↘",
         "↩",
         "⌚",
         "⌛",
         "⏳",
         "⌨",
         "⏏",
         "⏩",
         "⏺",
         "⏰",
         "⏱",
         "⏲",
         "🕰",
         "Ⓜ",
         "▪",
         "⬛",
         "⬜",
         "✂",
         "✅",
         "✈",
         "✉",
         "✊",
         "✊🏿",
         "✋",
         "✌",
         "🤘🏾",
         "🤞🏿",
         "✍",
         "✏",
         "✒",
         "✔",
         "✝",
         "✡",
         "✨",
         "✳",
         "✴",
         "❄",
         "❇",
         "❌",
         "❎",
         "❓",
         "❔",
         "❗",
         "❕",
         "❣",
         "❤",
         "➕",
         "➖",
         "➗",
         "⤴",
         "⤵",
         "⬅",
         "⭐",
         "⭕",
         "〰",
         "〽",
         "㊗",
         "🀄",
         "🃏",
         "🅰",
         "🅱",
         "🅾",
         "🅿",
         "🆎",
         "🆑",
         "🆒",
         "🆔",
         "🆗",
         "🆘",
         "🆙",
         "🆚",
         "🈁",
         "🈂",
         "🈚",
         "🈯",
         "🈴",
         "🈳",
         "🈺",
         "🉐",
         "🉑",
         "🌍",
         "🏔",
         "🍾",
         "🐯",
         "🐆",
         "🦇",
         "🦅",
         "🐝",
         "🦖",
         "🐉",
         "🦠",
         "🔎",
         "⚗",
         "🕯",
         "💡",
         "📽",
         "📡",
         "🧮",
         "🔋",
         "📲",
         "☎",
         "🥁",
         "🎧",
         "🎼",
         "🔊",
         "💍",
         "👗",
         "🕶",
         "🎭",
         "🔮",
         "🧬",
         "🔬",
         "🤹",
         "🚵",
         "🧗",
         "🧗🏼‍♀️",
         "🧗🏿‍♂️",
         "🥋",
         "🎳",
         "🏈",
         "🏅",
         "🎑",
         "🎉",
         "🎄",
         "🌊",
         "⚡",
         "🌖",
         "🚀",
         "🚠",
         "🛩",
         "🛴",
         "🏎",
         "🚅",
         "🌆",
         "🕌",
         "🕍",
         "⛪",
         "🗽",
         "🏘",
         "🍵",
         "🍫",
         "🦑",
         "🍱",
         "🥦",
         "🥑",
         "🌴",
         "🌼",
         "🦂",
         "🐬",
         "🥀",
         "🧖🏾",
         "🧕🏿",
         "🧔🏼",
         "🧒🏾",
         "🧛",
         "🧝🏻",
         "🧞",
         "🧟",
         "🧙🏾",
         "🧚🏻",
         "💃🏽",
         "👯",
         "🧘",
         "🦱",
         "👪",
         "👩‍👩‍👧‍👦",
         "👨🏿‍🤝‍👨🏻",
         "🕵️‍♀️",
         "🧑‍🚀",
         "👩‍✈️",
         "🧑🏿‍⚕️",
         "🧑🏾‍⚖️",
         "🧠",
         "👁️‍🗨️",
         "🙉",
         "🤗",
         "👏",
         "💏",
         "🧯",
         "🛒",
         "🧺",
         "🧷",
         "💊",
         "🧲",
         "⛓",
         "⚖",
         "🛡",
         "🏹",
         "🎣",
         "⚔",
         "🔨",
         "📌",
         "📊",
         "📈",
         "💹",
         "💸",
         "💵",
         "📜",
         "📚",
         "📆",
         "💼",
         "📝",
         "📬",
         "🔏",
         "🔓",
         "🔑",
         "🗃",
         "🚿",
         "🛏",
         "🗿",
         "🏧",
         "🚮",
         "🚰",
         "♿",
         "🚻",
         "🚾",
         "🛄",
         "⚠",
         "🚸",
         "⛔",
         "🚭",
         "☣",
         "🔃",
         "🔚",
         "🔚",
         "⚛",
         "♈",
         "🔆",
         "🎦",
         "⚕",
         "♻",
         "⚜",
         "💠",
         "🏁",
         "🚩",
         "🎌",
         "🏴‍☠️",
         "🇺🇸",
         "🇨🇭",
         "🇺🇦",
         "🇿🇼",
         "🇦🇴",
         "🇦🇨",
         "🇦🇶",
         "🇺🇳",
         "🇪🇺",
         "🇧🇿",
         "🇵🇲",
         "🇮🇴",
         "🇻🇮",
         "🇨🇽",
         "🏴󠁧󠁢󠁷󠁬󠁳󠁿",
         "🇧🇱",
         u"\U0001fa70".encode("utf-8"),  # ballet shoes.
         u"\U0001fa7a".encode("utf-8"),  # stethoscope.
         u"\U0001fa80".encode("utf-8"),  # yo-yo.
         u"\U0001fa82".encode("utf-8"),  # parachute.
         u"\U0001fa86".encode("utf-8"),  # nesting dolls.
         u"\U0001fa90".encode("utf-8"),  # ringed planet.
         u"\U0001fa97".encode("utf-8"),  # accordion.
         u"\U0001fa99".encode("utf-8"),  # coin.
         u"\U0001fa9c".encode("utf-8"),  # ladder.
         u"\U0001fa9f".encode("utf-8"),  # window.
         u"\U0001faa1".encode("utf-8"),  # sewing needle.
         u"\U0001faa8".encode("utf-8"),  # rock.
         u"\U0001fab0".encode("utf-8"),  # fly.
         u"\U0001fab4".encode("utf-8"),  # potted plant.
         u"\U0001fab6".encode("utf-8"),  # feather.
         u"\U0001fac0".encode("utf-8"),  # anatomical heart.
         u"\U0001fac2".encode("utf-8"),  # people hugging.
         u"\U0001fad0".encode("utf-8"),  # blueberries.
         u"\U0001fad2".encode("utf-8"),  # olive.
         u"\U0001fad6".encode("utf-8"),  # teapot.
     ]
     # Every entry is expected to be flagged, so the expectation is all-True.
     expected = [True] * len(emoji_tokens)
     pattern = wordshape_ops.WordShape.HAS_EMOJI
     actual = wordshape_ops.wordshape(emoji_tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#13
0
 def testOnlyDigits(self):
   """Checks HAS_ONLY_DIGITS on letters, mixed, and all-digit tokens."""
   # Each case pairs an input token with the flag the op should return.
   cases = [
       (u"abc", False),
       (u"a9b".encode("utf-8"), False),
       (u"90\u06f3".encode("utf-8"), True),
   ]
   tokens = [token for token, _ in cases]
   expected = [flag for _, flag in cases]
   pattern = wordshape_ops.WordShape.HAS_ONLY_DIGITS
   actual = wordshape_ops.wordshape(tokens, pattern)
   self.assertAllEqual(actual, expected)
示例#14
0
 def testSentenceTerminal(self):
     """Checks ENDS_WITH_SENTENCE_TERMINAL for '.', '!' and '?' endings."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"abc", False),
         (u".b", False),
         (u"b.", True),
         (u"b,", False),
         (u"b!!!", True),
         (u"abc?!", True),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.ENDS_WITH_SENTENCE_TERMINAL
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#15
0
 def testCurrencySymbol(self):
     """Checks HAS_CURRENCY_SYMBOL on tokens with and without '$'."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"''", False),
         (u"ABc$", True),
         (u"$\uff07".encode("utf-8"), True),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.HAS_CURRENCY_SYMBOL
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#16
0
 def testMultipleTerminalPunct(self):
     """Checks ENDS_WITH_MULTIPLE_TERMINAL_PUNCT on single vs. repeated endings."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"abc", False),
         (u".b", False),
         (u"b.", False),
         (u"b,,", True),
         (u"b!!!", True),
         (u"abc?!", True),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.ENDS_WITH_MULTIPLE_TERMINAL_PUNCT
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#17
0
 def testCurrencySymbolAtBeginning(self):
     """Checks HAS_CURRENCY_SYMBOL with '$' at the end, start and middle."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"''", False),
         (u"ABc$", True),
         (u"$ABc", True),
         (u"A$Bc", True),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.HAS_CURRENCY_SYMBOL
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#18
0
 def testAcronym(self):
     """Checks IS_ACRONYM_WITH_PERIODS: only "A.B." should match."""
     # Each case pairs an input token with the flag the op should return.
     cases = [
         (u"abc", False),
         (u"A.B.", True),
         (u"A.B.C.)", False),
         (u"ABC", False),
     ]
     tokens = [token for token, _ in cases]
     expected = [flag for _, flag in cases]
     pattern = wordshape_ops.WordShape.IS_ACRONYM_WITH_PERIODS
     actual = wordshape_ops.wordshape(tokens, pattern)
     self.assertAllEqual(actual, expected)
示例#19
0
 def testAllPunct(self):
   """Checks IS_PUNCT_OR_SYMBOL: only the all-punctuation token should match."""
   # Each case pairs an input token with the flag the op should return.
   cases = [
       (u"abc", False),
       (u"a;b".encode("utf-8"), False),
       (u";,\u0f08".encode("utf-8"), True),
   ]
   tokens = [token for token, _ in cases]
   expected = [flag for _, flag in cases]
   pattern = wordshape_ops.WordShape.IS_PUNCT_OR_SYMBOL
   actual = wordshape_ops.wordshape(tokens, pattern)
   self.assertAllEqual(actual, expected)