def test_find(self):
    """Test searching for substrings.

    Exercises ``icu.find`` and ``icu.primary_find`` together with the
    ``startswith``/``contains`` helpers and their ``primary_*`` variants.
    The expectations below require the ``primary_*`` variants to match
    across accent and case differences (e.g. 'pena' is found in 'peña').
    """
    # U+1F431 lies outside the BMP. When sys.maxunicode is below 0x10ffff
    # the interpreter counts it as two units, so the expected length/offset
    # of that character is 2 instead of 1.
    astral_width = 1 if sys.maxunicode >= 0x10ffff else 2

    # Results are compared against (offset, match length) pairs.
    self.ae((1, 1), icu.find(b'a', b'1ab'))
    self.ae((1, astral_width), icu.find('\U0001f431', 'x\U0001f431x'))
    self.ae((astral_width, 1), icu.find('y', '\U0001f431y'))
    self.ae((0, 4), icu.primary_find('pena', 'peña'))

    accented_to_plain = {
        u'pèché': u'peche',
        u'flüße': u'Flusse',
        u'Štepánek': u'ŠtepaneK',
    }
    # dict.items() is available on both Python 2 and Python 3, whereas
    # dict.iteritems() only exists on Python 2.
    for k, v in accented_to_plain.items():
        # The haystack is prefixed with a space, hence the expected offset 1.
        self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k))

    self.assertTrue(icu.startswith(b'abc', b'ab'))
    self.assertTrue(icu.startswith('abc', 'abc'))
    self.assertFalse(icu.startswith('xyz', 'a'))
    # The empty string is expected to be a prefix of everything.
    self.assertTrue(icu.startswith('xxx', ''))
    self.assertTrue(icu.primary_startswith('pena', 'peña'))

    self.assertTrue(icu.contains('\U0001f431', '\U0001f431'))
    self.assertTrue(icu.contains('something', 'some other something else'))
    # The empty string is expected to be contained in any string,
    # including the empty string itself.
    self.assertTrue(icu.contains('', 'a'))
    self.assertTrue(icu.contains('', ''))
    self.assertFalse(icu.contains('xxx', 'xx'))
    self.assertTrue(icu.primary_contains('pena', 'peña'))
def test_find(self):
    """Test searching for substrings.

    Covers plain and primary-strength ``find``/``startswith``/``contains``,
    the punctuation-ignoring ``primary_no_punc_find``, the ``find_all``
    callback API and whole-word searching on a collator object.
    """
    # Results are compared against (offset, match length) pairs.
    self.ae((1, 1), icu.find(b'a', b'1ab'))
    self.ae((1, 1), icu.find('\U0001f431', 'x\U0001f431x'))
    self.ae((1, 1), icu.find('y', '\U0001f431y'))
    self.ae((0, 4), icu.primary_find('pena', 'peña'))
    for k, v in iteritems({'pèché': 'peche', 'flüße': 'Flusse', 'Štepánek': 'ŠtepaneK'}):
        # The haystack is prefixed with a space, hence the expected offset 1.
        self.ae((1, len(k)), icu.primary_find(v, ' ' + k), f'Failed to find {v} in {k}')

    self.assertTrue(icu.startswith(b'abc', b'ab'))
    self.assertTrue(icu.startswith('abc', 'abc'))
    self.assertFalse(icu.startswith('xyz', 'a'))
    self.assertTrue(icu.startswith('xxx', ''))
    self.assertTrue(icu.primary_startswith('pena', 'peña'))

    self.assertTrue(icu.contains('\U0001f431', '\U0001f431'))
    self.assertTrue(icu.contains('something', 'some other something else'))
    self.assertTrue(icu.contains('', 'a'))
    self.assertTrue(icu.contains('', ''))
    self.assertFalse(icu.contains('xxx', 'xx'))
    self.assertTrue(icu.primary_contains('pena', 'peña'))

    # The primary collator must report primary comparison strength.
    collator = icu.primary_collator()
    self.ae(collator.get_attribute(icu._icu.UCOL_STRENGTH), icu._icu.UCOL_PRIMARY)

    # Punctuation, a soft hyphen, zero-width (non-)joiners and a space are
    # all expected to be skipped while matching; when they occur inside the
    # matched region of the haystack they count toward the reported length.
    self.ae((0, 4), icu.primary_no_punc_find('pena"', 'peña'))
    self.ae((0, 13), icu.primary_no_punc_find("typographers", 'typographer’s'))
    self.ae((0, 7), icu.primary_no_punc_find('abcd', 'a\u00adb\u200cc\u200dd'))
    self.ae((0, 5), icu.primary_no_punc_find('abcd', 'ab cd'))

    # test find all
    matches = []

    def record_match(pos, length):
        # find_all reports each hit through this two-argument callback.
        matches.append((pos, length))

    icu.primary_collator_without_punctuation().find_all('a', 'a a🐱a', record_match)
    self.ae(matches, [(0, 1), (2, 1), (5, 1)])

    # test find whole words
    no_punc = icu.primary_collator_without_punctuation()
    self.ae(no_punc.find('a', 'abc a bc'), (0, 1))
    # With the third argument set to True only the standalone 'a' matches.
    self.ae(no_punc.find('a', 'abc a bc', True), (4, 1))
    self.ae(no_punc.find('pena', 'a peñaabc peña', True), (10, 4))
def process_item(ctx, haystack, needle):
    """Find the best-scoring in-order match of ``needle`` inside ``haystack``.

    Each character of ``needle`` is located in ``haystack`` after the
    previously matched character. For every accepted match an alternative
    state is pushed on a stack so that later occurrences of the same needle
    character are explored as well; the highest-scoring result wins.

    :param ctx: Scoring context. Must provide ``memory`` (a mapping used to
        memoize results per ``(hidx, nidx, last_idx)`` state) and
        ``max_score_per_char``; it is also passed to ``calc_score_for_char``.
    :param haystack: The sequence being searched.
    :param needle: The sequence of characters to match, in order.
    :return: ``(score, positions)`` where ``positions[i]`` is the haystack
        index matched by ``needle[i]``, or ``-1`` if it was not matched.
    """
    # Non-recursive implementation using an explicit stack. Each entry is
    # (haystack index, needle index, last matched index, score, positions).
    stack = [(0, 0, 0, 0, [-1] * len(needle))]
    final_score, final_positions = stack[0][-2:]
    push, pop = stack.append, stack.pop
    while stack:
        hidx, nidx, last_idx, score, positions = pop()
        # The key is taken before the loop below advances hidx/last_idx.
        key = (hidx, nidx, last_idx)
        mem = ctx.memory.get(key)
        if mem is None:
            # range() works on both Python 2 and 3; xrange is Python 2 only.
            for i in range(nidx, len(needle)):
                n = needle[i]
                # Not enough haystack left to match the rest of the needle.
                if len(haystack) - hidx < len(needle) - i:
                    score = 0
                    break
                # Test the not-found sentinel BEFORE adding the slice offset:
                # once hidx has been added, a miss (-1) would turn into the
                # valid-looking index hidx - 1 and be scored as a match.
                # NOTE(review): assumes find() returns -1 as the first element
                # when there is no match, as the comparison below implies.
                pos = find(n, haystack[hidx:])[0]
                if pos == -1:
                    score = 0
                    break
                pos += hidx

                distance = pos - last_idx
                if distance <= 1:
                    score_for_char = ctx.max_score_per_char
                else:
                    score_for_char = calc_score_for_char(ctx, haystack[pos - 1], haystack[pos], distance)
                hidx = pos + 1
                # Alternative to explore later: match needle[i] again, but
                # only after this occurrence, with the score and positions
                # as they were before this match was accepted.
                push((hidx, i, last_idx, score, list(positions)))
                last_idx = positions[i] = pos
                score += score_for_char
            ctx.memory[key] = (score, positions)
        else:
            score, positions = mem
        if score > final_score:
            final_score = score
            final_positions = positions
    return final_score, final_positions
def test_find(self):
    """Test searching for substrings.

    Checks ``icu.find``/``icu.primary_find`` results as well as the
    ``startswith`` and ``contains`` helpers and their ``primary_*``
    counterparts, which the assertions expect to match across accent and
    case differences.
    """
    # A character outside the BMP (U+1F431) is counted as two units when
    # sys.maxunicode is below 0x10ffff, and as one unit otherwise.
    wide = sys.maxunicode >= 0x10ffff
    self.ae((1, 1), icu.find(b'a', b'1ab'))
    self.ae((1, 1 if wide else 2), icu.find('\U0001f431', 'x\U0001f431x'))
    self.ae((1 if wide else 2, 1), icu.find('y', '\U0001f431y'))
    self.ae((0, 4), icu.primary_find('pena', 'peña'))

    # Use items() rather than the Python-2-only iteritems() so that the
    # loop runs on Python 3 as well.
    for k, v in {u'pèché': u'peche', u'flüße': u'Flusse', u'Štepánek': u'ŠtepaneK'}.items():
        # One leading space in the haystack puts the expected match at offset 1.
        self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k))

    # Prefix checks, including the empty prefix.
    self.assertTrue(icu.startswith(b'abc', b'ab'))
    self.assertTrue(icu.startswith('abc', 'abc'))
    self.assertFalse(icu.startswith('xyz', 'a'))
    self.assertTrue(icu.startswith('xxx', ''))
    self.assertTrue(icu.primary_startswith('pena', 'peña'))

    # Containment checks: the first argument is searched for in the second.
    self.assertTrue(icu.contains('\U0001f431', '\U0001f431'))
    self.assertTrue(icu.contains('something', 'some other something else'))
    self.assertTrue(icu.contains('', 'a'))
    self.assertTrue(icu.contains('', ''))
    self.assertFalse(icu.contains('xxx', 'xx'))
    self.assertTrue(icu.primary_contains('pena', 'peña'))
def test_find(self):
    """Test searching for substrings.

    Verifies the (offset, length) results of ``icu.find`` and
    ``icu.primary_find`` and the boolean ``startswith``/``contains``
    helpers, including their ``primary_*`` forms.
    """
    # When sys.maxunicode is below 0x10FFFF, U+1F431 is counted as two
    # units; the expected length and offset are adjusted accordingly.
    cat_units = 1 if sys.maxunicode >= 0x10FFFF else 2

    self.ae((1, 1), icu.find(b"a", b"1ab"))
    self.ae((1, cat_units), icu.find("\U0001f431", "x\U0001f431x"))
    self.ae((cat_units, 1), icu.find("y", "\U0001f431y"))
    self.ae((0, 4), icu.primary_find("pena", "peña"))

    words = {"pèché": "peche", "flüße": "Flusse", "Štepánek": "ŠtepaneK"}
    # iteritems() was removed from dict in Python 3; items() is portable.
    for k, v in words.items():
        # The search text is the key preceded by a single space, so the
        # match is expected at offset 1 and to span the whole key.
        self.ae((1, len(k)), icu.primary_find(v, " " + k), "Failed to find %s in %s" % (v, k))

    self.assertTrue(icu.startswith(b"abc", b"ab"))
    self.assertTrue(icu.startswith("abc", "abc"))
    self.assertFalse(icu.startswith("xyz", "a"))
    self.assertTrue(icu.startswith("xxx", ""))
    self.assertTrue(icu.primary_startswith("pena", "peña"))

    self.assertTrue(icu.contains("\U0001f431", "\U0001f431"))
    self.assertTrue(icu.contains("something", "some other something else"))
    self.assertTrue(icu.contains("", "a"))
    self.assertTrue(icu.contains("", ""))
    self.assertFalse(icu.contains("xxx", "xx"))
    self.assertTrue(icu.primary_contains("pena", "peña"))
def test_find(self):
    """Check substring search, prefix matching and containment in ``icu``."""
    # Exact search: every case expects an (offset, match length) pair.
    exact_cases = (
        ((1, 1), b'a', b'1ab'),
        ((1, 1), '\U0001f431', 'x\U0001f431x'),
        ((1, 1), 'y', '\U0001f431y'),
    )
    for expected, needle, haystack in exact_cases:
        self.ae(expected, icu.find(needle, haystack))

    # Primary search is expected to look past accent and case differences.
    self.ae((0, 4), icu.primary_find('pena', 'peña'))
    accented_words = {
        'pèché': 'peche',
        'flüße': 'Flusse',
        'Štepánek': 'ŠtepaneK'
    }
    for k, v in iteritems(accented_words):
        # A single leading space shifts the expected match to offset 1.
        padded = ' ' + k
        self.ae((1, len(k)), icu.primary_find(v, padded), f'Failed to find {v} in {k}')

    # Prefix checks, run in order with the assertion each one requires.
    prefix_cases = (
        (self.assertTrue, b'abc', b'ab'),
        (self.assertTrue, 'abc', 'abc'),
        (self.assertFalse, 'xyz', 'a'),
        (self.assertTrue, 'xxx', ''),
    )
    for check, text, prefix in prefix_cases:
        check(icu.startswith(text, prefix))
    self.assertTrue(icu.primary_startswith('pena', 'peña'))

    # Containment checks: the first value is looked up inside the second.
    containment_cases = (
        (self.assertTrue, '\U0001f431', '\U0001f431'),
        (self.assertTrue, 'something', 'some other something else'),
        (self.assertTrue, '', 'a'),
        (self.assertTrue, '', ''),
        (self.assertFalse, 'xxx', 'xx'),
    )
    for check, needle, haystack in containment_cases:
        check(icu.contains(needle, haystack))
    self.assertTrue(icu.primary_contains('pena', 'peña'))