Exemplo n.º 1
0
 def test_find(self):
     '''
     Test searching for substrings.

     Exercises icu.find(), icu.primary_find(), icu.startswith(),
     icu.primary_startswith(), icu.contains() and icu.primary_contains().
     Judging by the expectations below, the find functions return a
     (position, length) tuple.
     '''
     self.ae((1, 1), icu.find(b'a', b'1ab'))
     # U+1F431 is outside the BMP: when sys.maxunicode < 0x10ffff the
     # expected length (and the offset of whatever follows it) is 2, not 1.
     self.ae((1, 1 if sys.maxunicode >= 0x10ffff else 2),
             icu.find('\U0001f431', 'x\U0001f431x'))
     self.ae((1 if sys.maxunicode >= 0x10ffff else 2, 1),
             icu.find('y', '\U0001f431y'))
     self.ae((0, 4), icu.primary_find('pena', 'peña'))
     # Each needle differs from its haystack word only in accents, case or
     # the eszett/"ss" spelling. .items() is used instead of .iteritems()
     # so the loop runs on both Python 2 and Python 3.
     for k, v in {
             u'pèché': u'peche',
             u'flüße': u'Flusse',
             u'Štepánek': u'ŠtepaneK'
     }.items():
         # The haystack is prefixed with a space, so the match starts at 1.
         self.ae((1, len(k)), icu.primary_find(v, ' ' + k),
                 'Failed to find %s in %s' % (v, k))
     self.assertTrue(icu.startswith(b'abc', b'ab'))
     self.assertTrue(icu.startswith('abc', 'abc'))
     self.assertFalse(icu.startswith('xyz', 'a'))
     # The empty string is expected to be a prefix of anything.
     self.assertTrue(icu.startswith('xxx', ''))
     self.assertTrue(icu.primary_startswith('pena', 'peña'))
     self.assertTrue(icu.contains('\U0001f431', '\U0001f431'))
     self.assertTrue(icu.contains('something', 'some other something else'))
     # The empty string is expected to be contained in anything.
     self.assertTrue(icu.contains('', 'a'))
     self.assertTrue(icu.contains('', ''))
     self.assertFalse(icu.contains('xxx', 'xx'))
     self.assertTrue(icu.primary_contains('pena', 'peña'))
Exemplo n.º 2
0
 def test_find(self):
     '''
     Test searching for substrings.

     Covers the plain and primary-strength find/startswith/contains
     helpers of icu, the punctuation-ignoring primary_no_punc_find(), and
     the find_all() and whole-word find() calls on the collator returned by
     icu.primary_collator_without_punctuation().
     '''
     self.ae((1, 1), icu.find(b'a', b'1ab'))
     self.ae((1, 1), icu.find('\U0001f431', 'x\U0001f431x'))
     self.ae((1, 1), icu.find('y', '\U0001f431y'))
     self.ae((0, 4), icu.primary_find('pena', 'peña'))
     # Each needle differs from its haystack word only in accents, case or
     # the eszett/"ss" spelling; the leading space puts the match at 1.
     for k, v in iteritems({'pèché': 'peche', 'flüße':'Flusse', 'Štepánek':'ŠtepaneK'}):
         self.ae((1, len(k)), icu.primary_find(v, ' ' + k), f'Failed to find {v} in {k}')
     self.assertTrue(icu.startswith(b'abc', b'ab'))
     self.assertTrue(icu.startswith('abc', 'abc'))
     self.assertFalse(icu.startswith('xyz', 'a'))
     self.assertTrue(icu.startswith('xxx', ''))
     self.assertTrue(icu.primary_startswith('pena', 'peña'))
     self.assertTrue(icu.contains('\U0001f431', '\U0001f431'))
     self.assertTrue(icu.contains('something', 'some other something else'))
     self.assertTrue(icu.contains('', 'a'))
     self.assertTrue(icu.contains('', ''))
     self.assertFalse(icu.contains('xxx', 'xx'))
     self.assertTrue(icu.primary_contains('pena', 'peña'))
     x = icu.primary_collator()
     # The original line ended in a stray comma, which wrapped the call in
     # a discarded 1-tuple; it is now a plain statement.
     self.ae(x.get_attribute(icu._icu.UCOL_STRENGTH), icu._icu.UCOL_PRIMARY)
     # Quotes, apostrophes, soft hyphen (U+00AD), ZWNJ (U+200C), ZWJ
     # (U+200D) and spaces are expected not to prevent a match; the
     # reported length spans the matched haystack text.
     self.ae((0, 4), icu.primary_no_punc_find('pena"', 'peña'))
     self.ae((0, 13), icu.primary_no_punc_find("typographers", 'typographer’s'))
     self.ae((0, 7), icu.primary_no_punc_find('abcd', 'a\u00adb\u200cc\u200dd'))
     self.ae((0, 5), icu.primary_no_punc_find('abcd', 'ab cd'))
     # test find all
     m = []

     def record_match(pos, length):
         # find_all() reports each match through this callback
         m.append((pos, length))

     icu.primary_collator_without_punctuation().find_all('a', 'a a🐱a', record_match)
     self.ae(m, [(0, 1), (2, 1), (5, 1)])
     # test find whole words
     c = icu.primary_collator_without_punctuation()
     self.ae(c.find('a', 'abc a bc'), (0, 1))
     # A third argument of True restricts the search to whole words, so
     # the "a" inside "abc" / "peñaabc" is skipped.
     self.ae(c.find('a', 'abc a bc', True), (4, 1))
     self.ae(c.find('pena', 'a peñaabc peña', True), (10, 4))
Exemplo n.º 3
0
def process_item(ctx, haystack, needle):
    """
    Find the best-scoring in-order match of needle's characters in haystack.

    Every character of needle is searched for in haystack after the position
    of the previous match. For each match found, the alternative of matching
    the same needle character further to the right is pushed onto a stack and
    explored later, so all candidate alignments are considered.

    :param ctx: object providing ``memory`` (a dict used to memoize results
        per ``(hidx, nidx, last_idx)`` state) and ``max_score_per_char``; it
        is also passed through to ``calc_score_for_char()``.
    :param haystack: the string to search in.
    :param needle: the string whose characters are to be located.
    :return: ``(score, positions)`` for the best alignment found.
        ``positions`` has one entry per needle character (initialised to -1)
        holding the haystack index it was matched at. The score is 0 when no
        complete alignment exists.
    """
    # non-recursive implementation using a stack
    # Stack entries: (haystack start index, needle index, index of the
    # previous match, score so far, positions so far)
    stack = [(0, 0, 0, 0, [-1]*len(needle))]
    final_score, final_positions = stack[0][-2:]
    push, pop = stack.append, stack.pop
    while stack:
        hidx, nidx, last_idx, score, positions = pop()
        key = (hidx, nidx, last_idx)
        mem = ctx.memory.get(key, None)
        if mem is None:
            # range() instead of xrange() so this also runs on Python 3
            for i in range(nidx, len(needle)):
                n = needle[i]
                # Not enough haystack left to hold the rest of the needle
                if (len(haystack) - hidx < len(needle) - i):
                    score = 0
                    break
                # find() is defined elsewhere in this module; the -1 test
                # implies its first item is -1 when there is no match
                # (NOTE(review): confirm). The check must happen BEFORE the
                # slice offset is added, otherwise a miss with hidx > 0 would
                # be treated as a match at hidx - 1.
                pos = find(n, haystack[hidx:])[0]
                if pos == -1:
                    score = 0
                    break
                # Convert the offset within the slice to a haystack index
                pos += hidx

                distance = pos - last_idx
                score_for_char = ctx.max_score_per_char if distance <= 1 else calc_score_for_char(ctx, haystack[pos-1], haystack[pos], distance)
                hidx = pos + 1
                # Remember the alternative: match needle[i] somewhere after
                # pos instead, with the state as it was before this match
                push((hidx, i, last_idx, score, list(positions)))
                last_idx = positions[i] = pos
                score += score_for_char
            ctx.memory[key] = (score, positions)
        else:
            score, positions = mem
        if score > final_score:
            final_score = score
            final_positions = positions
    return final_score, final_positions
Exemplo n.º 4
0
 def test_find(self):
     '''
     Test searching for substrings.

     Exercises icu.find(), icu.primary_find(), icu.startswith(),
     icu.primary_startswith(), icu.contains() and icu.primary_contains().
     Judging by the expectations below, the find functions return a
     (position, length) tuple.
     '''
     self.ae((1, 1), icu.find(b'a', b'1ab'))
     # U+1F431 is outside the BMP: when sys.maxunicode < 0x10ffff the
     # expected length (and the offset of whatever follows it) is 2, not 1.
     self.ae((1, 1 if sys.maxunicode >= 0x10ffff else 2), icu.find('\U0001f431', 'x\U0001f431x'))
     self.ae((1 if sys.maxunicode >= 0x10ffff else 2, 1), icu.find('y', '\U0001f431y'))
     self.ae((0, 4), icu.primary_find('pena', 'peña'))
     # .items() replaces the Python 2 only .iteritems(), so the loop runs on
     # both Python 2 and Python 3. The leading space in the haystack puts
     # the expected match at position 1.
     for k, v in {u'pèché': u'peche', u'flüße':u'Flusse', u'Štepánek':u'ŠtepaneK'}.items():
         self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k))
     self.assertTrue(icu.startswith(b'abc', b'ab'))
     self.assertTrue(icu.startswith('abc', 'abc'))
     self.assertFalse(icu.startswith('xyz', 'a'))
     # The empty string is expected to be a prefix of anything.
     self.assertTrue(icu.startswith('xxx', ''))
     self.assertTrue(icu.primary_startswith('pena', 'peña'))
     self.assertTrue(icu.contains('\U0001f431', '\U0001f431'))
     self.assertTrue(icu.contains('something', 'some other something else'))
     # The empty string is expected to be contained in anything.
     self.assertTrue(icu.contains('', 'a'))
     self.assertTrue(icu.contains('', ''))
     self.assertFalse(icu.contains('xxx', 'xx'))
     self.assertTrue(icu.primary_contains('pena', 'peña'))
Exemplo n.º 5
0
 def test_find(self):
     """
     Test searching for substrings.

     Exercises icu.find(), icu.primary_find(), icu.startswith(),
     icu.primary_startswith(), icu.contains() and icu.primary_contains().
     Judging by the expectations below, the find functions return a
     (position, length) tuple.
     """
     self.ae((1, 1), icu.find(b"a", b"1ab"))
     # U+1F431 is outside the BMP: when sys.maxunicode < 0x10FFFF the
     # expected length (and the offset of whatever follows it) is 2, not 1.
     self.ae((1, 1 if sys.maxunicode >= 0x10FFFF else 2), icu.find("\U0001f431", "x\U0001f431x"))
     self.ae((1 if sys.maxunicode >= 0x10FFFF else 2, 1), icu.find("y", "\U0001f431y"))
     self.ae((0, 4), icu.primary_find("pena", "peña"))
     # .items() replaces the Python 2 only .iteritems(), so the loop runs on
     # both Python 2 and Python 3. The leading space in the haystack puts
     # the expected match at position 1.
     for k, v in {"pèché": "peche", "flüße": "Flusse", "Štepánek": "ŠtepaneK"}.items():
         self.ae((1, len(k)), icu.primary_find(v, " " + k), "Failed to find %s in %s" % (v, k))
     self.assertTrue(icu.startswith(b"abc", b"ab"))
     self.assertTrue(icu.startswith("abc", "abc"))
     self.assertFalse(icu.startswith("xyz", "a"))
     # The empty string is expected to be a prefix of anything.
     self.assertTrue(icu.startswith("xxx", ""))
     self.assertTrue(icu.primary_startswith("pena", "peña"))
     self.assertTrue(icu.contains("\U0001f431", "\U0001f431"))
     self.assertTrue(icu.contains("something", "some other something else"))
     # The empty string is expected to be contained in anything.
     self.assertTrue(icu.contains("", "a"))
     self.assertTrue(icu.contains("", ""))
     self.assertFalse(icu.contains("xxx", "xx"))
     self.assertTrue(icu.primary_contains("pena", "peña"))
Exemplo n.º 6
0
 def test_find(self):
     ' Check the substring search, prefix and containment helpers of icu '
     cat = '\U0001f431'  # a code point outside the BMP

     # Plain find: expectations read as (position, length)
     self.ae((1, 1), icu.find(b'a', b'1ab'))
     self.ae((1, 1), icu.find(cat, 'x' + cat + 'x'))
     self.ae((1, 1), icu.find('y', cat + 'y'))

     # Primary-strength find: needles differ from the haystack words only
     # in accents, case or the eszett/"ss" spelling
     self.ae((0, 4), icu.primary_find('pena', 'peña'))
     variants = {'pèché': 'peche', 'flüße': 'Flusse', 'Štepánek': 'ŠtepaneK'}
     for word, plain in iteritems(variants):
         # The haystack gets a leading space, hence the match position of 1
         found = icu.primary_find(plain, ' ' + word)
         self.ae((1, len(word)), found, f'Failed to find {plain} in {word}')

     # Prefix checks
     for text, prefix in ((b'abc', b'ab'), ('abc', 'abc')):
         self.assertTrue(icu.startswith(text, prefix))
     self.assertFalse(icu.startswith('xyz', 'a'))
     self.assertTrue(icu.startswith('xxx', ''))
     self.assertTrue(icu.primary_startswith('pena', 'peña'))

     # Containment checks, including the empty needle
     for needle, text in (
             (cat, cat),
             ('something', 'some other something else'),
             ('', 'a'),
             ('', ''),
     ):
         self.assertTrue(icu.contains(needle, text))
     self.assertFalse(icu.contains('xxx', 'xx'))
     self.assertTrue(icu.primary_contains('pena', 'peña'))