def assert_out_contains(actual, expected, encode=True, sequential=False):
    """
    Utility assertion function. Helps keep the test code cleaner.

    Asserts that every expected substring occurs in ``actual``.

    :param actual: the output to search. When ``encode`` is True it is converted with ``.encode()``
        (unless it is None) before searching.
    :param expected: a string or iterable of strings representing 1+ substrings that the output string should contain.
        None is treated as "nothing expected".
    :param encode: if True, ``actual`` and every non-None expected substring are converted with ``.encode()``,
        so that both sides of the comparison have the same type.
    :param sequential: if True, the substrings must be found in the given order: the search for each substring
        starts at the beginning of the previous match (so a substring may overlap, or be nested in, the
        previous one). If False, each substring may occur anywhere in ``actual``.
    :raises AssertionError: if an expected substring is not found, or if ``actual`` is None while at least
        one substring is expected.
    """
    __tracebackhide__ = True
    if expected is None:
        expected = []
    elif is_string(expected):
        expected = [expected]

    if encode and actual is not None:
        actual = actual.encode()
    # Encode the expected substrings once, for BOTH modes. Previously only the sequential branch encoded
    # them, so the non-sequential branch compared un-encoded substrings against the encoded output.
    expected = [s.encode() if (encode and s is not None) else s for s in expected]

    if expected and actual is None:
        # Fail with a readable assertion instead of an AttributeError/TypeError raised by the search itself.
        raise AssertionError("Actual is None, but {count} substring(s) were expected.".format(
            count=repr(len(expected))))

    # TODO: set max_actual dynamically based on the verbosity setting
    # max_actual = 80
    max_actual = 800

    if sequential:
        pos = 0
        for i, s in enumerate(expected):
            try:
                # move pos to the beginning of the current match
                pos = actual.index(s, pos)
            except ValueError:
                msg = "Expected #{index} not found in Actual[{start}:{end}]." + \
                      "\nExpected #{index}: ({value})." + \
                      "\nActual: ...{actual}..."
                # An explicit raise (rather than ``assert False``) keeps the check alive under ``python -O``.
                raise AssertionError(msg.format(index=repr(i), value=repr(s), start=repr(pos),
                                                end=repr(len(actual)),
                                                actual=repr(actual[pos:pos + max_actual])))

            # Note: if we disallow nested substrings here, it would make sense to do so when sequential==False, too.
            # # move pos to the end of the current match (so that nested substrings are ignored)
            # pos += len(s)
    else:
        for i, s in enumerate(expected):
            if s not in actual:
                msg = "Expected #{index} not found in Actual." + \
                      "\nExpected #{index}: ({value})." + \
                      "\nActual: {actual}..."
                raise AssertionError(msg.format(index=repr(i), value=repr(s),
                                                actual=repr(actual[:max_actual])))
# Example #2
# 0
def find_distances(item1, item2, items, regex=False, regex_flags=None, verbose=False):
    """
    Calculates distance stats between the positions of 2 items (or 2 sets of items) within a list of items,
    e.g. how far apart 2 words (or 2 sets of words) are within a given list of words.

    The positions are collected with find_all() and the stats are computed by get_index_distance_stats().

    Adapted from: http://stackoverflow.com/a/33389155

    :param item1: the value (or pattern) to look for.
        A string is treated as a single value; anything else is iterated as a collection of values/patterns.
    :param item2: the value (or pattern) to look for.
        A string is treated as a single value; anything else is iterated as a collection of values/patterns.
    :param items: the items to search in. It is indexed by position when verbose is True.
    :param regex: passed through to find_all(); if True, each item is treated as a regex pattern.
    :param regex_flags: passed through to find_all(); optional flags for re.search().
    :param verbose: if True, the result is extended with the keys 'matches1' and 'matches2', each mapping
        every matched value to the set of indexes at which it was found.
    :return: the result of get_index_distance_stats() for the two sets of matching indexes
        (see the examples below), plus the match details when verbose is True.

    >>> words = get_words(lorem_ipsum())
    >>> find_distances(['lorem'], ['ipsum'], words)
    {'max': 893, 'mean': 402.56, 'min': 83}

    >>> words = get_words(lorem_ipsum())
    >>> find_distances(['lorem', 'dolor'], ['consectetur', 'adipiscing'], words)
    {'max': 889, 'mean': 467.0740740740741, 'min': 3}

    >>> words = get_words(lorem_ipsum())
    >>> w1 = ['^Pellentesque$']
    >>> w2 = ['^Vivamus']
    >>> find_distances(w1, w2, words, regex=True, regex_flags=re.IGNORECASE)
    {'max': 910, 'mean': 287.1212121212121, 'min': 21}
    """

    def collect_positions(targets):
        # Union of the indexes at which any of the targets matches.
        positions = set()
        for target in targets:
            for position, _ in find_all(target, items, regex=regex, regex_flags=regex_flags):
                positions.add(position)
        return positions

    def group_by_value(positions):
        # Maps each matched value to the set of indexes where it occurs.
        grouped = {}
        for position in positions:
            grouped.setdefault(items[position], set()).add(position)
        return grouped

    # A lone string is a single target, not an iterable of characters.
    targets1 = [item1] if is_string(item1) else item1
    targets2 = [item2] if is_string(item2) else item2

    positions1 = collect_positions(targets1)
    positions2 = collect_positions(targets2)

    stats = get_index_distance_stats(positions1, positions2)
    if verbose:
        stats.update(matches1=group_by_value(positions1))
        stats.update(matches2=group_by_value(positions2))
    return stats