Exemplo n.º 1
0
 def test_happy_days(self):
     '''Every valid window size over a five-item range yields the expected tuples'''
     # (window size, windows expected from range(5)) pairs, checked in order
     expected_per_size = (
         (1, [(0,), (1,), (2,), (3,), (4,)]),
         (2, [(0, 1), (1, 2), (2, 3), (3, 4)]),
         (3, [(0, 1, 2), (1, 2, 3), (2, 3, 4)]),
         (4, [(0, 1, 2, 3), (1, 2, 3, 4)]),
         (5, [(0, 1, 2, 3, 4)]),
     )
     for width, windows in expected_per_size:
         assert list(sliding_window(range(5), size=width)) == windows
Exemplo n.º 2
0
 def test_invalid_size(self):
     '''
     Window sizes that cannot be satisfied are rejected:

     - When size < 1, ValueError
     - When size > ilen(iterable), ValueError
     '''
     # 0 is below the minimum size; 6 exceeds the 5 items available
     for bad_size in (0, 6):
         with pytest.raises(ValueError):
             list(sliding_window(range(5), size=bad_size))
def connected_components2(sets):
    """
    Merge named sets that share at least one item.

    A graph is built with one node per non-empty named set; two names are
    linked when their sets have an item in common. Every connected component
    of that graph becomes one merged set.

    Parameters
    ----------
    sets : dict
        Maps a name to a collection of items. Entries with a falsy (e.g.
        empty) collection are ignored. Items and names are sorted together as
        ``(item, name)`` tuples, so they must be orderable.

    Returns
    -------
    dict
        Maps the ``frozenset`` of names in a component to the ``set`` union of
        the items of those names.
    """
    sets = {name: items for name, items in sets.items() if items}
    g = nx.Graph()
    g.add_nodes_from(sets)

    # Sorting (item, name) pairs places all owners of an item next to each
    # other, so linking adjacent owners is enough to connect all of them.
    pairs = sorted((item, name) for name, items in sets.items() for item in items)

    # zip over neighbours yields nothing for fewer than two pairs, so empty or
    # single-item input is handled instead of failing on a too-short window.
    for (item, name), (next_item, next_name) in zip(pairs, pairs[1:]):
        if item == next_item:
            g.add_edge(name, next_name)

    return {
        frozenset(component): set.union(*(set(sets[name]) for name in component))
        for component in nx.connected_components(g)
    }
def create_input(overlap, list_size_distribution, list_size_mean, set_count):
    """
    Generate random integer sets with some overlap, plus the expected merge result.

    The sets start out as consecutive, disjoint integer ranges. Overlap is then
    introduced by replacing one element of a set with an element of the set
    before it, chaining neighbouring sets into families. Every so often a set
    is skipped so that a new family starts.

    Parameters
    ----------
    overlap : float
        Multiplied with ``set_count`` and rounded to get the number of sets to
        involve in overlap.
    list_size_distribution : str
        How set sizes are drawn: ``'constant'``, ``'uniform'``,
        ``'left_triangular'`` (more small sets) or ``'right_triangular'``
        (more large sets).
    list_size_mean : int
        Size used for ``'constant'``; for the other distributions sizes are
        drawn between 1 and ``2 * list_size_mean - 1``.
    set_count : int
        Number of sets to generate.

    Returns
    -------
    sets : list of list
        The generated sets, each converted to a list, in shuffled order.
    expected_output : list of set
        One merged set per family of overlapping sets, followed by the sets
        that were left without overlap.

    Raises
    ------
    ValueError
        If ``list_size_distribution`` is not one of the supported names.
    """
    # create without overlap
    min_ = 1
    max_ = 2 * list_size_mean - min_
    if list_size_distribution == 'constant':  # 1 list size
        list_sizes = np.full(set_count, list_size_mean, dtype=int)
    elif list_size_distribution == 'uniform':  # all sizes equally possible
        # randint excludes its upper bound, so +1 keeps max_ reachable
        list_sizes = np.random.randint(min_, max_ + 1, set_count)
    elif list_size_distribution == 'left_triangular':  # more small lists
        list_sizes = np.random.triangular(min_, min_, max_, set_count).round()
    elif list_size_distribution == 'right_triangular':  # more large lists
        list_sizes = np.random.triangular(min_, max_, max_, set_count).round()
    else:
        # An assert would vanish under -O and leave list_sizes unbound
        raise ValueError(
            'Unknown list_size_distribution: {!r}'.format(list_size_distribution)
        )

    # Cumulative sizes are the boundaries of consecutive, disjoint ranges
    indices = np.insert(list_sizes.astype(int).cumsum(), 0, 0)
    sets = [set(range(start, end)) for start, end in sliding_window(indices)]

    # add overlap, with a skip every so often
    overlap_to_create = round(set_count * overlap)
    i = 0

    def get_next_skip_distance():
        return round(np.random.standard_exponential()) + 1

    next_skip = get_next_skip_distance()
    expected_output = []
    current_overlap = sets[0]
    overlap_to_create -= 1  # the set that opens a family uses up one unit
    while overlap_to_create > 0:
        if i == next_skip and i < len(sets) - 1 and overlap_to_create >= 2:
            # start new family of overlapping sets
            i += 1
            overlap_to_create -= 1
            expected_output.append(current_overlap)
            current_overlap = sets[i]
            next_skip = i + get_next_skip_distance()
        # create overlap: swap an arbitrary element of the next set for an
        # element of the current one (first() presumably returns the first
        # item yielded by iterating the set)
        sets[i + 1].pop()
        sets[i + 1].add(first(sets[i]))
        overlap_to_create -= 1
        # `|` builds a new set, so the sets in `sets` are not mutated here
        current_overlap = current_overlap | sets[i + 1]
        i += 1

    # The loop has no break, so the family in progress is always closed here
    expected_output.append(current_overlap)
    # Sets past the last family were never touched and stay as they are
    expected_output.extend(sets[i + 1:])

    # final things
    sets = [list(x) for x in sets]
    random.shuffle(sets)

    return sets, expected_output
Exemplo n.º 5
0
def toset_from_tosets(
    *tosets
):  # Note: a setlist is perfect representation of a toset as it's totally ordered and it's a set, i.e. a toset
    """
    Create totally ordered set (toset) from tosets.

    These tosets, when merged, form a partially ordered set. The linear
    extension of this poset, a toset, is returned.

    .. warning:: untested

    Parameters
    ----------
    tosets : iterable of setlist
        Tosets to merge

    Returns
    -------
    setlist
        Totally ordered set

    Raises
    ------
    ValueError
        If the tosets (derived from the lists) contradict each other. E.g.
        ``[a, b]`` and ``[b, c, a]`` contradict each other.
    """
    # Construct directed graph with: a <-- b iff a < b and adjacent in a list
    graph = nx.DiGraph()
    for toset in tosets:
        graph.add_nodes_from(toset)
        # A toset with fewer than two elements has no adjacent pairs;
        # sliding_window presumably rejects input shorter than its window
        # (ValueError), so skip it rather than fail on a valid toset.
        if len(toset) > 1:
            graph.add_edges_from(sliding_window(reversed(toset)))

    # No cycles allowed
    # TODO could rely on NetworkXUnfeasible https://networkx.github.io/documentation/networkx-1.9/reference/generated/networkx.algorithms.dag.topological_sort.html
    if not nx.is_directed_acyclic_graph(graph):
        raise ValueError("Given tosets contradict each other")  # each cycle is a contradiction, e.g. a > b > c > a

    # Topological sort. Edges point from larger to smaller, so the sort is
    # reversed to list the smallest element first. The `reverse` keyword was
    # removed in NetworkX 2.0; reversing the materialised result works on
    # both 1.x and 2.x.
    return setlist(reversed(list(nx.topological_sort(graph))))