Exemplo n.º 1
0
    def _(self, data: PathCollection, order: Optional[int] = None) -> None:
        """Fit the model to a ``PathCollection``.

        NOTE(review): the name ``_`` suggests this is a dispatch overload of
        ``fit``; the registering decorator is outside this view -- confirm.

        Parameters
        ----------
        data : PathCollection
            Path data the model is fitted to.
        order : Optional[int]
            If given, it is stored in ``self._order`` before fitting.

        Raises
        ------
        AttributeError
            If ``self.order`` is neither within ``[0, 1]`` nor greater
            than 1 (for example a negative order).
        """
        # An explicitly passed order replaces the stored one.
        if order is not None:
            self._order = order

        # Orders 0 and 1 are handled entirely by the parent implementation.
        if 0 <= self.order <= 1:
            super().fit(data, order=self.order)
            return

        # Whatever is not strictly greater than 1 here is unsupported.
        if not self.order > 1:
            LOG.error('A Null Model with order %s is not supported',
                      self.order)
            raise AttributeError

        # Rebuild node and edge collections from the input so the
        # higher-order node collection is created on top of them.
        base_nodes = NodeCollection()
        for base_node in data.nodes.values():
            base_nodes.add(base_node)

        base_edges = EdgeCollection(nodes=base_nodes)
        for base_edge in data.edges.values():
            base_edges.add(base_edge)

        self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                                edges=base_edges)

        # Derive a network from the paths, then run the actual computation
        # on both representations.
        first_order = Network.from_paths(data, frequencies=True)
        self.calculate(first_order, data)
Exemplo n.º 2
0
def test_NodeCollection_iter():
    """Test iterating through the node collection."""
    collection = NodeCollection()
    collection.add(['a', 'b', 'c', 'd'])

    # Direct iteration yields objects exposing a string ``uid``.
    assert all(isinstance(member.uid, str) for member in collection)

    # ``items()`` yields (uid, node) pairs.
    assert all(
        isinstance(key, str) and isinstance(member, Node)
        for key, member in collection.items()
    )

    # ``keys()`` yields the uids only.
    assert all(isinstance(key, str) for key in collection.keys())

    # ``values()`` yields the node objects only.
    assert all(isinstance(member, Node) for member in collection.values())
Exemplo n.º 3
0
def test_NodeCollection():
    """Test adding, looking up and removing nodes in a node collection.

    The statements are order-dependent: every length assertion relies on
    the exact sequence of ``add``/``remove`` calls before it.
    """
    nodes = NodeCollection()

    # A fresh collection is empty.
    assert len(nodes) == 0

    a = Node('a')
    nodes.add(a)

    # The node can be looked up and tested for membership both by uid
    # and by the object itself.
    assert len(nodes) == 1
    assert nodes['a'] == a
    assert nodes[a] == a
    assert 'a' in nodes
    assert a in nodes
    assert 'a' in nodes.uids
    assert 'a' in nodes.keys()
    assert a in nodes.values()
    assert ('a', a) in nodes.items()

    # A tuple of uids adds one node per uid.
    nodes.add(('b', 'c'))

    assert len(nodes) == 3

    # NOTE(review): the duplicate-add checks below are disabled in the
    # original test; the reason is not visible here.
    # with pytest.raises(Exception):
    #     nodes.add('a')

    # with pytest.raises(Exception):
    #     nodes.add(a)

    d = Node('d', color='blue')
    nodes.add(d)

    assert nodes['d']['color'] == 'blue'

    # Changing an attribute on the original object is visible through the
    # collection.
    d['color'] = 'red'
    assert nodes['d']['color'] == 'red'

    # Nested lists/tuples of uids: five more nodes (e, f, g, h, i).
    nodes.add(['e', ('f', 'g'), ['h', 'i']])

    assert len(nodes) == 9

    # Removal by node object.
    nodes.remove(a)

    assert len(nodes) == 8
    assert a not in nodes

    # Removal by uid.
    nodes.remove('b')

    assert len(nodes) == 7
    assert 'b' not in nodes

    # Removal with a nested structure of uids; only 'c' and 'd' remain.
    nodes.remove([('e', 'f', 'g'), 'h', ['i']])

    assert len(nodes) == 2
Exemplo n.º 4
0
    def _(self, data: Network, order: Optional[int] = None) -> None:
        """Fit the model to a ``Network``.

        NOTE(review): the name ``_`` suggests this is a dispatch overload of
        ``fit``; the registering decorator is outside this view -- confirm.

        Parameters
        ----------
        data : Network
            Network the model is fitted to.
        order : Optional[int]
            If given, it is stored in ``self._order`` before fitting.

        Raises
        ------
        AttributeError
            If ``self.order`` is neither within ``[0, 1]`` nor greater
            than 1 (for example a negative order).
        """
        # An explicitly passed order replaces the stored one.
        if order is not None:
            self._order = order

        # Orders 0 and 1 are handled entirely by the parent implementation.
        if 0 <= self.order <= 1:
            super().fit(data, order=self.order)
            return

        # Whatever is not strictly greater than 1 here is unsupported.
        if not self.order > 1:
            LOG.error('A Null Model with order %s is not supported',
                      self.order)
            raise AttributeError

        # TODO: create function to transfer base data from the input object
        # Rebuild node and edge collections from the input so the
        # higher-order node collection is created on top of them.
        base_nodes = NodeCollection()
        for base_node in data.nodes.values():
            base_nodes.add(base_node)

        base_edges = EdgeCollection(nodes=base_nodes)
        for base_edge in data.edges.values():
            base_edges.add(base_edge)

        self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                                edges=base_edges)

        # Express the network as paths: every edge is added as a path
        # carrying the edge's 'frequency' attribute (1 when absent).
        path_view = PathCollection(directed=data.directed,
                                   nodes=data.nodes,
                                   edges=data.edges)
        for link in data.edges:
            link_frequency = link.attributes.get('frequency', 1)
            path_view.add(link, frequency=link_frequency)

        self.calculate(data, path_view)
    def _(self,
          data: PathCollection,
          order: Optional[int] = None,
          subpaths: bool = True) -> None:
        """Build the higher-order nodes and edges from a ``PathCollection``.

        Parameters
        ----------
        data : PathCollection
            Observed paths; each path's ``'frequency'`` attribute (1 when
            absent) is used as its weight.
        order : Optional[int]
            If given, it is stored in ``self._order`` before fitting.
        subpaths : bool
            If ``True``, ``self._subpaths`` is filled via
            ``SubPathCollection.from_paths`` with ``max_length=order``.

        Notes
        -----
        Per path, depending on ``order``:

        * ``0``: every path node is registered as the 1-tuple ``(node,)``
          and its ``'frequency'`` is increased; after all paths are
          processed the node frequencies are divided by their total.
        * ``1``: every path node becomes a 1-tuple and consecutive tuples
          are linked by an edge.
        * ``1 < order <= len(path)``: windows of ``order - 1`` edges are
          used as nodes and consecutive windows are linked by an edge.
        * ``order == len(path) + 1``: the full edge tuple is added as a
          single node without edges.
        * otherwise the path contributes nothing.

        Raises
        ------
        ZeroDivisionError
            For ``order == 0`` when nodes exist but their frequencies sum
            to zero.
        """
        if order is not None:
            self._order = order

        order = self.order
        # TODO: create function to transfer base data from PathCollection object
        # --- START ---
        nc = NodeCollection()
        for node in data.nodes.values():
            nc.add(node)

        ec = EdgeCollection(nodes=nc)
        for edge in data.edges.values():
            ec.add(edge)

        self._nodes = HigherOrderNodeCollection(nodes=nc, edges=ec)
        # --- END ---

        # iterate over all paths
        for path in data:

            # get frequency of the observed path
            # TODO: define keyword in config file
            frequency = path.attributes.get('frequency', 1)

            # Sequence of higher-order node keys visited by this path;
            # consecutive entries are linked by an edge further below.
            nodes: list = []
            if order == 0:
                # Register unseen nodes first, then count every occurrence.
                for node in path.nodes:
                    if (node, ) not in self.nodes:
                        self.add_node(node, frequency=0.0)

                for node in path.nodes:
                    self.nodes[(node, )]['frequency'] += frequency
            elif order == 1:
                nodes.extend([tuple([n]) for n in path.nodes])

            elif 1 < order <= len(path):
                for subpath in self.window(path.edges, size=order - 1):
                    nodes.append(subpath)

            elif order == len(path) + 1:
                # The path is exactly one higher-order node: no edges.
                if tuple(path.edges) not in self.nodes:
                    self.nodes.add(tuple(path.edges))

            _edges = []
            for _v, _w in zip(nodes[:-1], nodes[1:]):

                if _v not in self.nodes:
                    self.nodes.add(_v)

                if _w not in self.nodes:
                    self.nodes.add(_w)

                _nodes = (self.nodes[_v], self.nodes[_w])
                if _nodes not in self.edges:
                    self.add_edge(*_nodes, possible=0, observed=0, frequency=0)

                _edges.append(self.edges[_nodes])

            for edge in _edges:
                edge['frequency'] += frequency
                # A path with exactly `order` edges counts as observed;
                # any other path only counts as possible.
                if order == len(path):
                    edge['observed'] += frequency
                else:
                    edge['possible'] += frequency

        if order == 0:
            # Normalise to relative frequencies. The total is computed once
            # from a snapshot taken before any node is rescaled, instead of
            # being re-summed for every node.
            frequencies = [n['frequency'] for n in self.nodes]
            total = sum(frequencies)
            for node in self.nodes:
                node['frequency'] = node['frequency'] / total

        if subpaths:
            self._subpaths = SubPathCollection.from_paths(data,
                                                          max_length=order,
                                                          include_path=True)
Exemplo n.º 6
0
def read_pathcollection(filename: str,
                        separator: str = ',',
                        frequency: bool = False,
                        directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read paths from a delimited text file.

    Every line describes one path as a sequence of node uids, e.g.
    ``a,b,c``. If ``frequency`` is ``True`` the last field of the line is
    the path frequency (``a,b,c,2``). The default separating character
    ',' can be changed.

    A line with a single node yields a path made of that node only. If the
    same node sequence occurs on several lines, only the first occurrence
    is kept; frequencies of later duplicates are not added.

    Parameters
    ----------
    filename : str
        path to the file
    separator : str
        character separating the fields of a line
    frequency : bool
        is a frequency given? if ``True`` it is the last element of the
        line and is parsed as ``float``; otherwise every path gets 1.0
    directed : bool
        passed on to the resulting ``PathCollection``
    maxlines : int
        number of lines to read (useful to test large files).
        None means the entire file is read

    Returns
    -------
    PathCollection
        collection holding all nodes, edges and paths read from the file

    """

    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # Check the limit before parsing: `n` is zero-based, so testing
            # after processing would read one line more than requested.
            if maxlines is not None and n >= maxlines:
                break

            fields = line.rstrip().split(separator)
            # NOTE(review): str.split always returns at least one element,
            # so this never fires; a blank line yields a node with uid ''.
            assert len(fields) >= 1, 'Error: empty line: {0}'.format(line)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            # First occurrence wins. The nodes and edges of a repeated
            # path already exist, so nothing else is left to do.
            if path in paths:
                continue

            if len(path) == 1:
                paths[path] = Path(nodes[path[0]], frequency=freq)
            else:
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u], nodes[v])
                    edge_list.append(edges[(u, v)])

                paths[path] = Path(*edge_list, frequency=freq)

    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)

    for _path in paths.values():
        _paths._add(_path)

    return _paths
Exemplo n.º 7
0
    def read_file(cls,
                  filename: str,
                  separator: str = ',',
                  frequency: bool = False,
                  directed: bool = True,
                  maxlines: int = None) -> 'PathCollection':
        """
        Read paths from a delimited text file.

        Every line describes one path as a sequence of node uids, e.g.
        ``a,b,c``. If ``frequency`` is ``True`` the last field of the line
        is the path frequency (``a,b,c,2``). The default separating
        character ',' can be changed. Edges are created with the uid
        ``"<u>-<v>"``.

        If the same node sequence occurs on several lines, only the first
        occurrence is kept; frequencies of later duplicates are not added.

        Parameters
        ----------
        filename : str
            path to the file
        separator : str
            character separating the fields of a line
        frequency : bool
            is a frequency given? if ``True`` it is the last element of
            the line and is parsed as ``int``; otherwise every path gets 1
        directed : bool
            passed on to the resulting ``PathCollection``
        maxlines : int
            number of lines to read (useful to test large files).
            None means the entire file is read

        Returns
        -------
        PathCollection
            collection holding all nodes, edges and paths read from the
            file

        Raises
        ------
        AssertionError
            If a line has fewer than two fields.
        """
        nodes = {}
        edges = {}
        paths = {}

        # The handle and the parsed frequency use distinct names; binding
        # both to `f` would shadow the open file inside the loop.
        with open(filename, 'r') as handle:
            for n, line in enumerate(handle):
                # Check the limit before parsing: `n` is zero-based, so
                # testing after processing would read one line too many.
                if maxlines is not None and n >= maxlines:
                    break

                fields = line.rstrip().split(separator)
                assert len(fields) >= 2, 'Error: malformed line: {0}'.format(
                    line)

                if frequency:
                    path = tuple(fields[:-1])
                    freq = int(fields[-1])
                else:
                    path = tuple(fields)
                    freq = 1

                for node in path:
                    if node not in nodes:
                        nodes[node] = Node(node)

                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u],
                                             nodes[v],
                                             uid=u + '-' + v)
                    edge_list.append(edges[(u, v)])

                if path not in paths:
                    paths[path] = Path(*edge_list, frequency=freq)

        nc = NodeCollection()
        nc.add(*nodes.values())

        ec = EdgeCollection(nodes=nc)
        for edge in edges.values():
            ec._add(edge)

        # Forward `directed`; without it the argument would have no effect.
        p = PathCollection(directed=directed, nodes=nc, edges=ec)

        for path in paths.values():
            p._add(path)

        return p
Exemplo n.º 8
0
def test_NodeCollection():
    """Test adding, looking up and removing nodes in a node collection.

    This variant passes several nodes to ``add``/``remove`` as separate
    positional arguments. The statements are order-dependent: every length
    assertion relies on the exact sequence of calls before it.
    """
    nodes = NodeCollection()

    # A fresh collection is empty.
    assert len(nodes) == 0

    a = Node('a')
    nodes.add(a)

    # The node can be looked up and tested for membership both by uid
    # and by the object itself.
    assert len(nodes) == 1
    assert nodes['a'] == a
    assert nodes[a] == a
    assert 'a' in nodes
    assert a in nodes
    assert 'a' in nodes.uids
    assert 'a' in nodes.keys()
    assert a in nodes.values()
    assert ('a', a) in nodes.items()
    assert {'a': a} == nodes.dict

    # Several uids given as separate arguments add one node each.
    nodes.add('b', 'c')

    # Output is only visible when pytest runs without output capturing.
    print(nodes)
    assert len(nodes) == 3

    # Adding an already known uid or node object must raise.
    with pytest.raises(Exception):
        nodes.add('a')

    with pytest.raises(Exception):
        nodes.add(a)

    d = Node('d', color='blue')
    nodes.add(d)

    assert nodes['d']['color'] == 'blue'

    # Changing an attribute on the original object is visible through the
    # collection.
    d['color'] = 'red'
    assert nodes['d']['color'] == 'red'

    # Mixed plain, tuple and list arguments: five more nodes (e, f, g, h, i).
    nodes.add('e', ('f', 'g'), ['h', 'i'])

    assert len(nodes) == 9

    # Removing the same five uids leaves a, b, c and d.
    nodes.remove(('e', 'f', 'g'), 'h', ['i'])

    assert len(nodes) == 4