def _(self, data: PathCollection, order: Optional[int] = None) -> None:
    """Fit the null model to a collection of paths.

    Parameters
    ----------
    data : PathCollection
        Observed paths the model is fitted to.
    order : Optional[int]
        When given, replaces the order stored on the model before fitting.

    Raises
    ------
    AttributeError
        If the resulting order is neither in ``[0, 1]`` nor greater than 1.
    """
    # an explicitly supplied order takes precedence over the stored one
    if order is not None:
        self._order = order

    # orders 0 and 1 are handled entirely by the parent implementation
    if 0 <= self.order <= 1:
        super().fit(data, order=self.order)
        return

    # anything that is not a higher order at this point is unsupported
    if not self.order > 1:
        LOG.error('A Null Model with order %s is not supported', self.order)
        raise AttributeError

    # --- START --- copy the base nodes and edges of the input
    base_nodes = NodeCollection()
    for member in data.nodes.values():
        base_nodes.add(member)

    base_edges = EdgeCollection(nodes=base_nodes)
    for member in data.edges.values():
        base_edges.add(member)

    self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                            edges=base_edges)
    # --- END ---

    # the first-order network is derived from the very same path data
    first_order = Network.from_paths(data, frequencies=True)
    self.calculate(first_order, data)
def __init__(self, uid: Optional[str] = None, multiedges: bool = False,
             **kwargs: Any) -> None:
    """Set up an empty hypergraph.

    Parameters
    ----------
    uid : Optional[str]
        Identifier forwarded to the base class.
    multiedges : bool
        Flag stored on the object and handed to the hyperedge collection.
    **kwargs : Any
        Further keyword arguments forwarded to the base class.
    """
    # let the base class handle the uid and any extra keyword arguments
    super().__init__(uid=uid, **kwargs)

    # remember whether multi-edges are part of this hypergraph
    self._multiedges: bool = multiedges

    # container holding the derived properties (no default factory is set,
    # so a missing key still raises KeyError)
    self._properties: defaultdict = defaultdict()

    # empty containers for the node objects and the hyperedge objects
    self._nodes: NodeCollection = NodeCollection()
    self._edges: HyperEdgeCollection = HyperEdgeCollection(
        multiedges=multiedges)

    # register the properties kept for this hypergraph
    self._properties.update(
        edges=set(),
        incident_edges=defaultdict(set),
        degrees=defaultdict(float),
    )
def __init__(self, directed: bool = True, multiedges: bool = False,
             nodes: Optional[NodeCollection] = None) -> None:
    """Set up the flags, the node collection and the lookup maps.

    Parameters
    ----------
    directed : bool
        Stored as the directed/undirected flag.
    multiedges : bool
        Stored as the multi-edge flag.
    nodes : Optional[NodeCollection]
        Existing node collection to use; a new empty one is kept when
        this is ``None``.
    """
    super().__init__()

    # flag: directed or undirected
    self._directed: bool = directed

    # flag: multi-edges present or not
    self._multiedges: bool = multiedges

    # begin with a fresh node collection; a supplied one replaces it
    self._nodes: NodeCollection = NodeCollection()
    if nodes is not None:
        self._nodes = nodes

    # lookup from a tuple of nodes to the matching edges
    self._nodes_map: defaultdict = defaultdict(EdgeSet)

    # lookup from a single node to the edges it takes part in
    self._node_map: defaultdict = defaultdict(set)
def add_nodes(pathpy=True, numbers=1000):
    """Create ``numbers`` nodes and store them in a container.

    With ``pathpy`` set, the nodes are pushed into a ``NodeCollection``
    via the ``<<`` operator; otherwise they go into a plain dictionary
    keyed by the stringified index. The container is discarded and
    ``True`` is returned (presumably a timing helper -- confirm with the
    caller).
    """
    if pathpy:
        collection = NodeCollection()
        for index in range(numbers):
            collection << Node(index)
    else:
        # plain-dict counterpart of the collection above
        collection = {str(index): Node(index) for index in range(numbers)}

    return True
def _(self, data: Network, order: Optional[int] = None) -> None:
    """Fit the null model to a network.

    Parameters
    ----------
    data : Network
        Network the model is fitted to.
    order : Optional[int]
        When given, replaces the order stored on the model before fitting.

    Raises
    ------
    AttributeError
        If the resulting order is neither in ``[0, 1]`` nor greater than 1.
    """
    # an explicitly supplied order takes precedence over the stored one
    if order is not None:
        self._order = order

    # orders 0 and 1 are handled entirely by the parent implementation
    if 0 <= self.order <= 1:
        super().fit(data, order=self.order)
        return

    # anything that is not a higher order at this point is unsupported
    if not self.order > 1:
        LOG.error('A Null Model with order %s is not supported', self.order)
        raise AttributeError

    # TODO: create function to transfer base data from PathCollection object
    # --- START --- copy the base nodes and edges of the input
    base_nodes = NodeCollection()
    for member in data.nodes.values():
        base_nodes.add(member)

    base_edges = EdgeCollection(nodes=base_nodes)
    for member in data.edges.values():
        base_edges.add(member)

    self._nodes = HigherOrderNodeCollection(nodes=base_nodes,
                                            edges=base_edges)
    # --- END ---

    # express the network as paths: every edge becomes one path that
    # carries the edge's 'frequency' attribute (1 when it is missing)
    as_paths = PathCollection(directed=data.directed,
                              nodes=data.nodes,
                              edges=data.edges)
    for link in data.edges:
        as_paths.add(link, frequency=link.attributes.get('frequency', 1))

    self.calculate(data, as_paths)
def __init__(self, directed: bool = True, multiedges: bool = False,
             multipaths: bool = False,
             nodes: Optional[NodeCollection] = None,
             edges: Optional[EdgeCollection] = None) -> None:
    """Set up the flags, node/edge collections and lookup maps.

    Parameters
    ----------
    directed : bool
        Stored as the directed/undirected flag.
    multiedges : bool
        Stored as the multi-edge flag.
    multipaths : bool
        Stored as the multi-path flag.
    nodes : Optional[NodeCollection]
        Node collection to use. When omitted, the nodes of ``edges`` are
        used if ``edges`` is given, otherwise a new empty collection.
    edges : Optional[EdgeCollection]
        Edge collection to use. When omitted, a new one is created on top
        of the chosen node collection.
    """
    # pylint: disable=too-many-arguments
    super().__init__()

    # flags: directed/undirected, multi-edges, multi-paths
    self._directed: bool = directed
    self._multiedges: bool = multiedges
    self._multipaths: bool = multipaths

    # node collection: explicit nodes win, then the nodes behind the
    # given edges, and a fresh empty collection otherwise
    self._nodes: NodeCollection = NodeCollection()
    if nodes is not None:
        self._nodes = nodes
    elif edges is not None:
        self._nodes = edges.nodes

    # edge collection: built on the chosen nodes, replaced by the given
    # edges when there are any
    self._edges: EdgeCollection = EdgeCollection(directed=directed,
                                                 multiedges=multiedges,
                                                 nodes=self._nodes)
    if edges is not None:
        self._edges = edges

    # node-based lookups: tuple of nodes -> paths, single node -> paths
    self._nodes_map: defaultdict = defaultdict(PathSet)
    self._node_map: defaultdict = defaultdict(set)

    # edge-based lookups: tuple of edges -> paths, single edge -> paths
    self._edges_map: defaultdict = defaultdict(PathSet)
    self._edge_map: defaultdict = defaultdict(set)

    # class of the objects held by this collection
    self._path_class: Any = Path
def test_NodeCollection_iter():
    """Check every way of iterating over a node collection."""
    collection = NodeCollection()
    collection.add(['a', 'b', 'c', 'd'])

    # plain iteration yields objects that carry a string uid
    assert all(isinstance(member.uid, str) for member in collection)

    # items() yields (uid, node) pairs
    assert all(isinstance(key, str) and isinstance(member, Node)
               for key, member in collection.items())

    # keys() yields the uids
    assert all(isinstance(key, str) for key in collection.keys())

    # values() yields the node objects
    assert all(isinstance(member, Node) for member in collection.values())
def __init__(self, uid: Optional[str] = None, directed: bool = True,
             multiedges: bool = False, **kwargs: Any) -> None:
    """Set up an empty network.

    Parameters
    ----------
    uid : Optional[str]
        Identifier forwarded to the base class.
    directed : bool
        Stored as the directed/undirected flag and handed to the edge
        collection.
    multiedges : bool
        Stored as the multi-edge flag and handed to the edge collection.
    **kwargs : Any
        Forwarded to the base class and also written into the network's
        attributes.
    """
    # let the base class handle the uid and any extra keyword arguments
    super().__init__(uid=uid, **kwargs)

    # flags: directed/undirected and multi-edges
    self._directed: bool = directed
    self._multiedges: bool = multiedges

    # container holding the derived properties (no default factory is set,
    # so a missing key still raises KeyError)
    self._properties: defaultdict = defaultdict()

    # empty node container and an edge container built on top of it
    self._nodes: NodeCollection = NodeCollection()
    self._edges: EdgeCollection = EdgeCollection(directed=directed,
                                                 multiedges=multiedges,
                                                 nodes=self._nodes)

    # the extra keyword arguments become attributes of the network
    self.attributes.update(**kwargs)

    # register the properties: a set of edges, set-valued relations per
    # key, and float-valued degree counters per key
    self._properties['edges'] = set()
    for relation in ('successors', 'predecessors', 'outgoing',
                     'incoming', 'neighbors', 'incident_edges'):
        self._properties[relation] = defaultdict(set)
    for counter in ('indegrees', 'outdegrees', 'degrees'):
        self._properties[counter] = defaultdict(float)
def _(self, data: PathCollection, order: Optional[int] = None,
      subpaths: bool = True) -> None:
    """Fit the higher-order model to a collection of paths.

    Every path contributes, depending on ``order``:

    * ``order == 0``: each path node is added as a one-element tuple node
      (if missing) and its ``'frequency'`` is increased by the path
      frequency; after all paths are read the node frequencies are divided
      by their total.
    * ``order == 1``: consecutive path nodes (as one-element tuples) are
      linked by edges.
    * ``1 < order <= len(path)``: consecutive windows of ``order - 1``
      path edges are linked by edges.
    * ``order == len(path) + 1``: the full tuple of path edges is added as
      a node; no edge is created.

    For each linked pair the edge's ``'frequency'`` grows by the path
    frequency, and so does ``'observed'`` when ``order == len(path)`` or
    ``'possible'`` otherwise.

    Parameters
    ----------
    data : PathCollection
        Observed paths.
    order : Optional[int]
        When given, replaces the order stored on the model before fitting.
    subpaths : bool
        When true, ``self._subpaths`` is rebuilt from ``data`` with
        ``max_length=order`` and ``include_path=True``.

    Raises
    ------
    ZeroDivisionError
        For ``order == 0`` when nodes exist but their frequencies sum to 0.
    """
    if order is not None:
        self._order = order
    order = self.order

    # TODO: create function to transfer base data from PathCollection object
    # --- START ---
    nc = NodeCollection()
    for node in data.nodes.values():
        nc.add(node)

    ec = EdgeCollection(nodes=nc)
    for edge in data.edges.values():
        ec.add(edge)

    self._nodes = HigherOrderNodeCollection(nodes=nc, edges=ec)
    # --- END ---

    # iterate over all paths
    for path in data:
        # get frequency of the observed path
        # TODO: define keyword in config file
        frequency = path.attributes.get('frequency', 1)
        path_length = len(path)

        # higher-order nodes visited by this path, in order
        nodes: list = []

        if order == 0:
            # make sure every node exists before any count is touched
            for node in path.nodes:
                if (node, ) not in self.nodes:
                    self.add_node(node, frequency=0.0)
            for node in path.nodes:
                self.nodes[(node, )]['frequency'] += frequency
        elif order == 1:
            nodes.extend((n, ) for n in path.nodes)
        elif 1 < order <= path_length:
            nodes.extend(self.window(path.edges, size=order - 1))
        elif order == path_length + 1:
            if tuple(path.edges) not in self.nodes:
                self.nodes.add(tuple(path.edges))

        # link consecutive higher-order nodes, creating what is missing
        _edges = []
        for _v, _w in zip(nodes[:-1], nodes[1:]):
            if _v not in self.nodes:
                self.nodes.add(_v)
            if _w not in self.nodes:
                self.nodes.add(_w)

            _nodes = (self.nodes[_v], self.nodes[_w])
            if _nodes not in self.edges:
                self.add_edge(*_nodes, possible=0, observed=0, frequency=0)
            _edges.append(self.edges[_nodes])

        # a path whose length equals the order counts as observed,
        # every other path as possible
        counter = 'observed' if order == path_length else 'possible'
        for edge in _edges:
            edge['frequency'] += frequency
            edge[counter] += frequency

    if order == 0:
        # the total is taken once, before any node is rescaled
        total = sum([n['frequency'] for n in self.nodes])
        for node in self.nodes:
            node['frequency'] = node['frequency'] / total

    if subpaths:
        self._subpaths = SubPathCollection.from_paths(data,
                                                      max_length=order,
                                                      include_path=True)
def read_pathcollection(filename: str, separator: str = ',',
                        frequency: bool = False, directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read a collection of paths from a delimited text file.

    Every non-blank line describes one path as a sequence of node uids,
    e.g. ``a,b,c``. With ``frequency`` set, the last field of the line is
    the path frequency instead (``a,b,c,2``). A path that appears on
    several lines is stored once, with the frequency of its first
    occurrence. Blank lines are skipped.

    Parameters
    ----------
    filename : str
        path to the file
    separator : str
        character separating the fields of a line
    frequency : bool
        is a frequency given? if ``True`` it is the last field of the line
    directed : bool
        passed on to the resulting path collection
    maxlines : int
        number of lines to read (useful to test large files); blank lines
        count as well. None means the entire file is read

    Returns
    -------
    PathCollection
        collection holding the paths together with their nodes and edges

    Raises
    ------
    ValueError
        If ``frequency`` is set and the last field is not a number.
    """
    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # n is zero-based: stop before reading line number maxlines + 1
            if maxlines is not None and n >= maxlines:
                break

            content = line.rstrip()
            if not content:
                # a blank line would otherwise yield a node with uid ''
                continue
            fields = content.split(separator)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            # only the first occurrence of a path is kept; its nodes and
            # edges already exist at this point
            if path in paths:
                continue

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            if len(path) == 1:
                # a single node forms a path without edges
                paths[path] = Path(nodes[path[0]], frequency=freq)
            else:
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u], nodes[v])
                    edge_list.append(edges[(u, v)])
                paths[path] = Path(*edge_list, frequency=freq)

    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)
    for _path in paths.values():
        _paths._add(_path)

    return _paths
def test_NodeCollection():
    """Exercise adding, looking up and removing nodes of a collection."""
    collection = NodeCollection()
    assert len(collection) == 0

    # a single node object can be found by uid and by object
    node_a = Node('a')
    collection.add(node_a)
    assert len(collection) == 1
    assert collection['a'] == node_a
    assert collection[node_a] == node_a
    assert 'a' in collection
    assert node_a in collection

    # the dict-like views expose the same node
    assert 'a' in collection.uids
    assert 'a' in collection.keys()
    assert node_a in collection.values()
    assert ('a', node_a) in collection.items()

    # a tuple of uids adds one node per uid
    collection.add(('b', 'c'))
    assert len(collection) == 3

    # disabled check:
    # with pytest.raises(Exception):
    #     nodes.add('a')

    # with pytest.raises(Exception):
    #     nodes.add(a)

    # attributes stay shared between the object and the collection
    node_d = Node('d', color='blue')
    collection.add(node_d)
    assert collection['d']['color'] == 'blue'
    node_d['color'] = 'red'
    assert collection['d']['color'] == 'red'

    # nested lists and tuples of uids are accepted as well
    collection.add(['e', ('f', 'g'), ['h', 'i']])
    assert len(collection) == 9

    # removal by object, by uid and by nested containers
    collection.remove(node_a)
    assert len(collection) == 8
    assert node_a not in collection

    collection.remove('b')
    assert len(collection) == 7
    assert 'b' not in collection

    collection.remove([('e', 'f', 'g'), 'h', ['i']])
    assert len(collection) == 2
def read_file(cls, filename: str, separator: str = ',',
              frequency: bool = False, directed: bool = True,
              maxlines: int = None) -> 'PathCollection':
    """Read a collection of paths from a delimited text file.

    Every line describes one path as a sequence of node uids, e.g.
    ``a,b,c``. With ``frequency`` set, the last field of the line is the
    path frequency instead (``a,b,c,2``). A path that appears on several
    lines is stored once, with the frequency of its first occurrence.

    Parameters
    ----------
    filename : str
        path to the file
    separator : str
        character separating the fields of a line
    frequency : bool
        is a frequency given? if ``True`` it is the last field of the line
    directed : bool
        are the edges directed or undirected
    maxlines : int
        number of lines to read (useful to test large files). None means
        the entire file is read

    Returns
    -------
    PathCollection
        collection holding the paths together with their nodes and edges

    Raises
    ------
    AssertionError
        If a line has fewer than two fields.
    ValueError
        If ``frequency`` is set and the last field is not an integer.
    """
    # NOTE(review): ``directed`` is accepted but never used below --
    # confirm whether it should be passed to the PathCollection.
    nodes = {}
    edges = {}
    paths = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # n is zero-based: stop before reading line number maxlines + 1
            if maxlines is not None and n >= maxlines:
                break

            fields = line.rstrip().split(separator)
            assert len(fields) >= 2, 'Error: malformed line: {0}'.format(
                line)

            # the count gets its own name so the file handle is not rebound
            if frequency:
                path = tuple(fields[:-1])
                count = int(fields[-1])
            else:
                path = tuple(fields)
                count = 1

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            edge_list = []
            for u, v in zip(path[:-1], path[1:]):
                if (u, v) not in edges:
                    edges[(u, v)] = Edge(nodes[u], nodes[v],
                                         uid=u + '-' + v)
                edge_list.append(edges[(u, v)])

            # only the first occurrence of a path is kept
            if path not in paths:
                paths[path] = Path(*edge_list, frequency=count)

    nc = NodeCollection()
    nc.add(*nodes.values())

    ec = EdgeCollection(nodes=nc)
    for edge in edges.values():
        ec._add(edge)

    p = PathCollection(nodes=nc, edges=ec)
    for stored_path in paths.values():
        p._add(stored_path)

    return p
def test_NodeCollection():
    """Exercise adding, looking up and removing nodes of a collection."""
    collection = NodeCollection()
    assert len(collection) == 0

    # a single node object can be found by uid and by object
    node_a = Node('a')
    collection.add(node_a)
    assert len(collection) == 1
    assert collection['a'] == node_a
    assert collection[node_a] == node_a
    assert 'a' in collection
    assert node_a in collection

    # the dict-like views expose the same node
    assert 'a' in collection.uids
    assert 'a' in collection.keys()
    assert node_a in collection.values()
    assert ('a', node_a) in collection.items()
    assert {'a': node_a} == collection.dict

    # several uids can be passed as separate arguments
    collection.add('b', 'c')
    print(collection)
    assert len(collection) == 3

    # adding an existing uid or node again must fail
    with pytest.raises(Exception):
        collection.add('a')

    with pytest.raises(Exception):
        collection.add(node_a)

    # attributes stay shared between the object and the collection
    node_d = Node('d', color='blue')
    collection.add(node_d)
    assert collection['d']['color'] == 'blue'
    node_d['color'] = 'red'
    assert collection['d']['color'] == 'red'

    # uids, tuples and lists can be mixed in one call
    collection.add('e', ('f', 'g'), ['h', 'i'])
    assert len(collection) == 9

    # removal accepts the same mixture of arguments
    collection.remove(('e', 'f', 'g'), 'h', ['i'])
    assert len(collection) == 4