def expand(self, order=0, include_path: bool = False) -> List[Path]:
    """Split every stored path into sub-paths of length ``order``.

    Parameters
    ----------
    order : int, optional (default = 0)
        Length of the generated sub-paths. ``0`` produces one single-node
        path per entry of ``path.as_nodes``; a value strictly between zero
        and ``len(path)`` produces one path per window of ``order``
        consecutive edge uids.

    include_path : bool, optional (default = False)
        If enabled, a path whose length equals ``order`` is kept unchanged
        as its own (only) sub-path.

    Returns
    -------
    list
        One list of sub-paths for every stored path that produced at least
        one sub-path. NOTE(review): the annotation says ``List[Path]`` but
        the elements appended here are lists of paths -- confirm which
        shape the callers rely on.

    """
    collected = []
    for current in self.paths.values():
        if order == 0:
            # order zero: every visited node becomes a path of its own
            pieces = [
                Path.from_nodes([current.nodes[uid]],
                                **current.attributes.to_dict())
                for uid in current.as_nodes
            ]
        elif 0 < order < len(current):
            # slide a window of `order` edge uids along the path
            pieces = [
                Path.from_edges([current.edges[uid] for uid in chunk],
                                **current.attributes.to_dict())
                for chunk in window(current.as_edges, size=order)
            ]
        elif order == len(current) and include_path:
            pieces = [current]
        else:
            # path too short for this order (or the path itself not wanted)
            pieces = []

        # paths without any sub-path do not contribute an entry
        if pieces:
            collected.append(pieces)
    return collected
def __init__(self, *args: Union[Node, Edge], uid: Optional[str] = None,
             **kwargs: Any) -> None:
    """Initialise the object through both ``Node`` and ``Path``.

    Parameters
    ----------
    *args : Union[Node, Edge]
        Positional objects forwarded unchanged to ``Path.__init__``.

    uid : str, optional (default = None)
        Identifier handed to both parent initialisers.

    **kwargs : Any
        Keyword arguments handed to both parent initialisers.

    """
    # initializing the parent classes
    # NOTE(review): the class header is not visible here; presumably the
    # class inherits from both Node and Path, which is why the initialisers
    # are called explicitly (Node first) instead of through super().
    Node.__init__(self, uid, **kwargs)
    Path.__init__(self, *args, uid=uid, **kwargs)
def __init__(self, *args: Union[Node, Edge], uid: Optional[str] = None,
             **kwargs: Any) -> None:
    """Initialise through ``Node`` and ``Path`` and derive a label.

    Parameters
    ----------
    *args : Union[Node, Edge]
        Positional objects forwarded unchanged to ``Path.__init__``.

    uid : str, optional (default = None)
        Identifier handed to both parent initialisers.

    **kwargs : Any
        Keyword arguments handed to both parent initialisers.

    """
    # both parents are set up explicitly, Node first and then Path
    Node.__init__(self, uid, **kwargs)
    Path.__init__(self, *args, uid=uid, **kwargs)

    # the label is the '-'-joined sequence of the contained node uids
    member_uids = (member.uid for member in self.nodes)
    self['label'] = '-'.join(member_uids)
def calculate(self, min_length: int = 0, max_length: int = sys.maxsize,
              include_path: bool = False) -> None:
    """Helper that fills the collection with the sub-paths of ``_paths``.

    Every path in ``self._paths`` is cut into its contiguous sub-paths and
    each sub-path has its ``'possible'`` attribute increased by the
    ``'frequency'`` of the path it was taken from (1 if the path has no
    such attribute).

    Parameters
    ----------
    min_length : int, optional (default = 0)
        Minimum sub-path length. Values ``<= 0`` additionally register
        every node of a path as a single-node path.

    max_length : int, optional (default = sys.maxsize)
        Maximum sub-path length; capped at ``len(path) - 1`` per path.

    include_path : bool, optional (default = False)
        If enabled, a path that is not yet contained and whose length lies
        within ``[min_length, max_length]`` is added itself, with
        ``'possible'`` set to 0.

    Notes
    -----
    NOTE(review): the ``_observed``/``_possible``/``_counter`` tables are
    only incremented here and never cleared, so a second call looks like it
    accumulates on top of the first -- confirm whether that is intended.

    """
    if len(self) > 0:
        LOG.warning('Recalculating sub-paths!')

    for source in tqdm(self._paths.values(), desc='sub-path calculation'):
        # how often the source path was counted
        weight = source.attributes.get('frequency', 1)

        # a non-positive minimum length also accounts for the nodes
        if min_length <= 0:
            for node in source.nodes:
                key = (node, )
                if key in self:
                    self[key]['possible'] += weight
                else:
                    self._add(Path(node, possible=weight, frequency=0))

        # sub-path sizes (number of edges) valid for this source path
        shortest = max(min_length, 1)
        longest = min(len(source) - 1, max_length)

        for size in range(shortest, longest + 1):
            for start in range(len(source) - size + 1):
                edges = tuple(source.edges[start:start + size])
                if edges in self:
                    # TODO: fix the frequency assignment
                    entry = self[edges]
                    if entry['possible'] is None:
                        entry['possible'] = 0
                    entry['possible'] += weight
                else:
                    self._add(Path(*edges, possible=weight, frequency=0))

        # optionally add the source path itself
        if (include_path and source not in self
                and min_length <= len(source) <= max_length):
            source['possible'] = 0
            self._add(source)

    # fold the attributes of all stored paths into the lookup tables
    for stored in self:
        size = len(stored)
        seen = stored['frequency'] or 0
        potential = stored['possible'] or 0
        self._observed[size][stored] += seen
        self._possible[size][stored] += potential
        # kept as two separate additions so float rounding stays unchanged
        self._counter[stored] += seen
        self._counter[stored] += potential
def read_pathcollection(filename: str, separator: str = ',',
                        frequency: bool = False, directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read a path collection from a delimited text file.

    Every non-blank line describes one path as a sequence of node uids
    joined by ``separator`` (e.g. ``a,b,c``). If ``frequency`` is enabled
    the last field of the line is parsed as the frequency of the path
    (e.g. ``a,b,c,2``). Consecutive node uids are connected by edges; nodes
    and edges are shared between all paths of the file. A line with a
    single node uid becomes a path consisting of that node only.

    Blank lines are skipped. If the same node sequence occurs more than
    once, only its first occurrence (and that frequency) is kept.

    Parameters
    ----------
    filename : str
        path to the input file

    separator : str
        character separating the nodes

    frequency : bool
        is a frequency given? if ``True`` it is the last element of the
        line (i.e. ``a,b,2``)

    directed : bool
        passed on to the created ``PathCollection``

    maxlines : int
        number of lines to read (useful to test large files). Blank lines
        count towards this limit. None means the entire file is read

    Returns
    -------
    PathCollection
        Collection holding the nodes, edges and paths found in the file.

    Raises
    ------
    ValueError
        If ``frequency`` is enabled and the last field of a line cannot be
        converted to ``float``.

    """
    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # `n` is zero based: stop *before* parsing line `maxlines`, so
            # that exactly `maxlines` lines are consumed
            if maxlines is not None and n >= maxlines:
                break

            content = line.rstrip()
            if not content:
                # splitting an empty string would yield [''] and thereby
                # create a node with an empty uid
                continue
            fields = content.split(separator)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            # all nodes and edges of an already known node sequence exist,
            # and only its first occurrence is stored
            if path in paths:
                continue

            if len(path) == 1:
                paths[path] = Path(nodes[path[0]], frequency=freq)
            else:
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u], nodes[v])
                    edge_list.append(edges[(u, v)])
                paths[path] = Path(*edge_list, frequency=freq)

    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)
    for _path in paths.values():
        _paths._add(_path)

    return _paths
def _bw_hon(self: HigherOrderNetwork, normalized: bool = False) -> Dict:
    """Betweenness centrality based on the shortest paths of the network.

    All shortest paths between the nodes of this network are projected to
    sequences of first-order nodes. For every (start, end) pair of such a
    projected path only the shortest projections are kept, and every
    intermediate node of a kept path receives ``1 / (number of kept paths
    for that pair)``.

    Parameters
    ----------
    normalized : bool, optional (default = False)
        If enabled the scores are min-max scaled to ``[0, 1]``. If all
        scores are equal they are set to ``0.0``.

    Returns
    -------
    Dict
        Mapping from node uid to betweenness score. Every key of
        ``self.nodes.nodes`` is guaranteed to be present.

    """
    from pathpy.core.edge import Edge
    from pathpy.core.path import Path

    LOG.debug('Calculating betweenness (order k = %s) ...', self.order)

    all_paths = shortest_paths.all_shortest_paths(
        self, weight=False, return_distance_matrix=False)

    bw: defaultdict = defaultdict(float)
    lengths: defaultdict = defaultdict(
        lambda: defaultdict(lambda: float('inf')))
    paths: defaultdict = defaultdict(lambda: defaultdict(set))

    for path_1_order_k in all_paths:
        for path_2_order_k in all_paths:
            for path_order_k in all_paths[path_1_order_k][path_2_order_k]:
                nodes = [self.nodes[node].nodes for node in path_order_k]

                # copy the first node sequence: appending to the object
                # returned by `.nodes` could extend the higher-order node
                # itself
                path = list(nodes[0])
                for node in nodes[1:]:
                    path.append(node[-1])

                edges = [Edge(_v, _w) for _v, _w in zip(path[:-1], path[1:])]
                if not edges:
                    # without an edge no Path (and no start/end) can be
                    # built, so nothing can lie in between
                    continue
                path = Path(*edges)

                s1 = path.start
                t1 = path.end
                if len(path) < lengths[s1][t1]:
                    # strictly shorter: forget the longer candidates
                    lengths[s1][t1] = len(path)
                    paths[s1][t1] = set()
                    paths[s1][t1].add(path)
                elif len(path) == lengths[s1][t1]:
                    paths[s1][t1].add(path)

    for s_order_1 in paths:
        for t_order_1 in paths[s_order_1]:
            candidates = paths[s_order_1][t_order_1]
            # every stored set holds at least the path it was created for
            share = 1.0 / len(candidates)
            for path_order_1 in candidates:
                for node in path_order_1.nodes[1:-1]:
                    if s_order_1 != node != t_order_1:
                        bw[node.uid] += share

    # assign zero values to nodes not occurring on shortest paths
    for v in self.nodes.nodes.keys():
        bw[v] += 0

    # `bw` may be empty and the value range may be zero; both cases would
    # make a plain min-max scaling raise
    if normalized and bw:
        max_centr = max(bw.values())
        min_centr = min(bw.values())
        spread = max_centr - min_centr
        for v in bw:
            bw[v] = (bw[v] - min_centr) / spread if spread else 0.0

    return bw
def generate(self, order: int = 1) -> HigherOrderNetwork:
    """Generate a null model of the given order.

    For the orders 0 and 1 a ``HigherOrderNetwork`` of ``self.network`` with
    that order is returned directly. For any other order, every path from
    ``self.possible_paths(order=order)`` becomes one edge between two
    higher-order nodes. Its frequency is the observed count of the path
    without its last element multiplied by the transition-matrix entry of
    the last two nodes, or 0 if that shorter path was not observed.

    Parameters
    ----------
    order : int, optional (default = 1)
        Order of the null model.

    Returns
    -------
    HigherOrderNetwork
        The generated network. For orders other than 0 and 1 it is also
        stored in ``self.hon`` and ``self.order`` is updated.

    """
    # TODO: Add null model for order 1
    if order == 0:
        return HigherOrderNetwork(self.network, order=0)
    if order == 1:
        return HigherOrderNetwork(self.network, order=1)

    # timing information for debugging
    log.debug('start generate null model')
    started = datetime.datetime.now()

    # all candidate paths and the counts of the observed shorter paths
    possible_paths = self.possible_paths(order=order)
    observed = self.network.subpaths.counter(min_length=order - 1,
                                             max_length=order - 1)

    # transition matrix of the underlying network and the node order used
    # to address its rows and columns
    transition_matrix = self.network.transition_matrix(
        weight=config['attributes']['frequency'])
    nodes = list(self.network.nodes)

    hon = HigherOrderNetwork(order=order)
    for path in possible_paths:
        # the source node collects all but the last element of the path,
        # the target node all but the first
        source = HigherOrderNode()
        target = HigherOrderNode()
        for source_uid, target_uid in zip(path[:-1], path[1:]):
            source.add_edge(self.network.edges[source_uid])
            target.add_edge(self.network.edges[target_uid])

        # expected frequency of the candidate path
        uid = self.network.separator['path'].join(path[:-1])
        if uid in observed:
            row = nodes.index(target.as_nodes[-2])
            column = nodes.index(target.as_nodes[-1])
            frequency = observed[uid] * transition_matrix[row, column]
        else:
            frequency = 0

        # TODO: use automatically hon separator
        link = Edge(source, target, separator=hon.separator['hon'])
        hon.add_path(Path.from_edges([link], frequency=frequency))

    finished = datetime.datetime.now()
    log.debug('end generate null model:' +
              ' {} seconds'.format((finished - started).total_seconds()))

    # remember the result and its order on the instance
    hon.network = self.network
    self.hon = hon
    self.order = order

    return hon
def xsubpaths(self, min_length: int = 0, max_length: int = sys.maxsize,
              include_path: bool = False) -> Dict[str, Path]:
    """Return the sub-paths of this path as a dictionary.

    Parameters
    ----------
    min_length : int, optional (default = 0)
        Minimum length of the sub-paths. Has to be smaller than the
        maximum length. A value ``<= 0`` also yields the single nodes.

    max_length : int, optional (default = sys.maxsize)
        Maximum length of the sub-paths. Has to be greater than the
        minimum length. If it exceeds the length of the path, the path
        length minus one is used instead.

    include_path : bool, optional (default = False)
        If enabled, the path itself is added as a sub-path of itself,
        provided its length lies within ``[min_length, max_length]``.

    Returns
    -------
    Dict[str, Path]
        Dictionary with the :py:class:`Path` uids as keys and the
        :py:class:`Path` objects as values.

    Examples
    --------
    >>> from pathpy import Path
    >>> p = Path('a','b','c','d','e')
    >>> for k in p.xsubpaths():
    ...     print(k)
    a
    b
    c
    d
    e
    a-b
    b-c
    c-d
    d-e
    a-b|b-c
    b-c|c-d
    c-d|d-e
    a-b|b-c|c-d
    b-c|c-d|d-e

    >>> for k in p.xsubpaths(min_length=2, max_length=2):
    ...     print(k)
    a-b|b-c
    b-c|c-d
    c-d|d-e

    """
    subpaths: dict = PathDict(dict)

    # TODO: FIX DICT -> LIST
    # a non-positive minimum length also accounts for the single nodes
    if min_length <= 0:
        for node in self.as_nodes:
            subpaths[node] = Path.from_nodes([self.nodes[node]],
                                             **self.attributes.to_dict())

    # sub-path sizes (number of edges) that are valid for this path
    shortest = max(min_length, 1)
    longest = min(len(self) - 1, max_length)

    for size in range(shortest, longest + 1):
        for start in range(len(self) - size + 1):
            # uids of `size` consecutive edges beginning at `start`
            uids = self.as_edges[start:start + size]
            subpath = Path(*[self.edges[uid] for uid in uids],
                           **self.attributes.to_dict())
            subpaths[self.separator['path'].join(uids)] = subpath

    # optionally the path itself
    if include_path and min_length <= len(self) <= max_length:
        subpaths[self.uid] = self

    return subpaths