def __init__(self,
             v: Union[str, PathPyObject],
             w: Union[str, PathPyObject],
             uid: Optional[str] = None,
             directed: bool = True,
             **kwargs: Any) -> None:
    """Initialize the object by running both parent initializers.

    NOTE(review): the previous docstring said "node object", but the
    method initializes an ``Edge`` and a ``TemporalPathPyObject`` --
    presumably this is a temporal edge class; confirm against the class
    header.

    Parameters
    ----------
    v : Union[str, PathPyObject]
        First end point, forwarded to ``Edge.__init__``.
    w : Union[str, PathPyObject]
        Second end point, forwarded to ``Edge.__init__``.
    uid : Optional[str]
        Identifier forwarded to both parent initializers.
    directed : bool
        Direction flag forwarded to ``Edge.__init__``.
    **kwargs : Any
        Extra keyword arguments, forwarded unchanged to both parent
        initializers.
    """
    # Both bases are initialized explicitly (no ``super()`` chain), each
    # one receiving the same ``uid`` and keyword arguments.
    Edge.__init__(self, v, w, uid=uid, directed=directed, **kwargs)
    TemporalPathPyObject.__init__(self, uid=uid, **kwargs)
def to_network(frame: pd.DataFrame, loops: bool = True, directed: bool = True,
               multiedges: bool = False, **kwargs: Any) -> Network:
    """Build a network from the rows of a pandas data frame.

    Every row describes one edge through its ``v`` and ``w`` entries; an
    optional ``uid`` entry becomes the edge uid and all remaining entries
    are passed to ``Edge`` as keyword arguments.

    Parameters
    ----------
    frame : pd.DataFrame
        Edge table. It is first passed through ``_check_column_name`` for
        ``v`` and ``w`` with the synonym lists taken from ``config``.
    loops : bool
        If ``False``, rows with ``v == w`` are skipped.
    directed : bool
        Forwarded to ``Network``. For ``directed=False`` the reversed
        pair ``(w, v)`` is also remembered as already seen.
    multiedges : bool
        Forwarded to ``Network``. If ``False``, a row whose ``(v, w)``
        pair was already seen is skipped with a warning.
    **kwargs : Any
        Forwarded to the ``Network`` constructor.

    Returns
    -------
    Network
        Network holding one node per distinct ``v``/``w`` value and the
        accepted edges.

    Raises
    ------
    IOError
        If ``None`` occurs among the ``v``/``w`` values.
    """
    # if no v/w columns are included, pick first synonym
    frame = _check_column_name(frame, 'v', config['edge']['v_synonyms'])
    frame = _check_column_name(frame, 'w', config['edge']['w_synonyms'])

    LOG.debug('Creating %s network', directed)

    endpoints = set(frame['v']).union(set(frame['w']))
    if None in endpoints:
        LOG.error("DataFrame minimally needs columns 'v' and 'w'")
        raise IOError

    node_by_uid = {name: Node(name) for name in endpoints}
    accepted: list = []
    seen_pairs: set = set()

    # TODO: Make this for loop faster!
    for record in frame.to_dict(orient='records'):
        source = record.pop('v')
        target = record.pop('w')
        edge_uid = record.pop('uid', None)

        if (source, target) in seen_pairs and not multiedges:
            LOG.warning(
                'The edge (%s,%s) exist already '
                'and will not be considered. '
                'To capture this edge, please '
                'enalbe multiedges and/or directed!', source, target)
            continue

        # self-loops are only kept when explicitly allowed
        if not (loops or source != target):
            continue

        # whatever is left in the record becomes an edge attribute
        accepted.append(Edge(node_by_uid[source], node_by_uid[target],
                             uid=edge_uid, **record))
        seen_pairs.add((source, target))
        if not directed:
            seen_pairs.add((target, source))

    net = Network(directed=directed, multiedges=multiedges, **kwargs)
    for node in node_by_uid.values():
        net.nodes.add(node)
    for edge in accepted:
        net.edges._add(edge)

    # edges were added through the low-level ``_add``; update the network's
    # edge properties once at the end
    net._add_edge_properties()
    return net
def _add(self, edge: Edge) -> None:
    """Register an edge together with its ``[begin, end]`` interval.

    The edge's ``begin`` and ``end`` attributes are read, its attributes
    are replaced by a ``TemporalAttributes`` object carrying the previous
    values plus ``TIMESTAMP`` set to ``begin``, the interval is recorded
    in ``self._intervals`` and ``self._interval_map``, and finally the
    parent class ``_add`` is called.

    Parameters
    ----------
    edge : Edge
        Edge to add; its attributes must contain ``'begin'`` and
        ``'end'``.
    """
    start = edge.attributes['begin']
    stop = edge.attributes['end']

    # copy the old attribute values and stamp them with the start time
    # (TIMESTAMP overrides an existing entry of the same name)
    merged = dict(edge.attributes.to_dict())
    merged[TIMESTAMP] = start

    temporal_attributes = TemporalAttributes()
    temporal_attributes.update(**merged)
    edge.attributes = temporal_attributes

    self._intervals.addi(start, stop, edge)
    self._interval_map[edge].add((start, stop))
    super()._add(edge)
def _add_path_from_edges(self, *edges: Union[str, Edge],
                         uid: Optional[str] = None, **kwargs: Any) -> None:
    """Helper that adds a path given as a sequence of edges.

    Parameters
    ----------
    *edges : Union[str, Edge]
        Edge objects or edge uids, in path order.
    uid : Optional[str]
        Uid passed to the newly created path object.
    **kwargs : Any
        Forwarded to the path class, or to ``self._if_exist`` when the
        path is already contained and ``self.multipaths`` is false.
    """
    resolved: list = []
    for item in edges:
        if item not in self.edges or self.multiedges:
            if isinstance(item, str) and resolved:
                # a uid following an already resolved edge: create the
                # edge so that it continues from the previous edge's
                # ``w`` to a fresh node
                self.edges.add(Edge(resolved[-1].w, Node(), uid=item))
            else:
                self.edges.add(item, nodes=False)
        # always go through the collection so the stored object is used
        resolved.append(self.edges[item])

    if resolved in self and not self.multipaths:
        # the path exists already; delegate to the duplicate handling
        self._if_exist(resolved, **kwargs)
    else:
        self._add_path(self._path_class(*resolved, uid=uid, **kwargs))
def from_temporal_network(cls, temporal_network, **kwargs: Any):
    """Create a time-unfolded directed acyclic graph.

    For every temporal edge ``(v, w)`` starting at ``begin`` and every
    ``x`` in ``[1, delta]`` a link from node ``"<v>_<begin>"`` to node
    ``"<w>_<begin + x>"`` is created. With ``delta = 2`` an edge
    ``(a, b, 1)`` therefore yields the two links ``(a_1, b_2)`` and
    ``(a_1, b_3)``.

    Parameters
    ----------
    temporal_network
        Object whose ``edges.temporal()`` yields
        ``(uid, edge, begin, end)`` tuples.
    **kwargs : Any
        Only ``delta`` (default ``1``) is read.

    Returns
    -------
    An instance of ``cls`` holding the time-unfolded nodes and edges.
    Each node and edge stores its source object in the ``original``
    keyword.
    """
    delta: int = kwargs.get('delta', 1)
    dag = cls()

    for _uid, edge, begin, _end in temporal_network.edges.temporal():
        source_uid = f"{edge.v.uid}_{begin}"

        # one time-unfolded link for every offset in [1, delta]
        for offset in range(1, int(delta) + 1):
            target_uid = f"{edge.w.uid}_{begin + offset}"

            if source_uid not in dag.nodes:
                dag.nodes._add(Node(source_uid, original=edge.v))
            if target_uid not in dag.nodes:
                dag.nodes._add(Node(target_uid, original=edge.w))

            link = Edge(dag.nodes[source_uid], dag.nodes[target_uid],
                        original=edge)
            dag.edges._add(link)

    # NOTE(review): called once after all low-level ``_add`` calls, as
    # ``to_network`` does -- confirm this matches the intended placement.
    dag._add_edge_properties()
    return dag
def from_dataframe(df: pd.DataFrame, directed: bool = True, loops: bool = True,
                   multiedges: bool = False, **kwargs: Any) -> Network:
    """Read a network from a pandas dataframe.

    By default, columns `v` and `w` are used as source and target of
    edges. If no column `v` or `w` exists, the first column (in column
    order) whose name appears in the corresponding synonym list of the
    config is remapped to `v` or `w`. The remapping is done on a renamed
    copy; the dataframe passed in is left untouched.

    An optional column `uid` is used to assign edge uids; rows without a
    uid get a default edge uid. Any other columns (e.g. weight, type,
    time, etc.) are assigned as edge attributes. kwargs are assigned as
    network attributes.

    Parameters
    ----------
    df : pd.DataFrame
        Table with one edge per row.
    directed : bool
        Whether to generate a directed or undirected network.
    loops : bool
        If ``False``, edges whose two end points are equal are not added.
    multiedges : bool
        Forwarded to the ``Network`` constructor.
    **kwargs : Any
        Key-value pairs that are passed to the ``Network`` constructor.

    Returns
    -------
    Network
        The network built from the rows of ``df``.

    Raises
    ------
    IOError
        If a row has no value for `v` or `w`.
    """
    # if no v/w columns are included, pick first synonym
    for column, synonym_key in (('v', 'v_synonyms'), ('w', 'w_synonyms')):
        if column in df.columns:
            continue
        LOG.info('No column %s, searching for synonyms', column)
        synonyms = config['edge'][synonym_key]
        for col in df.columns:
            if col in synonyms:
                LOG.info("Remapping column '%s' to '%s'", col, column)
                # rename on a copy so the caller's frame is not modified
                df = df.rename(columns={col: column})
                # stop at the first match; renaming further synonyms
                # would produce duplicate column names
                break

    LOG.debug('Creating %s network', directed)

    net = Network(directed=directed, multiedges=multiedges, **kwargs)
    reserved_columns = frozenset(('v', 'w', 'uid'))

    for row in df.to_dict(orient='records'):
        v = row.get('v', None)
        w = row.get('w', None)
        uid = row.get('uid', None)

        if v is None or w is None:
            LOG.error("DataFrame minimally needs columns 'v' and 'w'")
            raise IOError("DataFrame minimally needs columns 'v' and 'w'")

        # node uids are always strings
        v = str(v)
        w = str(w)

        if v not in net.nodes.uids:
            net.add_node(v)
        if w not in net.nodes.uids:
            net.add_node(w)

        if uid is None:
            edge = Edge(net.nodes[v], net.nodes[w])
        else:
            edge = Edge(net.nodes[v], net.nodes[w], uid=uid)

        if loops or edge.v != edge.w:
            net.add_edge(edge)

        # every non-reserved column becomes an edge attribute
        for key, value in row.items():
            if key not in reserved_columns:
                edge[key] = value

    return net
def _parse_graphml_boolean(text) -> bool:
    """Interpret the text of a GraphML boolean value.

    ``bool(text)`` is true for every non-empty string, including
    ``'false'`` and ``'0'``, so the text is compared against the true
    literals instead.
    """
    return str(text).strip().lower() in ('true', '1')


def read_graphml(filename: str):
    """Read a pathpy.Network from a graphml file.

    This function supports typed Node and Edge attributes including
    default values. A warning is issued if a Node or Edge attribute is
    undeclared, in which case the raw text is stored under the key given
    in the file.

    ``<data key="...">`` elements are resolved through the ``id`` of the
    matching ``<key>`` declaration and stored under its ``attr.name``; a
    key that directly equals a declared ``attr.name`` is accepted as
    well. The ``id`` of the ``<graph>`` and ``<edge>`` elements is
    optional; ``None`` is passed as uid when it is missing.

    Parameters
    ----------
    filename : str
        The graphml file to read the graph from.

    Returns
    -------
    The ``Network`` built from the file.
    """
    ns = '{http://graphml.graphdrawing.org/xmlns}'
    # attr.type -> converter; unknown types fall back to ``str``
    converters = {
        'string': str,
        'float': float,
        'double': float,
        'int': int,
        'long': int,
        'boolean': _parse_graphml_boolean,
    }

    root = ET.parse(filename).getroot()
    graph = root.find(ns + 'graph')

    directed = graph.attrib['edgedefault'] != 'undirected'
    uid = graph.attrib.get('id')
    n = Network(directed=directed, uid=uid)

    # declarations by attribute name, plus key id -> attribute name
    node_attributes = {}
    edge_attributes = {}
    node_keys = {}
    edge_keys = {}

    # read attribute types and default values
    for a in root.findall(ns + 'key'):
        a_id = a.attrib['id']
        a_name = a.attrib['attr.name']
        a_type = a.attrib['attr.type']
        a_for = a.attrib['for']

        a_data = {'name': a_name, 'type': converters.get(a_type, str)}
        d = a.find(ns + 'default')
        if d is not None:
            a_data['default'] = a_data['type'](d.text)

        if a_for == 'node':
            node_attributes[a_name] = a_data
            node_keys[a_id] = a_name
        if a_for == 'edge':
            edge_attributes[a_name] = a_data
            edge_keys[a_id] = a_name

    # add nodes with uids and attributes
    for node in graph.findall(ns + 'node'):
        v = Node(uid=node.attrib['id'])

        # set attribute values
        for a in node.findall(ns + 'data'):
            key = a.attrib['key']
            val = a.text
            # translate the key id into the declared attribute name
            a_name = node_keys.get(key, key)
            if a_name not in node_attributes:
                LOG.warning(
                    'Undeclared Node attribute "{}". Defaulting to string type.'
                    .format(key))
                v.attributes[key] = val
            else:
                v.attributes[a_name] = node_attributes[a_name]['type'](val)

        # set default values
        for a_name, a_data in node_attributes.items():
            if 'default' in a_data and v.attributes[a_name] is None:
                v.attributes[a_name] = a_data['default']
        n.add_node(v)

    # add edges with uids and attributes
    for edge in graph.findall(ns + 'edge'):
        source = edge.attrib['source']
        target = edge.attrib['target']
        e = Edge(n.nodes[source], n.nodes[target], uid=edge.attrib.get('id'))

        # set attribute values
        for a in edge.findall(ns + 'data'):
            key = a.attrib['key']
            val = a.text
            a_name = edge_keys.get(key, key)
            if a_name not in edge_attributes:
                LOG.warning(
                    'Warning: Undeclared Edge attribute "{}". Defaulting to string type.'
                    .format(key))
                e.attributes[key] = val
            else:
                e.attributes[a_name] = edge_attributes[a_name]['type'](val)

        # set default values
        for a_name, a_data in edge_attributes.items():
            if 'default' in a_data and e.attributes[a_name] is None:
                e.attributes[a_name] = a_data['default']
        n.add_edge(e)

    return n
def _bw_hon(self: HigherOrderNetwork, normalized: bool = False) -> Dict:
    """Betweenness centrality computed from a higher-order network.

    All shortest paths between higher-order nodes are mapped to paths of
    the nodes they contain. For every (start, end) pair only the
    shortest of these mapped paths are kept, and every inner node of
    such a path receives ``1 / (number of shortest paths of the pair)``.

    Parameters
    ----------
    normalized : bool
        If ``True``, scores are rescaled with min-max normalisation. When
        all scores are equal, every score becomes ``0.0``.

    Returns
    -------
    Dict
        Mapping from uid to betweenness score.
    """
    from pathpy.core.edge import Edge
    from pathpy.core.path import Path

    LOG.debug('Calculating betweenness (order k = %s) ...', self.order)

    all_paths = shortest_paths.all_shortest_paths(
        self, weight=False, return_distance_matrix=False)

    bw: defaultdict = defaultdict(float)
    # shortest mapped length found so far per (start, end) pair
    lengths: defaultdict = defaultdict(
        lambda: defaultdict(lambda: float('inf')))
    # the mapped paths attaining that length
    paths: defaultdict = defaultdict(lambda: defaultdict(set))

    for path_1_order_k in all_paths:
        for path_2_order_k in all_paths:
            for path_order_k in all_paths[path_1_order_k][path_2_order_k]:
                nodes = []
                for node in path_order_k:
                    nodes.append(self.nodes[node].nodes)

                # chain the node sequences: the first one is taken as a
                # whole, every later one contributes only its last node
                # NOTE(review): ``path`` is the very object returned by
                # ``.nodes``; if that is not a fresh list, ``append``
                # modifies the higher-order node -- confirm.
                path = nodes[0]
                for node in nodes[1:]:
                    path.append(node[-1])

                edges = []
                for _v, _w in zip(path[:-1], path[1:]):
                    edges.append(Edge(_v, _w))

                if edges:
                    path = Path(*edges)
                    s1 = path.start
                    t1 = path.end

                    if len(path) < lengths[s1][t1]:
                        # strictly shorter: forget the longer candidates
                        lengths[s1][t1] = len(path)
                        paths[s1][t1] = set()
                        paths[s1][t1].add(path)
                    elif len(path) == lengths[s1][t1]:
                        paths[s1][t1].add(path)

    for s_order_1 in paths:
        for t_order_1 in paths[s_order_1]:
            for path_order_1 in paths[s_order_1][t_order_1]:
                for node in path_order_1.nodes[1:-1]:
                    if s_order_1 != node != t_order_1:
                        bw[node.uid] += 1.0 / len(paths[s_order_1][t_order_1])

    # assign zero values to nodes not occurring on shortest paths
    for v in self.nodes.nodes.keys():
        bw[v] += 0

    if normalized and bw:
        max_centr = max(bw.values())
        min_centr = min(bw.values())
        span = max_centr - min_centr
        for v in bw:
            # all scores equal: avoid dividing by zero
            bw[v] = (bw[v] - min_centr) / span if span else 0.0

    return bw
def generate(self, order: int = 1) -> HigherOrderNetwork:
    """Generate a null model of the given order.

    For order 0 and 1 a ``HigherOrderNetwork`` of that order is built
    directly from ``self.network``. For larger orders every possible
    path becomes one edge between two higher-order nodes. Its frequency
    is the observed count of the path without its last element times a
    transition-matrix entry, or ``0`` if that shorter path was not
    observed.

    Parameters
    ----------
    order : int
        Order of the null model.

    Returns
    -------
    HigherOrderNetwork
        The generated null model. For orders above 1 it is also stored
        in ``self.hon`` and ``self.order`` is updated.
    """
    # TODO: Add null model for order 1
    if order == 0:
        return HigherOrderNetwork(self.network, order=0)
    if order == 1:
        return HigherOrderNetwork(self.network, order=1)

    # some information for debugging
    log.debug('start generate null model')
    started = datetime.datetime.now()

    # all candidate paths, and how often their (order - 1) prefixes
    # were observed
    candidates = self.possible_paths(order=order)
    prefix_length = order - 1
    observed = self.network.subpaths.counter(min_length=prefix_length,
                                             max_length=prefix_length)

    # transition matrix of the underlying network; its rows and columns
    # are addressed through positions in ``node_order``
    transition_matrix = self.network.transition_matrix(
        weight=config['attributes']['frequency'])
    node_order = list(self.network.nodes)

    hon = HigherOrderNetwork(order=order)
    for path in candidates:
        # ``v`` collects the edges of the path without its last element,
        # ``w`` the edges of the path without its first element
        v = HigherOrderNode()
        w = HigherOrderNode()
        for v_uid, w_uid in zip(path[:-1], path[1:]):
            v.add_edge(self.network.edges[v_uid])
            w.add_edge(self.network.edges[w_uid])

        # expected frequency of the candidate path
        uid = self.network.separator['path'].join(path[:-1])
        if uid in observed:
            row = node_order.index(w.as_nodes[-2])
            col = node_order.index(w.as_nodes[-1])
            frequency = observed[uid] * transition_matrix[row, col]
        else:
            frequency = 0

        # TODO: use automatically hon separator
        e = Edge(v, w, separator=hon.separator['hon'])
        hon.add_path(Path.from_edges([e], frequency=frequency))

    # some information for debugging
    elapsed = datetime.datetime.now() - started
    log.debug('end generate null model: {} seconds'.format(
        elapsed.total_seconds()))

    # keep the result and its order on the instance
    hon.network = self.network
    self.hon = hon
    self.order = order

    return hon
def read_pathcollection(filename: str, separator: str = ',',
                        frequency: bool = False, directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read a collection of paths from a delimited text file.

    Every non-blank line holds the node uids of one path, separated by
    ``separator``, e.g. ``a,b,c``. With ``frequency=True`` the last
    field of the line is the frequency of the path, e.g. ``a,b,c,2``.
    A line with a single node becomes a path made of that node only. If
    the same node sequence occurs more than once, only its first
    occurrence is used. Blank lines are skipped.

    Parameters
    ----------
    filename : str
        path to the file
    separator : str
        character separating the nodes
    frequency : bool
        is a frequency given? if ``True`` it is the last element of the
        line
    directed : bool
        forwarded to the ``PathCollection`` constructor
    maxlines : int
        number of lines to read (useful to test large files). None means
        the entire file is read

    Returns
    -------
    PathCollection
        Collection holding the nodes, edges and paths that were read.
    """
    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # test the limit before the line is used, so that exactly
            # ``maxlines`` lines are read
            if maxlines is not None and n >= maxlines:
                break

            stripped = line.rstrip()
            if not stripped:
                # a blank line would otherwise create a node with uid ''
                continue
            fields = stripped.split(separator)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            if len(path) == 1 and path not in paths:
                paths[path] = Path(nodes[path[0]], frequency=freq)
            else:
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u], nodes[v])
                    edge_list.append(edges[(u, v)])

                if path not in paths:
                    paths[path] = Path(*edge_list, frequency=freq)

    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)
    for _path in paths.values():
        _paths._add(_path)

    return _paths
def read_file(cls, filename: str, separator: str = ',',
              frequency: bool = False, directed: bool = True,
              maxlines: int = None) -> 'PathCollection':
    """Read paths from a delimited text file.

    Every non-blank line holds the node uids of one path, separated by
    ``separator``, e.g. ``a,b,c``. With ``frequency=True`` the last
    field of the line is the integer frequency of the path, e.g.
    ``a,b,c,2``. Edges get the uid ``"<u>-<v>"``. If the same node
    sequence occurs more than once, only its first occurrence is used.
    Blank lines are skipped.

    Parameters
    ----------
    filename : str
        path to the file
    separator : str
        character separating the nodes
    frequency : bool
        is a frequency given? if ``True`` it is the last element of the
        line
    directed : bool
        are the edges directed or undirected
        NOTE(review): this argument is not used by the body -- confirm
        whether it should be passed to ``PathCollection``.
    maxlines : int
        number of lines to read (useful to test large files). None means
        the entire file is read

    Returns
    -------
    PathCollection
        Collection holding the nodes, edges and paths that were read.
    """
    nodes = {}
    edges = {}
    paths = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # test the limit before the line is used, so that exactly
            # ``maxlines`` lines are read
            if maxlines is not None and n >= maxlines:
                break

            stripped = line.rstrip()
            if not stripped:
                # tolerate blank lines, e.g. at the end of the file
                continue
            fields = stripped.split(separator)
            assert len(fields) >= 2, 'Error: malformed line: {0}'.format(
                line)

            # the frequency gets its own name; it must not rebind the
            # file handle
            if frequency:
                path = tuple(fields[:-1])
                freq = int(fields[-1])
            else:
                path = tuple(fields)
                freq = 1

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            edge_list = []
            for u, v in zip(path[:-1], path[1:]):
                if (u, v) not in edges:
                    edges[(u, v)] = Edge(nodes[u], nodes[v],
                                         uid=u + '-' + v)
                edge_list.append(edges[(u, v)])

            if path not in paths:
                paths[path] = Path(*edge_list, frequency=freq)

    nc = NodeCollection()
    nc.add(*nodes.values())

    ec = EdgeCollection(nodes=nc)
    for edge in edges.values():
        ec._add(edge)

    p = PathCollection(nodes=nc, edges=ec)
    for path in paths.values():
        p._add(path)

    return p