Exemplo n.º 1
0
    def __init__(self,
                 v: Union[str, PathPyObject],
                 w: Union[str, PathPyObject],
                 uid: Optional[str] = None,
                 directed: bool = True,
                 **kwargs: Any) -> None:
        """Initialize the edge object.

        Both base-class initializers are invoked explicitly: first
        ``Edge.__init__`` and then ``TemporalPathPyObject.__init__``.

        Parameters
        ----------
        v : Union[str, PathPyObject]
            First end point; forwarded unchanged to ``Edge.__init__``.
        w : Union[str, PathPyObject]
            Second end point; forwarded unchanged to ``Edge.__init__``.
        uid : Optional[str]
            Identifier handed to both base-class initializers.
        directed : bool
            Direction flag; forwarded to ``Edge.__init__`` only.
        **kwargs : Any
            Additional keyword arguments; the same set is forwarded to both
            base-class initializers.
        """

        # initialize both parent classes explicitly; the edge part is set up
        # first, the temporal part afterwards (it does not receive `directed`)
        Edge.__init__(self, v, w, uid=uid, directed=directed, **kwargs)
        TemporalPathPyObject.__init__(self, uid=uid, **kwargs)
Exemplo n.º 2
0
def to_network(frame: pd.DataFrame,
               loops: bool = True,
               directed: bool = True,
               multiedges: bool = False,
               **kwargs: Any) -> Network:
    """Read a network from a pandas data frame.

    Every row of the frame describes one edge. The columns ``v`` and ``w``
    hold the end points, an optional column ``uid`` holds the edge uid and
    all remaining columns are passed to the edge as keyword arguments.

    Parameters
    ----------
    frame : pd.DataFrame
        Edge table. It is first passed through ``_check_column_name`` for
        ``v`` and ``w`` (presumably remapping configured synonyms).
    loops : bool
        If ``False``, rows with ``v == w`` are skipped.
    directed : bool
        Whether the network is directed. For undirected networks the
        reversed pair ``(w, v)`` also counts as a duplicate of ``(v, w)``.
    multiedges : bool
        If ``False``, repeated edges are dropped with a warning.
    **kwargs : Any
        Forwarded to the ``Network`` constructor.

    Returns
    -------
    Network
        The network containing all nodes and the accepted edges.

    Raises
    ------
    IOError
        If ``None`` occurs among the end points.
    """

    # if no v/w columns are included, pick first synonym
    frame = _check_column_name(frame, 'v', config['edge']['v_synonyms'])
    frame = _check_column_name(frame, 'w', config['edge']['w_synonyms'])

    # log a readable word instead of the raw boolean flag
    LOG.debug('Creating %s network', 'directed' if directed else 'undirected')

    node_set = set(frame['v']) | set(frame['w'])

    if None in node_set:
        message = 'DataFrame minimally needs columns \'v\' and \'w\''
        LOG.error(message)
        raise IOError(message)

    nodes = {n: Node(n) for n in node_set}

    edges: list = []
    # end point pairs seen so far, used to detect duplicated edges
    edge_set: set = set()

    # TODO: Make this for loop faster!
    for row in frame.to_dict(orient='records'):
        # whatever remains in `row` after the pops becomes edge attributes
        v = row.pop('v')
        w = row.pop('w')
        uid = row.pop('uid', None)

        if (v, w) in edge_set and not multiedges:
            LOG.warning(
                'The edge (%s,%s) exists already '
                'and will not be considered. '
                'To capture this edge, please '
                'enable multiedges and/or directed!', v, w)
            continue

        if not loops and v == w:
            # self-loops are not wanted
            continue

        edges.append(Edge(nodes[v], nodes[w], uid=uid, **row))
        edge_set.add((v, w))
        if not directed:
            edge_set.add((w, v))

    net = Network(directed=directed, multiedges=multiedges, **kwargs)
    for node in nodes.values():
        net.nodes.add(node)

    for edge in edges:
        net.edges._add(edge)

    net._add_edge_properties()
    return net
Exemplo n.º 3
0
    def _add(self, edge: Edge) -> None:
        """Register a temporal edge together with its time interval.

        The ``begin`` and ``end`` attributes of the edge define the interval.
        The edge attributes are replaced by a ``TemporalAttributes`` object
        holding the previous key/value pairs plus ``TIMESTAMP`` set to the
        begin of the interval. The interval is then stored in the interval
        containers of this collection before the parent class adds the edge.
        """
        start = edge.attributes['begin']
        stop = edge.attributes['end']

        # rebuild the attributes as temporal ones, stamped with the start time
        temporal = TemporalAttributes()
        stamped = {**edge.attributes.to_dict(), TIMESTAMP: start}
        temporal.update(**stamped)
        edge.attributes = temporal

        # remember when the edge is active
        self._intervals.addi(start, stop, edge)
        self._interval_map[edge].add((start, stop))

        super()._add(edge)
Exemplo n.º 4
0
    def _add_path_from_edges(self, *edges: Union[str, Edge],
                             uid: Optional[str] = None, **kwargs: Any) -> None:
        """Helper function to add a path given by its edges.

        Each entry of ``edges`` is registered in ``self.edges`` when it is
        not contained yet (or when multiedges are enabled). The path is then
        built from the edge objects stored in ``self.edges``.
        """
        resolved: list = []
        for item in edges:
            if item not in self.edges or self.multiedges:
                if isinstance(item, str) and resolved:
                    # an edge uid following a known edge: continue from the
                    # previous edge's end node to a newly created node
                    tail = resolved[-1].w
                    self.edges.add(Edge(tail, Node(), uid=item))
                else:
                    self.edges.add(item, nodes=False)
            resolved.append(self.edges[item])

        if resolved in self and not self.multipaths:
            # the path exists already and multipaths are not enabled
            self._if_exist(resolved, **kwargs)
            return

        self._add_path(self._path_class(*resolved, uid=uid, **kwargs))
    def from_temporal_network(cls, temporal_network, **kwargs: Any):
        """Create a time-unfolded directed acyclic graph.

        For every temporal edge ``(v, w)`` starting at time ``begin`` and for
        every ``x`` in ``[1, delta]`` a link from the time-unfolded node
        ``"<v.uid>_<begin>"`` to ``"<w.uid>_<begin + x>"`` is created. With
        ``delta = 2`` and an edge ``(a, b)`` at time 1 this yields the two
        links ``(a_1, b_2)`` and ``(a_1, b_3)``.

        Parameters
        ----------
        temporal_network
            Object whose ``edges.temporal()`` yields
            ``(uid, edge, begin, end)`` tuples.
        **kwargs : Any
            Only ``delta`` (default 1) is read here.

        Returns
        -------
        An instance of ``cls``. Each time-unfolded node and edge carries the
        object it was derived from in the keyword ``original``.
        """
        # the offsets are identical for all edges, so build them only once
        offsets = range(1, int(kwargs.get('delta', 1)) + 1)

        dag = cls()

        for _uid, edge, begin, _end in temporal_network.edges.temporal():
            # uid of the time-unfolded source node v_t
            v_t = "{0}_{1}".format(edge.v.uid, begin)

            for x in offsets:
                # uid of the time-unfolded target node w_{t+x}
                w_t = "{0}_{1}".format(edge.w.uid, begin + x)

                if v_t not in dag.nodes:
                    dag.nodes._add(Node(v_t, original=edge.v))
                if w_t not in dag.nodes:
                    dag.nodes._add(Node(w_t, original=edge.w))

                dag.edges._add(
                    Edge(dag.nodes[v_t], dag.nodes[w_t], original=edge))

        dag._add_edge_properties()

        return dag
Exemplo n.º 6
0
def from_dataframe(df: pd.DataFrame,
                   directed: bool = True,
                   loops: bool = True,
                   multiedges: bool = False,
                   **kwargs: Any) -> Network:
    """Read a network from a pandas dataframe.

    By default, columns `v` and `w` will be used as source and target of
    edges. If no column `v` or `w` exists, the list of synonyms for `v` and
    `w` in the config file will be used to remap columns, choosing the first
    matching column. Any columns not used to create edges will be used as
    edge attributes, e.g. if a column `v` is present and an additional column
    `source` is given, `source` will be assigned as an edge property.

    In addition, an optional column `uid` will be used to assign edge uids.
    If this column is not present, default edge uids will be created. Any
    other columns (e.g. weight, type, time, etc.) will be assigned as edge
    attributes. kwargs will be assigned as network attributes.

    The passed dataframe is not modified; column remapping works on a
    renamed copy.

    Parameters
    ----------

    df: pd.DataFrame

        Table with one edge per row.

    directed: bool

        Whether to generate a directed or undirected network.

    loops: bool

        If ``False``, edges whose end points coincide are not added.

    multiedges: bool

        Passed to the ``Network`` constructor.

    **kwargs: Any

        List of key-value pairs that will be assigned as network attributes

    Raises
    ------

    IOError

        If a row has no value for `v` or `w`.

    """

    # if no v/w columns are included, pick first synonym
    for target in ('v', 'w'):
        if target in df.columns:
            continue
        LOG.info('No column %s, searching for synonyms', target)
        synonyms = config['edge'][target + '_synonyms']
        for col in df.columns:
            if col in synonyms:
                LOG.info('Remapping column \'%s\' to \'%s\'', col, target)
                # rename a copy so the caller's frame stays untouched
                df = df.rename(columns={col: target})
                # only the first match is used; renaming further synonyms
                # would create duplicated column names
                break

    LOG.debug('Creating %s network', 'directed' if directed else 'undirected')

    # columns that describe the edge itself and are no edge attributes
    reserved_columns = {'v', 'w', 'uid'}

    net = Network(directed=directed, multiedges=multiedges, **kwargs)
    for row in df.to_dict(orient='records'):

        # get edge
        v = row.get('v', None)
        w = row.get('w', None)
        uid = row.get('uid', None)
        if v is None or w is None:
            message = 'DataFrame minimally needs columns \'v\' and \'w\''
            LOG.error(message)
            raise IOError(message)

        # node uids are always strings
        v = str(v)
        w = str(w)
        if v not in net.nodes.uids:
            net.add_node(v)
        if w not in net.nodes.uids:
            net.add_node(w)

        if uid is None:
            edge = Edge(net.nodes[v], net.nodes[w])
        else:
            edge = Edge(net.nodes[v], net.nodes[w], uid=uid)
        if loops or edge.v != edge.w:
            net.add_edge(edge)

        # every remaining column becomes an edge attribute
        for k in row:
            if k not in reserved_columns:
                edge[k] = row[k]
    return net
Exemplo n.º 7
0
_GRAPHML_NS = '{http://graphml.graphdrawing.org/xmlns}'


def _graphml_bool(text):
    """Convert the text of a GraphML boolean value into a ``bool``."""
    # bool('false') would be True, so the text has to be compared explicitly
    if text is None:
        return False
    return text.strip().lower() in ('true', '1')


def _graphml_apply_data(obj, element, declared, warning):
    """Copy the ``<data>`` values of an XML element to ``obj.attributes``."""
    for data in element.findall(_GRAPHML_NS + 'data'):
        key = data.attrib['key']
        val = data.text
        if key not in declared:
            LOG.warning(warning.format(key))
            obj.attributes[key] = val
        else:
            info = declared[key]
            obj.attributes[info['name']] = info['type'](val)

    # fill in declared default values for attributes that were not set
    for info in declared.values():
        if 'default' in info and obj.attributes[info['name']] is None:
            obj.attributes[info['name']] = info['default']


def read_graphml(filename: str):
    """Read a pathpy.Network from a graphml file.

    This function supports typed Node and Edge attributes including default
    values. Attribute declarations (``<key>`` elements) are matched to
    ``<data>`` elements via the key ``id``; as a fallback the attribute name
    is accepted as well. Values are stored under the declared attribute
    name. Declarations with ``for="all"`` apply to nodes and edges.

    Warnings are issued if the type of Node or Edge attributes are
    undeclared, in which case the attribute is stored as string under the
    key given in the file.

    Parameters
    ----------

    filename: str
        The graphml file to read the graph from

    Returns
    -------

    Network
        The network with all nodes, edges and their attributes. Edges
        without an ``id`` in the file are created with ``uid=None``.

    """
    # python types for the attribute types of graphml; unknown types fall
    # back to string
    converters = {
        'string': str,
        'float': float,
        'double': float,
        'int': int,
        'long': int,
        'boolean': _graphml_bool,
    }

    root = ET.parse(filename).getroot()

    graph = root.find(_GRAPHML_NS + 'graph')
    directed = graph.attrib['edgedefault'] != 'undirected'
    uid = graph.attrib['id']
    n = Network(directed=directed, uid=uid)

    # declarations, addressable by key id (preferred) and attribute name
    node_attributes = {}
    edge_attributes = {}

    # read attribute types and default values
    for a in root.findall(_GRAPHML_NS + 'key'):
        a_id = a.attrib['id']
        a_name = a.attrib['attr.name']
        a_type = a.attrib['attr.type']
        a_for = a.attrib['for']

        # store attribute info and assign data types
        a_data = {'name': a_name, 'type': converters.get(a_type, str)}

        d = a.find(_GRAPHML_NS + 'default')
        if d is not None:
            a_data['default'] = a_data['type'](d.text)

        for scope, table in (('node', node_attributes),
                             ('edge', edge_attributes)):
            if a_for in (scope, 'all'):
                # an id always wins over a name registered as fallback
                table[a_id] = a_data
                table.setdefault(a_name, a_data)

    # add nodes with uids and attributes
    for node in graph.findall(_GRAPHML_NS + 'node'):
        v = Node(uid=node.attrib['id'])
        _graphml_apply_data(
            v, node, node_attributes,
            'Undeclared Node attribute "{}". Defaulting to string type.')
        n.add_node(v)

    # add edges with uids and attributes
    for edge in graph.findall(_GRAPHML_NS + 'edge'):
        source = edge.attrib['source']
        target = edge.attrib['target']
        # the id of an edge is optional in graphml
        e = Edge(n.nodes[source], n.nodes[target], uid=edge.attrib.get('id'))
        _graphml_apply_data(
            e, edge, edge_attributes,
            'Warning: Undeclared Edge attribute "{}". '
            'Defaulting to string type.')
        n.add_edge(e)
    return n
Exemplo n.º 8
0
def _bw_hon(self: HigherOrderNetwork, normalized: bool = False) -> Dict:
    """Betweenness centrality of first-order nodes in a higher-order network.

    All shortest paths between higher-order nodes are projected to paths of
    first-order nodes. For each pair of first-order start and end nodes only
    the shortest projected paths are kept. Every inner node of such a path
    receives ``1 / (number of shortest paths of the pair)``.

    Parameters
    ----------
    normalized : bool
        If ``True`` the values are min-max scaled. If all values are equal,
        every node gets 0.0.

    Returns
    -------
    Dict
        Centrality per node uid. Keys of ``self.nodes.nodes`` which do not
        occur on any shortest path have the value 0.
    """

    from pathpy.core.edge import Edge
    from pathpy.core.path import Path

    LOG.debug('Calculating betweenness (order k = %s) ...', self.order)

    all_paths = shortest_paths.all_shortest_paths(
        self, weight=False, return_distance_matrix=False)

    bw: defaultdict = defaultdict(float)

    # shortest projected length and the according paths per (start, end)
    lengths: defaultdict = defaultdict(
        lambda: defaultdict(lambda: float('inf')))
    paths: defaultdict = defaultdict(lambda: defaultdict(set))

    for path_1_order_k in all_paths:
        for path_2_order_k in all_paths:
            for path_order_k in all_paths[path_1_order_k][path_2_order_k]:
                nodes = [self.nodes[node].nodes for node in path_order_k]

                # all nodes of the first higher-order node plus the last
                # node of every following one; work on a copy so the node
                # sequence stored in the higher-order node is not extended
                path = list(nodes[0])
                path.extend(node[-1] for node in nodes[1:])

                edges = [Edge(_v, _w) for _v, _w in zip(path[:-1], path[1:])]

                if edges:
                    path = Path(*edges)
                    s1 = path.start
                    t1 = path.end

                    if len(path) < lengths[s1][t1]:
                        # strictly shorter: forget the paths found so far
                        lengths[s1][t1] = len(path)
                        paths[s1][t1] = {path}
                    elif len(path) == lengths[s1][t1]:
                        paths[s1][t1].add(path)

    for s_order_1 in paths:
        for t_order_1 in paths[s_order_1]:
            shortest = paths[s_order_1][t_order_1]
            for path_order_1 in shortest:
                for node in path_order_1.nodes[1:-1]:
                    if s_order_1 != node != t_order_1:
                        bw[node.uid] += 1.0 / len(shortest)

    # assign zero values to nodes not occurring on shortest paths
    for v in self.nodes.nodes.keys():
        bw[v] += 0

    if normalized and bw:
        max_centr = max(bw.values())
        min_centr = min(bw.values())
        spread = max_centr - min_centr
        for v in bw:
            # without any spread the scaling is undefined; use 0.0 instead
            # of dividing by zero
            bw[v] = (bw[v] - min_centr) / spread if spread else 0.0

    return bw
Exemplo n.º 9
0
    def generate(self, order: int = 1) -> HigherOrderNetwork:
        """Generate a null model of the given order.

        For the orders 0 and 1 a higher-order network of the underlying
        network is returned directly. For all other orders a higher-order
        network is built from all possible paths of that order. An observed
        path gets its observed count multiplied with an entry of the
        transition matrix as frequency, all other paths the frequency 0.
        The result is also stored in ``self.hon`` and ``self.order``.
        """

        # TODO: Add null model for order 1

        if order == 0:
            return HigherOrderNetwork(self.network, order=0)

        if order == 1:
            return HigherOrderNetwork(self.network, order=1)

        # take the time for the debug output
        log.debug('start generate null model')
        started = datetime.datetime.now()

        # all paths which are possible for this order
        candidates = self.possible_paths(order=order)

        # counts of the observed paths
        length = order - 1
        observed = self.network.subpaths.counter(min_length=length,
                                                 max_length=length)

        # transition matrix of the underlying network
        weight = config['attributes']['frequency']
        transition_matrix = self.network.transition_matrix(weight=weight)

        # node order of the underlying network, used as matrix index
        nodes = list(self.network.nodes)

        # the null model which gets filled below
        hon = HigherOrderNetwork(order=order)

        for path in candidates:

            # start with "empty" higher order nodes
            v = HigherOrderNode()
            w = HigherOrderNode()

            # fill them with the first order edges
            for v_uid, w_uid in zip(path[:-1], path[1:]):
                v.add_edge(self.network.edges[v_uid])
                w.add_edge(self.network.edges[w_uid])

            # expected frequency of the path
            uid = self.network.separator['path'].join(path[:-1])
            if uid in observed:
                count = observed[uid]
                row = nodes.index(w.as_nodes[-2])
                col = nodes.index(w.as_nodes[-1])
                frequency = count * transition_matrix[row, col]
            else:
                frequency = 0

            # add the higher order nodes as a path of one edge to the hon
            # TODO: use automatically hon separator
            e = Edge(v, w, separator=hon.separator['hon'])
            hon.add_path(Path.from_edges([e], frequency=frequency))

        # report the elapsed time
        elapsed = (datetime.datetime.now() - started).total_seconds()
        log.debug('end generate null model: {} seconds'.format(elapsed))

        # keep the hon and the order in the class
        hon.network = self.network
        self.hon = hon
        self.order = order

        return hon
Exemplo n.º 10
0
def read_pathcollection(filename: str,
                        separator: str = ',',
                        frequency: bool = False,
                        directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read paths from a text file.

    Every line of the file describes one path as a sequence of node uids,
    e.g. "a,b,c". The default separating character ',' can be changed. A
    line with a single node becomes a path consisting of that node only.
    Blank lines are skipped. If a path occurs several times, only its first
    occurrence (and frequency) is used.

    Parameters
    ----------
    filename : str
        path to the file
    separator : str
        character separating the nodes
    frequency : bool
        is a frequency given? if ``True`` it is the last element of the
        line (i.e. ``a,b,2``), otherwise every path has the frequency 1.0
    directed : bool
        passed to the ``PathCollection`` constructor
    maxlines : int
        number of lines to read (useful to test large files).
        None means the entire file is read

    Returns
    -------
    PathCollection
        Collection with the nodes, edges and paths found in the file.

    """

    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as csv:
        for n, line in enumerate(csv):
            # stop before the line is processed, so that exactly
            # `maxlines` lines are read
            if maxlines is not None and n >= maxlines:
                break

            content = line.rstrip()
            if not content:
                # a blank line would otherwise create a node with uid ''
                continue
            fields = content.split(separator)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            if path in paths:
                # its nodes and edges were created with the first occurrence
                continue

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            if len(path) == 1:
                paths[path] = Path(nodes[path[0]], frequency=freq)
                continue

            edge_list = []
            for u, v in zip(path[:-1], path[1:]):
                if (u, v) not in edges:
                    edges[(u, v)] = Edge(nodes[u], nodes[v])
                edge_list.append(edges[(u, v)])

            paths[path] = Path(*edge_list, frequency=freq)

    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)

    for _path in paths.values():
        _paths._add(_path)

    return _paths
Exemplo n.º 11
0
    def read_file(cls,
                  filename: str,
                  separator: str = ',',
                  frequency: bool = False,
                  directed: bool = True,
                  maxlines: int = None) -> 'PathCollection':
        """
        Read paths from a text file.

        Every line of the file describes one path as a sequence of node
        uids, e.g. "a,b,c". The default separating character ',' can be
        changed. Edges get the uid ``"<u>-<v>"``. If a path occurs several
        times, only its first occurrence (and frequency) is used.

        Parameters
        ----------
        filename : str
            path to the file
        separator : str
            character separating the nodes
        frequency : bool
            is a frequency given? if ``True`` it is the last element of the
            line (i.e. ``a,b,2``), otherwise every path has the frequency 1
        directed : bool
            accepted, but not used by this implementation
        maxlines : int
            number of lines to read (useful to test large files).
            None means the entire file is read

        Returns
        -------
        PathCollection
            Collection with the nodes, edges and paths found in the file.
        """
        nodes = {}
        edges = {}
        paths = {}

        with open(filename, 'r') as handle:
            for n, line in enumerate(handle):
                # stop before the line is processed, so that exactly
                # `maxlines` lines are read
                if maxlines is not None and n >= maxlines:
                    break

                fields = line.rstrip().split(separator)
                assert len(fields) >= 2, 'Error: malformed line: {0}'.format(
                    line)

                # the frequency has its own name and no longer rebinds the
                # name of the open file
                if frequency:
                    path = tuple(fields[:-1])
                    freq = int(fields[-1])
                else:
                    path = tuple(fields)
                    freq = 1

                for node in path:
                    if node not in nodes:
                        nodes[node] = Node(node)

                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u],
                                             nodes[v],
                                             uid=u + '-' + v)
                    edge_list.append(edges[(u, v)])

                if path not in paths:
                    paths[path] = Path(*edge_list, frequency=freq)

        nc = NodeCollection()
        nc.add(*nodes.values())

        ec = EdgeCollection(nodes=nc)
        for edge in edges.values():
            ec._add(edge)

        p = PathCollection(nodes=nc, edges=ec)

        for path in paths.values():
            p._add(path)

        return p