Пример #1
0
def read_pathcollection(filename: str,
                        separator: str = ',',
                        frequency: bool = False,
                        directed: bool = True,
                        maxlines: int = None) -> PathCollection:
    """Read a collection of paths from a delimited text file.

    Every non-blank line of the file describes one path as a sequence of
    node identifiers, e.g. ``a,b,c``. If ``frequency`` is ``True`` the last
    field of the line is parsed as the frequency of that path
    (e.g. ``a,b,c,2``). A line that contains a single node yields a path
    made of that node only. Blank lines are skipped.

    Parameters
    ----------
    filename : str
        path to the file to read
    separator : str
        character separating the fields of a line
    frequency : bool
        is a frequency given? if ``True`` it is the last element of the
        line (i.e. ``a,b,2``) and is converted with ``float``; otherwise
        every path gets the frequency ``1.0``
    directed : bool
        forwarded to ``PathCollection(directed=...)``
    maxlines : int
        maximum number of lines to read from the start of the file
        (useful to test large files). Blank lines count towards the
        limit. None means the entire file is read

    Returns
    -------
    PathCollection
        collection holding one path per distinct node sequence, together
        with the nodes and edges created while reading

    Raises
    ------
    ValueError
        if ``frequency`` is ``True`` and the last field of a line cannot
        be converted to ``float``

    """

    from pathpy.core.path import Path, PathCollection

    nodes: dict = {}
    edges: dict = {}
    paths: dict = {}

    with open(filename, 'r') as handle:
        for n, line in enumerate(handle):
            # Check the limit *before* parsing: ``n`` is zero-based, so
            # testing after the line was processed would read one line
            # more than requested.
            if maxlines is not None and n >= maxlines:
                break

            stripped = line.rstrip()
            if not stripped:
                # ``''.split(separator)`` returns ``['']``; without this
                # guard a blank line would create a node with an empty
                # identifier.
                continue

            fields = stripped.split(separator)

            if frequency:
                path = tuple(fields[:-1])
                freq = float(fields[-1])
            else:
                path = tuple(fields)
                freq = 1.0

            for node in path:
                if node not in nodes:
                    nodes[node] = Node(node)

            # Consecutive node pairs form the edges of the path; an edge
            # object is shared by all paths that traverse it.
            edge_list = []
            for u, v in zip(path[:-1], path[1:]):
                if (u, v) not in edges:
                    edges[(u, v)] = Edge(nodes[u], nodes[v])
                edge_list.append(edges[(u, v)])

            if path in paths:
                # Only the first occurrence of a node sequence is kept.
                # NOTE(review): the frequency of repeated lines is not
                # accumulated -- confirm that this is intended.
                continue

            if len(path) == 1:
                # A single node has no edges; build the path from the node.
                paths[path] = Path(nodes[path[0]], frequency=freq)
            else:
                paths[path] = Path(*edge_list, frequency=freq)

    ncoll = NodeCollection()
    for node in nodes.values():
        ncoll.add(node)

    ecoll = EdgeCollection(nodes=ncoll)
    for edge in edges.values():
        ecoll._add(edge)

    _paths = PathCollection(directed=directed, nodes=ncoll, edges=ecoll)

    for _path in paths.values():
        _paths._add(_path)

    return _paths
Пример #2
0
    def read_file(cls,
                  filename: str,
                  separator: str = ',',
                  frequency: bool = False,
                  directed: bool = True,
                  maxlines: int = None) -> 'PathCollection':
        """
        Read a collection of paths from a delimited text file.

        Every line of the file describes one path as a sequence of node
        identifiers, e.g. ``a,b,c``. If ``frequency`` is ``True`` the last
        field of the line is parsed as the frequency of that path
        (e.g. ``a,b,c,2``). Each line must contain at least two fields.

        Parameters
        ----------
        filename : str
            path to the file to read
        separator : str
            character separating the fields of a line
        frequency : bool
            is a frequency given? if ``True`` it is the last element of
            the line (i.e. ``a,b,2``) and is converted with ``int``;
            otherwise every path gets the frequency ``1``
        directed : bool
            forwarded to ``PathCollection(directed=...)``
        maxlines : int
            maximum number of lines to read from the start of the file
            (useful to test large files).
            None means the entire file is read

        Returns
        -------
        PathCollection
            collection holding one path per distinct node sequence,
            together with the nodes and edges created while reading

        Raises
        ------
        AssertionError
            if a line has fewer than two fields (not raised when Python
            runs with assertions disabled)
        ValueError
            if ``frequency`` is ``True`` and the last field of a line
            cannot be converted to ``int``
        """
        nodes = {}
        edges = {}
        paths = {}

        with open(filename, 'r') as handle:
            for n, line in enumerate(handle):
                # Check the limit *before* parsing: ``n`` is zero-based,
                # so testing after the line was processed would read one
                # line more than requested.
                if maxlines is not None and n >= maxlines:
                    break

                fields = line.rstrip().split(separator)
                assert len(fields) >= 2, 'Error: malformed line: {0}'.format(
                    line)

                # The frequency gets its own name; it must not rebind the
                # variable that holds the open file.
                if frequency:
                    path = tuple(fields[:-1])
                    freq = int(fields[-1])
                else:
                    path = tuple(fields)
                    freq = 1

                for node in path:
                    if node not in nodes:
                        nodes[node] = Node(node)

                # Consecutive node pairs form the edges of the path; an
                # edge object is shared by all paths that traverse it.
                edge_list = []
                for u, v in zip(path[:-1], path[1:]):
                    if (u, v) not in edges:
                        edges[(u, v)] = Edge(nodes[u],
                                             nodes[v],
                                             uid=u + '-' + v)
                    edge_list.append(edges[(u, v)])

                # Only the first occurrence of a node sequence is kept.
                # NOTE(review): the frequency of repeated lines is not
                # accumulated -- confirm that this is intended.
                if path not in paths:
                    paths[path] = Path(*edge_list, frequency=freq)

        nc = NodeCollection()
        nc.add(*nodes.values())

        ec = EdgeCollection(nodes=nc)
        for edge in edges.values():
            ec._add(edge)

        # ``directed`` was accepted but never used before; pass it on so
        # the argument actually takes effect.
        collection = PathCollection(directed=directed, nodes=nc, edges=ec)

        for path in paths.values():
            collection._add(path)

        return collection