Пример #1
0
def read_pgf(pgf_file, graph=None, pickle_graph=False):
    """
    Import graph from Graph Python Format file

    PGF format is the modules own file format consisting of a serialized
    graph data, nodes and edges dictionaries. Import either as plain text
    serialized dictionary or pickled graph object.
    The format is feature rich with good performance but is not portable.

    .. warning:: With `pickle_graph` enabled the data is loaded using
                 `pickle.load`, which can execute arbitrary code. Only use
                 it on PGF files from a trusted source.

    :param pgf_file:      PGF data to parse
    :type pgf_file:       File, string, stream or URL
    :param graph:         Graph object to import to or Graph by default
    :type graph:          :graphit:Graph
    :param pickle_graph:  PGF format is a pickled graph
    :type pickle_graph:   :py:bool

    :return:              Graph instance
    :rtype:               :graphit:Graph
    :raises:              GraphitException if `graph` is not a Graph or if
                          the plain text PGF data is not a dictionary with
                          'data', 'nodes' and 'edges' keys
    """

    # Unpickle pickled PGF format
    if pickle_graph:
        pgf_file = open_anything(pgf_file, mode='rb')

        # SECURITY: pickle.load is unsafe on untrusted input
        pgraph = pickle.load(pgf_file)

        # Transfer data from unpickled graph to graph if defined.
        # Explicit None check: a graph object that evaluates to False
        # (e.g. an empty container) should still receive the data.
        if graph is not None:
            graph.origin.nodes, graph.origin.edges, graph.origin.adjacency, graph.origin.data = graph.storagedriver(
                pgraph.nodes, pgraph.edges, pgraph.data)
            return graph
        return pgraph

    pgf_file = open_anything(pgf_file)

    # Import graph from serialized Graph Python Format
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # literal_eval only accepts Python literals, no code is executed
    pgf_eval = ast.literal_eval(pgf_file.read())
    if not isinstance(pgf_eval, dict):
        raise GraphitException('Invalid PGF file format')

    # Check the parsed dictionary (not the exhausted file handle) for the
    # required top-level keys
    missing_data = [d for d in ('data', 'nodes', 'edges') if d not in pgf_eval]
    if missing_data:
        raise GraphitException(
            'Invalid PGF file format, missing required attributes: {0}'.format(
                ','.join(missing_data)))

    graph.origin.nodes, graph.origin.edges, graph.origin.adjacency, graph.origin.data = graph.storagedriver(
        pgf_eval['nodes'], pgf_eval['edges'], pgf_eval['data'])

    return graph
Пример #2
0
def read_json(json_file, graph=None, **kwargs):
    """
    Parse (hierarchical) JSON data structure to a graph

    Use the default Python json parser to parse the JSON file to a dictionary
    followed by io_dict_format.read_pydata to parse to a graph structure.

    Additional keyword arguments (kwargs) are passed to `read_pydata`

    :param json_file:      json data to parse
    :type json_file:       File, string, stream or URL
    :param graph:          Graph object to import dictionary data in
    :type graph:           :graphit:Graph

    :return:               GraphAxis object, None if the JSON data could
                           not be read or decoded
    :rtype:                :graphit:GraphAxis
    """

    # Try parsing the string using default Python json parser
    json_stream = open_anything(json_file)
    try:
        parsed = json.load(json_stream)
    except (IOError, ValueError):
        # json.load signals malformed JSON with a ValueError subclass,
        # read problems surface as IOError
        logger.error('Unable to decode JSON string')
        return

    return read_pydata(parsed, graph=graph, **kwargs)
Пример #3
0
def read_xml(xml_file, graph=None):
    """
    Build a graph from a hierarchical XML document

    The document is parsed with the etree parser available as `et`. Every
    XML element becomes a node: the element tag is used as node key, the
    stripped element text (if any) is stored under the graphs `value_tag`
    and the XML attributes are passed along as additional node attributes.
    Child elements are linked to the node of their parent element.

    :param xml_file:       XML data to parse
    :type xml_file:        File, string, stream or URL
    :param graph:          Graph object to import dictionary data in
    :type graph:           :graphit:Graph

    :return:               GraphAxis object, None if the XML could not be
                           parsed
    :rtype:                :graphit:GraphAxis
    :raises:               TypeError if `graph` is not a GraphAxis instance
    """

    # Fall back to a fresh GraphAxis if the caller did not provide a graph
    if graph is None:
        graph = GraphAxis()
    if not isinstance(graph, GraphAxis):
        raise TypeError('Unsupported graph type {0}'.format(type(graph)))

    # Parse the complete document, a parse failure is logged not raised
    xml_stream = open_anything(xml_file)
    try:
        tree = et.fromstring(xml_stream.read())
    except et.ParseError as error:
        logging.error(
            'Unable to parse XML file. cElementTree error: {0}'.format(error))
        return

    def node_attributes(element):
        # XML attributes of the element, extended with its non-empty text
        attributes = element.attrib
        text = element.text.strip() if element.text else ''
        if text:
            attributes[graph.value_tag] = text
        return attributes

    def walk_element_tree(element, parent=None):
        # Depth-first: add the child, link it to its parent, then descend
        for child in element:
            nid = graph.add_node(child.tag, **node_attributes(child))
            graph.add_edge(parent, nid)
            walk_element_tree(child, parent=nid)

    # Remember if the graph was empty before the XML root is added
    was_empty = graph.empty()

    # The XML root element only becomes graph root in an empty graph
    rid = graph.add_node(tree.tag, **node_attributes(tree))
    if was_empty:
        graph.root = rid

    # Recursively add the remaining XML elements below the graph root
    walk_element_tree(tree, parent=graph.root)

    return graph
Пример #4
0
def read_gml(gml, graph=None):
    """
    Read graph in GML format

    Only the first 'graph' record in the GML data is imported. The graph is
    set to directed unless the GML 'directed' attribute states otherwise.
    All other graph level attributes are stored in `graph.data`.

    :param gml:             GML graph data.
    :type gml:              File, string, stream or URL
    :param graph:           Graph object to import GML data in
    :type graph:            :graphit:Graph

    :return:                Graph object
    :rtype:                 :graphit:Graph
    :raises:                GraphitException if `graph` is not a Graph or
                            the GML data contains no 'graph' object
    """

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # Parse GML into nested structure of Record class instances
    gml_stream = StreamReader(open_anything(gml))
    records = Record(gml_stream, name='root')

    gml_graphs = [g for g in records if g.name == 'graph']
    if not gml_graphs:
        # Fail with a descriptive error instead of a bare IndexError
        raise GraphitException("GML file does not contain a 'graph' object")
    if len(gml_graphs) > 1:
        logging.warning("GML file contains {0} 'graph' objects. Only parse first".format(len(gml_graphs)))
    gml_graph_record = gml_graphs[0]

    # GML node and edge labels are unique, turn off auto_nid
    graph.data['auto_nid'] = False

    # Set graph meta-data and attributes
    graph_attr = gml_graph_record.to_dict({})
    graph.directed = True
    if 'directed' in graph_attr:
        graph.directed = graph_attr.pop('directed') == 1
        graph.data['directed'] = graph.directed

    graph.data.update(graph_attr)

    # Build graph from records
    build_nodes(graph, gml_graph_record)
    build_edges(graph, gml_graph_record)

    return graph
Пример #5
0
def read_yaml(yaml_file, graph=None, **kwargs):
    """
    Parse (hierarchical) YAML data structure to a graph

    The YAML data is parsed using `yaml.safe_load`. If the result is a list,
    the first item is imported as base graph using `read_pydata` and every
    following item is imported as separate graph that is joined to the root
    of the base graph.

    Additional keyword arguments (kwargs) are passed to `read_pydata` for
    the base graph.

    :param yaml_file:      yaml data to parse
    :type yaml_file:       File, string, stream or URL
    :param graph:          Graph object to import dictionary data in
    :type graph:           :graphit:Graph

    :return:               GraphAxis object, None if the YAML data could
                           not be read or decoded
    :rtype:                :graphit:GraphAxis
    """

    # Try parsing the string using default Python yaml parser
    yaml_stream = open_anything(yaml_file)
    try:
        yaml_data = yaml.safe_load(yaml_stream)
    except (IOError, yaml.YAMLError):
        # Malformed YAML raises yaml.YAMLError, read problems IOError
        logger.error('Unable to decode YAML string')
        return

    if not isinstance(yaml_data, list):
        yaml_data = [yaml_data]

    base_graph = read_pydata(yaml_data[0], graph=graph, **kwargs)

    for yaml_object in yaml_data[1:]:
        sub_graph = read_pydata(yaml_object)

        # If sub-graph root is of type 'root', connect children to base_graph
        root = sub_graph.getnodes(sub_graph.root)
        if root[sub_graph.key_tag] == 'root':
            links = [(base_graph.root, child)
                     for child in root.children(return_nids=True)]
        else:
            links = [(base_graph.root, sub_graph.root)]

        graph_join(base_graph, sub_graph, links=links)

    return base_graph
Пример #6
0
def read_adl(adl_file, graph=None):
    """
    Build a graph from an adjacency list (ADL)

    Every non-empty line lists whitespace separated node labels. All labels
    on a line are added as nodes and the first label is connected by an edge
    to each of the other labels on that line. Everything following a '#'
    character on a line is treated as comment and ignored.

    .. note:: the adjacency list does not define edge directionality
              explicitly, it therefore depends on the directionality
              configured on the graph object the data is imported in.

    :param adl_file:        ADL graph data.
    :type adl_file:         File, string, stream or URL
    :param graph:           Graph object to import ADL data in
    :type graph:            :graphit:Graph

    :return:                Graph object
    :rtype:                 :graphit:Graph
    :raises:                GraphitException if `graph` is not a Graph
    """

    adl_stream = open_anything(adl_file)

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # Node labels in the list are used as node identifiers
    graph.data['auto_nid'] = False

    for raw_line in adl_stream.readlines():

        # Strip comment part (# ..) and skip lines without content
        content = raw_line.split('#')[0].strip()
        if not content:
            continue

        labels = content.split()
        graph.add_nodes(labels)

        # First label is the source, all others are its neighbours
        if len(labels) > 1:
            source = labels[0]
            graph.add_edges([(source, target) for target in labels[1:]])

    return graph
Пример #7
0
def read_tgf(tgf, graph=None, key_tag=None):
    """
    Read graph in Trivial Graph Format

    TGF format dictates that nodes to be listed in the file first with each
    node on a new line. A '#' character signals the end of the node list and
    the start of the edge list.

    Node and edge ID's can be integers, float or strings. They are parsed
    automatically to their most likely format using `coarse_type`.
    Simple node and edge labels are supported in TGF as all characters that
    follow the node or edge ID's. They are stored as (whitespace normalized)
    string in the Graph node and edge data stores using the graphs default
    or custom 'key_tag'.

    TGF data is imported into a default Graph object if no Graph instance
    is provided. An object that is not a Graph instance is ignored and
    replaced by a default Graph.

    .. note:: TGF format always defines edges in a directed fashion.
              This is enforced during import even for custom graphs, the
              original graph directionality is restored afterwards.

    :param tgf:             TGF graph data.
    :type tgf:              File, string, stream or URL
    :param graph:           Graph object to import TGF data in
    :type graph:            :graphit:Graph
    :param key_tag:         Data key to use for parsed node/edge labels.
    :type key_tag:          :py:str

    :return:                Graph object
    :rtype:                 :graphit:Graph
    """

    tgf_file = open_anything(tgf)
    if not isinstance(graph, Graph):
        graph = Graph()

    # Define node/edge data labels
    if key_tag:
        graph.key_tag = key_tag

    # TGF defines edges in a directed fashion. Enforce but restore later
    default_directionality = graph.directed
    graph.directed = True

    # TGF node and edge labels are unique, turn off auto_nid
    graph.auto_nid = False

    # Start parsing. First extract nodes
    nodes = True
    node_dict = {}
    try:
        for line in tgf_file.readlines():

            line = line.strip()
            if not line:
                continue

            # Reading '#' character means switching from node
            # definition to edges
            if line.startswith('#'):
                nodes = False
                continue

            tokens = line.split()

            # Only the ID's are coarsed to their most likely type. Label
            # tokens are kept as strings: joining type converted tokens
            # fails as soon as a label contains a number like token.
            if nodes:

                node_id = coarse_type(tokens[0])
                attr = {}
                # Has node data
                if len(tokens) > 1:
                    attr = {graph.key_tag: ' '.join(tokens[1:])}
                node_dict[node_id] = graph.add_node(node_id, **attr)

            # Parse edges
            else:
                e1 = node_dict[coarse_type(tokens[0])]
                e2 = node_dict[coarse_type(tokens[1])]

                attr = {}
                # Has edge data
                if len(tokens) > 2:
                    attr = {graph.key_tag: ' '.join(tokens[2:])}
                graph.add_edge(e1, e2, **attr)
    finally:
        # Restore directionality and release the file, also when the
        # TGF data turned out to be malformed
        graph.directed = default_directionality
        tgf_file.close()

    return graph
Пример #8
0
def read_jgf(jgf_format, graph=None):
    """
    Read JSON graph format (.jgf)

    This is a proprietary format in which the graph meta-data, the nodes,
    edges and their data dictionaries are stored in JSON format.

    Format description. Primary key/value pairs:
    * graph: Graph class meta-data. Serializes all class attributes of type
             int, float, bool, long, str or unicode.
    * data:  Graph data dictionary, includes the graphit version
    * nodes: Graph node identifiers (keys) and attributes (values)
    * edges: Graph enumerated edge identifiers
    * edge_attr: Graph edge attributes

    :param jgf_format:  JSON encoded graph data to parse or an already
                        decoded dictionary
    :type jgf_format:   :py:str, :py:dict
    :param graph:       Graph object to import JGF data in
    :type graph:        :graphit:Graph

    :return:            Graph object, None if the data could not be decoded,
                        misses required keys or failed the version check
    :rtype:             Graph or GraphAxis object
    :raises:            GraphitException if `graph` is not a Graph or
                        GraphAxis instance
    """

    # Try parsing the string using default Python json parser
    if isinstance(jgf_format, dict):
        parsed = jgf_format
    else:
        jgf_format = open_anything(jgf_format)
        try:
            parsed = json.load(jgf_format)
        except (IOError, ValueError):
            # json.load signals malformed JSON with a ValueError subclass
            logger.error('Unable to decode JSON string')
            return

    # Check format validity first, the version check below depends on the
    # 'data' key being available
    keywords = ['graph', 'data', 'nodes', 'edges', 'edge_attr']
    if not isinstance(parsed, dict) or not set(keywords).issubset(parsed):
        logger.error('JSON format does not contain required graph data')
        return

    # Check graphit version
    if not check_graphit_version(parsed['data'].get('graphit_version')):
        return

    # User defined or default Graph object
    if graph is None:
        if parsed['graph'].get('root') is not None:
            graph = GraphAxis(data=parsed['data'])
        else:
            graph = Graph(data=parsed['data'])
    elif not isinstance(graph, (Graph, GraphAxis)):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # Init graph meta-data attributes
    for key, value in parsed['graph'].items():
        setattr(graph, key, value)

    # Init graph nodes
    for node_key, node_value in parsed['nodes'].items():

        # JSON objects don't accept integers as dictionary keys
        # If graph.auto_nid equals True, coerce node_key to integer
        if graph.data.auto_nid:
            node_key = int(node_key)

        graph.nodes[node_key] = node_value

    # Init graph edges. Edges are stored as lists in JSON, the graph edge
    # store is keyed by tuple. Attributes are looked up by the enumerated
    # edge identifier.
    for edge_key, edge_value in parsed['edges'].items():
        edge_value = tuple(edge_value)
        graph.edges[edge_value] = parsed['edge_attr'].get(edge_key, {})

    # Set auto nid
    graph._set_auto_nid()

    return graph
Пример #9
0
def read_json_schema(schema, graph=None, exclude_args=None, resolve_ref=True):
    """
    Import hierarchical data structures defined in a JSON schema format

    Every entry in the schema 'properties' (and 'definitions' if
    `resolve_ref` is True) becomes a node in the graph. All schema keywords
    of an entry that are not dictionaries themselves are stored as node
    attributes. The schema passed in is not modified.

    :param schema:            JSON Schema data format to import
    :type schema:             dict, file, string, stream or URL
    :param graph:             graph object to import JSON schema data in
    :type graph:              :graphit:Graph
    :param exclude_args:      JSON schema arguments to exclude from import
    :type exclude_args:       :py:list
    :param resolve_ref:       Parse JSON schema 'definitions'
    :type resolve_ref:        :py:bool

    :return:                  Graph object, None if the schema could not be
                              decoded
    :rtype:                   :graphit:Graph
    :raises:                  GraphitException if `graph` is not a GraphAxis
    """

    json_schema = schema
    if not isinstance(schema, dict):

        # Try parsing the string using default Python json parser
        json_schema = open_anything(schema)
        try:
            json_schema = json.load(json_schema)
        except (IOError, ValueError) as error:
            logger.error('Unable to decode JSON string: {0}'.format(error))
            return

    # User defined or default Graph object
    if graph is None:
        graph = GraphAxis()
    elif not isinstance(graph, GraphAxis):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    if graph.empty():
        rid = graph.add_node('root')
        graph.root = rid

    # Build JSON schema parser ORM with format specific conversion classes
    graph.node_tools = JSONSchemaValidatorDraft07
    graph.orm = JSONSchemaORMDraft07

    # What data-blocks to parse, properties by default, definitions if required
    datablock = ['properties']
    if resolve_ref:
        datablock.append('definitions')

    if exclude_args is None:
        exclude_args = []

    def walk_schema(schema_block, parent=None, extra_attr=None):

        # Get all JSON schema definitions for this data instance
        attributes = {k: v for k, v in schema_block.items()
                      if not isinstance(v, dict) and k not in exclude_args}

        # Attributes registered by the parent block (schema_label, required)
        # are passed in separately. Writing them into the schema block
        # itself would modify the callers schema and replace the 'required'
        # list of this block by a boolean, hiding its required children.
        if extra_attr:
            attributes.update((k, v) for k, v in extra_attr.items()
                              if k not in exclude_args)

        node = graph.getnodes(parent)
        node.update(attributes)

        # Get 'required' attribute
        required = schema_block.get('required', [])
        if not isinstance(required, list):
            required = []

        # Store default data or None
        if attributes.get('default') is not None:
            node.set(graph.data.value_tag, attributes.get('default'))

        # For all child elements in datablock, make new node
        # and parse using recursive calls to walk_schema
        for block in schema_block.keys():
            if block in datablock:
                for child, attr in schema_block[block].items():
                    nid = graph.add_node(child)

                    # Register block_name in child attributes
                    child_attr = {'schema_label': block}

                    # Register 'required' elements
                    if child in required:
                        child_attr['required'] = True

                    graph.add_edge(parent, nid)
                    walk_schema(attr, parent=nid, extra_attr=child_attr)

    walk_schema(json_schema, graph.root)

    # Parse schema meta data
    document_path = ''
    if isinstance(schema, PY_STRING):
        document_path = os.path.abspath(schema)

    root = graph.get_root()
    root.set('document_path', document_path)
    parse_schema_meta_data(root)

    # Resolve JSON Schema $ref
    if resolve_ref:
        resolve_json_ref(graph, exclude_args=exclude_args)

    return graph
Пример #10
0
def read_web(web,
             graph=None,
             orm_data_tag='haddock_type',
             auto_parse_format=True):
    """
    Import hierarchical data structures defined in the Spider .web format

    The data is parsed line by line:

    * a line ending with '(' opens a new object block that is added as
      child node of the current object (or as graph root if the graph is
      still empty)
    * a line starting with ')' closes the current object block
    * a 'key = value' line is added as leaf node of the current object
    * any other line is collected as array item and stored as value of the
      current object when its block is closed

    The data block type identifiers used in the .web format are stored in
    the nodes using the `orm_data_tag` attribute. These can be used by the
    Graph ORM mapper for custom data exchange in the graph.

    :param web:               Spider .web data format to import
    :type web:                file, string, stream or URL
    :param graph:             graph object to import .web data in
    :type graph:              :graphit:Graph
    :param orm_data_tag:      data key to use for .web data identifier
    :type orm_data_tag:       :py:str
    :param auto_parse_format: automatically detect basic format types using JSON decoding
    :type auto_parse_format:  :py:bool

    :return:                  Graph object
    :rtype:                   :graphit:Graph
    :raises:                  GraphitException if `graph` is not a GraphAxis,
                              AssertionError if the number of opened and
                              closed object blocks differs
    """

    web_file = open_anything(web)
    if graph is None:
        graph = GraphAxis()
    elif not isinstance(graph, GraphAxis):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # Build .web parser ORM with format specific conversion classes
    weborm = GraphORM()
    weborm.node_mapping.add(
        RestraintsInterface,
        lambda x: x.get(graph.data.key_tag) == 'activereslist')
    weborm.node_mapping.add(
        RestraintsInterface,
        lambda x: x.get(graph.data.key_tag) == 'passivereslist')

    # Set current ORM aside and register parser ORM.
    # NOTE(review): the original ORM is only restored at the end of a
    # successful parse, not when an exception is raised along the way.
    curr_orm = graph.orm
    graph.orm = weborm

    # Parser state: nid of the object block currently being filled, open
    # and close tag counters for the balance check afterwards, the counter
    # used to name unnamed (array) objects and the buffer of array items
    curr_obj_nid = None
    object_open_tags = 0
    object_close_tags = 0
    array_key_counter = 1
    array_store = []
    for i, line in enumerate(web_file.readlines()):
        line = line.strip()
        if len(line):

            # Detect start of new object definition
            if line.endswith('('):

                # Process data. 'name = type (' yields [name, type],
                # an unnamed 'type (' yields [type] only.
                meta_data = [n.strip() for n in line.strip('(').split('=', 1)]
                ddict = {orm_data_tag: meta_data[-1], 'is_array': False}
                if len(meta_data) > 1:
                    node_key = meta_data[0]
                else:
                    # Unnamed objects are array items, enumerate the key
                    node_key = 'item{0}'.format(array_key_counter)
                    ddict['is_array'] = True
                    array_key_counter += 1

                # Clear the array store
                array_store = []

                # First object defines graph root
                if graph.empty():
                    curr_obj_nid = graph.add_node(node_key, **ddict)
                    graph.root = curr_obj_nid

                # Add new object as child of current object
                else:
                    child_obj_nid = graph.add_node(node_key, **ddict)
                    graph.add_edge(curr_obj_nid, child_obj_nid)
                    curr_obj_nid = child_obj_nid

                object_open_tags += 1

            # Detect end of object definition
            elif line.startswith(')'):

                # If there is data in the array store, add it to node
                if len(array_store):
                    array_node = graph.getnodes(curr_obj_nid)
                    array_node.is_array = True
                    array_node.set(graph.data.value_tag, array_store)

                # Reset array key counter
                array_key_counter = 1

                # Move one level up the object tree, fall back to the
                # graph root if node_parent returns nothing
                curr_obj_nid = node_parent(graph, curr_obj_nid,
                                           graph.root) or graph.root
                object_close_tags += 1

            # Parse object parameters
            else:

                # Parse key,value pairs and add as leaf node
                params = [n.strip() for n in line.rstrip(',').split('=', 1)]

                if '=' in line and len(params) == 2:
                    leaf_nid = graph.add_node(params[0])
                    graph.add_edge(curr_obj_nid, leaf_nid)

                    value = params[1]
                    if auto_parse_format:
                        value = json_decode_params(params[1])

                    leaf_node = graph.getnodes(leaf_nid)
                    leaf_node.set(graph.data.value_tag, value)

                # Parse single values as array data
                elif len(params) == 1:

                    value = params[0]
                    if auto_parse_format:
                        value = json_decode_params(params[0])

                    # Buffer array items, they are stored on the enclosing
                    # object node when its block is closed
                    array_store.append(value)

                else:
                    logger.warning(
                        'Unknown .web data formatting on line: {0}, {1}'.
                        format(i, line))

    web_file.close()

    # Object blocks opening '(' and closing ')' tag count should be balanced
    if object_open_tags != object_close_tags:
        raise AssertionError(
            'Unbalanced object block, something is wrong with the file format')

    # Restore original ORM
    graph.orm = curr_orm

    # The root node key is always renamed to 'project' (an unnamed root
    # object would otherwise be called 'item1')
    root = graph.getnodes(graph.root)
    root.key = 'project'

    return graph
Пример #11
0
def read_dot(dot, graph=None):
    """
    Read graph in DOT format

    The DOT data is read in chunks up to the next DOT reserved character
    (newline, ';', '[', ']' or '}'). Each chunk is handled as a comment,
    group (skipped, not supported), subgraph or node attribute block start,
    edge statement, attribute assignment or node statement.

    Attributes collected inside a 'subgraph' or 'node' block are added to
    all nodes that were parsed in that block when the block ends.
    Attribute assignments outside of such a block are stored in
    `graph.data`.

    :param dot:             DOT graph data.
    :type dot:              File, string, stream or URL
    :param graph:           Graph object to import DOT data in
    :type graph:            :graphit:Graph

    :return:                Graph object
    :rtype:                 :graphit:Graph
    :raises:                GraphitException if `graph` is not a Graph
    """

    dot_stream = StreamReader(open_anything(dot))

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(type(graph)))

    # Parser state: type of the block currently parsed ('subgraph', 'node'
    # or None), attributes collected for that block and the edges and
    # nodes parsed since the block started
    block = None
    node_attr = {}
    edges = []
    nodes = []

    # Continue until a chunk is neither content nor a block end and the
    # stream reports no more data
    parse = True
    init_graph = False
    while parse or dot_stream.has_more:

        # Parse start graph block
        # NOTE(review): `graph` is rebound to the return value of
        # parse_graph_type. If that helper returns None the graph object is
        # lost for the statements below - confirm it always returns a graph
        # before the first statement is reached.
        if not init_graph:
            graph = parse_graph_type(dot_stream, graph)
            if graph is not None:
                init_graph = True

        # Parse up to DOT reserved chars
        line, char = dot_stream.read_upto_char(('\n', ';', '[', ']', '}'))
        line = line.strip() if line else ''

        if line:

            # Comment line
            if line[0] in ('/', '#'):
                logging.info('Skip DOT comment line: "{0}"'.format(line))

                # Read till end of line
                if char != '\n':
                    dot_stream.read_upto_char('\n')

            # Grouping not supported
            elif line[0] == '{':
                nxt_line, nxt_char = dot_stream.read_upto_char('}')
                logging.info('Skip group: {0}{1}{2}{3}'.format(line, char, nxt_line, nxt_char))

            # Subgraphs, the second token on the line is stored as
            # 'subgraph' attribute
            elif 'subgraph' in line:
                block = 'subgraph'
                node_attr[block] = shlex.split(line)[1]

            # Node attribute block
            elif 'node' in line:
                block = 'node'
                node_attr = {}

            # Parse edges
            elif '--' in line or '->' in line:

                # Edge attributes follow between '[' and ']'
                attr = {}
                if char == '[':
                    attr = parse_attributes(dot_stream.read_upto_char(']')[0])
                edges.extend(parse_edge(line, graph, attr=attr))

            else:
                # Attribute assignment, belongs to the current block or to
                # the graph itself if not inside a block
                if '=' in line:
                    if block in ('subgraph', 'node'):
                        node_attr.update(parse_attributes(line))
                    else:
                        graph.data.update(parse_attributes(line))
                else:
                    nodes.extend(parse_nodes(line, graph))

        # Empty chunk: end of a subgraph block ('}') or of a node block
        elif (char == '}' and block == 'subgraph') or block == 'node':
            logging.info('Stop parsing {0} group at position: {1}'.format(block, dot_stream.block_pos[1]))

            # Flatten the collected edges to unique node ID's (assumes
            # parse_edge returns edges as tuples) and add the block
            # attributes to all nodes of the block
            nodes.extend(list(set(sum(edges, ()))))
            for node in nodes:
                graph.nodes[node].update(node_attr)

            # Reset block state
            node_attr = {}
            edges = []
            nodes = []
            block = None

        else:
            parse = False

    return graph
Пример #12
0
def read_lgf(lgf, graph=None):
    """
    Read graph in LEMON Graph Format (LGF)

    The LGF data is organised in sections that start with a '@' prefixed
    section line, followed by a line with table column headers and the table
    rows. Node, edge and arc sections are imported. The '@attributes'
    section and unknown sections are skipped. An empty line or comment line
    ends the current section.

    :param lgf:             LGF graph data.
    :type lgf:              File, string, stream or URL
    :param graph:           Graph object to import LGF data in
    :type graph:            :graphit:Graph

    :return:                Graph object
    :rtype:                 :graphit:Graph
    :raises:                GraphitException if `graph` is not a Graph
    """

    lgf_file = open_anything(lgf)

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # LGF node and edge labels are unique, turn off auto_nid
    graph.data['auto_nid'] = False

    parser = None
    header = None
    did_parse_nodes = False
    is_directed = False
    for line in lgf_file.readlines():
        line = line.strip()

        # Skip empty lines and comment lines
        if not line or line.startswith('#'):
            parser = None
            continue

        # Define parser
        is_section = line.startswith('@')
        if is_section or parser is None:

            # A new section never continues with the parser of the previous
            # section. Without the reset, rows of a skipped section (like
            # @attributes) that directly follows a table would be parsed as
            # rows of that table.
            if is_section:
                parser = None

            if 'nodes' in line:
                parser = parse_nodes
                did_parse_nodes = True
            elif line.startswith('@edges'):
                parser = parse_edges
            elif line.startswith('@arcs'):
                parser = parse_arcs
                is_directed = True
            elif line.startswith('@attributes'):
                logging.warning(
                    'Not importing LGF @attributes. Graph attributes not supported by graphit'
                )
            header = None
            continue

        # Immediately after parser definition, parse table column headers
        if header is None:
            header = split_line(line)
            continue

        parser(line, header, graph, did_parse_nodes=did_parse_nodes)

    # Set graph to 'directed' if arcs where parsed
    if is_directed:
        graph.directed = True

    return graph
Пример #13
0
def read_p2g(p2g_file, graph=None):
    """
    Read graph in P2G format

    The first non-empty line is used as graph name and the second one
    declares the number of nodes and edges. The remaining lines define the
    nodes: a line with a single token is a node label, a following line with
    two integers lists the indices of the nodes it connects to.

    P2G graphs are directed. A warning is logged if the declared number of
    nodes and edges does not match the imported graph.

    :param p2g_file:      P2G data to parse
    :type p2g_file:       File, string, stream or URL
    :param graph:         Graph object to import to or Graph by default
    :type graph:          :graphit:Graph

    :return:              Graph instance
    :rtype:               :graphit:Graph
    :raises:              GraphitException if `graph` is not a Graph or if
                          the P2G data is malformed
    """

    p2g_file = open_anything(p2g_file)

    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # P2G graphs are directed
    graph.directed = True

    graph_name = None
    graph_layout = None
    curr_node = None
    nodes = {}
    for i, line in enumerate(p2g_file.readlines()):

        line = line.strip()
        if not line:
            continue

        # Parse p2g graph name (first line)
        sline = line.split()
        if not graph_name:
            graph_name = line
            continue

        # Parse number of nodes and edges (second line).
        # Convert eagerly to a list: a lazy map() would raise the ValueError
        # outside of the try block and is not subscriptable on Python 3.
        if not graph_layout:
            try:
                graph_layout = [int(n) for n in sline]
            except ValueError:
                raise GraphitException(
                    'P2G import error: line {0} - {1}'.format(i, line))
            continue

        # Parse nodes and edges
        if len(sline) == 1:
            nodes[line] = []
            curr_node = line
        elif len(sline) == 2:
            try:
                nodes[curr_node] = [int(n) for n in sline]
            except ValueError:
                raise GraphitException(
                    'P2G import error: malformed edge on line {0} - {1}'.
                    format(i, line))
        else:
            raise GraphitException(
                'P2G import error: line {0} - {1}'.format(i, line))

    # The node/edge count declaration is required for the final check
    if graph_layout is None or len(graph_layout) < 2:
        raise GraphitException(
            'P2G import error: missing declaration of number of nodes and edges')

    graph.data['name'] = graph_name

    # Add nodes
    mapped_nodes = graph.add_nodes(nodes.keys())

    # Add edges. Edge values are indices into the list of added nodes
    for i, nid in enumerate(nodes.keys()):
        for e in nodes[nid]:
            if e < len(mapped_nodes):
                graph.add_edge(mapped_nodes[i], mapped_nodes[e])
            else:
                raise GraphitException(
                    'P2G import error: edge node index {0} not in graph'.
                    format(e))

    if len(nodes) != graph_layout[0] or (len(graph.edges)) != graph_layout[1]:
        logging.warning(
            'P2G import warning: declared number of nodes and edges {0}-{1} does not match {2}-{3}'
            .format(graph_layout[0], graph_layout[1], len(nodes),
                    len(graph.edges)))

    return graph
Пример #14
0
def read_lgr(lgr, graph=None, edge_label='label'):
    """
    Read graph in LEDA format

    The input is split in three sections introduced by lines starting with
    `#header`, `#nodes` and `#edges`. Lines before the first marker are
    treated as header lines.

    The header is expected to hold four lines: the literal `LEDA.GRAPH`,
    the node data type, the edge data type and the direction flag (`-1`
    marks the graph as directed, any other integer as undirected).
    Type names are looked up in the module level `data_types` mapping, an
    unknown type name leaves the data unconverted.

    The first line of both the node and the edge section is skipped
    (presumably the declared element count - it is not validated here).
    Nodes are added with `graph.add_node` using the node data stripped
    from the `|{` and `}|` delimiters; how that data is stored is up to the
    graph. Edge data is added to the edge attributes using `edge_label`
    as key. Empty node or edge data is stored as None.

    :param lgr:             LEDA graph data.
    :type lgr:              File, string, stream or URL
    :param graph:           Graph object to import LEDA data in
    :type graph:            :graphit:Graph
    :param edge_label:      edge data label name
    :type edge_label:       :py:str

    :return:                Graph object
    :rtype:                 :graphit:Graph
    :raises:                TypeError if node/edge type conversion failed
                            GraphitException in case of malformed LEDA file
    """

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # Parse LEDA file: distribute the non-empty lines over the three sections
    lgr_file = open_anything(lgr)
    header = []
    nodes = []
    edges = []
    container = header
    for line in lgr_file.readlines():
        line = line.strip()
        if not line:
            continue

        if line.startswith('#header'):
            container = header
        elif line.startswith('#nodes'):
            container = nodes
        elif line.startswith('#edges'):
            container = edges
        else:
            container.append(line)

    # Parse LEDA header. All four header fields are indexed below, so a
    # truncated header is rejected here instead of failing with IndexError.
    if len(header) < 4 or header[0] != 'LEDA.GRAPH':
        raise GraphitException('File is not a valid LEDA graph format')

    # Node and edge data types and graph directionality
    node_type = data_types.get(header[1])
    edge_type = data_types.get(header[2])
    try:
        graph.directed = int(header[3]) == -1
    except ValueError:
        raise GraphitException(
            'Invalid LEDA graph direction flag {0}'.format(header[3]))

    # Parse LEDA nodes. Edges refer to nodes by their 1-based position in
    # the node section, keep a mapping from that position to the node ID.
    node_mapping = {}
    for i, node in enumerate(nodes[1:], start=1):
        data = node.strip('|{}|') or None
        if node_type and data:
            data = node_type(data)
        node_mapping[i] = graph.add_node(data)

    # Parse LEDA edges
    for edge in edges[1:]:

        # Split on the first three whitespace runs only, this keeps an edge
        # label that contains whitespace in one piece.
        fields = edge.split(None, 3)
        if len(fields) != 4:
            raise GraphitException(
                'Too few fields in LEDA edge {0}'.format(edge))
        source, target, _reversal, label = fields

        try:
            source_nid = node_mapping[int(source)]
            target_nid = node_mapping[int(target)]
        except (KeyError, ValueError):
            raise GraphitException(
                'Unknown node index in LEDA edge {0}'.format(edge))

        attr = {edge_label: label.strip('|{}|') or None}
        if edge_type and attr[edge_label]:
            attr[edge_label] = edge_type(attr[edge_label])
        graph.add_edge(source_nid, target_nid, **attr)

    return graph
Пример #15
0
def read_gexf(gexf_file, graph=None):
    """
    Read graphs in GEXF format

    Uses the Python build-in etree cElementTree parser to parse the XML
    document.
    Attributes and child elements of "meta" elements and the attributes of
    the "graph" element are stored in `graph.data`. Every "node" element is
    added as node using its "id" attribute as node ID, every "edge" element
    is added as edge between its "source" and "target" attributes. Remaining
    attributes, after processing by `parse_attvalue_elements`, are added as
    node or edge attributes.

    The graph is directed unless the "defaultedgetype" attribute of the
    "graph" element states otherwise. An edge "type" attribute overrides
    the directionality for that edge. `graph.data['auto_nid']` is set to
    False.

    Documents with and without an XML namespace are supported.

    :param gexf_file:      XML data to parse
    :type gexf_file:       File, string, stream or URL
    :param graph:          Graph object to import dictionary data in
    :type graph:           :graphit:Graph

    :return:               Graph object or None if the XML could not be
                           parsed (the parse error is logged)
    :rtype:                :graphit:Graph
    :raises:               GraphitException if `graph` is not a Graph or if
                           the document has no "gexf" tag, no "graph"
                           element or no "node" elements
    """

    gexf_file = open_anything(gexf_file)

    # User defined or default Graph object
    if graph is None:
        graph = Graph()
    elif not isinstance(graph, Graph):
        raise GraphitException('Unsupported graph type {0}'.format(
            type(graph)))

    # Try parsing the string using default Python cElementTree parser
    try:
        tree = et.fromstring(gexf_file.read())
    except et.ParseError as error:
        logging.error(
            'Unable to parse GEXF file. cElementTree error: {0}'.format(error))
        return

    # Get XMLNS namespace prefix ("{uri}") from the gexf element.
    # A document without namespace has plain tag names, use an empty prefix
    # in that case so the tag lookups below still match.
    xmlns = None
    for elem in tree.iter():
        if elem.tag.endswith('gexf'):
            if '}' in elem.tag:
                xmlns = elem.tag.split('}')[0] + '}'
            else:
                xmlns = ''
            break

    if xmlns is None:
        raise GraphitException(
            'Invalid GEXF file format, "gexf" tag not found')

    # Add graph meta-data
    for meta in tree.iter('{0}meta'.format(xmlns)):
        graph.data.update(meta.attrib)
        for meta_data in meta:
            # Last part is the local tag name, with or without namespace
            tag = meta_data.tag.split('}')[-1]
            graph.data[tag] = meta_data.text

    # GEXF node and edge labels are unique, turn off auto_nid
    graph.data['auto_nid'] = False

    graph_tag = tree.find('{0}graph'.format(xmlns))
    if graph_tag is None:
        raise GraphitException(
            'Invalid GEXF file format, "graph" tag not found')
    graph.directed = graph_tag.get('defaultedgetype', 'directed') == 'directed'
    graph.data.update(graph_tag.attrib)

    # Parse all nodes
    nodes = tree.findall('.//{0}node'.format(xmlns))
    if not nodes:
        raise GraphitException('GEXF file containes no "node" elements')
    for node in nodes:
        attr = parse_attvalue_elements(node, node.attrib, xmlns=xmlns)
        node_attr = {k: v for k, v in attr.items() if k != 'id'}
        graph.add_node(attr['id'], **node_attr)

    # Parse all edges
    for edge in tree.findall('.//{0}edge'.format(xmlns)):
        attr = edge.attrib

        # Edge direction differs from global graph directionality
        edge_directed = graph.directed
        if 'type' in attr:
            edge_directed = attr['type'] == 'directed'

        attr = parse_attvalue_elements(edge, attr, xmlns=xmlns)
        edge_attr = {
            k: v
            for k, v in attr.items() if k not in ('source', 'target')
        }
        graph.add_edge(attr['source'],
                       attr['target'],
                       directed=edge_directed,
                       **edge_attr)

    logger.info('Import graph in GEXF format. XMLNS: {0}'.format(xmlns))

    return graph