def onebipartite(g, fp, degdir, analysis, namekey, mpkey, bipkey):
    """ Processes a single bipartite projection, sending it to the next step in
    the hierarchy towards writing degree sequences.

    Input:
        bipkey                  string, indicates the projection (a or b) that
                                generated this subgraph of the original.

    """
    if sg.weighted(g, fp) == 1 or sg.multigraph(g) == 1:
        processmultigraph(g,
                          fp,
                          degdir,
                          analysis,
                          namekey,
                          mpkey=0,
                          bipkey=bipkey)
    elif g.is_directed():
        processdirected(g,
                        fp,
                        degdir,
                        analysis,
                        namekey=namekey,
                        mpkey=0,
                        bipkey=bipkey)
    else:
        checkconnected(g,
                       fp,
                       degdir,
                       analysis,
                       namekey=namekey,
                       mpkey=0,
                       bipkey=bipkey)
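# Illustration only (not from the original pipeline): a minimal sketch of how the
# two one-mode projections that feed onebipartite() could be built with igraph.
# The real driver (e.g. a processbipartite() routine), the file path `fp`,
# `degdir`, and `analysis` are assumed to exist elsewhere; the toy graph below
# is purely hypothetical.
def _bipartite_projection_sketch():
    import igraph
    toy = igraph.Graph.Full_Bipartite(3, 2)
    toy.vs['type'] = [False, False, False, True, True]
    # project onto the two vertex classes
    proj_a, proj_b = toy.bipartite_projection()
    # each projection would then be sent down the hierarchy, roughly:
    # onebipartite(proj_a, fp, degdir, analysis, namekey + 'a', 0, bipkey='a')
    # onebipartite(proj_b, fp, degdir, analysis, namekey + 'b', 0, bipkey='b')
    return proj_a.vcount(), proj_b.vcount()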


def processmultigraph(g, fp, degdir, analysis, namekey='', mpkey=0, bipkey=0):
    """ Processes a multigraph or weighted graph by ignoring multiedges and
    weights, then sends the simplified graph on.

    """
    mgkey = 0
    weighkey = 0
    if sg.multigraph(g) == 1:
        namekey += '_multigraphsimplified'
        mgkey = 'simplified'
    if sg.weighted(g) == 1:
        namekey += '_weightedsimplified'
        weighkey = 'simplified'
    g.simplify()
    if sg.directed(g):
        processdirected(g,
                        fp,
                        degdir,
                        analysis,
                        namekey=namekey,
                        mpkey=mpkey,
                        bipkey=bipkey,
                        mgkey=mgkey,
                        weighkey=weighkey)
    else:
        checkconnected(g,
                       fp,
                       degdir,
                       analysis,
                       namekey=namekey,
                       mpkey=mpkey,
                       bipkey=bipkey,
                       mgkey=mgkey,
                       weighkey=weighkey)
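# Illustration only (not from the original pipeline): what the g.simplify() step
# in processmultigraph() does. simplify() collapses parallel edges and removes
# self-loops in place; the sg.multigraph()/sg.weighted() checks above come from
# the (assumed) sg helper module and are not reproduced here.
def _simplify_sketch():
    import igraph
    # toy multigraph: a duplicated edge plus a self-loop
    toy = igraph.Graph([(0, 1), (0, 1), (1, 2), (2, 2)])
    assert toy.has_multiple()
    toy.simplify()
    # only the two distinct, non-loop edges remain
    assert not toy.has_multiple() and toy.ecount() == 2
    return toy.ecount()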
Example #3
def buildGMLcatalog(gml_dir):
    """ Walks through the subdirectories of a root to find all gml files, then
    catalogs the relevant information about the contained networks.

    Input:
        gml_dir                 string, path to the root directory where gmls are

    Output:
        df                      DataFrame, catalog of the existing gml files

    """
    df = pd.DataFrame(columns=['fp_gml', 'Weighted', 'Directed', 'Bipartite',
                               'Multigraph', 'Multiplex'])
    # make list of file paths to gmls
    fpV = []
    for root, dirs, files in os.walk(gml_dir):
        for name in files:
            # collect every gml file under the root
            if name.endswith('.gml'):
                fpV.append(os.path.join(root, name))
    # create the catalog
    for fp in fpV:
        g = igraph.read(fp)
        splitfp = fp.split('/')
        name = splitfp[-1]
        # add new row or overwrite existing row
        df.loc[name] = np.nan
        # use .loc with (row, column) to avoid chained-assignment pitfalls
        df.loc[name, 'fp_gml'] = fp
        df.loc[name, 'Weighted'] = sg.weighted(g, fp)
        df.loc[name, 'Directed'] = sg.directed(g)
        df.loc[name, 'Bipartite'] = sg.bipartite(g, fp)
        df.loc[name, 'Multigraph'] = sg.multigraph(g)
        df.loc[name, 'Multiplex'] = sg.multiplex(g)
        if (df.loc[name] == 'error').any():
            # this catches bad bipartite gmls
            df = df.drop(name)
            print('dropping {} from the considered gmls'.format(name))
    return df
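# Usage sketch (the directory path below is a placeholder, not from the original
# code): build the catalog for a folder of gml files and inspect a few of the
# recorded structural flags.
def _catalog_usage_sketch():
    catalog = buildGMLcatalog('data/gmls')
    print(catalog[['Weighted', 'Directed', 'Bipartite']].head())
    return catalog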
Example #4
def processmultiplex(g, fp, degdir, analysis):
    """ Processes a multiplex graph. Splits along the edge types, so that each
    edge type gets its own new graph. Then sends each of these new graphs
    through the structural hierarchy and sends them along the appropriate path
    for further processing.

    Input:
        g                     igraph Graph object, known to be multiplex
        fp                    file path, leads to gml file

    """
    # project onto layers
    # pull out list of attributes
    attributes = g.es.attributes()
    # if there are multiple edge types, split on these.
    if len(attributes) > 1:
        for att in attributes:
            # assume these attribute values are weights, so they should be numeric
            attkeyword = att + '_notin'
            # list of non-numeric values to avoid
            notthese = ['', 'Nan', 'n']
            # pull out the edges with non-empty weights
            edgeseq = g.es(**{attkeyword: notthese})
            # project onto the subgraph
            graph = g.subgraph_edges(edgeseq)
            namekey = '_multiplex' + att
            mpkey = 'sub_' + att
            if sg.bipartite(graph, fp) == 1:
                processbipartite(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            elif sg.multigraph(graph) == 1 or sg.weighted(graph) == 1:
                processmultigraph(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            elif sg.directed(graph) == 1:
                processdirected(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            else:
                readdeg(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
    # If there is only one edge type, assume its values encode the layers and
    # split on those values.
    else:
        att = attributes[0]
        # get just the unique values the attribute takes
        types = np.unique(g.es[att])
        # initialize an empty list of edge sequences, one per projection subgraph
        edgeseqs = []
        attkeyword = att + '_eq'
        for typ in types:
            edgeseqs.append(g.es(**{attkeyword: typ}))
        # project onto the subgraphs
        subgraphs = [g.subgraph_edges(edgeseq) for edgeseq in edgeseqs]
        # process all the subgraphs
        for i, graph in enumerate(subgraphs):
            namekey = '_multiplex' + str(types[i])
            mpkey = 'sub_' + str(i)
            if sg.bipartite(graph, fp) == 1:
                processbipartite(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            elif sg.multigraph(graph) == 1 or sg.weighted(graph) == 1:
                processmultigraph(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            elif sg.directed(graph) == 1:
                processdirected(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
            else:
                readdeg(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)

        # process the union graph
        graph = g
        namekey = '_multiplexunion'
        mpkey = 'union'
        if sg.bipartite(graph, fp) == 1:
            processbipartite(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
        elif sg.multigraph(graph) == 1:
            processmultigraph(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
        elif sg.weighted(graph) == 1:
            processweighted(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
        elif sg.directed(graph) == 1:
            processdirected(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
        else:
            readdeg(graph, fp, degdir, analysis, namekey=namekey, mpkey=mpkey)
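# Illustration only (not from the original pipeline): the igraph edge-selector
# keywords that processmultiplex() builds dynamically ('<attribute>_eq' and
# '<attribute>_notin'). The toy graph and its 'layer' attribute are hypothetical.
def _layer_split_sketch():
    import igraph
    toy = igraph.Graph([(0, 1), (1, 2), (2, 0)])
    toy.es['layer'] = ['a', 'a', 'b']
    # edges whose 'layer' attribute equals 'a'
    layer_a = toy.subgraph_edges(toy.es(layer_eq='a'))
    # edges whose 'layer' attribute is not in the given list
    not_b = toy.subgraph_edges(toy.es(layer_notin=['b']))
    return layer_a.ecount(), not_b.ecount()  # both 2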
Example #5
def buildGMLcatalog(gmldirpath, df, avoiddirs, overwrite):
    """ Walks through the subdirectories of a root to find all gml files, then
    catalogs the relevant information about the contained networks.

    Input:
        gmldirpath              string, path to the root directory where gmls are
        df                      DataFrame, catalog of the existing gml files
        avoiddirs               list, contains names of any directories to avoid,
                                    e.g. 'n7'
        overwrite               boolean, if True, forces overwrite of rows that
                                    already exist in the catalog. Otherwise only
                                    new files are added.

    Output:
        df                      DataFrame, catalog of the existing gml files

    """
    # make list of file paths to gmls
    fpV = []
    for root, dirs, files in os.walk(gmldirpath):
        # avoid gmls that are known to crash igraph.read()
        if 'bad' in dirs:
            dirs.remove('bad')
        if 'crash' in dirs:
            dirs.remove('crash')
        # avoid any other directories
        for avdir in avoiddirs:
            if avdir in dirs:
                dirs.remove(avdir)
        for name in files:
            # leave out the bipartite projections so we can make our own
            if name.endswith('.gml') and '1mode' not in name:
                fpV.append(os.path.join(root, name))
    # check which gmls are already in df
    if not overwrite:
        newfpV = set(fpV).difference(set(df['fp_gml']))
    else:
        newfpV = fpV
    # update the catalog
    for fp in newfpV:
        g = igraph.read(fp)
        splitfp = fp.split('/')
        name = splitfp[-1]
        # add new row or overwrite existing row
        df.loc[name] = np.nan
        # use .loc with (row, column) to avoid chained-assignment pitfalls
        df.loc[name, 'fp_gml'] = fp
        df.loc[name, 'Domain'] = splitfp[-4]
        df.loc[name, 'Subdomain'] = splitfp[-3]
        # cut out the 'n' prefix of the graph-order directory, e.g. 'n7' -> 7
        df.loc[name, 'Graph_order'] = int(splitfp[-2][1:])
        df.loc[name, 'Weighted'] = sg.weighted(g, fp)
        df.loc[name, 'Directed'] = sg.directed(g)
        df.loc[name, 'Bipartite'] = sg.bipartite(g, fp)
        df.loc[name, 'Multigraph'] = sg.multigraph(g)
        df.loc[name, 'Multiplex'] = sg.multiplex(g)
        if (df.loc[name] == 'error').any():
            # this catches bad bipartite gmls
            df = df.drop(name)
    return df
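# Usage sketch (placeholder path and arguments, not from the original code):
# update an existing catalog, skipping the 'n7' directories and leaving rows
# that are already present untouched.
def _catalog_update_sketch(existing_df):
    return buildGMLcatalog('data/gmls', existing_df, avoiddirs=['n7'], overwrite=False)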