示例#1
0
def as_unweighted_undirected(data, newname, description="", overide=False):
    '''
    Build an undirected, unweighted dataset from an existing dataset.

    Only the ``src`` and ``dest`` columns of the source edges are kept, so
    any other edge column is dropped. The ground truth of the source dataset
    is passed along when it has one; otherwise ``None`` is passed.

    :param data: :py:class:`gct.Dataset` to convert
    :param newname: the name of the new dataset
    :param description:    description of the new dataset
    :param overide:        When true and the named dataset already exists, it will be deleted

    :rtype: :py:class:`gct.Dataset`
    '''
    # Keep just the endpoint columns; this is what makes the result unweighted.
    endpoints = data.get_edges()[['src', 'dest']]
    ground_truth = data.get_ground_truth() if data.has_ground_truth() else None

    return Dataset(name=newname,
                   edgesObj=endpoints,
                   groundtruthObj=ground_truth,
                   weighted=False,
                   directed=False,
                   description=description,
                   overide=overide)
示例#2
0
def from_snap(name, graph, description="", overide=False):
    '''
    Create a dataset from a SNAP graph.

    The graph is treated as undirected when it is a ``snap.PUNGraph`` and as
    directed when it is a ``snap.PNGraph``. The resulting dataset is always
    unweighted; each edge contributes its source and destination node ids.

    :param name:         identifier of the dataset
    :param graph:        a SNAP graph (``snap.PUNGraph`` or ``snap.PNGraph``)
    :param description:    description of the dataset
    :param overide:        When true and the named dataset already exists, it will be deleted

    :raises Exception: if ``graph`` is neither of the two supported SNAP types

    :rtype: :py:class:`gct.Dataset`
    '''
    # Imported lazily so that snap is only needed when this function is called.
    import snap

    is_undirected = isinstance(graph, snap.PUNGraph)
    if not is_undirected and not isinstance(graph, snap.PNGraph):
        raise Exception("Unkown graph type: " + str(type(graph)))

    endpoint_rows = [[edge.GetSrcNId(), edge.GetDstNId()]
                     for edge in graph.Edges()]
    edge_frame = pd.DataFrame(endpoint_rows, columns=['src', 'dest'])

    return Dataset(name,
                   edgesObj=edge_frame,
                   weighted=False,
                   directed=not is_undirected,
                   description=description,
                   overide=overide)
示例#3
0
def from_igraph(name, graph, data='weight', description="", overide=True):
    '''
    Create a dataset from an `iGraph <http://igraph.org/>`_ graph.

    Direction and weightedness are taken from ``graph.is_directed()`` and
    ``graph.is_weighted()``. For a weighted graph the edge attribute named by
    ``data`` is stored in the ``weight`` column.

    :param name:         identifier of the dataset
    :param graph:        an igraph graph
    :param data:         the name of the edge data which is taken as weights. Ignored for unweighted graphs.
    :param description:    description of the dataset
    :param overide:        When true and the named dataset already exists, it will be deleted

    :rtype: :py:class:`gct.Dataset`
    '''
    is_directed = graph.is_directed()
    # NOTE(review): weightedness comes from graph.is_weighted(), not from
    # whether the ``data`` attribute exists -- confirm that a graph carrying
    # only a custom weight attribute is meant to be treated as unweighted.
    has_weights = graph.is_weighted()

    edge_frame = pd.DataFrame([edge.tuple for edge in graph.es],
                              columns=['src', 'dest'])
    if has_weights:
        edge_frame['weight'] = graph.es[data]

    return Dataset(name,
                   edgesObj=edge_frame,
                   weighted=has_weights,
                   directed=is_directed,
                   description=description,
                   overide=overide)
def load_snap_dataset(name, overide=False):
    '''
    Return the dataset registered under ``name``.

    When ``overide`` is false and ``local_exists(name)`` is true, the result
    of ``load_local(name)`` is returned. Otherwise a new dataset is built from
    the ``_DATASET_[name]`` configuration entry.

    :param name:         key of the dataset in ``_DATASET_``
    :param overide:        when true, skip the local lookup and rebuild; also forwarded to the new dataset

    :rtype: :py:class:`gct.Dataset`
    '''
    # ``overide`` is tested first so the local lookup is skipped when rebuilding.
    rebuild = overide or not local_exists(name)
    if not rebuild:
        return load_local(name)

    entry = _DATASET_[name]
    return Dataset(name,
                   edgesObj=entry.get_edges(),
                   groundtruthObj=entry.get_ground_truth(),
                   description=entry.description,
                   weighted=entry.weighted,
                   directed=entry.directed,
                   overide=overide)
示例#5
0
def as_mirror_edges(data, newname, description="", overide=False):
    '''
    Build a dataset in which every edge also appears with its endpoints swapped.

    The edges of ``data`` are concatenated with a copy whose ``src`` and
    ``dest`` columns are exchanged; all other columns are carried over
    unchanged. Direction and weightedness are taken from ``data``, and the
    new dataset is created with ``is_edge_mirrored=True``.

    :param data: source dataset (presumably a :py:class:`gct.Dataset` -- confirm)
    :param newname: the name of the new dataset
    :param description:    description of the new dataset
    :param overide:        forwarded unchanged to ``Dataset``

    :return: the ``Dataset`` built from the mirrored edges
    '''
    forward = data.get_edges()
    # Same rows with the endpoint columns exchanged; both right-hand sides are
    # read from ``forward``, so the swap does not depend on assignment order.
    backward = forward.assign(src=forward['dest'], dest=forward['src'])
    mirrored = pd.concat([forward, backward], axis=0)

    ground_truth = data.get_ground_truth() if data.has_ground_truth() else None

    return Dataset(name=newname,
                   description=description,
                   groundtruthObj=ground_truth,
                   edgesObj=mirrored,
                   directed=data.is_directed(),
                   weighted=data.is_weighted(),
                   is_edge_mirrored=True,
                   overide=overide)
示例#6
0
def from_edgelist(name,
                  edgelist,
                  groundtruth=None,
                  directed=False,
                  description="",
                  overide=True):
    """
    Create a dataset from an edge list.

    Whether the graph is weighted is inferred from the width of the first
    row: two entries mean unweighted, three entries mean weighted.

    :param name:         identifier of the dataset
    :param edgelist:     a 2d list (list of list) or a 2d numpy ndarray in [[src node, target node, weight],...] format.
                         Or a dataframe that has columns of "src","dest","weight".
                         Weight is optional, if missing it is an unweighted graph.
    :param groundtruth:  None or a 2d list (list of list) or a 2d numpy ndarray in [[node, cluster],...] format.
                         Or a dataframe that has columns of "node","cluster".

    :param directed:     this is a directed graph
    :param description:    description of the dataset
    :param overide:        When true and the named dataset already exists, it will be deleted

    :raises AssertionError: if ``edgelist`` is empty
    :raises ValueError: if the first row has neither 2 nor 3 entries

    :rtype: :py:class:`gct.Dataset`

    """
    # Raised explicitly rather than via an ``assert`` statement so the check
    # still runs under ``python -O``. The exception type is kept as
    # AssertionError for existing callers. ``len`` is used instead of
    # truthiness because DataFrames and ndarrays have no unambiguous truth value.
    if len(edgelist) == 0:
        raise AssertionError("Error, empty edgelist")

    if isinstance(edgelist, pd.DataFrame):
        firstrow = edgelist.iloc[0]
    else:
        firstrow = edgelist[0]

    width = len(firstrow)
    if width not in (2, 3):
        # ValueError is a subclass of Exception, so handlers written for the
        # previous generic Exception still catch it.
        raise ValueError("Format not right")
    weighted = width == 3

    return Dataset(name=name,
                   edgesObj=edgelist,
                   groundtruthObj=groundtruth,
                   weighted=weighted,
                   directed=directed,
                   description=description,
                   overide=overide)
示例#7
0
def from_networkx(name,
                  graph,
                  weighted=False,
                  data='weight',
                  default=1,
                  description="",
                  overide=True):
    '''
    Create a dataset from a networkx graph.

    Direction is taken from ``graph.is_directed()``. When ``weighted`` is
    true the edges are read with ``graph.edges(data=data, default=default)``;
    otherwise plain ``graph.edges()`` is used.

    :param name:         identifier of the dataset
    :param graph:        a networkx graph
    :param weighted:     is it a weighted graph?

    :param data:         the name of the edge data which is taken as weights.
    :param default:        default weight if networkx edge data is missing.
    :param description:    description of the dataset
    :param overide:        When true and the named dataset already exists, it will be deleted

    :rtype: :py:class:`gct.Dataset`
    '''
    is_directed = graph.is_directed()

    # Materialise the edge view into a plain list of tuples.
    if weighted:
        edge_rows = list(graph.edges(data=data, default=default))
    else:
        edge_rows = list(graph.edges())

    return Dataset(name,
                   edgesObj=edge_rows,
                   weighted=weighted,
                   directed=is_directed,
                   description=description,
                   overide=overide)