def as_unweighted_undirected(data, newname, description="", overide=False):
    '''
    Derive an undirected, unweighted dataset from an existing dataset.

    Only the ``src`` and ``dest`` columns of the source edges are kept. The
    ground truth is passed along when the source dataset has one.

    :param data: :py:class:`gct.Dataset` to convert
    :param newname: the name of the new dataset
    :param description: description of the new dataset
    :param overide: When true and the named dataset already exists, it will be deleted
    :rtype: :py:class:`gct.Dataset`
    '''
    # Selecting just the endpoint columns leaves out any other edge column.
    endpoints = data.get_edges()[['src', 'dest']]
    truth = data.get_ground_truth() if data.has_ground_truth() else None
    return Dataset(
        name=newname,
        description=description,
        groundtruthObj=truth,
        edgesObj=endpoints,
        directed=False,
        weighted=False,
        overide=overide,
    )
def from_snap(name, graph, description="", overide=False):
    '''
    Build an unweighted dataset from a SNAP graph.

    A ``snap.PUNGraph`` yields an undirected dataset and a ``snap.PNGraph``
    a directed one; any other type raises an ``Exception``.

    :param name: identifier of the dataset
    :param graph: a SNAP graph (``snap.PUNGraph`` or ``snap.PNGraph``)
    :param description: description of the dataset
    :param overide: When true and the named dataset already exists, it will be deleted
    :rtype: :py:class:`gct.Dataset`
    '''
    # Imported lazily so that snap is only required when this function is called.
    import snap

    is_undirected = isinstance(graph, snap.PUNGraph)
    if not (is_undirected or isinstance(graph, snap.PNGraph)):
        raise Exception("Unkown graph type: " + str(type(graph)))

    pairs = [[edge.GetSrcNId(), edge.GetDstNId()] for edge in graph.Edges()]
    frame = pd.DataFrame(pairs, columns=['src', 'dest'])
    return Dataset(
        name,
        edgesObj=frame,
        weighted=False,
        directed=not is_undirected,
        description=description,
        overide=overide,
    )
def from_igraph(name, graph, data='weight', description="", overide=True):
    '''
    Build a dataset from an `iGraph <http://igraph.org/>`_ graph.

    Directedness and weightedness are taken from ``graph.is_directed()`` and
    ``graph.is_weighted()``.

    :param name: identifier of the dataset
    :param graph: an igraph graph
    :param data: the name of the edge data which is taken as weights. Ignored for an unweighted graph.
    :param description: description of the dataset
    :param overide: When true and the named dataset already exists, it will be deleted
    :rtype: :py:class:`gct.Dataset`
    '''
    is_directed = graph.is_directed()
    is_weighted = graph.is_weighted()

    endpoints = [edge.tuple for edge in graph.es]
    frame = pd.DataFrame(endpoints, columns=['src', 'dest'])
    if is_weighted:
        # The edge attribute named by `data` becomes the 'weight' column.
        frame['weight'] = graph.es[data]

    return Dataset(
        name,
        edgesObj=frame,
        weighted=is_weighted,
        directed=is_directed,
        description=description,
        overide=overide,
    )
def load_snap_dataset(name, overide=False):
    '''
    Load the dataset registered under ``name``.

    When ``overide`` is false and a local copy exists, that copy is returned.
    Otherwise the dataset is built from the ``_DATASET_[name]`` configuration
    (presumably the registry of SNAP datasets -- confirm against its definition).

    :param name: key of the dataset in ``_DATASET_``
    :param overide: when true, skip the local copy and rebuild; also forwarded to the new dataset
    :return: the result of ``load_local(name)`` or a newly built ``Dataset``
    '''
    # Keep this operand order: local_exists() is not consulted when overide is set.
    if not overide and local_exists(name):
        return load_local(name)

    conf = _DATASET_[name]
    return Dataset(
        name,
        edgesObj=conf.get_edges(),
        groundtruthObj=conf.get_ground_truth(),
        description=conf.description,
        weighted=conf.weighted,
        directed=conf.directed,
        overide=overide,
    )
def as_mirror_edges(data, newname, description="", overide=False):
    '''
    Build a dataset whose edge table holds every edge in both orientations.

    The original edges are followed by a copy with ``src`` and ``dest``
    swapped. Directedness and weightedness are taken from ``data``, and the
    new dataset is created with ``is_edge_mirrored=True``.

    :param data: the source dataset
    :param newname: the name of the new dataset
    :param description: description of the new dataset
    :param overide: forwarded to the new dataset
    :return: the new ``Dataset``
    '''
    forward = data.get_edges()
    # assign() works on a copy, so both swapped columns are read from `forward`.
    backward = forward.assign(src=forward['dest'], dest=forward['src'])
    mirrored = pd.concat([forward, backward], axis=0)

    truth = data.get_ground_truth() if data.has_ground_truth() else None
    return Dataset(
        name=newname,
        description=description,
        groundtruthObj=truth,
        edgesObj=mirrored,
        directed=data.is_directed(),
        weighted=data.is_weighted(),
        is_edge_mirrored=True,
        overide=overide,
    )
def from_edgelist(name, edgelist, groundtruth=None, directed=False, description="", overide=True):
    """
    Create a dataset from an edge list.

    The graph is treated as weighted when the first row has three entries and
    as unweighted when it has two.

    :param name: identifier of the dataset
    :param edgelist: a 2d list (list of list) or a 2d numpy ndarray in
        [[src node, target node, weight],...] format. Or a dataframe that has
        columns of "src","dest","weight". Weight is optional, if missing it is
        an unweighted graph.
    :param groundtruth: None or a 2d list (list of list) or a 2d numpy ndarray in
        [[node, cluster],...] format. Or a dataframe that has columns of "node","cluster".
    :param directed: whether this is a directed graph
    :param description: description of the dataset
    :param overide: When true and the named dataset already exists, it will be deleted
    :raises AssertionError: if ``edgelist`` is empty
    :raises Exception: if the first row has neither two nor three entries
    :rtype: :py:class:`gct.Dataset`
    """
    # Raised explicitly instead of using `assert`, which is removed under
    # `python -O`. The exception type and message match the former assertion.
    if len(edgelist) == 0:
        raise AssertionError("Error, empty edgelist")

    # For a DataFrame, the length of a row is its number of columns.
    first_row = edgelist.iloc[0] if isinstance(edgelist, pd.DataFrame) else edgelist[0]
    width = len(first_row)
    if width not in (2, 3):
        raise Exception("Format not right")

    return Dataset(
        name=name,
        edgesObj=edgelist,
        groundtruthObj=groundtruth,
        weighted=(width == 3),
        directed=directed,
        description=description,
        overide=overide,
    )
def from_networkx(name, graph, weighted=False, data='weight', default=1, description="", overide=True):
    '''
    Build a dataset from a networkx graph.

    Directedness is taken from ``graph.is_directed()``.

    :param name: identifier of the dataset
    :param graph: a networkx graph
    :param weighted: is it a weighted graph?
    :param data: the name of the edge data which is taken as weights.
    :param default: default weight if networkx edge data is missing.
    :param description: description of the dataset
    :param overide: When true and the named dataset already exists, it will be deleted
    :rtype: :py:class:`gct.Dataset`
    '''
    is_directed = graph.is_directed()
    if weighted:
        # Request the `data` attribute for each edge, with `default` as fallback.
        edge_rows = list(graph.edges(data=data, default=default))
    else:
        edge_rows = list(graph.edges())
    return Dataset(
        name,
        edgesObj=edge_rows,
        weighted=weighted,
        directed=is_directed,
        description=description,
        overide=overide,
    )