def write_graph(G, name):
    """Export ``G`` as GraphML and DOT files inside the ``results`` folder.

    The folder is created on first use. When it is already present a
    warning is printed, since files of the same name get overwritten.

    :param G: graph object handed to the networkx writers
    :param name: base file name (without extension) for both output files
    """
    output_dir = 'results'
    try:
        os.mkdir(output_dir)
    except FileExistsError:
        print(
            'W: results directory already exists. Files will be overwritten.')

    graphml_path = f"{output_dir}/{name}.xml"
    dot_path = f"{output_dir}/{name}.dot"
    nx.write_graphml_xml(G, graphml_path)
    nx.drawing.nx_pydot.write_dot(G, dot_path)
Пример #2
0
def main():
    """Collapse the Cytoscape edge table into a cluster-level graph.

    Reads the ``shared name`` column of the Cytoscape edge table and the
    node table stored in ``<root_dir>/data/node_table.csv``, replaces both
    endpoints of every edge by the ``clusterONE`` value of the corresponding
    node, removes self loops and rows with a missing cluster, and writes the
    resulting graph to ``<root_dir>/results/pivot.xml``. The unfiltered
    cluster pairs are also printed and dumped to ``results/test.csv``.
    """
    root_dir = get_root_dir()
    base_dir = os.path.join(os.getcwd(), root_dir)
    results_dir = os.path.join(base_dir, 'results')
    # Both outputs below land in this folder; create it up front so the
    # first write does not fail on a fresh checkout.
    os.makedirs(results_dir, exist_ok=True)

    # Read edge table from cytoscape for full network
    edge_table = cytoscape.table.getTable(columns=['shared name'],
                                          table='edge')
    # Read node table
    node_table = pd.read_csv(os.path.join(base_dir, 'data', 'node_table.csv'))

    # Split "source,target" on the first comma only. ``n`` is passed by
    # keyword because pandas 2.x made it keyword-only.
    endpoint_pairs = edge_table['shared name'].str.split(',', n=1).to_list()
    source_target_list = pd.DataFrame(endpoint_pairs,
                                      columns=['source', 'target'])

    # Get only shared name and clusterONE columns from node_table
    reduced_node_table = node_table[['shared name', 'clusterONE']]

    # Add cluster numbers to corresponding source nodes
    df = pd.merge(source_target_list,
                  reduced_node_table,
                  left_on=['source'],
                  right_on=['shared name'])
    df = df.rename(columns={'clusterONE': 'clusterONE_source'})
    # Add cluster numbers to corresponding target nodes
    df = pd.merge(df,
                  reduced_node_table,
                  left_on=['target'],
                  right_on=['shared name'])
    df = df.rename(columns={'clusterONE': 'clusterONE_target'})

    # Keep only the cluster numbers; they become the node names of the
    # new graph.
    source_target_df = df[['clusterONE_source', 'clusterONE_target']]
    print(source_target_df)
    source_target_df.to_csv(os.path.join(results_dir, 'test.csv'))

    # Get rid of self loops and of edges with a missing cluster. Filtering
    # into a new frame (instead of in-place edits on a slice of ``df``)
    # avoids pandas' SettingWithCopyWarning.
    is_self_loop = (source_target_df['clusterONE_source'] ==
                    source_target_df['clusterONE_target'])
    source_target_df = source_target_df[~is_self_loop].dropna()

    # Build the graph
    g = nx.from_pandas_edgelist(source_target_df,
                                source='clusterONE_source',
                                target='clusterONE_target')
    # Save graph
    nx.write_graphml_xml(g, path=os.path.join(results_dir, 'pivot.xml'))
Пример #3
0
    def convert2GraphML(self):
        """Load ``self.Graph`` from the edge list, caching it as GraphML.

        File names are derived from ``self.edgelist_file`` by replacing its
        extension with ``_graph.csv`` and ``_graph.graphml``. When the
        GraphML file already exists it is read back directly. Otherwise the
        edge list is parsed into ``parent``/``child`` rows, saved as CSV,
        turned into a directed graph and written to ``self.gml_file``.

        Blank lines in the edge list are skipped; previously a trailing
        empty line raised ``IndexError`` (space-delimited input) or produced
        a bogus row ('#'-delimited input).
        """
        stem, _ = os.path.splitext(self.edgelist_file)
        csv_file = "{}_graph.csv".format(stem)
        self.gml_file = "{}_graph.graphml".format(stem)

        if os.path.isfile(self.gml_file):
            self.Graph = nx.read_graphml(self.gml_file)
        else:
            # 1. convert to pandas df
            rows = []
            if self.delim == '#':
                with open(self.edgelist_file, "rb") as fmain:
                    for raw_line in fmain:
                        if not raw_line.strip():
                            continue
                        # Normalise before stripping, exactly as before:
                        # lower-case, spaces become dashes, then split.
                        text = raw_line.decode('utf-8').lower()
                        text = text.replace(' ', '-')
                        rows.append(text.strip().split('#'))
            else:
                with open(self.edgelist_file, "r") as fmain:
                    for raw_line in fmain:
                        fields = raw_line.strip().split(' ')
                        if fields == ['']:
                            continue
                        # Only the first two space-separated tokens are used.
                        rows.append([fields[0], fields[1]])

            cat = pd.DataFrame(rows, columns=["parent", "child"])
            cat.to_csv(csv_file, index=False)

            # 2. use df object as edgelist to create graphml object
            self.Graph = nx.from_pandas_edgelist(cat,
                                                 'parent',
                                                 'child',
                                                 create_using=nx.DiGraph)
            nx.write_graphml_xml(self.Graph, self.gml_file)

        logging.info("Converted to graphml!")
Пример #4
0
def build_graph(nodes,
                property_edges,
                compose_edges,
                explain_edges,
                field_nodes=None,
                field_edges=None,
                property_edge_weight=1,
                compose_edge_weight=0.5,
                explain_edge_weight=0.1,
                export_file=None):
    """
    Assemble a directed, weighted graph from schema nodes and three edge kinds.

    Property edges are inserted in both directions; compose and explain edges
    are inserted as given. Optional field nodes/edges are appended to the
    schema nodes/property edges before the graph is built.
    :param nodes: a list of tuples of long node name and dict with short names. e.g. long=funder.budget, short=budget
    :param property_edges: list of tuples of long form nodes, for standard schema relationships
    :param compose_edges: per references to another property
    :param explain_edges: per property_edges but for explains cross-references between nodes.
    :param field_nodes: extra nodes appended to ``nodes`` when given
    :param field_edges: extra edges treated like property edges when given
    :param property_edge_weight: weight stored on property edges (both directions)
    :param compose_edge_weight: weight stored on compose edges
    :param explain_edge_weight: weight stored on explain edges
    :param export_file: path prefix; the graph is written to ``export_file + '.graphml'``
        unless the CONTEXT rule configuration already requested an export
    :return: a networkx graph
    """
    # Field nodes/edges are handled as ordinary nodes and property edges.
    all_nodes = nodes + field_nodes if field_nodes else nodes
    forward_edges = (property_edges + field_edges
                     if field_edges else property_edges)

    graph = nx.DiGraph()
    graph.add_nodes_from(all_nodes)
    graph.add_edges_from(forward_edges, weight=property_edge_weight)
    # Property relationships are bidirectional: add the mirrored edges too.
    mirrored_edges = [(edge[1], edge[0]) for edge in forward_edges]
    graph.add_edges_from(mirrored_edges, weight=property_edge_weight)
    graph.add_edges_from(compose_edges, weight=compose_edge_weight)
    graph.add_edges_from(explain_edges, weight=explain_edge_weight)

    # The configured export takes precedence over the explicit export_file.
    target = None
    if CONTEXT.rule_config.rule_01_causal_relationships.export_graphml:
        target = os.path.join(CONTEXT.directory, f'{CONTEXT.id}.graphml')
        logging.info(f'writing graph to {target}')
    elif export_file:
        target = export_file + '.graphml'

    if target is not None:
        nx.write_graphml_xml(graph, target)

    return graph
Пример #5
0
def main():
    """Endlessly fetch a graph with its sequences, save both and plot them.

    Each round writes the graph to ``/tmp/graph`` and the sequences to
    ``/tmp/sequences``, stores a Kamada-Kawai position on every node and
    shows a plot titled with the seed, the round counter and the
    min / mean / max out-degree. The loop has no exit condition.
    """
    seed = 0  # int(time.time())
    random.seed(seed)
    iteration = 0
    while True:
        # Alternative generator kept for reference:
        # g, sqs = generate_graph_sequences_2(20, 8, 8, seed)
        g, sqs = get_graph_sequences()
        nx.write_graphml_xml(g, '/tmp/graph')
        save_sequences(sqs, '/tmp/sequences')

        layout = nx.kamada_kawai_layout(g)
        for node, position in layout.items():
            g.nodes[node]['pos'] = position

        out_degrees = [deg for _, deg in g.out_degree]
        title = (f'seed: {seed} k: {iteration} out deg: '
                 f'{min(out_degrees)} | '
                 f'{sum(out_degrees) / len(g)} | '
                 f'{max(out_degrees)}')
        draw(g, sqs, title)
        plt.show()
        iteration += 1
Пример #6
0
def dumpDataset(dataset_name, dataset, lxml=False):
    """Write every graph of ``dataset`` to its own GraphML file.

    A folder named ``dataset_name`` is created in the current working
    directory and the graphs are stored there as
    ``<dataset_name>_<k>.graphml`` with ``k`` starting at 1. If the folder
    cannot be created (for example because it already exists) nothing is
    written and the function returns silently.

    :param dataset_name: name of the folder and prefix of the file names
    :param dataset: iterable of graphs
    :param lxml: use ``nx.write_graphml_lxml`` instead of
        ``nx.write_graphml_xml``
    """
    # add a folder for the dataset
    target_dir = os.path.join(os.getcwd(), dataset_name)
    try:
        os.mkdir(target_dir)
    except OSError:
        # Best effort, as before: no folder means no dump.
        return

    # dump all graphml files; the networkx writers open the path themselves,
    # so no separate file handle is opened here.
    for index, graph in enumerate(dataset, start=1):
        path = os.path.join(target_dir, f"{dataset_name}_{index}.graphml")
        if lxml:
            nx.write_graphml_lxml(graph, path)
        else:
            nx.write_graphml_xml(graph, path)
def main():
    """Compare two clustering result files and export the difference network.

    Command-line arguments are parsed with docopt from the module docstring.
    The process exits with status 1 when the output file already exists or
    when either input file is missing. Otherwise a network is built from
    both results, node properties are added, identical clusters are
    removed, the network is written to the output file with
    ``nx.write_graphml_xml`` and finally handed to ``display_in_cytoscape``.
    """
    arguments = docopt(__doc__, version='cluster_result_comparator.py 0.1')

    output_name = arguments["--output"]
    result_file_1 = arguments["<result1.clustering>"]
    result_file_2 = arguments["<result2.clustering>"]

    # sanity check
    if os.path.isfile(output_name):
        print("Error: Result file exists")
        sys.exit(1)

    for result_file in (result_file_1, result_file_2):
        if not os.path.isfile(result_file):
            print("Error: Failed to find " + result_file)
            sys.exit(1)

    label1 = os.path.basename(result_file_1).replace(".clustering", "_")
    label2 = os.path.basename(result_file_2).replace(".clustering", "_")

    # load the spectra to cluster information
    print("Loading data from " + result_file_1 + "...")
    spectra_to_cluster_1 = load_spectra_to_cluster(result_file_1,
                                                   before_cluster_id=label1)
    print("  " + str(len(spectra_to_cluster_1)) + " spectra loaded")

    print("Loading data from " + result_file_2 + "...")
    spectra_to_cluster_2 = load_spectra_to_cluster(result_file_2,
                                                   before_cluster_id=label2)
    print("  " + str(len(spectra_to_cluster_2)) + " spectra loaded")

    # build the network
    network = build_network(spectra_to_cluster_1,
                            spectra_to_cluster_2,
                            source1=os.path.basename(result_file_1),
                            source2=os.path.basename(result_file_2))

    print("Created network with " + str(len(network.nodes)) + " nodes and " +
          str(len(network.edges)) + " edges")

    # add node properties
    print("Adding node properties...")
    add_node_properties(network, result_file_1, label1)
    add_node_properties(network, result_file_2, label2)

    # count the total number of spectra
    total_spectra = get_number_of_spectra(network)

    # remove all nodes that are only connected to a single other node (ie. identical clusters)
    removed_nodes = remove_identical_clusters(network)

    print("Removed " + str(removed_nodes) +
          " identical nodes from the graph. " + str(len(network.nodes)) +
          " nodes remaining.")

    remaining_spectra = get_number_of_spectra(network)
    # Guard against an empty network: no spectra means nothing is affected,
    # and the plain division would raise ZeroDivisionError.
    if total_spectra:
        affected_percent = round(remaining_spectra / total_spectra * 100,
                                 ndigits=2)
    else:
        affected_percent = 0.0
    print("  Affecting " + str(remaining_spectra) + "/" + str(total_spectra) +
          " (" + str(affected_percent) + "%) spectra")

    # save the GraphML file
    nx.write_graphml_xml(network, output_name)
    print("Network (GraphML format) written to " + output_name)

    print("Displaying comparison network in Cytoscape (must be open)...")
    display_in_cytoscape(network, network_name=label1 + " vs. " + label2)
Пример #8
0
# Edges touching the "70_*" nodes, listed as (first, second) arguments of
# add_edge in insertion order. Every edge gets weight 1.
_EDGES_70 = (
    ("70_E1", "146_D31"),
    ("70_E1", "146_D32"),
    ("70_D11", "70_E4"),
    ("70_D12", "70_E3"),
    ("70_D13", "70_E2"),
    ("70_D21", "70_E1"),
    ("70_D21", "70_E4"),
    ("70_D22", "70_E3"),
    ("70_D31", "70_E2"),
    ("70_D32", "70_E1"),
    ("70_D41", "70_E2"),
    ("70_D41", "70_E3"),
    ("70_D42", "70_E1"),
)
for _first, _second in _EDGES_70:
    Darmstadt.add_edge(_first, _second, weight=1)

# Export the graph as pretty-printed GraphML. The edge-list and GML
# exports below are kept disabled.
#nx.write_edgelist(Darmstadt,".\\TestDarmstadt.edgelist",data=['weight','signal'])
nx.write_graphml_xml(Darmstadt, ".\\TestDarmstadt.graphml", prettyprint=True)
#nx.write_gml(Darmstadt,".\\TestDarmstadt.gml")
"""

# 37
Darmstadt.add_node("37_D111",pos=[],signal=16)
Darmstadt.add_node("37_D112",pos=[],signal=17)
Darmstadt.add_node("37_D21",pos=[],signal=9)
Darmstadt.add_node("37_D22.1",pos=[],signal=10)
Darmstadt.add_node("37_D51",pos=[],signal=20)
Darmstadt.add_node("37_D52",pos=[],signal=21)
Darmstadt.add_node("37_D81",pos=[],signal=13)
Darmstadt.add_node("37_D82.1",pos=[],signal=14)
Darmstadt.add_node("37_E1",pos=[])
Darmstadt.add_node("37_E2",pos=[])
Darmstadt.add_node("37_E3",pos=[])
            graph.nodes[n_j]['supportPSOE'] += int(
                (int(df_j['Support_PSOE'].iloc[0]) +
                 int(df_k['Support_PSOE'].iloc[0])) * norm_list[i][edge_id])
            graph.nodes[n_j]['supportCs'] += int(
                (int(df_j['Support_Cs'].iloc[0]) +
                 int(df_k['Support_Cs'].iloc[0])) * norm_list[i][edge_id])
            graph.nodes[n_j]['supportPodemos'] += int(
                (int(df_j['Support_Podemos'].iloc[0]) +
                 int(df_k['Support_Podemos'].iloc[0])) * norm_list[i][edge_id])
            graph.nodes[n_j]['supportVox'] += int(
                (int(df_j['Support_VOX'].iloc[0]) +
                 int(df_k['Support_VOX'].iloc[0])) * norm_list[i][edge_id])

            # statal political support
            graph.graph['supportPP'] += graph.nodes[n_j]['supportPP']
            graph.graph['supportPSOE'] += graph.nodes[n_j]['supportPSOE']
            graph.graph['supportCs'] += graph.nodes[n_j]['supportCs']
            graph.graph['supportPodemos'] += graph.nodes[n_j]['supportPodemos']
            graph.graph['supportVox'] += graph.nodes[n_j]['supportVox']

# Write one GraphML file per graph into <data_path>/graphs/<word_list>.
print('Writing results ...')
result_path = os.path.join(data_path, 'graphs', word_list)
# exist_ok=True replaces the separate os.path.exists() check, which could
# race with another process creating the same folder.
os.makedirs(result_path, exist_ok=True)

for graph in graph_list:
    # The file name is taken from the graph-level 'date' attribute.
    file_name = f"graph_{graph.graph['date']}.graphml"
    nx.write_graphml_xml(graph, os.path.join(result_path, file_name))

print(f"Finished, data stored in: {result_path}")
Пример #10
0
def exportGraphGraphml(G, filename):
    """Print the nodes of ``G`` and save the graph to ``filename`` as GraphML.

    The writer is called with ``infer_numeric_types=True``.
    """
    node_view = G.nodes
    print(node_view)
    nx.write_graphml_xml(
        G,
        filename,
        infer_numeric_types=True,
    )
Пример #11
0
 def write_xml(self):
     """Write the graph from ``self.createGraph()`` to ``<model name>.xml``."""
     graph = self.createGraph()
     target = str(self.model.name) + ".xml"
     nx.write_graphml_xml(graph, target)
Пример #12
0
# Load the edge list from <wd>/data/musae_ENGB_edges.csv.
in_file = os.path.join(wd, 'data', 'musae_ENGB_edges.csv')
# Each row is "from,to"; lines starting with '#' are treated as comments.
# NOTE(review): the original note says the first line contains no data --
# it is only skipped if it starts with '#'; confirm against the file.
G = nx.read_edgelist(in_file, delimiter=',', comments='#')

# %% write dataset in GraphML
# We see two different options:
#
# - the GraphML is popular among network scientists; it's based on the XML
#   format; this file format contains metadata on the network
# - the adjacency list turns to be useful for graphs without data associated
#   with nodes or edges and for nodes that can be meaningfully represented as
#   strings

# Option 1 -- GraphML, stored next to the input data.
out_file = os.path.join(wd, 'data', 'to_graphml.xml')
nx.write_graphml_xml(G, out_file)

# Option 2 -- adjacency list, stored under the current working directory.
out_file = os.path.join(os.getcwd(), 'data', 'to_adjlist.adjlist')
nx.write_adjlist(G, out_file)