Пример #1
0
 def load_data(self):
     """Load the StringDB protein-link graph into ``self.nx_graph``.

     The graph is cached as an adjacency-list file under
     ``<datastore>/graphs``. If that file exists it is read back;
     otherwise the graph is built from the detailed protein-links table
     and the cache file is written.

     Reads ``self.datastore``, ``self.graph_type``, ``self.name_to_edge``
     and ``self.randomize``; sets ``self.proteinlinks`` and
     ``self.nx_graph``.
     """
     print("Building StringDB Graph. It can take a while the first time...")
     self.proteinlinks = self.datastore + "/graphs/9606.protein.links.detailed.v11.0.txt"
     savefile = self.datastore + "/graphs/stringdb_graph_" + self.graph_type + "_edges.adjlist"
     if os.path.isfile(savefile):
         # read_adjlist returns a plain Graph by default; wrap it so the
         # cached path yields the same graph class as the fresh build below.
         self.nx_graph = nx.OrderedGraph(nx.read_adjlist(savefile))
     else:
         print(" ensp_to_hugo_map")
         ensmap = ensp_to_hugo_map(self.datastore)
         print(" reading self.proteinlinks")
         edges = pd.read_csv(self.proteinlinks, sep=' ')
         # Keep only the rows whose score for the selected edge type is non-zero.
         selected_edges = edges[self.name_to_edge[self.graph_type]] != 0
         protein_pairs = edges[selected_edges][["protein1",
                                                "protein2"]].values.tolist()
         # Drop the first five characters of every identifier before the
         # lookup (presumably the "9606." prefix -- TODO confirm), slicing
         # each identifier only once.
         stripped_pairs = ((first[5:], second[5:])
                           for first, second in protein_pairs)
         # Edges with an endpoint missing from the map are discarded.
         edgelist = [[ensmap[first], ensmap[second]]
                     for first, second in stripped_pairs
                     if first in ensmap and second in ensmap]
         print(" creating OrderedGraph")
         self.nx_graph = nx.OrderedGraph(edgelist)
         print(" writing graph")
         nx.write_adjlist(self.nx_graph, savefile)
     # Randomize
     if self.randomize:
         self.nx_graph = nx.relabel.relabel_nodes(
             self.nx_graph, randmap(self.nx_graph.nodes))
     print("Graph built !")
Пример #2
0
 def load_data(self):
     """Load the pickled graph resolved from ``self.at_hash``.

     The value returned by ``at.get`` is handed to networkx's gpickle
     reader and the result is stored in ``self.nx_graph`` as an
     ``OrderedGraph``. When ``self.randomize`` is truthy the nodes are
     relabelled with the mapping produced by ``randmap``.
     """
     source = at.get(self.at_hash, datastore=self.datastore)
     unpickled = nx.readwrite.gpickle.read_gpickle(source)
     self.nx_graph = nx.OrderedGraph(unpickled)

     if not self.randomize:
         return

     # Randomize
     relabelling = randmap(self.nx_graph.nodes)
     self.nx_graph = nx.relabel.relabel_nodes(self.nx_graph, relabelling)
Пример #3
0
 def load_data(self):
     """Load the pickled graph stored under ``<datastore>/graphs/``.

     Sets ``self.location`` to the graphs directory. If the pickle named
     ``self.filename`` is not present there, ``_process_and_pickle`` is
     called first with the target path. The pickle is then read into
     ``self.nx_graph`` as an ``OrderedGraph`` and, when ``self.randomize``
     is truthy, relabelled with the mapping produced by ``randmap``.
     """
     self.location = os.path.join(self.datastore, 'graphs/')
     pickle_path = os.path.join(self.location, self.filename)

     pickle_missing = not os.path.isfile(pickle_path)
     if pickle_missing:
         self._process_and_pickle(save_name=pickle_path)

     unpickled = nx.read_gpickle(pickle_path)
     self.nx_graph = nx.OrderedGraph(unpickled)

     if not self.randomize:
         return

     # Randomize
     relabelling = randmap(self.nx_graph.nodes)
     self.nx_graph = nx.relabel.relabel_nodes(self.nx_graph, relabelling)
Пример #4
0
 def __init__(self, relabel_genes=True, datastore=None, randomize=False):
     """Resolve the datastore, load the graph and post-process node labels.

     Args:
         relabel_genes: When true (the default), relabel the loaded
             graph's nodes with the mapping returned by ``symbol_map``.
             When false, the labels produced by ``load_data`` are kept.
         datastore: Directory stored as ``self.datastore``. Defaults to
             the directory containing this module.
         randomize: When true, relabel the nodes with the mapping
             returned by ``randmap`` as the final step.
     """
     if datastore is None:
         datastore = os.path.dirname(os.path.abspath(__file__))
     self.datastore = datastore

     # Assigned before load_data() so that a load_data implementation
     # which consults self.randomize finds the attribute already set.
     self.randomize = randomize

     self.load_data()

     # Honour the relabel_genes flag instead of always relabelling.
     if relabel_genes:
         self.nx_graph = nx.relabel.relabel_nodes(
             self.nx_graph, symbol_map(self.nx_graph.nodes))

     # Randomize
     if self.randomize:
         print("Randomizing the graph")
         self.nx_graph = nx.relabel.relabel_nodes(
             self.nx_graph, randmap(self.nx_graph.nodes))
Пример #5
0
 def load_data(self):
     """Build ``self.nx_graph`` from the HumanNet-XN edge table.

     Sets ``self.benchmark`` to the table's path, reads it as headerless
     tab-separated data (skipping the first row) and uses the first two
     columns as the edge list. Nodes are then relabelled with the mapping
     from ``ncbi_to_hugo_map``, float-labelled nodes are removed, and the
     graph is optionally relabelled with ``randmap``.
     """
     self.benchmark = self.datastore + "/graphs/HumanNet-XN.tsv"
     table = pd.read_csv(self.benchmark,
                         header=None,
                         sep="\t",
                         skiprows=1)
     # Convert the whole table to an array first, then take two columns.
     endpoint_pairs = table.values[:, :2].tolist()
     self.nx_graph = nx.OrderedGraph(endpoint_pairs)

     # Map nodes from ncbi to hugo names
     hugo_names = ncbi_to_hugo_map(self.nx_graph.nodes,
                                   datastore=self.datastore)
     self.nx_graph = nx.relabel.relabel_nodes(self.nx_graph, hugo_names)

     # Remove nodes which are not covered by the map: these are the ones
     # whose label is still a float after relabelling.
     float_labelled = [label for label in self.nx_graph.nodes
                       if isinstance(label, float)]
     self.nx_graph.remove_nodes_from(float_labelled)

     if not self.randomize:
         return

     # Randomize
     relabelling = randmap(self.nx_graph.nodes)
     self.nx_graph = nx.relabel.relabel_nodes(self.nx_graph, relabelling)
Пример #6
0
            "train_size": train_size,
            "seed": seed,
        }
        print(experiment)

        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(features, 
                                                                                target.to_numpy(), 
                                                                                stratify=target.to_numpy(),
                                                                                train_size=train_size,
                                                                                test_size=test_size,
                                                                                shuffle=True,
                                                                                random_state=seed
                                                                                )
        if gene_graph.randomize:
            print("Randomizing the graph")
            gene_graph.nx_graph = nx.relabel.relabel_nodes(gene_graph.nx_graph, randmap(gene_graph.nx_graph.nodes))

        if num_genes == 'all':
            if 'MLP' in model_name:
                x_train = X_train.copy()
                x_test = X_test.copy()
                adj = None
            else:
                neighbors = gene_graph.nx_graph
                intersection_nodes = np.intersect1d(X_train.columns, neighbors.nodes)
                x_train = X_train[list(intersection_nodes)].copy()
                x_test = X_test[list(intersection_nodes)].copy()

                toremove = set(neighbors.nodes)
                toremove = toremove.difference(intersection_nodes)
                neighbors.remove_nodes_from(toremove)
            "seed": seed,
        }
        print(experiment)

        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
            features,
            target.to_numpy(),
            stratify=target.to_numpy(),
            train_size=train_size,
            test_size=test_size,
            shuffle=True,
            random_state=seed)
        if gene_graph.randomize:
            print("Randomizing the graph")
            gene_graph.nx_graph = nx.relabel.relabel_nodes(
                gene_graph.nx_graph, randmap(gene_graph.nx_graph.nodes))

        if num_genes == 'all':
            if 'MLP' in model_name:
                x_train = X_train.copy()
                x_test = X_test.copy()
                adj = None
            else:
                neighbors = gene_graph.nx_graph
                intersection_nodes = np.intersect1d(X_train.columns,
                                                    neighbors.nodes)
                x_train = X_train[list(intersection_nodes)].copy()
                x_test = X_test[list(intersection_nodes)].copy()

                toremove = set(neighbors.nodes)
                toremove = toremove.difference(intersection_nodes)