def update(e_set_config, rebuild=False): # Create a fresh data model e_data = dm.EnrichmentData("e_sets") # Load the specified e_set, if it exists print "Updating e_set: " + e_set_config.name e_set = e_data.add_e_set(e_set_config.name) storage.ensure_e_set_dir(e_set_config.name) e_set.load() na = ndex_access.NdexAccess(e_set_config.ndex, e_set_config.account_name, e_set_config.password) # Find the candidate networks networks = na.find_networks(e_set_config) print "Found " + str(len(networks)) + " networks" # TODO: remove e_sets that are not found in the query # Figure out which ones to update networks_to_update = {} if rebuild: for network in networks: network_id = network.get("externalId") network_name = network.get("name") networks_to_update[network_name] = network_id else: for network in networks: network_id = network.get("externalId") network_name = network.get("name") modification_date = network.get("modificationTime") id_set = e_set.get_id_set_by_network_id(network_id) if id_set: if not id_set.modificationDate == modification_date: networks_to_update[network_name] = network_id else: networks_to_update[network_name] = network_id print "Updating " + str(len(networks_to_update.keys())) + " networks" # Do the updates counter = 0 term_mapper = term2gene_mapper.Term2gene_mapper() for network_name, network_id in networks_to_update.iteritems(): print network_name.encode("ascii", "ignore") + " : " + network_id.encode("ascii", "ignore") node_table = na.get_nodes_from_cx(network_id) term_mapper.add_network_nodes(node_table) gene_symbols = get_genes_for_network(node_table, term_mapper) id_set_dict = { "name": network_name, "ids": gene_symbols, "network_id": network_id, "ndex": e_set_config.ndex, "e_set": e_set_config.name, } id_set = dm.IdentifierSet(id_set_dict) e_set.add_id_set(id_set) counter += 1 print str(counter) + " networks indexed." 
# now that the updated e_set is ready to save, clear the old cached data storage.remove_all_id_sets(e_set_config.name) print "Saving e_set with " + str(len(e_set.get_id_set_names())) + " id_sets" e_set.save()
# NOTE(review): this is a fragment of a larger pipeline script — the `if`
# matching the `else:` below, and the end of the trailing dict literal,
# lie outside this chunk.  Names such as `upload_to_server`,
# `run_generate_gsea_files` and `networks_to_update` are presumably
# defined earlier in the script — verify against the full file.
else:
    print "skipping run sif to cx"
#===================================
#===================================
# Generate similarity files
#===================================
#===================================
na = ndex_access.NdexAccess(upload_to_server, upload_to_username, upload_to_password)
term_mapper = term2gene_mapper.Term2gene_mapper()
# Create a fresh data model
e_data = dm.EnrichmentData("e_sets")
# Build/refresh the 'pipeline' e-set on disk.
e_set = e_data.add_e_set('pipeline')
storage.ensure_e_set_dir('pipeline')
counter = 0
if run_generate_gsea_files:
    # Index each selected network: map its nodes to gene symbols and
    # collect them into an id-set dict (continued past this chunk).
    for network_name, network_id in networks_to_update.iteritems():
        print str(network_id)
        print network_name.encode(
            'ascii', 'ignore') + " : " + network_id.encode('ascii', 'ignore')
        node_table = na.get_nodes_from_cx(network_id)
        term_mapper.add_network_nodes(node_table)
        gene_symbols = get_genes_for_network(node_table, term_mapper)
        # Log networks that yielded no gene symbols at all.
        if (len(gene_symbols) < 1):
            print gene_symbols
        id_set_dict = {
            'name': network_name,
            'ids': gene_symbols,
            'network_id': network_id,
def save(self, alt_grp_path=None):
    """Persist every identifier set belonging to this e-set.

    Ensures the e-set's storage directory exists, then delegates to
    each contained id-set's own ``save``.

    :param alt_grp_path: optional alternate group path forwarded
        verbatim to every ``id_set.save`` call.
    """
    storage.ensure_e_set_dir(self.name)
    # Keys of id_set_map are not needed here — iterate the sets only.
    for member in self.id_set_map.itervalues():
        member.save(alt_grp_path)
def save(self, alt_grp_path=None):
    """Persist every identifier set belonging to this object.

    Ensures the storage directory for ``self.name`` exists, then saves
    each contained id-set.  The optional *alt_grp_path* parameter is
    added for consistency with the sibling ``save(self, alt_grp_path)``
    implementation in this file; omitting it preserves the original
    zero-argument behavior exactly.

    :param alt_grp_path: optional alternate group path forwarded to
        each ``id_set.save`` call when provided.
    """
    storage.ensure_e_set_dir(self.name)
    for set_name, id_set in self.id_set_map.iteritems():
        if alt_grp_path is None:
            # Original call path, kept byte-for-byte compatible.
            id_set.save()
        else:
            id_set.save(alt_grp_path)