def merge_pathways(pathways):
    """Return the union of the graphs of all requested pathways.

    Every requested pathway is fetched through
    ``current_app.pathme_manager.get_pathway_by_id``, deserialized from its
    stored blob, and tagged with provenance annotations (pathway name,
    database and pathway identifier) before all graphs are merged. Edges
    reported by ``get_contradiction_summary`` on the merged graph are
    labelled as ``'Interesting edge': 'Contradicts'``.

    The request is aborted with status 500 when a pathway cannot be found
    or when no pathway was requested at all.

    :param dict pathways: pathways to be merged; each ``(key, value)`` item
        is passed as ``(name, resource)`` to the pathway lookup
    :rtype: Optional[pybel.BELGraph]
    """
    networks = []

    for name, resource in pathways.items():
        pathway = current_app.pathme_manager.get_pathway_by_id(name, resource)

        if not pathway:
            # NOTE(review): ``abort`` is presumably Flask's and raises, so
            # execution is not expected to continue past this call.
            abort(
                500,
                'Pathway "{}" in resource "{}" was not found in the database. '
                'Please check that you have used correctly the autocompletion form.'
                .format(name, resource))

        # Loads the BELGraph and adds annotations to track provenance later
        graph = from_bytes(pathway.blob)

        # Declare the annotation keys used below so they are known to the graph.
        graph.annotation_list['Database'] = {
            'kegg', 'reactome', 'wikipathways'
        }
        graph.annotation_pattern['PathwayID'] = '.*'
        graph.annotation_pattern['Pathway name'] = '.*'
        graph.annotation_list['Interesting edge'] = {
            'Contradicts', 'May contradict'
        }

        add_annotation_key(graph)
        add_annotation_value(graph, 'Pathway name', pathway.name)
        add_annotation_value(graph, 'Database', pathway.resource_name)
        add_annotation_value(graph, 'PathwayID', pathway.pathway_id)

        log.debug('Adding graph {} {}:with {} nodes and {} edges'.format(
            name, resource, graph.number_of_nodes(), graph.number_of_edges()))

        networks.append(graph)

    if not networks:
        # The original message ("Any pathway was requested") stated the
        # opposite of the actual problem.
        abort(
            500,
            'No pathway was requested. Please select at least one pathway.')

    graph = union(networks)
    graph.name = 'Merged graph from {}'.format(
        [network.name for network in networks])
    graph.version = '0.0.0'

    # Flag every contradicting edge pair found in the merged graph.
    contradicting_edges = get_contradiction_summary(graph)
    for u, v, _ in contradicting_edges:
        label_graph_edges(graph, u, v, 'Interesting edge', 'Contradicts')

    return graph
def bel_graph_loader(from_dir: str) -> BELGraph:
    """Build one BELGraph out of every BEL document found in a folder.

    Only regular files located directly in ``from_dir`` whose name ends in
    ``.bel`` (case-insensitive) are parsed.

    :param from_dir: The folder with the BEL documents.
    :return: The union of the graphs parsed from those documents.
    """
    logger.info("Loading BEL Graph.")

    # First pass: keep the full path of every regular file in the folder.
    regular_files = []
    for entry in listdir(from_dir):
        full_path = join(from_dir, entry)
        if isfile(full_path):
            regular_files.append(full_path)

    # Second pass: restrict to BEL documents by extension.
    bel_paths = []
    for full_path in regular_files:
        if full_path[-4:].lower() == '.bel':
            bel_paths.append(full_path)

    # Parse each document, then merge the resulting graphs.
    parsed_graphs = []
    for bel_path in bel_paths:
        parsed_graphs.append(from_path(bel_path))

    return union(parsed_graphs)
def export_to_tsv(connection, all):
    """Summarize all.

    Merge every pathway known to the manager into one graph and write its
    edges to ``pathme_triplets.tsv`` in the current working directory, one
    ``subject<TAB>relation<TAB>object`` line per edge.

    :param connection: connection passed to ``Manager.from_connection``
    :param all: when falsy, nothing is exported (the parameter name shadows
        the builtin but is kept because it is part of the interface)
    """
    m = Manager.from_connection(connection=connection)

    if not all:
        # Without this guard the export could run with no merged graph
        # bound, truncating the output file and then failing.
        return

    pathways = [pathway.as_bel() for pathway in m.get_all_pathways()]
    graph = union(pathways)

    with open("pathme_triplets.tsv", "w") as f:
        for sub, obj, data in graph.edges(data=True):
            print(
                f"{sub.as_bel()}\t{data['relation']}\t{obj.as_bel()}",
                file=f,
            )
def load_paths(paths, connection=None):
    """Parse several BEL scripts and merge them into a single graph.

    Each path is parsed with :func:`pybel.from_path` using one shared
    manager, and the resulting graphs are combined with ``union``.

    :param iter[str] paths: An iterable over paths to BEL scripts
    :param connection: A custom database connection string or manager
    :type connection: Optional[str or pybel.manager.Manager]
    :rtype: pybel.BELGraph
    """
    manager = Manager.ensure(connection)
    # Lazy: each script is parsed only when ``union`` asks for the next graph.
    parsed_graphs = (
        from_path(script_path, manager=manager)
        for script_path in paths
    )
    return union(parsed_graphs)
def get_universe_graph(
    *,
    kegg_path: Optional[str] = None,
    reactome_path: Optional[str] = None,
    wikipathways_path: Optional[str] = None,
    flatten: bool = True,
    normalize_names: bool = True,
) -> BELGraph:
    """Return the universe graph.

    All keyword arguments are forwarded unchanged to
    ``iterate_universe_graphs``; the graphs it yields are merged with
    ``union`` into a single BELGraph.
    """
    labelled_graphs = iterate_universe_graphs(
        kegg_path=kegg_path,
        reactome_path=reactome_path,
        wikipathways_path=wikipathways_path,
        flatten=flatten,
        normalize_names=normalize_names,
    )

    # Each item is a 3-tuple; only its last element (the graph) is merged.
    graphs_only = (
        graph
        for _first, _second, graph in labelled_graphs
    )

    logger.info('Merging all into a hairball...')
    merged = union(graphs_only)
    return merged
def get_graph(
    self,
    directory: Optional[str] = None,
    use_cached: bool = True,
    use_tqdm: bool = True,
) -> BELGraph:
    """Get the graph from all sources and export it next to its cache.

    If a pickle named ``<self.name>.bel.pickle`` already exists in the
    output directory and ``use_cached`` is true, it is loaded and returned
    without any export. Otherwise the graphs from ``self.get_graphs`` are
    merged, ``self.metadata.update`` is applied to the result, and it is
    written as pickle, node-link JSON, SIF, GMT and GraphML. INDRA, CX and
    HTML exports are attempted as well and skipped on ``ImportError``.

    :param directory: Output directory; falls back to ``self.directory``.
    :param use_cached: Return the pickled graph if it already exists.
    :param use_tqdm: Forwarded to ``self.get_graphs``.
    :return: The merged graph.
    :raises ValueError: If neither ``directory`` nor ``self.directory`` is set.
    """
    if directory is None:
        if self.directory is None:
            raise ValueError(
                'no output directory: pass `directory` or set `self.directory`'
            )
        directory = self.directory

    pickle_path = os.path.join(directory, f'{self.name}.bel.pickle')
    if use_cached and os.path.exists(pickle_path):
        return pybel.from_pickle(pickle_path)

    rv = union(self.get_graphs(use_tqdm=use_tqdm))
    self.metadata.update(rv)

    # Exports that only rely on pybel itself.
    pybel.to_pickle(rv, pickle_path)

    nodelink_path = os.path.join(directory, f'{self.name}.bel.nodelink.json')
    pybel.to_json_path(rv, nodelink_path)

    sif_path = os.path.join(directory, f'{self.name}.bel.sif')
    pybel.to_sif_path(rv, sif_path)

    gsea_path = os.path.join(directory, f'{self.name}.bel.gmt')
    pybel.to_gsea_path(rv, gsea_path)

    graphml_path = os.path.join(directory, f'{self.name}.bel.graphml')
    pybel.to_graphml(rv, graphml_path)

    # Optional exports: each one is skipped when an ImportError is raised
    # (presumably because the supporting package is not installed).
    try:
        statements = pybel.to_indra_statements(rv)
    except ImportError:
        pass
    else:
        indra_path = os.path.join(directory, f'{self.name}.indra.pickle')
        with open(indra_path, 'wb') as file:
            pickle.dump(statements, file)

    try:
        from pybel_cx import to_cx_file
    except ImportError:
        pass
    else:
        cx_path = os.path.join(directory, f'{self.name}.bel.cx.json')
        with open(cx_path, 'w') as file:
            to_cx_file(rv, file)

    try:
        from pybel_tools.assembler.html import to_html
    except ImportError:
        pass
    else:
        html_path = os.path.join(directory, 'index.html')
        with open(html_path, 'w') as file:
            print(to_html(rv), file=file)

    return rv
def get_graph(
    force: bool = False,
    force_global: bool = False,
    names: Optional[NamesList] = None,
    resources_directory: Optional[str] = None,
) -> BELGraph:
    """Build (or load from cache) one BELGraph combining all resources.

    After merging, nodes in the ``ncbigene``/``egid`` namespaces whose
    identifier has a known HGNC symbol are relabelled in place as HGNC
    nodes, the central dogma is enriched, and the result is pickled.

    :param force: Should cached files be overwritten?
    :param force_global: Should the global cache file be overwritten?
    :param names: The name of the bio2bel packages to use and arguments
    :param resources_directory: A non-default place to store the resources
    """
    cache_path = os.path.join(resources_directory or RESOURCES, CACHE_NAME)

    # Serve the global cache unless the caller asked to rebuild it.
    if not force_global:
        if os.path.exists(cache_path):
            logger.info('Getting cached full graph')
            return from_pickle(cache_path)

    selected = DEFAULT_NAMES if names is None else names

    logger.info('Generating graphs')
    graphs = []
    for package_name, to_bel_kwargs in selected:
        sub_graph = get_graph_by_manager(
            package_name,
            force=force,
            to_bel_kwargs=to_bel_kwargs,
        )
        logger.info(sub_graph.summary_str())
        graphs.append(sub_graph)

    logger.info('Merging graphs')
    graph = pybel.union(graphs)
    joined_names = ", ".join(part.name for part in graphs)
    graph.name = f'Graph from: {joined_names}'
    graph.version = '0.0.1'
    logger.info('Finished merging graphs')

    logger.info('Preparing HGNC mappings')
    hgnc_manager = bio2bel_hgnc.Manager()
    hgnc_symbol_to_id = hgnc_manager.build_hgnc_symbol_id_mapping()
    entrez_id_to_hgnc_symbol = hgnc_manager.build_entrez_id_to_hgnc_symbol_mapping()

    logger.info('Generating namespace mapping for nodes')
    entrez_namespaces = {'ncbigene', 'egid'}
    mapping = {}
    for node in graph:
        node_namespace = node.get('namespace')
        if node_namespace is None:
            continue
        if node_namespace.lower() not in entrez_namespaces:
            continue
        if node.identifier not in entrez_id_to_hgnc_symbol:
            continue

        # Replace the Entrez-identified node by its HGNC counterpart.
        symbol = entrez_id_to_hgnc_symbol[node.identifier]
        hgnc_id = hgnc_symbol_to_id[symbol]
        mapping[node] = node.__class__(
            namespace='hgnc',
            name=symbol,
            identifier=hgnc_id,
        )

    logger.info('Relabeling nodes')
    nx.relabel_nodes(graph, mapping, copy=False)

    logger.info('Enriching central dogma')
    enrich_protein_and_rna_origins(graph)

    logger.info('Exporting snp2k pickle')
    to_pickle(graph, cache_path)

    return graph