Exemplo n.º 1
0
def get_reactome_graph(reactome_manager, reactome_dir, file):
    """Load a Reactome pathway graph and union the graphs of its children into it.

    :param reactome_manager: manager providing ``get_pathway_by_id``
    :param str reactome_dir: directory containing the Reactome pickles
    :param str file: file name of the pathway pickle (``<pathway id>.pickle``)
    :return: the pathway graph merged with every child graph found on disk,
        after ``normalize_graph_names`` was applied with ``REACTOME``
    """
    # Load BELGraph
    pathway_graph = from_pickle(os.path.join(reactome_dir, file))

    # Remove the extension as a suffix. ``str.strip('.pickle')`` would strip
    # any of the characters ".pickle" from BOTH ends of the file name and can
    # therefore eat into the identifier itself.
    suffix = '.pickle'
    pathway_id = file[:-len(suffix)] if file.endswith(suffix) else file

    # Look up in Bio2BEL Reactome
    pathway = reactome_manager.get_pathway_by_id(pathway_id)

    if not pathway:
        # Without a database entry there is no hierarchy to walk, so the
        # graph is returned without any children merged into it.
        logger.warning(f'{pathway_id} not found in database')
    else:
        # Check if there are children and merge them on the fly
        for child in yield_all_children(pathway):
            child_file_path = os.path.join(reactome_dir,
                                           f"{child.resource_id}.pickle")
            if not os.path.exists(child_file_path):
                logger.warning(f'{child.resource_id} pickle does not exist')
                continue

            # Load the pickle and union it
            child_graph = pybel.from_pickle(child_file_path)
            pathway_graph += child_graph

    # Normalize graph names
    normalize_graph_names(pathway_graph, REACTOME)

    return pathway_graph
Exemplo n.º 2
0
def import_from_pickle(manager, folder, files, database):
    """Populate the PathMe database from a folder of pickled BEL graphs.

    :param pathme_viewer.manager.Manager manager: PathMe manager
    :param str folder: folder containing the pickles
    :param iter[str] files: names of the pickle files inside ``folder``
    :param str database: resource name
    """
    progress_description = 'Loading {} pickles to populate PathMe database'.format(database)

    for file_name in tqdm.tqdm(files, desc=progress_description):
        bel_pathway = from_pickle(os.path.join(folder, file_name))

        # The pathway identifier is the file name without its extension
        pathway_id, _extension = os.path.splitext(file_name)

        # For KEGG only the part before the first underscore is the identifier
        if database == KEGG:
            pathway_id = pathway_id.split('_')[0]

        manager.get_or_create_pathway(
            _prepare_pathway_model(pathway_id, database, bel_pathway)
        )

    log.info('%s has been loaded', database)
Exemplo n.º 3
0
def upload_recursive(directory,
                     connection=None,
                     exclude_directory_pattern=None):
    """Recursively upload all gpickles in a given directory and its sub-directories.

    A pickle that raises ``ImportError`` or ``ImportVersionWarning`` while
    loading is logged and skipped; the remaining paths are still uploaded.

    :param str directory: the directory to traverse
    :param connection: A connection string or manager
    :type connection: Optional[str or pybel.manage.Manager]
    :param Optional[str] exclude_directory_pattern: Any directory names to exclude
    """
    manager = Manager.ensure(connection)
    # Materialized so the complete list can be logged before uploading starts
    paths = list(
        get_paths_recursive(
            directory,
            extension='.gpickle',
            exclude_directory_pattern=exclude_directory_pattern))
    log.info('Paths to upload: %s', paths)

    for path in paths:
        try:
            network = from_pickle(path)
        except (ImportError, ImportVersionWarning):
            # The loop carries on with the next path, so report a skip
            # rather than claiming that the upload is quitting.
            log.warning(
                '%s uses a pickle from an old version of PyBEL. Skipping.',
                path)
            continue

        to_database(network, connection=manager, store_parts=True)
Exemplo n.º 4
0
def get_kegg_genes_from_pickles(resource_folder, files: List[str], manager) -> Dict[str, Set]:
    """Build the gene set of every flattened KEGG pathway pickle.

    Only file names ending in ``_flatten.pickle`` are considered; all other
    entries of ``files`` are ignored.

    :param str resource_folder: path to resource folder
    :param list files: list of BEL graph pickles
    :param bio2bel Manager manager: Manager
    :return: gene sets keyed by ``str()`` of the pathway returned by the manager
    :rtype: dict[str,set]
    """
    flatten_suffix = '_flatten.pickle'
    gene_sets = {}

    for pickle_name in (name for name in files if name.endswith(flatten_suffix)):
        graph = from_pickle(os.path.join(resource_folder, pickle_name))
        genes = get_genes_in_graph(graph)

        # The manager is queried with the bare file stem prefixed by "path:"
        kegg_id = 'path:' + pickle_name[:-len(flatten_suffix)]
        pathway = manager.get_pathway_by_id(kegg_id)

        gene_sets[str(pathway)] = genes

    return gene_sets
Exemplo n.º 5
0
 def test_example_pickle(self):
     """Write the sialic acid graph to an in-memory pickle, read it back, and check the result."""
     buffer = BytesIO()
     to_pickle(sialic_acid_graph, buffer)
     # Rewind so that reading starts at the beginning of the buffer
     buffer.seek(0)
     reloaded = from_pickle(buffer)
     self._help_test_equal(reloaded)
Exemplo n.º 6
0
def upload_jgf_directory(directory: str, manager: Manager):
    """Insert every JGF network found in a directory as a public graph.

    Each JGF file has a corresponding gpickle path that acts as a cache: when
    it exists the graph is loaded from it, otherwise the JGF file is converted
    and the result is written there. Annotations are stripped in both cases.

    :param directory: directory that is searched by ``iter_jgf``
    :param manager: manager the graphs are inserted with
    """
    # isdir() is already False for a path that does not exist
    if not os.path.isdir(directory):
        logger.warning('directory does not exist: %s', directory)
        return

    start_time = time.time()

    for jgf_path in iter_jgf(directory):
        cache_path = get_jgf_corresponding_gpickle_path(jgf_path)

        if not os.path.exists(cache_path):
            with open(jgf_path) as jgf_file:
                cbn_jgif_dict = json.load(jgf_file)

            graph = pybel.from_cbn_jgif(cbn_jgif_dict)
            # Strip before caching so the pickle holds the stripped graph
            strip_annotations(graph)
            to_pickle(graph, cache_path)
        else:
            graph = from_pickle(cache_path)
            strip_annotations(graph)

        try:
            insert_graph(manager, graph, public=True, use_tqdm=True)
        except OperationalError:
            # Reset the session so the following inserts can proceed
            manager.session.rollback()
            logger.info('could not insert %s', graph)

    elapsed = time.time() - start_time
    logger.info('done in %.2f seconds', elapsed)
Exemplo n.º 7
0
def get_combined_graph_similarity(
        *,
        fullgraph_path=DEFAULT_FULLGRAPH_WITHOUT_CHEMSIM_PICKLE,
        chemsim_graph_path=DEFAULT_CHEMSIM_PICKLE,
        mapping_file=DEFAULT_MAPPING_PATH,
        new_graph_path=DEFAULT_GRAPH_PATH,
        pickle_graph_path=DEFAULT_FULLGRAPH_PICKLE,
        rebuild: bool = False):
    """Combine chemical similarity graph with the fullgraph.

    :param fullgraph_path: a BELGraph, or the path to a pickle of the full
        graph without chemical similarities
    :param chemsim_graph_path: a BELGraph, or the path to a pickle of the
        chemical similarity graph
    :param mapping_file: tab-separated file with the columns ``namespace``,
        ``identifier``, ``name`` and ``node_id``
    :param new_graph_path: path of the edge list; it is read back when it
        exists and ``rebuild`` is false, and written otherwise
    :param pickle_graph_path: path the combined graph is pickled to (before
        its nodes are relabeled)
    :param rebuild: if true, ignore an existing edge list and build it again
    :return: the graph read from the existing edge list, or the freshly
        combined and relabeled graph
    """
    # Check the path the caller asked for; the module default was previously
    # used here regardless of ``new_graph_path``.
    if not rebuild and os.path.exists(new_graph_path):
        return nx.read_edgelist(new_graph_path)

    # Both graph arguments may be given as a graph object or as a pickle path
    if isinstance(fullgraph_path, pybel.struct.graph.BELGraph):
        fullgraph_without_chemsim = fullgraph_path
    else:
        fullgraph_without_chemsim = pybel.from_pickle(fullgraph_path)
    if isinstance(chemsim_graph_path, pybel.struct.graph.BELGraph):
        chemsim_graph = chemsim_graph_path
    else:
        chemsim_graph = pybel.from_pickle(chemsim_graph_path)

    mapping_df = pd.read_csv(
        mapping_file,
        sep="\t",
        dtype={
            'identifier': str,
            'node_id': str
        },
        index_col=False,
    )
    fullgraph_with_chemsim = fullgraph_without_chemsim + chemsim_graph
    pybel.to_pickle(fullgraph_with_chemsim, pickle_graph_path)

    # Map every BEL node described in the mapping file to its node id
    relabel_graph = {}
    for _, row in mapping_df.iterrows():
        if row['namespace'] == PUBCHEM_NAMESPACE:
            node = pybel.dsl.Abundance(namespace=PUBCHEM_NAMESPACE, identifier=row['identifier'])
        elif row['namespace'] == UNIPROT_NAMESPACE:
            node = pybel.dsl.Protein(namespace=UNIPROT_NAMESPACE,
                                     identifier=row['identifier'],
                                     name=row['name'])
        else:
            # Every other namespace is treated as a UMLS pathology
            node = pybel.dsl.Pathology(namespace='umls', identifier=row['identifier'], name=row['name'])
        relabel_graph[node] = row['node_id']

    nx.relabel_nodes(fullgraph_with_chemsim, relabel_graph, copy=False)
    nx.write_edgelist(fullgraph_with_chemsim, new_graph_path, data=False)
    return fullgraph_with_chemsim
Exemplo n.º 8
0
def get_nodes_in_database(folder):
    """Collect the nodes of all pickled BEL graphs in a given folder.

    Only directory entries whose name ends in ``.pickle`` are loaded.

    :param str folder: folder containing the pickles
    :return: the union of the nodes of every loaded graph
    :rtype: set
    """
    nodes = set()

    for file_name in os.listdir(folder):
        if not file_name.endswith('.pickle'):
            continue

        # Load one graph at a time so that only a single network has to be
        # held in memory while its nodes are collected
        network = pybel.from_pickle(os.path.join(folder, file_name))
        nodes.update(network.nodes())

    return nodes
Exemplo n.º 9
0
def get_graph_by_manager(
    module: Union[str, ModuleType, BELManagerMixin, Type[BELManagerMixin]],
    force: bool = False,
    to_bel_kwargs: Optional[Mapping[str, Any]] = None,
) -> BELGraph:
    """Get the BEL graph of a manager, using a pickle in ``RESOURCES`` as a cache.

    :param module: a module name (``bio2bel_<name>`` gets imported), a manager
        instance, a module exposing ``Manager``, or a manager class
    :param force: if true, ignore an existing pickle and export the graph again
    :param to_bel_kwargs: keyword arguments passed on to ``manager.to_bel``
    :return: the graph loaded from the pickle, or the freshly exported graph
    :raises TypeError: if ``module`` is a class that does not derive from
        ``BELManagerMixin`` or is of any other unsupported type
    """
    if isinstance(module, str):
        # A module given by name can be served from its pickle before
        # anything has to be imported
        named_cache_path = os.path.join(RESOURCES, f'{module}.bel.pickle')
        if os.path.exists(named_cache_path) and not force:
            logger.info(f'Getting {module} from pickle at {named_cache_path}')
            return from_pickle(named_cache_path)

        manager = importlib.import_module(f'bio2bel_{module}').Manager()
    elif isinstance(module, BELManagerMixin):
        manager = module
    elif isinstance(module, ModuleType):
        manager = module.Manager()
    elif isinstance(module, type) and issubclass(module, BELManagerMixin):
        manager = module()
    elif isinstance(module, type):
        raise TypeError(f'{module} is not a subclass of BELManagerMixin')
    else:
        raise TypeError(f'{module} has invalid type: {type(module)}')

    cache_path = os.path.join(RESOURCES, f'{manager.module_name}.bel.pickle')
    if os.path.exists(cache_path) and not force:
        logger.info(
            f'Getting {manager.module_name} from pickle at {cache_path}')
        return from_pickle(cache_path)

    if not manager.is_populated():
        logger.info(f'Populating manager for {manager.module_name}')
        manager.populate()

    export_kwargs = to_bel_kwargs or {}
    graph = manager.to_bel(**export_kwargs)

    # Log a summary followed by the namespace and function counts
    logger.info(graph.summary_str())
    for counter in (count_namespaces, count_functions):
        logger.info(str(counter(graph)))

    logger.info(f'Writing pickle for {cache_path}')
    to_pickle(graph, cache_path)
    return graph
Exemplo n.º 10
0
def iter_from_pickles(paths):
    """Lazily load the BEL graphs from the given gpickle paths.

    Paths that do not end in ``.gpickle`` are logged and skipped.

    :param iter[str] paths: paths to inspect
    :rtype: iter[pybel.BELGraph]
    """
    for candidate in paths:
        if candidate.endswith('.gpickle'):
            yield from_pickle(candidate)
        else:
            log.info('not a gpickle: %s', candidate)
Exemplo n.º 11
0
def summarize(export_folder):
    """Load every exported KEGG graph and summarize them.

    Prints a hint instead when the export folder yields no graphs.

    :param export_folder: folder the pickles are read from
    """
    click.echo('loading KEGG graphs')

    graphs = []
    for fname in tqdm(get_paths_in_folder(export_folder)):
        graphs.append(from_pickle(os.path.join(export_folder, fname)))

    if not graphs:
        click.echo("Please export KEGG to BEL first. Run 'python3 -m pathme kegg bel' ")
        return

    summarize_helper(graphs)
Exemplo n.º 12
0
def summarize(export_folder):
    """Load every exported WikiPathways graph and summarize them.

    Prints a hint instead when the export folder yields no graphs.

    :param export_folder: folder the pickles are read from
    """
    click.echo('loading WikiPathways graphs')

    graphs = []
    for fname in tqdm(get_paths_in_folder(export_folder)):
        graphs.append(from_pickle(os.path.join(export_folder, fname)))

    if not graphs:
        click.echo("Please export WikiPathways to BEL first. Run 'python3 -m pathme wikipathways bel' ")
        return

    summarize_helper(graphs)
Exemplo n.º 13
0
def upload(path, connection, recursive, skip_check_version, to_service,
           service_url, debug):
    """Upload a single pickled graph, or a whole directory when ``recursive`` is set.

    A single graph is posted to the service at ``service_url`` when
    ``to_service`` is truthy and stored in the database otherwise.
    """
    set_debug_param(debug)

    if recursive:
        log.info('uploading recursively from: %s', path)
        upload_recursive(path, connection=connection)
        return

    check_version = not skip_check_version
    graph = from_pickle(path, check_version=check_version)

    if not to_service:
        to_database(graph, connection=connection)
    else:
        receiver_service.post(graph, service_url)
Exemplo n.º 14
0
def _iterate_kegg(kegg_pickle_paths, kegg_path, flatten, normalize_names):
    """Lazily load the KEGG pickles, yielding ``(KEGG, file name, graph)`` triples.

    Entries that do not end in ``.pickle`` are skipped. Complex nodes are
    flattened and graph names normalized only when the respective flag is set.
    """
    progress = tqdm(kegg_pickle_paths,
                    desc=f'Loading KEGG pickles from {kegg_path}')

    for file_name in progress:
        if file_name.endswith('.pickle'):
            pickle_path = os.path.join(kegg_path, file_name)
            graph = from_pickle(pickle_path, check_version=False)

            if flatten:
                flatten_complex_nodes(graph)

            if normalize_names:
                normalize_graph_names(graph, KEGG)

            _update_graph(graph, file_name, KEGG)
            yield KEGG, file_name, graph
Exemplo n.º 15
0
def upload_recursive(directory, connection=None, store_parts=False):
    """Upload every gpickle found below a directory, including its sub-directories.

    :param str directory: the directory to traverse
    :param connection: A connection string or manager
    :type connection: None or str or pybel.manage.CacheManager
    :param bool store_parts: Should the edge store be used?
    """
    manager = build_manager(connection)

    # Collected up front so the complete list can be logged first
    gpickle_paths = list(get_paths_recursive(directory, extension='.gpickle'))
    log.info('Paths to upload: %s', gpickle_paths)

    for gpickle_path in gpickle_paths:
        safe_upload(manager, from_pickle(gpickle_path), store_parts=store_parts)
Exemplo n.º 16
0
def get_drugbank_graph(rebuild: bool = False, **kwargs) -> pybel.BELGraph:
    """Get the DrugBank graph, from its pickle if possible.

    :param rebuild: if true, ignore an existing pickle and export the graph again
    :param kwargs: keyword arguments passed on to the manager's ``to_bel``
    :return: the DrugBank graph
    """
    cache_usable = not rebuild and os.path.exists(DEFAULT_DRUGBANK_PICKLE)
    if cache_usable:
        return pybel.from_pickle(DEFAULT_DRUGBANK_PICKLE)

    # Imported here so the package is only required when the graph is built
    import bio2bel_drugbank

    manager = bio2bel_drugbank.Manager()
    if not manager.is_populated():
        manager.populate()
    graph = manager.to_bel(**kwargs)

    # The pickle is only written when the resources directory exists
    if os.path.exists(RESOURCES):
        pybel.to_pickle(graph, DEFAULT_DRUGBANK_PICKLE)

    return graph
Exemplo n.º 17
0
def get_sider_graph(rebuild: bool = False) -> pybel.BELGraph:
    """Get the SIDER graph, from its pickle if possible.

    :param rebuild: if true, ignore an existing pickle and export the graph again
    :return: the SIDER graph
    """
    cache_usable = not rebuild and os.path.exists(DEFAULT_SIDER_PICKLE)
    if cache_usable:
        return pybel.from_pickle(DEFAULT_SIDER_PICKLE)

    # Imported here so the package is only required when the graph is built
    import bio2bel_sider

    manager = bio2bel_sider.Manager()
    if not manager.is_populated():
        manager.populate()
    graph = manager.to_bel()

    # The pickle is only written when the resources directory exists
    if os.path.exists(RESOURCES):
        pybel.to_pickle(graph, DEFAULT_SIDER_PICKLE)

    return graph
Exemplo n.º 18
0
def upload_neurommsig_graphs(manager: Manager):
    """Upload the NeuroMMSig sample networks and write a manifest for them.

    The Alzheimer's graph is loaded from its gpickle when present and compiled
    from the BEL script (then pickled) otherwise. Every ``Subgraph`` annotation
    listed in ``neurommsig_sample_networks`` is pickled into the NeuroMMSig
    directory and inserted as a public graph.

    :param manager: manager used for compiling and inserting the graphs
    :raises RuntimeError: if neither the gpickle nor the BEL script exists
    """
    # isdir() is already False for a path that does not exist
    if not os.path.isdir(alzheimer_directory):
        logger.warning('directory does not exist: %s', alzheimer_directory)
        return

    if not os.path.exists(neurommsig_directory):
        logger.info('created neurommsig directory: %s', neurommsig_directory)
        os.makedirs(neurommsig_directory)

    bel_path = os.path.join(alzheimer_directory, 'alzheimers.bel')
    gpickle_path = os.path.join(alzheimer_directory, 'alzheimers.gpickle')

    if os.path.exists(gpickle_path):
        graph = from_pickle(gpickle_path)
    elif os.path.exists(bel_path):
        graph = from_bel_script(bel_path, manager=manager)
        to_pickle(graph, gpickle_path)
    else:
        raise RuntimeError('missing NeuroMMSig source file: {}'.format(bel_path))

    annotated_subgraphs = get_subgraphs_by_annotation(graph, annotation='Subgraph')

    networks = []
    for subgraph_name, subgraph in annotated_subgraphs.items():
        # Only the sample networks are uploaded
        if subgraph_name not in neurommsig_sample_networks:
            continue

        subgraph.name = 'NeuroMMSig AD {}'.format(subgraph_name)
        subgraph.authors = 'Daniel Domingo-Fernandez et. al'
        subgraph.version = graph.version
        subgraph.license = graph.license

        # output to directory as gpickle
        subgraph_path = os.path.join(neurommsig_directory,
                                     '{}.gpickle'.format(subgraph_name))
        to_pickle(subgraph, subgraph_path)

        networks.append(
            insert_graph(manager, subgraph, public=True, use_tqdm=True)
        )

    write_manifest(neurommsig_directory, networks)
Exemplo n.º 19
0
def _iterate_wp(wp_pickle_paths, wikipathways_path, flatten, normalize_names):
    """Lazily load the WikiPathways pickles, yielding ``(WIKIPATHWAYS, file name, graph)`` triples.

    Entries that do not end in ``.pickle`` are skipped. Complex nodes are
    flattened and graph names normalized only when the respective flag is set.
    """
    progress = tqdm(wp_pickle_paths,
                    desc=f'Loading WP pickles from {wikipathways_path}')

    for file_name in progress:
        if file_name.endswith('.pickle'):
            pickle_path = os.path.join(wikipathways_path, file_name)
            graph = from_pickle(pickle_path, check_version=False)

            if flatten:
                flatten_complex_nodes(graph)

            if normalize_names:
                normalize_graph_names(graph, WIKIPATHWAYS)

            _update_graph(graph, file_name, WIKIPATHWAYS)
            yield WIKIPATHWAYS, file_name, graph
Exemplo n.º 20
0
def _iterate_reactome(reactome_pickle_paths, reactome_path, flatten,
                      normalize_names):
    """Lazily load the Reactome pickles, yielding ``(REACTOME, file name, graph)`` triples.

    Entries that do not end in ``.pickle`` are skipped. Complex nodes are
    flattened and graph names normalized only when the respective flag is set.
    """
    progress = tqdm(reactome_pickle_paths,
                    desc=f'Loading Reactome pickles from {reactome_path}')

    for file_name in progress:
        if file_name.endswith('.pickle'):
            pickle_path = os.path.join(reactome_path, file_name)
            graph = from_pickle(pickle_path, check_version=False)

            if flatten:
                flatten_complex_nodes(graph)

            if normalize_names:
                normalize_graph_names(graph, REACTOME)

            _update_graph(graph, file_name, REACTOME)
            yield REACTOME, file_name, graph
Exemplo n.º 21
0
def upload(manager, path, skip_check_version, to_service, service_url,
           exclude_directory_pattern, debug):
    """Upload a gpickle file, or every gpickle below a directory.

    A directory is uploaded recursively. A single file is sent to the web
    service at ``service_url`` when ``to_service`` is truthy and stored in the
    database otherwise. A path that is neither a directory nor a file is ignored.
    """
    set_debug_param(debug)

    if os.path.isdir(path):
        log.info('uploading recursively from: %s', path)
        upload_recursive(path,
                         connection=manager,
                         exclude_directory_pattern=exclude_directory_pattern)
        return

    if not os.path.isfile(path):
        return

    # PyBEL is imported lazily, and only the pieces that are actually needed
    from pybel import from_pickle
    check_version = not skip_check_version
    graph = from_pickle(path, check_version=check_version)

    if not to_service:
        from pybel import to_database
        to_database(graph, connection=manager, store_parts=True)
    else:
        from pybel import to_web
        to_web(graph, service_url)
Exemplo n.º 22
0
def get_genes_from_pickles(resource_folder: str, files: List[str], manager) -> Dict[str, set]:
    """Build the gene set of every pathway pickle of a resource.

    :param resource_folder: path to resource folder
    :param list files: list of BEL graph pickles
    :param bio2bel Manager manager: Manager
    :return: gene sets keyed by ``str()`` of the pathway returned by the manager
    :rtype: dict[str,set]
    """
    extension_length = len('.pickle')
    gene_sets = {}

    for pickle_name in files:
        graph = from_pickle(os.path.join(resource_folder, pickle_name))
        genes = get_genes_in_graph(graph)

        # The manager is queried with the file name minus its trailing
        # ``len('.pickle')`` characters
        pathway = manager.get_pathway_by_id(pickle_name[:-extension_length])
        gene_sets[str(pathway)] = genes

    return gene_sets
Exemplo n.º 23
0
def process_pybel_network(network_type, network_file, **kwargs):
    """Return PybelProcessor by processing a given network file.

    Parameters
    ----------
    network_type : str
        The type of network that network_file is. The options are:
        belscript, json, cbn_jgif, graph_pickle, and graph_jsongz_url.
    network_file : str
        Path to the network file/URL to process. For graph_jsongz_url a
        falsy value selects ``large_corpus_url``.
    **kwargs
        Only used for belscript, where they are passed on to
        ``process_belscript``.

    Returns
    -------
    bp : PybelProcessor
        A PybelProcessor object which contains INDRA Statements in
        bp.statements.

    Raises
    ------
    ValueError
        If network_type is not one of the supported options.
    """
    if network_type == 'belscript':
        return process_belscript(network_file, **kwargs)

    if network_type == 'json':
        return process_json_file(network_file)

    if network_type == 'cbn_jgif':
        return process_cbn_jgif_file(network_file)

    if network_type == 'graph_jsongz_url':
        url = network_file if network_file else large_corpus_url
        logger.info('Loading %s' % url)
        response = requests.get(url)
        response.raise_for_status()
        # MAX_WBITS | 32 lets zlib detect the gzip/zlib header automatically
        raw_json = zlib.decompress(response.content, zlib.MAX_WBITS | 32)
        graph = pybel.from_nodelink_jsons(raw_json.decode('utf-8'))
        return process_pybel_graph(graph)

    if network_type == 'graph_pickle':
        return process_pybel_graph(pybel.from_pickle(network_file))

    raise ValueError('Unknown network type: %s' % network_type)
Exemplo n.º 24
0
def get_bel_types(path: str):
    """Count the nodes, edges, BEL functions and BEL relations of a pickled graph.

    :param path: path to pickle
    :return: the node and edge totals under ``nodes`` and ``edges``, updated
        with the results of ``count_functions`` and ``count_relations``
    :rtype: dict
    """
    graph = from_pickle(path)

    stats = {
        'nodes': graph.number_of_nodes(),
        'edges': graph.number_of_edges(),
    }

    # Function counts first, then relation counts (later keys win on clashes)
    for counter in (count_functions, count_relations):
        stats.update(counter(graph))

    return stats
Exemplo n.º 25
0
def main(connection):
    """Load a network (from the pickle or the small corpus URL) and time ``get_numbers`` on it.

    :param connection: connection passed to ``pybel.Manager``
    """
    manager = pybel.Manager(connection)

    if not os.path.exists(PICKLE):
        with time_me(f'opening from {SMALL_CORPUS_URL}'):
            graph = pybel.from_url(
                SMALL_CORPUS_URL,
                manager=manager,
                use_tqdm=True,
                citation_clearing=False,
            )

        # Cache the compiled graph for the next run
        pybel.to_pickle(graph, PICKLE)
    else:
        print(f'opening from {PICKLE}')
        graph = pybel.from_pickle(PICKLE)

    n = 1
    # FIXME this fails if you do it with the same manager

    times = []
    for _ in range(n):
        times.append(get_numbers(graph, manager))

    print(times)
    print(sum(times) / n)
Exemplo n.º 26
0
import pickle
import sys

import pybel
from indra.sources import bel
from indra.sources.bel.processor import get_agent
from pybel.struct.filters import has_protein_modification

from .util import get_mod_sites

if __name__ == '__main__':
    # Command line: <command> <input_file> <output_file>
    #   parse_belscript          compile a BEL script and pickle the graph
    #   get_pybel_stmts_by_site  process a pickled graph and dump its mod sites
    USAGE = ('usage: {parse_belscript|get_pybel_stmts_by_site} '
             '<input_file> <output_file>')

    # Both commands need all three arguments; fail with a readable message
    # (exit status 1) instead of an IndexError traceback.
    if len(sys.argv) < 4:
        sys.exit(USAGE)

    command, input_file, output_file = sys.argv[1:4]

    # Parse the BEL script, takes a few minutes
    if command == 'parse_belscript':
        pbg = pybel.from_path(input_file)
        pybel.to_pickle(pbg, output_file)
    # Get all variant sites from the graph
    #elif sys.argv[1] == 'get_pybel_mod_agents':
    #    pbg = pybel.from_pickle('output/large_corpus_pybel.pkl')
    #    mod_nodes = [get_agent(n) for n in pbg.nodes()
    #                 if has_protein_modification(n)]
    #    with open('output/bel_mod_agents.pkl', 'wb') as f:
    #        pickle.dump(mod_nodes, f)
    elif command == 'get_pybel_stmts_by_site':
        pbg = pybel.from_pickle(input_file)
        pbp = bel.process_pybel_graph(pbg)
        sites = get_mod_sites(pbp.statements)
        with open(output_file, 'wb') as f:
            pickle.dump(sites, f)
    else:
        # Unknown command: still exit status 1, but say what was wrong
        sys.exit('unknown command: {}\n{}'.format(command, USAGE))
Exemplo n.º 27
0
 def test_thorough_pickle(self):
     """Write the thorough graph to an in-memory pickle, read it back, and check the result."""
     buffer = BytesIO()
     to_pickle(self.thorough_graph, buffer)
     # Rewind so that reading starts at the beginning of the buffer
     buffer.seek(0)
     reloaded = from_pickle(buffer)
     self.bel_thorough_reconstituted(reloaded)
Exemplo n.º 28
0
 def test_example_pickle(self):
     """Write the sialic acid graph to an in-memory pickle, read it back, and check the result."""
     buffer = BytesIO()
     to_pickle(sialic_acid_graph, buffer)
     # Rewind so that reading starts at the beginning of the buffer
     buffer.seek(0)
     reloaded = from_pickle(buffer)
     self.help_test_equal(reloaded)
Exemplo n.º 29
0
def get_wp_graph(file):
    """Load a pickled graph and run ``normalize_graph_names`` on it for WikiPathways.

    :param file: passed as-is to ``from_pickle``
    :return: the loaded graph (the return value of ``normalize_graph_names`` is not used)
    """
    graph = from_pickle(file)
    normalize_graph_names(graph, WIKIPATHWAYS)
    return graph
Exemplo n.º 30
0
def get_kegg_graph(file):
    """Load a pickled graph and run ``normalize_graph_names`` on it for KEGG.

    :param file: passed as-is to ``from_pickle``
    :return: the loaded graph (the return value of ``normalize_graph_names`` is not used)
    """
    graph = from_pickle(file)
    normalize_graph_names(graph, KEGG)
    return graph