示例#1
0
def main(directory: str):
    """Dump Hetionet into ``directory`` in four serializations.

    The graph is obtained with ``pybel.get_hetionet()``, passed through
    ``pybel.grounding.ground()``, and then written as a gzipped BEL script,
    gzipped Nodelink JSON, gzipped GraphDati JSON, and a gzipped TSV.
    Each step is announced on the console with ``click.echo``.

    :param directory: Folder in which all output files are created.
    """
    def _in_directory(filename: str) -> str:
        # Every artifact is written directly inside the target directory.
        return os.path.join(directory, filename)

    click.echo(f'Using PyBEL v{pybel.get_version(with_git_hash=True)}')

    click.echo('Getting hetionet')
    hetionet = pybel.get_hetionet()

    click.echo('Grounding hetionet')
    hetionet = pybel.grounding.ground(hetionet)

    click.echo('Exporting BEL Script')
    bel_script_target = _in_directory('hetionet-v1.0.bel.gz')
    pybel.to_bel_script_gz(hetionet, bel_script_target)

    click.echo('Exporting Nodelink')
    nodelink_target = _in_directory('hetionet-v1.0.bel.nodelink.json.gz')
    pybel.to_nodelink_gz(hetionet, nodelink_target)

    click.echo('Exporting GraphDati')
    graphdati_target = _in_directory('hetionet-v1.0.bel.graphdati.json.gz')
    pybel.to_graphdati_gz(hetionet, graphdati_target)

    click.echo('Exporting Machine Learning-ready TSV')
    tsv_target = _in_directory('hetionet-v1.0.tsv.gz')
    # The TSV exporter writes to a text handle, so gzip is opened in 'wt' mode.
    with gzip.open(tsv_target, 'wt') as tsv_file:
        pybel.to_tsv(hetionet, tsv_file)
示例#2
0
def main(directory: str):
    """Make hetionet exports.

    The full graph is cached as gzipped Nodelink JSON inside ``directory`` and
    reloaded from there on later runs. Each whole-graph export (BEL script,
    GraphDati JSONL, GraphDati JSON) is skipped when its output file already
    exists.

    If the summary TSV does not exist yet, one randomly chosen edge per
    metaedge is written to it, the in-memory graph is reduced to those sampled
    edges (isolated nodes are dropped), and the reduced graph is exported as a
    BEL script and as indented GraphDati JSON.

    :param directory: Folder in which all output files are read and written.
    """
    path = os.path.join(directory, 'hetionet.bel.nodelink.json.gz')
    if not os.path.exists(path):
        graph = get_hetionet()
        to_nodelink_gz(graph, path)
    else:
        # NOTE(review): the message says "pickle" but the cache is read with
        # from_nodelink_gz; wording kept as-is to avoid changing output.
        click.echo('loading pickle from {}'.format(path))
        graph = from_nodelink_gz(path)

    output_bel_gz_path = os.path.join(directory, 'hetionet.bel.gz')
    if not os.path.exists(output_bel_gz_path):
        click.echo('outputting whole hetionet as BEL GZ to {}'.format(output_bel_gz_path))
        to_bel_script_gz(graph, output_bel_gz_path, use_identifiers=True)

    output_graphdati_jsonl_gz_path = os.path.join(directory, 'hetionet.bel.graphdati.jsonl.gz')
    if not os.path.exists(output_graphdati_jsonl_gz_path):
        click.echo('outputting whole hetionet as BEL GraphDati JSONL GZ to {}'.format(output_graphdati_jsonl_gz_path))
        to_graphdati_jsonl_gz(graph, output_graphdati_jsonl_gz_path, use_identifiers=True)

    output_graphdati_gz_path = os.path.join(directory, 'hetionet.bel.graphdati.json.gz')
    if not os.path.exists(output_graphdati_gz_path):
        click.echo('outputting whole hetionet as BEL GraphDati JSON GZ to {}'.format(output_graphdati_gz_path))
        to_graphdati_gz(graph, output_graphdati_gz_path, use_identifiers=True)

    summary_tsv_path = os.path.join(directory, 'hetionet_summary.tsv')
    if not os.path.exists(summary_tsv_path):
        click.echo('getting metaedges')
        rows = []
        keep_keys = set()
        # Pick one representative edge at random for every metaedge group.
        for value in get_metaedge_to_key(graph).values():
            u, v, key = choice(list(value))
            keep_keys.add(key)
            d = graph[u][v][key]
            bel = edge_to_bel(u, v, d, use_identifiers=True)
            # Only the first 8 characters of the edge key go into the summary.
            rows.append((key[:8], bel))

        df = pd.DataFrame(rows, columns=['key', 'bel'])
        df.to_csv(summary_tsv_path, sep='\t', index=False)

        # Materialize the edges first: the graph must not be mutated while
        # its edge view is being iterated.
        non_sample_edges = [
            (u, v, k, d)
            for u, v, k, d in tqdm(graph.edges(keys=True, data=True), desc='Getting non-sample edges to remove')
            if k not in keep_keys
        ]
        click.echo('Removing non-sample edges')
        graph.remove_edges_from(non_sample_edges)
        graph.remove_nodes_from(list(nx.isolates(graph)))

        sample_bel_path = os.path.join(directory, 'hetionet_sample.bel')
        click.echo('outputting sample hetionet in BEL to {}'.format(sample_bel_path))
        to_bel_script(graph, sample_bel_path, use_identifiers=True)

        sample_graphdati_path = os.path.join(directory, 'hetionet_sample.bel.graphdati.json')
        # Report the GraphDati target; the message previously repeated the BEL path.
        click.echo('outputting sample hetionet in BEL GraphDati JSON to {}'.format(sample_graphdati_path))
        to_graphdati_file(graph, sample_graphdati_path, use_identifiers=True, indent=2)
示例#3
0
def ensure_graph(
        name: str,
        *,
        manager_kwargs: Optional[Mapping[str, Any]] = None) -> BELGraph:
    """Return the BEL graph for a Bio2BEL package, using an on-disk cache.

    When the gzipped Nodelink file for ``name`` already exists in the
    package's data directory it is loaded and returned. Otherwise the
    package's manager is instantiated, its graph is built with ``to_bel()``,
    written to that file, and returned.

    :param name: The name of the Bio2BEL package.
    :param manager_kwargs: Optional keyword arguments for the manager's
        constructor; a missing or empty value means no arguments.
    :raises ValueError: If the manager is not a ``BELManagerMixin`` instance.
    """
    cache_path = os.path.join(get_data_dir(name), f'{name}.bel.nodelink.json.gz')

    if os.path.exists(cache_path):
        bel_graph = from_nodelink_gz(cache_path)
    else:
        _, module = ensure_bio2bel_installation(name)
        init_kwargs = manager_kwargs or {}
        manager = module.Manager(**init_kwargs)
        if not isinstance(manager, BELManagerMixin):
            raise ValueError(f'{module} is not enabled for BEL export')

        bel_graph = manager.to_bel()
        # Persist the freshly built graph so the next call can load it.
        to_nodelink_gz(bel_graph, cache_path)

    return bel_graph
示例#4
0
    def write(
        self,
        *,
        tp_nodes: Mapping[str, Set[Node]],
        tp_edges: Mapping[str, Set[Edge]],
        tn_nodes: Mapping[str, Set[Node]],
        tn_edges: Mapping[str, Set[Edge]],
        **kwargs,
    ) -> None:
        """Serialize the positive and negative graphs as gzipped BEL files.

        The ``tp_*`` arguments are converted into a graph named ``"positive"``
        and the ``tn_*`` arguments into one named ``"negative"``. Each graph is
        written to ``self.graph_dir_path`` twice: as gzipped Nodelink JSON and
        as a gzipped BEL script.

        :param tp_nodes: Nodes used for the "positive" graph.
        :param tp_edges: Edges used for the "positive" graph.
        :param tn_nodes: Nodes used for the "negative" graph.
        :param tn_edges: Edges used for the "negative" graph.
        :param kwargs: Accepted but not used by this method.
        """
        from pybel import to_nodelink_gz, to_bel_script_gz

        # Insertion order keeps "positive" ahead of "negative".
        partitions = {
            "positive": (tp_nodes, tp_edges),
            "negative": (tn_nodes, tn_edges),
        }
        for name, (node_map, edge_map) in partitions.items():
            bel_graph = convert(nodes=node_map, edges=edge_map, name=name)

            nodelink_target = os.path.join(self.graph_dir_path, f"{name}.bel.nodelink.json.gz")
            to_nodelink_gz(bel_graph, nodelink_target)

            script_target = os.path.join(self.graph_dir_path, f"{name}.bel.gz")
            to_bel_script_gz(bel_graph, script_target)
def main(directory: str):
    """Write Hetionet to ``directory`` as BEL script, Nodelink, and GraphDati.

    The graph is obtained once with ``get_hetionet()`` and then exported
    three times, each as a gzipped file. Progress is announced with
    ``click.echo`` before every step.

    :param directory: Folder in which the three output files are created.
    """
    click.echo('Getting hetionet')
    hetionet = get_hetionet()

    click.echo('Exporting BEL Script')
    bel_script_target = os.path.join(directory, 'hetionet-v1.0.bel.gz')
    to_bel_script_gz(hetionet, bel_script_target)

    click.echo('Exporting Nodelink')
    nodelink_target = os.path.join(directory, 'hetionet-v1.0.bel.nodelink.json.gz')
    to_nodelink_gz(hetionet, nodelink_target)

    click.echo('Exporting GraphDati')
    graphdati_target = os.path.join(directory, 'hetionet-v1.0.bel.graphdati.json.gz')
    to_graphdati_gz(hetionet, graphdati_target)
示例#6
0
def ensure_graph(name: str, *, manager_kwargs: Optional[Mapping[str, Any]] = None) -> BELGraph:
    """Load the cached BEL graph for a Bio2BEL package, building it on first use.

    The cache is a gzipped Nodelink JSON file in the package's data directory.
    On a cache miss the package's manager is instantiated, its graph is
    generated with ``to_bel()``, stored in the cache file, and returned.

    :param name: The name of the Bio2BEL package
    :param manager_kwargs: Optional mapping to give as keyword arguments to the manager upon instantiation.
    :raises ValueError: If the instantiated manager is not a ``BELManagerMixin``.
    """
    data_directory = get_data_dir(name)
    cached_file = os.path.join(data_directory, f'{name}.bel.nodelink.json.gz')

    if not os.path.exists(cached_file):
        _, module = ensure_bio2bel_installation(name)
        constructor_kwargs = manager_kwargs or {}
        manager = module.Manager(**constructor_kwargs)
        if not isinstance(manager, BELManagerMixin):
            raise ValueError(f'{module} is not enabled for BEL export')

        fresh_graph = manager.to_bel()
        # Store the result so later calls take the load path below.
        to_nodelink_gz(fresh_graph, cached_file)
        return fresh_graph

    return from_nodelink_gz(cached_file)
示例#7
0
def write_bel(connection, skip, directory, force):
    """Export the BEL graph of every BEL-capable manager into ``directory``.

    Managers that are not ``BELManagerMixin`` instances are ignored. For each
    remaining manager its name is printed, and then one of three things
    happens: the export is skipped because the pickle already exists (unless
    ``force`` is truthy), the manager is reported as unpopulated, or its graph
    is written as a pickle, gzipped Nodelink JSON, and a gzipped BEL script.

    :param connection: Forwarded to ``_iterate_managers``.
    :param skip: Forwarded to ``_iterate_managers``.
    :param directory: Output folder; created if it does not exist.
    :param force: If truthy, re-export even when the pickle already exists.
    """
    os.makedirs(directory, exist_ok=True)
    from .manager.bel_manager import BELManagerMixin
    import pybel

    for _, name, manager in _iterate_managers(connection, skip):
        if not isinstance(manager, BELManagerMixin):
            continue

        click.secho(name, fg='cyan', bold=True)

        # The pickle doubles as the "already exported" marker.
        pickle_path = os.path.join(directory, f'{name}.bel.pickle')
        if os.path.exists(pickle_path) and not force:
            click.echo('👍 already exported')
            continue

        if not manager.is_populated():
            click.echo('👎 unpopulated')
            continue

        bel_graph = manager.to_bel()
        pybel.to_pickle(bel_graph, pickle_path)

        nodelink_path = os.path.join(directory, f'{name}.bel.nodelink.json.gz')
        pybel.to_nodelink_gz(bel_graph, nodelink_path)

        script_path = os.path.join(directory, f'{name}.bel.gz')
        pybel.to_bel_script_gz(bel_graph, script_path)