Пример #1
0
 def _load_helper(self):
     """Load the example graph together with its source and target node lists."""
     # The example graph ships in CSV format.
     graph = load_graph(GRAPH_EXAMPLE, 'csv')
     # Source nodes are listed in the sources example file.
     sources = read_nodes(SOURCES_EXAMPLE)
     # Target nodes are listed in the targets example file.
     targets = read_nodes(TARGETS_EXAMPLE)
     return graph, sources, targets
Пример #2
0
def _prepare_graph(graph: str, fmt: str, sources: str, targets: str):
    """Load the graph and its node sets, initializing MPI when available.

    :param graph: path to the graph file
    :param fmt: graph format
    :param sources: path to the source-node file
    :param targets: path to the target-node file (activate/inhibit lists)
    :return: graph, source nodes, activation targets, inhibition targets, MPI rank
    """
    # Default to a single-process run; overwritten when mpi4py is importable.
    process_id, number_of_processes = 0, 1
    try:
        from mpi4py import MPI
    except ImportError:
        pass
    else:
        world = MPI.COMM_WORLD
        process_id = world.Get_rank()
        number_of_processes = world.Get_size()
        click.echo("Parallelization on")

    # Validate the input files before doing any expensive loading.
    check_graph_input(sources, targets, fmt, False)
    is_master = process_id == 0
    if is_master:
        click.secho(f'{EMOJI} Loading the graph from {graph} {EMOJI}')

    # Load the graph and read the node files.
    directed_graph: DiGraph = load_graph(graph, fmt)
    source_nodes = read_nodes(sources)
    activate_targets, inhibit_targets = read_nodes_to_optimize(targets)

    # Only the master rank reports the run summary.
    if is_master:
        number_of_targets = len(activate_targets + inhibit_targets)
        click.echo(
            f"{EMOJI} A total of {len(source_nodes)} sources and {number_of_targets} targets will be used. {EMOJI}"
        )
        if number_of_processes > 1:
            click.echo(f"{EMOJI} Distributing work among {number_of_processes} processes. {EMOJI}")

    return directed_graph, source_nodes, activate_targets, inhibit_targets, process_id
Пример #3
0
    def test_path_cycles_2(self):
        """Test path search without cycles."""
        graph = load_graph(GRAPH_EXAMPLE, 'csv')

        # Single hand-picked source node.
        sources = ['b']
        # Target nodes come from the example file.
        targets = read_nodes(TARGETS_EXAMPLE)

        # Run the exploration (cycles mode, since simple_paths is False).
        results, _ = wrapper_explore(
            graph=graph,
            source_nodes=sources,
            target_nodes=targets,
            lmax=5,
            simple_paths=False,
        )

        expected = [
            {
                'source': 'b',
                'target': 'g',
                'relative_activation': 1,
                'relative_inhibition': 0,
                'number_of_paths': 2,
            },
        ]
        self.assertEqual(expected, results)
Пример #4
0
def _explore_helper(
    graph: str,
    fmt: str,
    sources: str,
    targets: str,
    lmax: Union[int, List[int]],
    simple_paths: bool,
    log: bool,
    export_time: bool,
    output: str,
    name: str,
    drug_search_bel: bool,
) -> None:
    """Wrap explore command in cli.

    Calculations are parallelized across MPI processes when mpi4py is installed.

    :param graph: path to the graph
    :param fmt: graph format
    :param sources: path to the source nodes
    :param targets: path to the target nodes
    :param lmax: max length of path (single value or list of values)
    :param simple_paths: use simple paths or cycles
    :param log: debug mode
    :param export_time: export time for each pair
    :param output: output directory
    :param name: name of the graph for output purposes
    :param drug_search_bel: search drugs automatically in BEL
    """
    # Initialize MPI environment and variables, if found.
    number_of_processes = 1
    process_id = 0
    try:
        from mpi4py import MPI
        comm = MPI.COMM_WORLD
        process_id = comm.Get_rank()
        number_of_processes = comm.Get_size()
        click.echo("Parallelization on")
    except ImportError:
        # mpi4py is optional; fall back to a single-process run.
        pass

    # Setup logging.
    # MPI ranks other than master will have it set to WARNING
    _setup_logging(log, process_id)

    # Ensure file is valid
    check_graph_input(sources, targets, fmt, drug_search_bel)
    if process_id == 0:
        click.secho(f'{EMOJI} Loading the graph from {graph} {EMOJI}')
    # Load graph
    directed_graph: DiGraph = load_graph(graph, fmt)
    # Find drugs (sources) and pathologies (targets) if selected and graph is in BEL fmt
    if drug_search_bel:
        # Get source nodes (drugs)
        source_nodes = get_candidate_drugs(directed_graph)
        # Get target nodes (pathologies)
        targets_nodes = get_candidate_targets(directed_graph)
    else:
        # Get source nodes from file
        source_nodes = read_nodes(sources)
        # Get target nodes from file
        targets_nodes = read_nodes(targets)
    if process_id == 0:
        click.echo(
            f"{EMOJI} A total of {len(source_nodes)} sources and {len(targets_nodes)} targets will be used. {EMOJI}"
        )
        if number_of_processes > 1:
            click.echo(f"{EMOJI} Distributing work among {number_of_processes} processes. {EMOJI}")

    path_mode = "simple-paths" if simple_paths else "cycles"

    # Export results for each lmax. The loop variable is renamed so it no
    # longer shadows the ``lmax`` parameter it was derived from.
    for current_lmax in _handle_lmax_parameter(lmax):
        click.secho(
            f'{EMOJI} Calculating paths with lmax ({current_lmax}) on {path_mode} mode. This might take a while... {EMOJI}',
        )

        # Warn user if lmax larger than 12
        if current_lmax > 12:
            logger.warning(
                f"Note that the selected Lmax '{current_lmax}' might converge results if your graph is not large enough"
            )

        # Track the time it takes to run
        exe_t_0 = time.time()
        # Call main function
        results, time_cache = wrapper_explore(
            graph=directed_graph,
            source_nodes=source_nodes,
            target_nodes=targets_nodes,
            lmax=current_lmax,
            simple_paths=simple_paths,
        )
        # Finished time
        exe_t_f = time.time()
        running_time = exe_t_f - exe_t_0
        # Only the master rank reports timing and exports the results.
        if process_id == 0:
            click.secho(f'{EMOJI} Finished in {running_time} seconds {EMOJI}')

            # Export results
            with open(os.path.join(output, f'{name}all_against_all_lmax_{current_lmax}.json'), 'w') as f:
                json.dump(results, f, indent=2)

            # Export time
            if export_time:
                with open(os.path.join(output, f'{name}time_cache_{current_lmax}.json'), 'w') as f:
                    json.dump(time_cache, f, indent=2)

            click.secho(f'{EMOJI} Results exported to {output} {EMOJI}')
Пример #5
0
def _pathway_enrichment_helper(
    graph: str,
    fmt: str,
    sources: str,
    targets: str,
    lmax: int,
    simple_paths: bool,
    log: bool,
    output: str,
) -> None:
    """Wrap optimize command in cli.

    :param graph: graph
    :param fmt: format
    :param sources: source nodes
    :param targets: target nodes
    :param lmax: max length of path
    :param simple_paths: use simple paths or cycles
    :param log: debug mode
    :param output: output directory
    """
    _setup_logging(log)

    # Ensure file is valid
    check_graph_input(sources, targets, fmt, False)

    # Load graph
    directed_graph: DiGraph = load_graph(graph, fmt)

    # Get source nodes from file
    source_nodes = read_nodes(sources)
    # Get target nodes from file
    targets_nodes = read_nodes(targets)

    click.echo(
        f"{EMOJI} A total of {len(source_nodes)} sources and {len(targets_nodes)} targets will be used. {EMOJI}"
    )

    path_mode = "simple-paths" if simple_paths else "cycles"

    # Export results for each lmax. The loop variable is renamed so it no
    # longer shadows the ``lmax`` parameter it was derived from.
    for current_lmax in _handle_lmax_parameter(lmax):
        click.secho(
            f'{EMOJI} Pathway analysis with lmax ({current_lmax}) on {path_mode} mode. This might take a while... {EMOJI}',
        )

        # Track the time it takes to run
        exe_t_0 = time.time()
        # Call main function
        wrapper_pathway_enrichment(
            graph=directed_graph,
            source_nodes=source_nodes,
            target_nodes=targets_nodes,
            lmax=current_lmax + 1,  # TODO: Fixme since enumerate_paths uses lmax + 1 (we have to now increase 1)
            simple_paths=simple_paths,
            output=output,
        )
        # Finished time
        exe_t_f = time.time()
        running_time = exe_t_f - exe_t_0

        click.secho(f'{EMOJI} Finished in {running_time} seconds {EMOJI}')

        click.secho(f'{EMOJI} Results exported to {output} {EMOJI}')