Exemplo n.º 1
0
import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional

from pangtreebuild.tools import logprocess
from pangtreebuild.tools.cli import get_default_blosum
from pangtreebuild.affinity_tree import parameters
from pangtreebuild.affinity_tree import poa
from pangtreebuild.affinity_tree import tree

from pangtreebuild.pangenome import graph
from pangtreebuild.pangenome.parameters import msa


# Module-level loggers, all obtained through the project's logprocess helper.
global_logger = logprocess.get_global_logger()
# NOTE(review): 'tresholdsCSV' is the logger name looked up at runtime. The
# spelling is kept as-is; presumably it matches the name used where the logger
# is configured -- confirm before renaming.
tresholds_logger = logprocess.get_logger('tresholdsCSV')
# Logger registered under the name 'details'.
detailed_logger = logprocess.get_logger('details')


class AffinityTreeBuildException(Exception):
    """Raised for errors related to the Affinity Tree build process.

    Plain ``Exception`` subclass: it adds no attributes or behaviour of its
    own, so it is constructed and caught like any standard exception.
    """


def build_poa_affinity_tree(p: graph.Poagraph,
                            blosum: Optional[parameters.Blosum],
                            output_dir: Path,
                            hbmin: parameters.Hbmin,
                            verbose: bool) -> tree.AffinityTree:
    """Builds Affinity Tree coherent with poa software.
Exemplo n.º 2
0
from typing import Optional, List, Tuple, Dict

from Bio import AlignIO
from Bio.Align import MultipleSeqAlignment

from pangtreebuild.datamodel.Node import NodeID, ColumnID, Node, Base, BlockID
from pangtreebuild.datamodel.Sequence import SequenceID, Sequence, SequencePath
from pangtreebuild.datamodel.input_types import Maf, MetadataCSV
from pangtreebuild.tools import logprocess

# Type alias: a list whose entries are either a Biopython MultipleSeqAlignment
# or None. NOTE(review): the name suggests one entry per parsed MAF block --
# confirm against the code that produces values of this type.
_ParsedMaf = List[Optional[MultipleSeqAlignment]]

# Module-level loggers, both obtained through the project's logprocess helper.
global_logger = logprocess.get_global_logger()
# Logger registered under the name "details".
detailed_logger = logprocess.get_logger("details")

def get_poagraph(maf: Maf, metadata: Optional[MetadataCSV]) -> Tuple[List[Node], Dict[SequenceID, Sequence]]:
    alignment = [*AlignIO.parse(maf.filecontent, "maf")]
    nodes, sequences = _init_poagraph(alignment, metadata)

    current_node_id = NodeID(-1)
    column_id = ColumnID(-1)
    for block_id, block in enumerate(alignment):
        global_logger.info(f"Processing block {block_id}...")
        block_width = len(block[0].seq)

        for col in range(block_width):
            column_id += 1
            sequence_id_to_nucleotide = {SequenceID(seq.id): seq[col] for seq in block}
            nodes_codes = sorted([*(
                set([nucleotide for nucleotide in sequence_id_to_nucleotide.values()])).difference({'-'})])
            column_nodes_ids = [NodeID(current_node_id + i + 1) for i, _ in enumerate(nodes_codes)]
Exemplo n.º 3
0
import os
from bisect import bisect_left
from pathlib import Path
from typing import List, Dict, Union, Optional

from pangtreebuild.consensus.input_types import Hbmin
from pangtreebuild.datamodel.Node import NodeID
from pangtreebuild.datamodel.Poagraph import Poagraph
from pangtreebuild.datamodel.Sequence import SequenceID, SequencePath
from pangtreebuild.output.PangenomePO import NodePO, SequencePO
from pangtreebuild.tools import pathtools
import pangtreebuild.output.PangenomePO as PangenomePO
import subprocess
from pangtreebuild.tools import logprocess

# Module-level loggers, both obtained through the project's logprocess helper.
# Logger registered under the name 'details'.
detailed_logger = logprocess.get_logger('details')
global_logger = logprocess.get_global_logger()


class NoConsensusError(Exception):
    """Exception type declared by this module for consensus-related failures.

    Plain ``Exception`` subclass with no extra state or behaviour.
    NOTE(review): the name suggests it signals that no consensus could be
    obtained -- confirm against the raise sites.
    """


class ConsInfo:
    def __init__(self,
                 fullname: str,
                 po_consensus_id: Optional[str] = None,
                 assigned_sequences_ids: Optional[List[SequenceID]] = None,
                 path: Optional[SequencePath] = None):
        self.fullname: str = fullname
        self.po_consensus_id: str = po_consensus_id