import numpy as np
from pathlib import Path
from typing import Dict, List, Tuple, Optional

from pangtreebuild.tools import logprocess
from pangtreebuild.tools.cli import get_default_blosum
from pangtreebuild.affinity_tree import parameters
from pangtreebuild.affinity_tree import poa
from pangtreebuild.affinity_tree import tree
from pangtreebuild.pangenome import graph
from pangtreebuild.pangenome.parameters import msa

# Module-level loggers, all obtained from pangtreebuild.tools.logprocess:
# the global logger plus the named 'tresholdsCSV' and 'details' loggers.
global_logger = logprocess.get_global_logger()
tresholds_logger = logprocess.get_logger('tresholdsCSV')
detailed_logger = logprocess.get_logger('details')


class AffinityTreeBuildException(Exception):
    """Any exception connected with Affinity Tree build process."""
    pass


# NOTE(review): this definition is cut off right after the first line of its
# docstring; the docstring is left unterminated exactly as found and nothing
# about the body is assumed here - confirm against the complete module.
def build_poa_affinity_tree(p: graph.Poagraph,
                            blosum: Optional[parameters.Blosum],
                            output_dir: Path,
                            hbmin: parameters.Hbmin,
                            verbose: bool) -> tree.AffinityTree:
    """Builds Affinity Tree coherent with poa software.
from typing import Optional, List, Tuple, Dict

from Bio import AlignIO
from Bio.Align import MultipleSeqAlignment

from pangtreebuild.datamodel.Node import NodeID, ColumnID, Node, Base, BlockID
from pangtreebuild.datamodel.Sequence import SequenceID, Sequence, SequencePath
from pangtreebuild.datamodel.input_types import Maf, MetadataCSV
from pangtreebuild.tools import logprocess

# Alias for a list whose entries are either a Biopython MultipleSeqAlignment
# or None.
_ParsedMaf = List[Optional[MultipleSeqAlignment]]

# Module-level loggers obtained from pangtreebuild.tools.logprocess.
global_logger = logprocess.get_global_logger()
detailed_logger = logprocess.get_logger("details")


def get_poagraph(maf: Maf,
                 metadata: Optional[MetadataCSV]) -> Tuple[List[Node], Dict[SequenceID, Sequence]]:
    """Walk a parsed MAF alignment block by block and column by column.

    NOTE(review): only the beginning of the body is visible - the definition
    is cut off inside the inner column loop and no return statement can be
    seen. Presumably the ``nodes`` and ``sequences`` created by
    ``_init_poagraph`` are filled in and returned; confirm against the
    complete module.

    Args:
        maf: Input whose ``filecontent`` attribute is handed to
            ``AlignIO.parse`` with the ``"maf"`` format.
        metadata: Optional metadata, forwarded unchanged to
            ``_init_poagraph``.
    """
    # Materialise every alignment block yielded by Biopython's MAF parser.
    alignment = [*AlignIO.parse(maf.filecontent, "maf")]
    # _init_poagraph is defined outside the visible source.
    nodes, sequences = _init_poagraph(alignment, metadata)
    # Both counters start at -1 so that the first "+ 1" below yields id 0.
    current_node_id = NodeID(-1)
    column_id = ColumnID(-1)
    for block_id, block in enumerate(alignment):
        global_logger.info(f"Processing block {block_id}...")
        # Width is taken from the first row of the block.
        # NOTE(review): assumes every row of a block has the same length - confirm.
        block_width = len(block[0].seq)
        for col in range(block_width):
            # column_id keeps counting across blocks; it is not reset per block.
            column_id += 1
            # Symbol found at this column for each row, keyed by the row's id.
            sequence_id_to_nucleotide = {SequenceID(seq.id): seq[col] for seq in block}
            # Distinct symbols of the column in sorted order, with '-'
            # (presumably the gap symbol) removed.
            nodes_codes = sorted([*(
                set([nucleotide for nucleotide in sequence_id_to_nucleotide.values()])).difference({'-'})])
            # One consecutive NodeID per distinct symbol, continuing right
            # after current_node_id.
            column_nodes_ids = [NodeID(current_node_id + i + 1) for i, _ in enumerate(nodes_codes)]
import os
from bisect import bisect_left
from pathlib import Path
from typing import List, Dict, Union, Optional

from pangtreebuild.consensus.input_types import Hbmin
from pangtreebuild.datamodel.Node import NodeID
from pangtreebuild.datamodel.Poagraph import Poagraph
from pangtreebuild.datamodel.Sequence import SequenceID, SequencePath
from pangtreebuild.output.PangenomePO import NodePO, SequencePO
from pangtreebuild.tools import pathtools
import pangtreebuild.output.PangenomePO as PangenomePO
import subprocess
from pangtreebuild.tools import logprocess

# Module-level loggers obtained from pangtreebuild.tools.logprocess.
detailed_logger = logprocess.get_logger('details')
global_logger = logprocess.get_global_logger()


class NoConsensusError(Exception):
    """Exception subclass that adds no behaviour of its own.

    NOTE(review): presumably raised when no consensus can be produced; the
    raise sites are not visible here - confirm against the complete module.
    """
    pass


# NOTE(review): this class is cut off inside __init__; only the first two
# attribute assignments are visible, so the remaining parameters
# (assigned_sequences_ids, path) are not described here.
class ConsInfo:
    def __init__(self,
                 fullname: str,
                 po_consensus_id: Optional[str] = None,
                 assigned_sequences_ids: Optional[List[SequenceID]] = None,
                 path: Optional[SequencePath] = None):
        self.fullname: str = fullname
        # NOTE(review): annotated as str, but the parameter is Optional[str]
        # and defaults to None, so this attribute can hold None.
        self.po_consensus_id: str = po_consensus_id