def test_6_repeated_sequences(self):
    # Loading the repeated-sequences fixture through FromFile must fail,
    # and the failure must carry exactly the non-unique-IDs message.
    source = self.fasta_dir.joinpath("test_6_repeated_sequences.fasta")

    with self.assertRaises(Exception) as raised:
        missings.FromFile(Path(source))

    # The exception is only available once the context manager has exited.
    self.assertEqual(
        "Incorrect fasta provided: sequences IDs are not unique.",
        str(raised.exception))
def test_4_empty_sequence(self):
    # Loading the empty-sequence fixture through FromFile must fail,
    # and the failure must carry exactly the empty-sequence message.
    source = self.fasta_dir.joinpath("test_4_empty_sequence.fasta")

    with self.assertRaises(Exception) as raised:
        missings.FromFile(Path(source))

    # The exception is only available once the context manager has exited.
    self.assertEqual(
        "Empty sequence in FASTA. Provide the sequence or remove its header.",
        str(raised.exception))
def test_5_empty_fasta(self):
    # Loading the empty-fasta fixture through FromFile must fail with the
    # exact message below.
    # NOTE(review): the expected text talks about zipped fastas although
    # this fixture is a plain .fasta file - confirm that is intended.
    source = self.fasta_dir.joinpath("test_5_empty_fasta.fasta")

    with self.assertRaises(Exception) as raised:
        missings.FromFile(Path(source))

    # The exception is only available once the context manager has exited.
    self.assertEqual(
        "No sequences in zipped fastas or incorrect zipped files.",
        str(raised.exception))
def test_6_empty_zip(self):
    # Loading the empty-zip fixture through FromFile must fail,
    # and the failure must carry exactly the incorrect-zip message.
    source = self.fasta_dir.joinpath("test_6_empty_zip.zip")

    with self.assertRaises(Exception) as raised:
        missings.FromFile(Path(source))

    # The exception is only available once the context manager has exited.
    self.assertEqual("Incorrect zip fasta source.", str(raised.exception))
def test_1_one_sequence(self):
    # The provider built from the one-sequence fixture is checked for
    # "seq1" against the sequence that self.read_sequence returns for the
    # same file.
    source = self.fasta_dir.joinpath("test_1_one_sequence.fasta")
    provider = missings.FromFile(Path(source))

    self.raise_error_if_unequal(
        msa.SequenceID("seq1"),
        self.read_sequence(source),
        provider)
def test_1_one_sequence_one_file_in_zip(self):
    # The provider built from the zip fixture is checked for "seq1"
    # against the hard-coded expected bases.
    archive = self.fasta_dir.joinpath(
        "test_1_one_sequence_one_file_in_zip.zip")
    provider = missings.FromFile(Path(archive))

    self.raise_error_if_unequal(
        msa.SequenceID("seq1"), "ACTGGGTGGGA", provider)
def run_pangtree(maf_path: Path,
                 fasta_path: Path,
                 output_dir: Path,
                 po_output: bool) -> None:
    """Build a poagraph from a MAF file and export results per P value.

    A child directory named after the current time is created under
    ``output_dir``. The poagraph is built once from ``maf_path`` using a
    file-based fasta provider for ``fasta_path``. Then, for every value in
    ``p_values`` (a name defined outside this function), a sub-directory is
    created and filled with:

    * ``affinity_tree.newick`` - the affinity tree in Newick form,
    * ``poagraph.po`` - only when ``po_output`` is true,
    * ``pangenome.json`` - the serialized pangenome with task parameters.

    Args:
        maf_path: Path of the multialignment (MAF) file.
        fasta_path: Path handed to ``missings.FromFile``.
        output_dir: Parent directory for the timestamped run directory.
        po_output: Whether to also write the PO representation.
    """

    def _store(content, target_dir, file_name):
        # Single place for the "save under <target_dir>/<file_name>" pattern.
        pathtools.save_to_file(
            content, pathtools.get_child_path(target_dir, file_name))

    timestamp = pathtools.get_current_time()
    output_dir = pathtools.get_child_dir(output_dir, timestamp)
    print(f"Runing pangtree for maf: {maf_path} and fasta: {fasta_path} "
          f"Output in: {output_dir}, include po file: {po_output}.")

    fasta_provider = missings.FromFile(fasta_path)
    maf_content = pathtools.get_file_content_stringio(maf_path)
    maf = msa.Maf(maf_content, maf_path)
    poagraph, dagmaf = builder.build_from_dagmaf(maf, fasta_provider)

    for p_value in p_values:
        # Directory name is the P value with the decimal dot replaced.
        p_dir = pathtools.get_child_dir(output_dir,
                                        str(p_value).replace(".", "_"))
        stop = at_params.Stop(0.99)
        p_param = at_params.P(p_value)
        affinity_tree = at_builders.build_affinity_tree(
            poagraph, None, p_dir, stop, p_param, True)

        newick = affinity_tree.as_newick(None, separate_leaves=True)
        _store(newick, p_dir, "affinity_tree.newick")

        if po_output:
            _store(po.poagraph_to_PangenomePO(poagraph), p_dir, "poagraph.po")

        # `json` here is the module in scope that exposes TaskParameters,
        # to_PangenomeJSON and to_json (i.e. not the standard library one).
        task_params = json.TaskParameters(
            multialignment_file_path=str(maf_path),
            multialignment_format="maf",
            datatype="nucleotides",
            blosum_file_path="",
            output_path=p_dir,
            fasta_provider=fasta_provider,
            fasta_source_file=fasta_path,
            consensus_type="tree",
            stop=str(stop),
            p=str(p_value),
            output_with_nodes=False)
        pangenome = json.to_PangenomeJSON(task_parameters=task_params,
                                          poagraph=poagraph,
                                          dagmaf=dagmaf,
                                          affinity_tree=affinity_tree)
        _store(json.to_json(pangenome), p_dir, "pangenome.json")
def test_2_three_sequences_in_two_files_in_zip(self):
    # The provider built from the zip fixture is checked for three
    # sequence IDs, each against its hard-coded expected bases.
    archive = self.fasta_dir.joinpath(
        "test_2_three_sequences_in_two_files_in_zip.zip")
    provider = missings.FromFile(Path(archive))

    expected_by_id = (
        ("seq1", "ACTGGGTGGGA"),
        ("seq2", "AA"),
        ("seq3", "GT"),
    )
    # Checked in order; the first mismatch aborts the test.
    for raw_id, bases in expected_by_id:
        self.raise_error_if_unequal(msa.SequenceID(raw_id), bases, provider)
def resolve_fasta_provider(args: argparse.Namespace) -> \
        missings.FastaProvider:
    """Pick the fasta provider requested by the parsed arguments.

    Selection is driven by ``args.fasta_provider``:

    * ``None`` - a ``ConstBaseProvider`` built from ``args.missing_symbol``,
      or from ``missings.MissingBase()`` when no symbol was given;
    * ``'ncbi'`` - ``FromNCBI``, receiving ``args.cache`` when it is truthy
      and ``False`` otherwise;
    * ``'file'`` - ``FromFile`` for ``args.fasta_path``.

    Raises:
        Exception: If ``'file'`` is selected without ``args.fasta_path``,
            or if the provider value is none of the above.
    """
    selected = args.fasta_provider

    if selected is None:
        if args.missing_symbol is not None:
            return missings.ConstBaseProvider(args.missing_symbol)
        return missings.ConstBaseProvider(missings.MissingBase())

    if selected == 'ncbi':
        return missings.FromNCBI(args.cache or False)

    if selected == 'file':
        if args.fasta_path is not None:
            return missings.FromFile(args.fasta_path)
        # NOTE(review): the message says 'local' while this branch is
        # selected by 'file' - confirm which wording is intended.
        raise Exception("""Fasta file source must be specified. It must be provided when fasta source is \'local\'.""")

    raise Exception("""Not known fasta provider. Should be \'ncbi\' or \'file\' or None. Cannot build graph.""")