def test_both_empty(self, empty_feature_list) -> None:
    """Check how the function handles the case when both lists are empty."""
    # Both inputs are independent deep copies of the same empty fixture
    fresh_features: FeatureList = copy.deepcopy(empty_feature_list)
    known_features: FeatureList = copy.deepcopy(empty_feature_list)

    result: FeatureList = dedupl_features(fresh_features, known_features)

    # Nothing went in, so nothing may come out
    assert len(result) == 0
# end def
def test_new_empty(self, empty_feature_list, extant_feature_list_1) -> None:
    """Check how the function handles the case when `new_features` is empty."""
    # Deep copies of the fixtures are passed to the function under test
    fresh_features: FeatureList = copy.deepcopy(empty_feature_list)
    known_features: FeatureList = copy.deepcopy(extant_feature_list_1)

    result: FeatureList = dedupl_features(fresh_features, known_features)

    # An empty list of new features must stay empty
    assert len(result) == 0
# end def
def test_both_new_dupl(self, new_feature_list_4, extant_feature_list_1) -> None:
    """Check how the function handles the case when two of `new_features` are duplicated."""
    # Deep copies of the fixtures are passed to the function under test
    fresh_features: FeatureList = copy.deepcopy(new_feature_list_4)
    known_features: FeatureList = copy.deepcopy(extant_feature_list_1)

    # Expected size is derived from the input length measured before the call
    expected_len: int = len(fresh_features) - 2

    result: FeatureList = dedupl_features(fresh_features, known_features)

    # Exactly two features are expected to be dropped
    assert len(result) == expected_len
# end def
def test_all_new_unique(self, new_feature_list_1, extant_feature_list_1) -> None:
    """Check how the function handles the case when all `new_features` are unique."""
    # Deep copies of the fixtures are passed to the function under test
    fresh_features: FeatureList = copy.deepcopy(new_feature_list_1)
    known_features: FeatureList = copy.deepcopy(extant_feature_list_1)

    # Input length is measured before the call
    expected_len: int = len(fresh_features)

    result: FeatureList = dedupl_features(fresh_features, known_features)

    # No feature is expected to be dropped
    assert len(result) == expected_len
# end def
def test_one_new_half_unique(self, new_feature_list_2, extant_feature_list_1) -> None:
    """Check the case when one of `new_features` shares only its start with `extant_features`.

    I.e. one of `new_features` has only its start occurring somewhere
    in `extant_features`.
    """
    # Deep copies of the fixtures are passed to the function under test
    fresh_features: FeatureList = copy.deepcopy(new_feature_list_2)
    known_features: FeatureList = copy.deepcopy(extant_feature_list_1)

    # Input length is measured before the call
    expected_len: int = len(fresh_features)

    result: FeatureList = dedupl_features(fresh_features, known_features)

    # The test expects every new feature to be kept
    assert len(result) == expected_len
# end def
def main(version: str, last_update_date: str) -> None:
    """Run the whole highlighting pipeline for the parsed command line.

    Steps, in order:
      1. parse arguments with `parse_arguments()`;
      2. read fasta records from `params.target_fasta_fpath`;
      3. prepare the output directory and output file;
      4. count coverages with `samtools depth` (via `oc.count_cov_for_all_refs`);
      5. for every fasta record: fetch its coverage, highlight coverage
         features for every threshold in `params.coverage_thresholds`,
         drop features duplicating the ones already in `rec.features`,
         and write the record with `out.write_genbank_output`.

    A record is skipped (with a warning printed to stdout) if no coverage
    is found for it, or if the number of coverage positions differs from
    the sequence length.

    :param version: program version; embedded into the note of every
        generated feature.
    :param last_update_date: not used in this function; kept so that
        existing callers keep working.
    """

    # Parse arguments
    params: HighlighterParams = parse_arguments()

    # This string will be used for annotation of result GenBank file
    base_feature_note: str = f'generated by consensus-highlighter v{version}'

    # Suffix of the final message; becomes non-empty once any sequence is skipped
    with_warnings: str = ''

    # Read fasta records from input file
    print(
        f'Importing fasta from `{params.target_fasta_fpath}`...',
        end=' ',
        flush=True
    )
    fasta_records: Sequence[SeqRecord] = pfr.parse_fasta_reference(
        params.target_fasta_fpath
    )
    print('done')

    # Create output directory and prepare the output file
    _create_outdir_from_outfile(params.outfpath)
    out.create_or_emply_file(params.outfpath)

    # Obtain path to coverage file
    coverage_fpath: str = out.conf_path_to_depth_file(params.outfpath)

    # Count coverages with samtools depth
    print(
        'Silently counting coverages with `samtools depth`...',
        end=' ',
        flush=True
    )
    cov_fpath: str = oc.count_cov_for_all_refs(
        params.target_fasta_fpath,
        params.bam_fpath,
        coverage_fpath
    )
    print('done\n')

    # Proceed with annotation
    rec: SeqRecord
    for rec in fasta_records:

        print(f'Processing sequence `{rec.description}`')

        # Obtain coverages for current sequence
        cov_array: CoverageArray = oc.get_coverage_for_reference(
            rec.id, cov_fpath
        )

        # Check length of the coverage array.
        # Every warning line is printed by its own `print` call, so the
        #   emitted text does not depend on how this source file is laid out.
        if len(cov_array) == 0:
            print(f'! Warning: no coverage information found for sequence `{rec.id}`.')
            print('! Please, make sure that field `RNAME` (3-rd column) in your BAM file contains')
            print(f'! id of this sequence specified in fasta header (i.e. `{rec.id}`).')
            print('! Omitting this sequence.')
            print('=' * 10)
            with_warnings = ' with warnings'
            continue
        # end if

        if len(cov_array) != len(rec.seq):
            print(f'! Warning: length of sequence `{rec.id}` ({len(rec.seq)} bp)')
            print(
                f'! is not equal to number of coverage positions ({len(cov_array)})'
                ' reported by `samtools depth`'
            )
            print(f'! and stored in coverage file `{cov_fpath}`.')
            print('! Re-creating the bam file might be the solution of this issue.')
            # Same message as in the branch above
            print('! Omitting this sequence.')
            print('=' * 10)
            with_warnings = ' with warnings'
            continue
        # end if

        mean_coverage = round(sts.mean(cov_array.coverages), 2)
        print(f'Average coverage: {mean_coverage}')

        cov_threshold: CoverageThreshold
        coverage_features: MutableSequence[SeqFeature]

        # Detect all necessary coverage features
        for cov_threshold in params.coverage_thresholds:

            print(
                f'Screening the sequence for regions with {cov_threshold.get_label()}...',
                end=' ',
                flush=True
            )

            # Get coverage features
            coverage_features = hlft.highlight_coverage_features(
                cov_array,
                cov_threshold,
                base_feature_note
            )

            # Deduplicate against features already attached to the record
            #   (including those added for previous thresholds)
            coverage_features = ddf.dedupl_features(
                coverage_features, rec.features
            )

            # Append features to list
            rec.features.extend(coverage_features)
            print('done')
        # end for

        print(
            f'Writing annotated sequence to `{params.outfpath}`...',
            end=' ',
            flush=True
        )

        # Write result GenBank record
        out.write_genbank_output(
            rec,
            params.topology,
            params.organism,
            params.outfpath
        )
        print('done')
        print('=' * 10)
    # end for

    print(f'Completed{with_warnings}!')
# end def