def test_get_min_coverage_spades_1(self, mock_contigs_spades_1):
    # Minimum coverage calculated for `mock_contigs_spades_1`
    #   must equal 24.26 (absolute tolerance 1e-2).

    # The fixture unpacks into a pair; only its first item
    #   (the contig collection) is needed here
    contigs, _ = mock_contigs_spades_1
    calculator = sts.CoverageCalculator(contigs)

    observed_min_cov = calculator.get_min_coverage()
    deviation = abs(observed_min_cov - 24.26)

    assert deviation < 1e-2
def test_get_min_coverage_a5_0(self, mock_contigs_a5_0):
    # No minimum coverage is expected for `mock_contigs_a5_0`:
    #   `get_min_coverage` must return None.

    # The fixture unpacks into a pair; only its first item
    #   (the contig collection) is needed here
    contigs, _ = mock_contigs_a5_0
    calculator = sts.CoverageCalculator(contigs)

    observed_min_cov = calculator.get_min_coverage()

    assert observed_min_cov is None
def test_get_min_coverage_spades_0(self, mock_contigs_spades_0):
    # Minimum coverage calculated for `mock_contigs_spades_0`
    #   must equal 18.28 (absolute tolerance 1e-2).

    # The fixture unpacks into a pair; only its first item
    #   (the contig collection) is needed here
    contigs, _ = mock_contigs_spades_0
    calculator = sts.CoverageCalculator(contigs)

    observed_min_cov = calculator.get_min_coverage()
    deviation = abs(observed_min_cov - 18.28)

    assert deviation < 1e-2
def test_get_median_coverage_spades_no_multplty_0_0(self, mock_contigs_spades_no_multplty_0):
    # Median coverage calculated for `mock_contigs_spades_no_multplty_0`
    #   must equal 23.48 (absolute tolerance 1e-2).

    # The fixture unpacks into a pair; only its first item
    #   (the contig collection) is needed here
    contigs, _ = mock_contigs_spades_no_multplty_0
    calculator = sts.CoverageCalculator(contigs)

    observed_median_cov = calculator.calc_median_coverage()
    deviation = abs(observed_median_cov - 23.48)

    assert deviation < 1e-2
def test_get_max_coverage_mix_0(self, mock_contigs_mix_0):
    # Maximum coverage calculated for `mock_contigs_mix_0`
    #   must equal 28.68 (absolute tolerance 1e-2).

    # The fixture unpacks into a pair; only its first item
    #   (the contig collection) is needed here
    contigs, _ = mock_contigs_mix_0
    calculator = sts.CoverageCalculator(contigs)

    observed_max_cov = calculator.get_max_coverage()
    deviation = abs(observed_max_cov - 28.68)

    assert deviation < 1e-2
def test_filter_non_none_covs_spades_no_multiplty_0(self, mock_contigs_spades_no_multplty_0):
    # The calculator built from `mock_contigs_spades_no_multplty_0`
    #   must keep exactly 4 items in its `_coverages` attribute,
    #   i.e. 4 "coverage-containing" contigs are expected.

    # The fixture unpacks into a pair; only its first item
    #   (the contig collection) is needed here
    contigs, _ = mock_contigs_spades_no_multplty_0
    calculator = sts.CoverageCalculator(contigs)

    observed_len = len(calculator._coverages)

    assert observed_len == 4
def test_filter_non_none_covs_spades_1(self, mock_contigs_spades_1):
    # The calculator built from `mock_contigs_spades_1`
    #   must keep exactly 7 items in its `_coverages` attribute,
    #   i.e. 7 "coverage-containing" contigs are expected.

    # The fixture unpacks into a pair; only its first item
    #   (the contig collection) is needed here
    contigs, _ = mock_contigs_spades_1
    calculator = sts.CoverageCalculator(contigs)

    observed_len = len(calculator._coverages)

    assert observed_len == 7
def write_summary(contig_collection: ContigCollection,
                  overlap_collection: OverlapCollection,
                  infpath: str,
                  outdpath: str,
                  out_prefix: str) -> None:
    # Function writes summary to summary file
    #   `<outdpath>/<out_prefix>_combinator_summary_FQ.txt`.
    # The path to the input file is written to the summary file only;
    #   every other line is emitted with `_double_write`.
    #
    # :param contig_collection: instance of ContigCollection returned by
    #   `src.contigs.get_contig_collection` function;
    # :param overlap_collection: instance of OverlapCollection returned by
    #   `src.overlaps.detect_adjacent_contigs` function;
    # :param infpath: path to input file (it will be mentioned in summary);
    # :param outdpath: path to output directory;
    # :param out_prefix: prefix for current output files;

    # Make path to summary file
    summary_fpath: str = os.path.join(
        outdpath,
        '{}_combinator_summary_FQ.txt'.format(out_prefix)
    )

    print('Writing summary to `{}`'.format(summary_fpath))

    # Proceed
    with open(summary_fpath, 'w') as outfile:

        # Path to input file
        outfile.write('Input file: `{}`\n\n'.format(infpath))

        # Summary with some statistics:
        _double_write(' === Summary ===', outfile)

        # Number of contigs processed:
        _double_write(
            '{} contigs were processed.'.format(len(contig_collection)),
            outfile
        )

        # Sum of contigs' lengths
        _double_write(
            'Sum of contig lengths: {} bp'.format(
                sts.calc_sum_contig_lengths(contig_collection)
            ),
            outfile
        )

        # Expected length of the genome
        _double_write(
            'Expected length of the genome: {} bp'.format(
                sts.calc_exp_genome_size(contig_collection, overlap_collection)
            ),
            outfile
        )

        # Calculate coverage statistics.
        # Each statistic is reported the same way, so they are listed
        #   as (label, method) pairs and handled by a single loop.
        # Order of the pairs is the order of lines in the summary.
        cov_calc = sts.CoverageCalculator(contig_collection)
        coverage_stats = (
            ('Min coverage', cov_calc.get_min_coverage),
            ('Max coverage', cov_calc.get_max_coverage),
            ('Mean coverage', cov_calc.calc_mean_coverage),
            ('Median coverage', cov_calc.calc_median_coverage),
        )

        for label, calc_stat in coverage_stats:
            stat_value = calc_stat()
            # A statistic may be None: report 'NA' in that case
            shown_value = stat_value if stat_value is not None else 'NA'
            _double_write('{}: {}'.format(label, shown_value), outfile)

        # LQ coefficient
        _double_write(
            'LQ-coefficient: {}'.format(
                sts.calc_lq_coef(contig_collection, overlap_collection)
            ),
            outfile
        )