示例#1
0
    def test_both_empty(self, empty_feature_list) -> None:
        """Both the new and the extant feature lists are empty.

        The deduplicated result is expected to contain no features.
        """
        # Each argument receives its own deep copy of the fixture
        new_features: FeatureList = copy.deepcopy(empty_feature_list)
        extant_features: FeatureList = copy.deepcopy(empty_feature_list)

        result: FeatureList = dedupl_features(new_features, extant_features)

        assert len(result) == 0
示例#2
0
    def test_new_empty(self, empty_feature_list,
                       extant_feature_list_1) -> None:
        """The list of new features is empty, the extant one is not replaced.

        With nothing new to add, the deduplicated result is expected
        to contain no features.
        """
        # Deep copies of the fixtures are handed to the function under test
        new_features: FeatureList = copy.deepcopy(empty_feature_list)
        extant_features: FeatureList = copy.deepcopy(extant_feature_list_1)

        result: FeatureList = dedupl_features(new_features, extant_features)

        assert len(result) == 0
示例#3
0
    def test_both_new_dupl(self, new_feature_list_4,
                           extant_feature_list_1) -> None:
        """Two of the new features are duplicated.

        The deduplicated result is expected to be exactly two features
        shorter than the list of new features passed in.
        """
        # Deep copies of the fixtures are handed to the function under test
        new_features: FeatureList = copy.deepcopy(new_feature_list_4)
        extant_features: FeatureList = copy.deepcopy(extant_feature_list_1)

        # Record the input size before the call
        n_new_before: int = len(new_features)

        result: FeatureList = dedupl_features(new_features, extant_features)

        assert len(result) == n_new_before - 2
示例#4
0
    def test_all_new_unique(self, new_feature_list_1,
                            extant_feature_list_1) -> None:
        """All of the new features are unique.

        The deduplicated result is expected to have the same length
        as the list of new features passed in.
        """
        # Deep copies of the fixtures are handed to the function under test
        new_features: FeatureList = copy.deepcopy(new_feature_list_1)
        extant_features: FeatureList = copy.deepcopy(extant_feature_list_1)

        # Record the input size before the call
        n_new_before: int = len(new_features)

        result: FeatureList = dedupl_features(new_features, extant_features)

        assert len(result) == n_new_before
示例#5
0
    def test_one_new_half_unique(self, new_feature_list_2,
                                 extant_feature_list_1) -> None:
        """One of the new features shares only its start with an extant one.

        Only the start of that feature occurs somewhere in the extant
        features, so the deduplicated result is expected to have the same
        length as the list of new features passed in.
        """
        # Deep copies of the fixtures are handed to the function under test
        new_features: FeatureList = copy.deepcopy(new_feature_list_2)
        extant_features: FeatureList = copy.deepcopy(extant_feature_list_1)

        # Record the input size before the call
        n_new_before: int = len(new_features)

        result: FeatureList = dedupl_features(new_features, extant_features)

        assert len(result) == n_new_before
示例#6
0
def main(version: str, last_update_date: str) -> None:
    """Annotate the sequences of the target fasta file with coverage features.

    The function parses the program arguments, reads the fasta records,
    sets up the output directory and output file through the project helpers,
    and counts coverages for all references. Then, for every record, it
    detects coverage features for each configured coverage threshold,
    deduplicates them against the features the record already has, and
    writes the record as GenBank output.

    A record is skipped with a printed warning if its coverage array is
    empty or its length differs from the sequence length. In that case the
    final message reads `Completed with warnings!`.

    :param version: program version, embedded in the note of generated features;
    :param last_update_date: not used inside this function;
    """

    # Parse arguments
    args: HighlighterParams = parse_arguments()

    # Note attached to the features written to the result GenBank file
    feature_note: str = f'generated by consensus-highlighter v{version}'

    # Suffix of the final message: becomes non-empty once a sequence is skipped
    warning_suffix: str = ''

    # Line printed after each processed (or skipped) sequence
    separator: str = '=' * 10

    # Read fasta records from input file
    print(f'Importing fasta from `{args.target_fasta_fpath}`...',
          end=' ', flush=True)
    records: Sequence[SeqRecord] = pfr.parse_fasta_reference(
        args.target_fasta_fpath)
    print('done')

    # Create output directory and set up the output file
    _create_outdir_from_outfile(args.outfpath)
    out.create_or_emply_file(args.outfpath)

    # Obtain path to coverage file
    depth_fpath: str = out.conf_path_to_depth_file(args.outfpath)

    # Count coverages with samtools depth
    print('Silently counting coverages with `samtools depth`...',
          end=' ', flush=True)
    cov_fpath: str = oc.count_cov_for_all_refs(
        args.target_fasta_fpath, args.bam_fpath, depth_fpath)
    print('done\n')

    # Proceed with annotation
    record: SeqRecord
    for record in records:

        print(f'Processing sequence `{record.description}`')

        # Obtain coverages for current sequence
        coverage: CoverageArray = oc.get_coverage_for_reference(
            record.id, cov_fpath)
        n_positions: int = len(coverage)

        # No coverage data at all: warn and skip the sequence
        if n_positions == 0:
            print(
                f'!  Warning: no coverage information found for sequence `{record.id}`.'
            )
            print(
                '!  Please, make sure that field `RNAME` (3-rd column) in your BAM file contains\n'
                f'!    id of this sequence specified in fasta header (i.e. `{record.id}`).'
            )
            print('! Omitting this sequence.')
            print(separator)
            warning_suffix = ' with warnings'
            continue
        # end if

        # Coverage data does not match the sequence length: warn and skip
        seq_len: int = len(record.seq)
        if n_positions != seq_len:
            print(
                f'!  Warning: length of sequence `{record.id}` ({seq_len} bp)\n'
                f'!    is not equal to number of coverage positions ({n_positions}) reported by `samtools depth`\n'
                f'!    and stored in coverage file `{cov_fpath}`.'
            )
            print(
                '!  Re-creating the bam file might be the solution of this issue.'
            )
            print('!  Omitting this sequence.')
            print(separator)
            warning_suffix = ' with warnings'
            continue
        # end if

        mean_coverage = round(sts.mean(coverage.coverages), 2)
        print(f'Average coverage: {mean_coverage}')

        # Detect all necessary coverage features
        threshold: CoverageThreshold
        for threshold in args.coverage_thresholds:

            print(
                f'Screening the sequence for regions with {threshold.get_label()}...',
                end=' ', flush=True)

            # Get coverage features for this threshold
            detected: MutableSequence[SeqFeature] = \
                hlft.highlight_coverage_features(
                    coverage, threshold, feature_note)

            # Deduplicate against the features the record already has
            novel: MutableSequence[SeqFeature] = ddf.dedupl_features(
                detected, record.features)

            # Append features to list
            record.features.extend(novel)
            print('done')
        # end for

        print(f'Writing annotated sequence to `{args.outfpath}`...',
              end=' ', flush=True)

        # Write result GenBank record
        out.write_genbank_output(record, args.topology, args.organism,
                                 args.outfpath)
        print('done')

        print(separator)
    # end for

    print(f'Completed{warning_suffix}!')