Exemplo n.º 1
0
    def test_order_of_annotation_fields_is_taken_from_annotations_orddict_order(
            self, fasta_seq, index_df):
        """Check that annotation columns follow the annotations dict order.

        The generator is run once per insertion order of the same two
        annotations; each time the first three produced lines are compared
        with the first three expected lines built from the matching column
        order.
        """
        scenarios = (
            # first seq_context, then triplet_seq
            ((('seq_context', 2), ('triplet_seq', True)),
             '#chrom start end motif score strand seq_context triplet_seq'),
            # first triplet_seq, then seq_context
            ((('triplet_seq', True), ('seq_context', 2)),
             '#chrom start end motif score strand triplet_seq seq_context'),
        )
        for ordered_items, header in scenarios:
            # Building from an ordered sequence of pairs keeps that order.
            annotations = OrderedDict(ordered_items)
            produced = list(
                bed_lines_generator(fasta_seq=fasta_seq,
                                    motifs=['CG', 'CHH', 'CHG'],
                                    annotations=annotations,
                                    chr_name='chr1'))
            wanted = self.get_expected_lines(index_df, header.split())
            assert produced[0:3] == wanted[0:3]
Exemplo n.º 2
0
 def test_optionally_annotates_motif_triplet_sequence(
         self, fasta_seq, index_df, annotations_orddict):
     """Expect 'triplet_seq' as the only extra column when seq_context is 0.

     The 'seq_context' entry of the annotations fixture is set to 0 before
     the generator runs; the remaining fixture entries are used unchanged
     (their values are not visible here - presumably 'triplet_seq' is
     enabled; verify against the fixture).
     """
     header = '#chrom start end motif score strand triplet_seq'.split()
     annotations_orddict['seq_context'] = 0
     generator = bed_lines_generator(fasta_seq=fasta_seq,
                                     motifs=['CG', 'CHH', 'CHG'],
                                     annotations=annotations_orddict,
                                     chr_name='chr1')
     produced = list(generator)
     wanted = self.get_expected_lines(index_df, header)
     assert produced == wanted
Exemplo n.º 3
0
    def test_finds_cytosines_and_classifies_motifs_correctly_even_at_boundaries_and_next_to_Ns(
            self, fasta_seq, index_df, annotations_orddict):
        """Compare the generated lines with the expected six base columns.

        Every entry of the annotations fixture is set to False first, so the
        expected lines are built from the base columns only.
        """
        # Only values of existing keys are replaced, so the mapping's size
        # does not change while it is being iterated.
        for field in annotations_orddict:
            annotations_orddict[field] = False

        header = '#chrom start end motif score strand'.split()
        generator = bed_lines_generator(fasta_seq=fasta_seq,
                                        motifs=['CG', 'CHH', 'CHG'],
                                        annotations=annotations_orddict,
                                        chr_name='chr1')
        produced = list(generator)
        wanted = self.get_expected_lines(index_df, header)
        assert produced == wanted
Exemplo n.º 4
0
    def test_discards_cytosines_which_are_not_in_the_specified_motifs(
            self, index_df, fasta_seq):
        """Expect only CG rows when 'CG' is the sole requested motif.

        The generator is run with motifs=['CG'] and no annotations; the
        expected lines are built from the rows of index_df whose 'motif'
        column equals 'CG'.
        """
        generator = bed_lines_generator(fasta_seq=fasta_seq,
                                        motifs=['CG'],
                                        annotations={},
                                        chr_name='chr1')
        produced = list(generator)

        header = '#chrom start end motif score strand'.split()
        # Keep only the rows classified as CG, with all columns.
        cg_rows = index_df.loc[index_df['motif'] == 'CG', :]
        wanted = self.get_expected_lines(cg_rows, header)

        assert produced == wanted