def test_order_of_annotation_fields_is_taken_from_annotations_orddict_order(
        self, fasta_seq, index_df):
    """Check that annotation columns follow the OrderedDict insertion order.

    The generator is run twice with the same two annotations inserted in
    opposite orders. For each run, the first three produced lines are
    compared with the first three lines returned by
    ``self.get_expected_lines`` for the correspondingly ordered columns.
    """
    scenarios = (
        # first seq_context, then triplet_seq
        ([('seq_context', 2), ('triplet_seq', True)],
         '#chrom start end motif score strand seq_context triplet_seq'),
        # first triplet_seq, then seq_context
        ([('triplet_seq', True), ('seq_context', 2)],
         '#chrom start end motif score strand triplet_seq seq_context'),
    )

    for ordered_items, header in scenarios:
        # Building from a list of pairs preserves the listed insertion order.
        annotations = OrderedDict(ordered_items)
        produced = list(
            bed_lines_generator(fasta_seq=fasta_seq,
                                motifs=['CG', 'CHH', 'CHG'],
                                annotations=annotations,
                                chr_name='chr1'))
        wanted = self.get_expected_lines(index_df, header.split())
        assert produced[0:3] == wanted[0:3]
def test_optionally_annotates_motif_triplet_sequence(
        self, fasta_seq, index_df, annotations_orddict):
    """Check the output when only the triplet_seq annotation is requested.

    ``seq_context`` is set to 0 in the annotations fixture before the
    generator is called, and the expected columns end with ``triplet_seq``.
    NOTE(review): presumably the fixture enables ``triplet_seq`` by
    default -- confirm against the fixture definition.
    """
    annotations_orddict['seq_context'] = 0

    generator_kwargs = dict(
        fasta_seq=fasta_seq,
        motifs=['CG', 'CHH', 'CHG'],
        annotations=annotations_orddict,
        chr_name='chr1',
    )
    produced = list(bed_lines_generator(**generator_kwargs))

    columns = '#chrom start end motif score strand triplet_seq'.split()
    wanted = self.get_expected_lines(index_df, columns)
    assert produced == wanted
def test_finds_cytosines_and_classifies_motifs_correctly_even_at_boundaries_and_next_to_Ns(
        self, fasta_seq, index_df, annotations_orddict):
    """Check the plain six-column output with every annotation set to False.

    All entries of the annotations fixture are overwritten with ``False``
    and the complete generator output is compared with the expected lines
    for the base columns.
    NOTE(review): the boundary / N coverage named in the test title
    presumably comes from the ``fasta_seq`` fixture -- confirm.
    """
    # Only values are overwritten; the key set is left untouched.
    for annotation_name in annotations_orddict:
        annotations_orddict[annotation_name] = False

    line_iter = bed_lines_generator(fasta_seq=fasta_seq,
                                    motifs=['CG', 'CHH', 'CHG'],
                                    annotations=annotations_orddict,
                                    chr_name='chr1')
    produced = list(line_iter)

    columns = '#chrom start end motif score strand'.split()
    wanted = self.get_expected_lines(index_df, columns)
    assert produced == wanted
def test_discards_cytosines_which_are_not_in_the_specified_motifs(
        self, index_df, fasta_seq):
    """Check that only CG lines are produced when ``motifs`` is ``['CG']``.

    The expected lines are built from the rows of ``index_df`` whose
    ``motif`` column equals ``'CG'``; no annotations are requested.
    """
    line_iter = bed_lines_generator(fasta_seq=fasta_seq,
                                    motifs=['CG'],
                                    annotations={},
                                    chr_name='chr1')
    produced = list(line_iter)

    columns = '#chrom start end motif score strand'.split()
    # Restrict the reference index to the CG rows.
    cg_rows = index_df.loc[index_df['motif'] == 'CG', :]
    wanted = self.get_expected_lines(cg_rows, columns)
    assert produced == wanted