# Test: to_embl(fakedb(), include_seq=False) for a family whose clades are [1].
# The expected text reports "Species: root", contains no OS/OC lines, and
# ends at "//" without an SQ section.
# NOTE(review): this method, including its triple-quoted expected value, is
# collapsed onto one physical line in this view; the literal's original line
# breaks and column spacing cannot be recovered from here -- confirm against
# the upstream file before editing the expected text.
def test_attached_to_root(self): fam = Family() fam.name = "Test6" fam.accession = "TEST0006" fam.version = 6 fam.clades = [1] fam.consensus = "ACGTTGCAGAGACTCT" fam.repeat_type = "Test" fam.repeat_subtype = "RootTaxa" self.assertEqual( fam.to_embl(fakedb(), include_seq=False), """\ ID TEST0006; SV 6; linear; DNA; STD; UNC; 16 BP. NM Test6 XX AC TEST0006; XX XX KW Test/RootTaxa. XX XX CC CC RepeatMasker Annotations: CC Type: Test CC SubType: RootTaxa CC Species: root CC SearchStages: CC BufferStages: XX // """)
# Test: to_embl(fakedb()) with default options for an 8 bp family in clade 5.
# The expected text contains ID/NM/AC/KW lines, an OS/OC taxonomy pair, the
# RepeatMasker annotation comment block, and an SQ section with per-base
# counts (5 A; 1 C; 1 G; 1 T; 0 other) followed by the lowercase sequence.
# NOTE(review): this method, including its triple-quoted expected value, is
# collapsed onto one physical line in this view; the literal's original line
# breaks and column spacing cannot be recovered from here -- confirm against
# the upstream file before editing the expected text.
def test_simple(self): fam = Family() fam.name = "Test1" fam.accession = "TEST0001" fam.version = 1 fam.clades = [5] fam.consensus = "ACGTAAAA" fam.repeat_type = "Type" fam.repeat_subtype = "SubType" self.assertEqual( fam.to_embl(fakedb()), """\ ID TEST0001; SV 1; linear; DNA; STD; UNC; 8 BP. NM Test1 XX AC TEST0001; XX XX KW Type/SubType. XX OS Species 1 OC Parent Clade; A Clade. XX CC CC RepeatMasker Annotations: CC Type: Type CC SubType: SubType CC Species: Species_1 CC SearchStages: CC BufferStages: XX SQ Sequence 8 BP; 5 A; 1 C; 1 G; 1 T; 0 other; acgtaaaa 8 // """)
# Test: to_embl(fakedb(), include_seq=False) keeps the metadata sections
# (KW, OS/OC, RepeatMasker annotation comments) but the expected text has no
# SQ section; the ID line still reports the 8 BP length.
# NOTE(review): this method, including its triple-quoted expected value, is
# collapsed onto one physical line in this view; the literal's original line
# breaks and column spacing cannot be recovered from here -- confirm against
# the upstream file before editing the expected text.
def test_metaonly(self): fam = Family() fam.name = "Test3" fam.accession = "TEST0003" fam.version = 3 fam.clades = [5] fam.consensus = "ACGTTGCA" fam.repeat_type = "Test" fam.repeat_subtype = "Metadata" self.assertEqual( fam.to_embl(fakedb(), include_seq=False), """\ ID TEST0003; SV 3; linear; DNA; STD; UNC; 8 BP. NM Test3 XX AC TEST0003; XX XX KW Test/Metadata. XX OS Species 1 OC Parent Clade; A Clade. XX CC CC RepeatMasker Annotations: CC Type: Test CC SubType: Metadata CC Species: Species_1 CC SearchStages: CC BufferStages: XX // """)
# Test: to_embl(fakedb(), include_seq=False) for a family whose `citations`
# attribute is a JSON string (built with json.dumps) holding two entries.
# The expected text contains one RN/RA/RT/RL reference block per citation,
# numbered [1] and [2], each annotated "(bases 1 to 16)". Unlike the sibling
# tests visible here, this one also sets fam.length = 16 explicitly.
# NOTE(review): this method, including its triple-quoted expected value, is
# collapsed onto one physical line in this view; the literal's original line
# breaks and column spacing cannot be recovered from here -- confirm against
# the upstream file before editing the expected text.
def test_citations(self): fam = Family() fam.name = "Test7" fam.accession = "TEST0007" fam.version = 7 fam.clades = [2] fam.consensus = "ACGTTGCAGAGACTCT" fam.length = 16 fam.repeat_type = "Test" fam.repeat_subtype = "HasCitations" fam.citations = json.dumps([ { "order_added": 1, "authors": "John Doe", "title": "Testing Citation Export Formatting", "journal": "Unit Tests 7(2), 2020.", }, { "order_added": 2, "authors": "Jane Doe", "title": "Testing Citation Export Formatting", "journal": "Unit Tests 7(2), 2020.", }, ]) self.assertEqual( fam.to_embl(fakedb(), include_seq=False), """\ ID TEST0007; SV 7; linear; DNA; STD; UNC; 16 BP. NM Test7 XX AC TEST0007; XX XX KW Test/HasCitations. XX OS A Clade OC Parent Clade. XX RN [1] (bases 1 to 16) RA John Doe RT Testing Citation Export Formatting RL Unit Tests 7(2), 2020. XX RN [2] (bases 1 to 16) RA Jane Doe RT Testing Citation Export Formatting RL Unit Tests 7(2), 2020. XX CC CC RepeatMasker Annotations: CC Type: Test CC SubType: HasCitations CC Species: A_Clade CC SearchStages: CC BufferStages: XX // """)
def test_without_version(self):
    """Export by accession when the family has no version set.

    The expected header is the bare accession (no version suffix) and the
    lowercase consensus is expected back in uppercase.
    """
    family = Family()
    family.accession = "Test11"
    family.clades = []
    family.consensus = "acgt"

    rendered = family.to_fasta(fakedb(), use_accession=True)

    self.assertEqual(rendered, ">Test11\nACGT\n")
def test_clades(self):
    """Export a family assigned to two clades.

    The expected FASTA header carries one '@'-prefixed tag per clade after
    the family name.
    """
    family = Family()
    family.name = "Test4"
    family.accession = "TEST0004"
    family.version = 4
    family.clades = [2, 3]
    family.consensus = "ACGT"

    rendered = family.to_fasta(fakedb())

    self.assertEqual(rendered, ">Test4 @A_Clade @Another_Clade_3.\nACGT\n")
def test_complement(self):
    """Export the reverse complement of a consensus with ambiguity codes.

    The expected header gains an "(anti)" marker and the sequence is
    reversed and complemented. No database is needed (None is passed).
    """
    family = Family()
    family.name = "Test3"
    family.accession = "TEST0003"
    family.version = 3
    family.clades = []
    family.consensus = "CGTAWWKSAAAA"

    rendered = family.to_fasta(None, do_reverse_complement=True)

    # NOTE(review): the pinned value maps W<->S. Under the IUPAC ambiguity
    # codes W and S are each self-complementary, which would give
    # "TTTTSMWWTACG" instead -- confirm the swap is intended before changing
    # either this test or the implementation.
    self.assertEqual(rendered, ">Test3 (anti)\nTTTTWMSSTACG\n")
def test_always_exports_uppercase(self):
    """A lowercase consensus is expected to be exported in uppercase."""
    family = Family()
    family.name = "Test10"
    family.accession = "TEST0010"
    family.version = 10
    family.clades = []
    family.consensus = "acgt"

    rendered = family.to_fasta(fakedb())

    self.assertEqual(rendered, ">Test10\nACGT\n")
def test_search_stages(self):
    """Export a family that has search stages set.

    The expected header ends with the stages in an "[S:...]" tag placed
    after the clade tag.
    """
    family = Family()
    family.name = "Test9"
    family.accession = "TEST0009"
    family.version = 9
    family.clades = [2]
    family.consensus = "ACGT"
    family.search_stages = "30,45"

    rendered = family.to_fasta(fakedb())

    self.assertEqual(rendered, ">Test9 @A_Clade [S:30,45]\nACGT\n")
def test_simple(self):
    """Export a minimal family by name, then by accession.

    By default the header is the family name. With use_accession=True the
    header is expected to be "<accession>.<version>" followed by a
    "name=" field.
    """
    family = Family()
    family.name = "Test1"
    family.accession = "TEST0001"
    family.version = 1
    family.clades = []
    family.consensus = "ACGTAAAA"

    by_name = family.to_fasta(None)
    self.assertEqual(by_name, ">Test1\nACGTAAAA\n")

    by_accession = family.to_fasta(None, use_accession=True)
    self.assertEqual(by_accession, ">TEST0001.1 name=Test1\nACGTAAAA\n")
def test_multiline(self):
    """Export a 160 bp consensus and check how the sequence is wrapped.

    The expected output wraps the sequence at 60 bases per line: two full
    lines followed by a 40-base remainder.
    """
    family = Family()
    family.name = "Test5"
    family.accession = "TEST0005"
    family.version = 5
    family.clades = []
    family.consensus = "ACGTTGCA" * 20  # 160 bp total

    expected = (
        ">Test5\n"
        "ACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGT\n"
        "TGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCA\n"
        "ACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCA\n"
    )

    self.assertEqual(family.to_fasta(fakedb()), expected)
def test_buffer(self):
    """Export a family as a buffer, whole and as a sub-range.

    buffer=True is expected to append "#buffer" to the name and keep the
    full sequence. buffer=[5, 10] is expected to add "_5_10" to the name
    and export only that slice ("GCGCGC", i.e. 1-based inclusive positions
    5 through 10 of the consensus).
    """
    family = Family()
    family.name = "Test6"
    family.accession = "TEST0006"
    family.version = 6
    family.clades = []
    family.consensus = "AAAAGCGCGCAAAA"

    whole = family.to_fasta(fakedb(), buffer=True)
    self.assertEqual(whole, ">Test6#buffer\nAAAAGCGCGCAAAA\n")

    sliced = family.to_fasta(fakedb(), buffer=[5, 10])
    self.assertEqual(sliced, ">Test6_5_10#buffer\nGCGCGC\n")
# Test: to_embl(fakedb()) for a family carrying extra metadata: two clades
# ([5, 3]), repeat_type "LTR", two newline-separated aliases, and
# refineable = True. In the expected text only the Repbase alias appears
# (as a DR line; the OtherDB alias is absent), the KW line is the long LTR
# description followed by the family name, each clade has its own OS/OC
# pair, "CC Refineable" is present, and the SQ counts report "2 other" for
# the K and W ambiguity codes in the consensus.
# NOTE(review): this method, including its triple-quoted expected value, is
# collapsed onto one physical line in this view; the literal's original line
# breaks and column spacing cannot be recovered from here -- confirm against
# the upstream file before editing the expected text.
def test_special_metadata(self): fam = Family() fam.name = "Test5" fam.accession = "TEST0005" fam.version = 5 fam.clades = [5, 3] fam.consensus = "ACGTTGCAGAGAKWCTCT" fam.repeat_type = "LTR" fam.repeat_subtype = "BigTest" fam.aliases = "Repbase:MyLTR1\nOtherDB:MyLTR\n" fam.refineable = True self.assertEqual( fam.to_embl(fakedb()), """\ ID TEST0005; SV 5; linear; DNA; STD; UNC; 18 BP. NM Test5 XX AC TEST0005; XX XX DR Repbase; MyLTR1. XX KW Long terminal repeat of retrovirus-like element; Test5. XX OS Species 1 OC Parent Clade; A Clade. OS Another Clade (3.) OC . XX CC CC RepeatMasker Annotations: CC Type: LTR CC SubType: BigTest CC Species: Species_1, Another_Clade_3. CC SearchStages: CC BufferStages: CC Refineable XX SQ Sequence 18 BP; 4 A; 4 C; 4 G; 4 T; 2 other; acgttgcaga gakwctct 18 // """)
def test_classname(self):
    """Export with the repeat class appended to the family name.

    With only a repeat type set, the expected header is "name#Type". After
    a subtype is also set, it is expected to become "name#Type/SubType".
    """
    family = Family()
    family.name = "Test2"
    family.accession = "TEST0002"
    family.version = 2
    family.clades = []
    family.consensus = "TCGATTTT"
    family.repeat_type = "Type"

    type_only = family.to_fasta(None, include_class_in_name=True)
    self.assertEqual(type_only, ">Test2#Type\nTCGATTTT\n")

    # The subtype is added only after the first check, on the same instance.
    family.repeat_subtype = "SubType"

    type_and_subtype = family.to_fasta(None, include_class_in_name=True)
    self.assertEqual(type_and_subtype, ">Test2#Type/SubType\nTCGATTTT\n")
# Test: to_embl(fakedb(), include_meta=False) for an 8 bp family. The
# expected text keeps the ID/NM/AC header and the SQ section but contains no
# KW, OS/OC, or RepeatMasker annotation comment lines, even though
# repeat_type, repeat_subtype and a clade are set on the family.
# NOTE(review): this method, including its triple-quoted expected value, is
# collapsed onto one physical line in this view; the literal's original line
# breaks and column spacing cannot be recovered from here -- confirm against
# the upstream file before editing the expected text.
def test_seqonly(self): fam = Family() fam.name = "Test4" fam.accession = "TEST0004" fam.version = 4 fam.clades = [5] fam.consensus = "ACGTTGCA" fam.repeat_type = "Test" fam.repeat_subtype = "SequenceOnly" self.assertEqual( fam.to_embl(fakedb(), include_meta=False), """\ ID TEST0004; SV 4; linear; DNA; STD; UNC; 8 BP. NM Test4 XX AC TEST0004; XX XX SQ Sequence 8 BP; 2 A; 2 C; 2 G; 2 T; 0 other; acgttgca 8 // """)
# Test: to_embl(fakedb()) for a 160 bp consensus ("ACGTTGCA" * 20). In the
# expected text the SQ section prints the lowercase sequence in 10-base
# groups, with running totals 60, 120 and 160 after successive runs of
# groups, and base counts of 40 each for A, C, G and T.
# NOTE(review): this method, including its triple-quoted expected value, is
# collapsed onto one physical line in this view, so the inline
# "# 160 bp total" comment now swallows the rest of the line and the
# literal's original line breaks and column spacing cannot be recovered
# from here -- confirm against the upstream file before editing.
def test_multiline(self): fam = Family() fam.name = "Test2" fam.accession = "TEST0002" fam.version = 2 fam.clades = [5] fam.consensus = "ACGTTGCA" * 20 # 160 bp total fam.repeat_type = "Test" fam.repeat_subtype = "Multiline" self.assertEqual( fam.to_embl(fakedb()), """\ ID TEST0002; SV 2; linear; DNA; STD; UNC; 160 BP. NM Test2 XX AC TEST0002; XX XX KW Test/Multiline. XX OS Species 1 OC Parent Clade; A Clade. XX CC CC RepeatMasker Annotations: CC Type: Test CC SubType: Multiline CC Species: Species_1 CC SearchStages: CC BufferStages: XX SQ Sequence 160 BP; 40 A; 40 C; 40 G; 40 T; 0 other; acgttgcaac gttgcaacgt tgcaacgttg caacgttgca acgttgcaac gttgcaacgt 60 tgcaacgttg caacgttgca acgttgcaac gttgcaacgt tgcaacgttg caacgttgca 120 acgttgcaac gttgcaacgt tgcaacgttg caacgttgca 160 // """)
def test_all(self):
    """Export with several FASTA options combined.

    First call: accession header, class-in-name, and whole-sequence buffer.
    The expected header is "<accession>.<version>#buffer" followed by the
    "name=" field and the clade tags, with the 160 bp sequence wrapped at
    60 bases per line.

    Second call: the same options plus reverse complement and a [23, 39]
    buffer range. The expected header adds "_23_39" and "(anti)", and the
    expected sequence is 17 bases long.
    """
    family = Family()
    family.name = "Test7"
    family.accession = "TEST0007"
    family.version = 7
    family.clades = [2, 3]
    family.consensus = "ACGTTGCA" * 20  # 160 bp total

    whole_buffer = family.to_fasta(
        fakedb(),
        use_accession=True,
        include_class_in_name=True,
        buffer=True,
    )
    expected_whole = (
        ">TEST0007.7#buffer name=Test7 @A_Clade @Another_Clade_3.\n"
        "ACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGT\n"
        "TGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCA\n"
        "ACGTTGCAACGTTGCAACGTTGCAACGTTGCAACGTTGCA\n"
    )
    self.assertEqual(whole_buffer, expected_whole)

    ranged_anti = family.to_fasta(
        fakedb(),
        use_accession=True,
        include_class_in_name=True,
        do_reverse_complement=True,
        buffer=[23, 39],
    )
    expected_ranged = (
        ">TEST0007.7_23_39#buffer (anti) name=Test7 @A_Clade @Another_Clade_3.\n"
        "GCAACGTTGCAACGTTG\n"
    )
    self.assertEqual(ranged_anti, expected_ranged)
# Test: to_embl(fakedb(), include_seq=False) for a family whose
# `coding_sequences` attribute is a JSON string (built with json.dumps)
# holding two entries. The expected text contains an FH feature-table header
# and one "FT CDS <cds_start>..<cds_end>" feature per entry, each with
# /product, /number, /note and /translation qualifiers.
# NOTE(review): /number presumably comes from "exon_count" and /note from
# "description" (the values line up in this fixture) -- verify against the
# implementation.
# NOTE(review): this method, including its triple-quoted expected value, is
# collapsed onto one physical line in this view; the literal's original line
# breaks and column spacing cannot be recovered from here -- confirm against
# the upstream file before editing the expected text.
def test_cds(self): fam = Family() fam.name = "Test8" fam.accession = "TEST0008" fam.version = 8 fam.clades = [2] fam.consensus = "ACGTTGCAGAGACTCT" fam.repeat_type = "Test" fam.repeat_subtype = "CodingSequence" fam.coding_sequences = json.dumps([ { "cds_start": 1, "cds_end": 6, "product": "FAKE", "exon_count": 1, "description": "Example coding sequence", "translation": "TL", }, { "cds_start": 5, "cds_end": 16, "product": "FAKE2", "exon_count": 1, "description": "Another example coding sequence", "translation": "CRDS", }, ]) self.assertEqual( fam.to_embl(fakedb(), include_seq=False), """\ ID TEST0008; SV 8; linear; DNA; STD; UNC; 16 BP. NM Test8 XX AC TEST0008; XX XX KW Test/CodingSequence. XX OS A Clade OC Parent Clade. XX CC CC RepeatMasker Annotations: CC Type: Test CC SubType: CodingSequence CC Species: A_Clade CC SearchStages: CC BufferStages: XX FH Key Location/Qualifiers FH FT CDS 1..6 FT /product="FAKE" FT /number=1 FT /note="Example coding sequence" FT /translation="TL" FT CDS 5..16 FT /product="FAKE2" FT /number=1 FT /note="Another example coding sequence" FT /translation="CRDS" XX // """)