def test_string_reversecomplement():
    """Reverse-complement 'AANNGG' and compare the result with 'CCNNTT'.

    The comparison is made twice: once while the source string is still
    bound to a local name and once after that name has been deleted.
    """
    test_name = sys._getframe().f_code.co_name
    logging.info(test_name)

    forward = seqan.StringDNA5('AANNGG')
    revcomp = forward.reversecomplement()

    # Print the result both directly and through an explicit str() call.
    print(revcomp)
    print(str(revcomp))
    assert seqan.StringDNA5('CCNNTT') == revcomp

    # Unbind the source string, then make sure the result still compares equal.
    del forward
    assert seqan.StringDNA5('CCNNTT') == revcomp
def test_strings():
    """Exercise DNA/DNA5 strings: FASTA reading, infixes, slices, iteration, equality.

    Reads 'dm01r.fasta' (located through ``fasta_file``), logs a few
    properties of the sequences it contains, checks that an infix and a slice
    can still be logged after one name for their source string is deleted,
    iterates over the first sequence, and finally compares single elements of
    a StringDNA5 with one-character strings using ``==`` and ``!=`` with the
    plain string on either side.
    """
    logging.info(sys._getframe().f_code.co_name)

    _num_bases, sequences, ids = seqan.readFastaDNA5(fasta_file('dm01r.fasta'))
    logging.info('Size of DNA5 alphabet %d', seqan.DNA5.valueSize)
    logging.info('Size of DNA alphabet %d', seqan.DNA.valueSize)
    logging.info('Length of sequence %s %d', ids[3], len(sequences[3]))
    logging.info('Tenth base of sequence %s %s', ids[2], sequences[2].value(9))
    logging.info('Infix of sequence %s %s', ids[2], sequences[2].infix(9, 14))

    # The DNA5 string is only constructed; it is not used afterwards.
    _dna5_string = seqan.StringDNA5('ACGTACGTACGTACGT')
    dna_string = seqan.StringDNA('ACGTACGTACGTACGT')
    alias = dna_string
    infix = alias.infix(3, 9)
    slice_ = alias[3:9]
    logging.info('Infix %s', infix)
    logging.info(type(infix))
    logging.info('Slice %s', slice_)
    logging.info(type(slice_))

    # Check object lifetimes are respected: the infix and the slice must
    # remain usable once the name they were taken from is gone.
    # NOTE(review): ``dna_string`` still refers to the same object, so this
    # ``del`` only removes one name -- confirm that is the intended check.
    del alias
    logging.info('Infix %s', infix)
    logging.info('Slice %s', slice_)

    #
    # Check iteration using __getitem__ works correctly
    #
    first_seq = sequences[0]
    seen = set()
    for position, base in enumerate(first_seq):
        # Fail as soon as iteration yields more items than the reported length.
        assert position < len(first_seq)
        seen.add(base)
    assert len(seen) <= 4
    assert position == len(first_seq) - 1

    probe = seqan.StringDNA5('NACGTNNACGTNACGTNACGT')

    # String element on the left-hand side of the comparison.
    print(probe[0] == 'N')
    assert probe[0] == 'N'
    print(probe[0] != 'G')
    assert probe[0] != 'G'
    print(probe[1] != 'N')
    assert probe[1] != 'N'
    print(probe[1] == 'A')
    assert probe[1] == 'A'

    # Plain Python string on the left-hand side of the comparison.
    print('N' == probe[0])
    assert 'N' == probe[0]
    print('G' != probe[0])
    assert 'G' != probe[0]
    print('N' != probe[1])
    assert 'N' != probe[1]
    print('A' == probe[1])
    assert 'A' == probe[1]
def test_small_index():
    """Build each index type over the one-base string 'A' and open an iterator.

    For every class in ``IndexesDNA5`` the string set is deleted before the
    top-down iterator is constructed; the test passes if nothing raises.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        string_set.appendValue(seqan.StringDNA5('A'))
        index = index_cls(string_set)
        # Remove the local name for the string set before using the index.
        del string_set
        _iterator = index.TopDownIterator(index)
def test_index_as_strings_custodian():
    """Check that an index over 'ACG' can go down 'A' after its string set is deleted.

    Runs once for every class in ``IndexesDNA5``. Judging by the test name,
    this presumably verifies that the index keeps its strings alive -- confirm
    against the binding code.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        string_set.appendValue(seqan.StringDNA5('ACG'))
        index = index_cls(string_set)
        # Remove the local name for the string set before traversing the index.
        del string_set
        iterator = index.TopDownIterator(index)
        assert iterator.goDown('A')
def test_string_set_iteration():
    """Iterate twice over a string set that holds a single DNA5 string.

    The first pass logs the length of every member; the second pass maps
    ``len`` over the set and throws the lengths away.
    """
    logging.info(sys._getframe().f_code.co_name)
    string_set = seqan.StringDNA5Set()
    string_set.appendValue(seqan.StringDNA5('ACGTACGTACGTNNN'))

    # First pass: the for loop obtains an iterator and advances it until
    # StopIteration, logging each member's length on the way.
    for member in string_set:
        logging.info(len(member))

    # Second pass: list() guarantees the mapping is consumed in full.
    _lengths = list(map(len, string_set))
def _test_empty_index():
    """Build each index type over one empty string and open a top-down iterator.

    As written, the function always stops with ZeroDivisionError right after
    the iterator is created, so the final ``goDown`` check is never reached.
    The leading underscore presumably keeps test runners from collecting it
    -- confirm against the runner configuration.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        string_set.appendValue(seqan.StringDNA5())
        index = index_cls(string_set)
        logging.info('Creating top-down')
        cursor = index.topdown()
        # Deliberate failure: going down causes SEGV, so raise before trying.
        1 / 0
        logging.info('Going down "A"')
        assert not cursor.goDown('A')
def test_split_sequence():
    """Check ``split_sequence`` on a DNA5 string and on a plain Python string.

    'NNACNGANGGN' must yield 'AC', 'GA', 'GG' and 'ACGTNNNNAAGG' must yield
    'ACGT', 'AAGG', in that order and with no further pieces.
    """
    # Log through the ``logging`` module like the other tests in this file.
    logging.info(sys._getframe().f_code.co_name)

    pieces = list(split_sequence(seqan.StringDNA5('NNACNGANGGN')))
    # Check the count first so that a short result fails with a readable
    # assertion instead of an IndexError from the element checks below.
    assert 3 == len(pieces), pieces
    assert pieces[0] == 'AC', pieces[0]
    assert pieces[1] == 'GA', pieces[1]
    assert pieces[2] == 'GG', pieces[2]

    pieces = list(split_sequence('ACGTNNNNAAGG'))
    assert 2 == len(pieces), pieces
    assert pieces[0] == 'ACGT', pieces[0]
    assert pieces[1] == 'AAGG', pieces[1]
def test_go_down():
    """Check ``goDown`` and the occurrence count for several query types.

    For every class in ``IndexesDNA5`` an index is built over 'ACGT', 'AAAA',
    'GGGG' and 'AC'. Each query is given as a plain string, a StringDNA5 and,
    for single bases, a DNA5 value; every form must be accepted by ``goDown``
    and lead to the expected number of occurrences.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        for raw in ('ACGT', 'AAAA', 'GGGG', 'AC'):
            string_set.appendValue(seqan.StringDNA5(raw))
        index = index_cls(string_set)

        # (expected occurrence count, equivalent spellings of the query);
        # rebuilt for every index class.
        cases = (
            (2, (seqan.StringDNA5('AC'), 'AC')),
            (1, (seqan.DNA5('T'), 'T', seqan.StringDNA5('T'))),
            (6, (seqan.DNA5('A'), 'A', seqan.StringDNA5('A'))),
        )
        for expected_occs, queries in cases:
            for query in queries:
                logging.info('Trying %s: %s', type(query).__name__, query)
                # Start from a fresh iterator for every query.
                cursor = index.TopDownIterator(index)
                assert cursor.goDown(query)
                for hit in cursor.occurrences:
                    logging.info('Occurrence for %s: %s', query, hit)
                assert expected_occs == len(cursor.occurrences)
def test_long_edges():
    """Check the iterator's representative after going down towards 'ATACCGGTT'.

    For every class in ``IndexesDNA5`` an index is built over 'AA', 'AC', 'AG'
    and 'ATACCGGTT'. Going down 'A' must give the representative 'A'; going
    down a further 'T', or going down 'AT' from a fresh iterator, must give
    the representative 'ATACCGGTT'.
    """
    logging.info(sys._getframe().f_code.co_name)
    for index_cls in IndexesDNA5:
        string_set = seqan.StringDNA5Set()
        for raw in ('AA', 'AC', 'AG', 'ATACCGGTT'):
            string_set.appendValue(seqan.StringDNA5(raw))
        index = index_cls(string_set)

        # One base at a time: 'A' first, then 'T'.
        cursor = index.topdown()
        assert cursor.goDown('A')
        assert 'A' == cursor.representative
        assert cursor.goDown('T')
        logging.debug('%s: %s', index_cls.__name__, cursor.representative)
        assert 'ATACCGGTT' == cursor.representative

        # Both bases in a single step from a fresh iterator.
        cursor = index.topdown()
        assert cursor.goDown('AT')
        logging.debug('%s: %s', index_cls.__name__, cursor.representative)
        assert 'ATACCGGTT' == cursor.representative
def test_string_set_append():
    """Append one DNA5 string to an empty string set; pass if nothing raises."""
    test_name = sys._getframe().f_code.co_name
    logging.info(test_name)
    string_set = seqan.StringDNA5Set()
    dna5_string = seqan.StringDNA5('ACGTACGTACGTNNN')
    string_set.appendValue(dna5_string)