def test_cut_unicodestring_1500():
    """Check cut_unicodestring on the multi-line record Scaffold102974:1-1500().

    The input is FASTA text: one ``>`` header line followed by several
    sequence lines. The expected result is a list of six strings, each made
    of tab-separated fields (record name, start, end, enzyme name, 1000,
    strand) and terminated by a newline. The four BfaI entries come first,
    followed by the two ScrFI entries.
    """
    # The line breaks inside the literal are part of the input (header line,
    # then sequence lines), so the text is kept flush-left on purpose.
    unicodestring = u'''>Scaffold102974:1-1500()
TCTCAACAAACCCATAAGTCACTGGTACATTAACAGATAACGCTAATAATAAAATGGCAGAATGACTCAGATACAATTCCCATATAAAAAGCCATTAATA
TGAACAGAACTTACTCGTAAAAATCCAGTGAAAATGAACAGGGTGATGATTAGTGACCTCATGGTGGCATTCAGTCACTAATCTGACAATCCCAACTGTA
AACAGTCCTTATACCAGCTTGTGTCAGGGGCACCATTATGAGGTCATCACTAACAATCATGGCGACCATTGTGATGTCGCTACTAACAATCATGTGACAT
CAACTGCTACAGGCCAAGTCAGCTTTCTGTTACGGCAAATATAGAATGACTGTTAGGGAGACAATATAAGAGCACCTTACAGTAATAAAACTTTATACAT
AAATTCTACTTCATAAATTCATATTTTTTTATTGAAAATCACGATTTATTTAGATTACATTCAACAGCTAATCTTAAATTGACTCAACCTTAGTTATTCT
TATTGCCTGTAATCAATAACAGGTTTTGGGTTGGACATAGTTACATATATTACTTTAAATTCCAAAAGGACAAAAGTCTGTCAAGTTCAACCCCTCCAAT
TTATCCCCAGCATATGTGTGTACATATATATATACTGATGCACACTGGACGTCCACAGAAATGTTGCTACCTATGTCGGATCACAGATAAGGAGATTATT
TAATGCAATTTAATAAAGTGCAACAGATGGGGGGAGGGACAGCAGCTGGGCCTGGGGGGGGGGCAAGAAAGATAAATTTGGTCGTTGCGCTGGCCAGAAC
GTTCTCTAGGAATCGTGGAGAAGACATTGGCCAAAAGAGGAAGGTCTCACCAATGTGCCTTGATGAATAGAACAGTTAAAATATTTCAGCTGTAGCCCTC
CAGCTCCAGAACTATAATTACCAGAATCCACCTCAGTTGGAGGCTGAAGATGCCTTAATTCCAAAATGTATTTATTCTTATGCCTTCTTTTCTTATTTAG
TTTCAGTTTTTTTCTGAACTTTTCTTCTATTAATTTTTACCCTTCCCCAACCAACCCCCATCTTTCCTCACATTTCTCAGTCCCTTATGACCCCCCTTCA
TCTCTTTTTCTGCTCCTTGTTCCTACTCATCTTCTTCTCTGTTCTGCATATTTTGCTCTCTCTCTATGTGTCATGGTGGCTTCAGCTTTTTTCTGAATAG
AGCTCAGCTGAGTAGGGATATTGATTGTGATTGGGAGGTGAACTTCAACCTTAAATCTCCTATTTAGTGCCATGTGAGTGCAGCAATACAACTGTTGGGA
AGGGACTTTGATGGGAGGAATTTGGATGGAAGGGTCTAAAAGAGAGACAGGCCAAGGAAAGGAATCATCTGATTGGCTTGTTTTGATTACTGGCGTAAGA
CAGTTAGAATGCTGAAAGCAACTACTACTGGTCTGTGCTGCTTAAAGGAGAAGGAAACCCCCTAGGCACAAAAATCCCTCCCCTCTCCCCTGTGTTGTC
'''
    expected_list_of_bedtuples = [
        'Scaffold102974:1-1500()\t786\t806\tBfaI\t1000\t+\n',
        'Scaffold102974:1-1500()\t808\t828\tBfaI\t1000\t-\n',
        'Scaffold102974:1-1500()\t1442\t1462\tBfaI\t1000\t+\n',
        'Scaffold102974:1-1500()\t1464\t1484\tBfaI\t1000\t-\n',
        'Scaffold102974:1-1500()\t732\t752\tScrFI\t1000\t+\n',
        'Scaffold102974:1-1500()\t753\t773\tScrFI\t1000\t-\n',
    ]
    assert cut_unicodestring(unicodestring) == expected_list_of_bedtuples
def test_cut_unicodestring_42():
    """Check cut_unicodestring on the short single-line record ``fourtytwobps``.

    The input is FASTA text: a ``>`` header line followed by one 42-character
    sequence line. The expected result is a list of two strings, each made of
    tab-separated fields (record name, start, end, enzyme name, 1000, strand)
    and terminated by a newline: one BfaI entry on ``+`` and one on ``-``.
    """
    # The line breaks inside the literal are part of the input (header line,
    # then sequence line), so the text is kept flush-left on purpose.
    unicodestring = u'''>fourtytwobps
GCGCTGGCCAGAACGTTCTCTAGGAATCGTGGAGAAGACATT
'''
    expected_list_of_bedtuples = [
        'fourtytwobps\t0\t20\tBfaI\t1000\t+\n',
        'fourtytwobps\t22\t42\tBfaI\t1000\t-\n',
    ]
    assert cut_unicodestring(unicodestring) == expected_list_of_bedtuples