def test_from_2_to_3():
    """Check that a GFF2 file can be read and written back out as GFF3."""
    # One GFF2 record per entry; every record is newline terminated.
    gff2_records = (
        'Chrctg0\tassembly\tChromosome\t1\t140722177\t.\t.\t.\t'
        'Sequence "Chrctg0"; Name "Chrctg0"\n',
        'Chrctg0\tFPC\tcontig\t1\t140722177\t.\t.\t.\t'
        'contig "ctg0"; Name "ctg0"\n',
        'Chrctg0\tFPC\tBAC\t109076481\t109461505\t.\t.\t.\t'
        'BAC "Cm45_J09"; Name "Cm45_J09"; Contig_hit "0"\n',
        # The BAC value holds a space; the GFF3 output is expected to
        # carry it escaped as %20.
        'Chrctg0\tFPC\tBAC\t97189889\t97329153\t.\t.\t.\t'
        'BAC "Cm40_O16 3"; Name "Cm40_O16"; Contig_hit "0"\n',
        # The next two records share the same name; the output is expected
        # to contain an ID with a "_2" suffix for it.
        'Chrctg0\tFPC\tBAC\t57982977\t58302465\t.\t.\t.\t'
        'BAC "Cm22_F20"; Name "Cm22_F20"; Contig_hit "0"\n',
        'Chrctg0\tFPC\tBAC\t57982978\t58302466\t.\t.\t.\t'
        'BAC "Cm22_F20"; Name "Cm22_F20"; Contig_hit "0"\n',
    )
    gff2_text = ''.join(gff2_records)

    # Dump the GFF2 fixture into a temporary file so GffFile can open it
    # by path.
    gff2_fhand = NamedTemporaryFile()
    gff2_fhand.write(gff2_text)
    gff2_fhand.flush()
    gff2 = GffFile(gff2_fhand.name)

    # write_gff receives the output path, not the handle; the handle is
    # only used afterwards to read back what was written.
    gff3_fhand = NamedTemporaryFile()
    write_gff(gff3_fhand.name, gff2.items)
    gff3_text = gff3_fhand.read()

    assert 'ID=Cm22_F20_2' in gff3_text
    assert 'BAC=Cm40_O16%203' in gff3_text
def cmap_to_gff(data, fhand):
    """Write the cmap data held in a dict as a gff3 file.

    data is read through its 'map_sets', 'species' and 'features' keys.
    The output is written by write_gff to the path given by fhand.name.
    Every map dict gets its 'start' and 'end' keys set to the extent
    covered by its feature locations.
    """
    gff_items = [(METADATA, 'cmap-gff-version 1')]

    # Shared by every map so that markers located in more than one map can
    # be tracked; both dicts are consumed when building the correspondences.
    marker_count = {}
    marker_id_map = {}

    for mapset in data['map_sets']:
        mapset_accession = mapset['accession']
        species = data['species'][mapset['species']]
        gff_items.extend([_species_pragma(species),
                          (METADATA, '#'),
                          _map_set_pragma(mapset)])

        features_in_mapset = set()
        for map_ in mapset['maps']:
            # The map spans from the lowest start to the highest end of
            # its feature locations (both stay None when there are none).
            lowest = None
            highest = None
            for location in map_['feature_locations']:
                loc_start = location['start']
                # A location without an explicit end is a single point.
                loc_end = location.get('end', loc_start)
                if lowest is None or lowest > loc_start:
                    lowest = loc_start
                if highest is None or highest < loc_end:
                    highest = loc_end
            map_['start'] = lowest
            map_['end'] = highest

            gff_items.extend(_map_pragma(map_, mapset_accession))
            gff_items.extend(_map_features(map_, data['features'],
                                           mapset_accession, marker_count,
                                           marker_id_map,
                                           features_in_mapset))

    # The correspondences are added once every map has been processed.
    gff_items.extend(_cmap_correspondences(marker_count, marker_id_map))

    write_gff(fhand.name, gff_items)
def test_simple_output():
    """Check that write_gff can write simple gff3 files.

    Three cases are exercised:
      - a metadata pragma plus a feature with ID and Name attributes,
      - a feature whose name is given by the top level 'name' key and
        that has a list of parents,
      - an attribute value holding characters that must be escaped.
    """
    # Case 1: pragma plus a feature with its ID and Name in the attributes.
    feat1 = {'seqid': 'ctg123',
             'type': 'gene',
             'start': 1000,
             'end': 9000,
             'attributes': {'ID': 'gene00001', 'Name': 'EDEN'}}
    feats = [(METADATA, 'sequence-region ctg123 1 1497228'),
             (FEATURE, feat1)]
    expected = ('##gff-version 3\n'
                '##sequence-region ctg123 1 1497228\n'
                'ctg123\t.\tgene\t1000\t9000\t.\t.\t.\t'
                'ID=gene00001;Name=EDEN\n')
    outh = NamedTemporaryFile()
    write_gff(outh.name, feats)
    written = outh.read()
    # An empty string is contained in any string, so the containment check
    # on its own would pass even if nothing had been written at all.
    assert written
    assert written in expected

    # Case 2: the name comes from the 'name' key and there are two parents.
    feat1 = {'id': '23',
             'seqid': 'ctg123',
             'type': 'gene',
             'start': 1000,
             'end': 9000,
             'name': 'hola',
             'attributes': {'Parent': ['p1', 'p2']}}
    feats = [(FEATURE, feat1)]
    outh = NamedTemporaryFile()
    write_gff(outh.name, feats)
    written = outh.read()
    expected = '##gff-version 3\nctg123\t.\tgene\t1000\t9000\t.\t.\t.\t'
    assert expected in written
    assert 'Name=hola' in written

    # Case 3: escaping some characters. The space and the lone '%' are
    # expected to be escaped while the already escaped %25 and %20 are
    # expected to be left untouched.
    feat1 = {'id': '23',
             'seqid': 'ctg123',
             'type': 'gene',
             'start': 1000,
             'end': 9000,
             'name': 'hola',
             'attributes': {'Dbxref': 'peoi%25l a%20k%s'}}
    feats = [(FEATURE, feat1)]
    outh = NamedTemporaryFile()
    write_gff(outh.name, feats)
    written = outh.read()
    assert 'Dbxref=peoi%25l%20a%20k%25s' in written