示例#1
0
    def test_from_2_to_3():
        '''Check that a gff2 file can be read and written back as gff3.

        A small gff2 file is written to a temporary file, loaded with GffFile
        and its items are dumped with write_gff. The output is expected to
        contain the ID 'Cm22_F20_2' (the input has two BAC rows named
        Cm22_F20) and the value 'Cm40_O16 3' with its space written as %20.
        '''
        gff2_rows = (
            'Chrctg0\tassembly\tChromosome\t1\t140722177\t.\t.\t.\tSequence "Chrctg0"; Name "Chrctg0"',
            'Chrctg0\tFPC\tcontig\t1\t140722177\t.\t.\t.\tcontig "ctg0"; Name "ctg0"',
            'Chrctg0\tFPC\tBAC\t109076481\t109461505\t.\t.\t.\tBAC "Cm45_J09"; Name "Cm45_J09"; Contig_hit "0"',
            'Chrctg0\tFPC\tBAC\t97189889\t97329153\t.\t.\t.\tBAC "Cm40_O16 3"; Name "Cm40_O16"; Contig_hit "0"',
            'Chrctg0\tFPC\tBAC\t57982977\t58302465\t.\t.\t.\tBAC "Cm22_F20"; Name "Cm22_F20"; Contig_hit "0"',
            'Chrctg0\tFPC\tBAC\t57982978\t58302466\t.\t.\t.\tBAC "Cm22_F20"; Name "Cm22_F20"; Contig_hit "0"',
        )
        gff2 = '\n'.join(gff2_rows) + '\n'

        # Text mode ('w+') is requested explicitly: with the default binary
        # mode ('w+b') writing and comparing str fails on Python 3. The
        # context managers close and remove both temporary files even when
        # an exception is raised.
        with NamedTemporaryFile(mode='w+') as inh:
            inh.write(gff2)
            # GffFile opens the file by name, so the content has to be on disk
            inh.flush()
            in_gff = GffFile(inh.name)

            # The input file is kept open while writing because in_gff.items
            # might still be reading from it.
            with NamedTemporaryFile(mode='w+') as outh:
                write_gff(outh.name, in_gff.items)
                result = outh.read()

        assert 'ID=Cm22_F20_2' in result
        assert 'BAC=Cm40_O16%203' in result
示例#2
0
def cmap_to_gff(data, fhand):
    '''Write the cmap data held in a dict as a gff3 file.

    data is read through its 'map_sets', 'species' and 'features' keys and
    the resulting items are written with write_gff to the path fhand.name.
    As a side effect every map gets 'start' and 'end' keys with the lowest
    start and the highest end found in its feature locations (both None
    when the map has no feature locations).
    '''
    # Shared by all the maps: _map_features receives them for every map and
    # _cmap_correspondences uses them at the end to build the correspondences.
    marker_count = {}
    marker_id_map = {}

    gff = [(METADATA, 'cmap-gff-version 1')]
    for mapset in data['map_sets']:
        species = data['species'][mapset['species']]
        gff.append(_species_pragma(species))
        gff.append((METADATA, '#'))
        gff.append(_map_set_pragma(mapset))
        features_in_mapset = set()
        for map_ in mapset['maps']:
            # The span of the map: from the lowest start to the highest end
            lowest, highest = None, None
            for location in map_['feature_locations']:
                loc_start = location['start']
                # A location without an explicit end finishes where it starts
                loc_end = location['end'] if 'end' in location else loc_start
                if lowest is None or loc_start < lowest:
                    lowest = loc_start
                if highest is None or loc_end > highest:
                    highest = loc_end
            map_['start'], map_['end'] = lowest, highest

            gff.extend(_map_pragma(map_, mapset['accession']))
            gff.extend(_map_features(map_, data['features'],
                                     mapset['accession'], marker_count,
                                     marker_id_map, features_in_mapset))

    # Finally the correspondences
    gff.extend(_cmap_correspondences(marker_count, marker_id_map))

    write_gff(fhand.name, gff)
示例#3
0
    def test_simple_output():
        '''Check that write_gff can write simple gff3 files.

        Three cases are covered: a feature with ID and Name attributes
        preceded by a metadata item, a feature with top-level 'id' and 'name'
        keys plus a list-valued Parent attribute, and an attribute value
        whose special characters are expected to be percent-escaped.
        '''
        def written(feats):
            'Write feats with write_gff to a temporary file, return its content'
            # Text mode ('w+') is requested explicitly: with the default
            # binary mode ('w+b') read() returns bytes on Python 3 and the
            # str assertions below could not be evaluated. The context
            # manager closes and removes the file even if write_gff fails.
            with NamedTemporaryFile(mode='w+') as outh:
                write_gff(outh.name, feats)
                return outh.read()

        feat1 = {'seqid': 'ctg123',
                 'type':  'gene',
                 'start': 1000,
                 'end':   9000,
                 'attributes': {'ID': 'gene00001', 'Name': 'EDEN'}}
        feats = [(METADATA, 'sequence-region ctg123 1 1497228'),
                 (FEATURE, feat1)]
        expected = ('##gff-version 3\n'
                    '##sequence-region ctg123 1 1497228\n'
                    'ctg123\t.\tgene\t1000\t9000\t.\t.\t.\t'
                    'ID=gene00001;Name=EDEN\n')
        result = written(feats)
        # The containment check alone is also true for an empty string, so
        # first make sure that something was actually written.
        assert result
        assert result in expected

        feat1 = {'id': '23',
                 'seqid': 'ctg123',
                 'type':  'gene',
                 'start': 1000,
                 'end':   9000,
                 'name': 'hola',
                 'attributes': {'Parent': ['p1', 'p2']}}
        result = written([(FEATURE, feat1)])
        expected = '##gff-version 3\nctg123\t.\tgene\t1000\t9000\t.\t.\t.\t'
        assert expected in result
        assert 'Name=hola' in result

        # escaping some characters
        feat1 = {'id': '23',
                 'seqid': 'ctg123',
                 'type':  'gene',
                 'start': 1000,
                 'end':   9000,
                 'name': 'hola',
                 'attributes': {'Dbxref': 'peoi%25l a%20k%s'}}
        result = written([(FEATURE, feat1)])
        assert 'Dbxref=peoi%25l%20a%20k%25s' in result