Пример #1
0
    def read(self):
        """
        Parse the nexo file and return its annotations and ontology graph.

        The handle stored on this reader is fed through a SAX parser using a
        ``NexoContentHandler``; the handler's collected annotations, nodes and
        edges are then turned into the returned objects.

        Returns a tuple ``(annotations, graph)`` where ``annotations`` is a
        list of ``GeneAnnotation`` objects and ``graph`` is an
        ``OntologyGraph``.
        """
        handler = NexoContentHandler(self.get_all_attrs,
                                     self.annotation_source)
        xml.sax.parse(self.handle, handler)

        # One GeneAnnotation per annotated object, wrapping each collected
        # entry in a TermAssociation.
        annotations = [
            GeneAnnotation(
                gene, associations=[TermAssociation(term) for term in terms])
            for gene, terms in handler.annotations.items()
        ]

        graph = OntologyGraph()
        for node in handler.nodes.values():
            graph.add_node(node.id, node)

        seen_types = set()
        for edge in handler.edges:
            kind = edge[2]
            source_id = handler.nodes[edge[0]].id
            target_id = handler.nodes[edge[1]].id
            # In our representation the edge direction is inverted.
            graph.add_edge(target_id, source_id, kind)
            seen_types.add(kind)

        # Register a minimal typedef for every edge type that was seen.
        for kind in seen_types:
            graph.typedefs[kind] = {"id": kind}

        return (annotations, graph)
Пример #2
0
    def test_resolve(self):
        """Check how FirstOneResolver resolves ids and synonyms.

        The expectations encoded below are:

        * an id that is itself a gene id resolves to itself,
        * a synonym resolves to the id of the gene that lists it,
        * a synonym shared by several genes ('x') resolves to the first
          gene in the input list that lists it,
        * a name that matches nothing is returned unchanged.
        """
        # TODO: marker carried over from the original code; it did not say
        # what is still left to do here.
        assocs = [
            GeneAnnotation('FBgn0004364',
                           attrs={'Synonym': ['18wheeler', 'CG8896',
                                              'CT25100']}),
            GeneAnnotation('FBgn0043467'),
            GeneAnnotation('FBgn0004907',
                           attrs={'Synonym': ['14-3-3', '14-3-3 zeta', 'x']}),
            GeneAnnotation('FBgn0010339',
                           attrs={'Synonym': ['CG8340', 'GTP-bp', 'X71866',
                                              'x']}),
        ]
        resolver = FirstOneResolver(assocs)

        queries = ['FBgn0043467', 'CT25100', 'x', 'FBgn0010340']
        resolved = [resolver.resolve(query) for query in queries]
        expected = ['FBgn0043467', 'FBgn0004364', 'FBgn0004907', 'FBgn0010340']

        self.assertEqual(expected, resolved)
Пример #3
0
def _to_goa(obj_rows, version):
    row = obj_rows[0]
    
    obj_id = row[1]
    obj_attrs = {GAF20FIELDS[0] : row[0],
                 GAF20FIELDS[2] : row[2],
                 GAF20FIELDS[9] : row[9],
                 GAF20FIELDS[10] : _split_multi(row[10]),
                 GAF20FIELDS[11] : row[11],
                 GAF20FIELDS[12]: _split_multi(row[12])}
    
    if version == "1.0":
        row_len = 15
    else:
        row_len = 17
        obj_attrs[GAF20FIELDS[15]] = _split_multi(row[15])
        obj_attrs[GAF20FIELDS[16]] = row[16]
        
    assocs = []
    for row in obj_rows:
        if len(row) == row_len:
            assocs.append(TermAssociation(row[4],
                                       {GAF20FIELDS[3] : _split_multi(row[3]),
                                        GAF20FIELDS[5] : _split_multi(row[5]),
                                        GAF20FIELDS[6] : row[6],
                                        GAF20FIELDS[7] :_split_multi(row[7]),
                                        GAF20FIELDS[8] : row[8],
                                        GAF20FIELDS[13] : row[13],
                                        GAF20FIELDS[14] : row[14]}
                                          ))
        else:
            raise ValueError("Invalid gaf file: Incorrect row length.")
    
    return GeneAnnotation(obj_id, assocs, obj_attrs)
Пример #4
0
 def test_read_file(self):
     """Check GafReader output for the ``correct20.fb`` fixture.

     The fixture is read through ``GafReader`` and the result is compared
     with a hand-built mapping from gene id to ``GeneAnnotation``.
     """

     def association(term_id, references, evidence, aspect):
         # Fields 3 and 7 are empty lists and fields 13 and 14 are the
         # same for every expected association; only the arguments vary.
         return TermAssociation(
             term_id, {
                 GAF20FIELDS[3]: [],
                 GAF20FIELDS[5]: references,
                 GAF20FIELDS[6]: evidence,
                 GAF20FIELDS[7]: [],
                 GAF20FIELDS[8]: aspect,
                 GAF20FIELDS[13]: '20060803',
                 GAF20FIELDS[14]: 'FlyBase'
             })

     def gene(gene_id, label, associations):
         # The same string is expected in fields 2 and 9 of each gene.
         return GeneAnnotation(
             gene_id, associations, {
                 GAF20FIELDS[0]: 'FB',
                 GAF20FIELDS[2]: label,
                 GAF20FIELDS[9]: label,
                 GAF20FIELDS[10]: [],
                 GAF20FIELDS[11]: 'gene_product',
                 GAF20FIELDS[12]: ['taxon:7227'],
                 GAF20FIELDS[15]: [],
                 GAF20FIELDS[16]: ''
             })

     first_gene = gene('FBgn0026615', '10-4', [
         association('GO:0005737', ['FB:FBrf0106275'], 'IDA', 'C'),
         association('GO:0045177', ['FB:FBrf0106275'], 'IDA', 'C'),
     ])
     second_gene = gene('FBgn0043467', '064Ya', [
         association('GO:0048149', ['FB:FBrf0131396', 'PMID:11086999'],
                     'IMP', 'P'),
     ])
     to = {'FBgn0026615': first_gene, 'FBgn0043467': second_gene}

     with open('Ontology/GoaIO/correct20.fb', 'r') as fixture:
         parsed = GafReader(fixture).read()
         self.assertEqual(to, parsed)