def read(self):
    """
    Parses the nexo file behind self.handle.

    Returns a tuple (annotations, graph): a list of GeneAnnotation
    objects and an OntologyGraph built from the nodes and edges
    collected by the SAX content handler.
    """
    handler = NexoContentHandler(self.get_all_attrs, self.annotation_source)
    xml.sax.parse(self.handle, handler)

    # One GeneAnnotation per annotated object, wrapping each collected
    # association in a TermAssociation.
    annotations = [
        GeneAnnotation(
            obj, associations=[TermAssociation(x) for x in assocs])
        for obj, assocs in handler.annotations.items()
    ]

    graph = OntologyGraph()
    for _key, node in handler.nodes.items():
        graph.add_node(node.id, node)

    seen_edge_types = set()
    for edge in handler.edges:
        source_id = handler.nodes[edge[0]].id
        target_id = handler.nodes[edge[1]].id
        relation = edge[2]
        # Edges are stored inverted in our representation:
        # target first, then source.
        graph.add_edge(target_id, source_id, relation)
        seen_edge_types.add(relation)

    # Register a minimal typedef for every relation type encountered.
    for relation in seen_edge_types:
        graph.typedefs[relation] = {"id": relation}

    return (annotations, graph)
def test_resolve(self):
    """
    Checks FirstOneResolver.resolve() against a small fixture of
    GeneAnnotation objects, some of which carry a 'Synonym' attribute.
    """
    # TODO
    assocs = [
        GeneAnnotation('FBgn0004364',
                       attrs = {'Synonym': ['18wheeler', 'CG8896', 'CT25100']}),
        GeneAnnotation('FBgn0043467'),
        GeneAnnotation('FBgn0004907',
                       attrs = {'Synonym': ['14-3-3', '14-3-3 zeta', 'x']}),
        GeneAnnotation('FBgn0010339',
                       attrs = {'Synonym': ['CG8340', 'GTP-bp', 'X71866', 'x']}),
    ]
    resolver = FirstOneResolver(assocs)

    # Queries, in order: an id present in the fixture, a synonym of a
    # single gene, a synonym ('x') shared by two genes, and an id that
    # appears nowhere in the fixture.
    resolved = [resolver.resolve(x)
                for x in ['FBgn0043467', 'CT25100', 'x', 'FBgn0010340']]

    # The shared synonym 'x' is expected to map to FBgn0004907, the gene
    # listed first; the unknown id is expected to come back unchanged.
    expected = ['FBgn0043467', 'FBgn0004364', 'FBgn0004907', 'FBgn0010340']
    self.assertEqual(expected, resolved)
def _to_goa(obj_rows, version): row = obj_rows[0] obj_id = row[1] obj_attrs = {GAF20FIELDS[0] : row[0], GAF20FIELDS[2] : row[2], GAF20FIELDS[9] : row[9], GAF20FIELDS[10] : _split_multi(row[10]), GAF20FIELDS[11] : row[11], GAF20FIELDS[12]: _split_multi(row[12])} if version == "1.0": row_len = 15 else: row_len = 17 obj_attrs[GAF20FIELDS[15]] = _split_multi(row[15]) obj_attrs[GAF20FIELDS[16]] = row[16] assocs = [] for row in obj_rows: if len(row) == row_len: assocs.append(TermAssociation(row[4], {GAF20FIELDS[3] : _split_multi(row[3]), GAF20FIELDS[5] : _split_multi(row[5]), GAF20FIELDS[6] : row[6], GAF20FIELDS[7] :_split_multi(row[7]), GAF20FIELDS[8] : row[8], GAF20FIELDS[13] : row[13], GAF20FIELDS[14] : row[14]} )) else: raise ValueError("Invalid gaf file: Incorrect row length.") return GeneAnnotation(obj_id, assocs, obj_attrs)
def test_read_file(self):
    """
    Reads 'Ontology/GoaIO/correct20.fb' with GafReader and compares the
    result with a hand-built mapping of object id -> GeneAnnotation.
    """
    # Expected entry for FBgn0026615: two associations whose attribute
    # values are the same, differing only in the term id.
    cytoplasm_assoc = TermAssociation('GO:0005737', {
        GAF20FIELDS[3]: [],
        GAF20FIELDS[5]: ['FB:FBrf0106275'],
        GAF20FIELDS[6]: 'IDA',
        GAF20FIELDS[7]: [],
        GAF20FIELDS[8]: 'C',
        GAF20FIELDS[13]: '20060803',
        GAF20FIELDS[14]: 'FlyBase'
    })
    apical_assoc = TermAssociation('GO:0045177', {
        GAF20FIELDS[3]: [],
        GAF20FIELDS[5]: ['FB:FBrf0106275'],
        GAF20FIELDS[6]: 'IDA',
        GAF20FIELDS[7]: [],
        GAF20FIELDS[8]: 'C',
        GAF20FIELDS[13]: '20060803',
        GAF20FIELDS[14]: 'FlyBase'
    })
    first_gene = GeneAnnotation(
        'FBgn0026615',
        [cytoplasm_assoc, apical_assoc],
        {
            GAF20FIELDS[0]: 'FB',
            GAF20FIELDS[2]: '10-4',
            GAF20FIELDS[9]: '10-4',
            GAF20FIELDS[10]: [],
            GAF20FIELDS[11]: 'gene_product',
            GAF20FIELDS[12]: ['taxon:7227'],
            GAF20FIELDS[15]: [],
            GAF20FIELDS[16]: ''
        })

    # Expected entry for FBgn0043467: a single association.
    behavior_assoc = TermAssociation('GO:0048149', {
        GAF20FIELDS[3]: [],
        GAF20FIELDS[5]: ['FB:FBrf0131396', 'PMID:11086999'],
        GAF20FIELDS[6]: 'IMP',
        GAF20FIELDS[7]: [],
        GAF20FIELDS[8]: 'P',
        GAF20FIELDS[13]: '20060803',
        GAF20FIELDS[14]: 'FlyBase'
    })
    second_gene = GeneAnnotation(
        'FBgn0043467',
        [behavior_assoc],
        {
            GAF20FIELDS[0]: 'FB',
            GAF20FIELDS[2]: '064Ya',
            GAF20FIELDS[9]: '064Ya',
            GAF20FIELDS[10]: [],
            GAF20FIELDS[11]: 'gene_product',
            GAF20FIELDS[12]: ['taxon:7227'],
            GAF20FIELDS[15]: [],
            GAF20FIELDS[16]: ''
        })

    expected = {
        'FBgn0026615': first_gene,
        'FBgn0043467': second_gene,
    }

    with open('Ontology/GoaIO/correct20.fb', 'r') as f:
        objs = GafReader(f).read()
        self.assertEqual(expected, objs)