def create_class(s): factory.getOWLClass(IRI.create(s)) def add_anno(resource, prop, cont): anno = factory.getOWLAnnotation(factory.getOWLAnnotationProperty(prop.getIRI()), factory.getOWLLiteral(cont)) axiom = factory.getOWLAnnotationAssertionAxiom(resource.getIRI(), anno) manager.addAxiom(ontology, axiom) genericProteinNames = dict() proteinCounter = 0 for tfile in os.listdir('/home/mencella/ttlfiles'): print tfile rdfModel = ModelFactory.createDefaultModel() rdfModel.read(FileReader(tfile), "http://foobar#", "TURTLE") p = rdfModel.createProperty(up + "mnemonic") label = rdfModel.listStatements(0, p, 0)[0].getObject().toString() iri = rdfModel.listStatements(0, RDF.type, rdfModel.createResource(up + "Protein"))[0].getSubject().toString() cl = create_class(iri) add_anno(cl, OWLRDFVocabulary.RDFS_LABEL, label) cls = create_class(iri + "_all") add_anno(cls, OWLRDFVocabulary.RDFS_LABEL, "All %s in the universe" % label) manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, factory.getOWLObjectSomeValuesFrom(create_relation("member-of"), cls))) genericNames = [] first = True for stmt1 in rdfModel.listStatements(0, RDF.type, rdfModel.createResource(up+"Structured_Name")):
def readFiles(i, q): while True: tfile = q.get() size = q._qsize() if size % 1000 == 0: print "%d entries left in queue. " % size, time.strftime("%c") rdfModel = ModelFactory.createDefaultModel() try: rdfModel.read(FileReader(input_directory + tfile), "http://foobar#", "TURTLE") except: print "Error with file: ", tfile q.task_done() #traceback.print_exc() continue iri_iter = rdfModel.listStatements(None, RDF.type, rdfModel.createResource(up + "Protein")) while iri_iter.hasNext(): # iterate over all Protein iri's in file iris = iri_iter.nextStatement().getSubject() iri = iris.toString() label = rdfModel.listStatements(iris, rdfModel.createProperty(up + "mnemonic"), None).nextStatement().getObject().toString() # subclass cl = create_class(iri) add_anno(cl, OWLRDFVocabulary.RDFS_LABEL, label) cls = create_class(iri + "_all") add_anno(cls, OWLRDFVocabulary.RDFS_LABEL, "All %s in the universe" % label) manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, factory.getOWLObjectSomeValuesFrom(create_relation("member-of"), cls))) # generic names genericNames = [] first = True for stmt0 in rdfModel.listStatements(iris, rdfModel.createProperty(up + "submittedName"), None): name = stmt0.getObject() for stmt1 in rdfModel.listStatements(name, RDF.type, rdfModel.createResource(up + "Structured_Name")): for stmt2 in rdfModel.listStatements(stmt1.getSubject(), rdfModel.createProperty(up + "fullName"), None): if first: first = False firstName = stmt2.getObject().toString() genericNames.append(stmt2.getObject().toString()) if firstName not in genericProteinNames.keys(): ncl = create_class(onturi+"GENERICPROTEIN_%s" % proteinCounter) ncla = create_class(onturi+"GENERICPROTEIN_%s_all" % proteinCounter) proteinCounter += 1 for name in genericNames: universe = "All %s in the universe" % name add_anno(ncl, OWLRDFVocabulary.RDFS_LABEL, name) # add label add_anno(ncla, OWLRDFVocabulary.RDFS_LABEL, universe) # add label genericProteinNames[name] = ncl genericProteinNames[universe] = ncla ncl = genericProteinNames[firstName] ncla = genericProteinNames["All %s in the universe" % firstName] manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, ncl)) manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cls, ncla)) # isoforms isoforms = collections.OrderedDict() for stmt in rdfModel.listStatements(iris, rdfModel.createProperty(up + "sequence"), None): isoforms[stmt.getObject().toString()] = 1 for iso in isoforms.keys(): icl = create_class(iso) add_anno(icl, OWLRDFVocabulary.RDFS_LABEL, "Isoform of %s" % label) # add label manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(icl, factory.getOWLObjectSomeValuesFrom(create_relation("isoform-of"), cl))) manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(icl, cl)) # gofunctions gofunctions = collections.OrderedDict() for stmt in rdfModel.listStatements(rdfModel.createResource(iri), rdfModel.createProperty(up+"classifiedWith"), None): if(stmt.getObject().toString().startswith("http://purl.obolibrary.org/obo/")): gofunctions[stmt.getObject().toString()] = 1 for fun in gofunctions.keys(): # Decide whether this is a function/process or location if create_class(fun) in loc_nodeset: # location relation = "located-in" else: # function/process relation = "participates-in" manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cls, factory.getOWLObjectSomeValuesFrom(create_relation("has-member"), factory.getOWLObjectSomeValuesFrom(create_relation(relation), create_class(fun))))) organism = rdfModel.listStatements(rdfModel.createResource(iri), rdfModel.createProperty(up+"organism"), None).nextStatement().getObject().toString() prefix = "http://purl.obolibrary.org/obo/NCBITaxon_" slash = organism.rfind('/') organism = prefix + organism[slash+1:] manager.addAxiom(ontology, factory.getOWLSubClassOfAxiom(cl, factory.getOWLObjectSomeValuesFrom(create_relation("created-in-organism"), create_class(organism)))) #signals to queue job is done q.task_done()