def gen_pdm_from_indfile(ont_url, outfile):
    """Reads an owl file from ont_url; Writes a JSON file (outfile) of types
    and annotations on individuals in the file.

    JSON structure:
      id:
        label: string
        def: string
        types:
          - isAnonymous:boolean;
          - relId:URI_string;
          - objectId:URI_string.
    """
    ont = Brain()
    ont.learn(ont_url)
    axioms = {}
    # Seed classes whose instances are harvested, in order:
    # CARO_0030002 = expression_patterns, FBbt_00005106 = neurons,
    # FBbt_00007683 = clones.  (Folded three copy-pasted if-blocks into a loop.)
    seed_classes = ("CARO_0030002", "FBbt_00005106", "FBbt_00007683")
    for seed in seed_classes:
        if ont.knowsClass(seed):
            axioms.update(gen_pdm(ont, ont.getInstances(seed, 0), seed))
    jfile = open(outfile, "w")
    try:
        jfile.write(json.dumps(axioms, sort_keys=True, indent=4))
    finally:
        jfile.close()  # FIX: handle was never closed, risking an unflushed file
    ont.sleep()
def __main__():
    """Run every query test listed in ./queries.tsv against the ontology
    file given as the first command-line argument, then release the Brain."""
    ont = Brain()
    ont.learn(sys.argv[1])
    query_table = tab("./", "queries.tsv")
    for row in query_table.tab:
        Query(row, ont).qtest()
    ont.sleep()
class om():
    """Ontology manager: loads an ontology into a Brain and rolls up a
    primary data model (pdm) of its object properties."""

    def __init__(self, uri_or_fp):
        self.b = Brain()
        self.b.learn(uri_or_fp)
        self.o = self.b.getOntology()
        # Bidirectional short form provider:
        # uses .getEntity(<string> shortForm), .getShortForm(OWLEntity entity)
        self.bsfp = self.b.getBidiShortFormProvider()

    def get_OP_list(self):
        """Return the short forms of all object properties in the ontology signature."""
        out = []
        for r in self.o.getObjectPropertiesInSignature():
            out.append(self.bsfp.getShortForm(r))
        return out

    def roll_pdm(self, id_name):
        """Build a dict keyed on relation shorthand holding label, usage,
        definition and stringified local domain/range lookups.

        id_name -- dict mapping entity IDs to names, used to resolve
        local_domain / local_range annotation values.
        """
        # TODO - get subsets + regular domain and range
        out = {}
        for r in self.get_OP_list():
            # Assumes everything in file has a shorthand!
            # Better to key primary model on shortFormID and then re-key after reading.
            shorthand = self.b.getAnnotation(r, "shorthand")
            out[shorthand] = {}
            out[shorthand]['label'] = self.b.getLabel(r)
            # FIX: reset per relation.  These were initialised once outside the
            # loop, so a relation whose annotation lookup failed part-way
            # inherited the previous relation's domains/ranges.
            local_domains = []
            local_ranges = []
            try:
                out[shorthand]['usage'] = self.b.getAnnotation(r, "usage")
                out[shorthand]['defn'] = self.b.getAnnotation(r, "IAO_0000115")
                local_domains = self.b.getAnnotation(r, "local_domain").split(" ")
                local_ranges = self.b.getAnnotation(r, "local_range").split(" ")
            except:
                # bare except kept deliberately: under Jython, Brain raises
                # Java exceptions that `except Exception` would not catch.
                pass  # annotations are optional; missing ones leave fields unset
            if local_domains:
                ldd = {}
                for ld in local_domains:
                    if ld in id_name:
                        ldd[ld] = id_name[ld]
                    else:
                        ldd[ld] = ''
                out[shorthand]['local_domain'] = str(ldd)
            if local_ranges:
                lrd = {}
                for lr in local_ranges:
                    if lr in id_name:
                        lrd[lr] = id_name[lr]
                    else:
                        lrd[lr] = ''
                out[shorthand]['local_range'] = str(lrd)
        return out
class om():
    """Ontology manager: loads an ontology into a Brain and rolls up a
    primary data model (pdm) of its object properties.

    NOTE(review): this is a byte-for-byte duplicate of the `om` class defined
    earlier in the file - consider consolidating into one shared module.
    """

    def __init__(self, uri_or_fp):
        self.b = Brain()
        self.b.learn(uri_or_fp)
        self.o = self.b.getOntology()
        # uses .getEntity(<string> shortForm), .getShortForm(OWLEntity entity)
        self.bsfp = self.b.getBidiShortFormProvider()

    def get_OP_list(self):
        """Return the short forms of all object properties in the ontology signature."""
        out = []
        for r in self.o.getObjectPropertiesInSignature():
            out.append(self.bsfp.getShortForm(r))
        return out

    def roll_pdm(self, id_name):
        """Build a dict keyed on relation shorthand holding label, usage,
        definition and stringified local domain/range lookups."""
        # TODO - get subsets + regular domain and range
        out = {}
        for r in self.get_OP_list():
            # Assumes everything in file has a shorthand!
            # Better to key primary model on shortFormID and then re-key after reading.
            shorthand = self.b.getAnnotation(r, "shorthand")
            out[shorthand] = {}
            out[shorthand]['label'] = self.b.getLabel(r)
            # FIX: previously initialised once before the loop, so values from
            # the preceding relation leaked in when the try block failed early.
            local_domains = []
            local_ranges = []
            try:
                out[shorthand]['usage'] = self.b.getAnnotation(r, "usage")
                out[shorthand]['defn'] = self.b.getAnnotation(r, "IAO_0000115")
                local_domains = self.b.getAnnotation(r, "local_domain").split(" ")
                local_ranges = self.b.getAnnotation(r, "local_range").split(" ")
            except:
                # bare except kept: Jython-raised Java exceptions would escape
                # a narrower `except Exception`.
                pass
            if local_domains:
                ldd = {}
                for ld in local_domains:
                    if ld in id_name:
                        ldd[ld] = id_name[ld]
                    else:
                        ldd[ld] = ''
                out[shorthand]['local_domain'] = str(ldd)
            if local_ranges:
                lrd = {}
                for lr in local_ranges:
                    if lr in id_name:
                        lrd[lr] = id_name[lr]
                    else:
                        lrd[lr] = ''
                out[shorthand]['local_range'] = str(lrd)
        return out
def entity_check(SFID_list, cursor): class owl_entity(): # v.simple object for storing attributes of class ont = '' base = '' sfid = '' typ = '' ont_dict = {} # Dict to make a uniq'd list of ontologies sfid_oe = {} # Dict of owl_entities - as specified by DB for SFID in SFID_list: cursor.execute("SELECT DISTINCT ontology_URI AS ont, owl_type as typ, baseURI as base FROM owl_entity WHERE shortFormID = '%s'" % SFID) dc = dict_cursor(cursor) brain = Brain() sfo = owl_entity() for d in dc: ont = d['ont'] if not ont in ont_dict: brain.learn(ont) ont_dict[ont] = brain sfo.ont = d['ont'] sfo.base = d['base'] sfo.typ = d['typ'] sfid_oe[SFID] = sfo for idt in sfid_oe.items(): SFID = idt[0] owlEnt = idt[1] ont = owlEnt.ont brain = ont_dict[ont] # if brain.getAnnotation(SFID, 'deprecated'): # Need to cope with cases where it is not deprecated! # print SFID + ' is obsolete!' if owlEnt.typ == 'class': if not brain.knowsClass(SFID): print 'Unknown Class SFID in ' + owlEnt.ont elif owlEnt.brain == 'objectProperty': if not ont.knowsClass(SFID): print 'Unknown objectProperty SFID in ' + owlEnt.ont
    'fbext': {
        'url': 'http://purl.obolibrary.org/obo/fbbt/vfb/vfb_ext.owl',
        'path': '/repos/VFB_owl/src/owl/vfb_ext.owl'},
    'fbbt': {
        'url' : 'http://purl.obolibrary.org/obo/fbbt/fbbt-simple.owl',
        'path': '/repos/drosophila-anatomy-developmental-ontology/fbbt/releases/fbbt-simple.owl'
    },
    'fb_feature' : {
        'url' : 'http://purl.obolibrary.org/obo/fbbt/vfb/fb_features.owl',
        'path' : '/repos/VFB_owl/src/owl/fb_features.owl'
    }
}

## Switch to owl_pdm_tools to deal with obsoletes?
# NOTE(review): sys.argv[1] selects which source to learn from, so it must be
# the literal string 'url' or 'path' - confirm against the invoking script.
for k, v in onts_2_learn.items():
    b.learn(v[sys.argv[1]])

# Get all classes
sc = b.getSubClasses('Thing', 0)

# set constraints

# Add nodes
statements = []
for c in sc:
    label = ''
    # try/except presumably guards classes without an rdfs:label; the except
    # clause lies outside this view.
    try:
        label = b.getLabel(c)
deleted = ct.tab1_only() # Only in the update tab new = ct.tab2_only() for r in new.tab: warnings.warn("Processes %s" % r) if r['class_id']: od.add_akv_type(key = r['a.annotation_type'], value =r['a.text'] , OWLclass = r['class_id'], objectProperty =r['op_id'] ) else: for r in deleted.tab: if not safe_mode: od.remove_akv_type(key = r['a.annotation_type'], value =r['a.text'] , OWLclass = r['class_id'], objectProperty =r['op_id'] ) else: warnings.warn("Row present in DB, now missing from mapping: %s. %s. " \ "Safe mode set so not deleting" % (r['a.annotation_type'], r['a.text'])) c = get_con(sys.argv[1], sys.argv[2]) b = Brain() b.learn(sys.argv[3]) # Path to ontology file with referenced terms (presumably fbbt_simple will suffice) od = owlDbOnt(conn = c, ont = b) update_table = tab("../../../doc/", "annotation_map.tsv") update_akv_from_tab(od, update_table) # Assumes update table has all mappings. If it lacks any, assumes these mappings are to be deleted! This is potentially dangerous if mapping table is out of sync with DB. outfile = open("../../../doc/annotation_map_report.tsv", "w") report_tab = gen_report_tab(od) outfile.write(report_tab.print_tab(sort_keys = ('a.annotation_type', 'a.text'))) outfile.close() c.commit() c.close() b.sleep()
from uk.ac.ebi.brain.core import Brain
import json

# Builds a JSON lookup of class ID -> label from a set of ontologies.

all_ont = Brain()
# FIX: these lines were written as `all_ont.learn = ("")`, which *replaced*
# the learn method with a string instead of calling it.
# TODO: the ontology URLs were never filled in - supply them before running.
all_ont.learn("")  # fbbi
all_ont.learn("")  # fbbt
all_ont.learn("")  # fbdv

classlist = all_ont.getSubClasses("Thing", 0)
id_name = {}
for c in classlist:
    id_name[c] = all_ont.getLabel(c)

lookup = open("lookup", "w")
try:
    # FIX: json.dump writes to a file object and returns None;
    # json.dumps returns the string needed here.
    lookup.write(json.dumps(id_name))
finally:
    lookup.close()  # FIX: handle was never closed/flushed
import re

### Think this now works, but note that domain colours will need to be chosen for each new domain. Do we also have domain centres?
### Also - all of this should go into DB - in stack table (we'll need multiple stacks)
#### Doing this the OWL-ish way.
### individual - adult brain
### individuals for all domains - domain ID goes in external IDs? - Hmmm - really should be an external ID table with a column for the name of the external ID.
### All will be typed via regular type table.
### part_of/has_part relations - Needs FACT table.
### Note - this would be treating multiple paintings of the same brain as different individuals. Seems reasonable though.

fbbt = Brain()
fbbt.learn("http://purl.obolibrary.org/obo/fbbt/fbbt-simple.owl")

# This bit really should be a separate module - being file dependent.
# Maps FBbt OBO IDs (column 3) to JFRC domain IDs (column 2), skipping
# left-hand ("_L") structures.
oboid_domId = {}
# NOTE(review): the filename below looks truncated (no extension after the
# dot) - confirm against the file actually on disk before changing it.
JC = open("../json/JFRC_FBbt_correspondence.", "r")
try:
    for l in JC:
        l2 = l.rstrip()
        clist = l2.split("\t")
        if not re.search("_L$", clist[0]):
            oboid_domId[clist[3]] = clist[2]
finally:
    JC.close()  # FIX: file handle was never closed

# NOTE(review): "treeContent.jso" also looks like a truncated ".json" - confirm.
tc = load_json("../json/treeContent.jso")
new_tc = []
known_term_list = []
from owl2pdm_tools import ont_manager
from uk.ac.ebi.brain.core import Brain
from neo4j_tools import neo4j_connect
import sys
import re

"""Add typing via anonymous class expressions from OWL file.
Requires uniqueness constraint on individual & class short_form_id."""

# args: 1 = neo4j base URI, 2 = neo4j user, 3 = neo4j password, 4 = OWL file path/URL
nc = neo4j_connect(base_uri = sys.argv[1], usr = sys.argv[2], pwd = sys.argv[3])
vfb = Brain()
vfb.learn(sys.argv[4])  # Make this non-local
vom = ont_manager(vfb.getOntology())
# vom.typeAxioms2pdm(sfid = 'VFB_00005000')
# example = [{'isAnonymous': False, 'objectId': u'FBbt_00100247'},
#            {'relId': u'BFO_0000050', 'isAnonymous': True, 'objectId': u'FBbt_00003624'},
#            {'relId': u'BFO_0000050', 'isAnonymous': True, 'objectId': u'FBbt_00007011'},
#            {'relId': u'RO_0002292', 'isAnonymous': True, 'objectId': u'FBtp0014830'}]
# Simple to use. Only issue is resolution of short_form_ids. This can be done
# as long as these are stored as attributes on relations. These should be
# added in the process of adding named relations. Check proposed schema on ticket...

# Get all inds by query
inds = vfb.getInstances("Thing", 0)  # Could grab from neo4J avoiding Brain
def load_ont(url):
    """Load the ontology at *url* into a fresh Brain and return it."""
    brain = Brain()
    brain.learn(url)
    return brain
#!/usr/bin/env jython -J-Xmx3000m
import sys
from uk.ac.ebi.brain.core import Brain
sys.path.append('../mod')
from lmb_fc_tools import get_con, owlDbOnt

# Refresh DB labels from each ontology in turn, loading one Brain at a time.
# Could be done with one big brain file in memory, but would require lots of
# ram to run.
con = get_con(sys.argv[1], sys.argv[2])

obo = "http://purl.obolibrary.org/obo/"
vfb = obo + "fbbt/vfb/"
paths = [
    obo + "fbbt/fbbt-simple.owl",
    obo + "so.owl",
    vfb + "fb_features.owl",
    obo + "ro.owl",
    vfb + "vfb_ext.owl",
]

for ont_path in paths:
    brain = Brain()
    brain.learn(ont_path)
    od = owlDbOnt(con, brain)
    od.update_labels()
    brain.sleep()  # release the reasoner before the next ontology

con.close()
#!/usr/bin/env Jython -J-Xmx8000m
from uk.ac.ebi.brain.error import BrainException
from uk.ac.ebi.brain.core import Brain
from tsv2pdm import tab
import warnings
import sys

vfb_ind = Brain()
vfb_ind.learn(sys.argv[1])

# Draft class for object-based unit test system.  This allows more info to be
# stored about queries and tests.  Objects should be populated from a tsv.
# Good potential for generating documentation straight from tsv or by using
# these objects...

class Query:
    """Class for testing queries against ontology + individuals files for VFB

    Attributes:
      query - DL query with shortFormIDs
      query_by_label - DL query with labels (for reference)
      query_text - Descriptive text for web-page / doc.
      description - Why is this test being run?
      i - Should it have instances? - Default = False
      s - should it have subclasses? - Default = True

    Methods:
      qtest -
    """

    def __init__(self, query_labels, query_ids, query_text, description, i = False, s = True):
        # FIX: the constructor previously assigned empty strings to every
        # attribute, silently discarding all arguments passed in (and never
        # setting self.query at all).
        self.query = query_ids                # DL query with shortFormIDs
        self.query_by_label = query_labels    # DL query with labels (for reference)
        self.query_text = query_text          # Descriptive text for web-page / doc.
        self.description = description        # Why is this test being run?
        self.i = i  # Should it have instances? - Default = False
        self.s = s  # should it have subclasses? - Default = True
matching a specified name is the correct one to add the type to. If this happens
warnings are triggered. It is straightforward to remove this assumption.
2. Single quotes in names should NOT be escaped as the escape chars will end up in the DB.
Double quotes in names are banned.

"""

sys.path.append("../mod/")

con = get_con(usr = sys.argv[1] , pwd = sys.argv[2])  # connection to LMB DB. Need to make ssh tunnel first.
ontologies = Brain()  # Construct Brain object
# Now load up ontologies. These are used to check validity for addition of new classes or
# relations to DB. You can load as many as you need.
ontologies.learn("http://purl.obolibrary.org/obo/fbbt/fbbt-simple.owl")  # Switch to specific release if necessary.
odbo = owlDbOnt(conn = con, ont = ontologies)  # Object for interacting with the database + ontologies.
# Also detects anything that looks like a FlyBase feature and checks validity against public FlyBase.
annotation_table = tab(path = sys.argv[3], file_name=sys.argv[4])  # tsv file with headers: ind_name, class, rel, ind_source
# ind_source must already be in the DB
ID_range_start = 20000
# NOTE(review): ID_range_start is defined above, but the literal 20000 is
# passed in the call below - probably the variable was intended; confirm
# before changing.
for row in annotation_table.tab:
    print str(row)
    new_ind = odbo.add_ind(name = row['ind_name'], source = row['ind_source'], ID_range_start = 20000)  # Returns FALSE and warns if addn fails
    if not new_ind:
        new_ind = odbo.ind_NameId[row['ind_name']]
        warnings.warn("Assuming existing individual called %s (%s) is the correct one, and adding types accordingly." %
#!/usr/bin/env jython
from uk.ac.ebi.brain.core import Brain
import json

# Extract the "usage" annotation of selected gorel relations and write each
# to a markdown file named after the relation's label.
gorel = Brain()
gorel.learn("http://purl.obolibrary.org/obo/go/extensions/gorel.owl")

# Declaring AE rels as list for now.  Would be better to pull the list of AE
# rels automatically from the file, but with the current structure that would
# require pulling subproperties - can't do that with Brain.
relations = ["GOREL_0001006"]

for rel in relations:
    label = gorel.getLabel(rel)
    usage = gorel.getAnnotation(rel, "usage")
    usage_md = open("../.gitdown/" + label + "_usage.md", "w")
    usage_md.write(usage)
    usage_md.close()

gorel.sleep()
class om():
    # Ontology manager: wraps a Brain plus an OWLGraphWrapper to generate
    # curator-facing markdown documentation for object properties.

    def __init__(self, uri_or_fp):
        self.b = Brain()
        self.b.learn(uri_or_fp)
        self.o = self.b.getOntology()
        # uses .getEntity(<string> shortForm), .getShortForm(OWLEntity entity)
        self.bsfp = self.b.getBidiShortFormProvider()
        self.ogw = OWLGraphWrapper(self.o)
        # Markers delimiting the auto-generated section of a wiki page.
        self.start_auto_text = "---------------Text extracted from ontology: DO NOT EDIT---------------"
        self.end_auto_text = "---------------END AUTO GENERATED SECTION---------------"

    def get_valid_OP_list(self):
        """Returns a list of relations in the display_for_curators subset"""
        s = self.o.getObjectPropertiesInSignature()
        out = []
        for r in s:
            # Slightly dodgy hard-wiring of validity criterion
            if 'display_for_curators' in self.ogw.getSubsets(r):
                out.append(self.bsfp.getShortForm(r))
        return out

    def test_then_get_annotation(self, entity_sfid, AP):
        # Return the AP annotation on entity_sfid, or '' (with a warning) if
        # absent.  Bare except is deliberate: under Jython, Brain raises Java
        # exceptions that a narrower `except Exception` would not catch.
        content = ''
        try:
            content = self.b.getAnnotation(entity_sfid, AP)
        except:
            warnings.warn("%s has no annotations with %s" % (entity_sfid, AP))
            pass
        return content

    def gen_includes_md(self, r, id_name):
        """Generate markdown for inclusion in wiki page
        r = relation_shortFormId
        id_name = an id_name dict lookup to use for domain and range
        """
        # Be careful
        auto_text = "%s\n" % self.start_auto_text
        auto_text += "\n## %s\n" % self.test_then_get_annotation( r, "shorthand")
        auto_text += "* OWL ID: %s\n" % r
        auto_text += "* label: %s\n" % self.test_then_get_annotation( r, 'label')
        auto_text += "* synonyms\n%s\n" % str( list(self.ogw.getOBOSynonymStrings(self.bsfp.getEntity(r), [])))  # could be prettified
        auto_text += "\n### Definition\n%s\n" % self.test_then_get_annotation( r, "IAO_0000115")
        auto_text += "\n### Usage\n%s\n" % self.test_then_get_annotation( r, "usage")
        # auto_text += "\n### Comment\n%s\n" % self.test_then_get_annotation( r, "comment")
        # auto_text += "\n### Subsets\n%s\n" % str( self.ogw.getSubsets( self.bsfp.getEntity(r)))  # Perhaps only display AE_
        # Finding child and parent relations would take a reasoner object
        # call.  Better for annotators to rely on graph.
        # Might be useful to give some record of how often used in ontology
        local_domain = self.test_then_get_annotation(r, "local_domain")
        local_range = self.test_then_get_annotation(r, "local_range")
        ldd = {}
        if local_domain:
            # Resolve each space-separated domain ID to a name via id_name;
            # unknown IDs get '' and are also added to id_name (side effect -
            # the caller's dict is mutated).
            for ld in local_domain.split(" "):
                if ld in id_name.keys():
                    ldd[ld] = id_name[ld]
                else:
                    ldd[ld] = ''
                    id_name[ld] = ''
            # NOTE(review): heading "##local domain" lacks the space that
            # "## local range" below has - confirm which rendering is intended.
            auto_text += "\n##local domain\n%s\n" % str(ldd)
        lrd = {}
        if local_range:
            for lr in local_range.split(" "):
                if lr in id_name.keys():
                    lrd[lr] = id_name[lr]
                else:
                    lrd[lr] = ''
                    id_name[lr] = ''
            auto_text += "\n## local range\n%s\n" % str(lrd)
        auto_text += "\n%s\n" % self.end_auto_text
        return auto_text
#!/usr/bin/env jython
import json
from uk.ac.ebi.brain.core import Brain
import sys

"""Takes a list of ontology URIs as args, writes a JSON lookup of ID:name."""

out = {}
for path in sys.argv[1:]:
    entities = []
    o = Brain()
    o.learn(path)
    entities.extend(list(o.getSubClasses('Thing', 0)))
    entities.extend(list(o.getInstances('Thing', 0)))
    for e in entities:
        # Need check for if label exists. Should be able to do that by
        # iterating over all annotations on class to check. Will slow things
        # down a lot...
        out[e] = o.getLabel(e)
    o.sleep()

OUT = open('id_name.json', 'w')
try:
    OUT.write(json.dumps(out))
finally:
    OUT.close()  # FIX: handle was never closed/flushed explicitly
class test_suite():
    """Integration tests for owlDbOnt run against a live DB + loaded
    ontologies.  SQL cleanup statements are queued in cleanup_list and
    executed by cleanup()."""
    # Tests to add: add_owl_entity_2_db

    def __init__(self, usr, pwd, ont_uri_list):
        self.conn = get_con(usr, pwd)
        self.ont = Brain()
        for uri in ont_uri_list:
            self.ont.learn(uri)
        self.od = owlDbOnt(self.conn, self.ont)
        self.cleanup_list = []

    def run_tests(self):
        """Run all tests, then clean up and release DB/reasoner resources."""
        self.add_ind_type_test()
        self.add_akv_type_test()
        self.cleanup()
        self.ont.sleep()
        self.conn.close()

    def add_ind_type_test(self):
        """Combined test of add ind and add_ind_type.."""
        # A better test would use silly examples that could never be real, so
        # all entities could safely be deleted.
        # add ind_test where name has quotes to be escaped.
        self.od.add_ind("add_ind_test", 'CostaJefferis')
        cursor = self.conn.cursor()
        cursor.execute("SELECT * from owl_individual WHERE label = 'add_ind_test'")
        dc = dict_cursor(cursor)
        iid = False
        for d in dc:
            if d['label'] == "add_ind_test":
                iid = d['id']
        if not iid:
            # FIX: the warning previously sat in an else-branch inside the
            # loop, firing once per non-matching row instead of once overall.
            warnings.warn("Failed to add test ind")
        cursor.close()
        # add ind_type_test
        if iid:
            self.od.add_ind_type(ind = iid, OWLclass = 'FBbt_00003624', objectProperty = 'BFO_0000050')
            typ = self.od.type_exists('FBbt_00003624', 'BFO_0000050')
            self.od.add_ind_type(ind = iid, OWLclass = 'FBgn0000490', objectProperty = 'RO_0002292')
            typ2 = self.od.type_exists('FBgn0000490', 'RO_0002292')
            stat = False
            if not typ:
                warnings.warn("Failed to create test type statement 'BFO_0000050' some 'FBbt_00003624'")
            elif not typ2:
                warnings.warn("Failed to create test type statement 'expresses some dpp'.")
            else:
                stat = True
            # No longer needed as DELETE cascade set
            # self.cleanup_list.append("DELETE FROM individual_type WHERE id = %s" % typ)  # Type assertions must be deleted first.
            # self.cleanup_list.append("DELETE FROM individual_type WHERE id = %s" % typ2)  # Type assertions must be deleted first.
            self.cleanup_list.append("DELETE from owl_individual WHERE label = 'add_ind_test'")
            return stat

    def add_akv_type_test(self):
        """Add an annotation_type row and record its id for cleanup."""
        self.od.add_akv_type('process', 'note', 'FBbt_00003624', 'BFO_0000050')
        cursor = self.conn.cursor()
        # NOTE(review): this SELECT has no WHERE clause pinning the row just
        # added - it relies on the last row wins; fragile on a shared DB.
        cursor.execute("SELECT at.id FROM annotation_type at " \
                       "JOIN annotation_key_value akv ON (akv.id = at.annotation_key_value_id) " \
                       "JOIN owl_type ot ON (ot.id=at.owl_type_id)")
        dc = dict_cursor(cursor)
        ID = ''
        for d in dc:
            ID = d['id']
        cursor.close()  # FIX: cursor was never closed
        if not ID:
            warnings.warn("Failed to add akv type.")
        else:
            # FIX: previously appended unconditionally, producing the broken
            # statement "DELETE ... WHERE id = " when no row was found.
            self.cleanup_list.append("DELETE FROM annotation_type WHERE id = %s" % ID)
        return ID

    def cleanup(self):
        """Execute all queued cleanup statements and commit."""
        cursor = self.conn.cursor()
        for command in self.cleanup_list:
            cursor.execute(command)
        self.conn.commit()
        cursor.close()  # FIX: cursor was never closed
import sys """Runs verification tests on abstract patterns and generates markdown docs. 1st ARG specifies path to input/output files. Second ARG specifies ontology file to use in validation.""" def load_json_from_file(path): json_file = open(path, "r") json_string = json_file.read() json_file.close() return json.loads(json_string) # Testing abstract pattern validation and documentation o = Brain() o.learn(sys.argv[2]) # Running with local file for now. # Switch this to specify path as argv json_files = glob.glob(sys.argv[1] + "*.json") # Note - glob returns full file path for f in json_files: p = load_json_from_file(f) m = re.search("(.+).json", f) pattern_name = m.group(1) print "Processing %s" % pattern_name ap = pattern.abstract_pattern(p, o) md = open(pattern_name + ".md", "w") #print ap.gen_markdown_doc() md.write(ap.gen_markdown_doc()) md.close() o.sleep()
from uk.ac.ebi.brain.core import Brain b = Brain() b.learn("http://purl.obolibrary.org/obo/pato.owl") sc = b.getSubClasses('Thing', 0) print "PATO has %s classes" % len(list(sc))