def genseo():
    """
    Build the base SEO ontology (seo.owl).

    The graph declares the SEO ontology itself and a common superclass
    ``SEO.SEO`` (a subclass of ``skos:Concept``); the SEO 1998 and SEO 2008
    top-level classes are declared as subclasses of it.  Matching
    properties and the SEO matches are then added by
    ``add_matchingproperties`` and ``genseomatches``.

    TODO: add owl:versionInfo

    Returns the populated graph.
    """
    graph = Graph()

    # statements about the ontology itself
    ontology_header = (
        (RDF.type, OWL.Ontology),
        (RDFS.label, Literal(u'SEO Ontology')),
        (RDFS.comment, Literal(u'An ontology that provides some base '
                               u'definitions for SEO 1998 and SEO 2008 '
                               u'ontologies.')),
    )
    for predicate, value in ontology_header:
        graph.add((SEO, predicate, value))
    ontoannot(graph, SEO)
    ontoversion(graph, SEO)

    # the shared superclass for all SEO codes
    superclass = SEO.SEO
    superclass_statements = (
        (RDF.type, OWL.Class),
        (RDFS.label, Literal(u'SEO Code')),
        (RDFS.comment, Literal(u'Superclass for SEO codes')),
        (RDFS.subClassOf, SKOS.Concept),
    )
    for predicate, value in superclass_statements:
        graph.add((superclass, predicate, value))

    # hook both versioned class trees below the shared superclass
    for versioned in (SEO98.SEO, SEO08.SEO):
        graph.add((versioned, RDFS.subClassOf, superclass))

    add_matchingproperties(graph)
    genseomatches(graph)
    return graph
def genfor():
    """
    Build the base FOR ontology (for.owl).

    The graph declares the FOR ontology itself and a common superclass
    ``FOR.FOR`` (a subclass of ``skos:Concept``); the FOR 2008 and RFCD
    top-level classes are declared as subclasses of it.  Matching
    properties and the FOR matches are then added by
    ``add_matchingproperties`` and ``genformatches``.

    Returns the populated graph.
    """
    graph = Graph()

    # statements about the ontology itself
    ontology_header = (
        (RDF.type, OWL.Ontology),
        (RDFS.label, Literal(u'FOR Ontology')),
        (RDFS.comment, Literal(u'An ontology that provides some base '
                               u'definitions for RFCD 1998 and '
                               u'FOR 2008 ontologies.')),
    )
    for predicate, value in ontology_header:
        graph.add((FOR, predicate, value))
    ontoannot(graph, FOR)
    ontoversion(graph, FOR)

    # the shared superclass for all FOR / RFCD codes
    superclass = FOR.FOR
    superclass_statements = (
        (RDF.type, OWL.Class),
        (RDFS.label, Literal(u'FOR code')),
        (RDFS.comment, Literal(u'Superclass for FOR codes')),
        (RDFS.subClassOf, SKOS.Concept),
    )
    for predicate, value in superclass_statements:
        graph.add((superclass, predicate, value))

    # hook both versioned class trees below the shared superclass
    for versioned in (FOR08.FOR, RFCD.RFCD):
        graph.add((versioned, RDFS.subClassOf, superclass))

    add_matchingproperties(graph)
    genformatches(graph)
    return graph
def main():
    """
    Generate all ontologies and write the VIVO graph into OUTPUT_DIR.

    Creates OUTPUT_DIR when it does not exist, hands every ontology
    generator (together with its short name and namespace) to
    ``genoutput``, and finally builds a separate graph via
    ``setnamespaceprefixes``, ``ontoannot`` and ``addvivo`` which is
    serialized to ``anzsrc_vivo.rdf`` inside OUTPUT_DIR.
    """
    # check if output dir exists
    if not os.path.exists(os.path.abspath(OUTPUT_DIR)):
        os.makedirs(OUTPUT_DIR)

    genoutput(seo.genseo, 'seo', SEO)
    genoutput(seo1998.genseo98, 'seo98', SEO98)
    genoutput(seo2008.genseo08, 'seo08', SEO08)
    genoutput(for_.genfor, 'for', FOR)
    genoutput(rfcd.genrfcd, 'rfcd', RFCD)
    genoutput(for2008.genfor08, 'for08', FOR08)
    genoutput(toa.gentoa, 'toa', TOA)
    genoutput(genanzsrc, 'anzsrc', ANZSRC)

    g = Graph()
    setnamespaceprefixes(g)
    ontoannot(g, URIRef(ANZSRCVIVO))
    addvivo(g)

    # the context manager closes the file even when serialization fails
    with open(os.path.join(OUTPUT_DIR, 'anzsrc_vivo.rdf'), 'w') as f:
        g.serialize(f)
def genseo98():
    """
    Generate the SEO 1998 ontology (seo98.owl).

    Declares the ontology / SKOS concept scheme header and the classes
    SEO, SEODivision, SEO2, SEO4 and SEO6.  Subdivision (2 digit) and
    group (4 digit) titles are read from ``anzsrc_data/seo98.csv``;
    objective (6 digit) codes are collected from the first two columns of
    ``anzsrc_data/seo98-seo08.csv``.  Inconsistencies are reported on
    stdout as warnings, a summary is printed, and one node per code is
    created through ``createNode``.

    Returns the populated graph.
    """
    g = Graph()

    # statements about the ontology itself
    g.add((SEO98, RDF.type, OWL.Ontology))
    g.add((SEO98, RDF.type, SKOS.ConceptScheme))
    g.add((SEO98, RDFS.label, Literal(u'SEO 1998 Ontology')))
    g.add((SEO98, RDFS.comment, Literal(
        u'An ontology that provides classes '
        u'codes and hierarchical information '
        u'about SEO 1998 codes.')))
    g.add((SEO98, DC.title, Literal(
        u"Australian Standard Research "
        u"Classification (ASRC): "
        u"Socio-Economic Objective Classification", lang=u"en")))
    g.add((SEO98, DC.description, Literal(
        u"The SEO Classification allows R&D "
        u"data to be classified according to the researcher's perceived "
        u"purpose. The purpose categories take account of processes, "
        u"products, health, education and other social and environmental "
        u"aspects of particular interest.", lang=u"en")))
    ontoannot(g, SEO98)
    ontoversion(g, SEO98)

    # class definitions: (class, parent class, label, comment)
    class_defs = (
        (SEO98.SEO, SKOS.Concept, u'SEO 1998 Code',
         u'Superclass for SEO 1998 codes'),
        (SEO98.SEODivision, SEO98.SEO, u'SEO 1998 Division Code',
         u'Class for SEO 1998 Division codes'),
        (SEO98.SEO2, SEO98.SEO, u'SEO 1998 2 digit Code',
         u'Class for SEO 1998 2 digit codes'),
        (SEO98.SEO4, SEO98.SEO, u'SEO 1998 4 digit Code',
         u'Class for SEO 1998 4 digit codes'),
        (SEO98.SEO6, SEO98.SEO, u'SEO 1998 6 digit Code',
         u'Class for SEO 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # read data from csv files; 'with' makes sure the files get closed
    subdivision = {}  # collect subdivision codes
    group = {}  # collect group codes
    with open('anzsrc_data/seo98.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # skip the first two rows
        next(reader)
        for code, title in reader:
            if len(code) == 2:
                subdivision[int(code)] = title
            if len(code) == 4:
                group[int(code)] = title

    objective = {}  # collect SEO-6 codes in here
    with open('anzsrc_data/seo98-seo08.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(reader)
        # check if all data exists and collect objective codes
        # (the SEO 2008 columns are unpacked but not used here)
        for s98, n98, s08, n08 in reader:
            if not s98:
                continue
            subdivcode = int(s98[:2])
            if subdivcode not in subdivision:
                print("WARNING division %d does not exist" % subdivcode)
            grcode = int(s98[:4])
            if grcode not in group:
                print("WARNING group %d does not exist" % grcode)
            obcode = int(s98)
            if obcode not in objective:
                objective[obcode] = n98
            elif objective[obcode] != n98:
                print("WARNING objective %d %s differs from %d %s" % (
                    obcode, objective[obcode], obcode, n98))

    # NOTE(review): `division` and `code2division` are not defined inside
    # this function; presumably they are module-level tables -- confirm.

    # print summary and build instances
    print('SEO 98')
    # from seo98.pdf: 5 divisions, 18 subdivisions, 107 groups and 594 classes
    print('Divisions (5): %d' % len(division))
    print('Subdivisions (18):  %d' % len(subdivision))
    print('Groups (107): %d' % len(group))
    print('Objective (594): %d' % len(objective))

    for divcode, title in division.items():
        createNode(g, SEO98, SEO98.SEODivision, divcode, title, None)
    for code, title in subdivision.items():
        createNode(g, SEO98, SEO98.SEO2, u'%02d' % code, title,
                   code2division[code])
    # 4 and 6 digit codes hang below the code formed by their leading digits
    for code, title in group.items():
        padded = u'%04d' % code
        createNode(g, SEO98, SEO98.SEO4, padded, title, padded[:2])
    for code, title in objective.items():
        padded = u'%06d' % code
        createNode(g, SEO98, SEO98.SEO6, padded, title, padded[:4])
    return g
def genseo98():
    """
    Generate the SEO 1998 ontology (seo98.owl).

    The function first adds the ontology / SKOS concept scheme header and
    the class definitions (SEO, SEODivision, SEO2, SEO4, SEO6).  It then
    reads subdivision (2 digit) and group (4 digit) titles from
    ``anzsrc_data/seo98.csv`` and objective (6 digit) codes from the first
    two columns of ``anzsrc_data/seo98-seo08.csv``, printing a warning for
    every inconsistency found.  Finally a summary is printed and one node
    per code is created with ``createNode``.

    Returns the populated graph.
    """
    g = Graph()

    # statements about the ontology itself
    g.add((SEO98, RDF.type, OWL.Ontology))
    g.add((SEO98, RDF.type, SKOS.ConceptScheme))
    g.add((SEO98, RDFS.label, Literal(u'SEO 1998 Ontology')))
    g.add((SEO98, RDFS.comment, Literal(
        u'An ontology that provides classes '
        u'codes and hierarchical information '
        u'about SEO 1998 codes.')))
    g.add((SEO98, DC.title, Literal(
        u"Australian Standard Research "
        u"Classification (ASRC): "
        u"Socio-Economic Objective Classification", lang=u"en")))
    g.add((SEO98, DC.description, Literal(
        u"The SEO Classification allows R&D "
        u"data to be classified according to the researcher's perceived "
        u"purpose. The purpose categories take account of processes, "
        u"products, health, education and other social and environmental "
        u"aspects of particular interest.", lang=u"en")))
    ontoannot(g, SEO98)
    ontoversion(g, SEO98)

    # class definitions: (class, parent class, label, comment)
    class_defs = (
        (SEO98.SEO, SKOS.Concept, u'SEO 1998 Code',
         u'Superclass for SEO 1998 codes'),
        (SEO98.SEODivision, SEO98.SEO, u'SEO 1998 Division Code',
         u'Class for SEO 1998 Division codes'),
        (SEO98.SEO2, SEO98.SEO, u'SEO 1998 2 digit Code',
         u'Class for SEO 1998 2 digit codes'),
        (SEO98.SEO4, SEO98.SEO, u'SEO 1998 4 digit Code',
         u'Class for SEO 1998 4 digit codes'),
        (SEO98.SEO6, SEO98.SEO, u'SEO 1998 6 digit Code',
         u'Class for SEO 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # read data from csv files; the files are closed when the blocks end
    subdivision = {}  # collect subdivision codes
    group = {}  # collect group codes
    with open('anzsrc_data/seo98.csv') as csvfile:
        rows = csv.reader(csvfile)
        next(rows)  # skip the first two rows
        next(rows)
        for code, title in rows:
            if len(code) == 2:
                subdivision[int(code)] = title
            if len(code) == 4:
                group[int(code)] = title

    objective = {}  # collect SEO-6 codes in here
    with open('anzsrc_data/seo98-seo08.csv') as csvfile:
        rows = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(rows)
        # check if all data exists and collect objective codes
        # (the SEO 2008 columns are unpacked but not used here)
        for s98, n98, s08, n08 in rows:
            if not s98:
                continue
            subdivcode = int(s98[:2])
            if subdivcode not in subdivision:
                print("WARNING division %d does not exist" % subdivcode)
            grcode = int(s98[:4])
            if grcode not in group:
                print("WARNING group %d does not exist" % grcode)
            obcode = int(s98)
            if obcode not in objective:
                objective[obcode] = n98
            elif objective[obcode] != n98:
                print("WARNING objective %d %s differs from %d %s" % (
                    obcode, objective[obcode], obcode, n98))

    # NOTE(review): `division` and `code2division` are not defined inside
    # this function; presumably they are module-level tables -- confirm.

    # print summary and build instances
    print('SEO 98')
    # from seo98.pdf: 5 divisions, 18 subdivisions, 107 groups and 594 classes
    print('Divisions (5): %d' % len(division))
    print('Subdivisions (18):  %d' % len(subdivision))
    print('Groups (107): %d' % len(group))
    print('Objective (594): %d' % len(objective))

    for divcode, title in division.items():
        createNode(g, SEO98, SEO98.SEODivision, divcode, title, None)
    for code, title in subdivision.items():
        createNode(g, SEO98, SEO98.SEO2, u'%02d' % code, title,
                   code2division[code])
    # 4 and 6 digit codes hang below the code formed by their leading digits
    for code, title in group.items():
        padded = u'%04d' % code
        createNode(g, SEO98, SEO98.SEO4, padded, title, padded[:2])
    for code, title in objective.items():
        padded = u'%06d' % code
        createNode(g, SEO98, SEO98.SEO6, padded, title, padded[:4])
    return g
def gentoa():
    """
    Generate the TOA (Type of Activity) 1993 ontology.

    Declares the ontology / SKOS concept scheme header, the class
    ``TOA.TOA`` (a subclass of ``skos:Concept``) and four instances of
    that class, each registered as a top concept of the scheme.

    Returns the populated graph.
    """
    graph = Graph()

    # statements about the ontology itself
    graph.add((TOA, RDF.type, OWL.Ontology))
    graph.add((TOA, RDF.type, SKOS.ConceptScheme))
    graph.add((TOA, RDFS.label, Literal(u'TOA 1993 Ontology')))
    graph.add((TOA, RDFS.comment, Literal(
        u'An ontology that provides classes '
        u'codes and hierarchical information about ASRC/ANZSRC '
        u'Type of Activity definitions.')))
    graph.add((TOA, DC.title, Literal(
        u"Australian and New Zealand Standard "
        u"Research Classification (ANZSRC): "
        u"Type of Activity.", lang=u"en")))
    ontoannot(graph, TOA)
    ontoversion(graph, TOA)

    # a class for TOA
    toa_class = TOA.TOA
    graph.add((toa_class, RDF.type, OWL.Class))
    graph.add((toa_class, RDFS.subClassOf, SKOS.Concept))
    graph.add((toa_class, RDFS.label, Literal(u'TOA 1993 Definition')))
    graph.add((toa_class, RDFS.comment, Literal(
        u'Instances of this class describe '
        u'TOA definitions')))

    # the individual definitions: (term name, label, description)
    definitions = (
        (u'PureBasicResearch',
         u'Pure basic research',
         u'Pure basic research is experimental '
         u'and theoretical work undertaken to acquire new knowledge '
         u'without looking for long term benefits other than the '
         u'advancement of knowledge.'),
        (u'StrategicBasicResearch',
         u'Strategic basic research',
         u'Strategic basic research is '
         u'experimental and theoretical work undertaken to acquire new '
         u'knowledge directed into specified broad areas in the '
         u'expectation of useful discoveries. It provides the broad '
         u'base of knowledge necessary for the solution of recognised '
         u'practical problems.'),
        (u'AppliedResearch',
         u'Applied research',
         u'Applied research is original work '
         u'undertaken primarily to acquire new knowledge with a '
         u'specific application in view. It is undertaken either to '
         u'determine possible uses for the findings of basic research '
         u'or to determine new ways of achieving some specific and '
         u'predetermined objectives.'),
        (u'ExperimentalDevelopment',
         u'Experimental development',
         u'Experimental development is systematic'
         u' work, using existing knowledge gained from research or '
         u'practical experience, that is directed to producing new '
         u'materials, products or devices, to installing new processes,'
         u' systems and services, or to improving substantially those '
         u'already produced or installed.'),
    )
    for term_name, label, description in definitions:
        concept = TOA.term(term_name)
        graph.add((concept, RDF.type, toa_class))
        graph.add((concept, RDF.type, OWL.Thing))
        graph.add((concept, RDFS.label, Literal(label)))
        graph.add((concept, RDFS.comment, Literal(description)))
        graph.add((concept, SKOS.prefLabel, Literal(label, lang=u"en")))
        # link scheme and concept in both directions
        graph.add((TOA, SKOS.hasTopConcept, concept))
        graph.add((concept, SKOS.inScheme, TOA))
    return graph
def gentoa():
    """
    Generate the TOA (Type of Activity) 1993 ontology.

    The graph contains the ontology / SKOS concept scheme header, the
    class ``TOA.TOA`` (a subclass of ``skos:Concept``) and four instances
    of that class.  Every instance is added as a top concept of the
    scheme and linked back to the scheme.

    Returns the populated graph.
    """
    graph = Graph()

    # statements about the ontology itself
    graph.add((TOA, RDF.type, OWL.Ontology))
    graph.add((TOA, RDF.type, SKOS.ConceptScheme))
    graph.add((TOA, RDFS.label, Literal(u'TOA 1993 Ontology')))
    graph.add((TOA, RDFS.comment, Literal(
        u'An ontology that provides classes '
        u'codes and hierarchical information about ASRC/ANZSRC '
        u'Type of Activity definitions.')))
    graph.add((TOA, DC.title, Literal(
        u"Australian and New Zealand Standard "
        u"Research Classification (ANZSRC): "
        u"Type of Activity.", lang=u"en")))
    ontoannot(graph, TOA)
    ontoversion(graph, TOA)

    # a class for TOA
    toa_class = TOA.TOA
    graph.add((toa_class, RDF.type, OWL.Class))
    graph.add((toa_class, RDFS.subClassOf, SKOS.Concept))
    graph.add((toa_class, RDFS.label, Literal(u'TOA 1993 Definition')))
    graph.add((toa_class, RDFS.comment, Literal(
        u'Instances of this class describe '
        u'TOA definitions')))

    # the individual definitions: (term name, label, description)
    definitions = (
        (u'PureBasicResearch',
         u'Pure basic research',
         u'Pure basic research is experimental '
         u'and theoretical work undertaken to acquire new knowledge '
         u'without looking for long term benefits other than the '
         u'advancement of knowledge.'),
        (u'StrategicBasicResearch',
         u'Strategic basic research',
         u'Strategic basic research is '
         u'experimental and theoretical work undertaken to acquire new '
         u'knowledge directed into specified broad areas in the '
         u'expectation of useful discoveries. It provides the broad '
         u'base of knowledge necessary for the solution of recognised '
         u'practical problems.'),
        (u'AppliedResearch',
         u'Applied research',
         u'Applied research is original work '
         u'undertaken primarily to acquire new knowledge with a '
         u'specific application in view. It is undertaken either to '
         u'determine possible uses for the findings of basic research '
         u'or to determine new ways of achieving some specific and '
         u'predetermined objectives.'),
        (u'ExperimentalDevelopment',
         u'Experimental development',
         u'Experimental development is systematic'
         u' work, using existing knowledge gained from research or '
         u'practical experience, that is directed to producing new '
         u'materials, products or devices, to installing new processes,'
         u' systems and services, or to improving substantially those '
         u'already produced or installed.'),
    )
    for term_name, label, description in definitions:
        concept = TOA.term(term_name)
        graph.add((concept, RDF.type, toa_class))
        graph.add((concept, RDF.type, OWL.Thing))
        graph.add((concept, RDFS.label, Literal(label)))
        graph.add((concept, RDFS.comment, Literal(description)))
        graph.add((concept, SKOS.prefLabel, Literal(label, lang=u"en")))
        # link scheme and concept in both directions
        graph.add((TOA, SKOS.hasTopConcept, concept))
        graph.add((concept, SKOS.inScheme, TOA))
    return graph
def genfor08():
    """
    Generate the FOR 2008 ontology (for08.owl).

    Classes declared here::

        class   subclass of   broader   narrower
        FOR     skos:Concept
        FOR2    FOR           -         FOR4
        FOR4    FOR           FOR2      FOR6
        FOR6    FOR           FOR4      -

    A datatype property ``FOR08.code`` (domain FOR, range xsd:string) is
    declared as well.  Division, group and field names are read from
    ``anzsrc_data/for08.csv``; the first column of
    ``anzsrc_data/for08-rfcd.csv`` is used to cross-check that every
    mapped code is known.  Problems are reported on stdout, a summary is
    printed and one node per code is created through ``createNode``.

    Returns the populated graph.
    """
    g = Graph()

    # statements about the ontology itself
    g.add((FOR08, RDF.type, OWL.Ontology))
    g.add((FOR08, RDF.type, SKOS.ConceptScheme))
    g.add((FOR08, RDFS.label, Literal(u"FOR 2008 Ontology")))
    g.add((FOR08, RDFS.comment, Literal(
        u"An ontology that provides classes "
        u"codes and hierarchical information "
        u"about FOR 2008 codes.")))
    g.add((FOR08, DC.title, Literal(
        u"Australian and New Zealand Standard "
        u"Research Classification (ANZSRC): "
        u"Fields of Research.", lang=u"en")))
    g.add((FOR08, DC.description, Literal(
        u"The ANZSRC FOR allows R&D activity"
        u" to be categorised according to the methodology used in the R&D,"
        u" rather than the activity of the unit performing the R&D or the "
        u"purpose of the R&D."
        u"\n"
        u"The categories in the classification include major fields and "
        u"related sub-fields of research and emerging areas of study "
        u"investigated by businesses, universities, tertiary institutions,"
        u" national research institutions and other organisations."
        u"\n"
        u"This classification allows the categorisation of fields of "
        u"research activity within Australia and New Zealand.",
        lang=u"en")))
    ontoannot(g, FOR08)
    ontoversion(g, FOR08)

    # class definitions: (class, parent class, label, comment)
    class_defs = (
        (FOR08.FOR, SKOS.Concept, u"FOR 2008 Code",
         u"Superclass for FOR 2008 codes"),
        (FOR08.FOR2, FOR08.FOR, u"FOR 2008 2 digit Code",
         u"Class for FOR 2008 2 digit codes"),
        (FOR08.FOR4, FOR08.FOR, u"FOR 2008 4 digit Code",
         u"Class for FOR 2008 4 digit codes"),
        (FOR08.FOR6, FOR08.FOR, u"FOR 2008 6 digit Code",
         u"Class for FOR 2008 6 digit codes"),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # create property definition to hold code
    g.add((FOR08.code, RDF.type, OWL.DatatypeProperty))
    g.add((FOR08.code, RDFS.domain, FOR08.FOR))
    g.add((FOR08.code, RDFS.range, XSD.string))

    division = {}
    group = {}
    field = {}
    # 'with' makes sure the data file is closed again
    with open("anzsrc_data/for08.csv") as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # skip the first row
        # every row repeats the division and group name of its field, so
        # the same code must always come with the same name
        for divname, grname, finame, code in reader:
            divcode = int(code[:2])
            if divcode not in division:
                division[divcode] = divname
            elif division[divcode] != divname:
                print("WARNING division")
            grcode = int(code[:4])
            if grcode not in group:
                group[grcode] = grname
            elif group[grcode] != grname:
                print("WARNING group")
            ficode = int(code)
            if ficode not in field:
                field[ficode] = finame
            elif field[ficode] != finame:
                print("WARNING field")

    # check against mapping:
    with open("anzsrc_data/for08-rfcd.csv") as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(reader)
        for row in reader:
            try:
                mapped = int(row[0])
            except ValueError:
                # first column is not a number, nothing to check
                continue
            if mapped not in field:
                print("WARNING: missing code  %s" % (row,))

    print("FOR")
    print("Divisions (22): %d" % len(division))
    print("Groups (157): %d" % len(group))
    print("Field (1238): %d" % len(field))

    for code, name in division.items():
        createNode(g, FOR08, FOR08.FOR2, "%02d" % code, name, None)
    # 4 and 6 digit codes hang below the code formed by their leading digits
    for code, name in group.items():
        padded = "%04d" % code
        createNode(g, FOR08, FOR08.FOR4, padded, name, padded[:2])
    for code, name in field.items():
        padded = "%06d" % code
        createNode(g, FOR08, FOR08.FOR6, padded, name, padded[:4])
    return g
def genrfcd():
    '''
    Generate the RFCD 1998 ontology.

    Classes declared here::

        class   subclass of   broader   narrower
        RFCD    skos:Concept
        RFCD2   RFCD          -         RFCD4
        RFCD4   RFCD          RFCD2     RFCD6
        RFCD6   RFCD          RFCD4     -

    Division (2 digit) and discipline (4 digit) titles are read from
    ``anzsrc_data/rfcd.csv``; subject (6 digit) codes are collected from
    the first two columns of ``anzsrc_data/rfcd-for08.csv``.
    Inconsistencies are reported on stdout as warnings, a summary is
    printed and one node per code is created.

    Returns the populated graph.
    '''
    # generate class definitions
    g = Graph()
    g.add((RFCD, RDF.type, OWL.Ontology))
    g.add((RFCD, RDF.type, SKOS.ConceptScheme))
    g.add((RFCD, RDFS.label, Literal(u'RFCD 1998 Ontology')))
    g.add((RFCD, RDFS.comment, Literal(
        u'An ontology that provides classes '
        u'codes and hierarchical information '
        u'about RFCD 1998 codes.')))
    g.add((RFCD, DC.title, Literal(
        u"Australian Standard Research "
        u"Classification (ASRC): "
        u"Research Fields, Courses and Disciplines "
        u"Classification", lang=u"en")))
    g.add((RFCD, DC.description, Literal(
        u"This classification allows both R&D"
        u" activity and other activity within the higher education sector "
        u"to be categorised."
        u"\n"
        u"The categories in the classification include recognised academic"
        u" disciplines and related major sub-fields taught at universities"
        u" or tertiary institutions, major fields of research investigated"
        u" by national research institutions and organisations, and "
        u"emerging areas of study.", lang="en")))
    ontoannot(g, RFCD)
    ontoversion(g, RFCD)

    # class definitions: (class, parent class, label, comment)
    class_defs = (
        (RFCD.RFCD, SKOS.Concept, u'RFCD 1998 Code',
         u'Superclass for RFCD 1998 '
         u'codes'),
        (RFCD.RFCD2, RFCD.RFCD, u'RFCD 1998 2 digit Code',
         u'Class for RFCD 1998 2 digit codes'),
        (RFCD.RFCD4, RFCD.RFCD, u'RFCD 1998 4 digit Code',
         u'Class for RFCD 1998 4 digit codes'),
        (RFCD.RFCD6, RFCD.RFCD, u'RFCD 1998 6 digit Code',
         u'Class for RFCD 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # read data from csv files; 'with' makes sure the files get closed
    division = {}  # collect divisions here
    discipline = {}  # collect disciplines here
    with open('anzsrc_data/rfcd.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)
        next(reader)  # skip file header
        for code, title in reader:
            if len(code) == 2:
                division[int(code)] = title
            elif len(code) == 4:
                discipline[int(code)] = title

    subject = {}  # collect RFCD-6 codes in here
    with open('anzsrc_data/rfcd-for08.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(reader)
        # check if all data exists and collect subject codes
        # (the FOR 2008 columns are unpacked but not used here)
        for f98, n98, f08, n08 in reader:
            if not f98:
                continue
            divcode = int(f98[:2])
            if divcode not in division:
                print("WARNING division %d does not exist" % divcode)
            disccode = int(f98[:4])
            if disccode not in discipline:
                print("WARNING discipline %d does not exist" % disccode)
            subjcode = int(f98)
            if subjcode not in subject:
                subject[subjcode] = n98
            elif subject[subjcode] != n98:
                print("WARNING subject %d %s differs from %d %s" % (
                    subjcode, subject[subjcode], subjcode, n98))

    # All data read, print out a summary and start creating instances
    print('RFCD 98')
    # from 12970_98.pdf: 24 divisions, 139 disciplines, 898 subjects
    print('Divisions (24): %d' % len(division))
    print('Disciplines (139): %d' % len(discipline))
    print('Subjects (898): %d' % len(subject))

    # NOTE(review): divisions are built by this local helper rather than
    # through createNode like the 4 and 6 digit codes -- confirm that the
    # difference is intended.
    def createDivision(code, name):
        rfcdcode = RFCD.term(code)
        g.add((rfcdcode, RDF.type, RFCD.RFCD2))
        g.add((rfcdcode, RDF.type, OWL.Thing))
        g.add((rfcdcode, RDFS.label, Literal(unicode(name))))
        g.add((rfcdcode, ANZSRC.code, Literal(code)))

    for code, title in division.items():
        createDivision(u'%02d' % code, title)
    # 4 and 6 digit codes hang below the code formed by their leading digits
    for code, title in discipline.items():
        padded = u'%04d' % code
        createNode(g, RFCD, RFCD.RFCD4, padded, title, padded[:2])
    for code, title in subject.items():
        padded = u'%06d' % code
        createNode(g, RFCD, RFCD.RFCD6, padded, title, padded[:4])
    return g
def genrfcd():
    '''
    Generate the RFCD 1998 ontology.

    Classes declared here::

        class   subclass of   broader   narrower
        RFCD    skos:Concept
        RFCD2   RFCD          -         RFCD4
        RFCD4   RFCD          RFCD2     RFCD6
        RFCD6   RFCD          RFCD4     -

    The function reads division (2 digit) and discipline (4 digit) titles
    from ``anzsrc_data/rfcd.csv`` and collects subject (6 digit) codes
    from the first two columns of ``anzsrc_data/rfcd-for08.csv``.  It
    prints a warning for every inconsistency, prints a summary and then
    creates one node per code.

    Returns the populated graph.
    '''
    # generate class definitions
    g = Graph()
    g.add((RFCD, RDF.type, OWL.Ontology))
    g.add((RFCD, RDF.type, SKOS.ConceptScheme))
    g.add((RFCD, RDFS.label, Literal(u'RFCD 1998 Ontology')))
    g.add((RFCD, RDFS.comment, Literal(
        u'An ontology that provides classes '
        u'codes and hierarchical information '
        u'about RFCD 1998 codes.')))
    g.add((RFCD, DC.title, Literal(
        u"Australian Standard Research "
        u"Classification (ASRC): "
        u"Research Fields, Courses and Disciplines "
        u"Classification", lang=u"en")))
    g.add((RFCD, DC.description, Literal(
        u"This classification allows both R&D"
        u" activity and other activity within the higher education sector "
        u"to be categorised."
        u"\n"
        u"The categories in the classification include recognised academic"
        u" disciplines and related major sub-fields taught at universities"
        u" or tertiary institutions, major fields of research investigated"
        u" by national research institutions and organisations, and "
        u"emerging areas of study.", lang="en")))
    ontoannot(g, RFCD)
    ontoversion(g, RFCD)

    # class definitions: (class, parent class, label, comment)
    class_defs = (
        (RFCD.RFCD, SKOS.Concept, u'RFCD 1998 Code',
         u'Superclass for RFCD 1998 '
         u'codes'),
        (RFCD.RFCD2, RFCD.RFCD, u'RFCD 1998 2 digit Code',
         u'Class for RFCD 1998 2 digit codes'),
        (RFCD.RFCD4, RFCD.RFCD, u'RFCD 1998 4 digit Code',
         u'Class for RFCD 1998 4 digit codes'),
        (RFCD.RFCD6, RFCD.RFCD, u'RFCD 1998 6 digit Code',
         u'Class for RFCD 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # read data from csv files; the files are closed when the blocks end
    division = {}  # collect divisions here
    discipline = {}  # collect disciplines here
    with open('anzsrc_data/rfcd.csv') as csvfile:
        rows = csv.reader(csvfile)
        next(rows)
        next(rows)  # skip file header
        for code, title in rows:
            if len(code) == 2:
                division[int(code)] = title
            elif len(code) == 4:
                discipline[int(code)] = title

    subject = {}  # collect RFCD-6 codes in here
    with open('anzsrc_data/rfcd-for08.csv') as csvfile:
        rows = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(rows)
        # check if all data exists and collect subject codes
        # (the FOR 2008 columns are unpacked but not used here)
        for f98, n98, f08, n08 in rows:
            if not f98:
                continue
            divcode = int(f98[:2])
            if divcode not in division:
                print("WARNING division %d does not exist" % divcode)
            disccode = int(f98[:4])
            if disccode not in discipline:
                print("WARNING discipline %d does not exist" % disccode)
            subjcode = int(f98)
            if subjcode not in subject:
                subject[subjcode] = n98
            elif subject[subjcode] != n98:
                print("WARNING subject %d %s differs from %d %s" % (
                    subjcode, subject[subjcode], subjcode, n98))

    # All data read, print out a summary and start creating instances
    print('RFCD 98')
    # from 12970_98.pdf: 24 divisions, 139 disciplines, 898 subjects
    print('Divisions (24): %d' % len(division))
    print('Disciplines (139): %d' % len(discipline))
    print('Subjects (898): %d' % len(subject))

    # NOTE(review): divisions are built by this local helper rather than
    # through createNode like the 4 and 6 digit codes -- confirm that the
    # difference is intended.
    def createDivision(code, name):
        rfcdcode = RFCD.term(code)
        g.add((rfcdcode, RDF.type, RFCD.RFCD2))
        g.add((rfcdcode, RDF.type, OWL.Thing))
        g.add((rfcdcode, RDFS.label, Literal(unicode(name))))
        g.add((rfcdcode, ANZSRC.code, Literal(code)))

    for code, title in division.items():
        createDivision(u'%02d' % code, title)
    # 4 and 6 digit codes hang below the code formed by their leading digits
    for code, title in discipline.items():
        padded = u'%04d' % code
        createNode(g, RFCD, RFCD.RFCD4, padded, title, padded[:2])
    for code, title in subject.items():
        padded = u'%06d' % code
        createNode(g, RFCD, RFCD.RFCD6, padded, title, padded[:4])
    return g
def genfor08():
    '''
    Generate the FOR 2008 ontology (for08.owl).

    Classes declared here::

        class   subclass of   broader   narrower
        FOR     skos:Concept
        FOR2    FOR           -         FOR4
        FOR4    FOR           FOR2      FOR6
        FOR6    FOR           FOR4      -

    The function also declares the datatype property ``FOR08.code``
    (domain FOR, range xsd:string).  It reads division, group and field
    names from ``anzsrc_data/for08.csv`` and cross-checks the first column
    of ``anzsrc_data/for08-rfcd.csv`` against the known field codes.
    Problems are reported on stdout, a summary is printed and one node per
    code is created through ``createNode``.

    Returns the populated graph.
    '''
    g = Graph()

    # statements about the ontology itself
    g.add((FOR08, RDF.type, OWL.Ontology))
    g.add((FOR08, RDF.type, SKOS.ConceptScheme))
    g.add((FOR08, RDFS.label, Literal(u'FOR 2008 Ontology')))
    g.add((FOR08, RDFS.comment, Literal(
        u'An ontology that provides classes '
        u'codes and hierarchical information '
        u'about FOR 2008 codes.')))
    g.add((FOR08, DC.title, Literal(
        u"Australian and New Zealand Standard "
        u"Research Classification (ANZSRC): "
        u"Fields of Research.", lang=u"en")))
    g.add((FOR08, DC.description, Literal(
        u"The ANZSRC FOR allows R&D activity"
        u" to be categorised according to the methodology used in the R&D,"
        u" rather than the activity of the unit performing the R&D or the "
        u"purpose of the R&D."
        u"\n"
        u"The categories in the classification include major fields and "
        u"related sub-fields of research and emerging areas of study "
        u"investigated by businesses, universities, tertiary institutions,"
        u" national research institutions and other organisations."
        u"\n"
        u"This classification allows the categorisation of fields of "
        u"research activity within Australia and New Zealand.",
        lang=u"en")))
    ontoannot(g, FOR08)
    ontoversion(g, FOR08)

    # class definitions: (class, parent class, label, comment)
    class_defs = (
        (FOR08.FOR, SKOS.Concept, u'FOR 2008 Code',
         u'Superclass for FOR 2008 codes'),
        (FOR08.FOR2, FOR08.FOR, u'FOR 2008 2 digit Code',
         u'Class for FOR 2008 2 digit codes'),
        (FOR08.FOR4, FOR08.FOR, u'FOR 2008 4 digit Code',
         u'Class for FOR 2008 4 digit codes'),
        (FOR08.FOR6, FOR08.FOR, u'FOR 2008 6 digit Code',
         u'Class for FOR 2008 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # create property definition to hold code
    g.add((FOR08.code, RDF.type, OWL.DatatypeProperty))
    g.add((FOR08.code, RDFS.domain, FOR08.FOR))
    g.add((FOR08.code, RDFS.range, XSD.string))

    division = {}
    group = {}
    field = {}
    # the file is closed again when the block ends
    with open('anzsrc_data/for08.csv') as csvfile:
        rows = csv.reader(csvfile)
        next(rows)  # skip the first row
        # every row repeats the division and group name of its field, so
        # the same code must always come with the same name
        for divname, grname, finame, code in rows:
            divcode = int(code[:2])
            if divcode not in division:
                division[divcode] = divname
            elif division[divcode] != divname:
                print("WARNING division")
            grcode = int(code[:4])
            if grcode not in group:
                group[grcode] = grname
            elif group[grcode] != grname:
                print("WARNING group")
            ficode = int(code)
            if ficode not in field:
                field[ficode] = finame
            elif field[ficode] != finame:
                print("WARNING field")

    # check against mapping:
    with open('anzsrc_data/for08-rfcd.csv') as csvfile:
        rows = csv.reader(csvfile)
        for _ in range(5):  # skip the first five rows
            next(rows)
        for row in rows:
            try:
                mapped = int(row[0])
            except ValueError:
                # first column is not a number, nothing to check
                continue
            if mapped not in field:
                print('WARNING: missing code  %s' % (row,))

    print('FOR')
    print('Divisions (22): %d' % len(division))
    print('Groups (157): %d' % len(group))
    print('Field (1238): %d' % len(field))

    for code, name in division.items():
        createNode(g, FOR08, FOR08.FOR2, '%02d' % code, name, None)
    # 4 and 6 digit codes hang below the code formed by their leading digits
    for code, name in group.items():
        padded = '%04d' % code
        createNode(g, FOR08, FOR08.FOR4, padded, name, padded[:2])
    for code, name in field.items():
        padded = '%06d' % code
        createNode(g, FOR08, FOR08.FOR6, padded, name, padded[:4])
    return g
def _note_seo08_name(table, code, name, level):
    """Record name for code in table; warn if code already has another name."""
    known = table.setdefault(code, name)
    if known != name:
        print("WARNING %s %d %s differs from %d %s"
              % (level, code, known, code, name))


def genseo08():
    """
    generate SEO 2008 ontology. seo08.owl

    Builds a graph holding the ontology header, the SEO 2008 class
    declarations and one node per sector, division (2 digit), group
    (4 digit) and objective (6 digit) code read from
    'anzsrc_data/seo08.csv'.  Every code listed in the first column of
    'anzsrc_data/seo08-seo98.csv' is checked against the objectives read
    and a warning is printed for each one that is missing.

    Returns the populated graph.

    Raises ValueError if a row of seo08.csv does not have exactly five
    columns or if its code column is not numeric.
    """
    g = Graph()
    g.add((SEO08, RDF.type, OWL.Ontology))
    g.add((SEO08, RDF.type, SKOS.ConceptScheme))
    g.add((SEO08, RDFS.label, Literal(u'SEO 2008 Ontology')))
    g.add((SEO08, RDFS.comment, Literal(u'An ontology that provides classes '
                                        u'codes and hierarchical information '
                                        u'about SEO 2008 codes.')))
    g.add((SEO08, DC.title, Literal(u"Australian and New Zealand Standard "
                                    u"Research Classification (ANZSRC): "
                                    u"Socio-Economic Objective.", lang=u"en")))
    g.add((SEO08, DC.description, Literal(
        u"The ANZSRC SEO classification "
        u"allows R&D activity in Australia and New Zealand to be "
        u"categorised according to the intended purpose or outcome of the "
        u"research, rather than the processes or techniques used in order "
        u"to achieve this objective."
        u"\n"
        u"The purpose categories include processes, products, health, "
        u"education and other social and environmental aspects in "
        u"Australia and New Zealand that R&D activity aims to improve.",
        lang=u"en")))
    ontoannot(g, SEO08)
    ontoversion(g, SEO08)

    # (class, superclass, label, comment) for every class this ontology
    # declares; SEO08.SEO is the common superclass of the level classes.
    class_defs = (
        (SEO08.SEO, SKOS.Concept,
         u'SEO 2008 Code', u'Superclass for SEO 2008 codes'),
        (SEO08.SEOSection, SEO08.SEO,
         u'SEO 2008 Section Code', u'Class for SEO 2008 Section codes'),
        (SEO08.SEO2, SEO08.SEO,
         u'SEO 2008 2 digit Code', u'Class for SEO 2008 2 digit codes'),
        (SEO08.SEO4, SEO08.SEO,
         u'SEO 2008 4 digit Code', u'Class for SEO 2008 4 digit codes'),
        (SEO08.SEO6, SEO08.SEO,
         u'SEO 2008 6 digit Code', u'Class for SEO 2008 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    division = {}
    group = {}
    objective = {}
    # 'with' makes sure the data file is closed once it has been read.
    with open('anzsrc_data/seo08.csv') as datafile:
        rows = csv.reader(datafile)
        next(rows)  # skip the header row
        # NOTE(review): the sector column is read but not used here, and
        # 'sector' / 'code2sector' below are not assigned anywhere in this
        # function -- presumably they are module level tables; confirm.
        for _sec, div, gr, obj, code in rows:
            # the 6 digit code embeds its division (first 2 digits) and
            # its group (first 4 digits)
            _note_seo08_name(division, int(code[:2]), div, 'division')
            _note_seo08_name(group, int(code[:4]), gr, 'group')
            _note_seo08_name(objective, int(code), obj, 'objective')

    # the number in brackets is the expected count for each level
    print('SEO 08')
    print('Sectors (5): %d' % len(sector))
    print('Divisions (17): %d' % len(division))
    print('Groups (119): %d' % len(group))
    print('Objective (847): %d' % len(objective))

    # check against mapping:
    with open('anzsrc_data/seo08-seo98.csv') as mapfile:
        rows = csv.reader(mapfile)
        for _ in range(5):  # the mapping file starts with 5 header rows
            next(rows)
        for row in rows:
            try:
                mapped = int(row[0])
            except ValueError:
                # first column is not a code (e.g. a heading row)
                continue
            if mapped not in objective:
                print('WARNING: missing code  %s' % (row,))

    for seccode, secname in sector.items():
        createNode(g, SEO08, SEO08.SEOSection, seccode, secname, None)
    for divcode, divname in division.items():
        createNode(g, SEO08, SEO08.SEO2, u'%02d' % divcode, divname,
                   code2sector[divcode])
    for grcode, grname in group.items():
        # a group's parent is the division named by its first 2 digits
        createNode(g, SEO08, SEO08.SEO4, u'%04d' % grcode, grname,
                   (u'%04d' % grcode)[:2])
    for obcode, obname in objective.items():
        # an objective's parent is the group named by its first 4 digits
        createNode(g, SEO08, SEO08.SEO6, u'%06d' % obcode, obname,
                   (u'%06d' % obcode)[:4])
    return g