def genseo98():
    """Build and return the graph for the SEO 1998 ontology (seo98.owl).

    The graph contains the ontology / concept-scheme header for ``SEO98``,
    the classes ``SEO``, ``SEODivision``, ``SEO2``, ``SEO4`` and ``SEO6``,
    and one node per code created through ``createNode``.

    Input files (relative to the working directory):

    * ``anzsrc_data/seo98.csv`` -- rows of ``code, title``; two header
      rows are skipped; 2-character codes are subdivisions, 4-character
      codes are groups.
    * ``anzsrc_data/seo98-seo08.csv`` -- four-column mapping rows; five
      header rows are skipped; the first two columns supply the 6-digit
      code and its title.

    Inconsistencies in the data are reported on stdout as warnings; they
    do not abort the run.

    NOTE(review): ``division`` and ``code2division`` are not assigned in
    this function -- presumably module-level tables; confirm they exist.

    Returns:
        The populated ``Graph``.
    """
    g = Graph()

    # Ontology / concept-scheme header.
    g.add((SEO98, RDF.type, OWL.Ontology))
    g.add((SEO98, RDF.type, SKOS.ConceptScheme))
    g.add((SEO98, RDFS.label, Literal(u'SEO 1998 Ontology')))
    g.add((SEO98, RDFS.comment,
           Literal(u'An ontology that provides classes '
                   u'codes and hierarchical information '
                   u'about SEO 1998 codes.')))
    g.add((SEO98, DC.title,
           Literal(u"Australian Standard Research "
                   u"Classification (ASRC): "
                   u"Socio-Economic Objective Classification",
                   lang=u"en")))
    g.add((SEO98, DC.description,
           Literal(u"The SEO Classification allows R&D "
                   u"data to be classified according to the researcher's "
                   u"perceived "
                   u"purpose. The purpose categories take account of "
                   u"processes, "
                   u"products, health, education and other social and "
                   u"environmental "
                   u"aspects of particular interest.",
                   lang=u"en")))
    ontoannot(g, SEO98)
    ontoversion(g, SEO98)

    # Class declarations: (class, superclass, label, comment).
    class_defs = (
        (SEO98.SEO, SKOS.Concept,
         u'SEO 1998 Code', u'Superclass for SEO 1998 codes'),
        (SEO98.SEODivision, SEO98.SEO,
         u'SEO 1998 Division Code', u'Class for SEO 1998 Division codes'),
        (SEO98.SEO2, SEO98.SEO,
         u'SEO 1998 2 digit Code', u'Class for SEO 1998 2 digit codes'),
        (SEO98.SEO4, SEO98.SEO,
         u'SEO 1998 4 digit Code', u'Class for SEO 1998 4 digit codes'),
        (SEO98.SEO6, SEO98.SEO,
         u'SEO 1998 6 digit Code', u'Class for SEO 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # Read subdivision (2 digit) and group (4 digit) codes.  The file is
    # opened in a ``with`` block so the handle is closed deterministically.
    subdivision = {}  # int code -> title
    group = {}  # int code -> title
    with open('anzsrc_data/seo98.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # two header rows
        next(reader)
        for code, title in reader:
            if len(code) == 2:
                subdivision[int(code)] = title
            elif len(code) == 4:
                group[int(code)] = title

    # Collect the 6 digit objective codes from the 98 -> 08 mapping file
    # and check that their parent codes are known.
    objective = {}  # int code -> title
    with open('anzsrc_data/seo98-seo08.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # five header rows
            next(reader)
        # Unpacking four names keeps the original requirement that every
        # row has exactly four columns.
        for s98, n98, _s08, _n08 in reader:
            if not s98:
                continue
            subdivcode = int(s98[:2])
            if subdivcode not in subdivision:
                print("WARNING division %d does not exist" % subdivcode)
            grcode = int(s98[:4])
            if grcode not in group:
                print("WARNING group %d does not exist" % grcode)
            obcode = int(s98)
            if obcode not in objective:
                objective[obcode] = n98
            elif objective[obcode] != n98:
                print("WARNING objective %d %s differs from %d %s" % (
                    obcode, objective[obcode], obcode, n98))

    # Summary; expected counts come from seo98.pdf:
    # 5 divisions, 18 subdivisions, 107 groups and 594 classes.
    # print is called with one pre-formatted string so the output is the
    # same whether print is a statement or a function.
    print('SEO 98')
    print('Divisions (5): %d' % len(division))
    print('Subdivisions (18):  %d' % len(subdivision))
    print('Groups (107): %d' % len(group))
    print('Objective (594): %d' % len(objective))

    # Create one node per code; the last argument is the parent code.
    for code, title in division.items():
        createNode(g, SEO98, SEO98.SEODivision, code, title, None)
    for code, title in subdivision.items():
        createNode(g, SEO98, SEO98.SEO2, u'%02d' % code, title,
                   code2division[code])
    for code, title in group.items():
        padded = u'%04d' % code
        createNode(g, SEO98, SEO98.SEO4, padded, title, padded[:2])
    for code, title in objective.items():
        padded = u'%06d' % code
        createNode(g, SEO98, SEO98.SEO6, padded, title, padded[:4])
    return g
def genseo98():
    """Build and return the graph for the SEO 1998 ontology (seo98.owl).

    The graph contains the ontology / concept-scheme header for ``SEO98``,
    the classes ``SEO``, ``SEODivision``, ``SEO2``, ``SEO4`` and ``SEO6``,
    and one node per code created through ``createNode``.

    Input files (relative to the working directory):

    * ``anzsrc_data/seo98.csv`` -- rows of ``code, title``; two header
      rows are skipped; 2-character codes are subdivisions, 4-character
      codes are groups.
    * ``anzsrc_data/seo98-seo08.csv`` -- four-column mapping rows; five
      header rows are skipped; the first two columns supply the 6-digit
      code and its title.

    Inconsistencies in the data are reported on stdout as warnings; they
    do not abort the run.

    NOTE(review): ``division`` and ``code2division`` are not assigned in
    this function -- presumably module-level tables; confirm they exist.

    Returns:
        The populated ``Graph``.
    """
    g = Graph()

    # Ontology / concept-scheme header.
    g.add((SEO98, RDF.type, OWL.Ontology))
    g.add((SEO98, RDF.type, SKOS.ConceptScheme))
    g.add((SEO98, RDFS.label, Literal(u'SEO 1998 Ontology')))
    g.add((SEO98, RDFS.comment,
           Literal(u'An ontology that provides classes '
                   u'codes and hierarchical information '
                   u'about SEO 1998 codes.')))
    g.add((SEO98, DC.title,
           Literal(u"Australian Standard Research "
                   u"Classification (ASRC): "
                   u"Socio-Economic Objective Classification",
                   lang=u"en")))
    g.add((SEO98, DC.description,
           Literal(u"The SEO Classification allows R&D "
                   u"data to be classified according to the researcher's "
                   u"perceived "
                   u"purpose. The purpose categories take account of "
                   u"processes, "
                   u"products, health, education and other social and "
                   u"environmental "
                   u"aspects of particular interest.",
                   lang=u"en")))
    ontoannot(g, SEO98)
    ontoversion(g, SEO98)

    # Class declarations: (class, superclass, label, comment).
    class_defs = (
        (SEO98.SEO, SKOS.Concept,
         u'SEO 1998 Code', u'Superclass for SEO 1998 codes'),
        (SEO98.SEODivision, SEO98.SEO,
         u'SEO 1998 Division Code', u'Class for SEO 1998 Division codes'),
        (SEO98.SEO2, SEO98.SEO,
         u'SEO 1998 2 digit Code', u'Class for SEO 1998 2 digit codes'),
        (SEO98.SEO4, SEO98.SEO,
         u'SEO 1998 4 digit Code', u'Class for SEO 1998 4 digit codes'),
        (SEO98.SEO6, SEO98.SEO,
         u'SEO 1998 6 digit Code', u'Class for SEO 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # Read subdivision (2 digit) and group (4 digit) codes.  The file is
    # opened in a ``with`` block so the handle is closed deterministically.
    subdivision = {}  # int code -> title
    group = {}  # int code -> title
    with open('anzsrc_data/seo98.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # two header rows
        next(reader)
        for code, title in reader:
            if len(code) == 2:
                subdivision[int(code)] = title
            elif len(code) == 4:
                group[int(code)] = title

    # Collect the 6 digit objective codes from the 98 -> 08 mapping file
    # and check that their parent codes are known.
    objective = {}  # int code -> title
    with open('anzsrc_data/seo98-seo08.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # five header rows
            next(reader)
        # Unpacking four names keeps the original requirement that every
        # row has exactly four columns.
        for s98, n98, _s08, _n08 in reader:
            if not s98:
                continue
            subdivcode = int(s98[:2])
            if subdivcode not in subdivision:
                print("WARNING division %d does not exist" % subdivcode)
            grcode = int(s98[:4])
            if grcode not in group:
                print("WARNING group %d does not exist" % grcode)
            obcode = int(s98)
            if obcode not in objective:
                objective[obcode] = n98
            elif objective[obcode] != n98:
                print("WARNING objective %d %s differs from %d %s" % (
                    obcode, objective[obcode], obcode, n98))

    # Summary; expected counts come from seo98.pdf:
    # 5 divisions, 18 subdivisions, 107 groups and 594 classes.
    # print is called with one pre-formatted string so the output is the
    # same whether print is a statement or a function.
    print('SEO 98')
    print('Divisions (5): %d' % len(division))
    print('Subdivisions (18):  %d' % len(subdivision))
    print('Groups (107): %d' % len(group))
    print('Objective (594): %d' % len(objective))

    # Create one node per code; the last argument is the parent code.
    for code, title in division.items():
        createNode(g, SEO98, SEO98.SEODivision, code, title, None)
    for code, title in subdivision.items():
        createNode(g, SEO98, SEO98.SEO2, u'%02d' % code, title,
                   code2division[code])
    for code, title in group.items():
        padded = u'%04d' % code
        createNode(g, SEO98, SEO98.SEO4, padded, title, padded[:2])
    for code, title in objective.items():
        padded = u'%06d' % code
        createNode(g, SEO98, SEO98.SEO6, padded, title, padded[:4])
    return g
def genfor08():
    """Build and return the graph for the FOR 2008 ontology (for08.owl).

    Classes declared (all under ``FOR08``):

    * ``FOR``  -- top level class, subclass of ``SKOS.Concept``
    * ``FOR2`` -- 2 digit codes, subclass of ``FOR``
    * ``FOR4`` -- 4 digit codes, subclass of ``FOR``
    * ``FOR6`` -- 6 digit codes, subclass of ``FOR``

    A datatype property ``FOR08.code`` (domain ``FOR``, range
    ``XSD.string``) is declared as well.

    Input files (relative to the working directory):

    * ``anzsrc_data/for08.csv`` -- rows of ``division, group, field,
      code``; one header row is skipped.  The 2/4/6 character prefixes of
      ``code`` identify division, group and field.
    * ``anzsrc_data/for08-rfcd.csv`` -- mapping file; five header rows
      are skipped and the first column of every remaining row is checked
      against the collected field codes.

    Inconsistencies are reported on stdout as warnings.

    Returns:
        The populated ``Graph``.
    """
    g = Graph()

    # Ontology / concept-scheme header.
    g.add((FOR08, RDF.type, OWL.Ontology))
    g.add((FOR08, RDF.type, SKOS.ConceptScheme))
    g.add((FOR08, RDFS.label, Literal(u"FOR 2008 Ontology")))
    g.add((FOR08, RDFS.comment,
           Literal(u"An ontology that provides classes "
                   u"codes and hierarchical information "
                   u"about FOR 2008 codes.")))
    g.add((FOR08, DC.title,
           Literal(u"Australian and New Zealand Standard "
                   u"Research Classification (ANZSRC): "
                   u"Fields of Research.",
                   lang=u"en")))
    g.add((FOR08, DC.description,
           Literal(u"The ANZSRC FOR allows R&D activity"
                   u" to be categorised according to the methodology used "
                   u"in the R&D,"
                   u" rather than the activity of the unit performing the "
                   u"R&D or the "
                   u"purpose of the R&D."
                   u"\n"
                   u"The categories in the classification include major "
                   u"fields and "
                   u"related sub-fields of research and emerging areas of "
                   u"study "
                   u"investigated by businesses, universities, tertiary "
                   u"institutions,"
                   u" national research institutions and other "
                   u"organisations."
                   u"\n"
                   u"This classification allows the categorisation of "
                   u"fields of "
                   u"research activity within Australia and New Zealand.",
                   lang=u"en")))
    ontoannot(g, FOR08)
    ontoversion(g, FOR08)

    # Class declarations: (class, superclass, label, comment).
    class_defs = (
        (FOR08.FOR, SKOS.Concept,
         u"FOR 2008 Code", u"Superclass for FOR 2008 codes"),
        (FOR08.FOR2, FOR08.FOR,
         u"FOR 2008 2 digit Code", u"Class for FOR 2008 2 digit codes"),
        (FOR08.FOR4, FOR08.FOR,
         u"FOR 2008 4 digit Code", u"Class for FOR 2008 4 digit codes"),
        (FOR08.FOR6, FOR08.FOR,
         u"FOR 2008 6 digit Code", u"Class for FOR 2008 6 digit codes"),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # Property definition to hold the code.
    g.add((FOR08.code, RDF.type, OWL.DatatypeProperty))
    g.add((FOR08.code, RDFS.domain, FOR08.FOR))
    g.add((FOR08.code, RDFS.range, XSD.string))

    # Collect division / group / field titles keyed by integer code.  The
    # file is opened in a ``with`` block so the handle is always closed.
    division = {}
    group = {}
    field = {}
    with open("anzsrc_data/for08.csv") as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # header row
        for div, gr, fi, code in reader:
            divcode = int(code[:2])
            if divcode not in division:
                division[divcode] = div
            elif division[divcode] != div:
                print("WARNING division")
            grcode = int(code[:4])
            if grcode not in group:
                group[grcode] = gr
            elif group[grcode] != gr:
                print("WARNING group")
            ficode = int(code)
            if ficode not in field:
                field[ficode] = fi
            elif field[ficode] != fi:
                print("WARNING objective")

    # Check against the FOR 2008 -> RFCD mapping file.
    with open("anzsrc_data/for08-rfcd.csv") as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # five header rows
            next(reader)
        for row in reader:
            # Only the int() conversion is expected to raise ValueError;
            # rows whose first column is not numeric are skipped.
            try:
                mapped = int(row[0])
            except ValueError:
                continue
            if mapped not in field:
                print("WARNING: missing code  %s" % (row,))

    # Summary with the expected counts in parentheses.  print is called
    # with one pre-formatted string so the output is the same whether
    # print is a statement or a function.
    print("FOR")
    print("Divisions (22): %d" % len(division))
    print("Groups (157): %d" % len(group))
    print("Field (1238): %d" % len(field))

    # Create one node per code; the last argument is the parent code.
    for code, title in division.items():
        createNode(g, FOR08, FOR08.FOR2, "%02d" % code, title, None)
    for code, title in group.items():
        padded = "%04d" % code
        createNode(g, FOR08, FOR08.FOR4, padded, title, padded[:2])
    for code, title in field.items():
        padded = "%06d" % code
        createNode(g, FOR08, FOR08.FOR6, padded, title, padded[:4])
    return g
def genrfcd():
    """Build and return the graph for the RFCD 1998 ontology.

    Classes declared (all under ``RFCD``):

    * ``RFCD``  -- top level class, subclass of ``SKOS.Concept``
    * ``RFCD2`` -- 2 digit codes, subclass of ``RFCD``
    * ``RFCD4`` -- 4 digit codes, subclass of ``RFCD``
    * ``RFCD6`` -- 6 digit codes, subclass of ``RFCD``

    Input files (relative to the working directory):

    * ``anzsrc_data/rfcd.csv`` -- rows of ``code, title``; two header
      rows are skipped; 2-character codes are divisions, 4-character
      codes are disciplines.
    * ``anzsrc_data/rfcd-for08.csv`` -- four-column mapping rows; five
      header rows are skipped; the first two columns supply the 6-digit
      subject code and its title.

    Divisions are added directly to the graph by the local
    ``createDivision`` helper; disciplines and subjects go through
    ``createNode`` with their parent code.

    Inconsistencies are reported on stdout as warnings.

    Returns:
        The populated ``Graph``.
    """
    g = Graph()

    # Ontology / concept-scheme header.
    g.add((RFCD, RDF.type, OWL.Ontology))
    g.add((RFCD, RDF.type, SKOS.ConceptScheme))
    g.add((RFCD, RDFS.label, Literal(u'RFCD 1998 Ontology')))
    g.add((RFCD, RDFS.comment,
           Literal(u'An ontology that provides classes '
                   u'codes and hierarchical information '
                   u'about RFCD 1998 codes.')))
    g.add((RFCD, DC.title,
           Literal(u"Australian Standard Research "
                   u"Classification (ASRC): "
                   u"Research Fields, Courses and Disciplines "
                   u"Classification",
                   lang=u"en")))
    g.add((RFCD, DC.description,
           Literal(u"This classification allows both R&D"
                   u" activity and other activity within the higher "
                   u"education sector "
                   u"to be categorised."
                   u"\n"
                   u"The categories in the classification include "
                   u"recognised academic"
                   u" disciplines and related major sub-fields taught at "
                   u"universities"
                   u" or tertiary institutions, major fields of research "
                   u"investigated"
                   u" by national research institutions and organisations, "
                   u"and "
                   u"emerging areas of study.",
                   lang="en")))
    ontoannot(g, RFCD)
    ontoversion(g, RFCD)

    # Class declarations: (class, superclass, label, comment).
    class_defs = (
        (RFCD.RFCD, SKOS.Concept,
         u'RFCD 1998 Code', u'Superclass for RFCD 1998 codes'),
        (RFCD.RFCD2, RFCD.RFCD,
         u'RFCD 1998 2 digit Code', u'Class for RFCD 1998 2 digit codes'),
        (RFCD.RFCD4, RFCD.RFCD,
         u'RFCD 1998 4 digit Code', u'Class for RFCD 1998 4 digit codes'),
        (RFCD.RFCD6, RFCD.RFCD,
         u'RFCD 1998 6 digit Code', u'Class for RFCD 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # Read divisions (2 digit) and disciplines (4 digit).  The file is
    # opened in a ``with`` block so the handle is closed deterministically.
    division = {}  # int code -> title
    discipline = {}  # int code -> title
    with open('anzsrc_data/rfcd.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # two header rows
        next(reader)
        for code, title in reader:
            if len(code) == 2:
                division[int(code)] = title
            elif len(code) == 4:
                discipline[int(code)] = title

    # Collect the 6 digit subject codes from the RFCD -> FOR 2008 mapping
    # file and check that their parent codes are known.
    subject = {}  # int code -> title
    with open('anzsrc_data/rfcd-for08.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # five header rows
            next(reader)
        # Unpacking four names keeps the original requirement that every
        # row has exactly four columns.
        for f98, n98, _f08, _n08 in reader:
            if not f98:
                continue
            divcode = int(f98[:2])
            if divcode not in division:
                print("WARNING division %d does not exist" % divcode)
            disccode = int(f98[:4])
            if disccode not in discipline:
                print("WARNING discipline %d does not exist" % disccode)
            subjcode = int(f98)
            if subjcode not in subject:
                subject[subjcode] = n98
            elif subject[subjcode] != n98:
                print("WARNING subject %d %s differs from %d %s" % (
                    subjcode, subject[subjcode], subjcode, n98))

    # All data read: print a summary, then create the instances.
    # Expected counts come from 12970_98.pdf:
    # 24 divisions, 139 disciplines, 898 subjects.
    # print is called with one pre-formatted string so the output is the
    # same whether print is a statement or a function.
    print('RFCD 98')
    print('Divisions (24): %d' % len(division))
    print('Disciplines (139): %d' % len(discipline))
    print('Subjects (898): %d' % len(subject))

    def createDivision(code, name):
        # Add a top-level (2 digit) division individual to the graph.
        rfcdcode = RFCD.term(code)
        g.add((rfcdcode, RDF.type, RFCD.RFCD2))
        g.add((rfcdcode, RDF.type, OWL.Thing))
        g.add((rfcdcode, RDFS.label, Literal(unicode(name))))
        g.add((rfcdcode, ANZSRC.code, Literal(code)))

    for code, title in division.items():
        createDivision(u'%02d' % code, title)
    # For the lower levels the last createNode argument is the parent code.
    for code, title in discipline.items():
        padded = u'%04d' % code
        createNode(g, RFCD, RFCD.RFCD4, padded, title, padded[:2])
    for code, title in subject.items():
        padded = u'%06d' % code
        createNode(g, RFCD, RFCD.RFCD6, padded, title, padded[:4])
    return g
def genrfcd():
    """Build and return the graph for the RFCD 1998 ontology.

    Classes declared (all under ``RFCD``):

    * ``RFCD``  -- top level class, subclass of ``SKOS.Concept``
    * ``RFCD2`` -- 2 digit codes, subclass of ``RFCD``
    * ``RFCD4`` -- 4 digit codes, subclass of ``RFCD``
    * ``RFCD6`` -- 6 digit codes, subclass of ``RFCD``

    Input files (relative to the working directory):

    * ``anzsrc_data/rfcd.csv`` -- rows of ``code, title``; two header
      rows are skipped; 2-character codes are divisions, 4-character
      codes are disciplines.
    * ``anzsrc_data/rfcd-for08.csv`` -- four-column mapping rows; five
      header rows are skipped; the first two columns supply the 6-digit
      subject code and its title.

    Divisions are added directly to the graph by the local
    ``createDivision`` helper; disciplines and subjects go through
    ``createNode`` with their parent code.

    Inconsistencies are reported on stdout as warnings.

    Returns:
        The populated ``Graph``.
    """
    g = Graph()

    # Ontology / concept-scheme header.
    g.add((RFCD, RDF.type, OWL.Ontology))
    g.add((RFCD, RDF.type, SKOS.ConceptScheme))
    g.add((RFCD, RDFS.label, Literal(u'RFCD 1998 Ontology')))
    g.add((RFCD, RDFS.comment,
           Literal(u'An ontology that provides classes '
                   u'codes and hierarchical information '
                   u'about RFCD 1998 codes.')))
    g.add((RFCD, DC.title,
           Literal(u"Australian Standard Research "
                   u"Classification (ASRC): "
                   u"Research Fields, Courses and Disciplines "
                   u"Classification",
                   lang=u"en")))
    g.add((RFCD, DC.description,
           Literal(u"This classification allows both R&D"
                   u" activity and other activity within the higher "
                   u"education sector "
                   u"to be categorised."
                   u"\n"
                   u"The categories in the classification include "
                   u"recognised academic"
                   u" disciplines and related major sub-fields taught at "
                   u"universities"
                   u" or tertiary institutions, major fields of research "
                   u"investigated"
                   u" by national research institutions and organisations, "
                   u"and "
                   u"emerging areas of study.",
                   lang="en")))
    ontoannot(g, RFCD)
    ontoversion(g, RFCD)

    # Class declarations: (class, superclass, label, comment).
    class_defs = (
        (RFCD.RFCD, SKOS.Concept,
         u'RFCD 1998 Code', u'Superclass for RFCD 1998 codes'),
        (RFCD.RFCD2, RFCD.RFCD,
         u'RFCD 1998 2 digit Code', u'Class for RFCD 1998 2 digit codes'),
        (RFCD.RFCD4, RFCD.RFCD,
         u'RFCD 1998 4 digit Code', u'Class for RFCD 1998 4 digit codes'),
        (RFCD.RFCD6, RFCD.RFCD,
         u'RFCD 1998 6 digit Code', u'Class for RFCD 1998 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # Read divisions (2 digit) and disciplines (4 digit).  The file is
    # opened in a ``with`` block so the handle is closed deterministically.
    division = {}  # int code -> title
    discipline = {}  # int code -> title
    with open('anzsrc_data/rfcd.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # two header rows
        next(reader)
        for code, title in reader:
            if len(code) == 2:
                division[int(code)] = title
            elif len(code) == 4:
                discipline[int(code)] = title

    # Collect the 6 digit subject codes from the RFCD -> FOR 2008 mapping
    # file and check that their parent codes are known.
    subject = {}  # int code -> title
    with open('anzsrc_data/rfcd-for08.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # five header rows
            next(reader)
        # Unpacking four names keeps the original requirement that every
        # row has exactly four columns.
        for f98, n98, _f08, _n08 in reader:
            if not f98:
                continue
            divcode = int(f98[:2])
            if divcode not in division:
                print("WARNING division %d does not exist" % divcode)
            disccode = int(f98[:4])
            if disccode not in discipline:
                print("WARNING discipline %d does not exist" % disccode)
            subjcode = int(f98)
            if subjcode not in subject:
                subject[subjcode] = n98
            elif subject[subjcode] != n98:
                print("WARNING subject %d %s differs from %d %s" % (
                    subjcode, subject[subjcode], subjcode, n98))

    # All data read: print a summary, then create the instances.
    # Expected counts come from 12970_98.pdf:
    # 24 divisions, 139 disciplines, 898 subjects.
    # print is called with one pre-formatted string so the output is the
    # same whether print is a statement or a function.
    print('RFCD 98')
    print('Divisions (24): %d' % len(division))
    print('Disciplines (139): %d' % len(discipline))
    print('Subjects (898): %d' % len(subject))

    def createDivision(code, name):
        # Add a top-level (2 digit) division individual to the graph.
        rfcdcode = RFCD.term(code)
        g.add((rfcdcode, RDF.type, RFCD.RFCD2))
        g.add((rfcdcode, RDF.type, OWL.Thing))
        g.add((rfcdcode, RDFS.label, Literal(unicode(name))))
        g.add((rfcdcode, ANZSRC.code, Literal(code)))

    for code, title in division.items():
        createDivision(u'%02d' % code, title)
    # For the lower levels the last createNode argument is the parent code.
    for code, title in discipline.items():
        padded = u'%04d' % code
        createNode(g, RFCD, RFCD.RFCD4, padded, title, padded[:2])
    for code, title in subject.items():
        padded = u'%06d' % code
        createNode(g, RFCD, RFCD.RFCD6, padded, title, padded[:4])
    return g
def genfor08():
    """Build and return the graph for the FOR 2008 ontology (for08.owl).

    Classes declared (all under ``FOR08``):

    * ``FOR``  -- top level class, subclass of ``SKOS.Concept``
    * ``FOR2`` -- 2 digit codes, subclass of ``FOR``
    * ``FOR4`` -- 4 digit codes, subclass of ``FOR``
    * ``FOR6`` -- 6 digit codes, subclass of ``FOR``

    A datatype property ``FOR08.code`` (domain ``FOR``, range
    ``XSD.string``) is declared as well.

    Input files (relative to the working directory):

    * ``anzsrc_data/for08.csv`` -- rows of ``division, group, field,
      code``; one header row is skipped.  The 2/4/6 character prefixes of
      ``code`` identify division, group and field.
    * ``anzsrc_data/for08-rfcd.csv`` -- mapping file; five header rows
      are skipped and the first column of every remaining row is checked
      against the collected field codes.

    Inconsistencies are reported on stdout as warnings.

    Returns:
        The populated ``Graph``.
    """
    g = Graph()

    # Ontology / concept-scheme header.
    g.add((FOR08, RDF.type, OWL.Ontology))
    g.add((FOR08, RDF.type, SKOS.ConceptScheme))
    g.add((FOR08, RDFS.label, Literal(u'FOR 2008 Ontology')))
    g.add((FOR08, RDFS.comment,
           Literal(u'An ontology that provides classes '
                   u'codes and hierarchical information '
                   u'about FOR 2008 codes.')))
    g.add((FOR08, DC.title,
           Literal(u"Australian and New Zealand Standard "
                   u"Research Classification (ANZSRC): "
                   u"Fields of Research.",
                   lang=u"en")))
    g.add((FOR08, DC.description,
           Literal(u"The ANZSRC FOR allows R&D activity"
                   u" to be categorised according to the methodology used "
                   u"in the R&D,"
                   u" rather than the activity of the unit performing the "
                   u"R&D or the "
                   u"purpose of the R&D."
                   u"\n"
                   u"The categories in the classification include major "
                   u"fields and "
                   u"related sub-fields of research and emerging areas of "
                   u"study "
                   u"investigated by businesses, universities, tertiary "
                   u"institutions,"
                   u" national research institutions and other "
                   u"organisations."
                   u"\n"
                   u"This classification allows the categorisation of "
                   u"fields of "
                   u"research activity within Australia and New Zealand.",
                   lang=u"en")))
    ontoannot(g, FOR08)
    ontoversion(g, FOR08)

    # Class declarations: (class, superclass, label, comment).
    class_defs = (
        (FOR08.FOR, SKOS.Concept,
         u'FOR 2008 Code', u'Superclass for FOR 2008 codes'),
        (FOR08.FOR2, FOR08.FOR,
         u'FOR 2008 2 digit Code', u'Class for FOR 2008 2 digit codes'),
        (FOR08.FOR4, FOR08.FOR,
         u'FOR 2008 4 digit Code', u'Class for FOR 2008 4 digit codes'),
        (FOR08.FOR6, FOR08.FOR,
         u'FOR 2008 6 digit Code', u'Class for FOR 2008 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # Property definition to hold the code.
    g.add((FOR08.code, RDF.type, OWL.DatatypeProperty))
    g.add((FOR08.code, RDFS.domain, FOR08.FOR))
    g.add((FOR08.code, RDFS.range, XSD.string))

    # Collect division / group / field titles keyed by integer code.  The
    # file is opened in a ``with`` block so the handle is always closed.
    division = {}
    group = {}
    field = {}
    with open('anzsrc_data/for08.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # header row
        for div, gr, fi, code in reader:
            divcode = int(code[:2])
            if divcode not in division:
                division[divcode] = div
            elif division[divcode] != div:
                print("WARNING division")
            grcode = int(code[:4])
            if grcode not in group:
                group[grcode] = gr
            elif group[grcode] != gr:
                print("WARNING group")
            ficode = int(code)
            if ficode not in field:
                field[ficode] = fi
            elif field[ficode] != fi:
                print("WARNING objective")

    # Check against the FOR 2008 -> RFCD mapping file.
    with open('anzsrc_data/for08-rfcd.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # five header rows
            next(reader)
        for row in reader:
            # Only the int() conversion is expected to raise ValueError;
            # rows whose first column is not numeric are skipped.
            try:
                mapped = int(row[0])
            except ValueError:
                continue
            if mapped not in field:
                print('WARNING: missing code  %s' % (row,))

    # Summary with the expected counts in parentheses.  print is called
    # with one pre-formatted string so the output is the same whether
    # print is a statement or a function.
    print('FOR')
    print('Divisions (22): %d' % len(division))
    print('Groups (157): %d' % len(group))
    print('Field (1238): %d' % len(field))

    # Create one node per code; the last argument is the parent code.
    for code, title in division.items():
        createNode(g, FOR08, FOR08.FOR2, '%02d' % code, title, None)
    for code, title in group.items():
        padded = '%04d' % code
        createNode(g, FOR08, FOR08.FOR4, padded, title, padded[:2])
    for code, title in field.items():
        padded = '%06d' % code
        createNode(g, FOR08, FOR08.FOR6, padded, title, padded[:4])
    return g
def genseo08():
    """Build and return the graph for the SEO 2008 ontology (seo08.owl).

    The graph contains the ontology / concept-scheme header for ``SEO08``,
    the classes ``SEO``, ``SEOSection``, ``SEO2``, ``SEO4`` and ``SEO6``,
    and one node per code created through ``createNode``.

    Input files (relative to the working directory):

    * ``anzsrc_data/seo08.csv`` -- rows of ``sector, division, group,
      objective, code``; one header row is skipped.  The 2/4/6 character
      prefixes of ``code`` identify division, group and objective.
    * ``anzsrc_data/seo08-seo98.csv`` -- mapping file; five header rows
      are skipped and the first column of every remaining row is checked
      against the collected objective codes.

    Inconsistencies are reported on stdout as warnings.

    NOTE(review): ``sector`` and ``code2sector`` are not assigned in this
    function (the sector column read from the CSV is not collected) --
    presumably module-level tables; confirm they exist.

    Returns:
        The populated ``Graph``.
    """
    g = Graph()

    # Ontology / concept-scheme header.
    g.add((SEO08, RDF.type, OWL.Ontology))
    g.add((SEO08, RDF.type, SKOS.ConceptScheme))
    g.add((SEO08, RDFS.label, Literal(u'SEO 2008 Ontology')))
    g.add((SEO08, RDFS.comment,
           Literal(u'An ontology that provides classes '
                   u'codes and hierarchical information '
                   u'about SEO 2008 codes.')))
    g.add((SEO08, DC.title,
           Literal(u"Australian and New Zealand Standard "
                   u"Research Classification (ANZSRC): "
                   u"Socio-Economic Objective.",
                   lang=u"en")))
    g.add((SEO08, DC.description,
           Literal(u"The ANZSRC SEO classification "
                   u"allows R&D activity in Australia and New Zealand to "
                   u"be "
                   u"categorised according to the intended purpose or "
                   u"outcome of the "
                   u"research, rather than the processes or techniques "
                   u"used in order "
                   u"to achieve this objective."
                   u"\n"
                   u"The purpose categories include processes, products, "
                   u"health, "
                   u"education and other social and environmental aspects "
                   u"in "
                   u"Australia and New Zealand that R&D activity aims to "
                   u"improve.",
                   lang=u"en")))
    ontoannot(g, SEO08)
    ontoversion(g, SEO08)

    # Class declarations: (class, superclass, label, comment).
    class_defs = (
        (SEO08.SEO, SKOS.Concept,
         u'SEO 2008 Code', u'Superclass for SEO 2008 codes'),
        (SEO08.SEOSection, SEO08.SEO,
         u'SEO 2008 Section Code', u'Class for SEO 2008 Section codes'),
        (SEO08.SEO2, SEO08.SEO,
         u'SEO 2008 2 digit Code', u'Class for SEO 2008 2 digit codes'),
        (SEO08.SEO4, SEO08.SEO,
         u'SEO 2008 4 digit Code', u'Class for SEO 2008 4 digit codes'),
        (SEO08.SEO6, SEO08.SEO,
         u'SEO 2008 6 digit Code', u'Class for SEO 2008 6 digit codes'),
    )
    for cls, parent, label, comment in class_defs:
        g.add((cls, RDF.type, OWL.Class))
        g.add((cls, RDFS.subClassOf, parent))
        g.add((cls, RDFS.label, Literal(label)))
        g.add((cls, RDFS.comment, Literal(comment)))

    # Collect division / group / objective titles keyed by integer code.
    # The file is opened in a ``with`` block so the handle is always closed.
    division = {}
    group = {}
    objective = {}
    with open('anzsrc_data/seo08.csv') as csvfile:
        reader = csv.reader(csvfile)
        next(reader)  # header row
        # The sector column is read (to enforce five columns) but unused.
        for _sec, div, gr, obj, code in reader:
            # Each warning below fires when a code seen before reappears
            # with a different title, so the message reports both titles.
            divcode = int(code[:2])
            if divcode not in division:
                division[divcode] = div
            elif division[divcode] != div:
                print("WARNING division %d %s differs from %d %s" % (
                    divcode, division[divcode], divcode, div))
            grcode = int(code[:4])
            if grcode not in group:
                group[grcode] = gr
            elif group[grcode] != gr:
                print("WARNING group %d %s differs from %d %s" % (
                    grcode, group[grcode], grcode, gr))
            obcode = int(code)
            if obcode not in objective:
                objective[obcode] = obj
            elif objective[obcode] != obj:
                print("WARNING objective %d %s differs from %d %s" % (
                    obcode, objective[obcode], obcode, obj))

    # Summary with the expected counts in parentheses.  print is called
    # with one pre-formatted string so the output is the same whether
    # print is a statement or a function.
    print('SEO 08')
    print('Sectors (5): %d' % len(sector))
    print('Divisions (17): %d' % len(division))
    print('Groups (119): %d' % len(group))
    print('Objective (847): %d' % len(objective))

    # Check against the SEO 2008 -> SEO 1998 mapping file.
    with open('anzsrc_data/seo08-seo98.csv') as csvfile:
        reader = csv.reader(csvfile)
        for _ in range(5):  # five header rows
            next(reader)
        for row in reader:
            # Only the int() conversion is expected to raise ValueError;
            # rows whose first column is not numeric are skipped.
            try:
                mapped = int(row[0])
            except ValueError:
                continue
            if mapped not in objective:
                print('WARNING: missing code  %s' % (row,))

    # Create one node per code; the last argument is the parent code.
    for code, title in sector.items():
        createNode(g, SEO08, SEO08.SEOSection, code, title, None)
    for code, title in division.items():
        createNode(g, SEO08, SEO08.SEO2, u'%02d' % code, title,
                   code2sector[code])
    for code, title in group.items():
        padded = u'%04d' % code
        createNode(g, SEO08, SEO08.SEO4, padded, title, padded[:2])
    for code, title in objective.items():
        padded = u'%06d' % code
        createNode(g, SEO08, SEO08.SEO6, padded, title, padded[:4])
    return g