def process_paper(self):
    """Add the papers of the configured year to ``self.graph``.

    Reads ``<home>/data/manual/iswc-publication-paper.csv``, where ``home``
    is taken from ``self.global_config``. A row is ignored when:

    * its number of cells differs from the header's,
    * its ``year`` differs from ``self.local_config["year"]``,
    * its ``title`` or ``proceedings_uri`` is empty (reported on stdout).

    For every remaining row the paper gets a URI of the form
    ``<proceedings_uri>/paper-NN``, where ``NN`` is the value the counter
    holds for that proceedings URI after ``inc``. Triples are emitted for
    the paper type, the paper/proceedings link, each author and the ordered
    author container, plus the simple properties handled by
    ``create_triple_complex``. The paper resource is finally stored in
    ``self.map_name_res`` under its title; ``process_event`` looks papers
    up there by label.
    """
    csv_path = "{0}/data/manual/iswc-publication-paper.csv".format(self.global_config["home"])
    papers_per_proceedings = MyCounter()

    with open(csv_path) as handle:
        reader = UnicodeReader(handle)
        columns = reader.next()
        expected_width = len(columns)

        for cells in reader:
            # Rows that do not line up with the header cannot be mapped to fields.
            if len(cells) != expected_width:
                continue

            entry = dict(zip(columns, cells))

            # Only papers of the year this run is configured for are kept.
            if entry["year"] != self.local_config["year"]:
                continue

            if not entry["title"]:
                print("skipping empty title row %s" % entry)
                continue

            if not entry["proceedings_uri"]:
                print("skipping empty proceedings row %s" % entry)
                continue

            # Number the paper within its proceedings.
            proceedings_uri = entry["proceedings_uri"]
            papers_per_proceedings.inc(proceedings_uri)
            seq_no = papers_per_proceedings.data[proceedings_uri]
            paper_uri = "%s/paper-%02d" % (proceedings_uri, seq_no)
            author_list_uri = "%s/paper-%02d/author_list" % (proceedings_uri, seq_no)

            proceedings = URIRef(proceedings_uri)
            paper = URIRef(paper_uri)

            # Paper type, both directions of the part-of link, and the raw author string.
            for triple in (
                (paper, RDF.type, SWRC.InProceedings),
                (paper, SWC.isPartOf, proceedings),
                (proceedings, SWC.hasPart, paper),
                (paper, SWRC.listAuthor, Literal(entry["author"])),
            ):
                self.graph.add(triple)

            # One person resource per comma-separated author, kept in input order.
            authors = []
            for author_name in entry["author"].split(","):
                person = self.create_named_entity(
                    self.get_namespace(DataIswc.PREFIX_PERSON), author_name
                )
                self.graph.add((person, RDF.type, FOAF.Person))
                authors.append(person)
                self.graph.add((paper, SWRC.author, person))
                self.graph.add((paper, FOAF.maker, person))
                self.graph.add((person, FOAF.made, paper))

            # Ordered author list as an RDF.Seq container.
            author_seq = self.create_container(authors, RDF.Seq, author_list_uri)
            self.graph.add((paper, BIBO.authorList, author_seq))

            # Columns that are passed to create_triple_complex as-is.
            simple_fields = [
                "abstract",
                "keywords",
                "year",
                "pages",
                "title",
                "category",
                "link_open_access",
                "link_publisher",
            ]
            self.create_triple_complex(paper, simple_fields, entry)

            # Cache by title for later lookup.
            self.map_name_res[entry["title"]] = paper
self.data[key]=0 self.data[key] += cnt def list(self, min_count=0): ret = {} for k,v in self.data: if v >= min_count: ret[k]=v return ret with open(filename) as f: csvreader = UnicodeReader(f) headers = csvreader.next() counter = MyCounter() for row in csvreader: entry = dict(zip(headers, row)) entry["proceedings_uri"] = entry["proceedings_uri"].replace("demos/proceedings","demos-proceedings") dir_id = entry["year"]+"-"+ os.path.basename(entry["proceedings_uri"]) counter.inc(dir_id) counter.inc(dir_id+"-downloaded", 0) state = {"paper_uri", entry["paper_uri"]} if len(entry["link_open_access"])>0: if entry["link_open_access"].lower().endswith(".html") or entry["link_open_access"].lower().endswith(".htm"): print "skip ", entry["link_open_access"] continue
def process_event(self):
    """Add the events listed in the per-conference event CSV to ``self.graph``.

    Reads ``<home>/data/manual/<tag>-event.csv``, with ``home`` taken from
    ``self.global_config`` and ``tag`` from ``self.local_config``. A row is
    ignored when its number of cells differs from the header's, when
    ``label`` or ``event_type`` is empty, or when ``event_uri`` starts
    with ``#``.

    For every remaining row:

    * an empty ``super_event_uri`` defaults to ``"[ME]"`` before being
      expanded with ``self.expand_uri``;
    * an empty ``event_uri`` is generated as ``<super event>/event-NN``
      from a per-super-event counter;
    * type, sub/super-event links and the simple properties are added;
    * a ``TalkEvent`` is linked to the paper cached under the same label in
      ``self.map_name_res``;
    * every non-empty name in ``chair_person`` / ``presenter_person`` is
      attached to the event through ``create_role_to_event``.

    :raises SystemExit: with status 1 when a ``TalkEvent`` label has no
        matching paper in ``self.map_name_res``.
    """
    csv_path = "{0}/data/manual/{1}-event.csv".format(
        self.global_config["home"], self.local_config["tag"]
    )
    events_per_super = MyCounter()

    with open(csv_path) as handle:
        reader = UnicodeReader(handle)
        columns = reader.next()
        expected_width = len(columns)

        for cells in reader:
            # Rows that do not line up with the header cannot be mapped to fields.
            if len(cells) != expected_width:
                continue

            entry = dict(zip(columns, cells))

            # Rows without a label or type, and rows whose URI starts with
            # "#", are skipped silently.
            if not entry["label"]:
                continue
            if not entry["event_type"]:
                continue
            if entry["event_uri"].startswith("#"):
                continue

            # Default super event.
            if not entry["super_event_uri"]:
                entry["super_event_uri"] = "[ME]"
            uri_super_event = self.expand_uri(entry["super_event_uri"])
            res_super_event = URIRef(uri_super_event)

            # Generate a numbered URI under the super event when none is given.
            if not entry["event_uri"]:
                events_per_super.inc(uri_super_event)
                entry["event_uri"] = "%s/event-%02d" % (
                    uri_super_event,
                    events_per_super.data[uri_super_event],
                )
            uri_event = self.expand_uri(entry["event_uri"])
            res_event = URIRef(uri_event)

            # Event type, then both directions of the sub/super-event link.
            self.graph.add((res_event, RDF.type, SWC[entry["event_type"]]))
            self.graph.add((res_event, SWC.isSubEventOf, res_super_event))
            self.graph.add((res_super_event, SWC.isSuperEventOf, res_event))

            # Columns that are passed to create_triple_complex as-is.
            simple_fields = [
                "label",
                "acronym",
                "abstract",
                "order_in_super_event",
                "start",
                "end",
                "tzid",
                "room",
                "address",
                "homepage",
                "link_document",
                "logo",
            ]
            self.create_triple_complex(res_event, simple_fields, entry)

            # Link a talk to the paper cached under the same label.
            if "TalkEvent" == entry["event_type"]:
                if entry["label"] not in self.map_name_res:
                    print("missing paper link " + entry["label"])
                    # A talk without its paper is a fatal data error, so the
                    # process must report failure rather than success.
                    sys.exit(1)
                res_paper = self.map_name_res[entry["label"]]
                self.graph.add((res_event, SWC.hasRelatedDocument, res_paper))
                self.graph.add((res_paper, SWC.relatedToEvent, res_event))

            # Chair / presenter roles: "<role>_person" holds comma-separated
            # names and "<role>_label" the label passed along with each of them.
            for role in ("Chair", "Presenter"):
                column_prefix = role.lower()
                people = entry[column_prefix + "_person"]
                if not people:
                    continue
                for name in people.split(","):
                    if not name:
                        continue
                    res_person = self.create_named_entity(
                        self.get_namespace(DataIswc.PREFIX_PERSON), name
                    )
                    self.create_role_to_event(
                        uri_event,
                        "swc:" + role,
                        entry[column_prefix + "_label"],
                        res_person,
                    )