def index_term(self):
    terms = []
    for f, s in map(self.subfields, self._marc.get_fields("655")):
        d = {}
        d["obp:thesaurus"] = [Literal(f.indicator2)]
        if "a" in s:
            d["rdf:value"] = [Literal(s["a"])]
        if "x" in s:
            d["foaf:topic"] = [Literal(s["x"])]
        if "y" in s:
            d["dc:date"] = [Literal(s["y"])]
        if "z" in s:
            d["dc:spatial"] = [Literal(s["z"])]
        terms.append(d)
    return terms
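# Illustrative sketch (not from the source): assuming `self.subfields` returns
# each field together with a dict of its subfield codes, a MARC 655 genre/form
# field with second indicator "7", $a "Detective and mystery stories" and
# $z "England" would yield an entry like:
#
#   {"obp:thesaurus": [Literal("7")],
#    "rdf:value":     [Literal("Detective and mystery stories")],
#    "dc:spatial":    [Literal("England")]}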
def clean_partof(self, record):
    parts = []
    for part in record.get("dc:isPartOf", []):
        if part == "Porject Gutenberg":
            # normalise a misspelling that occurs in the source data
            parts.append(Literal("Project Gutenberg"))
        elif _re_num.match(part):
            # a bare number is treated as a Project Gutenberg ebook number:
            # move it to dc:identifier as PG<number> instead of keeping it
            # in dc:isPartOf
            ident = record.get("dc:identifier", [])
            ident.append(Literal("PG%s" % (part,)))
            record["dc:identifier"] = ident
        else:
            parts.append(part)
    record["dc:isPartOf"] = parts
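# Illustrative sketch (hypothetical record; assumes `_re_num` matches bare
# digit strings, as its name suggests):
#
#   record = {"dc:isPartOf": ["Porject Gutenberg", "12345"]}
#   after clean_partof(record):
#       record["dc:isPartOf"]   == [Literal("Project Gutenberg")]
#       record["dc:identifier"] == [Literal("PG12345")]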
def rdf_data():
    s = LicensesService2()

    g = Graph(identifier=CC[""])
    g.parse("http://creativecommons.org/schema.rdf")
    yield g

    fp = pkg_resources.resource_stream("licenses", os.path.join("n3", "license.n3"))
    g = Graph(identifier=LICENSES["lens"])
    g.parse(fp, format="n3")
    fp.close()
    yield g

    for ld in s.get_licenses():
        ident = LICENSES[ld["id"]]
        g = Graph(identifier=ident)
        l = License(ident, graph=g)
        l.label = Literal(ld["title"])
        l.prefLabel = Literal(ld["title"])
        l.notation = Literal(ld["id"])
        l.lens = LICENSES.lens
        if ld.get("url"):
            url = URIRef(ld["url"])
            sa = Graph()
            # the licence page may be served as RDF/XML or as RDFa; try both
            # and carry on if neither parse succeeds
            try:
                sa.parse(url)
            except Exception:
                pass
            try:
                sa.parse(url, format="rdfa")
            except Exception:
                pass
            # drop presentational XHTML link relations before merging
            sa.remove((url, XHV.icon, None))
            sa.remove((url, XHV.alternate, None))
            sa.remove((url, XHV.stylesheet, None))
            for ll in sa.distinct_objects(url, XHV.license):
                l.license = ll
            sa.remove((url, XHV.license, None))
            if sa.bnc((url, None, None)):
                for _, p, o in sa.bnc((url, None, None)):
                    g.add((ident, p, o))
                l.sameAs = url
            else:
                l.seeAlso = URIRef(ld["url"])
        yield g
def load(self, record):
    if self.options.sanity:
        record.sanity()
    ident = self.record_id()
    self.log.info("import: %s" % ident)
    proc = self.process()
    proc.use(self.source())
    marc = record.rdf(identifier=ident)
    marc.add((ident, OBP["record"], Literal(self.recno)))
    marc.add((ident, DC["source"], self.source()))
    proc.result(marc)
    if not self.options.dryrun:
        from openbiblio import handler
        ctx = handler.context(getuser(),
                              "command line import of %s" % (self.source(),))
        ctx.add(marc)
        ctx.commit()
    # print marc.serialize(format="n3")
    return marc
def title(self, result):
    title = []
    for part in result:
        if part.startswith("[by] ") or part.startswith("by "):
            continue
        title.append(part)
    return [Literal("\n".join(title))]
def date(self, d):
    parsed = date.parse(d)
    if parsed is None:
        return None
    iso = parsed.isoformat()
    if iso:
        val = iso
    else:
        val = parsed.qualifier
    if len(val) == 4:
        # a bare year; xsd:gYear is the appropriate datatype
        return Literal(val, datatype=XSD.gYear)
    try:
        strptime(val, "%Y-%m-%d")
        return Literal(val, datatype=XSD.date)
    except ValueError:
        return Literal(val)
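# Illustrative sketch (hypothetical inputs; the real behaviour depends on what
# the project's `date.parse` returns for them):
#
#   a four-character value such as "1850"       -> Literal("1850", datatype=XSD.gYear)
#   an ISO date such as "1850-06-01"            -> Literal("1850-06-01", datatype=XSD.date)
#   anything else, e.g. a qualifier "ca. 1850"  -> plain Literal("ca. 1850")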
def coerce_literal(x):
    if not isinstance(x, Literal):
        if isinstance(x, Node):
            raise TypeError(x, type(x), "must be Literal or a type other than Node")
        x = Literal(x)
    return x
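# Usage sketch (assuming Literal and Node are the rdflib term classes used
# elsewhere in this code, where URIRef and BNode are Nodes but not Literals):
#
#   coerce_literal(Literal("foo"))                 -> returned unchanged
#   coerce_literal("foo")                          -> Literal("foo")
#   coerce_literal(URIRef("http://example.org/"))  -> raises TypeError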
def lccopy(self, g):
    cls = self["marc:lccopy_class"]
    item = self["marc:lccopy_item"]
    copy = self["marc:lccopy_copy"]
    if cls + item + copy:
        b = BNode()
        g.add((g.identifier, OBP["lccopy"], b))
        for c, i in zip(cls, item):
            g.add((b, OBP["lccall"], Literal(c + i)))
        for c in copy:
            g.add((b, DC["description"], c))
def default(self, s):
    # strip the trailing ISBD punctuation (" :", " /", ",", ".") that MARC
    # cataloguing conventions leave on field values
    s = s.rstrip(" :").rstrip(" /").rstrip(",").rstrip(".").strip()
    try:
        s = s.decode("utf-8")
    except UnicodeError:
        pass
    return Literal(s)
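# Illustrative sketch (self omitted):
#
#   default("London :")               -> Literal(u"London")
#   default("The Pickwick Papers /")  -> Literal(u"The Pickwick Papers")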
def __call__(self, field):
    from openbiblio.lib.name import normalize as name
    if field in ("dc:contributor", "marc:topic_person_name",
                 "marc:topic_person_fullname"):
        return lambda x: Literal(name(x))
    elif field in ("dc:date",):
        return self.date
    elif field in ("dc:subject",):
        return self.subject
    elif field in ("bibo:isbn", "bibo:issn"):
        return self.isbn
    elif field in ("foaf:page", "foaf:homepage"):
        return self.uri
    return self.default
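# Usage sketch (hypothetical `cleaner` instance of this class): calling the
# instance with a field name returns the cleaning function for that field.
#
#   cleaner("bibo:isbn")("0-14-044913-X")  -> Literal("014044913X")
#   cleaner("dc:title")                    -> self.default (the fallback)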
def scn(self, g):
    """
    Process system control numbers
    """
    _scnre = re.compile(r'\((?P<org>[a-zA-Z]+)\)(?P<num>.*)')
    scn = self["marc:scn"]
    for m in map(_scnre.match, scn):
        if not m:
            log.warning("Invalid SCN: %s" % scn)
            continue
        d = m.groupdict()
        b = BNode()
        g.add((g.identifier, OBP["scn"], b))
        g.add((b, DCAM["member"], SCN[d["org"]]))
        g.add((b, RDF["value"], Literal(d["num"])))
    scnc = self["marc:scnc"]
    for m in map(_scnre.match, scnc):
        if not m:
            log.warning("Invalid cancelled SCN: %s" % scnc)
            continue
        d = m.groupdict()
        b = BNode()
        g.add((g.identifier, OBP["scn"], b))
        g.add((b, DCAM["member"], SCN[d["org"]]))
        g.add((b, OBP["cancelled"], Literal(d["num"])))
    # LCCNs are control numbers assigned by the Library of Congress (DLC)
    for n in self["bibo:lccn"]:
        b = BNode()
        g.add((g.identifier, OBP["scn"], b))
        g.add((b, DCAM["member"], SCN["DLC"]))
        g.add((b, RDF["value"], n))
    # cancelled LCCNs use obp:cancelled, mirroring the cancelled SCNs above
    for n in self["marc:lccnc"]:
        b = BNode()
        g.add((g.identifier, OBP["scn"], b))
        g.add((b, DCAM["member"], SCN["DLC"]))
        g.add((b, OBP["cancelled"], n))
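# Illustrative sketch: a system control number of the form "(OCoLC)1234567"
# matches the regex with org="OCoLC" and num="1234567", producing roughly:
#
#   <record> obp:scn [ dcam:member scn:OCoLC ; rdf:value "1234567" ] .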
def aggregate(self, marc):
    from openbiblio import handler
    ctx = handler.context(getuser(), "Aggregation of %s" % marc.identifier)

    def _idx(g):
        path = g.identifier.lstrip(self.options.base)
        return URIRef("%saggregate/%s" % (self.options.base, path))

    self.log.info("aggregate %s" % marc.identifier)
    q = """
        SELECT DISTINCT ?x
        WHERE {
            ?x a obp:Work .
            ?x opmv:wasGeneratedBy _:proc .
            _:proc opmv:used %s
        }
    """ % (marc.identifier.n3(),)

    for work in [handler.get(x) for x, in handler.query(q)]:
        work_agg = Aggregation(identifier=_idx(work))
        work_agg.add((work_agg.identifier, ORDF["lens"], OBPL["work"]))
        for title in work.distinct_objects(work.identifier, DC["title"]):
            work_agg.add((work_agg.identifier, RDFS["label"],
                          Literal(u"Work: %s" % (title,))))
        work_agg.aggregate(work)

        contr_list = []
        for contr in [handler.get(x) for x in
                      work.distinct_objects(work.identifier, DC["contributor"])]:
            work_agg.aggregate(contr)
            contr_agg = Aggregation(identifier=_idx(contr))
            contr_agg.add((contr_agg.identifier, ORDF["lens"], OBPL["contributor"]))
            for name in contr.distinct_objects(contr.identifier, FOAF["name"]):
                contr_agg.add((contr_agg.identifier, RDFS["label"],
                               Literal(u"Person: %s" % (name,))))
            contr_agg.aggregate(work)
            contr_agg.aggregate(contr)
            ctx.add(contr_agg)
            contr_list.append(contr)

        for manif in [handler.get(x) for x in
                      work.distinct_objects(work.identifier, OBP["hasManifestation"])]:
            work_agg.aggregate(manif)
            manif_agg = Aggregation(identifier=_idx(manif))
            manif_agg.add((manif_agg.identifier, ORDF["lens"], OBPL["manifestation"]))
            for title in work.distinct_objects(work.identifier, DC["title"]):
                manif_agg.add((manif_agg.identifier, RDFS["label"],
                               Literal(u"Manifestation: %s" % (title,))))
            manif_agg.aggregate(work)
            manif_agg.aggregate(manif)
            for contr in contr_list:
                manif_agg.aggregate(contr)

            for pub in [handler.get(x) for x in
                        manif.distinct_objects(manif.identifier, DC["publisher"])]:
                manif_agg.aggregate(pub)
                pub_agg = Aggregation(identifier=_idx(pub))
                pub_agg.add((pub_agg.identifier, ORDF["lens"], OBPL["publisher"]))
                for name in pub.distinct_objects(pub.identifier, FOAF["name"]):
                    pub_agg.add((pub_agg.identifier, RDFS["label"],
                                 Literal(u"Agent: %s" % (name,))))
                pub_agg.aggregate(work)
                pub_agg.aggregate(manif)
                pub_agg.aggregate(pub)
                ctx.add(pub)
                ctx.add(pub_agg)

            ctx.add(manif)
            ctx.add(manif_agg)

        for contr in contr_list:
            ctx.add(contr)
        ctx.add(work)
        ctx.add(work_agg)

    ctx.commit()
def lccall(self, g):
    cls = self["marc:lccall_class"]
    item = self["marc:lccall_item"]
    for c, i in zip(cls, item):
        g.add((g.identifier, OBP["lccall"], Literal(c + i)))
def subject(self, subject):
    return Literal(subject)
def isbn(self, s):
    return Literal(s.replace(" ", "").replace("-", ""))
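# Illustrative sketch: hyphens and spaces are stripped before the value is
# stored as a literal:
#
#   isbn("978-0-14-044913-6")  -> Literal("9780140449136")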