Пример #1
0
def index_aggregate(a):
    """Build the xapian document describing the aggregate ``a``.

    The document carries the aggregate identifier (as a value and as a
    ``URI``-prefixed term), label/title/name values, an N3 serialisation of
    an abbreviated graph as its data, and stemmed terms generated from the
    textual properties found in ``a``.

    Returns the tuple ``(docid, doc)``, where ``docid`` is the
    ``URI``-prefixed term that was added to the document.
    """
    doc = xapian.Document()
    doc.add_value(VAL_URI, a.identifier)
    docid = u"URI" + a.identifier
    doc.add_term(docid)

    log.debug("Aggregate: %s" % a.identifier)

    def add_value(graph, slot, subject, predicate):
        # Collect the objects of (subject, predicate) that carry no language
        # tag or are tagged "en", join them and store them in value `slot`.
        # Returns the joined text, or None when nothing matched.
        # TODO (kept from the original): fix this language test
        texts = [
            obj
            for _s, _p, obj in graph.triples((subject, predicate, None))
            if not obj.language or obj.language == "en"
        ]
        if not texts:
            return None
        joined = u", ".join(texts)
        doc.add_value(slot, joined)
        return joined

    def stemmer_for(language):
        # Use the stemmer for `language` when xapian accepts it, English
        # otherwise (also when no language is given).
        if language:
            try:
                return xapian.Stem(language)
            except xapian.InvalidArgumentError:
                pass
        return xapian.Stem("en")

    # Abbreviated graph that ends up as the document data.
    extract = Graph()
    add_value(a, VAL_LABEL, a.identifier, RDFS.label)

    summary_predicates = (RDF.type, RDFS.label, RDFS.comment, DC.title,
                          DC.description, FOAF.name)
    # (value slot, predicate, term prefix) for the per-context exact terms
    prefixed_fields = ((VAL_TITLE, DC.title, u"ZT"),
                       (VAL_NAME, FOAF.name, u"NA"))

    for g in a.contexts():
        log.debug("Indexing: %s" % g.identifier)

        for pred in summary_predicates:
            for statement in a.triples((g.identifier, pred, None)):
                extract.add(statement)

        for slot, predicate, prefix in prefixed_fields:
            text = add_value(g, slot, g.identifier, predicate)
            if text:
                # only the first 160 characters go into the term
                doc.add_term(prefix + text[:160])

    doc.set_data(extract.serialize(format="n3"))

    # Index every textual field, stemmed according to its language tag.
    termgen = xapian.TermGenerator()
    termgen.set_document(doc)
    text_predicates = (RDFS.label, RDFS.comment, DC.title, DC.description,
                       FOAF.name, FOAF.first_name, FOAF.last_name,
                       FOAF.surname)
    for pred in text_predicates:
        for _s, _p, obj in a.triples((None, pred, None)):
            termgen.increase_termpos()
            termgen.set_stemmer(stemmer_for(obj.language))
            termgen.index_text(obj)

    return docid, doc
Пример #2
0
 def create(self, data):
     """Build a new graph typed as ``self.rdftype`` and fill it from ``data``.

     The changes of the form itself are applied first, followed by those of
     each entry in ``self.groups``. The populated graph is returned.
     """
     graph = Graph()
     # every new object starts out with its rdf:type statement
     type_statement = (graph.identifier, RDF['type'], self.rdftype)
     graph.add(type_statement)
     # the form goes first, its groups follow in their own order
     form.applyChanges(self, graph, data)
     for grp in self.groups:
         form.applyChanges(grp, graph, data)
     return graph
Пример #3
0
 def get(self, identifier):
     """Check out the graph named ``identifier``.

     The first request copies the context obtained from ``self.store`` into
     a fresh ``Graph`` and remembers it in ``self._cache``; later requests
     return that same copy for as long as it stays in the cache.
     """
     if identifier not in self._cache:
         source = self.store.get_context(identifier)
         # work on a copy so the stored context itself is left untouched
         checkout = Graph(identifier=identifier)
         for triple in source:
             checkout.add(triple)
         self._cache[identifier] = checkout
     return self._cache[identifier]
Пример #4
0
    def work(self, marc):
        """Derive the ``<marc>/work`` graph from ``marc`` and add it to the context.

        The graph is typed ``OBP["Work"]``, receives the rewritten title,
        description, lccn and scn statements, and is linked to the
        contributors, subjects and manifestation built from the same record.
        Nothing is returned.
        """
        proc = self.process()
        proc.use(marc.identifier)

        work_uri = URIRef(marc.identifier + "/work")
        graph = Graph(identifier=work_uri)
        graph.add((graph.identifier, RDF["type"], OBP["Work"]))

        copied = (DC["title"], DC["description"], BIBO["lccn"], OBP["scn"])
        for predicate in copied:
            graph += self.rewrite(marc, graph, predicate)

        for contributor in self.contributors(marc):
            graph.add(
                (graph.identifier, DC["contributor"], contributor.identifier))

        for subject in self.subjects(marc):
            graph.add((graph.identifier, DC["subject"], subject.identifier))
            is_person = subject.exists(
                (subject.identifier, RDF["type"], FOAF["Person"]))
            if not is_person:
                # non-person subjects are merged into the work graph itself
                graph += subject

        manifestation = self.manifestation(marc)
        graph.add((graph.identifier, OBP["hasManifestation"],
                   manifestation.identifier))

        proc.result(graph)
        self.context.add(graph)
Пример #5
0
    def contributors(self, marc):
        """Create one graph per ``DC["contributor"]`` statement of ``marc``.

        Each contributor is given the URI ``<marc>/contributor/<n>`` (``n``
        counts from 0), receives the statements of the original node rewritten
        onto that URI, and is typed ``FOAF["Person"]`` when it carries no
        ``RDF["type"]`` of its own. Every graph is added to ``self.context``.

        Returns the list of contributor graphs in iteration order.
        """
        graphs = []
        pattern = (marc.identifier, DC["contributor"], None)
        for index, (_s, _p, node) in enumerate(marc.triples(pattern)):
            proc = self.process()
            proc.use(marc.identifier)

            uri = URIRef(marc.identifier + "/contributor/%d" % index)
            graph = Graph(identifier=uri)
            # move the description of the original node onto the new URI
            described = marc.bnc((node, None, None))
            graph += described.replace((node, None, None), (uri, None, None))

            has_type = graph.exists((uri, RDF["type"], None))
            if not has_type:
                graph.add((uri, RDF["type"], FOAF["Person"]))

            proc.result(graph)
            self.context.add(graph)
            graphs.append(graph)
        return graphs
Пример #6
0
    def manifestation(self, marc):
        """Derive the ``<marc>/manifestation`` graph from ``marc``.

        The graph is typed ``OBP["Manifestation"]``, linked to the publisher
        built by ``self.publisher``, given the ``DC["spatial"]`` values found
        on the record's publisher nodes, and filled with the rewritten
        identifier, date, extent and call-number statements. It is added to
        ``self.context`` and returned.
        """
        proc = self.process()
        proc.use(marc.identifier)

        uri = URIRef(marc.identifier + "/manifestation")
        graph = Graph(identifier=uri)
        graph.add((graph.identifier, RDF["type"], OBP["Manifestation"]))

        publisher = self.publisher(marc)
        graph.add((graph.identifier, DC["publisher"], publisher.identifier))

        # places attached to the record's publisher nodes move to the
        # manifestation itself
        publisher_pattern = (marc.identifier, DC["publisher"], None)
        for _s, _p, node in marc.triples(publisher_pattern):
            for _ns, _np, place in marc.triples((node, DC["spatial"], None)):
                graph.add((graph.identifier, DC["spatial"], place))

        copied = (
            BIBO["isbn"],
            BIBO["isbn10"],
            BIBO["isbn13"],
            DC["date"],
            DC["extent"],
            OBP["dimensions"],
            OBP["edition"],
            OBP["lccall"],
            OBP["nlmcall"],
            OBP["nbn"],
            OBP["physicalDetail"],
            RDFS["seeAlso"],
        )
        for predicate in copied:
            graph += self.rewrite(marc, graph, predicate)

        proc.result(graph)
        self.context.add(graph)

        return graph
Пример #7
0
 def subjects(self, marc):
     """Return one graph per ``DC["subject"]`` statement of ``marc``.

     Three kinds of subject object are handled:

     * a ``Literal`` becomes a fresh graph holding it as ``RDF["value"]``;
     * a node typed ``FOAF["Person"]`` is rewritten onto the URI
       ``<marc>/subject/<n>`` and added to ``self.context``;
     * anything else becomes a graph identified by the object itself,
       holding the statements returned by ``marc.bnc`` for it.

     Each subject appears exactly once in the returned list.
     """
     result = []
     i = 0  # numbers only the person subjects, which get their own URI
     for s, p, o in marc.triples((marc.identifier, DC["subject"], None)):
         if isinstance(o, Literal):
             subject = Graph()
             subject.add((subject.identifier, RDF["value"], o))
             # no append here: the shared append below covers this branch
             # too (a second one used to duplicate literal subjects)
         elif marc.exists((o, RDF["type"], FOAF["Person"])):
             proc = self.process()
             proc.use(marc.identifier)
             identifier = URIRef(marc.identifier + "/subject/%d" % i)
             subject = Graph(identifier=identifier)
             subject += marc.bnc((o, None, None)).replace(
                 (o, None, None), (identifier, None, None))
             proc.result(subject)
             self.context.add(subject)
             i += 1
         else:
             subject = Graph(identifier=o)
             subject += marc.bnc((o, None, None))
         result.append(subject)
     return result
Пример #8
0
def rdf_data():
    """Yield the graphs that make up the license data set.

    Yields, in order:

    1. the graph parsed from ``http://creativecommons.org/schema.rdf``;
    2. the graph parsed from the packaged ``n3/license.n3`` resource;
    3. one graph per entry returned by ``LicensesService2().get_licenses()``,
       enriched with whatever could be parsed from the entry's ``url``.
    """
    service = LicensesService2()

    g = Graph(identifier=CC[""])
    g.parse("http://creativecommons.org/schema.rdf")
    yield g

    fp = pkg_resources.resource_stream("licenses",
                                       os.path.join("n3", "license.n3"))
    g = Graph(identifier=LICENSES["lens"])
    try:
        g.parse(fp, format="n3")
    finally:
        # release the resource stream even when parsing fails
        fp.close()
    yield g

    for ld in service.get_licenses():
        ident = LICENSES[ld["id"]]
        g = Graph(identifier=ident)
        lic = License(ident, graph=g)
        lic.label = Literal(ld["title"])
        lic.prefLabel = Literal(ld["title"])
        lic.notation = Literal(ld["id"])
        lic.lens = LICENSES.lens

        if ld.get("url"):
            url = URIRef(ld["url"])
            sa = Graph()
            # Fetching the license page is best effort: try the default
            # parser and then RDFa, ignoring failures of either one.
            # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt
            # and SystemExit working.
            try:
                sa.parse(url)
            except Exception:
                pass
            try:
                sa.parse(url, format="rdfa")
            except Exception:
                pass

            # drop page furniture that says nothing about the license
            sa.remove((url, XHV.icon, None))
            sa.remove((url, XHV.alternate, None))
            sa.remove((url, XHV.stylesheet, None))
            for ll in sa.distinct_objects(url, XHV.license):
                lic.license = ll
            sa.remove((url, XHV.license, None))

            described = sa.bnc((url, None, None))
            if described:
                # copy what the page says about itself onto the license
                for _s, p, o in described:
                    g.add((ident, p, o))
                lic.sameAs = url
            else:
                lic.seeAlso = url
        yield g
Пример #9
0
def rdf_data():
    """Yield the graphs that make up the license data set.

    Yields, in order:

    1. the graph parsed from ``http://creativecommons.org/schema.rdf``;
    2. the graph parsed from the packaged ``n3/license.n3`` resource;
    3. one graph per entry returned by ``LicensesService2().get_licenses()``,
       enriched with whatever could be parsed from the entry's ``url``.
    """
    service = LicensesService2()

    g = Graph(identifier=CC[""])
    g.parse("http://creativecommons.org/schema.rdf")
    yield g

    fp = pkg_resources.resource_stream("licenses",
                                       os.path.join("n3", "license.n3"))
    g = Graph(identifier=LICENSES["lens"])
    try:
        g.parse(fp, format="n3")
    finally:
        # release the resource stream even when parsing fails
        fp.close()
    yield g

    for ld in service.get_licenses():
        ident = LICENSES[ld["id"]]
        g = Graph(identifier=ident)
        lic = License(ident, graph=g)
        lic.label = Literal(ld["title"])
        lic.prefLabel = Literal(ld["title"])
        lic.notation = Literal(ld["id"])
        lic.lens = LICENSES.lens

        if ld.get("url"):
            url = URIRef(ld["url"])
            sa = Graph()
            # Fetching the license page is best effort: try the default
            # parser and then RDFa, ignoring failures of either one.
            # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt
            # and SystemExit working.
            try:
                sa.parse(url)
            except Exception:
                pass
            try:
                sa.parse(url, format="rdfa")
            except Exception:
                pass

            # drop page furniture that says nothing about the license
            sa.remove((url, XHV.icon, None))
            sa.remove((url, XHV.alternate, None))
            sa.remove((url, XHV.stylesheet, None))
            for ll in sa.distinct_objects(url, XHV.license):
                lic.license = ll
            sa.remove((url, XHV.license, None))

            described = sa.bnc((url, None, None))
            if described:
                # copy what the page says about itself onto the license
                for _s, p, o in described:
                    g.add((ident, p, o))
                lic.sameAs = url
            else:
                lic.seeAlso = url
        yield g
Пример #10
0
    def rdf(self, *av, **kw):
        """Return a graph describing this record as an ``OBP["MarcRecord"]``.

        ``*av`` and ``**kw`` are passed straight to ``Graph``. The record's
        own items (keys of the form ``"ns:term"`` mapping to iterables of
        values) are merged into the graph, nested dicts becoming blank
        nodes. ``RDFS["seeAlso"]`` links are then derived from the ISBN,
        ISSN and LCCN statements, and the ``nbn``, ``scn``, ``lccall``,
        ``lccopy`` and ``isPartOf`` helpers are run on the graph.
        """
        g = Graph(*av, **kw)
        g.add((g.identifier, RDF["type"], OBP["MarcRecord"]))

        def merge(d, s):
            # Add every value of mapping `d` as a statement about `s`,
            # recursing through a blank node for nested mappings.
            for k, v in d.items():
                ns, term = k.split(":")
                p = namespaces[ns][term]
                for o in v:
                    if isinstance(o, dict):
                        b = BNode()
                        g.add((s, p, b))
                        merge(o, b)
                    else:
                        g.add((s, p, o))

        ident = g.identifier
        merge(self, ident)

        # The loops below add statements to `g` while walking query results
        # from `g`; iterate over snapshots so the graph is never mutated
        # underneath a live iterator.
        for s, p, o in list(g.triples((ident, BIBO["isbn"], None))):
            g.add((ident, RDFS["seeAlso"], URIRef("urn:isbn:%s" % o)))
            g.add((ident, RDFS["seeAlso"],
                   URIRef("http://purl.org/NET/book/isbn/%s#book" % o)))
            g.add(
                (ident, RDFS["seeAlso"],
                 URIRef("http://www4.wiwiss.fu-berlin.de/bookmashup/books/%s" %
                        o)))
            # classify by length: 10 characters -> isbn10, 13 -> isbn13
            if len(o) == 10:
                g.add((ident, BIBO["isbn10"], o))
            elif len(o) == 13:
                g.add((ident, BIBO["isbn13"], o))

        for s, p, o in list(g.triples((ident, BIBO["issn"], None))):
            g.add((ident, RDFS["seeAlso"], URIRef("urn:issn:%s" % o)))

        for s, p, o in list(g.triples((ident, BIBO["lccn"], None))):
            g.add(
                (ident, RDFS["seeAlso"], URIRef(u"http://lccn.loc.gov/" + o)))

        self.nbn(g)
        self.scn(g)
        self.lccall(g)
        self.lccopy(g)
        self.isPartOf(g)

        return g