def handle_record_links(self, loop, model, params):
    '''
    Task of the main event loop for MARC conversion, called with details from
    each MARC/XML record processed. In this case update a report of links
    encountered in the MARC/XML.

    loop -- event loop object passed in by the caller (not used here)
    model -- raw Versa model with converted resource information from the MARC
    params -- parameters passed in from processing:
        params['workid']: ID of the work constructed from the MARC record
        params['instanceids']: list of IDs of instances constructed from the MARC record

    Side effects: every link target found is added to self._links_found, and
    one <div> report fragment for the record is appended to self._outstr.
    Returns None.
    '''
    #Local import so the block stays self-contained
    from html import escape

    #First get the work ID, then the title
    workid = params['workid']
    #simple_lookup() is a little helper for getting a property from a resource
    title = simple_lookup(model, workid, TITLE_REL)

    #Get the ISBN, just pick the first one
    isbn = ''
    if params['instanceids']:
        isbn = simple_lookup(model, params['instanceids'][0], ISBN_REL)

    #Record values come from external MARC data, so escape them before putting
    #them into markup. str() keeps the old rendering of non-string values (e.g. None)
    parts = ['<div id="{0}" isbn="{1}"><title>{2}</title>\n'.format(
        escape(str(workid)), escape(str(isbn)), escape(str(title)))]

    #Iterate over all the relationship targets to see which is a link
    for stmt in model.match():
        target = stmt[TARGET]
        #Only URI-shaped targets whose host part is not BFHOST are reported
        if iri.matches_uri_syntax(target) and iri.split_uri_ref(target)[1] != BFHOST:
            self._links_found.add(target)
            parts.append('<a href="{0}">{0}</a>\n'.format(escape(str(target))))
    parts.append('</div>\n')

    self._outstr += ''.join(parts)
    return
def linkreport(config=None, **kwargs):
    '''
    Generator-based coroutine which accumulates a report of links per record.

    Each value sent in is a params mapping with keys:
        params['model']: Versa model for the record
        params['workid']: ID of the work for the record
        params['instanceids']: list of instance IDs (first one is used for the ISBN)

    config -- configuration mapping; config['output-file'] is the path the
        report is written to when the coroutine is closed
    kwargs -- accepted for interface compatibility; not used

    When close() is called on the coroutine (GeneratorExit) the accumulated
    report is written to config['output-file'].
    '''
    #Local import so the block stays self-contained
    from html import escape

    #Any configuration variables passed in
    if config is None: config = {}
    try:
        #Initialize the output
        outparts = []
        while True:
            params = yield
            model = params['model']
            #First get the work ID, then the title
            workid = params['workid']
            #simple_lookup() is a little helper for getting a property from a resource
            title = simple_lookup(model, workid, TITLE_REL)
            #Get the ISBN, just pick the first one
            isbn = ''
            if params['instanceids']:
                isbn = simple_lookup(model, params['instanceids'][0], ISBN_REL)
            #Fixed: the id attribute was missing its closing quote (id="{0} isbn=...).
            #Values are escaped since they originate from record data
            outparts.append('<div id="{0}" isbn="{1}"><title>{2}</title>\n'.format(
                escape(str(workid)), escape(str(isbn)), escape(str(title))))
            #Iterate over all the relationship targets to see which is a link
            for stmt in model.match():
                target = stmt[TARGET]
                if iri.matches_uri_syntax(target) and iri.split_uri_ref(target)[1] != BFHOST:
                    outparts.append('<a href="{0}">{0}</a>\n'.format(escape(str(target))))
            outparts.append('</div>\n')
    except GeneratorExit:
        #Reached when close() is called on this coroutine
        with open(config['output-file'], "w") as outf:
            outf.write(''.join(outparts))
def run(infile):
    '''
    Load Versa Markdown from the file-like object infile into an in-memory
    model, then print a tab-separated line for each poem resource which has
    '@choice' among its Atom category targets.

    Fields per line: quoted title, the literal 'Poem', update date formatted
    as month and year, and the 'title' attribute of the Atom source statement.
    '''
    poem_type = 'http://uche.ogbuji.net/poems/poem'
    category_rel = 'http://www.w3.org/2005/Atom/category'
    updated_rel = 'http://www.w3.org/2005/Atom/updated'
    source_rel = 'http://www.w3.org/2005/Atom/source'
    title_rel = 'http://www.w3.org/2005/Atom/title'

    model = memory.connection()
    from_markdown(infile.read(), model)

    for poem in resources_by_type(model, poem_type):
        categories = [stmt[TARGET] for stmt in model.match(poem, category_rel)]
        if '@choice' not in categories:
            continue
        updated = parse_date(simple_lookup(model, poem, updated_rel))
        #next() raises StopIteration if the poem has no source statement
        source_stmt = next(model.match(poem, source_rel))
        venue = source_stmt[ATTRIBUTES]['title']
        title = simple_lookup(model, poem, title_rel)
        fields = ("'" + title + "'", 'Poem', updated.strftime('%B, %Y'), venue)
        print('\t'.join(fields))
        #NOTE(review): blank line assumed to follow each entry; the source layout
        #did not make clear whether this print() sits inside the loop -- confirm
        print()
def run(infile):
    '''
    Load Versa Markdown from the file-like object infile into an in-memory
    model, then print a <descriptionSet> XML document with one <description>
    per poem resource which has '@choice' among its Atom category targets.

    Each description carries the title, the update date (month and year), the
    publisher (the 'title' attribute of the Atom source statement) and, when
    the poem has at least one Atom link, a <link> for the first one.

    All interpolated values are XML-escaped so that characters such as '&',
    '<' or '"' in the data cannot produce ill-formed output.
    '''
    #Local import so the block stays self-contained
    from xml.sax.saxutils import escape

    m = memory.connection()
    from_markdown(infile.read(), m)
    print('<descriptionSet>')
    for poem in resources_by_type(m, 'http://uche.ogbuji.net/poems/poem'):
        categories = [stmt[TARGET] for stmt in m.match(poem, 'http://www.w3.org/2005/Atom/category')]
        if '@choice' not in categories:
            continue
        print('<description>')
        d = parse_date(simple_lookup(m, poem, 'http://www.w3.org/2005/Atom/updated'))
        #next() raises StopIteration if the poem has no source statement
        source = next(m.match(poem, 'http://www.w3.org/2005/Atom/source'))
        source = source[ATTRIBUTES]['title']
        title = simple_lookup(m, poem, 'http://www.w3.org/2005/Atom/title')
        #str() keeps the previous rendering of non-string values such as None
        print(' <title>{0}</title>\n <date>{1}</date>\n <publisher>{2}</publisher>'.format(
            escape(str(title)), escape(d.strftime('%B, %Y')), escape(str(source))))
        hlinks = [stmt[TARGET] for stmt in m.match(poem, 'http://www.w3.org/2005/Atom/link')]
        if hlinks:
            #Attribute value: the double quote must be escaped as well
            print(' <link href="{0}"/>'.format(escape(str(hlinks[0]), {'"': '&quot;'})))
        print('</description>')
    print('</descriptionSet>')
def handle_record_links(self, loop, model, params):
    '''
    Task of the main event loop for MARC conversion, called with details from
    each MARC/XML record processed. In this case update a report of links
    encountered in the MARC/XML.

    loop -- event loop object passed in by the caller (not used here)
    model -- raw Versa model with converted resource information from the MARC
    params -- parameters passed in from processing:
        params['workid']: ID of the work constructed from the MARC record
        params['instanceids']: list of IDs of instances constructed from the MARC record

    Side effects: every link target found is added to self._links_found, and
    one <div> report fragment for the record is appended to self._outstr.
    Returns None.
    '''
    #Local import so the block stays self-contained
    from html import escape

    def markup_safe(value):
        #str() keeps the old rendering of non-string values (e.g. None)
        return escape(str(value))

    #First get the work ID, then the title
    workid = params['workid']
    #simple_lookup() is a little helper for getting a property from a resource
    title = simple_lookup(model, workid, TITLE_REL)

    #Get the ISBN, just pick the first one
    isbn = ''
    instanceids = params['instanceids']
    if instanceids:
        isbn = simple_lookup(model, instanceids[0], ISBN_REL)

    #Values originate from external MARC data, so escape them for markup
    envelope = ['<div id="{0}" isbn="{1}"><title>{2}</title>\n'.format(
        markup_safe(workid), markup_safe(isbn), markup_safe(title))]

    #Iterate over all the relationship targets to see which is a link
    for stmt in model.match():
        target = stmt[TARGET]
        if not iri.matches_uri_syntax(target):
            continue
        #Skip targets whose host part is BFHOST
        if iri.split_uri_ref(target)[1] == BFHOST:
            continue
        self._links_found.add(target)
        envelope.append('<a href="{0}">{0}</a>\n'.format(markup_safe(target)))
    envelope.append('</div>\n')

    self._outstr += ''.join(envelope)
    return
def handle_record_links(self, loop, model, params):
    '''
    Task of the main event loop for MARC conversion, called with details from
    each MARC/XML record processed. In this case add a 'label' property to
    resources of configured types.

    For every (class, property) pair in self._config['lookup'], each resource
    in the model typed as that class has the value of that property looked up;
    when the value is truthy it is added to the resource under the 'label'
    relationship. Class, property and 'label' are all resolved against
    params['vocabbase'].

    loop -- event loop object passed in by the caller (not used here)
    model -- raw Versa model with converted resource information from the MARC;
        updated in place
    params -- parameters passed in from processing:
        params['vocabbase']: the configured vocabulary base IRI

    Returns None.
    '''
    #Get the configured vocabulary base IRI
    vocabbase = params['vocabbase']
    #Same for every resource, so build it once
    label_rel = I(iri.absolutize('label', vocabbase))
    for cls, prop in self._config['lookup'].items():
        type_iri = I(iri.absolutize(cls, vocabbase))
        prop_rel = I(iri.absolutize(prop, vocabbase))
        #Take a snapshot of the matches, since the model is added to below and
        #should not be modified while its own match iterator is still running
        for link in list(model.match(None, TYPE_REL, type_iri)):
            #simple_lookup() is a little helper for getting a property from a resource
            val = simple_lookup(model, link[ORIGIN], prop_rel)
            if val:
                model.add(link[ORIGIN], label_rel, val)
    return
def get_orgname(site, reuse=None):
    '''
    Given a site URL return the org's name

    site -- passed to load_rdfa_page() when reuse is not given
    reuse -- optional already-loaded (model, sitetext) pair, used in place of
        loading the page

    Returns the schema.org name of the first schema.org Organization in the
    page model which has one, or None if the model is empty/missing or no
    such name is found.

    >>> from librarylink.util import all_sites, get_orgname
    >>> org = next(s for s in all_sites() if 'denverlibrary' in s.host )
    >>> get_orgname(org)
    'Denver Public Library'
    >>> get_orgname('http://link.denverlibrary.org/')
    'Denver Public Library'
    '''
    model, _sitetext = reuse if reuse else load_rdfa_page(site)
    if not model:
        return None

    org_matches = model.match(None, RDF_NS + 'type', SCHEMAORG_NS + 'Organization')
    for org, _rel, _target, _attrs in org_matches:
        orgname = versautil.simple_lookup(model, org, SCHEMAORG_NS + 'name')
        if orgname is None:
            #Keep looking; a later Organization may carry a name
            continue
        return orgname
    return None
def send(self, data):
    '''
    Handle one fetched page handed to this sink.

    data -- 5-tuple of (body, respurl, respheaders, referrer, task_id)

    Returns None when LIBRARY_LINK_HEADER is absent from the response headers.
    For a page whose host equals self._fphost, the body is parsed as RDFa into
    a fresh in-memory model, a "URL | name" line is printed to
    quickinfo_sink.outfp, and the result of self._queue_links() is returned.
    '''
    #Body text, response URL (e.g. after redirections), aiohttp.header object from response, referrer, controlling task ID
    (body, respurl, respheaders, referrer, task_id) = data
    #Only the host component of the response URL is needed
    _, respurlhost, _, _, _ = iri.split_uri_ref(respurl)
    if LIBRARY_LINK_HEADER not in respheaders:
        #Not even an LLN page at all
        return
    if self._fphost == respurlhost:
        output_model = memory.connection()
        quickinfo_sink.logger.debug('[TASK {}]: Target subpage {} -> {}'.format(task_id, referrer, respurl))
        #Subpage of the target site
        rdfalite.toversa(body, output_model, respurl)
        #schema.org name of the page's own resource; printed as-is even when the lookup finds nothing
        resname = versautil.simple_lookup(output_model, respurl, SCHEMAORG + 'name')
        print(respurl, '|', resname, file=quickinfo_sink.outfp)
        #Earlier approaches to finding the name, kept for reference:
        #orgentity = util.simple_lookup_byvalue(model, RDFTYPE, SCHEMAORG + 'Organization')
        #name = util.simple_lookup(model, orgentity, BL + 'name')
        #name = util.simple_lookup(model, baseurl + '#_default', BL + 'name')
        #NOTE(review): the three statements below are assumed to belong to this
        #branch (links only queued for pages on the target host) -- confirm
        root = html5.parse(body)
        linkset = self._queue_links(root, respurl)
        return linkset
def send(self, data):
    '''
    Handle one fetched page handed to this sink.

    data -- 5-tuple of (body, respurl, respheaders, referrer, task_id)

    Returns None when LIBRARY_LINK_HEADER is absent from the response headers.
    For a page whose host equals self._fphost, the body is parsed as RDFa into
    a fresh in-memory model, a "URL | name" line is printed to
    quickinfo_sink.outfp, and the result of self._queue_links() is returned.
    '''
    #Body text, response URL (e.g. after redirections), aiohttp.header object from response, referrer, controlling task ID
    (body, respurl, respheaders, referrer, task_id) = data
    #Only the host component of the response URL is needed
    _, respurlhost, _, _, _ = iri.split_uri_ref(respurl)
    if LIBRARY_LINK_HEADER not in respheaders:
        #Not even an LLN page at all
        return
    if self._fphost == respurlhost:
        output_model = memory.connection()
        quickinfo_sink.logger.debug(
            '[TASK {}]: Target subpage {} -> {}'.format(
                task_id, referrer, respurl))
        #Subpage of the target site
        rdfalite.toversa(body, output_model, respurl)
        #schema.org name of the page's own resource; printed as-is even when the lookup finds nothing
        resname = versautil.simple_lookup(output_model, respurl,
                                          SCHEMAORG + 'name')
        print(respurl, '|', resname, file=quickinfo_sink.outfp)
        #Earlier approaches to finding the name, kept for reference:
        #orgentity = util.simple_lookup_byvalue(model, RDFTYPE, SCHEMAORG + 'Organization')
        #name = util.simple_lookup(model, orgentity, BL + 'name')
        #name = util.simple_lookup(model, baseurl + '#_default', BL + 'name')
        #NOTE(review): the three statements below are assumed to belong to this
        #branch (links only queued for pages on the target host) -- confirm
        root = html5.parse(body)
        linkset = self._queue_links(root, respurl)
        return linkset
def follow(self, rel):
    '''
    Look up the given relationship on this object's origin resource.

    rel -- relationship IRI reference; resolved against self._base

    Returns whatever simple_lookup() yields for self._origin in
    self._input_model under the resolved relationship.
    '''
    model = self._input_model
    origin = self._origin
    full_rel = I(iri.absolutize(rel, self._base))
    return simple_lookup(model, origin, full_rel)
def get_branches(site, reuse=None):
    '''
    Given an organization object as returned from librarylink.util.all_sites,
    or just a plain base URL string; return details of the org's branches

    site -- passed to load_rdfa_page() when reuse is not given
    reuse -- optional already-loaded (model, sitetext) pair, used in place of
        loading the page

    Returns None if the model is empty/missing; otherwise a list with one
    tuple per schema.org Library resource in the page model:

        (id, url, name, (latitude, longitude) or None,
         (street, locality, region, postcode, country) or None)

    >>> from librarylink.util import all_sites, get_branches
    >>> org = next(s for s in all_sites() if 'denverlibrary' in s.host )
    >>> branches = get_branches(org)
    >>> branches = get_branches('http://link.denverlibrary.org/')
    '''
    model, _sitetext = reuse if reuse else load_rdfa_page(site)
    if not model:
        return None

    def prop(resource, local_name):
        #Single-valued schema.org property of a resource
        return versautil.simple_lookup(model, resource, SCHEMAORG_NS + local_name)

    results = []
    for library, _rel, _target, _attrs in model.match(None, RDF_NS + 'type', SCHEMAORG_NS + 'Library'):
        name = next(versautil.lookup(model, library, SCHEMAORG_NS + 'name'), '').strip()
        url = prop(library, 'url')
        place = prop(library, 'location')
        address = prop(library, 'address')

        #Goes schema:Library - schema:location -> schema:Place - schema:geo -> Coordinates
        coords = None
        geo = prop(place, 'geo') if place else place
        if geo:
            coords = (prop(geo, 'latitude'), prop(geo, 'longitude'))

        #Address resource is expected to look like:
        #rdf:type schema:PostalAddress
        #schema:streetAddress "2111 Snow Road"@en
        #schema:addressLocality "Parma"@en
        #schema:addressRegion "OH"@en
        #schema:postalCode "44134"@en
        #schema:addressCountry "US"@en
        postal = None
        if address:
            postal = (
                prop(address, 'streetAddress'),
                prop(address, 'addressLocality'),
                prop(address, 'addressRegion'),
                prop(address, 'postalCode'),
                prop(address, 'addressCountry'),
            )

        results.append((library, url, name, coords, postal))
    return results
def get_orgdetails(site, reuse=None):
    '''
    Given an organization object as returned from librarylink.util.all_sites,
    or just a plain base URL string; return a dict of details about the org

    site -- passed to load_rdfa_page() when reuse is not given
    reuse -- optional already-loaded (model, sitetext) pair, used in place of
        loading the page. sitetext is handled as bytes here

    Returns None if the model is empty/missing; otherwise a dict with keys:
        'name' -- name of the first schema.org LibrarySystem, else None
        'id' -- ID of that LibrarySystem resource, else None
        'group', 'groupname' -- url and name of the first schema.org Consortium, else None
        'network' -- value from NETWORK_HINTS whose key occurs in the page text, else None
        'pipeline_ver', 'template_ver' -- versions matched in the page text, else None
        'features' -- set of feature targets published in the model
        'same-as' -- list of schema.org sameAs targets of the first LibrarySystem
        'logo' -- stripped schema.org logo of the first LibrarySystem, else None

    >>> from librarylink.util import all_sites, get_orgdetails
    >>> det = get_orgdetails('http://link.dcl.org/')
    >>> det['name']
    'Douglas County Libraries'
    >>> org = next(s for s in all_sites() if 'denverlibrary' in s.host )
    >>> det = get_orgdetails(org.base_url)
    >>> det['name']
    'Denver Public Library'
    '''
    if reuse:
        model, sitetext = reuse
    else:
        model, sitetext = load_rdfa_page(site)
    if not model:
        return None

    libsys_type = SCHEMAORG_NS + 'LibrarySystem'
    details = {'name': None, 'group': None, 'groupname': None, 'network': None, 'features': set()}

    #Only the first LibrarySystem resource is considered
    id_ = None
    for o, _r, _t, _a in model.match(None, RDF_NS + 'type', libsys_type):
        id_ = o
        details['name'] = next(versautil.lookup(model, o, SCHEMAORG_NS + 'name'), '').strip()
        break
    details['id'] = id_

    #Only the first Consortium resource is considered
    for o, _r, _t, _a in model.match(None, RDF_NS + 'type', SCHEMAORG_NS + 'Consortium'):
        details['group'] = versautil.simple_lookup(model, o, SCHEMAORG_NS + 'url')
        details['groupname'] = next(versautil.lookup(model, o, SCHEMAORG_NS + 'name'), '').strip()
        break

    #No early exit: if several hints occur in the page, the last one iterated wins
    for searchstr in NETWORK_HINTS:
        if searchstr in sitetext:
            details['network'] = NETWORK_HINTS[searchstr]

    #Patterns are applied to the raw page bytes, hence the decode of the matched group
    m = PIPELINE_VERSION_PAT.search(sitetext)
    details['pipeline_ver'] = m.group(1).decode('utf-8') if m else None

    m = TEMPLATE_VERSION_PAT.search(sitetext)
    details['template_ver'] = m.group(1).decode('utf-8') if m else None

    for _o, _r, t, _a in model.match(None, LL+'feature'):
        details['features'].add(t)
    #Legacy, for libraries where the above isn't published
    if b'<img class="img-responsive" src="/static/liblink_ea/img/nlogo.png"' in sitetext:
        details['features'].add('http://library.link/ext/feature/novelist/merge')

    details['same-as'] = []
    for o, _r, _t, _a in model.match(None, RDF_NS + 'type', libsys_type):
        for _, _r, t, _a in model.match(o, SCHEMAORG_NS + 'sameAs'):
            details['same-as'].append(t)
        break

    #Always present in the result, like every other key, even with no LibrarySystem
    details['logo'] = None
    for o, _r, _t, _a in model.match(None, RDF_NS + 'type', libsys_type):
        logo = versautil.simple_lookup(model, o, SCHEMAORG_NS + 'logo')
        details['logo'] = logo.strip() if logo else logo
        break

    return details