示例#1
0
    def handle_record_links(self, loop, model, params):
        '''
        Task coroutine of the main event loop for MARC conversion.
        In this case update a report of links encountered in the MARC/XML

        loop -- not used by this handler
        model -- raw Versa model with converted resource information from the MARC details from each MARC/XML record processed
        params -- parameters passed in from processing:
            params['workid']: ID of the work constructed from the MARC record
            params['instanceids']: list of IDs of instances constructed from the MARC record

        Each link found is added to self._links_found, and an HTML fragment
        for the record (one <div> with one <a> per link) is appended to
        self._outstr. Nothing is returned.
        '''
        #print ('BF_MARCREC_TASK', linkreport.PLUGIN_ID)
        #First get the work ID, then the title
        workid = params['workid']
        #simple_lookup() is a little helper for getting a property from a resource
        title = simple_lookup(model, workid, TITLE_REL)
        #Get the ISBN, just pick the first one
        isbn = ''
        if params['instanceids']:
            isbn = simple_lookup(model, params['instanceids'][0], ISBN_REL)

        #NOTE(review): values are interpolated into the markup without any escaping
        fragments = ['<div id="{0}" isbn="{1}"><title>{2}</title>\n'.format(workid, isbn, title)]
        #iterate over all the relationship targets to see which is a link
        for stmt in model.match():
            target = stmt[TARGET]
            #A link has URI syntax and its second split_uri_ref component
            #(presumably the host) is something other than BFHOST
            if iri.matches_uri_syntax(target) and iri.split_uri_ref(target)[1] != BFHOST:
                self._links_found.add(target)
                fragments.append('<a href="{0}">{0}</a>\n'.format(target))
        fragments.append('</div>\n')
        #Only extend the report once the whole record has been rendered
        self._outstr += ''.join(fragments)
        #print ('DONE BF_MARCREC_TASK', linkreport.PLUGIN_ID)
        return
示例#2
0
def linkreport(config=None, **kwargs):
    '''
    Coroutine which builds a report of links found in the models sent to it

    config -- optional dict of configuration; config['output-file'] is the
        path the report is written to when the coroutine is closed
    kwargs -- accepted but not used

    Each value sent in must be a dict with:
        'model': Versa model to scan
        'workid': ID of the work, used to look up the title
        'instanceids': list of instance IDs; the first is used to look up the ISBN

    The report is only written when close() is called on the coroutine.
    '''
    #Any configuration variables passed in
    if config is None: config = {}
    #Initialize the output, one complete HTML fragment per record
    records = []
    try:
        while True:
            params = yield
            model = params['model']
            #First get the work ID, then the title
            workid = params['workid']
            #simple_lookup() is a little helper for getting a property from a resource
            title = simple_lookup(model, workid, TITLE_REL)
            #Get the ISBN, just pick the first one
            isbn = ''
            if params['instanceids']:
                isbn = simple_lookup(model, params['instanceids'][0], ISBN_REL)

            #NOTE(review): values are interpolated into the markup without any escaping
            fragments = ['<div id="{0}" isbn="{1}"><title>{2}</title>\n'.format(workid, isbn, title)]
            #iterate over all the relationship targets to see which is a link
            for stmt in model.match():
                target = stmt[TARGET]
                if iri.matches_uri_syntax(target) and iri.split_uri_ref(target)[1] != BFHOST:
                    fragments.append('<a href="{0}">{0}</a>\n'.format(target))
            fragments.append('</div>\n')
            #Only record the fragment once the whole record has been rendered
            records.append(''.join(fragments))
    except GeneratorExit:
        #Reached when close() is called on this coroutine
        with open(config['output-file'], "w") as outf:
            outf.write(''.join(records))
示例#3
0
def run(infile):
    '''
    Read a Versa Markdown document from infile and print one tab-separated
    line (quoted title, the word Poem, month and year, source title),
    followed by a blank line, for each poem resource categorized @choice

    infile -- file-like object whose read() gives the Markdown text
    '''
    model = memory.connection()
    from_markdown(infile.read(), model)
    for poem in resources_by_type(model, 'http://uche.ogbuji.net/poems/poem'):
        categories = [
            stmt[TARGET]
            for stmt in model.match(poem, 'http://www.w3.org/2005/Atom/category')
        ]
        #Only poems flagged as choice are listed
        if '@choice' not in categories:
            continue
        updated = parse_date(simple_lookup(model, poem, 'http://www.w3.org/2005/Atom/updated'))
        #The source title is carried as an attribute on the first source statement
        source_stmt = next(model.match(poem, 'http://www.w3.org/2005/Atom/source'))
        source_title = source_stmt[ATTRIBUTES]['title']
        title = simple_lookup(model, poem, 'http://www.w3.org/2005/Atom/title')
        fields = ("'" + title + "'", 'Poem', updated.strftime('%B, %Y'), source_title)
        print('\t'.join(fields))
        print()
示例#4
0
def run(infile):
    '''
    Read a Versa Markdown document from infile and print a <descriptionSet>
    with one <description> (title, date, publisher and, if present, link)
    for each poem resource categorized @choice

    infile -- file-like object whose read() gives the Markdown text
    '''
    model = memory.connection()
    from_markdown(infile.read(), model)
    print('<descriptionSet>')
    for poem in resources_by_type(model, 'http://uche.ogbuji.net/poems/poem'):
        categories = [
            stmt[TARGET]
            for stmt in model.match(poem, 'http://www.w3.org/2005/Atom/category')
        ]
        #Only poems flagged as choice are listed
        if '@choice' not in categories:
            continue
        print('<description>')
        updated = parse_date(simple_lookup(model, poem, 'http://www.w3.org/2005/Atom/updated'))
        #The source title is carried as an attribute on the first source statement
        source_stmt = next(model.match(poem, 'http://www.w3.org/2005/Atom/source'))
        publisher = source_stmt[ATTRIBUTES]['title']
        title = simple_lookup(model, poem, 'http://www.w3.org/2005/Atom/title')
        print('  <title>{0}</title>\n  <date>{1}</date>\n  <publisher>{2}</publisher>'.format(title, updated.strftime('%B, %Y'), publisher))
        links = [
            stmt[TARGET]
            for stmt in model.match(poem, 'http://www.w3.org/2005/Atom/link')
        ]
        #Just use the first link, if there is one
        if links:
            print('  <link href="{0}"/>'.format(links[0]))
        print('</description>')
    print('</descriptionSet>')
示例#5
0
    def handle_record_links(self, loop, model, params):
        '''
        Task coroutine of the main event loop for MARC conversion.
        In this case update a report of links encountered in the MARC/XML

        loop -- not used by this handler
        model -- raw Versa model with converted resource information from the MARC details from each MARC/XML record processed
        params -- parameters passed in from processing:
            params['workid']: ID of the work constructed from the MARC record
            params['instanceids']: list of IDs of instances constructed from the MARC record

        Each link found is added to self._links_found, and an HTML fragment
        for the record (one <div> with one <a> per link) is appended to
        self._outstr. Nothing is returned.
        '''
        #print ('BF_MARCREC_TASK', linkreport.PLUGIN_ID)
        #First get the work ID, then the title
        workid = params['workid']
        #simple_lookup() is a little helper for getting a property from a resource
        title = simple_lookup(model, workid, TITLE_REL)
        #Get the ISBN, just pick the first one
        isbn = ''
        if params['instanceids']:
            isbn = simple_lookup(model, params['instanceids'][0], ISBN_REL)

        #NOTE(review): values are interpolated into the markup without any escaping
        fragments = [
            '<div id="{0}" isbn="{1}"><title>{2}</title>\n'.format(
                workid, isbn, title)
        ]
        #iterate over all the relationship targets to see which is a link
        for stmt in model.match():
            target = stmt[TARGET]
            #A link has URI syntax and its second split_uri_ref component
            #(presumably the host) is something other than BFHOST
            if iri.matches_uri_syntax(target) and iri.split_uri_ref(
                    target)[1] != BFHOST:
                self._links_found.add(target)
                fragments.append('<a href="{0}">{0}</a>\n'.format(target))
        fragments.append('</div>\n')
        #Only extend the report once the whole record has been rendered
        self._outstr += ''.join(fragments)
        #print ('DONE BF_MARCREC_TASK', linkreport.PLUGIN_ID)
        return
示例#6
0
    def handle_record_links(self, loop, model, params):
        '''
        Task coroutine of the main event loop for MARC conversion.
        In this case add a label to each resource of a configured type,
        copied from the property configured for that type

        loop -- not used by this handler
        model -- raw Versa model with converted resource information; labels are added to it in place
        params -- parameters passed in from processing:
            params['vocabbase']: base IRI against which the configured class and property names, and 'label', are resolved

        self._config['lookup'] maps each class name to the name of the
        property whose value becomes the label. Nothing is returned.
        '''
        #print ('BF_MARCREC_TASK', linkreport.PLUGIN_ID)
        #Get the configured vocabulary base IRI
        vocabbase = params['vocabbase']
        #The label relationship is the same for every resource, so build it once
        label_rel = I(iri.absolutize('label', vocabbase))
        for cls, prop in self._config['lookup'].items():
            type_iri = I(iri.absolutize(cls, vocabbase))
            prop_iri = I(iri.absolutize(prop, vocabbase))
            #Snapshot the matches first, since the model is added to inside the loop
            for link in list(model.match(None, TYPE_REL, type_iri)):
                #simple_lookup() is a little helper for getting a property from a resource
                val = simple_lookup(model, link[ORIGIN], prop_iri)
                if val:
                    model.add(link[ORIGIN], label_rel, val)
        return
示例#7
0
def get_orgname(site, reuse=None):
    '''
    Given a site URL return the org's name, or None if no page model is
    available or no schema.org Organization with a name is found

    site -- handed to load_rdfa_page() when reuse is not given
    reuse -- optional (model, sitetext) pair to use instead of loading the page

    >>> from librarylink.util import all_sites, get_orgname
    >>> org = next(s for s in all_sites() if 'denverlibrary' in s.host )
    >>> get_orgname(org)
    'Denver Public Library'
    >>> get_orgname('http://link.denverlibrary.org/')
    'Denver Public Library'
    '''
    model, _sitetext = reuse if reuse else load_rdfa_page(site)
    if not model:
        return None
    organizations = model.match(None, RDF_NS + 'type', SCHEMAORG_NS + 'Organization')
    for origin, _rel, _target, _attrs in organizations:
        #The first organization which actually has a name wins
        name = versautil.simple_lookup(model, origin, SCHEMAORG_NS + 'name')
        if name is not None:
            return name
    return None
示例#8
0
    def send(self, data):
        '''
        Handle one fetched page: if it is a Library.Link page on the target
        host, write its URL and schema.org name to quickinfo_sink.outfp and
        queue the links found on it

        data -- tuple of (body, respurl, respheaders, referrer, task_id)

        Returns whatever self._queue_links() returns for a page on the
        target host, otherwise None.
        '''
        #Body text, response URL (e.g. after redirections), aiohttp.header object from response, referrer, controlling task ID
        (body, respurl, respheaders, referrer, task_id) = data
        _, respurlhost, _, _, _ = iri.split_uri_ref(respurl)
        if LIBRARY_LINK_HEADER not in respheaders:
            #Not even an LLN page at all
            return None
        if self._fphost != respurlhost:
            #Page on some other host. Previously this fell through to an
            #unbound local; return None just like the non-LLN case
            return None

        #Subpage of the target site
        output_model = memory.connection()
        quickinfo_sink.logger.debug('[TASK {}]: Target subpage {} -> {}'.format(task_id, referrer, respurl))
        rdfalite.toversa(body, output_model, respurl)
        resname = versautil.simple_lookup(output_model, respurl, SCHEMAORG + 'name')
        print(respurl, '|', resname, file=quickinfo_sink.outfp)

        root = html5.parse(body)
        return self._queue_links(root, respurl)
示例#9
0
    def send(self, data):
        '''
        Handle one fetched page: if it is a Library.Link page on the target
        host, write its URL and schema.org name to quickinfo_sink.outfp and
        queue the links found on it

        data -- tuple of (body, respurl, respheaders, referrer, task_id)

        Returns whatever self._queue_links() returns for a page on the
        target host, otherwise None.
        '''
        #Body text, response URL (e.g. after redirections), aiohttp.header object from response, referrer, controlling task ID
        (body, respurl, respheaders, referrer, task_id) = data
        _, respurlhost, _, _, _ = iri.split_uri_ref(respurl)
        if LIBRARY_LINK_HEADER not in respheaders:
            #Not even an LLN page at all
            return None
        if self._fphost != respurlhost:
            #Page on some other host. Previously this fell through to an
            #unbound local; return None just like the non-LLN case
            return None

        #Subpage of the target site
        output_model = memory.connection()
        quickinfo_sink.logger.debug(
            '[TASK {}]: Target subpage {} -> {}'.format(
                task_id, referrer, respurl))
        rdfalite.toversa(body, output_model, respurl)
        resname = versautil.simple_lookup(output_model, respurl,
                                          SCHEMAORG + 'name')
        print(respurl, '|', resname, file=quickinfo_sink.outfp)

        root = html5.parse(body)
        return self._queue_links(root, respurl)
示例#10
0
 def follow(self, rel):
     '''
     Look up the given relationship on this object's origin resource in its
     input model, and return what simple_lookup() finds

     rel -- relationship IRI, resolved against self._base
     '''
     rel_iri = I(iri.absolutize(rel, self._base))
     return simple_lookup(self._input_model, self._origin, rel_iri)
示例#11
0
def get_branches(site, reuse=None):
    '''
    Given an organization object as returned from librarylink.util.all_sites, or just a plain base URL string; return details of each schema.org Library described on the org's page

    site -- handed to load_rdfa_page() when reuse is not given
    reuse -- optional (model, sitetext) pair to use instead of loading the page

    Returns None if no page model is available, otherwise a list with one
    tuple per library:
        (id, url, name,
         (latitude, longitude) or None,
         (street, locality, region, postcode, country) or None)

    >>> from librarylink.util import all_sites, get_branches
    >>> org = next(s for s in all_sites() if 'denverlibrary' in s.host )
    >>> branches = get_branches(org)
    >>> branches = get_branches('http://link.denverlibrary.org/')
    '''
    model, _sitetext = reuse if reuse else load_rdfa_page(site)
    if not model:
        return None

    def schema_prop(subject, propname):
        #Single value of a schema.org property on the given resource
        return versautil.simple_lookup(model, subject, SCHEMAORG_NS + propname)

    #rdf:type	schema:PostalAddress
    #schema:streetAddress	"2111 Snow Road"@en
    #schema:addressLocality	"Parma"@en
    #schema:addressRegion	"OH"@en
    #schema:postalCode	"44134"@en
    #schema:addressCountry	"US"@en
    address_props = (
        'streetAddress', 'addressLocality', 'addressRegion',
        'postalCode', 'addressCountry',
    )

    branches = []
    libraries = model.match(None, RDF_NS + 'type', SCHEMAORG_NS + 'Library')
    for library, _rel, _target, _attrs in libraries:
        name = next(versautil.lookup(model, library, SCHEMAORG_NS + 'name'), '').strip()
        url = schema_prop(library, 'url')
        place = schema_prop(library, 'location')
        addr = schema_prop(library, 'address')

        #Goes schema:Library - schema:location -> schema:Place - schema:geo -> Coordinates
        geo = schema_prop(place, 'geo') if place else place
        coordinates = None
        if geo:
            coordinates = (schema_prop(geo, 'latitude'), schema_prop(geo, 'longitude'))

        address = None
        if addr:
            address = tuple(schema_prop(addr, part) for part in address_props)

        branches.append((library, url, name, coordinates, address))

    return branches
示例#12
0
def get_orgdetails(site, reuse=None):
    '''
    Given an organization object as returned from librarylink.util.all_sites, or just a plain base URL string; return a dict of details about the org

    site -- handed to load_rdfa_page() when reuse is not given
    reuse -- optional (model, sitetext) pair to use instead of loading the page

    Returns None if no page model is available, otherwise a dict with keys:
        'name', 'id', 'same-as', 'logo': from the first schema.org LibrarySystem found
        'group', 'groupname': URL and name of the first schema.org Consortium found
        'network': value from NETWORK_HINTS for a hint found in the page text
        'pipeline_ver', 'template_ver': versions matched in the page text, or None
        'features': set of feature IRIs

    >>> from librarylink.util import all_sites, get_orgdetails
    >>> det = get_orgdetails('http://link.dcl.org/')
    >>> det['name']
    'Douglas County Libraries'
    >>> org = next(s for s in all_sites() if 'denverlibrary' in s.host )
    >>> det = get_orgdetails(org.base_url)
    >>> det['name']
    'Denver Public Library'
    '''
    if reuse:
        model, sitetext = reuse
    else:
        model, sitetext = load_rdfa_page(site)
    if not model:
        return None
    details = {'name': None, 'group': None, 'groupname': None, 'network': None, 'features': set()}

    #Name, ID, same-as & logo all come from the first LibrarySystem, so find it just once
    libsys = next(
        (o for o, r, t, a in model.match(None, RDF_NS + 'type', SCHEMAORG_NS + 'LibrarySystem')),
        None)
    if libsys is not None:
        details['name'] = next(versautil.lookup(model, libsys, SCHEMAORG_NS + 'name'), '').strip()
    details['id'] = libsys

    #Only the first Consortium found is used
    for o, r, t, a in model.match(None, RDF_NS + 'type', SCHEMAORG_NS + 'Consortium'):
        details['group'] = versautil.simple_lookup(model, o, SCHEMAORG_NS + 'url')
        details['groupname'] = next(versautil.lookup(model, o, SCHEMAORG_NS + 'name'), '').strip()
        break

    #If more than one hint is present in the page text, the last one checked wins
    for searchstr, network in NETWORK_HINTS.items():
        if searchstr in sitetext:
            details['network'] = network

    #The matched groups are decoded, so sitetext is expected to be bytes
    m = PIPELINE_VERSION_PAT.search(sitetext)
    details['pipeline_ver'] = m.group(1).decode('utf-8') if m else None
    m = TEMPLATE_VERSION_PAT.search(sitetext)
    details['template_ver'] = m.group(1).decode('utf-8') if m else None

    for o, r, t, a in model.match(None, LL+'feature'):
        details['features'].add(t)

    #Legacy, for libraries where the above isn't published
    if b'<img class="img-responsive" src="/static/liblink_ea/img/nlogo.png"' in sitetext:
        details['features'].add('http://library.link/ext/feature/novelist/merge')

    #These keys are always present, even when the page has no LibrarySystem
    details['same-as'] = []
    details['logo'] = None
    if libsys is not None:
        for _, r, t, a in model.match(libsys, SCHEMAORG_NS + 'sameAs'):
            details['same-as'].append(t)
        logo = versautil.simple_lookup(model, libsys, SCHEMAORG_NS + 'logo')
        details['logo'] = logo.strip() if logo else logo

    return details