def unidentifiedDocumentRef(self, document):
    '''Build a document reference whose only evidence is the fingerprints.

    One ``kend.model.Evidence`` of type ``fingerprint`` (source type
    ``document``) is created per entry of ``document.fingerprints()``, in
    the order they are returned, and wrapped in a
    ``kend.model.DocumentReference``.
    '''
    fingerprint_evidence = []
    for fingerprint in document.fingerprints():
        item = kend.model.Evidence(
            type='fingerprint', data=fingerprint, srctype='document')
        fingerprint_evidence.append(item)
    return kend.model.DocumentReference(evidence=fingerprint_evidence)
def send_crowdsource_event(event, document=None):
    '''POST ``event`` as JSON to the crowdsourcing event URL (best effort).

    event    -- JSON-serialisable object sent as the request body.
    document -- optional document; when given, its fingerprints are
                appended to the URL as repeated ``fingerprint`` query
                parameters.

    Nothing is returned.  Any failure is reported with a printed traceback
    and otherwise ignored, so this never raises into the caller.
    '''
    import pprint
    import traceback

    try:
        url = laz_eventUrl
        if document is not None:
            params = {'fingerprint': document.fingerprints()}
            url += '?{0}'.format(urllib.urlencode(params, doseq=True))

        # Some debug stuff
        print("Sending event to: " + url)
        pprint.pprint(event, indent=4)

        request = urllib2.Request(
            url,
            data=json.dumps(event),
            headers={'Content-Type': 'application/json'})
        # Bounded wait so that an unresponsive server cannot block the
        # caller forever.
        response = urllib2.urlopen(request, timeout=60)
        # The response body is not used; close it so the connection is
        # released straight away rather than whenever it gets collected.
        response.close()
    except Exception:
        # Deliberately best-effort: report the problem and carry on.
        traceback.print_exc()
def on_ready_event(self, document):
    '''Fetch information from the Lazarus service and annotate the document.

    When the 'permission' setting is off, a single 'Collated' annotation
    built from 'tpl/denied.html' is added (unless 'noprompt' is set) and
    nothing else happens.

    Otherwise the server is queried using the document's fingerprints; if it
    answers 204 the PDF data itself is POSTed and that second response is
    used.  The annotations parsed from the response are then turned into:

    * Anchor / OutlineItem pairs for header structure elements (only when
      the document has no OutlineItem yet),
    * Citation annotations, added to the scratch list of the 'metadata' link,
    * LazarusConcept annotations, plus one 'Collated' hyperlink annotation
      per concept hit that has related links and at least one text match,
    * one 'Lazarus Expressions' annotation (script, style and markup) when
      at least one sentence expression could be located in the document.

    Annotations of any other concept are added unchanged.  Returns None.
    '''
    import traceback

    if not self.get_config('permission', False):
        # No permission: optionally tell the user that Lazarus is off.
        if not self.get_config('noprompt', False):
            annotation = spineapi.Annotation()
            annotation['concept'] = 'Collated'
            params = {
                'uuid': self.uuid(),
            }
            annotation['property:html'] = utopia.get_plugin_data(
                'tpl/denied.html').format(**params)
            annotation['property:name'] = 'Lazarus'
            annotation['property:description'] = \
                'Lazarus functionality is turned off'
            annotation['property:sourceDescription'] = self.sourceDescription
            annotation['property:sourceIcon'] = utopia.get_plugin_data_as_url(
                'images/lazarus-prefs-logo.png', 'image/png')
            annotation['session:default'] = '1'
            document.addAnnotation(annotation)
        return

    # If an outline already exists, don't make a new one
    needs_outline = not any(
        existing.get('concept') == 'OutlineItem'
        for existing in document.annotations())

    # The DOI (if known) is used further down to drop this document from the
    # lists of related articles.
    this_doi = utopia.tools.utils.metadata(document, 'identifiers[doi]')
    if this_doi is not None:
        this_doi = u'doi:' + this_doi

    # Speak to server
    params = {'fingerprint': document.fingerprints()}
    url = '{0}?{1}'.format(laz_docUrl, urllib.urlencode(params, doseq=True))
    response = urllib2.urlopen(url, timeout=60)
    if response.getcode() == 204:
        # Nothing came back for these fingerprints: send the PDF itself and
        # use the response to that request instead.
        response.close()
        request = urllib2.Request(
            url,
            data=document.data(),
            headers={'Content-Type': 'application/pdf'})
        response = urllib2.urlopen(request, timeout=60)

    # Icon shared by every annotation created below
    logo_url = utopia.get_plugin_data_as_url(
        'images/lazarus-prefs-logo.png', 'image/png')

    # Create Metadata link annotation
    link = document.newAccList('metadata', 50)
    link['property:sourceDatabase'] = 'lazarus'
    link['property:sourceTitle'] = 'Lazarus'
    link['property:sourceDescription'] = self.sourceDescription
    link['property:sourceIcon'] = logo_url

    # Convert whatever the server sent.  A single annotation that cannot be
    # converted is reported and skipped rather than aborting the whole run.
    annotations = []
    try:
        for kAnnotation in kend.converter.XML.parse(
                response, kend.model.Document):
            try:
                annotations.append(
                    utopia.tools.converters.Annotation.kend2spineapi(
                        kAnnotation, document))
            except Exception:
                traceback.print_exc()
    finally:
        response.close()
    annotations.sort(key=lambda a: int(a.get('structure:order', 0)))

    pos = []                     # current outline counters, one per level
    refs = []                    # Citation annotations
    concepts = {}                # concept identifier -> LazarusConcept
    hits = []                    # LazarusConceptHit annotations
    expression_annotations = []  # LazarusSentenceExpression annotations

    for sAnnotation in annotations:
        if sAnnotation['concept'] == 'structure_element':
            role, level = self.getHeaderRole(sAnnotation)
            if role is not None and needs_outline:
                # Grow or shrink the counter stack to this header's level,
                # then count this header at that level.
                while len(pos) < level:
                    pos.append(0)
                while len(pos) > level:
                    pos.pop()
                pos[-1] += 1

                outline = u'.'.join([unicode(i) for i in pos])
                title = u' '.join([e.text() for e in sAnnotation.extents()])
                anchor_name = '#lazarus.outline.{0}'.format(outline)

                anchor = spineapi.Annotation()
                anchor['concept'] = 'Anchor'
                anchor['property:anchor'] = anchor_name
                anchor.addExtents(sAnnotation.extents())
                anchor.addAreas(sAnnotation.areas())
                document.addAnnotation(anchor)

                header = spineapi.Annotation()
                header['concept'] = 'OutlineItem'
                header['property:outlinePosition'] = outline
                header['property:outlineTitle'] = title
                header['property:destinationAnchorName'] = anchor_name
                document.addAnnotation(header)

                # Trace the outline as it is built
                print((u' ' * level + outline + u' ' + title).encode('utf8'))
            elif 'bibitem' in sAnnotation.getAllProperties('structure:role'):
                # Bibliography items are not collected from structure
                # elements; references come from Citation annotations.
                pass
        elif sAnnotation['concept'] == 'Citation':
            # Hack to fix a mistake in authors property name
            if ('property:author' in sAnnotation
                    and 'property:authors' not in sAnnotation):
                sAnnotation['property:authors'] = \
                    sAnnotation.getAllProperties('property:author')
            refs.append(sAnnotation)
        elif sAnnotation['concept'] == 'LazarusConcept':
            concept_id = sAnnotation.get('property:identifier')
            if concept_id is not None:
                sAnnotation['id'] = str(uuid.uuid4())
                concepts[concept_id] = sAnnotation
                document.addAnnotation(sAnnotation, 'Lazarus Concept')
        elif sAnnotation['concept'] == 'LazarusConceptHit':
            hits.append(sAnnotation)
        elif sAnnotation['concept'] == 'LazarusSentenceExpression':
            expression_annotations.append(sAnnotation)
        else:
            document.addAnnotation(sAnnotation)

    # Citations go into the metadata link's scratch list, in document order
    refs.sort(key=lambda ref: int(ref.get('property:order', '0')))
    for ref in refs:
        document.addAnnotation(ref, link['scratch'])

    # Styling and row titles shared by every concept pop-up
    concept_style = u'''
        <style>
          .lazarus-table tbody {
            border: none;
          }
          .lazarus-table td:first-of-type {
            text-align: right;
            font-weight: bold;
          }
          .lazarus-table td {
            vertical-align: top;
          }
          .lazarus-table td:first-of-type {
            white-space: nowrap;
          }
          .lazarus-table td:not(:first-of-type) {
            word-break: break-all;
          }
          .lazarus-table tr td {
            padding-top: 0ex;
            padding-bottom: 0ex;
          }
          .lazarus-table tbody:not(:first-of-type) tr:first-of-type td {
            padding-top: 1ex;
          }
        </style>
    '''
    categories = {
        'xref': 'Related:',
        'seeAlso': 'See also:',
        'definition': 'Defined in:'
    }

    # Now link hits to concepts
    for hit in hits:
        concept = concepts.get(hit.get('property:identifier'))
        if concept is None:
            continue

        hit['property:concept_id'] = concept.get('id')
        name = concept.get('property:name', '???')

        # External sources arrive as a 'json:'-prefixed string; anything else
        # is treated as "no sources" instead of being used as a list.
        raw_sources = concept.get('property:externalSources', 'json:[]')
        if raw_sources.startswith('json:'):
            sources = json.loads(raw_sources[5:])
        else:
            sources = []
        if 'property:stdInchiKey' in concept:
            sources.append({
                'database': ' InchiKey',
                'identifier': concept['property:stdInchiKey']
            })
        if 'property:canonicalSmiles' in concept:
            sources.append({
                'database': ' SMILES',
                'identifier': concept['property:canonicalSmiles']
            })

        # Group the rendered sources by the table section they belong to
        links = {}
        for source in sources:
            uri = source.get('uri')
            if 'primary' in source.get('relationship', []):
                links.setdefault('definition', []).append(u''' <a href="{uri}" title="{uri}">{database}</a> '''.format(**source))
            elif uri is None:
                # Without a URI only the chemical identifiers are shown, as
                # plain table rows.
                if source.get('database') in (' InchiKey', ' SMILES'):
                    links.setdefault('main', []).append(u''' <tr><td>{database}:</td><td>{identifier}</td></tr> '''.format(**source))
            else:
                # uri is never None on this branch
                links_category = 'xref'
                if 'seeAlso' in source.get('relationship', []):
                    links_category = 'seeAlso'
                links.setdefault(links_category, [])
                if source.get('identifier') is not None:
                    links[links_category].append(u''' <a href="{uri}" title="{uri}">{name}...</a> ({identifier}) '''.format(**source))
                else:
                    links[links_category].append(u''' <a href="{uri}" title="{uri}">{name}...</a> '''.format(**source))

        html = u''' <table class="lazarus-table"> <tr><td>Name:</td><td>{name}</td></tr> '''.format(name=name)
        for links_category in ('main', 'xref', 'seeAlso', 'definition'):
            # De-duplicate and order the entries of this section
            these_links = sorted(set(links.get(links_category, [])))
            if these_links:
                html += '<tbody>'
                if links_category != 'main':
                    html += u'<tr><td>{0}</td><td>'.format(
                        categories.get(links_category))
                    html += u'<br>'.join(these_links)
                    html += '</td></tr>'
                else:
                    html += ''.join(these_links)
                html += '</tbody>'
        html += u''' </table> '''

        hasLinks = len(
            links.get('xref', []) + links.get('seeAlso', [])) > 0

        ann = spineapi.Annotation()
        ann['concept'] = 'Collated'
        ann['property:name'] = u'{0}'.format(name)
        ann['property:description'] = 'Lazarus Concept'
        ann['session:semanticTerm'] = name
        ann['property:html'] = [concept_style, html]
        ann['property:sourceDescription'] = self.sourceDescription
        ann['property:sourceIcon'] = logo_url
        ann['session:overlay'] = 'hyperlink'
        ann['session:color'] = '#880000'

        # Each hit fragment has the form  pre{!match!}post ; find the match
        # in the document within that context.
        count = 0
        if 'property:hitFragments' in hit:
            hitFragments = hit.getAllProperties('property:hitFragments') or []
            for hitFragment in hitFragments:
                pre, _, rest = hitFragment.partition('{!')
                match, _, post = rest.partition('!}')
                matches = document.findInContext(pre, match, post, fuzzy=True)
                count += len(matches)
                ann.addExtents(matches)

        # Only worth showing when there is something to link to and
        # somewhere in the text to hang it on.
        if hasLinks and count > 0:
            document.addAnnotation(ann)

    style = u'''
        <style>
          .lazarus-expression .box {
            background-color: #FFF0E8;
            border-color: #EEE0D8;
          }
          .lazarus-related {
            padding-left: 42px;
            background-image: url(%s);
            background-repeat: no-repeat;
            background-position: top left;
            background-size: 37px 48px;
            min-height: 53px;
          }
          .lazarus-related + .lazarus-related {
            margin-top: 5px;
            border-top: 1px dotted #aaa;
            padding-top: 5px;
            background-position-y: 5px;
            min-height: 58px;
          }
          .lazarus-sentence {
            padding-left: 0.5em;
            color: black;
          }
          .lazarus-sentence.negative {
            border-left: solid 5px #bb0000;
          }
          .lazarus-sentence.positive {
            border-left: solid 5px #008800;
          }
          .lazarus-sentence.negative a {
            color: #bb0000;
          }
          .lazarus-sentence.positive a {
            color: #008800;
          }
        </style>
    ''' % utopia.get_plugin_data_as_url('images/pdf-page-icon.png', 'image/png')

    # Keep only the expressions whose context can be found in the document,
    # giving each an anchor the generated page can jump to.
    expressions = []
    for sAnnotation in expression_annotations:
        raw_exp = sAnnotation.get('property:expressions', 'json:{}')
        # Same 'json:' convention as above; anything else counts as empty.
        exp = json.loads(raw_exp[5:]) if raw_exp.startswith('json:') else {}
        context = sAnnotation.get('property:context')
        if context is None:
            continue

        if exp.get('negative', False):
            exp['posneg'] = 'negative'
        else:
            exp['posneg'] = 'positive'

        matched_context = exp.get('context')
        matches = []
        if matched_context is not None:
            matches = document.search(re.sub(r'\s+', ' ', matched_context))
        if len(matches) > 0:
            # NOTE(review): the slice drops the first and last character of
            # the UUID string; kept as-is, the value is only used as an
            # opaque anchor name.
            anchor_id = str(uuid.uuid4())[1:-1]
            anchor = spineapi.Annotation()
            anchor['concept'] = 'Anchor'
            anchor['property:anchor'] = anchor_id
            anchor.addExtents(matches)
            document.addAnnotation(anchor)

            exp.update({
                'anchor_id': anchor_id,
                'sentence': context
            })
            expressions.append(exp)

    # The four %s placeholders are filled, in order, with the expressions,
    # the fingerprints, the related-documents URL and this document's DOI.
    js = u'''
        <script>
            $(document).on('DOMNodeInserted', function(e) {
                var element = e.target;
                $(element).filter('a[target="tab"]').add('a[target="tab"]', element).each(function () {
                    var fragment = $(this).closest('.-papyro-internal-citation').data('citation')['userdef']['first_fragment'];
                    $(this).attr('target', 'pdf; show=highlight; text=[' + encodeURIComponent(fragment) + ']');
                });
            });

            $(function () {
                var lazarus = {
                    expressions: %s,
                    fingerprints: %s,
                    relUrl: %s
                };

                var more_expressions_link = $('#lazarus-expression > p.more').hide();
                var more_expressions_spinner = $('#lazarus-expression > div.spinner');

                Spinners.create(more_expressions_spinner);
                Spinners.play(more_expressions_spinner);

                var exp_divs = [];
                var identifiers = [];
                for (var e = 0; e < lazarus.expressions.length; e++) {
                    var expression = lazarus.expressions[e];
                    var exp_div = $('<div class="box"></div>');
                    exp_div.data('expression', expression);
                    exp_div.hide();
                    exp_divs.push(exp_div);
                    identifiers.push(expression.identifiers);
                }
                var params = {
                    fingerprint: lazarus.fingerprints
                };
                var url = lazarus.relUrl + '?' + $.param(params, traditional=true);
                $.ajax({
                    url: url,
                    type: 'POST',
                    dataType: 'json',
                    data: JSON.stringify(identifiers),
                    contentType: "application/json",
                    error: function (xhr, ajaxOptions, thrownError) {
                        console.log(xhr.statusText);
                        console.log(xhr.responseText);
                        console.log(xhr.status);
                        console.log(thrownError);
                        // FIXME do something here
                        Spinners.remove(more_expressions_spinner);
                    },
                    success: function (related) {
                        // Sort related according to the number of articles found
                        related.results.sort(function (l, r) {
                            var lv = Object.keys(l.related).length;
                            var rv = Object.keys(r.related).length;
                            return (lv > rv) ? -1 : (lv < rv) ? 1 : 0;
                        });
                        $.each(related.results, function (idx, result) {
                            var exp_div = exp_divs[idx];
                            var expression = exp_div.data('expression');
                            expression.related = result.related;
                            delete expression.related[%s];

                            split = expression.sentence.split(expression.context);
                            pre = split[0];
                            pre = pre.replace(/(\\w)$/, '$1 ');
                            pre = pre.replace(/^\\s*/, '');
                            match = expression.context;
                            post = split[1];
                            post = post.replace(/^(\\w)/, ' $1');
                            post = post.replace(/\\s*$/, '');
                            expression.pre = pre;
                            expression.match = match;
                            expression.post = post;

                            // Create expression element
                            exp_div.append('<p class="lazarus-sentence ' + expression.posneg + '">“' + expression.pre + '<a target="pdf; show=select; anchor=' + expression.anchor_id + '"><strong>' + expression.match + '</strong></a>' + expression.post + '”</p>');
                            exp_div.data('expression', expression);

                            $('#lazarus-expression > .content').append(exp_div);

                            if (Object.keys(expression.related).length > 0) {
                                var related_div = $('<div class="expandable" title="Related expressions elsewhere"></div>');
                                var related_div_content = $('<div></div>').appendTo(related_div);
                                function on_expand() {
                                    related_div.off('papyro:expandable:expand', on_expand);
                                    $.each(expression.related, function (idx, obj) {
                                        fragments = [];
                                        $.each(obj, function (id, obj) {
                                            fragments.push(obj.context);
                                        });
                                        fragments.join('\\n');
                                        related_div_content.append($('<div class="lazarus-related unprocessed"></div>').append('<p><strong>“…'+fragments+'…”</strong></p>').hide().data('citation', {identifiers:{doi:idx},userdef:{first_fragment:fragments[0]}}));
                                        // .append(utopia.citation.render({identifiers:{doi:idx},first_fragment:fragments[0]}, true, true))
                                    });
                                    expression.related.length = 0; // empty for future
                                    if ($('.lazarus-related.unprocessed', exp_div).length > 0) {
                                        var more = $('<p class="more right"><a class="more">More related articles...</a></p>');
                                        related_div_content.append(more);
                                        function show_five_related(e) {
                                            e.preventDefault();

                                            $('.lazarus-related.unprocessed', exp_div).slice(0, 5).each(function (idx, obj) {
                                                var citation = $(obj).data('citation');
                                                $(obj).append(utopia.citation.render(citation, true, true));
                                                $(obj).show().removeClass('unprocessed');
                                            });
                                            if ($('.lazarus-related.unprocessed', exp_div).length == 0) {
                                                more.remove();
                                            }
                                        }
                                        more.on('click', show_five_related).click();
                                    }
                                }
                                related_div.on('papyro:expandable:expand', on_expand);
                                exp_div.append(related_div);
                                utopia.processNewContent(related_div);
                            }
                        });

                        Spinners.remove(more_expressions_spinner);
                        more_expressions_link.show();
                        $('a.more', more_expressions_link).click();
                    }
                });

                function append_five(e) {
                    e.preventDefault();

                    // Show the next five
                    $('#lazarus-expression > .content').children().filter(':hidden').slice(0,5).show();

                    // Hide the 'more' link if everything is now visible
                    if ($('#lazarus-expression > .content').children().filter(':hidden').length == 0) {
                        more_expressions_link.hide();
                    }
                }

                // Hook up 'more' link
                $('#lazarus-expression > p.more > a.more').on('click', append_five).click();
            });
        </script>
    ''' % (json.dumps(expressions),
           json.dumps(document.fingerprints()),
           json.dumps(laz_docRelUrl),
           json.dumps(this_doi))

    html = u''' <div id="lazarus-expression"><div class="content"></div><div class="spinner"></div><p class="more"><a class="more">More expressions...</a></p></div> '''

    if len(expressions) > 0:
        ann = spineapi.Annotation()
        ann['concept'] = 'Collated'
        ann['property:name'] = 'Lazarus Expressions'
        ann['property:description'] = u'Summarizing expression(s)'
        ann['property:html'] = [js, style, html]
        ann['property:sourceDescription'] = self.sourceDescription
        ann['property:sourceIcon'] = logo_url
        document.addAnnotation(ann)
def _resolve(self, document):
    '''Resolve the document through kend and return ``(id, doi)``.

    The evidence sent consists of the document's fingerprints, any of the
    doi / title / arxivid / pmid / pmcid / issn / pii metadata values that
    are not None, and the page count.  If the object returned by the
    client has no ``id`` attribute the result is ``(None, None)``;
    otherwise the DOI half is the scraped DOI, or None when there is none.
    '''
    # Fingerprints come first
    evidence = []
    for fingerprint in document.fingerprints():
        evidence.append(kend.model.Evidence(
            type='fingerprint', data=fingerprint, srctype='document'))

    # Collect the scraped / resolved metadata that is actually present
    fields = {}
    for key in ('doi', 'title', 'arxivid', 'pmid', 'pmcid', 'issn', 'pii'):
        found = common.utils.metadata(document, key)
        if found is not None:
            fields[key] = found
    evidence.extend(
        kend.model.Evidence(
            type=key, data=value, srctype='algorithm', src='utopia/2.2.1')
        for key, value in fields.iteritems())

    # Page count goes last
    page_count = kend.model.Evidence(
        type='pagecount', data=document.numberOfPages(), srctype='document')
    evidence.append(page_count)

    query = kend.model.DocumentReference(evidence=evidence)
    resolved = kend.client.Client().documents(query)
    try:
        resolved_id = resolved.id
    except AttributeError:
        # Nothing usable came back
        return None, None
    return resolved_id, fields.get('doi')
def _resolve(self, document):
    '''Resolve the document through kend using only its fingerprints.

    Returns the ``id`` attribute of the object handed back by the kend
    client, or None when that object has no such attribute.
    '''
    # Evidence is one fingerprint entry per document fingerprint
    evidence = []
    for fingerprint in document.fingerprints():
        evidence.append(kend.model.Evidence(
            type="fingerprint", data=fingerprint, srctype="document"))

    query = kend.model.DocumentReference(evidence=evidence)
    resolved = kend.client.Client().documents(query)
    # A missing ``id`` attribute simply yields None
    return getattr(resolved, 'id', None)