Пример #1
0
 def unidentifiedDocumentRef(self, document):
     '''Build a DocumentReference whose only evidence is the document's fingerprints'''
     fingerprint_evidence = []
     for fingerprint in document.fingerprints():
         # One 'fingerprint' evidence item per fingerprint, sourced from the document
         item = kend.model.Evidence(type='fingerprint',
                                    data=fingerprint,
                                    srctype='document')
         fingerprint_evidence.append(item)
     return kend.model.DocumentReference(evidence=fingerprint_evidence)
Пример #2
0
def send_crowdsource_event(event, document=None):
    '''Post a crowdsourcing event to the Lazarus event endpoint (best effort).

    The event is serialised with json.dumps and POSTed to ``laz_eventUrl``
    with a JSON content type. When ``document`` is given, its fingerprints
    are appended to the URL as repeated ``fingerprint`` query parameters.

    Nothing is returned and nothing is raised: any exception is printed as a
    traceback and otherwise ignored, so a failed event never interrupts the
    caller.
    '''
    # Function-scope imports, matching the style this function already used
    import contextlib
    import pprint
    import traceback

    try:
        url = laz_eventUrl
        if document is not None:
            params = {'fingerprint': document.fingerprints()}
            url += '?{0}'.format(urllib.urlencode(params, doseq=True))

        # Some debug stuff
        print("Sending event to: " + url)
        pprint.PrettyPrinter(indent=4).pprint(event)

        request = urllib2.Request(url,
                                  data=json.dumps(event),
                                  headers={'Content-Type': 'application/json'})
        # The response body is not needed, but the connection must still be
        # released. urllib2 responses are not context managers, hence
        # contextlib.closing. The timeout (same value on_ready_event uses for
        # this service) stops a stalled server from blocking the caller forever.
        with contextlib.closing(urllib2.urlopen(request, timeout=60)):
            pass
    except Exception:
        # Deliberately best-effort: report and carry on
        traceback.print_exc()
Пример #3
0
    def on_ready_event(self, document):
        '''Fetch information from the Lazarus service

        When the 'permission' config value is truthy, the document's
        fingerprints are sent to laz_docUrl; if the server answers 204 the
        document data is POSTed to the same URL as application/pdf. The
        response is parsed as kend annotations, converted, sorted by
        'structure:order' and then dispatched on their 'concept':

        - 'structure_element': headers become Anchor + OutlineItem pairs,
          but only if the document had no OutlineItem annotation already;
        - 'Citation': collected, sorted and added using link['scratch'];
        - 'LazarusConcept' / 'LazarusConceptHit': each hit whose concept is
          known may produce a 'Collated' hyperlink annotation;
        - 'LazarusSentenceExpression': matched expressions feed one
          'Lazarus Expressions' annotation carrying script, CSS and HTML;
        - anything else is added to the document unchanged.

        When permission is not granted and the 'noprompt' config value is
        falsy, a 'Collated' annotation rendered from tpl/denied.html is
        added instead.

        Network errors from urllib2.urlopen are not caught here.
        '''

        permission = self.get_config('permission', False)
        if permission:
            # If an outline already exists, don't make a new one
            needs_outline = True
            for annotation in document.annotations():
                if annotation.get('concept') == 'OutlineItem':
                    needs_outline = False
                    break

            # The Lazarus server needs to know what this document is
            # NOTE(review): document_id is never read again in this method
            document_id = utopia.tools.utils.metadata(document,
                                                      'identifiers[utopia]')
            this_doi = utopia.tools.utils.metadata(document,
                                                   'identifiers[doi]')
            if this_doi is not None:
                this_doi = u'doi:' + this_doi

            # Speak to server
            # First a GET keyed on the fingerprints; a 204 reply triggers a
            # second request that uploads the document data itself.
            # NOTE(review): neither response object is explicitly closed
            params = {'fingerprint': document.fingerprints()}
            url = '{0}?{1}'.format(laz_docUrl,
                                   urllib.urlencode(params, doseq=True))
            response = urllib2.urlopen(url, timeout=60)
            if response.getcode() == 204:
                request = urllib2.Request(
                    url,
                    data=document.data(),
                    headers={'Content-Type': 'application/pdf'})
                response = urllib2.urlopen(request, timeout=60)
            #response = open('/Users/dave/Desktop/ananiadou_tibtech06.pdf-response.xml', 'r')

            # Create Metadata link annotation
            link = document.newAccList('metadata', 50)
            link['property:sourceDatabase'] = 'lazarus'
            link['property:sourceTitle'] = 'Lazarus'
            link['property:sourceDescription'] = self.sourceDescription
            link['property:sourceIcon'] = utopia.get_plugin_data_as_url(
                'images/lazarus-prefs-logo.png', 'image/png')

            # Accumulators filled while dispatching the converted annotations
            # NOTE(review): headers is never used below
            headers = []
            pos = []  # current outline position, one counter per nesting level
            refs = []
            annotations = []
            concepts = {}  # 'property:identifier' -> LazarusConcept annotation
            hits = []
            expression_annotations = []
            for kAnnotation in kend.converter.XML.parse(
                    response, kend.model.Document):
                #print kend.converter.XML.serialise(kAnnotation)[0]
                # NOTE(review): the bare except silently drops any annotation
                # whose conversion fails, and hides every other error raised
                # by the conversion too
                try:
                    annotations.append(
                        utopia.tools.converters.Annotation.kend2spineapi(
                            kAnnotation, document))
                except:
                    pass
            annotations.sort(key=lambda a: int(a.get('structure:order', 0)))
            for sAnnotation in annotations:
                if sAnnotation['concept'] == 'structure_element':
                    role, level = self.getHeaderRole(sAnnotation)
                    if role is not None and needs_outline:
                        # Grow or shrink pos to exactly `level` entries, then
                        # bump the counter of the deepest level.
                        # NOTE(review): assumes level >= 1; with level == 0
                        # pos would be empty and pos[-1] would raise
                        while len(pos) < level:
                            pos.append(0)
                        while len(pos) > level:
                            pos.pop()
                        pos[-1] += 1

                        outline = u'.'.join([unicode(i) for i in pos])
                        anchor_name = '#lazarus.outline.{0}'.format(outline)

                        # Anchor covering the header's own extents and areas
                        anchor = spineapi.Annotation()
                        anchor['concept'] = 'Anchor'
                        anchor['property:anchor'] = anchor_name
                        anchor.addExtents(sAnnotation.extents())
                        anchor.addAreas(sAnnotation.areas())
                        document.addAnnotation(anchor)

                        # Outline entry that points at the anchor above
                        header = spineapi.Annotation()
                        header['concept'] = 'OutlineItem'
                        header['property:outlinePosition'] = outline
                        header['property:outlineTitle'] = u' '.join(
                            [e.text() for e in sAnnotation.extents()])
                        header['property:destinationAnchorName'] = anchor_name
                        document.addAnnotation(header)

                        # Debug output: indented outline position and title
                        print((u'    ' * level +
                               u'.'.join([unicode(i)
                                          for i in pos]) + u' ' + u' '.join([
                                              e.text()
                                              for e in sAnnotation.extents()
                                          ])).encode('utf8'))
                    elif 'bibitem' in sAnnotation.getAllProperties(
                            'structure:role'):
                        #refs.append(sAnnotation)
                        pass
                elif sAnnotation['concept'] == 'Citation':
                    # Hack to fix a mistake in authors property name
                    if 'property:author' in sAnnotation and not 'property:authors' in sAnnotation:
                        sAnnotation[
                            'property:authors'] = sAnnotation.getAllProperties(
                                'property:author')
                    refs.append(sAnnotation)
                elif sAnnotation['concept'] == 'LazarusConcept':
                    # Concepts without an identifier are dropped entirely
                    concept_id = sAnnotation.get('property:identifier')
                    if concept_id is not None:
                        sAnnotation['id'] = str(uuid.uuid4())
                        concepts[concept_id] = sAnnotation
                        document.addAnnotation(sAnnotation, 'Lazarus Concept')
                elif sAnnotation['concept'] == 'LazarusConceptHit':
                    hits.append(sAnnotation)
                elif sAnnotation['concept'] == 'LazarusSentenceExpression':
                    expression_annotations.append(sAnnotation)
                else:
                    document.addAnnotation(sAnnotation)

            # Leftover debugging loop; does nothing
            for ref in refs:
                #print(ref.get('structure:order', '0'))
                pass
            # NOTE(review): citations are sorted on 'property:order' while the
            # annotations above were sorted on 'structure:order' - confirm
            # which key the server actually sets on Citation annotations
            refs = sorted(refs,
                          key=lambda ref: int(ref.get('property:order', '0')))

            # Leftover debugging loop; does nothing
            for ref in refs:
                #print(ref.get('structure:order', '0'))
                pass
            for ref in refs:
                # Create Bibliography annotations
                #citation = {'unstructured': u' '.join([e.text() for e in ref.extents()])}
                #annotation = utopia.tools.utils.citation_to_annotation(citation)
                #annotation['property:order'] = ref.get('structure:order')
                #annotation.addExtents(ref.extents())
                #annotation.addAreas(ref.areas())
                #document.addAnnotation(annotation, link['scratch'])
                document.addAnnotation(ref, link['scratch'])

            # Now link hits to concepts
            # NOTE(review): the enumerate index i is never used
            for i, hit in enumerate(hits):
                concept_id = hit.get('property:identifier')
                concept = concepts.get(concept_id)
                if concept is not None:
                    concept_uuid = concept.get('id')
                    hit['property:concept_id'] = concept_uuid

                    # NOTE(review): this identifier value is never read; the
                    # name is reassigned inside the sources loop below
                    identifier = concept.get('property:identifier')
                    name = concept.get('property:name', '???')
                    sources = concept.get('property:externalSources',
                                          'json:[]')
                    # NOTE(review): without the 'json:' prefix sources stays a
                    # string and the append / get calls below would fail -
                    # confirm the server always sends the prefix
                    if sources.startswith('json:'):
                        sources = json.loads(sources[5:])
                    # Chemical identifiers are added as extra sources with no
                    # uri, so they land in the 'main' rows below
                    if 'property:stdInchiKey' in concept:
                        sources.append({
                            'database':
                            ' InchiKey',
                            'identifier':
                            concept['property:stdInchiKey']
                        })
                    if 'property:canonicalSmiles' in concept:
                        sources.append({
                            'database':
                            ' SMILES',
                            'identifier':
                            concept['property:canonicalSmiles']
                        })
                    kind = concept.get('property:kind')
                    kind = self.dbs.get(kind, {}).get('title', kind)
                    # Bucket each source's HTML fragment by link category:
                    # 'definition', 'main', 'xref' or 'seeAlso'.
                    # NOTE(review): each format(**source) raises KeyError if
                    # the source lacks a key its template names
                    links = {}
                    for source in sources:
                        uri = source.get('uri')
                        if 'primary' in source.get('relationship', []):
                            links.setdefault('definition', [])
                            links['definition'].append(u'''
                                <a href="{uri}" title="{uri}">{database}</a>
                            '''.format(**source))
                        elif uri is None:
                            # Sources without a uri are only shown for the two
                            # chemical identifiers added above
                            if source.get('database') in (' InchiKey',
                                                          ' SMILES'):
                                links.setdefault('main', [])
                                links['main'].append(u'''
                                    <tr><td>{database}:</td><td>{identifier}</td></tr>
                                '''.format(**source))
                        else:
                            identifier = source.get('identifier')
                            links_category = 'xref'
                            # NOTE(review): uri cannot be None in this branch
                            # (handled by the elif above), so only the
                            # 'seeAlso' relationship test can be true here
                            if 'seeAlso' in source.get('relationship',
                                                       []) or uri is None:
                                links_category = 'seeAlso'
                            links.setdefault(links_category, [])
                            if identifier is not None:
                                links[links_category].append(u'''
                                    <a href="{uri}" title="{uri}">{name}...</a> ({identifier})
                                '''.format(**source))
                            else:
                                links[links_category].append(u'''
                                    <a href="{uri}" title="{uri}">{name}...</a>
                                '''.format(**source))

                    # Stylesheet for the concept table (rebuilt for every hit)
                    style = u'''
                        <style>
                          .lazarus-table tbody {
                            border: none;
                          }
                          .lazarus-table td:first-of-type {
                            text-align: right;
                            font-weight: bold;
                          }
                          .lazarus-table td {
                            vertical-align: top;
                          }
                          .lazarus-table td:first-of-type {
                            white-space: nowrap;
                          }
                          .lazarus-table td:not(:first-of-type) {
                            word-break: break-all;
                          }
                          .lazarus-table tr td {
                            padding-top: 0ex;
                            padding-bottom: 0ex;
                          }
                          .lazarus-table tbody:not(:first-of-type) tr:first-of-type td {
                            padding-top: 1ex;
                          }
                        </style>
                    '''
                    # NOTE(review): name and the source fields are inserted
                    # into the HTML without escaping
                    html = u'''
                        <table class="lazarus-table">
                          <tr><td>Name:</td><td>{name}</td></tr>
                    '''.format(**{'name': name})
                    categories = {
                        'xref': 'Related:',
                        'seeAlso': 'See also:',
                        'definition': 'Defined in:'
                    }
                    # One <tbody> per non-empty category, with duplicate
                    # fragments removed and the remainder sorted
                    for links_category in ('main', 'xref', 'seeAlso',
                                           'definition'):
                        links_title = categories.get(links_category)
                        these_links = sorted(
                            list(set(links.get(links_category, []))))
                        if len(these_links) > 0:
                            html += '<tbody>'
                            if links_category != 'main':
                                html += u'<tr><td>{0}</td><td>'.format(
                                    links_title)
                                html += u'<br>'.join(these_links)
                                html += '</td></tr>'
                            else:
                                # 'main' fragments are already complete rows
                                html += ''.join(these_links)
                            html += '</tbody>'
                    #pprint('------------------------')
                    html += u'''
                        </table>
                    '''
                    #print(html)

                    # Only xref / seeAlso links count; 'definition' and 'main'
                    # entries alone do not make the annotation worth adding
                    hasLinks = len(
                        links.get('xref', []) + links.get('seeAlso', [])) > 0

                    ann = spineapi.Annotation()
                    ann['concept'] = 'Collated'
                    ann['property:name'] = u'{0}'.format(name)
                    ann['property:description'] = 'Lazarus Concept'
                    ann['session:semanticTerm'] = name
                    ann['property:html'] = [style, html]
                    ann['property:sourceDescription'] = self.sourceDescription
                    ann['property:sourceIcon'] = utopia.get_plugin_data_as_url(
                        'images/lazarus-prefs-logo.png', 'image/png')
                    ann['session:overlay'] = 'hyperlink'
                    ann['session:color'] = '#880000'
                    count = 0  # number of matches found in the document text
                    # NOTE(review): looks like a leftover debug print
                    print('====', 7)
                    if 'property:hitFragments' in hit:
                        hitFragments = hit.getAllProperties(
                            'property:hitFragments') or []
                        #pprint(hitFragments)
                        for hitFragment in hitFragments:
                            # Each fragment is split as pre{!match!}post
                            pre, _, rest = hitFragment.partition('{!')
                            match, _, post = rest.partition('!}')
                            #pprint((pre, match, post))
                            matches = document.findInContext(pre,
                                                             match,
                                                             post,
                                                             fuzzy=True)
                            count += len(matches)
                            ann.addExtents(matches)
                    if hasLinks and count > 0:
                        document.addAnnotation(ann)

            # Stylesheet for the expressions panel; the single %s receives the
            # URL of the page icon used as background image
            style = u'''
                <style>
                    .lazarus-expression .box {
                        background-color: #FFF0E8;
                        border-color: #EEE0D8;
                    }
                    .lazarus-related {
                        padding-left: 42px;
                        background-image: url(%s);
                        background-repeat: no-repeat;
                        background-position: top left;
                        background-size: 37px 48px;
                        min-height: 53px;
                    }
                    .lazarus-related + .lazarus-related {
                        margin-top: 5px;
                        border-top: 1px dotted #aaa;
                        padding-top: 5px;
                        background-position-y: 5px;
                        min-height: 58px;
                    }
                    .lazarus-sentence {
                        padding-left: 0.5em;
                        color: black;
                    }
                    .lazarus-sentence.negative {
                        border-left: solid 5px #bb0000;
                    }
                    .lazarus-sentence.positive {
                        border-left: solid 5px #008800;
                    }
                    .lazarus-sentence.negative a {
                        color: #bb0000;
                    }
                    .lazarus-sentence.positive a {
                        color: #008800;
                    }
                </style>
            ''' % utopia.get_plugin_data_as_url('images/pdf-page-icon.png',
                                                'image/png')

            # Keep only the expressions whose matched context can be found in
            # the document; each one gets an Anchor annotation to link to
            expressions = []
            for sAnnotation in expression_annotations:
                exp = sAnnotation.get('property:expressions', 'json:{}')
                # NOTE(review): without the 'json:' prefix exp stays a string
                # and the exp.get calls below would fail
                if exp.startswith('json:'):
                    exp = json.loads(exp[5:])
                context = sAnnotation.get('property:context')
                if context is not None:
                    if exp.get('negative', False):
                        exp['posneg'] = 'negative'
                    else:
                        exp['posneg'] = 'positive'

                    # NOTE(review): debug output; requires pprint to be bound
                    # to a callable at module level (not visible here)
                    pprint(context)
                    pprint(exp)

                    matched_context = exp.get('context')
                    matches = []
                    if matched_context is not None:
                        # Whitespace runs are collapsed to single spaces
                        # before searching the document text
                        matches = document.search(
                            re.sub(r'\s+', ' ', matched_context))
                        if len(matches) > 0:
                            # NOTE(review): [1:-1] drops the first and last
                            # character of the UUID string - intent unclear
                            anchor_id = str(uuid.uuid4())[1:-1]
                            anchor = spineapi.Annotation()
                            anchor['concept'] = 'Anchor'
                            anchor['property:anchor'] = anchor_id
                            anchor.addExtents(matches)
                            document.addAnnotation(anchor)

                            exp.update({
                                'anchor_id': anchor_id,
                                'sentence': context
                            })
                            expressions.append(exp)

            # Client-side script for the expressions panel. Its four %s slots
            # are filled, in order, with the JSON-encoded expressions, the
            # document fingerprints, laz_docRelUrl and this_doi.
            js = u'''
                <script>
                    $(document).on('DOMNodeInserted', function(e) {
                        var element = e.target;
                        $(element).filter('a[target="tab"]').add('a[target="tab"]', element).each(function () {
                            var fragment = $(this).closest('.-papyro-internal-citation').data('citation')['userdef']['first_fragment'];
                            $(this).attr('target', 'pdf; show=highlight; text=[' + encodeURIComponent(fragment) + ']');
                        });
                    });

                    $(function () {
                        var lazarus = {
                            expressions: %s,
                            fingerprints: %s,
                            relUrl: %s
                        };

                        var more_expressions_link = $('#lazarus-expression > p.more').hide();
                        var more_expressions_spinner = $('#lazarus-expression > div.spinner');

                        Spinners.create(more_expressions_spinner);
                        Spinners.play(more_expressions_spinner);

                        var exp_divs = [];
                        var identifiers = [];
                        for (var e = 0; e < lazarus.expressions.length; e++) {
                            var expression = lazarus.expressions[e];
                            var exp_div = $('<div class="box"></div>');
                            exp_div.data('expression', expression);
                            exp_div.hide();
                            exp_divs.push(exp_div);
                            identifiers.push(expression.identifiers);
                        }
                        var params = {
                            fingerprint: lazarus.fingerprints
                        };
                        var url = lazarus.relUrl + '?' + $.param(params, traditional=true);
                        $.ajax({
                            url: url,
                            type: 'POST',
                            dataType: 'json',
                            data: JSON.stringify(identifiers),
                            contentType: "application/json",
                            error: function (xhr, ajaxOptions, thrownError) {
                                console.log(xhr.statusText);
                                console.log(xhr.responseText);
                                console.log(xhr.status);
                                console.log(thrownError);

                                // FIXME do something here
                                Spinners.remove(more_expressions_spinner);
                            },
                            success: function (related) {
                                // Sort related according to the number of articles found
                                related.results.sort(function (l, r) {
                                    var lv = Object.keys(l.related).length;
                                    var rv = Object.keys(r.related).length;
                                    return (lv > rv) ? -1 : (lv < rv) ? 1 : 0;
                                });
                                $.each(related.results, function (idx, result) {
                                    var exp_div = exp_divs[idx];
                                    var expression = exp_div.data('expression');
                                    expression.related = result.related;
                                    delete expression.related[%s];

                                    split = expression.sentence.split(expression.context);
                                    pre = split[0];
                                    pre = pre.replace(/(\w)$/, '$1 ');
                                    pre = pre.replace(/^\s*/, '');
                                    match = expression.context;
                                    post = split[1];
                                    post = post.replace(/^(\w)/, ' $1');
                                    post = post.replace(/\s*$/, '');
                                    expression.pre = pre;
                                    expression.match = match;
                                    expression.post = post;

                                    // Create expression element
                                    exp_div.append('<p class="lazarus-sentence ' + expression.posneg + '">&ldquo;' + expression.pre + '<a target="pdf; show=select; anchor=' + expression.anchor_id + '"><strong>' + expression.match + '</strong></a>' + expression.post + '&rdquo;</p>');
                                    exp_div.data('expression', expression);

                                    $('#lazarus-expression > .content').append(exp_div);

                                    if (Object.keys(expression.related).length > 0) {
                                        var related_div = $('<div class="expandable" title="Related expressions elsewhere"></div>');
                                        var related_div_content = $('<div></div>').appendTo(related_div);
                                        function on_expand() {
                                            related_div.off('papyro:expandable:expand', on_expand);
                                            $.each(expression.related, function (idx, obj) {
                                                fragments = [];
                                                $.each(obj, function (id, obj) {
                                                    fragments.push(obj.context);
                                                });
                                                fragments.join('\\n');
                                                related_div_content.append($('<div class="lazarus-related unprocessed"></div>').append('<p><strong>&ldquo;&hellip;'+fragments+'&hellip;&rdquo;</strong></p>').hide().data('citation', {identifiers:{doi:idx},userdef:{first_fragment:fragments[0]}}));
                                                // .append(utopia.citation.render({identifiers:{doi:idx},first_fragment:fragments[0]}, true, true))
                                            });
                                            expression.related.length = 0; // empty for future

                                            if ($('.lazarus-related.unprocessed', exp_div).length > 0) {
                                                var more = $('<p class="more right"><a class="more">More related articles...</a></p>');
                                                related_div_content.append(more);
                                                function show_five_related(e) {
                                                    e.preventDefault();

                                                    $('.lazarus-related.unprocessed', exp_div).slice(0, 5).each(function (idx, obj) {
                                                        var citation = $(obj).data('citation');
                                                        $(obj).append(utopia.citation.render(citation, true, true));
                                                        $(obj).show().removeClass('unprocessed');
                                                    });
                                                    if ($('.lazarus-related.unprocessed', exp_div).length == 0) {
                                                        more.remove();
                                                    }
                                                }
                                                more.on('click', show_five_related).click();
                                            }
                                        }
                                        related_div.on('papyro:expandable:expand', on_expand);
                                        exp_div.append(related_div);
                                        utopia.processNewContent(related_div);
                                    }
                                });

                                Spinners.remove(more_expressions_spinner);
                                more_expressions_link.show();
                                $('a.more', more_expressions_link).click();
                            }
                        });

                        function append_five(e) {
                            e.preventDefault();

                            // Show the next five
                            $('#lazarus-expression > .content').children().filter(':hidden').slice(0,5).show();

                            // Hide the 'more' link if everything is now visible
                            if ($('#lazarus-expression > .content').children().filter(':hidden').length == 0) {
                                more_expressions_link.hide();
                            }
                        }

                        // Hook up 'more' link
                        $('#lazarus-expression > p.more > a.more').on('click', append_five).click();
                    });
                </script>
            ''' % (json.dumps(expressions), json.dumps(
                document.fingerprints()), json.dumps(laz_docRelUrl),
                   json.dumps(this_doi))
            #print(js.encode('utf8'))

            # Static container that the script above populates
            html = u'''
                <div id="lazarus-expression"><div class="content"></div><div class="spinner"></div><p class="more"><a class="more">More expressions...</a></p></div>
            '''

            # The panel is only added when at least one expression matched
            if len(expressions) > 0:
                ann = spineapi.Annotation()
                ann['concept'] = 'Collated'
                ann['property:name'] = 'Lazarus Expressions'
                ann['property:description'] = u'Summarizing expression(s)'
                ann['property:html'] = [js, style, html]
                ann['property:sourceDescription'] = self.sourceDescription
                ann['property:sourceIcon'] = utopia.get_plugin_data_as_url(
                    'images/lazarus-prefs-logo.png', 'image/png')
                document.addAnnotation(ann)

        else:  # no permission
            # Show the "turned off" panel unless the 'noprompt' config value
            # is truthy
            noprompt = self.get_config('noprompt', False)
            if not noprompt:
                annotation = spineapi.Annotation()
                annotation['concept'] = 'Collated'
                # The template is filled with this plugin's uuid
                params = {
                    'uuid': self.uuid(),
                }
                annotation['property:html'] = utopia.get_plugin_data(
                    'tpl/denied.html').format(**params)
                annotation['property:name'] = 'Lazarus'
                annotation[
                    'property:description'] = 'Lazarus functionality is turned off'
                annotation[
                    'property:sourceDescription'] = self.sourceDescription
                annotation[
                    'property:sourceIcon'] = utopia.get_plugin_data_as_url(
                        'images/lazarus-prefs-logo.png', 'image/png')
                annotation['session:default'] = '1'
                document.addAnnotation(annotation)
    def _resolve(self, document):
        '''Resolve the document through the kend client.

        Returns a (document id, doi) pair, or (None, None) when the client's
        answer has no ``id`` attribute. The doi is whatever the document
        metadata holds and may itself be None.
        '''
        # Fingerprint evidence always comes first
        evidence = []
        for fingerprint in document.fingerprints():
            evidence.append(kend.model.Evidence(type='fingerprint', data=fingerprint, srctype='document'))

        # Gather whichever scraped / resolved metadata fields are present
        fields = {}
        for name in ('doi', 'title', 'arxivid', 'pmid', 'pmcid', 'issn', 'pii'):
            found = common.utils.metadata(document, name)
            if found is None:
                continue
            fields[name] = found
        evidence.extend([
            kend.model.Evidence(type=name, data=found, srctype='algorithm', src='utopia/2.2.1')
            for name, found in fields.iteritems()
        ])

        # The page count is the final piece of evidence
        evidence.append(kend.model.Evidence(type='pagecount', data=document.numberOfPages(), srctype='document'))

        reference = kend.client.Client().documents(
            kend.model.DocumentReference(evidence=evidence))

        try:
            resolved_id = reference.id
        except AttributeError:
            # The client's answer carries no id
            return None, None
        return resolved_id, fields.get('doi')
    def _resolve(self, document):
        """Resolve the document by fingerprint alone; return its id, or None if the result has none."""
        # Fingerprints are the only evidence sent in this variant
        evidence = []
        for fingerprint in document.fingerprints():
            evidence.append(kend.model.Evidence(type="fingerprint", data=fingerprint, srctype="document"))

        reference = kend.client.Client().documents(
            kend.model.DocumentReference(evidence=evidence))

        # A result without an `id` attribute yields None
        return getattr(reference, "id", None)