def createAnnotation(self, domain, document, html, mentions):
        """Build one 'Bio3DMInformation' annotation for a group of mentions.

        The descriptive properties (name, description, html, css, js) are
        read from the first entry of ``mentions``; afterwards every text
        range of every mention is attached to the annotation as an extent.

        ``html`` is accepted but not read by this method.

        Returns the populated annotation.
        """
        result = Annotation()
        result['concept'] = 'Bio3DMInformation'

        first = mentions[0]
        kind = first['mentionType'].title()
        result['property:name'] = '%s: "%s"' % (
            kind, first['formalRepresentation'])
        result['property:description'] = '3DM %s record' % kind

        # Set from ``domain`` here and then reassigned to 'bioprodict' a few
        # lines further down.
        result['property:sourceDatabase'] = domain
        for field in ('html', 'css', 'js'):
            result['property:' + field] = first[field]
        result['property:sourceDatabase'] = 'bioprodict'
        result['property:sourceDescription'] = (
            '<p><a href="http://www.bio-prodict.nl">Bio-Prodict\'s</a> 3DM information systems provide protein family-specific annotations for this article</p>')

        result['session:overlay'] = 'hyperlink'
        result['session:color'] = '#336611'

        # Each text range is given as start/end; substr takes (start, length).
        for entry in mentions:
            for span in entry['textRangeList']:
                offset = int(span['start'])
                length = int(span['end']) - offset
                result.addExtent(document.substr(offset, length))

        return result
示例#2
0
    def on_activate_event(self, document, data=None):
        """Handle an activation event, annotating the document on request.

        ``data`` is an optional mapping; its 'action' and 'domain' entries
        are read.  ``self.annotatedDomains`` is initialised to an empty list
        if it is still None.  Nothing else happens unless the action is
        'annotate'.

        For 'annotate', the document text is sent to ``self.getMentions``.
        Every returned mention whose type is neither 'SPECIES' nor 'PDB' gets
        html/css/js built by ``self.buildHtml``.  Mentions that share the
        same html are grouped into a single annotation via
        ``self.createAnnotation`` and added to the document.
        """
        # Use None as the default instead of a dict literal so that no
        # mutable object is shared between calls.
        if data is None:
            data = {}
        action = data.get('action')
        domain = data.get('domain')

        if self.annotatedDomains is None:
            self.annotatedDomains = []

        if action != 'annotate':
            return

        # Single-argument, parenthesised print: the output is the same
        # whether print is a statement or a function.
        print('starting 3DM annotation . . .')
        pubmedId = utopia.tools.utils.metadata(document, 'identifiers[pubmed]')
        if pubmedId is None:
            pubmedId = '0'
        print('sending text to remote server (' + pubmedId + '). . .')
        textMentions = self.getMentions(domain, document.text())
        print('received response, adding annotations for domain ' + domain + ' . . .')

        # Group mentions by their rendered html so that identical records
        # end up in one annotation with several extents.
        mention_cache = {}
        for mention in textMentions:
            if mention['mentionType'] in ('SPECIES', 'PDB'):
                continue
            html, css, js = self.buildHtml(domain, mention)
            mention['html'] = html.encode('utf-8')
            mention['css'] = css.encode('utf-8')
            mention['js'] = js.encode('utf-8')
            mention_cache.setdefault(mention['html'], []).append(mention)

        for html, mentions in mention_cache.items():
            annotation = self.createAnnotation(domain, document, html, mentions)
            annotation['displayRelevance'] = '2000'
            annotation['displayRank'] = '2000'
            document.addAnnotation(annotation)

        # An empty annotation is added with ``domain`` as second argument.
        # NOTE(review): presumably marks the domain as processed - confirm.
        document.addAnnotation(Annotation(), domain)
        print('done adding annotations.')
示例#3
0
    def on_ready_event(self, document):
        """Log in to 3DM and advertise the user's databases on the document.

        Reads 'username' and 'password' from the plugin configuration and
        returns without doing anything when ``validUsernameAndPassword``
        rejects them.  Otherwise a bearer token is requested, substituted
        for '#TOKEN#' in ``self.proteinJs`` and ``self.commonJs``, the
        available databases are fetched, and a single 'Bio3DMInformation'
        annotation listing them is added to ``document``.
        """
        username = self.get_config('username')
        password = self.get_config('password')
        if not self.validUsernameAndPassword(username, password):
            return

        # Exchange the credentials for a fresh bearer token.
        # base64.encodestring('utopia-plugin:').replace('\n', '')
        basic = 'Basic dXRvcGlhLXBsdWdpbjo='
        credentials = {
            'username': username,
            'password': password,
            'grant_type': 'password',
        }
        token_reply = post_for_json(self.tokenurl, basic, credentials)
        self.bearer = 'Bearer ' + token_reply['access_token']
        self.proteinJs = self.proteinJs.replace('#TOKEN#', self.bearer)
        self.commonJs = self.commonJs.replace('#TOKEN#', self.bearer)

        # Fetch the databases available to this user, ordered by their
        # second component (the description).
        databases = post_for_json(self.databasesurl, self.bearer)
        ordered = sorted(databases.items(), key=lambda pair: pair[1])
        databaseIds = [pair[0] for pair in ordered]
        databaseDescriptions = [pair[1] for pair in ordered]

        properties = (
            ('concept', 'Bio3DMInformation'),
            ('property:name', 'Bio-Prodict 3DM'),
            ('property:html', 'html'),
            ('session:overlay', 'hyperlink'),
            ('session:color', '#336611'),
            ('property:description', '''Annotate using one of your 3DM systems'''),
            ('property:databaseIds', '|'.join(databaseIds)),
            ('property:databaseDescriptions', '|'.join(databaseDescriptions)),
            ('property:sourceDatabase', 'bioprodict'),
            ('property:sourceDescription', '<p><a href="http://www.bio-prodict.nl">Bio-Prodict\'s</a> 3DM information systems provide protein family-specific annotations for this article</p>'),
        )
        annotation = Annotation()
        for key, value in properties:
            annotation[key] = value

        document.addAnnotation(annotation)
    def createAnnotation(self, domain, document, html, mentions):
        """Build one 'Bio3DMInformation' annotation for a group of mentions.

        Mentions are accessed through attributes (``mentionType``,
        ``formalRepresentation``, ``html``, ``css``, ``js``,
        ``textRangeList``).  The descriptive properties come from the first
        mention; every text range of every mention is attached as an extent.

        ``html`` is accepted but not read by this method.

        Returns the populated annotation.
        """
        result = Annotation()
        result["concept"] = "Bio3DMInformation"

        first = mentions[0]
        kind = first.mentionType.title()
        result["property:name"] = '%s: "%s"' % (kind, first.formalRepresentation)
        result["property:description"] = "3DM %s record" % kind

        # Set from ``domain`` here and then reassigned to "bioprodict" below.
        result["property:sourceDatabase"] = domain
        result["property:html"] = first.html
        result["property:css"] = first.css
        result["property:js"] = first.js
        result["property:sourceDatabase"] = "bioprodict"
        result["property:sourceDescription"] = (
            '<p><a href="http://www.bio-prodict.nl">Bio-Prodict\'s</a> 3DM information systems provide protein family-specific annotations for this article</p>'
        )

        # Each text range is given as start/end; substr takes (start, length).
        for entry in mentions:
            for span in entry.textRangeList:
                offset = int(span.start)
                length = int(span.end) - offset
                result.addExtent(document.substr(offset, length))

        return result
    def on_activate_event(self, document):
        """Annotate the document with entities found by the Reflect service.

        Page texts are whitespace-normalised and joined into fragments; a new
        fragment is started whenever appending the next page would exceed
        ``maxTextFragmentSize`` characters.  Each fragment is POSTed to
        reflect.ws and the returned items are collected per lower-cased name.
        The document is then searched (case-insensitive, whole words, from
        the fragment's first page) for those names, and each match is added
        as an extent to a 'Reflection' annotation, one per item name.
        """
        ns = {'r': 'Reflect'}

        maxTextFragmentSize = 1000000
        textFragments = []
        seenItemNames = set()
        ignoredEntityTypes = [-11]

        # Retrieve the full text of the document, split into fragments.
        # Each fragment remembers the page on which it starts.
        for page in document.pages():
            pageText = re.sub(r'\s+', r' ', page.pageText())
            if (not textFragments or
                    len(textFragments[-1][0]) + len(pageText) > maxTextFragmentSize):
                textFragments.append([pageText, page])
            else:
                textFragments[-1][0] = textFragments[-1][0] + ' ' + pageText

        for text, page in textFragments:
            # Package it as URL encoded form encoding
            payload = 'document=%s' % urllib.quote(text.encode('utf8'))
            # Send it off to the reflect server; always release the
            # connection, even if reading the body fails.
            response = urllib2.urlopen("http://reflect.ws/REST/GetEntities",
                                       payload,
                                       timeout=8)
            try:
                body = response.read()
            finally:
                response.close()
            # Parse response
            root = etree.fromstring(body, self.parser)

            reflections = {}
            annotations = {}

            for item in root.xpath('//r:item', namespaces=ns):
                itemName = etree.tostring(item.find('{%s}name' % ns['r']),
                                          method="text",
                                          encoding=unicode,
                                          with_tail=False).lower().strip()
                if itemName in seenItemNames:
                    continue
                for entity in item.xpath('.//r:entity', namespaces=ns):
                    entityType = entity.findtext('{%s}type' % ns['r'])
                    if entityType is not None:
                        entityType = int(entityType)
                    if entityType in ignoredEntityTypes:
                        continue
                    entityIdentifier = entity.findtext(
                        '{%s}identifier' % ns['r'])
                    reflections.setdefault(itemName, set()).add(
                        (entityType, entityIdentifier))

            # Without any names the pattern below would collapse to '()',
            # so there is nothing meaningful to search for in this fragment.
            if not reflections:
                continue

            # For each match, create an annotation that the UI will handle later
            regex = '(%s)' % '|'.join(
                [re.escape(key) for key in reflections])
            matches = document.search(regex,
                                      IgnoreCase + WholeWordsOnly + RegExp,
                                      start=page)
            for match in matches:
                # NOTE(review): only matches whose first word area has 0 as
                # its second component are used - presumably the rotation;
                # confirm against the document API.
                if match.begin().wordArea()[1] != 0:
                    continue
                itemName = match.text().lower().strip()
                annotation = annotations.get(itemName, None)
                if annotation is None and itemName in reflections:
                    entities = ';'.join(
                        ['%d.%s' % (entityType, identifier)
                         for (entityType, identifier) in reflections[itemName]])
                    annotation = Annotation()
                    annotation['concept'] = 'Reflection'
                    annotation['property:webpageUrl'] = \
                        'http://reflect.ws/fcgi-bin/solveAmbig.fcgi?entities=%s' % entities
                    annotation['property:name'] = itemName
                    annotation['session:overlay'] = 'hyperlink'
                    annotation['session:color'] = '#0A0'
                    annotations[itemName] = annotation
                    seenItemNames.add(itemName)
                if annotation is not None:
                    annotation.addExtent(match)
                else:
                    print("ERROR: matched '%s' but could not find in reflections map" % itemName.encode(
                        'utf8'))
                    print(reflections.keys())

            document.addAnnotations(annotations.values())
    def on_activate_event(self, document):
        """Annotate the document with entities found by the Reflect service.

        Page texts are whitespace-normalised and joined into fragments; a new
        fragment is started whenever appending the next page would exceed
        ``maxTextFragmentSize`` characters.  Each fragment is POSTed to
        reflect.ws and the returned items are collected per lower-cased name.
        The document is then searched (case-insensitive, whole words, from
        the fragment's first page) for those names, and each match is added
        as an extent to a 'Reflection' annotation, one per item name.
        """
        ns = {'r': 'Reflect'}

        maxTextFragmentSize = 1000000
        textFragments = []
        seenItemNames = set()
        ignoredEntityTypes = [-11]

        # Retrieve the full text of the document, split into fragments.
        # Each fragment remembers the page on which it starts.
        for page in document.pages():
            pageText = re.sub(r'\s+', r' ', page.pageText())
            startNewFragment = (
                not textFragments
                or len(textFragments[-1][0]) + len(pageText) > maxTextFragmentSize)
            if startNewFragment:
                textFragments.append([pageText, page])
            else:
                textFragments[-1][0] = textFragments[-1][0] + ' ' + pageText

        for text, page in textFragments:
            # Package it as URL encoded form encoding
            payload = 'document=%s' % urllib.quote(text.encode('utf8'))
            # Send it off to the reflect server; always release the
            # connection, even if reading the body fails.
            response = urllib2.urlopen("http://reflect.ws/REST/GetEntities", payload, timeout=8)
            try:
                body = response.read()
            finally:
                response.close()
            # Parse response
            root = etree.fromstring(body, self.parser)

            reflections = {}
            annotations = {}

            for item in root.xpath('//r:item', namespaces = ns):
                itemName = item.findtext('{%s}name' % ns['r']).lower().strip()
                if itemName in seenItemNames:
                    continue
                for entity in item.xpath('.//r:entity', namespaces = ns):
                    entityType = entity.findtext('{%s}type' % ns['r'])
                    if entityType is not None:
                        entityType = int(entityType)
                    if entityType in ignoredEntityTypes:
                        continue
                    entityIdentifier = entity.findtext('{%s}identifier' % ns['r'])
                    reflections.setdefault(itemName, set()).add((entityType, entityIdentifier))

            # Without any names the pattern below would collapse to '()',
            # so there is nothing meaningful to search for in this fragment.
            if not reflections:
                continue

            # For each match, create an annotation that the UI will handle later
            regex = '(%s)' % '|'.join([re.escape(key) for key in reflections])
            matches = document.search(regex, IgnoreCase + WholeWordsOnly + RegExp, start = page)
            for match in matches:
                # NOTE(review): only matches whose first word area has 0 as
                # its second component are used - presumably the rotation;
                # confirm against the document API.
                if match.begin().wordArea()[1] != 0:
                    continue
                itemName = match.text().lower().strip()
                annotation = annotations.get(itemName, None)
                if annotation is None and itemName in reflections:
                    entities = ';'.join(
                        ['%d.%s' % (entityType, identifier)
                         for (entityType, identifier) in reflections[itemName]])
                    annotation = Annotation()
                    annotation['concept'] = 'Reflection'
                    annotation['property:webpageUrl'] = \
                        'http://reflect.ws/fcgi-bin/solveAmbig.fcgi?entities=%s' % entities
                    annotation['property:name'] = itemName
                    annotation['session:overlay'] = 'hyperlink'
                    annotation['session:color'] = '#0A0'
                    annotations[itemName] = annotation
                    seenItemNames.add(itemName)
                if annotation is not None:
                    annotation.addExtent(match)
                else:
                    print("ERROR: matched '%s' but could not find in reflections map" % itemName.encode('utf8'))
                    print(reflections.keys())

            document.addAnnotations(annotations.values())