def createAnnotation(self, domain, document, html, mentions):
    """Build one 'Bio3DMInformation' annotation covering a group of mentions.

    The descriptive properties (name, description, html, css, js) are taken
    from the first entry of ``mentions``; every text range of every mention
    is added to the annotation as an extent.

    :param domain: value first written to 'property:sourceDatabase'.
    :param document: object providing ``substr(start, length)``.
    :param html: not used by this method.
    :param mentions: non-empty sequence of dict-like mentions with the keys
        'mentionType', 'formalRepresentation', 'html', 'css', 'js' and
        'textRangeList' (each range having 'start' and 'end').
    :returns: the populated annotation.
    """
    result = Annotation()
    first = mentions[0]
    kind = first['mentionType'].title()

    # Assigned strictly in this order. NOTE(review): 'property:sourceDatabase'
    # appears twice (domain, then 'bioprodict'); kept exactly as it was.
    properties = (
        ('concept', 'Bio3DMInformation'),
        ('property:name', '%s: "%s"' % (kind, first['formalRepresentation'])),
        ('property:description', '3DM %s record' % kind),
        ('property:sourceDatabase', domain),
        ('property:html', first['html']),
        ('property:css', first['css']),
        ('property:js', first['js']),
        ('property:sourceDatabase', 'bioprodict'),
        ('property:sourceDescription',
         '<p><a href="http://www.bio-prodict.nl">Bio-Prodict\'s</a> '
         '3DM information systems provide protein family-specific '
         'annotations for this article</p>'),
        ('session:overlay', 'hyperlink'),
        ('session:color', '#336611'),
    )
    for key, value in properties:
        result[key] = value

    # One extent per text range; substr receives (start, length).
    for entry in mentions:
        for span in entry['textRangeList']:
            begin = int(span['start'])
            finish = int(span['end'])
            result.addExtent(document.substr(begin, finish - begin))
    return result
def on_activate_event(self, document, data=None):
    """Annotate ``document`` with 3DM mentions when asked to.

    Only ``data['action'] == 'annotate'`` triggers any work. In that case the
    document text is sent through ``self.getMentions`` for the requested
    domain, HTML/CSS/JS is built for every mention that is neither 'SPECIES'
    nor 'PDB', mentions that share identical HTML are grouped, and one
    annotation per group is added to the document.

    :param document: the document being annotated.
    :param data: optional dict with the keys 'action' and 'domain'. Defaults
        to an empty dict (a ``None`` sentinel replaces the former mutable
        ``{}`` default so no dict object is shared between calls).
    """
    if data is None:
        data = {}
    action = data.get('action')
    domain = data.get('domain')

    if self.annotatedDomains is None:
        self.annotatedDomains = []

    if action != 'annotate':
        return

    print('starting 3DM annotation . . .')
    pubmedId = utopia.tools.utils.metadata(document, 'identifiers[pubmed]')
    if pubmedId is None:
        pubmedId = '0'
    print('sending text to remote server (' + pubmedId + '). . .')
    textMentions = self.getMentions(domain, document.text())
    print('received response, adding annotations for domain ' + domain + ' . . .')

    # Group mentions by their rendered HTML so each distinct rendering
    # becomes exactly one annotation carrying all matching extents.
    mention_cache = {}
    for mention in textMentions:
        if mention['mentionType'] in ('SPECIES', 'PDB'):
            continue
        html, css, js = self.buildHtml(domain, mention)
        mention['html'] = html.encode('utf-8')
        mention['css'] = css.encode('utf-8')
        mention['js'] = js.encode('utf-8')
        mention_cache.setdefault(mention['html'], []).append(mention)

    for html, mentions in mention_cache.items():
        annotation = self.createAnnotation(domain, document, html, mentions)
        annotation['displayRelevance'] = '2000'
        annotation['displayRank'] = '2000'
        document.addAnnotation(annotation)

    # An empty annotation is added with the domain as second argument.
    # NOTE(review): presumably marks the domain as processed - confirm.
    document.addAnnotation(Annotation(), domain)
    print('done adding annotations.')
def on_ready_event(self, document):
    """Log in to 3DM and add the 'Bio-Prodict 3DM' annotation to the document.

    Does nothing unless ``self.validUsernameAndPassword`` accepts the
    configured username and password. Otherwise a bearer token is requested,
    stored in ``self.bearer`` and substituted for '#TOKEN#' in
    ``self.proteinJs`` and ``self.commonJs``; the user's databases are then
    fetched, sorted by description and published on a new annotation as
    '|'-joined id and description lists.

    :param document: the document that receives the annotation.
    """
    username = self.get_config('username')
    password = self.get_config('password')
    if not self.validUsernameAndPassword(username, password):
        return

    # Get a new bearer token
    # base64.encodestring('utopia-plugin:').replace('\n', '')
    basic = 'Basic dXRvcGlhLXBsdWdpbjo='
    credentials = {
        'username': username,
        'password': password,
        'grant_type': 'password',
    }
    token_reply = post_for_json(self.tokenurl, basic, credentials)
    self.bearer = 'Bearer ' + token_reply['access_token']

    # NOTE(review): the substituted text overwrites the stored JS, so a
    # later call may find no '#TOKEN#' left to replace - confirm intended.
    self.proteinJs = self.proteinJs.replace('#TOKEN#', self.bearer)
    self.commonJs = self.commonJs.replace('#TOKEN#', self.bearer)

    # Get available databases for user, ordered by description
    available = post_for_json(self.databasesurl, self.bearer)
    ordered = sorted(available.items(), key=lambda pair: pair[1])
    databaseIds = [pair[0] for pair in ordered]
    databaseDescriptions = [pair[1] for pair in ordered]

    entry = Annotation()
    for key, value in (
        ('concept', 'Bio3DMInformation'),
        ('property:name', 'Bio-Prodict 3DM'),
        ('property:html', 'html'),
        ('session:overlay', 'hyperlink'),
        ('session:color', '#336611'),
        ('property:description', 'Annotate using one of your 3DM systems'),
        ('property:databaseIds', '|'.join(databaseIds)),
        ('property:databaseDescriptions', '|'.join(databaseDescriptions)),
        ('property:sourceDatabase', 'bioprodict'),
        ('property:sourceDescription',
         '<p><a href="http://www.bio-prodict.nl">Bio-Prodict\'s</a> '
         '3DM information systems provide protein family-specific '
         'annotations for this article</p>'),
    ):
        entry[key] = value
    document.addAnnotation(entry)
def createAnnotation(self, domain, document, html, mentions):
    """Build one 'Bio3DMInformation' annotation covering a group of mentions.

    The descriptive properties (name, description, html, css, js) are read
    from attributes of the first entry of ``mentions``; every text range of
    every mention is added to the annotation as an extent.

    :param domain: value first written to 'property:sourceDatabase'.
    :param document: object providing ``substr(start, length)``.
    :param html: not used by this method.
    :param mentions: non-empty sequence of mention objects exposing
        ``mentionType``, ``formalRepresentation``, ``html``, ``css``, ``js``
        and ``textRangeList`` (each range exposing ``start`` and ``end``).
    :returns: the populated annotation.
    """
    result = Annotation()
    first = mentions[0]
    kind = first.mentionType.title()

    # Assigned strictly in this order. NOTE(review): 'property:sourceDatabase'
    # appears twice (domain, then "bioprodict"); kept exactly as it was.
    properties = (
        ("concept", "Bio3DMInformation"),
        ("property:name", '%s: "%s"' % (kind, first.formalRepresentation)),
        ("property:description", "3DM %s record" % kind),
        ("property:sourceDatabase", domain),
        ("property:html", first.html),
        ("property:css", first.css),
        ("property:js", first.js),
        ("property:sourceDatabase", "bioprodict"),
        ("property:sourceDescription",
         '<p><a href="http://www.bio-prodict.nl">Bio-Prodict\'s</a> '
         '3DM information systems provide protein family-specific '
         'annotations for this article</p>'),
    )
    for key, value in properties:
        result[key] = value

    # One extent per text range; substr receives (start, length).
    for entry in mentions:
        for span in entry.textRangeList:
            begin = int(span.start)
            finish = int(span.end)
            result.addExtent(document.substr(begin, finish - begin))
    return result
def on_activate_event(self, document):
    """Annotate ``document`` with entities recognised by the Reflect service.

    The page texts are whitespace-normalised and concatenated into fragments
    of at most ``maxTextFragmentSize`` characters. Each fragment is POSTed to
    the Reflect REST endpoint; every returned item name that has at least one
    non-ignored entity is then searched for in the document (starting at the
    fragment's first page) and turned into a 'Reflection' annotation whose
    extents are the matches.

    Changes over the previous revision: the HTTP response is always closed,
    items without a usable name are skipped instead of raising, and no search
    is run when the service returned no entities (the pattern would have
    been the empty group ``()``).

    :param document: the document to scan and annotate.
    """
    ns = {'r': 'Reflect'}
    maxTextFragmentSize = 1000000
    ignoredEntityTypes = [-11]
    seenItemNames = set()
    nameTag = '{%s}name' % ns['r']
    typeTag = '{%s}type' % ns['r']
    identifierTag = '{%s}identifier' % ns['r']

    # Retrieve the full text of the document, split into fragments. Each
    # fragment remembers the page it starts on.
    textFragments = []
    for page in document.pages():
        pageText = re.sub(r'\s+', r' ', page.pageText())
        if (not textFragments or
                len(textFragments[-1][0]) + len(pageText) > maxTextFragmentSize):
            textFragments.append([pageText, page])
        else:
            textFragments[-1][0] = textFragments[-1][0] + ' ' + pageText

    for text, page in textFragments:
        # Package it as URL encoded form encoding
        payload = 'document=%s' % urllib.quote(text.encode('utf8'))

        # Send it off to the reflect server; close the connection even if
        # reading the body fails.
        response = urllib2.urlopen("http://reflect.ws/REST/GetEntities",
                                   payload, timeout=8)
        try:
            body = response.read()
        finally:
            response.close()

        # Parse response
        root = etree.fromstring(body, self.parser)

        reflections = {}
        annotations = {}
        for item in root.xpath('//r:item', namespaces=ns):
            nameElement = item.find(nameTag)
            if nameElement is None:
                # An item without a name cannot be matched in the text.
                continue
            itemName = etree.tostring(nameElement, method="text",
                                      encoding=unicode,
                                      with_tail=False).lower().strip()
            # An empty name would add an empty alternative to the regex.
            if not itemName or itemName in seenItemNames:
                continue
            for entity in item.xpath('.//r:entity', namespaces=ns):
                entityType = entity.findtext(typeTag)
                if entityType is None:
                    continue
                entityType = int(entityType)
                if entityType in ignoredEntityTypes:
                    continue
                entityIdentifier = entity.findtext(identifierTag)
                reflections.setdefault(itemName, set()).add(
                    (entityType, entityIdentifier))

        if not reflections:
            # Nothing to look for in this fragment.
            continue

        # For each match, create an annotation that the UI will handle later
        regex = '(%s)' % '|'.join([re.escape(key) for key in reflections])
        matches = document.search(regex,
                                  IgnoreCase + WholeWordsOnly + RegExp,
                                  start=page)
        for match in matches:
            # NOTE(review): only matches whose wordArea()[1] is 0 are used;
            # presumably this filters on text orientation - confirm.
            if match.begin().wordArea()[1] != 0:
                continue
            itemName = match.text().lower().strip()
            annotation = annotations.get(itemName, None)
            if annotation is None and itemName in reflections:
                annotation = Annotation()
                annotation['concept'] = 'Reflection'
                annotation['property:webpageUrl'] = (
                    'http://reflect.ws/fcgi-bin/solveAmbig.fcgi?entities=%s' %
                    ';'.join(['%d.%s' % (entityType, identifier)
                              for (entityType, identifier)
                              in reflections[itemName]]))
                annotation['property:name'] = itemName
                annotation['session:overlay'] = 'hyperlink'
                annotation['session:color'] = '#0A0'
                annotations[itemName] = annotation
                seenItemNames.add(itemName)
            if annotation is not None:
                annotation.addExtent(match)
            else:
                print("ERROR: matched '%s' but could not find in reflections map"
                      % itemName.encode('utf8'))
                print(reflections.keys())

        document.addAnnotations(annotations.values())
def on_activate_event(self, document):
    """Annotate ``document`` with entities recognised by the Reflect service.

    The page texts are whitespace-normalised and concatenated into fragments
    of at most ``maxTextFragmentSize`` characters. Each fragment is POSTed to
    the Reflect REST endpoint; every returned item name that has at least one
    non-ignored entity is then searched for in the document (starting at the
    fragment's first page) and turned into a 'Reflection' annotation whose
    extents are the matches.

    Changes over the previous revision: the HTTP response is always closed,
    items without a usable name are skipped instead of raising
    ``AttributeError`` on ``None``, and no search is run when the service
    returned no entities (the pattern would have been the empty group ``()``).

    :param document: the document to scan and annotate.
    """
    ns = {'r': 'Reflect'}
    maxTextFragmentSize = 1000000
    ignoredEntityTypes = [-11]
    seenItemNames = set()
    nameTag = '{%s}name' % ns['r']
    typeTag = '{%s}type' % ns['r']
    identifierTag = '{%s}identifier' % ns['r']

    # Retrieve the full text of the document, split into fragments. Each
    # fragment remembers the page it starts on.
    textFragments = []
    for page in document.pages():
        pageText = re.sub(r'\s+', r' ', page.pageText())
        if (not textFragments or
                len(textFragments[-1][0]) + len(pageText) > maxTextFragmentSize):
            textFragments.append([pageText, page])
        else:
            textFragments[-1][0] = textFragments[-1][0] + ' ' + pageText

    for text, page in textFragments:
        # Package it as URL encoded form encoding
        payload = 'document=%s' % urllib.quote(text.encode('utf8'))

        # Send it off to the reflect server; close the connection even if
        # reading the body fails.
        response = urllib2.urlopen("http://reflect.ws/REST/GetEntities",
                                   payload, timeout=8)
        try:
            body = response.read()
        finally:
            response.close()

        # Parse response
        root = etree.fromstring(body, self.parser)

        reflections = {}
        annotations = {}
        for item in root.xpath('//r:item', namespaces=ns):
            rawName = item.findtext(nameTag)
            if rawName is None:
                # An item without a name cannot be matched in the text.
                continue
            itemName = rawName.lower().strip()
            # An empty name would add an empty alternative to the regex.
            if not itemName or itemName in seenItemNames:
                continue
            for entity in item.xpath('.//r:entity', namespaces=ns):
                entityType = entity.findtext(typeTag)
                if entityType is None:
                    continue
                entityType = int(entityType)
                if entityType in ignoredEntityTypes:
                    continue
                entityIdentifier = entity.findtext(identifierTag)
                reflections.setdefault(itemName, set()).add(
                    (entityType, entityIdentifier))

        if not reflections:
            # Nothing to look for in this fragment.
            continue

        # For each match, create an annotation that the UI will handle later
        regex = '(%s)' % '|'.join([re.escape(key) for key in reflections])
        matches = document.search(regex,
                                  IgnoreCase + WholeWordsOnly + RegExp,
                                  start=page)
        for match in matches:
            # NOTE(review): only matches whose wordArea()[1] is 0 are used;
            # presumably this filters on text orientation - confirm.
            if match.begin().wordArea()[1] != 0:
                continue
            itemName = match.text().lower().strip()
            annotation = annotations.get(itemName, None)
            if annotation is None and itemName in reflections:
                annotation = Annotation()
                annotation['concept'] = 'Reflection'
                annotation['property:webpageUrl'] = (
                    'http://reflect.ws/fcgi-bin/solveAmbig.fcgi?entities=%s' %
                    ';'.join(['%d.%s' % (entityType, identifier)
                              for (entityType, identifier)
                              in reflections[itemName]]))
                annotation['property:name'] = itemName
                annotation['session:overlay'] = 'hyperlink'
                annotation['session:color'] = '#0A0'
                annotations[itemName] = annotation
                seenItemNames.add(itemName)
            if annotation is not None:
                annotation.addExtent(match)
            else:
                print("ERROR: matched '%s' but could not find in reflections map"
                      % itemName.encode('utf8'))
                print(reflections.keys())

        document.addAnnotations(annotations.values())