def parse_response(self, response):
    """Parse an arXiv API response (Atom XML) into a list of paper dicts.

    The feed itself provides more-or-less all the information required
    without needing any extra requests.

    :param response: raw Atom XML string/bytes returned by the arXiv API.
    :returns: list of paper dictionaries (possibly empty).  A top-level
        parse failure returns an empty list; a failure while reading a
        single entry is logged and that entry is skipped.
    """
    papers = []
    try:
        parsed = feedparser.parse(response)
    except Exception as ex:
        # BUG FIX: was "ex[0]" — Python 3 exceptions are not subscriptable,
        # which would raise TypeError inside the handler.  Format the
        # exception object itself.
        log_error("arxiv: error while parsing response: %s" % ex)
        return papers

    log_debug("arxiv: received response containing %d results"
              % len(parsed.entries))

    for entry in parsed.entries:
        paper = {}
        try:
            paper['title'] = entry['title']
            # The PDF download link is tagged with title == 'pdf'.
            for link in entry['links']:
                if link.get('title', None) == 'pdf':
                    paper['import_url'] = link['href']
                    break
            paper['authors'] = [a['name'] for a in entry['authors']]
            # Optional arXiv-specific metadata fields.
            if 'arxiv_journal_ref' in entry:
                paper['journal'] = entry['arxiv_journal_ref']
            if 'arxiv_doi' in entry:
                paper['doi'] = entry['arxiv_doi']
            if 'arxiv_comment' in entry:
                paper['notes'] = entry['arxiv_comment']
            paper['year'] = entry['published_parsed'].tm_year
            # The Atom entry id doubles as the canonical abstract-page URL.
            paper['arxiv_id'] = entry['id']
            paper['url'] = entry['id']
            paper['abstract'] = entry['summary'].replace('\n', ' ')
            if 'arxiv_primary_category' in entry:
                paper['arxiv_type'] = entry['arxiv_primary_category'].get(
                    'term', '')
            paper['created'] = datetime.datetime(
                year=entry['published_parsed'].tm_year,
                month=entry['published_parsed'].tm_mon,
                day=entry['published_parsed'].tm_mday)
            paper['updated'] = datetime.datetime(
                year=entry['updated_parsed'].tm_year,
                month=entry['updated_parsed'].tm_mon,
                day=entry['updated_parsed'].tm_mday)
            paper['data'] = paper  # messy: self-referential, kept for compatibility
            papers.append(paper)
        except Exception as ex:
            # BUG FIX: was "ex[0]" (TypeError on Python 3) — see above.
            log_error("arxiv: error while reading item: %s" % ex)
    return papers
def _got_bibtex(self, message, callback, user_data):
    """Completion handler for a Google Scholar BibTeX download.

    On an HTTP OK status the response body is parsed into a paper-info
    dict; on any other status the code is logged and ``None`` is
    forwarded.  The result is always handed to ``callback``.
    """
    paper_info = None
    if message.status_code == Soup.KnownStatusCode.OK:
        raw_bibtex = message.response_body.flatten().get_data()
        log_debug('Received BibTeX data:\n%s' % raw_bibtex)
        paper_info = paper_info_from_bibtex(raw_bibtex)
    else:
        log_error('google scholar got status code %d' % message.status_code)
    callback(paper_info, None, user_data)
def parse_response(self, response):
    """Parse an arXiv API response (Atom XML) into a list of paper dicts.

    The feed itself provides more-or-less all the information required
    without needing any extra requests.

    :param response: raw Atom XML string/bytes returned by the arXiv API.
    :returns: list of paper dictionaries (possibly empty).  A top-level
        parse failure returns an empty list; a failure while reading a
        single entry is logged and that entry is skipped.
    """
    papers = []
    try:
        parsed = feedparser.parse(response)
    except Exception as ex:
        # BUG FIX: was "ex[0]" — Python 3 exceptions are not subscriptable,
        # which would raise TypeError inside the handler.
        log_error("arxiv: error while parsing response: %s" % ex)
        return papers

    log_debug("arxiv: received response containing %d results"
              % len(parsed.entries))

    for entry in parsed.entries:
        paper = {}
        try:
            paper['title'] = entry['title']
            # The PDF download link is tagged with title == 'pdf'.
            for link in entry['links']:
                if link.get('title', None) == 'pdf':
                    paper['import_url'] = link['href']
                    break
            paper['authors'] = [a['name'] for a in entry['authors']]
            # Optional arXiv-specific metadata fields.
            if 'arxiv_journal_ref' in entry:
                paper['journal'] = entry['arxiv_journal_ref']
            if 'arxiv_doi' in entry:
                paper['doi'] = entry['arxiv_doi']
            if 'arxiv_comment' in entry:
                paper['notes'] = entry['arxiv_comment']
            paper['year'] = entry['published_parsed'].tm_year
            # The Atom entry id doubles as the canonical abstract-page URL.
            paper['arxiv_id'] = entry['id']
            paper['url'] = entry['id']
            paper['abstract'] = entry['summary'].replace('\n', ' ')
            if 'arxiv_primary_category' in entry:
                paper['arxiv_type'] = entry['arxiv_primary_category'].get(
                    'term', '')
            paper['created'] = datetime.datetime(
                year=entry['published_parsed'].tm_year,
                month=entry['published_parsed'].tm_mon,
                day=entry['published_parsed'].tm_mday)
            paper['updated'] = datetime.datetime(
                year=entry['updated_parsed'].tm_year,
                month=entry['updated_parsed'].tm_mon,
                day=entry['updated_parsed'].tm_mday)
            paper['data'] = paper  # messy: self-referential, kept for compatibility
            papers.append(paper)
        except Exception as ex:
            # BUG FIX: was "ex[0]" (TypeError on Python 3) — see above.
            log_error("arxiv: error while reading item: %s" % ex)
    return papers
def open(self):
    """Open this paper's full-text file in the system's default viewer.

    On success, increments ``read_count`` and saves the model without
    firing ``post_save`` receivers (so the save does not trigger
    re-processing hooks).  Logs an error if the URI could not be opened.
    """
    if self.full_text and os.path.isfile(self.full_text.path):
        uri = 'file://' + self.full_text.path
        if Gtk.show_uri(None, uri, Gdk.CURRENT_TIME):
            self.read_count += 1
            # Temporarily disable receivers getting notified by post_save.
            # BUG FIX: restore the receivers in a finally block — previously
            # an exception in save() would leave post_save.receivers empty
            # for the rest of the process, silently disabling all hooks.
            receivers = post_save.receivers
            post_save.receivers = []
            try:
                self.save()
            finally:
                post_save.receivers = receivers
        else:
            log_error('Failed to open %s' % uri)
def _paper_info_received(self, message, callback, user_data):
    """Parse a PubMed paper-info XML reply and forward the result.

    Extraction is deliberately best-effort: each field group is wrapped
    in its own try/except so one missing tag does not prevent the other
    fields from being filled in.  On a non-OK HTTP status ``paper_info``
    is ``None``.  The result is always handed to ``callback``.
    """
    if not message.status_code == Soup.KnownStatusCode.OK:
        log_error('Pubmed replied with error code %d for paper_info with id: %s' % \
                  (message.status_code, user_data[1]))
        paper_info = None
    else:
        parsed_response = BeautifulStoneSoup(message.response_body.data)
        paper_info = {}
        # Journal, issue and pages.
        # BUG FIX throughout: bare "except:" clauses also caught
        # SystemExit/KeyboardInterrupt; narrowed to "except Exception"
        # while keeping the intentional best-effort swallow.
        try:
            journal = parsed_response.findAll('journal')[0]
            paper_info['journal'] = journal.findAll('title')[0].text
            try:
                paper_info['issue'] = journal.findAll('issue')[0].text
            except Exception:
                pass
            paper_info['pages'] = parsed_response.findAll('medlinepgn')[0].text
            log_debug('Pages: %s' % paper_info['pages'])
        except Exception:
            pass
        # Publication date
        try:
            articledate = parsed_response.findAll('articledate')[0]
            paper_info['year'] = articledate.year.text
        except Exception:
            pass
        # Title and abstract
        try:
            paper_info['title'] = parsed_response.findAll('articletitle')[0].text
            log_debug('Title: %s' % paper_info['title'])
            paper_info['abstract'] = parsed_response.findAll('abstracttext')[0].text
            log_debug('Abstract: %s' % paper_info['abstract'])
        except Exception:
            pass
        # Authors
        try:
            all_authors = []
            authors = parsed_response.findAll('author')
            for author in authors:
                author_name = author.forename.text + ' ' + \
                              author.lastname.text
                log_debug('\tAuthor: %s' % author_name)
                all_authors.append(author_name)
            if all_authors:
                paper_info['authors'] = all_authors
        except Exception:
            pass
        # URL + IDs
        try:
            articleids = parsed_response.findAll('articleid')
            for articleid in articleids:
                if articleid['idtype'] == 'doi':
                    paper_info['doi'] = articleid.text
                elif articleid['idtype'] == 'pubmed':
                    paper_info['pubmed_id'] = articleid.text
        except Exception:
            pass
    callback(paper_info=paper_info, user_data=user_data)
def _paper_info_received(self, message, callback, user_data):
    """Parse a PubMed paper-info XML reply and forward the result.

    Extraction is deliberately best-effort: each field group is wrapped
    in its own try/except so one missing tag does not prevent the other
    fields from being filled in.  On a non-OK HTTP status ``paper_info``
    is ``None``.  The result is always handed to ``callback``.
    """
    if not message.status_code == Soup.KnownStatusCode.OK:
        log_error('Pubmed replied with error code %d for paper_info with id: %s' % \
                  (message.status_code, user_data[1]))
        paper_info = None
    else:
        parsed_response = BeautifulStoneSoup(message.response_body.data)
        paper_info = {}
        # Journal, issue and pages.
        # BUG FIX throughout: bare "except:" clauses also caught
        # SystemExit/KeyboardInterrupt; narrowed to "except Exception"
        # while keeping the intentional best-effort swallow.
        try:
            journal = parsed_response.findAll('journal')[0]
            paper_info['journal'] = journal.findAll('title')[0].text
            try:
                paper_info['issue'] = journal.findAll('issue')[0].text
            except Exception:
                pass
            paper_info['pages'] = parsed_response.findAll(
                'medlinepgn')[0].text
            log_debug('Pages: %s' % paper_info['pages'])
        except Exception:
            pass
        # Publication date
        try:
            articledate = parsed_response.findAll('articledate')[0]
            paper_info['year'] = articledate.year.text
        except Exception:
            pass
        # Title and abstract
        try:
            paper_info['title'] = parsed_response.findAll(
                'articletitle')[0].text
            log_debug('Title: %s' % paper_info['title'])
            paper_info['abstract'] = parsed_response.findAll(
                'abstracttext')[0].text
            log_debug('Abstract: %s' % paper_info['abstract'])
        except Exception:
            pass
        # Authors
        try:
            all_authors = []
            authors = parsed_response.findAll('author')
            for author in authors:
                author_name = author.forename.text + ' ' + \
                              author.lastname.text
                log_debug('\tAuthor: %s' % author_name)
                all_authors.append(author_name)
            if all_authors:
                paper_info['authors'] = all_authors
        except Exception:
            pass
        # URL + IDs
        try:
            articleids = parsed_response.findAll('articleid')
            for articleid in articleids:
                if articleid['idtype'] == 'doi':
                    paper_info['doi'] = articleid.text
                elif articleid['idtype'] == 'pubmed':
                    paper_info['pubmed_id'] = articleid.text
        except Exception:
            pass
    callback(paper_info=paper_info, user_data=user_data)