def search_author_publication(self, author_id, show=True, verbose=False):  #{{{ search author's publications using authid
    '''
    Search an author's publications by author id.

    The search endpoint is queried page by page (the start offset advances
    by 25 per request) and every <entry> element of every page is converted
    with _parse_xml.

    Parameters:
        author_id: identifier placed inside the au-id(...) query term.
        show: when true, print a running index and the truncated title of
            every record that was found.
        verbose: currently unused (TODO: verbose mode).

    Returns:
        A list holding one _parse_xml result per entry (a list of
        dictionaries).
    '''
    # Function-scope import so this block does not depend on the file header.
    from contextlib import closing

    page_step = 25  # distance between the start offsets of successive pages

    def fetch(url):
        # closing() releases the HTTP response even if read/parse raises.
        with closing(urlopen(url)) as response:
            return bs(response.read(), 'lxml')

    first_page = fetch(
        self._search_url_base
        + 'apikey={}&query=au-id({})&start=0&httpAccept=application/xml'.format(
            self.apikey, author_id))
    total = int(float(first_page.find('opensearch:totalresults').text))
    # A single-argument print(...) emits the same text as the former
    # multi-item print statement and is valid on both Python 2 and 3.
    print('%s %d %s %s' % ('A toal number of ', total,
                           ' records for author ', author_id))

    publication_list = []
    for start in range(0, total, page_step):
        if start == 0:
            # The first page was already downloaded to read the total;
            # reuse it instead of requesting offset 0 a second time.
            page = first_page
        else:
            page = fetch(
                self._search_url_base
                + 'apikey={}&start={}&query=au-id({})&httpAccept=application/xml'.format(
                    self.apikey, start, author_id))
        publication_list.extend(
            _parse_xml(entry) for entry in page.find_all('entry'))

    # An empty result yields a DataFrame without a 'title' column, so only
    # build and print the listing when there is something to show.
    if show and publication_list:
        df = pd.DataFrame(publication_list)
        for index, title in enumerate(df['title']):
            print('%d) %s' % (index, trunc(title)))
    # }}}
    return publication_list
def search_author_publication(self, author_id, show=True, verbose=False):  #{{{ search author's publications using authid
    '''
    Search an author's publications by author id.

    The search endpoint is queried page by page (the start offset advances
    by 25 per request) and every <entry> element of every page is converted
    with _parse_xml. The converted records are collected in a DataFrame.

    Parameters:
        author_id: identifier placed inside the au-id(...) query term.
        show: when true, print a running index and the truncated title of
            every record that was found.
        verbose: currently unused (TODO: verbose mode).

    Returns:
        A pandas DataFrame built from the parsed entries; it is empty when
        the query matched nothing.
    '''
    from contextlib import closing
    from urllib2 import urlopen

    import pandas as pd
    from bs4 import BeautifulSoup as bs

    from utils import trunc, _parse_xml

    page_step = 25  # distance between the start offsets of successive pages

    def fetch(url):
        # closing() releases the HTTP response even if read/parse raises.
        with closing(urlopen(url)) as response:
            return bs(response.read(), 'lxml')

    first_page = fetch(
        self._search_url_base
        + 'apikey={}&query=au-id({})&start=0&httpAccept=application/xml'.format(
            self.apikey, author_id))
    total = int(float(first_page.find('opensearch:totalresults').text))
    # A single-argument print(...) emits the same text as the former
    # multi-item print statement and is valid on both Python 2 and 3.
    print('%s %d %s %s' % ('A toal number of ', total,
                           ' records for author ', author_id))

    publication_list = []
    for start in range(0, total, page_step):
        if start == 0:
            # The first page was already downloaded to read the total;
            # reuse it instead of requesting offset 0 a second time.
            page = first_page
        else:
            page = fetch(
                self._search_url_base
                + 'apikey={}&start={}&query=au-id({})&httpAccept=application/xml'.format(
                    self.apikey, start, author_id))
        publication_list.extend(
            _parse_xml(entry) for entry in page.find_all('entry'))

    df = pd.DataFrame(publication_list)
    # An empty DataFrame has no 'title' column; skip the listing rather
    # than fail with a KeyError when the author has no records.
    if show and not df.empty:
        for index, title in enumerate(df['title']):
            print('%d %s' % (index, trunc(title)))
    # }}}
    return df
def search(self, query, show=True, verbose=False):  #{{{
    '''
    Search for documents matching the keywords in query.

    Details: http://api.elsevier.com/documentation/SCOPUSSearchAPI.wadl
    Tips: http://api.elsevier.com/documentation/search/SCOPUSSearchTips.htm

    The search endpoint is queried page by page (the start offset advances
    by 25 per request) and every <entry> element of every page is converted
    with _parse_xml.

    Parameters:
        query: search expression; it is URL-quoted before being sent.
        show: when true, print the collected records as a DataFrame.
        verbose: currently unused.

    Returns:
        A list of document records, one _parse_xml result per entry
        (records in the form of dict).
    '''
    # Function-scope import so this block does not depend on the file header.
    from contextlib import closing

    page_step = 25  # distance between the start offsets of successive pages
    quoted_query = quote(query)  # loop-invariant, so encode it only once

    def fetch(target):
        # closing() releases the HTTP response even if read/parse raises.
        with closing(urlopen(target)) as response:
            return bs(response.read(), 'lxml')

    url = self._search_url_base + \
        'apikey={}&query={}&start=0&httpAccept=application/xml'.format(
            self.apikey, quoted_query)
    # NOTE(review): this echoes the full request URL, API key included, to
    # stdout. Kept to preserve existing output; consider gating on verbose.
    print(url)
    first_page = fetch(url)
    total = int(float(first_page.find('opensearch:totalresults').text))
    # A single-argument print(...) emits the same text as the former
    # multi-item print statement and is valid on both Python 2 and 3.
    print('%s %d %s' % ('A total number of ', total,
                        ' records for the query.'))

    doc_list = []
    for start in range(0, total, page_step):
        if start == 0:
            # The first page was already downloaded to read the total;
            # reuse it instead of requesting offset 0 a second time.
            page = first_page
        else:
            page = fetch(
                self._search_url_base
                + 'apikey={}&start={}&query={}&httpAccept=application/xml'.format(
                    self.apikey, start, quoted_query))
        doc_list.extend(_parse_xml(entry) for entry in page.find_all('entry'))

    if show:
        print(pd.DataFrame(doc_list))
    # }}}
    return doc_list