Example #1
    def search_author_publication(self, author_id, show=True, verbose=False):
        # {{{ search author's publications using authid
        # Assumes the module-level imports shown in Example #2 (numpy as np,
        # pandas as pd, urllib2.urlopen, bs4.BeautifulSoup as bs, and the
        # helpers trunc and _parse_xml from utils).
        # TODO: Verbose mode

        '''
            Search an author's publications by author id.
            Returns a list of dictionaries, one per publication.
        '''
        url = self._search_url_base + 'apikey={}&query=au-id({})&start=0&httpAccept=application/xml'.format(self.apikey, author_id)
        soup = bs(urlopen(url).read(), 'lxml')
        total = float(soup.find('opensearch:totalresults').text)
        print 'A total of', int(total), 'records for author', author_id
        # The API returns at most 25 entries per page, so compute the page
        # offsets 0, 25, 50, ... needed to cover all records.
        starts = np.array([i*25 for i in range(int(np.ceil(total/25.)))])

        publication_list = []
        for start in starts:
            search_url = self._search_url_base + 'apikey={}&start={}&query=au-id({})&httpAccept=application/xml'.format(self.apikey, start, author_id)
            results = bs(urlopen(search_url).read(), 'lxml')
            entries = results.find_all('entry')
            for entry in entries:
                publication_list.append(_parse_xml(entry))

        if show:
            #pd.set_printoptions('display.expand_frame_repr', False)
            #print df['title'].to_string(max_rows=10, justify='left')
            df = pd.DataFrame(publication_list)
            titles = np.array(df['title'])
            for i in range(titles.size):
                t = trunc(titles[i])
                print '%d)' % i, t
        # }}}
        return publication_list
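
A minimal usage sketch for the method above. The wrapper class name (Scopus) and its constructor are assumptions, since the examples only show method bodies that rely on self.apikey and self._search_url_base:

    # Hypothetical usage -- the Scopus class name, constructor, and API key
    # placeholder are assumptions; the examples only show method bodies.
    scopus = Scopus(apikey='YOUR_SCOPUS_API_KEY')
    pubs = scopus.search_author_publication('AUTHOR_ID', show=True)
    print 'Fetched', len(pubs), 'records'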
Example #2
    def search_author_publication(self, author_id, show=True, verbose=False):
        #{{{ search author's publications using authid
        import warnings
        import numpy as np
        import pandas as pd
        from urllib2 import urlopen
        from utils import trunc, _parse_author, _parse_xml
        from bs4 import BeautifulSoup as bs
        #TODO: Verbose mode
        '''
            Search an author's publications by author id.
            Returns a pandas DataFrame of the results.
        '''
        url = self._search_url_base + 'apikey={}&query=au-id({})&start=0&httpAccept=application/xml'.format(
            self.apikey, author_id)
        soup = bs(urlopen(url).read(), 'lxml')
        total = float(soup.find('opensearch:totalresults').text)
        print 'A total of', int(total), 'records for author', author_id
        starts = np.array([i * 25 for i in range(int(np.ceil(total / 25.)))])

        publication_list = []
        for start in starts:
            search_url = self._search_url_base + 'apikey={}&start={}&query=au-id({})&httpAccept=application/xml'.format(
                self.apikey, start, author_id)
            results = bs(urlopen(search_url).read(), 'lxml')
            entries = results.find_all('entry')
            for entry in entries:
                publication_list.append(_parse_xml(entry))

        df = pd.DataFrame(publication_list)
        if show:
            #pd.set_printoptions('display.expand_frame_repr', False)
            #print df['title'].to_string(max_rows=10, justify='left')
            titles = np.array(df['title'])
            for i in range(titles.size):
                t = trunc(titles[i])
                print i, t
        # }}}
        return df
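
Both author-search examples page through results in blocks of 25, which is the page size the code assumes the Scopus Search API returns per request. A standalone sketch of the offset computation:

    import numpy as np

    total = 60.0  # e.g. the opensearch:totalresults value reported by the API
    starts = np.array([i * 25 for i in range(int(np.ceil(total / 25.)))])
    print starts  # [ 0 25 50] -- three pages of 25 cover 60 records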
Example #3
    def search(self, query, show=True, verbose=False):
        # {{{ search for documents matching a query
        # Assumes the module-level imports shown in Example #2, plus
        # urllib.quote for URL-encoding the query string.
        '''
            Search for documents matching the keywords in query.
            Details: http://api.elsevier.com/documentation/SCOPUSSearchAPI.wadl
            Tips: http://api.elsevier.com/documentation/search/SCOPUSSearchTips.htm

            Returns a list of document records, each as a dict.
        '''

        # Build the search URL, URL-encoding the query string
        url = self._search_url_base + \
            'apikey={}&query={}&start=0&httpAccept=application/xml'.format(self.apikey, quote(query))
        print url

        soup = bs(urlopen(url).read(), 'lxml')
        total = float(soup.find('opensearch:totalresults').text)

        print 'A total of', int(total), 'records for the query.'
        starts = np.array([i*25 for i in range(int(np.ceil(total/25.)))])

        doc_list = []
        for start in starts:
            search_url = self._search_url_base + \
                'apikey={}&start={}&query={}&httpAccept=application/xml'.format(self.apikey, start, quote(query))

            results = bs(urlopen(search_url).read(), 'lxml')
            entries = results.find_all('entry')
            for entry in entries:
                doc_list.append(_parse_xml(entry))

        if show:
            df = pd.DataFrame(doc_list)
            print df

        # }}}
        return doc_list
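
A usage sketch for the search method, again with an assumed wrapper class and constructor. The query syntax follows the Scopus search tips linked in the docstring:

    # Hypothetical usage -- the Scopus class and constructor are assumptions.
    scopus = Scopus(apikey='YOUR_SCOPUS_API_KEY')
    docs = scopus.search('TITLE-ABS-KEY(machine learning)', show=False)
    print len(docs), 'matching records'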