Example #1
    def test_ScraperAPI(self):
        proxy_generator = ProxyGenerator()
        proxy_generator.ScraperAPI(os.getenv('SCRAPER_API_KEY'))
        scholarly.use_proxy(proxy_generator)  # activate the configured proxy
        scholarly.set_timeout(60)

        # Reuse the keyword-search test to verify that the proxy works.
        self.test_search_keyword()
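
The test above delegates the actual round trip to `test_search_keyword`, which is not shown. A minimal sketch of what such a companion method might look like, assuming a standard unittest.TestCase and scholarly's `search_keyword` API (the keyword and assertion are illustrative, not scholarly's actual test):

    def test_search_keyword(self):
        # Hypothetical companion test: run a keyword search and check
        # that at least the first returned author has a name.
        authors = scholarly.search_keyword('machine learning')
        first_author = next(authors)
        self.assertTrue(first_author.get('name'))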
Example #2
def search(self,
           query: str,
           generic_cover: str = "",
           locale: str = "en") -> Optional[List[MetaRecord]]:
    val = list()
    if self.active:
        # Tokenize the title and URL-encode each token before querying.
        title_tokens = list(
            self.get_title_tokens(query, strip_joiners=False))
        if title_tokens:
            tokens = [quote(t.encode("utf-8")) for t in title_tokens]
            query = " ".join(tokens)
        try:
            scholarly.set_timeout(20)
            scholarly.set_retries(2)
            # Pull at most the first 10 results from the lazy generator.
            scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
        except Exception as e:
            log.warning(e)
            return None
        for result in scholar_gen:
            match = self._parse_search_result(result=result,
                                              generic_cover=generic_cover,
                                              locale=locale)
            val.append(match)
    return val
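
The pattern worth noting here is that `scholarly.search_pubs` returns a lazy generator, so `itertools.islice` caps it at 10 network-backed results, while `set_timeout(20)` and `set_retries(2)` keep a slow Google Scholar backend from stalling the metadata lookup. A standalone sketch of the same pattern, assuming no proxy is needed (the query string is illustrative):

import itertools

from scholarly import scholarly

scholarly.set_timeout(20)   # seconds per request
scholarly.set_retries(2)    # retry each failed request twice
for pub in itertools.islice(scholarly.search_pubs("deep learning"), 10):
    print(pub['bib'].get('title'))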
Example #3
import json
import os
from pathlib import Path

from dotenv import load_dotenv
from scholarly import scholarly, ProxyGenerator

# Load the ScraperAPI key from a .env file one directory up.
env_path = Path('../') / '.env'
load_dotenv(dotenv_path=env_path)
SCRAPER = os.getenv("SCRAPER")

# Route all scholarly traffic through ScraperAPI.
proxy_generator = ProxyGenerator()
proxy_generator.ScraperAPI(SCRAPER)
scholarly.set_timeout(60)
scholarly.use_proxy(proxy_generator)

search_query = scholarly.search_author('Maël Montévil')

# Fill in the full profile of the first matching author.
author = scholarly.fill(next(search_query))

# Fill every publication that has at least one citation.
pubs = [
    scholarly.fill(pub) for pub in author['publications']
    if pub['num_citations'] > 0
]

# Pair each publication with the list of works citing it.
pubs2 = [[pub, list(scholarly.citedby(pub))] for pub in pubs
         if 'citedby_url' in pub]

print(json.dumps(pubs2, indent=2, default=lambda o: '<not serializable>'))
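
Note the `default=lambda o: '<not serializable>'` argument: `json.dumps` calls `default` for any object it cannot encode, so publication entries containing non-JSON-serializable objects are rendered as a placeholder string instead of raising a TypeError.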
Example #4
# Assumes `SCRAPER` holds a ScraperAPI key, loaded as in Example #3,
# and that ScraperAPIClient comes from the scraperapi-sdk package.
from scholarly import scholarly, ProxyGenerator
from scraper_api import ScraperAPIClient


class ScraperAPI(ProxyGenerator):
    def __init__(self, api_key):
        assert api_key is not None  # validate the key before using it
        self._api_key = api_key
        self._client = ScraperAPIClient(api_key)

        super().__init__()

        self._TIMEOUT = 120
        # Hand the SDK client to scholarly as its session; the client
        # does its own proxying, so no explicit proxies are set.
        self._session = self._client
        self._session.proxies = {}

    def _new_session(self):
        # Reuse the single client; reset the 403 flag scholarly checks.
        self.got_403 = False
        return self._session

    def _close_session(self):
        pass  # no need to close the ScraperAPI client


pg = ScraperAPI(SCRAPER)
scholarly.use_proxy(pg)
scholarly.set_timeout(120)

search_query = scholarly.search_author('Maël Montévil')
author = scholarly.fill(next(search_query))

# Fill one specific publication and list every work that cites it.
pub = scholarly.fill(author['publications'][16])
print(pub)
print(list(scholarly.citedby(pub)))
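
Compared with Example #3, which uses ProxyGenerator's built-in `ScraperAPI` setup, this variant subclasses `ProxyGenerator` and hands scholarly the SDK's client object directly as its session: `_new_session` reuses the one client rather than building a fresh session, and resetting `got_403` presumably tells scholarly to treat the session as clean. This relies on the client exposing a requests-like interface, which is an assumption of this example rather than a documented contract.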