def test_ScraperAPI(self):
    """Check that a keyword search works when routed through ScraperAPI.

    The API key is read from the ``SCRAPER_API_KEY`` environment variable
    (``os.getenv`` yields ``None`` when it is unset). The actual verification
    is delegated to ``self.test_search_keyword()``.
    """
    proxy_generator = ProxyGenerator()
    proxy_generator.ScraperAPI(os.getenv('SCRAPER_API_KEY'))
    scholarly.set_timeout(60)
    # Register the configured generator with scholarly; configuring a
    # ProxyGenerator alone does not make scholarly send requests through it.
    scholarly.use_proxy(proxy_generator)
    ## Uses another method to test that proxy is working.
    self.test_search_keyword()
def search(
    self, query: str, generic_cover: str = "", locale: str = "en"
) -> Optional[List[MetaRecord]]:
    """Look up ``query`` via ``scholarly.search_pubs`` and parse the results.

    Args:
        query: Free-text query. When ``self.get_title_tokens`` yields any
            tokens, the query is replaced by those tokens, each UTF-8
            encoded and URL-quoted, joined with single spaces.
        generic_cover: Currently not forwarded; an empty string is passed to
            ``_parse_search_result`` instead.
            NOTE(review): confirm whether ignoring this value is intended.
        locale: Passed through to ``_parse_search_result``.

    Returns:
        The parsed records for at most the first 10 results, an empty list
        when ``self.active`` is falsy, or ``None`` when preparing the search
        raised (the exception is logged as a warning).
    """
    if not self.active:
        return []

    tokens = list(self.get_title_tokens(query, strip_joiners=False))
    if tokens:
        query = " ".join(quote(token.encode("utf-8")) for token in tokens)

    try:
        scholarly.set_timeout(20)
        scholarly.set_retries(2)
        # islice is lazy: only errors raised while creating the search are
        # handled here; errors raised during iteration below propagate.
        hits = itertools.islice(scholarly.search_pubs(query), 10)
    except Exception as e:
        log.warning(e)
        return None

    return [
        self._parse_search_result(result=hit, generic_cover="", locale=locale)
        for hit in hits
    ]
# Script: fetch one author's cited publications through ScraperAPI and print
# each publication together with the works citing it, as JSON.
import json
import os
from pathlib import Path

from dotenv import load_dotenv
from scholarly import scholarly, ProxyGenerator

# The .env file is looked up one directory above the current working directory.
env_path = Path('../').joinpath('.env')
load_dotenv(dotenv_path=env_path)

# ScraperAPI key; None when the SCRAPER variable is not set.
SCRAPER = os.environ.get("SCRAPER")

# Configure the proxy, then register it with scholarly.
proxy_generator = ProxyGenerator()
proxy_generator.ScraperAPI(SCRAPER)
scholarly.set_timeout(60)
scholarly.use_proxy(proxy_generator)

# Take the first author hit and fill in its details.
search_query = scholarly.search_author('Maël Montévil')
author = scholarly.fill(next(search_query))

# Fill only the publications that have at least one citation.
pubs = []
for pub in author['publications']:
    if pub['num_citations'] > 0:
        pubs.append(scholarly.fill(pub))

# Pair each publication that exposes a 'citedby_url' with its citing works.
pubs2 = []
for pub in pubs:
    if 'citedby_url' not in pub:
        continue
    pubs2.append([pub, list(scholarly.citedby(pub))])


def _not_serializable(_obj):
    # json.dumps fallback: placeholder for values json cannot encode.
    return '<not serializable>'


print(json.dumps(pubs2, indent=2, default=_not_serializable))
class ScraperAPI(ProxyGenerator):
    """ProxyGenerator subclass whose session is a single ScraperAPIClient.

    The same client object is handed out by every ``_new_session`` call, and
    ``_close_session`` is a no-op.
    NOTE(review): presumably these two methods override hooks that
    ProxyGenerator calls internally; confirm against the scholarly version
    in use.
    """

    def __init__(self, api_key):
        """Build the client for ``api_key``.

        Raises:
            ValueError: if ``api_key`` is None.
        """
        # Validate before the key is handed to the client, and with an
        # explicit raise: an ``assert`` is removed when Python runs with -O.
        if api_key is None:
            raise ValueError("ScraperAPI requires an API key, got None")
        self._api_key = api_key
        self._client = ScraperAPIClient(api_key)
        super().__init__()
        # Set after the base initialiser so these values are the ones in effect.
        self._TIMEOUT = 120
        self._session = self._client
        self._session.proxies = {}

    def _new_session(self):
        """Clear the 403 flag and return the shared client as the session."""
        self.got_403 = False
        return self._session

    def _close_session(self):
        """Do nothing."""
        pass  # no need to close the ScraperAPI client


# Driver: SCRAPER must already hold the API key (it is not defined in this block).
pg = ScraperAPI(SCRAPER)
scholarly.use_proxy(pg)
scholarly.set_timeout(120)

search_query = scholarly.search_author('Maël Montévil')
author = scholarly.fill(next(search_query))
# NOTE(review): index 16 is hard-coded; this raises IndexError when the author
# has fewer than 17 publications.
pub = scholarly.fill(author['publications'][16])
print(pub)
print(list(scholarly.citedby(pub)))