def setUp(self):
    """Configure the scholarly proxy used by the tests.

    The connection method is taken from the ``CONNECTION_METHOD``
    environment variable when that key is present in ``scholarly.env``;
    otherwise it is ``"none"``. The chosen value is stored on
    ``self.connection_method`` and selects one of the branches below:

    * ``"tor"``          -- ``ProxyGenerator.Tor_External`` with
      platform-dependent ports.
    * ``"tor_internal"`` -- ``ProxyGenerator.Tor_Internal`` with a
      platform-dependent executable name.
    * ``"luminati"``     -- ``ProxyGenerator.Luminati`` using the
      ``USERNAME``, ``PASSWORD`` and ``PORT`` environment variables, after
      raising the retry count to 10.
    * ``"freeproxy"``    -- ``ProxyGenerator.FreeProxies``.
    * anything else      -- ``scholarly.use_proxy(None)``.
    """
    proxy_generator = ProxyGenerator()
    if "CONNECTION_METHOD" in scholarly.env:
        self.connection_method = os.getenv("CONNECTION_METHOD")
    else:
        self.connection_method = "none"

    on_windows = sys.platform.startswith("win")

    if self.connection_method == "tor":
        tor_password = "******"
        # Tor uses the 9050 port as the default socks port (9051 for
        # control); on Windows it is 9150 for socks and 9151 for control.
        # Every non-Windows platform gets the Unix defaults so the ports are
        # never passed as None.
        if on_windows:
            tor_sock_port = 9150
            tor_control_port = 9151
        else:
            tor_sock_port = 9050
            tor_control_port = 9051
        proxy_generator.Tor_External(tor_sock_port, tor_control_port, tor_password)
        scholarly.use_proxy(proxy_generator)
    elif self.connection_method == "tor_internal":
        # Previously only linux and win were handled, which left tor_cmd
        # unbound (UnboundLocalError) on macOS and other platforms.
        tor_cmd = 'tor.exe' if on_windows else 'tor'
        proxy_generator.Tor_Internal(tor_cmd=tor_cmd)
        scholarly.use_proxy(proxy_generator)
    elif self.connection_method == "luminati":
        scholarly.set_retries(10)
        proxy_generator.Luminati(
            usr=os.getenv("USERNAME"),
            passwd=os.getenv("PASSWORD"),
            proxy_port=os.getenv("PORT"),
        )
        scholarly.use_proxy(proxy_generator)
    elif self.connection_method == "freeproxy":
        proxy_generator.FreeProxies()
        scholarly.use_proxy(proxy_generator)
    else:
        scholarly.use_proxy(None)
from futurewater.util import format_author MAX_RETRIES_ON_ERROR = 3 # https://scholarly.readthedocs.io/en/latest/quickstart.html#installation # https://github.com/scholarly-python-package/scholarly # https://github.com/OpenAPC/openapc-de/blob/master/python/import_dois.py logging.getLogger("requests").setLevel(logging.WARNING) logging.getLogger("urllib3").setLevel(logging.WARNING) logger = logging.getLogger() pg = ProxyGenerator() pg.Tor_External(tor_sock_port=9050, tor_control_port=9051, tor_password="******") scholarly.use_proxy(pg) def get_schoolar_data(author_name, cache_folder="scholarly", affiliation='UBC'): output_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "resources", cache_folder) cached = os.path.join(output_folder, format_author(author_name)) from_cache = False final_data = [] if not os.path.isfile(cached): try:
#!/usr/bin/env python
# Read publication titles (one per line) from the files named on the command
# line, or from stdin, look each one up through scholarly, and print
# "<title>,<gsrank>,<cites>" for the first search result.
from scholarly import scholarly
from scholarly import ProxyGenerator
import fileinput
import sys

# Route all scholarly requests through an external Tor instance.
pg = ProxyGenerator()
pg.Tor_External(9050, 9051, 'password')
scholarly.use_proxy(pg)

for a in fileinput.input():
    # Lines from fileinput keep their trailing newline, so strip it before
    # testing for a blank line and before sending the query.
    title = a.rstrip()
    if not title:
        continue
    try:
        search_query = scholarly.search_pubs(title)
        aa = next(search_query).fill()
        bib = aa.bib
        # Build the whole row first so a missing key cannot leave a
        # half-printed line on stdout.
        row = ",".join([title, str(bib['gsrank']), str(bib['cites'])])
    except Exception:
        # Best effort: report the failure and move on to the next title.
        # KeyboardInterrupt/SystemExit are deliberately not caught so the
        # script can still be interrupted.
        print(" --- Unexpected error (" + title + "): ", sys.exc_info()[0])
    else:
        print(row)