def get_by_eid(self, eid):
    """Return the h-index of the author identified by *eid*.

    Parameters
    ----------
    eid : str
        Scopus author identifier (AU-ID) to search for.

    Returns
    -------
    int or None
        The author's h-index, or None when no author matches.

    Raises
    ------
    ValueError
        If the AU-ID query unexpectedly matches more than one profile.
    """
    au = AuthorSearch('AU-ID({})'.format(eid))
    size = au.get_results_size()
    if size == 0:
        return None
    # An AU-ID addresses a unique profile; more than one hit indicates an
    # inconsistent server response.  The original used a bare `assert`,
    # which is stripped under `python -O` -- raise explicitly instead.
    if size != 1:
        raise ValueError("AU-ID({}) matched {} profiles".format(eid, size))
    res = AuthorRetrieval(au.authors[0][0])
    return res.h_index
def get_by_name(self, first, last):
    """Return a table of EID, affiliation, town, country otherwise."""
    search = AuthorSearch("AUTHLAST({}) and AUTHFIRST({})".format(last, first))
    if search.get_results_size() == 0:
        return None
    frame = pd.DataFrame(search.authors)
    rows = zip(frame["eid"], frame["affiliation"], frame["city"],
               frame["country"])
    # The numeric author ID is the last dash-separated token of the EID.
    return [[eid.split("-")[-1], affiliation, city, country]
            for eid, affiliation, city, country in rows]
def test_authors_in_cache(): create_cache(drop=True, file=test_cache) # Variables expected_auth = ["53164702100", "57197093438"] search_auth = ["55317901900"] # Test empty cache df1 = pd.DataFrame(expected_auth, columns=["auth_id"], dtype="int64") incache, tosearch = authors_in_cache(df1, file=test_cache) expected_cols = ['auth_id', 'eid', 'surname', 'initials', 'givenname', 'affiliation', 'documents', 'affiliation_id', 'city', 'country', 'areas'] expected_auth = [int(au) for au in expected_auth] assert_equal(tosearch, expected_auth) assert_equal(len(incache), 0) assert_equal(incache.columns.tolist(), expected_cols) # Test partial retrieval q = "AU-ID({})".format(') OR AU-ID('.join([str(a) for a in expected_auth])) res = pd.DataFrame(AuthorSearch(q).authors, dtype="int64") res["auth_id"] = res["eid"].str.split("-").str[-1] res = res[expected_cols] cache_insert(res, table="authors", file=test_cache) df2 = pd.DataFrame(expected_auth + search_auth, columns=["auth_id"], dtype="int64") incache, tosearch = authors_in_cache(df2, file=test_cache) assert_equal(tosearch, [55317901900]) assert_equal(len(incache), 2) # Test full retrieval incache, tosearch = authors_in_cache(df1, file=test_cache) assert_equal(tosearch, []) assert_equal(len(incache), 2)
def create_obj(params):
    """Instantiate the search object matching the enclosing query type.

    Relies on ``q_type``, ``fields`` and ``view`` from the enclosing
    scope.

    Parameters
    ----------
    params : dict
        Keyword arguments forwarded to the search class.

    Returns
    -------
    AuthorSearch or ScopusSearch

    Raises
    ------
    ValueError
        If q_type is none of the allowed values ("author", "docs").
    """
    if q_type == "author":
        return AuthorSearch(**params)
    elif q_type == "docs":
        params["integrity_fields"] = fields
        params["view"] = view
        return ScopusSearch(**params)
    # Previously an unknown q_type silently returned None; fail loudly.
    raise ValueError("q_type must be 'author' or 'docs'")
def get_by_name(self, first, last):
    """Return the h-index of an author if there is only one matching,
    None if none is found, or a table with EID, affiliation, town,
    country otherwise.
    """
    search = AuthorSearch('AUTHLAST({}) and AUTHFIRST({})'.format(last, first))
    n_hits = search.get_results_size()
    if n_hits == 0:
        # No author matched at all.
        return [None, False]
    if n_hits == 1:
        # Unambiguous match: resolve the profile and report its h-index.
        profile = AuthorRetrieval(search.authors[0][0])
        return [profile.h_index, False]
    # Ambiguous: hand back a disambiguation table instead of an h-index.
    frame = pd.DataFrame(search.authors)
    rows = zip(frame['eid'], frame['affiliation'], frame['city'],
               frame['country'])
    table = [[eid.split('-')[-1], affiliation, city, country]
             for eid, affiliation, city, country in rows]
    return [table, True]
def test_retrieve_authors_insert():
    """Verify that author records written via insert_data() are found again
    by retrieve_authors(), and that unknown IDs are reported as missing.
    """
    # Start from a pristine database.
    make_database(test_cache, drop=True)
    conn = connect_database(test_cache)
    # Variables
    expected_auth = [53164702100, 57197093438]
    search_auth = [55317901900]
    expected_cols = [
        'auth_id', 'eid', 'surname', 'initials', 'givenname', 'affiliation',
        'documents', 'affiliation_id', 'city', 'country', 'areas'
    ]
    # Insert data for the two expected authors.
    q = f"AU-ID({robust_join(expected_auth, sep=') OR AU-ID(')})"
    res = pd.DataFrame(AuthorSearch(q, refresh=refresh).authors, dtype="int64")
    # The numeric author ID is the last dash-separated token of the EID.
    res["auth_id"] = res["eid"].str.split("-").str[-1]
    res = res[expected_cols]
    insert_data(res, conn, table="authors")
    # Retrieve data: two IDs are cached, one (search_auth) is not.
    df = pd.DataFrame(expected_auth + search_auth, columns=["auth_id"],
                      dtype="int64")
    incache, missing = retrieve_authors(df, conn)
    assert_equal(incache.shape[0], 2)
    assert_equal(missing, [55317901900])
def search_data(self):
    """Run the example author search for Reinhard Selten.

    Returns
    -------
    AuthorSearch
        The refreshed search object, for further inspection by the caller.
    """
    # NOTE(review): the original wrapped this call between two
    # pdb.set_trace() breakpoints.  Leftover debugger traps halt any
    # non-interactive run, so they were removed.
    s = AuthorSearch('AUTHLAST(Selten) and AUTHFIRST(Reinhard)', refresh=True)
    return s
#!/usr/bin/env python # -*- coding: utf-8 -*- """Tests for `scopus.AuthorSearch` module.""" from collections import namedtuple from nose.tools import assert_equal, assert_true from pybliometrics.scopus import AuthorSearch s1 = AuthorSearch('authlast(selten) and authfirst(reinhard)', refresh=True) s2 = AuthorSearch('authlast(selten)', download=False) def test_authors(): order = 'eid surname initials givenname affiliation documents '\ 'affiliation_id city country areas' Author = namedtuple('Author', order) expected = [ Author(eid='9-s2.0-6602907525', surname='Selten', initials='R.', givenname='Reinhard', affiliation='Universität Bonn', documents='74', affiliation_id='60007493', city='Bonn', country='Germany', areas='ECON (73); MATH (19); BUSI (16)') ] assert_equal(s1.authors, expected)
def base_query(q_type, query, refresh=False, fields=None, size_only=False):
    """Wrapper function to perform a particular search query.

    Parameters
    ----------
    q_type : str
        Determines the query search that will be used.  Allowed values:
        "author", "docs".

    query : str
        The query string.

    refresh : bool (optional, default=False)
        Whether to refresh cached files if they exist, or not.

    fields : list of field names (optional, default=None)
        Fields in the Scopus query that must always be present.  To be
        passed onto pybliometrics.scopus.ScopusSearch.  Will be ignored
        when q_type = "author".

    size_only : bool (optional, default=False)
        Whether to not download results and return the number of
        results instead.

    Returns
    -------
    res : list of namedtuples (if size_only is False) or int
        Documents represented by namedtuples as returned from scopus or
        number of search results.

    Raises
    ------
    ValueError:
        If q_type is none of the allowed values.
    """
    if q_type not in ("author", "docs"):
        # Docstring always promised a ValueError; fail before any download.
        raise ValueError("q_type must be 'author' or 'docs'")
    params = {"query": query, "refresh": refresh, "download": not size_only}
    # Download query; retry once with refresh when the server fails.
    try:
        if q_type == "author":
            obj = AuthorSearch(**params)
        else:
            params["integrity_fields"] = fields
            obj = ScopusSearch(**params)
    except (AttributeError, Scopus500Error, KeyError, HTTPError):
        # The broad error tuple has to be maintained due to the occurrence
        # of non-replicable errors (e.g. 'cursor', HTTPError).
        sleep(2.0)
        # BUG FIX: the original retried with fields=None, silently
        # dropping the integrity check on every retry.
        return base_query(q_type, query, refresh=True, fields=fields,
                          size_only=size_only)
    if size_only:
        return obj.get_results_size()
    # Parse results; refresh when the integrity check fails or the server
    # sent bad results.
    try:
        if q_type == "author":
            res = obj.authors or []
        else:
            res = obj.results or []
    except (AttributeError, Scopus500Error, KeyError, HTTPError):
        return base_query(q_type, query, refresh=True, fields=fields,
                          size_only=size_only)
    return res
def base_query(q_type, query, refresh=False, view="COMPLETE", fields=None,
               size_only=False):
    """Wrapper function to perform a particular search query.

    Parameters
    ----------
    q_type : str
        Determines the query search that will be used.  Allowed values:
        "author", "docs".

    query : str
        The query string.

    refresh : bool (optional, default=False)
        Whether to refresh cached files if they exist, or not.

    view : str (optional, default="COMPLETE")
        The view of the ScopusSearch result set.  Ignored when
        q_type = "author".

    fields : list of field names (optional, default=None)
        Fields in the Scopus query that must always be present.  To be
        passed onto pybliometrics.scopus.ScopusSearch.  Will be ignored
        when q_type = "author".

    size_only : bool (optional, default=False)
        Whether to not download results and return the number of
        results instead.

    Returns
    -------
    res : list of namedtuples (if size_only is False) or int
        Documents represented by namedtuples as returned from scopus or
        number of search results.

    Raises
    ------
    ValueError:
        If q_type is none of the allowed values.
    """
    if q_type not in ("author", "docs"):
        # BUG FIX: the docstring promised a ValueError, but the original
        # fell through and implicitly returned None for unknown q_type.
        raise ValueError("q_type must be 'author' or 'docs'")
    from pybliometrics.scopus import AuthorSearch, ScopusSearch
    params = {"query": query, "refresh": refresh, "download": not size_only}
    if q_type == "author":
        au = AuthorSearch(**params)
        if size_only:
            return au.get_results_size()
        return au.authors or []
    # q_type == "docs"
    params["integrity_fields"] = fields
    params["view"] = view
    if size_only:
        return ScopusSearch(**params).get_results_size()
    try:
        return ScopusSearch(**params).results or []
    except AttributeError:
        # Integrity check failed: drop the constraint and force a refresh.
        params.pop("integrity_fields")
        params["refresh"] = True
        return ScopusSearch(**params).results or []
# Build a ranked h-index bar chart from a semicolon-separated input file.
# Expected row format: "fullname;firstnames;title;scopus_author_id".
names = []
largest_name = 0
largest_title = 0
with open(args.input, "r") as infile:
    for line in infile:
        tokens = line.rstrip().split(";")
        # Skip malformed rows and rows without a Scopus author ID.
        if len(tokens) == 4 and tokens[3]:
            # BUG FIX: guard against empty tokens produced by consecutive
            # spaces, which would crash on firstname[0].
            initials = [firstname[0] + "."
                        for firstname in tokens[1].split(" ") if firstname]
            hindex = 0
            if args.fake:
                # Fake mode: draw a plausible h-index instead of querying.
                hindex = int(random.expovariate(1 / 20.0))
            else:
                au = AuthorSearch(f"AU-ID({tokens[3]})")
                if au.get_results_size() > 0:
                    # An AU-ID addresses exactly one author profile.
                    assert au.get_results_size() == 1
                    hindex = int(
                        AuthorRetrieval(au.authors[0][0]).h_index)
            fullname = tokens[0] + " " + " ".join(initials)
            largest_name = max(largest_name, len(fullname))
            largest_title = max(largest_title, len(tokens[2]))
            names.append([fullname, tokens[2], hindex])
# Print one padded bar-chart row per author, highest h-index first.
for name in sorted(names, key=itemgetter(2), reverse=True):
    print(
        f'{name[0] + " " * (largest_name - len(name[0]))} ({name[1]}) {" " * (largest_title - len(name[1]))} {"▇" * name[2]} {name[2]}'
    )
# Look up each academic's Scopus profile, preferring the unambiguous
# ORCID search and falling back to name-based searches.
with open('cis_academics.csv', 'r') as readFile:
    reader = csv.reader(readFile, delimiter=',')
    for row in reader:
        # Skip the header row once.
        if first_line_flag:
            first_line_flag = False
            continue
        given_name = row[0]
        family_name = row[1]
        orcid_id = row[2]
        print(family_name + " " + given_name)
        if len(orcid_id) > 1:
            print("orcid:" + orcid_id)
            # First try the unambiguous ORCID lookup.
            authorSearch = AuthorSearch('ORCID(' + orcid_id + ')')
            authors = authorSearch.authors
            # BUG FIX: compare against the None singleton with `is`,
            # not `==` (PEP 8 identity comparison).
            if authors is None:
                print("no result with orcid!")
                # Fall back to a name-based search.
                authorSearch = AuthorSearch('AUTHLAST(' + family_name +
                                            ') and AUTHFIRST(' + given_name +
                                            ') and AFFIL(University)')
                authors = authorSearch.authors
            if authors is None:
                print("no result with first")
                # Last resort: retry with given/family names swapped.
                authorSearch = AuthorSearch('AUTHLAST(' + given_name +
                                            ') and AUTHFIRST(' + family_name +
                                            ') and AFFIL(University)')
                authors = authorSearch.authors