Exemplo n.º 1
0
def search_my_query(my_query):
    '''
    Search Scopus for a query string.

    :param my_query: string of query desired to be searched in scopus
    :return: resultant dataframe of results from scopus, or None when the
        argument is not a string
    '''
    if isinstance(my_query, str):
        ## Load configuration (API key) from the local config file
        with open("config.json") as con_file:
            config = json.load(con_file)

        ## Initialize client
        client = ElsClient(config['APIKey'])

        ## Initialize doc search object using Scopus and execute search,
        ## retrieving all results
        print('......Searching Scopus......')
        # BUG FIX: the original referenced an undefined name `query` on the
        # next two lines (NameError); the parameter is `my_query`.
        print('......for..... ' + my_query + ' ....')
        doc_srch = ElsSearch(my_query, 'scopus')
        doc_srch.execute(client, get_all=True)
        print("doc_srch has", len(doc_srch.results), "results.")

        return doc_srch.results_df
    else:
        print('the query must be a string. no searches run...')
        return
def get_pubs_org_from_api(org_id: str, api_key: Optional[str] = None) -> Optional[list]:
    """ Loads and returns data on publications of organization from Scopus via API.

    :param org_id: Scopus affiliation ID of the organization (used in the AF-ID query)
    :param api_key: Elsevier API key passed to the client; may be None
    :return: list of publication records, each enriched with an 'authors'
        entry fetched from its author-affiliation link, or None when the
        search request does not come back with HTTP 200
    """
    client = ElsClient(api_key)
    search = ElsSearch(f"(AF-ID({org_id}))", 'scopus')  # AND PUBYEAR > 2019
    # todo rewrite in asynchronous mode
    search.execute(client,
                   get_all=True)  # load the organization's publication data
    if client.req_status['status_code'] != 200:
        return None
    pubs = search.results

    logging.info(f'{len(pubs)} publications received')

    # build the map of tasks (publication index -> URL) for fetching author data
    tasks = defaultdict(list)
    for i, res in enumerate(pubs):
        for authors_link in res['link']:
            # only the 'author-affiliation' link carries the author details
            if authors_link['@ref'] == 'author-affiliation':
                tasks[i] = authors_link['@href']
                break

    header = get_header(api_key)
    result = async_fetch_urls(tasks.values(), header)
    # dict preserves insertion order, so keys and fetched results line up
    for i, j in zip(tasks.keys(), result):
        pubs[i]['authors'] = j

    return pubs
Exemplo n.º 3
0
def find_articles(year=None, issn=None, get_all=True):
    """
    Returns a list of the DOI's for all articles published in the specified year and journal.

    Args:
        year (str): year of publication
        issn (str): ISSN (or EISSN) of journal
        get_all (bool): Whether all results should be returned or just the 1st result. Default is True.

    Returns:
        dois (list of str): The dois for all articles published in the
            corresponding journal in the specified year. Records without a
            'prism:doi' field are skipped.
    """

    query = build_scopus_query(year=year, issn=issn)
    search = ElsSearch(
        query,
        index='scopus',
    )
    search.execute(els_client=CLIENT, get_all=get_all)
    # Some Scopus records lack a DOI; filter for the key explicitly rather
    # than swallowing all exceptions with a bare except.
    return [r['prism:doi'] for r in search.results if 'prism:doi' in r]
Exemplo n.º 4
0
    def search(self):
        """Run every prepared query against the Elsevier search API and dump
        each result set as JSON into a date-stamped result folder.

        Uses self.key_generator() for rotating API keys, self.queries and
        self.names (parallel iterators), and logs progress/errors to the
        module-level search_log/error_log.
        """
        # initialize the rotating API keys
        keygen = self.key_generator()
        init_key = next(keygen)

        # Initialize the elsapy client with the first key
        client = ElsClient(init_key, view=self.view)
        count = 0

        # e.g. result/<subject>_YYYYMMDD/
        folder = Path('result') / f'{self.subject}_{time.strftime("%Y%m%d")}'
        if not folder.exists():
            folder.mkdir(parents=True)

        for query in self.queries:

            try:
                name = next(self.names)
                name = '_'.join(name)
            except Exception:
                # narrowed from a bare except (which also caught
                # KeyboardInterrupt); this could happen if your file name
                # contains unexpected characters
                error_log.info(f'Name error at {query}.')
                break

            try:
                srch = ElsSearch(query, index=self.subject, keygen=keygen)
                srch.execute(client, get_all=True)
                count += 1
                print(f'Progress: {count}/{self.length}, {query}')
                if srch.status_code == 400:
                    error_log.info(f'Bad query: {name}')
                else:
                    search_log.info(f'Results found: {name}, # of results: {len(srch.results)}')
                    self.write_json(srch.results, name, folder)
            except Exception as e:
                error_log.info(f'Search error: {name}, {str(e)}')
Exemplo n.º 5
0
 def search(self):
     """Count yearly ScienceDirect hits for the title and its variants.

     For each year in [self._begin_year, self._end_year], executes the query
     "(title) OR (variant1) OR ... AND PUBYEAR = year" and appends a
     semicolon-separated line "title;year;total" to self._filename.
     NOTE(review): `client` is not defined in this method nor read from
     self — presumably a module-level ElsClient instance; confirm.
     """
     # OR-join the main title with each similar/alternative title
     search_string_without_year = f"({self._title})" + "".join(list(f" OR ({x})" for x in self._similar))
     
     for year in range(self._begin_year, self._end_year + 1):
         search_string_year = f" AND PUBYEAR = {year}"
         
         doc_srch = ElsSearch(search_string_without_year + search_string_year,'sciencedirect')
         # get_all=False: only the first page is needed for tot_num_res
         doc_srch.execute(client, get_all = False)
         
         # append-mode so successive years accumulate in the same file
         with open(self._filename, "a") as f:
             f.write(f"{self._new_title};{year};{doc_srch.tot_num_res}\n")
Exemplo n.º 6
0
def get_relevant_papers(patient_id):
    """Search Scopus for each of the patient's conditions and return the
    five most-cited papers among all collected results.

    :param patient_id: identifier passed to get_search_terms()
    :return: whatever get_n_most_cited(5, ...) returns for the result pool
    """
    collected = {}
    index = 0
    for condition in get_search_terms(patient_id):
        condition_search = ElsSearch(condition, 'scopus')
        condition_search.execute(client, get_all=False)
        # pool every record under a running string index
        for record in condition_search.results:
            collected[str(index)] = record
            index += 1
        print("doc_srch for ", condition, " has", len(condition_search.results), "results.")
    return get_n_most_cited(5, collected)  #CHANGE TO NUMBER OF WANTED PAPERS
Exemplo n.º 7
0
    def doi(self, doi):
        """Look up a document on Scopus by its DOI.

        :param doi: the DOI to search for
        :return: dict with 'doi' (as reported by Scopus), 'title' and the
            full 'record', or None when only error placeholders came back
        """
        # removed unused `from elsapy.elsclient import ElsClient`
        from elsapy.elssearch import ElsSearch

        aff_srch = ElsSearch("DOI ( %s )" % doi, 'scopus', maxResults=1)
        aff_srch.execute(self.client, get_all=True)

        # drop error placeholder entries returned by the API
        rs = [x for x in aff_srch.results if "error" not in x]
        if rs:
            # use the DOI as reported by Scopus (the original shadowed the
            # parameter here, which was confusing though harmless)
            found_doi = rs[0]['prism:doi']
            return {"doi": found_doi, "title": rs[0]["dc:title"], "record": rs[0]}
        else:
            return None
Exemplo n.º 8
0
def get_docs_by_author(author_id):
    """Yield every Scopus document record authored by the given author.

    :param author_id: Scopus author ID used in the au-id() query
    :return: generator over raw result records
    """
    logger.info(f'Searching docs by author {author_id}')
    els_client = elsclient.ElsClient(key_manager.get_key('scopus_search'))
    author_search = ElsSearch(f'au-id({author_id})', 'scopus')
    author_search.execute(els_client, True)
    yield from author_search.results
Exemplo n.º 9
0
def get_author_by_name(last_name, first_name, affiliation_id='60005248'):
    """Search the Scopus author index by name and affiliation.

    :param last_name: author surname
    :param first_name: author given name
    :param affiliation_id: Scopus affiliation ID restricting the search
    :return: generator yielding one dict bundling the input names with the
        raw search results
    """
    logger.info('searching authors by name and affiliation')
    els_client = elsclient.ElsClient(key_manager.get_key('scopus_search'))
    author_query = (
        f'authlast({last_name}) and authfirst({first_name}) '
        f'and af-id({affiliation_id})'
    )
    author_search = ElsSearch(author_query, 'author')
    author_search.execute(els_client)
    yield {'first_name': first_name, 'last_name': last_name, 'results': author_search.results}
Exemplo n.º 10
0
 def search(self,query="A Lightweight Autoencoder"):
     """Search ScienceDirect and cache metadata for each retrievable hit.

     For every result row, fetches the full document by PII; on success,
     stores {title, year, link, Abstract} in self.data under an id derived
     from the current timestamp. Rows whose full text cannot be read are
     skipped with a message.

     :param query: ScienceDirect search string
     """
     doc_srch = ElsSearch(query,'sciencedirect')
     doc_srch.execute(self.client, get_all = False)
     for _,doc in doc_srch.results_df.iterrows():
         pii_doc = FullDoc(sd_pii = doc['pii'])
         if pii_doc.read(self.client):
             try:
                 # drop the first token of the description — presumably a
                 # leading "Abstract" label; TODO confirm
                 abstract = " ".join(pii_doc.data['coredata']['dc:description'].split()[1:])
                 # pseudo-unique id from the timestamp's integer ratio;
                 # NOTE(review): collisions possible within one float tick
                 doc_id = str(hex(time.time().as_integer_ratio()[0]))
                 title = doc['dc:title']
                 pdf_link = doc['link']['scidir']
                 # keep only the year part of the ISO load-date
                 dates = doc['load-date'].split('-')[0]
                 self.data[doc_id] = {"title": title, "year": dates, "link": pdf_link, "Abstract":abstract}
             except:
                 # rows missing any of the fields above are silently skipped
                 pass
         else:
             print("Doc Skipped!!")
Exemplo n.º 11
0
    def search(self, query_name):
        """Execute the query stored in a query file and return the results.

        Args:
            query_name (str): the name of the file in the ./queries/ directory
                that contains the query. Defaults to "query.txt".

        Raises:
            FileNotFoundError if the query file can not be found.

        Returns:
            list: The raw search results.
        """
        loaded_query = self._load_query(query_name)
        els_client = ElsClient(self.api_key)
        scopus_search = ElsSearch(loaded_query, "scopus")
        scopus_search.execute(els_client)
        return scopus_search.results
Exemplo n.º 12
0
def author_score(fname, lname):
    """Sum the h-indices of every Scopus author profile matching each
    (first, last) name pair, printing progress and an ETA per author.

    :param fname: sequence of first names
    :param lname: sequence of last names, parallel to fname
    :return: total h-index across all matched profiles
    """
    client = elsevier_auth()

    num = len(fname)
    count = 0
    total = 0
    score = 0

    for first, last in zip(fname, lname):
        start = time.time()
        print(first, last)
        myDocSrch = ElsSearch(
            'AUTHLASTNAME(' + last + ') AND AUTHFIRST(' + first + ')',
            'author')
        myDocSrch.execute(client)

        for x in myDocSrch.results:
            # some result entries (e.g. error placeholders) carry no id;
            # narrowed from a bare except to the specific KeyError
            try:
                a_id = x['dc:identifier']
            except KeyError:
                continue
            auth_id = a_id.replace('AUTHOR_ID:', '')

            author = ElsAuthor(author_id=auth_id)
            if author.read_metrics(client):
                h_index = author.data['h-index']
                score += h_index
                print(first, last, " ID:", auth_id, " h-index:", h_index)
            else:
                # profile without metrics contributes nothing
                # (removed the no-op `score += 0`)
                print("no data")

        # progress / ETA bookkeeping
        end = time.time()
        diff = end - start
        total += diff
        count += 1
        num -= 1
        avg = total / count
        est = (num * avg) / 60
        print("time used for this author:", end - start, "s")
        print(num, "authors, estimated time left:", est, "minutes")
        print()
    return score
def retrivePublicationForAuthor(author, client, output_file):
    """Search Scopus for an author's publications since 2019 and write those
    whose title contains any of the module-level key_words to output_file
    as tab-separated lines (title, doi, cover date, subtype, publication).

    :param author: author name for the AUTHOR-NAME() query
    :param client: initialized ElsClient
    :param output_file: writable file object receiving TSV lines
    """
    doc_srch = ElsSearch("AUTHOR-NAME({}) AND PUBYEAR > 2018".format(author),
                         'scopus')
    doc_srch.execute(client, get_all=True)
    print(author)
    print("doc_srch has", len(doc_srch.results), "results.")

    for res in doc_srch.results:
        if any(i in res['dc:title'] for i in key_words):
            # prefer the DOI, then the record URL, then empty — replaces
            # the original nested bare try/excepts
            doi = res.get('prism:doi', res.get('prism:url', ''))
            output_file.write('{}\t{}\t{}\t{}\t{}\n'.format(
                res['dc:title'], doi, res['prism:coverDate'],
                res['subtypeDescription'], res['prism:publicationName']))
Exemplo n.º 14
0
def apiCall(query):
    """Search ScienceDirect for `query` and return up to five results that
    have a retrievable abstract.

    :param query: search string
    :return: list of dicts with abstract/author/title/source/sourceURL
        (empty list when the search itself fails)
    """
    results = []
    searchResult = ElsSearch(query, 'sciencedirect')

    # Check if API returns valid results; any failure yields an empty list
    try:
        searchResult.execute(client, get_all=False)
    except Exception:
        return results

    for result in searchResult.results:
        # Attempt to retrieve abstract; records missing any needed field
        # or with unfetchable abstracts are skipped
        try:
            DOI = result["prism:doi"]
            abstractResp = requests.get(url=abstractURL + DOI,
                                        headers=API_headers)
            abstractData = json.loads(abstractResp.text)

            # If valid abstract found then extract required data
            abstract = abstractData["abstracts-retrieval-response"][
                "coredata"]["dc:description"]
            author = result["dc:creator"]
            title = result["dc:title"]
            source = result["prism:publicationName"]
            sourceURL = result["prism:url"]

            results.append({
                "abstract": abstract,
                "author": author,
                "title": title,
                "source": source,
                "sourceURL": sourceURL
            })

        except (KeyError, ValueError, requests.RequestException):
            # KeyError: missing metadata field; ValueError: malformed JSON;
            # RequestException: network failure for this record
            pass

        # stop once five usable results have been collected
        if len(results) == 5:
            break

    return results
Exemplo n.º 15
0
def find_articles(year=None,
                  issn=None,
                  get_all=True,
                  id_type="doi",
                  apikey=None):
    """
    Returns a list of the ids for all articles published in the specified year and journal.

    Args:
        year (str): year of publication
        issn (str): ISSN (or EISSN) of journal
        get_all (bool): Whether all results should be returned or just the 1st result. Default is True.
        id_type: (str) Return document eids or dois. Default is doi.
        apikey (str): optional API key; when given a fresh client is built,
            otherwise the module-level CLIENT is used.

    Returns:
        ids (list of str): The eids/dois for all articles published in the
            corresponding journal in the specified year. Records lacking the
            requested id field are skipped.
    """

    query = build_scopus_query(year=year, issn=issn)
    # BUG FIX: the original assigned `CLIENT = ElsClient(...)` inside the
    # `if apikey:` branch, which made CLIENT function-local and raised
    # UnboundLocalError whenever apikey was None; use a local name and fall
    # back to the module-level CLIENT instead.
    els_client = ElsClient(apikey, num_res=10000) if apikey else CLIENT
    search = ElsSearch(
        query,
        index='scopus',
    )
    search.execute(els_client=els_client, get_all=get_all)
    key = 'prism:doi' if id_type == "doi" else id_type
    return [r[key] for r in search.results if key in r]
Exemplo n.º 16
0
    def query(self, country, keywords=words, after_year=None):
        """Yield Scopus articles from a country matching any of the keywords.

        Builds a query of the form
        ((TITLE-ABS-KEY ("w1") OR TITLE-ABS-KEY ("w2") ...) AND
        AFFILCOUNTRY ( country ) [AND PUBYEAR AFT year]).

        Example:
        aff_srch = ElsSearch('( ( AFFILCOUNTRY ( argentina )  AND  (TITLE-ABS-KEY ( Puccinia ) OR TITLE-ABS-KEY(Lactobacillus)) ) )  AND  ( burguener ) ','scopus')
        aff_srch.execute(client)
         print ("aff_srch has", len(aff_srch.results), "results.")
         AFFILCOUNTRY ( argentina )  AND  ( LIMIT-TO ( SRCTYPE ,  "j" )  OR  LIMIT-TO ( SRCTYPE ,  "p" ) )

        :param country: AFFILCOUNTRY value
        :param keywords: iterable of TITLE-ABS-KEY terms; defaults to the
            module-level `words`, bound once at definition time
        :param after_year: if given, restrict to PUBYEAR after this year
        :return: generator over the raw result records
        """
        from elsapy.elssearch import ElsSearch


        # each term ends with a trailing space, so joining with "OR " yields
        # '...") OR TITLE-ABS-KEY ("...' with single spaces around OR
        kwfilter = "OR ".join(['TITLE-ABS-KEY ("' + w + '") ' for w in keywords])

        after_year_q = "AND PUBYEAR AFT " + str(after_year) if after_year else ""

        aff_srch = ElsSearch("((" + kwfilter + ") AND AFFILCOUNTRY ( " + country + " ) " + after_year_q + " )",
                             'scopus', maxResults=10000)
        aff_srch.execute(self.client, get_all=True)

        for article in aff_srch.results:
            yield article
Exemplo n.º 17
0
def get_docs_by_year(year, affl_id='60005248', get_all=False):
    """Fetch Scopus documents for one year and one affiliation.

    :param year: publication year, passed as the 'date' search option
    :param affl_id: Scopus affiliation ID
    :param get_all: fetch every page of results when True
    :return: list of raw result records
    """
    logger.info(f'Searching docs for year {year} and affiliation {affl_id}')
    els_client = elsclient.ElsClient(key_manager.get_key('scopus_search'))

    # NOTE: for recent years this affiliation has >5,000 publications/year,
    # exceeding the API cap — a split query (e.g. excluding subjarea(MEDI)
    # in a second search) was previously used and may need reinstating.
    year_search = ElsSearch(f'af-id({affl_id})', 'scopus', {'date': year})
    year_search.execute(els_client, get_all)
    return year_search.results
Exemplo n.º 18
0
def scidir_search(search_terms, database):
    """Run a search against an Elsevier index and dump all results to CSV.

    Executes the query, retrieves every result, and writes the result
    DataFrame to data/<database>.csv. Returns None.

    parameter
    ---------
    search_terms (str): The string to search
    database (str): index name to search (e.g. 'scopus', 'sciencedirect')
    """
    print("Running scidir_search...")

    print("Searching: {}".format(database))

    index_search = ElsSearch(search_terms, database)
    index_search.execute(client, get_all=True)
    status = "Retrieved {} from {}. Writing to file for further processing"
    print(status.format(len(index_search.results), database))
    index_search.results_df.to_csv('data/' + str(database) + '.csv', index=None)
Exemplo n.º 19
0
    def auth_query(auth_last, auth_first):
        """Look up a single author in the Scopus author index.

        :return: [last, first, author_id, current_affiliation]; the last two
            slots may be "CNE" (could not extract), "DNE" (API error record)
            or "none" (no results at all).
        """
        auth_data = [auth_last, auth_first]
        print("Searching for author %s, %s" % (auth_last, auth_first))
        # Initialize search object and execute search under the author index
        query = 'authlast(%s)+AND+authfirst(%s)' % (auth_last, auth_first)

        try:
            auth_srch = ElsSearch(query, 'author')
            auth_srch.execute(client, get_all=False)

        except Exception:
            # First attempt failed (e.g. key quota): retry with the backup
            # key. BUG FIX: the original assigned to `client` here, which
            # made `client` function-local and raised UnboundLocalError on
            # the execute() above, so the fallback ALWAYS ran; the fallback
            # client now has a distinct name.
            con_file = open("config2.json")
            config = json.load(con_file)
            con_file.close()

            # Initialize new client
            backup_client = ElsClient(config['apikey'])
            backup_client.inst_token = config['insttoken']

            auth_srch = ElsSearch(query, 'author')
            auth_srch.execute(backup_client, get_all=False)

        if (len(auth_srch.results) == 1):
            print("auth_srch has", len(auth_srch.results), "result.")
        else:
            print("auth_srch has", len(auth_srch.results), "results.")

        if (len(auth_srch.results) > 0):
            # checking if no results at all. BUG FIX: guarded — the original
            # indexed results[0] before checking the length, which raised
            # IndexError on an empty result list.
            error_message = auth_srch.results[0].get('error')

            if (not error_message):
                # grabs the author_id from the search data
                # this assumes that the wanted author is the first one in results
                # check this out later
                try:
                    string_author_id = auth_srch.results[0].get('dc:identifier')
                    # this line cuts the author id string from the end of AUTHOR_ID
                    # to the end of the id digits
                    author_id = string_author_id[10:]
                    print("author_id : %s" % author_id)
                    auth_data.append(author_id)
                except AttributeError:
                    print("Could not extract auth_id field for %s, %s" % (auth_last, auth_first))
                    auth_data.append("CNE")

                # grabs the curr_affil from the search data
                # appends it to auth_data
                try:
                    dict_curr_affil = auth_srch.results[0].get('affiliation-current')
                    curr_affil = dict_curr_affil.get('affiliation-name')
                    print("curr_affil : %s" % curr_affil)
                    auth_data.append(curr_affil)
                except AttributeError:
                    print("Could not extract curr_affil field for %s, %s" % (auth_last, auth_first))
                    auth_data.append("CNE")

            # this could be a false positive! the author name could be in the name-variant field
            # I redo the query down below in the next function
            else:
                auth_data.append("DNE")
                auth_data.append("DNE")
                print(error_message)

        else:
            print("very bad error @ length of auth_srch.results <= 0")
            auth_data.append("none")
            auth_data.append("none")

        return auth_data
Exemplo n.º 20
0
def main():
    """Resolve each author name in authors.json to a Scopus author ID,
    interactively disambiguating when several profiles match, save the IDs
    back to authors.json, then fetch author metrics."""
    # Load author names list
    with open('authors.json', 'r', encoding='utf-8') as fp:
        data = json.load(fp)
        search_list = data['names']

    # Load configuration
    con_file = open("config.json")
    config = json.load(con_file)
    con_file.close()

    # Initialize client
    client = ElsClient(config['apikey'])
    client.inst_token = config['insttoken']

    # Run search for each author names in list and get IDs
    auth_id_list = []
    for author in search_list:
        # author is a (first, last) pair; either part may be empty
        search_query = ""
        if len(author[0]) > 0:
            search_query += f"authfirst({author[0]}) "
        if len(author[1]) > 0:
            search_query += f"authlast({author[1]})"

        auth_srch = ElsSearch(search_query, 'author')
        auth_srch.execute(client)
        print(
            f'\n{author[0]} {author[1]}: {len(auth_srch.results)} results found!\n'
        )

        # If there are more than one author that matches the search, display search results
        if len(auth_srch.results) > 1:
            for i, search_result in enumerate(auth_srch.results):
                first_name = search_result['preferred-name']['given-name']
                surname = search_result['preferred-name']['surname']
                # affiliation fields may be absent on some profiles
                try:
                    affiliation = search_result['affiliation-current'][
                        'affiliation-name']
                    affiliation_country = search_result['affiliation-current'][
                        'affiliation-country']
                except KeyError:
                    affiliation = ''
                    affiliation_country = ''
                print(
                    f"[{i+1}] {first_name} {surname}, {affiliation} ({affiliation_country})"
                )

            # Choose desired author (1-based prompt, 0-based index)
            desired_author_index = int(input('\nChoose correct author: ')) - 1

        else:
            desired_author_index = 0

        # Get author ID
        desired_author = auth_srch.results[desired_author_index]
        link = desired_author['link'][0]['@href']
        auth_id = desired_author['dc:identifier'].split(':')[1]
        auth_id_list.append(auth_id)

    # Save author ID to JSON
    with open('authors.json', 'w', encoding='utf-8') as fp:
        data = {'ids': auth_id_list, 'names': search_list}
        json.dump(data, fp, indent=4, sort_keys=True)

    # NOTE(review): `link` holds only the LAST author's profile URL here
    print(link)
    print('\n-----------\n')
    print('Grabbing author metrics...')

    get_author_by_id.get_metrics(client, auth_id_list)
    # NOTE(review): `config_file` is undefined in this scope — this line
    # raises NameError if reached; it looks like residue from another
    # snippet. TODO confirm and remove.
    config = json.load(config_file)

# Search flags read from the already-loaded module-level `config` dict.
GET_ALL = config[
    'get_all']  # False gets one chunk (25) True gets all or max (5000)
FULL_TEXT = config['full_text']  # Save fulltext
OPEN_ACCESS = config[
    'open_access']  # Search only openaccess documents (so we can get the full text)

# "public policy AND (impact OR result OR evaluation OR evidence) AND (climate OR environment)"
query = config['query']

if OPEN_ACCESS:
    # restrict the query to open-access documents
    query = "openaccess(1) AND " + query

client = ElsClient(config['api_key'])

# Run the ScienceDirect search, then optionally download each full text.
doc_srch = ElsSearch(query, 'sciencedirect')
doc_srch.execute(client, get_all=GET_ALL)

for doc in doc_srch.results:
    doi = doc['dc:identifier']
    print(doi)
    if FULL_TEXT:
        ## ScienceDirect (full-text) document example using DOI
        doi_doc = FullDoc(doi=doi)
        if doi_doc.read(client):
            doi_doc.write()
        else:
            print("Read full-text failed for DOI", doi)

print("# Found", len(doc_srch.results), "results.")
Exemplo n.º 22
0
    # NOTE(review): this fragment starts mid-function — the enclosing
    # definition and my_auth/my_aff/client are defined above this view.
    if my_auth.read_docs(client):
        print ("my_auth.doc_list has " + str(len(my_auth.doc_list)) + " items.")
        my_auth.write_docs()
    else:
        print ("Read docs for author failed.")

    ## Read all documents for example affiliation, then write to disk
    if my_aff.read_docs(client):
        print ("my_aff.doc_list has " + str(len(my_aff.doc_list)) + " items.")
        my_aff.write_docs()
    else:
        print ("Read docs for affiliation failed.")

## Initialize author search object and execute search
auth_srch = ElsSearch('authlast(keuskamp)','author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

## Initialize affiliation search object and execute search
aff_srch = ElsSearch('affil(amsterdam)','affiliation')
aff_srch.execute(client)
print ("aff_srch has", len(aff_srch.results), "results.")

## Initialize doc search object using Scopus and execute search, retrieving 
#   all results
doc_srch = ElsSearch("AFFIL(dartmouth) AND AUTHOR-NAME(lewis) AND PUBYEAR > 2011",'scopus')
doc_srch.execute(client, get_all = True)
print ("doc_srch has", len(doc_srch.results), "results.")

## Initialize doc search object using ScienceDirect and execute search, 
#   retrieving all results
Exemplo n.º 23
0
import pandas as pd
from elsapy.elsclient import ElsClient
from elsapy.elssearch import ElsSearch
import json
# Scopus affiliation ID of the target organization
fefu_id = '60103811'
con_file = open("config.json")
config = json.load(con_file)
con_file.close()
# view = 'COMPLETE' -- to access more fields
client = ElsClient(config['apikey'], num_res = 25)

# Search all publications affiliated with the organization
search = ElsSearch('AF-ID( ' + fefu_id + ' )', 'scopus')
search.execute(client)
sr = search.results

result = []
res = {}

authorname = ''
authid = ''

# Fields that need custom flattening rather than a direct copy
special_fields = ['authname', 'authid', 'prism:coverDisplayDate',
                  'prism:pageRange', 'openaccessFlag', 'link', 'prism:coverDate']
# Mapping from Scopus result keys to output column names.
# NOTE(review): this dict literal is truncated in this view — it continues
# past the end of the snippet.
fields = {
        'authname'                  : 'Authors',
        'authid'                    : 'Author(s) ID',
        'dc:title'                  : 'Title', 
        'prism:coverDate'           : 'Year', 
        'prism:publicationName'     : 'Source title',
        'prism:volume'              : 'Volume',
        'prism:doi'                 : 'DOI',
Exemplo n.º 24
0
#     print(test.text, file=output_xml_file)

#subj_areas = ["AGRI","ARTS","BIOC","BUSI","CENG","CHEM","COMP","DECI","DENT",
# "EART","ECON","ENER","ENGI","ENVI","HEAL","IMMU","MATE","MATH",
#"MEDI","NEUR","NURS","PHAR","PHYS","PSYC","SOCI","VETE","MULT"]

#Stop-Words listed in the Sci-Dir. Expert search (deemed non-distinct)

# Accumulate (subject_area, DOI) pairs for every common word found
DOI_List = []
Data_Headers = ["Subj_Area", "DOI"]

# NOTE(review): subj_areas, comm_words and client are defined outside this
# fragment — presumably the commented-out list above, defined elsewhere.
for subj in subj_areas:
    for word in comm_words:
        # {word} requests an exact phrase match in the search index
        subj_srch = ElsSearch("{" + word + "}" + '+SUBJAREA(' + subj + ')',
                              'scidir')
        subj_srch.execute(client)

        for article in subj_srch.results:
            if "dc:identifier" in article:
                DOI_List.append((subj, article["dc:identifier"]))
                print(subj, ": ", article["dc:identifier"])

#output data as csv
table = pd.DataFrame(data=DOI_List, columns=Data_Headers)
table.to_csv('Article_DOIs.csv', index=False)
"""doi_doc = FullDoc(doi = TEST_DOI)
if doi_doc.read(client):
    print ("doi_doc.title: ", doi_doc.title)
    doi_doc.write()
    print(doi_doc.data['originalText'])
else:
Exemplo n.º 25
0
## Initialize client
client = ElsClient(API_KEY)

# Author search template; the affiliation filter stays commented out
query = 'AUTHFIRST(%s) AND AUTHLASTNAME(%s)'  # AND AF-ID(60003892)'

#name_list = df["Name"][86].split()
#first,last = name_list[0],name_list[len(name_list)-1]
name = df["Name"].iloc[26]

# Look up each name in df["Name"] and collect (name, profile URL) pairs.
profile_urls = []
for name in df["Name"]:
    name_list = name.split()
    # first token is the given name, last token the surname
    first, last = name_list[0], name_list[len(name_list) - 1]
    auth_srch = ElsSearch(query % (first, last), 'author')
    auth_srch.execute(client)
    #print ("auth_srch has", len(auth_srch.results), "results.")
    try:
        url = auth_srch.results[0]['prism:url']
        print(name, url)
        profile_urls.append([name, url])
    except (IndexError, KeyError):
        # IndexError: no results; KeyError: result lacks a profile URL
        # (narrowed from a bare except that also hid real errors)
        print('Author: ' + name + ' not found.')
        profile_urls.append([name, ''])
scopus_urls = pd.DataFrame(data=profile_urls, columns=['Name', 'URL'])

scopus_urls.to_csv('scopus_urls_no_affil_full.csv', index=False)
len(scopus_urls['Name'].unique())
len(scopus_urls['URL'].unique())

# scopus_urls_back = scopus_urls
Exemplo n.º 26
0
import csv
from elsapy.elsclient import ElsClient
from elsapy.elssearch import ElsSearch
import json
import pandas as pd
# Interactive ScienceDirect search: fetch results, reshape to CSV, then
# convert the CSV to XML (the conversion loop is truncated below).
se=input("Enter the Word You Want to Search about: ")
print("If you requested full Data it will take along time and also you want to have a good internet connection because it's a big data ")
fi=int(input("enter 0 for 25 line from data and 1 for full data: "))
con_file = open("config.json")
config = json.load(con_file)
con_file.close()
client = ElsClient(config['apikey'])
doc_srch = ElsSearch(se,'sciencedirect')
# user-supplied 0/1 used directly as the get_all flag
doc_srch.execute(client, get_all =fi)
print ("doc_srch has", len(doc_srch.results), "results.")
# reload the raw results dumped to dump.json and keep selected columns
df = pd.read_json (r'dump.json')
df = df[['load-date','dc:title','dc:creator','prism:publicationName']]
df.columns = ['Date','Title','Creator','Publication Name']
# NOTE(review): index=[] is falsy, so the index column is omitted
df.to_csv (r'output.csv', index = [])
csvFile = 'output.csv'
xmlFile = 'output.xml'
csvData = csv.reader(open(csvFile))
xmlData = open(xmlFile, 'w')
xmlData.write('<?xml version="1.0"?>' + "\n")
xmlData.write('<csv_data>' + "\n")
rowNum = 0
csvData=csvData
for row in csvData:
    if rowNum == 0:
        tags = row
        for i in range(len(tags)):
Exemplo n.º 27
0
    # NOTE(review): fragment starts mid-loop — commer, thisComm, authDF,
    # client, me_auth and cb_auth are defined above this view.
    print(commer, thisComm['lastname'])
    for _, auth in thisComm.iterrows():
        idList = authDF[authDF['lastname'] == auth['lastname']]
        commString = ''

        # Build '+AU-ID(a)+OR+AU-ID(b)...' from every ID matching the surname
        for _, aID in idList.iterrows():
            if len(commString):

                commString = commString + '+OR'
            commString = commString + '+AU-ID(' + str(aID['aid']) + ')'

#u_id(24588214300)
## Initialize doc search object and execute search, retrieving all results
#doc_srch = ElsSearch('collective+movement+ecology+AU-ID(24588214300)+AU-ID(23479355600)','scopus')
        # count titles containing "collective" for this author group...
        doc_srch = ElsSearch('title("collective")+' + commString, 'scopus')
        doc_srch.execute(client, get_all=True)
        cbCount = len(doc_srch.results)
        #       title("neuropsychological evidence")
        # ...versus titles containing "ecology"
        doc_srch = ElsSearch('title("ecology")+' + commString, 'scopus')
        doc_srch.execute(client, get_all=True)
        meCount = len(doc_srch.results)
        print(meCount, cbCount)
        if meCount > cbCount:
            me_auth += 1
            print(auth['lastname'] + ' ME!')
        if meCount < cbCount:
            cb_auth += 1
            print(auth['lastname'] + ' CB!')

    # NOTE(review): the statement below is truncated in this view
    if me_auth > cb_auth:
        authDF.loc[authDF.isin(thisComm['lastname'].values)['lastname'],
Exemplo n.º 28
0
def detailed_auth_query(auth_last, auth_first):
    """Look up an author and try to confirm a UR affiliation, searching the
    affiliation history when the current affiliation is not UR.

    :param auth_last: author surname
    :param auth_first: author given name
    :return: [last, first, author_id, affiliation]; the last two slots may
        be 'CNE' (could not extract), 'DNE' (API error record), 'NONE'
        (no results), 'max' (hit the 25-result cap without a UR match) or
        'na' (results exhausted without a UR match).
    """
    auth_data = [auth_last, auth_first, '', '']
    print("Searching for author %s, %s" % (auth_last, auth_first))
    # Initialize search object and execute search under the author index
    query = 'authlast(%s)+AND+authfirst(%s)' % (auth_last, auth_first)

    try:
        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(client, get_all=False)

    except Exception:
        # Retry with the backup API key. BUG FIX: the original assigned to
        # `client` here, making it function-local and so raising
        # UnboundLocalError on the execute() above (the fallback ALWAYS
        # ran); the fallback client now has a distinct name.
        con_file = open("config2.json")
        config = json.load(con_file)
        con_file.close()

        # Initialize new client
        backup_client = ElsClient(config['apikey'])
        backup_client.inst_token = config['insttoken']

        auth_srch = ElsSearch(query, 'author')
        auth_srch.execute(backup_client, get_all=False)

    if (len(auth_srch.results) == 1):
        print("auth_srch has", len(auth_srch.results), "result.")
    else:
        print("auth_srch has", len(auth_srch.results), "results.")

    if (len(auth_srch.results) > 0):
        # checking if no results at all. BUG FIX: guarded — the original
        # indexed results[0] before checking the length, which raised
        # IndexError on an empty result list.
        error_message = auth_srch.results[0].get('error')

        if (not error_message):

            print("Into the results...")

            # grabs the author_id from the search data
            for i in range(len(auth_srch.results)):

                try:
                    string_author_id = auth_srch.results[i].get('dc:identifier')
                    # this line cuts the author id string from the end of AUTHOR_ID
                    # to the end of the id digits
                    author_id = string_author_id[10:]
                    print("author_id : %s" % author_id)
                    auth_data[2] = author_id

                except AttributeError:
                    print("Could not extract auth_id field for %s, %s" % (auth_last, auth_first))
                    auth_data[2] = "CNE"

                # grabs the curr_affil from the search data
                # appends it to auth_data
                try:
                    dict_curr_affil = auth_srch.results[i].get('affiliation-current')
                    curr_affil = dict_curr_affil.get('affiliation-name')
                    print("curr_affil : %s" % curr_affil)

                except AttributeError:
                    print("Could not extract curr_affil field for %s, %s" % (auth_last, auth_first))
                    auth_data[3] = "CNE"

                try:
                    # if UR not current affil go on and search history
                    if (not isUR(curr_affil)):

                        affil_hist = auth_id_query(auth_data[2])

                        try:
                            if (len(affil_hist) > 1):
                                for institution in affil_hist:
                                    try:
                                        affil_instance = institution['ip-doc']['preferred-name']['$']
                                        # if UR affil is found, return immediately
                                        if (isUR(affil_instance)):
                                            curr_affil = affil_instance
                                            auth_data[3] = curr_affil
                                            return auth_data
                                    except (KeyError, TypeError):
                                        print("Affiliation instance data for %s,%s wasn't structured correctly." % (auth_data[0], auth_data[1]))
                                        # print(institution)
                            else:
                                try:
                                    affil_instance = affil_hist['ip-doc']['preferred-name']['$']
                                    try:
                                        # if UR affil is found, return immediately
                                        if (isUR(affil_instance)):
                                            curr_affil = affil_instance
                                            auth_data[3] = curr_affil
                                            return auth_data
                                    except TypeError:
                                        print("isUR error")
                                        print(affil_instance)
                                except (KeyError, TypeError):
                                    print("Affiliation instance data for %s,%s wasn't structured correctly." % (auth_data[0], auth_data[1]))
                                    # print(institution)

                        except TypeError:
                            print("Type Error occured for affil_hist of %s,%s" % (auth_data[0], auth_data[1]))
                            print(affil_hist)

                    # but if it is then return immediately
                    else:
                        print("Returned with curr_affil : '%s' for %s,%s" % (curr_affil, auth_data[0], auth_data[1]))
                        auth_data[3] = curr_affil
                        return auth_data

                except Exception:
                    print("Something wrong within the returned profile data of %s,%s" % (auth_data[0], auth_data[1]))

            # this is the case of hitting the cap of 25, too many people down the list
            if (len(auth_srch.results) >= 25):
                print("Results CAP of 25 was hit for the %d results of %s,%s" % (len(auth_srch.results), auth_data[0], auth_data[1]))
                auth_data[3] = 'max'
                return auth_data

            # this covers the case of no UR affils found at all
            elif (len(auth_srch.results) < 25):
                print("EXHAUSTED results list of %d results for %s,%s" % (len(auth_srch.results), auth_data[0], auth_data[1]))
                auth_data[3] = 'na'
                return auth_data

        # this could be a false positive! the author name could be in the name-variant field
        # I redo the query down below in the next function
        else:
            auth_data[2] = 'DNE'
            auth_data[3] = 'DNE'
            print(error_message)

    else:
        print("very bad error @ length of auth_srch.results <= 0")
        auth_data[2] = 'NONE'
        auth_data[3] = 'NONE'

    return auth_data
Exemplo n.º 29
0
def get_search(keywords, client):
    """Run a Scopus search and return all results as a DataFrame.

    :param keywords: query string
    :param client: initialized ElsClient
    :return: pandas DataFrame of the full result set
    """
    scopus_search = ElsSearch(keywords, 'scopus')
    scopus_search.execute(client, get_all=True)
    print("doc_srch has", len(scopus_search.results), "results.")
    return scopus_search.results_df
Exemplo n.º 30
0
# NOTE(review): fragment — string_1, string_2, month, year, client,
# search_string and the Source/Search model classes are defined outside
# this view.
string_2 = 'REF' + '%28' + quote_plus(string_2) + '%29'

# Search for publications from this year. If the final search of the year is not exactly at the end of the year, then some publications could be missed. Therefore, we use a different PUBYEAR string in January (month == 1) of the following year (assuming that searches are done at least once per month).
if month == 1:
    year = year - 2
    year = 'PUBYEAR > ' + str(year)
else:
    year = 'PUBYEAR = ' + str(year)
year = quote_plus(year)

encoded_search_string = string_1 + '+OR+' + string_2 + '+AND+' + year

# Initialize doc search object and execute search, retrieving <=25 results if
# get_all=False or <=5000 results if get_all=True.
search = ElsSearch(encoded_search_string, 'scopus')
search.execute(client, get_all=True)

# Save the results as a new record in the Search table
results = search.results
source = Source.objects.get(source='Scopus')
topic = search_string.topic

record = Search(
    topic=topic,
    search_string=search_string,
    source=source,
    results=results,
)
record.save()

event = 'search_scopus.py'