def get_paper_count(query, rows_per_page=500, max_pages=100):
    """
    Parameters
    ----------
    query : str
    rows_per_page : int (optional)
    max_pages : int (optional)

    Returns
    -------
    count : int
        Total number of papers matching the query, as reported by ADS.
    """
    q = ads.SearchQuery(q=query, rows=1)
    q.execute()
    return q.response.numFound
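# A minimal usage sketch for get_paper_count, assuming an ADS API token is
# configured; the placeholder token and query string are illustrative only.
import ads

ads.config.token = 'YOUR-ADS-TOKEN'  # hypothetical placeholder
n = get_paper_count('exoplanet')
print('ADS reports {} matching papers'.format(n))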
def qsearch(request, qstring=None):
    if qstring is None:
        try:
            qstring = request.GET['qsearch']
        except KeyError:
            return HttpResponseRedirect('/')
    if 'qsort' in request.GET:
        sort = request.GET['qsort']
    else:
        sort = 'classic_factor'
    if 'page' in request.GET:
        page = int(request.GET['page'])
    else:
        page = 0
    results = list(
        ads.SearchQuery(q=qstring,
                        fl=['bibcode', 'title', 'author', 'pubdate', 'doi',
                            'classic_factor'],
                        rows=400,
                        start=page * 400,
                        sort=sort))
    if sort == 'classic_factor':
        try:
            norm = max(r.classic_factor for r in results)
            for r in results:
                r.classic_factor /= norm / 50
                r.classic_factor += 50
        except ValueError:
            # max() raises ValueError on an empty result set;
            # just carry on
            pass
    template = loader.get_template('qsearch.html')
    context = {
        'qstring': qstring,
        'results': results,
        'total': len(results),
        'page': page,
        'sort': sort
    }
    return HttpResponse(template.render(context, request))
def ReadADSAuthor(authlist):
    papers = []
    for author in authlist:
        try:
            papers += list(
                ads.SearchQuery(author=author,
                                sort="pubdate asc",
                                rows=400,
                                fl=['id', 'bibcode', 'title', 'date',
                                    'citation_count', 'author', 'citation',
                                    'pubdate', 'year', 'pub', 'volume',
                                    'page']))
        except Exception:
            print('No connection with ADS, no updates...')
    print('Retrieved a total of {} papers from ADS'.format(len(papers)))
    return papers
def get_total():
    print("Getting total #'s")
    years = np.arange(1970, thisyear + 1)
    values = []
    for year in years:
        print("Getting {0}".format(year))
        result = ads.SearchQuery(database='astronomy',
                                 year="{0}".format(year),
                                 property='refereed',
                                 fl=['year'],
                                 rows=1,
                                 max_pages=1)
        result.execute()
        values.append(result.response.numFound)
    return years, np.array(values)
def __init__(self, name, year=None):
    p = list(
        ads.SearchQuery(author=name,
                        max_pages=10,
                        fl=["id", "bibcode", "citation_count", "author",
                            "year", "property"]))

    # filter by year, if desired
    if year is None:
        self.mypapers = p
    else:
        pyr = [q for q in p if int(q.year) >= year]
        self.mypapers = pyr
    self.num = len(self.mypapers)
def save_query_to_collection(query_params, collection, api_token=None):
    if api_token is None:
        # don't put this on github
        with open('api_token.txt', 'r') as f:
            ads.config.token = f.read()

    q = ads.SearchQuery(**query_params)  # max allowed rows
    all_responses = []
    for paper in q:
        response = {}
        for field in query_params['fl']:
            response[field] = getattr(paper, field)
        all_responses.append(response)

    if len(all_responses) > 0:
        print('Inserting {} papers'.format(len(all_responses)))
        collection.insert_many(all_responses)  # modifies in place
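# A minimal usage sketch for save_query_to_collection, assuming a local
# MongoDB instance reached via pymongo; the database/collection names and
# query_params values are illustrative, not prescribed by the function.
from pymongo import MongoClient

client = MongoClient('localhost', 27017)
papers = client['ads_cache']['papers']  # hypothetical database/collection

query_params = {
    'q': 'supernova',
    'fl': ['bibcode', 'title', 'citation_count'],
    'rows': 2000,
}
save_query_to_collection(query_params, papers)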
def get_all_papers(author):
    papers = ads.SearchQuery(author=author,
                             sort="date",
                             max_pages=128,
                             fl=["id", "title", "author", "doi", "year",
                                 "pubdate", "pub", "volume", "page",
                                 "identifier", "doctype", "citation_count",
                                 "bibcode"])
    all_dicts = []
    for paper in papers:
        # Get arxiv ID
        aid = [
            ":".join(t.split(":")[1:])
            for t in paper.identifier if t.startswith("arXiv:")
        ]
        try:
            page = int(paper.page[0])
        except (ValueError, TypeError):
            page = None
        if paper.page is not None and paper.page[0].startswith("arXiv:"):
            aid.append(":".join(paper.page[0].split(":")[1:]))
        all_dicts.append(
            dict(
                doctype=paper.doctype,
                authors=paper.author,
                year=paper.year,
                pubdate=paper.pubdate,
                doi=paper.doi[0] if paper.doi is not None else None,
                title=paper.title[0],
                pub=paper.pub,
                volume=paper.volume,
                page=page,
                arxiv=aid[0] if len(aid) else None,
                citations=(paper.citation_count
                           if paper.citation_count is not None else 0),
                url="http://adsabs.harvard.edu/abs/" + paper.bibcode,
            ))
    return sorted(all_dicts, key=lambda x: x['pubdate'], reverse=True)
def get_numbers(language):
    print("Getting {0} #'s".format(language))
    years = np.arange(1970, thisyear + 1)
    values = []
    for year in years:
        print("Getting {0}".format(year))
        query = ads.SearchQuery(full=language,
                                database='astronomy',
                                property='refereed',
                                year="{0}".format(year),
                                fl=['year'],
                                rows=100,
                                max_pages=1000)
        query.execute()
        values.append(query.response.numFound)
    return years, np.array(values, dtype=float)
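# get_total and get_numbers pair naturally for a fraction-over-time plot.
# A minimal sketch, assuming matplotlib is available; 'python' as the
# full-text search term is illustrative only.
import matplotlib.pyplot as plt

years, totals = get_total()
_, counts = get_numbers('python')

plt.plot(years, 100.0 * counts / totals)
plt.xlabel('Year')
plt.ylabel('Fraction of refereed papers [%]')
plt.savefig('language_fraction.png')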
def search_ads(results_dir=None, verbose=False):
    """Search NASA ADS for publications mentioning the TOI.

    Parameters
    ----------
    results_dir : str
        directory location of downloaded files
    verbose : bool
        print texts

    Returns
    -------
    toi_pub : dict
        dictionary with TIC as key and paper title as value
    """
    try:
        import ads
        ads.config.token = ADS_TOKEN
    except ImportError:
        raise ImportError('please install ads first')

    if results_dir is None:
        results_dir = '.'
    if not exists(results_dir):
        sys.exit('{} does not exist!'.format(results_dir))
    tics = glob(join(results_dir, 'tic*'))
    toi_pub = {}
    if len(tics) > 0:
        for tic in tqdm(tics):
            # TOI.01
            tic = tic.split('/')[-1][3:]
            q = query_toi(tic=int(tic), clobber=False)
            toi = q['TOI'].values[0]
            toi = str(toi).split('.')[0]
            # FIXME: filter by year > 2018
            papers = ads.SearchQuery(q='TOI ' + toi,
                                     sort="citation_count",
                                     fq='database:astronomy')
            toi_pub[tic] = [paper.title for paper in papers]
    else:
        sys.exit('No tic* directories found in {}'.format(results_dir))
    if verbose:
        print(toi_pub)
    return toi_pub
def cached_query(q):
    global CACHED_PAPERS
    global REMAINING_API_CALLS

    if not CACHED_PAPERS:
        with open('/opt/cache/cache.json') as cache_f:
            CACHED_PAPERS = json.load(cache_f)

    if q not in CACHED_PAPERS:
        results = ads.SearchQuery(author=q)
        CACHED_PAPERS[q] = {}
        for paper in results:
            if not paper.author:
                continue
            CACHED_PAPERS[q][paper.bibcode] = paper.author
        REMAINING_API_CALLS = results.response.get_ratelimits()['remaining']

    return CACHED_PAPERS[q]
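# cached_query only ever reads /opt/cache/cache.json; a minimal sketch of
# the matching write-back step. The save_cache name, temp-file path, and
# atomic-rename strategy are assumptions, not part of the original.
import json
import os

def save_cache():
    tmp_path = '/opt/cache/cache.json.tmp'
    with open(tmp_path, 'w') as cache_f:
        json.dump(CACHED_PAPERS, cache_f)
    os.replace(tmp_path, '/opt/cache/cache.json')  # atomic on POSIX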
def add_by_bibcode(self, bibcode, interactive=False, **kwargs):
    if ads is None:
        log.error("This action requires the ADS key to be set up.")
        return

    q = ads.SearchQuery(q="identifier:{}".format(bibcode), fl=FIELDS)
    for article in q:
        # Print useful warnings
        if bibcode != article.bibcode:
            log.warning("Requested {} but ADS API returned {}".format(
                bibcode, article.bibcode))
        if 'NONARTICLE' in article.property:
            # Note: data products are sometimes tagged as NONARTICLE
            log.warning("{} is not an article.".format(article.bibcode))
        if article in self:
            log.warning("{} is already in the db.".format(article.bibcode))
        else:
            if interactive:
                self.add_interactively(article)
            else:
                self.add(article, **kwargs)
def __init__(self,
             bibcode: str = None,
             db_article: ads.search.Article = None,
             judgement: bool = False):
    """
    Create new publication node

    :param bibcode: A bibcode to be queried from ADS
    :param db_article: An Article object to be used instead of querying the ADS
    """
    if db_article:
        self._article = db_article
    elif bibcode:
        _query = ads.SearchQuery(bibcode=bibcode,
                                 token=ADS_API_KEY,
                                 fl=['bibcode', 'year', 'author', 'title',
                                     'reference', 'citation'])
        self._article = next(_query)
    self._modularity_id: int = 0
    self.judgement = judgement
def __init__(self):
    p = list(
        ads.SearchQuery(author="Zingale, M",
                        max_pages=10,
                        fl=["id", "bibcode", "citation_count", "author",
                            "pub", "volume", "issue", "page", "year",
                            "title", "property", "authors"]))
    self.mypapers = p

    # hack around a bug whereby some papers might have "None" as the
    # number of cites
    for paper in self.mypapers:
        if paper.citation_count is None:
            paper.citation_count = 0
        if paper.property is None:
            paper.property = []

    # do some sorting and splitting
    self.refereed = [q for q in self.mypapers if "REFEREED" in q.property]
    self.num = len(self.mypapers)
def _get_all_bibcodes(self, q):
    limits = {"remaining": "unknown"}
    with shelve.open(self.cache_file) as cache:
        if q in cache:
            result = cache[q]
            if result["expires"] >= time.time():
                return result["bibcodes"], limits
            else:
                del cache[q]

    sort = "bibcode desc"
    query = ads.SearchQuery(q=q, sort=sort, fl=["bibcode", "title"])
    bibcodes = []
    while True:
        query.execute()
        limits = query.response.get_ratelimits()
        new_bibcodes = []
        for a in query.response.articles:
            code = a.bibcode
            self.article_cache[code] = dict(a.items())
            new_bibcodes.append(a.bibcode)
        bibcodes += new_bibcodes
        # A partial page (the default page size is 50 rows) means we have
        # reached the last page of results
        if len(new_bibcodes) < 50:
            break

        # Check rate limits
        if int(limits["remaining"]) <= 0:
            wait = int(limits["reset"]) - time.time()
            print("Request has been rate limited. "
                  "Resets in {0} minutes".format(wait / 60.0))
            time.sleep(wait)

    with shelve.open(self.cache_file) as cache:
        cache[q] = dict(expires=time.time() + self.cache_ttl,
                        bibcodes=bibcodes)

    return bibcodes, limits
def getby_bibcode(bibcode):
    """
    Query an ADS item by bibcode.

    :param bibcode: bibcode (ADS unique identifier)
    :return: queried item as Node object
    """
    for i in range(5):
        try:
            query = ads.SearchQuery(bibcode=bibcode,
                                    fl=['author', 'year', 'title', 'bibcode',
                                        'reference', 'citation'])
            for item in query:
                new_node = Node(item)
                if new_node is not None:
                    return new_node
                else:
                    print('Couldn\'t make node for bibcode {}'.format(
                        bibcode))
        except (IndexError, APIResponseError):
            print('Error occurred while querying ADS. Retrying...')
            continue
def by_keywords(self, keywords):
    """Query ADS for the publications containing any of a list of keywords.

    Aliases of the keywords (as determined by ADS) are included in the
    search.

    Parameters
    ----------
    keywords : list of str
        The list of keywords to search for.

    Returns
    -------
    dict
        Maps each bibcode to a dict of the requested fields plus the list
        of matched keywords.
    """
    publications = dict()
    for keyword in keywords:
        print('Searching for ' + keyword)
        q = 'full:"{keyword}" AND pubdate:{pubdate}'.format(
            keyword=keyword, pubdate=self.pubdate)
        query = ads.SearchQuery(q=q, fl=self.fields, fq='database:astronomy')
        for result in list(query):
            if result.bibcode not in publications:
                publications[result.bibcode] = {
                    f: getattr(result, f)
                    for f in self.fields
                }
                publications[result.bibcode]['keywords'] = []
            publications[result.bibcode]['keywords'].append(keyword)
    return publications
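# A standalone sketch of the same kind of full-text keyword query outside
# the class; the keyword, pubdate range, and field list here are
# illustrative assumptions.
import ads

query = ads.SearchQuery(q='full:"exoplanet" AND pubdate:[2019-01 TO 2019-12]',
                        fl=['bibcode', 'title'],
                        fq='database:astronomy')
for result in query:
    print(result.bibcode, result.title[0])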
def query_ads(people):
    """
    Query ADS for a given list of people at a given affiliation.
    """
    all_pubs = []
    for p in people:
        query = list(
            ads.SearchQuery(author=p,
                            fl=PROPERTIES + OTHER_PROPERTIES,
                            rows=MAX_ROWS,
                            max_pages=MAX_PAGES))
        for q in query:
            if not accept_publication(q, people):
                continue
            tmp = {prop: q.__dict__[prop] for prop in PROPERTIES}
            tmp['year'] = int(tmp['year'])
            tmp['rs_author'] = [a for a in q.author if a in people]
            tmp['citation_count'] = (len(q.citation)
                                     if q.citation is not None else 0)
            tmp['date'] = '-'.join(
                ['01' if dt == '00' else dt for dt in q.pubdate.split('-')])
            all_pubs.append(tmp)
    return all_pubs
def main(output_path, figure_format, orcid=False, bibcodes=False, query=False,
         save=False, plot=False, printable=False, test=False, desc=None):

    # Imports should not be here, but I don't care....
    if test:
        import ads.sandbox as ads
    else:
        import ads

    fl = ['id', 'bibcode']
    rows = 2000
    max_pages = 1
    print('Using rows: {} with max_pages: {}'.format(rows, max_pages))

    # See what the user has given to generate the metrics plot
    if query:
        sq = ads.SearchQuery(q=query, fl=fl, rows=rows, max_pages=max_pages)
        sq.execute()
        bibcodes = [i.bibcode for i in sq.articles]
        print('You gave a query: {}'.format(query))
        print('Found {} bibcodes (e.g., {})'.format(len(bibcodes),
                                                    bibcodes[0:4]))
    elif orcid:
        query = 'orcid:{}'.format(orcid)
        sq = ads.SearchQuery(q=query, fl=fl, rows=rows, max_pages=max_pages)
        sq.execute()
        bibcodes = [i.bibcode for i in sq.articles]
        print('You gave an ORCiD iD: {}'.format(orcid))
        print('Found {} bibcodes (e.g., {})'.format(len(bibcodes),
                                                    bibcodes[0:4]))
    elif bibcodes:
        sq = False
        print('You gave {} bibcodes: {}'.format(len(bibcodes), bibcodes[0:4]))
    else:
        sys.exit()

    # Collect the metrics from the API
    mq = ads.MetricsQuery(bibcodes=bibcodes)
    metrics = mq.execute()

    if plot:
        # Number of papers
        y, t, r = get_numbers_of_papers(metrics)
        number = dict(name='numbers', year=y, total=t, refereed=r)

        # Number of citations
        y, r2r, r2nr, nr2r, nr2nr = get_citations_of_papers(metrics)
        citation = dict(name='citations', year=y, ref_to_ref=r2r,
                        non_ref_to_ref=nr2r, ref_to_non_ref=r2nr,
                        non_ref_to_non_ref=nr2nr)

        # Indices
        y, h, g, tori, i10, read10, i100 = get_indices_of_papers(metrics)
        index = dict(name='indices', year=y, h=h, g=g, tori=tori, i10=i10,
                     read10=read10, i100=i100)

        # Number of reads
        y, t, rr = get_reads_of_papers(metrics)
        reads = dict(name='reads', year=y, total=t, reads_ref=rr)

        # Define the figure and the axes
        fig = plt.figure(0, figsize=(8.27, 11.69))
        ax1 = fig.add_subplot(411)
        ax2 = fig.add_subplot(412)
        ax3 = fig.add_subplot(413)
        ax4 = fig.add_subplot(414)

        # Number of papers
        step(ax1, number['year'], number['total'] - number['refereed'],
             label='Not refereed', color='green')
        step(ax1, number['year'], number['refereed'], label='Refereed',
             color='blue')
        ax1.set_ylim([0, max(number['total']) + 1])
        ax1.set_ylabel('Number of papers')
        leg1 = ax1.legend(loc=0)
        leg1.draw_frame(False)

        # Number of citations
        step(ax2, citation['year'], citation['ref_to_ref'],
             label='Ref. citations to ref. papers', color='blue')
        step(ax2, citation['year'], citation['ref_to_non_ref'],
             label='Ref. citations to non ref. papers', color='green')
        step(ax2, citation['year'], citation['non_ref_to_ref'],
             label='Non ref. citations to ref. papers', color='gold')
        step(ax2, citation['year'], citation['non_ref_to_non_ref'],
             label='Non ref. citations to non ref. papers', color='red')
        ax2.set_ylabel('Number of citations')
        max_citation = max(citation['ref_to_ref'].max(),
                           citation['ref_to_non_ref'].max(),
                           citation['non_ref_to_ref'].max(),
                           citation['non_ref_to_non_ref'].max())
        ax2.set_ylim([0, max_citation + 1])
        leg2 = ax2.legend(loc=0)
        leg2.draw_frame(False)

        # Indices
        ax3.errorbar(index['year'], index['h'], label='h Index',
                     color='blue', lw=2, ls='-')
        ax3.errorbar(index['year'], index['g'], label='g Index',
                     color='green', lw=2, ls='-')
        ax3.errorbar(index['year'], index['i10'], label='i10 Index',
                     color='gold', lw=2, ls='-')
        ax3.errorbar(index['year'], index['tori'], label='tori Index',
                     color='red', lw=2, ls='-')
        ax3.errorbar(index['year'], index['i100'], label='i100 Index',
                     color='purple', lw=2, ls='-')
        ax3.errorbar(index['year'], index['read10'], label='read10 Index',
                     color='darkblue', lw=2, ls='-')
        max_index = max(h.max(), g.max(), i10.max(), tori.max(), i100.max(),
                        read10.max())
        ax3.set_ylim([0, max_index + 1])
        leg3 = ax3.legend(loc=0, ncol=2)
        leg3.draw_frame(False)

        # Number of reads
        step(ax4, reads['year'], reads['total'] - reads['reads_ref'],
             label='Non refereed', color='green')
        step(ax4, reads['year'], reads['reads_ref'], label='Refereed',
             color='blue')
        max_reads = max(reads['total'].max(), reads['reads_ref'].max())
        min_year = reads['year'][0]
        for i in range(len(reads['year'])):
            if reads['total'][i] > 0 or reads['reads_ref'][i] > 0:
                break
            min_year = reads['year'][i]
        ax4.set_xlim([min_year, reads['year'].max()])
        ax4.set_ylim([0, max_reads + 1])
        ax4.set_xlabel('Year')
        ax4.set_ylabel('Number of reads')
        leg4 = ax4.legend(loc=0)
        leg4.draw_frame(False)

        figure_path = '{}/metrics.{}'.format(output_path, figure_format)
        plt.savefig(figure_path)

    # Save to disk if requested
    if save == 'csv':
        for output in [number, citation, index, reads]:
            with open('{}/{}.{}'.format(output_path, output['name'], save),
                      'w') as f:
                keys = [i for i in output.keys()
                        if i != 'name' and i != 'year']
                f.write('#year,{}\n'.format(','.join(keys)))
                for i in range(len(output['year'])):
                    f.write('{year},{other}\n'.format(
                        year=output['year'][i].year,
                        other=','.join([str(output[k][i]) for k in keys])))
        save_metrics(metrics)

    # Does the user want a printable PDF?
    if printable:
        build_latex(metrics, orcid_id=orcid, plot=plot, desc=desc)
def P(c):
    return min(5, 3 + 2 * c / 12.)

num_pubs = len(bibcodes)
if num_pubs < 11:
    raise ValueError("The publication list must contain at least 10 articles")

PI_name_length = len(PI_name)
l_array = []
p_array = []
N = 10
for b in bibcodes:
    res = ads.SearchQuery(bibcode=b,
                          fl=['year', 'author', 'citation_count', 'title'])
    for paper in res:
        print("Processing paper: " + b)
        print('Title: "' + unidecode(paper.title[0]) + '"')
        yr = int(paper.year)
        years = 2016 - yr
        if yr < 2011:
            raise ValueError("No papers prior to 2011 can be used")
        if years == 0:
            years = 1
        p_i = P(paper.citation_count / years)
        if (yr == 2016) and (p_i < 4):
            p_i = 4.0
        p_array.append(p_i)
        author_list_length = len(paper.author)
        bis = min(author_list_length, 5)
import pandas as pd

import ads

yt_bib = '2011ApJS..192....9T'
fields = ['author', 'bibcode', 'pubdate', 'title', 'author_norm']

q = ads.SearchQuery(reference=yt_bib, fl=fields, max_pages=100)
results = list(q)

data = {field: [] for field in fields}
for r in results:
    for f in fields:
        data[f].append(getattr(r, f))

df = pd.DataFrame(data)
df.to_json("data/yt_citations.json", orient="records")
for keyword in keywords:
    print(keyword)
    publication_data = []
    for journal in bibstemlist:
        # Initiate the dictionary for this journal
        journal_data = {"name": journal, "articles": [], "total": 0}
        for year in range(years[0], years[1] + 1):
            # Perform the query. We don't actually want the results
            # themselves, just the metadata that tells us how many
            # publications there were.
            q = ads.SearchQuery(
                q="abstract:(=\"{keyword}\") bibstem:\"{journal}\" year:{year}"
                .format(keyword=keyword, journal=journal, year=year),
                fl=['id'],
                rows=1)
            q.execute()
            num = int(q.response.numFound)
            print("{journal} had {num} publications using {keyword} in the "
                  "abstract in {year}".format(keyword=keyword,
                                              journal=journal,
                                              num=num,
                                              year=year))
            # Save this data
            journal_data["articles"].append([year, num])
            journal_data["total"] += num
except Exception:
    print("querying for some name that can't print")
papers = authorsPapers(phd.author[0].encode('utf-8'),
                       years='%i-%i' % (int(phd.year) - yearsPrePhD,
                                        now.year))
phdDict = article2dict(phd)
result = [article2dict(paper) for paper in papers]
# Make sure the PhD is in there!
if phdDict not in result:
    result.append(phdDict)
flags['nonUS'] = False
# Need to add a search for similarly named PhDs
ack = list(
    ads.SearchQuery(q='bibstem:"*PhDT" author:"%s"' %
                    authSimple(phd.author[0].encode('utf-8')),
                    database='astronomy'))
if len(ack) > 1:
    titles = set([paper.title[0].lower() for paper in ack
                  if paper.title is not None])
    if len(titles) > 1:
        flags['uniqueName'] = False
    else:
        flags['uniqueName'] = True
else:
    flags['uniqueName'] = True
savefile = phd.bibcode.replace('.', '_') + '.npz'
np.savez(os.path.join(outDir, savefile), result=result, flags=flags)
# encoding: utf-8

"""Who are the most cited astronomers?"""

__author__ = "Andy Casey <*****@*****.**>"

import ads

# Let's assume the most cited people have the most cited papers, since we
# can only search for papers, not people
most_cited_papers = ads.SearchQuery(q='*',
                                    sort='citation_count desc',
                                    fq='database:astronomy',
                                    rows=50,
                                    fl=['first_author'])

# Who are these successful people, anyway?
successful_astronomers = [paper.first_author for paper in most_cited_papers]

# Okay, let's get the top 50 most-cited papers for each person and see how
# many citations they have in total
total_citations = {}
for astronomer in successful_astronomers:
    papers = ads.SearchQuery(first_author=astronomer.encode('utf-8'),
                             sort='citation_count desc',
                             fq='database:astronomy',
                             rows=50,
                             fl=['citation_count'])
    total_citations[astronomer] = sum(
        [paper.citation_count for paper in papers])

# Now there's a problem because astronomers publish under "Aaronson, A" and
item = json.loads(filetext, object_pairs_hook=OrderedDict)
item = item[list(item.keys())[0]]
if 'sources' in item:
    for source in item['sources']:
        if 'bibcode' in source:
            bc = source['bibcode']
            if bc not in biblio:
                tqdm.write(bc)
                authors = ''
                if bc in bibauthordict:
                    authors = bibauthordict[bc]
                allauthors = list(ads.SearchQuery(bibcode=bc))
                if allauthors and allauthors[0].author:
                    allauthors = allauthors[0].author
                else:
                    allauthors = []
                biblio[bc] = OrderedDict([('authors', authors),
                                          ('allauthors', allauthors),
                                          ('bibcode', bc),
                                          ('events', []),
                                          ('eventdates', []),
                                          ('types', []),
                                          ('photocount', 0),
                                          ('spectracount', 0),
                                          ('metacount', 0)])
            biblio[bc]['events'].append(item['name'])
if 'discoverdate' in item and item['discoverdate']:
# listing with code names
arg1 = sys.argv[1]
codes = open(arg1).readlines()

for code in codes:
    code = code.strip()
    if code[0] == '#':
        continue
    print("# CODE:", code)

    # lazy loading (expensive)
    # q = ads.SearchQuery(full=code, sort=year)

    # loading with fields ahead of time
    q = ads.SearchQuery(full=code,
                        fl=['title', 'first_author', 'year',
                            'citation_count', 'bibcode'],
                        sort='year',
                        rows=10)
    n1 = 0
    for paper in q:
        print("%s\t%s\t%s\t%s\t%s" % (paper.year, paper.citation_count,
                                      paper.bibcode, paper.first_author,
                                      paper.title[0]))
        n1 = n1 + 1
    q1 = q.response.get_ratelimits()
    print('# %d %s\n' % (n1, q1['remaining']))
def add_paper_using_galpy(arxiv_id):
    # Read current file
    with open(os.path.join(_PAPERS_FILE_DIR, 'papers-using-galpy.json'),
              'r') as jsonFile:
        papers_data = json.load(jsonFile)
    duplicate = numpy.any([
        papers_data[p]['url'] == 'https://arxiv.org/abs/{}'.format(arxiv_id)
        for p in papers_data.keys()
    ])
    if duplicate:
        print("This appears to be a duplicate of an existing entry:")
        dup_indx = [
            papers_data[p]['url'] ==
            'https://arxiv.org/abs/{}'.format(arxiv_id)
            for p in papers_data.keys()
        ].index(True)
        print(json.dumps(papers_data[list(papers_data.keys())[dup_indx]],
                         indent=4,
                         separators=(',', ': ')).replace('\\n', '\n'))
        cont = input("Continue? [y/N] ")
        cont = cont.lower() == 'y'
        if not cont:
            print("Okay, aborting then...")
            sys.exit(-1)

    # Find paper on ADS
    if True:
        ads_paper = list(
            ads.SearchQuery(arxiv=arxiv_id,
                            fl=['author', 'title', 'year', 'pub', 'volume',
                                'page']))[0]
    else:
        # Mock-up for testing without hitting the API
        class ads_paper_example():
            def __init__(self):
                self.author = ['Qian, Yansong', 'Arshad, Yumna', 'Bovy, Jo']
                self.title = ['The structure of accreted stellar streams']
                self.year = '2022'
                self.pub = 'Monthly Notices of the Royal Astronomical Society'
                self.volume = '511'
                self.page = ['2339']

        ads_paper = ads_paper_example()
    internal_id = build_internal_id(ads_paper, papers_data)
    new_entry = build_and_edit_new_entry(ads_paper, internal_id, arxiv_id)
    print("Adding entry {}".format(arxiv_id))

    # Move the screenshot into the right place
    done = input("""Now please take a screen shot of an example figure and
place it in the paper-figs directory. Just take it with the standard Mac
Screenshot app and have it be saved to that directory. I'll do the rest!
Please press enter when done, any other input will lead me to abort the
operation!
""")
    if not done == '':
        print("Okay, aborting then...")
        sys.exit(-1)

    # Find the screenshot file and move it
    possible_screenshots = glob.glob(
        os.path.join(_PAPERS_FILE_DIR, 'paper-figs', 'Screen Shot*'))
    if len(possible_screenshots) > 1:
        print("Found multiple possible screen shots... aborting ...")
        sys.exit(-1)
    shutil.move(
        possible_screenshots[0],
        os.path.join(_PAPERS_FILE_DIR, 'paper-figs',
                     '{}.png'.format(internal_id.lower())))
    print("Moved file to {}".format(
        os.path.join('paper-figs', '{}.png'.format(internal_id.lower()))))

    num_lines = sum(1 for line in open(
        os.path.join(_PAPERS_FILE_DIR, 'papers-using-galpy.json')))
    with open(os.path.join(_PAPERS_FILE_DIR, 'papers-using-galpy.json'),
              'r+') as jsonFile:
        contents = jsonFile.readlines()
        pretty_print_new_entry(
            arxiv_id,
            internal_id,
            new_entry,
            print_func=lambda x: contents.insert(-11, x + '\n'))
        jsonFile.seek(0)
        jsonFile.writelines(contents)
    print("Success!")
    return None
def process_token(article_identifier, prefs, bibdesk):
    """
    Process a single article token from the user, adding it to BibDesk.

    Parameters
    ----------
    article_identifier : str
        Any user-supplied `str` token.
    prefs : :class:`Preferences`
        A `Preferences` instance.
    bibdesk : :class:`BibDesk`
        A `BibDesk` AppKit hook instance.
    """
    if 'true' in prefs['options']['alert_sound'].lower():
        alert_sound = 'Frog'
    else:
        alert_sound = None

    if 'dev_key' not in prefs['default']['ads_token']:
        ads.config.token = prefs['default']['ads_token']

    ads_query = ads.SearchQuery(identifier=article_identifier,
                                fl=['author', 'first_author', 'bibcode',
                                    'identifier', 'alternate_bibcode', 'id',
                                    'year', 'title', 'abstract'])
    try:
        ads_articles = list(ads_query)
    except Exception:
        logging.info(
            "API response error; likely no authorized key is provided!")
        notify('API response error',
               'key:' + prefs['default']['ads_token'],
               'Likely no authorized key is provided!',
               alert_sound=alert_sound)
        return False

    if len(ads_articles) != 1:
        logging.debug(
            ' Zero or multiple ADS entries for the article identifier: {}'
            .format(article_identifier))
        logging.debug('Matching number: {}'.format(len(ads_articles)))
        notify('Found zero or multiple ADS entries for',
               article_identifier,
               'No update in BibDesk',
               alert_sound=alert_sound)
        logging.info("Found zero or multiple ADS entries for {}".format(
            article_identifier))
        logging.info("No update in BibDesk")
        return False

    ads_article = ads_articles[0]

    use_bibtexabs = False
    # use "bibtex" by default; another option could be "bibtexabs":
    #   https://github.com/andycasey/ads/pull/109
    # however, a change in ads() is required and the abstract field from the
    # "bibtexabs" option doesn't always comply with the tex syntax.
    if use_bibtexabs:
        ads_bibtex = ads.ExportQuery(bibcodes=ads_article.bibcode,
                                     format='bibtexabs').execute()
    else:
        ads_bibtex = ads.ExportQuery(bibcodes=ads_article.bibcode,
                                     format='bibtex').execute()

    logging.debug("process_token: >>>API limits")
    logging.debug("process_token: {}".format(
        ads_query.response.get_ratelimits()))
    logging.debug("process_token: >>>ads_bibtex")
    logging.debug("process_token: {}".format(ads_bibtex))

    for k, v in ads_article.items():
        logging.debug('process_token: >>>{}'.format(k))
        logging.debug('process_token: {}'.format(v))

    article_bibcode = ads_article.bibcode
    gateway_url = 'https://' + prefs['default']['ads_mirror'] + '/link_gateway'
    # https://ui.adsabs.harvard.edu/link_gateway by default

    if 'true' in prefs['options']['download_pdf'].lower():
        pdf_filename, pdf_status = process_pdf(article_bibcode,
                                               prefs=prefs,
                                               gateway_url=gateway_url)
    else:
        pdf_filename = '.null'

    kept_pdfs = []
    kept_fields = {}
    kept_groups = []

    found = difflib.get_close_matches(ads_article.title[0],
                                      bibdesk.titles,
                                      n=1,
                                      cutoff=.7)
    if len(found) > 0:
        # first author is the same
        if found and difflib.SequenceMatcher(
                None,
                bibdesk.authors(bibdesk.pid(found[0]))[0],
                ads_article.author[0]).ratio() > .6:
            # further comparison on abstract
            abstract = bibdesk('abstract',
                               bibdesk.pid(found[0])).stringValue()
            if not abstract or difflib.SequenceMatcher(
                    None, abstract, ads_article.abstract).ratio() > .6:
                pid = bibdesk.pid(found[0])
                kept_groups = bibdesk.get_groups(pid)
                # keep all fields for later comparison
                # (especially rating + read bool)
                kept_fields = dict(
                    (k, v)
                    for k, v in zip(
                        bibdesk('return name of fields', pid, True),
                        bibdesk('return value of fields', pid, True))
                    # Adscomment may be arXiv only
                    if k != 'Adscomment')
                # plus BibDesk annotation
                kept_fields['BibDeskAnnotation'] = bibdesk(
                    'return its note', pid).stringValue()
                kept_pdfs += bibdesk.safe_delete(pid)
                notify('Duplicate publication removed',
                       article_identifier,
                       ads_article.title[0],
                       alert_sound=alert_sound)
                logging.info('Duplicate publication removed:')
                logging.info(article_identifier)
                logging.info(ads_article.title[0])
                bibdesk.refresh()

    # add new entry
    ads_bibtex_clean = ads_bibtex.replace('\\', r'\\').replace('"', r'\"')
    pub = bibdesk(f'import from "{ads_bibtex_clean}"')
    # pub id
    pub = pub.descriptorAtIndex_(1).descriptorAtIndex_(3).stringValue()
    # automatic cite key
    bibdesk('set cite key to generated cite key', pub)

    # abstract
    if ads_article.abstract is not None:
        ads_abstract_clean = ads_article.abstract.replace(
            '\\', r'\\').replace('"', r'\"').replace('}', ' ').replace(
                '{', ' ')
        bibdesk(f'set abstract to "{ads_abstract_clean}"', pub)

    doi = bibdesk('value of field "doi"', pub).stringValue()
    if pdf_filename.endswith('.pdf') and pdf_status:
        # register PDF into BibDesk
        bibdesk(f'add POSIX file "{pdf_filename}" to beginning of linked files',
                pub)
        # automatic file name
        bibdesk('auto file', pub)
    elif 'http' in pdf_filename and not doi:
        # URL for electronic version - only add it if no DOI link present
        # (they are very probably the same)
        bibdesk(f'make new linked URL at end of linked URLs with data "{pdf_filename}"',
                pub)

    # add URLs as linked URL if not there yet
    urls = bibdesk('value of fields whose name ends with "url"',
                   pub,
                   strlist=True)
    if 'arxiv' in article_bibcode.lower():
        article_gateway = get_article_gateway(article_bibcode,
                                              gateway_url=gateway_url)
        urls += [article_gateway['eprint_html']]
    urlspub = bibdesk('linked URLs', pub, strlist=True)
    for u in [u for u in urls if u not in urlspub]:
        bibdesk(f'make new linked URL at end of linked URLs with data "{u}"',
                pub)

    # add old annotated files
    for kept_pdf in kept_pdfs:
        bibdesk(f'add POSIX file "{kept_pdf}" to end of linked files', pub)

    # re-insert custom fields
    bibdesk_annotation = kept_fields.pop("BibDeskAnnotation", '')
    bibdesk(f'set its note to "{bibdesk_annotation}"', pub)
    newFields = bibdesk('return name of fields', pub, True)
    for k, v in list(kept_fields.items()):
        if k not in newFields:
            bibdesk(f'set value of field "{k}" to "{v}"', pub)
    notify('New publication added',
           bibdesk('cite key', pub).stringValue(),
           ads_article.title[0],
           alert_sound=alert_sound)
    logging.info('New publication added:')
    logging.info(bibdesk('cite key', pub).stringValue())
    logging.info(ads_article.title[0])

    # add back the static groups assignment
    if kept_groups != []:
        new_groups = bibdesk.add_groups(pub, kept_groups)

    return True
from networkx.readwrite.gexf import write_gexf

# This data set of names contains no titles
CONSTANTS.titles.remove(*CONSTANTS.titles)

SOLAR_ASTROPHYSICS_QUERY = (
    'keyword:"Astrophysics - Solar and Stellar Astrophysics" '
    'title:("solar" OR "sun" OR "helio" OR "cme" OR "corona")'
    # 'year:2020-2021'
)

if __name__ == "__main__":
    # Make sure environment variable ADS_DEV_KEY is defined
    solar_papers = ads.SearchQuery(
        q=SOLAR_ASTROPHYSICS_QUERY,
        fl=["author"],
        max_pages=1000,
    )
    solar_coauthorship = nx.DiGraph()
    for paper in solar_papers:
        paper_authors = list(paper.author)
        if len(paper_authors) > 1:
            for second_author in paper_authors[1:]:
                solar_coauthorship.add_edge(paper_authors[0], second_author)
        else:
            solar_coauthorship.add_node(paper_authors[0])

    # Merge duplicate author names
    authors = [(author, HumanName(author))
               for author in solar_coauthorship.nodes]
    lnfi = defaultdict(list)  # Last name, first initial
    for author, parsed in authors:
        if len(parsed.first) > 0:
    outfile = sys.argv[2]
else:
    infile = 'ads_refs.tsv'
    outfile = 'ads_refs.dat'

# Read the infile
ads_data = {}
with open(infile, 'r') as f:
    reader = csv.reader(f, delimiter='\t')
    next(reader, None)
    for line in reader:
        ref = line[0]
        bibcode = line[1]
        ads_data[ref] = bibcode

# Replace the bibcodes with the ADS articles
for key, value in ads_data.items():
    print('Processing key:', key)
    ads_data[key] = list(ads.SearchQuery(bibcode=value))[0]

# Write the data
with open(outfile, 'wb') as f:
    pickle.dump(ads_data, f)
ads.config.token = get_dev_key()

with open('cv.bib', 'r') as fh:
    bib_database = bibtexparser.load(fh, parser=parser)

firstauthor_entries = []
nonfirstauthor_entries = []
total_cites = 0
total_firstauthor_cites = 0

for entry in bib_database.entries:
    if 'doi' in entry:
        paper = ads.SearchQuery(
            doi=entry['doi'],
            fl=['citation_count', 'author', 'year', 'id', 'bibcode'])
        pfx = "Loaded from doi {0}".format(entry['doi'])
    elif 'adsurl' in entry:
        adsurl = entry['adsurl'].split("/")[-1].replace("%26", "&")
        paper = ads.SearchQuery(
            bibcode=adsurl,
            fl=['citation_count', 'author', 'year', 'id', 'bibcode'])
        pfx = "Loaded from adsurl {0}".format(adsurl)
    else:
        print("Skipped {0} because it has no DOI or ADSURL".format(
            entry['title']))
        continue
    paper.execute()