def process(document):
    """Tag ``document`` with the citation count of its best Scholar match.

    While the module-level ``save_cookie`` flag is set, a throwaway query
    ("quantum theory") is sent first, the querier's cookies are saved, and
    the flag is cleared so this happens only once.

    The document title is then searched as a phrase. Only the first result
    is considered; it is accepted when the ``difflib`` similarity between
    its title and ``document.title`` is at least ``min_title_match_ratio``.

    On a match the document's tags are rebuilt via ``update_tags`` with the
    tag produced by ``ncitations_to_tag``, the raw citation count is
    appended as a string, and the document is updated.

    Returns the first result's ``num_citations`` value, or ``None`` when
    there are no results or the title match is too weak.
    """
    global save_cookie

    querier = ScholarQuerier()
    search = SearchScholarQuery()

    # Save cookies once, on the first paper processed.
    if save_cookie:
        search.set_phrase("quantum theory")
        querier.send_query(search)
        querier.save_cookies()
        save_cookie = False

    search.set_phrase(document.title)
    querier.send_query(search)

    hits = querier.articles
    if not hits:
        return None

    best = hits[0]
    matcher = difflib.SequenceMatcher(None, document.title, best['title'])
    if matcher.ratio() < min_title_match_ratio:
        return None

    tags = update_tags(
        document.tags,
        [(tag_pattern, ncitations_to_tag(best['num_citations']))],
    )
    tags.append(str(best['num_citations']))
    document.update(tags=tags)
    return best['num_citations']
def papers_by_query_api(request):
    """Return the top Scholar hit for the ``phrase`` GET parameter as JSON.

    Non-GET requests and requests without a non-empty ``phrase`` get an
    ``HttpResponseBadRequest``. Otherwise the response is a ``JsonResponse``
    of the form ``{'papers': [entry]}`` where ``entry`` describes the first
    article found, or holds empty placeholder values when nothing was found.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    phrase = request.GET.get('phrase', '')
    if not phrase:
        return HttpResponseBadRequest()

    search = SearchScholarQuery()
    search.set_phrase(phrase)
    querier = ScholarQuerier()
    querier.send_query(search)

    hits = querier.articles
    if hits:
        top = hits[0]
        entry = {
            'title': top['title'],
            'id': top['cluster_id'],
            'url': top['url'],
            'excerpt': top['excerpt'],
        }
    else:
        # Placeholder entry so the response always carries one paper record.
        entry = {'title': '', 'id': 0, 'url': '', 'excerpt': ''}

    return JsonResponse({'papers': [entry]})
def setCitationByTitle(paper_title, author="Si Chen"):
    """Build a single-result Scholar query for a paper title and author.

    The query is only constructed, not sent; the caller is responsible for
    passing it to a querier.

    Args:
        paper_title: Phrase to search for (set via ``set_phrase``).
        author: Author name to restrict the search to. Defaults to
            "Si Chen", the value that was previously hard-coded.

    Returns:
        The configured ``SearchScholarQuery`` instance.
    """
    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(paper_title)
    query.set_num_page_results(1)
    return query
def getPublications(author):
    """Query Scholar for ``author`` and emit the results via ``scholar.txt``.

    The author value is echoed to stdout first. Results are not returned;
    they are handed to ``scholar.txt(querier, with_globals=False)``.

    Args:
        author: Author name passed to ``SearchScholarQuery.set_author``.
    """
    # Single-argument print() behaves the same on Python 2 and Python 3,
    # unlike the Python 2-only print statement.
    print(author)

    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_author(author)
    querier.send_query(query)

    scholar.txt(querier, with_globals=False)
def getRelatedPublications(author):
    """Query Scholar for ``author`` and print the resulting article list.

    The author value is echoed to stdout, a default-configured querier runs
    an author search, and ``querier.articles`` is printed. Nothing is
    returned.

    Args:
        author: Author name passed to ``SearchScholarQuery.set_author``.
    """
    # Single-argument print() behaves the same on Python 2 and Python 3,
    # unlike the Python 2-only print statement.
    print(author)

    settings = ScholarSettings()
    querier = ScholarQuerier()
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_author(author)
    querier.send_query(query)

    print(querier.articles)
def literature_search(query_terms, type='full_name'):
    """Run one Google Scholar query per row of ``query_terms``.

    Args:
        query_terms: Object whose ``.values`` attribute iterates over rows
            (presumably a pandas DataFrame -- confirm against the caller).
            With ``type == 'full_name'`` a row is read as
            ``(repo_id, phrase, start_year)``; otherwise as
            ``(repo_id, repo_name, phrase, keywords, start_year)``.
        type: Row layout selector, see above.

    Returns:
        list: The concatenated results of ``process_arts`` for every row
        that produced at least one article.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    config = ScholarConf()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(settings)

    papers = []
    for item in query_terms.values:
        repo_id = item[0]

        # Use a fresh query for every row. Reusing one query object let the
        # "some words" filter of an earlier row leak into later rows that
        # have no keywords of their own.
        query = SearchScholarQuery()

        if type != 'full_name':
            repo_name = item[1]
            phrase = item[2]
            keywords = item[3]
            start_year = item[4]
            if keywords:
                # A lone keyword gets a trailing comma appended, as before.
                if ',' not in keywords:
                    keywords = keywords + ','
                query.set_words_some(keywords)
            query.set_words(repo_name)
            query.set_phrase(phrase)
            phrase_text = repo_name + ', ' + phrase
        else:
            phrase = item[1]
            start_year = item[2]
            query.set_phrase(phrase)  # e.g. commontk/CTK, meoyo/AIPS
            phrase_text = phrase

        print('search papers for {} ...'.format(phrase_text))
        query.set_timeframe(start_year)
        querier.send_query(query)

        articles = querier.articles
        if articles:
            papers.extend(process_arts(config, repo_id, phrase_text, articles))

        # Pause after every request, including ones with no results, so
        # consecutive empty queries are not sent back to back.
        time.sleep(random.randrange(1, 10))

    return papers
def search(bot, update, args):
    """Bot command handler: search Scholar and post the result titles.

    Sends three kinds of messages to the chat that issued the command: an
    echo of the search string, the number of results, and one numbered
    message per article title.

    Args:
        bot: Object exposing ``send_message(chat_id=..., text=...)``
            (presumably a python-telegram-bot ``Bot`` -- confirm).
        update: Incoming update; only ``update.message.chat_id`` is read.
        args: Sequence of word strings making up the search.
    """
    chat_id = update.message.chat_id
    search_command = ' '.join(args)
    bot.send_message(chat_id=chat_id,
                     text="You searched for: " + search_command)

    querier = ScholarQuerier()
    query = SearchScholarQuery()
    # Pass the joined search string, not the raw argument list.
    query.set_words(search_command)
    querier.send_query(query)
    articles = querier.articles

    bot.send_message(chat_id=chat_id,
                     text="Number of results: " + str(len(articles)))

    # enumerate() supplies the running number; the previous counter was
    # never incremented, so every result was labelled "1.".
    for index, article in enumerate(articles, start=1):
        bot.send_message(chat_id=chat_id,
                         text=str(index) + ". " + article.attrs['title'][0])
def getPublications_Title(title):
    """Search Scholar for ``title`` and return the matching publications.

    Args:
        title: Words passed to ``SearchScholarQuery.set_words``.

    Returns:
        list: The items produced by ``scholar.json(querier)``, or an empty
        list when that call yields nothing truthy.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_words(title)
    querier.send_query(query)

    publications = []
    related_list = scholar.json(querier)
    if related_list:
        # One print() call with a single pre-formatted string gives the
        # same output on Python 2 and Python 3.
        print("No of related publications found :  %d" % len(related_list))
        publications.extend(related_list)
    return publications
def query_scholar_for_papers(author, searchstring):
    """Return the citation text of the first Scholar hit for an author/phrase.

    The querier is configured for BibTeX citations and five results per
    page. When at least one article comes back, the first article's
    ``as_citation()`` output followed by a newline is returned; otherwise a
    fixed "no results" message is returned.
    """
    querier = ScholarQuerier()
    prefs = ScholarSettings()
    prefs.set_citation_format(prefs.CITFORM_BIBTEX)
    prefs.set_per_page_results(5)
    querier.apply_settings(prefs)

    search = SearchScholarQuery()
    search.set_author(author)
    search.set_phrase(searchstring)
    querier.send_query(search)

    if len(querier.articles) > 0:
        return '' + querier.articles[0].as_citation() + '\n'
    return 'Ooopsie. No results. Maybe we ran over the request limit?'
def process(document):
    """Tag ``document`` with the citation count of its best Scholar match.

    Searches Scholar for ``document.title`` as a phrase and looks only at
    the first result. That result is rejected when the ``difflib``
    similarity between its title and the document title falls below
    ``min_title_match_ratio``.

    On acceptance, the document's tags are rebuilt through ``update_tags``
    using the tag from ``ncitations_to_tag`` and written back with
    ``document.update``.

    Returns the first result's ``num_citations`` value, or ``None`` when
    there are no results or the title match is too weak.
    """
    querier = ScholarQuerier()
    search = SearchScholarQuery()
    search.set_phrase(document.title)
    querier.send_query(search)

    candidates = querier.articles
    if not candidates:
        return None

    top_hit = candidates[0]
    similarity = difflib.SequenceMatcher(
        None, document.title, top_hit['title']
    ).ratio()
    if similarity < min_title_match_ratio:
        return None

    current_tags = document.tags
    replacement = (tag_pattern, ncitations_to_tag(top_hit['num_citations']))
    document.update(tags=update_tags(current_tags, [replacement]))
    return top_hit['num_citations']
def blocked():
    """Probe Scholar with a fixed author query and report whether it looks blocked.

    Waits 60 seconds, runs an author search for "Ryan Baker", and collects
    whatever ``scholar.json(querier)`` yields.

    Returns:
        bool: ``True`` when no publications were collected (treated as
        "blocked"), ``False`` otherwise.
    """
    # One print() call with a single string gives the same output on
    # Python 2 and Python 3.
    print("Test if blocked....")
    time.sleep(60)

    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_author("Ryan Baker")
    querier.send_query(query)

    publications = []
    related_list = scholar.json(querier)
    if related_list:
        print("Block Test : No of related publications found :  %d"
              % len(related_list))
        publications.extend(related_list)

    return len(publications) == 0
def getPublications(authors):
    """Collect Scholar publications for each non-empty author in ``authors``.

    Args:
        authors: Iterable of author names; empty entries are skipped.

    Returns:
        list: All items yielded by ``scholar.json(querier)`` across the
        authors, in query order.
    """
    # Single-argument print() gives the same output on Python 2 and 3.
    print(authors)

    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()

    publications = []
    for author in authors:
        if len(author) == 0:
            continue

        print("Using Author :  %s" % (author,))
        query.set_author(author)
        querier.send_query(query)

        related_list = scholar.json(querier)
        if related_list:
            print("No of related publications found :  %d" % len(related_list))
            publications.extend(related_list)

        # Fixed pause after every query that was actually sent.
        # NOTE(review): the original nesting of this sleep was lost with
        # the file's indentation -- confirm it belongs at this level.
        time.sleep(20)

    return publications
def get_url(querier, phrase):
    """Return the URL of the first Scholar result for ``phrase``.

    Args:
        querier: Object providing ``send_query(query)`` and an ``articles``
            sequence whose items expose ``attrs['url']`` as a 3-item value.
        phrase: Title or phrase to search for.

    Returns:
        The first element of the first article's ``attrs['url']``, or
        ``None`` when there are no articles or the lookup fails.
    """
    query = SearchScholarQuery()
    query.set_phrase(phrase)
    # Set title search only.
    query.set_scope(True)
    # Only one result is needed.
    query.set_num_page_results(1)

    try:
        querier.send_query(query)
        articles = querier.articles
        if not articles:
            return None
        url, _, _ = articles[0].attrs['url']
        return url
    except Exception:
        # Best-effort lookup: any ordinary failure means "no URL". Unlike a
        # bare ``except:``, this lets KeyboardInterrupt and SystemExit
        # propagate.
        return None
from scholar import ScholarQuerier, ScholarSettings, SearchScholarQuery DST = Path(__file__).absolute().parents[1] / 'publications.bib' IGNORE = """ vo2014cytotoxicity takeilnatriureticpeptideisolatedfromeelbrain matchintemporal brodbeck2018transformation """.split() ACRONYMS = ['EEG', 'MEG', 'MRI'] querier = ScholarQuerier() settings = ScholarSettings() settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX) querier.apply_settings(settings) query = SearchScholarQuery() query.set_phrase("eelbrain") query.set_timeframe(2012, None) query.set_include_patents(False) bib = parse_file(DST, 'bibtex') start = 0 while True: querier.send_query(query) if len(querier.articles) == 0: break # extract articles for article in querier.articles: querier.get_citation_data(article) # convert to pybtex entry data = parse_bytes(article.citation_data, 'bibtex')