def get_gs_citations_web(title):
    """
    Use the Google Scholar web URL and the requests API to obtain the
    citations for a given title of a scholarly article

    Parameters
    ----------
    arg1 | title: str
        The title of a scholarly article

    Returns
    -------
    dict
        The publication record for the first match
    """
    while True:
        try:
            # call the lum proxy object
            scholarly.use_lum_proxy()
            # make the query
            query = scholarly.search_pubs(title)
            # success: leave the retry loop
            break
        except Exception as e:
            # try again on failure
            continue
    # return the response dict
    return next(query)

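# Usage sketch for get_gs_citations_web -- hedged: assumes recent scholarly
# versions, whose result dicts carry the citation count under 'num_citations';
# the title below is only an example reused from elsewhere in this file:
#
#   pub = get_gs_citations_web("Creating correct blur and its effect on accommodation")
#   print(pub.get("num_citations", 0))
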
def query_generator(search_term, venue, year, num_results, pg, grace_period=0):
    google_search_query = search_term
    query_year = year
    if venue == "CoRL":
        if year == 2019:
            query_year = 2020
        elif year == 2020:
            query_year = 2021
    while True:
        try:
            search_query = scholarly.search_pubs(
                google_search_query,
                year_low=query_year,
                year_high=query_year + grace_period,
                patents=False,
            )
            results = list(itertools.islice(search_query, num_results))
            print("Search URL: ", search_query._url)
            return results, search_query._url
        except Exception as e:
            print("Trying different proxy!")
            pg.get_next_proxy()

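# Usage sketch -- hedged: pg is assumed to be the caller's proxy-rotation
# helper (the only interface query_generator relies on is pg.get_next_proxy()),
# and the search term is arbitrary:
#
#   results, url = query_generator("legged locomotion", "CoRL", 2019, 20, pg)
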
def publicationDetails(p):
    # p is the passed publication name
    search_query = scholarly.search_pubs(p)
    pd = next(search_query).fill()
    print(pd.bibtex)
    # pause between requests to avoid being rate-limited
    time.sleep(5)

def get(self, queries):
    publications = []
    with Controller.from_port(port=self.port) as controller:
        controller.authenticate('scholarly_password')
        # route all traffic through the local Tor SOCKS5 proxy
        socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, "127.0.0.1", 9050)
        socket.socket = socks.socksocket
        for query in queries:
            found = False
            limit = 1
            while not found:
                try:
                    response = scholarly.search_pubs(query)
                    found = True
                except Exception as e:
                    # blocked: wait for a fresh Tor circuit, then retry
                    while True:
                        if controller.is_newnym_available():
                            print("Refreshing Tor Node...")
                            controller.signal(Signal.NEWNYM)
                            break
            elem = 1
            count = 0
            while (elem is not None) and (count < limit):
                elem = next(response, None)
                if elem is None:
                    break
                info = elem.bib
                # pub = Publication(info)
                print(type(info))
                publications.append(info)
                count += 1
    return publications

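# Imports assumed by the Tor-based get() above (stem drives the Tor control
# port, PySocks patches the socket; both are real packages, but the enclosing
# class and its self.port attribute are not shown in this file):
#
#   import socket
#   import socks
#   from stem import Signal
#   from stem.control import Controller
#   from scholarly import scholarly
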
def _get_pdf(k, title):
    # Skip if exists
    if os.path.isdir(out_dir(k)) and os.listdir(out_dir(k)):
        return os.path.join(out_dir(k), os.listdir(out_dir(k))[0])

    # Normalize title
    google_search = scholarly.search_pubs(title)
    google_result = next(google_search)
    print(title)
    title = google_result['bib']['title'] + ' ' + (' '.join(
        google_result['bib']['author']))
    print(title)

    # Get DOI
    try:
        found, bib_string = get_bib_from_title(title)
    except Exception as e:
        print("Error while getting DOI", e)
        return None

    # Download
    if found:
        bib = bibtexparser.loads(bib_string).entries
        if bib and ("doi" in bib[0]) and (bib[0]['ENTRYTYPE'] == 'article'):
            doi = bib[0]["doi"]
            try:
                SciHub(doi, out_dir(k)).download(choose_scihub_url_index=3)
            except Exception as e:
                print("Error while downloading", e)
                return None
            pdf = (os.path.join(out_dir(k), os.listdir(out_dir(k))[0])
                   if os.listdir(out_dir(k)) else None)
            return pdf
        else:
            print(bib)
            print("\tAbsent DOI")
            return None

def search(self, query: str, generic_cover: str = "",
           locale: str = "en") -> Optional[List[MetaRecord]]:
    val = list()
    if self.active:
        title_tokens = list(self.get_title_tokens(query, strip_joiners=False))
        if title_tokens:
            tokens = [quote(t.encode("utf-8")) for t in title_tokens]
            query = " ".join(tokens)
        try:
            scholarly.set_timeout(20)
            scholarly.set_retries(2)
            scholar_gen = itertools.islice(scholarly.search_pubs(query), 10)
        except Exception as e:
            log.warning(e)
            return None
        for result in scholar_gen:
            match = self._parse_search_result(
                result=result, generic_cover="", locale=locale)
            val.append(match)
    return val

def scholarlyBookAuthor():
    # try:
    query = request.form['query']
    search_query = scholarly.search_author(query)
    string = '['
    author = next(search_query).fill()
    for pub in author.publications:
        search_book = scholarly.search_pubs(pub.bib['title'])
        book = next(search_book)
        # print(book)
        url = ''
        try:
            url = book.bib['url']
        except Exception:
            url = ''
        print(url)
        # print(basename(url))
    # for i in range(5):
    #     try:
    #         author = next(search_query)
    #         string += str(author) + ","
    #     except:
    #         print('')
    #
    # if (len(string) > 0):
    #     string = string[:-1]
    # string = string + "]"
    return (str(string))

def search(self, query, generic_cover=""): val = list() if self.active: scholar_gen = scholarly.search_pubs(' '.join(query.split('+'))) i = 0 for publication in scholar_gen: v = dict() v['id'] = publication['url_scholarbib'].split(':')[1] v['title'] = publication['bib'].get('title') v['authors'] = publication['bib'].get('author', []) v['description'] = publication['bib'].get('abstract', "") v['publisher'] = publication['bib'].get('venue', "") if publication['bib'].get('pub_year'): v['publishedDate'] = publication['bib'].get( 'pub_year') + "-01-01" else: v['publishedDate'] = "" v['tags'] = [] v['rating'] = 0 v['series'] = "" v['cover'] = "" v['url'] = publication.get('pub_url') or publication.get( 'eprint_url') or "", v['source'] = { "id": self.__id__, "description": "Google Scholar", "link": "https://scholar.google.com/" } val.append(v) i += 1 if (i >= 10): break return val
def make_query(topics):
    """Queries Google Scholar and returns the first new research paper found according to topics"""
    queryString = generate_query_string(topics)
    for topic in topics:
        queryString += topic + ' '
    logsDir = get_log_path()
    queryListPath = os.path.join(logsDir, 'query-list.pickle')
    # If any queries were made previously, load that list; otherwise start with an empty one
    try:
        with open(queryListPath, 'rb') as f:
            prevQueries = pickle.load(f)
    except FileNotFoundError:
        prevQueries = []
    searchQuery = scholarly.search_pubs(queryString)
    pub = first_unique_query(prevQueries, searchQuery)
    prevQueries.append(pub.bib['title'])
    with open(queryListPath, 'wb+') as f:
        pickle.dump(prevQueries, f)
    return pub

def get_citations_from_title(title: str) -> int:
    """
    Args:
        title (str): Title of the paper to be searched on Scholar.

    Raises:
        TypeError: If something other than str is passed.

    Returns:
        int: Number of citations of the paper.
    """
    if not isinstance(title, str):
        raise TypeError(f"Pass str not {type(title)}")

    # Search for an exact match
    title = '"' + title.strip() + '"'
    matches = scholarly.search_pubs(title)
    counts = list(map(lambda p: int(p.bib["cites"]), matches))
    if len(counts) == 0:
        logger.warning(f"Found no match for {title}.")
        return 0
    if len(counts) > 1:
        logger.warning(f"Found {len(counts)} matches for {title}.")
    return counts[0]

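# Example call -- hedged: the title is reused from the tests in this file;
# the function returns 0 (and logs a warning) when no exact-title match is
# found:
#
#   n = get_citations_from_title("Creating correct blur and its effect on accommodation")
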
def get_papers_from_paper_citations(paper_title: str):
    """
    Gets the papers that cited the paper given as a parameter.
    It registers the found papers in the articles folder and registers
    the citation relationship in the citations folder.
    """
    # search by title as a keyword
    target_paper_generator = scholarly.search_pubs(paper_title)
    print("=======> getting the target paper")
    target_paper = next(target_paper_generator)  # get the first result
    print('##########################')
    publications_generator = scholarly.citedby(target_paper)
    try:
        citations_count = 0
        while citations_count <= NB_MAX_CITATIONS_PER_PAPERS:
            publication = next(publications_generator)
            # filled_publication = scholarly.fill(publication)
            mydict = publication_to_dict(publication)
            write_publication(mydict, PUBLICATIONS_CSV_FILE_OUTPUT)
            register_citation(target_paper['citedby_url'], mydict['citedby_url'])
            citations_count += 1
    except Exception as e:
        raise e

def test_search_pubs_filling_publication_contents(self):
    '''
    This test checks the process of filling a publication
    that is derived from the search publication snippets.
    '''
    query = 'Creating correct blur and its effect on accommodation'
    results = scholarly.search_pubs(query)
    pubs = [p for p in results]
    self.assertGreaterEqual(len(pubs), 1)
    f = pubs[0].fill()
    self.assertTrue(f.bib['author'] ==
                    u'Cholewiak, Steven A and Love, Gordon D and Banks, Martin S')
    self.assertTrue(f.bib['journal'] == u'Journal of vision')
    self.assertTrue(f.bib['number'] == u'9')
    self.assertTrue(f.bib['pages'] == u'1--1')
    self.assertTrue(f.bib['publisher'] ==
                    u'The Association for Research in Vision and Ophthalmology')
    self.assertTrue(f.bib['title'] ==
                    u'Creating correct blur and its effect on accommodation')
    self.assertTrue(f.bib['url'] ==
                    u'https://jov.arvojournals.org/article.aspx?articleid=2701817')
    self.assertTrue(f.bib['volume'] == u'18')
    self.assertTrue(f.bib['year'] == u'2018')

def get_bibtex_for_pubs(pubs: str) -> str:
    """Returns the BibTeX entry of the first result whose title matches."""
    search_query = scholarly.search_pubs(pubs)
    for result in search_query:
        if query_bib_title(result["bib"]):
            return scholarly.bibtex(result)
    raise NotFoundError(f"Can't find {pubs}")

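# Usage sketch -- hedged: query_bib_title and NotFoundError are the caller's
# own helpers and are not defined in this file; scholarly.bibtex() is the
# library call that renders a result as a BibTeX string:
#
#   print(get_bibtex_for_pubs("Creating correct blur and its effect on accommodation"))
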
def search_paper(title: str, feel_lucky: bool = True):
    """
    Search for a paper through Google Scholar; return the scholarly
    publication container.
    """
    title = get_accurate_name_from_arxiv(title)
    pub = next(scholarly.search_pubs(title))
    if not feel_lucky:
        raise NotImplementedError
    return pub

def test_search_pubs_total_results(self):
    """
    As of February 4, 2021 there are 32 pubs that fit the search term:
    ["naive physics" stability "3d shape"].

    Check that the total results for that search term is at least 32.
    """
    pubs = scholarly.search_pubs('"naive physics" stability "3d shape"')
    self.assertGreaterEqual(pubs.total_results, 32)

def search_paper():
    from scholarly import scholarly
    paper_name = request.POST.get('paper_name')
    paper = next(scholarly.search_pubs(paper_name))
    res = {
        'url': paper.bib['url'],
        'venue': paper.bib['venue'],
        'abstract': paper.bib['abstract']
    }
    return jsonify(res)

def search_paper(request):
    paper_name = request.POST['paper_name']  # paper title
    # paper_name = 'Li Buyu'
    paper = next(scholarly.search_pubs(paper_name))
    content = {
        'url': paper.bib['url'],
        'venue': paper.bib['venue'],
        'abstract': paper.bib['abstract']
    }
    return HttpResponse(json.dumps(content), content_type="application/json")

def proxied_search_query(query):
    while True:
        try:
            search_query = scholarly.search_pubs(query)
            print("Got the results of the query")
            return search_query
        except Exception as e:
            print(e)
            print("Trying new proxy")
            set_new_proxy()

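# set_new_proxy() is referenced above but not defined in this file. A minimal
# sketch using scholarly's ProxyGenerator with the FreeProxies backend (other
# backends such as SingleProxy, used further below, work the same way):

from scholarly import scholarly, ProxyGenerator

def set_new_proxy():
    pg = ProxyGenerator()
    pg.FreeProxies()  # pick a fresh proxy from the free-proxy pool
    scholarly.use_proxy(pg)
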
def do_search(search_string):
    global publications_found, current_pub, search_query
    publications_found = []
    if http_proxy or https_proxy:
        print("\n--Using HTTP proxy: " + http_proxy)
        print("--Using HTTPS proxy: " + https_proxy)
        set_proxy()
    print("\nStarting Google Scholar search.")
    print("--Using search string: \n" + search_string)
    try:
        search_query = scholarly.search_pubs(search_string)
    except Exception:
        print("\nCannot fetch the page from Google Scholar.")
        print("You may have been blocked by Google Scholar, please check your internet connection.")
        sys.exit()

    # Iterate through retrieved publications
    end = False
    order = 1
    while not end:
        pub = next(search_query, None)
        current_pub = {}
        if pub:
            current_pub['ORDER'] = order
            current_pub['LIBRARY'] = current_lib
            current_pub['YEAR'] = pub.bib['year']
            current_pub['CITATIONS'] = pub.bib['cites']
            current_pub['URL'] = pub.bib['url']
            current_pub['TITLE'] = pub.bib['title']
            if 'abstract' in pub.bib:
                current_pub['ABSTRACT'] = pub.bib['abstract']
            else:
                current_pub['ABSTRACT'] = 'NA'
            publications_found.append(current_pub)
            order += 1
        else:
            end = True

    print('\n{} publications found'.format(len(publications_found)))
    header = ['ORDER', 'LIBRARY', 'YEAR', 'CITATIONS', 'URL', 'TITLE', 'ABSTRACT']
    csv_filename = ('raw-' + current_lib + '-' + str(filters.get_start_year()) +
                    '-' + str(filters.get_final_year()) + '.csv')
    write_result(csv_filename, publications_found, header)
    logging.shutdown()  # stop scholar.log logging

def proxy(self):
    proxy_works = scholarly.use_proxy(
        http="http://29ea0d9d66134811b51ead72601a1181:@proxy.crawlera.com:8010/"
    )
    print(proxy_works)
    test_query = scholarly.search_pubs(
        'Perception of physical stability and center of mass of 3D objects'
    )
    print(test_query)

def scholarly_request(search_string: str) -> Dict:
    '''This function takes a search keyword string and requests information
    about the corresponding article via scholarly.'''
    # Get all available information
    search_query = scholarly.search_pubs(search_string)
    article_info = next(search_query)
    scholarly.fill(article_info)
    article_dict = article_info['bib']
    article_dict = normalize_scholarly_dict(article_dict)
    article_dict = add_retrieval_information(article_dict, 'Scholarly',
                                             'unstructured_ID', search_string)
    return article_dict

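# Usage sketch -- hedged: normalize_scholarly_dict and
# add_retrieval_information are the caller's helpers, so the exact keys of
# the returned dict depend on them:
#
#   article = scholarly_request("Creating correct blur and its effect on accommodation")
#   print(article)
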
def results(request):
    if request.method == "POST":
        search_word = request.POST['search']
        searchquery = scholarly.search_pubs(search_word)
        data = next(searchquery)
        # print(data.bib['url'])
        title = data.bib['title']
        author = data.bib['author']
        url = data.bib['url']
        return render(request, "homepage.html",
                      {'title': title, 'url': url, 'author': author})

def test_multiple_publications(self):
    """
    As of May 12, 2020 there are at least 29 pubs that fit the search term:
    ["naive physics" stability "3d shape"].

    Check that the paper "Visual perception of the physical stability of
    asymmetric three-dimensional objects" is among them.
    """
    pubs = [p.bib['title'] for p in
            scholarly.search_pubs('"naive physics" stability "3d shape"')]
    self.assertGreaterEqual(len(pubs), 29)
    self.assertIn(u'Visual perception of the physical stability of asymmetric three-dimensional objects', pubs)

def search_GoogleScholar(query_string, n=20):
    print(f"Search on Google Scholar: [{query_string}]\n")
    ranks = scholarly.search_pubs(query_string)
    pubs = []
    for pub in ranks:
        if len(pubs) == n:
            return pubs
        pubs.append(pub)
    # reached only when fewer than n results were available
    print(f"Warning: {len(pubs)} matched publications in total.\n")
    return pubs

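# Example -- the warning above is printed only when Scholar yields fewer
# than n results; the query string is reused from the tests in this file:
#
#   top = search_GoogleScholar('"naive physics" stability "3d shape"', n=10)
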
def pub_query_by_author(author):
    search_query = scholarly.search_pubs(author)
    pubs = []
    for i in range(20):
        try:
            pub = next(search_query)
            print(pub)
            pubs.append(pub)
        except StopIteration:
            # print("End of the iterator")
            break
    return render_template('pub_results.html', title='Publication search results',
                           pubs=pubs, au=author)

def get_research_articles(self, max_num):
    # Search string for Google Scholar to look for.
    # e.g. "{self.title} {self.director.name}" would equate to
    # "Concussion Peter Landesman" for the movie Concussion.
    search_str = f'{self.title} {self.director.name}'
    output = ""
    try:
        pg = ProxyGenerator()
        ip = os.environ['PROXY_IP']
        pg.SingleProxy(http=ip, https=ip)
        scholarly.use_proxy(pg)
        search_query = scholarly.search_pubs(search_str)
        for i in range(0, max_num):
            curr = next(search_query)
            # For debugging purposes, this is how you pretty print the search query's contents:
            # scholarly.pprint(curr)

            # Grab the title of the article.
            title = curr['bib']['title']
            # Begin our formatted HTML output for each found research article.
            output += """
            <li>
            """
            # See if a publication url (i.e. curr['pub_url']) exists. If so, add an external link to it.
            if 'pub_url' in curr:
                output += f"""
                <a target='_blank' href="{curr['pub_url']}">{title}</a>
                """
            else:
                output += f"""
                {title}
                """
            output += """
            <br>
            """
            # Write the abstract (i.e. curr['bib']['abstract']) if it exists.
            if 'bib' in curr and 'abstract' in curr['bib']:
                output += f"""
                <p>{curr['bib']['abstract']}</p>
                """
            output += """
            </li>
            """
    except Exception as e:
        # Useful for seeing errors in your terminal. Replace pass with the print statement below.
        # print(sys.stderr, e)
        pass
    return output

def search_articles(query, n=5):
    '''Search for articles.'''
    search_query = scholarly.search_pubs(query)
    tab_doc = []
    for i in range(0, n):
        try:
            doc = next(search_query)
            tab_doc.append(doc)
        except StopIteration:
            # no more results: return what has been collected so far
            return tab_doc
    return tab_doc

def test_get_cited_by(self):
    """
    Test that when we retrieve the list of publications citing a
    publication, the number of citing publications equals the number
    of papers that are returned.
    """
    query = 'frequency-domain analysis of haptic gratings cholewiak'
    pubs = [p for p in scholarly.search_pubs(query)]
    self.assertGreaterEqual(len(pubs), 1)
    filled = pubs[0].fill()
    cites = [c for c in filled.citedby]
    self.assertEqual(str(len(cites)), filled.bib['cites'])

def search(self, query, n=5):
    '''Search for articles.'''
    search_query = scholarly.search_pubs(query)
    pubs = []
    for i in range(n):
        try:
            pub = next(search_query)
            pubs.append(Publication.from_scholar(pub))
        except Exception as e:
            print(f"Stopped because of {e}")
            return pubs
    return pubs

def get_articleInfo(title):
    while True:
        try:
            search_query = scholarly.search_pubs(title)
            print("Got the results of the query")
            break
        except Exception as e:
            print("Trying new proxy")
            set_new_proxy()
    pub = next(search_query)
    return pub

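# Usage sketch -- hedged: relies on the same set_new_proxy() helper as
# proxied_search_query above (a ProxyGenerator-based sketch follows that
# function); scholarly.pprint() pretty-prints a result:
#
#   pub = get_articleInfo("Perception of physical stability and center of mass of 3D objects")
#   scholarly.pprint(pub)
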