Exemplo n.º 1
0
	def query_scholarly(self, author = None, keyword = None, pub = None):
		"""Run a Google Scholar lookup through the ``scholarly`` package.

		When all three criteria are given, every search is started and the
		results are returned in a dict keyed ``'author'``, ``'keyword'`` and
		``'pub'``.  Otherwise only the first criterion that is not ``None``
		(checked in the order author, keyword, pub) is searched and its raw
		result is returned; any other criterion passed alongside it is
		ignored.  ``None`` is returned when no criterion is given.
		"""
		import scholarly

		if author is None:
			if keyword is not None:
				return scholarly.search_keyword(keyword)
			if pub is not None:
				return scholarly.search_pubs_query(pub)
			return None

		# An author was supplied: the combined dict is only built when the
		# other two criteria are present as well.
		if keyword is None or pub is None:
			return scholarly.search_author(author)

		return {
			'author': scholarly.search_author(author),
			'keyword': scholarly.search_keyword(keyword),
			'pub': scholarly.search_pubs_query(pub),
		}
def publication_information(pub):
    """Look up ``pub`` on Google Scholar and report basic article metadata.

    The first search hit is filled in and, when its BibTeX entry type is an
    article, its year, journal, citation count and publisher are printed and
    returned.

    Args:
        pub: Query string passed to ``scholarly.search_pubs_query``.

    Returns:
        A tuple ``(journal, cited_by, year, publisher)``.  Values that cannot
        be determined keep their defaults: ``""`` for journal and year, ``0``
        for the citation count and ``""`` for the publisher.  For an article
        that lacks a publisher field the publisher is ``0``, which is the
        historical behaviour of this function.
    """
    p = ""
    c = 0
    dat = ""
    publisher = ""

    search_query = scholarly.search_pubs_query(pub)
    try:
        publication = next(search_query).fill()
    except StopIteration:
        # No hit at all: report the defaults instead of crashing.
        return p, c, dat, publisher

    # The original test was re.search(entry_type, "article"), i.e. pattern and
    # subject were swapped and the entry type was compiled as a regex.
    entry_type = publication.bib.get("ENTRYTYPE", "")
    if "article" in entry_type:
        dat = publication.bib.get("year", "")
        print(dat)
        p = publication.bib.get("journal", "")
        print(p)
        c = getattr(publication, "citedby", 0)
        print(c)
        # 0 (not "") is kept as the fallback so existing callers see the same value.
        publisher = publication.bib.get("publisher", 0)
        print(publisher)

    return p, c, dat, publisher
Exemplo n.º 3
0
def scholarly_query(authors, title):
    """
    Look up a single article on Google Scholar.

    Args:
        authors (list): last names of the leading authors, as strings.
        title (str): title of the article.

    Returns:
        The ``bib`` dict of the first search hit, with its abstract removed
        and the title post-processed, or None when the search yields nothing.
    """
    joined_names = ' '.join(authors)
    hits = scholarly.search_pubs_query(joined_names + ' ' + title)
    try:
        first_hit = next(hits)
    except StopIteration:
        return None

    first_hit.fill()
    bib = first_hit.bib

    # The abstract is not part of the returned record.
    bib.pop('abstract', None)

    # Drop the dots that end the title.
    bib['title'] = re.sub('\\.*$', '', bib['title'])

    print('S: ' + nomenclature.gen_filename(bib))
    return bib
Exemplo n.º 4
0
    def query_scholar(self):
        """
        QUERY ARTICLES FROM GOOGLE SCHOLAR

        Parameters:
        ------
        None

        For every object returned by ``self.get_db_object()`` that has an
        ``'ngc'`` key, search Google Scholar for that value and insert up to
        500 of the resulting articles into ``articles`` (a collection defined
        outside this method; MongoDB according to the original docstring).

        Each inserted document is the article's attribute dict with ``'bib'``
        renamed to ``'biblio'`` plus the ``'ngc'`` value and the ``'_id'`` of
        the originating object (stored as ``'_object_id'``).

        Return
        ------
        None

        """
        from itertools import islice

        # Plain iteration visits every object exactly once; the previous
        # index-based loop advanced the index twice after a hit and therefore
        # skipped the object that followed it.
        for db_object in self.get_db_object():
            if 'ngc' not in db_object:
                continue

            search_query = scholarly.search_pubs_query(db_object['ngc'])
            # islice ends quietly when fewer than 500 hits exist, where a bare
            # next() would have raised StopIteration.
            for article in islice(search_query, 500):
                current_article = article.__dict__
                current_article["biblio"] = current_article.pop('bib')
                current_article["ngc"] = db_object["ngc"]
                current_article["_object_id"] = db_object["_id"]
                print(current_article)
                articles.insert(current_article)
Exemplo n.º 5
0
def get_all_studies(keywords: List[str]) -> List[Study]:
    """Search Google Scholar for the keywords in random order and collect every hit.

    Args:
        keywords: Search terms; they are joined with spaces, in a shuffled
            order, to form the query.  The caller's list is left untouched.

    Returns:
        One ``Study(title, url, author)`` per search hit; the URL is ``""``
        for hits whose bib record has no ``'url'`` key.

    Raises:
        KeyError: If a hit has no ``'title'`` or ``'author'`` entry.
    """
    # Shuffle a copy: shuffle() reorders its argument in place, which used to
    # scramble the list owned by the caller.
    shuffled_keywords = list(keywords)
    shuffle(shuffled_keywords)
    shuffled = ' '.join(shuffled_keywords)
    print(shuffled)

    retval = []
    for res in scholarly.search_pubs_query(shuffled):
        bib = res.bib
        # Only the URL is optional; .get() makes that explicit instead of
        # guessing in a KeyError handler which key was missing.
        retval.append(Study(bib['title'], bib.get('url', ""), bib['author']))

    return retval
def rename_bib_file(citeDir, filename):
    """Register the first entry of a stored BibTeX file and name its new file.

    The object ``filename`` is read from the bucket returned by
    ``aws_bucket_info()`` and parsed as BibTeX.  If the ID of its first entry
    is not yet present in the table returned by ``read_bib_df``, the entry is
    stored through ``store_bib_in_df`` together with the URL and abstract of
    the first Google Scholar hit for its title.

    Args:
        citeDir: Prefix prepended verbatim to the new file name.
        filename: Key of the BibTeX object in the bucket.

    Returns:
        ``citeDir + ID + "_slrm.bib"`` when the entry was newly stored,
        otherwise ``""``.
    """
    my_bucket, _ = aws_bucket_info()
    bibtex_file = my_bucket.Object(filename).get()['Body'].read().decode(
        'utf-8')
    parser = BibTexParser()
    parser.customization = convert_to_unicode
    bibtex_database = bibtexparser.loads(bibtex_file, parser=parser)
    entry = bibtex_database.entries[0]

    df = read_bib_df(citation_pickle_file=citation_pickle_file)
    if entry['ID'] in df['ID'].values:
        return ""

    # The Scholar lookup is best effort: a search without hits, or a hit
    # without url/abstract, leaves the corresponding fields empty.
    first_hit = next(scholarly.search_pubs_query(entry['title']), None)
    scholar_bib = first_hit.bib if first_hit is not None else {}

    dict_to_store = {
        'ID': entry['ID'],
        'numLikes': 0,
        'Title': entry['title'],
        'Authors': entry['author'],
        'url': scholar_bib.get('url', ""),
        'Abstract': scholar_bib.get('abstract', ""),
    }
    store_bib_in_df(dict_to_store,
                    citation_pickle_file=citation_pickle_file)
    return citeDir + entry['ID'] + "_slrm.bib"
Exemplo n.º 7
0
def get_cite_count_by_doi(doi):
    """Return the Google Scholar citation count for ``doi``.

    Args:
        doi: Identifier used as the search query.

    Returns:
        The ``citedby`` value of the filled first hit, or -1 when it cannot
        be obtained (no hit, no citation count, or the lookup failed).
    """
    query = scholarly.search_pubs_query(doi)
    try:
        result = next(query).fill()
        return result.citedby
    except Exception:
        # Deliberately broad, because every lookup failure maps to -1, but no
        # longer a bare except that also swallows KeyboardInterrupt/SystemExit.
        return -1
Exemplo n.º 8
0
    def search_scholar(self, query, n_results):
        """ Returns a list with the first n_results Publications found for
        the query.

        Parameters
        ----------
        query: string
            Query to search on Google Scholar.
        n_results: int
            Maximum number of articles to return.

        Returns
        -------
        results: list(Publications)
            List containing Publication objects; shorter than n_results when
            the search yields fewer hits.  The same list is stored in
            ``self.results_scholar``.
        """
        from itertools import islice

        generator_results = scholarly.search_pubs_query(query)
        # islice stops at the end of the results; calling next() inside a list
        # comprehension raised StopIteration when fewer hits were available.
        # max() keeps the old "empty list for a negative count" behaviour.
        self.results_scholar = list(
            islice(generator_results, max(n_results, 0)))
        return self.results_scholar
Exemplo n.º 9
0
def scholarly_query(authors, title):
    """
    Look up a single article on Google Scholar.

    Args:
        authors (list): last names of the leading authors, as strings.
        title (str): title of the article.

    Returns:
        The ``bib`` dict of the first search hit, with its abstract removed
        and the title post-processed, or None when the search yields nothing.
    """
    joined_names = ' '.join(authors)
    hits = scholarly.search_pubs_query(joined_names + ' ' + title)
    try:
        first_hit = next(hits)
    except StopIteration:
        return None

    first_hit.fill()
    bib = first_hit.bib

    # The abstract is not part of the returned record.
    bib.pop('abstract', None)

    # Drop the dots that end the title.
    bib['title'] = re.sub('\\.*$', '', bib['title'])

    print('S: ' + nomenclature.gen_filename(bib))
    return bib
Exemplo n.º 10
0
def fetch_bibtex_by_fulltext_scholar(txt, assess_results=True):
    """Fetch the BibTeX record of the Google Scholar hit that best matches ``txt``.

    Args:
        txt: Free text used as the search query.
        assess_results: When true and the search returns more than one hit,
            each hit is rated with ``_scholar_score`` and the best-rated one
            is used; otherwise the first hit is used.

    Returns:
        The stripped page content behind the chosen hit's ``url_scholarbib``.

    Raises:
        IndexError: If the search returns no hit.
        NotImplementedError: If the chosen hit has no BibTeX link.
    """
    import scholarly
    scholarly._get_page = _get_page_fast  # remove waiting time
    logger.debug(txt)

    results = list(scholarly.search_pubs_query(txt))

    # The first hit is the default; a later hit replaces it only when its
    # score is strictly higher than the best score seen so far.
    result = results[0]
    if len(results) > 1 and assess_results:
        maxscore = 0
        for candidate in results:
            score = _scholar_score(txt, candidate.bib)
            if score > maxscore:
                maxscore, result = score, candidate

    # The BibTeX text is downloaded from the hit's url_scholarbib link.
    bib_url = getattr(result, 'url_scholarbib', '')
    if not bib_url:
        raise NotImplementedError(
            'no bibtex import linke. Make crossref request using title?')
    return scholarly._get_page(bib_url).strip()
Exemplo n.º 11
0
 def test_get_cited_by(self):
     """The number of citing publications equals the reported citedby count."""
     search_term = 'frequency-domain analysis of haptic gratings cholewiak'
     first_hit = next(scholarly.search_pubs_query(search_term))
     pub = first_hit.fill()
     cites = list(pub.get_citedby())
     self.assertEqual(len(cites), pub.citedby)
Exemplo n.º 12
0
def information(pub):
    """Print and return the author field of the first Google Scholar hit for ``pub``."""
    first_hit = next(scholarly.search_pubs_query(pub))
    author = first_hit.bib["author"]
    print(author)
    return author
#information("TOWARDS SEAMLESS TRACKING-FREE WEB: IMPROVED DETECTION OF TRACKERS VIA ONE-CLASS LEARNING")
Exemplo n.º 13
0
def scholar_view(request, nodeType, nodeLabel):
    """Render Google Scholar information for an author or a publication.

    Args:
        request: Request object handed on to ``render``.
        nodeType: ``"Author"`` or ``"Publication"``; any other value skips
            the lookup.
        nodeLabel: Text used as the search query.

    Returns:
        The response produced by ``render`` for the template
        ``external/google_search_results.html`` with the context keys
        ``result``, ``nodeType`` and ``mostCited``.  When a lookup fails,
        ``result`` is an apology message and ``nodeType`` is ``"Bad query"``.
    """
    # Both values need a default: without one, an unknown nodeType reached
    # render() with ``result`` unbound and raised UnboundLocalError.
    result = None
    mostCited = None

    # If request is for an Author
    if nodeType == "Author":
        try:
            query = scholarly.search_author(nodeLabel)
            result = next(query).fill()
            mostCited = result.publications[0].fill()

        # If Author cannot be found
        except Exception:
            result = "Sorry, we could not find this author's profile."
            nodeType = "Bad query"
            mostCited = None

    # If request is for a Publication
    elif nodeType == "Publication":
        try:
            query = scholarly.search_pubs_query(nodeLabel)
            result = next(query)
            print(result)

        # If Publication cannot be found
        except Exception:
            result = "Sorry, we could not find this paper's profile."
            nodeType = "Bad query"

    return render(request, "external/google_search_results.html", {
        "result": result,
        "nodeType": nodeType,
        "mostCited": mostCited
    })
Exemplo n.º 14
0
def build_super_dict(query, path, amount):
    """Write the bib records of the first ``amount`` hits for ``query`` to CSV.

    Each hit's ``bib`` record is passed through ``process_row``; the result is
    printed and, unless it is None, written as one row.  The output is
    ';'-delimited with every field quoted.

    Args:
        query: Search string for Google Scholar.
        path: Currently unused.
            NOTE(review): the output location is still the hard-coded file
            below; presumably ``path`` was meant to select it. Confirm
            whether it names a directory or a file before wiring it in.
        amount: Maximum number of search hits to process (any value that
            ``int()`` accepts).
    """
    from itertools import islice

    amount = int(amount)
    # The search used to be hard-coded to "darwin" and the limit to 15 (which
    # actually processed 16 hits); both arguments are honoured now.
    publications = scholarly.search_pubs_query(query)

    field_names = ["title", "eprint", "author", "abstract", "url"]

    with open(
            '/home/fabian/Documents/repos/scholar_web_scrapper/results/scrapping_results.csv',
            'w') as file:
        csv.register_dialect("toMYSQL",
                             delimiter=";",
                             quoting=csv.QUOTE_ALL,
                             doublequote=True)
        writer = csv.DictWriter(file,
                                fieldnames=field_names,
                                dialect="toMYSQL")
        writer.writeheader()

        for publication in islice(publications, max(amount, 0)):
            clean_row = process_row(publication.bib)

            print(clean_row)
            if clean_row is not None:
                writer.writerow(clean_row)
Exemplo n.º 15
0
 def test_multiple_publications(self):
     ''' As of November 18, 2016, 12 publications matched the search term.'''
     titles = []
     for hit in scholarly.search_pubs_query('cholewiak campbell robson'):
         titles.append(hit.bib['title'])
     self.assertEqual(len(titles), 12)
     self.assertIn(u'A frequency-domain analysis of haptic gratings', titles)
Exemplo n.º 16
0
def getCitation(titleLine, authorLine):
    """Print the title, the citation count of its first Scholar hit, and the authors.

    Args:
        titleLine: Title text; also used as the search query.
        authorLine: Author text printed on the line after the count.

    Returns:
        None.
    """
    first_hit = next(scholarly.search_pubs_query(titleLine))
    cite_count = first_hit.citedby
    print(titleLine + str(cite_count) + "\n" + authorLine)
Exemplo n.º 17
0
def captcha_test():
    """Probe Google Scholar with a fixed query.

    Returns:
        True when the search yields no result at all, False when a first
        result was obtained and its ``citedby`` attribute could be read.
        (Presumably callers treat True as "blocked by a captcha" - confirm.)

    Raises:
        AttributeError: If the first result has no ``citedby`` attribute.
    """
    try:
        results = scholarly.search_pubs_query('Einstien')
        # next() works on Python 2 and 3; generators have no .next() method
        # on Python 3, so the previous call raised AttributeError there.
        first_result = next(results)
        first_result.citedby
    except StopIteration:
        return True
    return False
Exemplo n.º 18
0
def doQuery():
    """Print the first Google Scholar hit for the query 'wearable'."""
    hits = scholarly.search_pubs_query('wearable')
    first_hit = next(hits)
    print(first_hit)
Exemplo n.º 19
0
def search_pub(query):
    """Return the serialized first search hit for ``query``.

    Returns:
        Whatever ``serialize_result`` produces for the first hit, or an empty
        dict when StopIteration is raised (no hit available).
    """
    hits = scholar.search_pubs_query(query)
    try:
        return serialize_result(next(iter(hits)))
    except StopIteration:
        return {}
Exemplo n.º 20
0
def publication_information_2(pub):
    """Print the filled first Google Scholar hit for ``pub``.

    An empty dict is printed instead when the lookup fails.

    Args:
        pub: Query string passed to ``scholarly.search_pubs_query``.

    Returns:
        None.
    """
    publication = {}
    try:
        search_query_1 = scholarly.search_pubs_query(pub)
        publication = next(search_query_1).fill()
    except Exception:
        # Best effort by design: any lookup failure falls back to the empty
        # dict.  No longer a bare except, so KeyboardInterrupt and SystemExit
        # propagate.
        pass
    print(publication)
def crawl_scholar_paper(title):
    """Return the citation count of the first Google Scholar hit for ``title``.

    Returns:
        The hit's ``citedby`` value, or -1 when there is no hit or the hit
        has no ``citedby`` attribute.
    """
    first_hit = next(scholarly.search_pubs_query(title), None)
    if first_hit is None:
        return -1
    return getattr(first_hit, 'citedby', -1)
Exemplo n.º 22
0
def test_pop():
    """Draw the graph built from the publication found for a fixed DOI.

    The filled first search hit is handed to ``populate_graph`` together with
    an empty ``nx.DiGraph``; the graph is then drawn with a spectral layout
    and shown.
    """
    import matplotlib.pyplot as plt
    G = nx.DiGraph()
    search_query = sch.search_pubs_query('10.1109/THS.2013.6698999')
    # next() works on Python 2 and 3; generators have no .next() method on
    # Python 3.
    P = next(search_query)
    P = P.fill()
    populate_graph(P, G)
    nx.draw_spectral(G)
    plt.show()
Exemplo n.º 23
0
def gscholar_craw(file_name):
    """Dump the bib records of up to 200 search hits as JSON lines.

    Args:
        file_name: Used both as the Google Scholar query and as the stem of
            the output file ``./gs_data/FILE_NAME.txt``; one JSON object is
            written per line.
    """
    from itertools import islice

    search_query = scholarly.search_pubs_query(file_name)
    with open('./gs_data/' + file_name + '.txt', 'w') as f:
        # islice ends quietly when fewer than 200 hits exist, where a bare
        # next() raised StopIteration.  The with-block closes the file.
        for publication in islice(search_query, 200):
            json.dump(publication.bib, f)
            f.write('\n')
Exemplo n.º 24
0
 def test_multiple_publications(self):
     ''' As of October 11, 2017, 23 publications matched the search term.'''
     titles = []
     for hit in scholarly.search_pubs_query(
             '"naive physics" stability "3d shape"'):
         titles.append(hit.bib['title'])
     self.assertEqual(len(titles), 23)
     expected_title = u'Visual perception of the physical stability of asymmetric three-dimensional objects'
     self.assertIn(expected_title, titles)
Exemplo n.º 25
0
def trx_searchciters(m):
    """Add the publications citing an article as entities of a Maltego transform.

    The article is searched on Google Scholar by its ``DOI`` property when
    that is set, otherwise by its transliterated ``title.article`` property.
    For each citing publication, up to ``m.slider`` of them, a ``me.Article``
    entity carrying the cleaned title and an ``author`` property is added.

    Args:
        m: Maltego message object providing ``getProperty``, ``slider`` and
            ``Maltegoxml``.

    Returns:
        The output of ``TRX.returnOutput()``; when the search has no hit it
        only contains a UI message saying so.
    """
    TRX = MaltegoTransform()
    title = unidecode(m.getProperty("title.article"))
    DOI = m.getProperty("DOI")
    query = DOI if DOI else title
    search_query = scholarly.search_pubs_query(query)

    try:
        result = next(search_query)
    except StopIteration:
        TRX.addUIMessage(
            "The DOI could not be found on Google Scholar, \n"
            "which very likely means Google Scholar has never heard of this article before"
        )
        return TRX.returnOutput()

    titlemaybe = result.bib['title']

    TRX.addUIMessage(
        "Title found: %s. \n"
        "If this is not what you were looking for, add the article's DOI and search again"
        % make_unicode(clean_obsession(titlemaybe)), UIM_INFORM)

    limit = m.slider

    for count, citation in enumerate(result.get_citedby()):
        if count == limit:
            break

        # Strip every unwanted substring cumulatively.  The previous loop
        # restarted from the raw title on each pass, so only the last entry
        # of ``bastardi`` was removed, and with an empty ``bastardi`` a stale
        # title from an earlier step was used.
        cited_title = citation.bib['title']
        for unwanted in bastardi:
            cited_title = cited_title.replace(unwanted, '')
        new = TRX.addEntity("me.Article", cited_title.encode('utf-8'))

        # BibTeX separates authors with ' and '; show them ';'-separated.
        authors = '; '.join(citation.bib['author'].split(' and '))
        for unwanted in bastardi:
            authors = authors.replace(unwanted, '')
        new.addProperty("author", "Author", "loose", authors.encode('utf-8'))

    logging(TRX.returnOutput(), m.Maltegoxml)

    return TRX.returnOutput()
Exemplo n.º 26
0
 def test_publication_contents(self):
     """The filled first hit for the haptic-gratings paper has the expected bib fields."""
     pub = next(scholarly.search_pubs_query('A frequency-domain analysis of haptic gratings')).fill()
     expected = (
         ('author', u'Cholewiak, Steven A and Kim, Kwangtaek and Tan, Hong Z and Adelstein, Bernard D'),
         ('journal', u'Haptics, IEEE Transactions on'),
         ('number', u'1'),
         ('pages', u'3--14'),
         ('publisher', u'IEEE'),
         ('title', u'A frequency-domain analysis of haptic gratings'),
         ('url', u'http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5210096'),
         ('volume', u'3'),
         ('year', u'2010'),
     )
     for field, value in expected:
         # assertEqual reports both values on failure, which
         # assertTrue(a == b) cannot do.
         self.assertEqual(pub.bib[field], value)
Exemplo n.º 27
0
 def test_publication_contents(self):
     """The filled first hit for the blur/accommodation paper has the expected bib fields."""
     pub = next(scholarly.search_pubs_query('Creating correct blur and its effect on accommodation')).fill()
     expected = (
         ('author', u'Cholewiak, Steven A and Love, Gordon D and Banks, Martin S'),
         ('journal', u'Journal of vision'),
         ('number', u'9'),
         ('pages', u'1--1'),
         ('publisher', u'The Association for Research in Vision and Ophthalmology'),
         ('title', u'Creating correct blur and its effect on accommodation'),
         ('url', u'https://jov.arvojournals.org/article.aspx?articleid=2701817'),
         ('volume', u'18'),
         ('year', u'2018'),
     )
     for field, value in expected:
         # assertEqual reports both values on failure, which
         # assertTrue(a == b) cannot do.
         self.assertEqual(pub.bib[field], value)
Exemplo n.º 28
0
def GetCitation(title: str, formatStr: str, bibFolder: str) -> str:
    '''
    Download the reference file of an article found on Google Scholar.

    The article is searched by its quoted title through the scholarly
    package. The citation link of the first hit is rewritten so that its
    last query parameter is the requested format, the reference is
    downloaded, stripped of non-ASCII characters and written to the
    bibliography folder under the article title.

    Parameters
    ----------
    title : str
        Title of the article.
    formatStr : str
        Identifier of the reference format (only "rm", stored as .ris, is
        known).
    bibFolder : str
        Folder to write the reference file to.

    Returns
    -------
    str
        Text of the reference file, or an empty string when the search has
        no hit.

    '''
    # Only the reference manager format is supported at the moment.
    formats = {"rm": ".ris"}

    hits = scholarly.search_pubs_query("\"" + title + "\"")
    try:
        citationLink = next(hits).url_scholarbib
    except StopIteration:
        return ""

    # Everything after the last '=' is the format parameter: swap it for the
    # requested format.
    linkParts = citationLink.rsplit('=', 1)
    linkParts[-1] = formatStr
    downloadLink = "=".join(linkParts)

    # Fetch the reference and blank out every non-ASCII character.
    rawText = requests.get(downloadLink).text
    citationText = re.sub(r'[^\x00-\x7f]', r' ', rawText)

    # The file is named after the title, with the extension of the format.
    targetFile = bibFolder + "/" + title + formats[formatStr]
    with open(targetFile, "w+", encoding='utf-8') as citationFile:
        citationFile.write(citationText)

    return citationText
Exemplo n.º 29
0
def print_title(query):
    '''Print a banner followed by the titles of up to five publications
    found for ``query``.
    '''
    from itertools import islice

    print(
        "===================================================================")
    print("=               Titles from ", query)
    print(
        "===================================================================")
    search_query = sc.search_pubs_query(query)
    # islice ends quietly when fewer than five hits exist, where a bare
    # next() raised StopIteration.
    for paper in islice(search_query, 5):
        print(paper.bib['title'])
Exemplo n.º 30
0
def get_publication(bib):
    """Return the Google Scholar bib record for ``bib['title']``, with caching.

    Results are cached by title in ``publications``.  When the lookup fails,
    a message is printed and the given ``bib`` itself is cached and returned.

    Args:
        bib: Mapping with at least a ``'title'`` entry.

    Returns:
        The cached bib record for the title.
    """
    title = bib['title']
    if title not in publications:
        try:
            search_query = scholarly.search_pubs_query(title)
            publication = next(search_query).fill()
            publications[title] = publication.bib
        except Exception:
            # Best effort: any lookup failure falls back to the caller's
            # record.  No longer a bare except, so KeyboardInterrupt and
            # SystemExit propagate.
            print('Cannot find publication: ' + title)
            publications[title] = bib

    return publications[title]
Exemplo n.º 31
0
def main():
    """Fetch the first paper matching a fixed query and save its extracted data.

    The data returned by ``paper_extraction.extract`` is written to
    ``PAPER_DATA_PATH`` through ``file_utility.save_json``; ``embed()`` is
    called afterwards.
    """
    search_query = "bert nlp"
    print("Fetching paper matching: %s" % search_query)

    first_paper = next(scholarly.search_pubs_query(search_query))

    extracted = paper_extraction.extract(first_paper, verbose=True)
    file_utility.save_json(extracted, PAPER_DATA_PATH)

    embed()
Exemplo n.º 32
0
def generate_metadata(input_file):
    res = None
    if "paper" in input_file:
        print(input_file)
        try:
            res = next(scholarly.search_pubs_query(input_file.replace(".pdf","")))
        except StopIteration:
            print("",end="",sep="")
        finally:
            res = res.bib
        return res
    else:
Exemplo n.º 33
0
 def test_publication_contents(self):
     """Every expected field/value pair is present in the filled first hit's bib."""
     first_hit = next(scholarly.search_pubs_query('A frequency-domain analysis of haptic gratings'))
     actual = first_hit.fill().bib
     expected = {
         u'author': u'Cholewiak, Steven and Kim, Kwangtaek and Tan, Hong Z and Adelstein, Bernard D and others',
         u'journal': u'Haptics, IEEE Transactions on',
         u'number': u'1',
         u'pages': u'3--14',
         u'publisher': u'IEEE',
         u'title': u'A frequency-domain analysis of haptic gratings',
         u'url': u'http://ieeexplore.ieee.org/xpls/abs_all.jsp?arnumber=5210096',
         u'volume': u'3',
         u'year': u'2010',
     }
     # The bib may hold additional fields; only the expected pairs are checked.
     self.assertTrue(all(pair in actual.items() for pair in expected.items()))
 def search_scholar(self):
     """Store and return the filled first Google Scholar hit for ``self.term``."""
     first_hit = next(scholarly.search_pubs_query(self.term))
     self.result = first_hit.fill()
     return self.result
Exemplo n.º 35
0
 def test_multiple_publications(self):
     ''' As of October 21, 2015, 7 publications matched the search term.'''
     titles = []
     for hit in scholarly.search_pubs_query('cholewiak campbell robson'):
         titles.append(hit.bib['title'])
     self.assertEqual(len(titles), 7)
     self.assertIn(u'A frequency-domain analysis of haptic gratings', titles)
Exemplo n.º 36
0
 def test_get_cited_by(self):
     """The number of citing publications equals the reported citedby count."""
     search_term = 'frequency-domain analysis of haptic gratings cholewiak'
     first_hit = next(scholarly.search_pubs_query(search_term))
     pub = first_hit.fill()
     cites = list(pub.get_citedby())
     self.assertEqual(len(cites), pub.citedby)
Exemplo n.º 37
0
 def test_empty_publication(self):
     """An empty query yields no publications."""
     pubs = list(scholarly.search_pubs_query(''))
     # Compare by value: assertIs on an int only passes thanks to CPython's
     # small-integer caching.
     self.assertEqual(len(pubs), 0)
__author__ = 'fccoelho'


import scholarly
import pymongo
import time
import json

# Module-level MongoDB client, constructed with no arguments at import time.
conn = pymongo.MongoClient()


# Search results object created once at import time; continuous_fetch() pulls
# articles from it with next().
search_query = scholarly.search_pubs_query('zika zikv -author:zika')

def continuous_fetch():
    downloaded = [a['url_scholarbib'] for a in conn.scholar.articles.find({}, {'url_scholarbib': 1})]
    while True:
        doc = {}
        # try:
        art = next(search_query)
        if art.url_scholarbib in downloaded:
            continue
        if not art._filled:
            art.fill()
        doc['bib'] = art.bib
        try:
            doc['citedby'] = art.citedby
        except AttributeError:
            doc['citedby'] = 0
        try:
            doc['id_scholarcitedby'] = art.id_scholarcitedby
        except AttributeError: