Пример #1
0
def process(document):
    """Tag *document* with the citation count of its best Scholar match.

    While the module-level ``save_cookie`` flag is set, a warm-up query is
    sent first and the querier's cookies are saved; the flag is then cleared
    so this happens only once.

    The document's title is then queried as a phrase. If the first result's
    title is similar enough to ``document.title``, the document's tags are
    updated with a citation tag plus the raw citation count as a string.

    Returns:
        The ``num_citations`` value of the first result, or ``None`` when
        there are no results or the title similarity ratio is below
        ``min_title_match_ratio``.
    """
    global save_cookie

    scholar = ScholarQuerier()
    query = SearchScholarQuery()

    # Save the cookie once, before the first real paper lookup.
    if save_cookie:
        query.set_phrase("quantum theory")
        scholar.send_query(query)
        scholar.save_cookies()
        save_cookie = False

    query.set_phrase(document.title)
    scholar.send_query(query)
    hits = scholar.articles
    if not hits:
        return None

    best = hits[0]
    matcher = difflib.SequenceMatcher(None, document.title, best['title'])
    if matcher.ratio() < min_title_match_ratio:
        return None

    n_citations = best['num_citations']
    replacements = [(tag_pattern, ncitations_to_tag(n_citations))]
    tags = update_tags(document.tags, replacements)
    # Besides the bucketed tag, keep the exact count as its own tag.
    tags.append(str(n_citations))
    document.update(tags=tags)

    return n_citations
Пример #2
0
def papers_by_query_api(request):
    """Return the first Scholar result for the ``phrase`` GET parameter as JSON.

    Responds with ``HttpResponseBadRequest`` when the request is not a GET or
    when ``phrase`` is missing or empty. Otherwise the JSON body has the form
    ``{"papers": [{"title", "id", "url", "excerpt"}]}`` with exactly one
    entry; when the query yields nothing the entry holds blank placeholders.
    """
    if request.method != 'GET':
        return HttpResponseBadRequest()

    phrase = request.GET.get('phrase', '')
    if not phrase:
        return HttpResponseBadRequest()

    query = SearchScholarQuery()
    query.set_phrase(phrase)
    querier = ScholarQuerier()
    querier.send_query(query)
    papers = querier.articles

    if papers:
        top = papers[0]
        entry = {
            'title': top['title'],
            'id': top['cluster_id'],
            'url': top['url'],
            'excerpt': top['excerpt'],
        }
    else:
        # Placeholder entry keeps the response shape stable for the client.
        entry = {
            'title': '',
            'id': 0,
            'url': '',
            'excerpt': '',
        }

    return JsonResponse({'papers': [entry]})
Пример #3
0
def setCitationByTitle(paper_title, author="Si Chen"):
    """Build a single-result Scholar query for a paper title.

    The query is only constructed here; it is NOT sent. The caller is
    expected to pass the returned object to a querier's ``send_query``.

    Args:
        paper_title: Phrase to search for (the paper's title).
        author: Author name to restrict the search to. Defaults to the
            previously hard-coded value, so existing callers are unaffected.

    Returns:
        The configured ``SearchScholarQuery`` instance.
    """
    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(paper_title)
    query.set_num_page_results(1)
    return query
Пример #4
0
def getPublications(author):
    """Query Scholar for *author* and emit the results via ``scholar.txt``.

    Args:
        author: Author name passed to ``SearchScholarQuery.set_author``.

    Returns:
        None. The author name is printed, then the results are written by
        ``scholar.txt(querier, with_globals=False)``.
    """
    # Single-argument print() call: same output on Python 2, and no longer a
    # SyntaxError on Python 3 (the original used the print statement).
    print(author)
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    query.set_author(author)
    querier.send_query(query)
    scholar.txt(querier, with_globals=False)
Пример #5
0
def getRelatedPublications(author):
    """Query Scholar for *author* and print the resulting article list.

    Args:
        author: Author name passed to ``SearchScholarQuery.set_author``.

    Returns:
        None. The author name and then ``querier.articles`` are printed.
    """
    # Single-argument print() calls behave identically on Python 2 and are
    # valid on Python 3 (the original print statements were not).
    print(author)
    settings = ScholarSettings()
    querier = ScholarQuerier()
    # Default settings are applied explicitly before sending the query.
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    query.set_author(author)
    querier.send_query(query)
    print(querier.articles)
def literature_search(query_terms, type='full_name'):
    """Run one Scholar query per row of *query_terms* and collect the papers.

    Args:
        query_terms: Object whose ``.values`` attribute iterates over rows
            (presumably a pandas DataFrame -- confirm against the caller).
            Row layout when ``type == 'full_name'``:
            ``(repo_id, phrase, start_year)``; otherwise:
            ``(repo_id, repo_name, phrase, keywords, start_year)``.
        type: Row layout selector, see above. (The name shadows the builtin
            but is kept for backward compatibility with keyword callers.)

    Returns:
        A list with the concatenated results of ``process_arts`` for every
        row whose query returned at least one article.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    config = ScholarConf()
    settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(settings)

    papers = []
    for item in query_terms.values:
        repo_id = item[0]

        # Use a fresh query for every row: with a single shared query object
        # the optional keywords set for one row stayed active for all later
        # rows that had no keywords of their own.
        query = SearchScholarQuery()

        if type != 'full_name':
            repo_name = item[1]
            phrase = item[2]
            keywords = item[3]
            start_year = item[4]
            if keywords:
                # A lone keyword is passed with a trailing comma.
                if ',' not in keywords:
                    keywords = keywords + ','
                query.set_words_some(keywords)

            query.set_words(repo_name)
            query.set_phrase(phrase)

            phrase_text = repo_name + ', ' + phrase
        else:
            phrase = item[1]
            start_year = item[2]

            query.set_phrase(phrase)  # e.g. commontk/CTK, meoyo/AIPS
            phrase_text = phrase

        print('search papers for {} ...'.format(phrase_text))
        query.set_timeframe(start_year)
        querier.send_query(query)
        articles = querier.articles
        if articles:
            papers.extend(process_arts(config, repo_id, phrase_text, articles))

        # Pause after every request, including ones that returned nothing;
        # previously an empty result skipped the delay entirely.
        time.sleep(random.randrange(1, 10))

    return papers
Пример #7
0
def search(bot, update, args):
    """Bot command handler: search Scholar for *args* and report the titles.

    Sends three kinds of messages to the chat identified by
    ``update.message.chat_id``: an echo of the search terms, the number of
    results, and one numbered message per result title.

    Args:
        bot: Object providing ``send_message(chat_id=..., text=...)``.
        update: Incoming update; only ``update.message.chat_id`` is read.
        args: Sequence of command argument strings (the search words).
    """
    search_command = ' '.join(args)
    chat_id = update.message.chat_id

    bot.send_message(chat_id=chat_id, text="You searched for: " + search_command)

    querier = ScholarQuerier()
    query = SearchScholarQuery()
    # Pass the joined words as one string rather than the raw argument list,
    # so the query contains the words themselves.
    query.set_words(search_command)
    querier.send_query(query)

    articles = querier.articles

    bot.send_message(chat_id=chat_id, text="Number of results: " + str(len(articles)))

    # enumerate() supplies the running number; the original counter was never
    # incremented, so every result was labelled "1.".
    for position, article in enumerate(articles, 1):
        bot.send_message(
            chat_id=chat_id,
            text=str(position) + ". " + article.attrs['title'][0],
        )
def getPublications_Title(title):
    """Query Scholar for the words in *title* and return the found items.

    Args:
        title: Words passed to ``SearchScholarQuery.set_words``.

    Returns:
        A new list with every item of ``scholar.json(querier)``; empty when
        that call returns nothing truthy.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    query.set_words(title)
    querier.send_query(query)
    related_list = scholar.json(querier)

    publications = []
    if related_list:
        # One print() call that is valid on Python 2 and 3. The double space
        # reproduces the output of the original pair of print statements
        # (a trailing-comma print followed by a second print).
        print("No of related publications found :  %d" % len(related_list))
        publications.extend(related_list)
    return publications
Пример #9
0
def query_scholar_for_papers(author, searchstring):
    """Return the citation of the first Scholar hit for an author and phrase.

    The querier is configured for BibTeX citations and five results per
    page.

    Args:
        author: Author name to restrict the search to.
        searchstring: Phrase to search for.

    Returns:
        The first article's ``as_citation()`` output followed by a newline,
        or a fixed apology message when the query returned no articles.
    """
    querier = ScholarQuerier()
    settings = ScholarSettings()
    settings.set_citation_format(settings.CITFORM_BIBTEX)
    settings.set_per_page_results(5)
    querier.apply_settings(settings)

    query = SearchScholarQuery()
    query.set_author(author)
    query.set_phrase(searchstring)
    querier.send_query(query)

    found = querier.articles
    if not len(found) > 0:
        return 'Ooopsie. No results. Maybe we ran over the request limit?'

    return found[0].as_citation() + '\n'
Пример #10
0
def process(document):
    """Tag *document* with a citation tag taken from its best Scholar match.

    The document's title is queried as a phrase. If the first result's title
    is similar enough to ``document.title``, the document's tags are updated
    through ``update_tags`` with the tag derived from the citation count.

    Returns:
        The ``num_citations`` value of the first result, or ``None`` when
        there are no results or the title similarity ratio is below
        ``min_title_match_ratio``.
    """
    scholar = ScholarQuerier()
    query = SearchScholarQuery()
    query.set_phrase(document.title)
    scholar.send_query(query)

    hits = scholar.articles
    if not hits:
        return None

    best = hits[0]
    matcher = difflib.SequenceMatcher(None, document.title, best['title'])
    if matcher.ratio() < min_title_match_ratio:
        return None

    n_citations = best['num_citations']
    replacements = [(tag_pattern, ncitations_to_tag(n_citations))]
    document.update(tags=update_tags(document.tags, replacements))

    return n_citations
def blocked():
    """Probe Scholar with a fixed author query to see whether it answers.

    Waits 60 seconds, sends a query for a fixed author name and collects the
    items returned by ``scholar.json(querier)``.

    Returns:
        True when the probe produced no publications (treated as "blocked"),
        False otherwise.
    """
    # print() calls with a single argument are valid on Python 2 and 3; the
    # original print statements were a SyntaxError on Python 3.
    print("Test if blocked....")
    time.sleep(60)

    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()
    query.set_author("Ryan Baker")
    querier.send_query(query)
    related_list = scholar.json(querier)

    publications = []
    if related_list:
        # The double space reproduces the output of the original pair of
        # print statements (trailing-comma print followed by a second print).
        print("Block Test : No of related publications found :  %d"
              % len(related_list))
        publications.extend(related_list)

    return len(publications) == 0
def getPublications(authors):
    """Query Scholar once per author and return all found items.

    Args:
        authors: Iterable of author names; empty names are skipped.

    Returns:
        A list with the items of ``scholar.json(querier)`` for every
        non-empty author, in query order.

    Note:
        Sleeps 20 seconds after each author query.
    """
    # Single-argument print() calls are valid on Python 2 and 3; the original
    # print statements were a SyntaxError on Python 3.
    print(authors)
    querier = ScholarQuerier()
    settings = ScholarSettings()
    querier.apply_settings(settings)
    query = SearchScholarQuery()

    publications = []
    for author in authors:
        if not author:
            continue

        # The double space reproduces the output of the original pair of
        # print statements (trailing-comma print followed by a second print).
        print("Using Author :  %s" % (author,))
        query.set_author(author)
        querier.send_query(query)
        related_list = scholar.json(querier)
        if related_list:
            print("No of related publications found :  %d" % len(related_list))
            publications.extend(related_list)

        # Fixed pause between author queries.
        time.sleep(20)
    return publications
Пример #13
0
def get_url(querier, phrase):
    """Return the URL of the first Scholar result for *phrase*.

    Args:
        querier: Object providing ``send_query(query)`` and an ``articles``
            list afterwards.
        phrase: Title or phrase to search for.

    Returns:
        The first element of the first article's ``attrs['url']`` triple, or
        ``None`` when there are no results or the lookup raised an error.
    """
    query = SearchScholarQuery()
    query.set_phrase(phrase)
    query.set_scope(True)          # title search only
    query.set_num_page_results(1)  # a single result is enough

    try:
        querier.send_query(query)

        articles = querier.articles
        if not articles:
            return None

        # Each attrs entry is a 3-item sequence; only the first item is used.
        url, _, _ = articles[0].attrs['url']
        return url
    except Exception:
        # Best-effort lookup: any ordinary failure yields None. Unlike the
        # previous bare ``except:``, KeyboardInterrupt and SystemExit now
        # propagate instead of being swallowed.
        return None
Пример #14
0
from scholar import ScholarQuerier, ScholarSettings, SearchScholarQuery

# Bibliography file: 'publications.bib' in the directory one level above the
# directory that contains this script.
DST = Path(__file__).absolute().parents[1] / 'publications.bib'
# Whitespace-separated identifiers, split into a list of strings.
# NOTE(review): presumably BibTeX keys to skip -- confirm where IGNORE is used.
IGNORE = """
vo2014cytotoxicity
takeilnatriureticpeptideisolatedfromeelbrain
matchintemporal
brodbeck2018transformation
""".split()
# NOTE(review): presumably acronyms whose capitalization must be kept in
# titles -- confirm where ACRONYMS is used.
ACRONYMS = ['EEG', 'MEG', 'MRI']

# Querier configured to use the BibTeX citation format.
querier = ScholarQuerier()
settings = ScholarSettings()
settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
querier.apply_settings(settings)
# Query for the phrase "eelbrain" with a timeframe of (2012, None) and
# patents excluded.
query = SearchScholarQuery()
query.set_phrase("eelbrain")
query.set_timeframe(2012, None)
query.set_include_patents(False)

# Parse the existing bibliography file as BibTeX.
bib = parse_file(DST, 'bibtex')
start = 0
while True:
    querier.send_query(query)
    if len(querier.articles) == 0:
        break
    # extract articles
    for article in querier.articles:
        querier.get_citation_data(article)
        # convert to pybtex entry
        data = parse_bytes(article.citation_data, 'bibtex')