示例#1
0
def process(document):
    """Tag *document* with the citation count of its best Scholar match.

    Sends a Scholar query for ``document.title`` and looks only at the first
    returned article.  That article is accepted when the ``difflib``
    similarity ratio between its title and ``document.title`` is at least the
    module-level ``min_title_match_ratio``.  On acceptance the document's tags
    are rebuilt via ``update_tags`` (replacing ``tag_pattern`` with the tag
    from ``ncitations_to_tag``), the raw citation count is appended as an
    additional string tag, and the result is pushed with ``document.update``.

    Side effect: while the module-level ``save_cookie`` flag is true, a
    throwaway query is sent first, the querier's cookies are saved, and the
    flag is cleared so this happens only once.

    Args:
        document: object exposing ``title``, ``tags`` and ``update(tags=...)``.

    Returns:
        The matched article's ``num_citations`` value, or ``None`` when no
        article was returned, the first article has no title, or the titles
        are not similar enough.
    """
    global save_cookie

    scholar = ScholarQuerier()
    query = SearchScholarQuery()

    # Save cookies at the first paper only: issue one fixed warm-up query,
    # persist the cookies, then clear the flag for all later calls.
    if save_cookie:
        query.set_phrase("quantum theory")
        scholar.send_query(query)
        scholar.save_cookies()
        save_cookie = False

    query.set_phrase(document.title)
    scholar.send_query(query)
    scholar_articles = scholar.articles
    if not scholar_articles:
        return None

    # Only the top-ranked result is ever considered.
    best_match = scholar_articles[0]
    matched_title = best_match['title']
    if matched_title is None:
        # SequenceMatcher raises TypeError on a None sequence; a result
        # without a title cannot be verified, so treat it as "no match".
        # NOTE(review): presumably happens when result parsing finds no
        # title — confirm against scholar.py.
        return None

    title_match_ratio = difflib.SequenceMatcher(
        None, document.title, matched_title
    ).ratio()
    if title_match_ratio < min_title_match_ratio:
        return None

    old_tags = document.tags
    num_citations = best_match['num_citations']
    citation_tag = ncitations_to_tag(num_citations)
    new_tags = update_tags(old_tags, [(tag_pattern, citation_tag)])
    # The exact count is stored as its own tag in addition to citation_tag.
    new_tags.append(str(num_citations))
    document.update(tags=new_tags)

    return num_citations
示例#2
0
        
        # NOTE(review): this excerpt appears to be the body of an enclosing
        # loop/function that is outside the visible range; `k`, `v`, `writer`,
        # `min_sleep_time_sec` and `max_sleep_time_sec` are bound there.

        # Build a querier and apply settings that select the BibTeX citation
        # format.
        querier = ScholarQuerier()
        settings = ScholarSettings()
        settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
        querier.apply_settings(settings)
        # Query for the phrase `v`.
        # NOTE(review): set_scope(True) presumably restricts the search to
        # article titles — confirm against scholar.py.
        query = SearchScholarQuery()
        query.set_phrase(v)
        query.set_scope(True)
        querier.send_query(query)
        # Use only the first returned article; fall back to zero citations and
        # an empty citation-list URL when nothing came back.
        if querier.articles:
            cites=querier.articles[0].__getitem__('num_citations')
            citation_list=querier.articles[0].__getitem__('url_citations')
        else:
            cites=0
            citation_list= ""
            
    # Disabled code: dumped every returned article's attributes to a
    # per-iteration JSON file under query_data/.
    #    json_results = []
    #    file_name = 'query_data/cites_for_article_'+str(k+1)+'.json'
    #    for art in querier.articles:
    #        json_results.append(
    #            {key: art.attrs[key][0] for key in art.attrs.keys()})
    #    with open(file_name, 'wb') as f:
    #        json.dump(json_results, f)
        # Report progress (k is shown 1-based) and write one output row:
        # [citation count, citation-list URL].
        print('{} iter *** {} cited'.format(k+1,cites))
        writer.writerow([cites,citation_list])
        # Save the querier's cookies, then pause for a random whole number of
        # seconds within the configured bounds before continuing.
        querier.save_cookies()
        sleep(randint(min_sleep_time_sec, max_sleep_time_sec))