def __init__(self, reset_cache=False):
    self.journals = Journals()
    self.works = Works()
    self.filter_kwargs = dict(has_license='true', has_full_text='true')
    self.keywords = 'business financial merger entrepreneur banking insurance commerce trade economics'
    UnpywallCredentials('*****@*****.**')
    cache_path = path.join(DATA_DIR, 'unpaywall_cache')
    if reset_cache and path.exists(cache_path):
        remove(cache_path)
    self.unpywall_cache = UnpywallCache(cache_path)
    Unpywall.init_cache(self.unpywall_cache)
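A minimal sketch (not part of the original class) of how the stored keywords and filter_kwargs might feed a Crossref query; the chained query/filter/sample calls and the has_license/has_full_text filter names follow the crossrefapi conventions assumed here:

# Hypothetical usage of the attributes configured above.
works = Works()
hits = works.query('merger banking insurance').filter(
    has_license='true', has_full_text='true').sample(5)
for record in hits:
    print(record.get('DOI'), record.get('title', ['?'])[0])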
Example #2
def get_crossref_records(dois):
    works = Works()
    crossref_records = {}
    print('searching crossref for all DOIs, this might take a few minutes...')
    for doi in dois:
        r = works.doi(doi)
        if r is not None:
            crossref_records[doi] = r
        else:
            print('missing crossref record for', doi)
    return crossref_records
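A hypothetical call, assuming both DOIs resolve on Crossref:

records = get_crossref_records(['10.1038/nature12373', '10.1126/science.169.3946.635'])
print('retrieved', len(records), 'of 2 records')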
Example #3
def download_details(doi_list):
    works = Works()
    details = []
    for doi in doi_list:
        doi = doi.strip()
        if not doi or doi.startswith('#'):
            continue
        work = works.doi(doi)
        if work is None:  # skip DOIs Crossref cannot resolve
            continue
        work['doi'] = doi
        details.append(standarize(work))
    return details
Example #4
def doi_BibJSON_pipeline(dois):
    """
    Creates a list of BibJSON objects from a list of DOIs passed
    """
    works = Works()
    BibJSON_list = []
    for doi in dois:
        crossref_obj = works.doi(doi)
        if crossref_obj is None:  # unknown DOI; skip rather than emit an empty record
            continue
        BibJSON_list.append(json.dumps(create_BibJSON(crossref_obj, doi)))

    return BibJSON_list
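A hypothetical run, assuming create_BibJSON maps a Crossref record plus its DOI to a BibJSON dict:

bibjson_strings = doi_BibJSON_pipeline(['10.1038/nature12373'])
print(bibjson_strings[0][:80])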
Example #5
    def __init__(self, out):
        requests.packages.urllib3.disable_warnings(
            requests.packages.urllib3.exceptions.InsecureRequestWarning)
        self.sess = requests.Session()
        self.sess.headers = {'user-agent': self.get_random_user_agent()}

        self.available_base_url_list = AVAILABLE_SCIHUB_BASE_URL
        self.base_url = 'http://' + self.available_base_url_list[0] + '/'
        self.works = Works()
        self.sess.proxies = PROXIES
        self.out = out
        self.re_bracket = re.compile(r"\[(.*?)\]\s")
Example #6
def get_doi(title, author):
    works = Works()
    work = works.query(bibliographic=title, author=author).url
    response = requests.get(work)
    json_response = json.loads(response.text)
    try:
        document = json_response["message"]["items"][0]
        return document.get("URL")
    except (KeyError, IndexError):  # no items in the response
        return None
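Fetching query.url by hand duplicates work the client already does; a sketch of a variant (hypothetical helper name) that lets crossrefapi iterate the same query directly:

def get_doi_url(title, author):
    # Iterate the query; crossrefapi pages through results itself.
    works = Works()
    for item in works.query(bibliographic=title, author=author):
        return item.get("URL")  # first (most relevant) match
    return None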
Example #7
    def check_doi(self, doi):
        works = Works()
        response = works.doi(doi)

        is_valid = False
        status = None
        if response is not None:
            status = response[self.crossref_title][0]
            if status != self.status_inactive_doi:
                is_valid = True

        return is_valid
Example #8
def main():

    #Tracks the row that is entered in from the DOI list.
    currentRows = 0

    with open('DOIValues.csv', newline='') as csvFile:

        #A line is read in from file instead of the whole file in order to be memory efficient
        lineIn = csv.reader(csvFile)

        #Loop continues while there are rows left to process in the .csv file (csvLineCount is assumed to be computed elsewhere)
        while currentRows < csvLineCount:

            #Reads in the next line
            csvRow = next(lineIn)
            StringConvert = ""
            csvLineString = StringConvert.join(csvRow)

            try:
                #Creates a "works" object from the Crossref metadata API
                from crossref.restful import Works
                works = Works()

                #Passes DOI from the .csv file to Crossref to get all metadata info that is available
                doiMetaData = works.doi(csvLineString)

                #If author information is found in the "works" object then it is passed to "authorMetaDataIngest.py" for processing
                # if (doiMetaData['author']):

                #     authorInfo = doiMetaData['author']
                #     print("Author information for DOI: " + csvLineString + " found")
                #     authorMetaDataIngest.authorIngest(connection, cursor, csvLineString, authorInfo)

                if (doiMetaData['content-domain']):

                    contentDomainInfo = doiMetaData['content-domain']
                    print("Content Domain information for DOI: " +
                          csvLineString + " found")
                    contentDomainMetaDataIngest.contentDomainIngest(
                        connection, cursor, csvLineString, contentDomainInfo)

            except ImportError:
                print("Installation of the Crossref API is needed")
            except Exception as e:
                print("Unknown error:", e)

            #Increases counter to keep track of whether at the end of .csv file
            currentRows += 1
            if currentRows > csvLineCount:
                currentRows = csvLineCount
Example #9
def on_click_fetch(event):
    """Get metadata for DOI, and return an error if the DOI is not valid (no metadata found)."""
    import dateutil
    from crossref.restful import Works
    import json

    # turn the "Add paper" primary, to remember clicking it again!
    btn_add_paper.button_type = 'primary'

    # Input DOI: (1) if empty, use a test DOI; (2) if Angewandte German Edition, replace with the International Edition
    inp_doi.value = inp_doi.value.replace("ange","anie") or "10.1021/jacs.9b01891"

    works = Works()
    print("Querying Crossref API for doi {} (this can take several seconds, depending on the server...)".format(inp_doi.value))
    metadata = works.doi(inp_doi.value)
    print("Query done!")
    #print(json.dumps(metadata,sort_keys=True, indent=4)) # Use for debug!

    if not metadata:
        btn_doi.button_type = 'danger'
        inp_title.value = inp_year.value = inp_reference.value = inp_paper_id.value = "ERROR: wrong/missing DOI."
        return

    inp_title.value = str(metadata['title'][0])
    journal = str(metadata['short-container-title'][0])

    if 'volume' in metadata:
        already_in_issue = True
        volume = metadata['volume']
        if 'published-print' in metadata: # ACS, wiley
            year = str(metadata['published-print']['date-parts'][0][0])
        elif 'created' in metadata: # RSC
            year = str(metadata['created']['date-parts'][0][0])
        else:
            year = 'ERROR: year not found.'
    else: # not yet in an issue: assume it will be published in the current year
        already_in_issue = False
        year = str(datetime.datetime.now().year)

    inp_year.value = year

    if already_in_issue:
        if 'page' in metadata: # most of the journals
            inp_reference.value = "{}, {}, {}, {}".format(journal, year, volume, metadata['page'])
        else:  # NatComm or not yet in an issue
            inp_reference.value = "{}, {}, {}".format(journal, year, volume)
    else:
        inp_reference.value = "{}, {}, {}".format(journal, year, "in press")
    inp_paper_id.value = mint_paper_id(doi=inp_doi.value, year=inp_year.value)
    btn_doi.button_type = 'success'
Example #10
    def run(self):
        while True:
            if not self.doi_queue.empty():
                tmp = self.doi_queue.get()
                doi = tmp.split('+')[0]
                cat = int(tmp.split('+')[1])
                try:
                    wk = Works()
                    item = wk.doi(doi)
                except Exception:
                    print('Poor network conditions; request for ' + doi + ' failed')
                    item = 'network-error'
                if item is None:
                    print(doi + ' is not indexed by Crossref')
                    item = 'doi-error'
                if item != 'network-error' and item != 'doi-error':
                    info = parseInfo.parse_info(doi=doi, item=item)
                    self.lock.acquire()
                    if 'subject' in info:
                        subject_list = info['subject'].split(',')
                        # record and tally subject statistics
                        for s in subject_list:
                            if s in self.ref_file['statistics']['subject']:
                                self.ref_file['statistics']['count'][s] += 1
                                l = len(self.ref_file['statistics']['subject'][s])
                                self.ref_file['statistics']['subject'][s][l] = info
                            else:
                                self.ref_file['statistics']['subjectCount'] += 1
                                self.ref_file['statistics']['count'][s] = 1
                                self.ref_file['statistics']['subject'][s] = {}
                                l = len(self.ref_file['statistics']['subject'][s])
                                self.ref_file['statistics']['subject'][s][l] = info
                    self.lock.release()
                    if cat == 0:
                        self.ref_file['info'] = info
                    else:
                        length = len(self.ref_file['nodes'])
                        info['cat'] = cat
                        self.ref_file['nodes'][length] = info
                    if cat < 2:
                        parse_ref(item, cat, doi, self.ref_file, self.doi_queue)
                self.doi_queue.task_done()
Example #11
def fetch_pubmed_info(self, pmid, pubmed_cache_key):
    target, uid = pmid.split(':', 1)

    if target == 'PMID':
        # Actually make the request
        postUrl = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi"
        postData = {
            "db": "pubmed",
            "id": uid,
            "version": "2.0",
            "retmode": "json"
        }

        response = requests.post(postUrl, data=postData)

        # When you get the request back
        if not response.ok:
            # logger.debug(f'Pubmed returned a bad response for {uid}, status code {response.status_code}')
            if response.status_code == 429:
                return 'Too many robokop pubmed requests'
            return f'Unable to complete pubmed request, pubmed request status {response.status_code}'

        pubmed_payload = response.json()
        if not pubmed_payload or 'result' not in pubmed_payload:
            # logger.debug(f'Pubmed returned a bad json response for {uid}, response json {pubmed_payload}')
            return 'Unable to complete pubmed request, bad pubmed response'

        pubmed_result = pubmed_payload['result']
        if uid not in pubmed_result:
            # logger.debug(f'Pubmed returned a bad json result for {uid}, result {pubmed_result}')
            return 'Unable to complete pubmed request, bad pubmed result'
        pubmed_result = pubmed_result[uid]
    elif target == 'DOI':
        # https://github.com/fabiobatalha/crossrefapi
        works = Works()
        pubmed_result = works.doi(uid)
        # logger.debug(f'DOI response {pubmed_result}')
    else:
        return 'Unsupported publication type.'

    pubmed_redis_client = redis.Redis(
        host=os.environ['PUBMED_CACHE_HOST'],
        port=os.environ['PUBMED_CACHE_PORT'],
        db=os.environ['PUBMED_CACHE_DB'],
        password=os.environ['PUBMED_CACHE_PASSWORD'])
    pubmed_redis_client.set(pubmed_cache_key, json.dumps(pubmed_result))
    # logger.debug(f'Pubmed response is now cached for pmid {pmid}')

    return 'cached'
Example #12
    def get_complement(self, doi):
        try:
            works = Works()
            response = works.doi(doi)

            year = None
            cited_by = None
            if response is not None:
                year = response[self.crossref_created][
                    self.crossref_created_date_parts][0][0]
                cited_by = response[self.crossref_cited_by]

            return year, cited_by
        except Exception:
            return None, None
Example #13
def resolve_doi(doi: str) -> dict:
    works = Works()
    data = works.doi(doi)
    names: list = get_names_from_doi(data)
    journal = get_journal_name(data)
    paper_title = get_paper_title(data)
    year = get_publication_year(data)
    page = get_first_page(data)
    return {
        '_publ_author_name': names,
        '_journal_name_full': journal,
        '_publ_section_title': paper_title,
        '_journal_year': year,
        '_journal_page_first': page,
    }
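A hypothetical call, assuming the get_* helpers above pull the corresponding fields out of the Crossref record:

cif_fields = resolve_doi('10.1038/nature12373')
print(cif_fields['_journal_name_full'], cif_fields['_journal_year'])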
Example #14
    def _parse_document_type(self, doc):
        """ Returns the document type of a document as a <class 'str'>.
        e.g. 'paper', 'clinical_trial', 'patent', 'news'. """
        try:
            doi = self._parse_doi(doc)

            works = Works()
            doc_type = works.doi(doi)['type']

            if doc_type == 'book-chapter':
                return 'chapter'
            else:
                return 'paper'
        except Exception:  # unknown DOI or missing 'type'; default to 'paper'
            return 'paper'
Example #15
def fetchMetaByDOI(doi):

    works = Works(etiquette=ETIQUETTE)
    data = None  # ensure data is bound even if the request raises
    try:
        data = works.doi(doi)
    except Exception:
        rec = 1

    if data is None:
        rec = 1
    else:
        rec = 0

    LOGGER.info('DOI = %s. Rec of doi query = %s' % (doi, rec))

    return rec, data
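This snippet assumes module-level ETIQUETTE and LOGGER objects. A minimal sketch of both; the four Etiquette fields (application name, version, URL, contact e-mail) identify the caller to Crossref's polite pool:

import logging
from crossref.restful import Etiquette

# Hypothetical module-level definitions this snippet relies on.
ETIQUETTE = Etiquette('MyProject', '0.1', 'https://example.org', 'name@example.org')
LOGGER = logging.getLogger(__name__)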
Example #16
def get_apa(doi):
    works = Works()
    output = works.doi(doi)
    if output is None:
        return "Not available"
    citation = ""
    for author in output['author']:
        citation += author['family'] + ", " + author['given'][0] + "., "
    citation += "({}). ".format(output['published-print']['date-parts'][0][0])
    citation += "{}. ".format(output['title'][0])
    citation += output['publisher'] + ", "
    citation += "{0}({1}), {2}. doi: {3}".format(
        output['volume'], output["issue"], output['page'], doi)
    return citation
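A hypothetical call; note the function raises KeyError for the many Crossref records that lack author, published-print, volume, issue, or page fields:

print(get_apa('10.1038/nature12373'))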
Example #17
def get_doi(request, doi):
    works = Works()
    result = works.doi(doi)

    context = {}

    if 'present_doi' in request.session:
        if 'past_dois' in request.session:
            request.session['past_dois'] += [request.session['present_doi']]
        else:
            request.session['past_dois'] = [request.session['present_doi']]
        context["past_dois"] = request.session['past_dois']

    request.session['present_doi'] = doi

    context.update({"doi": result} if result else {})
    return render(request, "dois/main.html", context)
Example #18
def data(request):

    if request.method == 'POST':
        form = SimpleForm(request.POST)
        if form.is_valid():
            #query = input('Enter the query to be searched: ')
            query = form.cleaned_data.get("enterUrl")
            parameter_values_list = [1, 10, '9ipXPomYaSrHLAIuONZfzUGk3t57RcBD']
            response = requests.get(edited_search_coreAPI(query, parameter_values_list))
            # response = requests.get(edited_search_coreAPI(form.enterUrl, parameter_values_list))
            content = response.json()
            works = Works()
            w1 = works.query(container_title='zika', author='johannes', publisher_name='Wiley-Blackwell')
            for item in w1:
                print(item['title'])


            print(content)

            print(type(content))

            context = {
                'form': form,
                'content': content
            }

            messages.success(request, 'Your Url has been generated')

            return redirect("query",data=str(content))
            # return render(request,'users/query.html', {'content': [content]})
            #return render(json.dumps(content,sort_keys=True, indent=4),'users/query.html', content_type="application/json"))
            #return (HttpResponse(json.dumps(content,sort_keys=True, indent=4), content_type="application/json"))


            # print(lists[0])

        #     form.save()

        else:
            messages.error(request, 'Wrong Url')
            return render(request, 'users/query.html', {'form': form})

    else:
        form = SimpleForm()
        return render(request, 'users/data.html', {'form': form})
Example #19
    def __init__(self, args):
        self.works = Works()
        #print(args.doi)
        if 'arxiv' in args.doi:
            #maybe you prepended it, maybe not
            self.doi = args.doi.replace('https://arxiv.org/abs/', '')
            self.link = 'https://arxiv.org/abs/' + self.doi
            self.arxiv()
        else:
            self.doi = args.doi.replace('https://doi.org/', '')
            self.link = 'https://doi.org/' + self.doi
            self.crossref()

        #user-supplied
        self.type = args.type
        self.field = args.field
        self.approach = args.approach
        self.size = args.size
        self.category = args.category
Example #20
def vis_cit2(request):
    keyword = request.POST.get('keyword')
    cit_doi = doi_trans(keyword)
    if cit_doi is None:
        signal = 'TitleError'
        return HttpResponse(signal)
    else:
        # look up the corresponding file name, e.g. 1, 2, 3
        cit_file_id = file_find(cit_doi)
        print(cit_file_id)
        # check for a record in the info table
        info_flag = info_find(cit_doi)
        if info_flag == 'existed':
            print('record found in the info table')
        else:
            print('no record in the info table')
            info_wk = Works()
            item = info_wk.doi(cit_doi)
            result = infoDo.parse_info(item)
            info_record = Info(**result)
            info_record.save()
            print('record saved to the info table')
            path = os.getcwd() + '\\media\\info\\' + str(cit_file_id) + '.json'
            with open(path, "w", encoding='utf-8') as f:
                f.write(json.dumps(result, ensure_ascii=False))
                print('info file saved')
        infos = Info.objects.get(doi=cit_doi)
        date = infos.time
        title = infos.title
        cited = infos.is_referenced_by_count
        # check for a record in the cit database
        cit_flag = cit_find(cit_doi)
        if cit_flag == 'existed':
            print('record found in the cit database')
            return HttpResponse(cit_file_id)
        else:
            print('no record in the cit database')
            citDo2.citdo(date, title, cited, cit_file_id)
            cit_record = Cit(cit_doi=cit_doi, create_ctime=datetime.now())
            cit_record.save()
            return HttpResponse(cit_file_id)
Example #21
def main():
    sql = connect()
    cursor = sql.cursor()

    if not checkTables(cursor):
        print("fatal error: tables not verified")
        return
    else:
        print("tables verified")

    # Start scraping and populating data
    # 1) Scraping entry point: random doi using "sample" - done
    # 2) Back-propagation occurs through citations found in the paper - might have bugs with duplicates
    # 3) When no more unique papers are found, return to step 1 - done
    project = Etiquette('ResearchSub', 'Pre-alpha', 'localhost', '*****@*****.**')
    works = Works(etiquette=project)
    for item in works.sort('published').order('desc'):
        recursiveReferenceAdd(sql, cursor, item)

    # Commit any changes after all transactions completed
    sql.commit()
Example #22
    def _parse_document_type(self, doc):
        """ Returns the document type of a document as a <class 'str'>.
        e.g. 'paper', 'clinical_trial', 'patent', 'news', 'chapter', 'index', 'table_of_contents'"""
        doi = self._parse_doi(doc)
        if doi is None:
            return 'paper'

        is_toc = self._parse_is_table_of_contents(doc)
        if is_toc:
            return 'table_of_contents'

        is_index = self._parse_is_index(doc)
        if is_index:
            return 'index'

        works = Works()
        doc_info = works.doi(doi)

        if doc_info is not None and doc_info['type'] == 'book-chapter':
            return 'chapter'
        else:
            return 'paper'
Example #23
def search():
    """
    Uses Crossref API to search documents.
    """
    queries = {}
    for key in ['author', 'words', 'doi']:
        val = request.form[key]
        if len(val) > 0:
            queries[key] = request.form[key]
        else:
            queries[key] = None
    # Init API and query
    works = Works()
    articles_q = []
    if queries['doi']:
        articles_q = [works.doi(doi=queries['doi'])]
    else:
        articles_q = works.query(bibliographic=queries['words'],
                                 author=queries['author']).sample(20)
    # Check if article is in database already and wiki exists
    articles = []
    for article in articles_q:
        articles.append(article)
        doi = article['DOI']
        search_result = mongo.db.paperwiki.find_one({"DOI": doi})
        if search_result:
            if 'content' in search_result:
                article['actionurl'] = "see_wiki?id=" + doi
                article['wiki_exists'] = True
            else:
                article['actionurl'] = "create_wiki?id=" + doi
                article['wiki_exists'] = False
        else:
            mongo.db.paperwiki.insert_one(article)
            article['actionurl'] = "create_wiki?id=" + doi
            article['wiki_exists'] = False
    resp = render_template("home.html", docs=articles)
    return resp
Example #24
def fetch_conten_domain(APP_CONFIG):

    mysql_username = APP_CONFIG['DOI-Database']['username']
    mysql_password = APP_CONFIG['DOI-Database']['password']
    doi_database_name = APP_CONFIG['DOI-Database']['name']

    # connect to doi database
    drBowmanDatabase = mysql.connector.connect(host="localhost", user='******', passwd='Dsus1209.', database=doi_database_name)
    drBowmanDatabaseCursor = drBowmanDatabase.cursor()

    # get list of DOIs from doi database
    drBowmanDatabaseCursor.execute("SELECT DOI, fk FROM doidata._main_ WHERE DOI IS NOT NULL")
    articles = drBowmanDatabaseCursor.fetchall()
    # articles = drBowmanDatabaseCursor.fetchmany(5)

    # one Works instance is enough; no need to re-import inside the loop
    from crossref.restful import Works
    works = Works()

    count = 0
    for article in articles:

        doi = article[0]
        fk = article[1]
        count += 1
        print(str(count) + "/" + str(len(articles)))

        data = works.doi(doi)
        if data is None:  # DOI not found on Crossref
            continue
        domainData = data['content-domain']
        domain = domainData['domain']
        # print(domain)
        ingest_content_domain(APP_CONFIG, drBowmanDatabase, drBowmanDatabaseCursor, domainData, doi, fk)
Example #25
def fetch_bibtex_by_fulltext_crossref(txt, **kw):
    work = Works(etiquette=my_etiquette)
    logger.debug(six.u('crossref fulltext search:\n') + six.u(txt))

    # get the most likely match of the first results
    # results = []
    # for i, r in enumerate(work.query(txt).sort('score')):
    #     results.append(r)
    #     if i > 50:
    #         break
    query = work.query(txt, **kw).sort('score')
    query_result = query.do_http_request('get',
                                         query.url,
                                         custom_header=str(
                                             query.etiquette)).text
    try:
        results = json.loads(query_result)['message']['items']
    except (ValueError, KeyError):  # fail loudly instead of leaving results unbound
        print(query_result)
        raise ValueError('crossref fulltext: unparseable response')

    if len(results) > 1:
        maxscore = 0
        result = results[0]
        for res in results:
            score = _crossref_score(txt, res)
            if score > maxscore:
                maxscore = score
                result = res
        logger.info('score: ' + str(maxscore))

    elif len(results) == 0:
        raise ValueError('crossref fulltext: no results')

    else:
        result = results[0]

    # convert to bibtex
    return crossref_to_bibtex(result).strip()
Example #26
def main(list_):
    books = []
    crossref_works = Works()

    with open(list_) as f:
        lines = [line.rstrip() for line in f]
        for line in lines:
            book_object = create_book_metadata_obejct(line)
            books.append(book_object)

    for book in books:
        for result in crossref_works.filter(isbn=book['isbn'])\
                                    .sample(100)\
                                    .select('title', 'ISBN', 'link', 'author'):
            try:
                if book['isbn'] in result['ISBN']\
                    and book['title'] in result['title']:

                    if result.get('author'):
                        for author in result['author']:
                            name = f"{author['given']} {author['family']}"
                            book['authors'].append(name)

                    if result.get('link'):
                        for link in result['link']:
                            book['access_link'] = link['URL']

            except KeyError as err:
                print(err)

        sleep(0.5)

    normalised_json = json_normalize(books, sep=',')
    normalised_json.to_excel(f'results_{list_}.xlsx',
                             verbose=True,
                             sheet_name='books',
                             encoding='utf-8')
Example #27
def crossrefAPI_query(keyword: str) -> Dict:
    '''This function takes a keyword str and sends an according GET request to the CrossRef API.
    A normalized version of the first (most 'relevant') result is returned.'''
    article_dict = False
    works = Works()
    # If there is a timeout, try again (5 times)
    for _ in range(5):
        try:
            result = works.query(keyword).sort("relevance")
            for entry in result:
                # Take first result
                article_dict = entry
                break
        except Exception:  # e.g. a timeout; retry
            continue
        break  # success: stop retrying
    else:
        return None
    if article_dict:
        #article_dict = normalize_crossref_dict(article_dict)
        #if contains_minimal_information(article_dict):
        article_dict = add_retrieval_information(article_dict, 'Crossref',
                                                 'unstructured_ID', keyword)
        return article_dict
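A hypothetical lookup; the function returns the first Crossref hit annotated by add_retrieval_information, or None once all five attempts fail:

record = crossrefAPI_query('Attention is all you need')
if record:
    print(record.get('DOI'), record.get('title'))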
Example #28
def publication_new(
        request):  # TODO: improve by splitting this view into separate functions
    if request.method == "POST":
        form = PublicationForm(request.POST)
        if form.is_valid():
            publication = form.save(commit=False)
            publication.created_by = request.user
            works = Works()
            if publication.DOI != "" and works.doi_exists(publication.DOI):
                cross_doi(publication)
            elif publication.ISBN != "" and is_isbn13(publication.ISBN):
                cross_isbn(publication)
            publication.save()
            messages.success(request, "Registro realizado con exito")

            return redirect('bibliography:publication_detail',
                            pk=publication.pk)
        messages.error(request,
                       'Form error: please check all fields')
        return render(request, 'publication_form.html', {'form': form})
    else:
        form = PublicationForm()
        return render(request, 'publication_form.html', {'form': form})
Example #29
def recursiveReferenceAdd(sql, cursor, item):
    works = Works()

    returnId = -1
    #Check valid item
    if item is None:
        print("Can't find works.doi item")
        return -1

    # Check for duplicates in the database
    tableEntry = isInTable(cursor, item)
    if not tableEntry[0]:
        returnId = addToResearchTable(cursor, item)
    else:
        print("Got duplicate")
        returnId = tableEntry[1]
        return -1  #returnId
        #TODO: Allow a duplicate to check the references and update them

    # Continue checking references recursively
    #if 'reference' in item:
    #    for i in xrange(len(item['reference'])):
    #        if 'doi' in item['reference'][i]:
    #            if item['reference'][i]['doi'] != None:
    #                returnId2 = recursiveReferenceAdd(sql,cursor,works.doi(item['reference'][i]['doi']))
    #                # Add relations between research papers
    #                if (returnId != -1 and returnId2 != -1):
    #                    addToReferenceTable(cursor, returnId, returnId2)
    #        elif 'DOI' in item['reference'][i]:
    #            if item['reference'][i]['DOI'] != None:
    #                returnId2 = recursiveReferenceAdd(sql,cursor,works.doi(item['reference'][i]['DOI']))
    #                #Add relations between research papers
    #                if (returnId != -1 and returnId2 != -1):
    #                    addToReferenceTable(cursor, returnId, returnId2)

    sql.commit()
    return returnId
Example #30
def find_meta(title, doi):
    """ find metadata with title or doi
    Keyword Arguments:
    title --
    doi   --
    """
    ylog.info(title)
    works = Works()
    w1 = works.query(title).sort('relevance').order('desc')
    i = 0
    for item in w1:
        i += 1
        try:
            t = item.get('title')[0]
            sub_title = item.get('subtitle')[0]
        except (TypeError, IndexError):  # missing title or subtitle
            continue
        if SequenceMatcher(a=title, b=t).ratio() > 0.9 or SequenceMatcher(
                a=title, b=sub_title).ratio() > 0.9:
            return item
        if i > 18:
            ylog.debug('[x]%s' % title)
            # ylog.debug(item['title'])
            return None
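A hypothetical call; find_meta scans up to 18 query hits for a ~90% title match and returns the Crossref item, or None:

item = find_meta('On the Electrodynamics of Moving Bodies', doi=None)
if item is not None:
    print(item['DOI'])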