Пример #1
0
 def find_missing_ids(self):
     """Fill in whichever of doi / pubmed_id / pmcid are missing on this object.

     The three identifier fields are read from ``self.to_mongo()``. When at
     least one of them, but not all, is set, the first one present (checked
     in the order doi, pubmed_id, pmcid) is passed to ``find_remaining_ids``
     and every non-None value of the returned mapping is assigned back via
     ``self[key] = value``. Nothing happens when all identifiers are already
     present or when none of them is.
     """
     # Call to_mongo() once instead of once per identifier field.
     stored = self.to_mongo()
     id_fields = [stored.get(name) for name in ('doi', 'pubmed_id', 'pmcid')]
     known_ids = [value for value in id_fields if value is not None]
     # We need at least one identifier to find the others, and there is
     # nothing to look up when every identifier is already known.
     if not known_ids or len(known_ids) == len(id_fields):
         return
     remaining_ids = find_remaining_ids(known_ids[0])
     for key, value in remaining_ids.items():
         if value is not None:
             self[key] = value
Пример #2
0
 def _parse_pubmed_id(self, doc):
     """Return the PubMed ID of a document (documented as a ``str``).

     The DOI obtained from ``self._parse_doi(doc)`` is handed to
     ``find_remaining_ids`` and the 'pubmed_id' entry of the result is
     returned.
     """
     doi = self._parse_doi(doc)
     resolved_ids = find_remaining_ids(doi)
     return resolved_ids['pubmed_id']
Пример #3
0
 def _parse_pmcid(self, doc):
     """Return the pmcid of a document (documented as a ``str``).

     The DOI obtained from ``self._parse_doi(doc)`` is handed to
     ``find_remaining_ids`` and the 'pmcid' entry of the result is returned.
     """
     doi = self._parse_doi(doc)
     resolved_ids = find_remaining_ids(doi)
     return resolved_ids['pmcid']
Пример #4
0
 def _parse_doi(self, doc):
     """Return the DOI of a document, or None when the lookup yields none.

     ``doc['pmid']`` is converted to ``str`` and passed to
     ``find_remaining_ids``; the 'doi' entry of the returned mapping is
     returned. A document without a 'pmid' entry makes the ``doc['pmid']``
     access raise.
     """
     # .get() already yields None for a missing DOI, so no extra
     # None comparison or explicit ``return None`` branch is needed.
     return find_remaining_ids(str(doc['pmid'])).get('doi')
Пример #5
0
 def _parse_pmcid(self, doc):
     """ Returns the pmcid of a document as a <class 'str'>."""
     try:
         return doc['pmcid']
     except:
         return find_remaining_ids(str(doc['pmid']))['pmcid']
Пример #6
0
 def _parse_pubmed_id(self, doc):
     """ Returns the PubMed ID of a document as a <class 'str'>."""
     if 'pmid' in doc.keys():
         if doc['pmcid'] != '':
             return doc['pmcid']
     return find_remaining_ids(self._parse_doi(doc))['pubmed_id']
Пример #7
0
def update_PubMed_entries(mongo_db):
    """Store the DOI on every 'CDCN_extracted_PubMed' document naming a drug.

    Each document with a non-empty 'Repurposed Drug Name' has its 'PMID'
    resolved through ``find_remaining_ids``; when a DOI comes back it is
    written to the document's 'doi' field. Documents without a drug name
    are skipped. Progress and summary counts are printed to stdout.

    Args:
        mongo_db: database handle indexable by collection name (presumably
            a PyMongo ``Database`` -- confirm against the caller).
    """
    col_name = 'CDCN_extracted_PubMed'
    col = mongo_db[col_name]
    # Only referenced by the disabled propagation code further down.
    col_entries = mongo_db['entries']
    col_entries_vespa = mongo_db['entries_vespa']
    query = col.find({}, )

    # Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
    # count_documents() is the supported replacement.
    print('query.count()', col.count_documents({}))
    found_entries = set()
    found_entries_vespa = set()
    for doc in query:
        PMID = doc['PMID']
        drugs = doc.get('Repurposed Drug Name', None)
        if not drugs:
            continue
        # TODO: remove drug name NR
        if isinstance(drugs, str):
            drugs = [drugs]

        # Resolve identifiers only after the drug filter, so that skipped
        # documents do not cost an identifier lookup.
        doi = find_remaining_ids(PMID).get('doi')

        if doi is not None:
            print('doi', doi)
            col.find_one_and_update({'_id': doc['_id']},
                                    {'$set': {
                                        'doi': doi
                                    }})

        # NOTE: propagating the drug names to 'entries' / 'entries_vespa'
        # is currently disabled, so both found_* sets stay empty.
        # # update entries
        # if doi is not None:
        #     entry = col_entries.find_one({'doi': doi})
        #     if entry:
        #         col_entries.find_one_and_update(
        #             {
        #                 '_id': entry['_id']
        #             },
        #             {
        #                 "$set": {
        #                     'drug_names': drugs
        #                 },
        #             }
        #         )
        #         found_entries.add(entry['_id'])
        #
        # # update vespa entries
        # if PMID is not None:
        #     entry = col_entries_vespa.find_one({'pubmed_id': PMID})
        #     if entry:
        #         col_entries_vespa.find_one_and_update(
        #             {
        #                 '_id': entry['_id']
        #             },
        #             {
        #                 "$set": {
        #                     'drug_names': drugs
        #                 },
        #             }
        #         )
        #         found_entries_vespa.add(entry['_id'])
        #
        # if doi is not None:
        #     entry = col_entries_vespa.find_one({'doi': doi})
        #     if entry:
        #         col_entries_vespa.find_one_and_update(
        #             {
        #                 '_id': entry['_id']
        #             },
        #             {
        #                 "$set": {
        #                     'drug_names': drugs
        #                 },
        #             }
        #         )
        #         found_entries_vespa.add(entry['_id'])

    print('found_entries', len(found_entries))
    print('found_entries_vespa', len(found_entries_vespa))