def get_subject_id(name):
    """Return the ``_id`` of the Subject whose ``text`` equals ``name``.

    Successful lookups are memoized in the module-level ``SUBJECTS_CACHE``,
    so ``Subject.find_one`` is called at most once per resolved name.

    :param name: value matched against the Subject ``text`` field.
    :return: the ``_id`` attribute of the matching Subject.
    :raises Exception: if ``Subject.find_one`` raises ``NoResultsFound``.
    """
    if name not in SUBJECTS_CACHE:
        try:
            subject = Subject.find_one(Q('text', 'eq', name))
        except NoResultsFound:
            # Re-raised as a plain Exception (the type callers already see)
            # with the offending name included in the message.
            raise Exception('Subject: "{}" not found'.format(name))
        SUBJECTS_CACHE[name] = subject._id
    return SUBJECTS_CACHE[name]
def migrate_preprint(preprint):
    """Rewrite one preprint's ``subjects`` using ``PLOS_TO_BP_MAP``.

    For every hierarchy in ``preprint['subjects']``:

    1. ``get_leaf`` picks the leaf id of the hierarchy.
    2. The leaf's ``text`` is read from the ``plos_subject`` collection.
    3. That text is translated through ``PLOS_TO_BP_MAP``.
    4. Unless the mapped name is ``'<drop>'``, the ``hierarchy`` of the
       Subject with that text is appended to the new subject list.

    The resulting list is then stored as ``subjects`` on the document in the
    ``preprintservice`` collection whose ``_id`` matches the preprint.

    :param preprint: mapping providing at least ``_id`` and ``subjects``.
    :raises KeyError: if a PLOS subject text has no entry in ``PLOS_TO_BP_MAP``.
    """
    logger.info('Preprint to migrate {}'.format(preprint['_id']))

    # Loop-invariant collection handle, looked up once.
    plos_subjects = database['plos_subject']

    new_hiers = []
    for hier in preprint['subjects']:
        leaf = get_leaf(hier)
        # NOTE(review): no handling for a leaf id missing from plos_subject;
        # the subscript below would fail in that case -- confirm data is clean.
        plos_text = plos_subjects.find_one({'_id': leaf})['text']
        new_leaf_name = PLOS_TO_BP_MAP[plos_text]
        if new_leaf_name == '<drop>':
            # Mapped to the drop marker: contributes nothing to the new list.
            continue
        bepress_subject = Subject.find_one(Q('text', 'eq', new_leaf_name))
        new_hiers.append(bepress_subject.hierarchy)

    logger.info(
        'Setting subjects on {} to {}'.format(preprint['_id'], new_hiers))
    database['preprintservice'].find_and_modify(
        {'_id': preprint['_id']},
        {'$set': {'subjects': new_hiers}},
    )
def migrate_preprint(preprint):
    """Rewrite one preprint's ``subjects`` using ``PLOS_TO_BP_MAP``.

    For every hierarchy in ``preprint['subjects']``:

    1. ``get_leaf`` picks the leaf id of the hierarchy.
    2. The leaf's ``text`` is read from the ``plos_subject`` collection.
    3. That text is translated through ``PLOS_TO_BP_MAP``.
    4. Unless the mapped name is ``'<drop>'``, the ``hierarchy`` of the
       Subject with that text is appended to the new subject list.

    The resulting list is then stored as ``subjects`` on the document in the
    ``preprintservice`` collection whose ``_id`` matches the preprint.

    :param preprint: mapping providing at least ``_id`` and ``subjects``.
    :raises KeyError: if a PLOS subject text has no entry in ``PLOS_TO_BP_MAP``.
    """
    logger.info('Preprint to migrate {}'.format(preprint['_id']))

    # Loop-invariant collection handle, looked up once.
    plos_subjects = database['plos_subject']

    new_hiers = []
    for hier in preprint['subjects']:
        leaf = get_leaf(hier)
        # NOTE(review): no handling for a leaf id missing from plos_subject;
        # the subscript below would fail in that case -- confirm data is clean.
        plos_text = plos_subjects.find_one({'_id': leaf})['text']
        new_leaf_name = PLOS_TO_BP_MAP[plos_text]
        if new_leaf_name == '<drop>':
            # Mapped to the drop marker: contributes nothing to the new list.
            continue
        bepress_subject = Subject.find_one(Q('text', 'eq', new_leaf_name))
        new_hiers.append(bepress_subject.hierarchy)

    logger.info(
        'Setting subjects on {} to {}'.format(preprint['_id'], new_hiers))
    database['preprintservice'].find_and_modify(
        {'_id': preprint['_id']},
        {'$set': {'subjects': new_hiers}},
    )
def load_bepress(f_path):
    """Import the BePress taxonomy from a JSON file into Subject records.

    The file is read with ``json.load``; its keys are used as Subject texts
    and each value's ``'lineage'`` list supplies the parent (its last entry).

    The import runs in three passes:

    1. create and save one Subject per key;
    2. for each Subject with a non-empty lineage, set ``parents`` to the
       Subject whose text is the last lineage entry;
    3. set each Subject's ``children`` to the Subjects that list it as parent.

    :param f_path: path of the JSON taxonomy file.
    :raises AssertionError: if any Subject already exists before loading, or
        (when assertions are enabled) if the number of Subjects after the
        first pass differs from the number of keys in the file.
    """
    # Raised explicitly instead of via ``assert`` so the guard against
    # importing on top of existing Subjects is not stripped by ``python -O``.
    if Subject.find().count() != 0:
        raise AssertionError(
            'Subjects already exist; refusing to load the BePress taxonomy')

    logger.info('Loading BePress...')
    with open(f_path) as fp:
        bpress = json.load(fp)
    validate_map_bepress_correctness(set(bpress.keys()))

    logger.info('Populating Subjects...')
    for text in bpress:
        Subject(text=text).save()
    # Sanity check that every key produced exactly one Subject.
    assert Subject.find().count() == len(bpress)

    logger.info('Setting parents...')
    for s in Subject.find():
        lineage = bpress[s.text]['lineage']
        if lineage:
            # The last lineage entry is taken as the parent's text.
            s.parents = [Subject.find_one(Q('text', 'eq', lineage[-1]))]
            s.save()

    logger.info('Setting children...')
    for s in Subject.find():
        s.children = Subject.find(Q('parents', 'eq', s))
        s.save()

    logger.info('Successfully imported BePress taxonomy.')
def load_bepress(f_path):
    """Import the BePress taxonomy from a JSON file into Subject records.

    The file is read with ``json.load``; its keys are used as Subject texts
    and each value's ``'lineage'`` list supplies the parent (its last entry).

    The import runs in three passes:

    1. create and save one Subject per key;
    2. for each Subject with a non-empty lineage, set ``parents`` to the
       Subject whose text is the last lineage entry;
    3. set each Subject's ``children`` to the Subjects that list it as parent.

    :param f_path: path of the JSON taxonomy file.
    :raises AssertionError: if any Subject already exists before loading, or
        (when assertions are enabled) if the number of Subjects after the
        first pass differs from the number of keys in the file.
    """
    # Raised explicitly instead of via ``assert`` so the guard against
    # importing on top of existing Subjects is not stripped by ``python -O``.
    if Subject.find().count() != 0:
        raise AssertionError(
            'Subjects already exist; refusing to load the BePress taxonomy')

    logger.info('Loading BePress...')
    with open(f_path) as fp:
        bpress = json.load(fp)
    validate_map_bepress_correctness(set(bpress.keys()))

    logger.info('Populating Subjects...')
    for text in bpress:
        Subject(text=text).save()
    # Sanity check that every key produced exactly one Subject.
    assert Subject.find().count() == len(bpress)

    logger.info('Setting parents...')
    for s in Subject.find():
        lineage = bpress[s.text]['lineage']
        if lineage:
            # The last lineage entry is taken as the parent's text.
            s.parents = [Subject.find_one(Q('text', 'eq', lineage[-1]))]
            s.save()

    logger.info('Setting children...')
    for s in Subject.find():
        s.children = Subject.find(Q('parents', 'eq', s))
        s.save()

    logger.info('Successfully imported BePress taxonomy.')