import re

import numpy as np
import pandas as pd
import pymongo

# NOTE: `mongo_ids` and `ids` are project-local helpers (the Mongo collection
# wrapper and the affiliation record); they are assumed to be imported from
# the project's own modules, which are not shown in this file.


def transer_ids_from_institution():
    # Read the universities table, drop bookkeeping columns, and insert the
    # first few rows into the ids collection.
    db_ids = mongo_ids()
    fname_main = '../../data/universities_table.csv'
    drop_cols = ['order', 'metrics', 'id_downloaded', 'link', 'type', 'name', 'uri']
    df = pd.read_csv(fname_main)
    cols = df.columns.tolist()
    # df = df.set_index("Institution")
    drop_cols.extend([x for x in cols if re.search(r'downloaded', x, re.I)])
    drop_cols = np.unique(drop_cols)
    df = df.drop(drop_cols, axis=1).rename({'id': 'scival_id', 'Institution': 'name'}, axis=1)
    for index, row in df.iloc[:5, :].iterrows():
        print('index is ', index)
        # print('row is ', row)
        item = ids(**row.to_dict())
        print('inserting item')
        print(item)
        db_ids.insert_affiliation(item)
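# Example (assumption): after the drop/rename above, each row dict passed to
# ids() is expected to look roughly like
#   {'scival_id': 508076, 'name': 'Harvard University', 'country': 'USA', ...}
# where the exact extra fields depend on which columns remain in the CSV.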
def try_find_and_filter():
    coll = mongo_ids()
    a = coll.filter_children_by_name('parent_id', [{'scival_id': 508076}], 'name', 'harvard')
    return a
def try_insert():
    # The item to insert is a dictionary-like `ids` record built from keyword fields.
    db_ids = mongo_ids()
    a = ids(name='Harvard University', country='USA', city='Cambridge')
    db_ids.insert_affiliation(a)
def try_partial_input():
    # Build a selector for an existing record and a record carrying the new
    # field, then pass both to partial_insert.
    db_ids = mongo_ids()
    old_item = {'scival_id': 508076}
    new_item = {'city': 'Change to something'}
    a = ids(**old_item)
    b = ids(**new_item)
    db_ids.partial_insert(a, 'scopus_id', b)
def try_insert_child_ids():
    db_ids = mongo_ids()
    df = pd.read_excel('aaa.xlsx').rename({'Affiliation ID': 'scopus_id', 'Name': 'name'}, axis=1)
    parent_aff = ids(name='Harvard University', scival_id=508076)
    for index, row in df.iterrows():
        # Register the child under the parent, insert the child itself, and
        # link the child back to its parent.
        db_ids.append_child_aff(parent_aff, 'scival_id', {'scopus_id': row.scopus_id})
        db_ids.insert_affiliation(ids(**row.to_dict()))
        db_ids.append_aff(ids(**row.to_dict()), 'scopus_id', 'parent_id', {'scival_id': parent_aff.scival_id})
def __init__(self, db_name=None, coll_name=None, address=None, port=None):
    if db_name is None:
        db_name = 'acknowledgements'
    if coll_name is None:
        coll_name = 'acks'
    if address is None:
        address = 'localhost'
    if port is None:
        port = 27017
    self.client = pymongo.MongoClient(address, port)
    self.db = self.client[db_name]
    self.aff_acks = self.db[coll_name]
    self.db_ids = mongo_ids()
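# Usage sketch (assumption): this __init__ appears to belong to the
# acknowledgements-collection wrapper class, whose name is not shown in this
# section. With the defaults above it connects to mongodb://localhost:27017,
# database 'acknowledgements', collection 'acks'. A hypothetical call:
#
#   acks = AcksWrapper()                      # hypothetical class name
#   acks = AcksWrapper(db_name='acknowledgements', coll_name='acks',
#                      address='localhost', port=27017)   # same as the defaults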
def init_db_ids():
    return mongo_ids()
if __name__ == "__main__":
    # def transer_table_to_db():
    db_name = "ids"
    coll_name = "name_ids_updated5"
    db_ids = mongo_ids(db_name=db_name, coll_name=coll_name)
    # n = 100
    fname_main = '../data/universities_table.csv'
    drop_cols = ['order', 'metrics', 'id_downloaded', 'link', 'type', 'name', 'uri']
    # drop_cols = ['order', 'metrics', 'id_downloaded', 'link', 'type', 'name', 'uri', 'countryCode', 'country']
    df = pd.read_csv(fname_main)
    cols = df.columns.tolist()
    # df = df.set_index("Institution")
    drop_cols.extend([x for x in cols if re.search(r'downloaded', x, re.I)])
    drop_cols = np.unique(drop_cols)