Пример #1
0
def try_partial_input():
    """Exercise ``partial_insert`` on the ids store with two hand-built items.

    Builds one ``ids`` item carrying only a ``scival_id`` and a second one
    carrying only a ``city``, then hands both to ``partial_insert`` together
    with the field name ``'scopus_id'``.

    NOTE(review): the first item sets ``scival_id`` while the field name
    passed to ``partial_insert`` is ``'scopus_id'`` -- confirm this mismatch
    is intended.
    """
    store = mongo_ids()

    existing = ids(scival_id=508076)
    changes = ids(city='Change to something')

    store.partial_insert(existing, 'scopus_id', changes)
Пример #2
0
def try_insert_child_ids():
    """Register every row of ``aaa.xlsx`` against a fixed Harvard parent item.

    The spreadsheet columns ``Affiliation ID`` and ``Name`` are renamed to
    ``scopus_id`` and ``name``. For each row, three store calls are made in
    order: ``append_child_aff`` on the parent with the row's ``scopus_id``,
    ``insert_affiliation`` with an ``ids`` item built from the row, and
    ``append_aff`` with a second ``ids`` item built from the row plus the
    parent's ``scival_id`` under the ``'parent_id'`` field.
    """
    store = mongo_ids()

    column_names = {'Affiliation ID': 'scopus_id', 'Name': 'name'}
    children = pd.read_excel('aaa.xlsx').rename(column_names, axis=1)

    harvard = ids(name='Harvard University', scival_id=508076)

    for _, child in children.iterrows():
        store.append_child_aff(
            harvard, 'scival_id', {'scopus_id': child.scopus_id}
        )
        # A fresh item is built for each call, exactly as many times as
        # the store methods are invoked with one.
        store.insert_affiliation(ids(**child))
        store.append_aff(
            ids(**child),
            'scopus_id',
            'parent_id',
            {'scival_id': harvard.scival_id},
        )
Пример #3
0
def transer_ids_from_institution():
    """Insert the first five rows of the universities table as affiliations.

    Reads ``../../data/universities_table.csv``, drops a fixed list of
    columns plus every column whose name matches ``downloaded``
    (case-insensitive), renames ``id`` to ``scival_id`` and ``Institution``
    to ``name``, and then, for each of the first five rows, builds an
    ``ids`` item from the row and passes it to ``insert_affiliation``.

    Progress is printed to stdout for every row.
    """
    db_ids = mongo_ids()

    fname_main = '../../data/universities_table.csv'
    df = pd.read_csv(fname_main)

    drop_cols = ['order', 'metrics', 'id_downloaded', 'link', 'type', 'name', 'uri']
    drop_cols.extend(
        x for x in df.columns.tolist() if re.search(r'downloaded', x, re.I)
    )
    # 'id_downloaded' is both listed explicitly and matched by the regex;
    # np.unique removes such duplicates before dropping.
    drop_cols = np.unique(drop_cols)

    df = df.drop(drop_cols, axis=1).rename(
        {'id': 'scival_id', 'Institution': 'name'}, axis=1
    )

    # Only the first five rows are processed.
    for index, row in df.iloc[:5, :].iterrows():
        print('index is ', index)
        item = ids(**row.to_dict())
        print('inserting item')
        print(item)
        db_ids.insert_affiliation(item)
Пример #4
0
def try_insert():
    """Insert a single hand-built Harvard affiliation into the ids store.

    NOTE(review): the previous comment here stated that the inserted item
    should be a dictionary, yet an ``ids`` instance is what gets passed to
    ``insert_affiliation`` -- confirm which one the store expects.
    """
    store = mongo_ids()
    harvard = ids(name='Harvard University', country='USA', city='Cambridge')
    store.insert_affiliation(harvard)
Пример #5
0
def _to_numeric_or_keep(column):
    """Return *column* converted by ``pd.to_numeric``, or unchanged on failure."""
    # Explicit replacement for the deprecated ``errors='ignore'`` mode of
    # ``pd.to_numeric``: a column that cannot be parsed is left as it was.
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def append_scopus_ids_to_parent(db_ids, df, parent):
    """Insert a parent affiliation and link every row of *df* to it.

    Args:
        db_ids: Store object providing ``insert_affiliation``,
            ``append_child_aff`` and ``append_aff``.
        df: DataFrame of child affiliations. Columns ``Affiliation ID`` and
            ``Name`` are renamed to ``scopus_id`` and ``name``; every column
            that parses as numeric is converted, the others are left as-is.
        parent: Mapping of keyword arguments for ``ids``. It must contain
            ``scival_id``, which is coerced to ``int`` IN PLACE (the
            caller's mapping is modified).

    Raises:
        KeyError: If *parent* has no ``scival_id`` entry.
        ValueError: If ``parent['scival_id']`` is not convertible to ``int``.
    """
    df = df.rename({'Affiliation ID': 'scopus_id', 'Name': 'name'}, axis=1)
    df = df.apply(_to_numeric_or_keep)

    # Kept as an in-place update so callers that read the coerced value
    # back from their own dict continue to see an int.
    parent['scival_id'] = int(parent['scival_id'])

    parent_aff = ids(**parent)
    db_ids.insert_affiliation(parent_aff)

    for _, row in df.iterrows():
        db_ids.append_child_aff(parent_aff, 'scival_id', {'scopus_id': row.scopus_id})
        db_ids.insert_affiliation(ids(**row))
        db_ids.append_aff(ids(**row), 'scopus_id', 'parent_id', {'scival_id': parent_aff.scival_id})
Пример #6
0
    df = pd.read_csv(fname_main)

    cols = df.columns.tolist()

    # df = df.set_index("Institution")

    drop_cols.extend([x for x in cols if re.search(r'downloaded', x, re.I)])
    drop_cols = np.unique(drop_cols)

    df = df.drop(drop_cols, axis=1).rename({'id': 'scival_id', 'Institution': 'name'}, axis=1)
    df = df.replace(np.nan, '')

    cols = df.columns.tolist()

    index_field = 'scival_id'

    for index, row in df.iterrows(): 
    # for index, row in df.iloc[-10:, :].iterrows(): 

        if row[index_field]:

            print("inserting item")
            # a = row.to_dict()
            # print('a is ')
            # print(a)

            item = ids(**row.to_dict())
            print(item.to_dict())

            db_ids.partial_insert_by_index(row[index_field], index_field, item)