Пример #1
0
def retrieve(search, search_type):  # pragma: no cover
    """Fetch and flatten every record matching one or more search values.

    For each search value a listing URL is built with ``get_url`` and
    queried through ``Dive(url).call()``.  Every record referenced by the
    listing is then downloaded, flattened with ``flatten_df`` and tagged
    with a ``DSMZ_id`` column taken from the second-to-last path segment
    of the record URL.

    Parameters
    ----------
    search : object or list
        A single search value or a list of them; a non-list value is
        wrapped in a one-element list.
    search_type : str
        Passed through to ``get_url`` together with each search value.

    Returns
    -------
    pandas.DataFrame
        The ``implode_fattened_df`` result of every successful search,
        concatenated side by side (``axis=1``) and sorted by index.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when no search produced any record.
    """
    build_final = []
    if not isinstance(search, list):
        search = [search]

    for search_value in search:
        url = get_url(search_value, search_type)
        try:
            locations = Dive(url).call()
        except Exception:
            # Best-effort lookup: a failed query skips this search value.
            # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt
            # and SystemExit propagate.
            warn('no information found for: ' + str(search_value))
            continue

        # The listing is either a dict holding the records under
        # 'results' or directly a list of records.
        if isinstance(locations, dict):
            records = locations['results']
        elif isinstance(locations, list):
            records = locations
        else:
            records = []

        if not records:
            # pd.concat([]) raises ValueError; skip an empty result set the
            # same way a failed lookup is skipped instead of aborting the
            # remaining searches.
            warn('no information found for: ' + str(search_value))
            continue

        flat_dfs = []
        for record in tqdm(records):
            record_url = record['url']
            df_ = flatten_df(Dive('%s?format=json' % record_url).call())
            df_['DSMZ_id'] = [record_url.split('/')[-2]] * df_.shape[0]
            # Row key: 'DSMZ_id||Section||Subsection||Field_ID'.
            df_.index = (df_['DSMZ_id'] + '||' + df_['Section'] + '||' +
                         df_['Subsection'] + '||' + df_['Field_ID']).values
            flat_dfs.append(df_)
        build_final.append(implode_fattened_df(pd.concat(flat_dfs)))

    return pd.concat(build_final, axis=1).sort_index()
Пример #2
0
    def test_flatten_df(self):
        """Flatten the stored JSON fixture and compare it with the CSV truth.

        The fixture is flattened, keyed and imploded through the same
        steps used for downloaded records; the 'Section' and 'Field'
        index levels must then match the reference table exactly.
        """
        record_url = ('https://bacdive.dsmz.de/api/'
                      'bacdive/bacdive_id/132558/?format=json')
        json_path = get_data_path('test.json')
        expected = pd.read_csv(get_data_path('test.csv'),
                               index_col=[0, 1, 2]).sort_index()
        with open(json_path) as handle:
            raw_record = json.load(handle)

        flat = flatten_df(raw_record)
        # The record identifier is the second-to-last URL path segment.
        dsmz_id = record_url.split('/')[-2]
        flat['DSMZ_id'] = [dsmz_id] * flat.shape[0]
        row_keys = (flat['DSMZ_id'] + '||' + flat['Section'] + '||' +
                    flat['Subsection'] + '||' + flat['Field_ID'])
        flat.index = row_keys.values
        observed = implode_fattened_df(flat).sort_index()

        for level in ('Section', 'Field'):
            self.assertListEqual(
                list(expected.index.get_level_values(level)),
                list(observed.index.get_level_values(level)))
Пример #3
0
def retrieve(search, search_type):  # pragma: no cover
    """Fetch every record matching the search values as lists of fields.

    For each search value a URL is built with ``get_url``.  When
    ``search_type`` is ``'bacdive_id'`` that URL is used directly as the
    single record to download; otherwise it is queried through
    ``Dive(url).call()`` to obtain the listing of matching records.
    Every record is downloaded, flattened with ``flatten_df`` and tagged
    with a ``DSMZ_id`` column taken from the second-to-last path segment
    of the record URL.

    Parameters
    ----------
    search : object or list
        A single search value or a list of them; a non-list value is
        wrapped in a one-element list.
    search_type : str
        Passed through to ``get_url``; the value ``'bacdive_id'`` skips
        the listing query.

    Returns
    -------
    object
        Whatever ``clean_cat`` returns for the transposed DataFrame built
        from one Series per successful search, each Series holding the
        list of 'Field' values found under every row key.
    """
    build_final = []
    if not isinstance(search, list):
        search = [search]

    for search_value in search:
        url = get_url(search_value, search_type)

        if search_type == 'bacdive_id':
            # The URL already points at one record, so build the listing
            # structure by hand instead of querying for it.
            locations = {
                'count': 1,
                'next': None,
                'previous': None,
                'results': [{
                    'url': url
                }]
            }
        else:
            try:
                locations = Dive(url).call()
            except Exception:
                # Best-effort lookup: a failed query skips this search
                # value.  ``Exception`` (not a bare ``except``) lets
                # KeyboardInterrupt and SystemExit propagate.
                warn('no information found for: ' + str(search_value))
                continue

        # The listing is either a dict holding the records under
        # 'results' or directly a list of records.
        if isinstance(locations, dict):
            records = locations['results']
        elif isinstance(locations, list):
            records = locations
        else:
            records = []

        if not records:
            # pd.concat([]) raises ValueError; skip an empty result set the
            # same way a failed lookup is skipped instead of aborting the
            # remaining searches.
            warn('no information found for: ' + str(search_value))
            continue

        flat_dfs = []
        for record in tqdm(records):
            record_url = record['url']
            df_ = flatten_df(Dive('%s?format=json' % record_url).call())
            df_['DSMZ_id'] = [record_url.split('/')[-2]] * df_.shape[0]
            # Row key: 'DSMZ_id||Section||Subsection||Field_ID'.
            df_.index = (df_['DSMZ_id'] + '||' + df_['Section'] + '||' +
                         df_['Subsection'] + '||' + df_['Field_ID']).values
            flat_dfs.append(df_)

        combined = pd.concat(flat_dfs)
        # Collapse rows sharing a key into one list of their 'Field' values.
        fields_by_key = combined.groupby(combined.index)['Field'].apply(list)
        build_final.append(fields_by_key)

    build_final = clean_cat(pd.DataFrame(build_final).T)
    return build_final