def test_translate_flowsheet_id_to_fid(internal_id, fid):
    """Check translation of a ``FlowsheetRowID`` value into a ``fid`` column.

    Two calls are exercised: the default one, whose expected frame keeps the
    original column next to the new ``fid`` column, and one with
    ``drop_original=True``, whose expected frame no longer has it.

    ``internal_id`` and ``fid`` come from outside this block -- presumably a
    parametrization or fixtures; confirm against the rest of the file.
    """
    source = pd.DataFrame([
        {'FlowsheetRowID': internal_id, 'ExtraColumn': 'ExtraValue'},
    ])
    # Keyword arguments shared by both calls below.
    shared_kwargs = {
        'col': 'FlowsheetRowID',
        'new_col': 'fid',
        'config_map': flowsheet_ids,
    }

    # Default call: the original ID column is expected to survive.
    expected_kept = pd.DataFrame([
        {'fid': fid, 'FlowsheetRowID': internal_id, 'ExtraColumn': 'ExtraValue'},
    ])
    translated = translate.translate_epic_id_to_fid(
        df=source.copy(), **shared_kwargs)
    assert test_utils.dataframe_equality(translated, expected_kept)

    # drop_original=True: only the new column and the extra column remain.
    expected_dropped = pd.DataFrame([
        {'fid': fid, 'ExtraColumn': 'ExtraValue'},
    ])
    translated_dropped = translate.translate_epic_id_to_fid(
        df=source.copy(), drop_original=True, **shared_kwargs)
    assert test_utils.dataframe_equality(translated_dropped, expected_dropped)
def test_unlistify_empty_list():
    """Unlistify column ``B`` when one of the rows holds an empty list.

    The expected frame has one row per item of the two-element list, and a
    single row with ``NaN`` in ``B`` for the row whose list is empty.
    """
    input_rows = [
        {'A': 'A_0_1', 'B': ['B_0_1', 'B_0_2'], 'C': 'C_0_1'},
        {'A': 'A_1_1', 'B': [], 'C': 'C_1_1'},
    ]
    expected_rows = [
        {'A': 'A_0_1', 'B': 'B_0_1', 'C': 'C_0_1'},
        {'A': 'A_0_1', 'B': 'B_0_2', 'C': 'C_0_1'},
        # The empty list is expected to become a missing value.
        {'A': 'A_1_1', 'B': np.nan, 'C': 'C_1_1'},
    ]
    frame = pd.DataFrame(input_rows)
    expected = pd.DataFrame(expected_rows)

    unlistified = pandas_utils.unlistify_pandas_column(frame.copy(), 'B')

    assert test_utils.dataframe_equality(expected, unlistified)
def test_unlistify_lists_of_length_one():
    """Unlistify column ``B`` when every list holds exactly one item.

    The expected frame has the same number of rows as the input, with each
    one-element list replaced by its single item.
    """
    input_rows = [
        {'A': 'A_0_1', 'B': ['B_0_1'], 'C': 'C_0_1'},
        {'A': 'A_1_1', 'B': ['B_1_1'], 'C': 'C_1_1'},
    ]
    expected_rows = [
        {'A': 'A_0_1', 'B': 'B_0_1', 'C': 'C_0_1'},
        {'A': 'A_1_1', 'B': 'B_1_1', 'C': 'C_1_1'},
    ]
    frame = pd.DataFrame(input_rows)
    expected = pd.DataFrame(expected_rows)

    unlistified = pandas_utils.unlistify_pandas_column(frame.copy(), 'B')

    assert test_utils.dataframe_equality(expected, unlistified)
def test_unlistify_multiple_lists():
    """Unlistify two list columns one after the other (``C`` first, then ``B``).

    The expected frame contains every (B item, C item) combination for each
    input row: 3 x 1 rows for the first input row and 2 x 2 for the second.
    """
    frame = pd.DataFrame([
        {'A': 'A_0_1', 'B': ['B_0_1', 'B_0_2', 'B_0_3'], 'C': ['C_0_1']},
        {'A': 'A_1_1', 'B': ['B_1_1', 'B_1_2'], 'C': ['C_1_1', 'C_1_2']},
    ])

    column_names = ('A', 'B', 'C')
    # One tuple per expected output row, in the order the frame is compared.
    expected_values = [
        ('A_0_1', 'B_0_1', 'C_0_1'),
        ('A_0_1', 'B_0_2', 'C_0_1'),
        ('A_0_1', 'B_0_3', 'C_0_1'),
        ('A_1_1', 'B_1_1', 'C_1_1'),
        ('A_1_1', 'B_1_2', 'C_1_1'),
        ('A_1_1', 'B_1_1', 'C_1_2'),
        ('A_1_1', 'B_1_2', 'C_1_2'),
    ]
    expected = pd.DataFrame(
        [dict(zip(column_names, values)) for values in expected_values])

    after_c = pandas_utils.unlistify_pandas_column(frame.copy(), 'C')
    after_c_and_b = pandas_utils.unlistify_pandas_column(after_c, 'B')

    assert test_utils.dataframe_equality(expected, after_c_and_b)
def test_unlistify_lists_of_same_lengths():
    """Unlistify column ``B`` when every row holds a three-element list.

    Six input rows are expected to turn into eighteen rows, each carrying
    the ``A`` and ``C`` values of the input row it came from.
    """
    row_count, list_length = 6, 3

    frame = pd.DataFrame([
        {
            'A': 'A_{}_1'.format(row),
            'B': ['B_{}_{}'.format(row, item) for item in range(list_length)],
            'C': 'C_{}_1'.format(row),
        }
        for row in range(row_count)
    ])
    # Row-major order: all items of row 0 first, then row 1, and so on.
    expected = pd.DataFrame([
        {
            'A': 'A_{}_1'.format(row),
            'B': 'B_{}_{}'.format(row, item),
            'C': 'C_{}_1'.format(row),
        }
        for row in range(row_count)
        for item in range(list_length)
    ])

    unlistified = pandas_utils.unlistify_pandas_column(frame.copy(), 'B')

    assert test_utils.dataframe_equality(expected, unlistified)
def test_unlist():
    """Check that ``restructure.unlist`` expands the list column ``Value``.

    The input has 20 rows (2 component IDs x 10 timestamps), each with the
    two-element list ``['0', '1']`` in ``Value``. The expected frame has 40
    rows: one per list item, with the other columns repeated.
    """
    component_count, tsp_count, value_count = 2, 10, 2

    df = pd.DataFrame([
        {
            'ComponentID': 'fid_' + str(k),
            # Built inside the loop so every row owns its own list object.
            'Value': [str(i) for i in range(value_count)],
            'tsp': str(j),
            'Unit': 'unit_' + str(k),
        }
        for k in range(component_count)
        for j in range(tsp_count)
    ])
    # Explicit nested iteration instead of deriving the timestamp and value
    # from a flat index with float division; the row order is unchanged.
    df_extracted = pd.DataFrame([
        {
            'ComponentID': 'fid_' + str(k),
            'Value': str(value),
            'tsp': str(tsp),
            'Unit': 'unit_' + str(k),
        }
        for k in range(component_count)
        for tsp in range(tsp_count)
        for value in range(value_count)
    ])

    result = restructure.unlist(df.copy(), 'Value')

    assert test_utils.dataframe_equality(df_extracted, result)
def test_translate_lab_result_to_fids(component_id, fid):
    """Check translation of a ``ComponentID`` value into a ``fid`` column.

    The expected frame keeps the original ``ComponentID`` column and the
    unrelated ``ExtraColumn`` alongside the new ``fid`` column.

    ``component_id`` and ``fid`` come from outside this block -- presumably a
    parametrization or fixtures; confirm against the rest of the file.
    """
    source = pd.DataFrame([
        {'ComponentID': component_id, 'ExtraColumn': 'ExtraValue'},
    ])
    expected = pd.DataFrame([
        {'fid': fid, 'ComponentID': component_id, 'ExtraColumn': 'ExtraValue'},
    ])

    translated = translate.translate_epic_id_to_fid(
        df=source.copy(),
        col='ComponentID',
        new_col='fid',
        config_map=component_ids,
    )

    assert test_utils.dataframe_equality(translated, expected)
def test_extract():
    """Check that ``restructure.extract`` pulls selected keys out of a dict column.

    ``RawValue`` and ``Instant`` are mapped to the new columns
    ``new_raw_value`` and ``tsp``. The expected frame contains neither the
    original ``dict_col`` nor the unmapped ``Unneeded`` key.
    """
    row_ids = range(10)
    # Source key -> name of the column it is expected to end up in.
    key_to_column = {
        'RawValue': 'new_raw_value',
        'Instant': 'tsp',
    }

    frame = pd.DataFrame([
        {
            'fid': 'fid_{}'.format(j),
            'dict_col': {
                'RawValue': 'val_{}'.format(j),
                'Instant': 'tsp_{}'.format(j),
                'Unneeded': 'fid_{}'.format(j),
            },
            'Unit': 'unit_{}'.format(j),
        }
        for j in row_ids
    ])
    expected = pd.DataFrame([
        {
            'fid': 'fid_{}'.format(j),
            'new_raw_value': 'val_{}'.format(j),
            'tsp': 'tsp_{}'.format(j),
            'Unit': 'unit_{}'.format(j),
        }
        for j in row_ids
    ])

    extracted = restructure.extract(frame.copy(), 'dict_col', key_to_column)

    assert test_utils.dataframe_equality(expected, extracted)
def test_extract_internal_ids():
    """Check that ``extract_id_from_list`` picks the ID of the requested type.

    Each ``ID_col`` cell holds three entries: one without an ``ID`` key, the
    ``good_type`` entry, and one of another type. The expected frame holds
    the ``good_type`` ID as a string (the input stores it as an int) and
    leaves the ``Extra`` column untouched.
    """
    def id_entries(j):
        # Fresh list per row: a decoy without an ID, the wanted entry, and a
        # decoy of a different type with an unrelated ID.
        return [
            {'Type': 'bad_type1'},
            {'Type': 'good_type', 'ID': j},
            {'Type': 'bad_type2', 'ID': str(j + 10)},
        ]

    row_ids = range(5)
    frame = pd.DataFrame([
        {'ID_col': id_entries(j), 'Extra': 'still_here'} for j in row_ids
    ])
    expected = pd.DataFrame([
        {'ID_col': str(j), 'Extra': 'still_here'} for j in row_ids
    ])

    extracted = restructure.extract_id_from_list(
        frame.copy(), 'ID_col', 'good_type')

    assert test_utils.dataframe_equality(expected, extracted)