def test_append_column_chunked_cta(cta_file, cta_config):
    """Append a column chunk-by-chunk to a CTA file and verify the result.

    Iterates the file in chunks, appends ``arange(start, stop)`` for each
    chunk, then checks the reassembled column is a contiguous 0..N-1 range.
    """
    from aict_tools.io import read_telescope_data_chunked, read_data
    from aict_tools.io import append_column_to_hdf5

    column = 'foobar'
    table_name = 'telescope_events'

    # the new column must not exist before we start appending
    assert column not in read_data(cta_file, table_name).columns

    chunks = read_telescope_data_chunked(
        cta_file,
        cta_config,
        125,
        columns=cta_config.energy.columns_to_read_train,
    )
    for chunk, start, stop in chunks:
        assert not chunk.empty
        append_column_to_hdf5(
            cta_file, np.arange(start, stop, step=1), table_name, column
        )

    df = read_data(cta_file, table_name)
    assert column in df.columns
    assert np.array_equal(df.foobar, np.arange(0, len(df)))

    # verify_integrity=True raises if the event index is not unique,
    # so this doubles as a uniqueness check on the id triple
    df.set_index(
        ['run_id', 'array_event_id', 'telescope_id'],
        drop=True,
        verify_integrity=True,
        inplace=True,
    )
def test_append_column_chunked(hdf5_file):
    """Append a column chunk-by-chunk to an HDF5 file and verify the result.

    Each chunk contributes ``arange(start, stop)``; afterwards the full
    column must read back as a contiguous 0..N-1 range.
    """
    from aict_tools.io import read_telescope_data_chunked, read_data
    from aict_tools.io import append_column_to_hdf5

    path, table_name, config = hdf5_file
    column = 'foobar'

    # the new column must not exist before we start appending
    assert column not in read_data(path, table_name).columns

    chunks = read_telescope_data_chunked(
        path, config, 125, columns=config.energy.columns_to_read_train
    )
    for chunk, start, stop in chunks:
        assert not chunk.empty
        append_column_to_hdf5(
            path, np.arange(start, stop, step=1), table_name, column
        )

    df = read_data(path, table_name)
    assert column in df.columns
    assert np.array_equal(df.foobar, np.arange(0, len(df)))
def test_read_chunks_cta_dl1(cta_file, cta_config):
    """Chunked and single-shot reads of a CTA DL1 file must agree.

    Also checks that only the telescopes listed in the config were loaded.
    """
    from aict_tools.io import read_telescope_data, read_telescope_data_chunked
    import pandas as pd
    from pandas.testing import assert_frame_equal

    # choose some columns from different tables in the file
    columns = [
        "true_energy",
        "azimuth",
        "equivalent_focal_length",
        "hillas_width",
        "tel_id",
        "event_id",
        "obs_id",
    ]

    path = str(cta_file)
    chunks = read_telescope_data_chunked(path, cta_config, 500, columns=columns)
    chunked = pd.concat([chunk for chunk, _, _ in chunks])
    full = read_telescope_data(path, cta_config, columns=columns)
    assert_frame_equal(chunked, full)

    # make sure we only loaded the telescopes we wanted
    wanted = [int(x.split("_")[1]) for x in cta_config.telescopes]
    np.testing.assert_array_equal(full.tel_id.unique(), wanted)
def test_read_chunks_cta(cta_file, cta_config, chunk_size):
    """Chunked and single-shot reads of a CTA file must yield identical data.

    Concatenates all chunks produced by ``read_telescope_data_chunked`` and
    compares the result frame-for-frame with one ``read_telescope_data`` call.
    """
    from aict_tools.io import read_telescope_data, read_telescope_data_chunked
    import pandas as pd
    # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
    # pandas.testing is the supported location (already used elsewhere here)
    from pandas.testing import assert_frame_equal

    columns = ['width', 'num_triggered_telescopes', 'telescope_id']

    generator = read_telescope_data_chunked(
        cta_file, cta_config, chunk_size, columns=columns
    )
    df1 = pd.concat([df for df, _, _ in generator]).reset_index(drop=True)
    df2 = read_telescope_data(cta_file, cta_config, columns=columns)

    assert_frame_equal(df1, df2)
def test_read_default_columns_chunked(hdf5_file):
    """Chunked reading with default columns must match a single full read."""
    from aict_tools.io import read_telescope_data, read_telescope_data_chunked
    import pandas as pd
    # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
    # pandas.testing is the supported location (already used elsewhere here)
    from pandas.testing import assert_frame_equal

    path, table_name, config = hdf5_file

    generator = read_telescope_data_chunked(path, config, 100)
    df_chunked = pd.concat([df for df, _, _ in generator]).reset_index(drop=True)

    df = read_telescope_data(path, config).reset_index(drop=True)

    assert_frame_equal(df, df_chunked)
def test_read_chunks_cta_feature_gen(cta_file, cta_config):
    """Chunked reading with feature generation yields exactly the expected columns."""
    # NOTE(review): another test with this exact name appears later in this
    # file; pytest only keeps the last definition — confirm and rename one.
    from aict_tools.io import read_telescope_data_chunked

    energy = cta_config.energy
    expected = set(
        energy.features + ['array_event_id', 'run_id']
    ) | {energy.target_column}

    chunks = read_telescope_data_chunked(
        cta_file,
        cta_config,
        125,
        columns=energy.columns_to_read_train,
        feature_generation_config=energy.feature_generation,
    )
    for chunk, _, _ in chunks:
        assert not chunk.empty
        assert set(chunk.columns) == expected
def test_read_chunks(hdf5_file):
    """Chunked reading of selected columns must match a single full read."""
    from aict_tools.io import read_telescope_data_chunked, read_telescope_data
    import pandas as pd
    # pandas.util.testing was deprecated in pandas 1.0 and removed in 2.0;
    # pandas.testing is the supported location (already used elsewhere here)
    from pandas.testing import assert_frame_equal

    path, table_name, config = hdf5_file

    cols = ['width', 'length', ]
    chunk_size = 125

    generator = read_telescope_data_chunked(path, config, chunk_size, cols)

    dfs = []
    for df, _, _ in generator:
        dfs.append(df)
        assert not df.empty

    df_chunked = pd.concat(dfs).reset_index(drop=True)
    df = read_telescope_data(path, config, columns=cols).reset_index(drop=True)

    assert_frame_equal(df, df_chunked)
def test_read_chunks_cta_feature_gen(cta_file, cta_config):
    """Chunked DL1 reading with feature generation yields the expected column set."""
    # NOTE(review): duplicates the name of an earlier test in this file;
    # pytest only keeps the last definition — confirm and rename one.
    from aict_tools.io import read_telescope_data_chunked

    energy = cta_config.energy
    fg = energy.feature_generation
    expected = set(
        energy.features + fg.needed_columns + ["obs_id", "event_id", "tel_id"]
    ) | {energy.target_column}

    chunks = read_telescope_data_chunked(
        str(cta_file),
        cta_config,
        100,
        columns=energy.columns_to_read_train,
        feature_generation_config=fg,
    )
    for chunk, _, _ in chunks:
        assert not chunk.empty
        assert set(chunk.columns) == expected
parser.add_argument('sign_model_path', type=str) parser.add_argument('configuration_path', type=str) parser.add_argument('output', type=str) args = parser.parse_args() config = AICTConfig.from_yaml(args.configuration_path) model_config = config.disp disp_model = load_model(args.disp_model_path) sign_model = load_model(args.sign_model_path) chunked_frames = [] chunksize = 2000 df_generator = read_telescope_data_chunked( args.data_path, config, chunksize, model_config.columns_to_read_apply + ['mc_energy'], feature_generation_config=model_config.feature_generation) for df_data, start, stop in tqdm(df_generator): df_data[model_config.delta_column] = np.deg2rad( df_data[model_config.delta_column]) df_features = convert_to_float32(df_data[model_config.features]) valid = check_valid_rows(df_features) disp_abs = disp_model.predict(df_features.loc[valid].values) disp_sign = sign_model.predict(df_features.loc[valid].values) disp = np.full(len(df_features), np.nan) disp[valid] = disp_abs * disp_sign disp = predict_disp(