def extract_stats_from_plate(csv, dest):
    """Describe the 'mock' rows of one plate and dump the summaries to CSV.

    The plate is loaded with ``load_plate_csv``, the correlation columns
    reported by ``list_columns`` are dropped, and only rows whose index
    level 1 equals ``'mock'`` are kept.  Two kinds of files are written into
    ``<dest>/<plate_number>/``:

    * ``CW_<plate_number>_Summery.csv`` -- ``describe()`` of the rows grouped
      by ``Plate`` and ``Image_Metadata_Well``;
    * ``CW_<plate_number>_<stat>.csv`` -- for every statistic produced by
      that ``describe()``, a second ``describe()`` taken across the wells.

    Args:
        csv: Name of the plate CSV.  The text before the first ``"."`` is
            used as the plate number (and as the output sub-folder name).
        dest: Directory under which the plate folder is created.

    Returns:
        A 3-tuple ``(desc, by_stat, desc_wells)`` where ``desc`` is the
        plate-level ``describe()`` (grouped by ``Plate``), ``by_stat`` maps
        each statistic name to its cross-section of ``desc``, and
        ``desc_wells`` is the per-well ``describe()`` table.
    """
    print(f'Processing: {csv}')

    # Everything before the first dot identifies the plate.
    plate_number = csv.split(".")[0]
    plate_folder = path.join(dest, plate_number)
    makedirs(plate_folder, exist_ok=True)

    df = load_plate_csv(csv)
    _, corr_cols, _ = list_columns(df)
    df.drop(columns=corr_cols, inplace=True)
    mock_rows = df[df.index.isin(['mock'], level=1)]

    # Per-well statistics, plus one "statistics of statistics" file per stat.
    desc_wells = mock_rows.groupby(['Plate', 'Image_Metadata_Well']).describe()
    summary_file = path.join(plate_folder, f'CW_{plate_number}_Summery.csv')
    desc_wells.to_csv(summary_file)

    for stat in desc_wells.columns.unique(level=1):
        across_wells = desc_wells.xs(stat, level=1, axis=1).describe()
        stat_file = path.join(plate_folder, f'CW_{plate_number}_{stat}.csv')
        across_wells.to_csv(stat_file)

    # Plate-level statistics, also split up by statistic for the caller.
    desc = mock_rows.groupby(['Plate']).describe()
    by_stat = {
        stat: desc.xs(stat, level=1, axis=1)
        for stat in desc.columns.unique(level=1)
    }
    return desc, by_stat, desc_wells
def extract_dist_score(plate_csv, well_type='treated', **kwargs):
    """Score wells by their Euclidean distance from the median mock profile.

    The plate is loaded with ``load_plate_csv`` and averaged per
    (``Plate``, ``LABEL_FIELD``, ``Metadata_broad_sample``,
    ``Image_Metadata_Well``) group.  For every channel reported by
    ``list_columns`` -- plus a synthetic ``'ALL'`` channel made of all channel
    columns together -- the median of the rows labelled ``'mock'`` (index
    level 1) is taken as the reference profile, and each row labelled
    ``well_type`` gets the L2 norm of its difference from that reference.

    Args:
        plate_csv: Plate CSV handed to ``load_plate_csv``.
        well_type: Index level-1 label of the rows to score.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        DataFrame with one column per channel (including ``'ALL'``) and one
        row per scored well.
    """
    group_keys = [
        'Plate', LABEL_FIELD, 'Metadata_broad_sample', 'Image_Metadata_Well'
    ]
    df = load_plate_csv(plate_csv).groupby(by=group_keys).apply(
        lambda g: g.mean())

    _, _, channels = list_columns(df)
    # 'ALL' is added after flattening, so it only contains real channel columns.
    channels['ALL'] = [
        col for ch_cols in channels.values() for col in ch_cols
    ]

    # Row masks do not depend on the channel, so build them once.
    is_mock = df.index.isin(['mock'], level=1)
    is_target = df.index.isin([well_type], level=1)

    scores = []
    for channel, cols in channels.items():
        mock_median = df.loc[is_mock, cols].median()
        target_rows = df.loc[is_target, cols]
        distances = target_rows.apply(
            lambda row: np.linalg.norm(row - mock_median), axis=1)
        del target_rows
        distances.name = channel
        scores.append(distances)

    # Release the averaged plate before building the result table.
    del df
    return pd.concat(scores, axis=1)
def extract_dist_score_norm_before(plate_csv, well_type='treated', **kwargs):
    """Score wells by Euclidean distance from the mock median, on z-scores.

    The data comes from ``load_pure_zscores`` (i.e. normalisation happens
    before the distance is computed).  For every channel reported by
    ``list_columns`` -- plus a synthetic ``'ALL'`` channel made of all channel
    columns together -- the median of the rows labelled ``'mock'`` (index
    level 1) is the reference profile, and each row labelled ``well_type``
    gets the L2 norm of its difference from that reference.

    Args:
        plate_csv: Plate CSV handed to ``load_pure_zscores``.
        well_type: Index level-1 label of the rows to score.
        **kwargs: Must contain ``'raw'`` and ``'inter_channel'``; both are
            forwarded to ``load_pure_zscores``.

    Returns:
        DataFrame with one column per channel (including ``'ALL'``) and one
        row per scored row of the z-score table.

    Raises:
        KeyError: If ``'raw'`` or ``'inter_channel'`` is missing from kwargs.
    """
    df = load_pure_zscores(plate_csv, kwargs['raw'], kwargs['inter_channel'])

    _, _, channels = list_columns(df)
    # 'ALL' is added after flattening, so it only contains real channel columns.
    channels['ALL'] = [
        col for ch_cols in channels.values() for col in ch_cols
    ]

    # Row masks do not depend on the channel, so build them once.
    is_mock = df.index.isin(['mock'], level=1)
    is_target = df.index.isin([well_type], level=1)

    scores = []
    for channel, cols in channels.items():
        mock_median = df.loc[is_mock, cols].median()
        target_rows = df.loc[is_target, cols]
        distances = target_rows.apply(
            lambda row: np.linalg.norm(row - mock_median), axis=1)
        del target_rows
        distances.name = channel
        scores.append(distances)

    # Release the z-score table before building the result table.
    del df
    return pd.concat(scores, axis=1)
def extract_score(plate_csv, by_well=True, by_channel=True, abs_zscore=True,
                  well_type='treated', raw=False, thresh=4,
                  inter_channel=True):
    """Score rows by the fraction of features whose z-score reaches `thresh`.

    Z-scores are loaded with ``load_pure_zscores``, optionally restricted to
    one well type, optionally made absolute, and then binarised: a cell
    becomes 0 when its value is strictly below ``thresh`` and 1 otherwise.

    Args:
        plate_csv: Plate CSV handed to ``load_pure_zscores``.
        by_well: When true, average the result per
            (``Plate``, ``Metadata_broad_sample``, ``Image_Metadata_Well``).
        by_channel: When true, collapse the binarised features into one
            column per channel (fraction of that channel's features flagged)
            plus an ``"ALL"`` column over every channel feature, and return
            only the ``CHANNELS + ["ALL"]`` columns.
        abs_zscore: When true, threshold the absolute z-scores.
        well_type: ``'treated'`` or ``'mock'`` keeps only rows whose index
            level 1 equals that label; any other value keeps every row.
        raw: Forwarded to ``load_pure_zscores``.
        thresh: Binarisation threshold.
        inter_channel: Forwarded to ``load_pure_zscores``.

    Returns:
        DataFrame of scores -- per well when ``by_well`` is true, otherwise
        per input row.
    """
    df = load_pure_zscores(plate_csv, raw, inter_channel)

    if well_type in ('treated', 'mock'):
        df_selected = df[df.index.isin([well_type], level=1)]
    else:
        df_selected = df
    del df

    if abs_zscore:
        df_selected = df_selected.abs()

    # Vectorised replacement for the former per-cell Python lambda.
    # "not (value < thresh)" is used rather than "value >= thresh" so that
    # NaN cells keep mapping to 1, exactly as "0 if y < thresh else 1" did.
    df_selected = (~df_selected.lt(thresh)).astype('int64')

    if by_channel:
        _, _, channels = list_columns(df_selected)
        for channel, cols in channels.items():
            df_selected[channel] = df_selected[cols].sum(axis=1) / len(cols)

        channels_cols = [
            col for ch_cols in channels.values() for col in ch_cols
        ]
        df_selected["ALL"] = (
            df_selected[channels_cols].sum(axis=1) / len(channels_cols))

        data = df_selected[CHANNELS + ["ALL"]]
    else:
        data = df_selected
    del df_selected

    if not by_well:
        return data

    gb = data.groupby(
        by=['Plate', 'Metadata_broad_sample', 'Image_Metadata_Well'])
    del data
    return gb.apply(lambda g: g.mean())
def extract_dist_score_norm_after(plate_csv, well_type='treated', **kwargs):
    """Distance-from-mock scores, standardised against the mock distances.

    The plate is loaded with ``load_plate_csv`` and averaged per
    (``Plate``, ``LABEL_FIELD``, ``Metadata_broad_sample``,
    ``Image_Metadata_Well``) group.  For every channel reported by
    ``list_columns`` -- plus a synthetic ``'ALL'`` channel made of all channel
    columns together:

    1. the median of the ``'mock'`` rows (index level 1) is the reference;
    2. the L2 distance from that reference is computed for the mock rows and
       for the ``well_type`` rows;
    3. a ``StandardScaler`` is fitted on the mock distances and applied to
       the ``well_type`` distances (normalisation happens after the distance
       is computed).

    Args:
        plate_csv: Plate CSV handed to ``load_plate_csv``.
        well_type: Index level-1 label of the rows to score.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        DataFrame with one column per channel (including ``'ALL'``) and one
        row per scored well, holding the standardised distances.
    """
    df = load_plate_csv(plate_csv)
    df = df.groupby(by=[
        'Plate', LABEL_FIELD, 'Metadata_broad_sample', 'Image_Metadata_Well'
    ]).apply(lambda g: g.mean())

    def calculate_distance_from(v):
        """Return a row function giving the L2 distance from profile `v`."""
        return lambda x: np.linalg.norm(x - v)

    _, _, channels = list_columns(df)
    all_cols = [col for ch_cols in channels.values() for col in ch_cols]
    channels['ALL'] = all_cols

    # Row masks do not depend on the channel, so build them once.
    is_mock = df.index.isin(['mock'], level=1)
    is_target = df.index.isin([well_type], level=1)

    scores = []
    for channel, cols in channels.items():
        df_mck = df[is_mock][cols]
        mck_profile = df_mck.median()
        df_trt = df[is_target][cols]

        dist_func = calculate_distance_from(mck_profile)

        # Fix: the reference distribution must come from the mock wells.
        # The previous code computed it from df_trt, so the scaler was fitted
        # on the very distances it was then asked to standardise.
        mck_dist = df_mck.apply(dist_func, axis=1)
        del df_mck
        trt_dist = df_trt.apply(dist_func, axis=1)
        del df_trt

        scaler = StandardScaler()
        scaler.fit(mck_dist.to_numpy().reshape(-1, 1))
        del mck_dist

        # StandardScaler works on 2-D arrays; flatten back to a 1-D Series.
        scaled = scaler.transform(trt_dist.to_numpy().reshape(-1, 1))
        cur_scores = pd.Series(scaled.reshape(-1),
                               index=trt_dist.index,
                               name=channel)
        del trt_dist
        scores.append(cur_scores)

    # Release the averaged plate before building the result table.
    del df
    scores_df = pd.concat(scores, axis=1)
    return scores_df