def __init__(self, df, y_key, n_job=7, path=None):
    """Store the dataframe, target column and plotting options.

    :param df: dataframe whose columns will be plotted
    :param y_key: name of the target (y) column
    :param n_job: worker count for the plotting job pool
    :param path: output directory; defaults to "./plot_outs" when None
    """
    super().__init__()
    self.plot = PlotTools()
    self.n_job = n_job
    self.df = df
    self.y_key = y_key
    # fall back to the default output directory when no path is given
    self.path = "./plot_outs" if path is None else path
def test_VAE_latent_space(n_iter=100):
    """Train a VAE on MNIST and scatter-plot the 2-d latent codes, grouped by
    digit, after every training epoch.

    :param n_iter: number of single-epoch training rounds
    """
    dataset_path = """C:\\Users\\demetoir_desktop\\PycharmProjects\\MLtools\\data\\MNIST"""
    dataset_pack = MNIST().load(dataset_path)
    dataset_pack.shuffle()
    train_set = dataset_pack['train']
    full_Xs, full_Ys = train_set.full_batch()

    # split the full batch into one (Xs, Ys) pair per digit label
    labels = train_set.Ys_index_label
    idxs_labels = []
    for digit in range(10):
        mask = np.where(labels == digit)
        idxs_labels.append((full_Xs[mask], full_Ys[mask]))

    plot = PlotTools()
    model = VAE
    params = {
        'loss_type': 'VAE',
        'learning_rate': 0.01,
        'latent_code_size': 2,
        'encoder_net_shapes': (512, 256, 128, 64, 32),
        'encoder_kwargs': {
            'tail_bn': False,
            'tail_activation': 'lrelu',
            'linear_stack_bn': False,
            'linear_stack_activation': 'lrelu',
        },
        'decoder_net_shapes': (32, 64, 128, 256, 512),
        'decoder_kwargs': {
            'tail_bn': True,
            'tail_activation': 'sigmoid',
            'linear_stack_bn': True,
            'linear_stack_activation': 'relu'
        },
        'batch_size': 256,
        # 'KL_D_rate': 0.01
    }
    ae = model(**params)

    for epoch_idx in range(n_iter):
        ae.train(full_Xs, epoch=1)
        # ae.save(save_path)
        metric = ae.metric(full_Xs)
        # abort as soon as training diverges
        if np.isnan(metric):
            print(f'metric is {metric}')
            break
        print(metric)

        # one scatter cloud per digit class
        codes = [ae.code(xs) for xs, ys in idxs_labels]
        plot.scatter_2d(*codes, title=f'vae_latent_space_epoch_{epoch_idx}.png')
    del ae
def test_df_to_onehot_embedding():
    """Smoke-test df_to_onehot_embedding: bin a synthetic mixture-of-normals
    column, shuffle, then one-hot embed the binned column and print heads."""
    # 10 normals centered at 0, 2, ..., 18 — renamed `bin` -> `bins` to stop
    # shadowing the builtin, and built with a comprehension
    samples = [np.random.normal(i * 2, 1, size=[100]) for i in range(10)]
    x = np.concatenate(samples)
    df = DF({'x': x})

    # NOTE(review): `plot` is kept although unused — PlotTools construction may
    # configure global plotting state; confirm before removing
    plot = PlotTools(show=True, save=False)
    # plot.dist(df, title='before')

    bins = [-5, 5, 10, 15, 20, 30, 50]
    df = df_binning(df, 'x', bins)

    from sklearn.utils import shuffle
    df = shuffle(df)
    pprint(df.head(5))

    df = df_to_onehot_embedding(df[['x_binning']])
    pprint(df.head(5))
def __init__(self, save=True, show=False, extend='.png', dpi=300, n_job=CPU_COUNT):
    """Configure the parallel plotter and its wrapped PlotTools instance.

    :param save: write plots to disk
    :param show: display plots interactively
    :param extend: output file extension
    :param dpi: output resolution
    :param n_job: worker count forwarded to the job-pool base class
    """
    super().__init__(n_job)
    # collect the rendering options once, then build the delegate
    opts = dict(save=save, show=show, extend=extend, dpi=dpi)
    self.instance = PlotTools(**opts)
def __init__(self, df: DF, df_Xs_keys, df_Ys_key, silent=False, verbose=0):
    """Wire up the mix-ins and remember the dataframe plus its X/Y column keys.

    :param df: dataframe under analysis
    :param df_Xs_keys: feature column names
    :param df_Ys_key: target column name
    :param silent: suppress output when True
    :param verbose: verbosity level forwarded to LoggerMixIn
    """
    # initialise each mix-in explicitly, logger first
    LoggerMixIn.__init__(self, verbose)
    null_clean_methodMixIn.__init__(self)
    df_plotterMixIn.__init__(self)

    self.df = df
    self.df_Xs_keys = df_Xs_keys
    self.df_Ys_key = df_Ys_key
    self.silent = silent
    self.plot = PlotTools()
def plot_all(self):
    """Render count plots for every original column of the samsung-contest
    dataset, then a grouped count plot for every ordered column pair.

    Pairwise plot failures are printed and skipped.
    """
    path = "./data/samsung_contest/data_tansformed.csv"
    df = load_samsung(path)

    # removed dead locals: reg_cols, label_encoder_cols, onehot_col and x_cols
    # were built but never used by any plot below

    # columns as they appear in the raw contest data
    origin_cols = [
        'c00_주야',
        'c01_요일',
        'c02_사망자수',
        'c03_사상자수',
        'c04_중상자수',
        'c05_경상자수',
        'c06_부상신고자수',
        'c07_발생지시도',
        'c08_발생지시군구',
        'c09_사고유형_대분류',
        'c10_사고유형_중분류',
        'c11_법규위반',
        'c12_도로형태_대분류',
        'c13_도로형태',
        'c14_당사자종별_1당_대분류',
        'c15_당사자종별_2당_대분류',
    ]

    plot = PlotTools()
    for key in origin_cols:
        # plot.dist(df, key, title=f'dist_{key}')
        plot.count(df, key, title=f'count_{key}')

    for a_key in origin_cols:
        for b_key in origin_cols:
            try:
                plot.count(df, a_key, b_key, title=f'count_{a_key}_groupby_{b_key}')
            except Exception as e:
                # narrowed from BaseException so Ctrl-C / SystemExit still propagate
                print(a_key, b_key, e)
def test_CVAE_latent_space():
    """Train a CVAE and scatter-plot its 2-d latent codes per digit each epoch.

    NOTE(review): reads module-level globals (train_set, full_Xs, full_Ys) that
    are not defined in this function — confirm they exist at call time.
    """
    # split the full batch into one (Xs, Ys) pair per digit label
    labels = train_set.Ys_index_label
    idxs_labels = []
    for digit in range(10):
        mask = np.where(labels == digit)
        idxs_labels.append((full_Xs[mask], full_Ys[mask]))

    plot = PlotTools()
    model = CVAE
    params = {
        'loss_type': 'VAE',
        'learning_rate': 0.01,
        'latent_code_size': 2,
        'encoder_net_shapes': (512, 256, 128, 64, 32),
        'encoder_kwargs': {
            'tail_bn': False,
            'tail_activation': 'none',
            'linear_stack_bn': False,
            'linear_stack_activation': 'lrelu',
        },
        'decoder_net_shapes': (32, 64, 128, 256, 512),
        'decoder_kwargs': {
            'tail_bn': True,
            'tail_activation': 'sigmoid',
            'linear_stack_bn': True,
            'linear_stack_activation': 'relu'
        },
        'batch_size': 256,
        # 'KL_D_rate': 0.01
    }
    ae = model(**params)

    n_iter = 50
    for i in range(n_iter):
        ae.train(full_Xs, full_Ys, epoch=1)
        metric = ae.metric(full_Xs, full_Ys)
        # ae.save(save_path)
        # abort as soon as training diverges
        if np.isnan(metric):
            print(f'metric is {metric}')
            break
        print(metric)

        # conditional codes: one scatter cloud per digit class
        codes = [ae.code(xs, ys) for xs, ys in idxs_labels]
        plot.scatter_2d(*codes, title=f'CVAE_latent_space_epoch_{i}.png')
        for label, code in enumerate(codes):
            plot.scatter_2d(code, title=f'vae_latent_space_epoch_{i}_label+{label}.png')
    del ae
def test_df_binning():
    """Smoke-test df_binning: bin a synthetic mixture-of-normals column and
    print the resulting keys and data."""
    # 10 normals centered at 0, 2, ..., 18 — renamed `bin` -> `bins` to stop
    # shadowing the builtin, and built with a comprehension
    samples = [np.random.normal(i * 2, 1, size=[100]) for i in range(10)]
    x = np.concatenate(samples)
    df = DF({'x': x})

    # NOTE(review): `plot` is kept although unused — PlotTools construction may
    # configure global plotting state; confirm before removing
    plot = PlotTools(show=True, save=False)
    # plot.dist(df, title='before')

    bins = [-5, 5, 10, 15, 20, 30, 50]
    df = df_binning(df, 'x', bins)
    pprint(df.keys())
    # plot.count(df, 'bucketed_x', title='binning')
    pprint(x)
    pprint(df)
from script.model.sklearn_like_model.BaseModel import BaseModel
from script.model.sklearn_like_model.NetModule.BaseNetModule import BaseNetModule
from script.model.sklearn_like_model.NetModule.optimizer.Adam import Adam
from script.util.Logger import pprint_logger, Logger
from script.util.PlotTools import PlotTools
from script.util.deco import deco_timeit
from slackbot.SlackBot import deco_slackbot

# keep a handle on the builtin print before it is shadowed below
bprint = print
logger = Logger('bench_code', level='INFO', )
# NOTE: deliberately shadows the builtin so all `print` calls go to the logger
print = logger.info
pprint = pprint_logger(print)

# short aliases used throughout this module
# NOTE(review): `np` and `pd` are not imported on these lines — presumably
# imported elsewhere in the file; verify
NpArr = np.array
DF = pd.DataFrame
Series = pd.Series

# module-level plotter: saves to disk, never shows interactively
plot = PlotTools(save=True, show=False)


def build_model(DropoutRatio=0.5):
    """Build a Keras CNN head: two 3x3 conv+ReLU layers then 2x2 max-pool.

    NOTE(review): this definition looks truncated in this chunk — no return
    statement, and `capacity`/`DropoutRatio` are unused in the visible part.
    """
    capacity = 16
    input_layer = Input((28, 28, 1))

    # 28 * 28
    conv1 = Conv2D(32, (3, 3), padding="same")(input_layer)
    conv1 = ReLU()(conv1)

    # 14*14
    conv1 = Conv2D(64, (3, 3), padding="same")(conv1)
    conv1 = ReLU()(conv1)
    conv1 = MaxPooling2D(pool_size=(2, 2))(conv1)
def corr_heatmap(self):
    """Display (without saving) a heatmap of the dataframe's correlation matrix."""
    tools = PlotTools(save=False, show=True)
    tools.heatmap(self.df.corr())
def __init__(self):
    """Create the shared PlotTools instance used by this object's plot helpers."""
    self.plot = PlotTools()
class df_plotterMixIn:
    """Mix-in that renders per-column plots (dist, count, violin, joint-2d)
    for a dataframe, scheduling each plot on a JobPool.

    Each `_plot_*` helper is wrapped in @deco_exception_catch so one failed
    plot does not abort the whole run.
    """

    def __init__(self):
        # shared PlotTools instance used by all plot helpers
        self.plot = PlotTools()

    def plot_all(self, df, df_Xs_keys, df_Ys_key):
        """Render every per-column plot for the given dataframe."""
        self._df_cols_plot(df, df_Xs_keys, df_Ys_key)

    @deco_exception_catch
    def _plot_dist(self, df: DF, col_key: str, partial_df: DF, series: Series,
                   Xs_keys: list, Ys_key: list, path=None):
        title = f'{col_key}_plot_dist'
        self.plot.dist(df, col_key, title=title, path=f"./matplot/{title}.png")

    @deco_exception_catch
    def _plot_count(self, df: DF, col_key: str, partial_df: DF, series: Series,
                    Xs_keys: list, Ys_key: list, path=None):
        title = f'{col_key}_plot_count_bar'
        self.plot.count(df, col_key, title=title, path=f"./matplot/{title}.png")

    @deco_exception_catch
    def _plot_violin(self, df: DF, col_key: str, partial_df: DF, series: Series,
                     Xs_keys: list, Ys_key: list, path=None):
        # both orientations, saved under _1/_2 suffixes
        title = f'{col_key}_plot_violin'
        self.plot.violin_plot(df, col_key, Ys_key, path=f"./matplot/{title}_1.png", title=title)
        self.plot.violin_plot(df, Ys_key, col_key, path=f"./matplot/{title}_2.png", title=title)

    @deco_exception_catch
    def _plot_joint2d(self, df: DF, col_key: str, partial_df: DF, series: Series,
                      Xs_keys: list, Ys_key: list, path=None):
        title = f'{col_key}_plot_joint2d'
        self.plot.joint_2d(df, col_key, Ys_key, path=f"./matplot/{title}.png", title=title)

    @deco_exception_catch
    def _plot_dist_groupby(self, df: DF, col_key: str, partial_df: DF, series: Series,
                           Xs_keys: list, Ys_key: list, path=None):
        title = f'{col_key}_plot_dist_groupby'
        # BUG FIX: both calls previously wrote to the same file, so the second
        # plot overwrote the first; use _1/_2 suffixes like _plot_violin does
        self.plot.dist_groupby(df, Ys_key, col_key, df, title=title, path=f"./matplot/{title}_1.png")
        self.plot.dist_groupby(df, col_key, Ys_key, df, title=title, path=f"./matplot/{title}_2.png")

    def _df_cols_plot(self, df, df_Xs_keys, df_Ys_key):
        # fan each column's plots out to the pool; pool joins on exit
        with JobPool() as pool:
            for key in df_Xs_keys:
                col = df[[key]]
                series = df[key]
                args = (df, key, col, series, df_Xs_keys, df_Ys_key)

                pool.apply_async(self._plot_dist, args=args)
                pool.apply_async(self._plot_count, args=args)
                pool.apply_async(self._plot_violin, args=args)
                pool.apply_async(self._plot_joint2d, args=args)
class DF_PlotTools(LoggerMixIn):
    """Render dist/count/violin/joint-2d/grouped-count plots for every column
    of a dataframe in parallel, saving images under ``self.path``.

    Each ``plot_*`` method is wrapped in @deco_exception_catch so one failed
    plot does not abort the run.
    """

    def __init__(self, df, y_key, n_job=7, path=None):
        """
        :param df: dataframe to plot
        :param y_key: target column paired against every other column
        :param n_job: JobPool worker count
        :param path: output directory (default "./plot_outs")
        """
        super().__init__()
        self.plot = PlotTools()
        self.n_job = n_job
        self.df = df
        self.y_key = y_key
        self.path = path
        if self.path is None:
            self.path = "./plot_outs"

    def plot_all(self):
        """Schedule every plot for every column on the job pool."""
        with JobPool(self.n_job) as pool:
            keys = self.df.keys()
            df = self.df

            # plot dist
            for key in keys:
                pool.apply_async(self.plot_dist, [df, key])

            # plot count
            for key in keys:
                pool.apply_async(self.plot_countbar, [df, key])

            # plot violin, both orientations against the target column
            for key in keys:
                pool.apply_async(self.plot_violin, [df, key, self.y_key])
                pool.apply_async(self.plot_violin, [df, self.y_key, key])

            # plot joint2d, both orientations against the target column
            for key in keys:
                pool.apply_async(self.plot_joint2d, [df, key, self.y_key])
                pool.apply_async(self.plot_joint2d, [df, self.y_key, key])

            # count grouped by the target column
            for a_key in keys:
                pool.apply_async(self.plot_count_groupby, [df, a_key, self.y_key])

    # BUG FIX (all methods below): output paths were hardcoded to "./matplot",
    # silently ignoring the `path` accepted by __init__; they now honor self.path

    @deco_exception_catch
    def plot_dist(self, df: DF, col_key: str, title=None):
        if title is None:
            title = f'{col_key}_plot_dist'
        self.plot.dist(df, col_key, title=title, path=f"{self.path}/{title}.png")

    @deco_exception_catch
    def plot_countbar(self, df: DF, col_key: str, title=None):
        if title is None:
            title = f'{col_key}_plot_count_bar'
        self.plot.count(df, col_key, title=title, path=f"{self.path}/{title}.png")

    @deco_exception_catch
    def plot_violin(self, df: DF, a_col, b_col, title=None):
        if title is None:
            title = f'{a_col}_{b_col}_plot_violin'
        self.plot.violin_plot(df, a_col, b_col, path=f"{self.path}/{title}.png", title=title)

    @deco_exception_catch
    def plot_joint2d(self, df: DF, a_col, b_col, title=None):
        if title is None:
            title = f'{a_col}_{b_col}_plot_joint2d'
        self.plot.joint_2d(df, a_col, b_col, path=f"{self.path}/{title}.png", title=title)

    @deco_exception_catch
    def plot_dist_groupby(self, df: DF, a_col, groupby_cols, title=None):
        if title is None:
            title = f'{a_col}_groupby_{groupby_cols}_plot_dist_groupby'
        self.plot.dist_groupby(df, a_col, groupby_cols, title=title, path=f"{self.path}/{title}.png")

    @deco_exception_catch
    def plot_count_groupby(self, df: DF, a_col, groupby_cols, title=None):
        if title is None:
            title = f'{a_col}_groupby_{groupby_cols}_count'
        self.plot.count(df, a_col, groupby_cols, title=title, path=f"{self.path}/{title}.png")

    @deco_exception_catch
    def plot_violin_groupby(self, df: DF, a_col, b_col, groupby_cols, title=None):
        if title is None:
            title = f'{a_col}_{b_col}_groupby_{groupby_cols}_violin'
        self.plot.violin_plot(df, a_col, b_col, groupby_cols, title=title,
                              path=f"{self.path}/{title}.png")
def test_VAE_latent_space_grid_search(n_iter=6):
    """Grid-search VAE hyper-parameters on MNIST; for each parameter set, train
    and scatter-plot the 2-d latent space per digit each epoch.

    NOTE(review): `n_iter` is unused — the inner training loop is fixed at 50
    epochs; confirm whether it should drive that loop.
    """
    dataset_path = """C:\\Users\\demetoir_desktop\\PycharmProjects\\MLtools\\data\\MNIST"""
    dataset_pack = MNIST().load(dataset_path)
    dataset_pack.shuffle()
    train_set = dataset_pack['train']
    full_Xs, full_Ys = train_set.full_batch()

    # split the full batch into one (Xs, Ys) pair per digit label
    x = train_set.Ys_index_label
    idxs_labels = []
    for i in range(10):
        idxs_label = np.where(x == i)
        idxs_labels += [(full_Xs[idxs_label], full_Ys[idxs_label])]

    plot = PlotTools()
    model = VAE
    # removed unused locals: save_path, BOOLs, linear_stack_activation

    param_grid = params_to_dict(
        latent_code_size=[2],
        learning_rate=[0.005],
        encoder_net_shapes=[(512, 256, 128, 64, 32)],
        decoder_net_shapes=[(32, 64, 128, 256, 512)],
        batch_size=[256],
        # KL_D_rate=[0.01],
        loss_type=['VAE'],
        encoder_kwargs=params_to_dict(
            linear_stack_bn=[False],
            linear_stack_activation=['relu', 'lrelu', 'elu'],
            tail_bn=[False],
            tail_activation=['lrelu', 'none'],
        ),
        decoder_kwargs=params_to_dict(
            linear_stack_bn=[True],
            linear_stack_activation=['relu'],
            tail_bn=[False],
            tail_activation=['sigmoid'],
        ))
    param_grid = param_grid_full(param_grid)

    # persist the searched grid for later inspection
    df = DF({'params': list(param_grid)})
    df.to_csv('./params.csv')

    for param_idx, params in enumerate(param_grid):
        pprint(param_idx, params)
        ae = model(**params)
        for i in range(50):
            ae.train(full_Xs, epoch=1)
            # ae.save(save_path)
            metric = ae.metric(full_Xs)
            # abort this parameter set as soon as training diverges
            if np.isnan(metric):
                print(f'param_idx:{param_idx}, metric is {metric}')
                break
            print(metric)

            # one scatter cloud per digit class (comprehension replaces append loop)
            codes = [ae.code(xs) for xs, ys in idxs_labels]
            plot.scatter_2d(
                *codes, title=f'param_idx_{param_idx}_vae_latent_space_epoch_{i}.png')
        del ae
def test_AAE_latent_space():
    """Grid-search AAE hyper-parameters; per epoch, scatter-plot the 2-d latent
    space and tile originals/recons/generated/augmented images.

    NOTE(review): reads module-level globals (train_set, full_Xs, full_Ys,
    sample_Xs, sample_Ys) not defined in this function — confirm they exist at
    call time.
    """
    # split the full batch into one (Xs, Ys) pair per digit label
    x = train_set.Ys_index_label
    idxs_labels = []
    for i in range(10):
        idxs_label = np.where(x == i)
        idxs_labels += [(full_Xs[idxs_label], full_Ys[idxs_label])]

    plot = PlotTools()
    model = AAE
    # removed unused locals: save_path, BOOLs, linear_stack_activation

    param_grid = params_to_dict(
        latent_code_size=[2],
        learning_rate=[0.01],
        encoder_net_shapes=[(512, 256)],
        decoder_net_shapes=[(256, 512)],
        batch_size=[100],
        # KL_D_rate=[0.01],
        encoder_kwargs=params_to_dict(
            linear_stack_bn=[False],
            linear_stack_activation=['relu'],
            tail_bn=[False],
            tail_activation=['none'],
        ),
        decoder_kwargs=params_to_dict(
            linear_stack_bn=[False],
            linear_stack_activation=['relu'],
            tail_bn=[False],
            tail_activation=['sigmoid'],
        ))
    param_grid = param_grid_full(param_grid)

    # persist the searched grid for later inspection
    df = DF({'params': list(param_grid)})
    df.to_csv('./params.csv')

    for param_idx, params in enumerate(param_grid):
        pprint(param_idx, params)
        ae = model(**params)
        for i in range(100):
            ae.train(full_Xs, full_Ys, epoch=1)
            metric = ae.metric(full_Xs, full_Ys)
            # AAE metric is a dict; stop when any component diverges
            # (generator instead of a throwaway list inside any())
            if any(np.isnan(val) for val in metric.values()):
                print(f'param_idx:{param_idx}, metric is {metric}')
                break
            print(metric)

            # one scatter cloud per digit class (comprehension replaces append loop)
            codes = [ae.code(xs) for xs, ys in idxs_labels]
            plot.scatter_2d(
                *codes, title=f'param_idx_{param_idx}/aae_latent_space_epoch_{i}.png')
            # for label, code in enumerate(codes):
            #     plot.scatter_2d(code, title=f'param_idx_{param_idx}/aae_latent_space_epoch_{i}_label+{label}.png')

            # tile: samples, reconstructions, sharpened recons, generated
            # images and latent-code-walk augmentations
            recon = ae.recon(sample_Xs, sample_Ys)
            gen = ae.generate(full_Ys[:30])
            code_walk = np.concatenate(
                [ae.augmentation(sample_Xs, sample_Ys) for _ in range(5)], axis=0)
            recon_sharpen = ae.recon_sharpen(sample_Xs, sample_Ys)

            np_img = np.concatenate(
                [sample_Xs, recon, recon_sharpen, gen, code_walk])
            np_img = np_img_float32_to_uint8(np_img)
            file_name = f'./matplot/param_idx_{param_idx}/aae_img_epoch_{i}.png'
            plot.plot_image_tile(np_img, file_name, column=5)
        del ae
def test_CVAE_latent_space_grid_search():
    """Grid-search CVAE hyper-parameters; per epoch, scatter-plot the 2-d
    latent space per digit and save a tile of originals/recons/generated.

    NOTE(review): reads module-level globals (train_set, full_Xs, full_Ys,
    sample_Xs, sample_Ys) not defined in this function — confirm they exist at
    call time.
    """
    # split the full batch into one (Xs, Ys) pair per digit label
    x = train_set.Ys_index_label
    idxs_labels = []
    for i in range(10):
        idxs_label = np.where(x == i)
        idxs_labels += [(full_Xs[idxs_label], full_Ys[idxs_label])]

    plot = PlotTools()
    model = CVAE
    # removed unused locals: save_path, BOOLs, linear_stack_activation

    param_grid = params_to_dict(
        latent_code_size=[2],
        learning_rate=[0.005],
        encoder_net_shapes=[(512, 256, 128, 64, 32)],
        decoder_net_shapes=[(32, 64, 128, 256, 512)],
        batch_size=[256],
        # KL_D_rate=[0.01],
        loss_type=['VAE'],
        encoder_kwargs=params_to_dict(
            linear_stack_bn=[False],
            linear_stack_activation=['relu', 'lrelu', 'elu'],
            tail_bn=[False],
            tail_activation=['lrelu', 'none'],
        ),
        decoder_kwargs=params_to_dict(
            linear_stack_bn=[True],
            linear_stack_activation=['relu'],
            tail_bn=[False],
            tail_activation=['sigmoid'],
        )
    )
    param_grid = param_grid_full(param_grid)

    # persist the searched grid for later inspection
    df = DF({
        'params': list(param_grid)
    })
    df.to_csv('./params.csv')

    for param_idx, params in enumerate(param_grid):
        pprint(param_idx, params)
        ae = model(**params)
        for i in range(10):
            ae.train(full_Xs, full_Ys, epoch=1)
            # ae.save(save_path)
            metric = ae.metric(full_Xs, full_Ys)
            # abort this parameter set as soon as training diverges
            if np.isnan(metric):
                print(f'param_idx:{param_idx}, metric is {metric}')
                break
            print(metric)

            # one scatter cloud per digit class (comprehension replaces append loop)
            codes = [ae.code(xs, ys) for xs, ys in idxs_labels]
            plot.scatter_2d(*codes, title=f'param_idx_{param_idx}/vae_latent_space_epoch_{i}.png')
            # for label, code in enumerate(codes):
            #     plot.scatter_2d(code, title=f'param_idx_{param_idx}/vae_latent_space_epoch_{i}_label+{label}.png')

            # tile originals, reconstructions and generated samples into one image
            recon = ae.recon(sample_Xs, sample_Ys)
            gen = ae.generate(sample_Ys)
            np_img = np.concatenate([sample_Xs, recon, gen])
            np_img = np_img_float32_to_uint8(np_img)

            file_name = f'./matplot/param_idx_{param_idx}/vae_img_epoch_{i}.png'
            tile = np_img_to_tile(np_img, column_size=5)
            np_image_save(tile, file_name)
        del ae