Пример #1
0
    def __init__(self, df, y_key, n_job=7, path=None):
        """Store the plotting inputs and create the ``PlotTools`` helper.

        Args:
            df: stored as ``self.df``.
            y_key: stored as ``self.y_key``.
            n_job: stored as ``self.n_job``.
            path: stored as ``self.path``; ``"./plot_outs"`` is used when it
                is None.
        """
        super().__init__()
        self.plot = PlotTools()
        self.df, self.y_key, self.n_job = df, y_key, n_job
        # Only a missing path falls back to the default directory.
        self.path = "./plot_outs" if path is None else path
Пример #2
0
def test_VAE_latent_space(n_iter=100):
    """Train a VAE with a 2-d latent code on MNIST and plot its latent space.

    Each round trains for one epoch on the full training batch, prints the
    metric, and hands the codes of the ten label groups to
    ``plot.scatter_2d``. The loop stops early once the metric is NaN.

    Args:
        n_iter: maximum number of one-epoch training rounds.
    """
    dataset_path = """C:\\Users\\demetoir_desktop\\PycharmProjects\\MLtools\\data\\MNIST"""
    dataset_pack = MNIST().load(dataset_path)
    dataset_pack.shuffle()
    train_set = dataset_pack['train']
    full_Xs, full_Ys = train_set.full_batch()

    # One (Xs, Ys) pair per index label 0..9.
    index_labels = train_set.Ys_index_label
    samples_by_label = []
    for label in range(10):
        picked = np.where(index_labels == label)
        samples_by_label.append((full_Xs[picked], full_Ys[picked]))

    plot = PlotTools()

    encoder_kwargs = {
        'tail_bn': False,
        'tail_activation': 'lrelu',
        'linear_stack_bn': False,
        'linear_stack_activation': 'lrelu',
    }
    decoder_kwargs = {
        'tail_bn': True,
        'tail_activation': 'sigmoid',
        'linear_stack_bn': True,
        'linear_stack_activation': 'relu'
    }
    params = {
        'loss_type': 'VAE',
        'learning_rate': 0.01,
        'latent_code_size': 2,
        'encoder_net_shapes': (512, 256, 128, 64, 32),
        'encoder_kwargs': encoder_kwargs,
        'decoder_net_shapes': (32, 64, 128, 256, 512),
        'decoder_kwargs': decoder_kwargs,
        'batch_size': 256,
    }
    ae = VAE(**params)

    for i in range(n_iter):
        ae.train(full_Xs, epoch=1)
        metric = ae.metric(full_Xs)
        if np.isnan(metric):
            # A NaN metric ends the run; no plot is drawn for this round.
            print(f'metric is {metric}')
            break
        print(metric)

        codes = []
        for xs, _ in samples_by_label:
            codes.append(ae.code(xs))
        plot.scatter_2d(*codes, title=f'vae_latent_space_epoch_{i}.png')

    del ae
Пример #3
0
def test_df_to_onehot_embedding():
    """Bin a synthetic column, shuffle the frame and one-hot embed the bins.

    Column ``x`` is the concatenation of ten batches of 100 normal samples
    drawn with means 0, 2, ..., 18 and scale 1. The first five rows are
    printed before and after ``df_to_onehot_embedding``.
    """
    batches = [np.random.normal(i * 2, 1, size=[100]) for i in range(10)]
    df = DF({'x': np.concatenate(batches)})

    # Instantiated as in the original flow; not used further in this test.
    plot = PlotTools(show=True, save=False)

    bin_edges = [-5, 5, 10, 15, 20, 30, 50]
    df = df_binning(df, 'x', bin_edges)

    from sklearn.utils import shuffle
    df = shuffle(df)
    pprint(df.head(5))

    # Only the binned column is embedded.
    df = df_to_onehot_embedding(df[['x_binning']])
    pprint(df.head(5))
Пример #4
0
 def __init__(self, save=True, show=False, extend='.png', dpi=300,
              n_job=CPU_COUNT):
     """Initialise the base with ``n_job`` and wrap a ``PlotTools`` instance.

     Args:
         save, show, extend, dpi: forwarded unchanged, by keyword, to
             ``PlotTools``.
         n_job: passed to the base-class initialiser.
     """
     super().__init__(n_job)
     plot_options = dict(save=save, show=show, extend=extend, dpi=dpi)
     self.instance = PlotTools(**plot_options)
Пример #5
0
    def __init__(self, df: DF, df_Xs_keys, df_Ys_key, silent=False, verbose=0):
        """Initialise the three mixins explicitly, then store the inputs.

        Args:
            df: stored as ``self.df``.
            df_Xs_keys: stored as ``self.df_Xs_keys``.
            df_Ys_key: stored as ``self.df_Ys_key``.
            silent: stored as ``self.silent``.
            verbose: forwarded to ``LoggerMixIn.__init__``.
        """
        # Bases are initialised by name, in this fixed order.
        LoggerMixIn.__init__(self, verbose)
        null_clean_methodMixIn.__init__(self)
        df_plotterMixIn.__init__(self)

        self.df, self.silent = df, silent
        self.df_Xs_keys, self.df_Ys_key = df_Xs_keys, df_Ys_key
        self.plot = PlotTools()
Пример #6
0
    def plot_all(self):
        """Draw count plots for every original column and every column pair.

        Loads the frame through ``load_samsung``, calls ``plot.count`` once
        per original column, then once per ordered pair of original columns
        (a column is also paired with itself). A failing pair is reported via
        ``print`` and skipped so the remaining pairs are still attempted.
        """
        path = "./data/samsung_contest/data_tansformed.csv"
        df = load_samsung(path)

        origin_cols = [
            'c00_주야',
            'c01_요일',
            'c02_사망자수',
            'c03_사상자수',
            'c04_중상자수',
            'c05_경상자수',
            'c06_부상신고자수',
            'c07_발생지시도',
            'c08_발생지시군구',
            'c09_사고유형_대분류',
            'c10_사고유형_중분류',
            'c11_법규위반',
            'c12_도로형태_대분류',
            'c13_도로형태',
            'c14_당사자종별_1당_대분류',
            'c15_당사자종별_2당_대분류',
        ]

        plot = PlotTools()
        for key in origin_cols:
            plot.count(df, key, title=f'count_{key}')

        for a_key in origin_cols:
            for b_key in origin_cols:
                try:
                    plot.count(df, a_key, b_key, title=f'count_{a_key}_groupby_{b_key}')
                except Exception as e:
                    # Best effort per pair. Catch Exception rather than
                    # BaseException so KeyboardInterrupt / SystemExit can
                    # still abort this long-running loop.
                    print(a_key, b_key, e)
Пример #7
0
def test_CVAE_latent_space():
    """Train a CVAE with a 2-d latent code and plot its latent space.

    Reads ``train_set``, ``full_Xs`` and ``full_Ys`` from the enclosing
    module. For up to 50 one-epoch rounds the metric is printed, the codes of
    all ten label groups are drawn together, and then each group's codes are
    drawn on their own. The loop stops early once the metric is NaN.
    """
    # One (Xs, Ys) pair per index label 0..9.
    index_labels = train_set.Ys_index_label
    samples_by_label = []
    for digit in range(10):
        picked = np.where(index_labels == digit)
        samples_by_label.append((full_Xs[picked], full_Ys[picked]))

    plot = PlotTools()

    encoder_kwargs = {
        'tail_bn': False,
        'tail_activation': 'none',
        'linear_stack_bn': False,
        'linear_stack_activation': 'lrelu',
    }
    decoder_kwargs = {
        'tail_bn': True,
        'tail_activation': 'sigmoid',
        'linear_stack_bn': True,
        'linear_stack_activation': 'relu'
    }
    params = {
        'loss_type': 'VAE',
        'learning_rate': 0.01,
        'latent_code_size': 2,
        'encoder_net_shapes': (512, 256, 128, 64, 32),
        'encoder_kwargs': encoder_kwargs,
        'decoder_net_shapes': (32, 64, 128, 256, 512),
        'decoder_kwargs': decoder_kwargs,
        'batch_size': 256,
    }
    ae = CVAE(**params)

    n_iter = 50
    for i in range(n_iter):
        ae.train(full_Xs, full_Ys, epoch=1)
        metric = ae.metric(full_Xs, full_Ys)
        if np.isnan(metric):
            # A NaN metric ends the run; nothing is plotted for this round.
            print(f'metric is {metric}')
            break
        print(metric)

        codes = [ae.code(xs, ys) for xs, ys in samples_by_label]

        # All label groups in one figure, then one figure per label.
        plot.scatter_2d(*codes, title=f'CVAE_latent_space_epoch_{i}.png')
        for label, code in enumerate(codes):
            plot.scatter_2d(code, title=f'vae_latent_space_epoch_{i}_label+{label}.png')

    del ae
Пример #8
0
def test_df_binning():
    """Bin a synthetic column with ``df_binning`` and print the outcome.

    Column ``x`` is the concatenation of ten batches of 100 normal samples
    drawn with means 0, 2, ..., 18 and scale 1. The resulting keys, the raw
    samples and the binned frame are printed.
    """
    batches = [np.random.normal(i * 2, 1, size=[100]) for i in range(10)]
    x = np.concatenate(batches)
    df = DF({'x': x})

    # Instantiated as in the original flow; not used further in this test.
    plot = PlotTools(show=True, save=False)

    bin_edges = [-5, 5, 10, 15, 20, 30, 50]
    df = df_binning(df, 'x', bin_edges)

    pprint(df.keys())
    pprint(x)
    pprint(df)
Пример #9
0
from script.model.sklearn_like_model.BaseModel import BaseModel
from script.model.sklearn_like_model.NetModule.BaseNetModule import BaseNetModule
from script.model.sklearn_like_model.NetModule.optimizer.Adam import Adam
from script.util.Logger import pprint_logger, Logger
from script.util.PlotTools import PlotTools
from script.util.deco import deco_timeit
from slackbot.SlackBot import deco_slackbot

# Keep a handle on the current ``print`` before the name is rebound below.
bprint = print
logger = Logger('bench_code', level='INFO', )
# From here on, ``print(...)`` in this module calls ``logger.info``.
print = logger.info
# ``pprint`` is built on top of the rebound ``print``.
pprint = pprint_logger(print)
# Short aliases for the numpy / pandas constructors.
NpArr = np.array
DF = pd.DataFrame
Series = pd.Series
# Module-level plotter constructed with save=True, show=False.
plot = PlotTools(save=True, show=False)


def build_model(DropoutRatio=0.5):
    """Start a convolutional stack on a ``(28, 28, 1)`` input.

    NOTE(review): only the first two convolutions and one pooling layer are
    visible here; the rest of the definition (including whatever it returns
    and any use of ``DropoutRatio`` / ``capacity``) appears to be cut off.
    """
    capacity = 16
    input_layer = Input((28, 28, 1))
    # 28 * 28
    conv1 = Conv2D(32, (3, 3), padding="same")(input_layer)
    conv1 = ReLU()(conv1)

    # 14*14
    # NOTE(review): the "14*14" label above presumably refers to the size
    # after the 2x2 pooling below, not to this convolution - confirm.
    conv1 = Conv2D(64, (3, 3), padding="same")(conv1)
    conv1 = ReLU()(conv1)

    conv1 = MaxPooling2D(pool_size=(2, 2))(conv1)
Пример #10
0
 def corr_heatmap(self):
     """Pass ``self.df.corr()`` to ``heatmap`` on a fresh ``PlotTools``.

     The plotter is created with ``save=False, show=True``.
     """
     PlotTools(save=False, show=True).heatmap(self.df.corr())
Пример #11
0
 def __init__(self):
     """Create the ``PlotTools`` instance stored on ``self.plot``."""
     self.plot = PlotTools()
Пример #12
0
class df_plotterMixIn:
    """Mixin that draws per-column plots of a frame through ``PlotTools``.

    Every ``_plot_*`` helper shares one signature so that ``_df_cols_plot``
    can submit them to a ``JobPool`` with the same argument tuple. Output
    files are written under ``./matplot/``.
    """

    def __init__(self):
        """Create the ``PlotTools`` instance used by all plot helpers."""
        self.plot = PlotTools()

    def plot_all(self, df, df_Xs_keys, df_Ys_key):
        """Plot every column listed in ``df_Xs_keys``; see ``_df_cols_plot``."""
        self._df_cols_plot(df, df_Xs_keys, df_Ys_key)

    @deco_exception_catch
    def _plot_dist(self,
                   df: DF,
                   col_key: str,
                   partial_df: DF,
                   series: Series,
                   Xs_keys: list,
                   Ys_key: list,
                   path=None):
        """Draw ``plot.dist`` for ``col_key``.

        ``partial_df``, ``series``, ``Xs_keys``, ``Ys_key`` and ``path`` are
        accepted only to keep the shared helper signature; they are not read.
        """
        title = f'{col_key}_plot_dist'
        self.plot.dist(df, col_key, title=title, path=f"./matplot/{title}.png")

    @deco_exception_catch
    def _plot_count(self,
                    df: DF,
                    col_key: str,
                    partial_df: DF,
                    series: Series,
                    Xs_keys: list,
                    Ys_key: list,
                    path=None):
        """Draw ``plot.count`` for ``col_key``.

        The remaining parameters exist only for the shared helper signature.
        """
        title = f'{col_key}_plot_count_bar'
        self.plot.count(df,
                        col_key,
                        title=title,
                        path=f"./matplot/{title}.png")

    @deco_exception_catch
    def _plot_violin(self,
                     df: DF,
                     col_key: str,
                     partial_df: DF,
                     series: Series,
                     Xs_keys: list,
                     Ys_key: list,
                     path=None):
        """Draw two violin plots: ``col_key`` vs ``Ys_key`` and the reverse.

        The two orientations are written to ``..._1.png`` and ``..._2.png``.
        """
        title = f'{col_key}_plot_violin'
        self.plot.violin_plot(df,
                              col_key,
                              Ys_key,
                              path=f"./matplot/{title}_1.png",
                              title=title)
        self.plot.violin_plot(df,
                              Ys_key,
                              col_key,
                              path=f"./matplot/{title}_2.png",
                              title=title)

    @deco_exception_catch
    def _plot_joint2d(self,
                      df: DF,
                      col_key: str,
                      partial_df: DF,
                      series: Series,
                      Xs_keys: list,
                      Ys_key: list,
                      path=None):
        """Draw ``plot.joint_2d`` of ``col_key`` against ``Ys_key``."""
        title = f'{col_key}_plot_joint2d'
        self.plot.joint_2d(df,
                           col_key,
                           Ys_key,
                           path=f"./matplot/{title}.png",
                           title=title)

    @deco_exception_catch
    def _plot_dist_groupby(self,
                           df: DF,
                           col_key: str,
                           partial_df: DF,
                           series: Series,
                           Xs_keys: list,
                           Ys_key: list,
                           path=None):
        """Draw ``plot.dist_groupby`` in both key orders.

        Each order gets its own output file (``..._1.png`` / ``..._2.png``),
        mirroring ``_plot_violin``, so the second plot no longer overwrites
        the first.
        """
        title = f'{col_key}_plot_dist_groupby'
        # NOTE(review): ``df`` is also passed as the fourth positional
        # argument in both calls; confirm against the signature of
        # PlotTools.dist_groupby.
        self.plot.dist_groupby(df,
                               Ys_key,
                               col_key,
                               df,
                               title=title,
                               path=f"./matplot/{title}_1.png")
        self.plot.dist_groupby(df,
                               col_key,
                               Ys_key,
                               df,
                               title=title,
                               path=f"./matplot/{title}_2.png")

    def _df_cols_plot(self, df, df_Xs_keys, df_Ys_key):
        """Submit the dist, count, violin and joint2d helpers for every key.

        ``_plot_dist_groupby`` is not scheduled here.
        """
        with JobPool() as pool:
            for key in df_Xs_keys:
                col = df[[key]]
                series = df[key]
                # Same argument tuple for every helper (shared signature).
                args = (df, key, col, series, df_Xs_keys, df_Ys_key)

                pool.apply_async(self._plot_dist, args=args)
                pool.apply_async(self._plot_count, args=args)
                pool.apply_async(self._plot_violin, args=args)
                pool.apply_async(self._plot_joint2d, args=args)
Пример #13
0
class DF_PlotTools(LoggerMixIn):
    """Draw a standard set of plots for the columns of a frame.

    ``plot_all`` submits the individual ``plot_*`` methods to a ``JobPool``;
    each method can also be called directly. All plots are written under
    ``./matplot/`` with a file name derived from the title.
    """

    def __init__(self, df, y_key, n_job=7, path=None):
        """Store the inputs and create the ``PlotTools`` helper.

        Args:
            df: frame to plot.
            y_key: column key that ``plot_all`` pairs with every other key.
            n_job: argument given to ``JobPool`` in ``plot_all``.
            path: stored as ``self.path``; ``"./plot_outs"`` when None.
        """
        super().__init__()
        self.plot = PlotTools()
        self.df, self.y_key, self.n_job = df, y_key, n_job
        self.path = "./plot_outs" if path is None else path

    def plot_all(self):
        """Submit every plot job for every key of ``self.df`` to a pool.

        Jobs are submitted kind by kind: dist, count, violin (both key
        orders), joint2d (both key orders), then count grouped by ``y_key``.
        """
        with JobPool(self.n_job) as pool:
            keys = self.df.keys()
            df = self.df
            y_key = self.y_key
            submit = pool.apply_async

            for key in keys:
                submit(self.plot_dist, [df, key])

            for key in keys:
                submit(self.plot_countbar, [df, key])

            # Violin and joint plots are drawn for both key orders.
            for key in keys:
                submit(self.plot_violin, [df, key, y_key])
                submit(self.plot_violin, [df, y_key, key])

            for key in keys:
                submit(self.plot_joint2d, [df, key, y_key])
                submit(self.plot_joint2d, [df, y_key, key])

            for a_key in keys:
                submit(self.plot_count_groupby, [df, a_key, y_key])

    @deco_exception_catch
    def plot_dist(self, df: DF, col_key: str, title=None):
        """Draw ``plot.dist`` for ``col_key``; the title defaults per column."""
        title = f'{col_key}_plot_dist' if title is None else title
        out_path = f"./matplot/{title}.png"
        self.plot.dist(df, col_key, title=title, path=out_path)

    @deco_exception_catch
    def plot_countbar(self, df: DF, col_key: str, title=None):
        """Draw ``plot.count`` for ``col_key``; the title defaults per column."""
        title = f'{col_key}_plot_count_bar' if title is None else title
        out_path = f"./matplot/{title}.png"
        self.plot.count(df, col_key, title=title, path=out_path)

    @deco_exception_catch
    def plot_violin(self, df: DF, a_col, b_col, title=None):
        """Draw ``plot.violin_plot`` of ``a_col`` against ``b_col``."""
        title = f'{a_col}_{b_col}_plot_violin' if title is None else title
        out_path = f"./matplot/{title}.png"
        self.plot.violin_plot(df, a_col, b_col, path=out_path, title=title)

    @deco_exception_catch
    def plot_joint2d(self, df: DF, a_col, b_col, title=None):
        """Draw ``plot.joint_2d`` of ``a_col`` against ``b_col``."""
        title = f'{a_col}_{b_col}_plot_joint2d' if title is None else title
        out_path = f"./matplot/{title}.png"
        self.plot.joint_2d(df, a_col, b_col, path=out_path, title=title)

    @deco_exception_catch
    def plot_dist_groupby(self, df: DF, a_col, groupby_cols, title=None):
        """Draw ``plot.dist_groupby`` of ``a_col`` with ``groupby_cols``."""
        title = f'{a_col}_groupby_{groupby_cols}_plot_dist_groupby' if title is None else title
        out_path = f"./matplot/{title}.png"
        self.plot.dist_groupby(df, a_col, groupby_cols, title=title, path=out_path)

    @deco_exception_catch
    def plot_count_groupby(self, df: DF, a_col, groupby_cols, title=None):
        """Draw ``plot.count`` of ``a_col`` with ``groupby_cols``."""
        title = f'{a_col}_groupby_{groupby_cols}_count' if title is None else title
        out_path = f"./matplot/{title}.png"
        self.plot.count(df, a_col, groupby_cols, title=title, path=out_path)

    @deco_exception_catch
    def plot_violin_groupby(self,
                            df: DF,
                            a_col,
                            b_col,
                            groupby_cols,
                            title=None):
        """Draw ``plot.violin_plot`` of two columns with ``groupby_cols``.

        ``groupby_cols`` is handed over as the fourth positional argument.
        """
        title = f'{a_col}_{b_col}_groupby_{groupby_cols}_violin' if title is None else title
        out_path = f"./matplot/{title}.png"
        self.plot.violin_plot(df, a_col, b_col, groupby_cols, title=title, path=out_path)
Пример #14
0
def test_VAE_latent_space_grid_search(n_iter=6):
    """Grid-search VAE settings on MNIST and plot each latent space.

    The expanded grid is written to ``./params.csv``. For every parameter
    set a fresh VAE is trained for up to ``n_iter`` one-epoch rounds; after
    each round the metric is printed and the codes of the ten label groups
    are handed to ``plot.scatter_2d``. A NaN metric ends that parameter set
    early.

    Args:
        n_iter: maximum number of one-epoch training rounds per parameter
            set.
    """
    dataset_path = """C:\\Users\\demetoir_desktop\\PycharmProjects\\MLtools\\data\\MNIST"""
    dataset_pack = MNIST().load(dataset_path)
    dataset_pack.shuffle()
    train_set = dataset_pack['train']
    full_Xs, full_Ys = train_set.full_batch()

    # One (Xs, Ys) pair per index label 0..9.
    x = train_set.Ys_index_label
    idxs_labels = []
    for i in range(10):
        idxs_label = np.where(x == i)
        idxs_labels += [(full_Xs[idxs_label], full_Ys[idxs_label])]

    plot = PlotTools()

    param_grid = params_to_dict(
        latent_code_size=[2],
        learning_rate=[0.005],
        encoder_net_shapes=[(512, 256, 128, 64, 32)],
        decoder_net_shapes=[(32, 64, 128, 256, 512)],
        batch_size=[256],
        # KL_D_rate=[0.01],
        loss_type=['VAE'],
        encoder_kwargs=params_to_dict(
            linear_stack_bn=[False],
            linear_stack_activation=['relu', 'lrelu', 'elu'],
            tail_bn=[False],
            tail_activation=['lrelu', 'none'],
        ),
        decoder_kwargs=params_to_dict(
            linear_stack_bn=[True],
            linear_stack_activation=['relu'],
            tail_bn=[False],
            tail_activation=['sigmoid'],
        ))
    # Materialise once: the grid is consumed twice below (CSV dump, then the
    # training loop), which would silently exhaust a lazy iterable.
    param_grid = list(param_grid_full(param_grid))

    df = DF({'params': param_grid})
    df.to_csv('./params.csv')

    for param_idx, params in enumerate(param_grid):
        pprint(param_idx, params)

        ae = VAE(**params)
        # Honour the n_iter argument instead of a hard-coded epoch count.
        for i in range(n_iter):
            ae.train(full_Xs, epoch=1)
            metric = ae.metric(full_Xs)
            if np.isnan(metric):
                print(f'param_idx:{param_idx}, metric is {metric}')
                break
            print(metric)

            codes = [ae.code(xs) for xs, _ in idxs_labels]

            plot.scatter_2d(
                *codes,
                title=f'param_idx_{param_idx}_vae_latent_space_epoch_{i}.png')

        del ae
Пример #15
0
def test_AAE_latent_space():
    """Train AAE models over a parameter grid and plot codes and image tiles.

    Reads ``train_set``, ``full_Xs``, ``full_Ys``, ``sample_Xs`` and
    ``sample_Ys`` from the enclosing module. The expanded grid is written to
    ``./params.csv``. For every parameter set a fresh AAE is trained for up
    to 100 one-epoch rounds; each round prints the metric, scatter-plots the
    codes of the ten label groups, and saves an image tile made of the
    samples, their reconstructions, sharpened reconstructions, generated
    images and augmentations. A NaN in any metric value ends that parameter
    set early.
    """
    # One (Xs, Ys) pair per index label 0..9.
    x = train_set.Ys_index_label
    idxs_labels = []
    for i in range(10):
        idxs_label = np.where(x == i)
        idxs_labels += [(full_Xs[idxs_label], full_Ys[idxs_label])]

    plot = PlotTools()

    param_grid = params_to_dict(
        latent_code_size=[2],
        learning_rate=[0.01],
        encoder_net_shapes=[(512, 256)],
        decoder_net_shapes=[(256, 512)],
        batch_size=[100],
        # KL_D_rate=[0.01],
        encoder_kwargs=params_to_dict(
            linear_stack_bn=[False],
            linear_stack_activation=['relu'],
            tail_bn=[False],
            tail_activation=['none'],
        ),
        decoder_kwargs=params_to_dict(
            linear_stack_bn=[False],
            linear_stack_activation=['relu'],
            tail_bn=[False],
            tail_activation=['sigmoid'],
        ))
    # Materialise once: the grid is consumed twice below (CSV dump, then the
    # training loop), which would silently exhaust a lazy iterable.
    param_grid = list(param_grid_full(param_grid))

    df = DF({'params': param_grid})
    df.to_csv('./params.csv')

    for param_idx, params in enumerate(param_grid):
        pprint(param_idx, params)

        ae = AAE(**params)
        for i in range(100):
            ae.train(full_Xs, full_Ys, epoch=1)
            # Here the metric is a mapping; any NaN value aborts this set.
            metric = ae.metric(full_Xs, full_Ys)
            if any(np.isnan(val) for val in metric.values()):
                print(f'param_idx:{param_idx}, metric is {metric}')
                break
            print(metric)

            codes = [ae.code(xs) for xs, _ in idxs_labels]

            plot.scatter_2d(
                *codes,
                title=f'param_idx_{param_idx}/aae_latent_space_epoch_{i}.png')

            recon = ae.recon(sample_Xs, sample_Ys)
            gen = ae.generate(full_Ys[:30])
            code_walk = np.concatenate(
                [ae.augmentation(sample_Xs, sample_Ys) for _ in range(5)],
                axis=0)
            recon_sharpen = ae.recon_sharpen(sample_Xs, sample_Ys)
            np_img = np.concatenate(
                [sample_Xs, recon, recon_sharpen, gen, code_walk])
            np_img = np_img_float32_to_uint8(np_img)

            file_name = f'./matplot/param_idx_{param_idx}/aae_img_epoch_{i}.png'
            plot.plot_image_tile(np_img, file_name, column=5)

        del ae
Пример #16
0
def test_CVAE_latent_space_grid_search():
    """Grid-search CVAE settings and plot latent codes and image tiles.

    Reads ``train_set``, ``full_Xs``, ``full_Ys``, ``sample_Xs`` and
    ``sample_Ys`` from the enclosing module. The expanded grid is written to
    ``./params.csv``. For every parameter set a fresh CVAE is trained for up
    to 10 one-epoch rounds; each round prints the metric, scatter-plots the
    codes of the ten label groups, and saves a tile of the samples, their
    reconstructions and generated images. A NaN metric ends that parameter
    set early.
    """
    # One (Xs, Ys) pair per index label 0..9.
    x = train_set.Ys_index_label
    idxs_labels = []
    for i in range(10):
        idxs_label = np.where(x == i)
        idxs_labels += [(full_Xs[idxs_label], full_Ys[idxs_label])]

    plot = PlotTools()

    param_grid = params_to_dict(
        latent_code_size=[2],
        learning_rate=[0.005],
        encoder_net_shapes=[(512, 256, 128, 64, 32)],
        decoder_net_shapes=[(32, 64, 128, 256, 512)],
        batch_size=[256],
        # KL_D_rate=[0.01],
        loss_type=['VAE'],
        encoder_kwargs=params_to_dict(
            linear_stack_bn=[False],
            linear_stack_activation=['relu', 'lrelu', 'elu'],
            tail_bn=[False],
            tail_activation=['lrelu', 'none'],
        ),
        decoder_kwargs=params_to_dict(
            linear_stack_bn=[True],
            linear_stack_activation=['relu'],
            tail_bn=[False],
            tail_activation=['sigmoid'],
        )
    )
    # Materialise once: the grid is consumed twice below (CSV dump, then the
    # training loop), which would silently exhaust a lazy iterable.
    param_grid = list(param_grid_full(param_grid))

    df = DF({
        'params': param_grid
    })
    df.to_csv('./params.csv')

    for param_idx, params in enumerate(param_grid):
        pprint(param_idx, params)

        ae = CVAE(**params)
        for i in range(10):
            ae.train(full_Xs, full_Ys, epoch=1)
            metric = ae.metric(full_Xs, full_Ys)
            if np.isnan(metric):
                print(f'param_idx:{param_idx}, metric is {metric}')
                break
            print(metric)

            codes = [ae.code(xs, ys) for xs, ys in idxs_labels]

            plot.scatter_2d(*codes, title=f'param_idx_{param_idx}/vae_latent_space_epoch_{i}.png')

            recon = ae.recon(sample_Xs, sample_Ys)
            gen = ae.generate(sample_Ys)
            np_img = np.concatenate([sample_Xs, recon, gen])
            np_img = np_img_float32_to_uint8(np_img)

            file_name = f'./matplot/param_idx_{param_idx}/vae_img_epoch_{i}.png'
            tile = np_img_to_tile(np_img, column_size=5)
            np_image_save(tile, file_name)

        del ae