def start(self, **kwargs):
    """Split the merged swatch data into train/validation/test partitions.

    Required kwargs:
        pct_train: percentage (0-100) of videos assigned to training.
        pct_test:  percentage (0-100) of videos assigned to test; the
                   remainder becomes validation.

    The split is made at the *video* level (stem of the file name before
    the first '_') so every swatch from one video lands in one partition.

    Returns:
        dict with key 'output_path' pointing at the persisted dataframe.

    Raises:
        KeyError: if pct_train or pct_test is missing from kwargs.
    """
    pct_train = kwargs['pct_train']
    pct_test = kwargs['pct_test']

    df, _ = pickle_service.concat_pickled_dataframes(
        config.MERGED_SWATCH_DATA_PATH)

    # Derived columns: file name, video stem, and a coarse real/fake label
    # inferred from the '1.0.png' score suffix in the path.
    df['filename'] = df['path'].apply(lambda x: Path(x).name)
    df['video_name_stem'] = df['path'].apply(
        lambda x: Path(x).stem.split('_')[0])
    df['gross_label'] = df['path'].apply(
        lambda x: 'real' if x.endswith('1.0.png') else 'fake')
    df['real_or_fake_digit'] = df['gross_label'].apply(
        lambda x: 1 if x == 'fake' else 0)
    df['score_2places'] = np.around(df['score'].values, decimals=2)
    df['score_3places'] = np.around(df['score'].values, decimals=3)
    df['score_4places'] = np.around(df['score'].values, decimals=4)

    video_names = df['video_name_stem'].unique()
    np.random.shuffle(video_names)

    num_videos = video_names.shape[0]
    train_num = int(num_videos * (pct_train / 100))
    test_num = int(num_videos * (pct_test / 100))
    val_num = num_videos - train_num - test_num

    # Sets give O(1) membership in the per-row apply below; the original
    # scanned a list and a numpy array per row (O(n_videos) each).
    # unique() guarantees no duplicates, so set sizes equal slice lengths.
    train_set = set(video_names[:train_num])
    val_set = set(video_names[train_num:train_num + val_num])

    logger.info(
        f'Will attempt to set rows for training: {len(train_set)}.')

    def split_train_test_val(value):
        # Anything neither train nor validation falls into test.
        if value in train_set:
            return 'train'
        if value in val_set:
            return 'validation'
        return 'test'

    df['test_train_split'] = df['video_name_stem'].apply(
        split_train_test_val)

    # Single pass over the column instead of three boolean filters.
    counts = df['test_train_split'].value_counts()

    logger.info(f"Train {counts.get('train', 0)} rows.")
    logger.info(f"Validation {counts.get('validation', 0)} rows.")
    logger.info(f"Test {counts.get('test', 0)} rows.")
    logger.info(f'Head: {df.head()}')

    result = {'output_path': self.persist_output_dataframe(df)}
    self.validate_start_output(result)

    return result
    def consolidate_pickles(output_path: Path):
        """Concatenate every pickled dataframe found in the parent
        directory of ``output_path`` and pickle the combined frame
        to ``output_path``.

        NOTE(review): declared at method indentation but takes no
        ``self`` — presumably intended as a @staticmethod; confirm
        against callers.
        """
        parent_dir = output_path.parent

        logger.info(f'Parent path: {parent_dir}')
        assert (parent_dir.exists())

        combined_df, _unused = pickle_service.concat_pickled_dataframes(
            parent_dir)

        logger.info("About to pickle.")
        combined_df.to_pickle(output_path)
# Example #3 — separator artifact from the code-scraping source (was: 示例#3 / 0)
    def consolidate_persisted_dataframes(self):
        """Merge all pickled dataframes under ``self.dataframes_path``
        into one frame, archive the source pickles, delete the originals,
        and persist the merged frame.

        Returns:
            (output_path, archive_path) tuple of the persisted merged
            frame and the archive of the source pickles.
        """
        logger.info(f'Output path: {self.dataframes_path}')

        merged_df, source_paths = pickle_service.concat_pickled_dataframes(
            self.dataframes_path)

        archive_path = file_service.archive_paths(
            source_paths, self.dataframes_path, 'archive', 'pkl')
        assert (archive_path.exists())

        # Sources are safely archived above; remove the originals.
        for source in source_paths:
            source.unlink()

        return self.persist_output_dataframe(merged_df), archive_path
# Example #4 — separator artifact from the code-scraping source (was: 示例#4 / 0)
def get_decorated_df(pct_train: int = 80, pct_test: int = 5):
    """Load the merged swatch dataframe, add label columns, and assign a
    per-video train/validation/test split.

    Args:
        pct_train: percentage (0-100) of videos assigned to training.
        pct_test:  percentage (0-100) of videos assigned to test; the
                   remainder becomes validation.

    Returns:
        The decorated dataframe.  (Fix: the original built and decorated
        the frame but never returned it, so callers always got None
        despite the ``get_`` name.)
    """
    df, _ = pickle_service.concat_pickled_dataframes(config.MERGED_SWATCH_DATA_PATH)

    # Video stem (text before the first '_') and a coarse real/fake label
    # inferred from the '1.0.png' score suffix in the path.
    df['video_name_stem'] = df['path'].apply(lambda x: Path(x).stem.split('_')[0])
    df['gross_label'] = df['path'].apply(lambda x: 'real' if x.endswith('1.0.png') else 'fake')

    video_names = df['video_name_stem'].unique()
    np.random.shuffle(video_names)

    num_videos = video_names.shape[0]
    train_num = int(num_videos * (pct_train / 100))
    test_num = int(num_videos * (pct_test / 100))
    val_num = num_videos - train_num - test_num

    # Sets give O(1) membership in the per-row apply below; the original
    # scanned a list and a numpy array per row.
    train_set = set(video_names[:train_num])
    val_set = set(video_names[train_num:train_num + val_num])

    logger.info(f'Will attempt to set rows for training: {len(train_set)}.')

    def split_train_test_val(value):
        # Anything neither train nor validation falls into test.
        if value in train_set:
            return 'train'
        if value in val_set:
            return 'validation'
        return 'test'

    df['test_train_split'] = df['video_name_stem'].apply(split_train_test_val)

    # Single pass over the column instead of three boolean filters.
    counts = df['test_train_split'].value_counts()

    logger.info(f"Train {counts.get('train', 0)} rows.")
    logger.info(f"Validation {counts.get('validation', 0)} rows.")
    logger.info(f"Test {counts.get('test', 0)} rows.")
    logger.info(f'Head: {df.head()}')

    return df
# Example #5 — separator artifact from the code-scraping source (was: 示例#5 / 0)
    def read_existing_output_dataframe(self) -> pd.DataFrame:
        """Return the concatenation of all pickled dataframes found
        under ``self.dataframes_path``."""
        combined, _source_paths = pickle_service.concat_pickled_dataframes(
            self.dataframes_path)
        return combined
# Example #6 — separator artifact from the code-scraping source (was: 示例#6 / 0)
def load_history(max_pickles=None):
    """Return the real-swatch process history as a single dataframe.

    Args:
        max_pickles: optional cap on how many pickle files are
            concatenated; None means all of them.
    """
    logger.info("About to load real swatch process history ...")
    history_df, _source_paths = pickle_service.concat_pickled_dataframes(
        config.SSIM_REALS_DATA_OUTPUT_PATH, max_pickles)
    return history_df
    def test_columns(self):
        """Log the column names of the merged swatch dataframe."""
        frame, _source_paths = pickle_service.concat_pickled_dataframes(
            config.MERGED_SWATCH_DATA_PATH)
        logger.info(f'Cols: {frame.columns}')