def prepare_sampling(self):
    """Build the predictor used to draw samples from the trained generator.

    Ensures an inference-mode model exists, then wires it to a
    ``SimpleDatasetPredictor`` fed with random noise vectors.
    """
    # Reuse the trained model if present, switching it to inference mode;
    # otherwise build a fresh one directly in inference mode.
    if self.model is not None:
        self.model.training = False
    else:
        self.model = self.get_model(training=False)

    config = PredictConfig(
        model=self.model,
        session_init=SaverRestore(self.restore_path),
        input_names=['z'],
        output_names=['gen/gen', 'z'],
    )
    noise_source = RandomZData((self.batch_size, self.z_dim))
    self.simple_dataset_predictor = SimpleDatasetPredictor(config, noise_source)
def get_model(model, ckpt_name, option):
    """Build a ``SimpleDatasetPredictor`` over the validation split.

    Restores the checkpoint ``ckpt_name`` from ``train_log/<option.log_dir>``
    and exposes the error/attention/gradient outputs alongside the inputs.
    """
    checkpoint_path = ospj('train_log', option.log_dir, ckpt_name)
    config = PredictConfig(
        model=model,
        session_init=get_model_loader(checkpoint_path),
        input_names=['input', 'label', 'bbox'],
        output_names=['wrong-top1', 'top5', 'actmap', 'grad'],
        return_input=True,
    )
    return SimpleDatasetPredictor(config, get_data('val', option))
class TGANModel:
    """Main model from TGAN, extended with a fairness discriminator.

    Args:
        continuous_columns (list[int]): 0-index list of column indices to be considered
            continuous.
        sensitive_column (int): Index of the column treated as sensitive by the fairness
            term (forwarded to :class:`GraphBuilder`).
        output (str, optional): Path to store the model and its artifacts.
            Defaults to ``'output'``.
        gpu (str, optional): Comma separated list of GPU(s) to use. Defaults to ``None``.
        max_epoch (int, optional): Number of epochs to use during training.
            Defaults to ``5``.
        steps_per_epoch (int, optional): Number of steps to run on each epoch.
            Defaults to ``10000``.
        save_checkpoints (bool, optional): Whether or not to store checkpoints of the model
            after each training epoch. Defaults to ``True``.
        restore_session (bool, optional): Whether or not continue training from the last
            checkpoint. Defaults to ``True``.
        batch_size (int, optional): Size of the batch to feed the model at each step.
            Defaults to ``200``.
        z_dim (int, optional): Number of dimensions in the noise input for the generator.
            Defaults to ``200``.
        noise (float, optional): Upper bound to the gaussian noise added to categorical
            columns. Defaults to ``0.2``.
        l2norm (float, optional): L2 regularization coefficient when computing losses.
            Defaults to ``0.00001``.
        discrim_learning_rate (float, optional): Learning rate for the discriminator
            optimizer. Defaults to ``0.001``.
        fair_learning_rate (float, optional): Learning rate for the fairness optimizer.
            Defaults to ``0.0002``.
        num_gen_rnn (int, optional): Number of units in the generator RNN.
            Defaults to ``100``.
        num_gen_feature (int, optional): Number of features in the generator.
            Defaults to ``100``.
        num_dis_layers (int, optional): Number of discriminator layers. Defaults to ``1``.
        num_dis_hidden (int, optional): Number of hidden units per discriminator layer.
            Defaults to ``100``.
        optimizer (str, optional): Name of the optimizer to use during `fit`, possible
            values are: [`GradientDescentOptimizer`, `AdamOptimizer`, `AdadeltaOptimizer`].
            Defaults to ``'AdamOptimizer'``.
        trainer (str, optional): Trainer to use during `fit`, either ``'GANTrainer'`` or
            ``'SeparateGANTrainer'``. Defaults to ``'GANTrainer'``.
    """

    def __init__(self, continuous_columns, sensitive_column, output='output', gpu=None,
                 max_epoch=5, steps_per_epoch=10000, save_checkpoints=True,
                 restore_session=True, batch_size=200, z_dim=200, noise=0.2,
                 l2norm=0.00001, discrim_learning_rate=0.001, fair_learning_rate=0.0002,
                 num_gen_rnn=100, num_gen_feature=100, num_dis_layers=1,
                 num_dis_hidden=100, optimizer='AdamOptimizer', trainer='GANTrainer'):
        """Initialize object."""
        # Output
        self.continuous_columns = continuous_columns
        self.sensitive_column = sensitive_column
        self.log_dir = os.path.join(output, 'logs')
        self.model_dir = os.path.join(output, 'model')
        self.output = output

        # Training params
        self.max_epoch = max_epoch
        self.steps_per_epoch = steps_per_epoch
        self.save_checkpoints = save_checkpoints
        self.restore_session = restore_session

        # Model params
        self.model = None
        self.batch_size = batch_size
        self.z_dim = z_dim
        self.noise = noise
        self.l2norm = l2norm
        self.discrim_learning_rate = discrim_learning_rate
        self.fair_learning_rate = fair_learning_rate
        self.num_gen_rnn = num_gen_rnn
        self.num_gen_feature = num_gen_feature
        self.num_dis_layers = num_dis_layers
        self.num_dis_hidden = num_dis_hidden
        self.optimizer = optimizer
        self.trainer = trainer

        if gpu:
            os.environ['CUDA_VISIBLE_DEVICES'] = gpu

        self.gpu = gpu

    def get_model(self, training=True):
        """Return a new instance of the model."""
        return GraphBuilder(
            metadata=self.metadata,
            sensitive_column=self.sensitive_column,
            batch_size=self.batch_size,
            z_dim=self.z_dim,
            noise=self.noise,
            l2norm=self.l2norm,
            discrim_learning_rate=self.discrim_learning_rate,
            fair_learning_rate=self.fair_learning_rate,
            num_gen_rnn=self.num_gen_rnn,
            num_gen_feature=self.num_gen_feature,
            num_dis_layers=self.num_dis_layers,
            num_dis_hidden=self.num_dis_hidden,
            optimizer=self.optimizer,
            training=training)

    def prepare_sampling(self):
        """Prepare model for generate samples."""
        if self.model is None:
            self.model = self.get_model(training=False)
        else:
            self.model.training = False

        predict_config = PredictConfig(
            session_init=SaverRestore(self.restore_path),
            model=self.model,
            input_names=['z'],
            output_names=['gen/gen', 'z'],
        )
        self.simple_dataset_predictor = SimpleDatasetPredictor(
            predict_config, RandomZData((self.batch_size, self.z_dim)))

    def fit(self, data):
        """Fit the model to the given data.

        Args:
            data (pandas.DataFrame): dataset to fit the model.

        Returns:
            None

        Raises:
            ValueError: If :attr:`trainer` is neither ``'GANTrainer'`` nor
                ``'SeparateGANTrainer'``.
        """
        self.preprocessor = Preprocessor(continuous_columns=self.continuous_columns)
        data = self.preprocessor.fit_transform(data)
        self.metadata = self.preprocessor.metadata

        dataflow = TGANDataFlow(data, self.metadata)
        batch_data = BatchData(dataflow, self.batch_size)
        input_queue = QueueInput(batch_data)

        self.model = self.get_model(training=True)

        if self.trainer == 'GANTrainer':
            trainer = GANTrainer(model=self.model, input_queue=input_queue)
        elif self.trainer == 'SeparateGANTrainer':
            trainer = SeparateGANTrainer(model=self.model, input_queue=input_queue)
        else:
            raise ValueError(
                'Incorrect trainer name. Use GANTrainer or SeparateGANTrainer')

        self.restore_path = os.path.join(self.model_dir, 'checkpoint')

        # Resume from the last checkpoint when one exists and resuming is enabled;
        # the starting epoch is recovered from the trainer's stats log.
        if os.path.isfile(self.restore_path) and self.restore_session:
            session_init = SaverRestore(self.restore_path)
            with open(os.path.join(self.log_dir, 'stats.json')) as f:
                starting_epoch = json.load(f)[-1]['epoch_num'] + 1
        else:
            session_init = None
            starting_epoch = 1

        # 'k' tells tensorpack's logger to keep the existing log directory.
        action = 'k' if self.restore_session else None
        logger.set_logger_dir(self.log_dir, action=action)

        callbacks = []
        if self.save_checkpoints:
            callbacks.append(ModelSaver(checkpoint_dir=self.model_dir))

        trainer.train_with_defaults(
            callbacks=callbacks,
            steps_per_epoch=self.steps_per_epoch,
            max_epoch=self.max_epoch,
            session_init=session_init,
            starting_epoch=starting_epoch)

        self.prepare_sampling()

    def sample(self, num_samples):
        """Generate samples from model.

        Args:
            num_samples (int): Number of rows to generate.

        Returns:
            pandas.DataFrame: ``num_samples`` rows in the original data space.

        Raises:
            ValueError: If the metadata contains a column type other than
                ``'category'`` or ``'value'``.
        """
        # Ceil division: draw enough whole batches to cover num_samples.
        # Plain floor division would under-produce whenever num_samples is
        # not an exact multiple of batch_size.
        max_iters = (num_samples + self.batch_size - 1) // self.batch_size

        results = []
        for idx, o in enumerate(self.simple_dataset_predictor.get_result()):
            results.append(o[0])
            if idx + 1 == max_iters:
                break

        results = np.concatenate(results, axis=0)

        # Walk the flat output, slicing one feature block per column:
        # categorical columns take 1 value, continuous columns take
        # 1 value + n gaussian-mixture membership probabilities.
        ptr = 0
        features = {}
        for col_id, col_info in enumerate(self.metadata['details']):
            if col_info['type'] == 'category':
                features['f%02d' % col_id] = results[:, ptr:ptr + 1]
                ptr += 1

            elif col_info['type'] == 'value':
                gaussian_components = col_info['n']
                val = results[:, ptr:ptr + 1]
                ptr += 1
                pro = results[:, ptr:ptr + gaussian_components]
                ptr += gaussian_components
                features['f%02d' % col_id] = np.concatenate([val, pro], axis=1)

            else:
                raise ValueError(
                    "self.metadata['details'][{}]['type'] must be either `category` or "
                    "`values`. Instead it was {}.".format(col_id, col_info['type']))

        return self.preprocessor.reverse_transform(features)[:num_samples].copy()

    def tar_folder(self, tar_name):
        """Generate a tar of :self.output:."""
        with tarfile.open(tar_name, 'w:gz') as tar_handle:
            for root, dirs, files in os.walk(self.output):
                for file_ in files:
                    tar_handle.add(os.path.join(root, file_))

    @classmethod
    def load(cls, path):
        """Load a pretrained model from a given path."""
        with tarfile.open(path, 'r:gz') as tar_handle:
            destination_dir = os.path.dirname(tar_handle.getmembers()[0].name)
            # NOTE(review): extractall on an untrusted archive can write outside
            # the working directory (path traversal), and pickle.load below
            # executes arbitrary code — only load archives from trusted sources.
            tar_handle.extractall()

        with open('{}/TGANModel'.format(destination_dir), 'rb') as f:
            instance = pickle.load(f)

        instance.prepare_sampling()
        return instance

    def save(self, path, force=False):
        """Save the fitted model in the given path."""
        if os.path.exists(path) and not force:
            logger.info(
                'The indicated path already exists. Use `force=True` to overwrite.')
            return

        base_path = os.path.dirname(path)
        if not os.path.exists(base_path):
            os.makedirs(base_path)

        # The TF graph and predictor are not picklable; detach them while
        # pickling and restore them afterwards.
        model = self.model
        dataset_predictor = self.simple_dataset_predictor

        self.model = None
        self.simple_dataset_predictor = None

        with open('{}/TGANModel'.format(self.output), 'wb') as f:
            pickle.dump(self, f)

        self.model = model
        self.simple_dataset_predictor = dataset_predictor

        self.tar_folder(path)
        logger.info('Model saved successfully.')