def __init__(self, is_training, config, scaler=None, adj_mx=None):
    super(DCRNNModel, self).__init__(config, scaler=scaler)
    batch_size = int(config.get('batch_size'))
    max_diffusion_step = int(config.get('max_diffusion_step', 2))
    cl_decay_steps = int(config.get('cl_decay_steps', 1000))
    filter_type = config.get('filter_type', 'laplacian')
    horizon = int(config.get('horizon', 1))
    input_dim = int(config.get('input_dim', 1))
    loss_func = config.get('loss_func', 'MSE')
    max_grad_norm = float(config.get('max_grad_norm', 5.0))
    num_nodes = int(config.get('num_nodes', 1))
    num_rnn_layers = int(config.get('num_rnn_layers', 1))
    output_dim = int(config.get('output_dim', 1))
    rnn_units = int(config.get('rnn_units'))
    seq_len = int(config.get('seq_len'))
    use_curriculum_learning = bool(config.get('use_curriculum_learning', False))
    assert input_dim == output_dim, 'input_dim: %d != output_dim: %d' % (input_dim, output_dim)

    # Input: (batch_size, timesteps, num_sensor, input_dim)
    self._inputs = tf.placeholder(tf.float32,
                                  shape=(batch_size, seq_len, num_nodes, input_dim),
                                  name='inputs')
    # Labels: (batch_size, timesteps, num_sensor, output_dim)
    self._labels = tf.placeholder(tf.float32,
                                  shape=(batch_size, horizon, num_nodes, output_dim),
                                  name='labels')
    GO_SYMBOL = tf.zeros(shape=(batch_size, num_nodes * input_dim))

    cell = DCGRUCell(rnn_units, adj_mx, max_diffusion_step=max_diffusion_step,
                     num_nodes=num_nodes, filter_type=filter_type)
    cell_with_projection = DCGRUCell(rnn_units, adj_mx, max_diffusion_step=max_diffusion_step,
                                     num_nodes=num_nodes, num_proj=output_dim,
                                     filter_type=filter_type)
    encoding_cells = [cell] * num_rnn_layers
    decoding_cells = [cell] * (num_rnn_layers - 1) + [cell_with_projection]
    encoding_cells = tf.contrib.rnn.MultiRNNCell(encoding_cells, state_is_tuple=True)
    decoding_cells = tf.contrib.rnn.MultiRNNCell(decoding_cells, state_is_tuple=True)

    global_step = tf.train.get_or_create_global_step()
    # Outputs: (batch_size, timesteps, num_nodes, output_dim)
    with tf.variable_scope('DCRNN_SEQ'):
        inputs = tf.unstack(
            tf.reshape(self._inputs, (batch_size, seq_len, num_nodes * input_dim)), axis=1)
        labels = tf.unstack(
            tf.reshape(self._labels, (batch_size, horizon, num_nodes * output_dim)), axis=1)
        labels.insert(0, GO_SYMBOL)

        loop_function = None
        if is_training:
            if use_curriculum_learning:
                def loop_function(prev, i):
                    # Scheduled sampling: feed the ground truth with probability `threshold`,
                    # otherwise feed back the previous prediction.
                    c = tf.random_uniform((), minval=0, maxval=1.)
                    threshold = self._compute_sampling_threshold(global_step, cl_decay_steps)
                    result = tf.cond(tf.less(c, threshold), lambda: labels[i], lambda: prev)
                    return result
        else:
            # At inference time, always feed the model's previous output back in.
            def loop_function(prev, _):
                return prev

        _, enc_state = tf.contrib.rnn.static_rnn(encoding_cells, inputs, dtype=tf.float32)
        outputs, final_state = legacy_seq2seq.rnn_decoder(labels, enc_state, decoding_cells,
                                                          loop_function=loop_function)

    # Project the output to output_dim.
    outputs = tf.stack(outputs[:-1], axis=1)
    self._outputs = tf.reshape(outputs, (batch_size, horizon, num_nodes, output_dim),
                               name='outputs')

    preds = self._outputs[..., 0]
    labels = self._labels[..., 0]
    null_val = config.get('null_val', 0.)
    self._mae = masked_mae_loss(self._scaler, null_val)(preds=preds, labels=labels)

    if loss_func == 'MSE':
        self._loss = masked_mse_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
    elif loss_func == 'MAE':
        self._loss = masked_mae_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
    elif loss_func == 'RMSE':
        self._loss = masked_rmse_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)
    else:
        self._loss = masked_mse_loss(self._scaler, null_val)(preds=self._outputs, labels=self._labels)

    if is_training:
        optimizer = tf.train.AdamOptimizer(self._lr)
        tvars = tf.trainable_variables()
        grads = tf.gradients(self._loss, tvars)
        grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step,
                                                   name='train_op')

    self._merged = tf.summary.merge_all()
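# Note: the curriculum-learning branch above calls self._compute_sampling_threshold, which is
# not defined in this listing. A minimal sketch of that helper is given below, assuming the
# inverse-sigmoid decay schedule used in the reference DCRNN implementation; the exact schedule
# in this codebase may differ.
@staticmethod
def _compute_sampling_threshold(global_step, k):
    """Probability of feeding the ground truth instead of the previous prediction.

    Decays from ~1 towards 0 as global_step grows: k / (k + exp(global_step / k)).
    """
    return tf.cast(k / (k + tf.exp(global_step / k)), tf.float32)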
def __init__(self, **kwargs):
    self._kwargs = kwargs
    self._data_kwargs = kwargs.get('data')
    self._model_kwargs = kwargs.get('model')
    self._train_kwargs = kwargs.get('train')
    self._test_kwargs = kwargs.get('test')

    # Logging.
    self._log_dir = self._get_log_dir(kwargs)
    log_level = self._kwargs.get('log_level', 'INFO')
    self._logger = utils.get_logger(self._log_dir, __name__, 'info.log', level=log_level)
    self._writer = tf.summary.FileWriter(self._log_dir)
    self._logger.info(kwargs)

    self._mon_ratio = float(self._kwargs.get('mon_ratio'))

    # Model's args
    self._seq_len = int(self._model_kwargs.get('seq_len'))
    self._horizon = int(self._model_kwargs.get('horizon'))
    self._input_dim = int(self._model_kwargs.get('input_dim'))
    self._nodes = int(self._model_kwargs.get('num_nodes'))

    # Test's args
    self._flow_selection = self._test_kwargs.get('flow_selection')
    self._test_size = self._test_kwargs.get('test_size')
    self._run_times = self._test_kwargs.get('run_times')

    # Data preparation
    self._day_size = self._data_kwargs.get('day_size')
    self._data = utils.load_dataset_dcrnn(seq_len=self._model_kwargs.get('seq_len'),
                                          horizon=self._model_kwargs.get('horizon'),
                                          input_dim=self._model_kwargs.get('input_dim'),
                                          mon_ratio=self._mon_ratio,
                                          **self._data_kwargs)
    for k, v in self._data.items():
        if hasattr(v, 'shape'):
            self._logger.info((k, v.shape))

    # Build models.
    scaler = self._data['scaler']
    with tf.name_scope('Train'):
        with tf.variable_scope('DCRNN', reuse=False):
            self._train_model = DCRNNModel(is_training=True, scaler=scaler,
                                           batch_size=self._data_kwargs['batch_size'],
                                           adj_mx=self._data['adj_mx'], **self._model_kwargs)

    with tf.name_scope('Val'):
        with tf.variable_scope('DCRNN', reuse=True):
            self._val_model = DCRNNModel(is_training=False, scaler=scaler,
                                         batch_size=self._data_kwargs['val_batch_size'],
                                         adj_mx=self._data['adj_mx'], **self._model_kwargs)

    with tf.name_scope('Eval'):
        with tf.variable_scope('DCRNN', reuse=True):
            self._eval_model = DCRNNModel(is_training=False, scaler=scaler,
                                          batch_size=self._data_kwargs['eval_batch_size'],
                                          adj_mx=self._data['adj_mx'], **self._model_kwargs)

    with tf.name_scope('Test'):
        with tf.variable_scope('DCRNN', reuse=True):
            self._test_model = DCRNNModel(is_training=False, scaler=scaler,
                                          batch_size=self._data_kwargs['test_batch_size'],
                                          adj_mx=self._data['adj_mx'], **self._model_kwargs)

    # Learning rate.
    self._lr = tf.get_variable('learning_rate', shape=(),
                               initializer=tf.constant_initializer(0.01), trainable=False)
    self._new_lr = tf.placeholder(tf.float32, shape=(), name='new_learning_rate')
    self._lr_update = tf.assign(self._lr, self._new_lr, name='lr_update')

    # Configure optimizer
    optimizer_name = self._train_kwargs.get('optimizer', 'adam').lower()
    epsilon = float(self._train_kwargs.get('epsilon', 1e-3))
    optimizer = tf.train.AdamOptimizer(self._lr, epsilon=epsilon)
    if optimizer_name == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
    elif optimizer_name == 'amsgrad':
        optimizer = AMSGrad(self._lr, epsilon=epsilon)

    # Calculate loss
    output_dim = self._model_kwargs.get('output_dim')
    preds = self._train_model.outputs
    labels = self._train_model.labels[..., :output_dim]

    null_val = 0.
    self._loss_fn = masked_mse_loss(scaler, null_val)
    # self._loss_fn = masked_mae_loss(scaler, null_val)
    self._train_loss = self._loss_fn(preds=preds, labels=labels)

    tvars = tf.trainable_variables()
    grads = tf.gradients(self._train_loss, tvars)
    max_grad_norm = kwargs['train'].get('max_grad_norm', 1.)
    grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    global_step = tf.train.get_or_create_global_step()
    self._train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step,
                                               name='train_op')

    max_to_keep = self._train_kwargs.get('max_to_keep', 100)
    self._epoch = 0
    self._saver = tf.train.Saver(tf.global_variables(), max_to_keep=max_to_keep)

    # Log model statistics.
    total_trainable_parameter = utils.get_total_trainable_parameter_size()
    self._logger.info('Total number of trainable parameters: {:d}'.format(total_trainable_parameter))
    for var in tf.global_variables():
        self._logger.debug('{}, {}'.format(var.name, var.get_shape()))
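# Note: masked_mse_loss / masked_mae_loss / masked_rmse_loss are used throughout but not shown
# in this listing. The sketch below illustrates the assumed shape of masked_mse_loss, following
# the reference DCRNN metrics: entries equal to null_val are masked out, and predictions and
# labels are inverse-transformed by the scaler before the error is computed. Treat it as an
# illustration, not the exact implementation in this codebase.
def masked_mse_loss(scaler, null_val):
    def loss(preds, labels):
        if scaler:
            preds = scaler.inverse_transform(preds)
            labels = scaler.inverse_transform(labels)
        if np.isnan(null_val):
            mask = ~tf.is_nan(labels)
        else:
            mask = tf.not_equal(labels, null_val)
        mask = tf.cast(mask, tf.float32)
        mask /= tf.reduce_mean(mask)
        # Guard against a fully masked batch producing NaNs.
        mask = tf.where(tf.is_nan(mask), tf.zeros_like(mask), mask)
        mse = tf.square(tf.subtract(preds, labels)) * mask
        mse = tf.where(tf.is_nan(mse), tf.zeros_like(mse), mse)
        return tf.reduce_mean(mse)
    return loss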
def __init__(self, sess, adj_mx, dataloaders, kwargs):
    self._kwargs = kwargs
    self._data_kwargs = kwargs.get('data')
    self._model_kwargs = kwargs.get('model')
    self._train_kwargs = kwargs.get('train')
    self._paths_kwargs = kwargs.get('paths')

    tf_config = kwargs.get('tf_config')
    self._save_tensors = tf_config.get('save_tensors', False) if tf_config else False
    self._trace = tf_config.get('trace', False) if tf_config else False
    self._save_graph = tf_config.get('save_graph', False) if tf_config else False

    self._log_dir = self._get_log_dir(kwargs)
    self._writer = tf.summary.FileWriter(self._log_dir, sess.graph) \
        if self._save_graph else tf.summary.FileWriter(self._log_dir)

    # Data preparation
    self._data = dataloaders
    # for k, v in self._data.items():
    #     if hasattr(v, 'shape'):
    #         self._kwargs.logger.info((k, v.shape))

    # Build models.
    scaler = self._data['scaler']
    with tf.name_scope('Train'):
        with tf.variable_scope('DCRNN', reuse=False):
            train_batch_size = dataloaders['train_loader'].batch_size
            self._train_model = DCRNNModel(is_training=True, scaler=scaler,
                                           batch_size=train_batch_size,
                                           adj_mx=adj_mx, **self._model_kwargs)

    with tf.name_scope('Val'):
        with tf.variable_scope('DCRNN', reuse=True):
            val_batch_size = dataloaders['val_loader'].batch_size
            self._val_model = DCRNNModel(is_training=False, scaler=scaler,
                                         batch_size=val_batch_size,
                                         adj_mx=adj_mx, **self._model_kwargs)

    with tf.name_scope('Test'):
        with tf.variable_scope('DCRNN', reuse=True):
            test_batch_size = dataloaders['test_loader'].batch_size
            self._test_model = DCRNNModel(is_training=False, scaler=scaler,
                                          batch_size=test_batch_size,
                                          adj_mx=adj_mx, **self._model_kwargs)

    # Learning rate.
    self._lr = tf.get_variable('learning_rate', shape=(),
                               initializer=tf.constant_initializer(0.01), trainable=False)
    self._new_lr = tf.placeholder(tf.float32, shape=(), name='new_learning_rate')
    self._lr_update = tf.assign(self._lr, self._new_lr, name='lr_update')

    # Configure optimizer
    optimizer_name = self._train_kwargs.get('optimizer', 'adam').lower()
    epsilon = float(self._train_kwargs.get('epsilon', 1e-3))
    optimizer = tf.train.AdamOptimizer(self._lr, epsilon=epsilon)
    if optimizer_name == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
    elif optimizer_name == 'amsgrad':
        optimizer = AMSGrad(self._lr, epsilon=epsilon)

    # Calculate loss
    output_dim = self._model_kwargs.get('output_dim')
    preds = self._train_model.outputs
    labels = self._train_model.labels[..., :output_dim]

    null_val = 0. if kwargs['model'].get('exclude_zeros_in_metric', True) else np.nan
    loss_func_dict = {
        'mae': masked_mae_loss(scaler, null_val),
        'rmse': masked_rmse_loss(scaler, null_val),
        'mse': masked_mse_loss(scaler, null_val)
    }
    self._loss_fn = loss_func_dict.get(kwargs['train'].get('loss_func', 'mae'))
    self._metric_fn = loss_func_dict.get(kwargs['train'].get('metric_func', 'mae'))
    self._train_loss = self._loss_fn(preds=preds, labels=labels)

    tvars = tf.trainable_variables()
    grads = tf.gradients(self._train_loss, tvars)
    max_grad_norm = kwargs['train'].get('max_grad_norm', 1.)
    grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars),
                                               global_step=tf.train.get_or_create_global_step(),
                                               name='train_op')

    max_to_keep = self._train_kwargs.get('max_to_keep', 100)
    self._saver = tf.train.Saver(tf.global_variables(), max_to_keep=max_to_keep)

    # Load a pretrained model if a checkpoint path is given; otherwise initialize all variables.
    model_filename = self._paths_kwargs.get('model_filename')
    if model_filename is not None:
        self._saver.restore(sess, model_filename)
        self._kwargs.logger.info('Pretrained model was loaded from: {}'.format(model_filename))
    else:
        sess.run(tf.global_variables_initializer())

    # Log model statistics.
    total_trainable_parameter = utils.get_total_trainable_parameter_size()
    self._kwargs.logger.info(
        'Total number of trainable parameters: {:d}'.format(total_trainable_parameter))
    for var in tf.global_variables():
        self._kwargs.logger.debug('{}, {}'.format(var.name, var.get_shape()))
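# Note: a rough sketch of how the graph built above is typically driven for one training step.
# The placeholders fed here correspond to the 'inputs' and 'labels' placeholders created in
# DCRNNModel; the accessor names used (model.inputs, model.labels) are assumptions about
# properties exposed by the model base class, not confirmed by this listing.
def run_train_step(sess, model, train_op, train_loss, x_batch, y_batch):
    # x_batch: (batch_size, seq_len, num_nodes, input_dim)
    # y_batch: (batch_size, horizon, num_nodes, output_dim)
    feed_dict = {model.inputs: x_batch, model.labels: y_batch}
    _, loss_value = sess.run([train_op, train_loss], feed_dict=feed_dict)
    return loss_value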
def __init__(self, is_training=False, **kwargs):
    super(DCRNNSupervisor, self).__init__(**kwargs)

    self._data = utils.load_dataset_dcrnn(seq_len=self._model_kwargs.get('seq_len'),
                                          horizon=self._model_kwargs.get('horizon'),
                                          input_dim=self._model_kwargs.get('input_dim'),
                                          mon_ratio=self._mon_ratio,
                                          scaler_type=self._kwargs.get('scaler'),
                                          is_training=is_training,
                                          **self._data_kwargs)
    for k, v in self._data.items():
        if hasattr(v, 'shape'):
            self._logger.info((k, v.shape))

    # Build models.
    scaler = self._data['scaler']
    if is_training:
        self.model = DCRNNModel(scaler=scaler, batch_size=self._data_kwargs['batch_size'],
                                adj_mx=self._data['adj_mx'], **self._model_kwargs)
    else:
        self.model = DCRNNModel(scaler=scaler, batch_size=1,
                                adj_mx=self._data['adj_mx'], **self._model_kwargs)

    # Learning rate.
    self._lr = tf.get_variable('learning_rate', shape=(),
                               initializer=tf.constant_initializer(0.01), trainable=False)
    self._new_lr = tf.placeholder(tf.float32, shape=(), name='new_learning_rate')
    self._lr_update = tf.assign(self._lr, self._new_lr, name='lr_update')

    # Configure optimizer
    optimizer_name = self._train_kwargs.get('optimizer', 'adam').lower()
    epsilon = float(self._train_kwargs.get('epsilon', 1e-3))
    optimizer = tf.train.AdamOptimizer(self._lr, epsilon=epsilon)
    if optimizer_name == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
    elif optimizer_name == 'amsgrad':
        optimizer = AMSGrad(self._lr, epsilon=epsilon)

    # Calculate loss
    output_dim = self._model_kwargs.get('output_dim')
    preds = self.model.outputs
    labels = self.model.labels[..., :output_dim]

    null_val = 0.
    self._loss_fn = masked_mse_loss(scaler, null_val)
    # self._loss_fn = masked_mae_loss(scaler, null_val)
    self._train_loss = self._loss_fn(preds=preds, labels=labels)

    tvars = tf.trainable_variables()
    grads = tf.gradients(self._train_loss, tvars)
    max_grad_norm = kwargs['train'].get('max_grad_norm', 1.)
    grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    global_step = tf.train.get_or_create_global_step()
    self._train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step,
                                               name='train_op')

    max_to_keep = self._train_kwargs.get('max_to_keep', 100)
    self._epoch = 0
    self._saver = tf.train.Saver(tf.global_variables(), max_to_keep=max_to_keep)

    # Log model statistics.
    total_trainable_parameter = utils.get_total_trainable_parameter_size()
    self._logger.info('Total number of trainable parameters: {:d}'.format(total_trainable_parameter))
    for var in tf.global_variables():
        self._logger.debug('{}, {}'.format(var.name, var.get_shape()))
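# Note: the learning-rate machinery above (self._lr, self._new_lr, self._lr_update) is updated
# by feeding the new value into the placeholder and running the assign op. A minimal sketch,
# assuming a `set_lr`-style helper (the method name is hypothetical, not from this listing):
def set_lr(self, sess, lr_value):
    # Assign lr_value to the non-trainable learning_rate variable used by the optimizer.
    sess.run(self._lr_update, feed_dict={self._new_lr: lr_value})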
def __init__(self, is_training=False, **kwargs):
    super(DCRNNSupervisor, self).__init__(**kwargs)

    self._r = int(self._model_kwargs.get('r'))
    self._lamda = []
    self._lamda.append(self._test_kwargs.get('lamda_0'))
    self._lamda.append(self._test_kwargs.get('lamda_1'))
    self._lamda.append(self._test_kwargs.get('lamda_2'))

    # Data preparation
    self._day_size = self._data_kwargs.get('day_size')
    self._data = utils.load_dataset_dcrnn_fwbw(seq_len=self._model_kwargs.get('seq_len'),
                                               horizon=self._model_kwargs.get('horizon'),
                                               input_dim=self._model_kwargs.get('input_dim'),
                                               mon_ratio=self._mon_ratio,
                                               scaler_type=self._kwargs.get('scaler'),
                                               is_training=is_training,
                                               **self._data_kwargs)
    for k, v in self._data.items():
        if hasattr(v, 'shape'):
            self._logger.info((k, v.shape))

    # Build models.
    scaler = self._data['scaler']
    with tf.name_scope('Train'):
        with tf.variable_scope('DCRNN', reuse=False):
            self.train_model = DCRNNModel(is_training=True, scaler=scaler,
                                          batch_size=self._data_kwargs['batch_size'],
                                          adj_mx=self._data['adj_mx'], **self._model_kwargs)

    with tf.name_scope('Val'):
        with tf.variable_scope('DCRNN', reuse=True):
            self.val_model = DCRNNModel(is_training=False, scaler=scaler,
                                        batch_size=self._data_kwargs['val_batch_size'],
                                        adj_mx=self._data['adj_mx'], **self._model_kwargs)

    with tf.name_scope('Eval'):
        with tf.variable_scope('DCRNN', reuse=True):
            self.eval_model = DCRNNModel(is_training=False, scaler=scaler,
                                         batch_size=self._data_kwargs['eval_batch_size'],
                                         adj_mx=self._data['adj_mx'], **self._model_kwargs)

    with tf.name_scope('Test'):
        with tf.variable_scope('DCRNN', reuse=True):
            self.test_model = DCRNNModel(is_training=False, scaler=scaler,
                                         batch_size=self._data_kwargs['test_batch_size'],
                                         adj_mx=self._data['adj_mx'], **self._model_kwargs)

    # Learning rate.
    self._lr = tf.get_variable('learning_rate', shape=(),
                               initializer=tf.constant_initializer(0.01), trainable=False)
    self._new_lr = tf.placeholder(tf.float32, shape=(), name='new_learning_rate')
    self._lr_update = tf.assign(self._lr, self._new_lr, name='lr_update')

    # Configure optimizer
    optimizer_name = self._train_kwargs.get('optimizer', 'adam').lower()
    epsilon = float(self._train_kwargs.get('epsilon', 1e-3))
    optimizer = tf.train.AdamOptimizer(self._lr, epsilon=epsilon)
    if optimizer_name == 'sgd':
        optimizer = tf.train.GradientDescentOptimizer(self._lr)
    elif optimizer_name == 'amsgrad':
        optimizer = AMSGrad(self._lr, epsilon=epsilon)

    # Calculate loss
    output_dim = self._model_kwargs.get('output_dim')
    # fw decoder
    preds_fw = self.train_model.outputs_fw
    labels_fw = self.train_model.labels_fw[..., :output_dim]
    # bw encoder
    enc_preds_bw = self.train_model.enc_outputs_bw
    enc_labels_bw = self.train_model.enc_labels_bw[..., :output_dim]

    null_val = 0.
    self._loss_fn = masked_mse_loss(scaler, null_val)
    self._train_loss_dec = self._loss_fn(preds=preds_fw, labels=labels_fw)
    # backward loss
    self._train_loss_enc_bw = self._loss_fn(preds=enc_preds_bw, labels=enc_labels_bw)
    self._train_loss = self._train_loss_dec + self._train_loss_enc_bw

    tvars = tf.trainable_variables()
    grads = tf.gradients(self._train_loss, tvars)
    max_grad_norm = kwargs['train'].get('max_grad_norm', 1.)
    grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    global_step = tf.train.get_or_create_global_step()
    self._train_op = optimizer.apply_gradients(zip(grads, tvars), global_step=global_step,
                                               name='train_op')

    max_to_keep = self._train_kwargs.get('max_to_keep', 100)
    self._epoch = 0
    self._saver = tf.train.Saver(tf.global_variables(), max_to_keep=max_to_keep)

    # Log model statistics.
    total_trainable_parameter = utils.get_total_trainable_parameter_size()
    self._logger.info('Total number of trainable parameters: {:d}'.format(total_trainable_parameter))
    for var in tf.global_variables():
        self._logger.debug('{}, {}'.format(var.name, var.get_shape()))
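# Note: utils.get_total_trainable_parameter_size is referenced above but not defined in this
# listing. A minimal sketch of what such a helper typically computes, assuming numpy is
# imported as np; the actual utility in this codebase may differ.
def get_total_trainable_parameter_size():
    total_parameters = 0
    for variable in tf.trainable_variables():
        # Number of elements in each trainable variable = product of its static shape.
        total_parameters += np.prod(variable.get_shape().as_list())
    return total_parameters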