def __init__(self, batch_size=1):
    with tf.device('/gpu:0'):
        self.input_frames = tf.placeholder(
            tf.float32, shape=[None, None, 64, 64, 1],
            name='input_frames')  # batch, seq, x, y, features
        self.fut_frames = tf.placeholder(
            tf.float32, shape=[None, None, 64, 64, 1], name='future_frames')
        self.keep_prob = tf.Variable(1.0, dtype=tf.float32,
                                     trainable=False, name='keep_prob')
        self.weight_decay = tf.Variable(1e-4, dtype=tf.float32,
                                        trainable=False, name='weight_decay')
        self.learning_rate = tf.Variable(1e-4, dtype=tf.float32,
                                         trainable=False, name='learning_rate')

        # data refinement: flatten each 64x64x1 frame to a 4096-vector
        s = tf.shape(self.input_frames)
        input_flatten = tf.reshape(self.input_frames,
                                   [s[0], s[1], 64 * 64 * 1])
        fut_flatten = tf.reshape(self.fut_frames, [s[0], s[1], 64 * 64 * 1])
        input_norm = input_flatten / 1.
        fut_norm = fut_flatten / 1.

        # cell declaration
        enc_cell = self.__lstm_cell(2048, 2)
        fut_cell = self.__lstm_cell(2048, 2)

        # encode frames
        enc_o, enc_s = rnn.custom_dynamic_rnn(
            enc_cell, input_norm, name='enc_rnn', scope='enc_cell')

        # state mapping: copy the encoder c-states into a zeroed decoder state
        repr = enc_cell.zero_state(s[0], tf.float32)
        repr = (tf.contrib.rnn.LSTMStateTuple(enc_s[0][0], repr[0][1]),
                tf.contrib.rnn.LSTMStateTuple(enc_s[1][0], repr[1][1]))

        # future prediction from zero inputs
        fut_dummy = tf.zeros_like(fut_norm)
        fut_o, fut_s = rnn.custom_dynamic_rnn(
            fut_cell, fut_dummy,
            output_conditioned=False,
            output_dim=4096,
            output_activation=tf.identity,
            initial_state=repr,
            name='dec_rnn', scope='dec_cell')

        # future ground-truth (0 or 1)
        fut_logit = tf.greater(fut_norm, 0.)

        # loss calculation: per-pixel cross entropy, summed over the 4096 dim
        self.fut_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=fut_o, labels=tf.cast(fut_logit, tf.float32))
        self.fut_loss = tf.reduce_mean(tf.reduce_sum(self.fut_loss, [2]))

        # optimizer
        self.optimizer = self.__adam_optimizer_op(
            self.fut_loss
            + self.weight_decay * self.__calc_weight_l2_panalty())

        # output future frames as uint8
        self.fut_output = tf.cast(
            tf.clip_by_value(tf.sigmoid(fut_o) * 255, 0, 255), tf.uint8)
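The __lstm_cell helper called above is defined elsewhere in the class; as a
rough sketch, an implementation consistent with its call sites (two stacked
cells, states indexed as enc_s[cell][c/h], dropout driven by keep_prob) could
look like this. BasicLSTMCell plus DropoutWrapper is an assumption of ours,
not the confirmed helper.

def __lstm_cell(self, num_units, num_layers):
    # Hypothetical sketch: a num_layers-deep MultiRNNCell whose state is a
    # tuple of LSTMStateTuples, matching the enc_s[cell][c/h] indexing above.
    def single_cell():
        cell = tf.contrib.rnn.BasicLSTMCell(num_units, state_is_tuple=True)
        # keep_prob is the non-trainable variable declared in __init__
        return tf.contrib.rnn.DropoutWrapper(
            cell, output_keep_prob=self.keep_prob)
    return tf.contrib.rnn.MultiRNNCell(
        [single_cell() for _ in range(num_layers)], state_is_tuple=True)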
def __init__(self, batch_size=1):
    with tf.device('/gpu:0'):
        self.input_frames = tf.placeholder(tf.float32,
                                           shape=[None, None, 64, 64, 1],
                                           name='input_frames')
        self.fut_frames = tf.placeholder(tf.float32,
                                         shape=[None, None, 64, 64, 1],
                                         name='future_frames')
        self.keep_prob = tf.Variable(1.0, dtype=tf.float32,
                                     trainable=False, name='keep_prob')
        self.weight_decay = tf.Variable(1e-4, dtype=tf.float32,
                                        trainable=False, name='weight_decay')
        self.learning_rate = tf.Variable(1e-4, dtype=tf.float32,
                                         trainable=False, name='learning_rate')
        # self.learning_rate = tf.Variable(1e-2, dtype=tf.float32,
        #                                  trainable=False, name='learning_rate')

        # data refinement
        s = tf.shape(self.input_frames)
        # TODO: old (flattened)
        # input_flatten = tf.reshape(self.input_frames,
        #                            [s[0], s[1], 64 * 64 * 1])
        # fut_flatten = tf.reshape(self.fut_frames, [s[0], s[1], 64 * 64 * 1])
        # TODO: conv (keep the spatial layout for the conv front end)
        input_flatten = tf.reshape(self.input_frames, [s[0], s[1], 64, 64, 1])
        fut_flatten = tf.reshape(self.fut_frames, [s[0], s[1], 64, 64, 1])
        input_norm = input_flatten / 1.
        fut_norm = fut_flatten / 1.

        # cell declaration
        print('cell declaration...')
        dim1 = 16
        dim2 = 64
        cell_dim = 256
        bias_start = 0.0
        enc_cell = self.__lstm_cell(cell_dim, 2)  # expressive power: was 2048
        fut_cell = self.__lstm_cell(cell_dim, 2)

        def conv_to_input(input, name):
            # 64x64x1 frame -> 7x7xcell_dim features via three stride-2 convs
            with tf.variable_scope(name):
                cv1_f = tf.get_variable(
                    "weights_cv1_f", shape=[3, 3, 1, dim1],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                cv1_b = tf.get_variable(
                    "weights_cv1_b", shape=[dim1],
                    initializer=tf.constant_initializer(bias_start))
                cv1 = tf.nn.relu(
                    tf.nn.conv2d(input, cv1_f, strides=[1, 2, 2, 1],
                                 padding='VALID') + cv1_b)
                cv2_f = tf.get_variable(
                    "weights_cv2_f", shape=[3, 3, dim1, dim2],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                cv2_b = tf.get_variable(
                    "weights_cv2_b", shape=[dim2],
                    initializer=tf.constant_initializer(bias_start))
                cv2 = tf.nn.relu(
                    tf.nn.conv2d(cv1, cv2_f, strides=[1, 2, 2, 1],
                                 padding='VALID') + cv2_b)
                cv3_f = tf.get_variable(
                    "weights_cv3_f", shape=[3, 3, dim2, cell_dim],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                cv3_b = tf.get_variable(
                    "weights_cv3_b", shape=[cell_dim],
                    initializer=tf.constant_initializer(bias_start))
                cv3 = tf.nn.relu(
                    tf.nn.conv2d(cv2, cv3_f, strides=[1, 2, 2, 1],
                                 padding='VALID') + cv3_b)
                return cv3

        def conv_to_output(input, name):
            # input: ?, 7, 7, cell_dim -> 64x64x1 frame via three deconvs.
            # NOTE: output_shape hard-codes batch_size, so the graph only
            # runs at that batch size even though the placeholders allow None.
            with tf.variable_scope(name):
                shape1 = [batch_size, 15, 15, dim2]
                dcv1_f = tf.get_variable(
                    "weights_dcv1_f", shape=[3, 3, dim2, cell_dim],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                dcv1_b = tf.get_variable(
                    "weights_dcv1_b", shape=[dim2],
                    initializer=tf.constant_initializer(bias_start))
                dcv1 = tf.nn.relu(
                    tf.nn.conv2d_transpose(input, dcv1_f, output_shape=shape1,
                                           strides=[1, 2, 2, 1],
                                           padding='VALID') + dcv1_b)
                shape2 = [batch_size, 31, 31, dim1]
                dcv2_f = tf.get_variable(
                    "weights_dcv2_f", shape=[3, 3, dim1, dim2],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                dcv2_b = tf.get_variable(
                    "weights_dcv2_b", shape=[dim1],
                    initializer=tf.constant_initializer(bias_start))
                dcv2 = tf.nn.relu(
                    tf.nn.conv2d_transpose(dcv1, dcv2_f, output_shape=shape2,
                                           strides=[1, 2, 2, 1],
                                           padding='VALID') + dcv2_b)
                shape3 = [batch_size, 64, 64, 1]
                dcv3_f = tf.get_variable(
                    "weights_dcv3_f", shape=[3, 3, 1, dim1],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                dcv3_b = tf.get_variable(
                    "weights_dcv3_b", shape=[1],
                    initializer=tf.constant_initializer(bias_start))
                # no ReLU on the last layer: dcv3 are the output logits
                dcv3 = tf.nn.conv2d_transpose(dcv2, dcv3_f,
                                              output_shape=shape3,
                                              strides=[1, 2, 2, 1],
                                              padding='VALID') + dcv3_b
                return dcv3

        # encode frames
        print('encode frames...')
        enc_o, enc_s = rnn.custom_dynamic_rnn(
            enc_cell, input_norm,
            input_operation=conv_to_input,
            name='enc_rnn', scope='enc_cell')

        # TODO: multi cell (copy the encoder c-states into a zeroed state)
        repr = enc_cell.zero_state(s[0], tf.float32)
        repr = (tf.contrib.rnn.LSTMStateTuple(enc_s[0][0], repr[0][1]),
                tf.contrib.rnn.LSTMStateTuple(enc_s[1][0], repr[1][1]))
        # TODO: single cell
        # repr = enc_s

        # future prediction
        print('future prediction...')
        fut_dummy = tf.zeros_like(enc_o)
        # TODO: output_dim = None!
        fut_o, fut_s = rnn.custom_dynamic_rnn(
            fut_cell, fut_dummy,
            output_operation=conv_to_output,
            output_conditioned=False,
            output_dim=None,
            output_activation=tf.identity,
            initial_state=repr,
            name='dec_rnn', scope='dec_cell')

        # future ground-truth (0 or 1)
        fut_logit = tf.greater(fut_norm, 0.)

        # loss calculation; fut_o and fut_logit are now ?, ?, 64, 64, 1
        # (they were ?, ?, 4096 in the flattened version above)
        self.fut_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=fut_o, labels=tf.cast(fut_logit, tf.float32))
        self.fut_loss = tf.reduce_mean(
            tf.reduce_sum(self.fut_loss, [2, 3, 4]))

        # optimizer
        print('optimization...')
        self.optimizer = self.__adam_optimizer_op(
            self.fut_loss
            + self.weight_decay * self.__calc_weight_l2_panalty())

        # output future frames as uint8
        print('output future frames...')
        self.fut_output = tf.cast(
            tf.clip_by_value(tf.sigmoid(fut_o) * 255, 0, 255), tf.uint8)
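The stride-2 VALID convolutions pin the spatial chain to 64 -> 31 -> 15 -> 7
on the way in and 7 -> 15 -> 31 -> 64 on the way out. A self-contained check
of that arithmetic (valid_conv_out is our helper name, not part of the model):

def valid_conv_out(size, kernel=3, stride=2):
    # output size of a VALID convolution: floor((in - kernel) / stride) + 1
    return (size - kernel) // stride + 1

assert valid_conv_out(64) == 31  # cv1: 64x64 -> 31x31
assert valid_conv_out(31) == 15  # cv2: 31x31 -> 15x15
assert valid_conv_out(15) == 7   # cv3: 15x15 -> 7x7
# conv2d_transpose accepts output_shape=64 from a 31x31 input because the
# forward conv of 64 maps back to 31, even though (31 - 1) * 2 + 3 = 63.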
def __init__(self, batch_size=80):
    with tf.device('/gpu:0'):
        # with tf.device('/cpu:0'):
        self.input_frames = tf.placeholder(tf.float32,
                                           shape=[None, None, 64, 64, 1],
                                           name='input_frames')
        self.fut_frames = tf.placeholder(tf.float32,
                                         shape=[None, None, 64, 64, 1],
                                         name='future_frames')
        self.keep_prob = tf.Variable(1.0, dtype=tf.float32,
                                     trainable=False, name='keep_prob')
        self.weight_decay = tf.Variable(1e-4, dtype=tf.float32,
                                        trainable=False, name='weight_decay')
        self.learning_rate = tf.Variable(1e-4, dtype=tf.float32,
                                         trainable=False, name='learning_rate')
        self.test_case = tf.placeholder(tf.bool, name='test_case')

        # data refinement
        s = tf.shape(self.input_frames)
        input_flatten = tf.reshape(self.input_frames, [s[0], s[1], 64, 64, 1])
        fut_flatten = tf.reshape(self.fut_frames, [s[0], s[1], 64, 64, 1])
        input_norm = input_flatten / 1.
        fut_norm = fut_flatten / 1.

        # cell declaration
        print('cell declaration...')
        dim1 = 16
        dim2 = 64
        cell_dim = 256
        bias_start = 0.0
        enc_cell = self.__lstm_cell(cell_dim, 2)  # expressive power: was 2048
        fut_cell = self.__lstm_cell(cell_dim, 2)
        recon_cell = self.__lstm_cell(cell_dim, 2)

        def conv_to_input(input, name):
            # 64x64x1 frame -> 7x7xcell_dim features via three stride-2 convs
            with tf.variable_scope(name):
                cv1_f = tf.get_variable(
                    "weights_cv1_f", shape=[3, 3, 1, dim1],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                cv1_b = tf.get_variable(
                    "weights_cv1_b", shape=[dim1],
                    initializer=tf.constant_initializer(bias_start))
                cv1 = tf.nn.relu(
                    tf.nn.conv2d(input, cv1_f, strides=[1, 2, 2, 1],
                                 padding='VALID') + cv1_b)
                cv2_f = tf.get_variable(
                    "weights_cv2_f", shape=[3, 3, dim1, dim2],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                cv2_b = tf.get_variable(
                    "weights_cv2_b", shape=[dim2],
                    initializer=tf.constant_initializer(bias_start))
                cv2 = tf.nn.relu(
                    tf.nn.conv2d(cv1, cv2_f, strides=[1, 2, 2, 1],
                                 padding='VALID') + cv2_b)
                cv3_f = tf.get_variable(
                    "weights_cv3_f", shape=[3, 3, dim2, cell_dim],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                cv3_b = tf.get_variable(
                    "weights_cv3_b", shape=[cell_dim],
                    initializer=tf.constant_initializer(bias_start))
                cv3 = tf.nn.relu(
                    tf.nn.conv2d(cv2, cv3_f, strides=[1, 2, 2, 1],
                                 padding='VALID') + cv3_b)
                return cv3

        def conv_to_output(input, name):
            # input: ?, 7, 7, cell_dim -> 64x64x1 frame via three deconvs.
            # NOTE: output_shape hard-codes batch_size, so the graph only
            # runs at that batch size even though the placeholders allow None.
            with tf.variable_scope(name):
                shape1 = [batch_size, 15, 15, dim2]
                dcv1_f = tf.get_variable(
                    "weights_dcv1_f", shape=[3, 3, dim2, cell_dim],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                dcv1_b = tf.get_variable(
                    "weights_dcv1_b", shape=[dim2],
                    initializer=tf.constant_initializer(bias_start))
                dcv1 = tf.nn.relu(
                    tf.nn.conv2d_transpose(input, dcv1_f, output_shape=shape1,
                                           strides=[1, 2, 2, 1],
                                           padding='VALID') + dcv1_b)
                shape2 = [batch_size, 31, 31, dim1]
                dcv2_f = tf.get_variable(
                    "weights_dcv2_f", shape=[3, 3, dim1, dim2],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                dcv2_b = tf.get_variable(
                    "weights_dcv2_b", shape=[dim1],
                    initializer=tf.constant_initializer(bias_start))
                dcv2 = tf.nn.relu(
                    tf.nn.conv2d_transpose(dcv1, dcv2_f, output_shape=shape2,
                                           strides=[1, 2, 2, 1],
                                           padding='VALID') + dcv2_b)
                shape3 = [batch_size, 64, 64, 1]
                dcv3_f = tf.get_variable(
                    "weights_dcv3_f", shape=[3, 3, 1, dim1],
                    initializer=tf.random_uniform_initializer(-0.01, 0.01))
                dcv3_b = tf.get_variable(
                    "weights_dcv3_b", shape=[1],
                    initializer=tf.constant_initializer(bias_start))
                # no ReLU on the last layer: dcv3 are the output logits
                dcv3 = tf.nn.conv2d_transpose(dcv2, dcv3_f,
                                              output_shape=shape3,
                                              strides=[1, 2, 2, 1],
                                              padding='VALID') + dcv3_b
                return dcv3

        # encode frames
        print('encode frames...')
        enc_o, enc_s = rnn.custom_dynamic_rnn(
            enc_cell, input_norm,
            input_operation=conv_to_input,
            name='enc_rnn', scope='enc_cell')

        # TODO: multi cell (copy the encoder c-states into a zeroed state)
        repr = enc_cell.zero_state(s[0], tf.float32)
        repr = (
            tf.contrib.rnn.LSTMStateTuple(enc_s[0][0], repr[0][1]),  # [cell][c/h]
            tf.contrib.rnn.LSTMStateTuple(enc_s[1][0], repr[1][1]))

        # reconstruction branch
        # TODO: shift right (prepend a zero frame so the decoder predicts
        # frame t from frames < t)
        dummy = tf.expand_dims(tf.zeros_like(input_norm[:, 0]),
                               axis=1)  # b x 1 x h x w x d
        input_norm_reverse = tf.reverse(input_norm, [1])  # 2 or 1?
        input_norm_shifted = tf.concat([dummy, input_norm_reverse], 1)
        input_norm_shifted = input_norm_shifted[:, :-1]
        # input_norm_reverse = tf.reshape(input_norm_reverse,
        #                                 tf.shape(input_norm))
        recon_out, recon_st = rnn.custom_dynamic_rnn(
            recon_cell, input_norm_shifted,
            input_operation=conv_to_input,
            output_operation=conv_to_output,
            output_conditioned=False,
            output_dim=None,
            output_activation=tf.identity,
            initial_state=repr,
            name='dec_rnn_recon', scope='dec_cell_recon')

        # reconstruction ground-truth (0 or 1)
        recon_logit = tf.greater(input_norm_reverse, 0.)

        # loss calculation, reduced over the ?, ?, 64, 64, 1 frame dims
        self.recon_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=recon_out, labels=tf.cast(recon_logit, tf.float32))
        self.recon_loss = tf.reduce_mean(
            tf.reduce_sum(self.recon_loss, [2, 3, 4]))

        ######
        # future prediction
        # TODO: shift right
        print('future prediction...')
        fut_norm_shifted = tf.concat([dummy, fut_norm], 1)
        fut_norm_shifted = fut_norm_shifted[:, :-1]
        # training branch: teacher forcing on the shifted ground truth
        fut_out_tr, fut_st_tr = rnn.custom_dynamic_rnn(
            fut_cell, fut_norm_shifted,
            input_operation=conv_to_input,
            output_operation=conv_to_output,
            output_conditioned=False,
            output_dim=None,
            output_activation=tf.identity,
            initial_state=repr,
            name='dec_rnn_fut', scope='dec_cell_fut',
            reuse=False)
        # test branch: the decoder conditions on its own sigmoid outputs
        fut_dummy_te = tf.zeros_like(input_norm)
        fut_out_te, fut_st_te = rnn.custom_dynamic_rnn(
            fut_cell, fut_dummy_te,
            input_operation=conv_to_input,
            output_operation=conv_to_output,
            output_conditioned=True,
            output_dim=None,
            output_activation=tf.identity,
            recurrent_activation=tf.sigmoid,
            initial_state=repr,
            name='dec_rnn_fut', scope='dec_cell_fut',
            reuse=True)
        fut_o, fut_s = tf.cond(
            self.test_case,
            lambda: (tf.convert_to_tensor(fut_out_te),
                     tf.convert_to_tensor(fut_st_te)),
            lambda: (tf.convert_to_tensor(fut_out_tr),
                     tf.convert_to_tensor(fut_st_tr)))

        # future ground-truth (0 or 1)
        fut_logit = tf.greater(fut_norm, 0.)
        self.fut_loss = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=fut_o, labels=tf.cast(fut_logit, tf.float32))
        self.fut_loss = tf.reduce_mean(
            tf.reduce_sum(self.fut_loss, [2, 3, 4]))

        # optimizer (weight decay disabled in this version)
        print('optimization...')
        self.optimizer = self.__adam_optimizer_op(
            self.fut_loss + self.recon_loss)
        # + self.weight_decay * self.__calc_weight_l2_panalty()

        # output future frames as uint8
        print('output future frames...')
        self.fut_output = tf.cast(
            tf.clip_by_value(tf.sigmoid(fut_o) * 255, 0, 255), tf.uint8)
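A usage sketch for this final version. The class name Model, num_steps, and
the next_batch iterator are placeholders of ours; only the placeholders and
ops (input_frames, fut_frames, test_case, optimizer, fut_loss, fut_output)
come from the code above. Note that fut_frames must be fed even at test time,
because both branches of the tf.cond are built into the graph.

model = Model(batch_size=80)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    num_steps = 1000  # assumption
    for step in range(num_steps):
        past, future = next_batch(80)  # each: [batch, seq, 64, 64, 1]
        _, loss = sess.run(
            [model.optimizer, model.fut_loss],
            feed_dict={model.input_frames: past,
                       model.fut_frames: future,
                       model.test_case: False})  # teacher forcing
    # inference: the decoder conditions on its own sigmoid outputs
    preds = sess.run(model.fut_output,
                     feed_dict={model.input_frames: past,
                                model.fut_frames: future,
                                model.test_case: True})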