def rnn(images, real_input_flag, num_layers, num_hidden, configs):
  """Builds the E3D-LSTM generator graph (sliding-window variant).

  NOTE(review): this module defines `rnn` more than once; only the last
  definition is bound at import time -- confirm which variant is intended.

  Args:
    images: 5-D tensor, [batch, total_length, width, height, channels]
      (axis meaning inferred from the indexing below; confirm with caller).
    real_input_flag: scheduled-sampling mask; real_input_flag[:, t] selects
      between the ground-truth frame and the previous prediction.
    num_layers: number of stacked eidetic LSTM layers.
    num_hidden: list of per-layer hidden channel counts.
    configs: object providing `total_length` and `input_length`.

  Returns:
    [out_ims, loss, l1_loss, l2_loss]: predicted frames for the output
    horizon, and the combined / individual reconstruction losses.
  """
  gen_images, lstm_layer, cell, hidden, c_history = [], [], [], [], []
  shape = images.get_shape().as_list()
  batch_size = shape[0]
  ims_width = shape[2]
  ims_height = shape[3]
  output_channels = shape[-1]

  total_length = configs.total_length
  input_length = configs.input_length

  # Each step feeds the LSTM a sliding window of `window_length` frames,
  # advanced by `window_stride` frames per time step.
  window_length = 2
  window_stride = 1

  # Build one eidetic LSTM per layer plus zero-filled initial states.
  for i in range(num_layers):
    if i == 0:
      num_hidden_in = output_channels
    else:
      num_hidden_in = num_hidden[i - 1]
    new_lstm = eidetic_lstm(
        name='e3d' + str(i),
        input_shape=[ims_width, window_length, ims_height, num_hidden_in],
        output_channels=num_hidden[i],
        kernel_shape=[2, 5, 5])
    lstm_layer.append(new_lstm)
    zero_state = tf.zeros(
        [batch_size, window_length, ims_width, ims_height, num_hidden[i]])
    cell.append(zero_state)
    hidden.append(zero_state)
    c_history.append(None)
  # Global memory shared across layers, threaded through every cell call.
  memory = zero_state

  with tf.variable_scope('generator'):
    input_list = []
    reuse = False
    # Left-pad the window with zero frames so the first step sees a full
    # `window_length`-frame window.
    for time_step in range(window_length - 1):
      input_list.append(
          tf.zeros([batch_size, ims_width, ims_height, output_channels]))

    for time_step in range(total_length - 1):
      with tf.variable_scope('e3d-lstm', reuse=reuse):
        if time_step < input_length:
          input_frm = images[:, time_step]
        else:
          # Scheduled sampling: mix ground truth with the previous
          # prediction according to real_input_flag.
          time_diff = time_step - input_length
          input_frm = real_input_flag[:, time_diff] * images[:, time_step] \
              + (1 - real_input_flag[:, time_diff]) * x_gen  # pylint: disable=used-before-assignment
        input_list.append(input_frm)

        # With window_length=2, window_stride=1 the modulus is 1, so this
        # branch runs every step on the last `window_length` frames.
        if time_step % (window_length - window_stride) == 0:
          input_frm = tf.stack(input_list[time_step:])
          # [window, batch, w, h, c] -> [batch, window, w, h, c]
          input_frm = tf.transpose(input_frm, [1, 0, 2, 3, 4])

          for i in range(num_layers):
            # Accumulate the eidetic cell history along the window axis.
            if time_step == 0:
              c_history[i] = cell[i]
            else:
              c_history[i] = tf.concat([c_history[i], cell[i]], 1)
            if i == 0:
              inputs = input_frm
            else:
              # Input to 3rd layer is the sum of the hidden-state outputs
              # from the 1st and 2nd layers (skip connection).
              if i == 2:
                inputs = hidden[i - 1] + hidden[i - 2]
              else:
                inputs = hidden[i - 1]
            hidden[i], cell[i], memory = lstm_layer[i](
                inputs, hidden[i], cell[i], memory, c_history[i])

          # Decoder input is the sum of the hidden-state outputs from the
          # last two layers (skip connection).
          dec_input = hidden[num_layers - 1] + hidden[num_layers - 2]
          # Collapse the window axis: kernel and stride of `window_length`
          # along time leave a singleton time dimension.
          x_gen = tf.layers.conv3d(dec_input, output_channels,
                                   [window_length, 1, 1],
                                   [window_length, 1, 1], 'same')
          # BUGFIX: squeeze only the collapsed window axis. A bare
          # tf.squeeze() also drops batch/channel dims of size 1 (e.g.
          # batch_size == 1), corrupting downstream shapes.
          x_gen = tf.squeeze(x_gen, axis=1)
          gen_images.append(x_gen)
      reuse = True

  gen_images = tf.stack(gen_images)
  # [time, batch, w, h, c] -> [batch, time, w, h, c]
  gen_images = tf.transpose(gen_images, [1, 0, 2, 3, 4])
  l2_loss = tf.nn.l2_loss(gen_images - images[:, 1:])
  l1_loss = tf.reduce_sum(tf.abs(gen_images - images[:, 1:]))
  loss = l2_loss + l1_loss
  out_len = total_length - input_length
  out_ims = gen_images[:, -out_len:]
  return [out_ims, loss, l1_loss, l2_loss]
def rnn(images, real_input_flag, num_layers, num_hidden, configs):
  """Builds a RNN according to the config.

  Single-frame-window E3D-LSTM variant: window_length is 1, the sequence
  length is derived from configs.input_seq_length / output_seq_length
  divided by configs.dimension_3D, and only [out_ims, loss] is returned.

  NOTE(review): this module defines `rnn` more than once; only the last
  definition is bound at import time -- confirm which variant is intended.
  NOTE(review): contains graph-construction-time `print` calls on symbolic
  tensors (tf.shape results) -- these print Tensor objects, not values.
  """
  gen_images, lstm_layer, cell, hidden, c_history = [], [], [], [], []
  shape = images.get_shape().as_list()
  batch_size = shape[0]
  # seq_length = shape[1]
  ims_width = shape[2]
  ims_height = shape[3]
  output_channels = shape[-1]
  filter_size = configs.filter_size

  # Number of 3-D blocks: raw sequence lengths divided by the temporal
  # block depth (dimension_3D).
  total_length = int((configs.input_seq_length + configs.output_seq_length)
                     / configs.dimension_3D)
  input_length = int(configs.input_seq_length / configs.dimension_3D)

  window_length = 1  # What is window_length?
  window_stride = 1

  # Build one eidetic LSTM per layer plus zero-filled initial states.
  for i in range(num_layers):
    if i == 0:
      num_hidden_in = output_channels
    else:
      num_hidden_in = num_hidden[i - 1]
    new_lstm = eidetic_lstm(
        name='e3d' + str(i),
        input_shape=[ims_width, window_length, ims_height, num_hidden_in],
        output_channels=num_hidden[i],
        # Translated review note: could the 5 come from configs'
        # filter_size? (It already does here.)
        kernel_shape=[2, filter_size, filter_size])
    lstm_layer.append(new_lstm)
    zero_state = tf.zeros(
        # Translated review note: why is the second dim window_length?
        [batch_size, window_length, ims_width, ims_height, num_hidden[i]])
    # Construct hidden state shape of this layer
    cell.append(zero_state)
    hidden.append(zero_state)
    c_history.append(None)
  # Translated: sized like the last layer's hidden state.
  memory = zero_state

  with tf.variable_scope('generator'):
    input_list = []
    reuse = False
    for time_step in range(total_length - 1):
      with tf.variable_scope('e3d-lstm', reuse=reuse):
        if time_step < input_length:
          input_frm = images[:, time_step]
        else:
          # Scheduled sampling: mix ground truth with the previous
          # prediction according to real_input_flag.
          time_diff = time_step - input_length
          input_frm = real_input_flag[:, time_diff] * images[:, time_step] + (
              1 - real_input_flag[:, time_diff]
          ) * x_gen  # pylint: disable=used-before-assignment
        input_list.append(input_frm)

        # window_length == 1, so this stacks just the newest frame and
        # moves the window axis after batch.
        input_frm = tf.stack(input_list[time_step:])
        input_frm = tf.transpose(input_frm, [1, 0, 2, 3, 4])

        for i in range(num_layers):
          # Accumulate the eidetic cell history along the window axis.
          if time_step == 0:
            c_history[i] = cell[i]
          else:
            c_history[i] = tf.concat([c_history[i], cell[i]], 1)
          if i == 0:
            inputs = input_frm
          else:
            inputs = hidden[i - 1]
          # Translated: cell: C_{t-1}; memory: zigzag global memory;
          # c_history: the eidetic cell, i.e. the accumulated cell memory.
          hidden[i], cell[i], memory = lstm_layer[i](inputs, hidden[i],
                                                     cell[i], memory,
                                                     c_history[i])

        # Decode the top hidden state back to frame channels.
        x_gen = tf.layers.conv3d(hidden[num_layers - 1], output_channels,
                                 [window_length, 1, 1],
                                 [window_length, 1, 1], 'same')
        print('x_gen shape before squeeze: ', tf.shape(x_gen))
        # NOTE(review): bare squeeze drops ALL size-1 dims, including
        # batch/channel when they are 1 -- confirm intended.
        x_gen = tf.squeeze(x_gen)
        print('x_gen shape after squeeze: ', tf.shape(x_gen))
        gen_images.append(x_gen)
      reuse = True

  gen_images = tf.stack(gen_images)
  print(images.shape, images[:, 1:])
  #gen_images = tf.transpose(gen_images, [1, 0, 2, 3, 4])
  # Reshape (instead of transpose) the stacked predictions to the target
  # layout; relies on static shape of images[:, 1:].
  ss = images[:, 1:].shape
  gen_images = tf.reshape(gen_images, ss)  # [batch_size,3,100,100,2]
  # Combined L2 + L1 reconstruction loss against the shifted ground truth.
  loss = tf.nn.l2_loss(gen_images - images[:, 1:])
  loss += tf.reduce_sum(tf.abs(gen_images - images[:, 1:]))
  out_len = total_length - input_length
  out_ims = gen_images[:, -out_len:]
  return [out_ims, loss]
def rnn(images, real_input_flag, num_layers, num_hidden, configs):
  """Builds an E3D-LSTM generator with a 3-D conv front-end encoder.

  This variant flattens the spatial axes (width*height) for the LSTM
  states, runs the layer-0 input through a reusable 3-D conv encoder, and
  can freeze the decoder conv when a pretrained model is loaded.

  NOTE(review): this module defines `rnn` more than once; only the last
  definition is bound at import time -- confirm which variant is intended.

  Args:
    images: 5-D tensor, [batch, total_length, width, height, channels]
      (axis meaning inferred from the indexing below; confirm with caller).
    real_input_flag: scheduled-sampling mask; real_input_flag[:, t] selects
      between the ground-truth frame and the previous prediction.
    num_layers: number of stacked eidetic LSTM layers.
    num_hidden: list of per-layer hidden channel counts.
    configs: object providing `total_length`, `input_length`, and
      `pretrained_model`.

  Returns:
    [out_ims, loss, l1_loss, l2_loss]: predicted frames for the output
    horizon, and the combined / individual reconstruction losses.
  """
  gen_images, lstm_layer, cell, hidden, c_history = [], [], [], [], []
  shape = images.get_shape().as_list()
  batch_size = shape[0]
  ims_width = shape[2]
  ims_height = shape[3]
  output_channels = shape[-1]

  total_length = configs.total_length
  input_length = configs.input_length

  # Each step feeds the LSTM a sliding window of `window_length` frames,
  # advanced by `window_stride` frames per time step.
  window_length = 2
  window_stride = 1

  # Build one eidetic LSTM per layer plus zero-filled initial states.
  # States use a flattened spatial axis: [batch, window, w*h, hidden].
  for i in range(num_layers):
    if i == 0:
      num_hidden_in = output_channels
    else:
      num_hidden_in = num_hidden[i - 1]
    new_lstm = eidetic_lstm(
        name='e3d' + str(i),
        input_shape=[ims_width, window_length, ims_height, num_hidden_in],
        output_channels=num_hidden[i],
        kernel_shape=[5, 5])
    lstm_layer.append(new_lstm)
    zero_state = tf.zeros(
        [batch_size, window_length, ims_width * ims_height, num_hidden[i]])
    cell.append(zero_state)
    hidden.append(zero_state)
    c_history.append(None)
  # Global memory shared across layers, threaded through every cell call.
  memory = zero_state

  with tf.variable_scope('generator'):
    input_list = []
    reuse = False
    # Left-pad the window with zero frames so the first step sees a full
    # `window_length`-frame window.
    for time_step in range(window_length - 1):
      input_list.append(
          tf.zeros([batch_size, ims_width, ims_height, output_channels]))

    for time_step in range(total_length - 1):
      with tf.variable_scope('e3d-lstm', reuse=reuse):
        if time_step < input_length:
          input_frm = images[:, time_step]
        else:
          # Scheduled sampling: mix ground truth with the previous
          # prediction according to real_input_flag.
          time_diff = time_step - input_length
          input_frm = real_input_flag[:, time_diff] * images[:, time_step] \
              + (1 - real_input_flag[:, time_diff]) * x_gen  # pylint: disable=used-before-assignment
        input_list.append(input_frm)

        # With window_length=2, window_stride=1 the modulus is 1, so this
        # branch runs every step on the last `window_length` frames.
        if time_step % (window_length - window_stride) == 0:
          input_frm = tf.stack(input_list[time_step:])
          # [window, batch, w, h, c] -> [batch, window, w, h, c]
          input_frm = tf.transpose(input_frm, [1, 0, 2, 3, 4])

          for i in range(num_layers):
            # Accumulate the eidetic cell history along the window axis.
            if time_step == 0:
              c_history[i] = cell[i]
            else:
              c_history[i] = tf.concat([c_history[i], cell[i]], 1)
            if i == 0:
              inputs = input_frm
              # 3-D encoder; the scope reuse flag keeps its weights
              # shared across time steps.
              with tf.variable_scope('3Denc', reuse=reuse):
                inputs = tf.layers.conv3d(inputs, output_channels,
                                          [2, 5, 5], padding="same")
              # Flatten spatial dims to match the LSTM state layout:
              # (batch, window, h*w, channel).
              enc = inputs.shape
              inputs = tf.reshape(
                  inputs, shape=(enc[0], enc[1], enc[2] * enc[3], enc[4]))
            else:
              inputs = hidden[i - 1]
            hidden[i], cell[i], memory = lstm_layer[i](
                inputs, hidden[i], cell[i], memory, c_history[i])

          # Un-flatten the top hidden state for the conv decoder.
          # GENERALIZED: channel count was hard-coded to 64; it must equal
          # the top layer's hidden size (see zero_state above).
          hidden_state_clf = tf.reshape(
              hidden[num_layers - 1],
              [batch_size, window_length, ims_width, ims_height,
               num_hidden[num_layers - 1]])
          # Decoder is frozen when loading a pretrained model; trainable
          # when training from scratch.
          is_dec_fixed = bool(configs.pretrained_model)
          x_gen = tf.layers.conv3d(hidden_state_clf, output_channels,
                                   [window_length, 1, 1],
                                   [window_length, 1, 1], 'same',
                                   trainable=not is_dec_fixed)
          # BUGFIX: squeeze only the collapsed window axis. A bare
          # tf.squeeze() also drops batch/channel dims of size 1 (e.g.
          # batch_size == 1), corrupting downstream shapes.
          x_gen = tf.squeeze(x_gen, axis=1)
          gen_images.append(x_gen)
      reuse = True

  gen_images = tf.stack(gen_images)
  # [time, batch, w, h, c] -> [batch, time, w, h, c]
  gen_images = tf.transpose(gen_images, [1, 0, 2, 3, 4])
  l2_loss = tf.nn.l2_loss(gen_images - images[:, 1:])
  l1_loss = tf.reduce_sum(tf.abs(gen_images - images[:, 1:]))
  loss = l2_loss + l1_loss
  out_len = total_length - input_length
  out_ims = gen_images[:, -out_len:]
  return [out_ims, loss, l1_loss, l2_loss]