def split_video(inputs):
    """Split a video tensor into a Python list of tensors along axis 1.

    The unstack runs inside the ``'vfeedbacknet_base'`` variable scope
    (opened with ``reuse=True``) and the resulting list is logged under
    the name ``'preprocess2'``.

    Args:
        inputs: tensor to unstack along axis 1 (presumably the frame/time
            axis of a ``[batch, frames, ...]`` video -- confirm with caller).

    Returns:
        The list produced by ``tf.unstack(inputs, axis=1)``.
    """
    with tf.variable_scope('vfeedbacknet_base', reuse=True):
        frames = tf.unstack(inputs, axis=1)
        ModelLogger.log('preprocess2', frames)
    return frames
def basic_loss_pred(inputs, inputs_sequence_length, inputs_sequence_maxlength,
                    labels, zeros, last_loss_multiple=1):
    """Compute per-step predictions, masked per-step losses and the total loss.

    Args:
        inputs: list of per-step logits tensors; its length must equal
            ``inputs_sequence_maxlength``.
        inputs_sequence_length: per-example sequence lengths (compared
            element-wise against the step index and used to normalise the
            loss).
        inputs_sequence_maxlength: expected ``len(inputs)``.
        labels: labels passed unchanged to
            ``tf.nn.softmax_cross_entropy_with_logits`` for every step.
        zeros: tensor substituted by ``tf.where`` for masked-out steps; it
            must be shape-compatible with one step's cross-entropy tensor.
        last_loss_multiple: extra weight added on the last valid step. The
            last step ends up weighted ``1 + last_loss_multiple`` because
            the boost is added on top of the already-counted loss.

    Returns:
        ``(losses, total_loss, predictions)`` where ``losses`` and
        ``predictions`` are stacked along axis 1 and ``total_loss`` is a
        scalar: the sum over examples of each example's summed loss divided
        by that example's sequence length.

    Raises:
        AssertionError: if ``len(inputs) != inputs_sequence_maxlength``.
    """
    assert len(inputs) == inputs_sequence_maxlength, 'inputs must be the max sequence length'

    predictions = tf.stack([tf.nn.softmax(logits=inp) for inp in inputs], axis=1)
    ModelLogger.log('predictions', predictions)

    cross_entropies = [
        tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=inp)
        for inp in inputs
    ]

    last_index = inputs_sequence_length - 1

    # only keep the loss incurred during the sequence
    # [ 0 1 2 3 4 5 ] seqlen=3 -> [ 0 1 2 0 0 0 ]
    cross_entropies_truncated = [
        tf.where(step > last_index, zeros, entropy)
        for step, entropy in enumerate(cross_entropies)
    ]

    # isolate the loss on the last valid step
    # [ 0 1 2 0 0 0 ] seqlen=3 -> [ 0 0 2 0 0 0 ]
    last_cross_entropy = [
        tf.where(step < last_index, zeros, entropy)
        for step, entropy in enumerate(cross_entropies_truncated)
    ]

    # boost the last valid step by last_loss_multiple
    cross_entropies_truncated = [
        entropy + last_loss_multiple * last_entropy
        for entropy, last_entropy in zip(cross_entropies_truncated, last_cross_entropy)
    ]

    losses = tf.stack(cross_entropies_truncated, axis=1, name='loss')
    ModelLogger.log('losses', losses)

    # Sum over the time axis only, so that every example is normalised by its
    # OWN sequence length. Reducing over all axes first would collapse the
    # batch into one scalar before the per-example division.
    per_example_loss = tf.reduce_sum(losses, axis=1) / tf.to_float(inputs_sequence_length)
    total_loss = tf.reduce_sum(per_example_loss, name='total_loss')
    ModelLogger.log('total_loss', total_loss)

    return losses, total_loss, predictions
def __call__(self, inputs, inputs_sequence_length):
    """Build the feedback model graph and return the combined logits.

    Every frame goes through three reshape-conv layers and the feedback
    LSTM cell (``Model.NFEEDBACK`` copies of the frame per call). For each
    feedback step the frames are max-pooled, run through ``convLSTM_layer1``,
    average-pooled and passed through two fc layers.

    Args:
        inputs: video tensor handed to ``VFeedbackNetBase.split_video``.
        inputs_sequence_length: accepted for interface compatibility; this
            variant passes ``None`` to the convLSTM instead.

    Returns:
        Per-step logits stacked on axis 1, then stacked across feedback
        steps on axis 1.
    """
    ModelLogger.log('raw_input', inputs)
    frames = VFeedbackNetBase.split_video(inputs)
    ModelLogger.log('input', frames)

    # feedback model
    reshape_stages = (
        (1, 'reshape_conv_layer1'),
        (2, 'reshape_conv_layer2'),
        (3, 'reshape_conv_layer3'),
    )
    for layer_num, log_name in reshape_stages:
        frames = [
            self.reshape_conv_layer(frame, layer_num, var_list=self.main_model_variables)
            for frame in frames
        ]
        ModelLogger.log(log_name, frames)

    cell_outputs = [
        self.feedbackLSTMCell1.apply_layer(
            [frame] * Model.NFEEDBACK, var_list=self.main_model_variables)
        for frame in frames
    ]

    # regroup from [frame][feedback step] to [feedback step][frame]
    fb_sequence = [[] for _ in range(Model.NFEEDBACK)]
    for frame_outputs in cell_outputs:
        for step, bucket in enumerate(fb_sequence):
            bucket.append(frame_outputs[step])
    ModelLogger.log('feedbackCell1', fb_sequence[0])

    logits = []
    for fbi in range(Model.NFEEDBACK):
        step_frames = [
            tf.nn.max_pool(frame, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
            for frame in fb_sequence[fbi]
        ]
        ModelLogger.log('fb{}_maxpool'.format(fbi), step_frames)

        # sequence model
        step_frames = self.convLSTM_layer1(step_frames, None, var_list=self.main_model_variables)
        ModelLogger.log('fb{}_convLSTM1'.format(fbi), step_frames)

        step_frames = [self.vfeedbacknet_base.ave_pool(frame) for frame in step_frames]
        ModelLogger.log('fb{}_ave_pool'.format(fbi), step_frames)

        step_frames = [self.fc_layer(frame, 1, var_list=self.fc_variables) for frame in step_frames]
        ModelLogger.log('fb{}_fc1'.format(fbi), step_frames)

        step_frames = [self.fc_layer(frame, 2, var_list=self.fc_variables) for frame in step_frames]
        ModelLogger.log('fb{}_fc2'.format(fbi), step_frames)

        logits.append(tf.stack(step_frames, axis=1))

    logits = tf.stack(logits, axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
def __call__(self, inputs, inputs_sequence_length):
    """Build the VGG-featurised feedback model graph and return the logits.

    Frames pass through ``vgg16_layer1``, three reshape-conv layers and the
    feedback LSTM cell (``Model.NFEEDBACK_ITERATIONS`` copies per frame).
    Each feedback step is then run through ``convLSTM_layer1``, average
    pooling and two fc layers.

    Args:
        inputs: video tensor handed to ``VFeedbackNetBase.split_video``.
        inputs_sequence_length: forwarded to ``convLSTM_layer1``.

    Returns:
        Per-step logits stacked on axis 1, then stacked across feedback
        iterations on axis 1.
    """
    ModelLogger.log('raw_input', inputs)
    frames = VFeedbackNetBase.split_video(inputs)
    ModelLogger.log('input', frames)

    # featurizer
    frames = [
        self.vfeedbacknet_base.vgg16_layer1(frame, var_list=self.featurizer_variables)
        for frame in frames
    ]
    ModelLogger.log('vgg-layer1', frames)

    # feedback model
    reshape_stages = (
        (1, 'reshape_conv_layer1'),
        (2, 'reshape_conv_layer2'),
        (3, 'reshape_conv_layer3'),
    )
    for layer_num, log_name in reshape_stages:
        frames = [
            self.reshape_conv_layer(frame, layer_num, var_list=self.main_model_variables)
            for frame in frames
        ]
        ModelLogger.log(log_name, frames)

    cell_outputs = [
        self.feedbackLSTMCell1.apply_layer(
            [frame] * Model.NFEEDBACK_ITERATIONS, var_list=self.main_model_variables)
        for frame in frames
    ]

    # regroup from [frame][iteration] to [iteration][frame]
    fb_sequence = [[] for _ in range(Model.NFEEDBACK_ITERATIONS)]
    for frame_outputs in cell_outputs:
        for iteration, bucket in enumerate(fb_sequence):
            bucket.append(frame_outputs[iteration])
    ModelLogger.log('feedbackCell1', fb_sequence[0])

    logits = []
    for fbi in range(Model.NFEEDBACK_ITERATIONS):
        step_frames = self.convLSTM_layer1(
            fb_sequence[fbi], inputs_sequence_length, var_list=self.main_model_variables)
        ModelLogger.log('fb{}_convLSTM1'.format(fbi), step_frames)

        step_frames = [self.vfeedbacknet_base.ave_pool(frame) for frame in step_frames]
        ModelLogger.log('fb{}_ave_pool'.format(fbi), step_frames)

        step_frames = [self.fc_layer(frame, 1, var_list=self.fc_variables) for frame in step_frames]
        ModelLogger.log('fb{}_fc1'.format(fbi), step_frames)

        step_frames = [self.fc_layer(frame, 2, var_list=self.fc_variables) for frame in step_frames]
        ModelLogger.log('fb{}_fc2'.format(fbi), step_frames)

        logits.append(tf.stack(step_frames, axis=1))

    logits = tf.stack(logits, axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
# Builds the NoFeedBackNetVgg16 graph for a batch of videos and returns the
# per-frame logits.
#
# Steps, in the order they appear below:
#   1. Log the raw input, append an axis with tf.expand_dims(axis=4) and
#      unstack along axis 1 into a Python list.
#   2. Apply self.vgg_layers to every unstacked element.
#   3. Stack the results on axis 1 and run them through a
#      convLSTM.ConvLSTMCell (512 filters, 3x3 kernel) with
#      tf.nn.dynamic_rnn, passing sequence_length=inputs_sequence_length,
#      under the 'NoFeedBackNetVgg16' / 'convgru1' variable scopes; then
#      unstack again on axis 1.
#   4. Average-pool each element (pool_size=7, stride 1, 'VALID'), reshape to
#      [-1, 512] and apply the existing 'weights' / 'biases' variables fetched
#      with tf.get_variable under the 'fc' scope opened with reuse=True.
#
# Returns the Python list produced by step 4 (the logits are NOT stacked into
# a single tensor here).
#
# NOTE(review): the docstring lists a trailing `channels` axis, but
# expand_dims(axis=4) creates the fifth axis and the commented-out asserts
# expect shape[1:] == (40, 96, 96), so the input looks like rank 4
# [batch, video_length, height, width] -- confirm and correct the docstring.
# NOTE(review): the statements of this method are collapsed onto two physical
# lines, so the nesting of the tf.variable_scope blocks cannot be read from
# indentation. Restore the original formatting before changing any code here.
def __call__(self, inputs, inputs_sequence_length): ''' inputs: A tensor fo size [batch, video_length, video_height, video_width, channels] ''' with tf.variable_scope('NoFeedBackNetVgg16', reuse=True): ModelLogger.log('input', inputs) #assert(inputs.shape[1:] == (40, 96, 96)) # specific model shape for now inputs = tf.expand_dims(inputs, axis=4) #assert(inputs.shape[1:] == (40, 96, 96, 1)) # specific model shape for now inputs = tf.unstack(inputs, axis=1) ModelLogger.log('input-unstack', inputs) logging.debug('--- begin model definition ---') # use VGG16 pretrained on imagenet as an initialization inputs = [self.vgg_layers(inp) for inp in inputs] ModelLogger.log('vgg16_conv', inputs) # use feedback network architecture below with tf.variable_scope('NoFeedBackNetVgg16'): with tf.variable_scope('convgru1'): num_filters = 512 # convLSTM internal fitlers h, w = int(inputs[0].shape[1]), int(inputs[0].shape[2]) cell = convLSTM.ConvLSTMCell([h, w], num_filters, [3, 3]) inputs, state = tf.nn.dynamic_rnn( cell, tf.stack(inputs, axis=1), dtype=tf.float32, sequence_length=inputs_sequence_length, ) inputs = tf.unstack(inputs, axis=1) ModelLogger.log('convLSTM_output', inputs) # inputs = [ tf.nn.max_pool(inp, # ksize=[1, 2, 2, 1], # strides=[1, 2, 2, 1], # padding='SAME', # name='pool1') for inp in inputs ] # ModelLogger.log('pool_output', inputs) # with tf.variable_scope('convlstm2', reuse=False): # num_filters = 512 # convLSTM internal fitlers # h, w = int(inputs[0].shape[1]), int(inputs[0].shape[2]) # cell = convLSTM.ConvLSTMCell([h, w], num_filters, [3, 3], reuse=False) # inputs, state = tf.nn.dynamic_rnn( # cell, # tf.stack(inputs, axis=1), # dtype=tf.float32, # sequence_length=inputs_sequence_length, # ) # inputs = tf.unstack(inputs, axis=1) # ModelLogger.log('convLSTM_output', inputs) # inputs = [ tf.nn.max_pool(inp, # ksize=[1, 2, 2, 1], # strides=[1, 2, 2, 1], # padding='SAME', # name='pool1') for inp in inputs ] # ModelLogger.log('pool_output', inputs) with 
tf.variable_scope('NoFeedBackNetVgg16', reuse=True): with tf.variable_scope('fc', reuse=True): inputs = [ tf.layers.average_pooling2d(inputs=inp, pool_size=7, strides=1, padding='VALID', data_format='channels_last', name='ave_pool') for inp in inputs ] ModelLogger.log('ave_pool_output', inputs) weights = tf.get_variable('weights') biases = tf.get_variable('biases') inputs = [tf.reshape(inp, [-1, 512]) for inp in inputs] ModelLogger.log('flatten_output', inputs) inputs = [tf.matmul(inp, weights) + biases for inp in inputs] ModelLogger.log('fc_output', inputs) logging.debug('--- end model definition ---') logits = inputs ModelLogger.log('logits', logits) return logits
def __call__(self, inputs, inputs_sequence_length):
    """Build the no-feedback baseline graph and return the logits.

    Pipeline: ``vgg16_layer1`` -> reshape-conv 4 -> ``convLSTM_layer1`` ->
    reshape-conv 5 -> fc 1 -> fc 2.

    Args:
        inputs: video tensor handed to ``VFeedbackNetBase.split_video``.
        inputs_sequence_length: accepted for interface compatibility; not
            read by this variant.

    Returns:
        Per-step logits stacked on axis 1 with an extra axis inserted at
        position 1.
    """
    ModelLogger.log('raw_input', inputs)
    frames = VFeedbackNetBase.split_video(inputs)
    ModelLogger.log('input', frames)

    ## featurizer ##
    featurizer_outputs = [
        self.vfeedbacknet_base.vgg16_layer1(frame, var_list=self.featurizer_variables)
        for frame in frames
    ]
    ModelLogger.log('vgg-layer1', featurizer_outputs)

    ## main model ##
    frames = [
        self.reshape_conv_layer(frame, 4, var_list=self.main_model_variables)
        for frame in featurizer_outputs
    ]
    ModelLogger.log('reshape_conv_layer4', frames)

    # NOTE(review): the literal 60 is passed where sibling models pass
    # inputs_sequence_length -- confirm this is intended for this variant.
    frames = self.convLSTM_layer1(frames, 60, var_list=self.main_model_variables)
    ModelLogger.log('convLSTM1', frames)

    frames = [
        self.reshape_conv_layer(frame, 5, var_list=self.main_model_variables)
        for frame in frames
    ]
    ModelLogger.log('reshape_conv_layer5', frames)

    frames = [self.fc_layer(frame, 1, var_list=self.fc_variables) for frame in frames]
    ModelLogger.log('fc1', frames)

    frames = [self.fc_layer(frame, 2, var_list=self.fc_variables) for frame in frames]
    ModelLogger.log('fc2', frames)

    # output
    logits = tf.expand_dims(tf.stack(frames, axis=1), axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
def __call__(self, inputs, inputs_sequence_length):
    """Build the feedback-cell-first model graph and return the logits.

    Frames pass through ``vgg16_layer1`` and the feedback LSTM cell
    (``Model.NFEEDBACK_ITERATIONS`` copies per frame). Each feedback
    iteration is then processed by reshape-conv 1 -> ``convLSTM_layer1`` ->
    reshape-conv 2 -> fc 1 -> fc 2.

    Args:
        inputs: video tensor handed to ``VFeedbackNetBase.split_video``.
        inputs_sequence_length: forwarded to ``convLSTM_layer1``.

    Returns:
        Per-step logits stacked on axis 1, then stacked across feedback
        iterations on axis 1.
    """
    ModelLogger.log('raw_input', inputs)
    frames = VFeedbackNetBase.split_video(inputs)
    ModelLogger.log('input', frames)

    ## featurizer ##
    frames = [
        self.vfeedbacknet_base.vgg16_layer1(frame, var_list=self.featurizer_variables)
        for frame in frames
    ]
    ModelLogger.log('vgg-layer1', frames)

    ## main model ##
    cell_outputs = [
        self.feedbackLSTMCell1.apply_layer(
            [frame] * Model.NFEEDBACK_ITERATIONS, var_list=self.main_model_variables)
        for frame in frames
    ]

    # regroup from [frame][iteration] to [iteration][frame]
    fb_sequence = [[] for _ in range(Model.NFEEDBACK_ITERATIONS)]
    for frame_outputs in cell_outputs:
        for iteration, bucket in enumerate(fb_sequence):
            bucket.append(frame_outputs[iteration])
    ModelLogger.log('feedbackCell1', fb_sequence[0])

    logits = []
    for seq in fb_sequence:
        step_frames = [
            self.reshape_conv_layer(frame, 1, var_list=self.main_model_variables)
            for frame in seq
        ]
        ModelLogger.log('reshape_conv_layer1', step_frames)

        step_frames = self.convLSTM_layer1(
            step_frames, inputs_sequence_length, var_list=self.main_model_variables)
        ModelLogger.log('convLSTM1', step_frames)

        step_frames = [
            self.reshape_conv_layer(frame, 2, var_list=self.main_model_variables)
            for frame in step_frames
        ]
        ModelLogger.log('reshape_conv_layer2', step_frames)

        step_frames = [self.fc_layer(frame, 1, var_list=self.fc_variables) for frame in step_frames]
        ModelLogger.log('fc1', step_frames)

        step_frames = [self.fc_layer(frame, 2, var_list=self.fc_variables) for frame in step_frames]
        ModelLogger.log('fc2', step_frames)

        logits.append(tf.stack(step_frames, axis=1))

    logits = tf.stack(logits, axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
def __call__(self, inputs, inputs_sequence_length):
    """Build the "disjoint convLSTM" model graph and return the logits.

    After ``vgg16_layer1`` and reshape-conv 1, ``convLSTM_layer1`` and
    ``convLSTM_layer2`` are each applied to one-element lists, i.e. every
    frame is processed independently. Only ``convLSTM_layer4`` sees the
    whole frame list together with ``inputs_sequence_length``.

    Args:
        inputs: video tensor handed to ``VFeedbackNetBase.split_video``.
        inputs_sequence_length: forwarded to ``convLSTM_layer4`` only.

    Returns:
        Per-step logits stacked on axis 1 with an extra axis inserted at
        position 1.
    """
    ModelLogger.log('raw_input', inputs)
    frames = VFeedbackNetBase.split_video(inputs)
    ModelLogger.log('input', frames)

    ## featurizer ##
    featurizer_outputs = [
        self.vfeedbacknet_base.vgg16_layer1(frame, var_list=self.featurizer_variables)
        for frame in frames
    ]
    ModelLogger.log('vgg-layer1', featurizer_outputs)

    ## main model ##
    frames = [
        self.reshape_conv_layer(frame, 1, var_list=self.main_model_variables)
        for frame in featurizer_outputs
    ]
    ModelLogger.log('reshape_conv_layer1', frames)

    # DISJOINT! These LSTMs are not used as sequence models: each frame is
    # wrapped in its own one-element list, so frames stay independent.
    disjoint_stages = (
        (self.convLSTM_layer1, 'convLSTM1', 2, 'reshape_conv_layer2'),
        (self.convLSTM_layer2, 'convLSTM2', 3, 'reshape_conv_layer3'),
    )
    for conv_lstm, lstm_log, reshape_num, reshape_log in disjoint_stages:
        single_outputs = [
            conv_lstm([frame], None, var_list=self.main_model_variables)
            for frame in frames
        ]
        frames = [outputs[0] for outputs in single_outputs]
        ModelLogger.log(lstm_log, frames)

        frames = [
            self.reshape_conv_layer(frame, reshape_num, var_list=self.main_model_variables)
            for frame in frames
        ]
        ModelLogger.log(reshape_log, frames)

    # sequence model
    frames = self.convLSTM_layer4(
        frames, inputs_sequence_length, var_list=self.main_model_variables)
    ModelLogger.log('convLSTM4', frames)

    frames = [self.vfeedbacknet_base.ave_pool(frame) for frame in frames]
    ModelLogger.log('ave_pool', frames)

    frames = [self.fc_layer(frame, 1, var_list=self.fc_variables) for frame in frames]
    ModelLogger.log('fc1', frames)

    frames = [self.fc_layer(frame, 2, var_list=self.fc_variables) for frame in frames]
    ModelLogger.log('fc2', frames)

    logits = tf.expand_dims(tf.stack(frames, axis=1), axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
if var_list is not None and biases not in var_list: var_list.append(biases) return inputs if __name__ == '__main__': sess = tf.Session() x = tf.placeholder(tf.float32, [None, 20, 112, 112], name='inputs') x_len = tf.placeholder(tf.float32, [None], name='inputs_len') zeros = tf.placeholder(tf.float32, [20], name='inputs_len') labels = tf.placeholder(tf.float32, [None], name='inputs_len') vfeedbacknet_base = VFeedbackNetBase(sess, 27, train_vgg16='FROM_SCRATCH') ModelLogger.log('input', x) inputs = vfeedbacknet_base.split_video(x) ModelLogger.log('split', inputs) variables = [] inputs = [ vfeedbacknet_base.vgg16_layer1(inp, var_list=variables) for inp in inputs ] ModelLogger.log('vgg-layer', inputs) inputs = [ vfeedbacknet_base.vgg16_layer2(inp, var_list=variables) for inp in inputs
def __call__(self, inputs, inputs_sequence_length):
    """Build the VGG16 (layers 1-5) + convLSTM graph and return the logits.

    Args:
        inputs: video tensor handed to ``self.vfeedbacknet_base.split_video``.
        inputs_sequence_length: forwarded to ``convLSTM_layer1``.

    Returns:
        Per-step logits stacked on axis 1 with an extra axis inserted at
        position 1.
    """
    base = self.vfeedbacknet_base

    ModelLogger.log('raw_input', inputs)
    frames = base.split_video(inputs)
    ModelLogger.log('input', frames)

    ## featurizer (layers 1-2) and main model (layers 3-5) ##
    vgg_stages = (
        ('vgg-layer1', base.vgg16_layer1),
        ('vgg-layer2', base.vgg16_layer2),
        ('vgg-layer3', base.vgg16_layer3),
        ('vgg-layer4', base.vgg16_layer4),
        ('vgg-layer5', base.vgg16_layer5),
    )
    for log_name, vgg_layer in vgg_stages:
        frames = [vgg_layer(frame, var_list=self.featurizer_variables) for frame in frames]
        ModelLogger.log(log_name, frames)

    frames = self.convLSTM_layer1(
        frames, inputs_sequence_length, var_list=self.main_model_variables)
    ModelLogger.log('convLSTM1', frames)

    ## ave_pool and fc ##
    frames = [base.ave_pool(frame) for frame in frames]
    ModelLogger.log('ave_pool', frames)

    frames = [base.fc_layer(frame, var_list=self.fc_variables) for frame in frames]
    ModelLogger.log('fc', frames)

    logits = tf.expand_dims(tf.stack(frames, axis=1), axis=1)
    ModelLogger.log('logits', logits)
    return logits
def __call__(self, inputs, inputs_sequence_length):
    """Build the four-feedback-cell model graph and return the logits.

    Each frame goes through reshape-conv 1, is replicated
    ``Model.NFEEDBACK_ITERATIONS`` times, and then alternates between a
    feedback LSTM cell and a reshape-conv layer four times. The per-iteration
    activations are finally average-pooled and passed through fc layer 1.

    Args:
        inputs: video tensor handed to ``VFeedbackNetBase.split_video``.
        inputs_sequence_length: accepted for interface compatibility; not
            read by this variant.

    Returns:
        Per-frame logits stacked on axis 1, then stacked across feedback
        iterations on axis 1.
    """
    ModelLogger.log('raw_input', inputs)
    frames = VFeedbackNetBase.split_video(inputs)
    ModelLogger.log('input', frames)

    ## featurizer ##
    frames = [
        self.reshape_conv_layer(frame, 1, var_list=self.featurizer_variables)
        for frame in frames
    ]
    ModelLogger.log('conv_layer1', frames)

    # one copy of every frame per feedback iteration
    per_frame = [[frame] * Model.NFEEDBACK_ITERATIONS for frame in frames]

    ## main model ##
    feedback_stages = (
        (self.feedbackLSTMCell1, 'feedback_cell1', 2, 'reshape_conv_layer2'),
        (self.feedbackLSTMCell2, 'feedback_cell2', 3, 'reshape_conv_layer3'),
        (self.feedbackLSTMCell3, 'feedback_cell3', 4, 'reshape_conv_layer4'),
        (self.feedbackLSTMCell4, 'feedback_cell4', 5, 'reshape_conv_layer5'),
    )
    for cell, cell_log, reshape_num, reshape_log in feedback_stages:
        per_frame = [
            cell.apply_layer(activations, var_list=self.featurizer_variables)
            for activations in per_frame
        ]
        ModelLogger.log(cell_log, per_frame[0])

        per_frame = [
            [
                self.reshape_conv_layer(activation, reshape_num, var_list=self.featurizer_variables)
                for activation in activations
            ]
            for activations in per_frame
        ]
        ModelLogger.log(reshape_log, per_frame[0])

    # regroup from [frame][iteration] to [iteration][frame]
    fb_sequence = [[] for _ in range(Model.NFEEDBACK_ITERATIONS)]
    for activations in per_frame:
        for iteration, bucket in enumerate(fb_sequence):
            bucket.append(activations[iteration])

    logits = []
    for seq in fb_sequence:
        step_frames = [self.vfeedbacknet_base.ave_pool(frame) for frame in seq]
        ModelLogger.log('ave_pool', step_frames)

        step_frames = [self.fc_layer(frame, 1, var_list=self.fc_variables) for frame in step_frames]
        ModelLogger.log('fc1', step_frames)

        logits.append(tf.stack(step_frames, axis=1))

    logits = tf.stack(logits, axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
def __call__(self, inputs, inputs_sequence_length):
    """Build the unrolled three-pass feedback graph and return the logits.

    The frame list is processed three times by reshape-conv 1 followed by
    ``feedback_block3``. The first pass starts without state; each later
    pass receives the previous pass's per-frame output as ``state``. The
    hidden states of all passes are concatenated into one long sequence,
    which goes through reshape-conv 4 -> ``convLSTM_layer1`` ->
    reshape-conv 5 -> fc 1 -> fc 2 and is then split back into one chunk per
    pass.

    The chunk size and the length given to ``convLSTM_layer1`` are derived
    from the number of frames rather than hard-coded as 20 and 60, so the
    graph is identical for 20-frame videos and stays correct for other
    lengths.

    Args:
        inputs: video tensor handed to ``VFeedbackNetBase.split_video``.
        inputs_sequence_length: accepted for interface compatibility; not
            read by this variant.

    Returns:
        One stacked (axis 1) logits tensor per feedback pass, stacked again
        across passes on axis 1.
    """
    num_feedback_passes = 3

    ModelLogger.log('raw_input', inputs)
    featurizer_outputs = VFeedbackNetBase.split_video(inputs)
    ModelLogger.log('input', featurizer_outputs)

    frames_per_pass = len(featurizer_outputs)

    ## main model ##
    sequence = []
    feedback_outputs = None
    for pass_idx in range(num_feedback_passes):
        # reshape-conv 1 is rebuilt from the featurizer outputs on every pass
        frames = [
            self.reshape_conv_layer(frame, 1, var_list=self.main_model_variables)
            for frame in featurizer_outputs
        ]
        ModelLogger.log('reshape_conv_layer1', frames)

        if pass_idx == 0:
            # first pass: no previous state to feed back
            feedback_outputs = [
                self.feedback_block3(frame, var_list=self.main_model_variables)
                for frame in frames
            ]
        else:
            feedback_outputs = [
                self.feedback_block3(frame, state=state, var_list=self.main_model_variables)
                for frame, state in zip(frames, feedback_outputs)
            ]
        frames = [output['hidden_state'] for output in feedback_outputs]
        ModelLogger.log('feedback_block3', frames)

        # add outputs to sequence
        sequence += frames

    frames = [
        self.reshape_conv_layer(frame, 4, var_list=self.main_model_variables)
        for frame in sequence
    ]
    ModelLogger.log('reshape_conv_layer4', frames)

    # Total unrolled length (60 for 20-frame videos).
    # NOTE(review): assumes the second argument is the sequence length of the
    # concatenated passes -- confirm against convLSTM_layer1.
    frames = self.convLSTM_layer1(frames, len(sequence), var_list=self.main_model_variables)
    ModelLogger.log('convLSTM1', frames)

    frames = [
        self.reshape_conv_layer(frame, 5, var_list=self.main_model_variables)
        for frame in frames
    ]
    ModelLogger.log('reshape_conv_layer5', frames)

    frames = [self.fc_layer(frame, 1, var_list=self.fc_variables) for frame in frames]
    ModelLogger.log('fc1', frames)

    frames = [self.fc_layer(frame, 2, var_list=self.fc_variables) for frame in frames]
    ModelLogger.log('fc2', frames)

    # output: split the long sequence back into one chunk per feedback pass
    logits = [
        tf.stack(frames[start:start + frames_per_pass], axis=1)
        for start in range(0, num_feedback_passes * frames_per_pass, frames_per_pass)
    ]
    logits = tf.stack(logits, axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
def __call__(self, inputs, inputs_sequence_length):
    """Build the three-pass dconv-feedback graph and return the logits.

    After ``vgg16_layer1`` and ``vgg16_layer2``, three passes are built. The
    first pass feeds the featurizer outputs into ``vgg16_layer3``; later
    passes feed ``dconv_layer`` applied to the previous pass's convLSTM
    outputs. Every pass then runs ``convLSTM_layer1`` -> ``conv_layer`` ->
    average pooling -> fc and contributes one stacked logits tensor.

    Args:
        inputs: video tensor handed to ``self.vfeedbacknet_base.split_video``.
        inputs_sequence_length: forwarded to ``convLSTM_layer1``.

    Returns:
        Per-step logits stacked on axis 1, then stacked across the three
        passes on axis 1.
    """
    base = self.vfeedbacknet_base

    ModelLogger.log('raw_input', inputs)
    frames = base.split_video(inputs)
    ModelLogger.log('input', frames)

    ## featurizer ##
    frames = [base.vgg16_layer1(frame, var_list=self.featurizer_variables) for frame in frames]
    ModelLogger.log('vgg-layer1', frames)

    frames = [base.vgg16_layer2(frame, var_list=self.featurizer_variables) for frame in frames]
    ModelLogger.log('vgg-layer2', frames)

    ## main model ##
    featurizer_outputs = frames
    feedback_outputs = None
    logits = []

    for pass_idx in range(3):
        if pass_idx == 0:
            # feedback 1: start directly from the featurizer outputs
            frames = featurizer_outputs
            conv_log_name = 'conv'
        else:
            # feedback 2 and 3: start from the previous pass's convLSTM outputs
            frames = [
                self.dconv_layer(feedback_o, var_list=self.main_model_variables)
                for _, feedback_o in zip(featurizer_outputs, feedback_outputs)
            ]
            ModelLogger.log('dconv0', frames)
            conv_log_name = 'conv0'

        frames = [base.vgg16_layer3(frame, var_list=self.featurizer_variables) for frame in frames]
        ModelLogger.log('vgg-layer3', frames)

        frames = self.convLSTM_layer1(
            frames, inputs_sequence_length, var_list=self.main_model_variables)
        ModelLogger.log('convLSTM1', frames)
        feedback_outputs = frames

        frames = [self.conv_layer(frame, var_list=self.main_model_variables) for frame in frames]
        ModelLogger.log(conv_log_name, frames)

        frames = [base.ave_pool(frame) for frame in frames]
        ModelLogger.log('ave_pool', frames)

        frames = [base.fc_layer(frame, var_list=self.fc_variables) for frame in frames]
        ModelLogger.log('fc', frames)

        logits.append(tf.stack(frames, axis=1))

    logits = tf.stack(logits, axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
def __call__(self, inputs, inputs_sequence_length):
    """Build the VGG16 + feedback-cell + two-convLSTM graph; return the logits.

    Frames pass through VGG16 layers 1-5 and the feedback LSTM cell
    (``Model.NFEEDBACK`` copies per frame). Each feedback step then runs
    ``convLSTM_layer1`` -> ``convLSTM_layer2`` -> average pooling -> fc.

    Args:
        inputs: video tensor handed to ``self.vfeedbacknet_base.split_video``.
        inputs_sequence_length: forwarded to both convLSTM layers.

    Returns:
        Per-step logits stacked on axis 1, then stacked across feedback
        steps on axis 1.
    """
    base = self.vfeedbacknet_base

    ModelLogger.log('raw_input', inputs)
    frames = base.split_video(inputs)
    ModelLogger.log('input', frames)

    ## featurizer (layers 1-2) and main model (layers 3-5) ##
    vgg_stages = (
        ('vgg-layer1', base.vgg16_layer1),
        ('vgg-layer2', base.vgg16_layer2),
        ('vgg-layer3', base.vgg16_layer3),
        ('vgg-layer4', base.vgg16_layer4),
        ('vgg-layer5', base.vgg16_layer5),
    )
    for log_name, vgg_layer in vgg_stages:
        frames = [vgg_layer(frame, var_list=self.featurizer_variables) for frame in frames]
        ModelLogger.log(log_name, frames)

    cell_outputs = [
        self.feedbackLSTMCell1.apply_layer(
            [frame] * Model.NFEEDBACK, var_list=self.main_model_variables)
        for frame in frames
    ]

    # regroup from [frame][feedback step] to [feedback step][frame]
    fb_sequence = [[] for _ in range(Model.NFEEDBACK)]
    for frame_outputs in cell_outputs:
        for step, bucket in enumerate(fb_sequence):
            bucket.append(frame_outputs[step])
    ModelLogger.log('feedbackCell1', fb_sequence[0])

    logits = []
    for fbi in range(Model.NFEEDBACK):
        step_frames = self.convLSTM_layer1(
            fb_sequence[fbi], inputs_sequence_length, var_list=self.main_model_variables)
        ModelLogger.log('convLSTM1', step_frames)

        step_frames = self.convLSTM_layer2(
            step_frames, inputs_sequence_length, var_list=self.main_model_variables)
        ModelLogger.log('convLSTM2', step_frames)

        ## ave_pool and fc ##
        step_frames = [base.ave_pool(frame) for frame in step_frames]
        ModelLogger.log('ave_pool', step_frames)

        step_frames = [base.fc_layer(frame, var_list=self.fc_variables) for frame in step_frames]
        ModelLogger.log('fc', step_frames)

        step_logits = tf.stack(step_frames, axis=1)
        ModelLogger.log('logits', step_logits)
        logits.append(step_logits)

    logits = tf.stack(logits, axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
def __call__(self, inputs, inputs_sequence_length):
    """Build the plain per-frame VGG16 graph and return the logits.

    Every frame passes through VGG16 conv layers 1-5 and the three VGG16 fc
    layers. No recurrent layer is involved.

    Args:
        inputs: video tensor handed to ``self.vfeedbacknet_base.split_video``.
        inputs_sequence_length: accepted for interface compatibility; not
            read by this variant.

    Returns:
        Per-frame logits stacked on axis 1 with an extra axis inserted at
        position 1.
    """
    base = self.vfeedbacknet_base

    ModelLogger.log('raw_input', inputs)
    frames = base.split_video(inputs)
    ModelLogger.log('input', frames)

    ## featurizer (layers 1-2) and main model (layers 3-5) ##
    conv_stages = (
        ('vgg-layer1', base.vgg16_layer1),
        ('vgg-layer2', base.vgg16_layer2),
        ('vgg-layer3', base.vgg16_layer3),
        ('vgg-layer4', base.vgg16_layer4),
        ('vgg-layer5', base.vgg16_layer5),
    )
    for log_name, vgg_layer in conv_stages:
        frames = [vgg_layer(frame, var_list=self.featurizer_variables) for frame in frames]
        ModelLogger.log(log_name, frames)

    ## fc ##
    fc_stages = (
        ('fc6', base.vgg16_fc_layer1),
        ('fc7', base.vgg16_fc_layer2),
        ('fc8', base.vgg16_fc_layer3),
    )
    for log_name, fc_layer in fc_stages:
        frames = [fc_layer(frame, var_list=self.fc_variables) for frame in frames]
        ModelLogger.log(log_name, frames)

    logits = tf.expand_dims(tf.stack(frames, axis=1), axis=1)
    ModelLogger.log('logits', logits)
    return logits
# Smoke test: build the model graph on placeholder inputs, initialise the
# variables and export them to /tmp/weights.npz.
sess = tf.Session()
video_length = 20

x = tf.placeholder(tf.float32, [None, video_length, 112, 112], name='inputs')
x_len = tf.placeholder(tf.float32, [None], name='inputs_len')
# Each placeholder gets its own graph name. Reusing 'inputs_len' for all three
# makes TensorFlow silently rename the later ones (inputs_len_1, inputs_len_2).
zeros = tf.placeholder(tf.float32, [video_length], name='zeros')
labels = tf.placeholder(tf.float32, [None], name='labels')

# NOTE(review): weights_filename is an absolute path into one user's home
# directory -- confirm it exists on the machine running this script.
model = Model(
    sess, 27, 16,
    train_featurizer='NO',
    train_main_model='FROM_SCRATCH',
    train_fc='FROM_SCRATCH',
    weights_filename='/home/jemmons/vfeedbacknet_base_weights.npz')

logits = model(x, x_len)
ModelLogger.log('logits', logits)

model.initialize_variables()
model.export_variables('/tmp/weights.npz')
def __call__(self, inputs, inputs_sequence_length):
    """Build the feedback graph and return the logits of every feedback pass.

    Pipeline, as wired below:
      1. `self.vfeedbacknet_base.split_video` turns `inputs` into a list of
         per-step tensors.
      2. Each element goes through `vgg16_layer1` .. `vgg16_layer4` of
         `self.vfeedbacknet_base`.
      3. The whole sequence is passed to `self.convLSTM_layer1` together
         with `inputs_sequence_length`.
      4. `self.feedback_block1` is applied three times to the *same*
         convLSTM outputs: the first pass receives no `state`, each later
         pass receives the previous pass's output as `state`.
      5. After every pass the blocks' `'hidden_state'` entries are average
         pooled (`vfeedbacknet_base.ave_pool`) and fed to `self.fc_layer`.

    Every intermediate list is reported through `ModelLogger.log`.

    Args:
        inputs: video tensor handed to `split_video` (layout not visible
            from this method -- TODO confirm against the caller).
        inputs_sequence_length: forwarded unchanged to `convLSTM_layer1`.

    Returns:
        A tensor built by stacking, along axis 1, the three per-pass tensors;
        each per-pass tensor is itself the per-step fc outputs stacked along
        axis 1 (presumably [batch, pass, step, classes] -- confirm).
    """
    # NOTE: a check that `inputs.shape[1:] == (20, 112, 112)` exists only as
    # disabled code; no input shape is enforced here.
    base = self.vfeedbacknet_base
    extra_feedback_passes = 2  # passes that consume the previous pass's state

    def apply_per_frame(layer, variables, tag, frames):
        """Run `layer` on every frame, log the results under `tag`, return them."""
        outputs = [layer(frame, var_list=variables) for frame in frames]
        ModelLogger.log(tag, outputs)
        return outputs

    def classify(block_outputs):
        """Turn one pass of feedback-block outputs into a stacked logits tensor."""
        hidden_states = [output['hidden_state'] for output in block_outputs]
        ModelLogger.log('feedback_block1', hidden_states)

        pooled = [base.ave_pool(hidden) for hidden in hidden_states]
        ModelLogger.log('ave_pool', pooled)

        scores = [self.fc_layer(vec, var_list=self.fc_variables) for vec in pooled]
        ModelLogger.log('fc', scores)
        return tf.stack(scores, axis=1)

    ModelLogger.log('raw_input', inputs)
    frames = base.split_video(inputs)
    ModelLogger.log('input', frames)

    # Featurizer: four convolutional stages, then the convLSTM over the
    # sequence.  (`vgg16_layer5` is intentionally not applied in this model.)
    frames = apply_per_frame(base.vgg16_layer1, self.featurizer_variables, 'vgg-layer1', frames)
    frames = apply_per_frame(base.vgg16_layer2, self.featurizer_variables, 'vgg-layer2', frames)
    frames = apply_per_frame(base.vgg16_layer3, self.featurizer_variables, 'vgg-layer3', frames)
    frames = apply_per_frame(base.vgg16_layer4, self.featurizer_variables, 'vgg-layer4', frames)

    featurizer_outputs = self.convLSTM_layer1(
        frames, inputs_sequence_length, var_list=self.main_model_variables)
    ModelLogger.log('convLSTM1', featurizer_outputs)

    # Main model.  Only `feedback_block1` is active; the deeper stages
    # (`feedback_block2`/`feedback_block3` and `reshape_conv_layer` 2/3) are
    # disabled in this model.

    # Pass 1: the block is called without a `state` argument.
    block_outputs = [
        self.feedback_block1(feature, var_list=self.main_model_variables)
        for feature in featurizer_outputs
    ]
    per_pass_logits = [classify(block_outputs)]

    # Passes 2 and 3: same features, but each step is seeded with the output
    # it produced during the previous pass.
    for _ in range(extra_feedback_passes):
        block_outputs = [
            self.feedback_block1(feature, state=previous, var_list=self.main_model_variables)
            for feature, previous in zip(featurizer_outputs, block_outputs)
        ]
        per_pass_logits.append(classify(block_outputs))

    # Output: one slice per feedback pass along axis 1.
    logits = tf.stack(per_pass_logits, axis=1)
    ModelLogger.log('combined-feedback-logits', logits)
    return logits
# Builds the NoFeedBackNetVgg16 forward graph.
#
# Steps visible in the body: log the raw input; append an axis at index 4 with
# tf.expand_dims; unstack along axis 1 into a Python list; apply
# self.vgg_layers to every element; reshape every element to [-1, 4096]; then
# apply a single fully connected layer (matmul with 'weights' plus 'biases').
# Both variables are fetched with tf.get_variable under the variable scopes
# 'NoFeedBackNetVgg16' and 'fc', opened with reuse=True, so they must already
# exist when this method runs.
#
# Returns a Python *list* of per-step tensors (the fc outputs); unlike the
# other models the list is not stacked into a single tensor.
# `inputs_sequence_length` is accepted but never read.
#
# NOTE(review): the inline docstring (which also has the typo "fo" for "of")
# lists five dimensions including `channels`, yet the code appends the axis at
# index 4 itself and the disabled asserts mention a per-example shape of
# (40, 96, 96) -- the incoming tensor is presumably
# [batch, video_length, height, width]; confirm.
# NOTE(review): variable_scope('NoFeedBackNetVgg16', reuse=True) is opened
# twice; with the original indentation lost it is unclear whether the second
# is nested inside the first (which would change the scope path used for the
# 'fc' lookup) -- verify before reformatting this line.
def __call__(self, inputs, inputs_sequence_length): ''' inputs: A tensor fo size [batch, video_length, video_height, video_width, channels] ''' with tf.variable_scope('NoFeedBackNetVgg16', reuse=True): ModelLogger.log('input', inputs) #assert(inputs.shape[1:] == (40, 96, 96)) # specific model shape for now inputs = tf.expand_dims(inputs, axis=4) #assert(inputs.shape[1:] == (40, 96, 96, 1)) # specific model shape for now inputs = tf.unstack(inputs, axis=1) ModelLogger.log('input-unstack', inputs) logging.debug('--- begin model definition ---') # use VGG16 pretrained on imagenet as an initialization inputs = [self.vgg_layers(inp) for inp in inputs] ModelLogger.log('vgg16_conv', inputs) # use feedback network architecture below # with tf.variable_scope('NoFeedBackNetVgg16'): # with tf.variable_scope('convgru1'): # pass with tf.variable_scope('NoFeedBackNetVgg16', reuse=True): with tf.variable_scope('fc', reuse=True): weights = tf.get_variable('weights') biases = tf.get_variable('biases') inputs = [tf.reshape(inp, [-1, 4096]) for inp in inputs] ModelLogger.log('flatten_output', inputs) inputs = [tf.matmul(inp, weights) + biases for inp in inputs] ModelLogger.log('fc_output', inputs) logging.debug('--- end model definition ---') logits = inputs ModelLogger.log('logits', logits) return logits