# main(args): builds an I3D RGB network without its classification top and takes one
# temporal slice of its symbolic output for an Input of shape (79, 224, 224, 3).
# - The weights are 'rgb_kinetics_only' when args.no_imagenet_pretrained is truthy,
#   otherwise 'rgb_imagenet_and_kinetics'; the model is built under the
#   `args.eval_type in ['rgb', 'joint']` check.
# - mid_slice_no is (feature_shape[1] / 2) + 1 and indexes axis 1 of the feature tensor.
# NOTE(review): this snippet uses Python 2 `print` statements, and `feature_shape[1]/2`
#   relies on Python 2 integer division (it would yield a float index on Python 3).
# NOTE(review): the file opened on LABEL_MAP_PATH is never closed explicitly.
# NOTE(review): the snippet ends inside an unterminated ''' string, so the remainder of
#   the function is not visible here.
def main(args): # load the kinetics classes kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')] if args.eval_type in ['rgb', 'joint']: if args.no_imagenet_pretrained: # build model for RGB data # and load pretrained weights (trained on kinetics dataset only) rgb_model = Inception_Inflated3d( include_top=False, weights='rgb_kinetics_only', input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), classes=NUM_CLASSES) else: # build model for RGB data # and load pretrained weights (trained on imagenet and kinetics dataset) rgb_model = Inception_Inflated3d( include_top=False, weights='rgb_imagenet_and_kinetics', input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), classes=NUM_CLASSES) # pdb.set_trace() # print rgb_model.summary() # plot_model(rgb_model, to_file='model_without_top.png', show_shapes = True) # print rgb_model.summary() # load RGB sample (just one example) vid_input = Input(shape =(79, 224,224, 3)) features = rgb_model(vid_input) print features.shape.as_list() feature_shape = features.shape.as_list() mid_slice_no = (feature_shape[1]/2)+1 print mid_slice_no rgb_features = features[:,mid_slice_no,:,:,:] print rgb_features.shape.as_list() ''' rgb_sample = np.load(SAMPLE_DATA_PATH['rgb']) # # # # # make prediction rgb_features = rgb_model.predict(rgb_sample) # rgb_features # print rgb_features.shape.as_list() # # print rgb_logits.shape features = rgb_features[:,11,:,:,:] features = np.array(features) print features.shape ''' '''
def define_model(self, model_type="RGB"):
    """Build an I3D backbone followed by a small 3-way classification head.

    Args:
        model_type: "RGB" for a 3-channel input (layer-name suffix "_rgb") or
            "OPT" for a 2-channel input (layer-name suffix "_opt").

    Returns:
        A ``keras.models.Sequential`` made of the I3D network, ``Flatten``,
        ``Dropout(0.5)`` and ``Dense(3)``.

    Raises:
        ValueError: if ``model_type`` is neither "RGB" nor "OPT". Previously
            this case fell through and failed later with an unbound-variable
            error.
    """
    if model_type == "RGB":
        channel = 3
        model_name = "_rgb"
    elif model_type == "OPT":
        channel = 2
        model_name = "_opt"
    else:
        raise ValueError(
            'model_type must be "RGB" or "OPT", got {!r}'.format(model_type))

    a = keras.layers.Input(shape=(16, 224, 224, channel))
    i3d = Inception_Inflated3d(include_top=False,
                               weights=None,
                               input_tensor=a,
                               input_shape=None,
                               dropout_prob=0.5,
                               endpoint_logit=True,
                               classes=3,
                               model_name=model_name)

    # Suffix every backbone layer name with the stream tag so the layer names
    # of an RGB model and an OPT model differ.
    for layer in i3d.layers:
        layer.name = layer.name + model_name

    model = keras.models.Sequential()
    model.add(i3d)
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dropout(0.5))
    model.add(keras.layers.Dense(3))
    return model
def loadModel(numberOfClasses, inputFrames, frameHeight, frameWidth,
              numRGBChannels, withWeights=False):
    """Assemble an RGB I3D classifier with a rebuilt classification head.

    The backbone is created without its top. The head added here is
    Dropout(0.5) -> 1x1x1 convolution with ``numberOfClasses`` filters ->
    reshape to (remaining_frames, classes) -> mean over axis 1 -> softmax.

    Args:
        numberOfClasses: number of output classes.
        inputFrames, frameHeight, frameWidth, numRGBChannels: the four
            components of the backbone ``input_shape``.
        withWeights: when truthy the backbone is requested with the
            'rgb_inception_i3d' weights, otherwise with ``weights=None``.

    Returns:
        A ``Model`` from the backbone input to the softmax output
        (layer name 'prediction').
    """
    pretrained = 'rgb_inception_i3d' if withWeights else None
    clip_shape = (inputFrames, frameHeight, frameWidth, numRGBChannels)

    backbone = Inception_Inflated3d(
        include_top=False,
        weights=pretrained,
        input_shape=clip_shape,
        dropout_prob=0.5,
        endpoint_logit=True,
        classes=numberOfClasses)

    head = backbone.output
    head = Dropout(0.5)(head)
    head = conv3d_bn(head, numberOfClasses, 1, 1, 1,
                     padding='same',
                     use_bias=True,
                     use_activation_fn=False,
                     use_bn=False,
                     name='Conv3d_6a_1x1')

    # Axis 1 of the conv output is read as the number of frames left.
    frames_left = int(head.shape[1])
    head = Reshape((frames_left, numberOfClasses))(head)

    # logits (raw scores for each class): average the per-frame scores
    head = Lambda(lambda t: K.mean(t, axis=1, keepdims=False),
                  output_shape=lambda s: (s[0], s[2]))(head)

    probabilities = Activation('softmax', name='prediction')(head)
    return Model(backbone.input, probabilities)
def run_test(listFileName, storeDir):
    """Extract I3D features for every listed video and dump them to disk.

    For each batch returned by ``input_data.read_clip_and_label`` the RGB
    I3D network (built without its top) is run on each of the ``SEQ_NUM``
    clips of every video. The per-video feature matrix of shape
    (SEQ_NUM, 1024) is written as raw float64 to
    ``<storeDir>/<file_index:08d>_<label:02d>.bin``.

    Args:
        listFileName: path of the text file listing the test videos, one per
            line; its line count is used as the number of videos.
        storeDir: directory that receives the ``.bin`` feature files.

    Notes:
        A file that cannot be written (``IOError``) is skipped with a message;
        ``file_index`` is still consumed for it, as before.
    """
    # Count the videos without leaking the file handle.
    with open(listFileName, 'r') as list_file:
        num_test_videos = sum(1 for _ in list_file)
    print("Number of test videos={}".format(num_test_videos))

    rgb_model = Inception_Inflated3d(
        include_top=False,
        weights=weight_names['withImagenet'],
        input_shape=(CLIP_LENGTH, CROP_SIZE, CROP_SIZE, 3),
        classes=NUM_CLASSES)

    next_batch_start = 0
    # ceil(num_test_videos / BATCH_SIZE) for positive counts, 0 when empty
    all_steps = (num_test_videos - 1) // BATCH_SIZE + 1
    file_index = 0

    for step in range(all_steps):
        np_arr_data, np_arr_label, next_batch_start, _, _ = \
            input_data.read_clip_and_label(
                listFileName, BATCH_SIZE, SEQ_NUM,
                start_pos=next_batch_start,
                num_frames_per_clip=CLIP_LENGTH,
                crop_size=CROP_SIZE)

        # One (BATCH_SIZE, 1024) feature matrix per position in the sequence.
        per_step_features = []
        for i in range(SEQ_NUM):
            data = np_arr_data[:, i, :, :, :]
            rgb_logits = rgb_model.predict(data)
            per_step_features.append(rgb_logits.reshape(BATCH_SIZE, 1024))

        # Stack along axis 1 so the result is (BATCH_SIZE, SEQ_NUM, 1024).
        # The former np.array(...).reshape((-1, SEQ_NUM, 1024)) started from
        # a (SEQ_NUM, BATCH_SIZE, 1024) array and therefore interleaved the
        # features of different videos whenever both sizes exceeded 1.
        fc6_feature_batch = np.stack(per_step_features, axis=1)

        for batch_index in range(min(BATCH_SIZE, np_arr_label.shape[0])):
            features = fc6_feature_batch[batch_index].astype(np.float64)
            label = np_arr_label[batch_index]
            file_index += 1
            filename = "%s/%08d_%02d.bin" % (storeDir, file_index, label)
            try:
                features.tofile(filename)
            except IOError:
                # Best effort: one unwritable file must not abort the run.
                print('Skip it!\n')
def train():
    """Fine-tune an I3D RGB backbone and persist the history and the model.

    Steps, as visible in the code:
      1. ``load_data`` is called on a hard-coded glob of ``.npy`` files.
      2. The I3D backbone (no top, 'rgb_imagenet_and_kinetics' weights) gets a
         head of Dropout(0.5) -> Reshape((-1, 1024)) -> Dense(NUM_FRAMES,
         sigmoid) -> ThresholdedReLU(theta=0.8).
      3. The first 100 layers of the combined model are frozen.
      4. The first 70% of ``indexes`` is used for training, the rest for
         validation; training runs for up to 50 epochs with LR reduction and
         early stopping callbacks.
      5. The history goes to 'hist.csv' and the model is saved as 'i3d'.
    """
    # datapath='/users/kevin/downloads/aicure-dataset/*/*.npy'
    datapath = r'C:\Users\Chris\Documents\projects\cs172b\aicure-dataset\*\*.npy'
    indexes, data, labels = load_data(datapath)

    backbone = Inception_Inflated3d(
        weights='rgb_imagenet_and_kinetics',
        include_top=False,
        input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS))

    head = Dropout(0.5)(backbone.output)
    head = Reshape((-1, 1024))(head)
    # head = AveragePooling1D(pool_size=3)(head)
    head = Dense(NUM_FRAMES, kernel_initializer='normal',
                 activation='sigmoid')(head)
    head = ThresholdedReLU(theta=0.8, trainable=False)(head)
    model = Model(inputs=backbone.input, outputs=head)

    # freeze the first 100 layers
    for frozen_layer in model.layers[:100]:
        frozen_layer.trainable = False

    adam = keras.optimizers.Adam(lr=1e-4)
    model.compile(optimizer=adam, loss='mae', metrics=['accuracy'])

    # 70/30 split by position in `indexes`
    split_at = int(0.7 * len(indexes))
    train_indexes = indexes[:split_at]
    validation_indexes = indexes[split_at:]

    callbacks = [
        tf.keras.callbacks.ReduceLROnPlateau(
            monitor='accuracy', patience=2, factor=0.2, min_lr=1e-8),
        tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5),
    ]

    train_batches = data_generator(data, labels, train_indexes, BATCH_SIZE)
    validation_batches = data_generator(data, labels, validation_indexes)

    history = model.fit_generator(
        train_batches,
        steps_per_epoch=int(STEPS * .7),
        epochs=50,
        validation_data=validation_batches,
        validation_steps=int(STEPS * .3),
        callbacks=callbacks)

    write_out(history, 'hist.csv')
    save_model(model, 'i3d')
def initialize_weights(self, all_models_name, mode, dropout_prob, sum_idx):
    """Build the PI3D model and seed its tail layers from a pretrained I3D.

    The model returned by ``self.pi3d_model(...)`` receives, layer by layer,
    the weights of the layers at the same negative positions in a full
    ``Inception_Inflated3d`` loaded with 'rgb_imagenet_and_kinetics' weights.
    The copied range starts at ``-45 + (2 - sum_idx) * 20`` and stops before
    index ``-4``.

    Args:
        all_models_name, mode, dropout_prob, sum_idx: forwarded unchanged to
            ``self.pi3d_model``; ``sum_idx`` also selects the first layer of
            the copied range.

    Returns:
        The PI3D model with the copied weights.
    """
    model = self.pi3d_model(all_models_name, mode, dropout_prob, sum_idx)
    donor = Inception_Inflated3d(include_top=True,
                                 weights='rgb_imagenet_and_kinetics')

    # Same negative slice on both models so layers are paired from the end.
    tail = slice(-45 + (2 - sum_idx) * 20, -4)
    for target_layer, source_layer in zip(model.layers[tail],
                                          donor.layers[tail]):
        target_layer.set_weights(source_layer.get_weights())

    return model
def loadModelLR(numberOfClasses, inputFrames, frameHeight, frameWidth,
                numRGBChannels, withWeights=False):
    """Assemble an RGB I3D backbone with a two-layer dense head.

    Head: Dropout(0.5) -> Dense(1024, relu) -> Dense(numberOfClasses, softmax),
    applied directly to the output of the top-less backbone.

    Args:
        numberOfClasses: number of units of the final softmax layer.
        inputFrames, frameHeight, frameWidth, numRGBChannels: the four
            components of the backbone ``input_shape``.
        withWeights: when truthy the backbone is requested with the
            'rgb_inception_i3d' weights, otherwise with ``weights=None``.

    Returns:
        A ``Model`` from the backbone input to the softmax output.
    """
    pretrained = 'rgb_inception_i3d' if withWeights else None
    clip_shape = (inputFrames, frameHeight, frameWidth, numRGBChannels)

    backbone = Inception_Inflated3d(
        include_top=False,
        weights=pretrained,
        input_shape=clip_shape,
        dropout_prob=0.5,
        endpoint_logit=True,
        classes=numberOfClasses)

    hidden = Dropout(0.5)(backbone.output)
    hidden = Dense(1024, activation='relu')(hidden)
    probabilities = Dense(numberOfClasses, activation='softmax')(hidden)

    return Model(backbone.input, probabilities)
def RGB_model(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_CLASSES, dropout_prob):
    """Build an RGB I3D classifier with a fresh logit layer and softmax.

    Args:
        NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH: clip geometry; the channel
            count is fixed to 3.
        NUM_CLASSES: number of classes passed to ``generate_logit``.
        dropout_prob: rate of the Dropout layer placed before the logits.

    Returns:
        A ``Model`` from the backbone input to the softmax output
        (layer name 'prediction').
    """
    rgb_model = Inception_Inflated3d(
        include_top=False,
        weights='rgb_imagenet_and_kinetics',
        input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, 3))

    x1 = rgb_model.layers[-1].output
    x1 = Dropout(dropout_prob)(x1)
    x1 = generate_logit(x1, '1x1_Conv3d_rgb_logits', NUM_CLASSES)
    x = Activation('softmax', name='prediction')(x1)

    # Positional arguments instead of the deprecated Keras 1 keywords
    # `input=` / `output=`; the positional form matches the other model
    # builders in this file.
    model = Model(rgb_model.input, x)
    return model
# Fragment: the `while True:` loop is the body of a batch generator whose `def` lies
# outside this view. Each pass fills BATCH-sized arrays from hf["validation"] and
# validation_labels, cycling over 20 samples via `counter % 20`, and yields them.
# The statements after the `yield` build a top-less I3D RGB model from the
# 'rgb_kinetics_only' weights and compile it with Adam and categorical cross-entropy.
# NOTE(review): batch_features is allocated as (BATCH, NUM_FRAMES, FRAME_WIDTH,
#   FRAME_HEIGHT, 3) while the model input_shape is (NUM_FRAMES, FRAME_HEIGHT,
#   FRAME_WIDTH, 3); the two agree only when width == height - confirm.
# NOTE(review): the ModelCheckpoint(...) call is cut off at the end of this snippet.
while True: batch_features = np.zeros( (BATCH, NUM_FRAMES, FRAME_WIDTH, FRAME_HEIGHT, 3)) batch_labels = np.zeros((BATCH, NUM_CLASSES)) for i in range(BATCH): batch_features[i] = hf["validation"][counter % 20] batch_labels[i] = validation_labels[counter % 20] # print("Index: "+str(i)) # print(batch_labels) counter += 1 yield batch_features, batch_labels rgb_model = Inception_Inflated3d(include_top=False, weights='rgb_kinetics_only', input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, 3), classes=NUM_CLASSES, endpoint_logit=False) opt = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6) rgb_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy']) best_checkpoint = ModelCheckpoint('2_per_signer_weights_best.hdf5', monitor='val_acc',
# Fragment: `yield x, y` is the tail of a generator defined above this view.
# The script part loads five pickled RGB, flow and label test sets (index t in
# range(5)), then, for t in range(1), builds top-less RGB and flow I3D models,
# restores their weights from data/0_8/rgb<t>.h5 and data/0_8/flow<t>.h5, and
# initialises the bookkeeping containers for predictions and confidences
# (label_conf has one list per label 0..7).
# NOTE(review): five folds are loaded but the model loop only covers range(1) -
#   confirm this is intentional.
# NOTE(review): the pickle files are opened without being closed, and pickle.load
#   must only ever be used on trusted files.
yield x, y rgb_data = [] flow_data = [] labels = [] for t in range(5): rgb_data.append(pickle.load(open(rgb_test_path[t], "rb"))) flow_data.append(pickle.load(open(flow_test_path[t], "rb"))) labels.append(pickle.load(open(label_test_path[t], "rb"))) acc_list = [] for t in range(1): rgb_model = Inception_Inflated3d(include_top=False, weights='rgb_imagenet_and_kinetics', input_shape=(None, 224, 224, 3), classes=8) #rgb_model.load_weights("data/0_8/rgb"+str(t)+".h5") rgb_model.load_weights("data/0_8/rgb" + str(t) + ".h5") flow_model = Inception_Inflated3d(include_top=False, weights='flow_imagenet_and_kinetics', input_shape=(None, 224, 224, 2), classes=8) flow_model.load_weights("data/0_8/flow" + str(t) + ".h5") count = 0 y_pred = [] y_true = [] overall_conf = [] correct_overall_conf = [] wrong_overall_conf = [] label_conf = {0: [], 1: [], 2: [], 3: [], 4: [], 5: [], 6: [], 7: []}
def __init__(self, weights='rgb_imagenet_and_kinetics'):
    """Create the wrapped I3D network with its classification top.

    Args:
        weights: value forwarded as ``weights`` to ``Inception_Inflated3d``.
    """
    i3d_network = Inception_Inflated3d(weights=weights, include_top=True)
    self.model = i3d_network
# video_process(lock, IS_OVER): worker loop that classifies the latest frame window.
# - Builds full (include_top=True) RGB and flow I3D models once, optionally printing and
#   plotting the RGB model when IS_PLOT_MODEL is set.
# - Each pass reads the frame list from 'rgbs.pkl' under `lock`, retries immediately when
#   it does not hold NUM_FRAMES frames, calls exit() when IS_OVER.value is truthy, and
#   computes optical flow with read_frames_flow.
# - While running_symbol is truthy only the norm of the last flow field is compared to
#   RUNNING_SPEED; otherwise both networks are run, their logits summed, a softmax taken,
#   and the top five classes are checked against RUN_CLASSES.
# - The collected text lines are pickled to 'video_text.pkl' under `lock`.
# NOTE(review): the retry on a short frame list has no sleep, so it busy-waits.
# NOTE(review): kinetics_classes is read here but not defined in this function; it is
#   presumably a module-level list - confirm.
# NOTE(review): np.exp(sample_logits) is not shifted by the maximum logit, so very large
#   logits could overflow.
# NOTE(review): the exact nesting of the mode-switch statements cannot be recovered from
#   this collapsed text; verify against the original file before editing.
def video_process(lock, IS_OVER): rgb_model = Inception_Inflated3d( include_top=True, weights='rgb_imagenet_and_kinetics', input_shape=(NUM_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, NUM_RGB_CHANNELS), classes=NUM_CLASSES) flow_model = Inception_Inflated3d( include_top=True, weights='flow_imagenet_and_kinetics', input_shape=(NUM_FRAMES, _IMAGE_SIZE, _IMAGE_SIZE, NUM_FLOW_CHANNELS), classes=NUM_CLASSES) if IS_PLOT_MODEL: rgb_model.summary() from keras.utils.vis_utils import plot_model plot_model(rgb_model, 'model.png') running_symbol = RUNNING_SYMBOL change_frames, warning_rate = 0, 0 global is_change while True: with lock: with open('rgbs.pkl', 'rb') as f: rgbs = pickle.load(f) if len(rgbs) != NUM_FRAMES: continue now = time.time() if IS_OVER.value: exit() flows = read_frames_flow(rgbs) is_change = False video_text = [] if running_symbol: norm = np.linalg.norm(flows[-1]) # frame_now = CURRENT_FRAME if CURRENT_FRAME.shape[0] > 480 else cv2.resize(CURRENT_FRAME, (860, 480)) process_time = time.time() - now video_text.append('Speed: %.2f s, running norm: %.2f' % (process_time, norm)) if norm > RUNNING_SPEED: is_change = True video_text.append('Running fast!') if warning_rate > WARNING_RATE: video_text.append('WARNING!') change_frames = change_frames + 1 if not is_change else 0 warning_rate = warning_rate + 1 if is_change else 0 if change_frames > CHANGE_FRAMES_CLASSIFICATION: running_symbol = False change_frames = 0 warning_rate = 0 else: rgb_sample = rgb2tensor(rgbs) flow_sample = flow2tensor(flows) rgb_logits = rgb_model.predict(rgb_sample) flow_logits = flow_model.predict(flow_sample) sample_logits = rgb_logits + flow_logits # produce softmax output from model logit for class probabilities sample_logits = sample_logits[0] # we are dealing with just one example sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits)) sorted_indices = np.argsort(sample_predictions)[::-1] process_time = time.time() - now video_text.append('Speed: %.2f s' % process_time) for
index in sorted_indices[:5]: if kinetics_classes[index] in RUN_CLASSES: is_change = True video_text.append('%s: %.2f' % (kinetics_classes[index], sample_predictions[index])) change_frames = change_frames + 1 if is_change else 0 with lock: with open('video_text.pkl', 'wb') as f: pickle.dump(video_text, f) if change_frames > CHANGE_FRAMES_RUNNING: running_symbol = True change_frames = 0
# Clip geometry and class count used for the action classifier below.
NUM_FRAMES = 79
FRAME_WIDTH = 224
FRAME_HEIGHT = 224
NUM_RGB_CHANNELS = 3
NUM_CLASSES = 400

# Sample input for the action classifier.
# emotion_frames = np.load('./test_data/emotion_data_1.npy')
action_frames = np.load('./test_data/action_data_1.npy')

# Face detector plus emotion / gender classifiers (loaded without compiling).
face_detection = load_detection_model(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
gender_classifier = load_model(gender_model_path, compile=False)

# Full I3D RGB classifier; its input shape is the frame count followed by
# (FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS).
action_classifier = Inception_Inflated3d(
    include_top=True,
    weights='rgb_imagenet_and_kinetics',
    input_shape=(NUM_FRAMES,) + (FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS),
    classes=NUM_CLASSES)

# Spatial sizes taken from axes 1 and 2 of each classifier's input shape
# (original notes: emotion (64,64), gender (48,48), action (224,224,3)).
emotion_target_size = emotion_classifier.input_shape[1:3]
gender_target_size = gender_classifier.input_shape[1:3]
action_target_size = (FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS)

# emotion_label_arg = np.argmax(emotion_classifier.predict(emotion_frames))
# final_emotion = mode(emotion_label_arg)
# emotion_text = emotion_labels[final_emotion]

# Add a leading batch axis of size 1 and run the action classifier.
action_frames = np.expand_dims(action_frames, axis=0)
logic_action = action_classifier.predict(action_frames)
# produce softmax output from model logit for class probabilities
# Fragment: starts inside a parser.add_argument(...) call whose opening is outside this
# view, then registers --memory ("memory -> for fusion", default 5) and --threshold
# (default 0.5) and parses the command line.
# Model selection: without args.model the stock 400-class I3D with 'rgb_inception_i3d'
# weights is used; otherwise loadModel(...) is built for len(labels) classes and its
# weights are loaded from args.model. Finally main(args.source, labels, model) is called.
# NOTE(review): clipDuration is used, while memory and threshold are only assigned in
#   the visible code - presumably consumed elsewhere; confirm.
type=int, default=32) parser.add_argument("-m", "--memory", dest="memory", help="memory -> for fusion", type=int, default=5) parser.add_argument("-t", "--threshold", dest="threshold", help="prediction threshold", type=float, default=0.5) args = parser.parse_args() labels = readLabels(args.labels) clipDuration = args.input_frames memory = args.memory threshold = args.threshold if not args.model: model = Inception_Inflated3d(include_top=True, weights='rgb_inception_i3d', input_shape=(clipDuration, 224, 224, 3), classes=400, endpoint_logit=False) else: model = loadModel(len(labels), clipDuration, 224, 224, 3) model.load_weights(args.model) main(args.source, labels, model)
# Fragment: chooses input_shape_img from K.image_dim_ordering() ('th' puts the 3
# channels first), declares the symbolic inputs (shared feature map with 832 channels,
# ROIs with 4 values each, video and feature-map inputs) and builds a top-less I3D RGB
# model for the fixed shape vid_input_shape = (64, 400, 320, 3) from the
# 'rgb_kinetics_only' weights.
# get_new_img_size(width, height, img_min_side, C) scales the shorter image side to
# img_min_side and the other side by the same factor (truncated with int()).
# NOTE(review): get_new_img_size overwrites its img_min_side argument with 448, so the
#   caller's value is ignored.
# NOTE(review): only the C.dataset == 'AVA' return is visible; the function may continue
#   beyond this snippet.
if K.image_dim_ordering() == 'th': input_shape_img = (3, None, None) # input_shape_features = (num_features, None, None) else: input_shape_img = (None, None, 3) shared_layers_input= Input(shape=( None,None,832)) roi_input = Input(shape=(None, 4)) vid_input = Input(shape =(None, None, None, 3)) vid_input_shape = (64, 400,320, 3) feature_map_input = Input(shape=(None, None,None,832)) rgb_model = Inception_Inflated3d( include_top=False, weights='rgb_kinetics_only', input_shape=vid_input_shape, classes=classes_count) def get_new_img_size(width, height, img_min_side, C): img_min_side =448 if width <= height: f = float(img_min_side) / width resized_height = int(f * height) resized_width = img_min_side else: f = float(img_min_side) / height resized_width = int(f * width) resized_height = img_min_side if C.dataset == 'AVA': return resized_width, resized_height
def _load_i3d_classifier(stream, num_channels, kinetics_only):
    """Build a full (include_top=True) I3D classifier for one input stream.

    ``stream`` is 'rgb' or 'flow'; the weights name is
    '<stream>_kinetics_only' when ``kinetics_only`` is truthy and
    '<stream>_imagenet_and_kinetics' otherwise.
    """
    suffix = 'kinetics_only' if kinetics_only else 'imagenet_and_kinetics'
    return Inception_Inflated3d(
        include_top=True,
        weights='%s_%s' % (stream, suffix),
        input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, num_channels),
        classes=NUM_CLASSES)


def main(args):
    """Classify one batch from the training generator and print the top 20.

    ``args.eval_type`` is overwritten with 'rgb', so only the RGB stream is
    evaluated; the flow branch is kept but is unreachable while that override
    is in place.

    Args:
        args: parsed options. ``args.no_imagenet_pretrained`` selects the
            kinetics-only weights; ``args`` is also passed on to
            ``create_generators``.

    Prints the norm of the logits of the first example, followed by
    probability, logit and class name for the 20 most likely classes.
    """
    # load the kinetics classes (the handle is closed deterministically)
    with open(LABEL_MAP_PATH, 'r') as label_file:
        kinetics_classes = [line.strip() for line in label_file]

    # Evaluation is pinned to the RGB stream.
    args.eval_type = 'rgb'

    if args.eval_type in ['rgb', 'joint']:
        rgb_model = _load_i3d_classifier(
            'rgb', NUM_RGB_CHANNELS, args.no_imagenet_pretrained)

        # The sample comes from the training generator; the former
        # np.load(SAMPLE_DATA_PATH['rgb']) result was overwritten before use
        # and has been dropped.
        train_generator, _ = create_generators(
            args, backbone.preprocess_image)
        rgb_sample = train_generator[1][0]

        # make prediction
        rgb_logits = rgb_model.predict(rgb_sample)

    if args.eval_type in ['flow', 'joint']:
        flow_model = _load_i3d_classifier(
            'flow', NUM_FLOW_CHANNELS, args.no_imagenet_pretrained)

        # load flow sample (just one example) and make prediction
        flow_sample = np.load(SAMPLE_DATA_PATH['flow'])
        flow_logits = flow_model.predict(flow_sample)

    # produce final model logits ('flow' and 'joint' are disabled here)
    if args.eval_type == 'rgb':
        sample_logits = rgb_logits

    # produce softmax output from model logit for class probabilities
    sample_logits = sample_logits[0]  # we are dealing with just one example
    # Shift by the maximum logit first: the probabilities are mathematically
    # unchanged, but np.exp can no longer overflow for large logits.
    exp_shifted = np.exp(sample_logits - np.max(sample_logits))
    sample_predictions = exp_shifted / np.sum(exp_shifted)

    sorted_indices = np.argsort(sample_predictions)[::-1]

    print('\nNorm of logits: %f' % np.linalg.norm(sample_logits))
    print('\nTop classes and probabilities')
    for index in sorted_indices[:20]:
        print(sample_predictions[index], sample_logits[index],
              kinetics_classes[index])

    return
# main(args): assembles and prints a video feature-extraction model.
# - model_res: ResNet50 ('imagenet' weights) exposing the outputs of 'conv1_relu',
#   'conv2_block1_out' and 'conv3_block1_out'.
# - model_rgb / model_flow: I3D networks cut at layer 'Conv3d_3c_3b_1x1'; the weights
#   are the '*_kinetics_only' set when args.no_imagenet_pretrained is truthy, otherwise
#   the '*_imagenet_and_kinetics' set.
# - All layers of model_rgb and model_res are marked non-trainable.
# - For an Input of shape (10, 224, 224, 3): the I3D features pass through four
#   STLSTM2D layers (128, 64, 64, 64 filters) and then through Conv3DTranspose/Conv3D
#   pairs; the per-frame ResNet outputs, stacked on axis 1, are concatenated on axis 4
#   as skip connections. A final 3-filter Conv3D produces the output.
# - The model summary is printed and the graph is plotted to 'feature_extract.png'.
# NOTE(review): model_rgb is used unconditionally (`x = model_rgb(x_rgb)`), so the
#   function presumably needs args.eval_type to be 'rgb' or 'joint' - confirm.
# NOTE(review): the I3D models are created with NUM_FRAMES frames but are applied to a
#   10-frame Input; confirm the two are compatible.
# NOTE(review): kinetics_classes is only referenced in commented-out code here, and the
#   label file handle is never closed explicitly.
# NOTE(review): the collapsed text does not show whether the `model_rgb = Model(...)` /
#   `model_flow = Model(...)` statements sit inside the eval_type branches; check the
#   original file before editing.
def main(args): model = ResNet50(weights='imagenet') model_res = Model(inputs=model.input, outputs=[ model.get_layer('conv1_relu').output, model.get_layer('conv2_block1_out').output, model.get_layer('conv3_block1_out').output ]) # load the kinetics classes kinetics_classes = [x.strip() for x in open(LABEL_MAP_PATH, 'r')] if args.eval_type in ['rgb', 'joint']: if args.no_imagenet_pretrained: # build model for RGB data # and load pretrained weights (trained on kinetics dataset only) rgb_model = Inception_Inflated3d( include_top=True, weights='rgb_kinetics_only', input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), classes=NUM_CLASSES) else: # build model for RGB data # and load pretrained weights (trained on imagenet and kinetics dataset) rgb_model = Inception_Inflated3d( include_top=True, weights='rgb_imagenet_and_kinetics', input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS), classes=NUM_CLASSES) model_rgb = Model( inputs=rgb_model.input, outputs=rgb_model.get_layer('Conv3d_3c_3b_1x1').output) # print(model_rgb.summary()) # load RGB sample (just one example) # rgb_sample = np.load(SAMPLE_DATA_PATH['rgb']) # make prediction # rgb_logits = rgb_model.predict(rgb_sample) if args.eval_type in ['flow', 'joint']: if args.no_imagenet_pretrained: # build model for optical flow data # and load pretrained weights (trained on kinetics dataset only) flow_model = Inception_Inflated3d( include_top=True, weights='flow_kinetics_only', input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), classes=NUM_CLASSES) else: # build model for optical flow data # and load pretrained weights (trained on imagenet and kinetics dataset) flow_model = Inception_Inflated3d( include_top=True, weights='flow_imagenet_and_kinetics', input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_FLOW_CHANNELS), classes=NUM_CLASSES) # load flow sample (just one example) # flow_sample = np.load(SAMPLE_DATA_PATH['flow']) # make prediction # flow_logits =
flow_model.predict(flow_sample) model_flow = Model( inputs=flow_model.input, outputs=flow_model.get_layer('Conv3d_3c_3b_1x1').output) # print(model_flow.summary()) # produce final model logits # if args.eval_type == 'rgb': # sample_logits = rgb_logits # elif args.eval_type == 'flow': # sample_logits = flow_logits # else: # joint # sample_logits = rgb_logits + flow_logits # # produce softmax output from model logit for class probabilities # sample_logits = sample_logits[0] # we are dealing with just one example # sample_predictions = np.exp(sample_logits) / np.sum(np.exp(sample_logits)) # sorted_indices = np.argsort(sample_predictions)[::-1] # print('\nNorm of logits: %f' % np.linalg.norm(sample_logits)) # print('\nTop classes and probabilities') # for index in sorted_indices[:20]: # print(sample_predictions[index], sample_logits[index], kinetics_classes[index]) import STLSTM NUM_CELL = 1 FILTERS0 = 128 FILTERS1 = 64 FILTERS2 = 64 FILTERS3 = 64 KERNEL_SIZE = 3 for layer in model_rgb.layers: layer.trainable = False for layer in model_res.layers: layer.trainable = False #Model 1 # m1,m2,m3=model_res(input3) #MODEL 2 # rgb=model_rgb(input1) # flow=model_flow(input2)x_rgb=Input(shape=(10,224,224,3)) cells0 = STLSTM.StackedSTLSTMCells([ STLSTM.STLSTMCell(filters=FILTERS0, kernel_size=KERNEL_SIZE, padding="same", data_format="channels_last") for i in range(NUM_CELL) ]) cells1 = STLSTM.StackedSTLSTMCells([ STLSTM.STLSTMCell(filters=FILTERS1, kernel_size=KERNEL_SIZE, padding="same", data_format="channels_last") for i in range(NUM_CELL) ]) cells2 = STLSTM.StackedSTLSTMCells([ STLSTM.STLSTMCell(filters=FILTERS2, kernel_size=KERNEL_SIZE, padding="same", data_format="channels_last") for i in range(NUM_CELL) ]) cells3 = STLSTM.StackedSTLSTMCells([ STLSTM.STLSTMCell(filters=FILTERS3, kernel_size=KERNEL_SIZE, padding="same", data_format="channels_last") for i in range(NUM_CELL) ]) x_rgb = Input(shape=(10, 224, 224, 3)) # x_flow=Input(shape=(10,224,224,2)) x = model_rgb(x_rgb) #
x_flow1=model_flow(x_flow) l1 = [] l2 = [] l3 = [] for i in range(10): [m1, m2, m3] = model_res(x_rgb[:, i, :, :, :]) l1.append(m1) l2.append(m2) l3.append(m3) # [merge1,merge2,merge3]=model_res(x_res) skip_conn1 = tf.stack(l1, axis=1) skip_conn2 = tf.stack(l2, axis=1) skip_conn3 = tf.stack(l3, axis=1) print(skip_conn1.shape) print(skip_conn2.shape) print(skip_conn3.shape) x = STLSTM.STLSTM2D(cells0, return_sequences=True)(x) x = STLSTM.STLSTM2D(cells1, return_sequences=True)(x) x = STLSTM.STLSTM2D(cells2, return_sequences=True)(x) x = STLSTM.STLSTM2D(cells3, return_sequences=True)(x) x = Conv3DTranspose(64, (3, 3, 3), strides=(2, 1, 1), output_padding=(1, 0, 0), padding='valid', data_format="channels_last")(x) x = Conv3D(64, (3, 3, 3), strides=(1, 1, 1), padding='valid', data_format="channels_last")(x) x = tf.concat([x, skip_conn3], axis=4) print(x.shape) x = Conv3DTranspose(64, (3, 3, 3), strides=(1, 2, 2), output_padding=(0, 1, 1), padding='valid', data_format="channels_last")(x) x = Conv3D(64, (3, 3, 3), strides=(1, 1, 1), padding='valid', data_format="channels_last")(x) print(x.shape) x = tf.concat([x, skip_conn2], axis=4) x = Conv3DTranspose(64, (3, 3, 3), strides=(1, 2, 2), output_padding=(0, 1, 1), padding='valid', data_format="channels_last")(x) x = Conv3D(64, (3, 3, 3), strides=(1, 1, 1), padding='valid', data_format="channels_last")(x) print(x.shape) x = tf.concat([x, skip_conn1], axis=4) x = Conv3DTranspose(64, (3, 3, 3), strides=(1, 2, 2), output_padding=(0, 1, 1), padding='valid', data_format="channels_last")(x) x = Conv3D(64, (3, 3, 3), strides=(1, 1, 1), padding='valid', data_format="channels_last")(x) print(x.shape) x = Conv3D(3, (3, 3, 3), strides=(1, 1, 1), padding='same', data_format="channels_last")(x) model_final = Model(inputs=x_rgb, outputs=x) print(x.shape) # print(model_final.summary()) print(model_final.summary()) plot_model(model_final, to_file='feature_extract.png') # x=STLSTM(rgb+flow) # x=STLSTM(x) # x=STLSTM(x) # x=DCONV(x) #
x=CONV(x) # #Combine # x=CONV(m1+x) # x=DCONV(x) # x=CONV(m2+x) # x=DCONV(x) # x=CONV(m3+x) # output=DCONV(x) # model_final=Model(inputs=[input1,input2,input3],outputs=output) return
# Remaining command-line options of the test script.
parser.add_argument(
    "-f", "--input_frames",
    default=64, type=int,
    help="number of frames in each input clip to the model")
parser.add_argument(
    "-b", "--batch_size",
    default=8, type=int,
    help="batch size for testing.")
parser.add_argument(
    "-r", "--results_path",
    default="./results/results.json",
    help="name/path of the output results of the test(has to be a json file -> ./results/results.json)")
parser.add_argument(
    "-p", "--data_preprocessed",
    default=False, action="store_true",
    help="if data is preprocessed")
parser.add_argument(
    "-c", "--per_clip",
    default=False, action="store_true",
    help="results for clips not videos")
args = parser.parse_args()

# Make sure the default output directory exists.
if not os.path.exists("./results"):
    os.makedirs("./results")

labels = readLabels(args.labels)
num_classes = len(labels)

# With custom weights the head is rebuilt for the label set; without them
# the stock 400-class I3D with 'rgb_inception_i3d' weights is used.
if args.weights:
    model = loadModel(num_classes, args.input_frames, 224, 224, 3)
    model.load_weights(args.weights)
else:
    model = Inception_Inflated3d(
        include_top=True,
        weights="rgb_inception_i3d",
        input_shape=(args.input_frames, 224, 224, 3),
        classes=400,
        endpoint_logit=True)

# A 400-entry label set goes to test(); every other size goes to
# testViolence().
if num_classes == 400:
    test(model, args.data_directory, labels, args.input_frames,
         args.batch_size)
else:
    testViolence(model, args.data_directory, labels, args.input_frames,
                 args.batch_size,
                 results_path=args.results_path,
                 just_load=args.data_preprocessed,
                 perClip=args.per_clip)
# pi3d_model(...): builds the 'PI3D' model that fuses several pre-trained part models.
# - Every name in all_models_name is loaded from
#   './weights_optim/<dataset>/weights_<name>_<protocol>.hdf5'; its layers are renamed
#   with the model name as suffix, their trainable flag is set to train_end_to_end, and
#   the model input is appended to model_inputs (the caller's list is mutated).
# - An intermediate output of each part model (layer index -46 + (2 - sum_idx) * 20) is
#   reshaped to (1, 8, 7, 7, f_dept); the outputs are concatenated on axis 1 and
#   multiplied by an inflated version of fc_main.
# - mode 'sum' / 'cat' selects sum_feature or concat_feature to merge the parts.
# - Depending on sum_idx the Mixed_5b (sum_idx == 2) and Mixed_5c (sum_idx 1 or 2)
#   Inception blocks are rebuilt, followed by the classification block.
# - Tail layers are initialised from a full I3D ('rgb_imagenet_and_kinetics') and marked
#   trainable; layers whose name contains 'lstm' take their weights from
#   './weights_optim/<dataset>/lstm_model_<protocol>.hdf5' when a layer of the same name
#   exists there.
# NOTE(review): all_models_name=[] is a mutable default argument; it is only read in the
#   visible code, but a None default would be safer.
# NOTE(review): f_dept is a global that is assigned only when sum_idx == 0 (as far as the
#   collapsed text shows), and no_of_p is not defined in this function - both presumably
#   live at module level; confirm.
# NOTE(review): the first Mixed_5b branch is named '' + 'second', unlike its siblings
#   ('Conv3d_5b_1a_1x1' + 'second', ...); 'Conv3d_5b_0a_1x1' looks intended - confirm.
# NOTE(review): `mode = mode` and `all_models_name = all_models_name` are no-ops.
def pi3d_model(fc_main, model_inputs, dataset, protocol, all_models_name=[], mode='sum', dropout_prob=0.0, num_classes=60, sum_idx=0, train_end_to_end=False): mode = mode all_models_name = all_models_name #all_models = {} if sum_idx == 0: global f_dept f_dept = 1024 pi3d_interm_outputs = [] for model_name in all_models_name: model = load_model('./weights_optim/{}/weights_{}_{}.hdf5'.format( dataset, model_name, protocol)) for idx in range(len(model.layers)): model.get_layer( index=idx).name = model.layers[idx].name + '_' + model_name for l in model.layers: l.trainable = train_end_to_end model_inputs.append(model.input) if sum_idx <= 3 and sum_idx >= 0: pi3d_interm_outputs.append( Reshape((1, 8, 7, 7, f_dept))( model.get_layer(index=-46 + (2 - sum_idx) * 20).output)) x = concatenate(pi3d_interm_outputs, axis=1) inflated_fc_main = keras.layers.core.Lambda(inflate_dense, output_shape=(no_of_p, 8, 7, 7, f_dept))(fc_main) multiplied_features = keras.layers.Multiply()([inflated_fc_main, x]) if mode == 'sum': x = keras.layers.core.Lambda( sum_feature, output_shape=(8, 7, 7, f_dept))(multiplied_features) elif mode == 'cat': x = keras.layers.core.Lambda( concat_feature, output_shape=(8, 7, 7, f_dept * no_of_p))(multiplied_features) ##second part of I3D if sum_idx == 2: # Mixed 5b branch_0 = conv3d_bn(x, 256, 1, 1, 1, padding='same', name='' + 'second') branch_1 = conv3d_bn(x, 160, 1, 1, 1, padding='same', name='Conv3d_5b_1a_1x1' + 'second') branch_1 = conv3d_bn(branch_1, 320, 3, 3, 3, padding='same', name='Conv3d_5b_1b_3x3' + 'second') branch_2 = conv3d_bn(x, 32, 1, 1, 1, padding='same', name='Conv3d_5b_2a_1x1' + 'second') branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5b_2b_3x3' + 'second') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5b_3a_3x3' + 'second')(x) branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5b_3b_1x1' + 'second') x = layers.concatenate([branch_0, branch_1,
branch_2, branch_3], axis=4, name='Mixed_5b' + 'second') if sum_idx == 1 or sum_idx == 2: # Mixed 5c branch_0 = conv3d_bn(x, 384, 1, 1, 1, padding='same', name='Conv3d_5c_0a_1x1' + 'second') branch_1 = conv3d_bn(x, 192, 1, 1, 1, padding='same', name='Conv3d_5c_1a_1x1' + 'second') branch_1 = conv3d_bn(branch_1, 384, 3, 3, 3, padding='same', name='Conv3d_5c_1b_3x3' + 'second') branch_2 = conv3d_bn(x, 48, 1, 1, 1, padding='same', name='Conv3d_5c_2a_1x1' + 'second') branch_2 = conv3d_bn(branch_2, 128, 3, 3, 3, padding='same', name='Conv3d_5c_2b_3x3' + 'second') branch_3 = MaxPooling3D((3, 3, 3), strides=(1, 1, 1), padding='same', name='MaxPool2d_5c_3a_3x3' + 'second')(x) branch_3 = conv3d_bn(branch_3, 128, 1, 1, 1, padding='same', name='Conv3d_5c_3b_1x1' + 'second') x = layers.concatenate([branch_0, branch_1, branch_2, branch_3], axis=4, name='Mixed_5c' + 'second') #Classification block x = AveragePooling3D((2, 7, 7), strides=(1, 1, 1), padding='valid', name='global_avg_pool' + 'second')(x) x = Dropout(dropout_prob)(x) x = conv3d_bn(x, num_classes, 1, 1, 1, padding='same', use_bias=True, use_activation_fn=False, use_bn=False, name='Conv3d_6a_1x1' + 'second') x = Flatten(name='flatten' + 'second')(x) predictions = Dense(num_classes, activation='softmax', name='softmax' + 'second')(x) model = Model(inputs=model_inputs, outputs=predictions, name='PI3D') model_second = Inception_Inflated3d(include_top=True, weights='rgb_imagenet_and_kinetics') weight_idx_s = -45 + (2 - sum_idx) * 20 weight_idx_e = -4 for l_m, l_lh in zip(model.layers[weight_idx_s:weight_idx_e], model_second.layers[weight_idx_s:weight_idx_e]): l_m.set_weights(l_lh.get_weights()) l_m.trainable = True lstm_weights = "./weights_optim/{}/lstm_model_{}.hdf5".format( dataset, protocol) l_model = load_model(lstm_weights, compile=False) for idx1 in range(len(model.layers)): n1 = model.layers[idx1].name if 'lstm' in n1: for idx2 in range(len(l_model.layers)): n2 = l_model.layers[idx2].name if n1 == n2:
model.layers[idx1].set_weights( l_model.layers[idx2].get_weights()) break return model
# Fragment: the first three statements are the tail of a generator defined above this
# view; it loads one array from the path data[i], adds a leading axis of size 1 and
# yields it together with labels[i].
# The script part then, for i in 2..4, builds a top-less I3D RGB model for variable-length
# 224x224x3 clips, compiles it with SGD (Nesterov momentum 0.9, lr 1e-4), loads the
# pickled training data and labels for that index, and fits for 65 epochs with one step
# per label.
# NOTE(review): `earlystop` is created but is not passed to fit_generator in the visible
#   code - confirm whether callbacks=[earlystop] was intended.
# NOTE(review): the pickle files are opened without being closed, and pickle.load must
#   only ever be used on trusted files.
x, y = np.load(data[i]), labels[i] x = x.reshape((1, x.shape[0], x.shape[1], x.shape[2], x.shape[3])) yield x, y earlystop = EarlyStopping(monitor='acc', min_delta=0, patience=5, verbose=0, mode='auto') for i in range(2, 5): rgb_model = Inception_Inflated3d(include_top=False, weights='rgb_imagenet_and_kinetics', input_shape=(None, 224, 224, 3), endpoint_logit=False, classes=8) sgd = SGD(lr=1e-4, decay=1e-7, momentum=0.9, nesterov=True) rgb_model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy']) rgb_model.summary() rgb_train_data = pickle.load(open(rgb_train_path[i], "rb")) label_train_data = pickle.load(open(label_train_path[i], "rb")) steps = len(label_train_data) rgb_model.fit_generator(generate_arrays_from_file(rgb_train_data, label_train_data), steps_per_epoch=steps, epochs=65)
# Script entry of a two-stream fusion setup: defines the clip geometry (64 frames of
# 224x224, 3 RGB / 2 flow channels, 2 classes), builds a top-less RGB I3D on an explicit
# Input tensor with the 'rgb_imagenet_and_kinetics' weights, marks every one of its layers
# non-trainable (the commented-out `break`s would have limited the freeze) and takes its
# output with get_output_at(0).
# NOTE(review): the snippet is cut off inside the Input(...) call for the flow stream.
from i3d_dataset import I3DFusionSequence from i3d_inception import Inception_Inflated3d if __name__ == '__main__': NUM_FRAMES = 64 FRAME_HEIGHT = 224 FRAME_WIDTH = 224 NUM_RGB_CHANNELS = 3 NUM_FLOW_CHANNELS = 2 NUM_CLASSES = 2 rgb_input = Input(shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS)) rgb_model = Inception_Inflated3d( include_top=False, weights='rgb_imagenet_and_kinetics', # weights='rgb_kinetics_only', input_tensor=rgb_input, classes=NUM_CLASSES) for i, l in enumerate(rgb_model.layers): # if i >= 181: # break # if "Mixed_5b" == l.name: # break l.trainable = False rgb_y = rgb_model.get_output_at(0) flow_input = Input(shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,
def train(path_train, batch_size, path_val=None, t_size=9, train_val_split=1,
          validate=True):
    """Fine-tune an I3D-based model on data stored in an HDF5 file.

    If ``autoscore_checkpoint`` exists in the current directory, training
    resumes from it. Otherwise a new model is built from a pretrained
    ``Inception_Inflated3d`` backbone (``include_top=False``) followed by a
    reshape and two dense layers, and compiled with SGD and binary
    cross-entropy. The final model is saved to ``autoscore_model_3``.

    Args:
        path_train: Path to an HDF5 file containing datasets ``'X'`` and
            ``'Y'``.
        batch_size: Currently unused. The generators are created with a
            hard-coded third argument of 7 (presumably the batch size --
            TODO confirm against ``i3d_generator``).
        path_val: Currently unused.
        t_size: Length of the leading axis of the model input shape; also
            forwarded to ``i3d_generator``.
        train_val_split: Currently unused; the generators are created with
            ``train_val_split=0.9``.
        validate: When true, train with a validation generator. When false,
            no training is run and the model is only saved.
    """
    checkpoint_path = 'autoscore_checkpoint'

    # Keep the HDF5 file open for the whole run (the generators are handed
    # the datasets themselves) and close it reliably afterwards.
    with h5py.File(path_train, 'r') as data:
        X = data['X']
        Y = data['Y']
        input_shape = (t_size,) + X.shape[1:]

        if os.path.exists(checkpoint_path):
            model_final = load_model(checkpoint_path)
            print('Loaded existing model')
        else:
            rgb_model = Inception_Inflated3d(
                include_top=False,
                weights='rgb_imagenet_and_kinetics',
                input_shape=input_shape)

            # Attach a small head on top of the backbone's last layer.
            output_old = rgb_model.layers[-1].output
            x = Reshape((1024,), name='Reshape_top')(output_old)
            x = Dense(50, activation='selu', name='Dense_top_1')(x)
            x = Dense(2, activation='sigmoid', name='Dense_top_2')(x)

            sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9,
                                 nesterov=True)
            model_final = Model(inputs=rgb_model.input, outputs=[x])
            model_final.compile(loss='binary_crossentropy',
                                optimizer=sgd,
                                metrics=['mae', 'acc'])

        # The checkpoint callback is needed on both paths (resumed and new).
        # The model is NOT reloaded from disk here: a freshly built model has
        # no checkpoint file yet, and a resumed one is already loaded.
        checkpoint = ModelCheckpoint(checkpoint_path,
                                     monitor='val_loss',
                                     verbose=0,
                                     save_best_only=False,
                                     save_weights_only=False,
                                     mode='auto',
                                     period=1)

        train_generator = i3d_generator(X, Y, 7, t_size,
                                        train_val_split=0.9, train=True)

        # NOTE(review): with validate=False nothing is trained; the model is
        # only saved below. Confirm whether that is intended.
        if validate:
            val_generator = i3d_generator(X, Y, 7, t_size,
                                          train_val_split=0.9, train=False)
            model_final.fit_generator(
                train_generator.__getitem__(),
                steps_per_epoch=5000,
                epochs=50,
                validation_data=val_generator.__getitem__(),
                validation_steps=1000,
                callbacks=[checkpoint])

        model_final.save('autoscore_model_3')
batch_features = np.zeros( (BATCH, NUM_FRAMES, FRAME_WIDTH, FRAME_HEIGHT, 3)) batch_labels = np.zeros((BATCH, NUM_CLASSES)) for i in range(BATCH): batch_features[i] = hf["validation"][counter % 40] batch_labels[i] = validation_labels[counter % 40] # print("Index: "+str(i)) # print(batch_labels) counter += 1 yield batch_features, batch_labels rgb_model = Inception_Inflated3d(include_top=False, weights='rgb_imagenet_and_kinetics', input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, 3), classes=NUM_CLASSES, endpoint_logit=False, dropout_prob=0.5) opt = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6) index_freeze_layer = [ 1, 2, 3, 5, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 53, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 87,
import pickle import numpy as np from i3d_inception import Inception_Inflated3d from keras.optimizers import SGD, Adam from keras.callbacks import EarlyStopping import tensorflow as tf from keras import backend as K import os from keras.callbacks import Callback from keras.metrics import binary_accuracy import numpy.linalg as linalg flow_model_kin = Inception_Inflated3d(include_top=False, weights='flow_imagenet_and_kinetics', input_shape=(None, 224, 224, 2), dropout_prob=0.5,endpoint_logit=False, classes=8) flow_model_kin.load_weights("/DATA/keras-kinetics-i3d/data/0_8/kin_flow_0.h5") flow_train_path = "../crossval5_8/less_f_train7.p" rgb_train_path = "../crossval5_8/less_r_train7.p" label_train_path = "../crossval5_8/less_l_train7.p" class Flda(Callback): def __init__(self, w): super(Flda, self).__init__() self.w = K.variable(w, dtype=np.float32) def on_batch_begin(self, batch, logs={}): kin_wts=flow_model_kin.layers[196].get_weights()[0] #kin_wts=kin_wts.reshape(8192) #kin_wts=kin_wts[0:8100] #kin_wts=kin_wts.reshape(90,90)
def main(args):
    """Classify one pre-processed video sample with I3D and print the top 20.

    Depending on ``args.eval_type`` (``'rgb'``, ``'flow'`` or anything else,
    treated as joint), the RGB and/or optical-flow sample for
    ``args.video_name`` is loaded from ``../data/``, run through an
    ``Inception_Inflated3d`` model with its classification top, and the
    resulting logits (summed for the joint case) are turned into class
    probabilities. The norm of the logits and the 20 most probable classes
    are printed.

    Args:
        args: Parsed command-line arguments providing ``video_name``,
            ``eval_type`` and ``no_imagenet_pretrained``.

    Returns:
        None.
    """
    sample_data_path = {
        'rgb': '../data/' + args.video_name + '_rgb.npy',
        'flow': '../data/' + args.video_name + '_flow.npy',
    }

    def predict_logits(stream, num_channels):
        """Load the sample for ``stream`` and return the model's prediction."""
        sample = np.load(sample_data_path[stream])
        # Axis 1 of the sample is used as the temporal length of the model
        # input, so the network is built to fit this particular clip.
        num_frames = sample.shape[1]
        if args.no_imagenet_pretrained:
            # weights trained on the kinetics dataset only
            weights = stream + '_kinetics_only'
        else:
            # weights trained on imagenet and the kinetics dataset
            weights = stream + '_imagenet_and_kinetics'
        model = Inception_Inflated3d(
            include_top=True,
            weights=weights,
            input_shape=(num_frames, FRAME_HEIGHT, FRAME_WIDTH, num_channels),
            classes=NUM_CLASSES)
        return model.predict(sample)

    # load the kinetics classes (close the file once it has been read)
    with open(LABEL_MAP_PATH, 'r') as label_file:
        kinetics_classes = [line.strip() for line in label_file]

    if args.eval_type in ['rgb', 'joint']:
        rgb_logits = predict_logits('rgb', NUM_RGB_CHANNELS)

    if args.eval_type in ['flow', 'joint']:
        flow_logits = predict_logits('flow', NUM_FLOW_CHANNELS)

    # produce final model logits
    if args.eval_type == 'rgb':
        sample_logits = rgb_logits
    elif args.eval_type == 'flow':
        sample_logits = flow_logits
    else:  # joint
        sample_logits = rgb_logits + flow_logits

    # we are dealing with just one example
    sample_logits = sample_logits[0]

    # Softmax over the logits. Subtracting the maximum first leaves the
    # result mathematically unchanged but keeps np.exp from overflowing
    # (which would yield inf/nan probabilities) on large logits.
    exp_logits = np.exp(sample_logits - np.max(sample_logits))
    sample_predictions = exp_logits / np.sum(exp_logits)

    sorted_indices = np.argsort(sample_predictions)[::-1]

    print('\nNorm of logits: %f' % np.linalg.norm(sample_logits))
    print('\nTop 20 classes and probabilities')
    for index in sorted_indices[:20]:
        print(sample_predictions[index], sample_logits[index],
              kinetics_classes[index])

    return
def main(args=None):
    """Train a RetinaNet model whose backbone features come from I3D.

    Parses the command-line arguments, creates the data generators, builds an
    ``Inception_Inflated3d`` model and feeds four of its intermediate layer
    outputs into ``retinanet`` as backbone layers. If ``args.snapshot`` is
    given, the model is instead loaded from that snapshot. The training model
    is then fitted with ``fit_generator``.

    Args:
        args: Optional list of command-line arguments; defaults to
            ``sys.argv[1:]``.
    """
    # parse arguments
    if args is None:
        args = sys.argv[1:]
    args = parse_args(args)

    # create object that stores backbone information
    backbone = models.backbone(args.backbone)

    # make sure keras is the minimum required version
    check_keras_version()

    # optionally choose specific GPU
    if args.gpu:
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    keras.backend.tensorflow_backend.set_session(get_session())

    # optionally load config parameters
    if args.config:
        args.config = read_config_file(args.config)

    # create the generators
    train_generator, validation_generator = create_generators(
        args, backbone.preprocess_image)

    img_input_ret = img_input_retina(32, 1024, 1024, 3)

    from i3d_inception import Inception_Inflated3d

    num_classes = train_generator.num_classes()
    rgb_model = Inception_Inflated3d(include_top=False,
                                     weights='rgb_imagenet_and_kinetics',
                                     input_shape=img_input_ret,
                                     classes=num_classes)
    print('Loading I3D models.........')

    print("Connecting to Retinanet Layer................")
    model = retinanet(inputs=img_input_ret,
                      num_classes=num_classes,
                      backbone_layers=[
                          rgb_model.get_layer('Conv3d_2c_3x3').output,
                          rgb_model.get_layer('Conv3d_3c_0a_1x1').output,
                          rgb_model.get_layer('Conv3d_4f_0a_1x1').output,
                          rgb_model.get_layer('Conv3d_5b_0a_1x1').output,
                      ])
    print("Retinanet+I3D model summary:")
    # summary() prints the table itself and returns None, so it is not
    # wrapped in print().
    model.summary()

    # load anchor parameters, or pass None (so that defaults will be used).
    # Both branches below need this, and it must always be bound before the
    # prediction model is created.
    anchor_params = None
    if args.config and 'anchor_parameters' in args.config:
        anchor_params = parse_anchor_parameters(args.config)

    # create the model
    if args.snapshot is not None:
        print('Loading prediction model, this may take a second...')
        model = models.load_model(args.snapshot, backbone_name=args.backbone)
        training_model = model
        prediction_model = retinanet_bbox(model=model,
                                          anchor_params=anchor_params)
    else:
        multi_gpu = args.multi_gpu if args.multi_gpu is not None else 0

        if multi_gpu > 1:
            from keras.utils import multi_gpu_model
            training_model = multi_gpu_model(model, gpus=multi_gpu)
        else:
            training_model = model

        # make prediction model
        prediction_model = retinanet_bbox(model=model,
                                          anchor_params=anchor_params)

        # compile model
        # NOTE(review): compilation is done only for a newly built model; a
        # model loaded from a snapshot is used as returned by load_model.
        # Confirm this matches the intended behavior.
        training_model.compile(
            loss={
                'regression': losses.smooth_l1(),
                'classification': losses.focal(),
                'depthsification': losses.smooth_l1_depth()
            },
            optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001))

    # create the callbacks
    callbacks = create_callbacks(model, training_model, prediction_model,
                                 validation_generator, args)

    # Use multiprocessing if workers > 0
    use_multiprocessing = args.workers > 0

    # start training
    training_model.fit_generator(generator=train_generator,
                                 steps_per_epoch=args.steps,
                                 epochs=args.epochs,
                                 verbose=1,
                                 callbacks=callbacks,
                                 workers=args.workers,
                                 use_multiprocessing=use_multiprocessing,
                                 max_queue_size=args.max_queue_size)
parser.add_argument('video_file', type=str, help="path to the video file") parser.add_argument('outvecs', type=str, help="path to the opeput numpy vector") parser.add_argument('--startframe', type=int, default=-1, help="start frame") parser.add_argument('--endframe', type=int, default=-1, help="end frame") args = parser.parse_args() rgb_input = Input(shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH, NUM_RGB_CHANNELS)) fe = Inception_Inflated3d( include_top=False, weights='rgb_imagenet_and_kinetics', # weights='rgb_kinetics_only', input_tensor=rgb_input, classes=-1) m = Model(input=fe.get_input_at(0), output=(fe.get_layer("Mixed_5c").output)) # plot_model(m, show_shapes=True) os.makedirs(args.outvecs, exist_ok=True) for fn, vec in main_fe(m, args.video_file, show=False, startfn=args.startframe, endfn=args.endframe): print(fn)
while True: batch_features = np.zeros((BATCH,NUM_FRAMES, FRAME_WIDTH, FRAME_HEIGHT,1)) temp = np.zeros((BATCH,NUM_FRAMES, FRAME_WIDTH, FRAME_HEIGHT)) batch_labels = np.zeros((BATCH,NUM_CLASSES)) for i in range(BATCH): temp = hf["train"][counter%120] batch_features[i] = np.reshape(temp,(NUM_FRAMES,FRAME_WIDTH,FRAME_HEIGHT,1)) batch_labels[i] = validation_labels[counter%40] # print("Index: "+str(i)) # print(batch_labels) counter+=1 yield batch_features,batch_labels rgb_model = Inception_Inflated3d( include_top=False, #weights='None', input_shape=(NUM_FRAMES, FRAME_HEIGHT, FRAME_WIDTH,1), classes=NUM_CLASSES,endpoint_logit=False) opt = optimizers.Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=1e-6) rgb_model.compile(loss=keras.losses.categorical_crossentropy, optimizer=opt, metrics=['accuracy']) best_checkpoint = ModelCheckpoint('sibi_1_weights_best.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='max') checkpoint = ModelCheckpoint('sibi_1_weights_epoch.hdf5', monitor='val_acc', verbose=1, save_best_only=False, mode='max') csv_logger = CSVLogger('sibi_1.log', append=False) tensorboard = TensorBoard(log_dir='./sibi_1_tf-logs') callbacks_list = [checkpoint,best_checkpoint, csv_logger, tensorboard]