def init():
    """Load the anomaly classifier and build the PredNet prediction model.

    Populates two module-level globals:

    * ``logistic_regression_model`` -- loaded with ``joblib`` from the path
      returned by ``Model.get_model_path("logistic_regression")``.
    * ``prednet_model`` -- a Keras model wrapping a ``PredNet`` layer that is
      rebuilt from the trained layer's config and weights with
      ``output_mode='prediction'`` and a sequence length of ``nt`` frames.
    """
    global prednet_model, logistic_regression_model

    nt = 10  # number of frames per input sequence

    # Logistic regression model for detecting anomalies based on model output.
    model_path = Model.get_model_path("logistic_regression")
    logistic_regression_model = joblib.load(model_path)

    model_root = Model.get_model_path('prednet_UCSDped1')
    print(model_root)

    # Read the architecture description; the context manager closes the file
    # even if reading fails.
    with open(os.path.join(model_root, 'model.json'), 'r') as json_file:
        model_json = json_file.read()

    trained_model = model_from_json(model_json,
                                    custom_objects={"PredNet": PredNet})
    # Load weights into the freshly built model.
    trained_model.load_weights(os.path.join(model_root, "weights.hdf5"))

    # Create the testing model (outputs predictions instead of errors).
    layer_config = trained_model.layers[1].get_config()
    layer_config['output_mode'] = 'prediction'
    test_prednet = PredNet(weights=trained_model.layers[1].get_weights(),
                           **layer_config)

    # Reuse the trained input shape but force the time dimension to nt.
    input_shape = list(trained_model.layers[0].batch_input_shape[1:])
    input_shape[0] = nt
    inputs = Input(shape=tuple(input_shape))
    predictions = test_prednet(inputs)
    prednet_model = Model_keras(inputs=inputs, outputs=predictions)
def __init__(self, weightsfile, prior):
    """Load the pretrained Caltech PredNet and prepare an "E0" test layer.

    Args:
        weightsfile: Accepted for interface compatibility; the weights are
            always read from ``WEIGHTS_DIR`` and this argument is not used.
        prior: Path of a hickle file to load into ``self.prior``, or the
            string ``"none"`` to leave ``self.prior`` as ``None``.
    """
    # Load PredNet configuration and instantiate a prednet object.
    weights_file = os.path.join(WEIGHTS_DIR, 'prednet_caltech_weights.hdf5')
    json_file = os.path.join(WEIGHTS_DIR, 'prednet_caltech_model.json')

    # Load the trained model; the context manager closes the file even if
    # reading fails.
    with open(json_file, 'r') as f:
        json_string = f.read()
    train_model = model_from_json(json_string,
                                  custom_objects={'PredNet': PredNet})
    train_model.load_weights(weights_file)

    # We have a pretrained model now.
    layer_config = train_model.layers[1].get_config()
    self.layer = "E0"
    layer_config['output_mode'] = self.layer

    # NOTE: index 0 of input_shape must still be set to the number of images
    # in the series before an input tensor is built from it.
    self.input_shape = list(train_model.layers[0].batch_input_shape[1:])

    if prior != "none":
        self.prior = hickle.load(prior)
    else:
        self.prior = None

    # Older configs store the layout under 'dim_ordering'.
    if 'data_format' in layer_config:
        self.data_format = layer_config['data_format']
    else:
        self.data_format = layer_config['dim_ordering']

    self.test_prednet = PredNet(
        weights=train_model.layers[1].get_weights(), **layer_config)
def pretrained_prednet(pretrained_model, n_timesteps, output_mode='error',
                       train=False, stateful=False, batch_size=None,
                       trainable_layers=None, trainable_units=None, **config):
    """Build a new PredNet model from the weights of a pretrained one.

    If the second layer of ``pretrained_model`` has no ``'stack_sizes'`` entry
    in its config, the first layer whose name contains "prednet" is used as
    the source model instead.

    Args:
        pretrained_model: Model holding (or nesting) the trained PredNet.
        n_timesteps: Sequence length of the new model's input.
        output_mode: Value written to the layer config's ``'output_mode'``.
        train: Forwarded to ``get_output_layer``.
        stateful: Value written to the layer config's ``'stateful'``.
        batch_size: Forwarded to ``get_input_layer``.
        trainable_layers: Forwarded to ``PredNet``.
        trainable_units: Forwarded to ``PredNet``.
        **config: Accepted and ignored.

    Returns:
        A model named ``'PredNet'``.
    """
    source_model = pretrained_model
    if 'stack_sizes' not in source_model.layers[1].get_config():
        # The PredNet may be nested one level down: take the first match.
        nested = next((candidate for candidate in pretrained_model.layers
                       if 'prednet' in candidate.name.lower()), None)
        if nested is not None:
            print('Found PredNet in layer', nested.name)
            source_model = nested

    trained_layer = source_model.layers[1]
    layer_config = trained_layer.get_config()
    layer_config['output_mode'] = output_mode
    layer_config['stateful'] = stateful
    prednet = PredNet(weights=trained_layer.get_weights(),
                      trainable_layers=trainable_layers,
                      trainable_units=trainable_units,
                      **layer_config)

    # Keep the trained frame shape, replace only the time dimension.
    sequence_shape = list(source_model.layers[0].batch_input_shape[1:])
    sequence_shape[0] = n_timesteps

    inputs = get_input_layer(batch_size, tuple(sequence_shape))
    outputs = get_output_layer(prednet, inputs, n_timesteps, train,
                               output_mode)
    return Model(inputs=inputs, outputs=outputs, name='PredNet')
def random_prednet(input_channels, input_height, input_width, n_timesteps,
                   stack_sizes=(48, 96, 192), train=False,
                   output_mode='error', stateful=False, batch_size=None,
                   trainable_layers=None, trainable_units=None, **config):
    """Build an untrained PredNet model with 3x3 filters in every layer.

    Args:
        input_channels: Number of image channels; also prepended to
            ``stack_sizes`` as the size of the first stack.
        input_height: Frame height.
        input_width: Frame width.
        n_timesteps: Sequence length of the model input.
        stack_sizes: Stack sizes of the layers above the input layer.
        train: Forwarded to ``get_output_layer``.
        output_mode: Forwarded to ``PredNet`` and ``get_output_layer``.
        stateful: Forwarded to ``PredNet``.
        batch_size: Forwarded to ``get_input_layer``.
        trainable_layers: Forwarded to ``PredNet``.
        trainable_units: Forwarded to ``PredNet``.
        **config: Accepted and ignored.

    Returns:
        A model named ``'PredNet'``.
    """
    # Frame layout follows the backend's image data format.
    channels_first = K.image_data_format() == 'channels_first'
    if channels_first:
        frame_shape = (input_channels, input_height, input_width)
    else:
        frame_shape = (input_height, input_width, input_channels)

    full_stack = (input_channels, ) + stack_sizes
    n_layers = len(full_stack)
    a_filters = (3, ) * (n_layers - 1)
    ahat_filters = (3, ) * n_layers
    r_filters = (3, ) * n_layers

    # The recurrent stack sizes mirror the feed-forward stack sizes.
    prednet = PredNet(full_stack, full_stack, a_filters, ahat_filters,
                      r_filters,
                      output_mode=output_mode,
                      return_sequences=True,
                      stateful=stateful,
                      trainable_layers=trainable_layers,
                      trainable_units=trainable_units,
                      name='prednet_layer')

    inputs = get_input_layer(batch_size, (n_timesteps, ) + frame_shape)
    outputs = get_output_layer(prednet, inputs, n_timesteps, train,
                               output_mode)
    return Model(inputs=inputs, outputs=outputs, name='PredNet')
def create_test_model(json_file, weights_file, target):
    """Rebuild a trained PredNet as a test model with a chosen output mode.

    Relies on the module-level ``nt`` for the sequence length.

    Args:
        json_file: Path of the JSON architecture file of the trained model.
        weights_file: Path of the trained weights.
        target: Value written to the layer config's ``'output_mode'``.

    Returns:
        A tuple ``(input_shape, data_format, test_model)`` where
        ``input_shape`` is a list whose first entry is ``nt``.
    """
    # Load the trained model; the context manager closes the file even if
    # reading fails.
    with open(json_file, 'r') as f:
        json_string = f.read()
    train_model = model_from_json(json_string,
                                  custom_objects={'PredNet': PredNet})
    train_model.load_weights(weights_file)

    # Create the testing model with the requested output mode.
    layer_config = train_model.layers[1].get_config()
    layer_config['output_mode'] = target
    # Older configs store the layout under 'dim_ordering'.
    if 'data_format' in layer_config:
        data_format = layer_config['data_format']
    else:
        data_format = layer_config['dim_ordering']
    test_prednet = PredNet(weights=train_model.layers[1].get_weights(),
                           **layer_config)

    # Reuse the trained input shape but force the time dimension to nt.
    input_shape = list(train_model.layers[0].batch_input_shape[1:])
    input_shape[0] = nt
    inputs = Input(shape=tuple(input_shape))
    predictions = test_prednet(inputs)
    test_model = Model(inputs=inputs, outputs=predictions)

    return input_shape, data_format, test_model
def init():
    """Populate the global anomaly classifier and PredNet prediction model.

    ``logistic_regression_model`` is loaded with joblib; if anything in that
    step raises, the exception is printed and the global is set to ``None``.
    ``prednet_model`` is rebuilt from the stored architecture and weights with
    the PredNet layer switched to ``output_mode='prediction'``.
    """
    global prednet_model, logistic_regression_model

    n_frames = 10

    # Logistic regression model for detecting anomalies based on model output.
    try:
        classifier_path = Model.get_model_path("logistic_regression")
        logistic_regression_model = joblib.load(classifier_path)
    except Exception as err:
        print(err)
        logistic_regression_model = None

    prednet_dir = Model.get_model_path('prednet_UCSDped1')
    print(prednet_dir)

    # Rebuild the trained network from its JSON description and weights.
    architecture_path = os.path.join(prednet_dir, 'model.json')
    with open(architecture_path, 'r') as handle:
        architecture = handle.read()
    trained = model_from_json(architecture,
                              custom_objects={"PredNet": PredNet})
    trained.load_weights(os.path.join(prednet_dir, "weights.hdf5"))

    # Clone the PredNet layer so that it emits predictions.
    prediction_config = trained.layers[1].get_config()
    prediction_config['output_mode'] = 'prediction'
    prediction_layer = PredNet(weights=trained.layers[1].get_weights(),
                               **prediction_config)

    # Same frame shape as in training, but n_frames time steps.
    sequence_shape = list(trained.layers[0].batch_input_shape[1:])
    sequence_shape[0] = n_frames
    sequence_input = Input(shape=tuple(sequence_shape))
    prednet_model = Model_keras(inputs=sequence_input,
                                outputs=prediction_layer(sequence_input))
def main(verbose=False):
    """Train a PredNet on the KITTI sequences and save weights and model JSON.

    Args:
        verbose: When true, print the Keras model summary before training.
    """
    save_model = True  # if weights will be saved
    weights_file = os.path.join(WEIGHTS_DIR, 'prednet_kitti_weights.hdf5')  # where weights will be saved
    json_file = os.path.join(WEIGHTS_DIR, 'prednet_kitti_model.json')

    # Data files
    train_file = os.path.join(DATA_DIR, 'X_train.hkl')
    train_sources = os.path.join(DATA_DIR, 'sources_train.hkl')
    val_file = os.path.join(DATA_DIR, 'X_val.hkl')
    val_sources = os.path.join(DATA_DIR, 'sources_val.hkl')

    # Training parameters
    nb_epoch = 75
    batch_size = 4
    samples_per_epoch = 250
    N_seq_val = 100  # number of sequences to use for validation

    # Model parameters
    n_channels, im_height, im_width = (3, 128, 160)
    if K.image_data_format() == 'channels_first':
        input_shape = (n_channels, im_height, im_width)
    else:
        input_shape = (im_height, im_width, n_channels)
    stack_sizes = (n_channels, 48, 96, 192)
    R_stack_sizes = stack_sizes
    A_filt_sizes = (3, 3, 3)
    Ahat_filt_sizes = (3, 3, 3, 3)
    R_filt_sizes = (3, 3, 3, 3)
    # Weighting for each layer in final loss;
    # "L_0" model: [1, 0, 0, 0], "L_all": [1, 0.1, 0.1, 0.1]
    layer_loss_weights = np.array([1., 0., 0., 0.])
    layer_loss_weights = np.expand_dims(layer_loss_weights, 1)
    nt = 10  # number of timesteps used for sequences in training
    # Equally weight all timesteps except the first.
    time_loss_weights = 1. / (nt - 1) * np.ones((nt, 1))
    time_loss_weights[0] = 0

    prednet = PredNet(stack_sizes, R_stack_sizes,
                      A_filt_sizes, Ahat_filt_sizes, R_filt_sizes,
                      output_mode='error', return_sequences=True)

    inputs = Input(shape=(nt,) + input_shape)

    # The output will have shape (batch_size, nt, nb_layers). The outputs
    # correspond to the errors at each time step and layer.
    errors = prednet(inputs)

    # This merely computes a weighted sum of the errors layer by layer
    # throughout time. The output has shape (batch_size, nt, 1).
    errors_by_time = TimeDistributed(
        Dense(1, trainable=False),
        weights=[layer_loss_weights, np.zeros(1)],
        trainable=False)(errors)

    # Will have shape (batch_size, nt)
    errors_by_time = Flatten()(errors_by_time)

    # The output of this final layer is the weighted sum over time of the
    # weighted sums of the errors layer-by-layer, which is the final L_train
    # function from the original paper.
    final_errors = Dense(
        1, weights=[time_loss_weights, np.zeros(1)],
        trainable=False)(errors_by_time)

    model = Model(inputs=inputs, outputs=final_errors)
    model.compile(loss='mean_absolute_error', optimizer='adam')

    if verbose:
        model.summary()

    train_generator = SequenceGenerator(train_file, train_sources, nt,
                                        batch_size=batch_size, shuffle=True)
    val_generator = SequenceGenerator(val_file, val_sources, nt,
                                      batch_size=batch_size, N_seq=N_seq_val)

    # Start with lr of 0.001 and then drop to 0.0001 after 75 epochs.
    lr_schedule = lambda epoch: 0.001 if epoch < 75 else 0.0001
    callbacks = [LearningRateScheduler(lr_schedule)]
    if save_model:
        if not os.path.isdir(WEIGHTS_DIR):
            # makedirs also creates missing parent directories.
            os.makedirs(WEIGHTS_DIR)
        callbacks.append(ModelCheckpoint(filepath=weights_file,
                                         monitor='val_loss',
                                         save_best_only=True))

    # Step counts must be whole numbers: use floor division so Python 3 does
    # not hand Keras 62.5 / 25.0.
    model.fit_generator(train_generator,
                        samples_per_epoch // batch_size,
                        nb_epoch,
                        callbacks=callbacks,
                        validation_data=val_generator,
                        validation_steps=N_seq_val // batch_size)

    if save_model:
        json_string = model.to_json()
        with open(json_file, "w") as f:
            f.write(json_string)
def __init__(self):
    """Initialise the parent class and attach an error-mode PredNet."""
    super().__init__()
    # R_channels / A_channels are resolved from the enclosing scope.
    error_net = PredNet(R_channels, A_channels, output_mode='error')
    self.model = error_net
# Model parameters
n_channels, im_height, im_width = (2, 128, 128)
if K.image_data_format() == 'channels_first':
    input_shape = (n_channels, im_height, im_width)
else:
    input_shape = (im_height, im_width, n_channels)
stack_sizes = (n_channels, 48, 96, 192)
R_stack_sizes = stack_sizes
A_filt_sizes = (3, 3, 3)
Ahat_filt_sizes = (3, 3, 3, 3)
R_filt_sizes = (3, 3, 3, 3)
# Weighting for each layer in final loss;
# "L_0" model: [1, 0, 0, 0], "L_all": [1, 0.1, 0.1, 0.1]
layer_loss_weights = np.array([1., 0., 0., 0.])
layer_loss_weights = np.expand_dims(layer_loss_weights, 1)
# Equally weight all timesteps except the first.
time_loss_weights = 1. / (nt - 1) * np.ones((nt, 1))
time_loss_weights[0] = 0.

prednet = PredNet(stack_sizes, R_stack_sizes,
                  A_filt_sizes, Ahat_filt_sizes, R_filt_sizes,
                  output_mode='error', return_sequences=True)

inputs = Input(shape=(nt,) + input_shape)
errors = prednet(inputs)  # errors will be (batch_size, nt, nb_layers)
# Calculate weighted error by layer.
errors_by_time = TimeDistributed(Dense(1, trainable=False),
                                 weights=[layer_loss_weights, np.zeros(1)],
                                 trainable=False)(errors)
errors_by_time = Flatten()(errors_by_time)  # will be (batch_size, nt)
# Weight errors by time.
final_errors = Dense(1, weights=[time_loss_weights, np.zeros(1)],
                     trainable=False)(errors_by_time)
model = Model(inputs=inputs, outputs=final_errors)
model.compile(loss='mean_absolute_error', optimizer='adam')

train_generator = SequenceGenerator(train_file, train_sources, nt,
                                    batch_size=batch_size, shuffle=True)
val_generator = SequenceGenerator(val_file, val_sources, nt,
                                  batch_size=batch_size, N_seq=N_seq_val)
# Single-argument print: emits the same text under Python 2 and Python 3
# (the Python 2 statement form put two spaces after the colon).
print("Shapes:  %s %s" % (train_generator.X.shape, val_generator.X.shape))
def _load_image_stack(bucket, prefix, im_size):
    """Download every image stored under *prefix* into one uint8 array.

    Args:
        bucket: Storage bucket object exposing ``list_blobs(prefix=...)``.
        prefix: Object-name prefix selecting the images to load.
        im_size: ``(height, width)`` every image is expected to have.

    Returns:
        A tuple ``(X, sources)``: ``X`` has shape
        ``(n_images, height, width, 3)`` and ``sources`` holds the ``id`` of
        the blob each frame came from, in the same order.
    """
    import io  # local import: only this helper needs an in-memory buffer

    # Names ending in '/' are skipped -- presumably these are folder
    # placeholders without image data; confirm against the bucket layout.
    blobs = [blob for blob in bucket.list_blobs(prefix=prefix)
             if not blob.name.endswith('/')]

    X = np.zeros((len(blobs), ) + im_size + (3, ), np.uint8)
    sources = []
    for i, blob in enumerate(blobs):
        # download_to_file writes into the buffer and returns None, so the
        # image has to be decoded from the buffer itself.
        buf = io.BytesIO()
        blob.download_to_file(buf)
        buf.seek(0)
        X[i] = scipy.misc.imread(buf)
        sources.append(blob.id)
    return X, sources


def main():
    """Train a PredNet on cloud images stored in a storage bucket.

    Loads the training and validation images from the
    ``meniscus_cloud_data_1000`` bucket, builds the PredNet training model,
    fits it with learning-rate scheduling, checkpointing and TensorBoard
    callbacks, and finally writes the model architecture as JSON to
    ``WEIGHTS_DIR``.
    """
    client = storage.Client()
    bucket = client.bucket('meniscus_cloud_data_1000')
    desired_im_sz = (1000, 1000)

    #### Training data #######
    train_file, train_sources = _load_image_stack(
        bucket, 'kitti_data/clouds/training', desired_im_sz)

    #### Validation data #######
    val_file, val_sources = _load_image_stack(
        bucket, 'kitti_data/clouds/validation', desired_im_sz)

    weights_file = WEIGHTS_DIR + 'prednet_kitti_weights.hdf5'  # where weights will be saved
    json_file = WEIGHTS_DIR + 'prednet_kitti_model.json'
    saved_models = WEIGHTS_DIR + 'weights.{epoch:02d}-{val_loss:.2f}.hdf5'

    # Training parameters
    nb_epoch = 50
    samples_per_epoch = 28
    batch_size = 8
    N_seq_val = 4  # number of sequences to use for validation
    nt = 4  # number of timesteps used for sequences in training
    lr = 0.001  # learning rate
    up_lr = 0.0001  # learning rate is updated to this new value
    up_lr_ep = 40  # epoch at which the learning rate is updated

    # Model parameters
    n_channels, im_height, im_width = (3, 1000, 1000)
    if K.image_data_format() == 'channels_first':
        input_shape = (n_channels, im_height, im_width)
    else:
        input_shape = (im_height, im_width, n_channels)
    stack_sizes = (n_channels, 48, 96, 192)
    R_stack_sizes = stack_sizes  # LSTM stack sizes
    A_filt_sizes = (3, 3, 3)  # convolutional filter size
    Ahat_filt_sizes = (3, 3, 3, 3)  # prediction convolutional filter size
    R_filt_sizes = (3, 3, 3, 3)  # recurrent convolution filter size
    # Weighting for each layer in final loss;
    # "L_0" model: [1, 0, 0, 0], "L_all": [1, 0.1, 0.1, 0.1]
    layer_loss_weights = np.array([1., 0., 0., 0.])
    layer_loss_weights = np.expand_dims(layer_loss_weights, 1)
    # Equally weight all timesteps except the first.
    time_loss_weights = 1. / (nt - 1) * np.ones((nt, 1))
    time_loss_weights[0] = 0

    # ---- Architecture ----
    prednet = PredNet(stack_sizes, R_stack_sizes,
                      A_filt_sizes, Ahat_filt_sizes, R_filt_sizes,
                      output_mode='error', return_sequences=True)

    # ---- Layers ----
    inputs = Input(shape=(nt, ) + input_shape)
    # errors will be (batch_size, nt, nb_layers)
    errors = prednet(inputs)
    # Calculate weighted error by layer.
    errors_by_time = TimeDistributed(
        Dense(1, trainable=False),
        weights=[layer_loss_weights, np.zeros(1)],
        trainable=False)(errors)
    # Will be (batch_size, nt)
    errors_by_time = Flatten()(errors_by_time)
    # Weight errors by time.
    final_errors = Dense(1, weights=[time_loss_weights, np.zeros(1)],
                         trainable=False)(errors_by_time)

    # ---- Create model ----
    model = Model(inputs=inputs, outputs=final_errors)
    model.compile(loss='mean_absolute_error', optimizer='adam')

    # ---- Data preprocessing ----
    train_generator = SequenceGenerator(train_file, train_sources, nt,
                                        batch_size=batch_size, shuffle=True)
    val_generator = SequenceGenerator(val_file, val_sources, nt,
                                      batch_size=batch_size, N_seq=N_seq_val)

    # ---- Callbacks ----
    # Start with learning rate `lr`, drop to `up_lr` from epoch `up_lr_ep`.
    lr_schedule = lambda epoch: lr if epoch < up_lr_ep else up_lr
    callbacks = [LearningRateScheduler(lr_schedule)]
    callbacks.append(
        ModelCheckpoint(filepath=weights_file, monitor='val_loss',
                        save_best_only=True))

    # TensorBoard for visualization
    tb = TensorBoard(log_dir=GRAPH_DIR, batch_size=batch_size,
                     histogram_freq=2, write_graph=True, write_images=True)
    tb.set_model(model)
    callbacks.append(tb)

    # NOTE(review): this per-epoch checkpoint is constructed but not added to
    # `callbacks`, as in the original -- confirm whether it should be.
    checkPoints = ModelCheckpoint(saved_models, monitor='val_loss', verbose=0,
                                  save_best_only=True,
                                  save_weights_only=False, mode='auto',
                                  period=1)

    # ---- Training ----
    # Step counts must be whole numbers; with these settings plain floor
    # division would give 0 validation steps, so clamp to at least one.
    steps_per_epoch = max(1, samples_per_epoch // batch_size)
    validation_steps = max(1, N_seq_val // batch_size)
    model.fit_generator(train_generator,
                        steps_per_epoch,
                        nb_epoch,
                        callbacks=callbacks,
                        validation_data=val_generator,
                        validation_steps=validation_steps)

    # ---- Save model ----
    # model.to_json() returns the JSON text itself, not a path, so it is
    # written straight to the destination file.
    json_string = model.to_json()
    with file_io.FileIO(json_file, mode='w+') as output_f:
        output_f.write(json_string)
else: # RGB 3 channels A_channels = (3, 48, 96, 192) R_channels = (3, 48, 96, 192) DATA_DIR = args.data_dir #'kitti_data' TRAIN_DIR = 'trained' RESULTS_DIR = 'results' test_file = os.path.join(DATA_DIR, 'X_test.hkl') test_sources = os.path.join(DATA_DIR, 'sources_test.hkl') kitti_test = KITTI(test_file, test_sources, nt, sequence_start_mode='unique') test_loader = DataLoader(kitti_test, batch_size=batch_size, shuffle=False) model = PredNet(R_channels, A_channels, output_mode='prediction', gpu_id=args.gpu_id) model.load_state_dict( torch.load(os.path.join(TRAIN_DIR, 'training.pt'), map_location=lambda storage, loc: storage)) if args.et > 0 and args.et < args.nt: model.set_extrap_start_time(extrap_start_time=args.et) if args.gpu_id >= 0 and torch.cuda.is_available(): print(' Using GPU.') model.cuda() # output save file number if not (args.gpu_id >= 0): # When cpu, only two times
class PredictiveCoding(ExternalModule):
    """External module that runs a PredNet on incoming camera frames.

    On every accepted camera frame the module either trains the network on
    the last ``nt`` frames (``do_train=True``) or predicts future frames from
    the last ``t_extrap`` frames, then publishes the latent state, the
    predicted target positions and a plot image.
    """

    def __init__(self, module_name=None, steps=1, underSmpl=5, nt=15, t_extrap=5, n_feat=1, max_pix_value=1.0, C_channels=3, scale=4, use_new_w=False, use_trained_w=True, do_train=False, lr=1e-4, epoch_loop=100):
        """Set up ROS topics, model hyper-parameters and run-time state.

        Args:
            module_name: Forwarded to ``ExternalModule.__init__``.
            steps: Forwarded to ``ExternalModule.__init__``.
            underSmpl: Only one frame every ``20 * underSmpl`` ms is used.
            nt: Number of "past" frames given to the network.
            t_extrap: After this frame, input is not used for predictions.
            n_feat: Factor for the number of features used in the network.
            max_pix_value: Pixel values are scaled into ``[0, max_pix_value]``.
            C_channels: Number of color channels (1 or 3).
            scale: How much layers down/upsample images (2 or 4).
            use_new_w: If true, do not load any saved weights.
            use_trained_w: If ``use_new_w`` is false, load ``trained_w_path``
                when true and ``new_model_path`` otherwise.
            do_train: Train on present frames if true, predict if false.
            lr: Initial learning rate of the Adam optimizer.
            epoch_loop: The model is saved once every ``epoch_loop`` steps.
        """
        super(PredictiveCoding, self).__init__(module_name, steps)

        # Subscribers
        self.camera = None
        self.camera_sub = rospy.Subscriber("chatter", Image,
                                           self.camera_sub_callback)

        # Publishers
        self.plot_pub = rospy.Publisher('plot_topic', Image, queue_size=1)
        self.latent_pub = rospy.Publisher('latent_topic', Float32MultiArray,
                                          queue_size=1)
        self.pred_pos_pub = rospy.Publisher('pred_pos_topic',
                                            Float32MultiArray, queue_size=1)

        # Image and model parameters
        self.underSmpl = underSmpl  # Avoiding too sharp time resolution (no change between frames)
        self.nt = nt  # Number of "past" frames given to the network
        self.t_extrap = t_extrap  # After this frame, input is not used for future predictions
        self.n_feat = n_feat  # Factor for number of features used in the network
        self.max_pix_value = max_pix_value  # Depends on what's inside the PredNet code
        self.normalizer = 255.0 / self.max_pix_value
        self.C_channels = C_channels  # 1 or 3 (number of color channels)
        self.A_channels = (self.C_channels, self.n_feat * 4, self.n_feat * 8,
                           self.n_feat * 16)
        self.R_channels = (self.C_channels, self.n_feat * 4, self.n_feat * 8,
                           self.n_feat * 16)
        self.scale = scale  # 2 or 4 (how much layers down/upsample images)
        self.pad = 8 if self.scale == 4 else 0  # For up/downsampling to work
        self.model_name = 'model' + str(self.n_feat) + '.pt'
        self.new_model_path = os.getcwd() + '/resources/' + self.model_name
        self.trained_w_path = exp_dir + self.model_name  # exp_dir computed in specs.py
        self.device = 'cpu'
        if torch.cuda.is_available():
            self.device = 'cuda'

        # Training parameters
        self.use_new_w = use_new_w  # If True, do not use weights that are saved in new_model_path
        self.use_trained_w = use_trained_w  # If above is False, use trained_w_path as model weights
        self.do_train = do_train  # Train with present frames if True, predicts future if False
        self.initial_lr = lr  # Then, the learning rate is scheduled with cosine annealing
        self.epoch_loop = epoch_loop  # Every epoch_loop, a prediction is made, to monitor progress

        # Variables that can change over time
        self.pred_msg = None
        self.model = None
        self.model_path = None
        self.model_inputs = None
        self.optimizer = None
        self.scheduler = None
        self.running_step = 0
        self.last_cam_time = rospy.Time.now().to_sec() * 1000

    def camera_sub_callback(self, data):
        """Store the most recent camera message."""
        self.camera = data

    def _load_initial_weights(self):
        """Load saved weights into ``self.model`` according to the flags.

        Any failure while loading is logged and leaves the model with its
        random initialisation, as does ``use_new_w=True``.
        """
        if self.use_new_w:
            rospy.loginfo(
                'No existing weight file found. Model initialized randomly.')
            return
        try:
            if self.use_trained_w:
                self.model.load_state_dict(torch.load(self.trained_w_path))
                rospy.loginfo('Model initialized with pre-trained weights.')
            else:
                self.model.load_state_dict(torch.load(self.model_path))
                rospy.loginfo('Learning weights loaded in the model.')
        except Exception:
            # Deliberate best-effort: a missing or incompatible weight file
            # must not stop the module.
            rospy.loginfo(
                'No existing weight file found. Model initialized randomly.')

    def run_step(self):
        """Process the latest camera frame: train or predict, then publish."""
        # Check that the camera device is on and that it is the right time-step
        if self.camera is not None:
            # rospy time objects expose to_sec(); value converted to ms.
            t = self.camera.header.stamp.to_sec() * 1000.0
            if t > self.last_cam_time + 20 * self.underSmpl:  # one ros time-step is 20 ms
                self.last_cam_time = t

                # Collect input image and initialize the network input
                cam_img = CvBridge().imgmsg_to_cv2(self.camera,
                                                   'rgb8') / self.normalizer
                if self.C_channels == 3:
                    # Original author's note: the permutation should have
                    # been (2, 0, 1), but the model is already trained with
                    # (2, 1, 0), so it must stay.
                    cam_img = torch.tensor(cam_img,
                                           device=self.device).permute(
                                               2, 1, 0)
                if self.C_channels == 1:
                    cam_img = cam_img[:, :, 1]
                    cam_img = torch.tensor(
                        cam_img,
                        device=self.device).unsqueeze(dim=2).permute(2, 1, 0)
                img_shp = cam_img.shape
                cam_inp = F.pad(cam_img, (self.pad, self.pad), 'constant',
                                0.0)  # width may need to be 256
                if self.model_inputs is None:
                    self.model_inputs = torch.zeros(
                        (1, self.nt) + cam_inp.shape, device=self.device)

                # Update the model or the mode, if needed
                self.running_step = self.running_step + 1
                if self.new_model_path != self.model_path:

                    # Update the model path if new or changed and reset
                    # the prediction plot
                    self.model_path = self.new_model_path
                    self.pred_msg = torch.ones(
                        img_shp[0],
                        img_shp[1] * (self.nt - self.t_extrap + 1),
                        img_shp[2] + 10) * 64.0

                    # Load or reload the model
                    self.model = PredNet(self.R_channels,
                                         self.A_channels,
                                         device=self.device,
                                         t_extrap=self.t_extrap,
                                         scale=self.scale)
                    if self.device == 'cuda':
                        self.model = self.model.to('cuda')
                    if self.running_step == 1:
                        self._load_initial_weights()

                # Per-frame loss weights: the first frame is ignored and
                # frames from t_extrap onwards count double.
                time_loss_w = [
                    1.0 / (self.nt - 1) if s > 0 else 0.0
                    for s in range(self.nt)
                ]
                if self.t_extrap < self.nt:
                    time_loss_w = [
                        w if n < self.t_extrap else 2.0 * w
                        for n, w in enumerate(time_loss_w)
                    ]

                # Initialize the optimizer and the scheduler if needed
                if None in [self.optimizer, self.scheduler]:
                    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                                      lr=self.initial_lr)
                    self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
                        self.optimizer, T_0=50)

                # Save the model at each epoch
                if self.running_step % self.epoch_loop == 1:
                    torch.save(self.model.state_dict(), self.model_path)

                # Check that the model exists and initialize plot message
                if self.model is not None:

                    # Feed network and train it or compute prediction
                    self.model_inputs = self.model_inputs.roll(-1, dims=1)
                    self.model_inputs[0, -1, :, :, :] = cam_inp
                    if self.running_step > self.nt:

                        if self.do_train:
                            # Compute prediction loss for every frame
                            pred, latent = self.model(self.model_inputs,
                                                      self.nt)
                            loss = torch.tensor([0.0], device=self.device)
                            for s in range(self.nt):
                                error = (pred[s][0] -
                                         self.model_inputs[0][s])**2
                                loss += torch.sum(error) * time_loss_w[s]

                            # Backward pass and weight updates
                            self.optimizer.zero_grad()
                            loss.backward()
                            self.optimizer.step()
                            self.scheduler.step()
                        else:
                            # Predict future frames without weight updates
                            with torch.no_grad():
                                pred, latent = self.model(
                                    self.model_inputs[:, -self.t_extrap:, :, :, :],
                                    self.nt)

                        # Collect prediction frames
                        displays = [cam_img]  # First frame to be displayed is the present frame
                        targ_pos = [localize_target(cam_img)]  # Target on the present camera frame
                        t_stamps = [t]  # Time of the present frame is the camera rostime
                        for s in range(self.nt - self.t_extrap):
                            disp = torch.detach(
                                pred[self.t_extrap + s].clamp(0.0, 1.0)
                                [0, :, :, self.pad:-self.pad]).cpu()
                            targ_pos.append(localize_target(disp))
                            displays.append(disp)
                            # NOTE(review): t is in ms while this offset looks
                            # like seconds (0.02 s = 20 ms) -- confirm units.
                            t_stamps.append(t +
                                            (s + 1) * 0.02 * self.underSmpl)

                        # Complete for missing target positions, highlight
                        # target and set the display message
                        n_missing = np.sum([
                            any([np.isnan(p) for p in pos])
                            for pos in targ_pos
                        ])
                        if 0 < n_missing < len(targ_pos) - 2:
                            targ_pos = complete_target_positions(targ_pos)
                        for s, (disp, pos) in enumerate(
                                zip(displays, targ_pos)):
                            self.pred_msg[:, s * img_shp[1]:(s + 1) *
                                          img_shp[1], :img_shp[2]] = mark_target(
                                              disp, pos)

                        # Print loss or prediction messages
                        if self.do_train:
                            rospy.loginfo(
                                'Epoch: %2i - step: %2i - error: %5.4f - lr: %5.4f'
                                % (int(self.running_step / self.epoch_loop),
                                   self.running_step % self.epoch_loop,
                                   loss.item(), self.scheduler.get_lr()[0]))
                        else:
                            rospy.loginfo(
                                'Prediction for future target locations: ' +
                                str(targ_pos))

                        # Send latent state message (latent[0] to remove
                        # batch dimension)
                        latent_msg = list(latent[0].cpu().numpy().flatten())
                        layout_msg = MultiArrayLayout(dim=[
                            MultiArrayDimension(size=d)
                            for d in latent[0].shape
                        ])
                        self.latent_pub.publish(
                            Float32MultiArray(layout=layout_msg,
                                              data=latent_msg))

                        # Send predicted positions. The third coordinate is
                        # 0.964 for finite p[0] and NaN when p[0] is NaN.
                        pos_3d_msg = [[
                            1.562 - p[0] / 156.274, -0.14 - p[1] / 152.691,
                            0.964 + p[0] - p[0]
                        ] for p in targ_pos]
                        # Add time stamps, then flatten the 4-D entries so
                        # the data matches the [len(targ_pos), 4] layout.
                        pos_4d_msg = [[p[0], p[1], p[2], s]
                                      for (s, p) in zip(t_stamps, pos_3d_msg)]
                        pos_4d_msg = [v for pos in pos_4d_msg for v in pos]
                        layout_msg = MultiArrayLayout(dim=[
                            MultiArrayDimension(size=d)
                            for d in [len(targ_pos), 4]
                        ])
                        self.pred_pos_pub.publish(
                            Float32MultiArray(layout=layout_msg,
                                              data=pos_4d_msg))

                        # Collect input frames
                        inpt_msg = torch.zeros(
                            img_shp[0],
                            img_shp[1] * (self.nt - self.t_extrap + 1),
                            img_shp[2])
                        for s in range(self.nt - self.t_extrap):
                            inpt_msg[:, (s + 1) * img_shp[1]:(s + 2) *
                                     img_shp[1], :] = self.model_inputs[
                                         0, self.t_extrap + s, :, :,
                                         self.pad:-self.pad]

                        # Build and send the display message
                        plot_msg = torch.cat(
                            (self.pred_msg, inpt_msg), 2).numpy().transpose(
                                2, 1, 0) * int(self.normalizer)
                        if self.C_channels == 1:
                            plot_msg = np.dstack(
                                (plot_msg, plot_msg, plot_msg))
                        self.plot_pub.publish(CvBridge().cv2_to_imgmsg(
                            plot_msg.astype(np.uint8), 'rgb8'))
layer_loss_weights = np.expand_dims(layer_loss_weights, 1) # equally weight all timesteps except the first time_loss_weights = 1. / (time_steps - 1) * np.ones((time_steps, 1)) time_loss_weights[0] = 0 # weight initial weight layer in time predictions higher. inputs = Input(shape=(time_steps,) + input_shape) # print("<d>inputs.shape", inputs) if(args.multitask_flag): output_mode='error_and_label' # Configuring the model prednet = PredNet(stack_sizes, r_stack_sizes, args.a_filt_sizes, args.ahat_filt_sizes, args.r_filt_sizes , return_sequences=True , output_mode=output_mode , strided_conv_pool=args.strided_conv_pool , nb_classes=args.nb_classes, lbl_stack_sizes=args.n_chan_lbl_layer) # print("<d>compute_output_shape:", prednet.compute_output_shape(inputs)) errors_and_labels = prednet(inputs) # errors will be (batch_size, nt, nb_layers), labels will be (batch_size, nt, num_classes) errors = Lambda(lambda x: x[:,:,:nb_layers], output_shape=(time_steps,nb_layers,))(errors_and_labels) labels = Lambda(lambda x: x[:,:,nb_layers:], output_shape=(time_steps,args.nb_classes,))(errors_and_labels) # print("<d>o.shape", errors_and_labels.shape) # print("<d>errors.shape", errors.shape) # print("<d>labels.shape", labels.shape) errors_by_time = TimeDistributed(Dense(1, trainable=False), weights=[layer_loss_weights, np.zeros(1)], trainable=False)(errors) # calculate weighted error by layer errors_by_time = Flatten()(errors_by_time) # will be (batch_size, nt) final_errors = Dense(1, weights=[time_loss_weights, np.zeros(1)], trainable=False, name='y')( errors_by_time) # weight errors by time
# Image geometry comes from the command line.
n_channels = args.n_channels
img_height = args.img_height
img_width = args.img_width

# Fixed architecture: four stacks, 3x3 filters everywhere.
stack_sizes = (n_channels, 48, 96, 192)
R_stack_sizes = stack_sizes
A_filter_sizes = (3, ) * 3
Ahat_filter_sizes = (3, ) * 4
R_filter_sizes = (3, ) * 4

prednet = PredNet(
    stack_sizes,
    R_stack_sizes,
    A_filter_sizes,
    Ahat_filter_sizes,
    R_filter_sizes,
    output_mode='error',
    data_format=args.data_format,
    return_sequences=True,
)
print(prednet)
prednet.cuda()

# Only training is supported from this point on.
assert args.mode == 'train'
train(prednet, args)
def execute_test():
    """Run the trained PredNet on the test set and dump scores and frames.

    Reads the module-level settings ``json_file``, ``weights_file``,
    ``extrap``, ``nt``, ``test_file``, ``test_sources``, ``batch_size``,
    ``RESULTS_DIR``, ``numtests`` and ``testdir_name``.

    Side effects: writes ``prediction_scores.txt`` under ``RESULTS_DIR`` and
    saves the predicted / original frames as JPEGs under
    ``RESULTS_DIR/prediction_plots/single/<testdir_name>``.
    """
    print("Preparing to execute the test...")

    # Load trained model
    with open(json_file, 'r') as f:
        json_string = f.read()
    train_model = model_from_json(json_string,
                                  custom_objects={'PredNet': PredNet})
    train_model.load_weights(weights_file)

    # Create testing model (to output predictions)
    layer_config = train_model.layers[1].get_config()
    layer_config['output_mode'] = 'prediction'
    layer_config['extrap_start_time'] = extrap
    # Configs may carry the key 'data_format' or 'dim_ordering'.
    if 'data_format' in layer_config:
        data_format = layer_config['data_format']
    else:
        data_format = layer_config['dim_ordering']
    test_prednet = PredNet(weights=train_model.layers[1].get_weights(),
                           **layer_config)
    input_shape = list(train_model.layers[0].batch_input_shape[1:])
    input_shape[0] = nt
    inputs = Input(shape=tuple(input_shape))
    predictions = test_prednet(inputs)
    test_model = Model(inputs=inputs, outputs=predictions)

    test_generator = SequenceGenerator(test_file,
                                       test_sources,
                                       nt,
                                       sequence_start_mode='unique',
                                       data_format=data_format)
    X_test = test_generator.create_all()
    X_hat = test_model.predict(X_test, batch_size)
    if data_format == 'channels_first':
        # Move the channel axis last so frames can be saved as images.
        X_test = np.transpose(X_test, (0, 1, 3, 4, 2))
        X_hat = np.transpose(X_hat, (0, 1, 3, 4, 2))

    # Compare MSE of PredNet predictions vs. using last frame.
    # Look at all timesteps except the first.
    mse_model = np.mean((X_test[:, 1:] - X_hat[:, 1:])**2)
    mse_prev = np.mean((X_test[:, :-1] - X_test[:, 1:])**2)
    if not os.path.exists(RESULTS_DIR):
        os.mkdir(RESULTS_DIR)
    with open(os.path.join(RESULTS_DIR, 'prediction_scores.txt'), 'w') as f:
        f.write("Model MSE: %f\n" % mse_model)
        f.write("Previous Frame MSE: %f" % mse_prev)

    # Figure/grid setup kept from the original; nothing below draws into it.
    aspect_ratio = float(X_hat.shape[2]) / X_hat.shape[3]
    plt.figure(figsize=(nt, 2 * aspect_ratio))
    gs = gridspec.GridSpec(2, nt)
    gs.update(wspace=0., hspace=0.)

    plot_save_dir = os.path.join(RESULTS_DIR, 'prediction_plots/')
    if not os.path.exists(plot_save_dir):
        os.mkdir(plot_save_dir)

    # Output the sequence of all the predicted images.
    # NOTE(review): the target directory does not depend on `test`, so each
    # iteration overwrites the files of the previous one - confirm intent.
    # NOTE(review): the original indentation was lost; the two status prints
    # are placed inside the per-test loop here - confirm.
    for test in range(numtests):
        testdir = os.path.join("single/", testdir_name)
        testdir = os.path.join(plot_save_dir, testdir)
        if not os.path.exists(testdir):
            os.makedirs(testdir)
        print("///////// NT: " + str(nt))
        for t in range(nt):
            imsave(testdir + "/pred-%02d.jpg" % (t, ), X_hat[test, t])
            imsave(testdir + "/orig-%02d.jpg" % (t, ), X_test[test, t])
        print("Test data saved in " + testdir)
# stack_sizes = eval(args.stack_sizes) # R_stack_sizes = eval(args.R_stack_sizes) # A_filter_sizes = eval(args.A_filter_sizes) # Ahat_filter_sizes = eval(args.Ahat_filter_sizes) # R_filter_sizes = eval(args.R_filter_sizes) stack_sizes = (n_channels, 48, 96, 192) R_stack_sizes = stack_sizes A_filter_sizes = (3, 3, 3) Ahat_filter_sizes = (3, 3, 3, 3) R_filter_sizes = (3, 3, 3, 3) prednet = PredNet(stack_sizes, R_stack_sizes, A_filter_sizes, Ahat_filter_sizes, R_filter_sizes, output_mode='prediction', data_format=args.data_format, return_sequences=True) print(prednet) prednet.cuda() # print('\n'.join(['%s:%s' % item for item in prednet.__dict__.items()])) # print(type(prednet.state_dict())) # <class 'collections.OrderedDict'> # for k, v in prednet.state_dict().items(): # print(k, v.size()) ## 使用自己训练的参数 checkpoint_file = args.checkpoint_file try: checkpoint = checkpoint_loader(checkpoint_file)
def process(cap, fname):
    """Collect frame sequences from ``cap`` and train a PredNet on them.

    Frames are read from the capture, sub-sampled, resized with
    ``resize_fit`` and grouped into sequences of ``nt`` frames scaled by
    1/255. A PredNet error model is then fitted on those sequences against
    all-zero targets.

    Args:
        cap: object exposing ``get``, ``read`` and ``release`` as used below.
        fname: tag inserted into the weights and model-JSON file names
            written under ``WEIGHTS_DIR``.
    """
    scale_height = 128
    scale_width = 160
    target_fps = 8
    fps = cap.get(cv2.CAP_PROP_FPS)
    fps = 25  # the rate read from the capture is overridden by this constant
    nt = 10  # number of timesteps used for sequences in training
    print(fps)

    sequences = []
    clip = []
    kept = 0
    skipped = 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break

        # Drop frames until enough have gone by to approximate target_fps.
        skipped += 1
        if (skipped < fps / target_fps):
            continue
        skipped = 0
        kept += 1

        frame = resize_fit(frame, (scale_width, scale_height))
        cv2.imshow('frame', frame)
        clip.append(frame)
        if len(clip) >= nt:
            # A full sequence: store it as float32 scaled by 1/255.
            sequences.append(np.array(clip).astype(np.float32) / 255)
            clip = []

        # NOTE(review): nesting reconstructed - key 32 waits for another key
        # press, key 27 stops the capture loop.
        key = cv2.waitKey(1) & 0xff
        if key == 32:
            key = cv2.waitKey() & 0xff
        if key == 27:
            break

    cap.release()
    cv2.destroyAllWindows()

    X = np.array(sequences)
    print(X.shape)
    # The model output is an error value, so the targets are all zeros.
    Y = np.zeros(X.shape[0], np.float32)
    print(Y.shape)

    assert (K.image_data_format() != 'channels_first')

    save_model = True  # if weights will be saved
    # where weights will be saved
    weights_file = os.path.join(WEIGHTS_DIR,
                                'prednet_' + fname + '_weights.hdf5')
    json_file = os.path.join(WEIGHTS_DIR, 'prednet_' + fname + '_model.json')

    # Training parameters
    nb_epoch = 2
    batch_size = 4

    # Model parameters
    n_channels, im_height, im_width = (3, 128, 160)
    if K.image_data_format() == 'channels_first':
        input_shape = (n_channels, im_height, im_width)
    else:
        input_shape = (im_height, im_width, n_channels)
    stack_sizes = (n_channels, 48, 96, 192)
    R_stack_sizes = stack_sizes
    A_filt_sizes = (3, 3, 3)
    Ahat_filt_sizes = (3, 3, 3, 3)
    R_filt_sizes = (3, 3, 3, 3)

    # Weighting for each layer in final loss;
    # "L_0" model: [1, 0, 0, 0], "L_all": [1, 0.1, 0.1, 0.1]
    layer_loss_weights = np.array([1., 0., 0., 0.])
    layer_loss_weights = np.expand_dims(layer_loss_weights, 1)
    # Equally weight all timesteps except the first.
    time_loss_weights = 1. / (nt - 1) * np.ones((nt, 1))
    time_loss_weights[0] = 0

    prednet = PredNet(stack_sizes,
                      R_stack_sizes,
                      A_filt_sizes,
                      Ahat_filt_sizes,
                      R_filt_sizes,
                      output_mode='error',
                      return_sequences=True)

    inputs = Input(shape=(nt, ) + input_shape)
    errors = prednet(inputs)  # errors will be (batch_size, nt, nb_layers)
    # Frozen weighted sum over layers, per timestep.
    errors_by_time = TimeDistributed(
        Dense(1, trainable=False),
        weights=[layer_loss_weights, np.zeros(1)],
        trainable=False)(errors)
    errors_by_time = Flatten()(errors_by_time)  # will be (batch_size, nt)
    # Frozen weighted sum over time.
    final_errors = Dense(1,
                         weights=[time_loss_weights, np.zeros(1)],
                         trainable=False)(errors_by_time)
    model = Model(inputs=inputs, outputs=final_errors)
    model.compile(loss='mean_absolute_error', optimizer='adam')

    def lr_schedule(epoch):
        # Start with lr of 0.001 and then drop to 0.0001 after 75 epochs.
        return 0.001 if epoch < 75 else 0.0001

    callbacks = [LearningRateScheduler(lr_schedule)]
    if save_model:
        if not os.path.exists(WEIGHTS_DIR):
            os.mkdir(WEIGHTS_DIR)
        callbacks.append(
            ModelCheckpoint(filepath=weights_file,
                            monitor='val_loss',
                            save_best_only=True))

    model.fit(X,
              Y,
              batch_size=batch_size,
              epochs=nb_epoch,
              validation_split=0.2,
              verbose=1,
              callbacks=callbacks)

    if save_model:
        json_string = model.to_json()
        with open(json_file, "w") as f:
            f.write(json_string)
from data_utils import SequenceGenerator

# Test-split locations; the SequenceGenerator usage itself is disabled here.
data_file = '/media/sdb1/chenrui/kitti_data/h5/X_test.h5'
source_file = '/media/sdb1/chenrui/kitti_data/h5/sources_test.h5'
nt = 10
# sg = SequenceGenerator(data_file, source_file, nt)
# print(next(sg))

# Architecture: channel counts per layer and kernel sizes per module.
n_channels = 3
stack_sizes = (n_channels, 48, 96, 192)
R_stack_sizes = stack_sizes
A_filt_sizes = (3, 3, 3)
Ahat_filt_sizes = (3, 3, 3, 3)
R_filt_sizes = (3, 3, 3, 3)

prednet = PredNet(
    stack_sizes,
    R_stack_sizes,
    A_filt_sizes,
    Ahat_filt_sizes,
    R_filt_sizes,
    output_mode='error',
    data_format='channels_first',
    return_sequences=True,
)

# Build the layer for a fixed input shape, then dump its attributes.
input_shape = (8, 3, 128, 160)
prednet.build(input_shape)

attribute_lines = ['%s:%s' % item for item in prednet.__dict__.items()]
print('\n'.join(attribute_lines))
print('+' * 30)
print(prednet.conv_layers['ahat'][1].strides)
import os

# Must be set before keras is imported so the backend only sees GPU 0.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import datetime
import keras
import numpy as np
from prednet import PredNet
from datagen import PredNetBatchGenerator

# Build and compile the training model.
model_train = PredNet(T=40, L=10, img_shape=(64, 64, 3)).build_train()
model_train.compile(optimizer='sgd', loss='mean_squared_error')
model_train.summary()

train_batch_generator = PredNetBatchGenerator(
    video_path="../data/video/20bn-jester-v1",
    img_size=(64, 64),
    batch_size=2,
    T=40,
    L=10,
    use_padding=True)

# One log directory per run, named after the start time.
date_string = "prednet_" + datetime.datetime.now().strftime('%Y%m%d %H:%M:%S')
log_dir = './log/' + date_string
# makedirs (rather than mkdir) so a missing './log' parent is created too.
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)
print("model logdir :", log_dir)

# Log metrics to CSV and keep the weights with the lowest training loss.
callbacks = []
callbacks.append(
    keras.callbacks.CSVLogger(filename=log_dir + '/metrics.csv'))
callbacks.append(
    keras.callbacks.ModelCheckpoint(filepath=log_dir + '/bestweights.hdf5',
                                    monitor='loss',
                                    save_best_only=True))
def run_step(self):
    """Consume the latest camera frame, run PredNet on it and publish results.

    When a sufficiently recent frame is available in ``self.camera``, the
    frame is converted to a tensor and pushed into the rolling input buffer
    ``self.model_inputs``. The model is (re)created whenever
    ``self.new_model_path`` differs from ``self.model_path``. Once more than
    ``self.nt`` frames have been seen, the method either performs one
    training step (``self.do_train``) or predicts future frames, then
    publishes the latent state, the predicted target positions and a display
    image.

    Returns:
        None. All results go to the ``latent_pub``, ``pred_pos_pub`` and
        ``plot_pub`` publishers and to attributes of ``self``.

    NOTE(review): the original indentation of this method was lost; the
    nesting below was reconstructed from the data flow - please confirm.
    """
    # Check that the camera device is on and that it is the right time-step
    if self.camera is None:
        return
    t = self.camera.header.stamp.to_secs() * 1000.0  # in milliseconds
    # One ros time-step is 20 ms; only every underSmpl-th step is processed.
    if not t > self.last_cam_time + 20 * self.underSmpl:
        return
    self.last_cam_time = t

    # Collect input image and initialize the network input
    cam_img = CvBridge().imgmsg_to_cv2(self.camera, 'rgb8') / self.normalizer
    if self.C_channels == 3:
        # Original author's note: the permutation should have been (2,0,1),
        # but the model is already trained with (2,1,0).
        cam_img = torch.tensor(cam_img, device=self.device).permute(2, 1, 0)
    if self.C_channels == 1:
        cam_img = cam_img[:, :, 1]  # single channel taken at index 1
        cam_img = torch.tensor(
            cam_img, device=self.device).unsqueeze(dim=2).permute(2, 1, 0)
    img_shp = cam_img.shape
    # Pad the last dimension on both sides (width may need to be 256).
    cam_inp = F.pad(cam_img, (self.pad, self.pad), 'constant', 0.0)
    if self.model_inputs is None:
        self.model_inputs = torch.zeros((1, self.nt) + cam_inp.shape,
                                        device=self.device)

    # Update the model or the mode, if needed
    self.running_step = self.running_step + 1
    if self.new_model_path != self.model_path:

        # Update the model path if new or changed and reset prediction plot
        self.model_path = self.new_model_path
        self.pred_msg = torch.ones(
            img_shp[0], img_shp[1] * (self.nt - self.t_extrap + 1),
            img_shp[2] + 10) * 64.0

        # Load or reload the model
        self.model = PredNet(self.R_channels,
                             self.A_channels,
                             device=self.device,
                             t_extrap=self.t_extrap,
                             scale=self.scale)
        if self.device == 'cuda':
            self.model = self.model.to('cuda')
        if self.running_step == 1:
            # Best-effort weight loading: on any failure (or when fresh
            # weights are requested) the randomly initialized model is kept.
            weights_msg = ('No existing weight file found. '
                           'Model initialized randomly.')
            if not self.use_new_w:
                try:
                    if self.use_trained_w:
                        self.model.load_state_dict(
                            torch.load(self.trained_w_path))
                        weights_msg = (
                            'Model initialized with pre-trained weights.')
                    else:
                        self.model.load_state_dict(
                            torch.load(self.model_path))
                        weights_msg = 'Learning weights loaded in the model.'
                except Exception:
                    # Deliberate fallback; the message above reports it.
                    pass
            rospy.loginfo(weights_msg)

    # Per-frame loss weights: the first frame is ignored, and frames from
    # t_extrap onwards count double. Computed on every call so the training
    # branch below never sees an unbound name.
    time_loss_w = [
        1.0 / (self.nt - 1) if s > 0 else 0.0 for s in range(self.nt)
    ]
    if self.t_extrap < self.nt:
        time_loss_w = [
            w if n < self.t_extrap else 2.0 * w
            for n, w in enumerate(time_loss_w)
        ]

    # Initialize the optimizer and the scheduler if needed
    if self.model is not None and None in [self.optimizer, self.scheduler]:
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=self.initial_lr)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            self.optimizer, T_0=50)

    # Save the model at each epoch
    if self.running_step % self.epoch_loop == 1:
        torch.save(self.model.state_dict(), self.model_path)

    # Check that the model exists
    if self.model is None:
        return

    # Shift the input buffer by one frame and append the new one.
    self.model_inputs = self.model_inputs.roll(-1, dims=1)
    self.model_inputs[0, -1, :, :, :] = cam_inp

    # Wait until the buffer holds only real frames.
    if not self.running_step > self.nt:
        return

    if self.do_train:
        # Compute prediction loss for every frame
        pred, latent = self.model(self.model_inputs, self.nt)
        loss = torch.tensor([0.0], device=self.device)
        for s in range(self.nt):
            error = (pred[s][0] - self.model_inputs[0][s])**2
            loss += torch.sum(error) * time_loss_w[s]

        # Backward pass and weight updates
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.scheduler.step()
    else:
        # Predicts future frames without weight updates
        with torch.no_grad():
            pred, latent = self.model(
                self.model_inputs[:, -self.t_extrap:, :, :, :], self.nt)

    # Collect prediction frames, starting from the present camera frame.
    displays = [cam_img]
    targ_pos = [localize_target(cam_img)]
    t_stamps = [t]  # time of the present frame is the camera rostime (ms)
    for s in range(self.nt - self.t_extrap):
        disp = torch.detach(pred[self.t_extrap + s].clamp(
            0.0, 1.0)[0, :, :, self.pad:-self.pad]).cpu()
        targ_pos.append(localize_target(disp))
        displays.append(disp)
        # `t` is in milliseconds and one processed step spans
        # 20 ms * underSmpl, so the offset is expressed in milliseconds too.
        t_stamps.append(t + (s + 1) * 20.0 * self.underSmpl)

    # Complete for missing target positions when some, but not nearly all,
    # of the positions contain NaN.
    n_missing = np.sum([any([np.isnan(p) for p in pos]) for pos in targ_pos])
    if 0 < n_missing < len(targ_pos) - 2:
        targ_pos = complete_target_positions(targ_pos)

    # Highlight the target and write each frame into the display message.
    for s, (disp, pos) in enumerate(zip(displays, targ_pos)):
        self.pred_msg[:, s * img_shp[1]:(s + 1) * img_shp[1], :img_shp[2]] = \
            mark_target(disp, pos)

    # Print loss or prediction messages
    if self.do_train:
        rospy.loginfo(
            'Epoch: %2i - step: %2i - error: %5.4f - lr: %5.4f' %
            (int(self.running_step / self.epoch_loop),
             self.running_step % self.epoch_loop, loss.item(),
             self.scheduler.get_lr()[0]))
    else:
        rospy.loginfo('Prediction for future target locations: ' +
                      str(targ_pos))

    # Send latent state message (latent[0] to remove batch dimension)
    latent_msg = list(latent[0].cpu().numpy().flatten())
    layout_msg = MultiArrayLayout(
        dim=[MultiArrayDimension(size=d) for d in latent[0].shape])
    self.latent_pub.publish(
        Float32MultiArray(layout=layout_msg, data=latent_msg))

    # Convert target positions with fixed offsets and scales. The third
    # coordinate is 0.964, except that `+ p[0] - p[0]` propagates a NaN
    # from p[0].
    pos_3d_msg = [[
        1.562 - p[0] / 156.274, -0.14 - p[1] / 152.691,
        0.964 + p[0] - p[0]
    ] for p in targ_pos]
    # Add time stamps, then flatten the 4D rows so the data matches the
    # [N, 4] layout declared below.
    pos_4d_msg = [[p[0], p[1], p[2], s]
                  for (s, p) in zip(t_stamps, pos_3d_msg)]
    pos_4d_msg = [v for pos in pos_4d_msg for v in pos]
    layout_msg = MultiArrayLayout(dim=[
        MultiArrayDimension(size=d) for d in [len(targ_pos), 4]
    ])
    self.pred_pos_pub.publish(
        Float32MultiArray(layout=layout_msg, data=pos_4d_msg))

    # Collect input frames; slot 0 stays empty, the others hold the input
    # frames from index t_extrap onwards.
    inpt_msg = torch.zeros(img_shp[0],
                           img_shp[1] * (self.nt - self.t_extrap + 1),
                           img_shp[2])
    for s in range(self.nt - self.t_extrap):
        inpt_msg[:, (s + 1) * img_shp[1]:(s + 2) * img_shp[1], :] = \
            self.model_inputs[0, self.t_extrap + s, :, :,
                              self.pad:-self.pad]

    # Build and send the display message
    plot_msg = torch.cat((self.pred_msg, inpt_msg), 2).numpy().transpose(
        2, 1, 0) * int(self.normalizer)
    if self.C_channels == 1:
        plot_msg = np.dstack((plot_msg, plot_msg, plot_msg))
    self.plot_pub.publish(CvBridge().cv2_to_imgmsg(
        plot_msg.astype(np.uint8), 'rgb8'))
# Architecture of the error-output PredNet.
# NOTE(review): input_shape puts the 3 first while dim_ordering is 'tf'
# below - confirm which axis is meant to hold the channels.
input_shape = (3, 128, 160)
stack_sizes = (input_shape[0], 48, 96, 192)
R_stack_sizes = stack_sizes
A_filt_sizes = (3, 3, 3)
Ahat_filt_sizes = (3, 3, 3, 3)
R_filt_sizes = (3, 3, 3, 3)

# Loss weights: only the first layer counts; the first timestep is ignored.
layer_loss_weights = np.array([1., 0., 0., 0.])
layer_loss_weights = np.expand_dims(layer_loss_weights, 1)
time_loss_weights = 1. / (nt - 1) * np.ones((nt, 1))
time_loss_weights[0] = 0

# Reuse the weights of the already-loaded training model.
pretrained_weights = train_model.layers[1].get_weights()
prednet = PredNet(
    stack_sizes,
    R_stack_sizes,
    A_filt_sizes,
    Ahat_filt_sizes,
    R_filt_sizes,
    output_mode='error',
    return_sequences=True,
    dim_ordering='tf',
    weights=pretrained_weights,
)

inputs = Input(shape=(nt,) + input_shape)
errors = prednet(inputs)  # errors will be (batch_size, nt, nb_layers)
model = Model(input=inputs, output=errors)
model.compile(loss='mean_absolute_error', optimizer='adam')

# Drop the reference to the training model and collect garbage.
train_model = 0
gc.collect()

# test_generator = SequenceGenerator(test_file, test_sources, nt, sequence_start_mode='unique', dim_ordering=dim_ordering)
# X_test = test_generator.create_all()
# [int(vim2_stim2.shape[0] / batch_size# )
# Evaluation settings.
batch_size = 16
A_channels = (3, 48, 96, 192)
R_channels = (3, 48, 96, 192)

# Test split of the dataset.
DATA_DIR = '/media/lei/000F426D0004CCF4/datasets/kitti_data'
test_file = os.path.join(DATA_DIR, 'X_test.hkl')
test_sources = os.path.join(DATA_DIR, 'sources_test.hkl')
nt = 10

kitti_test = KITTI(test_file, test_sources, nt)
test_loader = DataLoader(kitti_test, batch_size=batch_size, shuffle=False)

# Restore the trained network in 'prediction' mode.
model = PredNet(R_channels, A_channels, output_mode='prediction')
model.load_state_dict(torch.load('training.pt'))

if torch.cuda.is_available():
    print('Using GPU.')
    model.cuda()

for i, inputs in enumerate(test_loader):
    # batch x time_steps x channel x width x height
    inputs = inputs.permute(0, 1, 4, 2, 3)
    # NOTE(review): the batch is moved to the GPU unconditionally, unlike
    # the model above - confirm this script is GPU-only.
    inputs = Variable(inputs.cuda())
    # Frame at index nt-1 of every sequence, as bytes on the CPU.
    origin = inputs.data.cpu().byte()[:, nt - 1]
    print('origin:')
    print(type(origin))
    print(origin.size())
    print('predicted:')
json_file = open(os.path.join(model_root, 'model.json'), 'r') # todo, this is going to the real one # json_file = open(os.path.join(model_root, 'model', 'model.json'), 'r') model_json = json_file.read() json_file.close() trained_model = model_from_json(model_json, custom_objects={"PredNet": PredNet}) trained_model.output_mode = 'error' trained_model.return_sequences = True # load weights into new model trained_model.load_weights(os.path.join(model_root, "weights.hdf5")) layer_config = trained_model.layers[1].get_config() layer_config['output_mode'] = 'error' # data_format = layer_config['data_format'] if 'data_format' in layer_config else layer_config['dim_ordering'] prednet = PredNet(weights=trained_model.layers[1].get_weights(), **layer_config) input_shape = list(trained_model.layers[0].batch_input_shape)[1:] # input_shape[0] = nt inputs = Input(shape=tuple(input_shape)) # print(inputs.shape) nb_layers = len(trained_model.layers) - 1 nt = input_shape[0] # errors = prednet(inputs) # model = Model_keras(inputs=inputs, outputs=errors) else: # Set model characteristics according to above settings stack_sizes = ( n_channels, ) + stack_sizes_arg # 4 layer architecture, with 3 input channels (rgb), and 48, 96, 192 units in the deep layers nb_layers = len(stack_sizes) # number of layers input_shape = (n_channels, im_height,
# Command line: <basename> <outname> <layer designation>
basename = sys.argv[1]
testfile = sys.argv[1] + ".hkl"
srcfile = "sources_" + sys.argv[1] + '.hkl'
outname = sys.argv[2]
layerdesignation = sys.argv[3]
print("test file: %s. sources file: %s" % (testfile, srcfile))

# Configs may carry the key 'data_format' or 'dim_ordering'.
if 'data_format' in layer_config:
    data_format = layer_config['data_format']
else:
    data_format = layer_config['dim_ordering']

# Todo: check to ensure that layer_config is what we need.
print("Setting up model to extract data from layer %s" % layerdesignation)
layer_config['output_mode'] = layerdesignation
trained_weights = train_model.layers[1].get_weights()
test_prednet = PredNet(weights=trained_weights, **layer_config)
print("PredNet picked up output mode: %s" % test_prednet.output_mode)

# Same input shape as the training model, with the leading entry set to nt.
input_shape = list(train_model.layers[0].batch_input_shape[1:])
print("Input shape:")
print(input_shape)
input_shape[0] = nt

inputs = Input(shape=tuple(input_shape))
predictions = test_prednet(inputs)
test_model = Model(inputs=inputs, outputs=predictions)

test_generator = SequenceGenerator(
    testfile,
    srcfile,
    nt,
    sequence_start_mode="unique",
    data_format=data_format,
)
def img_to_pred(t, camera, plot_topic, latent_topic, pred_pos_topic, pred_msg,
                model, model_path, model_inputs, optimizer, scheduler,
                run_step):
    """Feed the current camera frame to PredNet and send out its predictions.

    On every ``underSmpl``-th simulation step with a camera image available,
    the frame is pushed into the rolling buffer ``model_inputs.value``. After
    more than ``nt`` frames, the model either trains on the buffer
    (``do_train``) or predicts ``nt - t_extrap`` future frames. The latent
    state, the predicted target positions and a display image are sent on
    the corresponding topics.

    Args:
        t: simulation time; ``int(t * 50)`` is used as the step index.
        camera: holder whose ``.value`` is the latest image message or None.
        plot_topic, latent_topic, pred_pos_topic: objects with a
            ``send_message`` method.
        pred_msg, model, model_path, model_inputs, optimizer, scheduler,
        run_step: holders whose ``.value`` persists state between calls.

    Returns:
        None.

    NOTE(review): the original indentation of this function was lost; the
    nesting below was reconstructed from the data flow - please confirm.
    """

    # Imports
    import os
    import torch
    import torch.nn.functional as F
    import numpy as np
    from prednet import PredNet
    from cv_bridge import CvBridge
    from std_msgs.msg import Float32MultiArray, MultiArrayLayout, MultiArrayDimension
    from specs import localize_target, complete_target_positions, mark_target, exp_dir

    # Image and model parameters
    underSmpl = 5  # Avoiding too sharp time resolution (no change between frames)
    nt = 15  # Number of "past" frames given to the network
    t_extrap = 5  # After this frame, input is not used for future predictions
    n_feat = 1  # Factor for number of features used in the network
    max_pix_value = 1.0
    normalizer = 255.0 / max_pix_value
    C_channels = 3  # 1 or 3 (color channels)
    A_channels = (C_channels, n_feat * 4, n_feat * 8, n_feat * 16)
    R_channels = (C_channels, n_feat * 4, n_feat * 8, n_feat * 16)
    scale = 4  # 2 or 4 (how much layers down/upsample images)
    pad = 8 if scale == 4 else 0  # For up/downsampling to work
    # Slice that removes the padding again; `pad:-pad` would select nothing
    # when pad is 0, so that case keeps the full range.
    unpad = slice(pad, -pad) if pad > 0 else slice(None)
    model_name = 'model' + str(n_feat) + '.pt'
    new_model_path = os.getcwd() + '/resources/' + model_name
    trained_w_path = exp_dir + model_name  # exp_dir computed in specs.py
    device = 'cpu'
    if torch.cuda.is_available():
        device = 'cuda'

    # Training parameters
    use_new_w = False  # If True, do not use weights that are saved in new_model_path
    use_trained_w = True  # If above is False, use trained_w_path as model weights
    do_train = False  # Train with present frames if True, predicts future if False
    initial_lr = 1e-4  # Then, the learning rate is scheduled with cosine annealing
    epoch_loop = 100  # Every epoch_loop, a prediction is made, to monitor progress
    n_batches = 1  # For now, not usable (could roll images for multiple batches)

    # Check that the simulation frame is far enough
    if camera.value is None or int(t * 50) % underSmpl != 0:
        return

    # Collect input image and initialize the network input
    cam_img = CvBridge().imgmsg_to_cv2(camera.value, 'rgb8') / normalizer
    if C_channels == 3:
        # Original author's note: the permutation should have been (2,0,1),
        # but the model is already trained with (2,1,0).
        cam_img = torch.tensor(cam_img, device=device).permute(2, 1, 0)
    if C_channels == 1:
        cam_img = cam_img[:, :, 1]  # single channel taken at index 1
        cam_img = torch.tensor(cam_img,
                               device=device).unsqueeze(dim=2).permute(
                                   2, 1, 0)
    img_shp = cam_img.shape
    # Pad the last dimension on both sides (width may need to be 256).
    cam_img = F.pad(cam_img, (pad, pad), 'constant', 0.0)
    if model_inputs.value is None:
        model_inputs.value = torch.zeros((1, nt) + cam_img.shape,
                                         device=device)

    # Update the model or the mode, if needed
    run_step.value = run_step.value + 1
    if new_model_path != model_path.value:

        # Update the model path if new or changed and reset prediction plot
        model_path.value = new_model_path
        pred_msg.value = torch.ones(img_shp[0], img_shp[1] * (nt - t_extrap),
                                    img_shp[2] + 10) * 64.0

        # Load or reload the model
        model.value = PredNet(R_channels,
                              A_channels,
                              device=device,
                              t_extrap=t_extrap,
                              scale=scale)
        if device == 'cuda':
            model.value = model.value.to('cuda')
        if run_step.value == 1:
            # Best-effort weight loading: on any failure (or when fresh
            # weights are requested) the randomly initialized model is kept.
            weights_msg = ('No existing weight file found. '
                           'Model initialized randomly.')
            if not use_new_w:
                try:
                    if use_trained_w:
                        model.value.load_state_dict(
                            torch.load(trained_w_path))
                        weights_msg = (
                            'Model initialized with pre-trained weights.')
                    else:
                        model.value.load_state_dict(
                            torch.load(model_path.value))
                        weights_msg = 'Learning weights loaded in the model.'
                except Exception:
                    # Deliberate fallback; the message above reports it.
                    pass
            clientLogger.info(weights_msg)

    # Per-frame loss weights: the first frame is ignored, and frames from
    # t_extrap onwards count double. Computed on every call so the training
    # branch below never sees an unbound name.
    time_loss_w = [1.0 / (nt - 1) if s > 0 else 0.0 for s in range(nt)]
    if t_extrap < nt:
        time_loss_w = [
            w if n < t_extrap else 2.0 * w
            for n, w in enumerate(time_loss_w)
        ]

    # Initialize the optimizer and the scheduler if needed
    if model.value is not None and None in [optimizer.value, scheduler.value]:
        optimizer.value = torch.optim.Adam(model.value.parameters(),
                                           lr=initial_lr)
        scheduler.value = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
            optimizer.value, T_0=50)

    # Save the model at each epoch
    if run_step.value % epoch_loop == 1:
        torch.save(model.value.state_dict(), model_path.value)

    # Check that the model exists
    if model.value is None:
        return

    # Shift the input buffer by one frame and append the new one.
    model_inputs.value = model_inputs.value.roll(-1, dims=1)
    model_inputs.value[0, -1, :, :, :] = cam_img

    # Wait until the buffer holds only real frames.
    if not run_step.value > nt:
        return

    if do_train:
        # Compute prediction loss for every frame
        pred, latent = model.value(model_inputs.value, nt)
        loss = torch.tensor([0.0], device=device)
        for s in range(nt):
            error = (pred[s][0] - model_inputs.value[0][s])**2
            loss += torch.sum(error) * time_loss_w[s]

        # Backward pass and weight updates
        optimizer.value.zero_grad()
        loss.backward()
        optimizer.value.step()
        scheduler.value.step()
    else:
        # Predicts future frames without weight updates
        with torch.no_grad():
            pred, latent = model.value(
                model_inputs.value[:, -t_extrap:, :, :, :], nt)

    # Collect prediction frames
    displays = []
    targ_pos = []
    for s in range(nt - t_extrap):
        disp = torch.detach(pred[t_extrap + s].clamp(
            0.0, 1.0)[0, :, :, unpad]).cpu()
        # disp = model_inputs.value[0,-(s+1),:,:,pad:-pad].cpu()  # for tests
        targ_pos.append(localize_target(disp))
        displays.append(disp)

    # Complete for missing target positions when some, but not nearly all,
    # of the positions contain NaN.
    n_missing = np.sum([any([np.isnan(p) for p in pos]) for pos in targ_pos])
    if 0 < n_missing < len(targ_pos) - 2:
        targ_pos = complete_target_positions(targ_pos)

    # Highlight the target and write each frame into the display message.
    for s, (disp, pos) in enumerate(zip(displays, targ_pos)):
        pred_msg.value[:, s * img_shp[1]:(s + 1) * img_shp[1], :img_shp[2]] = \
            mark_target(disp, pos)

    # Print loss or prediction messages
    if do_train:
        clientLogger.info(
            'Epoch: %2i - step: %2i - error: %5.4f - lr: %5.4f' %
            (int(run_step.value / epoch_loop), run_step.value % epoch_loop,
             loss.item(), scheduler.value.get_lr()[0]))
    else:
        clientLogger.info('Prediction for future target locations: ' +
                          str(targ_pos))

    # Send latent state message (latent[0] to remove batch dimension)
    latent_msg = list(latent[0].cpu().numpy().flatten())
    layout_msg = MultiArrayLayout(
        dim=[MultiArrayDimension(size=d) for d in latent[0].shape])
    latent_topic.send_message(
        Float32MultiArray(layout=layout_msg, data=latent_msg))

    # Convert target positions with fixed offsets and scales. The third
    # coordinate is 0.964, except that `+ p[0] - p[0]` propagates a NaN
    # from p[0].
    pos_3d_msg = [[
        1.562 - p[0] / 156.274, -0.14 - p[1] / 152.691,
        0.964 + p[0] - p[0]
    ] for p in targ_pos]
    pos_3d_msg = [p for pos in pos_3d_msg for p in pos]  # flatten the list
    layout_msg = MultiArrayLayout(dim=[
        MultiArrayDimension(size=d) for d in [len(targ_pos), 3]
    ])
    pred_pos_topic.send_message(
        Float32MultiArray(layout=layout_msg, data=pos_3d_msg))

    # Collect input frames from index t_extrap onwards.
    inpt_msg = torch.zeros(img_shp[0], img_shp[1] * (nt - t_extrap),
                           img_shp[2])
    for s in range(nt - t_extrap):
        inpt_msg[:, s * img_shp[1]:(s + 1) * img_shp[1], :] = \
            model_inputs.value[0, t_extrap + s, :, :, unpad]

    # Build and send the display message
    plot_msg = torch.cat((pred_msg.value, inpt_msg), 2).numpy().transpose(
        2, 1, 0) * int(normalizer)
    if C_channels == 1:
        plot_msg = np.dstack((plot_msg, plot_msg, plot_msg))
    plot_topic.send_message(CvBridge().cv2_to_imgmsg(
        plot_msg.astype(np.uint8), 'rgb8'))
K.set_learning_phase(1)  # set learning phase

# Load t+1 model; the context manager closes the file even if read() fails.
with open(orig_json_file, 'r') as f:
    json_string = f.read()
orig_model = model_from_json(json_string,
                             custom_objects={'PredNet': PredNet})
orig_model.load_weights(orig_weights_file)

# Re-create the PredNet layer in 'prediction' mode with extrap_start_time
# set to 5, reusing the trained weights.
layer_config = orig_model.layers[1].get_config()
layer_config['output_mode'] = 'prediction'
layer_config['extrap_start_time'] = 5
# Configs may carry the key 'data_format' or 'dim_ordering'.
if 'data_format' in layer_config:
    data_format = layer_config['data_format']
else:
    data_format = layer_config['dim_ordering']
prednet = PredNet(weights=orig_model.layers[1].get_weights(),
                  **layer_config)

# Same input shape as the original model, with the leading entry set to nt.
input_shape = list(orig_model.layers[0].batch_input_shape[1:])
input_shape[0] = nt
inputs = Input(input_shape)

predictions = prednet(inputs)
model = Model(inputs=inputs, outputs=predictions)
model.compile(loss=extrap_loss, optimizer='adam')

train_generator = SequenceGenerator(train_file,
                                    train_sources,
                                    nt,
                                    batch_size=batch_size,
                                    shuffle=True,
                                    output_mode='prediction')
# Move the per-timestep loss weights to the GPU.
time_loss_weights = Variable(time_loss_weights.cuda())

# Training and validation splits.
DATA_DIR = 'kitti_data_raw'
train_file = os.path.join(DATA_DIR, 'X_train.hkl')
train_sources = os.path.join(DATA_DIR, 'sources_train.hkl')
val_file = os.path.join(DATA_DIR, 'X_val.hkl')
val_sources = os.path.join(DATA_DIR, 'sources_val.hkl')

kitti_train = KITTI(train_file, train_sources, nt)
kitti_val = KITTI(val_file, val_sources, nt)

train_loader = DataLoader(kitti_train, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(kitti_val, batch_size=batch_size, shuffle=True)

# Network in 'error' output mode, moved to the GPU when one is available.
model = PredNet(R_channels, A_channels, output_mode='error')
if torch.cuda.is_available():
    print('Using GPU.')
    model.cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=lr)


def lr_scheduler(optimizer, epoch):
    """Return ``optimizer``, with its learning rate set to 0.0001 once
    ``epoch`` has reached ``num_epochs // 2``.

    The optimizer is modified in place; before the half-way point it is
    returned untouched.
    """
    if not epoch < num_epochs // 2:
        for param_group in optimizer.param_groups:
            param_group['lr'] = 0.0001
    return optimizer
# ------------
# Validate the results of n-timestep prediction through the
# PredNet(t+1)/PredNet(t+10) model.
# -------------
WIDTH = 160
HEIGHT = 128
nt = 20

# Change the name of the h5 file to switch the model.
seq = load_model('openfoam.h5', custom_objects={'PredNet': PredNet})

# Create testing model (to output predictions)
layer_config = seq.layers[1].get_config()
layer_config['output_mode'] = 'prediction'
# Configs may carry the key 'data_format' or 'dim_ordering'.
if 'data_format' in layer_config:
    data_format = layer_config['data_format']
else:
    data_format = layer_config['dim_ordering']
test_prednet = PredNet(weights=seq.layers[1].get_weights(), **layer_config)

# Same input shape as the loaded model, with the leading entry set to nt.
input_shape = list(seq.layers[0].batch_input_shape[1:])
input_shape[0] = nt
inputs = Input(shape=tuple(input_shape))
predictions = test_prednet(inputs)
validation_model = Model(inputs=inputs, outputs=predictions)

all_images = []
path = 'Sample/'
# os.listdir returns entries in arbitrary order; sort by file name so the
# frames are loaded in a deterministic order.
# NOTE(review): lexicographic order only matches frame order if the file
# names are zero-padded - confirm against the Sample/ directory.
for image_path in sorted(os.listdir(path)):
    if image_path.endswith(".jpg"):
        # The grey-scale flag is left at its default (colour); the old
        # `as_grey` spelling is not accepted by newer scikit-image releases.
        img = io.imread(os.path.join(path, image_path))
        # Crop a fixed region, then resize to the network input size.
        img = img[120:445, 216:640, :]
        img = transform.resize(img, (HEIGHT, WIDTH, 3))
        all_images.append(img)
# Kernel sizes of the A, Ahat and R modules.
A_filt_sizes = (3, 3, 3)
Ahat_filt_sizes = (3, 3, 3, 3)
R_filt_sizes = (3, 3, 3, 3)

# Weighting for each layer in final loss;
# "L_0" model: [1, 0, 0, 0], "L_all": [1, 0.1, 0.1, 0.1]
layer_loss_weights = np.array([1., 0, 0, 0])
layer_loss_weights = np.expand_dims(layer_loss_weights, 1)

nt = 10  # number of timesteps used for sequences in training

# Equally weight all timesteps except the first.
time_loss_weights = np.full((nt, 1), 1. / (nt - 1))
time_loss_weights[0] = 0

prednet = PredNet(
    stack_sizes,
    R_stack_sizes,
    A_filt_sizes,
    Ahat_filt_sizes,
    R_filt_sizes,
    output_mode='error',
    return_sequences=True,
)

inputs = Input(shape=(nt, ) + input_shape)
errors = prednet(inputs)  # errors will be (batch_size, nt, nb_layers)

# Frozen weighted sum over layers, applied independently at each timestep.
layer_weighting = TimeDistributed(
    Dense(1, trainable=False),
    weights=[layer_loss_weights, np.zeros(1)],
    trainable=False,
)
errors_by_time = layer_weighting(errors)
errors_by_time = Flatten()(errors_by_time)  # will be (batch_size, nt)

# Frozen weighted sum over time.
time_weighting = Dense(
    1,
    weights=[time_loss_weights, np.zeros(1)],
    trainable=False,
)
final_errors = time_weighting(errors_by_time)
def process(cap):
    """Run a trained PredNet over a video stream and plot the prediction MSE.

    Frames are read from ``cap``, sub-sampled, resized with ``resize_fit``
    and grouped into sequences of ``nt`` frames. Each full sequence is fed
    to the prediction model, and the per-frame mean squared error (first
    frame excluded) is appended to a live plot.

    Args:
        cap: object exposing ``get``, ``read`` and ``release`` as used below.

    Side effects: opens a preview window and a live matplotlib plot, and
    blocks on ``plt.show()`` at the end.
    """
    scale_height = 128
    scale_width = 160
    target_fps = 8
    fps = cap.get(cv2.CAP_PROP_FPS)
    fps = 25  # the rate read from the capture is overridden by this constant
    nt = 10  # number of timesteps used for sequences in training

    # Trained weights and model definition to load.
    weights_file = os.path.join(WEIGHTS_DIR, 'prednet_ped_train_weights.hdf5')
    json_file = os.path.join(WEIGHTS_DIR, 'prednet_ped_train_model.json')

    # Load trained model; the context manager closes the file even if
    # read() fails.
    with open(json_file, 'r') as f:
        json_string = f.read()
    train_model = model_from_json(json_string,
                                  custom_objects={'PredNet': PredNet})
    train_model.load_weights(weights_file)

    # Create testing model (to output predictions)
    layer_config = train_model.layers[1].get_config()
    layer_config['output_mode'] = 'prediction'
    # Configs may carry the key 'data_format' or 'dim_ordering'.
    if 'data_format' in layer_config:
        data_format = layer_config['data_format']
    else:
        data_format = layer_config['dim_ordering']
    test_prednet = PredNet(weights=train_model.layers[1].get_weights(),
                           **layer_config)
    input_shape = list(train_model.layers[0].batch_input_shape[1:])
    input_shape[0] = nt
    inputs = Input(shape=tuple(input_shape))
    predictions = test_prednet(inputs)
    test_model = Model(inputs=inputs, outputs=predictions)

    print(fps)
    Xcur = []
    mses = []
    plt.ion()
    i = 0
    j = 0
    while True:
        ok, frame = cap.read()
        if not ok:
            break

        # Drop frames until enough have gone by to approximate target_fps.
        j += 1
        if (j < fps / target_fps):
            continue
        j = 0
        i += 1

        frame = resize_fit(frame, (scale_width, scale_height))
        cv2.imshow('frame', frame)
        Xcur.append(frame)
        if len(Xcur) >= nt:
            # A full sequence: predict it and record the per-frame MSE,
            # skipping the first frame.
            X_test = np.array([np.array(Xcur).astype(np.float32) / 255])
            X_hat = test_model.predict(X_test)
            mses += list(
                np.mean(((X_test[:, 1:] - X_hat[:, 1:])**2)[0],
                        axis=(1, 2, 3)))
            plt.plot(mses)
            plt.pause(0.05)
            # (A disabled actual-vs-predicted grid plot used to follow here.)
            Xcur = []

        # NOTE(review): nesting reconstructed - key 32 waits for another key
        # press, key 27 stops the capture loop.
        k = cv2.waitKey(1) & 0xff
        if k == 32:
            k = cv2.waitKey() & 0xff
        if k == 27:
            break

    cap.release()
    cv2.destroyAllWindows()
    print("Done!")
    plt.ioff()
    plt.show()