def printConfigOnError(return_code, exp_config, step_name):
    """
    :param return_code: (int)
    :param exp_config: (dict)
    :param step_name: (str)
    """
    if return_code != 0:
        printRed("An error occurred, error code: {}".format(return_code))
        pprint(exp_config)
        raise RuntimeError("Error during {} (config file above)".format(step_name))
    print("End of " + step_name)
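# A minimal usage sketch (hypothetical values): after running a pipeline step
# as a subprocess, forward its exit code so a non-zero code aborts with the
# experiment config printed for debugging. 'preprocess.py' and the config
# dict below are illustrative, not project files.
import subprocess

return_code = subprocess.call(['python', 'preprocess.py', '--data-folder', 'example_dataset'])
printConfigOnError(return_code, {'data-folder': 'example_dataset'}, "preprocessing")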
def getBaseExpConfig(args):
    """
    :param args: (parsed args object)
    :return: (dict)
    """
    if not os.path.isfile(args.base_config):
        printRed("You must specify a valid --base-config json file")
        sys.exit(1)

    args.data_folder = parseDataFolder(args.data_folder)
    dataset_path = "data/{}".format(args.data_folder)
    assert os.path.isdir(dataset_path), "Path to dataset folder is not valid: {}".format(dataset_path)

    with open(args.base_config, 'r') as f:
        exp_config = json.load(f)
    exp_config['data-folder'] = args.data_folder
    exp_config['relative-pos'] = useRelativePosition(args.data_folder)
    return exp_config
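# A minimal usage sketch, assuming argparse flags that match the attributes
# read above (args.base_config, args.data_folder); the default config path
# is illustrative only.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--base-config', type=str, default="configs/default_exp_config.json")
parser.add_argument('--data-folder', type=str, required=True)
exp_config = getBaseExpConfig(parser.parse_args())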
def findPriorsPairs(batch_size, minibatchlist, actions, rewards, n_actions, n_pairs_per_action):
    """
    :param batch_size: (int)
    :param minibatchlist: ([[int]])
    :param actions: (np.ndarray)
    :param rewards: (np.ndarray)
    :param n_actions: (int)
    :param n_pairs_per_action: ([int])
    :return: ([np.ndarray], [np.ndarray])
    """
    dissimilar_pairs = [
        np.array(
            [[i, j] for i in range(batch_size)
             for j in findDissimilar(i, minibatch, minibatch, actions, rewards) if j > i],
            dtype='int64')
        for minibatch in minibatchlist]

    # Oversample relevant pairs so that every minibatch contains
    # at least one pair of dissimilar observations
    dissimilar_pairs, minibatchlist = overSampling(batch_size, minibatchlist, dissimilar_pairs,
                                                   findDissimilar, actions, rewards)
    # same_actions_pairs: list of arrays, each containing one pair of observation ids
    same_actions_pairs = [
        np.array([[i, j] for i in range(batch_size)
                  for j in findSameActions(i, minibatch, actions) if j > i],
                 dtype='int64')
        for minibatch in minibatchlist]

    for pair, minibatch in zip(same_actions_pairs, minibatchlist):
        for i in range(n_actions):
            n_pairs_per_action[i] += np.sum(actions[minibatch[pair[:, 0]]] == i)

    # Stats about pairs
    print("Number of pairs per action:")
    print(n_pairs_per_action)
    print("Pairs of {} unique actions".format(np.sum(n_pairs_per_action > 0)))

    for item in same_actions_pairs + dissimilar_pairs:
        if len(item) == 0:
            msg = "No same-action or dissimilar pairs found for at least one minibatch (batch_size is currently {})\n".format(batch_size)
            msg += "=> Consider increasing the batch_size or changing the seed"
            printRed(msg)
            sys.exit(NO_PAIRS_ERROR)
    return dissimilar_pairs, same_actions_pairs
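# Hedged sketch of the predicate that findDissimilar (defined elsewhere) is
# expected to implement, following the causality prior from robotics priors:
# two observations are dissimilar when the same action led to different
# rewards. This is an illustrative assumption, not the project's code.
def findDissimilarSketch(index, minibatch1, minibatch2, actions, rewards):
    """Return ids j in minibatch2 forming a dissimilar pair with minibatch1[index]."""
    obs_id = minibatch1[index]
    same_action = actions[minibatch2] == actions[obs_id]
    different_reward = rewards[minibatch2 + 1] != rewards[obs_id + 1]
    return np.where(same_action & different_reward)[0]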
def stateRepresentationLearningCall(exp_config):
    """
    :param exp_config: (dict)
    :return: (bool) True if no error occurred
    """
    printGreen("\nLearning a state representation...")

    args = ['--no-display-plots']
    if exp_config.get('multi-view', False):
        args.extend(['--multi-view'])

    for arg in ['learning-rate', 'l1-reg', 'batch-size', 'state-dim', 'epochs', 'seed', 'model-type',
                'log-folder', 'data-folder', 'training-set-size']:
        args.extend(['--{}'.format(arg), str(exp_config[arg])])

    ok = subprocess.call(['python', 'train.py'] + args)

    if ok == 0:
        print("End of state representation learning.\n")
        return True
    else:
        printRed("An error occurred, error code: {}".format(ok))
        pprint(exp_config)
        if ok == NO_PAIRS_ERROR:
            printRed("No pairs found, consider increasing the batch_size or using a different seed")
            return False
        elif ok == NAN_ERROR:
            printRed("NaN loss, consider increasing NOISE_STD in the gaussian noise layer")
            return False
        elif ok != MATPLOTLIB_WARNING_CODE:
            raise RuntimeError("Error during state representation learning (config file above)")
        else:
            return False
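# A minimal usage sketch with a hypothetical experiment configuration; the
# keys mirror the flags forwarded to train.py above, but every value is
# illustrative only.
exp_config = {
    'learning-rate': 0.001, 'l1-reg': 0.0, 'batch-size': 256,
    'state-dim': 3, 'epochs': 30, 'seed': 0, 'model-type': 'custom_cnn',
    'log-folder': 'logs/example', 'data-folder': 'example_dataset',
    'training-set-size': 20000, 'multi-view': False,
}
success = stateRepresentationLearningCall(exp_config)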
def gradient_descent(Xn, y, theta, alpha, num_iters=1000, tol=None, theta_hist=False):
    """Perform gradient descent optimization to learn theta that creates the
    best fit hypothesis h(theta) = X @ theta to the dataset

    Args:
        Xn: Normalized Feature Matrix
        y: Target Vector
        theta: Initial parameter vector
        alpha: (Real, >0) Learning Rate

    Kwargs:
        num_iters: (Real) Maximum iterations to perform optimization
        tol: (Real) If provided, supersedes num_iters, breaking optimization
            once the change in cost is within tolerance
        theta_hist: (Bool) If True, also return theta's history
    """
    # Check to see if Xn is normalized. Warn if not.
    if round(Xn[:, 1].std()) != 1:
        utils.printYellow("Gradient Descent X matrix is not normalized. "
                          "Pass in normalized data in the future to ensure convergence")
        # Xn, _, _ = normalize_features(Xn)

    m = 1.0 * len(y)
    J_history = []
    theta_history = []
    for idx in range(num_iters):
        # Compute new theta
        theta = theta - (alpha / m) * ((Xn @ theta - y).T @ Xn).T
        theta_history.append(theta)

        # Save new J cost
        J_history.append(compute_cost(Xn, y, theta))
        if (idx > 1) and (tol is not None) and (abs(J_history[-2] - J_history[-1]) <= tol):
            break

        # Check to make sure J is decreasing...
        if (idx > 1) and J_history[-2] <= J_history[-1]:
            utils.printRed("Gradient Descent is not decreasing! Alpha: {}\tprevious J {}\tJ {}. "
                           "Try decreasing alpha".format(alpha, J_history[-2], J_history[-1]))

    if theta_hist:
        return theta, J_history, np.vstack(theta_history)
    return theta, J_history
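# Minimal usage sketch on synthetic data, assuming a compute_cost(Xn, y, theta)
# helper returning the mean-squared-error cost (referenced above but not shown
# here), and that column 0 of Xn is the intercept column of ones.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = 2.0 * x + 1.0 + 0.1 * rng.normal(size=100)
xn = (x - x.mean()) / x.std()                  # normalize the single feature
Xn = np.column_stack([np.ones_like(xn), xn])   # prepend intercept column
theta, J_history = gradient_descent(Xn, y, theta=np.zeros(2), alpha=0.1, tol=1e-9)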
def learn(self, images_path, actions, rewards, episode_starts):
    """
    Learn a state representation
    :param images_path: (numpy 1D array)
    :param actions: (np.ndarray)
    :param rewards: (numpy 1D array)
    :param episode_starts: (numpy 1D array) boolean array
        the ith index is True if one episode starts at this frame
    :return: (dict, np.ndarray, list) the loss history, the learned states
        for the given observations, and the (loss name, weight) pairs
    """
    print("\nYou are using the following weights for the losses:")
    pprint(self.losses_weights_dict)

    # PREPARE DATA -------------------------------------------------------------------------------------------------
    # Here, we organize the data into minibatches
    # and find pairs for the respective loss terms (for robotics priors only)
    num_samples = images_path.shape[0] - 1  # number of samples

    # Indices for all time steps where the episode continues
    indices = np.array([i for i in range(num_samples) if not episode_starts[i + 1]], dtype='int64')
    np.random.shuffle(indices)

    # Split indices into minibatches. minibatchlist is a list of lists; each
    # list is the id of the observation preserved through the training
    minibatchlist = [np.array(sorted(indices[start_idx:start_idx + self.batch_size]))
                     for start_idx in range(0, len(indices) - self.batch_size + 1, self.batch_size)]

    test_minibatchlist = DataLoader.createTestMinibatchList(len(images_path), MAX_BATCH_SIZE_GPU)

    # Number of minibatches used for validation:
    n_val_batches = np.round(VALIDATION_SIZE * len(minibatchlist)).astype(np.int64)
    val_indices = np.random.permutation(len(minibatchlist))[:n_val_batches]

    # Print some info
    print("{} minibatches for training, {} samples".format(len(minibatchlist) - n_val_batches,
                                                           (len(minibatchlist) - n_val_batches) * BATCH_SIZE))
    print("{} minibatches for validation, {} samples".format(n_val_batches, n_val_batches * BATCH_SIZE))
    assert n_val_batches > 0, "Not enough samples to create a validation set"

    # Stats about actions
    if not self.continuous_action:
        print('Discrete action space:')
        action_set = set(actions)
        n_actions = int(np.max(actions) + 1)
        print("{} unique actions / {} actions".format(len(action_set), n_actions))
        n_pairs_per_action = np.zeros(n_actions, dtype=np.int64)
        n_obs_per_action = np.zeros(n_actions, dtype=np.int64)
        for i in range(n_actions):
            n_obs_per_action[i] = np.sum(actions == i)
        print("Number of observations per action")
        print(n_obs_per_action)
    else:
        print('Continuous action space:')
        print('Action dimension: {}'.format(self.dim_action))

    dissimilar_pairs, same_actions_pairs = None, None
    if not self.no_priors:
        if self.continuous_action:
            print("This option (priors) doesn't support continuous action spaces for now!")
        dissimilar_pairs, same_actions_pairs = findPriorsPairs(self.batch_size, minibatchlist, actions, rewards,
                                                               n_actions, n_pairs_per_action)

    if self.use_vae and self.perceptual_similarity_loss and self.path_to_dae is not None:
        self.denoiser = SRLModules(state_dim=self.state_dim_dae, action_dim=self.dim_action,
                                   model_type="custom_cnn", cuda=self.cuda, losses=["dae"])
        self.denoiser.load_state_dict(th.load(self.path_to_dae))
        self.denoiser.eval()
        self.denoiser = self.denoiser.to(self.device)
        for param in self.denoiser.parameters():
            param.requires_grad = False

    if self.episode_prior:
        idx_to_episode = {idx: episode_idx for idx, episode_idx in enumerate(np.cumsum(episode_starts))}
        minibatch_episodes = [[idx_to_episode[i] for i in minibatch] for minibatch in minibatchlist]

    data_loader = DataLoader(minibatchlist, images_path, n_workers=N_WORKERS, multi_view=self.multi_view,
                             use_triplets=self.use_triplets, is_training=True,
                             apply_occlusion=self.use_dae, occlusion_percentage=self.occlusion_percentage)
    test_data_loader = DataLoader(test_minibatchlist, images_path, n_workers=N_WORKERS,
                                  multi_view=self.multi_view, use_triplets=self.use_triplets, max_queue_len=1,
                                  is_training=False, apply_occlusion=self.use_dae,
                                  occlusion_percentage=self.occlusion_percentage)

    # TRAINING -----------------------------------------------------------------------------------------------------
    loss_history = defaultdict(list)
    loss_manager = LossManager(self.model, loss_history)
    best_error = np.inf
    best_model_path = "{}/srl_model.pth".format(self.log_folder)
    start_time = time.time()

    # Random features: we don't need to train a model
    if len(self.losses) == 1 and self.losses[0] == 'random':
        global N_EPOCHS
        N_EPOCHS = 0
        printYellow("Skipping training because using random features")
        th.save(self.model.state_dict(), best_model_path)

    for epoch in range(N_EPOCHS):
        # In each epoch, we do a full pass over the training data:
        epoch_loss, epoch_batches = 0, 0
        val_loss = 0
        pbar = tqdm(total=len(minibatchlist))

        for minibatch_num, (minibatch_idx, obs, next_obs, noisy_obs, next_noisy_obs) in enumerate(data_loader):
            validation_mode = minibatch_idx in val_indices
            if validation_mode:
                self.model.eval()
            else:
                self.model.train()

            if self.use_dae:
                noisy_obs = noisy_obs.to(self.device)
                next_noisy_obs = next_noisy_obs.to(self.device)
            obs, next_obs = obs.to(self.device), next_obs.to(self.device)

            self.optimizer.zero_grad()
            loss_manager.resetLosses()

            decoded_obs, decoded_next_obs = None, None
            states_denoiser = None
            states_denoiser_predicted = None
            next_states_denoiser = None
            next_states_denoiser_predicted = None

            # Predict states given observations as in Time Contrastive Network (Triplet Loss) [Sermanet et al.]
            if self.use_triplets:
                states, positive_states, negative_states = self.model.forwardTriplets(obs[:, :3, :, :],
                                                                                      obs[:, 3:6, :, :],
                                                                                      obs[:, 6:, :, :])
                next_states, next_positive_states, next_negative_states = self.model.forwardTriplets(
                    next_obs[:, :3, :, :], next_obs[:, 3:6, :, :], next_obs[:, 6:, :, :])
            elif self.use_autoencoder:
                (states, decoded_obs), (next_states, decoded_next_obs) = self.model(obs), self.model(next_obs)
            elif self.use_dae:
                (states, decoded_obs), (next_states, decoded_next_obs) = \
                    self.model(noisy_obs), self.model(next_noisy_obs)
            elif self.use_vae:
                (decoded_obs, mu, logvar), (next_decoded_obs, next_mu, next_logvar) = self.model(obs), \
                    self.model(next_obs)
                states, next_states = self.model.getStates(obs), self.model.getStates(next_obs)

                if self.perceptual_similarity_loss:
                    # Predictions for the perceptual similarity loss as in DARLA
                    # https://arxiv.org/pdf/1707.08475.pdf
                    (states_denoiser, decoded_obs_denoiser), (next_states_denoiser, decoded_next_obs_denoiser) = \
                        self.denoiser(obs), self.denoiser(next_obs)

                    (states_denoiser_predicted, decoded_obs_denoiser_predicted) = self.denoiser(decoded_obs)
                    (next_states_denoiser_predicted, decoded_next_obs_denoiser_predicted) = \
                        self.denoiser(next_decoded_obs)
            else:
                states, next_states = self.model(obs), self.model(next_obs)

            # Actions associated to the observations of the current minibatch
            actions_st = actions[minibatchlist[minibatch_idx]]
            if not self.continuous_action:
                # Discrete actions: reshape to n_minibatch rows and one column containing the int action
                actions_st = th.from_numpy(actions_st).view(-1, 1).requires_grad_(False).to(self.device)
            else:
                # Continuous actions: reshape to n_minibatch rows and dim_action columns
                actions_st = th.from_numpy(actions_st).view(-1,
                                                            self.dim_action).requires_grad_(False).to(self.device)

            # L1 regularization
            if self.losses_weights_dict['l1_reg'] > 0:
                l1Loss(loss_manager.reg_params, self.losses_weights_dict['l1_reg'], loss_manager)
            if self.losses_weights_dict['l2_reg'] > 0:
                l2Loss(loss_manager.reg_params, self.losses_weights_dict['l2_reg'], loss_manager)

            if not self.no_priors:
                if self.n_actions == np.inf:
                    print("This option (priors) doesn't support continuous action spaces for now!")
                roboticPriorsLoss(states, next_states, minibatch_idx=minibatch_idx,
                                  dissimilar_pairs=dissimilar_pairs, same_actions_pairs=same_actions_pairs,
                                  weight=self.losses_weights_dict['priors'], loss_manager=loss_manager)

            # TODO: change here to classic call (forward and backward)
            if self.use_forward_loss:
                next_states_pred = self.model.forwardModel(states, actions_st)
                forwardModelLoss(next_states_pred, next_states,
                                 weight=self.losses_weights_dict['forward'], loss_manager=loss_manager)

            if self.use_inverse_loss:
                actions_pred = self.model.inverseModel(states, next_states)
                inverseModelLoss(actions_pred, actions_st,
                                 weight=self.losses_weights_dict['inverse'], loss_manager=loss_manager,
                                 continuous_action=self.continuous_action)

            if self.use_reward_loss:
                rewards_st = rewards[minibatchlist[minibatch_idx]].copy()
                # Removing negative rewards
                rewards_st[rewards_st == -1] = 0
                rewards_st = th.from_numpy(rewards_st).to(self.device)
                rewards_pred = self.model.rewardModel(states, next_states)
                rewardModelLoss(rewards_pred, rewards_st.long(),
                                weight=self.losses_weights_dict['reward'], loss_manager=loss_manager)

            if self.use_autoencoder or self.use_dae:
                loss_type = "dae" if self.use_dae else "autoencoder"
                autoEncoderLoss(obs, decoded_obs, next_obs, decoded_next_obs,
                                weight=self.losses_weights_dict[loss_type], loss_manager=loss_manager)

            if self.use_vae:
                kullbackLeiblerLoss(mu, next_mu, logvar, next_logvar, loss_manager=loss_manager, beta=self.beta)

                if self.perceptual_similarity_loss:
                    perceptualSimilarityLoss(states_denoiser, states_denoiser_predicted, next_states_denoiser,
                                             next_states_denoiser_predicted,
                                             weight=self.losses_weights_dict['perceptual'],
                                             loss_manager=loss_manager)
                else:
                    generationLoss(decoded_obs, next_decoded_obs, obs, next_obs,
                                   weight=self.losses_weights_dict['vae'], loss_manager=loss_manager)

            if self.reward_prior:
                rewards_st = rewards[minibatchlist[minibatch_idx]]
                rewards_st = th.from_numpy(rewards_st).float().view(-1, 1).to(self.device)
                rewardPriorLoss(states, rewards_st,
                                weight=self.losses_weights_dict['reward-prior'], loss_manager=loss_manager)

            if self.episode_prior:
                episodePriorLoss(minibatch_idx, minibatch_episodes, states, self.discriminator,
                                 BALANCED_SAMPLING, weight=self.losses_weights_dict['episode-prior'],
                                 loss_manager=loss_manager)

            if self.use_triplets:
                tripletLoss(states, positive_states, negative_states,
                            weight=self.losses_weights_dict['triplet'], loss_manager=loss_manager, alpha=0.2)

            # Compute weighted average of losses
            loss_manager.updateLossHistory()
            loss = loss_manager.computeTotalLoss()

            # We have to call backward in both train/val modes
            # to avoid a memory error
            loss.backward()
            if validation_mode:
                val_loss += loss.item()
                # We do not optimize on validation data,
                # so optimizer.step() is not called
            else:
                self.optimizer.step()
                epoch_loss += loss.item()
                epoch_batches += 1
            pbar.update(1)
        pbar.close()

        train_loss = epoch_loss / float(epoch_batches)
        val_loss /= float(n_val_batches)
        # Even if loss_history is modified by LossManager,
        # we make it explicit
        loss_history = loss_manager.loss_history
        loss_history['train_loss'].append(train_loss)
        loss_history['val_loss'].append(val_loss)
        for key in loss_history.keys():
            if key in ['train_loss', 'val_loss']:
                continue
            loss_history[key][-1] /= epoch_batches
            if epoch + 1 < N_EPOCHS:
                loss_history[key].append(0)

        # Save best model
        if val_loss < best_error:
            best_error = val_loss
            th.save(self.model.state_dict(), best_model_path)

        if np.isnan(train_loss):
            printRed("NaN loss, consider increasing NOISE_STD in the gaussian noise layer")
            sys.exit(NAN_ERROR)

        # Then we print the results for this epoch:
        if (epoch + 1) % EPOCH_FLAG == 0:
            print("Epoch {:3}/{}, train_loss:{:.4f} val_loss:{:.4f}".format(epoch + 1, N_EPOCHS,
                                                                            train_loss, val_loss))
            print("{:.2f}s/epoch".format((time.time() - start_time) / (epoch + 1)))
            if DISPLAY_PLOTS:
                with th.no_grad():
                    self.model.eval()
                    # Optionally plot the current state space
                    plotRepresentation(self.predStatesWithDataLoader(test_data_loader), rewards,
                                       add_colorbar=epoch == 0,
                                       name="Learned State Representation (Training Data)")
                    if self.use_autoencoder or self.use_vae or self.use_dae:
                        # Plot reconstructed images
                        if obs[0].shape[0] == 3:  # RGB
                            plotImage(deNormalize(detachToNumpy(obs[0])), "Input Image (Train)")
                            if self.use_dae:
                                plotImage(deNormalize(detachToNumpy(noisy_obs[0])), "Noisy Input Image (Train)")
                            if self.perceptual_similarity_loss:
                                plotImage(deNormalize(detachToNumpy(decoded_obs_denoiser[0])),
                                          "Reconstructed Image DAE")
                                plotImage(deNormalize(detachToNumpy(decoded_obs_denoiser_predicted[0])),
                                          "Reconstructed Image predicted DAE")
                            plotImage(deNormalize(detachToNumpy(decoded_obs[0])), "Reconstructed Image")
                        elif obs[0].shape[0] % 3 == 0:  # Multi-RGB
                            for k in range(obs[0].shape[0] // 3):
                                plotImage(deNormalize(detachToNumpy(obs[0][k * 3:(k + 1) * 3, :, :]), "image_net"),
                                          "Input Image {} (Train)".format(k + 1))
                                if self.use_dae:
                                    plotImage(deNormalize(detachToNumpy(noisy_obs[0][k * 3:(k + 1) * 3, :, :])),
                                              "Noisy Input Image {} (Train)".format(k + 1))
                                if self.perceptual_similarity_loss:
                                    plotImage(deNormalize(
                                        detachToNumpy(decoded_obs_denoiser[0][k * 3:(k + 1) * 3, :, :])),
                                        "Reconstructed Image DAE")
                                    plotImage(deNormalize(
                                        detachToNumpy(decoded_obs_denoiser_predicted[0][k * 3:(k + 1) * 3, :, :])),
                                        "Reconstructed Image predicted DAE")
                                plotImage(deNormalize(detachToNumpy(decoded_obs[0][k * 3:(k + 1) * 3, :, :])),
                                          "Reconstructed Image {}".format(k + 1))

    if DISPLAY_PLOTS:
        plt.close("Learned State Representation (Training Data)")

    # Load the best model before predicting states
    self.model.load_state_dict(th.load(best_model_path))

    print("Predicting states for all the observations...")
    # Return predicted states for the training observations
    self.model.eval()
    with th.no_grad():
        pred_states = self.predStatesWithDataLoader(test_data_loader)
    pairs_loss_weight = [k for k in zip(loss_manager.names, loss_manager.weights)]

    return loss_history, pred_states, pairs_loss_weight
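# Minimal usage sketch (hypothetical wiring): the trainer class name and the
# .npz field names below are assumptions about the surrounding project, used
# only to illustrate the expected inputs of learn().
import numpy as np

training_data = np.load("data/example_dataset/preprocessed_data.npz")
ground_truth = np.load("data/example_dataset/ground_truth.npz")

srl_model = SRL4robotics(state_dim=3, model_type="custom_cnn", seed=0, log_folder="logs/example")  # hypothetical ctor
loss_history, learned_states, pairs_loss_weight = srl_model.learn(
    images_path=ground_truth['images_path'],
    actions=training_data['actions'],
    rewards=training_data['rewards'],
    episode_starts=training_data['episode_starts'])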
import sys

import mido

from utils import printRed, printGreen

printGreen("Initializing")
input_devices = mido.get_input_names()
output_devices = mido.get_output_names()

output_devices = list(filter(lambda x: 'dtx' in x.lower(), output_devices))
if len(output_devices) == 0:
    printRed("Failed to find the dtx module")
    sys.exit(1)

input_devices = list(filter(lambda x: 'deluge' in x.lower(), input_devices))
if len(input_devices) == 0:
    printRed("Failed to find the deluge device")
    sys.exit(1)

try:
    with mido.open_output(output_devices[0]) as dtx:
        with mido.open_input(input_devices[0]) as deluge:
            printGreen("Ready to forward! In: {0} out: {1}".format(deluge.name, dtx.name))
            for msg in deluge:
                # Forward note_on/note_off messages on the drum channel (MIDI channel 10, zero-indexed as 9)
                if "channel" in vars(msg) and msg.channel == 9 and "type" in vars(msg) and "note_" in msg.type:
                    printGreen("Message {} matches".format(msg))
                    dtx.send(msg)
                # Forward program and control changes on any channel
                elif 'type' in vars(msg) and msg.type in ("program_change", "control_change"):
                    printGreen("Sending system command {}".format(msg))
                    dtx.send(msg)
except Exception as e:
    printRed("Exception! {}".format(e))
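# Minimal sketch of the filter logic above applied to a constructed message,
# useful for checking the routing rules without hardware. mido.Message is the
# library's standard constructor; the note and velocity values are arbitrary.
test_msg = mido.Message('note_on', channel=9, note=38, velocity=100)
assert "note_" in test_msg.type and test_msg.channel == 9  # would be forwarded to the dtx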