示例#1
0
文件: pipeline.py 项目: s206283/gcrl
def printConfigOnError(return_code, exp_config, step_name):
    """
    Report the end of a step, or dump the experiment config and abort if it failed.

    :param return_code: (int) status of the step, any value different from 0 is an error
    :param exp_config: (dict) experiment configuration, pretty-printed on error
    :param step_name: (str) name of the step, used in the messages
    :raises RuntimeError: when return_code is not 0
    """
    # Success path first: nothing to report except the end of the step
    if return_code == 0:
        print("End of " + step_name)
        return

    printRed("An error occured, error code: {}".format(return_code))
    pprint(exp_config)
    raise RuntimeError("Error during {} (config file above)".format(step_name))
示例#2
0
文件: pipeline.py 项目: s206283/gcrl
def getBaseExpConfig(args):
    """
    Load the base experiment configuration and add the dataset information to it.

    The program exits with status 1 when `args.base_config` is not an existing file.
    `args.data_folder` is overwritten in place with the output of `parseDataFolder`.

    :param args: (parsed args object) must expose `base_config` and `data_folder`
    :return: (dict) content of the json file, with the 'data-folder'
        and 'relative-pos' keys set
    :raises AssertionError: if "data/<data_folder>" is not an existing directory
    """
    if not os.path.isfile(args.base_config):
        printRed("You must specify a valid --base-config json file")
        sys.exit(1)

    args.data_folder = parseDataFolder(args.data_folder)
    dataset_path = "data/{}".format(args.data_folder)
    # Explicit check instead of an `assert` statement: asserts are removed when
    # Python runs with -O, which would silently skip this validation.
    # AssertionError is kept so the exception type seen by callers does not change.
    if not os.path.isdir(dataset_path):
        raise AssertionError("Path to dataset folder is not valid: {}".format(dataset_path))

    with open(args.base_config, 'r') as f:
        exp_config = json.load(f)
    exp_config['data-folder'] = args.data_folder
    exp_config['relative-pos'] = useRelativePosition(args.data_folder)
    return exp_config
示例#3
0
def findPriorsPairs(batch_size, minibatchlist, actions, rewards, n_actions, n_pairs_per_action):
    """
    Find, for every minibatch, the pairs of observations used by the priors losses.

    Prints statistics about the pairs and exits the program with NO_PAIRS_ERROR
    when at least one minibatch has no same-actions pair or no dissimilar pair.

    :param batch_size: (int)
    :param minibatchlist: ([[int]])
    :param actions: (np.ndarray)
    :param rewards: (np.ndarray)
    :param n_actions: (int)
    :param n_pairs_per_action: (np.ndarray) per-action counters, updated in place
    :return: ([np.ndarray], [np.ndarray]) dissimilar pairs and same-actions pairs,
        one array per minibatch
    """
    dissimilar_pairs = [
        np.array(
            [[i, j] for i in range(batch_size) for j in findDissimilar(i, minibatch, minibatch, actions, rewards) if
             j > i],
            dtype='int64') for minibatch in minibatchlist]

    # sampling relevant pairs to have at least a pair of dissimilar obs in every minibatches
    dissimilar_pairs, minibatchlist = overSampling(batch_size, minibatchlist, dissimilar_pairs,
                                                   findDissimilar, actions, rewards)
    # same_actions: list of arrays, each containing one pair of observation ids
    same_actions_pairs = [
        np.array([[i, j] for i in range(batch_size) for j in findSameActions(i, minibatch, actions) if j > i],
                 dtype='int64') for minibatch in minibatchlist]

    for pair, minibatch in zip(same_actions_pairs, minibatchlist):
        # A minibatch without any pair gives an empty 1D array, which cannot be
        # indexed with [:, 0]. Skip it here so that the explicit check below
        # reports the problem instead of an IndexError.
        if len(pair) == 0:
            continue
        # Action of the first observation of each pair (same for every `i` below)
        pair_actions = actions[minibatch[pair[:, 0]]]
        for i in range(n_actions):
            n_pairs_per_action[i] += np.sum(pair_actions == i)

    # Stats about pairs
    print("Number of pairs per action:")
    print(n_pairs_per_action)
    print("Pairs of {} unique actions".format(np.sum(n_pairs_per_action > 0)))

    for item in same_actions_pairs + dissimilar_pairs:
        if len(item) == 0:
            msg = "No same actions or dissimilar pairs found for at least one minibatch (currently is {})\n".format(
                batch_size)
            msg += "=> Consider increasing the batch_size or changing the seed"
            printRed(msg)
            sys.exit(NO_PAIRS_ERROR)
    return dissimilar_pairs, same_actions_pairs
示例#4
0
文件: pipeline.py 项目: s206283/gcrl
def stateRepresentationLearningCall(exp_config):
    """
    Run `train.py` in a subprocess with the options taken from the experiment config.

    :param exp_config: (dict) must contain every key forwarded to train.py
    :return: (bool) True if train.py returned 0, False for the known error codes
        (no pairs, NaN loss, matplotlib warning)
    :raises RuntimeError: for any other non-zero return code
    """
    printGreen("\nLearning a state representation...")

    cli_args = ['--no-display-plots']
    if exp_config.get('multi-view', False):
        cli_args.append('--multi-view')

    # Config keys passed as-is to train.py as `--key value`
    forwarded_keys = ('learning-rate', 'l1-reg', 'batch-size',
                      'state-dim', 'epochs', 'seed', 'model-type',
                      'log-folder', 'data-folder', 'training-set-size')
    for key in forwarded_keys:
        cli_args += ['--{}'.format(key), str(exp_config[key])]

    return_code = subprocess.call(['python', 'train.py'] + cli_args)
    if return_code == 0:
        print("End of state representation learning.\n")
        return True

    printRed("An error occured, error code: {}".format(return_code))
    pprint(exp_config)
    if return_code == NO_PAIRS_ERROR:
        printRed("No Pairs found, consider increasing the batch_size or using a different seed")
    elif return_code == NAN_ERROR:
        printRed("NaN Loss, consider increasing NOISE_STD in the gaussian noise layer")
    elif return_code != MATPLOTLIB_WARNING_CODE:
        raise RuntimeError("Error during state representation learning (config file above)")
    # Known, recoverable error codes: report the failure to the caller
    return False
示例#5
0
def gradient_descent(Xn,y,theta,alpha,num_iters=1000,tol=None,theta_hist=False):
    """Perform gradient descent optimization to learn theta that creates the best fit
    hypothesis h(theta)=X @ theta to the dataset

    Args:
        Xn:     Normalized Feature Matrix
        y:      Target Vector
        theta:  Initial parameter vector, updated at every iteration
        alpha:  (Real, >0) Learning Rate

    Kwargs:
        num_iters:  (Int) Maximum iterations to perform optimization
        tol:        (Real) If provided, stop early as soon as the absolute change of the
                    cost between two consecutive iterations is at most tol
        theta_hist: (Bool) If True, also return theta's history

    Returns:
        (theta, J_history), or (theta, J_history, theta_history) when theta_hist is True.
        J_history is the list of costs, one per iteration performed, and
        theta_history is the vertical stack of the theta computed at each iteration.
    """

    # Check to see if Xn is normalized. Warn if not.
    # Only the column at index 1 is inspected (presumably column 0 is the bias term - TODO confirm)
    if round(Xn[:,1].std()) != 1:
        utils.printYellow("Gradient Descent X matrix is not normalized. Pass in normalized in the future to ensure convergence")

    m = 1.0*len(y)
    J_history = []
    theta_history = []
    for idx in range(num_iters):
        ## Compute new theta
        theta = theta - (alpha/m) * ((Xn @ theta - y).T @ Xn).T
        theta_history.append(theta)

        ## Save new J cost
        J_history.append(compute_cost(Xn,y,theta))
        # Compare the magnitude of the change: the signed difference is negative
        # whenever the cost decreases, which would stop the loop at the first check
        if (idx>1) and (tol is not None) and (abs(J_history[-1]-J_history[-2]) <= tol):
            break

        ## Check to make sure J is decreasing...
        if (idx > 1) and J_history[-2] <= J_history[-1]:
            utils.printRed("Gradient Descent is not decreasing! Alpha: {}\t previous J {}\tJ {}. Try decreasing alpha".format(alpha,J_history[-2], J_history[-1]))
    if theta_hist:
        return theta, J_history, np.vstack(theta_history)
    return theta, J_history
示例#6
0
    def learn(self, images_path, actions, rewards, episode_starts):
        """
        Learn a state representation.

        The data is split into minibatches, a fraction of them (VALIDATION_SIZE) is used
        for validation only, and the model is trained for N_EPOCHS epochs with the losses
        enabled on this object. The weights with the lowest validation loss are saved to
        "<log_folder>/srl_model.pth" and reloaded before predicting the states.

        :param images_path: (numpy 1D array)
        :param actions: (np.ndarray)
        :param rewards: (numpy 1D array)
        :param episode_starts: (numpy 1D array) boolean array
                                the ith index is True if one episode starts at this frame
        :return: (dict, np.ndarray, list) the loss history, the learned states for the given
                 observations (output of `predStatesWithDataLoader`, presumably a np.ndarray)
                 and the list of (loss name, loss weight) pairs taken from the loss manager
        """

        print("\nYour are using the following weights for the losses:")
        pprint(self.losses_weights_dict)

        # PREPARE DATA -------------------------------------------------------------------------------------------------
        # here, we organize the data into minibatches
        # and find pairs for the respective loss terms (for robotics priors only)

        num_samples = images_path.shape[0] - 1  # number of samples

        # indices for all time steps where the episode continues
        indices = np.array([i for i in range(num_samples) if not episode_starts[i + 1]], dtype='int64')
        np.random.shuffle(indices)

        # split indices into minibatches. minibatchlist is a list of lists; each
        # list is the id of the observation preserved through the training
        # Only full minibatches are kept: the leftover indices at the end are dropped
        minibatchlist = [np.array(sorted(indices[start_idx:start_idx + self.batch_size]))
                         for start_idx in range(0, len(indices) - self.batch_size + 1, self.batch_size)]

        test_minibatchlist = DataLoader.createTestMinibatchList(len(images_path), MAX_BATCH_SIZE_GPU)

        # Number of minibatches used for validation:
        n_val_batches = np.round(VALIDATION_SIZE * len(minibatchlist)).astype(np.int64)
        val_indices = np.random.permutation(len(minibatchlist))[:n_val_batches]
        # Print some info
        # NOTE(review): the sample counts use the module-level BATCH_SIZE whereas the minibatches
        # are built with self.batch_size - confirm both always hold the same value
        print("{} minibatches for training, {} samples".format(len(minibatchlist) - n_val_batches,
                                                               (len(minibatchlist) - n_val_batches) * BATCH_SIZE))
        print("{} minibatches for validation, {} samples".format(n_val_batches, n_val_batches * BATCH_SIZE))
        assert n_val_batches > 0, "Not enough sample to create a validation set"

        # Stats about actions
        if not self.continuous_action:
            print('Discrete action space:')
            action_set = set(actions)
            n_actions = int(np.max(actions) + 1)
            print("{} unique actions / {} actions".format(len(action_set), n_actions))
            n_pairs_per_action = np.zeros(n_actions, dtype=np.int64)
            n_obs_per_action = np.zeros(n_actions, dtype=np.int64)
            for i in range(n_actions):
                n_obs_per_action[i] = np.sum(actions == i)

            print("Number of observations per action")
            print(n_obs_per_action)

        else:
            print('Continuous action space:')
            print('Action dimension: {}'.format(self.dim_action))

        dissimilar_pairs, same_actions_pairs = None, None
        if not self.no_priors:
            if self.continuous_action:
                print('This option (priors) doesnt support continuous action space for now !')

            # NOTE(review): n_actions and n_pairs_per_action are only assigned in the discrete branch
            # above, so this call raises a NameError when priors are used with continuous actions
            dissimilar_pairs, same_actions_pairs = findPriorsPairs(self.batch_size, minibatchlist, actions, rewards,
                                                                   n_actions, n_pairs_per_action)

        if self.use_vae and self.perceptual_similarity_loss and self.path_to_dae is not None:

            # Load the denoiser from path_to_dae and freeze it (eval mode, no gradient on its parameters)
            self.denoiser = SRLModules(state_dim=self.state_dim_dae, action_dim=self.dim_action,
                                       model_type="custom_cnn",
                                       cuda=self.cuda, losses=["dae"])
            self.denoiser.load_state_dict(th.load(self.path_to_dae))
            self.denoiser.eval()
            self.denoiser = self.denoiser.to(self.device)
            for param in self.denoiser.parameters():
                param.requires_grad = False

        if self.episode_prior:
            # Map each observation index to the cumulative count of episode starts up to it,
            # then build the list of episode ids of every minibatch
            idx_to_episode = {idx: episode_idx for idx, episode_idx in enumerate(np.cumsum(episode_starts))}
            minibatch_episodes = [[idx_to_episode[i] for i in minibatch] for minibatch in minibatchlist]

        data_loader = DataLoader(minibatchlist, images_path, n_workers=N_WORKERS, multi_view=self.multi_view,
                                 use_triplets=self.use_triplets, is_training=True, apply_occlusion=self.use_dae,
                                 occlusion_percentage=self.occlusion_percentage)
        test_data_loader = DataLoader(test_minibatchlist, images_path, n_workers=N_WORKERS, multi_view=self.multi_view,
                                      use_triplets=self.use_triplets, max_queue_len=1, is_training=False,
                                      apply_occlusion=self.use_dae, occlusion_percentage=self.occlusion_percentage)
        # TRAINING -----------------------------------------------------------------------------------------------------
        loss_history = defaultdict(list)

        loss_manager = LossManager(self.model, loss_history)

        best_error = np.inf
        best_model_path = "{}/srl_model.pth".format(self.log_folder)
        start_time = time.time()

        # Random features, we don't need to train a model
        if len(self.losses) == 1 and self.losses[0] == 'random':
            # NOTE(review): this rebinds the module-level N_EPOCHS, the new value is kept after this call
            global N_EPOCHS
            N_EPOCHS = 0
            printYellow("Skipping training because using random features")
            # Saved here because the training loop below (which normally saves the best model) is skipped
            th.save(self.model.state_dict(), best_model_path)

        for epoch in range(N_EPOCHS):
            # In each epoch, we do a full pass over the training data:
            epoch_loss, epoch_batches = 0, 0
            val_loss = 0
            pbar = tqdm(total=len(minibatchlist))

            for minibatch_num, (minibatch_idx, obs, next_obs, noisy_obs, next_noisy_obs) in enumerate(data_loader):

                # Minibatches whose index was drawn in val_indices are only used for validation
                validation_mode = minibatch_idx in val_indices
                if validation_mode:
                    self.model.eval()
                else:
                    self.model.train()

                if self.use_dae:
                    noisy_obs = noisy_obs.to(self.device)
                    next_noisy_obs = next_noisy_obs.to(self.device)
                obs, next_obs = obs.to(self.device), next_obs.to(self.device)

                self.optimizer.zero_grad()
                loss_manager.resetLosses()

                decoded_obs, decoded_next_obs = None, None
                states_denoiser = None
                states_denoiser_predicted = None
                next_states_denoiser = None
                next_states_denoiser_predicted = None

                # Predict states given observations as in Time Contrastive Network (Triplet Loss) [Sermanet et al.]
                if self.use_triplets:
                    # The channel dimension is split in three: channels 0-2, 3-5 and 6+
                    states, positive_states, negative_states = self.model.forwardTriplets(obs[:, :3:, :, :],
                                                                                          obs[:, 3:6, :, :],
                                                                                          obs[:, 6:, :, :])

                    next_states, next_positive_states, next_negative_states = self.model.forwardTriplets(
                        next_obs[:, :3:, :, :],
                        next_obs[:, 3:6, :, :],
                        next_obs[:, 6:, :, :])
                elif self.use_autoencoder:
                    (states, decoded_obs), (next_states, decoded_next_obs) = self.model(obs), self.model(next_obs)

                elif self.use_dae:
                    # The denoising autoencoder is fed with the noisy observations
                    (states, decoded_obs), (next_states, decoded_next_obs) = \
                        self.model(noisy_obs), self.model(next_noisy_obs)

                elif self.use_vae:
                    # In this branch the reconstruction of next_obs is stored in next_decoded_obs
                    # (decoded_next_obs keeps its None value)
                    (decoded_obs, mu, logvar), (next_decoded_obs, next_mu, next_logvar) = self.model(obs), \
                                                                                          self.model(next_obs)
                    states, next_states = self.model.getStates(obs), self.model.getStates(next_obs)

                    if self.perceptual_similarity_loss:
                        # Predictions for the perceptual similarity loss as in DARLA
                        # https://arxiv.org/pdf/1707.08475.pdf
                        (states_denoiser, decoded_obs_denoiser), (next_states_denoiser, decoded_next_obs_denoiser) = \
                            self.denoiser(obs), self.denoiser(next_obs)

                        (states_denoiser_predicted, decoded_obs_denoiser_predicted) = self.denoiser(decoded_obs)
                        (next_states_denoiser_predicted,
                         decoded_next_obs_denoiser_predicted) = self.denoiser(next_decoded_obs)
                else:
                    states, next_states = self.model(obs), self.model(next_obs)

                # Actions associated to the observations of the current minibatch
                actions_st = actions[minibatchlist[minibatch_idx]]
                if not self.continuous_action:
                    # Discrete actions, rearrange action to have n_minibatch rows and one column, containing the int action
                    actions_st = th.from_numpy(actions_st).view(-1, 1).requires_grad_(False).to(self.device)
                else:
                    # Continuous actions, rearrange action to have n_minibatch rows and dim_action columns
                    actions_st = th.from_numpy(actions_st).view(-1, self.dim_action).requires_grad_(False).to(self.device)

                # L1 regularization
                if self.losses_weights_dict['l1_reg'] > 0:
                    l1Loss(loss_manager.reg_params, self.losses_weights_dict['l1_reg'], loss_manager)

                # L2 regularization
                if self.losses_weights_dict['l2_reg'] > 0:
                    l2Loss(loss_manager.reg_params, self.losses_weights_dict['l2_reg'], loss_manager)

                if not self.no_priors:
                    # NOTE(review): the continuous case is detected with self.n_actions here whereas
                    # the data preparation above uses self.continuous_action - confirm both agree
                    if self.n_actions == np.inf:
                        print('This option (priors) doesnt support continuous action space for now !')

                    roboticPriorsLoss(states, next_states, minibatch_idx=minibatch_idx,
                                      dissimilar_pairs=dissimilar_pairs, same_actions_pairs=same_actions_pairs,
                                      weight=self.losses_weights_dict['priors'], loss_manager=loss_manager)

                # TODO change here to classic call (forward and backward)
                if self.use_forward_loss:
                    next_states_pred = self.model.forwardModel(states, actions_st)
                    forwardModelLoss(next_states_pred, next_states,
                                     weight=self.losses_weights_dict['forward'],
                                     loss_manager=loss_manager)

                if self.use_inverse_loss:
                    actions_pred = self.model.inverseModel(states, next_states)
                    inverseModelLoss(actions_pred, actions_st, weight=self.losses_weights_dict['inverse'],
                                     loss_manager=loss_manager, continuous_action=self.continuous_action)

                if self.use_reward_loss:
                    # Work on a copy so that the `rewards` array given by the caller is not modified
                    rewards_st = rewards[minibatchlist[minibatch_idx]].copy()
                    # Removing negative reward
                    rewards_st[rewards_st == -1] = 0
                    rewards_st = th.from_numpy(rewards_st).to(self.device)
                    rewards_pred = self.model.rewardModel(states, next_states)
                    rewardModelLoss(rewards_pred, rewards_st.long(), weight=self.losses_weights_dict['reward'],
                                    loss_manager=loss_manager)

                if self.use_autoencoder or self.use_dae:
                    loss_type = "dae" if self.use_dae else "autoencoder"
                    # The reconstructions are compared to the original (not noisy) observations
                    autoEncoderLoss(obs, decoded_obs, next_obs, decoded_next_obs,
                                    weight=self.losses_weights_dict[loss_type], loss_manager=loss_manager)

                if self.use_vae:

                    kullbackLeiblerLoss(mu, next_mu, logvar, next_logvar, loss_manager=loss_manager, beta=self.beta)

                    # Either the perceptual similarity loss or the generation loss is used, never both
                    if self.perceptual_similarity_loss:
                        perceptualSimilarityLoss(states_denoiser, states_denoiser_predicted, next_states_denoiser,
                                                 next_states_denoiser_predicted,
                                                 weight=self.losses_weights_dict['perceptual'],
                                                 loss_manager=loss_manager)
                    else:
                        generationLoss(decoded_obs, next_decoded_obs, obs, next_obs,
                                       weight=self.losses_weights_dict['vae'], loss_manager=loss_manager)

                if self.reward_prior:
                    rewards_st = rewards[minibatchlist[minibatch_idx]]
                    rewards_st = th.from_numpy(rewards_st).float().view(-1, 1).to(self.device)
                    rewardPriorLoss(states, rewards_st, weight=self.losses_weights_dict['reward-prior'],
                                    loss_manager=loss_manager)

                if self.episode_prior:
                    episodePriorLoss(minibatch_idx, minibatch_episodes, states, self.discriminator,
                                     BALANCED_SAMPLING, weight=self.losses_weights_dict['episode-prior'],
                                     loss_manager=loss_manager)
                if self.use_triplets:
                    tripletLoss(states, positive_states, negative_states, weight=self.losses_weights_dict['triplet'],
                                loss_manager=loss_manager, alpha=0.2)
                # Compute weighted average of losses
                loss_manager.updateLossHistory()
                loss = loss_manager.computeTotalLoss()

                # We have to call backward in both train/val
                # to avoid memory error
                loss.backward()
                if validation_mode:
                    val_loss += loss.item()
                    # We do not optimize on validation data
                    # so optimizer.step() is not called
                else:
                    self.optimizer.step()
                    epoch_loss += loss.item()
                    epoch_batches += 1
                pbar.update(1)
            pbar.close()

            # NOTE(review): epoch_batches is 0 when no training minibatch was processed during
            # the epoch, in which case this division raises a ZeroDivisionError
            train_loss = epoch_loss / float(epoch_batches)
            val_loss /= float(n_val_batches)
            # Even if loss_history is modified by LossManager
            # we make it explicit
            loss_history = loss_manager.loss_history
            loss_history['train_loss'].append(train_loss)
            loss_history['val_loss'].append(val_loss)
            for key in loss_history.keys():
                if key in ['train_loss', 'val_loss']:
                    continue
                # Average the last entry of every other loss over the training minibatches
                # and add a new entry (0) for the next epoch, if any
                loss_history[key][-1] /= epoch_batches
                if epoch + 1 < N_EPOCHS:
                    loss_history[key].append(0)

            # Save best model
            if val_loss < best_error:
                best_error = val_loss
                th.save(self.model.state_dict(), best_model_path)

            if np.isnan(train_loss):
                printRed("NaN Loss, consider increasing NOISE_STD in the gaussian noise layer")
                sys.exit(NAN_ERROR)

            # Then we print the results for this epoch:
            if (epoch + 1) % EPOCH_FLAG == 0:
                print("Epoch {:3}/{}, train_loss:{:.4f} val_loss:{:.4f}".format(epoch + 1, N_EPOCHS, train_loss,
                                                                                val_loss))
                print("{:.2f}s/epoch".format((time.time() - start_time) / (epoch + 1)))
                if DISPLAY_PLOTS:
                    with th.no_grad():
                        self.model.eval()
                        # Optionally plot the current state space
                        plotRepresentation(self.predStatesWithDataLoader(test_data_loader), rewards,
                                           add_colorbar=epoch == 0,
                                           name="Learned State Representation (Training Data)")

                        # The images plotted below come from the last minibatch processed in this epoch
                        if self.use_autoencoder or self.use_vae or self.use_dae:
                            # Plot Reconstructed Image
                            # NOTE(review): decoded_obs_denoiser(_predicted) are only assigned in the vae
                            # branch above, confirm perceptual_similarity_loss is never set without use_vae
                            if obs[0].shape[0] == 3:  # RGB
                                plotImage(deNormalize(detachToNumpy(obs[0])), "Input Image (Train)")
                                if self.use_dae:
                                    plotImage(deNormalize(detachToNumpy(noisy_obs[0])), "Noisy Input Image (Train)")
                                if self.perceptual_similarity_loss:
                                    plotImage(deNormalize(detachToNumpy(decoded_obs_denoiser[0])),
                                              "Reconstructed Image DAE")
                                    plotImage(deNormalize(detachToNumpy(decoded_obs_denoiser_predicted[0])),
                                              "Reconstructed Image predicted DAE")
                                plotImage(deNormalize(detachToNumpy(decoded_obs[0])), "Reconstructed Image")

                            elif obs[0].shape[0] % 3 == 0:  # Multi-RGB
                                # One plot per group of 3 channels
                                for k in range(obs[0].shape[0] // 3):
                                    plotImage(deNormalize(detachToNumpy(obs[0][k * 3:(k + 1) * 3, :, :]), "image_net"),
                                              "Input Image {} (Train)".format(k + 1))
                                    if self.use_dae:
                                        # NOTE(review): the title has no "{}" placeholder, so format(k + 1)
                                        # has no effect and every view gets the same title
                                        plotImage(deNormalize(detachToNumpy(noisy_obs[0][k * 3:(k + 1) * 3, :, :])),
                                                  "Noisy Input Image (Train)".format(k + 1))
                                    if self.perceptual_similarity_loss:
                                        plotImage(deNormalize(
                                            detachToNumpy(decoded_obs_denoiser[0][k * 3:(k + 1) * 3, :, :])),
                                            "Reconstructed Image DAE")
                                        plotImage(deNormalize(
                                            detachToNumpy(decoded_obs_denoiser_predicted[0][k * 3:(k + 1) * 3, :, :])),
                                            "Reconstructed Image predicted DAE")
                                    plotImage(deNormalize(detachToNumpy(decoded_obs[0][k * 3:(k + 1) * 3, :, :])),
                                              "Reconstructed Image {}".format(k + 1))

        if DISPLAY_PLOTS:
            plt.close("Learned State Representation (Training Data)")

        # Load best model before predicting states
        self.model.load_state_dict(th.load(best_model_path))

        print("Predicting states for all the observations...")
        # return predicted states for training observations
        self.model.eval()
        with th.no_grad():
            pred_states = self.predStatesWithDataLoader(test_data_loader)
        # List of (loss name, loss weight) tuples
        pairs_loss_weight = [k for k in zip(loss_manager.names, loss_manager.weights)]
        return loss_history, pred_states, pairs_loss_weight
示例#7
0
import mido
import sys
from utils import printRed, printGreen

# Forward MIDI messages read from the "deluge" input port to the "dtx" output port.
printGreen("Initializing")
input_devices = mido.get_input_names()
output_devices = mido.get_output_names()

# Keep only the output ports whose name contains "dtx" (case insensitive)
output_devices = [port_name for port_name in output_devices if 'dtx' in port_name.lower()]
if not output_devices:
    printRed("Failed to found the dtx module")
    sys.exit(1)

# Keep only the input ports whose name contains "deluge" (case insensitive)
input_devices = [port_name for port_name in input_devices if 'deluge' in port_name.lower()]
if not input_devices:
    printRed("Failed to found the deluge module")
    sys.exit(1)

try:
    # The first matching port is used on each side
    with mido.open_output(output_devices[0]) as dtx, mido.open_input(input_devices[0]) as deluge:
        printGreen("Ready to forward! In: {0} out: {1}".format(deluge.name, dtx.name))
        for msg in deluge:
            fields = vars(msg)
            # Messages without a type are never forwarded
            if "type" not in fields:
                continue
            if "channel" in fields and msg.channel == 9 and "note_" in msg.type:
                # Note messages sent on channel 9
                printGreen("Message {} matches".format(msg))
                dtx.send(msg)
            elif msg.type in ("program_change", "control_change"):
                # Program/control changes are forwarded whatever the channel
                printGreen("Sending system command {}".format(msg))
                dtx.send(msg)
except Exception as e:
    printRed("Exception! {}".format(e))