def get_map(self, temperature=True):
    '''
    Estimate the maximum a posteriori (MAP) parameters as the weighted mean
    of the posterior samples, which coincides with the MAP when the
    posterior is normal.

    The estimate is stored in ``self.map`` and ``self.got_map`` is set to
    True; nothing is returned.

    Parameters:
        temperature: if True, the entry at index 1 (when ``self.complete``
            is true) or 2 (otherwise) is replaced by the weighted mean of
            the reciprocal of that sample column; if False it is replaced
            by the weighted mean of 10 raised to that column.

    Raises:
        ValueError: if the stored weights contain both strictly negative
            and strictly positive values.
    '''
    weights = self.results[self.idx_weights]
    samples = self.results[self.idx_samples]

    # Negative or all-zero weights are handed to to_normalized_weights
    # (the same helper the SMC code applies to log-weights); anything else
    # is used as is.
    if any(weights < 0) or all(weights == 0.):
        if any(weights > 0):
            raise ValueError(
                'weights mix negative and positive values; cannot decide '
                'whether they need to be normalised')
        sample_weights = useful_functions.to_normalized_weights(weights)
    else:
        sample_weights = weights

    # Weighted mean of every parameter column.
    self.map = np.sum(samples.T * sample_weights, axis=1)
    self.got_map = True

    # One column is stored on a transformed scale; its position depends on
    # whether the model is "complete".
    column = 1 if self.complete else 2
    if temperature:
        self.map[column] = np.sum(1. / samples[:, column].T * sample_weights)
    else:
        self.map[column] = np.sum(
            (10**samples[:, column].T) * sample_weights)

    # When traj_param['beta_softmax'] equals 3 the fitted value is replaced
    # by the constant 1000.
    if temperature and self.traj_param['beta_softmax'] == 3:
        beta_index = np.where(
            [name == 'beta_softmax' for name in self.param_names])[0][0]
        self.map[beta_index] = 1000.

    # Call form of print: valid on Python 3, same output on Python 2.
    print('found map {0}'.format(self.map))
# Example no. 2
# 0
def SMC2(td,
         beta_softmax=1.,
         numberOfStateSamples=200,
         numberOfThetaSamples=200,
         numberOfBetaSamples=50,
         coefficient=.5,
         latin_hyp_sampling=True):
    '''
    Run the SMC2 filter of the forward varying volatility model on recorded
    behaviour and score the observed actions.

    Parameters:
        td: task description, indexed by key. Keys read here: 'S'
            (stimuli), 'action_num', 'state_num', 'reward', 'A_chosen'
            (observed actions) and 'tau' (a scalar or an indexable whose
            first element is used).
        beta_softmax: exponent applied to the action probabilities before
            normalisation.
        numberOfStateSamples: number of state particles per theta particle.
        numberOfThetaSamples: number of theta particles per beta sample.
        numberOfBetaSamples: number of independent particle systems.
        coefficient: the theta particles are redrawn when their effective
            sample size falls below coefficient * numberOfThetaSamples.
        latin_hyp_sampling: when there are 24 task sets, draw the theta
            particles with Latin hypercube sampling (True) or from the
            pickled file 'sobol_200_26.pkl' (False). Must stay True when
            there are 2 task sets.

    Returns:
        (log_proba_, filt_actionLkd): the accumulated log of the mean (over
        beta samples) normalised likelihood of the chosen action, and an
        array [numberOfTrials, numberOfBetaSamples, numberOfActions] of the
        normalised action likelihoods at every trial.

    Raises:
        ValueError: 2 task sets with latin_hyp_sampling disabled.
        IndexError: the number of task sets is neither 2 nor 24.
    '''

    print('\n')
    print('Forward Varying Volatility Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    # uniform distribution (only needed for Latin hypercube sampling)
    if latin_hyp_sampling:
        d0 = uniform()
        print('latin hypercube sampling')
    else:
        print('sobolev sampling')

    # Extract parameters from task description
    stimuli = td['S']  # Sequence of Stimuli
    numberOfActions = td['action_num']  # Number of Actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    rewards = td['reward']
    actions = td['A_chosen']
    K = np.prod(
        np.arange(numberOfActions +
                  1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(stimuli)  # Number of Trials

    # verification
    if K == 2 and not latin_hyp_sampling:
        raise ValueError(
            'Why did you change the latin_hyp_sampling? By default, it is True and has no influence when K=2.'
        )

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    nuPrior = np.array([
        3, 1e-3
    ])  # Prior on Nu, the variance on the projected gaussian random walk
    gammaPrior = np.ones(K)  # Prior on Gamma, the Dirichlet parameter
    # td['tau'] is either indexable (use its first element) or a scalar.
    try:
        tauDefault = td['tau'][0]
    except (TypeError, IndexError):
        tauDefault = td['tau']
    log_proba_ = 0.

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T

    betaWeights = np.zeros(numberOfBetaSamples)
    betaAncestors = np.arange(numberOfBetaSamples)

    # Probabilities of every actions updated at every time step -> Used to take the decision
    actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions])
    sum_actionLik = np.zeros(numberOfBetaSamples)
    filt_actionLkd = np.zeros(
        [numberOfTrials, numberOfBetaSamples, numberOfActions])

    # Keep track of probability correct/exploration after switches
    tsProbability = np.zeros([numberOfBetaSamples, K])
    sum_tsProbability = np.zeros(numberOfBetaSamples)

    # SMC particles initialisation
    muSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    nuSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])

    if K == 24:
        if not latin_hyp_sampling:
            # The pre-computed samples are only needed when Latin hypercube
            # sampling is off; otherwise they are regenerated below.
            # NOTE: pickle must only be used on trusted files.
            # The file name suggests 200 samples x 26 dimensions -- TODO
            # confirm it matches numberOfThetaSamples and K + 2.
            try:
                with open('../../utils/sobol_200_26.pkl', 'rb') as sobol_file:
                    latin_hyp_samples = pickle.load(sobol_file)
            except IOError:
                with open('../../models/utils/sobol_200_26.pkl',
                          'rb') as sobol_file:
                    latin_hyp_samples = pickle.load(sobol_file)
        for beta_idx in range(numberOfBetaSamples):
            if latin_hyp_sampling:
                latin_hyp_samples = mcerp.lhd(dist=d0,
                                              size=numberOfThetaSamples,
                                              dims=K + 2)
            # Push the uniform samples through the prior quantile functions.
            muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0],
                                              betaPrior[0], betaPrior[1])
            nuSamples[beta_idx] = useful_functions.ppf_inv_gamma(
                latin_hyp_samples[:, 1], nuPrior[0], nuPrior[1])
            gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 2:],
                                                  gammaPrior)
            # Normalise each gamma draw so that it sums to one.
            gammaSamples[beta_idx] = np.transpose(
                gammaSamples[beta_idx].T /
                np.sum(gammaSamples[beta_idx], axis=1))
    elif K == 2:
        muSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                   [numberOfBetaSamples, numberOfThetaSamples])
        nuSamples = useful_functions.sample_inv_gamma(
            nuPrior[0], nuPrior[1],
            [numberOfBetaSamples, numberOfThetaSamples])
        gammaSamples = np.random.dirichlet(
            gammaPrior, [numberOfBetaSamples, numberOfThetaSamples])
    else:
        raise IndexError('Wrong number of task sets')

    muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    nuSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    normalisedThetaWeights = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples])

    logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    currentStateSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    currentTauSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.double)
    ancestorStateSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    ancestorTauSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.double)
    ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples
                                ]) / numberOfStateSamples
    essList = np.zeros(numberOfTrials)

    # Guided SMC variables
    dirichletParamCandidates = np.zeros(K)

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0: print('\n')

        for beta_idx in range(numberOfBetaSamples):

            ances = betaAncestors[beta_idx]
            # Update theta weights with the previous trial's observation.
            # NOTE(review): at T == 0 the index T-1 wraps to the last trial;
            # presumably the C routine ignores it then -- TODO confirm.
            smc_c.bootstrapUpdateStep_c(
                currentStateSamples[beta_idx],
                logThetaWeights[beta_idx],
                currentTauSamples[beta_idx],
                gammaSamples[ances],
                muSamples[ances] / 2. + 1. / 2,
                nuSamples[ances],
                tauDefault,
                T,
                np.ascontiguousarray(ancestorStateSamples[ances],
                                     dtype=np.intc),
                ancestorTauSamples[ances],
                ancestorsWeights,
                np.ascontiguousarray(mapping),
                stimuli[T - 1],
                actions[T - 1],
                rewards[T - 1])

            # Degeneracy criterion: effective sample size of theta particles
            logEss = 2 * useful_functions.log_sum(
                logThetaWeights[beta_idx]) - useful_functions.log_sum(
                    2 * logThetaWeights[beta_idx])
            essList[T] = np.exp(logEss)

            # Move step
            normalisedThetaWeights[
                beta_idx] = useful_functions.to_normalized_weights(
                    logThetaWeights[beta_idx])
            if (essList[T] < coefficient * numberOfThetaSamples):
                # Moment-match a Beta proposal to the weighted mu particles.
                betaMu = np.sum(normalisedThetaWeights[beta_idx] *
                                muSamples[ances])
                betaVar = np.sum(normalisedThetaWeights[beta_idx] *
                                 (muSamples[ances] - betaMu)**2)
                betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
                betaBeta = betaAlpha * (1 / betaMu - 1)
                assert (betaAlpha > 0)
                assert (betaBeta > 0)
                # Moment-match an inverse-gamma proposal to the nu particles.
                nuMu = np.sum(normalisedThetaWeights[beta_idx] *
                              nuSamples[ances])
                nuVar = np.sum(normalisedThetaWeights[beta_idx] *
                               (nuSamples[ances] - nuMu)**2)
                nuAlpha = nuMu**2 / nuVar + 2
                nuBeta = nuMu * (nuAlpha - 1)
                assert (nuAlpha > 0)
                assert (nuBeta > 0)
                # Moment-match a Dirichlet proposal to the gamma particles;
                # its parameters are floored at 1.
                dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] *
                                        gammaSamples[ances].T,
                                        axis=1)
                dirichletVar = np.sum(normalisedThetaWeights[beta_idx] *
                                      (gammaSamples[ances]**2).T,
                                      axis=1) - dirichletMeans**2
                dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2
                                            ) / (np.sum(dirichletVar)) - 1
                dirichletParamCandidates[:] = np.maximum(
                    dirichletMeans * dirichletPrecision, 1.)
                assert ((dirichletParamCandidates > 0).all())
                if K == 2:
                    nuSamplesNew[beta_idx] = useful_functions.sample_inv_gamma(
                        nuAlpha, nuBeta, numberOfThetaSamples)
                    muSamplesNew[beta_idx] = np.random.beta(
                        betaAlpha, betaBeta, numberOfThetaSamples)
                    gammaSamplesNew[beta_idx] = np.random.dirichlet(
                        dirichletParamCandidates, numberOfThetaSamples)
                elif K == 24:
                    if latin_hyp_sampling:
                        latin_hyp_samples = mcerp.lhd(
                            dist=d0, size=numberOfThetaSamples, dims=K + 2)
                    muSamplesNew[beta_idx] = betalib.ppf(
                        latin_hyp_samples[:, 0], betaAlpha, betaBeta)
                    nuSamplesNew[beta_idx] = useful_functions.ppf_inv_gamma(
                        latin_hyp_samples[:, 1], nuAlpha, nuBeta)
                    gammaSamplesNew[beta_idx] = gammalib.ppf(
                        latin_hyp_samples[:, 2:], dirichletParamCandidates)
                    gammaSamplesNew[beta_idx] = np.transpose(
                        gammaSamplesNew[beta_idx].T /
                        np.sum(gammaSamplesNew[beta_idx], axis=1))

                # Freshly drawn particles carry uniform weights.
                logThetaWeightsNew[beta_idx] = 0.
                normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples

            else:
                muSamplesNew[beta_idx] = muSamples[ances]
                gammaSamplesNew[beta_idx] = gammaSamples[ances]
                nuSamplesNew[beta_idx] = nuSamples[ances]
                logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx]

        # task set probability
        sum_tsProbability[:] = 0.
        for ts_idx in range(K):
            tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentStateSamples == ts_idx), axis=2),
                                              axis=1)
            sum_tsProbability += tsProbability[:, ts_idx]

        tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability)

        # Compute action likelihood
        sum_actionLik[:] = 0.
        for action_idx in range(numberOfActions):
            actionLikelihood[:, action_idx] = np.exp(
                np.log(
                    np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] ==
                                         action_idx],
                           axis=1)) * beta_softmax)
            sum_actionLik += actionLikelihood[:, action_idx]

        # rewards and actions alias td['reward'] / td['A_chosen'], so the
        # observed values for trial T are already in place.

        actionLikelihood[:] = np.transpose(actionLikelihood.T / sum_actionLik)
        betaWeights[:] = actionLikelihood[:, actions[T].astype(int)]

        filt_actionLkd[T] = actionLikelihood

        log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples)
        betaWeights = betaWeights / sum(betaWeights)

        betaAncestors[:] = useful_functions.stratified_resampling(betaWeights)

        # update particles
        muSamples[:] = muSamplesNew
        gammaSamples[:] = gammaSamplesNew
        nuSamples[:] = nuSamplesNew
        logThetaWeights[:] = logThetaWeightsNew[betaAncestors]
        ancestorTauSamples[:] = currentTauSamples
        ancestorStateSamples[:] = currentStateSamples

    return log_proba_, filt_actionLkd
def SMC2(td,
         show_progress=True,
         lambdaa=.9,
         eta=0.,
         inertie_noise=0.,
         numberOfStateSamples=2000,
         numberOfThetaSamples=1000,
         coefficient=.5,
         beta_softmax=None,
         espilon_softmax=0.):
    '''
    Simulate an agent that performs the task with the precision model,
    choosing an action at every trial from its SMC2 task-set posterior.

    Parameters:
        td: task description, indexed by key. Keys read here: 'S'
            (stimuli), 'Z' (true task sets), 'action_num', 'state_num',
            'trap', 'A_correct' (when there are 24 task sets) and 'beta'
            (only for the progress plot).
        show_progress: if True, redraw a matplotlib figure at every trial.
        lambdaa, eta, inertie_noise: forwarded unchanged to
            smc_c.bootstrap_smc_step_c.
        numberOfStateSamples: number of state particles per theta particle.
        numberOfThetaSamples: number of theta particles.
        coefficient: the theta particles are redrawn when their effective
            sample size falls below coefficient * numberOfThetaSamples.
        beta_softmax: None selects the most likely action; otherwise the
            action probabilities are raised to this power, normalised and
            an action is sampled from them.
        espilon_softmax: weight of a uniform distribution over actions
            mixed into the softmax probabilities.

    Returns:
        A list: [td, noise_amount, lambdaa, eta, betaSamples, betaTracking,
        betaStdTracking, currentTemperatures, temperatureTracking,
        temperatureStdTracking, gammaSamples, tsProbability,
        countPerformance, actions, acceptance_list, elapsed_time].
        currentTemperatures is returned as allocated (all zeros).
    '''

    print(
        'precision model with lambda = {0} and eta = {1}, epsilon= {4}, inertie_noise={5}. Number of state samples : {2} and number of theta samples : {3}'
        .format(lambdaa, eta, numberOfStateSamples, numberOfThetaSamples,
                espilon_softmax, inertie_noise))

    #Start timer
    start_time_multi = time.time()

    # Extract parameters from task description
    stimuli = np.ascontiguousarray(td['S'],
                                   dtype=np.intc)  # Sequence of Stimuli
    Z_true = td['Z']  # Sequence of Task Sets
    numberOfActions = td['action_num']  # Number of Actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    K = np.prod(
        np.arange(numberOfActions +
                  1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(Z_true)  # Number of Trials

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    dirichletPrior = np.ones(K)

    # Mapping from task set to correct action per stimulus
    mapping = np.ascontiguousarray(get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T,
                                   dtype=np.intc)

    # Probabilities of every actions updated at every time step -> Used to take the decision
    actionLikelihood = np.zeros(
        numberOfActions
    )  # For 1 observation, likelihood of the action. Requires a marginalisation over all task sets
    actions = np.ascontiguousarray(np.zeros(numberOfTrials) - 1, dtype=np.intc)
    rewards = np.ascontiguousarray(np.zeros(numberOfTrials), dtype=np.intc)

    # Tracking arrays, one entry per trial
    countPerformance = np.zeros(
        numberOfTrials)  # Number of correct actions after i trials
    tsProbability = np.zeros([numberOfTrials, K])
    betaTracking = np.zeros(numberOfTrials)
    betaStdTracking = np.zeros(numberOfTrials)
    temperatureTracking = np.zeros(numberOfTrials)
    temperatureStdTracking = np.zeros(numberOfTrials)
    acceptance_list = [1.]

    # SMC particles initialisation
    betaSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                 numberOfThetaSamples)
    gammaSamples = np.random.dirichlet(dirichletPrior, numberOfThetaSamples)
    logThetaWeights = np.zeros(numberOfThetaSamples)
    currentTaskSetSamples = np.zeros(
        [numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorTaskSetSamples = np.zeros(
        [numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    weightsList = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    essList = np.zeros(numberOfTrials)
    currentTemperatures = np.zeros(numberOfTrials)
    temperature = 0.5

    # Scratch buffers handed to the C routine
    ante_proba_local = np.zeros(K)
    post_proba_local = np.zeros(K)
    sum_weightsList = np.zeros(numberOfThetaSamples)
    ancestorsIndexes = np.zeros(numberOfStateSamples, dtype=np.intc)
    gammaAdaptedProba = np.zeros(K)
    likelihoods = np.zeros(K)
    positiveStates = np.zeros(K, dtype=np.intc)
    distances = np.zeros([numberOfThetaSamples, 1])
    currentNoises = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    noise_amount = np.zeros(numberOfTrials)

    # Plot progress
    if show_progress:
        plt.figure(figsize=(12, 9))
        plt.ion()

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0: print('\n')

        # Propagate the particles with the previous trial's observation.
        # NOTE(review): at T == 0 the index T-1 wraps to the last trial;
        # presumably the C routine ignores it then -- TODO confirm.
        noise_amount[T] = smc_c.bootstrap_smc_step_c(
            logThetaWeights,
            distances,
            betaSamples / 2. + 1 / 2.,
            lambdaa,
            eta,
            inertie_noise,
            gammaSamples,
            currentTaskSetSamples,
            ancestorTaskSetSamples,
            weightsList,
            mapping,
            stimuli[T - 1],
            rewards[T - 1],
            actions[T - 1],
            T,
            likelihoods,
            positiveStates,
            ante_proba_local,
            post_proba_local,
            ancestorsIndexes,
            gammaAdaptedProba,
            sum_weightsList,
            currentNoises,
            float(temperature))

        ancestorTaskSetSamples[:] = currentTaskSetSamples

        # Degeneracy criterion: effective sample size of theta particles
        logEss = 2 * useful_functions.log_sum(
            logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights)
        essList[T] = np.exp(logEss)

        # Move step
        normalisedThetaWeights = useful_functions.to_normalized_weights(
            logThetaWeights)
        if essList[T] < coefficient * numberOfThetaSamples and acceptance_list[
                -1] > 0.05:
            # Moment-match a Beta proposal to the weighted beta particles;
            # both parameters are floored at 1.
            betaMu = np.sum(normalisedThetaWeights * betaSamples)
            betaVar = np.sum(normalisedThetaWeights *
                             (betaSamples - betaMu)**2)
            betaAlpha = np.maximum(
                ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2, 1)
            betaBeta = np.maximum(betaAlpha * (1 / betaMu - 1), 1.)
            assert (betaAlpha > 0)
            assert (betaBeta > 0)
            # Moment-match a Dirichlet proposal to the gamma particles;
            # its parameters are floored at 1.
            dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T,
                                    axis=1)
            dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T,
                                  axis=1) - dirichletMeans**2
            dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / (
                np.sum(dirichletVar)) - 1
            dirichletParamCandidates = dirichletMeans * dirichletPrecision
            dirichletParamCandidates = np.maximum(dirichletParamCandidates, 1.)
            assert ((dirichletParamCandidates > 0).all())

            # Redraw the particles and reset their weights.
            logThetaWeights[:] = 0
            betaSamples = np.random.beta(betaAlpha, betaBeta,
                                         numberOfThetaSamples)
            gammaSamples = np.random.dirichlet(dirichletParamCandidates,
                                               numberOfThetaSamples)
            normalisedThetaWeights = useful_functions.to_normalized_weights(
                logThetaWeights)

        # Take decision
        for ts_idx in range(K):
            tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentTaskSetSamples == ts_idx), axis=1))

        if beta_softmax is None:
            # Compute action likelihood
            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.sum(
                    tsProbability[T, mapping[stimuli[T]] == action_idx])

            # Select action
            actions[T] = np.argmax(actionLikelihood)

        else:
            # Compute action likelihood
            tsProbability[T] /= sum(tsProbability[T])

            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.exp(
                    np.log(
                        np.sum(tsProbability[
                            T, mapping[stimuli[T].astype(int)] == action_idx]))
                    * beta_softmax)

            actionLikelihood /= sum(actionLikelihood)

            # Mix with a uniform distribution over the actions. The uniform
            # mass is split over numberOfActions (the length of the vector)
            # so that the probabilities still sum to one.
            actionLikelihood = actionLikelihood * (
                1 - espilon_softmax) + espilon_softmax / numberOfActions
            # Select action
            actions[T] = np.where(
                np.random.multinomial(1, actionLikelihood, size=1)[0])[0][0]

        betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples)
        # NOTE(review): this is the weighted variance (no square root) even
        # though the name and the plot below treat it as a standard
        # deviation. Left unchanged because it is returned to callers.
        betaStdTracking[T] = np.sum(normalisedThetaWeights *
                                    (betaSamples - betaTracking[T])**2)
        temperatureTracking[T] = np.mean(currentNoises)
        temperatureStdTracking[T] = np.std(currentNoises)

        # Feedback: a correct action is rewarded unless the trial is a trap,
        # an incorrect one is rewarded only on trap trials.
        if K == 2:
            assert (mapping[stimuli[T].astype(int),
                            Z_true[T].astype(int)] == Z_true[T])
        if (K == 2) and (actions[T] == mapping[stimuli[T].astype(int),
                                               Z_true[T].astype(int)]):
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        elif (K == 24) and (actions[T] == td['A_correct'][T]):
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        else:
            rewards[T] = td['trap'][T]

        if show_progress:
            # Each panel is cleared explicitly before being redrawn;
            # plt.hold() no longer exists in matplotlib >= 3.0.
            plt.subplot(3, 2, 1)
            plt.cla()
            plt.imshow(tsProbability[:T].T, aspect='auto')
            plt.plot(Z_true[:T], 'w--')
            plt.axis([0, T - 1, 0, K - 1])
            plt.xlabel('trials')
            plt.ylabel('p(TS|past) at current time')

            plt.subplot(3, 2, 2)
            plt.cla()
            plt.plot(temperatureTracking[:T])
            plt.fill_between(
                np.arange(T),
                temperatureTracking[:T] - temperatureStdTracking[:T],
                temperatureTracking[:T] + temperatureStdTracking[:T],
                facecolor=[.5, .5, 1],
                color=[.5, .5, 1])
            plt.xlabel('trials')
            plt.ylabel('Temperature')

            plt.subplot(3, 2, 3)
            plt.cla()
            x = np.linspace(0.01, .99, 100)
            plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]),
                     'r')
            plt.plot([betaTracking[T], betaTracking[T]],
                     plt.gca().get_ylim(),
                     'r',
                     linewidth=2)
            plt.plot([td['beta'], td['beta']],
                     plt.gca().get_ylim(),
                     'r--',
                     linewidth=2)
            plt.xlabel('Parameters')
            plt.ylabel('Gaussian pdf')

            plt.subplot(3, 2, 4)
            plt.cla()
            plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2)
            plt.plot(plt.gca().get_xlim(), [
                coefficient * numberOfThetaSamples,
                coefficient * numberOfThetaSamples
            ],
                     'g--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, numberOfThetaSamples])
            plt.xlabel('trials')
            plt.ylabel('ESS')

            plt.subplot(3, 2, 5)
            plt.cla()
            plt.plot(np.divide(countPerformance[:T],
                               np.arange(T) + 1),
                     'k--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, 1])
            plt.xlabel('Trials')
            plt.ylabel('Performance')

            plt.draw()
            plt.show()
            plt.pause(0.1)

    elapsed_time = time.time() - start_time_multi

    return [
        td, noise_amount, lambdaa, eta, betaSamples, betaTracking,
        betaStdTracking, currentTemperatures, temperatureTracking,
        temperatureStdTracking, gammaSamples, tsProbability, countPerformance,
        actions, acceptance_list, elapsed_time
    ]
def SMC2(td,
         show_progress=False,
         numberOfStateSamples=1000,
         numberOfThetaSamples=1000,
         coefficient=.5):
    """Fit the constant-volatility task-set model to recorded behaviour.

    Runs a nested particle filter: ``numberOfThetaSamples`` parameter
    particles (beta, tau, gamma), each carrying ``numberOfStateSamples``
    task-set state particles.  Actions and rewards are not simulated here; on
    every trial they are copied from ``td['A_chosen']`` and ``td['reward']``.
    When the effective sample size (ESS) of the parameter particles drops
    below ``coefficient * numberOfThetaSamples`` (and the previous move step
    accepted more than 5% of its proposals), the parameter particles are
    resampled and rejuvenated with an independent Metropolis-Hastings move
    whose proposal is moment-matched to the current weighted particles.

    Parameters
    ----------
    td : dict
        Task description.  Keys read here: 'S', 'Z', 'action_num',
        'state_num', 'reward' and 'A_chosen', plus 'tau' and 'beta' when
        ``show_progress`` is true.
    show_progress : bool
        If true, redraw a five-panel matplotlib figure on every trial.
    numberOfStateSamples : int
        Number of state particles per parameter particle.
    numberOfThetaSamples : int
        Number of parameter particles.
    coefficient : float
        Fraction of ``numberOfThetaSamples`` under which the ESS triggers
        the move step.

    Returns
    -------
    list
        ``[td, tauSamples, volTracking, volStdTracking, betaSamples,
        betaTracking, betaStdTracking, gammaSamples, tsProbability,
        countPerformance, actions, acceptance_list, essList, time_list,
        elapsed_time]``.

    Notes
    -----
    * ``volStdTracking`` and ``betaStdTracking`` store the weighted mean
      squared deviation; no square root is taken despite the "Std" names.
    * ``countPerformance`` is never updated in this function, so it is
      returned (and plotted) as all zeros.
    * ``time_list[0]`` is the absolute start time; the later entries are
      elapsed seconds recorded every 10 trials.
    """

    print('\n')
    print('Constant Volatility Model')
    print('\n')

    #Start timer
    start_time_multi = time.time()

    # Extract parameters from task description
    stimuli = td['S']  # Sequence of Stimuli
    Z_true = td['Z']  # Sequence of Task Sets
    numberOfActions = td['action_num']  # Number of Actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    # Product of the last numberOfStimuli integers in 0..numberOfActions.
    K = np.prod(
        np.arange(numberOfActions +
                  1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(Z_true)  # Number of Trials

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    tauPrior = np.array(
        [1, 1])  # Prior on Tau, the switch parameter (the volatility)
    # NOTE(review): this line uses the bare `numpy` name while the rest of the
    # function uses `np`; confirm both names are imported by the module.
    gammaPrior = numpy.ones(K)  # Prior on Gamma, the Dirichlet parameter

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T

    # -1 marks trials whose action has not been recorded yet.
    actions = np.zeros(numberOfTrials) - 1
    rewards = np.zeros(numberOfTrials, dtype=bool)

    # Keep track of probability correct/exploration after switches
    countPerformance = np.zeros(
        numberOfTrials)  # Number of correct actions after i trials
    countExploration = np.zeros(
        numberOfTrials)  # Number of exploratory actions after i trials
    correct_before_switch = np.empty(0)  # The correct task set before switch
    tsProbability = np.zeros([numberOfTrials, K])
    acceptanceProba = 0.
    volTracking = np.zeros(numberOfTrials)
    volStdTracking = np.zeros(numberOfTrials)
    betaTracking = np.zeros(numberOfTrials)
    betaStdTracking = np.zeros(numberOfTrials)
    # Seeded with 1. so the "previous acceptance > 0.05" test passes the
    # first time the move step is considered.
    acceptance_list = [1.]
    time_list = [start_time_multi]

    # SMC particles initialisation
    betaSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                 numberOfThetaSamples)
    tauSamples = np.random.beta(tauPrior[0], tauPrior[1], numberOfThetaSamples)
    gammaSamples = np.random.dirichlet(gammaPrior, numberOfThetaSamples)
    logThetaWeights = np.zeros(numberOfThetaSamples)
    logThetaLks = np.zeros(numberOfThetaSamples)
    currentSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples],
                              dtype=np.intc)
    ancestorSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples],
                               dtype=np.intc)
    ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples
                                ]) / numberOfStateSamples
    unnormalisedAncestorsWeights = np.ones(
        [numberOfThetaSamples, numberOfStateSamples])
    essList = np.zeros(numberOfTrials)
    tasksetLikelihood = np.zeros(K)

    # Guided SMC variables
    # Scratch buffers for the move step: the *New arrays collect the particles
    # kept after accept/reject, the *Candidates arrays receive one proposal.
    betaSamplesNew = np.zeros(numberOfThetaSamples)
    tauSamplesNew = np.zeros(numberOfThetaSamples)
    gammaSamplesNew = np.zeros([numberOfThetaSamples, K])
    stateSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples],
                               dtype=np.intc)
    weightsSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    logThetaLksNew = np.zeros(numberOfThetaSamples)
    dirichletParamCandidates = np.zeros(K)
    stateSamplesCandidates = np.zeros(numberOfStateSamples, dtype=np.intc)
    weightsSamplesCandidates = np.zeros(numberOfStateSamples)
    idxTrajectories = np.zeros(numberOfThetaSamples)

    # Plot progress
    if show_progress: plt.figure(figsize=(12, 9))

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
            time_list.append(time.time() - start_time_multi)
        if (T + 1) % 100 == 0: print('\n')

        # Incorporate the previous trial's action and reward.  The return
        # value is discarded, so the routine presumably updates logThetaLks,
        # logThetaWeights, currentSamples and ancestorsWeights in place --
        # TODO confirm against the smc_c extension.
        # NOTE(review): at T == 1, stimuli[T-2] is index -1 (the last stimulus
        # for list/array input); presumably unused by the C routine then.
        if T > 0:
            smc_c.guidedUpdateStep_c(logThetaLks, logThetaWeights, np.ascontiguousarray(currentSamples), gammaSamples, betaSamples/2. + 1./2, tauSamples/2., T, np.ascontiguousarray(ancestorSamples), ancestorsWeights, \
                                                np.ascontiguousarray(mapping), stimuli[T-2], stimuli[T-1], rewards[T-1], actions[T-1])
            ancestorSamples = np.array(currentSamples)

        # Degeneracy criterion: ESS = (sum w)^2 / sum(w^2), computed in log
        # space (assuming useful_functions.log_sum is a log-sum-exp).
        logEss = 2 * useful_functions.log_sum(
            logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights)
        essList[T] = np.exp(logEss)

        # Move step
        normalisedThetaWeights = useful_functions.to_normalized_weights(
            logThetaWeights)
        if (essList[T] < coefficient * numberOfThetaSamples) and (
                acceptance_list[-1] > 0.05):
            acceptanceProba = 0.
            # Moment-match a Beta proposal to the weighted tau particles.
            tauMu = np.sum(normalisedThetaWeights * tauSamples)
            tauVar = np.sum(normalisedThetaWeights * (tauSamples - tauMu)**2)
            tauAlpha = ((1 - tauMu) / tauVar - 1 / tauMu) * tauMu**2
            tauBeta = tauAlpha * (1 / tauMu - 1)
            assert (tauAlpha > 0)
            assert (tauBeta > 0)
            # Same moment matching for the beta particles.
            betaMu = np.sum(normalisedThetaWeights * betaSamples)
            betaVar = np.sum(normalisedThetaWeights *
                             (betaSamples - betaMu)**2)
            betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
            betaBeta = betaAlpha * (1 / betaMu - 1)
            assert (betaAlpha > 0)
            assert (betaBeta > 0)
            # Moment-match a Dirichlet proposal to the gamma particles:
            # concentration = mean * precision, with the precision estimated
            # from the summed component variances.
            dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T,
                                    axis=1)
            dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T,
                                  axis=1) - dirichletMeans**2
            dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / (
                np.sum(dirichletVar)) - 1
            dirichletParamCandidates = dirichletMeans * dirichletPrecision
            assert ((dirichletParamCandidates > 0).all())

            # Ancestor index for every new parameter particle.
            idxTrajectories = useful_functions.stratified_resampling(
                normalisedThetaWeights)

            for theta_idx in range(numberOfThetaSamples):
                # Proposal drawn independently of the current particle.
                tauCandidate = np.random.beta(tauAlpha, tauBeta)
                betaCandidate = np.random.beta(betaAlpha, betaBeta)
                gammaCandidate = np.random.dirichlet(dirichletParamCandidates)

                # Launch guidedSMC
                # Re-runs the state filter over trials [0, T) for the
                # candidate; the returned value is used as its log-likelihood.
                # stateSamplesCandidates / weightsSamplesCandidates are
                # presumably filled in place -- TODO confirm.
                logLksCandidate                = smc_c.guidedSmc_c(np.ascontiguousarray(stateSamplesCandidates), weightsSamplesCandidates, gammaCandidate, betaCandidate/2. + 1./2, tauCandidate/2., np.ascontiguousarray(mapping), \
                                                            np.ascontiguousarray(stimuli[:T], dtype=np.intc), np.ascontiguousarray(rewards[:T], dtype=np.intc), np.ascontiguousarray(actions[:T], dtype=np.intc), numberOfStateSamples)

                # Update a trajectory
                idx_traj = idxTrajectories[theta_idx]

                # Only the Dirichlet prior on gamma appears here; betaPrior
                # and tauPrior are both [1, 1] above.
                priorsLogRatio = useful_functions.log_dirichlet_pdf(
                    gammaCandidate,
                    gammaPrior) - useful_functions.log_dirichlet_pdf(
                        gammaSamples[idx_traj], gammaPrior)

                # Proposal log-density of the current particle minus that of
                # the candidate.
                transLogRatio                  = useful_functions.log_beta_pdf(tauSamples[idx_traj], tauAlpha, tauBeta) + useful_functions.log_beta_pdf(betaSamples[idx_traj], betaAlpha, betaBeta) + useful_functions.log_dirichlet_pdf(gammaSamples[idx_traj], dirichletParamCandidates) - \
                                                        useful_functions.log_beta_pdf(tauCandidate, tauAlpha, tauBeta) - useful_functions.log_beta_pdf(betaCandidate, betaAlpha, betaBeta) - useful_functions.log_dirichlet_pdf(gammaCandidate, dirichletParamCandidates)

                logLkdRatio = logLksCandidate - logThetaLks[idx_traj]

                # Log of the Metropolis-Hastings acceptance probability.
                logAlpha = min(0, priorsLogRatio + transLogRatio + logLkdRatio)

                U = np.random.rand()

                # Accept or Reject
                if np.log(U) < logAlpha:
                    acceptanceProba += 1.
                    betaSamplesNew[theta_idx] = betaCandidate
                    tauSamplesNew[theta_idx] = tauCandidate
                    gammaSamplesNew[theta_idx] = gammaCandidate
                    stateSamplesNew[theta_idx] = stateSamplesCandidates
                    logThetaLksNew[theta_idx] = logLksCandidate
                    weightsSamplesNew[theta_idx] = weightsSamplesCandidates
                else:
                    # Keep the resampled ancestor unchanged.
                    betaSamplesNew[theta_idx] = betaSamples[idx_traj]
                    tauSamplesNew[theta_idx] = tauSamples[idx_traj]
                    gammaSamplesNew[theta_idx] = gammaSamples[idx_traj]
                    stateSamplesNew[theta_idx] = ancestorSamples[idx_traj]
                    logThetaLksNew[theta_idx] = logThetaLks[idx_traj]
                    weightsSamplesNew[theta_idx] = ancestorsWeights[idx_traj]

            print('\n')
            print('acceptance ratio is ')
            print(acceptanceProba / numberOfThetaSamples)
            print('\n')
            acceptance_list.append(acceptanceProba / numberOfThetaSamples)

            # Swap in the rejuvenated particles (as copies, so the scratch
            # buffers can be reused) and reset the weights to uniform.
            ancestorsWeights = np.array(weightsSamplesNew)
            logThetaLks = np.array(logThetaLksNew)
            logThetaWeights = np.zeros(numberOfThetaSamples)
            ancestorSamples = np.array(stateSamplesNew)
            betaSamples = np.array(betaSamplesNew)
            tauSamples = np.array(tauSamplesNew)
            gammaSamples = np.array(gammaSamplesNew)
            normalisedThetaWeights = useful_functions.to_normalized_weights(
                logThetaWeights)

        # Launch bootstrap update
        # Return value discarded; currentSamples is presumably written in
        # place -- TODO confirm.
        # NOTE(review): at T == 0, stimuli[T - 1] is index -1 (the last
        # stimulus for list/array input); presumably ignored when T == 0.
        smc_c.bootstrapUpdateStep_c(np.ascontiguousarray(currentSamples),
                                    gammaSamples, betaSamples / 2. + 1. / 2,
                                    tauSamples / 2., T,
                                    np.ascontiguousarray(ancestorSamples),
                                    ancestorsWeights,
                                    np.ascontiguousarray(mapping),
                                    stimuli[T - 1])

        # Take decision
        # Weighted count of state particles sitting in each task set (the
        # inner sum is a count, so rows are not normalised to 1).
        for ts_idx in range(K):
            tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentSamples == ts_idx), axis=1))
        # Select action and compute vol
        volTracking[T] = np.sum(normalisedThetaWeights * tauSamples)
        # Weighted variance (no square root), despite the "Std" name.
        volStdTracking[T] = np.sum(normalisedThetaWeights *
                                   (tauSamples - volTracking[T])**2)

        betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples)
        # Weighted variance (no square root), despite the "Std" name.
        betaStdTracking[T] = np.sum(normalisedThetaWeights *
                                    (betaSamples - betaTracking[T])**2)

        # Observed behaviour is replayed from the task description.
        rewards[T] = td['reward'][T]
        actions[T] = td['A_chosen'][T]

        if show_progress:
            # NOTE(review): plt.hold only exists in older Matplotlib releases
            # (it was removed in 3.0).
            plt.subplot(3, 2, 1)
            plt.imshow(tsProbability[:T].T, aspect='auto')
            plt.hold(True)
            plt.plot(Z_true[:T], 'w--')
            plt.axis([0, T - 1, 0, K - 1])  # For speed
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('p(TS|past) at current time')

            plt.subplot(3, 2, 2)
            plt.plot(volTracking[:T], 'b')
            plt.hold(True)
            plt.fill_between(np.arange(T),
                             volTracking[:T] - volStdTracking[:T],
                             volTracking[:T] + volStdTracking[:T],
                             facecolor=[.5, .5, 1],
                             color=[.5, .5, 1])
            plt.plot(td['tau'], 'b--', linewidth=2)
            plt.axis([0, T - 1, 0, .5])  # For speed
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Volatility')

            plt.subplot(3, 2, 3)
            x = np.linspace(0.01, .99, 100)
            # NOTE(review): betaStdTracking holds a variance; if normlib.pdf
            # takes (x, loc, scale) like scipy.stats.norm, the scale passed
            # here is not a standard deviation -- confirm.
            plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]),
                     'r')
            plt.hold(True)
            plt.plot([betaTracking[T], betaTracking[T]],
                     plt.gca().get_ylim(),
                     'r',
                     linewidth=2)
            plt.plot([td['beta'], td['beta']],
                     plt.gca().get_ylim(),
                     'r--',
                     linewidth=2)
            plt.hold(False)
            plt.xlabel('Parameters')
            plt.ylabel('Gaussian pdf')

            plt.subplot(3, 2, 4)
            plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2)
            plt.hold(True)
            # Dashed line: ESS threshold that triggers the move step.
            plt.plot(plt.gca().get_xlim(), [
                coefficient * numberOfThetaSamples,
                coefficient * numberOfThetaSamples
            ],
                     'g--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, numberOfThetaSamples])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ESS')

            plt.subplot(3, 2, 5)
            plt.plot(np.divide(countPerformance[:T],
                               np.arange(T) + 1),
                     'k--',
                     linewidth=2)
            plt.hold(True)
            plt.axis([0, T - 1, 0, 1])
            plt.hold(False)
            plt.xlabel('Trials')
            plt.ylabel('Performance')

            plt.draw()

    elapsed_time = time.time() - start_time_multi

    return [
        td, tauSamples, volTracking, volStdTracking, betaSamples, betaTracking,
        betaStdTracking, gammaSamples, tsProbability, countPerformance,
        actions, acceptance_list, essList, time_list, elapsed_time
    ]
# Example no. 5
# 0
def SMC2(td,
         show_progress=True,
         numberOfStateSamples=1000,
         numberOfThetaSamples=1000,
         coefficient=.5,
         beta_softmax=None):
    """Simulate an agent on a task with the varying-volatility model.

    Runs a nested particle filter: ``numberOfThetaSamples`` parameter
    particles (beta, nu, gamma), each carrying ``numberOfStateSamples``
    particles for the task-set state and the volatility tau.  On every trial
    the filter is updated, an action is chosen from the task-set posterior,
    and the reward is derived from the task description.

    When the effective sample size (ESS) of the parameter particles drops
    below ``coefficient * numberOfThetaSamples``, the parameter particles are
    redrawn from Beta / inverse-gamma / Dirichlet distributions moment-matched
    to the weighted particles and the weights are reset to uniform.  There is
    no accept/reject step in this variant.

    Parameters
    ----------
    td : dict
        Task description.  Keys read here: 'S', 'Z', 'action_num',
        'state_num', 'tau' (sequence or scalar), 'trap', 'A_correct' (only
        when there are 24 task sets) and 'beta' (only when ``show_progress``
        is true).
    show_progress : bool
        If true, redraw a five-panel matplotlib figure on every trial.
    numberOfStateSamples : int
        Number of state particles per parameter particle.
    numberOfThetaSamples : int
        Number of parameter particles.
    coefficient : float
        Fraction of ``numberOfThetaSamples`` under which the ESS triggers the
        parameter refresh.
    beta_softmax : float or None
        If None, the action with the largest likelihood is chosen.
        Otherwise each action likelihood is raised to the power
        ``beta_softmax``, normalised, and the action is sampled from it.

    Returns
    -------
    list
        ``[td, nuSamples, nuTracking, nuStdTracking, volTracking,
        volStdTracking, betaSamples, betaTracking, betaStdTracking,
        gammaSamples, tsProbability, countPerformance, actions, essList,
        time_list, elapsed_time]``.

    Notes
    -----
    * The ``*StdTracking`` arrays store weighted variances; no square root is
      taken despite the names.
    * ``time_list[0]`` is the absolute start time; the later entries are
      elapsed seconds recorded every 10 trials.
    """

    print('Varying Volatility Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    # Start timer
    start_time_multi = time.time()

    # Extract parameters from task description
    stimuli = td['S']  # Sequence of Stimuli
    Z_true = td['Z']  # Sequence of Task Sets
    numberOfActions = td['action_num']  # Number of Actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    # Product of the last numberOfStimuli integers in 0..numberOfActions.
    K = np.prod(
        np.arange(numberOfActions +
                  1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(Z_true)  # Number of Trials

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    nuPrior = np.array([
        3, 1e-3
    ])  # Prior on Nu, the variance on the projected gaussian random walk
    gammaPrior = np.ones(K)  # Prior on Gamma, the Dirichlet parameter
    # td['tau'] may be a sequence (take its first entry) or a scalar.  Only
    # lookup failures fall back to the scalar; KeyboardInterrupt and
    # SystemExit are no longer swallowed.
    try:
        tauDefault = td['tau'][0]
    except (TypeError, IndexError, KeyError):
        tauDefault = td['tau']

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T

    # Probabilities of every action, updated at every time step and used to
    # take the decision (marginalised over all task sets).
    actionLikelihood = np.zeros(numberOfActions)
    # -1 marks trials whose action has not been chosen yet.
    actions = np.zeros(numberOfTrials) - 1
    rewards = np.zeros(numberOfTrials, dtype=bool)

    # Tracking arrays, one entry per trial
    countPerformance = np.zeros(
        numberOfTrials)  # Number of correct actions after i trials
    tsProbability = np.zeros([numberOfTrials, K])
    volTracking = np.zeros(numberOfTrials)  # Volatility with time
    volStdTracking = np.zeros(numberOfTrials)
    nuTracking = np.zeros(numberOfTrials)
    nuStdTracking = np.zeros(numberOfTrials)
    betaTracking = np.zeros(numberOfTrials)
    betaStdTracking = np.zeros(numberOfTrials)
    time_list = [start_time_multi]

    # SMC particles initialisation (the order of the random draws is kept so
    # seeded runs are reproducible).
    betaSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                 numberOfThetaSamples)
    nuSamples = useful_functions.sample_inv_gamma(nuPrior[0], nuPrior[1],
                                                  numberOfThetaSamples)
    gammaSamples = np.random.dirichlet(gammaPrior, numberOfThetaSamples)
    logThetaWeights = np.zeros(numberOfThetaSamples)
    currentStateSamples = np.zeros(
        [numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    currentTauSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples],
                                 dtype=np.double)
    ancestorStateSamples = np.zeros(
        [numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorTauSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples],
                                  dtype=np.double)
    ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples
                                ]) / numberOfStateSamples
    essList = np.zeros(numberOfTrials)

    # Plot progress
    if show_progress:
        plt.figure(figsize=(12, 9))
        plt.ion()

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
            time_list.append(time.time() - start_time_multi)
        if (T + 1) % 100 == 0:
            print('\n')

        # Update theta weights.  The return value is discarded, so the
        # routine presumably writes currentStateSamples, currentTauSamples,
        # logThetaWeights and ancestorsWeights in place -- TODO confirm
        # against the smc_c extension.
        # NOTE(review): at T == 0 the [T - 1] indices are -1 (the last
        # element); presumably ignored by the C routine when T == 0.
        smc_c.bootstrapUpdateStep_c(
            currentStateSamples, logThetaWeights, currentTauSamples,
            gammaSamples, betaSamples / 2. + 1 / 2., nuSamples, tauDefault, T,
            np.ascontiguousarray(ancestorStateSamples, dtype=np.intc),
            ancestorTauSamples, ancestorsWeights,
            np.ascontiguousarray(mapping), stimuli[T - 1], actions[T - 1],
            rewards[T - 1])

        # Copies, so the next update does not overwrite its own ancestors.
        ancestorTauSamples = np.array(currentTauSamples)
        ancestorStateSamples = np.array(currentStateSamples)

        # Degeneracy criterion: ESS = (sum w)^2 / sum(w^2), computed in log
        # space (assuming useful_functions.log_sum is a log-sum-exp).
        logEss = 2 * useful_functions.log_sum(
            logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights)
        essList[T] = np.exp(logEss)

        # Move step
        normalisedThetaWeights = useful_functions.to_normalized_weights(
            logThetaWeights)
        if (essList[T] < coefficient * numberOfThetaSamples):
            # Moment-match a Beta distribution to the beta particles.
            betaMu = np.sum(normalisedThetaWeights * betaSamples)
            betaVar = np.sum(normalisedThetaWeights *
                             (betaSamples - betaMu)**2)
            betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
            betaBeta = betaAlpha * (1 / betaMu - 1)
            assert (betaAlpha > 0)
            assert (betaBeta > 0)
            # Moment-match an inverse-gamma distribution to the nu particles
            # (assuming sample_inv_gamma takes shape then scale).
            nuMu = np.sum(normalisedThetaWeights * nuSamples)
            nuVar = np.sum(normalisedThetaWeights * (nuSamples - nuMu)**2)
            nuAlpha = nuMu**2 / nuVar + 2
            nuBeta = nuMu * (nuAlpha - 1)
            assert (nuAlpha > 0)
            assert (nuBeta > 0)
            # Moment-match a Dirichlet distribution to the gamma particles,
            # with every concentration parameter floored at 1.
            dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T,
                                    axis=1)
            dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T,
                                  axis=1) - dirichletMeans**2
            dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / (
                np.sum(dirichletVar)) - 1
            dirichletParamCandidates = np.maximum(
                dirichletMeans * dirichletPrecision, 1.)
            assert ((dirichletParamCandidates > 0).all())

            # Redraw all parameter particles and reset their weights.
            nuSamples = useful_functions.sample_inv_gamma(
                nuAlpha, nuBeta, numberOfThetaSamples)
            betaSamples = np.random.beta(betaAlpha, betaBeta,
                                         numberOfThetaSamples)
            gammaSamples = np.random.dirichlet(dirichletParamCandidates,
                                               numberOfThetaSamples)
            logThetaWeights[:] = 0

            normalisedThetaWeights = useful_functions.to_normalized_weights(
                logThetaWeights)

        # Take decision: weighted count of state particles in each task set
        # (the inner sum is a count, so the row is not normalised to 1).
        for ts_idx in range(K):
            tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentStateSamples == ts_idx),
                axis=1))  # Todo : change!!! take out currentAncestorsWeights

        if beta_softmax is None:
            # Compute action likelihood
            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.sum(
                    tsProbability[T, mapping[stimuli[T]] == action_idx])

            # Select action
            actions[T] = np.argmax(actionLikelihood)

        else:
            # Compute action likelihood
            tsProbability[T] /= sum(tsProbability[T])

            # exp(log(p) * beta_softmax) == p ** beta_softmax
            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.exp(
                    np.log(
                        np.sum(tsProbability[
                            T, mapping[stimuli[T].astype(int)] == action_idx]))
                    * beta_softmax)

            actionLikelihood /= sum(actionLikelihood)

            # Select action: index of the single success of one multinomial
            # draw.
            actions[T] = np.where(
                np.random.multinomial(1, actionLikelihood, size=1)[0])[0][0]

        # Compute vol, nu, beta for tracking.  The "Std" entries below are
        # weighted variances (no square root is taken).
        volTracking[T] = np.sum(
            normalisedThetaWeights *
            (np.sum(currentTauSamples, axis=1) / numberOfStateSamples))
        volStdTracking[T] = np.sum(normalisedThetaWeights *
                                   (np.sum(currentTauSamples**2, axis=1) /
                                    numberOfStateSamples)) - volTracking[T]**2
        nuTracking[T] = np.sum(normalisedThetaWeights * nuSamples)
        nuStdTracking[T] = np.sum(normalisedThetaWeights *
                                  (nuSamples - nuTracking[T])**2)

        betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples)
        betaStdTracking[T] = np.sum(normalisedThetaWeights *
                                    (betaSamples - betaTracking[T])**2)

        # Update performance: a correct action is rewarded unless the trial
        # is a trap; an incorrect action is rewarded only on a trap trial.
        if K == 2:
            assert (mapping[stimuli[T].astype(int),
                            Z_true[T].astype(int)] == Z_true[T])
        if (K == 2) and (actions[T] == mapping[stimuli[T].astype(int),
                                               Z_true[T].astype(int)]):
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        elif (K == 24) and (actions[T] == td['A_correct'][T]):
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        else:
            rewards[T] = td['trap'][T]

        if show_progress:
            # NOTE(review): plt.hold only exists in older Matplotlib releases
            # (it was removed in 3.0).
            plt.subplot(3, 2, 1)
            plt.imshow(tsProbability[:T].T, aspect='auto')
            plt.hold(True)
            plt.plot(Z_true[:T], 'w--')
            plt.axis([0, T - 1, 0, K - 1])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('p(TS|past) at current time')

            plt.subplot(3, 2, 2)
            plt.plot(volTracking[:T], 'b')
            plt.hold(True)
            plt.fill_between(np.arange(T),
                             volTracking[:T] - volStdTracking[:T],
                             volTracking[:T] + volStdTracking[:T],
                             facecolor=[.5, .5, 1],
                             color=[.5, .5, 1])
            plt.plot(td['tau'], 'b--', linewidth=2)
            plt.axis([0, T - 1, 0, .5])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Volatility')

            plt.subplot(3, 2, 3)
            x = np.linspace(0.01, .99, 100)
            # NOTE(review): the *StdTracking values are variances; if
            # normlib.pdf takes (x, loc, scale) like scipy.stats.norm, the
            # scale passed here is not a standard deviation -- confirm.
            plt.plot(x, normlib.pdf(x, nuTracking[T], nuStdTracking[T]), 'b')
            plt.hold(True)
            plt.plot([nuTracking[T], nuTracking[T]],
                     plt.gca().get_ylim(),
                     'b',
                     linewidth=2)
            plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]),
                     'r')
            plt.plot([betaTracking[T], betaTracking[T]],
                     plt.gca().get_ylim(),
                     'r',
                     linewidth=2)
            plt.plot([td['beta'], td['beta']],
                     plt.gca().get_ylim(),
                     'r--',
                     linewidth=2)
            plt.hold(False)
            plt.xlabel('Parameters')
            plt.ylabel('Gaussian pdf')

            plt.subplot(3, 2, 4)
            plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2)
            plt.hold(True)
            # Dashed line: ESS threshold that triggers the parameter refresh.
            plt.plot(plt.gca().get_xlim(), [
                coefficient * numberOfThetaSamples,
                coefficient * numberOfThetaSamples
            ],
                     'g--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, numberOfThetaSamples])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ESS')

            plt.subplot(3, 2, 5)
            plt.plot(np.divide(countPerformance[:T],
                               np.arange(T) + 1),
                     'k--',
                     linewidth=2)
            plt.hold(True)
            plt.axis([0, T - 1, 0, 1])
            plt.hold(False)
            plt.xlabel('Trials')
            plt.ylabel('Performance')

            plt.draw()
            plt.show()
            plt.pause(0.1)

    elapsed_time = time.time() - start_time_multi

    # The volatility mean is followed by its spread, like the nu and beta
    # pairs around it.
    return [
        td, nuSamples, nuTracking, nuStdTracking, volTracking, volStdTracking,
        betaSamples, betaTracking, betaStdTracking, gammaSamples,
        tsProbability, countPerformance, actions, essList, time_list,
        elapsed_time
    ]
def smc2(actions, rewards, idx_blocks, choices, subj_idx, show_progress,
         apply_rep, apply_weber, beta_softmax, temperature,
         observational_noise):

    assert (2 not in actions)
    assert (0 in actions)
    assert (1 in actions)
    assert (apply_rep == 0 or apply_rep == 1)
    assert (apply_weber == 0 or apply_weber == 1)

    # Extract parameters from task description
    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    idx_blocks = np.asarray(idx_blocks, dtype=np.intc)
    N_samples = 1000
    n_theta = 1000
    coefficient = .5
    T = actions.shape[0]
    prev_action = -1
    upp_bound_eta = 10.

    if apply_rep:
        n_param = 5
    else:
        n_param = 4
    if apply_weber == 1:
        upp_bound_eps = 1.
    else:
        upp_bound_eps = .5

    # samples
    samples = np.random.rand(n_theta, n_param)
    if beta_softmax > 0:
        temperature = False
        samples[:, 2] = beta_softmax
        sample_beta = False
        upp_bound_beta = beta_softmax
    else:
        if temperature:
            upp_bound_beta = np.sqrt(6) / (np.pi * 5)
        else:
            upp_bound_beta = 2.
        samples[:, 2] = np.random.rand(n_theta) * upp_bound_beta
        sample_beta = True
    samples[:, 3] = np.random.rand(n_theta) * upp_bound_eps
    if apply_rep:
        samples[:, 4] = (2 * np.random.rand(n_theta) - 1) * upp_bound_eta

    # variable memory
    noisy_descendants = np.zeros([n_theta, N_samples, 2])
    noisy_ancestors = np.zeros([n_theta, N_samples, 2])
    weights_norm = np.zeros([n_theta, N_samples])
    log_weights_a = np.zeros([n_theta])
    ancestorsIndexes = np.ascontiguousarray(np.zeros(n_theta, dtype=np.intc))
    logThetaWeights = np.zeros(n_theta)
    logThetalkd = np.zeros(n_theta)
    log_lkd = np.zeros(n_theta)
    essList = np.zeros(T)
    acceptance_list = []
    marg_loglkd = 0

    #move step variables
    ancestors_indexes_p = np.ascontiguousarray(
        np.zeros(N_samples, dtype=np.intc))
    samples_new = np.zeros([n_theta, n_param])
    weights_new = np.zeros([n_theta, N_samples])
    states_new = np.zeros([n_theta, N_samples, 2])
    logThetalkd_new = np.zeros(n_theta)
    state_candidates = np.zeros([N_samples, 2])
    state_candidates_a = np.zeros([N_samples, 2])
    weights_candidates = np.zeros(N_samples)

    # history of samples
    noisy_history = np.zeros([T, 2])

    if show_progress:
        plt.figure(figsize=(15, 9))
        plt.suptitle("noisy rl", fontsize=14)
        plt.ion()

    for t_idx in range(T):

        # Print progress
        if (t_idx + 1) % 10 == 0:
            sys.stdout.write(' ' + str(t_idx + 1))
            sys.stdout.flush()
            print ' marg_loglkd ' + str(marg_loglkd)

        prev_rew = np.ascontiguousarray(rewards[:, max(0, t_idx - 1)])
        log_weights_a[:] = logThetaWeights
        if t_idx > 0 and choices[t_idx - 1]:
            assert (actions[max(0, t_idx - 1)] == prev_action)

        smc_c.smc_update_2q_c(log_lkd, logThetalkd, noisy_descendants, noisy_ancestors, weights_norm, logThetaWeights, ancestorsIndexes, samples, \
                     idx_blocks, choices, prev_action, actions, prev_rew, t_idx, apply_rep, apply_weber, 2, temperature, observational_noise)

        # save and update
        marg_loglkd += logsumexp(log_weights_a +
                                 log_lkd) - logsumexp(log_weights_a)
        normalisedThetaWeights = uf.to_normalized_weights(logThetaWeights)
        noisy_history[t_idx] = np.sum((normalisedThetaWeights * np.sum(
            np.transpose(weights_norm * noisy_descendants.T), axis=1).T),
                                      axis=1)

        # Degeneray criterion
        logEss = 2 * uf.log_sum(logThetaWeights) - uf.log_sum(
            2 * logThetaWeights)
        essList[t_idx] = np.exp(logEss)

        # update repetition action
        if choices[t_idx] == 1:
            prev_action = actions[t_idx]

        # Move step
        if (essList[t_idx] < coefficient * n_theta):
            acceptance_proba = 0
            if not sample_beta:
                samples_tmp = np.delete(samples, 2, axis=1)
                mu_p = np.sum(samples_tmp.T * normalisedThetaWeights, axis=1)
                Sigma_p = np.dot(
                    (samples_tmp - mu_p).T * normalisedThetaWeights,
                    (samples_tmp - mu_p))
            else:
                mu_p = np.sum(samples.T * normalisedThetaWeights, axis=1)
                Sigma_p = np.dot((samples - mu_p).T * normalisedThetaWeights,
                                 (samples - mu_p))

            ancestorsIndexes[:] = uf.stratified_resampling(
                normalisedThetaWeights)

            for theta_idx in range(n_theta):
                idx_traj = ancestorsIndexes[theta_idx]
                while True:
                    sample_cand = np.array(samples[idx_traj])
                    sample_p = multi_norm(mu_p, Sigma_p)
                    sample_p_copy = np.array(sample_p)
                    if (not sample_beta) and apply_rep:
                        sample_p = np.array([
                            sample_p[0], sample_p[1], beta_softmax,
                            sample_p[2], sample_p[3]
                        ])
                        sample_cand = np.delete(sample_cand, 2)
                    elif not sample_beta:
                        sample_p = np.array([
                            sample_p[0], sample_p[1], beta_softmax, sample_p[2]
                        ])
                        sample_cand = np.delete(sample_cand, 2)

                    if apply_rep:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1. and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta \
                                and sample_p[3] > 0 and sample_p[3] < upp_bound_eps and sample_p[4] > -upp_bound_eta and sample_p[4] < upp_bound_eta:
                            break
                    else:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1. and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta \
                                and sample_p[3] > 0 and sample_p[3] < upp_bound_eps:
                            break

                # Launch SMC
                logmarglkd_p = smc_c.smc_2q_c(state_candidates, state_candidates_a, weights_candidates, sample_p, ancestors_indexes_p, \
                              idx_blocks, actions, rewards, choices, t_idx + 1, apply_rep, apply_weber, 2, temperature, observational_noise)

                logAlpha     = np.minimum(0, logmarglkd_p - logThetalkd[idx_traj]  \
                         + get_logtruncnorm(sample_cand, mu_p, Sigma_p) - get_logtruncnorm(sample_p_copy, mu_p, Sigma_p) )

                # accept or reject
                if np.log(np.random.rand()) < logAlpha:
                    acceptance_proba += 1.
                    samples_new[theta_idx] = sample_p
                    weights_new[theta_idx] = weights_candidates
                    states_new[theta_idx] = state_candidates
                    logThetalkd_new[theta_idx] = logmarglkd_p
                else:
                    samples_new[theta_idx] = samples[idx_traj]
                    weights_new[theta_idx] = weights_norm[idx_traj]
                    states_new[theta_idx] = noisy_descendants[idx_traj]
                    logThetalkd_new[theta_idx] = logThetalkd[idx_traj]

            print('\n')
            print('acceptance ratio is ')
            print(acceptance_proba / n_theta)
            print('\n')
            acceptance_list.append(acceptance_proba / n_theta)

            weights_norm[:] = weights_new
            logThetalkd[:] = logThetalkd_new
            logThetaWeights[:] = np.zeros(n_theta)
            noisy_descendants[:] = states_new
            samples[:] = samples_new
            normalisedThetaWeights = uf.to_normalized_weights(logThetaWeights)

        if show_progress and t_idx % 10:
            plt.subplot(3, 2, 1)
            plt.plot(range(t_idx), noisy_history[:t_idx, 0], 'r')
            plt.hold(True)
            plt.plot(range(t_idx), noisy_history[:t_idx, 1], 'b')
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Q-value 0 (red), and 1 (blue)')

            plt.subplot(3, 2, 4)
            plt.plot(range(t_idx), essList[:t_idx], 'b', linewidth=2)
            plt.hold(True)
            plt.plot(plt.gca().get_xlim(), [n_theta / 2, n_theta / 2],
                     'b--',
                     linewidth=2)
            plt.axis([0, t_idx - 1, 0, n_theta])  # For speed
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ess')

            if temperature:
                mean_beta = np.sum(normalisedThetaWeights *
                                   (1. / samples[:, 2]))
                std_beta = np.sqrt(
                    np.sum(normalisedThetaWeights * (1. / samples[:, 2])**2) -
                    mean_beta**2)
                x = np.linspace(0., 200, 5000)
            else:
                mean_beta = np.sum(normalisedThetaWeights *
                                   (10**samples[:, 2]))
                std_beta = np.sqrt(
                    np.sum(normalisedThetaWeights * (10**samples[:, 2])**2) -
                    mean_beta**2)
                x = np.linspace(0., 10**upp_bound_beta, 5000)
            plt.subplot(3, 2, 3)
            plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g')
            plt.hold(True)
            plt.plot([mean_beta, mean_beta],
                     plt.gca().get_ylim(),
                     'g',
                     linewidth=2)
            plt.hold(False)
            plt.xlabel('beta softmax')
            plt.ylabel('pdf')

            mean_alpha_0 = np.sum(normalisedThetaWeights * samples[:, 0])
            std_alpha_0 = np.sqrt(
                np.sum(normalisedThetaWeights * samples[:, 0]**2) -
                mean_alpha_0**2)
            mean_alpha_1 = np.sum(normalisedThetaWeights * samples[:, 1])
            std_alpha_1 = np.sqrt(
                np.sum(normalisedThetaWeights * samples[:, 1]**2) -
                mean_alpha_1**2)
            plt.subplot(3, 2, 2)
            x = np.linspace(0., 1., 5000)
            plt.plot(x, norm.pdf(x, mean_alpha_0, std_alpha_0), 'm')
            plt.hold(True)
            plt.plot([mean_alpha_0, mean_alpha_0], plt.gca().get_ylim(), 'm')
            plt.plot(x, norm.pdf(x, mean_alpha_1, std_alpha_1), 'c')
            plt.plot([mean_alpha_1, mean_alpha_1], plt.gca().get_ylim(), 'c')
            plt.hold(False)
            plt.xlabel('learning rates')
            plt.ylabel('pdf')

            mean_epsilon = np.sum(normalisedThetaWeights * samples[:, 3])
            std_epsilon = np.sqrt(
                np.sum(normalisedThetaWeights * samples[:, 3]**2) -
                mean_epsilon**2)
            plt.subplot(3, 2, 6)
            x = np.linspace(0., upp_bound_eps, 5000)
            if apply_rep == 1:
                mean_rep = np.sum(normalisedThetaWeights * samples[:, 4])
                std_rep = np.sqrt(
                    np.sum(normalisedThetaWeights * samples[:, 4]**2) -
                    mean_rep**2)
                x = np.linspace(-2., 2., 5000)
                plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'y')
                plt.hold(True)
                plt.plot([mean_rep, mean_rep],
                         plt.gca().get_ylim(),
                         'y',
                         linewidth=2)
            plt.plot(x, norm.pdf(x, mean_epsilon, std_epsilon), 'g')
            plt.hold(True)
            plt.plot([mean_epsilon, mean_epsilon],
                     plt.gca().get_ylim(),
                     'g',
                     linewidth=2)
            plt.hold(False)
            plt.xlabel('epsilon std (green), rep_bias (yellow)')
            plt.ylabel('pdf')
            plt.draw()
            plt.show()
            plt.pause(0.05)

    return [
        samples, noisy_history, acceptance_list, normalisedThetaWeights,
        logThetalkd, marg_loglkd
    ]
Exemplo n.º 7
0
def ibis(actions, rewards, choices, idx_blocks, subj_idx, apply_rep_bias,
         apply_weber_decision_noise, curiosity_bias, show_progress,
         temperature):
    '''
    Run an iterated-batch importance sampler over the parameters of a
    two-option reinforcement-learning model, for one sequence of trials.

    A population of 1000 parameter samples is reweighted after every trial
    with the log-likelihood of the observed action, and the two Q-values of
    every sample are then updated with the rewards of that trial. Whenever
    the effective sample size drops below half the population, the samples
    are resampled (stratified) and moved with an independent
    Metropolis-Hastings step whose proposal is a Gaussian fitted to the
    weighted samples, redrawn until it falls inside the parameter bounds.

    Columns of a parameter sample:
        0  : alpha_c, learning rate applied to the Q-value of the taken action
        1  : alpha_u, learning rate applied to the other Q-value
        2  : x, with beta = 1 / x if `temperature` else beta = 10 ** x
        3  : k (Weber scaling) when `apply_weber_decision_noise` is set,
             otherwise eta when a repetition/curiosity bias is set
        -1 : eta whenever `apply_rep_bias` or `curiosity_bias` is set

    Parameters
    ----------
    actions : sequence of 0/1; must contain both values and never 2.
    rewards : array indexed as rewards[option, trial].
    choices : per-trial flag; only trials with value 1 contribute to the
        likelihood.
    idx_blocks : per-trial flag; a truthy value resets the Q-values to 0.5
        and clears the previous action before the trial is processed.
    subj_idx : not used by this function.
    apply_rep_bias : truthy to add a bias term depending on the previous
        action.
    apply_weber_decision_noise : 0 or 1; when 1, beta is divided by
        (1 + k * previous absolute prediction error).
    curiosity_bias : truthy to add a bias term scaled by the length of the
        current run of identical actions; the Q-value of the action that was
        not taken is then left untouched.
    show_progress : truthy to redraw diagnostic plots every 10 trials.
    temperature : selects the parametrisation of beta (see column 2).

    Returns
    -------
    list
        [samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd,
        marg_loglkd_l]
    '''

    assert (2 not in actions)
    assert (0 in actions)
    assert (1 in actions)

    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    choices = np.asarray(choices, dtype=np.intc)
    idx_blocks = np.asarray(idx_blocks, dtype=np.intc)
    nb_samples = 1000
    T = actions.shape[0]
    upp_bound_eta = 10.

    # The bound on column 2 only depends on the parametrisation of beta, so
    # it is computed once instead of once per model variant.
    if temperature:
        upp_bound_beta = np.sqrt(6) / (np.pi * 5)
    else:
        upp_bound_beta = 2.

    # sample initialisation (the order of the random draws is significant
    # for reproducibility under a fixed seed)
    if (apply_rep_bias or curiosity_bias) and apply_weber_decision_noise == 0:
        samples = np.random.rand(nb_samples, 4)
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta
        samples[:, 3] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)
    elif apply_weber_decision_noise == 0:
        samples = np.random.rand(nb_samples, 3)
        samples[:, 2] = np.random.rand(nb_samples) * upp_bound_beta
    elif apply_weber_decision_noise == 1:
        if apply_rep_bias:
            samples = np.random.rand(nb_samples, 5)
            samples[:,
                    4] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)
        else:
            samples = np.random.rand(nb_samples, 4)

        upp_bound_k = 10
        samples[:, 2] = np.random.rand(
            nb_samples) * upp_bound_beta  # bound on the beta
        samples[:, 3] = np.random.rand(nb_samples) * upp_bound_k

    Q_samples = np.zeros([nb_samples, 2])
    prev_action = np.zeros(nb_samples) - 1

    # ibis param
    esslist = np.zeros(T)
    log_weights = np.zeros(nb_samples)
    weights_a = np.zeros(nb_samples)
    p_loglkd = np.zeros(nb_samples)
    loglkd = np.zeros(nb_samples)
    marg_loglkd = 0
    coefficient = .5
    marg_loglkd_l = np.zeros(T)
    acceptance_l = []

    # move step param: the buffer has the same shape as the sample matrix
    move_samples = np.zeros(samples.shape)
    move_p_loglkd = np.zeros(nb_samples)
    Q_samples_move = np.zeros([nb_samples, 2])
    mean_Q = np.zeros([T, 2])
    prediction_err = np.zeros(nb_samples)
    prediction_err[:] = -np.inf
    prediction_err_move = np.zeros(nb_samples)

    if show_progress:
        plt.figure(figsize=(15, 9))
        plt.suptitle("noiseless rl", fontsize=14)
        plt.ion()

    # loop
    for t_idx in range(T):

        if (t_idx + 1) % 10 == 0:
            sys.stdout.write(' ' + str(t_idx + 1) + ' ')
            print('marg_loglkd ' + str(marg_loglkd))
        if (t_idx + 1) % 100 == 0: print('\n')
        # every sample must share the same previous action
        assert (len(np.unique(prev_action)) == 1)

        # update step
        weights_a[:] = log_weights
        if idx_blocks[t_idx]:
            Q_samples[:] = 0.5
            prev_action[:] = -1

        # loop over samples
        for n_idx in range(nb_samples):
            alpha_c = samples[n_idx, 0]
            alpha_u = samples[n_idx, 1]

            if temperature:
                beta = 1. / samples[n_idx, 2]
            else:
                beta = 10**samples[n_idx, 2]

            if apply_rep_bias or curiosity_bias:
                eta = samples[n_idx, -1]
            if apply_weber_decision_noise:
                k_beta = samples[n_idx, 3]

            # reweighting
            if choices[t_idx] == 1 and prev_action[n_idx] != -1 and (
                    apply_rep_bias == 1
                    or curiosity_bias) and apply_weber_decision_noise == 0:
                if apply_rep_bias:
                    value = 1. / (
                        1. +
                        np.exp(beta *
                               (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) -
                               np.sign(prev_action[n_idx] - .5) * eta))
                    loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                           (1 - value)**((1 - actions[t_idx])))
                    prev_action[n_idx] = actions[t_idx]
                elif curiosity_bias:
                    # length of the run of identical actions ending at the
                    # previous trial; np.where(...)[0] is empty (IndexError)
                    # when all previous actions are identical
                    try:
                        count_samples = t_idx - 1 - np.where(
                            actions[:t_idx] != actions[t_idx - 1])[0][-1]
                    except IndexError:
                        count_samples = t_idx
                    assert (count_samples > 0)
                    value = 1. / (1. + np.exp(
                        beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) +
                        np.sign(prev_action[n_idx] - .5) * eta * count_samples)
                                  )
                    loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                           (1 - value)**((1 - actions[t_idx])))
                    prev_action[n_idx] = actions[t_idx]

            elif choices[t_idx] == 1 and apply_weber_decision_noise == 0:
                value = 1. / (
                    1. + np.exp(beta *
                                (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                       (1 - value)**((1 - actions[t_idx])))
                prev_action[n_idx] = actions[t_idx]
            elif choices[
                    t_idx] == 1 and apply_weber_decision_noise == 1 and apply_rep_bias == 0:
                beta_modified = beta / (1. + k_beta * prediction_err[n_idx])
                value = 1. / (
                    1. + np.exp(beta_modified *
                                (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])))
                loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                       (1 - value)**((1 - actions[t_idx])))
                prev_action[n_idx] = actions[t_idx]
            elif choices[
                    t_idx] == 1 and apply_weber_decision_noise == 1 and apply_rep_bias == 1:
                beta_modified = beta / (1. + k_beta * prediction_err[n_idx])
                value = 1. / (
                    1. + np.exp(beta_modified *
                                (Q_samples[n_idx, 0] - Q_samples[n_idx, 1]) -
                                np.sign(prev_action[n_idx] - .5) * eta))
                loglkd[n_idx] = np.log((value**actions[t_idx]) *
                                       (1 - value)**((1 - actions[t_idx])))
                prev_action[n_idx] = actions[t_idx]
            else:
                # trial without a recorded choice: no likelihood contribution
                value = 1.
                loglkd[n_idx] = 0.

            if np.isnan(loglkd[n_idx]):
                print(t_idx)
                print(n_idx)
                print(beta)
                print(value)
                raise Exception

            p_loglkd[n_idx] = p_loglkd[n_idx] + loglkd[n_idx]

            log_weights[n_idx] = log_weights[n_idx] + loglkd[n_idx]

            # update step
            if actions[t_idx] == 0:
                prediction_err[n_idx] = np.abs(Q_samples[n_idx, 0] -
                                               rewards[0, t_idx])

                Q_samples[n_idx, 0] = (1 - alpha_c) * Q_samples[
                    n_idx, 0] + alpha_c * rewards[0, t_idx]
                if not curiosity_bias:
                    Q_samples[n_idx, 1] = (1 - alpha_u) * Q_samples[
                        n_idx, 1] + alpha_u * rewards[1, t_idx]
            else:
                prediction_err[n_idx] = np.abs(Q_samples[n_idx, 1] -
                                               rewards[1, t_idx])
                if not curiosity_bias:
                    Q_samples[n_idx, 0] = (1 - alpha_u) * Q_samples[
                        n_idx, 0] + alpha_u * rewards[0, t_idx]
                Q_samples[n_idx, 1] = (1 - alpha_c) * Q_samples[
                    n_idx, 1] + alpha_c * rewards[1, t_idx]

        # weights_a still holds the log-weights from before this trial
        marg_loglkd += logsumexp(weights_a + loglkd) - logsumexp(weights_a)
        marg_loglkd_l[t_idx] = marg_loglkd
        ess = np.exp(2 * logsumexp(log_weights) - logsumexp(2 * log_weights))
        esslist[t_idx] = ess

        weights_a[:] = uf.to_normalized_weights(log_weights)
        mean_Q[t_idx] = np.sum((Q_samples.T * weights_a).T, axis=0)

        # move step
        if ess < coefficient * nb_samples:
            idxTrajectories = uf.stratified_resampling(weights_a)
            mu_p = np.sum(samples.T * weights_a, axis=1)
            Sigma_p = np.dot((samples - mu_p).T * weights_a, (samples - mu_p))
            nb_acceptance = 0.

            for n_idx in range(nb_samples):
                idx_traj = idxTrajectories[n_idx]
                # redraw the proposal until it lies inside the bounds
                # NOTE(review): with curiosity_bias set and apply_rep_bias
                # unset, the eta column is not bound-checked here -- confirm
                # whether that is intended.
                while True:
                    sample_p = multi_norm(mu_p, Sigma_p)
                    if not apply_rep_bias and not apply_weber_decision_noise:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[
                                1] > 0 and sample_p[1] < 1 and sample_p[
                                    2] > 0 and sample_p[2] <= upp_bound_beta:
                            break
                    elif not apply_rep_bias and apply_weber_decision_noise:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1 \
                                and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > 0 and sample_p[3] <= upp_bound_k:
                            break
                    elif apply_rep_bias and not apply_weber_decision_noise:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1 \
                                     and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > -upp_bound_eta and sample_p[3] < upp_bound_eta:
                            break
                    else:
                        if sample_p[0] > 0 and sample_p[0] < 1 and sample_p[1] > 0 and sample_p[1] < 1 \
                                     and sample_p[2] > 0 and sample_p[2] <= upp_bound_beta and sample_p[3] > 0 and sample_p[3] < upp_bound_k \
                                             and sample_p[-1] > -upp_bound_eta and sample_p[-1] < upp_bound_eta:
                            break

                # likelihood of the first t_idx + 1 trials under the proposal
                [loglkd_prop, Q_prop, prev_action_prop, prediction_err_prop
                 ] = get_loglikelihood(sample_p, rewards, actions, choices,
                                       idx_blocks, t_idx + 1, apply_rep_bias,
                                       apply_weber_decision_noise,
                                       curiosity_bias, temperature)

                log_ratio = loglkd_prop - p_loglkd[idx_traj] \
                    + get_logtruncnorm(samples[idx_traj], mu_p, Sigma_p) \
                    - get_logtruncnorm(sample_p, mu_p, Sigma_p)

                log_ratio = np.minimum(log_ratio, 0)
                if (np.log(np.random.rand()) < log_ratio):
                    nb_acceptance += 1.
                    move_samples[n_idx] = sample_p
                    move_p_loglkd[n_idx] = loglkd_prop
                    Q_samples_move[n_idx] = Q_prop
                    prediction_err_move[n_idx] = prediction_err_prop
                else:
                    move_samples[n_idx] = samples[idx_traj]
                    move_p_loglkd[n_idx] = p_loglkd[idx_traj]
                    Q_samples_move[n_idx] = Q_samples[idx_traj]
                    prediction_err_move[n_idx] = prediction_err[idx_traj]

            print('acceptance ratio %s' % str(nb_acceptance / nb_samples))
            assert (prev_action_prop == prev_action[0])

            acceptance_l.append(nb_acceptance / nb_samples)
            # move samples
            samples[:] = move_samples
            p_loglkd[:] = move_p_loglkd

            log_weights[:] = 0.
            Q_samples[:] = Q_samples_move
            prediction_err[:] = prediction_err_move

        # NOTE(review): plt.hold was removed in matplotlib 3.0; the plotting
        # below only runs on older matplotlib versions.
        if show_progress and t_idx % 10 == 0:
            weights_a[:] = uf.to_normalized_weights(log_weights)

            plt.subplot(3, 2, 1)
            plt.plot(range(t_idx), mean_Q[:t_idx], 'm', linewidth=2)
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('Q values')

            if apply_rep_bias == 1:
                # eta is always the last column (index 3 without Weber noise,
                # index 4 with it); index 3 would be k in the latter case
                mean_rep = np.sum(weights_a * samples[:, -1])
                std_rep = np.sqrt(
                    np.sum(weights_a * samples[:, -1]**2) - mean_rep**2)
                plt.subplot(3, 2, 2)
                x = np.linspace(-2., 2., 5000)
                plt.plot(x, norm.pdf(x, mean_rep, std_rep), 'g')
                plt.hold(True)
                plt.plot([mean_rep, mean_rep],
                         plt.gca().get_ylim(),
                         'g',
                         linewidth=2)
                plt.hold(False)
                plt.xlabel('trials')
                plt.ylabel('rep param')

            if temperature:
                mean_beta = np.sum(weights_a * 1. / samples[:, 2])
                std_beta = np.sqrt(
                    np.sum(weights_a * ((1. / samples[:, 2])**2)) -
                    mean_beta**2)
            else:
                mean_beta = np.sum(weights_a * 10**samples[:, 2])
                std_beta = np.sqrt(
                    np.sum(weights_a * ((10**samples[:, 2])**2)) -
                    mean_beta**2)

            plt.subplot(3, 2, 3)
            x = np.linspace(0.01, 200., 5000)
            plt.plot(x, norm.pdf(x, mean_beta, std_beta), 'g', linewidth=2)
            plt.hold(True)
            plt.plot([mean_beta, mean_beta],
                     plt.gca().get_ylim(),
                     'g',
                     linewidth=2)
            plt.hold(False)
            plt.xlabel('beta softmax')
            plt.ylabel('pdf')

            mean_alpha_0 = np.sum(weights_a * samples[:, 0])
            std_alpha_0 = np.sqrt(
                np.sum(weights_a * (samples[:, 0]**2)) - mean_alpha_0**2)
            mean_alpha_1 = np.sum(weights_a * samples[:, 1])
            std_alpha_1 = np.sqrt(
                np.sum(weights_a * (samples[:, 1]**2)) - mean_alpha_1**2)
            plt.subplot(3, 2, 4)
            x = np.linspace(0., 1., 5000)
            plt.plot(x,
                     norm.pdf(x, mean_alpha_0, std_alpha_0),
                     'm',
                     linewidth=2)
            plt.hold(True)
            plt.plot([mean_alpha_0, mean_alpha_0],
                     plt.gca().get_ylim(),
                     'm',
                     linewidth=2)
            plt.plot(x,
                     norm.pdf(x, mean_alpha_1, std_alpha_1),
                     'c',
                     linewidth=2)
            plt.plot([mean_alpha_1, mean_alpha_1],
                     plt.gca().get_ylim(),
                     'c',
                     linewidth=2)
            plt.hold(False)
            plt.xlabel('learning rate chosen (majenta) an unchosen (cyan)')
            plt.ylabel('pdf')

            plt.subplot(3, 2, 5)
            plt.plot(range(t_idx), esslist[:t_idx], 'b', linewidth=2)
            plt.hold(True)
            plt.plot(plt.gca().get_xlim(), [nb_samples / 2, nb_samples / 2],
                     'b--',
                     linewidth=2)
            plt.axis([0, t_idx - 1, 0, nb_samples])
            plt.hold(False)
            plt.xlabel('trials')
            plt.ylabel('ess')

            # k only exists in the Weber-noise model; plotting it
            # unconditionally raised a NameError for every other model
            if apply_weber_decision_noise:
                mean_k = np.sum(weights_a * samples[:, 3])
                std_k = np.sqrt(
                    np.sum(weights_a * (samples[:, 3]**2)) - mean_k**2)

                plt.subplot(3, 2, 6)
                x = np.linspace(0.01, 10., 5000)
                plt.plot(x, norm.pdf(x, mean_k, std_k), 'k', linewidth=2)
                plt.hold(True)
                plt.plot([mean_k, mean_k],
                         plt.gca().get_ylim(),
                         'k',
                         linewidth=2)
                plt.hold(False)
                plt.xlabel('scaling parameter for softmax 1/[0 1]')
                plt.ylabel('pdf')

            plt.draw()
            plt.show()
            plt.pause(0.05)

    return [
        samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd,
        marg_loglkd_l
    ]
def SMC2(td,
         beta_softmax=1.,
         lambda_noise=.4,
         eta_noise=.1,
         epsilon_softmax=0.,
         noise_inertie=0.,
         numberOfStateSamples=200,
         numberOfThetaSamples=200,
         numberOfBetaSamples=20,
         coefficient=.5,
         latin_hyp_sampling=True):
    '''
    Accumulate the log-probability of an observed action sequence under the
    "Noisy Forward Model", using nested particle populations.

    For every trial and every one of the `numberOfBetaSamples` outer
    particles, one step of `smc_c.bootstrap_smc_step_c` is run on the
    previous trial's stimulus/reward/action; when the effective sample size
    of the theta weights drops below `coefficient * numberOfThetaSamples`,
    the theta particles (mu, gamma) are redrawn from a Beta and a Dirichlet
    moment-matched to the weighted particles. The task-set probabilities are
    then turned into action probabilities with a softmax (exponent
    `beta_softmax`, uniform mixing `epsilon_softmax`), the probability of the
    chosen action is accumulated, and the outer particles are resampled.

    Parameters
    ----------
    td : mapping with keys 'S' (stimuli), 'Z' (task sets, only its length is
        used), 'action_num', 'state_num', 'reward' and 'A_chosen'.
    beta_softmax : exponent applied to the task-set based action
        probabilities.
    lambda_noise, eta_noise, noise_inertie : forwarded unchanged to
        `smc_c.bootstrap_smc_step_c`.
    epsilon_softmax : weight of the uniform component mixed into the action
        probabilities.
    numberOfStateSamples, numberOfThetaSamples, numberOfBetaSamples :
        sizes of the three particle populations.
    coefficient : fraction of `numberOfThetaSamples` under which the theta
        particles are rejuvenated.
    latin_hyp_sampling : when True, theta particles are drawn with
        `mcerp.lhd`; when False (only allowed with 24 task sets) the
        pre-computed samples in 'sobol_200_25.pkl' are used instead.

    Returns
    -------
    float
        Sum over trials of the log of the mean, across outer particles, of
        the probability assigned to the chosen action.

    Raises
    ------
    ValueError
        If there are 2 task sets and `latin_hyp_sampling` is False.
    IndexError
        If the number of task sets is neither 2 nor 24.
    '''

    print('\n')
    print('Noisy Forward Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    # uniform distribution
    if latin_hyp_sampling:
        d0 = uniform()
        print('latin hypercube sampling')
    else:
        print('sobolev sampling')

    # Extract parameters from task description
    stimuli = td['S']  # Sequence of Stimuli
    Z_true = td['Z']  # Sequence of Task Sets
    numberOfActions = td['action_num']  # Number of Actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    rewards = td['reward']
    actions = td['A_chosen']
    K = np.prod(
        np.arange(numberOfActions +
                  1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(Z_true)  # Number of Trials
    distances = np.zeros([numberOfThetaSamples, 1])
    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    gammaPrior = np.ones(K)  # Prior on Gamma, the Dirichlet parameter
    log_proba_ = 0.

    # verification
    if K == 2:
        if latin_hyp_sampling == False:
            raise ValueError(
                'Why did you change the latin_hyp_sampling? By default, it is True and has no influence when K=2.'
            )

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T

    betaWeights = np.zeros(numberOfBetaSamples)
    betaAncestors = np.arange(numberOfBetaSamples)

    # Probabilities of every actions updated at every time step -> Used to take the decision
    actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions])
    sum_actionLik = np.zeros(numberOfBetaSamples)
    filt_actionLkd = np.zeros(
        [numberOfTrials, numberOfBetaSamples, numberOfActions])

    # Keep track of probability correct/exploration after switches
    tsProbability = np.zeros([numberOfBetaSamples, K])
    sum_tsProbability = np.zeros(numberOfBetaSamples)
    dirichletParamCandidates = np.zeros(K)

    # SMC particles initialisation
    muSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])

    if K == 24:
        if not latin_hyp_sampling:
            # The pre-computed samples are only needed when they are not
            # regenerated with mcerp.lhd before every use. The file is looked
            # up at two relative locations; handles are closed explicitly.
            # NOTE: pickle.load must only be used on trusted files.
            try:
                with open('../../utils/sobol_200_25.pkl', 'rb') as sobol_file:
                    latin_hyp_samples = pickle.load(sobol_file)
            except (IOError, OSError):
                with open('../../models/utils/sobol_200_25.pkl',
                          'rb') as sobol_file:
                    latin_hyp_samples = pickle.load(sobol_file)
        for beta_idx in range(numberOfBetaSamples):
            if latin_hyp_sampling:
                latin_hyp_samples = mcerp.lhd(dist=d0,
                                              size=numberOfThetaSamples,
                                              dims=K + 1)
            # map the uniform draws through the prior inverse CDFs
            muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0],
                                              betaPrior[0], betaPrior[1])
            gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 1:],
                                                  gammaPrior)
            # normalise each row so that it sums to one
            gammaSamples[beta_idx] = np.transpose(
                gammaSamples[beta_idx].T /
                np.sum(gammaSamples[beta_idx], axis=1))
    elif K == 2:
        muSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                   [numberOfBetaSamples, numberOfThetaSamples])
        gammaSamples = np.random.dirichlet(
            gammaPrior, [numberOfBetaSamples, numberOfThetaSamples])
    else:
        raise IndexError('Wrong number of task sets')

    logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    currentSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    ancestorSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    weightsList = np.ones([numberOfThetaSamples, numberOfStateSamples
                           ]) / numberOfStateSamples
    currentNoises = np.zeros([numberOfThetaSamples, numberOfStateSamples])

    # scratch buffers handed to the compiled SMC step
    ante_proba_local = np.zeros(K)
    post_proba_local = np.zeros(K)
    sum_weightsList = np.zeros(numberOfThetaSamples)
    ancestorsIndexes = np.zeros(numberOfStateSamples, dtype=np.intc)
    gammaAdaptedProba = np.zeros(K)
    likelihoods = np.zeros(K)
    positiveStates = np.zeros(K, dtype=np.intc)

    # Guided SMC variables
    muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    normalisedThetaWeights = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples])
    temperatures = np.zeros(numberOfBetaSamples)
    temperatureAncestors = np.zeros(numberOfBetaSamples) + .5

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0: print('\n')

        for beta_idx in range(numberOfBetaSamples):

            ances = betaAncestors[beta_idx]

            # NOTE(review): at T == 0 the [T - 1] indices wrap around to the
            # last trial; presumably the compiled step ignores them when
            # T == 0 -- confirm against smc_c.
            temperatures[beta_idx] = smc_c.bootstrap_smc_step_c(
                logThetaWeights[beta_idx], distances,
                muSamples[ances] / 2. + 1. / 2, lambda_noise, eta_noise,
                noise_inertie, gammaSamples[ances], currentSamples[beta_idx],
                ancestorSamples[ances], weightsList,
                np.ascontiguousarray(mapping), stimuli[T - 1], rewards[T - 1],
                actions[T - 1], T, likelihoods, positiveStates,
                ante_proba_local, post_proba_local, ancestorsIndexes,
                gammaAdaptedProba, sum_weightsList, currentNoises,
                temperatureAncestors[ances])

            # Move step
            normalisedThetaWeights[
                beta_idx] = useful_functions.to_normalized_weights(
                    logThetaWeights[beta_idx])
            ess = 1. / np.sum(normalisedThetaWeights[beta_idx]**2)

            if (ess < coefficient * numberOfThetaSamples):
                # Beta parameters matched to the weighted mean/variance of mu
                betaMu = np.sum(normalisedThetaWeights[beta_idx] *
                                muSamples[ances])
                betaVar = np.sum(normalisedThetaWeights[beta_idx] *
                                 (muSamples[ances] - betaMu)**2)
                betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
                betaBeta = betaAlpha * (1 / betaMu - 1)
                assert (betaAlpha > 0)
                assert (betaBeta > 0)
                # Dirichlet parameters matched to the weighted moments of
                # gamma, floored at 1
                dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] *
                                        gammaSamples[ances].T,
                                        axis=1)
                dirichletVar = np.sum(normalisedThetaWeights[beta_idx] *
                                      (gammaSamples[ances]**2).T,
                                      axis=1) - dirichletMeans**2
                dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2
                                            ) / (np.sum(dirichletVar)) - 1
                dirichletParamCandidates[:] = np.maximum(
                    dirichletMeans * dirichletPrecision, 1.)
                assert ((dirichletParamCandidates > 0).all())

                if K == 2:
                    muSamplesNew[beta_idx] = np.random.beta(
                        betaAlpha, betaBeta, numberOfThetaSamples)
                    gammaSamplesNew[beta_idx] = np.random.dirichlet(
                        dirichletParamCandidates, numberOfThetaSamples)
                if K == 24:
                    if latin_hyp_sampling:
                        latin_hyp_samples = mcerp.lhd(
                            dist=d0, size=numberOfThetaSamples, dims=K + 1)
                    muSamplesNew[beta_idx] = betalib.ppf(
                        latin_hyp_samples[:, 0], betaAlpha, betaBeta)
                    gammaSamplesNew[beta_idx] = gammalib.ppf(
                        latin_hyp_samples[:, 1:], dirichletParamCandidates)
                    gammaSamplesNew[beta_idx] = np.transpose(
                        gammaSamplesNew[beta_idx].T /
                        np.sum(gammaSamplesNew[beta_idx], axis=1))

                logThetaWeightsNew[beta_idx] = 0.
                normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples
            else:
                muSamplesNew[beta_idx] = muSamples[ances]
                gammaSamplesNew[beta_idx] = gammaSamples[ances]
                logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx]

        # task set probability
        sum_tsProbability[:] = 0.
        for ts_idx in range(K):
            tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentSamples == ts_idx), axis=2),
                                              axis=1)
            sum_tsProbability += tsProbability[:, ts_idx]

        tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability)

        # Compute action likelihood
        sum_actionLik[:] = 0.
        for action_idx in range(numberOfActions):
            actionLikelihood[:, action_idx] = np.exp(
                np.log(
                    np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] ==
                                         action_idx],
                           axis=1)) * beta_softmax)
            sum_actionLik += actionLikelihood[:, action_idx]

        # `rewards` and `actions` are the td['reward'] / td['A_chosen']
        # objects themselves, so these two assignments store each element
        # back into its own slot.
        rewards[T] = td['reward'][T]
        actions[T] = td['A_chosen'][T]

        actionLikelihood[:] = np.transpose(
            actionLikelihood.T / sum_actionLik) * (
                1 - epsilon_softmax) + epsilon_softmax / numberOfActions
        betaWeights[:] = actionLikelihood[:, actions[T].astype(int)]

        filt_actionLkd[T] = actionLikelihood

        log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples)
        betaWeights = betaWeights / sum(betaWeights)

        betaAncestors[:] = useful_functions.stratified_resampling(betaWeights)

        # update particles
        muSamples[:] = muSamplesNew
        gammaSamples[:] = gammaSamplesNew
        logThetaWeights[:] = logThetaWeightsNew[betaAncestors]
        ancestorSamples[:] = currentSamples
        temperatureAncestors[:] = temperatures

    return log_proba_
Exemplo n.º 9
0
def _weighted_mean_std(weights, values):
    '''
    Return (mean, standard deviation) of `values` under `weights`.

    `weights` is expected to be normalised (sum to one); the variance is
    computed as E[x^2] - E[x]^2.
    '''
    mean = np.sum(weights * values)
    std = np.sqrt(np.sum(weights * values**2) - mean**2)
    return mean, std


def _plot_gaussian_summary(position, grid, mean, std, colour, xlabel, ylabel,
                           **pdf_style):
    '''
    Redraw panel `position` of the 3x2 progress figure with a normal pdf of the
    given mean/std evaluated on `grid`, plus a vertical line at the mean.
    '''
    plt.subplot(3, 2, position)
    # Explicit clear replaces the plt.hold(False)/plt.hold(True) toggling,
    # which no longer exists in Matplotlib >= 3.0.
    plt.cla()
    plt.plot(grid, norm.pdf(grid, mean, std), colour, **pdf_style)
    plt.plot([mean, mean], plt.gca().get_ylim(), colour, linewidth=2)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)


def _plot_ibis_progress(t_idx, mean_Q, esslist, samples, weights, n_alpha,
                        nb_samples, apply_rep_bias, temperature):
    '''
    Refresh the progress figure: weighted mean Q-values, Gaussian summaries of
    the repetition bias, the (first) softmax parameter and the first learning
    rate, and the effective-sample-size trace.
    '''
    plt.subplot(3, 2, 1)
    plt.cla()
    plt.plot(range(t_idx), mean_Q[:t_idx], 'm', linewidth=2)
    plt.xlabel('trials')
    plt.ylabel('Q values')

    if apply_rep_bias:
        # The repetition bias is always stored in the last column of `samples`.
        mean_rep, std_rep = _weighted_mean_std(weights, samples[:, -1])
        # NOTE(review): the x axis of this panel is the parameter value, not
        # trials; the labels are kept as they were.
        _plot_gaussian_summary(2, np.linspace(-2., 2., 5000), mean_rep, std_rep,
                               'g', 'trials', 'rep param')

    # The first beta column sits right after the n_alpha learning-rate columns
    # (column 1 only when there is a single learning rate).
    raw_beta = samples[:, n_alpha]
    if temperature:
        beta_values = 1. / raw_beta
    else:
        beta_values = 10**raw_beta
    mean_beta, std_beta = _weighted_mean_std(weights, beta_values)
    _plot_gaussian_summary(3, np.linspace(0.01, 200., 5000), mean_beta, std_beta,
                           'g', 'beta softmax', 'pdf', linewidth=2)

    mean_alpha_0, std_alpha_0 = _weighted_mean_std(weights, samples[:, 0])
    _plot_gaussian_summary(4, np.linspace(0., 1., 5000), mean_alpha_0,
                           std_alpha_0, 'm', 'learning rate (majenta)', 'pdf',
                           linewidth=2)

    plt.subplot(3, 2, 5)
    plt.cla()
    plt.plot(range(t_idx), esslist[:t_idx], 'b', linewidth=2)
    plt.plot(plt.gca().get_xlim(), [nb_samples / 2, nb_samples / 2], 'b--',
             linewidth=2)
    plt.axis([0, t_idx - 1, 0, nb_samples])  # For speed
    plt.xlabel('trials')
    plt.ylabel('ess')

    plt.draw()
    plt.show()
    plt.pause(0.05)


def ibis(actions, rewards, tau, subj_idx, apply_rep_bias, show_progress=True,
         temperature=True, model_id=0):
    '''
    Sequential importance sampling with resample-move steps for a two-action
    Q-learning model with a softmax choice rule.

    A population of parameter samples is reweighted trial by trial with the
    likelihood of the observed action. Whenever the effective sample size drops
    below half the population, samples are resampled and moved with an
    independent Gaussian proposal fitted to the weighted population and
    truncated to the parameter support.

    model_id = 0 : 1 alpha, 1 beta
    model_id = 1 : n alpha, 1 beta
    model_id = 2 : n alpha, n beta

    Parameters
    ----------
    actions : sequence of 0/1 choices, one per trial.
    rewards : per-trial outcome of the chosen action; the unchosen action is
        updated with ``1 - reward``.
    tau : per-trial condition labels; for model_id 1 and 2 the rank of each
        label among the unique labels selects the parameter column used on that
        trial (6 columns are allocated -- assumes at most 6 distinct labels).
    subj_idx : unused; kept for interface compatibility.
    apply_rep_bias : if truthy, a repetition-bias parameter in
        (-10, 10) is appended as the last sample column.
    show_progress : if truthy, refresh a matplotlib figure every 10 trials.
    temperature : if truthy the beta columns hold a temperature
        (beta = 1 / x, x in (0, .6]); otherwise a log10 inverse temperature
        (beta = 10**x, x in (0, 2]).
    model_id : parameter layout, see above.

    Returns
    -------
    list
        ``[samples, Q_samples, mean_Q, esslist, acceptance_l, log_weights,
        p_loglkd, marg_loglkd_l]``.

    Raises
    ------
    ValueError
        If a per-sample log-likelihood evaluates to NaN.
    '''
    actions = np.asarray(actions, dtype=np.intc)
    rewards = np.ascontiguousarray(rewards)
    nb_samples = 1000
    T = actions.shape[0]
    upp_bound_eta = 10.

    # sample initialisation: x_coor_a / x_coor_b give, for every trial, the
    # column of `samples` holding the learning rate / softmax parameter.
    if model_id == 2:
        n_alpha = 6
        n_beta = 6
        tau_unique = np.unique(tau)
        x_coor_a = np.array([np.where(tau_unique == t)[0][0] for t in tau])
        x_coor_b = x_coor_a + n_alpha
    elif model_id == 1:
        n_alpha = 6
        n_beta = 1
        tau_unique = np.unique(tau)
        x_coor_a = np.array([np.where(tau_unique == t)[0][0] for t in tau])
        x_coor_b = np.zeros(len(tau), dtype=np.int8) + n_alpha
    else:
        n_alpha = 1
        n_beta = 1
        x_coor_a = np.zeros(len(tau), dtype=np.int8)
        x_coor_b = np.zeros(len(tau), dtype=np.int8) + n_alpha

    n_theta = n_alpha + n_beta
    if apply_rep_bias:
        n_theta += 1

    # Uniform prior draws: alphas in [0, 1), betas in [0, upp_bound_beta),
    # repetition bias in [-upp_bound_eta, upp_bound_eta).
    samples = np.random.rand(nb_samples, n_theta)
    upp_bound_beta = .6 if temperature else 2.
    samples[:, n_alpha:(n_beta + n_alpha)] = (
        np.random.rand(nb_samples, n_beta) * upp_bound_beta)
    if apply_rep_bias:
        samples[:, -1] = upp_bound_eta * (np.random.rand(nb_samples) * 2. - 1.)

    Q_samples = np.zeros([nb_samples, 2]) + .5
    prev_action = np.zeros(nb_samples) - 1  # -1 marks "no previous action"

    # ibis param
    esslist = np.zeros(T)
    log_weights = np.zeros(nb_samples)
    weights_a = np.zeros(nb_samples)
    p_loglkd = np.zeros(nb_samples)  # cumulative log-likelihood per sample
    loglkd = np.zeros(nb_samples)    # log-likelihood of the current trial
    marg_loglkd = 0
    coefficient = .5                 # resample when ess < coefficient * nb_samples
    marg_loglkd_l = np.zeros(T)
    acceptance_l = []

    # move step param
    move_samples = np.zeros([nb_samples, n_theta])
    move_p_loglkd = np.zeros(nb_samples)
    Q_samples_move = np.zeros([nb_samples, 2])
    mean_Q = np.zeros([T, 2])

    if show_progress:
        plt.figure(figsize=(15, 9))
        plt.suptitle("noiseless rl", fontsize=14)
        plt.ion()

    # loop
    for t_idx in range(T):
        if (t_idx + 1) % 10 == 0:
            sys.stdout.write(' ' + str(t_idx + 1) + ' ')
            print('marg_loglkd ' + str(marg_loglkd))
        if (t_idx + 1) % 100 == 0:
            print('\n')

        # Every sample has seen the same action history.
        assert(len(np.unique(prev_action)) == 1)

        # update step; weights_a temporarily holds the pre-update log-weights
        weights_a[:] = log_weights

        # Per-trial quantities are identical for all samples.
        action_t = actions[t_idx]
        reward_t = rewards[t_idx]
        col_a = x_coor_a[t_idx]
        col_b = x_coor_b[t_idx]

        for n_idx in range(nb_samples):
            alpha = samples[n_idx, col_a]
            if temperature:
                beta = 1. / samples[n_idx, col_b]
            else:
                beta = 10**samples[n_idx, col_b]

            logit = beta * (Q_samples[n_idx, 0] - Q_samples[n_idx, 1])
            if apply_rep_bias and prev_action[n_idx] != -1:
                # Bias towards repeating the previous action.
                logit = logit - np.sign(prev_action[n_idx] - .5) * samples[n_idx, -1]
            value = 1. / (1. + np.exp(logit))  # probability of action 1
            loglkd[n_idx] = np.log((value**action_t) * (1 - value)**(1 - action_t))
            prev_action[n_idx] = action_t

            if np.isnan(loglkd[n_idx]):
                raise ValueError(
                    'NaN log-likelihood at trial %s, sample %s (beta=%s, value=%s)'
                    % (t_idx, n_idx, beta, value))

            p_loglkd[n_idx] = p_loglkd[n_idx] + loglkd[n_idx]
            log_weights[n_idx] = log_weights[n_idx] + loglkd[n_idx]

            # The chosen action learns from the reward, the other one from
            # its complement.
            if action_t == 0:
                Q_samples[n_idx, 0] = (1 - alpha) * Q_samples[n_idx, 0] + alpha * reward_t
                Q_samples[n_idx, 1] = (1 - alpha) * Q_samples[n_idx, 1] + alpha * (1 - reward_t)
            else:
                Q_samples[n_idx, 0] = (1 - alpha) * Q_samples[n_idx, 0] + alpha * (1 - reward_t)
                Q_samples[n_idx, 1] = (1 - alpha) * Q_samples[n_idx, 1] + alpha * reward_t

        # Incremental marginal likelihood of the current trial.
        marg_loglkd += logsumexp(weights_a + loglkd) - logsumexp(weights_a)
        marg_loglkd_l[t_idx] = marg_loglkd
        ess = np.exp(2 * logsumexp(log_weights) - logsumexp(2 * log_weights))
        esslist[t_idx] = ess

        weights_a[:] = uf.to_normalized_weights(log_weights)
        mean_Q[t_idx] = np.sum((Q_samples.T * weights_a).T, axis=0)

        # move step
        if ess < coefficient * nb_samples:
            idxTrajectories = uf.stratified_resampling(weights_a)
            # Gaussian proposal fitted to the weighted population.
            mu_p = np.sum(samples.T * weights_a, axis=1)
            Sigma_p = np.dot((samples - mu_p).T * weights_a, (samples - mu_p))
            nb_acceptance = 0.

            for n_idx in range(nb_samples):
                idx_traj = idxTrajectories[n_idx]

                # Rejection-sample the proposal until it lies in the support.
                while True:
                    sample_p = multi_norm(mu_p, Sigma_p)
                    alphas_p = sample_p[:n_alpha]
                    betas_p = sample_p[n_alpha:(n_beta + n_alpha)]
                    in_support = (np.all(alphas_p > 0) and np.all(alphas_p < 1)
                                  and np.all(betas_p > 0)
                                  and np.all(betas_p <= upp_bound_beta))
                    if in_support and apply_rep_bias:
                        in_support = (sample_p[-1] > -upp_bound_eta
                                      and sample_p[-1] < upp_bound_eta)
                    if in_support:
                        break

                # Replay all trials seen so far under the proposed parameters.
                [loglkd_prop, Q_prop, prev_action_prop] = get_loglikelihood(
                    sample_p, x_coor_a, x_coor_b, rewards, actions, t_idx + 1,
                    apply_rep_bias, temperature)
                log_ratio = (loglkd_prop - p_loglkd[idx_traj]
                             + get_logtruncnorm(samples[idx_traj], mu_p, Sigma_p)
                             - get_logtruncnorm(sample_p, mu_p, Sigma_p))

                log_ratio = np.minimum(log_ratio, 0)
                if np.log(np.random.rand()) < log_ratio:
                    nb_acceptance += 1.
                    move_samples[n_idx] = sample_p
                    move_p_loglkd[n_idx] = loglkd_prop
                    Q_samples_move[n_idx] = Q_prop
                else:
                    move_samples[n_idx] = samples[idx_traj]
                    move_p_loglkd[n_idx] = p_loglkd[idx_traj]
                    Q_samples_move[n_idx] = Q_samples[idx_traj]

            print('acceptance ratio %s' % str(nb_acceptance / nb_samples))
            assert(prev_action_prop == prev_action[0])

            acceptance_l.append(nb_acceptance / nb_samples)
            # move samples; the population is equally weighted afterwards
            samples[:] = move_samples
            p_loglkd[:] = move_p_loglkd
            log_weights[:] = 0.
            Q_samples[:] = Q_samples_move

        if show_progress and t_idx % 10 == 0:
            _plot_ibis_progress(t_idx, mean_Q, esslist, samples,
                                uf.to_normalized_weights(log_weights), n_alpha,
                                nb_samples, apply_rep_bias, temperature)

    return [samples, Q_samples, mean_Q, esslist, acceptance_l, log_weights, p_loglkd, marg_loglkd_l]