Пример #1
0
def SMC2(td,
         beta_softmax=1.,
         numberOfStateSamples=200,
         numberOfThetaSamples=200,
         numberOfBetaSamples=50,
         coefficient=.5,
         latin_hyp_sampling=True):
    """Filter a recorded behavioural sequence with the forward varying volatility model.

    Three nested particle layers are maintained: ``numberOfBetaSamples``
    outer particles, each holding ``numberOfThetaSamples`` parameter particles
    (mu, nu, gamma), each of which holds ``numberOfStateSamples`` state
    particles (task set and tau).  At every trial the theta weights are
    updated through ``smc_c.bootstrapUpdateStep_c``, the theta particles are
    re-drawn from moment-matched proposals when the effective sample size
    falls below ``coefficient * numberOfThetaSamples``, and the outer
    particles are resampled according to the likelihood they assign to the
    action actually chosen.

    Parameters
    ----------
    td : mapping
        Task description.  Keys read here: ``'S'`` (stimulus per trial),
        ``'action_num'``, ``'state_num'``, ``'reward'``, ``'A_chosen'``
        (recorded reward / action per trial) and ``'tau'`` (a scalar, or an
        indexable whose first element is used).  ``.astype(int)`` is called
        on single entries of ``'S'`` and ``'A_chosen'``, so these are
        presumably numpy arrays -- confirm against the caller.
    beta_softmax : float
        Exponent applied to each marginal action probability before
        normalisation (``p ** beta_softmax``).
    numberOfStateSamples, numberOfThetaSamples, numberOfBetaSamples : int
        Sizes of the three particle layers.
    coefficient : float
        ESS threshold, as a fraction of ``numberOfThetaSamples``, below which
        the theta particles are re-drawn.
    latin_hyp_sampling : bool
        For K == 24: draw quantiles with ``mcerp.lhd`` when true, otherwise
        use the pre-computed Sobol points stored on disk.  Must stay true
        when K == 2, where it has no effect.

    Returns
    -------
    log_proba_ : float
        Sum over trials of ``log(mean over outer particles of the
        probability of the chosen action)``.
    filt_actionLkd : numpy.ndarray
        Array of shape ``[numberOfTrials, numberOfBetaSamples,
        numberOfActions]`` with the normalised action probabilities of every
        outer particle at every trial.

    Raises
    ------
    ValueError
        If K == 2 and ``latin_hyp_sampling`` is false.
    IndexError
        If the number of task sets K is neither 2 nor 24.
    AssertionError
        If a moment-matched proposal parameter is not strictly positive.
    """

    print('\n')
    print('Forward Varying Volatility Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    # uniform distribution
    if latin_hyp_sampling:
        d0 = uniform()
        print('latin hypercube sampling')
    else:
        print('sobolev sampling')

    # Extract parameters from task description
    stimuli = td['S']  # Sequence of Stimuli
    numberOfActions = td['action_num']  # Number of Actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    rewards = td['reward']
    actions = td['A_chosen']
    # K = A * (A - 1) * ... * (A - S + 1)
    K = np.prod(
        np.arange(numberOfActions +
                  1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(stimuli)  # Number of Trials

    # verification
    if K == 2 and not latin_hyp_sampling:
        raise ValueError(
            'Why did you change the latin_hyp_sampling? By default, it is True and has no influence when K=2.'
        )

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    nuPrior = np.array([
        3, 1e-3
    ])  # Prior on Nu, the variance on the projected gaussian random walk
    gammaPrior = numpy.ones(K)  # Prior on Gamma, the Dirichlet parameter
    # td['tau'] may be a scalar or an indexable; only indexing failures
    # trigger the scalar fallback.
    tau = td['tau']
    try:
        tauDefault = tau[0]
    except (TypeError, IndexError, KeyError):
        tauDefault = tau
    log_proba_ = 0.

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T

    betaWeights = np.zeros(numberOfBetaSamples)
    betaAncestors = np.arange(numberOfBetaSamples)

    # Probabilities of every actions updated at every time step -> Used to take the decision
    actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions])
    sum_actionLik = np.zeros(numberOfBetaSamples)
    filt_actionLkd = np.zeros(
        [numberOfTrials, numberOfBetaSamples, numberOfActions])

    # Keep track of probability correct/exploration after switches
    tsProbability = np.zeros([numberOfBetaSamples, K])
    sum_tsProbability = np.zeros(numberOfBetaSamples)

    # SMC particles initialisation
    muSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    nuSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])

    if K == 24:
        if not latin_hyp_sampling:
            # The pre-computed Sobol points are only needed when latin
            # hypercube sampling is off; otherwise they would be overwritten
            # before any use.  Two relative locations are tried.
            # NOTE(review): the file name suggests a 200 x 26 table, i.e.
            # numberOfThetaSamples == 200 and K + 2 == 26 -- confirm.
            # NOTE(review): pickle.load must only be used on trusted files.
            try:
                with open('../../utils/sobol_200_26.pkl', 'rb') as sobol_file:
                    latin_hyp_samples = pickle.load(sobol_file)
            except (IOError, OSError):
                with open('../../models/utils/sobol_200_26.pkl',
                          'rb') as sobol_file:
                    latin_hyp_samples = pickle.load(sobol_file)
        for beta_idx in range(numberOfBetaSamples):
            if latin_hyp_sampling:
                latin_hyp_samples = mcerp.lhd(dist=d0,
                                              size=numberOfThetaSamples,
                                              dims=K + 2)
            # Column 0 -> mu, column 1 -> nu, columns 2.. -> gamma; the
            # uniform quantiles are pushed through the prior inverse CDFs.
            muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0],
                                              betaPrior[0], betaPrior[1])
            nuSamples[beta_idx] = useful_functions.ppf_inv_gamma(
                latin_hyp_samples[:, 1], nuPrior[0], nuPrior[1])
            gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 2:],
                                                  gammaPrior)
            # Normalise each row of gamma draws so that it sums to one.
            gammaSamples[beta_idx] = np.transpose(
                gammaSamples[beta_idx].T /
                np.sum(gammaSamples[beta_idx], axis=1))
    elif K == 2:
        muSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                   [numberOfBetaSamples, numberOfThetaSamples])
        nuSamples = useful_functions.sample_inv_gamma(
            nuPrior[0], nuPrior[1],
            [numberOfBetaSamples, numberOfThetaSamples])
        gammaSamples = np.random.dirichlet(
            gammaPrior, [numberOfBetaSamples, numberOfThetaSamples])
    else:
        raise IndexError('Wrong number of task sets')

    muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    nuSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    normalisedThetaWeights = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples])

    logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    currentStateSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    currentTauSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.double)
    ancestorStateSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    ancestorTauSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.double)
    ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples
                                ]) / numberOfStateSamples
    essList = np.zeros(numberOfTrials)

    # Guided SMC variables
    dirichletParamCandidates = np.zeros(K)

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0: print('\n')

        for beta_idx in range(numberOfBetaSamples):

            ances = betaAncestors[beta_idx]
            # Update theta weights
            # NOTE(review): at T == 0 the [T - 1] indices wrap around to the
            # last trial; presumably the C routine ignores them then --
            # confirm in smc_c.
            smc_c.bootstrapUpdateStep_c(
                currentStateSamples[beta_idx], logThetaWeights[beta_idx],
                currentTauSamples[beta_idx], gammaSamples[ances],
                muSamples[ances] / 2. + 1. / 2, nuSamples[ances], tauDefault,
                T,
                np.ascontiguousarray(ancestorStateSamples[ances],
                                     dtype=np.intc),
                ancestorTauSamples[ances], ancestorsWeights,
                np.ascontiguousarray(mapping), stimuli[T - 1], actions[T - 1],
                rewards[T - 1])

            # Degeneracy criterion: ESS = (sum w)^2 / sum(w^2), in log space.
            # essList[T] is overwritten for every outer particle and only
            # serves as a scratch value for the test below.
            logEss = 2 * useful_functions.log_sum(
                logThetaWeights[beta_idx]) - useful_functions.log_sum(
                    2 * logThetaWeights[beta_idx])
            essList[T] = np.exp(logEss)

            # Move step
            normalisedThetaWeights[
                beta_idx] = useful_functions.to_normalized_weights(
                    logThetaWeights[beta_idx])
            if (essList[T] < coefficient * numberOfThetaSamples):
                # Moment-match a Beta proposal to the weighted mu particles.
                betaMu = np.sum(normalisedThetaWeights[beta_idx] *
                                muSamples[ances])
                betaVar = np.sum(normalisedThetaWeights[beta_idx] *
                                 (muSamples[ances] - betaMu)**2)
                betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
                betaBeta = betaAlpha * (1 / betaMu - 1)
                assert (betaAlpha > 0)
                assert (betaBeta > 0)
                # Moment-match an inverse-gamma proposal to the nu particles.
                nuMu = np.sum(normalisedThetaWeights[beta_idx] *
                              nuSamples[ances])
                nuVar = np.sum(normalisedThetaWeights[beta_idx] *
                               (nuSamples[ances] - nuMu)**2)
                nuAlpha = nuMu**2 / nuVar + 2
                nuBeta = nuMu * (nuAlpha - 1)
                assert (nuAlpha > 0)
                assert (nuBeta > 0)
                # Moment-match a Dirichlet proposal to the gamma particles;
                # its parameters are floored at 1.
                dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] *
                                        gammaSamples[ances].T,
                                        axis=1)
                dirichletVar = np.sum(normalisedThetaWeights[beta_idx] *
                                      (gammaSamples[ances]**2).T,
                                      axis=1) - dirichletMeans**2
                dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2
                                            ) / (np.sum(dirichletVar)) - 1
                dirichletParamCandidates[:] = np.maximum(
                    dirichletMeans * dirichletPrecision, 1.)
                assert ((dirichletParamCandidates > 0).all())
                if K == 2:
                    nuSamplesNew[beta_idx] = useful_functions.sample_inv_gamma(
                        nuAlpha, nuBeta, numberOfThetaSamples)
                    muSamplesNew[beta_idx] = np.random.beta(
                        betaAlpha, betaBeta, numberOfThetaSamples)
                    gammaSamplesNew[beta_idx] = np.random.dirichlet(
                        dirichletParamCandidates, numberOfThetaSamples)
                elif K == 24:
                    if latin_hyp_sampling:
                        latin_hyp_samples = mcerp.lhd(
                            dist=d0, size=numberOfThetaSamples, dims=K + 2)
                    muSamplesNew[beta_idx] = betalib.ppf(
                        latin_hyp_samples[:, 0], betaAlpha, betaBeta)
                    nuSamplesNew[beta_idx] = useful_functions.ppf_inv_gamma(
                        latin_hyp_samples[:, 1], nuAlpha, nuBeta)
                    gammaSamplesNew[beta_idx] = gammalib.ppf(
                        latin_hyp_samples[:, 2:], dirichletParamCandidates)
                    gammaSamplesNew[beta_idx] = np.transpose(
                        gammaSamplesNew[beta_idx].T /
                        np.sum(gammaSamplesNew[beta_idx], axis=1))

                # Freshly drawn particles start with uniform weights.
                logThetaWeightsNew[beta_idx] = 0.
                normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples

            else:
                # No rejuvenation: inherit the ancestor's particles as they are.
                muSamplesNew[beta_idx] = muSamples[ances]
                gammaSamplesNew[beta_idx] = gammaSamples[ances]
                nuSamplesNew[beta_idx] = nuSamples[ances]
                logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx]

        # task set probability
        sum_tsProbability[:] = 0.
        for ts_idx in range(K):
            tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentStateSamples == ts_idx), axis=2),
                                              axis=1)
            sum_tsProbability += tsProbability[:, ts_idx]

        tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability)

        # Compute action likelihood: mass of the task sets that map the
        # current stimulus to each action, raised to the power beta_softmax.
        sum_actionLik[:] = 0.
        for action_idx in range(numberOfActions):
            actionLikelihood[:, action_idx] = np.exp(
                np.log(
                    np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] ==
                                         action_idx],
                           axis=1)) * beta_softmax)
            sum_actionLik += actionLikelihood[:, action_idx]

        # `rewards` and `actions` are the td['reward'] / td['A_chosen']
        # sequences themselves, so trial T is already available here.
        actionLikelihood[:] = np.transpose(actionLikelihood.T / sum_actionLik)
        betaWeights[:] = actionLikelihood[:, actions[T].astype(int)]

        filt_actionLkd[T] = actionLikelihood

        log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples)
        betaWeights = betaWeights / sum(betaWeights)

        betaAncestors[:] = useful_functions.stratified_resampling(betaWeights)

        # update particles
        muSamples[:] = muSamplesNew
        gammaSamples[:] = gammaSamplesNew
        nuSamples[:] = nuSamplesNew
        logThetaWeights[:] = logThetaWeightsNew[betaAncestors]
        ancestorTauSamples[:] = currentTauSamples
        ancestorStateSamples[:] = currentStateSamples

    return log_proba_, filt_actionLkd
def SMC2(td,
         show_progress=False,
         numberOfStateSamples=1000,
         numberOfThetaSamples=1000,
         coefficient=.5):
    """Run the SMC^2 filter of the 'Constant Volatility Model' on a recorded sequence.

    ``numberOfThetaSamples`` parameter particles (beta, tau, gamma) each own
    ``numberOfStateSamples`` task-set particles.  At every trial the theta
    weights are updated through ``smc_c.guidedUpdateStep_c``; when the
    effective sample size falls below ``coefficient * numberOfThetaSamples``
    (and the previous move step accepted more than 5 % of its proposals) the
    theta particles are resampled and moved with an independent
    Metropolis-Hastings step whose likelihood is estimated by
    ``smc_c.guidedSmc_c``.  The state particles are then propagated with
    ``smc_c.bootstrapUpdateStep_c``.

    Parameters
    ----------
    td : mapping
        Task description.  Keys read here: ``'S'`` (stimuli), ``'Z'`` (true
        task sets, used for the trial count and for plotting),
        ``'action_num'``, ``'state_num'``, ``'reward'`` and ``'A_chosen'``;
        ``'tau'`` and ``'beta'`` are read only when ``show_progress`` is true.
    show_progress : bool
        Draw diagnostic matplotlib panels at every trial.
    numberOfStateSamples, numberOfThetaSamples : int
        Sizes of the inner (state) and outer (theta) particle layers.
    coefficient : float
        ESS threshold, as a fraction of ``numberOfThetaSamples``.

    Returns
    -------
    list
        ``[td, tauSamples, volTracking, volStdTracking, betaSamples,
        betaTracking, betaStdTracking, gammaSamples, tsProbability,
        countPerformance, actions, acceptance_list, essList, time_list,
        elapsed_time]``.  ``countPerformance`` is never updated in this
        function and is returned as all zeros.

    Raises
    ------
    AssertionError
        If a moment-matched proposal parameter is not strictly positive.
    """

    print('\n')
    print('Constant Volatility Model')
    print('\n')

    #Start timer
    start_time_multi = time.time()

    # Extract parameters from task description
    stimuli = td['S']  # Sequence of Stimuli
    Z_true = td['Z']  # Sequence of Task Sets
    numberOfActions = td['action_num']  # Number of Actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    # K = A * (A - 1) * ... * (A - S + 1)
    K = np.prod(
        np.arange(numberOfActions +
                  1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(Z_true)  # Number of Trials

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    tauPrior = np.array(
        [1, 1])  # Prior on Tau, the switch parameter (the volatility)
    gammaPrior = numpy.ones(K)  # Prior on Gamma, the Dirichlet parameter

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T

    # Filled in trial by trial from td; -1 / False mark "not observed yet".
    actions = np.zeros(numberOfTrials) - 1
    rewards = np.zeros(numberOfTrials, dtype=bool)

    # Keep track of probability correct/exploration after switches
    countPerformance = np.zeros(
        numberOfTrials)  # Number of correct actions after i trials
    tsProbability = np.zeros([numberOfTrials, K])
    acceptanceProba = 0.
    volTracking = np.zeros(numberOfTrials)
    volStdTracking = np.zeros(numberOfTrials)
    betaTracking = np.zeros(numberOfTrials)
    betaStdTracking = np.zeros(numberOfTrials)
    acceptance_list = [1.]
    # First entry is the absolute start time; later entries are elapsed times.
    time_list = [start_time_multi]

    # SMC particles initialisation
    betaSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                 numberOfThetaSamples)
    tauSamples = np.random.beta(tauPrior[0], tauPrior[1], numberOfThetaSamples)
    gammaSamples = np.random.dirichlet(gammaPrior, numberOfThetaSamples)
    logThetaWeights = np.zeros(numberOfThetaSamples)
    logThetaLks = np.zeros(numberOfThetaSamples)
    currentSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples],
                              dtype=np.intc)
    ancestorSamples = np.zeros([numberOfThetaSamples, numberOfStateSamples],
                               dtype=np.intc)
    ancestorsWeights = np.ones([numberOfThetaSamples, numberOfStateSamples
                                ]) / numberOfStateSamples
    essList = np.zeros(numberOfTrials)

    # Guided SMC variables
    betaSamplesNew = np.zeros(numberOfThetaSamples)
    tauSamplesNew = np.zeros(numberOfThetaSamples)
    gammaSamplesNew = np.zeros([numberOfThetaSamples, K])
    stateSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples],
                               dtype=np.intc)
    weightsSamplesNew = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    logThetaLksNew = np.zeros(numberOfThetaSamples)
    dirichletParamCandidates = np.zeros(K)
    # Scratch buffers handed to smc_c.guidedSmc_c for every candidate.
    stateSamplesCandidates = np.zeros(numberOfStateSamples, dtype=np.intc)
    weightsSamplesCandidates = np.zeros(numberOfStateSamples)
    idxTrajectories = np.zeros(numberOfThetaSamples)

    # Plot progress
    if show_progress: plt.figure(figsize=(12, 9))

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
            time_list.append(time.time() - start_time_multi)
        if (T + 1) % 100 == 0: print('\n')

        if T > 0:
            # NOTE(review): at T == 1, stimuli[T - 2] wraps around to the last
            # stimulus; presumably the C routine ignores it then -- confirm.
            smc_c.guidedUpdateStep_c(
                logThetaLks, logThetaWeights,
                np.ascontiguousarray(currentSamples), gammaSamples,
                betaSamples / 2. + 1. / 2, tauSamples / 2., T,
                np.ascontiguousarray(ancestorSamples), ancestorsWeights,
                np.ascontiguousarray(mapping), stimuli[T - 2], stimuli[T - 1],
                rewards[T - 1], actions[T - 1])
            ancestorSamples = np.array(currentSamples)

        # Degeneracy criterion: ESS = (sum w)^2 / sum(w^2), in log space.
        logEss = 2 * useful_functions.log_sum(
            logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights)
        essList[T] = np.exp(logEss)

        # Move step
        normalisedThetaWeights = useful_functions.to_normalized_weights(
            logThetaWeights)
        # acceptance_list only grows inside this branch, so once an
        # acceptance ratio of 0.05 or less is recorded no further move step
        # is attempted.
        if (essList[T] < coefficient * numberOfThetaSamples) and (
                acceptance_list[-1] > 0.05):
            acceptanceProba = 0.
            # Moment-match Beta proposals to the weighted tau / beta particles
            # and a Dirichlet proposal to the weighted gamma particles.
            tauMu = np.sum(normalisedThetaWeights * tauSamples)
            tauVar = np.sum(normalisedThetaWeights * (tauSamples - tauMu)**2)
            tauAlpha = ((1 - tauMu) / tauVar - 1 / tauMu) * tauMu**2
            tauBeta = tauAlpha * (1 / tauMu - 1)
            assert (tauAlpha > 0)
            assert (tauBeta > 0)
            betaMu = np.sum(normalisedThetaWeights * betaSamples)
            betaVar = np.sum(normalisedThetaWeights *
                             (betaSamples - betaMu)**2)
            betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
            betaBeta = betaAlpha * (1 / betaMu - 1)
            assert (betaAlpha > 0)
            assert (betaBeta > 0)
            dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T,
                                    axis=1)
            dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T,
                                  axis=1) - dirichletMeans**2
            dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / (
                np.sum(dirichletVar)) - 1
            dirichletParamCandidates = dirichletMeans * dirichletPrecision
            assert ((dirichletParamCandidates > 0).all())

            idxTrajectories = useful_functions.stratified_resampling(
                normalisedThetaWeights)

            for theta_idx in range(numberOfThetaSamples):
                tauCandidate = np.random.beta(tauAlpha, tauBeta)
                betaCandidate = np.random.beta(betaAlpha, betaBeta)
                gammaCandidate = np.random.dirichlet(dirichletParamCandidates)

                # Launch guidedSMC on the history up to (excluding) trial T;
                # the candidate buffers are filled in place.
                logLksCandidate = smc_c.guidedSmc_c(
                    np.ascontiguousarray(stateSamplesCandidates),
                    weightsSamplesCandidates, gammaCandidate,
                    betaCandidate / 2. + 1. / 2, tauCandidate / 2.,
                    np.ascontiguousarray(mapping),
                    np.ascontiguousarray(stimuli[:T], dtype=np.intc),
                    np.ascontiguousarray(rewards[:T], dtype=np.intc),
                    np.ascontiguousarray(actions[:T], dtype=np.intc),
                    numberOfStateSamples)

                # Update a trajectory
                idx_traj = idxTrajectories[theta_idx]

                # Only the Dirichlet prior appears in the prior ratio.
                priorsLogRatio = useful_functions.log_dirichlet_pdf(
                    gammaCandidate,
                    gammaPrior) - useful_functions.log_dirichlet_pdf(
                        gammaSamples[idx_traj], gammaPrior)

                # Proposal density of the current particle minus that of the
                # candidate (independence sampler correction).
                transLogRatio = (
                    useful_functions.log_beta_pdf(tauSamples[idx_traj],
                                                  tauAlpha, tauBeta) +
                    useful_functions.log_beta_pdf(betaSamples[idx_traj],
                                                  betaAlpha, betaBeta) +
                    useful_functions.log_dirichlet_pdf(
                        gammaSamples[idx_traj], dirichletParamCandidates) -
                    useful_functions.log_beta_pdf(tauCandidate, tauAlpha,
                                                  tauBeta) -
                    useful_functions.log_beta_pdf(betaCandidate, betaAlpha,
                                                  betaBeta) -
                    useful_functions.log_dirichlet_pdf(
                        gammaCandidate, dirichletParamCandidates))

                logLkdRatio = logLksCandidate - logThetaLks[idx_traj]

                logAlpha = min(0, priorsLogRatio + transLogRatio + logLkdRatio)

                U = np.random.rand()

                # Accept or Reject
                if np.log(U) < logAlpha:
                    acceptanceProba += 1.
                    betaSamplesNew[theta_idx] = betaCandidate
                    tauSamplesNew[theta_idx] = tauCandidate
                    gammaSamplesNew[theta_idx] = gammaCandidate
                    stateSamplesNew[theta_idx] = stateSamplesCandidates
                    logThetaLksNew[theta_idx] = logLksCandidate
                    weightsSamplesNew[theta_idx] = weightsSamplesCandidates
                else:
                    betaSamplesNew[theta_idx] = betaSamples[idx_traj]
                    tauSamplesNew[theta_idx] = tauSamples[idx_traj]
                    gammaSamplesNew[theta_idx] = gammaSamples[idx_traj]
                    stateSamplesNew[theta_idx] = ancestorSamples[idx_traj]
                    logThetaLksNew[theta_idx] = logThetaLks[idx_traj]
                    weightsSamplesNew[theta_idx] = ancestorsWeights[idx_traj]

            print('\n')
            print('acceptance ratio is ')
            print(acceptanceProba / numberOfThetaSamples)
            print('\n')
            acceptance_list.append(acceptanceProba / numberOfThetaSamples)

            # Copy the moved population back; weights become uniform again.
            ancestorsWeights = np.array(weightsSamplesNew)
            logThetaLks = np.array(logThetaLksNew)
            logThetaWeights = np.zeros(numberOfThetaSamples)
            ancestorSamples = np.array(stateSamplesNew)
            betaSamples = np.array(betaSamplesNew)
            tauSamples = np.array(tauSamplesNew)
            gammaSamples = np.array(gammaSamplesNew)
            normalisedThetaWeights = useful_functions.to_normalized_weights(
                logThetaWeights)

        # Launch bootstrap update
        # NOTE(review): at T == 0, stimuli[T - 1] is the last stimulus of the
        # sequence -- confirm the C routine does not use it on the first trial.
        smc_c.bootstrapUpdateStep_c(np.ascontiguousarray(currentSamples),
                                    gammaSamples, betaSamples / 2. + 1. / 2,
                                    tauSamples / 2., T,
                                    np.ascontiguousarray(ancestorSamples),
                                    ancestorsWeights,
                                    np.ascontiguousarray(mapping),
                                    stimuli[T - 1])

        # Take decision
        # Weighted count of state particles sitting on each task set
        # (not normalised over task sets).
        for ts_idx in range(K):
            tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentSamples == ts_idx), axis=1))
        # Select action and compute vol
        # NOTE(review): the *StdTracking arrays hold weighted variances, not
        # standard deviations, although they are used as a spread / scale in
        # the plots below -- confirm whether a square root is intended.
        volTracking[T] = np.sum(normalisedThetaWeights * tauSamples)
        volStdTracking[T] = np.sum(normalisedThetaWeights *
                                   (tauSamples - volTracking[T])**2)

        betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples)
        betaStdTracking[T] = np.sum(normalisedThetaWeights *
                                    (betaSamples - betaTracking[T])**2)

        # Reveal the recorded outcome of trial T for the next update.
        rewards[T] = td['reward'][T]
        actions[T] = td['A_chosen'][T]

        if show_progress:
            # pyplot.hold() no longer exists in Matplotlib >= 3.0; each panel
            # is cleared explicitly and the curves are then overlaid.
            plt.subplot(3, 2, 1)
            plt.cla()
            plt.imshow(tsProbability[:T].T, aspect='auto')
            plt.plot(Z_true[:T], 'w--')
            plt.axis([0, T - 1, 0, K - 1])  # For speed
            plt.xlabel('trials')
            plt.ylabel('p(TS|past) at current time')

            plt.subplot(3, 2, 2)
            plt.cla()
            plt.plot(volTracking[:T], 'b')
            plt.fill_between(np.arange(T),
                             volTracking[:T] - volStdTracking[:T],
                             volTracking[:T] + volStdTracking[:T],
                             facecolor=[.5, .5, 1],
                             color=[.5, .5, 1])
            plt.plot(td['tau'], 'b--', linewidth=2)
            plt.axis([0, T - 1, 0, .5])  # For speed
            plt.xlabel('trials')
            plt.ylabel('Volatility')

            plt.subplot(3, 2, 3)
            plt.cla()
            x = np.linspace(0.01, .99, 100)
            plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]),
                     'r')
            plt.plot([betaTracking[T], betaTracking[T]],
                     plt.gca().get_ylim(),
                     'r',
                     linewidth=2)
            plt.plot([td['beta'], td['beta']],
                     plt.gca().get_ylim(),
                     'r--',
                     linewidth=2)
            plt.xlabel('Parameters')
            plt.ylabel('Gaussian pdf')

            plt.subplot(3, 2, 4)
            plt.cla()
            plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2)
            plt.plot(plt.gca().get_xlim(), [
                coefficient * numberOfThetaSamples,
                coefficient * numberOfThetaSamples
            ],
                     'g--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, numberOfThetaSamples])
            plt.xlabel('trials')
            plt.ylabel('ESS')

            plt.subplot(3, 2, 5)
            plt.cla()
            plt.plot(np.divide(countPerformance[:T],
                               np.arange(T) + 1),
                     'k--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, 1])
            plt.xlabel('Trials')
            plt.ylabel('Performance')

            plt.draw()

    elapsed_time = time.time() - start_time_multi

    return [
        td, tauSamples, volTracking, volStdTracking, betaSamples, betaTracking,
        betaStdTracking, gammaSamples, tsProbability, countPerformance,
        actions, acceptance_list, essList, time_list, elapsed_time
    ]
Пример #3
0
def SMC2(td,
         show_progress=True,
         numberOfStateSamples=1000,
         numberOfThetaSamples=1000,
         coefficient=.5,
         beta_softmax=None):
    """Simulate an agent using the 'Varying Volatility Model' on a task.

    On every trial the theta particles (beta, nu, gamma) and their state
    particles are handed to ``smc_c.bootstrapUpdateStep_c`` together with the
    previous trial's stimulus, action and reward; the theta particles are
    re-drawn from moment-matched proposals when the effective sample size
    (ESS) is too low; an action is then chosen for the current trial and
    rewarded according to the task description.

    Args:
        td: Task description mapping. Keys read here: ``'S'`` (stimuli),
            ``'Z'`` (true task sets), ``'action_num'``, ``'state_num'``,
            ``'tau'`` (sequence or scalar), ``'trap'``, ``'A_correct'`` (only
            when K == 24) and ``'beta'`` (only when ``show_progress``).
        show_progress: When true, redraw a 3x2 matplotlib figure every trial.
        numberOfStateSamples: Number of state particles per theta particle.
        numberOfThetaSamples: Number of theta particles.
        coefficient: The theta particles are re-drawn when the ESS falls
            below ``coefficient * numberOfThetaSamples``.
        beta_softmax: ``None`` selects the most likely action (argmax);
            otherwise the action probabilities are raised to this power,
            renormalised and an action is sampled from them.

    Returns:
        A 16-element list:
        ``[td, nuSamples, nuTracking, nuStdTracking, volTracking,
        volStdTracking, betaSamples, betaTracking, betaStdTracking,
        gammaSamples, tsProbability, countPerformance, actions, essList,
        time_list, elapsed_time]``.

        NOTE: despite their names, the ``*StdTracking`` arrays hold weighted
        variances (no square root is taken anywhere in this function).
    """
    print('Varying Volatility Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    # Start timer
    start_time_multi = time.time()

    # Extract parameters from task description
    stimuli = td['S']  # Sequence of stimuli
    Z_true = td['Z']  # Sequence of task sets
    numberOfActions = td['action_num']  # Number of actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    # Number of possible task sets: product of the last `numberOfStimuli`
    # integers of 0..numberOfActions.
    K = np.prod(np.arange(numberOfActions + 1)[-numberOfStimuli:])
    numberOfTrials = len(Z_true)  # Number of trials

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on beta, the feedback noise parameter
    # Prior on nu, the variance of the projected gaussian random walk
    nuPrior = np.array([3, 1e-3])
    gammaPrior = np.ones(K)  # Prior on gamma, the Dirichlet parameter
    try:
        tauDefault = td['tau'][0]
    except (TypeError, IndexError, KeyError):
        # td['tau'] is not indexable by 0 (e.g. a scalar): use it as is.
        tauDefault = td['tau']

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T

    # Per-trial action scores, marginalised over task sets
    actionLikelihood = np.zeros(numberOfActions)
    actions = np.zeros(numberOfTrials) - 1  # -1 marks "not chosen yet"
    rewards = np.zeros(numberOfTrials, dtype=bool)

    # Tracking arrays
    countPerformance = np.zeros(
        numberOfTrials)  # Number of correct actions after i trials
    tsProbability = np.zeros([numberOfTrials, K])
    volTracking = np.zeros(numberOfTrials)  # Volatility with time
    volStdTracking = np.zeros(numberOfTrials)
    nuTracking = np.zeros(numberOfTrials)
    nuStdTracking = np.zeros(numberOfTrials)
    betaTracking = np.zeros(numberOfTrials)
    betaStdTracking = np.zeros(numberOfTrials)
    # First entry is the absolute start time; later entries are elapsed
    # seconds appended every 10 trials.
    time_list = [start_time_multi]

    # SMC particles initialisation
    betaSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                 numberOfThetaSamples)
    nuSamples = useful_functions.sample_inv_gamma(nuPrior[0], nuPrior[1],
                                                  numberOfThetaSamples)
    gammaSamples = np.random.dirichlet(gammaPrior, numberOfThetaSamples)
    logThetaWeights = np.zeros(numberOfThetaSamples)
    particleShape = [numberOfThetaSamples, numberOfStateSamples]
    currentStateSamples = np.zeros(particleShape, dtype=np.intc)
    currentTauSamples = np.zeros(particleShape, dtype=np.double)
    ancestorStateSamples = np.zeros(particleShape, dtype=np.intc)
    ancestorTauSamples = np.zeros(particleShape, dtype=np.double)
    ancestorsWeights = np.ones(particleShape) / numberOfStateSamples
    essList = np.zeros(numberOfTrials)

    # Plot progress
    if show_progress:
        plt.figure(figsize=(12, 9))
        plt.ion()

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
            time_list.append(time.time() - start_time_multi)
        if (T + 1) % 100 == 0:
            print('\n')

        # Update theta weights. The return value is ignored, so the C routine
        # presumably updates the arrays it is given in place.
        # NOTE(review): at T == 0 the index T - 1 wraps to the last (still
        # unset) entry; presumably the C routine ignores the observation on
        # the first trial -- confirm.
        smc_c.bootstrapUpdateStep_c(
            currentStateSamples, logThetaWeights, currentTauSamples,
            gammaSamples, betaSamples / 2. + 1 / 2., nuSamples, tauDefault, T,
            np.ascontiguousarray(ancestorStateSamples, dtype=np.intc),
            ancestorTauSamples, ancestorsWeights,
            np.ascontiguousarray(mapping), stimuli[T - 1], actions[T - 1],
            rewards[T - 1])

        # The current particles become the ancestors of the next trial.
        ancestorTauSamples = np.array(currentTauSamples)
        ancestorStateSamples = np.array(currentStateSamples)

        # Degeneracy criterion: ESS = (sum w)^2 / sum(w^2), computed from the
        # log-weights (assuming useful_functions.log_sum is a log-sum-exp).
        logEss = 2 * useful_functions.log_sum(
            logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights)
        essList[T] = np.exp(logEss)

        # Move step
        normalisedThetaWeights = useful_functions.to_normalized_weights(
            logThetaWeights)
        if essList[T] < coefficient * numberOfThetaSamples:
            # Moment-match a Beta proposal to the weighted beta particles.
            betaMu = np.sum(normalisedThetaWeights * betaSamples)
            betaVar = np.sum(normalisedThetaWeights *
                             (betaSamples - betaMu)**2)
            betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
            betaBeta = betaAlpha * (1 / betaMu - 1)
            assert (betaAlpha > 0)
            assert (betaBeta > 0)
            # Moment-match an inverse-gamma proposal to the nu particles.
            nuMu = np.sum(normalisedThetaWeights * nuSamples)
            nuVar = np.sum(normalisedThetaWeights * (nuSamples - nuMu)**2)
            nuAlpha = nuMu**2 / nuVar + 2
            nuBeta = nuMu * (nuAlpha - 1)
            assert (nuAlpha > 0)
            assert (nuBeta > 0)
            # Moment-match a Dirichlet proposal to the gamma particles; its
            # parameters are floored at 1.
            dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T,
                                    axis=1)
            dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T,
                                  axis=1) - dirichletMeans**2
            dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / (
                np.sum(dirichletVar)) - 1
            dirichletParamCandidates = np.maximum(
                dirichletMeans * dirichletPrecision, 1.)
            assert ((dirichletParamCandidates > 0).all())

            # Re-draw every theta particle and reset the weights to uniform.
            nuSamples = useful_functions.sample_inv_gamma(
                nuAlpha, nuBeta, numberOfThetaSamples)
            betaSamples = np.random.beta(betaAlpha, betaBeta,
                                         numberOfThetaSamples)
            gammaSamples = np.random.dirichlet(dirichletParamCandidates,
                                               numberOfThetaSamples)
            logThetaWeights[:] = 0

            normalisedThetaWeights = useful_functions.to_normalized_weights(
                logThetaWeights)

        # Take decision: weighted count of state particles in each task set
        # (not normalised by numberOfStateSamples).
        for ts_idx in range(K):
            tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentStateSamples == ts_idx),
                axis=1))  # Todo : change!!! take out currentAncestorsWeights

        # Integer stimulus index, so that indexing `mapping` works the same in
        # every branch even when the stimuli are stored as floats.
        stimulus = int(stimuli[T])

        if beta_softmax is None:
            # Compute action likelihood
            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.sum(
                    tsProbability[T, mapping[stimulus] == action_idx])

            # Select action
            actions[T] = np.argmax(actionLikelihood)

        else:
            # Compute action likelihood
            tsProbability[T] /= sum(tsProbability[T])

            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.exp(
                    np.log(
                        np.sum(tsProbability[T, mapping[stimulus] ==
                                             action_idx])) * beta_softmax)

            actionLikelihood /= sum(actionLikelihood)

            # Select action
            actions[T] = np.where(
                np.random.multinomial(1, actionLikelihood, size=1)[0])[0][0]

        # Compute vol, nu, beta summaries for tracking
        volTracking[T] = np.sum(
            normalisedThetaWeights *
            (np.sum(currentTauSamples, axis=1) / numberOfStateSamples))
        volStdTracking[T] = np.sum(normalisedThetaWeights *
                                   (np.sum(currentTauSamples**2, axis=1) /
                                    numberOfStateSamples)) - volTracking[T]**2
        nuTracking[T] = np.sum(normalisedThetaWeights * nuSamples)
        nuStdTracking[T] = np.sum(normalisedThetaWeights *
                                  (nuSamples - nuTracking[T])**2)

        betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples)
        betaStdTracking[T] = np.sum(normalisedThetaWeights *
                                    (betaSamples - betaTracking[T])**2)

        # Update performance
        if K == 2:
            correctAction = mapping[stimulus, int(Z_true[T])]
            assert (correctAction == Z_true[T])
            isCorrect = actions[T] == correctAction
        elif K == 24:
            isCorrect = actions[T] == td['A_correct'][T]
        else:
            isCorrect = False

        if isCorrect:
            # A correct action is rewarded unless the trial is a trap.
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        else:
            rewards[T] = td['trap'][T]

        if show_progress:
            # plt.hold() no longer exists in matplotlib >= 3.0. Clearing each
            # axes before redrawing gives the same picture as the former
            # hold(False)/hold(True) sequence.
            plt.subplot(3, 2, 1)
            plt.cla()
            plt.imshow(tsProbability[:T].T, aspect='auto')
            plt.plot(Z_true[:T], 'w--')
            plt.axis([0, T - 1, 0, K - 1])
            plt.xlabel('trials')
            plt.ylabel('p(TS|past) at current time')

            plt.subplot(3, 2, 2)
            plt.cla()
            plt.plot(volTracking[:T], 'b')
            plt.fill_between(np.arange(T),
                             volTracking[:T] - volStdTracking[:T],
                             volTracking[:T] + volStdTracking[:T],
                             facecolor=[.5, .5, 1],
                             color=[.5, .5, 1])
            plt.plot(td['tau'], 'b--', linewidth=2)
            plt.axis([0, T - 1, 0, .5])
            plt.xlabel('trials')
            plt.ylabel('Volatility')

            plt.subplot(3, 2, 3)
            plt.cla()
            x = np.linspace(0.01, .99, 100)
            plt.plot(x, normlib.pdf(x, nuTracking[T], nuStdTracking[T]), 'b')
            plt.plot([nuTracking[T], nuTracking[T]],
                     plt.gca().get_ylim(),
                     'b',
                     linewidth=2)
            plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]),
                     'r')
            plt.plot([betaTracking[T], betaTracking[T]],
                     plt.gca().get_ylim(),
                     'r',
                     linewidth=2)
            plt.plot([td['beta'], td['beta']],
                     plt.gca().get_ylim(),
                     'r--',
                     linewidth=2)
            plt.xlabel('Parameters')
            plt.ylabel('Gaussian pdf')

            plt.subplot(3, 2, 4)
            plt.cla()
            plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2)
            plt.plot(plt.gca().get_xlim(), [
                coefficient * numberOfThetaSamples,
                coefficient * numberOfThetaSamples
            ],
                     'g--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, numberOfThetaSamples])
            plt.xlabel('trials')
            plt.ylabel('ESS')

            plt.subplot(3, 2, 5)
            plt.cla()
            plt.plot(np.divide(countPerformance[:T],
                               np.arange(T) + 1),
                     'k--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, 1])
            plt.xlabel('Trials')
            plt.ylabel('Performance')

            plt.draw()
            plt.show()
            plt.pause(0.1)

    elapsed_time = time.time() - start_time_multi

    # The sixth entry used to repeat volTracking, so the spread of the
    # volatility estimate was computed but never handed back to the caller.
    return [
        td, nuSamples, nuTracking, nuStdTracking, volTracking, volStdTracking,
        betaSamples, betaTracking, betaStdTracking, gammaSamples,
        tsProbability, countPerformance, actions, essList, time_list,
        elapsed_time
    ]
def SMC2(td,
         show_progress=True,
         lambdaa=.9,
         eta=0.,
         inertie_noise=0.,
         numberOfStateSamples=2000,
         numberOfThetaSamples=1000,
         coefficient=.5,
         beta_softmax=None,
         espilon_softmax=0.):
    """Simulate an agent using the 'precision model' on a task.

    On every trial the theta particles (beta, gamma) and their task-set
    particles are handed to ``smc_c.bootstrap_smc_step_c`` together with the
    previous trial's stimulus, reward and action; the theta particles are
    re-drawn from moment-matched proposals when the effective sample size
    (ESS) is too low; an action is then chosen for the current trial and
    rewarded according to the task description.

    Args:
        td: Task description mapping. Keys read here: ``'S'`` (stimuli),
            ``'Z'`` (true task sets), ``'action_num'``, ``'state_num'``,
            ``'trap'``, ``'A_correct'`` (only when K == 24) and ``'beta'``
            (only when ``show_progress``).
        show_progress: When true, redraw a 3x2 matplotlib figure every trial.
        lambdaa: Forwarded unchanged to ``smc_c.bootstrap_smc_step_c``.
        eta: Forwarded unchanged to ``smc_c.bootstrap_smc_step_c``.
        inertie_noise: Forwarded unchanged to ``smc_c.bootstrap_smc_step_c``.
        numberOfStateSamples: Number of state particles per theta particle.
        numberOfThetaSamples: Number of theta particles.
        coefficient: The theta particles are re-drawn when the ESS falls
            below ``coefficient * numberOfThetaSamples``.
        beta_softmax: ``None`` selects the most likely action (argmax);
            otherwise the action probabilities are raised to this power,
            renormalised and an action is sampled from them.
        espilon_softmax: Weight of the uniform distribution over actions that
            is mixed into the action probabilities (sampling branch only).

    Returns:
        A 16-element list:
        ``[td, noise_amount, lambdaa, eta, betaSamples, betaTracking,
        betaStdTracking, currentTemperatures, temperatureTracking,
        temperatureStdTracking, gammaSamples, tsProbability,
        countPerformance, actions, acceptance_list, elapsed_time]``.

        NOTE: ``betaStdTracking`` holds the weighted variance of beta (no
        square root is taken), ``currentTemperatures`` is never written after
        being zero-initialised, and ``acceptance_list`` is returned as
        initialised (``[1.]``).
    """
    print(
        'precision model with lambda = {0} and eta = {1}, epsilon= {4}, inertie_noise={5}. Number of state samples : {2} and number of theta samples : {3}'
        .format(lambdaa, eta, numberOfStateSamples, numberOfThetaSamples,
                espilon_softmax, inertie_noise))

    # Start timer
    start_time_multi = time.time()

    # Extract parameters from task description
    stimuli = np.ascontiguousarray(td['S'],
                                   dtype=np.intc)  # Sequence of stimuli
    Z = td['Z']  # Sequence of task sets
    numberOfActions = td['action_num']  # Number of actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    # Number of possible task sets: product of the last `numberOfStimuli`
    # integers of 0..numberOfActions.
    K = np.prod(np.arange(numberOfActions + 1)[-numberOfStimuli:])
    numberOfTrials = len(Z)  # Number of trials

    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on beta, the feedback noise parameter
    dirichletPrior = np.ones(K)

    # Mapping from task set to correct action per stimulus
    mapping = np.ascontiguousarray(get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T,
                                   dtype=np.intc)
    Z_true = Z

    # Per-trial action scores, marginalised over task sets
    actionLikelihood = np.zeros(numberOfActions)
    # -1 marks "not chosen yet"
    actions = np.ascontiguousarray(np.zeros(numberOfTrials) - 1, dtype=np.intc)
    rewards = np.ascontiguousarray(np.zeros(numberOfTrials), dtype=np.intc)

    # Tracking arrays
    countPerformance = np.zeros(
        numberOfTrials)  # Number of correct actions after i trials
    tsProbability = np.zeros([numberOfTrials, K])
    betaTracking = np.zeros(numberOfTrials)
    betaStdTracking = np.zeros(numberOfTrials)
    temperatureTracking = np.zeros(numberOfTrials)
    temperatureStdTracking = np.zeros(numberOfTrials)
    # Never appended to in this function, so the `acceptance_list[-1] > 0.05`
    # guard of the move step below is always satisfied.
    acceptance_list = [1.]

    # SMC particles initialisation
    betaSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                 numberOfThetaSamples)
    gammaSamples = np.random.dirichlet(dirichletPrior, numberOfThetaSamples)
    logThetaWeights = np.zeros(numberOfThetaSamples)
    currentTaskSetSamples = np.zeros(
        [numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    ancestorTaskSetSamples = np.zeros(
        [numberOfThetaSamples, numberOfStateSamples], dtype=np.intc)
    weightsList = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    essList = np.zeros(numberOfTrials)
    currentTemperatures = np.zeros(numberOfTrials)
    entropies = np.zeros(numberOfTrials)  # Diagnostic only; not returned
    temperature = 0.5

    # Buffers handed to the C routine (variables for speed-up)
    ante_proba_local = np.zeros(K)
    post_proba_local = np.zeros(K)
    sum_weightsList = np.zeros(numberOfThetaSamples)
    ancestorsIndexes = np.zeros(numberOfStateSamples, dtype=np.intc)
    gammaAdaptedProba = np.zeros(K)
    likelihoods = np.zeros(K)
    positiveStates = np.zeros(K, dtype=np.intc)
    distances = np.zeros([numberOfThetaSamples, 1])
    currentNoises = np.zeros([numberOfThetaSamples, numberOfStateSamples])
    noise_amount = np.zeros(numberOfTrials)

    # Plot progress
    if show_progress:
        plt.figure(figsize=(12, 9))
        plt.ion()

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0:
            print('\n')

        # One SMC step; only the scalar return value is kept, so the C
        # routine presumably updates the arrays it is given in place.
        # NOTE(review): at T == 0 the index T - 1 wraps to the last (still
        # unset) entry; presumably the C routine ignores the observation on
        # the first trial -- confirm.
        noise_amount[T] = smc_c.bootstrap_smc_step_c(
            logThetaWeights, distances, betaSamples / 2. + 1 / 2., lambdaa,
            eta, inertie_noise, gammaSamples, currentTaskSetSamples,
            ancestorTaskSetSamples, weightsList, mapping, stimuli[T - 1],
            rewards[T - 1], actions[T - 1], T, likelihoods, positiveStates,
            ante_proba_local, post_proba_local, ancestorsIndexes,
            gammaAdaptedProba, sum_weightsList, currentNoises,
            float(temperature))

        # Entropy of the task-set frequencies pooled over all particles.
        entropies[T] = entropy(
            np.asarray([np.sum(currentTaskSetSamples == i)
                        for i in range(K)]) * 1. /
            (numberOfThetaSamples * numberOfStateSamples))
        # The current particles become the ancestors of the next trial.
        ancestorTaskSetSamples[:] = currentTaskSetSamples

        # Degeneracy criterion: ESS = (sum w)^2 / sum(w^2), computed from the
        # log-weights (assuming useful_functions.log_sum is a log-sum-exp).
        logEss = 2 * useful_functions.log_sum(
            logThetaWeights) - useful_functions.log_sum(2 * logThetaWeights)
        essList[T] = np.exp(logEss)

        # Move step
        normalisedThetaWeights = useful_functions.to_normalized_weights(
            logThetaWeights)
        if essList[T] < coefficient * numberOfThetaSamples and acceptance_list[
                -1] > 0.05:
            # Moment-match a Beta proposal to the weighted beta particles;
            # both parameters are floored at 1.
            betaMu = np.sum(normalisedThetaWeights * betaSamples)
            betaVar = np.sum(normalisedThetaWeights *
                             (betaSamples - betaMu)**2)
            betaAlpha = np.maximum(
                ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2, 1)
            betaBeta = np.maximum(betaAlpha * (1 / betaMu - 1), 1.)
            assert (betaAlpha > 0)
            assert (betaBeta > 0)
            # Moment-match a Dirichlet proposal to the gamma particles; its
            # parameters are floored at 1.
            dirichletMeans = np.sum(normalisedThetaWeights * gammaSamples.T,
                                    axis=1)
            dirichletVar = np.sum(normalisedThetaWeights * (gammaSamples**2).T,
                                  axis=1) - dirichletMeans**2
            dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2) / (
                np.sum(dirichletVar)) - 1
            dirichletParamCandidates = np.maximum(
                dirichletMeans * dirichletPrecision, 1.)
            assert ((dirichletParamCandidates > 0).all())

            # Re-draw every theta particle and reset the weights to uniform.
            logThetaWeights[:] = 0
            betaSamples = np.random.beta(betaAlpha, betaBeta,
                                         numberOfThetaSamples)
            gammaSamples = np.random.dirichlet(dirichletParamCandidates,
                                               numberOfThetaSamples)
            normalisedThetaWeights = useful_functions.to_normalized_weights(
                logThetaWeights)

        # Take decision: weighted count of state particles in each task set
        # (not normalised by numberOfStateSamples).
        for ts_idx in range(K):
            tsProbability[T, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentTaskSetSamples == ts_idx), axis=1))

        # Same integer stimulus index in every branch below.
        stimulus = int(stimuli[T])

        if beta_softmax is None:
            # Compute action likelihood
            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.sum(
                    tsProbability[T, mapping[stimulus] == action_idx])

            # Select action
            actions[T] = np.argmax(actionLikelihood)

        else:
            # Compute action likelihood
            tsProbability[T] /= sum(tsProbability[T])

            for action_idx in range(numberOfActions):
                actionLikelihood[action_idx] = np.exp(
                    np.log(
                        np.sum(tsProbability[T, mapping[stimulus] ==
                                             action_idx])) * beta_softmax)

            actionLikelihood /= sum(actionLikelihood)

            # Mix with a uniform distribution over the ACTIONS. The uniform
            # mass has to be split over numberOfActions entries (it used to
            # be split over K task sets), otherwise the vector does not sum
            # to 1 and np.random.multinomial silently gives the missing mass
            # to the last action.
            actionLikelihood = actionLikelihood * (
                1 - espilon_softmax) + espilon_softmax / numberOfActions
            # Select action
            actions[T] = np.where(
                np.random.multinomial(1, actionLikelihood, size=1)[0])[0][0]

        betaTracking[T] = np.sum(normalisedThetaWeights * betaSamples)
        betaStdTracking[T] = np.sum(normalisedThetaWeights *
                                    (betaSamples - betaTracking[T])**2)
        temperatureTracking[T] = np.mean(currentNoises)
        temperatureStdTracking[T] = np.std(currentNoises)

        # Update performance
        if K == 2:
            correctAction = mapping[stimulus, int(Z_true[T])]
            assert (correctAction == Z_true[T])
            isCorrect = actions[T] == correctAction
        elif K == 24:
            isCorrect = actions[T] == td['A_correct'][T]
        else:
            isCorrect = False

        if isCorrect:
            # A correct action is rewarded unless the trial is a trap.
            rewards[T] = not td['trap'][T]
            countPerformance[T:] += 1
        else:
            rewards[T] = td['trap'][T]

        if show_progress:
            # plt.hold() no longer exists in matplotlib >= 3.0. Clearing each
            # axes before redrawing replaces the former hold(False)/hold(True)
            # sequence.
            plt.subplot(3, 2, 1)
            plt.cla()
            plt.imshow(tsProbability[:T].T, aspect='auto')
            plt.plot(Z_true[:T], 'w--')
            plt.axis([0, T - 1, 0, K - 1])
            plt.xlabel('trials')
            plt.ylabel('p(TS|past) at current time')

            plt.subplot(3, 2, 2)
            plt.cla()
            plt.plot(temperatureTracking[:T])
            plt.fill_between(
                np.arange(T),
                temperatureTracking[:T] - temperatureStdTracking[:T],
                temperatureTracking[:T] + temperatureStdTracking[:T],
                facecolor=[.5, .5, 1],
                color=[.5, .5, 1])
            plt.xlabel('trials')
            plt.ylabel('Temperature')

            plt.subplot(3, 2, 3)
            plt.cla()
            x = np.linspace(0.01, .99, 100)
            plt.plot(x, normlib.pdf(x, betaTracking[T], betaStdTracking[T]),
                     'r')
            plt.plot([betaTracking[T], betaTracking[T]],
                     plt.gca().get_ylim(),
                     'r',
                     linewidth=2)
            plt.plot([td['beta'], td['beta']],
                     plt.gca().get_ylim(),
                     'r--',
                     linewidth=2)
            plt.xlabel('Parameters')
            plt.ylabel('Gaussian pdf')

            plt.subplot(3, 2, 4)
            plt.cla()
            plt.plot(np.arange(T) + 1, essList[:T], 'g', linewidth=2)
            plt.plot(plt.gca().get_xlim(), [
                coefficient * numberOfThetaSamples,
                coefficient * numberOfThetaSamples
            ],
                     'g--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, numberOfThetaSamples])
            plt.xlabel('trials')
            plt.ylabel('ESS')

            plt.subplot(3, 2, 5)
            plt.cla()
            plt.plot(np.divide(countPerformance[:T],
                               np.arange(T) + 1),
                     'k--',
                     linewidth=2)
            plt.axis([0, T - 1, 0, 1])
            plt.xlabel('Trials')
            plt.ylabel('Performance')

            plt.draw()
            plt.show()
            plt.pause(0.1)

    elapsed_time = time.time() - start_time_multi

    return [
        td, noise_amount, lambdaa, eta, betaSamples, betaTracking,
        betaStdTracking, currentTemperatures, temperatureTracking,
        temperatureStdTracking, gammaSamples, tsProbability, countPerformance,
        actions, acceptance_list, elapsed_time
    ]
def SMC2(td,
         beta_softmax=1.,
         lambda_noise=.4,
         eta_noise=.1,
         epsilon_softmax=0.,
         noise_inertie=0.,
         numberOfStateSamples=200,
         numberOfThetaSamples=200,
         numberOfBetaSamples=20,
         coefficient=.5,
         latin_hyp_sampling=True):

    print('\n')
    print('Noisy Forward Model')
    print('number of theta samples ' + str(numberOfThetaSamples))
    print('\n')

    #Start timer
    start_time_multi = time.time()

    # uniform distribution
    if latin_hyp_sampling:
        d0 = uniform()
        print('latin hypercube sampling')
    else:
        print('sobolev sampling')

    # Extract parameters from task description
    stimuli = td['S']  # Sequence of Stimuli
    Z_true = td['Z']  # Sequence of Task Sets
    numberOfActions = td['action_num']  # Number of Actions possible
    numberOfStimuli = td['state_num']  # Number of states or stimuli
    rewards = td['reward']
    actions = td['A_chosen']
    K = np.prod(
        np.arange(numberOfActions +
                  1)[-numberOfStimuli:])  # Number of possible Task Sets
    numberOfTrials = len(Z_true)  # Number of Trials
    distances = np.zeros([numberOfThetaSamples, 1])
    # Sampling and prior settings
    betaPrior = np.array([1, 1])  # Prior on Beta, the feedback noise parameter
    gammaPrior = np.ones(K)  # Prior on Gamma, the Dirichlet parameter
    log_proba_ = 0.

    # verification
    if K == 2:
        if latin_hyp_sampling == False:
            raise ValueError(
                'Why did you change the latin_hyp_sampling? By default, it is True and has no influence when K=2.'
            )

    # Mapping from task set to correct action per stimulus
    mapping = get_mapping.Get_TaskSet_Stimulus_Mapping(
        state_num=numberOfStimuli, action_num=numberOfActions).T

    betaWeights = np.zeros(numberOfBetaSamples)
    betaLog = np.zeros(numberOfBetaSamples)
    logbetaWeights = np.zeros(numberOfBetaSamples)
    betaAncestors = np.arange(numberOfBetaSamples)

    # Probabilities of every actions updated at every time step -> Used to take the decision
    actionLikelihood = np.zeros([numberOfBetaSamples, numberOfActions])
    sum_actionLik = np.zeros(numberOfBetaSamples)
    filt_actionLkd = np.zeros(
        [numberOfTrials, numberOfBetaSamples, numberOfActions])

    # Keep track of probability correct/exploration after switches
    tsProbability = np.zeros([numberOfBetaSamples, K])
    sum_tsProbability = np.zeros(numberOfBetaSamples)
    dirichletParamCandidates = np.zeros(K)

    # SMC particles initialisation
    muSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples]
    )  #np.random.beta(betaPrior[0], betaPrior[1], [numberOfBetaSamples, numberOfThetaSamples])
    gammaSamples = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])

    if K == 24:
        try:
            latin_hyp_samples = pickle.load(
                open('../../utils/sobol_200_25.pkl', 'rb'))
        except:
            latin_hyp_samples = pickle.load(
                open('../../models/utils/sobol_200_25.pkl', 'rb'))
        for beta_idx in range(numberOfBetaSamples):
            if latin_hyp_sampling:
                latin_hyp_samples = mcerp.lhd(dist=d0,
                                              size=numberOfThetaSamples,
                                              dims=K + 1)
            muSamples[beta_idx] = betalib.ppf(latin_hyp_samples[:, 0],
                                              betaPrior[0], betaPrior[1])
            gammaSamples[beta_idx] = gammalib.ppf(latin_hyp_samples[:, 1:],
                                                  gammaPrior)
            gammaSamples[beta_idx] = np.transpose(
                gammaSamples[beta_idx].T /
                np.sum(gammaSamples[beta_idx], axis=1))
    elif K == 2:
        muSamples = np.random.beta(betaPrior[0], betaPrior[1],
                                   [numberOfBetaSamples, numberOfThetaSamples])
        gammaSamples = np.random.dirichlet(
            gammaPrior, [numberOfBetaSamples, numberOfThetaSamples])
    else:
        raise IndexError('Wrong number of task sets')

    logThetaWeights = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    currentSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    ancestorSamples = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples, numberOfStateSamples],
        dtype=np.intc)
    weightsList = np.ones([numberOfThetaSamples, numberOfStateSamples
                           ]) / numberOfStateSamples
    currentNoises = np.zeros([numberOfThetaSamples, numberOfStateSamples])

    log_proba_corr = 0.
    ante_proba_local = np.zeros(K)
    post_proba_local = np.zeros(K)
    sum_weightsList = np.zeros(numberOfThetaSamples)
    ancestorsIndexes = np.zeros(numberOfStateSamples, dtype=np.intc)
    gammaAdaptedProba = np.zeros(K)
    likelihoods = np.zeros(K)
    positiveStates = np.zeros(K, dtype=np.intc)

    # Guided SMC variables
    muSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    gammaSamplesNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples, K])
    logThetaWeightsNew = np.zeros([numberOfBetaSamples, numberOfThetaSamples])
    normalisedThetaWeights = np.zeros(
        [numberOfBetaSamples, numberOfThetaSamples])
    temperatures = np.zeros(numberOfBetaSamples)
    temperatureAncestors = np.zeros(numberOfBetaSamples) + .5

    # Loop over trials
    for T in range(numberOfTrials):

        # Print progress
        if (T + 1) % 10 == 0:
            sys.stdout.write(' ' + str(T + 1))
            sys.stdout.flush()
        if (T + 1) % 100 == 0: print('\n')

        for beta_idx in range(numberOfBetaSamples):

            ances = betaAncestors[beta_idx]

            temperatures[beta_idx] = smc_c.bootstrap_smc_step_c(logThetaWeights[beta_idx], distances, muSamples[ances]/2. + 1./2, lambda_noise, eta_noise, noise_inertie, gammaSamples[ances], currentSamples[beta_idx], ancestorSamples[ances], weightsList, \
                                            np.ascontiguousarray(mapping), stimuli[T-1], rewards[T-1], actions[T-1], T, likelihoods, positiveStates, ante_proba_local,\
                                            post_proba_local, ancestorsIndexes, gammaAdaptedProba, sum_weightsList, currentNoises, temperatureAncestors[ances])

            # Move step
            normalisedThetaWeights[
                beta_idx] = useful_functions.to_normalized_weights(
                    logThetaWeights[beta_idx])
            ess = 1. / np.sum(normalisedThetaWeights[beta_idx]**2)

            if (ess < coefficient * numberOfThetaSamples):
                acceptanceProba = 0.
                betaMu = np.sum(normalisedThetaWeights[beta_idx] *
                                muSamples[ances])
                betaVar = np.sum(normalisedThetaWeights[beta_idx] *
                                 (muSamples[ances] - betaMu)**2)
                betaAlpha = ((1 - betaMu) / betaVar - 1 / betaMu) * betaMu**2
                betaBeta = betaAlpha * (1 / betaMu - 1)
                assert (betaAlpha > 0)
                assert (betaBeta > 0)
                dirichletMeans = np.sum(normalisedThetaWeights[beta_idx] *
                                        gammaSamples[ances].T,
                                        axis=1)
                dirichletVar = np.sum(normalisedThetaWeights[beta_idx] *
                                      (gammaSamples[ances]**2).T,
                                      axis=1) - dirichletMeans**2
                dirichletPrecision = np.sum(dirichletMeans - dirichletMeans**2
                                            ) / (np.sum(dirichletVar)) - 1
                dirichletParamCandidates[:] = np.maximum(
                    dirichletMeans * dirichletPrecision, 1.)
                assert ((dirichletParamCandidates > 0).all())

                if K == 2:
                    muSamplesNew[beta_idx] = np.random.beta(
                        betaAlpha, betaBeta, numberOfThetaSamples)
                    gammaSamplesNew[beta_idx] = np.random.dirichlet(
                        dirichletParamCandidates, numberOfThetaSamples)
                if K == 24:
                    if latin_hyp_sampling:
                        latin_hyp_samples = mcerp.lhd(
                            dist=d0, size=numberOfThetaSamples, dims=K + 1)
                    muSamplesNew[beta_idx] = betalib.ppf(
                        latin_hyp_samples[:, 0], betaAlpha, betaBeta)
                    gammaSamplesNew[beta_idx] = gammalib.ppf(
                        latin_hyp_samples[:, 1:], dirichletParamCandidates)
                    gammaSamplesNew[beta_idx] = np.transpose(
                        gammaSamplesNew[beta_idx].T /
                        np.sum(gammaSamplesNew[beta_idx], axis=1))

                logThetaWeightsNew[beta_idx] = 0.
                normalisedThetaWeights[beta_idx] = 1. / numberOfThetaSamples
            else:
                muSamplesNew[beta_idx] = muSamples[ances]
                gammaSamplesNew[beta_idx] = gammaSamples[ances]
                logThetaWeightsNew[beta_idx] = logThetaWeights[beta_idx]

        # task set probability
        sum_tsProbability[:] = 0.
        for ts_idx in range(K):
            tsProbability[:, ts_idx] = np.sum(normalisedThetaWeights * np.sum(
                (currentSamples == ts_idx), axis=2),
                                              axis=1)
            sum_tsProbability += tsProbability[:, ts_idx]

        tsProbability[:] = np.transpose(tsProbability.T / sum_tsProbability)

        # Compute action likelihood
        sum_actionLik[:] = 0.
        for action_idx in range(numberOfActions):
            actionLikelihood[:, action_idx] = np.exp(
                np.log(
                    np.sum(tsProbability[:, mapping[stimuli[T].astype(int)] ==
                                         action_idx],
                           axis=1)) * beta_softmax)
            sum_actionLik += actionLikelihood[:, action_idx]

        rewards[T] = td['reward'][T]
        actions[T] = td['A_chosen'][T]

        actionLikelihood[:] = np.transpose(
            actionLikelihood.T / sum_actionLik) * (
                1 - epsilon_softmax) + epsilon_softmax / numberOfActions
        betaWeights[:] = actionLikelihood[:, actions[T].astype(int)]

        filt_actionLkd[T] = actionLikelihood

        log_proba_ += np.log(sum(betaWeights) / numberOfBetaSamples)
        betaWeights = betaWeights / sum(betaWeights)

        betaAncestors[:] = useful_functions.stratified_resampling(betaWeights)

        # update particles
        muSamples[:] = muSamplesNew
        gammaSamples[:] = gammaSamplesNew
        logThetaWeights[:] = logThetaWeightsNew[betaAncestors]
        ancestorSamples[:] = currentSamples
        temperatureAncestors[:] = temperatures

    elapsed_time = time.time() - start_time_multi

    return log_proba_