    nEpochs = 100

    # Environment
    nStatesOneSide = 3
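    # A reward of +1.0 is given on reaching the right terminal state (index nStatesOneSide * 2)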
    specialRewards = {nStatesOneSide * 2: 1.0}

    # Agent
    alphas_MC = [0.005]                # step sizes for constant-alpha Monte Carlo prediction
    alphas_TD = [0.05, 0.005]          # step sizes for TD(0) prediction
    doBatchUpdates_TD = [False, True]  # flags toggling batch updating for the TD agents
    gamma = 1.0

    avg_rms_TD = np.zeros([nEpochs, len(alphas_TD)])
    avg_rms_MC = np.zeros([nEpochs, len(alphas_MC)])
    for idx_experiment in range(nExperiments):
        env = RandomWalk(nStatesOneSide, specialRewards=specialRewards)
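        # True value of non-terminal state i in this walk is i / (nStates - 1)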
        groundTruth = np.arange(1, env.nStates - 1) / (env.nStates - 1)
        # TD agents
        agents_TD = []
        valueTables_TD = []
        aux = []
        for alpha in alphas_TD:
            agent = TDPrediction(env.nStates, alpha, gamma)
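            # Start every state value at 0.5, then pin both terminal states back to 0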
            agent.valueTable = agent.valueTable + 0.5
            agent.valueTable[0] = 0.0
            agent.valueTable[nStatesOneSide * 2] = 0.0
            agents_TD.append(agent)
            aux.append(np.array(agent.valueTable))
        valueTables_TD.append(np.array(aux))
        # MC agents
        agents_MC = []

    # Environment
    specialRewards = {nStatesOneSide * 2: 1.0, 0: -1.0}
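    # True value of non-terminal state i here is (i - nStatesOneSide) / nStatesOneSide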
    groundTruth = np.zeros(nStatesOneSide * 2 + 1)
    groundTruth[nStatesOneSide:] = np.arange(nStatesOneSide + 1) / nStatesOneSide
    groundTruth[0:nStatesOneSide] = np.arange(nStatesOneSide, 0, -1) / (-nStatesOneSide)
    groundTruth = groundTruth[1:nStatesOneSide * 2]
    nStates = nStatesOneSide * 2 + 1
 
    # Agents
    alphas = np.arange(0.01, 1.01, 0.09)
    lambdaVals = [0.0, 0.4, 0.8, 0.9, 0.95, 0.975, 0.99, 1.0]
    gamma = 1.0
    nParams = nStates
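    # One parameter per state, so state aggregation here amounts to an effectively tabular value function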
    approximationFunctionArgs = {'af': linearTransform, 'afd': dLinearTransform,
        'ftf': stateAggregation, 'nStates': nStates, 'nParams': nParams}

    env = RandomWalk(nStatesOneSide, specialRewards=specialRewards)
    agent_OffLR = OfflineLambdaReturn(nParams, 0.0, gamma, 0.0, approximationFunctionArgs=approximationFunctionArgs)
    agent_SGTDL = SemiGradientTDLambda(nParams, 0.0, gamma, 0.0, approximationFunctionArgs=approximationFunctionArgs)
    agent_TOGL = TrueOnlineTDLambda(nParams, 0.0, gamma, 0.0, approximationFunctionArgs=approximationFunctionArgs)
    agent_OnLR = OnlineLambdaReturn(nParams, 0.0, gamma, 0.0, approximationFunctionArgs=approximationFunctionArgs)  # High complexity
    # agents = [agent_OffLR, agent_SGTDL, agent_TOGL, agent_OnLR]  # uncomment to include the slower online lambda-return agent
    agents = [agent_OffLR, agent_SGTDL, agent_TOGL]
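    # For every agent, sweep all (lambda, alpha) pairs and average the RMS value error over experiments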
    for agent in agents:
        avg_vals_all = []
        avg_rmse_all = []
        for lambd in lambdaVals:
            for alpha in alphas:
                avg_rmse = 0.0
                avg_vals = np.zeros(env.nStates)
                for idx_experiment in range(nExperiments):