def apply_monte_carlo_simulation(costOfLiving, numTimes):
    """Simulate ``numTimes`` runs under the value-iteration policy.

    A fresh utility map is solved with ``Problem1.apply_value_iteration``
    using a ``RewardSet`` built from ``costOfLiving``; the policy obtained
    from ``get_optimal_policy`` is then handed to ``simulate_run`` once per
    requested run.

    Args:
        costOfLiving: Value passed straight to ``RewardSet``.
        numTimes: Number of times ``simulate_run`` is called.

    Returns:
        A list with one entry per run, each being whatever ``simulate_run``
        returned, in the order the runs were executed.
    """
    # NOTE(review): the file contains a second definition of this function
    # right after this one; the later definition is the one that stays bound.
    initial_utilities = UtilityMap()
    transitions = TransitionModel()
    reward_model = RewardSet(costOfLiving)

    solved_utilities = Problem1.apply_value_iteration(
        initial_utilities, transitions, reward_model
    )
    best_policy = solved_utilities.get_optimal_policy(transitions)

    # The leading (2, 3) arguments are fixed for every run -- presumably the
    # starting cell; confirm against simulate_run.
    return [
        simulate_run(2, 3, best_policy, reward_model)
        for _ in range(numTimes)
    ]
def apply_monte_carlo_simulation(costOfLiving, numTimes):
    """Collect the results of ``numTimes`` policy-driven simulation runs.

    The policy comes from running ``Problem1.apply_value_iteration`` on a new
    ``UtilityMap`` with a ``RewardSet`` created from ``costOfLiving`` and then
    calling ``get_optimal_policy`` on the result.

    Args:
        costOfLiving: Forwarded unchanged to the ``RewardSet`` constructor.
        numTimes: How many simulation runs to perform.

    Returns:
        List of the ``simulate_run`` return values, one per run, in run order.
    """
    # NOTE(review): this name is defined twice in the file (an equivalent
    # definition precedes this one); being later, this is the one in effect.
    utilities = UtilityMap()
    model = TransitionModel()
    reward_set = RewardSet(costOfLiving)
    utilities = Problem1.apply_value_iteration(utilities, model, reward_set)
    chosen_policy = utilities.get_optimal_policy(model)

    def run_once():
        # Every run uses the same fixed leading arguments (2, 3) --
        # presumably the start position; verify against simulate_run.
        return simulate_run(2, 3, chosen_policy, reward_set)

    return [run_once() for _ in range(numTimes)]
def solve():
    """Solve the utilities once, then report Monte Carlo statistics.

    Value iteration is run with a cost of living of -0.04. Afterwards
    ``apply_monte_carlo_simulation`` is invoked for 10, 100 and 1000 runs;
    for each batch the mean and the standard deviation (``numpy.std`` with
    its default arguments) are printed, mean first.

    The printed text is byte-for-byte what the former Python 2 ``print``
    statements produced (``"<n> run mean: <value>"``), but it is emitted via
    a single-argument ``print(...)`` so the function also parses on Python 3.

    Returns:
        A 4-tuple ``(utilMap, rewards10, rewards100, rewards1000)``: the
        result of value iteration followed by the reward lists of the
        10-, 100- and 1000-run batches.
    """
    # Single source of truth for values that used to be repeated inline
    # (the run count appeared as argument, divisor and label for each batch).
    cost_of_living = -0.04
    run_counts = (10, 100, 1000)

    utilMap = UtilityMap()
    transitionModel = TransitionModel()
    rewardSet = RewardSet(cost_of_living)
    utilMap = Problem1.apply_value_iteration(utilMap, transitionModel, rewardSet)

    rewards_per_batch = []
    for num_runs in run_counts:
        rewards = apply_monte_carlo_simulation(cost_of_living, num_runs)
        # float() keeps true division on Python 2, matching the old "/ 10.0".
        print("%d run mean: %s" % (num_runs, sum(rewards) / float(num_runs)))
        print("%d run stddev: %s" % (num_runs, numpy.std(numpy.array(rewards))))
        rewards_per_batch.append(rewards)

    return (utilMap,) + tuple(rewards_per_batch)
def _print_run_statistics(num_runs, rewards):
    """Print the mean and standard deviation lines for one batch of runs."""
    # Single-argument print(...) with %-formatting yields the same bytes as
    # the old ``print "label:", value`` statement on Python 2 and is valid
    # syntax on Python 3 as well.
    print("%d run mean: %s" % (num_runs, sum(rewards) / float(num_runs)))
    print("%d run stddev: %s" % (num_runs, numpy.std(numpy.array(rewards))))


def solve():
    """Run value iteration and print Monte Carlo statistics for three batches.

    The utilities are computed with ``Problem1.apply_value_iteration`` for a
    cost of living of -0.04. ``apply_monte_carlo_simulation`` is then called
    with 10, 100 and 1000 runs in that order, and after each call the batch
    mean and ``numpy.std`` (default arguments) are printed.

    Returns:
        ``(utilMap, rewards10, rewards100, rewards1000)`` -- the value
        iteration result and the reward list of each batch.
    """
    # NOTE(review): ``solve`` is defined twice in the file (an equivalent
    # definition precedes this one); being later, this is the one in effect.
    cost_of_living = -0.04  # previously repeated as a literal at every use

    utilMap = UtilityMap()
    transitionModel = TransitionModel()
    rewardSet = RewardSet(cost_of_living)
    utilMap = Problem1.apply_value_iteration(utilMap, transitionModel, rewardSet)

    batches = []
    # One loop replaces three copy-pasted stanzas, so the run count can no
    # longer disagree between the call, the divisor and the printed label.
    for num_runs in (10, 100, 1000):
        rewards = apply_monte_carlo_simulation(cost_of_living, num_runs)
        _print_run_statistics(num_runs, rewards)
        batches.append(rewards)

    rewards10, rewards100, rewards1000 = batches
    return (utilMap, rewards10, rewards100, rewards1000)