def q_learning_nfq(**args):
    """Train several Q-learning agents, test-play each once, keep the best.

    For each of 50 trials a fresh ``QLearning(12, 4)`` agent is created,
    trained with ``training(agent, args)`` and then evaluated with a single
    call to ``play``.  The agent with the lowest mean ``train_error`` is
    remembered together with the score/turn of its test play.

    Args:
        **args: Keyword options.  The whole dict is forwarded unchanged to
            ``training`` and ``play``.  ``args['path']`` is required: the
            result file is written to ``args['path'] + '/result.dump'``.

    Side effects:
        Prints progress to stdout and pickles
        ``[score_list, turn_list, best_agent]`` to the result file.

    Raises:
        KeyError: if ``'path'`` is missing from ``args`` (raised only when
            the results are written, i.e. after all trials have run).
    """
    n_trials = 50

    # estimate
    best_score = 0
    best_turn = 1000
    best_agent = None
    score_list = []
    turn_list = []

    for i in range(n_trials):
        agent = QLearning(12, 4)

        # training
        # NOTE(review): greedy_rate is presumably the probability of acting
        # greedily (0.0 while training, 0.7 for the test play) -- confirm
        # against the QLearning class.
        agent.greedy_rate = 0.0
        print('')
        print("===========================")
        print('before training')
        print_state(agent.get_q_values)
        training(agent, args)
        print('after training')
        print_state(agent.get_q_values)

        # evaluation: one test play with the trained agent
        agent.greedy_rate = 0.7
        score, turn = play(agent, 'neural', args, [2, 2])
        score_list.append(score)
        turn_list.append(turn)

        print('')
        print('test one play')
        print('%s %s %s %s %s' % (i, int(numpy.mean(score_list)),
                                  max(score_list), score, turn))

        # The best agent is selected by lowest mean training error, not by
        # the score or turn count of its test play.
        if (best_agent is None or
                numpy.average(best_agent.train_error) >
                numpy.average(agent.train_error)):
            print('best train error !')
            best_score = score
            best_turn = turn
            best_agent = agent

    # Pickle streams are bytes: open the file in binary mode so the dump is
    # portable (text mode corrupts it on Windows and fails on Python 3).
    with open(args['path'] + '/result.dump', 'wb') as f:
        pickle.dump([score_list, turn_list, best_agent], f)

    print('')
    print("===========================")
    print('best score :  %s' % (best_score,))
    print('best turn :  %s' % (best_turn,))
    print_state(best_agent.get_q_values)
def q_learning_nfq(**args):
    """Train several Q-learning agents, test-play each once, keep the best.

    For each of 50 trials a fresh ``QLearning(12, 4)`` agent is created,
    trained with ``training(agent, args)`` and then evaluated with a single
    call to ``play``.  The agent with the lowest mean ``train_error`` is
    remembered together with the score/turn of its test play.

    Args:
        **args: Keyword options.  The whole dict is forwarded unchanged to
            ``training`` and ``play``.  ``args['path']`` is required: the
            result file is written to ``args['path'] + '/result.dump'``.

    Side effects:
        Prints progress to stdout and pickles
        ``[score_list, turn_list, best_agent]`` to the result file.

    Raises:
        KeyError: if ``'path'`` is missing from ``args`` (raised only when
            the results are written, i.e. after all trials have run).
    """
    n_trials = 50

    # estimate
    best_score = 0
    best_turn = 1000
    best_agent = None
    score_list = []
    turn_list = []

    for i in range(n_trials):
        agent = QLearning(12, 4)

        # training
        # NOTE(review): greedy_rate is presumably the probability of acting
        # greedily (0.0 while training, 0.7 for the test play) -- confirm
        # against the QLearning class.
        agent.greedy_rate = 0.0
        print('')
        print("===========================")
        print('before training')
        print_state(agent.get_q_values)
        training(agent, args)
        print('after training')
        print_state(agent.get_q_values)

        # evaluation: one test play with the trained agent
        agent.greedy_rate = 0.7
        score, turn = play(agent, 'neural', args, [2, 2])
        score_list.append(score)
        turn_list.append(turn)

        print('')
        print('test one play')
        print('%s %s %s %s %s' % (i, int(numpy.mean(score_list)),
                                  max(score_list), score, turn))

        # The best agent is selected by lowest mean training error, not by
        # the score or turn count of its test play.
        if (best_agent is None or
                numpy.average(best_agent.train_error) >
                numpy.average(agent.train_error)):
            print('best train error !')
            best_score = score
            best_turn = turn
            best_agent = agent

    # Pickle streams are bytes: open the file in binary mode so the dump is
    # portable (text mode corrupts it on Windows and fails on Python 3).
    with open(args['path'] + '/result.dump', 'wb') as f:
        pickle.dump([score_list, turn_list, best_agent], f)

    print('')
    print("===========================")
    print('best score :  %s' % (best_score,))
    print('best turn :  %s' % (best_turn,))
    print_state(best_agent.get_q_values)
def q_learning_nfq(**args):
    """Train a Q-learning agent over many epochs, then test-play it once.

    A ``QLearning(117, 4)`` agent is trained for 100 epochs with
    ``training(agent, args)`` while its ``greedy_rate`` is stepped up from
    0.5, then evaluated with a single ``play(agent)`` call.  Across trials
    (currently one) the agent with the lowest mean ``train_error`` is kept.

    Args:
        **args: Keyword options.  The whole dict is forwarded unchanged to
            ``training``.  ``args['path']`` is required: the result file is
            written to ``args['path'] + '/result.dump'``.

    Side effects:
        Prints progress to stdout and pickles
        ``[score_list, turn_list, best_agent]`` to the result file.

    Raises:
        KeyError: if ``'path'`` is missing from ``args`` (raised only when
            the results are written, i.e. after training has finished).
    """
    n_trials = 1
    n_epochs = 100

    # estimate
    best_score = 0
    best_turn = 1000
    best_agent = None
    score_list = []
    turn_list = []

    # NOTE(review): the loop nesting below was reconstructed from a
    # whitespace-mangled source (train for all epochs, then one test play
    # per trial) -- confirm against the intended structure.
    for trial in range(n_trials):
        agent = QLearning(117, 4)

        # training
        agent.greedy_rate = 0.5
        # The epoch counter has its own name so it no longer overwrites the
        # trial index that is reported after the test play.
        for epoch in range(n_epochs):
            print('')
            print("===========================  %s" % (epoch,))
            # Step the rate up by 0.05 per epoch while it is below 0.7.
            agent.greedy_rate += 0.05 if agent.greedy_rate < 0.7 else 0.0
            training(agent, args)

        # evaluation: one test play with the trained agent
        agent.greedy_rate = 0.7
        score, turn = play(agent)
        score_list.append(score)
        turn_list.append(turn)

        print('')
        print('test one play')
        print('%s %s %s %s %s' % (trial, int(numpy.mean(score_list)),
                                  max(score_list), score, turn))

        # The best agent is selected by lowest mean training error, not by
        # the score or turn count of its test play.
        if (best_agent is None or
                numpy.average(best_agent.train_error) >
                numpy.average(agent.train_error)):
            print('best train error !')
            best_score = score
            best_turn = turn
            best_agent = agent

    # Pickle streams are bytes: open the file in binary mode so the dump is
    # portable (text mode corrupts it on Windows and fails on Python 3).
    with open(args['path'] + '/result.dump', 'wb') as f:
        pickle.dump([score_list, turn_list, best_agent], f)

    print('')
    print("===========================")
    print('best score :  %s' % (best_score,))
    print('best turn :  %s' % (best_turn,))
def q_learning_nfq(**args):
    """Train a Q-learning agent over many epochs, then test-play it once.

    A ``QLearning(117, 4)`` agent is trained for 100 epochs with
    ``training(agent, args)`` while its ``greedy_rate`` is stepped up from
    0.5, then evaluated with a single ``play(agent)`` call.  Across trials
    (currently one) the agent with the lowest mean ``train_error`` is kept.

    Args:
        **args: Keyword options.  The whole dict is forwarded unchanged to
            ``training``.  ``args['path']`` is required: the result file is
            written to ``args['path'] + '/result.dump'``.

    Side effects:
        Prints progress to stdout and pickles
        ``[score_list, turn_list, best_agent]`` to the result file.

    Raises:
        KeyError: if ``'path'`` is missing from ``args`` (raised only when
            the results are written, i.e. after training has finished).
    """
    n_trials = 1
    n_epochs = 100

    # estimate
    best_score = 0
    best_turn = 1000
    best_agent = None
    score_list = []
    turn_list = []

    # NOTE(review): the loop nesting below was reconstructed from a
    # whitespace-mangled source (train for all epochs, then one test play
    # per trial) -- confirm against the intended structure.
    for trial in range(n_trials):
        agent = QLearning(117, 4)

        # training
        agent.greedy_rate = 0.5
        # The epoch counter has its own name so it no longer overwrites the
        # trial index that is reported after the test play.
        for epoch in range(n_epochs):
            print('')
            print("===========================  %s" % (epoch,))
            # Step the rate up by 0.05 per epoch while it is below 0.7.
            agent.greedy_rate += 0.05 if agent.greedy_rate < 0.7 else 0.0
            training(agent, args)

        # evaluation: one test play with the trained agent
        agent.greedy_rate = 0.7
        score, turn = play(agent)
        score_list.append(score)
        turn_list.append(turn)

        print('')
        print('test one play')
        print('%s %s %s %s %s' % (trial, int(numpy.mean(score_list)),
                                  max(score_list), score, turn))

        # The best agent is selected by lowest mean training error, not by
        # the score or turn count of its test play.
        if (best_agent is None or
                numpy.average(best_agent.train_error) >
                numpy.average(agent.train_error)):
            print('best train error !')
            best_score = score
            best_turn = turn
            best_agent = agent

    # Pickle streams are bytes: open the file in binary mode so the dump is
    # portable (text mode corrupts it on Windows and fails on Python 3).
    with open(args['path'] + '/result.dump', 'wb') as f:
        pickle.dump([score_list, turn_list, best_agent], f)

    print('')
    print("===========================")
    print('best score :  %s' % (best_score,))
    print('best turn :  %s' % (best_turn,))