"""Benchmark script: pit a 'downsize' policy agent against an 'estimator' opponent.

Builds an Environment, registers an estimator-policy agent as the opponent,
then runs a 3000-game benchmark of a downsize-policy agent and prints the
resulting stats, followed by a dump of the freshly reset environment state.
"""
import numpy as np

from env import Environment
from policy_agent import PolicyAgent

# Arena setup: the estimator-policy agent plays as the opponent.
env = Environment()
estimator = PolicyAgent(env, policy='estimator')
env.add_opponent(estimator)

# Run the benchmark and report its metrics.
print(env.bench(PolicyAgent(env, policy='downsize'), times=3000))

# Reset and dump the initial state representation for inspection.
# NOTE(review): `_make_state` is a private API — confirm there is no public
# accessor before relying on this elsewhere.
env.reset()
print(env._make_state())
# NOTE(review): this chunk begins mid-statement — the opening of the
# summary.value.add(...) call (and the loop header this code lives in)
# are outside the visible region.
                          simple_value=no_consensus_score)
# Record the per-epoch no-consensus score in the event log.
writer.add_summary(summary, model.writer_step)

# Propagate the current no-consensus score to every environment before
# the exploration phase reads it.
for env in env_list:
    env.no_consensus_score = no_consensus_score

# Exploration phase: play 1024 games this epoch, advancing a running
# game offset so successive epochs get distinct game indices.
# (presumably self-play data collection — confirm against `model.explore`.)
model.explore(env_list, game_count=1024, game_off=game_off)
game_off += 1024

# Checkpoint the TF session every SAVE_EVERY epochs; checkpoint files are
# named by zero-padded epoch number.
if EPOCH % SAVE_EVERY == 0:
    saver.save(sess, os.path.join(SAVE_DIR, '{:08d}'.format(EPOCH)))

# Every BENCH_EVERY epochs, benchmark the model in each named benchmark
# environment and log every metric under bench_<tag>/<key>.
if EPOCH % BENCH_EVERY == 0:
    print('Running benchmark...')
    for tag, env in bench_env.items():
        bench = env.bench(model)
        # TODO(indutny): move logging to model?
        summary = tf.Summary()
        for key, value in bench.items():
            summary.value.add(tag='bench_{}/{}'.format(tag, key),
                              simple_value=value)
        writer.add_summary(summary, model.writer_step)

# Antagonist pool maintenance only starts once training has reached
# ANTAGONIST_EPOCH; earlier epochs skip straight to the next iteration.
if EPOCH < ANTAGONIST_EPOCH:
    continue

# Periodically snapshot the current weights into the antagonist pool so
# later epochs can train against frozen past versions of the model.
if EPOCH % ANTAGONIST_UPDATE_EVERY == 0:
    print('Adding new antagonist to the pool')
    weights = model.save_weights(sess)
    ANTAGONIST_WEIGHTS.append({'epoch': EPOCH, 'weights': weights})