Example #1
def flush_to_stdout(self, epoch):
    stats = dict(self.base_stats)
    stats.update({"dts_h": util.dts(), "epoch": epoch,
                  "n_egs_trained": self.n_egs_trained,
                  "elapsed_time": int(time.time()) - self.start_time,
                  "train_cost": util.mean_sd(self.train_costs),
                  "dev_cost": util.mean_sd(self.dev_costs),
                  "dev_acc": self.dev_accuracy})
    if self.norms:
        stats.update({"norms": self.norms})
    print "STATS\t%s" % json.dumps(stats)
    sys.stdout.flush()
    self.reset()
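
flush_to_stdout calls two helpers from a util module that is not shown in these examples. Purely as a hypothetical sketch (assuming dts() returns a formatted timestamp string and mean_sd() summarizes a list of floats), they might look like:

# util.py -- hypothetical sketch, not the actual module used by the examples above
import datetime
import numpy as np

def dts():
    # datetime string suitable for prefixing log lines, e.g. "2017-01-02 03:04:05"
    return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

def mean_sd(values):
    # JSON-serializable mean / standard deviation summary; empty lists give None
    if not values:
        return None
    return {"mean": float(np.mean(values)), "sd": float(np.std(values))}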
Example #2
def flush_to_stdout(self, epoch):
    stats = dict(self.base_stats)
    stats.update({
        "dts_h": util.dts(),
        "epoch": epoch,
        "n_egs_trained": self.n_egs_trained,
        "elapsed_time": int(time.time()) - self.start_time,
        "train_cost": util.mean_sd(self.train_costs),
        "dev_cost": util.mean_sd(self.dev_costs),
        "dev_acc": self.dev_accuracy
    })
    if self.norms:
        stats.update({"norms": self.norms})
    print "STATS\t%s" % json.dumps(stats)
    sys.stdout.flush()
    self.reset()
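
Both versions emit one tab-separated "STATS\t<json>" line per flush, so a downstream consumer can recover the payload with a plain split. A minimal, hypothetical reader of that stream (the key names are taken from the update() call above):

# hypothetical consumer of the STATS lines printed by flush_to_stdout
import json
import sys

def parse_stats_line(line):
    # lines look like "STATS\t{...json...}"; return the decoded dict or None
    if not line.startswith("STATS\t"):
        return None
    return json.loads(line.split("\t", 1)[1])

for line in sys.stdin:
    stats = parse_stats_line(line.rstrip("\n"))
    if stats is not None:
        sys.stdout.write("%s dev_acc=%s\n" % (stats["dts_h"], stats.get("dev_acc")))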
Example #3
def log(s):
    print >> sys.stderr, util.dts(), s
Example #4
# init our rl_agent
agent_cstr = eval("agents.%sAgent" % opts.agent)
agent = agent_cstr(opts)

# init event log (if logging events)
event_log = event_log.EventLog(opts.event_log_out) if opts.event_log_out else None

# hook up connection to trainer
if opts.trainer_port == 0:
  trainer = None
else:
  channel = grpc.insecure_channel("localhost:%d" % opts.trainer_port)
  trainer = model_pb2.ModelStub(channel)

for episode_idx in itertools.count(0):
  print util.dts(), "EPISODE", episode_idx, "eval", opts.eval

  # start new mission; explicitly wait for first observation
  # (not just world_state.has_mission_begun)
  mission_start = time.time()
  while True:
    try:
      # TODO: work out why this blocks and how to get it timeout somehow...
      malmo.startMission(mission, client_pool, mission_record, 0, "")
      break
    except RuntimeError as r:
      # have observed that getting stuck here doesn't recover, even if the servers
      # are restarted. try to recreate everything
      print >>sys.stderr, util.dts(), "failed to start mission", r
      print >>sys.stderr, util.dts(), "recreating malmo components..."
      time.sleep(1)
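
The retry loop above never gives up, and the TODO notes that startMission itself can block. One option, sketched here as an assumption rather than the project's actual approach, is to at least cap the total time spent retrying (this bounds the retries but cannot unblock a call that has already hung):

# hypothetical variant of the retry loop above with an overall retry budget
MAX_MISSION_START_TIME = 60  # seconds; assumed budget, not from the original code
mission_start = time.time()
while True:
  try:
    malmo.startMission(mission, client_pool, mission_record, 0, "")
    break
  except RuntimeError as r:
    if time.time() - mission_start > MAX_MISSION_START_TIME:
      raise
    print >>sys.stderr, util.dts(), "failed to start mission", r
    time.sleep(1)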
Example #5
def log(s):
    print >>sys.stderr, util.dts(), s
Example #6
spec = spec.replace("__WIDTH__", str(opts.width))
spec = spec.replace("__HEIGHT__", str(opts.height))
spec = spec.replace("__EPISODE_TIME_MS__", str(opts.episode_time_ms))
mission = MalmoPython.MissionSpec(spec, True)
mission_record = MalmoPython.MissionRecordSpec()

# init our rl_agent
agent_cstr = eval("agents.%sAgent" % opts.agent)
agent = agent_cstr(opts)

event_log = event_log.EventLog(
    opts.event_log_out) if opts.event_log_out else None

for episode_idx in itertools.count(1):
    eval_episode = (episode_idx % opts.eval_freq == 0)
    print >> sys.stderr, "EPISODE", episode_idx, util.dts(), "eval =", eval_episode

    # start new mission; explicitly wait for first observation
    # (not just world_state.has_mission_begun)
    mission_start = time.time()
    while True:
        try:
            malmo.startMission(mission, mission_record)
            break
        except RuntimeError as r:
            print >> sys.stderr, "failed to start mission", r
            time.sleep(1)
    world_state = malmo.getWorldState()
    while len(world_state.observations) == 0:
        print >> sys.stderr, "started, but no obs?"
        time.sleep(0.1)
        world_state = malmo.getWorldState()  # re-poll, otherwise the loop never sees new observations
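
The poll-until-first-observation pattern shows up in several of these examples; a small helper, again only a hypothetical sketch assuming getWorldState() is cheap to poll, keeps the episode loop tidier:

# hypothetical helper wrapping the polling loop above
def wait_for_first_observation(malmo, poll_interval=0.1):
    while True:
        world_state = malmo.getWorldState()
        if len(world_state.observations) > 0:
            return world_state
        print >> sys.stderr, "started, but no obs?"
        time.sleep(poll_interval)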
Example #7
def run_trainer(episodes, opts):
    # init replay memory
    render_shape = (opts.height, opts.width, 3)
    replay_memory = rm.ReplayMemory(opts=opts,
                                    state_shape=render_shape,
                                    action_dim=2,
                                    load_factor=1.1)
    if opts.event_log_in:
        replay_memory.reset_from_event_logs(opts.event_log_in,
                                            opts.event_log_in_num,
                                            opts.reset_smooth_reward_factor)

    # init network for training
    config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    #config.log_device_placement = True
    config.gpu_options.per_process_gpu_memory_fraction = opts.gpu_mem_fraction
    sess = tf.Session(config=config)

    network = models.NafNetwork("naf", action_dim=2, opts=opts)

    with sess.as_default():
        # setup saver util and either load saved ckpt or init variables
        saver = ckpt_util.TrainerCkptSaver(sess, opts.ckpt_dir,
                                           opts.ckpt_save_freq)
        for v in tf.all_variables():
            if '/biases:' not in v.name:
                print >> sys.stderr, v.name, util.shape_and_product_of(v)
        network.setup_target_network()

        # while true process episodes from run_agents
        print util.dts(), "waiting for episodes"
        while True:
            start_time = time.time()
            episode = episodes.get()
            wait_time = time.time() - start_time

            start_time = time.time()
            replay_memory.add_episode(
                episode, smooth_reward_factor=opts.smooth_reward_factor)
            losses = []
            if replay_memory.burnt_in():
                for _ in xrange(opts.batches_per_new_episode):
                    batch = replay_memory.batch(opts.batch_size)
                    batch_losses = network.train(batch).T[0]  # .T[0] => (B, 1) -> (B,)
                    replay_memory.update_priorities(batch.idxs, batch_losses)
                    network.target_value_net.update_target_weights()
                    losses.extend(batch_losses)
                saver.save_if_required()
            process_time = time.time() - start_time

            stats = {
                "wait_time": wait_time,
                "process_time": process_time,
                "pending": episodes.qsize(),
                "replay_memory": replay_memory.stats
            }
            if losses:
                stats['loss'] = {
                    "min": float(np.min(losses)),
                    "median": float(np.median(losses)),
                    "mean": float(np.mean(losses)),
                    "max": float(np.max(losses))
                }
            print "STATS\t%s\t%s" % (util.dts(), json.dumps(stats))