def process_batch(engine, batch):
    """Run a single DQN optimisation step on ``batch``.

    Clears the optimizer's gradients, computes the loss with
    ``common.calc_loss_dqn`` against ``tgt_net.target_model``, backpropagates
    and steps the optimizer. The current iteration number is then handed to
    ``epsilon_tracker.frame`` and, whenever it is a multiple of
    ``params.target_net_sync``, ``tgt_net.sync()`` is called.

    Args:
        engine: object exposing the iteration counter as ``state.iteration``.
        batch: training batch, forwarded unchanged to ``common.calc_loss_dqn``.

    Returns:
        dict with keys ``"loss"`` (``loss.item()``) and ``"epsilon"``
        (the current ``selector.epsilon``).
    """
    optimizer.zero_grad()
    batch_loss = common.calc_loss_dqn(
        batch,
        net,
        tgt_net.target_model,
        gamma=params.gamma,
        device=device,
    )
    batch_loss.backward()
    optimizer.step()

    iteration = engine.state.iteration
    epsilon_tracker.frame(iteration)
    sync_due = iteration % params.target_net_sync == 0
    if sync_due:
        tgt_net.sync()

    stats = {"loss": batch_loss.item()}
    stats["epsilon"] = selector.epsilon
    return stats
示例#2
0
 def process_batch(engine, batch):
     """Run a single DQN optimisation step and periodically record layer SNR.

     Clears the optimizer's gradients, computes the loss with
     ``common.calc_loss_dqn`` against ``tgt_net.target_model``, backpropagates
     and steps the optimizer. The iteration number is passed to
     ``epsilon_tracker.frame``; ``tgt_net.sync()`` runs every
     ``params.target_net_sync`` iterations. Every ``NOISY_SNR_EVERY_ITERS``
     iterations, each value yielded by ``net.noisy_layers_sigma_snr()`` is
     stored in ``engine.state.metrics`` under ``"snr_<k>"`` with ``k``
     counted from 1.

     Args:
         engine: object exposing ``state.iteration`` and ``state.metrics``.
         batch: training batch, forwarded unchanged to ``common.calc_loss_dqn``.

     Returns:
         dict with the single key ``"loss"`` (``loss.item()``).
     """
     optimizer.zero_grad()
     batch_loss = common.calc_loss_dqn(
         batch, net, tgt_net.target_model, gamma=params.gamma, device=device
     )
     batch_loss.backward()
     optimizer.step()

     iteration = engine.state.iteration
     epsilon_tracker.frame(iteration)

     if not iteration % params.target_net_sync:
         tgt_net.sync()

     if not iteration % NOISY_SNR_EVERY_ITERS:
         # Metric names are 1-based: snr_1, snr_2, ...
         for layer_no, snr_value in enumerate(net.noisy_layers_sigma_snr(), start=1):
             engine.state.metrics[f"snr_{layer_no}"] = snr_value

     return {"loss": batch_loss.item()}
 def process_batch(engine, batch):
     """Run a single DQN optimisation step and periodically evaluate states.

     Clears the optimizer's gradients, computes the loss with
     ``common.calc_loss_dqn`` against ``tgt_net.target_model``, backpropagates
     and steps the optimizer. The iteration number is passed to
     ``epsilon_tracker.frame``; ``tgt_net.sync()`` runs every
     ``params.target_net_sync`` iterations. Every ``EVAL_EVER_FRAME``
     iterations ``evaluate_states`` is called on a fixed set of states that is
     sampled from ``buffer`` the first time it is needed and then cached on
     ``engine.state.eval_states``.

     Args:
         engine: object exposing ``state.iteration``; ``state.eval_states`` is
             created on it lazily by this function.
         batch: training batch, forwarded unchanged to ``common.calc_loss_dqn``.

     Returns:
         dict with keys ``"loss"`` (``loss.item()``) and ``"epsilon"``
         (the current ``selector.epsilon``).
     """
     optimizer.zero_grad()
     loss = common.calc_loss_dqn(batch, net, tgt_net.target_model, gamma=params.gamma, device=device)
     loss.backward()
     optimizer.step()
     epsilon_tracker.frame(engine.state.iteration)
     if engine.state.iteration % params.target_net_sync == 0:
         tgt_net.sync()
     if engine.state.iteration % EVAL_EVER_FRAME == 0:
         eval_states = getattr(engine.state, "eval_states", None)
         if eval_states is None:
             sampled = buffer.sample(STATES_TO_EVALUATE)
             # np.asarray instead of np.array(..., copy=False): since NumPy 2.0
             # copy=False raises ValueError whenever a copy cannot be avoided,
             # which is always the case when stacking a list of arrays.
             # np.asarray keeps the "copy only if needed" behaviour everywhere.
             eval_states = np.asarray(
                 [np.asarray(transition.state) for transition in sampled]
             )
             # Cache so every later evaluation uses the same states.
             engine.state.eval_states = eval_states
         evaluate_states(eval_states, net, device, engine)
     return {"loss": loss.item(), "epsilon": selector.epsilon}