def process_batch(engine, batch):
    """Perform one optimisation step on ``batch`` and report step metrics.

    The loss comes from ``common.calc_loss_dqn`` evaluated on ``net`` against
    ``tgt_net.target_model``. After the optimizer update the epsilon tracker
    is given the engine's current iteration, and ``tgt_net.sync()`` is called
    whenever that iteration is a multiple of ``params.target_net_sync``.

    Args:
        engine: Training engine; only ``engine.state.iteration`` is read.
        batch: Batch forwarded unchanged to ``common.calc_loss_dqn``.

    Returns:
        A dict with the scalar ``"loss"`` and the selector's current
        ``"epsilon"``.
    """
    iteration = engine.state.iteration

    optimizer.zero_grad()
    loss_t = common.calc_loss_dqn(
        batch,
        net,
        tgt_net.target_model,
        gamma=params.gamma,
        device=device,
    )
    loss_t.backward()
    optimizer.step()

    # The tracker is driven by the engine's iteration counter.
    epsilon_tracker.frame(iteration)

    sync_due = iteration % params.target_net_sync == 0
    if sync_due:
        tgt_net.sync()

    # Epsilon is read after the tracker update so the reported value is current.
    result = {"loss": loss_t.item()}
    result["epsilon"] = selector.epsilon
    return result
def process_batch(engine, batch):
    """Perform one optimisation step and periodically record noisy-layer SNR.

    The loss comes from ``common.calc_loss_dqn`` evaluated on ``net`` against
    ``tgt_net.target_model``. After the optimizer update the epsilon tracker
    is given the engine's current iteration, and ``tgt_net.sync()`` is called
    whenever that iteration is a multiple of ``params.target_net_sync``.
    Every ``NOISY_SNR_EVERY_ITERS`` iterations the values yielded by
    ``net.noisy_layers_sigma_snr()`` are stored in ``engine.state.metrics``
    under the keys ``snr_1``, ``snr_2``, ...

    Args:
        engine: Training engine; ``engine.state.iteration`` is read and
            ``engine.state.metrics`` may be written.
        batch: Batch forwarded unchanged to ``common.calc_loss_dqn``.

    Returns:
        A dict holding only the scalar ``"loss"``.
    """
    iteration = engine.state.iteration

    optimizer.zero_grad()
    loss_t = common.calc_loss_dqn(
        batch,
        net,
        tgt_net.target_model,
        gamma=params.gamma,
        device=device,
    )
    loss_t.backward()
    optimizer.step()

    epsilon_tracker.frame(iteration)

    sync_due = iteration % params.target_net_sync == 0
    if sync_due:
        tgt_net.sync()

    snr_due = iteration % NOISY_SNR_EVERY_ITERS == 0
    if snr_due:
        # Metric keys are 1-based: the first reported layer becomes "snr_1".
        layer_snrs = net.noisy_layers_sigma_snr()
        for layer_idx, snr_value in enumerate(layer_snrs):
            engine.state.metrics[f"snr_{layer_idx + 1}"] = snr_value

    result = {"loss": loss_t.item()}
    return result
def process_batch(engine, batch):
    """Perform one optimisation step and periodically evaluate fixed states.

    The loss comes from ``common.calc_loss_dqn`` evaluated on ``net`` against
    ``tgt_net.target_model``. After the optimizer update the epsilon tracker
    is given the engine's current iteration, and ``tgt_net.sync()`` is called
    whenever that iteration is a multiple of ``params.target_net_sync``.

    Every ``EVAL_EVER_FRAME`` iterations ``evaluate_states`` is called on a
    set of states. That set is sampled from ``buffer`` once
    (``STATES_TO_EVALUATE`` transitions) and cached on
    ``engine.state.eval_states`` so later evaluations reuse the same states.

    Args:
        engine: Training engine; ``engine.state.iteration`` is read and
            ``engine.state.eval_states`` is created on first evaluation.
        batch: Batch forwarded unchanged to ``common.calc_loss_dqn``.

    Returns:
        A dict with the scalar ``"loss"`` and the selector's current
        ``"epsilon"``.
    """
    optimizer.zero_grad()
    loss = common.calc_loss_dqn(
        batch, net, tgt_net.target_model, gamma=params.gamma, device=device
    )
    loss.backward()
    optimizer.step()

    epsilon_tracker.frame(engine.state.iteration)

    if engine.state.iteration % params.target_net_sync == 0:
        tgt_net.sync()

    if engine.state.iteration % EVAL_EVER_FRAME == 0:
        eval_states = getattr(engine.state, "eval_states", None)
        if eval_states is None:
            transitions = buffer.sample(STATES_TO_EVALUATE)
            # np.asarray copies only when necessary. The previous
            # np.array(..., copy=False) raises ValueError on NumPy >= 2.0
            # whenever a copy is unavoidable, which is always the case when
            # stacking a Python list of per-transition states.
            eval_states = np.asarray(
                [np.asarray(transition.state) for transition in transitions]
            )
            engine.state.eval_states = eval_states
        evaluate_states(eval_states, net, device, engine)

    return {"loss": loss.item(), "epsilon": selector.epsilon}