Tank_model.setpoint)) ## Record information of state buffer.record((Last_state, action, reward, state), base_critic=base_critic, base_actor=base_actor, target_actor=target_actor, target_critic=target_critic, gamma=gamma, reward=reward) ## Update base actor and base critic base_actor, base_critic = buffer.learn( base_critic=base_critic, base_actor=base_actor, target_actor=target_actor, target_critic=target_critic, gamma=gamma, actor_optimizer=actor_optimizer, critic_optimizer=critic_optimizer) ## Soft update target actor and target critic target_actor, target_critic = update_target_single( tau, base_critic=base_critic, base_actor=base_actor, target_actor=target_actor, target_critic=target_critic) ## Remember noise produced Noise_mem.append(noise)