    def do_running_printout(self):
        clear()
        print("Simname: " + str(p['simname']))
        print("Episodes Elapsed: " + str(self.episode))
        print("Average Reward Per Episode: " + str(self.r_sum_avg))
        print("Average Number of Steps Spent Balancing Pole: " + str(self.steps_balancing_pole_avg))
        print("Max Number of Steps Spent Balancing Pole: " + str(np.max(np.array(self.steps_balancing_pole_avg_list))))
        print("Epsilon: " + str(self.epsilon))
        print("Epsilon Min: " + str(p['epsilon_min']))
        print("Alpha (learning rate): " + str(self.alpha * p['learning_rate']))
        if 'learning_rate_decay' in p:
            print("Alpha (learning rate) decay: " + str(p['learning_rate_decay']))
        if p['qsa_type'] == 'cluster_nnet':
            print("num_hidden: " + str(p['num_hidden']))
            print("num_selected: " + str(self.qsa.net.layer[0].num_selected))
        if p['qsa_type'] == 'nnet':
            print("Activation function: " + str(p['activation_function']))
            print("num_hidden: " + str(p['num_hidden']))
        if p['action_type'] == 'noisy_qsa':
            print("Average QSA Standard Deviation: " + str(self.qsa_std_avg))
            print("Probability of taking different action: " + str(self.prob_of_different_action))
        if p['qsa_type'] == 'cartpole_nnet':
            print("state given to nnet:\n" + str(np.array(self.qsa.net.input).transpose()))
        print("Average Steps Per Second: " + str(1.0 / self.avg_step_duration))
        print("Action Type: " + str(p['action_type']))
        print("a_list: " + str(self.tmp_a_list))
        # Format elapsed wall-clock time as h:mm:ss.
        m, s = divmod(time.time() - self.start_time, 60)
        h, m = divmod(m, 60)
        print("Elapsed Time %d:%02d:%02d" % (h, m, s))
        sys.stdout.flush()
        self.print_update_timer = time.time()  # reset the timer used to throttle printouts
    def run_sim(self, p):
        sim = cartpole_environment()
        reward_type = p.get('reward_type', 0)
        negative_reward = p['negative_reward']
        positive_reward = p['positive_reward']
        sim.init(p['vel_bound'], p['angle_vel_bound'], p['pos_bound'],
                 p['g'], p['l'], p['mp'], p['mc'], p['dt'],
                 p['negative_reward'], p['positive_reward'], p['no_reward'],
                 reward_type)
        v = visualize_sdl()
        v.init_vis(p['display_width'], p['display_height'],
                   p['axis_x_min'], p['axis_x_max'],
                   p['axis_y_min'], p['axis_y_max'], p['fps'])
        push_force = p['push_force']

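        # Bounds used to normalize the state features below. The pole angle is
        # later encoded as (sin, cos), so its [0, 2*pi] slot in mins/maxs is
        # replaced by the [-1, 1] bounds of those two features.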
        self.vel_bound = p['vel_bound']
        self.pos_bound = p['pos_bound']
        self.angle_vel_bound = p['angle_vel_bound']
        self.mins = np.array([0.0, -self.vel_bound, -self.pos_bound, -self.angle_vel_bound])
        self.maxs = np.array([2*math.pi,  self.vel_bound,  self.pos_bound,  self.angle_vel_bound])
        self.mins = np.append(np.array([-1.0, -1.0]), self.mins[1:])
        self.maxs = np.append(np.array([1.0, 1.0]), self.maxs[1:])
        self.incorrect_target = p['incorrect_target']
        self.correct_target = p['correct_target']
        self.num_actions = 3
        action = 0

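        # Interactive loop: read the keyboard, map key presses to a push force
        # on the cart, step the simulation, and redraw the cart-pole.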
        while True:
            if 'print_state_debug' in p:
                clear()
                # Debug printout of the normalized state: sin/cos-encoded angle
                # plus velocities/position rescaled from [mins, maxs] to roughly
                # [-1.125, 1.125], followed by per-action target values.
                action_list = np.ones((1, self.num_actions)) * self.incorrect_target
                action_list[0, action] = self.correct_target
                state = sim.sim.state
                s = np.append(np.array([math.sin(state[0]), math.cos(state[0])]), state[1:])
                s = (np.array(s) - self.mins) / (self.maxs - self.mins)
                s = s - 0.5
                s = s * 2.25
                s = np.append(s, action_list)
                np.set_printoptions(precision=4)
                print(str(s[:, np.newaxis]))
                print(str(np.array(sim.sim.state)[:, np.newaxis]))
            v.delay_vis()
            k = v.get_keys()

            # Map the pressed keys to an action and a push force on the cart:
            # k[0] applies -push_force, k[1] applies +push_force.
            u = 0.0
            action = 0
            if k[0]:
                action = 2
                u = -push_force
            if k[1]:
                action = 1
                u = push_force
            sim.set_action(u)
            sim.step()
            #if(sim.state[2] < -4.0):
            #    sim.state[2] = -4.0
            #if(sim.state[2] > 4.0):
            #    sim.state[2] = 4.0
            if sim.is_terminal:
                sim.reset_state()

            v.draw_cartpole(sim.get_state(), action, sim.get_reward(reward_type) / positive_reward)
            # Stop when the visualizer signals an exit request.
            done = v.update_vis()
            if done:
                break
        return
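
# run_sim(p) expects a parameter dict with at least the keys read above.
# The example values below are illustrative guesses, not the project's defaults:
#   'vel_bound', 'angle_vel_bound', 'pos_bound'        - state bounds, e.g. 4.0, 4.0, 2.4
#   'g', 'l', 'mp', 'mc', 'dt'                         - cart-pole physics, e.g. 9.81, 0.5, 0.1, 1.0, 0.02
#   'negative_reward', 'positive_reward', 'no_reward'  - e.g. -1.0, 1.0, 0.0
#   'reward_type'                                      - optional, defaults to 0
#   'display_width', 'display_height', 'fps'           - e.g. 800, 600, 60
#   'axis_x_min', 'axis_x_max', 'axis_y_min', 'axis_y_max' - view bounds, e.g. -3.0, 3.0, -2.0, 2.0
#   'push_force'                                       - e.g. 10.0
#   'incorrect_target', 'correct_target'               - per-action target values, e.g. 0.0, 1.0
#   'print_state_debug'                                - optional flag to dump the normalized state each frame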