Example #1
import numpy as np

class Environment:
    def __init__(self, num_items, use_all=False, val=False):
        self.env = PointingEnv(num_items=num_items, use_all=use_all, val=val)

    def run(self, agent, inspect=False):
        s = self.env.reset()
        R = 0
        global failed_cnt
        global done_cnt
        global R_total
        while True:
            # Encode the raw observation into the agent's latent state.
            sbar = agent.brain.encode(
                np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            a = agent.act(sbar)
            s_, r, done = self.env.step(a)

            s = s_
            R += r

            if done:
                # A terminal reward below 1 counts as a failed episode;
                # otherwise record the success and its accumulated reward.
                if r < 1:
                    failed_cnt += 1
                else:
                    done_cnt += 1
                    R_total += R
                break
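A minimal driver for this snippet might look like the following sketch. The counters and image constants are module-level globals the snippet reads, and `Agent` is a hypothetical stand-in for whatever agent class the original module defines; none of the values below come from the source.

# Hedged sketch of a driver loop for Example #1; `Agent` and all
# constants here are illustrative assumptions, not the original module.
IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS = 64, 64, 3
failed_cnt, done_cnt, R_total = 0, 0, 0

env = Environment(num_items=3, val=True)
agent = Agent()  # hypothetical: exposes .act() and .brain.encode()
for _ in range(100):
    env.run(agent)
success = done_cnt / max(done_cnt + failed_cnt, 1)
print("success rate:", success, "mean reward:", R_total / max(done_cnt, 1))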
Example #2
import numpy as np

class Environment:
    def __init__(self, num_items, use_all=False, val=False):
        self.env = PointingEnv(num_items=num_items, use_all=use_all, val=val)

    def run(self, agent):
        s = self.env.reset()
        R = 0
        global r_history
        # Flag: start using imaginary rollouts for training once enough
        # real episodes have elapsed.
        imaginary = episodes > I_START and USE_IMAGINARY
        # TODO: decide if imaginary or not
        while True:
            # Encode the observation into a latent mean and variance.
            z, sigma = agent.brain.encode(
                np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            a = agent.act(z)
            if imaginary:
                # Generate I_B imagined rollouts of depth at most I_D from
                # the current latent state; the chosen action a is reapplied
                # at each imagined step.
                for i_b in range(I_B):
                    agent.brain.env_model.init_model(z)
                    zhat = z
                    for i_d in range(I_D):
                        zhat_, rhat, donehat = agent.brain.env_model.step(a)
                        # Model outputs are continuous; round to usable values.
                        rhat, donehat = round(rhat), round(donehat)
                        agent.observe((zhat, a, rhat, zhat_, donehat),
                                      imaginary=True)
                        if donehat == 1:
                            break
                        zhat = zhat_
            # Take the real step and encode the next observation.
            s_, r, done = self.env.step(a)
            z_, sigma_ = agent.brain.encode(
                np.reshape(s_, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))

            agent.observe((z, a, r, z_, done, sigma, sigma_), imaginary=False)
            # Train the learned environment model on real experience.
            if (episodes >= ENV_LEARN_START) and USE_IMAGINARY and ENV_LEARN:
                for i in range(R_ENV):
                    agent.train_env()

            # R_CR replay steps on real transitions, R_CI on imagined ones.
            for i in range(R_CR):
                agent.replay(imaginary=False)
            if imaginary:
                for i in range(R_CI):
                    agent.replay(imaginary=True)

            s = s_
            R += r
            r_history[episodes] = R

            if done:
                break

        print("Total reward:", R, ", episode:", episodes)
Example #3
import numpy as np

class Environment:
    def __init__(self, num_items):
        self.env = PointingEnv(num_items)
        self.env_model = EnvironmentModel()

    def run(self, agent):
        s = self.env.reset()
        # Encode the initial observation into the latent space.
        sbar = agent.brain.encode(
            np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
        R = 0
        # After the first episode, run entirely inside the learned model.
        imaginary = False
        if episodes > 0:
            imaginary = True
            self.env_model.init_model(sbar)
        # TODO: decide if imaginary or not
        while True:
            a = agent.act(sbar)
            if imaginary:
                # Step the learned model; its reward and done outputs are
                # continuous predictions, so round them to usable values.
                sbar_, r, done = self.env_model.step(a)
                r, done = round(r), round(done)
            else:
                s_, r, done = self.env.step(a)
                sbar_ = agent.brain.encode(
                    np.reshape(s_, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))

            if done:  # terminal state
                sbar_ = None

            agent.observe((sbar, a, r, sbar_, done), imaginary=imaginary)
            agent.replay(imaginary=imaginary)

            if imaginary:
                sbar = sbar_
            else:
                s = s_
                sbar = agent.brain.encode(
                    np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            R += r

            if done:
                break

        print("Total reward:", R, ", episode:", episodes)
Example #4
import numpy as np

class Environment:
    def __init__(self, num_items):
        self.env = PointingEnv(num_items)

    def run(self, agent, inspect=False):
        s = self.env.reset()
        R = 0
        global selected
        global ss
        global r_history
        # Cache one encoded reference state the first time through.
        if not selected:
            ss = agent.brain.encode(
                np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            selected = True
        while True:
            if inspect:
                self.env.printState()

            # Encode the current observation into the agent's latent state.
            sbar = agent.brain.encode(
                np.reshape(s, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            a = agent.act(sbar)
            s_, r, done = self.env.step(a)

            sbar_ = agent.brain.encode(
                np.reshape(s_, (1, IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)))
            if done:  # terminal state
                sbar_ = None

            agent.observe((sbar, a, r, sbar_, done))
            agent.replay()

            s = s_
            R += r
            r_history[episodes] = R

            if done:
                break

        print("Total reward:", R, ", episode:", episodes)
Example #5
def __init__(self, num_items, use_all=False, val=False):
    self.env = PointingEnv(num_items=num_items, use_all=use_all, val=val)
Example #6
def __init__(self, num_items):
    self.env = PointingEnv(num_items)
    self.env_model = EnvironmentModel()
Example #7
def __init__(self, num_items):
    self.env = PointingEnv(num_items)
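All seven examples construct a `PointingEnv` with the same small interface: `reset()` returning an observation, `step(a)` returning `(observation, reward, done)`, and `printState()` for inspection. The stub below only documents that assumed contract; the shapes, dimensions, and return values are placeholders, since the actual environment is not shown on this page.

import numpy as np

IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS = 64, 64, 3  # placeholder dims

class PointingEnvStub:
    """Documents the interface the examples above assume of PointingEnv."""

    def __init__(self, num_items, use_all=False, val=False):
        self.num_items = num_items

    def reset(self):
        # Return an initial observation (placeholder blank image).
        return np.zeros((IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS))

    def step(self, a):
        # Return (next observation, reward, done); terminates immediately
        # here purely as a placeholder behavior.
        return np.zeros((IMAGE_WIDTH, IMAGE_HEIGHT, CHANNELS)), 1.0, True

    def printState(self):
        print("PointingEnv with", self.num_items, "items")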