Exemplo n.º 1
0
    def run(self, searchDepth=2, verbose=False):
        """Breadth-first lookahead of `searchDepth` levels from the root
        states, followed by a Bellman (min) backup of the heuristic values.

        Args:
            searchDepth: number of move levels to expand below the roots.
                Must be >= 1 (the backup loop assumes at least one expansion).
            verbose: accepted for interface compatibility; not used here.

        Returns:
            (rootValsBackup, nextStatesValueReward):
                rootValsBackup - backed-up cost-to-go for each root state.
                nextStatesValueReward - value + reward for each depth-1
                child of the roots (children of the last backup iteration).
        """
        statesAtDepth = []
        rewardsAtDepth = []
        isSolvedAtDepth = []

        states_root = self.combineNodes(self.roots)
        isSolved_root = self.Environment.checkSolved(states_root)

        statesAtDepth.append(states_root)
        rewardsAtDepth.append(
            self.Environment.getReward(states_root, isSolved_root))
        isSolvedAtDepth.append(isSolved_root)

        # Expand one level at a time down to searchDepth.
        for depth in range(1, searchDepth + 1):
            nextStates, nextStateRewards, nextStateSolved = nnet_utils.getNextStates(
                statesAtDepth[-1], self.Environment)
            # Flatten (numStates, movesPerState, stateDim) ->
            # (numStates * movesPerState, stateDim).
            nextStates = nextStates.reshape([
                nextStates.shape[0] * nextStates.shape[1], nextStates.shape[2]
            ])

            statesAtDepth.append(nextStates)
            rewardsAtDepth.append(nextStateRewards)
            isSolvedAtDepth.append(nextStateSolved)

        # Evaluate the leaf states with the heuristic function.
        isSolved = isSolvedAtDepth[-1]
        valsBackup = self.heuristicFn(statesAtDepth[-1])

        # BUG FIX: use floor division. On Python 3, `/` yields a float and
        # ndarray.reshape raises "TypeError: 'float' object cannot be
        # interpreted as an integer".
        valsBackup = valsBackup.reshape(
            valsBackup.shape[0] // len(self.legalMoves), len(self.legalMoves))
        # Solved states have zero cost-to-go.
        valsBackup = valsBackup * (np.logical_not(isSolved)) + 0.0 * (isSolved)

        # Back the values up from the leaves to the roots.
        for depth in range(len(statesAtDepth) - 2, -1, -1):
            valsBackup_children = valsBackup
            rewards_children = rewardsAtDepth[depth + 1]

            # Bellman backup: parent value = min over moves of
            # (move reward + child value).
            valsBackup = np.min(rewards_children + valsBackup_children, 1)

            isSolved = isSolvedAtDepth[depth]
            if depth > 0:
                # BUG FIX: floor division, as above.
                valsBackup = valsBackup.reshape(
                    valsBackup.shape[0] // len(self.legalMoves),
                    len(self.legalMoves))
            valsBackup = valsBackup * (np.logical_not(isSolved)) + 0.0 * (
                isSolved)

        rootValsBackup = valsBackup
        # value + reward for the depth-1 children (children seen in the final
        # backup iteration, i.e. the roots' immediate successors).
        nextStatesValueReward = valsBackup_children + rewards_children

        return (rootValsBackup, nextStatesValueReward)
Exemplo n.º 2
0
    def generateToDepth(states_root, depth, Environment):
        """Expand the root states breadth-first for `depth` levels.

        Args:
            states_root: batch of root states (level 0).
            depth: number of levels to expand below the roots (0 = roots only).
            Environment: environment object providing checkSolved(states) and
                getReward(states, isSolved).

        Returns:
            (statesAtDepth, rewardsAtDepth, isSolvedAtDepth): parallel lists
            with one entry per level 0..depth.
        """
        statesAtDepth = []
        rewardsAtDepth = []
        isSolvedAtDepth = []

        isSolved_root = Environment.checkSolved(states_root)

        statesAtDepth.append(states_root)
        rewardsAtDepth.append(Environment.getReward(states_root, isSolved_root))
        isSolvedAtDepth.append(isSolved_root)

        # FIX: loop variable renamed — the original reused `depth`, shadowing
        # the parameter (harmless because range() is evaluated once, but
        # confusing to readers).
        for level in range(1, depth + 1):
            nextStates, nextStateRewards, nextStateSolved = nnet_utils.getNextStates(
                statesAtDepth[-1], Environment)
            # Flatten (numParents, movesPerState, stateDim) ->
            # (numParents * movesPerState, stateDim).
            nextStates = nextStates.reshape(
                [nextStates.shape[0] * nextStates.shape[1], nextStates.shape[2]])

            statesAtDepth.append(nextStates)
            rewardsAtDepth.append(nextStateRewards)
            isSolvedAtDepth.append(nextStateSolved)

        return (statesAtDepth, rewardsAtDepth, isSolvedAtDepth)
Exemplo n.º 3
0
    def generateToDepth(states_root, depth, Environment):
        """Expand `states_root` level by level and return, for every level
        0..depth, the states, their rewards, and their solved flags.
        (Original comment, translated: expands the state starting from
        states_root and returns the list with the rewards.)
        """
        levelStates = [states_root]
        levelSolved = [Environment.checkSolved(states_root)]
        levelRewards = [Environment.getReward(states_root, levelSolved[0])]

        for _ in range(depth):
            childStates, childRewards, childSolved = nnet_utils.getNextStates(
                levelStates[-1], Environment)
            # Collapse the per-parent move axis into one flat batch.
            childStates = childStates.reshape(
                [childStates.shape[0] * childStates.shape[1],
                 childStates.shape[2]])

            levelStates.append(childStates)
            levelRewards.append(childRewards)
            levelSolved.append(childSolved)

        return (levelStates, levelRewards, levelSolved)
Exemplo n.º 4
0
    def expand_static(self, states, verbose=False):
        """Expand a batch of states: generate all children, record unseen
        (or cheaper-to-reach) ones, evaluate them with the heuristic, and
        push them onto the `unexpanded` priority queue.

        Args:
            states: 2-D array of states to expand, shape (numStates, stateDim).
            verbose: if True, print per-phase timing and add counts.

        Returns:
            (cVals_add, cDepths_add): heuristic values and depths of the
            children that were added. NOTE: when nothing is added,
            cVals_add is an empty list while cDepths_add is an (empty)
            array slice — callers should only rely on len()/iteration.
        """
        # seenNodes entry layout (per original comment):
        # 0:state, 1:value, 2:isSolved, 3:reward, 4:parent_move, 5:depth
        seenNodes = self.seenNodes

        ### Get next states
        startTime = time.time()
        cStates, cRewards, cIsSolveds = nnet_utils.getNextStates(
            states, self.Environment)  # next states
        cStates = cStates.astype(self.Environment.dtype)

        numStates = states.shape[0]
        childrenPerState = cStates.shape[1]
        numChildren = numStates * childrenPerState

        # Flatten (numStates, childrenPerState, stateDim) into one batch.
        cStates = cStates.reshape(
            (numStates * childrenPerState,
             cStates.shape[2]))  # reshape to numStates*childrenPerState
        cRewards = cRewards.reshape((numStates * childrenPerState))
        cIsSolveds = cIsSolveds.reshape((numStates * childrenPerState))

        # BUG FIX: `range(childrenPerState) * numStates` is a TypeError on
        # Python 3 (range objects cannot be multiplied). np.tile reproduces
        # the same [0..k-1, 0..k-1, ...] pattern.
        cParentMoves = np.tile(np.arange(childrenPerState), numStates)

        self.numGenerated = self.numGenerated + cStates.shape[0]

        nextStateTime = time.time() - startTime
        """
        ### Send data to be evaluated
        resQueue = Queue(1)
        heuristicProc = Process(target=lambda x: resQueue.put(self.computeNodeValues(x)[:,0]), args=(cStates,))
        heuristicProc.daemon = True
        heuristicProc.start()
        """

        ### Get all child information
        startTime = time.time()

        # Children inherit their parent's depth (looked up via the parent's
        # byte-string hash key), then +1 below.
        # NOTE: tobytes() replaces the removed numpy alias tostring();
        # the produced bytes (and thus dict keys) are identical.
        cDepths = np.expand_dims(
            [seenNodes[state.tobytes()][1] for state in states], axis=1)
        cDepths = np.repeat(cDepths, childrenPerState, axis=1).reshape(
            (numStates * childrenPerState))
        cParentHashReps = []
        for state in states:
            stateHashRep = state.tobytes()
            for cIdx in range(childrenPerState):
                cParentHashReps.append(stateHashRep)

        #cDepths = cDepths + np.array([len(self.legalMoves[x]) if type(self.legalMoves[x][0]) == type(list()) else 1 for x in cParentMoves])

        cDepths = cDepths + 1

        cHashReps = [x.tobytes() for x in cStates]

        childrenInfoTime = time.time() - startTime

        ### Add states that haven't been seen, or are reached at a
        ### shallower depth than before.
        startTime = time.time()
        addToQueue_idxs = []
        for cIdx in range(numChildren):
            cParentMove = cParentMoves[cIdx]
            cDepth = cDepths[cIdx]
            cHashRep = cHashReps[cIdx]
            cParentHashRep = cParentHashReps[cIdx]

            getNode = seenNodes.get(cHashRep)
            if (getNode is None) or (cDepth < getNode[1]):
                addToQueue_idxs.append(cIdx)
                self.addNewNode(cHashRep, cParentMove, cDepth, cParentHashRep)

        cStates_add = cStates[addToQueue_idxs]
        cDepths_add = cDepths[addToQueue_idxs]
        cIsSolveds_add = cIsSolveds[addToQueue_idxs]

        checkSeenTime = time.time() - startTime

        ### Compute values
        startTime = time.time()
        if cStates_add.shape[0] > 0:
            cVals_add = self.computeNodeValues(cStates_add)[:, 0]
            #cVals_add = resQueue.get()[addToQueue_idxs]
            #heuristicProc.join()
            #heuristicProc.terminate()

            computeValueTime = time.time() - startTime

            ### Push to priority queue
            startTime = time.time()
            # Solved states get value 0; depth penalty biases toward
            # shallower nodes.
            heapVals = cVals_add * (np.logical_not(cIsSolveds_add)
                                    ) + cDepths_add * self.depthPenalty

            # nodeCount is a monotone tiebreaker so heappush never has to
            # compare two ndarrays when heapVals are equal.
            for heapVal, cState in zip(heapVals, cStates_add):
                heappush(self.unexpanded, (heapVal, self.nodeCount, cState))
                self.nodeCount = self.nodeCount + 1

            heapPushTime = time.time() - startTime
        else:
            cVals_add = []
            computeValueTime = time.time() - startTime
            heapPushTime = time.time() - startTime

        if verbose:
            print(
                "TIMES - Next state: %.3f, children data proc: %.3f, check seen: %.3f, val comp: %.3f, heappush: %.3f"
                % (nextStateTime, childrenInfoTime, checkSeenTime,
                   computeValueTime, heapPushTime))
            print("%i Children, %i Added" %
                  (numChildren, len(addToQueue_idxs)))
            #print([int(x) for x in cStates[np.argmin(cVals_add)]])
        return (cVals_add, cDepths_add)