def run(self, searchDepth=2, verbose=False):
    """Expand the root states to a fixed depth and back up heuristic values.

    The combined root states are expanded ``searchDepth`` times with
    ``nnet_utils.getNextStates``. The deepest level is scored with
    ``self.heuristicFn`` and the scores are propagated back to the roots by
    taking, for every parent, the minimum over its children of
    ``reward + child value``. Values of states flagged as solved are
    multiplied by zero at every level.

    Args:
        searchDepth: Number of expansion levels; must be at least 1.
        verbose: Accepted for interface compatibility; not used here.

    Returns:
        A tuple ``(rootValsBackup, nextStatesValueReward)``:
        ``rootValsBackup`` is the backed-up value of each root state and
        ``nextStatesValueReward`` is ``reward + backed-up value`` for the
        children of the roots (the quantity minimised at the root level).

    Raises:
        ValueError: If ``searchDepth`` is smaller than 1. Without at least
            one expansion there are no child values to return.
    """
    if searchDepth < 1:
        raise ValueError(
            "searchDepth must be at least 1, got %r" % (searchDepth,))

    numMoves = len(self.legalMoves)

    states_root = self.combineNodes(self.roots)
    isSolved_root = self.Environment.checkSolved(states_root)

    # Index i of each list holds the data of search level i (0 == roots).
    statesAtDepth = [states_root]
    rewardsAtDepth = [self.Environment.getReward(states_root, isSolved_root)]
    isSolvedAtDepth = [isSolved_root]

    for _ in range(searchDepth):
        nextStates, nextStateRewards, nextStateSolved = \
            nnet_utils.getNextStates(statesAtDepth[-1], self.Environment)
        # Collapse the first two axes so every child becomes one row.
        nextStates = nextStates.reshape([
            nextStates.shape[0] * nextStates.shape[1],
            nextStates.shape[2],
        ])
        statesAtDepth.append(nextStates)
        rewardsAtDepth.append(nextStateRewards)
        isSolvedAtDepth.append(nextStateSolved)

    # Score the deepest level and regroup the scores per parent.
    # NOTE(review): assumes rewards / solved flags returned by
    # getNextStates are shaped (numParents, numMoves) -- confirm.
    isSolved = isSolvedAtDepth[-1]
    valsBackup = self.heuristicFn(statesAtDepth[-1])
    # Floor division: reshape() needs integer dimensions, and "/" yields a
    # float on Python 3.
    valsBackup = valsBackup.reshape(valsBackup.shape[0] // numMoves, numMoves)
    # The "0.0 * isSolved" term is kept so the result dtype is unchanged.
    valsBackup = valsBackup * (np.logical_not(isSolved)) + 0.0 * (isSolved)

    # Walk back towards the roots, one level at a time.
    for depth in range(len(statesAtDepth) - 2, -1, -1):
        valsBackup_children = valsBackup
        rewards_children = rewardsAtDepth[depth + 1]
        valsBackup = np.min(rewards_children + valsBackup_children, 1)

        isSolved = isSolvedAtDepth[depth]
        if depth > 0:
            # Regroup per parent of the next level up; the root level stays
            # flat.
            valsBackup = valsBackup.reshape(
                valsBackup.shape[0] // numMoves, numMoves)

        valsBackup = valsBackup * (np.logical_not(isSolved)) + 0.0 * (isSolved)

    rootValsBackup = valsBackup
    # After the loop these refer to the children of the roots.
    nextStatesValueReward = valsBackup_children + rewards_children

    return (rootValsBackup, nextStatesValueReward)
def generateToDepth(states_root, depth, Environment):
    """Expand ``states_root`` level by level and collect per-level data.

    Level 0 is the root: its solved flags come from
    ``Environment.checkSolved`` and its rewards from
    ``Environment.getReward``. Each further level is produced by
    ``nnet_utils.getNextStates`` applied to the previous level's states,
    with the first two axes of the returned states merged into one.

    Args:
        states_root: States to start the expansion from.
        depth: Number of expansion levels to generate after the root.
        Environment: Object providing ``checkSolved`` and ``getReward``;
            it is also passed on to ``nnet_utils.getNextStates``.

    Returns:
        A tuple ``(statesAtDepth, rewardsAtDepth, isSolvedAtDepth)`` of
        lists, each holding ``depth + 1`` entries (root first).
    """
    root_solved = Environment.checkSolved(states_root)
    root_reward = Environment.getReward(states_root, root_solved)

    level_states = [states_root]
    level_rewards = [root_reward]
    level_solved = [root_solved]

    for _ in range(depth):
        children, child_rewards, child_solved = nnet_utils.getNextStates(
            level_states[-1], Environment)
        # One row per child: merge the first two axes.
        flat_shape = [children.shape[0] * children.shape[1],
                      children.shape[2]]
        level_states.append(children.reshape(flat_shape))
        level_rewards.append(child_rewards)
        level_solved.append(child_solved)

    return (level_states, level_rewards, level_solved)
def generateToDepth(states_root, depth, Environment):
    """Build the search levels reachable from ``states_root``.

    Args:
        states_root: States at level 0.
        depth: How many levels to expand below the root.
        Environment: Supplies ``checkSolved`` and ``getReward`` for the root
            and is forwarded to ``nnet_utils.getNextStates``.

    Returns:
        ``(statesAtDepth, rewardsAtDepth, isSolvedAtDepth)``: three lists
        with one entry per level, the root level first.
    """
    # GUSTAVO: Expands the state starting from states_root and returns the
    # lists, including the one with the rewards.
    solved_flags = Environment.checkSolved(states_root)
    levels = [(states_root,
               Environment.getReward(states_root, solved_flags),
               solved_flags)]

    for _step in range(depth):
        frontier = levels[-1][0]
        expanded, step_rewards, step_solved = nnet_utils.getNextStates(
            frontier, Environment)
        # Flatten the first two axes so each child state is a single row.
        row_count = expanded.shape[0] * expanded.shape[1]
        expanded = expanded.reshape([row_count, expanded.shape[2]])
        levels.append((expanded, step_rewards, step_solved))

    statesAtDepth = [level[0] for level in levels]
    rewardsAtDepth = [level[1] for level in levels]
    isSolvedAtDepth = [level[2] for level in levels]
    return (statesAtDepth, rewardsAtDepth, isSolvedAtDepth)
def expand_static(self, states, verbose=False):
    """Expand ``states`` and queue the children worth exploring.

    Children are generated with ``nnet_utils.getNextStates`` and flattened
    to one row per child. A child is kept when its byte representation is
    not yet a key of ``self.seenNodes``, or when its depth (parent depth
    plus one) is smaller than the depth stored for it. Kept children are
    registered through ``self.addNewNode``, scored with
    ``self.computeNodeValues`` (column 0) and pushed onto the
    ``self.unexpanded`` heap with priority
    ``value * (not solved) + depth * self.depthPenalty``.

    Side effects: increases ``self.numGenerated`` by the number of
    generated children and ``self.nodeCount`` by the number of pushed ones.

    Args:
        states: 2-D array of states to expand, one state per row. Every
            state must already be a key of ``self.seenNodes``.
        verbose: When true, print timing and count statistics.

    Returns:
        A tuple ``(cVals_add, cDepths_add)`` with the values and depths of
        the children that were queued. ``cVals_add`` is an empty list when
        no child was queued.
    """
    # NOTE(review): a legacy comment here listed a six-field node layout
    # (state, value, isSolved, reward, parent_move, depth), but this method
    # reads index 1 of a seenNodes entry as the depth -- confirm against
    # addNewNode.
    seenNodes = self.seenNodes

    ### Get next states
    startTime = time.time()
    cStates, _cRewards, cIsSolveds = nnet_utils.getNextStates(
        states, self.Environment)
    cStates = cStates.astype(self.Environment.dtype)

    numStates = states.shape[0]
    childrenPerState = cStates.shape[1]
    numChildren = numStates * childrenPerState

    # Flatten to one row / entry per child.
    cStates = cStates.reshape((numChildren, cStates.shape[2]))
    cIsSolveds = cIsSolveds.reshape((numChildren))
    # Move index of each child: 0..childrenPerState-1 repeated per parent.
    # (np.tile replaces "range(...) * numStates", which fails on Python 3.)
    cParentMoves = np.tile(np.arange(childrenPerState), numStates)

    self.numGenerated = self.numGenerated + cStates.shape[0]
    nextStateTime = time.time() - startTime

    ### Get all child information
    startTime = time.time()
    # tobytes() returns the same bytes as the removed ndarray.tostring(),
    # so keys stay compatible with existing seenNodes entries.
    parentHashReps = [state.tobytes() for state in states]
    parentDepths = [seenNodes[hashRep][1] for hashRep in parentHashReps]
    # Every child sits one level below its parent.
    cDepths = np.repeat(parentDepths, childrenPerState) + 1
    cParentHashReps = [
        hashRep
        for hashRep in parentHashReps
        for _ in range(childrenPerState)
    ]
    cHashReps = [cState.tobytes() for cState in cStates]
    childrenInfoTime = time.time() - startTime

    ### Add states that haven't been seen
    startTime = time.time()
    addToQueue_idxs = []
    childInfo = zip(cHashReps, cParentMoves, cDepths, cParentHashReps)
    # Must stay sequential: addNewNode is called between lookups, so
    # batching the seenNodes lookups could change which duplicates are kept.
    for cIdx, (cHashRep, cParentMove, cDepth, cParentHashRep) in enumerate(
            childInfo):
        getNode = seenNodes.get(cHashRep)
        if (getNode is None) or (cDepth < getNode[1]):
            addToQueue_idxs.append(cIdx)
            self.addNewNode(cHashRep, cParentMove, cDepth, cParentHashRep)

    cStates_add = cStates[addToQueue_idxs]
    cDepths_add = cDepths[addToQueue_idxs]
    cIsSolveds_add = cIsSolveds[addToQueue_idxs]
    checkSeenTime = time.time() - startTime

    ### Compute values
    startTime = time.time()
    if cStates_add.shape[0] > 0:
        cVals_add = self.computeNodeValues(cStates_add)[:, 0]
        computeValueTime = time.time() - startTime

        ### Push to priority queue
        startTime = time.time()
        heapVals = (cVals_add * (np.logical_not(cIsSolveds_add)) +
                    cDepths_add * self.depthPenalty)
        for heapVal, cState in zip(heapVals, cStates_add):
            # nodeCount is a unique tie-breaker, so equal priorities never
            # fall through to comparing the state arrays.
            heappush(self.unexpanded, (heapVal, self.nodeCount, cState))
            self.nodeCount = self.nodeCount + 1
        heapPushTime = time.time() - startTime
    else:
        cVals_add = []
        computeValueTime = time.time() - startTime
        heapPushTime = time.time() - startTime

    if verbose:
        print(
            "TIMES - Next state: %.3f, children data proc: %.3f, check seen: %.3f, val comp: %.3f, heappush: %.3f"
            % (nextStateTime, childrenInfoTime, checkSeenTime,
               computeValueTime, heapPushTime))
        print("%i Children, %i Added" % (numChildren, len(addToQueue_idxs)))

    return (cVals_add, cDepths_add)