def __init__(self, sim_class, map_path=None):
    """Build a grid-map wrapper around a simulator connection.

    Initializes all map state to None, reads the height map from
    ``map_path`` when one is given, and — when the simulator reports a
    live connection — pulls dig/dump/bin positions out of the sim and
    converts them with ``cmn.convert_VREPXY``.

    Parameters:
        sim_class: simulator wrapper object; must expose ``clientID``,
            ``dig_handles``, ``dump_handle``, ``bin_handle`` and
            ``sim_get_xy`` (project type — presumably the V-REP
            interface; confirm against the caller).
        map_path: optional path to a map file handed to ``read_map``.
    """
    self.goal = None
    self.start = None
    self.rows = None
    self.cols = None
    self.digs = None  # x,y coordinates of dig sites 0-9
    self.dump = None  # x,y coordinates of the dump
    self.bin = None  # x,y coordinates of the bin to avoid
    self.height_map = None
    self.map_path = map_path
    self.rover_buffer = 0.55  # m from center to furthest perimeter point
    self.rateOrder = 3
    if map_path is not None:
        self.read_map(map_path)
        #self.calcRatesOfChange(n=self.rateOrder,ret=False,dbg=False)
    # BUG FIX: the original tested `sim_class.clientID is not -1`, which
    # compares object identity rather than value; identity of equal ints
    # is a CPython implementation detail, so use `!=` for a value test.
    # (V-REP convention: clientID of -1 means "no connection".)
    if sim_class.clientID != -1:
        if sim_class.dig_handles is not None:
            self.digs = [
                cmn.convert_VREPXY(sim_class.sim_get_xy(handle))
                for handle in sim_class.dig_handles
            ]
        if sim_class.dump_handle is not None:
            self.dump = cmn.convert_VREPXY(
                sim_class.sim_get_xy(sim_class.dump_handle))
        if sim_class.bin_handle is not None:
            self.bin = cmn.convert_VREPXY(
                sim_class.sim_get_xy(sim_class.bin_handle))
def calcDigSiteOrder(self, start, savePickle=True, grabPickle=False): dbg = True if dbg: print 'generating connected graph...' self.dsg = _dsg.DigSiteGraph(g=self, env=self.map_path, start=start) self.dsg.build(grabPickle=grabPickle, savePickle=savePickle) tmpDigs = copy.copy(self.dsg.shortestDigOrder) self.digs = [] for dig in tmpDigs: self.digs.append(cmn.convert_VREPXY(dig))
def calcRewardFunction(self, goal=42.0, start=0.0, obstacle=-7.0, edge=0, ret=False, vrs=-1, legVal=0.0, legIdx=None):
    """Build one reward map per action and store them in ``self.rewards``.

    For every action in ``cmn._actions`` a (rows x cols) reward array is
    assembled from three additive layers:

      1. a goal-attraction layer centered on ``self.goal``,
      2. a slope/obstacle penalty layer derived from ``self.rates``
         (four alternative formulas, selected by ``vrs``),
      3. an optional "leg" layer that rewards cells near a precomputed
         rover path (``self.dsg.shortestRoverPathNodes[legIdx]``).

    Edges are clamped to ``edge`` and a graded ``start`` reward is painted
    around ``self.start``.  Diagnostic SVG images of each layer are written
    into ``cmn._IMG_FLDR`` as a side effect, and this source file is copied
    into ``cmn._OUT_FLDR``.

    Parameters:
        goal: peak magnitude of the goal-attraction layer.
        start: reward value painted in rings around ``self.start``.
        obstacle: (negative) scale applied to slope/obstacle penalties.
        edge: value written onto the four border rows/columns.
        ret: when True, also return a copy of the ``self.rewards`` dict.
        vrs: obstacle-reward version selector (1-4); values < 1 fall back
            to ``cmn._CALC_REWARD_VRS``.
        legVal: per-node scale of the leg-reward layer.
        legIdx: index of the path leg to reward; None disables the layer.
    """
    if vrs < 1:
        vrs = cmn._CALC_REWARD_VRS  # project-wide default reward version
    self.rewards = {}
    obsRateThresh = 1.0  # rate threshold used only by vrs == 1
    critRate = np.tan(np.pi / 20)  #IE if the slops is less than 9 deg
    negCritRate = np.tan(np.pi / 15)  # looser threshold for downhill (negative) rates
    startPos = np.array(self.start)  # NOTE(review): assigned but never read below
    goalPos = np.array(self.goal)
    #For use later
    # --- Layer 1: goal attraction, radially decaying from self.goal ---
    goalRwrds = np.zeros(shape=(self.rows, self.cols))
    maxRad = (self.rows + self.cols) / np.sqrt(2)  # radius covering the whole grid
    fid = cmn._sim_res * 5  # minimum distance clamp; presumably grid resolution based — confirm
    nns = cmn.neighbors(mat=goalRwrds, rad=maxRad, r=self.goal[0], c=self.goal[1])
    for nn in zip(list(nns[0]), list(nns[1])):
        tmpR = np.linalg.norm([nn[0] - goalPos[0], nn[1] - goalPos[1]])
        if np.isnan(tmpR) or tmpR < fid:
            tmpR = fid  # EPS
        # Linear ramp toward the goal plus a 1/sqrt(r) spike near it.
        goalRwrds[nn] = goal * ((1.0 / maxRad) * np.abs(tmpR - maxRad) + 1.0 / np.power(tmpR, 1.0 / 2))
    #goalRwrds[self.goal] = goal
    # --- Layer 3 (optional): reward cells near the selected path leg ---
    if legIdx is None:
        legRwrds = np.zeros(shape=(self.rows, self.cols))
    else:
        print 'calculating leg reward using {}'.format(self.dsg.shortestRoverPathNodes[legIdx])
        legRwrds = np.zeros(shape=(self.rows, self.cols))
        tmpPath = copy.copy(self.dsg.shortestRoverPathNodes[legIdx])
        tmpPath = [cmn.convert_VREPXY(tmpNode) for tmpNode in tmpPath]
        for tmpNode in tmpPath:
            maxRad = 5.0  # influence radius of each path node (cells)
            nns = cmn.neighbors(mat=legRwrds, rad=maxRad, r=tmpNode[0], c=tmpNode[1])
            for nn in zip(list(nns[0]), list(nns[1])):
                tmpR = np.linalg.norm(
                    [nn[0] - tmpNode[0], nn[1] - tmpNode[1]])
                # Accumulates: overlapping node neighborhoods stack up.
                legRwrds[nn] += legVal * (np.abs(tmpR - maxRad) / maxRad)
        cmap = cm.Spectral
        cmap.set_bad(color='k')  # render masked/NaN cells black
        cmn.createArrImg(
            legRwrds,
            cmap=cmap,
            plotTitle='Base State Rewards for Leg {}'.format(legIdx),
            fn=os.path.join(cmn._IMG_FLDR,
                            'base_rewards_leg{}.svg'.format(legIdx)),
            show=False)
    maxEdge = np.max([self.rows, self.cols])
    # --- Layer 2 + assembly: one reward map per action direction ---
    for i in range(len(cmn._actions)):
        # NOTE(review): np.float is a deprecated alias (removed in NumPy 1.24);
        # would need to become float/np.float64 on modern NumPy.
        tmpRewardMap = np.full(shape=(self.rows, self.cols),
                               fill_value=0.0,
                               dtype=np.float)
        obsRwrds = np.zeros(shape=tmpRewardMap.shape)
        # Rewards based on self.rates first.
        for r in range(self.rows):
            for c in range(self.cols):
                if vrs == 1:
                    # v1: hard penalty wherever |rate| exceeds the threshold.
                    if np.abs(self.rates[0][cmn._actions[i]][(
                            r, c)].item()) > obsRateThresh:
                        obsRwrds[(r, c)] = obstacle
                elif vrs == 2:
                    # v2: quadratic penalty on positive (uphill) rates only.
                    if self.rates[0][cmn._actions[i]][(r, c)] <= 0:
                        obsRwrds[(r, c)] = 0.0
                    else:
                        obsRwrds[(r, c)] = obstacle * np.power(
                            self.rates[0][cmn._actions[i]][(r, c)].item(), 2)
                elif vrs == 3:
                    # v3: small flat bonus below critRate, exponential penalty above.
                    if np.abs(self.rates[0][cmn._actions[i]][(
                            r, c)]) <= critRate:
                        obsRwrds[(r, c)] = 2.0
                    else:
                        obsRwrds[(r, c)] = obstacle * (np.exp(
                            np.abs(self.rates[0][cmn._actions[i]][
                                (r, c)])) - np.exp(critRate))
                elif vrs == 4:
                    # v4: asymmetric thresholds (looser downhill), linear
                    # penalty past the threshold, sigmoid-shaped below it.
                    absRate = np.abs(self.rates[0][cmn._actions[i]][(r, c)])
                    tmpCritRate = critRate
                    if self.rates[0][cmn._actions[i]][(r, c)] < 0:
                        tmpCritRate = negCritRate
                    if absRate > tmpCritRate:
                        obsRwrds[(
                            r, c)] = obstacle - np.abs(absRate - tmpCritRate)
                    else:
                        # NOTE(review): reads tmpRewardMap here, which is still
                        # all zeros at this point — looks like the rate value was
                        # intended instead; confirm before relying on v4.
                        obsRwrds[(r, c)] = obstacle * cmn.sigmoid(
                            cmn.scaleRange([
                                0,
                                float(tmpRewardMap[(r, c)]) / tmpCritRate, 1
                            ], -6, 6)[1])
        maxReward = np.nanmax(obsRwrds)
        minReward = np.nanmin(obsRwrds)
        # NOTE(review): maxAbReward is computed but unused in live code
        # (referenced only by the commented-out region below).
        maxAbReward = np.nanmax([np.abs(maxReward), np.abs(minReward)])
        '''
        goalSlopeRad = (maxEdge / 2.0) * np.sqrt(maxEdge)
        nns = cmn.neighbors(mat=tmpRewardMap, rad=goalSlopeRad, r=self.goal[0], c=self.goal[1])
        for nn in zip(list(nns[0]), list(nns[1])):
            npNN = np.array(nn)
            goalRwrds[nn] = (1 / 5.0) * goal * (np.linalg.norm(npNN - self.goal) - goalSlopeRad)
        maxRad = 5
        fid = 0.4
        for rad in np.arange(maxRad, 0, -fid):
            nns = cmn.neighbors(mat=tmpRewardMap, rad=rad, r=self.goal[0], c=self.goal[1])
            for nn in zip(list(nns[0]), list(nns[1])):
                if obsRwrds[nn] < 0:
                    priorInfluence = -1 * cmn.sigmoid(
                        cmn.scaleRange([0, float(obsRwrds[nn] / minReward), 1], -6, 6)[1])
                else:
                    priorInfluence = cmn.sigmoid(
                        cmn.scaleRange([0, float(obsRwrds[nn] / maxReward), 1], -6, 6)[1])
                goalRwrds[nn] = goal * (
                    priorInfluence + cmn.sigmoid(cmn.scaleRange([0, maxRad - rad, maxRad], -6, 6)[1]))
        goalRwrds[self.goal] = goal
        '''
        # Sum the three layers into the final map for this action.
        tmpRewardMap = np.add(goalRwrds, obsRwrds)
        tmpRewardMap = np.add(legRwrds, tmpRewardMap)
        #EDGES
        # Clamp all four borders to the fixed edge value.
        [
            tmpRewardMap[0, :], tmpRewardMap[-1, :], tmpRewardMap[:, 0],
            tmpRewardMap[:, -1]
        ] = [edge] * 4
        cmap = cm.Spectral
        cmap.set_bad(color='k')
        # Diagnostic images: obstacle layer, alone and over the heightmap.
        cmn.createArrImg(
            obsRwrds,
            cmap=cmap,
            plotTitle='Base State Rewards for Leg {} via {}'.format(
                legIdx, cmn._actions[i]),
            fn=os.path.join(
                cmn._IMG_FLDR,
                'base_rewards_l{}_{}_obs.svg'.format(
                    legIdx, cmn._actions[i])),
            show=False)
        cmn.overlayArrImgs(
            arr1=self.height_map,
            arr2=obsRwrds,
            cmap1=cm.gray,
            cmap2=cmap,
            alpha1=1,
            alpha2=.8,
            arr2Masked=False,
            plotTitle=
            'Base State Rewards for Leg {} via {} (with heightmap)'.format(
                legIdx, cmn._actions[i]),
            fn=os.path.join(
                cmn._IMG_FLDR,
                'base_rewards_l{}_{}_obs_ovr.svg'.format(
                    legIdx, cmn._actions[i])),
            show=False)
        # Diagnostic images: goal layer, alone and over the heightmap.
        cmn.createArrImg(
            goalRwrds,
            cmap=cmap,
            plotTitle='Base State Rewards for Leg {} via {}'.format(
                legIdx, cmn._actions[i]),
            fn=os.path.join(
                cmn._IMG_FLDR,
                'base_rewards_l{}_{}_goal.svg'.format(
                    legIdx, cmn._actions[i])),
            show=False)
        cmn.overlayArrImgs(
            arr1=self.height_map,
            arr2=goalRwrds,
            cmap1=cm.gray,
            cmap2=cmap,
            alpha1=1,
            alpha2=.8,
            arr2Masked=False,
            plotTitle=
            'Base State Rewards for Leg {} via {} (with heightmap)'.format(
                legIdx, cmn._actions[i]),
            fn=os.path.join(
                cmn._IMG_FLDR,
                'base_rewards_l{}_{}_goal_ovr.svg'.format(
                    legIdx, cmn._actions[i])),
            show=False)
        #GOAL AND START
        # Paint concentric rings of start-reward around self.start,
        # strongest at the center (smallest rad painted last).
        maxRad = 5.0
        fid = 0.4  # ring step size
        for rad in np.arange(maxRad, 0, -fid):
            tmpRewardMap[cmn.neighbors(
                mat=tmpRewardMap, rad=rad, r=self.start[0],
                c=self.start[1])] = start * cmn.sigmoid(
                    cmn.scaleRange([0, maxRad - rad, maxRad], -6, 6)[1])
        cmap = cm.Spectral
        cmap.set_bad(color='k')
        # Diagnostic images: final combined map, alone and over the heightmap.
        cmn.createArrImg(
            tmpRewardMap,
            cmap=cmap,
            plotTitle='Base State Rewards for Leg {} via {}'.format(
                legIdx, cmn._actions[i]),
            fn=os.path.join(
                cmn._IMG_FLDR,
                'base_rewards_l{}_{}.svg'.format(legIdx, cmn._actions[i])),
            show=False)
        cmn.overlayArrImgs(
            arr1=self.height_map,
            arr2=tmpRewardMap,
            cmap1=cm.gray,
            cmap2=cmap,
            alpha1=1,
            alpha2=.8,
            arr2Masked=False,
            plotTitle=
            'Base State Rewards for Leg {} via {} (with heightmap)'.format(
                legIdx, cmn._actions[i]),
            fn=os.path.join(
                cmn._IMG_FLDR,
                'base_rewards_l{}_{}_ovr.svg'.format(
                    legIdx, cmn._actions[i])),
            show=False)
        # Keep a defensive copy per action name.
        self.rewards.update({cmn._actions[i]: tmpRewardMap.copy()})
    # Archive this source file alongside the outputs for reproducibility.
    shutil.copy('graph_search.py',
                os.path.join(cmn._OUT_FLDR, 'graph_search.py'))
    if ret:
        return self.rewards.copy()
g = graph_search.GridMap(s, cmn._map_file) g.calcRatesOfChange(n=3, ret=False,dbg=False) #Create plotes for each action direction and save to disk cmap = cm.Spectral; cmap.set_bad(color='k') cmn.createArrImg(g.height_map, cmap=cmap, plotTitle='Height Map', fn=os.path.join(cmn._IMG_FLDR, 'heightmap.svg'), show=False) for a in cmn._actions: cmn.createArrImg(g.rates[0][a], cmap=cmap, plotTitle='Rate Map: {}'.format(a), fn=os.path.join(cmn._IMG_FLDR, 'ratemap_{}.svg'.format(a)), show=False) cmn.overlayArrImgs(arr1=g.height_map, arr2=g.rates[0][a], plotTitle='Overlayed Rate Map: {}'.format(a), fn=os.path.join(cmn._IMG_FLDR, 'ratemap_ovr_{}.svg'.format(a)), show=False) rover_pos = s.sim_get_xy(s.rover_handle) changed_rover_pos = cmn.convert_VREPXY(rover_pos) print '\n{} ---> {}'.format(rover_pos,changed_rover_pos) #Plan dig site visitation order g.calcDigSiteOrder(start=changed_rover_pos,savePickle=cmn._SAVE_PICKLE,grabPickle=cmn._GRAB_PICKLE) print '\n\tusing dig order {}\n'.format(g.digs) #For each dig site... for i in range(len(g.digs)): for j in range(2): #Plan route to dig site if j is 0: g.goal = g.digs[i] print "Destination: dig(" + str(i) + ")", g.goal #Or back to the dump collection bin