Example #1
 def __init__(self, sim_class, map_path=None):
     self.goal = None
     self.start = None
     self.rows = None
     self.cols = None
     self.digs = None  #x,y coordinates of dig sites 0-9
     self.dump = None  #x,y coordinates of the dump
     self.bin = None  #x,y coordinates of the bin to avoid
     self.height_map = None
     self.map_path = map_path
     self.rover_buffer = 0.55  #m from center to furthest perimeter point
     self.rateOrder = 3
     if map_path is not None:
         self.read_map(map_path)
         #self.calcRatesOfChange(n=self.rateOrder,ret=False,dbg=False)
     if sim_class.clientID != -1:
         if sim_class.dig_handles is not None:
             self.digs = [
                 cmn.convert_VREPXY(sim_class.sim_get_xy(handle))
                 for handle in sim_class.dig_handles
             ]
         if sim_class.dump_handle is not None:
             self.dump = cmn.convert_VREPXY(
                 sim_class.sim_get_xy(sim_class.dump_handle))
         if sim_class.bin_handle is not None:
             self.bin = cmn.convert_VREPXY(
                 sim_class.sim_get_xy(sim_class.bin_handle))
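
The constructor only needs a live simulator connection to resolve the dig, dump and bin handles; with clientID equal to -1 it falls back to the map file alone. A minimal sketch of driving it that way, assuming a hypothetical StubSim stand-in and an illustrative map path (GridMap and graph_search are the names used in Example #3):

# Minimal sketch, not part of the original example: construct the GridMap
# without a live V-REP session. StubSim is a hypothetical stand-in whose
# attribute names mirror the ones __init__ reads above.
import graph_search

class StubSim(object):
    clientID = -1      # no simulator connection, so the handle lookups are skipped
    dig_handles = None
    dump_handle = None
    bin_handle = None

g = graph_search.GridMap(StubSim(), map_path='maps/test_map.png')  # illustrative path
print g.rows, g.cols   # presumably populated by read_map() when map_path is given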
Example #2
 def calcDigSiteOrder(self, start, savePickle=True, grabPickle=False):
     dbg = True
     if dbg: print 'generating connected graph...'
     self.dsg = _dsg.DigSiteGraph(g=self, env=self.map_path, start=start)
     self.dsg.build(grabPickle=grabPickle, savePickle=savePickle)
     tmpDigs = copy.copy(self.dsg.shortestDigOrder)
     self.digs = []
     for dig in tmpDigs:
         self.digs.append(cmn.convert_VREPXY(dig))
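
calcDigSiteOrder delegates the ordering itself to _dsg.DigSiteGraph and then converts each dig site back with cmn.convert_VREPXY. As a rough illustration of the kind of result shortestDigOrder represents (not the author's algorithm, which lives inside DigSiteGraph), a greedy nearest-neighbour ordering over 2D points looks like this:

# Illustrative stand-in only: greedy nearest-neighbour visitation order for a
# set of 2D dig sites, starting from `start`. DigSiteGraph builds a connected
# graph and may order the sites differently.
import numpy as np

def greedy_dig_order(start, digs):
    remaining = list(digs)
    order = []
    current = np.asarray(start, dtype=float)
    while remaining:
        dists = [np.linalg.norm(np.asarray(d, dtype=float) - current) for d in remaining]
        nxt = remaining.pop(int(np.argmin(dists)))
        order.append(nxt)
        current = np.asarray(nxt, dtype=float)
    return order

print greedy_dig_order((0, 0), [(5, 5), (1, 2), (9, 0)])  # -> [(1, 2), (5, 5), (9, 0)]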
Example #3
    def calcRewardFunction(self,
                           goal=42.0,
                           start=0.0,
                           obstacle=-7.0,
                           edge=0,
                           ret=False,
                           vrs=-1,
                           legVal=0.0,
                           legIdx=None):
        if vrs < 1: vrs = cmn._CALC_REWARD_VRS
        self.rewards = {}
        obsRateThresh = 1.0
        critRate = np.tan(np.pi / 20)  # i.e. if the slope is less than 9 deg
        negCritRate = np.tan(np.pi / 15)

        startPos = np.array(self.start)
        goalPos = np.array(self.goal)

        #For use later
        goalRwrds = np.zeros(shape=(self.rows, self.cols))
        maxRad = (self.rows + self.cols) / np.sqrt(2)
        fid = cmn._sim_res * 5
        nns = cmn.neighbors(mat=goalRwrds,
                            rad=maxRad,
                            r=self.goal[0],
                            c=self.goal[1])
        for nn in zip(list(nns[0]), list(nns[1])):
            tmpR = np.linalg.norm([nn[0] - goalPos[0], nn[1] - goalPos[1]])
            if np.isnan(tmpR) or tmpR < fid:
                tmpR = fid  # EPS
            goalRwrds[nn] = goal * ((1.0 / maxRad) * np.abs(tmpR - maxRad) +
                                    1.0 / np.power(tmpR, 1.0 / 2))
        #goalRwrds[self.goal] = goal

        if legIdx is None:
            legRwrds = np.zeros(shape=(self.rows, self.cols))
        else:
            print 'calculating leg reward using {}'.format(
                self.dsg.shortestRoverPathNodes[legIdx])
            legRwrds = np.zeros(shape=(self.rows, self.cols))
            tmpPath = copy.copy(self.dsg.shortestRoverPathNodes[legIdx])
            tmpPath = [cmn.convert_VREPXY(tmpNode) for tmpNode in tmpPath]
            for tmpNode in tmpPath:
                maxRad = 5.0
                nns = cmn.neighbors(mat=legRwrds,
                                    rad=maxRad,
                                    r=tmpNode[0],
                                    c=tmpNode[1])
                for nn in zip(list(nns[0]), list(nns[1])):
                    tmpR = np.linalg.norm(
                        [nn[0] - tmpNode[0], nn[1] - tmpNode[1]])
                    legRwrds[nn] += legVal * (np.abs(tmpR - maxRad) / maxRad)

            cmap = cm.Spectral
            cmap.set_bad(color='k')
            cmn.createArrImg(
                legRwrds,
                cmap=cmap,
                plotTitle='Base State Rewards for Leg {}'.format(legIdx),
                fn=os.path.join(cmn._IMG_FLDR,
                                'base_rewards_leg{}.svg'.format(legIdx)),
                show=False)

        maxEdge = np.max([self.rows, self.cols])
        for i in range(len(cmn._actions)):
            tmpRewardMap = np.full(shape=(self.rows, self.cols),
                                   fill_value=0.0,
                                   dtype=np.float64)

            obsRwrds = np.zeros(shape=tmpRewardMap.shape)
            # Rewards based on self.rates first.
            for r in range(self.rows):
                for c in range(self.cols):
                    if vrs == 1:
                        if np.abs(self.rates[0][cmn._actions[i]][(
                                r, c)].item()) > obsRateThresh:
                            obsRwrds[(r, c)] = obstacle
                    elif vrs == 2:
                        if self.rates[0][cmn._actions[i]][(r, c)] <= 0:
                            obsRwrds[(r, c)] = 0.0
                        else:
                            obsRwrds[(r, c)] = obstacle * np.power(
                                self.rates[0][cmn._actions[i]][(r, c)].item(),
                                2)
                    elif vrs == 3:
                        if np.abs(self.rates[0][cmn._actions[i]][(
                                r, c)]) <= critRate:
                            obsRwrds[(r, c)] = 2.0
                        else:
                            obsRwrds[(r, c)] = obstacle * (np.exp(
                                np.abs(self.rates[0][cmn._actions[i]][
                                    (r, c)])) - np.exp(critRate))
                    elif vrs == 4:
                        absRate = np.abs(self.rates[0][cmn._actions[i]][(r,
                                                                         c)])
                        tmpCritRate = critRate
                        if self.rates[0][cmn._actions[i]][(r, c)] < 0:
                            tmpCritRate = negCritRate
                        if absRate > tmpCritRate:
                            obsRwrds[(
                                r,
                                c)] = obstacle - np.abs(absRate - tmpCritRate)
                        else:
                            # Scale by how close the slope is to the critical
                            # rate (assumed intent; the original indexed
                            # tmpRewardMap, which is still all zeros here).
                            obsRwrds[(r, c)] = obstacle * cmn.sigmoid(
                                cmn.scaleRange(
                                    [0, float(absRate) / tmpCritRate, 1],
                                    -6, 6)[1])

            maxReward = np.nanmax(obsRwrds)
            minReward = np.nanmin(obsRwrds)
            maxAbReward = np.nanmax([np.abs(maxReward), np.abs(minReward)])
            '''
            goalSlopeRad = (maxEdge / 2.0) * np.sqrt(maxEdge)
            nns = cmn.neighbors(mat=tmpRewardMap, rad=goalSlopeRad, r=self.goal[0], c=self.goal[1])
            for nn in zip(list(nns[0]), list(nns[1])):
                npNN = np.array(nn)
                goalRwrds[nn] = (1 / 5.0) * goal * (np.linalg.norm(npNN - self.goal) - goalSlopeRad)
            maxRad = 5
            fid = 0.4
            for rad in np.arange(maxRad, 0, -fid):
                nns = cmn.neighbors(mat=tmpRewardMap, rad=rad, r=self.goal[0], c=self.goal[1])
                for nn in zip(list(nns[0]), list(nns[1])):
                    if obsRwrds[nn] < 0:
                        priorInfluence = -1 * cmn.sigmoid(
                            cmn.scaleRange([0, float(obsRwrds[nn] / minReward), 1], -6, 6)[1])
                    else:
                        priorInfluence = cmn.sigmoid(
                            cmn.scaleRange([0, float(obsRwrds[nn] / maxReward), 1], -6, 6)[1])
                    goalRwrds[nn] = goal * (
                            priorInfluence + cmn.sigmoid(cmn.scaleRange([0, maxRad - rad, maxRad], -6, 6)[1]))
            goalRwrds[self.goal] = goal
            '''

            tmpRewardMap = np.add(goalRwrds, obsRwrds)
            tmpRewardMap = np.add(legRwrds, tmpRewardMap)

            #EDGES: clamp the border cells to the edge reward
            tmpRewardMap[0, :] = edge
            tmpRewardMap[-1, :] = edge
            tmpRewardMap[:, 0] = edge
            tmpRewardMap[:, -1] = edge

            cmap = cm.Spectral
            cmap.set_bad(color='k')
            cmn.createArrImg(
                obsRwrds,
                cmap=cmap,
                plotTitle='Base State Rewards for Leg {} via {}'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_obs.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            cmn.overlayArrImgs(
                arr1=self.height_map,
                arr2=obsRwrds,
                cmap1=cm.gray,
                cmap2=cmap,
                alpha1=1,
                alpha2=.8,
                arr2Masked=False,
                plotTitle=
                'Base State Rewards for Leg {} via {} (with heightmap)'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_obs_ovr.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            cmn.createArrImg(
                goalRwrds,
                cmap=cmap,
                plotTitle='Base State Rewards for Leg {} via {}'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_goal.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            cmn.overlayArrImgs(
                arr1=self.height_map,
                arr2=goalRwrds,
                cmap1=cm.gray,
                cmap2=cmap,
                alpha1=1,
                alpha2=.8,
                arr2Masked=False,
                plotTitle=
                'Base State Rewards for Leg {} via {} (with heightmap)'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_goal_ovr.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            #GOAL AND START
            maxRad = 5.0
            fid = 0.4
            for rad in np.arange(maxRad, 0, -fid):
                tmpRewardMap[cmn.neighbors(
                    mat=tmpRewardMap,
                    rad=rad,
                    r=self.start[0],
                    c=self.start[1])] = start * cmn.sigmoid(
                        cmn.scaleRange([0, maxRad - rad, maxRad], -6, 6)[1])

            cmap = cm.Spectral
            cmap.set_bad(color='k')
            cmn.createArrImg(
                tmpRewardMap,
                cmap=cmap,
                plotTitle='Base State Rewards for Leg {} via {}'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR,
                    'base_rewards_l{}_{}.svg'.format(legIdx, cmn._actions[i])),
                show=False)

            cmn.overlayArrImgs(
                arr1=self.height_map,
                arr2=tmpRewardMap,
                cmap1=cm.gray,
                cmap2=cmap,
                alpha1=1,
                alpha2=.8,
                arr2Masked=False,
                plotTitle=
                'Base State Rewards for Leg {} via {} (with heightmap)'.format(
                    legIdx, cmn._actions[i]),
                fn=os.path.join(
                    cmn._IMG_FLDR, 'base_rewards_l{}_{}_ovr.svg'.format(
                        legIdx, cmn._actions[i])),
                show=False)

            self.rewards.update({cmn._actions[i]: tmpRewardMap.copy()})

        shutil.copy('graph_search.py',
                    os.path.join(cmn._OUT_FLDR, 'graph_search.py'))
        if ret: return self.rewards.copy()
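
The goal-reward surface built near the top of calcRewardFunction gives each cell at distance r from the goal the value goal * (|r - maxRad| / maxRad + 1 / sqrt(r)), with r clamped to fid so the inverse-square-root term never blows up. A standalone sketch of just that shaping term (goal, max_rad and fid below are illustrative values, not the method's actual maxRad or cmn._sim_res):

# Sketch of the goal-shaping term used in calcRewardFunction above.
import numpy as np

def goal_reward(r, goal=42.0, max_rad=70.0, fid=0.5):
    r = fid if r < fid else float(r)   # clamp small distances, as the method does
    return goal * (abs(r - max_rad) / max_rad + 1.0 / np.sqrt(r))

for r in (0.0, 1.0, 10.0, 35.0, 70.0):
    print 'r = {:5.1f} -> reward = {:6.2f}'.format(r, goal_reward(r))
# the reward peaks at the goal cell and decays toward max_rad

    #Driver code from the same example listing: build the GridMap, compute and
    #plot rate maps, then plan the dig-site visitation order.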
    g = graph_search.GridMap(s, cmn._map_file)
    g.calcRatesOfChange(n=3, ret=False, dbg=False)

    #Create plots for each action direction and save to disk
    cmap = cm.Spectral
    cmap.set_bad(color='k')
    cmn.createArrImg(g.height_map, cmap=cmap, plotTitle='Height Map', fn=os.path.join(cmn._IMG_FLDR, 'heightmap.svg'),
                     show=False)
    for a in cmn._actions:
        cmn.createArrImg(g.rates[0][a], cmap=cmap, plotTitle='Rate Map: {}'.format(a),
                         fn=os.path.join(cmn._IMG_FLDR, 'ratemap_{}.svg'.format(a)), show=False)
        cmn.overlayArrImgs(arr1=g.height_map, arr2=g.rates[0][a], plotTitle='Overlayed Rate Map: {}'.format(a),
                           fn=os.path.join(cmn._IMG_FLDR, 'ratemap_ovr_{}.svg'.format(a)), show=False)

    rover_pos = s.sim_get_xy(s.rover_handle)
    changed_rover_pos = cmn.convert_VREPXY(rover_pos)

    print '\n{} ---> {}'.format(rover_pos, changed_rover_pos)
    
    #Plan dig site visitation order
    g.calcDigSiteOrder(start=changed_rover_pos, savePickle=cmn._SAVE_PICKLE, grabPickle=cmn._GRAB_PICKLE)
    print '\n\tusing dig order {}\n'.format(g.digs)
    
    #For each dig site...
    for i in range(len(g.digs)):
        for j in range(2):
            #Plan route to dig site
            if j == 0:
                g.goal = g.digs[i]
                print "Destination: dig(" + str(i) + ")", g.goal
            #Or back to the dump collection bin