Example #1
    def T(self,state,action):
        """Returns a function state -> [0,1] for probability of next state
        given currently in state performing action"""
        result = NumMap()
        s_p = action.apply(state)
        if not self.is_legal(s_p):
            result[state] = 1
        else:
            result[state] = self._p_fail
            result[s_p] = 1 - self._p_fail  
        
#         actions = self.A(state)
#         totalP = 0
#         for a in actions:
#             p = 0
#             if a == action:
#                 p = 1 - self._p_fail
#             else:
#                 p = self._p_fail / ( len(actions)-1 )
#             s_p = a.apply(state)
#             
#             if not self.is_legal(s_p):
#                 result[state] += p
#             else:
#                 result[s_p] += p  
#             totalP += p
#         result[state] += 1.0 - totalP
        return result 
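A minimal, runnable sketch of the same transition rule using only plain Python (a hedged illustration, not part of the example source: the hypothetical is_legal/apply_action callables stand in for the model's methods, and a defaultdict approximates NumMap). With probability p_fail the action has no effect; otherwise the agent ends up in the resulting state.

from collections import defaultdict

def transition(state, action, is_legal, apply_action, p_fail=0.05):
    """Return {next_state: probability} for taking `action` in `state`."""
    result = defaultdict(float)
    s_p = apply_action(state, action)
    if not is_legal(s_p):
        result[state] = 1.0          # illegal move: stay put with certainty
    else:
        result[state] += p_fail      # action fails: remain in place
        result[s_p] += 1.0 - p_fail  # action succeeds: end up in s_p
    return dict(result)

# Toy 1-D hallway of five cells (states 0..4); actions are -1 / +1 steps.
probs = transition(0, +1,
                   is_legal=lambda s: 0 <= s < 5,
                   apply_action=lambda s, a: s + a)
print(probs)                                  # {0: 0.05, 1: 0.95}
assert abs(sum(probs.values()) - 1.0) < 1e-9  # a proper probability distribution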
Example #2
class RandomAgent(Agent):
    def __init__(self, actions):
        self._action_distr = NumMap()
        for a in actions:
            self._action_distr[a] = 1.0
        self._action_distr = self._action_distr.normalize()

    def actions(self, state):
        """Returns a function from actions -> [0,1] probability of
        performing action in state"""
        return self._action_distr
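A rough stand-in for RandomAgent without NumMap (an assumption-laden sketch: it mimics what NumMap.normalize() appears to do in the constructor above, i.e. rescale the stored weights so they sum to 1).

class UniformPolicy:
    """Plain-dict analogue of RandomAgent: equal probability for every action."""
    def __init__(self, actions):
        weight = 1.0 / len(actions)
        self._action_distr = {a: weight for a in actions}

    def actions(self, state):
        # Like RandomAgent above, the distribution does not depend on `state`.
        return self._action_distr

policy = UniformPolicy(["forward", "left", "right"])
print(policy.actions(state=None))  # each action maps to 1/3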
Example #3
    def T(self,state,action):
        """Returns a function state -> [0,1] for probability of next state
        given currently in state performing action"""
        result = NumMap()
        s_p = action.apply(state)
        if not self.is_legal(s_p):
            result[state] = 1
        else:
            result[state] = self._p_fail
            result[s_p] = 1 - self._p_fail  

        return result 
Example #4
class RandomAgent(Agent):
    
    def __init__(self, actions):
        self._action_distr = NumMap()
        for a in actions:
            self._action_distr[a] = 1.0
        self._action_distr = self._action_distr.normalize()
    
    def actions(self, state):
        """Returns a function from actions -> [0,1] probability of
        performing action in state"""
        return self._action_distr
Example #5
    def __init__(self, Q):
        '''
        Q: a map (state,action) -> [0,1]
        '''
        # build a two-stage dictionary for quick processing
        states = set([s for (s, a) in Q])
        actions = set([a for (s, a) in Q])

        QQ = {}
        for s in states:
            QQ[s] = NumMap()
        for s in states:
            for a in actions:
                if (s, a) in Q:
                    QQ[s][a] = Q[(s, a)]
        self._Q = QQ
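A small sketch of the same "two-stage" reshaping with plain dicts (hypothetical string states and actions, used only for illustration; the assumption, per the docstring, is that Q maps (state, action) pairs to values).

Q = {("s0", "left"): 0.2, ("s0", "right"): 0.8, ("s1", "left"): 0.5}

QQ = {}
for (s, a), value in Q.items():
    QQ.setdefault(s, {})[a] = value

print(QQ)  # {'s0': {'left': 0.2, 'right': 0.8}, 's1': {'left': 0.5}}
# A per-state greedy lookup then becomes max(QQ[s], key=QQ[s].get).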
Example #6
 def T(self, state, action):
     """Returns a function state -> [0,1] for probability of next state
     given currently in state performing action"""
     result = NumMap()
     actions = self.A(state)
     for a in actions:
         p = 0
         if a == action:
             p = 1 - self._p_fail
         else:
             p = self._p_fail / (len(actions) - 1)
         s_p = a.apply(state)
         if not self.is_legal(s_p):
             result[state] += p
         else:
             result[s_p] += p
     return result
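A quick numeric check of the failure model above (assumed values: four available actions and p_fail = 0.05, chosen only for illustration). The intended action keeps 1 - p_fail of the probability mass and the remainder is split evenly among the other actions, so the distribution still sums to 1.

p_fail = 0.05
actions = ["forward", "left", "right", "stop"]
chosen = "forward"

probs = {a: (1 - p_fail) if a == chosen else p_fail / (len(actions) - 1)
         for a in actions}

print(probs)                                  # forward: 0.95, each other action: ~0.0167
assert abs(sum(probs.values()) - 1.0) < 1e-9  # failure mass is fully redistributed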
Example #7
 def T(self,state,action):
     """Returns a function state -> [0,1] for probability of next state
     given currently in state performing action"""
     result = NumMap()
     actions = self.A(state)
     # unfortunately have to assume we're using attackerStates here
     newState = AttackerState(state.location, state.time + 1)
     for a in actions:
         p = 0
         if a == action:
             p = 1 - self._p_fail
         else:
             p = self._p_fail / ( len(actions)-1 )
         s_p = a.apply(state)
         if not self.is_legal(s_p):
             result[newState] += p
         else:
             result[s_p] += p  
     return result 
Example #8
def init():

    global pub
    global all_stop_pub
    global pub_status
    global model
    global policy
    global lastpublish
    global savedPose
    global amclmessageReceived
    global ground_truth
    global add_delay
    global otherPose
    global lastGoal
    global go
    global maxsightdistance
    global dr_client
    global home
    global timestep
    global current_cg, curr_st, next_st

    rospy.init_node('patrolcontrol')

    global recordConvTraj, useRegions
    recordConvTraj = 0
    useRegions = 0
    recordConvTraj = int(rospy.get_param("~recordConvTraj"))
    observableStates = int(rospy.get_param("~observableStates"))
    useRegions = int(rospy.get_param("~useRegions"))
    data_log = str(rospy.get_param("~dataLog"))

    rospy.Subscriber("amcl_pose", PoseWithCovarianceStamped, handle_pose)
    pub = rospy.Publisher("move_base_simple/goal", PoseStamped)

    all_stop_pub = rospy.Publisher("all_stop_enabled", std_msgs.msg.Bool)

    initialpose = rospy.Publisher("initialpose", PoseWithCovarianceStamped)
    rospy.Subscriber("base_pose_ground_truth", Odometry, save_ground_truth)

    pub_status = rospy.Publisher("/study_attacker_status", String, latch=True)

    global skip_interaction
    if not skip_interaction:
        if add_delay:
            rospy.Subscriber("/robot_0/base_pose_ground_truth", Odometry,
                             handle_other)
        else:
            rospy.Subscriber("/robot_1/base_pose_ground_truth", Odometry,
                             handle_other)

    random.seed()
    np.random.seed()

    ## Initialize constants

    p_fail = 0.05
    longHallway = 10
    shortSides = 2
    patrolAreaSize = longHallway + shortSides + shortSides
    observableStateLow = patrolAreaSize / 2 - 1
    observableStateHigh = patrolAreaSize / 2
    observableStateLow = 0
    observableStateHigh = patrolAreaSize

    # calculate farness for each node in the patrolled area
    farness = np.zeros(patrolAreaSize)
    for i in range(patrolAreaSize):
        total = 0
        for j in range(patrolAreaSize):
            total += abs(i - j)
        farness[i] = total

    global m
    global ogmap
    current_cg = None
    ## Create reward function

    if (m == "boydright"):
        mapparams = boydrightMapParams(False)
        ogmap = OGMap(*mapparams)
        ## Create Model
        model = patrol.model.PatrolModel(p_fail, None, ogmap.theMap())
        model.gamma = 0.9

    elif m == "largeGridPatrol":
        current_cg = 4
        mapparams = largeGridMapParams(False)
        ogmap = OGMap(*mapparams)
        ## Create Model
        # 		model = patrol.model.PatrolModel(p_fail, PatrolState(np.array( [6, 8, 0] )), ogmap.theMap())
        model = patrol.model.PatrolExtendedModel(
            0.01,
            PatrolExtendedState(np.array([-1, -1, -1]), 0),
            ogmap.theMap())
        model.gamma = 0.9

    elif (m == "boydright2"):
        mapparams = boydright2MapParams(False)

        ogmap = OGMap(*mapparams)

        ## Create Model

        model = patrol.model.PatrolModel(p_fail, None, ogmap.theMap())

        model.gamma = 0.9

    else:
        mapparams = boyd2MapParams(False)
        ogmap = OGMap(*mapparams)
        ## Create Model
        model = patrol.model.PatrolModel(p_fail, None, ogmap.theMap())
        model.gamma = 0.9

    # Call external solver here instead of Python based
    args = [
        "boydsimple_t",
    ]
    import subprocess

    p = subprocess.Popen(args,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)

    stdin = m + "\n"
    stdin += "1.0\n"

    (transitionfunc, stderr) = p.communicate(stdin)

    args = [
        "boydpatroller",
    ]
    p = subprocess.Popen(args,
                         stdin=subprocess.PIPE,
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)

    stdin = m + "\n"
    stdin += str(useRegions) + "\n"

    stdin += transitionfunc

    f = open(home + "/patrolstudy/toupload/patrollerT.log", "w")
    f.write(stdin)
    f.close()

    (stdout, stderr) = p.communicate(stdin)
    print("\n computed patroller's policy")

    if m == "largeGridPatrol":
        correctpolicy = home + "/patrolstudy/largeGridpolicy_mdppatrol"
    if m == "boydright":
        correctpolicy = home + "/patrolstudy/boydright_policy"
    if m == "boydright2":
        correctpolicy = home + "/patrolstudy/boydright2_policy"
    if m == "boyd2":
        correctpolicy = home + "/patrolstudy/boydpolicy_mdppatrolcontrol"

    # stdout now needs to be parsed into a hash of state => action, which is then sent to map agent
# 	correctpolicy = "/home/saurabh/patrolstudy/largeGridpolicy_mdppatrol"
    f = open(correctpolicy, "w")
    f.write("")
    f.close()

    f = open(correctpolicy, "w")
    outtraj = ""
    p = {}
    stateactions = stdout.split("\n")
    for stateaction in stateactions:
        temp = stateaction.split(" = ")
        if len(temp) < 2: continue
        state = temp[0]
        action = temp[1]

        outtraj += state + " = " + action + "\n"

        # 		ps = patrol.model.PatrolState(np.array([int(pieces[0]), int(pieces[1]), int(pieces[2])]))
        if m == "largeGridPatrol":
            state = state.split(";")
            loc = state[0]
            current_goal = int(state[1])
            loc = loc[1:len(loc) - 1]
            pieces = loc.split(",")
            location = np.array(
                [int(pieces[0]),
                 int(pieces[1]),
                 int(pieces[2])])
            ps = patrol.model.PatrolExtendedState(location, current_goal)

        else:
            state = state[1:len(state) - 1]
            pieces = state.split(",")
            ps = patrol.model.PatrolState(
                np.array([int(pieces[0]),
                          int(pieces[1]),
                          int(pieces[2])]))

        if action == "MoveForwardAction":
            a = patrol.model.PatrolActionMoveForward()
        elif action == "TurnLeftAction":
            a = patrol.model.PatrolActionTurnLeft()
        elif action == "TurnAroundAction":
            a = patrol.model.PatrolActionTurnAround()
        elif action == "TurnRightAction":
            a = patrol.model.PatrolActionTurnRight()
        else:
            a = patrol.model.PatrolActionStop()

        p[ps] = a

    f.write(outtraj)
    f.close()

    #  	pub_status.publish(String("abortrun"))

    from mdp.agent import MapAgent
    policy = MapAgent(p)

    # 	sample trajectories for m == "largeGrid":
    global recordConvTraj
    if m == "largeGridPatrol" and recordConvTraj == 1:
        from util.classes import NumMap
        state_dict = {}
        for s in model.S():
            if s.location[0] <= 7 and s.location[1] <= 1:
                state_dict[s] = 1.0
# 		state_dict = { PatrolState([0,0,0]):1.0 }
        initial = NumMap(state_dict).normalize()
        # 		data_log = "/home/saurabh/patrolstudy/toupload/recd_convertedstates_14_14_200states_LGR.log"
        f = open(data_log, "w")
        from mdp.simulation import sample_traj
        n_samples = 45
        t_max = 50
        outtraj = ""
        # 		outtraj2 = "observableStates: "
        # 		outtraj2 += str(observableStates)
        # 		s = ogmap.toState((6, 0, 0), False)
        # 		s = PatrolState(np.array( [6, 0, 0] ))
        # 		outtraj2 += str(largeGridisvisible(s, observableStates))
        # 		pub_status.publish(String("datacollected"))

        pair_ct = 0
        for i in range(n_samples):
            traj_list = sample_traj(model, t_max, initial, policy)
            for (s, a, s_p) in traj_list:
                # 				outtraj2 += str(s.location[0:3])
                # 				outtraj2 += str(largeGridisvisible(s, observableStates))+"\n"
                if largeGridisvisible(s, observableStates):
                    s = s.location[0:3]
                    # print(s)
                    # print(sap[1].__class__.__name__)
                    outtraj += str(s[0]) + "," + str(s[1]) + "," + str(
                        s[2]) + ","
                    if a.__class__.__name__ == "PatrolActionMoveForward":
                        outtraj += "PatrolActionMoveForward"
                    elif a.__class__.__name__ == "PatrolActionTurnLeft":
                        outtraj += "PatrolActionTurnLeft"
                    elif a.__class__.__name__ == "PatrolActionTurnRight":
                        outtraj += "PatrolActionTurnRight"
                    elif a.__class__.__name__ == "PatrolActionTurnAround":
                        outtraj += "PatrolActionTurnAround"
                    else:
                        outtraj += "PatrolActionStop"
                else:
                    outtraj += "None"

                outtraj += "\n"
                pair_ct += 1
# 			outtraj2 += "\n"
            if pair_ct >= 200:
                outtraj += "ENDREC\n"
                pair_ct = 0

        f.write(outtraj)
        # 		f.write(outtraj2)
        f.close()

#	policy = mdp.solvers.PolicyIteration(30, mdp.solvers.IteratingPolicyEvaluator2(.5)).solve(model)
#	policy = mdp.solvers.PolicyIteration(30, mdp.solvers.IteratingPolicyEvaluator(20)).solve(model)
#	policy = mdp.solvers.ValueIteration(.5).solve(model)
    print("Finished solving mdp")

    has_slowed_down = False
    dr_client = dynamic_reconfigure.client.Client(
        "move_base_node/TrajectoryPlannerROS")
    print("dr_client initialized")

    r = rospy.Rate(8)
    while not rospy.is_shutdown():

        # 		print "savedPose is not None and otherPose is not None and canSee(savedPose, otherPose)"
        # 		print (savedPose is not None and otherPose is not None and canSee(savedPose, otherPose))
        if m == "largeGridPatrol":
            print(current_cg)

        if savedPose is not None and otherPose is not None and canSee(
                savedPose, otherPose):

            if not add_delay:
                params = {'max_vel_x': 0.2}
                config = dr_client.update_configuration(params)
                has_slowed_down = True

            if not add_delay and lastGoal is not None:

                a = otherPose.orientation

                angles2 = tf.transformations.euler_from_quaternion(
                    [a.x, a.y, a.z, a.w])

                if (angles2[2] < 0):
                    angles2 = (0, 0, 2 * math.pi + angles2[2])
                otherState = ogmap.toState(
                    (otherPose.position.x, otherPose.position.y, angles2[2]),
                    False)

                if lastGoal.conflicts(otherState):
                    #					if rospy.get_time() - lastpublish >= timestep:
                    lg = lastGoal
                    while (lg.conflicts(otherState)):
                        if m == "largeGridPatrol":
                            curr_st = PatrolExtendedState(
                                lg.location, current_cg)
                            # print "(lg.conflicts(otherState)) next_st = policy.actions(curr_st).keys()[0].apply(curr_st)"
                            next_st = policy.actions(curr_st).keys()[0].apply(
                                curr_st)
                            lg = PatrolState(next_st.location)
                        else:
                            lg = policy.actions(lg).keys()[0].apply(lg)

                    sendGoal(lg)
                else:
                    nextGoal()

            if add_delay and go:

                go = False

                topub = std_msgs.msg.Bool()
                topub.data = True

                all_stop_pub.publish(topub)

        elif savedPose is not None:

            if otherPose is not None and not canSee(
                    savedPose, otherPose) and has_slowed_down:
                params = {'max_vel_x': 0.5}
                config = dr_client.update_configuration(params)
                has_slowed_down = False


# 			print "nextGoal() in elif savedPose is not None"
            nextGoal()

        time.sleep(0.25)
Example #9
 def __init__(self, actions):
     self._action_distr = NumMap()
     for a in actions:
         self._action_distr[a] = 1.0
     self._action_distr = self._action_distr.normalize()
Example #10
 def actions(self, state):
     """Returns a function from actions -> [0,1] probability of
     performing action in state"""
     result = NumMap()
     result[self._Q[state].argmax()] = 1.0
     return result
 def __init__(self, actions):
     self._action_distr = NumMap()
     for a in actions:
         self._action_distr[a] = 1.0
     self._action_distr = self._action_distr.normalize()
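For contrast with the uniform RandomAgent, a tiny sketch of the greedy lookup in actions() above using plain dicts (assumption: NumMap.argmax() returns the key with the largest value, as its use here suggests). The returned distribution is degenerate: all probability mass sits on the best action.

Q_for_state = {"left": 0.2, "right": 0.8}
best_action = max(Q_for_state, key=Q_for_state.get)  # 'right'
greedy_distr = {best_action: 1.0}                     # deterministic choice
print(greedy_distr)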