def enumerateForBIRLsortingModel1(trajs):
    """Append the first trajectory to the BIRL/MLIRL state and action logs.

    Every non-None ``(state, action)`` pair in ``trajs[0]`` is translated
    into integer codes through the module-level ``dict_stateEnum`` and
    ``dict_actEnum`` tables (code -> object). The state code is written,
    followed by a comma, to ``f_st_BIRLcode``; the action code likewise to
    ``f_ac_BIRLcode``. After the trajectory a newline is written to both
    files.

    Pairs that are None, or whose action class name is not one of the
    seven enumerated sorting actions, are reported on stdout and skipped in
    BOTH files so the two logs stay aligned.

    Args:
        trajs: sequence of trajectories; only ``trajs[0]`` is read. Each
            element is either None or an indexable ``(state, action)``.

    Raises:
        ValueError: if a state or action has no entry in the code tables.
    """
    # The action is rebuilt from its class *name* and then looked up by
    # equality, exactly as the original if/elif chain did.
    constructors = {
        "InspectAfterPicking": InspectAfterPicking,
        "InspectWithoutPicking": InspectWithoutPicking,
        "Pick": Pick,
        "PlaceOnConveyor": PlaceOnConveyor,
        "PlaceInBin": PlaceInBin,
        "ClaimNewOnion": ClaimNewOnion,
        "ClaimNextInList": ClaimNextInList,
    }

    # Materialise the views once: on Python 3 dict views can be neither
    # indexed nor searched with .index(). keys() and values() iterate in
    # the same order, so position i in one matches position i in the other.
    state_codes = list(dict_stateEnum.keys())
    state_values = list(dict_stateEnum.values())
    act_codes = list(dict_actEnum.keys())
    act_values = list(dict_actEnum.values())

    patroller = trajs[0]
    for sap in patroller:
        if sap is None:
            print("can't enumerate bcz sap is none ")
            continue

        s = sap[0]
        make_action = constructors.get(sap[1].__class__.__name__)
        if make_action is None:
            # Previously execution fell through here and reused the action
            # from the previous pair (or hit a NameError on the first one).
            print("can't enumerate ", sap[1])
            continue
        test_act = make_action()

        # adding data for BIRL MLIRL
        inds = state_codes[state_values.index(s)]
        f_st_BIRLcode.write(str(inds) + ",")
        inda = act_codes[act_values.index(test_act)]
        f_ac_BIRLcode.write(str(inda) + ",")

    f_st_BIRLcode.write("\n")
    f_ac_BIRLcode.write("\n")
    return
def parse_sorting_policy(buf):
    """Turn a textual sorting policy into a ``MapAgent``.

    ``buf`` is split on newlines; each line of the form
    ``<state> = <action>`` contributes one entry. The state text has its
    first and last characters dropped (presumably the enclosing brackets)
    and must then contain at least four comma-separated integers, which are
    passed to ``sortingState``. Lines without the ``" = "`` separator are
    ignored.

    An unrecognised action name prints an error message and terminates the
    process through ``exit(0)``.

    Returns:
        ``mdp.agent.MapAgent`` built from the state -> action dictionary.
    """
    # stdout is parsed into a hash of state => action, which is then handed
    # to the map agent.
    policy = {}
    for line in buf.split("\n"):
        fields = line.split(" = ")
        if len(fields) < 2:
            continue
        state_text, action = fields[0], fields[1]

        # Drop the first and last characters before splitting the numbers.
        pieces = state_text[1:-1].split(",")
        ss = sortingState(
            int(pieces[0]), int(pieces[1]), int(pieces[2]), int(pieces[3])
        )

        if action == "InspectAfterPicking":
            act = InspectAfterPicking()
        elif action == "InspectWithoutPicking":
            act = InspectWithoutPicking()
        elif action == "Pick":
            act = Pick()
        elif action == "PlaceOnConveyor":
            act = PlaceOnConveyor()
        elif action == "PlaceInBin":
            act = PlaceInBin()
        elif action == "ClaimNewOnion":
            act = ClaimNewOnion()
        elif action == "ClaimNextInList":
            act = ClaimNextInList()
        elif action == "PlaceInBinClaimNextInList":
            act = PlaceInBinClaimNextInList()
        else:
            print("Invalid input policy to parse_sorting_policy")
            exit(0)

        policy[ss] = act

    from mdp.agent import MapAgent
    return MapAgent(policy)
def saveDataForBaseline():
    """Dump the sorting problem as text files for the BIRL/MLIRL baseline.

    Used to check whether a problem lies with the method (see the original
    banner comment). Operates only on module-level state: ``model``,
    ``dummy_states``, ``dict_stateEnum``, ``dict_actEnum``, ``traj``,
    ``f_st_BIRLcode``, ``f_ac_BIRLcode``, ``sortingReward``,
    ``true_assignments`` and ``List_TrueWeights``.

    Steps, in order:
      1. Append every state of the model plus ``sortingState(-1,-1,-1,-1)``
         to ``dummy_states`` and number them from 1 in ``dict_stateEnum``;
         number the seven sorting actions from 1 in ``dict_actEnum``.
      2. Record ``traj`` through ``enumerateForBIRLsortingModel1`` and close
         the two state/action log files.
      3. Write ``transition_matrix.txt``: one comma-terminated matrix per
         action, a blank line after each matrix.
      4. Write ``features_matrix.txt``: for each action, one line of
         comma-terminated feature values per state, a blank line after
         each action.
      5. Write ``weights_experts.log``: one line per reward dimension, one
         comma-terminated column per unique entry of ``true_assignments``.

    All three files live under ``get_home() + "/BIRL_MLIRL_data"`` and are
    overwritten. Returns None.
    """
    sortingMDP = model
    out_dir = get_home() + "/BIRL_MLIRL_data"

    # --- 1. enumerate states and actions (codes start at 1) ---------------
    dummy_states.extend(sortingMDP.S())
    dummy_states.append(sortingState(-1, -1, -1, -1))
    for code, state in enumerate(dummy_states, 1):
        dict_stateEnum[code] = state
    print("dict_stateEnum \n", dict_stateEnum)

    acts = [
        InspectAfterPicking(),
        PlaceOnConveyor(),
        PlaceInBin(),
        Pick(),
        ClaimNewOnion(),
        InspectWithoutPicking(),
        ClaimNextInList(),
    ]
    for code, action in enumerate(acts, 1):
        dict_actEnum[code] = action

    # --- 2. record first trajectory in data for single task BIRL ----------
    enumerateForBIRLsortingModel1(traj)
    f_st_BIRLcode.close()
    f_ac_BIRLcode.close()

    num_states = len(dict_stateEnum)
    num_actions = len(dict_actEnum)

    # --- 3. transition matrices -------------------------------------------
    tuple_res = sortingMDP.generate_matrix(dict_stateEnum, dict_actEnum)
    dict_tr = tuple_res[0]
    # Opening with "w" truncates the file, so the former
    # truncate / close / reopen-for-append sequence is unnecessary; the
    # context manager also closes the file if a write fails.
    with open(out_dir + "/transition_matrix.txt", "w") as f_TM_BIRLcode:
        for ind1 in range(1, num_actions + 1):
            # Copy through a float array so values are formatted the same
            # way regardless of how dict_tr stores them.
            acArray2d = np.empty((num_states, num_states))
            for ind2 in range(1, num_states + 1):
                for ind3 in range(1, num_states + 1):
                    acArray2d[ind3 - 1][ind2 - 1] = dict_tr[ind1][ind3][ind2]
            for row in acArray2d:
                f_TM_BIRLcode.write(
                    "".join(str(value) + "," for value in row) + "\n"
                )
            f_TM_BIRLcode.write("\n")

    # --- 4. feature vectors -----------------------------------------------
    with open(out_dir + "/features_matrix.txt", "w") as f_Phis_BIRLcode:
        for inda in range(1, num_actions + 1):
            a = dict_actEnum[inda]
            for inds in range(1, num_states + 1):
                s = dict_stateEnum[inds]
                arraysPhis = sortingReward.features(s, a)
                f_Phis_BIRLcode.write(
                    "".join(str(phi) + "," for phi in arraysPhis) + "\n"
                )
            f_Phis_BIRLcode.write("\n")

    # --- 5. true expert weights -------------------------------------------
    expert_ids = np.unique(true_assignments)
    wts_experts_array = np.empty((sortingReward._dim, len(expert_ids)))
    for j, wt_ind in enumerate(expert_ids):
        for i in range(wts_experts_array.shape[0]):
            wts_experts_array[i][j] = List_TrueWeights[wt_ind][i]

    with open(out_dir + "/weights_experts.log", "w") as f_wts_BIRLcode:
        for row in wts_experts_array:
            f_wts_BIRLcode.write(
                "".join(str(weight) + "," for weight in row) + "\n"
            )
def computeLBA(fileTruePolicy, model, mapAgentLrndPolicy):
    """Return the fraction of shared states on which two policies agree.

    The true policy is read from ``fileTruePolicy``, one
    ``<state> = <action>`` entry per line. The state text has its first and
    last characters dropped and is then parsed as four comma-separated
    integers. Lines without ``" = "`` are skipped. An unknown action name
    prints an error and terminates the process through ``exit(0)``.

    The learned policy is only inspected when ``mapAgentLrndPolicy`` is an
    instance of a class named ``MapAgent``. For every state of
    ``model.S()`` that is a key of ``mapAgentLrndPolicy._policy`` and whose
    integer 4-tuple also appears in the true policy, the first action
    returned by ``mapAgentLrndPolicy.actions(state)`` is compared with the
    true action.

    Args:
        fileTruePolicy: path of the text file holding the true policy.
        model: object whose ``S()`` yields the states to compare.
        mapAgentLrndPolicy: learned policy agent.

    Returns:
        ``matches / compared_states`` as a float, or the int ``0`` (after
        printing an error) when no state could be compared.
    """
    # read and compare policies using dictionaries
    truePol = {}
    # The context manager closes the file even if a line fails to parse.
    with open(fileTruePolicy, "r") as f:
        for stateaction in f:
            temp = stateaction.strip().split(" = ")
            if len(temp) < 2:
                continue
            action = temp[1]
            pieces = temp[0][1:-1].split(",")
            ss = (int(pieces[0]), int(pieces[1]),
                  int(pieces[2]), int(pieces[3]))

            if action == "InspectAfterPicking":
                act = InspectAfterPicking()
            elif action == "InspectWithoutPicking":
                act = InspectWithoutPicking()
            elif action == "Pick":
                act = Pick()
            elif action == "PlaceOnConveyor":
                act = PlaceOnConveyor()
            elif action == "PlaceInBin":
                act = PlaceInBin()
            elif action == "ClaimNewOnion":
                act = ClaimNewOnion()
            elif action == "ClaimNextInList":
                act = ClaimNextInList()
            elif action == "Pickpip":
                act = Pickpip()
            elif action == "PlaceInBinpip":
                act = PlaceInBinpip()
            else:
                print("Invalid input policy to parse_sorting_policy")
                exit(0)

            truePol[ss] = act

    totalsuccess = 0
    totalstates = 0
    if mapAgentLrndPolicy.__class__.__name__ == "MapAgent":
        for s in model.S():
            if s not in mapAgentLrndPolicy._policy:  # check key existence
                continue
            # list(...) is required on Python 3, where keys() returns a
            # view that cannot be indexed.
            action = list(mapAgentLrndPolicy.actions(s).keys())[0]
            ss2 = (int(s._onion_location), int(s._prediction),
                   int(s._EE_location), int(s._listIDs_status))
            if ss2 in truePol:
                totalstates += 1
                if truePol[ss2] == action:
                    totalsuccess += 1

    print("totalstates, totalsuccess: " + str(totalstates) + ", "
          + str(totalsuccess))

    if totalstates == 0:
        print("Error: states in two policies are different")
        return 0

    return float(totalsuccess) / float(totalstates)
def parsePolicies(stdout, lineFoundWeights, lineFeatureExpec,
                  learned_weights, num_Trajsofar, BatchIRLflag):
    """Parse solver output into a policy plus the session results.

    ``stdout`` is split on newlines. Lines up to (and including) the first
    ``"ENDPOLICY"`` line are policy entries of the form
    ``<state> = <action>``; the state text has its first and last
    characters dropped and is parsed as four comma-separated integers for
    ``sortingState``. Lines without ``" = "`` are skipped. An unknown
    action name prints an error and terminates the process via ``exit(0)``.

    What follows the policy decides the rest of the result:

    * lines remain and ``BatchIRLflag == False``: the next three lines are
      read as the weights line (``"[w1, w2, ...]"``-style text, parsed into
      floats), the feature-expectation line, and the trajectory count.
    * no lines remain: the incoming values are passed through unchanged,
      ``sessionFinish`` is False and a notice is printed.
    * lines remain but ``BatchIRLflag`` is not False: the incoming values
      are passed through and ``sessionFinish`` stays True.

    A ``stdout`` of None is reported and then handled like empty output.

    Args:
        stdout: solver output text, or None.
        lineFoundWeights, lineFeatureExpec, learned_weights, num_Trajsofar:
            previous values, returned unchanged when no new results exist.
        BatchIRLflag: when not equal to False, trailing result lines are
            not parsed.

    Returns:
        Tuple ``([MapAgent], lineFoundWeights, lineFeatureExpec,
        learned_weights, num_Trajsofar, sessionFinish)``.

    Raises:
        IndexError / ValueError: if the trailing result lines are
            incomplete or not numeric.
    """
    if stdout is None:
        print("no stdout in parse policies")
        # Previously execution continued into None.split(...) and crashed;
        # an absent output is equivalent to an empty one.
        stdout = ""

    stateactions = stdout.split("\n")
    counter = 0  # number of lines consumed by the policy section
    p = {}
    for stateaction in stateactions:
        counter += 1
        if stateaction == "ENDPOLICY":
            break
        temp = stateaction.split(" = ")
        if len(temp) < 2:
            continue
        action = temp[1]
        pieces = temp[0][1:-1].split(",")
        ss = sortingState(int(pieces[0]), int(pieces[1]),
                          int(pieces[2]), int(pieces[3]))

        if action == "InspectAfterPicking":
            act = InspectAfterPicking()
        elif action == "InspectWithoutPicking":
            act = InspectWithoutPicking()
        elif action == "Pick":
            act = Pick()
        elif action == "PlaceOnConveyor":
            act = PlaceOnConveyor()
        elif action == "PlaceInBin":
            act = PlaceInBin()
        elif action == "ClaimNewOnion":
            act = ClaimNewOnion()
        elif action == "ClaimNextInList":
            act = ClaimNextInList()
        elif action == "PlaceInBinClaimNextInList":
            act = PlaceInBinClaimNextInList()
        else:
            print("Invalid input policy to parse_sorting_policy")
            exit(0)

        p[ss] = act

    returnval = [mdp.agent.MapAgent(p)]
    sessionFinish = True

    remaining = stateactions[counter:]
    # `== False` is kept on purpose: a None flag must not take this branch.
    if remaining and BatchIRLflag == False:
        # this change is not reflected in updatewithalg
        lineFoundWeights = remaining[0]
        # Strip the surrounding brackets, then split the numbers.
        stripped_weights = lineFoundWeights[1:-1].split(", ")
        print(stripped_weights)
        learned_weights = [float(x) for x in stripped_weights]
        lineFeatureExpec = remaining[1]
        num_Trajsofar = int(remaining[2])
    elif not remaining:
        sessionFinish = False
        print("\n no results from i2rl session")

    return (returnval, lineFoundWeights, lineFeatureExpec,
            learned_weights, num_Trajsofar, sessionFinish)