def enumerateForBIRLsortingModel1(trajs):
    """Write the first trajectory as enumerated state/action indices.

    Only ``trajs[0]`` is read.  Each of its entries is either ``None`` or an
    indexable pair whose element 0 is a state and element 1 is an action
    object.  For every usable entry, the key under which an equal state is
    stored in the module-level ``dict_stateEnum`` is written to
    ``f_st_BIRLcode`` and the key of an equal action in ``dict_actEnum`` is
    written to ``f_ac_BIRLcode``, each followed by a comma.  A newline is
    written to both files after every entry, usable or not.

    Entries that are ``None``, or whose action class is not one of the seven
    names handled below, are reported on stdout and contribute only the
    newline, so the two files stay line-aligned and no stale action index is
    ever written.

    Raises:
        ValueError: if a state or action has no equal value in its
            enumeration dictionary.
    """
    # Deferred constructors: a class name is only resolved when an action of
    # that type actually occurs in the trajectory.
    make_action = {
        "InspectAfterPicking": lambda: InspectAfterPicking(),
        "InspectWithoutPicking": lambda: InspectWithoutPicking(),
        "Pick": lambda: Pick(),
        "PlaceOnConveyor": lambda: PlaceOnConveyor(),
        "PlaceInBin": lambda: PlaceInBin(),
        "ClaimNewOnion": lambda: ClaimNewOnion(),
        "ClaimNextInList": lambda: ClaimNextInList(),
    }

    # Materialise keys/values once: dict views cannot be indexed on Python 3,
    # and the enumerations are not modified while the trajectory is written.
    # keys() and values() of an unmodified dict iterate in matching order.
    state_keys = list(dict_stateEnum.keys())
    state_vals = list(dict_stateEnum.values())
    act_keys = list(dict_actEnum.keys())
    act_vals = list(dict_actEnum.values())

    patroller = trajs[0]
    for sap in patroller:
        if sap is None:
            print("can't enumerate bcz sap is none ")
        else:
            factory = make_action.get(sap[1].__class__.__name__)
            if factory is None:
                # Unknown action: skip the pair instead of reusing whatever
                # action the previous entry happened to leave behind.
                print("can't enumerate ", sap[1])
            else:
                # A fresh instance of the same class is used for the lookup,
                # which relies on the action's equality comparison.
                test_act = factory()

                # adding data for BIRL MLIRL
                inds = state_keys[state_vals.index(sap[0])]
                f_st_BIRLcode.write(str(inds) + ",")
                inda = act_keys[act_vals.index(test_act)]
                f_ac_BIRLcode.write(str(inda) + ",")

        f_st_BIRLcode.write("\n")
        f_ac_BIRLcode.write("\n")

    return
def parse_sorting_policy(buf):
    """Turn textual solver output into a ``MapAgent`` over sorting states.

    ``buf`` is split on newlines.  A line that contains ``" = "`` is read as
    ``<state> = <action>``; other lines are ignored.  The state text has its
    first and last character removed and is split on commas; the first four
    fields are converted to ``int`` and passed to ``sortingState``.  The
    action text must be one of the eight names handled below.

    An unknown action name prints a message and calls ``exit(0)``.

    Returns:
        ``MapAgent`` built from the ``{sortingState: action}`` mapping.
    """
    # Zero-argument factories; the lambdas postpone the class-name lookup
    # until a line naming that action is actually encountered.
    action_factories = {
        "InspectAfterPicking": lambda: InspectAfterPicking(),
        "InspectWithoutPicking": lambda: InspectWithoutPicking(),
        "Pick": lambda: Pick(),
        "PlaceOnConveyor": lambda: PlaceOnConveyor(),
        "PlaceInBin": lambda: PlaceInBin(),
        "ClaimNewOnion": lambda: ClaimNewOnion(),
        "ClaimNextInList": lambda: ClaimNextInList(),
        "PlaceInBinClaimNextInList": lambda: PlaceInBinClaimNextInList(),
    }

    policy = {}
    for line in buf.split("\n"):
        fields = line.split(" = ")
        if len(fields) < 2:
            continue
        state_text, action_name = fields[0], fields[1]

        # Drop the enclosing pair of characters around the state fields.
        coords = state_text[1:-1].split(",")
        key = sortingState(int(coords[0]), int(coords[1]), int(coords[2]),
                           int(coords[3]))

        factory = action_factories.get(action_name)
        if factory is None:
            print("Invalid input policy to parse_sorting_policy")
            exit(0)

        policy[key] = factory()

    from mdp.agent import MapAgent
    return MapAgent(policy)
def saveDataForBaseline():
    """Write the enumerated sorting MDP and expert data for the BIRL/MLIRL code.

    Operates entirely on module-level names (``model``, ``dummy_states``,
    ``dict_stateEnum``, ``dict_actEnum``, ``traj``, ``f_st_BIRLcode``,
    ``f_ac_BIRLcode``, ``sortingReward``, ``true_assignments``,
    ``List_TrueWeights``) and takes no arguments.  In order, it:

    1. appends every state of ``model`` plus ``sortingState(-1, -1, -1, -1)``
       to ``dummy_states`` and numbers all of ``dummy_states`` from 1 in
       ``dict_stateEnum``;
    2. numbers seven sorting actions from 1 in ``dict_actEnum``;
    3. records ``traj`` through ``enumerateForBIRLsortingModel1`` and closes
       ``f_st_BIRLcode`` and ``f_ac_BIRLcode``;
    4. writes ``transition_matrix.txt``: per action, one comma-terminated
       row per state index, blocks separated by a blank line;
    5. writes ``features_matrix.txt``: per action, one line of
       ``sortingReward.features(s, a)`` values per state;
    6. writes ``weights_experts.log``: one row per reward dimension, one
       column per distinct value in ``true_assignments``.

    All three files live in ``get_home() + "/BIRL_MLIRL_data/"`` and are
    overwritten.
    """

    #############################################################
    # BIRL input data for checking if problem is method
    #############################################################

    sortingMDP = model
    for s in sortingMDP.S():
        dummy_states.append(s)
    dummy_states.append(sortingState(-1, -1, -1, -1))

    # Enumerations are 1-based.
    for ind, s in enumerate(dummy_states, 1):
        dict_stateEnum[ind] = s
    print("dict_stateEnum \n", dict_stateEnum)

    acts = [InspectAfterPicking(), PlaceOnConveyor(), PlaceInBin(),
            Pick(), ClaimNewOnion(), InspectWithoutPicking(),
            ClaimNextInList()]
    for ind, a in enumerate(acts, 1):
        dict_actEnum[ind] = a

    # record first trajectory in data for single task BIRL
    try:
        enumerateForBIRLsortingModel1(traj)
    finally:
        # The trajectory files are closed even if enumeration fails.
        f_st_BIRLcode.close()
        f_ac_BIRLcode.close()

    data_dir = get_home() + "/BIRL_MLIRL_data/"

    # Compute the transitions before opening the file so that a failure
    # here does not leave a truncated transition_matrix.txt behind.
    tuple_res = sortingMDP.generate_matrix(dict_stateEnum, dict_actEnum)
    dict_tr = tuple_res[0]
    n_states = len(dict_stateEnum)
    with open(data_dir + "transition_matrix.txt", "w") as f_TM_BIRLcode:
        for ind1 in range(1, len(dict_actEnum) + 1):
            # Copy into a float array first so every entry is formatted the
            # same way regardless of the type stored in dict_tr.
            acArray2d = np.empty((n_states, n_states))
            for ind2 in range(1, n_states + 1):
                for ind3 in range(1, n_states + 1):
                    acArray2d[ind3 - 1][ind2 - 1] = dict_tr[ind1][ind3][ind2]

            for row in acArray2d:
                f_TM_BIRLcode.write(
                    "".join(str(value) + "," for value in row) + "\n")
            f_TM_BIRLcode.write("\n")

    with open(data_dir + "features_matrix.txt", "w") as f_Phis_BIRLcode:
        for inda in range(1, len(dict_actEnum) + 1):
            a = dict_actEnum[inda]
            for inds in range(1, len(dict_stateEnum) + 1):
                s = dict_stateEnum[inds]
                arraysPhis = sortingReward.features(s, a)
                f_Phis_BIRLcode.write(
                    "".join(str(phi) + "," for phi in arraysPhis) + "\n")
            f_Phis_BIRLcode.write("\n")

    # One column of true weights per distinct expert assignment.
    expert_ids = np.unique(true_assignments)
    wts_experts_array = np.empty((sortingReward._dim, len(expert_ids)))
    for j, wt_ind in enumerate(expert_ids):
        for i in range(wts_experts_array.shape[0]):
            wts_experts_array[i][j] = List_TrueWeights[wt_ind][i]

    with open(data_dir + "weights_experts.log", "w") as f_wts_BIRLcode:
        for row in wts_experts_array:
            f_wts_BIRLcode.write(
                "".join(str(weight) + "," for weight in row) + "\n")
def computeLBA(fileTruePolicy, model, mapAgentLrndPolicy):
    """Return the fraction of compared states where the learned action matches.

    The true policy is read from ``fileTruePolicy``.  Each stripped line that
    contains ``" = "`` is read as ``<state> = <action>``; the state text has
    its first and last character removed, is split on commas, and its first
    four fields become an ``int`` 4-tuple key.  Other lines are ignored.  An
    unknown action name prints a message and calls ``exit(0)``.

    If the class of ``mapAgentLrndPolicy`` is named ``"MapAgent"``, every
    state of ``model.S()`` that is a key of ``mapAgentLrndPolicy._policy`` is
    converted to the same 4-tuple form (``_onion_location``, ``_prediction``,
    ``_EE_location``, ``_listIDs_status``).  States whose tuple appears in the
    true policy are counted, and a success is counted when the first action
    of ``mapAgentLrndPolicy.actions(s)`` equals the true action.

    Args:
        fileTruePolicy: path of the text file holding the true policy.
        model: object whose ``S()`` yields the states to compare.
        mapAgentLrndPolicy: learned policy; ignored unless its class is
            named ``"MapAgent"``.

    Returns:
        ``successes / compared`` as a float, or ``0`` when no state could be
        compared.
    """
    # Zero-argument factories; the lambdas postpone the class-name lookup
    # until a line naming that action is actually read.
    make_action = {
        "InspectAfterPicking": lambda: InspectAfterPicking(),
        "InspectWithoutPicking": lambda: InspectWithoutPicking(),
        "Pick": lambda: Pick(),
        "PlaceOnConveyor": lambda: PlaceOnConveyor(),
        "PlaceInBin": lambda: PlaceInBin(),
        "ClaimNewOnion": lambda: ClaimNewOnion(),
        "ClaimNextInList": lambda: ClaimNextInList(),
        "Pickpip": lambda: Pickpip(),
        "PlaceInBinpip": lambda: PlaceInBinpip(),
    }

    # read and compare policies using dictionaries
    truePol = {}
    # The context manager closes the file even when a line fails to parse.
    with open(fileTruePolicy, "r") as f:
        for stateaction in f:
            temp = stateaction.strip().split(" = ")
            if len(temp) < 2:
                continue
            state = temp[0]
            action = temp[1]
            pieces = state[1:-1].split(",")

            ss = (int(pieces[0]), int(pieces[1]), int(pieces[2]),
                  int(pieces[3]))

            factory = make_action.get(action)
            if factory is None:
                print("Invalid input policy to parse_sorting_policy")
                exit(0)

            truePol[ss] = factory()

    totalsuccess = 0
    totalstates = 0
    if mapAgentLrndPolicy.__class__.__name__ == "MapAgent":
        for s in model.S():
            if s not in mapAgentLrndPolicy._policy:  # check key existence
                continue
            # list(...) keeps this working where keys() is a view (Python 3).
            action = list(mapAgentLrndPolicy.actions(s).keys())[0]
            ss2 = (int(s._onion_location), int(s._prediction),
                   int(s._EE_location), int(s._listIDs_status))

            if ss2 in truePol:
                totalstates += 1
                if truePol[ss2] == action:
                    totalsuccess += 1

    print("totalstates, totalsuccess: " + str(totalstates) + ", " +
          str(totalsuccess))
    if totalstates == 0:
        print("Error: states in two policies are different")
        return 0

    return float(totalsuccess) / float(totalstates)
def parsePolicies(stdout, lineFoundWeights, lineFeatureExpec,
                  learned_weights, num_Trajsofar, BatchIRLflag):
    """Parse a learned policy and, when present, the session results after it.

    ``stdout`` is split on newlines.  Lines up to the first ``"ENDPOLICY"``
    line that contain ``" = "`` are read as ``<state> = <action>``: the state
    text loses its first and last character, is split on commas, and its
    first four fields are passed as ``int`` to ``sortingState``.  An unknown
    action name prints a message and calls ``exit(0)``.

    If lines remain after the policy section and ``BatchIRLflag == False``,
    the next three lines replace the corresponding inputs:

    * the weights line, kept verbatim as ``lineFoundWeights`` and also
      parsed (first/last character dropped, split on ``", "``) into a list
      of floats ``learned_weights``;
    * the feature-expectation line, kept verbatim as ``lineFeatureExpec``;
    * an integer line, parsed into ``num_Trajsofar``.

    If no lines remain, the inputs are returned unchanged and the session is
    reported as unfinished.  A ``None`` ``stdout`` is reported and then
    handled like empty output.

    Returns:
        Tuple ``(agents, lineFoundWeights, lineFeatureExpec, learned_weights,
        num_Trajsofar, sessionFinish)`` where ``agents`` is a one-element
        list holding the ``mdp.agent.MapAgent`` for the parsed policy and
        ``sessionFinish`` is ``False`` only when nothing followed the policy
        section.

    Raises:
        IndexError, ValueError: if trailing lines are present but fewer than
            three or not in the expected format.
    """
    if stdout is None:
        print("no stdout in parse policies")
        # Continue as if nothing had been produced, which ends in the
        # "no results" path instead of failing on None.split below.
        stdout = ""

    # Zero-argument factories; the lambdas postpone the class-name lookup
    # until a line naming that action is actually encountered.
    make_action = {
        "InspectAfterPicking": lambda: InspectAfterPicking(),
        "InspectWithoutPicking": lambda: InspectWithoutPicking(),
        "Pick": lambda: Pick(),
        "PlaceOnConveyor": lambda: PlaceOnConveyor(),
        "PlaceInBin": lambda: PlaceInBin(),
        "ClaimNewOnion": lambda: ClaimNewOnion(),
        "ClaimNextInList": lambda: ClaimNextInList(),
        "PlaceInBinClaimNextInList": lambda: PlaceInBinClaimNextInList(),
    }

    stateactions = stdout.split("\n")
    # Number of lines consumed by the policy section, including the
    # ENDPOLICY marker when there is one.
    counter = 0
    p = {}
    for stateaction in stateactions:
        counter += 1
        if stateaction == "ENDPOLICY":
            break
        temp = stateaction.split(" = ")
        if len(temp) < 2:
            continue
        state = temp[0]
        action = temp[1]

        pieces = state[1:-1].split(",")
        ss = sortingState(int(pieces[0]), int(pieces[1]), int(pieces[2]),
                          int(pieces[3]))

        factory = make_action.get(action)
        if factory is None:
            print("Invalid input policy to parse_sorting_policy")
            exit(0)

        p[ss] = factory()

    returnval = [mdp.agent.MapAgent(p)]

    trailer = stateactions[counter:]
    sessionFinish = True
    # "== False" is kept on purpose: it matches False/0 only, unlike "not".
    if trailer and BatchIRLflag == False:
        # this change is not reflected in updatewithalg
        lineFoundWeights = trailer[0]

        stripped_weights = lineFoundWeights[1:-1].split(", ")
        print(stripped_weights)
        learned_weights = [float(x) for x in stripped_weights]

        lineFeatureExpec = trailer[1]
        num_Trajsofar = int(trailer[2])

    elif not trailer:
        sessionFinish = False
        print("\n no results from i2rl session")

    return (returnval, lineFoundWeights, lineFeatureExpec,
            learned_weights, num_Trajsofar, sessionFinish)