def __init__(self, mdp_world, Q, opt_policy, precision, debug=False, use_suboptimal_rankings=False, epsilon_gap=0.0, teacher=None, tests=None, halfspaces=None):
    """Build a value-alignment verification test from ranking halfspaces.

    Args:
        mdp_world: the MDP the tests are generated for.
        Q: Q-values for `mdp_world` (passed through to the teacher).
        opt_policy: optimal policy for `mdp_world` (passed through to the teacher).
        precision: epsilon used by the teacher for value comparisons.
        debug: enable verbose output in the teacher.
        use_suboptimal_rankings: forwarded to the teacher's test generation.
        epsilon_gap: slack forwarded to `get_optimal_value_alignment_tests`.
        teacher: optional precomputed teacher; one is constructed if falsy.
        tests, halfspaces: optional precomputed results; both must be given
            (non-empty) to skip regeneration.
    """
    self.mdp_world = mdp_world
    self.precision = precision
    self.debug = debug
    self.epsilon_gap = epsilon_gap
    if not teacher:
        teacher = machine_teaching.StateActionRankingTeacher(
            mdp_world, Q, opt_policy, debug=self.debug, epsilon=precision)
    # TODO: we don't need the tests, just the halfspaces, but we do need to
    # know which are equality constraints.
    if tests and halfspaces:
        tests, self.halfspaces = tests, halfspaces
    else:
        # Bug fix: `use_suboptimal_rankings` was previously accepted but
        # ignored (hardcoded to False). Forwarding it is backward-compatible
        # since its default is False.
        tests, self.halfspaces = teacher.get_optimal_value_alignment_tests(
            use_suboptimal_rankings=use_suboptimal_rankings,
            compare_optimal=False,
            epsilon_gap=self.epsilon_gap)
    # For now just select the first question for each halfspace.
    self.test = [questions[0] for questions in tests]
def __init__(self, mdp_world, Q, opt_policy, precision, debug=False, remove_redundancy_lp=True, teacher=None, tests=None, halfspaces=None):
    """Build a value-alignment verification test from ranking halfspaces.

    Args:
        mdp_world: the MDP the tests are generated for.
        Q: precomputed Q-values for `mdp_world`.
        opt_policy: precomputed optimal policy for `mdp_world`.
        precision: epsilon used by the teacher for value comparisons.
        debug: enable verbose output in the teacher.
        remove_redundancy_lp: whether the teacher should prune redundant
            halfspaces via an LP (forwarded to the teacher).
        teacher: optional precomputed teacher; one is constructed if falsy.
        tests, halfspaces: optional precomputed results; both must be given
            (non-empty) to skip regeneration.
    """
    self.mdp_world = mdp_world
    self.precision = precision
    self.debug = debug
    self.q_values = Q  # Q-values are supplied, not recomputed here.
    self.optimal_policy = opt_policy
    if not teacher:
        # Bug fix: `remove_redundancy_lp` was previously accepted but never
        # forwarded to the teacher (the intended call survived only in
        # commented-out code). Its default matches the parameter default.
        teacher = machine_teaching.StateActionRankingTeacher(
            mdp_world, Q, opt_policy, debug=self.debug,
            remove_redundancy_lp=remove_redundancy_lp, epsilon=precision)
    # TODO: we don't need the tests, just the halfspaces, but we do need to
    # know which are equality constraints.
    if tests and halfspaces:
        self.tests, self.halfspaces = tests, halfspaces
    else:
        self.tests, self.halfspaces = teacher.get_optimal_value_alignment_tests(
            use_suboptimal_rankings=False, compare_optimal=False)
    # For now just select the first question for each halfspace.
    self.test = [questions[0] for questions in self.tests]
def __init__(self, mdp_world, precision, debug=False, remove_redundancy_lp = True):
    """Solve the MDP and build a value-alignment verification test.

    Computes Q-values and the optimal policy for `mdp_world`, then asks a
    ranking teacher for the optimal alignment tests and keeps the first
    question per halfspace as `self.test`.
    """
    self.mdp_world = mdp_world
    self.precision = precision
    self.debug = debug
    # Solve the MDP up front; both the Q-values and the policy are kept.
    self.q_values = mdp.compute_q_values(mdp_world, eps=precision)
    self.optimal_policy = mdp.find_optimal_policy(
        mdp_world, Q=self.q_values, epsilon=precision)
    ranking_teacher = machine_teaching.StateActionRankingTeacher(
        mdp_world,
        debug=self.debug,
        remove_redundancy_lp=remove_redundancy_lp,
        epsilon=precision)
    alignment_tests, _ = ranking_teacher.get_optimal_value_alignment_tests(
        use_suboptimal_rankings=False)
    # For now just select the first question for each halfspace.
    self.test = [question_set[0] for question_set in alignment_tests]
def __init__(self, mdp_world, precision, debug=False):
    """Build a value-alignment verification test for `mdp_world`.

    A ranking teacher generates the optimal alignment tests; the first
    question of each halfspace's question set becomes `self.test`.
    """
    self.mdp_world = mdp_world
    self.precision = precision
    self.debug = debug
    ranking_teacher = machine_teaching.StateActionRankingTeacher(
        mdp_world, debug=self.debug, epsilon=precision)
    alignment_tests, _ = ranking_teacher.get_optimal_value_alignment_tests(
        use_suboptimal_rankings=False)
    # For now just select the first question for each halfspace.
    self.test = [question_set[0] for question_set in alignment_tests]
eval_weights.append(eval_weight_vector) num_eval_policies += 1 print("There are {} distinct optimal policies".format( len(eval_policies))) if len(eval_policies) == 0: print( "The only possible policy is the optimal policy. There must be a problem with the features. Can't do verification if only on policy possible!" ) sys.exit() print() print("Generating verification tests") #TODO: save computation by solving for halfspaces once for ARP-w and ARP-bb teacher = machine_teaching.StateActionRankingTeacher( true_world, Qopt, opt_policy, debug=debug, epsilon=precision) #TODO: we don't need the tests, just the halfspaces, but we do need to know which are equality tests, halfspaces = teacher.get_optimal_value_alignment_tests( use_suboptimal_rankings=False, compare_optimal=False) for vindx, verifier_name in enumerate(verifier_list): tester = None size_verification_test = None if "state-value-critical-" in verifier_name: critical_value_thresh = float( verifier_name[len("state-value-critical-"):]) #print("critical value", critical_value_thresh) tester = ah.CriticalStateActionValueVerifier( true_world,
eval_policy = mdp.find_optimal_policy(rand_world, Q=Qval, epsilon=precision) #only save if not equal to optimal policy if eval_policy not in eval_policies: if debug: print("found distinct eval policy") print("weights", eval_weight_vector) rand_world.print_map(rand_world.to_arrows(eval_policy)) eval_policies.append(eval_policy) eval_Qvalues.append(Qval) eval_weights.append(eval_weight_vector) teacher = machine_teaching.StateActionRankingTeacher(rand_world, debug=False, epsilon=precision) tests, halfspaces = teacher.get_optimal_value_alignment_tests( use_suboptimal_rankings=False) eval_halfspaces.append(halfspaces) print(halfspaces) #add all the normal vectors to a big list for getting edges later for h in halfspaces: all_halfspaces.append(h) num_eval_policies += 1 print("There are {} distinct optimal policies when sampling randomly".format( len(eval_policies)))