Example #1
def dkt_multistep_single(n_concepts, n_trajectories, model_id, checkpoints,
                         horizon, use_mem):
    '''
    Compute the multistep prediction error per step for both the random and expert policies.
    '''
    if not use_mem:
        model_list = []
        for chkpt in checkpoints:
            model = dmc.DynamicsModel(model_id,
                                      timesteps=horizon,
                                      load_checkpoint=False)
            model.load(chkpt)
            model_list.append(model)
        dkt = dmc.RnnStudentSimEnsemble(model_list)
    else:
        mem_array_list = []
        for chkpt in checkpoints:
            mem_arrays = np.load(chkpt)['mem_arrays']
            mem_array_list.append(mem_arrays)
        dkt = dmc.RnnStudentSimMemEnsemble(n_concepts, mem_array_list)

    concept_tree = cdg.ConceptDependencyGraph()
    concept_tree.init_default_tree(n_concepts)
    test_student = st.Student2(n_concepts, True)

    # for both policies
    policies = ['random', 'expert']

    # for horizons: 0,1,2,3,...,horizon
    errors = np.zeros((2, horizon + 1))

    for pol in six.moves.range(2):
        for i in six.moves.range(n_trajectories):
            curr_dkt = dkt.copy()
            # sample a real trajectory
            traj = dg.generate_student_sample(concept_tree,
                                              seqlen=horizon + 1,
                                              student=test_student,
                                              policy=policies[pol])
            for t in six.moves.range(horizon + 1):
                curr_action = st.make_student_action_vec(traj[t][0])
                curr_ob = traj[t][1]
                curr_probs = sanitize_probs(n_concepts,
                                            curr_dkt.sample_observations())

                # advance dkt with sampled observation
                sampled_ob = 1 if np.random.random() < curr_probs[
                    curr_action.concept] else 0
                curr_dkt.advance_simulator(curr_action, sampled_ob)

                # accumulate the error for this step
                errors[pol, t] += np.square(curr_ob -
                                            curr_probs[curr_action.concept])

    # debug
    #six.print_('errors {}'.format(errors / n_trajectories))

    return errors / n_trajectories
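
A hedged usage sketch: the model id, checkpoint paths, and counts below are placeholders, not values from the original listing.

# Hypothetical call: an ensemble of two saved checkpoints, without memory arrays.
checkpoints = ['checkpoints/dkt-run1.ckpt', 'checkpoints/dkt-run2.ckpt']
errors = dkt_multistep_single(n_concepts=4, n_trajectories=100,
                              model_id='test2_model_small',
                              checkpoints=checkpoints,
                              horizon=6, use_mem=False)
# errors has shape (2, horizon + 1): row 0 is the random policy, row 1 the expert policy.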
Example #2
def dkt_test_policies_rme(model_id, n_trajectories, r_type, policies, chkpt):
    '''
    Tests the given open-loop policies for student2 n4 on the learned model.
    '''

    horizon = 6
    n_concepts = 4

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    student2 = st.Student2(n_concepts, transition_after)

    # load model from given file
    model = dmc.DynamicsModel(model_id=model_id,
                              timesteps=horizon,
                              load_checkpoint=False)
    model.load(chkpt)

    # create the model and simulators
    student = student2.copy()
    student.reset()
    student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(student, dgraph)

    # initialize the shared dktcache across the trials
    dktcache = dict()

    num_policies = policies.shape[0]
    rewards = np.zeros((num_policies, ))
    traj_per_policy = n_trajectories

    for pix in six.moves.range(num_policies):
        pol = policies[pix, :]
        reward_acc = 0.0
        for t in six.moves.range(traj_per_policy):
            # make the model
            rnnmodel = dmc.RnnStudentSim(model)

            curr_state = DKTState(rnnmodel, sim, 1, horizon, r_type, dktcache,
                                  False)
            all_actions = curr_state.actions
            for i in range(horizon):
                curr_state = curr_state.perform(all_actions[pol[i]])
            reward_acc += curr_state.reward()
        rewards[pix] = reward_acc / traj_per_policy

    return rewards
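
A sketch of a possible call; the key assumption is that policies is an integer array of shape (num_policies, horizon), while the model id, checkpoint path, and the module-level transition_after flag are placeholders.

policies = np.array([[0, 1, 1, 2, 2, 3],    # each row is one open-loop action sequence
                     [0, 1, 2, 3, 3, 3]])
rewards = dkt_test_policies_rme('test2_model_small', n_trajectories=10,
                                r_type=SPARSE, policies=policies,
                                chkpt='checkpoints/dkt-run1.ckpt')
# rewards[pix] is the average reward of policy pix under the learned model.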
Example #3
def test_student_exact():
    '''
    MCTS is now working.
    The number of rollouts required to be optimal grows very fast as a function of the horizon.
    Still, even if not fully optimal, MCTS is an extremely good approximation.

    The default student with horizon 10 does well with about 50 rollouts.
    A learn-prob 0.15 student with horizon 40 needs about 150 rollouts; it reaches about 0.94, which is 0.02 below the 0.96 optimum.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency
    r_type = DENSE
    n_concepts = 4
    learn_prob = 0.5
    horizon = 6
    n_rollouts = 50
    n_trajectories = 100
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    #dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    accs = Parallel(n_jobs=n_jobs)(delayed(test_student_exact_chunk)(
        traj_per_job, dgraph, test_student, horizon, n_rollouts, r_type)
                                   for _ in range(n_jobs))
    avg = sum(accs) / (n_jobs * traj_per_job)

    test_data = dg.generate_data(dgraph,
                                 student=test_student,
                                 n_students=1000,
                                 seqlen=horizon,
                                 policy='expert',
                                 filename=None,
                                 verbose=False)
    print('Number of jobs {}'.format(n_jobs))
    print('Trajectory per job {}'.format(traj_per_job))
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest mcts: {}'.format(avg))
Example #4
def dkt_test_policy(model_id, horizon, n_trajectories, r_type, chkpt):
    '''
    Tests the uniformly random (behavior) policy for student2 n4 on the learned model.
    '''
    n_concepts = 4

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    student2 = st.Student2(n_concepts, transition_after)

    # load model from given file
    model = dmc.DynamicsModel(model_id=model_id,
                              timesteps=horizon,
                              load_checkpoint=False)
    model.load(chkpt)

    # create the model and simulators
    student = student2.copy()
    student.reset()
    student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(student, dgraph)

    # initialize the shared dktcache across the trials
    dktcache = dict()

    reward_acc = 0.0

    for t in six.moves.range(n_trajectories):
        # make the model
        rnnmodel = dmc.RnnStudentSim(model)

        curr_state = DKTState(rnnmodel, sim, 1, horizon, r_type, dktcache,
                              False)
        all_actions = curr_state.actions
        for i in range(horizon):
            curr_state = curr_state.perform(random.choice(all_actions))
            reward_acc += curr_state.reward()
        #six.print_('Step: {}'.format(curr_state.step))
        #six.print_('Reward: {}'.format(curr_state.reward()))
        #six.print_('Reward Acc: {}'.format(reward_acc))
        #six.print_('Probs: {}'.format(curr_state.get_probs()))

    return reward_acc / n_trajectories
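
A possible invocation, again with a placeholder model id and checkpoint path; SPARSE and DENSE are the reward-type constants used elsewhere in this module.

avg_reward = dkt_test_policy('test2_model_small', horizon=6,
                             n_trajectories=100, r_type=SPARSE,
                             chkpt='checkpoints/dkt-run1.ckpt')
six.print_('Random-policy reward on learned model: {}'.format(avg_reward))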
Example #5
def test_drqn(model_id="", parallel=False):
    '''
    Test DRQN
    '''
    n_concepts = 4
    learn_prob = 0.15
    horizon = 6
    n_trajectories = 100
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    from simple_mdp import create_custom_dependency
    # dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    # student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student = st.Student2(n_concepts)
    if model_id == "":
        model_id = "test_model_drqn"

    print('Testing model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))

    if parallel:
        accs = Parallel(n_jobs=n_jobs)(delayed(test_drqn_chunk)(
            traj_per_job, dgraph, student, model_id, horizon)
                                       for _ in range(n_jobs))
        avg = sum(accs) / (n_jobs)
    else:
        avg = test_drqn_chunk(n_trajectories, dgraph, student, model_id,
                              horizon)

    test_data = dg.generate_data(dgraph,
                                 student=student,
                                 n_students=1000,
                                 seqlen=horizon,
                                 policy='expert',
                                 filename=None,
                                 verbose=False)
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest drqn: {}'.format(avg))
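
Typical calls might look like the following sketch; the non-default model id is hypothetical, and parallel=True splits the trajectories over 8 joblib workers as in the body above.

test_drqn()                                      # defaults to model_id "test_model_drqn", serial
test_drqn(model_id='drqn_run3', parallel=True)   # hypothetical id, evaluated in parallel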
Example #6
def test_dkt_multistep(model_id, dataset, chkpt=None):
    '''
    Test DKT multistep error on dataset. Dataset is output from generate_data.
    '''
    import concept_dependency_graph as cdg

    n_concepts = dataset[0][0][0].shape[0]
    horizon = len(dataset[0])

    # debug
    #six.print_('n concepts {} horizon {} trajectory {}'.format(n_concepts, horizon, dataset[0]))

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    # create the model and simulators
    student2 = st.Student2(n_concepts, True)
    test_student = student2
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=True)
    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Testing model multistep: {}'.format(model_id))

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    # accumulate error
    mse_acc = 0.0
    for i in six.moves.range(len(dataset)):
        curr_mse = 0.0
        curr_traj = dataset[i]
        curr_state = DKTState(dktmodel, sim, 1, horizon, SPARSE, dktcache,
                              False)
        for t in six.moves.range(horizon - 1):
            # advance the DKT, then compare prediction with the data, up to the last prediction
            curr_conceptvec = curr_traj[t][0]
            curr_concept = np.nonzero(curr_conceptvec)[0]
            curr_ob = int(curr_traj[t][1])

            next_conceptvec = curr_traj[t + 1][0]
            next_concept = np.nonzero(next_conceptvec)[0]
            next_ob = int(curr_traj[t + 1][1])

            # advance the DKT
            curr_state = curr_state.perform(
                st.StudentAction(curr_concept, curr_conceptvec))
            next_probs = curr_state.get_probs()

            # compute and accumulate the mse
            diff = next_probs[next_concept] - next_ob
            curr_mse += diff * diff

            #debugging
            #six.print_('traj {} step {} actvec {} act {} ob {} next probs {} diff {}'.format(i,t,curr_conceptvec,curr_concept,curr_ob,next_probs,diff))
        # average mse per step
        mse_acc += curr_mse / (horizon - 1)

        #six.print_('mse per step acc {}'.format(mse_acc))
    # return the average MSE per step in a trajectory
    return mse_acc / len(dataset)
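
A hedged sketch of how the dataset and the call fit together, assuming generate_data is used as in the other examples; the student parameters, model id, and checkpoint path are placeholders.

dgraph = cdg.ConceptDependencyGraph()
dgraph.init_default_tree(4)
dataset = dg.generate_data(dgraph, student=st.Student2(4, True), n_students=500,
                           seqlen=6, policy='expert', filename=None, verbose=False)
mse = test_dkt_multistep('test2_model_small', dataset,
                         chkpt='checkpoints/dkt-run1.ckpt')
six.print_('Average per-step MSE: {}'.format(mse))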
Example #7
def test_dkt_extract_policy(model_id,
                            n_concepts,
                            transition_after,
                            horizon,
                            n_rollouts,
                            r_type,
                            chkpt=None):
    '''
    Test DKT+MCTS and extract the policy used in the real domain. Also returns the q-values.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    #learn_prob = 0.5

    #dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=True)
    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Extracting policy from model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    # create the model and simulators
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    #rollout_policy = default_policies.immediate_reward
    rollout_policy = default_policies.RandomKStepRollOut(horizon + 1)
    uct = MCTS(
        tree_policies.UCB1(1.41), rollout_policy,
        backups.monte_carlo)  # 1.41 is sqrt (2), backups is from mcts.py

    root = StateNode(
        None, DKTState(dktmodel, sim, 1, horizon, r_type, dktcache, True))

    optpolicy = []
    qfunc = []

    for i in range(horizon):
        best_action = uct(root, n=n_rollouts)
        optpolicy.append(best_action.concept)
        qfunc.append([])
        for student_action in root.state.actions:
            qfunc[-1].append(root.children[student_action].q)
        # act in the real environment
        new_root = root.children[best_action].sample_state(real_world=True)
        new_root.parent = None  # cutoff the rest of the tree
        root = new_root

    six.print_('Extracted policy: {}'.format(optpolicy))
    six.print_('Extracted q function: {}'.format(qfunc))

    return optpolicy, qfunc
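
A hedged example call; the model id, checkpoint path, and rollout count are illustrative only.

optpolicy, qfunc = test_dkt_extract_policy('test2_model_small', n_concepts=4,
                                           transition_after=True, horizon=6,
                                           n_rollouts=200, r_type=SPARSE,
                                           chkpt='checkpoints/dkt-run1.ckpt')
# optpolicy[t] is the concept MCTS picks at step t; qfunc[t] holds the root
# q-values of all actions at that step.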
Example #8
def test_dkt_qval(model_id,
                  n_concepts,
                  transition_after,
                  horizon,
                  n_rollouts,
                  r_type,
                  chkpt=None):
    '''
    Test DKT+MCTS with a large number of rollouts to estimate the initial q-value.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    #learn_prob = 0.5

    #dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=True)
    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Testing model qval: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    # create the model and simulators
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    #rollout_policy = default_policies.immediate_reward
    rollout_policy = default_policies.RandomKStepRollOut(horizon + 1)
    uct = MCTS(
        tree_policies.UCB1(1.41), rollout_policy,
        backups.monte_carlo)  # 1.41 is sqrt (2), backups is from mcts.py

    root = StateNode(
        None, DKTState(dktmodel, sim, 1, horizon, r_type, dktcache, False))
    # run MCTS
    best_action = uct(root, n=n_rollouts)
    # get qvalue at the root
    qval = root.q

    six.print_('Initial qval: {}'.format(qval))

    return qval
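
Since the root q-value is a Monte-Carlo estimate, one might sweep the rollout budget to check convergence; the sketch below reuses the same placeholder model id and checkpoint as above.

for n in (100, 1000, 10000):
    qval = test_dkt_qval('test2_model_small', n_concepts=4, transition_after=True,
                         horizon=6, n_rollouts=n, r_type=SPARSE,
                         chkpt='checkpoints/dkt-run1.ckpt')
    six.print_('{} rollouts -> initial qval {}'.format(n, qval))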
Example #9
def test_dkt(model_id,
             n_concepts,
             transition_after,
             horizon,
             n_rollouts,
             n_trajectories,
             r_type,
             use_real,
             use_mem,
             checkpoints=[]):
    '''
    Test DKT+MCTS
    Accepts a list of checkpoints; if more than one is given, an ensemble of the corresponding models is used.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    #learn_prob = 0.5
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    #dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    test_student.reset()
    test_student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(test_student.copy(), dgraph)

    # create a shared dktcache across all processes
    dktcache_manager = mp.Manager()
    dktcache = dktcache_manager.dict()

    print('Testing model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    accs = np.array(
        Parallel(n_jobs=n_jobs)(delayed(test_dkt_chunk)(traj_per_job,
                                                        dgraph,
                                                        sim,
                                                        model_id,
                                                        checkpoints,
                                                        horizon,
                                                        n_rollouts,
                                                        r_type,
                                                        dktcache=dktcache,
                                                        use_real=use_real,
                                                        use_mem=use_mem)
                                for _ in range(n_jobs)))
    results = np.sum(accs, axis=0) / (n_jobs * traj_per_job)
    avg_acc, avg_best_q = results[0], results[1]

    test_data = dg.generate_data(dgraph,
                                 student=test_student,
                                 n_students=1000,
                                 seqlen=horizon,
                                 policy='expert',
                                 filename=None,
                                 verbose=False)
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest mcts: {}'.format(avg_acc))
    print('Average best q: {}'.format(avg_best_q))
    return avg_acc, avg_best_q
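
A final usage sketch for the ensemble entry point; the checkpoint paths and model id are placeholders, and the keywords mirror the signature above.

avg_acc, avg_best_q = test_dkt('test2_model_small', n_concepts=4,
                               transition_after=True, horizon=6,
                               n_rollouts=150, n_trajectories=100,
                               r_type=SPARSE, use_real=True, use_mem=False,
                               checkpoints=['checkpoints/dkt-run1.ckpt',
                                            'checkpoints/dkt-run2.ckpt'])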