Example #1
def dkt_multistep_single(n_concepts, n_trajectories, model_id, checkpoints,
                         horizon, use_mem):
    '''
    Compute the multistep error per step for both the random and expert policies.
    '''
    if not use_mem:
        model_list = []
        for chkpt in checkpoints:
            model = dmc.DynamicsModel(model_id,
                                      timesteps=horizon,
                                      load_checkpoint=False)
            model.load(chkpt)
            model_list.append(model)
        dkt = dmc.RnnStudentSimEnsemble(model_list)
    else:
        mem_array_list = []
        for chkpt in checkpoints:
            mem_arrays = np.load(chkpt)['mem_arrays']
            mem_array_list.append(mem_arrays)
        dkt = dmc.RnnStudentSimMemEnsemble(n_concepts, mem_array_list)

    concept_tree = cdg.ConceptDependencyGraph()
    concept_tree.init_default_tree(n_concepts)
    test_student = st.Student2(n_concepts, True)

    # for both policies
    policies = ['random', 'expert']

    # for horizons: 0,1,2,3,...,horizon
    errors = np.zeros((2, horizon + 1))

    for pol in six.moves.range(2):
        for i in six.moves.range(n_trajectories):
            curr_dkt = dkt.copy()
            # sample a real trajectory
            traj = dg.generate_student_sample(concept_tree,
                                              seqlen=horizon + 1,
                                              student=test_student,
                                              policy=policies[pol])
            for t in six.moves.range(horizon + 1):
                curr_action = st.make_student_action_vec(traj[t][0])
                curr_ob = traj[t][1]
                curr_probs = sanitize_probs(n_concepts,
                                            curr_dkt.sample_observations())

                # advance dkt with sampled observation
                sampled_ob = 1 if np.random.random() < curr_probs[
                    curr_action.concept] else 0
                curr_dkt.advance_simulator(curr_action, sampled_ob)

                # accumulate the error for this step
                errors[pol, t] += np.square(curr_ob -
                                            curr_probs[curr_action.concept])

    # debug
    #six.print_('errors {}'.format(errors / n_trajectories))

    return errors / n_trajectories
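
The helper sanitize_probs used above is defined elsewhere in the repo. Below is a minimal sketch of a plausible implementation, assuming it substitutes a uniform 0.5 prediction when the RNN ensemble has not yet produced one and clips predictions into [0, 1]; this is an illustration, not the repo's code.

import numpy as np

def sanitize_probs(n_concepts, probs):
    # Plausible sketch only; the real helper lives elsewhere in this repo.
    # Before the first advance_simulator call the ensemble may have no
    # prediction yet, so fall back to an uninformative 0.5 per concept.
    if probs is None:
        return np.full(n_concepts, 0.5)
    return np.clip(np.asarray(probs, dtype=float), 0.0, 1.0)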
Example #2
def test_dkt_rme(model_id, n_rollouts, n_trajectories, r_type, dmcmodel,
                 chkpt):
    '''
    Test DKT+MCTS where the real environment is a StudentDKTSim with a proxy DynamicsModel
    '''

    use_mem = False  # TODO

    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    n_concepts = 4
    horizon = 6
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    #dgraph = create_custom_dependency()
    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    # create a shared dktcache across all processes
    dktcache_manager = mp.Manager()
    # for the MCTS model
    dktcache = dktcache_manager.dict()
    # for the real environment
    dktsimcache = dktcache_manager.dict()

    # create the simulator
    dktsim = st.StudentDKTSim(dgraph, dmcmodel, dktsimcache)

    print('Testing proper RME model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    accs = np.array(
        Parallel(n_jobs=n_jobs)(delayed(test_dkt_chunk)(traj_per_job,
                                                        dgraph,
                                                        dktsim,
                                                        model_id, [chkpt],
                                                        horizon,
                                                        n_rollouts,
                                                        r_type,
                                                        dktcache=dktcache,
                                                        use_real=True,
                                                        use_mem=use_mem)
                                for _ in range(n_jobs)))
    results = np.sum(accs, axis=0) / (n_jobs * traj_per_job)
    avg_acc, avg_best_q = results[0], results[1]

    print('Average posttest mcts: {}'.format(avg_acc))
    print('Average best q: {}'.format(avg_best_q))
    return avg_acc, avg_best_q
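
A usage sketch for the function above; the model ids and checkpoint paths are placeholders, and n_trajectories should be divisible by n_jobs (8) so the integer division drops no trajectories. The proxy DynamicsModel stands in for the real environment, as the docstring describes.

# Hypothetical usage; ids and paths are placeholders.
proxy = dmc.DynamicsModel(model_id='proxy_model', timesteps=6,
                          load_checkpoint=False)
proxy.load('checkpoints/proxy.ckpt')
avg_acc, avg_best_q = test_dkt_rme(model_id='mcts_model',
                                   n_rollouts=200,
                                   n_trajectories=8,
                                   r_type=SPARSE,
                                   dmcmodel=proxy,
                                   chkpt='checkpoints/mcts.ckpt')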
Example #3
def dkt_test_policies_rme(model_id, n_trajectories, r_type, policies, chkpt):
    '''
    Tests the given open-loop policies for student2 n4 on the learned model.
    '''

    horizon = 6
    n_concepts = 4

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    student2 = st.Student2(n_concepts, transition_after)

    # load model from given file
    model = dmc.DynamicsModel(model_id=model_id,
                              timesteps=horizon,
                              load_checkpoint=False)
    model.load(chkpt)

    # create the model and simulators
    student = student2.copy()
    student.reset()
    student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(student, dgraph)

    # initialize the shared dktcache across the trials
    dktcache = dict()

    num_policies = policies.shape[0]
    rewards = np.zeros((num_policies, ))
    traj_per_policy = n_trajectories

    for pix in six.moves.range(num_policies):
        pol = policies[pix, :]
        reward_acc = 0.0
        for t in six.moves.range(traj_per_policy):
            # make the model
            rnnmodel = dmc.RnnStudentSim(model)

            curr_state = DKTState(rnnmodel, sim, 1, horizon, r_type, dktcache,
                                  False)
            all_actions = curr_state.actions
            for i in range(horizon):
                curr_state = curr_state.perform(all_actions[pol[i]])
            reward_acc += curr_state.reward()
        rewards[pix] = reward_acc / traj_per_policy

    return rewards
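
dkt_test_policies_rme expects policies as a (num_policies, horizon) integer array indexing into the state's action list. Assuming actions are indexed 0..n_concepts-1 in concept order, the sketch below scores every open-loop policy for the n4, horizon-6 setting; ids and paths are placeholders.

import itertools
import numpy as np

# 4**6 = 4096 candidate open-loop policies of length 6 over 4 actions
policies = np.array(list(itertools.product(range(4), repeat=6)))
rewards = dkt_test_policies_rme('test2_model_mid', n_trajectories=10,
                                r_type=SPARSE, policies=policies,
                                chkpt='checkpoints/dkt.ckpt')
best_policy = policies[np.argmax(rewards)]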
Example #4
def test_student_exact():
    '''
    MCTS is now working.
    The number of rollouts required to be optimal grows very fast as a function of the horizon.
    Still, even if not fully optimal, MCTS is an extremely good approximation.

    Default student with horizon 10 does well with about 50 rollouts.
    A learn prob 0.15 student with horizon 40 needs about 150 rollouts; it gets about 0.94, which is 0.02 off from 0.96.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency
    r_type = DENSE
    n_concepts = 4
    learn_prob = 0.5
    horizon = 6
    n_rollouts = 50
    n_trajectories = 100
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    #dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    accs = Parallel(n_jobs=n_jobs)(delayed(test_student_exact_chunk)(
        traj_per_job, dgraph, test_student, horizon, n_rollouts, r_type)
                                   for _ in range(n_jobs))
    avg = sum(accs) / (n_jobs * traj_per_job)

    test_data = dg.generate_data(dgraph,
                                 student=test_student,
                                 n_students=1000,
                                 seqlen=horizon,
                                 policy='expert',
                                 filename=None,
                                 verbose=False)
    print('Number of jobs {}'.format(n_jobs))
    print('Trajectory per job {}'.format(traj_per_job))
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest mcts: {}'.format(avg))
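
The Parallel/delayed pattern above splits n_trajectories evenly over n_jobs workers, each returning a sum that is then normalized by n_jobs * traj_per_job. A standalone illustration of the same joblib pattern with a toy chunk function:

from joblib import Parallel, delayed

def toy_chunk(n):
    # stand-in for test_student_exact_chunk: returns a sum over n trajectories
    return sum(0.9 for _ in range(n))

n_trajectories, n_jobs = 100, 8
traj_per_job = n_trajectories // n_jobs
accs = Parallel(n_jobs=n_jobs)(delayed(toy_chunk)(traj_per_job)
                               for _ in range(n_jobs))
# divide by n_jobs * traj_per_job, not n_trajectories: the integer
# division above drops the remainder (here 100 // 8 * 8 = 96 trajectories)
avg = sum(accs) / (n_jobs * traj_per_job)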
Example #5
def init_synthetic_data():
    """
    Run this to generate the default synthetic data sets.
    :return:
    """
    concept_tree = cdg.ConceptDependencyGraph()
    concept_tree.init_default_tree(n=N_CONCEPTS)
    print("Initializing synthetic data sets...")
    n_students = 10000
    seqlen = 100
    for policy in ['random', 'expert', 'modulo']:
        filename = "{}stud_{}seq_{}.pickle".format(n_students, seqlen, policy)
        generate_data(concept_tree,
                      n_students=n_students,
                      seqlen=seqlen,
                      policy=policy,
                      filename="{}{}".format(SYN_DATA_DIR, filename))
    print("Data generation completed. ")
Example #6
def dkt_test_policy(model_id, horizon, n_trajectories, r_type, chkpt):
    '''
    Tests the uniformly random (behavior) policy for student2 n4 on the learned model.
    '''
    n_concepts = 4

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    student2 = st.Student2(n_concepts, transition_after)

    # load model from given file
    model = dmc.DynamicsModel(model_id=model_id,
                              timesteps=horizon,
                              load_checkpoint=False)
    model.load(chkpt)

    # create the model and simulators
    student = student2.copy()
    student.reset()
    student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(student, dgraph)

    # initialize the shared dktcache across the trials
    dktcache = dict()

    reward_acc = 0.0

    for t in six.moves.range(n_trajectories):
        # make the model
        rnnmodel = dmc.RnnStudentSim(model)

        curr_state = DKTState(rnnmodel, sim, 1, horizon, r_type, dktcache,
                              False)
        all_actions = curr_state.actions
        for i in range(horizon):
            curr_state = curr_state.perform(random.choice(all_actions))
            reward_acc += curr_state.reward()
        #six.print_('Step: {}'.format(curr_state.step))
        #six.print_('Reward: {}'.format(curr_state.reward()))
        #six.print_('Reward Acc: {}'.format(reward_acc))
        #six.print_('Probs: {}'.format(curr_state.get_probs()))

    return reward_acc / n_trajectories
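
A usage sketch; the model id and checkpoint path are placeholders. Note that reward is accumulated after every step of a trajectory here, whereas dkt_test_policies_rme (Example #3) only takes the reward of the final state.

# Hypothetical usage; the model id and checkpoint path are placeholders.
avg_reward = dkt_test_policy('test2_model_small', horizon=6,
                             n_trajectories=100, r_type=SPARSE,
                             chkpt='checkpoints/dkt.ckpt')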
Example #7
def test_drqn(model_id="", parallel=False):
    '''
    Test DRQN
    '''
    n_concepts = 4
    learn_prob = 0.15
    horizon = 6
    n_trajectories = 100
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    from simple_mdp import create_custom_dependency
    # dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    # student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student = st.Student2(n_concepts)
    if model_id == "":
        model_id = "test_model_drqn"

    print('Testing model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))

    if parallel:
        accs = Parallel(n_jobs=n_jobs)(delayed(test_drqn_chunk)(
            traj_per_job, dgraph, student, model_id, horizon)
                                       for _ in range(n_jobs))
        avg = sum(accs) / (n_jobs)
    else:
        avg = test_drqn_chunk(n_trajectories, dgraph, student, model_id,
                              horizon)

    test_data = dg.generate_data(dgraph,
                                 student=student,
                                 n_students=1000,
                                 seqlen=horizon,
                                 policy='expert',
                                 filename=None,
                                 verbose=False)
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest drqn: {}'.format(avg))
Example #8
def main_test():
    """
    Run this to test this module.
    - Tests ConceptDependencyGraph
    - Generates a sample for a single student using three different policies
    - Generates toy data set with 5 students
    - Loads generated toy data set
    """
    concept_tree = cdg.ConceptDependencyGraph()
    concept_tree.init_default_tree(n=11)
    print(concept_tree.children)
    print(concept_tree.parents)
    print(concept_tree.prereq_map)
    print("Generate one sample using expert policy. ")
    generate_student_sample(concept_tree, policy='expert', verbose=True)
    print("Generate one sample using random policy. ")
    generate_student_sample(concept_tree, policy='random', verbose=True)
    print("Generate one sample using modulo policy. ")
    generate_student_sample(concept_tree, policy='modulo', verbose=True)

    make_toy_data(concept_tree)
    load_toy_data()
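
One conventional way to invoke this driver:

if __name__ == '__main__':
    main_test()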
Example #9
def dkt_forwardsearch_single(n_concepts, model_id, checkpoints, horizon,
                             use_mem):
    '''
    Use forward search to compute the value of the optimal DKT policy executed in the sim, along with other diagnostic information.
    '''
    if not use_mem:
        model_list = []
        for chkpt in checkpoints:
            model = dmc.DynamicsModel(model_id,
                                      timesteps=horizon,
                                      load_checkpoint=False)
            model.load(chkpt)
            model_list.append(model)
        dkt = dmc.RnnStudentSimEnsemble(model_list)
    else:
        mem_array_list = []
        for chkpt in checkpoints:
            mem_arrays = np.load(chkpt)['mem_arrays']
            mem_array_list.append(mem_arrays)
        dkt = dmc.RnnStudentSimMemEnsemble(n_concepts, mem_array_list)

    concept_tree = cdg.ConceptDependencyGraph()
    concept_tree.init_default_tree(n_concepts)
    sim = st.RnnStudent2SimExact(concept_tree)

    # debug output from the recursive helper; these variables are not defined
    # in this scope, so the prints are kept commented out
    #six.print_('Semisparse Value {}'.format(next_ssv))
    #six.print_('Sparse Value {}'.format(next_sv))
    #six.print_('Semisparse Value Sim {}'.format(next_sim_ssv))
    #six.print_('Sparse Value Sim {}'.format(next_sim_sv))
    #six.print_('Semisparse Q-Values along sim trajectory {}'.format(ss_list))
    #six.print_('Sparse Q-Values along sim trajectory {}'.format(s_list))
    #six.print_('Semisparse Sim Q-Values along sim trajectory {}'.format(sim_ss_list))
    #six.print_('Sparse Sim Q-Values along sim trajectory {}'.format(sim_s_list))

    return dkt_forwardsearch_single_recurse(n_concepts, dkt, sim, horizon, 0)
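
The recursive helper dkt_forwardsearch_single_recurse is defined elsewhere in the repo. Below is only a generic illustration of the forward-search technique, assuming a dense per-step reward equal to the observation and the StudentAction constructor seen in Example #10; it is not the repo's implementation.

import numpy as np

def forward_search_value_sketch(n_concepts, dkt, horizon, depth):
    # Illustration only: expectimax over the remaining horizon, maximizing
    # over actions and taking expectations over the two possible observations
    # under the DKT model. Branching is n_concepts actions times 2
    # observations, so cost grows as O((2 * n_concepts) ** horizon).
    if depth >= horizon:
        return 0.0
    probs = sanitize_probs(n_concepts, dkt.sample_observations())
    best = -np.inf
    for concept in range(n_concepts):
        conceptvec = np.zeros(n_concepts)
        conceptvec[concept] = 1.0
        action = st.StudentAction(concept, conceptvec)  # constructor as in Example #10
        p_correct = probs[concept]
        value = 0.0
        for ob, p in ((1, p_correct), (0, 1.0 - p_correct)):
            next_dkt = dkt.copy()
            next_dkt.advance_simulator(action, ob)
            # assumed dense reward: credit the observation itself
            value += p * (ob + forward_search_value_sketch(
                n_concepts, next_dkt, horizon, depth + 1))
        best = max(best, value)
    return best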
Example #10
def test_dkt_multistep(model_id, dataset, chkpt=None):
    '''
    Test DKT multistep prediction error on a dataset; the dataset is the output of generate_data.
    '''
    import concept_dependency_graph as cdg

    n_concepts = dataset[0][0][0].shape[0]
    horizon = len(dataset[0])

    # debug
    #six.print_('n concepts {} horizon {} trajectory {}'.format(n_concepts, horizon, dataset[0]))

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    # create the model and simulators
    student2 = st.Student2(n_concepts, True)
    test_student = student2
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=True)
    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Testing model multistep: {}'.format(model_id))

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    # accumulate error
    mse_acc = 0.0
    for i in six.moves.range(len(dataset)):
        curr_mse = 0.0
        curr_traj = dataset[i]
        curr_state = DKTState(dktmodel, sim, 1, horizon, SPARSE, dktcache,
                              False)
        for t in six.moves.range(horizon - 1):
            # advance the DKT, then compare prediction with the data, up to the last prediction
            curr_conceptvec = curr_traj[t][0]
            curr_concept = np.nonzero(curr_conceptvec)[0]
            curr_ob = int(curr_traj[t][1])

            next_conceptvec = curr_traj[t + 1][0]
            next_concept = np.nonzero(next_conceptvec)[0]
            next_ob = int(curr_traj[t + 1][1])

            # advance the DKT
            curr_state = curr_state.perform(
                st.StudentAction(curr_concept, curr_conceptvec))
            next_probs = curr_state.get_probs()

            # compute and accumulate the mse
            diff = next_probs[next_concept] - next_ob
            curr_mse += diff * diff

            #debugging
            #six.print_('traj {} step {} actvec {} act {} ob {} next probs {} diff {}'.format(i,t,curr_conceptvec,curr_concept,curr_ob,next_probs,diff))
        # average mse per step
        mse_acc += curr_mse / (horizon - 1)

        #six.print_('mse per step acc {}'.format(mse_acc))
    # return the average MSE per step in a trajectory
    return mse_acc / len(dataset)
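
A usage sketch; the model id and checkpoint path are placeholders. As in Examples #4 and #13, calling generate_data with filename=None returns the trajectories directly.

# Hypothetical usage; the model id and checkpoint path are placeholders.
dgraph = cdg.ConceptDependencyGraph()
dgraph.init_default_tree(4)
dataset = dg.generate_data(dgraph, student=st.Student2(4, True),
                           n_students=100, seqlen=6, policy='random',
                           filename=None, verbose=False)
mse = test_dkt_multistep('test2_model_small', dataset,
                         chkpt='checkpoints/dkt.ckpt')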
Example #11
def test_dkt_extract_policy(model_id,
                            n_concepts,
                            transition_after,
                            horizon,
                            n_rollouts,
                            r_type,
                            chkpt=None):
    '''
    Test DKT+MCTS and extract the policy used in the real domain. Also return the q-values.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    #learn_prob = 0.5

    #dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=True)
    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Extracting policy from model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    # create the model and simulators
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    #rollout_policy = default_policies.immediate_reward
    rollout_policy = default_policies.RandomKStepRollOut(horizon + 1)
    uct = MCTS(
        tree_policies.UCB1(1.41), rollout_policy,
        backups.monte_carlo)  # 1.41 ~= sqrt(2); backups.monte_carlo is from mcts.py

    root = StateNode(
        None, DKTState(dktmodel, sim, 1, horizon, r_type, dktcache, True))

    optpolicy = []
    qfunc = []

    for i in range(horizon):
        best_action = uct(root, n=n_rollouts)
        optpolicy.append(best_action.concept)
        qfunc.append([])
        for student_action in root.state.actions:
            qfunc[-1].append(root.children[student_action].q)
        # act in the real environment
        new_root = root.children[best_action].sample_state(real_world=True)
        new_root.parent = None  # cutoff the rest of the tree
        root = new_root

    six.print_('Extracted policy: {}'.format(optpolicy))
    six.print_('Extracted q function: {}'.format(qfunc))

    return optpolicy, qfunc
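
The extracted open-loop policy can be scored on the learned model with dkt_test_policies_rme (Example #3), assuming the action ordering in state.actions matches the concept indices recorded in optpolicy. A sketch with placeholder ids and paths:

import numpy as np

# Hypothetical usage; ids and paths are placeholders.
optpolicy, qfunc = test_dkt_extract_policy('test2_model_mid', n_concepts=4,
                                           transition_after=True, horizon=6,
                                           n_rollouts=1000, r_type=SPARSE,
                                           chkpt='checkpoints/dkt.ckpt')
# optpolicy is a list of `horizon` concept indices and qfunc is a
# horizon x n_actions list of root q-values per step
rewards = dkt_test_policies_rme('test2_model_mid', n_trajectories=10,
                                r_type=SPARSE,
                                policies=np.array([optpolicy]),
                                chkpt='checkpoints/dkt.ckpt')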
Example #12
def test_dkt_qval(model_id,
                  n_concepts,
                  transition_after,
                  horizon,
                  n_rollouts,
                  r_type,
                  chkpt=None):
    '''
    Test DKT+MCTS with many rollouts to estimate the initial q-value.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    #learn_prob = 0.5

    #dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=True)
    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Testing model qval: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    # create the model and simulators
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    #rollout_policy = default_policies.immediate_reward
    rollout_policy = default_policies.RandomKStepRollOut(horizon + 1)
    uct = MCTS(
        tree_policies.UCB1(1.41), rollout_policy,
        backups.monte_carlo)  # 1.41 ~= sqrt(2); backups.monte_carlo is from mcts.py

    root = StateNode(
        None, DKTState(dktmodel, sim, 1, horizon, r_type, dktcache, False))
    # run MCTS
    best_action = uct(root, n=n_rollouts)
    # get qvalue at the root
    qval = root.q

    six.print_('Initial qval: {}'.format(qval))

    return qval
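
A usage sketch with placeholder ids; with a large rollout budget the root q-value is a Monte Carlo estimate of the best expected return from the initial state under the given reward type.

# Hypothetical usage; the model id and checkpoint path are placeholders.
qval = test_dkt_qval('test2_model_small', n_concepts=4, transition_after=True,
                     horizon=6, n_rollouts=10000, r_type=SPARSE,
                     chkpt='checkpoints/dkt.ckpt')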
Example #13
def test_dkt(model_id,
             n_concepts,
             transition_after,
             horizon,
             n_rollouts,
             n_trajectories,
             r_type,
             use_real,
             use_mem,
             checkpoints=[]):
    '''
    Test DKT+MCTS
    Accepts a list of checkpoints; if more than one is given, an ensemble of models is used.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    #learn_prob = 0.5
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    #dgraph = create_custom_dependency()

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    test_student.reset()
    test_student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(test_student.copy(), dgraph)

    # create a shared dktcache across all processes
    dktcache_manager = mp.Manager()
    dktcache = dktcache_manager.dict()

    print('Testing model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    accs = np.array(
        Parallel(n_jobs=n_jobs)(delayed(test_dkt_chunk)(traj_per_job,
                                                        dgraph,
                                                        sim,
                                                        model_id,
                                                        checkpoints,
                                                        horizon,
                                                        n_rollouts,
                                                        r_type,
                                                        dktcache=dktcache,
                                                        use_real=use_real,
                                                        use_mem=use_mem)
                                for _ in range(n_jobs)))
    results = np.sum(accs, axis=0) / (n_jobs * traj_per_job)
    avg_acc, avg_best_q = results[0], results[1]

    test_data = dg.generate_data(dgraph,
                                 student=test_student,
                                 n_students=1000,
                                 seqlen=horizon,
                                 policy='expert',
                                 filename=None,
                                 verbose=False)
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest mcts: {}'.format(avg_acc))
    print('Average best q: {}'.format(avg_best_q))
    return avg_acc, avg_best_q
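
A usage sketch; checkpoint paths are placeholders. Passing more than one checkpoint makes each worker load an ensemble, per the docstring above, and n_trajectories should be divisible by n_jobs (8) so the integer division drops no trajectories.

# Hypothetical usage; checkpoint paths are placeholders.
avg_acc, avg_best_q = test_dkt('test2_model_mid', n_concepts=4,
                               transition_after=True, horizon=6,
                               n_rollouts=200, n_trajectories=96,
                               r_type=SPARSE, use_real=True, use_mem=False,
                               checkpoints=['checkpoints/run1.ckpt',
                                            'checkpoints/run2.ckpt'])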