def dkt_multistep_single(n_concepts, n_trajectories, model_id, checkpoints,
                         horizon, use_mem):
    '''
    Compute the multistep error per step for both the random and expert policies.
    '''
    if not use_mem:
        model_list = []
        for chkpt in checkpoints:
            model = dmc.DynamicsModel(model_id, timesteps=horizon,
                                      load_checkpoint=False)
            model.load(chkpt)
            model_list.append(model)
        dkt = dmc.RnnStudentSimEnsemble(model_list)
    else:
        mem_array_list = []
        for chkpt in checkpoints:
            mem_arrays = np.load(chkpt)['mem_arrays']
            mem_array_list.append(mem_arrays)
        dkt = dmc.RnnStudentSimMemEnsemble(n_concepts, mem_array_list)

    concept_tree = cdg.ConceptDependencyGraph()
    concept_tree.init_default_tree(n_concepts)
    test_student = st.Student2(n_concepts, True)

    # for both policies, for horizons 0, 1, 2, ..., horizon
    policies = ['random', 'expert']
    errors = np.zeros((2, horizon + 1))

    for pol in six.moves.range(2):
        for i in six.moves.range(n_trajectories):
            curr_dkt = dkt.copy()
            # sample a real trajectory
            traj = dg.generate_student_sample(concept_tree, seqlen=horizon + 1,
                                              student=test_student,
                                              policy=policies[pol])
            for t in six.moves.range(horizon + 1):
                curr_action = st.make_student_action_vec(traj[t][0])
                curr_ob = traj[t][1]
                curr_probs = sanitize_probs(n_concepts,
                                            curr_dkt.sample_observations())
                # advance the DKT with a sampled observation
                sampled_ob = 1 if np.random.random() < curr_probs[curr_action.concept] else 0
                curr_dkt.advance_simulator(curr_action, sampled_ob)
                # accumulate the squared error for this step
                errors[pol, t] += np.square(curr_ob - curr_probs[curr_action.concept])

    # debug
    #six.print_('errors {}'.format(errors / n_trajectories))

    return errors / n_trajectories
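
# Illustrative usage sketch for dkt_multistep_single (not part of the original
# experiments). The model id and checkpoint paths below are hypothetical
# placeholders; substitute whichever trained checkpoints you actually have.
def example_dkt_multistep_single():
    # with full model checkpoints (use_mem=False), one model is loaded per checkpoint
    errors = dkt_multistep_single(
        n_concepts=4,
        n_trajectories=100,
        model_id='test2_model_small',            # hypothetical model id
        checkpoints=['run1.ckpt', 'run2.ckpt'],  # hypothetical checkpoint files
        horizon=6,
        use_mem=False)
    # errors has shape (2, horizon + 1): row 0 is the random policy,
    # row 1 is the expert policy, averaged over trajectories
    six.print_('multistep errors per step:\n{}'.format(errors))
    return errors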
def dkt_test_policies_rme(model_id, n_trajectories, r_type, policies, chkpt):
    '''
    Tests the given open-loop policies for student2 n4 on the learned model.
    '''
    horizon = 6
    n_concepts = 4

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    student2 = st.Student2(n_concepts, transition_after)

    # load model from given file
    model = dmc.DynamicsModel(model_id=model_id, timesteps=horizon,
                              load_checkpoint=False)
    model.load(chkpt)

    # create the model and simulators
    student = student2.copy()
    student.reset()
    student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(student, dgraph)

    # initialize the shared dktcache across the trials
    dktcache = dict()

    num_policies = policies.shape[0]
    rewards = np.zeros((num_policies,))
    traj_per_policy = n_trajectories

    for pix in six.moves.range(num_policies):
        pol = policies[pix, :]
        reward_acc = 0.0
        for t in six.moves.range(traj_per_policy):
            # make the model
            rnnmodel = dmc.RnnStudentSim(model)
            curr_state = DKTState(rnnmodel, sim, 1, horizon, r_type, dktcache, False)
            all_actions = curr_state.actions
            for i in range(horizon):
                curr_state = curr_state.perform(all_actions[pol[i]])
            reward_acc += curr_state.reward()
        rewards[pix] = reward_acc / traj_per_policy

    return rewards
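
# Illustrative usage sketch for dkt_test_policies_rme (an assumption-labeled
# example, not from the original experiments). Each row of policies is an
# open-loop sequence of length horizon (6) indexing into curr_state.actions.
# The model id and checkpoint path are hypothetical.
def example_dkt_test_policies_rme():
    policies = np.array([[0, 1, 1, 2, 2, 3],   # one hand-written open-loop policy
                         [0, 1, 2, 3, 3, 3]])  # another candidate policy
    rewards = dkt_test_policies_rme(
        model_id='test2_model_small',  # hypothetical model id
        n_trajectories=50,
        r_type=SPARSE,                 # reward-type constant used elsewhere in this module
        policies=policies,
        chkpt='policy_eval.ckpt')      # hypothetical checkpoint file
    six.print_('per-policy rewards: {}'.format(rewards))
    return rewards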
def test_student_exact():
    '''
    MCTS is now working.
    The number of rollouts required to be optimal grows very quickly as a
    function of the horizon. Still, even when not fully optimal, MCTS is an
    extremely good approximation.

    The default student with horizon 10 needs about 50 rollouts to do well.
    A learn-prob 0.15 student with horizon 40 needs about 150 rollouts to do
    well; it gets about 0.94, which is 0.02 off from 0.96.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    r_type = DENSE
    n_concepts = 4
    learn_prob = 0.5
    horizon = 6
    n_rollouts = 50
    n_trajectories = 100
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    #dgraph = create_custom_dependency()
    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    accs = Parallel(n_jobs=n_jobs)(
        delayed(test_student_exact_chunk)(traj_per_job, dgraph, test_student,
                                          horizon, n_rollouts, r_type)
        for _ in range(n_jobs))
    avg = sum(accs) / (n_jobs * traj_per_job)

    test_data = dg.generate_data(dgraph, student=test_student, n_students=1000,
                                 seqlen=horizon, policy='expert', filename=None,
                                 verbose=False)
    print('Number of jobs {}'.format(n_jobs))
    print('Trajectory per job {}'.format(traj_per_job))
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest mcts: {}'.format(avg))
def dkt_test_policy(model_id, horizon, n_trajectories, r_type, chkpt):
    '''
    Tests the uniformly random (behavior) policy for student2 n4 on the learned model.
    '''
    n_concepts = 4

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    student2 = st.Student2(n_concepts, transition_after)

    # load model from given file
    model = dmc.DynamicsModel(model_id=model_id, timesteps=horizon,
                              load_checkpoint=False)
    model.load(chkpt)

    # create the model and simulators
    student = student2.copy()
    student.reset()
    student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(student, dgraph)

    # initialize the shared dktcache across the trials
    dktcache = dict()

    reward_acc = 0.0
    for t in six.moves.range(n_trajectories):
        # make the model
        rnnmodel = dmc.RnnStudentSim(model)
        curr_state = DKTState(rnnmodel, sim, 1, horizon, r_type, dktcache, False)
        all_actions = curr_state.actions
        for i in range(horizon):
            curr_state = curr_state.perform(random.choice(all_actions))
        reward_acc += curr_state.reward()
        #six.print_('Step: {}'.format(curr_state.step))
        #six.print_('Reward: {}'.format(curr_state.reward()))
        #six.print_('Reward Acc: {}'.format(reward_acc))
        #six.print_('Probs: {}'.format(curr_state.get_probs()))

    return reward_acc / n_trajectories
def test_drqn(model_id="", parallel=False):
    '''
    Test DRQN.
    '''
    n_concepts = 4
    learn_prob = 0.15
    horizon = 6
    n_trajectories = 100
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    from simple_mdp import create_custom_dependency

    #dgraph = create_custom_dependency()
    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student = st.Student2(n_concepts)

    if model_id == "":
        model_id = "test_model_drqn"

    print('Testing model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))

    if parallel:
        accs = Parallel(n_jobs=n_jobs)(
            delayed(test_drqn_chunk)(traj_per_job, dgraph, student, model_id,
                                     horizon)
            for _ in range(n_jobs))
        avg = sum(accs) / n_jobs
    else:
        avg = test_drqn_chunk(n_trajectories, dgraph, student, model_id, horizon)

    test_data = dg.generate_data(dgraph, student=student, n_students=1000,
                                 seqlen=horizon, policy='expert', filename=None,
                                 verbose=False)
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest drqn: {}'.format(avg))
def test_dkt_multistep(model_id, dataset, chkpt=None):
    '''
    Test the DKT multistep error on a dataset. The dataset is the output of generate_data.
    '''
    import concept_dependency_graph as cdg

    n_concepts = dataset[0][0][0].shape[0]
    horizon = len(dataset[0])

    # debug
    #six.print_('n concepts {} horizon {} trajectory {}'.format(n_concepts, horizon, dataset[0]))

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    # create the model and simulators
    student2 = st.Student2(n_concepts, True)
    test_student = student2
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id, timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id, timesteps=horizon,
                                  load_checkpoint=True)

    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Testing model multistep: {}'.format(model_id))

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    # accumulate error
    mse_acc = 0.0
    for i in six.moves.range(len(dataset)):
        curr_mse = 0.0
        curr_traj = dataset[i]
        curr_state = DKTState(dktmodel, sim, 1, horizon, SPARSE, dktcache, False)
        for t in six.moves.range(horizon - 1):
            # advance the DKT, then compare its prediction with the data, up to the last prediction
            curr_conceptvec = curr_traj[t][0]
            curr_concept = np.nonzero(curr_conceptvec)[0]
            curr_ob = int(curr_traj[t][1])

            next_conceptvec = curr_traj[t + 1][0]
            next_concept = np.nonzero(next_conceptvec)[0]
            next_ob = int(curr_traj[t + 1][1])

            # advance the DKT
            curr_state = curr_state.perform(st.StudentAction(curr_concept, curr_conceptvec))
            next_probs = curr_state.get_probs()

            # compute and accumulate the squared error
            diff = next_probs[next_concept] - next_ob
            curr_mse += diff * diff

            # debugging
            #six.print_('traj {} step {} actvec {} act {} ob {} next probs {} diff {}'.format(i,t,curr_conceptvec,curr_concept,curr_ob,next_probs,diff))

        # average mse per step
        mse_acc += curr_mse / (horizon - 1)
        #six.print_('mse per step acc {}'.format(mse_acc))

    # return the average MSE per step in a trajectory
    return mse_acc / len(dataset)
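
# Illustrative usage sketch for test_dkt_multistep (hedged; the model id and
# checkpoint below are hypothetical). The dataset comes from dg.generate_data,
# which produces the per-step (concept vector, observation) format this
# function indexes into.
def example_test_dkt_multistep():
    n_concepts = 4
    horizon = 6
    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)
    data = dg.generate_data(dgraph, student=st.Student2(n_concepts, True),
                            n_students=500, seqlen=horizon, policy='expert',
                            filename=None, verbose=False)
    mse = test_dkt_multistep('test2_model_small',          # hypothetical model id
                             data,
                             chkpt='multistep_eval.ckpt')  # hypothetical checkpoint file
    six.print_('average multistep MSE per step: {}'.format(mse))
    return mse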
def test_dkt_extract_policy(model_id, n_concepts, transition_after, horizon,
                            n_rollouts, r_type, chkpt=None):
    '''
    Use DKT+MCTS to extract the policy used in the real domain.
    Also returns the q-values.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    #learn_prob = 0.5

    #dgraph = create_custom_dependency()
    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id, timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id, timesteps=horizon,
                                  load_checkpoint=True)

    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Extracting policy from model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    # create the model and simulators
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    #rollout_policy = default_policies.immediate_reward
    rollout_policy = default_policies.RandomKStepRollOut(horizon + 1)
    # 1.41 is approximately sqrt(2); backups is from mcts.py
    uct = MCTS(tree_policies.UCB1(1.41), rollout_policy, backups.monte_carlo)

    root = StateNode(None, DKTState(dktmodel, sim, 1, horizon, r_type, dktcache, True))
    optpolicy = []
    qfunc = []
    for i in range(horizon):
        best_action = uct(root, n=n_rollouts)
        optpolicy.append(best_action.concept)
        qfunc.append([])
        for student_action in root.state.actions:
            qfunc[-1].append(root.children[student_action].q)
        # act in the real environment
        new_root = root.children[best_action].sample_state(real_world=True)
        new_root.parent = None  # cut off the rest of the tree
        root = new_root

    six.print_('Extracted policy: {}'.format(optpolicy))
    six.print_('Extracted q function: {}'.format(qfunc))

    return optpolicy, qfunc
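
# Illustrative usage sketch for test_dkt_extract_policy (an assumption-labeled
# example; the model id and checkpoint path are hypothetical). Extracts an
# open-loop policy of length horizon plus the per-step q-values from MCTS.
def example_test_dkt_extract_policy():
    optpolicy, qfunc = test_dkt_extract_policy(
        model_id='test2_model_small',  # hypothetical model id
        n_concepts=4,
        transition_after=True,
        horizon=6,
        n_rollouts=1000,
        r_type=SPARSE,
        chkpt='extract_policy.ckpt')   # hypothetical checkpoint file
    # optpolicy is a list of concept indices; qfunc[i] lists the q-value of
    # every available action at step i
    return optpolicy, qfunc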
def test_dkt_qval(model_id, n_concepts, transition_after, horizon, n_rollouts,
                  r_type, chkpt=None):
    '''
    Test DKT+MCTS with a large number of rollouts to estimate the initial q-value.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    #learn_prob = 0.5

    #dgraph = create_custom_dependency()
    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id, timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id, timesteps=horizon,
                                  load_checkpoint=True)

    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Testing model qval: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    # create the model and simulators
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    #rollout_policy = default_policies.immediate_reward
    rollout_policy = default_policies.RandomKStepRollOut(horizon + 1)
    # 1.41 is approximately sqrt(2); backups is from mcts.py
    uct = MCTS(tree_policies.UCB1(1.41), rollout_policy, backups.monte_carlo)

    root = StateNode(None, DKTState(dktmodel, sim, 1, horizon, r_type, dktcache, False))

    # run MCTS
    best_action = uct(root, n=n_rollouts)
    # get the q-value at the root
    qval = root.q
    six.print_('Initial qval: {}'.format(qval))

    return qval
def test_dkt(model_id, n_concepts, transition_after, horizon, n_rollouts,
             n_trajectories, r_type, use_real, use_mem, checkpoints=[]):
    '''
    Test DKT+MCTS.
    Accepts a list of checkpoints; if more than one is given, they are used as an ensemble.
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency

    #learn_prob = 0.5
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs

    #dgraph = create_custom_dependency()
    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    #student = st.Student(n=n_concepts, p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2

    test_student.reset()
    test_student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(test_student.copy(), dgraph)

    # create a shared dktcache across all processes
    dktcache_manager = mp.Manager()
    dktcache = dktcache_manager.dict()

    print('Testing model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))

    accs = np.array(
        Parallel(n_jobs=n_jobs)(
            delayed(test_dkt_chunk)(traj_per_job, dgraph, sim, model_id,
                                    checkpoints, horizon, n_rollouts, r_type,
                                    dktcache=dktcache, use_real=use_real,
                                    use_mem=use_mem)
            for _ in range(n_jobs)))
    results = np.sum(accs, axis=0) / (n_jobs * traj_per_job)
    avg_acc, avg_best_q = results[0], results[1]

    test_data = dg.generate_data(dgraph, student=test_student, n_students=1000,
                                 seqlen=horizon, policy='expert', filename=None,
                                 verbose=False)
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest mcts: {}'.format(avg_acc))
    print('Average best q: {}'.format(avg_best_q))

    return avg_acc, avg_best_q
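
# Illustrative usage sketch for test_dkt (hedged; the model id and checkpoint
# paths are hypothetical). Passing several checkpoints evaluates them together;
# use_mem=True would presumably switch to saved memory arrays instead of full
# model checkpoints, as in dkt_multistep_single.
def example_test_dkt():
    avg_acc, avg_best_q = test_dkt(
        model_id='test2_model_small',            # hypothetical model id
        n_concepts=4,
        transition_after=True,
        horizon=6,
        n_rollouts=200,
        n_trajectories=96,                       # divisible by the 8 parallel jobs
        r_type=SPARSE,
        use_real=True,
        use_mem=False,
        checkpoints=['ens1.ckpt', 'ens2.ckpt'])  # hypothetical checkpoint files
    return avg_acc, avg_best_q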