def test_item_bank_generation():
    """Generate small item banks for each logistic model and validate them.

    Afterwards, an all-zeros array is validated as-is (without
    ``raise_err``), passed through ``irt.normalize_item_bank`` and validated
    again with ``raise_err=True``.
    """
    # Every bank is generated first; validation happens afterwards.
    generated_banks = [
        generate_item_bank(5, '1PL'),
        generate_item_bank(5, '2PL'),
        generate_item_bank(5, '3PL'),
        generate_item_bank(5, '3PL', corr=0),
        generate_item_bank(5, '4PL'),
    ]
    for bank in generated_banks:
        irt.validate_item_bank(bank, raise_err=True)

    raw_bank = numpy.zeros(100)
    irt.validate_item_bank(raw_bank)
    normalized_bank = irt.normalize_item_bank(raw_bank)
    irt.validate_item_bank(normalized_bank, raise_err=True)
def add_qn(org_qns):
    '''Adds questions to the database, unless it already contains questions.

    org_qns is a dictionary formatted as
        {<question>: {'answers': <options>, 'difficulty': <difficulty>}}
    <question> is str
    <options> is list of str; the first option is recorded as the answer
    <difficulty> is float (not read yet)

    Item parameters (discrimination, difficulty, guessing, upper) are taken
    from a freshly generated one-item bank, and every question is stored
    with topicID=1.
    '''
    # Do nothing when the Question table already has rows.
    if Question.query.all():
        return

    for question_text, spec in org_qns.items():
        item = generate_item_bank(1)[0]
        qn = Question(question=question_text, discrimination=item[0],
                      difficulty=item[1], guessing=item[2], upper=item[3],
                      topicID=1)
        db.session.add(qn)
        db.session.commit()
        qid = qn.id

        for position, option_text in enumerate(spec['answers']):
            opt = Option(qnID=qid, option=option_text)
            db.session.add(opt)
            if position == 0:
                # Flush before reading opt.id (presumably so the database
                # has assigned it), then mark this option as the answer.
                db.session.flush()
                qn.answerID = opt.id
        db.session.commit()
def add_question(user, qn_text, options, answer, topicID):
    '''Adds a question and its options to the database

    Input
        user    : object whose id is stored as the question's userID
        qn_text : str
        options : seq of str
        answer  : int, 1-based position of the correct option in options
        topicID : int

    Returns the newly created Question
    '''
    # Item parameters come from CatSim
    params = generate_item_bank(1)[0]

    # Store the question itself
    new_question = Question(
        question=qn_text,
        discrimination=params[0],
        difficulty=params[1],
        guessing=params[2],
        upper=params[3],
        topicID=topicID,
        userID=user.id,
    )
    db.session.add(new_question)
    db.session.flush()
    question_id = new_question.id

    # Store the options; the option reached when the countdown hits zero
    # is recorded as the answer
    remaining = answer
    for option_text in options:
        option = Option(qnID=question_id, option=option_text)
        remaining = remaining - 1
        db.session.add(option)
        db.session.flush()
        if remaining == 0:
            new_question.answerID = option.id
            db.session.flush()

    db.session.commit()
    return new_question
def test_plots():
    """Exercise the ``plot`` helpers against one small simulation.

    A simulator over a 100-item generated bank is run once. The function
    then yields ``plot.item_curve`` together with its positional arguments
    for each of the first ten items, and finally calls the remaining plot
    functions directly with ``show=False``.

    This is a generator: each yielded tuple is a callable followed by the
    arguments to call it with (presumably consumed by a yield-aware test
    runner such as nose -- confirm against the project's test setup).
    """
    from matplotlib.pyplot import close
    initializer = RandomInitializer()
    selector = MaxInfoSelector()
    estimator = HillClimbingEstimator()
    stopper = MaxItemStopper(20)
    s = Simulator(generate_item_bank(100), 10)
    s.simulate(initializer, selector, estimator, stopper, verbose=True)

    # One yielded case per curve type ('icc', 'iic', 'both') for each item.
    for item in s.items[0:10]:
        yield plot.item_curve, item[0], item[1], item[2], item[3], 'Test plot', 'icc', False, None, False
        yield plot.item_curve, item[0], item[1], item[2], item[3], 'Test plot', 'iic', True, None, False
        yield plot.item_curve, item[0], item[1], item[2], item[3], 'Test plot', 'both', True, None, False
        close('all')

    plot.gen3d_dataset_scatter(items=s.items, show=False)
    plot.test_progress(title='Test progress', simulator=s, index=0, info=True, see=True, reliability=True, show=False)

    # item_exposure with its default ptype: no par, then par 'a'..'d'.
    plot.item_exposure(title='Test progress', simulator=s, show=False)
    plot.item_exposure(title='Test progress', simulator=s, show=False, par='a')
    plot.item_exposure(title='Test progress', simulator=s, show=False, par='b')
    plot.item_exposure(title='Test progress', simulator=s, show=False, par='c')
    plot.item_exposure(title='Test progress', simulator=s, show=False, par='d')

    # The same five calls again with ptype='line'.
    plot.item_exposure(title='Test progress', simulator=s, show=False, ptype='line')
    plot.item_exposure(title='Test progress', simulator=s, show=False, par='a', ptype='line')
    plot.item_exposure(title='Test progress', simulator=s, show=False, par='b', ptype='line')
    plot.item_exposure(title='Test progress', simulator=s, show=False, par='c', ptype='line')
    plot.item_exposure(title='Test progress', simulator=s, show=False, par='d', ptype='line')

    # close all plots after testing
    close('all')
def test_stats():
    """Call each ``stats`` helper on ten freshly generated 500-item banks.

    Every round also draws 1000 random integers below 30 and passes them to
    ``stats.bincount``. Nothing is asserted; the test only checks that the
    calls complete.
    """
    import numpy.random as nprnd

    rounds = 10
    for _round in range(rounds):
        bank = generate_item_bank(500)

        # Bank-level statistics
        stats.coef_variation(bank)
        stats.coef_correlation(bank)
        stats.covariance(bank)
        stats.covariance(bank, False)
        stats.scatter_matrix(bank)

        # Frequency count over random integers
        samples = nprnd.randint(30, size=1000)
        stats.bincount(samples)
def __init__(self, question_bank, parameter_model='2PL'):
    """Build the adaptive-test state for a bank of questions.

    Args:
        question_bank: non-empty, indexable collection; its first entry
            must have exactly two elements. Element 1 of every entry is
            copied into column 1 of the generated item parameters
            (presumably the difficulty -- confirm against the caller).
        parameter_model: item type passed to ``generate_item_bank`` as
            ``itemtype``.

    Raises:
        AssertionError: if the bank is empty or its first entry does not
            have two elements.
    """
    self.question_bank = question_bank
    self.question_bank_size = len(self.question_bank)
    assert self.question_bank_size > 0
    assert len(self.question_bank[0]) == 2

    self.indexed_items = generate_item_bank(
        self.question_bank_size, itemtype=parameter_model)
    self.parameter_model = parameter_model

    # Components that drive the test
    self.initializer = RandomInitializer()
    self.selector = MaxInfoSelector()
    self.estimator = HillClimbingEstimator()
    self.stopper = MaxItemStopper(self.question_bank_size)

    # Running state of the test
    self.est_theta = self.initializer.initialize()
    self.responses = []
    self.administered_items = []

    # Overwrite column 1 of each generated item with the value supplied
    # by the corresponding question.
    for position, generated_item in enumerate(self.indexed_items):
        generated_item[1] = question_bank[position][1]
def insert_qns(path):
    '''Inserts questions from every *.json file found directly in path

    Each file is formatted as
        {<number>: {'answer': <extra text><answer>,
                    'option_texts': <options>,
                    'question_text': <extra text><question><extra text>}}
    'answer' and 'question_text' are strings; 'option_texts' is a sequence
    of strings, each sliced as o[0] (compared with the answer) and o[6:]
    (stored as the option text).

    Item parameters come from a freshly generated one-item bank and every
    question is stored with topicID=1.

    Raises ValueError when 'question_text' has no ")" or 'answer' lacks the
    expected "Answer / Explanation" prefix or a "." after the answer.
    '''
    for filename in glob.glob(os.path.join(path, '*.json')):
        print("===")
        print(filename)
        print("===")
        with open(filename, 'r') as f:  # open in readonly mode
            data = json.load(f)

        for qn_set in data.values():
            # Keep only what follows the first ")" of the question text.
            _, qn_text = qn_set["question_text"].split(")", 1)

            # (label, text) pairs: first character and everything from index 6.
            options = [(o[0], o[6:]) for o in qn_set["option_texts"]]

            # The answer sits between the fixed prefix and the first ".".
            _, answer = qn_set["answer"].split(
                "Answer / Explanation :\n\nAnswer : ", 1)
            answer, _ = answer.split(".", 1)

            item = generate_item_bank(1)[0]
            question = Question(question=qn_text, discrimination=item[0],
                                difficulty=item[1], guessing=item[2],
                                upper=item[3], topicID=1)
            db.session.add(question)
            db.session.flush()
            qid = question.id

            for label, text in options:
                option = Option(qnID=qid, option=text)
                db.session.add(option)
                if label == answer:
                    # Flush before reading option.id, then link it as the
                    # question's answer.
                    db.session.flush()
                    question.answerID = option.id

    db.session.commit()
def test_cism():
    """Yield simulation cases that use a cluster-based item selector.

    For each stopper, a 500-item bank is generated, clustered into eight
    groups with KMeans, and handed to ``one_simulation`` together with a
    ``ClusterSelector`` built from those clusters. Each yielded tuple is the
    callable followed by its arguments.
    """
    n_examinees = 100

    for n_bank_items in [500]:
        for n_test_items in [30]:
            initializer_pool = [RandomInitializer('uniform', (-5, 5))]
            estimator_pool = [HillClimbingEstimator()]
            stopper_pool = [MaxItemStopper(n_test_items), MinErrorStopper(.4)]

            for init in initializer_pool:
                for est in estimator_pool:
                    for stop_rule in stopper_pool:
                        bank = generate_item_bank(n_bank_items)

                        kmeans = KMeans(n_clusters=8)
                        labels = list(kmeans.fit_predict(bank))

                        # Called only to check they run; results are discarded.
                        ClusterSelector.weighted_cluster_infos(0, bank, labels)
                        ClusterSelector.avg_cluster_params(bank, labels)

                        cluster_selector = ClusterSelector(
                            clusters=labels, r_max=.2)
                        yield (one_simulation, bank, n_examinees, init,
                               cluster_selector, est, stop_rule)
def test_simulations():
    """Yield ``one_simulation`` cases over selector/stopper combinations.

    Two groups of cases are produced for each bank size, test size and
    logistic model:

    * every selector in ``finite_selectors`` with a ``MaxItemStopper``;
      before yielding, ``select``, ``estimate`` and ``stop`` are each called
      once directly on a random response vector;
    * every selector in ``infinite_selectors`` with both a
      ``MinErrorStopper`` and a ``MaxItemStopper``.

    Each yielded tuple is the callable followed by its arguments.
    """
    examinees = 100
    test_sizes = [30]
    bank_sizes = [500]
    logistic_models = ['4PL']
    for bank_size in bank_sizes:
        for test_size in test_sizes:
            # NOTE(review): `initializers` is built but never iterated; the
            # initializer loop below uses its own literal list.
            initializers = [
                RandomInitializer('uniform', (-5, 5)),
                FixedPointInitializer(0)
            ]
            infinite_selectors = [MaxInfoSelector(), RandomSelector()]
            finite_selectors = [
                LinearSelector(
                    list(
                        numpy.random.choice(bank_size, size=test_size, replace=False))),
                AStratifiedSelector(test_size),
                AStratifiedBBlockingSelector(test_size),
                MaxInfoStratificationSelector(test_size),
                MaxInfoBBlockingSelector(test_size),
                The54321Selector(test_size),
                RandomesqueSelector(5)
            ]
            for logistic_model in logistic_models:
                for initializer in [FixedPointInitializer(0)]:
                    for estimator in [HillClimbingEstimator()]:
                        for stopper in [MaxItemStopper(test_size)]:
                            for selector in finite_selectors:
                                items = generate_item_bank(
                                    bank_size, itemtype=logistic_model)
                                # Random response vector whose length is
                                # drawn from 1 .. test_size - 1.
                                responses = cat.random_response_vector(
                                    random.randint(1, test_size - 1))
                                # One distinct item index per response.
                                administered_items = numpy.random.choice(
                                    bank_size, len(responses), replace=False)
                                est_theta = initializer.initialize()
                                # Call each component once directly; the
                                # return values are not used.
                                selector.select(
                                    items=items,
                                    administered_items=administered_items,
                                    est_theta=est_theta)
                                estimator.estimate(
                                    items=items,
                                    administered_items=administered_items,
                                    response_vector=responses,
                                    est_theta=est_theta)
                                stopper.stop(administered_items=items[
                                    administered_items], theta=est_theta)
                                yield one_simulation, items, examinees, initializer, selector, estimator, stopper
                        for stopper in [
                                MinErrorStopper(.4), MaxItemStopper(test_size)
                        ]:
                            for selector in infinite_selectors:
                                items = generate_item_bank(
                                    bank_size, itemtype=logistic_model)
                                yield one_simulation, items, examinees, initializer, selector, estimator, stopper
import sys
import json
from catsim.estimation import HillClimbingEstimator
from catsim.cat import generate_item_bank

# The first command-line argument is a JSON document with the keys
# 'items', 'administered_items', 'response_vector' and 'est_theta'.
param = json.loads(sys.argv[1])
items = param['items']

# Start from a generated 1PL bank and overwrite column 1 of every row with
# the difficulty supplied for the corresponding item.
item_bank = generate_item_bank(len(items), '1PL')
for i, supplied_item in enumerate(items):
    item_bank[i][1] = supplied_item['difficulty']

estimator = HillClimbingEstimator()
new_theta = estimator.estimate(
    items=item_bank,
    administered_items=param['administered_items'],
    response_vector=param['response_vector'],
    est_theta=param['est_theta'],
)

# Print the estimate with no trailing newline.
print(new_theta, end='')
from catsim.cat import generate_item_bank
from catsim import plot

# Draw one generated item with each of the three curve types.
item = generate_item_bank(1)[0]
for curve_type in ('icc', 'iic', 'both'):
    plot.item_curve(item[0], item[1], item[2], item[3], ptype=curve_type)
def generate_bank():
    """Print a progress message and return a generated 100-item 1PL bank."""
    # generating an item bank
    n_items = 100
    print('Generating item bank...')
    bank = generate_item_bank(n_items, '1PL')
    return bank
from catsim.cat import generate_item_bank
from catsim import plot

# Plot the 'iic' curve of two generated items with max_info enabled.
items = generate_item_bank(2)
for params in items:
    plot.item_curve(
        params[0], params[1], params[2], params[3],
        ptype='iic',
        max_info=True,
    )
from catsim.cat import generate_item_bank
from catsim import plot
from catsim.initialization import RandomInitializer
from catsim.selection import MaxInfoSelector
from catsim.estimation import HillClimbingEstimator
from catsim.stopping import MaxItemStopper
from catsim.simulation import Simulator

# Run one simulation over a 100-item generated bank.
bank = generate_item_bank(100)
s = Simulator(bank, 10)

initializer = RandomInitializer()
selector = MaxInfoSelector()
estimator = HillClimbingEstimator()
stopper = MaxItemStopper(20)
s.simulate(initializer, selector, estimator, stopper)

# Plot item exposure twice: once with hist=True, once with par='b'.
plot.item_exposure(title='Exposures', simulator=s, hist=True)
plot.item_exposure(title='Exposures', simulator=s, par='b')
import numpy as np
import matplotlib.pyplot as plt
from catsim.estimation import *
from catsim.cat import generate_item_bank
# Imported explicitly so the script does not depend on the star import
# above happening to expose `irt`.
from catsim import irt

# Plot the log-likelihood curve of three response vectors and mark the
# ability estimate each estimator finds on it.

test_size = 20

items = generate_item_bank(test_size)
items = items[items[:, 1].argsort()]  # order by difficulty ascending

# Response vectors with 7, 10 and 15 leading correct answers; the remaining
# answers up to test_size are incorrect.
correct_counts = [7, 10, 15]
response_vectors = [
    [True] * n_correct + [False] * (test_size - n_correct)
    for n_correct in correct_counts
]

thetas = np.arange(-6., 6., .1)

for estimator in [DifferentialEvolutionEstimator((-8, 8)), HillClimbingEstimator()]:
    plt.figure()

    for response_vector in response_vectors:
        # Log-likelihood over the whole theta grid.
        ll_line = [irt.log_likelihood(theta, response_vector, items)
                   for theta in thetas]

        # Theta found by the estimator, and the log-likelihood at that theta.
        theta_hat = estimator.estimate(items=items,
                                       administered_items=range(test_size),
                                       response_vector=response_vector,
                                       est_theta=0)
        ll_at_theta_hat = irt.log_likelihood(theta_hat, response_vector, items)

        plt.plot(thetas, ll_line)
        plt.plot(theta_hat, ll_at_theta_hat, 'o',
                 label=str(sum(response_vector)) + ' correct, '
                 + r'$\hat{\theta} \approx $' + format(round(theta_hat, 5)))

    # Axis labels, title and legend are set once per figure, after all
    # response vectors have been estimated, so the title reports the
    # estimator's final avg_evaluations value.
    plt.xlabel(r'$\theta$', size=16)
    plt.ylabel(r'$\log L(\theta)$', size=16)
    plt.title('MLE -- {0} ({1} avg. evals)'.format(
        type(estimator).__name__, round(estimator.avg_evaluations)))
    plt.legend(loc='best')
    plt.show()
from catsim.cat import generate_item_bank
from catsim import plot

# Generate a 100-item bank and pass it to the dataset scatter plot.
bank = generate_item_bank(100)
plot.gen3d_dataset_scatter(bank)