def estimate(
    self,
    index: int = None,
    items: numpy.ndarray = None,
    administered_items: list = None,
    response_vector: list = None,
    est_theta: float = None,
    **kwargs
) -> float:
    """Returns the theta value that minimizes the negative log-likelihood function, given
    the current state of the test for the given examinee.

    The search is a coarse-to-fine grid search: up to 10 passes, each evaluating at most 10
    evenly spaced candidate thetas, with every new pass centred on the best candidate found
    so far.

    :param index: index of the current examinee in the simulator
    :param items: a matrix containing item parameters in the format that `catsim`
                  understands (see: :py:func:`catsim.cat.generate_item_bank`)
    :param administered_items: a list containing the indexes of items that were already
                               administered
    :param response_vector: a boolean list containing the examinee's answers to the
                            administered items
    :param est_theta: a float containing the current estimated proficiency
    :param kwargs: accepted for interface compatibility; not used by this method
    :returns: the current :math:`\\hat\\theta`, passed through ``self._getout``
    :raises ValueError: if neither a usable simulator/index pair nor *all* of ``items``,
                        ``administered_items``, ``response_vector`` and ``est_theta`` are
                        provided
    """
    simulator_available = index is not None and self.simulator is not None

    # Stand-alone use requires every optional argument. The checks are joined with `or`
    # on purpose: mixing `and`/`or` here would let a partially specified call slip through
    # and fail later with an unrelated error.
    if not simulator_available and (
        items is None or administered_items is None or response_vector is None or
        est_theta is None
    ):
        raise ValueError(
            'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
        )

    if simulator_available:
        # anything the caller did not pass explicitly is read from the simulator
        simulator = self.simulator
        if items is None:
            items = simulator.items
        if administered_items is None:
            administered_items = simulator.administered_items[index]
        if response_vector is None:
            response_vector = simulator.response_vectors[index]
        if est_theta is None:
            est_theta = simulator.latest_estimations[index]

    self._calls += 1
    self._evaluations = 0

    # all responses identical: optionally delegate to the Dodd procedure
    if len(set(response_vector)) == 1 and self._dodd:
        return cat.dodd(est_theta, items, response_vector[-1])

    # NOTE: this method used to compare ``set(response_vector)`` with the integers 1 and 0
    # to return +inf / -inf. A set never equals an int, so those branches never ran and the
    # search below was always executed; they were removed without changing any result.

    # search bounds come from column 1 of the item matrix (presumably the difficulty
    # parameter), restricted to the administered items when there are any
    if len(administered_items) > 0:
        bound_source = items[administered_items][:, 1]
    else:
        bound_source = items[:, 1]
    lower_bound = min(bound_source)
    upper_bound = max(bound_source)

    # loop invariants
    administered = items[administered_items]
    tolerance = float('1e-' + str(self._precision))

    best_theta = float('-inf')
    max_ll = float('-inf')

    # the estimator starts with a rough search, which gets finer with each pass
    for granularity in range(10):

        # generate a list of candidate theta values
        candidates = numpy.linspace(lower_bound, upper_bound, 10)
        interval_size = candidates[1] - candidates[0]

        if self._verbose:
            print(
                'Pass: {0}\n\tBounds: {1} {2}\n\tInterval size: {3}'.format(
                    granularity + 1, lower_bound, upper_bound, interval_size
                )
            )

        # we'll use the concave nature of the log-likelihood function
        # to program a primitive early stopping method in our search
        previous_ll = float('-inf')

        # iterate through each candidate
        for candidate_theta in candidates:
            self._evaluations += 1

            current_ll = irt.log_likelihood(candidate_theta, response_vector, administered)

            # we search the function from left to right, so when the
            # log-likelihood of the current theta is smaller than the one
            # from the previous theta we tested, it means it's all downhill
            # from then on, so we stop our search
            if current_ll < previous_ll:
                break
            previous_ll = current_ll

            # check if the LL of the current candidate theta is larger than the best one
            # checked as of yet
            if current_ll > max_ll:
                if self._verbose:
                    print('\t\tTheta: {0}, LL: {1}'.format(candidate_theta, current_ll))

                # converged: the new best is within the requested precision of the old one
                if abs(best_theta - candidate_theta) < tolerance:
                    return self._getout(candidate_theta)

                max_ll = current_ll
                best_theta = candidate_theta

        # the bounds of the new candidates are adjusted around the current best theta value
        lower_bound = best_theta - interval_size
        upper_bound = best_theta + interval_size

    return self._getout(best_theta)
def estimate(
    self,
    index: int = None,
    items: numpy.ndarray = None,
    administered_items: list = None,
    response_vector: list = None,
    est_theta: float = None,
    **kwargs
) -> float:
    """Returns the theta value that minimizes the negative log-likelihood function, given
    the current state of the test for the given examinee.

    Up to 10 refinement passes are made; each pass evaluates at most 10 evenly spaced
    candidates and the next pass is centred on the best candidate seen so far.

    :param index: index of the current examinee in the simulator
    :param items: a matrix containing item parameters in the format that `catsim`
                  understands (see: :py:func:`catsim.cat.generate_item_bank`)
    :param administered_items: a list containing the indexes of items that were already
                               administered
    :param response_vector: a boolean list containing the examinee's answers to the
                            administered items
    :param est_theta: a float containing the current estimated proficiency
    :param kwargs: accepted for interface compatibility; not used by this method
    :returns: the current :math:`\\hat\\theta`, passed through ``self._getout``
    :raises ValueError: if neither a usable simulator/index pair nor *all* of ``items``,
                        ``administered_items``, ``response_vector`` and ``est_theta`` are
                        provided
    """
    use_simulator = index is not None and self.simulator is not None
    explicit_args = (items, administered_items, response_vector, est_theta)

    # Without a simulator every explicit argument is mandatory. `any(...)` avoids the
    # and/or precedence trap that would accept a call missing only one of
    # `items` / `administered_items`.
    if not use_simulator and any(arg is None for arg in explicit_args):
        raise ValueError(
            'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
        )

    if use_simulator:
        # fill in whatever the caller left out from the simulator's state
        if items is None:
            items = self.simulator.items
        if administered_items is None:
            administered_items = self.simulator.administered_items[index]
        if response_vector is None:
            response_vector = self.simulator.response_vectors[index]
        if est_theta is None:
            est_theta = self.simulator.latest_estimations[index]

    self._calls += 1
    self._evaluations = 0

    # all responses identical: optionally delegate to the Dodd procedure
    if len(set(response_vector)) == 1 and self._dodd:
        return cat.dodd(est_theta, items, response_vector[-1])

    # NOTE: comparisons of ``set(response_vector)`` against the integers 1 and 0 (meant to
    # return +inf / -inf) used to live here. A set never equals an int, so they were dead
    # code and the search always ran; removing them leaves results unchanged.

    # initial bounds: extremes of column 1 of the item matrix (presumably difficulty),
    # using only the administered items when at least one exists
    if len(administered_items) > 0:
        bound_source = items[administered_items][:, 1]
    else:
        bound_source = items[:, 1]
    lower_bound = min(bound_source)
    upper_bound = max(bound_source)

    # neither of these changes between candidates, so compute them once
    administered = items[administered_items]
    tolerance = float('1e-' + str(self._precision))

    best_theta = float('-inf')
    max_ll = float('-inf')

    # the estimator starts with a rough search, which gets finer with each pass
    for granularity in range(10):

        # generate a list of candidate theta values
        candidates = numpy.linspace(lower_bound, upper_bound, 10)
        interval_size = candidates[1] - candidates[0]

        if self._verbose:
            print(
                'Pass: {0}\n\tBounds: {1} {2}\n\tInterval size: {3}'.format(
                    granularity + 1, lower_bound, upper_bound, interval_size
                )
            )

        # we'll use the concave nature of the log-likelihood function
        # to program a primitive early stopping method in our search
        previous_ll = float('-inf')

        # iterate through each candidate
        for candidate_theta in candidates:
            self._evaluations += 1

            current_ll = irt.log_likelihood(candidate_theta, response_vector, administered)

            # we search the function from left to right, so when the
            # log-likelihood of the current theta is smaller than the one
            # from the previous theta we tested, it means it's all downhill
            # from then on, so we stop our search
            if current_ll < previous_ll:
                break
            previous_ll = current_ll

            # check if the LL of the current candidate theta is larger than the best one
            # checked as of yet
            if current_ll > max_ll:
                if self._verbose:
                    print('\t\tTheta: {0}, LL: {1}'.format(candidate_theta, current_ll))

                # stop once successive best candidates agree to the requested precision
                if abs(best_theta - candidate_theta) < tolerance:
                    return self._getout(candidate_theta)

                max_ll = current_ll
                best_theta = candidate_theta

        # the bounds of the new candidates are adjusted around the current best theta value
        lower_bound = best_theta - interval_size
        upper_bound = best_theta + interval_size

    return self._getout(best_theta)
def estimate(
    self,
    index: int = None,
    items: numpy.ndarray = None,
    administered_items: list = None,
    response_vector: list = None,
    est_theta: float = None,
    **kwargs
) -> float:
    """Returns the theta value that minimizes the negative log-likelihood function, given
    the current state of the test for the given examinee.

    The search runs inside ``self._bounds`` and the result is passed through
    ``self._bound_estimate`` before being returned.

    :param index: index of the current examinee in the simulator
    :param items: a matrix containing item parameters in the format that `catsim`
                  understands (see: :py:func:`catsim.cat.generate_item_bank`)
    :param administered_items: a list containing the indexes of items that were already
                               administered
    :param response_vector: a boolean list containing the examinee's answers to the
                            administered items
    :param est_theta: a float containing the current estimated proficiency
    :param kwargs: accepted for interface compatibility; not used by this method
    :returns: the current :math:`\\hat\\theta`
    :raises ValueError: if neither a usable simulator/index pair nor *all* of ``items``,
                        ``administered_items``, ``response_vector`` and ``est_theta`` are
                        provided
    """
    simulator_available = index is not None and self.simulator is not None

    # Stand-alone use requires every optional argument. The checks are joined with `or`
    # on purpose: mixing `and`/`or` here would let a partially specified call slip through
    # and fail later with an unrelated error.
    if not simulator_available and (
        items is None or administered_items is None or response_vector is None or
        est_theta is None
    ):
        raise ValueError(
            'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
        )

    if simulator_available:
        # anything the caller did not pass explicitly is read from the simulator
        simulator = self.simulator
        if items is None:
            items = simulator.items
        if administered_items is None:
            administered_items = simulator.administered_items[index]
        if response_vector is None:
            response_vector = simulator.response_vectors[index]
        if est_theta is None:
            est_theta = simulator.latest_estimations[index]

    self._calls += 1

    # need to constrain all estimates between these bounds, rather than, e.g.
    # min / max difficulties
    lower_bound, upper_bound = self._bounds

    if len(set(response_vector)) == 1 and self._dodd:
        # append bounds in mock "items", so that the dodd procedure will
        # at least step toward the bounds we set. Note that this is stretching
        # the use of the term dodd.
        min_item = [0, lower_bound, 0, 0]
        max_item = [0, upper_bound, 0, 0]
        bound_items = numpy.vstack([min_item, max_item])
        return cat.dodd(est_theta, bound_items, response_vector[-1])

    # NOTE: comparisons of ``set(response_vector)`` against the integers 1 and 0 (meant to
    # return +inf / -inf) used to live here. A set never equals an int, so they were dead
    # code and the bounded search always ran; removing them leaves results unchanged.

    # loop invariants
    administered = items[administered_items]
    tolerance = float('1e-' + str(self._precision))

    best_theta = float('-inf')
    max_ll = float('-inf')
    self._evaluations = 0

    for _ in range(10):
        intervals = numpy.linspace(lower_bound, upper_bound, 10)
        step = intervals[1] - intervals[0]

        if self._verbose:
            print(('Bounds: ' + str(lower_bound) + ' ' + str(upper_bound)))
            print(('Interval size: ' + str(step)))

        for ii in intervals:
            self._evaluations += 1
            ll = irt.log_likelihood(ii, response_vector, administered)

            if ll > max_ll:
                max_ll = ll

                if self._verbose:
                    print('Iteration: {0}, Theta: {1}, LL: {2}'.format(self._evaluations, ii, ll))

                # converged: the new best is within the requested precision of the old one
                if abs(best_theta - ii) < tolerance:
                    return self._bound_estimate(ii)

                best_theta = ii
            else:
                # the likelihood stopped improving: narrow the window to one step left of
                # the best candidate up to the current one and start a finer pass
                lower_bound = best_theta - step
                upper_bound = ii
                # reset the best likelihood (best_theta itself is kept), in case the
                # optimum is to the left of best_theta
                max_ll = float('-inf')
                break

    return self._bound_estimate(best_theta)
def estimate(
    self,
    index: int = None,
    items: numpy.ndarray = None,
    administered_items: list = None,
    response_vector: list = None,
    est_theta: float = None,
    **kwargs
) -> float:
    """Returns the theta value that minimizes the negative log-likelihood function, given
    the current state of the test for the given examinee.

    Up to 10 passes are made over 10 evenly spaced candidates each; a pass ends as soon as
    a candidate fails to improve on the best likelihood, and the next pass searches the
    narrowed interval.

    :param index: index of the current examinee in the simulator
    :param items: a matrix containing item parameters in the format that `catsim`
                  understands (see: :py:func:`catsim.cat.generate_item_bank`)
    :param administered_items: a list containing the indexes of items that were already
                               administered
    :param response_vector: a boolean list containing the examinee's answers to the
                            administered items
    :param est_theta: a float containing the current estimated proficiency
    :param kwargs: accepted for interface compatibility; not used by this method
    :returns: the current :math:`\\hat\\theta`
    :raises ValueError: if neither a usable simulator/index pair nor *all* of ``items``,
                        ``administered_items``, ``response_vector`` and ``est_theta`` are
                        provided
    """
    use_simulator = index is not None and self.simulator is not None
    explicit_args = (items, administered_items, response_vector, est_theta)

    # Without a simulator every explicit argument is mandatory. `any(...)` avoids the
    # and/or precedence trap that would accept a call missing only one of
    # `items` / `administered_items`.
    if not use_simulator and any(arg is None for arg in explicit_args):
        raise ValueError(
            'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
        )

    if use_simulator:
        # fill in whatever the caller left out from the simulator's state
        if items is None:
            items = self.simulator.items
        if administered_items is None:
            administered_items = self.simulator.administered_items[index]
        if response_vector is None:
            response_vector = self.simulator.response_vectors[index]
        if est_theta is None:
            est_theta = self.simulator.latest_estimations[index]

    self._calls += 1

    # all responses identical: optionally delegate to the Dodd procedure
    if len(set(response_vector)) == 1 and self._dodd:
        return cat.dodd(est_theta, items, response_vector[-1])

    # NOTE: comparisons of ``set(response_vector)`` against the integers 1 and 0 (meant to
    # return +inf / -inf) used to live here. A set never equals an int, so they were dead
    # code and the search always ran; removing them leaves results unchanged.

    # initial bounds: extremes of column 1 of the item matrix (presumably difficulty),
    # using only the administered items when at least one exists
    if len(administered_items) > 0:
        bound_source = items[administered_items][:, 1]
    else:
        bound_source = items[:, 1]
    lower_bound = min(bound_source)
    upper_bound = max(bound_source)

    # neither of these changes between candidates, so compute them once
    administered = items[administered_items]
    tolerance = float('1e-' + str(self._precision))

    best_theta = float('-inf')
    max_ll = float('-inf')
    self._evaluations = 0

    for _ in range(10):
        intervals = numpy.linspace(lower_bound, upper_bound, 10)
        step = intervals[1] - intervals[0]

        if self._verbose:
            print(('Bounds: ' + str(lower_bound) + ' ' + str(upper_bound)))
            print(('Interval size: ' + str(step)))

        for ii in intervals:
            self._evaluations += 1
            ll = irt.log_likelihood(ii, response_vector, administered)

            if ll > max_ll:
                max_ll = ll

                if self._verbose:
                    print('Iteration: {0}, Theta: {1}, LL: {2}'.format(self._evaluations, ii, ll))

                # stop once successive best candidates agree to the requested precision
                if abs(best_theta - ii) < tolerance:
                    return ii

                best_theta = ii
            else:
                # no improvement: shrink the window to one step left of the best candidate
                # up to the current one, then start a finer pass
                lower_bound = best_theta - step
                upper_bound = ii
                break

    return best_theta
def estimate(
    self,
    index: int = None,
    items: numpy.ndarray = None,
    administered_items: list = None,
    response_vector: list = None,
    est_theta: float = None,
    **kwargs
) -> float:
    """Returns the theta value that corresponds to the maximum a posteriori estimate, given
    the current state of the test for the given examinee. The posterior is obtained from
    summing the log-likelihood and the log of the normal density function.

    The search runs inside ``self._bounds`` and the result is passed through
    ``self._bound_estimate`` before being returned.

    :param index: index of the current examinee in the simulator
    :param items: a matrix containing item parameters in the format that `catsim`
                  understands (see: :py:func:`catsim.cat.generate_item_bank`)
    :param administered_items: a list containing the indexes of items that were already
                               administered
    :param response_vector: a boolean list containing the examinee's answers to the
                            administered items
    :param est_theta: a float containing the current estimated proficiency
    :param kwargs: accepted for interface compatibility; not used by this method
    :returns: the current theta estimate (based on the bounded MAP estimate)
    :raises ValueError: if neither a usable simulator/index pair nor *all* of ``items``,
                        ``administered_items``, ``response_vector`` and ``est_theta`` are
                        provided
    """
    simulator_available = index is not None and self.simulator is not None

    # Stand-alone use requires every optional argument. The checks are joined with `or`
    # on purpose: mixing `and`/`or` here would let a partially specified call slip through
    # and fail later with an unrelated error.
    if not simulator_available and (
        items is None or administered_items is None or response_vector is None or
        est_theta is None
    ):
        raise ValueError(
            'Either pass an index for the simulator or all of the other optional parameters to use this component independently.'
        )

    if simulator_available:
        # anything the caller did not pass explicitly is read from the simulator
        simulator = self.simulator
        if items is None:
            items = simulator.items
        if administered_items is None:
            administered_items = simulator.administered_items[index]
        if response_vector is None:
            response_vector = simulator.response_vectors[index]
        if est_theta is None:
            est_theta = simulator.latest_estimations[index]

    self._calls += 1

    # need to constrain all estimates between these bounds, rather than, e.g.
    # min / max difficulties
    lower_bound, upper_bound = self._bounds

    # loop invariants
    administered = items[administered_items]
    tolerance = float('1e-' + str(self._precision))

    best_theta = float('-inf')
    max_ll = float('-inf')
    self._evaluations = 0

    for _ in range(10):
        intervals = numpy.linspace(lower_bound, upper_bound, 10)
        step = intervals[1] - intervals[0]

        if self._verbose:
            print(('Bounds: ' + str(lower_bound) + ' ' + str(upper_bound)))
            print(('Interval size: ' + str(step)))

        for ii in intervals:
            self._evaluations += 1

            # log-posterior (up to a constant): log-likelihood plus log of the normal prior
            # density; `norm` is presumably scipy.stats.norm -- confirm against the imports
            ll = irt.log_likelihood(ii, response_vector, administered) + norm.logpdf(
                ii, loc=self._prior_mean, scale=self._prior_sd
            )

            if ll > max_ll:
                max_ll = ll

                if self._verbose:
                    print('Iteration: {0}, Theta: {1}, LL: {2}'.format(self._evaluations, ii, ll))

                # converged: the new best is within the requested precision of the old one
                if abs(best_theta - ii) < tolerance:
                    return self._bound_estimate(ii)

                best_theta = ii
            else:
                # the objective stopped improving: narrow the window to one step left of
                # the best candidate up to the current one and start a finer pass
                lower_bound = best_theta - step
                upper_bound = ii
                # reset the best objective value (best_theta itself is kept), in case the
                # optimum is to the left of best_theta
                max_ll = float('-inf')
                break

    return self._bound_estimate(best_theta)