Example #1
    def test_credit_partial_integration(self):
        """Testing the partial integral in the graded model."""
        theta = _get_quadrature_points(61, -5, 5)
        response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
        betas = np.array([0, -0.4, 0.94, -0.37])
        discrimination = 1.42

        # Hand calculations
        offsets = np.cumsum(betas)[1:]
        first_pos = np.ones_like(theta)
        second_pos = np.exp(discrimination * (theta - offsets[0]))
        third_pos = np.exp(2 * discrimination * (theta - offsets[1] / 2))
        last_pos = np.exp(3 * discrimination * (theta - offsets[2] / 3))
        norm_term = first_pos + second_pos + third_pos + last_pos

        probability_values = [first_pos / norm_term, second_pos / norm_term,
                              third_pos / norm_term, last_pos / norm_term]
        expected = np.zeros((response_set.size, theta.size))
        for ndx, response in enumerate(response_set):
            expected[ndx] = probability_values[response]

        result = _credit_partial_integral(theta, betas, discrimination,
                                          response_set)

        np.testing.assert_array_almost_equal(result, expected)
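For reference, the hand calculations above implement the partial credit model's category probabilities. With discrimination a and step parameters b_j (the betas array, where b_0 = 0), the probability of response k at ability theta is, in LaTeX notation:

P(X = k \mid \theta) = \frac{\exp\big(\sum_{j=0}^{k} a (\theta - b_j)\big)}{\sum_{m=0}^{K} \exp\big(\sum_{j=0}^{m} a (\theta - b_j)\big)}

Since \sum_{j=1}^{k} a (\theta - b_j) = k a \big(\theta - \tfrac{1}{k} \sum_{j=1}^{k} b_j\big), each numerator reduces to np.exp(k * discrimination * (theta - offsets[k-1] / k)) with offsets = np.cumsum(betas)[1:], which is exactly what the second_pos through last_pos lines compute.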
Example #2
def _local_min_func(estimate):
    # Negative marginal log-likelihood for one item: estimate[0] is the
    # discrimination, estimate[1:] are the step difficulties (betas).
    new_betas[1:] = estimate[1:]
    new_values = _credit_partial_integral(
        theta, new_betas, estimate[0], responses[item_ndx],
        invalid_response_mask[item_ndx])
    new_values *= partial_int
    otpt = np.sum(new_values, axis=1)
    return -np.log(otpt).sum()
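In this variant of the objective function the quadrature weights and latent density have already been folded into partial_int (via distribution_x_weight, see Example #6 below), so the marginal likelihood per participant reduces to a plain sum over the theta axis; compare the explicit fixed_quad call in Example #3.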
Example #3
def _local_min_func(estimate):
    # Negative marginal log-likelihood for one item, integrated over
    # theta with a fixed-order Gauss-Legendre quadrature rule.
    new_betas[1:] = estimate[1:]
    new_values = _credit_partial_integral(theta, new_betas,
                                          estimate[0],
                                          responses[item_ndx])

    new_values *= partial_int
    otpt = integrate.fixed_quad(
        lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]

    return -np.log(otpt).sum()
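The lambda x: new_values construct looks odd but is deliberate: new_values has already been evaluated at the quadrature nodes, so fixed_quad simply applies the Gauss-Legendre weights along the theta axis. A minimal self-contained sketch of that equivalence, with invented array shapes:

import numpy as np
from scipy import integrate

vals = np.ones((3, 61))  # pretend integrand, already evaluated at 61 nodes
approx = integrate.fixed_quad(lambda x: vals, -5, 5, n=61)[0]

# The same weighted sum, written out with the underlying quadrature rule
nodes, weights = np.polynomial.legendre.leggauss(61)
manual = vals @ weights * (5 - (-5)) / 2

assert np.allclose(approx, manual)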
Example #4
    def test_credit_partial_integration(self):
        """Testing the partial integral in the graded model."""
        theta, _ = _get_quadrature_points(61, -5, 5)
        response_set = np.array([0, 1, 2, 2, 1, 0, 3, 1, 3, 2, 2, 2])
        betas = np.array([0, -0.4, 0.94, -0.37])
        discrimination = 1.42
        invalid_response_mask = np.zeros_like(response_set, dtype='bool')

        # Hand calculations
        offsets = np.cumsum(betas)[1:]
        first_pos = np.ones_like(theta)
        second_pos = np.exp(discrimination * (theta - offsets[0]))
        third_pos = np.exp(2 * discrimination * (theta - offsets[1] / 2))
        last_pos = np.exp(3 * discrimination * (theta - offsets[2] / 3))
        norm_term = first_pos + second_pos + third_pos + last_pos

        probability_values = [
            first_pos / norm_term, second_pos / norm_term,
            third_pos / norm_term, last_pos / norm_term
        ]
        expected = np.zeros((response_set.size, theta.size))
        for ndx, response in enumerate(response_set):
            expected[ndx] = probability_values[response]

        result = _credit_partial_integral(theta, betas, discrimination,
                                          response_set, invalid_response_mask)

        np.testing.assert_array_almost_equal(result, expected)

        invalid_response_mask[1] = True
        invalid_response_mask[7] = True
        result = _credit_partial_integral(theta, betas, discrimination,
                                          response_set, invalid_response_mask)

        np.testing.assert_equal(result[1], np.ones(61))
        np.testing.assert_equal(result[7], np.ones(61))

        with np.testing.assert_raises(AssertionError):
            for ndx in [0, 2, 3, 4, 5, 6, 8, 9]:
                np.testing.assert_equal(result[ndx], np.ones(61))
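A minimal sketch of what the invalid-response mask accomplishes (flatten_invalid_rows is a hypothetical name, not part of the library): rows flagged invalid are replaced with ones, so multiplying them into the per-participant likelihood leaves it unchanged, which is exactly what the assertions above verify.

import numpy as np

def flatten_invalid_rows(prob_rows, invalid_mask):
    # Hypothetical helper: give invalid responses a flat value of 1 so
    # they drop out of the product over items.
    out = prob_rows.copy()
    out[invalid_mask] = 1.0
    return out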
Example #5
import numpy as np
from scipy import integrate
from scipy.optimize import fmin_slsqp

# The private helpers used below (validate_estimation_options,
# condition_polytomous_response, _get_quadrature_points,
# _credit_partial_integral) are defined elsewhere in the library.


def pcm_mml(dataset, options=None):
    """Estimate parameters for partial credit model.

    Estimate the discrimination and difficulty parameters for
    the partial credit model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        difficulty: (2d array) estimates of item difficulties x item thresholds

    Options:
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)
    quad_start, quad_stop = options['quadrature_bounds']
    quad_n = options['quadrature_n']

    responses, item_counts = condition_polytomous_response(dataset, trim_ends=False,
                                                           _reference=0.0)
    n_items = responses.shape[0]

    # Interpolation Locations
    theta = _get_quadrature_points(quad_n, quad_start, quad_stop)
    distribution = options['distribution'](theta)

    # Initialize difficulty parameters for estimation
    betas = np.full((n_items, item_counts.max()), np.nan)
    discrimination = np.ones((n_items,))
    partial_int = np.ones((responses.shape[1], theta.size))

    # Not all items need to have the same
    # number of response categories
    betas[:, 0] = 0
    for ndx in range(n_items):
        betas[ndx, 1:item_counts[ndx]] = np.linspace(-1, 1,
                                                     item_counts[ndx] - 1)

    #############
    # 1. Start the iteration loop
    # 2. Estimate Discrimination/Difficulty Jointly
    # 3. Integrate over theta
    # 4. Minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        # and for speed
        partial_int *= 0.0
        partial_int += distribution[None, :]
        for item_ndx in range(n_items):
            partial_int *= _credit_partial_integral(theta, betas[item_ndx],
                                                    discrimination[item_ndx],
                                                    responses[item_ndx])

        # Loop over each item and solve for the alpha / beta parameters
        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            item_length = item_counts[item_ndx]
            new_betas = np.zeros(item_length)

            # Remove the previous output
            old_values = _credit_partial_integral(theta, previous_betas[item_ndx],
                                                  previous_discrimination[item_ndx],
                                                  responses[item_ndx])
            partial_int /= old_values

            def _local_min_func(estimate):
                new_betas[1:] = estimate[1:]
                new_values = _credit_partial_integral(theta, new_betas,
                                                      estimate[0],
                                                      responses[item_ndx])

                new_values *= partial_int
                otpt = integrate.fixed_quad(
                    lambda x: new_values, quad_start, quad_stop, n=quad_n)[0]

                return -np.log(otpt).sum()

            # Initial Guess of Item Parameters
            initial_guess = np.concatenate(([discrimination[item_ndx]],
                                            betas[item_ndx, 1:item_length]))

            otpt = fmin_slsqp(_local_min_func, initial_guess,
                              disp=False,
                              bounds=[(0.25, 4)] + [(-6, 6)] * (item_length - 1))

            discrimination[item_ndx] = otpt[0]
            betas[item_ndx, 1:item_length] = otpt[1:]

            new_values = _credit_partial_integral(theta, betas[item_ndx],
                                                  discrimination[item_ndx],
                                                  responses[item_ndx])

            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    # TODO:  look where missing values are and place NAN there instead
    # of appending them to the end
    return discrimination, betas[:, 1:]
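A hedged usage sketch for this tuple-returning version of the API; the dataset below is invented purely to illustrate the input and output shapes:

import numpy as np

rng = np.random.default_rng(42)
# 5 items, 300 participants, responses coded 0-3 (invented data)
dataset = rng.integers(0, 4, size=(5, 300))

discrimination, difficulty = pcm_mml(dataset)
print(discrimination.shape)  # (5,)
print(difficulty.shape)      # (5, 3): one row of thresholds per item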
Example #6
import numpy as np
from scipy.optimize import fmin_slsqp

# The helpers used below (validate_estimation_options,
# condition_polytomous_response, LatentPDF, _credit_partial_integral,
# _ability_eap_abstract) are defined elsewhere in the library.


def pcm_mml(dataset, options=None):
    """Estimate parameters for partial credit model.

    Estimate the discrimination and difficulty parameters for
    the partial credit model using marginal maximum likelihood.

    Args:
        dataset: [n_items, n_participants] 2d array of measured responses
        options: dictionary with updates to default options

    Returns:
        discrimination: (1d array) estimates of item discrimination
        difficulty: (2d array) estimates of item difficulties x item thresholds

    Options:
        * estimate_distribution: Boolean
        * number_of_samples: int >= 5
        * max_iteration: int
        * distribution: callable
        * quadrature_bounds: (float, float)
        * quadrature_n: int
    """
    options = validate_estimation_options(options)

    cpr_result = condition_polytomous_response(dataset,
                                               trim_ends=False,
                                               _reference=0.0)
    responses, item_counts, valid_response_mask = cpr_result
    invalid_response_mask = ~valid_response_mask

    n_items = responses.shape[0]

    # Quadrature Locations
    latent_pdf = LatentPDF(options)
    theta = latent_pdf.quadrature_locations

    # Initialize difficulty parameters for estimation
    betas = np.full((n_items, item_counts.max()), np.nan)
    discrimination = np.ones((n_items, ))
    partial_int = np.ones((responses.shape[1], theta.size))

    # Not all items need to have the same
    # number of response categories
    betas[:, 0] = 0
    for ndx in range(n_items):
        betas[ndx, 1:item_counts[ndx]] = np.linspace(-1, 1,
                                                     item_counts[ndx] - 1)

    # Set invalid index to zero, this allows minimal
    # changes for invalid data and it is corrected
    # during integration
    responses[invalid_response_mask] = 0

    #############
    # 1. Start the iteration loop
    # 2. Estimate Discrimination/Difficulty Jointly
    # 3. Integrate over theta
    # 4. Minimize and repeat
    #############
    for iteration in range(options['max_iteration']):
        previous_discrimination = discrimination.copy()
        previous_betas = betas.copy()

        # Quadrature evaluation for values that do not change
        # This is done during the outer loop to address rounding errors
        # and for speed
        partial_int = np.ones((responses.shape[1], theta.size))
        for item_ndx in range(n_items):
            partial_int *= _credit_partial_integral(
                theta, betas[item_ndx], discrimination[item_ndx],
                responses[item_ndx], invalid_response_mask[item_ndx])
        # Estimate the distribution if requested
        distribution_x_weight = latent_pdf(partial_int, iteration)
        partial_int *= distribution_x_weight

        # Loop over each item and solve for the alpha / beta parameters
        for item_ndx in range(n_items):
            # pylint: disable=cell-var-from-loop
            item_length = item_counts[item_ndx]
            new_betas = np.zeros(item_length)

            # Remove the previous output
            old_values = _credit_partial_integral(
                theta, previous_betas[item_ndx],
                previous_discrimination[item_ndx], responses[item_ndx],
                invalid_response_mask[item_ndx])
            partial_int /= old_values

            def _local_min_func(estimate):
                new_betas[1:] = estimate[1:]
                new_values = _credit_partial_integral(
                    theta, new_betas, estimate[0], responses[item_ndx],
                    invalid_response_mask[item_ndx])
                new_values *= partial_int
                otpt = np.sum(new_values, axis=1)
                return -np.log(otpt).sum()

            # Initial Guess of Item Parameters
            initial_guess = np.concatenate(
                ([discrimination[item_ndx]], betas[item_ndx, 1:item_length]))

            otpt = fmin_slsqp(_local_min_func,
                              initial_guess,
                              disp=False,
                              bounds=[(0.25, 4)] + [(-6, 6)] * (item_length - 1))

            discrimination[item_ndx] = otpt[0]
            betas[item_ndx, 1:item_length] = otpt[1:]

            new_values = _credit_partial_integral(
                theta, betas[item_ndx], discrimination[item_ndx],
                responses[item_ndx], invalid_response_mask[item_ndx])

            partial_int *= new_values

        if np.abs(previous_discrimination - discrimination).max() < 1e-3:
            break

    # Recompute partial int for later calculations
    partial_int = np.ones((responses.shape[1], theta.size))
    for item_ndx in range(n_items):
        partial_int *= _credit_partial_integral(
            theta, betas[item_ndx], discrimination[item_ndx],
            responses[item_ndx], invalid_response_mask[item_ndx])

    # TODO:  look where missing values are and place NAN there instead
    # of appending them to the end
    # Compute statistics for final iteration
    null_metrics = latent_pdf.compute_metrics(
        partial_int, latent_pdf.null_distribution * latent_pdf.weights, 0)
    full_metrics = latent_pdf.compute_metrics(partial_int,
                                              distribution_x_weight,
                                              latent_pdf.n_points - 3)

    # Ability estimates
    eap_abilities = _ability_eap_abstract(partial_int, distribution_x_weight,
                                          theta)

    return {
        'Discrimination': discrimination,
        'Difficulty': betas[:, 1:],
        'Ability': eap_abilities,
        'LatentPDF': latent_pdf,
        'AIC': {
            'final': full_metrics[0],
            'null': null_metrics[0],
            'delta': null_metrics[0] - full_metrics[0]
        },
        'BIC': {
            'final': full_metrics[1],
            'null': null_metrics[1],
            'delta': null_metrics[1] - full_metrics[1]
        }
    }
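A hedged usage sketch for this dictionary-returning version, again with invented data:

import numpy as np

rng = np.random.default_rng(7)
dataset = rng.integers(0, 4, size=(5, 300))  # invented responses, coded 0-3

result = pcm_mml(dataset)
print(result['Discrimination'])  # 1d array, one entry per item
print(result['Difficulty'])      # 2d array of item thresholds
print(result['AIC']['delta'], result['BIC']['delta'])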