Exemplo n.º 1
0
 def testBasic(self):
   """Check the p-value for two pairs of integer ranges."""
   # The two ranges share no values.
   disjoint_p_value = mann_whitney_u.MannWhitneyU(range(10), range(20, 30))
   self.assertAlmostEqual(disjoint_p_value, 0.00018267179110955002)

   # Every value of the first range also occurs in the second one.
   overlapping_p_value = mann_whitney_u.MannWhitneyU(range(5), range(10))
   self.assertAlmostEqual(overlapping_p_value, 0.13986357686781267)
Exemplo n.º 2
0
def _CompareValues(values_a, values_b):
    """Classify two samples with the Mann-Whitney U test.

    Arguments:
      values_a: The first sample.
      values_b: The second sample.

    Returns:
      _DIFFERENT if the p-value is below _SIGNIFICANCE_LEVEL. _UNKNOWN in
      every other case: either sample is empty, MannWhitneyU raises
      ValueError, or the p-value is not below the significance level.
    """
    # Nothing can be concluded when a sample has no values.
    if not values_a or not values_b:
        return _UNKNOWN

    try:
        p_value = mann_whitney_u.MannWhitneyU(values_a, values_b)
    except ValueError:
        # The test could not produce a p-value for these samples.
        return _UNKNOWN

    return _DIFFERENT if p_value < _SIGNIFICANCE_LEVEL else _UNKNOWN
Exemplo n.º 3
0
def _CompareValues(values_a, values_b):
  """Decide whether two samples are the same, different, or unknown.

  The smaller of the Kolmogorov-Smirnov and Mann-Whitney U p-values is
  compared first against _SIGNIFICANCE_LEVEL and then against a
  "questionable" level picked from _QUESTIONABLE_SIGNIFICANCE_LEVELS based on
  the size of the smaller sample.

  Arguments:
    values_a: A list of sortable values. They don't need to be numeric.
    values_b: A list of sortable values. They don't need to be numeric.

  Returns:
    _DIFFERENT: The samples likely come from different distributions.
        Reject the null hypothesis.
    _SAME: Not enough evidence to say that the samples come from different
        distributions. Fail to reject the null hypothesis.
    _UNKNOWN: Not enough evidence to say that the samples come from different
        distributions, but it looks a little suspicious, and we would like more
        data before making a final decision. Also returned when either sample
        is empty.
  """
  if not (values_a and values_b):
    # A sample has no values in it.
    return _UNKNOWN

  # MWU is bad at detecting changes in variance, and K-S is bad with discrete
  # distributions. So use both. We want low p-values for the below examples.
  #        a                     b               MWU(a, b)  KS(a, b)
  # [0]*20            [0]*15+[1]*5                0.0097     0.4973
  # range(10, 30)     range(10)+range(30, 40)     0.4946     0.0082
  p_value = min(
      kolmogorov_smirnov.KolmogorovSmirnov(values_a, values_b),
      mann_whitney_u.MannWhitneyU(values_a, values_b))

  if p_value < _SIGNIFICANCE_LEVEL:
    # The p-value is less than the significance level. Reject the null
    # hypothesis.
    return _DIFFERENT

  # One table entry per ten values in the smaller sample, clamped to the last
  # entry. Floor division keeps the index an integer; true division would
  # produce a float (Python 3, or Python 2 with `from __future__ import
  # division`) and the list lookup below would raise TypeError.
  index = min(len(values_a), len(values_b)) // 10
  index = min(index, len(_QUESTIONABLE_SIGNIFICANCE_LEVELS) - 1)
  questionable_significance_level = _QUESTIONABLE_SIGNIFICANCE_LEVELS[index]
  if p_value < questionable_significance_level:
    # The p-value is not less than the significance level, but it's small enough
    # to be suspicious. We'd like to investigate more closely.
    return _UNKNOWN

  # The p-value is quite large. We're not suspicious that the two samples might
  # come from different distributions, and we don't care to investigate more.
  return _SAME
Exemplo n.º 4
0
def _CompareValues(values_a, values_b):
  """Classify a pair of samples as same, different, or unknown.

  Arguments:
    values_a: A list of sortable values. They don't need to be numeric.
    values_b: A list of sortable values. They don't need to be numeric.

  Returns:
    _DIFFERENT: The Mann-Whitney U p-value is below _SIGNIFICANCE_LEVEL, so
        the null hypothesis is rejected.
    _UNKNOWN: A sample is empty or has fewer than _MINIMUM_VALUE_COUNT values,
        or the p-value is below _QUESTIONABLE_SIGNIFICANCE_LEVEL without being
        below _SIGNIFICANCE_LEVEL. More data is wanted before deciding.
    _SAME: The p-value is not below either level. Fail to reject the null
        hypothesis.
  """
  # An empty sample gives us nothing to compare.
  if not values_a or not values_b:
    return _UNKNOWN

  # With this few values the significance test would never reject the null
  # hypothesis, so ask for more information instead. This can happen if a lot
  # of the test runs fail and leave few performance numbers to work with.
  if any(len(sample) < _MINIMUM_VALUE_COUNT
         for sample in (values_a, values_b)):
    return _UNKNOWN

  p_value = mann_whitney_u.MannWhitneyU(values_a, values_b)

  if p_value < _SIGNIFICANCE_LEVEL:
    # Small enough to reject the null hypothesis.
    verdict = _DIFFERENT
  elif p_value < _QUESTIONABLE_SIGNIFICANCE_LEVEL:
    # Not significant, but suspicious enough to warrant a closer look.
    verdict = _UNKNOWN
  else:
    # Large p-value: no suspicion that the distributions differ, and no
    # interest in investigating further.
    verdict = _SAME
  return verdict
Exemplo n.º 5
0
 def testAllValuesIdentical(self):
   """MannWhitneyU raises ValueError when both samples are all zeros."""
   sample_a = [0] * 5
   sample_b = [0] * 5
   self.assertRaises(
       ValueError, mann_whitney_u.MannWhitneyU, sample_a, sample_b)
Exemplo n.º 6
0
 def testSmallSamples(self):
   """Two single-value samples give a p-value of exactly 1.0."""
   p_value = mann_whitney_u.MannWhitneyU([0], [1])
   self.assertEqual(p_value, 1.0)
Exemplo n.º 7
0
 def testDuplicateValues(self):
   """Check the p-value for five zeros compared against five ones."""
   zeros = [0] * 5
   ones = [1] * 5
   p_value = mann_whitney_u.MannWhitneyU(zeros, ones)
   self.assertAlmostEqual(p_value, 0.0039767517097886512)
Exemplo n.º 8
0
 def testAllValuesIdentical(self):
   """Two all-zero samples give a p-value of exactly 1.0."""
   sample_a = [0] * 5
   sample_b = [0] * 5
   p_value = mann_whitney_u.MannWhitneyU(sample_a, sample_b)
   self.assertEqual(p_value, 1.0)