def test_getZScores(self):
    """Check Z-scores and modified Z-scores against reference values.

    Data taken from C++ test.
    """
    samples = numpy.array(
        [12, 13, 9, 18, 7, 9, 14, 16, 10, 12,
         7, 13, 14, 19, 10, 16, 12, 16, 19, 11],
        dtype=numpy.float64)

    # Each pair is (array index, expected score), compared to 4 places.
    plain_scores = Stats.getZscore(samples)
    for position, expected in ((4, 1.63977), (6, 0.32235)):
        self.assertAlmostEqual(expected, plain_scores[position], places=4)

    modified_scores = Stats.getModifiedZscore(samples)
    for position, expected in ((4, 1.23658), (6, 0.33725)):
        self.assertAlmostEqual(expected, modified_scores[position], places=4)
def test_getZScores(self):
    """Compare getZscore / getModifiedZscore output with known values.

    Data taken from C++ test.
    """
    def check(scores, expected_by_index):
        # Compare selected entries of ``scores`` to 4 decimal places.
        for idx, wanted in expected_by_index:
            self.assertAlmostEqual(wanted, scores[idx], places=4)

    raw = [12, 13, 9, 18, 7, 9, 14, 16, 10, 12,
           7, 13, 14, 19, 10, 16, 12, 16, 19, 11]
    data = numpy.array(raw, dtype=numpy.float64)

    check(Stats.getZscore(data), [(4, 1.63977), (6, 0.32235)])
    check(Stats.getModifiedZscore(data), [(4, 1.23658), (6, 0.33725)])
def PyExec(self):
    """Fill the output table with one row per statistic.

    Every column of the input table is converted to floats and passed to
    ``Stats.getStatistics``. A column whose processing raises ``ValueError``
    is reported through the logger and left out of the output.
    """
    source_table = mtd[self.getPropertyValue('InputWorkspace')]
    result_name = self.getPropertyValue('OutputWorkspace')

    result_table = ms.CreateEmptyTableWorkspace(OutputWorkspace=result_name)
    result_table.addColumn('str', 'statistic')

    # statistic name -> {column name -> value}
    stat_keys = ('standard_deviation', 'maximum', 'minimum', 'mean', 'median')
    stats = {key: dict() for key in stat_keys}

    for name in source_table.getColumnNames():
        try:
            as_floats = np.array([float(v) for v in source_table.column(name)])
            col_stats = _stats_to_dict(Stats.getStatistics(as_floats))
            for statname, per_column in iteritems(stats):
                per_column[name] = col_stats[statname]
            result_table.addColumn('float', name)
        except ValueError:
            logger.notice('Column \'%s\' is not numerical, skipping' % name)

    # One row per statistic; the 'statistic' cell carries the row label.
    for statname, per_column in iteritems(stats):
        result_table.addRow(dict(per_column, statistic=statname))

    self.setProperty('OutputWorkspace', result_table)
def PyExec(self):
    """Fill the output table with one row per statistic.

    Every column of the input table is converted to floats and passed to
    ``Stats.getStatistics``. A column whose processing raises ``ValueError``
    is reported through the logger and left out of the output.
    """
    in_ws = mtd[self.getPropertyValue('InputWorkspace')]
    out_ws_name = self.getPropertyValue('OutputWorkspace')

    # The keyword is spelled 'OutputWorkspace' (it was previously mistyped
    # as 'OutputWOrkspace'), so the table is created under the requested name.
    out_ws = ms.CreateEmptyTableWorkspace(OutputWorkspace=out_ws_name)
    out_ws.addColumn('str', 'statistic')

    # statistic name -> {column name -> value}
    stats = {
        'standard_deviation': dict(),
        'maximum': dict(),
        'minimum': dict(),
        'mean': dict(),
        'median': dict(),
    }

    for name in in_ws.getColumnNames():
        try:
            col_stats = _stats_to_dict(
                Stats.getStatistics(
                    np.array([float(v) for v in in_ws.column(name)])))
            for statname in stats:
                stats[statname][name] = col_stats[statname]
            out_ws.addColumn('float', name)
        except ValueError:
            logger.notice('Column \'%s\' is not numerical, skipping' % name)

    # One row per statistic; the 'statistic' cell carries the row label.
    for name, stat in stats.items():
        stat1 = dict(stat)
        stat1['statistic'] = name
        out_ws.addRow(stat1)

    # Hand over the table object itself rather than its name.
    self.setProperty('OutputWorkspace', out_ws)
def test_getStatistics_with_floats(self):
    """Check getStatistics on float data, unsorted and pre-sorted."""
    data = numpy.array([17.2, 18.1, 16.5, 18.3, 12.6])

    stats = Stats.getStatistics(data)
    self.assertAlmostEqual(16.54, stats.mean, places=10)
    self.assertAlmostEqual(2.0733, stats.standard_deviation, places=4)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(12.6, stats.minimum)
    self.assertEqual(18.3, stats.maximum)
    self.assertEqual(17.2, stats.median)

    # Same expectations when the data is sorted first and flagged as such.
    data = numpy.sort(data)
    stats = Stats.getStatistics(data, sorted=True)
    self.assertAlmostEqual(16.54, stats.mean, places=10)
    self.assertAlmostEqual(2.0733, stats.standard_deviation, places=4)
    self.assertEqual(12.6, stats.minimum)
    self.assertEqual(18.3, stats.maximum)
    self.assertEqual(17.2, stats.median)
def test_getStatistics_with_floats(self):
    """Check getStatistics on float data, unsorted and pre-sorted."""
    data = numpy.array([17.2, 18.1, 16.5, 18.3, 12.6])

    stats = Stats.getStatistics(data)
    self.assertAlmostEqual(16.54, stats.mean, places=10)
    self.assertAlmostEqual(2.0733, stats.standard_deviation, places=4)
    # assertEqual replaces the deprecated assertEquals alias.
    self.assertEqual(12.6, stats.minimum)
    self.assertEqual(18.3, stats.maximum)
    self.assertEqual(17.2, stats.median)

    # Same expectations when the data is sorted first and flagged as such.
    data = numpy.sort(data)
    stats = Stats.getStatistics(data, sorted=True)
    self.assertAlmostEqual(16.54, stats.mean, places=10)
    self.assertAlmostEqual(2.0733, stats.standard_deviation, places=4)
    self.assertEqual(12.6, stats.minimum)
    self.assertEqual(18.3, stats.maximum)
    self.assertEqual(17.2, stats.median)
def test_getZScores(self):
    """Check Z-scores, modified Z-scores and the legacy ``sorted`` flag.

    Data taken from C++ test.
    """
    samples = numpy.array(
        [12, 13, 9, 18, 7, 9, 14, 16, 10, 12,
         7, 13, 14, 19, 10, 16, 12, 16, 19, 11],
        dtype=numpy.float64)
    zscore_expected = ((4, 1.63977), (6, 0.32235))
    modified_expected = ((4, 1.23658), (6, 0.33725))

    plain_scores = Stats.getZscore(samples)
    for position, expected in zscore_expected:
        self.assertAlmostEqual(expected, plain_scores[position], places=4)

    modified_scores = Stats.getModifiedZscore(samples)
    for position, expected in modified_expected:
        self.assertAlmostEqual(expected, modified_scores[position], places=4)

    # Test the sorted argument still works. Remove this when the function is removed
    # sorted=True only ever affected the order
    flagged_scores = Stats.getZscore(samples, sorted=True)
    for position, expected in zscore_expected:
        self.assertAlmostEqual(expected, flagged_scores[position], places=4)
def test_getMoments(self):
    """Check moments of a sampled Gaussian about the origin and the mean."""
    mean = 5.
    sigma = 4.
    deltaX = .2
    numX = 200
    # calculate to have same number of points left and right of function
    offsetX = mean - (.5 * deltaX * float(numX))
    # variance about origin
    expVar = mean * mean + sigma * sigma
    # skew about origin
    expSkew = mean * mean * mean + 3. * mean * sigma * sigma

    # x-values to try out
    indep = numpy.arange(numX, dtype=numpy.float64)
    indep = indep * deltaX + offsetX

    # y-values
    # test different type: int32 y-values with float64 x-values must raise
    depend = numpy.arange(numX, dtype=numpy.int32)
    self.assertRaises(ValueError, Stats.getMomentsAboutOrigin, indep, depend)

    # now correct y values
    weightedDiff = (indep - mean) / sigma
    depend = numpy.exp(-0.5 * weightedDiff * weightedDiff) / sigma / math.sqrt(2. * math.pi)

    aboutOrigin = Stats.getMomentsAboutOrigin(indep, depend)
    self.assertIsInstance(aboutOrigin, numpy.ndarray)
    self.assertEqual(4, aboutOrigin.shape[0])
    self.assertAlmostEqual(1., aboutOrigin[0], places=4)
    self.assertAlmostEqual(mean, aboutOrigin[1], places=4)
    self.assertTrue(math.fabs(expVar - aboutOrigin[2]) < 0.001 * expVar)
    self.assertTrue(math.fabs(expSkew - aboutOrigin[3]) < 0.001 * expSkew)

    aboutMean = Stats.getMomentsAboutMean(indep, depend)
    # Inspect aboutMean here; these two checks previously re-tested
    # aboutOrigin by mistake.
    self.assertIsInstance(aboutMean, numpy.ndarray)
    self.assertEqual(4, aboutMean.shape[0])
    self.assertAlmostEqual(1., aboutMean[0], places=4)
    self.assertAlmostEqual(0., aboutMean[1], places=4)
    self.assertTrue(math.fabs(sigma * sigma - aboutMean[2]) < 0.001 * expVar)
    self.assertTrue(math.fabs(0. - aboutMean[3]) < 0.0001 * expSkew)
def test_getMoments(self):
    """Check moments of a sampled Gaussian about the origin and the mean."""
    mean = 5.
    sigma = 4.
    deltaX = .2
    numX = 200
    # calculate to have same number of points left and right of function
    offsetX = mean - (.5 * deltaX * float(numX))
    # variance about origin
    expVar = mean * mean + sigma * sigma
    # skew about origin
    expSkew = mean * mean * mean + 3. * mean * sigma * sigma

    # x-values to try out
    indep = numpy.arange(numX, dtype=numpy.float64)
    indep = indep * deltaX + offsetX

    # y-values
    # test different type: int32 y-values with float64 x-values must raise
    depend = numpy.arange(numX, dtype=numpy.int32)
    self.assertRaises(ValueError, Stats.getMomentsAboutOrigin, indep, depend)

    # now correct y values
    weightedDiff = (indep - mean) / sigma
    depend = numpy.exp(-0.5 * weightedDiff * weightedDiff) / sigma / math.sqrt(2. * math.pi)

    aboutOrigin = Stats.getMomentsAboutOrigin(indep, depend)
    self.assertIsInstance(aboutOrigin, numpy.ndarray)
    self.assertEqual(4, aboutOrigin.shape[0])
    self.assertAlmostEqual(1., aboutOrigin[0], places=4)
    self.assertAlmostEqual(mean, aboutOrigin[1], places=4)
    self.assertTrue(math.fabs(expVar - aboutOrigin[2]) < 0.001 * expVar)
    self.assertTrue(math.fabs(expSkew - aboutOrigin[3]) < 0.001 * expSkew)

    aboutMean = Stats.getMomentsAboutMean(indep, depend)
    # Inspect aboutMean here; these two checks previously re-tested
    # aboutOrigin by mistake.
    self.assertIsInstance(aboutMean, numpy.ndarray)
    self.assertEqual(4, aboutMean.shape[0])
    self.assertAlmostEqual(1., aboutMean[0], places=4)
    self.assertAlmostEqual(0., aboutMean[1], places=4)
    self.assertTrue(math.fabs(sigma * sigma - aboutMean[2]) < 0.001 * expVar)
    self.assertTrue(math.fabs(0. - aboutMean[3]) < 0.0001 * expSkew)
def PyExec(self):
    """Fill the output table with one row per statistic.

    The columns to process come from the comma-separated 'ColumnIndices'
    property; when it is empty, every column of the input table is used.
    Each selected column is converted to floats and passed to
    ``Stats.getStatistics``. A column that fails with any ``Exception`` is
    reported through the logger and left out of the output.
    """
    in_ws = mtd[self.getPropertyValue('InputWorkspace')]
    indices_list = self.getPropertyValue('ColumnIndices')
    out_ws_name = self.getPropertyValue('OutputWorkspace')
    column_names = in_ws.getColumnNames()

    # If column indices are not provided, then default to _ALL_ columns
    if indices_list:
        indices_list = [int(x) for x in indices_list.split(',')]
    else:
        indices_list = range(len(column_names))

    out_ws = ms.CreateEmptyTableWorkspace(OutputWorkspace=out_ws_name)
    out_ws.addColumn('str', 'Statistic')

    # statistic name -> {column name -> value}; ordered so that the rows
    # and cells are emitted in a fixed sequence.
    stats = collections.OrderedDict([
        ('StandardDev', collections.OrderedDict()),
        ('Minimum', collections.OrderedDict()),
        ('Median', collections.OrderedDict()),
        ('Maximum', collections.OrderedDict()),
        ('Mean', collections.OrderedDict()),
    ])

    for index in indices_list:
        column_name = column_names[index]
        try:
            column_data = np.array([float(v) for v in in_ws.column(index)])
            col_stats = _stats_to_dict(Stats.getStatistics(column_data))
            for stat_name in stats:
                stats[stat_name][column_name] = col_stats[stat_name]
            out_ws.addColumn('float', column_name)
        except Exception:
            # Replaces a RuntimeError handler plus a bare ``except:`` that
            # logged the same message. Skipping stays best-effort, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            logger.notice('Column \'%s\' is not numerical, skipping' % column_name)

    # One row per statistic; the 'Statistic' cell carries the row label.
    for stat_name, per_column in iteritems(stats):
        row = collections.OrderedDict(per_column)
        row['Statistic'] = stat_name
        out_ws.addRow(row)

    self.setProperty('OutputWorkspace', out_ws)