def main(args):
    """Drop outlying points from every cluster, re-centre it, and re-cluster.

    ``args`` is a mapping from a centroid string ``"x,y"`` to the list of
    point strings ``"x,y"`` assigned to it; every coordinate must parse
    with ``int()``.

    For each cluster, the distance of every point to the centroid is
    compared with the cluster's median distance, scaled by the median
    absolute deviation (MAD) of those distances.  A point is rejected when
    that score is at least 2 *and* the point lies more than 100 units from
    the centroid.  The surviving points are stored under a new centroid key
    built from the floor of their mean coordinates, and the resulting
    mapping is passed to ``clustering.main``.

    Returns ``None``; whatever ``clustering.main`` returns is not propagated.
    """
    score_limit = 2       # reject when |dist - median| / MAD reaches this ...
    distance_floor = 100  # ... but only for points farther away than this

    point2centroid = args
    point2centroidfinal = defaultdict(list)
    # Rejected points are collected per original centroid; nothing in this
    # function reads them back.
    point2centroidreject = defaultdict(list)

    for centroid in point2centroid:
        members = point2centroid[centroid]
        if not members:
            # Nothing to score or average: keep the centroid where it is
            # rather than failing on an empty median / division by zero.
            point2centroidfinal.setdefault(centroid, [])
            continue

        centroid_x, centroid_y = (int(v) for v in centroid.split(','))
        coords = [tuple(int(v) for v in point.split(',')) for point in members]

        # Each list below is its own object.  Binding several names to one
        # shared list would make the deviation pass append to the very list
        # it is iterating over and never finish.
        distances = [
            ((centroid_x - px) ** 2 + (centroid_y - py) ** 2) ** .5
            for px, py in coords
        ]
        # Copies are handed to the median helper so the element order here,
        # which is aligned with ``members``, cannot be disturbed.
        median_dist = stats.medianscore(list(distances))
        deviations = [abs(dist - median_dist) for dist in distances]
        mad = stats.medianscore(list(deviations))

        kept = []
        sum_x = sum_y = 0
        for point, (px, py), dist, deviation in zip(
                members, coords, distances, deviations):
            # With a zero MAD no point can be scored, so none is rejected.
            is_outlier = mad > 0 and deviation / mad >= score_limit
            if is_outlier and dist > distance_floor:
                point2centroidreject[centroid].append(point)
            else:
                kept.append(point)
                sum_x += px
                sum_y += py

        # At least half of the deviations are <= MAD, so ``kept`` is never
        # empty for a non-empty cluster and the division below is safe.
        # Floor division keeps the coordinates integral so the new key can
        # be parsed with int() again on a later pass.
        new_x = sum_x // len(kept)
        new_y = sum_y // len(kept)
        new_centroid = '%d,%d' % (new_x, new_y)

        # Store directly under the new key.  Assign-then-delete would lose
        # the cluster whenever the centroid does not move; extending also
        # merges clusters that end up on the same centroid instead of
        # overwriting one of them.
        point2centroidfinal[new_centroid].extend(kept)

    clustering.main(point2centroidfinal)
def test_medianscore(self):
    "Testing medianscore"
    # Even-length fixtures: each one is expected to have a median of 10.5.
    for sample in (self.L, self.LF, self.A, self.AF):
        self.EQ(stats.medianscore(sample), 10.5)

    # Odd-length fixtures: the base list with 20 appended, as a list and as
    # an array built from it; each is expected to have a median of 11.
    longer_list = self.L + [20]
    longer_array = num_array(longer_list)
    for sample in (longer_list, longer_array):
        self.EQ(stats.medianscore(sample), 11)
def test_medianscore(self):
    "Testing medianscore"

    def expect_median(samples, expected):
        # Check every sample, in order, against the same expected median.
        for sample in samples:
            self.EQ(stats.medianscore(sample), expected)

    # Data of even length.
    expect_median([self.L, self.LF, self.A, self.AF], 10.5)

    # Data of odd length: the base list plus one extra value, checked both
    # as a plain list and as an array created from that list.
    odd_list = self.L + [20]
    odd_array = num_array(odd_list)
    expect_median([odd_list, odd_array], 11)
def _format_ratings(self, output):
    """Summarise the numeric ratings found in ``self.data`` into ``output``.

    The first element of every entry in ``self.data`` is converted with
    ``float(entry[0].toPython())``; entries for which that raises
    ``ValueError`` are skipped.  ``output['results']`` is always replaced by
    a fresh dict.  When at least one rating was collected it is filled with
    the median, mode, mean, a 6-bin histogram, a 6-bin cumulative frequency
    and the number of ratings.  The same ``output`` object is returned.
    """
    numeric = []
    for entry in self.data:
        try:
            value = float(entry[0].toPython())
        except ValueError:
            # Not a number: leave it out of the statistics.
            continue
        numeric.append(value)

    summary = {}
    output['results'] = summary
    if not numeric:
        return output

    summary['median'] = stats.medianscore(numeric)
    summary['mode'] = stats.mode(numeric)
    summary['mean'] = stats.mean(numeric)
    summary['histogram'] = stats.histogram(numeric, 6)
    summary['cumfreq'] = stats.cumfreq(numeric, 6)
    summary['count'] = len(numeric)
    return output
# Fixtures derived from `l` (expected to be defined earlier in the script,
# like `N` and `stats`): a copy of 1..20 with a float at index 2, array
# versions of both lists, and a 5-row nested list / 2-D array.
lf = [1, 2, 3.0] + list(range(4, 21))
a = N.array(l)
af = N.array(lf)
ll = [l for _ in range(5)]
aa = N.array(ll)

print('\nCENTRAL TENDENCY')
for _name in ('geometricmean', 'harmonicmean', 'mean', 'median',
              'medianscore'):
    _fn = getattr(stats, _name)
    print(_name + ':', _fn(l), _fn(lf), _fn(a), _fn(af))
print('mode:', stats.mode(l), stats.mode(a))

print('\nMOMENTS')
# The input order is kept per function; note that `variation` is shown in
# the order l, a, lf, af, unlike its neighbours.
for _name, _inputs in (
        ('moment', (l, lf, a, af)),
        ('variation', (l, a, lf, af)),
        ('skew', (l, lf, a, af)),
        ('kurtosis', (l, lf, a, af)),
):
    _fn = getattr(stats, _name)
    print(_name + ':', *[_fn(_sample) for _sample in _inputs])

# Trimmed statistics are only shown for the two arrays, with limits (5, 17).
_limits = (5, 17)
for _name in ('tmean', 'tvar', 'tstdev', 'tsem'):
    _fn = getattr(stats, _name)
    print(_name + ':', _fn(a, _limits), _fn(af, _limits))
N=numpy l = range(1,21) lf = range(1,21) lf[2] = 3.0 a = N.array(l) af = N.array(lf) ll = [l]*5 aa = N.array(ll) print '\nCENTRAL TENDENCY' print 'geometricmean:',stats.geometricmean(l), stats.geometricmean(lf), stats.geometricmean(a), stats.geometricmean(af) print 'harmonicmean:',stats.harmonicmean(l), stats.harmonicmean(lf), stats.harmonicmean(a), stats.harmonicmean(af) print 'mean:',stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af) print 'median:',stats.median(l),stats.median(lf),stats.median(a),stats.median(af) print 'medianscore:',stats.medianscore(l),stats.medianscore(lf),stats.medianscore(a),stats.medianscore(af) print 'mode:',stats.mode(l),stats.mode(a) print '\nMOMENTS' print 'moment:',stats.moment(l),stats.moment(lf),stats.moment(a),stats.moment(af) print 'variation:',stats.variation(l),stats.variation(a),stats.variation(lf),stats.variation(af) print 'skew:',stats.skew(l),stats.skew(lf),stats.skew(a),stats.skew(af) print 'kurtosis:',stats.kurtosis(l),stats.kurtosis(lf),stats.kurtosis(a),stats.kurtosis(af) print 'tmean:',stats.tmean(a,(5,17)),stats.tmean(af,(5,17)) print 'tvar:',stats.tvar(a,(5,17)),stats.tvar(af,(5,17)) print 'tstdev:',stats.tstdev(a,(5,17)),stats.tstdev(af,(5,17)) print 'tsem:',stats.tsem(a,(5,17)),stats.tsem(af,(5,17)) print 'describe:' print stats.describe(l) print stats.describe(lf) print stats.describe(a)
def main(args):
    """Filter outliers out of each cluster, move its centroid, and re-cluster.

    ``args`` maps a centroid string to the point strings assigned to it,
    both in ``"x,y"`` form with integer coordinates, for example
    ``{'278,728': ['351,467', '395,285']}``.

    Per cluster:

    1. compute the distance of every point to the centroid;
    2. take the median distance and the median absolute deviation (MAD)
       of the distances via ``stats.medianscore``;
    3. reject a point when ``|dist - median| / MAD >= 2`` and its distance
       exceeds 100 (no point is rejected when the MAD is 0);
    4. file the remaining points under a new centroid key made from the
       floor of their mean x and mean y.

    The rebuilt mapping is then passed to ``clustering.main``.  Nothing is
    returned.
    """
    max_score = 2        # MAD-scaled deviation at which a point is suspect
    min_distance = 100   # suspects closer than this are kept anyway

    point2centroid = args
    point2centroidfinal = defaultdict(list)
    # Rejected points, keyed by the centroid they were removed from.  They
    # are recorded only; this function does not use them afterwards.
    point2centroidreject = defaultdict(list)

    for centroid in point2centroid:
        assigned = point2centroid[centroid]
        if not assigned:
            # An empty cluster has no median and no mean; carry the centroid
            # over unchanged instead of raising.
            point2centroidfinal.setdefault(centroid, [])
            continue

        cx, cy = [int(part) for part in centroid.split(',')]

        parsed = []
        dists = []
        for point in assigned:
            px, py = [int(part) for part in point.split(',')]
            parsed.append((px, py))
            dists.append(((cx - px) ** 2 + (cy - py) ** 2) ** .5)

        # ``dists`` and ``abs_devs`` are deliberately separate lists: if
        # they shared one list object, building the deviations would keep
        # extending the list being iterated and loop forever.  The median
        # helper receives copies so the order aligned with ``assigned``
        # stays intact.
        centre = stats.medianscore(list(dists))
        abs_devs = [abs(dist - centre) for dist in dists]
        spread = stats.medianscore(list(abs_devs))

        survivors = []
        total_x = 0
        total_y = 0
        for index, point in enumerate(assigned):
            suspect = spread > 0 and abs_devs[index] / spread >= max_score
            if suspect and dists[index] > min_distance:
                point2centroidreject[centroid].append(point)
                continue
            survivors.append(point)
            total_x += parsed[index][0]
            total_y += parsed[index][1]

        # At least half of the deviations are <= the MAD, so some point
        # always survives and the divisor is never zero.  ``//`` gives the
        # same integer result on Python 2 and Python 3, which keeps the new
        # key parseable with int().
        mean_x = total_x // len(survivors)
        mean_y = total_y // len(survivors)
        new_centroid = '%d,%d' % (mean_x, mean_y)

        # Write straight to the new key: assigning and then deleting the
        # old key would discard the cluster when the centroid is unchanged,
        # and plain assignment would overwrite another cluster that landed
        # on the same centroid.
        point2centroidfinal[new_centroid].extend(survivors)

    clustering.main(point2centroidfinal)
lf = range(1, 21) lf[2] = 3.0 a = N.array(l) af = N.array(lf) ll = [l] * 5 aa = N.array(ll) print '\nCENTRAL TENDENCY' print 'geometricmean:', stats.geometricmean(l), stats.geometricmean( lf), stats.geometricmean(a), stats.geometricmean(af) print 'harmonicmean:', stats.harmonicmean(l), stats.harmonicmean( lf), stats.harmonicmean(a), stats.harmonicmean(af) print 'mean:', stats.mean(l), stats.mean(lf), stats.mean(a), stats.mean(af) print 'median:', stats.median(l), stats.median(lf), stats.median( a), stats.median(af) print 'medianscore:', stats.medianscore(l), stats.medianscore( lf), stats.medianscore(a), stats.medianscore(af) print 'mode:', stats.mode(l), stats.mode(a) print '\nMOMENTS' print 'moment:', stats.moment(l), stats.moment(lf), stats.moment( a), stats.moment(af) print 'variation:', stats.variation(l), stats.variation(a), stats.variation( lf), stats.variation(af) print 'skew:', stats.skew(l), stats.skew(lf), stats.skew(a), stats.skew(af) print 'kurtosis:', stats.kurtosis(l), stats.kurtosis(lf), stats.kurtosis( a), stats.kurtosis(af) print 'tmean:', stats.tmean(a, (5, 17)), stats.tmean(af, (5, 17)) print 'tvar:', stats.tvar(a, (5, 17)), stats.tvar(af, (5, 17)) print 'tstdev:', stats.tstdev(a, (5, 17)), stats.tstdev(af, (5, 17)) print 'tsem:', stats.tsem(a, (5, 17)), stats.tsem(af, (5, 17))
def evaluate(self, *args, **params):
    """Forward all positional and keyword arguments to
    ``_stats.medianscore`` and return its result unchanged."""
    result = _stats.medianscore(*args, **params)
    return result