Пример #1
0
    def test_pearsonr(self):
        """Testing pearsonr

        The fixtures are paired up (``L`` with ``M``, then ``A`` with ``B``).
        For the pair at position ``i`` the test compares element ``i`` of the
        tuple returned by ``stats.pearsonr`` with ``results[i]``.
        """
        firsts = [self.L, self.A]
        seconds = [self.M, self.B]
        results = (0.80208084775070976, 2.1040104471429959e-005)

        for position, (first, second) in enumerate(zip(firsts, seconds)):
            observed = stats.pearsonr(first, second)[position]
            self.assertEqual(observed, results[position])
Пример #2
0
 def test_pearsonr(self):
     """Testing pearsonr

     Each case is (first sample, second sample, index): the samples go to
     ``stats.pearsonr`` and the element of its result at ``index`` must
     equal the expected value stored at the same index.
     """
     expected = (0.80208084775070976, 2.1040104471429959e-005)
     first_samples = [self.L, self.A]
     second_samples = [self.M, self.B]
     cases = [
         (first_samples[0], second_samples[0], 0),
         (first_samples[1], second_samples[1], 1),
     ]

     for sample_x, sample_y, index in cases:
         outcome = stats.pearsonr(sample_x, sample_y)
         self.assertEqual(outcome[index], expected[index])
Пример #3
0
# Second list fixture: the integers 4..23 with the entry at index 10 set to 34.
# ``b`` is the same data passed through ``N.array``.
m = list(range(4, 14)) + [34] + list(range(15, 24))
b = N.array(m)

# Nine 0s followed by eleven 1s, as a list and as an array.
pb = [0] * 9 + [1] * 11
apb = N.array(pb)

print('paired:')
# The stats.paired(l, m) / stats.paired(a, b) calls were commented out in the
# original script and remain disabled.

print()
print()

# Each row: heading to print, name of the ``stats`` function, arguments for
# the list-based call, arguments for the array-based call.
# NOTE: ``l`` and ``a`` are not defined in this excerpt; they are expected to
# exist already when this code runs.
for heading, func_name, list_args, array_args in (
        ('pearsonr:', 'pearsonr', (l, m), (a, b)),
        ('spearmanr:', 'spearmanr', (l, m), (a, b)),
        ('pointbiserialr:', 'pointbiserialr', (pb, l), (apb, a)),
        ('kendalltau:', 'kendalltau', (l, m), (a, b)),
        ('linregress:', 'linregress', (l, m), (a, b)),
):
    print(heading)
    print(getattr(stats, func_name)(*list_args))
    print(getattr(stats, func_name)(*array_args))

print('\nINFERENTIAL')
 def evaluate(self, *args, **params):
     """Forward all positional and keyword arguments to ``_stats.pearsonr``.

     Returns whatever ``_stats.pearsonr`` returns, unchanged.
     """
     outcome = _stats.pearsonr(*args, **params)
     return outcome
Пример #5
0
m = range(4,24)
m[10] = 34 
b = N.array(m)

pb = [0]*9 + [1]*11
apb = N.array(pb)

print 'paired:'
#stats.paired(l,m)
#stats.paired(a,b)

print
print
print 'pearsonr:'
print stats.pearsonr(l,m)
print stats.pearsonr(a,b)
print 'spearmanr:'
print stats.spearmanr(l,m)
print stats.spearmanr(a,b)
print 'pointbiserialr:'
print stats.pointbiserialr(pb,l)
print stats.pointbiserialr(apb,a)
print 'kendalltau:'
print stats.kendalltau(l,m)
print stats.kendalltau(a,b)
print 'linregress:'
print stats.linregress(l,m)
print stats.linregress(a,b)

print '\nINFERENTIAL'
SNP2 = {}

for line in fileinput.input([File2]):
	rowlist = []
        rowlist = (line.rstrip("\n")).split('\t')

        if line.startswith("#") or rowlist[0] == "chrX":
                continue
        else:
        	if rowlist[0]+"\t"+rowlist[1] in SNP1 and int(rowlist[4])+int(rowlist[6]) >= 30:
                	SNP2[rowlist[0]+"\t"+rowlist[1]] = 0.0
                        SNP2[rowlist[0]+"\t"+rowlist[1]] = float(rowlist[4])/float(int(rowlist[4])+int(rowlist[6]))

Array1 = []
Array2 = []

from statlib import stats
reload(stats)

for snp in SNP2:
	output.write(str(SNP2[snp])+"\t"+str(SNP1[snp])+"\n")
	Array2.append(SNP2[snp])
	Array1.append(SNP1[snp])

	
print pearson(Array1,Array2)
print stats.pearsonr(Array1,Array2)
print len(Array2)

output.close()
Пример #7
0
# Python 2 demo fragment: each section prints a heading, the expected textbook
# value, and then the result of the list-based and array-based ``stats`` call.

# Tail of the previous section; ``a`` and ``b`` are not defined in this
# excerpt and are expected to exist already.
stats.attest_ind(array(a), array(b), 0, 1)

print '\n\nRelated Samples t-test'

before = [11, 16, 20, 17, 10]
after = [8, 11, 15, 11, 11]
print '\n\nSHOULD BE t=+2.88, 0.01<p<0.05 (df=4) ... Basic Stats 1st ed, p.359'
# The results of these two calls are not printed by this script.
# NOTE(review): the third argument (1) presumably makes the functions print
# their own report, labelled 'Before' and 'After' -- confirm against the
# stats module.
stats.ttest_rel(before, after, 1, 'Before', 'After')
stats.attest_rel(array(before), array(after), 1, 'Before', 'After')

print "\n\nPearson's r"

x = [0, 0, 1, 1, 1, 2, 2, 3, 3, 4]
y = [8, 7, 7, 6, 5, 4, 4, 4, 2, 0]
print 'SHOULD BE -0.94535 (N=10) ... Basic Stats 1st ed, p.190'
# List version first, then the array version on the same data.
print stats.pearsonr(x, y)
print stats.apearsonr(array(x), array(y))

print "\n\nSpearman's r"

# x and y are rebound here; the Pearson data above is no longer reachable.
x = [4, 1, 9, 8, 3, 5, 6, 2, 7]
y = [3, 2, 8, 6, 5, 4, 7, 1, 9]
print '\nSHOULD BE +0.85 on the dot (N=9) ... Basic Stats 1st ed, p.193'
print stats.spearmanr(x, y)
print stats.aspearmanr(array(x), array(y))

print '\n\nPoint-Biserial r'

# gender and score are only defined here; the code that uses them lies beyond
# this excerpt.
gender = [1, 1, 1, 1, 2, 2, 2, 2, 2, 2]
score = [35, 38, 41, 40, 60, 65, 65, 68, 68, 64]
print '\nSHOULD BE +0.981257 (N=10) ... Basic Stats 1st ed, p.197'
Пример #8
0
	def plot(self,init=None):
		"""Interactive plotting of parsing features."""
		
		if not init:
			init=self
			init.plotstats={}
			init.unitfeats=self._getFeatValDict()
			
			sels=[]
			for k,v in sorted(init.unitfeats.items()):
				for kk,vv in v.items():
					sels.append((k,kk))
			
			conditions={'x':[],'y':[]}
			targets={'x':[],'y':[]}
			pkey={'x':[],'y':[]}
			
			print str(0)+"\t[no condition]"
			for selnum in range(len(sels)):
				print str(selnum+1)+"\t"+str(sels[selnum][0])+" = "+str(sels[selnum][1])
			print
			
			stepnum=0
			for a in sorted(conditions.keys()):
				stepnum+=1
				sel=raw_input(">> [step "+str(stepnum)+"/4] ["+a+" coord] [conditions of population] please type in the number (or numbers separated by commas)\n\tof the conditions determining the total population from which the percentage of the sample is taken:\n").strip()
				for x in sel.split(","):
					try:
						conditions[a].append(sels[int(x)-1])
						pkey[a]+=["of_"+str(sels[int(x)-1][0])+"_is_"+str(sels[int(x)-1][1])]
					except:
						pass
						
				stepnum+=1
				sel=raw_input(">> [step "+str(stepnum)+"/4] ["+a+" coord] [conditions of sample] please type in the number (or numbers separated by commas)\n\tof the conditions determining the sample:\n").strip()
				for x in sel.split(","):
					try:
						targets[a].append(sels[int(x)-1])
						pkey[a]+=[str(sels[int(x)-1][0])+"_is_"+str(sels[int(x)-1][1])]
					except:
						pass
				
			
			# print ">> POPULATION:"
			# print conditions
			# 
			# print ">> SAMPLE:"
			# print targets

			init.population=conditions
			init.sample=targets
			
			init.pkey="X_"+"-".join(pkey['x'])+"."+"Y_"+"-".join(pkey['y'])
			print ">> plotting: "+ init.pkey
			
		
		if not hasattr(self,'bestparses'):
			for child in self.children:
				child.plot(init)
		else:
			posdict={}
			#if not hasattr(self,'minparselen'): return None
			for posnum in range(self.minparselen):
				posdict[posnum]={'x':[],'y':[]}
				for parse in self.__bestparses:
					posfeats=parse.positions[posnum].posfeats()
					
					for a in ['x','y']:
						conds=init.population[a]
						targs=init.sample[a]
						
						condsHold=True
						if len(conds):							
							for cond in conds:
								condK=cond[0]
								condV=cond[1]
							
								try:
									if posfeats[condK]==condV:
										continue
								except:
									condsHold=False
									break
								condsHold=False
								break
						
						if condsHold:
							targsHold=True
							for targ in targs:
								targK=targ[0]
								targV=targ[1]

								try:
									if posfeats[targK]==targV:
										continue
								except:
									targsHold=False
									break
								targsHold=False
								break
							
							if targsHold:
								posdict[posnum][a].append(1)
							else:
								posdict[posnum][a].append(0)
			
			for a in ['x','y']:
				if not posdict[posnum][a]:
					print "<< not enough data: position number ("+str(posnum)+") empty on dimension ["+str(a)+"]"
					return None

			init.plotstats[self.getName()]=posdict
			if (self!=init): return None

		totalstrs=[]
		totaltsvs=[]
		for textname,posdict in sorted(init.plotstats.items()):
			
			tsv="posnum\tx_mean\ty_mean\tx_std\ty_std\n"
			xs=[]
			ys=[]
			for posnum,xydict in posdict.items():
				x_avg,x_std=mean_stdev(xydict['x'])
				y_avg,y_std=mean_stdev(xydict['y'])
				
				xs.append(x_avg)
				ys.append(y_avg)
				tsv+="\t".join(str(bb) for bb in [(posnum+1),x_avg,y_avg,x_std,y_std])+"\n"
			
			
			ccmsg=""
			cc=None
			p=None
			try:
				from statlib import stats
				(cc,p)=stats.pearsonr(xs,ys)
				
				aa=makeminlength("    correlation coefficient: ",int(being.linelen/1.4))+str(cc)
				bb=makeminlength("    p-value: ",int(being.linelen/1.4))+str(p)
				
				tsv+=aa.strip().replace(":",":\t")+"\n"
				tsv+=bb.strip().replace(":",":\t")+"\n"
				
				for l in tsv.split("\n"):
					totaltsvs.append(textname+"_"+l)
				
				ccmsg+=aa+"\n"+bb+"\n"
				
			except:
				pass
			
			writeToFile(textname,init.pkey,tsv,extension="tsv")
			totaltsvs.append(tsv)
			
			try:
				strtowrite=self.makeBubbleChart(posdict,".".join([textname,init.pkey]),(cc,p))
				totalstrs+=[strtowrite]
				writeToFile(textname,init.pkey,self._getBubbleHeader()+strtowrite+self._getBubbleFooter(),extension="htm")
			except:
				pass
			
			if ccmsg:
				print ccmsg
		
		if not self.classname()=="Corpus": return None
		writeToFile(self.getName(),
			init.pkey,
			self._getBubbleHeader()+"\n<br/><br/><br/><br/><br/><br/><br/><br/>\n".join(totalstrs)+self._getBubbleFooter(),
			iscorpus=True,
			extension="htm")
		writeToFile(self.getName(),init.pkey,"\n\n\n\n".join(totaltsvs),iscorpus=True,extension="tsv")
Пример #9
0
for line in fileinput.input([File2]):
    rowlist = []
    rowlist = (line.rstrip("\n")).split('\t')

    if line.startswith("#") or rowlist[0] == "chrX":
        continue
    else:
        if rowlist[0] + "\t" + rowlist[1] in SNP1 and int(rowlist[4]) + int(
                rowlist[6]) >= 30:
            SNP2[rowlist[0] + "\t" + rowlist[1]] = 0.0
            SNP2[rowlist[0] + "\t" + rowlist[1]] = float(
                rowlist[4]) / float(int(rowlist[4]) + int(rowlist[6]))

Array1 = []
Array2 = []

from statlib import stats
reload(stats)

for snp in SNP2:
    output.write(str(SNP2[snp]) + "\t" + str(SNP1[snp]) + "\n")
    Array2.append(SNP2[snp])
    Array1.append(SNP1[snp])

print pearson(Array1, Array2)
print stats.pearsonr(Array1, Array2)
print len(Array2)

output.close()
Пример #10
0
	def plot(self,init=None):
		"""Interactive plotting of parsing features."""

		if not init:
			init=self
			init.plotstats={}
			init.unitfeats=self._getFeatValDict()

			sels=[]
			for k,v in sorted(init.unitfeats.items()):
				for kk,vv in v.items():
					sels.append((k,kk))

			conditions={'x':[],'y':[]}
			targets={'x':[],'y':[]}
			pkey={'x':[],'y':[]}

			print str(0)+"\t[no condition]"
			for selnum in range(len(sels)):
				print str(selnum+1)+"\t"+str(sels[selnum][0])+" = "+str(sels[selnum][1])
			print

			stepnum=0
			for a in sorted(conditions.keys()):
				stepnum+=1
				sel=raw_input(">> [step "+str(stepnum)+"/4] ["+a+" coord] [conditions of population] please type in the number (or numbers separated by commas)\n\tof the conditions determining the total population from which the percentage of the sample is taken:\n").strip()
				for x in sel.split(","):
					try:
						conditions[a].append(sels[int(x)-1])
						pkey[a]+=["of_"+str(sels[int(x)-1][0])+"_is_"+str(sels[int(x)-1][1])]
					except:
						pass

				stepnum+=1
				sel=raw_input(">> [step "+str(stepnum)+"/4] ["+a+" coord] [conditions of sample] please type in the number (or numbers separated by commas)\n\tof the conditions determining the sample:\n").strip()
				for x in sel.split(","):
					try:
						targets[a].append(sels[int(x)-1])
						pkey[a]+=[str(sels[int(x)-1][0])+"_is_"+str(sels[int(x)-1][1])]
					except:
						pass


			# print ">> POPULATION:"
			# print conditions
			#
			# print ">> SAMPLE:"
			# print targets

			init.population=conditions
			init.sample=targets

			init.pkey="X_"+"-".join(pkey['x'])+"."+"Y_"+"-".join(pkey['y'])
			print ">> plotting: "+ init.pkey


		if not hasattr(self,'bestParses'):
			for child in self.children:
				child.plot(init)
		else:
			posdict={}
			minparselen = min([len(parse.positions) for parse in self.bestParses()])
			maxparselen = max([len(parse.positions) for parse in self.bestParses()])
			#if not hasattr(self,'minparselen'): return None
			for posnum in range(maxparselen):
				posdict[posnum]={'x':[],'y':[]}
				for parse in self.bestParses():
					try:
						posfeats=parse.positions[posnum].posfeats()
					except IndexError:
						# there is no position number `posnum` in this parse `parse`
						continue

					for a in ['x','y']:
						conds=init.population[a]
						targs=init.sample[a]

						condsHold=True
						if len(conds):
							for cond in conds:
								condK=cond[0]
								condV=cond[1]

								try:
									if posfeats[condK]==condV:
										continue
								except:
									condsHold=False
									break
								condsHold=False
								break

						if condsHold:
							targsHold=True
							for targ in targs:
								targK=targ[0]
								targV=targ[1]

								try:
									if posfeats[targK]==targV:
										continue
								except:
									targsHold=False
									break
								targsHold=False
								break

							if targsHold:
								posdict[posnum][a].append(1)
							else:
								posdict[posnum][a].append(0)

			for a in ['x','y']:
				if not posdict[posnum][a]:
					print "<< not enough data: position number ("+str(posnum)+") empty on dimension ["+str(a)+"]"
					return None

			init.plotstats[self.getName()]=posdict
			if (self!=init): return None

		totalstrs=[]
		totaltsvs=[]
		for textname,posdict in sorted(init.plotstats.items()):

			tsv="posnum\tnumobs\tx_mean\ty_mean\tx_std\ty_std\n"
			xs=[]
			ys=[]
			for posnum,xydict in posdict.items():
				x_avg,x_std=mean_stdev(xydict['x'])
				y_avg,y_std=mean_stdev(xydict['y'])

				assert len(xydict['x'])==len(xydict['y'])

				xs.append(x_avg)
				ys.append(y_avg)
				tsv+="\t".join(str(bb) for bb in [(posnum+1),len(xydict['x']),x_avg,y_avg,x_std,y_std])+"\n"


			ccmsg=""
			cc=None
			p=None
			try:
				from statlib import stats
				(cc,p)=stats.pearsonr(xs,ys)

				aa=makeminlength("    correlation coefficient: ",int(being.linelen/1.4))+str(cc)
				bb=makeminlength("    p-value: ",int(being.linelen/1.4))+str(p)

				tsv+=aa.strip().replace(":",":\t")+"\n"
				tsv+=bb.strip().replace(":",":\t")+"\n"

				for l in tsv.split("\n"):
					totaltsvs.append(textname+"_"+l)

				ccmsg+=aa+"\n"+bb+"\n"

			except:
				pass

			writeToFile(textname,init.pkey,tsv,extension="tsv")
			totaltsvs.append(tsv)

			"""
			try:
				strtowrite=self.makeBubbleChart(posdict,".".join([textname,init.pkey]),(cc,p))
				totalstrs+=[strtowrite]
				writeToFile(textname,init.pkey,self._getBubbleHeader()+strtowrite+self._getBubbleFooter(),extension="htm")
			except:
				pass
			"""

			if ccmsg:
				print ccmsg

		if not self.classname()=="Corpus": return None

		"""
		writeToFile(self.getName(),
			init.pkey,
			self._getBubbleHeader()+"\n<br/><br/><br/><br/><br/><br/><br/><br/>\n".join(totalstrs)+self._getBubbleFooter(),
			iscorpus=True,
			extension="htm")
		"""

		writeToFile(self.getName(),init.pkey,"\n\n\n\n".join(totaltsvs),iscorpus=True,extension="tsv")
Пример #11
0
# Python 2 demo fragment: each section prints a heading, the expected textbook
# value, and then the result of the list-based and array-based ``stats`` call.
print '\n\nRelated Samples t-test'

before = [11,16,20,17,10]
after = [8,11,15,11,11]
print '\n\nSHOULD BE t=+2.88, 0.01<p<0.05 (df=4) ... Basic Stats 1st ed, p.359'
# The results of these two calls are not printed by this script.
# NOTE(review): the third argument (1) presumably makes the functions print
# their own report, labelled 'Before' and 'After' -- confirm against the
# stats module.
stats.ttest_rel(before,after,1,'Before','After')
stats.attest_rel(array(before),array(after),1,'Before','After')


print "\n\nPearson's r"

x = [0,0,1,1,1,2,2,3,3,4]
y = [8,7,7,6,5,4,4,4,2,0]
print 'SHOULD BE -0.94535 (N=10) ... Basic Stats 1st ed, p.190'
# List version first, then the array version on the same data.
print stats.pearsonr(x,y)
print stats.apearsonr(array(x),array(y))


print "\n\nSpearman's r"

# x and y are rebound here; the Pearson data above is no longer reachable.
x = [4,1,9,8,3,5,6,2,7]
y = [3,2,8,6,5,4,7,1,9]
print '\nSHOULD BE +0.85 on the dot (N=9) ... Basic Stats 1st ed, p.193'
print stats.spearmanr(x,y)
print stats.aspearmanr(array(x),array(y))


print '\n\nPoint-Biserial r'

# gender is only defined here; the code that uses it lies beyond this excerpt.
gender = [1,1,1,1,2,2,2,2,2,2]
Пример #12
0
m = range(4, 24)
m[10] = 34
b = N.array(m)

pb = [0] * 9 + [1] * 11
apb = N.array(pb)

print 'paired:'
#stats.paired(l,m)
#stats.paired(a,b)

print
print
print 'pearsonr:'
print stats.pearsonr(l, m)
print stats.pearsonr(a, b)
print 'spearmanr:'
print stats.spearmanr(l, m)
print stats.spearmanr(a, b)
print 'pointbiserialr:'
print stats.pointbiserialr(pb, l)
print stats.pointbiserialr(apb, a)
print 'kendalltau:'
print stats.kendalltau(l, m)
print stats.kendalltau(a, b)
print 'linregress:'
print stats.linregress(l, m)
print stats.linregress(a, b)

print '\nINFERENTIAL'
Пример #13
0
# Demo fragment: each section prints a heading, the expected textbook value,
# and then the result of the list-based and array-based ``stats`` call.
print('\n\nRelated Samples t-test')

before = [11,16,20,17,10]
after = [8,11,15,11,11]
print('\n\nSHOULD BE t=+2.88, 0.01<p<0.05 (df=4) ... Basic Stats 1st ed, p.359')
# The results of these two calls are not printed by this script.
# NOTE(review): the third argument (1) presumably makes the functions print
# their own report, labelled 'Before' and 'After' -- confirm against the
# stats module.
stats.ttest_rel(before,after,1,'Before','After')
stats.attest_rel(array(before),array(after),1,'Before','After')


print("\n\nPearson's r")

x = [0,0,1,1,1,2,2,3,3,4]
y = [8,7,7,6,5,4,4,4,2,0]
print('SHOULD BE -0.94535 (N=10) ... Basic Stats 1st ed, p.190')
# List version first, then the array version on the same data.
print(stats.pearsonr(x,y))
print(stats.apearsonr(array(x),array(y)))


print("\n\nSpearman's r")

# x and y are rebound here; the Pearson data above is no longer reachable.
x = [4,1,9,8,3,5,6,2,7]
y = [3,2,8,6,5,4,7,1,9]
print('\nSHOULD BE +0.85 on the dot (N=9) ... Basic Stats 1st ed, p.193')
print(stats.spearmanr(x,y))
print(stats.aspearmanr(array(x),array(y)))


print('\n\nPoint-Biserial r')

# gender is only defined here; the code that uses it lies beyond this excerpt.
gender = [1,1,1,1,2,2,2,2,2,2]