def test_ttest_ind(self):
    """Testing ttest_ind

    Calls ``stats.ttest_ind`` on the (L, M) pair and then on the (A, B)
    pair.  Element 0 of the first result and element 1 of the second
    result are each compared with the matching entry of ``expected``.
    """
    expected = (-1.8746868717340566, 0.068537696711420654)
    sample_pairs = zip([self.L, self.A], [self.M, self.B])
    for idx, (first, second) in enumerate(sample_pairs):
        # The same index selects both the result element and its expectation.
        self.assertEqual(stats.ttest_ind(first, second)[idx], expected[idx])
def test_ttest_ind(self):
    """Testing ttest_ind

    The (L, M) samples are checked on element 0 of the ``stats.ttest_ind``
    result and the (A, B) samples on element 1, each against the value
    stored at the same position in ``wanted``.
    """
    left_samples = [self.L, self.A]
    right_samples = [self.M, self.B]
    wanted = (-1.8746868717340566, 0.068537696711420654)
    for position, left in enumerate(left_samples):
        observed = stats.ttest_ind(left, right_samples[position])
        self.assertEqual(observed[position], wanted[position])
# Each row holds a heading (the stats function name followed by ':') and the
# two argument tuples the function is called with, in that order.
for _heading, _first_args, _second_args in (
        ('pointbiserialr:', (pb, l), (apb, a)),
        ('kendalltau:', (l, m), (a, b)),
        ('linregress:', (l, m), (a, b)),
):
    print(_heading)
    # Look the function up only when it is about to be called, so the
    # sequence of prints and calls matches a straight-line listing.
    print(getattr(stats, _heading[:-1])(*_first_args))
    print(getattr(stats, _heading[:-1])(*_second_args))

print('\nINFERENTIAL')
for _heading, _first_args, _second_args in (
        ('ttest_1samp:', (l, 12), (a, 12)),
        ('ttest_ind:', (l, m), (a, b)),
        ('ttest_rel:', (l, m), (a, b)),
        ('chisquare:', (l,), (a,)),
        ('ks_2samp:', (l, m), (a, b)),
        ('mannwhitneyu:', (l, m), (a, b)),
):
    print(_heading)
    print(getattr(stats, _heading[:-1])(*_first_args))
    print(getattr(stats, _heading[:-1])(*_second_args))

# Heading only: no ranksums call is part of this span.
print('ranksums:')
# Every print below takes exactly one argument, so the parenthesised form
# emits the same text on Python 2 and is also valid syntax on Python 3.

# Correlation measures: each is printed for two argument sets.
print('pointbiserialr:')
print(stats.pointbiserialr(pb, l))
print(stats.pointbiserialr(apb, a))
print('kendalltau:')
print(stats.kendalltau(l, m))
print(stats.kendalltau(a, b))
print('linregress:')
print(stats.linregress(l, m))
print(stats.linregress(a, b))

# Inferential tests, again printed for two argument sets each.
print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l, 12))
print(stats.ttest_1samp(a, 12))
print('ttest_ind:')
print(stats.ttest_ind(l, m))
print(stats.ttest_ind(a, b))
print('ttest_rel:')
print(stats.ttest_rel(l, m))
print(stats.ttest_rel(a, b))
print('chisquare:')
print(stats.chisquare(l))
print(stats.chisquare(a))
print('ks_2samp:')
print(stats.ks_2samp(l, m))
print(stats.ks_2samp(a, b))
print('mannwhitneyu:')
print(stats.mannwhitneyu(l, m))
print(stats.mannwhitneyu(a, b))
# Heading only: no ranksums call is part of this span.
print('ranksums:')
def __parsePassage(self):
    """Score every sentence of ``self.corpus`` and the passage as a whole.

    The corpus is split into sentences, each sentence is word-tokenized and
    tagged with ``self.brill_tagger``, and every (word, tag) pair is scored
    with ``self.__scorePassage``.  Each time a word found in
    ``self.negations`` is seen, the positive/negative scores of the words
    that follow it in the sentence are swapped (the switch toggles).

    Side effects:
        * appends one dict per sentence to ``self.scoredPassage`` with the
          keys ``sentence``, ``pos_mean``, ``neg_mean``, ``pos_n``,
          ``neg_n``, ``t_score`` and ``t_prob``;
        * sets ``self.pos_n``, ``self.neg_n``, ``self.pos_mean``,
          ``self.neg_mean``, ``self.t_score`` and ``self.t_prob``;
        * prints a completion message.
    """
    tokenize_sent = PunktSentenceTokenizer()  # sentence tokenizer
    tokenize_word = PunktWordTokenizer()  # word tokenizer
    sentences = tokenize_sent.tokenize(self.corpus)

    pos_corpus = []
    neg_corpus = []
    self.pos_n = 0
    self.neg_n = 0

    for sentence in sentences:
        sentence_scores = []
        pos_tally = []
        neg_tally = []
        sent_pos_n = 0
        sent_neg_n = 0
        flip = False

        for word_tag in self.brill_tagger.tag(tokenize_word.tokenize(sentence)):
            pos_score, neg_score = self.__scorePassage(word_tag[0], word_tag[1])
            if flip:
                # A negation is active: swap the two scores.
                sentence_scores.append([neg_score, pos_score])
            else:
                sentence_scores.append([pos_score, neg_score])
            if word_tag[0] in self.negations:
                # The negation word itself was scored with the previous
                # state; the toggle applies from the next word on.
                flip = not flip

        for score in sentence_scores:
            if score[0] is not None:
                pos_tally.append(score[0])
                pos_corpus.append(score[0])
            if score[1] is not None:
                neg_tally.append(score[1])
                neg_corpus.append(score[1])
            # NOTE(review): a None score is still compared here; Python 2
            # orders None below numbers, Python 3 raises TypeError -- confirm
            # whether __scorePassage can return None for only one side.
            if score[0] > score[1]:
                sent_pos_n += 1
                self.pos_n += 1
            elif score[0] < score[1]:
                sent_neg_n += 1
                self.neg_n += 1

        # Best-effort statistics: any failure (the original comment names a
        # zero division) falls back to 0.  ``except Exception`` keeps that
        # fallback without also swallowing KeyboardInterrupt / SystemExit.
        try:
            sen_t_score, sen_t_prob = stats.ttest_ind(pos_tally, neg_tally)
        except Exception:
            sen_t_score, sen_t_prob = 0, 0
        try:
            sent_pos_mean = stats.mean(pos_tally)
        except Exception:
            sent_pos_mean = 0
        try:
            sent_neg_mean = stats.mean(neg_tally)
        except Exception:
            sent_neg_mean = 0

        # Record the sentence together with its scores.
        self.scoredPassage.append({
            'sentence': sentence,
            'pos_mean': sent_pos_mean,
            'neg_mean': sent_neg_mean,
            'pos_n': sent_pos_n,
            'neg_n': sent_neg_n,
            't_score': sen_t_score,
            't_prob': sen_t_prob,
        })

    # Passage-level statistics over all collected scores.
    # NOTE(review): unlike the per-sentence means these two calls are not
    # guarded, so a failure in stats.mean propagates to the caller.
    self.pos_mean = stats.mean(pos_corpus)
    self.neg_mean = stats.mean(neg_corpus)
    try:
        self.t_score, self.t_prob = stats.ttest_ind(pos_corpus, neg_corpus)
    except Exception:
        self.t_score, self.t_prob = 0, 0
    print("Finished Parsing and scoring")
# NOTE(review): ``reload`` is a builtin only on Python 2; on Python 3 it
# lives in ``importlib`` -- confirm which interpreter runs this script.
reload(pstat)
import statshelp

# Every print takes a single argument, so the parenthesised form emits the
# same text on Python 2 and is also valid syntax on Python 3.

# --- Single-sample t-test; the banner quotes the expected result ---
print('\n\nSingle Sample t-test')
x = [50, 75, 65, 72, 68, 65, 73, 59, 64]
print('SHOULD BE ... t=-3.61, p<0.01 (df=8) ... Basic Stats 1st ed, p.307')
# Return values are discarded; presumably the trailing 1 asks the routine
# to print its own report -- TODO confirm against the stats module.
stats.ttest_1samp(x, 75, 1)
stats.attest_1samp(array(x), 75, 1)

# --- Independent-samples t-test; no expected value is given ---
print('\n\nIndependent Samples t-test')
a = [11, 16, 20, 17, 10, 12]
b = [8, 11, 15, 11, 11, 12, 11, 7]
print('\n\nSHOULD BE ??? <p< (df=) ... ')
stats.ttest_ind(a, b, 1)
stats.attest_ind(array(a), array(b), 0, 1)

# --- Related-samples t-test ---
print('\n\nRelated Samples t-test')
before = [11, 16, 20, 17, 10]
after = [8, 11, 15, 11, 11]
print('\n\nSHOULD BE t=+2.88, 0.01<p<0.05 (df=4) ... Basic Stats 1st ed, p.359')
stats.ttest_rel(before, after, 1, 'Before', 'After')
stats.attest_rel(array(before), array(after), 1, 'Before', 'After')

# --- Pearson's r: data and expected value only; no call in this span ---
print("\n\nPearson's r")
x = [0, 0, 1, 1, 1, 2, 2, 3, 3, 4]
y = [8, 7, 7, 6, 5, 4, 4, 4, 2, 0]
print('SHOULD BE -0.94535 (N=10) ... Basic Stats 1st ed, p.190')
def evaluate(self, *args, **params):
    """Run ``_stats.ttest_ind`` with the given arguments and return its result.

    Positional and keyword arguments are forwarded unchanged; ``self`` is
    not used.
    """
    outcome = _stats.ttest_ind(*args, **params)
    return outcome
import statshelp

# Every print takes a single argument, so the parenthesised form emits the
# same text on Python 2 and is also valid syntax on Python 3.

# --- Single-sample t-test; the banner quotes the expected result ---
print('\n\nSingle Sample t-test')
x = [50, 75, 65, 72, 68, 65, 73, 59, 64]
print('SHOULD BE ... t=-3.61, p<0.01 (df=8) ... Basic Stats 1st ed, p.307')
# Return values are discarded; presumably the trailing 1 asks the routine
# to print its own report -- TODO confirm against the stats module.
stats.ttest_1samp(x, 75, 1)
stats.attest_1samp(array(x), 75, 1)

# --- Independent-samples t-test; no expected value is given ---
print('\n\nIndependent Samples t-test')
a = [11, 16, 20, 17, 10, 12]
b = [8, 11, 15, 11, 11, 12, 11, 7]
print('\n\nSHOULD BE ??? <p< (df=) ... ')
stats.ttest_ind(a, b, 1)
stats.attest_ind(array(a), array(b), 0, 1)

# --- Related-samples t-test ---
print('\n\nRelated Samples t-test')
before = [11, 16, 20, 17, 10]
after = [8, 11, 15, 11, 11]
print('\n\nSHOULD BE t=+2.88, 0.01<p<0.05 (df=4) ... Basic Stats 1st ed, p.359')
stats.ttest_rel(before, after, 1, 'Before', 'After')
stats.attest_rel(array(before), array(after), 1, 'Before', 'After')

# --- Pearson's r: only the x data falls inside this span ---
print("\n\nPearson's r")
x = [0, 0, 1, 1, 1, 2, 2, 3, 3, 4]
# Every print below takes exactly one argument, so the parenthesised form
# emits the same text on Python 2 and is also valid syntax on Python 3.

# Correlation measures: each is printed for two argument sets.
print('pointbiserialr:')
print(stats.pointbiserialr(pb, l))
print(stats.pointbiserialr(apb, a))
print('kendalltau:')
print(stats.kendalltau(l, m))
print(stats.kendalltau(a, b))
print('linregress:')
print(stats.linregress(l, m))
print(stats.linregress(a, b))

# Inferential tests, again printed for two argument sets each.
print('\nINFERENTIAL')
print('ttest_1samp:')
print(stats.ttest_1samp(l, 12))
print(stats.ttest_1samp(a, 12))
print('ttest_ind:')
print(stats.ttest_ind(l, m))
print(stats.ttest_ind(a, b))
print('ttest_rel:')
print(stats.ttest_rel(l, m))
print(stats.ttest_rel(a, b))
print('chisquare:')
print(stats.chisquare(l))
print(stats.chisquare(a))
print('ks_2samp:')
print(stats.ks_2samp(l, m))
print(stats.ks_2samp(a, b))
print('mannwhitneyu:')
print(stats.mannwhitneyu(l, m))
print(stats.mannwhitneyu(a, b))
# Heading only: no ranksums call is part of this span.
print('ranksums:')
def __parsePassage(self):
    """Score every sentence of ``self.corpus`` and the passage as a whole.

    The corpus is split into sentences, each sentence is word-tokenized and
    tagged with ``self.brill_tagger``, and every (word, tag) pair is scored
    with ``self.__scorePassage``.  Each time a word found in
    ``self.negations`` is seen, the positive/negative scores of the words
    that follow it in the sentence are swapped (the switch toggles).

    Side effects:
        * appends one dict per sentence to ``self.scoredPassage`` with the
          keys ``sentence``, ``pos_mean``, ``neg_mean``, ``pos_n``,
          ``neg_n``, ``t_score`` and ``t_prob``;
        * sets ``self.pos_n``, ``self.neg_n``, ``self.pos_mean``,
          ``self.neg_mean``, ``self.t_score`` and ``self.t_prob``;
        * prints a completion message.
    """
    tokenize_sent = PunktSentenceTokenizer()  # sentence tokenizer
    tokenize_word = PunktWordTokenizer()  # word tokenizer
    sentences = tokenize_sent.tokenize(self.corpus)

    pos_corpus = []
    neg_corpus = []
    self.pos_n = 0
    self.neg_n = 0

    for sentence in sentences:
        sentence_scores = []
        pos_tally = []
        neg_tally = []
        sent_pos_n = 0
        sent_neg_n = 0
        flip = False

        tagged_words = self.brill_tagger.tag(tokenize_word.tokenize(sentence))
        for word_tag in tagged_words:
            pos_score, neg_score = self.__scorePassage(word_tag[0], word_tag[1])
            if flip:
                # A negation is active: swap the two scores.
                sentence_scores.append([neg_score, pos_score])
            else:
                sentence_scores.append([pos_score, neg_score])
            if word_tag[0] in self.negations:
                # The negation word itself was scored with the previous
                # state; the toggle applies from the next word on.
                flip = not flip

        for score in sentence_scores:
            if score[0] is not None:
                pos_tally.append(score[0])
                pos_corpus.append(score[0])
            if score[1] is not None:
                neg_tally.append(score[1])
                neg_corpus.append(score[1])
            # NOTE(review): a None score is still compared here; Python 2
            # orders None below numbers, Python 3 raises TypeError -- confirm
            # whether __scorePassage can return None for only one side.
            if score[0] > score[1]:
                sent_pos_n += 1
                self.pos_n += 1
            elif score[0] < score[1]:
                sent_neg_n += 1
                self.neg_n += 1

        # Best-effort statistics: any failure (the original comment names a
        # zero division) falls back to 0.  ``except Exception`` keeps that
        # fallback without also swallowing KeyboardInterrupt / SystemExit.
        try:
            sen_t_score, sen_t_prob = stats.ttest_ind(pos_tally, neg_tally)
        except Exception:
            sen_t_score, sen_t_prob = 0, 0
        try:
            sent_pos_mean = stats.mean(pos_tally)
        except Exception:
            sent_pos_mean = 0
        try:
            sent_neg_mean = stats.mean(neg_tally)
        except Exception:
            sent_neg_mean = 0

        # Record the sentence together with its scores.
        self.scoredPassage.append({
            'sentence': sentence,
            'pos_mean': sent_pos_mean,
            'neg_mean': sent_neg_mean,
            'pos_n': sent_pos_n,
            'neg_n': sent_neg_n,
            't_score': sen_t_score,
            't_prob': sen_t_prob,
        })

    # Passage-level statistics over all collected scores.
    # NOTE(review): unlike the per-sentence means these two calls are not
    # guarded, so a failure in stats.mean propagates to the caller.
    self.pos_mean = stats.mean(pos_corpus)
    self.neg_mean = stats.mean(neg_corpus)
    try:
        self.t_score, self.t_prob = stats.ttest_ind(pos_corpus, neg_corpus)
    except Exception:
        self.t_score, self.t_prob = 0, 0
    print("Finished Parsing and scoring")