def classify(line):
    """Score a tweet against the positive/negative n-gram count tables.

    The text is lower-cased, passed through ``sub``, ``word_normalize`` and
    ``tweet_filter`` and split on whitespace.  Each pair of adjacent words is
    filtered with ``filter_tweet``, joined with a space into a bigram and run
    through ``neg.replace_negations``.  A bigram present in ``posBigramdic``
    or ``negBigramdic`` contributes its count; otherwise its individual words
    are looked up in ``posunigramdic`` / ``negunigramdic``.

    The score ``exp(log_sum_pos + log(0.5) - log_deno)`` is printed together
    with ``line`` before the label is returned.

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed source; confirm it against the project history.
    NOTE(review): ``log_deno`` receives both the positive count and the
    running combined count for the same bigram.  This is kept unchanged
    because the threshold may have been tuned against it.

    Args:
        line: Raw tweet text.

    Returns:
        ``'1'`` when the score exceeds ``limits.ClassifierThreshold``,
        otherwise ``'0'``.
    """

    def positive_term(count):
        # float() keeps the ratio a true fraction even when both lengths are
        # ints under Python 2, where "/" would truncate it to 0 and the
        # outer division would raise ZeroDivisionError.
        return log(count / (float(pos_dic_len) / total_len))

    log_sum_pos = 0.0
    log_deno = 0.0

    cleaned = tweet_filter.sub(
        "",
        word_normalize.sub(r'\1\1', sub(r'(\w)\1+\b', r'\1', line.lower())))
    # Convert tweet into a list of words
    words = cleaned.split()

    for first, second in zip(words, words[1:]):
        bigram = filter_tweet(first) + " " + filter_tweet(second)
        # Antonym replacer
        bigram = neg.replace_negations(bigram)
        temp = 0.0

        if len(bigram) > 1:
            unigrams = bigram.split()

            # Positive evidence: prefer the bigram, fall back to its words.
            # Zero counts are skipped because log(0) raises ValueError.
            if bigram in posBigramdic:
                temp = float(posBigramdic[bigram])
                if temp > 0:
                    log_sum_pos += positive_term(temp)
                    log_deno += log(temp)
            else:
                for unigram in unigrams:
                    if unigram in posunigramdic:
                        temp = float(posunigramdic[unigram])
                        if temp > 0:
                            log_sum_pos += positive_term(temp)
                            log_deno += log(temp)

            # Negative evidence only feeds the denominator.
            if bigram in negBigramdic:
                temp += float(negBigramdic[bigram])
                if temp > 0:
                    log_deno += log(temp)
            else:
                for unigram in unigrams:
                    if unigram in negunigramdic:
                        temp = float(negunigramdic[unigram])
                        if temp > 0:
                            log_deno += log(temp)
        else:
            # Degenerate bigram (at most one character): look it up in the
            # unigram tables directly.
            if bigram in posunigramdic:
                temp = float(posunigramdic[bigram])
                if temp > 0:
                    log_sum_pos += positive_term(temp)
            if bigram in negunigramdic:
                temp += float(negunigramdic[bigram])
            if temp > 0:
                log_deno += log(temp)

    log_pos_prob = log(0.5)
    score = exp((log_sum_pos + log_pos_prob) - log_deno)
    # Single-argument form prints "score line" under both Python 2 and 3.
    print("%s %s" % (score, line))

    return '1' if score > limits.ClassifierThreshold else '0'
def classify(line):
    """Score a tweet against the positive/negative n-gram count tables.

    The text is lower-cased, passed through ``sub``, ``word_normalize`` and
    ``tweet_filter`` and split on whitespace.  Each pair of adjacent words is
    filtered with ``filter_tweet``, joined with a space into a bigram and run
    through ``neg.replace_negations``.  A bigram present in ``posBigramdic``
    or ``negBigramdic`` contributes its count; otherwise its individual words
    are looked up in ``posunigramdic`` / ``negunigramdic``.

    The score ``exp(log_sum_pos + log(0.5) - log_deno)`` is printed together
    with ``line`` before the label is returned.

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed source; confirm it against the project history.
    NOTE(review): ``log_deno`` receives both the positive count and the
    running combined count for the same bigram.  This is kept unchanged
    because the threshold may have been tuned against it.

    Args:
        line: Raw tweet text.

    Returns:
        ``'1'`` when the score exceeds ``limits.ClassifierThreshold``,
        otherwise ``'0'``.
    """

    def positive_term(count):
        # float() keeps the ratio a true fraction even when both lengths are
        # ints under Python 2, where "/" would truncate it to 0 and the
        # outer division would raise ZeroDivisionError.
        return log(count / (float(pos_dic_len) / total_len))

    log_sum_pos = 0.0
    log_deno = 0.0

    cleaned = tweet_filter.sub(
        "",
        word_normalize.sub(r'\1\1', sub(r'(\w)\1+\b', r'\1', line.lower())))
    # Convert tweet into a list of words
    words = cleaned.split()

    for first, second in zip(words, words[1:]):
        bigram = filter_tweet(first) + " " + filter_tweet(second)
        # Antonym replacer
        bigram = neg.replace_negations(bigram)
        temp = 0.0

        if len(bigram) > 1:
            unigrams = bigram.split()

            # Positive evidence: prefer the bigram, fall back to its words.
            # Zero counts are skipped because log(0) raises ValueError.
            if bigram in posBigramdic:
                temp = float(posBigramdic[bigram])
                if temp > 0:
                    log_sum_pos += positive_term(temp)
                    log_deno += log(temp)
            else:
                for unigram in unigrams:
                    if unigram in posunigramdic:
                        temp = float(posunigramdic[unigram])
                        if temp > 0:
                            log_sum_pos += positive_term(temp)
                            log_deno += log(temp)

            # Negative evidence only feeds the denominator.
            if bigram in negBigramdic:
                temp += float(negBigramdic[bigram])
                if temp > 0:
                    log_deno += log(temp)
            else:
                for unigram in unigrams:
                    if unigram in negunigramdic:
                        temp = float(negunigramdic[unigram])
                        if temp > 0:
                            log_deno += log(temp)
        else:
            # Degenerate bigram (at most one character): look it up in the
            # unigram tables directly.
            if bigram in posunigramdic:
                temp = float(posunigramdic[bigram])
                if temp > 0:
                    log_sum_pos += positive_term(temp)
            if bigram in negunigramdic:
                temp += float(negunigramdic[bigram])
            if temp > 0:
                log_deno += log(temp)

    log_pos_prob = log(0.5)
    score = exp((log_sum_pos + log_pos_prob) - log_deno)
    # Single-argument form prints "score line" under both Python 2 and 3.
    print("%s %s" % (score, line))

    return '1' if score > limits.ClassifierThreshold else '0'
def classify(line, Company):
    """Compare positive and negative word evidence for a company's tweet.

    The text is lower-cased, passed through ``sub``, ``word_normalize`` and
    ``tweet_filter`` and split on whitespace.  Each token is replaced via
    ``dicSpell`` when present, stemmed, dropped if the stem is in ``stop``,
    and otherwise replaced via ``dicSyn`` when present.  The surviving words
    go through ``neg.replace_negations`` and are de-duplicated before being
    looked up in the company's positive and negative count tables.

    ``Company``, the score difference and ``line`` are printed before the
    label is returned.

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed source; confirm it against the project history.
    NOTE(review): the negative sum accumulates the combined (positive +
    negative) count rather than the negative count alone.  This is kept
    unchanged because the 0.1 margin and the 2.1 boost may have been tuned
    against it.

    Args:
        line: Raw tweet text.
        Company: One of ``"aapl"``, ``"bac"`` or ``"goog"``.

    Returns:
        ``2`` when the positive score exceeds the negative score by at least
        0.1, otherwise ``0``.

    Raises:
        ValueError: If ``Company`` is not one of the supported tickers.
    """
    if Company == "aapl":
        posd, negd = aapl_posd, aapl_negd
    elif Company == "bac":
        posd, negd = bac_posd, bac_negd
    elif Company == "goog":
        posd, negd = goog_posd, goog_negd
    else:
        # Previously the tables stayed unbound and the lookup loop failed
        # with UnboundLocalError; fail early with a clear message instead.
        raise ValueError("unsupported company: %r" % (Company,))

    tokens = tweet_filter.sub(
        "",
        word_normalize.sub(r'\1\1', sub(r'(\w)\1+\b', r'\1', line.lower()))
    ).split()

    # One stemmer for the whole tweet instead of one per token.
    stemmer = stem.PorterStemmer()
    kept = []
    for token in tokens:
        if token in dicSpell:
            token = dicSpell[token]
        word = stemmer.stem(token)
        if word not in stop:
            if word in dicSyn:
                word = dicSyn[word]
            kept.append(word)

    # Every kept word is prefixed with a space, matching the string the
    # negation replacer has always been given.
    lne = neg.replace_negations("".join(" " + word for word in kept))

    log_sum_pos = 0.0
    log_sum_neg = 0.0
    log_deno = 0.0
    for word in set(lne.split()):
        temp = 0.0
        if word in posd:
            temp = float(posd[word])
            # Zero counts are skipped because log(0) raises ValueError.
            if temp > 0:
                log_sum_pos += log(temp)
        if word in negd:
            temp += float(negd[word])
            if temp > 0:
                log_sum_neg += log(temp)
        if temp > 0:
            log_deno += log(temp)
            # Fixed boost for the positive side; the negative side's
            # matching weight is log(1) == 0, so nothing is added there.
            log_sum_pos += log(2.1)

    log_pos_prob = log(0.5)
    log_neg_prob = log(0.5)
    log_sum = (log_sum_pos + log_pos_prob) - log_deno
    n_log_sum = (log_sum_neg + log_neg_prob) - log_deno
    difference = exp(log_sum) - exp(n_log_sum)
    # Single-argument form prints the same text under Python 2 and 3.
    print("%s %s %s" % (Company, difference, line))

    if difference >= 0.1:
        return 2
    return 0
def classify(line, Company):
    """Compare positive and negative word evidence for a company's tweet.

    The text is lower-cased, passed through ``sub``, ``word_normalize`` and
    ``tweet_filter`` and split on whitespace.  Each token is replaced via
    ``dicSpell`` when present, stemmed, dropped if the stem is in ``stop``,
    and otherwise replaced via ``dicSyn`` when present.  The surviving words
    go through ``neg.replace_negations`` and are de-duplicated before being
    looked up in the company's positive and negative count tables.

    ``Company``, the score difference and ``line`` are printed before the
    label is returned.

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed source; confirm it against the project history.
    NOTE(review): the negative sum accumulates the combined (positive +
    negative) count rather than the negative count alone.  This is kept
    unchanged because the 0.1 margin and the 2.1 boost may have been tuned
    against it.

    Args:
        line: Raw tweet text.
        Company: One of ``"aapl"``, ``"bac"`` or ``"goog"``.

    Returns:
        ``2`` when the positive score exceeds the negative score by at least
        0.1, otherwise ``0``.

    Raises:
        ValueError: If ``Company`` is not one of the supported tickers.
    """
    if Company == "aapl":
        posd, negd = aapl_posd, aapl_negd
    elif Company == "bac":
        posd, negd = bac_posd, bac_negd
    elif Company == "goog":
        posd, negd = goog_posd, goog_negd
    else:
        # Previously the tables stayed unbound and the lookup loop failed
        # with UnboundLocalError; fail early with a clear message instead.
        raise ValueError("unsupported company: %r" % (Company,))

    tokens = tweet_filter.sub(
        "",
        word_normalize.sub(r'\1\1', sub(r'(\w)\1+\b', r'\1', line.lower()))
    ).split()

    # One stemmer for the whole tweet instead of one per token.
    stemmer = stem.PorterStemmer()
    kept = []
    for token in tokens:
        if token in dicSpell:
            token = dicSpell[token]
        word = stemmer.stem(token)
        if word not in stop:
            if word in dicSyn:
                word = dicSyn[word]
            kept.append(word)

    # Every kept word is prefixed with a space, matching the string the
    # negation replacer has always been given.
    lne = neg.replace_negations("".join(" " + word for word in kept))

    log_sum_pos = 0.0
    log_sum_neg = 0.0
    log_deno = 0.0
    for word in set(lne.split()):
        temp = 0.0
        if word in posd:
            temp = float(posd[word])
            # Zero counts are skipped because log(0) raises ValueError.
            if temp > 0:
                log_sum_pos += log(temp)
        if word in negd:
            temp += float(negd[word])
            if temp > 0:
                log_sum_neg += log(temp)
        if temp > 0:
            log_deno += log(temp)
            # Fixed boost for the positive side; the negative side's
            # matching weight is log(1) == 0, so nothing is added there.
            log_sum_pos += log(2.1)

    log_pos_prob = log(0.5)
    log_neg_prob = log(0.5)
    log_sum = (log_sum_pos + log_pos_prob) - log_deno
    n_log_sum = (log_sum_neg + log_neg_prob) - log_deno
    difference = exp(log_sum) - exp(n_log_sum)
    # Single-argument form prints the same text under Python 2 and 3.
    print("%s %s %s" % (Company, difference, line))

    if difference >= 0.1:
        return 2
    return 0