# Bind a second name to SparseVector; it is not called here.
sparseVector = SparseVector

# NumConver is built from the sorted contents of Sentset; its
# getNumericFromString method is used below to translate string keys.
Sorted_Senttest = sorted(Sentset)
numConver = NumConver(Sorted_Senttest)
# Parenthesised so the line parses on Python 3 as well; with a single
# argument the output on Python 2 is unchanged.
print(counts2)

# Re-key every mapping in counts2: each key is replaced by whatever
# numConver.getNumericFromString returns for it, the stored value is kept.
counts2_numeric = []
for co in counts2:
    counts2_numeric.append(
        {numConver.getNumericFromString(c): co[c] for c in co})
# The previous loop-based version left an empty scratch dict bound to temp2;
# keep the name defined in case code outside this section reads it.
temp2 = {}

print(counts2)
print(counts2_numeric)

# Sentence is constructed with four empty lists.
# NOTE(review): the meaning of the four arguments is not visible here.
sent_obj = Sentence([], [], [], [])

#sent_obj.display()
# Both lists start empty; nothing in this section appends to them.
# NOTE(review): presumably filled once per text further down - confirm.
Row_sparse_key = []
Row_sparse_value = []

for each_text in counts3:
    # Per-text state, reset on every outer iteration.
    countx = 0
    logsum = 0
    sparse_key, sparse_value = [], []

    for ngram in each_text:
        # Entries whose stored count is zero are skipped entirely.
        if each_text[ngram] == 0:
            continue

        # Parallel lists: converted key and log(count + 1) for this text.
        sparse_key.append(numConver.getNumericFromString(ngram))
        sparse_value.append(math.log(each_text[ngram] + 1))

        # log(count + 1) of this n-gram in every text of counts3.
        templist_for_counts = []
        templist_for_counts_tfidf = []
        # Number of texts in which this n-gram has a positive count.
        flag = 0
        for each_text2 in counts3:
            templist_for_counts.append(math.log(each_text2[ngram] + 1))
            if each_text2[ngram] > 0:
                flag += 1

        # Reciprocal of the number of texts containing the n-gram.
        idf = 1.0 / flag
Exemplo n.º 3
0
# Both lists start empty; nothing in this section appends to them.
# NOTE(review): presumably filled once per text further down - confirm.
Row_sparse_key = []
Row_sparse_value = []

for each_text in counts3:
    # Per-text state, reset on every outer iteration.
    countx = 0
    logsum = 0
    sparse_key, sparse_value = [], []

    for ngram in each_text:
        # Entries whose stored count is zero are skipped entirely.
        if each_text[ngram] == 0:
            continue

        # Parallel lists: converted key and log(count + 1) for this text.
        sparse_key.append(numConver.getNumericFromString(ngram))
        sparse_value.append(math.log(each_text[ngram] + 1))

        # log(count + 1) of this n-gram in every text of counts3.
        templist_for_counts = []
        templist_for_counts_tfidf = []
        # Number of texts in which this n-gram has a positive count.
        flag = 0
        for each_text2 in counts3:
            templist_for_counts.append(math.log(each_text2[ngram] + 1))
            if each_text2[ngram] > 0:
                flag += 1
Exemplo n.º 4
0
# Bind a second name to SparseVector; it is not called here.
sparseVector = SparseVector

# NumConver is built from the sorted contents of Sentset; its
# getNumericFromString method is used below to translate string keys.
Sorted_Senttest = sorted(Sentset)
numConver = NumConver(Sorted_Senttest)
# Parenthesised so the line parses on Python 3 as well; with a single
# argument the output on Python 2 is unchanged.
print(counts2)

# Re-key every mapping in counts2: each key is replaced by whatever
# numConver.getNumericFromString returns for it, the stored value is kept.
counts2_numeric = []
for co in counts2:
    counts2_numeric.append(
        {numConver.getNumericFromString(c): co[c] for c in co})
# The previous loop-based version left an empty scratch dict bound to temp2;
# keep the name defined in case code outside this section reads it.
temp2 = {}

print(counts2)
print(counts2_numeric)

# Sentence is constructed with four empty lists.
# NOTE(review): the meaning of the four arguments is not visible here.
sent_obj = Sentence([], [], [], [])

#sent_obj.display()

#sent_obj.compute_proximity()
#count()