def lesk(context, word):
    """Select the definition(s) of ``word`` that best overlap with ``context``.

    A simplified Lesk disambiguation: the context and every candidate
    definition are split on single spaces and stemmed with
    ``stemm.findstem``; each definition is scored by how many context
    stems (other than the target word itself) also occur in the
    definition, and the definitions with the highest score are returned.

    Args:
        context: Space-separated text in which ``word`` is expected to occur.
        word: The target word. Surrounding whitespace is ignored.

    Returns:
        ``False`` if no stemmed context token equals the stripped word.
        Otherwise a list with the text of every top-scoring definition;
        the list is empty when the wordnet has no definition for the word.
    """
    # Use one normalised form of the target everywhere (membership test,
    # wordnet lookup and overlap exclusion) so stray whitespace cannot make
    # the three disagree.
    target = word.strip()

    stemmed_context = []
    for token in context.split(" "):
        if token.strip() != '':
            stemmed = stemm.findstem(token)
            print("stemmed= %s" % (stemmed,))
            stemmed_context.append(stemmed)

    found = False
    for token in stemmed_context:
        print("item= %s ddddd= %s" % (token, target))
        if token == target:
            found = True
            break
    if not found:
        # The target word does not occur in the context at all.
        return False

    wordnet_obj = wordnet.mysqldbwordnet()
    definitions = wordnet_obj.getDefinitions(target) or []

    # (definition text, overlap count) pairs, in the order returned by the
    # wordnet. The first element of each row is treated as the definition text.
    scored = []
    for row in definitions:
        stemmed_def = []
        for token in row[0].split(" "):
            if token.strip() != '':
                stemmed = stemm.findstem(token)
                print("stemmed _definition: %s" % (stemmed,))
                stemmed_def.append(stemmed)

        # Every occurrence of a context stem counts, except the target itself.
        overlap = 0
        for token in stemmed_context:
            if token in stemmed_def and token != target:
                overlap += 1
        scored.append((row[0], overlap))

    if not scored:
        # No definitions available: nothing to rank.
        return []

    first_rank = max(overlap for _, overlap in scored)
    print("first rank = %s" % (first_rank,))
    return [text for text, overlap in scored if overlap == first_rank]
def stem():
    """Stem every line of ``output1.txt`` and write the stems to ``output2.txt``.

    The input is split on newlines; each piece (including empty ones, so
    blank lines are preserved as whatever ``mal.findstem`` returns for
    them) is passed to ``mal.findstem`` and written on its own line.
    """
    # Context managers close both files even if stemming raises.
    with open('output1.txt', 'r') as f_in:
        words = f_in.read().split('\n')

    with open('output2.txt', 'w') as f_out:
        for item in words:
            f_out.write(mal.findstem(item) + "\n")
def stem():
    """Stem every line of the file ``1test`` and write the stems to ``test``.

    The input is split on newlines; each piece (including empty ones) is
    passed to ``mal.findstem`` and the result is written on its own line.
    """
    # Context managers close both files even if stemming raises.
    with open("1test", 'r') as f_in:
        words = f_in.read().split('\n')

    with open("test", "w") as f_out:
        for word in words:
            f_out.write(mal.findstem(word) + "\n")
def stem():
    """Stem ``output1.txt`` into ``output2.txt``, then compact it to ``output3.txt``.

    Step 1: every newline-separated piece of ``output1.txt`` is passed to
    ``mal.findstem`` and written on its own line to ``output2.txt``.

    Step 2: ``output2.txt`` is split on blank lines (``"\\n\\n"``) into
    chunks. Empty chunks are dropped. Counting chunks from 1 with ``n`` the
    total number of chunks, chunks before position ``n - 2`` are written
    followed by a blank line (and their position is printed), the chunk at
    position ``n - 2`` is written without a trailing separator, and any
    later chunks are discarded.
    """
    # Context managers guarantee every handle is closed (and output flushed)
    # even if stemming raises part-way through.
    with open('output1.txt', 'r') as f_in:
        words = f_in.read().split('\n')

    with open('output2.txt', 'w') as f_out:
        for item in words:
            f_out.write(mal.findstem(item) + "\n")

    with open("output2.txt", "r") as f_in2:
        chunks = f_in2.read().split("\n\n")

    n = len(chunks)
    with open("output3.txt", "w") as f_out2:
        for position, sent in enumerate(chunks, 1):
            if sent == "":
                # Empty chunks still advance the position counter.
                continue
            if position < n - 2:
                print(position)
                f_out2.write(sent + "\n\n")
            elif position == n - 2:
                # Last chunk that is kept: no trailing separator.
                f_out2.write(sent)
import MalayalamStemmer as mal

# Stem every newline-separated piece of output1.txt and write one stem per
# line to output2.txt. Context managers close both files even on error.
with open('output1.txt', 'r') as f_in:
    words = f_in.read().split('\n')

with open('output2.txt', 'w') as f_out:
    for item in words:
        stem = mal.findstem(item)
        f_out.write(stem + "\n")
import sys
import MalayalamWordnet
import MalayalamStemmer

# Look up the wordnet definitions for the word stored in input.txt.
# The file content is split on '#'; the part before the first '#' is the
# word to look up.
wordnet_object = MalayalamWordnet.mysqldbwordnet()

with open('input.txt', 'r') as f:
    # Nothing is written to output.txt at present, but opening it for writing
    # creates/truncates the file; that existing side effect is preserved.
    with open('output.txt', 'w') as out_put_file:
        input_text = f.read().split('#')

# The word is stemmed first and the stem is what gets looked up.
root = MalayalamStemmer.findstem(input_text[0])
row = wordnet_object.getDefinitions(root)

# Print the first column of every returned row.
for item in row:
    print(item[0])