def _updateKnowledgeBase(postFn, wsdText):
    """Run the search/fetch/parse pipeline for wsdText and store the result.

    Each stage reports progress through setStatus; the parsed dependency
    graphs are finally handed to postFn.insertToDB.
    """
    setStatus("Making Query String")
    queryStr = makeQueryString(wsdText)
    # NOTE(review): the status text says Yahoo but the helper called is
    # googleSearch -- confirm which one is accurate.
    setStatus("Searching on Yahoo.com")
    page = googleSearch(queryStr)
    setStatus("Extracting Links From Result")
    urlList = extractLinks(page)
    setStatus("Fetching Contents")
    contents = fetchSentsFromPages(urlList)
    setStatus("Parsing Contents")
    depGraphList = parseContents(contents)
    setStatus("Updating knowledgebase")
    postFn.insertToDB(depGraphList)


def doWSD(request):
    """Disambiguate a word within a text taken from request.POST.

    Reads the POST fields 'wsdText', 'wsdWord' and 'updateKB'; each one
    defaults to '' when absent. Text and word are lower-cased, the text has
    its tags stripped, and both are stemmed. When 'updateKB' equals "on" the
    knowledge base is refreshed first. Two candidate senses are then computed
    (dependency score and gloss score) and the smaller of the two is used to
    index the fetched sense list.

    Returns:
        HttpResponse wrapping the markup built by PostFn.createMarkup.
    """
    postFn = PostFn()

    # modify in index.html to send only the sentence instead of whole text
    wsdText = request.POST.get('wsdText', '')
    wsdWord = request.POST.get('wsdWord', '')
    # Defaulting here matters: the flag used to be bound only when the field
    # was present, so a request without it crashed on the comparison below.
    updateKB = request.POST.get('updateKB', '')

    wsdText = wsdText.lower()
    wsdWord = wsdWord.lower()
    wsdText = strip_tags(wsdText)
    wsdText = stemWords(wsdText)
    wsdWord = stemWords(wsdWord)

    if updateKB == "on":
        _updateKnowledgeBase(postFn, wsdText)

    setStatus("Fetching Senses")
    senseList = postFn.fetchSenses(wsdWord)
    setStatus("Creating Sense Parse Trees")
    senseTrees = postFn.createSenseTree(senseList)
    setStatus("Creating Parse Tree for Input")
    wsdTextTree = postFn.createWSDTextTree(wsdText)
    setStatus("Calculating Dep Score")
    candidateSense1 = postFn.depScore(
        senseList, wsdText, wsdWord, senseTrees, wsdTextTree)
    setStatus("Calculating Gloss Score")
    candidateSense2 = postFn.glossScore(
        senseList, wsdText, wsdWord, senseTrees, wsdTextTree)

    # Same outcome as the former nested if/else: when the two candidates
    # differ, the smaller one wins.
    sense = min(candidateSense1, candidateSense2)

    html = postFn.createMarkup(senseList[sense], wsdText, wsdWord)
    return HttpResponse(html)
def createSenseTree(self, senseList):
    """Build one parse-tree mapping per parsed sense in senseList.

    The senses are parsed with parseSenses; every resulting pair has both
    of its words stemmed, and the second word is appended to the list kept
    under the first word.

    Example of a single mapping:
        {'conduct': ['institution', 'to', 'business'],
         'ROOT': ['created'],
         'institution': ['an'],
         'created': ['institution', 'conduct']}

    Returns:
        A list of defaultdict(list) objects, one per entry returned by
        parseSenses.
    """
    def buildTree(pairs):
        # Group the stemmed second words under their stemmed first word.
        tree = defaultdict(list)
        for first, second in pairs:
            key = stemWords(first)
            value = stemWords(second)
            tree[key].append(value)
        return tree

    return [buildTree(pairs) for pairs in parseSenses(senseList)]
def parseSenses(senseList, parserPath="/home/rohith/stanford-parser"):
    """Parse all senses in one parser run and return their dependency pairs.

    The senses are joined with '. ' into a single string so that the parser
    is initialised and invoked once instead of once per sense.

    Args:
        senseList: iterable of sense strings.
        parserPath: location passed to StanfordParser; defaults to the path
            that was previously hard-coded here.

    Returns:
        A list with one entry per blank-line-separated chunk of the parser
        output (the final chunk is discarded). Each entry is the list of
        (first, second) tuples matched by the "(x, y)" pattern in that chunk.
    """
    joined = stemWords('. '.join(senseList), rmStopWords=True)
    parser = StanfordParser(parserPath)
    typedDep = parser.parse(joined)

    # to remove numbers and '-'
    typedDep = re.sub(r'[0-9-]+', "", typedDep)

    # need to separate each sense into individual lists
    typedDepList = typedDep.split("\n\n")
    # Discard whatever follows the last blank line (presumably an empty
    # trailer -- verify against the parser's actual output).
    typedDepList.pop()

    # Raw string: "\(" in a plain literal is an invalid escape sequence.
    rx = re.compile(r"\((.+), (.+)\)")
    return [rx.findall(dep) for dep in typedDepList]