示例#1
0
def doWSD(request):
    """Handle a word-sense-disambiguation request and return the result markup.

    Reads ``wsdText``, ``wsdWord`` and ``updateKB`` from ``request.POST``;
    each one defaults to an empty string when it is absent.  The text and
    the target word are lower-cased and stemmed (the text is also stripped
    of tags).  When ``updateKB`` equals ``"on"`` the knowledge base is
    refreshed from a web search first.  Two candidate senses are then
    computed (dependency score and gloss score) and the chosen sense is
    rendered with ``PostFn.createMarkup``.

    Args:
        request: request object exposing the submitted form as ``POST``.

    Returns:
        An ``HttpResponse`` wrapping the generated markup.
    """
    postFn = PostFn()

    # TODO (kept from the original): modify index.html to send only the
    # sentence instead of the whole text.
    wsdText = request.POST.get('wsdText', '')
    wsdWord = request.POST.get('wsdWord', '')
    # The default is essential: previously ``updateKB`` was left unbound
    # when the field was missing, and the comparison below crashed with
    # UnboundLocalError.
    updateKB = request.POST.get('updateKB', '')

    wsdText = strip_tags(wsdText.lower())
    wsdWord = wsdWord.lower()

    wsdText = stemWords(wsdText)
    wsdWord = stemWords(wsdWord)

    if updateKB == "on":
        setStatus("Making Query String")
        queryStr = makeQueryString(wsdText)
        # NOTE(review): the status text mentions Yahoo while the helper is
        # named googleSearch -- confirm which one is intended.
        setStatus("Searching on Yahoo.com")
        page = googleSearch(queryStr)
        setStatus("Extracting Links From Result")
        urlList = extractLinks(page)
        setStatus("Fetching Contents")
        contents = fetchSentsFromPages(urlList)
        setStatus("Parsing Contents")
        depGraphList = parseContents(contents)
        setStatus("Updating knowledgebase")
        postFn.insertToDB(depGraphList)

    setStatus("Fetching Senses")
    senseList = postFn.fetchSenses(wsdWord)
    setStatus("Creating Sense Parse Trees")
    senseTrees = postFn.createSenseTree(senseList)
    setStatus("Creating Parse Tree for Input")
    wsdTextTree = postFn.createWSDTextTree(wsdText)
    setStatus("Calculating Dep Score")
    candidateSense1 = postFn.depScore(senseList, wsdText, wsdWord, senseTrees, wsdTextTree)
    setStatus("Calculating Gloss Score")
    candidateSense2 = postFn.glossScore(senseList, wsdText, wsdWord, senseTrees, wsdTextTree)

    # When the two scorers disagree the smaller candidate wins; when they
    # agree either value is the answer.
    if candidateSense1 > candidateSense2:
        sense = candidateSense2
    else:
        sense = candidateSense1

    html = postFn.createMarkup(senseList[sense], wsdText, wsdWord)
    return HttpResponse(html)
    
示例#2
0
 def createSenseTree(self, senseList):
     '''
     Build one parse tree per dependency list returned by
     parseSenses(senseList).

     Each tree is a defaultdict(list) that maps the stemmed first item of
     every pair to the list of stemmed second items paired with it, eg:
     {'conduct': ['institution', 'to', 'business'],
     'ROOT': ['created'], 'institution': ['an'], 'created': ['institution', 'conduct']}

     Returns the trees as a list, in the order parseSenses produced them.
     '''
     trees = []
     for pairs in parseSenses(senseList):
         tree = defaultdict(list)
         for head, dependent in pairs:
             stemmedHead = stemWords(head)
             stemmedDependent = stemWords(dependent)
             tree[stemmedHead].append(stemmedDependent)
         trees.append(tree)
     return trees
示例#3
0
def parseSenses(senseList, parserPath="/home/rohith/stanford-parser"):
    """Parse the senses with StanfordParser and return their dependency pairs.

    The senses are joined with '. ' into a single string so the parser only
    has to be initialised once, then stemmed with stop words removed before
    parsing.  Digits and '-' characters are stripped from the parser output,
    which is then split on blank lines into blocks.

    Args:
        senseList: iterable of sense strings.
        parserPath: location handed to ``StanfordParser``; defaults to the
            path that used to be hard-coded here.

    Returns:
        A list with one entry per block of parser output.  Each entry is
        the list of 2-tuples captured from text shaped like "(a, b)" in
        that block.
    """
    joined = '. '.join(senseList)
    joined = stemWords(joined, rmStopWords=True)

    parser = StanfordParser(parserPath)
    typedDep = parser.parse(joined)
    typedDep = re.sub('[0-9-]+', "", typedDep)  # to remove numbers and '-'

    # Separate the output into one block per blank-line-delimited group.
    typedDepList = typedDep.split("\n\n")
    # The last piece is always discarded -- presumably the empty remainder
    # after a trailing blank line; TODO confirm against the parser output.
    typedDepList.pop()

    # Raw string: the backslash-parenthesis escapes are regex syntax, not
    # Python string escapes.
    rx = re.compile(r"\((.+), (.+)\)")
    return [rx.findall(dep) for dep in typedDepList]