def queryDocs(query):
    """Rank documents against a free-text query.

    The characters ``, . ! ? -`` are replaced by spaces, the text is passed
    through ``toUnicode`` and ``filterWord`` and then split on whitespace.
    Words found in ``intentWords`` are held back as intent words; the
    remaining words are scored with ``selectAndExpand`` against
    ``graph``/``globalDict`` and, best-effort, against
    ``titleGraph``/``globalDictTitle``, whose scores are weighted by 1.5.
    Every intent word that matches at least one element of a document's
    title (``editDistance(...) >= 0.7``) adds a further 0.2 to that document.

    TODO: scheme for repeated words in the query.

    Args:
        query: Raw query string.

    Returns:
        A list of ``(doc, score)`` tuples ordered by descending score, where
        ``doc`` is ``idxToFile[doc_index]``.
    """
    title_weight = 1.5      # multiplier applied to title-index scores
    match_threshold = 0.7   # minimum editDistance() value counted as a match
    intent_bonus = 0.2      # added once per intent word matching the title

    # Raw string: '\-' is an invalid escape in a normal string literal.
    query = re.sub(r'[,.!?\-]', ' ', query)
    query = toUnicode(query)
    query = filterWord(query)
    words = query.split()

    # Intent words do not take part in retrieval; they only add a bonus below.
    intent = [w for w in words if w in intentWords]
    queryWords = [w for w in words if w not in intentWords]

    scores = selectAndExpand(queryWords, graph, globalDict)

    # The title lookup stays best-effort: any failure falls back to the
    # scores computed above. Only the lookup itself is guarded so that
    # problems while merging are not hidden.
    try:
        title_scores = selectAndExpand(queryWords, titleGraph, globalDictTitle)
    except Exception:
        title_scores = {}

    for doc_idx, title_score in title_scores.items():
        # .get() keeps a document that is missing from `scores` from raising
        # KeyError, which previously aborted the merge part-way through.
        scores[doc_idx] = scores.get(doc_idx, 0) + title_score * title_weight

    if intent:
        # Iterate over a snapshot of the keys because values are updated
        # inside the loop.
        for doc_idx in list(scores):
            # NOTE(review): assumes getTitle() returns an iterable of words
            # and editDistance() a similarity where higher means closer --
            # confirm against their definitions.
            title = getTitle(doc_idx)
            for intent_word in intent:
                if any(editDistance(w, intent_word) >= match_threshold
                       for w in title):
                    scores[doc_idx] += intent_bonus

    # sorted() works on dict views as well as lists and is stable, so
    # documents with equal scores keep their original relative order.
    ranked = sorted(scores.items(), key=lambda item: -item[1])
    return [(idxToFile[doc_idx], score) for doc_idx, score in ranked]