def Query(self, handles, status, abort):
    """Fetch the best-matching lyric for every handle.

    For each handle the title and artist are read with ``handle.Format``,
    candidates are obtained from ``get_lyric_list`` (an iterable of
    ``(url, title, artist)`` triples) and the candidate with the smallest
    combined Levenshtein distance to the requested artist/title is
    downloaded with ``get_lyric``.

    Args:
        handles: iterable of objects exposing ``Format(pattern)``.
        status: progress object; ``Advance()`` is called once per handle.
        abort: object whose ``Aborting()`` is polled before each lookup.

    Returns:
        A list with one entry per processed handle: the lyric text, or
        ``''`` when nothing matched or the lookup raised. If an abort is
        requested, the entries gathered so far are returned.
    """
    result = []
    for handle in handles:
        status.Advance()
        if abort.Aborting():
            return result

        title = handle.Format("[%title%]")
        artist = handle.Format("[%artist%]")
        try:
            candidates = get_lyric_list(title, artist)

            # Keep the first candidate with the strictly smallest distance.
            best_distance = 0xffffffffffffffff
            best = None
            for url, cand_title, cand_artist in candidates:
                distance = (LevenshteinDistance(artist, cand_artist) +
                            LevenshteinDistance(title, cand_title))
                if best_distance > distance:
                    best_distance = distance
                    best = url

            if best is None:
                result.append('')
            else:
                result.append(get_lyric(best))
        except Exception:
            # Best effort: report the failure and keep the result list
            # aligned with the handles by storing an empty lyric.
            traceback.print_exc(file=sys.stdout)
            result.append('')

    # Previously missing: without it a normal (non-aborted) run returned None.
    return result
def Query(self, handles, status, abort):
    """Fetch lyrics for every handle from the QianQian lyric server.

    For each handle a search request is sent with the hex-encoded artist
    and title, the ``<lrc>`` element closest to the requested artist/title
    (smallest combined Levenshtein distance) is selected, and its lyric is
    downloaded using the code produced by ``CreateQianQianCode``.

    Args:
        handles: iterable of objects exposing ``Format(pattern)``.
        status: progress object; ``Advance()`` is called once per handle.
        abort: object whose ``Aborting()`` is polled before each lookup.

    Returns:
        A list with one entry per processed handle: the downloaded lyric,
        or ``''`` when no candidate was found, the server rejected the
        id/code, or the lookup raised. If an abort is requested, the
        entries gathered so far are returned.
    """
    result = []
    for handle in handles:
        status.Advance()
        if abort.Aborting():
            return result

        try:
            artist = handle.Format("[%artist%]")
            title = handle.Format("[%title%]")

            # The search response is UTF-8 encoded.
            s = urllib.urlopen(
                "http://ttlrcct2.qianqian.com/dll/lyricsvr.dll?sh?Artist=%s&Title=%s&Flags=0"
                % (self.ToQianQianHexString(artist),
                   self.ToQianQianHexString(title))).read()
            doc = minidom.parseString(s)

            best_distance = 0xFFFFFFFFFFFFFFFF
            best = None
            for node in doc.getElementsByTagName("lrc"):
                # Attributes are encoded to UTF-8 before measuring: an
                # earlier version compared texts in different encodings.
                # NOTE(review): presumes handle.Format returns UTF-8 byte
                # strings - confirm against the host API.
                distance = (
                    LevenshteinDistance(
                        artist, node.getAttribute("artist").encode("utf-8")) +
                    LevenshteinDistance(
                        title, node.getAttribute("title").encode("utf-8")))
                if best_distance > distance:
                    best_distance = distance
                    best = (node.getAttribute("id"),
                            node.getAttribute("artist"),
                            node.getAttribute("title"))

            if best is None:
                result.append('')
                continue

            Id, artist, title = best
            code = self.CreateQianQianCode(Id, artist, title)
            txheaders = {
                'User-agent':
                'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)'
            }
            req = urllib2.Request(
                "http://ttlrcct2.qianqian.com/dll/lyricsvr.dll?dl?Id=%s&Code=%d"
                % (Id, code), None, txheaders)
            lyric = urllib2.urlopen(req).read()

            # The server reports a bad id/code pair inside the body.
            if lyric.find("Search ID or Code error!") >= 0:
                result.append('')
            else:
                result.append(lyric)
        except Exception:
            # Best effort: report the failure and keep the result list
            # aligned with the handles by storing an empty lyric.
            traceback.print_exc(file=sys.stdout)
            result.append('')

    # Previously missing: without it a normal (non-aborted) run returned None.
    return result
def sort_by_weighted_edit_dist(self):
    """Order the sequences by weighted edit distance to scTIM, largest first.

    After calling ``clear_weight()``, every sequence's ``weight`` is set to
    the distance that ``LeveDist(5, 5, 3)`` reports between its ``seq`` and
    the scTIM ``seq``; the sequence list is then sorted in place by
    descending weight. (Approach credited to John in the original.)
    """
    self.clear_weight()
    scorer = LeveDist(5, 5, 3)

    def by_weight(entry):
        return entry.weight

    for entry in self.__sequence:
        entry.weight = scorer.computeDistance(entry.seq, self.__scTIM.seq)

    self.__sequence.sort(key=by_weight, reverse=True)
def sort_by_edit_dist(self):
    """Order the sequences by edit distance to scTIM, largest first.

    After calling ``clear_weight()``, every sequence's ``weight`` is set to
    the distance that ``LeveDist(1, 1, 1)`` reports between its ``seq`` and
    the scTIM ``seq``; the sequence list is then sorted in place by
    descending weight. (Original by John Wenskovitch, written in Java.)
    """
    self.clear_weight()
    scorer = LeveDist(1, 1, 1)

    def by_weight(entry):
        return entry.weight

    for entry in self.__sequence:
        entry.weight = scorer.computeDistance(entry.seq, self.__scTIM.seq)

    self.__sequence.sort(key=by_weight, reverse=True)
class SpellChecker:
    """Suggest known keywords that closely resemble the words of a text."""

    # Minimum similarity for a suggestion, inclusive.
    sim_min = 0.75

    def __init__(self):
        """Load the keyword collection and create the distance calculator."""
        self.keyword = ContextIdentifier().getKeyword()
        self.distance_counter = LevenshteinDistance()

    def getWordSuggestion(self, text):
        """Return keywords that are near-misses of words in ``text``.

        ``text`` is split on single spaces and lower-cased. A keyword is
        suggested when its similarity to some word,
        ``1 - distance / max(len(word), len(keyword))``, is at least
        ``sim_min``. Keywords at distance 0 from a word are not suggested
        for that word.

        Args:
            text: the user input to check.

        Returns:
            A list of suggested keywords, without duplicates, in no
            particular order.
        """
        word_suggestion = set()
        suggested_word_candidate_set = set(self.keyword)
        word_set = {word.lower() for word in text.split(" ") if word != ""}

        for word in word_set:
            # The set difference yields a fresh set, so removing accepted
            # keywords from the candidate set during the loop is safe.
            current_suggestion = suggested_word_candidate_set - word_suggestion
            for suggested_word_candidate in current_suggestion:
                distance = self.distance_counter.getDistance(
                    word, suggested_word_candidate)
                if distance == 0:
                    continue
                value = 1 - distance / max(len(word),
                                           len(suggested_word_candidate))
                # ``sim_min`` is documented as inclusive, so a similarity
                # exactly at the threshold qualifies (was a strict ``>``).
                if value >= self.sim_min:
                    word_suggestion.add(suggested_word_candidate)
                    suggested_word_candidate_set.remove(
                        suggested_word_candidate)

        return list(word_suggestion)
def sort_by_weighted_edit_dist(self):
    """Order the sequences by weighted edit distance to scTIM, largest first.

    After calling ``clear_weight()``, every sequence's ``weight`` is set to
    the distance that ``LeveDist(5, 5, 3)`` reports between its ``seq`` and
    the scTIM ``seq``. The first sequence's weight is then raised by a
    large constant and the last sequence's weight is set to one less than
    that, before the list is sorted in place by descending weight.
    (Approach credited to John in the original.)
    """
    self.clear_weight()
    scorer = LeveDist(5, 5, 3)

    def by_weight(entry):
        return entry.weight

    for entry in self.__sequence:
        entry.weight = scorer.computeDistance(entry.seq, self.__scTIM.seq)

    # Give the dTIM_core entry a very large weight so that it always ends
    # up at the top after sorting.
    # NOTE(review): presumably the first entry is dTIM_core - confirm.
    first = self.__sequence[0]
    first.weight += 9999999999
    last = self.__sequence[-1]
    last.weight = first.weight - 1

    self.__sequence.sort(key=by_weight, reverse=True)
def __init__(self):
    """Load the keyword collection and create the distance calculator."""
    identifier = ContextIdentifier()
    self.keyword = identifier.getKeyword()
    self.distance_counter = LevenshteinDistance()