def match(self, query, scope, verbose=False):
    """Find the single species name that best aligns with *query*.

    Candidate names are the ``spname`` values of the ``species`` table that
    start with *scope*.  Each candidate is aligned against *query* with
    ``sw.align`` and the candidates are ranked by alignment score.

    Args:
        query: Name to look up.
        scope: Prefix that candidate species names must start with.
        verbose: When true, print the three highest-scoring alignments.

    Returns:
        ``(taxId, speciesName, identity)`` for the winning candidate, where
        ``identity`` is ``matches / len(query)``; or ``None`` when *query* is
        empty, no candidate exists, the winner has fewer matches than
        ``len(query) * self._requiredQueryIdentity``, or the winner does not
        strictly beat the runner-up in both score and match count.
    """
    # An empty query cannot match anything, and would otherwise lead to a
    # division by zero when the identity fraction is computed below.
    if not query:
        return None

    # Bind the prefix as a parameter instead of interpolating it into the SQL
    # text, so quotes in `scope` can neither break nor alter the statement.
    # NOTE(review): assumes self._db uses the qmark ("?") paramstyle, as the
    # sqlite3 module does - confirm against where _db is created.
    pattern = "%s%%" % (scope,)
    rows = self._db.execute(
        'select taxid, spname from species where spname like ?;',
        (pattern,),
    ).fetchall()  # TODO where rank in (111,222,333)

    out = []
    for taxId, ncbiMatch in rows:
        alignment = sw.align(ncbiMatch, query)
        out.append((taxId, ncbiMatch, alignment, alignment.score))

    # Highest score first; the sort is stable, so equal scores keep DB order.
    out.sort(key=lambda entry: -(entry[3]))

    if verbose:
        for entry in out[:3]:
            print("--"*20)
            print("%d %s" % (entry[0], entry[1]))
            print("score: %d matches: %d" % (entry[2].score, entry[2].matches))
            entry[2].dump()

    if not out:
        return None

    best = out[0]
    if best[2].matches < len(query) * self._requiredQueryIdentity:
        return None

    # Reject ambiguous results: the winner must be strictly better than the
    # runner-up on both score and number of matches.
    if len(out) > 1:
        runnerUp = out[1]
        if not (best[3] > runnerUp[3] and best[2].matches > runnerUp[2].matches):
            return None

    return (best[0], best[1], float(best[2].matches) / len(query))
def findBestMatches(n1, n2):
    """Map every name in *n1* to its highest-scoring counterpart in *n2*.

    Names are passed through ``removeUninformativeTerms`` before being
    aligned with ``sw.align``; candidates are ranked by the alignment's
    ``score``.  When several candidates share the top score, the one
    encountered first while iterating *n2* wins.

    Args:
        n1: Iterable of names to find partners for.
        n2: Iterable of candidate names.

    Returns:
        Dict mapping each element of *n1* to the best-scoring element of
        *n2*, or to ``None`` when *n2* is empty.
    """
    # Clean every candidate once instead of once per (a, b) pair.
    # NOTE(review): assumes removeUninformativeTerms is a pure function of
    # its argument - confirm.
    candidates = [(b, removeUninformativeTerms(b)) for b in n2]
    if not candidates:
        # Nothing to compare against: every name maps to None.
        return {a: None for a in n1}

    bestMatches = {}
    for a in n1:
        cleaned_a = removeUninformativeTerms(a)
        best_b = None
        best_score = None
        for b, cleaned_b in candidates:
            score = sw.align(cleaned_a, cleaned_b).score
            # Strict comparison keeps the earliest candidate on ties.
            if best_score is None or score > best_score:
                best_score = score
                best_b = b
        bestMatches[a] = best_b
    return bestMatches
def findMatchingName(name):
    """Find the key of ``speciesMapping`` that aligns best with *name*.

    Every key of the module-level ``speciesMapping`` is aligned against
    *name* with ``sw.align``; only alignments with a positive score are
    considered.

    Args:
        name: Name to look up.

    Returns:
        ``(bestMatch, bestMatchTaxId, isBestMatchATie)``.  ``bestMatch`` is
        the highest-scoring key and ``bestMatchTaxId`` its mapped value;
        ``isBestMatchATie`` is true when another key reached the same top
        score.  When no key scores above zero the result is
        ``("", 0, False)``.
    """
    bestScore = 0.0
    bestMatch = ""
    bestMatchTaxId = 0
    isBestMatchATie = False

    for target, targetTaxId in speciesMapping.items():
        score = sw.align(name, target).score

        # Non-positive scores are not matches at all.  Skipping them here
        # keeps a zero score from being reported as a "tie" with the initial
        # bestScore of 0.0 when nothing has matched yet.
        if score <= 0:
            continue

        if score > bestScore:
            bestMatch = target
            bestMatchTaxId = targetTaxId
            bestScore = score
            isBestMatchATie = False
        elif score == bestScore:
            # Another target reached the current top score.
            isBestMatchATie = True

    return (bestMatch, bestMatchTaxId, isBestMatchATie)
def matchInexactNames(n1, n2):
    """Pair up names from two collections that are each other's best match.

    Both inputs are de-duplicated, then ``findBestMatches`` is run in both
    directions.  A pair ``(a, b)`` is accepted only when ``b`` is the best
    match for ``a`` and ``a`` is the best match for ``b``.  For every name
    of *n1* that fails this reciprocity test, the alignments involved are
    printed to stdout for inspection.

    Args:
        n1: Iterable of names.
        n2: Iterable of names.

    Returns:
        List of ``(a, b)`` tuples with ``a`` from *n1* and ``b`` from *n2*.
        Empty when either input is empty.
    """
    n1 = set(n1)
    n2 = set(n2)

    # With an empty side there is nothing to pair.  Without this guard an
    # empty n2 maps every name to None and the reverse lookup raises KeyError.
    if not n1 or not n2:
        return []

    bestMatches12 = findBestMatches(n1, n2)
    bestMatches21 = findBestMatches(n2, n1)

    matches = []
    for a in n1:
        b = bestMatches12[a]
        back = bestMatches21[b]  # best match in n1 for a's best match
        if a == back:
            matches.append((a, b))
            continue

        # Non-reciprocal best match: dump the alignments involved.
        print("**"*20)
        print("%s -> %s" % (a, b))
        sw.align(a, b).dump()
        print("%s -> %s" % (b, back))
        sw.align(b, back).dump()
        print("--")
        print("%s -> %s" % (b, back))
        sw.align(b, back).dump()
        print("%s -> %s" % (back, bestMatches12[back]))
        sw.align(back, bestMatches12[back]).dump()
        print("**"*20)

    return matches