Пример #1
0
    def match(self, query, scope, verbose=False):
        """Return the single best species-name match for *query*, or None.

        Candidates are the rows of the ``species`` table whose ``spname``
        starts with *scope*.  Each candidate name is aligned against *query*
        with ``sw.align`` and the candidates are ranked by alignment score.

        Args:
            query: Name to look up.
            scope: Prefix that candidate ``spname`` values must start with.
                It is used as a LIKE pattern, so LIKE wildcards inside it are
                still interpreted by the database.
            verbose: When true, print the three best-scoring candidates and
                dump their alignments.

        Returns:
            ``(taxid, spname, identity)`` for the best candidate, where
            ``identity`` is the alignment's ``matches`` count divided by
            ``len(query)``.  None is returned when *query* is empty, when
            there are no candidates, when the best candidate has fewer than
            ``len(query) * self._requiredQueryIdentity`` matches, or when the
            best candidate is not strictly better than the runner-up in both
            score and matches (ambiguous result).
        """
        # An empty query cannot be matched, and would otherwise end in a
        # division by zero when the identity fraction is computed below.
        if not query:
            return None

        # Bind the LIKE pattern as a parameter instead of interpolating it
        # into the SQL text, so quotes in `scope` can neither break nor alter
        # the statement.
        # NOTE(review): this uses the qmark ('?') placeholder style of the
        # sqlite3 module -- confirm self._db is a sqlite3 connection/cursor.
        pattern = "%s%%" % scope
        rows = self._db.execute(
            'select taxid, spname from species where spname like ?;',
            (pattern,),
        ).fetchall()  # TODO where rank in (111,222,333)

        out = []
        for taxId, ncbiMatch in rows:
            alignment = sw.align(ncbiMatch, query)
            out.append((taxId, ncbiMatch, alignment, alignment.score))

        # Best score first; the sort is stable, so equal scores keep the
        # order in which the database returned them.
        out.sort(key=lambda row: -row[3])

        if verbose:
            for taxId, ncbiMatch, alignment, _score in out[:3]:
                print("--"*20)
                print("%d %s" % (taxId, ncbiMatch))
                print("score: %d matches: %d" % (alignment.score, alignment.matches))
                alignment.dump()

        if not out:
            return None

        bestTaxId, bestName, best, bestScore = out[0]

        # Reject weak hits: too few matches relative to the query length.
        if best.matches < len(query) * self._requiredQueryIdentity:
            return None

        # Reject ambiguous hits: the winner must beat the runner-up strictly
        # on both score and number of matches.
        if len(out) > 1:
            _, _, runnerUp, runnerUpScore = out[1]
            if not (bestScore > runnerUpScore and best.matches > runnerUp.matches):
                return None

        return (bestTaxId, bestName, float(best.matches) / len(query))
Пример #2
0
def findBestMatches(n1, n2):
    """Map every name in *n1* to its highest-scoring counterpart in *n2*.

    Both names of a pair are passed through ``removeUninformativeTerms``
    before being aligned with ``sw.align``; candidates are compared by the
    alignment's ``score``.

    Args:
        n1: Iterable of names to find matches for.  They become the keys of
            the result, so they must be hashable.
        n2: Iterable of candidate names.

    Returns:
        A dict mapping each element of *n1* to the element of *n2* with the
        highest alignment score.  Among equal scores the candidate seen first
        wins.  Every value is None when *n2* is empty.
    """
    # Clean each candidate once up front instead of once per (a, b) pair.
    # NOTE(review): this relies on removeUninformativeTerms being
    # deterministic and free of side effects -- confirm.
    candidates = [(b, removeUninformativeTerms(b)) for b in n2]

    # No candidates: nothing to align, every name maps to None.
    if not candidates:
        return dict.fromkeys(n1)

    bestMatches = {}
    for a in n1:
        cleaned_a = removeUninformativeTerms(a)

        best_b = None
        best_score = None
        for b, cleaned_b in candidates:
            score = sw.align(cleaned_a, cleaned_b).score
            # Strict '>' keeps the first candidate seen among equal scores.
            if best_score is None or score > best_score:
                best_score = score
                best_b = b

        bestMatches[a] = best_b
    return bestMatches
def findMatchingName(name):
    """Find the entry of ``speciesMapping`` that aligns best with *name*.

    Every key of ``speciesMapping`` is aligned against *name* with
    ``sw.align`` and the key with the highest positive score is kept.

    Args:
        name: Name to look up.

    Returns:
        ``(bestMatch, bestMatchTaxId, isBestMatchATie)``: the best-scoring
        key, the value stored for it in ``speciesMapping``, and whether a
        later key reached exactly the same score.  ``bestMatch`` is ``""``
        and ``bestMatchTaxId`` is ``0`` when no key scored above zero.
    """
    topScore = 0.0
    topName = ""
    topTaxId = 0
    tied = False

    for candidate, candidateTaxId in speciesMapping.items():
        score = sw.align(name, candidate).score

        if score == topScore:
            # Same score as the current leader: remember the ambiguity.
            # NOTE(review): while no leader exists yet (topScore is 0) a
            # zero-score candidate also sets this flag, so the function can
            # return ("", 0, True) -- confirm that callers expect this.
            tied = True
            continue

        if score > topScore and score > 0:
            # New strict leader; any earlier tie no longer applies.
            topName, topTaxId, topScore = candidate, candidateTaxId, score
            tied = False

    return (topName, topTaxId, tied)
Пример #4
0
def matchInexactNames(n1, n2):
    """Pair up names from *n1* and *n2* that are each other's best match.

    ``findBestMatches`` is run in both directions; a pair ``(a, b)`` is
    accepted only when ``b`` is the best match for ``a`` and ``a`` is the
    best match for ``b``.  For every name of *n1* whose best match is not
    reciprocal, the competing alignments are printed for inspection.

    Args:
        n1: Iterable of names (duplicates are ignored).
        n2: Iterable of names (duplicates are ignored).

    Returns:
        A list of ``(a, b)`` tuples with ``a`` from *n1* and ``b`` from *n2*.
        The list is empty when either input is empty.  Its order follows set
        iteration order and is therefore not defined.
    """
    n1 = set(n1)
    n2 = set(n2)

    # With either side empty there is nothing to pair.  Without this guard an
    # empty n2 makes findBestMatches map every name to None, and the reverse
    # lookup below then raises KeyError.
    if not n1 or not n2:
        return []

    bestMatches12 = findBestMatches(n1, n2)
    bestMatches21 = findBestMatches(n2, n1)

    matches = []

    for a in n1:
        b = bestMatches12[a]
        back = bestMatches21[b]  # the name in n1 that b likes best

        if a == back:
            matches.append((a, b))
            continue

        # Not reciprocal: show the alignments along the a -> b -> back chain.
        print("**"*20)
        for src, dst in ((a, b), (b, back)):
            print("%s -> %s" % (src, dst))
            sw.align(src, dst).dump()
        print("--")
        for src, dst in ((b, back), (back, bestMatches12[back])):
            print("%s -> %s" % (src, dst))
            sw.align(src, dst).dump()

        print("**"*20)

    return matches