Пример #1
0
def similarity_score(kmers1, kmers2, scoring="jaccard"):
    """Score the similarity of two k-mer sets with the selected method.

    The numerator is always the value returned by
    ``kmerizer.count_common(kmers1, kmers2)``; ``scoring`` selects the
    denominator:

    * ``"jaccard"``   -- ``kmers1.size + kmers2.size - intersection``
    * ``"minsize"``   -- the smaller of the two sizes (a proper subset
      scores 1.0)
    * ``"meansize"``  -- the mean of the two sizes (used in Mash)
    * ``"maxsize"``   -- the larger of the two sizes (a proper subset
      scores min/max)
    * ``"reference"`` -- ``kmers2.size`` (useful for comparing reads to
      assembled references)

    Args:
        kmers1: First k-mer set; must expose a ``size`` attribute.
        kmers2: Second k-mer set; must expose a ``size`` attribute. It is
            the reference when ``scoring="reference"``.
        scoring: Name of the scoring method (see the list above).

    Returns:
        The intersection count divided by the selected denominator.

    Raises:
        AssertionError: If ``scoring`` is not one of the supported names.
    """
    # Validate before doing any work. The exception is raised explicitly
    # (rather than via an ``assert`` statement) so that the check survives
    # ``python -O``; the type and message match the previous behaviour.
    if scoring not in (
            "jaccard", "minsize", "maxsize", "meansize", "reference"):
        raise AssertionError("unknown scoring method")

    # count of kmers in common, as a float so every division below is a
    # true division
    intersection = float(kmerizer.count_common(kmers1, kmers2))

    if scoring == "jaccard":
        # Size of the union: |A| + |B| - |A & B|
        denominator = kmers1.size + kmers2.size - intersection
    elif scoring == "minsize":
        denominator = min(kmers1.size, kmers2.size)
    elif scoring == "meansize":
        # 2.0 keeps the mean fractional even without true division
        denominator = (kmers1.size + kmers2.size) / 2.0
    elif scoring == "maxsize":
        denominator = max(kmers1.size, kmers2.size)
    else:
        # scoring == "reference"
        denominator = kmers2.size

    return intersection / denominator
Пример #2
0
def reference(kmers1, kmers2):
    """Score k-merset 1 against k-merset 2, taken to be a reference genome.

    Returns the result of ``kmerizer.count_common(kmers1, kmers2)`` divided
    by the size of k-merset 2.
    """
    shared = kmerizer.count_common(kmers1, kmers2)
    reference_size = kmers2.size
    return shared / reference_size
Пример #3
0
def subset(kmers1, kmers2):
    """Return the fraction of k-merset 1 that is shared with k-merset 2.

    Handy for checking whether k-merset 1 is a subset of k-merset 2: the
    common count from ``kmerizer.count_common`` is divided by the size of
    k-merset 1.
    """
    shared = kmerizer.count_common(kmers1, kmers2)
    query_size = kmers1.size
    return shared / query_size
Пример #4
0
def maxsize(kmers1, kmers2):
    """Return the common k-mer count divided by the larger set size.

    The common count comes from ``kmerizer.count_common``; the divisor is
    the greater of ``kmers1.size`` and ``kmers2.size``.
    """
    shared = kmerizer.count_common(kmers1, kmers2)
    larger_size = max(kmers1.size, kmers2.size)
    return shared / larger_size
Пример #5
0
def meansize(kmers1, kmers2):
    """Return the common k-mer count divided by the mean of the set sizes.

    The common count comes from ``kmerizer.count_common``; the divisor is
    half the sum of ``kmers1.size`` and ``kmers2.size``.
    """
    shared = kmerizer.count_common(kmers1, kmers2)
    combined_size = kmers1.size + kmers2.size
    mean_size = combined_size / 2
    return shared / mean_size
Пример #6
0
def jaccard(kmers1, kmers2):
    """Compute the Jaccard similarity of two k-mer sets.

    The numerator is the value returned by
    ``kmerizer.count_common(kmers1, kmers2)``; the denominator is the size
    of the union, ``kmers1.size + kmers2.size - intersection``.

    Returns:
        The single quotient ``intersection / union`` (the numerator and
        denominator are not returned separately).
    """
    intersection = kmerizer.count_common(kmers1, kmers2)
    # Inclusion-exclusion: |A | B| = |A| + |B| - |A & B|
    union = kmers1.size + kmers2.size - intersection
    return intersection / union