示例#1
0
    def _get_adj_list_directional(self, umis, counts):
        ''' identify all umis within the hamming distance threshold
        and where the counts of the first umi is > (2 * second umi counts)-1'''

        adj_list = {umi: [] for umi in umis}

        if self.fuzzy_match:
            for umi1 in umis:
                # we need a second regex for some insertions,
                # e.g UMI1 = "ATCG", UMI2 = "ATTC"
                comp_regex_err = regex.compile("(%s){e<=1}" % str(umi1))
                comp_regex_del = regex.compile("(%s){i<=1}" % str(umi1)[::-1])
                for umi2 in umis:
                    if umi1 == umi2:
                        continue
                    if counts[umi1] >= (counts[umi2]*self.dir_threshold):
                        if (max(len(umi1), len(umi2)) -
                            min(len(umi1), len(umi2))) > 1:
                            continue
                        if (comp_regex_err.match(str(umi2)) or
                            comp_regex_del.match(str(umi2))):
                            adj_list[umi1].append(umi2)
        else:
            for umi1, umi2 in itertools.combinations(umis, 2):
                if edit_distance(umi1, umi2) <= 1:
                    if counts[umi1] >= (counts[umi2]*2)-1:
                        adj_list[umi1].append(umi2)
                    if counts[umi2] >= (counts[umi1]*2)-1:
                        adj_list[umi2].append(umi1)

        return adj_list
    def _get_adj_list_directional_adjacency(self, umis, counts, threshold):
        ''' identify all umis within the hamming distance threshold
        and where the counts of the first umi is > (2 * second umi counts)-1'''

        return {umi: [umi2 for umi2 in umis if
                      edit_distance(umi, umi2) == 1 and
                      counts[umi] >= (counts[umi2]*2)-1] for umi in umis}
示例#3
0
    def _get_adj_list_directional_adjacency(self, umis, counts, threshold):
        ''' identify all umis within the hamming distance threshold
        and where the counts of the first umi is > (2 * second umi counts)-1'''

        return {umi: [umi2 for umi2 in umis if
                      edit_distance(umi, umi2) == 1 and
                      counts[umi] >= (counts[umi2]*2)-1] for umi in umis}
示例#4
0
    def _get_adj_list_adjacency(self, umis, counts, threshold):
        ''' identify all umis within hamming distance threshold'''

        return {
            umi:
            [umi2 for umi2 in umis if edit_distance(umi, umi2) <= threshold]
            for umi in umis
        }
示例#5
0
    def _get_adj_list_adjacency(self, umis, counts, threshold):
        ''' identify all umis within hamming distance threshold'''

        adj_list = {umi: [] for umi in umis}
        if len(umis) > 25:
            umi_length = len(umis[0])
            substr_idx = build_substr_idx(umis, umi_length, threshold)
            iter_umi_pairs = iter_nearest_neighbours(umis, substr_idx)
        else:
            iter_umi_pairs = itertools.combinations(umis, 2)
        for umi1, umi2 in iter_umi_pairs:
            if edit_distance(umi1, umi2) <= threshold:
                adj_list[umi1].append(umi2)
                adj_list[umi2].append(umi1)

        return adj_list
示例#6
0
    def _get_adj_list_directional(self, umis, counts, threshold=1):
        ''' identify all umis within the hamming distance threshold
        and where the counts of the first umi is > (2 * second umi counts)-1'''

        adj_list = {umi: [] for umi in umis}
        if len(umis) > 25:
            umi_length = len(umis[0])
            substr_idx = build_substr_idx(umis, umi_length, threshold)
            iter_umi_pairs = iter_nearest_neighbours(umis, substr_idx)
        else:
            iter_umi_pairs = itertools.combinations(umis, 2)
        for umi1, umi2 in iter_umi_pairs:
            if edit_distance(umi1, umi2) <= threshold:
                if counts[umi1] >= (counts[umi2]*2)-1:
                    adj_list[umi1].append(umi2)
                if counts[umi2] >= (counts[umi1]*2)-1:
                    adj_list[umi2].append(umi1)

        return adj_list
    def _get_adj_list_adjacency(self, umis, counts, threshold):
        ''' identify all umis within hamming distance threshold'''

        return {umi: [umi2 for umi2 in umis if
                      edit_distance(umi, umi2) <= threshold]
                for umi in umis}
def get_average_umi_distance(umis):
    """Return the mean pairwise edit distance between the given UMIs.

    Args:
        umis: sized collection of UMIs.

    Returns:
        The mean of ``edit_distance`` over every unordered pair of UMIs as
        a float, or -1 when fewer than two UMIs are given, since no pair
        exists to measure.
    """
    # An empty collection used to fall through to the division below and
    # raise ZeroDivisionError; it now gets the same sentinel as a single UMI.
    if len(umis) < 2:
        return -1
    dists = [edit_distance(*pair) for pair in itertools.combinations(umis, 2)]
    return float(sum(dists)) / len(dists)
示例#9
0
def get_average_umi_distance(umis):
    """Compute the average edit distance over all unordered pairs of UMIs.

    Args:
        umis: sized collection of UMIs.

    Returns:
        float average of ``edit_distance`` across every pair, or -1 if the
        collection holds fewer than two UMIs and so has no pairs.
    """
    # Guard the empty case as well as the single-UMI case: with no pairs
    # the division below would raise ZeroDivisionError.
    if len(umis) < 2:
        return -1
    dists = [edit_distance(*pair) for pair in itertools.combinations(umis, 2)]
    return float(sum(dists)) / len(dists)