Пример #1
0
def make_tree(kmers):
    """Build a RedBlackTree containing each distinct item of ``kmers`` once.

    Items are inserted in iteration order; an item the tree already
    reports via ``contains`` is skipped.  Progress is shown with tqdm.

    Args:
        kmers: Iterable of items to insert.

    Returns:
        The populated RedBlackTree.
    """
    tree = RedBlackTree()
    for kmer in tqdm(kmers, desc="adding to tree: "):
        if tree.contains(kmer):
            # Already present: nothing to insert.
            continue
        tree.add(kmer)

    return tree
Пример #2
0
def true_address_no_sort(lcp: list, au: RedBlackTree, s_arr: deque, top=100, bot=20, distance=300):
    """Pair each suffix address with its unique-start length and a capped distance.

    The LCP values are first rewritten so that every entry becomes the larger
    of itself and the original entry that preceded it (the "unique start").
    Each suffix address is then located in ``au``, whose elements are
    ``[unam, ambs]`` pairs: ``floor`` and ``ceil`` give the pairs bracketing
    the address.  An address is kept only when ``address + unique_start``
    does not exceed the first value of the ceiling pair.

    Both ``lcp`` and ``s_arr`` are consumed.  When they are passed in as
    deques, the caller's objects are emptied.

    Args:
        lcp: LCP values, one per entry of ``s_arr``.
        au: Tree of ``[unam, ambs]`` pairs, queried with ``floor``/``ceil``.
        s_arr: Suffix addresses, processed front to back.
        top: Upper cap applied to the stored distance.
        bot: Unused.
        distance: Unused.

    Returns:
        dict mapping ``address + ceiling_pair[1]`` to
        ``(unique_start, min(ceiling_pair[0] - address, top))``.

    Raises:
        AssertionError: if ``au`` is not a RedBlackTree or the two input
            sequences differ in length.
    """
    assert isinstance(au, RedBlackTree)

    if not isinstance(lcp, deque):
        lcp = deque(lcp)

    if not isinstance(s_arr, deque):
        s_arr = deque(s_arr)

    assert len(lcp) == len(s_arr)

    # One full rotation of the deque: each value is popped from the front and
    # re-appended as max(value, previous original value).
    previous = 0
    for _ in trange(len(lcp), desc="calculating unique starts: "):
        current = lcp.popleft()
        lcp.append(max(current, previous))
        previous = current

    # dict to be returned
    d = {}
    u_ceil = 0
    u_floor = 0
    myl = []
    # The context manager closes the progress bar even on an early exit.
    with tqdm(total=len(s_arr), desc="Finding true addresses: ") as pbar:
        while s_arr:
            sa = s_arr.popleft()
            pbar.update(1)
            # get next suffix address and corresponding lcp (unique start) value
            lcp_curr = lcp.popleft()

            # Normalise both values to plain ints independently of each other.
            sa = int(sa)
            lcp_curr = int(lcp_curr)

            # Refresh the bracketing pairs only when the address leaves the
            # current [u_floor, u_ceil] window.
            if sa > u_ceil or sa < u_floor:

                # myl = list from RedBlackTree
                # [unam ambs]
                myl = au.floor([sa])
                u_floor = 0 if not myl else myl[0]

                myl = au.ceil([sa])
                if not myl:
                    # NOTE(review): u_floor was just updated but u_ceil keeps
                    # its previous value here -- confirm this is intended.
                    continue
                u_ceil = myl[0]

            if (lcp_curr + sa) > u_ceil:
                continue

            difference = u_ceil - sa  # TODO: do we need to add 1 here?
            # TODO: is the u_floor going too low?
            # NOTE(review): myl is still empty if no lookup has succeeded yet
            # (e.g. first address is 0), which would raise IndexError below.
            d[sa + myl[1]] = (lcp_curr, min(top, difference))

    return d
Пример #3
0
def rb_tree_ambs(ambs:deque, unam:deque):
    """Build a RedBlackTree of cumulative ``[unam_offset, ambs_offset]`` pairs.

    Values are popped from the front of both deques in lockstep until either
    one is empty, so the caller's deques are consumed.  The running ``unam``
    total starts at -1 and the running ``ambs`` total at 0; after each pair of
    pops the two totals are stored in the tree as a two-element list.

    Args:
        ambs: Deque of lengths added to the ``ambs`` running total.
        unam: Deque of lengths added to the ``unam`` running total.

    Returns:
        The populated RedBlackTree.
    """
    tree = RedBlackTree()
    # assuming that genome starts with ambs
    unam_total, ambs_total = -1, 0
    while ambs and unam:
        ambs_total += ambs.popleft()
        unam_total += unam.popleft()
        tree.add([unam_total, ambs_total])

    return tree
Пример #4
0
def build_rb_tree(alist):
    """Build a RedBlackTree of word/embedding nodes from text lines.

    Each entry of ``alist`` is split on single spaces.  The first field is
    the word and the remaining fields (kept as strings) form its embedding.
    Entries whose first field is not purely alphabetic are skipped.

    Args:
        alist: Iterable of strings of the form ``"word v1 v2 ..."``.

    Returns:
        The RedBlackTree holding one ``RBTNode(word, embedding)`` per
        accepted entry.
    """
    rb_tree = RedBlackTree()

    for line in alist:
        # str.split always yields at least one field, so the unpacking is safe.
        word, *embedding = line.split(" ")

        if word.isalpha():
            rb_tree.insert(RBTNode(word, embedding))

    return rb_tree
Пример #5
0
def main():
    """Smoke-test the red-black tree: insert a few keys and print them.

    The first item produced by iterating the tree is printed with a
    ``ROOT: `` prefix; every following item is printed as-is.
    """
    rbt = RedBlackTree()

    for key in (5, 10, 30, 3, 13):
        rbt.add(key)

    print("PRE ORDER TRAVERSAL")
    print("===================\n")
    for position, node in enumerate(rbt):
        if position:
            print(node)
        else:
            print("ROOT: " + str(node))
Пример #6
0
def linkage(df):
    """Agglomeratively merge the rows of ``df`` and record every merge.

    Every row starts as its own cluster (ids ``0 .. n-1``).  All ordered
    pairs of clusters are stored in a RedBlackTree as ``triple`` objects
    carrying the ``cityblock`` distance between the two centroids.  On each
    of the ``n - 1`` steps the first triple yielded by the tree whose two
    clusters are both still active is merged (presumably the closest pair --
    this depends on how ``triple`` is ordered); stale triples met on the way
    are removed from the tree.  The merged cluster gets id ``n + step`` and
    the size-weighted mean of the two centroids.

    Args:
        df: Table with ``shape``, and ``loc`` indexable by row labels
            ``0 .. n-1``; one row per observation.

    Returns:
        ``np.array`` with one row per merge:
        ``[left_id, right_id, distance, merged_cluster_size]``.
    """
    tree = RedBlackTree()
    merges = []
    centroids = {}
    sizes = {}
    n_rows = df.shape[0]
    n_cols = df.shape[1]

    for row in range(n_rows):
        centroids[row] = list(df.loc[row, :])
        sizes[row] = 1

    active = set(range(n_rows))

    # Seed the tree with every ordered pair of distinct starting clusters.
    for first in active:
        for second in active:
            if first != second:
                tree.add(triple(first, second,
                                cityblock(centroids[first], centroids[second])))

    for step in range(n_rows - 1):
        new_id = n_rows + step

        # Discard triples that mention an already-merged cluster.
        best = next(iter(tree))
        while best.left_c not in active or best.right_c not in active:
            tree.remove(best)
            best = next(iter(tree))

        left, right = best.left_c, best.right_c
        merged_size = sizes[left] + sizes[right]

        # Size-weighted mean of the two centroids, coordinate by coordinate.
        centroids[new_id] = [
            (centroids[left][col] * sizes[left] +
             centroids[right][col] * sizes[right]) / merged_size
            for col in range(n_cols)
        ]
        sizes[new_id] = merged_size
        merges.append([left, right, best.d, merged_size])

        active.add(new_id)
        active.remove(left)
        active.remove(right)

        # Distances from every surviving cluster to the new one.
        for other in active:
            if other != new_id:
                tree.add(triple(other, new_id,
                                cityblock(centroids[other], centroids[new_id])))

    return np.array(merges)
Пример #7
0
class IntervalTree:
    """Interval container backed by a RedBlackTree.

    Each instance owns its own tree in ``self.rbTree``, so intervals
    inserted into one IntervalTree are not visible through another.
    """

    def __init__(self):
        # Created per instance: a class-level tree would be shared by all
        # IntervalTree objects.
        self.rbTree = RedBlackTree()

    def insert(self, value):
        """Add ``value`` to the underlying tree and return the tree's result."""
        return self.rbTree.add(value)

    def overlapping(self, i1, i2):
        """Return True when ``i1`` and ``i2`` share at least one point.

        Both arguments need ``minimum`` and ``maximum`` attributes; the
        bounds are treated as inclusive.
        """
        return i1.minimum <= i2.maximum and i1.maximum >= i2.minimum

    # Return all overlapping intervals
    def find_overlap(self, value: Interval):
        """Not implemented yet; currently returns None."""
        # If the interval is less than root.min, traverse left, otherwise right
        pass
Пример #8
0
def func(li):
    """Return, for each number in ``li``, one more than its neighbours' depth.

    Numbers are processed in order.  For each one the tree is asked for the
    ``ceil`` and ``floor`` entries of that key; the new entry's depth is the
    larger of their stored ``value`` fields plus one, or 0 when neither
    neighbour exists.  The number is then stored with that depth.
    (Presumably this reproduces the node depths of a plain binary search
    tree built by inserting ``li`` in order -- verify against the caller.)

    Args:
        li: Iterable of keys accepted by ``My_pair``.

    Returns:
        List of depths, one per element of ``li``, in input order.
    """
    tree = RedBlackTree()
    depths = []
    for num in li:
        next_node = tree.ceil(My_pair(num, None))
        prev_node = tree.floor(My_pair(num, None))
        parent_depth = -1
        for node in (next_node, prev_node):
            # Identity test: avoids going through My_pair's comparison
            # operators just to detect a missing neighbour.
            if node is not None:
                parent_depth = max(parent_depth, node.value)
        depth = parent_depth + 1
        tree.add(My_pair(num, depth))
        depths.append(depth)
    return depths
Пример #9
0
def func(li):
    """Overlay ``(a, h, b)`` segments and return the resulting breakpoints.

    ``li`` is sorted IN PLACE by ``h`` (the second field), so segments with
    larger ``h`` are applied later and overwrite earlier ones.  For each
    segment, the value in force at ``b`` is read from the ``floor`` entry of
    ``b`` (0 when there is none), every stored key from ``a`` up to and
    including ``b`` is removed, and two entries are written: ``a -> h`` and
    ``b -> previous value at b``.

    Args:
        li: Mutable list of ``(a, h, b)`` triples; reordered by this call.

    Returns:
        List of ``(key, value)`` tuples in the tree's iteration order.
    """
    li.sort(key=lambda x: x[1])
    tree = RedBlackTree()
    for a, h, b in li:
        # Value that must be restored to the right of this segment.
        prev_b = tree.floor(My_pair(b, None))
        height_b = 0 if prev_b is None else prev_b.value

        # Drop every breakpoint covered by [a, b].
        next_a = tree.ceil(My_pair(a, None))
        while next_a is not None and next_a.key <= b:
            tree.remove(next_a)
            next_a = tree.ceil(My_pair(a, None))

        tree.add(My_pair(a, h))
        tree.add(My_pair(b, height_b))
    return [(element.key, element.value) for element in tree]
Пример #10
0
    return dir_data


def get_train_data(array7=None, *args):
    """Concatenate the data of every directory named in ``array7``.

    Each name is joined onto the module-level ``path_to_root_directory`` and
    passed to ``get_dir_data``; the returned strings are concatenated in
    input order.

    Args:
        array7: Iterable of directory names.  ``None`` (the default) is
            treated as an empty collection.
        *args: Accepted for backward compatibility and ignored.

    Returns:
        The concatenated data as a single string; ``""`` when there are no
        directories.
    """
    if array7 is None:
        array7 = ()
    return "".join(
        get_dir_data(os.path.join(path_to_root_directory, dir_name))
        for dir_name in array7
    )


#print(get_train_data(array7))

# Script section: collect the training text, store its 5-item n-grams in a
# red-black tree, then print the highest-valued ones.
file_content = get_train_data(array7)
tree = RedBlackTree()
# One insertion per 5-item window over the whitespace-split tokens.
for f in ngrams(file_content.split(), 5):
    tree.red_black_insert(f)
# NOTE(review): presumably maps each n-gram to its occurrence count --
# confirm against RedBlackTree.get_elements.
dictionary = tree.get_elements()

#for keys, values in dictionary.items():
#	print(keys, values)

# Keys of `dictionary`, ordered by their associated value, largest first.
Sorted_List = sorted(dictionary, key=dictionary.__getitem__, reverse=True)
# The range is evaluated once, so this pops int(0.7 * original length)
# entries off the tail, leaving roughly the top 30%.
for x in range(0, int(0.7 * len(Sorted_List)), 1):
    Sorted_List.pop()
print("\n")
for x in Sorted_List:
    print(x)

# NOTE(review): opened without a context manager; the code that writes to and
# closes this handle is not visible in this part of the file.
Writing_file_Reference = open("new_text.txt", "w")