Пример #1
0
def wangzhangplot(fname):
    """Print the combined ``wangzhangcalc`` magnitude for the sequence in *fname*.

    Every character of the file is upper-cased and kept, wrapped in a
    ``Node``, only when it is a member of ``baseset``.  ``wangzhangcalc`` is
    then evaluated for 'A', 'G' and 'C'; the square root of the sum of the
    three squared results is printed as ``mod``, followed by the elapsed
    time of that computation.

    Args:
        fname: Path of the sequence file to read.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    # ``with`` releases the handle even if reading or Node() raises.
    with open(fname, 'r') as f:
        seqarr = [
            Node(base)
            for base in (ch.upper() for ch in f.read())
            if base in baseset
        ]

    start_time = datetime.datetime.now()
    result = sqrt(
        pow(wangzhangcalc(seqarr, seqlen, 'A'), 2) +
        pow(wangzhangcalc(seqarr, seqlen, 'G'), 2) +
        pow(wangzhangcalc(seqarr, seqlen, 'C'), 2))
    print("mod: " + str(result))

    time_taken = datetime.datetime.now() - start_time
    # These are the timedelta's sub-second and whole-second components,
    # not two renderings of the same total.
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
Пример #2
0
def randic2dplot(fname, window_size=36, step=3):
    """Run ``sliding_window`` over the sequence in *fname* and print the results.

    The sequence length is printed first (terminated by a comma instead of a
    newline).  For each window start, the nodes inside the window are
    ``reset()`` and ``sliding_window`` is called with the shared result
    list; finally the collected list is printed comma-separated.

    Args:
        fname: Path of the sequence file to read.
        window_size: Number of consecutive nodes per window (default 36).
        step: Distance between consecutive window starts (default 3).
    """
    window_list = []
    seqlen = getseqlen(fname)
    print(seqlen, end=",")

    # ``with`` releases the handle even if reading or Node() raises.
    with open(fname, 'r') as f:
        seqarr = [
            Node(base)
            for base in (ch.upper() for ch in f.read())
            if base in baseset
        ]

    # NOTE(review): the stop bound excludes a window that would start
    # exactly at seqlen - window_size; kept as in the original - confirm
    # whether the final full window should be included.
    for start in range(0, seqlen - window_size, step):
        for j in range(start, start + window_size):
            seqarr[j].reset()
        sliding_window(start, window_size, seqarr, window_list)

    print(','.join(map(str, window_list)))
Пример #3
0
def taonanwangplot(fname):
    """Print the wsrocalc, mkrocalc and ryrocalc results for *fname*.

    The sequence length is obtained from ``getseqlen``; each of the three
    calculators is called with ``(fname, seqlen)`` and its result printed
    next to its label.  The elapsed time of the three calls is printed last.

    Args:
        fname: Path of the sequence file to analyse.
    """
    n_bases = getseqlen(fname)

    started = datetime.datetime.now()

    # Same label/call pairs as before, evaluated and printed in order.
    for label, calc in (
            ("wsrocalc: ", wsrocalc),
            ("mkrocalc: ", mkrocalc),
            ("ryrocalc: ", ryrocalc)):
        print(label, calc(fname, n_bases))

    elapsed = datetime.datetime.now() - started

    print('time taken = ' + str(elapsed.microseconds))
    print('time taken = ' + str(elapsed.seconds))
Пример #4
0
def lifeizhaoyuplot(fname):
    """Accumulate the ``doubledict`` entries of all adjacent base pairs and print the result.

    Starting from ``basepos.O``, every two-character word formed by
    consecutive bases is looked up in ``doubledict`` and the running point
    is replaced by ``doubledict[word].add(point)``.  The final point and the
    elapsed time are printed.

    Args:
        fname: Path of the sequence file to read.

    Raises:
        KeyError: If a two-base word is missing from ``doubledict``.
    """
    seqlen = getseqlen(fname)

    # ``with`` releases the handle even if reading or Node() raises.
    with open(fname, 'r') as f:
        seqarr = [
            Node(base)
            for base in (ch.upper() for ch in f.read())
            if base in baseset
        ]

    start_time = datetime.datetime.now()

    point = basepos.O
    for idx in range(seqlen - 1):
        word = seqarr[idx].base + seqarr[idx + 1].base
        point = doubledict[word].add(point)
    print(point)

    time_taken = datetime.datetime.now() - start_time

    # Sub-second and whole-second components of the elapsed timedelta.
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
Пример #5
0
def nandyplot(fname):
    """Walk the sequence on a 2-D grid and print the distance of its mean point from the origin.

    Each base moves the current point by one unit: 'G' -> x-1, 'C' -> y+1,
    'A' -> x+1, 'T' -> y-1.  The coordinates reached after every step are
    stored on the node and, for the four bases above, added to running
    sums.  ``gr_value`` is the Euclidean norm of the mean (x, y) over the
    counted bases.

    Args:
        fname: Path of the sequence file to read.

    Raises:
        ZeroDivisionError: If the sequence contains none of A, C, G, T.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    # ``with`` releases the handle even if reading or Node() raises.
    with open(fname, 'r') as f:
        seqarr = [
            Node(base)
            for base in (ch.upper() for ch in f.read())
            if base in baseset
        ]

    # (dx, dy) unit step for each recognised base.
    steps = {'G': (-1, 0), 'C': (0, 1), 'A': (1, 0), 'T': (0, -1)}

    curr_x = 0
    curr_y = 0
    sum_x = 0
    sum_y = 0
    total_count = 0
    start_time = datetime.datetime.now()

    # The seqlen x seqlen matrix the original allocated here was never
    # read, so it is no longer created.
    for i in range(seqlen):
        node = seqarr[i]
        step = steps.get(node.base)
        if step is not None:
            total_count += 1
            curr_x += step[0]
            curr_y += step[1]
            sum_x += curr_x
            sum_y += curr_y
        # Any other member of baseset keeps the walk where it is but
        # still records the current position.
        node.x = curr_x
        node.y = curr_y

    mew_x = sum_x / total_count
    mew_y = sum_y / total_count

    gr_value = pow((pow(mew_x, 2.0) + pow(mew_y, 2.0)), 0.5)

    time_taken = datetime.datetime.now() - start_time

    print('gr_value = ' + str(gr_value))
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
Пример #6
0
def songtangplot(fname):
    """Build distance matrices for the sequence in *fname* and print the leading M/M eigenvalue.

    Each not-yet-visited node gets x = its 1-based visiting order and
    y = ``base_to_y_val_map[base]``.  From these coordinates the function
    builds the Euclidean-distance matrix, the path-distance matrix
    (cumulative distance along consecutive nodes), the M/M matrix
    (Euclidean distance divided by index separation) and the L/L matrix
    (Euclidean distance divided by path distance).  The M/M and L/L
    matrices are written to ``songtang-mm.csv`` and ``songtang-ll.csv`` and
    the maximum eigenvalue of the M/M matrix is printed, followed by timing.

    Args:
        fname: Path of the sequence file to read.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    # ``with`` releases the handle even if reading or Node() raises.
    with open(fname, 'r') as f:
        seqarr = [
            Node(base)
            for base in (ch.upper() for ch in f.read())
            if base in baseset
        ]

    curr_x = 0
    start_time = datetime.datetime.now()

    # Assign coordinates in sequence order.  The original did this lazily
    # inside the distance loop, which visits the nodes in the same order.
    for idx in range(seqlen):
        node = seqarr[idx]
        if not node.visited:
            curr_x += 1
            node.x = curr_x
            node.y = base_to_y_val_map[node.base]
            node.visited = True

    # Euclidean-distance matrix (symmetric).
    edmat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i, seqlen):
            edmat[i][j] = sqrt(
                pow((seqarr[i].x - seqarr[j].x), 2) +
                pow((seqarr[i].y - seqarr[j].y), 2))
            edmat[j][i] = edmat[i][j]

    # Path-distance matrix: distance accumulated along consecutive nodes.
    # The diagonal stays at the zero np.zeros() already put there.
    pdmat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            pdmat[i][j] = pdmat[i][j - 1] + edmat[j - 1][j]
            pdmat[j][i] = pdmat[i][j]

    # M/M matrix: Euclidean distance over index separation.
    mbym_mat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            mbym_mat[i][j] = edmat[i][j] / (j - i)
            mbym_mat[j][i] = mbym_mat[i][j]
    np.savetxt("songtang-mm.csv", mbym_mat, delimiter=",")

    # L/L matrix: Euclidean distance over path distance.
    lbyl_mat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            lbyl_mat[i][j] = edmat[i][j] / pdmat[i][j]
            lbyl_mat[j][i] = lbyl_mat[i][j]
    np.savetxt("songtang-ll.csv", lbyl_mat, delimiter=",")

    print(np.linalg.eigvals(mbym_mat).max())

    time_taken = datetime.datetime.now() - start_time

    # Sub-second and whole-second components of the elapsed timedelta.
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
Пример #7
0
def randic3dplot(fname):
    """Walk the sequence in 3-D and print the leading eigenvalue of its M/M matrix.

    Each not-yet-visited node moves the current point by one unit on every
    axis - 'G': (-1, +1, -1), 'C': (-1, -1, +1), 'A': (+1, -1, -1),
    'T': (+1, +1, +1) - and stores the resulting coordinates.  The
    Euclidean distance between every pair of nodes, divided by sqrt(3),
    forms the distance matrix; dividing each entry by the index separation
    gives the M/M matrix whose maximum eigenvalue is printed, followed by
    timing.

    Args:
        fname: Path of the sequence file to read.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    # ``with`` releases the handle even if reading or Node() raises.
    with open(fname, 'r') as f:
        seqarr = [
            Node(base)
            for base in (ch.upper() for ch in f.read())
            if base in baseset
        ]

    # (dx, dy, dz) unit step for each recognised base.
    steps = {
        'G': (-1, 1, -1),
        'C': (-1, -1, 1),
        'A': (1, -1, -1),
        'T': (1, 1, 1),
    }
    no_move = (0, 0, 0)

    curr_x = 0
    curr_y = 0
    curr_z = 0
    start_time = datetime.datetime.now()

    # Place every node once, in sequence order.  The original repeated this
    # logic for both loop indices inside the distance loop, which visits
    # the nodes in the same order.
    for idx in range(seqlen):
        node = seqarr[idx]
        if not node.visited:
            dx, dy, dz = steps.get(node.base, no_move)
            curr_x += dx
            curr_y += dy
            curr_z += dz
            node.x = curr_x
            node.y = curr_y
            node.z = curr_z
            node.visited = True

    # Scaled Euclidean-distance matrix (symmetric).
    root_three = pow(3, 0.5)
    edmat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i, seqlen):
            edmat[i][j] = sqrt(
                pow((seqarr[i].x - seqarr[j].x), 2) +
                pow((seqarr[i].y - seqarr[j].y), 2) +
                pow((seqarr[i].z - seqarr[j].z), 2)) / root_three
            edmat[j][i] = edmat[i][j]

    # M/M matrix: distance over index separation.  The path-distance and
    # L/L matrices the original also built were never read, saved or
    # printed, so they are no longer computed.
    mbym_mat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            mbym_mat[i][j] = edmat[i][j] / (j - i)
            mbym_mat[j][i] = mbym_mat[i][j]

    print(np.linalg.eigvals(mbym_mat).max())

    time_taken = datetime.datetime.now() - start_time

    # Sub-second and whole-second components of the elapsed timedelta.
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
Пример #8
0
def yauplot(fname):
    """Walk the sequence with per-base unit vectors and print the norm of the mean point.

    With h = 1/2 and s = sqrt(3)/2, each base advances the current point
    by: 'G' -> (+s, -h), 'C' -> (+s, +h), 'A' -> (+h, -s), 'T' -> (+h, +s).
    The coordinates reached after every step are stored on the node and,
    for these four bases, added to running sums.  The mean x, mean y,
    their squared norm and its square root (``gr_value``) are printed,
    followed by timing.

    Args:
        fname: Path of the sequence file to read.

    Raises:
        ZeroDivisionError: If the sequence contains none of A, C, G, T.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    # ``with`` releases the handle even if reading or Node() raises.
    with open(fname, 'r') as f:
        seqarr = [
            Node(base)
            for base in (ch.upper() for ch in f.read())
            if base in baseset
        ]

    curr_x = 0.0
    curr_y = 0.0
    sum_x = 0.0
    sum_y = 0.0
    total_count = 0
    start_time = datetime.datetime.now()

    half = 1.0 / 2.0
    long_side = pow(3, 0.5) * half
    # (dx, dy) step for each recognised base.
    steps = {
        'G': (long_side, -half),
        'C': (long_side, half),
        'A': (half, -long_side),
        'T': (half, long_side),
    }

    for i in range(seqlen):
        node = seqarr[i]
        step = steps.get(node.base)
        if step is not None:
            total_count += 1
            curr_x += step[0]
            curr_y += step[1]
            sum_x += curr_x
            sum_y += curr_y
        # Any other member of baseset keeps the walk where it is but
        # still records the current position.
        node.x = curr_x
        node.y = curr_y

    mew_x = sum_x / total_count
    mew_y = sum_y / total_count

    sqr_gr_value = pow(mew_x, 2.0) + pow(mew_y, 2.0)
    gr_value = pow(sqr_gr_value, 0.5)

    time_taken = datetime.datetime.now() - start_time

    print('mew_x = ' + str(mew_x))
    print('mew_y = ' + str(mew_y))
    print('sqr_gr_value = ' + str(sqr_gr_value))

    print('gr_value = ' + str(gr_value))
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))
Пример #9
0
def randic3dplot(fname):
    """Compute the M/M matrix for *fname* and print its leading eigenvalue with per-thread diagnostics.

    Nodes are positioned by ``visit_node``; the pairwise Euclidean distance
    divided by sqrt(3) fills the distance matrix, and each entry divided by
    the index separation fills the M/M matrix.  The thread identifier,
    matrix sizes in MB, the maximum eigenvalue (via ``eigvalsh``) and the
    elapsed times are printed.  ``gc.collect()`` is called before
    returning.

    Args:
        fname: Path of the sequence file to read.
    """
    seqlen = getseqlen(fname)
    print("Current thread: " + str(threading.get_ident()) +
          "; sequence length: " + str(seqlen))

    # ``with`` releases the handle even if reading or Node() raises.
    with open(fname, 'r') as f:
        seqarr = [
            Node(base)
            for base in (ch.upper() for ch in f.read())
            if base in baseset
        ]

    start_time = datetime.datetime.now()

    # Scaled Euclidean-distance matrix (symmetric).  The visit_node calls
    # are kept in their original order because the helper is not visible
    # from this function.
    root_three = pow(3, 0.5)
    edmat = np.zeros((seqlen, seqlen), dtype=float)
    for i in range(seqlen):
        visit_node(seqarr[i])
        for j in range(i, seqlen):
            visit_node(seqarr[j])
            edmat[i][j] = sqrt(
                pow((seqarr[i].x - seqarr[j].x), 2) +
                pow((seqarr[i].y - seqarr[j].y), 2) +
                pow((seqarr[i].z - seqarr[j].z), 2)) / root_three
            edmat[j][i] = edmat[i][j]

    print("Current thread: " + str(threading.get_ident()) +
          "; Size of edmat(MB):" + str(edmat.nbytes / (1024 * 1024)))

    # M/M matrix: distance over index separation.  The diagonal stays at
    # the zero np.zeros() already put there.
    mbym_mat = np.zeros((seqlen, seqlen), dtype=float)
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            mbym_mat[i][j] = edmat[i][j] / (j - i)
            mbym_mat[j][i] = mbym_mat[i][j]

    print("Current thread: " + str(threading.get_ident()) +
          "; Size of mbym_mat(MB):" + str(mbym_mat.nbytes / (1024 * 1024)))

    # The eigenvalue computation is timed on its own, inside the print.
    before_eigvalsh = datetime.datetime.now()
    print("Current thread: " + str(threading.get_ident()) +
          "; Max eigvalue(eigh): " + str(np.linalg.eigvalsh(mbym_mat).max()) +
          ": time_taken: " + str(datetime.datetime.now() - before_eigvalsh))

    time_taken = datetime.datetime.now() - start_time

    print("Current thread: " + str(threading.get_ident()) + "; time taken = " +
          str(time_taken))
    gc.collect()
Пример #10
0
def jiliplot(fname):
    """Place the sequence on two rows and print the leading eigenvalue of its L/L matrix.

    Each not-yet-visited node whose base is in ``rset`` gets x = 1 and
    y = running count of such nodes; every other node gets x = 0 and
    y = running count of the remaining nodes.  From these coordinates the
    function builds the Euclidean-distance matrix, the path-distance
    matrix (cumulative distance along consecutive nodes) and the L/L
    matrix (Euclidean distance divided by path distance), then prints the
    maximum eigenvalue of the L/L matrix, followed by timing.

    Args:
        fname: Path of the sequence file to read.
    """
    seqlen = getseqlen(fname)
    print(seqlen)

    # ``with`` releases the handle even if reading or Node() raises.
    with open(fname, 'r') as f:
        seqarr = [
            Node(base)
            for base in (ch.upper() for ch in f.read())
            if base in baseset
        ]

    rcount = 0
    ycount = 0
    start_time = datetime.datetime.now()

    # Place every node once, in sequence order.  The original repeated this
    # logic for both loop indices inside the distance loop, which visits
    # the nodes in the same order.
    for idx in range(seqlen):
        node = seqarr[idx]
        if node.base in baseset and not node.visited:
            if node.base in rset:
                node.x = 1
                rcount += 1
                node.y = rcount
            else:
                node.x = 0
                ycount += 1
                node.y = ycount
            node.visited = True

    # Euclidean-distance matrix (symmetric).
    edmat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i, seqlen):
            edmat[i][j] = sqrt(
                pow((seqarr[i].x - seqarr[j].x), 2) +
                pow((seqarr[i].y - seqarr[j].y), 2))
            edmat[j][i] = edmat[i][j]

    # Path-distance matrix: distance accumulated along consecutive nodes.
    # The diagonal stays at the zero np.zeros() already put there.
    pdmat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            pdmat[i][j] = pdmat[i][j - 1] + edmat[j - 1][j]
            pdmat[j][i] = pdmat[i][j]

    # L/L matrix: Euclidean distance over path distance.  The M/M matrix
    # the original also built was never read, so it is no longer computed.
    lbyl_mat = np.zeros((seqlen, seqlen))
    for i in range(seqlen):
        for j in range(i + 1, seqlen):
            lbyl_mat[i][j] = edmat[i][j] / pdmat[i][j]
            lbyl_mat[j][i] = lbyl_mat[i][j]

    print(np.linalg.eigvals(lbyl_mat).max())

    time_taken = datetime.datetime.now() - start_time

    # Sub-second and whole-second components of the elapsed timedelta.
    print('time taken = ' + str(time_taken.microseconds))
    print('time taken = ' + str(time_taken.seconds))