Exemplo n.º 1
0
# Align a squiggle against the windowed forward ('Fprimewin') and reverse
# ('Rprimewin') entries of every reference in `hashthang` using
# mlpy.dtw_subsequence, collecting one result tuple per reference and strand.
# NOTE(review): this snippet ends before any return statement is visible, so
# how `result` is consumed cannot be confirmed from here.
def raw_squiggle_search3(squiggle,hashthang):
    result=[]
    for ref in hashthang:
        try:
            # `scale` is defined/imported outside this snippet. The call does
            # not depend on `ref`, yet it is repeated for every reference.
            queryarray=scale(squiggle)
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Fprimewin'])
            # Forward hit: (distance, ref, strand, first/last entry of path[1],
            # first/last entry of path[0], mean of the cost matrix).
            # Presumably path[1] indexes the reference and path[0] the query
            # (mlpy convention) -- TODO confirm.
            result.append((dist,ref,"F",path[1][0],path[1][-1],path[0][0],path[0][-1],cost.mean()))
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Rprimewin'])
            # Reverse hit: the path[1] positions are subtracted from the length
            # of the reverse entry, and the last position is reported first.
            result.append((dist,ref,"R",(len(hashthang[ref]['Rprimewin'])-path[1][-1]),(len(hashthang[ref]['Rprimewin'])-path[1][0]),path[0][0],path[0][-1],cost.mean()))
        except Exception,err:
            # Any failure for this reference is reduced to a fixed message;
            # `err` is never inspected, so the cause is lost.
            print "Warp Fail"
Exemplo n.º 2
0
def squiggle_search2(squiggle,kmerhash,seqlen):
    """Return the best subsequence-DTW hit of a squiggle over all references.

    The squiggle is standardised (zero mean, unit variance) once and aligned
    with ``mlpy.dtw_subsequence`` against the ``'Fprime'`` and ``'Rprime'``
    entries of every reference in ``kmerhash``. The hit with the smallest
    distance wins; on ties the hit that was found first is kept.

    Args:
        squiggle: numeric sequence describing the read.
        kmerhash: mapping of reference id to a mapping that holds the
            ``'Fprime'`` and ``'Rprime'`` sequences.
        seqlen: not used by this function; kept so existing callers work.

    Returns:
        A 6-tuple ``(ref, dist, strand, first, ref, last)`` where ``strand``
        is ``"F"`` or ``"R"`` and ``first``/``last`` are the first and last
        entries of ``path[1]`` returned by mlpy (presumably the
        reference-side positions of the warping path). The reference id
        appears twice, exactly as in the original tuple layout.

    Raises:
        IndexError: if ``kmerhash`` holds no references.
    """
    if not kmerhash:
        # Same exception type the old ``sorted(result)[0]`` produced, but
        # with a message that explains what went wrong.
        raise IndexError("squiggle_search2: no references to search")

    # The scaled query does not depend on the reference, so build it once.
    queryarray = sklearn.preprocessing.scale(np.array(squiggle),axis=0,with_mean=True,with_std=True,copy=True)

    result = []
    for ref in kmerhash:
        for strand, key in (("F", 'Fprime'), ("R", 'Rprime')):
            dist, cost, path = mlpy.dtw_subsequence(queryarray, kmerhash[ref][key])
            result.append((dist, ref, strand, path[1][0], ref, path[1][-1]))

    # Sort once (stable, so ties keep insertion order) instead of once per
    # returned field.
    best = sorted(result, key=lambda hit: hit[0])[0]
    return best[1], best[0], best[2], best[3], best[4], best[5]
Exemplo n.º 3
0
# Align a squiggle against the 'Fprime' and 'Rprime' entries of every
# reference in `hashthang` with mlpy.dtw_subsequence, collecting one result
# tuple per reference and strand.
# NOTE(review): the snippet is cut off after the except handler; no return
# statement is visible here.
def raw_squiggle_search2(squiggle,hashthang):
    result=[]
    #print args.speedmode
    for ref in hashthang:
        try:
            # `scale` comes from outside this snippet; the call is independent
            # of `ref` but is repeated on every iteration.
            queryarray = scale(squiggle)
            # NOTE(review): `mx` is not read anywhere in the visible lines.
            mx = np.max(queryarray)
            # With the factor fixed at 1 the in-place multiply below leaves
            # the values unchanged; the trailing comment lists other factors
            # that were apparently tried.
            scalingFactor = 1 # iqr # 3 # 1.2 # MS
            queryarray *= scalingFactor
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Fprime'])
            # Forward hit: (distance, ref, strand, first/last entry of path[1],
            # first/last entry of path[0]). Presumably path[1] is the
            # reference side and path[0] the query side -- TODO confirm.
            result.append((dist,ref,"F",path[1][0],path[1][-1],path[0][0],path[0][-1]))
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Rprime'])
            # Reverse hit: path[1] positions are subtracted from the length of
            # the reverse entry, last position first.
            result.append((dist,ref,"R",(len(hashthang[ref]['Rprime'])-path[1][-1]),(len(hashthang[ref]['Rprime'])-path[1][0]),path[0][0],path[0][-1]))
        except Exception,err:
            # Every failure is reduced to this fixed message; `err` is unused.
            print "Warp Fail"
			result.append((distance,id,"F",location,ref))
			subjectfile = id+"_"+str(ref)+"_"+"R"+"_subject.txt"
			subjectfile = re.sub('\|','_',subjectfile)
			#seqlen2 = str(seqlen[id])
			commands = subjectfile+' '+queryfile+' 200'+' 0.1'
			#print commands
			#current = str(multiprocessing.current_process())
			#currentnum=int(re.search(r'\d+', current).group())
			gpucode=str()
			#if (currentnum % 2 == 0):
				#print "Even"
			gpucode='./UCR_DTW '
			#else:
			#	#print "Odd"
			#	gpucode='./GPU-DTW '
			runcommand = gpucode+commands
Exemplo n.º 5
0
def raw_squiggle_search2(squiggle,hashthang):
    """Return the best subsequence-DTW hit of a squiggle over all references.

    For every reference in ``hashthang`` the standardised squiggle is aligned
    with ``mlpy.dtw_subsequence`` against its ``'Fprime'`` and ``'Rprime'``
    entries. ``memory_usage_psutil()`` (defined outside this block) is called
    around the alignments, as before. A reference whose processing raises is
    reported with "Warp Fail" and skipped.

    The original ``return`` line was indented with a tab while the body used
    spaces; under Python 2 tab expansion it sat inside the ``for`` loop, so
    the function returned after the first reference. It now runs after the
    loop so every reference is considered.

    Args:
        squiggle: numeric sequence describing the read.
        hashthang: mapping of reference id to a mapping that holds the
            ``'Fprime'`` and ``'Rprime'`` sequences.

    Returns:
        A 9-tuple ``(ref, dist, strand, ref_first, ref_last, query_first,
        query_last, cost, path)`` for the hit with the smallest distance
        (ties keep the hit found first). For ``"R"`` hits the two reference
        values are ``len(Rprime)`` minus the last and first entry of
        ``path[1]`` respectively.

    Raises:
        IndexError: if no hit was collected (no references, or every
            reference failed).
    """
    result = []

    for ref in hashthang:
        try:
            memory_usage_psutil()
            # Kept inside the try so a scaling failure is still reported as
            # a warp failure, exactly as before.
            queryarray = sklearn.preprocessing.scale(np.array(squiggle),axis=0,with_mean=True,with_std=True,copy=True)
            dist, cost, path = mlpy.dtw_subsequence(queryarray, hashthang[ref]['Fprime'])
            memory_usage_psutil()
            result.append((dist, ref, "F", path[1][0], path[1][-1], path[0][0], path[0][-1], cost, path))

            reverse = hashthang[ref]['Rprime']
            dist, cost, path = mlpy.dtw_subsequence(queryarray, reverse)
            reverse_len = len(reverse)
            result.append((dist, ref, "R", reverse_len - path[1][-1], reverse_len - path[1][0], path[0][0], path[0][-1], cost, path))
            memory_usage_psutil()
        except Exception:
            # Best effort: report and move on to the next reference.
            print("Warp Fail")

    # Sort once (stable) instead of once per returned field.
    best = sorted(result, key=lambda hit: hit[0])[0]
    return best[1], best[0], best[2], best[3], best[4], best[5], best[6], best[7], best[8]
Exemplo n.º 6
0
# Align a squiggle against the 'Fprime' and 'Rprime' entries of every
# reference in `hashthang` with mlpy.dtw_subsequence, optionally reporting
# memory usage when the module-level `args.verbose` flag is exactly True.
# NOTE(review): the snippet is cut off after the except handler; no return
# statement is visible here.
def raw_squiggle_search2(squiggle,hashthang):
    result=[]

    for ref in hashthang:
        try:
            # `args` and `memory_usage_psutil` are defined outside this block.
            if (args.verbose is True):
                memory_usage_psutil()
            # Standardise the squiggle; the call does not depend on `ref` but
            # is repeated for each reference.
            queryarray = sklearn.preprocessing.scale(np.array(squiggle),axis=0,with_mean=True,with_std=True,copy=True)
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Fprime'])
            if (args.verbose is True):
                memory_usage_psutil()
            # Forward hit: (distance, ref, strand, first/last entry of path[1],
            # first/last entry of path[0], full cost matrix, full path).
            result.append((dist,ref,"F",path[1][0],path[1][-1],path[0][0],path[0][-1],cost,path))
            dist, cost, path = mlpy.dtw_subsequence(queryarray,hashthang[ref]['Rprime'])
            # Reverse hit: path[1] positions are subtracted from the length of
            # the reverse entry, last position first.
            result.append((dist,ref,"R",(len(hashthang[ref]['Rprime'])-path[1][-1]),(len(hashthang[ref]['Rprime'])-path[1][0]),path[0][0],path[0][-1],cost,path))
            if (args.verbose is True):
                memory_usage_psutil()
        except Exception,err:
            # Every failure is reduced to this fixed message; `err` is unused.
            print "Warp Fail"
Exemplo n.º 7
0
    def getDistance(self, songPath, songName, dbPath):
        """Score a recorded song against every ``.csv`` file in a directory.

        The query is obtained from ``MidiManager.segmentNote`` using
        ``songName + '_result.csv'``; each ``.csv`` entry of ``dbPath`` is
        read with ``MidiManager.readMidi`` and compared to it with
        ``mlpy.dtw_subsequence``.

        Returns a list of ``[distance, name]`` pairs in directory-listing
        order, where ``distance`` is the DTW distance converted with
        ``int()`` and ``name`` is the file name minus its last 10 characters
        (presumably a fixed suffix -- confirm against the database layout).
        """
        midi = midimanager.MidiManager()
        query = midi.segmentNote(songPath, songName + '_result.csv')

        scores = []
        for entry in os.listdir(dbPath):
            # Only CSV files are database entries.
            if not entry.endswith('.csv'):
                continue
            candidate = midi.readMidi(dbPath, entry)
            distance, _cost, _path = mlpy.dtw_subsequence(query, candidate)
            scores.append([int(distance), entry[:-10]])
        return scores
Exemplo n.º 8
0
def squiggle_search2_old(squiggle, kmerhash, seqlen):
    """Return the best subsequence-DTW hit of a squiggle over all references.

    The squiggle is standardised (zero mean, unit variance) once and aligned
    with ``mlpy.dtw_subsequence`` against the ``"Fprime"`` and ``"Rprime"``
    entries of every reference in ``kmerhash``. The hit with the smallest
    distance wins; on ties the hit that was found first is kept.

    Args:
        squiggle: numeric sequence describing the read.
        kmerhash: mapping of reference id to a mapping that holds the
            ``"Fprime"`` and ``"Rprime"`` sequences.
        seqlen: not used by this function; kept so existing callers work.

    Returns:
        A 6-tuple ``(ref, dist, strand, first, ref, last)``, for example
        ``('J02459', 41.017514495176989, 'F', 10003, 'J02459', 10198)``.
        ``first``/``last`` are the first and last entries of ``path[1]``
        returned by mlpy.

    Raises:
        IndexError: if ``kmerhash`` holds no references.
    """
    if not kmerhash:
        # Same exception type the old ``sorted(result)[0]`` produced, but
        # with a message that explains what went wrong.
        raise IndexError("squiggle_search2_old: no references to search")

    # The scaled query does not depend on the reference, so build it once.
    queryarray = sklearn.preprocessing.scale(np.array(squiggle), axis=0, with_mean=True, with_std=True, copy=True)

    result = []
    for ref in kmerhash:
        for strand, key in (("F", "Fprime"), ("R", "Rprime")):
            dist, cost, path = mlpy.dtw_subsequence(queryarray, kmerhash[ref][key])
            result.append((dist, ref, strand, path[1][0], ref, path[1][-1]))

    # Sort once (stable, so ties keep insertion order) instead of once per
    # returned field.
    best = sorted(result, key=lambda hit: hit[0])[0]
    return best[1], best[0], best[2], best[3], best[4], best[5]
Exemplo n.º 9
0
def squiggle_search2(squiggle,channel_id,read_id,args,seqids,threedarray,seqlen):
    '''
    This function matches an incoming squiggle to a reference. Coordinates are returned with respect to the forward
    strand only. This is important to recall for subsequent downstream processing. Thus a read which is reported as
    mapping to the reverse strand will report its coordinates on the forward strand.

    Each reference is searched in blocks of 200000 samples whose start
    positions are 199500 samples apart, so neighbouring blocks share 500
    samples. Block-local path positions are shifted by the block start before
    being stored.

    Arguments:
        squiggle     -- numeric sequence describing the read.
        seqids       -- sequence of reference ids; position i corresponds to
                        threedarray[i].
        threedarray  -- per-reference pairs unpacked as (Rprime, Fprime).
        channel_id, read_id, args, seqlen -- not used here; kept so existing
                        callers keep working.

    Returns the 7-tuple (ref, dist, strand, ref_start, ref_end, query_start,
    query_end) of the hit with the smallest DTW distance (ties keep the hit
    found first). For "R" hits the reference values are len(Rprime) minus the
    first and last path position, in that order.

    Raises IndexError when no hit could be collected (e.g. seqids is empty).
    '''
    if not seqids:
        # Same exception type the old ``sorted(result)[0]`` produced.
        raise IndexError("squiggle_search2: no references to search")

    blocksize=200000
    # NOTE: despite its name this is the stride between block starts; the
    # actual overlap between neighbouring blocks is blocksize - overlap = 500.
    overlap=blocksize-500

    # The scaled query does not depend on the reference, so build it once.
    queryarray = sklearn.preprocessing.scale(np.array(squiggle,dtype=float),axis=0,with_mean=True,with_std=True,copy=True)

    result=[]
    # enumerate replaces the per-iteration seqids.index(ref) scan.
    for refid,ref in enumerate(seqids):
        Rprime,Fprime=threedarray[refid]
        # Block starts are derived from the forward strand and reused for the
        # reverse strand, as in the original code.
        block_starts = range(0, len(Fprime), overlap)

        for start in block_starts:
            dist, cost, path = mlpy.dtw_subsequence(queryarray,Fprime[start:start+blocksize])
            result.append((dist,ref,"F",path[1][0]+start,path[1][-1]+start,path[0][0],path[0][-1]))

        reverse_len = len(Rprime)
        for start in block_starts:
            dist, cost, path = mlpy.dtw_subsequence(queryarray,Rprime[start:start+blocksize])
            # Corrected for the fact that this is a reverse complement
            result.append((dist,ref,"R",reverse_len-(path[1][0]+start),reverse_len-(path[1][-1]+start),path[0][0],path[0][-1]))

    # Note first two elements flipped for return deliberately.
    distanceR,seqmatchnameR,frR,rsR,reR,qsR,qeR=sorted(result,key=lambda hit: hit[0])[0]
    return seqmatchnameR,distanceR,frR,rsR,reR,qsR,qeR
			testarray = kmerhash2[id][ref]['Rprime']
			filename = id+"_"+str(ref)+"_R_subject.bin"
			filename = re.sub('\|','_',filename)			
			with open(filename, "wb") as f:
				f.write(ar.array("f", testarray))
			filename = id+"_"+str(ref)+"_R_subject.txt"
			filename = re.sub('\|','_',filename)
			np.savetxt(filename, testarray, delimiter=',')


#ecit




# Open a MySQL connection; host, user, password and port come from names
# defined outside this fragment.
db = MySQLdb.connect(host=dbhost, user=dbusername, passwd=dbpass, port=dbport)
cursor = db.cursor() 
# Select the run database to work on (earlier choices are kept commented out).
#sql = "use minion_PLSP57501_2014_10_10_DSmin1_run2_LambdaSK002_5041"
#sql = "use minion_PLSP57501_20140909_JA_defA_4434"
sql = "use minion_LomanLabz_013731_11rx_v2_3135"
print sql
cursor.execute(sql)

# Table suffixes 0..9: one caller_basecalled_template_<n> table per value.
numbers = range(0,10)

for number in numbers:
	#print number

	# Build a query that joins the template table for this suffix with the 2D
	# alignment, config and tracking tables, grouped by basename_id and
	# limited to a single row.
	sql = "SELECT basename_id ,pos,flag, channel, read_id,tracking_id.basename,file_path FROM caller_basecalled_template_%s inner join align_sam_basecalled_2d using (basename_id) inner join config_general using (basename_id) inner join tracking_id using (basename_id) group by basename_id limit 1" %(number)
	#sql = "SELECT basename_id , channel, read_id FROM caller_basecalled_template_%s inner join config_general using (basename_id) where basename_id not in (select basename_id from align_sam_basecalled_template) group by basename_id" %(number)
	# NOTE(review): within the visible lines the query is only printed; any
	# execution happens past the end of this fragment.
	print sql
Exemplo n.º 11
0
import sys, os, re
		if(retcode is not None):
			break
	
#######################################################################
def squiggle_search2(squiggle,kmerhash2,channel_id,read_id,seqlen):
Exemplo n.º 13
0
#!/usr/bin/env python
Exemplo n.º 14
0
#!C:\anaconda python
Exemplo n.º 15
0
 # Append one CSV-style row for the standard mlpy DTW run: amp and window
 # (both reported 1-based), the method tag, the distance, the timing and the
 # first/last entry of path[1] shifted by `count`. All of these names are
 # defined before this fragment.
 # NOTE(review): timedelta.microseconds is only the sub-second component
 # (0..999999); whole seconds of a longer run are dropped. Use
 # total_seconds() if full elapsed time is wanted.
 with open("bench_log.txt", "a") as text_file:
     text_file.write("\n" + str(amp + 1) + "," + str(window + 1) +
                     ",mlpy," + str(mlpystddist) + "," +
                     str(timet.microseconds) + ',' +
                     str(mlpystdpath[1][0] + count) + ',' +
                     str(mlpystdpath[1][-1] + count))
 # Dump both halves of the warping path to text files under paths/.
 # NOTE(review): np.savetxt returns None, so path1/path2 never hold data.
 path1 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" +
                    str(window + 1) + '_query_mlpy.txt',
                    mlpystdpath[0],
                    delimiter=',')
 path2 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" +
                    str(window + 1) + '_ref_mlpy.txt',
                    mlpystdpath[1],
                    delimiter=',')
 # Time the subsequence DTW variant on the same x, y inputs.
 timeb = datetime.now()
 mlpysubdist, mlpysubcost, mlpysubpath = mlpy.dtw_subsequence(x, y)
 timet = datetime.now() - timeb
 print("mlpy sub complete on amp " + str(amp + 1))
 # Same log row layout as above, tagged "mlpy_sub" (same microseconds
 # caveat applies).
 with open("bench_log.txt", "a") as text_file:
     text_file.write("\n" + str(amp + 1) + "," + str(window + 1) +
                     ",mlpy_sub," + str(mlpysubdist) + "," +
                     str(timet.microseconds) + ',' +
                     str(mlpysubpath[1][0] + count) + ',' +
                     str(mlpysubpath[1][-1] + count))
 path1 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" +
                    str(window + 1) + '_query_mlpysub.txt',
                    mlpysubpath[0],
                    delimiter=',')
 path2 = np.savetxt('paths/' + "amp_" + str(amp + 1) + "_window_" +
                    str(window + 1) + '_ref_mlpysub.txt',
                    mlpysubpath[1],
Exemplo n.º 16
0
def dtwSubsequence(x, y):
    """Run ``mlpy.dtw_subsequence`` on ``x`` and ``y`` and return its result unchanged."""
    outcome = mlpy.dtw_subsequence(x, y)
    return outcome
Exemplo n.º 17
0
def dtwCalculate(x, y):
    """Return the subsequence-DTW distance and the final ``path[1]`` entry.

    ``mlpy.dtw_subsequence(x, y)`` yields ``(dist, cost, path)``; the cost
    matrix is discarded and only the last element of ``path[1]`` is kept
    (presumably the position in ``y`` where the match ends -- confirm
    against the mlpy documentation).

    Returns:
        Tuple ``(dist, last_index)``.
    """
    dist, _cost, path = mlpy.dtw_subsequence(x, y)
    # Negative indexing replaces path[len(path) - 1]; `path` is no longer
    # rebound to one of its own components.
    return dist, path[1][-1]
Exemplo n.º 18
0
#!/usr/bin/env python
Exemplo n.º 19
0
import sys, os, re
Exemplo n.º 20
0
	]

refLen = len(ref_Ft)
#frm_ = refLen - frm
#to_ = refLen - to

#------------------------------------------------------------------------------
# DTW ...

import mlpy

def dtw(qry, refs):
    """Pick the better-matching strand for a query via subsequence DTW.

    Args:
        qry: query signal; an empty query short-circuits.
        refs: pair ``(ref_F, ref_R)`` holding the forward and reverse
            reference signals. It is passed positionally, exactly like the
            former tuple parameter (a Python-2-only syntax).

    Returns:
        ``('-1', -1)`` for an empty query. Otherwise ``('F', position)`` when
        the forward distance is strictly smaller, else ``('R', position)``.
        Positions are built from the first entry of ``path[1]`` and the
        module-level ``offset``, ``refLen`` and ``ampSz`` values.
    """
    # Unpack first so a malformed pair fails before anything else, as the
    # tuple parameter did.
    ref_F, ref_R = refs

    # Value comparison: `is 0` only worked through CPython's small-int cache.
    if len(qry) == 0:
        return '-1', -1

    dist_F, _, path_F = mlpy.dtw_subsequence(qry, ref_F)
    dist_R, _, path_R = mlpy.dtw_subsequence(qry, ref_R)

    if dist_F < dist_R:
        return 'F', path_F[1][0] - offset
    return 'R', refLen - path_R[1][0] - ampSz + offset

#------------------------------------------------------------------------------
# Generate Synthetic Read...

def generateRead():
	sz =  ampSz
        trg = np.random.randint(0, refLen-sz, 1)[0]

	# Using t_model ...
	i,j = trg, trg+sz # NB ascending numerically
	#print trg, i, j
Exemplo n.º 21
0
 def map(self, ref_event_levels, query_events):
     """Store the inputs on the instance and map the query onto the reference.

     ``ref_event_levels`` and ``query_events`` are saved as attributes, then
     ``mlpy.dtw_subsequence`` is run on ``self.scaled_query_event_currents``
     against ``self.scaled_reference_event_currents`` (both provided
     elsewhere on the class). The raw mlpy result is wrapped in
     ``MapResults`` and returned.
     """
     self.ref_event_levels, self.query_events = ref_event_levels, query_events
     alignment = mlpy.dtw_subsequence(
         self.scaled_query_event_currents,
         self.scaled_reference_event_currents,
     )
     return MapResults(alignment)