Пример #1
0
def scan_sequence(seq,
                  seq_gc_bin,
                  motifs,
                  nreport,
                  scan_rc,
                  motifs_meanstd=None,
                  zscore=False):
    """Scan one sequence with every motif and collect the matches.

    Parameters
    ----------
    seq
        Sequence handed unchanged to ``pwmscan``.
    seq_gc_bin
        First-level key into ``motifs_meanstd``; only read when ``zscore``
        is true.
    motifs
        Iterable of ``(motif, cutoff)`` pairs. A cutoff of ``None`` means
        the motif is not scanned and contributes an empty list.
    nreport
        Forwarded to ``pwmscan``; also the number of placeholder rows
        produced when nothing is found (see below).
    scan_rc
        Forwarded to ``pwmscan``.
    motifs_meanstd
        Mapping ``motifs_meanstd[seq_gc_bin][motif.id] -> (mean, std)``.
        Must be supplied when ``zscore`` is true.
    zscore
        When true, the sequence is scanned with the motif's minimum score
        as threshold, every score is converted to ``(score - mean) / std``
        and only rows whose converted score is ``>= cutoff`` are kept.

    Returns
    -------
    list
        One entry per motif, in input order. Each entry is a list of
        three-element rows (score first).
    """
    ret = []
    for motif, cutoff in motifs:
        if cutoff is None:
            ret.append([])
            continue

        min_score = motif.pwm_min_score()
        if zscore:
            m_mean, m_std = motifs_meanstd[seq_gc_bin][motif.id]
            raw = pwmscan(seq, motif.logodds, min_score, nreport, scan_rc)
            scaled = [[(row[0] - m_mean) / m_std, row[1], row[2]]
                      for row in raw]
            result = [row for row in scaled if row[0] >= cutoff]
        else:
            result = pwmscan(seq, motif.logodds, cutoff, nreport, scan_rc)

        if cutoff <= min_score and len(result) == 0:
            # Build independent rows: `[[...]] * nreport` would repeat one
            # shared inner list, so mutating a row would change all of them.
            result = [[min_score, 0, 1] for _ in range(nreport)]

        ret.append(result)

    return ret
Пример #2
0
    def pwm_scan_to_gff(self,
                        fa,
                        gfffile,
                        cutoff=0.9,
                        nreport=50,
                        scan_rc=True,
                        append=False):
        """Scan sequences with this motif and write every match as a GFF line.

        Parameters
        ----------
        fa
            Object whose ``items()`` yields ``(name, sequence)`` pairs.
        gfffile
            Path of the output file.
        cutoff
            Fraction of the score range; the absolute threshold is
            ``min + (max - min) * cutoff``.
        nreport
            Forwarded to ``pwmscan``.
        scan_rc
            Forwarded to ``pwmscan``.
        append
            Append to ``gfffile`` instead of overwriting it.
        """
        min_score = self.pwm_min_score()
        c = min_score + (self.pwm_max_score() - min_score) * cutoff
        pwm = self.pwm
        motif_len = len(pwm)

        strandmap = {-1: "-", "-1": "-", "-": "-", "1": "+", 1: "+", "+": "+"}
        gff_line = ("{}\tpwmscan\tmisc_feature\t{}\t{}\t{}\t{}\t.\t"
                    "motif_name \"{}\" ; motif_instance \"{}\"\n")

        # The context manager closes the file even when scanning or
        # formatting raises (e.g. an unexpected strand value).
        with open(gfffile, "a" if append else "w") as out:
            for name, seq in fa.items():
                result = pwmscan(seq.upper(), pwm, c, nreport, scan_rc)
                for score, pos, strand in result:
                    out.write(
                        gff_line.format(name, pos, pos + motif_len, score,
                                        strandmap[strand], self.id,
                                        seq[pos:pos + motif_len]))
Пример #3
0
    def pwm_scan_to_gff(self, fa, gfffile, cutoff=0.9, nreport=50, scan_rc=True, append=False):
        """Write all matches of this motif in ``fa`` to ``gfffile`` as GFF lines.

        ``fa.items()`` must yield ``(name, sequence)`` pairs. ``cutoff`` is a
        fraction of the motif's score range and is converted to the absolute
        threshold ``min + (max - min) * cutoff``. ``nreport`` and ``scan_rc``
        are forwarded to ``pwmscan``. With ``append`` the file is opened in
        append mode, otherwise it is overwritten.
        """
        lowest = self.pwm_min_score()
        c = lowest + (self.pwm_max_score() - lowest) * cutoff
        pwm = self.pwm
        width = len(pwm)

        strandmap = {-1: "-", "-1": "-", "-": "-", "1": "+", 1: "+", "+": "+"}
        gff_line = ("{}\tpwmscan\tmisc_feature\t{}\t{}\t{}\t{}\t.\t"
                    "motif_name \"{}\" ; motif_instance \"{}\"\n")

        mode = "a" if append else "w"
        # `with` guarantees the handle is closed if anything below raises.
        with open(gfffile, mode) as out:
            for name, seq in fa.items():
                for score, pos, strand in pwmscan(seq.upper(), pwm, c, nreport, scan_rc):
                    out.write(gff_line.format(
                        name,
                        pos,
                        pos + width,
                        score,
                        strandmap[strand],
                        self.id,
                        seq[pos:pos + width],
                    ))
Пример #4
0
 def pwm_scan(self, fa, cutoff=0.9, nreport=50, scan_rc=True):
     """Return the match positions of this motif for every sequence in ``fa``.

     ``cutoff`` is a fraction of the motif's score range and is turned into
     the absolute threshold ``min + (max - min) * cutoff``. The result maps
     each name from ``fa.items()`` to the list of positions reported by
     ``pwmscan`` for the upper-cased sequence.
     """
     score_range = self.pwm_max_score() - self.pwm_min_score()
     threshold = self.pwm_min_score() + score_range * cutoff
     matrix = self.pwm
     return {
         name: [
             pos
             for _, pos, _strand in pwmscan(seq.upper(), matrix, threshold, nreport, scan_rc)
         ]
         for name, seq in fa.items()
     }
Пример #5
0
 def pwm_scan_score(self, fa, cutoff=0, nreport=1, scan_rc=True):
     """Return the match scores of this motif for every sequence in ``fa``.

     The absolute threshold is ``min + (max - min) * cutoff``. The result
     maps each name from ``fa.items()`` to the list of scores reported by
     ``pwmscan`` for the upper-cased sequence.
     """
     score_range = self.pwm_max_score() - self.pwm_min_score()
     threshold = self.pwm_min_score() + score_range * cutoff
     matrix = self.pwm
     scores_by_name = {}
     for name, seq in fa.items():
         hits = pwmscan(seq.upper(), matrix, threshold, nreport, scan_rc)
         scores_by_name[name] = [score for score, _, _ in hits]
     return scores_by_name
Пример #6
0
def scan_sequence(seq, motifs, nreport, scan_rc):
    """Scan ``seq`` with every ``(motif, cutoff)`` pair in ``motifs``.

    Returns a list holding the ``pwmscan`` result for each motif, in the
    same order as ``motifs``.
    """
    return [
        pwmscan(seq, motif.pwm, cutoff, nreport, scan_rc)
        for motif, cutoff in motifs
    ]
Пример #7
0
def scan_sequence(seq, motifs, nreport, scan_rc):
    """Run ``pwmscan`` on ``seq`` once per motif.

    ``motifs`` is an iterable of ``(motif, cutoff)`` pairs; the motif's
    ``pwm`` and its cutoff are passed to ``pwmscan`` together with
    ``nreport`` and ``scan_rc``. The per-motif results are returned as a
    list in input order.
    """
    per_motif_hits = []
    for entry in motifs:
        current_motif, threshold = entry
        per_motif_hits.append(
            pwmscan(seq, current_motif.pwm, threshold, nreport, scan_rc))
    return per_motif_hits
Пример #8
0
 def pwm_scan_all(self, fa, cutoff=0.9, nreport=50, scan_rc=True):
     """Return ``(pos, score, strand)`` tuples for every match in ``fa``.

     The absolute threshold is ``min + (max - min) * cutoff``. The result
     maps each name from ``fa.items()`` to a list of tuples built from the
     rows reported by ``pwmscan`` for the upper-cased sequence.
     """
     score_range = self.pwm_max_score() - self.pwm_min_score()
     threshold = self.pwm_min_score() + score_range * cutoff
     matrix = self.pwm
     return {
         name: [
             (pos, score, strand)
             for score, pos, strand in pwmscan(seq.upper(), matrix, threshold, nreport, scan_rc)
         ]
         for name, seq in fa.items()
     }
Пример #9
0
 def pwm_scan_score(self, fa, cutoff=0, nreport=1, scan_rc=True):
     """Return the match scores of this motif for every sequence in ``fa``.

     Parameters
     ----------
     fa
         Object whose ``items()`` yields ``(name, sequence)`` pairs.
     cutoff
         Fraction of the score range; the absolute threshold passed to
         ``pwmscan`` is ``min + (max - min) * cutoff``.
     nreport, scan_rc
         Forwarded to ``pwmscan``.

     Returns
     -------
     dict
         Maps each name to the list of scores reported for that sequence.
     """
     min_score = self.pwm_min_score()
     c = min_score + (self.pwm_max_score() - min_score) * cutoff
     pwm = self.pwm
     matches = {}
     for name, seq in fa.items():
         result = pwmscan(seq.upper(), pwm, c, nreport, scan_rc)
         # Only the score of each (score, pos, strand) row is kept.
         matches[name] = [score for score, _pos, _strand in result]
     return matches
Пример #10
0
	def pwm_scan_score(self, fa, cutoff=0, nreport=1, scan_rc=True):
		"""Return the match scores of this motif for every sequence in ``fa``.

		``fa.items()`` must yield ``(name, sequence)`` pairs. ``cutoff`` is a
		fraction of the score range and is converted to the absolute
		threshold ``min + (max - min) * cutoff``; ``nreport`` and ``scan_rc``
		are forwarded to ``pwmscan``. The result maps each name to the list
		of scores reported for that sequence.
		"""
		from gimmemotifs.c_metrics import pwmscan
		min_score = self.pwm_min_score()
		c = min_score + (self.pwm_max_score() - min_score) * cutoff
		pwm = self.pwm
		matches = {}
		# `name` instead of `id` so the builtin is not shadowed.
		for name, seq in fa.items():
			result = pwmscan(seq.upper(), pwm, c, nreport, scan_rc)
			matches[name] = [score for score, _pos, _strand in result]
		return matches
Пример #11
0
 def pwm_scan_score(self, fa, cutoff=0, nreport=1, scan_rc=True):
     """Collect the ``pwmscan`` scores of this motif per sequence.

     Parameters
     ----------
     fa
         Object whose ``items()`` yields ``(name, sequence)`` pairs.
     cutoff
         Fraction of the score range, converted to the absolute threshold
         ``min + (max - min) * cutoff``.
     nreport, scan_rc
         Forwarded to ``pwmscan``.

     Returns
     -------
     dict
         Name -> list of scores, one per row returned by ``pwmscan``.
     """
     lowest = self.pwm_min_score()
     c = lowest + (self.pwm_max_score() - lowest) * cutoff
     pwm = self.pwm
     matches = {}
     for name, seq in fa.items():
         # Position and strand of each row are not needed here.
         matches[name] = [
             score
             for score, _pos, _strand in pwmscan(seq.upper(), pwm, c, nreport, scan_rc)
         ]
     return matches
Пример #12
0
	def pwm_scan(self, fa, cutoff=0.9, scan_strand=None, nreport=50):
		"""Return match positions per sequence, optionally limited to one strand.

		The absolute threshold is ``min + (max - min) * cutoff``. When
		``scan_strand`` is falsy every reported position is kept; otherwise a
		position is kept only if the reported strand and ``scan_strand`` map
		to the same symbol ("+" or "-").
		"""
		from gimmemotifs.c_metrics import pwmscan
		score_range = self.pwm_max_score() - self.pwm_min_score()
		threshold = self.pwm_min_score() + score_range * cutoff
		matrix = self.pwm
		to_symbol = {"+":"+",1:"+","1":"+","-":"-",-1:"-","-1":"-"}
		positions = {}
		for name, seq in fa.items():
			hits = pwmscan(seq.upper(), matrix, threshold, nreport)
			positions[name] = [
				pos
				for _score, pos, strand in hits
				if not scan_strand or to_symbol[scan_strand] == to_symbol[strand]
			]
		return positions
Пример #13
0
    def pwm_scan_all(self, fa, cutoff=0.9, nreport=50, scan_rc=True):
        """Return ``(pos, score, strand)`` tuples for every match in ``fa``.

        Parameters
        ----------
        fa
            Object whose ``items()`` yields ``(name, sequence)`` pairs.
        cutoff
            Fraction of the score range, converted to the absolute threshold
            ``min + (max - min) * cutoff``.
        nreport, scan_rc
            Forwarded to ``pwmscan``.

        Returns
        -------
        dict
            Name -> list of ``(pos, score, strand)`` tuples.
        """
        from gimmemotifs.c_metrics import pwmscan

        min_score = self.pwm_min_score()
        c = min_score + (self.pwm_max_score() - min_score) * cutoff
        pwm = self.pwm
        matches = {}
        # `name` instead of `id` so the builtin is not shadowed.
        for name, seq in fa.items():
            result = pwmscan(seq.upper(), pwm, c, nreport, scan_rc)
            # pwmscan rows are (score, pos, strand); reorder to put pos first.
            matches[name] = [(pos, score, strand) for score, pos, strand in result]
        return matches
Пример #14
0
def scan_sequence(seq, motifs, nreport, scan_rc):
    """Scan ``seq`` with every ``(motif, cutoff)`` pair in ``motifs``.

    Parameters
    ----------
    seq
        Sequence handed unchanged to ``pwmscan``.
    motifs
        Iterable of ``(motif, cutoff)`` pairs. A cutoff of ``None`` skips
        the motif and contributes an empty list.
    nreport
        Forwarded to ``pwmscan``; also the number of placeholder rows
        produced when nothing is found.
    scan_rc
        Forwarded to ``pwmscan``.

    Returns
    -------
    list
        One entry per motif, in input order.
    """
    ret = []
    for motif, cutoff in motifs:
        if cutoff is None:
            ret.append([])
            continue

        result = pwmscan(seq, motif.logodds, cutoff, nreport, scan_rc)
        min_score = motif.pwm_min_score()
        if cutoff <= min_score and len(result) == 0:
            # Build independent rows: `[[...]] * nreport` would repeat one
            # shared inner list, so mutating a row would change all of them.
            result = [[min_score, 0, 1] for _ in range(nreport)]
        ret.append(result)

    return ret
Пример #15
0
def seqcor(m1, m2, seq=None):
    """Return the maximum score-profile correlation between two motifs.

    Both motifs are scanned over the same sequence with ``pwmscan`` and the
    resulting score arrays are compared at every relative offset from 0 up
    to (but excluding) the length of each motif. The similarity at an offset
    is ``1 - distance.correlation(...)``.

    Parameters
    ----------
    m1, m2
        Motifs; must support ``len()`` and provide ``pwm`` and
        ``pwm_min_score()``.
    seq
        Sequence to scan. Defaults to the module-level ``RANDOM_SEQ``.

    Returns
    -------
    The largest similarity over all ``len(m1) + len(m2)`` offsets.
    """
    l1 = len(m1)
    l2 = len(m2)

    l = max(l1, l2)

    if seq is None:
        seq = RANDOM_SEQ

    # The slice bounds below must follow the sequence that is actually
    # scanned, so bind L here instead of relying on a name from outside.
    L = len(seq)

    # Scan the sequence with both motifs
    result1 = np.array(
        pwmscan(seq, m1.pwm, m1.pwm_min_score(), len(seq), False, True))
    result2 = np.array(
        pwmscan(seq, m2.pwm, m2.pwm_min_score(), len(seq), False, True))

    # Return maximum correlation
    c = []
    for i in range(l1):
        # result2 is read i entries ahead of result1
        c.append(1 -
                 distance.correlation(result1[:L - l - i], result2[i:L - l]))
    for i in range(l2):
        # result1 is read i entries ahead of result2
        c.append(1 -
                 distance.correlation(result1[i:L - l], result2[:L - l - i]))
    return max(c)
Пример #16
0
def seqcor(m1,m2):
    """Return the maximum score-profile correlation between two motifs.

    A random sequence of 10**4 nucleotides is generated and scanned with
    both motifs; the score lists are then compared at every relative offset
    below the length of each motif, and the largest
    ``1 - distance.correlation(...)`` value is returned.
    """
    len1 = len(m1)
    len2 = len(m2)
    longest = max(len1, len2)

    # Create random sequence
    L = 10 ** 4
    alphabet = ['A', 'C', 'T', 'G']
    random_seq = "".join(random.choice(alphabet) for _ in range(L))
    target = random_seq.upper()
    n_scores = len(random_seq)

    # Scan random sequence
    scores1 = pwmscan(target, m1.pwm, m1.pwm_min_score(), n_scores, False, True)
    scores2 = pwmscan(target, m2.pwm, m2.pwm_min_score(), n_scores, False, True)

    # Return maximum correlation
    end = L - longest
    similarities = [
        1 - distance.correlation(scores1[:end - shift], scores2[shift:end])
        for shift in range(len1)
    ]
    similarities += [
        1 - distance.correlation(scores1[shift:end], scores2[:end - shift])
        for shift in range(len2)
    ]
    return max(similarities)
Пример #17
0
def seqcor(m1,m2):
    """Compare two motifs through their score profiles on a random sequence.

    Both motifs are scanned over one freshly generated sequence of 10**4
    nucleotides. For each offset below ``len(m1)`` (second profile shifted)
    and below ``len(m2)`` (first profile shifted) the value
    ``1 - distance.correlation(...)`` is computed; the maximum is returned.
    """
    def similarity(left, right):
        # Correlation distance turned into a similarity.
        return 1 - distance.correlation(left, right)

    width1, width2 = len(m1), len(m2)
    widest = max(width1, width2)

    # Create random sequence
    L = 10 ** 4
    letters = []
    for _ in range(L):
        letters.append(random.choice(['A', 'C', 'T', 'G']))
    random_seq = "".join(letters)

    # Scan random sequence
    profile1 = pwmscan(random_seq.upper(), m1.pwm, m1.pwm_min_score(), len(random_seq), False, True)
    profile2 = pwmscan(random_seq.upper(), m2.pwm, m2.pwm_min_score(), len(random_seq), False, True)

    # Return maximum correlation
    stop = L - widest
    values = []
    for offset in range(width1):
        values.append(similarity(profile1[:stop - offset], profile2[offset:stop]))
    for offset in range(width2):
        values.append(similarity(profile1[offset:stop], profile2[:stop - offset]))
    return max(values)
Пример #18
0
	def pwm_scan_to_gff(self, fa, gfffile, cutoff=0.9, rc=True, nreport=50, append=False):
		"""Scan sequences with this motif and write every match as a GFF line.

		``fa.items()`` must yield ``(name, sequence)`` pairs. ``cutoff`` is a
		fraction of the score range and is converted to the absolute
		threshold ``min + (max - min) * cutoff``. ``nreport`` is forwarded to
		``pwmscan``. With ``append`` the output file is opened in append
		mode, otherwise it is overwritten.

		NOTE(review): ``rc`` is accepted but never used in this method.
		"""
		from gimmemotifs.c_metrics import pwmscan

		min_score = self.pwm_min_score()
		c = min_score + (self.pwm_max_score() - min_score) * cutoff
		pwm = self.pwm
		motif_len = len(pwm)

		strandmap = {-1:"-","-1":"-","-":"-","1":"+",1:"+","+":"+"}
		# The context manager closes the file even if scanning or formatting raises.
		with open(gfffile, "a" if append else "w") as out:
			# `name` instead of `id` so the builtin is not shadowed.
			for name, seq in fa.items():
				result = pwmscan(seq.upper(), pwm, c, nreport)
				for score, pos, strand in result:
					out.write("%s\tpwmscan\tmisc_feature\t%s\t%s\t%s\t%s\t.\tmotif_name \"%s\" ; motif_instance \"%s\"\n" %
						(name, pos, pos + motif_len, score, strandmap[strand], self.id, seq[pos:pos + motif_len]))
Пример #19
0
	def pwm_scan_to_gff(self, fa, gfffile, cutoff=0.9, nreport=50, scan_rc=True, append=False):
		"""Scan sequences with this motif and write every match as a GFF line.

		``fa.items()`` must yield ``(name, sequence)`` pairs. ``cutoff`` is a
		fraction of the score range and is converted to the absolute
		threshold ``min + (max - min) * cutoff``. ``nreport`` and ``scan_rc``
		are forwarded to ``pwmscan``. With ``append`` the output file is
		opened in append mode, otherwise it is overwritten.
		"""
		from gimmemotifs.c_metrics import pwmscan

		min_score = self.pwm_min_score()
		c = min_score + (self.pwm_max_score() - min_score) * cutoff
		pwm = self.pwm
		motif_len = len(pwm)

		strandmap = {-1:"-","-1":"-","-":"-","1":"+",1:"+","+":"+"}
		# The context manager closes the file even if scanning or formatting raises.
		with open(gfffile, "a" if append else "w") as out:
			# `name` instead of `id` so the builtin is not shadowed.
			for name, seq in fa.items():
				result = pwmscan(seq.upper(), pwm, c, nreport, scan_rc)
				for score, pos, strand in result:
					out.write("%s\tpwmscan\tmisc_feature\t%s\t%s\t%s\t%s\t.\tmotif_name \"%s\" ; motif_instance \"%s\"\n" %
						(name, pos, pos + motif_len, score, strandmap[strand], self.id, seq[pos:pos + motif_len]))
# Command-line script: for every motif in the PWM file, report the score (and
# the corresponding relative cutoff) found at the requested percentile of the
# best per-sequence scores in the input FASTA file.
parser = OptionParser()
parser.add_option("-p", "--pwmfile", dest="pwmfile", help="File with pwms", metavar="FILE")
parser.add_option("-i", "--inputfile", dest="inputfile", help="FASTA file with background sequences", metavar="FILE")
parser.add_option("-f", "--fpr", dest="fpr", help="Desired fpr", type="float", metavar="FLOAT")

(options, args) = parser.parse_args()

# Compare fpr against None instead of testing truthiness, so an explicit
# "-f 0" is accepted; the range check below allows 0 <= fpr <= 1.
if not options.pwmfile or not options.inputfile or options.fpr is None:
	parser.print_help()
	sys.exit()

if options.fpr < 0 or options.fpr > 1:
	print("Please specify a FPR between 0 and 1")
	sys.exit()

f = Fasta(options.inputfile)
motifs = pwmfile_to_motifs(options.pwmfile)

print("Motif\tScore\tCutoff")
for motif in motifs:
	pwm = motif.pwm
	scores = []
	min_score = motif.pwm_min_score()
	for name,seq in f.items():
		# Threshold = minimum score and nreport = 1: keep the score of the
		# first row returned for each sequence.
		result = pwmscan(seq.upper(), pwm, min_score, 1, True)
		score = result[0][0]
		scores.append(score)
	# Score at the (100 - 100 * fpr)th percentile of the collected scores.
	opt_score = scoreatpercentile(scores, 100 - (100 * options.fpr))
	# Express that score as a fraction of the motif's score range.
	cutoff = (opt_score - min_score) / (motif.pwm_max_score() - min_score)
	print("%s\t%s\t%s" % (motif.id, opt_score , cutoff))
Пример #21
0
# Script fragment: scan every sequence of a FASTA file with every motif of a
# PWM file and print one line per match. The snippet is cut off inside the
# final print statement.

# Override the number of matches requested per scan when the option is set.
if options.nreport:
	nreport = int(options.nreport)

# Relative cutoff; converted to a per-motif absolute score inside the loop.
cutoff = float(options.cutoff)

motifs = pwmfile_to_motifs(options.pwmfile)

# Selects the four-column tab-separated output instead of the GFF-style line.
bed = options.bed

f = Fasta(inputfile)
strandmap = {-1:"-",1:"+"}
for (id,seq) in f.items():
	for motif in motifs:
		pwm = motif.pwm
		# Absolute threshold: minimum score plus `cutoff` times the score range.
		c =  motif.pwm_min_score() + (motif.pwm_max_score() - motif.pwm_min_score()) * cutoff 
		result = pwmscan(seq.upper(), pwm, c, nreport, options.scan_rc)
		for (score, pos, strand) in result:
			if bed:
				# Use the part of the name before the first space and split it
				# on ":"; a non-empty right-hand part is parsed as "start-end"
				# and `start` is added to the match position.
				# NOTE(review): the unpacking requires exactly one ":" in
				# `first`; the else branch is only reached for an empty `loc`.
				first = id.split(" ")[0]	
				(chr,loc) = first.split(":")
				if loc:
					(start, end) = map(int, loc.split("-"))
					print "%s\t%s\t%s\t%s" % (chr, start + pos, start + pos + len(pwm) , score)
				else:
					print "%s\t%s\t%s\t%s" % (id, pos, pos +  len(pwm), score)
			else:
				print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tmotif_name \"%s\" ; motif_instance \"%s\"" % (
				id, 
				"pwmscan", 
				"misc_feature", 
				pos, pos + len(pwm) , 
Пример #22
0
# Script fragment: scan every sequence of a FASTA file with every motif of a
# PWM file and print one line per match. The snippet is cut off inside the
# final print statement.

# Override the number of matches requested per scan when the option is set.
if options.nreport:
	nreport = int(options.nreport)

# Relative cutoff; converted to a per-motif absolute score inside the loop.
cutoff = float(options.cutoff)

motifs = pwmfile_to_motifs(options.pwmfile)

# Selects the four-column tab-separated output instead of the GFF-style line.
bed = options.bed

f = Fasta(inputfile)
strandmap = {-1:"-",1:"+"}
for (id,seq) in f.items():
	for motif in motifs:
		pwm = motif.pwm
		# Absolute threshold: minimum score plus `cutoff` times the score range.
		c =  motif.pwm_min_score() + (motif.pwm_max_score() - motif.pwm_min_score()) * cutoff 
		result = pwmscan(seq.upper(), pwm, c, nreport)
		for (score, pos, strand) in result:
			if bed:
				# Use the part of the name before the first space and split it
				# on ":"; a non-empty right-hand part is parsed as "start-end"
				# and `start` is added to the match position.
				# NOTE(review): the unpacking requires exactly one ":" in
				# `first`; the else branch is only reached for an empty `loc`.
				first = id.split(" ")[0]	
				(chr,loc) = first.split(":")
				if loc:
					(start, end) = map(int, loc.split("-"))
					print "%s\t%s\t%s\t%s" % (chr, start + pos, start + pos + len(pwm) , score)
				else:
					print "%s\t%s\t%s\t%s" % (id, pos, pos +  len(pwm), score)
			else:
				print "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\tmotif_name \"%s\" ; motif_instance \"%s\"" % (
				id, 
				"pwmscan", 
				"misc_feature", 
				pos, pos + len(pwm) , 
Пример #23
0
                  dest="fpr",
                  help="Desired fpr",
                  type="float",
                  metavar="FLOAT")

(options, args) = parser.parse_args()

# Compare fpr against None instead of testing truthiness, so an explicit
# FPR of 0 is accepted; the range check below allows 0 <= fpr <= 1.
if not options.pwmfile or not options.inputfile or options.fpr is None:
    parser.print_help()
    sys.exit()

if options.fpr < 0 or options.fpr > 1:
    print("Please specify a FPR between 0 and 1")
    sys.exit()

f = Fasta(options.inputfile)
motifs = pwmfile_to_motifs(options.pwmfile)

# For every motif, report the score found at the requested percentile of the
# per-sequence scores, plus that score as a fraction of the score range.
print("Motif\tScore\tCutoff")
for motif in motifs:
    pwm = motif.pwm
    scores = []
    min_score = motif.pwm_min_score()
    for name, seq in f.items():
        # Threshold = minimum score and nreport = 1: keep the score of the
        # first row returned for each sequence.
        result = pwmscan(seq.upper(), pwm, min_score, 1, True)
        score = result[0][0]
        scores.append(score)
    # Score at the (100 - 100 * fpr)th percentile of the collected scores.
    opt_score = scoreatpercentile(scores, 100 - (100 * options.fpr))
    cutoff = (opt_score - min_score) / (motif.pwm_max_score() - min_score)
    print("%s\t%s\t%s" % (motif.id, opt_score, cutoff))