Exemplo n.º 1
0
def mkdtst_test(fname, window, answer):
    """Yield per-record test datasets built from a PSSM file.

    For every record read from ``fname`` a pair ``(idch, rows)`` is yielded:
    ``idch`` is the stripped record identifier and ``rows`` maps each
    0-based position ``pos`` to ``(label, mkvec(pssm, window, pos))``.

    * ``answer`` is not None: records come from ``parse_pssm4pos(fname)``
      and the label is 1 when ``util.ans(answer).isans(idch, start + pos)``
      is true, otherwise -1.
    * ``answer`` is None: records come from ``parse_pssm(fname)`` and every
      position is labelled -1 (negative dataset).
    """
    if answer is None:
        # No answer file was given: everything is a negative sample.
        for raw_id, matrix in parse_pssm(fname):
            chain = raw_id.strip()
            rows = {}
            for pos in range(len(matrix)):
                rows[pos] = (-1, mkvec(matrix, window, pos))
            yield chain, rows
        return

    known_sites = util.ans(answer)
    for raw_id, offset, matrix in parse_pssm4pos(fname):
        chain = raw_id.strip()
        rows = {}
        for pos in range(len(matrix)):
            # The label is looked up at the offset position (offset + pos);
            # the dictionary key stays the 0-based index.
            label = 1 if known_sites.isans(chain, offset + pos) else -1
            rows[pos] = (label, mkvec(matrix, window, pos))
        yield chain, rows
Exemplo n.º 2
0
Arquivo: test.py Projeto: BILAB/Tools
def getpos_fasta(fname,answer):
	"""
	Yield, for each FASTA record, the residues found at answer positions.

	fname  : passed to seq2feature.fasta2seq, which is iterated as
	         (idch, start, seq) triples
	answer : passed to util.ans to build the answer lookup

	Yields (idch, residues): idch is the stripped record identifier and
	residues maps start + pos to the residue seq[pos], restricted to the
	positions for which ans_sgr.isans(idch, start + pos) is true.
	"""
	ans_sgr = util.ans(answer)
	for idch,start,seq in seq2feature.fasta2seq(fname):
		idch = idch.strip()
		# keys are offset positions (start + pos), not 0-based indices
		yield idch,{start + pos:res for pos,res in enumerate(seq)
			if ans_sgr.isans(idch,start + pos)}
Exemplo n.º 3
0
	def mkdtst_test(self,window,answer):
		"""
		Yield labelled test data for every record of self.parse_pssm4pos().

		window : passed through to self.mkvec
		answer : passed to util.ans to build the answer lookup

		Yields (idch, idchs, label, dataset), where all three lists have one
		entry per position of the record:
		  idchs   - (idch, start + pos) pairs
		  label   - 1 if isans(idch, start + pos) is true, otherwise -1
		  dataset - self.mkvec(v_pssm, window, pos)
		"""
		known_sites = util.ans(answer)

		def to_label(chain,abs_pos):
			# +1 for an answer position, -1 for anything else
			if known_sites.isans(chain,abs_pos):
				return 1
			return -1

		for raw_id,offset,matrix in self.parse_pssm4pos():
			chain = raw_id.strip()
			positions = range(len(matrix))
			keys = [(chain,offset + p) for p in positions]
			vectors = [self.mkvec(matrix,window,p) for p in positions]
			labels = [to_label(chain,offset + p) for p in positions]
			yield chain,keys,labels,vectors
Exemplo n.º 4
0
def mkdtst_test(fname, window, ftr, answer):
    """Yield labelled per-position rows for each record of a FASTA file.

    Records are read with ``fasta2seq(fname)`` as ``(idch, start, seq)``.
    For each one a pair ``(idch, rows)`` is yielded, where ``idch`` is the
    stripped identifier and ``rows`` maps every 0-based position ``pos`` to
    ``(label, mkvec(ftr, seq, window, pos))``.  The label is 1 when
    ``util.ans(answer).isans(idch, start + pos)`` is true, otherwise -1.
    """
    known_sites = util.ans(answer)

    def label_of(chain, abs_pos):
        # +1 for an answer position, -1 for anything else
        return 1 if known_sites.isans(chain, abs_pos) else -1

    for raw_id, offset, residues in fasta2seq(fname):
        chain = raw_id.strip()
        rows = {}
        for pos in range(len(residues)):
            # Label lookup uses the offset position; the key stays 0-based.
            rows[pos] = (
                label_of(chain, offset + pos),
                mkvec(ftr, residues, window, pos),
            )
        yield chain, rows
Exemplo n.º 5
0
def mkdtst_test4svr(fname, window, answer):
    """Yield per-position rows whose target is a distance value.

    Records are read with ``parse_pssm4pos(fname)`` as
    ``(idch, start, pssm)``.  For each one a pair ``(idch, rows)`` is
    yielded, where ``idch`` is the stripped identifier and ``rows`` maps
    every 0-based position ``pos`` to
    ``(util.ans(answer).get_dist(start + pos, idch),
    mkvec(pssm, window, pos))``.
    """
    known_sites = util.ans(answer)

    for raw_id, offset, matrix in parse_pssm4pos(fname):
        chain = raw_id.strip()
        rows = {}
        for pos in range(len(matrix)):
            # get_dist takes the offset position first, then the identifier.
            target = known_sites.get_dist(offset + pos, chain)
            rows[pos] = (target, mkvec(matrix, window, pos))
        yield chain, rows
Exemplo n.º 6
0
	def mkdtst_train(self,window,answer):
		"""
		Yield the positive (answer-position) training data for every record
		of self.parse_pssm4pos().

		window : passed through to self.mkvec
		answer : passed to util.ans to build the answer lookup

		Yields (idch, idchs, label, dataset).  Only positions for which
		ans_sgr.isans(idch, start + pos) is true are kept, and the three
		lists are aligned entry by entry:
		  idchs   - (idch, start + pos) pairs
		  label   - 1 for every kept position
		  dataset - self.mkvec(v_pssm, window, pos)
		"""
		ans_sgr = util.ans(answer)

		for idch,start,v_pssm in self.parse_pssm4pos():
			idch = idch.strip()
			# Select the answer positions once so that idchs, label and
			# dataset are all derived from the same list and stay aligned.
			ans_pos = [pos for pos in range(len(v_pssm)) if ans_sgr.isans(idch,start + pos)]
			idchs = [(idch,pos + start) for pos in ans_pos]
			dataset = [self.mkvec(v_pssm,window,pos) for pos in ans_pos]
			# every kept position is an answer position, hence label +1
			label = [1 for pos in ans_pos]

			yield idch,idchs,label,dataset
Exemplo n.º 7
0
	def mkdtst_neg(self,window,answer,size = 1):
		"""
		Yield randomly sampled non-answer positions for every record of
		self.parse_pssm4pos().

		window : passed through to self.mkvec
		answer : passed to util.ans to build the answer lookup
		size   : multiplier; at most size * len(ans_sgr.get_pos(idch))
		         positions are kept per record

		Yields (idch, idchs, label, dataset), aligned entry by entry:
		  idchs   - (idch, start + pos) pairs of the sampled positions
		  label   - -1 for every sampled position
		  dataset - self.mkvec(pssm, window, pos)
		The sample order depends on random.shuffle.
		"""

		ans_sgr = util.ans(answer)
		for idch,start,pssm in self.parse_pssm4pos():
			# Strip before any lookup so the identifier passed to
			# isans/get_pos matches the one yielded to the caller.
			idch = idch.strip()
			# NOTE(review): the range starts at 1, so index 0 is never a
			# candidate - confirm this is intentional.
			_indx = [i for i in range(1,len(pssm)) if not ans_sgr.isans(idch,i + start)]
			random.shuffle(_indx)
			n_keep = size*len(ans_sgr.get_pos(idch))
			selected_pos = _indx[:n_keep]
			idchs = [(idch,pos + start) for pos in selected_pos]
			dataset = [self.mkvec(pssm,window,pos) for pos in selected_pos]
			label = [-1 for pos in selected_pos]
			yield idch,idchs,label,dataset
Exemplo n.º 8
0
def mkdtst_near(fname, window, ftr, answer, low, up):
    """Yield labelled rows for positions near or at answer positions.

    Records are read with ``fasta2seq(fname)`` as ``(idch, start, seq)``.
    For each one a pair ``(idch, rows)`` is yielded, where ``idch`` is the
    stripped identifier and ``rows`` maps a 0-based position ``pos`` to
    ``(label, mkvec(ftr, seq, window, pos))``.

    A position is kept when ``low <= get_dist(start + pos, idch) <= up``
    or when ``isans(idch, start + pos)`` is true.  The label is 1 for an
    answer position and -1 otherwise.
    """
    known_sites = util.ans(answer)

    def label_of(chain, abs_pos):
        # +1 for an answer position, -1 for anything else
        return 1 if known_sites.isans(chain, abs_pos) else -1

    for raw_id, offset, residues in fasta2seq(fname):
        chain = raw_id.strip()
        rows = {}
        for pos in range(len(residues)):
            abs_pos = offset + pos
            # The distance band is tested first; isans is only consulted
            # when the position falls outside the band.
            if not (low <= known_sites.get_dist(abs_pos, chain) <= up
                    or known_sites.isans(chain, abs_pos)):
                continue
            rows[pos] = (
                label_of(chain, abs_pos),
                mkvec(ftr, residues, window, pos),
            )
        yield chain, rows
Exemplo n.º 9
0
def mkdtst_train(fname, window, answer):
    """Yield the answer-position rows of each record of a PSSM file.

    Records are read with ``parse_pssm4pos(fname)`` as
    ``(idch, start, pssm)``.  For each one a pair ``(idch, rows)`` is
    yielded, where ``idch`` is the stripped identifier and ``rows`` maps a
    0-based position ``pos`` to ``(label, mkvec(pssm, window, pos))``.

    Only positions for which ``util.ans(answer).isans(idch, start + pos)``
    is true are kept; the label is computed by the same lookup (1 if true,
    otherwise -1).
    """
    known_sites = util.ans(answer)

    def label_of(chain, abs_pos):
        # +1 for an answer position, -1 for anything else
        return 1 if known_sites.isans(chain, abs_pos) else -1

    for raw_id, offset, matrix in parse_pssm4pos(fname):
        chain = raw_id.strip()
        rows = {}
        for pos in range(len(matrix)):
            # Skip everything that is not an answer position.
            if not known_sites.isans(chain, offset + pos):
                continue
            rows[pos] = (label_of(chain, offset + pos), mkvec(matrix, window, pos))
        yield chain, rows
Exemplo n.º 10
0
def mkdtst_near(fname, window, answer, low, up):
    """Yield labelled PSSM rows for positions near or at answer positions.

    Records are read with ``parse_pssm4pos(fname)`` as
    ``(idch, start, pssm)``.  For each one a pair ``(idch, rows)`` is
    yielded, where ``idch`` is the stripped identifier and ``rows`` maps a
    0-based position ``pos`` to ``(label, mkvec(pssm, window, pos))``.

    A position is kept when ``low <= get_dist(start + pos, idch) <= up``
    or when ``isans(idch, start + pos)`` is true.  The label is 1 for an
    answer position and -1 otherwise.
    """
    known_sites = util.ans(answer)

    def label_of(chain, abs_pos):
        # +1 for an answer position, -1 for anything else
        return 1 if known_sites.isans(chain, abs_pos) else -1

    for raw_id, offset, matrix in parse_pssm4pos(fname):
        chain = raw_id.strip()
        rows = {}
        for pos in range(len(matrix)):
            abs_pos = offset + pos
            # The distance band is tested first; isans is only consulted
            # when the position falls outside the band.
            if not (low <= known_sites.get_dist(abs_pos, chain) <= up
                    or known_sites.isans(chain, abs_pos)):
                continue
            rows[pos] = (label_of(chain, abs_pos), mkvec(matrix, window, pos))
        yield chain, rows
Exemplo n.º 11
0
def mkdtst_train(fname, window, ftr, answer):
    """Yield the answer-position rows of each record of a FASTA file.

    Records are read with ``fasta2seq(fname)`` as ``(idch, start, seq)``.
    For each one a pair ``(idch, rows)`` is yielded, where ``idch`` is the
    stripped identifier and ``rows`` maps a 0-based position ``pos`` to
    ``(label, mkvec(ftr, seq, window, pos))``.

    Only positions for which ``util.ans(answer).isans(idch, start + pos)``
    is true are kept; the label is computed by the same lookup (1 if true,
    otherwise -1).
    """
    known_sites = util.ans(answer)

    def label_of(chain, abs_pos):
        # +1 for an answer position, -1 for anything else
        return 1 if known_sites.isans(chain, abs_pos) else -1

    for raw_id, offset, residues in fasta2seq(fname):
        chain = raw_id.strip()
        rows = {}
        for pos in range(len(residues)):
            # Skip everything that is not an answer position.
            if not known_sites.isans(chain, offset + pos):
                continue
            rows[pos] = (
                label_of(chain, offset + pos),
                mkvec(ftr, residues, window, pos),
            )
        yield chain, rows
Exemplo n.º 12
0
	def make_dataset(self,window,answer):
		"""
		Yield labelled data for every record of self.parse_pssm().

		window : passed through to self.mkvec
		answer : passed to util.ans when not None; None marks an unknown
		         sample

		Yields (idch, idchs, label, dataset), where all three lists have one
		entry per position of the record:
		  idchs   - (idch, pos) pairs
		  label   - 0 everywhere when answer is None; otherwise 1 if
		            isans(idch, pos) is true, else -1
		  dataset - self.mkvec(v_pssm, window, pos)
		"""
		if answer is None:
			# unknown sample: there is nothing to look up, every label is 0
			def to_label(chain,pos):
				return 0
		else:
			known_sites = util.ans(answer)

			def to_label(chain,pos):
				if known_sites.isans(chain,pos):
					return 1
				return -1

		for raw_id,matrix in self.parse_pssm():
			chain = raw_id.strip()
			positions = range(len(matrix))
			keys = [(chain,p) for p in positions]
			vectors = [self.mkvec(matrix,window,p) for p in positions]
			labels = [to_label(chain,p) for p in positions]
			yield chain,keys,labels,vectors
Exemplo n.º 13
0
def iter_idch2result(idch,name,window,c,g,fans,fname):
	"""
	Yield (position, is_answer, value) for every value produced by
	iter_desc(name, window, c, g, idch).

	The position is the 0-based index of the value plus starts(fname)[idch];
	is_answer is util.ans(fans).isans(idch, position).
	"""
	known_sites = util.ans(fans)
	offset = starts(fname)[idch]
	values = iter_desc(name,window,c,g,idch)
	for index,val in enumerate(values):
		abs_pos = index + offset
		yield abs_pos,known_sites.isans(idch,abs_pos),val