def _mkneg(fname,window,ftr,pssm = False):
    """Yield randomly sampled negative examples from the sequences in ``fname``.

    The file is parsed with ``mkftr.getseq``; the first element of every
    record is used as its id.  The ids are partitioned with
    ``mkftr.idchoice`` and, for every group, the file is re-read from the
    start so that records whose id belongs to the group can be emitted.

    Args:
        fname: Path of the sequence file to read.
        window: Passed through unchanged to ``mkvec``.
        ftr: Passed through unchanged to ``mkvec``.
        pssm: When true, each record body is split on whitespace and
            converted to a list of ints before use; when false the body is
            used as a string and records containing ``'X'`` are skipped.

    Yields:
        ``(negid, {pos: (-1, vector)})`` where the dict holds up to five
        randomly chosen positions (position 0 is never chosen), inserted in
        ascending order, each labelled ``-1`` and paired with the value
        returned by ``mkvec(ftr, seq, window, pos, pssm=pssm)``.

    Records shorter than 100 elements are skipped.  The file stays open
    while the generator is suspended because it is re-read for each group.
    """
    with open(fname) as fp:
        neg = [record[0] for record in mkftr.getseq(fp)]
        # NOTE(review): the original author questioned whether grouping the
        # ids through idchoice() is actually needed here.
        for group in mkftr.idchoice(neg):
            # getseq() consumed the file above (and on every previous
            # group), so rewind before scanning it again.
            fp.seek(0)
            for negid, seq in mkftr.getseq(fp):
                if pssm:
                    # For PSSM input the record body is a run of integers.
                    seq = [int(i) for i in seq.split()]
                    if len(seq) < 100:
                        continue
                else:
                    # `'X' in seq` instead of `seq.find('X') > 0`: find()
                    # returns 0 for a match at the first position, which
                    # the old test let through.
                    if len(seq) < 100 or 'X' in seq:
                        continue
                if negid in group:
                    _indx = list(range(1, len(seq)))
                    random.shuffle(_indx)
                    yield negid, {pos: (-1, mkvec(ftr, seq, window, pos, pssm = pssm))
                                  for pos in sorted(_indx[:5])}
def _mkdtst(window,ftr,represent = 'dataset/sugar.txt', answer = 'dataset/answer_sugar.txt', name = 'sugar', flg = True,pssm = False): def ans2int(ans,idch,pos): if ans.isans(idch,pos): return 1 return -1 with open(represent) as fp: lid = [line.strip() for line in iter(fp.readline,'')] ans_sgr = mkftr.ans(answer) # ? Dose idchoice need ? for cnt,group in enumerate(mkftr.idchoice(lid)): for idch,start,seq in mkftr.slider(group,name,pssm = pssm): idch = idch.strip() # 2012/1/31 pos -> pos + start # !!! now nodyfying !!! yield idch,{pos:(ans2int(ans_sgr,idch,start + pos),mkvec(ftr,seq,window,pos,pssm = pssm)) for pos in range(len(seq)) if ans_sgr.isans(idch,start + pos) or not flg} # !!! now modfying !!! """