par.add_argument('--o', required = True, help = 'name of output file') par.add_argument('--uplimit', type = float, help = 'top n sequences to calculate') args = par.parse_args() database_path = '/home/anthill/fzheng/home/searchDB/support_bc-30-sc-correct-20141022/others' odir = os.getcwd() ldir = General.createLocalSpace() outfh = open(ldir + '/' + args.o, 'w') uplimit = args.uplimit nseq = 0 for match_line in open(args.m): if (uplimit != None) and (nseq == uplimit): break match_line = match_line.strip() indices = Analyze.index_from_match(match_line) index1, index2 = indices[args.n[0]], indices[args.n[1]] target_pds = match_line.split()[1] targetid = General.getBase( General.removePath(match_line.split()[1]) ) env_dict = database_path + '/' + targetid[1:3] + '/' + targetid + '.freedom.db' db = shelve.open(env_dict, 'r') # extract post-processed pdb files from target_pds resfile = database_path + '/' + targetid[1:3] + '/' + General.changeExt( General.removePath(target_pds), 'post.res') allres = open(resfile).read().splitlines() resid1, resid2 = allres[index1], allres[index2] resid1, resid2 = resid1[0] + ',' + resid1[1:], resid2[0] + ',' + resid2[1:] fields = ['sumcond', 'crwdnes', 'freedom', 'phi', 'psi', 'aa'] outfh.write(targetid + '\t') if not resid1 in db:
if args.conres != None: tempfile2 = General.changeExt(args.m, 'seqcontext.fasta2') tempfh2 = open(ldir + '/' + tempfile2, 'w') # output file names nr_matchf = args.outh + '_' + args.m nr_seqf = General.changeExt(nr_matchf, 'seq') nr_env = None oenv = General.changeExt(args.m, args.env) if os.path.isfile(oenv): nr_env = General.changeExt(nr_matchf, args.env) # write a custom .fasta file for match in matches: match_region_indices = Analyze.index_from_match(match) central_index = match_region_indices[args.cres - 1] match_id = General.getBase( General.removePath( match.split()[1] ) ) fullsequence = database[match_id] if central_index - args.wd < 1: seqcontext = fullsequence[0:(2 * args.wd + 1)] elif central_index + args.wd > len(fullsequence): seqcontext = fullsequence[-(2 * args.wd + 1):] else: seqcontext = fullsequence[(central_index - args.wd - 1):(central_index + args.wd)] tempfh.write('>match:'+str(matchind)+'\n'+seqcontext+'\n') if args.conres != None: con_index = match_region_indices[args.conres -1] if con_index - args.wd < 1: