def get_positions_to_skip_from_alignment(alignment, fraglen):
    """Return the residue positions at which no fragment should be started.

    A fragment of length ``fraglen`` that starts at position ``p`` covers
    ``p .. p + fraglen - 1``.  Any start position whose window would run past
    a chain break or past the last residue is skipped, i.e. the last
    ``fraglen - 1`` positions before each break/terminus.

    eg. if fraglen equals to 3
        sequence        FDASFDSA-----FDSAFDSAFDSA
        pos_to_acquire  ^^^^^^       ^^^^^^^^^^

    Args:
        alignment: alignment object accepted by ``alignment_util.seq_mapping``.
        fraglen: fragment length (int).

    Returns:
        Sorted list of unique positions to skip, expressed in the numbering
        of the keys of ``alignment_util.seq_mapping(alignment)``.  Empty list
        when the mapping is empty.  The list may contain numbers below the
        first key when a segment is shorter than ``fraglen - 1``.
    """
    seq_map_dict = alignment_util.seq_mapping(alignment)
    res_nums = sorted(seq_map_dict)
    if not res_nums:
        return []

    # A chain break sits right after ``rsn`` when the mapped numbers of two
    # consecutive keys are not adjacent.
    chainbreak_resnums = [
        rsn
        for rsn, next_rsn in zip(res_nums, res_nums[1:])
        if seq_map_dict[next_rsn] - seq_map_dict[rsn] > 1
    ]
    # The last residue is always a terminus.  Handling it here (instead of
    # via an IndexError fall-through) avoids reading an unbound/stale
    # ``next_rsn``, which crashed on single-residue mappings.
    chainbreak_resnums.append(res_nums[-1])

    rsd_to_skip = set()
    for rsd in chainbreak_resnums:
        rsd_to_skip.update(range(rsd - fraglen + 2, rsd + 1))

    return sorted(rsd_to_skip)
def align_and_renumber_pdb(fulllength_fasta, truncated_pdbfile, ignore_check=False):
    """Renumber a single-chain PDB according to its alignment to a full-length sequence.

    This function is going to make the renumber_pdb() obsolete.

    The sequence read from ``fulllength_fasta`` is aligned with the sequence
    extracted from ``truncated_pdbfile``; every residue line of the
    intermediate ``temp.pdb`` then gets its residue-number field (string
    slice ``[22:26]``) replaced with the number looked up in the sequence
    mapping of that alignment.

    Args:
        fulllength_fasta: path to the FASTA file of the full-length sequence.
        truncated_pdbfile: path to the PDB file to renumber.
        ignore_check: when true, skip
            ``alignment_util.correct_alignment_using_pdb`` and only write
            ``temp.pdb`` through ``pdb_idx1``.

    Returns:
        The renumbered PDB as one string: two ``REMARK`` lines holding both
        rows of the alignment, the renumbered lines, and a final ``TER``.

    Raises:
        AssertionError: if ``temp.pdb`` contains more than one chain.

    Side effects:
        Uses ``temp.pdb`` in the current working directory as scratch space;
        it is removed on exit, including when an error occurs.
    """
    temp_pdb = "temp.pdb"

    # make alignment
    fl_seq = seq_util.fasta_file_reader(fulllength_fasta)
    tc_seq = seq_util.pdb2fasta(truncated_pdbfile)
    alignment = alignment_util.align_two_seqs(fl_seq, tc_seq)

    if ignore_check:
        # Produce temp.pdb ourselves for the parsing step below.
        pdb_idx1(truncated_pdbfile, temp_pdb)
    else:
        # NOTE(review): per the original comment, correct_alignment_using_pdb
        # is expected to run pdb_idx1 itself and leave temp.pdb behind --
        # confirm against alignment_util.
        alignment = alignment_util.correct_alignment_using_pdb(alignment, truncated_pdbfile, False)

    try:
        seq_map = alignment_util.seq_mapping(alignment)
        _xyz_dict, pdbline_dict, _resname_dict = create_xyzDict_bychain(temp_pdb)

        # list(...) keeps the indexing below valid on Python 3 dict views too.
        chains = list(pdbline_dict.keys())
        assert len(chains) == 1, (
            "this script does not deal with pdbs containing multiple chains (%s)" % (chains,)
        )
        chain_pdblines = pdbline_dict[chains[0]]

        out_pdblines = [
            "REMARK full_length_aln %s\n" % alignment[0],
            "REMARK truncated_aln %s\n" % alignment[1],
        ]
        for rsn in sorted(chain_pdblines.keys()):
            newrsn = seq_map[rsn]
            # [:-1], because the last item in the list is ''
            for line in chain_pdblines[rsn].split("\n")[:-1]:
                out_pdblines.append(line[0:22] + "%4s" % newrsn + line[26:] + "\n")
        out_pdblines.append("TER\n")
        return "".join(out_pdblines)
    finally:
        # Always clean up the scratch file, even when parsing, the
        # single-chain check or the mapping lookup fails.
        if os.path.exists(temp_pdb):
            os.remove(temp_pdb)
# Build the alignment between the full-length sequence (the one the fragment
# file was made for) and the truncated construct, given either as a PDB or
# as a FASTA file.
fl_seq = seq_util.fasta_file_reader(opts.fragfile_fasta)
if opts.truncated_pdb:
    tc_seq = seq_util.pdb2fasta(opts.truncated_pdb)
    alignment = alignment_util.correct_alignment_using_pdb(
        alignment_util.align_two_seqs(fl_seq, tc_seq), opts.truncated_pdb
    )
elif opts.truncated_fasta:
    tc_seq = seq_util.fasta_file_reader(opts.truncated_fasta)
    alignment = alignment_util.align_two_seqs(fl_seq, tc_seq)
else:
    sys.stderr.write("ERROR: you need to either give --truncated_pdb or --truncated_fasta\n")
    # Exit with a non-zero status so callers can detect the failure; the
    # bare exit() builtin comes from the site module and reported success.
    sys.exit(1)

# Positions where a fragment window would run across a chain break or past
# a terminus.
chainbreak_resnums = frag_util.get_positions_to_skip_from_alignment(alignment, frag_len)
seq_map = alignment_util.seq_mapping(alignment)
residues = sorted(seq_map.keys())

# residues before chain break shouldn't take
# get residue positions to keep
out_fraglines = ""
if opts.debug:
    # Record both rows of the alignment as comment lines in the output.
    out_fraglines += "# %s\n" % (alignment[0])
    out_fraglines += "# %s\n" % (alignment[1])

for pos in residues:
    # skip positions at chainbreaks and termini
    if pos in chainbreak_resnums:
        sys.stderr.write("Skipping: %s\n" % pos)
        continue