def main():
    """Command-line entry point: insert residues into a Super Structure CSV.

    Options read: ``--apn`` (anchor residue number, entity id, chain id and
    direction, comma separated), ``--res`` (residues to add), ``--inp``,
    ``--out``, ``--pep`` and ``--par``.

    A table of atoms for the new residues is built from the CHARMM
    parameters, spliced into ``Full_Structure`` in front of the anchor
    residue, coordinates are fitted for every model, and CSV and PDB files
    are written.

    Exit behaviour:
      * a missing required option ends through ``opt_parser.error``
        (usage message, exit status 2);
      * a non-existent input file, a malformed ``--apn`` value, an unknown
        residue, an anchor residue that is not in the structure, or the
        not-yet-supported ``Cdir`` direction end with ``sys.exit(1)``.
    """
    usage = "usage: %prog [options] arg"
    d = "This program reads a CSV file that has been generated by Super_Structure.\
         The file corresponds to a Super Structure of a Protein. \
         Multiple residues can be added at the time, No terminal will be added..\n \
         This program can only add residues or terminals that are in the parameter file."

    opt_parser = optparse.OptionParser(usage, description=d)
    opt_parser.add_option(
        "--apn",
        type="str",
        help="Enter Instruction for where to append residues in hard '\"'\n \
                                                         quotes. Place: Amino Acid Number, Entity ID, Chain ID and  \n \
                                                         the direction to add residues separated by comas. Add. The \
                                                         direction to add residues is either Ndir or Cdir. This means \
                                                         that if a residue is added in residue 10, it could be toward \
                                                         the N or C terminal. This is important so that the program \
                                                         knows if the new residue is placed before or after the residue.\
                                                         Example \"1,1,A,Ndir\" or \"20,2,A,Cdir\". \n \
                                                         Chain ID, amino acid or terminal name are not case sensitive \
                                                         and do not need to go in quotes.\n"
    )
    opt_parser.add_option(
        "-r",
        "--res",
        type="str",
        help="Enter list of amino acids to be added in hard quotes.'\"'\n\
                                                         Example: \"ALA,VAL,ASP,ASN,GLU\"."
    )
    opt_parser.add_option("--inp",
                          type="str",
                          help="Path to CSV file for adding residue.")
    opt_parser.add_option(
        "--out",
        type="str",
        help="Path and name to CSV and PDB outputs with added residues.")
    opt_parser.add_option("--pep", type="str", help="Path to peptide file.")
    opt_parser.add_option("--par",
                          type="str",
                          help="Path to Charmm parameter folder.")
    options, args = opt_parser.parse_args()

    # Fail fast with a usage message instead of a TypeError/AttributeError
    # raised later (for --out, only after all the work has been done).
    for required in ('inp', 'apn', 'res', 'out'):
        if getattr(options, required) is None:
            opt_parser.error('Option --' + required +
                             ' is required. Enter -h for help.')
    if not os.path.exists(options.inp):
        print("Error: File path Super Structure CSV file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    ############### Validate the instruction before loading anything ##########
    parse_list = options.apn.split(',')
    print(parse_list, len(parse_list))
    # An empty --res yields no residues to add, which the original treated
    # the same way as a wrong number of --apn entries.
    if len(parse_list) != 4 or not options.res:
        message = 'The number of entries in the instruction field, followed by --apn, is not right.\n'
        message += 'Type -h or --help for instructions\n'
        print(message)
        sys.exit(1)
    aaid_add = int(parse_list[0])
    ent_id_add = int(parse_list[1])
    chain_add = str(parse_list[2]).upper()
    # Normalise the case so that 'ndir'/'NDIR' are accepted, as the error
    # message below promises.
    term_dir = str(parse_list[3]).capitalize()
    if term_dir not in ('Ndir', 'Cdir'):
        print("ERROR: only two directions to add residues, Ndir and Cdir.")
        print("       The entries are not case sensitive.")
        sys.exit(1)
    if term_dir == 'Cdir':
        # Checked up front so no work is done for an unsupported request.
        print(
            'WARNING: The code has not been design and tested for insertions in the CTER.'
        )
        print('Exiting the program without finishing.')
        sys.exit(1)

    ########################## Init Setup #####################################
    params = CP.read_charmm_FF(options.par)
    insulin = SS.Super_Structure(params, options.inp, 'add_linker')
    # A value without commas is read character by character and each
    # character is translated through ut.utilities.residueDict1_1;
    # a comma separated value is used as given.
    if ',' in options.res:
        aa_add = options.res.split(',')
    else:
        aa_add = [ut.utilities.residueDict1_1[code] for code in options.res]
    parser2 = PDBParser()
    pep_file = parser2.get_structure('Peptides', options.pep)
    insulin.build_pep_and_anchers(pep_file)

    # So far this only works with natural aminoacids and ACE and CTER
    message = 'Adding residues ' + str(
        aa_add) + ' in th ' + term_dir + ' at amino acid ' + str(
            aaid_add) + ', ' + 'entity '
    message += str(ent_id_add) + ' and direction ' + term_dir + '.'
    print(message)

    ###########################################################################
    # Create the link dataframe, following the process in Super_Structures
    # to populate its fields: one row per atom of every residue to add.
    aa = []
    aaid = []
    entity_id = []
    chain_id = []
    atmtyp1 = []
    atmtyp2 = []
    charg = []
    component = []
    for snum, res in enumerate(aa_add, 1):
        if res not in insulin.params.AA:
            print('Warning: Amino Acid identifier ' + str(res) +
                  ' is not found in parameters.')
            sys.exit(1)
        res_params = insulin.params.AA[res]
        # comp numbers the atom groups of the residue starting at 1.
        for comp, atom_group in enumerate(res_params.atoms, 1):
            for atom in atom_group:
                # NOTE(review): corrections() is assumed to be a pure
                # name lookup, so it is called once per atom here.
                atom_name = insulin.corrections(res, atom)
                aa.append(res)
                aaid.append(snum)
                entity_id.append(ent_id_add)
                chain_id.append(chain_add)
                atmtyp1.append(atom_name)
                atmtyp2.append(res_params.atom_type[atom_name])
                charg.append(res_params.atom_chrg[atom_name])
                if comp == 1:
                    component.append('AMINO')
                elif atom in ('C', 'O'):
                    component.append('CARBO')
                else:
                    component.append('SIDE' + str(comp))
    link = pd.DataFrame()
    for column, values in (('aa', aa), ('aaid', aaid),
                           ('ent_id', entity_id), ('chain', chain_id),
                           ('atmtyp1', atmtyp1), ('atmtyp2', atmtyp2),
                           ('component', component), ('charg', charg)):
        link[column] = pd.Series(values)
    ###########################################################################
    # Add atomtyp, masses and atmNumber to each atom type
    mass = []
    atmNum = []
    atmtyp3 = []
    epsilon = []
    rmin_half = []
    for atom_type in link['atmtyp2']:
        atmNum.append(params.am.MASS[atom_type][0])
        mass.append(params.am.MASS[atom_type][1])
        atmtyp3.append(params.am.MASS[atom_type][2])
        epsilon.append(params.NONBONDED[atom_type][1])
        rmin_half.append(params.NONBONDED[atom_type][2])
    # Flag values used for the 'atminfo*' / 'aainfo*' columns of new atoms.
    atminfo = [True] * len(link)
    aainfo = [False] * len(link)
    link['epsilon'] = pd.Series(epsilon)
    link['rmin_half'] = pd.Series(rmin_half)
    link['atmtyp3'] = pd.Series(atmtyp3)
    link['mass'] = pd.Series(mass)
    link['atmNum'] = pd.Series(atmNum)
    ###########################################################################
    # DF Type correction.
    link['aaid'] = link['aaid'].apply(int)
    link['ent_id'] = link['ent_id'].apply(int)
    link['mass'] = link['mass'].apply(float)
    link['epsilon'] = link['epsilon'].apply(float)
    link['rmin_half'] = link['rmin_half'].apply(float)
    link['atmNum'] = link['atmNum'].apply(int)
    # Give link every column of Full_Structure; columns with no data for
    # the new atoms are filled with nan.
    for column in insulin.Full_Structure.columns:
        if column in link.columns:
            continue
        if column.startswith('aainfo'):
            link[column] = pd.Series(aainfo)
        elif column.startswith('atminfo'):
            link[column] = pd.Series(atminfo)
        else:
            link[column] = pd.Series([float('nan')] * len(link))

    # Ndir: the new rows go in front of the first row of the anchor residue.
    full = insulin.Full_Structure
    anchor_rows = full.index[(full.aaid == aaid_add) &
                             (full.ent_id == ent_id_add) &
                             (full.chain == chain_add)]
    if len(anchor_rows) == 0:
        print('ERROR: amino acid ' + str(aaid_add) + ', entity ' +
              str(ent_id_add) + ', chain ' + chain_add +
              ' was not found in the structure. Exiting now.')
        sys.exit(1)
    beg_insert = min(anchor_rows)
    end_insert = beg_insert + link.shape[0]

    joint_df = pd.DataFrame(columns=link.columns)
    count = 0
    insert = True
    # When links are added, aaid needs to be fixed to reflect added residues
    aaid_offset = 0
    for i in insulin.Full_Structure.index:
        if insert and (i >= beg_insert) and (i < end_insert):
            for j in link.index:
                joint_df.loc[count] = link.loc[j]
                joint_df.loc[count, 'aaid'] = joint_df.loc[
                    count, 'aaid'] + aaid_offset
                current_aaid = link.loc[j, 'aaid']
                count += 1
            insert = False
            # The last link aaid equals the number of residues added.
            aaid_offset = aaid_offset + current_aaid
        joint_df.loc[count] = insulin.Full_Structure.loc[i]
        # So that only residues after the added link get increased in the
        # given ent_id and chain. Any other entity or chain in the molecule
        # is not fixed.
        if (joint_df.loc[count, 'ent_id'] == ent_id_add) & (
                joint_df.loc[count, 'chain'] == chain_add):
            joint_df.loc[count, 'aaid'] = joint_df.loc[count,
                                                       'aaid'] + aaid_offset
        count += 1
    # After adding residues, it all gets copied back to original dataframe.
    for i in joint_df.index:
        insulin.Full_Structure.loc[i] = joint_df.loc[i]
    # The way to get number of models is very specific to the way this
    # program stores data in the DataFrame. Be careful if the data frame
    # column structure changes. Floor division keeps the count an integer
    # on Python 3 as well.
    # TODO: missing atom coordinates are added manually. It needs to be
    # automated more.
    num_models = len(
        range((insulin.Full_Structure.shape[1] - 20) // 5)) + 1
    for i in range(1, num_models + 1):
        for j in range(len(aa_add), 0, -1):
            insulin.fit_coordinates(term_dir, j, ent_id_add, chain_add,
                                    str(i), aa_add[j - 1])
    # NOTE: insulin.models is not set by the Super Structure class, it is
    #       added here. This works, but models should probably be a field
    #       of Super Structure that is initialized there.
    insulin.models = [str(i) for i in range(1, num_models + 1)]
    ################   Write to outputs ####################
    file_name = os.path.basename(options.out).split('.')[0]
    dir_path = os.path.dirname(options.out)
    insulin.write_csv(dir_path, file_name)
    IO.write_pdb(insulin, dir_path, file_name, 'all')
def main():
    """Command-line entry point: delete a residue or terminal from a CSV.

    Options read: ``--rem`` (residue number, entity id, chain id and the
    residue/terminal name, comma separated), ``--inp``, ``--out`` and
    ``--par``.

    The named residue or terminal is removed with ``delete_aa``. If the
    residue number is then still the lowest (highest) one left in that
    entity/chain and rows with component ``ACETY`` (``CTERM``) remain for
    it, that terminal is removed too. The result is written as CSV and PDB.

    Exit behaviour:
      * a missing required option ends through ``opt_parser.error``
        (usage message, exit status 2);
      * a non-existent input file, a wrong number of ``--rem`` entries or an
        unsupported residue name end with ``sys.exit(1)``.
    """
    usage = "usage: %prog [options] arg"
    d = "This program reads a CSV file that has been generated by Super_Structure.\
         One residue or terminal will be deleted at the time.\n \
         WARNING: Deleting residues will leave a 'hole' in the structure. Amino Acids will not be renumbered.\n \
         This program can only delete residues or terminals that are in the parameter file.\
         The program will create a new and modified CSV file with the name of the input file plus the entity number."
    opt_parser = optparse.OptionParser(usage, description=d)
    # The help text now says "not case sensitive": both values are
    # upper-cased below before they are used.
    opt_parser.add_option(
        "--rem",
        type="str",
        help="Enter Instruction for removing amino acid or terminal in hard \n \
                                                         quotes '\"'. Place: Amino Acid Number, Entity ID, Chain ID, \
                                                         Residue or Terminal to be deleted separated by comas.\n \
                                                         Example \"1,1,A,ACE\", \"1,1,A,CTER\" or \"20,2,A,LYS\". \n \
                                                         Chain ID or terminal name are not case sensitive and \
                                                         do not need to go in quotes.\n\
                                                         For residues all atoms will be deleted. For terminals, only the \
                                                         atoms that correspond to the terminal will be deleted.")
    opt_parser.add_option("--inp",
                          type="str",
                          help="Path to CSV file for removing residue.")
    opt_parser.add_option(
        "--out",
        type="str",
        help="Name of output CSV file after removal of amino acid or terminal.")
    opt_parser.add_option("--par",
                          type="str",
                          help="Path to charmm parameters folder.")
    options, args = opt_parser.parse_args()

    # Fail fast with a usage message instead of a TypeError/AttributeError
    # raised later (for --out, only after the deletion has been done).
    for required in ('inp', 'rem', 'out'):
        if getattr(options, required) is None:
            opt_parser.error('Option --' + required +
                             ' is required. Enter -h for help.')
    if not os.path.exists(options.inp):
        print("Error: File path for Super Structure CSV file does not exist.")
        print("Type -h or --help for description and options.")
        sys.exit(1)

    ############### Validate the instruction before loading anything ##########
    parse_list = options.rem.split(',')
    if len(parse_list) != 4:
        message = 'The number of entries in the instruction field, followed by --rem, is not right.\n'
        message += 'Type -h or --help for instructions\n'
        print(message)
        sys.exit(1)
    amino_acid_number = int(parse_list[0])
    entity_number = int(parse_list[1])
    chain = str(parse_list[2]).upper()
    term_res = str(parse_list[3]).upper()
    # So far this only works with natural aminoacids and ACE and CTER
    supported = ('ILE', 'GLN', 'GLY', 'GLU', 'CYS', 'ASP', 'SER', 'HSD',
                 'HSE', 'PRO', 'HSP', 'ASN', 'VAL', 'THR', 'TRP', 'CTER',
                 'LYS', 'PHE', 'ALA', 'MET', 'ACE', 'LEU', 'ARG', 'TYR')
    if term_res not in supported:
        print('ERROR: del_residue.py only works with natural aminoacids and ACE and CTER terminals.')
        sys.exit(1)

    params = CP.read_charmm_FF(options.par)
    insulin = SS.Super_Structure(params, options.inp, 'add_linker')

    def residue_has_component(label):
        # True when a row of the targeted residue still carries the given
        # component label. Evaluated on the current Full_Structure, so it
        # reflects any deletion made so far.
        full = insulin.Full_Structure
        rows = ((full.aaid == amino_acid_number) &
                (full.ent_id == entity_number) &
                (full.chain == chain))
        return bool((full.loc[rows, 'component'] == label).any())

    message = 'Deleting a ' + term_res + ' from '
    message += 'amino acid number ' + str(amino_acid_number) + ' in entity id ' + str(entity_number) + ' and chain ' + chain + '.\n'
    insulin.delete_aa(amino_acid_number, entity_number, chain, term_res)

    # If amino acid that is at the protein terminal is deleted, the terminal
    # must be deleted too. The residue range is read after the deletion.
    full = insulin.Full_Structure
    chain_aaids = full.aaid[(full.ent_id == entity_number) &
                            (full.chain == chain)]
    # Nothing is left to clean up when the entity/chain has no rows any more
    # (min()/max() would raise on the empty selection).
    if len(chain_aaids) > 0:
        min_aa = min(chain_aaids)
        max_aa = max(chain_aaids)
        if amino_acid_number == min_aa and residue_has_component('ACETY'):
            # Leftover acetyl atoms: remove the ACE terminal as well.
            term_res = 'ACE'
            insulin.delete_aa(amino_acid_number, entity_number, chain,
                              term_res)
        if (amino_acid_number == max_aa and term_res != 'CTER'
                and residue_has_component('CTERM')):
            # Leftover C-terminal atoms: remove the CTER terminal as well.
            term_res = 'CTER'
            insulin.delete_aa(amino_acid_number, entity_number, chain,
                              term_res)

    file_name = os.path.basename(options.out).split('.')[0]
    dir_path = os.path.dirname(options.out)
    # Super Structure needs to know about models. Floor division keeps the
    # count an integer on Python 3 as well.
    num_models = len(range((insulin.Full_Structure.shape[1] - 20) // 5)) + 1
    insulin.models = [str(i) for i in range(1, num_models + 1)]

    insulin.write_csv(dir_path, file_name)
    IO.write_pdb(insulin, dir_path, file_name, 'all')
    print(message)
# Example #3
# 0
def main():
    """Command-line entry point: build a Super Structure CSV.

    Two mutually exclusive modes are offered:

    * ``--fromcif`` reads the CIF file given with ``--cif``, builds the
      Super Structure, adds missing residues and writes a CSV and a PDB
      named after the CIF file (an empty directory string is passed to the
      writers).
    * ``--frompsfcrd`` reads the files given with ``--psf`` and ``--crd``
      and writes a CSV next to the CRD file.

    Exit behaviour:
      * selecting both modes, selecting none, or omitting the input path a
        mode needs ends through ``opt_parser.error`` (usage message, exit
        status 2);
      * an input path that does not exist ends with ``sys.exit(1)``.
    """
    usage = "usage: %prog [options] arg"
    d = "This program reads a CIF file and checks that all residues in the file\
         are found in the CHARMM top_27 parameters. Residues found, but missing in \
         the structure, are added to the structure. The full structure is outputed \
         to a CSV file where Charmm, CIF and additional information is stored. \
         Added residues are copied from a peptide structure with all amino acids \
         present in the local CHARMM parameters files with fixed dihedral angles. \
         Info in the CSV file should be all there is to explore the conformational \
         space of added atoms."

    opt_parser = optparse.OptionParser(usage, description=d)

    # NOTE(review): --out1 and --pep are parsed but never read below; the
    # peptide file is taken from the package resources instead.
    group = optparse.OptionGroup(
        opt_parser,
        "Generates CSV and PDB files for each model from a CIF file.")
    group.add_option("--fromcif",
                     action="store_true",
                     help="Flag to generate a CSV frile from a CIF file.")
    group.add_option("-i", "--cif", type="str", help="Path to input cif file.")
    group.add_option("-o", "--out1", type="str", help="Path to output csv.")
    group.add_option("-p",
                     "--pep",
                     type="str",
                     help="Path to CHARMM peptide file.")
    opt_parser.add_option_group(group)

    group = optparse.OptionGroup(
        opt_parser, "Generates a CSV file from CRD and PSF files.")
    group.add_option(
        "--frompsfcrd",
        action="store_true",
        help="Flag to generates a CSV frile from a CRD and PSF file.")
    group.add_option("-f",
                     "--psf",
                     type="str",
                     help="Path to input PSF file in XPLOR format.")
    group.add_option("-d", "--crd", type="str", help="Path to input CRD file.")
    opt_parser.add_option_group(group)

    options, args = opt_parser.parse_args()
    ############################  Options Entered #############################
    if options.fromcif and options.frompsfcrd:
        opt_parser.error(
            "Two option flags can't be selected at the same time. Enter -h for help."
        )
    if not (options.fromcif or options.frompsfcrd):
        # Without a mode flag nothing would be done; say so instead of
        # finishing silently.
        opt_parser.error(
            "Select one of --fromcif or --frompsfcrd. Enter -h for help.")

    ###########################################################################
    if options.fromcif:
        if options.cif is None:
            opt_parser.error("--fromcif requires --cif. Enter -h for help.")
        if not os.path.exists(options.cif):
            print("Error: File path for input file does not exist.")
            print("Type -h or --help for description and options.")
            sys.exit(1)
        params = CP.read_charmm_FF()
        parser2 = PDBParser(QUIET=True)
        pep_file_path = pkg_resources.resource_filename(
            'em', 'params/' + 'peptides.pdb')
        p1 = parser2.get_structure('Peptides', pep_file_path)
        #######################################################################
        # The peptide construct is built with charmm, so corrections for some
        # atom names to PDB/Databank atom types are needed.
        # TODO: this might not be necessary as the correction and
        # inv_correction dictionary in Super Structure takes care of it.
        # Check before removing the correction here.
        # Entities are iterated directly, which avoids the deprecated
        # Residue.get_atom() accessor.
        for model in p1:
            for chain in model:
                for residue in chain:
                    if residue.get_resname() != 'ILE':
                        continue
                    for atom in residue:
                        if atom.get_id() == 'CD':
                            atom.name = 'CD1'
                            atom.id = 'CD1'
        #######################################################################
        # Create Super Structure
        myCIF = SS.Super_Structure(params, options.cif, 'setup')
        myCIF.build_pep_and_anchers(p1)
        myCIF.read_dict_into_dataframes()
        myCIF.check_models()
        myCIF.create_super_structure_df()
        #######################################################################
        # Find missing residues to add to the Super Structure. Missing
        # residues are grouped in lists of contiguous residues and added to
        # another list.
        myCIF.build_missing_aa()
        file_name = os.path.basename(options.cif).split('.')[0]
        myCIF.write_csv('', file_name)
        IO.write_pdb(myCIF, '', file_name, 'all')
    if options.frompsfcrd:
        if options.psf is None or options.crd is None:
            opt_parser.error(
                "--frompsfcrd requires --psf and --crd. Enter -h for help.")
        if not os.path.exists(options.psf):
            print("Error: File path for PSF file does not exist.")
            print("Type -h or --help for description and options.")
            sys.exit(1)
        if not os.path.exists(options.crd):
            print("Error: File path for CRD file does not exist.")
            print("Type -h or --help for description and options.")
            sys.exit(1)
        directory, filename = os.path.split(options.crd)
        crd_file = IO.crd(options.crd)
        psf_file = IO.psf(options.psf)
        file_name = filename.split('.')[0]
        #######################################################################
        # After reading files, generate and index a Super Structure.
        params = CP.read_charmm_FF()
        myCSV = SS.Super_Structure(params, directory, 'charmm_input')
        # At this point, a XPLOR psf could only have been created from a
        # complete structure, so no worries of gaps.
        myCSV.create_super_structure_df_from_CRD_PSF(crd_file, psf_file)
        myCSV.write_csv(directory, file_name)