# Path of the PDB structure file: base directory string + file name string.
pdbLocation = noah_structure_path + searchPDB

# Load the natural sequences from `file`, then trim them with the `identity`
# setting (the exact trimming rule lives in the `af` helper module).
natural_sequences = af.get_natural_sequences_noah(file)
cut_natural_sequences = af.get_cut_natural_sequences(identity, natural_sequences)

# The first trimmed sequence is used as the "ancestral" sequence.
ancestor = cut_natural_sequences[0]
ancestor_length = len(ancestor)

# print(<single expression>) behaves the same under Python 2 and Python 3.
print("Length of Cut Natural Sequences: " + str(ancestor_length))
# NOTE(review): this reports the length of the *uncut* first natural sequence,
# not `ancestor_length` -- confirm that this is the intended value.
print("Ancestor Length: " + str(len(natural_sequences[0])))

# Files that contain L vs RSA data.
data = []
# Sequences with missing RSA values.
bad_list = []

# RSA lookup for this structure/chain (the original comments say the values
# come from DSSP). Element 1 holds the RSA values and element 2 the dict
# mapping positions to RSA.
seq_data = af.get_noah_RSA_values(pdb_id, chain_id)
RSA, RSA_dict = seq_data[1], seq_data[2]
new_RSA = af.get_cut_RSA_values(pdb_id, RSA, RSA_dict)
index = 0

# Natural and designed sequence arrays.
natural_data_arr = cut_natural_sequences
designed_data_arr = af.get_cut_designed_sequences(designed_file)

# Result files. They are deliberately left open here; nothing in this section
# closes them.
fpW_natural = open(
    "results_array_natural_" + pdb_id + "_" + chain_id + ".csv", "w"
)
fpW_designed = open(
    "results_array_" + pdb_id + "_" + chain_id + "_" + "soft" + ".csv", "w"
)

# First row of each result file is the RSA values (natural first, then
# designed; the CSV line is rebuilt for each file, as before).
for _rsa_out in (fpW_natural, fpW_designed):
    _rsa_out.write(af.dump_csv_line(new_RSA))
# NOTE(review): the next two statements look like the tail of a loop/condition
# whose header is above this excerpt; their nesting relative to that header
# must be confirmed against the full file.
# `gap_locations` tracks which residues have gaps.
gap_locations.append(counter)
counter += 1

# This section takes out the ancestral gaps from all the aligned sequences and
# writes them to results_natural_<pdb_id>_<chain_id>.csv.
natural_pdb_file_title = "results_natural_" + pdb_id + "_" + chain_id + ".csv"
natural_out_sequences = open(natural_pdb_file_title, "w")

# Files that contain L vs RSA data.
data = []
# Sequences with missing RSA values.
bad_list = []

# RSA lookup for this structure/chain (the original comments say the values
# come from DSSP). Element 1 holds the RSA values, element 2 the RSA dict.
seq_data = af.get_noah_RSA_values(pdb_id, chain_id)
RSAValues = []
RSA, RSA_dict = seq_data[1], seq_data[2]
# RSA values restricted to the residues present in the sequence.
new_RSA = af.get_cut_RSA_values(pdb_id, RSA, RSA_dict)
index = 0

natural_data_arr = cut_natural_sequences

# File pointer for the partial result; left open here, as nothing in this
# section closes it.
fpW_natural = open(
    "results_array_natural_" + pdb_id + "_" + chain_id + ".csv", "w"
)
# First row of the result file is the RSA values.
fpW_natural.write(af.dump_csv_line(new_RSA))

# Split the natural sequences into two samples.
natural_sample1, natural_sample2 = af.split_natural_sequences(natural_data_arr)