def add_backbone_atoms_linearly_from_loop_filepaths(self, loop_json_filepath, fasta_filepath, residue_ids):
    '''Resolve a loop definition and an insertion sequence for add_backbone_atoms_linearly.

       Intended as a utility wrapper around add_backbone_atoms_linearly, which adds backbone atoms in a straight
       line from the first to the last residue of the insertion (see the NOTE(review) at the end of the body).

       loop_json_filepath is a path to a JSON file using the JSON format for Rosetta loops files. This file
       identifies the insertion points of the sequence. It must define exactly one loop in its 'LoopSet' list.

       fasta_filepath is a path to a FASTA file with exactly one sequence. This sequence is used as the sequence
       for the inserted residues (between the start and stop residues defined in loop_json_filepath).

       residue_ids is a list of PDB chain residues (columns 22-27 of ATOM lines in the PDB format). It is assumed
       that they are sequential although the logic does not depend on that. This list must have the same length
       as the sequence in the FASTA file; residue_ids[i] is paired with the i-th letter of that sequence.

       Raises AssertionError if the loop file does not define exactly one loop, if the start or stop residue is
       not in residue_ids, or if the FASTA file does not contain exactly one sequence.
       Raises Exception if the sequence and residue_ids lengths differ, if the stop residue is not reached after
       the start residue, or if either residue cannot be looked up in self.residues.
    '''

    # Parse the loop file
    loop_def = json.loads(read_file(loop_json_filepath))
    loop_set = loop_def['LoopSet']
    # Explicit raises (rather than assert statements) so that the validation is not stripped when Python runs
    # with -O. AssertionError is kept so that existing callers see the same exception type.
    if len(loop_set) != 1:
        raise AssertionError('The loop file must define exactly one loop; found {0}.'.format(len(loop_set)))
    loop_start = loop_set[0]['start']
    loop_stop = loop_set[0]['stop']
    # Build the chain residue identifiers (chain ID followed by residue number and insertion code).
    start_id = PDB.ChainResidueID2String(loop_start['chainID'], (str(loop_start['resSeq']) + loop_start['iCode']).strip())
    end_id = PDB.ChainResidueID2String(loop_stop['chainID'], (str(loop_stop['resSeq']) + loop_stop['iCode']).strip())
    if start_id not in residue_ids:
        raise AssertionError('The start residue of the loop is not in residue_ids.')
    if end_id not in residue_ids:
        raise AssertionError('The end residue of the loop is not in residue_ids.')

    # Parse the FASTA file and extract the sequence
    f = FASTA(read_file(fasta_filepath), strict = False)
    if len(f.get_sequences()) != 1:
        raise AssertionError('The FASTA file must contain exactly one sequence.')
    insertion_sequence = f.sequences[0][2]
    if len(residue_ids) != len(insertion_sequence):
        raise Exception('The sequence in the FASTA file must have the same length as the list of residues.')

    # Create the insertion sequence (a sub-sequence of the FASTA sequence)
    # The post-condition is that the start and end residues are the first and last elements of kept_residues respectively
    kept_residues = []
    insertion_residue_map = {}
    in_section = False
    found_end = False
    for x, residue_id in enumerate(residue_ids):
        if residue_id == start_id:
            in_section = True
        if in_section:
            kept_residues.append(residue_id)
            insertion_residue_map[residue_id] = insertion_sequence[x]
            # The end residue only counts once the start residue has been seen.
            if residue_id == end_id:
                found_end = True
                break
    if not kept_residues:
        raise Exception('The insertion sequence is empty (check the start and end residue ids).')
    if not found_end:
        raise Exception('The end residue was not encountered when iterating over the insertion sequence (check the start and end residue ids).')

    # Identify the start and end Residue objects.
    # self.residues is indexed first by chain ID (the first character of the identifier) and then by the
    # remainder of the identifier.
    try:
        start_res = self.residues[start_id[0]][start_id[1:]]
        end_res = self.residues[end_id[0]][end_id[1:]]
    except Exception:
        raise Exception('The start or end residue could not be found in the PDB file.')

    # NOTE(review): nothing in this span uses start_res, end_res, kept_residues or insertion_residue_map after
    # this point, and the function returns None. The delegation to add_backbone_atoms_linearly described in the
    # docstring is presumably missing or truncated here - confirm against the full file.
def get_fasta_object(self, pdb_id, acceptable_sequence_percentage_match = 90.0):
    '''Return the FASTA object stored for pdb_id, creating and storing it first if necessary.

       The lookup is logged using pdb_id exactly as passed in; all cache keys use the upper-cased identifier.
       If no truthy FASTA object is stored for the identifier:
         - the raw FASTA contents are fetched and stored unless truthy contents are already stored, using
           download_fasta when self.cache_dir is set and retrieve_fasta otherwise;
         - a FASTA object is then obtained via FASTA.retrieve and stored with add_fasta_object.

       acceptable_sequence_percentage_match is accepted but is not used by this method.
    '''
    self.log_lookup('FASTA object {0}'.format(pdb_id))
    key = pdb_id.upper()

    # Fast path: an object is already stored for this identifier.
    if self.fasta_objects.get(key):
        return self.fasta_objects[key]

    # Make sure the raw contents are stored before building the object.
    if not self.fasta_contents.get(key):
        if self.cache_dir:
            contents = download_fasta(key, self.cache_dir, silent = True)
        else:
            contents = retrieve_fasta(key, silent = True)
        self.add_fasta_contents(key, contents)

    fasta_object = FASTA.retrieve(key, cache_dir = self.cache_dir, bio_cache = self)
    self.add_fasta_object(key, fasta_object)
    return self.fasta_objects[key]