def search(self, num_attempts=1, sleep_time=0.5):
    """
    Run this object's query against the Protein Data Bank REST API

    The query stored in ``self.scan_params`` is serialized to JSON and
    sent as a POST request to ``self.url``.

    Parameters
    ----------

    num_attempts : int
        In case of a failed retrieval, the number of attempts to try again
        (NOTE(review): not referenced in this method's body)
    sleep_time : int
        The amount of time to wait between requests, in case of API rate
        limits (NOTE(review): not referenced in this method's body)

    Returns
    -------

    None when no response came back or its status code was not 200 (a
    warning is emitted in that case). When ``self.return_type`` is
    "entry", the result of ``walk_nested_dict`` collecting "identifier"
    values from the decoded response. Otherwise the decoded JSON response
    itself.
    """
    payload = json.dumps(self.scan_params, indent=4)
    reply = http_requests.request_limited(self.url, rtype="POST", data=payload)

    request_succeeded = reply is not None and reply.status_code == 200
    if not request_succeeded:
        warnings.warn("Retrieval failed, returning None")
        return None

    decoded = json.loads(reply.text)
    if self.return_type != "entry":
        return decoded

    # A fresh `outputs` list is passed on every call so results never
    # accumulate across searches.
    return walk_nested_dict(decoded, "identifier", maxdepth=25, outputs=[])
def get_info(pdb_id, url_root='https://data.rcsb.org/rest/v1/core/entry/'):
    '''Fetch the information stored for a given PDB ID

    Parameters
    ----------

    pdb_id : string
        A 4 character string giving a pdb entry of interest. Any ":" in
        the identifier is rewritten to "/" before the URL is built.

    url_root : string
        The string root of the specific url for the request type

    Returns
    -------

    out : dict()
        The decoded JSON body of the response, or None (with a warning)
        when no response came back or its status code was not 200

    '''
    # Old-style entry identifiers use ":" where the URL expects "/".
    entry_path = pdb_id.replace(":", "/")
    reply = http_requests.request_limited(url_root + entry_path)

    if reply is None or reply.status_code != 200:
        warnings.warn("Retrieval failed, returning None")
        return None

    return json.loads(str(reply.text))
def test_post__first_try_success(self, mock_sleep, mock_post):
    """A POST answered with 200 right away is returned without any sleep."""
    target_url = "http://get_your_proteins.com"

    ok_response = mock.create_autospec(requests.models.Response)
    ok_response.status_code = 200  # A-OK!
    mock_post.return_value = ok_response

    returned = http_requests.request_limited(url=target_url, rtype="POST")

    self.assertEqual(returned, ok_response)
    mock_post.assert_called_once_with(target_url)
    sleep_count = len(mock_sleep.mock_calls)
    self.assertEqual(sleep_count, 0)
def test_post__repeatedly_fails_return_nothing(self, mock_sleep, mock_post,
                                               mock_warn):
    """A POST that only ever sees 429 yields None after 4 posts and 4 sleeps."""
    target_url = "http://protein_data_bank.com"

    # Every POST is answered with "busy" (HTTP 429).
    throttled_response = mock.create_autospec(requests.models.Response)
    throttled_response.status_code = 429
    mock_post.return_value = throttled_response

    outcome = http_requests.request_limited(url=target_url, rtype="POST")

    self.assertIsNone(outcome)
    mock_warn.assert_called_with("Too many failures on requests. Exiting...")

    post_count = len(mock_post.mock_calls)
    self.assertEqual(post_count, 4)
    mock_post.assert_called_with(target_url)

    sleep_count = len(mock_sleep.mock_calls)
    self.assertEqual(sleep_count, 4)
def test_get__succeeds_third_try(self, mock_sleep, mock_get):
    """A GET that sees 429, then 504, then 200 returns the 200 response."""
    target_url = "http://get_your_proteins.com"

    # Build one mocked response per status, in the order `requests.get`
    # will hand them out: busy (429), server error (504), all good (200).
    staged_responses = []
    for status in (429, 504, 200):
        staged = mock.create_autospec(requests.models.Response)
        staged.status_code = status
        staged_responses.append(staged)
    ok_response = staged_responses[-1]
    mock_get.side_effect = staged_responses

    returned = http_requests.request_limited(url=target_url, rtype="GET")

    self.assertEqual(returned, ok_response)
    get_count = len(mock_get.mock_calls)
    self.assertEqual(get_count, 3)
    mock_get.assert_called_with(target_url)

    # Should only sleep on being throttled (not server error)
    sleep_count = len(mock_sleep.mock_calls)
    self.assertEqual(sleep_count, 1)
def get_pdb_file(pdb_id: str, filetype=PDBFileType.PDB, compression=False) -> Optional[str]:
    '''Get the full PDB file associated with a PDB_ID

    Parameters
    ----------

    pdb_id : A 4 character string giving a pdb entry of interest

    filetype: The file type.
        PDB is the older file format,
        CIF is the newer replacement.
        XML can also be obtained and parsed using the various xml tools
        included in PyPDB
        STRUCTFACT retrieves structure factors (only available for certain
        PDB entries)

    compression : Whether or not to request the data as a compressed (gz)
        version of the file (note that the compression is undone by this
        function)

    Returns
    -------

    result : string
        The string representing the full PDB file as an uncompressed string.
        (returns None if the request to RCSB failed)

    Examples
    --------
    >>> pdb_file = get_pdb_file('4lza', filetype=PDBFileType.CIF, compression=True)
    >>> print(pdb_file[:200])
    data_4LZA
    #
    _entry.id   4LZA
    #
    _audit_conform.dict_name       mmcif_pdbx.dic
    _audit_conform.dict_version    4.032
    _audit_conform.dict_location   http://mmcif.pdb.org/dictionaries/ascii/mmcif_pdbx

    '''
    if filetype is PDBFileType.CIF and not compression:
        warnings.warn("Consider using `get_pdb_file` with compression=True "
                      "for CIF files (it makes the file download faster!)")

    # Assemble the download URL piece by piece and join once at the end.
    pdb_url_builder = [PDB_DOWNLOAD_BASE_URL, pdb_id]
    if filetype is PDBFileType.STRUCTFACT:
        pdb_url_builder.append("-sf.cif")
    else:
        pdb_url_builder += [".", filetype.value]
    if compression:
        # append, not `+= ".gz"`: the latter extends the list one character
        # at a time and only produced the right URL by accident.
        pdb_url_builder.append(".gz")
    pdb_url = "".join(pdb_url_builder)

    print(
        "Sending GET request to {} to fetch {}'s {} file as a string.".format(
            pdb_url, pdb_id, filetype.value))

    response = http_requests.request_limited(pdb_url)
    if response is None or not response.ok:
        warnings.warn("Retrieval failed, returning None")
        return None

    if compression:
        # gzip.decompress yields bytes; decode so both branches return `str`
        # as the annotation and docstring promise. errors="replace" keeps a
        # stray non-UTF-8 byte from raising.
        return gzip.decompress(response.content).decode("utf-8", errors="replace")
    return response.text
def test_fails_with_invalid_request(self, mock_sleep, mock_warnings):
    """An unrecognized request type yields None, one warning and no sleep."""
    outcome = http_requests.request_limited(
        url="http://protein_data_bank.com", rtype="MAIL")

    self.assertIsNone(outcome)
    mock_warnings.assert_called_once_with("Request type not recognized")
    sleep_count = len(mock_sleep.mock_calls)
    self.assertEqual(sleep_count, 0)