def append_file(self, path, file_data, **kwargs):
    """
    Appends to an existing file on HDFS

    WebHDFS REST call:
    POST http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=APPEND
    [&buffersize=<INT>]

    Note: This function does not follow automatic redirects but
    instead uses a two step call to the API as required in the
    WebHDFS documentation

    :param path: path of the HDFS file to append to
    :param file_data: payload sent as the body of the datanode request
    :param kwargs: optional query arguments forwarded to ``_create_uri``
    :returns: True when the datanode answers 200 OK
    :raises errors.PyWebHdfsException: if the namenode does not answer
        with a 307 redirect or the datanode does not answer 200 OK
    """
    # Step 1: ask the namenode where to write. The WebHDFS protocol
    # requires this request to carry no file data; sending the payload
    # here would upload it twice and could exhaust a file-like or
    # iterator body before the real write in step 2.
    uri = self._create_uri(path, operations.APPEND, **kwargs)
    init_response = requests.post(uri, allow_redirects=False)

    if init_response.status_code != httplib.TEMPORARY_REDIRECT:
        raise errors.PyWebHdfsException(init_response.text)

    # Step 2: send the payload to the datanode address that the namenode
    # returned in the Location header.
    uri = init_response.headers['location']
    response = requests.post(uri, data=file_data)

    if response.status_code != httplib.OK:
        raise errors.PyWebHdfsException(response.text)

    return True
def create_file(self, path, file_data, **kwargs):
    """
    Creates a new file on HDFS

    WebHDFS REST call:
    PUT http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CREATE
    [&overwrite=<true|false>][&blocksize=<LONG>][&replication=<SHORT>]
    [&permission=<OCTAL>][&buffersize=<INT>]

    Note: This function does not follow automatic redirects but
    instead uses a two step call to the API as required in the
    WebHDFS documentation

    :param path: path of the HDFS file to create
    :param file_data: payload sent as the body of the datanode request
    :param kwargs: optional query arguments forwarded to ``_create_uri``
    :returns: value of the Location header of the datanode response
    :raises errors.PyWebHdfsException: if the namenode does not answer
        with a 307 redirect or the datanode does not answer 201 Created
    """
    # Step 1: ask the namenode where to write. The WebHDFS protocol
    # requires this request to carry no file data; sending the payload
    # here would upload it twice and could exhaust a file-like or
    # iterator body before the real write in step 2.
    uri = self._create_uri(path, operations.CREATE, **kwargs)
    init_response = requests.put(uri, allow_redirects=False)

    if init_response.status_code != httplib.TEMPORARY_REDIRECT:
        raise errors.PyWebHdfsException(init_response.text)

    # Step 2: send the payload to the datanode address that the namenode
    # returned in the Location header.
    uri = init_response.headers['location']
    response = requests.put(uri, data=file_data)

    if response.status_code != httplib.CREATED:
        raise errors.PyWebHdfsException(response.text)

    # The response attribute is ``headers``; the previous ``header``
    # lookup raised AttributeError on every successful create.
    return response.headers['location']
def temp_create_file(self, path, file_data, **kwargs):
    """
    Creates a new file on HDFS with a single PUT that follows redirects

    Issues a CREATE request built by ``_create_uri`` with ``file_data``
    as the body and lets ``requests`` follow any redirect automatically.

    :param path: path of the HDFS file to create
    :param file_data: payload sent as the request body
    :param kwargs: optional query arguments forwarded to ``_create_uri``
    :returns: value of the Location header of the final response
    :raises errors.PyWebHdfsException: if the final response status is
        not 201 Created
    """
    uri = self._create_uri(path, operations.CREATE, **kwargs)
    response = requests.put(uri, data=file_data, allow_redirects=True)

    if response.status_code != httplib.CREATED:
        raise errors.PyWebHdfsException(response.text)

    # The response attribute is ``headers``; the previous ``header``
    # lookup raised AttributeError on every successful create.
    return response.headers['location']
def _raise_pywebhdfs_exception(resp_code, message=None):
    """
    Raise the pywebhdfs error that corresponds to an HTTP status code

    400 -> errors.BadRequest, 401 -> errors.Unauthorized,
    404 -> errors.FileNotFound, 405 -> errors.MethodNotAllowed;
    any other code raises errors.PyWebHdfsException.

    :param resp_code: HTTP status code of the failed response
    :param message: passed to the exception as ``msg``
    """
    # Guard clauses: every branch raises, so no elif/else is needed.
    if resp_code == http_client.BAD_REQUEST:
        raise errors.BadRequest(msg=message)
    if resp_code == http_client.UNAUTHORIZED:
        raise errors.Unauthorized(msg=message)
    if resp_code == http_client.NOT_FOUND:
        raise errors.FileNotFound(msg=message)
    if resp_code == http_client.METHOD_NOT_ALLOWED:
        raise errors.MethodNotAllowed(msg=message)
    raise errors.PyWebHdfsException(msg=message)
def get_file_dir_status(self, path):
    """
    Fetch the file_status of one file or directory on HDFS

    WebHDFS REST call:
    GET http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILESTATUS

    :param path: HDFS path to query
    :returns: the decoded JSON body of the response
    :raises errors.PyWebHdfsException: if the response status is not
        200 OK
    """
    status_uri = self._create_uri(path, operations.GETFILESTATUS)
    reply = requests.get(status_uri)

    if reply.status_code != httplib.OK:
        raise errors.PyWebHdfsException(reply.text)

    return reply.json()
def delete_file_dir(self, path, recursive='false'):
    """
    Remove an existing file or directory from HDFS

    WebHDFS REST call:
    DELETE http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=DELETE
    [&recursive=<true|false>]

    :param path: HDFS path to delete
    :param recursive: value of the ``recursive`` query argument
    :returns: True when the response status is 200 OK
    :raises errors.PyWebHdfsException: if the response status is not
        200 OK
    """
    delete_uri = self._create_uri(
        path, operations.DELETE, recursive=recursive)
    reply = requests.delete(delete_uri, allow_redirects=True)

    if reply.status_code != httplib.OK:
        raise errors.PyWebHdfsException(reply.text)

    return True
def list_dir(self, path):
    """
    Fetch the file_status entries for everything inside an HDFS
    directory

    WebHDFS REST call:
    GET http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=LISTSTATUS

    :param path: HDFS directory to list
    :returns: the decoded JSON body of the response
    :raises errors.PyWebHdfsException: if the response status is not
        200 OK
    """
    listing_uri = self._create_uri(path, operations.LISTSTATUS)
    reply = requests.get(listing_uri, allow_redirects=True)

    if reply.status_code != httplib.OK:
        raise errors.PyWebHdfsException(reply.text)

    return reply.json()
def rename_file_dir(self, path, destination_path):
    """
    Rename an existing file or directory on HDFS

    WebHDFS REST call:
    PUT <HOST>:<PORT>/webhdfs/v1/<PATH>?op=RENAME&destination=<PATH>

    :param path: current HDFS path
    :param destination_path: value of the ``destination`` query argument
    :returns: True when the response status is 200 OK
    :raises errors.PyWebHdfsException: if the response status is not
        200 OK
    """
    rename_uri = self._create_uri(
        path, operations.RENAME, destination=destination_path)
    reply = requests.put(rename_uri, allow_redirects=True)

    if reply.status_code != httplib.OK:
        raise errors.PyWebHdfsException(reply.text)

    return True
def make_dir(self, path, **kwargs):
    """
    Create a new directory on HDFS

    WebHDFS REST call:
    PUT http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=MKDIRS
    [&permission=<OCTAL>]

    :param path: HDFS path of the directory to create
    :param kwargs: optional query arguments forwarded to ``_create_uri``
    :returns: True when the response status is 200 OK
    :raises errors.PyWebHdfsException: if the response status is not
        200 OK
    """
    mkdirs_uri = self._create_uri(path, operations.MKDIRS, **kwargs)
    reply = requests.put(mkdirs_uri, allow_redirects=True)

    if reply.status_code != httplib.OK:
        raise errors.PyWebHdfsException(reply.text)

    return True
def read_file(self, path, **kwargs):
    """
    Read a file on HDFS and return its content

    WebHDFS REST call:
    GET http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN
    [&offset=<LONG>][&length=<LONG>][&buffersize=<INT>]

    Note: this function follows automatic redirects

    :param path: HDFS path of the file to read
    :param kwargs: optional query arguments forwarded to ``_create_uri``
    :returns: the response body as text (``response.text``)
    :raises errors.PyWebHdfsException: if the response status is not
        200 OK
    """
    open_uri = self._create_uri(path, operations.OPEN, **kwargs)
    reply = requests.get(open_uri, allow_redirects=True)

    if reply.status_code != httplib.OK:
        raise errors.PyWebHdfsException(reply.text)

    return reply.text