def check_hdfs_path(self, master_IP, dest, option):
    """
    Check if a path exists in HDFS by running "hdfs dfs -test <option>" on the
    master node. Returns the remote exit status (0: test passed, 1: test failed).
    """
    path_exists = ssh_call_hadoop("hduser", master_IP,
                                  " dfs -test " + option + " " + "\'" + dest + "\'")
    if option == '-e' and path_exists == 0:
        logging.error('File already exists. Aborting upload.')
        exit(error_fatal)
    elif option == '-d' and path_exists != 0:
        exit(error_fatal)
    return path_exists
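
# ssh_call_hadoop is defined elsewhere in the module; below is a minimal
# sketch, assuming it runs "hadoop <command>" on the master node over SSH and
# returns the remote exit status. The paramiko transport and the hadoop_path
# default are illustrative assumptions, not the project's actual values.
import paramiko

def ssh_call_hadoop(user, master_IP, command,
                    hadoop_path='/usr/local/hadoop/bin/hadoop'):
    """Run a hadoop CLI command on the cluster master, return its exit code."""
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.connect(master_IP, username=user)
    try:
        # e.g. command == " dfs -test -e '/user/hduser/file'"
        stdin, stdout, stderr = ssh.exec_command(hadoop_path + command)
        return stdout.channel.recv_exit_status()  # 0 means the test passed
    finally:
        ssh.close()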
def get_from_hadoop_to_local(self, cluster):
    """ Method for getting files from Hadoop clusters in ~okeanos to local filesystem. """
    source = self.opts['source']
    destination = self.opts['destination']
    try:
        logging.log(SUMMARY, "Checking if \'{0}\' exists in Hadoop filesystem.".format(source))
        src_file_exists = ssh_call_hadoop("hduser", cluster['master_IP'],
                                          " dfs -test -e " + "\'{0}\'".format(source))
        if src_file_exists == 0:
            src_base_folder, src_file = os.path.split(source)
            dest_base_folder, dest_top_file_or_folder = os.path.split(destination)
            if os.path.exists(destination):
                if os.path.isfile(destination):
                    logging.log(SUMMARY, "\'{0}\' already exists.".format(destination))
                    exit(error_fatal)
                elif os.path.isdir(destination):
                    destination = os.path.join(destination, src_file)
                    if os.path.exists(destination):
                        logging.log(SUMMARY, "\'{0}\' already exists.".format(destination))
                        exit(error_fatal)
            else:
                try:
                    if dest_base_folder:
                        if not os.path.exists(dest_base_folder):
                            os.makedirs(dest_base_folder)
                        destination = os.path.join(dest_base_folder, src_file)
                    else:
                        if dest_top_file_or_folder.endswith("/"):
                            destination = os.path.join(dest_top_file_or_folder, src_file)
                        else:
                            destination = dest_top_file_or_folder
                except OSError:
                    logging.error('Choose another destination path-directory.')
                    exit(error_fatal)
            logging.log(SUMMARY, 'Start downloading file from hdfs')
            ssh_stream_from_hadoop("hduser", cluster['master_IP'], source, destination)
        else:
            logging.error('Source file does not exist.')
            exit(error_fatal)
        if os.path.exists(destination):
            logging.log(SUMMARY, 'File downloaded from Hadoop filesystem.')
        else:
            logging.error('Error while downloading from Hadoop filesystem.')
    except Exception as e:
        logging.error(str(e.args[0]))
        exit(error_fatal)
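
# ssh_stream_from_hadoop is also defined elsewhere; a minimal sketch, assuming
# it streams the file out of HDFS with "hadoop dfs -cat" over SSH and writes
# the bytes to the local destination. The 4096-byte chunk size and the
# hadoop_path default are illustrative assumptions.
import paramiko

def ssh_stream_from_hadoop(user, master_IP, source, destination,
                           hadoop_path='/usr/local/hadoop/bin/hadoop'):
    """Stream an HDFS file to a local path over SSH."""
    ssh = paramiko.SSHClient()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    ssh.connect(master_IP, username=user)
    try:
        stdin, stdout, stderr = ssh.exec_command(
            "{0} dfs -cat \'{1}\'".format(hadoop_path, source))
        with open(destination, 'wb') as local_file:
            # Copy the remote stream in chunks so large files are not
            # buffered entirely in memory.
            for chunk in iter(lambda: stdout.read(4096), b''):
                local_file.write(chunk)
    finally:
        ssh.close()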
def get_from_hadoop_to_pithos(self, cluster, destination_path):
    """ Method for getting files from Hadoop clusters in ~okeanos to Pithos filesystem. """
    try:
        file_exists = ssh_call_hadoop("hduser", cluster['master_IP'],
                                      " dfs -test -e " + "\'{0}\'".format(self.opts['source']))
        if file_exists == 0:
            logging.log(SUMMARY, 'Start downloading file from hdfs')
            from_hdfs_to_pithos("hduser", cluster['master_IP'],
                                self.opts['source'], destination_path)
        else:
            logging.error('File does not exist.')
            exit(error_fatal)
    except Exception as e:
        logging.error(str(e.args[0]))
        exit(error_fatal)
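
# from_hdfs_to_pithos is defined elsewhere; a minimal sketch, assuming it
# spools the HDFS file to a local temporary file (reusing the streaming helper
# sketched above) and then uploads it with kamaki's PithosClient. The
# url/token/account/container plumbing below is hypothetical.
import os
import tempfile
from kamaki.clients.pithos import PithosClient

def from_hdfs_to_pithos(user, master_IP, source, destination_path,
                        url=None, token=None, account=None, container='pithos'):
    """Copy an HDFS file to Pithos via a local temporary file."""
    tmp_fd, tmp_path = tempfile.mkstemp()
    os.close(tmp_fd)
    try:
        ssh_stream_from_hadoop(user, master_IP, source, tmp_path)
        pithos = PithosClient(url, token, account, container)
        with open(tmp_path, 'rb') as local_file:
            pithos.upload_object(destination_path, local_file)
    finally:
        os.remove(tmp_path)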
            else:
                logging.error('Unrecognized destination filespec.')
                exit(error_fatal)
        except Exception as e:
            stderr.write('{0}'.format('\r'))
            logging.error(str(e.args[0]))
            exit(error_fatal)
    elif opt_filemkdir == True:
        try:
            file_protocol, remain = get_file_protocol(self.opts['directory'],
                                                      'filemkdir', 'destination')
            if file_protocol == "hdfs":
                if self.opts['recursive'] == True:
                    str_command = " dfs -mkdir -p \"{0}\"".format(remain)
                else:
                    str_command = " dfs -mkdir \"{0}\"".format(remain)
                retcode = ssh_call_hadoop("hduser", active_cluster['master_IP'],
                                          str_command)
                if str(retcode) == str(SUCCESS):
                    logging.log(SUMMARY, "\"{0}\" created.".format(remain))
                    exit(SUCCESS)
                else:
                    logging.log(SUMMARY, "\"{0}\" not created. Use -p for a nested destination.".format(remain))
            else:
                logging.error('Invalid destination filesystem.')
                exit(error_fatal)
        except Exception as e:
            stderr.write('{0}'.format('\r'))
            logging.error(str(e.args[0]))
            exit(error_fatal)

def list_pithos_files(self):
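
# get_file_protocol, used by the filemkdir branch above, is defined elsewhere;
# a minimal sketch, assuming it maps a filespec prefix to a protocol name and
# strips it. The recognized prefixes, the local-path fallback, and the unused
# command/direction parameters are assumptions made for illustration only.
def get_file_protocol(filespec, command, direction):
    """Return (protocol, remainder) for a filespec such as 'hdfs://tmp/dir'."""
    prefixes = {'hdfs://': 'hdfs', 'pithos://': 'pithos', 'file://': 'file'}
    for prefix, protocol in prefixes.items():
        if filespec.startswith(prefix):
            return protocol, filespec[len(prefix):]
    # No recognized prefix: treat the spec as a plain local path.
    return 'file', filespec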