def _upload_folder_to_remote(self, local, remote):
    '''
    Upload a local folder to a remote path, dispatching on the remote
    scheme: S3 paths go through upload_to_s3, HDFS paths through
    upload_folder_to_hdfs.
    '''
    if _file_util.is_s3_path(remote):
        _file_util.upload_to_s3(
            local, remote, is_dir=True,
            aws_credentials=self.environment.get_credentials(),
            silent=True)
    elif _file_util.is_hdfs_path(remote):
        _file_util.upload_folder_to_hdfs(
            local, remote, self.environment.hadoop_conf_dir)
def prepare_job_files(environment, job):
    '''
    Upload all job-related information to HDFS so that the job can be
    executed remotely.
    '''
    exec_dir = HadoopExecutionEnvironment.create_job_home_dir(
        environment, job.name)
    _job.HadoopJob._update_exec_dir(job, exec_dir)
    logging.info("Job working directory: %s" % job._exec_dir)

    # Stage all job files in a local temp folder first, then push the whole
    # folder to HDFS; the temp folder is always removed, even on failure.
    temp_job_folder = _make_temp_directory(prefix='hadoop_job_')
    try:
        ExecutionEnvironment.prepare_job_exec_dir(job, temp_job_folder)
        # Move everything to HDFS
        _file_util.upload_folder_to_hdfs(
            temp_job_folder, exec_dir,
            hadoop_conf_dir=environment.hadoop_conf_dir)
    finally:
        _shutil.rmtree(temp_job_folder)
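# Hedged usage sketch (illustration only, not part of the original module).
# How the environment and job objects are constructed is an assumption here;
# only the prepare_job_files call itself comes from this file. The flow it
# implements is: create the job's home directory on HDFS, stage all job files
# in a local temp folder, upload that folder in one shot, and always remove
# the temp folder via the finally block.
#
#   env = ...  # a HadoopExecutionEnvironment with hadoop_conf_dir configured
#   job = ...  # a _job.HadoopJob instance
#   prepare_job_files(env, job)
#   # job._exec_dir now points at the job's working directory on HDFS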