# Imports assumed for the snippets below (module paths follow the usual
# Sahara layout; treat them as a best guess, not verbatim upstream headers):
import os

from sahara import context
from sahara.plugins.mapr.util import maprfs_helper as mfs
from sahara.service.edp.binary_retrievers import dispatch as d
from sahara.service.edp.job_binaries import manager as jb_manager
from sahara.utils import edp


def _upload_job_files_to_hdfs(self, where, job_dir, job):
    """Upload the job's main and lib binaries to MapR-FS.

    Mains land directly in job_dir; libs go under job_dir/lib/.
    Returns the list of uploaded MapR-FS paths.
    """
    mains = job.mains or []
    libs = job.libs or []
    uploaded_paths = []
    hdfs_user = self.get_hdfs_user()
    with where.remote() as r:
        for m in mains:
            raw_data = d.get_raw_binary(m)
            mfs.put_file_to_maprfs(r, raw_data, m.name, job_dir, hdfs_user)
            uploaded_paths.append(job_dir + '/' + m.name)
        for l in libs:
            raw_data = d.get_raw_binary(l)
            lib_dir = job_dir + '/lib/'
            self.create_hdfs_dir(r, lib_dir)
            mfs.put_file_to_maprfs(r, raw_data, l.name, lib_dir, hdfs_user)
            uploaded_paths.append(lib_dir + l.name)
    return uploaded_paths
def _upload_job_files_to_hdfs(self, where, job_dir, job, configs,
                              proxy_configs=None):
    """Upload mains, libs and EDP builtin binaries to MapR-FS.

    Second revision: retrieval is proxy-aware, and the builtin
    libraries that EDP supplies for the job type are staged under
    job_dir/lib alongside the user libs.
    """
    mains = job.mains or []
    libs = job.libs or []
    builtin_libs = edp.get_builtin_binaries(job, configs)
    uploaded_paths = []
    hdfs_user = self.get_hdfs_user()
    lib_dir = job_dir + '/lib'
    with where.remote() as r:
        for m in mains:
            raw_data = d.get_raw_binary(m, proxy_configs)
            mfs.put_file_to_maprfs(r, raw_data, m.name, job_dir, hdfs_user)
            uploaded_paths.append(os.path.join(job_dir, m.name))
        if len(libs) > 0:
            # Create job_dir/lib once, and only if there is something
            # to put in it.
            self.create_hdfs_dir(r, lib_dir)
        for l in libs:
            raw_data = d.get_raw_binary(l, proxy_configs)
            mfs.put_file_to_maprfs(r, raw_data, l.name, lib_dir, hdfs_user)
            uploaded_paths.append(os.path.join(lib_dir, l.name))
        for lib in builtin_libs:
            mfs.put_file_to_maprfs(r, lib['raw'], lib['name'], lib_dir,
                                   hdfs_user)
            uploaded_paths.append(lib_dir + '/' + lib['name'])
    return uploaded_paths
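# Illustration (not upstream code): the builtin-libs loop above indexes
# lib['raw'] and lib['name'], so each entry returned by
# edp.get_builtin_binaries is assumed to be a dict carrying the payload
# bytes and the target file name. A minimal sketch of that shape and of
# the MapR-FS path the loop records; the jar name and job directory are
# hypothetical:
example_builtin = {'name': 'builtin-adapter.jar', 'raw': b'...payload...'}
lib_dir = '/user/mapr/oozie-job-1/lib'  # hypothetical job_dir + '/lib'
uploaded = lib_dir + '/' + example_builtin['name']
print(uploaded)  # /user/mapr/oozie-job-1/lib/builtin-adapter.jar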
def _upload_job_files_to_hdfs(self, where, job_dir, job, configs,
                              proxy_configs=None):
    """Upload mains, libs and EDP builtin binaries to MapR-FS.

    Third revision: each user binary is resolved through the
    job-binary manager, which copies it onto the cluster node; the
    local copy is then moved into MapR-FS with copy_from_local.
    """
    mains = job.mains or []
    libs = job.libs or []
    builtin_libs = edp.get_builtin_binaries(job, configs)
    uploaded_paths = []
    hdfs_user = self.get_hdfs_user()
    lib_dir = job_dir + '/lib'
    with where.remote() as r:
        for m in mains:
            path = jb_manager.JOB_BINARIES. \
                get_job_binary_by_url(m.url). \
                copy_binary_to_cluster(m, proxy_configs=proxy_configs,
                                       remote=r, context=context.ctx())
            target = os.path.join(job_dir, m.name)
            mfs.copy_from_local(r, path, target, hdfs_user)
            uploaded_paths.append(target)
        if len(libs) > 0:
            self.create_hdfs_dir(r, lib_dir)
        for l in libs:
            path = jb_manager.JOB_BINARIES. \
                get_job_binary_by_url(l.url). \
                copy_binary_to_cluster(l, proxy_configs=proxy_configs,
                                       remote=r, context=context.ctx())
            target = os.path.join(lib_dir, l.name)
            mfs.copy_from_local(r, path, target, hdfs_user)
            uploaded_paths.append(target)
        for lib in builtin_libs:
            mfs.put_file_to_maprfs(r, lib['raw'], lib['name'], lib_dir,
                                   hdfs_user)
            uploaded_paths.append(lib_dir + '/' + lib['name'])
    return uploaded_paths
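# Illustration (not upstream code): the third revision replaces raw-bytes
# retrieval with the job-binary manager, which picks a handler by URL
# scheme, copies the binary onto the cluster node, and returns the local
# path that mfs.copy_from_local then moves into MapR-FS. A self-contained
# sketch of that dispatch-then-copy pattern; every name below
# (_StubBinary, _StubHandler, _StubManager) is hypothetical:
import os


class _StubBinary(object):
    def __init__(self, name, url):
        self.name = name
        self.url = url


class _StubHandler(object):
    def __init__(self, scheme):
        self.scheme = scheme

    def copy_binary_to_cluster(self, binary, **kwargs):
        # Upstream: fetch the binary and write it to the node's local
        # filesystem. Here we only fabricate the resulting local path.
        return os.path.join('/tmp', binary.name)


class _StubManager(object):
    def get_job_binary_by_url(self, url):
        # A real manager keeps one handler per scheme (swift, manila,
        # internal-db, ...); one fake handler is enough for the sketch.
        return _StubHandler(url.split('://', 1)[0])


if __name__ == '__main__':
    jb = _StubManager()
    main = _StubBinary('wordcount.jar', 'internal-db://1234')
    local_path = jb.get_job_binary_by_url(main.url) \
        .copy_binary_to_cluster(main)
    target = os.path.join('/user/mapr/oozie-job-1', main.name)
    print('copy_from_local would move %s -> maprfs:%s' % (local_path, target))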
def _upload_workflow_file(self, where, job_dir, wf_xml, hdfs_user):
    f_name = 'workflow.xml'
    with where.remote() as r:
        mfs.put_file_to_maprfs(r, wf_xml, f_name, job_dir, hdfs_user)
    return os.path.join(job_dir, f_name)
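# Usage sketch (hypothetical, not upstream code): taken together, the
# helpers above stage everything Oozie needs under one MapR-FS job
# directory. Assuming a job with one main and one lib, the layout the
# run flow ends up referencing looks like this; the directory and jar
# names are made up for illustration:
import os

job_dir = '/user/mapr/oozie-job-1'
staged = [
    os.path.join(job_dir, 'workflow.xml'),    # _upload_workflow_file
    os.path.join(job_dir, 'wordcount.jar'),   # main, _upload_job_files_to_hdfs
    os.path.join(job_dir, 'lib', 'dep.jar'),  # lib, _upload_job_files_to_hdfs
]
print('\n'.join(staged))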