def create_job_home_dir(environment, job_name):
    '''
    Given a job name, create a home directory for the job in Hadoop cluster
    '''
    user_home_dir = HadoopExecutionEnvironment._get_user_hdfs_home_dir(environment)
    job_home = "%s/dato_distributed/jobs" % user_home_dir
    job_working_dir = '%s/dato_distributed/jobs/%s' % (user_home_dir, job_name)

    _file_util.hdfs_mkdir(job_home, environment.hadoop_conf_dir)
    return job_working_dir
def _save_remote(self, path, aws_credentials):
    '''
    Save the current predictive policy to a remote location (S3 or HDFS)
    '''
    tempdir = _gl.util._make_temp_filename(prefix='predictive_policy_')
    try:
        self._save_local(tempdir)
        if _file_util.is_s3_path(path):
            _file_util.upload_to_s3(tempdir, path, is_dir=True,
                                    aws_credentials=aws_credentials)
        elif _file_util.is_hdfs_path(path):
            _file_util.hdfs_mkdir(path)
            _file_util.upload_to_hdfs(tempdir + '/*', path)
    finally:
        _shutil.rmtree(tempdir)
def _save_remote(self, path, aws_credentials):
    '''
    Save the current predictive object to S3 or HDFS
    '''
    tempdir = _tempfile.mkdtemp(prefix='predictive_object_')
    try:
        self._save_local(tempdir)
        if fu.is_s3_path(path):
            fu.upload_to_s3(tempdir, path, is_dir=True,
                            aws_credentials=aws_credentials)
        elif fu.is_hdfs_path(path):
            fu.hdfs_mkdir(path)
            fu.upload_to_hdfs(tempdir + '/*', path)
    finally:
        shutil.rmtree(tempdir)
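# Usage sketch (not part of the original module): _save_remote is meant to be
# bound as a method on an object that also implements _save_local, e.g. a
# predictive object or policy instance. The instance "po", the paths, and the
# credential values below are placeholders assumed for illustration.
po._save_remote('hdfs://namenode:8020/user/alice/predictive_objects/my_po',
                aws_credentials=None)   # HDFS target: AWS credentials unused
po._save_remote('s3://my-bucket/predictive_objects/my_po',
                aws_credentials={'aws_access_key_id': '...',
                                 'aws_secret_access_key': '...'})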
def create_job_home_dir(environment, job_name):
    '''
    Given a job name, create a home directory for the job in Hadoop cluster
    '''
    user_home_dir = HadoopExecutionEnvironment._get_user_hdfs_home_dir(environment)
    job_home = "%s/turi_distributed/jobs" % user_home_dir
    job_working_dir = '%s/turi_distributed/jobs/%s' % (user_home_dir, job_name)

    _file_util.hdfs_mkdir(job_home, environment.hadoop_conf_dir)
    return job_working_dir
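# Usage sketch (not part of the original module): create the HDFS job home
# directory for a named job. "hadoop_env" is assumed to be an environment
# object that HadoopExecutionEnvironment._get_user_hdfs_home_dir accepts and
# that carries a hadoop_conf_dir attribute; the job name is a placeholder.
job_working_dir = create_job_home_dir(hadoop_env, 'my_training_job')
# job_working_dir has the form
#   <user hdfs home>/turi_distributed/jobs/my_training_job
# Only its parent ".../turi_distributed/jobs" is created here; the working
# directory itself is created later (see _dml_create_working_dir below).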
def _copy_predictive_object_files(source_path, target_path, is_dir,
                                  src_credentials, tgt_credentials):
    '''
    Copy either file or folder from source location to target location
    '''
    # Cleanup existing file path if exists
    if _file_util.is_local_path(target_path) and _os.path.exists(target_path):
        _shutil.rmtree(target_path)

    if _file_util.is_s3_path(source_path) and _file_util.is_s3_path(target_path):
        # compare credentials
        _check_aws_credentials(src_credentials, tgt_credentials, source_path)

        # intra s3 copy model
        _file_util.intra_s3_copy_model(source_path, target_path, is_dir,
                                       tgt_credentials)
    elif _file_util.is_local_path(source_path):
        _file_util.copy_from_local(source_path, target_path, is_dir=is_dir)
    else:
        tmp_dir = _tempfile.mkdtemp(prefix='copy_predictive_object')
        try:
            # download to local first
            local_path = _os.path.join(tmp_dir, 'temp_po_file')
            if _file_util.is_s3_path(source_path):
                _file_util.download_from_s3(
                    source_path, local_path, is_dir=is_dir,
                    aws_credentials=src_credentials, silent=False)
            elif _file_util.is_hdfs_path(source_path):
                _file_util.download_from_hdfs(source_path, local_path,
                                              is_dir=False)
            else:
                raise RuntimeError('Unsupported file system type: %s' % source_path)

            # upload from local to remote
            if _file_util.is_s3_path(target_path):
                _file_util.upload_to_s3(local_path, target_path, is_dir=is_dir,
                                        aws_credentials=tgt_credentials,
                                        silent=False)
            elif _file_util.is_hdfs_path(target_path):
                _file_util.hdfs_mkdir(target_path)
                _file_util.upload_to_hdfs(local_path, target_path, force=True,
                                          silent=False)
            else:
                _file_util.upload_to_local(local_path, target_path,
                                           is_dir=is_dir, silent=False)
        finally:
            _shutil.rmtree(tmp_dir)
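# Usage sketch (not part of the original module): copy a saved predictive
# object directory from S3 to HDFS. Because the source is not local and the
# source/target file systems differ, the function stages the data through a
# local temporary directory. Bucket names, paths, and credential values are
# placeholders.
src_credentials = {'aws_access_key_id': '...', 'aws_secret_access_key': '...'}
_copy_predictive_object_files(
    source_path='s3://my-bucket/predictive_objects/my_po',
    target_path='hdfs://namenode:8020/user/alice/predictive_objects/my_po',
    is_dir=True,
    src_credentials=src_credentials,
    tgt_credentials=None)   # AWS credentials are not needed for the HDFS target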
def _dml_create_working_dir(jobname, deploy_environment):
    working_dir = None
    if type(deploy_environment) is HadoopCluster:
        hadoop_cluster = deploy_environment
        working_dir = hadoop_cluster._create_job_home_dir(jobname)
        # _create_job_home_dir above only creates the directory one level up
        # from working_dir, so we have to create the working directory
        # explicitly here. Note: creating it via hdfs_mkdir is what gives the
        # working dir a+rw permissions.
        file_util.hdfs_mkdir(working_dir, hadoop_cluster.hadoop_conf_dir)
    elif type(deploy_environment) is LocalAsync:
        raise NotImplementedError()
    else:
        raise ValueError('Unsupported deploy environment')

    logger.debug('Working directory created: %s' % working_dir)
    return working_dir
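# Usage sketch (not part of the original module): "hadoop_cluster" is assumed
# to be an existing HadoopCluster deployment object exposing
# _create_job_home_dir and hadoop_conf_dir; the job name is a placeholder.
working_dir = _dml_create_working_dir('my_training_job', hadoop_cluster)
# working_dir now exists on HDFS (with a+rw permissions) and can be handed to
# the distributed job as its working directory.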