    def create_job_home_dir(environment, job_name):
        '''
        Given a job name, create a home directory for the job in the Hadoop cluster
        '''
        user_home_dir = HadoopExecutionEnvironment._get_user_hdfs_home_dir(environment)
        job_home = "%s/dato_distributed/jobs" % user_home_dir

        job_working_dir = '%s/dato_distributed/jobs/%s' % (user_home_dir, job_name)

        _file_util.hdfs_mkdir(job_home, environment.hadoop_conf_dir)

        return job_working_dir
Example #2
    def _save_remote(self, path, aws_credentials):
        '''
        Save the current object to a remote path (S3 or HDFS) by first
        serializing it to a local temporary directory and then uploading.
        '''
        tempdir = _gl.util._make_temp_filename(prefix='predictive_policy_')

        try:
            self._save_local(tempdir)
            if _file_util.is_s3_path(path):
                _file_util.upload_to_s3(tempdir, path, is_dir=True,
                                        aws_credentials=aws_credentials)
            elif _file_util.is_hdfs_path(path):
                _file_util.hdfs_mkdir(path)
                _file_util.upload_to_hdfs(tempdir + '/*', path)
        finally:
            # Always clean up the local staging directory.
            _shutil.rmtree(tempdir)
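A minimal usage sketch for the method above, assuming `policy` is an instance of the class that defines `_save_remote` and that the credentials dictionary uses the usual access-key/secret-key fields (all names, paths, and values below are placeholders):

# Hypothetical caller: push a locally trained object to S3, then to HDFS.
aws_credentials = {
    'aws_access_key_id': '<access-key>',        # placeholder
    'aws_secret_access_key': '<secret-key>',    # placeholder
}
policy._save_remote('s3://example-bucket/policies/policy-1', aws_credentials)

# The credentials argument is only read on the S3 branch, so it can be
# None for an HDFS destination.
policy._save_remote('hdfs://namenode:8020/user/alice/policies/policy-1', None)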
    def _save_remote(self, path, aws_credentials):
        '''Save the current predictive object to S3 or HDFS
        '''
        tempdir = _tempfile.mkdtemp(prefix='predictive_object_')
        try:
            self._save_local(tempdir)
            if fu.is_s3_path(path):
                fu.upload_to_s3(tempdir, path, is_dir=True, aws_credentials=aws_credentials)
            elif fu.is_hdfs_path(path):
                fu.hdfs_mkdir(path)
                fu.upload_to_hdfs(tempdir + '/*', path)
        finally:
            shutil.rmtree(tempdir)
    def create_job_home_dir(environment, job_name):
        '''
        Given a job name, create a home directory for the job in the Hadoop cluster
        '''
        user_home_dir = HadoopExecutionEnvironment._get_user_hdfs_home_dir(
            environment)
        job_home = "%s/turi_distributed/jobs" % user_home_dir

        job_working_dir = '%s/turi_distributed/jobs/%s' % (user_home_dir,
                                                           job_name)

        _file_util.hdfs_mkdir(job_home, environment.hadoop_conf_dir)

        return job_working_dir
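A sketch of how this method might be called, assuming `env` is a HadoopExecutionEnvironment-like object exposing `hadoop_conf_dir` and whatever `_get_user_hdfs_home_dir` needs (the helper and job name below are hypothetical):

# Hypothetical, already-configured environment object.
env = get_hadoop_environment()   # assumed helper, not part of the snippet above

# Creates <hdfs user home>/turi_distributed/jobs on HDFS and returns the
# per-job working directory path; note that the working directory itself
# is not created here, only its parent.
working_dir = HadoopExecutionEnvironment.create_job_home_dir(env, 'my-job-001')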
def _copy_predictive_object_files(source_path, target_path, is_dir, src_credentials, tgt_credentials):
    '''
    Copy either file or folder from source location to target location
    '''
    # Cleanup existing file path if exists
    if _file_util.is_local_path(target_path) and _os.path.exists(target_path):
        _shutil.rmtree(target_path)

    if _file_util.is_s3_path(source_path) and _file_util.is_s3_path(target_path):
        # compare credentials
        _check_aws_credentials(src_credentials, tgt_credentials, source_path)

        # intra s3 copy model
        _file_util.intra_s3_copy_model(source_path, target_path, is_dir, tgt_credentials)
    elif _file_util.is_local_path(source_path):
        _file_util.copy_from_local(source_path, target_path, is_dir=is_dir)
    else:
        tmp_dir = _tempfile.mkdtemp(prefix='copy_predictive_object')
        try:
            # download to local first
            local_path = _os.path.join(tmp_dir, 'temp_po_file')
            if _file_util.is_s3_path(source_path):
                _file_util.download_from_s3(
                    source_path,
                    local_path,
                    is_dir=is_dir,
                    aws_credentials=src_credentials,
                    silent=False)
            elif _file_util.is_hdfs_path(source_path):
                _file_util.download_from_hdfs(source_path, local_path, is_dir=False)
            else:
                raise RuntimeError('Unsupported file system type: %s' % source_path)

            # upload from local to remote
            if _file_util.is_s3_path(target_path):
                _file_util.upload_to_s3(local_path, target_path, is_dir=is_dir,
                    aws_credentials=tgt_credentials, silent=False)
            elif _file_util.is_hdfs_path(target_path):
                _file_util.hdfs_mkdir(target_path)
                _file_util.upload_to_hdfs(local_path, target_path, force=True, silent=False)
            else:
                _file_util.upload_to_local(local_path, target_path, is_dir=is_dir, silent=False)

        finally:
            _shutil.rmtree(tmp_dir)
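A sketch of the three code paths in `_copy_predictive_object_files` (bucket names, HDFS paths, and credential dictionaries below are placeholders):

src_creds = {'aws_access_key_id': '<key>', 'aws_secret_access_key': '<secret>'}
tgt_creds = src_creds   # same account in this example

# S3 -> S3: credentials are checked, then intra_s3_copy_model is used.
_copy_predictive_object_files('s3://bucket-a/model', 's3://bucket-b/model',
                              True, src_creds, tgt_creds)

# Local source: copied directly with copy_from_local.
_copy_predictive_object_files('/tmp/model', 'hdfs://namenode/user/alice/model',
                              True, None, None)

# Any other combination (e.g. S3 -> HDFS) is staged through a local temp dir.
_copy_predictive_object_files('s3://bucket-a/model', 'hdfs://namenode/user/alice/model',
                              True, src_creds, None)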
Example #6
def _dml_create_working_dir(jobname, deploy_environment):
    working_dir = None
    if type(deploy_environment) is HadoopCluster:
        hadoop_cluster = deploy_environment
        working_dir = hadoop_cluster._create_job_home_dir(jobname)
        # _create_job_home_dir only creates the directory one level above
        # working_dir, so the working directory itself must be created
        # explicitly here.
        # Note: hdfs_mkdir creates the working directory with a+rw permissions.
        file_util.hdfs_mkdir(working_dir, hadoop_cluster.hadoop_conf_dir)
    elif type(deploy_environment) is LocalAsync:
        raise NotImplementedError()
    else:
        raise ValueError('Unsupported deploy environment')
    logger.debug('Working directory created: %s' % working_dir)
    return working_dir
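A minimal sketch of the expected call, assuming `cluster` is an already-configured HadoopCluster deployment object (obtaining such an object is outside the snippet above):

# `cluster` must expose _create_job_home_dir and hadoop_conf_dir, as used above.
working_dir = _dml_create_working_dir('dml_job_001', cluster)
# working_dir now exists on HDFS with a+rw permissions and can be passed
# to the distributed job.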