Exemplo n.º 1
0
def create_shp_subset_polygons(dir_sub, training_shpAll, file_name, k_value):
    """Run the training-polygon subset script from inside ``dir_sub``.

    The process working directory is switched to ``dir_sub`` while the
    command runs and is always switched back afterwards, even when building
    or launching the command raises an exception.

    :param dir_sub: folder to run the command in
    :param training_shpAll: passed as the first argument of the script
    :param file_name: passed as the second argument of the script
    :param k_value: passed after ``-k`` (converted with ``str``)
    :return: always True; the return code of the command is not inspected
    """
    curr_dir = os.getcwd()

    # change the running folder
    os.chdir(dir_sub)
    try:
        args_list = [
            get_training_polygons_script, training_shpAll, file_name, '-k',
            str(k_value)
        ]
        basic.exec_command_args_list(args_list)
    finally:
        # change back, also on failure, so callers are not left in dir_sub
        os.chdir(curr_dir)

    return True
Exemplo n.º 2
0
def unpack_tar_gz_file(file_path, work_dir):
    '''
    Unpack a *.tar.gz package into a sub-folder of work_dir with a single
    "tar -zxvf" call. The sub-folder is named after the package file without
    its ".tar.gz" suffix.
    :param file_path: path of the *.tar.gz file
    :param work_dir: an existing folder; the destination folder is created inside it
    :return: the path of the folder which contains the decompressed files
        (work_dir joined with the package name), or False if tar returns a
        non-zero code
    :raises IOError: if work_dir is not an existing directory
    :raises ValueError: if file_path does not end with ".tar.gz"
    '''
    if os.path.isdir(work_dir) is False:
        raise IOError('dir %s not exist' % os.path.abspath(work_dir))

    if file_path.endswith('.tar.gz') is False:
        # include the offending path in the message (the format argument
        # was missing before, so a literal "%s" was reported)
        raise ValueError('input %s do not end with .tar.gz' % file_path)

    # strip the 7 characters of ".tar.gz"
    file_basename = os.path.basename(file_path)[:-7]

    dst_folder = os.path.join(work_dir, file_basename)
    if os.path.isdir(dst_folder) and len(
            os.listdir(dst_folder)) > 1:  # on Mac, .DS_Store count one file.
        # more than one entry is taken as "already unpacked"
        basic.outputlogMessage('%s exists and is not empty, skip unpacking' %
                               dst_folder)
        return dst_folder
    else:
        mkdir(dst_folder)

    args_list = ['tar', '-zxvf', file_path, '-C', dst_folder]
    returncode = basic.exec_command_args_list(args_list)
    if returncode != 0:
        return False

    return dst_folder
Exemplo n.º 3
0
def decompress_gz_file(file_path,work_dir,bkeepmidfile):
    """
    decompress a compressed file with gz extension in two steps:
    "gzip -dk" to get the intermediate tar file next to file_path, then
    "tar -xvf" to extract it into a sub-folder of work_dir
    Args:
        file_path:the path of gz file
        work_dir: an existing folder; the destination folder is created inside it
        bkeepmidfile: indicate whether keep the middle file(eg *.tar file),
            the middle file is removed only when this is exactly False

    Returns:the absolute path of a folder which contains the decompressed files,
        or False if work_dir does not exist or gzip / tar returns a non-zero code

    """
    if os.path.isdir(work_dir) is False:
        basic.outputlogMessage('dir %s not exist'%os.path.abspath(work_dir))
        return False
    file_basename = os.path.basename(file_path).split('.')[0]
    if file_path.endswith('.gz'):
        # gzip -d writes its output to the input path without ".gz"; using that
        # name keeps file_tar correct for names with extra dots (a.b.tar.gz)
        file_tar = file_path[:-3]
    else:
        file_tar = os.path.join(os.path.dirname(file_path), file_basename + ".tar")

    # decompression file and keep it
    # NOTE: an earlier comment here reported that the gzip on one machine
    # (cry01) has no -k option
    args_list = ['gzip','-dk',file_path]
    if os.path.isfile(file_tar):
        # reuse the tar file left by a previous run
        basic.outputlogMessage('%s already exist'%file_tar)
    else:
        if basic.exec_command_args_list(args_list) != 0:
            return False

    # extract the tar file into a folder named after the package
    dst_folder = os.path.join(os.path.abspath(work_dir),file_basename)
    mkdir(dst_folder)
    args_list = ['tar', '-xvf', file_tar,'-C',dst_folder]
    if basic.exec_command_args_list(args_list) != 0:
        # keep the tar file for inspection when the extraction failed
        return False
    if bkeepmidfile is False:
        os.remove(file_tar)
    return dst_folder
Exemplo n.º 4
0
def train_kfold_cross_val(multi_training_files_allPolygons,
                          multi_training_files, k_value, test_num):
    """Run k-fold cross-validation training, one external run per fold.

    Steps:
      1. Read ``multi_training_files_allPolygons``; on every non-blank line
         the text after the last ':' is taken as a shape file path.
      2. For every shape file make sure a ``<name>_<k>-fold_cross_val_t<n>``
         folder next to it exists, creating the subsets with
         ``create_shp_subset_polygons`` unless the folder already holds
         exactly ``k_value`` ``*.shp`` files.
      3. For every fold 1..k_value: delete the previous trained model folder,
         copy the all-polygons list to ``multi_training_files`` and point it
         at that fold's shape files, write ``exe_qtp.sh`` from
         ``exe_template_kfold.sh``, and run it unless matching results are
         already present under ``result_backup``.

    Uses the names ``log`` and ``trained_model_dir``, which are not defined
    in this function and must be provided by the enclosing module.

    :param multi_training_files_allPolygons: text file listing the shape
        files that contain all polygons
    :param multi_training_files: text file that is overwritten for each fold
    :param k_value: number of folds
    :param test_num: test number, used in folder and result names
    :return: None; calls ``sys.exit`` with the return code of ``exe_qtp.sh``
        if it is not 0
    """

    ##################################################################
    # get subset of polygons
    training_shp_all = []
    with open(multi_training_files_allPolygons, 'r') as f_obj:
        for line in f_obj:
            line = line.strip()
            if not line:
                # a blank line would give an empty shape file path
                continue
            training_shp_all.append(
                line.split(':')[-1])  # the last one is the shape file

    for training_shpAll in training_shp_all:

        file_name = os.path.basename(training_shpAll)
        dir_sub = _kfold_subset_dir(training_shpAll, k_value, test_num)

        if os.path.isdir(dir_sub) is False:

            # will save to dir_sub
            io_function.mkdir(dir_sub)
            create_shp_subset_polygons(dir_sub, training_shpAll, file_name,
                                       k_value)
        else:
            # check shape file existence
            sub_shps = io_function.get_file_list_by_pattern(dir_sub, '*.shp')
            if len(sub_shps) == k_value:
                print2file(
                    log,
                    "subset of shapefile already exist, skip creating new")
            else:
                create_shp_subset_polygons(dir_sub, training_shpAll, file_name,
                                           k_value)

    ##################################################################
    # training on k subset
    for idx in range(1, k_value + 1):
        # remove previous trained model (the setting are the same to exp10)
        if os.path.isdir(trained_model_dir):
            io_function.delete_file_or_dir(trained_model_dir)

        print2file(log, "run training and inference of the %d_th fold" % idx)

        # start from a fresh copy, then replace each shape file path in
        # "multi_training_files" with the path of this fold's subset
        io_function.copy_file_to_dst(multi_training_files_allPolygons,
                                     multi_training_files,
                                     overwrite=True)
        for training_shpAll in training_shp_all:
            file_name_no_ext = os.path.splitext(
                os.path.basename(training_shpAll))[0]
            dir_sub = _kfold_subset_dir(training_shpAll, k_value, test_num)

            new_shp_path = os.path.join(
                dir_sub, '%s_%dfold_%d.shp' % (file_name_no_ext, k_value, idx))
            repalce_string_in_file(multi_training_files, training_shpAll,
                                   new_shp_path)

        # modify exe.sh
        io_function.copy_file_to_dst('exe_template_kfold.sh',
                                     'exe_qtp.sh',
                                     overwrite=True)
        new_line = '%dfold_%d_t%d' % (k_value, idx, test_num)
        repalce_string_in_file('exe_qtp.sh', 'x_test_num', new_line)

        # check results existence
        result_shp = io_function.get_file_list_by_pattern(
            'result_backup', '*' + new_line + '*/*.shp')
        if len(result_shp) > 0:
            print2file(log,
                       "results of test: %s already exist, skip" % new_line)
        else:
            # run training
            print2file(log, "start: test:%d the %d_th fold" % (test_num, idx))
            argslist = ['./exe_qtp.sh']
            return_code = basic.exec_command_args_list(argslist)
            # exit code is not 0, means something wrong, then quit
            if return_code != 0:
                sys.exit(return_code)


def _kfold_subset_dir(training_shp, k_value, test_num):
    """Return the folder, next to ``training_shp``, that holds its k-fold subsets."""
    name_no_ext = os.path.splitext(os.path.basename(training_shp))[0]
    return os.path.join(
        os.path.dirname(training_shp),
        '%s_%d-fold_cross_val_t%d' % (name_no_ext, k_value, test_num))