def move_files(save_dir, out_fig, out_hist_info):
    '''
    trim a figure with the external "convert -trim" command, then move the
    trimmed figure and the histogram info file into save_dir
    :param save_dir: destination folder, created if it does not exist
    :param out_fig: path of the original figure; removed after trimming
    :param out_hist_info: path of the histogram info file to move
    :return: None
    '''
    import shlex  # local import: only needed for quoting the shell arguments

    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)
    trim_fig = io_function.get_name_by_adding_tail(out_fig, 'trim')
    # quote both paths, otherwise a space or shell metacharacter in a file
    # name splits (or alters) the command line passed to the shell
    os.system('convert -trim %s %s' %
              (shlex.quote(out_fig), shlex.quote(trim_fig)))
    io_function.movefiletodir(trim_fig, save_dir, overwrite=True)
    # only the trimmed version is kept; the untrimmed figure is deleted
    io_function.delete_file_or_dir(out_fig)
    # io_function.movefiletodir(out_fig,save_dir,overwrite=True)
    io_function.movefiletodir(out_hist_info, save_dir, overwrite=True)
示例#2
0
def check_dem_valid_per(dem_tif_list, work_dir, process_num =1, move_dem_threshold = None, area_pixel_num=None):
    '''
    get the valid pixel percentage for each DEM and save them (sorted from
    high to low) to "dem_valid_percent.txt" in work_dir
    :param dem_tif_list: list of DEM file paths
    :param work_dir: folder for the output txt file and for the sub-folder of moved DEMs
    :param process_num: 1 for running in serial, >1 for running with a pool of processes
    :param move_dem_threshold: move a DEM to a sub-folder if its valid percentage is smaller than the threshold
    :param area_pixel_num: passed to raster_io.get_valid_pixel_percentage as the total pixel number
    :return: the list of DEMs that are kept, or False if getting the percentage of any DEM failed
    :raises ValueError: if process_num is smaller than 1
    '''

    keep_dem_list = []
    print('start getting valid pixel percent for %d files'%len(dem_tif_list))
    dem_tif_valid_per = {}
    # when run in parallel, it has "Finalize object, dead" after a while,  cannot figure out why?, so temporal set process_num = 1
    # process_num = 1       #update on 15 March, 2021. I changed the python from 3.8 on uist to 3.7 (same as tesia), then problem solved.

    if process_num == 1:
        for idx,tif in enumerate(dem_tif_list):
            print('(%d/%d) get valid pixel percent for %s'%(idx+1, len(dem_tif_list),tif))
            per = raster_io.get_valid_pixel_percentage(tif, total_pixel_num=area_pixel_num)
            if per is False:
                return False
            dem_tif_valid_per[tif] = per
            keep_dem_list.append(tif)
    elif process_num > 1:
        theadPool = Pool(process_num)  # multi processes
        parameters_list = [(tif, area_pixel_num, '%d/%d'%(idx+1, len(dem_tif_list)) ) for idx,tif in enumerate(dem_tif_list)]
        try:
            results = theadPool.starmap(raster_io.get_valid_pixel_percentage, parameters_list)  # need python3
        finally:
            # always release the worker processes, otherwise they stay alive
            # (and accumulate) each time this function is called
            theadPool.close()
            theadPool.join()
        for res, tif in zip(results, dem_tif_list):
            if res is False:
                return False
            dem_tif_valid_per[tif] = res
            keep_dem_list.append(tif)
    else:
        raise ValueError("Wrong process_num: %d"%process_num)

    # sort by the valid percentage, from high to low
    dem_tif_valid_per_d = dict(sorted(dem_tif_valid_per.items(), key=operator.itemgetter(1), reverse=True))
    percent_txt = os.path.join(work_dir,'dem_valid_percent.txt')
    with open(percent_txt,'w') as f_obj:
        for key, per in dem_tif_valid_per_d.items():
            f_obj.write('%s %.4f\n'%(os.path.basename(key), per))
    basic.outputlogMessage('save dem valid pixel percentage to %s'%percent_txt)

    # only keep dem with valid pixel greater than a threshold
    if move_dem_threshold is not None:  # int or float
        keep_dem_list = []      # reset the list
        mosaic_dir_rm = os.path.join(work_dir,'dem_valid_lt_%.2f'%move_dem_threshold)
        io_function.mkdir(mosaic_dir_rm)
        for tif, per in dem_tif_valid_per.items():
            if per < move_dem_threshold:
                io_function.movefiletodir(tif,mosaic_dir_rm)
            else:
                keep_dem_list.append(tif)

    return keep_dem_list
示例#3
0
def move_align_results(ref_dem, dem_tif, save_dir):
    '''
    move the outputs of dem_align.py for one DEM into sub-folders of save_dir:
    the filtered aligned DEM to "dem_coreg", the filtered elevation difference
    to "dem_diff_from_demcoreg", and the png plots to "demcoreg_png_plot"
    :param ref_dem: path of the reference DEM; copied into "dem_coreg" if not already there
    :param dem_tif: path of the DEM that was aligned; used to locate the align outputs
    :param save_dir: root folder for saving the results
    :return: True
    :raises ValueError: if fewer than 9 align output files are found
    '''

    coreg_save_dir = os.path.join(save_dir, 'dem_coreg')
    if os.path.isdir(coreg_save_dir) is False:
        io_function.mkdir(coreg_save_dir)

    align_outputs = check_align_folder(dem_tif)
    if len(align_outputs) < 9:
        raise ValueError('the output of dem_align.py is less than 9 files')

    dem_align = os.path.join(
        coreg_save_dir,
        os.path.basename(io_function.get_name_by_adding_tail(dem_tif,
                                                             'coreg')))
    # align DEM and a filt version, which one should I use? what filter they apply?
    # visually check one results (Banks east) , a the same location, align DEM and a filt one have exact values,
    # but the filt version have more nodata.  Let's use the filt version.
    # the nodata pixels usually are water pixels, but also some inside the thaw slumps
    align_filt = [
        out for out in align_outputs if out.endswith('align_filt.tif')
    ][0]
    io_function.move_file_to_dst(align_filt, dem_align, overwrite=True)

    # copy reference dem if necessary
    ref_dem_copy = os.path.join(coreg_save_dir, os.path.basename(ref_dem))
    if os.path.isfile(ref_dem_copy) is False:
        io_function.copy_file_to_dst(ref_dem, ref_dem_copy)

    # move the elevation difference?
    ele_diff_folder = os.path.join(save_dir, 'dem_diff_from_demcoreg')
    if os.path.isdir(ele_diff_folder) is False:
        io_function.mkdir(ele_diff_folder)
    dem_diff_filt = [
        out for out in align_outputs if out.endswith('align_diff_filt.tif')
    ][0]
    io_function.movefiletodir(dem_diff_filt, ele_diff_folder, overwrite=True)

    coreg_png_plot_folder = os.path.join(save_dir, 'demcoreg_png_plot')
    # create the folder when it is missing (the condition was inverted before,
    # so the folder was never created on the first run)
    if os.path.isdir(coreg_png_plot_folder) is False:
        io_function.mkdir(coreg_png_plot_folder)
    coreg_pngs = [out for out in align_outputs if out.endswith('.png')]
    for png in coreg_pngs:
        io_function.movefiletodir(png, coreg_png_plot_folder, overwrite=True)

    return True
示例#4
0
def main():
    '''
    endless house-keeping loop: once per minute, move old files in the
    registration folder to the archive dir and delete old "SETSM_*2m_v3.0"
    and "grid*files" folders in the current directory.
    "old" is decided by check_file_or_dir_is_old with the module-level time_hour_thr.
    :return: never returns
    '''
    reg_tif_dir = 'arcticdem_registration_tifs'
    cur_dir = './'
    # (message for the folder count, search pattern) of temporary folders to delete
    removable = (
        ('SETSM folder count: %d in %s', 'SETSM_*2m_v3.0'),
        ('Grid tmp folder count: %d in %s', 'grid*files'),
    )

    while True:
        print(str(datetime.now()),
              'start moving or removing files or folders\n\n')

        # archive old registration results
        reg_items = io_function.get_file_list_by_pattern(reg_tif_dir, '*')
        print('registration file count: %d in %s' %
              (len(reg_items), reg_tif_dir))
        for item in reg_items:
            if not check_file_or_dir_is_old(item, time_hour_thr):
                continue
            print('%s is older than %f hours, will be moved to archieved dir'
                  % (item, time_hour_thr))
            io_function.movefiletodir(item,
                                      arcticDEM_reg_tif_dir,
                                      overwrite=True)

        # delete old temporary folders
        for count_msg, pattern in removable:
            folders = io_function.get_file_list_by_pattern(cur_dir, pattern)
            print(count_msg % (len(folders), cur_dir))
            for folder in folders:
                if not check_file_or_dir_is_old(folder, time_hour_thr):
                    continue
                print('%s is older than %f hours, will be removed' %
                      (folder, time_hour_thr))
                io_function.delete_file_or_dir(folder)

        time.sleep(60)  # wait
示例#5
0
def train_evaluation_deeplab_separate(WORK_DIR, deeplab_dir, expr_name,
                                      para_file, network_setting_ini, gpu_num):
    '''
    in "train_evaluation_deeplab", run training, stop, then evaluation, then training, make learning rate strange, and the results worse.
    so in this function, we start two process, one for training, another for evaluation (run on CPU)
    :param WORK_DIR: working folder; contains "tfrecord" and receives the experiment folder and "result_backup"
    :param deeplab_dir: folder containing train.py and eval.py
    :param expr_name: name of the experiment folder created under WORK_DIR
    :param para_file: parameter file (sample lists, dataset name, class number, crop size, early stopping, ...)
    :param network_setting_ini: network setting file (pre-trained model, batch size, iterations, learning rate, ...)
    :param gpu_num: passed to train_deeplab
    :return: True if the training already reached iteration_num, otherwise None after training and backup finished
    :raises IOError: if the pre-trained model cannot be unpacked or no initial checkpoint is found
    :raises ValueError: if image_crop_size is not "height,width"
    note: calls sys.exit(1) if downloading the pre-trained model fails or the
    training process exits with an error within the first 60 seconds
    '''
    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')

    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow check point (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini,
                                                       'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s not exist, try to download' %
              pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(
            network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini,
                                                 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini,
                                                    'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(
        network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0.
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'depth_multiplier', 'float')

    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini,
                                                     'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini,
                                                  'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(
        INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' %
                      (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file,
                                                      'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(
        para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG is not None
    # when the last layer is initialized from the pre-trained model, the class
    # number has to match the one the model was trained with
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print(
                'warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21'
                % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print(
                'warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19'
                % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(
        para_file, 'image_crop_size')
    # must be exactly two integer strings: height,width
    if len(image_crop_size) != 2 or not all(
            item.strip().isdigit() for item in image_crop_size):
        raise ValueError('image_crop_size should be height,width')
    crop_size_str = ','.join(image_crop_size)

    # validation interval (epoch), do
    # validation_interval = parameters.get_digit_parameters_None_if_absence(para_file,'validation_interval','int')

    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already run %d iterations, skip' %
                               already_trained_iteration)
        return True

    save_interval_secs = 1200  # default is 1200 second for saving model
    save_summaries_secs = 600  # default is 600 second for saving summaries
    eval_interval_secs = save_interval_secs  # default is 300 second for running evaluation, if no new saved model, no need to run evaluation?

    # run the training in a separate process, so evaluation can run alongside it
    train_process = Process(
        target=train_deeplab,
        args=(train_script, dataset, train_split, num_of_classes,
              base_learning_rate, model_variant, init_checkpoint, TRAIN_LOGDIR,
              dataset_dir, gpu_num, train_atrous_rates1, train_atrous_rates2,
              train_atrous_rates3, train_output_stride, crop_size_str,
              batch_size, iteration_num, depth_multiplier,
              decoder_output_stride, aspp_convs_filters,
              b_initialize_last_layer))
    train_process.start()
    time.sleep(60)  # wait
    # quit if the training process already failed during start-up
    if train_process.exitcode is not None and train_process.exitcode != 0:
        sys.exit(1)

    # eval_process.start()
    # time.sleep(10)  # wait
    # if eval_process.exitcode is not None and eval_process.exitcode != 0:
    #     sys.exit(1)

    while True:

        # only run evaluation when there is new trained model
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        basic.outputlogMessage(
            'Already trained iteration: %d, latest evaluation at %d step' %
            (already_trained_iteration, miou_dict['step'][-1]))
        if already_trained_iteration > miou_dict['step'][-1]:

            # run evaluation and wait until it finished
            gpuid = ""  # set gpuid to empty string, making evaluation run on CPU
            evl_script = os.path.join(deeplab_dir, 'eval.py')
            evl_split = os.path.splitext(
                parameters.get_string_parameters(
                    para_file, 'validation_sample_list_txt'))[0]
            # max_eva_number = -1  # run as many evaluation as possible, --eval_interval_secs (default is 300 seconds)
            max_eva_number = 1  # only run once inside the while loop, use while loop to control multiple evaluation
            eval_process = Process(
                target=evaluation_deeplab,
                args=(evl_script, dataset, evl_split, num_of_classes,
                      model_variant, inf_atrous_rates1, inf_atrous_rates2,
                      inf_atrous_rates3, inf_output_stride, TRAIN_LOGDIR,
                      EVAL_LOGDIR, dataset_dir, crop_size_str, max_eva_number,
                      depth_multiplier, decoder_output_stride,
                      aspp_convs_filters, gpuid, eval_interval_secs))
            # put Process inside while loop to avoid error: AssertionError: cannot start a process twice
            eval_process.start()
            while eval_process.is_alive():
                time.sleep(5)

        # check if need early stopping
        if b_early_stopping:
            print(datetime.now(), 'check early stopping')
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            if 'overall' in miou_dict.keys() and len(
                    miou_dict['overall']) >= 5:
                # if the last five miou did not improve, then stop training
                if np.all(np.diff(miou_dict['overall'][-5:]) < 0.005
                          ):  # 0.0001 (%0.01)  # 0.5 %
                    basic.outputlogMessage(
                        'early stopping: stop training because overall miou did not improved in the last five evaluation'
                    )
                    output_early_stopping_message(TRAIN_LOGDIR)

                    # train_process.kill()    # this one seems not working
                    # subprocess pid different from ps output
                    # https://stackoverflow.com/questions/4444141/subprocess-pid-different-from-ps-output
                    # os.system('kill ' + str(train_process.pid)) # still not working.  train_process.pid is not the one output by ps -aux

                    # train_process.terminate()   # Note that descendant processes of the process will not be terminated
                    # train_process.join()        # Wait until child process terminates

                    # the pid of the real training process is read from a txt file in the
                    # current folder (presumably written by train_deeplab; verify)
                    with open('train_py_pid.txt', 'r') as f_obj:
                        lines = f_obj.readlines()
                        train_pid = int(lines[0].strip())
                        os.system('kill ' + str(train_pid))
                        basic.outputlogMessage(
                            'kill training processing with id: %d' % train_pid)

                    break  # this breaks the while loop, making that it may not evaluate on some new saved model.

        # if the evaluation step is less than saved model iteration, run another iteration again immediately
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        if already_trained_iteration > miou_dict['step'][-1]:
            continue

        # if finished training
        if train_process.is_alive() is False:
            break
        # # if eval_process exit, then quit training as well
        # if eval_process.is_alive() is False and train_process.is_alive():
        #     train_process.kill()
        #     break
        time.sleep(eval_interval_secs)  # wait for next evaluation

    # save loss value to disk
    get_loss_learning_rate_list(TRAIN_LOGDIR)
    # get miou again
    miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)

    # eval_process did not exit as expected, kill it again.
    # os.system('kill ' + str(eval_process.pid))

    # get iou and backup
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    patch_info = os.path.join(WORK_DIR, 'sub_images_patches_info.txt')

    # backup miou and training_loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)
    new_iou_name = os.path.join(backup_dir,
                                test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)

    loss_new_name = os.path.join(backup_dir,
                                 test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)

    new_patch_info = os.path.join(backup_dir,
                                  test_id + '_' + os.path.basename(patch_info))
    io_function.copy_file_to_dst(patch_info, new_patch_info, overwrite=True)

    # plot mIOU, loss, and learning rate curves, and backup
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        iou_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        loss_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    miou_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path,
                                 miou_curve_bakname,
                                 overwrite=True)
    loss_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path,
                                 loss_curve_bakname,
                                 overwrite=True)
示例#6
0
def train_evaluation_deeplab(WORK_DIR, deeplab_dir, expr_name, para_file,
                             network_setting_ini, gpu_num):
    '''
    run deeplab training and evaluation one after another in this process:
    if "validation_interval" is not set, train to iteration_num then evaluate once;
    otherwise train and evaluate every validation_interval epoch(es), with optional early stopping.
    In the end, plot and backup the miou and loss curves to "result_backup".
    :param WORK_DIR: working folder; contains "tfrecord" and receives the experiment folder and "result_backup"
    :param deeplab_dir: folder containing train.py and eval.py
    :param expr_name: name of the experiment folder created under WORK_DIR
    :param para_file: parameter file (sample lists, dataset name, class number, crop size, validation interval, ...)
    :param network_setting_ini: network setting file (pre-trained model, batch size, iterations, learning rate, ...)
    :param gpu_num: passed to train_deeplab
    :return: True if the training already reached iteration_num, otherwise None
    :raises IOError: if the pre-trained model cannot be unpacked or no initial checkpoint is found
    :raises ValueError: if image_crop_size is not "height,width"
    note: calls sys.exit(1) if downloading the pre-trained model fails
    '''

    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')

    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow check point (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini,
                                                       'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s not exist, try to download' %
              pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(
            network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini,
                                                 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini,
                                                    'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(
        network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0.
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'depth_multiplier', 'float')

    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini,
                                                     'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini,
                                                  'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(
        INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' %
                      (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file,
                                                      'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(
        para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG is not None
    # when the last layer is initialized from the pre-trained model, the class
    # number has to match the one the model was trained with
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print(
                'warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21'
                % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print(
                'warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19'
                % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(
        para_file, 'image_crop_size')
    # must be exactly two integer strings: height,width
    if len(image_crop_size) != 2 or not all(
            item.strip().isdigit() for item in image_crop_size):
        raise ValueError('image_crop_size should be height,width')
    crop_size_str = ','.join(image_crop_size)

    evl_script = os.path.join(deeplab_dir, 'eval.py')
    evl_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'validation_sample_list_txt'))[0]
    max_eva_number = 1

    # validation interval (epoch)
    validation_interval = parameters.get_digit_parameters_None_if_absence(
        para_file, 'validation_interval', 'int')
    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already run %d iterations, skip' %
                               already_trained_iteration)
        return True
    if validation_interval is None:
        basic.outputlogMessage(
            'No input validation_interval, so training to %d, then evaluating in the end'
            % iteration_num)
        # run training
        train_deeplab(train_script, dataset, train_split, num_of_classes,
                      base_learning_rate, model_variant, init_checkpoint,
                      TRAIN_LOGDIR, dataset_dir, gpu_num, train_atrous_rates1,
                      train_atrous_rates2, train_atrous_rates3,
                      train_output_stride, crop_size_str, batch_size,
                      iteration_num, depth_multiplier, decoder_output_stride,
                      aspp_convs_filters, b_initialize_last_layer)

        # run evaluation
        evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes,
                           model_variant, inf_atrous_rates1, inf_atrous_rates2,
                           inf_atrous_rates3, inf_output_stride, TRAIN_LOGDIR,
                           EVAL_LOGDIR, dataset_dir, crop_size_str,
                           max_eva_number, depth_multiplier,
                           decoder_output_stride, aspp_convs_filters)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        get_loss_learning_rate_list(TRAIN_LOGDIR)
    else:
        basic.outputlogMessage(
            'training to the maximum iteration of %d, and evaluating very %d epoch(es)'
            % (iteration_num, validation_interval))
        # "epoch" is the epoch count to reach in each round; the upper bound is
        # extended by one interval so the last (partial) round is included
        for epoch in range(validation_interval,
                           total_epoches + validation_interval,
                           validation_interval):

            # never train beyond the maximum iteration number
            to_iter_num = min(epoch * iter_per_epoch, iteration_num)
            # skip the rounds that were already trained in a previous run
            if to_iter_num <= already_trained_iteration:
                continue
            basic.outputlogMessage(
                'training and evaluating to %d epoches (to iteration: %d)' %
                (epoch, to_iter_num))

            # run training
            train_deeplab(train_script, dataset, train_split, num_of_classes,
                          base_learning_rate, model_variant, init_checkpoint,
                          TRAIN_LOGDIR, dataset_dir, gpu_num,
                          train_atrous_rates1, train_atrous_rates2,
                          train_atrous_rates3, train_output_stride,
                          crop_size_str, batch_size, to_iter_num,
                          depth_multiplier, decoder_output_stride,
                          aspp_convs_filters, b_initialize_last_layer)

            # run evaluation
            evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes,
                               model_variant, inf_atrous_rates1,
                               inf_atrous_rates2, inf_atrous_rates3,
                               inf_output_stride, TRAIN_LOGDIR, EVAL_LOGDIR,
                               dataset_dir, crop_size_str, max_eva_number,
                               depth_multiplier, decoder_output_stride,
                               aspp_convs_filters)

            # get miou
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            # save loss value to disk
            get_loss_learning_rate_list(TRAIN_LOGDIR)
            # check if need to early stopping
            if b_early_stopping:
                # make sure the 'overall' record exists before indexing it
                if 'overall' in miou_dict and len(miou_dict['overall']) >= 5:
                    # if the last five miou did not improve, then stop training
                    if np.all(np.diff(miou_dict['overall'][-5:]) < 0.005
                              ):  # 0.0001 (%0.01)  # 0.5 %
                        basic.outputlogMessage(
                            'early stopping: stop training because overall miou did not improved in the last five evaluation'
                        )
                        output_early_stopping_message(TRAIN_LOGDIR)
                        break

    # plot mIOU, loss, and learning rate curves
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        iou_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        loss_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)

    # backup miou and training_loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)

    new_iou_name = os.path.join(backup_dir,
                                test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)
    miou_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path,
                                 miou_curve_bakname,
                                 overwrite=True)

    loss_new_name = os.path.join(backup_dir,
                                 test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)
    loss_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path,
                                 loss_curve_bakname,
                                 overwrite=True)
def process_dem_tarball(tar_list,
                        work_dir,
                        save_dir,
                        remove_inter_data=False,
                        rm_tarball=False,
                        apply_registration=False):
    '''
    Process DEM tarballs one by one and gather the results in save_dir.
    :param tar_list: paths of the tarballs; the last 7 characters of each file name are dropped to get its base name
    :param work_dir: working folder, passed to process_dem_one_tarball and checked for free disk space
    :param save_dir: folder receiving the output tif and its log / matchtag files (created if missing)
    :param remove_inter_data: if True, delete the output folder of each tarball after its files are moved
    :param rm_tarball: if True, delete a tarball after it is handled (also when it gives no output tif)
    :param apply_registration: passed to process_dem_one_tarball
    :return: list of the output tif paths inside save_dir
    '''

    if not os.path.isdir(save_dir):
        io_function.mkdir(save_dir)

    # strips listed in this file (looked for in the current folder) are skipped
    skip_txt = 'no_registration_strips.txt'
    no_registration_strips = (io_function.read_list_from_txt(skip_txt)
                              if os.path.isfile(skip_txt) else [])

    # optional files moved together with the output tif: <tar_base> + suffix
    side_file_suffixes = ('_dem.log', '_matchtag_reg.tif', '_matchtag.log')
    max_wait_seconds = 60 * 60 * 12

    handled_dirs = []
    out_reg_tifs = []
    for targz in tar_list:
        tar_base = os.path.basename(targz)[:-7]
        # check if no registration information for this tarball
        if './' + tar_base in no_registration_strips:
            continue

        if check_files_existence(save_dir, tar_base):
            print("registration result of %s already exists, skip" % targz)
            continue

        # wait until at least 50 GB are free; after 12 hours of waiting, go on anyway
        waited_seconds = 0
        free_GB = io_function.get_free_disk_space_GB(work_dir)
        while free_GB < 50 and waited_seconds < max_wait_seconds:
            basic.outputlogMessage(
                ' The free disk space (%.4f) is less than 50 GB, wait 60 seconds'
                % free_GB)
            time.sleep(60)
            waited_seconds += 60
            free_GB = io_function.get_free_disk_space_GB(work_dir)

        out_tif, out_dir = process_dem_one_tarball(targz, work_dir,
                                                   apply_registration)
        if out_tif is None:
            # nothing was produced for this tarball
            if rm_tarball:
                io_function.delete_file_or_dir(targz)
            continue
        handled_dirs.append(out_dir)

        # move the result and whichever side files exist to save_dir
        io_function.movefiletodir(out_tif, save_dir)
        for suffix in side_file_suffixes:
            side_file = os.path.join(out_dir, tar_base + suffix)
            if os.path.isfile(side_file):
                io_function.movefiletodir(side_file, save_dir)

        out_reg_tifs.append(os.path.join(save_dir, os.path.basename(out_tif)))

        if remove_inter_data:
            io_function.delete_file_or_dir(out_dir)
        if rm_tarball:
            io_function.delete_file_or_dir(targz)

    # second pass, in case removing a folder failed in the loop above
    if remove_inter_data:
        for folder in handled_dirs:
            if os.path.isdir(folder):
                io_function.delete_file_or_dir(folder)

    return out_reg_tifs
示例#8
0
def get_sub_images_multi_regions(para_file):
    '''
    Extract sub-images (and sub-labels) for all the training regions listed in a parameter file,
    set aside the sub-images that are mostly nodata, and save a brief summary of the remaining ones.

    Files in the current folder: 'sub_images_labels_list.txt' is deleted at the start, read after the
    extraction (each line is 'image_path:label_path'; it is expected to be produced by the extraction
    helpers) and rewritten if any sub-image is set aside; 'sub_images_patches_info.txt' is overwritten
    with the summary; a line with the time cost is appended to 'time_cost.txt'.

    :param para_file: path of the main parameter file
    :return: None
    :raises IOError: if para_file does not exist
    :raises ValueError: if no training region is set, if the numbers of sub-images and sub-labels differ,
        or if no sub-image is left after the valid-percent / entropy check
    '''

    print(
        "extract sub-images and sub-labels for a given shape file (training polygons)"
    )

    if not os.path.isfile(para_file):
        raise IOError('File %s not exists in current folder: %s' %
                      (para_file, os.getcwd()))

    get_subImage_script = os.path.join(code_dir, 'datasets',
                                       'get_subImages.py')
    start_time = time.time()

    # get name of training areas
    multi_training_regions = parameters.get_string_list_parameters_None_if_absence(
        para_file, 'training_regions')
    if multi_training_regions is None or len(multi_training_regions) < 1:
        raise ValueError('No training area is set in %s' % para_file)

    dstnodata = parameters.get_string_parameters(para_file, 'dst_nodata')
    buffersize = parameters.get_string_parameters(para_file, 'buffer_size')
    rectangle_ext = parameters.get_string_parameters(para_file,
                                                     'b_use_rectangle')
    process_num = parameters.get_digit_parameters(para_file, 'process_num',
                                                  'int')

    b_no_label_image = parameters.get_bool_parameters_None_if_absence(
        para_file, 'b_no_label_image')
    # label images are used unless b_no_label_image is explicitly True
    use_label_image = b_no_label_image is None or b_no_label_image is False

    # start from a fresh list file
    if os.path.isfile('sub_images_labels_list.txt'):
        io_function.delete_file_or_dir('sub_images_labels_list.txt')

    # NOTE(review): both folders are concatenated with '_delete' below, which fails with a
    # TypeError if either parameter is absent (None) -- confirm they are always set.
    subImage_dir = parameters.get_string_parameters_None_if_absence(
        para_file, 'input_train_dir')
    subLabel_dir = parameters.get_string_parameters_None_if_absence(
        para_file, 'input_label_dir')

    # loop each training regions
    for area_ini in multi_training_regions:

        input_image_dir = parameters.get_directory_None_if_absence(
            area_ini, 'input_image_dir')

        # it is ok consider a file name as pattern and pass it the following functions to get file list
        input_image_or_pattern = parameters.get_string_parameters(
            area_ini, 'input_image_or_pattern')

        b_sub_images_json = parameters.get_bool_parameters(
            area_ini, 'b_sub_images_json')
        if b_sub_images_json is True:
            # copy sub-images, then convert json files to label images.
            object_names = parameters.get_string_list_parameters(
                para_file, 'object_names')
            get_subImages_json.get_subimages_label_josn(
                input_image_dir,
                input_image_or_pattern,
                subImage_dir,
                subLabel_dir,
                object_names,
                b_no_label_image=b_no_label_image,
                process_num=process_num)
            continue

        all_train_shp = parameters.get_file_path_parameters_None_if_absence(
            area_ini, 'training_polygons')
        train_shp = parameters.get_string_parameters(
            area_ini, 'training_polygons_sub')

        # get subImage and subLabel for one training polygons
        print(
            'extract training data from image folder (%s) and polgyons (%s)'
            % (input_image_dir, train_shp))
        if b_no_label_image is True:
            extract_one_shp = get_subImage_one_shp
        else:
            extract_one_shp = get_subImage_subLabel_one_shp
        extract_one_shp(get_subImage_script,
                        all_train_shp,
                        buffersize,
                        dstnodata,
                        rectangle_ext,
                        train_shp,
                        input_image_dir,
                        file_pattern=input_image_or_pattern,
                        process_num=process_num)

    # check black sub-images or most part of the sub-images is black (nodata)
    new_sub_image_label_list = []
    delete_sub_image_label_list = []
    subImage_dir_delete = subImage_dir + '_delete'
    subLabel_dir_delete = subLabel_dir + '_delete'
    io_function.mkdir(subImage_dir_delete)
    if use_label_image:
        io_function.mkdir(subLabel_dir_delete)
    get_valid_percent_entropy.plot_valid_entropy(subImage_dir)
    with open('sub_images_labels_list.txt', 'r') as f_obj:
        lines = f_obj.readlines()
    for line in lines:
        image_path, label_path = line.strip().split(':')
        valid_per, entropy = raster_io.get_valid_percent_shannon_entropy(
            image_path)  # base=10
        if valid_per > 60 and entropy >= 0.5:
            new_sub_image_label_list.append(line)
        else:
            # set the sub-image (and its label, if any) aside instead of deleting it
            delete_sub_image_label_list.append(line)
            io_function.movefiletodir(image_path, subImage_dir_delete)
            if os.path.isfile(label_path):
                io_function.movefiletodir(label_path, subLabel_dir_delete)
    # rewrite the list only if something was set aside
    if len(delete_sub_image_label_list) > 0:
        with open('sub_images_labels_list.txt', 'w') as f_obj:
            f_obj.writelines(new_sub_image_label_list)

    # check whether they have the same subImage and subLabel
    if use_label_image:
        sub_image_list = io_function.get_file_list_by_pattern(
            subImage_dir, '*.tif')
        sub_label_list = io_function.get_file_list_by_pattern(
            subLabel_dir, '*.tif')
        if len(sub_image_list) != len(sub_label_list):
            raise ValueError(
                'the count of subImage (%d) and subLabel (%d) is different' %
                (len(sub_image_list), len(sub_label_list)))

    # max()/min() and the mean below need at least one sub-image; fail with a clear
    # message before the info file is opened, instead of leaving it half written
    img_count = len(new_sub_image_label_list)
    if img_count < 1:
        raise ValueError(
            'No sub-image left after checking valid percentage and entropy '
            '(%d moved to %s), cannot save information of sub-images' %
            (len(delete_sub_image_label_list), subImage_dir_delete))

    # save brief information of sub-images
    height_list = []
    width_list = []
    band_count = 0
    dtype = 'unknown'
    for line in new_sub_image_label_list:
        image_path, label_path = line.strip().split(':')
        # band_count and dtype keep the values of the last sub-image
        height, width, band_count, dtype = raster_io.get_height_width_bandnum_dtype(
            image_path)
        height_list.append(height)
        width_list.append(width)
    # save info to file, if it exists, it will be overwritten
    with open('sub_images_patches_info.txt', 'w') as f_obj:
        f_obj.write('information of sub-images: \n')
        f_obj.write('number of sub-images : %d \n' % img_count)
        f_obj.write('band count : %d \n' % band_count)
        f_obj.write('data type : %s \n' % dtype)
        f_obj.write('maximum width and height: %d, %d \n' %
                    (max(width_list), max(height_list)))
        f_obj.write('minimum width and height: %d, %d \n' %
                    (min(width_list), min(height_list)))
        f_obj.write(
            'mean width and height: %.2f, %.2f \n\n' %
            (sum(width_list) / img_count, sum(height_list) / img_count))

    duration = time.time() - start_time
    os.system(
        'echo "$(date): time cost of getting sub images and labels: %.2f seconds">>time_cost.txt'
        % duration)
示例#9
0
    # remove the files with dark area greater than 10%
    with rasterio.open(tif_img) as img_obj:
        # read the first band
        # indexes = img_obj.indexes
        # print(indexes)
        data_band1 = img_obj.read(1)
        # print(data_band1.shape)
        width, height = data_band1.shape
        # dark area are pixel value smaller than 3
        index_zeros = np.where(data_band1 < 3)

        # num_non_zero = np.count_nonzero(data_band1)
        # if num_non_zero != 16380:
        #     print(tif_img)
        #     print(num_non_zero)

        zeros_per = len(index_zeros[0]) / float(width * height)
        # print(tif_img)
        if zeros_per > 0.1:
            # remove this file
            print(zeros_per)
            print('remove image patch:', tif_img)
            io_function.movefiletodir(tif_img, rm_dark_img_dir)
            continue

    # use the same name of tif file
    output = os.path.basename(tif_img)
    if RSImageProcess.subset_image_baseimage(output, org_img,
                                             tif_img) is False:
        break
示例#10
0
def check_dem_valid_per(dem_tif_list,
                        work_dir,
                        process_num=1,
                        move_dem_threshold=None,
                        area_pixel_num=None):
    '''
    get the valid pixel percentage for each DEM, and save them to 'dem_valid_percent.txt' in work_dir
    (sorted from the highest to the lowest percentage)
    :param dem_tif_list: list of DEM files
    :param work_dir: folder for the text file and for the sub-folder of moved DEMs
    :param process_num: 1 for computing one by one, > 1 for a pool with that many processes
    :param move_dem_threshold: move a DEM to a sub-folder if its valid percentage is smaller than the threshold
    :param area_pixel_num: passed to raster_io.get_valid_pixel_percentage as the total pixel number
    :return: list of the DEMs that are kept, or False if getting the percentage failed for any DEM
    :raises ValueError: if process_num is smaller than 1
    '''

    keep_dem_list = []

    dem_tif_valid_per = {}
    if process_num == 1:
        for tif in dem_tif_list:
            per = raster_io.get_valid_pixel_percentage(
                tif, total_pixel_num=area_pixel_num)
            if per is False:
                return False
            dem_tif_valid_per[tif] = per
            keep_dem_list.append(tif)
    elif process_num > 1:
        parameters_list = [(tif, area_pixel_num) for tif in dem_tif_list]
        theadPool = Pool(process_num)  # multi processes
        try:
            results = theadPool.starmap(raster_io.get_valid_pixel_percentage,
                                        parameters_list)  # need python3
        finally:
            # always release the worker processes, also when starmap raises
            # or when we return False below
            theadPool.close()
            theadPool.join()
        for res, tif in zip(results, dem_tif_list):
            if res is False:
                return False
            dem_tif_valid_per[tif] = res
            keep_dem_list.append(tif)
    else:
        raise ValueError("Wrong process_num: %d" % process_num)

    # sort by percentage, the highest first
    sorted_valid_per = sorted(dem_tif_valid_per.items(),
                              key=operator.itemgetter(1),
                              reverse=True)
    percent_txt = os.path.join(work_dir, 'dem_valid_percent.txt')
    with open(percent_txt, 'w') as f_obj:
        for tif, per in sorted_valid_per:
            f_obj.write('%s %.4f\n' % (os.path.basename(tif), per))
    basic.outputlogMessage('save dem valid pixel percentage to %s' %
                           percent_txt)

    # only keep dem with valid pixel greater than a threshold
    if move_dem_threshold is not None:  # int or float
        keep_dem_list = []  # reset the list
        mosaic_dir_rm = os.path.join(work_dir,
                                     'dem_valid_lt_%.2f' % move_dem_threshold)
        io_function.mkdir(mosaic_dir_rm)
        for tif, per in dem_tif_valid_per.items():
            if per < move_dem_threshold:
                io_function.movefiletodir(tif, mosaic_dir_rm)
            else:
                keep_dem_list.append(tif)

    return keep_dem_list
示例#11
0
def main(options, args):
    '''
    Download Planet images covering the polygons of a shapefile, then move the scenes
    whose download is not complete to the sub-folder 'incomplete_scenes'.
    :param options: parsed options; item_types, start_date, end_date, cloud_cover,
        planet_account and process_num are read
    :param args: args[0] is the polygon shapefile, args[1] is the folder for saving downloaded images
    '''

    polygons_shp = args[0]
    save_folder = args[1]  # folder for saving downloaded images

    # check training polygons
    assert io_function.is_file_exist(polygons_shp)
    # create the folder directly (not through a shell command), so a path
    # containing spaces or shell characters is handled correctly
    os.makedirs(save_folder, exist_ok=True)

    item_types = options.item_types.split(
        ',')  # ["PSScene4Band"]  # , # PSScene4Band , PSOrthoTile

    start_date = datetime.strptime(
        options.start_date, '%Y-%m-%d')  #datetime(year=2018, month=5, day=20)
    end_date = datetime.strptime(options.end_date, '%Y-%m-%d')  #end_date
    cloud_cover_thr = options.cloud_cover  # 0.01

    planet_account = options.planet_account
    process_num = options.process_num

    # set Planet API key
    get_and_set_Planet_key(planet_account)

    shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(
        polygons_shp).strip()
    if shp_prj != '+proj=longlat +datum=WGS84 +no_defs':
        # reproject to 4326 projection (an existing reprojected file is reused)
        basic.outputlogMessage('reproject %s to latlon' % polygons_shp)
        latlon_shp = io_function.get_name_by_adding_tail(
            polygons_shp, 'latlon')
        if not os.path.isfile(latlon_shp):
            vector_gpd.reproject_shapefile(polygons_shp, 'EPSG:4326',
                                           latlon_shp)
        polygons_shp = latlon_shp
        basic.outputlogMessage(
            'save new shapefile to %s for downloading images' % polygons_shp)

    # read polygons
    polygons_json = read_polygons_json(polygons_shp)

    read_excluded_scenes(
        save_folder)  # read the excluded_scenes before read download images

    #read geometry of images already in "save_folder"
    read_down_load_geometry(save_folder)

    # download images
    download_planet_images(polygons_json, start_date, end_date,
                           cloud_cover_thr, item_types, save_folder,
                           process_num)

    #check each downloaded ones are completed, otherwise, remove the incompleted ones
    geojson_list = io_function.get_file_list_by_ext('.geojson',
                                                    save_folder,
                                                    bsub_folder=False)
    incom_dir = os.path.join(save_folder, 'incomplete_scenes')

    for geojson_file in geojson_list:
        scene_id = os.path.splitext(os.path.basename(geojson_file))[0]
        scene_dir = os.path.join(save_folder, scene_id)
        files = io_function.get_file_list_by_pattern(scene_dir, scene_id + '*')
        # a scene is complete when it has one file for each asset type
        if len(files) == len(asset_types):
            continue

        # create the folder only when an incomplete scene is found
        # (the original condition was inverted and never created a missing folder)
        if not os.path.isdir(incom_dir):
            io_function.mkdir(incom_dir)

        basic.outputlogMessage(
            'warning, downloading of %s is not completed, move to incomplete_scenes '
            % scene_id)
        io_function.movefiletodir(scene_dir, incom_dir, overwrite=True)
        io_function.movefiletodir(geojson_file, incom_dir, overwrite=True)
示例#12
0
def main():
    '''
    For each extent polygon, find the ArcticDEM tiles within it, download them and create a mosaic,
    then move the smaller duplicated cropped mosaics of each extent to the folder 'small_tifs'.
    The input shapefiles are hard-coded; tarball_dir, dem_tif_dir and dem_eachSwatch_dir are not
    defined in this function (presumably module-level settings -- TODO confirm).
    :return: None
    '''
    dem_index_shp = os.path.expanduser(
        '~/Data/Arctic/ArcticDEM/BROWSE_SERVER/indexes/ArcticDEM_Tile_Index_Rel7/ArcticDEM_Tile_Index_Rel7.shp'
    )
    # extent_shp = os.path.expanduser('~/Data/PDO/PDO_statistics_swatchs/swatch_bounding_boxes.shp')
    extent_shp = os.path.expanduser(
        '~/Data/PDO/extent_each_swatch/merge_all_qa_exent.shp')

    # extent polygons and projection (proj4)
    extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(
        extent_shp)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(
        dem_index_shp)

    # read the extent polygons, reprojected to the projection of the DEM index if they differ
    if extent_shp_prj != dem_shp_prj:
        basic.outputlogMessage(
            '%s and %s do not have the same projection, will reproject %s' %
            (extent_shp, dem_index_shp, os.path.basename(extent_shp)))
        epsg = map_projection.get_raster_or_vector_srs_info_epsg(dem_index_shp)
        # print(epsg)
        # extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp,dem_shp_prj.strip())
        extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, epsg)
    else:
        extent_polys = vector_gpd.read_polygons_gpd(extent_shp)

    poly_ids = [idx for idx in range(len(extent_polys))]
    # the attribute holding the name of each extent depends on the shapefile name
    if 'boxes' in os.path.basename(extent_shp):
        nc_file_names = vector_gpd.read_attribute_values_list(
            extent_shp, 'nc_file')
    else:
        nc_file_names = vector_gpd.read_attribute_values_list(
            extent_shp, 'layer')

    # read dem polygons and tile number
    dem_polygons, dem_tiles = vector_gpd.read_polygons_attributes_list(
        dem_index_shp, 'tile', b_fix_invalid_polygon=False)

    # count starts at 0, so the logged progress reads (0/N) for the first extent
    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' %
                               (idx, count, len(extent_polys)))

        # the tile list of each extent is cached in a text file in the current folder
        save_txt_path = nc_file_names[idx] + '-' + 'dem_tiles_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            tiles = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem tiles from %s' %
                                   (len(tiles), save_txt_path))
        else:
            # get the DEM tiles within this extent
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(
                dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEM within %d th extent' %
                                   (len(dem_poly_ids), (idx)))
            tiles = [dem_tiles[id] for id in dem_poly_ids]

            # save to txt
            io_function.save_list_to_txt(save_txt_path, tiles)
            basic.outputlogMessage('save dem urls to %s' % save_txt_path)

        # download the tiles
        url_head = 'https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/mosaic/v3.0/32m/'
        download_tarball_for_one_polygon(tarball_dir, dem_tif_dir, url_head,
                                         tiles)

        # create a mosaic
        create_a_mosaic(nc_file_names[idx], idx, dem_eachSwatch_dir, ext_poly,
                        tiles)

    bak_folder = 'small_tifs'
    io_function.mkdir(bak_folder)
    # remove small and duplicated ones
    for file_name in nc_file_names:
        crop_tifs = io_function.get_file_list_by_pattern(
            dem_eachSwatch_dir, file_name + '*crop.tif')
        if len(crop_tifs) == 1:
            pass
        elif len(crop_tifs) > 1:
            # keep maximum one and move small ones
            tif_files_size = [
                io_function.get_file_size_bytes(item) for item in crop_tifs
            ]
            max_size = max(tif_files_size)
            max_index = tif_files_size.index(max_size)
            del crop_tifs[max_index]
            for tmp in crop_tifs:
                io_function.movefiletodir(tmp, bak_folder)
                # also move the file with the same name but without '_crop'
                tmp = tmp.replace('_crop', '')
                io_function.movefiletodir(tmp, bak_folder)

        else:  # no tif
            raise ValueError('Results for %s does not exist' % file_name)