Example No. 1
def copy_ini_files(ini_dir, work_dir, para_file, area_ini_list, backbone):

    import os
    import basic_src.io_function as io_function
    ini_list = [para_file, backbone]
    ini_list.extend(area_ini_list)
    for ini in ini_list:
        io_function.copy_file_to_dst(os.path.join(ini_dir, ini), os.path.join(work_dir, ini), overwrite=True)
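All of the examples on this page call copy_file_to_dst from the author's basic_src.io_function module, whose implementation is not shown here. A minimal sketch of what such a helper presumably does, assuming a shutil-based copy with an overwrite guard (hypothetical stand-in, not the project's actual code):

import os
import shutil

def copy_file_to_dst(src, dst, overwrite=False):
    # hypothetical stand-in for basic_src.io_function.copy_file_to_dst:
    # copy src to dst, refusing to clobber an existing file unless overwrite is True
    if os.path.isfile(dst) and not overwrite:
        print('warning, %s already exists, skip copying' % dst)
        return dst
    shutil.copy(src, dst)
    return dst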
Example No. 2
def mosaic_dem_list_gdal_merge(key, dem_list, save_tif_dir, save_source):
    # use gdal_merge.py to create a mosaic; in areas of overlap, the last image is copied over earlier ones

    save_mosaic = os.path.join(save_tif_dir, key + '.tif')
    b_save_mosaic = io_function.is_file_exist_subfolder(save_tif_dir, key + '.tif')
    if b_save_mosaic is not False:
        basic.outputlogMessage('warning, mosaic file: %s exist, skip' % save_mosaic)
        return save_mosaic

    # save the source file for producing the mosaic
    if save_source:
        save_mosaic_source_txt = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(save_mosaic_source_txt, dem_list)

    # if there is only one DEM, copy it directly unless it is in VRT format
    if len(dem_list) == 1:
        if raster_io.get_driver_format(dem_list[0]) != 'VRT':
            io_function.copy_file_to_dst(dem_list[0], save_mosaic)
            return save_mosaic

    nodata = raster_io.get_nodata(dem_list[0])

    # create the mosaic; this can also handle a single input file, but it is slow
    result = RSImageProcess.mosaics_images(dem_list,save_mosaic,nodata=nodata,
                                           compress='lzw', tiled='yes', bigtiff='if_safer')

    if result is False:
        sys.exit(1)
        # return False
    return save_mosaic
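The comment at the top of this example mentions gdal_merge.py; a hedged sketch of building the same kind of mosaic directly with that GDAL command-line tool (assuming gdal_merge.py is on PATH), where later files overwrite earlier ones in overlapping areas:

import subprocess

def mosaic_with_gdal_merge(dem_list, save_mosaic, nodata=None):
    # hypothetical helper, not the project's RSImageProcess.mosaics_images:
    # later inputs overwrite earlier ones in areas of overlap
    cmd = ['gdal_merge.py', '-o', save_mosaic, '-co', 'COMPRESS=LZW', '-co', 'TILED=YES']
    if nodata is not None:
        cmd.extend(['-n', str(nodata), '-a_nodata', str(nodata)])
    cmd.extend(dem_list)
    subprocess.check_call(cmd)
    return save_mosaic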
Example No. 3
    def remove_nonclass_polygon(self, shape_file, out_shp, class_field_name):
        """
        remove polygons that do not belong to the targeted class, i.e., polygons whose class_field_name value is 0
        :param shape_file: input shapefile containing all the polygons
        :param out_shp: output shapefile
        :param class_field_name: the name of class field, such as svmclass, treeclass
        :return: True if successful, False Otherwise
        """
        if io_function.is_file_exist(shape_file) is False:
            return False

        try:
            org_obj = shapefile.Reader(shape_file)
        except Exception as e:
            basic.outputlogMessage('read %s failed: %s' % (shape_file, str(e)))
            return False

        # Create a new shapefile in memory
        w = shapefile.Writer()
        w.shapeType = org_obj.shapeType

        org_records = org_obj.records()
        if (len(org_records) < 1):
            basic.outputlogMessage('error, no record in shape file ')
            return False

        # Copy over the geometry without any changes
        w.fields = list(org_obj.fields)
        field_index = self.__find_field_index(w.fields, class_field_name)
        if field_index is False:
            return False
        shapes_list = org_obj.shapes()
        org_shape_count = len(shapes_list)
        removed_count = 0
        for i in range(0, len(shapes_list)):
            rec = org_records[i]
            if rec[field_index] == 0:  # remove the record whose class is 0 (0 means non-gully)
                removed_count += 1
                continue

            w._shapes.append(shapes_list[i])
            w.records.append(rec)

        basic.outputlogMessage('Removed non-class polygons, total count: %d' %
                               removed_count)
        # w._shapes.extend(org_obj.shapes())
        if removed_count == org_shape_count:
            basic.outputlogMessage('error: all polygons have been removed')
            return False

        # copy prj file
        org_prj = os.path.splitext(shape_file)[0] + ".prj"
        out_prj = os.path.splitext(out_shp)[0] + ".prj"
        io_function.copy_file_to_dst(org_prj, out_prj, overwrite=True)

        w.save(out_shp)
        return True
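The Writer calls in this example (shapefile.Writer() with no target, w._shapes, w.save()) follow the old pyshp 1.x API. A hedged sketch of the same filtering written against the pyshp 2.x API, assuming the non-target class is still encoded as 0:

import shapefile  # pyshp >= 2.1

def remove_nonclass_polygon_pyshp2(shape_file, out_shp, class_field_name):
    # keep only records whose class field is non-zero; geometry is copied unchanged
    with shapefile.Reader(shape_file) as reader, \
            shapefile.Writer(out_shp, shapeType=reader.shapeType) as writer:
        writer.fields = reader.fields[1:]          # skip the DeletionFlag field
        field_names = [field[0] for field in writer.fields]
        idx = field_names.index(class_field_name)
        for shape_rec in reader.iterShapeRecords():
            if shape_rec.record[idx] == 0:         # 0 means non-target class
                continue
            writer.record(*shape_rec.record)
            writer.shape(shape_rec.shape)
    return True

The .prj sidecar would still need to be copied alongside the output, as in the original example.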
Example No. 4
def copy_ini_files(curr_dir_before_ray, work_dir):
    import os
    import basic_src.io_function as io_function
    # area_ini_list and backbones are defined at module level (not shown here)
    area_ini_list.append('main_para.ini')
    area_ini_list.extend(backbones)
    for ini in area_ini_list:
        io_function.copy_file_to_dst(os.path.join(curr_dir_before_ray, ini),
                                     ini,
                                     overwrite=True)
Example No. 5
def create_new_region_defined_parafile(template_para_file,
                                       img_dir,
                                       area_remark=None):
    '''
    create a new region-defined para file; only the new image settings are defined (other parameters are unchanged)
    :param template_para_file:
    :param img_dir:
    :param area_remark:
    :return:
    '''
    io_function.is_file_exist(template_para_file)

    dir_base = os.path.basename(img_dir)
    date_strs = re.findall(r'\d{8}', dir_base)
    if len(date_strs) == 1:
        date = date_strs[0]
    else:
        date = 'unknown'

    tail = date if area_remark is None else date + '_' + area_remark
    new_para_file = io_function.get_name_by_adding_tail(template_para_file, tail)
    new_para_file = os.path.basename(new_para_file)  # save to the current folder
    if os.path.isfile(new_para_file):
        raise IOError('%s already exists, please check or remove first' %
                      new_para_file)

    # copy the file
    io_function.copy_file_to_dst(template_para_file, new_para_file)

    if area_remark is not None:
        modify_parameter(new_para_file, 'area_remark', area_remark)
    if date != 'unknown':
        modify_parameter(new_para_file, 'area_time', date)

    modify_parameter(new_para_file, 'input_image_dir', img_dir)
    modify_parameter(new_para_file, 'inf_image_dir', img_dir)

    tif_list = io_function.get_file_list_by_ext('.tif',
                                                img_dir,
                                                bsub_folder=False)
    if len(tif_list) < 1:
        raise ValueError('No tif in %s' % img_dir)
    if len(tif_list) == 1:
        modify_parameter(new_para_file, 'input_image_or_pattern',
                         os.path.basename(tif_list[0]))
        modify_parameter(new_para_file, 'inf_image_or_pattern',
                         os.path.basename(tif_list[0]))
    else:
        modify_parameter(new_para_file, 'input_image_or_pattern', '*.tif')
        modify_parameter(new_para_file, 'inf_image_or_pattern', '*.tif')

    print("modified and saved new parameter file: %s " % new_para_file)

    return new_para_file
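modify_parameter comes from the project's own parameter-handling module and is not shown here. A minimal sketch of such a helper, assuming the para file is a plain text file with one "key = value" pair per line (hypothetical, not the project's implementation):

def modify_parameter(para_file, parameter_name, new_value):
    # hypothetical stand-in: rewrite the "parameter_name = value" line in place
    with open(para_file, 'r') as f_obj:
        lines = f_obj.readlines()
    with open(para_file, 'w') as f_obj:
        for line in lines:
            key = line.split('=')[0].strip()
            if key == parameter_name:
                f_obj.write('%s = %s\n' % (parameter_name, new_value))
            else:
                f_obj.write(line)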
Example No. 6
def original_sub_images_labels_list_before_gan():

    # backup original sub_images list
    sub_img_label_txt = 'sub_images_labels_list.txt'
    sub_img_label_txt_noGAN = 'sub_images_labels_list_noGAN.txt'
    area_ini_sub_images_labels_dict = 'area_ini_sub_images_labels.txt'
    if os.path.isfile(sub_img_label_txt_noGAN) is False:
        io_function.copy_file_to_dst(sub_img_label_txt,
                                     sub_img_label_txt_noGAN,
                                     overwrite=False)

    return sub_img_label_txt_noGAN, sub_img_label_txt, area_ini_sub_images_labels_dict
Example No. 7
def copy_subImages_labels_directly(subImage_dir, subLabel_dir, area_ini):

    input_image_dir = parameters.get_directory_None_if_absence(
        area_ini, 'input_image_dir')
    # it is ok to treat a file name as a pattern and pass it to the following functions to get the file list
    input_image_or_pattern = parameters.get_string_parameters(
        area_ini, 'input_image_or_pattern')

    # label raster folder
    label_raster_dir = parameters.get_directory_None_if_absence(
        area_ini, 'label_raster_dir')
    sub_images_list = []
    label_path_list = []

    if os.path.isdir(subImage_dir) is False:
        io_function.mkdir(subImage_dir)
    if os.path.isdir(subLabel_dir) is False:
        io_function.mkdir(subLabel_dir)

    sub_images = io_function.get_file_list_by_pattern(input_image_dir,
                                                      input_image_or_pattern)
    for sub_img in sub_images:
        # find the corresponding label raster
        label_name = io_function.get_name_by_adding_tail(
            os.path.basename(sub_img), 'label')
        label_path = os.path.join(label_raster_dir, label_name)
        if os.path.isfile(label_path):
            sub_images_list.append(sub_img)
            label_path_list.append(label_path)
        else:
            print('Warning, cannot find label for %s in %s' %
                  (sub_img, label_raster_dir))

    # copy sub-images, adding to txt files
    with open('sub_images_labels_list.txt', 'a') as f_obj:
        for tif_path, label_file in zip(sub_images_list, label_path_list):
            if label_file is None:
                continue
            dst_subImg = os.path.join(subImage_dir, os.path.basename(tif_path))

            # copy sub-images
            io_function.copy_file_to_dst(tif_path, dst_subImg, overwrite=True)

            dst_label_file = os.path.join(subLabel_dir,
                                          os.path.basename(label_file))
            io_function.copy_file_to_dst(label_file,
                                         dst_label_file,
                                         overwrite=True)

            sub_image_label_str = dst_subImg + ":" + dst_label_file + '\n'
            f_obj.writelines(sub_image_label_str)
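get_file_list_by_pattern is another io_function helper; a plausible glob-based equivalent, assuming the pattern is a shell-style wildcard such as '*.tif' (hypothetical sketch):

import glob
import os

def get_file_list_by_pattern(folder, pattern):
    # hypothetical equivalent of io_function.get_file_list_by_pattern
    file_list = glob.glob(os.path.join(folder, pattern))
    file_list.sort()
    return file_list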
Example No. 8
def make_note_all_task_done(extent_shp, reomte_node):
    if os.path.isdir(grid_ids_txt_dir) is False:
        io_function.mkdir(grid_ids_txt_dir)

    shp_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(
        extent_shp)

    # shp_grid_id_txt should be in the current folder
    if os.path.isfile(log_grid_ids_txt) is False:
        io_function.copy_file_to_dst(shp_grid_id_txt, log_grid_ids_txt)

    if os.path.isfile(log_grid_ids_txt_done) is False:
        io_function.save_list_to_txt(log_grid_ids_txt_done, ['Done'])
        # copy the done file to the CURC remote node
        r_grid_ids_txt_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/grid_ids_txt'
        scp_communicate.copy_file_folder_to_remote_machine(
            reomte_node, r_grid_ids_txt_dir, log_grid_ids_txt_done)
Example No. 9
def check_one_extent(extent_shp):
    print('start to check %s' % extent_shp)

    # local_grid_id_txt is in the current dir
    # log_grid_ids_txt, log_grid_ids_txt_done is in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(
        extent_shp)
    if os.path.isfile(local_grid_id_txt) is False and os.path.isfile(
            log_grid_ids_txt):
        io_function.copy_file_to_dst(log_grid_ids_txt, local_grid_id_txt)
    if os.path.isfile(local_grid_id_txt) is False:
        print('the _grid_ids.txt for %s does not exist, maybe the task has not started yet' %
              extent_shp)
        return False

    # check if it has been completed
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' %
                               extent_shp)
        return True

    grid_ids_to_process_txt = io_function.get_name_no_ext(
        extent_shp) + '_' + 'grid_ids_to_process.txt'

    # read from txt file directly
    grid_ids = [
        int(item) for item in io_function.read_list_from_txt(local_grid_id_txt)
    ]

    update_complete_grid_list(grid_ids, task_list)

    # check the complete files to see if it is done
    # remove grids that have been completed or ignored
    ignore_ids = get_complete_ignore_grid_ids()
    num_grid_ids = save_grid_ids_need_to_process(
        grid_ids, ignore_ids=ignore_ids, save_path=grid_ids_to_process_txt)
    if num_grid_ids < 1:
        print(datetime.now(), ' %s is marked as completed' % extent_shp)
        make_note_all_task_done(extent_shp, curc_node)
    else:
        print(
            datetime.now(),
            ' %s has not completed, %d grids to process, total: %d' %
            (extent_shp, num_grid_ids, len(grid_ids)))

    return True
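check_one_extent reads and writes plain-text id lists through io_function; likely equivalents of those two helpers, assuming one item per line (hypothetical sketches):

def save_list_to_txt(file_name, save_list):
    # hypothetical equivalent of io_function.save_list_to_txt
    with open(file_name, 'w') as f_obj:
        for item in save_list:
            f_obj.write(str(item) + '\n')

def read_list_from_txt(file_name):
    # hypothetical equivalent of io_function.read_list_from_txt
    with open(file_name, 'r') as f_obj:
        return [line.strip() for line in f_obj if line.strip() != '']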
Example No. 10
def move_align_results(ref_dem, dem_tif, save_dir):

    coreg_save_dir = os.path.join(save_dir, 'dem_coreg')
    if os.path.isdir(coreg_save_dir) is False:
        io_function.mkdir(coreg_save_dir)

    align_outputs = check_align_folder(dem_tif)
    if len(align_outputs) < 9:
        raise ValueError('the output of dem_align.py has fewer than 9 files')

    dem_align = os.path.join(
        coreg_save_dir,
        os.path.basename(io_function.get_name_by_adding_tail(dem_tif,
                                                             'coreg')))
    # the aligned DEM comes in a plain and a filtered ("filt") version; which one should we use, and what filter do they apply?
    # visually checking one result (Banks east): at the same location, the aligned DEM and the filtered one have identical values,
    # but the filtered version has more nodata. Let's use the filtered version.
    # the nodata pixels are usually water pixels, but some are also inside the thaw slumps
    align_filt = [
        out for out in align_outputs if out.endswith('align_filt.tif')
    ][0]
    io_function.move_file_to_dst(align_filt, dem_align, overwrite=True)

    # copy reference dem if necessary
    ref_dem_copy = os.path.join(coreg_save_dir, os.path.basename(ref_dem))
    if os.path.isfile(ref_dem_copy) is False:
        io_function.copy_file_to_dst(ref_dem, ref_dem_copy)

    # move the elevation difference?
    ele_diff_folder = os.path.join(save_dir, 'dem_diff_from_demcoreg')
    if os.path.isdir(ele_diff_folder) is False:
        io_function.mkdir(ele_diff_folder)
    dem_diff_filt = [
        out for out in align_outputs if out.endswith('align_diff_filt.tif')
    ][0]
    io_function.movefiletodir(dem_diff_filt, ele_diff_folder, overwrite=True)

    coreg_png_plot_folder = os.path.join(save_dir, 'demcoreg_png_plot')
    if os.path.isdir(coreg_png_plot_folder) is False:
        io_function.mkdir(coreg_png_plot_folder)
    coreg_pngs = [out for out in align_outputs if out.endswith('.png')]
    for png in coreg_pngs:
        io_function.movefiletodir(png, coreg_png_plot_folder, overwrite=True)

    return True
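move_align_results also uses move_file_to_dst and movefiletodir; plausible shutil-based equivalents with overwrite semantics mirroring copy_file_to_dst (hypothetical sketches):

import os
import shutil

def move_file_to_dst(src, dst, overwrite=False):
    # hypothetical equivalent of io_function.move_file_to_dst
    if os.path.isfile(dst):
        if not overwrite:
            print('warning, %s already exists, skip moving' % dst)
            return dst
        os.remove(dst)
    shutil.move(src, dst)
    return dst

def movefiletodir(src, dst_dir, overwrite=False):
    # hypothetical equivalent of io_function.movefiletodir: move src into dst_dir
    return move_file_to_dst(src, os.path.join(dst_dir, os.path.basename(src)), overwrite=overwrite)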
Example No. 11
def save_polygons_to_shp(polygon_list, base_shp, folder):
    if len(polygon_list) < 1:
        print('Error, there is no polygon in the list')
        return False

    try:
        shp_obj = shapefile.Reader(base_shp)
    except IOError as e:
        print("Read file: %s failed: %s" % (base_shp, str(e)))
        return False

    save_shp_list = []

    save_id = 0
    for polygon in polygon_list:
        w = shapefile.Writer()
        w.shapeType = shp_obj.shapeType

        filename = os.path.join(
            folder,
            os.path.splitext(os.path.basename(base_shp))[0] + '_' +
            str(save_id) + '.shp')
        if os.path.isfile(filename) is False:
            w.field('id')
            w._shapes.append(polygon)
            w.record(save_id)

            # copy prj file
            org_prj = os.path.splitext(base_shp)[0] + ".prj"
            out_prj = os.path.splitext(filename)[0] + ".prj"
            io_function.copy_file_to_dst(org_prj, out_prj, overwrite=True)

            # save to file
            w.save(filename)
        else:
            print('warning: %s already exist, skip' % filename)

        save_id += 1
        save_shp_list.append(filename)

    return save_shp_list
Example No. 12
def prepare_data_for_evaluation(para_file):

    import workflow.whole_procedure as whole_procedure

    # get subimages
    whole_procedure.extract_sub_images_using_training_polygons(para_file)

    # split image
    whole_procedure.split_sub_images(para_file)

    # whole_procedure.training_img_augment(para_file)
    # whole_procedure.split_train_val(para_file)

    # use all the image patches for evaluation
    trainval = os.path.join('list', 'trainval.txt')
    test_list_txt = parameters.get_string_parameters(
        para_file, 'validation_sample_list_txt')
    test_list_txt = os.path.join('list', test_list_txt)
    io_function.copy_file_to_dst(trainval, test_list_txt, overwrite=True)

    # convert images to tf-records
    whole_procedure.build_TFrecord_tf1x(para_file)
Example No. 13
def main(options, args):

    time_str = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
    print2file(log, time_str)

    para_file = options.para_file
    k_value = int(args[0])
    test_num = int(args[1])

    print2file(log, 'kvalue : %d' % k_value)
    print2file(log, 'test_num : %d' % test_num)

    global trained_model_dir
    trained_model_dir = parameters.get_string_parameters(
        para_file, 'expr_name')

    # get the path of multi training polygons
    multi_training_files = parameters.get_string_parameters_None_if_absence(
        para_file, 'multi_training_files')
    if multi_training_files is None:
        raise ValueError('multi_training_files is not set in the %s' %
                         para_file)

    io_function.is_file_exist(multi_training_files)

    # backup the original training file which contains the full set of polygons
    training_files_allPolygons = io_function.get_name_by_adding_tail(
        multi_training_files, 'allPolygons')
    if os.path.isfile(training_files_allPolygons) is False:
        io_function.copy_file_to_dst(multi_training_files,
                                     training_files_allPolygons)
    else:
        basic.outputlogMessage(
            'The full set of polygons already exists')  # %multi_training_files

    # training using the k subsets
    train_kfold_cross_val(training_files_allPolygons, multi_training_files,
                          k_value, test_num)
Example No. 14
def mosaic_dem_list(key, dem_list, save_tif_dir,resample_method,save_source, o_format, thread_num=1):

    
    # print('\n\n os.fork \n\n', os.fork())
    # if os.fork()==0:
    #     proc_id = multiprocessing.current_process().pid
    #     basic.setlogfile('log_file_pid_%d.txt'%proc_id)
    
    save_mosaic = os.path.join(save_tif_dir, key + '.tif')
    # check file existence
    # if os.path.isfile(save_mosaic):
    b_save_mosaic = io_function.is_file_exist_subfolder(save_tif_dir, key + '.tif')
    if b_save_mosaic is not False:
        basic.outputlogMessage('warning, mosaic file: %s exist, skip' % save_mosaic)
        return save_mosaic
        # mosaic_list.append(b_save_mosaic)
        # continue
    # save the source file for producing the mosaic
    if save_source:
        save_mosaic_source_txt = os.path.join(save_tif_dir, key + '_src.txt')
        io_function.save_list_to_txt(save_mosaic_source_txt, dem_list)

    # if there is only one DEM, copy it directly unless it is in VRT format
    if len(dem_list) == 1:
        if raster_io.get_driver_format(dem_list[0]) != 'VRT':
            io_function.copy_file_to_dst(dem_list[0], save_mosaic)
            return save_mosaic

    # create the mosaic; this can also handle a single input file, but it is slow
    result = RSImageProcess.mosaic_crop_images_gdalwarp(dem_list, save_mosaic, resampling_method=resample_method,
                                               o_format=o_format,
                                               compress='lzw', tiled='yes', bigtiff='if_safer',thread_num=thread_num)
    if result is False:
        sys.exit(1)
        # return False
    return save_mosaic
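A hedged sketch of the gdalwarp invocation that RSImageProcess.mosaic_crop_images_gdalwarp presumably wraps, with creation options matching the keyword arguments passed above (assumption, not the project's actual wrapper):

import subprocess

def mosaic_with_gdalwarp(dem_list, save_mosaic, resample_method='average', o_format='GTiff', thread_num=1):
    # hypothetical helper: mosaic the inputs with gdalwarp; later files take
    # precedence over earlier ones in overlapping areas (except nodata)
    cmd = ['gdalwarp', '-r', resample_method, '-of', o_format,
           '-co', 'COMPRESS=LZW', '-co', 'TILED=YES', '-co', 'BIGTIFF=IF_SAFER',
           '-multi', '-wo', 'NUM_THREADS=%d' % thread_num]
    cmd.extend(dem_list)
    cmd.append(save_mosaic)
    subprocess.check_call(cmd)
    return save_mosaic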
Example No. 15
def organize_files(sub_img_dirs, save_dir):
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    # get all png files
    png_list = []
    for img_dir in sub_img_dirs:
        pngs = io_function.get_file_list_by_pattern(img_dir, '*.png')
        png_list.extend(pngs)

    image_name_list = []
    images_dir = os.path.join(save_dir, 'images')
    imageBound_dir = os.path.join(save_dir, 'imageBound')
    objectPolygons_dir = os.path.join(save_dir, 'objectPolygons')
    io_function.mkdir(images_dir)
    io_function.mkdir(imageBound_dir)
    io_function.mkdir(objectPolygons_dir)

    for idx, png in enumerate(png_list):
        basename = io_function.get_name_no_ext(png)
        new_name = 'img' + str(idx + 1).zfill(6) + '_' + basename
        image_name_list.append(new_name)

        io_function.copy_file_to_dst(
            png, os.path.join(images_dir, new_name + '.png'))
        png_xml = png + '.aux.xml'
        if os.path.isfile(png_xml):
            io_function.copy_file_to_dst(
                png_xml, os.path.join(images_dir, new_name + '.png.aux.xml'))

        bound_path = png.replace('.png', '_bound.geojson')
        io_function.copy_file_to_dst(
            bound_path,
            os.path.join(imageBound_dir, new_name + '_bound.geojson'))

        digit_str = re.findall(r'_\d+', basename)
        id_str = digit_str[0][1:]
        object_path = os.path.join(os.path.dirname(png),
                                   'id_%s.geojson' % id_str)
        io_function.copy_file_to_dst(
            object_path, os.path.join(objectPolygons_dir,
                                      new_name + '.geojson'))

    txt_path = os.path.join(save_dir, 'imageList.txt')
    io_function.save_list_to_txt(txt_path, image_name_list)
Example No. 16
def produce_corresponding_grid_ids_txt(extent_shp, local_grid_id_txt,
                                       log_grid_ids_txt):

    # if it is in the log dir but not in the current dir, then copy it
    if os.path.isfile(
            log_grid_ids_txt) and os.path.isfile(local_grid_id_txt) is False:
        io_function.copy_file_to_dst(log_grid_ids_txt,
                                     local_grid_id_txt,
                                     overwrite=False)
        return True

    # if not in the local dir, then generate it
    if os.path.isfile(local_grid_id_txt) is False:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
            grid_20_shp, 'id')
        print('time cost of read polygons and attributes', time.time() - time0)

        # this will create local_grid_id_txt
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

        # modify local_grid_id_txt by excluding grid_id already in adjacent extent
        other_grid_ids = read_grid_ids_from_other_extent()
        grid_ids = [id for id in grid_ids if id not in other_grid_ids]

        # overwrite the local_grid_id_txt file
        grid_ids_str = [str(item) for item in grid_ids]
        io_function.copy_file_to_dst(local_grid_id_txt,
                                     io_function.get_name_by_adding_tail(
                                         local_grid_id_txt,
                                         'noRMadj'))  # save a copy
        io_function.save_list_to_txt(local_grid_id_txt, grid_ids_str)

        # copy to log dir
        io_function.copy_file_to_dst(local_grid_id_txt, log_grid_ids_txt)

    return True
Example No. 17
# aspect_line_imgAug16_tp=os.path.join(out_dir,'identified_ThawSlumps_MaiinLines_prj_TP.shp')
aspect_line_imgAug22_tp = os.path.join(
    out_dir, 'identified_ThawSlumps_MaiinLines_utm_TP.shp')

# output='aspect_ground_truth_winrose.jpg'
# draw_one_attribute_windrose(ground_truth,'asp_mean','',output ,color='grey',hatch="")
# io_function.copy_file_to_dst(output, os.path.join(out_dir,output), overwrite=True)

# draw wind rose of azimuth from manually draw lines
# output="aspectLine_ground_truth_winrose.jpg"
# draw_one_attribute_windrose(aspect_line, "aspectLine", "Mean Aspect ($^\circ$)", output,color='black')  # ,hatch='/'
# io_function.copy_file_to_dst(output, os.path.join(out_dir,output), overwrite=True)

####### use mapping polygons  ####
# output="aspectLine_imgAug16_tp_winrose.jpg"
# draw_one_attribute_windrose(aspect_line_imgAug16_tp, "aspectLine", "Mean Aspect ($^\circ$)", output,color='black')  # ,hatch='/'
output = "aspectLine_imgAug22_tp_winrose.jpg"
draw_one_attribute_windrose(aspect_line_imgAug22_tp,
                            "aspectLine",
                            "Mean Aspect ($^\circ$)",
                            output,
                            color='black')  # ,hatch='/'
io_function.copy_file_to_dst(output,
                             os.path.join(out_dir, output),
                             overwrite=True)
####### use mapping polygons  ####

#
# # clear
# os.system('rm processLog.txt')
os.system('rm *.jpg')
Example No. 18
def split_to_patches(image_path, out_dir, patch_width, patch_height, overlay_x, overlay_y, out_format, file_pre_name=None):
    # assumed wrapper signature; split one image into patches via split_image.split_image
    split_image.split_image(image_path, out_dir, patch_width, patch_height, overlay_x, overlay_y, out_format, pre_name=file_pre_name)



with open('sub_images_labels_list.txt') as txt_obj:
    line_list = [name.strip() for name in txt_obj.readlines()]
    for line in line_list:
        sub_image, sub_label = line.split(':')

        # split sub image
        split_to_patches(sub_image, 'split_images', patch_w, patch_h, overlay, overlay, split_image_format)

        # split sub label (change the file name to be the same as sub_image name)
        pre_name = os.path.splitext(os.path.basename(sub_image))[0]
        split_to_patches(sub_label, 'split_labels', patch_w, patch_h, overlay, overlay, split_image_format, file_pre_name=pre_name)

    # output trainval.txt and val.txt file
    files_list = io_function.get_file_list_by_ext(split_image_format, 'split_images',bsub_folder=False)
    io_function.mkdir('list')
    trainval = os.path.join('list','trainval.txt')
    val = os.path.join('list','val.txt')
    with open(trainval,'w') as w_obj:
        for file_name in files_list:
            w_obj.writelines(os.path.splitext(os.path.basename(file_name))[0] + '\n')

    io_function.copy_file_to_dst(trainval,val,overwrite=True)



Example No. 19
def split_sub_images(para_file):
    print("split sub-images and sub-labels")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' % (para_file, os.getcwd()))

    SECONDS = time.time()
    if os.path.isdir('split_images'):
        io_function.delete_file_or_dir('split_images')
    if os.path.isdir('split_labels'):
        io_function.delete_file_or_dir('split_labels')

    io_function.mkdir('split_images')

    ### split the training image into many small patches (480*480)
    patch_w=parameters.get_string_parameters(para_file,'train_patch_width')
    patch_h=parameters.get_string_parameters(para_file,'train_patch_height')
    overlay_x=parameters.get_string_parameters(para_file,'train_pixel_overlay_x')
    overlay_y=parameters.get_string_parameters(para_file,'train_pixel_overlay_y')
    split_image_format=parameters.get_string_parameters(para_file,'split_image_format')

    trainImg_dir=parameters.get_string_parameters(para_file,'input_train_dir')
    labelImg_dir=parameters.get_string_parameters(para_file,'input_label_dir')
    proc_num = parameters.get_digit_parameters(para_file,'process_num','int')

    if os.path.isdir(trainImg_dir) is False:
        raise IOError('%s is not in the current folder, please get subImages first' % trainImg_dir)
    if os.path.isdir(labelImg_dir) is False:
        print('warning, %s is not in the current folder' % labelImg_dir)
    else:
        io_function.mkdir('split_labels')

    sub_img_label_txt = 'sub_images_labels_list.txt'
    if os.path.isfile(sub_img_label_txt) is False:
        raise IOError('%s is not in the current folder, please get subImages first' % sub_img_label_txt)

    with open(sub_img_label_txt) as txt_obj:
        line_list = [name.strip() for name in txt_obj.readlines()]
        # for line in line_list:
        #     sub_image, sub_label = line.split(':')
        #
        #     # split sub image
        #     split_to_patches(sub_image, 'split_images', patch_w, patch_h, overlay, overlay, split_image_format)
        #
        #     # split sub label (change the file name to be the same as sub_image name)
        #     pre_name = os.path.splitext(os.path.basename(sub_image))[0]
        #     split_to_patches(sub_label, 'split_labels', patch_w, patch_h, overlay, overlay, split_image_format, file_pre_name=pre_name)

        parameters_list = [(line, patch_w, patch_h, overlay_x, overlay_y, split_image_format) for line in line_list]
        theadPool = Pool(proc_num)  # multi processes
        results = theadPool.starmap(split_a_pair_sub_image_label, parameters_list)  # need python3

        # output trainval.txt and val.txt file
        files_list = io_function.get_file_list_by_ext(split_image_format, 'split_images',bsub_folder=False)
        io_function.mkdir('list')
        trainval = os.path.join('list','trainval.txt')
        val = os.path.join('list','val.txt')
        with open(trainval,'w') as w_obj:
            for file_name in files_list:
                w_obj.writelines(os.path.splitext(os.path.basename(file_name))[0] + '\n')

        io_function.copy_file_to_dst(trainval,val,overwrite=True)

        split_train_val.get_image_with_height_list(trainval, split_image_format, info_type='(no data augmentation)')


    duration= time.time() - SECONDS
    os.system('echo "$(date): time cost of splitting sub images and labels: %.2f seconds">>time_cost.txt'%duration)
Example No. 20
    def add_fields_shape(self, ori_shp, new_shp, output_shp):
        """
        add fields from another shapefile (i.e., merge the fields of two shape files); the two shape files should have the same number of features
        :param ori_shp: the path of the original shape file to which the new fields will be added
        :param new_shp: the shape file containing the new fields
        :param output_shp: saved shape file
        :return: True if successful, False otherwise

        """
        # Read in our existing shapefile
        if io_function.is_file_exist(
                ori_shp) is False or io_function.is_file_exist(
                    new_shp) is False:
            return False
        try:
            org_obj = shapefile.Reader(ori_shp)
            new_obj = shapefile.Reader(new_shp)
        except IOError as e:
            basic.outputlogMessage(str(e))
            return False

        if len(org_obj.shapes()) != len(new_obj.shapes()):
            basic.outputlogMessage(
                "error: the two input shape files do not have the same number of features"
            )
            return False
        if org_obj.shapeType != new_obj.shapeType:
            basic.outputlogMessage(
                "error: the two input shape files have different shapeTypes")
            return False

        # Create a new shapefile in memory
        w = shapefile.Writer()
        w.shapeType = org_obj.shapeType

        # Copy over the existing fields
        w.fields = list(org_obj.fields)
        for t_field in list(new_obj.fields):
            if isinstance(t_field, tuple):
                continue
            w.fields.append(t_field)

        # Add our new field using the pyshp API
        # w.field("KINSELLA", "C", "40")

        # # We'll create a counter in this example
        # # to give us sample data to add to the records
        # # so we know the field is working correctly.
        # i = 1
        #
        # Loop through each record, add a column.  We'll
        # insert our sample data but you could also just
        # insert a blank string or NULL DATA number
        # as a place holder
        org_records = org_obj.records()
        new_records = new_obj.records()
        for i in range(0, len(org_records)):
            rec = org_records[i]
            for value in new_records[i]:
                rec.append(value)

            # Add the modified record to the new shapefile
            w.records.append(rec)

        # Copy over the geometry without any changes
        w._shapes.extend(org_obj.shapes())

        # copy prj file
        org_prj = os.path.splitext(ori_shp)[0] + ".prj"
        out_prj = os.path.splitext(output_shp)[0] + ".prj"
        io_function.copy_file_to_dst(org_prj, out_prj, overwrite=True)

        # Save as a new shapefile (or write over the old one)
        w.save(output_shp)

        return True
Example No. 21
    def remove_shape_baseon_field_value(self,
                                        shape_file,
                                        out_shp,
                                        class_field_name,
                                        threashold,
                                        smaller=True):
        """
        remove features from a shapefile based on a field value;
        if smaller is True, features whose value is smaller than the threshold will be removed;
        if smaller is False, features whose value is greater than the threshold will be removed
        :param shape_file: input shape file
        :param out_shp: saved shape file
        :param class_field_name: the name of the field, such as area
        :param threashold: threshold value
        :param smaller: if True, features whose value is smaller than the threshold are removed
        :return: True if successful, False otherwise
        """
        if io_function.is_file_exist(shape_file) is False:
            return False

        try:
            org_obj = shapefile.Reader(shape_file)
        except Exception as e:
            basic.outputlogMessage(str(e))
            return False

        # Create a new shapefile in memory
        w = shapefile.Writer()
        w.shapeType = org_obj.shapeType

        org_records = org_obj.records()
        if (len(org_records) < 1):
            basic.outputlogMessage('error, no record in shape file ')
            return False

        # Copy over the geometry without any changes
        w.fields = list(org_obj.fields)
        field_index = self.__find_field_index(w.fields, class_field_name)
        if field_index is False:
            return False
        shapes_list = org_obj.shapes()
        i = 0
        removed_count = 0
        if smaller is True:
            for i in range(0, len(shapes_list)):
                rec = org_records[i]
                if rec[field_index] < threashold:  # remove the record which is smaller than threashold
                    removed_count = removed_count + 1
                    continue
                w._shapes.append(shapes_list[i])
                rec = org_records[i]
                w.records.append(rec)
        else:
            for i in range(0, len(shapes_list)):
                rec = org_records[i]
                if rec[field_index] > threashold:  # remove the record which is greater than threashold
                    removed_count = removed_count + 1
                    continue
                w._shapes.append(shapes_list[i])
                rec = org_records[i]
                w.records.append(rec)

        basic.outputlogMessage('Remove polygons based on %s, total count: %d' %
                               (class_field_name, removed_count))
        # w._shapes.extend(org_obj.shapes())

        # copy prj file
        org_prj = os.path.splitext(shape_file)[0] + ".prj"
        out_prj = os.path.splitext(out_shp)[0] + ".prj"
        io_function.copy_file_to_dst(org_prj, out_prj, overwrite=True)

        w.save(out_shp)
        return True
Example No. 22
def train_kfold_cross_val(multi_training_files_allPolygons,
                          multi_training_files, k_value, test_num):

    ##################################################################
    # get subset of polygons
    training_shp_all = []
    with open(multi_training_files_allPolygons, 'r') as f_obj:
        training_lines = f_obj.readlines()
        for line in training_lines:
            line = line.strip()
            training_shp_all.append(
                line.split(':')[-1])  # the last one is the shape file

    for training_shpAll in training_shp_all:

        dir = os.path.dirname(training_shpAll)
        file_name = os.path.basename(training_shpAll)
        file_name_no_ext = os.path.splitext(file_name)[0]
        dir_sub = os.path.join(
            dir,
            '%s_%d-fold_cross_val_t%d' % (file_name_no_ext, k_value, test_num))

        if os.path.isdir(dir_sub) is False:

            # will save to dir_sub
            io_function.mkdir(dir_sub)
            create_shp_subset_polygons(dir_sub, training_shpAll, file_name,
                                       k_value)
        else:
            # check shape file existence
            sub_shps = io_function.get_file_list_by_pattern(dir_sub, '*.shp')
            if len(sub_shps) == k_value:
                print2file(
                    log,
                    "subset of shapefile already exist, skip creating new")
            else:
                create_shp_subset_polygons(dir_sub, training_shpAll, file_name,
                                           k_value)

    ##################################################################
    # training on the k subsets
    for idx in range(1, k_value + 1):
        # remove the previously trained model (the settings are the same as exp10)
        if os.path.isdir(trained_model_dir):
            io_function.delete_file_or_dir(trained_model_dir)

        print2file(log, "run training and inference of the %d_th fold" % idx)

        # replace shape file path in "multi_training_files"

        io_function.copy_file_to_dst(multi_training_files_allPolygons,
                                     multi_training_files,
                                     overwrite=True)
        # replace shape file path in multi_training_files
        for training_shpAll in training_shp_all:
            dir = os.path.dirname(training_shpAll)
            file_name_no_ext = os.path.splitext(
                os.path.basename(training_shpAll))[0]
            dir_sub = os.path.join(
                dir, '%s_%d-fold_cross_val_t%d' %
                (file_name_no_ext, k_value, test_num))

            new_shp_path = os.path.join(
                dir_sub, '%s_%dfold_%d.shp' % (file_name_no_ext, k_value, idx))
            repalce_string_in_file(multi_training_files, training_shpAll,
                                   new_shp_path)

        # modify exe.sh
        io_function.copy_file_to_dst('exe_template_kfold.sh',
                                     'exe_qtp.sh',
                                     overwrite=True)
        new_line = '%dfold_%d_t%d' % (k_value, idx, test_num)
        repalce_string_in_file('exe_qtp.sh', 'x_test_num', new_line)

        # check results existence
        result_shp = io_function.get_file_list_by_pattern(
            'result_backup', '*' + new_line + '*/*.shp')
        if len(result_shp) > 0:
            print2file(log,
                       "results of test: %s already exist, skip" % new_line)
        else:
            # run training
            print2file(log, "start: test:%d the %d_th fold" % (test_num, idx))
            argslist = ['./exe_qtp.sh']
            return_code = basic.exec_command_args_list(argslist)
            # a non-zero exit code means something went wrong, so quit
            if return_code != 0:
                sys.exit(return_code)

    pass
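repalce_string_in_file (name kept as spelled in the original) is a small helper used above to swap shapefile paths inside the training-file list; a plausible in-place text substitution sketch (assumption):

def repalce_string_in_file(file_path, old_str, new_str):
    # hypothetical equivalent of the helper used above: replace every
    # occurrence of old_str with new_str, writing the file back in place
    with open(file_path, 'r') as f_obj:
        content = f_obj.read()
    with open(file_path, 'w') as f_obj:
        f_obj.write(content.replace(old_str, new_str))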
Example No. 23
def convert_planet_to_rgb_images(tif_path,
                                 save_dir='RGB_images',
                                 sr_min=0,
                                 sr_max=3000,
                                 save_org_dir=None,
                                 sharpen=True,
                                 rgb_nodata=0):

    # if multiple processes try to derive the same RGB images, there may be a problem;
    # save the output to 'RGB_images' + processID

    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    if save_org_dir is not None and os.path.isdir(save_org_dir) is False:
        io_function.mkdir(save_org_dir)

    if save_org_dir is not None:
        copied_org_img_path = os.path.join(save_org_dir,
                                           os.path.basename(tif_path))
        io_function.copy_file_to_dst(tif_path, copied_org_img_path)

    # filename_no_ext
    output = os.path.splitext(os.path.basename(tif_path))[0]
    if sharpen:
        fin_output = os.path.join(save_dir, output + '_8bit_rgb_sharpen.tif')
    else:
        fin_output = os.path.join(save_dir, output + '_8bit_rgb.tif')
    if os.path.isfile(fin_output):
        basic.outputlogMessage(
            "Skip, because File %s exists in current folder: %s" %
            (fin_output, os.getcwd()))
        return fin_output

    # use a fixed min and max to keep the color consistent with Sentinel images
    src_min = sr_min
    src_max = sr_max
    dst_min = 1  # 0 is the nodata, so set as 1
    dst_max = 255

    # gdal_translate -ot Byte -scale ${src_min} ${src_max} ${dst_min} ${dst_max} ${image_path} ${output}_8bit.tif
    if 'SR.tif' in tif_path:
        cmd_str = 'gdal_translate -ot Byte -scale %d %d %d %d -of VRT %s %s_8bit.tif' % (
            src_min, src_max, dst_min, dst_max, tif_path, output)
    else:
        # gdal_contrast_stretch -percentile-range 0.01 0.99 ${output}.tif ${output}_8bit.tif
        cmd_str = 'gdal_contrast_stretch -percentile-range 0.01 0.99 %s %s_8bit.tif' % (
            tif_path, output)
    status, result = basic.exec_command_string(cmd_str)
    if status != 0:
        print(result)
        sys.exit(status)

    # the third band is red, second is green, and first is blue
    #gdal_translate -b 3 -b 2 -b 1  ${output}_8bit.tif ${output}_8bit_rgb.tif
    cmd_str = 'gdal_translate -b 3 -b 2 -b 1 -of VRT %s_8bit.tif %s_8bit_rgb.tif' % (
        output, output)
    status, result = basic.exec_command_string(cmd_str)
    if status != 0:
        print(result)
        sys.exit(status)

    # python ${code_dir}/planetScripts/prePlanetImage.py ${output}_8bit_rgb.tif ${fin_output}
    if sharpen:
        cmd_str = 'python %s %s_8bit_rgb.tif %s' % (prePlanetImage, output,
                                                    fin_output)
    else:
        # convert from VRT format to tif format
        cmd_str = 'gdal_translate -of GTiff %s_8bit_rgb.tif %s' % (output,
                                                                   fin_output)
    status, result = basic.exec_command_string(cmd_str)
    if status != 0:
        print(result)
        sys.exit(status)

    # set nodata
    # gdal_edit.py -a_nodata 0  ${fin_output}
    cmd_str = 'gdal_edit.py -a_nodata %d  %s' % (rgb_nodata, fin_output)
    status, result = basic.exec_command_string(cmd_str)
    if status != 0:
        print(result)
        sys.exit(status)

    io_function.delete_file_or_dir('%s_8bit.tif' % output)
    io_function.delete_file_or_dir('%s_8bit_rgb.tif' % output)

    return fin_output
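A hedged usage sketch of the function above; the input filename is hypothetical and the surface-reflectance range follows the defaults in the signature:

# hypothetical Planet surface-reflectance scene; 'SR.tif' in the name triggers the fixed-range stretch
rgb_tif = convert_planet_to_rgb_images('20200701_scene_3B_AnalyticMS_SR.tif',
                                       save_dir='RGB_images',
                                       sr_min=0, sr_max=3000,
                                       sharpen=True)
print('RGB image saved to: %s' % rgb_tif)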
Example No. 24
def train_evaluation_deeplab(WORK_DIR, deeplab_dir, expr_name, para_file,
                             network_setting_ini, gpu_num):

    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')

    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow check point (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini,
                                                       'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s does not exist, try to download' %
              pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(
            network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini,
                                                 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini,
                                                    'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(
        network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0.
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'depth_multiplier', 'float')

    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini,
                                                     'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini,
                                                  'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(
        INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' %
                      (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file,
                                                      'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(
        para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG is not None
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print(
                'warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21'
                % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print(
                'warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19'
                % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(
        para_file, 'image_crop_size')
    if len(image_crop_size) != 2 or not (image_crop_size[0].isdigit()
                                         and image_crop_size[1].isdigit()):
        raise ValueError('image_crop_size should be height,width')
    crop_size_str = ','.join(image_crop_size)

    evl_script = os.path.join(deeplab_dir, 'eval.py')
    evl_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'validation_sample_list_txt'))[0]
    max_eva_number = 1

    # validation interval (epoch)
    validation_interval = parameters.get_digit_parameters_None_if_absence(
        para_file, 'validation_interval', 'int')
    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already ran %d iterations, skip' %
                               already_trained_iteration)
        return True
    if validation_interval is None:
        basic.outputlogMessage(
            'No validation_interval given, so training to %d iterations, then evaluating at the end'
            % iteration_num)
        # run training
        train_deeplab(train_script, dataset, train_split, num_of_classes,
                      base_learning_rate, model_variant, init_checkpoint,
                      TRAIN_LOGDIR, dataset_dir, gpu_num, train_atrous_rates1,
                      train_atrous_rates2, train_atrous_rates3,
                      train_output_stride, crop_size_str, batch_size,
                      iteration_num, depth_multiplier, decoder_output_stride,
                      aspp_convs_filters, b_initialize_last_layer)

        # run evaluation
        evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes,
                           model_variant, inf_atrous_rates1, inf_atrous_rates2,
                           inf_atrous_rates3, inf_output_stride, TRAIN_LOGDIR,
                           EVAL_LOGDIR, dataset_dir, crop_size_str,
                           max_eva_number, depth_multiplier,
                           decoder_output_stride, aspp_convs_filters)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        get_loss_learning_rate_list(TRAIN_LOGDIR)
    else:
        basic.outputlogMessage(
            'training to the maximum iteration of %d, and evaluating every %d epoch(s)'
            % (iteration_num, validation_interval))
        for epoch in range(validation_interval,
                           total_epoches + validation_interval,
                           validation_interval):

            to_iter_num = min(epoch * iter_per_epoch, iteration_num)
            if to_iter_num <= already_trained_iteration:
                continue
            basic.outputlogMessage(
                'training and evaluating to %d epochs (to iteration: %d)' %
                (epoch, to_iter_num))

            # run training
            train_deeplab(train_script, dataset, train_split, num_of_classes,
                          base_learning_rate, model_variant, init_checkpoint,
                          TRAIN_LOGDIR, dataset_dir, gpu_num,
                          train_atrous_rates1, train_atrous_rates2,
                          train_atrous_rates3, train_output_stride,
                          crop_size_str, batch_size, to_iter_num,
                          depth_multiplier, decoder_output_stride,
                          aspp_convs_filters, b_initialize_last_layer)

            # run evaluation
            evaluation_deeplab(evl_script, dataset, evl_split, num_of_classes,
                               model_variant, inf_atrous_rates1,
                               inf_atrous_rates2, inf_atrous_rates3,
                               inf_output_stride, TRAIN_LOGDIR, EVAL_LOGDIR,
                               dataset_dir, crop_size_str, max_eva_number,
                               depth_multiplier, decoder_output_stride,
                               aspp_convs_filters)

            # get miou
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            # save loss value to disk
            get_loss_learning_rate_list(TRAIN_LOGDIR)
            # check whether early stopping is needed
            if b_early_stopping:
                if len(miou_dict['overall']) >= 5:
                    # if the last five miou values did not improve, stop training
                    if np.all(np.diff(miou_dict['overall'][-5:]) < 0.005):  # improvement threshold: 0.005 (0.5%)
                        basic.outputlogMessage(
                            'early stopping: stop training because the overall miou did not improve in the last five evaluations'
                        )
                        output_early_stopping_message(TRAIN_LOGDIR)
                        break

    # plot mIOU, loss, and learning rate curves
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        iou_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        loss_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)

    # backup miou and training_loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)

    new_iou_name = os.path.join(backup_dir,
                                test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)
    miou_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path,
                                 miou_curve_bakname,
                                 overwrite=True)

    loss_new_name = os.path.join(backup_dir,
                                 test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)
    loss_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path,
                                 loss_curve_bakname,
                                 overwrite=True)
Example No. 25
def mask_dem_by_surface_water(crop_dem_list, extent_poly, extent_id, crop_tif_dir, o_res, process_num):

    # get the list of surface water mask tifs
    water_mask_tifs = io_function.get_file_list_by_ext('.tif',mask_water_dir,bsub_folder=False)
    water_mask_ext_polys = get_dem_tif_ext_polygons(water_mask_tifs)

    overlap_index = vector_gpd.get_poly_index_within_extent(water_mask_ext_polys,extent_poly)

    #### crop and mosaic water mask
    sub_mosaic_dem_tifs = [water_mask_tifs[item] for item in overlap_index]
    water_mask_crop_tif_list = []
    for tif in sub_mosaic_dem_tifs:
        save_crop_path = os.path.join(crop_tif_dir, os.path.basename(io_function.get_name_by_adding_tail(tif, 'sub_poly_%d' % extent_id)) )
        if os.path.isfile(save_crop_path):
            basic.outputlogMessage('%s exists, skip' % save_crop_path)
            water_mask_crop_tif_list.append(save_crop_path)
        else:
            crop_tif = subset_image_by_polygon_box(tif, save_crop_path, extent_poly, resample_m='near',
                            o_format='VRT',out_res=o_res, same_extent=True,thread_num=process_num) #
            if crop_tif is False:
                raise ValueError('warning, crop %s failed' % tif)
            water_mask_crop_tif_list.append(crop_tif)
    if len(water_mask_crop_tif_list) < 1:
        basic.outputlogMessage('No water mask for %d grid'%extent_id)
        save_id_grid_no_watermask(extent_id)
        return None

    # create the mosaic; this can also handle a single input file, but it is slow
    save_water_mask_mosaic = os.path.join(crop_tif_dir, 'global_surface_water_grid%d.tif'%extent_id)
    result = RSImageProcess.mosaic_crop_images_gdalwarp(water_mask_crop_tif_list, save_water_mask_mosaic, resampling_method='average',o_format='GTiff',
                                               compress='lzw', tiled='yes', bigtiff='if_safer',thread_num=process_num)
    if result is False:
        return False

    # because the DEM and the water mask have different resolutions, polygonize the water mask, then burn it into the DEM
    water_mask_shp = os.path.join(crop_tif_dir, 'global_surface_water_grid%d.shp'%extent_id)
    if os.path.isfile(water_mask_shp):
        basic.outputlogMessage('%s exists, skip cropping' % water_mask_shp)
    else:
        # set 0 as nodata
        if raster_io.set_nodata_to_raster_metadata(save_water_mask_mosaic,0) is False:
            return False
        if vector_gpd.raster2shapefile(save_water_mask_mosaic,water_mask_shp,connect8=True) is None:
            return False

    # masking the strip version of DEMs
    mask_dem_list = []
    for idx, strip_dem in enumerate(crop_dem_list):
        save_path = io_function.get_name_by_adding_tail(strip_dem, 'maskWater')
        if os.path.isfile(save_path):
            basic.outputlogMessage('%s exists, skip' % save_path)
            mask_dem_list.append(save_path)
            continue

        io_function.copy_file_to_dst(strip_dem,save_path,overwrite=True)
        nodata = raster_io.get_nodata(save_path)
        if raster_io.burn_polygon_to_raster_oneband(save_path,water_mask_shp,nodata) is False:
            continue
        mask_dem_list.append(save_path)

    return mask_dem_list
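
# For illustration only: a minimal sketch of the "burn water polygons into a DEM as nodata"
# step using rasterio and geopandas directly. The function above relies on the project's
# raster_io / vector_gpd / RSImageProcess helpers instead; the helper name and paths here
# are hypothetical, and the water polygons are assumed to share the DEM's coordinate system.
import rasterio
from rasterio.features import geometry_mask
import geopandas as gpd

def mask_dem_with_water_polygons(dem_path, water_shp, out_path):
    water_polys = gpd.read_file(water_shp)
    with rasterio.open(dem_path) as src:
        dem = src.read(1)
        profile = src.profile
        nodata = src.nodata if src.nodata is not None else -9999.0
        # boolean array: True where a pixel falls inside any water polygon
        water = geometry_mask(water_polys.geometry, out_shape=dem.shape,
                              transform=src.transform, invert=True)
    dem[water] = nodata  # in-place assignment keeps the DEM's original dtype
    profile.update(nodata=nodata)
    with rasterio.open(out_path, 'w', **profile) as dst:
        dst.write(dem, 1)
    return out_path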
Exemplo n.º 26
0
def copy_curc_job_files(sh_dir, work_dir, sh_list):
    for sh in sh_list:
        io_function.copy_file_to_dst(os.path.join(sh_dir, sh),
                                     os.path.join(work_dir,
                                                  sh))  #, overwrite=True
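
# A hypothetical usage sketch (the directory and script names below are assumptions, not
# from the source): copy the cluster job scripts into a fresh working directory before
# submitting jobs.
if __name__ == '__main__':
    copy_curc_job_files(os.path.expanduser('~/curc_scripts'),
                        './area1_deeplab',
                        ['job_tf_GPU.sh', 'exe.sh'])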
Exemplo n.º 27
0
def postProcess(para_file, inf_post_note, b_skip_getshp=False, test_id=None):
    # test_id is related to the training run

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' %
                      (para_file, os.getcwd()))

    # the test string in 'exe.sh'
    test_note = inf_post_note

    WORK_DIR = os.getcwd()

    SECONDS = time.time()

    expr_name = parameters.get_string_parameters(para_file, 'expr_name')
    network_setting_ini = parameters.get_string_parameters(
        para_file, 'network_setting_ini')

    inf_dir = parameters.get_directory(para_file, 'inf_output_dir')
    if test_id is None:
        test_id = os.path.basename(WORK_DIR) + '_' + expr_name

    # get name of inference areas
    multi_inf_regions = parameters.get_string_list_parameters(
        para_file, 'inference_regions')

    # run post-processing parallel
    # max_parallel_postProc_task = 8

    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    io_function.mkdir(backup_dir)

    # loop each inference regions
    sub_tasks = []
    same_area_time_inis = group_same_area_time_observations(multi_inf_regions)
    region_eva_reports = {}
    for key in same_area_time_inis.keys():
        multi_observations = same_area_time_inis[key]
        area_name = parameters.get_string_parameters(
            multi_observations[0],
            'area_name')  # they have the same name and time
        area_time = parameters.get_string_parameters(multi_observations[0],
                                                     'area_time')
        merged_shp_list = []
        map_raster_list_2d = [None] * len(multi_observations)
        for area_idx, area_ini in enumerate(multi_observations):
            area_remark = parameters.get_string_parameters(
                area_ini, 'area_remark')
            area_save_dir, shp_pre, _ = get_observation_save_dir_shp_pre(
                inf_dir, area_name, area_time, area_remark, test_id)

            # get image list
            inf_image_dir = parameters.get_directory(area_ini, 'inf_image_dir')
            # it is OK to treat a file name as a pattern and pass it to the following functions to get the file list
            inf_image_or_pattern = parameters.get_string_parameters(
                area_ini, 'inf_image_or_pattern')
            inf_img_list = io_function.get_file_list_by_pattern(
                inf_image_dir, inf_image_or_pattern)
            img_count = len(inf_img_list)
            if img_count < 1:
                raise ValueError(
                    'No image for inference, please check inf_image_dir and inf_image_or_pattern in %s'
                    % area_ini)

            merged_shp = os.path.join(WORK_DIR, area_save_dir,
                                      shp_pre + '.shp')
            if b_skip_getshp:
                pass
            else:
                # post-process images one by one
                result_shp_list = []
                map_raster_list = []
                for img_idx, img_path in enumerate(inf_img_list):
                    out_shp, out_raster = inf_results_to_shapefile(
                        WORK_DIR, img_idx, area_save_dir, test_id)
                    if out_shp is None or out_raster is None:
                        continue
                    result_shp_list.append(os.path.join(WORK_DIR, out_shp))
                    map_raster_list.append(out_raster)
                # merge shapefiles
                if merge_shape_files(result_shp_list, merged_shp) is False:
                    continue
                map_raster_list_2d[area_idx] = map_raster_list

            merged_shp_list.append(merged_shp)

        if b_skip_getshp is False:
            # add occurrence to each polygon
            get_occurence_for_multi_observation(merged_shp_list)

        for area_idx, area_ini in enumerate(multi_observations):
            area_remark = parameters.get_string_parameters(
                area_ini, 'area_remark')
            area_save_dir, shp_pre, area_remark_time = get_observation_save_dir_shp_pre(
                inf_dir, area_name, area_time, area_remark, test_id)

            merged_shp = os.path.join(WORK_DIR, area_save_dir,
                                      shp_pre + '.shp')
            if os.path.isfile(merged_shp) is False:
                print('Warning, %s does not exist, skip' % merged_shp)
                continue

            # add attributes to shapefile
            # add_attributes_script = os.path.join(code_dir,'datasets', 'get_polygon_attributes.py')
            shp_attributes = os.path.join(WORK_DIR, area_save_dir,
                                          shp_pre + '_post_NOrm.shp')
            # add_polygon_attributes(add_attributes_script,merged_shp, shp_attributes, para_file, area_ini )
            add_polygon_attributes(merged_shp, shp_attributes, para_file,
                                   area_ini)

            # remove polygons
            # rm_polygon_script = os.path.join(code_dir,'datasets', 'remove_mappedPolygons.py')
            shp_post = os.path.join(WORK_DIR, area_save_dir,
                                    shp_pre + '_post.shp')
            # remove_polygons(rm_polygon_script,shp_attributes, shp_post, para_file)
            remove_polygons_main(shp_attributes, shp_post, para_file)

            # evaluate the mapping results
            # eval_shp_script = os.path.join(code_dir,'datasets', 'evaluation_result.py')
            out_report = os.path.join(WORK_DIR, area_save_dir,
                                      shp_pre + '_evaluation_report.txt')
            # evaluation_polygons(eval_shp_script, shp_post, para_file, area_ini,out_report)
            evaluation_polygons(shp_post, para_file, area_ini, out_report)

            ##### copy and backup files ######
            # copy files to result_backup
            if len(test_note) > 0:
                backup_dir_area = os.path.join(
                    backup_dir, area_name + '_' + area_remark_time + '_' +
                    test_id + '_' + test_note)
            else:
                backup_dir_area = os.path.join(
                    backup_dir,
                    area_name + '_' + area_remark_time + '_' + test_id)
            io_function.mkdir(backup_dir_area)
            if len(test_note) > 0:
                bak_merged_shp = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, test_note]) + '.shp')
                bak_post_shp = os.path.join(
                    backup_dir_area,
                    '_'.join([shp_pre, 'post', test_note]) + '.shp')
                bak_eva_report = os.path.join(
                    backup_dir_area,
                    '_'.join([shp_pre, 'eva_report', test_note]) + '.txt')
                bak_area_ini = os.path.join(
                    backup_dir_area,
                    '_'.join([shp_pre, 'region', test_note]) + '.ini')
            else:
                bak_merged_shp = os.path.join(backup_dir_area,
                                              '_'.join([shp_pre]) + '.shp')
                bak_post_shp = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'post']) + '.shp')
                bak_eva_report = os.path.join(
                    backup_dir_area,
                    '_'.join([shp_pre, 'eva_report']) + '.txt')
                bak_area_ini = os.path.join(
                    backup_dir_area, '_'.join([shp_pre, 'region']) + '.ini')

            io_function.copy_shape_file(merged_shp, bak_merged_shp)
            io_function.copy_shape_file(shp_post, bak_post_shp)
            if os.path.isfile(out_report):
                io_function.copy_file_to_dst(out_report,
                                             bak_eva_report,
                                             overwrite=True)
            io_function.copy_file_to_dst(area_ini,
                                         bak_area_ini,
                                         overwrite=True)

            # copy map raster
            b_backup_map_raster = parameters.get_bool_parameters_None_if_absence(
                area_ini, 'b_backup_map_raster')
            if b_backup_map_raster is True:
                if map_raster_list_2d[area_idx] is not None:
                    for map_tif in map_raster_list_2d[area_idx]:
                        bak_map_tif = os.path.join(backup_dir_area,
                                                   os.path.basename(map_tif))
                        io_function.copy_file_to_dst(map_tif,
                                                     bak_map_tif,
                                                     overwrite=True)

            region_eva_reports[shp_pre] = bak_eva_report

    if len(test_note) > 0:
        bak_para_ini = os.path.join(
            backup_dir, '_'.join([test_id, 'para', test_note]) + '.ini')
        bak_network_ini = os.path.join(
            backup_dir, '_'.join([test_id, 'network', test_note]) + '.ini')
        bak_time_cost = os.path.join(
            backup_dir, '_'.join([test_id, 'time_cost', test_note]) + '.txt')
    else:
        bak_para_ini = os.path.join(backup_dir,
                                    '_'.join([test_id, 'para']) + '.ini')
        bak_network_ini = os.path.join(backup_dir,
                                       '_'.join([test_id, 'network']) + '.ini')
        bak_time_cost = os.path.join(backup_dir,
                                     '_'.join([test_id, 'time_cost']) + '.txt')
    io_function.copy_file_to_dst(para_file, bak_para_ini)
    io_function.copy_file_to_dst(network_setting_ini, bak_network_ini)
    if os.path.isfile('time_cost.txt'):
        io_function.copy_file_to_dst('time_cost.txt', bak_time_cost)

    # output the evaluation report to screen
    for key in region_eva_reports.keys():
        report = region_eva_reports[key]
        if os.path.isfile(report) is False:
            continue
        print('evaluation report for %s:' % key)
        os.system('head -n 7 %s' % report)

    # output evaluation report to table
    if len(test_note) > 0:
        out_table = os.path.join(
            backup_dir,
            '_'.join([test_id, 'accuracy_table', test_note]) + '.xlsx')
    else:
        out_table = os.path.join(
            backup_dir, '_'.join([test_id, 'accuracy_table']) + '.xlsx')
    eva_reports = [
        region_eva_reports[key] for key in region_eva_reports
        if os.path.isfile(region_eva_reports[key])
    ]
    eva_report_to_tables.eva_reports_to_table(eva_reports, out_table)

    duration = time.time() - SECONDS
    os.system(
        'echo "$(date): time cost of post-processing: %.2f seconds">>time_cost.txt'
        % duration)
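
# A hypothetical driver sketch (the parameter file name and note string are placeholders,
# not from the source): postProcess reads the experiment name, inference regions and
# output directories from the .ini files, so a caller only supplies the para file and a
# short note used to label the backed-up results.
if __name__ == '__main__':
    postProcess('main_para.ini', 'exp1', b_skip_getshp=False)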
Exemplo n.º 28
0
def get_subimages_SpaceNet(input_image_dir,
                           image_pattern,
                           input_polygon_dir,
                           polygon_pattern,
                           subImage_dir,
                           subLabel_dir,
                           process_num=1,
                           burn_value=1,
                           b_no_label_image=False):

    sub_images_list = io_function.get_file_list_by_pattern(
        input_image_dir, image_pattern)
    if len(sub_images_list) < 1:
        basic.outputlogMessage('No sub-images in: %s with pattern: %s' %
                               (input_image_dir, image_pattern))
        return False

    sub_images_count = len(sub_images_list)
    # do we need to check the projection of each sub-image?

    if os.path.isdir(subLabel_dir) is False:
        io_function.mkdir(subLabel_dir)
    if os.path.isdir(subImage_dir) is False:
        io_function.mkdir(subImage_dir)

    label_path_list = []
    if b_no_label_image is True:
        pass
    else:
        # polygon file list
        polygon_files_list = io_function.get_file_list_by_pattern(
            input_polygon_dir, polygon_pattern)
        if len(polygon_files_list) < 1:
            basic.outputlogMessage('No polygon files in: %s with pattern: %s' %
                                   (input_polygon_dir, polygon_pattern))
            return False

        polygon_name_list = [
            os.path.basename(item) for item in polygon_files_list
        ]

        # create label images
        for idx, tif_path in enumerate(sub_images_list):
            print('%d / %d create label raster for %s' %
                  (idx, sub_images_count, tif_path))
            # find polygon file
            poly_path = find_corresponding_geojson_SpaceNet(
                tif_path, polygon_files_list, polygon_name_list)
            if poly_path is None:
                print('Warning, cannot find the corresponding polygon file')
                label_path_list.append(None)  # keep the label list aligned with sub_images_list
                continue

            save_path = os.path.join(
                subLabel_dir,
                io_function.get_name_no_ext(poly_path) + '.tif')
            if os.path.isfile(save_path):
                print('warning, %s already exists, skip' % save_path)
                label_path_list.append(save_path)
                continue
            if rasterize_polygons_to_ref_raster(tif_path,
                                                poly_path,
                                                burn_value,
                                                None,
                                                save_path,
                                                datatype='Byte',
                                                ignore_edge=True) is True:
                label_path_list.append(save_path)
            else:
                label_path_list.append(None)  # keep the label list aligned with sub_images_list

    # copy sub-images, adding to txt files
    with open('sub_images_labels_list.txt', 'a') as f_obj:
        for tif_path, label_file in zip(sub_images_list, label_path_list):
            if label_file is None:
                continue
            dst_subImg = os.path.join(subImage_dir, os.path.basename(tif_path))

            # copy sub-images
            io_function.copy_file_to_dst(tif_path, dst_subImg, overwrite=False)

            sub_image_label_str = dst_subImg + ":" + label_file + '\n'
            f_obj.writelines(sub_image_label_str)

    return True
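
# A hypothetical usage sketch for SpaceNet-style data (all paths and patterns below are
# assumptions, not from the source): create label rasters from building-footprint geojson
# files and copy the image tiles into a sub-image folder.
if __name__ == '__main__':
    get_subimages_SpaceNet('AOI_2_Vegas/RGB-PanSharpen', '*.tif',
                           'AOI_2_Vegas/geojson/buildings', '*.geojson',
                           'subImages', 'subLabels',
                           process_num=4, burn_value=1, b_no_label_image=False)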
Exemplo n.º 29
0
def train_evaluation_deeplab_separate(WORK_DIR, deeplab_dir, expr_name,
                                      para_file, network_setting_ini, gpu_num):
    '''
    in "train_evaluation_deeplab", training runs, stops for evaluation, then resumes; this makes the learning rate schedule behave strangely and worsens the results.
    so in this function, we start two processes: one for training, another for evaluation (run on CPU)
    '''
    # prepare training folder
    EXP_FOLDER = expr_name
    INIT_FOLDER = os.path.join(WORK_DIR, EXP_FOLDER, 'init_models')
    TRAIN_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'train')
    EVAL_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'eval')
    VIS_LOGDIR = os.path.join(WORK_DIR, EXP_FOLDER, 'vis')
    EXPORT_DIR = os.path.join(WORK_DIR, EXP_FOLDER, 'export')

    io_function.mkdir(INIT_FOLDER)
    io_function.mkdir(TRAIN_LOGDIR)
    io_function.mkdir(EVAL_LOGDIR)
    io_function.mkdir(VIS_LOGDIR)
    io_function.mkdir(EXPORT_DIR)

    # prepare the tensorflow check point (pretrained model) for training
    pre_trained_dir = parameters.get_directory_None_if_absence(
        network_setting_ini, 'pre_trained_model_folder')
    pre_trained_tar = parameters.get_string_parameters(network_setting_ini,
                                                       'TF_INIT_CKPT')
    pre_trained_path = os.path.join(pre_trained_dir, pre_trained_tar)
    if os.path.isfile(pre_trained_path) is False:
        print('pre-trained model: %s does not exist, trying to download it' %
              pre_trained_path)
        # try to download the file
        pre_trained_url = parameters.get_string_parameters_None_if_absence(
            network_setting_ini, 'pre_trained_model_url')
        res = os.system('wget %s ' % pre_trained_url)
        if res != 0:
            sys.exit(1)
        io_function.movefiletodir(pre_trained_tar, pre_trained_dir)

    # unpack pre-trained model to INIT_FOLDER
    os.chdir(INIT_FOLDER)
    res = os.system('tar -xf %s' % pre_trained_path)
    if res != 0:
        raise IOError('failed to unpack %s' % pre_trained_path)
    os.chdir(WORK_DIR)

    dataset_dir = os.path.join(WORK_DIR, 'tfrecord')
    batch_size = parameters.get_digit_parameters(network_setting_ini,
                                                 'batch_size', 'int')
    # maximum iteration number
    iteration_num = parameters.get_digit_parameters(network_setting_ini,
                                                    'iteration_num', 'int')
    base_learning_rate = parameters.get_digit_parameters(
        network_setting_ini, 'base_learning_rate', 'float')

    train_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_output_stride', 'int')
    train_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates1', 'int')
    train_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates2', 'int')
    train_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'train_atrous_rates3', 'int')

    inf_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_output_stride', 'int')
    inf_atrous_rates1 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates1', 'int')
    inf_atrous_rates2 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates2', 'int')
    inf_atrous_rates3 = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'inf_atrous_rates3', 'int')

    # depth_multiplier default is 1.0.
    depth_multiplier = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'depth_multiplier', 'float')

    decoder_output_stride = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'decoder_output_stride', 'int')
    aspp_convs_filters = parameters.get_digit_parameters_None_if_absence(
        network_setting_ini, 'aspp_convs_filters', 'int')

    train_script = os.path.join(deeplab_dir, 'train.py')
    train_split = os.path.splitext(
        parameters.get_string_parameters(para_file,
                                         'training_sample_list_txt'))[0]
    model_variant = parameters.get_string_parameters(network_setting_ini,
                                                     'model_variant')
    checkpoint = parameters.get_string_parameters(network_setting_ini,
                                                  'tf_initial_checkpoint')
    init_checkpoint_files = io_function.get_file_list_by_pattern(
        INIT_FOLDER, checkpoint + '*')
    if len(init_checkpoint_files) < 1:
        raise IOError('No initial checkpoint in %s with pattern: %s' %
                      (INIT_FOLDER, checkpoint))
    init_checkpoint = os.path.join(INIT_FOLDER, checkpoint)
    b_early_stopping = parameters.get_bool_parameters(para_file,
                                                      'b_early_stopping')
    b_initialize_last_layer = parameters.get_bool_parameters(
        para_file, 'b_initialize_last_layer')

    dataset = parameters.get_string_parameters(para_file, 'dataset_name')
    num_classes_noBG = parameters.get_digit_parameters_None_if_absence(
        para_file, 'NUM_CLASSES_noBG', 'int')
    assert num_classes_noBG is not None
    if b_initialize_last_layer is True:
        if pre_trained_tar in pre_trained_tar_21_classes:
            print(
                'warning, pretrained model %s is trained with 21 classes, set num_of_classes to 21'
                % pre_trained_tar)
            num_classes_noBG = 20
        if pre_trained_tar in pre_trained_tar_19_classes:
            print(
                'warning, pretrained model %s is trained with 19 classes, set num_of_classes to 19'
                % pre_trained_tar)
            num_classes_noBG = 18
    num_of_classes = num_classes_noBG + 1

    image_crop_size = parameters.get_string_list_parameters(
        para_file, 'image_crop_size')
    if len(image_crop_size) != 2 or not image_crop_size[0].isdigit() \
            or not image_crop_size[1].isdigit():
        raise ValueError('image_crop_size should be two integers: height,width')
    crop_size_str = ','.join(image_crop_size)

    # validation interval (in epochs)
    # validation_interval = parameters.get_digit_parameters_None_if_absence(para_file,'validation_interval','int')

    train_count, val_count = get_train_val_sample_count(WORK_DIR, para_file)
    iter_per_epoch = math.ceil(train_count / batch_size)
    total_epoches = math.ceil(iteration_num / iter_per_epoch)
    already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
    if already_trained_iteration >= iteration_num:
        basic.outputlogMessage('Training already run %d iterations, skip' %
                               already_trained_iteration)
        return True

    save_interval_secs = 1200  # default is 1200 seconds between model checkpoints
    save_summaries_secs = 600  # default is 600 seconds between summary saves
    eval_interval_secs = save_interval_secs  # default is 300 seconds between evaluations; if there is no newly saved model, there is no need to run evaluation

    train_process = Process(
        target=train_deeplab,
        args=(train_script, dataset, train_split, num_of_classes,
              base_learning_rate, model_variant, init_checkpoint, TRAIN_LOGDIR,
              dataset_dir, gpu_num, train_atrous_rates1, train_atrous_rates2,
              train_atrous_rates3, train_output_stride, crop_size_str,
              batch_size, iteration_num, depth_multiplier,
              decoder_output_stride, aspp_convs_filters,
              b_initialize_last_layer))
    train_process.start()
    time.sleep(60)  # wait
    if train_process.exitcode is not None and train_process.exitcode != 0:
        sys.exit(1)

    # eval_process.start()
    # time.sleep(10)  # wait
    # if eval_process.exitcode is not None and eval_process.exitcode != 0:
    #     sys.exit(1)

    while True:

        # only run evaluation when there is new trained model
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        basic.outputlogMessage(
            'Already trained iteration: %d, latest evaluation at %d step' %
            (already_trained_iteration, miou_dict['step'][-1]))
        if already_trained_iteration > miou_dict['step'][-1]:

            # run evaluation and wait until it finished
            gpuid = ""  # set gpuid to empty string, making evaluation run on CPU
            evl_script = os.path.join(deeplab_dir, 'eval.py')
            evl_split = os.path.splitext(
                parameters.get_string_parameters(
                    para_file, 'validation_sample_list_txt'))[0]
            # max_eva_number = -1  # run as many evaluation as possible, --eval_interval_secs (default is 300 seconds)
            max_eva_number = 1  # only run once inside the while loop, use while loop to control multiple evaluation
            eval_process = Process(
                target=evaluation_deeplab,
                args=(evl_script, dataset, evl_split, num_of_classes,
                      model_variant, inf_atrous_rates1, inf_atrous_rates2,
                      inf_atrous_rates3, inf_output_stride, TRAIN_LOGDIR,
                      EVAL_LOGDIR, dataset_dir, crop_size_str, max_eva_number,
                      depth_multiplier, decoder_output_stride,
                      aspp_convs_filters, gpuid, eval_interval_secs))
            eval_process.start()  # create the Process inside the while loop to avoid: AssertionError: cannot start a process twice
            while eval_process.is_alive():
                time.sleep(5)

        # check if need early stopping
        if b_early_stopping:
            print(datetime.now(), 'check early stopping')
            miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
            if 'overall' in miou_dict.keys() and len(
                    miou_dict['overall']) >= 5:
                # if the last five miou did not improve, then stop training
                if np.all(np.diff(miou_dict['overall'][-5:]) < 0.005):  # threshold 0.005, i.e. 0.5 %
                    basic.outputlogMessage(
                        'early stopping: stop training because overall miou did not improve in the last five evaluations'
                    )
                    output_early_stopping_message(TRAIN_LOGDIR)

                    # train_process.kill()    # this one seems not working
                    # subprocess pid different from ps output
                    # https://stackoverflow.com/questions/4444141/subprocess-pid-different-from-ps-output
                    # os.system('kill ' + str(train_process.pid)) # still not working.  train_process.pid is not the one output by ps -aux

                    # train_process.terminate()   # Note that descendant processes of the process will not be terminated
                    # train_process.join()        # Wait until child process terminates

                    with open('train_py_pid.txt', 'r') as f_obj:
                        lines = f_obj.readlines()
                        train_pid = int(lines[0].strip())
                        os.system('kill ' + str(train_pid))
                        basic.outputlogMessage(
                            'killed the training process with pid: %d' % train_pid)

                    break  # this breaks the while loop, so some newly saved models may not get evaluated

        # if the latest evaluation step is behind the saved model iteration, run another evaluation immediately
        already_trained_iteration = get_trained_iteration(TRAIN_LOGDIR)
        miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)
        if already_trained_iteration > miou_dict['step'][-1]:
            continue

        # if finished training
        if train_process.is_alive() is False:
            break
        # # if eval_process exit, then quit training as well
        # if eval_process.is_alive() is False and train_process.is_alive():
        #     train_process.kill()
        #     break
        time.sleep(eval_interval_secs)  # wait for next evaluation

    # save loss value to disk
    get_loss_learning_rate_list(TRAIN_LOGDIR)
    # get miou again
    miou_dict = get_miou_list_class_all(EVAL_LOGDIR, num_of_classes)

    # eval_process did not exit as expected, kill it again.
    # os.system('kill ' + str(eval_process.pid))

    # get iou and backup
    iou_path = os.path.join(EVAL_LOGDIR, 'miou.txt')
    loss_path = os.path.join(TRAIN_LOGDIR, 'loss_learning_rate.txt')
    patch_info = os.path.join(WORK_DIR, 'sub_images_patches_info.txt')

    # backup miou and training_loss & learning rate
    test_id = os.path.basename(WORK_DIR) + '_' + expr_name
    backup_dir = os.path.join(WORK_DIR, 'result_backup')
    if os.path.isdir(backup_dir) is False:
        io_function.mkdir(backup_dir)
    new_iou_name = os.path.join(backup_dir,
                                test_id + '_' + os.path.basename(iou_path))
    io_function.copy_file_to_dst(iou_path, new_iou_name, overwrite=True)

    loss_new_name = os.path.join(backup_dir,
                                 test_id + '_' + os.path.basename(loss_path))
    io_function.copy_file_to_dst(loss_path, loss_new_name, overwrite=True)

    new_patch_info = os.path.join(backup_dir,
                                  test_id + '_' + os.path.basename(patch_info))
    io_function.copy_file_to_dst(patch_info, new_patch_info, overwrite=True)

    # plot mIOU, loss, and learning rate curves, and back them up
    miou_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        iou_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    loss_curve_path = plot_miou_loss_curve.plot_miou_loss_main(
        loss_path,
        train_count=train_count,
        val_count=val_count,
        batch_size=batch_size)
    miou_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(miou_curve_path))
    io_function.copy_file_to_dst(miou_curve_path,
                                 miou_curve_bakname,
                                 overwrite=True)
    loss_curve_bakname = os.path.join(
        backup_dir, test_id + '_' + os.path.basename(loss_curve_path))
    io_function.copy_file_to_dst(loss_curve_path,
                                 loss_curve_bakname,
                                 overwrite=True)
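
# A self-contained sketch of the early-stopping rule used above (the numbers in the
# example call are made up): stop once none of the last five overall mIOU values
# improved by more than 0.5 %.
import numpy as np

def should_stop_early(miou_history, patience=5, min_delta=0.005):
    # not enough evaluations yet to make a decision
    if len(miou_history) < patience:
        return False
    return bool(np.all(np.diff(miou_history[-patience:]) < min_delta))

# e.g. should_stop_early([0.712, 0.713, 0.714, 0.714, 0.715, 0.715]) returns True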
Exemplo n.º 30
0
def split_train_val(para_file):
    print("split data set into training and validation")

    if os.path.isfile(para_file) is False:
        raise IOError('File %s does not exist in the current folder: %s' % (para_file, os.getcwd()))

    code_dir = os.path.join(os.path.dirname(sys.argv[0]), '..')
    sys.path.insert(0, code_dir)
    import parameters

    script = os.path.join(code_dir, 'datasets', 'train_test_split.py')

    training_data_per = parameters.get_digit_parameters_None_if_absence(para_file, 'training_data_per','float')
    train_sample_txt = parameters.get_string_parameters(para_file, 'training_sample_list_txt')
    val_sample_txt = parameters.get_string_parameters(para_file, 'validation_sample_list_txt')

    dir = 'list'
    all_img_list = os.path.join(dir,'trainval.txt')

    # command_string = script + ' -p ' + str(training_data_per) + \
    #                  ' -t ' + train_sample_txt  + \
    #                  ' -v ' + val_sample_txt  + \
    #                  ' --shuffle ' + all_img_list
    # res = os.system(command_string)
    # if res!=0:
    #     sys.exit(1)

    if training_data_per is None:
        # similar to the VOC dataset, we only use 1449 images for validation (these images are also used for training,
        # so the reported value is really training accuracy, not validation accuracy)
        with open(all_img_list, 'r') as f_obj:
            file_names = f_obj.readlines()
            if len(file_names) < 1449:
                # val.txt is identical to trainval.txt
                io_function.copy_file_to_dst(all_img_list,os.path.join(dir,train_sample_txt))
                io_function.copy_file_to_dst(all_img_list,os.path.join(dir,val_sample_txt))
            else:
                io_function.copy_file_to_dst(all_img_list, os.path.join(dir, train_sample_txt))
                # randomly get 1449 image from trainval.txt
                import random
                sel_file_index = random.sample(range(len(file_names)), 1449)  # get a list of indices without duplicates
                with open(os.path.join(dir, val_sample_txt), 'w') as w_obj:
                    sel_file_names = [file_names[item] for item in sel_file_index]
                    w_obj.writelines(sel_file_names)
    else:
        # split training and validation datasets
        Do_shuffle = True
        from datasets.train_test_split import train_test_split_main
        train_test_split_main(all_img_list,training_data_per,Do_shuffle,train_sample_txt,val_sample_txt)


    # save brief information of image patches
    img_ext = parameters.get_string_parameters_None_if_absence(para_file, 'split_image_format')

    get_image_with_height_list(os.path.join(dir,train_sample_txt), img_ext, info_type='training')

    get_image_with_height_list(os.path.join(dir,val_sample_txt), img_ext, info_type='validation')

    # save the count of each classes in training and validation
    get_sample_count_of_each_class(os.path.join(dir, train_sample_txt), info_type='training')

    get_sample_count_of_each_class(os.path.join(dir, val_sample_txt), info_type='validation')
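
# A minimal sketch of the percentage-based split branch (file names below are placeholders;
# in practice the function above delegates this to datasets.train_test_split):
import random

def simple_train_val_split(all_list_txt, train_txt, val_txt, train_per=0.9, shuffle=True):
    with open(all_list_txt) as f:
        names = [line for line in f if line.strip()]
    if shuffle:
        random.shuffle(names)
    n_train = int(len(names) * train_per)
    with open(train_txt, 'w') as f:
        f.writelines(names[:n_train])
    with open(val_txt, 'w') as f:
        f.writelines(names[n_train:])

# e.g. simple_train_val_split('list/trainval.txt', 'list/train.txt', 'list/val.txt', 0.9)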