def main():
    # run in ~/Data/Arctic/canada_arctic/autoMapping/multiArea_sub_images on tesia
    ini_list = io_function.get_file_list_by_pattern('./','area*.ini')
    txt_list = io_function.get_file_list_by_pattern('./','area*.txt')
    for txt in txt_list:
        ini_s = io_function.read_list_from_txt(txt)
        ini_list.extend(ini_s)

    ini_list = [os.path.abspath(item) for item in ini_list]
    file_names = [ io_function.get_name_no_ext(item) for item in ini_list ]

    cur_dir = os.getcwd()

    # show
    for item in ini_list:
        print(item)
    time.sleep(3)

    for name, area_ini in zip(file_names,ini_list):
        work_dir = os.path.join(cur_dir, name)
        io_function.mkdir(work_dir)
        os.chdir(work_dir)
        # copy and modify main_para.ini
        io_function.copyfiletodir(os.path.join(cur_dir,'main_para.ini'),'./',overwrite=True)
        io_function.copyfiletodir(os.path.join(cur_dir,'exe.sh'),'./',overwrite=True)

        parameters.write_Parameters_file('main_para.ini','training_regions',area_ini)

        # run exe.sh
        res = os.system('./exe.sh')
        if res != 0:
            print('exe.sh failed with exit code %d' % res)
            sys.exit(1)

        os.chdir(cur_dir)

def get_new_color_table_for_raster(raster, color_table_txt, out_dir=None):
    data, no_data = raster_io.read_raster_one_band_np(raster)

    # remove nodata
    data_1d = data.flatten()
    print(data_1d.shape)
    data_1d = data_1d[data_1d != no_data]
    print(data_1d.shape)
    unique_values = np.unique(data_1d)
    print('unique_values:', unique_values)

    save_color_table = io_function.get_name_no_ext(
        os.path.basename(raster)) + '_color.txt'
    if out_dir is not None:
        save_color_table = os.path.join(out_dir, save_color_table)

    save_lines = []
    with open(color_table_txt, 'r') as f_obj:
        all_lines = f_obj.readlines()

        # copy the first two lines (the color-map header, e.g. the comment and
        # INTERPOLATION lines of a QGIS-exported color map)
        save_lines.append(all_lines[0])
        save_lines.append(all_lines[1])

        for idx in range(2, len(all_lines)):
            value = int(all_lines[idx].split(',')[0])
            if value in unique_values:
                save_lines.append(all_lines[idx])

    with open(save_color_table, 'w') as f_obj:
        f_obj.writelines(save_lines)

    print('Save color table to %s' % os.path.abspath(save_color_table))
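
# Usage sketch (hypothetical paths; assumes this module's io_function and
# raster_io are importable). The color table is expected to be a comma-separated
# color map: two header lines, then "value,R,G,B,A,label" rows, as in a
# QGIS-exported color map.
# get_new_color_table_for_raster('grid_labels.tif', 'all_classes_color.txt', out_dir='color_tables')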

# Example 3

def main():

    hillshade_dir = os.path.join(work_dir,'hillshade_sub_images')
    dem_slope_8bit_dir = os.path.join(work_dir,'dem_slope_8bit_sub_images')
    dem_relative_8bit_dir = os.path.join(work_dir,'dem_relative_8bit_sub_images')
    other_dirs = [dem_slope_8bit_dir,dem_relative_8bit_dir]
    other_dirs_tifs = [ io_function.get_file_list_by_ext('.tif', o_dir, bsub_folder=True) for o_dir in  other_dirs]


    json_list = io_function.get_file_list_by_ext('.json', hillshade_dir, bsub_folder=True)
    json_base_list = [os.path.basename(item) for item in json_list]

    for json_path, base_name in zip(json_list, json_base_list):
        date_str, poly_num = get_date_str_poly_num(base_name)

        for tif_list in other_dirs_tifs:

            for tif in tif_list:
                name_noext = io_function.get_name_no_ext(tif)
                if date_str in name_noext and poly_num in name_noext:
                    # modify and save the json file
                    dst_path = os.path.join(os.path.dirname(tif), name_noext+'.json')
                    # io_function.copy_file_to_dst(json_path,dst_path)
                    data_dict = io_function.read_dict_from_txt_json(json_path)
                    data_dict['imagePath'] = os.path.basename(tif)
                    data_dict['imageData'] = None
                    io_function.save_dict_to_txt_json(dst_path, data_dict)
                    print('saving %s'%dst_path)

                    break

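
# Equivalent sketch of the JSON edit with the standard json module. The files
# are labelme-style annotations: "imagePath" names the image file and
# "imageData" may hold a base64 copy of the pixels, which is dropped here so
# labelme reloads them from imagePath (file names are hypothetical).
import json

with open('hillshade_20170716_poly_1.json') as f:
    data = json.load(f)
data['imagePath'] = 'dem_slope_8bit_20170716_poly_1.tif'
data['imageData'] = None  # force labelme to read pixels from imagePath
with open('dem_slope_8bit_20170716_poly_1.json', 'w') as f:
    json.dump(data, f, indent=2)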

# Example 4

def tifs_to_png(image_dir):
    tif_list = io_function.get_file_list_by_pattern(image_dir, '*/*.tif')
    for idx, tif in enumerate(tif_list):
        print('tif to png: %d/%d tif' % (idx + 1, len(tif_list)))
        basename = io_function.get_name_no_ext(tif)
        save_path = os.path.join(image_dir, basename + '.png')
        if os.path.isfile(save_path):
            print('%s exists, skip' % save_path)
            continue
        command_str = "gdal_translate -of PNG %s %s" % (tif, save_path)
        basic.os_system_exit_code(command_str)
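
# The same conversion as a standalone sketch using subprocess instead of
# os.system (gdal_translate from GDAL must be on PATH; file names are hypothetical):
import subprocess

subprocess.run(['gdal_translate', '-of', 'PNG', 'img000001.tif', 'img000001.png'],
               check=True)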

# Example 5

def get_tifs_bounding_boxes(image_dir):
    tif_list = io_function.get_file_list_by_pattern(image_dir, '*/*.tif')
    for idx, tif in enumerate(tif_list):
        print('get bounding box: %d/%d tif' % (idx + 1, len(tif_list)))
        basename = io_function.get_name_no_ext(tif)
        save_path = os.path.join(image_dir, basename + '_bound.geojson')
        if os.path.isfile(save_path):
            print('%s exists, skip' % save_path)
            continue

        command_str = imgExt + " %s -o tmp.gpkg" % tif
        basic.os_system_exit_code(command_str)
        command_str = "ogr2ogr -f GeoJSON -t_srs EPSG:3413 %s tmp.gpkg" % save_path  # note: projection is EPSG:3413
        basic.os_system_exit_code(command_str)

        io_function.delete_file_or_dir('tmp.gpkg')
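
# The reprojection step as a standalone command sketch. EPSG:3413 is the NSIDC
# Polar Stereographic North projection used by ArcticDEM; imgExt above is
# assumed to be an image-extent tool defined elsewhere in this repo.
#   ogr2ogr -f GeoJSON -t_srs EPSG:3413 img000001_bound.geojson tmp.gpkg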

def check_one_extent(extent_shp):
    print('start to check %s' % extent_shp)

    # local_grid_id_txt is in the current dir
    # log_grid_ids_txt, log_grid_ids_txt_done is in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(
        extent_shp)
    if os.path.isfile(local_grid_id_txt) is False and os.path.isfile(
            log_grid_ids_txt):
        io_function.copy_file_to_dst(log_grid_ids_txt, local_grid_id_txt)
    if os.path.isfile(local_grid_id_txt) is False:
        print('the _grid_ids.txt for %s does not exist, maybe processing has not started yet' %
              extent_shp)
        return False

    # check if it has been completed
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' %
                               extent_shp)
        return True

    grid_ids_to_process_txt = io_function.get_name_no_ext(
        extent_shp) + '_' + 'grid_ids_to_process.txt'

    # read from txt file directly
    grid_ids = [
        int(item) for item in io_function.read_list_from_txt(local_grid_id_txt)
    ]

    update_complete_grid_list(grid_ids, task_list)  # task_list: assumed to be module-level here

    # check the complete files to see if it's done
    # remove grids that have been completed or ignored
    ignore_ids = get_complete_ignore_grid_ids()
    num_grid_ids = save_grid_ids_need_to_process(
        grid_ids, ignore_ids=ignore_ids, save_path=grid_ids_to_process_txt)
    if num_grid_ids < 1:
        print(datetime.now(), ' %s is marked as completed' % extent_shp)
        make_note_all_task_done(extent_shp, curc_node)  # curc_node: assumed to be module-level
    else:
        print(
            datetime.now(),
            ' %s has not completed, %d grids to process, total: %d' %
            (extent_shp, num_grid_ids, len(grid_ids)))

    return True

# Example 7

def organize_files(sub_img_dirs, save_dir):
    if os.path.isdir(save_dir) is False:
        io_function.mkdir(save_dir)

    # get all png files
    png_list = []
    for img_dir in sub_img_dirs:
        pngs = io_function.get_file_list_by_pattern(img_dir, '*.png')
        png_list.extend(pngs)

    image_name_list = []
    images_dir = os.path.join(save_dir, 'images')
    imageBound_dir = os.path.join(save_dir, 'imageBound')
    objectPolygons_dir = os.path.join(save_dir, 'objectPolygons')
    io_function.mkdir(images_dir)
    io_function.mkdir(imageBound_dir)
    io_function.mkdir(objectPolygons_dir)

    for idx, png in enumerate(png_list):
        basename = io_function.get_name_no_ext(png)
        new_name = 'img' + str(idx + 1).zfill(6) + '_' + basename
        image_name_list.append(new_name)

        io_function.copy_file_to_dst(
            png, os.path.join(images_dir, new_name + '.png'))
        png_xml = png + '.aux.xml'
        if os.path.isfile(png_xml):
            io_function.copy_file_to_dst(
                png_xml, os.path.join(images_dir, new_name + '.png.aux.xml'))

        bound_path = png.replace('.png', '_bound.geojson')
        io_function.copy_file_to_dst(
            bound_path,
            os.path.join(imageBound_dir, new_name + '_bound.geojson'))

        digit_str = re.findall(r'_\d+', basename)
        id_str = digit_str[0][1:]
        object_path = os.path.join(os.path.dirname(png),
                                   'id_%s.geojson' % id_str)
        io_function.copy_file_to_dst(
            object_path, os.path.join(objectPolygons_dir,
                                      new_name + '.geojson'))

    txt_path = os.path.join(save_dir, 'imageList.txt')
    io_function.save_list_to_txt(txt_path, image_name_list)
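
# Usage sketch (directory names are hypothetical): gather PNGs from several
# sub-image folders into one dataset with images/, imageBound/ and
# objectPolygons/ sub-directories plus an imageList.txt index.
# organize_files(['region1_subImages', 'region2_subImages'], 'training_data')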

def polygonize_one_label(idx,
                         label_path,
                         org_raster,
                         stats,
                         prefix,
                         b_remove_nodata,
                         process_num=1):

    save_dir = os.path.dirname(label_path)
    out_pre = io_function.get_name_no_ext(label_path)
    label_shp_path = os.path.join(save_dir, out_pre + '.shp')
    if os.path.isfile(label_shp_path):
        print('%s exist, skip' % label_shp_path)
        return idx, label_shp_path

    if b_remove_nodata is True:
        # remove nodata (it was copied from the input image)
        command_str = 'gdal_edit.py -unsetnodata ' + label_path
        res = os.system(command_str)
        if res != 0:
            return None, None

    # convert the label to shapefile
    command_string = 'gdal_polygonize.py -8 %s -b 1 -f "ESRI Shapefile" %s' % (
        label_path, label_shp_path)
    res = os.system(command_string)
    if res != 0:
        return None, None

    if org_raster is not None and stats is not None and prefix is not None:
        # get dem elevation information for each polygon,
        raster_statistic.zonal_stats_multiRasters(label_shp_path,
                                                  org_raster,
                                                  stats=stats,
                                                  prefix=prefix,
                                                  process_num=process_num)

    return idx, label_shp_path
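
# The two GDAL steps above as standalone command sketches (file names are
# hypothetical). "-8" enables 8-connectedness so diagonally touching pixels
# become one polygon:
#   gdal_edit.py -unsetnodata label_patch.tif
#   gdal_polygonize.py -8 label_patch.tif -b 1 -f "ESRI Shapefile" label_patch.shp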

# Example 9

def segment_a_patch(idx, patch, patch_count, img_path, org_raster,
                    b_save_patch_label):

    print('tile: %d / %d' % (idx + 1, patch_count))
    image_name_no_ext = io_function.get_name_no_ext(img_path)
    patch_dir = image_name_no_ext + '_patch%d' % idx
    patch_label_path = os.path.join(
        patch_dir, image_name_no_ext + '_patch%d_label.tif' % idx)
    if b_save_patch_label is True:
        if os.path.isdir(patch_dir) is False:
            io_function.mkdir(patch_dir)
        if os.path.isfile(patch_label_path):
            print('%s exists, skip' % patch_label_path)
            return patch, patch_label_path, None, None

    # read image
    one_band_img, nodata = raster_io.read_raster_one_band_np(img_path,
                                                             boundary=patch)

    # apply a median filter to the image (remove some noise)
    one_band_img = cv2.medianBlur(one_band_img,
                                  3)  # with kernel=3; medianBlur cannot accept int32

    # segmentation algorithms (their output is not always good; choose the parameters carefully)
    # out_labels = watershed_segmentation(one_band_img)
    # out_labels = k_mean_cluster_segmentation(one_band_img)
    out_labels = quickshift_segmentaion(one_band_img,
                                        ratio=0.3,
                                        kernel_size=5,
                                        max_dist=10,
                                        sigma=1,
                                        convert2lab=False)
    # out_labels = mean_shift_segmentation(one_band_img)

    # print('min and max labels of out_labels', np.min(out_labels), np.max(out_labels))

    if b_save_patch_label is True:
        # save the label
        raster_io.save_numpy_array_to_rasterfile(
            out_labels, patch_label_path, img_path,
            boundary=patch)  # it copies the nodata value; unset it later
        return patch, patch_label_path, None, None

    # calculate the attributes based on the original data
    object_attributes = {}  # object id (label) and attributes (list)
    if org_raster is not None:
        org_img_b1, org_nodata = raster_io.read_raster_one_band_np(
            org_raster, boundary=patch)

        # get regions (the labels output by segmentation are not unique across superpixels)
        # regions = measure.regionprops(out_labels, intensity_image=org_img_b1)  # regions is based on out_labels, so it has the same issue
        # print('region count from sk-image measure:', len(regions))

        label_list = np.unique(out_labels)
        # get statistics for each segmented object (label)
        for label in label_list:
            in_array = org_img_b1[out_labels == label]
            object_attributes[label] = get_stastics_from_array(
                in_array, org_nodata)

        return patch, out_labels, nodata, object_attributes

    return patch, out_labels, nodata, None
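
# Minimal self-contained sketch of the quickshift step with scikit-image
# (quickshift_segmentaion above presumably wraps it; the parameter values are
# illustrative). skimage expects a multi-channel image, so the single band is
# stacked three times.
import numpy as np
from skimage.segmentation import quickshift

band = np.random.rand(128, 128)  # stand-in for one_band_img, scaled to [0, 1]
rgb_like = np.dstack([band, band, band])
labels = quickshift(rgb_like, ratio=0.3, kernel_size=5, max_dist=10,
                    sigma=1, convert2lab=False)
print('segment count:', len(np.unique(labels)))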

# Example 10

def get_subimages_SpaceNet(input_image_dir,
                           image_pattern,
                           input_polygon_dir,
                           polygon_pattern,
                           subImage_dir,
                           subLabel_dir,
                           process_num=1,
                           burn_value=1,
                           b_no_label_image=False):

    sub_images_list = io_function.get_file_list_by_pattern(
        input_image_dir, image_pattern)
    if len(sub_images_list) < 1:
        basic.outputlogMessage('No sub-images in: %s with pattern: %s' %
                               (input_image_dir, image_pattern))
        return False

    sub_images_count = len(sub_images_list)
    # do we need to check the projection of each sub-images?

    if os.path.isdir(subLabel_dir) is False:
        io_function.mkdir(subLabel_dir)
    if os.path.isdir(subImage_dir) is False:
        io_function.mkdir(subImage_dir)

    label_path_list = []
    if b_no_label_image is True:
        pass
    else:
        # polygon file list
        polygon_files_list = io_function.get_file_list_by_pattern(
            input_polygon_dir, polygon_pattern)
        if len(polygon_files_list) < 1:
            basic.outputlogMessage('No polygon files in: %s with pattern: %s' %
                                   (input_polygon_dir, polygon_pattern))
            return False

        polygon_name_list = [
            os.path.basename(item) for item in polygon_files_list
        ]

        # create label images
        for idx, tif_path in enumerate(sub_images_list):
            print('%d / %d create label raster for %s' %
                  (idx, sub_images_count, tif_path))
            # find polygon file
            poly_path = find_corresponding_geojson_SpaceNet(
                tif_path, polygon_files_list, polygon_name_list)
            if poly_path is None:
                print('Warning, cannot find the corresponding polygon file')
                label_path_list.append(None)  # keep alignment with sub_images_list
                continue

            save_path = os.path.join(
                subLabel_dir,
                io_function.get_name_no_ext(poly_path) + '.tif')
            if os.path.isfile(save_path):
                print('warning, %s already exists, skip' % save_path)
                label_path_list.append(save_path)
                continue
            if rasterize_polygons_to_ref_raster(tif_path,
                                                poly_path,
                                                burn_value,
                                                None,
                                                save_path,
                                                datatype='Byte',
                                                ignore_edge=True) is True:
                label_path_list.append(save_path)
            else:
                label_path_list.append(None)  # keep alignment with sub_images_list

    # copy sub-images, adding to txt files
    with open('sub_images_labels_list.txt', 'a') as f_obj:
        for tif_path, label_file in zip(sub_images_list, label_path_list):
            if label_file is None:
                continue
            dst_subImg = os.path.join(subImage_dir, os.path.basename(tif_path))

            # copy sub-images
            io_function.copy_file_to_dst(tif_path, dst_subImg, overwrite=False)

            sub_image_label_str = dst_subImg + ":" + label_file + '\n'
            f_obj.write(sub_image_label_str)

    return True
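
# Usage sketch (paths and patterns follow SpaceNet conventions but are
# hypothetical): rasterize building footprints as label images and record
# image:label pairs in sub_images_labels_list.txt.
# get_subimages_SpaceNet('AOI_2_Vegas/RGB-PanSharpen', '*.tif',
#                        'AOI_2_Vegas/geojson/buildings', '*.geojson',
#                        'subImages', 'subLabels', burn_value=1)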

# Example 11

def remove_merge_polygon_in_one_shp(in_shp, org_raster, attribute_name, attribute_range, min_area, max_area, process_num=1):
    # attribute_range: [min, max],

    lower = attribute_range[0]
    upper = attribute_range[1]

    save_shp = io_function.get_name_by_adding_tail(in_shp, 'post')
    if os.path.isfile(save_shp):
        basic.outputlogMessage('%s exists, skip'%save_shp)
        return save_shp

    shp_pre = io_function.get_name_no_ext(in_shp)
    # read polygons and label from segment algorithm, note: some polygons may have the same label
    polygons, attr_value_list = vector_gpd.read_polygons_attributes_list(in_shp,attribute_name)
    print('Read %d polygons'%len(polygons))
    if attr_value_list is None:
        raise ValueError('%s not in %s, need to remove it and then re-create'%(attribute_name,in_shp))


    remain_polyons = []
    rm_min_area_count = 0
    rm_att_value_count = 0
    for poly, att_value in zip(polygons, attr_value_list):
        if poly.area < min_area:
            rm_min_area_count += 1
            continue
        if lower is None:
            if att_value >= upper:
                rm_att_value_count += 1
                continue
        elif upper is None:
            if att_value <= lower:
                rm_att_value_count += 1
                continue
        else:
            # out of range, remove
            if att_value < lower or att_value > upper:
                rm_att_value_count += 1
                continue
        remain_polyons.append(poly)

    print('removed %d polygons based on min_area, %d polygons based on attribute_range, remain %d'%(rm_min_area_count, rm_att_value_count, len(remain_polyons)))

    if len(remain_polyons) > 1:
        # we should only merge polygons with similar reduction, but polygons with mean reduction > threshold were already removed
        # merge touch polygons
        print(timeTools.get_now_time_str(), 'start building adjacent_matrix')
        # adjacent_matrix = vector_features.build_adjacent_map_of_polygons(remain_polyons)
        machine_name = os.uname()[1]
        # if 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:
        #     print('Warning, some problem of parallel running in build_adjacent_map_of_polygons on curc, but ok in my laptop and uist, change process_num = 1')
        #     process_num = 1
        adjacent_matrix = vector_gpd.build_adjacent_map_of_polygons(remain_polyons, process_num=process_num)
        print(timeTools.get_now_time_str(), 'finish building adjacent_matrix')

        if adjacent_matrix is False:
            return False
        merged_polygons = vector_features.merge_touched_polygons(remain_polyons,adjacent_matrix)
        print(timeTools.get_now_time_str(), 'finish merging touched polygons, get %d ones'%(len(merged_polygons)))

        # remove large ones
        remain_polyons = []
        rm_max_area_count = 0
        for poly in merged_polygons:
            if poly.area > max_area:
                rm_max_area_count += 1
                continue
            remain_polyons.append(poly)

        print('remove %d polygons based on max_area, remain %d'%(rm_max_area_count, len(remain_polyons)))

    wkt = map_projection.get_raster_or_vector_srs_info_wkt(in_shp)

    polyons_noMulti = [ vector_gpd.MultiPolygon_to_polygons(idx,poly) for idx,poly in enumerate(remain_polyons) ]
    remain_polyons = []
    for polys in polyons_noMulti:
        polys = [poly for poly in polys if poly.area > min_area]    # remove tiny polygon before buffer
        remain_polyons.extend(polys)
    print('convert MultiPolygon to polygons, remove some small polygons, remain %d' % (len(remain_polyons)))


    # based on the merged polygons, calculate the mean dem diff, relative dem_diff
    buffer_surrounding = 20  # meters
    surrounding_polygons = vector_gpd.get_surrounding_polygons(remain_polyons,buffer_surrounding)
    surrounding_shp = io_function.get_name_by_adding_tail(in_shp, 'surrounding')
    surr_pd = pd.DataFrame({'Polygon': surrounding_polygons})
    vector_gpd.save_polygons_to_files(surr_pd, 'Polygon', wkt, surrounding_shp)
    raster_statistic.zonal_stats_multiRasters(surrounding_shp, org_raster, stats=['mean', 'std', 'count'], prefix='demD',process_num=process_num)


    # calculate attributes of the remaining ones: area, dem_diff: mean, std
    merged_pd = pd.DataFrame({'Polygon': remain_polyons})
    merged_shp = io_function.get_name_by_adding_tail(in_shp, 'merged')
    vector_gpd.save_polygons_to_files(merged_pd, 'Polygon', wkt, merged_shp)
    raster_statistic.zonal_stats_multiRasters(merged_shp, org_raster, stats=['mean','std','count'], prefix='demD', process_num=process_num)

    # calculate the relative dem diff
    surr_dem_diff_list = vector_gpd.read_attribute_values_list(surrounding_shp,'demD_mean')
    merge_poly_dem_diff_list = vector_gpd.read_attribute_values_list(merged_shp,'demD_mean')
    if len(surr_dem_diff_list) != len(merge_poly_dem_diff_list):
        raise ValueError('The number of surr_dem_diff_list and merge_poly_dem_diff_list is different')
    relative_dem_diff_list = [  mer - sur for sur, mer in zip(surr_dem_diff_list, merge_poly_dem_diff_list) ]

    merge_poly_demD_std_list = vector_gpd.read_attribute_values_list(merged_shp,'demD_std')
    merge_poly_demD_count_list = vector_gpd.read_attribute_values_list(merged_shp,'demD_count')

    # remove polygons based on relative dem diff and min_area
    save_polyons = []
    save_demD_mean_list = []
    save_demD_std_list = []
    save_demD_count_list = []
    save_rel_diff_list = []
    save_surr_demD_list = []
    rm_rel_dem_diff_count = 0
    rm_min_area_count = 0
    for idx in range(len(remain_polyons)):
        # relative dem diff
        if relative_dem_diff_list[idx] > dem_diff_thread_m:  # dem_diff_thread_m: a module-level threshold (meters)
            rm_rel_dem_diff_count += 1
            continue

        # converting MultiPolygon to Polygon may create some small polygons
        if remain_polyons[idx].area < min_area:
            rm_min_area_count += 1
            continue


        save_polyons.append(remain_polyons[idx])
        save_demD_mean_list.append(merge_poly_dem_diff_list[idx])
        save_demD_std_list.append(merge_poly_demD_std_list[idx])
        save_demD_count_list.append(merge_poly_demD_count_list[idx])
        save_rel_diff_list.append(relative_dem_diff_list[idx])
        save_surr_demD_list.append(surr_dem_diff_list[idx])

    print('remove %d polygons based on relative rel_demD and %d based on min_area, remain %d' % (rm_rel_dem_diff_count, rm_min_area_count, len(save_polyons)))

    poly_ids = [ item+1  for item in range(len(save_polyons)) ]
    poly_areas = [poly.area for poly in save_polyons]

    save_pd = pd.DataFrame({'poly_id':poly_ids, 'poly_area':poly_areas,'demD_mean':save_demD_mean_list, 'demD_std':save_demD_std_list,
                             'demD_count':save_demD_count_list, 'surr_demD':save_surr_demD_list, 'rel_demD':save_rel_diff_list ,'Polygon': save_polyons})

    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, save_shp)

    # add date difference if available (assumes org_raster is the DEM_diff raster)
    date_diff_base = os.path.basename(org_raster).replace('DEM_diff','date_diff')
    date_diff_tif = os.path.join(os.path.dirname(org_raster), date_diff_base)
    if os.path.isfile(date_diff_tif):
        raster_statistic.zonal_stats_multiRasters(save_shp, date_diff_tif, stats=['mean', 'std'], prefix='dateD',
                                              process_num=process_num)

    return save_shp
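
# Usage sketch (file names are hypothetical; assumes the module-level
# dem_diff_thread_m threshold is set): keep polygons whose demD_mean lies in
# [-30, -2] and whose area is between 200 and 1e6 m^2, merging touching
# polygons in between.
# remove_merge_polygon_in_one_shp('segments.shp', 'DEM_diff_grid.tif',
#                                 'demD_mean', [-30, -2], 200, 1000000,
#                                 process_num=4)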

def main(options, args):
    extent_shp = args[0]
    task_list = [args[item] for item in range(1, len(args))]
    # task_name = args[1]
    if len(task_list) < 1:
        raise ValueError('There is no task: %s' % str(task_list))

    # local_grid_id_txt is in the current dir
    # log_grid_ids_txt, log_grid_ids_txt_done is in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(
        extent_shp)
    # check if it has been complete
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' %
                               extent_shp)
        return True

    r_working_dir = '/scratch/summit/lihu9680/Arctic/dem_processing' if options.remote_working_dir is None else options.remote_working_dir
    r_log_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/log_dir' if options.remote_log_dir is None else options.remote_log_dir
    process_node = '$curc_host' if options.process_node is None else options.process_node
    download_node = '$curc_host' if options.download_node is None else options.download_node

    max_grid_count = options.max_grids
    b_remove_tmp_folders = options.b_remove_tmp_folders
    b_dont_remove_DEM_files = options.b_dont_remove_DEM_files
    b_no_slurm = options.b_no_slurm
    b_divide_to_subsets = True

    # modify the folder name of subsets
    global subset_shp_dir
    subset_shp_dir = subset_shp_dir + '_' + io_function.get_name_no_ext(
        extent_shp)
    global msg_file_pre
    msg_file_pre = io_function.get_name_no_ext(extent_shp) + '_' + msg_file_pre

    grid_ids_to_process_txt = io_function.get_name_no_ext(
        extent_shp) + '_' + 'grid_ids_to_process.txt'

    # build the map of dem-covered grid ids (takes time, but only needs to run once at the beginning)
    build_dict_of_dem_cover_grid_ids(dem_strip_shp, grid_20_shp,
                                     strip_dem_cover_grids_txt)
    build_dict_of_dem_cover_grid_ids(dem_tile_shp, grid_20_shp,
                                     tile_dem_cover_grids_txt)

    # get grids for processing
    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')
    print('time cost of read polygons and attributes', time.time() - time0)

    gird_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)

    # get grid ids based on input extent
    grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    # based on extent shape, subset grid_20_id_raster
    # # using gdalwarp to crop the mask also has a 0.5 pixel offset, so don't use it
    # grid_20_id_raster_sub = io_function.get_name_by_adding_tail(os.path.basename(grid_20_id_raster),'sub')
    # if RSImageProcess.subset_image_by_shapefile(grid_20_id_raster,extent_shp,save_path=grid_20_id_raster_sub) is False:
    #     return False

    # read grid_ids_2d, then mask it
    grid_ids_2d, grid_nodata = raster_io.read_raster_one_band_np(
        grid_20_id_raster)  # 2d array of grid ids
    # rasterize grid_polys; it will serve as a mask
    grid_ids_2d_mask = raster_io.burn_polygons_to_a_raster(
        grid_20_id_raster, grid_polys, 1, None)
    # raster_io.save_numpy_array_to_rasterfile(grid_ids_2d_mask,'grid_ids_2d_mask.tif',grid_20_id_raster,nodata=255)  # save to disk for checking
    loc_masked_out = np.where(grid_ids_2d_mask != 1)
    # grid_ids_2d[ loc_masked_out ] = grid_nodata
    visit_np = np.zeros_like(grid_ids_2d, dtype=np.uint8)
    visit_np[loc_masked_out] = 1  # 1 indicates already visited
    visit_np[np.where(
        grid_ids_2d == grid_nodata)] = 1  # 1 indicates already visited

    subset_id = -1
    # on tesia, uist, vpn-connected laptop
    if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
        io_function.mkdir(subset_shp_dir)
        sync_log_files(process_node, r_log_dir, process_log_dir)
        update_complete_grid_list(grid_ids, task_list)

    while True:
        subset_id += 1
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:

            # remove grids that have been completed or ignored
            ignore_ids = get_complete_ignore_grid_ids()
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids,
                ignore_ids=ignore_ids,
                save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            # if the input is a txt list rather than a shapefile, don't divide it into many subsets
            if extent_shp.endswith('.txt'):
                select_grid_polys, selected_gird_ids = grid_polys, grid_ids
                if len(selected_gird_ids) > 2000:
                    raise ValueError('There are too many grids to process at once')
                b_divide_to_subsets = False
                subset_id = 999999
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                save_selected_girds_and_ids(selected_gird_ids,
                                            select_grid_polys, gird_prj,
                                            select_grids_shp)

            else:
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                select_grid_polys, selected_gird_ids = get_grids_for_download_process(
                    grid_polys,
                    grid_ids,
                    ignore_ids,
                    max_grid_count,
                    grid_ids_2d,
                    visit_np,
                    select_grids_shp,
                    proj=gird_prj)
            if selected_gird_ids is None:
                break  # no more grids
            if len(selected_gird_ids) < 1:
                continue

            subset_info_txt = msg_file_pre + '%d.txt' % subset_id
            if os.path.isfile(subset_info_txt) is False:
                # init the file
                update_subset_info(subset_info_txt,
                                   key_list=[
                                       'id', 'createTime', 'shp', 'pre_status',
                                       'proc_status'
                                   ],
                                   info_list=[
                                       subset_id,
                                       str(datetime.now()), select_grids_shp,
                                       'notYet', 'notYet'
                                   ])

            # download and unpack ArcticDEM, do registration, send to curc
            if download_process_send_arctic_dem(subset_info_txt,
                                                r_working_dir,
                                                process_node,
                                                task_list,
                                                b_send_data=not b_no_slurm) is True:
                continue

            # copy file from remote machine
            if b_no_slurm is False:
                copy_results_from_remote_node()

                sync_log_files(process_node, r_log_dir, process_log_dir)

                # update complete id list
                update_complete_grid_list(grid_ids, task_list)

            # save this to disk to check progress; if there are not too many grids (<100),
            # we can use this one to process without dividing the grids into many subsets
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids, save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            if b_no_slurm:
                # process ArcticDEM using local computing resource
                if produce_dem_products(
                        task_list,
                        b_remove_job_folder=b_remove_tmp_folders,
                        no_slurm=b_no_slurm) is False:
                    break

            if b_divide_to_subsets is False:
                break

        elif 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:  # curc
            # process ArcticDEM using the computing resource on CURC
            if produce_dem_products(
                    task_list,
                    b_remove_job_folder=b_remove_tmp_folders) is False:
                break
        else:
            print('unknown machine : %s ' % machine_name)
            break

        # remove DEM files that are no longer needed
        remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)

    # monitor results in remote computer
    check_time = 200
    while check_time > 0 and b_no_slurm == False:
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
            print(datetime.now(), 'wait 10 min for results in computing nodes')
            time.sleep(600)
            # copy file from remote machine
            copy_results_from_remote_node()
            # sync complete id list, dem info, no-dem grids, etc.
            sync_log_files(process_node, r_log_dir, process_log_dir)
            # update complete id list
            update_complete_grid_list(grid_ids, task_list)
            # remove DEM files that are no longer needed
            remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)
            remote_sub_txt = get_subset_info_txt_list(
                'proc_status', ['notYet', 'working'],
                remote_node=process_node,
                remote_folder=r_working_dir)
            if len(remote_sub_txt) < 1 and check_time != 1:
                check_time = 1  # set to 1, then will only check one more time
            else:
                check_time -= 1
        else:
            break

# Example 13

def merge_subImages_from_gan(multi_gan_source_regions, multi_gan_regions,
                             gan_working_dir, gan_dir_pre_name, save_image_dir,
                             save_label_dir):
    '''
    merge translated sub-images from GAN into the original sub-images
    :param multi_gan_source_regions:
    :param multi_gan_regions:
    :param gan_working_dir:
    :param gan_dir_pre_name:
    :return:
    '''

    current_dir = os.getcwd()
    sub_img_label_txt_noGAN, sub_img_label_txt, area_ini_sub_images_labels_dict = original_sub_images_labels_list_before_gan(
    )

    # # get original sub-images and labels
    # org_sub_images = []
    # org_sub_labels = []
    # with open(sub_img_label_txt_noGAN) as txt_obj:
    #     line_list = [name.strip() for name in txt_obj.readlines()]
    #     for line in line_list:
    #         sub_image, sub_label = line.split(':')
    #         org_sub_images.append(os.path.join(current_dir,sub_image))
    #         org_sub_labels.append(os.path.join(current_dir,sub_label))
    #
    # # merge new sub images, and copy sub labels if necessary.
    new_sub_images = []
    new_sub_labels = []

    area_ini_sub_images_labels = io_function.read_dict_from_txt_json(
        area_ini_sub_images_labels_dict)
    # copy the original sub images and labels before GAN
    for key in area_ini_sub_images_labels.keys():
        for line in area_ini_sub_images_labels[key]:
            sub_image, sub_label = line.split(':')
            new_sub_images.append(sub_image)
            new_sub_labels.append(sub_label)

    for area_idx, (area_ini, area_src_ini) in enumerate(
            zip(multi_gan_regions, multi_gan_source_regions)):
        area_name = parameters.get_string_parameters(area_ini, 'area_name')
        area_remark = parameters.get_string_parameters(area_ini, 'area_remark')
        area_time = parameters.get_string_parameters(area_ini, 'area_time')

        gan_project_save_dir = get_gan_project_save_dir(
            gan_working_dir, gan_dir_pre_name, area_name, area_remark,
            area_time, area_src_ini)

        org_sub_images = []
        org_sub_labels = []
        for line in area_ini_sub_images_labels[os.path.basename(area_src_ini)]:
            sub_image, sub_label = line.split(':')
            org_sub_images.append(os.path.join(current_dir, sub_image))
            org_sub_labels.append(os.path.join(current_dir, sub_label))

        # the new images keep the same order as the original images
        for idx, (org_img,
                  org_label) in enumerate(zip(org_sub_images, org_sub_labels)):
            new_img = os.path.join(gan_project_save_dir, 'subImages_translate',
                                   'I%d.tif' % idx)

            if os.path.isfile(new_img) is False:
                basic.outputlogMessage(
                    'warning, the %d-th image does not exist, '
                    'may exceed gen_max_dataset_size, skip the following images'
                    % idx)
                break

            # check height, width, band count, datatype
            height, width, count, dtype = raster_io.get_height_width_bandnum_dtype(
                new_img)
            o_height, o_width, o_count, o_dtype = raster_io.get_height_width_bandnum_dtype(
                org_img)
            if height != o_height or width != o_width or count != o_count or dtype != o_dtype:
                raise ValueError(
                    'inconsistency between the new GAN image and the original image: %s vs %s'
                    % (str([height, width, count, dtype]),
                       str([o_height, o_width, o_count, o_dtype])))

            # copy subimage and sublabel
            new_file_name_no_ext = io_function.get_name_no_ext(
                org_img) + '_' + os.path.basename(gan_project_save_dir)
            save_img_path = os.path.join(save_image_dir,
                                         new_file_name_no_ext + '_gan.tif')
            save_label_path = os.path.join(save_label_dir,
                                           new_file_name_no_ext + '_label.tif')
            io_function.copy_file_to_dst(new_img,
                                         save_img_path,
                                         overwrite=False)
            io_function.copy_file_to_dst(org_label,
                                         save_label_path,
                                         overwrite=False)

            new_sub_images.append(save_img_path)
            new_sub_labels.append(save_label_path)

    # save new images_labels_list.txt, overwrite the original one
    with open(sub_img_label_txt, 'w') as f_obj:
        lines = [
            img + ':' + label + '\n'
            for img, label in zip(new_sub_images, new_sub_labels)
        ]
        f_obj.writelines(lines)

    return True
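
# Note on the output format: sub_images_labels_list.txt pairs each image with
# its label as "path/to/image.tif:path/to/label.tif", one pair per line, the
# same format written by get_subimages_SpaceNet above.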