def main():
    # save
    ArcticDEM_grid_20km = os.path.join(shp_dir, 'ArcticDEM_grid_20km.shp')

    # ref_raster='grid_20km_bin.tif'
    polygons, ids = vector_gpd.read_polygons_attributes_list(
        ArcticDEM_grid_20km, 'id', b_fix_invalid_polygon=False)
    save_raster = os.path.join(shp_dir, 'ArcticDEM_grid_20km_id.tif')
    # raster_io.burn_polygons_to_a_raster(ref_raster,polygons,ids,save_raster,date_type='uint16')

    # if no reference raster
    extent = vector_gpd.get_vector_file_bounding_box(ArcticDEM_grid_20km)
    # print(extent)
    res = 20000  # 20 km
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(
        ArcticDEM_grid_20km)
    nodata = 2**16 - 1  # 65535, the maximum value of uint16
    raster_io.burn_polygons_to_a_raster(None,
                                        polygons,
                                        ids,
                                        save_raster,
                                        date_type='uint16',
                                        xres=res,
                                        yres=res,
                                        extent=extent,
                                        ref_prj=wkt_string,
                                        nodata=nodata)

    pass
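# A minimal, hedged check (not part of the original script): read the burned id raster back and
# report its nodata value and the number of unique grid ids. It assumes the same module-level
# imports and that raster_io.read_raster_one_band_np returns (2D array, nodata), as it is used
# in the later examples below.
def check_grid_id_raster(save_raster):
    import numpy as np
    ids_2d, nodata_value = raster_io.read_raster_one_band_np(save_raster)
    valid = ids_2d if nodata_value is None else ids_2d[ids_2d != nodata_value]
    print('nodata: %s, %d unique grid ids burned' % (str(nodata_value), len(np.unique(valid))))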
Example #2
def main(options, args):
    extent_shp_or_ids_txt = args[0]
    process_num = options.process_num
    o_res = options.out_res

    if os.path.isdir(grid_matchtag_sum_dir) is False:
        io_function.mkdir(grid_matchtag_sum_dir)

    basic.setlogfile('produce_matchtag_sum_ArcticDEM_log_%s.txt' %
                     timeTools.get_now_time_str())

    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')
    print('time cost of read polygons and attributes', time.time() - time0)

    # get grid ids based on input extent
    grid_base_name = os.path.splitext(
        os.path.basename(extent_shp_or_ids_txt))[0]
    grid_polys, grid_ids = get_grid_20(extent_shp_or_ids_txt, all_grid_polys,
                                       all_ids)

    # check matchtag sum existence
    grid_dem_tifs, grid_ids_no_sum = get_existing_matchtag_sum(
        grid_matchtag_sum_dir, grid_base_name, grid_ids)
    if len(grid_ids_no_sum) > 0:
        # refine grid_polys
        if len(grid_ids) > len(grid_ids_no_sum):
            id_index = [grid_ids.index(id) for id in grid_ids_no_sum]
            grid_polys = [grid_polys[idx] for idx in id_index]

        # # download ArcticDEM and applying registration
        # tarballs, reg_tifs = download_dem_tarball(dem_strip_shp, grid_polys, arcticDEM_tarball_dir, grid_base_name,
        #                                         reg_tif_dir=arcticDEM_reg_tif_dir, poly_ids=grid_ids_no_demDiff)
        #
        # # unpack and applying registration
        # if len(tarballs) > 0:
        #     basic.outputlogMessage('Processs %d dem tarballs'%len(tarballs))
        #     out_reg_tifs = process_dem_tarball(tarballs,'./',arcticDEM_reg_tif_dir,remove_inter_data=True, apply_registration=True)
        #     basic.outputlogMessage('Get %d new registration dem tifs' % len(out_reg_tifs))
        #     reg_tifs.extend(out_reg_tifs)

        reg_tifs = io_function.get_file_list_by_ext('.tif',
                                                    arcticDEM_reg_tif_dir,
                                                    bsub_folder=False)
        matchtag_tifs = [tif for tif in reg_tifs
                         if 'matchtag' in tif]  # only keep matchtag
        # crop, sum
        out_dem_diffs = produce_matchtag_sum_grids(grid_polys,
                                                   grid_ids_no_sum,
                                                   grid_base_name,
                                                   matchtag_tifs,
                                                   o_res,
                                                   process_num=process_num)
def get_grid_ids_extent(extent_shp):
    if 'ArcticDEM_grid_20km' in os.path.basename(extent_shp):
        print(
            'input %s looks like a grid file, read grid polygons and ids from it directly'
            % extent_shp)
        grid_polys, grid_ids = vector_gpd.read_polygons_attributes_list(
            extent_shp, 'grid_id')
        file_name_base = os.path.splitext(os.path.basename(extent_shp))[0]
        shp_corresponding_grid_ids_txt = file_name_base + '_grid_ids.txt'
        io_function.save_list_to_txt(shp_corresponding_grid_ids_txt,
                                     [str(item) for item in grid_ids])
    else:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
            grid_20_shp, 'id')  # in this file, it's "id", not "grid_id"
        print('time cost of read polygons and attributes', time.time() - time0)
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    return grid_ids
def main(options, args):
    extent_shp_or_ids_txt = args[0]
    process_num = options.process_num
    keep_dem_percent = options.keep_dem_percent
    o_res = options.out_res

    basic.setlogfile('produce_headwall_shp_ArcticDEM_log_%s.txt' %
                     timeTools.get_now_time_str())

    if os.path.isdir(grid_dem_headwall_shp_dir) is False:
        io_function.mkdir(grid_dem_headwall_shp_dir)

    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')
    print('time cost of read polygons and attributes', time.time() - time0)

    # get grid ids based on input extent
    grid_base_name = os.path.splitext(
        os.path.basename(extent_shp_or_ids_txt))[0]
    grid_polys, grid_ids = get_grid_20(extent_shp_or_ids_txt, all_grid_polys,
                                       all_ids)

    # check headwall shapefile existence
    grid_headwall_shps, grid_id_no_headwall_shp = get_existing_grid_headwall_shp(
        grid_dem_headwall_shp_dir, grid_base_name, grid_ids)
    if len(grid_id_no_headwall_shp) > 0:
        # refine grid_polys
        if len(grid_ids) > len(grid_id_no_headwall_shp):
            id_index = [grid_ids.index(id) for id in grid_id_no_headwall_shp]
            grid_polys = [grid_polys[idx] for idx in id_index]

        reg_tifs = io_function.get_file_list_by_ext('.tif',
                                                    arcticDEM_reg_tif_dir,
                                                    bsub_folder=False)
        reg_tifs = [tif for tif in reg_tifs
                    if 'matchtag' not in tif]  # remove matchtag
        #
        headwall_shp_folders = extract_headwall_grids(grid_polys,
                                                      grid_id_no_headwall_shp,
                                                      grid_base_name,
                                                      reg_tifs,
                                                      b_mosaic_id,
                                                      b_mosaic_date,
                                                      keep_dem_percent,
                                                      o_res,
                                                      process_num=process_num)
Example #5
def rasterize_polygons(poly_path,
                       burn_value,
                       attribute_name,
                       xres,
                       yres,
                       save_path,
                       datatype='Byte'):
    '''
    rasterize polygons in a vector file to a raster

    :param poly_path: path to the vector file containing the polygons
    :param burn_value: constant value to burn in (ignored if attribute_name is given)
    :param attribute_name: name of the attribute whose values are burned in (or None)
    :param xres: output pixel size in the x direction
    :param yres: output pixel size in the y direction
    :param save_path: path of the output raster
    :param datatype: output data type: 'Byte', 'UInt16', or anything else for int32
    :return: the result of raster_io.burn_polygons_to_a_raster
    '''
    import raster_io
    import vector_gpd
    import basic_src.map_projection as map_projection
    if attribute_name is not None:
        polygons, values = vector_gpd.read_polygons_attributes_list(
            poly_path, attribute_name, b_fix_invalid_polygon=False)
        burn_value = values
    else:
        polygons = vector_gpd.read_polygons_gpd(poly_path,
                                                b_fix_invalid_polygon=False)

    if datatype == 'Byte':
        dtype = 'uint8'
    elif datatype == 'UInt16':
        dtype = 'uint16'
    else:
        dtype = 'int32'

    extent = vector_gpd.get_vector_file_bounding_box(poly_path)
    wkt_string = map_projection.get_raster_or_vector_srs_info_proj4(poly_path)

    return raster_io.burn_polygons_to_a_raster(None,
                                               polygons,
                                               burn_value,
                                               save_path,
                                               dtype,
                                               xres=xres,
                                               yres=yres,
                                               extent=extent,
                                               ref_prj=wkt_string)
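# A hedged usage sketch of rasterize_polygons, reusing the ArcticDEM_grid_20km paths from the
# first example above (shp_dir is assumed to be the same module-level directory): burn the 'id'
# attribute into a 20 km resolution UInt16 raster.
def test_rasterize_polygons():
    import os
    grid_shp = os.path.join(shp_dir, 'ArcticDEM_grid_20km.shp')
    save_tif = os.path.join(shp_dir, 'ArcticDEM_grid_20km_id.tif')
    # the burn_value argument is ignored because an attribute name is given
    rasterize_polygons(grid_shp, None, 'id', 20000, 20000, save_tif, datatype='UInt16')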
def merge_multi_headwall_shp_to_one(shp_list, save_path):
    '''
    merge multiple shapefiles of headwalls on different dates into one file
    :param shp_list: list of the shapefiles to merge (one per acquisition date)
    :param save_path: path of the merged output file
    :return:
    '''
    # shp_list = io_function.get_file_list_by_ext('.shp',shp_dir,bsub_folder=False)
    if len(shp_list) < 1:
        print('Warning, no input shapefile, skip merging multiple shapefiles')
        return False

    if os.path.isfile(save_path):
        print('warning, %s already exists, skip' % save_path)
        return True

    # merge shapefile, one by one, and add the year and date from filename
    line_list = []
    id_list = []
    year_list = []
    date_list = []
    length_m_list = []  # length in meters
    for shp in shp_list:
        # these are line vectors, but we can still use the following function to read them
        lines, lengths = vector_gpd.read_polygons_attributes_list(
            shp, 'length_m')
        curr_count = len(id_list)
        acquisition_date = timeTools.get_yeardate_yyyymmdd(
            os.path.basename(shp))
        year = acquisition_date.year
        for idx, (line, length) in enumerate(zip(lines, lengths)):
            id_list.append(idx + curr_count)
            line_list.append(line)
            length_m_list.append(length)
            year_list.append(year)
            date_list.append(timeTools.date2str(acquisition_date))

    save_pd = pd.DataFrame({
        'id': id_list,
        'length_m': length_m_list,
        'dem_year': year_list,
        'dem_date': date_list,
        'Line': line_list
    })
    ref_prj = map_projection.get_raster_or_vector_srs_info_proj4(shp_list[0])
    return vector_gpd.save_polygons_to_files(save_pd, 'Line', ref_prj,
                                             save_path)
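# A hedged usage sketch (the folder and output names below are hypothetical): collect the
# per-date headwall shapefiles in one folder (their file names are expected to contain a
# yyyymmdd date) and merge them into a single file.
def test_merge_multi_headwall_shp():
    headwall_dir = './headwall_shps_multiDates'  # hypothetical folder
    shp_list = io_function.get_file_list_by_ext('.shp', headwall_dir, bsub_folder=False)
    merge_multi_headwall_shp_to_one(shp_list, 'headwall_shps_merged.shp')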
Example #7
def get_surrounding_polygons(remain_polyons,surrounding_shp,wkt, dem_diff_tif,buffer_surrounding,process_num):
    if os.path.isfile(surrounding_shp):
        # also check the file is complete
        surr_polys, surr_demD = vector_gpd.read_polygons_attributes_list(surrounding_shp,'demD_mean')
        if len(surr_polys) < len(remain_polyons) or surr_demD is None or len(surr_demD) < len(remain_polyons):
            basic.outputlogMessage('%s already exists, but not complete, will be overwritten'%surrounding_shp)
        else:
            basic.outputlogMessage('%s already exists, skip'%surrounding_shp)
            return surrounding_shp

    # based on the merged polygons, calculate the relative dem_diff
    surrounding_polygons = vector_gpd.get_surrounding_polygons(remain_polyons, buffer_surrounding)
    surr_pd = pd.DataFrame({'Polygon': surrounding_polygons})
    vector_gpd.save_polygons_to_files(surr_pd, 'Polygon', wkt, surrounding_shp)
    raster_statistic.zonal_stats_multiRasters(surrounding_shp, dem_diff_tif, tile_min_overlap=tile_min_overlap,
                                              stats=['mean', 'std', 'count'],prefix='demD', process_num=process_num)
    return surrounding_shp
Example #8
def produce_corresponding_grid_ids_txt(extent_shp, local_grid_id_txt,
                                       log_grid_ids_txt):

    # if it is in the log dir but not in the current dir, then copy it
    if os.path.isfile(
            log_grid_ids_txt) and os.path.isfile(local_grid_id_txt) is False:
        io_function.copy_file_to_dst(log_grid_ids_txt,
                                     local_grid_id_txt,
                                     overwrite=False)
        return True

    # if not in the local dir, then generate it
    if os.path.isfile(local_grid_id_txt) is False:
        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
            grid_20_shp, 'id')
        print('time cost of read polygons and attributes', time.time() - time0)

        # this will create local_grid_id_txt
        grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

        # modify local_grid_id_txt by excluding grid_id already in adjacent extent
        other_grid_ids = read_grid_ids_from_other_extent()
        grid_ids = [id for id in grid_ids if id not in other_grid_ids]

        # overwrite the local_grid_id_txt file
        grid_ids_str = [str(item) for item in grid_ids]
        io_function.copy_file_to_dst(local_grid_id_txt,
                                     io_function.get_name_by_adding_tail(
                                         local_grid_id_txt,
                                         'noRMadj'))  # save a copy
        io_function.save_list_to_txt(local_grid_id_txt, grid_ids_str)

        # copy to log dir
        io_function.copy_file_to_dst(local_grid_id_txt, log_grid_ids_txt)

    return True
Example #9
def remove_merge_polygon_in_one_shp(in_shp, org_raster, attribute_name, attribute_range, min_area, max_area, process_num=1):
    # attribute_range: [min, max],

    lower = attribute_range[0]
    upper = attribute_range[1]

    save_shp = io_function.get_name_by_adding_tail(in_shp, 'post')
    if os.path.isfile(save_shp):
        basic.outputlogMessage('%s exists, skip'%save_shp)
        return save_shp

    shp_pre = io_function.get_name_no_ext(in_shp)
    # read polygons and label from segment algorithm, note: some polygons may have the same label
    polygons, attr_value_list = vector_gpd.read_polygons_attributes_list(in_shp,attribute_name)
    print('Read %d polygons'%len(polygons))
    if attr_value_list is None:
        raise ValueError('%s not in %s, need to remove it and then re-create'%(attribute_name,in_shp))


    remain_polyons = []
    rm_min_area_count = 0
    rm_att_value_count = 0
    for poly, att_value in zip(polygons, attr_value_list):
        if poly.area < min_area:
            rm_min_area_count += 1
            continue
        if lower is None:
            if att_value >= upper:
                rm_att_value_count += 1
                continue
        elif upper is None:
            if att_value <= lower:
                rm_att_value_count += 1
                continue
        else:
            # out of range, remove
            if att_value < lower or att_value > upper:
                rm_att_value_count += 1
                continue
        remain_polyons.append(poly)

    print('remove %d polygons based on min_area, %d polygons based on attribute_range, remain %d ones'%(rm_min_area_count, rm_att_value_count,len(remain_polyons)))

    if len(remain_polyons) > 1:
        # we should only merge polygons with similar reduction, but we have already removed polygons with mean reduction > threshold
        # merge touching polygons
        print(timeTools.get_now_time_str(), 'start building adjacent_matrix')
        # adjacent_matrix = vector_features.build_adjacent_map_of_polygons(remain_polyons)
        machine_name = os.uname()[1]
        # if 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:
        #     print('Warning, some problem of parallel running in build_adjacent_map_of_polygons on curc, but ok in my laptop and uist, change process_num = 1')
        #     process_num = 1
        adjacent_matrix = vector_gpd.build_adjacent_map_of_polygons(remain_polyons, process_num=process_num)
        print(timeTools.get_now_time_str(), 'finish building adjacent_matrix')

        if adjacent_matrix is False:
            return False
        merged_polygons = vector_features.merge_touched_polygons(remain_polyons,adjacent_matrix)
        print(timeTools.get_now_time_str(), 'finish merging touched polygons, get %d ones'%(len(merged_polygons)))

        # remove large ones
        remain_polyons = []
        rm_max_area_count = 0
        for poly in merged_polygons:
            if poly.area > max_area:
                rm_max_area_count += 1
                continue
            remain_polyons.append(poly)

        print('remove %d polygons based on max_area, remain %d'%(rm_max_area_count, len(remain_polyons)))

    wkt = map_projection.get_raster_or_vector_srs_info_wkt(in_shp)

    polyons_noMulti = [ vector_gpd.MultiPolygon_to_polygons(idx,poly) for idx,poly in enumerate(remain_polyons) ]
    remain_polyons = []
    for polys in polyons_noMulti:
        polys = [poly for poly in polys if poly.area > min_area]    # remove tiny polygon before buffer
        remain_polyons.extend(polys)
    print('convert MultiPolygon to polygons, remove some small polygons, remain %d' % (len(remain_polyons)))


    # based on the merged polygons, calculate the mean dem diff, relative dem_diff
    buffer_surrounding = 20  # meters
    surrounding_polygons = vector_gpd.get_surrounding_polygons(remain_polyons,buffer_surrounding)
    surrounding_shp = io_function.get_name_by_adding_tail(in_shp, 'surrounding')
    surr_pd = pd.DataFrame({'Polygon': surrounding_polygons})
    vector_gpd.save_polygons_to_files(surr_pd, 'Polygon', wkt, surrounding_shp)
    raster_statistic.zonal_stats_multiRasters(surrounding_shp, org_raster, stats=['mean', 'std', 'count'], prefix='demD',process_num=process_num)


    # calculate attributes of the remaining ones: area, dem_diff: mean, std
    merged_pd = pd.DataFrame({'Polygon': remain_polyons})
    merged_shp = io_function.get_name_by_adding_tail(in_shp, 'merged')
    vector_gpd.save_polygons_to_files(merged_pd, 'Polygon', wkt, merged_shp)
    raster_statistic.zonal_stats_multiRasters(merged_shp, org_raster, stats=['mean','std','count'], prefix='demD', process_num=process_num)

    # calculate the relative dem diff
    surr_dem_diff_list = vector_gpd.read_attribute_values_list(surrounding_shp,'demD_mean')
    merge_poly_dem_diff_list = vector_gpd.read_attribute_values_list(merged_shp,'demD_mean')
    if len(surr_dem_diff_list) != len(merge_poly_dem_diff_list):
        raise ValueError('The number of surr_dem_diff_list and merge_poly_dem_diff_list is different')
    relative_dem_diff_list = [  mer - sur for sur, mer in zip(surr_dem_diff_list, merge_poly_dem_diff_list) ]

    merge_poly_demD_std_list = vector_gpd.read_attribute_values_list(merged_shp,'demD_std')
    merge_poly_demD_count_list = vector_gpd.read_attribute_values_list(merged_shp,'demD_count')

    # remove large ones
    save_polyons = []
    save_demD_mean_list = []
    save_demD_std_list = []
    save_demD_count_list = []
    save_rel_diff_list = []
    save_surr_demD_list = []
    rm_rel_dem_diff_count = 0
    rm_min_area_count = 0
    for idx in range(len(remain_polyons)):
        # relative dem diff
        if relative_dem_diff_list[idx] > dem_diff_thread_m:  # dem_diff_thread_m is assumed to be a module-level threshold (meters)
            rm_rel_dem_diff_count += 1
            continue

        # converting MultiPolygon to Polygon may create some small polygons
        if remain_polyons[idx].area < min_area:
            rm_min_area_count += 1
            continue


        save_polyons.append(remain_polyons[idx])
        save_demD_mean_list.append(merge_poly_dem_diff_list[idx])
        save_demD_std_list.append(merge_poly_demD_std_list[idx])
        save_demD_count_list.append(merge_poly_demD_count_list[idx])
        save_rel_diff_list.append(relative_dem_diff_list[idx])
        save_surr_demD_list.append(surr_dem_diff_list[idx])

    print('remove %d polygons based on relative rel_demD and %d based on min_area, remain %d' % (rm_rel_dem_diff_count, rm_min_area_count, len(save_polyons)))

    poly_ids = [ item+1  for item in range(len(save_polyons)) ]
    poly_areas = [poly.area for poly in save_polyons]

    save_pd = pd.DataFrame({'poly_id':poly_ids, 'poly_area':poly_areas,'demD_mean':save_demD_mean_list, 'demD_std':save_demD_std_list,
                             'demD_count':save_demD_count_list, 'surr_demD':save_surr_demD_list, 'rel_demD':save_rel_diff_list ,'Polygon': save_polyons})

    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, save_shp)

    # add date difference if it is available
    date_diff_base = os.path.basename(org_raster).replace('DEM_diff','date_diff')
    date_diff_tif = os.path.join(os.path.dirname(org_raster) , date_diff_base)
    if os.path.isfile(date_diff_tif):
        raster_statistic.zonal_stats_multiRasters(save_shp, date_diff_tif, stats=['mean', 'std'], prefix='dateD',
                                              process_num=process_num)

    return save_shp
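# A hedged usage sketch (file names and thresholds are hypothetical; dem_diff_thread_m and the
# zonal-statistics helpers are assumed to be available at the module level as in the repo):
# keep polygons whose 'demD_mean' lies in [-100, -2] m, drop ones smaller than 90 m2 or larger
# than 1 km2 after merging, then attach statistics from the DEM difference raster.
def test_remove_merge_polygon_in_one_shp():
    in_shp = 'DEM_diff_sub_1_segment.shp'   # hypothetical segmentation result
    dem_diff = 'DEM_diff_sub_1.tif'         # hypothetical DEM difference raster
    return remove_merge_polygon_in_one_shp(in_shp, dem_diff, 'demD_mean', [-100, -2],
                                           90, 1000000, process_num=4)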
Example #10
def download_dem_tarball(dem_index_shp,
                         extent_polys,
                         save_folder,
                         pre_name,
                         reg_tif_dir=None,
                         poly_ids=None,
                         b_arcticDEM_tile=False):
    # read dem polygons and url
    dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(
        dem_index_shp, 'fileurl', b_fix_invalid_polygon=False)

    basic.outputlogMessage('%d dem polygons in %s' %
                           (len(dem_polygons), dem_index_shp))

    dem_tar_ball_list = []
    reg_tifs_list = []
    curr_dir = os.getcwd()
    b_save_grid_id_noDEM = True
    if poly_ids is None:
        poly_ids = [idx for idx in range(len(extent_polys))]
        b_save_grid_id_noDEM = False  # if poly_ids is not the global unique id, then don't save it.

    if os.path.isfile('no_registration_strips.txt'):
        no_registration_strips = io_function.read_list_from_txt(
            'no_registration_strips.txt')
    else:
        no_registration_strips = []

    # tarballs that are being downloaded
    downloading_tarballs = []

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' %
                               (idx, count, len(extent_polys)))

        save_txt_path = pre_name + '_dem_urls_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            urls = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem urls from %s' %
                                   (len(urls), save_txt_path))
        else:
            # get fileurl
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(
                dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEM within %d th extent' %
                                   (len(dem_poly_ids), (idx)))
            urls = [dem_urls[id] for id in dem_poly_ids]

            # save to txt
            io_function.save_list_to_txt(save_txt_path, urls)
            basic.outputlogMessage('save dem urls to %s' % save_txt_path)

        if len(urls) > 0:

            # total_size_GB = get_total_size(urls)  # internet access, parallel running may cause problem. The info is not important
            # basic.outputlogMessage('the size of files will be downloaded is %.4lf GB for the %d th extent '%(total_size_GB,(idx+1)))
            # time.sleep(5)   # wait 5 seconds

            # download them using wget one by one
            for ii, url in enumerate(urls):
                tmp = urlparse(url)

                # in the Strip DEM index, around 700 urls point directly to tif files and fail to download
                # e.g. /mnt/pgc/data/elev/dem/setsm/ArcticDEM/geocell/v3.0/2m_temp/n59w137/SETSM_WV03_20150518_104001000B703200_104001000C715B00_seg8_2m_v3.0_dem.tif
                if url.startswith('/mnt') and url.endswith('.tif'):
                    basic.outputlogMessage("error: not a valid url: %s" % url)
                    continue

                filename = os.path.basename(tmp.path)
                save_dem_path = os.path.join(save_folder, filename)
                if reg_tif_dir is not None:
                    tar_base = os.path.basename(filename)[:-7]
                    # file_pattern = ['*dem_reg.tif', '*reg_dem.tif'] # Arctic strip and tile (mosaic) version
                    if b_arcticDEM_tile:
                        reg_tifs = io_function.get_file_list_by_pattern(
                            reg_tif_dir, tar_base + '*reg_dem.tif')
                    else:
                        reg_tifs = io_function.get_file_list_by_pattern(
                            reg_tif_dir, tar_base + '*dem_reg.tif')
                    if len(reg_tifs) > 0:
                        basic.outputlogMessage(
                            'warning, unpacked and registered tif for %s already exists, skip downloading'
                            % filename)
                        reg_tifs_list.append(reg_tifs[0])
                        continue

                    if './' + tar_base in no_registration_strips:
                        basic.outputlogMessage(
                            'warning, %s is in no_registration_strips list, skip downloading'
                            % filename)
                        continue

                if filename in downloading_tarballs:
                    basic.outputlogMessage(
                        'warning, %s is being downloaded by other processes' %
                        filename)
                    continue

                if os.path.isfile(
                        save_dem_path) and os.path.getsize(save_dem_path) > 1:
                    basic.outputlogMessage(
                        'warning, %s already exists, skip downloading' %
                        filename)
                else:
                    # download the dem
                    basic.outputlogMessage(
                        'start downloading the %d th DEM (%d in total)' %
                        ((ii + 1), len(urls)))
                    downloading_tarballs.append(filename)

                    # os.chdir(save_folder)

                    # run_a_process_download(url)  # download

                    ##################################################
                    # download in parallel
                    basic.check_exitcode_of_process(
                        download_tasks
                    )  # if any former job failed, then quit

                    while True:
                        job_count = basic.alive_process_count(download_tasks)
                        if job_count >= max_task_count:
                            print(
                                machine_name, datetime.now(),
                                'You are running %d or more tasks in parallel, wait '
                                % max_task_count)
                            time.sleep(60)  #
                            continue
                        break

                    # start the processing
                    sub_process = Process(
                        target=run_a_process_download,
                        args=(url, save_dem_path, reg_tif_dir, max_task_count,
                              b_unpack_after_downloading
                              ))  # start a process, don't wait
                    sub_process.start()
                    download_tasks.append(sub_process)

                    basic.close_remove_completed_process(download_tasks)

                    # os.chdir(curr_dir)

                dem_tar_ball_list.append(save_dem_path)

        else:
            basic.outputlogMessage(
                'Warning, can not find DEMs within %d th extent' % (idx))
            if b_save_grid_id_noDEM:
                save_id_grid_no_dem(idx)

    # wait until all tasks complete
    while True:
        job_count = basic.alive_process_count(download_tasks)
        if job_count > 0:
            print(
                machine_name, datetime.now(),
                'wait until all tasks are completed, alive task count: %d ' %
                job_count)
            time.sleep(60)  #
        else:
            break

    return dem_tar_ball_list, reg_tifs_list
Example #11
def filter_merge_polygons(in_shp,merged_shp,wkt, min_area,max_area,dem_diff_tif,dem_diff_thread_m,process_num):

    if os.path.isfile(merged_shp):
        # also check the file is complete
        polys, demD_values = vector_gpd.read_polygons_attributes_list(merged_shp,'demD_mean')
        if len(polys) < 1 or demD_values is None or len(demD_values) < 1:
            basic.outputlogMessage('%s already exists, but not complete, will be overwritten'%merged_shp)
        else:
            basic.outputlogMessage('%s exists, skip'%merged_shp)
            return merged_shp

    # read polygons and label from segment algorithm, note: some polygons may have the same label
    # polygons, demD_mean_list = vector_gpd.read_polygons_attributes_list(in_shp,'demD_mean')
    polygons, attributes = vector_gpd.read_polygons_attributes_list(in_shp,['demD_mean','DN'])
    demD_mean_list = attributes[0]
    DN_list = attributes[1]
    print('Read %d polygons'%len(polygons))
    if demD_mean_list is None:
        raise ValueError('demD_mean not in %s, need to remove it and then re-create'%in_shp)

    # replace None (if exists) as nan
    demD_mean_list = np.array(demD_mean_list, dtype=float)

    # replace nan values as 0
    demD_mean_list = np.nan_to_num(demD_mean_list)

    remain_polyons = []
    rm_min_area_count = 0
    rm_diff_thr_count = 0
    for poly, demD_mean in zip(polygons, demD_mean_list):
        if poly.area < min_area:
            rm_min_area_count += 1
            continue
        # mean value: not subsidence
        if demD_mean > dem_diff_thread_m:  #
            rm_diff_thr_count += 1
            continue

        remain_polyons.append(poly)

    print('remove %d polygons based on min_area, %d polygons based on dem_diff_threshold, remain %d ones'%(rm_min_area_count, rm_diff_thr_count,len(remain_polyons)))
    if len(remain_polyons) < 1:
        return None


    # we should only merge polygons with similar reduction, but we have already removed polygons with mean reduction > threshold
    # merge touching polygons
    # print(timeTools.get_now_time_str(), 'start building adjacent_matrix')
    # # adjacent_matrix = vector_features.build_adjacent_map_of_polygons(remain_polyons)
    # machine_name = os.uname()[1]
    # if 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:
    #     print('Warning, some problem of parallel running in build_adjacent_map_of_polygons on curc, '
    #           'but ok in my laptop and uist, change process_num = 1')
    #     process_num = 1
    ############################################################
    ## build adjacent_matrix then merge for entire raster
    # adjacent_matrix = vector_gpd.build_adjacent_map_of_polygons(remain_polyons, process_num=process_num)
    # print(timeTools.get_now_time_str(), 'finish building adjacent_matrix')
    #
    # if adjacent_matrix is False:
    #     return None
    # merged_polygons = vector_features.merge_touched_polygons(remain_polyons, adjacent_matrix)

    ############################################################
    # ## build adjacent_matrix then merge, patch by patch (not too many improvements)
    # label_id_range_txt = os.path.splitext(in_shp)[0] + '_label_IDrange.txt'
    # merged_polygons = merge_polygons_patchBYpatch(label_id_range_txt, remain_polyons, DN_list, process_num=process_num)

    ############################################################
    ## merge polygons using rasterize
    label_raster = os.path.splitext(in_shp)[0] + '_label.tif'
    merged_polygons = merge_polygon_rasterize(label_raster, remain_polyons)

    print(timeTools.get_now_time_str(), 'finish merging touched polygons, get %d ones' % (len(merged_polygons)))

    # remove large ones
    remain_polyons = []
    rm_max_area_count = 0
    for poly in merged_polygons:
        if poly.area > max_area:
            rm_max_area_count += 1
            continue
        remain_polyons.append(poly)

    print('remove %d polygons based on max_area, remain %d' % (rm_max_area_count, len(remain_polyons)))

    polyons_noMulti = [vector_gpd.MultiPolygon_to_polygons(idx, poly) for idx, poly in enumerate(remain_polyons)]
    remain_polyons = []
    for polys in polyons_noMulti:
        polys = [poly for poly in polys if poly.area > min_area]  # remove tiny polygon before buffer
        remain_polyons.extend(polys)
    print('convert MultiPolygon (filter_merge_polygons) to polygons and remove small ones, remain %d' % (len(remain_polyons)))

    if len(remain_polyons) < 1:
        return None

    # calculate attributes of the remaining ones: area, dem_diff: mean, std
    merged_pd = pd.DataFrame({'Polygon': remain_polyons})
    vector_gpd.save_polygons_to_files(merged_pd, 'Polygon', wkt, merged_shp)

    # based on the merged polygons, calculate the mean dem diff
    raster_statistic.zonal_stats_multiRasters(merged_shp, dem_diff_tif, tile_min_overlap=tile_min_overlap,
                                              stats=['mean', 'std', 'count'], prefix='demD',process_num=process_num)

    return merged_shp
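# merge_polygon_rasterize above is defined elsewhere in the repo; below is a minimal, hedged
# sketch of the rasterize-based merging idea (not the repo's actual implementation): burn the
# polygons into a binary mask on the label raster's grid, then polygonize the connected regions,
# so polygons whose rasterized cells touch come back as one merged polygon.
def merge_polygons_via_rasterize_sketch(ref_raster, polygons):
    import rasterio
    from rasterio import features
    from shapely.geometry import shape
    with rasterio.open(ref_raster) as src:
        transform = src.transform
        out_shape = (src.height, src.width)
    # burn every polygon as 1; touching polygons form one connected region
    burned = features.rasterize(((poly, 1) for poly in polygons), out_shape=out_shape,
                                transform=transform, fill=0, dtype='uint8')
    # polygonize the regions where burned > 0 back into shapely polygons
    return [shape(geom) for geom, val in features.shapes(burned, mask=burned > 0, transform=transform)]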
def main(options, args):
    extent_shp = args[0]
    task_list = [args[item] for item in range(1, len(args))]
    # task_name = args[1]
    if len(task_list) < 1:
        raise ValueError('There is no task: %s' % str(task_list))

    # local_grid_id_txt is in the current dir
    # log_grid_ids_txt, log_grid_ids_txt_done is in grid_ids_txt_dir
    local_grid_id_txt, log_grid_ids_txt, log_grid_ids_txt_done = get_extent_grid_id_txt_done_files(
        extent_shp)
    # check if it has been complete
    if os.path.isfile(log_grid_ids_txt_done):
        basic.outputlogMessage('Tasks for extent %s have been completed' %
                               extent_shp)
        return True

    r_working_dir = '/scratch/summit/lihu9680/Arctic/dem_processing' if options.remote_working_dir is None else options.remote_working_dir
    r_log_dir = '/scratch/summit/lihu9680/ArcticDEM_tmp_dir/log_dir' if options.remote_log_dir is None else options.remote_log_dir
    process_node = '$curc_host' if options.process_node is None else options.process_node
    download_node = '$curc_host' if options.download_node is None else options.download_node

    max_grid_count = options.max_grids
    b_remove_tmp_folders = options.b_remove_tmp_folders
    b_dont_remove_DEM_files = options.b_dont_remove_DEM_files
    b_no_slurm = options.b_no_slurm
    b_divide_to_subsets = True

    # modify the folder name of subsets
    global subset_shp_dir
    subset_shp_dir = subset_shp_dir + '_' + io_function.get_name_no_ext(
        extent_shp)
    global msg_file_pre
    msg_file_pre = io_function.get_name_no_ext(extent_shp) + '_' + msg_file_pre

    grid_ids_to_process_txt = io_function.get_name_no_ext(
        extent_shp) + '_' + 'grid_ids_to_process.txt'

    # build map dem cover grid (take time, but only need to run once at the beginning)
    build_dict_of_dem_cover_grid_ids(dem_strip_shp, grid_20_shp,
                                     strip_dem_cover_grids_txt)
    build_dict_of_dem_cover_grid_ids(dem_tile_shp, grid_20_shp,
                                     tile_dem_cover_grids_txt)

    # get grids for processing
    # read grids and ids
    time0 = time.time()
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')
    print('time cost of read polygons and attributes', time.time() - time0)

    gird_prj = map_projection.get_raster_or_vector_srs_info_proj4(grid_20_shp)

    # get grid ids based on input extent
    grid_polys, grid_ids = get_grid_20(extent_shp, all_grid_polys, all_ids)

    # based on extent shape, subset grid_20_id_raster
    # # using gdalwarp to crop the mask also has a 0.5 pixel offset, so don't use it
    # grid_20_id_raster_sub = io_function.get_name_by_adding_tail(os.path.basename(grid_20_id_raster),'sub')
    # if RSImageProcess.subset_image_by_shapefile(grid_20_id_raster,extent_shp,save_path=grid_20_id_raster_sub) is False:
    #     return False

    # read grid_ids_2d, then mask it
    grid_ids_2d, grid_nodata = raster_io.read_raster_one_band_np(
        grid_20_id_raster)  # 2d array of grid ids
    # rasterize grid_polys, which will serve as a mask.
    grid_ids_2d_mask = raster_io.burn_polygons_to_a_raster(
        grid_20_id_raster, grid_polys, 1, None)
    # raster_io.save_numpy_array_to_rasterfile(grid_ids_2d_mask,'grid_ids_2d_mask.tif',grid_20_id_raster,nodata=255)  # save to disk for checking
    loc_masked_out = np.where(grid_ids_2d_mask != 1)
    # grid_ids_2d[ loc_masked_out ] = grid_nodata
    visit_np = np.zeros_like(grid_ids_2d, dtype=np.uint8)
    visit_np[loc_masked_out] = 1  # 1 indicates already visited
    visit_np[np.where(
        grid_ids_2d == grid_nodata)] = 1  # 1 indicates already visited

    subset_id = -1
    # on tesia, uist, vpn-connected laptop
    if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
        io_function.mkdir(subset_shp_dir)
        sync_log_files(process_node, r_log_dir, process_log_dir)
        update_complete_grid_list(grid_ids, task_list)

    while True:
        subset_id += 1
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:

            # remove grids that have been completed or ignored
            ignore_ids = get_complete_ignore_grid_ids()
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids,
                ignore_ids=ignore_ids,
                save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            # if the input is not a shapefile, then don't divide it into many subsets
            if extent_shp.endswith('.txt'):
                select_grid_polys, selected_gird_ids = grid_polys, grid_ids
                if len(selected_gird_ids) > 2000:
                    raise ValueError('There are too many grids to process at once')
                b_divide_to_subsets = False
                subset_id = 999999
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                save_selected_girds_and_ids(selected_gird_ids,
                                            select_grid_polys, gird_prj,
                                            select_grids_shp)

            else:
                select_grids_shp = os.path.join(
                    subset_shp_dir,
                    io_function.get_name_no_ext(extent_shp) +
                    '_sub%d' % subset_id + '.shp')
                select_grid_polys, selected_gird_ids = get_grids_for_download_process(
                    grid_polys,
                    grid_ids,
                    ignore_ids,
                    max_grid_count,
                    grid_ids_2d,
                    visit_np,
                    select_grids_shp,
                    proj=gird_prj)
            if selected_gird_ids is None:
                break  # no more grids
            if len(selected_gird_ids) < 1:
                continue

            subset_info_txt = msg_file_pre + '%d.txt' % subset_id
            if os.path.isfile(subset_info_txt) is False:
                # init the file
                update_subset_info(subset_info_txt,
                                   key_list=[
                                       'id', 'createTime', 'shp', 'pre_status',
                                       'proc_status'
                                   ],
                                   info_list=[
                                       subset_id,
                                       str(datetime.now()), select_grids_shp,
                                       'notYet', 'notYet'
                                   ])

            # download and unpack ArcticDEM, do registration, send to curc
            if download_process_send_arctic_dem(subset_info_txt,
                                                r_working_dir,
                                                process_node,
                                                task_list,
                                                b_send_data=not b_no_slurm) is True:
                continue

            # copy file from remote machine
            if b_no_slurm is False:
                copy_results_from_remote_node()

                sync_log_files(process_node, r_log_dir, process_log_dir)

                # update complete id list
                update_complete_grid_list(grid_ids, task_list)

            # save this to disk to check progress; if there are not too many grids (<100),
            # we can use this one to process without dividing grids into many subsets
            num_grid_ids = save_grid_ids_need_to_process(
                grid_ids, save_path=grid_ids_to_process_txt)
            if num_grid_ids < 1:
                make_note_all_task_done(extent_shp, process_node)

            if b_no_slurm:
                # process ArcticDEM using local computing resource
                if produce_dem_products(
                        task_list,
                        b_remove_job_folder=b_remove_tmp_folders,
                        no_slurm=b_no_slurm) is False:
                    break

            if b_divide_to_subsets is False:
                break

        elif 'login' in machine_name or 'shas' in machine_name or 'sgpu' in machine_name:  # curc
            # process ArcticDEM using the computing resource on CURC
            if produce_dem_products(
                    task_list,
                    b_remove_job_folder=b_remove_tmp_folders) is False:
                break
        else:
            print('unknown machine : %s ' % machine_name)
            break

        # remove DEM files that are no longer needed
        remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)

    # monitor results on the remote computer
    check_time = 200
    while check_time > 0 and not b_no_slurm:
        # on tesia, uist, vpn-connected laptop
        if machine_name == 'ubuntu' or machine_name == 'uist-int-colorado-edu' or 'colorado.edu' in machine_name or 'MacBook' in machine_name:
            print(datetime.now(), 'wait 10 min for results in computing nodes')
            time.sleep(600)
            # copy file from remote machine
            copy_results_from_remote_node()
            # sync complete id list, dem info, no dem grids, etc.
            sync_log_files(process_node, r_log_dir, process_log_dir)
            # update complete id list
            update_complete_grid_list(grid_ids, task_list)
            # remove DEM files that are no longer needed
            remove_no_need_dem_files(b_remove=b_dont_remove_DEM_files)
            remote_sub_txt = get_subset_info_txt_list(
                'proc_status', ['notYet', 'working'],
                remote_node=process_node,
                remote_folder=r_working_dir)
            if len(remote_sub_txt) < 1 and check_time != 1:
                check_time = 1  # set to 1, then will only check one more time
            else:
                check_time -= 1
        else:
            break
def build_dict_of_dem_cover_grid_ids(dem_info_shp, grid_20_shp, save_dict_txt):
    # this will take time, but only needs to run once at the beginning
    if os.path.isfile(save_dict_txt):
        print('warning, %s exists, skip build_dict_of_dem_cover_grid_ids' %
              save_dict_txt)
        return True

    # extent polygons and projection (proj4)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(
        dem_info_shp)
    if dem_shp_prj == '':
        raise ValueError('get proj4 of %s failed' % dem_info_shp)
    grid_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(
        grid_20_shp)
    if grid_shp_prj == '':
        raise ValueError('get proj4 of %s failed' % grid_20_shp)

    if dem_shp_prj != grid_shp_prj:
        raise ValueError('%s and %s do not have the same projection' %
                         (dem_info_shp, grid_20_shp))

    # read DEM info
    dem_polygons, dem_names = vector_gpd.read_polygons_attributes_list(
        dem_info_shp, 'name', b_fix_invalid_polygon=False)
    # dem_name: eg. SETSM_GE01_20090818_1050410001E0CF00_1050410001D80200_seg1_2m_v3.0  or 11_27_2_1_2m_v3.0
    dem_poly_count = len(dem_polygons)
    # check if there are duplicate dem names
    if len(dem_names) != len(set(dem_names)):
        raise ValueError('some duplicate dem name in %s' % dem_info_shp)

    # read grid polygons and ids
    all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
        grid_20_shp, 'id')

    dem_cover_grids = {}
    # this will take time.
    # for idx, (dem_poly,dem_name) in enumerate(zip(dem_polygons, dem_names)):
    #     print(timeTools.get_now_time_str(), idx, dem_poly_count)
    #     index = vector_gpd.get_poly_index_within_extent(all_grid_polys, dem_poly)
    #     gird_ids = [ all_ids[idx] for idx in index ]
    #     # if dem_name in dem_cover_grids.keys():
    #     #     basic.outputlogMessage('\n Warning, %s already in dict \n'%dem_name)
    #     dem_cover_grids[dem_name] = gird_ids

    ### parallel version
    process_pool = Pool(multiprocessing.cpu_count())  # pool of worker processes
    parameters_list = [
        (all_ids, all_grid_polys, dem_poly, dem_name, idx, dem_poly_count)
        for idx, (dem_poly,
                  dem_name) in enumerate(zip(dem_polygons, dem_names))
    ]
    results = process_pool.starmap(get_overlap_grids_for_one_extent,
                                parameters_list)  # need python3
    for res in results:
        dem_name, grid_ids = res
        dem_cover_grids[dem_name] = grid_ids

    # save to dict
    io_function.save_dict_to_txt_json(save_dict_txt, dem_cover_grids)
    process_pool.close()
    return True
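# A hedged sketch of the worker used by the Pool above (the real get_overlap_grids_for_one_extent
# lives elsewhere in this module); it mirrors the commented-out serial loop: find the grid
# polygons intersecting one DEM footprint and return the DEM name together with those grid ids.
def get_overlap_grids_for_one_extent(all_ids, all_grid_polys, dem_poly, dem_name, idx, dem_poly_count):
    print(timeTools.get_now_time_str(), idx, dem_poly_count)
    index = vector_gpd.get_poly_index_within_extent(all_grid_polys, dem_poly)
    grid_ids = [all_ids[item] for item in index]
    return dem_name, grid_ids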
Example #14
def main(options, args):

    extent_shp = args[0]
    # ext_shp_prj = map_projection.get_raster_or_vector_srs_info_epsg(extent_shp)
    # reproject if necessary; it seems that gdalwarp can handle different projections
    # if ext_shp_prj != 'EPSG:3413':  # EPSG:3413 is the projection ArcticDEM used
    #     extent_shp_reprj = io_function.get_name_by_adding_tail(extent_shp,'3413')
    #     vector_gpd.reproject_shapefile(extent_shp,'EPSG:3413',extent_shp_reprj)
    #     extent_shp = extent_shp_reprj

    tar_dir = options.ArcticDEM_dir
    save_dir = options.save_dir
    b_mosaic_id = options.create_mosaic_id
    b_mosaic_date = options.create_mosaic_date
    b_rm_inter = options.remove_inter_data
    keep_dem_percent = options.keep_dem_percent
    inter_format = options.format
    arcticDEM_shp = options.arcticDEM_shp
    o_res = options.out_res
    b_dem_diff = options.create_dem_diff
    dem_list_txt = options.dem_list_txt

    # creating the mosaic is time-consuming and takes a lot of memory; for a 50 km by 50 km region, it may need 10 to 50 GB of memory
    process_num = options.process_num
    basic.outputlogMessage(
        'The number of processes for creating the mosaic is: %d' % process_num)

    extent_shp_base = os.path.splitext(os.path.basename(extent_shp))[0]
    extent_prj = map_projection.get_raster_or_vector_srs_info_epsg(extent_shp)

    b_ArcticDEM_tar = False
    dem_tif_list = []
    if tar_dir is not None and arcticDEM_shp is not None:
        b_ArcticDEM_tar = True
    else:
        dem_tif_list = io_function.read_list_from_txt(dem_list_txt)
        # check projection
        for dem_tif in dem_tif_list:
            dem_prj = map_projection.get_raster_or_vector_srs_info_epsg(
                dem_tif)
            if dem_prj != extent_prj:
                raise ValueError('The projection of %s is different from %s' %
                                 (dem_prj, extent_prj))

    b_ArcticDEM_tiles = False
    if b_ArcticDEM_tar:
        arcdem_prj = map_projection.get_raster_or_vector_srs_info_epsg(
            arcticDEM_shp)

        # read dem polygons and url
        time0 = time.time()
        dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(
            arcticDEM_shp, 'fileurl', b_fix_invalid_polygon=False)
        print('time cost of read polygons and attributes', time.time() - time0)
        basic.outputlogMessage('%d dem polygons in %s' %
                               (len(dem_polygons), extent_shp))
        # get tarball list
        tar_list = io_function.get_file_list_by_ext('.gz',
                                                    tar_dir,
                                                    bsub_folder=False)
        if len(tar_list) < 1:
            raise ValueError('No input tar.gz files in %s' % tar_dir)

        if is_ArcticDEM_tiles(tar_list):
            basic.outputlogMessage('Input is the mosaic version of ArcticDEM')
            b_ArcticDEM_tiles = True

        if extent_prj == arcdem_prj:
            extent_polys = vector_gpd.read_polygons_gpd(extent_shp)
        else:
            extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(
                extent_shp, arcdem_prj)
    else:
        extent_polys = vector_gpd.read_polygons_gpd(extent_shp)

    if len(extent_polys) < 1:
        raise ValueError('No polygons in %s' % extent_shp)
    else:
        basic.outputlogMessage('%d extent polygons in %s' %
                               (len(extent_polys), extent_shp))

    extPolys_ids = vector_gpd.read_attribute_values_list(extent_shp, 'id')
    if extPolys_ids is None or None in extPolys_ids:
        basic.outputlogMessage(
            'Warning, field: id is not in %s, will create default ID for each grid'
            % extent_shp)
        extPolys_ids = [id + 1 for id in range(len(extent_polys))]

    # print('extPolys_ids, count',extPolys_ids, len(extent_polys))

    same_extent = False
    if b_dem_diff:
        # crop each one to the same extent, which makes DEM differencing easier.
        same_extent = True

    for idx, ext_poly in zip(extPolys_ids, extent_polys):
        basic.outputlogMessage('get data for the %d th extent (%d in total)' %
                               (idx, len(extent_polys)))

        if b_ArcticDEM_tar:
            if b_ArcticDEM_tiles:
                proc_ArcticDEM_tile_one_grid_polygon(tar_dir, dem_polygons,
                                                     dem_urls, o_res, save_dir,
                                                     inter_format, b_rm_inter,
                                                     ext_poly, idx,
                                                     extent_shp_base)
            else:
                proc_ArcticDEM_strip_one_grid_polygon(
                    tar_dir,
                    dem_polygons,
                    dem_urls,
                    o_res,
                    save_dir,
                    inter_format,
                    b_mosaic_id,
                    b_mosaic_date,
                    b_rm_inter,
                    b_dem_diff,
                    ext_poly,
                    idx,
                    keep_dem_percent,
                    process_num,
                    extent_shp_base,
                    resample_method='average',
                    same_extent=same_extent)
        else:
            proc_dem_mosaic_diff(dem_tif_list,
                                 save_dir,
                                 idx,
                                 ext_poly,
                                 b_mosaic_id,
                                 b_mosaic_date,
                                 process_num,
                                 keep_dem_percent,
                                 o_res,
                                 b_dem_diff,
                                 extent_shp_base,
                                 b_rm_inter,
                                 resample_method='average')
Example #15
def main():
    dem_index_shp = os.path.expanduser(
        '~/Data/Arctic/ArcticDEM/BROWSE_SERVER/indexes/ArcticDEM_Tile_Index_Rel7/ArcticDEM_Tile_Index_Rel7.shp'
    )
    # extent_shp = os.path.expanduser('~/Data/PDO/PDO_statistics_swatchs/swatch_bounding_boxes.shp')
    extent_shp = os.path.expanduser(
        '~/Data/PDO/extent_each_swatch/merge_all_qa_exent.shp')

    # extent polygons and projection (proj4)
    extent_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(
        extent_shp)
    dem_shp_prj = map_projection.get_raster_or_vector_srs_info_proj4(
        dem_index_shp)

    if extent_shp_prj != dem_shp_prj:
        basic.outputlogMessage(
            '%s and %s do not have the same projection, will reproject %s' %
            (extent_shp, dem_index_shp, os.path.basename(extent_shp)))
        epsg = map_projection.get_raster_or_vector_srs_info_epsg(dem_index_shp)
        # print(epsg)
        # extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp,dem_shp_prj.strip())
        extent_polys = vector_gpd.read_shape_gpd_to_NewPrj(extent_shp, epsg)
    else:
        extent_polys = vector_gpd.read_polygons_gpd(extent_shp)

    poly_ids = [idx for idx in range(len(extent_polys))]
    if 'boxes' in os.path.basename(extent_shp):
        nc_file_names = vector_gpd.read_attribute_values_list(
            extent_shp, 'nc_file')
    else:
        nc_file_names = vector_gpd.read_attribute_values_list(
            extent_shp, 'layer')

    # read dem polygons and tile number
    dem_polygons, dem_tiles = vector_gpd.read_polygons_attributes_list(
        dem_index_shp, 'tile', b_fix_invalid_polygon=False)

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage('get data for the %d th extent (%d/%d)' %
                               (idx, count, len(extent_polys)))

        save_txt_path = nc_file_names[idx] + '-' + 'dem_tiles_poly_%d.txt' % idx
        if os.path.isfile(save_txt_path):
            tiles = io_function.read_list_from_txt(save_txt_path)
            basic.outputlogMessage('read %d dem tiles from %s' %
                                   (len(tiles), save_txt_path))
        else:
            # get fileurl
            dem_poly_ids = vector_gpd.get_poly_index_within_extent(
                dem_polygons, ext_poly)
            basic.outputlogMessage('find %d DEM within %d th extent' %
                                   (len(dem_poly_ids), (idx)))
            tiles = [dem_tiles[id] for id in dem_poly_ids]

            # save to txt
            io_function.save_list_to_txt(save_txt_path, tiles)
            basic.outputlogMessage('save dem tiles to %s' % save_txt_path)

        # download and create a mosaic
        url_head = 'https://data.pgc.umn.edu/elev/dem/setsm/ArcticDEM/mosaic/v3.0/32m/'
        download_tarball_for_one_polygon(tarball_dir, dem_tif_dir, url_head,
                                         tiles)

        # create a mosaic
        create_a_mosaic(nc_file_names[idx], idx, dem_eachSwatch_dir, ext_poly,
                        tiles)

    bak_folder = 'small_tifs'
    io_function.mkdir(bak_folder)
    # remove small and duplicated ones
    for file_name in nc_file_names:
        crop_tifs = io_function.get_file_list_by_pattern(
            dem_eachSwatch_dir, file_name + '*crop.tif')
        if len(crop_tifs) == 1:
            pass
        elif len(crop_tifs) > 1:
            # keep the largest one and move the smaller ones away
            tif_files_size = [
                io_function.get_file_size_bytes(item) for item in crop_tifs
            ]
            max_size = max(tif_files_size)
            max_index = tif_files_size.index(max_size)
            del crop_tifs[max_index]
            for tmp in crop_tifs:
                io_function.movefiletodir(tmp, bak_folder)
                tmp = tmp.replace('_crop', '')
                io_function.movefiletodir(tmp, bak_folder)

        else:  # no tif
            raise ValueError('Results for %s do not exist' % file_name)
Example #16
def remove_polygons_based_relative_dem_diff(remain_polyons,merged_shp,surrounding_shp,wkt, save_shp, min_area, dem_diff_thread_m):
    if os.path.isfile(save_shp):
        # also check the file is complete
        polys, demD_values = vector_gpd.read_polygons_attributes_list(save_shp, 'demD_mean')
        if len(polys) < 1 or demD_values is None or len(demD_values) < 1:
            basic.outputlogMessage('%s already exists, but not complete, will be overwritten' % save_shp)
        else:
            basic.outputlogMessage('%s exists, skip' % save_shp)
            return save_shp

    # calculate the relative dem diff
    surr_dem_diff_list = vector_gpd.read_attribute_values_list(surrounding_shp, 'demD_mean')
    merge_poly_dem_diff_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_mean')
    # convert to float type (this also turns None values into NaN)
    surr_dem_diff_list = np.array(surr_dem_diff_list, dtype=float)
    merge_poly_dem_diff_list = np.array(merge_poly_dem_diff_list, dtype=float)

    if len(surr_dem_diff_list) != len(merge_poly_dem_diff_list):
        raise ValueError('The number of surr_dem_diff_list and merge_poly_dem_diff_list is different')
    relative_dem_diff_list = [mer - sur for sur, mer in zip(surr_dem_diff_list, merge_poly_dem_diff_list)]

    merge_poly_demD_std_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_std')
    merge_poly_demD_count_list = vector_gpd.read_attribute_values_list(merged_shp, 'demD_count')

    # remove large ones
    save_polyons = []
    save_demD_mean_list = []
    save_demD_std_list = []
    save_demD_count_list = []
    save_rel_diff_list = []
    save_surr_demD_list = []
    rm_rel_dem_diff_count = 0
    rm_min_area_count = 0
    for idx in range(len(remain_polyons)):
        # remove polygons whose relative dem diff is above the threshold
        if relative_dem_diff_list[idx] > dem_diff_thread_m:
            rm_rel_dem_diff_count += 1
            continue

        # converting MultiPolygon to Polygon (in the merge-shp step) may create some small polygons
        if remain_polyons[idx].area < min_area:
            rm_min_area_count += 1
            continue

        save_polyons.append(remain_polyons[idx])
        save_demD_mean_list.append(merge_poly_dem_diff_list[idx])
        save_demD_std_list.append(merge_poly_demD_std_list[idx])
        save_demD_count_list.append(merge_poly_demD_count_list[idx])
        save_rel_diff_list.append(relative_dem_diff_list[idx])
        save_surr_demD_list.append(surr_dem_diff_list[idx])

    print('removed %d polygons based on relative demD and %d based on min_area, %d remain' % (rm_rel_dem_diff_count, rm_min_area_count, len(save_polyons)))

    if len(save_polyons) < 1:
        print('Warning, no polygons remain after filtering based on relative demD')
        return None

    poly_ids = [item + 1 for item in range(len(save_polyons))]
    poly_areas = [poly.area for poly in save_polyons]

    save_pd = pd.DataFrame({'poly_id': poly_ids, 'poly_area': poly_areas, 'demD_mean': save_demD_mean_list,
                            'demD_std': save_demD_std_list, 'demD_count': save_demD_count_list,
                            'surr_demD': save_surr_demD_list, 'rel_demD': save_rel_diff_list,
                            'Polygon': save_polyons})

    vector_gpd.save_polygons_to_files(save_pd, 'Polygon', wkt, save_shp)

    return save_shp
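
# A minimal sketch (with made-up numbers) of the relative dem-diff step above,
# showing why the attribute lists are cast to float first: None entries from
# the attribute table become NaN, and a NaN compared against the threshold is
# False, so such polygons are kept rather than crashing the loop.
def _relative_demD_example():
    import numpy as np
    surr = np.array([0.1, None, -0.5], dtype=float)      # surrounding demD_mean, one value missing
    merged = np.array([-2.0, -1.5, -0.25], dtype=float)  # merged-polygon demD_mean
    rel = [mer - sur for sur, mer in zip(surr, merged)]  # same list comprehension as above
    removed = [bool(r > -0.5) for r in rel]              # same test as above, threshold = -0.5 m
    print(removed)  # [False, False, True]: the polygon with a NaN value is kept, not removed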
Example #17
def get_file_size_dem_tarball(dem_index_shp,
                              extent_polys,
                              pre_name,
                              xlsx_size_path,
                              poly_ids=None):
    # read dem polygons and url
    dem_polygons, dem_urls = vector_gpd.read_polygons_attributes_list(
        dem_index_shp, 'fileurl', b_fix_invalid_polygon=False)

    if os.path.isfile(xlsx_size_path):
        size_table = pd.read_excel(xlsx_size_path)
        save_idx_list = size_table['index'].to_list()
        save_url_list = size_table['fileurl'].to_list()
        save_size_list = size_table['filesize'].to_list()
    else:
        save_idx_list = [item for item in range(len(dem_urls))]  # index list
        save_url_list = dem_urls
        save_size_list = [float('nan')] * len(save_idx_list)

    basic.outputlogMessage('%d dem polygons in %s' %
                           (len(dem_polygons), dem_index_shp))

    if poly_ids is None:
        poly_ids = [item for item in range(len(extent_polys))]

    for count, (idx, ext_poly) in enumerate(zip(poly_ids, extent_polys)):
        basic.outputlogMessage(
            'get ArcticDEM filesize for the %d th extent (%d/%d)' %
            (idx, count, len(extent_polys)))

        save_filesize_txt = pre_name + '_dem_FileSize_poly_%d.txt' % idx
        if os.path.isfile(save_filesize_txt):
            basic.outputlogMessage('%s exists, skip' % save_filesize_txt)
            continue

        # get fileurl
        dem_poly_idx_list = vector_gpd.get_poly_index_within_extent(
            dem_polygons, ext_poly)
        basic.outputlogMessage('found %d DEM files within the %d th extent' %
                               (len(dem_poly_idx_list), idx))
        urls = [dem_urls[id] for id in dem_poly_idx_list]
        url_size_list = [save_size_list[id] for id in dem_poly_idx_list]

        if len(urls) > 0:
            total_count = len(urls)
            for ii, (url, fileS, url_idx) in enumerate(
                    zip(urls, url_size_list, dem_poly_idx_list)):
                # skip entries that are local file paths (start with /mnt) rather than valid urls
                if url.startswith('/mnt') and url.endswith('.tif'):
                    basic.outputlogMessage("error: not a valid url: %s" % url)
                    continue
                if not math.isnan(fileS):
                    # the file size is already known, no need to query it again
                    continue
                url_size_GB = get_one_url_file_size(url, ii, total_count)
                url_size_list[ii] = url_size_GB
                save_size_list[url_idx] = url_size_GB

            url_size_list_noNone = [
                item for item in url_size_list if not math.isnan(item)
            ]

            if len(url_size_list_noNone) != len(url_size_list):
                basic.outputlogMessage(
                    'There are %d NaN values in url_size_list' %
                    (len(url_size_list) - len(url_size_list_noNone)))

            total_size_GB = sum(url_size_list_noNone)

            basic.outputlogMessage(
                'the total size of files to be downloaded is %.4lf GB for the %d th extent'
                % (total_size_GB, (idx + 1)))
            with open(save_filesize_txt, 'w') as f_obj:
                f_obj.writelines('%d DEM files, total size is %.6lf GB \n' %
                                 (len(urls), total_size_GB))
        else:
            basic.outputlogMessage(
                'Warning, cannot find DEMs within the %d th extent' % idx)

    # save table
    save_dict = {
        'index': save_idx_list,
        'filesize': save_size_list,
        'fileurl': save_url_list
    }

    save_dict_pd = pd.DataFrame(save_dict)
    # set strings_to_urls to False because the number of URLs can exceed Excel's
    # limit of 65,530 URLs per worksheet
    # https://github.com/cxn03651/write_xlsx/issues/42
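    # note: newer pandas versions removed the 'options' keyword from ExcelWriter;
    # there, the equivalent (assuming the xlsxwriter engine) would be
    # engine='xlsxwriter', engine_kwargs={'options': {'strings_to_urls': False}}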
    with pd.ExcelWriter(xlsx_size_path, options={'strings_to_urls':
                                                 False}) as writer:
        save_dict_pd.to_excel(writer, sheet_name='url_file_size')

    return None
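
# get_one_url_file_size() is called above but not defined in this example.
# A minimal, hypothetical sketch of such a helper, using an HTTP HEAD request
# and the Content-Length header; the real implementation in the original
# script may differ (e.g., it could read sizes from an index instead).
def get_one_url_file_size_sketch(url, idx, total_count, timeout=60):
    import requests
    print('querying file size %d / %d' % (idx + 1, total_count))
    try:
        resp = requests.head(url, allow_redirects=True, timeout=timeout)
        resp.raise_for_status()
        size_bytes = int(resp.headers.get('Content-Length', 0))
        return size_bytes / (1024.0 ** 3)   # bytes to GB
    except requests.RequestException as e:
        print('failed to get the size of %s: %s' % (url, str(e)))
        return float('nan')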
Example #18
def main(options, args):

    process_num = options.process_num
    buffer_size = options.buffer_size
    # perform the selection grid by grid
    basic.setlogfile('select_RTS_YOLO_demDiff_headwall_%s.txt' %
                     timeTools.get_now_time_str())

    b_grid = options.b_grid
    if b_grid:
        # process the selection grid by grid
        extent_shp_or_ids_txt = args[0]
        yolo_result_dir = os.path.expanduser(
            '~/Data/Arctic/alaska/autoMapping/alaskaNS_yolov4_1')
        dem_subsidence_dir = grid_dem_diffs_segment_dir
        grid_headwall_dir = grid_dem_headwall_shp_dir

        # read grids and ids
        time0 = time.time()
        all_grid_polys, all_ids = vector_gpd.read_polygons_attributes_list(
            grid_20_shp, 'id')
        print('time cost of read polygons and attributes', time.time() - time0)

        # get grid ids based on input extent
        grid_base_name = os.path.splitext(
            os.path.basename(extent_shp_or_ids_txt))[0]
        grid_polys, grid_ids = get_grid_20(extent_shp_or_ids_txt,
                                           all_grid_polys, all_ids)

        # check dem difference existence
        grid_rts_shps, grid_id_no_rts_shp = get_existing_select_grid_rts(
            grid_rts_shp_dir, grid_base_name, grid_ids)

        if len(grid_id_no_rts_shp) > 0:
            # refine grid_polys
            if len(grid_ids) > len(grid_id_no_rts_shp):
                id_index = [grid_ids.index(id) for id in grid_id_no_rts_shp]
                grid_polys = [grid_polys[idx] for idx in id_index]
            #
            rts_shp_folders = select_rts_map_demDiff_headwall_grids(
                yolo_result_dir,
                dem_subsidence_dir,
                grid_headwall_dir,
                grid_polys,
                grid_id_no_rts_shp,
                grid_base_name,
                process_num=process_num)
    else:
        # processing the selection for two input shapefile
        yolo_box_shp = args[0]
        dem_subsidence_shp = args[1]
        print('polygon group 1:', yolo_box_shp)
        print('polygon group 2:', dem_subsidence_shp)

        if options.save_path is not None:
            save_path = options.save_path
        else:
            save_path = io_function.get_name_by_adding_tail(
                yolo_box_shp, 'select')

        select_polygons_overlap_others_in_group2(yolo_box_shp,
                                                 dem_subsidence_shp,
                                                 save_path,
                                                 buffer_size=buffer_size,
                                                 process_num=process_num)

    pass
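
# main() above expects an "options" object providing process_num, buffer_size,
# b_grid and save_path. A minimal sketch of a matching command-line entry
# point; the option names, flags and defaults are assumptions, not taken from
# the original script.
if __name__ == '__main__':
    from optparse import OptionParser
    usage = 'usage: %prog [options] extent_shp_or_ids_txt | yolo_box_shp dem_subsidence_shp'
    parser = OptionParser(usage=usage)
    parser.add_option('--process_num', dest='process_num', type='int', default=4,
                      help='number of processes to run in parallel')
    parser.add_option('-b', '--buffer_size', dest='buffer_size', type='float', default=300,
                      help='buffer size (meters) used when checking overlaps')
    parser.add_option('-g', '--b_grid', dest='b_grid', action='store_true', default=False,
                      help='process the selection grid by grid')
    parser.add_option('-s', '--save_path', dest='save_path', default=None,
                      help='path of the output shapefile')
    (options, args) = parser.parse_args()
    main(options, args)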