Пример #1
0
def main():
    """Build a tiled DEM mosaic by running dem_mosaic once per output tile.

    Reads the parsed command-line arguments (input filenames, t_srs, tr,
    t_projwin, stat, georef_tile_size, threads, output prefix), splits the
    mosaic extent into square tiles, keeps only the tiles whose footprint
    intersects at least one input dataset, runs one dem_mosaic subprocess per
    valid tile through a thread pool, builds a vrt from the expected tile
    filenames and finally archives and removes the dem_mosaic log files.
    """
    parser = getparser()
    args = parser.parse_args()

    #Input filelist
    fn_list = args.src_fn_list
    #Might hit OS open file limit here
    print("Loading all datasets")
    ds_list = [gdal.Open(fn) for fn in fn_list]

    #Mosaic t_srs
    print("Parsing t_srs")
    t_srs = warplib.parse_srs(args.t_srs, ds_list)
    print(t_srs)

    #Mosaic res
    print("Parsing tr")
    tr = warplib.parse_res(args.tr, ds_list, t_srs=t_srs)
    print(tr)

    #Mosaic extent
    #xmin, ymin, xmax, ymax
    print("Parsing t_projwin")
    t_projwin = warplib.parse_extent(args.t_projwin, ds_list, t_srs=t_srs)
    print(t_projwin)
    #This could trim off some fraction of a pixel around margins
    t_projwin = geolib.extent_round(t_projwin, tr)
    mos_xmin, mos_ymin, mos_xmax, mos_ymax = t_projwin

    stat = args.stat
    if stat is not None:
        print("Mosaic type: %s" % stat)
    else:
        print("Mosaic type: Weighted average")

    #Tile dimensions in output projected units (meters)
    #Assume square
    tile_width = args.georef_tile_size
    tile_height = tile_width

    #This is number of simultaneous processes, each with one thread
    threads = args.threads

    o = args.o
    if o is None:
        o = 'mos_%im/mos' % tr
    odir = os.path.dirname(o)
    #If dirname is empty, use prefix for new directory
    if not odir:
        odir = o
        o = os.path.join(odir, o)
    if not os.path.exists(odir):
        os.makedirs(odir)
    iolib.setstripe(odir, threads)

    #Compute extent geom for all input datsets
    print("Computing extent geom for all input datasets")
    input_geom_dict = OrderedDict()
    for ds in ds_list:
        #Could use filename as key here
        input_geom_dict[ds] = geolib.ds_geom(ds, t_srs)

    #Mosaic tile size
    #Should have float extent and tile dim here
    ntiles_w = int(math.ceil((mos_xmax - mos_xmin) / tile_width))
    ntiles_h = int(math.ceil((mos_ymax - mos_ymin) / tile_height))
    ntiles = ntiles_w * ntiles_h
    print("%i (%i cols x %i rows) tiles required for full mosaic" %
          (ntiles, ntiles_w, ntiles_h))

    print("Computing extent geom for all output tiles")
    tile_dict = OrderedDict()
    for i in range(ntiles_w):
        for j in range(ntiles_h):
            #Row-major tile number, rows counted down from the top edge
            tilenum = j * ntiles_w + i
            tile_xmin = mos_xmin + i * tile_width
            tile_xmax = mos_xmin + (i + 1) * tile_width
            tile_ymax = mos_ymax - j * tile_height
            tile_ymin = mos_ymax - (j + 1) * tile_height
            #Corner coord needed for geom (closed ring)
            x = [tile_xmin, tile_xmax, tile_xmax, tile_xmin, tile_xmin]
            y = [tile_ymax, tile_ymax, tile_ymin, tile_ymin, tile_ymax]
            tile_geom_wkt = 'POLYGON(({0}))'.format(', '.join(
                ['{0} {1}'.format(*a) for a in zip(x, y)]))
            tile_geom = ogr.CreateGeometryFromWkt(tile_geom_wkt)
            tile_geom.AssignSpatialReference(t_srs)
            tile_dict[tilenum] = {'geom': tile_geom}

    print(
        "Computing valid intersections between input dataset geom and tile geom"
    )
    #A tile is valid as soon as one input footprint intersects it
    #Could also preserve list of input files that intersect tile
    #Then only process those files for given tile bounds
    #Avoid loading all files for each dem_mosaic call
    out_tile_list = []
    for tilenum, tile_info in tile_dict.items():
        tile_geom = tile_info['geom']
        #items()/values() instead of iteritems() so this also runs on Python 3
        if any(tile_geom.Intersects(ds_geom)
               for ds_geom in input_geom_dict.values()):
            out_tile_list.append(tilenum)

    #Deduplicate first, then sort, so the final list is actually ordered
    out_tile_list = sorted(set(out_tile_list))
    print("%i valid output tiles" % len(out_tile_list))
    if not out_tile_list:
        #Nothing to mosaic; avoids max() on an empty sequence below
        print("No output tiles intersect the input datasets")
        return

    #Number of digits used to zero-pad tile numbers in output filenames
    ni = max(len(str(i)) for i in out_tile_list)
    out_tile_list_str = ' '.join(map(str, out_tile_list))
    print(out_tile_list_str)

    #Better to dump dictionary here, use json
    out_fn = o + '_tilenum_list.txt'
    with open(out_fn, 'w') as f:
        f.write(out_tile_list_str)

    print("Running dem_mosaic in parallel with %i threads" % threads)
    dem_mosaic_args = [fn_list, o, tr, t_srs, t_projwin, tile_width, 1]
    delay = 0.1

    tile_fn_list = []
    #The executor is exited first (waiting for all jobs), then devnull is closed
    with open(os.devnull, 'w') as outf, \
            ThreadPoolExecutor(max_workers=threads) as executor:
        for tile in out_tile_list:
            #Inefficient approach providing full filename list to each dem_mosaic tile
            #Passing only intersecting files had issues with dem_mosaic reducing bounding box
            cmd = geolib.get_dem_mosaic_cmd(*dem_mosaic_args,
                                            tile=tile,
                                            stat=stat)
            executor.submit(subprocess.call,
                            cmd,
                            stdout=outf,
                            stderr=subprocess.STDOUT)
            #Expected output filename for this tile, used for the vrt below
            tile_fn = '%s-tile-%0*i.tif' % (o, ni, tile)
            if stat is not None:
                tile_fn = os.path.splitext(tile_fn)[0] + '-%s.tif' % stat
            tile_fn_list.append(tile_fn)
            #Stagger process launches
            time.sleep(delay)

    print("Creating vrt of valid tiles")
    vrt_fn = o + '.vrt'
    if stat is not None:
        vrt_fn = os.path.splitext(vrt_fn)[0] + '_%s.vrt' % stat
    cmd = ['gdalbuildvrt', vrt_fn]
    cmd.extend(tile_fn_list)
    print(cmd)
    subprocess.call(cmd)

    #Should create tile index shp/kml from tile_geom

    #This cleans up all of the log txt files (potentially 1000s of files)
    #Want to preserve these, as they contain list of DEMs that went into each tile
    log_fn_list = glob.glob(o + '-log-dem_mosaic-*.txt')
    print("Cleaning up %i dem_mosaic log files" % len(log_fn_list))
    if stat is not None:
        tar_fn = o + '_%s_dem_mosaic_log.tar.gz' % stat
    else:
        tar_fn = o + '_dem_mosaic_log.tar.gz'
    #Archive every log before deleting any of them
    with tarfile.open(tar_fn, "w:gz") as tar:
        for log_fn in log_fn_list:
            tar.add(log_fn)
    for log_fn in log_fn_list:
        os.remove(log_fn)
Пример #2
0
#Script setup: create the output directory and open the site polygon shapefile
#given as the first command-line argument

#ext='DEM_%im.tif' % res
#buffer = 1000
#NOTE(review): buffer and min_area are not used in the visible part of the script;
#their units are presumably those of the site polygons - confirm
buffer = None
min_area = 1000000

#Previously hard-coded working directories, superseded by the current directory
#topdir='/nobackupp8/deshean/conus_combined'
#topdir = '/nobackupp8/deshean/conus_combined/sites/rainier/rerun/dem_coreg'
#topdir='/nobackupp8/deshean/hma'
topdir = os.getcwd()
#outdir = 'sites_landslides'
#outdir = 'sites_snow'
outdir = 'sites'
outdir = os.path.join(topdir, outdir)
if not os.path.exists(outdir):
    os.makedirs(outdir)
iolib.setstripe(outdir)

#This contains polygons defining study areas
if len(sys.argv) < 2:
    #Fail with a usage message rather than a bare IndexError
    sys.exit('Usage: %s site_shp_fn' % os.path.basename(sys.argv[0]))
site_shp_fn = sys.argv[1]
if not os.path.exists(site_shp_fn):
    sys.exit('Unable to find input shp: %s' % site_shp_fn)

site_shp_ds = ogr.Open(site_shp_fn)
if site_shp_ds is None:
    #ogr.Open returns None when the file cannot be opened as a vector dataset
    sys.exit('Unable to open input shp: %s' % site_shp_fn)
site_shp_lyr = site_shp_ds.GetLayer()
site_shp_srs = site_shp_lyr.GetSpatialRef()
#Names of all attribute fields in the layer (used to locate the site name field)
site_shp_lyr_defn = site_shp_lyr.GetLayerDefn()
site_shp_lyr_fieldnames = [
    site_shp_lyr_defn.GetFieldDefn(i).GetName()
    for i in range(site_shp_lyr_defn.GetFieldCount())
]
Пример #3
0
def main():
    """Generate tiled DEM mosaics for one or more dem_mosaic statistics.

    Workflow, as implemented below:
      1. Resolve the list of statistics, the input filename list (optionally
         read from a single .txt file) and the output prefix/directory.
      2. Load a cached tile dictionary from '<o>_tile_dict.pkl' if it exists,
         otherwise open all inputs, derive t_srs/tr/t_projwin, build the tile
         grid, record which inputs intersect each tile and pickle the result.
      3. Build one dem_mosaic command per (tile, stat) whose output is missing,
         and run them either through a PBS job (when the hostname contains
         'nasa') or through a local thread pool.
      4. For each stat, optionally convert index tiles to timestamp tiles,
         build a vrt of the existing tiles and archive the dem_mosaic logs.
    """
    parser = getparser()
    args = parser.parse_args()

    #Default to weighted mean when no stat was requested
    stat_list = ['wmean']
    if args.stat is not None:
        if isinstance(args.stat, str):
            stat_list = args.stat.split()
        else:
            stat_list = args.stat

    print("The following mosaics will be generated:")
    print(stat_list)

    #Tile dimensions in output projected units (meters)
    #Assume square
    tile_width = args.georef_tile_size
    tile_height = tile_width

    #This is number of simultaneous processes, each with one thread
    threads = args.threads

    #Might hit OS open file limit here
    #Workaround is to provide input filelist
    if len(args.src_fn_list) == 1 and os.path.splitext(
            args.src_fn_list[0])[-1] == '.txt':
        print("Reading filenames from input text file")
        with open(args.src_fn_list[0]) as f:
            fn_list = f.read().splitlines()
    else:
        fn_list = args.src_fn_list

    #Sort?

    #Create output directory
    o = args.o
    if o is None:
        #tr is not known yet at this point, so the default prefix cannot
        #contain the resolution (was: 'mos/mos' % tr, which always raised)
        o = 'mos/mos'
    odir = os.path.dirname(o)
    #If dirname is empty, use prefix for new directory
    if not odir:
        odir = o
        o = os.path.join(odir, o)
    if not os.path.exists(odir):
        os.makedirs(odir)
    iolib.setstripe(odir, threads)

    out_pickle_fn = o + '_tile_dict.pkl'
    if os.path.exists(out_pickle_fn):
        print("Loading existing tile dictionary")
        #NOTE: pickle.load executes arbitrary code from the file; only reuse
        #tile dictionaries written by this script in a trusted directory
        with open(out_pickle_fn, 'rb') as f:
            tile_dict = pickle.load(f)
        #Every tile entry carries the mosaic-wide parameters, take the first
        dummy = list(tile_dict.values())[0]
        tr = dummy['tr']
        t_srs = osr.SpatialReference()
        t_srs.ImportFromProj4(dummy['t_srs'])
        t_projwin = dummy['t_projwin']
    else:
        print("Loading input datasets")
        print(
            "Note: this could take several minutes depending on number of inputs and I/O performance"
        )
        ds_list = []
        for n, fn in enumerate(fn_list):
            if (n % 100 == 0):
                print('%i of %i done' % (n, len(fn_list)))
            ds_list.append(gdal.Open(fn))

        #Mosaic t_srs
        print("\nParsing t_srs")
        t_srs = warplib.parse_srs(args.t_srs, ds_list)
        print(t_srs.ExportToProj4())
        #Output file names will contain coordinate string
        latlon = bool(t_srs.IsGeographic())

        #Mosaic res
        print("\nParsing tr")
        tr = warplib.parse_res(args.tr, ds_list, t_srs=t_srs)
        print(tr)

        #Mosaic extent
        #xmin, ymin, xmax, ymax
        print("Determining t_projwin (bounding box for inputs)")
        t_projwin = warplib.parse_extent(args.t_projwin, ds_list, t_srs=t_srs)
        print(t_projwin)
        #Ensure that our extent is whole integer multiples of the mosaic res
        #This could trim off some fraction of a pixel around margins
        t_projwin = geolib.extent_round(t_projwin, tr)
        mos_xmin, mos_ymin, mos_xmax, mos_ymax = t_projwin

        #Compute extent geom for all input datsets
        print("Computing extent geom for all input datasets")
        input_geom_dict = OrderedDict()
        for n, ds in enumerate(ds_list):
            if (n % 100 == 0):
                print('%i of %i done' % (n, len(ds_list)))
            ds_geom = geolib.ds_geom(ds, t_srs)
            ds_fn = ds.GetFileList()[0]
            #Keyed by filename; store a duplicate of the geometry
            input_geom_dict[ds_fn] = geolib.geom_dup(ds_geom)
            ds = None

        #Drop the references to the opened datasets, only the geoms are needed now
        ds_list = None

        #Mosaic tile size
        #Should have float extent and tile dim here
        ntiles_w = int(math.ceil((mos_xmax - mos_xmin) / tile_width))
        ntiles_h = int(math.ceil((mos_ymax - mos_ymin) / tile_height))
        ntiles = ntiles_w * ntiles_h
        print("%i (%i cols x %i rows) tiles required for full mosaic" %
              (ntiles, ntiles_w, ntiles_h))

        print("Computing extent geom for all output tiles")
        tile_dict = OrderedDict()
        for i in range(ntiles_w):
            for j in range(ntiles_h):
                #Row-major tile number, rows counted down from the top edge
                tilenum = j * ntiles_w + i
                tile_xmin = mos_xmin + i * tile_width
                tile_xmax = mos_xmin + (i + 1) * tile_width
                tile_ymax = mos_ymax - j * tile_height
                tile_ymin = mos_ymax - (j + 1) * tile_height
                #Corner coord needed for geom (closed ring)
                x = [tile_xmin, tile_xmax, tile_xmax, tile_xmin, tile_xmin]
                y = [tile_ymax, tile_ymax, tile_ymin, tile_ymin, tile_ymax]
                tile_geom_wkt = 'POLYGON(({0}))'.format(', '.join(
                    ['{0} {1}'.format(*a) for a in zip(x, y)]))
                tile_geom = ogr.CreateGeometryFromWkt(tile_geom_wkt)
                tile_geom.AssignSpatialReference(t_srs)
                tile_dict[tilenum] = {}
                tile_dict[tilenum]['geom'] = tile_geom
                tile_dict[tilenum]['extent'] = [
                    tile_xmin, tile_ymin, tile_xmax, tile_ymax
                ]
                #Add center coord tile name
                cx = tile_geom.Centroid().GetX()
                cy = tile_geom.Centroid().GetY()
                #These round down
                #TanDEM-X uses lower left corner as name
                if latlon:
                    tilename = '{:.0f}N'.format(cy) + '{:03.0f}E'.format(cx)
                else:
                    tilename = '{:.0f}'.format(cy) + '_' + '{:.0f}'.format(cx)
                tile_dict[tilenum]['tilename'] = tilename

                #Add additional parameters that can be loaded at a later time without reprocessing all input datasets
                tile_dict[tilenum]['tr'] = tr
                tile_dict[tilenum]['t_srs'] = t_srs.ExportToProj4()
                #This is full extent, but preserve here
                tile_dict[tilenum]['t_projwin'] = t_projwin

        print(
            "Computing valid intersections between input dataset geom and tile geom"
        )
        for tilenum in sorted(tile_dict.keys()):
            print('%i of %i' % (tilenum, len(tile_dict.keys())))
            tile_geom = tile_dict[tilenum]['geom']
            tile_dict_fn = [
                ds_fn for ds_fn, ds_geom in input_geom_dict.items()
                if tile_geom.Intersects(ds_geom)
            ]
            #Only tiles with at least one intersecting input get a 'fn_list'
            if tile_dict_fn:
                tile_dict[tilenum]['fn_list'] = tile_dict_fn

        #Drop tiles without inputs; iterate over a snapshot of the keys so
        #entries can be deleted without deep-copying every tile geometry
        out_tile_list = []
        for tilenum in list(tile_dict.keys()):
            if 'fn_list' in tile_dict[tilenum]:
                out_tile_list.append(tilenum)
            else:
                del tile_dict[tilenum]

        print("%i valid output tiles" % len(out_tile_list))
        #Deduplicate first, then sort, so the printed list is actually ordered
        out_tile_list = sorted(set(out_tile_list))
        out_tile_list_str = ' '.join(map(str, out_tile_list))
        print(out_tile_list_str)

        #Write out dictionary with list of fn for each tile
        print("Writing out tile dictionary")
        with open(out_pickle_fn, 'wb') as f:
            pickle.dump(tile_dict, f)

    if not tile_dict:
        #Nothing to mosaic; avoids max() on an empty sequence below
        print("No valid output tiles, nothing to do")
        return

    delay = 0.001

    #Should run the tiles with the largest file count first, as they will likely take longer
    tile_dict = OrderedDict(
        sorted(tile_dict.items(),
               key=lambda item: len(item[1]['fn_list']),
               reverse=True))
    #Do tiles with smallest file count first
    #tile_dict = OrderedDict(sorted(tile_dict.items(), key=lambda item: len(item[1]['fn_list']), reverse=False))
    out_tile_list = list(tile_dict.keys())
    #Number of integers to use for tile number
    ni = max(len(str(i)) for i in out_tile_list)

    #If we're on Pleiades, split across multiple nodes
    #Hack with GNU parallel right now
    import socket
    pbs = 'nasa' in socket.getfqdn()

    cmd_list = []
    f_cmd = None

    if pbs:
        out_cmd_fn = o + '_cmd.sh'
        #An existing command file is reused as is
        if not os.path.exists(out_cmd_fn):
            print("Creating text file of commands")
            f_cmd = open(out_cmd_fn, 'w')

    try:
        for tile in out_tile_list:
            tile_fn_base = '%s-tile-%0*i.tif' % (o, ni, tile)
            tile_fn_list_txt = os.path.splitext(
                tile_fn_base)[0] + '_fn_list.txt'
            tile_fn_list_in = tile_dict[tile]['fn_list']
            #Write out DEM file list for the tile
            with open(tile_fn_list_txt, 'w') as f_fn_list:
                f_fn_list.write('\n'.join(tile_fn_list_in))
            for stat in stat_list:
                tile_fn = os.path.splitext(tile_fn_base)[0] + '-%s.tif' % stat
                #Skip tiles that were already generated
                if os.path.exists(tile_fn):
                    continue
                dem_mos_threads = 1
                #Use more threads for tiles with many inputs, will take much longer to finish
                #Should do some analysis of totals for all fn_list
                if len(tile_fn_list_in) > 80:
                    dem_mos_threads = 4
                dem_mosaic_args = {
                    'fn_list': tile_fn_list_in,
                    'o': tile_fn,
                    'fn_list_txt': tile_fn_list_txt,
                    'tr': tr,
                    't_srs': t_srs,
                    't_projwin': tile_dict[tile]['extent'],
                    'threads': dem_mos_threads,
                    'stat': stat,
                }
                cmd = geolib.get_dem_mosaic_cmd(**dem_mosaic_args)
                #Hack to clean up extra quotes around proj4 string here '""'
                cmd_list.append([s.replace('\"', '') for s in cmd])
                if f_cmd is not None:
                    #Write out command to file; done only for commands built
                    #in this iteration so an unbound or stale cmd is never written
                    f_cmd.write('%s\n' % ' '.join(str(i) for i in cmd))
    finally:
        #Flush and close the command file before it is handed to qsub
        if f_cmd is not None:
            f_cmd.close()
            f_cmd = None

    if pbs:
        stripecount = 28
        iolib.setstripe(odir, stripecount)
        #Get number of available devel nodes, submit with
        #$(node_stats.sh | grep -A 4 'devel' | grep Broadwell | awk '{print $NF}')
        pbs_script = os.path.join(
            os.path.split(os.path.realpath(__file__))[0],
            'dem_mosaic_parallel.pbs')
        cmd = ['qsub', '-v', 'cmd_fn=%s' % out_cmd_fn, pbs_script]
        print(' '.join(str(i) for i in cmd))
        subprocess.call(cmd)
        #This is currently the hack to interrupt and wait for pbs to finish, then 'continue' in ipdb
        import ipdb
        ipdb.set_trace()
        #Need to wait for job to finish, could get job id, then while qstat
    else:
        print("Running dem_mosaic in parallel with %i threads" % threads)
        #The executor is exited first (waiting for all jobs), then devnull is closed
        with open(os.devnull, 'w') as outf, \
                ThreadPoolExecutor(max_workers=threads) as executor:
            for cmd in cmd_list:
                executor.submit(subprocess.call,
                                cmd,
                                stdout=outf,
                                stderr=subprocess.STDOUT)
                #Stagger process launches
                time.sleep(delay)

    index_stats = ['lastindex', 'firstindex', 'medianindex']

    #Now aggegate into stats
    #Could do this in parallel
    for stat in stat_list:
        #Collect the tiles that were actually produced for this stat
        tile_fn_list = []
        for tile in out_tile_list:
            tile_fn_base = '%s-tile-%0*i.tif' % (o, ni, tile)
            tile_fn = os.path.splitext(tile_fn_base)[0] + '-%s.tif' % stat
            if os.path.exists(tile_fn):
                tile_fn_list.append(tile_fn)
        print("\nMosaic type: %s" % stat)
        #Convert dem_mosaic index files to timestamp arrays
        if stat in index_stats:
            #Only process tiles that do not have a _ts.tif product yet
            tile_fn_list_torun = [
                tile_fn for tile_fn in tile_fn_list if
                not os.path.exists(os.path.splitext(tile_fn)[0] + '_ts.tif')
            ]
            if tile_fn_list_torun:
                print(
                    "Running dem_mosaic_index_ts in parallel with %i threads" %
                    threads)
                from multiprocessing import Pool
                pool = Pool(processes=threads)
                try:
                    #map blocks until every tile has been processed
                    pool.map(make_dem_mosaic_index_ts, tile_fn_list_torun)
                finally:
                    pool.close()
                    pool.join()
            #Update filenames with ts.tif extension
            tile_fn_list = [
                os.path.splitext(tile_fn)[0] + '_ts.tif'
                for tile_fn in tile_fn_list
            ]

        print("\nCreating vrt of valid tiles")
        vrt_fn = o + '.vrt'
        if stat is not None:
            vrt_fn = os.path.splitext(vrt_fn)[0] + '_%s.vrt' % stat
            if stat in index_stats:
                vrt_fn = os.path.splitext(vrt_fn)[0] + '_ts.vrt'
        cmd = ['gdalbuildvrt']
        cmd.extend(['-r', 'cubic'])
        #cmd.append('-tap')
        cmd.append(vrt_fn)
        vrt_fn_list = []
        for tile_fn in tile_fn_list:
            if os.path.exists(tile_fn):
                vrt_fn_list.append(tile_fn)
            else:
                print("Missing file: %s" % tile_fn)
        cmd.extend(sorted(vrt_fn_list))
        print(cmd)
        subprocess.call(cmd)

        #Should create tile index shp/kml from tile_geom

        #This cleans up all of the log txt files (potentially 1000s of files)
        #Want to preserve these, as they contain list of DEMs that went into each tile
        log_fn_list = glob.glob(o + '*%s.tif-log-dem_mosaic-*.txt' % stat)
        print("\nCleaning up %i dem_mosaic log files" % len(log_fn_list))
        if stat is not None:
            tar_fn = o + '_%s_dem_mosaic_log.tar.gz' % stat
        else:
            tar_fn = o + '_dem_mosaic_log.tar.gz'
        #Archive every log before deleting any of them
        with tarfile.open(tar_fn, "w:gz") as tar:
            for log_fn in log_fn_list:
                tar.add(log_fn)
        for log_fn in log_fn_list:
            os.remove(log_fn)