Example #1
def prepare_timeseries(outfile,
                       unw_file,
                       metadata,
                       processor,
                       baseline_dir=None,
                       box=None):
    print('-' * 50)
    print('preparing timeseries file: {}'.format(outfile))

    # copy metadata to meta
    meta = {key: value for key, value in metadata.items()}
    phase2range = float(meta['WAVELENGTH']) / (4. * np.pi)

    # grab date list from the filename
    unw_files = sorted(glob.glob(unw_file))
    date12_list = [os.path.splitext(os.path.basename(i))[0] for i in unw_files]
    num_file = len(unw_files)
    print('number of unwrapped interferograms: {}'.format(num_file))

    ref_date = date12_list[0].split('_')[0]
    date_list = [ref_date] + [date12.split('_')[1] for date12 in date12_list]
    num_date = len(date_list)
    print('number of acquisitions: {}\n{}'.format(num_date, date_list))

    # baseline info
    # initialize to zeros so that "bperp" is defined even without baseline_dir
    pbase = np.zeros(num_date, dtype=np.float32)
    if baseline_dir is not None:
        # read baseline data
        baseline_dict = isce_utils.read_baseline_timeseries(
            baseline_dir, processor=processor, ref_date=ref_date)
        # dict to array: average the top/bottom perpendicular baseline per date
        for i in range(num_date):
            pbase_top, pbase_bottom = baseline_dict[date_list[i]]
            pbase[i] = (pbase_top + pbase_bottom) / 2.0

    # size info
    if not box:
        box = (0, 0, int(meta['WIDTH']), int(meta['LENGTH']))
    kwargs = dict(xoff=box[0],
                  yoff=box[1],
                  win_xsize=box[2] - box[0],
                  win_ysize=box[3] - box[1])

    # define dataset structure
    dates = np.array(date_list, dtype=np.string_)
    ds_name_dict = {
        "date": [dates.dtype, (num_date, ), dates],
        "bperp": [np.float32, (num_date, ), pbase],
        "timeseries":
        [np.float32, (num_date, box[3] - box[1], box[2] - box[0]), None],
    }

    # initiate HDF5 file
    meta["FILE_TYPE"] = "timeseries"
    meta["UNIT"] = "m"
    meta['REF_DATE'] = ref_date
    writefile.layout_hdf5(outfile, ds_name_dict, metadata=meta)

    # writing data to HDF5 file
    print("writing data to HDF5 file {} with 'a' (append) mode ...".format(outfile))
    with h5py.File(outfile, "a") as f:
        prog_bar = ptime.progressBar(maxValue=num_file)
        for i, unw_file in enumerate(unw_files):
            # read data using gdal
            ds = gdal.Open(unw_file, gdal.GA_ReadOnly)
            data = np.array(ds.GetRasterBand(2).ReadAsArray(**kwargs),
                            dtype=np.float32)

            f["timeseries"][i + 1] = data * phase2range
            prog_bar.update(i + 1, suffix=date12_list[i])
        prog_bar.close()

        print('set value at the first acquisition to ZERO.')
        f["timeseries"][0] = 0.

    print('finished writing to HDF5 file: {}'.format(outfile))
    return outfile
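For context, a minimal call might look like the sketch below. The paths, the wavelength value, and the 'tops' processor name are placeholders (not taken from the example above), and the function's MintPy/ISCE imports are assumed to be available:

meta = {'WAVELENGTH': '0.05546576', 'LENGTH': '800', 'WIDTH': '1000'}  # hypothetical values
prepare_timeseries(outfile='timeseries.h5',
                   unw_file='interferograms/*.unw',  # glob pattern; expects names like 20200101_20200113.unw
                   metadata=meta,
                   processor='tops',                 # assumed processor name
                   baseline_dir='baselines')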
Example #2
def main(iargs=None):
    inps = cmd_line_parse(iargs)
    if inps.updateMode:
        print('update mode: ON')
    else:
        print('update mode: OFF')

    # extract metadata
    meta = extract_metadata(inps.unwFile)
    box, meta = read_subset_box(inps.template_file, meta)

    length = int(meta["LENGTH"])
    width = int(meta["WIDTH"])
    num_pair = int(meta["NUMBER_OF_PAIRS"])

    # prepare output directory
    out_dir = os.path.dirname(inps.outfile[0])
    os.makedirs(out_dir, exist_ok=True)

    ########## output file 1 - ifgramStack
    # define dataset structure for ifgramStack
    dsNameDict = {
        "date": (np.dtype('S8'), (num_pair, 2)),
        "dropIfgram": (np.bool_, (num_pair, )),
        "bperp": (np.float32, (num_pair, )),
        "unwrapPhase": (np.float32, (num_pair, length, width)),
        "coherence": (np.float32, (num_pair, length, width)),
        "connectComponent": (np.int16, (num_pair, length, width)),
    }

    if run_or_skip(inps, dsNameDict, out_file=inps.outfile[0]) == 'run':
        # initiate h5 file with defined structure
        meta['FILE_TYPE'] = 'ifgramStack'
        writefile.layout_hdf5(inps.outfile[0],
                              dsNameDict,
                              meta,
                              compression=inps.compression)

        # write data to h5 file in disk
        write_ifgram_stack(inps.outfile[0],
                           inps.unwFile,
                           inps.corFile,
                           inps.connCompFile,
                           box=box)

    ########## output file 2 - geometryGeo
    # define dataset structure for geometry
    dsNameDict = {
        "height": (np.float32, (length, width)),
        "incidenceAngle": (np.float32, (length, width)),
        "slantRangeDistance": (np.float32, (length, width)),
    }
    if inps.azAngleFile is not None:
        dsNameDict["azimuthAngle"] = (np.float32, (length, width))
    if inps.waterMaskFile is not None:
        dsNameDict["waterMask"] = (np.bool_, (length, width))

    if run_or_skip(inps, dsNameDict, out_file=inps.outfile[1]) == 'run':
        # initiate h5 file with defined structure
        meta['FILE_TYPE'] = 'geometry'
        writefile.layout_hdf5(inps.outfile[1],
                              dsNameDict,
                              meta,
                              compression=inps.compression)

        # write data to disk
        write_geometry(inps.outfile[1],
                       demFile=inps.demFile,
                       incAngleFile=inps.incAngleFile,
                       azAngleFile=inps.azAngleFile,
                       waterMaskFile=inps.waterMaskFile,
                       box=box)

    print('-' * 50)
    return inps.outfile
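A note on the dataset-definition conventions seen across these examples: writefile.layout_hdf5 is fed either name -> (dtype, shape) tuples to allocate empty datasets (as here), or name -> [dtype, shape, data] lists that also write initial values (as in Example #1, where data may be None). A minimal illustration of the two dictionary shapes, with placeholder sizes:

import numpy as np

num_pair, length, width = 10, 800, 1000                  # hypothetical sizes
dates = np.array(['20200101', '20200113'], dtype=np.string_)

# allocate-only form: dataset name -> (dtype, shape)
dsNameDict = {"unwrapPhase": (np.float32, (num_pair, length, width))}

# allocate-and-fill form: dataset name -> [dtype, shape, initial data or None]
ds_name_dict = {"date": [dates.dtype, (len(dates),), dates]}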
Example #3
def ifgram_inversion(inps=None):
    """Phase triangulatino of small baseline interferograms

    Parameters: inps - namespace
    Example:    inps = cmd_line_parse()
                ifgram_inversion(inps)
    """

    if not inps:
        inps = cmd_line_parse()
    start_time = time.time()

    ## 1. input info

    stack_obj = ifgramStack(inps.ifgramStackFile)
    stack_obj.open(print_msg=False)
    date12_list = stack_obj.get_date12_list(dropIfgram=True)
    date_list = stack_obj.get_date_list(dropIfgram=True)
    length, width = stack_obj.length, stack_obj.width

    # 1.1 read values on the reference pixel
    inps.refPhase = stack_obj.get_reference_phase(
        unwDatasetName=inps.obsDatasetName,
        skip_reference=inps.skip_ref,
        dropIfgram=True)

    # 1.2 design matrix
    A = stack_obj.get_design_matrix4timeseries(date12_list)[0]
    num_ifgram, num_date = A.shape[0], A.shape[1] + 1
    inps.numIfgram = num_ifgram

    # 1.3 print key setup info
    msg = '-------------------------------------------------------------------------------\n'
    if inps.minNormVelocity:
        suffix = 'deformation velocity'
    else:
        suffix = 'deformation phase'
    msg += 'least-squares solution with L2 min-norm on: {}\n'.format(suffix)
    msg += 'minimum redundancy: {}\n'.format(inps.minRedundancy)
    msg += 'weight function: {}\n'.format(inps.weightFunc)

    if inps.maskDataset:
        if inps.maskDataset in ['coherence', 'offsetSNR']:
            suffix = '{} < {}'.format(inps.maskDataset, inps.maskThreshold)
        else:
            suffix = '{} == 0'.format(inps.maskDataset)
        msg += 'mask out pixels with: {}\n'.format(suffix)
    else:
        msg += 'mask: no\n'

    if np.linalg.matrix_rank(A) < A.shape[1]:
        msg += '***WARNING: the network is NOT fully connected.\n'
        msg += '\tInversion result can be biased!\n'
        msg += '\tContinue to use SVD to resolve the offset between different subsets.\n'
    msg += '-------------------------------------------------------------------------------'
    print(msg)

    print('number of interferograms: {}'.format(num_ifgram))
    print('number of acquisitions  : {}'.format(num_date))
    print('number of lines   : {}'.format(length))
    print('number of columns : {}'.format(width))

    ## 2. prepare output

    # 2.1 metadata
    meta = dict(stack_obj.metadata)
    for key in configKeys:
        meta[key_prefix + key] = str(vars(inps)[key])

    # 2.2 instantiate time-series
    dsNameDict = {
        "date": (np.dtype('S8'), (num_date, )),
        "bperp": (np.float32, (num_date, )),
        "timeseries": (np.float32, (num_date, length, width)),
    }

    meta['FILE_TYPE'] = 'timeseries'
    meta['UNIT'] = 'm'
    meta['REF_DATE'] = date_list[0]

    ts_obj = timeseries(inps.tsFile)
    ts_obj.layout_hdf5(dsNameDict, meta)

    # write date time-series
    date_list_utf8 = [dt.encode('utf-8') for dt in date_list]
    writefile.write_hdf5_block(inps.tsFile, date_list_utf8, datasetName='date')

    # write bperp time-series
    pbase = stack_obj.get_perp_baseline_timeseries(dropIfgram=True)
    writefile.write_hdf5_block(inps.tsFile, pbase, datasetName='bperp')

    # 2.3 instantiate temporal coherence
    dsNameDict = {"temporalCoherence": (np.float32, (length, width))}
    meta['FILE_TYPE'] = 'temporalCoherence'
    meta['UNIT'] = '1'
    meta.pop('REF_DATE')
    writefile.layout_hdf5(inps.tempCohFile, dsNameDict, metadata=meta)

    # 2.4 instantiate number of inverted observations
    dsNameDict = {"mask": (np.float32, (length, width))}
    meta['FILE_TYPE'] = 'mask'
    meta['UNIT'] = '1'
    writefile.layout_hdf5(inps.numInvFile, dsNameDict, metadata=meta)

    ## 3. run the inversion / estimation and write to disk

    # 3.1 split ifgram_file into blocks to save memory
    box_list, num_box = split2boxes(inps.ifgramStackFile,
                                    memory_size=inps.memorySize)

    # 3.2 prepare the input arguments for *_patch()
    data_kwargs = {
        "ifgram_file": inps.ifgramStackFile,
        "ref_phase": inps.refPhase,
        "obs_ds_name": inps.obsDatasetName,
        "weight_func": inps.weightFunc,
        "min_norm_velocity": inps.minNormVelocity,
        "water_mask_file": inps.waterMaskFile,
        "mask_ds_name": inps.maskDataset,
        "mask_threshold": inps.maskThreshold,
        "min_redundancy": inps.minRedundancy
    }

    # 3.3 invert / write block-by-block
    for i, box in enumerate(box_list):
        box_width = box[2] - box[0]
        box_length = box[3] - box[1]
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.
                  format(i + 1, num_box))
            print('box width:  {}'.format(box_width))
            print('box length: {}'.format(box_length))

        # update box argument in the input data
        data_kwargs['box'] = box

        if inps.cluster == 'no':
            # non-parallel
            ts, temp_coh, num_inv_ifg = ifgram_inversion_patch(
                **data_kwargs)[:-1]

        else:
            # parallel
            print('\n\n------- start parallel processing using Dask -------')

            # initiate the output data
            ts = np.zeros((num_date, box_length, box_width), np.float32)
            temp_coh = np.zeros((box_length, box_width), np.float32)
            num_inv_ifg = np.zeros((box_length, box_width), np.float32)

            # initiate dask cluster and client
            cluster_obj = cluster.DaskCluster(inps.cluster,
                                              inps.numWorker,
                                              config_name=inps.config)
            cluster_obj.open()

            # run dask
            ts, temp_coh, num_inv_ifg = cluster_obj.run(
                func=ifgram_inversion_patch,
                func_data=data_kwargs,
                results=[ts, temp_coh, num_inv_ifg])

            # close dask cluster and client
            cluster_obj.close()

            print('------- finished parallel processing -------\n\n')

        # write the block to disk
        # with 3D block in [z0, z1, y0, y1, x0, x1]
        # and  2D block in         [y0, y1, x0, x1]
        # time-series - 3D
        block = [0, num_date, box[1], box[3], box[0], box[2]]
        writefile.write_hdf5_block(inps.tsFile,
                                   data=ts,
                                   datasetName='timeseries',
                                   block=block)

        # temporal coherence - 2D
        block = [box[1], box[3], box[0], box[2]]
        writefile.write_hdf5_block(inps.tempCohFile,
                                   data=temp_coh,
                                   datasetName='temporalCoherence',
                                   block=block)

        # number of inverted obs - 2D
        writefile.write_hdf5_block(inps.numInvFile,
                                   data=num_inv_ifg,
                                   datasetName='mask',
                                   block=block)

        m, s = divmod(time.time() - start_time, 60)
        print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s))

    # 3.4 update output data on the reference pixel
    if not inps.skip_ref:
        # grab ref_y/x
        ref_y = int(stack_obj.metadata['REF_Y'])
        ref_x = int(stack_obj.metadata['REF_X'])
        print('-' * 50)
        print('update values on the reference pixel: ({}, {})'.format(
            ref_y, ref_x))

        print('set temporal coherence on the reference pixel to 1.')
        with h5py.File(inps.tempCohFile, 'r+') as f:
            f['temporalCoherence'][ref_y, ref_x] = 1.

        print('set # of observations on the reference pixel to {}'.format(
            num_ifgram))
        with h5py.File(inps.numInvFile, 'r+') as f:
            f['mask'][ref_y, ref_x] = num_ifgram

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s))
    return
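The block convention used by writefile.write_hdf5_block throughout these examples is [z0, z1, y0, y1, x0, x1] for 3D datasets and [y0, y1, x0, x1] for 2D ones. A minimal h5py sketch of what such a block write amounts to (illustrative only; the real MintPy function also handles messaging and other details, and the dataset is assumed to already exist):

import h5py

def write_block_sketch(fname, data, ds_name, block):
    """Write a block of data into an existing HDF5 dataset."""
    with h5py.File(fname, 'r+') as f:
        if data.ndim == 3:
            z0, z1, y0, y1, x0, x1 = block
            f[ds_name][z0:z1, y0:y1, x0:x1] = data
        else:
            y0, y1, x0, x1 = block
            f[ds_name][y0:y1, x0:x1] = data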
Example #4
def main(iargs=None):
    """Generate the temporal coherence file and its coherence-based mask."""

    Parser = MinoPyParser(iargs, script='generate_temporal_coherence')
    inps = Parser.parse()

    dateStr = datetime.datetime.strftime(datetime.datetime.now(),
                                         '%Y%m%d:%H%M%S')

    if iargs is not None:
        msg = os.path.basename(__file__) + ' ' + ' '.join(iargs[:])
    else:
        msg = os.path.basename(__file__) + ' ' + ' '.join(sys.argv[1:])
    print(dateStr + " * " + msg)

    start_time = time.time()
    os.chdir(inps.work_dir)

    minopy_dir = os.path.dirname(inps.work_dir)
    minopy_template_file = os.path.join(minopy_dir, 'minopyApp.cfg')
    inps.ifgramStackFile = os.path.join(inps.work_dir, 'inputs/ifgramStack.h5')

    template = readfile.read_template(minopy_template_file)
    if template['minopy.timeseries.tempCohType'] == 'auto':
        template['minopy.timeseries.tempCohType'] = 'full'

    atr = {}
    atr['minopy.timeseries.tempCohType'] = template[
        'minopy.timeseries.tempCohType']
    ut.add_attribute(inps.ifgramStackFile, atr)

    # check if input observation dataset exists.
    stack_obj = ifgramStack(inps.ifgramStackFile)
    stack_obj.open(print_msg=False)
    metadata = stack_obj.get_metadata()
    length, width = stack_obj.length, stack_obj.width

    inps.invQualityFile = 'temporalCoherence.h5'
    mintpy_mask_file = os.path.join(inps.work_dir, 'maskTempCoh.h5')

    quality_name = os.path.join(
        minopy_dir, 'inverted/tempCoh_{}'.format(
            template['minopy.timeseries.tempCohType']))
    quality = np.memmap(quality_name,
                        mode='r',
                        dtype='float32',
                        shape=(length, width))

    # inps.waterMaskFile = os.path.join(minopy_dir, 'waterMask.h5')
    inps.waterMaskFile = None
    water_mask = np.ones(quality.shape, dtype=np.int8)

    if template['minopy.timeseries.waterMask'] != 'auto':
        inps.waterMaskFile = template['minopy.timeseries.waterMask']
        if os.path.exists(inps.waterMaskFile):
            with h5py.File(inps.waterMaskFile, 'r') as f2:
                if 'waterMask' in f2:
                    water_mask = f2['waterMask'][:, :]
                else:
                    water_mask = f2['mask'][:, :]

    if inps.shadow_mask:
        if os.path.exists(os.path.join(minopy_dir, 'shadow_mask.h5')):
            with h5py.File(os.path.join(minopy_dir, 'shadow_mask.h5'),
                           'r') as f2:
                shadow_mask = f2['mask'][:, :]
                water_mask = water_mask * shadow_mask

    inv_quality = np.zeros((quality.shape[0], quality.shape[1]))
    inv_quality_name = 'temporalCoherence'
    inv_quality[:, :] = quality[:, :]
    inv_quality[inv_quality <= 0] = np.nan
    inv_quality[water_mask < 0.5] = np.nan
    if os.path.exists(mintpy_mask_file):
        mintpy_mask = readfile.read(mintpy_mask_file, datasetName='mask')[0]
        inv_quality[mintpy_mask == 0] = np.nan

    if not os.path.exists(inps.invQualityFile):
        metadata['UNIT'] = '1'
        metadata['FILE_TYPE'] = inv_quality_name
        if 'REF_DATE' in metadata:
            metadata.pop('REF_DATE')
        ds_name_dict = {metadata['FILE_TYPE']: [np.float32, (length, width)]}
        writefile.layout_hdf5(inps.invQualityFile,
                              ds_name_dict,
                              metadata=metadata)

    # write the block to disk
    # with 3D block in [z0, z1, y0, y1, x0, x1]
    # and  2D block in         [y0, y1, x0, x1]
    block = [0, length, 0, width]
    writefile.write_hdf5_block(inps.invQualityFile,
                               data=inv_quality,
                               datasetName=inv_quality_name,
                               block=block)

    get_phase_linking_coherence_mask(metadata, inps.work_dir)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s))

    return
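Example #4 reads the coherence product with np.memmap instead of loading the whole raster into memory. A standalone illustration of that pattern, with a hypothetical file name and raster size:

import numpy as np

length, width = 800, 1000                                # hypothetical raster size
np.zeros((length, width), dtype=np.float32).tofile('tempCoh_full')  # create a flat binary file
quality = np.memmap('tempCoh_full', mode='r', dtype='float32',
                    shape=(length, width))               # read-only, lazily paged from disk
print(quality[0, :5])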
Example #5
def run_geocode(inps):
    """geocode all input files"""
    start_time = time.time()

    # feed the largest file for resample object initiation
    ind_max = np.argmax([os.path.getsize(i) for i in inps.file])

    # prepare geometry for geocoding
    res_obj = resample(lut_file=inps.lookupFile,
                       src_file=inps.file[ind_max],
                       SNWE=inps.SNWE,
                       lalo_step=inps.laloStep,
                       interp_method=inps.interpMethod,
                       fill_value=inps.fillValue,
                       nprocs=inps.nprocs,
                       max_memory=inps.maxMemory,
                       software=inps.software,
                       print_msg=True)
    res_obj.open()
    res_obj.prepare()

    # resample input files one by one
    for infile in inps.file:
        print('-' * 50 + '\nresampling file: {}'.format(infile))
        ext = os.path.splitext(infile)[1]
        atr = readfile.read_attribute(infile, datasetName=inps.dset)
        outfile = auto_output_filename(infile, inps)

        # update_mode
        if inps.updateMode:
            print('update mode: ON')
            if ut.run_or_skip(outfile, in_file=[infile,
                                                inps.lookupFile]) == 'skip':
                continue

        ## prepare output
        # update metadata
        if inps.radar2geo:
            atr = attr.update_attribute4radar2geo(atr, res_obj=res_obj)
        else:
            atr = attr.update_attribute4geo2radar(atr, res_obj=res_obj)

        # instantiate output file
        file_is_hdf5 = ext in ['.h5', '.he5']
        if file_is_hdf5:
            writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile)
        else:
            dsDict = dict()

        ## run
        dsNames = readfile.get_dataset_list(infile, datasetName=inps.dset)
        maxDigit = max([len(i) for i in dsNames])
        for dsName in dsNames:

            if not file_is_hdf5:
                dsDict[dsName] = np.zeros((res_obj.length, res_obj.width))

            # loop for block-by-block IO
            for i in range(res_obj.num_box):
                src_box = res_obj.src_box_list[i]
                dest_box = res_obj.dest_box_list[i]

                # read
                print('-' * 50 +
                      '\nreading {d:<{w}} in block {b} from {f} ...'.format(
                          d=dsName,
                          w=maxDigit,
                          b=src_box,
                          f=os.path.basename(infile)))

                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=src_box,
                                     print_msg=False)[0]

                # resample
                data = res_obj.run_resample(src_data=data, box_ind=i)

                # write / save block data
                if data.ndim == 3:
                    block = [
                        0, data.shape[0], dest_box[1], dest_box[3],
                        dest_box[0], dest_box[2]
                    ]
                else:
                    block = [
                        dest_box[1], dest_box[3], dest_box[0], dest_box[2]
                    ]

                if file_is_hdf5:
                    print('write data in block {} to file: {}'.format(
                        block, outfile))
                    writefile.write_hdf5_block(outfile,
                                               data=data,
                                               datasetName=dsName,
                                               block=block,
                                               print_msg=False)
                else:
                    dsDict[dsName][block[0]:block[1], block[2]:block[3]] = data

            # for binary file: ensure same data type
            if not file_is_hdf5:
                dsDict[dsName] = np.array(dsDict[dsName], dtype=data.dtype)

        # write binary file
        if not file_is_hdf5:
            writefile.write(dsDict,
                            out_file=outfile,
                            metadata=atr,
                            ref_file=infile)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.\n'.format(m, s))
    return outfile
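A hedged usage sketch for run_geocode: the namespace fields below mirror only the attributes the function reads directly, with placeholder values; helper functions such as auto_output_filename may require additional fields in practice, and the MintPy imports used above are assumed to be available.

import numpy as np
from types import SimpleNamespace

inps = SimpleNamespace(
    file=['velocity.h5'], dset=None,
    lookupFile='inputs/geometryRadar.h5',
    SNWE=None, laloStep=None, interpMethod='nearest', fillValue=np.nan,
    nprocs=1, maxMemory=4, software='pyresample',
    updateMode=False, radar2geo=True)
run_geocode(inps)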
Example #6
def prepare_timeseries(outfile, unw_file, metadata, processor, baseline_dir=None):
    print('-'*50)
    print('preparing timeseries file: {}'.format(outfile))

    # copy metadata to meta
    meta = {key : value for key, value in metadata.items()}
    phase2range = -1. * float(meta['WAVELENGTH']) / (4. * np.pi)

    # grab date list from the filename
    unw_files = sorted(glob.glob(unw_file))
    date12_list = [os.path.splitext(os.path.basename(i))[0] for i in unw_files]
    num_file = len(unw_files)
    print('number of unwrapped interferograms: {}'.format(num_file))

    ref_date = date12_list[0].split('_')[0]
    date_list = [ref_date] + [date12.split('_')[1] for date12 in date12_list]
    num_date = len(date_list)
    print('number of acquisitions: {}\n{}'.format(num_date, date_list))

    # define dataset structure
    length, width = int(meta['LENGTH']), int(meta['WIDTH'])
    dsNameDict = {
        "date"       : (np.dtype("S8"), (num_date,)),
        "timeseries" : (np.float32,     (num_date, length, width))
    }

    # baseline info
    baseline_dict = {}
    if baseline_dir is not None:
        # read baseline data
        baseline_dict = isce_utils.read_baseline_timeseries(baseline_dir,
                                                            processor=processor,
                                                            ref_date=ref_date)
        # dict to array
        pbase = np.zeros(num_date, dtype=np.float32)
        for i in range(num_date):
            pbase_top, pbase_bottom = baseline_dict[date_list[i]]
            pbase[i] = (pbase_top + pbase_bottom) / 2.0

        # update dataset structure
        dsNameDict["bperp"] = (np.float32, (num_date,))

    # initiate HDF5 file
    meta["FILE_TYPE"] = "timeseries"
    meta["UNIT"] = "m"
    meta['REF_DATE'] = ref_date
    writefile.layout_hdf5(outfile, dsNameDict, meta)

    # writing data to HDF5 file
    print("writing data to HDF5 file {} with 'a' (append) mode ...".format(outfile))
    with h5py.File(outfile, "a") as f:
        f["date"][:,] = np.array([np.string_(i) for i in date_list])
        f["bperp"][:,] = pbase

        prog_bar = ptime.progressBar(maxValue=num_file)
        for i in range(num_file):
            # read data using gdal
            ds = gdal.Open(unw_files[i], gdal.GA_ReadOnly)
            data = np.array(ds.GetRasterBand(2).ReadAsArray())

            f["timeseries"][i+1] = data * phase2range
            prog_bar.update(i+1, suffix=date12_list[i])
        prog_bar.close()

        print('set value at the first acquisition to ZERO.')
        f["timeseries"][0] = 0.

    print('finished writing to HDF5 file: {}'.format(outfile))
    return outfile
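Note the sign difference from Example #1: here phase2range includes a factor of -1, i.e. unwrapped phase phi (radians) is converted to range change via delta_r = -lambda * phi / (4 * pi). A quick sanity check of the scale, assuming the Sentinel-1 C-band wavelength:

import numpy as np

wavelength = 0.05546576                        # meters, assumed value
phase2range = -1. * wavelength / (4. * np.pi)
# one full phase cycle (2*pi) corresponds to half a wavelength of range change
print(phase2range * 2 * np.pi)                 # ~ -0.0277 m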
Example #7
def main(iargs=None):
    inps = cmd_line_parse(iargs)
    start_time = time.time()

    if inps.updateMode:
        print('update mode: ON')
    else:
        print('update mode: OFF')

    # extract metadata
    meta = extract_metadata(inps.unwFile)
    box, meta = read_subset_box(inps.template_file, meta)
    if inps.xstep * inps.ystep > 1:
        meta = attr.update_attribute4multilook(meta,
                                               lks_y=inps.ystep,
                                               lks_x=inps.xstep)

    length = int(meta["LENGTH"])
    width = int(meta["WIDTH"])
    num_pair = int(meta["NUMBER_OF_PAIRS"])

    # prepare output directory
    out_dir = os.path.dirname(inps.outfile[0])
    os.makedirs(out_dir, exist_ok=True)

    ########## output file 1 - ifgramStack
    # define dataset structure for ifgramStack
    dsNameDict = {
        "date": (np.dtype('S8'), (num_pair, 2)),
        "dropIfgram": (np.bool_, (num_pair, )),
        "bperp": (np.float32, (num_pair, )),
        "unwrapPhase": (np.float32, (num_pair, length, width)),
        "coherence": (np.float32, (num_pair, length, width)),
        "connectComponent": (np.int16, (num_pair, length, width)),
    }
    if inps.magFile is not None:
        dsNameDict['magnitude'] = (np.float32, (num_pair, length, width))

    if run_or_skip(inps, dsNameDict, out_file=inps.outfile[0]) == 'run':
        # initiate h5 file with defined structure
        meta['FILE_TYPE'] = 'ifgramStack'
        writefile.layout_hdf5(inps.outfile[0],
                              dsNameDict,
                              meta,
                              compression=inps.compression)

        # write data to h5 file in disk
        write_ifgram_stack(inps.outfile[0],
                           unwStack=inps.unwFile,
                           cohStack=inps.corFile,
                           connCompStack=inps.connCompFile,
                           ampStack=inps.magFile,
                           box=box,
                           xstep=inps.xstep,
                           ystep=inps.ystep)

    ########## output file 2 - geometryGeo
    # define dataset structure for geometry
    dsNameDict = {
        "height": (np.float32, (length, width)),
        "incidenceAngle": (np.float32, (length, width)),
        "slantRangeDistance": (np.float32, (length, width)),
    }
    if inps.azAngleFile is not None:
        dsNameDict["azimuthAngle"] = (np.float32, (length, width))
    if inps.waterMaskFile is not None:
        dsNameDict["waterMask"] = (np.bool_, (length, width))

    if run_or_skip(inps, dsNameDict, out_file=inps.outfile[1]) == 'run':
        # initiate h5 file with defined structure
        meta['FILE_TYPE'] = 'geometry'
        writefile.layout_hdf5(inps.outfile[1],
                              dsNameDict,
                              meta,
                              compression=inps.compression)

        # write data to disk
        write_geometry(inps.outfile[1],
                       demFile=inps.demFile,
                       incAngleFile=inps.incAngleFile,
                       azAngleFile=inps.azAngleFile,
                       waterMaskFile=inps.waterMaskFile,
                       box=box,
                       xstep=inps.xstep,
                       ystep=inps.ystep)

    print('-' * 50)

    # time info
    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.'.format(m, s))

    return inps.outfile
Example #8
def calculate_delay_timeseries(tropo_file, dis_file, geom_file, GACOS_dir):
    """calculate delay time-series and write to HDF5 file"""

    ## get list of dates
    atr = readfile.read_attribute(dis_file)
    ftype = atr['FILE_TYPE']
    if ftype == 'timeseries':
        date_list = timeseries(dis_file).get_date_list()

    elif ftype == '.unw':
        date12 = readfile.read_attribute(dis_file)['DATE12']
        date_list = ptime.yyyymmdd(date12.split('-'))

    else:
        raise ValueError(
            'un-supported displacement file type: {}'.format(ftype))

    # list of dates --> list of ztd files
    ztd_files = [
        os.path.join(GACOS_dir, '{}.ztd'.format(i)) for i in date_list
    ]

    # check missing ztd files
    flag = np.ones(len(date_list), dtype=np.bool_)
    for i in range(len(date_list)):
        if not os.path.isfile(ztd_files[i]):
            print('WARNING: file {} not found, skipping it'.format(
                ztd_files[i]))
            flag[i] = False

    if np.any(flag == 0):
        date_list = np.array(date_list)[flag].tolist()
        ztd_files = np.array(ztd_files)[flag].tolist()

    ## update_mode
    def get_dataset_size(fname):
        atr = readfile.read_attribute(fname)
        return (atr['LENGTH'], atr['WIDTH'])

    def run_or_skip(ztd_files, tropo_file, geom_file):
        print('update mode: ON')
        print('output file: {}'.format(tropo_file))
        flag = 'skip'

        # check existance and modification time
        if ut.run_or_skip(out_file=tropo_file,
                          in_file=ztd_files,
                          print_msg=False) == 'run':
            flag = 'run'
            print('1) output file either does NOT exist or is NOT newer than all ZTD files.')

        else:
            print('1) output file exists and is newer than all ZTD files.')

            # check dataset size in space / time
            date_list = [str(re.findall(r'\d{8}', i)[0]) for i in ztd_files]
            if (get_dataset_size(tropo_file) != get_dataset_size(geom_file)
                    or any(i not in timeseries(tropo_file).get_date_list()
                           for i in date_list)):
                flag = 'run'
                print(
                    '2) output file does NOT have the same len/wid as the geometry file {} or does NOT contain all dates'
                    .format(geom_file))
            else:
                print(
                    '2) output file has the same len/wid as the geometry file and contains all dates'
                )

                # check if output file is fully written
                with h5py.File(tropo_file, 'r') as f:
                    if np.all(f['timeseries'][-1, :, :] == 0):
                        flag = 'run'
                        print('3) output file is NOT fully written.')
                    else:
                        print('3) output file is fully written.')

        # result
        print('run or skip: {}'.format(flag))
        return flag

    if run_or_skip(ztd_files, tropo_file, geom_file) == 'skip':
        return

    ## prepare output file

    # metadata
    atr['FILE_TYPE'] = 'timeseries'
    atr['UNIT'] = 'm'

    # remove metadata related with double reference
    # because absolute delay is calculated and saved
    for key in ['REF_DATE', 'REF_X', 'REF_Y', 'REF_LAT', 'REF_LON']:
        if key in atr.keys():
            atr.pop(key)

    # instantiate time-series
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    num_date = len(date_list)
    dates = np.array(date_list, dtype=np.string_)
    ds_name_dict = {
        "date": [dates.dtype, (num_date, ), dates],
        "timeseries": [np.float32, (num_date, length, width), None],
    }
    writefile.layout_hdf5(tropo_file, ds_name_dict, metadata=atr)

    ## calculate phase delay

    # read geometry
    print('read incidenceAngle from file: {}'.format(geom_file))
    inc_angle = readfile.read(geom_file, datasetName='incidenceAngle')[0]
    cos_inc_angle = np.cos(inc_angle * np.pi / 180.0)

    if 'Y_FIRST' in atr.keys():
        pts_new = None

    else:
        # pixel coordinates in geometry file
        print('get pixel coordinates in geometry file')
        lats, lons = ut.get_lat_lon(atr, geom_file)
        pts_new = np.hstack((lats.reshape(-1, 1), lons.reshape(-1, 1)))

    # loop for date-by-date IO
    prog_bar = ptime.progressBar(maxValue=num_date)
    for i in range(num_date):
        date_str = date_list[i]
        ztd_file = ztd_files[i]

        # calc delay
        if 'Y_FIRST' in atr.keys():
            delay = get_delay_geo(ztd_file, atr, cos_inc_angle)

        else:
            delay = get_delay_radar(ztd_file, cos_inc_angle, pts_new)

        # write delay to file
        block = [i, i + 1, 0, length, 0, width]
        writefile.write_hdf5_block(tropo_file,
                                   data=delay,
                                   datasetName='timeseries',
                                   block=block,
                                   print_msg=False)

        prog_bar.update(i + 1, suffix=os.path.basename(ztd_file))
    prog_bar.close()

    return tropo_file
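A hedged usage sketch; the file and directory names below are placeholders following common MintPy conventions, and the *.ztd files are expected under GACOS_dir, one per acquisition date:

calculate_delay_timeseries(tropo_file='inputs/GACOS.h5',
                           dis_file='timeseries.h5',
                           geom_file='inputs/geometryRadar.h5',
                           GACOS_dir='./GACOS')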
Example #9
def multilook_file(infile,
                   lks_y,
                   lks_x,
                   outfile=None,
                   method='average',
                   margin=[0, 0, 0, 0]):
    """ Multilook input file
    Parameters: infile - str, path of input file to be multilooked.
                lks_y  - int, number of looks in y / row direction.
                lks_x  - int, number of looks in x / column direction.
                margin - list of 4 int, number of pixels to be skipped during multilooking.
                         useful for offset product, where the marginal pixels are ignored during
                         cross correlation matching.
                outfile - str, path of output file
    Returns:    outfile - str, path of output file
    """
    lks_y = int(lks_y)
    lks_x = int(lks_x)

    # input file info
    atr = readfile.read_attribute(infile)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    k = atr['FILE_TYPE']
    print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile))
    print('number of looks in y / azimuth direction: %d' % lks_y)
    print('number of looks in x / range   direction: %d' % lks_x)
    print('multilook method: {}'.format(method))

    # margin --> box
    if margin != [0, 0, 0, 0]:  # top, bottom, left, right
        box = (margin[2], margin[0], width - margin[3], length - margin[1])
        print(
            'number of pixels to skip in top/bottom/left/right boundaries: {}'.
            format(margin))
    else:
        box = (0, 0, width, length)

    # output file name
    ext = os.path.splitext(infile)[1]
    if not outfile:
        if os.getcwd() == os.path.dirname(os.path.abspath(infile)):
            outfile = os.path.splitext(infile)[0] + '_' + str(
                lks_y) + 'alks_' + str(lks_x) + 'rlks' + ext
        else:
            outfile = os.path.basename(infile)

    # update metadata
    atr = multilook_attribute(atr, lks_y, lks_x, box=box)

    if ext in ['.h5', '.he5']:
        writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile)

    # read source data and multilooking
    dsNames = readfile.get_dataset_list(infile)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('multilooking {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(infile)))

        # split in Y/row direction for IO for HDF5 only
        if ext in ['.h5', '.he5']:
            row_step = 200
        else:
            row_step = box[3] - box[1]

        num_step = int(np.ceil((box[3] - box[1]) / (row_step * lks_y)))
        for i in range(num_step):
            r0 = box[1] + row_step * lks_y * i
            r1 = box[1] + row_step * lks_y * (i + 1)
            r1 = min(r1, box[3])
            # IO box
            box_i = (box[0], r0, box[2], r1)
            # output box in multilooked coordinates (x0 is 0 by construction)
            box_o = (0,
                     int((r0 - box[1]) / lks_y),
                     int((box[2] - box[0]) / lks_x),
                     int((r1 - box[1]) / lks_y))
            print('box: {}'.format(box_o))

            # read / multilook
            if method == 'nearest':
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     xstep=lks_x,
                                     ystep=lks_y,
                                     print_msg=False)[0]

                # fix the size discrepancy between the average / nearest methods
                out_len = box_o[3] - box_o[1]
                out_wid = box_o[2] - box_o[0]
                if data.ndim == 3:
                    data = data[:, :out_len, :out_wid]
                else:
                    data = data[:out_len, :out_wid]

            else:
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     print_msg=False)[0]

                # keep timeseries data as 3D matrix when there is only one acquisition
                # because readfile.read() will squeeze it to 2D
                if atr['FILE_TYPE'] == 'timeseries' and len(data.shape) == 2:
                    data = np.reshape(data, (1, data.shape[0], data.shape[1]))

                data = multilook_data(data, lks_y, lks_x)

            # output block
            if data.ndim == 3:
                block = [
                    0, data.shape[0], box_o[1], box_o[3], box_o[0], box_o[2]
                ]
            else:
                block = [box_o[1], box_o[3], box_o[0], box_o[2]]

            # write
            if ext in ['.h5', '.he5']:
                writefile.write_hdf5_block(outfile,
                                           data=data,
                                           datasetName=dsName,
                                           block=block,
                                           print_msg=False)
            else:
                dsDict[dsName] = data

    # for binary file with 2 bands, always use BIL scheme
    if (len(dsDict.keys()) == 2
            and os.path.splitext(infile)[1] not in ['.h5', '.he5']
            and atr.get('scheme', 'BIL').upper() != 'BIL'):
        print('the input binary file has 2 bands with band interleave as: {}'.
              format(atr['scheme']))
        print(
            'for the output binary file, change the band interleave to BIL as default.'
        )
        atr['scheme'] = 'BIL'

    if ext not in ['.h5', '.he5']:
        writefile.write(dsDict,
                        out_file=outfile,
                        metadata=atr,
                        ref_file=infile)
    return outfile
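A hedged usage sketch: multilook a file by 3 looks in each direction with the default averaging method (the file name is a placeholder):

multilook_file('velocity.h5', lks_y=3, lks_x=3, method='average')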
Example #10
def calc_delay_timeseries(inps):
    """Calculate delay time-series and write it to HDF5 file.
    Parameters: inps : namespace, all input parameters
    Returns:    tropo_file : str, file name of ECMWF.h5
    """
    def get_dataset_size(fname):
        atr = readfile.read_attribute(fname)
        shape = (int(atr['LENGTH']), int(atr['WIDTH']))
        return shape

    def run_or_skip(grib_files, tropo_file, geom_file):
        print('update mode: ON')
        print('output file: {}'.format(tropo_file))
        flag = 'skip'

        # check existance and modification time
        if ut.run_or_skip(out_file=tropo_file,
                          in_file=grib_files,
                          print_msg=False) == 'run':
            flag = 'run'
            print('1) output file either does NOT exist or is NOT newer than all GRIB files.')

        else:
            print('1) output file exists and is newer than all GRIB files.')

            # check dataset size in space / time
            date_list = [
                str(re.findall(r'\d{8}', os.path.basename(i))[0])
                for i in grib_files
            ]
            if (get_dataset_size(tropo_file) != get_dataset_size(geom_file)
                    or any(i not in timeseries(tropo_file).get_date_list()
                           for i in date_list)):
                flag = 'run'
                print(
                    '2) output file does NOT have the same len/wid as the geometry file {} or does NOT contain all dates'
                    .format(geom_file))
            else:
                print(
                    '2) output file has the same len/wid as the geometry file and contains all dates'
                )

                # check if output file is fully written
                with h5py.File(tropo_file, 'r') as f:
                    if np.all(f['timeseries'][-1, :, :] == 0):
                        flag = 'run'
                        print('3) output file is NOT fully written.')
                    else:
                        print('3) output file is fully written.')

        # result
        print('run or skip: {}'.format(flag))
        return flag

    if run_or_skip(inps.grib_files, inps.tropo_file, inps.geom_file) == 'skip':
        return

    ## 1. prepare geometry data
    geom_obj = geometry(inps.geom_file)
    geom_obj.open()
    inps.inc = geom_obj.read(datasetName='incidenceAngle')
    inps.dem = geom_obj.read(datasetName='height')

    # for testing
    if inps.custom_height:
        print(
            'use input custom height of {} m for vertical integration'.format(
                inps.custom_height))
        inps.dem[:] = inps.custom_height

    if 'latitude' in geom_obj.datasetNames:
        # for lookup table in radar-coord (isce, doris)
        inps.lat = geom_obj.read(datasetName='latitude')
        inps.lon = geom_obj.read(datasetName='longitude')

    elif 'Y_FIRST' in geom_obj.metadata:
        # for lookup table in geo-coded (gamma, roipac) and obs. in geo-coord
        inps.lat, inps.lon = ut.get_lat_lon(geom_obj.metadata)

        # convert coordinates to lat/lon, e.g. from UTM for ASF HyP3
        if not geom_obj.metadata['Y_UNIT'].startswith('deg'):
            inps.lat, inps.lon = ut.to_latlon(inps.atr['OG_FILE_PATH'],
                                              inps.lon, inps.lat)

    else:
        # for lookup table in geo-coded (gamma, roipac) and obs. in radar-coord
        inps.lat, inps.lon = ut.get_lat_lon_rdc(inps.atr)

    # mask of valid pixels
    mask = np.multiply(inps.inc != 0, ~np.isnan(inps.inc))

    ## 2. prepare output file
    # metadata
    atr = inps.atr.copy()
    atr['FILE_TYPE'] = 'timeseries'
    atr['UNIT'] = 'm'

    # remove metadata related with double reference
    # because absolute delay is calculated and saved
    for key in ['REF_DATE', 'REF_X', 'REF_Y', 'REF_LAT', 'REF_LON']:
        if key in atr.keys():
            atr.pop(key)

    # instantiate time-series
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    num_date = len(inps.grib_files)
    date_list = [
        str(re.findall(r'\d{8}', os.path.basename(i))[0])
        for i in inps.grib_files
    ]
    dates = np.array(date_list, dtype=np.string_)
    ds_name_dict = {
        "date": [dates.dtype, (num_date, ), dates],
        "timeseries": [np.float32, (num_date, length, width), None],
    }
    writefile.layout_hdf5(inps.tropo_file, ds_name_dict, metadata=atr)

    ## 3. calculate phase delay
    print(
        '\n------------------------------------------------------------------------------'
    )
    print(
        'calculating absolute delay for each date using PyAPS (Jolivet et al., 2011; 2014) ...'
    )
    print('number of grib files used: {}'.format(num_date))

    prog_bar = ptime.progressBar(maxValue=num_date, print_msg=not inps.verbose)
    for i in range(num_date):
        grib_file = inps.grib_files[i]

        # calc tropo delay
        tropo_data = get_delay(grib_file,
                               tropo_model=inps.tropo_model,
                               delay_type=inps.delay_type,
                               dem=inps.dem,
                               inc=inps.inc,
                               lat=inps.lat,
                               lon=inps.lon,
                               mask=mask,
                               verbose=inps.verbose)

        # write tropo delay to file
        block = [i, i + 1, 0, length, 0, width]
        writefile.write_hdf5_block(inps.tropo_file,
                                   data=tropo_data,
                                   datasetName='timeseries',
                                   block=block,
                                   print_msg=False)

        prog_bar.update(i + 1, suffix=os.path.basename(grib_file))
    prog_bar.close()

    return inps.tropo_file
Example #11
def run_deramp(fname,
               ramp_type,
               mask_file=None,
               out_file=None,
               datasetName=None):
    """ Remove ramp from each 2D matrix of input file
    Parameters: fname     : str, data file to be deramped
                ramp_type : str, name of ramp to be estimated.
                mask_file : str, file of mask of pixels used for ramp estimation
                out_file  : str, output file name
                datasetName : str, output dataset name, for ifgramStack file type only
    Returns:    out_file  : str, output file name
    """
    start_time = time.time()
    atr = readfile.read_attribute(fname)
    k = atr['FILE_TYPE']
    length = int(atr['LENGTH'])
    width = int(atr['WIDTH'])

    print('remove {} ramp from file: {}'.format(ramp_type, fname))
    if not out_file:
        fbase, fext = os.path.splitext(fname)
        out_file = '{}_ramp{}'.format(fbase, fext)
    if k == 'ifgramStack':
        out_file = fname

    # mask
    if mask_file and os.path.isfile(mask_file):
        mask = readfile.read(mask_file)[0]
        print('read mask file: ' + mask_file)
    else:
        mask = np.ones((length, width), dtype=np.bool_)
        print('use mask of the whole area')

    # deramping
    if k == 'timeseries':
        # write HDF5 file with defined metadata and (empty) dataset structure
        writefile.layout_hdf5(out_file, ref_file=fname, print_msg=True)

        print('estimating phase ramp one date at a time ...')
        date_list = timeseries(fname).get_date_list()
        num_date = len(date_list)
        prog_bar = ptime.progressBar(maxValue=num_date)
        for i in range(num_date):
            # read
            data = readfile.read(fname, datasetName=date_list[i])[0]
            # deramp
            data = deramp(data, mask, ramp_type=ramp_type, metadata=atr)[0]
            # write
            writefile.write_hdf5_block(out_file,
                                       data,
                                       datasetName='timeseries',
                                       block=[i, i + 1, 0, length, 0, width],
                                       print_msg=False)
            prog_bar.update(i + 1, suffix='{}/{}'.format(i + 1, num_date))
        prog_bar.close()
        print('finished writing to file: {}'.format(out_file))

    elif k == 'ifgramStack':
        obj = ifgramStack(fname)
        obj.open(print_msg=False)
        if not datasetName:
            datasetName = 'unwrapPhase'
        with h5py.File(fname, 'a') as f:
            ds = f[datasetName]
            dsNameOut = '{}_ramp'.format(datasetName)
            if dsNameOut in f.keys():
                dsOut = f[dsNameOut]
                print('access HDF5 dataset /{}'.format(dsNameOut))
            else:
                dsOut = f.create_dataset(dsNameOut,
                                         shape=(obj.numIfgram, length, width),
                                         dtype=np.float32,
                                         chunks=True,
                                         compression=None)
                print('create HDF5 dataset /{}'.format(dsNameOut))

            prog_bar = ptime.progressBar(maxValue=obj.numIfgram)
            for i in range(obj.numIfgram):
                data = ds[i, :, :]
                data = deramp(data, mask, ramp_type=ramp_type, metadata=atr)[0]
                dsOut[i, :, :] = data
                prog_bar.update(i + 1,
                                suffix='{}/{}'.format(i + 1, obj.numIfgram))
            prog_bar.close()
            print('finished writing to file: {}'.format(fname))

    # Single Dataset File
    else:
        data = readfile.read(fname)[0]
        data = deramp(data, mask, ramp_type, metadata=atr)[0]
        print('writing >>> {}'.format(out_file))
        writefile.write(data, out_file=out_file, ref_file=fname)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.'.format(m, s))
    return out_file
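A hedged usage sketch: remove a linear ramp from every epoch of a time-series file, using a hypothetical coherence-based mask file:

run_deramp('timeseries.h5', ramp_type='linear', mask_file='maskTempCoh.h5')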
Example #12
def prepare_stack(outfile,
                  unw_file,
                  metadata,
                  processor,
                  baseline_dir=None,
                  box=None):
    print('-' * 50)
    print('preparing ifgramStack file: {}'.format(outfile))
    # copy metadata to meta
    meta = {key: value for key, value in metadata.items()}

    # get list of *.unw file
    unw_files = sorted(glob.glob(unw_file))
    num_pair = len(unw_files)
    print('number of interferograms:', num_pair)

    # get list of *.unw.conncomp file
    cc_files = [f'{x}.conncomp' for x in unw_files]
    cc_files = [x for x in cc_files if os.path.isfile(x)]
    print(f'number of connected components files: {len(cc_files)}')

    if len(cc_files) != len(unw_files):
        print('the numbers of *.unw and *.unw.conncomp files do NOT match')
        print('skip creating ifgramStack.h5 file.')
        return

    # get date info: date12_list
    date12_list = [os.path.basename(x).split('.')[0] for x in unw_files]

    # prepare baseline info
    # initialize to zeros so that "bperp" is defined even without baseline_dir
    pbase = np.zeros(num_pair, dtype=np.float32)
    if baseline_dir is not None:
        # read baseline timeseries
        baseline_dict = isce_utils.read_baseline_timeseries(
            baseline_dir, processor=processor)

        # calc perp baseline for each pair from the baseline time-series
        print('calc perp baseline pairs from time-series')
        for i, date12 in enumerate(date12_list):
            [date1, date2] = date12.split('_')
            pbase[i] = np.subtract(baseline_dict[date2],
                                   baseline_dict[date1]).mean()

    # size info
    box = box if box else (0, 0, int(meta['WIDTH']), int(meta['LENGTH']))
    kwargs = dict(xoff=box[0],
                  yoff=box[1],
                  win_xsize=box[2] - box[0],
                  win_ysize=box[3] - box[1])

    # define (and fill out some) dataset structure
    date12_arr = np.array([x.split('_') for x in date12_list],
                          dtype=np.string_)
    drop_ifgram = np.ones(num_pair, dtype=np.bool_)
    ds_name_dict = {
        "date": [date12_arr.dtype, (num_pair, 2), date12_arr],
        "bperp": [np.float32, (num_pair, ), pbase],
        "dropIfgram": [np.bool_, (num_pair, ), drop_ifgram],
        "unwrapPhase":
        [np.float32, (num_pair, box[3] - box[1], box[2] - box[0]), None],
        "connectComponent":
        [np.float32, (num_pair, box[3] - box[1], box[2] - box[0]), None],
    }

    # initiate HDF5 file
    meta["FILE_TYPE"] = "ifgramStack"
    writefile.layout_hdf5(outfile, ds_name_dict, metadata=meta)

    # writing data to HDF5 file
    print("writing data to HDF5 file {} with 'a' (append) mode ...".format(outfile))
    with h5py.File(outfile, "a") as f:
        prog_bar = ptime.progressBar(maxValue=num_pair)
        for i, (unw_file, cc_file) in enumerate(zip(unw_files, cc_files)):

            # read/write *.unw file
            ds = gdal.Open(unw_file, gdal.GA_ReadOnly)
            data = np.array(ds.GetRasterBand(2).ReadAsArray(**kwargs),
                            dtype=np.float32)
            f["unwrapPhase"][i] = data

            # read/write *.unw.conncomp file
            ds = gdal.Open(cc_file, gdal.GA_ReadOnly)
            data = np.array(ds.GetRasterBand(1).ReadAsArray(**kwargs),
                            dtype=np.float32)
            f["connectComponent"][i] = data

            prog_bar.update(i + 1, suffix=date12_list[i])
        prog_bar.close()

    print('finished writing to HDF5 file: {}'.format(outfile))
    return outfile
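A hedged usage sketch mirroring Example #1; the paths and the processor name are placeholders, and each *.unw file is expected to have a matching *.unw.conncomp next to it:

meta = {'LENGTH': '800', 'WIDTH': '1000'}    # hypothetical metadata
prepare_stack(outfile='inputs/ifgramStack.h5',
              unw_file='interferograms/*.unw',
              metadata=meta,
              processor='tops',
              baseline_dir='baselines')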
Example #13
def diff_file(file1, file2, out_file=None, force=False, max_num_pixel=2e8):
    """calculate/write file1 - file2

    Parameters: file1   - str, path of file1
                file2   - list of str, path of file2(s)
                out_file - str, path of output file
                force   - bool, overwrite existing output file
                max_num_pixel - float, maximum number of pixels for each block
    """
    start_time = time.time()

    if not out_file:
        fbase, fext = os.path.splitext(file1)
        if len(file2) > 1:
            raise ValueError(
                'output file name is required when differencing more than two files.')
        out_file = '{}_diff_{}{}'.format(
            fbase,
            os.path.splitext(os.path.basename(file2[0]))[0], fext)
    print('{} - {} --> {}'.format(file1, file2, out_file))

    # Read basic info
    atr1 = readfile.read_attribute(file1)
    k1 = atr1['FILE_TYPE']
    atr2 = readfile.read_attribute(file2[0])
    k2 = atr2['FILE_TYPE']
    print('input files are: {} and {}'.format(k1, k2))

    if k1 == 'timeseries':
        if k2 not in ['timeseries', 'giantTimeseries']:
            raise Exception(
                'Input multiple dataset files are not the same file type!')
        if len(file2) > 1:
            raise Exception(
                'Only the subtraction of two files is supported for time-series files, '
                'got {} input files.'.format(len(file2) + 1))

        dateList1 = timeseries(file1).get_date_list()
        if k2 == 'timeseries':
            dateList2 = timeseries(file2[0]).get_date_list()
            unit_fac = 1.
        elif k2 == 'giantTimeseries':
            dateList2 = giantTimeseries(file2[0]).get_date_list()
            unit_fac = 0.001

        # check reference point
        ref_date, ref_y, ref_x = check_reference(atr1, atr2)

        # check dates shared by two timeseries files
        dateListShared = [i for i in dateList1 if i in dateList2]
        dateShared = np.ones((len(dateList1)), dtype=np.bool_)
        if dateListShared != dateList1:
            print('WARNING: {} does not contain all dates in {}'.format(
                file2, file1))
            if force:
                dateListEx = list(set(dateList1) - set(dateListShared))
                print(
                    'Continue and enforce the differencing for their shared dates only.'
                )
                print('\tthe following dates are ignored in the differencing:\n{}'.format(dateListEx))
                dateShared[np.array([dateList1.index(i)
                                     for i in dateListEx])] = 0
            else:
                raise Exception(
                    'To enforce the differencing anyway, use --force option.')

        # instantiate the output file
        writefile.layout_hdf5(out_file, ref_file=file1)

        # block-by-block IO
        length, width = int(atr1['LENGTH']), int(atr1['WIDTH'])
        num_box = int(np.ceil(len(dateList1) * length * width / max_num_pixel))
        box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                               num_split=num_box,
                                               dimension='y',
                                               print_msg=True)

        if ref_y and ref_x:
            ref_box = (ref_x, ref_y, ref_x + 1, ref_y + 1)
            ref_val = readfile.read(file2[0],
                                    datasetName=dateListShared,
                                    box=ref_box)[0] * unit_fac

        for i, box in enumerate(box_list):
            if num_box > 1:
                print('\n------- processing patch {} out of {} --------------'.
                      format(i + 1, num_box))
                print('box: {}'.format(box))

            # read data2 (consider different reference_date/pixel)
            print('read from file: {}'.format(file2[0]))
            data2 = readfile.read(
                file2[0], datasetName=dateListShared, box=box)[0] * unit_fac

            if ref_y is not None and ref_x is not None:
                print('* referencing data from {} to y/x: {}/{}'.format(
                    os.path.basename(file2[0]), ref_y, ref_x))
                data2 -= np.tile(ref_val.reshape(-1, 1, 1),
                                 (1, data2.shape[1], data2.shape[2]))

            if ref_date:
                print('* referencing data from {} to date: {}'.format(
                    os.path.basename(file2[0]), ref_date))
                ref_ind = dateListShared.index(ref_date)
                data2 -= np.tile(data2[ref_ind, :, :], (data2.shape[0], 1, 1))

            # read data1
            print('read from file: {}'.format(file1))
            data = readfile.read(file1, box=box)[0]

            # apply differencing
            mask = data == 0.
            data[dateShared] -= data2
            data[mask] = 0.  # Do not change zero phase value
            del data2

            # write the block
            block = [0, data.shape[0], box[1], box[3], box[0], box[2]]
            writefile.write_hdf5_block(out_file,
                                       data=data,
                                       datasetName=k1,
                                       block=block)

    elif all(i == 'ifgramStack' for i in [k1, k2]):
        obj1 = ifgramStack(file1)
        obj1.open()
        obj2 = ifgramStack(file2[0])
        obj2.open()
        dsNames = list(set(obj1.datasetNames) & set(obj2.datasetNames))
        if len(dsNames) == 0:
            raise ValueError('no common dataset between two files!')
        dsName = [i for i in ifgramDatasetNames if i in dsNames][0]

        # read data
        print('reading {} from file {} ...'.format(dsName, file1))
        data1 = readfile.read(file1, datasetName=dsName)[0]
        print('reading {} from file {} ...'.format(dsName, file2[0]))
        data2 = readfile.read(file2[0], datasetName=dsName)[0]

        # consider reference pixel
        if 'unwrapphase' in dsName.lower():
            print('referencing to pixel ({},{}) ...'.format(
                obj1.refY, obj1.refX))
            ref1 = data1[:, obj1.refY, obj1.refX]
            ref2 = data2[:, obj2.refY, obj2.refX]
            for i in range(data1.shape[0]):
                data1[i, :][data1[i, :] != 0.] -= ref1[i]
                data2[i, :][data2[i, :] != 0.] -= ref2[i]

        # operation and ignore zero values
        data1[data1 == 0] = np.nan
        data2[data2 == 0] = np.nan
        data = data1 - data2
        del data1, data2
        data[np.isnan(data)] = 0.

        # write to file
        dsDict = {}
        dsDict[dsName] = data
        writefile.write(dsDict, out_file=out_file, ref_file=file1)

    # Single dataset file
    else:
        data1 = readfile.read(file1)[0]
        data = np.array(data1, data1.dtype)
        for fname in file2:
            data2 = readfile.read(fname)[0]
            data = np.array(data, dtype=np.float32) - np.array(
                data2, dtype=np.float32)
            data = np.array(data, data1.dtype)
        print('writing >>> ' + out_file)
        writefile.write(data, out_file=out_file, metadata=atr1)

    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs'.format(m, s))

    return out_file
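
The shared-date differencing above preserves no-data pixels by remembering where file1 is exactly zero. A minimal standalone sketch of that idea (illustrative only, not MintPy code; the helper name is made up):

import numpy as np

def diff_shared_dates(data1, data2, date_flag):
    """data1: (num_date1, len, wid); data2: (num_shared, len, wid);
    date_flag: bool array, True where the date of data1 is shared with data2."""
    mask = data1 == 0.            # remember the no-data pixels
    data1[date_flag] -= data2     # difference the shared dates only
    data1[mask] = 0.              # restore zero phase values
    return data1
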
Example #14
0
def main(iargs=None):
    inps = cmd_line_parse(iargs)
    stack_obj = ifgramStack(inps.ifgram_stack)
    stack_obj.open()
    length, width = stack_obj.length, stack_obj.width
    date12_list = stack_obj.get_date12_list(dropIfgram=True)
    date12_list_all = stack_obj.get_date12_list(dropIfgram=False)
    print('scene length, width', length, width)
    ref_phase = stack_obj.get_reference_phase(unwDatasetName='unwrapPhase')
    inps.length = length
    inps.width = width
    # retrieve the list of SLC dates from ifgramStack.h5
    ifgram0 = date12_list[0]
    date1, date2 = ifgram0.split('_')
    SLC_list = [date1, date2]
    for ifgram in date12_list:
        date1, date2 = ifgram.split('_')
        if date1 not in SLC_list:
            SLC_list.append(date1)
        if date2 not in SLC_list:
            SLC_list.append(date2)
    SLC_list.sort()
    print('number of SLCs found: ', len(SLC_list))
    print('first SLC: ', SLC_list[0])
    print('last  SLC: ', SLC_list[-1])

    # split igram_file into blocks to save memory
    box_list, num_box = ifginv.split2boxes(inps.ifgram_stack, inps.max_memory)
    closurephase = np.zeros([length, width], np.complex64)
    # process block-by-block
    for i, box in enumerate(box_list):
        box_width = box[2] - box[0]
        box_length = box[3] - box[1]
        print(box)
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.
                  format(i + 1, num_box))
            print('box width:  {}'.format(box_width))
            print('box length: {}'.format(box_length))

        closurephase[box[1]:box[3],
                     box[0]:box[2]], numcp = cum_seq_closurePhase(
                         SLC_list, date12_list_all, inps.ifgram_stack,
                         ref_phase, inps.nl, box)

    # What is a good threshold?
    # Assume pure noise, so that the phase is uniformly distributed from -pi to pi.
    # The standard deviation of the phase in each loop is then pi/sqrt(3) (technically it should be
    # smaller, because forming loops reduces the phase variance).
    # The standard deviation of the cumulative wrapped closure phase is pi/sqrt(3)/sqrt(num_cp)
    # -- again a simplification, assuming no correlation between loops.
    # We use 3*sigma as the default threshold -- 99.7% confidence.

    if inps.numsigma:
        threshold_cp = np.pi / np.sqrt(3) / np.sqrt(numcp) * inps.numsigma
    else:
        threshold_cp = np.pi / np.sqrt(3) / np.sqrt(
            numcp) * 3  # 3*sigma, 99.7% confidence

    mask = np.ones([length, width], np.float32)
    mask[np.abs(np.angle(closurephase)) >
         threshold_cp] = 0  # mask areas with potential bias
    mask[np.abs(closurephase) / numcp <
         inps.episilon] = 1  # unmask areas with low correlation (where it is hard to tell whether there is bias)

    # save mask
    meta = dict(stack_obj.metadata)
    meta['FILE_TYPE'] = 'mask'
    ds_name_dict = {
        'cpmask': [np.float32, (length, width), mask],
    }
    writefile.layout_hdf5(os.path.join(inps.outdir, 'cpmask.h5'), ds_name_dict,
                          meta)

    # also save the average closure phase
    ds_name_dict2 = {
        'phase': [np.float32, (length, width),
                  np.angle(closurephase)],
        'amplitude':
        [np.float32, (length, width),
         np.abs(closurephase) / numcp],
    }
    writefile.layout_hdf5(os.path.join(inps.outdir, 'avgwcp.h5'),
                          ds_name_dict2, meta)
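
A quick numerical check (illustrative only) of the threshold reasoning above: phase drawn uniformly from [-pi, pi) has a standard deviation of pi/sqrt(3) ~= 1.81, and averaging num_cp independent loops shrinks it by ~1/sqrt(num_cp):

import numpy as np

rng = np.random.default_rng(0)
phase = rng.uniform(-np.pi, np.pi, size=100_000)
print(phase.std())       # ~= np.pi / np.sqrt(3) ~= 1.8138

num_cp = 50              # hypothetical number of closure-phase loops
threshold_cp = np.pi / np.sqrt(3) / np.sqrt(num_cp) * 3   # 3*sigma, ~99.7%
print(threshold_cp)      # ~= 0.77 rad
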
Example #15
0
def correct_dem_error(inps):
    """Correct DEM error of input timeseries file"""

    start_time = time.time()

    ## 1. input info

    # 1.1 read date info
    ts_obj = timeseries(inps.timeseries_file)
    ts_obj.open()
    num_date = ts_obj.numDate
    length, width = ts_obj.length, ts_obj.width

    num_step = len(inps.stepFuncDate)

    # exclude dates
    date_flag = read_exclude_date(inps.excludeDate, ts_obj.dateList)[0]
    if inps.polyOrder > np.sum(date_flag):
        raise ValueError(
            "input poly order {} > number of acquisition {}! Reduce it!".
            format(inps.polyOrder, np.sum(date_flag)))

    # 1.2 design matrix part 1 - time func for surface deformation
    G_defo = get_design_matrix4defo(inps)

    ## 2. prepare output

    # 2.1 metadata
    meta = dict(ts_obj.metadata)
    print(
        'add/update the following configuration metadata to file:\n{}'.format(
            configKeys))
    for key in configKeys:
        meta[key_prefix + key] = str(vars(inps)[key])

    # 2.2 instantiate est. DEM error
    dem_err_file = 'demErr.h5'
    meta['FILE_TYPE'] = 'dem'
    meta['UNIT'] = 'm'
    ds_name_dict = {'dem': [np.float32, (length, width), None]}
    writefile.layout_hdf5(dem_err_file, ds_name_dict, metadata=meta)

    # 2.3 instantiate corrected time-series
    ts_cor_file = inps.outfile
    meta['FILE_TYPE'] = 'timeseries'
    writefile.layout_hdf5(ts_cor_file,
                          metadata=meta,
                          ref_file=inps.timeseries_file)

    # 2.4 instantiate residual phase time-series
    ts_res_file = os.path.join(os.path.dirname(inps.outfile),
                               'timeseriesResidual.h5')
    writefile.layout_hdf5(ts_res_file,
                          metadata=meta,
                          ref_file=inps.timeseries_file)

    # 2.5 instantiate est. step model(s)
    step_file = None
    if num_step > 0:
        step_file = os.path.join(os.path.dirname(inps.outfile),
                                 'timeseriesStepModel.h5')
        meta.pop('REF_DATE')
        step_dates = np.array(inps.stepFuncDate, dtype=np.string_)
        ds_name_dict = {
            'date': [step_dates.dtype, (num_step, ), step_dates],
            'timeseries': [np.float32, (num_step, length, width), None]
        }
        writefile.layout_hdf5(step_file, ds_name_dict, metadata=meta)

    ## 3. run the estimation and write to disk

    # 3.1 split ts_file into blocks to save memory
    box_list, num_box = split2boxes(inps.timeseries_file,
                                    geom_file=inps.geom_file,
                                    memory_size=inps.memorySize,
                                    num_step=num_step)

    # 3.2 invert / write block-by-block
    for i, box in enumerate(box_list):
        box_width = box[2] - box[0]
        box_length = box[3] - box[1]
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.
                  format(i + 1, num_box))
            print('box width:  {}'.format(box_width))
            print('box length: {}'.format(box_length))

        # invert
        (delta_z, ts_cor, ts_res, step_model) = correct_dem_error_patch(
            G_defo,
            ts_file=inps.timeseries_file,
            geom_file=inps.geom_file,
            box=box,
            date_flag=date_flag,
            num_step=num_step,
            phase_velocity=inps.phaseVelocity)

        # write the block to disk
        # with 3D block in [z0, z1, y0, y1, x0, x1]
        # and  2D block in         [y0, y1, x0, x1]

        # DEM error - 2D
        block = [box[1], box[3], box[0], box[2]]
        writefile.write_hdf5_block(dem_err_file,
                                   data=delta_z,
                                   datasetName='dem',
                                   block=block)

        # corrected time-series - 3D
        block = [0, num_date, box[1], box[3], box[0], box[2]]
        writefile.write_hdf5_block(ts_cor_file,
                                   data=ts_cor,
                                   datasetName='timeseries',
                                   block=block)

        # residual time-series - 3D
        block = [0, num_date, box[1], box[3], box[0], box[2]]
        writefile.write_hdf5_block(ts_res_file,
                                   data=ts_res,
                                   datasetName='timeseries',
                                   block=block)

        # step func time-series - 3D
        if num_step > 0:
            block = [0, num_step, box[1], box[3], box[0], box[2]]
            writefile.write_hdf5_block(step_file,
                                       data=step_model,
                                       datasetName='timeseries',
                                       block=block)

    # time info
    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.'.format(m, s))

    return dem_err_file, ts_cor_file, ts_res_file, step_file
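
writefile.write_hdf5_block above follows a [z0, z1, y0, y1, x0, x1] convention for 3D data and [y0, y1, x0, x1] for 2D. A rough h5py sketch of what such a block write presumably reduces to (hypothetical helper, not MintPy's implementation):

import h5py

def write_block(fname, data, dataset, block):
    """block: [z0, z1, y0, y1, x0, x1] for 3D data, [y0, y1, x0, x1] for 2D."""
    with h5py.File(fname, 'r+') as f:
        if data.ndim == 3:
            z0, z1, y0, y1, x0, x1 = block
            f[dataset][z0:z1, y0:y1, x0:x1] = data
        else:
            y0, y1, x0, x1 = block
            f[dataset][y0:y1, x0:x1] = data
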
Example #16
0
def run_timeseries2time_func(inps):

    # basic info
    atr = readfile.read_attribute(inps.timeseries_file)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    num_date = inps.numDate
    dates = np.array(inps.dateList)
    seconds = atr.get('CENTER_LINE_UTC', 0)

    # use the 1st date as reference if not found, e.g. timeseriesResidual.h5 file
    if "REF_DATE" not in atr.keys() and not inps.ref_date:
        inps.ref_date = inps.dateList[0]
        print(
            'WARNING: No REF_DATE found in time-series file or input in command line.'
        )
        print('  Set "--ref-date {}" and continue.'.format(inps.dateList[0]))

    # get deformation model from parsers
    model, num_param = read_inps2model(inps)

    ## output preparation

    # time_func_param: attributes
    atrV = dict(atr)
    atrV['FILE_TYPE'] = 'velocity'
    atrV['UNIT'] = 'm/year'
    atrV['START_DATE'] = inps.dateList[0]
    atrV['END_DATE'] = inps.dateList[-1]
    atrV['DATE12'] = '{}_{}'.format(inps.dateList[0], inps.dateList[-1])
    if inps.ref_yx:
        atrV['REF_Y'] = inps.ref_yx[0]
        atrV['REF_X'] = inps.ref_yx[1]
    if inps.ref_date:
        atrV['REF_DATE'] = inps.ref_date

    # time_func_param: config parameter
    print('add/update the following configuration metadata:\n{}'.format(
        configKeys))
    for key in configKeys:
        atrV[key_prefix + key] = str(vars(inps)[key])

    # time_func_param: instantiate output file
    ds_name_dict, ds_unit_dict = model2hdf5_dataset(model,
                                                    ds_shape=(length,
                                                              width))[1:]
    writefile.layout_hdf5(inps.outfile,
                          metadata=atrV,
                          ds_name_dict=ds_name_dict,
                          ds_unit_dict=ds_unit_dict)

    # timeseries_res: attributes + instantiate output file
    if inps.save_res:
        atrR = dict(atr)
        # remove REF_DATE attribute
        for key in ['REF_DATE']:
            if key in atrR.keys():
                atrR.pop(key)
        # prepare ds_name_dict manually, instead of using ref_file, to support --ex option
        date_len = len(inps.dateList[0])
        ds_name_dict = {
            "date": [
                np.dtype(f'S{date_len}'), (num_date, ),
                np.array(inps.dateList, dtype=np.string_)
            ],
            "timeseries": [np.float32, (num_date, length, width), None]
        }
        writefile.layout_hdf5(inps.res_file,
                              ds_name_dict=ds_name_dict,
                              metadata=atrR)

    ## estimation

    # calc number of box based on memory limit
    memoryAll = (num_date + num_param * 2 + 2) * length * width * 4
    if inps.bootstrap:
        memoryAll += inps.bootstrapCount * num_param * length * width * 4
    num_box = int(np.ceil(memoryAll * 3 / (inps.maxMemory * 1024**3)))
    box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                           num_split=num_box,
                                           dimension='y',
                                           print_msg=True)

    # loop for block-by-block IO
    for i, box in enumerate(box_list):
        box_wid = box[2] - box[0]
        box_len = box[3] - box[1]
        num_pixel = box_len * box_wid
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.
                  format(i + 1, num_box))
            print('box width:  {}'.format(box_wid))
            print('box length: {}'.format(box_len))

        # initiate output
        m = np.zeros((num_param, num_pixel), dtype=dataType)
        m_std = np.zeros((num_param, num_pixel), dtype=dataType)

        # read input
        print('reading data from file {} ...'.format(inps.timeseries_file))
        ts_data = readfile.read(inps.timeseries_file, box=box)[0]

        # referencing in time and space
        # for file w/o reference info. e.g. ERA5.h5
        if inps.ref_date:
            print('referencing to date: {}'.format(inps.ref_date))
            ref_ind = inps.dateList.index(inps.ref_date)
            ts_data -= np.tile(ts_data[ref_ind, :, :],
                               (ts_data.shape[0], 1, 1))

        if inps.ref_yx:
            print('referencing to point (y, x): ({}, {})'.format(
                inps.ref_yx[0], inps.ref_yx[1]))
            ref_box = (inps.ref_yx[1], inps.ref_yx[0], inps.ref_yx[1] + 1,
                       inps.ref_yx[0] + 1)
            ref_val = readfile.read(inps.timeseries_file, box=ref_box)[0]
            ts_data -= np.tile(ref_val.reshape(ts_data.shape[0], 1, 1),
                               (1, ts_data.shape[1], ts_data.shape[2]))

        ts_data = ts_data[inps.dropDate, :, :].reshape(inps.numDate, -1)
        if atrV['UNIT'] == 'mm':
            ts_data *= 1. / 1000.

        ts_cov = None
        if inps.ts_cov_file:
            print(
                f'reading time-series covariance matrix from file {inps.ts_cov_file} ...'
            )
            ts_cov = readfile.read(inps.ts_cov_file, box=box)[0]
            if len(ts_cov.shape) == 4:
                # full covariance matrix in 4D --> 3D
                if inps.numDate < ts_cov.shape[0]:
                    ts_cov = ts_cov[inps.dropDate, :, :, :]
                    ts_cov = ts_cov[:, inps.dropDate, :, :]
                ts_cov = ts_cov.reshape(inps.numDate, inps.numDate, -1)

            elif len(ts_cov.shape) == 3:
                # diagonal variance matrix in 3D --> 2D
                if inps.numDate < ts_cov.shape[0]:
                    ts_cov = ts_cov[inps.dropDate, :, :]
                ts_cov = ts_cov.reshape(inps.numDate, -1)

            ## set zero value to a fixed small value to avoid divide by zero
            #epsilon = 1e-5
            #ts_cov[ts_cov<epsilon] = epsilon

        # mask invalid pixels
        print('skip pixels with zero/nan value in all acquisitions')
        ts_stack = np.nanmean(ts_data, axis=0)
        mask = np.multiply(~np.isnan(ts_stack), ts_stack != 0.)
        del ts_stack

        #if ts_cov is not None:
        #    print('skip pixels with nan STD value in any acquisition')
        #    num_std_nan = np.sum(np.isnan(ts_cov), axis=0)
        #    mask *= num_std_nan == 0
        #    del num_std_nan

        ts_data = ts_data[:, mask]
        num_pixel2inv = int(np.sum(mask))
        idx_pixel2inv = np.where(mask)[0]
        print('number of pixels to invert: {} out of {} ({:.1f}%)'.format(
            num_pixel2inv, num_pixel, num_pixel2inv / num_pixel * 100))

        # go to next if no valid pixel found
        if num_pixel2inv == 0:
            continue

        ### estimation / solve Gm = d
        print('estimating time functions via linalg.lstsq ...')

        if inps.bootstrap:
            ## option 1 - least squares with bootstrapping
            # Bootstrapping is a resampling method which can be used to estimate properties
            # of an estimator. The method relies on independently sampling the data set with
            # replacement.
            print(
                'estimating time function STD with bootstrap resampling ({} times) ...'
                .format(inps.bootstrapCount))

            # calc model of all bootstrap sampling
            rng = np.random.default_rng()
            m_boot = np.zeros((inps.bootstrapCount, num_param, num_pixel2inv),
                              dtype=dataType)
            prog_bar = ptime.progressBar(maxValue=inps.bootstrapCount)
            for i in range(inps.bootstrapCount):
                # bootstrap resampling
                boot_ind = rng.choice(inps.numDate,
                                      size=inps.numDate,
                                      replace=True)
                boot_ind.sort()

                # estimation
                m_boot[i] = time_func.estimate_time_func(
                    model=model,
                    date_list=dates[boot_ind].tolist(),
                    dis_ts=ts_data[boot_ind],
                    seconds=seconds)[1]

                prog_bar.update(i + 1,
                                suffix='iteration {} / {}'.format(
                                    i + 1, inps.bootstrapCount))
            prog_bar.close()
            #del ts_data

            # get mean/std among all bootstrap sampling
            m[:, mask] = m_boot.mean(axis=0).reshape(num_param, -1)
            m_std[:, mask] = m_boot.std(axis=0).reshape(num_param, -1)
            del m_boot

            # get design matrix to calculate the residual time series
            G = time_func.get_design_matrix4time_func(inps.dateList,
                                                      model=model,
                                                      ref_date=inps.ref_date,
                                                      seconds=seconds)

        else:
            ## option 2 - least squares with uncertainty propagation
            G, m[:, mask], e2 = time_func.estimate_time_func(
                model=model,
                date_list=inps.dateList,
                dis_ts=ts_data,
                seconds=seconds)
            #del ts_data

            ## Compute the covariance matrix for model parameters:
            #       G * m = d
            #     C_m_hat = G+ * C_d * G+.T
            #
            # For ordinary least squares estimation:
            #     G+ = (G.T * G)^-1 * G.T                       (option 2.1)
            #
            # For weighted least squares estimation:
            #          G+ = (G.T * C_d^-1 * G)^-1 * G.T * C_d^-1
            # =>  C_m_hat = (G.T * C_d^-1 * G)^-1               (option 2.2)
            #
            # Assuming normality of the observation errors (in the time domain) with a variance of sigma^2
            # we have C_d = sigma^2 * I, then the above equation is simplified into:
            #     C_m_hat = sigma^2 * (G.T * G)^-1              (option 2.3)
            #
            # Based on the law of integrated expectation, we estimate the obs sigma^2 using
            # the OLS estimation residual as:
            #           e_hat = d - d_hat
            # =>  sigma_hat^2 = (e_hat.T * e_hat) / N
            # =>      sigma^2 = sigma_hat^2 * N / (N - P)       (option 2.4)
            #                 = (e_hat.T * e_hat) / (N - P)
            # which is the equation (10) from Fattahi and Amelung (2015, JGR)

            if ts_cov is not None:
                # option 2.1 - linear propagation from time-series (co)variance matrix
                # TO DO: save the full covariance matrix of the time function parameters
                # only the STD is saved right now
                covar_flag = len(ts_cov.shape) == 3
                msg = 'estimating time function STD from time-series '
                msg += 'covariance pixel-by-pixel ...' if covar_flag else 'variance pixel-by-pixel ...'
                print(msg)

                # calc the common pseudo-inverse matrix
                Gplus = linalg.pinv(G)

                # loop over each pixel
                # or use multidimensional matrix multiplication
                # m_cov = Gplus @ ts_cov @ Gplus.T
                prog_bar = ptime.progressBar(maxValue=num_pixel2inv)
                for i in range(num_pixel2inv):
                    idx = idx_pixel2inv[i]

                    # cov: time-series -> time func
                    ts_covi = ts_cov[:, :, idx] if covar_flag else np.diag(
                        ts_cov[:, idx])
                    m_cov = np.linalg.multi_dot([Gplus, ts_covi, Gplus.T])
                    m_std[:, idx] = np.sqrt(np.diag(m_cov))

                    prog_bar.update(i + 1,
                                    every=200,
                                    suffix='{}/{} pixels'.format(
                                        i + 1, num_pixel2inv))
                prog_bar.close()

            else:
                # option 2.3 - assume obs errors following normal dist. in time
                print(
                    'estimating time function STD from time-series fitting residual ...'
                )
                G_inv = linalg.inv(np.dot(G.T, G))
                m_var = e2.reshape(1, -1) / (num_date - num_param)
                m_std[:, mask] = np.sqrt(
                    np.dot(np.diag(G_inv).reshape(-1, 1), m_var))

                # option 2.4 - simplified form for linear velocity (without matrix linear algebra)
                # The STD can also be calculated using Eq. (10) from Fattahi and Amelung (2015, JGR)
                # ts_diff = ts_data - np.dot(G, m)
                # t_diff = G[:, 1] - np.mean(G[:, 1])
                # vel_std = np.sqrt(np.sum(ts_diff ** 2, axis=0) / np.sum(t_diff ** 2)  / (num_date - 2))

        # write - time func params
        block = [box[1], box[3], box[0], box[2]]
        ds_dict = model2hdf5_dataset(model, m, m_std, mask=mask)[0]
        for ds_name, data in ds_dict.items():
            writefile.write_hdf5_block(inps.outfile,
                                       data=data.reshape(box_len, box_wid),
                                       datasetName=ds_name,
                                       block=block)

        # write - residual file
        if inps.save_res:
            block = [0, num_date, box[1], box[3], box[0], box[2]]
            ts_res = np.ones(
                (num_date, box_len * box_wid), dtype=np.float32) * np.nan
            ts_res[:, mask] = ts_data - np.dot(G, m)[:, mask]
            writefile.write_hdf5_block(inps.res_file,
                                       data=ts_res.reshape(
                                           num_date, box_len, box_wid),
                                       datasetName='timeseries',
                                       block=block)

    return inps.outfile
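
A toy numpy check of option 2.3 above, under the normal-error assumption: m_std = sqrt(diag((G.T G)^-1) * e2 / (N - P)), with e2 the residual sum of squares (synthetic data, illustrative only):

import numpy as np

rng = np.random.default_rng(0)
N, P = 30, 2
t = np.linspace(0, 5, N)
G = np.column_stack([np.ones(N), t])           # offset + linear velocity
d = 1.0 + 0.02 * t + rng.normal(0, 0.01, N)    # synthetic displacement series

m, e2 = np.linalg.lstsq(G, d, rcond=None)[:2]
G_inv = np.linalg.inv(G.T @ G)
m_var = e2 / (N - P)                           # unbiased sigma^2 estimate
m_std = np.sqrt(np.diag(G_inv) * m_var)
print(m)                                       # ~ [1.0, 0.02]
print(m_std)                                   # formal STD of offset/velocity
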
Example #17
0
def layout_hdf5(out_file, atr, model):
    """create HDF5 file for estimated time functions
    with defined metadata and (empty) dataset structure
    """

    # deformation model info
    poly_deg = model['polynomial']
    num_period = len(model['periodic'])
    num_step = len(model['step'])

    # size info
    length = int(atr['LENGTH'])
    width = int(atr['WIDTH'])

    ds_name_dict = {}
    ds_unit_dict = {}

    # time func 1 - polynomial
    for i in range(1, poly_deg + 1):
        # dataset name
        if i == 1:
            dsName = 'velocity'
            unit = 'm/year'
        elif i == 2:
            dsName = 'acceleration'
            unit = 'm/year^2'
        else:
            dsName = 'poly{}'.format(i)
            unit = 'm/year^{}'.format(i)

        # update ds_name/unit_dict
        ds_name_dict[dsName] = [dataType, (length, width), None]
        ds_unit_dict[dsName] = unit
        ds_name_dict[dsName+'Std'] = [dataType, (length, width), None]
        ds_unit_dict[dsName+'Std'] = unit

    # time func 2 - periodic
    for i in range(num_period):
        # dataset name
        period = model['periodic'][i]
        if period == 1:
            dsName = 'annualAmp'
        elif period == 0.5:
            dsName = 'semiAnnualAmp'
        else:
            dsName = 'periodY{}Amp'.format(period)

        # update ds_name/unit_dict
        ds_name_dict[dsName] = [dataType, (length, width), None]
        ds_unit_dict[dsName] = 'm'

    # time func 3 - step
    for i in range(num_step):
        # dataset name
        dsName = 'step{}'.format(model['step'][i])

        # update ds_name/unit_dict
        ds_name_dict[dsName] = [dataType, (length, width), None]
        ds_unit_dict[dsName] = 'm'
        ds_name_dict[dsName+'Std'] = [dataType, (length, width), None]
        ds_unit_dict[dsName+'Std'] = 'm'

    # layout hdf5
    writefile.layout_hdf5(out_file, ds_name_dict, metadata=atr)

    # add metadata to HDF5 dataset
    max_digit = max([len(i) for i in ds_unit_dict.keys()])
    with h5py.File(out_file, 'r+') as f:
        for key, value in ds_unit_dict.items():
            f[key].attrs['UNIT'] = value
            print('add /{d:<{w}} attribute: UNIT = {u}'.format(d=key,
                                                               w=max_digit,
                                                               u=value))

    return out_file
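
writefile.layout_hdf5 is MintPy's own helper; a rough sketch of the assumed behavior with plain h5py (create empty datasets from a {name: [dtype, shape, data]} dict, write file-level metadata, tag per-dataset units):

import h5py

def layout(fname, ds_name_dict, metadata, ds_unit_dict=None):
    with h5py.File(fname, 'w') as f:
        for key, value in metadata.items():
            f.attrs[key] = str(value)              # file-level attributes
        for name, (dtype, shape, data) in ds_name_dict.items():
            ds = f.create_dataset(name, shape=shape, dtype=dtype)
            if data is not None:
                ds[:] = data                       # pre-fill if data is given
            if ds_unit_dict and name in ds_unit_dict:
                ds.attrs['UNIT'] = ds_unit_dict[name]
    return fname
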
Example #18
0
def correct_dem_error(inps):
    """Correct DEM error of input timeseries file"""

    start_time = time.time()

    # limit the number of threads to 1
    # for a slight speedup and a large saving in CPU usage
    num_threads_dict = cluster.set_num_threads("1")

    ## 1. input info

    # 1.1 read date info
    ts_obj = timeseries(inps.timeseries_file)
    ts_obj.open()
    num_date = ts_obj.numDate
    length, width = ts_obj.length, ts_obj.width

    num_step = len(inps.stepFuncDate)

    # exclude dates
    date_flag = read_exclude_date(inps.excludeDate, ts_obj.dateList)[0]
    if inps.polyOrder > np.sum(date_flag):
        raise ValueError(
            "input poly order {} > number of acquisition {}! Reduce it!".
            format(inps.polyOrder, np.sum(date_flag)))

    # 1.2 design matrix part 1 - time func for surface deformation
    G_defo = get_design_matrix4defo(inps)

    ## 2. prepare output

    # 2.1 metadata
    meta = dict(ts_obj.metadata)
    print(
        'add/update the following configuration metadata to file:\n{}'.format(
            configKeys))
    for key in configKeys:
        meta[key_prefix + key] = str(vars(inps)[key])

    # 2.2 instantiate est. DEM error
    dem_err_file = 'demErr.h5'
    meta['FILE_TYPE'] = 'dem'
    meta['UNIT'] = 'm'
    ds_name_dict = {'dem': [np.float32, (length, width), None]}
    writefile.layout_hdf5(dem_err_file, ds_name_dict, metadata=meta)

    # 2.3 instantiate corrected time-series
    ts_cor_file = inps.outfile
    meta['FILE_TYPE'] = 'timeseries'
    writefile.layout_hdf5(ts_cor_file,
                          metadata=meta,
                          ref_file=inps.timeseries_file)

    # 2.4 instantiate residual phase time-series
    ts_res_file = os.path.join(os.path.dirname(inps.outfile),
                               'timeseriesResidual.h5')
    writefile.layout_hdf5(ts_res_file,
                          metadata=meta,
                          ref_file=inps.timeseries_file)

    ## 3. run the estimation and write to disk

    # 3.1 split ts_file into blocks to save memory
    # 1st dimension size: ts (obs / cor / res / step) + dem_err/inc_angle/rg_dist (+pbase)
    num_epoch = num_date * 3 + num_step + 3
    if inps.geom_file:
        geom_obj = geometry(inps.geom_file)
        geom_obj.open(print_msg=False)
        if 'bperp' in geom_obj.datasetNames:
            num_epoch += num_date

    # split in row/line direction based on the input memory limit
    num_box = int(
        np.ceil((num_epoch * length * width * 4) * 2.5 /
                (inps.maxMemory * 1024**3)))
    box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                           num_split=num_box,
                                           dimension='y')

    # 3.2 prepare the input arguments for *_patch()
    data_kwargs = {
        'G_defo': G_defo,
        'ts_file': inps.timeseries_file,
        'geom_file': inps.geom_file,
        'date_flag': date_flag,
        'phase_velocity': inps.phaseVelocity,
    }

    # 3.3 invert / write block-by-block
    for i, box in enumerate(box_list):
        box_wid = box[2] - box[0]
        box_len = box[3] - box[1]
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.
                  format(i + 1, num_box))
            print('box width:  {}'.format(box_wid))
            print('box length: {}'.format(box_len))

        # update box argument in the input data
        data_kwargs['box'] = box

        # invert
        if not inps.cluster:
            # non-parallel
            delta_z, ts_cor, ts_res = correct_dem_error_patch(
                **data_kwargs)[:-1]

        else:
            # parallel
            print('\n\n------- start parallel processing using Dask -------')

            # initiate the output data
            delta_z = np.zeros((box_len, box_wid), dtype=np.float32)
            ts_cor = np.zeros((num_date, box_len, box_wid), dtype=np.float32)
            ts_res = np.zeros((num_date, box_len, box_wid), dtype=np.float32)

            # initiate dask cluster and client
            cluster_obj = cluster.DaskCluster(inps.cluster,
                                              inps.numWorker,
                                              config_name=inps.config)
            cluster_obj.open()

            # run dask
            delta_z, ts_cor, ts_res = cluster_obj.run(
                func=correct_dem_error_patch,
                func_data=data_kwargs,
                results=[delta_z, ts_cor, ts_res])

            # close dask cluster and client
            cluster_obj.close()

            print('------- finished parallel processing -------\n\n')

        # write the block to disk
        # with 3D block in [z0, z1, y0, y1, x0, x1]
        # and  2D block in         [y0, y1, x0, x1]

        # DEM error - 2D
        block = [box[1], box[3], box[0], box[2]]
        writefile.write_hdf5_block(dem_err_file,
                                   data=delta_z,
                                   datasetName='dem',
                                   block=block)

        # corrected time-series - 3D
        block = [0, num_date, box[1], box[3], box[0], box[2]]
        writefile.write_hdf5_block(ts_cor_file,
                                   data=ts_cor,
                                   datasetName='timeseries',
                                   block=block)

        # residual time-series - 3D
        block = [0, num_date, box[1], box[3], box[0], box[2]]
        writefile.write_hdf5_block(ts_res_file,
                                   data=ts_res,
                                   datasetName='timeseries',
                                   block=block)

    # roll back to the original number of threads
    cluster.roll_back_num_threads(num_threads_dict)

    # time info
    m, s = divmod(time.time() - start_time, 60)
    print('time used: {:02.0f} mins {:02.1f} secs.'.format(m, s))

    return dem_err_file, ts_cor_file, ts_res_file
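
cluster.split_box2sub_boxes is MintPy's; a minimal sketch of the assumed behavior, splitting an (x0, y0, x1, y1) box into num_split strips along y:

import numpy as np

def split_box_along_y(box, num_split):
    x0, y0, x1, y1 = box
    step = int(np.ceil((y1 - y0) / num_split))
    return [(x0, r0, x1, min(r0 + step, y1))
            for r0 in range(y0, y1, step)]

print(split_box_along_y((0, 0, 100, 95), 3))
# [(0, 0, 100, 32), (0, 32, 100, 64), (0, 64, 100, 95)]
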
Example #19
0
def run_timeseries2time_func(inps):

    # basic info
    atr = readfile.read_attribute(inps.timeseries_file)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    num_date = inps.numDate
    dates = np.array(inps.dateList)
    seconds = atr.get('CENTER_LINE_UTC', 0)

    # use the 1st date as reference if not found, e.g. timeseriesResidual.h5 file
    if "REF_DATE" not in atr.keys() and not inps.ref_date:
        inps.ref_date = inps.dateList[0]
        print(
            'WARNING: No REF_DATE found in time-series file or input in command line.'
        )
        print('  Set "--ref-date {}" and continue.'.format(inps.dateList[0]))

    # get deformation model from parsers
    model, num_param = read_inps2model(inps)

    ## output preparation

    # time_func_param: attributes
    atrV = dict(atr)
    atrV['FILE_TYPE'] = 'velocity'
    atrV['UNIT'] = 'm/year'
    atrV['START_DATE'] = inps.dateList[0]
    atrV['END_DATE'] = inps.dateList[-1]
    atrV['DATE12'] = '{}_{}'.format(inps.dateList[0], inps.dateList[-1])
    if inps.ref_yx:
        atrV['REF_Y'] = inps.ref_yx[0]
        atrV['REF_X'] = inps.ref_yx[1]
    if inps.ref_date:
        atrV['REF_DATE'] = inps.ref_date

    # time_func_param: config parameter
    print('add/update the following configuration metadata:\n{}'.format(
        configKeys))
    for key in configKeys:
        atrV[key_prefix + key] = str(vars(inps)[key])

    # time_func_param: instantiate output file
    ds_name_dict, ds_unit_dict = model2hdf5_dataset(model,
                                                    ds_shape=(length,
                                                              width))[1:]
    writefile.layout_hdf5(inps.outfile,
                          metadata=atrV,
                          ds_name_dict=ds_name_dict,
                          ds_unit_dict=ds_unit_dict)

    # timeseries_res: attributes + instantiate output file
    if inps.save_res:
        atrR = dict(atr)
        for key in ['REF_DATE']:
            if key in atrR.keys():
                atrR.pop(key)
        writefile.layout_hdf5(inps.res_file,
                              metadata=atrR,
                              ref_file=inps.timeseries_file)

    ## estimation

    # calc number of box based on memory limit
    memoryAll = (num_date + num_param * 2 + 2) * length * width * 4
    if inps.bootstrap:
        memoryAll += inps.bootstrapCount * num_param * length * width * 4
    num_box = int(np.ceil(memoryAll * 3 / (inps.maxMemory * 1024**3)))
    box_list = cluster.split_box2sub_boxes(box=(0, 0, width, length),
                                           num_split=num_box,
                                           dimension='y',
                                           print_msg=True)

    # loop for block-by-block IO
    for i, box in enumerate(box_list):
        box_wid = box[2] - box[0]
        box_len = box[3] - box[1]
        num_pixel = box_len * box_wid
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.
                  format(i + 1, num_box))
            print('box width:  {}'.format(box_wid))
            print('box length: {}'.format(box_len))

        # initiate output
        m = np.zeros((num_param, num_pixel), dtype=dataType)
        m_std = np.zeros((num_param, num_pixel), dtype=dataType)

        # read input
        print('reading data from file {} ...'.format(inps.timeseries_file))
        ts_data = readfile.read(inps.timeseries_file, box=box)[0]

        # referencing in time and space
        # for file w/o reference info. e.g. ERA5.h5
        if inps.ref_date:
            print('referencing to date: {}'.format(inps.ref_date))
            ref_ind = inps.dateList.index(inps.ref_date)
            ts_data -= np.tile(ts_data[ref_ind, :, :],
                               (ts_data.shape[0], 1, 1))

        if inps.ref_yx:
            print('referencing to point (y, x): ({}, {})'.format(
                inps.ref_yx[0], inps.ref_yx[1]))
            ref_box = (inps.ref_yx[1], inps.ref_yx[0], inps.ref_yx[1] + 1,
                       inps.ref_yx[0] + 1)
            ref_val = readfile.read(inps.timeseries_file, box=ref_box)[0]
            ts_data -= np.tile(ref_val.reshape(ts_data.shape[0], 1, 1),
                               (1, ts_data.shape[1], ts_data.shape[2]))

        ts_data = ts_data[inps.dropDate, :, :].reshape(inps.numDate, -1)
        if atrV['UNIT'] == 'mm':
            ts_data *= 1. / 1000.

        ts_std = None
        if inps.ts_std_file:
            ts_std = readfile.read(inps.ts_std_file, box=box)[0]
            ts_std = ts_std[inps.dropDate, :, :].reshape(inps.numDate, -1)
            # set zero value to a fixed small value to avoid divide by zero

            epsilon = 1e-5
            ts_std[ts_std < epsilon] = epsilon

        # mask invalid pixels
        print('skip pixels with zero/nan value in all acquisitions')
        ts_stack = np.nanmean(ts_data, axis=0)
        mask = np.multiply(~np.isnan(ts_stack), ts_stack != 0.)
        del ts_stack

        if ts_std is not None:
            print('skip pixels with nan STD value in any acquisition')
            num_std_nan = np.sum(np.isnan(ts_std), axis=0)
            mask *= num_std_nan == 0
            del num_std_nan

        ts_data = ts_data[:, mask]
        num_pixel2inv = int(np.sum(mask))
        idx_pixel2inv = np.where(mask)[0]
        print('number of pixels to invert: {} out of {} ({:.1f}%)'.format(
            num_pixel2inv, num_pixel, num_pixel2inv / num_pixel * 100))

        # go to next if no valid pixel found
        if num_pixel2inv == 0:
            continue

        ### estimation / solve Gm = d
        print('estimating time functions via linalg.lstsq ...')

        if inps.bootstrap:
            ## option 1 - least squares with bootstrapping
            # Bootstrapping is a resampling method which can be used to estimate properties
            # of an estimator. The method relies on independently sampling the data set with
            # replacement.
            print(
                'estimating time function STD with bootstrap resampling ({} times) ...'
                .format(inps.bootstrapCount))

            # calc model of all bootstrap sampling
            rng = np.random.default_rng()
            m_boot = np.zeros((inps.bootstrapCount, num_param, num_pixel2inv),
                              dtype=dataType)
            prog_bar = ptime.progressBar(maxValue=inps.bootstrapCount)
            for i in range(inps.bootstrapCount):
                # bootstrap resampling
                boot_ind = rng.choice(inps.numDate,
                                      size=inps.numDate,
                                      replace=True)
                boot_ind.sort()

                # estimation
                m_boot[i] = time_func.estimate_time_func(
                    model=model,
                    date_list=dates[boot_ind].tolist(),
                    dis_ts=ts_data[boot_ind],
                    seconds=seconds)[1]

                prog_bar.update(i + 1,
                                suffix='iteration {} / {}'.format(
                                    i + 1, inps.bootstrapCount))
            prog_bar.close()
            #del ts_data

            # get mean/std among all bootstrap sampling
            m[:, mask] = m_boot.mean(axis=0).reshape(num_param, -1)
            m_std[:, mask] = m_boot.std(axis=0).reshape(num_param, -1)
            del m_boot

        else:
            ## option 2 - least squares with uncertainty propagation
            G, m[:, mask], e2 = time_func.estimate_time_func(
                model=model,
                date_list=inps.dateList,
                dis_ts=ts_data,
                seconds=seconds)
            #del ts_data

            ## Compute the covariance matrix for model parameters: Gm = d
            # C_m_hat = (G.T * C_d^-1 * G)^-1  # linear propagation from the TS covariance matrix. (option 2.1)
            #         = sigma^2 * (G.T * G)^-1  # assuming obs errors are normally dist. in time.   (option 2.2a)
            # Based on the law of integrated expectation, we estimate the obs sigma^2 using
            # the OLS estimation residual e_hat_i = d_i - d_hat_i
            # sigma^2 = sigma_hat^2 * N / (N - P)                                                   (option 2.2b)
            #         = (e_hat.T * e_hat) / (N - P)  # sigma_hat^2 = (e_hat.T * e_hat) / N

            if ts_std is not None:
                # option 2.1 - linear propagation from time-series covariance matrix
                print(
                    'estimating time function STD from time-series STD pixel-by-pixel ...'
                )
                prog_bar = ptime.progressBar(maxValue=num_pixel2inv)
                for i in range(num_pixel2inv):
                    idx = idx_pixel2inv[i]

                    try:
                        C_ts_inv = np.diag(1. /
                                           np.square(ts_std[:, idx].flatten()))
                        m_var = np.diag(linalg.inv(
                            G.T.dot(C_ts_inv).dot(G))).astype(np.float32)
                        m_std[:, idx] = np.sqrt(m_var)
                    except linalg.LinAlgError:
                        m_std[:, idx] = np.nan

                    prog_bar.update(i + 1,
                                    every=200,
                                    suffix='{}/{} pixels'.format(
                                        i + 1, num_pixel2inv))
                prog_bar.close()

            else:
                # option 2.2a - assume obs errors following normal dist. in time
                print(
                    'estimating time function STD from time-series fitting residual ...'
                )
                G_inv = linalg.inv(np.dot(G.T, G))
                m_var = e2.reshape(1, -1) / (num_date - num_param)
                m_std[:, mask] = np.sqrt(
                    np.dot(np.diag(G_inv).reshape(-1, 1), m_var))

                # option 2.2b - simplified form for linear velocity (without matrix linear algebra)
                # The STD can also be calculated using Eq. (10) from Fattahi and Amelung (2015, JGR)
                # ts_diff = ts_data - np.dot(G, m)
                # t_diff = G[:, 1] - np.mean(G[:, 1])
                # vel_std = np.sqrt(np.sum(ts_diff ** 2, axis=0) / np.sum(t_diff ** 2)  / (num_date - 2))

        # write - time func params
        block = [box[1], box[3], box[0], box[2]]
        ds_dict = model2hdf5_dataset(model, m, m_std, mask=mask)[0]
        for ds_name, data in ds_dict.items():
            writefile.write_hdf5_block(inps.outfile,
                                       data=data.reshape(box_len, box_wid),
                                       datasetName=ds_name,
                                       block=block)

        # write - residual file
        if inps.save_res:
            block = [0, num_date, box[1], box[3], box[0], box[2]]
            ts_res = np.ones(
                (num_date, box_len * box_wid), dtype=np.float32) * np.nan
            ts_res[:, mask] = ts_data - np.dot(G, m)[:, mask]
            writefile.write_hdf5_block(inps.res_file,
                                       data=ts_res.reshape(
                                           num_date, box_len, box_wid),
                                       datasetName='timeseries',
                                       block=block)

    return inps.outfile
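
A toy check of option 2.1 above: with a diagonal data covariance C_d built from the per-date STDs, the parameter variances are diag((G.T C_d^-1 G)^-1) (synthetic single-pixel data, illustrative only):

import numpy as np

N = 20
t = np.linspace(0, 4, N)
G = np.column_stack([np.ones(N), t])    # offset + linear velocity
ts_std = np.full(N, 0.01)               # per-acquisition STD of one pixel

C_inv = np.diag(1. / ts_std**2)
m_var = np.diag(np.linalg.inv(G.T @ C_inv @ G))
m_std = np.sqrt(m_var)
print(m_std)                            # [offset STD, velocity STD]
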
Example #20
0
def change_timeseries_ref_date(ts_file, ref_date, outfile=None, max_memory=4.0, force=False):
    """Change input file reference date to a different one.
    Parameters: ts_file : str, timeseries file to be changed
                ref_date : str, date in YYYYMMDD format
                outfile  : if str, save to a different file
                           if None, modify the data value in the existing input file
    """
    ts_file = os.path.abspath(ts_file)
    if not outfile:
        outfile = ts_file
    outfile = os.path.abspath(outfile)

    print('-'*50)
    print('change reference date for file: {}'.format(ts_file))
    atr = readfile.read_attribute(ts_file)
    dsName = atr['FILE_TYPE']

    # if the input reference date is the same as the existing one.
    if ref_date == atr.get('REF_DATE', None) and not force:
        print('input refDate is the same as the existing REF_DATE.')
        if outfile == ts_file:
            print('Nothing to be done.')
            return ts_file
        else:
            print('Copy {} to {}'.format(ts_file, outfile))
            shutil.copy2(ts_file, outfile)
            return outfile

    # basic info
    obj = timeseries(ts_file)
    obj.open(print_msg=False)
    num_date = obj.numDate
    length = obj.length
    width = obj.width
    ref_idx = obj.dateList.index(ref_date)

    # get list of boxes for block-by-block IO
    num_box = int(np.ceil((num_date * length * width * 4 * 2) / (max_memory * 1024**3)))
    box_list = split_box2sub_boxes(box=(0, 0, width, length),
                                   num_split=num_box,
                                   dimension='y',
                                   print_msg=True)

    # update the existing file or write a new file
    if outfile == ts_file:
        mode = 'r+'
    else:
        mode = 'a'
        # instantiate output file
        writefile.layout_hdf5(outfile, ref_file=ts_file)

    # loop for block-by-block IO
    for i, box in enumerate(box_list):
        box_width  = box[2] - box[0]
        box_length = box[3] - box[1]
        if num_box > 1:
            print('\n------- processing patch {} out of {} --------------'.format(i+1, num_box))
            print('box width:  {}'.format(box_width))
            print('box length: {}'.format(box_length))

        # reading
        print('reading data ...')
        ts_data = readfile.read(ts_file, box=box)[0]

        print('referencing in time ...')
        dshape = ts_data.shape
        ts_data -= np.tile(ts_data[ref_idx, :, :].reshape(1, dshape[1], dshape[2]), (dshape[0], 1, 1))

        # writing
        block = (0, num_date, box[1], box[3], box[0], box[2])
        writefile.write_hdf5_block(outfile,
                                   data=ts_data,
                                   datasetName=dsName,
                                   block=block,
                                   mode=mode)

    # update metadata
    print('update "REF_DATE" attribute value to {}'.format(ref_date))
    with h5py.File(outfile, 'r+') as f:
        f.attrs['REF_DATE'] = ref_date
        f.attrs['FILE_PATH'] = outfile

    return outfile
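
The np.tile call above is equivalent to plain numpy broadcasting, which avoids the temporary tiled array; a minimal sketch:

import numpy as np

ts = np.arange(24, dtype=np.float32).reshape(4, 3, 2)  # (num_date, length, width)
ref_idx = 1
ts -= ts[ref_idx]        # broadcast over the date axis; same result as np.tile
assert np.all(ts[ref_idx] == 0.)
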
Example #21
0
def subset_file(fname, subset_dict_input, out_file=None):
    """Subset file with
    Inputs:
        fname        : str, path/name of file
        out_file     : str, path/name of output file
        subset_dict : dict, subset parameters, including the following items:
                      subset_x   : list of 2 int,   subset in x direction,   default=None
                      subset_y   : list of 2 int,   subset in y direction,   default=None
                      subset_lat : list of 2 float, subset in lat direction, default=None
                      subset_lon : list of 2 float, subset in lon direction, default=None
                      fill_value : float, optional. fill value for areas outside of the data coverage. default=None
                                   None or absent to subset within the data coverage only.
                      tight  : bool, tight subset or not, for lookup table file, i.e. geomap*.trans
    Outputs:
        out_file : str, path/name of output file;
                   out_file = 'sub_' + basename(fname), if fname is in the current directory;
                   out_file = basename(fname), otherwise.
    """

    # Input File Info
    atr = readfile.read_attribute(fname)
    width = int(atr['WIDTH'])
    length = int(atr['LENGTH'])
    k = atr['FILE_TYPE']
    print('subset ' + k + ' file: ' + fname + ' ...')

    subset_dict = subset_dict_input.copy()
    # Read Subset Inputs into 4-tuple box in pixel and geo coord
    pix_box, geo_box = subset_input_dict2box(subset_dict, atr)

    coord = ut.coordinate(atr)
    # If fill_value exists and is not None, subset the data and fill areas outside its coverage with the assigned value;
    # otherwise, re-check the subset box to make sure it is within the data coverage and initialize the matrix with np.nan.
    outfill = bool(subset_dict.get('fill_value'))
    if not outfill:
        pix_box = coord.check_box_within_data_coverage(pix_box)
        subset_dict['fill_value'] = np.nan

    geo_box = coord.box_pixel2geo(pix_box)
    data_box = (0, 0, width, length)
    print('data   range in (x0,y0,x1,y1): {}'.format(data_box))
    print('subset range in (x0,y0,x1,y1): {}'.format(pix_box))
    print('data   range in (W, N, E, S): {}'.format(
        coord.box_pixel2geo(data_box)))
    print('subset range in (W, N, E, S): {}'.format(geo_box))

    if pix_box == data_box:
        print('Subset range == data coverage, no need to subset. Skip.')
        return fname

    # Calculate Subset/Overlap Index
    pix_box4data, pix_box4subset = get_box_overlap_index(data_box, pix_box)

    ###########################  Data Read and Write  ######################
    # Output File Name
    if not out_file:
        if os.getcwd() == os.path.dirname(os.path.abspath(fname)):
            if 'tight' in subset_dict.keys() and subset_dict['tight']:
                out_file = '{}_tight{}'.format(
                    os.path.splitext(fname)[0],
                    os.path.splitext(fname)[1])
            else:
                out_file = 'sub_' + os.path.basename(fname)
        else:
            out_file = os.path.basename(fname)
    print('writing >>> ' + out_file)

    # update metadata
    atr = attr.update_attribute4subset(atr, pix_box)

    # subset datasets one by one
    dsNames = readfile.get_dataset_list(fname)
    maxDigit = max([len(i) for i in dsNames])

    ext = os.path.splitext(out_file)[1]
    if ext in ['.h5', '.he5']:
        # initiate the output file
        writefile.layout_hdf5(out_file, metadata=atr, ref_file=fname)

        # subset dataset one-by-one
        for dsName in dsNames:
            with h5py.File(fname, 'r') as fi:
                ds = fi[dsName]
                ds_shape = ds.shape
                ds_ndim = ds.ndim
                print('cropping {d} in {b} from {f} ...'.format(
                    d=dsName, b=pix_box4data, f=os.path.basename(fname)))

                if ds_ndim == 2:
                    # read
                    data = ds[pix_box4data[1]:pix_box4data[3],
                              pix_box4data[0]:pix_box4data[2]]

                    # crop
                    data_out = np.ones(
                        (pix_box[3] - pix_box[1], pix_box[2] - pix_box[0]),
                        data.dtype) * subset_dict['fill_value']
                    data_out[pix_box4subset[1]:pix_box4subset[3],
                             pix_box4subset[0]:pix_box4subset[2]] = data
                    data_out = np.array(data_out, dtype=data.dtype)

                    # write
                    block = [0, int(atr['LENGTH']), 0, int(atr['WIDTH'])]
                    writefile.write_hdf5_block(out_file,
                                               data=data_out,
                                               datasetName=dsName,
                                               block=block,
                                               print_msg=True)

                if ds_ndim == 3:
                    prog_bar = ptime.progressBar(maxValue=ds_shape[0])
                    for i in range(ds_shape[0]):
                        # read
                        data = ds[i, pix_box4data[1]:pix_box4data[3],
                                  pix_box4data[0]:pix_box4data[2]]

                        # crop
                        data_out = np.ones(
                            (1, pix_box[3] - pix_box[1],
                             pix_box[2] - pix_box[0]),
                            data.dtype) * subset_dict['fill_value']
                        data_out[:, pix_box4subset[1]:pix_box4subset[3],
                                 pix_box4subset[0]:pix_box4subset[2]] = data

                        # write
                        block = [
                            i, i + 1, 0,
                            int(atr['LENGTH']), 0,
                            int(atr['WIDTH'])
                        ]
                        writefile.write_hdf5_block(out_file,
                                                   data=data_out,
                                                   datasetName=dsName,
                                                   block=block,
                                                   print_msg=False)

                        prog_bar.update(i + 1,
                                        suffix='{}/{}'.format(
                                            i + 1, ds_shape[0]))
                    prog_bar.close()
                    print('finished writing to file: {}'.format(out_file))

    else:
        # IO for binary files
        dsDict = dict()
        for dsName in dsNames:
            dsDict[dsName] = subset_dataset(
                fname,
                dsName,
                pix_box,
                pix_box4data,
                pix_box4subset,
                fill_value=subset_dict['fill_value'])
        writefile.write(dsDict,
                        out_file=out_file,
                        metadata=atr,
                        ref_file=fname)

        # write extra metadata files for ISCE data files
        if os.path.isfile(fname + '.xml') or os.path.isfile(fname +
                                                            '.aux.xml'):
            # write ISCE XML file
            dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']]
            dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal]
            writefile.write_isce_xml(out_file,
                                     width=int(atr['WIDTH']),
                                     length=int(atr['LENGTH']),
                                     bands=len(dsDict.keys()),
                                     data_type=dtype_isce,
                                     scheme=atr['scheme'],
                                     image_type=atr['FILE_TYPE'])
            print(f'write file: {out_file}.xml')

            # write GDAL VRT file
            if os.path.isfile(fname + '.vrt'):
                from isceobj.Util.ImageUtil import ImageLib as IML
                img = IML.loadImage(out_file)[0]
                img.renderVRT()
                print(f'write file: {out_file}.vrt')

    return out_file
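
Note: both branches above write output as sub-blocks rather than whole arrays. For HDF5 output, writefile.write_hdf5_block addresses an existing dataset with a block given as [y0, y1, x0, x1] for 2D data or [z0, z1, y0, y1, x0, x1] for 3D data. Below is a minimal sketch of that pattern with plain h5py; it is a simplified stand-in for illustration, not the writefile implementation, and the file/dataset names in the usage comment are hypothetical.

import h5py

def write_block(fname, dset, data, block):
    """Write `data` into a slice of an existing HDF5 dataset.
    A simplified sketch of the block convention used above,
    not writefile.write_hdf5_block itself."""
    with h5py.File(fname, 'a') as f:
        if len(block) == 6:   # 3D: [z0, z1, y0, y1, x0, x1]
            f[dset][block[0]:block[1],
                    block[2]:block[3],
                    block[4]:block[5]] = data
        else:                 # 2D: [y0, y1, x0, x1]
            f[dset][block[0]:block[1],
                    block[2]:block[3]] = data
    return fname

# e.g. write acquisition i over the full spatial extent (hypothetical names):
# write_block('example.h5', 'timeseries', data, [i, i + 1, 0, length, 0, width])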
Example #22
0
def multilook_file(infile,
                   lks_y,
                   lks_x,
                   outfile=None,
                   method='average',
                   margin=[0, 0, 0, 0],
                   max_memory=4):
    """ Multilook input file
    Parameters: infile - str, path of input file to be multilooked.
                lks_y  - int, number of looks in y / row direction.
                lks_x  - int, number of looks in x / column direction.
                margin - list of 4 int, number of pixels to be skipped during multilooking.
                         useful for offset product, where the marginal pixels are ignored during
                         cross correlation matching.
                outfile - str, path of output file
    Returns:    outfile - str, path of output file
    """
    lks_y = int(lks_y)
    lks_x = int(lks_x)

    # input file info
    atr = readfile.read_attribute(infile)
    length, width = int(atr['LENGTH']), int(atr['WIDTH'])
    k = atr['FILE_TYPE']
    print('multilooking {} {} file: {}'.format(atr['PROCESSOR'], k, infile))
    print('number of looks in y / azimuth direction: %d' % lks_y)
    print('number of looks in x / range   direction: %d' % lks_x)
    print('multilook method: {}'.format(method))

    # margin --> box
    if margin != [0, 0, 0, 0]:  # top, bottom, left, right
        box = (margin[2], margin[0], width - margin[3], length - margin[1])
        print('number of pixels to skip in top/bottom/left/right boundaries: '
              '{}'.format(margin))
    else:
        box = (0, 0, width, length)

    # output file name
    ext = os.path.splitext(infile)[1]
    if not outfile:
        if os.getcwd() == os.path.dirname(os.path.abspath(infile)):
            outfile = '{}_{}alks_{}rlks{}'.format(
                os.path.splitext(infile)[0], lks_y, lks_x, ext)
        else:
            outfile = os.path.basename(infile)

    # update metadata
    atr = attr.update_attribute4multilook(atr, lks_y, lks_x, box=box)

    if ext in ['.h5', '.he5']:
        writefile.layout_hdf5(outfile, metadata=atr, ref_file=infile)

    # read and multilook the source data
    dsNames = readfile.get_dataset_list(infile)
    maxDigit = max([len(i) for i in dsNames])
    dsDict = dict()
    for dsName in dsNames:
        print('multilooking {d:<{w}} from {f} ...'.format(
            d=dsName, w=maxDigit, f=os.path.basename(infile)))

        # split in y/row direction for HDF5 I/O only
        if ext in ['.h5', '.he5']:
            # calc step size to cap memory usage at max_memory (in GB)
            with h5py.File(infile, 'r') as f:
                ds = f[dsName]
                # dataset size in bytes, assuming 4 bytes (e.g. float32) per element
                ds_size = np.prod(ds.shape) * 4
            # extra factor of 4 as a buffer for intermediate copies
            num_step = int(np.ceil(ds_size * 4 / (max_memory * 1024**3)))
            # round the row step to the nearest multiple of 10, with a floor of 10
            row_step = int(np.rint(length / num_step / 10) * 10)
            row_step = max(row_step, 10)

        else:
            row_step = box[3] - box[1]

        num_step = int(np.ceil((box[3] - box[1]) / (row_step * lks_y)))
        for i in range(num_step):
            r0 = box[1] + row_step * lks_y * i
            r1 = box[1] + row_step * lks_y * (i + 1)
            r1 = min(r1, box[3])
            # IO box
            box_i = (box[0], r0, box[2], r1)
            # output box in multilooked coordinates (x offset is always 0)
            box_o = (0,
                     int((r0 - box[1]) / lks_y),
                     int((box[2] - box[0]) / lks_x),
                     int((r1 - box[1]) / lks_y))
            print('box: {}'.format(box_o))

            # read / multilook
            if method == 'nearest':
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     xstep=lks_x,
                                     ystep=lks_y,
                                     print_msg=False)[0]

            else:
                data = readfile.read(infile,
                                     datasetName=dsName,
                                     box=box_i,
                                     print_msg=False)[0]

                data = multilook_data(data, lks_y, lks_x)

            # output block
            if data.ndim == 3:
                block = [
                    0, data.shape[0], box_o[1], box_o[3], box_o[0], box_o[2]
                ]
            else:
                block = [box_o[1], box_o[3], box_o[0], box_o[2]]

            # write
            if ext in ['.h5', '.he5']:
                writefile.write_hdf5_block(outfile,
                                           data=data,
                                           datasetName=dsName,
                                           block=block,
                                           print_msg=False)
            else:
                dsDict[dsName] = data

    # for binary file with 2 bands, always use BIL scheme
    if (len(dsDict.keys()) == 2
            and ext not in ['.h5', '.he5']
            and atr.get('scheme', 'BIL').upper() != 'BIL'):
        print('the input binary file has 2 bands with band interleave: '
              '{}'.format(atr['scheme']))
        print('for the output binary file, change the band interleave to the default BIL.')
        atr['scheme'] = 'BIL'

    if ext not in ['.h5', '.he5']:
        writefile.write(dsDict,
                        out_file=outfile,
                        metadata=atr,
                        ref_file=infile)

        # write extra metadata files for ISCE data files
        if os.path.isfile(infile + '.xml') or os.path.isfile(infile + '.aux.xml'):
            # write ISCE XML file
            dtype_gdal = readfile.NUMPY2GDAL_DATATYPE[atr['DATA_TYPE']]
            dtype_isce = readfile.GDAL2ISCE_DATATYPE[dtype_gdal]
            writefile.write_isce_xml(outfile,
                                     width=int(atr['WIDTH']),
                                     length=int(atr['LENGTH']),
                                     bands=len(dsDict.keys()),
                                     data_type=dtype_isce,
                                     scheme=atr['scheme'],
                                     image_type=atr['FILE_TYPE'])
            print(f'write file: {outfile}.xml')

            # write GDAL VRT file
            if os.path.isfile(infile + '.vrt'):
                from isceobj.Util.ImageUtil import ImageLib as IML
                img = IML.loadImage(outfile)[0]
                img.renderVRT()
                print(f'write file: {outfile}.vrt')

    return outfile
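
Note on the 'average' method: the branch above calls multilook_data to reduce each lks_y-by-lks_x window to its mean. Below is a minimal numpy sketch of that kind of window averaging, assuming edges that do not fill a complete window are simply trimmed; it is a simplified stand-in for illustration, not MintPy's multilook_data itself.

import numpy as np

def multilook_average(data, lks_y, lks_x):
    """Average lks_y x lks_x windows of a 2D array (simplified sketch)."""
    length, width = data.shape
    # trim to exact multiples of the look numbers
    length -= length % lks_y
    width -= width % lks_x
    data = data[:length, :width]
    # fold each window into its own pair of axes, then average over them
    return data.reshape(length // lks_y, lks_y,
                        width // lks_x, lks_x).mean(axis=(1, 3))

# e.g. 10 looks in azimuth (y) and 2 looks in range (x):
# data_mli = multilook_average(data, lks_y=10, lks_x=2)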