Example #1
import os
import subprocess

# 'fm' is this toolbox's file-manager helper module (import path assumed)

def manager(tile, **kwargs):
    #SETUP VARIABLES
    info = kwargs.get('info', True)
    years = kwargs.get('years', None)
    outpath = kwargs.get('outpath', None)

    #GET IMAGE INFO
    for y in years:
        name = tile + '_' + y

        featurepath = fm.check_folder(outpath, name, 'Features')
        fn = [f for f in os.listdir(featurepath) if f.endswith('.tif')]
        if len(fn) == 0:
            raise IOError('Unable to find input data!')

    # featurepath/fn now refer to the last year checked; all years are assumed
    # to share the same image dimensions
    img = fm.readGeoTIFFD(fm.joinpath(featurepath, fn[0]), metadata=False)
    height, width, totfeatures = img.shape

    #CHECK TS DATA
    for y in years:
        for feature in range(totfeatures):
            n1 = tile + '_' + y
            n2 = 'NDI' + str(feature + 1)
            tspath = fm.check_folder(outpath, n1, 'NDI_TimeSeries', n2)
            if not os.path.exists(fm.joinpath(tspath, 'ts.h5')):
                raise IOError('Unable to find input data!')

    #PREPARE PARAMETERS
    height = str(height)
    width = str(width)
    startyear = str(years[0])
    endyear = str(years[-1])
    frequency = str(kwargs.get('frequency', 365))
    tile = str(tile)
    batchsize = str(kwargs.get('batchsize', 200))

    for feature in range(totfeatures):
        if info:
            print('Change detection for feature %i/%i...' %
                  ((feature + 1), totfeatures),
                  end='\r')

        feature = str(feature + 1)

        # Rscript libs/ToolboxModules/callbfast.R height width startyear endyear frequency tile feature batchsize outpath
        # ('Rscript' is the conventional executable name; it is case-sensitive on Linux)
        process = subprocess.run([
            'Rscript', 'libs/ToolboxModules/callbfast.R', height, width,
            startyear, endyear, frequency, tile, feature, batchsize, outpath
        ],
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)
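A minimal invocation sketch for the manager above (hypothetical tile ID, years, and output path; the other kwargs mirror the defaults read at the top of the function):

# Hypothetical call; tile ID, years and output path are placeholders.
manager('T32TPS',
        years=['2018', '2019'],
        outpath='/data/out',
        frequency=365,
        batchsize=200,
        info=True)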
Example #2
def manager(tile, **kwargs):
    #SETUP VARIABLES
    info = kwargs.get('info', True)
    year = kwargs.get('year', None)
    savepath = fm.check_folder(kwargs.get('savepath', None), 'Features')

    #GET FEATURES
    yearts, _, _ = tile.gettimeseries(year=year, option='farming')
    _feature(yearts, savepath, **kwargs)
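A hypothetical call for this feature-extraction manager ('tile' is an L2Atile instance, as created in Example #4; year and save path are placeholders):

# Hypothetical call; 'tile' is an L2Atile object (see Example #4).
manager(tile, year='2018', savepath='/data/out/T32TPS_2018', info=True)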
Example #3
import h5py
import numpy as np

# 'fm' is this toolbox's file-manager helper module (import path assumed)

def load_block_DTW_multi(seed_class_mask, max_d, nc, nc1, savepath):

    # boolean masks selecting the seeds of class nc and of class nc1
    mask = (seed_class_mask == nc)
    mask1 = (seed_class_mask == nc1)

    path = fm.check_folder(savepath, "Multifeature", 'DTW_matrix')
    # file name assumed to match Example #7
    filename = fm.joinpath(path, 'DTW_matrix.h5')
    with h5py.File(filename, 'r') as hf:
        # h5py allows only one fancy selection per access, so apply the two
        # boolean masks one axis at a time
        simi_c_W = np.array(hf["DTW_matrix"][mask1, :][:, mask])
        simi_c_C = np.array(hf["DTW_matrix"][mask, :][:, mask])

    # map DTW distances to similarities in [0, 1]: distance 0 -> 1, max_d -> 0
    simi_c_W = (max_d - simi_c_W) / max_d
    simi_c_C = (max_d - simi_c_C) / max_d

    return simi_c_W, simi_c_C
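The last two lines map DTW distances linearly onto similarities in [0, 1]; a quick worked check with illustrative values:

import numpy as np

max_d = 10.0
d = np.array([0.0, 2.5, 10.0])
simi = (max_d - d) / max_d
print(simi)  # [1.   0.75 0.  ] -- distance 0 scores 1, the maximum distance scores 0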
Example #4
import shutil
import time

# 'fm' is this toolbox's file-manager helper module (import path assumed)

def main(datapath, **kwargs):

    from libs.RSdatamanager.Sentinel2.S2L2A import L2Atile, getTileList
    from libs.ToolboxModules import featurext as m1
    from libs.ToolboxModules import featurets as m2
    from libs.ToolboxModules import trendanalysis as m3
    from libs.ToolboxModules import LandCoverTraining as m4
    from libs.ToolboxModules import LCclassificationAndCD as m5

    #PREPARE SOME TOOLBOX PARAMETERS
    tilenames = kwargs['options'].get('tilenames', None)
    years = kwargs['options'].get('years', None)
    maindir = kwargs['options'].get('maindir', None)
    outpath = kwargs['options'].get('outpath', None)
    deltemp = kwargs['options'].get('deltemp', True)

    module1 = kwargs['module1'].get('run', False)
    module2 = kwargs['module2'].get('run', False)
    module3 = kwargs['module3'].get('run', False)
    module4 = kwargs['module4'].get('run', False)
    module5 = kwargs['module5'].get('run', False)

    if (module1 or module2):
        #READ DATASETS
        tiledict = getTileList(datapath)
        keys = tiledict.keys()

        for k in keys:
            if k in tilenames:
                tileDatapath = tiledict[k]
                print("Reading Tile-%s." % (k))
                tile = L2Atile(maindir, tileDatapath)

                for y in years:
                    #UPDATE OPTIONS
                    name = k + '_' + y
                    update = {
                        'year': y,
                        'savepath': fm.check_folder(outpath, name)
                    }

                    if module1:
                        #MODULE 1
                        t_mod1 = time.time()
                        options = kwargs.get('module1', {})
                        options.update(update)
                        m1.manager(tile, **options)
                        t_mod1 = (time.time() - t_mod1) / 60
                        print(
                            "MOD1 TIME = %imin                                        "
                            % (int(t_mod1)))

                    if module2:
                        #MODULE 2 (not mutually exclusive with module 1)
                        t_mod2 = time.time()
                        options = kwargs.get('module2', {})
                        options.update(update)
                        m2.manager(k, **options)
                        t_mod2 = (time.time() - t_mod2) / 60
                        print(
                            "MOD2 TIME = %imin                                        "
                            % (int(t_mod2)))

                #DELETE TILE-TEMPPATH CONTENT
                if deltemp:
                    # shutil.rmtree returns None and raises on failure, so no
                    # return-value check is needed
                    shutil.rmtree(tile.temppath())
                    print(
                        "Temporary File Content of Tile-%s has been successfully removed!"
                        % (k))

    elif module3:
        for k in tilenames:
            #MODULE 3
            t_mod3 = time.time()
            options = kwargs.get('module3', {})
            m3.manager(k, **options)
            t_mod3 = (time.time() - t_mod3) / 60
            print("MOD3 TIME = %imin                                        " %
                  (int(t_mod3)))

    elif module4:
        for k in tilenames:
            #MODULE 4
            t_mod4 = time.time()
            options = kwargs.get('module4', {})
            m4.manager(k, **options)
            t_mod4 = (time.time() - t_mod4) / 60
            print("MOD4 TIME = %imin                                        " %
                  (int(t_mod4)))

    elif module5:
        for k in tilenames:
            #MODULE 5
            t_mod5 = time.time()
            options = kwargs.get('module5', {})
            m5.manager(k, **options)
            t_mod5 = (time.time() - t_mod5) / 60
            print("MOD5 TIME = %imin                                        " %
                  (int(t_mod5)))
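A sketch of the nested kwargs this entry point expects, mirroring the way Example #5 builds them (every path and tile ID is a placeholder):

# Hypothetical driver call; all paths and tile IDs are placeholders.
shared = {'tilenames': ['T32TPS'], 'years': ['2018'],
          'maindir': '/data/work', 'outpath': '/data/out',
          'info': True, 'deltemp': False}
main('/data/S2',
     options=shared,
     module1=dict(shared, run=True),
     module2=dict(shared, run=False, blocksize=200, mappath='/data/LC.tif'),
     module3=dict(shared, run=False),
     module4=dict(shared, run=False),
     module5=dict(shared, run=False))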
Example #5
    module2 = args.module2
    module3 = args.module3
    module4 = args.module4
    module5 = args.module5

    #READ INITIALIZATION FILE AND SETUP OPTIONS
    config = configparser.ConfigParser()
    config.read(configfile)

    datapath = fm.formatPath(config['Paths']['data_path'])

    options = {
        'tilenames': config['Data']['tilenames'].split(','),
        'years': config['Data']['years'].split(','),
        'maindir': fm.formatPath(config['Paths']['main_dir']),
        'outpath': fm.check_folder(config['Paths']['output_path']),
        'info': True,
        'deltemp': False
    }

    m1options = {}
    m1options.update(options)
    m1options['run'] = module1

    m2options = {}
    m2options.update(options)
    m2options['run'] = module2
    m2options['blocksize'] = int(config['Module2']['blocksize'])
    m2options['mappath'] = fm.formatPath(config['Paths']['LC_path'])

    m3options = {}
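The fragment above pulls its settings from an INI file via configparser; a minimal configuration sketch covering only the keys referenced here (all paths are placeholders):

; hypothetical config.ini -- all paths are placeholders
[Paths]
data_path = /data/S2
main_dir = /data/work
output_path = /data/out
LC_path = /data/LC_map.tif

[Data]
tilenames = T32TPS,T32TPT
years = 2018,2019

[Module2]
blocksize = 200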
def manager(tile, **kwargs):
    #SETUP VARIABLES
    info = kwargs.get('info', True)
    years = kwargs.get('years', None)
    outpath = kwargs.get('outpath', None)
    loadpath = '' #TODO: where is the test data?
    savepath = fm.check_folder(outpath, tile, 'LCclassificationAndCD')

    blocksize = kwargs.get('blocksize', 200)
    n_classes = kwargs.get('n_classes', 9)
    DTW_max_samp = kwargs.get('DTW_max_samp', 15)   # max number of samples of DTW
    MAX_CD = kwargs.get('MAX_CD', 1)                # max number of detected changes

    # Row layout of the records stored in ts.h5 (written by the manager in
    # Example #8): [pixel ID | class label | band index | time-series samples...]
    col_nPIXEL = 0
    col_nCLASS = 1
    col_nBAND  = 2
    col_DATA = 3


    ###############################
    # GET INFO AND INITIALIZATION #
    ###############################
    # os.walk may match several files; the last match wins
    for rootname, _, filenames in os.walk(loadpath):
        for f in filenames:
            if f.endswith('.tif'):
                path = fm.joinpath(rootname, f)
    img = fm.readGeoTIFFD(path, metadata=False)
    height, width, totfeature = img.shape  # axis order as in Examples #1 and #8

    for rootname, _, filenames in os.walk(loadpath):
        for f in filenames:
            if f.endswith('ts.h5'):
                tspath = fm.joinpath(rootname, f)
    with h5py.File(tspath, 'r') as hf:
        NDI_ = np.array(hf["ts"])

    
    #Get classes intervals
    class_int = np.zeros(n_classes)
    class_int_mask = np.unique(NDI_[:,col_nCLASS]).astype(int).tolist()
    for n in class_int_mask:
        class_int[n-1] = n
    class_int = class_int.astype(int).tolist()

    #Get number of seeds
    n_seeds = len(np.unique(NDI_[:,col_nPIXEL]))
    
    #Get number of features
    n_features = totfeature

    #Get number of seeds per class and class seeds mask
    n_seeds_c = np.zeros(n_classes)
    for nc in class_int:
        n_seeds_c[nc-1] = np.size(NDI_[NDI_[:,col_nCLASS]==nc, :], axis=0)
    n_seeds_c = n_seeds_c.astype(int).tolist()

    seed_class_mask = NDI_[:,col_nCLASS]

    #Define blocksize
    nseeds_b = blocksize


    #Multi feature DTW maximum distance
    path = fm.check_folder(outpath, tile, 'LCTraining_DTW', 'Multifeature')
    filename = fm.joinpath(path, 'DTW_matrix.h5')  # file name assumed to match Example #7

    DTW_max_d = 0
    for b1 in range(0, n_seeds, nseeds_b):
        for b2 in range(0, n_seeds, nseeds_b):
            with h5py.File(filename, 'r') as hf:
                max_d_block = np.nanmax(
                    np.array(hf["DTW_matrix"][b1:b1 + nseeds_b,
                                              b2:b2 + nseeds_b]))
                if max_d_block > DTW_max_d:
                    DTW_max_d = max_d_block

    #Loading the models
    path = fm.check_folder(outpath, tile, 'LCTraining_DTW')
    models = np.load(fm.joinpath(path, "models.npy"),
                     allow_pickle=True)  # assumed: nested model lists require pickling. TODO: npy or h5?

    
    ############################
    # LC CLASSIFICATION AND CD #
    ############################
    #Time array definition
    t_seq_st = np.array([1, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73])
    t_seq_en = np.array([366, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73])
    t_seq_st = np.cumsum(t_seq_st)
    t_seq_en = np.cumsum(t_seq_en)
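    # After cumsum: t_seq_st = [1, 74, 147, ...] and t_seq_en = [366, 439, ...],
    # i.e. eleven year-long (365-sample) analysis windows, each shifted forward
    # by 73 samples (one fifth of a year) with respect to the previous one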
    
    #Similarity trends computation and classification
    Test_simi_traj = [None]*n_seeds
    LC_seq = [None]*n_seeds

    for ns in range(n_seeds):
        Traj1 = None
        for nb in range(n_features):  # n_features is an int, so iterate over range
            Seeds = load_seeds(tile, ns, nb, col_DATA, **kwargs)
            if Traj1 is None:
                Traj1 = np.zeros((n_features, len(Seeds[col_DATA:])))
            Traj1[nb,:] = Seeds[col_DATA:]

        pixnr = int(Seeds[col_nPIXEL])  # cast: the h5 data are floats, but pixnr indexes a list
        Test_simi_traj[pixnr] = np.empty((n_classes, np.size(t_seq_st)))
        LC_seq[pixnr] = np.empty((2, np.size(t_seq_st)))

        for ts in range(np.size(t_seq_st)):
            Traj1_T = Traj1[:, t_seq_st[ts]:t_seq_en[ts]]
            Traj1_T = np.roll(Traj1_T, 73*ts, axis=1)

            for nc in range(n_classes):
                max_simi = 0

                for nm in range(len(models[nc])):
                    Traj2 = models[nc][nm]
                    simi = (DTW_max_d - DTW(Traj1_T, Traj2, DTW_max_samp=DTW_max_samp)) / DTW_max_d #TODO: distance_fast
                    max_simi = max(max_simi, simi)

                Test_simi_traj[pixnr][nc,ts] = max_simi

            LC_seq[pixnr][0,ts] = np.argmax(Test_simi_traj[pixnr][:,ts]) + 1      # +1: class number vs index
    
    #Stability rule application
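    # A new label is accepted only once it persists for four consecutive
    # windows; shorter runs are overwritten with the previous stable label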
    CD_counter = np.zeros(n_seeds, dtype=int)  # integer: it is used as an index below
    break_p = np.empty((n_seeds, MAX_CD))
    LC_seq_bp = np.empty((n_seeds, MAX_CD+1))

    for ns in range(n_seeds):
        counter = 0

        for ts in range(np.size(t_seq_st)):
            if ts == 0:
                LC_seq[ns][1,ts] = LC_seq[ns][0,ts]
            else:
                if (LC_seq[ns][0,ts] == LC_seq[ns][0,ts-1]) and (counter == 0):
                    LC_seq[ns][1,ts] = LC_seq[ns][1,ts-1]
                elif LC_seq[ns][0,ts] != LC_seq[ns][0,ts-1]:
                    LC_seq[ns][1,ts] = LC_seq[ns][1,ts-1]
                    counter = 1
                elif LC_seq[ns][0,ts] == LC_seq[ns][0,ts-1]:
                    counter = counter + 1
                    if counter<4:
                        LC_seq[ns][1,ts] = LC_seq[ns][1,ts-1]
                    else:
                        LC_seq[ns][1,ts-3] = LC_seq[ns][0,ts]
                        LC_seq[ns][1,ts-2] = LC_seq[ns][0,ts]
                        LC_seq[ns][1,ts-1] = LC_seq[ns][0,ts]
                        LC_seq[ns][1,ts] = LC_seq[ns][0,ts]
                        counter = 0

        CD_counter[ns] = 0

        for ts in range(1, np.size(t_seq_st)):
            if LC_seq[ns][1,ts] != LC_seq[ns][1,ts-1]:
                CD_counter[ns] = CD_counter[ns] + 1
                if CD_counter[ns] <= MAX_CD:
                    break_p[ns, CD_counter[ns]-1] = ts #TODO: -1?
                    LC_seq_bp[ns, CD_counter[ns]-1] = LC_seq[ns][1,ts-1]
                    LC_seq_bp[ns, CD_counter[ns]] = LC_seq[ns][1,ts]

        if CD_counter[ns] == 0:
            break_p[ns,0] = 0
            LC_seq_bp[ns,0] = LC_seq[ns][1,0]
            LC_seq_bp[ns,1] = LC_seq[ns][1,0]

    np.save(fm.joinpath(savepath, "LC_seq.npy"), LC_seq)
    np.save(fm.joinpath(savepath, "Test_simi_traj.npy"), Test_simi_traj)

    #Output maps
    nyears = len(years)
    outmaps = [None]*nyears
    for ny in range(nyears):
        outmaps[ny] = np.zeros((height, width, 2))

    for row in range(height):
        for col in range(width):
            ns = width*row + col

            if break_p[ns,0] == 0:
                pass
            else:
                z = int(break_p[ns,0])  # cast: break_p holds floats, z indexes the window arrays
                start_z = t_seq_st[z]
                end_z = t_seq_en[z]
                int_z = np.arange(start_z, end_z)
                int_z = np.ceil(int_z/365)

                for ny in range(nyears):
                    perc = np.sum(int_z[int_z == (ny+1)]) / (365*(ny+1))
                    perc = perc*100
Example #7
import math
import os
import time

import h5py
import numpy as np
import scipy.stats

# assumed externals: 'fm' (this toolbox's file-manager module), distance_fast
# (dtaidistance), flatten, confusion_matrix (sklearn.metrics), plus the
# load_block* / load_Seeds_FR / singledtw / multidtw helpers defined elsewhere

def manager(tile, **kwargs):
    #SETUP DEFAULT OPTIONS
    info = kwargs.get('info', True)
    years = kwargs.get('years', None)
    outpath = kwargs.get('outpath', None)
    savepath = fm.check_folder(outpath, tile, 'LCTraining_DTW')

    blocksize = kwargs.get('blocksize', 500)
    n_classes = kwargs.get('n_classes', 9)
    multiprocessing = kwargs.get('multiprocessing', True)
    weekly = kwargs.get('weekly', True)

    singlefeaturedtw = kwargs.get('singlefeaturedtw', False)
    featureselection = kwargs.get('featureselection', False)
    multifeatureDTW = kwargs.get('multifeatureDTW', False)
    similarity = kwargs.get('similarity', False)
    classprototypes = kwargs.get('classprototypes', False)

    DTW_max_samp = kwargs.get('DTW_max_samp',
                              15)  # max number of samples of DTW

    col_nPIXEL = 0
    col_nCLASS = 1
    col_nBAND = 2
    col_DATA = 3

    ###############################
    # GET INFO AND INITIALIZATION #
    ###############################
    # os.walk may match several files; the last match wins
    for rootname, _, filenames in os.walk(outpath):
        for f in filenames:
            if f.endswith('.tif'):
                loadpath = fm.joinpath(rootname, f)
    img = fm.readGeoTIFFD(loadpath, metadata=False)
    height, width, totfeature = img.shape  # axis order as in Examples #1 and #8

    for rootname, _, filenames in os.walk(outpath):
        for f in filenames:
            if f.endswith('ts.h5'):
                loadpath = fm.joinpath(rootname, f)

    with h5py.File(loadpath, 'r') as hf:
        NDI_ = np.array(hf["ts"])

    #Get classes intervals
    class_int = np.zeros(n_classes)
    class_int_mask = np.unique(NDI_[:, col_nCLASS]).astype(int).tolist()
    for n in class_int_mask:
        class_int[n - 1] = n
    class_int = class_int.astype(int).tolist()

    #Get number of seeds
    n_seeds = len(np.unique(NDI_[:, col_nPIXEL]))

    #Get number of features
    n_features = totfeature

    #Get number of seeds per class and class seeds mask
    n_seeds_c = np.zeros(n_classes)
    for nc in class_int:
        n_seeds_c[nc - 1] = np.size(NDI_[NDI_[:, col_nCLASS] == nc, :], axis=0)
    n_seeds_c = n_seeds_c.astype(int).tolist()

    seed_class_mask = NDI_[:, col_nCLASS]

    #Define blocksize
    nseeds_b = blocksize

    #Space of analysis parameters
    min_perc_samp_V = np.arange(
        1, 0.64, -0.03).tolist()  # minimum percentage of total used samples
    min_perc_samp_mod_V = np.ones(12, dtype=float) / np.arange(
        1, 13)  # minimum percentage of used samples per model
    min_perc_samp_mod_V = min_perc_samp_mod_V.tolist()

    sepa_b_vs_b = np.zeros((12, 12, n_features))

    ##########################################
    # SINGLE FEATURE DTW SIMILARITY MATRICES #
    ##########################################
    if singlefeaturedtw:

        for feature in range(n_features):
            if info:
                t_start = time.time()
                print('Computing DTW feature %i/%i...' %
                      ((feature + 1), n_features),
                      end='\r')

            path = fm.check_folder(savepath, "Singlefeature",
                                   'DTW_matrix_B' + str(feature + 1))

            for b1 in range(0, n_seeds, nseeds_b):
                Seeds_B_B1 = load_block(tile, b1, feature, col_DATA, **kwargs)
                for b2 in range(0, n_seeds, nseeds_b):
                    Seeds_B_B2 = load_block(tile, b2, feature, col_DATA,
                                            **kwargs)
                    singledtw(Seeds_B_B1, Seeds_B_B2, b1, b2, nseeds_b,
                              n_seeds, path, **kwargs)

            if info:
                t_end = time.time()
                print(
                    '\nMODULE 4: calculating DTW for feature %i took %i' %
                    (feature + 1, (t_end - t_start) / 60), 'min')

    #Single feature DTW maximum distance
    DTW_max_d_B = np.zeros(n_features)

    for feature in range(n_features):
        path = fm.check_folder(savepath, "Singlefeature",
                               'DTW_matrix_B' + str(feature + 1))
        filename = fm.joinpath(path, 'DTW_matrix_B.h5')

        max_d = 0
        for b1 in range(0, n_seeds, nseeds_b):
            for b2 in range(0, n_seeds, nseeds_b):
                with h5py.File(filename, 'r') as hf:
                    block = np.array(hf["DTW_matrix_B"][b1:b1 + nseeds_b,
                                                        b2:b2 + nseeds_b])
                    max_d_block = np.nanmax(block[block != np.inf])
                    if max_d_block > max_d:
                        max_d = max_d_block

        DTW_max_d_B[feature] = max_d

    ######################################################
    # FEATURE SPACE ANALYSIS AND FEATURE SPACE REDUCTION #
    ######################################################
    if featureselection:

        for feature in range(n_features):
            if info:
                t_start = time.time()
                print('Feature %i/%i...' % ((feature + 1), n_features),
                      end='\r')

            sepa_c_vs_c = np.zeros((12, 12))
            sepa_c_vs_c_N = np.zeros((12, 12))

            for i, nc in enumerate(class_int_mask):
                c_r = np.delete(class_int_mask, i).tolist()
                for nc1 in c_r:
                    simi_c_W, simi_c_C = load_block_DTW(
                        seed_class_mask, feature, DTW_max_d_B[feature], nc,
                        nc1, savepath)

                    for col_i, min_perc_samp in enumerate(min_perc_samp_V):
                        for row_i, min_perc_samp_mod in enumerate(
                                min_perc_samp_mod_V):
                            sepa_mea = np.zeros(n_seeds_c[nc - 1])
                            for nsc in range(n_seeds_c[nc - 1]):
                                simi_c_C_s = simi_c_C[:, nsc]
                                simi_c_C_s = simi_c_C_s[~np.isnan(simi_c_C_s)]
                                simi_c_C_s = sorted(simi_c_C_s, reverse=True)
                                simi_c_C_s = simi_c_C_s[
                                    0:math.ceil(n_seeds_c[nc - 1] *
                                                min_perc_samp_mod *
                                                min_perc_samp)]
                                simi_c_W_s = simi_c_W[:, nsc]
                                simi_c_W_s = sorted(simi_c_W_s, reverse=True)
                                simi_c_W_s = simi_c_W_s[
                                    0:math.ceil(n_seeds_c[nc - 1] *
                                                min_perc_samp_mod *
                                                min_perc_samp)]
                                pd_C_mu, pd_C_sigma = scipy.stats.distributions.norm.fit(
                                    simi_c_C_s)
                                pd_W_mu, pd_W_sigma = scipy.stats.distributions.norm.fit(
                                    simi_c_W_s)
                                if pd_C_mu <= pd_W_mu:
                                    sepa_mea[nsc] = np.nan
                                else:
                                    sepa_mea[nsc] = (pd_C_mu - pd_W_mu) / (
                                        pd_C_sigma + pd_W_sigma)

                            if (sepa_mea[~np.isnan(sepa_mea)]).size / (
                                    n_seeds_c[nc - 1]) >= min_perc_samp:
                                sepa_c_vs_c[row_i, col_i] = sepa_c_vs_c[
                                    row_i, col_i] + np.mean(
                                        sepa_mea[~np.isnan(sepa_mea)])
                                sepa_c_vs_c_N[row_i,
                                              col_i] = sepa_c_vs_c_N[row_i,
                                                                     col_i] + 1

            sepa_b_vs_b[..., feature] = sepa_c_vs_c * sepa_c_vs_c_N

            if info:
                t_end = time.time()
                print(
                    '\nMODULE 4: feature selection for feature %i took %i'
                    % (feature + 1, (t_end - t_start) / 60), 'min')

        np.save(fm.joinpath(savepath, "sepa_b_vs_b.npy"), sepa_b_vs_b)

    #Search for Class Cluster Parameters
    # select_bands = np.load(fm.joinpath(savepath, "select_bands.npy"))
    sepa_b_vs_b = np.load(fm.joinpath(savepath, "sepa_b_vs_b.npy"))
    # select_bands = select_bands.astype(int).tolist()
    sepa_FS = np.zeros((12, 12))
    for nb in range(n_features):
        sepa_FS = sepa_FS + sepa_b_vs_b[:, :, nb]

    mean_sepa_FS = np.mean(sepa_FS, axis=1)
    max_sepa_pos_samp_x_mod_FS = np.argmax(mean_sepa_FS)
    mean_sepa_max_v_FS = sepa_FS[max_sepa_pos_samp_x_mod_FS, :]
    mean_sepa_max_v_derivate_FS = np.diff(mean_sepa_max_v_FS)
    mean_sepa_max_v_derivate_FS = mean_sepa_max_v_derivate_FS / np.max(
        mean_sepa_max_v_derivate_FS)
    mean_sepa_max_v_derivate_FS = mean_sepa_max_v_derivate_FS * mean_sepa_max_v_FS[
        1:]

    max_sepa_pos_perc_samp_FS = np.argmax(mean_sepa_max_v_derivate_FS)
    max_sepa_pos_perc_samp_FS = max_sepa_pos_perc_samp_FS + 1

    min_perc_samp = min_perc_samp_V[max_sepa_pos_perc_samp_FS]
    min_perc_samp_mod = min_perc_samp_V[
        max_sepa_pos_perc_samp_FS] * min_perc_samp_mod_V[
            max_sepa_pos_samp_x_mod_FS]
    max_mod_class = np.round(min_perc_samp_V[max_sepa_pos_perc_samp_FS] /
                             min_perc_samp_mod)

    #######################################
    # MULTI FEATURE DTW SIMILARITY MATRIX #
    #######################################
    if multifeatureDTW:

        if info:
            t_start = time.time()
            print('Computing multifeature DTW ...', end='\r')

        # select_bands = np.load(fm.joinpath(savepath, "select_bands.npy"))
        # select_bands = select_bands.astype(int).tolist()

        path = fm.check_folder(savepath, 'Multifeature')

        for b1 in range(0, n_seeds, nseeds_b):
            Seeds_B1 = load_block_multifeature(tile, b1, n_features, col_DATA,
                                               **kwargs)
            for b2 in range(0, n_seeds, nseeds_b):
                Seeds_B2 = load_block_multifeature(tile, b2, n_features,
                                                   col_DATA, **kwargs)
                multidtw(Seeds_B1, Seeds_B2, b1, b2, nseeds_b, n_seeds, path,
                         **kwargs)

        if info:
            t_end = time.time()
            print(
                '\nMODULE 4: calculating multifeature DTW ...Took %i' %
                ((t_end - t_start) / 60), 'min')

    #Multi feature DTW maximum distance
    path = fm.check_folder(savepath, 'Multifeature')
    filename = fm.joinpath(path, 'DTW_matrix.h5')

    DTW_max_d = 0
    for b1 in range(0, n_seeds, nseeds_b):
        for b2 in range(0, n_seeds, nseeds_b):
            with h5py.File(filename, 'r') as hf:
                block = np.array(hf["DTW_matrix"][b1:b1 + nseeds_b,
                                                  b2:b2 + nseeds_b])
                max_d_block = np.nanmax(block[block != np.inf])
                if max_d_block > DTW_max_d:
                    DTW_max_d = max_d_block

    #######################
    # SIMILARITY ANALYSIS #
    #######################
    if similarity:

        simi_high = kwargs.get('simi_high', 1)  # high similarity measure
        simi_decr = kwargs.get('simi_decr',
                               0.001)  # decrease step of the similarity measure

        min_c_vs_c = np.zeros((len(class_int_mask), len(class_int_mask) - 1))
        max_c_vs_c = np.zeros((len(class_int_mask), len(class_int_mask) - 1))
        mean_c_vs_c = np.zeros((len(class_int_mask), len(class_int_mask) - 1))
        simi_low = np.zeros((len(class_int_mask)))

        for i, nc in enumerate(class_int_mask):
            c_r = np.delete(class_int_mask, i).tolist()
            for n, nc1 in enumerate(c_r):
                simi_c_W, simi_c_C = load_block_DTW_multi(
                    seed_class_mask, DTW_max_d, nc, nc1, savepath)

                min_c_s = np.zeros((n_seeds_c[nc - 1]))
                max_c_s = np.zeros((n_seeds_c[nc - 1]))
                for nsc in range(n_seeds_c[nc - 1]):
                    simi_c_C_s = simi_c_C[:, nsc]
                    simi_c_C_s = simi_c_C_s[~np.isnan(simi_c_C_s)]
                    simi_c_C_s = sorted(simi_c_C_s, reverse=True)
                    simi_c_C_s = simi_c_C_s[0:math.ceil(n_seeds_c[nc - 1] *
                                                        min_perc_samp_mod *
                                                        min_perc_samp)]
                    simi_c_W_s = simi_c_W[:, nsc]
                    simi_c_W_s = sorted(simi_c_W_s, reverse=True)
                    simi_c_W_s = simi_c_W_s[0:math.ceil(n_seeds_c[nc - 1] *
                                                        min_perc_samp_mod *
                                                        min_perc_samp)]
                    pd_C_mu, pd_C_sigma = scipy.stats.distributions.norm.fit(
                        simi_c_C_s)
                    pd_W_mu, pd_W_sigma = scipy.stats.distributions.norm.fit(
                        simi_c_W_s)
                    if pd_C_mu <= pd_W_mu:
                        min_c_s[nsc] = np.nan
                    else:
                        a = scipy.stats.norm(pd_C_mu, pd_C_sigma).pdf(
                            np.arange(0, 1, simi_decr))
                        b = scipy.stats.norm(pd_W_mu, pd_W_sigma).pdf(
                            np.arange(0, 1, simi_decr))
                        for int_mu in np.int64(
                                np.arange(np.floor(pd_W_mu * (1 / simi_decr)),
                                          (math.ceil(pd_C_mu *
                                                     (1 / simi_decr)) + 1),
                                          1000 * simi_decr)):
                            if (round(b[int_mu - 1], 1) -
                                    round(a[int_mu - 1], 1) <= 0):
                                min_c_s[nsc] = int_mu * simi_decr
                                break
                            else:
                                min_c_s[nsc] = np.nan

                        for int_mu in np.flipud(
                                np.int64(
                                    np.arange(
                                        np.floor(pd_W_mu * (1 / simi_decr)),
                                        (math.ceil(pd_C_mu *
                                                   (1 / simi_decr)) + 1),
                                        1000 * simi_decr))):
                            if (round(a[int_mu - 1], 1) -
                                    round(b[int_mu - 1], 1) <= 0):
                                max_c_s[nsc] = int_mu * simi_decr
                                break
                            else:
                                max_c_s[nsc] = np.nan

                min_c_vs_c[i, n] = np.mean(min_c_s[~np.isnan(min_c_s)])
                max_c_vs_c[i, n] = np.mean(max_c_s[~np.isnan(max_c_s)])
                mean_c_vs_c[i, n] = min_c_vs_c[
                    i, n]  #mean([min_c_vs_c(nc,nc1) max_c_vs_c(nc,nc1)])

            simi_low[i] = np.max(mean_c_vs_c[i, :])

        np.save(fm.joinpath(savepath, "simi_low.npy"), simi_low)

    ###############################
    # CLASS PROTOTYPES GENERATION #
    ###############################
    if classprototypes:

        pass_table = np.zeros(n_classes)  # array of pass/no-pass flags
        models_C = [None] * n_classes  # model seeds per class
        used_models = np.zeros(
            n_classes)  # number of models used per class
        used_samples_perc = np.zeros(
            n_classes)  # fraction of used samples per class
        used_simi = np.zeros(n_classes)  # similarity threshold used per class

        for i, nc in enumerate(class_int_mask):
            max_s = 1  # set max similarity = 1
            min_s = 0  #simi_low(nc);   # set min similarity

            while pass_table[nc - 1] == 0:
                _, dist_simi_c = load_block_DTW_multi(seed_class_mask,
                                                      DTW_max_d, nc, nc,
                                                      savepath)

                count_simi_c = (
                    dist_simi_c > max_s
                )  # flag class seeds whose similarity exceeds the threshold
                mean_simi_c = np.full(
                    n_seeds_c[nc - 1],
                    np.nan)  # initialize the mean similarity values

                # compute the mean similarity value per seed for each accepted other seed
                for nsc in range(n_seeds_c[nc - 1]):
                    mean_simi_c[nsc] = np.mean(dist_simi_c[count_simi_c[:,
                                                                        nsc],
                                                           nsc])

                # form a matrix with [seed ID | number of accepted seeds | mean similarity for accepted seeds]
                simi_order = np.column_stack([
                    np.arange(0, n_seeds_c[nc - 1], 1),
                    np.sum(count_simi_c, axis=0), mean_simi_c
                ])

                # order the seeds (descending on column 0, the seed ID)
                simi_order = np.array(
                    simi_order[np.argsort(-simi_order[:, 0])], dtype=int)
                #simi_order = sorted(simi_order, key=lambda x : x[0], reverse=True)

                models = []  # initialize the models

                for nsc in range(n_seeds_c[nc - 1]):
                    n_mod = len(models)  #number of exist models

                    if n_mod == 0:  # if the number of models is zero, just insert the initial seed
                        models.append(simi_order[nsc, 0])

                    else:  # else check if any model can accept the new seed
                        simi = np.zeros(
                            (n_mod, 3))  #initialize the similarity matrix

                        # for each model check if all seed can accept the new one
                        for nm in range(n_mod):
                            seed_int = models[nm]  # get seed ID interval
                            # form a matrix with [model ID | acceptance value | mean similarity between new seed and model seeds]
                            simi[nm, :] = [
                                nm,
                                np.sum((dist_simi_c[simi_order[nsc, 0],
                                                    seed_int] > max_s) * 1) >=
                                (np.ceil(np.size(seed_int) * 1)),
                                np.mean(dist_simi_c[simi_order[nsc, 0],
                                                    seed_int])
                            ]

                        # sort the similarity matrix to get the most similar model
                        simi = np.array(simi[np.argsort(-simi[:, 2])],
                                        dtype=int)

                        if simi[0,
                                1] == 1:  # if the first model can accept the new seed, insert it
                            models[simi[0, 0]] = list(
                                flatten(
                                    [models[simi[0, 0]], simi_order[nsc, 0]]))

                        else:  # otherwise create a new model and insert the seed
                            models.append(simi_order[nsc, 0])

                n_mod = len(models)  # number of models (len avoids ragged-array issues)
                # delete models with a percentage of seed lower than the threshold
                for nm in range(n_mod):
                    if np.size(models[nm]) < math.ceil(
                            n_seeds_c[nc - 1] * min_perc_samp_mod):
                        models[nm] = []

                models = list(filter(None, models))

                u_models = len(models)  # get number of used models
                u_samples = np.zeros(
                    u_models)  # initialized the percentage of used seeds
                # compute the percentage of used seeds
                for um in range(u_models):
                    u_samples[um] = np.size(models[um])
                u_samples = (np.sum(u_samples)) / (n_seeds_c[nc - 1])

                # if the pass conditions are met, update the output matrices
                if ((u_models <= max_mod_class)
                        and (bool(u_samples >= min_perc_samp))):
                    pass_table[nc - 1] = 1
                    models_C[nc - 1] = models
                    used_models[nc - 1] = u_models
                    used_samples_perc[nc - 1] = u_samples
                    used_simi[nc - 1] = max_s
                else:
                    if ((max_s > min_s) and (max_s > simi_decr)
                        ):  # otherwise decrease the similarity threshold
                        max_s = max_s - simi_decr
                        print(max_s)
                    else:  # or if not possible put in the pass table a false value
                        pass_table[nc - 1] = 2

        # class prototypes creation
        models = [[[] for _ in range(n_features)]
                  for _ in range(n_classes)]  # n_features is an int
        for nc in (class_int_mask):
            for nb_o, nb in enumerate(range(n_features)):
                n_mod = len(models_C[nc - 1])
                Seeds_FR, Seeds_F = load_Seeds_FR(tile, nb, col_DATA, **kwargs)
                m1 = Seeds_F[:, col_nCLASS] == nc
                m2 = Seeds_F[:, col_nBAND] == nb
                m3 = np.logical_and(m1, m2)
                TABLE_cb = Seeds_FR[m3, :]
                for nm in range(n_mod):
                    TABLE_cbm = TABLE_cb[models_C[nc - 1][nm], :]
                    traj = np.mean(TABLE_cbm, 0)
                    models[nc - 1][nb_o].append(traj)

        # prototypes vs samples
        _, col = Seeds_FR.shape
        Traj1 = np.zeros((n_features, col))
        sampleVSmodels = np.zeros((n_seeds, n_classes + 3))

        for ns in range(n_seeds):
            for n, nb in enumerate(range(n_features)):
                Seeds_FR, Seeds_F = load_Seeds_FR(tile, nb, col_DATA, **kwargs)
                Traj1[n, :] = Seeds_FR[ns, :]

            sample_simi = [ns, Seeds_F[ns, col_nCLASS], 0]
            for nc in (class_int):
                if nc == 0:
                    max_simi = 0
                else:
                    n_mod = len(models[nc - 1])
                    max_simi = 0
                    for nm in range(n_mod):
                        Traj2 = models[nc - 1][nm]
                        simi = ((DTW_max_d - distance_fast(
                            Traj1, Traj2, max_step=DTW_max_samp)) / DTW_max_d)
                        max_simi = np.max([max_simi, simi])

                sample_simi.append(max_simi)

            max_v = max(sample_simi[3:])
            max_p = sample_simi[3:].index(max_v)
            sample_simi[2] = max_p + 1
            sampleVSmodels[ns, :] = sample_simi

        #confusion matrix between training samples and prototypes
        CM_S = confusion_matrix(sampleVSmodels[:, 1], sampleVSmodels[:, 2])
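A hypothetical invocation of the training manager above, enabling the stages one at a time (tile ID and output path are placeholders):

# Hypothetical call; tile ID and output path are placeholders.
manager('T32TPS',
        outpath='/data/out',
        blocksize=500,
        n_classes=9,
        singlefeaturedtw=True,   # stage 1: per-feature DTW matrices
        featureselection=False,  # stage 2: separability analysis
        multifeatureDTW=False,   # stage 3: multifeature DTW matrix
        similarity=False,        # stage 4: similarity thresholds
        classprototypes=False)   # stage 5: class prototype generation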
Example #8
import os
from functools import partial
from multiprocessing import Pool, Value

import h5py
import numpy as np

# assumed externals: 'fm' (file-manager module) plus the loadts_block /
# parallel_manager / counterinit helpers defined elsewhere in this module

def manager(tilename, **kwargs):
    #SETUP DEFAULT OPTIONS
    info = kwargs.get('info', True)
    blocksize = kwargs.get('blocksize', 200)
    mappath = kwargs.get('mappath', None)

    #PATHS
    loadpath = fm.check_folder(kwargs.get('savepath', None), 'Features')
    savepath = fm.check_folder(kwargs.get('savepath', None), 'NDI_TimeSeries')
    maindir = kwargs.get('maindir', None)
    temppath = fm.joinpath(maindir, 'numpy', tilename)

    #GET IMAGE INFO
    fn = [f for f in os.listdir(loadpath) if f.endswith('.tif')]
    if len(fn) > 0:
        img = fm.readGeoTIFFD(fm.joinpath(loadpath, fn[0]), metadata=False)
        height, width, totfeatures = img.shape
    else:
        raise IOError('Unable to find input data!')

    #LOAD CLASSIFICATION MAP
    if mappath is not None:
        classmap = fm.readGeoTIFFD(mappath, metadata=False)
    else:
        classmap = np.zeros((height, width))  # no map given: class label 0 everywhere

    #ALLOC VARIABLES
    npixels = blocksize * blocksize
    rects = np.empty((npixels, 368))  # 3 metadata columns + 365 reconstructed daily samples
    mse = np.empty((npixels, 4))

    #LOOP THROUGH FEATURES
    for feature in range(totfeatures):
        if info:
            print('Reconstructing feature %i/%i...' %
                  ((feature + 1), totfeatures),
                  end='\r')

        folder = 'NDI' + str(feature + 1)
        path = fm.check_folder(savepath, folder)

        #FOR EACH BLOCK POSITION
        for i in range(0, width, blocksize):
            for j in range(0, height, blocksize):

                matr, mask, days = loadts_block(i, j, feature, loadpath,
                                                temppath, **kwargs)

                counter = Value('i', 0)
                corecount = max(1, os.cpu_count() // 2 -
                                1)  # half to skip virtual cores; keep at least one worker

                p = Pool(corecount,
                         initializer=counterinit,
                         initargs=(counter, ))
                results = p.map(
                    partial(parallel_manager,
                            matr=matr,
                            mask=mask,
                            days=days,
                            blocksize=blocksize), range(npixels))
                p.close()
                p.join()

                for npx in range(npixels):
                    row, col = divmod(npx, blocksize)
                    row = row + i
                    col = col + j

                    rects[npx, 0] = width * row + col
                    rects[npx, 1] = classmap[row, col]
                    rects[npx, 2] = feature + 1
                    mse[npx, 0] = width * row + col
                    mse[npx, 1] = classmap[row, col]
                    mse[npx, 2] = feature + 1

                    rects[npx, 3:] = results[npx][0]
                    mse[npx, 3] = results[npx][1]

                filename = fm.joinpath(path, 'ts.h5')
                if not os.path.isfile(filename):
                    with h5py.File(filename, 'w') as hf:
                        hf.create_dataset("ts",
                                          data=rects,
                                          chunks=True,
                                          maxshape=(None, rects.shape[1]))
                else:
                    with h5py.File(filename, 'a') as hf:
                        hf["ts"].resize((hf["ts"].shape[0] + rects.shape[0]),
                                        axis=0)
                        hf["ts"][-rects.shape[0]:] = rects

                filename = fm.joinpath(path, 'mse.h5')
                if not os.path.isfile(filename):
                    with h5py.File(filename, 'w') as hf:
                        hf.create_dataset("mse",
                                          data=mse,
                                          chunks=True,
                                          maxshape=(None, mse.shape[1]))
                else:
                    with h5py.File(filename, 'a') as hf:
                        hf["mse"].resize((hf["mse"].shape[0] + mse.shape[0]),
                                         axis=0)
                        hf["mse"][-mse.shape[0]:] = mse