def manager(tile, **kwargs):
    """Run the BFAST change-detection R script on every feature of a tile.

    Verifies that a feature raster and the NDI time series exist for each
    requested year, then launches ``libs/ToolboxModules/callbfast.R`` once
    per feature.

    Args:
        tile: Tile name; combined with each year as ``<tile>_<year>`` to
            locate the per-year folders below ``outpath``.
        **kwargs: Options read by this function:
            info (bool): print a progress line per feature (default True).
            years (sequence of str): years to analyse; the first and last
                entries are passed to the R script as start/end year.
            outpath: root output folder.
            frequency: forwarded to the R script (default 365).
            batchsize: forwarded to the R script (default 200).

    Raises:
        ValueError: if ``years`` is missing or empty.
        IOError: if a feature raster or a ``ts.h5`` file cannot be found.
        RuntimeError: if the R script exits with a non-zero status.
    """
    #SETUP VARIABLES
    info = kwargs.get('info', True)
    years = kwargs.get('years', None)
    outpath = kwargs.get('outpath', None)

    # Fail early with a clear message instead of a TypeError/NameError later.
    if not years:
        raise ValueError("'years' must be a non-empty sequence of year strings")

    #GET IMAGE INFO
    for y in years:
        featurepath = fm.check_folder(outpath, tile + '_' + y, 'Features')
        fn = [f for f in os.listdir(featurepath) if f.endswith('.tif')]
        if not fn:
            raise IOError('Unable to find input data!')

    # The raster of the last year checked provides the image geometry.
    img = fm.readGeoTIFFD(fm.joinpath(featurepath, fn[0]), metadata=False)
    height, width, totfeatures = img.shape

    #CHECK TS DATA
    for y in years:
        for feature in range(totfeatures):
            tspath = fm.check_folder(outpath, tile + '_' + y,
                                     'NDI_TimeSeries',
                                     'NDI' + str(feature + 1))
            if not os.path.exists(fm.joinpath(tspath, 'ts.h5')):
                raise IOError('Unable to find input data!')

    #PREPARE PARAMETERS (the command line only accepts strings)
    height = str(height)
    width = str(width)
    startyear = str(years[0])
    endyear = str(years[-1])
    frequency = str(kwargs.get('frequency', 365))
    tile = str(tile)
    batchsize = str(kwargs.get('batchsize', 200))

    for feature in range(totfeatures):
        if info:
            print('Change detection for feature %i/%i...' %
                  ((feature + 1), totfeatures),
                  end='\r')

        # rscript libs/ToolboxModules/callbfast.R height width startyear endyear frequency tile feature batchsize outpath
        process = subprocess.run([
            'rscript', 'libs/ToolboxModules/callbfast.R', height, width,
            startyear, endyear, frequency, tile,
            str(feature + 1), batchsize, outpath
        ],
                                 stdout=subprocess.PIPE,
                                 universal_newlines=True)

        # Previously the result was ignored, so a failing R run went unnoticed.
        if process.returncode != 0:
            raise RuntimeError(
                'callbfast.R failed for feature %i (exit code %i):\n%s' %
                (feature + 1, process.returncode, process.stdout))
def manager(tile, **kwargs):
    """Extract the features of one tile for one year.

    Reads ``year`` and ``savepath`` from ``kwargs``, asks the tile for its
    'farming' time series of that year and hands the series, the
    ``Features`` folder and all options over to ``_feature``.
    """
    #RESOLVE OPTIONS
    target_year = kwargs.get('year')
    feature_dir = fm.check_folder(kwargs.get('savepath'), 'Features')

    #GET FEATURES (gettimeseries yields three values; only the first is used)
    series = tile.gettimeseries(year=target_year, option='farming')
    yearly_ts, _second, _third = series
    _feature(yearly_ts, feature_dir, **kwargs)
def load_block_DTW_multi(seed_class_mask, max_d, nc, nc1, savepath):
    """Load multi-feature DTW distances for two classes as similarities.

    Args:
        seed_class_mask: 1-D array with the class label of every seed, in
            the same order as the rows/columns of the ``DTW_matrix`` dataset.
        max_d: maximum DTW distance, used to map distances to similarities
            with ``(max_d - d) / max_d``.
        nc: class whose seeds select the columns of both results.
        nc1: class whose seeds select the rows of the first result.
        savepath: training output folder containing ``Multifeature``.

    Returns:
        tuple: ``(simi_c_W, simi_c_C)`` where ``simi_c_W`` has one row per
        seed of class ``nc1`` and ``simi_c_C`` one row per seed of class
        ``nc``; both have one column per seed of class ``nc``.
    """
    import os  # local import: only used to probe the candidate locations

    labels = np.asarray(seed_class_mask)
    idx_c = np.flatnonzero(labels == nc)
    idx_w = np.flatnonzero(labels == nc1)

    # The original body opened an undefined `filename`.
    # NOTE(review): this function prepares `Multifeature/DTW_matrix`, while
    # the training manager reads `Multifeature/DTW_matrix.h5`; both locations
    # are tried - confirm which one the writer really uses.
    path = fm.check_folder(savepath, "Multifeature", 'DTW_matrix')
    filename = fm.joinpath(path, 'DTW_matrix.h5')
    if not os.path.isfile(filename):
        filename = fm.joinpath(savepath, 'Multifeature', 'DTW_matrix.h5')

    with h5py.File(filename, 'r') as hf:
        dset = hf["DTW_matrix"]

        def read_rows(idx):
            # h5py accepts a single (increasing) index list per selection.
            if idx.size == 0:
                return np.empty((0, dset.shape[1]))
            return np.asarray(dset[idx, :])

        # Rows are selected in the file, columns afterwards in memory, which
        # yields the full class-by-class sub-matrix.
        dist_C = read_rows(idx_c)[:, idx_c]
        dist_W = dist_C if nc1 == nc else read_rows(idx_w)[:, idx_c]

    simi_c_W = np.negative(dist_W - max_d) / max_d
    simi_c_C = np.negative(dist_C - max_d) / max_d
    return simi_c_W, simi_c_C
def _run_timed(label, func, *args, **options):
    """Call ``func(*args, **options)`` and print the elapsed whole minutes."""
    t_start = time.time()
    func(*args, **options)
    elapsed = (time.time() - t_start) / 60
    print("%s TIME = %imin " % (label, int(elapsed)))


def main(datapath, **kwargs):
    """Run the requested toolbox stage on the configured tiles.

    Only one stage runs per call, in this order of precedence: modules 1/2
    (per tile and year), module 3, module 4, module 5.

    Args:
        datapath: folder handed to ``getTileList`` to discover the tiles.
        **kwargs: must contain the dictionaries ``options`` and ``module1``
            to ``module5``. ``options`` provides ``tilenames``, ``years``,
            ``maindir``, ``outpath`` and ``deltemp``; every ``moduleN``
            dictionary provides ``run`` plus the options forwarded to that
            module's ``manager``.
    """
    from libs.RSdatamanager.Sentinel2.S2L2A import L2Atile, getTileList
    from libs.ToolboxModules import featurext as m1
    from libs.ToolboxModules import featurets as m2
    from libs.ToolboxModules import trendanalysis as m3
    from libs.ToolboxModules import LandCoverTraining as m4
    from libs.ToolboxModules import LCclassificationAndCD as m5

    #PREPARE SOME TOOLBOX PARAMETERS
    tilenames = kwargs['options'].get('tilenames', None)
    years = kwargs['options'].get('years', None)
    maindir = kwargs['options'].get('maindir', None)
    outpath = kwargs['options'].get('outpath', None)
    deltemp = kwargs['options'].get('deltemp', True)

    module1 = kwargs['module1'].get('run', False)
    module2 = kwargs['module2'].get('run', False)
    module3 = kwargs['module3'].get('run', False)
    module4 = kwargs['module4'].get('run', False)
    module5 = kwargs['module5'].get('run', False)

    if (module1 or module2):
        #READ DATASETS
        tiledict = getTileList(datapath)
        for k in tiledict:
            if k not in tilenames:
                continue

            print("Reading Tile-%s." % (k))
            tile = L2Atile(maindir, tiledict[k])

            for y in years:
                #UPDATE OPTIONS
                update = {
                    'year': y,
                    'savepath': fm.check_folder(outpath, k + '_' + y)
                }

                # NOTE(review): module 2 is skipped whenever module 1 is
                # enabled (if/elif) - confirm this is intended.
                if module1:
                    #MODULE 1
                    # Work on a copy so the caller's dictionary is not
                    # modified.
                    options = dict(kwargs.get('module1', {}))
                    options.update(update)
                    _run_timed("MOD1", m1.manager, tile, **options)
                elif module2:
                    #MODULE 2
                    options = dict(kwargs.get('module2', {}))
                    options.update(update)
                    _run_timed("MOD2", m2.manager, k, **options)

            #DELETE TILE-TEMPPATH CONTENT
            if deltemp:
                # shutil.rmtree returns None and raises on failure, so
                # reaching the print already means the removal succeeded.
                shutil.rmtree(tile.temppath())
                print(
                    "Temporary File Content of Tile-%s has been successfully removed!"
                    % (k))

    elif module3:
        for k in tilenames:
            #MODULE 3
            _run_timed("MOD3", m3.manager, k, **kwargs.get('module3', {}))

    elif module4:
        for k in tilenames:
            #MODULE 4
            _run_timed("MOD4", m4.manager, k, **kwargs.get('module4', {}))

    elif module5:
        for k in tilenames:
            #MODULE 5
            _run_timed("MOD5", m5.manager, k, **kwargs.get('module5', {}))
# Per-module run flags taken from the parsed command-line arguments.
# NOTE(review): `args`, `configfile` and `module1` are defined before this
# fragment and are not visible here.
module2 = args.module2
module3 = args.module3
module4 = args.module4
module5 = args.module5

#READ INITIALIZATION FILE AND SETUP OPTIONS
config = configparser.ConfigParser()
config.read(configfile)

datapath = fm.formatPath(config['Paths']['data_path'])

# Options shared by all modules; every module dictionary starts as a copy.
# The comma-separated lists are split as-is, so surrounding whitespace in
# the configuration file stays part of each entry.
options = {
    'tilenames': config['Data']['tilenames'].split(','),
    'years': config['Data']['years'].split(','),
    'maindir': fm.formatPath(config['Paths']['main_dir']),
    'outpath': fm.check_folder(config['Paths']['output_path']),
    'info': True,
    'deltemp': False
}

# Module 1: shared options plus its run flag.
m1options = {}
m1options.update(options)
m1options['run'] = module1

# Module 2: additionally needs the block size and the land-cover map path.
m2options = {}
m2options.update(options)
m2options['run'] = module2
m2options['blocksize'] = int(config['Module2']['blocksize'])
m2options['mappath'] = fm.formatPath(config['Paths']['LC_path'])

# Module 3 options (filled in after this fragment).
m3options = {}
def manager(tile, **kwargs):
    """Classify land cover over time and detect changes (work in progress).

    Every sample trajectory is compared, window by window, with the class
    prototypes stored in ``models.npy``; a stability rule is applied to the
    resulting class sequence and the break points are counted.
    ``LC_seq.npy`` and ``Test_simi_traj.npy`` are written to the
    ``LCclassificationAndCD`` folder of the tile.

    Args:
        tile: tile name, used to build the input/output folders.
        **kwargs: options read here:
            years: sequence of years (only its length is used).
            outpath: root output folder.
            loadpath: folder searched for the ``.tif`` raster and ``ts.h5``
                file (default ``''``, as in the original placeholder).
            blocksize (int): block edge used to scan the DTW matrix (200).
            n_classes (int): number of land-cover classes (9).
            DTW_max_samp (int): max number of samples of DTW (15).
            MAX_CD (int): max number of detected changes (1).

    Raises:
        IOError: if no ``.tif`` raster or ``ts.h5`` file is found.
    """
    #SETUP VARIBLES
    years = kwargs.get('years', None)
    outpath = kwargs.get('outpath', None)
    loadpath = kwargs.get('loadpath', '')  #TODO: where is the test data?
    savepath = fm.check_folder(outpath, tile, 'LCclassificationAndCD')

    blocksize = kwargs.get('blocksize', 200)
    n_classes = kwargs.get('n_classes', 9)
    DTW_max_samp = kwargs.get('DTW_max_samp', 15)  # max number of samples of DTW
    MAX_CD = kwargs.get('MAX_CD', 1)  # max number of detected changes

    col_nPIXEL = 0
    col_DATA = 3

    ###############################
    # GET INFO AND INITIALIZATION #
    ###############################
    # One walk remembers the last raster and the last time-series file found.
    tifpath = None
    tspath = None
    for rootname, _, filenames in os.walk(loadpath):
        for f in filenames:
            if f.endswith('ts.h5'):
                tspath = fm.joinpath(rootname, f)
            elif f.endswith('.tif'):
                tifpath = fm.joinpath(rootname, f)
    if tifpath is None or tspath is None:
        raise IOError('Unable to find input data!')

    img = fm.readGeoTIFFD(tifpath, metadata=False)
    # Axis order as used by the other toolbox modules.
    height, width, totfeature = img.shape

    with h5py.File(tspath, 'r') as hf:
        NDI_ = np.array(hf["ts"])

    #Get number of seeds
    n_seeds = len(np.unique(NDI_[:, col_nPIXEL]))

    #Get number of features
    n_features = totfeature

    #Define blocksize
    nseeds_b = blocksize

    #Multi feature DTW maximum distance
    path = fm.check_folder(outpath, tile, 'LCTraining_DTW', 'Multifeature')
    filename = fm.joinpath(path, 'DTW_matrix.h5')  # was undefined before
    DTW_max_d = 0
    with h5py.File(filename, 'r') as hf:
        dset = hf["DTW_matrix"]
        for b1 in range(0, n_seeds, nseeds_b):
            for b2 in range(0, n_seeds, nseeds_b):
                block = np.array(dset[b1:b1 + nseeds_b, b2:b2 + nseeds_b])
                # Ignore NaN/inf entries, as the training module does.
                finite = block[np.isfinite(block)]
                if finite.size and finite.max() > DTW_max_d:
                    DTW_max_d = finite.max()

    #Loading the models
    path = fm.check_folder(outpath, tile, 'LCTraining_DTW')
    models = np.load(fm.joinpath(path, "models.npy"))  #TODO: npy or h5?

    ############################
    # LC CLASSIFICATION AND CD #
    ############################
    #Time array definition
    t_seq_st = np.cumsum(np.array([1, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73]))
    t_seq_en = np.cumsum(np.array([366, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73]))
    n_steps = np.size(t_seq_st)

    #Similarity trends computation and classification
    Test_simi_traj = [None] * n_seeds
    LC_seq = [None] * n_seeds

    for ns in range(n_seeds):
        Traj1 = None
        # n_features is an integer, so iterate over its range.
        for nb in range(n_features):
            Seeds = load_seeds(tile, ns, nb, col_DATA, **kwargs)
            if Traj1 is None:
                Traj1 = np.zeros((n_features, len(Seeds[col_DATA:])))
            Traj1[nb, :] = Seeds[col_DATA:]

        # The pixel number is stored as a float but is used as an index.
        pixnr = int(Seeds[col_nPIXEL])
        Test_simi_traj[pixnr] = np.empty((n_classes, n_steps))
        LC_seq[pixnr] = np.empty((2, n_steps))

        for ts in range(n_steps):
            Traj1_T = Traj1[:, t_seq_st[ts]:t_seq_en[ts]]
            Traj1_T = np.roll(Traj1_T, 73 * ts, axis=1)

            for nc in range(n_classes):
                max_simi = 0
                for nm in range(len(models[nc])):
                    Traj2 = models[nc][nm]
                    simi = (DTW_max_d - DTW(Traj1_T, Traj2, DTW_max_samp=DTW_max_samp)) / DTW_max_d  #TODO: distance_fast
                    max_simi = max(max_simi, simi)
                Test_simi_traj[pixnr][nc, ts] = max_simi

            # Read back the entry just written (the original indexed `ns`).
            LC_seq[pixnr][0, ts] = np.argmax(Test_simi_traj[pixnr][:, ts]) + 1  # +1 number of class vs index

    #Stability rule application
    # Integer arrays: the values are used as indices further down.
    CD_counter = np.zeros(n_seeds, dtype=int)
    break_p = np.zeros((n_seeds, MAX_CD), dtype=int)
    LC_seq_bp = np.full((n_seeds, MAX_CD + 1), np.nan)

    for ns in range(n_seeds):
        raw = LC_seq[ns][0]  # classified sequence (view)
        stable = LC_seq[ns][1]  # stabilised sequence (view, filled below)

        counter = 0
        for ts in range(n_steps):
            if ts == 0:
                stable[ts] = raw[ts]
            elif raw[ts] != raw[ts - 1]:
                # Class switch: keep the old class and start counting.
                stable[ts] = stable[ts - 1]
                counter = 1
            elif counter == 0:
                stable[ts] = stable[ts - 1]
            else:
                counter = counter + 1
                if counter < 4:
                    stable[ts] = stable[ts - 1]
                else:
                    # The new class persisted: accept it retroactively.
                    stable[ts - 3:ts + 1] = raw[ts]
                    counter = 0

        n_changes = 0
        for ts in range(1, n_steps):
            if stable[ts] != stable[ts - 1]:
                n_changes += 1
                if n_changes <= MAX_CD:
                    break_p[ns, n_changes - 1] = ts  #TODO: -1?
                    LC_seq_bp[ns, n_changes - 1] = stable[ts - 1]
                    LC_seq_bp[ns, n_changes] = stable[ts]
        CD_counter[ns] = n_changes

        if n_changes == 0:
            break_p[ns, 0] = 0
            LC_seq_bp[ns, 0] = stable[0]
            LC_seq_bp[ns, 1] = stable[0]

    np.save(fm.joinpath(savepath, "LC_seq.npy"), LC_seq)
    np.save(fm.joinpath(savepath, "Test_simi_traj.npy"), Test_simi_traj)

    #Output maps
    # NOTE(review): this section looks unfinished - `perc` is computed but
    # never written to `outmaps`, and `outmaps` is neither saved nor returned.
    nyears = len(years)
    outmaps = [np.zeros((height, width, 2)) for _ in range(nyears)]

    for row in range(height):
        for col in range(width):
            ns = width * row + col
            if break_p[ns, 0] == 0:
                continue

            z = break_p[ns, 0]
            int_z = np.arange(t_seq_st[z], t_seq_en[z])
            int_z = np.ceil(int_z / 365)

            for ny in range(nyears):
                perc = np.sum(int_z[int_z == (ny + 1)]) / (365 * (ny + 1))
                perc = perc * 100
def _dtw_max_distance(filename, dataset, n_seeds, nseeds_b):
    """Return the largest finite value of an HDF5 distance matrix, scanned block by block."""
    max_d = 0
    with h5py.File(filename, 'r') as hf:
        dset = hf[dataset]
        for b1 in range(0, n_seeds, nseeds_b):
            for b2 in range(0, n_seeds, nseeds_b):
                block = np.array(dset[b1:b1 + nseeds_b, b2:b2 + nseeds_b])
                finite = block[np.isfinite(block)]
                if finite.size and finite.max() > max_d:
                    max_d = finite.max()
    return max_d


def _top_similarities(values, n_keep, drop_nan=False):
    """Return the ``n_keep`` largest entries of ``values`` in descending order."""
    if drop_nan:
        values = values[~np.isnan(values)]
    return sorted(values, reverse=True)[0:n_keep]


def manager(tile, **kwargs):
    """Land-cover training based on DTW similarity of seed trajectories.

    The stages are switched on through ``kwargs`` and all default to False:
    ``singlefeaturedtw``, ``featureselection``, ``multifeatureDTW``,
    ``similarity`` and ``classprototypes``. Independently of the flags, the
    function reads the single- and multi-feature DTW matrices and
    ``sepa_b_vs_b.npy`` from the ``LCTraining_DTW`` folder of the tile.

    Args:
        tile: tile name, used to build the output folder.
        **kwargs: options read here besides the stage flags:
            info (bool): print progress/timing (default True).
            outpath: root output folder, also walked for the inputs.
            blocksize (int): block edge for the DTW matrices (500).
            n_classes (int): number of land-cover classes (9).
            DTW_max_samp (int): max number of samples of DTW (15).
            simi_decr (float): decrease step of the similarity (0.001).
            All options are also forwarded to the loader/DTW helpers.

    Raises:
        IOError: if no ``.tif`` raster or ``ts.h5`` file is found.
    """
    #SETUP DEFAULT OPTIONS
    info = kwargs.get('info', True)
    outpath = kwargs.get('outpath', None)
    savepath = fm.check_folder(outpath, tile, 'LCTraining_DTW')

    blocksize = kwargs.get('blocksize', 500)
    n_classes = kwargs.get('n_classes', 9)

    singlefeaturedtw = kwargs.get('singlefeaturedtw', False)
    featureselection = kwargs.get('featureselection', False)
    multifeatureDTW = kwargs.get('multifeatureDTW', False)
    similarity = kwargs.get('similarity', False)
    classprototypes = kwargs.get('classprototypes', False)

    DTW_max_samp = kwargs.get('DTW_max_samp', 15)  # max number of samples of DTW
    # Read up front: the prototype stage needs it even when the similarity
    # stage is switched off.
    simi_decr = kwargs.get('simi_decr', 0.001)  # decrese value of similarity measure

    col_nPIXEL = 0
    col_nCLASS = 1
    col_nBAND = 2
    col_DATA = 3

    ###############################
    # GET INFO AND INITIALIZATION #
    ###############################
    # One walk remembers the last raster and the last time-series file found.
    tifpath = None
    tspath = None
    for rootname, _, filenames in os.walk(outpath):
        for f in filenames:
            if f.endswith('ts.h5'):
                tspath = fm.joinpath(rootname, f)
            elif f.endswith('.tif'):
                tifpath = fm.joinpath(rootname, f)
    if tifpath is None or tspath is None:
        raise IOError('Unable to find input data!')

    img = fm.readGeoTIFFD(tifpath, metadata=False)
    _, _, totfeature = img.shape

    with h5py.File(tspath, 'r') as hf:
        NDI_ = np.array(hf["ts"])

    #Get classes intervals
    class_int = np.zeros(n_classes)
    class_int_mask = np.unique(NDI_[:, col_nCLASS]).astype(int).tolist()
    for n in class_int_mask:
        class_int[n - 1] = n
    class_int = class_int.astype(int).tolist()

    #Get number of seeds
    n_seeds = len(np.unique(NDI_[:, col_nPIXEL]))

    #Get number of features
    n_features = totfeature

    #Get number of seeds per class and class seeds mask
    n_seeds_c = np.zeros(n_classes)
    for nc in class_int:
        n_seeds_c[nc - 1] = np.size(NDI_[NDI_[:, col_nCLASS] == nc, :], axis=0)
    n_seeds_c = n_seeds_c.astype(int).tolist()

    seed_class_mask = NDI_[:, col_nCLASS]

    #Define blocksize
    nseeds_b = blocksize

    #Space of analysis parameters
    min_perc_samp_V = np.arange(1, 0.64, -0.03).tolist()  # minimum percentage of total used samples
    min_perc_samp_mod_V = np.ones(12, dtype=float) / np.arange(1, 13)  # minimum percentage of used samples per model
    min_perc_samp_mod_V = min_perc_samp_mod_V.tolist()

    sepa_b_vs_b = np.zeros((12, 12, n_features))

    ##########################################
    # SINGLE FEATURE DTW SIMILARITY MATRICES #
    ##########################################
    if singlefeaturedtw:
        for feature in range(n_features):
            if info:
                t_start = time.time()
                print('Computing DTW feature %i/%i...' %
                      ((feature + 1), n_features),
                      end='\r')

            path = fm.check_folder(savepath, "Singlefeature",
                                   'DTW_matrix_B' + str(feature + 1))

            for b1 in range(0, n_seeds, nseeds_b):
                Seeds_B_B1 = load_block(tile, b1, feature, col_DATA, **kwargs)
                for b2 in range(0, n_seeds, nseeds_b):
                    Seeds_B_B2 = load_block(tile, b2, feature, col_DATA,
                                            **kwargs)
                    singledtw(Seeds_B_B1, Seeds_B_B2, b1, b2, nseeds_b,
                              n_seeds, path, **kwargs)

            if info:
                t_end = time.time()
                print(
                    '\nMODULE 4: calculating DTW for %ith feature..Took %i' %
                    (feature + 1, (t_end - t_start) / 60), 'min')

    #Single feature DTW maximum distance
    DTW_max_d_B = np.zeros(n_features)
    for feature in range(n_features):
        path = fm.check_folder(savepath, "Singlefeature",
                               'DTW_matrix_B' + str(feature + 1))
        filename = fm.joinpath(path, 'DTW_matrix_B.h5')
        DTW_max_d_B[feature] = _dtw_max_distance(filename, "DTW_matrix_B",
                                                 n_seeds, nseeds_b)

    ######################################################
    # FEATURE SPACE ANALYSIS AND FEATURE SPACE REDUCTION #
    ######################################################
    if featureselection:
        for feature in range(n_features):
            if info:
                t_start = time.time()
                print('Feature %i/%i...' % ((feature + 1), n_features),
                      end='\r')

            sepa_c_vs_c = np.zeros((12, 12))
            sepa_c_vs_c_N = np.zeros((12, 12))

            for i, nc in enumerate(class_int_mask):
                n_class_seeds = n_seeds_c[nc - 1]
                c_r = np.delete(class_int_mask, i).tolist()
                for nc1 in c_r:
                    simi_c_W, simi_c_C = load_block_DTW(
                        seed_class_mask, feature, DTW_max_d_B[feature], nc,
                        nc1, savepath)

                    for col_i, min_perc_samp in enumerate(min_perc_samp_V):
                        for row_i, min_perc_samp_mod in enumerate(
                                min_perc_samp_mod_V):
                            n_keep = math.ceil(n_class_seeds *
                                               min_perc_samp_mod *
                                               min_perc_samp)
                            sepa_mea = np.zeros(n_class_seeds)
                            for nsc in range(n_class_seeds):
                                simi_c_C_s = _top_similarities(
                                    simi_c_C[:, nsc], n_keep, drop_nan=True)
                                simi_c_W_s = _top_similarities(
                                    simi_c_W[:, nsc], n_keep)
                                pd_C_mu, pd_C_sigma = scipy.stats.distributions.norm.fit(
                                    simi_c_C_s)
                                pd_W_mu, pd_W_sigma = scipy.stats.distributions.norm.fit(
                                    simi_c_W_s)
                                if pd_C_mu <= pd_W_mu:
                                    sepa_mea[nsc] = np.nan
                                else:
                                    sepa_mea[nsc] = (pd_C_mu - pd_W_mu) / (
                                        pd_C_sigma + pd_W_sigma)

                            valid = sepa_mea[~np.isnan(sepa_mea)]
                            if valid.size / n_class_seeds >= min_perc_samp:
                                sepa_c_vs_c[row_i, col_i] = sepa_c_vs_c[
                                    row_i, col_i] + np.mean(valid)
                                sepa_c_vs_c_N[row_i, col_i] = sepa_c_vs_c_N[
                                    row_i, col_i] + 1

            # NOTE(review): sum times count, kept as in the original -
            # confirm this is not meant to be an average.
            sepa_b_vs_b[..., feature] = sepa_c_vs_c * sepa_c_vs_c_N

            if info:
                t_end = time.time()
                # Parenthesised: the original divided only t_start by 60.
                print(
                    '\nMODULE 4: feature selection for %i th feature..Took %i'
                    % (feature + 1, (t_end - t_start) / 60), 'min')

        np.save(fm.joinpath(savepath, "sepa_b_vs_b.npy"), sepa_b_vs_b)

    #Search for Class Cluster Parameters
    sepa_b_vs_b = np.load(fm.joinpath(savepath, "sepa_b_vs_b.npy"))

    sepa_FS = np.zeros((12, 12))
    for nb in range(n_features):
        sepa_FS = sepa_FS + sepa_b_vs_b[:, :, nb]

    mean_sepa_FS = np.mean(sepa_FS, axis=1)
    max_sepa_pos_samp_x_mod_FS = np.argmax(mean_sepa_FS)
    mean_sepa_max_v_FS = sepa_FS[max_sepa_pos_samp_x_mod_FS, :]
    mean_sepa_max_v_derivate_FS = np.diff(mean_sepa_max_v_FS)
    mean_sepa_max_v_derivate_FS = mean_sepa_max_v_derivate_FS / np.max(
        mean_sepa_max_v_derivate_FS)
    mean_sepa_max_v_derivate_FS = mean_sepa_max_v_derivate_FS * mean_sepa_max_v_FS[1:]
    max_sepa_pos_perc_samp_FS = np.argmax(mean_sepa_max_v_derivate_FS) + 1

    min_perc_samp = min_perc_samp_V[max_sepa_pos_perc_samp_FS]
    min_perc_samp_mod = min_perc_samp * min_perc_samp_mod_V[
        max_sepa_pos_samp_x_mod_FS]
    max_mod_class = np.round(min_perc_samp / min_perc_samp_mod)

    #######################################
    # MULTI FEATURE DTW SIMILARITY MATRIX #
    #######################################
    if multifeatureDTW:
        if info:
            t_start = time.time()
            print('Computing multifeature DTW ...', end='\r')

        path = fm.check_folder(savepath, 'Multifeature')

        for b1 in range(0, n_seeds, nseeds_b):
            Seeds_B1 = load_block_multifeature(tile, b1, n_features, col_DATA,
                                               **kwargs)
            for b2 in range(0, n_seeds, nseeds_b):
                # Second block starts at b2 (the original passed b1 again).
                Seeds_B2 = load_block_multifeature(tile, b2, n_features,
                                                   col_DATA, **kwargs)
                multidtw(Seeds_B1, Seeds_B2, b1, b2, nseeds_b, n_seeds, path,
                         **kwargs)

        if info:
            t_end = time.time()
            print(
                '\nMODULE 4: calculating multifeature DTW ...Took %i' %
                ((t_end - t_start) / 60), 'min')

    #Multi feature DTW maximum distance
    path = fm.check_folder(savepath, 'Multifeature')
    filename = fm.joinpath(path, 'DTW_matrix.h5')
    DTW_max_d = _dtw_max_distance(filename, "DTW_matrix", n_seeds, nseeds_b)

    #######################
    # SIMILARITY ANALYSIS #
    #######################
    if similarity:
        n_present = len(class_int_mask)
        min_c_vs_c = np.zeros((n_present, n_present - 1))
        max_c_vs_c = np.zeros((n_present, n_present - 1))
        mean_c_vs_c = np.zeros((n_present, n_present - 1))
        simi_low = np.zeros((n_present))

        for i, nc in enumerate(class_int_mask):
            n_class_seeds = n_seeds_c[nc - 1]
            n_keep = math.ceil(n_class_seeds * min_perc_samp_mod *
                               min_perc_samp)
            c_r = np.delete(class_int_mask, i).tolist()
            for n, nc1 in enumerate(c_r):
                simi_c_W, simi_c_C = load_block_DTW_multi(
                    seed_class_mask, DTW_max_d, nc, nc1, savepath)

                min_c_s = np.zeros((n_class_seeds))
                max_c_s = np.zeros((n_class_seeds))
                for nsc in range(n_class_seeds):
                    simi_c_C_s = _top_similarities(simi_c_C[:, nsc], n_keep,
                                                   drop_nan=True)
                    simi_c_W_s = _top_similarities(simi_c_W[:, nsc], n_keep)
                    pd_C_mu, pd_C_sigma = scipy.stats.distributions.norm.fit(
                        simi_c_C_s)
                    pd_W_mu, pd_W_sigma = scipy.stats.distributions.norm.fit(
                        simi_c_W_s)

                    if pd_C_mu <= pd_W_mu:
                        min_c_s[nsc] = np.nan
                        continue

                    a = scipy.stats.norm(pd_C_mu, pd_C_sigma).pdf(
                        np.arange(0, 1, simi_decr))
                    b = scipy.stats.norm(pd_W_mu, pd_W_sigma).pdf(
                        np.arange(0, 1, simi_decr))
                    grid = np.int64(
                        np.arange(np.floor(pd_W_mu * (1 / simi_decr)),
                                  (math.ceil(pd_C_mu * (1 / simi_decr)) + 1),
                                  1000 * simi_decr))

                    # First grid point (from below) where the correct-class
                    # density reaches the wrong-class one; NaN if none.
                    for int_mu in grid:
                        if (round(b[int_mu - 1], 1) -
                                round(a[int_mu - 1], 1) <= 0):
                            min_c_s[nsc] = int_mu * simi_decr
                            break
                    else:
                        min_c_s[nsc] = np.nan

                    # Same search from above with the roles swapped.
                    for int_mu in np.flipud(grid):
                        if (round(a[int_mu - 1], 1) -
                                round(b[int_mu - 1], 1) <= 0):
                            max_c_s[nsc] = int_mu * simi_decr
                            break
                    else:
                        max_c_s[nsc] = np.nan

                min_c_vs_c[i, n] = np.mean(min_c_s[~np.isnan(min_c_s)])
                max_c_vs_c[i, n] = np.mean(max_c_s[~np.isnan(max_c_s)])
                mean_c_vs_c[i, n] = min_c_vs_c[i, n]  #mean([min_c_vs_c(nc,nc1) max_c_vs_c(nc,nc1)])

            simi_low[i] = np.max(mean_c_vs_c[i, :])

        np.save(fm.joinpath(savepath, "simi_low.npy"), simi_low)

    ###############################
    # CLASS PROTOTYPES GENERATION #
    ###############################
    if classprototypes:
        pass_table = np.zeros(n_classes)  # array of pass/no pass
        models_C = [None] * n_classes  # variable that contains the models seeds
        used_models = np.zeros(n_classes)  # array of number of model used per class
        used_samples_perc = np.zeros(n_classes)  # array of used samples per class
        used_simi = np.zeros(n_classes)  # array of used similarity per class

        for i, nc in enumerate(class_int_mask):
            n_class_seeds = n_seeds_c[nc - 1]
            max_s = 1  # set max similarity = 1
            min_s = 0  #simi_low(nc); # set min similarity

            # The matrix does not depend on the threshold: load it once
            # instead of on every pass of the loop below.
            _, dist_simi_c = load_block_DTW_multi(seed_class_mask, DTW_max_d,
                                                  nc, nc, savepath)

            while pass_table[nc - 1] == 0:
                # check class seed with a similarity major then the threshold
                count_simi_c = (dist_simi_c > max_s)
                # compute the mean similarity value per seed for each accepted other seed
                mean_simi_c = np.empty((n_class_seeds)) * np.nan
                for nsc in range(n_class_seeds):
                    mean_simi_c[nsc] = np.mean(dist_simi_c[count_simi_c[:, nsc],
                                                           nsc])

                # form a matrix with [seed ID | number of accepted seeds | mean similarity for accepted seeds]
                simi_order = np.column_stack([
                    np.arange(0, n_class_seeds, 1),
                    np.sum(count_simi_c, axis=0), mean_simi_c
                ])

                # order the seeds
                # NOTE(review): the sort key is the seed ID (column 0), as in
                # the original - confirm it should not be columns 1/2.
                simi_order = np.array(
                    simi_order[np.argsort(-simi_order[:, 0])], dtype=int)

                models = []  # initialize the models

                for nsc in range(n_class_seeds):
                    seed = simi_order[nsc, 0]
                    n_mod = len(models)  #number of exist models

                    if n_mod == 0:  # if the number of models is zero, just insert the initial seed
                        models.append(seed)
                        continue

                    # else check if any model can accept the new seed
                    simi = np.zeros((n_mod, 3))  #initialize the similarity matrix

                    # for each model check if all seed can accept the new one
                    for nm in range(n_mod):
                        seed_int = models[nm]  # get seed ID interval
                        # form a matrix with [model ID | acceptance value | mean similarity between new seed and model seeds]
                        simi[nm, :] = [
                            nm,
                            np.sum((dist_simi_c[seed, seed_int] > max_s) * 1)
                            >= (np.ceil(np.size(seed_int) * 1)),
                            np.mean(dist_simi_c[seed, seed_int])
                        ]

                    # sort the similarity matrix to get the most similar model
                    simi = np.array(simi[np.argsort(-simi[:, 2])], dtype=int)

                    if simi[0, 1] == 1:  # if the first model can accept the new seed, insert it
                        models[simi[0, 0]] = list(
                            flatten([models[simi[0, 0]], seed]))
                    else:  # otherwise create a new model and insert the seed
                        models.append(seed)

                # delete models with a percentage of seed lower than the threshold
                # (explicit size test: filter(None, ...) also dropped a valid
                # model made of seed 0, and np.size fails on ragged lists)
                min_size = math.ceil(n_class_seeds * min_perc_samp_mod)
                models = [m for m in models if np.size(m) >= min_size]

                u_models = len(models)  # get number of used models
                # compute the percentage of used seeds
                u_samples = sum(np.size(m) for m in models) / n_class_seeds

                # if the pass condition are respected update the output matrixes
                if ((u_models <= max_mod_class)
                        and (bool(u_samples >= min_perc_samp))):
                    pass_table[nc - 1] = 1
                    models_C[nc - 1] = models
                    used_models[nc - 1] = u_models
                    used_samples_perc[nc - 1] = u_samples
                    used_simi[nc - 1] = max_s
                elif ((max_s > min_s) and (max_s > simi_decr)):
                    # otherwise decrease the similarity threshold
                    max_s = max_s - simi_decr
                    print(max_s)
                else:
                    # or if not possible put in the pass table a false value
                    pass_table[nc - 1] = 2

        # class prototypes creation
        # n_features is an integer, so loop over range(n_features). Each
        # feature is loaded once and reused below.
        # NOTE(review): assumes load_Seeds_FR takes the 0-based feature index
        # (like load_block) and that the band column holds feature + 1, as
        # written by the time-series module - confirm.
        seeds_by_feature = [
            load_Seeds_FR(tile, nb, col_DATA, **kwargs)
            for nb in range(n_features)
        ]

        models = [[[] for _ in range(n_features)] for _ in range(n_classes)]
        for nc in (class_int_mask):
            class_models = models_C[nc - 1] or []  # None when no model passed
            for nb, (Seeds_FR, Seeds_F) in enumerate(seeds_by_feature):
                m1 = Seeds_F[:, col_nCLASS] == nc
                m2 = Seeds_F[:, col_nBAND] == nb + 1
                m3 = np.logical_and(m1, m2)
                TABLE_cb = Seeds_FR[m3, :]
                for seed_ids in class_models:
                    # atleast_1d keeps single-seed models two-dimensional.
                    TABLE_cbm = TABLE_cb[np.atleast_1d(seed_ids), :]
                    traj = np.mean(TABLE_cbm, 0)
                    models[nc - 1][nb].append(traj)

        # prototypes vs samples
        Seeds_FR, Seeds_F = seeds_by_feature[-1]
        _, col = Seeds_FR.shape
        Traj1 = np.zeros((n_features, col))
        sampleVSmodels = np.zeros((n_seeds, n_classes + 3))
        for ns in range(n_seeds):
            for nb, (feature_FR, _) in enumerate(seeds_by_feature):
                Traj1[nb, :] = feature_FR[ns, :]

            sample_simi = [ns, Seeds_F[ns, col_nCLASS], 0]

            for nc in (class_int):
                max_simi = 0
                if nc != 0:
                    # NOTE(review): models[nc - 1] is indexed by feature
                    # here, so each Traj2 is the list of prototypes of one
                    # feature - confirm the intended layout.
                    for nm in range(len(models[nc - 1])):
                        Traj2 = models[nc - 1][nm]
                        simi = ((DTW_max_d - distance_fast(
                            Traj1, Traj2, max_step=DTW_max_samp)) / DTW_max_d)
                        max_simi = np.max([max_simi, simi])
                sample_simi.append(max_simi)

            max_v = max(sample_simi[3:])
            max_p = sample_simi[3:].index(max_v)
            sample_simi[2] = max_p + 1
            sampleVSmodels[ns, :] = sample_simi

        #confusion matrix between training samples and prototypes
        CM_S = confusion_matrix(sampleVSmodels[:, 1], sampleVSmodels[:, 2])
def _append_rows(filename, dataset, rows):
    """Create ``dataset`` in ``filename`` from ``rows``, or append ``rows`` to it."""
    if not os.path.isfile(filename):
        with h5py.File(filename, 'w') as hf:
            hf.create_dataset(dataset,
                              data=rows,
                              chunks=True,
                              maxshape=(None, rows.shape[1]))
    else:
        with h5py.File(filename, 'a') as hf:
            hf[dataset].resize((hf[dataset].shape[0] + rows.shape[0]), axis=0)
            hf[dataset][-rows.shape[0]:] = rows


def manager(tilename, **kwargs):
    """Reconstruct the NDI time series of a tile, block by block.

    For every feature and every ``blocksize`` x ``blocksize`` block, the
    pixels are processed in parallel with ``parallel_manager``; the results
    are appended to ``ts.h5`` and ``mse.h5`` in
    ``<savepath>/NDI_TimeSeries/NDI<feature>``.

    Each stored row starts with ``[pixel number, class, feature number]``,
    followed by the reconstructed series (``ts``) or the error value
    (``mse``).

    Args:
        tilename: tile name, used to locate the temporary numpy folder.
        **kwargs: options read here:
            info (bool): print progress (default True).
            blocksize (int): block edge in pixels (default 200).
            mappath: classification map raster; when None every pixel gets
                class 0.
            savepath: per tile/year folder containing ``Features``.
            maindir: toolbox main directory.
            All options are also forwarded to ``loadts_block``.

    Raises:
        IOError: if no feature raster is found in the ``Features`` folder.
    """
    #SETUP DEFAULT OPTIONS
    info = kwargs.get('info', True)
    blocksize = kwargs.get('blocksize', 200)
    mappath = kwargs.get('mappath', None)

    #PATHS
    loadpath = fm.check_folder(kwargs.get('savepath', None), 'Features')
    savepath = fm.check_folder(kwargs.get('savepath', None), 'NDI_TimeSeries')

    maindir = kwargs.get('maindir', None)
    temppath = fm.joinpath(maindir, 'numpy', tilename)

    #GET IMAGE INFO
    fn = [f for f in os.listdir(loadpath) if f.endswith('.tif')]
    if not fn:
        raise IOError('Unable to find input data!')
    img = fm.readGeoTIFFD(fm.joinpath(loadpath, fn[0]), metadata=False)
    height, width, totfeatures = img.shape

    #LOAD CLASSIFICATION MAP
    if mappath is not None:
        classmap = fm.readGeoTIFFD(mappath, metadata=False)
    else:
        # The original called np.empty(heigth, width): a misspelt name and a
        # shape that was not a tuple. Zeros give a defined class value.
        classmap = np.zeros((height, width))

    #ALLOC VARIABLES
    npixels = blocksize * blocksize
    rects = np.empty((npixels, 368))
    mse = np.empty((npixels, 4))

    # Half of the CPUs to account for virtual cores, but never fewer than one
    # worker (Pool rejects 0, and cpu_count() may return None).
    corecount = max(1, int((os.cpu_count() or 2) / 2 - 1))

    #LOOP THROUGH FEATURES
    for feature in range(totfeatures):
        if info:
            print('Reconstructing feature %i/%i...' %
                  ((feature + 1), totfeatures),
                  end='\r')

        folder = 'NDI' + str(feature + 1)
        path = fm.check_folder(savepath, folder)

        #FOR EACH BLOCK POSITION
        # NOTE(review): `i` steps over the width but offsets the row index
        # (and `j` vice versa); equivalent for square tiles - confirm for
        # non-square rasters.
        for i in range(0, width, blocksize):
            for j in range(0, height, blocksize):
                matr, mask, days = loadts_block(i, j, feature, loadpath,
                                                temppath, **kwargs)

                counter = Value('i', 0)
                p = Pool(corecount,
                         initializer=counterinit,
                         initargs=(counter, ))
                try:
                    results = p.map(
                        partial(parallel_manager,
                                matr=matr,
                                mask=mask,
                                days=days,
                                blocksize=blocksize), range(npixels))
                finally:
                    # Release the workers even if a task raised.
                    p.close()
                    p.join()

                for npx in range(npixels):
                    row, col = divmod(npx, blocksize)
                    row = row + i
                    col = col + j

                    header = (width * row + col, classmap[row, col],
                              feature + 1)
                    rects[npx, 0:3] = header
                    mse[npx, 0:3] = header

                    rects[npx, 3:] = results[npx][0]
                    mse[npx, 3] = results[npx][1]

                _append_rows(fm.joinpath(path, 'ts.h5'), "ts", rects)
                _append_rows(fm.joinpath(path, 'mse.h5'), "mse", mse)