def __compute_spectral_index(self, img_group_1, img_group_2):
    """ Compute spectral index alpha """

    # - Check first if frequency data are available (user-given or from header)
    logger.debug("img_freqs=%s, nchans=%d, img_group_1=%s, img_group_2=%s" % (str(self.img_freqs), len(self.img_data), str(img_group_1), str(img_group_2)))

    freqs = []
    if self.img_freqs and len(self.img_freqs) == len(self.img_data):
        freqs = self.img_freqs
    else:
        if self.img_freqs_head and len(self.img_freqs_head) == len(self.img_data):
            freqs = self.img_freqs_head
        else:
            logger.error("No frequency data given (user/header)!")
            return -1

    # - Check group indexes
    if len(img_group_1) != len(img_group_2):
        logger.error("Group indexes do not have the same length!")
        return -1

    # - Check group indices are within available channels
    for index in img_group_1:
        if index < 0 or index >= self.nchannels:
            logger.error("Invalid index (%d) in group 1, must be in range [0,%d]!" % (index, self.nchannels - 1))
            return -1

    for index in img_group_2:
        if index < 0 or index >= self.nchannels:
            logger.error("Invalid index (%d) in group 2, must be in range [0,%d]!" % (index, self.nchannels - 1))
            return -1

    # - Loop over img combinations and compute spectral indices
    logger.info("Computing spectral index (#%d combinations) ..." % (len(img_group_1)))
    alphas = []
    rcoeffs = []
    smask = self.img_data_mask[self.refch]

    for i in range(len(img_group_1)):
        index_1 = img_group_1[i]
        index_2 = img_group_2[i]
        data_1 = self.img_data[index_1]
        data_2 = self.img_data[index_2]

        # - Find frequency from header
        nu1 = freqs[index_1]
        nu2 = freqs[index_2]

        outtuple = self.__compute_alpha(data_1, data_2, nu1, nu2, smask)
        if outtuple is None:
            logger.warn("alpha calculation failed for map combination %d-%d, skip to next ..." % (index_1, index_2))
            continue

        alpha = outtuple[0]
        r = outtuple[1]
        alphas.append(alpha)
        rcoeffs.append(r)

    # - Compute average spectral index over all valid map combinations
    logger.info("Computing average spectral index ...")
    alphas = np.array(alphas)
    alphas = alphas[np.isfinite(alphas)]

    if alphas.size == 0:
        logger.warn("No alpha measurement left (all nans), will set alpha values to -999 ...")
        alpha_mean = -999
        alpha_median = -999
        alpha_min = -999
        alpha_max = -999
    else:
        alpha_mean = np.mean(alphas)
        alpha_median = np.median(alphas)
        alpha_min = np.min(alphas)
        alpha_max = np.max(alphas)

    rcoeffs = np.array(rcoeffs)
    rcoeffs = rcoeffs[np.isfinite(rcoeffs)]

    if rcoeffs.size == 0:
        logger.warn("No rcoeff measurement left (all nans), will set rcoeff values to -999 ...")
        rcoeff_mean = -999
        rcoeff_median = -999
        rcoeff_min = -999
        rcoeff_max = -999
    else:
        rcoeff_mean = np.mean(rcoeffs)
        rcoeff_median = np.median(rcoeffs)
        rcoeff_min = np.min(rcoeffs)
        rcoeff_max = np.max(rcoeffs)

    # - Set spectral index (flag as good only if fit correlation is above threshold)
    self.alpha = alpha_mean
    self.rcoeff = rcoeff_mean
    self.has_good_alpha = (self.alpha != -999 and self.rcoeff >= self.rcoeff_thr)

    return 0
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    datalist = args.datalist

    # - Data process options
    nx = args.nx
    ny = args.ny
    normalize = args.normalize
    scale_to_abs_max = args.scale_to_abs_max
    scale_to_max = args.scale_to_max
    log_transform = args.log_transform
    scale = args.scale
    scale_factors = []
    if args.scale_factors != "":
        scale_factors = [float(x.strip()) for x in args.scale_factors.split(',')]
    standardize = args.standardize
    img_means = []
    img_sigmas = []
    if args.img_means != "":
        img_means = [float(x.strip()) for x in args.img_means.split(',')]
    if args.img_sigmas != "":
        img_sigmas = [float(x.strip()) for x in args.img_sigmas.split(',')]
    chan_divide = args.chan_divide
    chan_mins = []
    if args.chan_mins != "":
        chan_mins = [float(x.strip()) for x in args.chan_mins.split(',')]
    erode = args.erode
    erode_kernel = args.erode_kernel

    # - Autoencoder options
    modelfile_encoder = args.modelfile_encoder
    weightfile_encoder = args.weightfile_encoder

    # - UMAP options
    run_umap = args.run_umap
    modelfile_umap = args.modelfile_umap
    outfile_umap_unsupervised = args.outfile_umap_unsupervised

    # - Clustering options
    run_clustering = args.run_clustering
    min_cluster_size = args.min_cluster_size
    min_samples = args.min_samples
    modelfile_clust = args.modelfile_clust
    predict_clust = args.predict_clust

    #===========================
    #==   READ DATALIST
    #===========================
    # - Create data loader
    dl = DataLoader(filename=datalist)

    # - Read datalist
    logger.info("Reading datalist %s ..." % datalist)
    if dl.read_datalist() < 0:
        logger.error("Failed to read input datalist!")
        return 1

    #===============================
    #==   RUN AUTOENCODER PREDICT
    #===============================
    logger.info("Running autoencoder classifier predict ...")
    vae_class = FeatExtractorAE(dl)
    vae_class.set_image_size(nx, ny)
    vae_class.normalize = normalize
    vae_class.scale_to_abs_max = scale_to_abs_max
    vae_class.scale_to_max = scale_to_max
    vae_class.log_transform_img = log_transform
    vae_class.scale_img = scale
    vae_class.scale_img_factors = scale_factors
    vae_class.standardize_img = standardize
    vae_class.img_means = img_means
    vae_class.img_sigmas = img_sigmas
    vae_class.chan_divide = chan_divide
    vae_class.chan_mins = chan_mins
    vae_class.erode = erode
    vae_class.erode_kernel = erode_kernel

    if vae_class.predict_model(modelfile_encoder, weightfile_encoder) < 0:
        logger.error("VAE predict failed!")
        return 1

    #===========================
    #==   RUN UMAP PREDICT
    #===========================
    if run_umap:
        # - Retrieve VAE encoded data
        logger.info("Retrieve latent data from autoencoder ...")
        snames = vae_class.source_names
        classids = vae_class.source_ids
        vae_data = vae_class.encoded_data

        # - Run UMAP
        logger.info("Running UMAP classifier prediction on autoencoder latent data ...")
        umap_class = FeatExtractorUMAP()
        umap_class.set_encoded_data_unsupervised_outfile(outfile_umap_unsupervised)

        if umap_class.run_predict(vae_data, class_ids=classids, snames=snames, modelfile=modelfile_umap) < 0:
            logger.error("UMAP prediction failed!")
            return 1

    #==============================
    #==   RUN CLUSTERING
    #==============================
    if run_clustering:
        # - Retrieve VAE encoded data
        logger.info("Retrieve latent data from VAE ...")
        snames = vae_class.source_names
        classids = vae_class.source_ids
        vae_data = vae_class.encoded_data

        # - Run HDBSCAN clustering
        logger.info("Running HDBSCAN classifier prediction on autoencoder latent data ...")
        clust_class = Clusterer()
        clust_class.min_cluster_size = min_cluster_size
        clust_class.min_samples = min_samples

        if predict_clust:
            if clust_class.run_predict(vae_data, class_ids=classids, snames=snames, modelfile=modelfile_clust) < 0:
                logger.error("Clustering predict failed!")
                return 1
        else:
            if clust_class.run_clustering(vae_data, class_ids=classids, snames=snames, modelfile=modelfile_clust) < 0:
                logger.error("Clustering run failed!")
                return 1

    return 0
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    inputfile = args.inputfile

    # - Data pre-processing
    normalize = args.normalize
    reduce_dim = args.reduce_dim
    reduce_dim_method = args.reduce_dim_method
    pca_ncomps = args.pca_ncomps
    pca_varthr = args.pca_varthr

    # - Clustering options
    min_cluster_size = args.min_cluster_size
    min_samples = args.min_samples
    modelfile_clust = args.modelfile_clust
    predict_clust = args.predict_clust

    #===========================
    #==   READ FEATURE DATA
    #===========================
    ret = Utils.read_feature_data(inputfile)
    if not ret:
        logger.error("Failed to read data from file %s!" % (inputfile))
        return 1

    data = ret[0]
    snames = ret[1]
    classids = ret[2]

    #==============================
    #==   RUN CLUSTERING
    #==============================
    logger.info("Running HDBSCAN classifier prediction on input feature data ...")
    clust_class = Clusterer()
    clust_class.min_cluster_size = min_cluster_size
    clust_class.min_samples = min_samples
    clust_class.normalize = normalize
    clust_class.reduce_dim = reduce_dim
    clust_class.reduce_dim_method = reduce_dim_method
    clust_class.pca_ncomps = pca_ncomps
    clust_class.pca_varthr = pca_varthr

    if predict_clust:
        if clust_class.run_predict(data, class_ids=classids, snames=snames, modelfile=modelfile_clust) < 0:
            logger.error("Clustering predict failed!")
            return 1
    else:
        if clust_class.run_clustering(data, class_ids=classids, snames=snames, modelfile=modelfile_clust) < 0:
            logger.error("Clustering run failed!")
            return 1

    return 0
def __compute_alpha(self, data_1, data_2, nu1, nu2, smask):
    """ Compute alpha from a T-T fit of two maps """

    # - Get array of pixels !=0 & finite in both maps (and inside source mask)
    cond_img1 = np.logical_and(data_1 != 0, np.isfinite(data_1))
    cond_img2 = np.logical_and(data_2 != 0, np.isfinite(data_2))
    cond_img12 = np.logical_and(cond_img1, cond_img2)
    cond_final = np.logical_and(cond_img12, smask == 1)
    indexes = np.where(cond_final)
    img_1d_1 = data_1[indexes]
    img_1d_2 = data_2[indexes]
    logger.info("#%d pixels in image 1 ..." % (len(img_1d_1)))
    logger.info("#%d pixels in image 2 ..." % (len(img_1d_2)))

    if len(img_1d_1) <= 0 or len(img_1d_2) <= 0:
        logger.warn("No pixels left for T-T analysis after applying conditions (finite+mask) (hint: check if source is outside one or more channels)")
        return None

    # - Perform fit 1-2
    logger.info("Compute spectral index from T-T fit ...")
    res_12 = linregress(img_1d_1, img_1d_2)
    slope_12 = res_12.slope
    intercept_12 = res_12.intercept
    alpha_12 = self.__slope2alpha(slope_12, nu1, nu2)
    r_12 = res_12.rvalue
    logger.debug("FIT RES 1-2: %s, alpha_12=%f" % (str(res_12), alpha_12))

    # - Perform fit 2-1
    res_21 = linregress(img_1d_2, img_1d_1)
    slope_21 = res_21.slope
    intercept_21 = res_21.intercept
    alpha_21 = self.__slope2alpha(slope_21, nu2, nu1)
    r_21 = res_21.rvalue
    logger.debug("FIT RES 2-1: %s, alpha_21=%f" % (str(res_21), alpha_21))

    # - Flag fits with nan or non-positive slope
    goodvalues_12 = np.isfinite(slope_12) and slope_12 > 0
    goodvalues_21 = np.isfinite(slope_21) and slope_21 > 0

    # - Add some goodness-of-fit criteria (residual bias/spread and corr. coeff)
    obs_12 = img_1d_2
    pred_12 = slope_12 * img_1d_1 + intercept_12
    residuals_12 = obs_12 - pred_12
    residuals_mean_12 = np.mean(residuals_12)
    residuals_std_12 = np.std(residuals_12)
    residuals_min_12 = np.min(residuals_12)
    residuals_max_12 = np.max(residuals_12)

    obs_21 = img_1d_1
    pred_21 = slope_21 * img_1d_2 + intercept_21
    residuals_21 = obs_21 - pred_21
    residuals_mean_21 = np.mean(residuals_21)
    residuals_std_21 = np.std(residuals_21)
    residuals_min_21 = np.min(residuals_21)
    residuals_max_21 = np.max(residuals_21)

    # - Set return tuple: take the only good fit if one is bad, otherwise
    #   select the best model (the correlation coefficient drives the final choice)
    outtuple = ()
    if goodvalues_12 and not goodvalues_21:
        outtuple = (alpha_12, r_12, residuals_mean_12, residuals_std_12, residuals_min_12, residuals_max_12)
    elif goodvalues_21 and not goodvalues_12:
        outtuple = (alpha_21, r_21, residuals_mean_21, residuals_std_21, residuals_min_21, residuals_max_21)
    else:
        best_resbias_id = 1
        best_resstd_id = 1
        best_rcoeff_id = 1
        if np.abs(residuals_mean_21) < np.abs(residuals_mean_12):  # check smallest residual bias
            best_resbias_id = 2
        if np.abs(residuals_std_21) < np.abs(residuals_std_12):  # check smallest residual std dev
            best_resstd_id = 2
        if np.abs(r_21) > np.abs(r_12):  # check larger (closer to 1) correlation coeff
            best_rcoeff_id = 2

        if best_rcoeff_id == 1:
            outtuple = (alpha_12, r_12, residuals_mean_12, residuals_std_12, residuals_min_12, residuals_max_12)
        else:
            outtuple = (alpha_21, r_21, residuals_mean_21, residuals_std_21, residuals_min_21, residuals_max_21)

    return outtuple
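# Minimal standalone sketch of the T-T fit used in __compute_alpha above,
# assuming __slope2alpha implements the standard power-law relation
# S(nu) ~ nu^alpha, i.e. slope = (nu2/nu1)^alpha and hence
# alpha = log10(slope) / log10(nu2/nu1). The data below are mock values,
# not part of the original module.
import numpy as np
from scipy.stats import linregress

rng = np.random.default_rng(42)
nu1, nu2 = 1.4e9, 3.0e9                        # observing frequencies (Hz)
alpha_true = -0.7
flux_1 = rng.uniform(1., 10., 100)             # mock pixel fluxes at nu1
flux_2 = flux_1 * (nu2 / nu1)**alpha_true      # same pixels scaled to nu2

res = linregress(flux_1, flux_2)
alpha_est = np.log10(res.slope) / np.log10(nu2 / nu1)
print("alpha=%.3f (r=%.3f)" % (alpha_est, res.rvalue))  # recovers ~ -0.7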
def __evaluate_model(self):
    """ Evaluate model """

    # - Create pipeline and models
    logger.info("Creating pipeline and model ...")
    if self.__create_pipeline() < 0:
        logger.error("Failed to create pipeline and model!")
        return -1

    # - Evaluate models
    logger.info("Evaluating models as a function of #features ...")
    results = list()
    rfe_best = None
    score_best = -1
    nfeat_best = -1
    rfe_best_index = -1
    scores_stats = []

    for i in range(len(self.nfeats)):
        n = self.nfeats[i]
        p = self.pipelines[i]
        scores = cross_val_score(p, self.data_preclassified, self.data_preclassified_targets, scoring=self.scoring, cv=self.cv, n_jobs=self.ncores, error_score='raise')
        scores_mean = np.mean(scores)
        scores_std = np.std(scores)
        scores_min = np.min(scores)
        scores_max = np.max(scores)
        scores_median = np.median(scores)
        scores_q1 = np.percentile(scores, 25)
        scores_q3 = np.percentile(scores, 75)
        scores_stats.append([n, scores_mean, scores_std, scores_min, scores_max, scores_median, scores_q1, scores_q3])
        results.append(scores)

        if scores_mean > score_best:
            score_best = scores_mean
            nfeat_best = n
            rfe_best_index = i

        logger.info('--> nfeats=%d: score=%.3f (std=%.3f)' % (n, scores_mean, scores_std))

    # - Save scores stats
    logger.info("Saving score stats ...")
    scores_head = "# n mean std min max median q1 q3"
    scores_stats = np.array(scores_stats).reshape(len(self.nfeats), 8)
    Utils.write_ascii(scores_stats, self.outfile_scorestats, scores_head)

    # - Evaluate automatically-selected model?
    rfe_best = None
    if self.auto_selection:
        logger.info("Evaluate model (automated feature selection) ...")
        scores = cross_val_score(self.pipeline, self.data_preclassified, self.data_preclassified_targets, scoring=self.scoring, cv=self.cv, n_jobs=self.ncores, error_score='raise')
        best_scores_mean = np.mean(scores)
        best_scores_std = np.std(scores)
        logger.info('Selecting best scores automatically: %.3f (std=%.3f)' % (best_scores_mean, best_scores_std))
        rfe_best = self.rfe
    else:
        logger.info("Selecting best model after scan: index=%d, n_feat=%d, score=%.3f" % (rfe_best_index, nfeat_best, score_best))
        rfe_best = RFE(estimator=self.models[rfe_best_index], n_features_to_select=nfeat_best)

    # - Fit data and show which features were selected
    logger.info("Fitting RFE model on dataset ...")
    rfe_best.fit(self.data_preclassified, self.data_preclassified_targets)
    selfeats = rfe_best.support_
    featranks = rfe_best.ranking_
    nfeat_sel = rfe_best.n_features_

    self.selfeatids = []
    for i in range(self.data_preclassified.shape[1]):
        logger.info('Feature %d: selected? %d (rank=%d)' % (i, selfeats[i], featranks[i]))
        if selfeats[i]:
            self.selfeatids.append(i)
    self.selfeatids.sort()

    # - Extract selected data columns
    logger.info("Extracting selected data columns (N=%d) from original data ..." % (nfeat_sel))
    self.data_sel = self.data[:, selfeats]
    self.data_preclassified_sel = self.data_preclassified[:, selfeats]

    # - Plot results
    logger.info("Plotting and saving feature score results ...")
    plt.boxplot(results, labels=self.nfeats, showmeans=True)
    plt.savefig(self.outfile_scores)

    return 0
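# Minimal standalone sketch of the scan performed in __evaluate_model above:
# an RFE feature selector wrapped in a scikit-learn Pipeline and scored with
# cross_val_score for several n_features_to_select values. The dataset and
# estimator choices are illustrative, not the module's actual configuration.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline

X, y = make_classification(n_samples=200, n_features=10, random_state=1)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

for n in range(2, 6):
    rfe = RFE(estimator=LogisticRegression(), n_features_to_select=n)
    pipe = Pipeline([('sel', rfe), ('clf', LogisticRegression())])
    scores = cross_val_score(pipe, X, y, scoring='accuracy', cv=cv)
    print("nfeats=%d: score=%.3f (std=%.3f)" % (n, scores.mean(), scores.std()))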
def set_data(self, featdata, class_ids=[], snames=[]):
    """ Set data from input array. Optionally give labels & obj names """

    # - Set data vectors
    self.data_labels = []
    self.data_classids = []
    self.data_targets = []
    self.source_names = []

    # - Set feature data
    self.data = featdata
    data_shape = self.data.shape

    if self.data.size == 0:
        logger.error("Empty feature data vector given!")
        return -1

    self.nsamples = data_shape[0]
    self.nfeatures = data_shape[1]

    # - Set class ids & labels
    if class_ids:
        nids = len(class_ids)
        if nids != self.nsamples:
            logger.error("Given class ids have size (%d) different than feature data (%d)!" % (nids, self.nsamples))
            return -1
        self.data_classids = class_ids

        for classid in self.data_classids:
            label = self.classid_label_map[classid]
            self.data_labels.append(label)
    else:
        self.data_classids = [0] * self.nsamples  # Init to unknown type
        self.data_labels = ["UNKNOWN"] * self.nsamples

    # - Set target ids
    for j in range(len(self.data_classids)):
        obj_id = self.data_classids[j]
        targetid = self.classid_remap[obj_id]  # remap obj id in class id
        self.data_targets.append(targetid)

    # - Set obj names
    if snames:
        n = len(snames)
        if n != self.nsamples:
            logger.error("Given source names have size (%d) different than feature data (%d)!" % (n, self.nsamples))
            return -1
        self.source_names = snames
    else:
        self.source_names = ["XXX"] * self.nsamples  # Init to unclassified

    logger.info("#nsamples=%d, #nfeatures=%d" % (self.nsamples, self.nfeatures))

    # - Normalize feature data?
    if self.normalize:
        logger.info("Normalizing feature data ...")
        data_norm = self.__normalize_data(self.data, self.norm_min, self.norm_max)
        self.data = data_norm

    # - Set pre-classified data
    logger.info("Setting pre-classified data (if any) ...")
    self.__set_preclass_data()

    return 0
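# __normalize_data is not shown in this section; a common choice consistent
# with its (norm_min, norm_max) arguments is per-feature min-max scaling, as
# in this hypothetical sketch:
import numpy as np

def minmax_normalize(data, norm_min=0., norm_max=1.):
    """Scale each feature column into [norm_min, norm_max]."""
    dmin = data.min(axis=0)
    dmax = data.max(axis=0)
    return norm_min + (data - dmin) * (norm_max - norm_min) / (dmax - dmin)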
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    inputfile = args.inputfile

    # - Data pre-processing
    normalize = args.normalize

    # - Model options
    classifier = args.classifier
    scoring = args.scoring
    cv_nsplits = args.cv_nsplits
    nfeat_min = args.nfeat_min
    nfeat_max = args.nfeat_max
    autoselect = args.autoselect

    # - Run options
    colselect = args.colselect
    selcols = []
    if colselect:
        if args.selcols == "":
            logger.error("No selected column ids given (mandatory when colselect option is chosen)!")
            return 1
        selcols = [int(x.strip()) for x in args.selcols.split(',')]

    # - Output options
    outfile = args.outfile

    #===========================
    #==   READ FEATURE DATA
    #===========================
    ret = Utils.read_feature_data(inputfile)
    if not ret:
        logger.error("Failed to read data from file %s!" % (inputfile))
        return 1

    data = ret[0]
    snames = ret[1]
    classids = ret[2]

    #===========================
    #==   SELECT FEATURES
    #===========================
    logger.info("Running feature selector on input feature data ...")
    fsel = FeatSelector()
    fsel.normalize = normalize
    fsel.classifier = classifier
    fsel.scoring = scoring
    fsel.outfile = outfile
    fsel.nfeat_min = nfeat_min
    fsel.nfeat_max = nfeat_max
    fsel.auto_selection = autoselect

    if colselect:
        status = fsel.select(data, selcols, classids, snames)
    else:
        status = fsel.run(data, classids, snames)

    if status < 0:
        logger.error("Feature selector failed!")
        return 1

    return 0
def make_cutout(self, coord, radius, sname, region_sky):
    """ Run source cutout maker """

    #===========================
    #==   SET SOURCE PARS
    #===========================
    # - Check and set source cutout pars
    if len(coord) != 2:
        logger.error("Empty source position given!")
        return -1
    if radius <= 0:
        logger.error("Radius must be >0")
        return -1
    if sname == "":
        logger.error("Source name must not be empty string!")
        return -1
    if region_sky is None:
        logger.error("None region given!")
        return -1
    if self.nsurveys <= 0:
        logger.error("No surveys present in config!")
        return -1

    self.ra = coord[0]
    self.dec = coord[1]
    self.radius = radius
    self.sname = sname
    self.region_sky = region_sky

    # - Create cutout data dirs and update work dir in config
    if not os.path.exists(self.datadir):
        logger.info("Creating cutout data dir %s ..." % (self.datadir))
        Utils.mkdir(self.datadir, delete_if_exists=False)

    if not os.path.exists(self.datadir_mask):
        logger.info("Creating cutout masked data dir %s ..." % (self.datadir_mask))
        Utils.mkdir(self.datadir_mask, delete_if_exists=False)

    self.config.workdir = self.datadir

    #===========================
    #==   RUN CUTOUT SEARCH
    #===========================
    logger.info("Run cutout search for source %s ..." % (self.sname))
    try:
        ch = CutoutHelper(self.config, self.ra, self.dec, self.sname, self.radius)
        if ch.run() < 0:
            logger.warn("Failed to extract cutout for source %s!" % (self.sname))
            return -1
    except Exception as e:
        logger.error("Exception (%s) occurred when extracting cutout for source %s!" % (str(e), self.sname))
        return -1

    #===========================
    #==   MASKED CUTOUT DATA
    #===========================
    logger.info("Computing masked cutouts for source %s ..." % (self.sname))
    if self.make_masked_cutouts(self.region_sky, self.dilatemask, self.kernsize, self.maskval) < 0:
        logger.error("Failed to create masked cutouts for source %s!" % (self.sname))
        return -1

    return 0
def run(self, data, class_ids=[], snames=[], modelfile='', scalerfile=''):
    """ Find outliers in input data """

    #================================
    #==   LOAD DATA SCALER
    #================================
    # - Load scaler from file?
    if scalerfile != "":
        logger.info("Loading data scaler from file %s ..." % (scalerfile))
        try:
            with open(scalerfile, 'rb') as f:
                self.data_scaler = pickle.load(f)
        except Exception as e:
            logger.error("Failed to load data scaler from file %s (err=%s)!" % (scalerfile, str(e)))
            return -1

    #================================
    #==   LOAD DATA
    #================================
    # - Check inputs
    if data is None:
        logger.error("None input data specified!")
        return -1

    if self.set_data(data, class_ids, snames) < 0:
        logger.error("Failed to set data!")
        return -1

    #================================
    #==   LOAD MODEL
    #================================
    if modelfile:
        fitdata = False
        logger.info("Loading the model from file %s ..." % modelfile)
        try:
            with open(modelfile, 'rb') as f:
                self.model = pickle.load(f)
        except Exception as e:
            logger.error("Failed to load model from file %s (err=%s)!" % (modelfile, str(e)))
            return -1
    else:
        logger.info("Creating the model ...")
        fitdata = True
        self.model = self.__create_model()

    #================================
    #==   FIND OUTLIERS
    #================================
    logger.info("Searching for outliers ...")
    if self.__find_outliers(fitdata) < 0:
        logger.error("Failed to search outliers!")
        return -1

    #================================
    #==   SAVE
    #================================
    if self.save_to_file:
        logger.info("Saving results ...")
        if self.__save() < 0:
            logger.error("Failed to save outlier search results!")
            return -1

    return 0
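# Minimal sketch of the load/predict flow in run() above, assuming the
# outlier model is a scikit-learn IsolationForest persisted with pickle
# (__create_model is not shown here, so the estimator choice is a guess;
# the file name is a hypothetical placeholder).
import pickle
import numpy as np
from sklearn.ensemble import IsolationForest

X = np.random.rand(50, 4)
model = IsolationForest(max_samples="auto").fit(X)
with open("outlier_model.pkl", "wb") as f:
    pickle.dump(model, f)

with open("outlier_model.pkl", "rb") as f:
    model_loaded = pickle.load(f)
flags = model_loaded.predict(X)   # -1 = outlier, +1 = inlier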
def __train(self):
    """ Build and train/test reducer """

    # - Check if data are set
    if self.data is None:
        logger.error("Input data array is None!")
        return -1

    # - Check if reducer is set
    if self.reducer is None:
        logger.error("UMAP reducer is not set!")
        return -1

    #==========================================================
    #==   FIT PRE-CLASSIFIED DATA (IF AVAILABLE) SUPERVISED
    #==========================================================
    if self.use_preclassified_data and len(self.data_preclassified) >= self.preclassified_data_minsize:
        logger.info("Fitting input pre-classified data in a supervised way ...")
        self.learned_transf = self.reducer.fit(self.data_preclassified, self.data_preclassified_classids)
        self.encoded_data_preclassified = self.learned_transf.transform(self.data_preclassified)

    #================================
    #==   FIT DATA UNSUPERVISED
    #================================
    logger.info("Fitting input data in a completely unsupervised way ...")
    self.encoded_data_unsupervised = self.reducer.fit_transform(self.data)

    # - Save model to file
    if self.dump_model:
        logger.info("Dumping model to file %s ..." % self.outfile_model)
        pickle.dump(self.reducer, open(self.outfile_model, 'wb'))

    #====================================================
    #==   ENCODE DATA USING LEARNED TRANSFORM (IF DONE)
    #====================================================
    if self.learned_transf is not None:
        logger.info("Encode input data using learned transform on pre-classified data ...")
        self.encoded_data_supervised = self.learned_transf.transform(self.data)

    #================================
    #==   SAVE ENCODED DATA
    #================================
    # - Unsupervised encoded data
    logger.info("Saving unsupervised encoded data to file ...")
    N = self.encoded_data_unsupervised.shape[0]
    logger.debug("Unsupervised encoded data shape=%s (N=%d)" % (str(self.encoded_data_unsupervised.shape), N))
    snames = np.array(self.source_names).reshape(N, 1)
    objids = np.array(self.data_classids).reshape(N, 1)

    enc_data = np.concatenate((snames, self.encoded_data_unsupervised, objids), axis=1)
    znames_counter = list(range(1, self.encoded_data_dim + 1))
    znames = '{}{}'.format('z', ' z'.join(str(item) for item in znames_counter))
    head = '{} {} {}'.format("# sname", znames, "id")
    Utils.write_ascii(enc_data, self.outfile_encoded_data_unsupervised, head)

    # - Supervised encoded data
    if self.encoded_data_supervised is not None:
        logger.info("Saving supervised encoded data to file ...")
        N = self.encoded_data_supervised.shape[0]
        logger.debug("Supervised encoded data shape=%s (N=%d)" % (str(self.encoded_data_supervised.shape), N))
        enc_data = np.concatenate((snames, self.encoded_data_supervised, objids), axis=1)
        Utils.write_ascii(enc_data, self.outfile_encoded_data_supervised, head)

    # - Pre-classified data
    if self.encoded_data_preclassified is not None:
        logger.info("Saving pre-classified encoded data to file ...")
        N = self.encoded_data_preclassified.shape[0]
        logger.debug("Pre-classified encoded data shape=%s (N=%d)" % (str(self.encoded_data_preclassified.shape), N))
        snames_preclass = np.array(self.source_names_preclassified).reshape(N, 1)
        objids_preclass = np.array(self.data_preclassified_classids).reshape(N, 1)
        enc_data = np.concatenate((snames_preclass, self.encoded_data_preclassified, objids_preclass), axis=1)
        Utils.write_ascii(enc_data, self.outfile_encoded_data_preclassified, head)

    return 0
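# Minimal sketch of the two UMAP fits in __train above (requires umap-learn):
# an unsupervised fit_transform on all data, and a supervised fit on the
# labelled (pre-classified) subset whose learned transform is then applied
# to everything. Data and labels below are mock values.
import numpy as np
import umap

X = np.random.rand(100, 8)
y = np.random.randint(0, 3, 100)          # known class ids

reducer = umap.UMAP(n_components=2, random_state=42)
emb_unsup = reducer.fit_transform(X)      # unsupervised embedding
learned = reducer.fit(X, y)               # supervised fit (fit returns self)
emb_sup = learned.transform(X)            # encode with the learned transform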
def __read_and_merge_data(self, inputfiles, selcolids=[], allow_novars=False):
    """ Read and merge feature data """

    # - Check selcolids has format [[selcol_1],[selcol_2]]
    if selcolids:
        if len(selcolids) != len(inputfiles):
            logger.error("Given selcolid length (%d) must be equal to inputfile list length (%d)!" % (len(selcolids), len(inputfiles)))
            return -1

    # - Read features
    dlist = []
    nvars_tot = 0

    for i in range(len(inputfiles)):
        inputfile = inputfiles[i]
        colprefix = "featset" + str(i + 1) + "_"

        if selcolids:
            selcols_i = selcolids[i]
            if selcols_i:
                d = Utils.read_sel_feature_data_dict(inputfile, selcols_i, colprefix=colprefix)
            else:
                logger.error("Empty selcols for file %s given!" % (inputfile))
                return -1
        else:
            d = Utils.read_feature_data_dict(inputfile, colprefix=colprefix, allow_novars=allow_novars)

        if not d:
            logger.error("Failed to read data from file %s!" % (inputfile))
            return -1

        nentries = len(d.keys())
        firstitem = next(iter(d.items()))
        nvars = len(firstitem[1].keys()) - 2  # subtract sname & id columns
        nvars_tot += nvars
        logger.info("Data file %s has #%d entries (#%d vars) ..." % (inputfile, nentries, nvars))
        dlist.append(d)

    logger.info("Merged set is expected to have %d vars ..." % (nvars_tot))

    # - Merge features
    logger.info("Merging feature data for all input files ...")
    dmerged = collections.OrderedDict()

    for d in dlist:
        for key, value in d.items():
            if key not in dmerged:
                dmerged[key] = collections.OrderedDict({})
            dmerged[key].update(value)
            dmerged[key].move_to_end("id")

    # - Remove rows with a smaller number of entries (sources missing in some files)
    logger.info("Removing rows with number of vars !=%d ..." % (nvars_tot))
    self.par_dict_list = []

    for key, value in dmerged.items():
        nvars = len(value.keys()) - 2
        if nvars != nvars_tot:
            logger.info("Removing entry (%s) as number of vars (%d) is !=%d ..." % (key, nvars, nvars_tot))
            continue
        self.par_dict_list.append(value)

    return 0
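# Minimal sketch of the merge step above: per-file feature dicts keyed by
# source name are merged entry-wise, keeping the "id" column last. Values
# below are mock data.
import collections

d1 = {"src1": collections.OrderedDict(sname="src1", f1=0.1, id=1)}
d2 = {"src1": collections.OrderedDict(sname="src1", f2=0.5, id=1)}

dmerged = collections.OrderedDict()
for d in (d1, d2):
    for key, value in d.items():
        dmerged.setdefault(key, collections.OrderedDict()).update(value)
        dmerged[key].move_to_end("id")    # keep class id as the last column

print(dmerged["src1"])  # sname, f1, f2, id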
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    datalist = args.datalist

    # - Data process options
    nx = args.nx
    ny = args.ny
    normalize = args.normalize
    scale_to_abs_max = args.scale_to_abs_max
    scale_to_max = args.scale_to_max
    log_transform = args.log_transform
    scale = args.scale
    scale_factors = []
    if args.scale_factors != "":
        scale_factors = [float(x.strip()) for x in args.scale_factors.split(',')]
    standardize = args.standardize
    img_means = []
    img_sigmas = []
    if args.img_means != "":
        img_means = [float(x.strip()) for x in args.img_means.split(',')]
    if args.img_sigmas != "":
        img_sigmas = [float(x.strip()) for x in args.img_sigmas.split(',')]
    chan_divide = args.chan_divide
    chan_mins = []
    if args.chan_mins != "":
        chan_mins = [float(x.strip()) for x in args.chan_mins.split(',')]
    erode = args.erode
    erode_kernel = args.erode_kernel

    # - Autoencoder options
    modelfile_encoder = args.modelfile_encoder
    modelfile_decoder = args.modelfile_decoder
    weightfile_encoder = args.weightfile_encoder
    weightfile_decoder = args.weightfile_decoder
    add_channorm_layer = args.add_channorm_layer

    # - Reco metrics & plot options
    winsize = args.winsize
    save_plots = args.save_plots

    #===========================
    #==   READ DATALIST
    #===========================
    # - Create data loader
    dl = DataLoader(filename=datalist)

    # - Read datalist
    logger.info("Reading datalist %s ..." % datalist)
    if dl.read_datalist() < 0:
        logger.error("Failed to read input datalist!")
        return 1

    #===============================
    #==   RUN AUTOENCODER RECO
    #===============================
    logger.info("Running autoencoder classifier reconstruction ...")
    vae_class = FeatExtractorAE(dl)
    vae_class.set_image_size(nx, ny)
    vae_class.normalize = normalize
    vae_class.scale_to_abs_max = scale_to_abs_max
    vae_class.scale_to_max = scale_to_max
    vae_class.log_transform_img = log_transform
    vae_class.scale_img = scale
    vae_class.scale_img_factors = scale_factors
    vae_class.standardize_img = standardize
    vae_class.img_means = img_means
    vae_class.img_sigmas = img_sigmas
    vae_class.chan_divide = chan_divide
    vae_class.chan_mins = chan_mins
    vae_class.erode = erode
    vae_class.erode_kernel = erode_kernel
    vae_class.add_channorm_layer = add_channorm_layer

    status = vae_class.reconstruct_data(
        modelfile_encoder, weightfile_encoder,
        modelfile_decoder, weightfile_decoder,
        winsize=winsize,
        save_imgs=save_plots
    )
    if status < 0:
        logger.error("Autoencoder reconstruction failed!")
        return 1

    return 0
def __compute_pars(self, data, sname, classid):
    """ Compute source image quality pars """

    # - Init data dict
    param_dict = collections.OrderedDict()
    param_dict["sname"] = sname

    # - Find ref channel mask
    nchannels = data.shape[3]
    cond = np.logical_and(data[0, :, :, self.refch] != 0, np.isfinite(data[0, :, :, self.refch]))

    is_bad_data = False
    self.nvars_out = 0

    for i in range(nchannels):
        data_2d = data[0, :, :, i]
        data_1d = data_2d[cond]  # pixels in ref band mask
        n = data_1d.size
        n_bad = np.count_nonzero(np.logical_or(~np.isfinite(data_1d), data_1d == 0))
        n_neg = np.count_nonzero(data_1d < 0)
        f_bad = float(n_bad) / float(n)
        f_negative = float(n_neg) / float(n)
        data_min = np.nanmin(data_1d)
        data_max = np.nanmax(data_1d)
        same_values = int(data_min == data_max)

        is_bad_ch_data = (
            f_negative >= self.negative_pix_fract_thr or
            f_bad >= self.bad_pix_fract_thr or
            same_values == 1
        )
        if is_bad_ch_data:
            is_bad_data = True

        logger.info("Source %s (ch%d): min/max=%f/%f, n=%d, n_neg=%d, is_bad_ch_data? %d" % (sname, i + 1, data_min, data_max, n, n_neg, int(is_bad_ch_data)))

        # - Fill dict
        param_dict["equalPixValues_ch" + str(i + 1)] = same_values
        self.nvars_out += 1
        param_dict["badPixFract_ch" + str(i + 1)] = f_bad
        self.nvars_out += 1
        param_dict["negativePixFract_ch" + str(i + 1)] = f_negative
        self.nvars_out += 1
        param_dict["isBad_ch" + str(i + 1)] = int(is_bad_ch_data)
        self.nvars_out += 1

    param_dict["isBadData"] = int(is_bad_data)
    self.nvars_out += 1
    param_dict["id"] = classid

    return param_dict
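# Minimal sketch of the per-channel quality metrics above, on a mock 2D
# channel restricted to a reference-channel mask (arrays are illustrative).
import numpy as np

chan = np.array([[1., 0., np.nan], [2., -1., 3.]])
cond = np.isfinite(chan)                   # stand-in for the ref-channel mask
vals = chan[cond]
f_bad = np.count_nonzero(vals == 0) / vals.size
f_neg = np.count_nonzero(vals < 0) / vals.size
same_values = int(np.nanmin(vals) == np.nanmax(vals))
print("f_bad=%.2f, f_neg=%.2f, same=%d" % (f_bad, f_neg, same_values))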
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    datalist = args.datalist
    datalist_cv = args.datalist_cv

    # - Data process options
    nx = args.nx
    ny = args.ny
    augment = args.augment
    augment_scale_factor = args.augment_scale_factor
    scale = args.scale
    scale_factors = []
    if args.scale_factors != "":
        scale_factors = [float(x.strip()) for x in args.scale_factors.split(',')]
    normalize = args.normalize
    scale_to_abs_max = args.scale_to_abs_max
    scale_to_max = args.scale_to_max
    log_transform = args.log_transform
    standardize = args.standardize
    img_means = []
    img_sigmas = []
    if args.img_means != "":
        img_means = [float(x.strip()) for x in args.img_means.split(',')]
    if args.img_sigmas != "":
        img_sigmas = [float(x.strip()) for x in args.img_sigmas.split(',')]
    chan_divide = args.chan_divide
    chan_mins = []
    if args.chan_mins != "":
        chan_mins = [float(x.strip()) for x in args.chan_mins.split(',')]
    erode = args.erode
    erode_kernel = args.erode_kernel

    # - NN architecture
    modelfile = args.modelfile
    add_maxpooling_layer = args.add_maxpooling_layer
    add_batchnorm_layer = args.add_batchnorm_layer
    add_leakyrelu = args.add_leakyrelu
    add_dense_layer = args.add_dense_layer
    nfilters_cnn = [int(x.strip()) for x in args.nfilters_cnn.split(',')]
    kernsizes_cnn = [int(x.strip()) for x in args.kernsizes_cnn.split(',')]
    strides_cnn = [int(x.strip()) for x in args.strides_cnn.split(',')]
    dense_layer_sizes = [int(x.strip()) for x in args.dense_layer_sizes.split(',')]
    dense_layer_activation = args.dense_layer_activation
    add_dropout_layer = args.add_dropout_layer
    dropout_rate = args.dropout_rate

    # - Train options
    predict = args.predict
    multiclass = True
    if args.binary_class:
        multiclass = False
    weightfile = args.weightfile
    optimizer = args.optimizer
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    nepochs = args.nepochs
    weight_seed = args.weight_seed
    reproducible = args.reproducible
    validation_steps = args.validation_steps

    #===========================
    #==   READ DATALIST
    #===========================
    # - Create data loader
    dl = DataLoader(filename=datalist)

    # - Read datalist
    logger.info("Reading datalist %s ..." % datalist)
    if dl.read_datalist() < 0:
        logger.error("Failed to read input datalist!")
        return 1

    # - Create data loader for validation
    dl_cv = None
    if datalist_cv != "":
        logger.info("Reading datalist_cv %s ..." % datalist_cv)
        dl_cv = DataLoader(filename=datalist_cv)
        if dl_cv.read_datalist() < 0:
            logger.error("Failed to read input datalist for validation!")
            return 1

    #===========================
    #==   TRAIN NN CLASSIFIER
    #===========================
    logger.info("Running NN classifier training ...")
    sclass = SClassifierNN(dl, multiclass=multiclass)
    sclass.modelfile = modelfile
    sclass.weightfile = weightfile
    sclass.set_image_size(nx, ny)
    sclass.augmentation = augment
    sclass.augment_scale_factor = augment_scale_factor
    sclass.normalize = normalize
    sclass.scale_to_abs_max = scale_to_abs_max
    sclass.scale_to_max = scale_to_max
    sclass.log_transform_img = log_transform
    sclass.scale_img = scale
    sclass.scale_img_factors = scale_factors
    sclass.standardize_img = standardize
    sclass.img_means = img_means
    sclass.img_sigmas = img_sigmas
    sclass.chan_divide = chan_divide
    sclass.chan_mins = chan_mins
    sclass.erode = erode
    sclass.erode_kernel = erode_kernel
    sclass.batch_size = batch_size
    sclass.nepochs = nepochs
    sclass.validation_steps = validation_steps
    sclass.set_optimizer(optimizer, learning_rate)
    if reproducible:
        sclass.set_reproducible_model()
    sclass.add_max_pooling = add_maxpooling_layer
    sclass.add_batchnorm = add_batchnorm_layer
    sclass.add_leakyrelu = add_leakyrelu
    sclass.add_dense = add_dense_layer
    sclass.nfilters_cnn = nfilters_cnn
    sclass.kernsizes_cnn = kernsizes_cnn
    sclass.strides_cnn = strides_cnn
    sclass.dense_layer_sizes = dense_layer_sizes
    sclass.dense_layer_activation = dense_layer_activation
    sclass.add_dropout_layer = add_dropout_layer
    sclass.dropout_rate = dropout_rate
    sclass.weight_seed = weight_seed
    sclass.dl_cv = dl_cv

    if predict:
        status = sclass.run_predict(modelfile, weightfile)
    else:
        status = sclass.run_train()

    if status < 0:
        logger.error("Classifier run failed!")
        return 1

    return 0
def fill_features(self):
    """ Fill feature parameter dict """

    # - Save name
    self.param_dict["sname"] = self.sname

    # - Save source flux
    flux_ref = self.fluxes[self.refch]
    for j in range(len(self.fluxes)):
        flux = self.fluxes[j]
        parname = "flux_ch" + str(j + 1)
        self.param_dict[parname] = flux

    # - Save source flux log ratios Fj/F_radio (i.e. colors)
    lgFluxRatio_safe = 0
    is_good_flux_ref = (flux_ref > 0) and (np.isfinite(flux_ref))
    if not is_good_flux_ref:
        logger.warn("Flux for ref chan (%d) is <=0 or nan for image %s (id=%s), will set all color indices to %d ..." % (self.refch, self.sname, self.label, lgFluxRatio_safe))

    for j in range(len(self.fluxes)):
        if j == self.refch:
            continue
        flux = self.fluxes[j]  # if source is not detected this is the background level
        is_good_flux = (flux > 0) and (np.isfinite(flux))
        lgFluxRatio = 0
        if is_good_flux_ref:
            if is_good_flux:
                lgFluxRatio = np.log10(flux / flux_ref)
            else:
                logger.warn("Flux for chan %d is <=0 or nan for image %s (id=%s), will set this color index to %d ..." % (j + 1, self.sname, self.label, lgFluxRatio_safe))
                lgFluxRatio = lgFluxRatio_safe
        else:
            lgFluxRatio = lgFluxRatio_safe

        parname = "lgFratio_ch" + str(self.refch + 1) + "_" + str(j + 1)
        self.param_dict[parname] = lgFluxRatio

    # - Save color indices computed from extracted source fluxes
    cind_safe = 0
    sflux_ref = self.sfluxes[self.refch]
    is_good_flux_ref = (sflux_ref is not None) and (sflux_ref > 0) and (np.isfinite(sflux_ref))
    if not is_good_flux_ref:
        logger.warn("Source flux for ref chan (%d) is <=0 or nan for image %s (id=%s), will set all color indices to %d ..." % (self.refch, self.sname, self.label, cind_safe))

    for j in range(len(self.sfluxes)):
        if j == self.refch:
            continue
        sflux = self.sfluxes[j]
        flux = self.fluxes[j]
        if sflux is None:
            # Source is not detected: take sum of pixel fluxes inside ref source aperture (e.g. the background)
            logger.info("Source is not detected in chan %d, taking pixel sum over ref source aperture %f ..." % (j + 1, flux))
            sflux = flux
        is_good_flux = (sflux > 0) and (np.isfinite(sflux))
        cind = 0
        if is_good_flux_ref:
            if is_good_flux:
                cind = np.log10(sflux / sflux_ref)
            else:
                logger.warn("Source flux for chan %d is <=0 or nan for image %s (id=%s), will set this color index to %d ..." % (j + 1, self.sname, self.label, cind_safe))
                cind = cind_safe
        else:
            cind = cind_safe

        parname = "color_ch" + str(self.refch + 1) + "_" + str(j + 1)
        self.param_dict[parname] = cind

    # - Save source IOU
    for j in range(len(self.sious)):
        ch_i, ch_j = self.__get_triu_indices(j, self.nchannels)
        parname = "iou_ch" + str(ch_i) + "_" + str(ch_j)
        self.param_dict[parname] = self.sious[j]

    # - Save source peak dist
    for j in range(len(self.speaks_dists)):
        ch_i, ch_j = self.__get_triu_indices(j, self.nchannels)
        parname = "dpeak_ch" + str(ch_i) + "_" + str(ch_j)
        self.param_dict[parname] = self.speaks_dists[j]

    # - Save img moments
    for i in range(len(self.moments_zern)):
        for j in range(len(self.moments_zern[i])):
            if j == 0:
                continue  # Skip as mom0 is always the same
            parname = "zernmom" + str(j + 1) + "_ch" + str(i + 1)
            self.param_dict[parname] = self.moments_zern[i][j]

    # - Save ssim parameters
    if self.save_ssim_pars:
        for j in range(len(self.ssim_avg)):
            ch_i, ch_j = self.__get_triu_indices(j, self.nchannels)
            parname = "ssim_avg_ch{}_{}".format(ch_i, ch_j)
            self.param_dict[parname] = self.ssim_avg[j]

    # - Save class id
    self.param_dict["id"] = self.id
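# The color indices filled above are simple flux log-ratios; e.g., assuming
# mock fluxes:
import numpy as np

flux_ref, flux = 2.0, 0.5
color = np.log10(flux / flux_ref)   # ~ -0.60: fainter than the ref channel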
def make_masked_cutouts(self, region_sky, dilatemask=False, kernsize=5, maskval=0):
    """ Produce masked cutouts """

    # - Find cutout files produced
    logger.info("Searching for produced cutouts for source %s ..." % (self.sname))
    cutout_dir = os.path.join(self.datadir, self.sname)
    file_pattern = os.path.join(cutout_dir, "*.fits")
    files = glob.glob(file_pattern)
    nfiles = len(files)
    if nfiles == 0 or nfiles != self.nsurveys:
        logger.warn("Number of cutout files produced (%d) different wrt expected (%d)!" % (nfiles, self.nsurveys))
        return -1

    # - Create directory for masked cutouts
    masked_cutout_dir = os.path.join(self.datadir_mask, self.sname)
    if not os.path.exists(masked_cutout_dir):
        logger.info("Creating cutout masked data dir %s ..." % (masked_cutout_dir))
        Utils.mkdir(masked_cutout_dir, delete_if_exists=False)

    # - Retrieve FITS header & WCS
    logger.info("Retrieving cutout FITS header & WCS for source %s ..." % (self.sname))
    try:
        header = fits.getheader(files[0])
        data_shape = fits.getdata(files[0]).shape
        wcs = WCS(header)
    except Exception as e:
        logger.error("Failed to retrieve file %s header/WCS for source %s (err=%s)!" % (files[0], self.sname, str(e)))
        return -1

    # - Convert region to pixel coords
    logger.info("Converting sky region for source %s to pixel coordinates ..." % (self.sname))
    try:
        region = region_sky.to_pixel(wcs)
    except Exception as e:
        logger.error("Failed to convert sky region for source %s to pixel coordinates (err=%s)!" % (self.sname, str(e)))
        return -1

    # - Compute mask
    logger.info("Computing mask for source %s ..." % (self.sname))
    try:
        mask = region.to_mask(mode='center')
    except Exception as e:
        logger.error("Failed to get mask from region for source %s (err=%s)!" % (self.sname, str(e)))
        return -1

    if mask is None:
        logger.warn("mask obtained from region for source %s is None!" % (self.sname))
        return -1

    # - Compute image mask
    logger.info("Computing image mask for source %s ..." % (self.sname))
    maskimg = mask.to_image(data_shape)
    if maskimg is None:
        logger.error("maskimg is None for source %s, this shouldn't occur at this stage!" % (self.sname))
        return -1

    maskimg[maskimg != 0] = 1
    maskimg = maskimg.astype(np.uint8)

    # - Dilate image mask to enlarge area around source
    if dilatemask:
        logger.info("Dilating image mask to enlarge area around source %s ..." % (self.sname))
        structel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernsize, kernsize))
        maskimg = cv2.dilate(maskimg, structel, iterations=1)

    # - Loop over files and create masked cutouts
    for i in range(nfiles):
        filename = files[i]
        filename_base = os.path.basename(filename)
        filename_base_noext = os.path.splitext(filename_base)[0]
        filename_mask = os.path.join(masked_cutout_dir, filename_base_noext + '_masked.fits')
        logger.info("Creating masked cutout file %s from file %s ..." % (filename_mask, filename_base))

        try:
            header = fits.getheader(filename)
            data = fits.getdata(filename)
            data[maskimg == 0] = maskval
            hdu_out = fits.PrimaryHDU(data, header)
            hdul = fits.HDUList([hdu_out])
            hdul.writeto(filename_mask, overwrite=True)
        except Exception as e:
            logger.error("Failed to create masked file %s for source %s (err=%s)!" % (filename_mask, self.sname, str(e)))
            return -1

    return 0
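# Minimal sketch of the region -> pixel-mask steps above, using astropy and
# the 'regions' package. The file name, coordinates and radius are
# hypothetical placeholders.
import numpy as np
import astropy.units as u
from astropy.coordinates import SkyCoord
from astropy.io import fits
from astropy.wcs import WCS
from regions import CircleSkyRegion

header = fits.getheader("cutout.fits")
data = fits.getdata("cutout.fits")
wcs = WCS(header)

region_sky = CircleSkyRegion(SkyCoord(150.0, 2.0, unit="deg"), 10 * u.arcsec)
region_pix = region_sky.to_pixel(wcs)
maskimg = region_pix.to_mask(mode="center").to_image(data.shape)
data_masked = np.where(maskimg != 0, data, 0.0)   # maskval=0 outside region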
def __process_sdata(self, index):
    """ Process source data """

    #===========================
    #==   READ DATA
    #===========================
    # - Read source data
    logger.info("Reading source and source masked data %d ..." % (index))
    ret = self.__read_sdata(index)
    if ret is None:
        logger.error("Failed to read source data %d!" % (index))
        return -1

    sdata = ret[0]
    sdata_mask = ret[1]

    #===========================
    #==   MODIFY MASKS
    #===========================
    # - Shrink img & mask in masked sdata?
    if self.shrink_masks:
        logger.info("Shrinking img+mask on source masked data %d ..." % (index))
        if sdata_mask.shrink_masks(self.erode_kernels) < 0:
            logger.warn("Failed to shrink mask for source masked data %d!" % (index))
            return -1

    # - Expand img & mask in masked sdata?
    if self.grow_masks:
        logger.info("Expanding img+mask on source masked data %d ..." % (index))
        if sdata_mask.grow_masks(self.dilate_kernels) < 0:
            logger.warn("Failed to expand mask for source masked data %d!" % (index))
            return -1

    masks = sdata_mask.img_data_mask

    #===========================
    #==   CHECK DATA INTEGRITY
    #===========================
    # - Check non-masked data
    has_good_data = sdata.has_good_data(check_mask=False, check_bad=True, check_neg=False, check_same=True)
    if not has_good_data:
        logger.warn("Source data %d are bad (too many NaNs or equal pixel values)!" % (index))
        return -1

    # - Check masked data
    has_good_mask_data = sdata_mask.has_good_data(check_mask=False, check_bad=True, check_neg=True, check_same=True)
    if not has_good_mask_data:
        logger.warn("Source mask data %d are bad (too many NaNs/negative or equal pixel values)!" % (index))
        return -1

    #===========================
    #==   CHECK AE RECO ACCURACY
    #===========================
    # ... (not implemented yet)

    #===========================
    #==   COMPUTE BKG/FLUX
    #===========================
    # - Compute bkg on img over non-masked pixels
    logger.info("Computing bkg on source data %d ..." % (index))
    if sdata.compute_bkg(masks) < 0:
        logger.warn("Failed to compute bkg for source data %d!" % (index))
        return -1

    # - Apply masks to sdata
    #   NB: Do this after bkg calculation (otherwise all non-masked pixels are set to 0, so bkg will be 0) and before bkg subtraction
    logger.info("Applying masks to source data %d ..." % (index))
    sdata.apply_masks(masks)

    # - Compute integrated flux (no source extraction here, only sum of pixel fluxes in mask)
    logger.info("Computing flux on source data %d ..." % (index))
    sdata.compute_fluxes(subtract_bkg=self.subtract_bkg, subtract_only_refch=self.subtract_bkg_only_refch)

    # - Extract sources and compute pars
    #   NB: source extraction may fail or not be accurate (e.g. miss source, contour not accurate, etc)
    logger.info("Extracting source blobs on source data %d ..." % (index))
    sdata.find_sources(
        seed_thr=self.seed_thr,
        merge_thr=self.merge_thr,
        dist_thr=self.dist_thr,
        subtract_bkg=self.subtract_bkg,
        subtract_only_refch=self.subtract_bkg_only_refch
    )

    #===========================
    #==   COMPUTE MOMENTS
    #===========================
    # - Compute centroids and moments on images (NB: masked before)
    logger.info("Computing moments on source data %d ..." % (index))
    if sdata.compute_img_moments() < 0:
        logger.warn("Failed to compute moments for source data %d!" % (index))
        return -1

    #===========================
    #==   COMPUTE SSIM
    #===========================
    if self.save_ssim_pars:
        logger.info("Computing ssim pars on source data %d ..." % (index))
        if sdata.compute_ssim_pars(self.ssim_winsize) < 0:
            logger.warn("Failed to compute SSIM pars for source data %d!" % (index))
            return -1

    #===========================
    #==   FILL SOURCE OUT DATA
    #===========================
    # - Fill and append features
    logger.info("Filling feature dict for source data %d ..." % (index))
    sdata.fill_features()
    par_dict = sdata.param_dict

    if par_dict is None or not par_dict:
        logger.warn("Feature dict for source data %d is empty or None, skip it ..." % (index))
    else:
        # - Select features?
        if self.select_feat and self.selfeatids:
            ret = sdata.select_features(self.selfeatids)
            par_dict = sdata.param_dict
            if ret == 0:
                self.par_dict_list.append(par_dict)
            else:
                logger.warn("Failed to select features for source data %d, skip it ..." % (index))
        else:
            self.par_dict_list.append(par_dict)

    return 0
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    inputfile = args.inputfile
    inputfile_cv = args.inputfile_cv

    # - Data pre-processing
    normalize = args.normalize
    scalerfile = args.scalerfile

    # - Model options
    classifier = args.classifier
    modelfile = args.modelfile
    predict = args.predict
    multiclass = True
    if args.binary_class:
        multiclass = False
    balance_classes = args.balance_classes

    # - Tree options
    max_depth = args.max_depth
    min_samples_split = args.min_samples_split
    min_samples_leaf = args.min_samples_leaf
    n_estimators = args.n_estimators
    num_leaves = args.num_leaves
    learning_rate = args.learning_rate
    niters = args.niters

    # - Outlier search options
    find_outliers = args.find_outliers
    modelfile_outlier = args.modelfile_outlier
    anomaly_thr = args.anomaly_thr
    save_outlier = args.save_outlier
    outfile_outlier = args.outfile_outlier

    # - Run options
    run_scan = args.run_scan
    ntrials = args.ntrials

    # - Output options
    outfile = args.outfile

    #===========================
    #==   READ FEATURE DATA
    #===========================
    ret = Utils.read_feature_data(inputfile)
    if not ret:
        logger.error("Failed to read data from file %s!" % (inputfile))
        return 1

    data = ret[0]
    snames = ret[1]
    classids = ret[2]

    #====================================
    #==   READ FEATURE VALIDATION DATA
    #====================================
    data_cv = None
    snames_cv = []
    classids_cv = []

    if inputfile_cv != "":
        ret_cv = Utils.read_feature_data(inputfile_cv)
        if not ret_cv:
            logger.error("Failed to read validation data from file %s!" % (inputfile_cv))
            return 1
        data_cv = ret_cv[0]
        snames_cv = ret_cv[1]
        classids_cv = ret_cv[2]

    #===========================
    #==   CLASSIFY DATA
    #===========================
    logger.info("Running classifier on input feature data ...")
    sclass = SClassifier(multiclass=multiclass)
    sclass.normalize = normalize
    sclass.classifier = classifier
    sclass.outfile = outfile
    sclass.max_depth = max_depth
    sclass.min_samples_split = min_samples_split
    sclass.min_samples_leaf = min_samples_leaf
    sclass.n_estimators = n_estimators
    sclass.num_leaves = num_leaves
    sclass.learning_rate = learning_rate
    sclass.niters = niters
    sclass.balance_classes = balance_classes
    sclass.find_outliers = find_outliers
    sclass.outlier_modelfile = modelfile_outlier
    sclass.outlier_thr = anomaly_thr
    sclass.save_outlier = save_outlier
    sclass.outlier_outfile = outfile_outlier

    if predict:
        status = sclass.run_predict(data, classids, snames, modelfile, scalerfile)
    else:
        if run_scan:
            status = sclass.run_lgbm_scan(data, classids, snames, scalerfile, n_trials=ntrials)
        else:
            status = sclass.run_train(data, classids, snames, modelfile, scalerfile, data_cv, classids_cv, snames_cv)

    if status < 0:
        logger.error("Classifier run failed!")
        return 1

    return 0
def compute_ssim_pars(self, winsize=3):
    """ Compute SSIM params """

    # - Loop over channel pairs and compute params
    for i in range(self.nchannels - 1):
        img_i = self.img_data[i]
        cond_i = np.logical_and(img_i != 0, np.isfinite(img_i))
        img_max_i = np.nanmax(img_i[cond_i])
        img_min_i = np.nanmin(img_i[cond_i])
        img_norm_i = (img_i - img_min_i) / (img_max_i - img_min_i)
        img_norm_i[~cond_i] = 0

        for j in range(i + 1, self.nchannels):
            img_j = self.img_data[j]
            cond_j = np.logical_and(img_j != 0, np.isfinite(img_j))
            img_max_j = np.nanmax(img_j[cond_j])
            img_min_j = np.nanmin(img_j[cond_j])
            img_norm_j = (img_j - img_min_j) / (img_max_j - img_min_j)
            img_norm_j[~cond_j] = 0
            cond = np.logical_and(cond_i, cond_j)

            # - Compute SSIM map
            #   NB: Need to normalize images to max otherwise the returned values are always ~1.
            logger.info("Computing SSIM for image %s (id=%s, ch=%d-%d) ..." % (self.sname, self.label, i + 1, j + 1))
            _, ssim_2d = structural_similarity(img_norm_i, img_norm_j, full=True, win_size=winsize, data_range=1)
            ssim_2d[ssim_2d < 0] = 0
            ssim_2d[~cond] = 0
            self.ssim_maps.append(ssim_2d)
            ssim_1d = ssim_2d[cond]

            if ssim_1d.size > 0:
                ssim_avg = np.nanmedian(ssim_1d)  # use median as average estimator
                self.ssim_avg.append(ssim_avg)
                logger.info("Image %s (chan=%d-%d): <SSIM>=%f" % (self.sname, i + 1, j + 1, ssim_avg))
            else:
                logger.warn("Image %s (chan=%d-%d): SSIM array is empty, setting estimators to -999 ..." % (self.sname, i + 1, j + 1))
                self.ssim_avg.append(-999)

    return 0
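# Minimal sketch of the pairwise SSIM computation above: images are min-max
# normalized to [0,1] first (otherwise the returned values saturate near 1),
# then compared over a small window. Arrays below are mock data.
import numpy as np
from skimage.metrics import structural_similarity

img_a = np.random.rand(64, 64)
img_b = img_a + 0.1 * np.random.rand(64, 64)

def minmax(img):
    return (img - img.min()) / (img.max() - img.min())

score, ssim_map = structural_similarity(
    minmax(img_a), minmax(img_b), full=True, win_size=3, data_range=1)
print("median SSIM=%.3f" % np.median(ssim_map))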
def set_data_from_file(self, filename):
    """ Set data from input file. Expected format: sname, N features, classid """

    # - Read table
    row_start = 0
    try:
        table = ascii.read(filename, data_start=row_start)
    except Exception as e:
        logger.error("Failed to read feature file %s (err=%s)!" % (filename, str(e)))
        return -1

    ncols = len(table.colnames)
    nfeat = ncols - 2

    # - Set data vectors
    self.data_labels = []
    self.data_classids = []
    self.data_targets = []
    self.source_names = []
    featdata = []

    for data in table:
        sname = data[0]
        obj_id = data[ncols - 1]
        label = self.classid_label_map[obj_id]
        targetid = self.classid_remap[obj_id]  # remap obj id in class id

        self.source_names.append(sname)
        self.data_labels.append(label)
        self.data_classids.append(obj_id)
        self.data_targets.append(targetid)

        featdata_curr = []
        for k in range(nfeat):
            featdata_curr.append(data[k + 1])
        featdata.append(featdata_curr)

    self.data = np.array(featdata)
    if self.data.size == 0:
        logger.error("Empty feature data vector read!")
        return -1

    data_shape = self.data.shape
    self.nsamples = data_shape[0]
    self.nfeatures = data_shape[1]
    logger.info("#nsamples=%d, #nfeatures=%d" % (self.nsamples, self.nfeatures))

    # - Normalize feature data?
    if self.normalize:
        logger.info("Normalizing feature data ...")
        data_norm = self.__normalize_data(self.data, self.norm_min, self.norm_max)
        self.data = data_norm

    # - Set pre-classified data
    logger.info("Setting pre-classified data (if any) ...")
    self.__set_preclass_data()

    return 0
def __extract_sources(self, data, bkg, rms, mask=None, seed_thr=4, merge_thr=3, dist_thr=-1):
    """ Find sources in channel data """

    # - Compute image center
    data_shape = data.shape
    y_c = data_shape[0] / 2.
    x_c = data_shape[1] / 2.

    # - Compute mask
    if mask is None:
        logger.info("Computing image mask ...")
        mask = np.logical_and(data != 0, np.isfinite(data))

    # - Threshold significance map at merge_thr (seed_thr is applied below on blob maxima)
    zmap = (data - bkg) / rms
    binary_map = (zmap > merge_thr).astype(np.int32)
    binary_map[~mask] = 0
    zmap[~mask] = 0

    # - Extract sources
    logger.info("Extracting sources ...")
    label_map = skimage.measure.label(binary_map)
    regprops = skimage.measure.regionprops(label_map, data)
    nsources = len(regprops)
    logger.info("#%d sources found ..." % nsources)

    # - Extract peaks
    kernsize = 3
    footprint = np.ones((kernsize, ) * data.ndim, dtype=bool)
    peaks = peak_local_max(np.copy(zmap), footprint=footprint, threshold_abs=seed_thr, min_distance=2, exclude_border=True)
    if peaks.shape[0] <= 0:
        logger.info("No peaks detected in this image, return None ...")
        return None

    # - Select best source
    regprops_sel = []
    peaks_sel = []
    binary_maps_sel = []
    polygons_sel = []
    contours_sel = []

    for regprop in regprops:
        # - Check if region max is >=seed_thr
        sslice = regprop.slice
        zmask = zmap[sslice]
        zmask_1d = zmask[np.logical_and(zmask != 0, np.isfinite(zmask))]
        zmax = zmask_1d.max()
        if zmax < seed_thr:
            logger.info("Skip source as zmax=%f<thr=%f" % (zmax, seed_thr))
            continue

        # - Set binary map with this source
        logger.debug("Get source binary mask ...")
        bmap = np.zeros_like(binary_map)
        bmap[sslice] = binary_map[sslice]

        # - Extract contour and polygon from binary mask
        logger.info("Extracting contour and polygon from binary mask ...")
        contours = []
        polygon = None
        try:
            bmap_uint8 = bmap.copy()  # copy as OpenCV internally modifies the original mask
            bmap_uint8 = bmap_uint8.astype(np.uint8)
            contours = cv2.findContours(bmap_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            contours = imutils.grab_contours(contours)
            if len(contours) > 0:
                contour = np.squeeze(contours[0])
                polygon = Polygon(contour)
        except Exception as e:
            logger.warn("Failed to compute mask contour (err=%s)!" % (str(e)))

        if polygon is None:
            logger.warn("Skip extracted blob as polygon failed to be computed ...")
            continue

        # - Check if source has a local peak inside the contour polygon
        has_peak = False
        peak_sel = None
        for peak in peaks:
            point = Point(peak[1], peak[0])
            has_peak = polygon.contains(point)
            if has_peak:
                peak_sel = peak
                break

        if not has_peak:
            logger.info("Skip extracted blob as no peak was found inside source contour polygon!")
            continue

        # - Check for source peak distance wrt image center
        if dist_thr > 0:
            dist = np.sqrt((peak_sel[1] - x_c)**2 + (peak_sel[0] - y_c)**2)
            if dist > dist_thr:
                logger.info("Skip extracted source as peak-imcenter dist=%f>thr=%f" % (dist, dist_thr))
                continue

        # - Update selected sources
        regprops_sel.append(regprop)
        peaks_sel.append(peak_sel)
        binary_maps_sel.append(bmap)
        polygons_sel.append(polygon)
        contours_sel.append(contours[0])

    # - Return None if no source is selected
    nsources_sel = len(regprops_sel)
    if nsources_sel <= 0:
        logger.info("No sources selected for this image ...")
        return None

    # - If more than 1 source is selected, take the one with peak closer to image center
    peak_final = peaks_sel[0]
    bmap_final = binary_maps_sel[0]
    regprop_final = regprops_sel[0]
    polygon_final = polygons_sel[0]
    contour_final = contours_sel[0]

    if nsources_sel > 1:
        logger.info("#%d sources selected, going to select the closest to image center ..." % (nsources_sel))
        dist_best = 1.e+99
        index_best = -1
        for j in range(len(peaks_sel)):
            peak = peaks_sel[j]
            dist = np.sqrt((peak[1] - x_c)**2 + (peak[0] - y_c)**2)
            if dist < dist_best:
                dist_best = dist
                peak_final = peak
                bmap_final = binary_maps_sel[j]
                regprop_final = regprops_sel[j]
                polygon_final = polygons_sel[j]
                contour_final = contours_sel[j]
                index_best = j
        logger.info("Selected source no. %d as the closest one to image center ..." % (index_best))
    else:
        logger.info("#%d sources selected ..." % (nsources_sel))

    # - Compute enclosing circle radius
    try:
        (xc, yc), radius = cv2.minEnclosingCircle(contour_final)
        enclosing_circle = (xc, yc, radius)
    except Exception as e:
        logger.warn("Failed to compute min enclosing circle (err=%s)!" % (str(e)))
        enclosing_circle = None

    # - Draw figure
    if self.draw:
        fig, ax = plt.subplots()

        # - Draw significance map
        plt.imshow(zmap)
        plt.colorbar()

        # - Draw bbox rectangle
        bbox = regprop_final.bbox
        ymin = bbox[0]
        ymax = bbox[2]
        xmin = bbox[1]
        xmax = bbox[3]
        dx = xmax - xmin - 1
        dy = ymax - ymin - 1
        rect = patches.Rectangle((xmin, ymin), dx, dy, linewidth=1, edgecolor='r', facecolor='none')
        ax.add_patch(rect)

        # - Draw selected peak
        if peak_final is not None:
            plt.scatter(peak_final[1], peak_final[0], s=10)

        # - Draw contour polygon
        if polygon_final is not None:
            plt.plot(*polygon_final.exterior.xy)

        # - Draw enclosing circle (only if successfully computed)
        if enclosing_circle is not None:
            circle = plt.Circle((xc, yc), radius, color='g', clip_on=False, fill=False)
            ax.add_patch(circle)

        plt.show()

    return (peak_final, bmap_final, regprop_final, enclosing_circle)
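# Minimal standalone sketch of the blob-extraction steps above: threshold a
# z-score map at the merge threshold, label connected components, and find
# peaks above the seed threshold. The mock image and thresholds are
# illustrative only.
import numpy as np
import skimage.measure
from skimage.feature import peak_local_max

rng = np.random.default_rng(0)
data = rng.normal(0., 1., (64, 64))
data[30:34, 30:34] += 10.                  # inject a fake source
bkg, rms = np.median(data), np.std(data)

zmap = (data - bkg) / rms
binary_map = (zmap > 3).astype(np.int32)   # merge_thr-like threshold

label_map = skimage.measure.label(binary_map)
regprops = skimage.measure.regionprops(label_map, data)
peaks = peak_local_max(zmap, threshold_abs=4, min_distance=2, exclude_border=True)
print("#blobs=%d, #peaks=%d" % (len(regprops), peaks.shape[0]))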
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #==   (ALL PROCS)
    #===========================
    if procId == MASTER:
        logger.info("[PROC %d] Parsing script args ..." % (procId))
    try:
        args = get_args()
    except Exception as ex:
        logger.error("[PROC %d] Failed to get and parse options (err=%s)" % (procId, str(ex)))
        return 1

    imgfile = args.img
    regionfile = args.region
    configfile = args.scutout_config

    surveys = []
    if args.surveys != "":
        surveys = [str(x.strip()) for x in args.surveys.split(',')]

    surveys_radio = []
    if args.surveys_radio != "":
        surveys_radio = [str(x.strip()) for x in args.surveys_radio.split(',')]

    if imgfile == "" and not surveys:
        logger.error("[PROC %d] No image passed, surveys option cannot be empty!" % (procId))
        return 1

    filter_regions_by_tags = args.filter_regions_by_tags
    tags = []
    if args.tags != "":
        tags = [str(x.strip()) for x in args.tags.split(',')]

    jobdir = os.getcwd()
    if args.jobdir != "":
        if not os.path.exists(args.jobdir):
            logger.error("[PROC %d] Given job dir %s does not exist!" % (procId, args.jobdir))
            return 1
        jobdir = args.jobdir

    # - Classifier options
    normalize_feat = args.normalize_feat
    scalerfile = args.scalerfile
    binary_class = args.binary_class
    modelfile = args.modelfile
    save_class_labels = args.save_class_labels

    # - Autoencoder options
    run_aereco = args.run_aereco
    nx = args.nx
    ny = args.ny
    modelfile_encoder = args.modelfile_encoder
    modelfile_decoder = args.modelfile_decoder
    weightfile_encoder = args.weightfile_encoder
    weightfile_decoder = args.weightfile_decoder
    aereco_thr = args.aereco_thr

    empty_filenames = (
        (modelfile_encoder == "" or modelfile_decoder == "") or
        (weightfile_encoder == "" or weightfile_decoder == "")
    )
    if run_aereco and empty_filenames:
        logger.error("[PROC %d] Empty AE model/weight filename given!" % (procId))
        return 1

    # - Outlier search options
    find_outliers = args.find_outliers
    modelfile_outlier = args.modelfile_outlier
    anomaly_thr = args.anomaly_thr
    max_features = args.max_features
    max_samples = "auto"
    if args.max_samples > 0:
        max_samples = args.max_samples
    save_outlier = args.save_outlier
    outfile_outlier = args.outfile_outlier

    # - Color index options
    refch = args.refch
    shrink_mask = args.shrink_mask
    kernsizes_shrink = args.kernsizes_shrink
    grow_mask = args.grow_mask
    kernsizes_grow = args.kernsizes_grow
    seed_thr = args.seed_thr
    merge_thr = args.merge_thr

    # - Spectral index options
    add_spectral_index = args.add_spectral_index
    img_group_1 = []
    img_group_2 = []
    img_freqs = []
    if args.img_group_1 != "":
        img_group_1 = [int(x.strip()) for x in args.img_group_1.split(',')]
    if args.img_group_2 != "":
        img_group_2 = [int(x.strip()) for x in args.img_group_2.split(',')]
    if args.img_freqs != "":
        img_freqs = [float(x.strip()) for x in args.img_freqs.split(',')]
    alpha_rcoeff_thr = args.alpha_rcoeff_thr
    save_spectral_index = args.save_spectral_index

    if add_spectral_index:
        if not img_group_1 or not img_group_2:
            logger.error("Group image indices for spectral index calculation not given in input or empty!")
            return 1
        if len(img_group_1) != len(img_group_2):
            logger.error("Given group image indices for spectral index calculation do not have the same length!")
            return 1

    # - Quality data options
    negative_pix_fract_thr = args.negative_pix_fract_thr
    bad_pix_fract_thr = args.bad_pix_fract_thr

    #==================================
    #==   RUN
    #==================================
    pipeline = Pipeline()
    pipeline.jobdir = jobdir
    pipeline.filter_regions_by_tags = filter_regions_by_tags
    pipeline.tags = tags
    pipeline.configfile = configfile
    pipeline.surveys = surveys
    pipeline.surveys_radio = surveys_radio

    # - Classifier options
    pipeline.normalize_feat = normalize_feat
    pipeline.scalerfile = scalerfile
    pipeline.modelfile = modelfile
    pipeline.binary_class = binary_class
    pipeline.save_class_labels = save_class_labels

    # - Outlier search options
    pipeline.find_outliers = find_outliers
    pipeline.modelfile_outlier = modelfile_outlier
    pipeline.outlier_thr = anomaly_thr
    pipeline.max_features = max_features
    pipeline.max_samples = max_samples
    pipeline.save_outlier = save_outlier
    pipeline.outfile_outlier = outfile_outlier

    # - Autoencoder options
    pipeline.run_aereco = run_aereco
    pipeline.modelfile_encoder = modelfile_encoder
    pipeline.modelfile_decoder = modelfile_decoder
    pipeline.weightfile_encoder = weightfile_encoder
    pipeline.weightfile_decoder = weightfile_decoder

    # - Image pre-processing options (fixed for the AE reconstruction step)
    pipeline.resize_img = True
    pipeline.nx = nx
    pipeline.ny = ny
    pipeline.normalize_img = True
    pipeline.scale_img_to_abs_max = False
    pipeline.scale_img_to_max = False
    pipeline.log_transform_img = False
    pipeline.scale_img = False
    pipeline.scale_img_factors = []
    pipeline.standardize_img = False
    pipeline.img_means = []
    pipeline.img_sigmas = []
    pipeline.img_chan_divide = False
    pipeline.img_chan_mins = []
    pipeline.img_erode = False
    pipeline.img_erode_kernel = 9
    pipeline.add_channorm_layer = False
    pipeline.winsize = 3

    # - Color/spectral index options
    pipeline.refch = refch
    pipeline.shrink_mask = shrink_mask
    pipeline.kernsizes_shrink = kernsizes_shrink
    pipeline.grow_mask = grow_mask
    pipeline.kernsizes_grow = kernsizes_grow
    pipeline.seed_thr = seed_thr
    pipeline.merge_thr = merge_thr
    pipeline.add_spectral_index = add_spectral_index
    pipeline.alpha_img_freqs = img_freqs
    pipeline.alpha_img_group_1 = img_group_1
    pipeline.alpha_img_group_2 = img_group_2
    pipeline.alpha_rcoeff_thr = alpha_rcoeff_thr
    pipeline.save_spectral_index_data = save_spectral_index

    # - Quality cut options
    pipeline.negative_pix_fract_thr = negative_pix_fract_thr
    pipeline.bad_pix_fract_thr = bad_pix_fract_thr

    logger.debug(
        "alpha_img_freqs=%s, alpha_img_group_1=%s, alpha_img_group_2=%s"
        % (str(img_freqs), str(img_group_1), str(img_group_2))
    )

    logger.info("[PROC %d] Running source classification pipeline ..." % (procId))
    status = pipeline.run(imgfile, regionfile)
    if status < 0:
        logger.error("Source classification pipeline run failed (see logs)!")
        return 1

    return 0
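# ----------------------------------------------------------------------------------
# Example invocation (hypothetical script and option names, inferred from the
# attributes parsed above; the actual option definitions live in get_args()):
#
#   mpirun -np 2 python run_pipeline.py \
#     --img=mosaic.fits --region=sources.reg --scutout_config=scutout.cfg \
#     --refch=0 --add_spectral_index \
#     --img_group_1=0,0 --img_group_2=1,2 --img_freqs=0.9,1.4,3.0
# ----------------------------------------------------------------------------------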
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    datalist = args.datalist

    # - Data process options
    nx = args.nx
    ny = args.ny
    augment = args.augment
    augment_scale_factor = args.augment_scale_factor
    scale = args.scale
    scale_factors = []
    if args.scale_factors != "":
        scale_factors = [float(x.strip()) for x in args.scale_factors.split(',')]
    normalize = args.normalize
    scale_to_abs_max = args.scale_to_abs_max
    scale_to_max = args.scale_to_max
    log_transform = args.log_transform
    standardize = args.standardize
    img_means = []
    img_sigmas = []
    if args.img_means != "":
        img_means = [float(x.strip()) for x in args.img_means.split(',')]
    if args.img_sigmas != "":
        img_sigmas = [float(x.strip()) for x in args.img_sigmas.split(',')]
    chan_divide = args.chan_divide
    chan_mins = []
    if args.chan_mins != "":
        chan_mins = [float(x.strip()) for x in args.chan_mins.split(',')]
    erode = args.erode
    erode_kernel = args.erode_kernel

    # - NN architecture options
    use_vae = args.use_vae
    modelfile_encoder = args.modelfile_encoder
    modelfile_decoder = args.modelfile_decoder
    add_maxpooling_layer = args.add_maxpooling_layer
    add_batchnorm_layer = args.add_batchnorm_layer
    add_leakyrelu = args.add_leakyrelu
    add_dense_layer = args.add_dense_layer
    add_channorm_layer = args.add_channorm_layer
    nfilters_cnn = [int(x.strip()) for x in args.nfilters_cnn.split(',')]
    kernsizes_cnn = [int(x.strip()) for x in args.kernsizes_cnn.split(',')]
    strides_cnn = [int(x.strip()) for x in args.strides_cnn.split(',')]
    dense_layer_sizes = [int(x.strip()) for x in args.dense_layer_sizes.split(',')]
    dense_layer_activation = args.dense_layer_activation
    decoder_output_layer_activation = args.decoder_output_layer_activation

    logger.debug(
        "nfilters_cnn=%s, kernsizes_cnn=%s, strides_cnn=%s, dense_layer_sizes=%s"
        % (str(nfilters_cnn), str(kernsizes_cnn), str(strides_cnn), str(dense_layer_sizes))
    )

    # - Train options
    weightfile_encoder = args.weightfile_encoder
    weightfile_decoder = args.weightfile_decoder
    latentdim = args.latentdim
    optimizer = args.optimizer
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    nepochs = args.nepochs
    mse_loss = args.mse_loss
    scale_chan_mse_loss = args.scale_chan_mse_loss
    kl_loss = args.kl_loss
    ssim_loss = args.ssim_loss
    mse_loss_weight = args.mse_loss_weight
    kl_loss_weight = args.kl_loss_weight
    ssim_loss_weight = args.ssim_loss_weight
    ssim_win_size = args.ssim_win_size
    weight_seed = args.weight_seed
    reproducible = args.reproducible
    validation_steps = args.validation_steps

    # - UMAP options
    run_umap = args.run_umap
    latentdim_umap = args.latentdim_umap
    mindist_umap = args.mindist_umap
    nneighbors_umap = args.nneighbors_umap
    outfile_umap_unsupervised = args.outfile_umap_unsupervised
    outfile_umap_supervised = args.outfile_umap_supervised
    outfile_umap_preclassified = args.outfile_umap_preclassified

    # - Clustering options
    run_clustering = args.run_clustering
    min_cluster_size = args.min_cluster_size
    min_samples = args.min_samples
    modelfile_clust = args.modelfile_clust
    predict_clust = args.predict_clust

    #===========================
    #==   READ DATALIST
    #===========================
    # - Create data loader
    dl = DataLoader(filename=datalist)

    # - Read datalist
    logger.info("Reading datalist %s ..." % datalist)
    if dl.read_datalist() < 0:
        logger.error("Failed to read input datalist!")
        return 1

    #===========================
    #==   TRAIN VAE
    #===========================
    logger.info("Running VAE classifier training ...")
    vae_class = FeatExtractorAE(dl)

    vae_class.use_vae = use_vae
    vae_class.modelfile_encoder = modelfile_encoder
    vae_class.modelfile_decoder = modelfile_decoder
    vae_class.weightfile_encoder = weightfile_encoder
    vae_class.weightfile_decoder = weightfile_decoder
    vae_class.latent_dim = latentdim

    # - Data pre-processing options
    vae_class.set_image_size(nx, ny)
    vae_class.augmentation = augment
    vae_class.augment_scale_factor = augment_scale_factor
    vae_class.normalize = normalize
    vae_class.scale_to_abs_max = scale_to_abs_max
    vae_class.scale_to_max = scale_to_max
    vae_class.log_transform_img = log_transform
    vae_class.scale_img = scale
    vae_class.scale_img_factors = scale_factors
    vae_class.standardize_img = standardize
    vae_class.img_means = img_means
    vae_class.img_sigmas = img_sigmas
    vae_class.chan_divide = chan_divide
    vae_class.chan_mins = chan_mins
    vae_class.erode = erode
    vae_class.erode_kernel = erode_kernel

    # - Train options
    vae_class.batch_size = batch_size
    vae_class.nepochs = nepochs
    vae_class.validation_steps = validation_steps
    vae_class.set_optimizer(optimizer, learning_rate)
    if reproducible:
        vae_class.set_reproducible_model()

    # - NN architecture options
    vae_class.add_max_pooling = add_maxpooling_layer
    vae_class.add_batchnorm = add_batchnorm_layer
    vae_class.add_leakyrelu = add_leakyrelu
    vae_class.add_dense = add_dense_layer
    vae_class.add_channorm_layer = add_channorm_layer
    vae_class.nfilters_cnn = nfilters_cnn
    vae_class.kernsizes_cnn = kernsizes_cnn
    vae_class.strides_cnn = strides_cnn
    vae_class.dense_layer_sizes = dense_layer_sizes
    vae_class.dense_layer_activation = dense_layer_activation

    # - Loss options
    vae_class.use_mse_loss = mse_loss
    vae_class.scale_chan_mse_loss = scale_chan_mse_loss
    vae_class.use_kl_loss = kl_loss
    vae_class.use_ssim_loss = ssim_loss
    vae_class.mse_loss_weight = mse_loss_weight
    vae_class.kl_loss_weight = kl_loss_weight
    vae_class.ssim_loss_weight = ssim_loss_weight
    vae_class.ssim_win_size = ssim_win_size
    vae_class.weight_seed = weight_seed

    if vae_class.train_model() < 0:
        logger.error("VAE training failed!")
        return 1

    #===========================
    #==   TRAIN UMAP
    #===========================
    if run_umap:
        # - Retrieve VAE encoded data
        logger.info("Retrieve latent data from VAE ...")
        snames = vae_class.source_names
        classids = vae_class.source_ids
        vae_data = vae_class.encoded_data

        # - Run UMAP training
        logger.info("Running UMAP classifier training on VAE latent data ...")
        umap_class = FeatExtractorUMAP()
        umap_class.set_encoded_data_unsupervised_outfile(outfile_umap_unsupervised)
        umap_class.set_encoded_data_supervised_outfile(outfile_umap_supervised)
        umap_class.set_encoded_data_preclassified_outfile(outfile_umap_preclassified)
        umap_class.set_encoded_data_dim(latentdim_umap)
        umap_class.set_min_dist(mindist_umap)
        umap_class.set_n_neighbors(nneighbors_umap)

        if umap_class.run_train(vae_data, class_ids=classids, snames=snames) < 0:
            logger.error("UMAP training failed!")
            return 1

    #==============================
    #==   RUN CLUSTERING
    #==============================
    if run_clustering:
        # - Retrieve VAE encoded data
        logger.info("Retrieve latent data from VAE ...")
        snames = vae_class.source_names
        classids = vae_class.source_ids
        vae_data = vae_class.encoded_data

        # - Run HDBSCAN clustering (train, or predict with a pre-trained model)
        logger.info("Running HDBSCAN clustering on autoencoder latent data ...")
        clust_class = Clusterer()
        clust_class.min_cluster_size = min_cluster_size
        clust_class.min_samples = min_samples

        if predict_clust:
            if clust_class.run_predict(vae_data, class_ids=classids, snames=snames, modelfile=modelfile_clust) < 0:
                logger.error("Clustering predict failed!")
                return 1
        else:
            if clust_class.run_clustering(vae_data, class_ids=classids, snames=snames, modelfile=modelfile_clust) < 0:
                logger.error("Clustering run failed!")
                return 1

    return 0
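# ----------------------------------------------------------------------------------
# NOTE: minimal sketch (an assumption, not the actual FeatExtractorAE loss code) of
# how the three loss terms configured above are typically combined in a VAE. The
# mse_loss_weight/kl_loss_weight/ssim_loss_weight and ssim_win_size options map to
# the w_* and ssim_win parameters here; composite_vae_loss is a hypothetical name.
# ----------------------------------------------------------------------------------
import tensorflow as tf

def composite_vae_loss(x, x_reco, z_mean, z_logvar, w_mse=1.0, w_kl=1.0, w_ssim=0.0, ssim_win=3):
    """ Return weighted sum of MSE + KL + (1-SSIM) loss terms """
    # - Pixel reconstruction term
    mse = tf.reduce_mean(tf.square(x - x_reco))
    # - KL divergence of the latent Gaussian vs a unit prior
    kl = -0.5 * tf.reduce_mean(1. + z_logvar - tf.square(z_mean) - tf.exp(z_logvar))
    # - Structural similarity term (1-SSIM so that lower is better); expects [B,H,W,C] in [0,1]
    ssim = 1. - tf.reduce_mean(tf.image.ssim(x, x_reco, max_val=1.0, filter_size=ssim_win))
    return w_mse * mse + w_kl * kl + w_ssim * ssim

# Design note: weighting the KL term below 1 (or annealing it) is a common way to
# trade latent regularity for reconstruction fidelity; the SSIM term rewards
# structural agreement that per-pixel MSE alone does not capture.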