def __compute_spectral_index(self, img_group_1, img_group_2):
        """ Compute spectral index alpha """

        # - Check first if frequency data are available
        print("self.img_freqs")
        print(self.img_freqs)
        print("len(self.img_freqs)")
        print(len(self.img_freqs))
        print("len(self.img_data)")
        print(len(self.img_data))
        print("img_group_1")
        print(img_group_1)
        print("img_group_2")
        print(img_group_2)

        freqs = []
        if self.img_freqs and len(self.img_freqs) == len(self.img_data):
            freqs = self.img_freqs
        elif self.img_freqs_head and len(self.img_freqs_head) == len(
                self.img_data):
            freqs = self.img_freqs_head
        else:
            logger.error("No frequency data given (user/header)!")
            return -1

        # - Check group indexes
        if len(img_group_1) != len(img_group_2):
            logger.error("Group indexes do not have the same length!")
            return -1

        # - Check group indices are within available channels
        for i in range(len(img_group_1)):
            index = img_group_1[i]
            if index < 0 or index >= self.nchannels:
                logger.error(
                    "Invalid index (%d) in group 1, must be in range [0,%d]!" %
                    (index, self.nchannels - 1))
                return -1

        for i in range(len(img_group_2)):
            index = img_group_2[i]
            if index < 0 or index >= self.nchannels:
                logger.error(
                    "Invalid index (%d) in group 2, must be in range [0,%d]!" %
                    (index, self.nchannels - 1))
                return -1

        # - Loop over img combinations and compute spectral indices
        logger.info("Computing spectral index (#%d combinations) ..." %
                    (len(img_group_1)))
        alphas = []
        rcoeffs = []

        smask = self.img_data_mask[self.refch]

        for i in range(len(img_group_1)):
            index_1 = img_group_1[i]
            index_2 = img_group_2[i]
            data_1 = self.img_data[index_1]
            data_2 = self.img_data[index_2]

            # - Find frequency from header
            nu1 = freqs[index_1]
            nu2 = freqs[index_2]
            #alpha12, alpha21= compute_alpha(data_1, data_2, nu1, nu2, smask, draw_plots)
            #alpha= 0.5*(alpha12+alpha21)
            outtuple = self.__compute_alpha(data_1, data_2, nu1, nu2, smask)
            if outtuple is None:
                logger.warn(
                    "alpha calculation failed for map combination %d-%d, skip to next ..."
                    % (index_1, index_2))
                continue

            alpha = outtuple[0]
            r = outtuple[1]
            alphas.append(alpha)
            rcoeffs.append(r)

        logger.info("Computing average spectral index ...")
        logger.debug("alphas: %s" % (str(alphas)))

        alphas = np.array(alphas)
        alphas_safe = alphas[np.isfinite(alphas)]
        alphas = alphas_safe
        if alphas.size == 0:
            logger.warn(
                "No alpha measurement left (all nans), will set alpha values to -999 ..."
            )
            alpha_mean = -999
            alpha_median = -999
            alpha_min = -999
            alpha_max = -999
        else:
            alpha_mean = np.mean(alphas)
            alpha_median = np.median(alphas)
            alpha_min = np.min(alphas)
            alpha_max = np.max(alphas)

        rcoeffs = np.array(rcoeffs)
        rcoeffs_safe = rcoeffs[np.isfinite(rcoeffs)]
        rcoeffs = rcoeffs_safe
        if rcoeffs.size == 0:
            logger.warn(
                "No rcoeff measurement left (all nans), will set rcoeff values to -999 ..."
            )
            rcoeff_mean = -999
            rcoeff_median = -999
            rcoeff_min = -999
            rcoeff_max = -999
        else:
            rcoeff_mean = np.mean(rcoeffs)
            rcoeff_median = np.median(rcoeffs)
            rcoeff_min = np.min(rcoeffs)
            rcoeff_max = np.max(rcoeffs)

        # - Set spectral index
        self.alpha = alpha_mean
        self.rcoeff = rcoeff_mean
        if self.alpha != -999 and self.rcoeff >= self.rcoeff_thr:
            self.has_good_alpha = True
        else:
            self.has_good_alpha = False

        return 0
Example #2
def main():
	"""Main function"""

	
	#===========================
	#==   PARSE ARGS
	#===========================
	logger.info("Get script args ...")
	try:
		args= get_args()
	except Exception as ex:
		logger.error("Failed to get and parse options (err=%s)",str(ex))
		return 1

	# - Input filelist
	datalist= args.datalist

	# - Data process options	
	nx= args.nx
	ny= args.ny

	normalize= args.normalize
	scale_to_abs_max= args.scale_to_abs_max
	scale_to_max= args.scale_to_max
	log_transform= args.log_transform
	scale= args.scale
	scale_factors= []
	if args.scale_factors!="":
		scale_factors= [float(x.strip()) for x in args.scale_factors.split(',')]
	standardize= args.standardize
	img_means= []
	img_sigmas= []
	if args.img_means!="":
		img_means= [float(x.strip()) for x in args.img_means.split(',')]
	if args.img_sigmas!="":
		img_sigmas= [float(x.strip()) for x in args.img_sigmas.split(',')]

	chan_divide= args.chan_divide
	chan_mins= []
	if args.chan_mins!="":
		chan_mins= [float(x.strip()) for x in args.chan_mins.split(',')]
	erode= args.erode	
	erode_kernel= args.erode_kernel
	
	# - Autoencoder options
	modelfile_encoder= args.modelfile_encoder
	weightfile_encoder= args.weightfile_encoder
	#add_channorm_layer= args.add_channorm_layer

	
	# - UMAP options
	run_umap= args.run_umap
	modelfile_umap= args.modelfile_umap
	outfile_umap_unsupervised= args.outfile_umap_unsupervised
		
	# - Clustering options
	run_clustering= args.run_clustering
	min_cluster_size= args.min_cluster_size
	min_samples= args.min_samples	
	modelfile_clust= args.modelfile_clust
	predict_clust= args.predict_clust

	#===========================
	#==   READ DATALIST
	#===========================
	# - Create data loader
	dl= DataLoader(filename=datalist)

	# - Read datalist	
	logger.info("Reading datalist %s ..." % datalist)
	if dl.read_datalist()<0:
		logger.error("Failed to read input datalist!")
		return 1
	
	#===============================
	#==   RUN AUTOENCODER PREDICT
	#===============================
	logger.info("Running autoencoder classifier predict ...")
	vae_class= FeatExtractorAE(dl)
	vae_class.set_image_size(nx, ny)
	vae_class.normalize= normalize
	vae_class.scale_to_abs_max= scale_to_abs_max
	vae_class.scale_to_max= scale_to_max
	vae_class.log_transform_img= log_transform
	vae_class.scale_img= scale
	vae_class.scale_img_factors= scale_factors
	vae_class.standardize_img= standardize
	vae_class.img_means= img_means
	vae_class.img_sigmas= img_sigmas
	vae_class.chan_divide= chan_divide
	vae_class.chan_mins= chan_mins
	vae_class.erode= erode
	vae_class.erode_kernel= erode_kernel
	#vae_class.add_channorm_layer= add_channorm_layer
	
	if vae_class.predict_model(modelfile_encoder, weightfile_encoder)<0:
		logger.error("VAE predict failed!")
		return 1

	#===========================
	#==   RUN UMAP PREDICT
	#===========================
	if run_umap:
		# - Retrieve VAE encoded data
		logger.info("Retrieve latent data from autoencoder ...")
		snames= vae_class.source_names
		classids= vae_class.source_ids
		vae_data= vae_class.encoded_data

		# - Run UMAP
		logger.info("Running UMAP classifier prediction on autoencoder latent data ...")
		umap_class= FeatExtractorUMAP()
		umap_class.set_encoded_data_unsupervised_outfile(outfile_umap_unsupervised)
		
		if umap_class.run_predict(vae_data, class_ids=classids, snames=snames, modelfile=modelfile_umap)<0:
			logger.error("UMAP prediction failed!")
			return 1

	#==============================
	#==   RUN CLUSTERING
	#==============================
	if run_clustering:
		# - Retrieve VAE encoded data
		logger.info("Retrieve latent data from VAE ...")
		snames= vae_class.source_names
		classids= vae_class.source_ids
		vae_data= vae_class.encoded_data

		# - Run HDBSCAN clustering
		logger.info("Running HDBSCAN classifier prediction on autoencoder latent data ...")
		clust_class= Clusterer()
		clust_class.min_cluster_size= min_cluster_size
		clust_class.min_samples= min_samples
	
		status= 0
		if predict_clust:
			if clust_class.run_predict(vae_data, class_ids=classids, snames=snames, modelfile=modelfile_clust)<0:
				logger.error("Clustering predict failed!")
				return 1
		else:
			if clust_class.run_clustering(vae_data, class_ids=classids, snames=snames, modelfile=modelfile_clust)<0:
				logger.error("Clustering run failed!")
				return 1

	return 0
def main():
	"""Main function"""

	
	#===========================
	#==   PARSE ARGS
	#===========================
	logger.info("Get script args ...")
	try:
		args= get_args()
	except Exception as ex:
		logger.error("Failed to get and parse options (err=%s)",str(ex))
		return 1

	# - Input filelist
	inputfile= args.inputfile

	# - Data pre-processing
	normalize= args.normalize
	reduce_dim= args.reduce_dim
	reduce_dim_method= args.reduce_dim_method
	pca_ncomps= args.pca_ncomps
	pca_varthr= args.pca_varthr
	
	# - Clustering options
	min_cluster_size= args.min_cluster_size
	min_samples= args.min_samples	
	modelfile_clust= args.modelfile_clust
	predict_clust= args.predict_clust

	#===========================
	#==   READ FEATURE DATA
	#===========================
	ret= Utils.read_feature_data(inputfile)
	if not ret:
		logger.error("Failed to read data from file %s!" % (inputfile))
		return 1

	data= ret[0]
	snames= ret[1]
	classids= ret[2]

	#==============================
	#==   RUN CLUSTERING
	#==============================
	logger.info("Running HDBSCAN classifier prediction on input feature data ...")
	clust_class= Clusterer()
	clust_class.min_cluster_size= min_cluster_size
	clust_class.min_samples= min_samples
	clust_class.normalize= normalize
	clust_class.reduce_dim= reduce_dim
	clust_class.reduce_dim_method= reduce_dim_method
	clust_class.pca_ncomps= pca_ncomps
	clust_class.pca_varthr= pca_varthr

	status= 0
	if predict_clust:
		if clust_class.run_predict(data, class_ids=classids, snames=snames, modelfile=modelfile_clust)<0:
			logger.error("Clustering predict failed!")
			return 1
	else:
		if clust_class.run_clustering(data, class_ids=classids, snames=snames, modelfile=modelfile_clust)<0:
			logger.error("Clustering run failed!")
			return 1

	return 0
    def __compute_alpha(self, data_1, data_2, nu1, nu2, smask):
        """ Compute alpha """

        # - Get array of pixels !=0 & finite in both maps
        cond_img1 = np.logical_and(data_1 != 0, np.isfinite(data_1))
        cond_img2 = np.logical_and(data_2 != 0, np.isfinite(data_2))
        cond_img12 = np.logical_and(cond_img1, cond_img2)
        cond_final = np.logical_and(cond_img12, smask == 1)

        indexes = np.where(cond_final)
        img_1d_1 = data_1[indexes]
        img_1d_2 = data_2[indexes]

        logger.info("#%d pixels in image 1 ..." % (len(img_1d_1)))
        logger.info("#%d pixels in image 2 ..." % (len(img_1d_2)))

        if len(img_1d_1) <= 0 or len(img_1d_2) <= 0:
            logger.warn(
                "No pixels left for T-T analysis after applying conditions (finite+mask) (hint: check if source is outside one or more channels)"
            )
            return None

        # - Perform fit 1-2
        logger.info("Compute spectral index from T-T fit  ...")
        res_12 = linregress(img_1d_1, img_1d_2)
        slope_12 = res_12.slope
        intercept_12 = res_12.intercept
        alpha_12 = self.__slope2alpha(slope_12, nu1, nu2)
        r_12 = res_12.rvalue

        print("== FIT RES 1-2 ==")
        print(res_12)
        print("alpha_12=%f" % (alpha_12))

        # - Perform fit 2-1
        res_21 = linregress(img_1d_2, img_1d_1)
        slope_21 = res_21.slope
        intercept_21 = res_21.intercept
        alpha_21 = self.__slope2alpha(slope_21, nu2, nu1)
        r_21 = res_21.rvalue

        print("== FIT RES 2-1 ==")
        print(res_21)
        print("alpha_21=%f" % (alpha_21))

        # - Reject fits with nan or non-positive slope
        goodvalues_12 = np.isfinite(slope_12) and slope_12 > 0
        goodvalues_21 = np.isfinite(slope_21) and slope_21 > 0
        if not goodvalues_12 and not goodvalues_21:
            logger.warn(
                "Both T-T fits have nan or non-positive slopes, rejecting this map combination ..."
            )
            return None

        # - Add some goodness of fit criteria
        obs_12 = img_1d_2
        pred_12 = slope_12 * img_1d_1 + intercept_12
        residuals_12 = obs_12 - pred_12
        residuals_mean_12 = np.mean(residuals_12)
        residuals_std_12 = np.std(residuals_12)
        residuals_min_12 = np.min(residuals_12)
        residuals_max_12 = np.max(residuals_12)

        obs_21 = img_1d_1
        pred_21 = slope_21 * img_1d_2 + intercept_21
        residuals_21 = obs_21 - pred_21
        residuals_mean_21 = np.mean(residuals_21)
        residuals_std_21 = np.std(residuals_21)
        residuals_min_21 = np.min(residuals_21)
        residuals_max_21 = np.max(residuals_21)

        # - Set return tuple
        outtuple = ()
        if goodvalues_12 and not goodvalues_21:
            outtuple = (alpha_12, r_12, residuals_mean_12, residuals_std_12,
                        residuals_min_12, residuals_max_12)
        elif goodvalues_21 and not goodvalues_12:
            outtuple = (alpha_21, r_21, residuals_mean_21, residuals_std_21,
                        residuals_min_21, residuals_max_21)
        else:
            # - Select best model
            best_resbias_id = 1
            best_resstd_id = 1
            best_rcoeff_id = 1
            if np.abs(residuals_mean_21) < np.abs(
                    residuals_mean_12):  # check smallest residual bias
                best_resbias_id = 2
            if np.abs(residuals_std_21) < np.abs(
                    residuals_std_12):  # check smallest residual std dev
                best_resstd_id = 2
            if np.abs(r_21) > np.abs(
                    r_12):  # check larger (closer to 1) correlation coeff
                best_rcoeff_id = 2

            if best_rcoeff_id == 1:
                outtuple = (alpha_12, r_12, residuals_mean_12,
                            residuals_std_12, residuals_min_12,
                            residuals_max_12)
            else:
                outtuple = (alpha_21, r_21, residuals_mean_21,
                            residuals_std_21, residuals_min_21,
                            residuals_max_21)

        return outtuple
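# The T-T fits above are converted into a spectral index via __slope2alpha(),
# which is not included in this snippet. Below is a minimal sketch of that
# conversion, assuming the usual power-law convention S(nu) ~ nu^alpha, so that
# a fitted slope S(nu_b)/S(nu_a) maps to alpha = log10(slope)/log10(nu_b/nu_a).
# The function name and signature are assumptions made for illustration only.
import numpy as np

def slope2alpha(slope, nu_a, nu_b):
    """ Convert a T-T plot slope (flux at nu_b vs flux at nu_a) to a spectral index """
    if not np.isfinite(slope) or slope <= 0:
        return np.nan  # non-physical slope, consistent with the goodvalues checks above
    return np.log10(slope) / np.log10(nu_b / nu_a)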
    def __evaluate_model(self):
        """ Evaluate model """

        # - Create pipeline and models
        logger.info("Creating pipeline and model ...")
        if self.__create_pipeline() < 0:
            logger.error("Failed to create pipeline and model!")
            return -1

        # - Evaluate models
        logger.info("Evaluating models as a function of #features ...")
        #results, nfeats = list(), list()
        results = list()
        rfe_best = None
        score_best = -1
        nfeat_best = -1
        rfe_best_index = -1
        scores_stats = []

        #for i in range(1,self.nfeatures):
        for i in range(len(self.nfeats)):
            n = self.nfeats[i]
            p = self.pipelines[i]
            scores = cross_val_score(p,
                                     self.data_preclassified,
                                     self.data_preclassified_targets,
                                     scoring=self.scoring,
                                     cv=self.cv,
                                     n_jobs=self.ncores,
                                     error_score='raise')
            scores_mean = np.mean(scores)
            scores_std = np.std(scores)
            scores_min = np.min(scores)
            scores_max = np.max(scores)
            scores_median = np.median(scores)
            scores_q1 = np.percentile(scores, 25)
            scores_q3 = np.percentile(scores, 75)
            scores_stats.append([
                n, scores_mean, scores_std, scores_min, scores_max,
                scores_median, scores_q1, scores_q3
            ])

            results.append(scores)
            #nfeats.append(i)

            if scores_mean > score_best:
                score_best = scores_mean
                nfeat_best = n
                rfe_best_index = i
            logger.info('--> nfeats=%d: score=%.3f (std=%.3f)' %
                        (n, scores_mean, scores_std))

        # - Save scores stats
        logger.info("Saving score stats ...")
        scores_head = "# n mean std min max median q1 q3"
        scores_stats = np.array(scores_stats).reshape(len(self.nfeats), 8)
        Utils.write_ascii(scores_stats, self.outfile_scorestats, scores_head)

        # - Evaluate automatically-selected model?
        rfe_best = None

        if self.auto_selection:
            logger.info("Evaluate model (automated feature selection) ...")
            scores = cross_val_score(self.pipeline,
                                     self.data_preclassified,
                                     self.data_preclassified_targets,
                                     scoring=self.scoring,
                                     cv=self.cv,
                                     n_jobs=self.ncores,
                                     error_score='raise')

            best_scores_mean = np.mean(scores)
            best_scores_std = np.std(scores)
            logger.info(
                'Selecting best scores automatically: %.3f (std=%.3f)' %
                (best_scores_mean, best_scores_std))

            rfe_best = self.rfe

        else:
            logger.info(
                "Selecting best model after scan: index=%d, n_feat=%d, score=%.3f"
                % (rfe_best_index, nfeat_best, score_best))

            rfe_best = RFE(
                estimator=self.models[rfe_best_index],
                #cv=self.cv,
                n_features_to_select=nfeat_best)

        # - Fit data and show which features were selected
        logger.info("Fitting RFE model on dataset ...")
        rfe_best.fit(self.data_preclassified, self.data_preclassified_targets)

        selfeats = rfe_best.support_
        featranks = rfe_best.ranking_
        nfeat_sel = rfe_best.n_features_
        self.selfeatids = []
        for i in range(self.data_preclassified.shape[1]):
            logger.info('Feature %d: selected? %d (rank=%.3f)' %
                        (i, selfeats[i], featranks[i]))
            if selfeats[i]:
                self.selfeatids.append(i)

        self.selfeatids.sort()

        # - Extract selected data columns
        logger.info(
            "Extracting selected data columns (N=%d) from original data ..." %
            (nfeat_sel))
        self.data_sel = self.data[:, selfeats]
        self.data_preclassified_sel = self.data_preclassified[:, selfeats]

        # - Plot results
        logger.info("Plotting and saving feature score results ...")
        plt.boxplot(results, labels=self.nfeats, showmeans=True)
        #plt.show()
        plt.savefig(self.outfile_scores)

        return 0
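# __create_pipeline() is not part of this snippet. Below is a minimal sketch,
# under the assumption that each entry of self.pipelines wraps an RFE feature
# selector around a classifier (a decision tree here, purely for illustration)
# for one candidate number of features, matching how self.models[i] and
# self.pipelines[i] are consumed above. Names and classifier choice are assumptions.
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFE
from sklearn.tree import DecisionTreeClassifier

def create_rfe_pipelines(nfeats):
    """ Build one (model, pipeline) pair per candidate number of features """
    models, pipelines = [], []
    for n in nfeats:
        model = DecisionTreeClassifier()
        rfe = RFE(estimator=model, n_features_to_select=n)
        models.append(model)
        pipelines.append(Pipeline(steps=[('featsel', rfe), ('clf', DecisionTreeClassifier())]))
    return models, pipelines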
    def set_data(self, featdata, class_ids=[], snames=[]):
        """ Set data from input array. Optionally give labels & obj names """

        # - Set data vector
        self.data_labels = []
        self.data_classids = []
        self.data_targets = []
        self.source_names = []

        # - Set feature data
        self.data = featdata
        data_shape = self.data.shape

        if self.data.size == 0:
            logger.error("Empty feature data vector given!")
            return -1

        self.nsamples = data_shape[0]
        self.nfeatures = data_shape[1]

        # - Set class ids & labels
        if class_ids:
            nids = len(class_ids)
            if nids != self.nsamples:
                logger.error(
                    "Given class ids have size (%d) different than feature data (%d)!"
                    % (nids, self.nsamples))
                return -1
            self.data_classids = class_ids

            for classid in self.data_classids:
                label = self.classid_label_map[classid]
                self.data_labels.append(label)

        else:
            self.data_classids = [0] * self.nsamples  # Init to unknown type
            self.data_labels = ["UNKNOWN"] * self.nsamples

        # - Set target ids
        for j in range(len(self.data_classids)):
            obj_id = self.data_classids[j]
            targetid = self.classid_remap[obj_id]  # remap obj id in class id
            self.data_targets.append(targetid)

        # - Set obj names
        if snames:
            n = len(snames)
            if n != self.nsamples:
                logger.error(
                    "Given source names have size (%d) different than feature data (%d)!"
                    % (n, self.nsamples))
                return -1
            self.source_names = snames
        else:
            self.source_names = ["XXX"] * self.nsamples  # Init to unclassified

        logger.info("#nsamples=%d, #nfeatures=%d" %
                    (self.nsamples, self.nfeatures))

        # - Normalize feature data?
        if self.normalize:
            logger.info("Normalizing feature data ...")
            data_norm = self.__normalize_data(self.data, self.norm_min,
                                              self.norm_max)
            self.data = data_norm

        # - Set pre-classified data
        logger.info("Setting pre-classified data (if any) ...")
        self.__set_preclass_data()

        return 0
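# __normalize_data() is not included in this snippet. A minimal sketch of a
# column-wise min-max normalization to the [norm_min, norm_max] range, which is
# one plausible implementation consistent with how it is called above:
from sklearn.preprocessing import MinMaxScaler

def normalize_data(data, norm_min=0.0, norm_max=1.0):
    """ Rescale each feature column to the [norm_min, norm_max] range """
    scaler = MinMaxScaler(feature_range=(norm_min, norm_max))
    return scaler.fit_transform(data)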
Example #7
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    inputfile = args.inputfile

    # - Data pre-processing
    normalize = args.normalize

    # - Model options
    classifier = args.classifier
    scoring = args.scoring
    cv_nsplits = args.cv_nsplits
    nfeat_min = args.nfeat_min
    nfeat_max = args.nfeat_max
    autoselect = args.autoselect

    # - Run options
    colselect = args.colselect
    selcols = []
    if colselect:
        if args.selcols == "":
            logger.error(
                "No selected column ids given (mandatory when colselect option is chosen)!"
            )
            return 1
        selcols = [int(x.strip()) for x in args.selcols.split(',')]

    # - Output options
    outfile = args.outfile

    #===========================
    #==   READ FEATURE DATA
    #===========================
    ret = Utils.read_feature_data(inputfile)
    if not ret:
        logger.error("Failed to read data from file %s!" % (inputfile))
        return 1

    data = ret[0]
    snames = ret[1]
    classids = ret[2]

    #===========================
    #==   SELECT FEATURES
    #===========================
    logger.info("Running feature selector on input feature data ...")
    fsel = FeatSelector()
    fsel.normalize = normalize
    fsel.classifier = classifier
    fsel.scoring = scoring
    fsel.outfile = outfile
    fsel.nfeat_min = nfeat_min
    fsel.nfeat_max = nfeat_max
    fsel.auto_selection = autoselect

    if colselect:
        status = fsel.select(data, selcols, classids, snames)
    else:
        status = fsel.run(data, classids, snames)

    if status < 0:
        logger.error("Feature selector failed!")
        return 1

    return 0
Example #8
    def make_cutout(self, coord, radius, sname, region_sky):
        """ Run source cutout maker """

        #===========================
        #==   SET SOURCE PARS
        #===========================
        # - Check and set source cutout pars
        if len(coord) != 2:
            logger.error("Empty source position given!")
            return -1

        if radius <= 0:
            logger.error("Radius must be >0")
            return -1

        if sname == "":
            logger.error("Source name must not be empty string!")
            return -1

        if region_sky is None:
            logger.error("None region given!")
            return -1

        if self.nsurveys <= 0:
            logger.error("No surveys present in config!")
            return -1

        self.ra = coord[0]
        self.dec = coord[1]
        self.radius = radius
        self.sname = sname
        self.region_sky = region_sky

        # - Update job dir in config
        #self.jobdir= os.path.join(self.topdir, sname)

        if not os.path.exists(self.datadir):
            logger.info("Creating cutout data dir %s ..." % (self.datadir))
            Utils.mkdir(self.datadir, delete_if_exists=False)

        if not os.path.exists(self.datadir_mask):
            logger.info("Creating cutout masked data dir %s ..." %
                        (self.datadir_mask))
            Utils.mkdir(self.datadir_mask, delete_if_exists=False)

        self.config.workdir = self.datadir

        #===========================
        #==   RUN CUTOUT SEARCH
        #===========================
        logger.info("Run cutout search for source %s ..." % (self.sname))
        try:
            ch = CutoutHelper(self.config, self.ra, self.dec, self.sname,
                              self.radius)
            if ch.run() < 0:
                errmsg = 'Failed to extract cutout for source ' + self.sname + '!'
                logger.warn(errmsg)
                return -1

        except Exception as e:
            logger.error(
                'Exception (%s) occurred when extracting cutout for source %s!'
                % (str(e), self.sname))
            return -1

        #===========================
        #==   MASKED CUTOUT DATA
        #===========================
        logger.info("Computing masked cutouts for source %s ..." %
                    (self.sname))
        if self.make_masked_cutouts(self.region_sky, self.dilatemask,
                                    self.kernsize, self.maskval) < 0:
            logger.error("Failed to create masked cutouts for source %s!" %
                         (self.sname))
            return -1

        return 0
Example #9
	def run(self, data, class_ids=[], snames=[], modelfile='', scalerfile=''):
		""" Find outliers in input data """

		#================================
		#==   LOAD DATA SCALER
		#================================
		# - Load scaler from file?
		if scalerfile!="":
			logger.info("Loading data scaler from file %s ..." % (scalerfile))
			try:
				self.data_scaler= pickle.load(open(scalerfile, 'rb'))
			except Exception as e:
				logger.error("Failed to load data scaler from file %s!" % (scalerfile))
				return -1

		#================================
		#==   LOAD DATA
		#================================
		# - Check inputs
		if data is None:
			logger.error("None input data specified!")
			return -1

		if self.set_data(data, class_ids, snames)<0:
			logger.error("Failed to set data!")
			return -1

		#================================
		#==   LOAD MODEL
		#================================
		if modelfile and modelfile is not None:
			fitdata= False
			logger.info("Loading the model from file %s ..." % modelfile)
			try:
				self.model = pickle.load((open(modelfile, 'rb')))
			except Exception as e:
				logger.error("Failed to load model from file %s!" % (modelfile))
				return -1

		else:
			logger.info("Creating the model ...")
			fitdata= True
			self.model= self.__create_model()

		#================================
		#==   FIND OUTLIERS
		#================================	
		logger.info("Searching for outliers ...")
		if self.__find_outliers(fitdata)<0:
			logger.error("Failed to search outliers!")
			return -1
		
		#================================
		#==   SAVE
		#================================
		if self.save_to_file:
			logger.info("Saving results ...")
			if self.__save()<0:
				logger.error("Failed to save outlier search results!")
				return -1

		return 0
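# __create_model() is not part of this snippet. A minimal sketch of an outlier
# detection model based on scikit-learn's IsolationForest; the actual class may
# use a different algorithm or parameters, so treat this purely as an assumption.
from sklearn.ensemble import IsolationForest

def create_outlier_model(n_estimators=100, contamination='auto', random_state=42):
    """ Build a simple isolation-forest outlier detector """
    return IsolationForest(
        n_estimators=n_estimators,
        contamination=contamination,
        random_state=random_state
    )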
Example #10
    def __train(self):
        """ Build and train/test reducer """

        # - Check if data are set
        if self.data is None:
            logger.error("Input data array is None!")
            return -1

        # - Check if reducer is set
        if self.reducer is None:
            logger.error("UMAP reducer is not set!")
            return -1

        #==========================================================
        #==   FIT PRE-CLASSIFIED DATA (IF AVAILABLE) SUPERVISED
        #==========================================================
        if self.use_preclassified_data and len(
                self.data_preclassified) >= self.preclassified_data_minsize:
            logger.info(
                "Fitting input pre-classified data in a supervised way ...")
            self.learned_transf = self.reducer.fit(
                self.data_preclassified, self.data_preclassified_classids)
            self.encoded_data_preclassified = self.learned_transf.transform(
                self.data_preclassified)

        #================================
        #==   FIT DATA UNSUPERVISED
        #================================
        logger.info("Fitting input data in a completely unsupervised way ...")
        self.encoded_data_unsupervised = self.reducer.fit_transform(self.data)

        # - Save model to file
        if self.dump_model:
            logger.info("Dumping model to file %s ..." % self.outfile_model)
            pickle.dump(self.reducer, open(self.outfile_model, 'wb'))

        #====================================================
        #==   ENCODE DATA USING LEARNED TRANSFORM (IF DONE)
        #====================================================
        if self.learned_transf is not None:
            logger.info(
                "Encode input data using learned transform on pre-classified data ..."
            )
            self.encoded_data_supervised = self.learned_transf.transform(
                self.data)

        #================================
        #==   SAVE ENCODED DATA
        #================================
        # - Unsupervised encoded data
        logger.info("Saving unsupervised encoded data to file ...")
        N = self.encoded_data_unsupervised.shape[0]
        print("Unsupervised encoded data shape=",
              self.encoded_data_unsupervised.shape)
        print("Unsupervised encoded data N=", N)

        snames = np.array(self.source_names).reshape(N, 1)
        objids = np.array(self.data_classids).reshape(N, 1)

        # - Save unsupervised encoded data
        enc_data = np.concatenate(
            (snames, self.encoded_data_unsupervised, objids), axis=1)

        znames_counter = list(range(1, self.encoded_data_dim + 1))
        znames = '{}{}'.format('z',
                               ' z'.join(str(item) for item in znames_counter))
        head = '{} {} {}'.format("# sname", znames, "id")

        Utils.write_ascii(enc_data, self.outfile_encoded_data_unsupervised,
                          head)

        # - Supervised encoded data
        if self.encoded_data_supervised is not None:
            logger.info("Saving supervised encoded data to file ...")
            N = self.encoded_data_supervised.shape[0]
            print("Supervised encoded data shape=",
                  self.encoded_data_supervised.shape)
            print("Supervised encoded data N=", N)

            enc_data = np.concatenate(
                (snames, self.encoded_data_supervised, objids), axis=1)

            Utils.write_ascii(enc_data, self.outfile_encoded_data_supervised,
                              head)

        # - Pre-classified data
        if self.encoded_data_preclassified is not None:
            logger.info("Saving pre-classified encoded data to file ...")
            N = self.encoded_data_preclassified.shape[0]
            print("Pre-classified encoded data shape=",
                  self.encoded_data_preclassified.shape)
            print("Pre-classified encoded data N=", N)

            snames_preclass = np.array(
                self.source_names_preclassified).reshape(N, 1)
            objids_preclass = np.array(
                self.data_preclassified_classids).reshape(N, 1)

            enc_data = np.concatenate(
                (snames_preclass, self.encoded_data_preclassified,
                 objids_preclass),
                axis=1)

            Utils.write_ascii(enc_data,
                              self.outfile_encoded_data_preclassified, head)

        return 0
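# The self.reducer fitted above is created elsewhere in the class. A minimal
# sketch using the umap-learn package, with the output dimension matching the
# self.encoded_data_dim used when writing the header; parameter values are
# illustrative defaults, not necessarily the author's configuration.
import umap

def create_umap_reducer(encoded_data_dim=2, n_neighbors=15, min_dist=0.1, metric='euclidean'):
    """ Build a UMAP reducer projecting data to encoded_data_dim dimensions """
    return umap.UMAP(
        n_components=encoded_data_dim,
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        metric=metric
    )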
Example #11
    def __read_and_merge_data(self,
                              inputfiles,
                              selcolids=[],
                              allow_novars=False):
        """ Read and merge feature data """

        # - Check selcolids has format [[selcol_1],[selcol_2]]
        if selcolids:
            if len(selcolids) != len(inputfiles):
                logger.error(
                    "Given selcolid length (%d) must be equal to inputfile list length (%d)!"
                    % (len(selcolids), len(inputfiles)))
                return -1

        # - Read features
        dlist = []
        nvars_tot = 0

        for i in range(len(inputfiles)):
            inputfile = inputfiles[i]
            colprefix = "featset" + str(i + 1) + "_"

            if selcolids:
                selcols_i = selcolids[i]
                if selcols_i:
                    d = Utils.read_sel_feature_data_dict(inputfile,
                                                         selcols_i,
                                                         colprefix=colprefix)
                else:
                    logger.error("Empty selcols for file %s given!" %
                                 (inputfile))
                    return -1
            else:
                d = Utils.read_feature_data_dict(inputfile,
                                                 colprefix=colprefix,
                                                 allow_novars=allow_novars)
            if not d:
                logger.error("Failed to read data from file %s!" % (inputfile))
                return -1

            nentries = len(d.keys())
            firstitem = next(iter(d.items()))
            nvars = len(firstitem[1].keys()) - 2
            nvars_tot += nvars
            logger.info("Data file %s has #%d entries (#%d vars) ..." %
                        (inputfile, nentries, nvars))

            dlist.append(d)

        logger.info("Merged set is expected to have %d vars ..." % (nvars_tot))

        # - Merge features
        logger.info("Merging feature data for all input files ...")

        dmerged = collections.OrderedDict()

        for d in dlist:
            for key, value in d.items():
                if key not in dmerged:
                    dmerged[key] = collections.OrderedDict({})
                dmerged[key].update(value)
                dmerged[key].move_to_end("id")

        # - Remove rows with less number of entries
        logger.info("Removing rows with number of vars !=%d ..." % (nvars_tot))

        self.par_dict_list = []
        for key, value in dmerged.items():
            nvars = len(value.keys()) - 2
            if nvars != nvars_tot:
                logger.info(
                    "Removing entry (%s) as number of vars (%d) is !=%d ..." %
                    (key, nvars, nvars_tot))
                #del dmerged[key]
                continue
            self.par_dict_list.append(value)

        return 0
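# A small standalone illustration of the merge step above: per-source dicts read
# from different feature files are merged by source name, and move_to_end() keeps
# the "id" column as the last entry. The values below are made up for illustration.
import collections

d1 = {"S1": collections.OrderedDict([("sname", "S1"), ("featset1_flux", 1.2), ("id", 3)])}
d2 = {"S1": collections.OrderedDict([("sname", "S1"), ("featset2_color", 0.4), ("id", 3)])}

dmerged = collections.OrderedDict()
for d in (d1, d2):
    for key, value in d.items():
        if key not in dmerged:
            dmerged[key] = collections.OrderedDict({})
        dmerged[key].update(value)
        dmerged[key].move_to_end("id")

print(list(dmerged["S1"].items()))
# [('sname', 'S1'), ('featset1_flux', 1.2), ('featset2_color', 0.4), ('id', 3)]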
def main():
	"""Main function"""

	
	#===========================
	#==   PARSE ARGS
	#===========================
	logger.info("Get script args ...")
	try:
		args= get_args()
	except Exception as ex:
		logger.error("Failed to get and parse options (err=%s)",str(ex))
		return 1

	# - Input filelist
	datalist= args.datalist

	# - Data process options	
	nx= args.nx
	ny= args.ny

	normalize= args.normalize
	scale_to_abs_max= args.scale_to_abs_max
	scale_to_max= args.scale_to_max
	log_transform= args.log_transform
	scale= args.scale
	scale_factors= []
	if args.scale_factors!="":
		scale_factors= [float(x.strip()) for x in args.scale_factors.split(',')]
	standardize= args.standardize
	img_means= []
	img_sigmas= []
	if args.img_means!="":
		img_means= [float(x.strip()) for x in args.img_means.split(',')]
	if args.img_sigmas!="":
		img_sigmas= [float(x.strip()) for x in args.img_sigmas.split(',')]

	chan_divide= args.chan_divide
	chan_mins= []
	if args.chan_mins!="":
		chan_mins= [float(x.strip()) for x in args.chan_mins.split(',')]
	erode= args.erode	
	erode_kernel= args.erode_kernel

	# - Autoencoder options
	modelfile_encoder= args.modelfile_encoder
	modelfile_decoder= args.modelfile_decoder
	weightfile_encoder= args.weightfile_encoder
	weightfile_decoder= args.weightfile_decoder
	add_channorm_layer= args.add_channorm_layer

	# - Reco metrics & plot options
	winsize= args.winsize
	save_plots= args.save_plots

	#===========================
	#==   READ DATALIST
	#===========================
	# - Create data loader
	dl= DataLoader(filename=datalist)

	# - Read datalist	
	logger.info("Reading datalist %s ..." % datalist)
	if dl.read_datalist()<0:
		logger.error("Failed to read input datalist!")
		return 1


	#===============================
	#==   RUN AUTOENCODER RECO
	#===============================
	logger.info("Running autoencoder classifier reconstruction ...")
	vae_class= FeatExtractorAE(dl)
	vae_class.set_image_size(nx, ny)
	vae_class.normalize= normalize
	vae_class.scale_to_abs_max= scale_to_abs_max
	vae_class.scale_to_max= scale_to_max
	vae_class.log_transform_img= log_transform
	vae_class.scale_img= scale
	vae_class.scale_img_factors= scale_factors
	vae_class.standardize_img= standardize
	vae_class.img_means= img_means
	vae_class.img_sigmas= img_sigmas
	vae_class.chan_divide= chan_divide
	vae_class.chan_mins= chan_mins
	vae_class.erode= erode
	vae_class.erode_kernel= erode_kernel
	vae_class.add_channorm_layer= add_channorm_layer

	status= vae_class.reconstruct_data(
		modelfile_encoder, weightfile_encoder, 
		modelfile_decoder, weightfile_decoder,
		winsize= winsize,
		save_imgs= save_plots
	)

	if status<0:		
		logger.error("Autoencoder reconstruction failed!")
		return 1

	return 0
Example #13
	def __compute_pars(self, data, sname, classid):
		""" Compute source image quality pars """

		# - Init data dict
		param_dict= collections.OrderedDict()
		param_dict["sname"]= sname

		# - Find ref channel mask
		nchannels= data.shape[3]
		cond= np.logical_and(data[0,:,:,self.refch]!=0, np.isfinite(data[0,:,:,self.refch]))

		is_bad_data= False
		self.nvars_out= 0

		for i in range(nchannels):
			data_2d= data[0,:,:,i]
			data_1d= data_2d[cond] # pixel in ref band mask
			n= data_1d.size
			n_bad= np.count_nonzero(np.logical_or(~np.isfinite(data_1d), data_1d==0))
			n_neg= np.count_nonzero(data_1d<0)
			f_bad= float(n_bad)/float(n)
			f_negative= float(n_neg)/float(n)
			data_min= np.nanmin(data_1d)
			data_max= np.nanmax(data_1d)
			same_values= int(data_min==data_max)

			
			is_bad_ch_data= (
				f_negative>=self.negative_pix_fract_thr or
				f_bad>=self.bad_pix_fract_thr or
				same_values==1
			)
			if is_bad_ch_data:
				is_bad_data= True
	
			logger.info("Source %s (ch%d): min/max=%f/%f, n=%d, n_neg=%d, is_bad_ch_data? %d" % (sname, i+1, data_min, data_max, n, n_neg, int(is_bad_ch_data)))


			# - Fill dict
			par_name= "equalPixValues_ch" + str(i+1)
			param_dict[par_name]= same_values
			self.nvars_out+= 1

			par_name= "badPixFract_ch" + str(i+1)
			param_dict[par_name]= f_bad
			self.nvars_out+= 1

			par_name= "negativePixFract_ch" + str(i+1)		
			param_dict[par_name]= f_negative
			self.nvars_out+= 1
	
			par_name= "isBad_ch" + str(i+1)
			param_dict[par_name]= int(is_bad_ch_data)
			self.nvars_out+= 1

		param_dict["isBadData"]= int(is_bad_data)
		self.nvars_out+= 1

		param_dict["id"]= classid

		return param_dict
Example #14
def main():
	"""Main function"""

	
	#===========================
	#==   PARSE ARGS
	#===========================
	logger.info("Get script args ...")
	try:
		args= get_args()
	except Exception as ex:
		logger.error("Failed to get and parse options (err=%s)",str(ex))
		return 1

	# - Input filelist
	datalist= args.datalist
	datalist_cv= args.datalist_cv
	
	# - Data process options	
	nx= args.nx
	ny= args.ny
	augment= args.augment
	augment_scale_factor= args.augment_scale_factor
	scale= args.scale
	scale_factors= []
	if args.scale_factors!="":
		scale_factors= [float(x.strip()) for x in args.scale_factors.split(',')]

	normalize= args.normalize
	scale_to_abs_max= args.scale_to_abs_max
	scale_to_max= args.scale_to_max
	log_transform= args.log_transform
	standardize= args.standardize
	img_means= []
	img_sigmas= []
	if args.img_means!="":
		img_means= [float(x.strip()) for x in args.img_means.split(',')]
	if args.img_sigmas!="":
		img_sigmas= [float(x.strip()) for x in args.img_sigmas.split(',')]

	chan_divide= args.chan_divide
	chan_mins= []
	if args.chan_mins!="":
		chan_mins= [float(x.strip()) for x in args.chan_mins.split(',')]
	erode= args.erode	
	erode_kernel= args.erode_kernel

	# - NN architecture
	modelfile= args.modelfile
	add_maxpooling_layer= args.add_maxpooling_layer
	add_batchnorm_layer= args.add_batchnorm_layer
	add_leakyrelu= args.add_leakyrelu
	add_dense_layer= args.add_dense_layer	
	nfilters_cnn= [int(x.strip()) for x in args.nfilters_cnn.split(',')]
	kernsizes_cnn= [int(x.strip()) for x in args.kernsizes_cnn.split(',')]	
	strides_cnn= [int(x.strip()) for x in args.strides_cnn.split(',')]
	dense_layer_sizes= [int(x.strip()) for x in args.dense_layer_sizes.split(',')]
	dense_layer_activation= args.dense_layer_activation
	add_dropout_layer= args.add_dropout_layer
	dropout_rate= args.dropout_rate
	
	# - Train options
	predict= args.predict
	multiclass= True
	if args.binary_class:
		multiclass= False

	weightfile= args.weightfile
	optimizer= args.optimizer
	learning_rate= args.learning_rate
	batch_size= args.batch_size
	nepochs= args.nepochs
	weight_seed= args.weight_seed
	reproducible= args.reproducible
	validation_steps= args.validation_steps


	#===========================
	#==   READ DATALIST
	#===========================
	# - Create data loader
	dl= DataLoader(filename=datalist)

	# - Read datalist	
	logger.info("Reading datalist %s ..." % datalist)
	if dl.read_datalist()<0:
		logger.error("Failed to read input datalist!")
		return 1

	# - Create data loader for validation
	dl_cv= None
	if datalist_cv!="":
		logger.info("Reading datalist_cv %s ..." % datalist_cv)
		dl_cv= DataLoader(filename=datalist_cv)
		if dl_cv.read_datalist()<0:
			logger.error("Failed to read input datalist for validation!")
			return 1
	
	#===========================
	#==   TRAIN VAE
	#===========================
	logger.info("Running VAE classifier training ...")
	sclass= SClassifierNN(dl, multiclass=multiclass)
	sclass.modelfile= modelfile
	sclass.weightfile= weightfile
	sclass.set_image_size(nx, ny)
	sclass.augmentation= augment
	sclass.augment_scale_factor= augment_scale_factor
	sclass.normalize= normalize	
	sclass.scale_to_abs_max= scale_to_abs_max
	sclass.scale_to_max= scale_to_max
	sclass.log_transform_img= log_transform
	sclass.scale_img= scale
	sclass.scale_img_factors= scale_factors
	sclass.standardize_img= standardize
	sclass.img_means= img_means
	sclass.img_sigmas= img_sigmas
	sclass.chan_divide= chan_divide
	sclass.chan_mins= chan_mins
	sclass.erode= erode
	sclass.erode_kernel= erode_kernel

	sclass.batch_size= batch_size
	sclass.nepochs= nepochs
	sclass.validation_steps= validation_steps
	sclass.set_optimizer(optimizer, learning_rate)
	if reproducible:
		sclass.set_reproducible_model()
	
	sclass.add_max_pooling= add_maxpooling_layer
	sclass.add_batchnorm= add_batchnorm_layer
	sclass.add_leakyrelu= add_leakyrelu
	sclass.add_dense= add_dense_layer
	sclass.nfilters_cnn= nfilters_cnn
	sclass.kernsizes_cnn= kernsizes_cnn
	sclass.strides_cnn= strides_cnn
	sclass.dense_layer_sizes= dense_layer_sizes
	sclass.dense_layer_activation= dense_layer_activation
	sclass.add_dropout_layer= add_dropout_layer
	sclass.dropout_rate= dropout_rate
	sclass.weight_seed= weight_seed

	sclass.dl_cv= dl_cv

	if predict:
		status= sclass.run_predict(modelfile, weightfile)
	else:
		status= sclass.run_train()
	
	if status<0:
		logger.error("Classifier run failed!")
		return 1

	return 0
Example #15
	def fill_features(self):

		# - Save name
		self.param_dict["sname"]= self.sname

		# - Save source flux
		flux_ref= self.fluxes[self.refch]
		
		for j in range(len(self.fluxes)):
			flux= self.fluxes[j]
			parname= "flux_ch" + str(j+1)
			self.param_dict[parname]= flux

		# - Save source flux log ratios Fj/F_radio (i.e. colors)
		lgFluxRatio_safe= 0
		is_good_flux_ref= (flux_ref>0) and (np.isfinite(flux_ref))
		if not is_good_flux_ref:
			logger.warn("Flux for ref chan (%d) is <=0 or nan for image %s (id=%s),  will set all color index to %d..." % (self.refch, self.sname, self.label, lgFluxRatio_safe))

		for j in range(len(self.fluxes)):
			if j==self.refch:
				continue
			flux= self.fluxes[j] # if source is not detected this is the background level
			is_good_flux= (flux>0) and (np.isfinite(flux))
			
			lgFluxRatio= 0
			if is_good_flux_ref:
				if is_good_flux:
					lgFluxRatio= np.log10(flux/flux_ref)
				else:
					logger.warn("Flux for chan %d is <=0 or nan for image %s (id=%s),  will set this color index to %d..." % (self.refch, self.sname, self.label, lgFluxRatio_safe))
					lgFluxRatio= lgFluxRatio_safe
			else:
				lgFluxRatio= lgFluxRatio_safe
			 
			parname= "lgFratio_ch" + str(self.refch+1) + "_" + str(j+1)
			self.param_dict[parname]= lgFluxRatio

		
		# - Save source flux log ratios Fj/F_radio (i.e. colors)
		cind_safe= 0
		sflux_ref= self.sfluxes[self.refch]
		is_good_flux_ref= (sflux_ref is not None) and (sflux_ref>0) and (np.isfinite(sflux_ref))
		if not is_good_flux_ref:
			logger.warn("Flux for ref chan (%d) is <=0 or nan for image %s (id=%s),  will set all color index to %d..." % (self.refch, self.sname, self.label, cind_safe))

		for j in range(len(self.sfluxes)):
			if j==self.refch:
				continue
			sflux= self.sfluxes[j] 
			flux= self.fluxes[j]
			if sflux is None: # source is not detected, take sum of pixel fluxes inside ref source aperture (e.g. the background)
				logger.info("Source is not detected in chan %d, taking pixel sum over ref source aperture %f ..." % (j+1, flux))
				sflux= flux
				
			is_good_flux= (sflux>0) and (np.isfinite(sflux))
			
			cind= 0
			if is_good_flux_ref:
				if is_good_flux:
					cind= np.log10(sflux/sflux_ref)
				else:
					logger.warn("Flux for chan %d is <=0 or nan for image %s (id=%s),  will set this color index to %d..." % (self.refch, self.sname, self.label, cind_safe))
					cind= cind_safe
			else:
				cind= cind_safe
			
			parname= "color_ch" + str(self.refch+1) + "_" + str(j+1)
			self.param_dict[parname]= cind


		# - Save source IOU
		for j in range(len(self.sious)):
			ch_i, ch_j= self.__get_triu_indices(j, self.nchannels)
			iou= self.sious[j]
			parname= "iou_ch" + str(ch_i) + "_" + str(ch_j)
			self.param_dict[parname]= iou
			
		# - Save source peak dist
		for j in range(len(self.speaks_dists)):
			ch_i, ch_j= self.__get_triu_indices(j, self.nchannels)
			peak_dist= self.speaks_dists[j]
			parname= "dpeak_ch" + str(ch_i) + "_" + str(ch_j)
			self.param_dict[parname]= peak_dist


		# - Save img moments
		for i in range(len(self.moments_zern)):
			for j in range(len(self.moments_zern[i])):
				if j==0:
					continue # Skip as mom0 is always the same
				m= self.moments_zern[i][j]
				parname= "zernmom" + str(j+1) + "_ch" + str(i+1)
				self.param_dict[parname]= m

		# - Save ssim parameters
		if self.save_ssim_pars:
			for j in range(len(self.ssim_avg)):
				ch_i, ch_j= self.__get_triu_indices(j, self.nchannels)
				parname= "ssim_avg_ch{}_{}".format(ch_i,ch_j)
				self.param_dict[parname]= self.ssim_avg[j]
				
		# - Save class id
		self.param_dict["id"]= self.id
Example #16
    def make_masked_cutouts(self,
                            region_sky,
                            dilatemask=False,
                            kernsize=5,
                            maskval=0):
        """ Produce masked cutouts """

        # - Find cutout files produced
        logger.info("Searching for produced cutouts for source %s ..." %
                    (self.sname))
        cutout_dir = os.path.join(self.datadir, self.sname)
        file_pattern = os.path.join(cutout_dir, "*.fits")
        files = glob.glob(file_pattern)

        nfiles = len(files)
        if nfiles == 0 or nfiles != self.nsurveys:
            logger.warn(
                "Number of cutout files produced (%d) different wrt expected (%d)!"
                % (nfiles, self.nsurveys))
            return -1

        # - Create directory for masked cutouts
        masked_cutout_dir = os.path.join(self.datadir_mask, self.sname)
        if not os.path.exists(masked_cutout_dir):
            logger.info("Creating cutout masked data dir %s ..." %
                        (masked_cutout_dir))
            Utils.mkdir(masked_cutout_dir, delete_if_exists=False)

        # - Retrieve FITS header & wcs
        logger.info("Retrieving cutout FITS header & WCS for source %s ..." %
                    (self.sname))
        try:
            header = fits.getheader(files[0])
            data_shape = fits.getdata(files[0]).shape
            wcs = WCS(header)
        except Exception as e:
            logger.error(
                "Failed to retrieve file %s header/WCS for source %s (err=%s)!"
                % (files[0], self.sname, str(e)))
            return -1

        # - Convert region to pixel coords
        logger.info(
            "Converting sky region for source %s to pixel coordinates ..." %
            (self.sname))
        try:
            region = region_sky.to_pixel(wcs)
        except Exception as e:
            logger.error(
                "Failed to convert sky region for source %s to pixel coordinates (err=%s)!"
                % (self.sname, str(e)))
            return -1

        # - Compute mask
        logger.info("Computing mask for source %s ..." % (self.sname))
        try:
            mask = region.to_mask(mode='center')
        except Exception as e:
            logger.error(
                "Failed to get mask from region for source %s (err=%s)!" %
                (self.sname, str(e)))
            return -1

        if mask is None:
            logger.warn("mask obtained from region for source %s is None!" %
                        (self.sname))
            return -1

        # - Compute image mask
        logger.info("Computing image mask for source %s ..." % (self.sname))
        maskimg = mask.to_image(data_shape)
        if maskimg is None:
            logger.error(
                "maskimg is None for source %s, this shoudn't occur at this stage!"
                % (self.sname))
            return -1

        maskimg[maskimg != 0] = 1
        maskimg = maskimg.astype(np.uint8)

        # - Dilate image mask to enlarge area around source
        if dilatemask:
            logger.info(
                "Dilating image mask to enlarge area around source %s ..." %
                (self.sname))
            structel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE,
                                                 (kernsize, kernsize))
            maskimg_dil = cv2.dilate(maskimg, structel, iterations=1)
            maskimg = maskimg_dil

        # - Loop over files and create masked cutouts
        for i in range(nfiles):
            filename = files[i]
            filename_base = os.path.basename(filename)
            filename_base_noext = os.path.splitext(filename_base)[0]
            filename_mask = os.path.join(masked_cutout_dir,
                                         filename_base_noext + '_masked.fits')

            logger.info("Creating masked cutout file %s from file %s ..." %
                        (filename_mask, filename_base))
            try:
                header = fits.getheader(filename)
                data = fits.getdata(filename)
                data[maskimg == 0] = maskval

                hdu_out = fits.PrimaryHDU(data, header)
                hdul = fits.HDUList([hdu_out])
                hdul.writeto(filename_mask, overwrite=True)

            except Exception as e:
                logger.error(
                    "Failed to create masked file %s for source %s (err=%s)!" %
                    (filename_mask, self.sname, str(e)))
                return -1

        return 0
Example #17
	def __process_sdata(self, index):
		""" Process source data """

		#===========================
		#==    READ DATA
		#===========================
		# - Read source data
		logger.info("Reading source and source masked data %d ..." % (index))
		ret= self.__read_sdata(index)
		if ret is None:
			logger.error("Failed to read source data %d!" % (index))
			return -1

		sdata= ret[0]
		sdata_mask= ret[1]

		#===========================
		#==    MODIFY MASKS
		#===========================
		# - Shrink img & mask in masked sdata?		
		if self.shrink_masks: 
			logger.info("Shrinking img+mask on source masked data %d ..." % (index))
			if sdata_mask.shrink_masks(self.erode_kernels)<0:
				logger.warn("Failed to shrink mask for source masked data %d!" % (index))
				return -1

		# - Expand img & mask in masked sdata?
		if self.grow_masks:
			logger.info("Expanding img+mask on source masked data %d ..." % (index))
			if sdata_mask.grow_masks(self.dilate_kernels)<0:
				logger.warn("Failed to expand mask for source masked data %d!" % (index))
				return -1

		masks= sdata_mask.img_data_mask
		#mask_ref= masks[self.refch]

		#===========================
		#==  CHECK DATA INTEGRITY
		#===========================
		# - Check non-masked data
		has_good_data= sdata.has_good_data(check_mask=False, check_bad=True, check_neg=False, check_same=True)
		if not has_good_data:
			logger.warn("Source data %d are bad (too may NANs or equal pixel values)!" % (index))
			return -1

		# - Check masked data
		has_good_mask_data= sdata_mask.has_good_data(check_mask=False, check_bad=True, check_neg=True, check_same=True)
		if not has_good_mask_data:
			logger.warn("Source mask data %d are bad (too may NANs/negative or equal pixel values)!" % (index))
			return -1

		#===========================
		#==  CHECK AE RECO ACCURACY
		#===========================
		# ...
		# ...

		#===========================
		#==    COMPUTE BKG/FLUX
		#===========================
		# - Compute bkg on img over non-masked pixels
		logger.info("Computing bkg on source data %d ..." % (index))
		if sdata.compute_bkg(masks)<0:
			logger.warn("Failed to compute bkg for source data %d!" % (index))
			return -1

		bkg_levels= sdata.bkg_levels

		#print("--> bkg levels")
		#print(bkg_levels)

		# - Apply masks to sdata
		#   NB: Do this after bkg calculation (otherwise all non-masked pixels are set to 0, so bkg will be 0) and before subtract bkg
		logger.info("Applying masks to source data %d ..." % (index))
		sdata.apply_masks(masks)

		# - Subtract bkg on img
		#if self.subtract_bkg:
		#	logger.info("Subtracting bkg on source data %d ..." % (index))
		#	if sdata.subtract_bkg(bkg_levels, self.subtract_bkg_only_refch)<0:
		#		logger.warn("Failed to subtract bkg for source data %d!" % (index))
		#		return -1

		# - Compute integrated flux (no source extraction here, only sum of pixel fluxes in mask)
		logger.info("Computing flux on source data %d ..." % (index))
		sdata.compute_fluxes(subtract_bkg=self.subtract_bkg, subtract_only_refch=self.subtract_bkg_only_refch)

		# - Extract sources and compute pars 
		#   NB: source extraction may fail or not be accurate (e.g. miss source, contour not accurate, etc)
		logger.info("Extracting source blobs on source data %d ..." % (index))
		sdata.find_sources(
			seed_thr=self.seed_thr, merge_thr=self.merge_thr, dist_thr=self.dist_thr, 
			subtract_bkg=self.subtract_bkg, subtract_only_refch=self.subtract_bkg_only_refch
		)		

		#===========================
		#==    COMPUTE MOMENTS
		#===========================
		# - Compute centroids and moments on images (NB: masked before)
		logger.info("Computing moments on source data %d ..." % (index))	
		if sdata.compute_img_moments()<0:
			logger.warn("Failed to compute moments for source data %d!" % (index))
			return -1

		#===========================
		#==    COMPUTE SSIM
		#===========================
		if self.save_ssim_pars:
			logger.info("Computing ssim pars on source data %d ..." % (index))	
			if sdata.compute_ssim_pars(self.ssim_winsize)<0:
				logger.warn("Failed to compute SSIM pars for source data %d!" % (index))
				return -1

		#===========================
		#==   FILL SOURCE OUT DATA
		#===========================
		# - Fill and append features
		logger.info("Filling feature dict for source data %d ..." % (index))	
		sdata.fill_features()

		par_dict= sdata.param_dict
		if par_dict is None or not par_dict:
			logger.warn("Feature dict for source data %d is empty or None, skip it ..." % (index))
			
		else:
			# - Select features?
			if self.select_feat and self.selfeatids:
				ret= sdata.select_features(self.selfeatids)
				par_dict= sdata.param_dict

				if ret==0:
					self.par_dict_list.append(par_dict)
				else:
					logger.warn("Failed to select features for source data %d, skip it ..." % (index))

			else:
				self.par_dict_list.append(par_dict)
		
		return 0
Example #18
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    inputfile = args.inputfile
    inputfile_cv = args.inputfile_cv

    # - Data pre-processing
    normalize = args.normalize
    scalerfile = args.scalerfile

    # - Model options
    classifier = args.classifier
    modelfile = args.modelfile
    predict = args.predict
    multiclass = True
    if args.binary_class:
        multiclass = False

    balance_classes = args.balance_classes

    # - Tree options
    max_depth = args.max_depth
    min_samples_split = args.min_samples_split
    min_samples_leaf = args.min_samples_leaf
    n_estimators = args.n_estimators
    num_leaves = args.num_leaves
    learning_rate = args.learning_rate
    niters = args.niters

    # - Outlier search options
    find_outliers = args.find_outliers
    modelfile_outlier = args.modelfile_outlier
    anomaly_thr = args.anomaly_thr
    save_outlier = args.save_outlier
    outfile_outlier = args.outfile_outlier

    # - Run options
    run_scan = args.run_scan
    ntrials = args.ntrials

    # - Output options
    outfile = args.outfile

    #===========================
    #==   READ FEATURE DATA
    #===========================
    ret = Utils.read_feature_data(inputfile)
    if not ret:
        logger.error("Failed to read data from file %s!" % (inputfile))
        return 1

    data = ret[0]
    snames = ret[1]
    classids = ret[2]

    #====================================
    #==   READ FEATURE VALIDATION DATA
    #====================================
    data_cv = None
    snames_cv = []
    classids_cv = []

    if inputfile_cv != "":
        ret_cv = Utils.read_feature_data(inputfile_cv)
        if not ret_cv:
            logger.error("Failed to read validation data from file %s!" %
                         (inputfile_cv))
            return 1

        data_cv = ret_cv[0]
        snames_cv = ret_cv[1]
        classids_cv = ret_cv[2]

    #===========================
    #==   CLASSIFY DATA
    #===========================
    logger.info("Running classifier on input feature data ...")
    sclass = SClassifier(multiclass=multiclass)
    sclass.normalize = normalize
    sclass.classifier = classifier
    sclass.outfile = outfile
    sclass.max_depth = max_depth
    sclass.min_samples_split = min_samples_split
    sclass.min_samples_leaf = min_samples_leaf
    sclass.n_estimators = n_estimators
    sclass.num_leaves = num_leaves
    sclass.learning_rate = learning_rate
    sclass.niters = niters
    sclass.balance_classes = balance_classes

    sclass.find_outliers = find_outliers
    sclass.outlier_modelfile = modelfile_outlier
    sclass.outlier_thr = anomaly_thr
    sclass.save_outlier = save_outlier
    sclass.outlier_outfile = outfile_outlier

    if predict:
        status = sclass.run_predict(data, classids, snames, modelfile,
                                    scalerfile)
    else:
        if run_scan:
            status = sclass.run_lgbm_scan(data,
                                          classids,
                                          snames,
                                          scalerfile,
                                          n_trials=ntrials)

        else:
            status = sclass.run_train(
                data,
                classids,
                snames,
                modelfile,
                scalerfile,
                data_cv,
                classids_cv,
                snames_cv,
            )

    if status < 0:
        logger.error("Classifier run failed!")
        return 1

    return 0
Example #19
	def compute_ssim_pars(self, winsize=3):
		""" Compute SSIM params """

		# - Loop over images and compute params
		index= 0
		for i in range(self.nchannels-1):
			
			img_i= self.img_data[i]
			cond_i= np.logical_and(img_i!=0, np.isfinite(img_i))

			img_max_i= np.nanmax(img_i[cond_i])
			img_min_i= np.nanmin(img_i[cond_i])
			
			img_norm_i= (img_i-img_min_i)/(img_max_i-img_min_i)
			img_norm_i[~cond_i]= 0

			# - Compute SSIM maps
			for j in range(i+1,self.nchannels):
				img_j= self.img_data[j]
				cond_j= np.logical_and(img_j!=0, np.isfinite(img_j))
				img_max_j= np.nanmax(img_j[cond_j])
				img_min_j= np.nanmin(img_j[cond_j])
				
				img_norm_j= (img_j-img_min_j)/(img_max_j-img_min_j)
				img_norm_j[~cond_j]= 0

				cond= np.logical_and(cond_i, cond_j)
				
				# - Compute SSIM moments
				#   NB: images must be min-max normalized first, otherwise the returned SSIM values are always ~1.
				logger.info("Computing SSIM for image %s (id=%s, ch=%d-%d) ..." % (self.sname, self.label, i+1, j+1))
				_, ssim_2d= structural_similarity(img_norm_i, img_norm_j, full=True, win_size=winsize, data_range=1)

				ssim_2d[ssim_2d<0]= 0
				ssim_2d[~cond]= 0
				self.ssim_maps.append(ssim_2d)

				ssim_1d= ssim_2d[cond]

				#if self.draw:
				#	plt.subplot(1, 3, 1)
				#	plt.imshow(img_norm_i, origin='lower')
				#	plt.colorbar()

				#	plt.subplot(1, 3, 2)
				#	plt.imshow(img_norm_j, origin='lower')
				#	plt.colorbar()
					
				#	plt.subplot(1, 3, 3)
				#	plt.imshow(ssim_2d, origin='lower')
				#	plt.colorbar()

				#	plt.show()

				if ssim_1d.size>0:
					ssim_mean= np.nanmean(ssim_1d)
					ssim_median= np.nanmedian(ssim_1d)
					ssim_avg= ssim_median
					self.ssim_avg.append(ssim_avg)
					
					logger.info("Image %s (chan=%d-%d): <SSIM>=%f" % (self.sname, i+1, j+1, ssim_avg))

				else:
					logger.warn("Image %s (chan=%d-%d): SSIM array is empty, setting estimators to -999..." % (self.sname, i+1, j+1))
					self.ssim_avg.append(-999)
					

		return 0
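Example #20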
    def set_data_from_file(self, filename):
        """ Set data from input file. Expected format: sname, N features, classid """

        # - Read table
        row_start = 0
        try:
            table = ascii.read(filename, data_start=row_start)
        except Exception:
            logger.error("Failed to read feature file %s!" % filename)
            return -1

        #print(table.colnames)
        #print(table)

        ncols = len(table.colnames)
        nfeat = ncols - 2

        # - Set data vectors
        rowIndex = 0
        self.data_labels = []
        self.data_classids = []
        self.data_targets = []
        self.source_names = []
        featdata = []

        for data in table:
            sname = data[0]
            obj_id = data[ncols - 1]
            label = self.classid_label_map[obj_id]
            targetid = self.classid_remap[obj_id]  # remap object id into target class id

            self.source_names.append(sname)
            self.data_labels.append(label)
            self.data_classids.append(obj_id)
            self.data_targets.append(targetid)
            featdata_curr = []
            for k in range(nfeat):
                featdata_curr.append(data[k + 1])
            featdata.append(featdata_curr)

        self.data = np.array(featdata)
        if self.data.size == 0:
            logger.error("Empty feature data vector read!")
            return -1

        data_shape = self.data.shape
        self.nsamples = data_shape[0]
        self.nfeatures = data_shape[1]
        logger.info("#nsamples=%d, #nfeatures=%d" %
                    (self.nsamples, self.nfeatures))

        # - Normalize feature data?
        if self.normalize:
            logger.info("Normalizing feature data ...")
            data_norm = self.__normalize_data(self.data, self.norm_min,
                                              self.norm_max)
            self.data = data_norm

        # - Set pre-classified data
        logger.info("Setting pre-classified data (if any) ...")
        self.__set_preclass_data()

        return 0
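A sketch of the table layout this method assumes (hypothetical values; the file is read with astropy.io.ascii, one row per source): the first column is the source name, the last column is the class id, and the N columns in between are taken as features.

# Hypothetical feature file content (whitespace-separated):
#
#   sname   feat_1   feat_2   feat_3   classid
#   S1      0.12     3.40    -0.70     1
#   S2      0.33     2.90     0.10     6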
Example #21
	def __extract_sources(self, data, bkg, rms, mask=None, seed_thr=4, merge_thr=3, dist_thr=-1):
		""" Find sources in channel data """
	
		# - Compute image center
		data_shape= data.shape
		y_c= data_shape[0]/2.
		x_c= data_shape[1]/2.

		# - Compute mask
		if mask is None:
			logger.info("Computing image mask ...")
			mask= np.logical_and(data!=0, np.isfinite(data))	

		data_1d= data[mask]
	
		# - Compute significance map and threshold it at merge_thr to build the blob binary map
		zmap= (data-bkg)/rms
		binary_map= (zmap>merge_thr).astype(np.int32)
		binary_map[~mask]= 0
		zmap[~mask]= 0
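		#   NB: two-threshold scheme: blobs are segmented here at the lower merge_thr and are
		#       kept further below only if they contain at least one peak above the higher seed_thr.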
	
		# - Extract sources
		logger.info("Extracting sources ...")
		label_map= skimage.measure.label(binary_map)
		regprops= skimage.measure.regionprops(label_map, data)

		nsources= len(regprops)
		logger.info("#%d sources found ..." % nsources)

		# - Extract peaks
		kernsize= 3
		footprint = np.ones((kernsize, ) * data.ndim, dtype=bool)
		peaks= peak_local_max(np.copy(zmap), footprint=footprint, threshold_abs=seed_thr, min_distance=2, exclude_border=True)
		#print(peaks)
		
		if peaks.shape[0]<=0:
			logger.info("No peaks detected in this image, return None ...")
			return None
		
		# - Select best source
		regprops_sel= []
		peaks_sel= []
		binary_maps_sel= []
		polygons_sel= []
		contours_sel= []
		#binary_maps_sel= []
		#binary_map_sel= np.zeros_like(binary_map)

		for regprop in regprops:
			# - Check if region max is >=seed_thr
			sslice= regprop.slice
			zmask= zmap[sslice]
			zmask_1d= zmask[np.logical_and(zmask!=0, np.isfinite(zmask))]	
			zmax= zmask_1d.max()
			if zmax<seed_thr:
				logger.info("Skip source as zmax=%f<thr=%f" % (zmax, seed_thr))
				continue

			# - Set binary map with this source
			logger.debug("Get source binary mask  ...")
			bmap= np.zeros_like(binary_map)
			bmap[sslice]= binary_map[sslice]

			# - Extract contour and polygon from binary mask
			logger.info("Extracting contour and polygon from binary mask ...")
			contours= []
			polygon= None
			try:
				bmap_uint8= bmap.copy() # copy as OpenCV internally modifies the original mask
				bmap_uint8= bmap_uint8.astype(np.uint8)
				contours= cv2.findContours(bmap_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
				contours= imutils.grab_contours(contours)
				if len(contours)>0:
					contour= np.squeeze(contours[0])
					polygon = Polygon(contour)
			except Exception as e:
				logger.warn("Failed to compute mask contour (err=%s)!" % (str(e)))
			
			if polygon is None:
				logger.warn("Skip extracted blob as polygon failed to be computed...")
				continue

			# - Check if source has a local peak in the mask
			#   NB: Check if polygon is computed
			has_peak= False
			peak_sel= None
			if polygon is not None:
				for peak in peaks:
					point = Point(peak[1], peak[0])
					has_peak= polygon.contains(point)
					if has_peak:
						peak_sel= peak
						break
				
			if not has_peak: 
				logger.info("Skip extracted blob as no peak was found inside source contour polygon!")
				continue

			# - Check for source peak distance wrt image center
			if dist_thr>0:
				dist= np.sqrt( (peak_sel[1]-x_c)**2 + (peak_sel[0]-y_c)**2 )
				if dist>dist_thr:
					logger.info("Skip extracted source as peak-imcenter dist=%f<thr=%f" % (dist, dist_thr))
					continue

			# - Update global binary mask and regprops
			#binary_map_sel[sslice]= binary_map[sslice]
			regprops_sel.append(regprop)
			peaks_sel.append(peak_sel)
			binary_maps_sel.append(bmap)	
			polygons_sel.append(polygon)
			contours_sel.append(contours[0])
			
		# - Return None if no source is selected
		nsources_sel= len(regprops_sel)
		if nsources_sel<=0:
			logger.info("No sources selected for this image ...")
			return None

		# - If more than one source is selected, take the one whose peak is closest to the image center
		peak_final= peaks_sel[0]
		bmap_final= binary_maps_sel[0]
		regprop_final= regprops_sel[0]
		polygon_final= polygons_sel[0]
		contour_final= contours_sel[0]

		if nsources_sel>1:
			logger.info("#%d sources selected, going to select the closest to image center ..." % (nsources_sel))
			
			dist_best= 1.e+99
			index_best= -1
			for j in range(len(peaks_sel)):
				peak= peaks_sel[j]
				bmap= binary_maps_sel[j]
				regprop= regprops_sel[j]
				polygon= polygons_sel[j]
				contour= contours_sel[j]
				dist= np.sqrt( (peak[1]-x_c)**2 + (peak[0]-y_c)**2 )
				if dist<dist_best:
					dist_best= dist
					peak_final= peak
					bmap_final= bmap
					regprop_final= regprop
					polygon_final= polygon
					contour_final= contour
					index_best= j
			
			logger.info("Selected source no. %d as the closest one to image center ..." % (index_best))							
		else:
			logger.info("#%d sources selected..." % (nsources_sel))
			
		# - Compute enclosing circle radius 
		try:
			(xc, yc), radius= cv2.minEnclosingCircle(contour_final)
			enclosing_circle= (xc,yc,radius)
		except Exception as e:
			logger.warn("Failed to compute min enclosing circle (err=%s)!" % (str(e)))
			enclosing_circle= None

		# - Draw figure
		if self.draw:
			fig, ax = plt.subplots()

			# - Draw map
			#plt.imshow(label_map)
			#plt.imshow(data)
			plt.imshow(zmap)
			#plt.imshow(bmap_final)
			plt.colorbar()

			# - Draw bbox rectangle
			bbox= regprop_final.bbox
			ymin= bbox[0]
			ymax= bbox[2]
			xmin= bbox[1]
			xmax= bbox[3]
			dx= xmax-xmin-1
			dy= ymax-ymin-1
			rect = patches.Rectangle((xmin,ymin), dx, dy, linewidth=1, edgecolor='r', facecolor='none')
			ax.add_patch(rect)

			# - Draw selected peak
			if peak_final is not None:
				plt.scatter(peak_final[1], peak_final[0], s=10)

			# - Draw contour polygon
			if polygon_final is not None:
				plt.plot(*polygon_final.exterior.xy)

			# - Draw enclosing circle (skip if its computation failed)
			if enclosing_circle is not None:
				circle = plt.Circle((xc, yc), radius, color='g', clip_on=False, fill=False)
				ax.add_patch(circle)

			plt.show()


		return (peak_final, bmap_final, regprop_final, enclosing_circle)
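A minimal usage sketch of the extractor above (hypothetical caller with crude noise estimates, not part of the original code), showing the expected inputs and the returned tuple:

# bkg = np.nanmedian(data)    # crude background estimate (assumption)
# rms = np.nanstd(data)       # crude noise estimate (assumption)
# res = self.__extract_sources(data, bkg, rms, seed_thr=4, merge_thr=3, dist_thr=-1)
# if res is not None:
#     peak, bmap, regprop, circle = res  # peak=(y,x), binary blob mask, skimage regionprops, (xc,yc,radius) or None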
Example #22
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #==     (ALL PROCS)
    #===========================
    if procId == MASTER:
        logger.info("[PROC %d] Parsing script args ..." % (procId))
    try:
        args = get_args()
    except Exception as ex:
        logger.error("[PROC %d] Failed to get and parse options (err=%s)" %
                     (procId, str(ex)))
        return 1

    imgfile = args.img
    regionfile = args.region
    configfile = args.scutout_config

    surveys = []
    if args.surveys != "":
        surveys = [str(x.strip()) for x in args.surveys.split(',')]

    surveys_radio = []
    if args.surveys_radio != "":
        surveys_radio = [str(x.strip()) for x in args.surveys_radio.split(',')]

    if imgfile == "" and not surveys:
        logger.error(
            "[PROC %d] No image passed, surveys option cannot be empty!" %
            (procId))
        return 1

    filter_regions_by_tags = args.filter_regions_by_tags
    tags = []
    if args.tags != "":
        tags = [str(x.strip()) for x in args.tags.split(',')]

    jobdir = os.getcwd()
    if args.jobdir != "":
        if not os.path.exists(args.jobdir):
            logger.error("[PROC %d] Given job dir %s does not exist!" %
                         (procId, args.jobdir))
            return 1
        jobdir = args.jobdir

    # - Classifier options
    normalize_feat = args.normalize_feat
    scalerfile = args.scalerfile
    binary_class = args.binary_class
    modelfile = args.modelfile
    save_class_labels = args.save_class_labels

    # - Autoencoder options
    run_aereco = args.run_aereco
    nx = args.nx
    ny = args.ny
    modelfile_encoder = args.modelfile_encoder
    modelfile_decoder = args.modelfile_decoder
    weightfile_encoder = args.weightfile_encoder
    weightfile_decoder = args.weightfile_decoder
    aereco_thr = args.aereco_thr
    empty_filenames = ((modelfile_encoder == "" or modelfile_decoder == "") or
                       (weightfile_encoder == "" or weightfile_decoder == ""))

    if run_aereco and empty_filenames:
        logger.error("[PROC %d] Empty AE model/weight filename given!" %
                     (procId))
        return 1

    # - Outlier search options
    find_outliers = args.find_outliers
    modelfile_outlier = args.modelfile_outlier
    anomaly_thr = args.anomaly_thr
    max_features = args.max_features
    max_samples = "auto"
    if args.max_samples > 0:
        max_samples = args.max_samples
    save_outlier = args.save_outlier
    outfile_outlier = args.outfile_outlier

    # - Color index
    refch = args.refch
    shrink_mask = args.shrink_mask
    kernsizes_shrink = args.kernsizes_shrink
    grow_mask = args.grow_mask
    kernsizes_grow = args.kernsizes_grow
    seed_thr = args.seed_thr
    merge_thr = args.merge_thr

    # - Spectral index
    add_spectral_index = args.add_spectral_index
    img_group_1 = []
    img_group_2 = []
    img_freqs = []
    if args.img_group_1 != "":
        img_group_1 = [int(x.strip()) for x in args.img_group_1.split(',')]
    if args.img_group_2 != "":
        img_group_2 = [int(x.strip()) for x in args.img_group_2.split(',')]
    if args.img_freqs != "":
        img_freqs = [float(x.strip()) for x in args.img_freqs.split(',')]

    alpha_rcoeff_thr = args.alpha_rcoeff_thr
    save_spectral_index = args.save_spectral_index

    if add_spectral_index:
        if not img_group_1 or not img_group_2:
            logger.error(
                "Group image indices for spectral index calculation not given in input or empty!"
            )
            return 1
        if len(img_group_1) != len(img_group_2):
            logger.error(
                "Given group image indices for spectral index calculation do not have the same length!"
            )
            return 1

    # - Quality data options
    negative_pix_fract_thr = args.negative_pix_fract_thr
    bad_pix_fract_thr = args.bad_pix_fract_thr

    #==================================
    #==   RUN
    #==================================
    pipeline = Pipeline()
    pipeline.jobdir = jobdir
    pipeline.filter_regions_by_tags = filter_regions_by_tags
    pipeline.tags = tags
    pipeline.configfile = configfile
    pipeline.surveys = surveys
    pipeline.surveys_radio = surveys_radio
    pipeline.normalize_feat = normalize_feat
    pipeline.scalerfile = scalerfile
    pipeline.modelfile = modelfile
    pipeline.binary_class = binary_class
    pipeline.save_class_labels = save_class_labels

    pipeline.find_outliers = find_outliers
    pipeline.modelfile_outlier = modelfile_outlier
    pipeline.outlier_thr = anomaly_thr
    pipeline.max_features = max_features
    pipeline.max_samples = max_samples
    pipeline.save_outlier = save_outlier
    pipeline.outfile_outlier = outfile_outlier

    pipeline.run_aereco = run_aereco
    pipeline.modelfile_encoder = modelfile_encoder
    pipeline.modelfile_decoder = modelfile_decoder
    pipeline.weightfile_encoder = weightfile_encoder
    pipeline.weightfile_decoder = weightfile_decoder
    pipeline.resize_img = True
    pipeline.nx = nx
    pipeline.ny = ny
    pipeline.normalize_img = True
    pipeline.scale_img_to_abs_max = False
    pipeline.scale_img_to_max = False
    pipeline.log_transform_img = False
    pipeline.scale_img = False
    pipeline.scale_img_factors = []
    pipeline.standardize_img = False
    pipeline.img_means = []
    pipeline.img_sigmas = []
    pipeline.img_chan_divide = False
    pipeline.img_chan_mins = []
    pipeline.img_erode = False
    pipeline.img_erode_kernel = 9
    pipeline.add_channorm_layer = False
    pipeline.winsize = 3

    pipeline.refch = refch
    pipeline.shrink_mask = shrink_mask
    pipeline.kernsizes_shrink = kernsizes_shrink
    pipeline.grow_mask = grow_mask
    pipeline.kernsizes_grow = kernsizes_grow
    pipeline.seed_thr = seed_thr
    pipeline.merge_thr = merge_thr

    pipeline.add_spectral_index = add_spectral_index
    pipeline.alpha_img_freqs = img_freqs
    pipeline.alpha_img_group_1 = img_group_1
    pipeline.alpha_img_group_2 = img_group_2
    pipeline.alpha_rcoeff_thr = alpha_rcoeff_thr
    pipeline.save_spectral_index_data = save_spectral_index

    pipeline.negative_pix_fract_thr = negative_pix_fract_thr
    pipeline.bad_pix_fract_thr = bad_pix_fract_thr

    print("pipeline.alpha_img_freqs")
    print(pipeline.alpha_img_freqs)
    print("pipeline.alpha_img_group_1")
    print(pipeline.alpha_img_group_1)
    print("pipeline.alpha_img_group_2")
    print(pipeline.alpha_img_group_2)

    logger.info("[PROC %d] Running source classification pipeline ..." %
                (procId))
    status = pipeline.run(imgfile, regionfile)

    if status < 0:
        logger.error("Source classification pipeline run failed (see logs)!")
        return 1

    return 0
Example #23
def main():
    """Main function"""

    #===========================
    #==   PARSE ARGS
    #===========================
    logger.info("Get script args ...")
    try:
        args = get_args()
    except Exception as ex:
        logger.error("Failed to get and parse options (err=%s)", str(ex))
        return 1

    # - Input filelist
    datalist = args.datalist

    # - Data process options
    nx = args.nx
    ny = args.ny
    augment = args.augment
    augment_scale_factor = args.augment_scale_factor
    scale = args.scale
    scale_factors = []
    if args.scale_factors != "":
        scale_factors = [
            float(x.strip()) for x in args.scale_factors.split(',')
        ]

    normalize = args.normalize
    scale_to_abs_max = args.scale_to_abs_max
    scale_to_max = args.scale_to_max
    log_transform = args.log_transform
    standardize = args.standardize
    img_means = []
    img_sigmas = []
    if args.img_means != "":
        img_means = [float(x.strip()) for x in args.img_means.split(',')]
    if args.img_sigmas != "":
        img_sigmas = [float(x.strip()) for x in args.img_sigmas.split(',')]

    chan_divide = args.chan_divide
    chan_mins = []
    if args.chan_mins != "":
        chan_mins = [float(x.strip()) for x in args.chan_mins.split(',')]
    erode = args.erode
    erode_kernel = args.erode_kernel

    # - NN architecture
    use_vae = args.use_vae
    #modelfile= args.modelfile
    modelfile_encoder = args.modelfile_encoder
    modelfile_decoder = args.modelfile_decoder
    add_maxpooling_layer = args.add_maxpooling_layer
    add_batchnorm_layer = args.add_batchnorm_layer
    add_leakyrelu = args.add_leakyrelu
    add_dense_layer = args.add_dense_layer
    add_channorm_layer = args.add_channorm_layer
    nfilters_cnn = [int(x.strip()) for x in args.nfilters_cnn.split(',')]
    kernsizes_cnn = [int(x.strip()) for x in args.kernsizes_cnn.split(',')]
    strides_cnn = [int(x.strip()) for x in args.strides_cnn.split(',')]
    dense_layer_sizes = [
        int(x.strip()) for x in args.dense_layer_sizes.split(',')
    ]
    dense_layer_activation = args.dense_layer_activation
    decoder_output_layer_activation = args.decoder_output_layer_activation

    print("nfilters_cnn")
    print(nfilters_cnn)
    print("kernsizes_cnn")
    print(kernsizes_cnn)
    print("strides_cnn")
    print(strides_cnn)
    print("dense_layer_sizes")
    print(dense_layer_sizes)

    # - Train options
    #weightfile= args.weightfile
    weightfile_encoder = args.weightfile_encoder
    weightfile_decoder = args.weightfile_decoder
    latentdim = args.latentdim
    optimizer = args.optimizer
    learning_rate = args.learning_rate
    batch_size = args.batch_size
    nepochs = args.nepochs
    mse_loss = args.mse_loss
    scale_chan_mse_loss = args.scale_chan_mse_loss
    kl_loss = args.kl_loss
    ssim_loss = args.ssim_loss
    mse_loss_weight = args.mse_loss_weight
    kl_loss_weight = args.kl_loss_weight
    ssim_loss_weight = args.ssim_loss_weight
    ssim_win_size = args.ssim_win_size
    weight_seed = args.weight_seed
    reproducible = args.reproducible
    validation_steps = args.validation_steps

    # - UMAP options
    run_umap = args.run_umap
    latentdim_umap = args.latentdim_umap
    mindist_umap = args.mindist_umap
    nneighbors_umap = args.nneighbors_umap
    outfile_umap_unsupervised = args.outfile_umap_unsupervised
    outfile_umap_supervised = args.outfile_umap_supervised
    outfile_umap_preclassified = args.outfile_umap_preclassified

    # - Clustering options
    run_clustering = args.run_clustering
    min_cluster_size = args.min_cluster_size
    min_samples = args.min_samples
    modelfile_clust = args.modelfile_clust
    predict_clust = args.predict_clust

    #===========================
    #==   READ DATALIST
    #===========================
    # - Create data loader
    dl = DataLoader(filename=datalist)

    # - Read datalist
    logger.info("Reading datalist %s ..." % datalist)
    if dl.read_datalist() < 0:
        logger.error("Failed to read input datalist!")
        return 1

    #===========================
    #==   TRAIN VAE
    #===========================
    logger.info("Running VAE classifier training ...")
    vae_class = FeatExtractorAE(dl)

    vae_class.use_vae = use_vae
    #vae_class.modelfile= modelfile
    vae_class.modelfile_encoder = modelfile_encoder
    vae_class.modelfile_decoder = modelfile_decoder
    #vae_class.weightfile= weightfile
    vae_class.weightfile_encoder = weightfile_encoder
    vae_class.weightfile_decoder = weightfile_decoder
    vae_class.latent_dim = latentdim
    vae_class.set_image_size(nx, ny)
    vae_class.augmentation = augment
    vae_class.augment_scale_factor = augment_scale_factor
    vae_class.normalize = normalize
    vae_class.scale_to_abs_max = scale_to_abs_max
    vae_class.scale_to_max = scale_to_max
    vae_class.log_transform_img = log_transform
    vae_class.scale_img = scale
    vae_class.scale_img_factors = scale_factors
    vae_class.standardize_img = standardize
    vae_class.img_means = img_means
    vae_class.img_sigmas = img_sigmas
    vae_class.chan_divide = chan_divide
    vae_class.chan_mins = chan_mins
    vae_class.erode = erode
    vae_class.erode_kernel = erode_kernel

    vae_class.batch_size = batch_size
    vae_class.nepochs = nepochs
    vae_class.validation_steps = validation_steps
    vae_class.set_optimizer(optimizer, learning_rate)
    if reproducible:
        vae_class.set_reproducible_model()

    vae_class.add_max_pooling = add_maxpooling_layer
    vae_class.add_batchnorm = add_batchnorm_layer
    vae_class.add_leakyrelu = add_leakyrelu
    vae_class.add_dense = add_dense_layer
    vae_class.add_channorm_layer = add_channorm_layer
    vae_class.nfilters_cnn = nfilters_cnn
    vae_class.kernsizes_cnn = kernsizes_cnn
    vae_class.strides_cnn = strides_cnn
    vae_class.dense_layer_sizes = dense_layer_sizes
    vae_class.dense_layer_activation = dense_layer_activation

    vae_class.use_mse_loss = mse_loss
    vae_class.scale_chan_mse_loss = scale_chan_mse_loss
    vae_class.use_kl_loss = kl_loss
    vae_class.use_ssim_loss = ssim_loss
    vae_class.mse_loss_weight = mse_loss_weight
    vae_class.kl_loss_weight = kl_loss_weight
    vae_class.ssim_loss_weight = ssim_loss_weight
    vae_class.ssim_win_size = ssim_win_size
    vae_class.weight_seed = weight_seed

    if vae_class.train_model() < 0:
        logger.error("VAE training failed!")
        return 1

    #===========================
    #==   TRAIN UMAP
    #===========================
    if run_umap:
        # - Retrieve VAE encoded data
        logger.info("Retrieve latent data from VAE ...")
        snames = vae_class.source_names
        classids = vae_class.source_ids
        vae_data = vae_class.encoded_data

        # - Run UMAP
        logger.info("Running UMAP classifier training on VAE latent data ...")
        umap_class = FeatExtractorUMAP()

        umap_class.set_encoded_data_unsupervised_outfile(
            outfile_umap_unsupervised)
        umap_class.set_encoded_data_supervised_outfile(outfile_umap_supervised)
        umap_class.set_encoded_data_preclassified_outfile(
            outfile_umap_preclassified)
        umap_class.set_encoded_data_dim(latentdim_umap)
        umap_class.set_min_dist(mindist_umap)
        umap_class.set_n_neighbors(nneighbors_umap)

        if umap_class.run_train(vae_data, class_ids=classids,
                                snames=snames) < 0:
            logger.error("UMAP training failed!")
            return 1

    #==============================
    #==   RUN CLUSTERING
    #==============================
    if run_clustering:
        # - Retrieve VAE encoded data
        logger.info("Retrieve latent data from VAE ...")
        snames = vae_class.source_names
        classids = vae_class.source_ids
        vae_data = vae_class.encoded_data

        # - Run HDBSCAN clustering
        logger.info(
            "Running HDBSCAN classifier prediction on autoencoder latent data ..."
        )
        clust_class = Clusterer()
        clust_class.min_cluster_size = min_cluster_size
        clust_class.min_samples = min_samples

        status = 0
        if predict_clust:
            if clust_class.run_predict(vae_data,
                                       class_ids=classids,
                                       snames=snames,
                                       modelfile=modelfile_clust) < 0:
                logger.error("Clustering predict failed!")
                return 1
        else:
            if clust_class.run_clustering(vae_data,
                                          class_ids=classids,
                                          snames=snames,
                                          modelfile=modelfile_clust) < 0:
                logger.error("Clustering run failed!")
                return 1

    return 0