Example #1
    def __read_datalist(self):
        """ Read json filelist """

        # - Check datalist files
        if self.datalistfile == "" or self.datalistfile_mask == "":
            logger.error("Data list files are empty!")
            return -1

        # - Read data list for images and store number of instances per class, etc
        ret = self.__read_filelist(self.datalistfile)
        if ret is None:
            logger.error("Failed to read filelist for imgs!")
            return -1
        datadict = ret[0]
        nchannels_set = ret[1]

        self.datalist = datadict["data"]
        self.nchannels = list(nchannels_set)[0]
        self.datasize = len(self.datalist)
        self.labels = [item["label"] for item in self.datalist]
        self.snames = [item["sname"] for item in self.datalist]
        self.classids = [item["id"] for item in self.datalist]

        self.classfract_map = dict(Counter(self.classids).items())

        logger.info("#%d objects in dataset" % self.datasize)

        return 0
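
The class map above is built with collections.Counter over the id column; note that, despite its name, classfract_map holds per-class counts rather than normalized fractions. A tiny self-contained illustration (the ids are made up):

from collections import Counter

classids = [1, 1, 2, 3, 3, 3]
classfract_map = dict(Counter(classids))
print(classfract_map)  # {1: 2, 2: 1, 3: 3}
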
Example #2
    def __run_aereco(self):
        """ Run AE reconstruction """

        # - Set FeatExtractorAE class
        ae = FeatExtractorAE(self.dl)
        ae.set_image_size(self.nx, self.ny)
        ae.normalize = self.normalize
        ae.scale_to_abs_max = self.scale_to_abs_max
        ae.scale_to_max = self.scale_to_max
        ae.log_transform_img = self.log_transform
        ae.scale_img = self.scale
        ae.scale_img_factors = self.scale_factors
        ae.standardize_img = self.standardize
        ae.img_means = self.img_means
        ae.img_sigmas = self.img_sigmas
        ae.chan_divide = self.chan_divide
        ae.chan_mins = self.chan_mins
        ae.erode = self.erode
        ae.erode_kernel = self.erode_kernel
        ae.add_channorm_layer = self.add_channorm_layer

        # - Run AE reco
        status = ae.reconstruct_data(self.modelfile_encoder,
                                     self.weightfile_encoder,
                                     self.modelfile_decoder,
                                     self.weightfile_decoder,
                                     winsize=self.winsize,
                                     outfile_metrics=self.outfile,
                                     save_imgs=self.save_imgs)

        if status < 0:
            logger.error("AE reconstruction failed (see logs)!")
            return -1

        return 0
Example #3
	def __load_model(self, modelfile, weightfile):
		""" Load model and weights from input h5 file """

		#==============================
		#==   LOAD MODEL ARCHITECTURE
		#==============================
		# - Load model
		try:
			self.model = load_model(modelfile, custom_objects={'recall_metric': recall_metric, 'precision_metric': precision_metric, 'f1score_metric': f1score_metric})
			self.model.load_weights(weightfile)

		except Exception as e:
			logger.warn("Failed to load model from file %s (err=%s)!" % (modelfile, str(e)))
			return -1

		if self.model is None:
			logger.error("Model object is None, loading failed!")
			return -1

		#===========================
		#==   SET LOSS & METRICS
		#===========================	
		self.model.compile(optimizer=self.optimizer, loss=self.loss_type, metrics=['accuracy', f1score_metric, precision_metric, recall_metric], run_eagerly=True)
		
		return 0
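
For reference, a minimal sketch of why custom_objects is needed: Keras cannot deserialize a model saved with custom metrics unless each metric is re-registered at load time. The metric body and the 'model.h5' path below are placeholders, not the project's actual recall/precision/f1score implementations:

import tensorflow as tf
from tensorflow.keras.models import load_model

def dummy_metric(y_true, y_pred):
    """ Hypothetical stand-in for a custom training metric """
    return tf.reduce_mean(tf.cast(tf.equal(tf.round(y_pred), y_true), tf.float32))

# Re-register the custom metric by name when loading (placeholder path)
# model = load_model("model.h5", custom_objects={"dummy_metric": dummy_metric})
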
Example #4
	def compute_bkg(self, masks, sigma_clip=3):
		""" Compute image background """

		# - Init bkg
		self.bkg_levels= [0]*self.nchannels
		self.bkg_rms= [0]*self.nchannels
		
		if len(masks)!=self.nchannels:
			logger.error("Number of input masks != nchannels, cannot compute bkg!")	
			return -1

		# - Compute bkg levels & rms
		logger.info("Computing image clipped stats of non-masked pixels ...")

		for i in range(self.nchannels):
			data= self.img_data[i]
			mask= masks[i]
			cond= np.logical_and(np.logical_and(data!=0, np.isfinite(data)), mask==0)
			data_1d= data[cond]
			print("--> data_1d.shape")
			print(data_1d.shape)
			mean, median, stddev= sigma_clipped_stats(data_1d, sigma=sigma_clip)
			self.bkg_levels[i]= median
			self.bkg_rms[i]= stddev

		return 0
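
A standalone sketch of the same clipped-statistics idea using only numpy and astropy; the synthetic image and mask are illustrative, not from the original code:

import numpy as np
from astropy.stats import sigma_clipped_stats

# Synthetic image: Gaussian noise plus a bright "source" region
rng = np.random.default_rng(42)
img = rng.normal(loc=0.0, scale=1.0, size=(128, 128))
img[60:68, 60:68] += 50.0

# Mask flagging the bright region (non-zero = masked, as in compute_bkg)
mask = np.zeros_like(img)
mask[60:68, 60:68] = 1

# Keep finite, non-zero, unmasked pixels and sigma-clip the rest
cond = np.logical_and(np.logical_and(img != 0, np.isfinite(img)), mask == 0)
mean, median, stddev = sigma_clipped_stats(img[cond], sigma=3)
print("bkg level (median)=%f, bkg rms (stddev)=%f" % (median, stddev))
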
Example #5
	def compute_img_moments(self):
		""" Compute image moments """

		# - Init pars
		self.moments_c= []
		self.moments_hu= []
		self.moments_zern= []

		# - Compute raw moments and centroids
		if self.__compute_contour_pars()<0:
			logger.error("Failed to compute contour pars!")
			return -1

		# - Compute moments (central, Hu, Zernike) of intensity images	
		#   NB: use centroid from refch for all channels
		centroid= self.centroids[self.refch]
		#centroid= self.center_of_masses[self.refch]

		for i in range(self.nchannels):
			data= self.img_data[i]
			radius= self.radii[i]
			ret= self.__compute_moments(data, centroid, radius)
			if ret is None:
				logger.error("Failed to compute moments for image %s (id=%s, ch=%d)!" % (self.sname, self.label, i+1))
				return -1

			self.moments_c.append(ret[0])
			self.moments_hu.append(ret[1])
			self.moments_zern.append(ret[2])

		return 0
Example #6
    def run(self, datalist):
        """ Run data checker """

        # - Init
        self.param_dict_list = []

        # - Read data
        logger.info("Read data list %s ..." % (self.datalist))
        self.__read_data(datalist)

        # - Run AE reco
        logger.info("Running autoencoder reconstruction ...")
        if self.__run_aereco() < 0:
            logger.error("AE reconstruction failed!")
            return -1

        # - Select AE reco data
        logger.info("Reading and thresholding AE reco metrics ...")
        if self.__fill_metric_data() < 0:
            logger.error("Failed to read and threshold AE reco metrics!")
            return -1

        # - Save output data
        logger.info("Saving output to file ...")
        if self.__save() < 0:
            logger.warn("Failed to save output data to file %s!" %
                        (self.outfile))
            return -1

        return 0
Example #7
    def __select_cols(self, selcols):
        """ Select data columns provided in selcols list """

        # - Check sel cols
        if not selcols:
            logger.error("Empty sel col list!")
            return -1

        # - Remove any duplicated col ids, sort and set colsel flags
        selcols = list(set(selcols))
        selcols.sort()
        selcolflags = [False] * self.nfeatures
        for col in selcols:
            if col < 0 or col >= self.nfeatures:
                logger.error(
                    "Given sel col id %d is not in nfeature col range [0,%d]!"
                    % (col, self.nfeatures - 1))
                return -1
            selcolflags[col] = True

        print("--> Selected columns")
        print(selcols)
        print("--> Selected column flags")
        print(selcolflags)

        # - Extract selected data columns
        logger.info(
            "Extracting selected data columns (N=%d) from original data ..." %
            (len(selcols)))
        self.data_sel = self.data[:, selcolflags]
        self.data_preclassified_sel = self.data_preclassified[:, selcolflags]
        self.selfeatids = selcols

        return 0
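
The column extraction above relies on numpy boolean-mask indexing along axis 1; a small self-contained sketch with invented values:

import numpy as np

data = np.arange(20, dtype=float).reshape(4, 5)  # 4 samples x 5 features
nfeatures = data.shape[1]

selcols = sorted(set([3, 1, 3]))                 # deduplicate & sort, as in __select_cols
selcolflags = [False] * nfeatures
for col in selcols:
    selcolflags[col] = True

data_sel = data[:, selcolflags]                  # boolean mask selects columns 1 and 3
print(data_sel.shape)                            # (4, 2)
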
Example #8
	def select_features(self, selcolids):
		""" Select feature cols (0 index is the first feature, not sname) """

		# - Check if param dict is filled
		if not self.param_dict:
			logger.error("Parameter dict is empty!")
			return -1

		# - Get list of sel keys given col indices
		keys= list(self.param_dict.keys())
		keys_sel= [keys[selcol+1] for selcol in selcolids] # +1 because 0 index is the first feature, not sname

		# - Create new dict with selected pars
		param_dict_sel= collections.OrderedDict()
		param_dict_sel["sname"]= self.param_dict["sname"]

		for key in keys_sel:
			param_dict_sel[key]= self.param_dict[key]

		param_dict_sel["id"]= self.param_dict["id"]

		# - Override old dict
		self.param_dict= param_dict_sel
		
		return 0
Example #9
	def __read_fits(self, filename):
		""" Read FITS image and return data """

		# - Open file
		try:
			hdu= fits.open(filename, memmap=False)
		except Exception as ex:
			errmsg= 'Cannot read image file ' + filename + ' (err=' + str(ex) + ')!'
			logger.error(errmsg)
			raise IOError(errmsg)

		# - Read data (squeeze degenerate axes of 4D radio cubes)
		data= hdu[0].data
		ndim= data.ndim
		if ndim==4:
			output_data= data[0,0,:,:]
		elif ndim==2:
			output_data= data
		else:
			errmsg= 'Invalid/unsupported number of axes found in file ' + filename + ' (ndim=' + str(ndim) + ')!'
			logger.error(errmsg)
			hdu.close()
			raise IOError(errmsg)

		# - Read metadata
		header= hdu[0].header

		# - Close file
		hdu.close()

		return output_data, header
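
A compact standalone variant of this FITS-reading pattern, assuming astropy; radio images are often stored as 4D cubes with degenerate Stokes/frequency axes, which is what the data[0,0,:,:] slice strips. The filename is a placeholder:

from astropy.io import fits

def read_fits_2d(filename):
    """ Return (2D image data, header), squeezing 4D radio cubes """
    with fits.open(filename, memmap=False) as hdu:
        data = hdu[0].data
        header = hdu[0].header
        if data.ndim == 4:       # e.g. (1, 1, ny, nx) cube
            data = data[0, 0, :, :]
        elif data.ndim != 2:
            raise IOError("Unsupported number of axes (%d) in %s" % (data.ndim, filename))
    return data, header

# data, header = read_fits_2d("image.fits")  # placeholder filename
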
Example #10
	def read_img(self):
		""" Read input image and generate Montage metadata """

		# - Read FITS (ALL PROC)
		logger.info("[PROC %d] Reading input image %s ..." % (procId, self.imgfile_fullpath))
		try:
			data, header, wcs= Utils.read_fits(self.imgfile_fullpath)

		except Exception as e:
			logger.error("[PROC %d] Failed to read input image %s (err=%s)!" % (procId, self.imgfile_fullpath, str(e)))
			return -1
		
		# - Write input image Montage metadata (PROC 0)
		status= 0
		
		if procId==MASTER:
			status= Utils.write_montage_fits_metadata(inputfile=self.imgfile_fullpath, metadata_file=self.img_metadata, jobdir=self.jobdir_scutout)
		
		else: # OTHER PROCS
			status= -1
			
		if comm is not None:
			status= comm.bcast(status, root=MASTER)

		if status<0:
			logger.error("[PROC %d] Failed to generate Montage metadata for input image %s, exit!" % (procId, self.imgfile_fullpath))
			return -1

		return 0
Example #11
	def __load_model(self, modelfile):
		""" Load model and weights from input h5 file """

		#==============================
		#==   LOAD MODEL ARCHITECTURE
		#==============================
		try:
			self.model= load_model(modelfile)
			
		except Exception as e:
			logger.warn("Failed to load model from file %s (err=%s)!" % (modelfile, str(e)))
			return -1

		if self.model is None:
			logger.error("Model object is None, loading failed!")
			return -1

		
		#===========================
		#==   SET LOSS & METRICS
		#===========================	
		self.model.compile(optimizer=self.optimizer, loss=self.loss_type, metrics=['accuracy', f1score_metric, precision_metric, recall_metric], run_eagerly=True)
		
		# - Print and draw model
		self.model.summary()
		plot_model(self.model,to_file='model.png',show_shapes=True)

		return 0
Example #12
    def run(self, img_group_1, img_group_2):
        """ Compute spectral index """

        # - Read image data
        if self.__read_imgs() < 0:
            logger.error("Failed to read input imgs!")
            return -1

        # - Check data integrity
        good_data = self.__has_good_data(check_mask=True,
                                         check_bad=True,
                                         check_neg=True,
                                         check_same=True)

        if not good_data:
            logger.warn("Source data selected as bad, skip this source...")
            return -1

        # - Compute spectral index
        if self.__compute_spectral_index(img_group_1, img_group_2) < 0:
            logger.error("Failed to compute spectral index (see logs)!")
            return -1

        # - Fill dict data
        self.__fill_data()

        return 0
Example #13
	def subtract_bkg(self, bkgs, subtract_only_refch=False):
		""" Subtract image background """

		if len(bkgs)!=self.nchannels:
			logger.error("Number of input bkgs != nchannels, cannot subtract bkg!")	
			return -1

		# - Subtract bkg
		if subtract_only_refch:
			mask= self.img_data_mask[self.refch]
			self.img_data[self.refch]-= bkgs[self.refch]
			self.img_data[self.refch][mask==0]= 0

		else:
			for i in range(self.nchannels):
				mask= self.img_data_mask[i]
				self.img_data[i]-= bkgs[i]
				self.img_data[i][mask==0]= 0

		# - Draw data & masks?
		if self.draw:
			fig, axs = plt.subplots(2, self.nchannels)
			for i in range(self.nchannels):			
				axs[0, i].imshow(self.img_data[i])
				axs[1, i].imshow(self.img_data_mask[i])
				
			plt.show()
	
		return 0
Example #14
	def read_regions(self):
		""" Read regions """

		# - Read regions
		logger.info("[PROC %d] Reading DS9 region file %s ..." % (procId, self.regionfile))
		ret= Utils.read_regions([self.regionfile])
		if ret is None:
			logger.error("[PROC %d] Failed to read regions (check format)!" % (procId))
			return -1
	
		regs= ret[0]
		snames= ret[1]
		slabels= ret[2]

		# - Select region by tag
		regs_sel= regs
		snames_sel= snames
		slabels_sel= slabels
		if self.filter_regions_by_tags and self.tags:
			logger.info("[PROC %d] Selecting DS9 region with desired tags ..." % (procId))
			regs_sel, snames_sel, slabels_sel= Utils.select_regions(regs, self.tags)
		
		if not regs_sel:
			logger.warn("[PROC %d] No region left for processing (check input region file)!" % (procId))
			return -1

		self.sname_label_map= {}
		for i in range(len(snames_sel)):
			sname= snames_sel[i]
			slabel= slabels_sel[i]
			self.sname_label_map[sname]= slabel

		print("sname_label_map")
		print(self.sname_label_map)

		# - Compute centroids & radius
		centroids, radii= Utils.compute_region_info(regs_sel)

		# - Assign sources to each processor
		self.nsources= len(regs_sel)
		source_indices= list(range(0, self.nsources))
		source_indices_split= np.array_split(source_indices, nproc)
		source_indices_proc= list(source_indices_split[procId])
		self.nsources_proc= len(source_indices_proc)
		imin= source_indices_proc[0]
		imax= source_indices_proc[self.nsources_proc-1]
	
		self.snames_proc= snames_sel[imin:imax+1]
		self.slabels_proc= slabels_sel[imin:imax+1]
		self.regions_proc= regs_sel[imin:imax+1]
		self.centroids_proc= centroids[imin:imax+1]
		self.radii_proc= radii[imin:imax+1]
		logger.info("[PROC %d] #%d sources assigned to this processor ..." % (procId, self.nsources_proc))
	
		print("snames_proc %d" % (procId))
		print(self.snames_proc)
	
		return 0	
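
The source-to-processor assignment is plain np.array_split over index lists; a minimal sketch of that partitioning outside MPI, where nproc and procId stand in for the MPI world size and rank:

import numpy as np

nsources = 10
nproc = 3

source_indices = list(range(nsources))
source_indices_split = np.array_split(source_indices, nproc)

for procId in range(nproc):
    chunk = list(source_indices_split[procId])
    print("PROC %d -> sources %s" % (procId, chunk))
# PROC 0 -> sources [0, 1, 2, 3]
# PROC 1 -> sources [4, 5, 6]
# PROC 2 -> sources [7, 8, 9]
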
Example #15
	def run_ae_reconstruction(self, datalist):
		""" Run AE reconstruction """

		if procId==MASTER:
			aereco_status= 0

			# - Create data loader
			dl= DataLoader(filename=datalist)

			# - Read datalist	
			logger.info("[PROC %d] Reading datalist %s ..." % (procId, datalist))
			dataread_status= dl.read_datalist()

			if dataread_status<0:
				logger.error("[PROC %d] Failed to read input datalist %s" % (procId, datalist))
				aereco_status= -1

			else:
				# - Run AE reco
				logger.info("[PROC %d] Running autoencoder classifier reconstruction ..." % (procId))
				ae= FeatExtractorAE(dl)
				ae.resize= self.resize_img
				ae.set_image_size(self.nx, self.ny)
				ae.normalize= self.normalize_img
				ae.scale_to_abs_max= self.scale_img_to_abs_max
				ae.scale_to_max= self.scale_img_to_max
				ae.log_transform_img= self.log_transform_img
				ae.scale_img= self.scale_img
				ae.scale_img_factors= self.scale_img_factors
				ae.standardize_img= self.standardize_img
				ae.img_means= self.img_means
				ae.img_sigmas= self.img_sigmas
				ae.chan_divide= self.img_chan_divide
				ae.chan_mins= self.img_chan_mins
				ae.erode= self.img_erode
				ae.erode_kernel= self.img_erode_kernel
				ae.add_channorm_layer= self.add_channorm_layer

				aereco_status= ae.reconstruct_data(
					self.modelfile_encoder, self.weightfile_encoder, 
					self.modelfile_decoder, self.weightfile_decoder,
					winsize= self.winsize,
					outfile_metrics=self.outfile_aerecometrics,
					save_imgs= False
				)

		else:
			aereco_status= 0

		if comm is not None:
			aereco_status= comm.bcast(aereco_status, root=MASTER)

		if aereco_status<0:
			logger.error("[PROC %d] Failed to run autoencoder reconstruction on data %s, exit!" % (procId, datalist))
			return -1

		return 0
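
The status-broadcast idiom used here (the master does the work, every rank receives the outcome so all fail together) can be sketched with mpi4py as follows; do_master_work is a hypothetical stand-in:

from mpi4py import MPI

comm = MPI.COMM_WORLD
procId = comm.Get_rank()
MASTER = 0

status = 0
if procId == MASTER:
    # do_master_work() would return 0 on success, -1 on failure (hypothetical)
    status = 0  # status = do_master_work()

# All ranks adopt the master's status so everyone exits consistently
status = comm.bcast(status, root=MASTER)
if status < 0:
    raise SystemExit("Master task failed, aborting on all ranks")
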
Example #16
    def __merge_data(self, dlist):
        """ Merge feature data """

        # - Check input list
        if not dlist:
            logger.error("Empty data dict list given!")
            return -1

        for i in range(len(dlist)):
            d = dlist[i]
            if not d:
                logger.error("Data dict %d is empty!" % (i + 1))
                return -1

        # - Compute number of vars (the sname & id entries are not features)
        nvars_tot = 0
        for d in dlist:
            logger.debug("Data dict: %s" % str(d))
            nentries = len(d.keys())
            firstitem = next(iter(d.items()))
            nvars = len(firstitem[1].keys()) - 2  # subtract the sname & id keys
            nvars_tot += nvars
            logger.info("Data dict has #%d entries (#%d vars) ..." %
                        (nentries, nvars))

        logger.info("Merged set is expected to have %d vars ..." % (nvars_tot))

        # - Merge features
        logger.info("Merging feature data for input data dict ...")

        dmerged = collections.OrderedDict()

        for d in dlist:
            for key, value in d.items():
                if key not in dmerged:
                    dmerged[key] = collections.OrderedDict({})
                dmerged[key].update(value)
                dmerged[key].move_to_end("id")

        # - Remove rows with less number of entries
        logger.info("Removing rows with number of vars !=%d ..." % (nvars_tot))

        self.par_dict_list = []
        for key, value in dmerged.items():
            nvars = len(value.keys()) - 2
            if nvars != nvars_tot:
                logger.info(
                    "Removing entry (%s) as number of vars (%d) is !=%d ..." %
                    (key, nvars, nvars_tot))
                #del dmerged[key]
                continue
            self.par_dict_list.append(value)

        return 0
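
A toy version of this merge, assuming each per-extractor dict is keyed by source name and carries sname/id bookkeeping fields alongside the features; the sample values are invented:

import collections

d1 = {"src1": collections.OrderedDict(sname="src1", feat_a=0.1, id=1)}
d2 = {"src1": collections.OrderedDict(sname="src1", feat_b=0.7, id=1)}

dmerged = collections.OrderedDict()
for d in (d1, d2):
    for key, value in d.items():
        if key not in dmerged:
            dmerged[key] = collections.OrderedDict()
        dmerged[key].update(value)
        dmerged[key].move_to_end("id")   # keep the class id as the last column

print(dmerged["src1"])
# OrderedDict([('sname', 'src1'), ('feat_a', 0.1), ('feat_b', 0.7), ('id', 1)])
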
Example #17
    def __predict(self):
        """ Predict cluster labels for input data using the loaded clusterer """

        #====================================================
        #==   CHECK DATA & MODEL
        #====================================================
        # - Check if data are set
        if self.data is None:
            logger.error("Input data array is None!")
            return -1

        # - Check if clustering model is set
        if self.clusterer is None:
            logger.error("Clusterer is not set!")
            return -1

        # - Retrieve prediction data from current model
        logger.info(
            "Retrieving prediction data from current model (if any) ...")
        self.prediction_data = self.clusterer.prediction_data_

        #====================================================
        #==   CLUSTER DATA USING SAVED MODEL
        #====================================================
        logger.info("Encode input data using loaded model ...")
        self.labels, self.probs = hdbscan.approximate_predict(
            self.clusterer, self.data)

        #================================
        #==   SAVE CLUSTERED DATA
        #================================
        logger.info("Saving unsupervised encoded data to file ...")
        N = self.data.shape[0]
        logger.info("Cluster data N=%d" % N)

        # - approximate_predict returns 1D arrays, reshape for column-wise stacking
        snames = np.array(self.source_names).reshape(N, 1)
        objids = np.array(self.data_classids).reshape(N, 1)
        clustids = np.array(self.labels).reshape(N, 1)
        clustprobs = np.array(self.probs).reshape(N, 1)
        clustered_data = np.concatenate(
            (snames, objids, clustids, clustprobs), axis=1)

        head = "# sname id clustid clustprob"
        Utils.write_ascii(clustered_data, self.outfile, head)

        #================================
        #==   PLOT
        #================================
        logger.info("Plotting results ...")
        self.__plot_predict(self.clusterer, self.data, self.labels,
                            self.source_names, self.data_labels,
                            self.prediction_data, self.prediction_extra_data,
                            self.outfile_plot)

        return 0
Example #18
    def set_data_from_file(self, filename):
        """ Set data from input file. Expected format: sname, N features, classid """

        # - Read table
        row_start = 0
        try:
            table = ascii.read(filename, data_start=row_start)
        except Exception:
            logger.error("Failed to read feature file %s!" % filename)
            return -1

        logger.debug("Table colnames: %s" % str(table.colnames))

        ncols = len(table.colnames)
        nfeat = ncols - 2

        # - Set data vectors
        self.data_labels = []
        self.data_classids = []
        self.source_names = []
        featdata = []

        for data in table:
            sname = data[0]
            classid = data[ncols - 1]
            label = self.classid_label_map[classid]

            self.source_names.append(sname)
            self.data_labels.append(label)
            self.data_classids.append(classid)
            featdata_curr = []
            for k in range(nfeat):
                featdata_curr.append(data[k + 1])
            featdata.append(featdata_curr)

        self.data = np.array(featdata)
        if self.data.size == 0:
            logger.error("Empty feature data vector read!")
            return -1

        self.nsamples = self.data.shape[0]
        self.nfeatures = self.data.shape[1]
        logger.info("#nsamples=%d" % (self.nsamples))

        # - Set pre-classified data
        logger.info("Setting pre-classified data (if any) ...")
        self.__set_preclass_data()

        return 0
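
A hedged sketch of reading such a feature table with astropy.io.ascii, following the docstring's layout (sname, N features, classid); 'features.dat' is a placeholder path:

import numpy as np
from astropy.io import ascii

table = ascii.read("features.dat")   # placeholder; expects: sname feat_1 ... feat_N id
ncols = len(table.colnames)
nfeat = ncols - 2

snames = [row[0] for row in table]
classids = [row[ncols - 1] for row in table]
featdata = np.array([[row[k + 1] for k in range(nfeat)] for row in table])

print("nsamples=%d, nfeatures=%d" % (featdata.shape[0], featdata.shape[1]))
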
Example #19
	def __read_sdata(self, index):
		""" Read source data """

		# - Check index
		if index<0 or index>=self.datasize:
			logger.error("Invalid index %d given!" % index)
			return None

		# - Init sdata
		sdata= SData()
		sdata.refch= self.refch
		sdata.kernsize= self.kernsize
		sdata.draw= self.draw
		sdata.save_ssim_pars= self.save_ssim_pars
		sdata.negative_pix_fract_thr= self.negative_pix_fract_thr
		sdata.bad_pix_fract_thr= self.bad_pix_fract_thr
		
		sdata_mask= SData()
		sdata_mask.refch= self.refch
		sdata_mask.kernsize= self.kernsize
		sdata_mask.draw= self.draw
		sdata_mask.save_ssim_pars= self.save_ssim_pars
		sdata_mask.negative_pix_fract_thr= self.negative_pix_fract_thr
		sdata_mask.bad_pix_fract_thr= self.bad_pix_fract_thr

		# - Read source image data
		logger.debug("Reading source image data %d ..." % index)
		#d= self.datalist["data"][index]
		d= self.datalist[index]
		if sdata.set_from_dict(d)<0:
			logger.error("Failed to set source image data %d!" % index)
			return None

		if sdata.read_imgs()<0:
			logger.error("Failed to read source images %d!" % index)
			return None

		# - Read source masked image data
		logger.debug("Reading source masked image data %d ..." % index)
		#d= self.datalist_mask["data"][index]
		d= self.datalist_mask[index]
		
		if sdata_mask.set_from_dict(d)<0:
			logger.error("Failed to set source masked image data %d!" % index)
			return None

		if sdata_mask.read_imgs()<0:
			logger.error("Failed to read source masked images %d!" % index)
			return None

		return sdata, sdata_mask
Example #20
    def __create_pipeline(self):
        """ Build the feature selector pipeline """

        # - Create classifier inventory
        logger.info("Creating classifier inventory ...")
        self.__create_classifier_inventory()

        # - Set min/max nfeat range
        nf_min = self.nfeat_min
        nf_max = self.nfeat_max
        if nf_max == -1:
            nf_max = self.nfeatures

        self.nfeats = list(range(nf_min, nf_max + 1))

        # - Create models
        self.model = self.__create_model()
        if self.model is None:
            logger.error("Created model is None!")
            return -1

        for i in range(len(self.nfeats)):
            m = self.__create_model()
            self.models.append(m)

        # - Define dataset split (unique for all models)
        self.cv = StratifiedKFold(n_splits=self.cv_nsplits,
                                  shuffle=True,
                                  random_state=self.cv_seed)

        # - Create RFE & pipeline
        self.rfe = RFECV(
            estimator=self.model,
            step=1,
            #cv=self.cv,
            min_features_to_select=self.nfeat_min)
        self.pipeline = Pipeline(steps=[('featsel',
                                         self.rfe), ('model', self.model)])

        for i in range(len(self.nfeats)):
            n = self.nfeats[i]
            r = RFE(
                estimator=self.models[i],
                #cv=self.cv,
                n_features_to_select=n)
            p = Pipeline(steps=[('featsel', r), ('model', self.models[i])])
            self.pipelines.append(p)

        return 0
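
A self-contained scikit-learn sketch of the same RFECV-plus-pipeline construction on synthetic data; the logistic-regression estimator is an assumption, since the original __create_model may build a different classifier:

from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.pipeline import Pipeline

X, y = make_classification(n_samples=200, n_features=10, n_informative=4, random_state=1)

model = LogisticRegression(max_iter=1000)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)

# Recursive feature elimination with internal CV, wrapped in a pipeline
rfe = RFECV(estimator=model, step=1, min_features_to_select=2)
pipeline = Pipeline(steps=[('featsel', rfe), ('model', model)])

scores = cross_val_score(pipeline, X, y, scoring='f1', cv=cv, n_jobs=-1)
print("mean f1=%.3f" % scores.mean())
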
Example #21
    def run_from_dictlist(self, dlist, outfile='featdata_merged.dat'):
        """ Run feature merger """

        # - Read feature data and merge
        logger.info("Merging input feature data dicts ...")
        if self.__merge_data(dlist) < 0:
            logger.error("Failed to merge data!")
            return -1

        # - Save data
        logger.info("Saving merged data to file %s ..." % (outfile))
        self.__save(outfile)

        return 0
Example #22
    def run_predict(self, datafile, modelfile):
        """ Run predict using input dataset """

        #================================
        #==   LOAD DATA
        #================================
        # - Check inputs
        if datafile == "":
            logger.error("Empty data file specified!")
            return -1

        if self.set_data_from_file(datafile) < 0:
            logger.error("Failed to read datafile %s!" % datafile)
            return -1

        #================================
        #==   LOAD MODEL
        #================================
        logger.info("Loading the UMAP reducer from file %s ..." % modelfile)
        try:
            with open(modelfile, 'rb') as f:
                self.reducer = pickle.load(f)
        except Exception as e:
            logger.error("Failed to load model from file %s (err=%s)!" % (modelfile, str(e)))
            return -1

        #================================
        #==   PREDICT
        #================================
        if self.__predict() < 0:
            logger.error("Predict failed!")
            return -1

        return 0
Example #23
	def classify_sources(self):
		""" Run source classification """

		# - Run source classification
		if procId==MASTER:
			sclass_status= 0
			
			# - Define sclassifier class
			multiclass= not self.binary_class

			sclass= SClassifier(multiclass=multiclass)
			sclass.normalize= self.normalize_feat
			sclass.outfile= self.outfile_sclass
			sclass.outfile_metrics= self.outfile_sclass_metrics
			sclass.outfile_cm= self.outfile_sclass_cm
			sclass.outfile_cm_norm= self.outfile_sclass_cm_norm
			sclass.save_labels= self.save_class_labels

			sclass.find_outliers= self.find_outliers
			sclass.outlier_modelfile= self.modelfile_outlier
			sclass.outlier_thr= self.anomaly_thr
			sclass.outlier_max_samples= self.max_samples
			sclass.outlier_max_features= self.max_features
			sclass.save_outlier= self.save_outlier
			sclass.outlier_outfile= self.outfile_outlier
	
			# - Run classification
			sclass_status= sclass.run_predict(
				#data=self.feat_colors, class_ids=self.feat_colors_classids, snames=self.feat_colors_snames,
				data=self.feat_all, class_ids=self.feat_all_classids, snames=self.feat_all_snames,
				modelfile=self.modelfile, 
				scalerfile=self.scalerfile
			)
	
			if sclass_status<0:
				logger.error("[PROC %d] Failed to run classifier on input feature data!" % (procId))

		else:
			sclass_status= 0

		if comm is not None:
			sclass_status= comm.bcast(sclass_status, root=MASTER)

		if sclass_status<0:
			logger.error("[PROC %d] Failed to run classifier on input feature data, exit!" % (procId))
			return -1

		return 0
Example #24
    def run(self, datafile):
        """ Run feature selection """

        #================================
        #==   LOAD DATA
        #================================
        # - Check inputs
        if datafile == "":
            logger.error("Empty data file specified!")
            return -1

        if self.set_data_from_file(datafile) < 0:
            logger.error("Failed to read datafile %s!" % datafile)
            return -1

        #================================
        #==   EVALUATE MODELS
        #================================
        logger.info("Evaluating models ...")
        if self.__evaluate_model() < 0:
            logger.error("Failed to evaluate models!")
            return -1

        #================================
        #==   SAVE
        #================================
        logger.info("Saving results ...")
        if self.__save() < 0:
            logger.error("Failed to save results!")
            return -1

        return 0
Example #25
    def run(self, data, class_ids=[], snames=[]):
        """ Run feature selection using input dataset """

        #================================
        #==   LOAD DATA
        #================================
        # - Check inputs
        if data is None:
            logger.error("None input data specified!")
            return -1

        if self.set_data(data, class_ids, snames) < 0:
            logger.error("Failed to set input data!")
            return -1

        #================================
        #==   EVALUATE MODELS
        #================================
        logger.info("Evaluating models ...")
        if self.__evaluate_model() < 0:
            logger.error("Failed to evaluate models!")
            return -1

        #================================
        #==   SAVE
        #================================
        logger.info("Saving results ...")
        if self.__save() < 0:
            logger.error("Failed to save results!")
            return -1

        return 0
Example #26
    def run_predict(self, data, class_ids=[], snames=[], modelfile=''):
        """ Run precit using input dataset """

        #================================
        #==   LOAD DATA
        #================================
        # - Check inputs
        if data is None:
            logger.error("None input data specified!")
            return -1

        if self.set_data(data, class_ids, snames) < 0:
            logger.error("Failed to set input data!")
            return -1

        #================================
        #==   LOAD MODEL
        #================================
        logger.info("Loading the clustering model from file %s ..." %
                    modelfile)
        try:
            with open(modelfile, 'rb') as f:
                self.clusterer, self.prediction_extra_data = pickle.load(f)
        except Exception as e:
            logger.error("Failed to load model from file %s (err=%s)!" % (modelfile, str(e)))
            return -1

        #================================
        #==   PREDICT
        #================================
        if self.__predict() < 0:
            logger.error("Predict failed!")
            return -1

        return 0
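
For context, hdbscan.approximate_predict only works if the clusterer was fitted with prediction_data=True; a minimal end-to-end sketch on synthetic blobs:

import hdbscan
import numpy as np
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=3, random_state=0)

# Fit with prediction data cached, as required by approximate_predict
clusterer = hdbscan.HDBSCAN(min_cluster_size=10, prediction_data=True).fit(X)

X_new = np.array([[0.0, 0.0], [5.0, 5.0]])
labels, probs = hdbscan.approximate_predict(clusterer, X_new)
print(labels, probs)
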
Example #27
    def select(self, data, selcols, class_ids=[], snames=[]):
        """ Select data columns provided in selcols list """

        #================================
        #==   LOAD DATA
        #================================
        # - Check inputs
        if data is None:
            logger.error("None input data specified!")
            return -1

        if self.set_data(data, class_ids, snames) < 0:
            logger.error("Failed to set input data!")
            return -1

        #================================
        #==   SELECT COLUMNS
        #================================
        logger.info("Extracting columns ...")
        if self.__select_cols(selcols) < 0:
            logger.error("Failed to select data columns!")
            return -1

        #================================
        #==   SAVE
        #================================
        logger.info("Saving results ...")
        if self.__save() < 0:
            logger.error("Failed to save results!")
            return -1

        return 0
Example #28
    def select_from_file(self, datafile, selcols):
        """ Select data columns provided in selcols list """

        #================================
        #==   LOAD DATA
        #================================
        # - Check inputs
        if datafile == "":
            logger.error("Empty data file specified!")
            return -1

        if self.set_data_from_file(datafile) < 0:
            logger.error("Failed to read datafile %s!" % datafile)
            return -1

        #================================
        #==   SELECT COLUMNS
        #================================
        logger.info("Extracting columns ...")
        if self.__select_cols(selcols) < 0:
            logger.error("Failed to select data columns!")
            return -1

        #================================
        #==   SAVE
        #================================
        logger.info("Saving results ...")
        if self.__save() < 0:
            logger.error("Failed to save results!")
            return -1

        return 0
Example #29
	def shrink_masks(self, kernsizes=[]):
		""" Shrink masks """

		# - Set erosion kernel sizes
		if not kernsizes or len(kernsizes)!=self.nchannels:
			logger.info("kernsizes not given or of wrong length, setting kernsize=%d for all channels ..." % (self.kernsize))
			kernsizes= [self.kernsize]*self.nchannels

		print("--> kernsizes")
		print(kernsizes)

		# - Erode masks
		if self.draw:
			fig, axs = plt.subplots(4, self.nchannels)

		try:
			for i in range(self.nchannels):
				data= np.copy(self.img_data[i])
				mask= np.copy(self.img_data_mask[i])

				# - Do erosion if kernsize is >0
				if kernsizes[i]>0:
					structel= cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernsizes[i],kernsizes[i]))
					mask_eroded = cv2.erode(mask, structel, iterations=1)
					self.img_data_mask[i]= mask_eroded
					data_eroded= self.img_data[i]
					data_eroded[mask_eroded==0]= 0
					self.img_data[i]= data_eroded

				# - Draw original vs eroded data & masks
				if self.draw:
					axs[0, i].imshow(data)
					axs[1, i].imshow(mask)
					axs[2, i].imshow(self.img_data[i])
					axs[3, i].imshow(self.img_data_mask[i])

		except Exception as e:		
			logger.error("Failed to shrink masks (err=%s)!" % (str(e)))
			return -1	

		if self.draw:
			plt.show()

		return 0
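
A standalone sketch of the erosion step on a toy binary mask; the kernel size of 3 is arbitrary:

import cv2
import numpy as np

# Toy binary mask: a filled square
mask = np.zeros((20, 20), dtype=np.uint8)
mask[5:15, 5:15] = 1

kernsize = 3
structel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (kernsize, kernsize))
mask_eroded = cv2.erode(mask, structel, iterations=1)

print("pixels before=%d, after=%d" % (mask.sum(), mask_eroded.sum()))  # erosion shrinks the mask
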
Example #30
    def __predict(self):
        """ Encode input data with the loaded UMAP reducer and save results """

        #====================================================
        #==   CHECK DATA & MODEL
        #====================================================
        # - Check if data are set
        if self.data is None:
            logger.error("Input data array is None!")
            return -1

        # - Check if reducer is set
        if self.reducer is None:
            logger.error("UMAP reducer is not set!")
            return -1

        #====================================================
        #==   ENCODE DATA
        #====================================================
        logger.info("Encode input data using loaded model ...")
        self.encoded_data_unsupervised = self.reducer.transform(self.data)

        #================================
        #==   SAVE ENCODED DATA
        #================================
        # - Unsupervised encoded data
        logger.info("Saving unsupervised encoded data to file ...")
        N = self.encoded_data_unsupervised.shape[0]
        logger.info("Unsupervised encoded data shape=%s (N=%d)" % (str(self.encoded_data_unsupervised.shape), N))

        snames = np.array(self.source_names).reshape(N, 1)
        objids = np.array(self.data_ids).reshape(N, 1)

        # - Save unsupervised encoded data
        enc_data = np.concatenate(
            (snames, self.encoded_data_unsupervised, objids), axis=1)

        znames_counter = list(range(1, self.encoded_data_dim + 1))
        znames = '{}{}'.format('z',
                               ' z'.join(str(item) for item in znames_counter))
        head = '{} {} {}'.format("# sname", znames, "id")

        Utils.write_ascii(enc_data, self.outfile_encoded_data_unsupervised,
                          head)

        return 0
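
A compact sketch of the fit/transform split this predictor relies on, assuming the umap-learn package and synthetic data:

import umap
from sklearn.datasets import make_blobs

X_train, _ = make_blobs(n_samples=200, n_features=8, centers=3, random_state=0)
X_new, _ = make_blobs(n_samples=20, n_features=8, centers=3, random_state=1)

# Fit the reducer once (this is what gets pickled), then reuse it on new data
reducer = umap.UMAP(n_components=2, random_state=42).fit(X_train)

# Project new data into the learned embedding (the __predict step above)
encoded = reducer.transform(X_new)
print("encoded shape:", encoded.shape)  # (20, 2)
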