def test_statistics_gui_roi_spectrum(specviz_gui):
    """Compare the Statistics plugin output with values recomputed for a ROI.

    A linear region is added to the current plot, the same region is cut out
    of the plotted spectrum with ``extract_region``, and the statistics shown
    by the plugin are compared with the ones recomputed here.

    The workspace is closed in a ``finally`` clause so that a failing
    assertion (or any earlier error) does not leave it open for later tests.
    """
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    try:
        hub = Hub(workspace=workspace)

        # Make region of interest cutout, using default cutout at .3 from the
        # middle in either direction
        specviz_gui.current_workspace.current_plot_window.plot_widget._on_add_linear_region()

        # Simulate cutout for truth data
        spectrum = extract_region(hub.plot_item._data_item.spectrum,
                                  SpectralRegion(*hub.selected_region_bounds))

        # Pull out the stats dictionary published by the plugin
        stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

        # Generate truth comparisons
        truth_dict = {
            'mean': spectrum.flux.mean(),
            'median': np.median(spectrum.flux),
            'stddev': spectrum.flux.std(),
            'centroid': centroid(spectrum, region=None),
            'snr': "N/A",
            'fwhm': fwhm(spectrum),
            'ew': equivalent_width(spectrum),
            'total': line_flux(spectrum),
            'maxval': spectrum.flux.max(),
            'minval': spectrum.flux.min(),
        }

        # compare!
        assert stats_dict == truth_dict
    finally:
        workspace.close()
def test_statistics_gui_full_spectrum(specviz_gui):
    """Compare the Statistics plugin output with values recomputed for the full spectrum.

    No region is selected; the truth values are computed from the spectrum
    attached to the hub's current plot item.

    The workspace is closed in a ``finally`` clause so that a failing
    assertion (or any earlier error) does not leave it open for later tests.
    """
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    try:
        hub = Hub(workspace=workspace)

        # Pull out the stats dictionary published by the plugin
        stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

        # Generate truth comparisons
        spectrum = hub.plot_item._data_item.spectrum
        truth_dict = {
            'mean': spectrum.flux.mean(),
            'median': np.median(spectrum.flux),
            'stddev': spectrum.flux.std(),
            'centroid': centroid(spectrum, region=None),
            'snr': "N/A",
            'fwhm': fwhm(spectrum),
            'ew': equivalent_width(spectrum),
            'total': line_flux(spectrum),
            'maxval': spectrum.flux.max(),
            'minval': spectrum.flux.min(),
        }

        # compare!
        assert stats_dict == truth_dict
    finally:
        workspace.close()
def compute_stats(spectrum):
    """
    Gather summary statistics for a spectrum.

    Parameters
    ----------
    spectrum : `~specutils.spectra.spectrum1d.Spectrum1D`
        Spectrum to summarise; only ``spectrum.flux`` and the analysis
        helpers called below are used.

    Returns
    -------
    dict
        Keys ``mean``, ``median``, ``stddev``, ``centroid``, ``rms``,
        ``snr``, ``fwhm``, ``ew``, ``total``, ``maxval`` and ``minval``.
        ``snr`` is the string ``"N/A"`` when ``snr(spectrum)`` raises.
    """
    flux = spectrum.flux

    flux_mean = flux.mean()
    # Root mean square of the flux values.
    flux_rms = np.sqrt(flux.dot(flux) / len(flux))

    # The signal-to-noise estimate is the only measurement allowed to fail.
    try:
        signal_to_noise = snr(spectrum)
    except Exception:
        signal_to_noise = "N/A"

    flux_median = np.median(flux)
    flux_std = flux.std()
    # we may want to adjust this for continuum subtraction
    line_centroid = centroid(spectrum, region=None)
    width = fwhm(spectrum)
    eq_width = equivalent_width(spectrum)
    integrated = line_flux(spectrum)
    flux_max = flux.max()
    flux_min = flux.min()

    stats = {}
    stats['mean'] = flux_mean
    stats['median'] = flux_median
    stats['stddev'] = flux_std
    stats['centroid'] = line_centroid
    stats['rms'] = flux_rms
    stats['snr'] = signal_to_noise
    stats['fwhm'] = width
    stats['ew'] = eq_width
    stats['total'] = integrated
    stats['maxval'] = flux_max
    stats['minval'] = flux_min
    return stats
def test_statistics_gui_roi_spectrum(specviz_gui):
    """Check the Statistics plugin against truth values for a region of interest.

    Adds a linear region to the current plot widget, extracts the same region
    from the plotted spectrum, recomputes each statistic and asserts that the
    plugin's ``stats`` dictionary matches.

    Cleanup runs in ``finally`` so the workspace is closed even when the
    comparison fails.
    """
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    try:
        hub = Hub(workspace=workspace)

        # Make region of interest cutout, using default cutout at .3 from the
        # middle in either direction
        plot_widget = specviz_gui.current_workspace.current_plot_window.plot_widget
        plot_widget._on_add_linear_region()

        # Simulate cutout for truth data
        region = SpectralRegion(*hub.selected_region_bounds)
        spectrum = extract_region(hub.plot_item._data_item.spectrum, region)

        # Pull out the stats dictionary published by the plugin
        stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

        # Generate truth comparisons
        truth_dict = {
            'mean': spectrum.flux.mean(),
            'median': np.median(spectrum.flux),
            'stddev': spectrum.flux.std(),
            'centroid': centroid(spectrum, region=None),
            'snr': "N/A",
            'fwhm': fwhm(spectrum),
            'ew': equivalent_width(spectrum),
            'total': line_flux(spectrum),
            'maxval': spectrum.flux.max(),
            'minval': spectrum.flux.min(),
        }

        # compare!
        assert stats_dict == truth_dict
    finally:
        workspace.close()
def test_statistics_gui_full_spectrum(specviz_gui):
    """Check the Statistics plugin against truth values for the whole spectrum.

    Recomputes each statistic from the spectrum attached to the hub's plot
    item and asserts that the plugin's ``stats`` dictionary matches.

    Cleanup runs in ``finally`` so the workspace is closed even when the
    comparison fails.
    """
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    try:
        hub = Hub(workspace=workspace)

        # Pull out the stats dictionary published by the plugin
        stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

        # Generate truth comparisons
        spectrum = hub.plot_item._data_item.spectrum
        truth_dict = {
            'mean': spectrum.flux.mean(),
            'median': np.median(spectrum.flux),
            'stddev': spectrum.flux.std(),
            'centroid': centroid(spectrum, region=None),
            'snr': "N/A",
            'fwhm': fwhm(spectrum),
            'ew': equivalent_width(spectrum),
            'total': line_flux(spectrum),
            'maxval': spectrum.flux.max(),
            'minval': spectrum.flux.min(),
        }

        # compare!
        assert stats_dict == truth_dict
    finally:
        workspace.close()
def line(spec, wave1, wave2):
    """Derive initial-guess parameters for a line between two wavelengths.

    Parameters
    ----------
    spec
        Spectrum passed to ``centroid``, ``fwhm`` and ``line_flux``.
    wave1, wave2
        Bounds of the wavelength window; they are multiplied by ``u.AA``
        to build the ``SpectralRegion``.

    Returns
    -------
    list
        ``[centre, A, FWHM]`` as plain floats: the centroid divided by
        ``u.AA``, the line flux divided by ``u.AA * J cm-2 s-1 AA-1``, and
        the FWHM divided by ``u.AA``.

    Notes
    -----
    The line flux previously used ``lamb1``/``lamb2``, which are not
    parameters of this function; it now integrates over the same
    ``wave1``-``wave2`` window as the centroid.
    """
    window = SpectralRegion(wave1 * u.AA, wave2 * u.AA)

    # Find the centroid inside the window and strip its unit.
    centre = float(centroid(spec, window) / u.AA)

    # NOTE(review): the FWHM is measured on the whole spectrum, not on the
    # window, exactly as before -- confirm this is intended.
    FWHM = float(fwhm(spec) / u.AA)

    # Integrated flux over the window, expressed as a bare number.
    flux_density_unit = u.Unit('J cm-2 s-1 AA-1')
    A = float(line_flux(spec, window) / (u.AA * flux_density_unit))

    # PARAMETERS
    return [centre, A, FWHM]
def compute_stats(spectrum):
    """
    Summarise a spectrum, tolerating failures of individual measurements.

    Parameters
    ----------
    spectrum : `~specutils.spectra.spectrum1d.Spectrum1D`
        Spectrum to summarise.

    Returns
    -------
    dict
        Keys ``mean``, ``median``, ``stddev``, ``centroid``, ``snr``,
        ``fwhm``, ``ew``, ``total``, ``maxval`` and ``minval``. When a
        measurement raises, the exception is logged at debug level and the
        entry becomes ``"N/A"`` (for ``snr``) or ``"Error"`` (for
        ``centroid``, ``fwhm``, ``ew`` and ``total``).
    """
    def _guarded(measure, fallback):
        # Run one measurement; on failure log it and return the marker.
        try:
            return measure()
        except Exception as e:
            logging.debug(e)
            return fallback

    # Each call is wrapped in a lambda so that it is evaluated inside the
    # guard, in the same order as before.
    # we may want to adjust the centroid for continuum subtraction
    cent = _guarded(lambda: centroid(spectrum, region=None), "Error")
    snr_val = _guarded(lambda: snr(spectrum), "N/A")
    fwhm_val = _guarded(lambda: fwhm(spectrum), "Error")
    ew = _guarded(lambda: equivalent_width(spectrum), "Error")
    total = _guarded(lambda: line_flux(spectrum), "Error")

    stats = {
        'mean': spectrum.flux.mean(),
        'median': np.median(spectrum.flux),
        'stddev': spectrum.flux.std(),
        'centroid': cent,
        'snr': snr_val,
        'fwhm': fwhm_val,
        'ew': ew,
        'total': total,
        'maxval': spectrum.flux.max(),
        'minval': spectrum.flux.min(),
    }
    return stats
def _match_ids(table):
    """Build one 'PLATE-MJD-FIBERID' string per row of a FITS table."""
    keys = pd.DataFrame(
        {name: table[name] for name in ('PLATE', 'MJD', 'FIBERID')}
    ).astype(str)
    mjd_fiber = keys['MJD'].str.cat(keys['FIBERID'], sep="-")
    return keys['PLATE'].str.cat(mjd_fiber, sep="-").to_numpy(dtype=object)


def _print_class_counts(class_person):
    """Print how many catalogue rows carry each CLASS_PERSON code."""
    # CLASS_PERSON codes: STAR=1, GALAXY=4, QSO=3 and QSO_BAL=30.
    labelled_codes = (('Star', 1), ('Galaxy', 4), ('QSO', 3), ('QSO BAL', 30))
    print('INFO: There is available')
    for label, code in labelled_codes:
        print('-->{}:'.format(label), np.count_nonzero(class_person == code))
    # Everything that is none of the four known codes.
    known = [code for _, code in labelled_codes]
    print('-->NN: {}\n'.format(np.count_nonzero(~np.isin(class_person, known))))


def _sample_class(candidates, class_code, count, confident_only=False):
    """Draw ``int(count)`` rows of one CLASS_PERSON code (fixed seed 5)."""
    subset = candidates.loc[candidates['CLASS_PERSON'] == class_code]
    if confident_only:
        subset = subset.loc[subset['Z_CONF_PERSON'] == 3]
    return subset.sample(n=int(count), weights='CLASS_PERSON', random_state=5)


def _sample_redshift_targets(candidates, objts, N_sample):
    """Pick the quasar sample requested by ``objts`` for the regression mode."""
    names = [str(name) for name in objts]
    if not names:
        raise ValueError("objts must name at least one object type")

    recognised = ('QSO', 'QSO_BAL')
    # Both quasar types requested -> half of each. This used to be shadowed
    # by the single-type tests on names[0]. Any other two-element request
    # keeps its legacy meaning (also half of each).
    wants_both = len(names) == 2 and (
        set(names) == set(recognised) or names[0] not in recognised)
    if wants_both:
        halves = [_sample_class(candidates, code, N_sample / 2, True)
                  for code in (3, 30)]
        return pd.concat(halves)
    if names[0] == 'QSO':
        return _sample_class(candidates, 3, N_sample, True)
    if names[0] == 'QSO_BAL':
        return _sample_class(candidates, 30, N_sample, True)
    raise ValueError(
        "objts must be ['QSO'], ['QSO_BAL'] or both; got {!r}".format(names))


def _line_features(spectra, rows, kernel_size=5, flux_threshold=1.1):
    """Measure seven summary features for each selected spectrum row."""
    wavelengths = np.linspace(3600, 10500, 443)
    features = np.zeros((len(rows), 7))
    for j, k in enumerate(rows):
        spectrum = Spectrum1D(flux=spectra[k, :443] * u.Jy,
                              spectral_axis=wavelengths * u.AA)

        # Continuum fit and gaussian smooth
        continuum = fit_generic_continuum(spectrum)(wavelengths * u.AA)
        smoothed = gaussian_smooth(spectrum / continuum, kernel_size)

        # Number of emission lines found by the derivative line finder
        lines = find_lines_derivative(smoothed, flux_threshold=flux_threshold)
        emission = lines[lines['line_type'] == 'emission']
        features[j, 0] = emission['line_center_index'].shape[0]

        features[j, 1] = fwhm(smoothed).value              # FWHM
        features[j, 2] = equivalent_width(smoothed).value  # EW
        features[j, 3] = np.mean(smoothed.flux)            # Spectrum mean
        features[j, 4] = np.std(smoothed.flux)             # Spectrum std dev
        features[j, 5] = line_flux(smoothed).value         # Flux integral
        features[j, 6] = snr_derived(smoothed).value       # Spectrum SNR

    # Column names (including the historical 'FHWM' spelling) are part of
    # the returned interface and are kept as they were.
    return pd.DataFrame(data={
        'Lines_Number': features[:, 0],
        'FHWM': features[:, 1],
        'EW': features[:, 2],
        'Mean': features[:, 3],
        'STDV': features[:, 4],
        'Spectrum_Flux': features[:, 5],
        'SNR': features[:, 6],
    })


def _normalised_flux(spectra, rows):
    """Return the first 443 flux bins of each row, standardised per spectrum."""
    X = np.zeros((len(rows), 443))
    for j, k in enumerate(rows):
        X[j, :] = spectra[k, :443]

    # Renormalize spectra: subtract the per-row mean, divide by the per-row
    # root-mean-square deviation.
    mean_flx = np.ma.average(X, axis=1)
    ll = (X - mean_flx.reshape(-1, 1))**2
    aveflux = np.ma.average(ll, axis=1)
    sflux = np.sqrt(aveflux)
    return (X - mean_flx.reshape(-1, 1)) / sflux.reshape(-1, 1)


def Load_Files(file_1, file_2, N_sample, objts, classification=False):
    """Load a labelled catalogue and its spectra and build a training sample.

    Rows of the two FITS files are matched on a 'PLATE-MJD-FIBERID' key.
    From the matched rows a fixed-seed random sample is drawn and turned
    into model inputs.

    Parameters
    ----------
    file_1 : str
        FITS file whose HDU 1 table provides ``PLATE``, ``MJD``, ``FIBERID``,
        ``CLASS_PERSON``, ``Z_VI`` and ``Z_CONF_PERSON``.
    file_2 : str
        FITS file whose HDU 1 table provides ``PLATE``, ``MJD``, ``FIBERID``
        and whose HDU 0 holds one spectrum per row.
    N_sample : int
        Requested sample size. It is split evenly over two classes
        (``objts`` with both quasar types) or four classes (classification
        mode). When it is not divisible, the outputs have
        ``int(N_sample / k) * k`` rows.
    objts : sequence of str
        ``['QSO']``, ``['QSO_BAL']`` or both; only used when
        ``classification`` is not ``True``.
    classification : bool, optional
        ``False`` (default): return spectral-line features and redshifts.
        ``True``: return normalised spectra and class labels.

    Returns
    -------
    (parameters, y) when ``classification`` is not ``True``
        ``parameters`` is a DataFrame with columns ``Lines_Number``,
        ``FHWM``, ``EW``, ``Mean``, ``STDV``, ``Spectrum_Flux``, ``SNR``;
        ``y`` is a float array of ``Z_VI``.
    (X, y) when ``classification`` is ``True``
        ``X`` holds the first 443 flux bins of each sampled spectrum,
        standardised per row; ``y`` is a float array with star=0, galaxy=1,
        QSO=2, QSO_BAL=3.

    Raises
    ------
    ValueError
        If ``objts`` is empty or names no supported object type
        (regression mode only).
    """
    print('INFO:')
    # Both files stay open for the whole computation and are closed on exit;
    # everything returned is a freshly computed array/DataFrame.
    with fits.open(file_1, mode='denywrite') as hdul, \
            fits.open(file_2, mode='denywrite') as hdul_2:
        data = hdul[1].data       # Spectra with human-expert classifications
        print('INFO:')
        data2 = hdul_2[1].data    # Database of spectra
        spectra = hdul_2[0].data  # Spectrum of each object

        _print_class_counts(data['CLASS_PERSON'])

        # Intersect the combined IDs; the two index arrays point back into
        # the original rows of file 1 and file 2 respectively.
        common_ids, rows_truth, rows_spectra = np.intersect1d(
            _match_ids(data), _match_ids(data2), return_indices=True)
        print('INFO:')
        print('I find {} objects with spectra from DR12 \n'.format(
            len(common_ids)))

        # Maps a catalogue row (file 1) to its spectrum row (file 2).
        spectra_row_of = pd.Series(rows_spectra, index=rows_truth)

        catalogue = pd.DataFrame(data={
            'CLASS_PERSON': np.array(data['CLASS_PERSON'], dtype=float),
            'Z_VI': np.array(data['Z_VI'], dtype=float),
            'Z_CONF_PERSON': np.array(data['Z_CONF_PERSON'], dtype=float),
        })
        obj = catalogue.loc[rows_truth]

        # `!= True` (not `not classification`) keeps the legacy handling of
        # non-bool flag values.
        if classification != True:  # noqa: E712
            sample_objects = _sample_redshift_targets(
                obj, np.atleast_1d(objts), N_sample)
            rows = spectra_row_of.loc[sample_objects.index].to_numpy()
            parameters = _line_features(spectra, rows)
            y = np.array(sample_objects['Z_VI'], dtype=float)
            return parameters, y

        # Classification: a quarter of the sample from each class, in the
        # order star, galaxy, QSO, QSO BAL.
        sample_objects = pd.concat(
            [_sample_class(obj, code, N_sample / 4) for code in (1, 4, 3, 30)])
        rows = spectra_row_of.loc[sample_objects.index].to_numpy()
        X = _normalised_flux(spectra, rows)

        y = sample_objects['CLASS_PERSON']
        y = y.replace([1, 4, 3, 30], [0, 1, 2, 3]).values
        y = np.array(y, dtype=float)
        return X, y