def test_statistics_gui_full_spectrum(specviz_gui):
    """Check the Statistics plugin values against independently computed
    statistics for the full, unmodified spectrum."""
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    hub = Hub(workspace=workspace)

    # Statistics reported by the GUI plugin
    stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

    # Independently computed "truth" values for the same spectrum
    spectrum = hub.plot_item._data_item.spectrum
    flux = spectrum.flux
    truth_dict = {
        'mean': flux.mean(),
        'median': np.median(flux),
        'stddev': flux.std(),
        'centroid': centroid(spectrum, region=None),
        'snr': "N/A",
        'fwhm': fwhm(spectrum),
        'ew': equivalent_width(spectrum),
        'total': line_flux(spectrum),
        'maxval': flux.max(),
        'minval': flux.min(),
    }

    # compare!
    assert stats_dict == truth_dict
    workspace.close()
def test_statistics_gui_roi_spectrum(specviz_gui):
    """Check the Statistics plugin values against independently computed
    statistics over a linear-region (ROI) cutout of the spectrum."""
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    hub = Hub(workspace=workspace)

    # Make region of interest cutout, using default cutout at .3 from the
    # middle in either direction
    specviz_gui.current_workspace.current_plot_window.plot_widget._on_add_linear_region()

    # Simulate cutout for truth data
    spectrum = extract_region(hub.plot_item._data_item.spectrum,
                              SpectralRegion(*hub.selected_region_bounds))

    # Statistics reported by the GUI plugin
    stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

    # Independently computed truth values over the extracted sub-spectrum
    flux = spectrum.flux
    truth_dict = {
        'mean': flux.mean(),
        'median': np.median(flux),
        'stddev': flux.std(),
        'centroid': centroid(spectrum, region=None),
        'snr': "N/A",
        'fwhm': fwhm(spectrum),
        'ew': equivalent_width(spectrum),
        'total': line_flux(spectrum),
        'maxval': flux.max(),
        'minval': flux.min(),
    }

    # compare!
    assert stats_dict == truth_dict
    workspace.close()
def test_statistics_gui_roi_spectrum(specviz_gui):
    """Verify GUI Statistics over an ROI cutout match values computed
    directly with the specutils analysis functions."""
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    hub = Hub(workspace=workspace)

    # Make region of interest cutout, using default cutout at .3 from the
    # middle in either direction
    specviz_gui.current_workspace.current_plot_window.plot_widget._on_add_linear_region()

    # Simulate cutout for truth data
    roi_spectrum = extract_region(hub.plot_item._data_item.spectrum,
                                  SpectralRegion(*hub.selected_region_bounds))

    # pull out stats dictionary
    stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

    # Generate truth comparisons
    roi_flux = roi_spectrum.flux
    truth_dict = {'mean': roi_flux.mean(),
                  'median': np.median(roi_flux),
                  'stddev': roi_flux.std(),
                  'centroid': centroid(roi_spectrum, region=None),
                  'snr': "N/A",
                  'fwhm': fwhm(roi_spectrum),
                  'ew': equivalent_width(roi_spectrum),
                  'total': line_flux(roi_spectrum),
                  'maxval': roi_flux.max(),
                  'minval': roi_flux.min()}

    # compare!
    assert stats_dict == truth_dict
    workspace.close()
def compute_stats(spectrum):
    """
    Compute basic statistics for a spectral region.

    Parameters
    ----------
    spectrum : `~specutils.spectra.spectrum1d.Spectrum1D`
        Spectrum (or extracted sub-spectrum) to characterize.

    Returns
    -------
    dict
        Statistic name -> value. The 'snr' entry is the string "N/A"
        when the SNR computation raises (e.g. no uncertainty attached).
    """
    flux_values = spectrum.flux

    # Signal-to-noise can fail for spectra without uncertainties; fall
    # back to a sentinel string rather than aborting the whole summary.
    try:
        snr_val = snr(spectrum)
    except Exception:
        snr_val = "N/A"

    return {
        'mean': flux_values.mean(),
        'median': np.median(flux_values),
        'stddev': flux_values.std(),
        # we may want to adjust this for continuum subtraction
        'centroid': centroid(spectrum, region=None),
        'rms': np.sqrt(flux_values.dot(flux_values) / len(flux_values)),
        'snr': snr_val,
        'fwhm': fwhm(spectrum),
        'ew': equivalent_width(spectrum),
        'total': line_flux(spectrum),
        'maxval': flux_values.max(),
        'minval': flux_values.min(),
    }
def test_statistics_gui_full_spectrum(specviz_gui):
    """Full-spectrum Statistics plugin check: GUI values must equal
    directly computed specutils statistics."""
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)
    hub = Hub(workspace=workspace)

    # pull out stats dictionary
    stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

    # Generate truth comparisons
    spectrum = hub.plot_item._data_item.spectrum
    truth_dict = dict(mean=spectrum.flux.mean(),
                      median=np.median(spectrum.flux),
                      stddev=spectrum.flux.std(),
                      centroid=centroid(spectrum, region=None),
                      snr="N/A",
                      fwhm=fwhm(spectrum),
                      ew=equivalent_width(spectrum),
                      total=line_flux(spectrum),
                      maxval=spectrum.flux.max(),
                      minval=spectrum.flux.min())

    # compare!
    assert stats_dict == truth_dict
    workspace.close()
def line(spec, wave1, wave2):
    """
    Estimate initial Gaussian line parameters for a spectral line.

    Parameters
    ----------
    spec : `~specutils.Spectrum1D`
        Spectrum containing the line.
    wave1, wave2 : float
        Lower/upper wavelength bounds of the line window, in Angstrom.

    Returns
    -------
    list
        ``[centre, A, FWHM]`` — line centroid (Angstrom), integrated line
        flux (dimensionless, stripped of its unit), and FWHM (Angstrom),
        all as plain floats suitable as fit starting guesses.
    """
    # Centroid of the line within the requested window
    centre = centroid(spec, SpectralRegion(wave1 * u.AA, wave2 * u.AA))
    centre = float(centre / (1. * u.AA))

    # FWHM over the whole spectrum, converted to a bare float
    FWHM = fwhm(spec)
    FWHM = float(FWHM / (1. * u.AA))

    # BUG FIX: the original referenced undefined names `lamb1`/`lamb2`
    # (NameError at runtime); the intended bounds are the `wave1`/`wave2`
    # arguments used for the centroid above.
    A = line_flux(spec, SpectralRegion(wave1 * u.AA, wave2 * u.AA))
    a = 1 * u.Unit('J cm-2 s-1 AA-1')
    A = float(A / (1. * u.AA * a))

    # PARAMETERS
    return [centre, A, FWHM]
def compute_stats(spectrum):
    """
    Compute basic statistics for a spectral region.

    Parameters
    ----------
    spectrum : `~specutils.spectra.spectrum1d.Spectrum1D`
        Spectrum to characterize.

    Returns
    -------
    dict
        Statistic name -> value. Entries produced by specutils analysis
        helpers fall back to the string "Error" ("N/A" for 'snr') when
        the underlying call raises; the exception is logged at debug level.
    """
    def _guarded(compute, fallback):
        # Run one analysis function; log and substitute the fallback on
        # any failure so a single bad statistic doesn't abort the summary.
        try:
            return compute()
        except Exception as e:
            logging.debug(e)
            return fallback

    flux = spectrum.flux
    return {
        'mean': flux.mean(),
        'median': np.median(flux),
        'stddev': flux.std(),
        # we may want to adjust this for continuum subtraction
        'centroid': _guarded(lambda: centroid(spectrum, region=None), "Error"),
        'snr': _guarded(lambda: snr(spectrum), "N/A"),
        'fwhm': _guarded(lambda: fwhm(spectrum), "Error"),
        'ew': _guarded(lambda: equivalent_width(spectrum), "Error"),
        'total': _guarded(lambda: line_flux(spectrum), "Error"),
        'maxval': flux.max(),
        'minval': flux.min(),
    }
def test_to_spectrum1d_compound_bandpass(self):
    """A compound (product) bandpass converted to Spectrum1D must keep
    the same waveset/throughput, and its integral must match line_flux."""
    from specutils.analysis import line_flux

    # Product of two identical 1 AA boxes with amplitude 0.5
    box = SpectralElement(Box1D, amplitude=0.5, x_0=5000 * u.AA,
                          width=1 * u.AA)
    bp = box * box

    spec = bp.to_spectrum1d()
    wave = bp.waveset
    integrated_thru = bp.integrate()

    assert_quantity_allclose(spec.spectral_axis, wave)
    assert_quantity_allclose(spec.flux, bp(wave))
    # 0.5 * 0.5 amplitude over a 1 AA wide box integrates to 0.25 AA
    assert_quantity_allclose(integrated_thru, 0.25 * u.AA)
    assert_quantity_allclose(integrated_thru, line_flux(spec))
def test_to_spectrum1d_compound_source(self):
    """A compound (sum) source converted to Spectrum1D must integrate to
    the same total flux via synphot and via specutils line_flux."""
    from specutils.analysis import line_flux

    total_flux = 0.5 * (u.erg / u.s / u.cm / u.cm)
    width = 1 * u.AA

    # Two Gaussian sources at different wavelengths, then summed
    g1 = SourceSpectrum(GaussianFlux1D, mean=300 * u.nm, fwhm=width,
                        total_flux=total_flux)
    g2 = SourceSpectrum(GaussianFlux1D, mean=400 * u.nm, fwhm=width,
                        total_flux=total_flux)
    sp = g1 + g2

    spec = sp.to_spectrum1d(flux_unit=units.FLAM)
    integrated_flux = sp.integrate(flux_unit=units.FLAM)

    # Each Gaussian carries total_flux, so the sum integrates to 2 * 0.5 = 1
    assert_quantity_allclose(integrated_flux, 1 * total_flux.unit, rtol=0.002)
    assert_quantity_allclose(integrated_flux, line_flux(spec), rtol=1e-5)
def line_fit(spec, spec_err, wave_obj, dwave=10. * u.AA,
             dwave_cont=100. * u.AA, sigmamax=14. * u.AA):
    '''
    Function to fit a 1D gaussian to a HETDEX spectrum from get_spec.py

    Parameters
    ----------
    spec
        1D spectrum from a row in the table provided by get_spec.py.
        Will assume unit of 10**-17*u.Unit('erg cm-2 s-1 AA-1') if no
        units are provided.
    spec_err
        1D spectral uncertainty from table provided by get_spec.py.
        Will assume unit of 10**-17*u.Unit('erg cm-2 s-1 AA-1') if no
        units are provided.
    wave_obj
        wavelength you want to fit, an astropy quantity
    dwave
        spectral region above and below wave_obj to fit a line, an
        astropy quantity. Default is 10.*u.AA
    dwave_cont
        spectral region to fit continuum. Default is +/- 100.*u.AA
    sigmamax
        Maximum linewidth (this is sigma/stdev of the gaussian fit) to
        allow for a fit. Assumes unit of u.AA if not given

    Returns
    -------
    tuple or None
        ``(line_param, sn, chi2, sigma, line_flux_data, line_flux_model,
        line_flux_data_err, g_fit, cont)`` on success; ``None`` when the
        initial line-parameter estimate fails or yields NaN.
    '''
    # Build a Spectrum1D on the fixed HETDEX wavelength grid; if the inputs
    # are plain arrays Spectrum1D raises ValueError, in which case attach
    # the standard HETDEX flux unit explicitly.
    try:
        spectrum = Spectrum1D(flux=spec,
                              spectral_axis=(2.0 * np.arange(1036) + 3470.) * u.AA,
                              uncertainty=StdDevUncertainty(spec_err),
                              velocity_convention=None)
    except ValueError:
        spectrum = Spectrum1D(
            flux=spec * 10**-17 * u.Unit('erg cm-2 s-1 AA-1'),
            spectral_axis=(2.0 * np.arange(1036) + 3470.) * u.AA,
            uncertainty=StdDevUncertainty(spec_err * 10**-17 *
                                          u.Unit('erg cm-2 s-1 AA-1')),
            velocity_convention=None)

    # measure continuum over 2*dwave_cont wide window first:
    cont_region = SpectralRegion((wave_obj - dwave_cont),
                                 (wave_obj + dwave_cont))
    cont_spectrum = extract_region(spectrum, cont_region)
    cont = np.median(cont_spectrum.flux)

    if np.isnan(cont):
        # set continuum to zero if the median is NaN
        print('Continuum fit is NaN. Setting to 0.0')
        cont = 0.0 * cont_spectrum.unit

    # now get region to fit the continuum subtracted line
    sub_region = SpectralRegion((wave_obj - dwave), (wave_obj + dwave))
    sub_spectrum = extract_region(spectrum, sub_region)

    # BUG FIX: was a bare `except:` which also swallowed SystemExit and
    # KeyboardInterrupt; narrowed to Exception.
    try:
        line_param = estimate_line_parameters(sub_spectrum - cont,
                                              models.Gaussian1D())
    except Exception:
        return None

    if np.isnan(line_param.amplitude.value):
        print('Line fit yields NaN result. Exiting.')
        return None

    # Clamp the linewidth at sigmamax; retry with an explicit AA unit if
    # sigmamax was passed unitless (np.minimum raises ValueError then).
    try:
        sigma = np.minimum(line_param.stddev, sigmamax)
    except ValueError:
        sigma = np.minimum(line_param.stddev, sigmamax * u.AA)

    if np.isnan(sigma):
        sigma = sigmamax

    g_init = models.Gaussian1D(amplitude=line_param.amplitude,
                               mean=line_param.mean, stddev=sigma)

    # Fit the Gaussian to the continuum-subtracted sub-spectrum
    g_fit = fit_lines(sub_spectrum - cont, g_init)

    # Evaluate the model on a 0.5 AA grid and integrate it numerically
    x = np.arange(wave_obj.value - dwave.value,
                  wave_obj.value + dwave.value, 0.5) * u.AA
    y_fit = g_fit(x)
    line_flux_model = np.sum(y_fit * 0.5 * u.AA)

    chi2 = calc_chi2(sub_spectrum - cont, g_fit)

    # Simple S/N: summed flux over the quadrature-summed uncertainty
    sn = np.sum(np.array(sub_spectrum.flux)) / np.sqrt(
        np.sum(sub_spectrum.uncertainty.array**2))

    line_flux_data = line_flux(sub_spectrum - cont).to(u.erg * u.cm**-2 *
                                                       u.s**-1)
    line_flux_data_err = np.sqrt(np.sum(sub_spectrum.uncertainty.array**2))

    return (line_param, sn, chi2, sigma, line_flux_data, line_flux_model,
            line_flux_data_err, g_fit, cont)
def Load_Files(file_1, file_2, N_sample, objts, classification=False):
    """Load and cross-match two FITS catalogs, then build a sample.

    Opens `file_1` (human-expert classifications, e.g. Superset_DR12Q.fits)
    and `file_2` (spectra database, e.g. data_dr12.fits), matches objects
    via a synthetic PLATE-MJD-FIBERID id, and returns either:

    - `classification` falsy: a (parameters, redshift) pair where
      `parameters` is a per-spectrum feature table (line count, FWHM, EW,
      mean, stddev, integrated flux, SNR) for the QSO/QSO_BAL subsample
      selected by `objts`;
    - `classification` truthy: a (X, y) pair where X is the renormalized
      spectra matrix and y the class labels remapped to 0..3.

    NOTE(review): this docstring describes the visible code paths; the
    exact FITS column semantics come from the DR12Q data model — confirm
    against the upstream catalog documentation.
    """
    print('INFO:')
    #hdul = fitsio.FITS(file_1) # Open file 1 -- 'truth_DR12Q.fits'
    #info=hdul.info() # File info
    hdul = fits.open(file_1, mode='denywrite')
    #data=hdul[1].read() # Database of spectra with human-expert classifications
    data = hdul[1].data
    #print('The file {} have {} objects. \n'.format(file_1,data.shape[0]))
    print('INFO:')
    # Reading data from data_dr12.fits. This file had the spectra from data dr12.
    #hdul_2 = fitsio.FITS(file_2) # Open file 2 -- 'data_dr12.fits'
    #info2=hdul_2.info() # File info
    #data2=hdul_2[1].read() # Database of spectra
    #spectra=hdul_2[0].read() # Spectrum of each object
    hdul_2 = fits.open(file_2, mode='denywrite')
    data2 = hdul_2[1].data  # Database of spectra
    spectra = hdul_2[0].data  # Spectrum of each object
    #print('The file {} have {} spectra. \n'.format(file_2,spectra.shape[0]))

    # Subset of PLATE parameters of both data
    data_PLATE_1 = data['PLATE']
    data_PLATE_2 = data2['PLATE']
    # Subset of MJD parameters of both data
    data_MJD_1 = data['MJD']
    data_MJD_2 = data2['MJD']
    # Subset of FIBERID parameters of both data
    data_FIBERID_1 = data['FIBERID']
    data_FIBERID_2 = data2['FIBERID']
    data_ID_1 = data['THING_ID']
    data_ID_2 = data2['TARGETID']

    objts = np.asarray(objts)

    # The 'CLASS_PERSON' column holds a class identifier per spectrum:
    # STAR=1, GALAXY=4, QSO=3 and QSO_BAL=30.
    C_P = data['CLASS_PERSON']  # Class Person column
    STAR = C_P[C_P == 1]  # objects classified as stars
    GALAXY = C_P[C_P == 4]  # objects classified as galaxies
    QSO = C_P[C_P == 3]  # objects classified as QSO (Quasars)
    QSO_BAL = C_P[
        C_P == 30]  # objects classified as QSO BAL (Quasars with Broad Absorption Lines)
    # Everything not in {1, 3, 4, 30} is treated as unclassified
    N_C = C_P[C_P != 30]
    N_C = N_C[N_C != 3]
    N_C = N_C[N_C != 1]
    N_C = N_C[N_C != 4]  # objects without a valid classification

    print('INFO: There is available')
    print('-->Star:', STAR.shape[0])
    print('-->Galaxy:', GALAXY.shape[0])
    print('-->QSO:', QSO.shape[0])
    print('-->QSO BAL:', QSO_BAL.shape[0])
    print('-->NN: {}\n'.format(N_C.shape[0]))

    # Build two DataFrames (one per catalog) carrying only the ID columns.
    # NOTE(review): `data2` is rebound here from FITS table to dict/DataFrame
    # — the original table is no longer reachable after this point.
    data1 = {
        'PLATE': data_PLATE_1,
        'MJD': data_MJD_1,
        'FIBERID': data_FIBERID_1,
        'ID': data_ID_1
    }
    data1 = pd.DataFrame(data=data1)
    data2 = {
        'PLATE': data_PLATE_2,
        'MJD': data_MJD_2,
        'FIBERID': data_FIBERID_2,
        'ID': data_ID_2
    }
    data2 = pd.DataFrame(data=data2)

    # Convert the id columns to strings and join them as PLATE-MJD-FIBERID
    # to form a new unique ID per object.
    data1['PLATE'] = data1['PLATE'].astype(str)
    data1['MJD'] = data1['MJD'].astype(str)
    data1['FIBERID'] = data1['FIBERID'].astype(str)
    data1['PM'] = data1['MJD'].str.cat(data1['FIBERID'], sep="-")
    data1['NEWID'] = data1['PLATE'].str.cat(data1['PM'], sep="-")
    data_1 = data1.drop(columns=['PLATE', 'MJD', 'FIBERID', 'ID', 'PM']).values
    data2['PLATE'] = data2['PLATE'].astype(str)
    data2['MJD'] = data2['MJD'].astype(str)
    data2['FIBERID'] = data2['FIBERID'].astype(str)
    data2['PM'] = data2['MJD'].str.cat(data2['FIBERID'], sep="-")
    data2['NEWID'] = data2['PLATE'].str.cat(data2['PM'], sep="-")
    data_2 = data2.drop(columns=['PLATE', 'MJD', 'FIBERID', 'ID', 'PM']
                        ).values  # New set of database 2 with new ID's

    # Cross-match the two catalogs on the synthetic ID; intersect1d with
    # return_indices=True also yields the matching row indices in each input.
    data_CO = np.array(np.intersect1d(data_1, data_2, return_indices=True))
    data_CO_objects = data_CO[
        0]  # The unique new ID of each element in both sets
    data_CO_ind1 = data_CO[
        1]  # Indices of intersected elements from the original data 1 (Superset_DR12Q.fits)
    data_CO_ind2 = data_CO[
        2]  # Indices of intersected elements from the original data 2 (data_dr12.fits)
    print('INFO:')
    print('I find {} objects with spectra from DR12 \n'.format(
        len(data_CO_objects)))

    indi = {'ind1': data_CO_ind1, 'ind2': data_CO_ind2}
    ind = pd.DataFrame(data=indi, index=data_CO_ind1)

    # Expert-classification columns cast to float for the selection frame
    cp = np.array(data['CLASS_PERSON'], dtype=float)
    z = np.array(data['Z_VI'], dtype=float)
    zc = np.array(data['Z_CONF_PERSON'], dtype=float)
    bal = np.array(data['BAL_FLAG_VI'], dtype=float)
    bi = np.array(data['BI_CIV'], dtype=float)
    d = {
        'CLASS_PERSON': cp,
        'Z_VI': z,
        'Z_CONF_PERSON': zc,
        'BAL_FLAG_VI': bal,
        'BI_CIV': bi
    }
    data_0 = pd.DataFrame(data=d)
    obj = data_0.loc[data_CO_ind1]  # rows of catalog 1 that have spectra

    if (classification != True):
        # Regression path: sample QSOs and/or QSO_BALs (Z_CONF_PERSON == 3
        # only) and extract per-spectrum features plus visual redshifts.
        if (objts[0] == 'QSO'):
            qsos = obj.loc[obj['CLASS_PERSON'] == 3]
            qsos = qsos.loc[qsos['Z_CONF_PERSON'] == 3]
            sample_objects = qsos.sample(n=int(N_sample),
                                         weights='CLASS_PERSON',
                                         random_state=5)
            indi = np.array(sample_objects.index)
            indi1 = ind.loc[indi].values
        elif (objts[0] == 'QSO_BAL'):
            qsos_bal = obj.loc[obj['CLASS_PERSON'] == 30]
            qsos_bal = qsos_bal.loc[qsos_bal['Z_CONF_PERSON'] == 3]
            sample_objects = qsos_bal.sample(n=int(N_sample),
                                             weights='CLASS_PERSON',
                                             random_state=5)
            indi = np.array(sample_objects.index)
            indi1 = ind.loc[indi].values
        elif (len(objts) == 2):
            # Half QSO, half QSO_BAL
            qsos = obj.loc[obj['CLASS_PERSON'] == 3]
            qsos = qsos.loc[qsos['Z_CONF_PERSON'] == 3]
            qsos_bal = obj.loc[obj['CLASS_PERSON'] == 30]
            qsos_bal = qsos_bal.loc[qsos_bal['Z_CONF_PERSON'] == 3]
            sample_qso = qsos.sample(n=int(N_sample / 2),
                                     weights='CLASS_PERSON',
                                     random_state=5)
            sample_qso_bal = qsos_bal.sample(n=int(N_sample / 2),
                                             weights='CLASS_PERSON',
                                             random_state=5)
            sample_objects = pd.concat([sample_qso, sample_qso_bal])
            ind_qso = np.array(sample_qso.index)
            ind_qso_bal = np.array(sample_qso_bal.index)
            indi = np.concatenate((ind_qso, ind_qso_bal), axis=None)
            indi1 = ind.loc[indi].values

        # NOTE(review): spectra_ is allocated here but never used on this
        # branch (features go into `parameters` instead).
        spectra_ = np.zeros((N_sample, 886))
        j = 0
        kernel_size = 5
        flux_threshold = 1.1
        parameters = np.zeros(
            (N_sample, 7)
        )  # Number of lines // FWHM of max emission line // EW of max emission line // Spectrum Mean // Spectrum STDV // Spectrum Flux Integral // Spectrum SNR
        for i in indi:
            k = indi1[j, 1]
            # First 443 pixels of the spectrum on a fixed wavelength grid;
            # NOTE(review): units (Jy, AA) are assumed here — confirm.
            x = np.linspace(3600, 10500, 443)
            zero_spectrum = spectra[k, :443]
            spectrum = Spectrum1D(flux=zero_spectrum * u.Jy,
                                  spectral_axis=x * u.AA)
            # Continuum fit, continuum-normalize, then gaussian smooth
            g1_fit = fit_generic_continuum(spectrum)
            y_continuum_fitted = g1_fit(x * u.AA)
            spec_nw_2 = spectrum / y_continuum_fitted
            spectrum_smooth = gaussian_smooth(spec_nw_2, kernel_size)
            # Number of emission lines found by the derivative method
            lines_1 = find_lines_derivative(spectrum_smooth,
                                            flux_threshold=flux_threshold)
            l = lines_1[lines_1['line_type'] == 'emission']
            number_lines = l['line_center_index'].shape[0]
            parameters[j, 0] = number_lines
            # FWHM
            parameters[j, 1] = fwhm(spectrum_smooth).value
            # EW
            parameters[j, 2] = equivalent_width(spectrum_smooth).value
            # Spectrum Mean
            parameters[j, 3] = np.mean(spectrum_smooth.flux)
            # Spectrum STDV
            parameters[j, 4] = np.std(spectrum_smooth.flux)
            # Spectrum Flux Integral
            parameters[j, 5] = line_flux(spectrum_smooth).value
            # Spectrum SNR
            parameters[j, 6] = snr_derived(spectrum_smooth).value
            j += 1

        # NOTE(review): duplicate 'STDV' key below — Python keeps only one
        # entry; one of the two was probably meant to be a different stat.
        d = {
            'Lines_Number': parameters[:, 0],
            'FHWM': parameters[:, 1],
            'EW': parameters[:, 2],
            'Mean': parameters[:, 3],
            'STDV': parameters[:, 4],
            'STDV': parameters[:, 4],
            'Spectrum_Flux': parameters[:, 5],
            'SNR': parameters[:, 6]
        }
        parameters = pd.DataFrame(data=d)
        #X=spectra_.values
        #mean_flx= np.ma.average(X[:,:443],axis=1)
        #ll=(X[:,:443]-mean_flx.reshape(-1,1))**2
        #aveflux=np.ma.average(ll, axis=1)
        #sflux = np.sqrt(aveflux)
        #X = (X[:,:443]-mean_flx.reshape(-1,1))/sflux.reshape(-1,1)
        y = sample_objects['Z_VI']
        y = np.array(y, dtype=float)
        #y_max=np.max(y)
        #y=y/y_max
        return parameters, y

    # Classification path: balanced sample of the four classes, return
    # renormalized spectra and remapped labels.
    stars = obj.loc[obj['CLASS_PERSON'] == 1]
    galaxies = obj.loc[obj['CLASS_PERSON'] == 4]
    qsos = obj.loc[obj['CLASS_PERSON'] == 3]
    qsos_bal = obj.loc[obj['CLASS_PERSON'] == 30]
    sample_star = stars.sample(n=int(N_sample / 4),
                               weights='CLASS_PERSON',
                               random_state=5)
    sample_galaxy = galaxies.sample(n=int(N_sample / 4),
                                    weights='CLASS_PERSON',
                                    random_state=5)
    sample_qso = qsos.sample(n=int(N_sample / 4),
                             weights='CLASS_PERSON',
                             random_state=5)
    sample_qso_bal = qsos_bal.sample(n=int(N_sample / 4),
                                     weights='CLASS_PERSON',
                                     random_state=5)
    sample_objects = pd.concat(
        [sample_star, sample_galaxy, sample_qso, sample_qso_bal])
    ind_star = np.array(sample_star.index)
    ind_galaxy = np.array(sample_galaxy.index)
    ind_qso = np.array(sample_qso.index)
    ind_qso_bal = np.array(sample_qso_bal.index)
    indi = np.concatenate((ind_star, ind_galaxy, ind_qso, ind_qso_bal),
                          axis=None)
    indi1 = ind.loc[indi].values

    # Gather the sampled spectra into one matrix
    spectra_ = np.zeros((N_sample, 886))
    j = 0
    for i in indi:
        k = indi1[j, 1]
        spectra_[j, :] = spectra[k, :]
        j = j + 1
    spectra_ = pd.DataFrame(spectra_)
    X = spectra_.values

    # Renormalize spectra: subtract per-row mean, divide by per-row stddev
    mean_flx = np.ma.average(X[:, :443], axis=1)
    ll = (X[:, :443] - mean_flx.reshape(-1, 1))**2
    aveflux = np.ma.average(ll, axis=1)
    sflux = np.sqrt(aveflux)
    X = (X[:, :443] - mean_flx.reshape(-1, 1)) / sflux.reshape(-1, 1)

    # Remap class codes STAR/GALAXY/QSO/QSO_BAL (1/4/3/30) to 0/1/2/3
    y = sample_objects['CLASS_PERSON']
    y = y.replace([1, 4, 3, 30], [0, 1, 2, 3]).values
    y = np.array(y, dtype=float)
    return X, y