Пример #1
0
def test_statistics_gui_roi_spectrum(specviz_gui):
    """Statistics plugin values must match those computed on the ROI cutout.

    A linear region is added to the current plot, the same region is cut out
    of the plotted spectrum by hand, and the dictionary exposed by the
    ``Statistics`` plugin bar is compared with values computed directly on
    that cutout.
    """
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)

    # try/finally: close the workspace even if a step or the assertion fails,
    # so a failing run cannot leave an open workspace behind for later tests.
    try:
        hub = Hub(workspace=workspace)

        # Make region of interest cutout, using default cutout at .3 from the
        # middle in either direction
        plot_widget = specviz_gui.current_workspace.current_plot_window.plot_widget
        plot_widget._on_add_linear_region()

        # Simulate cutout for truth data
        spectrum = extract_region(hub.plot_item._data_item.spectrum,
                                  SpectralRegion(*hub.selected_region_bounds))

        # pull out stats dictionary
        stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

        # Generate truth comparisons
        truth_dict = {
            'mean': spectrum.flux.mean(),
            'median': np.median(spectrum.flux),
            'stddev': spectrum.flux.std(),
            'centroid': centroid(spectrum, region=None),
            'snr': "N/A",
            'fwhm': fwhm(spectrum),
            'ew': equivalent_width(spectrum),
            'total': line_flux(spectrum),
            'maxval': spectrum.flux.max(),
            'minval': spectrum.flux.min(),
        }

        # compare!
        assert stats_dict == truth_dict
    finally:
        workspace.close()
Пример #2
0
def test_statistics_gui_full_spectrum(specviz_gui):
    """Statistics plugin values must match those computed on the full spectrum.

    No region is selected, so the dictionary exposed by the ``Statistics``
    plugin bar is compared with values computed directly on the whole plotted
    spectrum.
    """
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)

    # try/finally: close the workspace even if a step or the assertion fails,
    # so a failing run cannot leave an open workspace behind for later tests.
    try:
        hub = Hub(workspace=workspace)

        # pull out stats dictionary
        stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

        # Generate truth comparisons
        spectrum = hub.plot_item._data_item.spectrum
        truth_dict = {
            'mean': spectrum.flux.mean(),
            'median': np.median(spectrum.flux),
            'stddev': spectrum.flux.std(),
            'centroid': centroid(spectrum, region=None),
            'snr': "N/A",
            'fwhm': fwhm(spectrum),
            'ew': equivalent_width(spectrum),
            'total': line_flux(spectrum),
            'maxval': spectrum.flux.max(),
            'minval': spectrum.flux.min(),
        }

        # compare!
        assert stats_dict == truth_dict
    finally:
        workspace.close()
Пример #3
0
def compute_stats(spectrum):
    """
    Compute basic statistics for a spectrum.

    Parameters
    ----------
    spectrum : `~specutils.spectra.spectrum1d.Spectrum1D`
        Spectrum whose ``flux`` is summarised and which is handed to the
        ``centroid``, ``snr``, ``fwhm``, ``equivalent_width`` and
        ``line_flux`` analysis functions.

    Returns
    -------
    dict
        Keys ``'mean'``, ``'median'``, ``'stddev'``, ``'centroid'``,
        ``'rms'``, ``'snr'``, ``'fwhm'``, ``'ew'``, ``'total'``,
        ``'maxval'`` and ``'minval'``.  ``'snr'`` is the string ``"N/A"``
        when ``snr(spectrum)`` raises.
    """
    # Function-scope import so this block does not depend on a module-level
    # ``import logging`` being present.
    import logging

    flux = spectrum.flux
    mean = flux.mean()
    # Root mean square of the flux: sqrt(sum(f_i ** 2) / N).
    rms = np.sqrt(flux.dot(flux) / len(flux))

    try:
        snr_val = snr(spectrum)
    except Exception as exc:
        # Best effort: report "N/A" instead of failing the whole summary, but
        # leave a trace of why the SNR could not be computed.
        logging.getLogger(__name__).debug(
            "snr() failed, reporting 'N/A': %s", exc)
        snr_val = "N/A"

    return {
        'mean': mean,
        'median': np.median(flux),
        'stddev': flux.std(),
        # we may want to adjust this for continuum subtraction
        'centroid': centroid(spectrum, region=None),
        'rms': rms,
        'snr': snr_val,
        'fwhm': fwhm(spectrum),
        'ew': equivalent_width(spectrum),
        'total': line_flux(spectrum),
        'maxval': flux.max(),
        'minval': flux.min(),
    }
Пример #4
0
def test_statistics_gui_roi_spectrum(specviz_gui):
    """Compare the Statistics plugin output with truth values for an ROI.

    The test adds a linear region to the current plot, extracts the same
    region from the plotted spectrum, and checks that the ``Statistics``
    plugin bar reports exactly the values computed on that extraction.
    """
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)

    try:
        hub = Hub(workspace=workspace)

        # Make region of interest cutout, using default cutout at .3 from the
        # middle in either direction
        specviz_gui.current_workspace.current_plot_window.plot_widget._on_add_linear_region()

        # Simulate cutout for truth data
        spectrum = extract_region(hub.plot_item._data_item.spectrum,
                                  SpectralRegion(*hub.selected_region_bounds))

        # pull out stats dictionary
        stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

        # Generate truth comparisons
        truth_dict = {'mean': spectrum.flux.mean(),
                      'median': np.median(spectrum.flux),
                      'stddev': spectrum.flux.std(),
                      'centroid': centroid(spectrum, region=None),
                      'snr': "N/A",
                      'fwhm': fwhm(spectrum),
                      'ew': equivalent_width(spectrum),
                      'total': line_flux(spectrum),
                      'maxval': spectrum.flux.max(),
                      'minval': spectrum.flux.min()}

        # compare!
        assert stats_dict == truth_dict
    finally:
        # Runs on success and on failure alike: without it a failed
        # comparison would skip the close and leave the workspace open.
        workspace.close()
Пример #5
0
def test_statistics_gui_full_spectrum(specviz_gui):
    """Compare the Statistics plugin output with truth values, no ROI.

    With no region selected, the ``Statistics`` plugin bar must report
    exactly the values computed here on the whole plotted spectrum.
    """
    # Ensure that the test is run on an unmodified workspace instance
    workspace = new_workspace(specviz_gui)

    try:
        hub = Hub(workspace=workspace)

        # pull out stats dictionary
        stats_dict = specviz_gui.current_workspace._plugin_bars['Statistics'].stats

        # Generate truth comparisons
        spectrum = hub.plot_item._data_item.spectrum
        truth_dict = {'mean': spectrum.flux.mean(),
                      'median': np.median(spectrum.flux),
                      'stddev': spectrum.flux.std(),
                      'centroid': centroid(spectrum, region=None),
                      'snr': "N/A",
                      'fwhm': fwhm(spectrum),
                      'ew': equivalent_width(spectrum),
                      'total': line_flux(spectrum),
                      'maxval': spectrum.flux.max(),
                      'minval': spectrum.flux.min()}

        # compare!
        assert stats_dict == truth_dict
    finally:
        # Runs on success and on failure alike: without it a failed
        # comparison would skip the close and leave the workspace open.
        workspace.close()
Пример #6
0
def line(spec, wave1, wave2):
    """Derive initial-guess parameters for a spectral line.

    Parameters
    ----------
    spec
        Spectrum handed to ``centroid``, ``fwhm`` and ``line_flux``.
    wave1, wave2 : float
        Lower and upper bound of the line region; both are multiplied by
        ``u.AA`` to build the ``SpectralRegion``.

    Returns
    -------
    list of float
        ``[centre, A, FWHM]``: the centroid of the region divided by
        ``u.AA``, the line flux of the region divided by
        ``u.AA * J cm-2 s-1 AA-1``, and the FWHM divided by ``u.AA``.
    """
    # Build the region once from the function's own arguments.  The original
    # code used the undefined names ``lamb1``/``lamb2`` for the flux region.
    region = SpectralRegion(wave1 * u.AA, wave2 * u.AA)

    # finding the centroid and deriving guess parameters
    centre = float(centroid(spec, region) / u.AA)

    # NOTE(review): the FWHM is measured on the whole of ``spec``, not only
    # inside ``region`` -- kept as in the original; confirm this is intended.
    width = float(fwhm(spec) / u.AA)

    flux_unit = u.AA * u.Unit('J cm-2 s-1 AA-1')
    amplitude = float(line_flux(spec, region) / flux_unit)

    # PARAMETERS
    return [centre, amplitude, width]
Пример #7
0
def compute_stats(spectrum):
    """
    Summarise a spectrum as a dictionary of statistics.

    Every analysis call (centroid, SNR, FWHM, equivalent width, line flux) is
    attempted on its own: a call that raises is logged at debug level and its
    entry is replaced by a placeholder string, so one failing measurement
    does not prevent the others from being reported.

    Parameters
    ----------
    spectrum : `~specutils.spectra.spectrum1d.Spectrum1D`

    Returns
    -------
    dict
        Keys ``'mean'``, ``'median'``, ``'stddev'``, ``'centroid'``,
        ``'snr'``, ``'fwhm'``, ``'ew'``, ``'total'``, ``'maxval'`` and
        ``'minval'``.  ``'snr'`` falls back to ``"N/A"``; ``'centroid'``,
        ``'fwhm'``, ``'ew'`` and ``'total'`` fall back to ``"Error"``.
    """

    def _attempt(measure, placeholder):
        # Run one measurement; on any failure log it and use the placeholder.
        try:
            return measure()
        except Exception as err:
            logging.debug(err)
            return placeholder

    # we may want to adjust the centroid for continuum subtraction
    centroid_val = _attempt(lambda: centroid(spectrum, region=None), "Error")
    snr_val = _attempt(lambda: snr(spectrum), "N/A")
    fwhm_val = _attempt(lambda: fwhm(spectrum), "Error")
    ew_val = _attempt(lambda: equivalent_width(spectrum), "Error")
    total_val = _attempt(lambda: line_flux(spectrum), "Error")

    stats = {}
    stats['mean'] = spectrum.flux.mean()
    stats['median'] = np.median(spectrum.flux)
    stats['stddev'] = spectrum.flux.std()
    stats['centroid'] = centroid_val
    stats['snr'] = snr_val
    stats['fwhm'] = fwhm_val
    stats['ew'] = ew_val
    stats['total'] = total_val
    stats['maxval'] = spectrum.flux.max()
    stats['minval'] = spectrum.flux.min()
    return stats
# Layout of one row of the spectra array in ``file_2`` as used below.
_SPECTRUM_WIDTH = 886  # values copied per spectrum row
_FLUX_PIXELS = 443  # leading values of a row that are used as the flux

# Output columns of the regression feature table.  'FHWM' is spelled this way
# on purpose: it is the column name existing callers already receive.
_FEATURE_COLUMNS = ['Lines_Number', 'FHWM', 'EW', 'Mean', 'STDV',
                    'Spectrum_Flux', 'SNR']


def _match_keys(table):
    """Return one 'PLATE-MJD-FIBERID' string per row of ``table``."""
    frame = pd.DataFrame({
        'PLATE': table['PLATE'],
        'MJD': table['MJD'],
        'FIBERID': table['FIBERID'],
    })
    plate = frame['PLATE'].astype(str)
    mjd = frame['MJD'].astype(str)
    fiber = frame['FIBERID'].astype(str)
    return plate.str.cat(mjd.str.cat(fiber, sep="-"), sep="-").values


def _sample_class(table, class_code, count, confident_only=False):
    """Draw ``count`` rows of ``table`` whose CLASS_PERSON is ``class_code``."""
    subset = table.loc[table['CLASS_PERSON'] == class_code]
    if confident_only:
        subset = subset.loc[subset['Z_CONF_PERSON'] == 3]
    # ``weights`` and ``random_state`` are kept exactly as they were so that
    # the same rows are drawn as before for a given input.
    return subset.sample(n=int(count),
                         weights='CLASS_PERSON',
                         random_state=5)


def _spectral_features(spectra, rows):
    """Measure the summary features of the spectra stored at ``rows``."""
    kernel_size = 5
    flux_threshold = 1.1
    # Loop-invariant wavelength grid, built once.
    wavelength = np.linspace(3600, 10500, _FLUX_PIXELS) * u.AA

    features = np.zeros((len(rows), len(_FEATURE_COLUMNS)))
    for j, k in enumerate(rows):
        spectrum = Spectrum1D(flux=spectra[k, :_FLUX_PIXELS] * u.Jy,
                              spectral_axis=wavelength)

        # Continuum fit and gaussian smooth
        continuum = fit_generic_continuum(spectrum)(wavelength)
        smooth = gaussian_smooth(spectrum / continuum, kernel_size)

        # Number of emission lines
        found = find_lines_derivative(smooth, flux_threshold=flux_threshold)
        emission = found[found['line_type'] == 'emission']
        features[j, 0] = emission['line_center_index'].shape[0]

        features[j, 1] = fwhm(smooth).value  # FWHM
        features[j, 2] = equivalent_width(smooth).value  # EW
        features[j, 3] = np.mean(smooth.flux)  # Spectrum mean
        features[j, 4] = np.std(smooth.flux)  # Spectrum standard deviation
        features[j, 5] = line_flux(smooth).value  # Spectrum flux integral
        features[j, 6] = snr_derived(smooth).value  # Spectrum SNR

    return pd.DataFrame(features, columns=_FEATURE_COLUMNS)


def Load_Files(file_1, file_2, N_sample, objts, classification=False):
    """Build a training sample by matching a labelled catalogue to spectra.

    Rows of the catalogue in ``file_1`` and of the spectra table in
    ``file_2`` are matched on a ``PLATE-MJD-FIBERID`` key.  From the matched
    rows a fixed-seed random sample is drawn per class and turned into either
    a table of spectral features with redshifts (regression mode) or
    normalised spectra with class labels (classification mode).

    ``CLASS_PERSON`` codes: star = 1, galaxy = 4, QSO = 3, QSO BAL = 30.

    Parameters
    ----------
    file_1 : str
        FITS file whose HDU 1 table provides the columns ``PLATE``, ``MJD``,
        ``FIBERID``, ``CLASS_PERSON``, ``Z_VI``, ``Z_CONF_PERSON``,
        ``BAL_FLAG_VI`` and ``BI_CIV``.
    file_2 : str
        FITS file whose HDU 1 table provides ``PLATE``, ``MJD`` and
        ``FIBERID`` and whose HDU 0 holds the spectra, one per row.
    N_sample : int
        Requested sample size.  It is split evenly between the sampled
        classes with integer division, so fewer rows may be returned.
    objts : str or sequence of str
        Regression mode only.  ``['QSO']`` or ``['QSO_BAL']`` samples that
        class alone; any two names sample half QSO and half QSO BAL.
    classification : bool, optional
        ``False`` (default) selects regression mode, ``True`` selects
        classification mode.

    Returns
    -------
    (parameters, y) in regression mode
        ``parameters`` is a DataFrame with the columns ``Lines_Number``,
        ``FHWM``, ``EW``, ``Mean``, ``STDV``, ``Spectrum_Flux`` and ``SNR``;
        ``y`` is a float array of the ``Z_VI`` values of the sampled rows.
    (X, y) in classification mode
        ``X`` holds the first 443 values of each sampled spectrum with the
        row mean subtracted and divided by the row standard deviation;
        ``y`` is a float array of labels 0 (star), 1 (galaxy), 2 (QSO) and
        3 (QSO BAL).

    Raises
    ------
    ValueError
        In regression mode, when ``objts`` is empty or names a single class
        other than ``'QSO'`` or ``'QSO_BAL'``.
    """
    # NOTE(review): neither HDU list is closed here (unchanged from the
    # original); ``spectra`` is still read further down -- confirm whether an
    # explicit close after the last read is wanted.
    print('INFO:')
    hdul = fits.open(file_1, mode='denywrite')
    data = hdul[1].data  # catalogue with the CLASS_PERSON labels

    print('INFO:')
    hdul_2 = fits.open(file_2, mode='denywrite')
    data2 = hdul_2[1].data  # Database of spectra
    spectra = hdul_2[0].data  # Spectrum of each object

    # atleast_1d so that a bare string such as 'QSO' is accepted as well.
    objts = np.atleast_1d(objts)

    # Report how many catalogue rows carry each class code.
    C_P = data['CLASS_PERSON']
    counts = {code: int((C_P == code).sum()) for code in (1, 4, 3, 30)}
    other = len(C_P) - sum(counts.values())  # rows with any other code

    print('INFO: There is available')
    print('-->Star:', counts[1])
    print('-->Galaxy:', counts[4])
    print('-->QSO:', counts[3])
    print('-->QSO BAL:', counts[30])
    print('-->NN: {}\n'.format(other))

    # Match the two files on the combined PLATE-MJD-FIBERID key.  rows_1 and
    # rows_2 are the positions of each common key in file 1 and file 2.
    common_ids, rows_1, rows_2 = np.intersect1d(_match_keys(data),
                                                _match_keys(data2),
                                                return_indices=True)
    print('INFO:')
    print('I find {} objects with spectra from DR12 \n'.format(
        len(common_ids)))

    # catalogue row -> row of ``spectra`` holding the same object
    spectra_row_of = pd.Series(rows_2, index=rows_1)

    labels = pd.DataFrame({
        'CLASS_PERSON': np.array(data['CLASS_PERSON'], dtype=float),
        'Z_VI': np.array(data['Z_VI'], dtype=float),
        'Z_CONF_PERSON': np.array(data['Z_CONF_PERSON'], dtype=float),
        'BAL_FLAG_VI': np.array(data['BAL_FLAG_VI'], dtype=float),
        'BI_CIV': np.array(data['BI_CIV'], dtype=float),
    })
    obj = labels.loc[rows_1]  # only objects that have a spectrum

    if not classification:
        # The two-class request is tested first: testing objts[0] == 'QSO'
        # first would capture ['QSO', 'QSO_BAL'] and never reach this case.
        if len(objts) == 2:
            half = int(N_sample / 2)
            sample_objects = pd.concat([
                _sample_class(obj, 3, half, confident_only=True),
                _sample_class(obj, 30, half, confident_only=True),
            ])
        elif len(objts) > 0 and objts[0] == 'QSO':
            sample_objects = _sample_class(obj, 3, N_sample,
                                           confident_only=True)
        elif len(objts) > 0 and objts[0] == 'QSO_BAL':
            sample_objects = _sample_class(obj, 30, N_sample,
                                           confident_only=True)
        else:
            raise ValueError(
                "objts must be ['QSO'], ['QSO_BAL'] or a pair of names, "
                "got {!r}".format(objts.tolist()))

        # Sized by the rows actually drawn, so the feature table and y
        # always have the same length even when N_sample is odd.
        rows = spectra_row_of.loc[sample_objects.index].values
        parameters = _spectral_features(spectra, rows)

        y = np.array(sample_objects['Z_VI'], dtype=float)
        return parameters, y

    # Classification mode: one quarter of the sample per class.
    quarter = int(N_sample / 4)
    sample_objects = pd.concat(
        [_sample_class(obj, code, quarter) for code in (1, 4, 3, 30)])
    rows = spectra_row_of.loc[sample_objects.index].values

    # Sized by the rows actually drawn: a buffer of N_sample rows would keep
    # all-zero rows (NaN after normalisation) when N_sample is not a
    # multiple of four.
    spectra_ = np.zeros((len(rows), _SPECTRUM_WIDTH))
    for j, k in enumerate(rows):
        spectra_[j, :] = spectra[k, :]
    X = spectra_

    # Renormalize spectra: subtract the row mean, divide by the row standard
    # deviation, both taken over the flux part of the row.
    mean_flx = np.ma.average(X[:, :_FLUX_PIXELS], axis=1)
    ll = (X[:, :_FLUX_PIXELS] - mean_flx.reshape(-1, 1))**2
    aveflux = np.ma.average(ll, axis=1)
    sflux = np.sqrt(aveflux)
    X = (X[:, :_FLUX_PIXELS] - mean_flx.reshape(-1, 1)) / sflux.reshape(-1, 1)

    y = sample_objects['CLASS_PERSON']
    y = y.replace([1, 4, 3, 30], [0, 1, 2, 3]).values
    y = np.array(y, dtype=float)

    return X, y