Python data示例，util.get.data Python示例

示例#1

0

显示文件

文件： plot.py 项目： leonliang10/PCA

def coefficients(category = None, rebin_type = 'log', n = 80, legend = True, save = True, show = False):
    '''
    Scatter-plot PCA coefficients, one figure per coefficient index.

    For each index i in range(n), row i of the 'coefficients_normal'
    dataset of every PCA file is plotted in its own column (x position
    equal to the file's position in the list), coloured per file.

    Parameters
    ----------
    category : forwarded to get.data('pca', category) to select the PCA files

    rebin_type : unused here; kept so existing callers keep working

    n : int, number of coefficient rows to plot (one figure each)

    legend : bool, draw a legend labelled with each file's category

    save : bool, save each figure as
           supernova_data/all/plots/pca/coefficients/coefficient_<i>.eps

    show : bool, display each figure interactively
    '''
    data_path = get.data('pca', category)
    mkdir.plots(category = 'all', data_type = 'pca/coefficients')
    for i in range(n):
        plt.figure()
        plt.grid()
        file_count = 0
        for k, data_file in enumerate(data_path):
            # The second '/'-separated path component is used as the label.
            data_category = data_file.split('/')[1]
            # Read only the needed row and release the file immediately,
            # even if reading fails.
            with h5py.File(data_file, 'r') as dataset:
                row = dataset['coefficients_normal'][i,:]
            # One x value per sample, all equal to the file's position.
            # Sizing it from the row avoids the former fixed 100-entry
            # buffer, which mismatched rows longer than 100 samples.
            x = np.zeros(row.shape[0]) + k
            plt.scatter(x, row, color = COLORS[k%len(COLORS)], label = data_category)
            file_count = k + 1

        # Invisible point two units right of the last column; presumably
        # leaves room for the legend -- TODO confirm.
        plt.scatter(float(file_count) + 2, np.array([0]), color = 'white')
        plt.title('coefficient ' + str(i))
        if legend:
            plt.legend()
        if save:
            name = 'supernova_data/all/plots/pca/coefficients/coefficient_' + str(i) + '.eps'
            plt.savefig(name, format='eps', dpi = 3500)
        if show:
            plt.show()
        plt.close()

示例#2

0

显示文件

文件： trim.py 项目： OdettaAnalytics/PCA

def run(min_wave = 4000, max_wave = 8000, category = None):
    '''
    run() trims all input category's deredshifted data based on
    the minimum and maximum wavelength and outputs as HDF5 file

    Each spectrum is cut to the contiguous row range that starts at the
    first sample with wavelength >= min_wave and ends at the last sample
    with wavelength <= max_wave (column 0 holds the wavelength). A
    spectrum lying entirely inside the range is therefore written
    unchanged. A spectrum with no sample inside the range is skipped;
    previously that case raised a NameError or silently reused the
    indices of the preceding spectrum.

    Parameters
    ----------
    min_wave : int indicated minimum wavelength range

    max_wave : int indicated maximum wavelength range

    category : list of categories to trim
    '''
    data_path = get.data('deredshift', category)
    for data_file in data_path:
        # The second '/'-separated path component names the category.
        data_category = data_file.split('/')[1]
        data_type = data_category + '_' + 'trim'
        # The context manager closes the input file even on error.
        with h5py.File(data_file, 'r') as dataset:
            for data_name in dataset:
                spectrum = dataset[data_name][:]
                wavelength = spectrum[:,0]
                at_or_above_min = (wavelength >= min_wave).nonzero()[0]
                at_or_below_max = (wavelength <= max_wave).nonzero()[0]
                if at_or_above_min.shape[0] == 0 or at_or_below_max.shape[0] == 0:
                    # Nothing on one side of the window: nothing to keep.
                    continue
                first_row = at_or_above_min[0]
                last_row = at_or_below_max[-1]
                if last_row < first_row:
                    # The window falls between samples.
                    continue
                trimmed_spectrum = spectrum[first_row:last_row+1,:]
                convert_HDF5.write(data_category, str(data_name), data_type, trimmed_spectrum)
示例#3

0

显示文件

文件： plot.py 项目： leonliang10/PCA

def pcomponents(category = None, components = [[0,1]], legend = True, save = True, show = False):
    '''
    Scatter-plot pairs of reduced PCA coefficients against each other.

    For every pair [i, j] in components, row i of 'coefficients_reduced'
    is plotted against row j for every PCA file, one colour per file.

    Parameters
    ----------
    category : forwarded to get.data('pca', category) to select the PCA files

    components : list of [i, j] index pairs; one figure is drawn per pair
                 (the default list is only read, never modified)

    legend : bool, draw a legend labelled with each file's category

    save : bool, save each figure as
           supernova_data/all/plots/pca/pcomponents/c<i>_vs_c<j>.eps

    show : bool, display each figure interactively
    '''
    data_path = get.data('pca', category)
    mkdir.plots(category = 'all', data_type = 'pca/pcomponents')
    for component in components:
        i = component[0]
        j = component[1]
        plots = []
        plot_names = []
        plt.figure()
        # Enable the grid explicitly and only once: calling plt.grid()
        # twice without arguments toggles it on and then off again.
        plt.grid(True)
        for k, data_file in enumerate(data_path):
            # The second '/'-separated path component is used as the label.
            data_category = data_file.split('/')[1]
            # Copy the matrix into memory so the file can be closed at once.
            with h5py.File(data_file, 'r') as dataset:
                coefficients_reduced = dataset['coefficients_reduced'][:]
            cx = coefficients_reduced[i,:]
            cy = coefficients_reduced[j,:]
            p = plt.scatter(cx, cy, color = COLORS[k%len(COLORS)], label = data_category)
            plots.append(p)
            plot_names.append(data_category)
        if legend:
            # Pass the handles along with the names so each label is tied
            # to its own scatter series.
            plt.legend(plots, plot_names, loc='right', bbox_to_anchor = (1.1, 0.2), fancybox = True)
        plt.xlabel('c' + str(i))
        plt.ylabel('c' + str(j))
        plt.title('c' + str(i) + ' vs ' + 'c' + str(j))
        if save:
            name = 'supernova_data/all/plots/pca/pcomponents/' + 'c' + str(i) + '_vs_' + 'c' + str(j) + '.eps'
            plt.savefig(name, format='eps', dpi = 3500)
        if show:
            plt.show()
        plt.close()

示例#4

0

显示文件

文件： rebin.py 项目： OdettaAnalytics/PCA

def run(min_wave = 4000, max_wave = 8000, n_rebin = 2000, category = None, rebin_type = 'log'):
    '''
    rebin each of the demeaned data in the category to desired number of points

    Every spectrum is evaluated on a common wavelength grid through the
    interpolation functions built by interpolation(), and written out as
    a two-column (wavelength, flux) array under the file name
    <category>_rebin_<rebin_type>.

    Parameters
    ----------
    min_wave : int indicating the minimum wavelength range

    max_wave : int indicating the maximum wavelength range

    n_rebin : int indicating the number of points wanted for rebin

    category : list of strings of category for rebinning

    rebin_type : string indicating the type of rebin wanted; 'linear'
                 gives an evenly spaced grid, any other value gives a
                 logarithmically spaced grid
    '''
    f_x = interpolation(min_wave, max_wave, category)
    # The grid depends only on the arguments, so build it once instead of
    # once per spectrum. max_wave itself is excluded (endpoint = False).
    if rebin_type == 'linear':
        new_wavelength = np.linspace(min_wave, max_wave, num = n_rebin, endpoint = False)
    else:
        new_wavelength = np.logspace(np.log10(min_wave), np.log10(max_wave), num = n_rebin, endpoint = False)

    data_path = get.data('demean', category)
    for data_file in data_path:
        # The second '/'-separated path component names the category.
        data_category = data_file.split('/')[1]
        data_filename = data_category + '_rebin_' + rebin_type
        # Only the dataset names are needed; the context manager makes
        # sure the file handle is released.
        with h5py.File(data_file, 'r') as dataset:
            for data_name in dataset:
                f = f_x[str(data_name)]
                new_flux = f(new_wavelength)
                new_rebin_data = np.vstack([new_wavelength, new_flux]).T
                convert_HDF5.write(data_category, str(data_name), data_filename, new_rebin_data)

示例#5

0

显示文件

文件： rebin.py 项目： OdettaAnalytics/PCA

def interpolation(min_wave, max_wave, category = None):
    '''
    generates the interpolation functions

    Rows whose wavelength is exactly zero are dropped before the
    interpolator is built. Outside the sampled wavelength range the
    interpolators return 0 instead of raising.

    Parameters
    ----------
    min_wave : int indicating the minimum wavelength range
               (currently unused; kept for interface compatibility)

    max_wave : int indicating the maximum wavelength range
               (currently unused; kept for interface compatibility)

    category : list of strings of category for rebinning

    Returns
    -------
    f_x : dict mapping each dataset name to its interpolation function
          (flux as a function of wavelength)
    '''
    data_path = get.data('demean', category)
    f_x = {}
    for data_file in data_path:
        # The context manager closes the file once its spectra are read.
        with h5py.File(data_file, 'r') as dataset:
            for data_name in dataset:
                spectrum = dataset[data_name][:,:]
                wavelength = spectrum[:,0]
                flux = spectrum[:,1]
                # Keep only samples with a non-zero wavelength.
                nonzeros = np.where(wavelength)
                wavelength = wavelength[nonzeros]
                flux = flux[nonzeros]
                f_x[data_name] = interpolate.interp1d(wavelength, flux, bounds_error = False, fill_value = 0)
    return f_x

示例#6

0

显示文件

文件： plot.py 项目： leonliang10/PCA

def K_reduced(category = None, data_file = None, legend = True, save = True, show = False):
    '''
    Plot one spectrum against its reconstructions from leading columns of U.

    The spectrum whose key equals data_file is looked up in the first
    PCA file returned by get.data('pca', 'all'). For i = 0..6 a figure is
    drawn with the original flux, the reconstruction using columns 0..i
    of U, and the contribution of each single column shifted downwards by
    a running offset (presumably so the curves do not overlap).

    Parameters
    ----------
    category : string used for the flux label, the title and file names

    data_file : key of the spectrum to plot, matched against the 'keys'
                dataset; the process exits if it is not found

    legend : bool, draw a legend

    save : bool, save each figure as
           supernova_data/all/plots/pca/K_reduced/<category>_<i>.eps and
           write the first six reduced coefficients to
           <category>_coefficients_reduced.txt (rewritten on every figure)

    show : bool, display each figure interactively
    '''
    mkdir.plots(category = 'all', data_type = 'pca/K_reduced')
    data_path = get.data('pca', 'all')
    # Copy everything needed into memory so the file is closed before
    # plotting starts, and also on the early-exit path.
    with h5py.File(data_path[0], 'r') as pca_dataset:
        matches = np.where(pca_dataset['keys'][:] == data_file)[0]

        if matches.shape[0] == 0:
            # Parenthesised single argument prints identically on
            # Python 2 and Python 3.
            print('Cannot find specific spectrum entered.')
            sys.exit()

        spectrum_index = matches[0]
        wavelength = pca_dataset['wavelength'][spectrum_index,:]
        flux = pca_dataset['flux'][spectrum_index,:]
        U = pca_dataset['U'][:,:]

    U_reduced = np.zeros(U.shape)
    for i in range(7):
        offset = 0
        # Columns 0..i-1 were copied on earlier iterations; only column i
        # is new.
        U_reduced[:,i] = U[:,i]

        coefficients_reduced = (flux.dot(U_reduced)).T
        reconstruction = (U_reduced.dot(coefficients_reduced)).T
        plt.figure()
        plt.grid()
        plt.plot(wavelength, flux, label = category)
        plt.plot(wavelength, reconstruction, label = 'sum(0:' + str(i) + ')', color = 'red')
        previous_single = 0
        for k in range(i+1):
            single_coefficient = (flux.dot(U_reduced[:,k])).T
            single_component = (U_reduced[:,k].dot(single_coefficient)).T
            # Grow the offset by this curve's peak minus the lowest point
            # of the curve drawn just above it (or of the flux).
            if k == 0:
                offset += max(single_component) - min(min(reconstruction), min(flux))
            else:
                offset += max(single_component) - min(min(previous_single), min(flux))
            plt.plot(wavelength, single_component - offset, label = str(k), color = 'black')
            previous_single = single_component

        plt.xlabel('wavelength')
        plt.ylabel('flux')
        plt.title(category + '/' + data_file)
        if legend:
            plt.legend()
        if save:
            name = 'supernova_data/all/plots/pca/K_reduced/' + category + '_' + str(i) + '.eps'
            plt.savefig(name, format='eps', dpi = 3500)
            np.savetxt('supernova_data/all/plots/pca/K_reduced/' + category + '_coefficients_reduced.txt', coefficients_reduced[:6])
        if show:
            plt.show()
        plt.close()

示例#7

0

显示文件

文件： plot.py 项目： leonliang10/PCA

def raw(category = None):
    '''
    Plot every raw spectrum file as flux against wavelength and save it.

    Each path returned by get.data('raw', category) is loaded with
    np.loadtxt; column 0 is plotted on the x axis and column 1 on the
    y axis. The fifth '/'-separated path component is used as the plot
    title and output name, the second one as the category directory.

    Parameters
    ----------
    category : forwarded to get.data('raw', category) to select the files
    '''
    spectrum_files = get.data('raw', category)
    mkdir.plots(category = None, data_type = 'raw')
    for spectrum_file in spectrum_files:
        table = np.loadtxt(spectrum_file)
        plt.figure()
        plt.grid()
        plt.plot(table[:,0], table[:,1])
        path_parts = spectrum_file.split('/')
        title = path_parts[4]
        plt.title(title)
        plt.xlabel('wavelength')
        plt.ylabel('flux')
        # Output lands in supernova_data/<category>/plots/raw/<name>.eps
        target = ''.join(['supernova_data/', path_parts[1], '/plots/raw/', title, '.eps'])
        plt.savefig(target, format='eps', dpi = 3500)
        plt.close()

示例#8

0

显示文件

文件： plot.py 项目： leonliang10/PCA

def rebin(category = None, rebin_type = 'log'):
    '''
    Plot every rebinned spectrum as flux against wavelength and save it.

    Each dataset of each file returned by get.data('rebin', category) is
    plotted with column 0 on the x axis and column 1 on the y axis, and
    saved as
    supernova_data/<category>/plots/rebin_<rebin_type>/<dataset name>.eps

    Parameters
    ----------
    category : forwarded to get.data('rebin', category) to select the files

    rebin_type : string used in the name of the output plot directory
    '''
    data_path = get.data('rebin', category)
    # The keyword is 'data_type', as in every other mkdir.plots call; the
    # previous spelling 'dat_type' did not match it.
    mkdir.plots(category = None, data_type = 'rebin_' + rebin_type)
    for data_file in data_path:
        # The second '/'-separated path component names the category.
        data_category = data_file.split('/')[1]
        # The context manager closes the file even if plotting fails.
        with h5py.File(data_file, 'r') as dataset:
            for data_name in dataset:
                # Read the dataset once rather than once per column.
                spectrum = dataset[data_name][:]
                wavelength = spectrum[:, 0]
                flux = spectrum[:, 1]
                plt.figure()
                plt.grid()
                plt.plot(wavelength, flux)
                plt.xlabel('wavelength')
                plt.ylabel('flux')
                plt.title(data_name)
                filename = 'supernova_data/' + data_category + '/plots/rebin_' + rebin_type + '/' + data_name + '.eps'
                plt.savefig(filename, format='eps', dpi = 3500)
                plt.close()

示例#9

0

显示文件

文件： plot.py 项目： leonliang10/PCA

def U_matrix(category = None, legend = True, save = True, show = False):
    '''
    Plot the columns of the U matrix stored in the first 'all' PCA file.

    Two sets of figures are produced: one figure per column, and one
    figure per group of five consecutive columns in which each column is
    shifted upwards by a running offset. The row and column counts are
    taken from U itself (previously both were hard-coded to 2000).

    Parameters
    ----------
    category : unused here; kept so existing callers keep working

    legend : bool, draw a legend on the grouped figures

    save : bool, save figures under
           supernova_data/all/plots/pca/individual_U/ and
           supernova_data/all/plots/pca/U/

    show : bool, display each figure interactively
    '''
    data_path = get.data('pca', 'all')
    mkdir.plots(category = 'all', data_type = 'pca/U')
    mkdir.plots(category = 'all', data_type = 'pca/individual_U')
    # Load U once and close the file, instead of re-reading a column
    # from disk for every plot.
    with h5py.File(data_path[0], 'r') as dataset:
        U = dataset['U'][:,:]
    n_points, n_columns = U.shape
    # NOTE(review): the x axis is assumed to be linear from 4000 to 8000
    # however the spectra were rebinned -- confirm against the rebin step.
    wavelength = np.linspace(4000, 8000, n_points)

    for i in range(n_columns):
        plt.figure()
        plt.plot(wavelength, U[:,i])
        plt.grid()
        plt.xlabel('wavelength')
        plt.ylabel('U[:,' + str(i) + ']')
        plt.title('column ' + str(i) + ' of U')
        if save:
            name = 'supernova_data/all/plots/pca/individual_U/column_' + str(i) + '_of_U.eps'
            plt.savefig(name, format='eps', dpi = 3500)
        if show:
            plt.show()
        plt.close()

    group_size = 5
    for j in range(0, n_columns, group_size):
        # The last group is shorter when n_columns is not a multiple of 5.
        last = min(j + group_size, n_columns) - 1
        plt.figure()
        plot_names = []
        offset = 0
        for k in range(last - j + 1):
            column = U[:,j+k]
            # Wrap around the palette, as the other plotting functions do.
            plt.plot(wavelength, column + offset, color = COLORS[k%len(COLORS)], label = str(j + k))
            # Lift the next curve above this one's peak plus a fixed gap.
            offset += column.max() + 0.2
            plot_names.append(str(j + k))
        plt.grid()
        plt.xlabel('wavelength')
        plt.ylabel('U[:,i]')
        plt.title('columns ' + ' '.join(plot_names) + ' of U')
        if legend:
            plt.legend(plot_names, loc='right', bbox_to_anchor = (1.1, 0.2), fancybox = True)
        if save:
            name = 'supernova_data/all/plots/pca/U/columns_' + str(j) + '-' + str(last) + '_of_U.eps'
            plt.savefig(name, format='eps', dpi = 3500)
        if show:
            plt.show()
        plt.close()

示例#10

0

显示文件

文件： demean.py 项目： OdettaAnalytics/PCA

def demean_flux(category = None):
    '''
    Apply demeaning() to the flux column of every trimmed spectrum.

    For each dataset of each file returned by get.data('trim', category),
    column 1 (flux) is replaced by demeaning(flux); column 0 (wavelength)
    and any further columns are carried over unchanged. The result is
    written under the file name <category>_demean.

    Parameters
    ----------
    category : forwarded to get.data('trim', category) to select the files
    '''
    data_path = get.data('trim', category)
    for data_file in data_path:
        # The second '/'-separated path component names the category.
        data_category = data_file.split('/')[1]
        data_filename = data_category + '_' + 'demean'
        # The context manager closes the input file even on error.
        with h5py.File(data_file, 'r') as dataset:
            for data_name in dataset:
                spectrum = dataset[data_name][:,:]
                wavelength = spectrum[:, 0]
                demeaned_flux = demeaning(spectrum[:, 1])
                # Reassemble in one step; spectrum[:, 2:] is empty when
                # the spectrum has only two columns.
                demeaned_spectrum = np.column_stack([wavelength, demeaned_flux, spectrum[:, 2:]])
                convert_HDF5.write(data_category, str(data_name), data_filename, demeaned_spectrum)

示例#11

0

显示文件

文件： pca.py 项目： OdettaAnalytics/PCA

def form_matrix(category = None, rebin_type = 'log'):
    '''
    Collect the rebinned spectra into per-category and combined matrices.

    Parameters
    ----------
    category : forwarded to get.data('rebin', category, rebin_type)

    rebin_type : forwarded to get.data('rebin', category, rebin_type)

    Returns
    -------
    data_matrix : dict keyed by category name plus the extra key 'all';
                  each value is a dict with
                  'wavelength' : column 0 of every spectrum, one per row
                  'flux'       : column 1 of every spectrum, one per row
                  'keys'       : string array of the dataset names
                  'all' holds the spectra of every file, in file order.
                  With exactly one spectrum the arrays are 1-D, and with
                  none they are empty, as before.
    '''
    data_path = get.data('rebin', category, rebin_type)
    data_matrix = {}
    all_wavelength = []
    all_flux = []
    all_keys = []
    for data_file in data_path:
        # The second '/'-separated path component names the category.
        data_category = data_file.split('/')[1]
        category_wavelength = []
        category_flux = []
        # The context manager closes the file even if a read fails.
        with h5py.File(data_file, 'r') as dataset:
            # Materialise the names: on Python 3 keys() is a view, which
            # np.array cannot turn into a string array directly.
            names = list(dataset.keys())
            for data_name in names:
                spectrum = dataset[data_name][:]
                category_wavelength.append(spectrum[:, 0])
                category_flux.append(spectrum[:, 1])
        all_wavelength += category_wavelength
        all_flux += category_flux
        all_keys += names
        data_matrix[data_category] = {
            'wavelength': _stack_spectra(category_wavelength),
            'flux': _stack_spectra(category_flux),
            'keys': np.array(names, dtype = str),
        }
    data_matrix['all'] = {
        'wavelength': _stack_spectra(all_wavelength),
        'flux': _stack_spectra(all_flux),
        'keys': np.array(all_keys, dtype = str),
    }
    return data_matrix


def _stack_spectra(rows):
    '''
    Stack 1-D arrays as rows in a single call, avoiding the quadratic
    cost of growing a matrix one vstack at a time. Zero rows give an
    empty float64 array and one row gives that row as a 1-D array.
    '''
    if not rows:
        return np.array([], dtype = np.float64)
    if len(rows) == 1:
        return np.array(rows[0])
    return np.vstack(rows)

示例#12

0

显示文件

文件： deredshift.py 项目： leonliang10/PCA

def run(category = None):
    '''
    run() deredshifts all raw data from each category based on
    z value found and outputs to HDF5 File

    Column 0 of every raw file is divided by (1 + z); all other columns
    are carried over unchanged. The z value used is that of the first
    object name contained in the file's base name (the part before the
    first '.'). Files with no matching object are reported and skipped.

    Parameter
    ---------
    category : list of category to deredshift
    '''
    data_path = get.data('raw', category)
    object_z_file = get.z_value()
    object_names, z_values = extract_z_values(object_z_file)
    for data_file in data_path:
        path_parts = data_file.split('/')
        # Second path component is the category, last one the file name.
        data_category = path_parts[1]
        data_name = path_parts[-1]
        name = data_name.split('.')[0]
        spectrum = np.loadtxt(data_file)
        wavelength = spectrum[:, 0]
        rest_of_spectrum = spectrum[:, 1:]

        z_value = None
        for object_name, candidate_z in zip(object_names, z_values):
            if object_name in name:
                z_value = candidate_z
                break

        if z_value is None:
            # Parenthesised single argument prints identically on
            # Python 2 and Python 3.
            print('Cannot find z value for ' + str(data_name))
            continue

        deredshift_wavelength = wavelength/(1 + z_value)
        # Put the corrected wavelength back in front of the other columns.
        deredshift_spectrum = np.column_stack([deredshift_wavelength, rest_of_spectrum])
        data_filename = data_category + '_' + 'deredshift'
        convert_HDF5.write(data_category, str(data_name), data_filename, deredshift_spectrum)