def coefficients(category = None, rebin_type = 'log', n = 80, legend = True, save = True, show = False):
    '''
    scatter-plots row i of every category's 'coefficients_normal' matrix,
    for i in 0..n-1, one figure per row and one x position per category

    Parameters
    ----------
    category : list of strings of categories to plot (passed to get.data)

    rebin_type : string, not used by this function

    n : int indicating how many rows of 'coefficients_normal' to plot

    legend : boolean, draw the legend if True

    save : boolean, save each figure as an EPS file if True

    show : boolean, display each figure if True
    '''
    data_path = get.data('pca', category)
    mkdir.plots(category = 'all', data_type = 'pca/coefficients')
    for i in range(n):
        plt.figure()
        plt.grid()
        num_files = 0
        for k, data_file in enumerate(data_path):
            data_category = data_file.split('/')[1]
            dataset = h5py.File(data_file, 'r')
            try:
                row = dataset['coefficients_normal'][i,:]
            finally:
                # release the file handle even if row i does not exist
                dataset.close()
            # every point of file k sits at x = k; sizing x from the row
            # itself removes the old 100-point limit and no longer
            # overwrites the parameter n with the matrix width
            x = np.zeros(row.shape) + k
            plt.scatter(x, row, color = COLORS[k%len(COLORS)], label = data_category)
            num_files = k + 1
        # invisible point two units right of the last category; it only
        # widens the x axis (presumably to leave room for the legend)
        plt.scatter(np.array([num_files + 2.0]), np.array([0]), color = 'white')
        plt.title('coefficient ' + str(i))
        if legend:
            plt.legend()
        if save:
            name = 'supernova_data/all/plots/pca/coefficients/coefficient_' + str(i) + '.eps'
            plt.savefig(name, format='eps', dpi = 3500)
        if show:
            plt.show()
        plt.close()
def run(min_wave = 4000, max_wave = 8000, category = None):
    '''
    run() trims all input category's deredshifted data based on the
    minimum and maximum wavelength and outputs as HDF5 file

    Each spectrum is cut to the rows from the first wavelength >= min_wave
    through the last wavelength <= max_wave (both inclusive). A spectrum
    with no rows in that range is reported and skipped.

    Parameters
    ----------
    min_wave : int indicated minimum wavelength range

    max_wave : int indicated maximum wavelength range

    category : list of categories to trim
    '''
    data_path = get.data('deredshift', category)
    for data_file in data_path:
        data_category = data_file.split('/')[1]
        data_type = data_category + '_' + 'trim'
        dataset = h5py.File(data_file, 'r')
        try:
            for data_name in dataset:
                spectrum = dataset[data_name][:]
                wavelength = spectrum[:,0]
                num_wave = len(wavelength)
                # the bounds are recomputed from scratch for every spectrum,
                # so they can never be left over from the previous one
                first = next((i for i in range(num_wave)
                              if wavelength[i] >= min_wave), None)
                last = next((j for j in range(num_wave - 1, -1, -1)
                             if wavelength[j] <= max_wave), None)
                if first is None or last is None or last < first:
                    print('No data between ' + str(min_wave) + ' and ' + str(max_wave) + ' for ' + str(data_name))
                    continue
                # a spectrum lying entirely inside the range gives
                # first == 0 and last == num_wave - 1, i.e. it is
                # written out unchanged
                trimmed_spectrum = spectrum[first:last+1,:]
                convert_HDF5.write(data_category, str(data_name), data_type, trimmed_spectrum)
        finally:
            dataset.close()
def pcomponents(category = None, components = [[0,1]], legend = True, save = True, show = False):
    '''
    scatter-plots one row of 'coefficients_reduced' against another for
    every category, one figure per requested pair of rows

    Parameters
    ----------
    category : list of strings of categories to plot (passed to get.data)

    components : list of [i, j] pairs; row i is drawn on the x axis and
                 row j on the y axis (the default list is never modified)

    legend : boolean, draw the legend if True

    save : boolean, save each figure as an EPS file if True

    show : boolean, display each figure if True
    '''
    data_path = get.data('pca', category)
    mkdir.plots(category = 'all', data_type = 'pca/pcomponents')
    for component in components:
        i = component[0]
        j = component[1]
        plot_names = []
        plt.figure()
        # turned on exactly once: a second bare plt.grid() call would
        # toggle the grid back off
        plt.grid()
        for k, data_file in enumerate(data_path):
            data_category = data_file.split('/')[1]
            dataset = h5py.File(data_file, 'r')
            try:
                coefficients_reduced = dataset['coefficients_reduced'][:]
            finally:
                dataset.close()
            cx = coefficients_reduced[i,:]
            cy = coefficients_reduced[j,:]
            # label each series with the category of its own file, not
            # with the category argument of this function
            plt.scatter(cx, cy, color = COLORS[k%len(COLORS)], label = data_category)
            plot_names.append(data_category)
        if legend:
            plt.legend(plot_names, loc='right', bbox_to_anchor = (1.1, 0.2), fancybox = True)
        plt.xlabel('c' + str(i))
        plt.ylabel('c' + str(j))
        plt.title('c' + str(i) + ' vs ' + 'c' + str(j))
        if save:
            name = 'supernova_data/all/plots/pca/pcomponents/' + 'c' + str(i) + '_vs_' + 'c' + str(j) + '.eps'
            plt.savefig(name, format='eps', dpi = 3500)
        if show:
            plt.show()
        plt.close()
def run(min_wave = 4000, max_wave = 8000, n_rebin = 2000, category = None, rebin_type = 'log'):
    '''
    rebin each of the demeaned data in the category to desired number of
    points and write the result through convert_HDF5.write

    Parameters
    ----------
    min_wave : int indicating the minimum wavelength range

    max_wave : int indicating the maximum wavelength range

    n_rebin : int indicating the number of points wanted for rebin

    category : list of strings of category for rebinning

    rebin_type : string indicating the type of rebin wanted; 'linear'
                 gives evenly spaced points, any other value gives
                 logarithmically spaced points
    '''
    f_x = interpolation(min_wave, max_wave, category)
    data_path = get.data('demean', category)
    # the new wavelength grid is the same for every spectrum, so build it
    # once; max_wave itself is excluded (endpoint = False)
    if rebin_type == 'linear':
        new_wavelength = np.linspace(min_wave, max_wave, num = n_rebin, endpoint = False)
    else:
        new_wavelength = np.logspace(np.log10(min_wave), np.log10(max_wave), num = n_rebin, endpoint = False)
    for data_file in data_path:
        data_category = data_file.split('/')[1]
        data_filename = data_category + '_rebin_' + rebin_type
        # the file is opened only to list its dataset names; the fluxes
        # come from the interpolation functions in f_x
        dataset = h5py.File(data_file, 'r')
        try:
            data_names = [str(data_name) for data_name in dataset]
        finally:
            dataset.close()
        for data_name in data_names:
            new_flux = f_x[data_name](new_wavelength)
            new_rebin_data = np.vstack([new_wavelength, new_flux]).T
            convert_HDF5.write(data_category, data_name, data_filename, new_rebin_data)
def interpolation(min_wave, max_wave, category = None):
    '''
    generates one interpolation function per demeaned spectrum

    Parameters
    ----------
    min_wave : int indicating the minimum wavelength range
               (accepted but not used here)

    max_wave : int indicating the maximum wavelength range
               (accepted but not used here)

    category : list of strings of category for rebinning

    Returns
    -------
    f_x : dict mapping each dataset name to its interpolation function;
          the function returns 0 outside the spectrum's wavelength range
    '''
    data_path = get.data('demean', category)
    f_x = {}
    for data_file in data_path:
        dataset = h5py.File(data_file, 'r')
        try:
            for data_name in dataset:
                spectrum = dataset[data_name][:,:]
                wavelength = spectrum[:,0]
                flux = spectrum[:,1]
                # drop rows whose wavelength is exactly zero
                # (presumably padding -- TODO confirm)
                nonzeros = np.nonzero(wavelength)
                f_x[data_name] = interpolate.interp1d(wavelength[nonzeros], flux[nonzeros],
                                                      bounds_error = False, fill_value = 0)
        finally:
            dataset.close()
    return f_x
def K_reduced(category = None, data_file = None, legend = True, save = True, show = False):
    '''
    plots one spectrum together with its reconstruction from the first
    1, 2, ... 7 columns of U, one figure per number of columns; below the
    spectrum each single-column contribution is drawn in black, shifted
    down so the curves do not overlap

    Exits the program (sys.exit) if data_file is not among the stored keys.

    Parameters
    ----------
    category : string used in the plot label, title and output file names

    data_file : string, key of the spectrum to plot; matched against the
                'keys' dataset of the PCA file

    legend : boolean, draw the legend if True

    save : boolean, save each figure as EPS and the first six reduced
           coefficients as text if True

    show : boolean, display each figure if True
    '''
    mkdir.plots(category = 'all', data_type = 'pca/K_reduced')
    data_path = get.data('pca', 'all')
    pca_dataset = h5py.File(data_path[0], 'r')
    try:
        matches = np.where(pca_dataset['keys'][:] == data_file)[0]
        if matches.shape[0] == 0:
            print('Cannot find specific spectrum entered.')
            sys.exit()
        spectrum_index = matches[0]
        # everything needed is copied into memory so the file can be closed
        wavelength = pca_dataset['wavelength'][spectrum_index,:]
        flux = pca_dataset['flux'][spectrum_index,:]
        U = pca_dataset['U'][:,:]
    finally:
        pca_dataset.close()
    U_reduced = np.zeros(U.shape)
    # at most 7 figures, fewer if U has fewer columns
    for i in range(min(7, U.shape[1])):
        offset = 0
        # columns 0..i-1 were copied by earlier iterations; add column i
        U_reduced[:,i] = U[:,i]
        coefficients_reduced = (flux.dot(U_reduced)).T
        reconstruction = (U_reduced.dot(coefficients_reduced)).T
        plt.figure()
        plt.grid()
        plt.plot(wavelength, flux, label = category)
        plt.plot(wavelength, reconstruction, label = 'sum(0:' + str(i) + ')', color = 'red')
        previous_single = 0
        for k in range(i+1):
            # contribution of column k alone: (flux . u_k) * u_k
            single_coefficient = flux.dot(U_reduced[:,k])
            single = U_reduced[:,k] * single_coefficient
            # push each curve below the lower of the previous curve
            # (the full reconstruction for k == 0) and the flux
            if k == 0:
                offset += max(single) - min(min(reconstruction), min(flux))
            else:
                offset += max(single) - min(min(previous_single), min(flux))
            plt.plot(wavelength, single - offset, label = str(k), color = 'black')
            previous_single = single
        plt.xlabel('wavelength')
        plt.ylabel('flux')
        # %-formatting instead of + so category = None does not raise
        plt.title('%s/%s' % (category, data_file))
        if legend:
            plt.legend()
        if save:
            name = 'supernova_data/all/plots/pca/K_reduced/%s_%s.eps' % (category, i)
            plt.savefig(name, format='eps', dpi = 3500)
            # rewritten on every pass; the last pass leaves the final values
            np.savetxt('supernova_data/all/plots/pca/K_reduced/%s_coefficients_reduced.txt' % (category,), coefficients_reduced[:6])
        if show:
            plt.show()
        plt.close()
def raw(category = None):
    '''
    plots flux against wavelength for every raw spectrum file and saves
    each plot as an EPS file under that spectrum's category

    Parameters
    ----------
    category : list of strings of categories to plot (passed to get.data)
    '''
    spectrum_files = get.data('raw', category)
    mkdir.plots(category = None, data_type = 'raw')
    for spectrum_file in spectrum_files:
        path_parts = spectrum_file.split('/')
        table = np.loadtxt(spectrum_file)
        # column 0 holds the wavelengths, column 1 the fluxes
        wavelength, flux = table[:,0], table[:,1]
        plt.figure()
        plt.grid()
        plt.plot(wavelength, flux)
        # the fifth path component is used as the spectrum name
        name = path_parts[4]
        plt.title(name)
        plt.xlabel('wavelength')
        plt.ylabel('flux')
        # the second path component is used as the category
        data_category = path_parts[1]
        filename = ''.join(['supernova_data/', data_category, '/plots/raw/', name, '.eps'])
        plt.savefig(filename, format='eps', dpi = 3500)
        plt.close()
def rebin(category = None, rebin_type = 'log'):
    '''
    plots flux against wavelength for every rebinned spectrum and saves
    each plot as an EPS file under that spectrum's category

    Parameters
    ----------
    category : list of strings of categories to plot (passed to get.data)

    rebin_type : string indicating which rebinned data to plot
                 (log or linear); also part of the output directory name
    '''
    # rebin_type is passed to get.data the same way form_matrix does it
    data_path = get.data('rebin', category, rebin_type)
    # keyword is data_type, as in every other mkdir.plots call
    mkdir.plots(category = None, data_type = 'rebin_' + rebin_type)
    for data_file in data_path:
        data_category = data_file.split('/')[1]
        dataset = h5py.File(data_file, 'r')
        try:
            for data_name in dataset:
                # read the dataset once instead of once per column
                spectrum = dataset[data_name][:, :]
                wavelength = spectrum[:, 0]
                flux = spectrum[:, 1]
                plt.figure()
                plt.grid()
                plt.plot(wavelength, flux)
                plt.xlabel('wavelength')
                plt.ylabel('flux')
                plt.title(data_name)
                filename = 'supernova_data/' + data_category + '/plots/rebin_' + rebin_type + '/' + data_name + '.eps'
                plt.savefig(filename, format='eps', dpi = 3500)
                plt.close()
        finally:
            dataset.close()
def U_matrix(category = None, legend = True, save = True, show = False):
    '''
    plots the columns of the PCA matrix U against wavelength: first every
    column in its own figure, then the columns in groups of five per
    figure, stacked with a vertical offset

    The x axis is an evenly spaced grid from 4000 to 8000 with one point
    per row of U.

    Parameters
    ----------
    category : not used by this function

    legend : boolean, draw the legend on the grouped figures if True

    save : boolean, save each figure as an EPS file if True

    show : boolean, display each figure if True
    '''
    data_path = get.data('pca', 'all')
    mkdir.plots(category = 'all', data_type = 'pca/U')
    mkdir.plots(category = 'all', data_type = 'pca/individual_U')
    dataset = h5py.File(data_path[0], 'r')
    try:
        # read U once instead of fetching each column from disk
        U = dataset['U'][:,:]
    finally:
        dataset.close()
    # sizes come from U itself rather than a fixed 2000
    n_rows, n_columns = U.shape
    wavelength = np.linspace(4000, 8000, n_rows)
    for i in range(n_columns):
        plt.figure()
        plt.plot(wavelength, U[:,i])
        plt.grid()
        plt.xlabel('wavelength')
        plt.ylabel('U[:,' + str(i) + ']')
        plt.title('column ' + str(i) + ' of U')
        if save:
            name = 'supernova_data/all/plots/pca/individual_U/column_' + str(i) + '_of_U.eps'
            plt.savefig(name, format='eps', dpi = 3500)
        if show:
            plt.show()
        plt.close()
    group_size = 5
    for j in range(0, n_columns, group_size):
        plt.figure()
        plot_names = []
        offset = 0
        # the last group is shorter if n_columns is not a multiple of five
        for k, column in enumerate(range(j, min(j + group_size, n_columns))):
            plt.plot(wavelength, U[:,column] + offset, color = COLORS[k%len(COLORS)], label = str(column))
            # raise the next curve above the maximum of this one
            offset += max(U[:,column]) + 0.2
            plot_names.append(str(column))
        plt.grid()
        plt.xlabel('wavelength')
        plt.ylabel('U[:,i]')
        plt.title('columns ' + ' '.join(plot_names) + ' of U')
        if legend:
            plt.legend(plot_names, loc='right', bbox_to_anchor = (1.1, 0.2), fancybox = True)
        if save:
            name = 'supernova_data/all/plots/pca/U/columns_' + plot_names[0] + '-' + plot_names[-1] + '_of_U.eps'
            plt.savefig(name, format='eps', dpi = 3500)
        if show:
            plt.show()
        plt.close()
def demean_flux(category = None):
    '''
    replaces the flux column of every trimmed spectrum with the output of
    demeaning() and writes the result through convert_HDF5.write; the
    wavelength column and any columns after the flux are kept unchanged

    Parameters
    ----------
    category : list of strings of categories to process (passed to get.data)
    '''
    data_path = get.data('trim', category)
    for data_file in data_path:
        data_category = data_file.split('/')[1]
        data_filename = data_category + '_' + 'demean'
        dataset = h5py.File(data_file, 'r')
        try:
            for data_name in dataset:
                spectrum = dataset[data_name][:,:]
                wavelength = spectrum[:, 0]
                demeaned_flux = demeaning(spectrum[:, 1])
                # first two columns: wavelength and demeaned flux
                leading_columns = np.vstack([wavelength, demeaned_flux]).T
                # spectrum[:, 2:] is empty when there are only two columns,
                # so no special case is needed for the remaining columns
                demeaned_spectrum = np.column_stack([leading_columns, spectrum[:, 2:]])
                convert_HDF5.write(data_category, str(data_name), data_filename, demeaned_spectrum)
        finally:
            dataset.close()
def _stack_spectra(rows):
    '''stacks a list of equal-length 1-D arrays into one matrix, one row each'''
    if not rows:
        return np.array([], dtype = np.float64)
    if len(rows) == 1:
        # NOTE: a single spectrum is returned as a 1-D array, not a 1 x N
        # matrix, so existing callers see the same shape as before
        return rows[0]
    return np.vstack(rows)

def form_matrix(category = None, rebin_type = 'log'):
    '''
    collects the rebinned spectra into wavelength and flux matrices, one
    set per category plus one combined set under the key 'all'

    Parameters
    ----------
    category : list of strings of categories to include (passed to get.data)

    rebin_type : string indicating which rebinned data to read
                 (passed to get.data)

    Returns
    -------
    data_matrix : dict mapping each category name, and 'all', to a dict
                  with entries
                  'wavelength' : one row per spectrum
                  'flux'       : one row per spectrum
                  'keys'       : string array of the dataset names, in
                                 the same order as the rows
    '''
    data_path = get.data('rebin', category, rebin_type)
    data_matrix = {}
    all_wavelength = []
    all_flux = []
    all_keys = []
    for data_file in data_path:
        data_category = data_file.split('/')[1]
        category_wavelength = []
        category_flux = []
        dataset = h5py.File(data_file, 'r')
        try:
            # list() so the result is a real sequence on Python 3 as well
            keys = list(dataset.keys())
            for data_name in keys:
                spectrum = dataset[data_name][:, :]
                category_wavelength.append(spectrum[:, 0])
                category_flux.append(spectrum[:, 1])
        finally:
            dataset.close()
        all_wavelength += category_wavelength
        all_flux += category_flux
        all_keys += keys
        # rows are gathered in lists and stacked once; stacking after
        # every spectrum re-copies the whole matrix each time
        data_matrix[data_category] = {
            'wavelength': _stack_spectra(category_wavelength),
            'flux': _stack_spectra(category_flux),
            'keys': np.array(keys, dtype = str),
        }
    data_matrix['all'] = {
        'wavelength': _stack_spectra(all_wavelength),
        'flux': _stack_spectra(all_flux),
        'keys': np.array(all_keys, dtype = str),
    }
    return data_matrix
def run(category = None):
    '''
    run() deredshifts all raw data from each category based on z value
    found and outputs to HDF5 File

    The z value of a file is that of the first object whose name occurs
    in the file name (extension removed). Files with no matching object
    are reported and skipped.

    Parameter
    ---------
    category : list of category to deredshift
    '''
    data_path = get.data('raw', category)
    object_z_file = get.z_value()
    object_names, z_values = extract_z_values(object_z_file)
    for data_file in data_path:
        path_parts = data_file.split('/')
        data_category = path_parts[1]
        data_name = path_parts[-1]
        name = data_name.split('.')[0]
        z_value = None
        for object_name, object_z in zip(object_names, z_values):
            if object_name in name:
                z_value = object_z
                break
        # look up z before reading the file so unmatched files are not loaded
        if z_value is None:
            print('Cannot find z value for ' + str(data_name))
            continue
        spectrum = np.loadtxt(data_file)
        # only the wavelength column is rescaled: rest = observed / (1 + z)
        deredshift_wavelength = spectrum[:, 0]/(1 + z_value)
        deredshift_spectrum = np.column_stack([deredshift_wavelength, spectrum[:, 1:]])
        data_filename = data_category + '_' + 'deredshift'
        convert_HDF5.write(data_category, str(data_name), data_filename, deredshift_spectrum)