def run(min_wave=4000, max_wave=8000, category=None):
    '''Trim each category's deredshifted spectra to the window
    [min_wave, max_wave] and write the result out via convert_HDF5.

    Parameters
    ----------
    min_wave : int
        Minimum wavelength to keep.
    max_wave : int
        Maximum wavelength to keep.
    category : list or None
        Categories to trim; None is passed through to get.data.
    '''
    data_path = get.data('deredshift', category)
    for data_file in data_path:
        dataset = h5py.File(data_file, 'r')
        try:
            data_category = data_file.split('/')[1]
            data_type = data_category + '_' + 'trim'
            for data_name in dataset:
                spectrum = dataset[data_name][:]
                wavelength = spectrum[:, 0]
                # Spectrum already lies strictly inside the window:
                # write it through unchanged.
                if (min(wavelength) > min_wave) and (max(wavelength) < max_wave):
                    convert_HDF5.write(data_category, str(data_name), data_type, spectrum)
                    continue
                [num_wave, ] = wavelength.shape
                # First index at or above min_wave.  Guarded: the original
                # raised NameError when no sample reached min_wave.
                min_range_start = None
                for i in range(num_wave):
                    if wavelength[i] >= min_wave:
                        min_range_start = i
                        break
                if min_range_start is None:
                    print('No wavelengths >= min_wave in ' + str(data_name) + '; skipped')
                    continue
                # Last index at or below max_wave, scanning from the top.
                # (range replaces Py2-only xrange; same semantics.)
                max_range_start = None
                for j in range(num_wave - 1, min_range_start, -1):
                    if wavelength[j] <= max_wave:
                        max_range_start = j
                        break
                if max_range_start is None:
                    # Only min_range_start itself is left as a candidate.
                    if wavelength[min_range_start] <= max_wave:
                        max_range_start = min_range_start
                    else:
                        # No samples inside the window at all.
                        print('No wavelengths <= max_wave in ' + str(data_name) + '; skipped')
                        continue
                trimmed_spectrum = spectrum[min_range_start:max_range_start + 1, :]
                convert_HDF5.write(data_category, str(data_name), data_type, trimmed_spectrum)
        finally:
            # The original leaked the HDF5 file handle.
            dataset.close()
def run(min_wave=4000, max_wave=8000, n_rebin=2000, category=None, rebin_type='log'):
    '''Rebin every demeaned spectrum in the category onto a common grid
    of n_rebin points and write the result out via convert_HDF5.

    Parameters
    ----------
    min_wave : int
        Minimum wavelength of the rebinned grid.
    max_wave : int
        Maximum wavelength of the rebinned grid (excluded; endpoint=False).
    n_rebin : int
        Number of points in the rebinned grid.
    category : list or None
        Categories to rebin; None is passed through to get.data.
    rebin_type : str
        'linear' for an evenly spaced grid, anything else gives log spacing.
    '''
    f_x = interpolation(min_wave, max_wave, category)
    # The target grid depends only on the arguments, so build it once.
    # The original rebuilt it for every spectrum (loop-invariant work).
    if rebin_type == 'linear':
        new_wavelength = np.linspace(min_wave, max_wave, num=n_rebin, endpoint=False)
    else:
        new_wavelength = np.logspace(np.log10(min_wave), np.log10(max_wave),
                                     num=n_rebin, endpoint=False)
    data_path = get.data('demean', category)
    for data_file in data_path:
        dataset = h5py.File(data_file, 'r')
        try:
            data_category = data_file.split('/')[1]
            # Output name is the same for every spectrum in this file.
            data_filename = data_category + '_rebin_' + rebin_type
            for data_name in dataset:
                # Evaluate the pre-built interpolant on the common grid.
                f = f_x[str(data_name)]
                new_flux = f(new_wavelength)
                new_rebin_data = np.vstack([new_wavelength, new_flux]).T
                convert_HDF5.write(data_category, str(data_name), data_filename, new_rebin_data)
        finally:
            # The original leaked the HDF5 file handle.
            dataset.close()
def demean_flux(category=None):
    '''Subtract the mean flux (via demeaning) from every trimmed spectrum
    in the category and write the result out via convert_HDF5.

    Parameters
    ----------
    category : list or None
        Categories to process; None is passed through to get.data.
    '''
    data_path = get.data('trim', category)
    for data_file in data_path:
        dataset = h5py.File(data_file, 'r')
        try:
            data_category = data_file.split('/')[1]
            data_filename = data_category + '_' + 'demean'
            for data_name in dataset:
                spectrum = dataset[data_name][:, :]
                wavelength = spectrum[:, 0]
                flux = spectrum[:, 1]
                demeaned_flux = demeaning(flux)
                # One column_stack replaces the original's repeated
                # per-column vstack + transpose; spectrum[:, 2:] is an
                # empty (n, 0) array when there are no extra columns,
                # so both shapes come out identical.  (Also drops the
                # unused `name` and `nrows` locals.)
                demeaned_spectrum = np.column_stack([wavelength, demeaned_flux,
                                                     spectrum[:, 2:]])
                convert_HDF5.write(data_category, str(data_name), data_filename, demeaned_spectrum)
        finally:
            # The original leaked the HDF5 file handle.
            dataset.close()
def run(category=None):
    '''Deredshift every raw spectrum using the z value matched from the
    object catalogue and write the result out via convert_HDF5.

    Parameters
    ----------
    category : list or None
        Categories to deredshift; None is passed through to get.data.
    '''
    data_path = get.data('raw', category)
    object_z_file = get.z_value()
    object_names, z_values = extract_z_values(object_z_file)
    for data_file in data_path:
        filename = data_file.split('/')
        data_category = filename[1]
        data_name = filename[-1]
        name = data_name.split('.')[0]
        spectrum = np.loadtxt(data_file)
        wavelength = spectrum[:, 0]
        rest_of_spectrum = spectrum[:, 1:]
        # First catalogue entry whose object name appears in the filename.
        # (zip replaces the index loop; `in` replaces find() != -1.)
        z_value = None
        for obj_name, z in zip(object_names, z_values):
            if obj_name in name:
                z_value = z
                break
        # Identity test: `is None`, not `!= None` (z_value == 0 is valid).
        if z_value is None:
            print('Cannot find z value for ' + str(data_name))
            continue
        deredshift_wavelength = wavelength / (1 + z_value)
        # One column_stack replaces the original's repeated per-column
        # vstack + transpose; identical result including the
        # no-extra-columns case.
        deredshift_spectrum = np.column_stack([deredshift_wavelength, rest_of_spectrum])
        data_filename = data_category + '_' + 'deredshift'
        convert_HDF5.write(data_category, str(data_name), data_filename, deredshift_spectrum)
def save_pca(data_matrix):
    '''Write the PCA products of the 'all' category to its HDF5 output.

    Only the 'all' entry of data_matrix is persisted; every other
    category is skipped (matching the original behavior).

    Parameters
    ----------
    data_matrix : dict
        Maps category name to its SVD/coefficient/kernel results.
    '''
    for data_category in data_matrix:
        data_filename = data_category + "_pca"
        if data_category != 'all':
            continue
        entry = data_matrix[data_category]
        # Dataset name -> payload, written in the same order as before.
        payloads = [
            ('U', entry['svd']['U']),
            ('U_reduced', entry['svd']['U_reduced']),
            ('S', entry['svd']['S']),
            ('V', entry['svd']['V']),
            ('wavelength', entry['wavelength']),
            ('flux', entry['flux']),
            ('keys', entry['keys']),
            ('coefficients_normal', entry['coefficients']['normal']),
            ('coefficients_reduced', entry['coefficients']['reduced']),
            ('K_normal', entry['K']['normal']),
            ('K_reduced', entry['K']['reduced']),
        ]
        for dataset_name, payload in payloads:
            convert_HDF5.write(data_category, dataset_name, data_filename, payload)