def setUp(self):
    """
    Build the fixture spectra shared by the tests: an 'observed' spectrum,
    a flat 'absorption' spectrum on the same raster, and a factory for
    fitting 3-term polynomials.
    """
    size = 50
    self._size = size

    # Observed spectrum: values equal the wavelength raster; unit errors
    self._observed_raster = np.arange(size, dtype=np.float64)
    self._observed_values = np.arange(size, dtype=np.float64)
    self._observed_value_errors = np.ones(size, dtype=np.float64)
    self._observed = fourgp_speclib.Spectrum(
        wavelengths=self._observed_raster,
        values=self._observed_values,
        value_errors=self._observed_value_errors,
        metadata={"origin": "unit-test"})

    # Absorption spectrum: unit values and unit errors on the same raster
    self._absorption_values = np.ones(size, dtype=np.float64)
    self._absorption_value_errors = np.ones(size, dtype=np.float64)
    self._absorption = fourgp_speclib.Spectrum(
        wavelengths=self._observed_raster,
        values=self._absorption_values,
        value_errors=self._absorption_value_errors,
        metadata={"origin": "unit-test"})

    # Factory which fits 3-term polynomial continua on the observed raster
    self._polynomial = fourgp_speclib.SpectrumSmoothFactory(
        function_family=fourgp_speclib.SpectrumPolynomial,
        wavelengths=self._observed_raster,
        terms=3)
def test_search_1d_numerical_range(self):
    """
    Check that we can search for spectra on a simple metadata numerical
    range constraint.
    """
    size = 50
    x_values = list(range(10))

    # Populate the library with ten random spectra, tagged x_value = 0..9
    for x in x_values:
        spectrum = fourgp_speclib.Spectrum(
            wavelengths=np.arange(size),
            values=np.random.random(size),
            value_errors=np.random.random(size),
            metadata={"origin": "unit-test", "x_value": x})
        self._lib.insert(spectrum, "x_{}".format(x))

    # Query for spectra whose x_value lies strictly inside this range
    x_range = [4.5, 8.5]
    x_values_expected = [x for x in x_values if x_range[0] < x < x_range[1]]

    matches = self._lib.search(x_value=x_range)
    ids = [str(match["specId"]) for match in matches]
    metadata = self._lib.get_metadata(ids=ids)
    x_values = [entry['x_value'] for entry in metadata]

    # The search should return exactly the spectra inside the range
    self.assertEqual(x_values, x_values_expected)
def test_spectrum_retrieval(self):
    """
    Check that a single spectrum stored in the SpectrumLibrary can be
    read back unchanged.
    """
    size = 50

    # Build a random spectrum and insert it into the library
    raster = np.arange(size)
    values = np.random.random(size)
    value_errors = np.random.random(size)
    input_spectrum = fourgp_speclib.Spectrum(
        wavelengths=raster,
        values=values,
        value_errors=value_errors,
        metadata={"origin": "unit-test"})
    self._lib.insert(input_spectrum, "dummy_filename")

    # Read it back as a SpectrumArray and pull out the first spectrum
    search_results = self._lib.search()
    spectrum_array = self._lib.open(filenames=search_results[0]['filename'])
    retrieved = spectrum_array.extract_item(0)

    # The round trip should preserve the spectrum exactly
    self.assertEqual(retrieved, input_spectrum)
def test_search_1d_string_value(self):
    """
    Check that we can search for spectra on a simple metadata string
    point-value constraint.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    size = 50
    x_values = list(range(10))

    # Insert ten random spectra, each tagged with a three-letter substring
    # of the alphabet ("abc", "bcd", ..., "jkl")
    for x in x_values:
        spectrum = fourgp_speclib.Spectrum(
            wavelengths=np.arange(size),
            values=np.random.random(size),
            value_errors=np.random.random(size),
            metadata={"origin": "unit-test", "x_value": alphabet[x:x + 3]})
        self._lib.insert(spectrum, "x_{}".format(x))

    # Only the spectrum tagged "def" should match this query
    matches = self._lib.search(x_value="def")
    filenames_got = [str(match["filename"]) for match in matches]
    x_values_got = sorted(
        str(entry["x_value"])
        for entry in self._lib.get_metadata(filenames=filenames_got))

    # Check that we got back the one spectrum tagged "def"
    self.assertEqual(x_values_got, ["def"])
def test_search_1d_numerical_value(self):
    """
    Check that we can search for spectra on a simple metadata numerical
    point-value constraint.
    """
    size = 50
    x_values = list(range(10))

    # Populate the library with ten random spectra, tagged x_value = 0..9
    for x in x_values:
        spectrum = fourgp_speclib.Spectrum(
            wavelengths=np.arange(size),
            values=np.random.random(size),
            value_errors=np.random.random(size),
            metadata={"origin": "unit-test", "x_value": x})
        self._lib.insert(spectrum, "x_{}".format(x))

    # Exactly one spectrum was tagged with x_value == 5
    matches = self._lib.search(x_value=5)
    ids = [str(match["specId"]) for match in matches]
    metadata = self._lib.get_metadata(ids=ids)
    x_values = [entry['x_value'] for entry in metadata]

    # Check that we got back the same spectrum we put in
    self.assertEqual(x_values, [5])
def upsample_spectrum(self, input, upsampling_factor):
    """
    Upsample a spectrum object using cubic spline interpolation.

    :param input: The Spectrum object we should up sample
    :param upsampling_factor: The integer factor by which to up-sample the spectrum
    :return: An up-sampled Spectrum object
    """
    wavelengths_in = input.wavelengths

    # Multiplicative spacing between adjacent input wavelengths, and the
    # finer spacing required between adjacent output wavelengths
    spacing_in = wavelengths_in[1] / wavelengths_in[0]
    spacing_out = pow(spacing_in, 1. / upsampling_factor)

    # We impose an explicit length on the output, because the arange() here
    # is numerically unstable about whether it includes the final point or not
    points_out = (len(wavelengths_in) - 1) * upsampling_factor
    raster_out = logarithmic_raster(
        lambda_min=wavelengths_in[0],
        lambda_max=wavelengths_in[-1],
        lambda_step=wavelengths_in[0] * (spacing_out - 1))[:points_out]

    # Spline-interpolate the input values onto the finer raster
    interpolator = InterpolatedUnivariateSpline(x=wavelengths_in, y=input.values)

    # NOTE: the up-sampled spectrum carries zero value errors
    return fourgp_speclib.Spectrum(wavelengths=raster_out,
                                   values=interpolator(raster_out),
                                   value_errors=np.zeros_like(raster_out),
                                   metadata=input.metadata)
def test_addition_multiplication(self):
    """
    Try adding spectra together repeatedly using the __add__ and __iadd__
    methods. Check that this is the same as multiplying the spectrum by a
    fixed integer, via the __mul__ and __imul__ methods.
    """
    # Raster of multipliers, refilled with a new constant on each pass
    multiplier = np.empty(self._size)
    failures = 0
    for i in range(2, 5):
        # Two zero spectra to accumulate sums into
        sum_1 = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                        values=np.zeros(self._size),
                                        value_errors=self._value_errors)
        sum_2 = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                        values=np.zeros(self._size),
                                        value_errors=self._value_errors)
        # Test __add__ method
        for j in range(i):
            sum_1 = sum_1 + self._spectrum
        # Test __iadd__ method
        for j in range(i):
            sum_2 += self._spectrum
        # Test __mul__ method: a constant spectrum of value i times the test spectrum
        multiplier.fill(i)
        b = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                    values=multiplier,
                                    value_errors=self._value_errors)
        sum_3 = b * self._spectrum
        # Test __imul__ method
        b *= self._spectrum
        # Check that all four calculations reached the same result
        if sum_1 != sum_2:
            failures += 1
        if sum_1 != sum_3:
            failures += 1
        if sum_1 != b:
            failures += 1
        del sum_1, sum_2, sum_3, b
    # Check that none of the calculations failed
    self.assertEqual(failures, 0)
def test_data_sizes_must_match_2(self):
    """
    Fitting a continuum via a template must reject a spectrum sampled on a
    raster of a different length from the factory's raster.
    """
    with self.assertRaises(AssertionError):
        # A spectrum ten pixels longer than the factory's raster
        mismatched_raster = np.arange(self._size + 10, dtype=np.float64)
        mismatched = fourgp_speclib.Spectrum(wavelengths=mismatched_raster,
                                             values=mismatched_raster,
                                             value_errors=mismatched_raster)
        self._polynomial.fit_to_continuum_via_template(other=mismatched,
                                                       template=self._absorption)
def test_subtraction_division(self):
    """
    Try subtracting a spectrum from zero N times, then dividing by a
    constant spectrum of value -N, using both the __sub__/__isub__ and the
    __truediv__/__itruediv__ methods. Each route should recover the
    original spectrum.
    """
    # Raster of divisors, refilled with a new constant on each pass
    multiplier = np.empty(self._size)
    failures = 0
    for i in range(2, 5):
        # Two zero spectra to accumulate differences into
        sum_1 = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                        values=np.zeros(self._size),
                                        value_errors=self._value_errors)
        sum_2 = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                        values=np.zeros(self._size),
                                        value_errors=self._value_errors)
        # Test __sub__ method
        for j in range(i):
            sum_1 = sum_1 - self._spectrum
        # Test __isub__ method
        for j in range(i):
            sum_2 -= self._spectrum
        # Test __truediv__ method: divide the -i*spectrum sums by a constant -i
        multiplier.fill(-i)
        b = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                    values=multiplier,
                                    value_errors=self._value_errors)
        sum_3 = sum_1 / b
        sum_4 = sum_2 / b
        # Test __itruediv__ method
        sum_1 /= b
        sum_2 /= b
        # Check that all four calculations recovered the original spectrum
        for item in [sum_1, sum_2, sum_3, sum_4]:
            if item != self._spectrum:
                failures += 1
        del sum_1, sum_2, sum_3, sum_4, b
    # Check that none of the calculations failed
    self.assertEqual(failures, 0)
def setUp(self):
    """
    Build the single Spectrum fixture used by the tests below.
    """
    size = 50
    self._size = size
    self._raster = np.arange(size)
    # Values run from 100 upward, so they never coincide with the raster
    self._values = np.arange(100, size + 100)
    self._value_errors = np.random.random(size)
    self._spectrum = fourgp_speclib.Spectrum(wavelengths=self._raster,
                                             values=self._values,
                                             value_errors=self._value_errors,
                                             metadata={"origin": "unit-test"})
def test_search_illegal_metadata(self):
    """
    Searching on a metadata field which no stored spectrum defines should
    raise an AssertionError.
    """
    size = 50

    # Insert a single random spectrum which carries no 'x_value' metadata
    spectrum = fourgp_speclib.Spectrum(wavelengths=np.arange(size),
                                       values=np.random.random(size),
                                       value_errors=np.random.random(size),
                                       metadata={"origin": "unit-test"})
    self._lib.insert(spectrum, "dummy_filename")

    # Querying the undefined field must fail
    with self.assertRaises(AssertionError):
        self._lib.search(x_value=23)
def fit_spectrum(self, spectrum):
    """
    Fit stellar labels to a spectrum which has not been continuum normalised.

    Iteratively: fit a smooth continuum model through the pixels currently
    believed to be continuum, normalise the spectrum by it, run the Cannon
    on the result, and use the Cannon's model spectrum to re-select the
    continuum pixels. Stops after a fixed number of iterations.

    :param spectrum: A Spectrum object containing the spectrum for the Cannon to fit.
    :type spectrum: Spectrum
    :return: The (labels, cov, meta) tuple from the underlying Cannon fit.
    """
    assert isinstance(spectrum, fourgp_speclib.Spectrum), \
        "Supplied spectrum for the Cannon to fit is not a Spectrum object."
    assert spectrum.raster_hash == self._training_set.raster_hash, \
        "Supplied spectrum for the Cannon to fit is not sampled on the same raster as the training set."

    if self._debugging:
        self._debugging_output_counter += 1

    # Fitting tolerances
    max_iterations = 20  # Iterate a maximum number of times

    # Work out the raster of pixels inside each wavelength arm
    raster = spectrum.wavelengths
    lower_cut = 0
    arm_rasters = []
    for break_point in self._wavelength_arms:
        arm_rasters.append((raster >= lower_cut) * (raster < break_point))
        lower_cut = break_point
    arm_rasters.append(raster >= lower_cut)

    # Make initial continuum mask, which covers entire spectrum
    continuum_mask = np.ones_like(raster, dtype=bool)

    # Begin iterative fitting of continuum
    iteration = 0
    while True:
        iteration += 1

        # Treat each wavelength arm separately
        continuum_models = []
        for i, arm_raster in enumerate(arm_rasters):
            # Make a mask of pixels which are both continuum and inside this wavelength arm,
            # excluding pixels with non-finite or non-positive uncertainties
            pixel_mask = (arm_raster * continuum_mask * np.isfinite(spectrum.value_errors) * (spectrum.value_errors > 0))
            # logger.info("Continuum pixels in arm {}: {} / {}".format(i, sum(pixel_mask), len(pixel_mask)))
            continuum_raster = raster[pixel_mask]
            continuum_values = spectrum.values[pixel_mask]
            continuum_value_errors = spectrum.value_errors[pixel_mask]

            # Make a new spectrum object containing only continuum pixels inside this wavelength arm
            continuum_spectrum = fourgp_speclib.Spectrum(
                wavelengths=continuum_raster,
                values=continuum_values,
                value_errors=continuum_value_errors,
            )
            # logger.info("Continuum spectrum length: {}".format(len(continuum_spectrum)))

            # Fit a smooth function through these pixels
            continuum_model_factory = fourgp_speclib.SpectrumSmoothFactory(
                function_family=self._continuum_model_family,
                wavelengths=continuum_raster)
            continuum_smooth = continuum_model_factory.fit_to_continuum_via_mask(
                other=continuum_spectrum,
                mask=np.ones_like(continuum_raster, dtype=bool))

            # The factory signals a failed fit by returning an error-message string
            if isinstance(continuum_smooth, str):
                logger.info(continuum_smooth)
                # NOTE(review): this failure path returns six values while the success
                # path below returns three -- confirm what callers unpack
                return None, None, None, None, None, None
            # logger.info("Best-fit polynomial coefficients: {}".format(continuum_smooth.coefficients))

            # Resample smooth function onto the full raster of pixels within this wavelength arm
            resampler = SpectrumResampler(input_spectrum=continuum_smooth)
            continuum_models.append(
                resampler.onto_raster(raster[arm_raster]))

        # Splice together the continuum in all the wavelength arms
        continuum_model = fourgp_speclib.spectrum_splice(*continuum_models)

        # Create continuum-normalised spectrum using the continuum model we've just made
        cn_spectrum = spectrum / continuum_model

        # Run the Cannon on the continuum-normalised spectrum
        labels, cov, meta = super(CannonInstanceCaseyNewWithContinuumNormalisation, self). \
            fit_spectrum(spectrum=cn_spectrum)

        # Fetch the Cannon's model spectrum
        model = fourgp_speclib.Spectrum(
            wavelengths=raster,
            values=self._model.predict(labels=labels),
            value_errors=np.zeros_like(raster))

        # Make new model of which pixels are continuum (based on Cannon's template being close to one)
        continuum_mask = (model.values > 0.99) * (model.values < 1.01)
        logger.info("Continuum pixels: {} / {}".format(
            sum(continuum_mask), len(continuum_mask)))
        logger.info("Best-fit labels: {}".format(list(labels[0])))

        # Produce debugging output if requested
        if self._debugging:
            np.savetxt(
                "/tmp/debug_{:06d}_{:03d}.txt".format(
                    self._debugging_output_counter, iteration),
                np.transpose([
                    raster, spectrum.values, spectrum.value_errors,
                    continuum_model.values, model.values, continuum_mask
                ]))

        # Decide whether output is good enough for us to stop iterating
        if iteration >= max_iterations:
            break

    return labels, cov, meta
def normalise(self, spectrum):
    """
    Normalise a spectrum by dividing each wavelength arm by a running mean
    of its own values.

    :param spectrum: The spectrum to be normalised (a Spectrum, or a
        SpectrumArray whose members are each normalised in place).
    :return: Normalised version of this spectrum.
    """
    # If we're passed a spectrum array, normalise each spectrum in turn,
    # writing the results back into the array's own storage
    if isinstance(spectrum, fourgp_speclib.SpectrumArray):
        l = len(spectrum)
        for i in range(l):
            spectrum_item = spectrum.extract_item(i)
            spectrum_normalised = self.normalise(spectrum_item)
            spectrum_item.values[:] = spectrum_normalised.values
            spectrum_item.value_errors[:] = spectrum_normalised.value_errors
        return spectrum

    assert isinstance(spectrum, fourgp_speclib.Spectrum), \
        "The CannonInstance.normalise method requires a Spectrum object as input."

    if self._debugging:
        self._debugging_output_counter += 1

    # Returns an array of length len(x)-(n-1): the mean of every window of n samples,
    # computed via a cumulative sum
    def running_mean(x, n):
        cumulative_sum = np.cumsum(np.insert(x, 0, 0))
        return (cumulative_sum[n:] - cumulative_sum[:-n]) / float(n)

    # Work out the raster of pixels inside each wavelength arm
    raster = spectrum.wavelengths
    lower_cut = 0
    arm_rasters = []
    for break_point in self._wavelength_arms:
        arm_rasters.append((raster >= lower_cut) * (raster < break_point))
        lower_cut = break_point
    arm_rasters.append(raster >= lower_cut)

    output_wavelengths = []
    output_values = []
    output_value_errors = []

    # Normalise each wavelength arm independently
    for arm in arm_rasters:
        output_wavelengths.append(raster[arm])
        input_values = spectrum.values[arm]
        input_errors = spectrum.value_errors[arm]

        # Divide by a running mean; pad both ends (where the window does not
        # fit) with the first/last mean value so the lengths match
        normalisation = running_mean(input_values, self._window_width)
        padding_needed = len(input_values) - len(normalisation)
        padding_left = int(padding_needed / 2)
        padding_right = padding_needed - padding_left
        normalisation_full = np.concatenate([
            np.repeat(normalisation[0], padding_left),
            normalisation,
            np.repeat(normalisation[-1], padding_right)
        ])
        output_values.append(input_values / normalisation_full)
        output_value_errors.append(input_errors / normalisation_full)

    # Splice the normalised arms back together into a single spectrum
    output = fourgp_speclib.Spectrum(
        wavelengths=np.concatenate(output_wavelengths),
        values=np.concatenate(output_values),
        value_errors=np.concatenate(output_value_errors),
        metadata=spectrum.metadata)

    # Produce debugging output if requested
    if self._debugging:
        np.savetxt(
            "/tmp/debug_{:06d}.txt".format(self._debugging_output_counter),
            np.transpose([raster, spectrum.values, spectrum.value_errors]))

    return output
def test_data_sizes_must_match_3(self):
    """
    Constructing a Spectrum whose value_errors array is longer than its
    wavelength raster must raise an AssertionError.
    """
    with self.assertRaises(AssertionError):
        fourgp_speclib.Spectrum(wavelengths=self._raster,
                                values=self._values,
                                value_errors=np.arange(self._size + 1))