def _step1d(self, steps1):

    log_likes = np.zeros_like(steps1)

    with progress_bar(len(steps1), title='Profiling likelihood') as p:

        for i, step in enumerate(steps1):

            if self._n_free_parameters > 0:

                # Profile out the free parameters
                self._wrapper.set_fixed_values(step)

                _, this_log_like = self._optimizer.minimize(compute_covar=False)

            else:

                # No free parameters, just compute the likelihood
                this_log_like = self._function(step)

            log_likes[i] = this_log_like

            p.increase()

    return log_likes
def _get_errors(self):
    """
    Override this method if the minimizer provides a function to get all errors at once. If instead it
    provides a method to get one error at a time, override the _get_one_error method

    :return: an ordered dictionary parameter_path -> (negative_error, positive_error)
    """

    # TODO: options for other significance levels
    target_delta_log_like = 0.5

    errors = collections.OrderedDict()

    with progress_bar(2 * len(self.parameters), title='Computing errors') as p:

        for parameter_name in self.parameters:

            negative_error = self._get_one_error(parameter_name, target_delta_log_like, -1)

            p.increase()

            positive_error = self._get_one_error(parameter_name, target_delta_log_like, +1)

            p.increase()

            errors[parameter_name] = (negative_error, positive_error)

    return errors
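# The target_delta_log_like = 0.5 above is the usual 1-sigma profile-likelihood interval for a single
# parameter. A minimal, self-contained sketch of how one side of such an error could be located, by
# finding where the negative log-likelihood rises 0.5 above its minimum; the parabolic profile and the
# helper names here are illustrative assumptions, not the 3ML implementation.
import numpy as np
from scipy.optimize import brentq


def _toy_profile_negative_log_like(value, best_fit=1.0, sigma=0.2):
    # A parabolic -log(like) with minimum at best_fit and curvature set by sigma
    return 0.5 * ((value - best_fit) / sigma) ** 2


def one_error(best_fit, direction, target_delta_log_like=0.5, scale=1.0):
    # Root of delta(-log L) - target along the requested direction (+1 or -1)
    minimum = _toy_profile_negative_log_like(best_fit)

    def delta(x):
        return _toy_profile_negative_log_like(x) - minimum - target_delta_log_like

    # Bracket the crossing by stepping far away from the best fit in the requested direction
    far_point = best_fit + direction * 10 * scale

    crossing = brentq(delta, min(best_fit, far_point), max(best_fit, far_point))

    return crossing - best_fit


print(one_error(1.0, -1), one_error(1.0, +1))  # ~ (-0.2, +0.2) for sigma = 0.2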
def _evaluate(self):
    """
    calculate the best or mean fit of the new function or quantity

    :return:
    """

    # if there are independent variables
    if self._independent_variable_range:

        variates = []

        # scroll through the independent variables
        n_iterations = np.product(self._out_shape)

        with progress_bar(n_iterations, title="Propagating errors") as p:

            with use_astromodels_memoization(False):

                for variables in itertools.product(*self._independent_variable_range):
                    variates.append(self._propagated_function(*variables))

                    p.increase()

    # otherwise just evaluate
    else:

        variates = self._propagated_function()

    # create a variates container
    self._propagated_variates = VariatesContainer(variates, self._out_shape, self._cl, self._transform,
                                                  self._equal_tailed)
def execute_with_progress_bar(self, worker, items, chunk_size=None):

    # Let's make a wrapper which will allow us to recover the order
    def wrapper(x):
        (id, item) = x
        return (id, worker(item))

    items_wrapped = [(i, item) for i, item in enumerate(items)]

    n_iterations = len(items)

    with progress_bar(n_iterations) as p:

        amr = self._interactive_map(wrapper, items_wrapped, ordered=False, chunk_size=chunk_size)

        results = []

        for i, res in enumerate(amr):
            results.append(res)

            p.increase()

    # Reorder the list according to the id
    return list(map(lambda x: x[1], sorted(results, key=lambda x: x[0])))
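# The pattern above (tag each item with its index, consume the unordered results as they complete, then
# sort by the tag) is what lets an unordered parallel map report progress while still returning results in
# submission order. A minimal, self-contained sketch of the same idea using only the standard library
# instead of the parallel client used above; the worker and the print-based progress report are
# placeholders for illustration.
from concurrent.futures import ThreadPoolExecutor, as_completed


def execute_with_progress(worker, items, max_workers=4):
    results = []

    with ThreadPoolExecutor(max_workers=max_workers) as executor:

        # Tag each item with its position so the order can be recovered later
        futures = {executor.submit(worker, item): i for i, item in enumerate(items)}

        for n_done, future in enumerate(as_completed(futures), start=1):
            results.append((futures[future], future.result()))

            print("completed %d / %d" % (n_done, len(futures)))  # stand-in for p.increase()

    # Reorder according to the original index and strip the tags
    return [value for _, value in sorted(results)]


print(execute_with_progress(lambda x: x ** 2, range(5)))  # [0, 1, 4, 9, 16]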
def go(self, continue_on_failure=True, compute_covariance=False, verbose=False,
       **options_for_parallel_computation):

    # Generate the data frame which will contain all results

    if verbose:
        log.setLevel(logging.INFO)

    self._continue_on_failure = continue_on_failure

    self._compute_covariance = compute_covariance

    # let's iterate, perform the fit and fill the data frame

    if threeML_config['parallel']['use-parallel']:

        # Parallel computation

        client = ParallelClient(**options_for_parallel_computation)

        results = client.execute_with_progress_bar(self.worker, range(self._n_iterations))

    else:

        # Serial computation

        results = []

        with progress_bar(self._n_iterations, title='Goodness of fit computation') as p:

            for i in range(self._n_iterations):

                results.append(self.worker(i))

                p.increase()

    assert len(results) == self._n_iterations, "Something went wrong, I have %s results " \
                                               "for %s intervals" % (len(results), self._n_iterations)

    # Store the results in the data frames

    parameter_frames = pd.concat(map(lambda x: x[0], results), keys=range(self._n_iterations))
    like_frames = pd.concat(map(lambda x: x[1], results), keys=range(self._n_iterations))

    # Store a list with all results (this is a list of lists, each list contains the results for the different
    # iterations for the same model)

    self._all_results = []

    for i in range(self._n_models):

        this_model_results = list(map(lambda x: x[2][i], results))

        self._all_results.append(AnalysisResultsSet(this_model_results))

    return parameter_frames, like_frames
def go(self):

    if is_parallel_computation_active():

        client = ParallelClient()

        if self._n_decs % client.get_number_of_engines() != 0:

            warnings.warn("The number of Dec bands is not a multiple of the number of engines. "
                          "Make it so for optimal performance.", RuntimeWarning)

        res = client.execute_with_progress_bar(self.worker, list(range(len(self._points))),
                                               chunk_size=self._n_ras)

    else:

        n_points = len(self._points)

        with progress_bar(n_points) as p:

            res = np.zeros(n_points)

            for i, point in enumerate(self._points):

                res[i] = self.worker(i)

                p.increase()

    TS = 2 * (-np.array(res) - self._like0)

    # self._debug_map = {k: v for v, k in zip(self._points, TS)}

    # Get maximum of TS
    idx = TS.argmax()
    self._max_ts = (TS[idx], self._points[idx])

    print("Maximum TS is %.2f at (R.A., Dec) = (%.3f, %.3f)" % (self._max_ts[0],
                                                                self._max_ts[1][0],
                                                                self._max_ts[1][1]))

    self._ts_map = TS.reshape(self._n_decs, self._n_ras)

    return self._ts_map
def sample_with_progress(title, p0, sampler, n_samples, **kwargs):

    # Loop collecting n_samples samples
    pos, prob, state = [None, None, None]

    # This is only for producing the progress bar
    with progress_bar(n_samples, title=title) as progress:

        for i, result in enumerate(sampler.sample(p0, iterations=n_samples, **kwargs)):

            # Show progress
            progress.animate(i + 1)

            # Get the vectors with the results
            pos, prob, state = result

    return pos, prob, state
def _step2d(self, steps1, steps2):

    log_likes = np.zeros((len(steps1), len(steps2)))

    with progress_bar(len(steps1) * len(steps2), title='Profiling likelihood') as p:

        for i, step1 in enumerate(steps1):

            for j, step2 in enumerate(steps2):

                if self._n_free_parameters > 0:

                    # Profile out the free parameters
                    self._wrapper.set_fixed_values([step1, step2])

                    try:

                        _, this_log_like = self._optimizer.minimize(compute_covar=False)

                    except FitFailed:

                        # If the user is stepping too far it might be that the fit fails. It is usually not a
                        # problem
                        this_log_like = np.nan

                else:

                    # No free parameters, just compute the likelihood
                    this_log_like = self._function(step1, step2)

                log_likes[i, j] = this_log_like

                p.increase()

    return log_likes
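# A minimal, self-contained illustration of the grid evaluation that _step2d performs in its
# "no free parameters" branch: evaluate a toy negative log-likelihood on a 2-D grid of fixed parameter
# values and keep the matrix of values for contouring. The quadratic function below is an assumption for
# demonstration only, not the profiled likelihood used in 3ML.
import numpy as np


def toy_minus_log_like(a, b):
    # A simple paraboloid with minimum at (a, b) = (1.0, -0.5)
    return 0.5 * ((a - 1.0) / 0.1) ** 2 + 0.5 * ((b + 0.5) / 0.3) ** 2


steps_a = np.linspace(0.5, 1.5, 51)
steps_b = np.linspace(-1.5, 0.5, 41)

log_likes = np.zeros((len(steps_a), len(steps_b)))

for i, a in enumerate(steps_a):
    for j, b in enumerate(steps_b):
        log_likes[i, j] = toy_minus_log_like(a, b)

# The grid can now be contoured; its minimum sits at the true values
i_min, j_min = np.unravel_index(log_likes.argmin(), log_likes.shape)
print(steps_a[i_min], steps_b[j_min])  # ~ 1.0, -0.5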
def __init__(self, pha_file_or_instance, file_type='observed', rsp_file=None, arf_file=None):
    """
    A spectrum with dispersion built from an OGIP-compliant PHA FITS file. Both Type I & II files can be read.
    Type II spectra are selected either by specifying the spectrum_number or via the {spectrum_number} file
    name convention used in XSPEC. If the file_type is background, a 3ML InstrumentResponse or subclass must
    be passed so that the energy bounds can be obtained.

    :param pha_file_or_instance: either a PHA file name or threeML.plugins.OGIP.pha.PHAII instance
    :param spectrum_number: (optional) the spectrum number of the TypeII file to be used
    :param file_type: observed or background
    :param rsp_file: RMF filename or threeML.plugins.OGIP.response.InstrumentResponse instance
    :param arf_file: (optional) an ARF filename
    """

    # extract the spectrum number if needed
    assert isinstance(pha_file_or_instance, str) or isinstance(pha_file_or_instance, PHAII), \
        'Must provide a FITS file name or PHAII instance'

    with fits.open(pha_file_or_instance) as f:

        try:

            HDUidx = f.index_of("SPECTRUM")

        except:

            raise RuntimeError("The input file %s is not in PHA format" % pha_file_or_instance)

        spectrum = f[HDUidx]

        data = spectrum.data

        if "COUNTS" in data.columns.names:

            has_rates = False
            data_column_name = "COUNTS"

        elif "RATE" in data.columns.names:

            has_rates = True
            data_column_name = "RATE"

        else:

            raise RuntimeError("This file does not contain a RATE nor a COUNTS column. "
                               "This is not a valid PHA file")

        # Determine if this is a PHA I or PHA II
        if len(data.field(data_column_name).shape) == 2:

            num_spectra = data.field(data_column_name).shape[0]

        else:

            raise RuntimeError("This appears to be a PHA I and not PHA II file")

    pha_information = _read_pha_or_pha2_file(pha_file_or_instance,
                                             None,
                                             file_type,
                                             rsp_file,
                                             arf_file,
                                             treat_as_time_series=True)

    # default the grouping to all open bins
    # this will only be altered if the spectrum is rebinned
    self._grouping = np.ones_like(pha_information['counts'])

    # this saves the extra properties to the class
    self._gathered_keywords = pha_information['gathered_keywords']

    self._file_type = file_type

    # need to see if we have count errors, tstart, tstop
    # if not, we create a list of None

    if pha_information['count_errors'] is None:

        count_errors = [None] * num_spectra

    else:

        count_errors = pha_information['count_errors']

    if pha_information['tstart'] is None:

        tstart = [None] * num_spectra

    else:

        tstart = pha_information['tstart']

    if pha_information['tstop'] is None:

        tstop = [None] * num_spectra

    else:

        tstop = pha_information['tstop']

    # now build the list of binned spectra

    list_of_binned_spectra = []

    with progress_bar(num_spectra, title='Loading PHAII spectra') as p:

        for i in range(num_spectra):

            list_of_binned_spectra.append(
                BinnedSpectrumWithDispersion(counts=pha_information['counts'][i],
                                             exposure=pha_information['exposure'][i, 0],
                                             response=pha_information['rsp'],
                                             count_errors=count_errors[i],
                                             sys_errors=pha_information['sys_errors'][i],
                                             is_poisson=pha_information['is_poisson'],
                                             quality=pha_information['quality'].get_slice(i),
                                             mission=pha_information['gathered_keywords']['mission'],
                                             instrument=pha_information['gathered_keywords']['instrument'],
                                             tstart=tstart[i],
                                             tstop=tstop[i]))

            p.increase()

    # now get the time intervals

    start_times = data.field('TIME')
    stop_times = data.field('ENDTIME')

    time_intervals = TimeIntervalSet.from_starts_and_stops(start_times, stop_times)

    reference_time = 0

    # see if there is a reference time in the file

    if 'TRIGTIME' in spectrum.header:
        reference_time = spectrum.header['TRIGTIME']

    for t_number in range(spectrum.header['TFIELDS']):

        if 'TZERO%d' % t_number in spectrum.header:
            reference_time = spectrum.header['TZERO%d' % t_number]

    super(PHASpectrumSet, self).__init__(list_of_binned_spectra,
                                         reference_time=reference_time,
                                         time_intervals=time_intervals)
def download_files_from_directory_ftp(ftp_url, destination_directory, filenames=None, namefilter=None):

    # Parse url
    tokens = urllib.parse.urlparse(ftp_url)
    serverAddress = tokens.netloc
    directory = tokens.path

    # if no filename has been specified, connect first to retrieve the list of files to download
    if filenames is None:

        # Connect to server and log in
        ftp = ftplib.FTP(serverAddress, "anonymous", '', '', timeout=60)

        try:

            ftp.login()

        except:

            # Maybe we are already logged in

            try:

                ftp.cwd('/')

            except:

                # nope! don't know what is happening
                raise

        # Move to origin directory
        ftp.cwd(directory)

        # Retrieve list of files
        filenames = []

        ftp.retrlines('NLST', filenames.append)

        # Close connection (will reopen later)
        ftp.close()

    # Download files with progress report
    downloaded_files = []

    with progress_bar(len(filenames)) as p:

        for i, filename in enumerate(filenames):

            if namefilter is not None and filename.find(namefilter) < 0:

                p.increase()

                # Filename does not match, do not download it
                continue

            else:

                local_filename = os.path.join(destination_directory, filename)

                urllib.request.urlretrieve("ftp://%s/%s/%s" % (serverAddress, directory, filename),
                                           local_filename)

                urllib.request.urlcleanup()

                downloaded_files.append(local_filename)

    return downloaded_files
def _fit_polynomials(self):
    """
    fits a polynomial to all channels over the input time intervals

    :param fit_intervals: str input intervals
    :return:
    """

    # mark that we have fit a poly now
    self._poly_fit_exists = True

    # set the fit method
    self._fit_method_info['bin type'] = 'Binned'
    self._fit_method_info['fit method'] = threeML_config['event list']['binned fit method']

    # we need to adjust the selection to the true intervals of the time-binned spectra

    tmp_poly_intervals = self._poly_intervals
    poly_intervals = self._adjust_to_true_intervals(tmp_poly_intervals)
    self._poly_intervals = poly_intervals

    # now lets get all the counts, exposure and midpoints for the
    # selection

    selected_counts = []
    selected_exposure = []
    selected_midpoints = []

    for selection in poly_intervals:

        # get the mask of these bins
        mask = self._select_bins(selection.start_time, selection.stop_time)

        # the counts will be (time, channel) here,
        # so the mask is selecting time.
        # a sum along axis=0 is a sum in time, while axis=1 is a sum in energy

        selected_counts.extend(self._binned_spectrum_set.counts_per_bin[mask])
        selected_exposure.extend(self._binned_spectrum_set.exposure_per_bin[mask])
        selected_midpoints.extend(self._binned_spectrum_set.time_intervals.mid_points[mask])

    selected_counts = np.array(selected_counts)
    selected_midpoints = np.array(selected_midpoints)
    selected_exposure = np.array(selected_exposure)

    # Now we will find the best poly order unless the user specified one
    # The total counts (over channels) is binned

    if self._user_poly_order == -1:

        self._optimal_polynomial_grade = self._fit_global_and_determine_optimum_grade(selected_counts.sum(axis=1),
                                                                                      selected_midpoints,
                                                                                      selected_exposure)

        if self._verbose:
            print("Auto-determined polynomial order: %d" % self._optimal_polynomial_grade)
            print('\n')

    else:

        self._optimal_polynomial_grade = self._user_poly_order

    polynomials = []

    # now fit the light curve of each channel
    # and save the estimated polynomial

    with progress_bar(self._n_channels, title="Fitting background") as p:

        for counts in selected_counts.T:

            polynomial, _ = polyfit(selected_midpoints,
                                    counts,
                                    self._optimal_polynomial_grade,
                                    selected_exposure)

            polynomials.append(polynomial)

            p.increase()

    self._polynomials = polynomials
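# The polyfit() above is the library's own per-channel background fitting helper. As a simplified,
# self-contained analogue of what each channel fit amounts to, one can least-squares fit a polynomial to
# the background rate (counts / exposure) versus bin midpoint and integrate it over a selection. This is a
# sketch under that simplifying assumption, not the 3ML implementation.
import numpy as np

rng = np.random.default_rng(0)

# Fake light curve for one channel: 1 s bins, slowly varying background rate
midpoints = np.arange(0.5, 100.5, 1.0)
exposure = np.ones_like(midpoints)
true_rate = 10.0 + 0.05 * midpoints
counts = rng.poisson(true_rate * exposure)

# Fit a first-order polynomial to the rate (a likelihood-based fit would weight the bins properly)
coefficients = np.polynomial.polynomial.polyfit(midpoints, counts / exposure, deg=1)
background_poly = np.polynomial.Polynomial(coefficients)

# Expected background counts in an interval = integral of the rate polynomial
integral = background_poly.integ()
print(integral(60.0) - integral(40.0))  # ~ 250 expected background counts in [40, 60] s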
def bayesian_blocks_not_unique(tt, ttstart, ttstop, p0):

    # Verify that the input array is one-dimensional
    tt = np.asarray(tt, dtype=float)

    assert tt.ndim == 1

    # Now create the array of unique times
    unique_t = np.unique(tt)

    t = tt
    tstart = ttstart
    tstop = ttstop

    # Create initial cell edges (Voronoi tessellation) using the unique time stamps
    edges = np.concatenate([[tstart],
                            0.5 * (unique_t[1:] + unique_t[:-1]),
                            [tstop]])

    # The last block length is 0 by definition
    block_length = tstop - edges

    if np.sum((block_length <= 0)) > 1:

        raise RuntimeError("Events appear to be out of order! Check for order, or duplicated events.")

    N = unique_t.shape[0]

    # arrays to store the best configuration
    best = np.zeros(N, dtype=float)
    last = np.zeros(N, dtype=int)

    # Pre-computed priors (for speed)
    # eq. 21 from Scargle 2012
    priors = 4 - np.log(73.53 * p0 * np.power(np.arange(1, N + 1), -0.478))

    # Count how many events are in each Voronoi cell
    x, _ = np.histogram(t, edges)

    # Speed tricks: resolve once for all the functions which will be used
    # in the loop
    cumsum = np.cumsum
    log = np.log
    argmax = np.argmax
    numexpr_evaluate = numexpr.evaluate
    arange = np.arange

    # Decide the step for reporting progress
    incr = max(int(float(N) / 100.0 * 10), 1)

    logger.debug("Finding blocks...")

    # This is where the computation happens. Following Scargle et al. 2012.
    # This loop has been optimized for speed:
    # * the expression for the fitness function has been rewritten to
    #   avoid multiple log computations, and to avoid power computations
    # * the use of scipy.weave and numexpr has been evaluated. The latter
    #   gives a big gain (~40%) if used for the fitness function. No other
    #   gain is obtained by using it anywhere else

    # Set numexpr precision to low (more than enough for us), which is
    # faster than high
    oldaccuracy = numexpr.set_vml_accuracy_mode('low')
    numexpr.set_num_threads(1)
    numexpr.set_vml_num_threads(1)

    with progress_bar(N) as progress:

        for R in range(N):

            br = block_length[R + 1]
            T_k = block_length[:R + 1] - br

            # N_k: number of elements in each block
            # This expression has been simplified for the case of
            # unbinned events (i.e., one element in each block)
            # It was:
            # N_k = cumsum(x[:R + 1][::-1])[::-1]
            # Now it is:
            N_k = arange(R + 1, 0, -1)

            # Evaluate fitness function
            # This is the slowest part, which I'm speeding up by using
            # numexpr. It provides a ~40% gain in execution speed.

            fit_vec = numexpr_evaluate('''N_k * log(N_k / T_k)''',
                                       optimization='aggressive',
                                       local_dict={'N_k': N_k, 'T_k': T_k})

            p = priors[R]

            A_R = fit_vec - p

            A_R[1:] += best[:R]

            i_max = argmax(A_R)

            last[R] = i_max

            best[R] = A_R[i_max]

            progress.increase()

    numexpr.set_vml_accuracy_mode(oldaccuracy)

    logger.debug("Done\n")

    # Now find blocks

    change_points = np.zeros(N, dtype=int)
    i_cp = N
    ind = N

    while True:

        i_cp -= 1

        change_points[i_cp] = ind

        if ind == 0:

            break

        ind = last[ind - 1]

    change_points = change_points[i_cp:]

    finalEdges = edges[change_points]

    return np.asarray(finalEdges)
def to_spectrumlike(self, from_bins=False, start=None, stop=None, interval_name='_interval',
                    extract_measured_background=False):
    """
    Create plugin(s) from either the current active selection or the time bins.
    If creating from an event list, the bins are from create_time_bins.
    If using a pre-time binned time series, the bins are those native to the data.
    Start and stop times can be used to control which bins are used.

    :param from_bins: choose to create plugins from the time bins
    :param start: optional start time of the bins
    :param stop: optional stop time of the bins
    :param extract_measured_background: Use the selected background rather than a polynomial fit to the background
    :param interval_name: the name of the interval
    :return: SpectrumLike plugin(s)
    """

    # we can use either the modeled or the measured background. In theory, all the information
    # in the background spectrum should propagate to the likelihood

    if extract_measured_background:

        this_background_spectrum = self._measured_background_spectrum

    else:

        this_background_spectrum = self._background_spectrum

    # this is for a single interval

    if not from_bins:

        assert self._observed_spectrum is not None, 'Must have selected an active time interval'

        assert isinstance(self._observed_spectrum, BinnedSpectrum), \
            'You are attempting to create a SpectrumLike plugin from the wrong data type'

        if this_background_spectrum is None:

            custom_warnings.warn('No background selection has been made. This plugin will contain no background!')

        if self._response is None:

            return SpectrumLike(name=self._name,
                                observation=self._observed_spectrum,
                                background=this_background_spectrum,
                                verbose=self._verbose,
                                tstart=self._tstart,
                                tstop=self._tstop)

        else:

            return DispersionSpectrumLike(name=self._name,
                                          observation=self._observed_spectrum,
                                          background=this_background_spectrum,
                                          verbose=self._verbose,
                                          tstart=self._tstart,
                                          tstop=self._tstop)

    else:

        # this is for a set of intervals.

        assert self._time_series.bins is not None, 'This time series does not have any bins!'

        # save the original interval if there is one
        old_interval = copy.copy(self._active_interval)
        old_verbose = copy.copy(self._verbose)

        # we will keep it quiet to keep from being annoying
        self._verbose = False

        list_of_speclikes = []

        # get the bins from the time series
        # for event lists, these are from created bins
        # for binned spectra sets, these are the native bins

        these_bins = self._time_series.bins  # type: TimeIntervalSet

        if start is not None:
            assert stop is not None, 'must specify a start AND a stop time'

        if stop is not None:
            assert start is not None, 'must specify a start AND a stop time'

            these_bins = these_bins.containing_interval(start, stop, inner=False)

        # loop through the intervals and create spec likes

        with progress_bar(len(these_bins), title='Creating plugins') as p:

            for i, interval in enumerate(these_bins):

                self.set_active_time_interval(interval.to_string())

                assert isinstance(self._observed_spectrum, BinnedSpectrum), \
                    'You are attempting to create a SpectrumLike plugin from the wrong data type'

                if extract_measured_background:

                    this_background_spectrum = self._measured_background_spectrum

                else:

                    this_background_spectrum = self._background_spectrum

                if this_background_spectrum is None:

                    custom_warnings.warn('No background selection has been made. '
                                         'This plugin will contain no background!')

                try:

                    if self._response is None:

                        sl = SpectrumLike(name="%s%s%d" % (self._name, interval_name, i),
                                          observation=self._observed_spectrum,
                                          background=this_background_spectrum,
                                          verbose=self._verbose,
                                          tstart=self._tstart,
                                          tstop=self._tstop)

                    else:

                        sl = DispersionSpectrumLike(name="%s%s%d" % (self._name, interval_name, i),
                                                    observation=self._observed_spectrum,
                                                    background=this_background_spectrum,
                                                    verbose=self._verbose,
                                                    tstart=self._tstart,
                                                    tstop=self._tstop)

                    list_of_speclikes.append(sl)

                except NegativeBackground:

                    custom_warnings.warn('Something is wrong with interval %s. skipping.' % interval)

                p.increase()

        # restore the old interval

        if old_interval is not None:

            self.set_active_time_interval(*old_interval)

        else:

            self._active_interval = None

        self._verbose = old_verbose

        return list_of_speclikes
def _unbinned_fit_polynomials(self):

    self._poly_fit_exists = True

    # inform the type of fit we have
    self._fit_method_info['bin type'] = 'Unbinned'
    self._fit_method_info['fit method'] = threeML_config['event list']['unbinned fit method']

    # Select all the events that are in the background regions
    # and make a mask

    all_bkg_masks = []

    total_duration = 0.

    poly_exposure = 0

    for selection in self._poly_intervals:

        total_duration += selection.duration

        poly_exposure += self.exposure_over_interval(selection.start_time, selection.stop_time)

        all_bkg_masks.append(np.logical_and(self._arrival_times >= selection.start_time,
                                            self._arrival_times <= selection.stop_time))

    poly_mask = all_bkg_masks[0]

    # If there are multiple masks:
    if len(all_bkg_masks) > 1:

        for mask in all_bkg_masks[1:]:

            poly_mask = np.logical_or(poly_mask, mask)

    # Select all the events in the poly selections
    # We only need to do this once

    total_poly_events = self._arrival_times[poly_mask]

    # For the channel energies we will need to down select again.
    # We can go ahead and do this to avoid repeated computations

    total_poly_energies = self._measurement[poly_mask]

    # Now we will find the best poly order unless the user specified one
    # The total counts (over channels) is binned to .1 sec intervals

    if self._user_poly_order == -1:

        self._optimal_polynomial_grade = self._unbinned_fit_global_and_determine_optimum_grade(total_poly_events,
                                                                                               poly_exposure)

        if self._verbose:
            print("Auto-determined polynomial order: %d" % self._optimal_polynomial_grade)
            print('\n')

    else:

        self._optimal_polynomial_grade = self._user_poly_order

    channels = range(self._first_channel, self._n_channels + self._first_channel)

    # Check whether we are parallelizing or not

    t_start = self._poly_intervals.start_times
    t_stop = self._poly_intervals.stop_times

    polynomials = []

    with progress_bar(self._n_channels, title="Fitting %s background" % self._instrument) as p:

        for channel in channels:

            channel_mask = total_poly_energies == channel

            # Mask background events and current channel
            # poly_chan_mask = np.logical_and(poly_mask, channel_mask)

            # Select the masked events
            current_events = total_poly_events[channel_mask]

            polynomial, _ = unbinned_polyfit(current_events,
                                             self._optimal_polynomial_grade,
                                             t_start,
                                             t_stop,
                                             poly_exposure)

            polynomials.append(polynomial)

            p.increase()

    # We are now ready to return the polynomials

    self._polynomials = polynomials
def download(self, remote_filename, destination_path, new_filename=None, progress=True, compress=False):

    assert remote_filename in self.files, "File %s is not contained in this directory (%s)" % (
        remote_filename, self._request_result.url)

    destination_path = sanitize_filename(destination_path, abspath=True)

    assert path_exists_and_is_directory(destination_path), \
        "Provided destination %s does not exist or is not a directory" % destination_path

    # If no filename is specified, use the same name that the file has on the remote server
    if new_filename is None:
        new_filename = remote_filename.split("/")[-1]

    # Get the fully qualified path for the remote and the local file
    remote_path = self._request_result.url + remote_filename
    local_path = os.path.join(destination_path, new_filename)

    # Ask the server for the file, but do not download it just yet
    # (stream=True will get the HTTP header but nothing else)
    # Use stream=True for two reasons:
    # * so that the file is not downloaded all in memory before being written to the disk
    # * so that we can report progress if requested

    this_request = requests.get(remote_path, stream=True)

    # Figure out the size of the file
    file_size = int(this_request.headers["Content-Length"])

    # Now check if we really need to download this file

    if compress:
        # Add a .gz at the end of the file path
        local_path += ".gz"

    if file_existing_and_readable(local_path):

        local_size = os.path.getsize(local_path)

        if local_size == file_size or compress:
            # if the compressed file already exists
            # it will have a smaller size
            # No need to download it again
            return local_path

    # Chunk size shouldn't be too small otherwise we are causing a bottleneck in the download speed
    chunk_size = 1024 * 10

    # If the user wants to compress the file, use gzip, otherwise the normal opener
    if compress:

        import gzip

        opener = gzip.open

    else:

        opener = open

    if progress:

        # Set a title for the progress bar
        bar_title = "Downloading %s" % new_filename

        with progress_bar(file_size, scale=1024 * 1024, units="Mb", title=bar_title) as bar:  # type: ProgressBarBase

            with opener(local_path, "wb") as f:

                for chunk in this_request.iter_content(chunk_size=chunk_size):

                    if chunk:  # filter out keep-alive new chunks

                        f.write(chunk)
                        bar.increase(len(chunk))

        this_request.close()

    else:

        with opener(local_path, "wb") as f:

            for chunk in this_request.iter_content(chunk_size=chunk_size):

                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)

        this_request.close()

    return local_path
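# A minimal, self-contained version of the streamed download pattern above, using only requests and a
# plain byte counter in place of the 3ML progress bar. The URL below is a placeholder, and compression and
# resume logic are omitted on purpose.
import requests


def stream_download(url, local_path, chunk_size=1024 * 10):

    with requests.get(url, stream=True) as response:

        response.raise_for_status()

        # Content-Length may be absent (e.g. chunked transfer encoding)
        total = int(response.headers.get("Content-Length", 0))

        downloaded = 0

        with open(local_path, "wb") as f:

            for chunk in response.iter_content(chunk_size=chunk_size):

                if chunk:  # filter out keep-alive chunks

                    f.write(chunk)
                    downloaded += len(chunk)

                    if total:
                        print("\r%.1f%%" % (100.0 * downloaded / total), end="")

    return local_path


# stream_download("https://example.com/somefile.fits", "somefile.fits")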
def _minimize(self):

    # Gather the setup
    islands = self._setup_dict['islands']
    pop_size = self._setup_dict['population_size']
    evolution_cycles = self._setup_dict['evolution_cycles']

    # Print some info
    print("\nPAGMO setup:")
    print("------------")
    print("- Number of islands: %i" % islands)
    print("- Population size per island: %i" % pop_size)
    print("- Evolution cycles per island: %i\n" % evolution_cycles)

    Npar = len(self._internal_parameters)

    if is_parallel_computation_active():

        wrapper = PAGMOWrapper(function=self.function, parameters=self._internal_parameters, dim=Npar)

        # use the archipelago, which uses the ipyparallel computation

        archi = pg.archipelago(udi=pg.ipyparallel_island(),
                               n=islands,
                               algo=self._setup_dict['algorithm'],
                               prob=wrapper,
                               pop_size=pop_size)

        archi.wait()

        # Display some info
        print("\nSetup before parallel execution:")
        print("--------------------------------\n")
        print(archi)

        # Evolve populations on islands
        print("Evolving... (progress not available for parallel execution)")

        # For some weird reason, ipyparallel looks for _winreg on Linux (where it does not
        # exist, being a Windows module). Let's mock it with an empty module
        mocked = False

        if os.path.exists("_winreg.py") is False:

            with open("_winreg.py", "w+") as f:

                f.write("pass")

            mocked = True

        archi.evolve()

        # Wait for completion (evolve() is async)
        archi.wait_check()

        # Now remove _winreg.py if needed
        if mocked:

            os.remove("_winreg.py")

        # Find best and worst islands

        fOpts = np.array(list(map(lambda x: x[0], archi.get_champions_f())))
        xOpts = archi.get_champions_x()

    else:

        # do not use ipyparallel. Evolve populations on islands serially

        wrapper = PAGMOWrapper(function=self.function, parameters=self._internal_parameters, dim=Npar)

        xOpts = []
        fOpts = np.zeros(islands)

        with progress_bar(iterations=islands, title="pygmo minimization") as p:

            for island_id in range(islands):

                pop = pg.population(prob=wrapper, size=pop_size)

                for i in range(evolution_cycles):

                    pop = self._setup_dict['algorithm'].evolve(pop)

                # Gather results

                xOpts.append(pop.champion_x)
                fOpts[island_id] = pop.champion_f[0]

                p.increase()

    # Find best and worst islands

    min_idx = fOpts.argmin()
    max_idx = fOpts.argmax()

    fOpt = fOpts[min_idx]
    fWorse = fOpts[max_idx]
    xOpt = np.array(xOpts)[min_idx]

    # Some information
    print("\nSummary of evolution:")
    print("---------------------")
    print("Best population has minimum %.3f" % (fOpt))
    print("Worst population has minimum %.3f" % (fWorse))
    print("")

    # Transform to numpy.array

    best_fit_values = np.array(xOpt)

    return best_fit_values, fOpt
def display_fit(self, smoothing_kernel_sigma=0.1, display_colorbar=False):
    """
    Make a figure containing 4 maps for each active analysis bin with, respectively, model, data,
    background and residuals. The model, data and residual maps are smoothed, the background map is not.

    :param smoothing_kernel_sigma: sigma for the Gaussian smoothing kernel, for all but background maps
    :param display_colorbar: whether or not to display the colorbar in the residuals
    :return: a matplotlib.Figure
    """

    n_point_sources = self._likelihood_model.get_number_of_point_sources()
    n_ext_sources = self._likelihood_model.get_number_of_extended_sources()

    # This is the resolution (i.e., the size of one pixel) of the image
    resolution = 3.0  # arcmin

    # The image is going to cover the diameter plus 20% padding
    xsize = self._get_optimal_xsize(resolution)

    n_active_planes = len(self._active_planes)
    n_columns = 4

    fig, subs = plt.subplots(n_active_planes, n_columns,
                             figsize=(2.7 * n_columns, n_active_planes * 2),
                             squeeze=False)

    with progress_bar(len(self._active_planes), title='Smoothing maps') as prog_bar:

        images = ['None'] * n_columns

        for i, plane_id in enumerate(self._active_planes):

            data_analysis_bin = self._maptree[plane_id]

            # Get the center of the projection for this plane
            this_ra, this_dec = self._roi.ra_dec_center

            # Make a full healpix map for a second
            whole_map = self._get_model_map(plane_id, n_point_sources, n_ext_sources).as_dense()

            # Healpix uses longitude between -180 and 180, while R.A. is between 0 and 360. We need to fix that:
            longitude = ra_to_longitude(this_ra)

            # Declination is already between -90 and 90
            latitude = this_dec

            # Background and excess maps
            bkg_subtracted, _, background_map = self._get_excess(data_analysis_bin, all_maps=True)

            # Make all the projections: model, excess, background, residuals
            proj_model = self._represent_healpix_map(fig, whole_map,
                                                     longitude, latitude,
                                                     xsize, resolution, smoothing_kernel_sigma)

            # Here we removed the background otherwise nothing is visible
            # Get background (which is in a way "part of the model" since the uncertainties are neglected)
            proj_data = self._represent_healpix_map(fig, bkg_subtracted,
                                                    longitude, latitude,
                                                    xsize, resolution, smoothing_kernel_sigma)

            # No smoothing for this one (because a goal is to check it is smooth).
            proj_bkg = self._represent_healpix_map(fig, background_map,
                                                   longitude, latitude,
                                                   xsize, resolution, None)

            proj_residuals = proj_data - proj_model

            # Common color scale range for model and excess maps
            vmin = min(np.nanmin(proj_model), np.nanmin(proj_data))
            vmax = max(np.nanmax(proj_model), np.nanmax(proj_data))

            # Plot model
            images[0] = subs[i][0].imshow(proj_model, origin='lower', vmin=vmin, vmax=vmax)
            subs[i][0].set_title('model, bin {}'.format(data_analysis_bin.name))

            # Plot data map
            images[1] = subs[i][1].imshow(proj_data, origin='lower', vmin=vmin, vmax=vmax)
            subs[i][1].set_title('excess, bin {}'.format(data_analysis_bin.name))

            # Plot background map.
            images[2] = subs[i][2].imshow(proj_bkg, origin='lower')
            subs[i][2].set_title('background, bin {}'.format(data_analysis_bin.name))

            # Now residuals
            images[3] = subs[i][3].imshow(proj_residuals, origin='lower')
            subs[i][3].set_title('residuals, bin {}'.format(data_analysis_bin.name))

            # Remove numbers from axis
            for j in range(n_columns):
                subs[i][j].axis('off')

            if display_colorbar:

                for j, image in enumerate(images):
                    plt.colorbar(image, ax=subs[i][j])

            prog_bar.increase()

    fig.set_tight_layout(True)

    return fig
def go(args):

    # Define the spectral and spatial models for the source
    spectrum = Log_parabola()

    source = PointSource("CrabNebula", ra=args.RA, dec=args.Dec, spectral_shape=spectrum)

    # NOTE: if you use units, you have to set up the values for the parameters
    # AFTER you create the source, because during creation the function Log_parabola
    # gets its units

    spectrum.piv = 10 * u.TeV  # Pivot energy
    spectrum.piv.fix = True

    spectrum.K = 1e-14 / (u.TeV * u.cm**2 * u.s)  # norm (in 1/(keV cm2 s))
    spectrum.K.bounds = (1e-25, 1e-21)  # without units energies are in keV

    spectrum.beta = 0  # log parabolic beta
    spectrum.beta.bounds = (-4., 2.)

    spectrum.alpha = -2.5  # log parabolic alpha (index)
    spectrum.alpha.bounds = (-4., 2.)

    print(source(1 * u.keV))

    # Set up a likelihood model using the source.
    # Then create a HAWCLike object using the model, the maptree, and detector
    # response.
    lm = Model(source)
    llh = HAWCLike("CrabNebula", args.maptreefile, args.responsefile)
    llh.set_active_measurements(args.startBin, args.stopBin)

    # Double check the free parameters
    print("Likelihood model:\n")
    print(lm)

    # Set up the likelihood and run the fit
    print("Performing likelihood fit...\n")
    datalist = DataList(llh)
    jl = JointLikelihood(lm, datalist)

    # Pre-fit with root
    # The ROOT minimizer is still under dev and does
    # not provide a printer. This is the reason for the try/except
    try:

        jl.set_minimizer("ROOT")
        jl.fit()

    except AttributeError:

        pass

    jl.set_minimizer("minuit")

    result = jl.fit()

    # Print up the TS, and fit parameters, and then plot stuff
    print("\nTest statistic:")
    TS = llh.calc_TS()
    print("Test statistic: %g" % TS)

    # Get the differential flux at 1 TeV
    diff_flux = spectrum(1 * u.TeV)

    # Convert it to 1 / (TeV cm2 s)
    diff_flux_TeV = diff_flux.to(1 / (u.TeV * u.cm**2 * u.s))

    print("Norm @ 1 TeV: %s \n" % diff_flux_TeV)

    spectrum.display()

    E = np.logspace(np.log10(1), np.log10(100), 100) * u.TeV
    fML = spectrum(E)

    fMin68 = None
    fMax68 = None
    fMin95 = None
    fMax95 = None

    # Get a flux uncertainty envelope from MCMC samples
    if args.nMCMC > 0:

        print("Running MCMC...")

        # The set_uninformative_prior will use the given prior type
        # and the current minimum and maximum to set the prior
        spectrum.K.set_uninformative_prior(Log_uniform_prior)
        spectrum.beta.set_uninformative_prior(Uniform_prior)
        spectrum.alpha.set_uninformative_prior(Uniform_prior)

        ba = BayesianAnalysis(lm, datalist)

        nW = 10          # number of MCMC walkers
        nB = 100         # number of samples for burning in
        nS = args.nMCMC  # number of samples

        samples = ba.sample(nW, nB, nS)

        sigmas = [1., 2., 3.]
        levels = [1. - np.exp(-0.5 * s**2) for s in sigmas]

        # Make a corner plot of the sampled parameters from the MCMC
        fig = ba.corner_plot(labels=[r"$K$ [keV$^{-1}$ cm$^{-2}$ s$^{-1}$]",
                                     r"$\alpha$",
                                     r"$\beta$"],
                             # range=[[-2.5, -2.], [-0.4, 0.], [-10.6, -10.45]],
                             show_titles=True,
                             title_kwargs={"fontsize": 10},
                             quantiles=[0.16, 0.5, 0.84],
                             levels=levels,
                             verbose=True)

        fig.savefig("crab_logParabola_MCMC.png")

        # Construct the uncertainty envelope on the flux using the sampled
        # parameter space
        fMin68, fMax68 = [], []
        fMin95, fMax95 = [], []

        # Just to be fancy, let's use the progress bar
        print("\nMaking spectrum plot...\n")

        from threeML.io.progress_bar import progress_bar

        with progress_bar(E.shape[0], title='Plotting') as progress:

            for i, energy in enumerate(E):

                progress.animate(i)

                flux = []

                for (kk, a, b) in ba.raw_samples:

                    spectrum.K = kk
                    spectrum.alpha = a
                    spectrum.beta = b

                    # Note that here energy has units, so the computation
                    # would be made with units and the output will be a
                    # astropy.Quantity instance. However that is slow,
                    # so let's use the value instead (the computation without
                    # unit is much faster)
                    this_diff_flux = spectrum(energy.to('keV').value)

                    flux.append(this_diff_flux)

                # Evaluate the percentiles of the flux
                f02, f16, f84, f97 = np.percentile(np.asarray(flux), [2.5, 16, 84, 97.5])

                fMin68.append(f16)
                fMax68.append(f84)

                fMin95.append(f02)
                fMax95.append(f97)

        print(" done")

        # Save the percentiles with units
        # y_unit is the unit of the output if the function is 1d,
        # as in y = f(x)
        fMin68 = np.asarray(fMin68) * spectrum.y_unit
        fMax68 = np.asarray(fMax68) * spectrum.y_unit
        fMin95 = np.asarray(fMin95) * spectrum.y_unit
        fMax95 = np.asarray(fMax95) * spectrum.y_unit

    fig, ax = plt.subplots(1, 1, figsize=(8, 6))

    conv = (E)**2.5

    if fMin95 is not None and fMax95 is not None:

        # Plot the 95% C.I. on the flux as a function of energy
        ax.fill_between(E.to(u.TeV).value,
                        (conv * fMin95).to(u.TeV**2.5 / (u.TeV * u.cm**2 * u.s)).value,
                        (conv * fMax95).to(u.TeV**2.5 / (u.TeV * u.cm**2 * u.s)).value,
                        color="royalblue",
                        alpha=0.3,
                        label="95% C.I.")

    if fMin68 is not None and fMax68 is not None:

        # Plot the 68% C.I. on the flux as a function of energy
        ax.fill_between(E.to(u.TeV).value,
                        (conv * fMin68).to(u.TeV**2.5 / (u.TeV * u.cm**2 * u.s)).value,
                        (conv * fMax68).to(u.TeV**2.5 / (u.TeV * u.cm**2 * u.s)).value,
                        color="royalblue",
                        alpha=0.5,
                        label="68% C.I.")

    ax.plot(E.to(u.TeV).value,
            (conv * fML).to(u.TeV**2.5 / (u.TeV * u.cm**2 * u.s)).value,
            "r--",
            lw=2,
            label="Max LL")

    ax.set(xscale="log",
           xlabel="energy [TeV]",
           yscale="log",
           ylabel=r"$E^{2.5}$ flux [TeV$^{1.5}$ cm$^{-2}$ s$^{-1}$]",
           ylim=[5e-12, 5e-11],
           title="Crab Nebula")

    ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter("%g"))

    h, l = ax.get_legend_handles_labels()
    leg = ax.legend(h, l, loc=3)
    leg.get_frame().set_linewidth(0)

    fig.tight_layout()
    fig.savefig("crab_logParabola.png")
    plt.show()
def display_fit(self, smoothing_kernel_sigma=0.1):

    n_point_sources = self._likelihood_model.get_number_of_point_sources()
    n_ext_sources = self._likelihood_model.get_number_of_extended_sources()

    # This is the resolution (i.e., the size of one pixel) of the image
    resolution = 3.0  # arcmin

    # The image is going to cover the diameter plus 20% padding
    xsize = self._get_optimal_xsize(resolution)

    n_active_planes = len(self._active_planes)

    fig, subs = plt.subplots(n_active_planes, 3, figsize=(8, n_active_planes * 2))

    with progress_bar(len(self._active_planes), title='Smoothing maps') as prog_bar:

        for i, plane_id in enumerate(self._active_planes):

            data_analysis_bin = self._maptree[plane_id]

            # Get the center of the projection for this plane
            this_ra, this_dec = self._roi.ra_dec_center

            this_model_map_hpx = self._get_expectation(data_analysis_bin, plane_id, n_point_sources, n_ext_sources)

            # Make a full healpix map for a second
            whole_map = SparseHealpix(this_model_map_hpx,
                                      self._active_pixels[plane_id],
                                      data_analysis_bin.observation_map.nside).as_dense()

            # Healpix uses longitude between -180 and 180, while R.A. is between 0 and 360. We need to fix that:
            longitude = ra_to_longitude(this_ra)

            # Declination is already between -90 and 90
            latitude = this_dec

            # Plot model
            proj_m = self._represent_healpix_map(fig, whole_map,
                                                 longitude, latitude,
                                                 xsize, resolution, smoothing_kernel_sigma)

            subs[i][0].imshow(proj_m, origin='lower')

            # Remove numbers from axis
            subs[i][0].axis('off')

            # Plot data map
            # Here we removed the background otherwise nothing is visible
            # Get background (which is in a way "part of the model" since the uncertainties are neglected)
            background_map = data_analysis_bin.background_map.as_dense()
            bkg_subtracted = data_analysis_bin.observation_map.as_dense() - background_map

            proj_d = self._represent_healpix_map(fig, bkg_subtracted,
                                                 longitude, latitude,
                                                 xsize, resolution, smoothing_kernel_sigma)

            subs[i][1].imshow(proj_d, origin='lower')

            # Remove numbers from axis
            subs[i][1].axis('off')

            # Now residuals
            res = proj_d - proj_m
            # proj_res = self._represent_healpix_map(fig, res,
            #                                        longitude, latitude,
            #                                        xsize, resolution, smoothing_kernel_sigma)

            subs[i][2].imshow(res, origin='lower')

            # Remove numbers from axis
            subs[i][2].axis('off')

            prog_bar.increase()

    fig.set_tight_layout(True)

    return fig
def bin_by_significance(cls,
                        arrival_times,
                        background_getter,
                        background_error_getter=None,
                        sigma_level=10,
                        min_counts=1,
                        tstart=None,
                        tstop=None):
    """
    Bin the data to a given significance level for a given background method and sigma method.
    If a background error function is given then it is assumed that the error distribution is
    gaussian. Otherwise, the error distribution is assumed to be Poisson.

    :param background_getter: function of a start and stop time that returns background counts
    :param background_error_getter: function of a start and stop time that returns background count errors
    :param sigma_level: the sigma level of the intervals
    :param min_counts: the minimum counts per bin
    :return:
    """

    if tstart is None:

        tstart = arrival_times.min()

    else:

        tstart = float(tstart)

    if tstop is None:

        tstop = arrival_times.max()

    else:

        tstop = float(tstop)

    starts = []
    stops = []

    # Switching to a fast search
    # Idea inspired by Damien Begue

    # these factors change the time steps
    # in the fast search. should experiment
    if sigma_level > 25:

        increase_factor = 0.5
        decrease_factor = 0.5

    else:

        increase_factor = 0.25
        decrease_factor = 0.25

    current_start = arrival_times[0]

    # first we need to see if the interval provided has enough counts
    _, counts = TemporalBinner._select_events(arrival_times, current_start, arrival_times[-1])

    # if it does not, the flag for the big loop never gets set
    end_all_search = not TemporalBinner._check_exceeds_sigma_interval(current_start,
                                                                      arrival_times[-1],
                                                                      counts,
                                                                      sigma_level,
                                                                      background_getter,
                                                                      background_error_getter)

    # We will start the search at the mid point of the whole interval
    mid_point = 0.5 * (arrival_times[-1] + current_start)

    current_stop = mid_point

    # initialize the fast search flag
    end_fast_search = False

    # resolve once for functions used in the loop
    searchsorted = np.searchsorted

    # this is the main loop
    # as long as we have not reached the end of the interval
    # the loop will run
    with progress_bar(arrival_times.shape[0]) as pbar:

        while not end_all_search:

            # start of the fast search
            # we reset the flag for the interval
            # having been decreased in the last pass
            decreased_interval = False

            while not end_fast_search:

                # we calculate the sigma of the current region
                _, counts = TemporalBinner._select_events(arrival_times, current_start, current_stop)

                sigma_exceeded = TemporalBinner._check_exceeds_sigma_interval(current_start,
                                                                              current_stop,
                                                                              counts,
                                                                              sigma_level,
                                                                              background_getter,
                                                                              background_error_getter)

                time_step = abs(current_stop - current_start)

                # if we do not exceed the sigma
                # we need to increase the time interval
                if not sigma_exceeded:

                    # however, if in the last pass we had to decrease
                    # the interval, it means we have found where we
                    # need to start the slow search
                    if decreased_interval:

                        # mark where we are in the list
                        start_idx = searchsorted(arrival_times, current_stop)

                        # end the fast search
                        end_fast_search = True

                    # otherwise we increase the interval
                    else:

                        # unless, we would increase it too far
                        if (current_stop + time_step * increase_factor) >= arrival_times[-1]:

                            # mark where we are in the interval
                            start_idx = searchsorted(arrival_times, current_stop)

                            # then we also want to go ahead and get out of the fast search
                            end_fast_search = True

                        else:

                            # increase the interval
                            current_stop += time_step * increase_factor

                # if we did exceed the sigma level we will need to step
                # back in time to find where it was NOT exceeded
                else:

                    # decrease the interval
                    current_stop -= time_step * decrease_factor

                    # inform the loop that we have been back stepping
                    decreased_interval = True

            # Now we are ready for the slow forward search
            # where we count up all the photons

            # we have already counted up the photons to this point
            total_counts = counts

            # start searching from where the fast search ended
            pbar.increase(counts)

            for time in arrival_times[start_idx:]:

                total_counts += 1
                pbar.increase()

                if total_counts < min_counts:

                    continue

                else:

                    # first use the background function to know the number of background counts
                    bkg = background_getter(current_start, time)

                    sig = Significance(total_counts, bkg)

                    if background_error_getter is not None:

                        bkg_error = background_error_getter(current_start, time)

                        sigma = sig.li_and_ma_equivalent_for_gaussian_background(bkg_error)[0]

                    else:

                        sigma = sig.li_and_ma()[0]

                    # now test if we have enough sigma
                    if sigma >= sigma_level:

                        # if we succeeded we want to mark the time bins
                        stops.append(time)
                        starts.append(current_start)

                        # set up the next fast search
                        # by looking past this interval
                        current_start = time
                        current_stop = 0.5 * (arrival_times[-1] + time)

                        end_fast_search = False

                        # get out of the for loop
                        break

            # if we never exceeded the sigma level by the
            # end of the search, we never will
            if end_fast_search:

                # so lets kill the main search
                end_all_search = True

    if not starts:

        print("The requested sigma level could not be achieved in the interval. Try decreasing it.")

    else:

        return cls.from_starts_and_stops(starts, stops)
def _minimize(self):

    assert len(self._grid) > 0, "You need to set up a grid using add_parameter_to_grid"

    if self._2nd_minimization is None:

        raise RuntimeError("You did not setup this global minimizer (GRID). You need to use the .setup() method")

    # For each point in the grid, perform a fit

    parameters = list(self._grid.keys())

    overall_minimum = 1e20
    internal_best_fit_values = None

    n_iterations = np.prod([x.shape for x in self._grid.values()])

    with progress_bar(n_iterations, title='Grid minimization') as progress:

        for values_tuple in itertools.product(*self._grid.values()):

            # Reset everything to the original values, so that the fit will always start
            # from there, instead that from the values obtained in the last iterations, which
            # might have gone completely awry
            for par_name, par_value in self._original_values.items():

                self.parameters[par_name].value = par_value

            # Now set the parameters in the grid to their starting values
            for i, this_value in enumerate(values_tuple):

                self.parameters[parameters[i]].value = this_value

            # Get a new instance of the minimizer. We need to do this instead of reusing an existing instance
            # because some minimizers (like iminuit) keep internal track of their status, so that reusing
            # a minimizer will create correlation between the different points
            # NOTE: this line necessarily needs to be after the values of the parameters has been set to the
            # point, because the init method of the minimizer instance will use those values to set the starting
            # point for the fit
            _minimizer = self._2nd_minimization.get_instance(self.function, self.parameters, verbosity=0)

            # Perform fit
            try:

                # We call _minimize() and not minimize() so that the best fit values are
                # in the internal system.
                this_best_fit_values_internal, this_minimum = _minimizer._minimize()

            except:

                # A failure is not a problem here; only if all of the fits fail do we have a problem,
                # but that case is handled later
                continue

            # If this minimum is the overall minimum, save the result
            if this_minimum < overall_minimum:

                overall_minimum = this_minimum
                internal_best_fit_values = this_best_fit_values_internal

            # Use callbacks (if any)
            for callback in self._callbacks:

                callback(values_tuple, this_minimum)

            progress.increase()

    if internal_best_fit_values is None:

        raise AllFitFailed("All fits starting from values in the grid have failed!")

    return internal_best_fit_values, overall_minimum
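# The grid minimizer above is a restart strategy: run a local fit from every point of a user-defined grid
# of starting values and keep the best local minimum. A minimal, self-contained sketch of the same idea
# with scipy.optimize.minimize on a multi-modal toy function; the grid and the objective are illustrative
# assumptions, not the 3ML minimizer interface.
import itertools

import numpy as np
from scipy.optimize import minimize


def objective(x):
    # A bumpy 2-D function with several local minima
    return np.sin(3 * x[0]) * np.cos(3 * x[1]) + 0.1 * (x[0] ** 2 + x[1] ** 2)


grid = {
    'x0': np.linspace(-2, 2, 5),
    'x1': np.linspace(-2, 2, 5),
}

overall_minimum = np.inf
best_values = None

for starting_point in itertools.product(*grid.values()):

    # Each grid point is only a starting value; the local minimizer does the rest
    result = minimize(objective, x0=np.array(starting_point))

    if result.success and result.fun < overall_minimum:

        overall_minimum = result.fun
        best_values = result.x

print(best_values, overall_minimum)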
def _fit_polynomials(self):
    """
    Binned fit to each channel. Sets the polynomial array that will be used to compute
    counts over an interval.

    :return:
    """

    self._poly_fit_exists = True

    self._fit_method_info['bin type'] = 'Binned'
    self._fit_method_info['fit method'] = threeML_config['event list']['binned fit method']

    # Select all the events that are in the background regions and make a mask

    all_bkg_masks = []

    for selection in self._poly_intervals:
        all_bkg_masks.append(
            np.logical_and(self._arrival_times >= selection.start_time,
                           self._arrival_times <= selection.stop_time))

    poly_mask = all_bkg_masks[0]

    # If there are multiple masks:
    if len(all_bkg_masks) > 1:
        for mask in all_bkg_masks[1:]:
            poly_mask = np.logical_or(poly_mask, mask)

    # Select all the events in the poly selections.
    # We only need to do this once
    total_poly_events = self._arrival_times[poly_mask]

    # For the channel energies we will need to down-select again.
    # We can go ahead and do this to avoid repeated computations
    total_poly_energies = self._measurement[poly_mask]

    # This calculation removes the unselected portion of the light curve
    # so that we are not fitting zero counts. It will be used in the channel
    # calculations as well

    bin_width = 1.  # seconds
    these_bins = np.arange(self._start_time, self._stop_time, bin_width)

    cnts, bins = np.histogram(total_poly_events, bins=these_bins)

    # Find the mean time of the bins and calculate the exposure in each bin
    mean_time = []
    exposure_per_bin = []
    for i in range(len(bins) - 1):
        m = np.mean((bins[i], bins[i + 1]))
        mean_time.append(m)
        exposure_per_bin.append(self.exposure_over_interval(bins[i], bins[i + 1]))

    mean_time = np.array(mean_time)
    exposure_per_bin = np.array(exposure_per_bin)

    # Remove bins with zero counts
    all_non_zero_mask = []

    for selection in self._poly_intervals:
        all_non_zero_mask.append(
            np.logical_and(mean_time >= selection.start_time,
                           mean_time <= selection.stop_time))

    non_zero_mask = all_non_zero_mask[0]

    if len(all_non_zero_mask) > 1:
        for mask in all_non_zero_mask[1:]:
            non_zero_mask = np.logical_or(mask, non_zero_mask)

    # Now we will find the best poly order, unless the user specified one.
    # The total counts (summed over channels) are binned into 1-second intervals
    if self._user_poly_order == -1:

        self._optimal_polynomial_grade = self._fit_global_and_determine_optimum_grade(
            cnts[non_zero_mask], mean_time[non_zero_mask], exposure_per_bin[non_zero_mask])

        if self._verbose:
            print("Auto-determined polynomial order: %d" % self._optimal_polynomial_grade)
            print('\n')

    else:

        self._optimal_polynomial_grade = self._user_poly_order

    channels = range(self._first_channel, self._n_channels + self._first_channel)

    polynomials = []

    with progress_bar(self._n_channels, title="Fitting %s background" % self._instrument) as p:

        for channel in channels:

            channel_mask = total_poly_energies == channel

            # Mask background events and current channel
            # poly_chan_mask = np.logical_and(poly_mask, channel_mask)

            # Select the masked events
            current_events = total_poly_events[channel_mask]

            # now bin the selected channel counts
            cnts, bins = np.histogram(current_events, bins=these_bins)

            # Put the data to fit in an x vector and a y vector
            polynomial, _ = polyfit(mean_time[non_zero_mask],
                                    cnts[non_zero_mask],
                                    self._optimal_polynomial_grade,
                                    exposure_per_bin[non_zero_mask])

            polynomials.append(polynomial)
            p.increase()

    # We are now ready to return the polynomials
    self._polynomials = polynomials
def to_polarlike(self, from_bins=False, start=None, stop=None, interval_name='_interval',
                 extract_measured_background=False):

    assert has_polarpy, 'you must have the polarpy module installed'

    assert issubclass(self._container_type, BinnedModulationCurve), \
        'You are attempting to create a POLARLike plugin from the wrong data type'

    if extract_measured_background:
        this_background_spectrum = self._measured_background_spectrum
    else:
        this_background_spectrum = self._background_spectrum

    if isinstance(self._response, str):
        self._response = PolarResponse(self._response)

    if not from_bins:

        assert self._observed_spectrum is not None, 'Must have selected an active time interval'

        if this_background_spectrum is None:
            custom_warnings.warn('No background selection has been made. '
                                 'This plugin will contain no background!')

        return PolarLike(name=self._name,
                         observation=self._observed_spectrum,
                         background=this_background_spectrum,
                         response=self._response,
                         verbose=self._verbose,
                         # tstart=self._tstart,
                         # tstop=self._tstop
                         )

    else:

        # this is for a set of intervals

        assert self._time_series.bins is not None, 'This time series does not have any bins!'

        # save the original interval if there is one
        old_interval = copy.copy(self._active_interval)
        old_verbose = copy.copy(self._verbose)

        # we will keep it quiet to keep from being annoying
        self._verbose = False

        list_of_polarlikes = []

        # now we make one response to save time

        # get the bins from the time series:
        # for event lists, these are the created bins;
        # for binned spectrum sets, these are the native bins
        these_bins = self._time_series.bins  # type: TimeIntervalSet

        if start is not None:
            assert stop is not None, 'must specify a start AND a stop time'

        if stop is not None:
            assert start is not None, 'must specify a start AND a stop time'
            these_bins = these_bins.containing_interval(start, stop, inner=False)

        # loop through the intervals and create plugins
        with progress_bar(len(these_bins), title='Creating plugins') as p:

            for i, interval in enumerate(these_bins):

                self.set_active_time_interval(interval.to_string())

                if extract_measured_background:
                    this_background_spectrum = self._measured_background_spectrum
                else:
                    this_background_spectrum = self._background_spectrum

                if this_background_spectrum is None:
                    custom_warnings.warn('No background selection has been made. '
                                         'This plugin will contain no background!')

                try:

                    pl = PolarLike(name="%s%s%d" % (self._name, interval_name, i),
                                   observation=self._observed_spectrum,
                                   background=this_background_spectrum,
                                   response=self._response,
                                   verbose=self._verbose,
                                   # tstart=self._tstart,
                                   # tstop=self._tstop
                                   )

                    list_of_polarlikes.append(pl)

                except NegativeBackground:

                    custom_warnings.warn('Something is wrong with interval %s. Skipping it.' % interval)

                p.increase()

        # restore the old interval
        if old_interval is not None:
            self.set_active_time_interval(*old_interval)
        else:
            self._active_interval = None

        self._verbose = old_verbose

        return list_of_polarlikes
def to_spectrumlike(self, from_bins=False, start=None, stop=None, interval_name='_interval',
                    extract_measured_background=False):
    """
    Create plugin(s) from either the current active selection or the time bins.
    If creating from an event list, the bins are from create_time_bins. If using a
    pre-binned time series, the bins are those native to the data. Start and stop
    times can be used to control which bins are used.

    :param from_bins: choose to create plugins from the time bins
    :param start: optional start time of the bins
    :param stop: optional stop time of the bins
    :param extract_measured_background: use the selected background rather than a polynomial fit to the background
    :param interval_name: the name of the interval
    :return: SpectrumLike plugin(s)
    """

    # we can use either the modeled or the measured background. In theory, all the information
    # in the background spectrum should propagate to the likelihood

    if extract_measured_background:
        this_background_spectrum = self._measured_background_spectrum
    else:
        this_background_spectrum = self._background_spectrum

    # this is for a single interval
    if not from_bins:

        assert self._observed_spectrum is not None, 'Must have selected an active time interval'

        assert isinstance(self._observed_spectrum, BinnedSpectrum), \
            'You are attempting to create a SpectrumLike plugin from the wrong data type'

        if this_background_spectrum is None:
            custom_warnings.warn('No background selection has been made. '
                                 'This plugin will contain no background!')

        if self._response is None:

            return SpectrumLike(name=self._name,
                                observation=self._observed_spectrum,
                                background=this_background_spectrum,
                                verbose=self._verbose,
                                tstart=self._tstart,
                                tstop=self._tstop)

        else:

            return DispersionSpectrumLike(name=self._name,
                                          observation=self._observed_spectrum,
                                          background=this_background_spectrum,
                                          verbose=self._verbose,
                                          tstart=self._tstart,
                                          tstop=self._tstop)

    else:

        # this is for a set of intervals

        assert self._time_series.bins is not None, 'This time series does not have any bins!'

        # save the original interval if there is one
        old_interval = copy.copy(self._active_interval)
        old_verbose = copy.copy(self._verbose)

        # we will keep it quiet to keep from being annoying
        self._verbose = False

        list_of_speclikes = []

        # get the bins from the time series:
        # for event lists, these are the created bins;
        # for binned spectrum sets, these are the native bins
        these_bins = self._time_series.bins  # type: TimeIntervalSet

        if start is not None:
            assert stop is not None, 'must specify a start AND a stop time'

        if stop is not None:
            assert start is not None, 'must specify a start AND a stop time'
            these_bins = these_bins.containing_interval(start, stop, inner=False)

        # loop through the intervals and create spec likes
        with progress_bar(len(these_bins), title='Creating plugins') as p:

            for i, interval in enumerate(these_bins):

                self.set_active_time_interval(interval.to_string())

                assert isinstance(self._observed_spectrum, BinnedSpectrum), \
                    'You are attempting to create a SpectrumLike plugin from the wrong data type'

                if extract_measured_background:
                    this_background_spectrum = self._measured_background_spectrum
                else:
                    this_background_spectrum = self._background_spectrum

                if this_background_spectrum is None:
                    custom_warnings.warn('No background selection has been made. '
                                         'This plugin will contain no background!')

                try:

                    if self._response is None:

                        sl = SpectrumLike(name="%s%s%d" % (self._name, interval_name, i),
                                          observation=self._observed_spectrum,
                                          background=this_background_spectrum,
                                          verbose=self._verbose,
                                          tstart=self._tstart,
                                          tstop=self._tstop)

                    else:

                        sl = DispersionSpectrumLike(name="%s%s%d" % (self._name, interval_name, i),
                                                    observation=self._observed_spectrum,
                                                    background=this_background_spectrum,
                                                    verbose=self._verbose,
                                                    tstart=self._tstart,
                                                    tstop=self._tstop)

                    list_of_speclikes.append(sl)

                except NegativeBackground:

                    custom_warnings.warn('Something is wrong with interval %s. Skipping it.' % interval)

                p.increase()

        # restore the old interval
        if old_interval is not None:
            self.set_active_time_interval(*old_interval)
        else:
            self._active_interval = None

        self._verbose = old_verbose

        return list_of_speclikes
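As an illustration of how to_spectrumlike() might be called (assuming ts is an instance of the time-series class this method belongs to, with time bins already created; the names and times are placeholders):

# 'ts' is a placeholder for an object exposing to_spectrumlike()
single_plugin = ts.to_spectrumlike()                     # from the active selection
binned_plugins = ts.to_spectrumlike(from_bins=True,      # one plugin per time bin
                                    start=0.0,
                                    stop=10.0,
                                    interval_name='_bin')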
def _minimize(self):

    # Gather the setup
    islands = self._setup_dict['islands']
    pop_size = self._setup_dict['population_size']
    evolution_cycles = self._setup_dict['evolution_cycles']

    # Print some info
    print("\nPAGMO setup:")
    print("------------")
    print("- Number of islands: %i" % islands)
    print("- Population size per island: %i" % pop_size)
    print("- Evolutions cycles per island: %i\n" % evolution_cycles)

    Npar = len(self._internal_parameters)

    if is_parallel_computation_active():

        wrapper = PAGMOWrapper(function=self.function, parameters=self._internal_parameters, dim=Npar)

        # use the archipelago, which uses the ipyparallel computation
        archi = pg.archipelago(udi=pg.ipyparallel_island(), n=islands,
                               algo=self._setup_dict['algorithm'], prob=wrapper, pop_size=pop_size)
        archi.wait()

        # Display some info
        print("\nSetup before parallel execution:")
        print("--------------------------------\n")
        print(archi)

        # Evolve populations on islands
        print("Evolving... (progress not available for parallel execution)")

        # For some weird reason, ipyparallel looks for _winreg on Linux (where it does not
        # exist, being a Windows module). Let's mock it with an empty module.
        mocked = False
        if os.path.exists("_winreg.py") is False:
            with open("_winreg.py", "w+") as f:
                f.write("pass")
            mocked = True

        archi.evolve()

        # Wait for completion (evolve() is async)
        archi.wait_check()

        # Now remove _winreg.py if needed
        if mocked:
            os.remove("_winreg.py")

        # Find best and worst islands
        fOpts = np.array([x[0] for x in archi.get_champions_f()])
        xOpts = archi.get_champions_x()

    else:

        # do not use ipyparallel. Evolve populations on islands serially

        wrapper = PAGMOWrapper(function=self.function, parameters=self._internal_parameters, dim=Npar)

        xOpts = []
        fOpts = np.zeros(islands)

        with progress_bar(iterations=islands, title="pygmo minimization") as p:

            for island_id in range(islands):

                pop = pg.population(prob=wrapper, size=pop_size)

                for i in range(evolution_cycles):
                    pop = self._setup_dict['algorithm'].evolve(pop)

                # Gather results
                xOpts.append(pop.champion_x)
                fOpts[island_id] = pop.champion_f[0]

                p.increase()

    # Find best and worst islands
    min_idx = fOpts.argmin()
    max_idx = fOpts.argmax()

    fOpt = fOpts[min_idx]
    fWorse = fOpts[max_idx]
    xOpt = np.array(xOpts)[min_idx]

    # Some information
    print("\nSummary of evolution:")
    print("---------------------")
    print("Best population has minimum %.3f" % fOpt)
    print("Worst population has minimum %.3f" % fWorse)
    print("")

    # Transform to numpy.array
    best_fit_values = np.array(xOpt)

    return best_fit_values, fOpt
def _minimize(self):

    assert len(self._grid) > 0, "You need to set up a grid using add_parameter_to_grid"

    if self._2nd_minimization is None:
        raise RuntimeError("You did not set up this global minimizer (GRID). "
                           "You need to use the .setup() method")

    # For each point in the grid, perform a fit

    parameters = list(self._grid.keys())

    overall_minimum = 1e20
    internal_best_fit_values = None

    n_iterations = np.prod([x.shape for x in self._grid.values()])

    with progress_bar(n_iterations, title='Grid minimization') as progress:

        for values_tuple in itertools.product(*self._grid.values()):

            # Reset everything to the original values, so that the fit will always start
            # from there instead of from the values obtained in the last iteration, which
            # might have gone completely awry

            for par_name, par_value in self._original_values.items():
                self.parameters[par_name].value = par_value

            # Now set the parameters in the grid to their starting values

            for i, this_value in enumerate(values_tuple):
                self.parameters[parameters[i]].value = this_value

            # Get a new instance of the minimizer. We need to do this instead of reusing an
            # existing instance because some minimizers (like iminuit) keep internal track of
            # their status, so reusing a minimizer would create correlations between the
            # different points.
            # NOTE: this line needs to come after the values of the parameters have been set
            # to the grid point, because the init method of the minimizer instance will use
            # those values to set the starting point for the fit

            _minimizer = self._2nd_minimization.get_instance(self.function, self.parameters, verbosity=0)

            # Perform fit

            try:
                # We call _minimize() and not minimize() so that the best fit values are
                # in the internal system
                this_best_fit_values_internal, this_minimum = _minimizer._minimize()
            except:
                # A failure is not a problem here: only if all of the fits fail do we have a
                # problem, and that case is handled later
                continue

            # If this minimum is the overall minimum, save the result

            if this_minimum < overall_minimum:
                overall_minimum = this_minimum
                internal_best_fit_values = this_best_fit_values_internal

            # Use callbacks (if any)

            for callback in self._callbacks:
                callback(values_tuple, this_minimum)

            progress.increase()

    if internal_best_fit_values is None:
        raise AllFitFailed("All fits starting from values in the grid have failed!")

    return internal_best_fit_values, overall_minimum
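A sketch of how the grid referenced above might be populated before calling _minimize(). The exact signatures of add_parameter_to_grid() and setup() are assumptions based only on the error messages in this method, and grid_minimizer, parameter, and local_minimizer are placeholders:

import numpy as np

# Assumed, not shown in this snippet: add_parameter_to_grid(parameter, grid) and
# setup(...) wiring in the secondary minimizer stored as self._2nd_minimization
grid_minimizer.add_parameter_to_grid(parameter, np.linspace(-3.0, -1.0, 5))
grid_minimizer.setup(local_minimizer)
best_internal_values, overall_minimum = grid_minimizer._minimize()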
def go(args):

    spectrum = Powerlaw()

    if args.shape == 'ps':

        source = PointSource("TestSource", args.RA, args.Dec, spectral_shape=spectrum)

    elif args.shape == 'disk':

        shape = Disk_on_sphere()
        source = ExtendedSource("TestSource", spatial_shape=shape, spectral_shape=spectrum)

        shape.lon0 = args.RA * u.degree
        shape.lon0.fix = True
        shape.lat0 = args.Dec * u.degree
        shape.lat0.fix = True
        shape.radius = 1. * u.degree
        shape.radius.bounds = (0.05 * u.degree, 1.5 * u.degree)

    elif args.shape == 'gauss':

        shape = Gaussian_on_sphere()
        source = ExtendedSource("TestSource", spatial_shape=shape, spectral_shape=spectrum)

        shape.lon0 = args.RA * u.degree
        shape.lon0.fix = True
        shape.lat0 = args.Dec * u.degree
        shape.lat0.fix = True
        shape.sigma = 1.0 * u.degree
        shape.sigma.fix = False
        shape.sigma.max_value = 5.

    elif args.shape == 'diffusion':

        shape = Continuous_injection_diffusion()
        source = ExtendedSource("TestSource", spatial_shape=shape, spectral_shape=spectrum)

        shape.lon0 = args.RA * u.degree
        shape.lon0.fix = True
        shape.lat0 = args.Dec * u.degree
        shape.lat0.fix = True
        shape.rdiff0 = 6.0 * u.degree
        shape.rdiff0.fix = False
        shape.rdiff0.max_value = 12.
        shape.delta = 0.33
        shape.delta.fix = True
        shape.uratio = 0.5  # Diffusion spectral index (0.3 to 0.6)
        shape.uratio.fix = True
        shape.piv = 2e10
        shape.piv.fix = True
        shape.piv2 = 1e9
        shape.piv2.fix = True

    elif args.shape == 'powerlaw':

        shape = Power_law_on_sphere()
        source = ExtendedSource("TestSource", spatial_shape=shape, spectral_shape=spectrum)

        shape.lon0 = args.RA * u.degree
        shape.lon0.fix = True
        shape.lat0 = args.Dec * u.degree
        shape.lat0.fix = True
        shape.index = -1.1
        shape.index.fix = False

    else:

        print("Error, invalid shape: %s" % args.shape)
        sys.exit(0)

    fluxUnit = 1. / (u.TeV * u.cm**2 * u.s)

    # Set spectral parameters (do it after the source definition to make sure
    # the units are handled correctly)
    spectrum.K = 1e-14 * fluxUnit
    spectrum.K.bounds = (1e-16 * fluxUnit, 1e-12 * fluxUnit)  # Factor 1e9 to what's output
    spectrum.piv = 10 * u.TeV
    spectrum.piv.fix = True
    spectrum.index = -2.5
    spectrum.index.bounds = (-4., -1.)
    spectrum.index.fix = (not args.freeindex)

    spectrum.display()
    if args.shape == 'ps':
        print("Morphology: point source.")
    else:
        shape.display()

    # Set up a likelihood model using the source
    lm = Model(source)

    llh = HAWCLike("TheLikelihood", args.maptreefile, args.responsefile)
    llh.set_active_measurements(args.startBin, args.stopBin)
    llh.set_ROI(args.RA, args.Dec, args.roiradius, True)

    # Double check the free parameters
    print("Likelihood model:\n")
    print(lm)

    # Set up the likelihood and run the fit
    print("Performing likelihood fit...\n")
    datalist = DataList(llh)
    jl = JointLikelihood(lm, datalist, verbose=True)

    try:
        jl.set_minimizer("ROOT")
        jl.fit()
    except AttributeError:
        jl.set_minimizer("minuit")
        result = jl.fit()

    # Print the TS
    TS = llh.calc_TS()
    print("\nTest statistic: %g" % TS)

    # Write model and residual maps if needed
    if args.outmodel is not None:
        llh.write_model_map(args.outmodel)
    if args.outresidual is not None:
        llh.write_residual_map(args.outresidual)

    # Get a flux uncertainty envelope from MCMC samples and plot it
    if args.nMCMC > 0 and len(lm.free_parameters) > 1:

        print("Running MCMC...")

        # set_uninformative_prior will use the given prior type and the current
        # minimum and maximum to set the prior.
        # Set all priors to uniform, except for spectrum.K
        for parameter_name, parameter in lm.free_parameters.items():
            parameter.set_uninformative_prior(Uniform_prior)
        spectrum.K.set_uninformative_prior(Log_uniform_prior)

        ba = BayesianAnalysis(lm, datalist)

        nW = 10          # number of MCMC walkers
        nB = 100         # number of samples for burning in
        nS = args.nMCMC  # number of samples

        print("Sampling")
        samples = ba.sample(nW, nB, nS)
        print("Sampling complete")

        print("\nMaking corner plot...\n")

        # Make a corner plot of the sampled parameters from the MCMC
        sigmas = [1., 2., 3.]
        levels = [1. - np.exp(-0.5 * s ** 2) for s in sigmas]

        fig = ba.corner_plot(show_titles=True,
                             title_kwargs={"fontsize": 10},
                             quantiles=[0.16, 0.5, 0.84],
                             levels=levels,
                             verbose=True)
        filename = "%s/corner_%s.png" % (args.outplotdir, args.outplottag)
        fig.savefig(filename, dpi=200)
        print("Saved corner plot in: %s" % filename)

        print("\nMaking spectrum plot...\n")

        E = np.logspace(np.log10(1), np.log10(50), 100) * u.TeV
        fML = spectrum(E)

        # Construct the uncertainty envelope on the flux using the sampled parameter space
        fMin68, fMax68 = [], []
        fMin95, fMax95 = [], []

        # Just to be fancy, let's use the progress bar
        from threeML.io.progress_bar import progress_bar

        with progress_bar(E.shape[0], title='Plotting') as progress:

            for i, energy in enumerate(E):

                progress.animate(i)

                flux = []

                for rsamp in ba.raw_samples:

                    # Dirty, but it looks like the morphology parameters come first.
                    # Is there a safer way to do this?
                    spectrum.K = rsamp[-2]
                    spectrum.index = rsamp[-1]

                    # Note that here energy has units, so the computation would be made with
                    # units and the output would be an astropy.Quantity instance. However,
                    # that is slow, so let's use the value instead (the computation without
                    # units is much faster)
                    this_diff_flux = spectrum(energy.to('keV').value)

                    flux.append(this_diff_flux)

                # Evaluate the percentiles of the flux
                f02, f16, f84, f97 = np.percentile(np.asarray(flux), [2.5, 16, 84, 97.5])

                fMin68.append(f16)
                fMax68.append(f84)
                fMin95.append(f02)
                fMax95.append(f97)

        print(" done")

        # Save the percentiles with units.
        # y_unit is the unit of the output if the function is 1d, as in y = f(x)
        fMin68 = np.asarray(fMin68) * spectrum.y_unit
        fMax68 = np.asarray(fMax68) * spectrum.y_unit
        fMin95 = np.asarray(fMin95) * spectrum.y_unit
        fMax95 = np.asarray(fMax95) * spectrum.y_unit

        fig, ax = plt.subplots(1, 1, figsize=(8, 6))

        energyPower = 2.0  # To plot E**eP dN/dE
        conv = E**energyPower

        if fMin95 is not None and fMax95 is not None:
            # Plot the 95% C.I. on the flux as a function of energy
            ax.fill_between(E.to(u.TeV).value,
                            (conv * fMin95).to(u.TeV**energyPower / (u.TeV * u.cm**2 * u.s)).value,
                            (conv * fMax95).to(u.TeV**energyPower / (u.TeV * u.cm**2 * u.s)).value,
                            color="royalblue", alpha=0.3, label="95% C.I.")

        if fMin68 is not None and fMax68 is not None:
            # Plot the 68% C.I. on the flux as a function of energy
            ax.fill_between(E.to(u.TeV).value,
                            (conv * fMin68).to(u.TeV**energyPower / (u.TeV * u.cm**2 * u.s)).value,
                            (conv * fMax68).to(u.TeV**energyPower / (u.TeV * u.cm**2 * u.s)).value,
                            color="royalblue", alpha=0.5, label="68% C.I.")

        ax.plot(E.to(u.TeV).value,
                (conv * fML).to(u.TeV**energyPower / (u.TeV * u.cm**2 * u.s)).value,
                "r--", lw=2, label="Max LL")

        ax.set(xscale="log",
               xlabel="energy [TeV]",
               yscale="log",
               ylabel=r"$E^{%.1f}$ flux [TeV$^{%.1f}$ cm$^{-2}$ s$^{-1}$]" % (energyPower, energyPower - 1),
               title="Spectrum for %s shape" % args.shape)
        ax.xaxis.set_major_formatter(mpl.ticker.FormatStrFormatter("%g"))

        h, l = ax.get_legend_handles_labels()
        leg = ax.legend(h, l)

        fig.tight_layout()

        filename = "%s/spectrum_%s.png" % (args.outplotdir, args.outplottag)
        fig.savefig(filename, dpi=200)
        print("Saved spectrum plot in: %s" % filename)

        plt.show()
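The envelope construction in go() boils down to evaluating the spectrum on every posterior sample at each energy and taking percentiles. A self-contained sketch of just that step, with generic stand-ins for the model and the samples:

import numpy as np

def flux_envelope(energies, samples, model):
    # energies: 1-d array of energies (plain floats, for speed, as noted above)
    # samples:  iterable of (K, index) tuples drawn from the posterior (stand-in)
    # model:    stand-in callable model(energy, K, index) -> differential flux
    fluxes = np.array([[model(e, *pars) for pars in samples] for e in energies])
    # 68% and 95% central intervals at each energy
    f02, f16, f84, f97 = np.percentile(fluxes, [2.5, 16, 84, 97.5], axis=1)
    return (f16, f84), (f02, f97)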
def download_files_from_directory_ftp(ftp_url, destination_directory, filenames=None, namefilter=None):

    # Parse the url
    tokens = urlparse.urlparse(ftp_url)
    serverAddress = tokens.netloc
    directory = tokens.path

    # if no filename has been specified, connect first to retrieve the list of files to download
    if filenames is None:

        # Connect to server and log in
        ftp = ftplib.FTP(serverAddress, "anonymous", '', '', timeout=60)

        try:
            ftp.login()
        except:
            # Maybe we are already logged in
            try:
                ftp.cwd('/')
            except:
                # nope! don't know what is happening
                raise

        # Move to origin directory
        ftp.cwd(directory)

        # Retrieve the list of files
        filenames = []
        ftp.retrlines('NLST', filenames.append)

        # Close the connection (will reopen later)
        ftp.close()

    # Download the files with progress report
    downloaded_files = []

    with progress_bar(len(filenames)) as p:

        for i, filename in enumerate(filenames):

            if namefilter is not None and filename.find(namefilter) < 0:

                # Filename does not match, do not download it
                p.increase()
                continue

            else:

                local_filename = os.path.join(destination_directory, filename)
                urllib.urlretrieve("ftp://%s/%s/%s" % (serverAddress, directory, filename), local_filename)
                urllib.urlcleanup()
                downloaded_files.append(local_filename)

    return downloaded_files
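A usage sketch for the helper above; the FTP URL, destination, and filter are placeholders, not endpoints taken from the source:

# Placeholder URL and filter, shown only to illustrate the arguments
files = download_files_from_directory_ftp("ftp://ftp.example.org/pub/data/",
                                           destination_directory="./downloads",
                                           namefilter="tte")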