def test_stats_calc_stat_wstat_diffbins():
    """wstat statistic fails when src/bg bin sizes do not match"""

    stat = WStat()
    src, model = setup_single_pha(True, False, background=True)

    # Drop the last bin from the source so it no longer matches the
    # background length. Data validation means a fresh object has to
    # be built rather than editing the arrays in place.
    #
    truncated = DataPHA("faked",
                        channel=src.channel[:-1],
                        counts=src.counts[:-1],
                        staterror=src.staterror[:-1],
                        grouping=src.grouping[:-1],
                        exposure=src.exposure,
                        backscal=src.backscal,
                        areascal=src.areascal)

    # We might expect the ARF/RMF calls to fail if we add validation
    # (to check the ARF/RMF is valid for the PHA dataset).
    #
    truncated.set_arf(src.get_arf())
    truncated.set_rmf(src.get_rmf())
    truncated.set_background(src.get_background())

    # There is no Sherpa error for this, which seems surprising
    with pytest.raises(TypeError) as exc:
        stat.calc_stat(truncated, model)

    assert str(exc.value) == "input array sizes do not match, data: 5 vs group: 4"
def test_xspecvar_zero_handling(bexp, yexp, dyexp):
    """How does XSPEC variance handle 0 in source and/or background?

    The values were calculated using XSPEC 12.10.1m (HEASOFT 6.26.1)
    using the following commands to create the file foo.dat which
    contains (after three 'header' lines) the data 'x 0.5 y dy'

        data foo.fits
        iplot data
        wplot foo.dat
        quit

    where foo.fits is a fake PHA file set up to have the channel/count
    values used below (a CSC-style PHA file was used so that source
    and background were in the same file but a separate bgnd PHA file
    could also have been used).

    """
    statobj = Chi2XspecVar()

    # Channels cover every combination of zero/non-zero src and bkg
    # counts.
    channels = numpy.arange(1, 10, dtype=numpy.int16)
    src_counts = numpy.asarray([0, 0, 0, 1, 3, 1, 1, 3, 3],
                               dtype=numpy.int16)
    bkg_counts = numpy.asarray([0, 1, 3, 0, 0, 1, 3, 1, 3],
                               dtype=numpy.int16)

    src = DataPHA('src', channels, src_counts, exposure=1)
    bkg = DataPHA('bkg', channels, bkg_counts, exposure=bexp)
    src.set_background(bkg)
    src.subtract()

    y, dy, other = src.to_fit(staterrfunc=statobj.calc_staterror)
    assert other is None
    assert y == pytest.approx(yexp)
    assert dy == pytest.approx(dyexp)
def calc_wstat_sherpa(mu_sig, n_on, n_off, alpha):
    """Print the Sherpa wstat statistic and per-bin values for one bin."""
    import sherpa.stats as ss
    from sherpa.astro.data import DataPHA
    from sherpa.models import Const1D

    const = Const1D()
    const.c0 = mu_sig

    src = DataPHA(counts=np.atleast_1d(n_on),
                  name='dummy',
                  channel=np.atleast_1d(1),
                  backscal=1,
                  exposure=1)
    bkg = DataPHA(counts=np.atleast_1d(n_off),
                  name='dummy background',
                  channel=np.atleast_1d(1),
                  backscal=np.atleast_1d(1. / alpha),
                  exposure=1)
    src.set_background(bkg, 1)

    # Docstring for ``calc_stat``
    # https://github.com/sherpa/sherpa/blob/fe8508818662346cb6d9050ba676e23318e747dd/sherpa/stats/__init__.py#L219
    stat = ss.WStat().calc_stat(model=const, data=src)
    print("Sherpa stat: {}".format(stat[0]))
    print("Sherpa fvec: {}".format(stat[1]))
def test_1209_background(make_data_path):
    """Do we pick up the header keywords from the background?

    This is related to issue #1209
    """

    # Start from a PHA with no data: each OGIP keyword should have
    # its "unset" value.
    #
    pha = DataPHA("dummy", None, None)
    for key in ["TELESCOP", "INSTRUME", "FILTER"]:
        assert pha.header[key] == "none"

    bkg = io.read_pha(make_data_path(PHAFILE))
    pha.set_background(bkg)

    # The PHA file contains a FILTER keyword but the responses do not.
    #
    assert pha.header["TELESCOP"] == "SWIFT"
    assert pha.header["INSTRUME"] == "XRT"
    assert pha.header["FILTER"] == "NONE"
def test_fake_pha_background_pha(reset_seed):
    """Sample from background pha"""
    np.random.seed(1234)

    pha = DataPHA("any", channels, counts, exposure=1000.)
    first_bkg = DataPHA("bkg", channels, bcounts, exposure=2000, backscal=2.5)
    pha.set_background(first_bkg, id="used-bkg")
    pha.set_arf(arf)
    pha.set_rmf(rmf)

    flat = Const1D("mdl")
    flat.c0 = 0

    # Just make sure that the model does not contribute
    fake_pha(pha, flat, is_source=True, add_bkgs=False)
    assert pha.counts.sum() == 0

    fake_pha(pha, flat, is_source=True, add_bkgs=True)
    # expected is [200, 200, 200]
    total = pha.counts.sum()
    assert total > 400
    assert total < 1000

    # Add several more backgrounds; the actual background should be
    # the average. The five extras have half the exposure time of the
    # first one and essentially 0 counts, so we should find 1/11 of
    # the counts seen in the last run.
    for bkg_id in range(5):
        extra = DataPHA("bkg", channels, np.ones(3, dtype=np.int16),
                        exposure=1000, backscal=2.5)
        pha.set_background(extra, id=bkg_id)

    fake_pha(pha, flat, is_source=True, add_bkgs=True)
    # expected is about [18, 18, 18]
    total = pha.counts.sum()
    assert total > 10
    assert total < 200
def get_data(mu_sig, n_on, n_off, alpha):
    """Return a Const1D model and a one-bin on/off DataPHA for wstat checks."""
    from sherpa.astro.data import DataPHA
    from sherpa.models import Const1D

    const = Const1D()
    const.c0 = mu_sig

    # Single-channel "on" dataset with unit exposure and backscal.
    on = DataPHA(
        name="dummy",
        channel=np.atleast_1d(1),
        counts=np.atleast_1d(n_on),
        backscal=1,
        exposure=1,
    )
    # The off (background) region encodes alpha via its backscal.
    off = DataPHA(
        name="dummy background",
        channel=np.atleast_1d(1),
        counts=np.atleast_1d(n_off),
        backscal=np.atleast_1d(1. / alpha),
        exposure=1,
    )
    on.set_background(off, 1)
    return const, on
def get_data(mu_sig, n_on, n_off, alpha):
    """Create the model and data objects needed for a wstat comparison."""
    from sherpa.astro.data import DataPHA
    from sherpa.models import Const1D

    def make_pha(label, n_counts, backscal):
        # One-channel dataset with unit exposure.
        return DataPHA(counts=np.atleast_1d(n_counts),
                       name=label,
                       channel=np.atleast_1d(1),
                       backscal=backscal,
                       exposure=1)

    model = Const1D()
    model.c0 = mu_sig

    data = make_pha("dummy", n_on, 1)
    background = make_pha("dummy background", n_off,
                          np.atleast_1d(1. / alpha))
    data.set_background(background, 1)
    return model, data
def read_pha(arg, use_errors=False, use_background=False):
    """Create a DataPHA object.

    Parameters
    ----------
    arg
        The name of the file or a representation of the file
        (the type depends on the I/O backend) containing the
        PHA data.
    use_errors : bool, optional
        If the PHA file contains statistical error values for the
        count (or count rate) column, should it be read in.
        This defaults to ``False``.
    use_background : bool, optional
        Should the background PHA data (and optional responses) also
        be read in and associated with the data set?

    Returns
    -------
    data : sherpa.astro.data.DataPHA

    """
    datasets, filename = backend.get_pha_data(arg,
                                              use_background=use_background)
    phasets = []
    output_once = True
    for data in datasets:
        if not use_errors:
            # Drop any error columns (after telling the user once).
            if data['staterror'] is not None or data['syserror'] is not None:
                if data['staterror'] is None:
                    msg = 'systematic'
                elif data['syserror'] is None:
                    msg = 'statistical'
                    if output_once:
                        wmsg = "systematic errors were not found in " + \
                               "file '{}'".format(filename)
                        warning(wmsg)
                else:
                    msg = 'statistical and systematic'
                if output_once:
                    imsg = msg + " errors were found in file " + \
                           "'{}' \nbut not used; ".format(filename) + \
                           "to use them, re-read with use_errors=True"
                    info(imsg)
                data['staterror'] = None
                data['syserror'] = None

        dname = os.path.dirname(filename)
        albl = 'ARF'
        rlbl = 'RMF'
        if use_background:
            albl = albl + ' (background)'
            rlbl = rlbl + ' (background)'

        arf = _read_ancillary(data, 'arffile', albl, dname,
                              read_arf, output_once)
        rmf = _read_ancillary(data, 'rmffile', rlbl, dname,
                              read_rmf, output_once)

        backgrounds = []

        if data['backfile'] and data['backfile'].lower() != 'none':
            # A missing/unreadable background must not stop the source
            # dataset from being read in, so failures only warn.
            try:
                if os.path.dirname(data['backfile']) == '':
                    data['backfile'] = os.path.join(
                        os.path.dirname(filename), data['backfile'])

                bkg_datasets = []
                # Do not read backgrounds of backgrounds
                if not use_background:
                    bkg_datasets = read_pha(data['backfile'], use_errors, True)
                    if output_once:
                        info('read background file {}'.format(
                            data['backfile']))

                # read_pha returns a single object or a list of them.
                if numpy.iterable(bkg_datasets):
                    for bkg_dataset in bkg_datasets:
                        if bkg_dataset.get_response() == (None, None) and \
                           rmf is not None:
                            bkg_dataset.set_response(arf, rmf)
                        backgrounds.append(bkg_dataset)
                else:
                    if bkg_datasets.get_response() == (None, None) and \
                       rmf is not None:
                        bkg_datasets.set_response(arf, rmf)
                    backgrounds.append(bkg_datasets)

            except Exception as exc:
                # Was a bare "except:": restrict to Exception so that
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                if output_once:
                    warning(str(exc))

        for bkg_type, bscal_type in zip(('background_up', 'background_down'),
                                        ('backscup', 'backscdn')):
            if data[bkg_type] is not None:
                b = DataPHA(filename,
                            channel=data['channel'],
                            counts=data[bkg_type],
                            bin_lo=data['bin_lo'],
                            bin_hi=data['bin_hi'],
                            grouping=data['grouping'],
                            quality=data['quality'],
                            exposure=data['exposure'],
                            backscal=data[bscal_type],
                            header=data['header'])
                b.set_response(arf, rmf)
                if output_once:
                    info("read {} into a dataset from file {}".format(
                        bkg_type, filename))
                backgrounds.append(b)

        # These keys are not DataPHA constructor arguments.
        for k in ['backfile', 'arffile', 'rmffile', 'backscup', 'backscdn',
                  'background_up', 'background_down']:
            data.pop(k, None)

        pha = DataPHA(filename, **data)
        pha.set_response(arf, rmf)
        # Inherit missing grouping/quality from the source dataset.
        # The loop variable used to be called "id" (shadowed builtin).
        for bkg_id, b in enumerate(backgrounds):
            if b.grouping is None:
                b.grouping = pha.grouping
            b.grouped = (b.grouping is not None)
            if b.quality is None:
                b.quality = pha.quality
            pha.set_background(b, bkg_id + 1)

        # set units *after* bkgs have been set
        pha._set_initial_quantity()
        phasets.append(pha)
        output_once = False

    if len(phasets) == 1:
        phasets = phasets[0]

    return phasets
def read_pha(arg, use_errors=False, use_background=False):
    """Create a DataPHA object.

    Parameters
    ----------
    arg
        The name of the file or a representation of the file
        (the type depends on the I/O backend) containing the
        PHA data.
    use_errors : bool, optional
        If the PHA file contains statistical error values for the
        count (or count rate) column, should it be read in.
        This defaults to ``False``.
    use_background : bool, optional
        Should the background PHA data (and optional responses) also
        be read in and associated with the data set?

    Returns
    -------
    data : sherpa.astro.data.DataPHA

    """
    datasets, filename = backend.get_pha_data(arg,
                                              use_background=use_background)
    phasets = []
    output_once = True
    for data in datasets:
        if not use_errors:
            # Drop any error columns (after telling the user once).
            if data['staterror'] is not None or data['syserror'] is not None:
                if data['staterror'] is None:
                    msg = 'systematic'
                elif data['syserror'] is None:
                    msg = 'statistical'
                    if output_once:
                        wmsg = "systematic errors were not found in " + \
                               "file '{}'".format(filename)
                        warning(wmsg)
                else:
                    msg = 'statistical and systematic'
                if output_once:
                    imsg = msg + " errors were found in file " + \
                           "'{}' \nbut not used; ".format(filename) + \
                           "to use them, re-read with use_errors=True"
                    info(imsg)
                data['staterror'] = None
                data['syserror'] = None

        dname = os.path.dirname(filename)
        albl = 'ARF'
        rlbl = 'RMF'
        if use_background:
            albl = albl + ' (background)'
            rlbl = rlbl + ' (background)'

        arf = _read_ancillary(data, 'arffile', albl, dname,
                              read_arf, output_once)
        rmf = _read_ancillary(data, 'rmffile', rlbl, dname,
                              read_rmf, output_once)

        backgrounds = []

        if data['backfile'] and data['backfile'].lower() != 'none':
            # A missing/unreadable background must not stop the source
            # dataset from being read in, so failures only warn.
            try:
                if os.path.dirname(data['backfile']) == '':
                    data['backfile'] = os.path.join(
                        os.path.dirname(filename), data['backfile'])

                bkg_datasets = []
                # Do not read backgrounds of backgrounds
                if not use_background:
                    bkg_datasets = read_pha(data['backfile'], use_errors, True)
                    if output_once:
                        info('read background file {}'.format(
                            data['backfile']))

                # read_pha returns a single object or a list of them.
                if numpy.iterable(bkg_datasets):
                    for bkg_dataset in bkg_datasets:
                        if bkg_dataset.get_response() == (None, None) and \
                           rmf is not None:
                            bkg_dataset.set_response(arf, rmf)
                        backgrounds.append(bkg_dataset)
                else:
                    if bkg_datasets.get_response() == (None, None) and \
                       rmf is not None:
                        bkg_datasets.set_response(arf, rmf)
                    backgrounds.append(bkg_datasets)

            except Exception as exc:
                # Was a bare "except:": restrict to Exception so that
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                if output_once:
                    warning(str(exc))

        for bkg_type, bscal_type in zip(('background_up', 'background_down'),
                                        ('backscup', 'backscdn')):
            if data[bkg_type] is not None:
                b = DataPHA(filename,
                            channel=data['channel'],
                            counts=data[bkg_type],
                            bin_lo=data['bin_lo'],
                            bin_hi=data['bin_hi'],
                            grouping=data['grouping'],
                            quality=data['quality'],
                            exposure=data['exposure'],
                            backscal=data[bscal_type],
                            header=data['header'])
                b.set_response(arf, rmf)
                if output_once:
                    info("read {} into a dataset from file {}".format(
                        bkg_type, filename))
                backgrounds.append(b)

        # These keys are not DataPHA constructor arguments.
        for k in ['backfile', 'arffile', 'rmffile', 'backscup', 'backscdn',
                  'background_up', 'background_down']:
            data.pop(k, None)

        pha = DataPHA(filename, **data)
        pha.set_response(arf, rmf)
        # Inherit missing grouping/quality from the source dataset.
        # The loop variable used to be called "id" (shadowed builtin).
        for bkg_id, b in enumerate(backgrounds):
            if b.grouping is None:
                b.grouping = pha.grouping
            b.grouped = (b.grouping is not None)
            if b.quality is None:
                b.quality = pha.quality
            pha.set_background(b, bkg_id + 1)

        # set units *after* bkgs have been set
        pha._set_initial_quantity()
        phasets.append(pha)
        output_once = False

    if len(phasets) == 1:
        phasets = phasets[0]

    return phasets
def read_pha(arg, use_errors=False, use_background=False):
    """Create a DataPHA object.

    read_pha( filename [, use_errors=False [, use_background=False]] )
    read_pha( PHACrate [, use_errors=False [, use_background=False]] )

    Parameters
    ----------
    arg
        The name of the file or a crate containing the PHA data.
    use_errors : bool, optional
        If the PHA file contains statistical error values for the
        count (or count rate) column, should it be read in.
    use_background : bool, optional
        Should the background PHA data (and optional responses) also
        be read in and associated with the data set?

    Returns
    -------
    data : sherpa.astro.data.DataPHA

    """
    datasets, filename = backend.get_pha_data(arg,
                                              use_background=use_background)
    phasets = []
    output_once = True
    for data in datasets:
        if not use_errors:
            # Drop any error columns (after telling the user once).
            if data["staterror"] is not None or data["syserror"] is not None:
                if data["staterror"] is None:
                    msg = "systematic"
                elif data["syserror"] is None:
                    msg = "statistical"
                    if output_once:
                        wmsg = "systematic errors were not found in " + \
                               "file '{}'".format(filename)
                        warning(wmsg)
                else:
                    msg = "statistical and systematic"
                if output_once:
                    imsg = (
                        msg
                        + " errors were found in file "
                        + "'{}' \nbut not used; ".format(filename)
                        + "to use them, re-read with use_errors=True"
                    )
                    info(imsg)
                data["staterror"] = None
                data["syserror"] = None

        dname = os.path.dirname(filename)
        albl = "ARF"
        rlbl = "RMF"
        if use_background:
            albl = albl + " (background)"
            rlbl = rlbl + " (background)"

        arf = _read_ancillary(data, "arffile", albl, dname,
                              read_arf, output_once)
        rmf = _read_ancillary(data, "rmffile", rlbl, dname,
                              read_rmf, output_once)

        backgrounds = []

        if data["backfile"] and data["backfile"].lower() != "none":
            # A missing/unreadable background must not stop the source
            # dataset from being read in, so failures only warn.
            try:
                if os.path.dirname(data["backfile"]) == "":
                    data["backfile"] = os.path.join(
                        os.path.dirname(filename), data["backfile"])

                bkg_datasets = []
                # Do not read backgrounds of backgrounds
                if not use_background:
                    bkg_datasets = read_pha(data["backfile"], use_errors, True)
                    if output_once:
                        info("read background file {}".format(data["backfile"]))

                # read_pha returns a single object or a list of them.
                if numpy.iterable(bkg_datasets):
                    for bkg_dataset in bkg_datasets:
                        if bkg_dataset.get_response() == (None, None) and \
                           rmf is not None:
                            bkg_dataset.set_response(arf, rmf)
                        backgrounds.append(bkg_dataset)
                else:
                    if bkg_datasets.get_response() == (None, None) and \
                       rmf is not None:
                        bkg_datasets.set_response(arf, rmf)
                    backgrounds.append(bkg_datasets)

            except Exception as exc:
                # Was a bare "except:": restrict to Exception so that
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                if output_once:
                    warning(str(exc))

        for bkg_type, bscal_type in zip(("background_up", "background_down"),
                                        ("backscup", "backscdn")):
            if data[bkg_type] is not None:
                b = DataPHA(
                    filename,
                    channel=data["channel"],
                    counts=data[bkg_type],
                    bin_lo=data["bin_lo"],
                    bin_hi=data["bin_hi"],
                    grouping=data["grouping"],
                    quality=data["quality"],
                    exposure=data["exposure"],
                    backscal=data[bscal_type],
                    header=data["header"],
                )
                b.set_response(arf, rmf)
                if output_once:
                    info("read {} into a dataset from file {}".format(bkg_type, filename))
                backgrounds.append(b)

        # These keys are not DataPHA constructor arguments.
        for k in ["backfile", "arffile", "rmffile", "backscup", "backscdn",
                  "background_up", "background_down"]:
            data.pop(k, None)

        pha = DataPHA(filename, **data)
        pha.set_response(arf, rmf)
        # Inherit missing grouping/quality from the source dataset.
        # The loop variable used to be called "id" (shadowed builtin).
        for bkg_id, b in enumerate(backgrounds):
            if b.grouping is None:
                b.grouping = pha.grouping
            b.grouped = b.grouping is not None
            if b.quality is None:
                b.quality = pha.quality
            pha.set_background(b, bkg_id + 1)

        # set units *after* bkgs have been set
        pha._set_initial_quantity()
        phasets.append(pha)
        output_once = False

    if len(phasets) == 1:
        phasets = phasets[0]

    return phasets
def test_fake_pha_basic(has_bkg, is_source, reset_seed):
    """No background.

    See also test_fake_pha_add_background

    For simplicity we use perfect responses.

    A background dataset can be added, but it should
    not be used in the simulation with default settings
    """
    # Fixed seed: the numeric bounds below assume this RNG state.
    np.random.seed(4276)
    data = DataPHA("any", channels, counts, exposure=1000.)

    if has_bkg:
        bkg = DataPHA("bkg", channels, bcounts,
                      exposure=2000, backscal=0.4)
        data.set_background(bkg, id="unused-bkg")

    data.set_arf(arf)
    data.set_rmf(rmf)

    mdl = Const1D("mdl")
    mdl.c0 = 2
    # add_bkgs=False: the attached background (if any) must be ignored.
    fake_pha(data, mdl, is_source=is_source, add_bkgs=False)

    # The simulation must not alter the dataset metadata or responses.
    assert data.exposure == pytest.approx(1000.0)
    assert (data.channel == channels).all()

    assert data.name == "any"
    assert data.get_arf().name == "user-arf"
    assert data.get_rmf().name == "delta-rmf"

    if has_bkg:
        # The background dataset itself must be left untouched.
        assert data.background_ids == ["unused-bkg"]
        bkg = data.get_background("unused-bkg")
        assert bkg.name == "bkg"
        assert bkg.counts == pytest.approx(bcounts)
        assert bkg.exposure == pytest.approx(2000)
    else:
        assert data.background_ids == []

    if is_source:
        # check we've faked counts (the scaling is such that it is
        # very improbable that this condition will fail)
        assert (data.counts > counts).all()

        # For reference the predicted source signal is
        # [200, 400, 400]
        #
        # What we'd like to say is that the predicted counts are
        # similar, but this is not easy to do. What we can try
        # is summing the counts (to average over the randomness)
        # and then a simple check
        #
        assert data.counts.sum() > 500
        assert data.counts.sum() < 1500

        # This is more likely to fail by chance, but still very unlikely
        assert data.counts[1] > data.counts[0]
    else:
        # No multiplication with exposure time, arf binning, etc.
        # so we just expect very few counts
        assert data.counts.sum() < 10
        assert data.counts.sum() >= 2

    # Essentially double the exposure by having two identical arfs
    data.set_arf(arf, 2)
    data.set_rmf(rmf, 2)
    fake_pha(data, mdl, is_source=is_source, add_bkgs=False)
    if is_source:
        # Doubled response => roughly doubled expected counts.
        assert data.counts.sum() > 1200
        assert data.counts.sum() < 3000
        assert data.counts[1] > data.counts[0]
    else:
        assert data.counts.sum() < 20
        assert data.counts.sum() >= 4
def test_fake_pha_has_valid_ogip_keywords_all_fake(tmp_path, reset_seed):
    """See #1209

    When everything is faked, what happens?
    """
    # Fixed seed: the regression counts below depend on it.
    np.random.seed(5)

    data = DataPHA("any", channels, counts, exposure=1000.)
    bkg = DataPHA("bkg", channels, bcounts, exposure=2000, backscal=1.)
    data.set_background(bkg, id="used-bkg")
    data.set_arf(arf)
    data.set_rmf(rmf)
    bkg.set_arf(arf)
    bkg.set_rmf(rmf)

    # All the simulated signal comes from the background model.
    mdl = Const1D("mdl")
    mdl.c0 = 0
    bmdl = Const1D("bmdl")
    bmdl.c0 = 2

    fake_pha(data, mdl, is_source=True, add_bkgs=True,
             bkg_models={"used-bkg": bmdl})

    # Round-trip through a FITS file to check the OGIP keywords.
    outfile = tmp_path / "sim.pha"
    io.write_pha(str(outfile), data, ascii=False)

    inpha = io.read_pha(str(outfile))
    assert inpha.channel == pytest.approx(channels)

    # it is not required that we check counts (that is, we can drop this
    # if it turns out not to be repeatable across platforms), but for
    # now keep the check.
    #
    assert inpha.counts == pytest.approx([188, 399, 416])

    for field in ["staterror", "syserror", "bin_lo", "bin_hi",
                  "grouping", "quality"]:
        assert getattr(inpha, field) is None

    assert inpha.exposure == pytest.approx(1000.0)
    assert inpha.backscal == pytest.approx(1.0)
    assert inpha.areascal == pytest.approx(1.0)
    assert not inpha.grouped
    assert not inpha.subtracted
    # Responses and backgrounds are not written out with the file.
    assert inpha.response_ids == []
    assert inpha.background_ids == []

    hdr = inpha.header
    assert hdr["TELESCOP"] == "none"
    assert hdr["INSTRUME"] == "none"
    assert hdr["FILTER"] == "none"

    # These keywords live as attributes/files, not header entries.
    for key in ["EXPOSURE", "AREASCAL", "BACKSCAL",
                "ANCRFILE", "BACKFILE", "RESPFILE"]:
        assert key not in hdr
def test_fake_pha_bkg_model():
    """Test background model
    """
    data = DataPHA('any', channels, counts, exposure=1000.)

    bkg = DataPHA('bkg', channels, bcounts, exposure=2000, backscal=1.)
    data.set_background(bkg, id='used-bkg')

    data.set_arf(arf)
    data.set_rmf(rmf)

    # The background needs its own responses for the bkg model.
    bkg.set_arf(arf)
    bkg.set_rmf(rmf)

    # Source model contributes nothing; all signal comes from bmdl.
    mdl = Const1D('mdl')
    mdl.c0 = 0

    bmdl = Const1D('bmdl')
    bmdl.c0 = 2

    fake_pha(data, mdl, is_source=True, add_bkgs=True,
             bkg_models={'used-bkg': bmdl})

    # The simulation must not alter the dataset metadata or responses.
    assert data.exposure == pytest.approx(1000.0)
    assert (data.channel == channels).all()

    assert data.name == 'any'
    assert data.get_arf().name == 'user-arf'
    assert data.get_rmf().name == 'delta-rmf'

    # The background itself is unchanged
    assert data.background_ids == ['used-bkg']
    bkg = data.get_background('used-bkg')
    assert bkg.name == 'bkg'
    assert bkg.counts == pytest.approx(bcounts)
    assert bkg.exposure == pytest.approx(2000)

    # check we've faked counts (the scaling is such that it is
    # very improbable that this condition will fail)
    assert (data.counts > counts).all()

    # For reference the predicted signal is
    # [200, 400, 400]
    # but, unlike in the test above, this time it's all coming
    # from the background.
    #
    # What we'd like to say is that the predicted counts are
    # similar, but this is not easy to do. What we can try
    # is summing the counts (to average over the randomness)
    # and then a simple check
    #
    assert data.counts.sum() > 500
    assert data.counts.sum() < 1500

    # This is more likely to fail by chance, but still very unlikely
    assert data.counts[1] > 1.5 * data.counts[0]

    # Now add a second set of arf/rmf for the data.
    # However, all the signal is background, so this does not change
    # any of the results.
    data.set_arf(arf, 2)
    data.set_rmf(rmf, 2)
    fake_pha(data, mdl, is_source=True, add_bkgs=True,
             bkg_models={'used-bkg': bmdl})
    assert data.counts.sum() > 500
    assert data.counts.sum() < 1500
    assert data.counts[1] > 1.5 * data.counts[0]
def read_pha(arg, use_errors=False, use_background=False):
    """Create a DataPHA object.

    read_pha( filename [, use_errors=False [, use_background=False]] )
    read_pha( PHACrate [, use_errors=False [, use_background=False]] )

    Parameters
    ----------
    arg
        The name of the file or a crate containing the PHA data.
    use_errors : bool, optional
        If the PHA file contains statistical error values for the
        count (or count rate) column, should it be read in.
    use_background : bool, optional
        Should the background PHA data (and optional responses) also
        be read in and associated with the data set?

    Returns
    -------
    data : sherpa.astro.data.DataPHA

    """
    datasets, filename = backend.get_pha_data(arg,
                                              use_background=use_background)
    phasets = []
    output_once = True
    for data in datasets:
        if not use_errors:
            # Drop any error columns (after telling the user once).
            if data['staterror'] is not None or data['syserror'] is not None:
                if data['staterror'] is None:
                    msg = 'systematic'
                elif data['syserror'] is None:
                    msg = 'statistical'
                    if output_once:
                        wmsg = "systematic errors were not found in " + \
                               "file '{}'".format(filename)
                        warning(wmsg)
                else:
                    msg = 'statistical and systematic'
                if output_once:
                    imsg = msg + " errors were found in file " + \
                           "'{}' \nbut not used; ".format(filename) + \
                           "to use them, re-read with use_errors=True"
                    info(imsg)
                data['staterror'] = None
                data['syserror'] = None

        dname = os.path.dirname(filename)
        albl = 'ARF'
        rlbl = 'RMF'
        if use_background:
            albl = albl + ' (background)'
            rlbl = rlbl + ' (background)'

        arf = _read_ancillary(data, 'arffile', albl, dname,
                              read_arf, output_once)
        rmf = _read_ancillary(data, 'rmffile', rlbl, dname,
                              read_rmf, output_once)

        backgrounds = []

        if data['backfile'] and data['backfile'].lower() != 'none':
            # A missing/unreadable background must not stop the source
            # dataset from being read in, so failures only warn.
            try:
                if os.path.dirname(data['backfile']) == '':
                    data['backfile'] = os.path.join(
                        os.path.dirname(filename), data['backfile'])

                bkg_datasets = []
                # Do not read backgrounds of backgrounds
                if not use_background:
                    bkg_datasets = read_pha(data['backfile'], use_errors, True)
                    if output_once:
                        info('read background file {}'.format(
                            data['backfile']))

                # read_pha returns a single object or a list of them.
                if numpy.iterable(bkg_datasets):
                    for bkg_dataset in bkg_datasets:
                        if bkg_dataset.get_response() == (None, None) and \
                           rmf is not None:
                            bkg_dataset.set_response(arf, rmf)
                        backgrounds.append(bkg_dataset)
                else:
                    if bkg_datasets.get_response() == (None, None) and \
                       rmf is not None:
                        bkg_datasets.set_response(arf, rmf)
                    backgrounds.append(bkg_datasets)

            except Exception as exc:
                # Was a bare "except:": restrict to Exception so that
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                if output_once:
                    warning(str(exc))

        for bkg_type, bscal_type in zip(('background_up', 'background_down'),
                                        ('backscup', 'backscdn')):
            if data[bkg_type] is not None:
                b = DataPHA(filename,
                            channel=data['channel'],
                            counts=data[bkg_type],
                            bin_lo=data['bin_lo'],
                            bin_hi=data['bin_hi'],
                            grouping=data['grouping'],
                            quality=data['quality'],
                            exposure=data['exposure'],
                            backscal=data[bscal_type],
                            header=data['header'])
                b.set_response(arf, rmf)
                if output_once:
                    info("read {} into a dataset from file {}".format(
                        bkg_type, filename))
                backgrounds.append(b)

        # These keys are not DataPHA constructor arguments.
        for k in ['backfile', 'arffile', 'rmffile', 'backscup', 'backscdn',
                  'background_up', 'background_down']:
            data.pop(k, None)

        pha = DataPHA(filename, **data)
        pha.set_response(arf, rmf)
        # Inherit missing grouping/quality from the source dataset.
        # The loop variable used to be called "id" (shadowed builtin).
        for bkg_id, b in enumerate(backgrounds):
            if b.grouping is None:
                b.grouping = pha.grouping
            b.grouped = (b.grouping is not None)
            if b.quality is None:
                b.quality = pha.quality
            pha.set_background(b, bkg_id + 1)

        # set units *after* bkgs have been set
        pha._set_initial_quantity()
        phasets.append(pha)
        output_once = False

    if len(phasets) == 1:
        phasets = phasets[0]

    return phasets
def read_pha(arg, use_errors=False, use_background=False):
    """Create a DataPHA object.

    read_pha( filename [, use_errors=False [, use_background=False]] )
    read_pha( PHACrate [, use_errors=False [, use_background=False]] )

    Parameters
    ----------
    arg
        The name of the file or a crate containing the PHA data.
    use_errors : bool, optional
        If the PHA file contains statistical error values for the
        count (or count rate) column, should it be read in.
    use_background : bool, optional
        Should the background PHA data (and optional responses) also
        be read in and associated with the data set?

    Returns
    -------
    data : sherpa.astro.data.DataPHA

    """
    datasets, filename = backend.get_pha_data(arg,
                                              use_background=use_background)
    phasets = []
    output_once = True
    for data in datasets:
        if not use_errors:
            # Drop any error columns (after telling the user once).
            if data['staterror'] is not None or data['syserror'] is not None:
                if data['staterror'] is None:
                    msg = 'systematic'
                elif data['syserror'] is None:
                    msg = 'statistical'
                    if output_once:
                        wmsg = "systematic errors were not found in " + \
                               "file '{}'".format(filename)
                        warning(wmsg)
                else:
                    msg = 'statistical and systematic'
                if output_once:
                    imsg = msg + " errors were found in file " + \
                           "'{}' \nbut not used; ".format(filename) + \
                           "to use them, re-read with use_errors=True"
                    info(imsg)
                data['staterror'] = None
                data['syserror'] = None

        dname = os.path.dirname(filename)
        albl = 'ARF'
        rlbl = 'RMF'
        if use_background:
            albl = albl + ' (background)'
            rlbl = rlbl + ' (background)'

        arf = _read_ancillary(data, 'arffile', albl, dname,
                              read_arf, output_once)
        rmf = _read_ancillary(data, 'rmffile', rlbl, dname,
                              read_rmf, output_once)

        backgrounds = []

        if data['backfile'] and data['backfile'].lower() != 'none':
            # A missing/unreadable background must not stop the source
            # dataset from being read in, so failures only warn.
            try:
                if os.path.dirname(data['backfile']) == '':
                    data['backfile'] = os.path.join(
                        os.path.dirname(filename), data['backfile'])

                bkg_datasets = []
                # Do not read backgrounds of backgrounds
                if not use_background:
                    bkg_datasets = read_pha(data['backfile'], use_errors, True)
                    if output_once:
                        info('read background file {}'.format(
                            data['backfile']))

                # read_pha returns a single object or a list of them.
                if numpy.iterable(bkg_datasets):
                    for bkg_dataset in bkg_datasets:
                        if bkg_dataset.get_response() == (None, None) and \
                           rmf is not None:
                            bkg_dataset.set_response(arf, rmf)
                        backgrounds.append(bkg_dataset)
                else:
                    if bkg_datasets.get_response() == (None, None) and \
                       rmf is not None:
                        bkg_datasets.set_response(arf, rmf)
                    backgrounds.append(bkg_datasets)

            except Exception as exc:
                # Was a bare "except:": restrict to Exception so that
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                if output_once:
                    warning(str(exc))

        for bkg_type, bscal_type in zip(('background_up', 'background_down'),
                                        ('backscup', 'backscdn')):
            if data[bkg_type] is not None:
                b = DataPHA(filename,
                            channel=data['channel'],
                            counts=data[bkg_type],
                            bin_lo=data['bin_lo'],
                            bin_hi=data['bin_hi'],
                            grouping=data['grouping'],
                            quality=data['quality'],
                            exposure=data['exposure'],
                            backscal=data[bscal_type],
                            header=data['header'])
                b.set_response(arf, rmf)
                if output_once:
                    info("read {} into a dataset from file {}".format(
                        bkg_type, filename))
                backgrounds.append(b)

        # These keys are not DataPHA constructor arguments.
        for k in ['backfile', 'arffile', 'rmffile', 'backscup', 'backscdn',
                  'background_up', 'background_down']:
            data.pop(k, None)

        pha = DataPHA(filename, **data)
        pha.set_response(arf, rmf)
        # Inherit missing grouping/quality from the source dataset.
        # The loop variable used to be called "id" (shadowed builtin).
        for bkg_id, b in enumerate(backgrounds):
            if b.grouping is None:
                b.grouping = pha.grouping
            b.grouped = (b.grouping is not None)
            if b.quality is None:
                b.quality = pha.quality
            pha.set_background(b, bkg_id + 1)

        # set units *after* bkgs have been set
        pha._set_initial_quantity()
        phasets.append(pha)
        output_once = False

    if len(phasets) == 1:
        phasets = phasets[0]

    return phasets
def setup_single_pha(stat, sys, background=True, areascal="none"): """Return a single data set and model. This is aimed at wstat calculation, and so the DataPHA object has no attached response. The data set is grouped. Parameters ---------- stat, sys : bool Should statistical and systematic errors be explicitly set (True) or taken from the statistic (False)? background : bool Should a background data set be included (True) or not (False)? The background is *not* subtracted when True. areascal : {'none', 'scalar', 'array'} Is the AREASCAL set and, if so, to a scalar or array value? If background is True then it is also applied to the background data set. Returns ------- data, model DataPHA and Model objects. """ # For the array of areascals, ensure that areascal is not # constant within at least one group # areascals = { 'source': { 'none': None, 'scalar': 1.0, 'array': np.asarray([0.9, 0.9, 0.8, 0.9, 0.7], dtype=np.float32) }, 'background': { 'none': None, 'scalar': 0.8, 'array': np.asarray([1.2, 1.2, 1.2, 1.1, 1.4], dtype=np.float32) } } # If used the same bins as setup_single_1dint then could # re-use the results, but the bins are different, and it # is useful for the Data1DInt case to test non-consecutive # histogram bins. 
# channels = np.arange(1, 6, dtype=np.int16) counts = np.asarray([10, 13, 9, 17, 21], dtype=np.int16) if stat: staterror = np.asarray([3.0, 4.0, 3.0, 4.0, 5.0]) else: staterror = None if sys: syserror = 0.2 * counts else: syserror = None grouping = np.asarray([1, -1, 1, -1, 1], dtype=np.int16) # quality = np.asarray([0, 0, 0, 0, 0], dtype=np.int16) quality = None exposure = 150.0 backscal = 0.01 ascal = areascals['source'][areascal] # does not set areascal or header data = DataPHA(name='tstpha', channel=channels, counts=counts, staterror=staterror, syserror=syserror, grouping=grouping, quality=quality, exposure=exposure, backscal=backscal, areascal=ascal) if background: bgcounts = np.asarray([2, 1, 0, 2, 2], dtype=np.int16) if stat: bgstaterror = np.asarray([0.2, 0.4, 0.5, 0.3, 0.2]) else: bgstaterror = None if sys: bgsyserror = 0.3 * bgcounts else: bgsyserror = None bggrouping = None bgquality = None bgexposure = 550.0 bgbackscal = np.asarray([0.05, 0.06, 0.04, 0.04, 0.07]) bgascal = areascals['background'][areascal] bgdata = DataPHA(name='bgpha', channel=channels, counts=bgcounts, staterror=bgstaterror, syserror=bgsyserror, grouping=bggrouping, quality=bgquality, exposure=bgexposure, backscal=bgbackscal, areascal=bgascal) data.set_background(bgdata) # Trying a multi-component model, even though this actual # model is degenerate (cnst.c0 and poly.c0) cnst = Const1D('cnst') poly = Polynom1D('poly') cnst.c0 = 1.2 poly.c0 = 7.9 poly.c1 = 2.1 poly.c1.frozen = False mdl = cnst + poly return data, mdl
def test_fake_pha_bkg_model(reset_seed):
    """Test background model
    """
    # Fixed seed: the regression counts below depend on it.
    np.random.seed(5329853)

    data = DataPHA("any", channels, counts, exposure=1000.)

    bkg = DataPHA("bkg", channels, bcounts, exposure=2000, backscal=1.)
    data.set_background(bkg, id="used-bkg")

    data.set_arf(arf)
    data.set_rmf(rmf)

    # The background needs its own responses for the bkg model.
    bkg.set_arf(arf)
    bkg.set_rmf(rmf)

    # Source model contributes nothing; all signal comes from bmdl.
    mdl = Const1D("mdl")
    mdl.c0 = 0

    bmdl = Const1D("bmdl")
    bmdl.c0 = 2

    # With no background model the simulated source counts
    # are 0.
    #
    fake_pha(data, mdl, is_source=True, add_bkgs=False,
             bkg_models={"used-bkg": bmdl})
    assert data.counts == pytest.approx([0, 0, 0])

    # Check we have created source counts this time.
    #
    fake_pha(data, mdl, is_source=True, add_bkgs=True,
             bkg_models={"used-bkg": bmdl})

    assert data.exposure == pytest.approx(1000.0)
    assert (data.channel == channels).all()

    assert data.name == "any"
    assert data.get_arf().name == "user-arf"
    assert data.get_rmf().name == "delta-rmf"

    # The background itself is unchanged
    assert data.background_ids == ["used-bkg"]
    bkg = data.get_background("used-bkg")
    assert bkg.name == "bkg"
    assert bkg.counts == pytest.approx(bcounts)
    assert bkg.exposure == pytest.approx(2000)

    # Apply a number of regression checks to test the output. These
    # can expect to change if the randomization changes (either
    # explicitly or implicity). There used to be a number of checks
    # that compares the simulated data to the input values, but these
    # could occasionally fail, and so the seed was fixed for these
    # tests.
    #
    # For reference the predicted signal is
    # [200, 400, 400]
    # but, unlike in the test above, this time it's all coming
    # from the background.
    #
    assert data.counts == pytest.approx([186, 411, 405])

    # Now add a second set of arf/rmf for the data.
    # However, all the signal is background, so this does not change
    # any of the results.
    data.set_arf(arf, 2)
    data.set_rmf(rmf, 2)
    fake_pha(data, mdl, is_source=True, add_bkgs=True,
             bkg_models={"used-bkg": bmdl})

    assert data.counts == pytest.approx([197, 396, 389])