Пример #1
0
Файл: io.py Проект: wmwv/sncosmo
def _read_salt2(f, **kwargs):
    """Read a new-style SALT2 file.

    Such a file has metadata on lines starting with '@' and column names
    on lines starting with '#' and containing a ':' after the column name.
    There is optionally a line containing '#end' before the start of data.
    """

    meta = odict()
    colnames = []
    cols = []
    readingdata = False
    for line in f:

        # strip leading & trailing whitespace & newline
        line = line.strip()
        if len(line) == 0:
            continue

        if not readingdata:
            # Read metadata
            if line[0] == '@':
                pos = line.find(' ')  # Find first space.
                if pos in [-1, 1]:  # Space must exist and key must exist.
                    raise ValueError('Incorrectly formatted metadata line: ' +
                                     line)
                meta[line[1:pos]] = _cast_str(line[pos:])
                continue

            # Read header line
            if line[0] == '#':
                pos = line.find(':')
                if pos in [-1, 1]:
                    continue  # comment line
                colname = line[1:pos].strip()
                if colname == 'end':
                    continue
                colnames.append(colname)
                cols.append([])
                continue

            # If the first non-whitespace character is not '@' or '#',
            # assume the line is the first data line.
            readingdata = True

        # strip comments
        pos = line.find('#')
        if pos > -1:
            line = line[:pos]
        if len(line) == 0:
            continue

        # Now we're reading data
        items = line.split()
        for col, item in zip(cols, items):
            col.append(_cast_str(item))

    data = odict(zip(colnames, cols))

    return meta, data
Пример #2
0
def _read_salt2(f, **kwargs):
    """Read a new-style SALT2 file.

    Such a file has metadata on lines starting with '@' and column names
    on lines starting with '#' and containing a ':' after the column name.
    There is optionally a line containing '#end' before the start of data.
    """

    meta = odict()
    colnames = []
    cols = []
    readingdata = False
    for line in f:

        # strip leading & trailing whitespace & newline
        line = line.strip()
        if len(line) == 0:
            continue

        if not readingdata:
            # Read metadata
            if line[0] == '@':
                pos = line.find(' ')  # Find first space.
                if pos in [-1, 1]:  # Space must exist and key must exist.
                    raise ValueError('Incorrectly formatted metadata line: ' +
                                     line)
                meta[line[1:pos]] = _cast_str(line[pos:])
                continue

            # Read header line
            if line[0] == '#':
                pos = line.find(':')
                if pos in [-1, 1]:
                    continue  # comment line
                colname = line[1:pos].strip()
                if colname == 'end':
                    continue
                colnames.append(colname)
                cols.append([])
                continue

            # If the first non-whitespace character is not '@' or '#',
            # assume the line is the first data line.
            readingdata = True

        # strip comments
        pos = line.find('#')
        if pos > -1:
            line = line[:pos]
        if len(line) == 0:
            continue

        # Now we're reading data
        items = line.split()
        for col, item in zip(cols, items):
            col.append(_cast_str(item))

    data = odict(zip(colnames, cols))

    return meta, data
Пример #3
0
Файл: io.py Проект: wmwv/sncosmo
def _write_json(f, data, meta, **kwargs):

    # Build a dictionary of pure-python objects
    output = odict([('meta', meta), ('data', odict())])
    for key in data.dtype.names:
        output['data'][key] = data[key].tolist()
    json.dump(output, f, encoding=sys.getdefaultencoding())
    del output
Пример #4
0
def _write_json(f, data, meta, **kwargs):

    # Build a dictionary of pure-python objects
    output = odict([("meta", meta), ("data", odict())])
    for key in data.dtype.names:
        output["data"][key] = data[key].tolist()
    json.dump(output, f)
    del output
Пример #5
0
def _write_json(f, data, meta, **kwargs):

    # Build a dictionary of pure-python objects
    output = odict([('meta', meta),
                    ('data', odict())])
    for key in data.dtype.names:
        output['data'][key] = data[key].tolist()
    json.dump(output, f, encoding=sys.getdefaultencoding())
    del output
Пример #6
0
def _parse_meta_from_line(line):
    """Return dictionary from key, value pairs on a line. Helper function for
    snana_read_simlib."""

    meta = odict()

    # Find position of all the colons
    colon_pos = []
    i = line.find(':')
    while i != -1:
        colon_pos.append(i)
        i = line.find(':', i + 1)

    # Find position of start of words before colons
    key_pos = []
    for i in colon_pos:
        j = line.rfind(' ', 0, i)
        key_pos.append(j + 1)

    # append an extra key position so that we know when to end the last value.
    key_pos.append(len(line))

    # get the keys, values based on positions above.
    for i in range(len(colon_pos)):
        key = line[key_pos[i]:colon_pos[i]]
        val = line[colon_pos[i] + 1:key_pos[i + 1]].strip()
        try:
            val = int(val)
        except ValueError:
            try:
                val = float(val)
            except ValueError:
                pass
        meta[key] = val
    return meta
Пример #7
0
def _parse_meta_from_line(line):
    """Return dictionary from key, value pairs on a line. Helper function for
    snana_read_simlib."""

    meta = odict()

    # Find position of all the colons
    colon_pos = []
    i = line.find(':')
    while i != -1:
        colon_pos.append(i)
        i = line.find(':', i+1)

    # Find position of start of words before colons
    key_pos = []
    for i in colon_pos:
        j = line.rfind(' ', 0, i)
        key_pos.append(j+1)

    # append an extra key position so that we know when to end the last value.
    key_pos.append(len(line))

    # get the keys, values based on positions above.
    for i in range(len(colon_pos)):
        key = line[key_pos[i]: colon_pos[i]]
        val = line[colon_pos[i]+1: key_pos[i+1]].strip()
        try:
            val = int(val)
        except ValueError:
            try:
                val = float(val)
            except ValueError:
                pass
        meta[key] = val
    return meta
Пример #8
0
def _salt2_rename_keys(d):
    newd = odict()
    for key, val in d.iteritems():
        key = key.lower()
        if key in SALT2KEY_TO_KEY:
            key = SALT2KEY_TO_KEY[key]
        newd[key] = val
    return newd
Пример #9
0
def _read_ascii(f, **kwargs):

    delim = kwargs.get('delim', None)
    metachar = kwargs.get('metachar', '@')
    commentchar = kwargs.get('commentchar', '#')

    meta = odict()
    colnames = []
    cols = []
    readingdata = False
    for line in f:

        # strip leading & trailing whitespace, newline, and comments
        line = line.strip()
        pos = line.find(commentchar)
        if pos > -1:
            line = line[:pos]
        if len(line) == 0:
            continue

        if not readingdata:
            # Read metadata
            if line[0] == metachar:
                pos = line.find(' ')  # Find first space.
                if pos in [-1, 1]:  # Space must exist and key must exist.
                    raise ValueError('Incorrectly formatted metadata line: ' +
                                     line)
                meta[line[1:pos]] = _cast_str(line[pos:])
                continue

            # Read header line
            for item in line.split(delim):
                colnames.append(item.strip())
                cols.append([])
            readingdata = True
            continue

        # Now we're reading data
        items = line.split(delim)
        for col, item in zip(cols, items):
            col.append(_cast_str(item))

    data = odict(zip(colnames, cols))
    return meta, data
Пример #10
0
Файл: io.py Проект: wmwv/sncosmo
def _read_ascii(f, **kwargs):

    delim = kwargs.get('delim', None)
    metachar = kwargs.get('metachar', '@')
    commentchar = kwargs.get('commentchar', '#')

    meta = odict()
    colnames = []
    cols = []
    readingdata = False
    for line in f:

        # strip leading & trailing whitespace, newline, and comments
        line = line.strip()
        pos = line.find(commentchar)
        if pos > -1:
            line = line[:pos]
        if len(line) == 0:
            continue

        if not readingdata:
            # Read metadata
            if line[0] == metachar:
                pos = line.find(' ')  # Find first space.
                if pos in [-1, 1]:  # Space must exist and key must exist.
                    raise ValueError('Incorrectly formatted metadata line: ' +
                                     line)
                meta[line[1:pos]] = _cast_str(line[pos:])
                continue

            # Read header line
            for item in line.split(delim):
                colnames.append(item.strip())
                cols.append([])
            readingdata = True
            continue

        # Now we're reading data
        items = line.split(delim)
        for col, item in zip(cols, items):
            col.append(_cast_str(item))

    data = odict(zip(colnames, cols))
    return meta, data
Пример #11
0
def normalize_data(data, zp=25.0, zpsys="ab"):
    """Return a copy of the data with all flux and fluxerr values normalized
    to the given zeropoint. Assumes data has already been standardized.

    Parameters
    ----------
    data : `~numpy.ndarray`
        Structured array.
    zp : float
    zpsys : str

    Returns
    -------
    normalized_data : `~numpy.ndarray`
    """

    normmagsys = get_magsystem(zpsys)
    factor = np.empty(len(data), dtype=np.float)

    for b in set(data["band"].tolist()):
        idx = data["band"] == b
        b = get_bandpass(b)

        bandfactor = 10.0 ** (0.4 * (zp - data["zp"][idx]))
        bandzpsys = data["zpsys"][idx]
        for ms in set(bandzpsys):
            idx2 = bandzpsys == ms
            ms = get_magsystem(ms)
            bandfactor[idx2] *= ms.zpbandflux(b) / normmagsys.zpbandflux(b)

        factor[idx] = bandfactor

    normalized_data = odict(
        [
            ("time", data["time"]),
            ("band", data["band"]),
            ("flux", data["flux"] * factor),
            ("fluxerr", data["fluxerr"] * factor),
            ("zp", zp),
            ("zpsys", zpsys),
        ]
    )
    return dict_to_array(normalized_data)
Пример #12
0
Файл: io.py Проект: wmwv/sncosmo
def write_lc(data, fname, format='ascii', **kwargs):
    """Write light curve data.

    Parameters
    ----------
    data : `~astropy.table.Table`
        Light curve data.
    fname : str
        Filename.
    format : {'ascii', 'salt2', 'snana', 'json'}, optional
        Format of file. Default is 'ascii'. 'salt2' is the new format available
        in snfit version >= 2.3.0.
    delim : str, optional
        **[ascii only]** Character used to separate entries on a line.
        Default is ' '.
    metachar : str, optional
        **[ascii only]** Metadata designator. Default is '@'.
    raw : bool, optional
        **[salt2, snana]** By default, the SALT2 and SNANA writers rename
        some metadata keys and column names in order to comply with what
        snfit and SNANA expect. Set to True to override this.
        Default is False.
    pedantic : bool, optional
        **[salt2, snana]** If True, check that output column names and header
        keys comply with expected formatting, and raise a ValueError if not.
        It is probably a good idea to set to False when raw is True.
        Default is True.
    """

    if format not in WRITERS:
        raise ValueError(
            "Writer not defined for format {0!r}. Options: ".format(format) +
            ", ".join(WRITERS.keys()))
    if isinstance(data, Table):
        meta = data.meta
        data = np.asarray(data)
    else:
        meta = odict()
        if not isinstance(data, np.ndarray):
            data = dict_to_array(data)
    with open(fname, 'w') as f:
        WRITERS[format](f, data, meta, **kwargs)
Пример #13
0
def dict_to_array(d):
    """Convert a dictionary of lists (or single values) to a structured
    numpy.ndarray."""

    # Convert all lists/values to 1-d arrays, in order to let numpy
    # figure out the necessary size of the string arrays.
    new_d = odict()
    for key in d:
        new_d[key] = np.atleast_1d(d[key])

    # Determine dtype of output array.
    dtype = [(key, arr.dtype) for key, arr in six.iteritems(new_d)]

    # Initialize ndarray and then fill it.
    col_len = max([len(v) for v in new_d.values()])
    result = np.empty(col_len, dtype=dtype)
    for key in new_d:
        result[key] = new_d[key]

    return result
Пример #14
0
def dict_to_array(d):
    """Convert a dictionary of lists (or single values) to a structured
    numpy.ndarray."""

    # Convert all lists/values to 1-d arrays, in order to let numpy
    # figure out the necessary size of the string arrays.
    new_d = odict()
    for key in d:
        new_d[key] = np.atleast_1d(d[key])

    # Determine dtype of output array.
    dtype = [(key, arr.dtype) for key, arr in new_d.iteritems()]

    # Initialize ndarray and then fill it.
    col_len = max([len(v) for v in new_d.values()])
    result = np.empty(col_len, dtype=dtype)
    for key in new_d:
        result[key] = new_d[key]

    return result
Пример #15
0
def write_lc(data, fname, format='ascii', **kwargs):
    """Write light curve data.

    Parameters
    ----------
    data : `~astropy.table.Table`
        Light curve data.
    fname : str
        Filename.
    format : {'ascii', 'salt2', 'snana', 'json'}, optional
        Format of file. Default is 'ascii'. 'salt2' is the new format available
        in snfit version >= 2.3.0.
    delim : str, optional
        **[ascii only]** Character used to separate entries on a line.
        Default is ' '.
    metachar : str, optional
        **[ascii only]** Metadata designator. Default is '@'.
    raw : bool, optional
        **[salt2, snana]** By default, the SALT2 and SNANA writers rename
        some metadata keys and column names in order to comply with what
        snfit and SNANA expect. Set to True to override this.
        Default is False.
    pedantic : bool, optional
        **[salt2, snana]** If True, check that output column names and header
        keys comply with expected formatting, and raise a ValueError if not.
        It is probably a good idea to set to False when raw is True.
        Default is True.
    """

    if format not in WRITERS:
        raise ValueError("Writer not defined for format {0!r}. Options: "
                         .format(format) + ", ".join(WRITERS.keys()))
    if isinstance(data, Table):
        meta = data.meta
        data = np.asarray(data)
    else:
        meta = odict()
        if not isinstance(data, np.ndarray):
            data = dict_to_array(data)
    with open(fname, 'wb') as f:
        WRITERS[format](f, data, meta, **kwargs)
def normalize_data(data, zp=25., zpsys='ab'):
    """Return a copy of the data with all flux and fluxerr values normalized
    to the given zeropoint. Assumes data has already been standardized.

    Parameters
    ----------
    data : `~numpy.ndarray`
        Structured array.
    zp : float
    zpsys : str

    Returns
    -------
    normalized_data : `~numpy.ndarray`
    """

    normmagsys = get_magsystem(zpsys)
    factor = np.empty(len(data), dtype=np.float)

    for b in set(data['band'].tolist()):
        idx = data['band'] == b
        b = get_bandpass(b)

        bandfactor = 10.**(0.4 * (zp - data['zp'][idx]))
        bandzpsys = data['zpsys'][idx]
        for ms in set(bandzpsys):
            idx2 = bandzpsys == ms
            ms = get_magsystem(ms)
            bandfactor[idx2] *= (ms.zpbandflux(b) / normmagsys.zpbandflux(b))

        factor[idx] = bandfactor

    normalized_data = odict([('time', data['time']),
                             ('band', data['band']),
                             ('flux', data['flux'] * factor),
                             ('fluxerr', data['fluxerr'] * factor),
                             ('zp', zp),
                             ('zpsys', zpsys)])
    return dict_to_array(normalized_data)
Пример #17
0
def normalize_data(data, zp=25., zpsys='ab'):
    """Return a copy of the data with all flux and fluxerr values normalized
    to the given zeropoint. Assumes data has already been standardized.

    Parameters
    ----------
    data : `~numpy.ndarray`
        Structured array.
    zp : float
    zpsys : str

    Returns
    -------
    normalized_data : `~numpy.ndarray`
    """

    normmagsys = get_magsystem(zpsys)
    factor = np.empty(len(data), dtype=np.float)

    for b in set(data['band'].tolist()):
        idx = data['band'] == b
        b = get_bandpass(b)

        bandfactor = 10.**(0.4 * (zp - data['zp'][idx]))
        bandzpsys = data['zpsys'][idx]
        for ms in set(bandzpsys):
            idx2 = bandzpsys == ms
            ms = get_magsystem(ms)
            bandfactor[idx2] *= (ms.zpbandflux(b) / normmagsys.zpbandflux(b))

        factor[idx] = bandfactor

    normalized_data = odict([('time', data['time']), ('band', data['band']),
                             ('flux', data['flux'] * factor),
                             ('fluxerr', data['fluxerr'] * factor), ('zp', zp),
                             ('zpsys', zpsys)])
    return dict_to_array(normalized_data)
Пример #18
0
def nest_lc(data, model, vparam_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, ppfs=None, npoints=100, method='single',
            maxiter=None, maxcall=None, modelcov=False, rstate=None,
            verbose=False, flux_cov=None, fixed_mcov=None, **kwargs):
    """Run nested sampling algorithm to estimate model parameters and evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines
        up with the earliest data point and the maximum bound is such
        that the earliest phase of the model lines up with the latest
        data point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing
        amplitude bound. Default is 5.
    priors : `dict`, optional
        Prior probability distribution function for each parameter. The keys
        should be parameter names and the values should be callables that
        accept a float. If a parameter is not in the dictionary, the prior
        defaults to a flat distribution between the bounds.
    ppfs : `dict`, optional
        Prior percent point function (inverse of the cumulative distribution
        function) for each parameter. If a parameter is in this dictionary,
        the ppf takes precedence over a prior pdf specified in ``priors``.
    npoints : int, optional
        Number of active samples to use. Increasing this value increases
        the accuracy (due to denser sampling) and also the time
        to solution.
    method : {'classic', 'single', 'multi'}, optional
        Method used to select new points. Choices are 'classic',
        single-ellipsoidal ('single'), multi-ellipsoidal ('multi'). Default
        is 'single'.
    maxiter : int, optional
        Maximum number of iterations. Iteration may stop earlier if
        termination condition is reached. Default is no limit.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    rstate : `~numpy.random.RandomState`, optional
        RandomState instance. If not given, the global random state of the
        ``numpy.random`` module will be used.
    verbose : bool, optional
        Print running evidence sum on a single line.
    flux_cov : NxN matrix [where N = len(data)], optional
        Covariance matrix of fluxes. When used, data["fluxerr"] is ignored
    fixed_mcov : NxN matrix [where N = len(data)], optional
        Overwrite mcov of fit during with this; can be used to fix mcov while 
        fitting

    Returns
    -------
    res : Result
        Attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite sampling)
        * ``h``: Bayesian information.
        * ``vparam_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples, nparameters).
          Each row is the parameter values for a single sample. For example,
          ``samples[0, :]`` is the parameter values for the first sample.
        * ``logprior``: 1-d `~numpy.ndarray` (length=nsamples);
          log(prior volume) for each sample.
        * ``logl``: 1-d `~numpy.ndarray` (length=nsamples); log(likelihood)
          for each sample.
        * ``weights``: 1-d `~numpy.ndarray` (length=nsamples);
          Weight corresponding to each sample. The weight is proportional to
          the prior * likelihood for the sample.
        * ``parameters``: 1-d `~numpy.ndarray` of weighted-mean parameter
          values from samples (including fixed parameters). Order corresponds
          to ``model.param_names``.
        * ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indicies correspond to order of ``vparam_names``. Calculated from
          ``samples`` and ``weights``.
        * ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance matrix.
        * ``ndof``: Number of degrees of freedom (len(data) -
          len(vparam_names)).
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).

    estimated_model : `~sncosmo.Model`
        A copy of the model with parameters set to the values in
        ``res.parameters``.
    """

    try:
        import nestle
    except ImportError:
        raise ImportError("nest_lc() requires the nestle package.")

    if "nobj" in kwargs:
        warn("The nobj keyword is deprecated and will be removed in a future "
             "sncosmo release. Use `npoints` instead.")
        npoints = kwargs.pop("nobj")

    # experimental parameters
    tied = kwargs.get("tied", None)

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this b/c we modify it below

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # Drop data that the model doesn't cover.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    if guess_amplitude_bound:
        if model.param_names[2] not in vparam_names:
            raise ValueError("Amplitude bounds guessing enabled but "
                             "amplitude parameter {0!r} is not varied"
                             .format(model.param_names[2]))
        if model.param_names[2] in bounds:
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True"
                             .format(model.param_names[2]))

        # If redshift is bounded, set model redshift to midpoint of bounds
        # when doing the guess.
        if 'z' in bounds:
            model.set(z=sum(bounds['z']) / 2.)
        _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        bounds[model.param_names[2]] = (0., 10. * amplitude)

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    if ppfs is None:
        ppfs = {}
    if tied is None:
        tied = {}

    # Convert bounds/priors combinations into ppfs
    if bounds is not None:
        for key, val in six.iteritems(bounds):
            if key in ppfs:
                continue  # ppfs take priority over bounds/priors
            a, b = val
            if priors is not None and key in priors:
                # solve ppf at discrete points and return interpolating
                # function
                x_samples = np.linspace(0., 1., 101)
                ppf_samples = ppf(priors[key], x_samples, a, b)
                f = Interp1D(0., 1., ppf_samples)
            else:
                f = Interp1D(0., 1., np.array([a, b]))
            ppfs[key] = f

    # NOTE: It is important that iparam_names is in the same order
    # every time, otherwise results will not be reproducible, even
    # with same random seed.  This is because iparam_names[i] is
    # matched to u[i] below and u will be in a reproducible order,
    # so iparam_names must also be.
    iparam_names = [key for key in vparam_names if key in ppfs]
    ppflist = [ppfs[key] for key in iparam_names]
    npdim = len(iparam_names)  # length of u
    ndim = len(vparam_names)  # length of v

    # Check that all param_names either have a direct prior or are tied.
    for name in vparam_names:
        if name in iparam_names:
            continue
        if name in tied:
            continue
        raise ValueError("Must supply ppf or bounds or tied for parameter '{}'"
                         .format(name))

    def prior_transform(u):
        d = {}
        for i in range(npdim):
            d[iparam_names[i]] = ppflist[i](u[i])
        v = np.empty(ndim, dtype=np.float)
        for i in range(ndim):
            key = vparam_names[i]
            if key in d:
                v[i] = d[key]
            else:
                v[i] = tied[key](d)
        return v

    # Indicies of the model parameters in vparam_names
    idx = np.array([model.param_names.index(name) for name in vparam_names])

    def loglike(parameters):
        model.parameters[idx] = parameters
        return -0.5 * _chisq(data, model, modelcov=modelcov, flux_cov=flux_cov,
                             fixed_mcov=fixed_mcov)

    t0 = time.time()
    res = nestle.sample(loglike, prior_transform, ndim, npdim=npdim,
                        npoints=npoints, method=method, maxiter=maxiter,
                        maxcall=maxcall, rstate=rstate,
                        callback=(nestle.print_progress if verbose else None))
    elapsed = time.time() - t0

    # estimate parameters and covariance from samples
    vparameters, cov = nestle.mean_and_cov(res.samples, res.weights)

    # update model parameters to estimated ones.
    model.set(**dict(zip(vparam_names, vparameters)))

    # `res` is a nestle.Result object. Collect result into a sncosmo.Result
    # object for consistency, and add more fields.
    res = Result(niter=res.niter,
                 ncall=res.ncall,
                 logz=res.logz,
                 logzerr=res.logzerr,
                 h=res.h,
                 samples=res.samples,
                 weights=res.weights,
                 logvol=res.logvol,
                 logl=res.logl,
                 vparam_names=copy.copy(vparam_names),
                 ndof=len(data) - len(vparam_names),
                 bounds=bounds,
                 time=elapsed,
                 parameters=model.parameters.copy(),
                 covariance=cov,
                 errors=odict(zip(vparam_names, np.sqrt(np.diagonal(cov)))),
                 param_dict=odict(zip(model.param_names, model.parameters)))

    # Deprecated result fields.
    depmsg = ("The `param_names` attribute is deprecated in sncosmo v1.0 "
              "and will be removed in a future release. "
              "Use `vparam_names` instead.")
    res.__dict__['deprecated']['param_names'] = (res.vparam_names, depmsg)

    depmsg = ("The `logprior` attribute is deprecated in sncosmo v1.2 "
              "and will be changed in a future release. "
              "Use `logvol` instead.")
    res.__dict__['deprecated']['logprior'] = (res.logvol, depmsg)

    return res, model
Пример #19
0
def fit_lc(data, model, vparam_names, bounds=None, method='minuit',
           guess_amplitude=True, guess_t0=True, guess_z=True,
           minsnr=5., modelcov=False, verbose=False, maxcall=10000,
           flux_cov=None, fixed_mcov=None, **kwargs):
    """Fit model parameters to data by minimizing chi^2.

    Ths function defines a chi^2 to minimize, makes initial guesses for
    t0 and amplitude, then runs a minimizer.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`, optional
        Bounded range for each parameter. Keys should be parameter
        names, values are tuples. If a bound is not given for some
        parameter, the parameter is unbounded. The exception is
        ``t0``: by default, the minimum bound is such that the latest
        phase of the model lines up with the earliest data point and
        the maximum bound is such that the earliest phase of the model
        lines up with the latest data point.
    guess_amplitude : bool, optional
        Whether or not to guess the amplitude from the data. If false, the
        current model amplitude is taken as the initial value. Only has an
        effect when fitting amplitude. Default is True.
    guess_t0 : bool, optional
        Whether or not to guess t0. Only has an effect when fitting t0.
        Default is True.
    guess_z : bool, optional
        Whether or not to guess z (redshift). Only has an effect when fitting
        redshift. Default is True.
    minsnr : float, optional
        When guessing amplitude and t0, only use data with signal-to-noise
        ratio (flux / fluxerr) greater than this value. Default is 5.
    method : {'minuit'}, optional
        Minimization method to use. Currently there is only one choice.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    flux_cov : NxN matrix [where N = len(data)], optional
        Covariance matrix of fluxes. When used, data["fluxerr"] is ignored
    fixed_mcov : NxN matrix [where N = len(data)], optional
        Overwrite mcov of fit during with this; can be used to fix mcov while 
        fitting
    verbose : bool, optional
        Print messages during fitting.

    Returns
    -------
    res : Result
        The optimization result represented as a ``Result`` object, which is
        a `dict` subclass with attribute access. Therefore, ``res.keys()``
        provides a list of the attributes. Attributes are:

        - ``success``: boolean describing whether fit succeeded.
        - ``message``: string with more information about exit status.
        - ``ncall``: number of function evaluations.
        - ``chisq``: minimum chi^2 value.
        - ``ndof``: number of degrees of freedom
          (len(data) - len(vparam_names)).
        - ``param_names``: same as ``model.param_names``.
        - ``parameters``: 1-d `~numpy.ndarray` of best-fit values
          (including fixed parameters) corresponding to ``param_names``.
        - ``vparam_names``: list of varied parameter names.
        - ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indicies correspond to order of ``vparam_names``.
        - ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance matrix.

    fitmodel : `~sncosmo.Model`
        A copy of the model with parameters set to best-fit values.

    Notes
    -----

    **t0 guess:** If ``t0`` is being fit and ``guess_t0=True``, the
    function will guess the initial starting point for ``t0`` based on
    the data. The guess is made as follows:

    * Evaluate the time and value of peak flux for the model in each band
      given the current model parameters.
    * Determine the data point with maximum flux in each band, for points
      with signal-to-noise ratio > ``minsnr`` (default is 5). If no points
      meet this criteria, the band is ignored (for the purpose of guessing
      only).
    * For each band, compare model's peak flux to the peak data point. Choose
      the band with the highest ratio of data / model.
    * Set ``t0`` so that the model's time of peak in the chosen band
      corresponds to the peak data point in this band.

    **amplitude guess:** If amplitude (assumed to be the first model parameter)
    is being fit and ``guess_amplitude=True``, the function will guess the
    initial starting point for the amplitude based on the data.

    **redshift guess:** If redshift (``z``) is being fit and ``guess_z=True``,
    the function will set the initial value of ``z`` to the average of the
    bounds on ``z``.

    Examples
    --------

    The `~sncosmo.flatten_result` function can be used to make the result
    a dictionary suitable for appending as rows of a table:

    >>> from astropy.table import Table               # doctest: +SKIP
    >>> table_rows = []                               # doctest: +SKIP
    >>> for sn in sne:                                # doctest: +SKIP
    ...     res, fitmodel = sncosmo.fit_lc(           # doctest: +SKIP
    ...          sn, model, ['t0', 'x0', 'x1', 'c'])  # doctest: +SKIP
    ...     table_rows.append(flatten_result(res))    # doctest: +SKIP
    >>> t = Table(table_rows)                         # doctest: +SKIP

    """

    # Standardize and normalize data.
    data = standardize_data(data)
    data = normalize_data(data)

    # Make a copy of the model so we can modify it with impunity.
    model = copy.copy(model)

    # Check that vparam_names isn't empty and contains only parameters
    # known to the model.
    if len(vparam_names) == 0:
        raise ValueError("no parameters supplied")
    for s in vparam_names:
        if s not in model.param_names:
            raise ValueError("Parameter not in model: " + repr(s))

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # initialize bounds
    if bounds is None:
        bounds = {}

    # Check that 'z' is bounded (if it is going to be fit).
    if 'z' in vparam_names:
        if 'z' not in bounds or None in bounds['z']:
            raise ValueError('z must be bounded if fit.')
        if guess_z:
            model.set(z=sum(bounds['z']) / 2.)
        if model.get('z') < bounds['z'][0] or model.get('z') > bounds['z'][1]:
            raise ValueError('z out of range.')

    # Cut bands that are not allowed by the wavelength range of the model.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    # Unique set of bands in data
    bands = set(data['band'].tolist())

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Note that in the parameter guessing below, we assume that the source
    # amplitude is the 3rd parameter of the Model (1st parameter of the Source)

    # Turn off guessing if we're not fitting the parameter.
    if model.param_names[2] not in vparam_names:
        guess_amplitude = False
    if 't0' not in vparam_names:
        guess_t0 = False

    # Make guesses for t0 and amplitude.
    # (For now, we assume it is the 3rd parameter of the model.)
    if (guess_amplitude or guess_t0):
        t0, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        if guess_amplitude:
            model.parameters[2] = amplitude
        if guess_t0:
            model.set(t0=t0)

    # count degrees of freedom
    ndof = len(data) - len(vparam_names)

    if method == 'minuit':
        try:
            import iminuit
        except ImportError:
            raise ValueError("Minimization method 'minuit' requires the "
                             "iminuit package")

        # The iminuit minimizer expects the function signature to have an
        # argument for each parameter.
        def fitchisq(*parameters):
            model.parameters = parameters
            return _chisq(data, model, modelcov=modelcov, 
                          flux_cov=flux_cov, fixed_mcov=fixed_mcov)

        # Set up keyword arguments to pass to Minuit initializer.
        kwargs = {}
        for name in model.param_names:
            kwargs[name] = model.get(name)  # Starting point.

            # Fix parameters not being varied in the fit.
            if name not in vparam_names:
                kwargs['fix_' + name] = True
                kwargs['error_' + name] = 0.
                continue

            # Bounds
            if name in bounds:
                if None in bounds[name]:
                    raise ValueError('one-sided bounds not allowed for '
                                     'minuit minimizer')
                kwargs['limit_' + name] = bounds[name]

            # Initial step size
            if name in bounds:
                step = 0.02 * (bounds[name][1] - bounds[name][0])
            elif model.get(name) != 0.:
                step = 0.1 * model.get(name)
            else:
                step = 1.
            kwargs['error_' + name] = step

        if verbose:
            print("Initial parameters:")
            for name in vparam_names:
                print(name, kwargs[name], 'step=', kwargs['error_' + name],
                      end=" ")
                if 'limit_' + name in kwargs:
                    print('bounds=', kwargs['limit_' + name], end=" ")
                print()

        m = iminuit.Minuit(fitchisq, errordef=1.,
                           forced_parameters=model.param_names,
                           print_level=(1 if verbose else 0),
                           throw_nan=True, **kwargs)
        d, l = m.migrad(ncall=maxcall)

        # Build a message.
        message = []
        if d.has_reached_call_limit:
            message.append('Reached call limit.')
        if d.hesse_failed:
            message.append('Hesse Failed.')
        if not d.has_covariance:
            message.append('No covariance.')
        elif not d.has_accurate_covar:  # iminuit docs wrong
            message.append('Covariance may not be accurate.')
        if not d.has_posdef_covar:  # iminuit docs wrong
            message.append('Covariance not positive definite.')
        if d.has_made_posdef_covar:
            message.append('Covariance forced positive definite.')
        if not d.has_valid_parameters:
            message.append('Parameter(s) value and/or error invalid.')
        if len(message) == 0:
            message.append('Minimization exited successfully.')
        # iminuit: m.np_matrix() doesn't work

        # numpy array of best-fit values (including fixed parameters).
        parameters = np.array([m.values[name] for name in model.param_names])
        model.parameters = parameters  # set model parameters to best fit.

        # Covariance matrix (only varied parameters) as numpy array.
        if m.covariance is None:
            covariance = None
        else:
            covariance = np.array([
                [m.covariance[(n1, n2)] for n1 in vparam_names]
                for n2 in vparam_names])

        # OrderedDict of errors
        if m.errors is None:
            errors = None
        else:
            errors = odict([(name, m.errors[name]) for name in vparam_names])

        # Compile results
        res = Result(success=d.is_valid,
                     message=' '.join(message),
                     ncall=d.nfcn,
                     chisq=d.fval,
                     ndof=ndof,
                     param_names=model.param_names,
                     parameters=parameters,
                     vparam_names=vparam_names,
                     covariance=covariance,
                     errors=errors)

        # TODO remove cov_names in a future release.
        depmsg = ("The `cov_names` attribute is deprecated in sncosmo v1.0 "
                  "and will be removed in v1.1. Use `vparam_names` instead.")
        res.__dict__['deprecated']['cov_names'] = (vparam_names, depmsg)

    else:
        raise ValueError("unknown method {0:r}".format(method))

    # TODO remove this in a future release.
    if "flatten" in kwargs:
        warn("The `flatten` keyword is deprecated in sncosmo v1.0 "
             "and will be removed in v1.1. Use the flatten_result() "
             "function instead.")
        if kwargs["flatten"]:
            res = flatten_result(res)
    return res, model
Пример #20
0
def read_snana_simlib(fname):
    """Read an SNANA 'simlib' (simulation library) ascii file.

    Parameters
    ----------
    fname : str
        Filename.

    Returns
    -------
    meta : `OrderedDict`
        Global meta data, not associated with any one LIBID.
    observation_sets : `OrderedDict` of `astropy.table.Table`
        keys are LIBIDs, values are observation sets.

    Notes
    -----
    * Anything following '#' on each line is ignored as a comment.
    * Keywords are space separated strings ending wth a colon.
    * If a line starts with 'LIBID:', the following lines are associated
      with the value of LIBID, until 'END_LIBID:' is encountered.
    * While reading a given LIBID, lines starting with 'S' or 'T'
      keywords are assumed to contain 12 space-separated values after
      the keyword. These are (1) MJD, (2) IDEXPT, (3) FLT, (4) CCD GAIN,
      (5) CCD NOISE, (6) SKYSIG, (7) PSF1, (8) PSF2, (9) PSF 2/1 RATIO,
      (10) ZPTAVG, (11) ZPTSIG, (12) MAG.
    * Other lines inside a 'LIBID:'/'END_LIBID:' pair are treated as metadata
      for that LIBID.
    * Any other keywords outside a 'LIBID:'/'END_LIBID:' pair are treated
      as global header keywords and are returned in the `meta` dictionary.

    Examples
    --------
    >>> meta, obs_sets = read_snana_simlib('filename') # doctest: +SKIP

    The second object is a dictionary of astropy Tables indexed by LIBID:

    >>> obs_sets.keys()  # doctest: +SKIP
    [0, 1, 2, 3, 4]

    Each table (libid) has metadata:

    >>> obs_sets[0].meta  # doctest: +SKIP
    OrderedDict([('LIBID', 0), ('RA', 52.5), ('DECL', -27.5), ('NOBS', 161),
                 ('MWEBV', 0.0), ('PIXSIZE', 0.27)])

    Each table has the following columns:

    >>> obs_sets[0].colnames  # doctest: +SKIP
    ['SEARCH', 'MJD', 'IDEXPT', 'FLT', 'CCD_GAIN', 'CCD_NOISE', 'SKYSIG',
     'PSF1', 'PSF2', 'PSFRATIO', 'ZPTAVG', 'ZPTSIG', 'MAG']

    """

    from astropy.table import Table

    COLNAMES = [
        'SEARCH', 'MJD', 'IDEXPT', 'FLT', 'CCD_GAIN', 'CCD_NOISE', 'SKYSIG',
        'PSF1', 'PSF2', 'PSFRATIO', 'ZPTAVG', 'ZPTSIG', 'MAG'
    ]

    # Not used yet... if present in header, add to table.
    SPECIAL = ['FIELD', 'TELESCOPE', 'PIXSIZE']

    meta = odict()  # global metadata
    observation_sets = odict()  # dictionary of tables indexed by LIBID

    reading_obsset = False
    with open(fname, 'r') as infile:
        for line in infile.readlines():

            # strip comments
            idx = line.find('#')
            if idx != -1:
                line = line[0:idx]

            # split on spaces.
            words = line.split()
            if len(words) == 0:
                continue

            # If we're not currently reading an obs set, check if this line
            # is the start of one. If it isn't, update the global metadata.
            if not reading_obsset:
                if line[0:6] == 'LIBID:':
                    reading_obsset = True
                    current_meta = _parse_meta_from_line(line)
                    current_data = odict([(key, []) for key in COLNAMES])
                else:
                    meta.update(_parse_meta_from_line(line))

            # If we are currently reading an obsset...
            else:
                # Check for the explicit end of the obs set.
                if line[0:10] == 'END_LIBID:':
                    reading_obsset = False
                    observation_sets[current_meta['LIBID']] = \
                        Table(current_data, meta=current_meta)

                # Sometimes there's not an explicit end, but the next one
                # starts anyway.
                elif line[0:6] == 'LIBID:':
                    observation_sets[current_meta['LIBID']] = \
                        Table(current_data, meta=current_meta)
                    current_meta = _parse_meta_from_line(line)
                    current_data = odict([(key, []) for key in COLNAMES])

                # Otherwise, read the line into the current obs set.
                elif line[0:2] in ['S:', 'T:']:
                    words = line.split()
                    for colname, val in [('SEARCH', words[0] == 'S:'),
                                         ('MJD', float(words[1])),
                                         ('IDEXPT', int(words[2])),
                                         ('FLT', words[3]),
                                         ('CCD_GAIN', float(words[4])),
                                         ('CCD_NOISE', float(words[5])),
                                         ('SKYSIG', float(words[6])),
                                         ('PSF1', float(words[7])),
                                         ('PSF2', float(words[8])),
                                         ('PSFRATIO', float(words[9])),
                                         ('ZPTAVG', float(words[10])),
                                         ('ZPTSIG', float(words[11])),
                                         ('MAG', float(words[12]))]:
                        current_data[colname].append(val)
                else:
                    current_meta.update(_parse_meta_from_line(line))

    # At the end, check for the case where there's not an explicit end
    # to the last obs set:
    if reading_obsset:
        observation_sets[current_meta['LIBID']] = \
            Table(current_data, meta=current_meta)

    return meta, observation_sets
Пример #21
0
Файл: io.py Проект: wmwv/sncosmo
def _read_salt2_old(dirname, **kwargs):
    """Read old-style SALT2 files from a directory.

    A file named 'lightfile' must exist in the directory.
    """

    filenames = kwargs.get('filenames', None)

    # Get list of files in directory.
    if not (os.path.exists(dirname) and os.path.isdir(dirname)):
        raise IOError("Not a directory: '{0}'".format(dirname))
    dirfilenames = os.listdir(dirname)

    # Read metadata from lightfile.
    if 'lightfile' not in dirfilenames:
        raise IOError("no lightfile in directory: '{0}'".format(dirname))
    with open(os.path.join(dirname, 'lightfile'), 'r') as lightfile:
        meta = odict()
        for line in lightfile.readlines():
            line = line.strip()
            if len(line) == 0:
                continue
            try:
                key, val = line.split()
            except ValueError:
                raise ValueError('expected space-separated key value pairs in '
                                 'lightfile: {0}'.format(
                                     os.path.join(dirname, 'lightfile')))
            meta[key] = _cast_str(val)

    # Get list of filenames to read.
    if filenames is None:
        filenames = dirfilenames
    if 'lightfile' in filenames:
        filenames.remove('lightfile')  # We already read the lightfile.
    fullfilenames = [os.path.join(dirname, f) for f in filenames]

    # Read data from files.
    data = None
    for fname in fullfilenames:
        with open(fname, 'r') as f:
            filemeta, filedata = _read_salt2(f)

        # Check that all necessary file metadata was defined.
        if not ('INSTRUMENT' in filemeta and 'BAND' in filemeta
                and 'MAGSYS' in filemeta):
            raise ValueError(
                'not all necessary global keys (INSTRUMENT, '
                'BAND, MAGSYS) are defined in file {0}'.format(fname))

        # Add the instrument/band to the file data, in anticipation of
        # aggregating it with other files.
        firstkey = filedata.keys()[0]
        data_length = len(filedata[firstkey])
        filter_name = '{0}::{1}'.format(filemeta.pop('INSTRUMENT'),
                                        filemeta.pop('BAND'))
        filedata['Filter'] = data_length * [filter_name]
        filedata['MagSys'] = data_length * [filemeta.pop('MAGSYS')]

        # If this if the first file, initialize data lists, otherwise if keys
        # match, append this file's data to the main data.
        if data is None:
            data = filedata
        elif set(filedata.keys()) == set(data.keys()):
            for key in data:
                data[key].extend(filedata[key])
        else:
            raise ValueError('column names do not match between files')

        # Append any extra metadata in this file to the master metadata.
        if len(filemeta) > 0:
            meta[filter_name] = filemeta

    return meta, data
Пример #22
0
def nest_lc(data,
            model,
            vparam_names,
            bounds,
            guess_amplitude_bound=False,
            minsnr=5.,
            priors=None,
            ppfs=None,
            nobj=100,
            maxiter=10000,
            maxcall=1000000,
            modelcov=False,
            verbose=False):
    """Run nested sampling algorithm to estimate model parameters and evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain column names.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines
        up with the earliest data point and the maximum bound is such
        that the earliest phase of the model lines up with the latest
        data point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing
        amplitude bound. Default is 5.
    priors : `dict`, optional
        Prior probability distribution function for each parameter. The keys
        should be parameter names and the values should be callables that
        accept a float. If a parameter is not in the dictionary, the prior
        defaults to a flat distribution between the bounds.
    ppfs : `dict`, optional
        Prior percent point function (inverse of the cumulative distribution
        function) for each parameter. If a parameter is in this dictionary,
        the ppf takes precedence over a prior pdf specified in ``priors``.
    nobj : int, optional
        Number of objects (e.g., concurrent sample points) to use. Increasing
        nobj increases the accuracy (due to denser sampling) and also the time
        to solution.
    maxiter : int, optional
        Maximum number of iterations. Default is 10000.
    maxcall : int, optional
        Maximum number of likelihood evaluations. Default is 1000000.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    verbose : bool, optional

    Returns
    -------
    res : Result
        Attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite sampling)
        * ``h``: Bayesian information.
        * ``vparam_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples, nparameters).
          Each row is the parameter values for a single sample. For example,
          ``samples[0, :]`` is the parameter values for the first sample.
        * ``logprior``: 1-d `~numpy.ndarray` (length=nsamples);
          log(prior volume) for each sample.
        * ``logl``: 1-d `~numpy.ndarray` (length=nsamples); log(likelihood)
          for each sample.
        * ``weights``: 1-d `~numpy.ndarray` (length=nsamples);
          Weight corresponding to each sample. The weight is proportional to
          the prior * likelihood for the sample.
        * ``parameters``: 1-d `~numpy.ndarray` of weighted-mean parameter
          values from samples (including fixed parameters). Order corresponds
          to ``model.param_names``.
        * ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indicies correspond to order of ``vparam_names``. Calculated from
          ``samples`` and ``weights``.
        * ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance matrix.
        * ``ndof``: Number of degrees of freedom (len(data) -
          len(vparam_names)).
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).

    estimated_model : `~sncosmo.Model`
        A copy of the model with parameters set to the values in
        ``res.parameters``.

    Notes
    -----

    The algorithm uses the numpy random number generator to generate samples,
    and is therefore non-deterministic in default use. To get reproducible
    results, simply seed the random number generator before calling `nest_lc`:

    >>> import numpy as np
    >>> np.random.seed(0)

    """

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this dict b/c we modify it below

    # Drop data that the model doesn't cover.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    if guess_amplitude_bound:
        if model.param_names[2] in bounds:
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True")

        # If redshift is bounded, set model redshift to midpoint of bounds
        # when doing the guess.
        if 'z' in bounds:
            model.set(z=sum(bounds['z']) / 2.)
        _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        bounds[model.param_names[2]] = (0., 10. * amplitude)

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    res = _nest_lc(data,
                   model,
                   vparam_names,
                   modelcov=modelcov,
                   bounds=bounds,
                   priors=priors,
                   ppfs=ppfs,
                   nobj=nobj,
                   maxiter=maxiter,
                   maxcall=maxcall,
                   verbose=verbose)

    res.bounds = bounds

    # calculate weighted average and weighted covariance matrix of samples
    vparameters, cov = weightedcov(res['samples'], res['weights'])

    model.set(**dict(zip(vparam_names, vparameters)))
    res.parameters = model.parameters.copy()
    res.covariance = cov
    res.errors = np.sqrt(np.diagonal(cov))

    # backwards compatibility. TODO remove these in a future release.
    res.param_names = res.vparam_names
    res.param_dict = odict(zip(model.param_names, model.parameters))

    return res, model
Пример #23
0
#!/usr/bin/env python
import numpy as np
import sncosmo
from astropy.utils import OrderedDict as odict
from astropy.table import Table

model = sncosmo.Model(source='salt2')
model.set(z=0.5, c=0.2, t0=55100., x1=0.5)
model.set_source_peakabsmag(-19.5, 'bessellb', 'ab')

times = np.linspace(55070., 55150., 40)
bands = np.array(10 * ['sdssg', 'sdssr', 'sdssi', 'sdssz'])
zp = 25. * np.ones(40)
zpsys = np.array(40 * ['ab'])

flux = model.bandflux(bands, times, zp=zp, zpsys=zpsys)
fluxerr = (0.05 * np.max(flux)) * np.ones(40, dtype=np.float)
flux += fluxerr * np.random.randn(40)

data = Table(odict([('time', times), ('band', bands), ('flux', flux),
                    ('fluxerr', fluxerr), ('zp', zp), ('zpsys', zpsys)]),
             meta=dict(zip(model.param_names, model.parameters)))

sncosmo.write_lc(data, 'example_photometric_data.dat')
Пример #24
0
def read_snana_fits(head_file, phot_file, snids=None, n=None):
    """Read the SNANA FITS format: two FITS files jointly representing
    metadata and photometry for a set of SNe.

    Parameters
    ----------
    head_file : str
        Filename of "HEAD" ("header") FITS file.
    phot_file : str
        Filename of "PHOT" ("photometry") FITS file.
    snids : list of str, optional
        If given, only return the single entry with the matching SNIDs.
    n : int
        If given, only return the first `n` entries.

    Returns
    -------
    sne : list of `~astropy.table.Table`
        Each item in the list is an astropy Table instance.

    Notes
    -----
    If `head_file` contains a column 'SNID' containing strings, leading and
    trailing whitespace is stripped from all the values in that column.

    If `phot_file` contains a column 'FLT', leading and trailing whitespace
    is stripped from all the values in that column.

    Examples
    --------
    >>> sne = read_snana_fits('HEAD.fits', 'PHOT.fits')  # doctest: +SKIP
    >>> for sn in sne:                                   # doctest: +SKIP
    ...     sn.meta  # Metadata in an OrderedDict.       # doctest: +SKIP
    ...     sn['MJD']  # MJD column                      # doctest: +SKIP

    """

    # Should we memmap? Only if we're going to read only a part of the file
    memmap = (snids is not None or n is not None)

    # Get metadata for all the SNe
    head_data = fits.getdata(head_file, 1, view=np.ndarray)
    phot_data = fits.getdata(phot_file, 1, view=np.ndarray, memmap=memmap)

    # Strip trailing whitespace characters from SNID.
    if 'SNID' in head_data.dtype.names:
        try:
            head_data['SNID'][:] = np.char.strip(head_data['SNID'])
        except TypeError:
            pass

    # Check which indicies to return.
    if snids is None and n is None:
        idx = range(len(head_data))
    elif n is None:
        if 'SNID' not in head_data.dtype.names:
            raise RuntimeError('Specific snids requested, but head file does'
                               ' not contain SNID column')
        idx = []
        for snid in snids:
            i = np.flatnonzero(head_data['SNID'] == snid)
            if len(i) != 1:
                raise RuntimeError('Unique snid requested, but there are '
                                   '{0:d} matching entries'.format(len(i)))
            idx.append(i[0])
    elif snids is None:
        idx = range(n)
    else:
        raise ValueError("cannot specify both 'snids' and 'n' arguments")

    # Loop over SNe in HEAD file
    sne = []
    for i in idx:
        meta = odict(zip(head_data.dtype.names, head_data[i]))

        j0 = head_data['PTROBS_MIN'][i] - 1
        j1 = head_data['PTROBS_MAX'][i]
        data = phot_data[j0:j1]
        if 'FLT' in data.dtype.names:
            data['FLT'][:] = np.char.strip(data['FLT'])
        sne.append(Table(data, meta=meta, copy=False))

    return sne
Пример #25
0
def nest_lc(data, model, vparam_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, ppfs=None, nobj=100, maxiter=10000,
            maxcall=1000000, modelcov=False, verbose=False):
    """Run nested sampling algorithm to estimate model parameters and evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines
        up with the earliest data point and the maximum bound is such
        that the earliest phase of the model lines up with the latest
        data point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing
        amplitude bound. Default is 5.
    priors : `dict`, optional
        Prior probability distribution function for each parameter. The keys
        should be parameter names and the values should be callables that
        accept a float. If a parameter is not in the dictionary, the prior
        defaults to a flat distribution between the bounds.
    ppfs : `dict`, optional
        Prior percent point function (inverse of the cumulative distribution
        function) for each parameter. If a parameter is in this dictionary,
        the ppf takes precedence over a prior pdf specified in ``priors``.
    nobj : int, optional
        Number of objects (e.g., concurrent sample points) to use. Increasing
        nobj increases the accuracy (due to denser sampling) and also the time
        to solution.
    maxiter : int, optional
        Maximum number of iterations. Default is 10000.
    maxcall : int, optional
        Maximum number of likelihood evaluations. Default is 1000000.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    verbose : bool, optional

    Returns
    -------
    res : Result
        Attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite sampling)
        * ``h``: Bayesian information.
        * ``vparam_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples, nparameters).
          Each row is the parameter values for a single sample. For example,
          ``samples[0, :]`` is the parameter values for the first sample.
        * ``logprior``: 1-d `~numpy.ndarray` (length=nsamples);
          log(prior volume) for each sample.
        * ``logl``: 1-d `~numpy.ndarray` (length=nsamples); log(likelihood)
          for each sample.
        * ``weights``: 1-d `~numpy.ndarray` (length=nsamples);
          Weight corresponding to each sample. The weight is proportional to
          the prior * likelihood for the sample.
        * ``parameters``: 1-d `~numpy.ndarray` of weighted-mean parameter
          values from samples (including fixed parameters). Order corresponds
          to ``model.param_names``.
        * ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indicies correspond to order of ``vparam_names``. Calculated from
          ``samples`` and ``weights``.
        * ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance matrix.
        * ``ndof``: Number of degrees of freedom (len(data) -
          len(vparam_names)).
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).

    estimated_model : `~sncosmo.Model`
        A copy of the model with parameters set to the values in
        ``res.parameters``.

    Notes
    -----

    The algorithm uses the numpy random number generator to generate samples,
    and is therefore non-deterministic in default use. To get reproducible
    results, simply seed the random number generator before calling `nest_lc`:

    >>> import numpy as np
    >>> np.random.seed(0)

    """

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this dict b/c we modify it below

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # Drop data that the model doesn't cover.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    if guess_amplitude_bound:
        if model.param_names[2] not in vparam_names:
            raise ValueError("Amplitude bounds guessing enabled but "
                             "amplitude parameter {0!r} is not varied"
                             .format(model.param_names[2]))
        if model.param_names[2] in bounds:
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True"
                             .format(model.param_names[2]))

        # If redshift is bounded, set model redshift to midpoint of bounds
        # when doing the guess.
        if 'z' in bounds:
            model.set(z=sum(bounds['z']) / 2.)
        _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        bounds[model.param_names[2]] = (0., 10. * amplitude)

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    res = _nest_lc(data, model, vparam_names, modelcov=modelcov,
                   bounds=bounds, priors=priors, ppfs=ppfs, nobj=nobj,
                   maxiter=maxiter, maxcall=maxcall, verbose=verbose)

    res.bounds = bounds

    # calculate weighted average and weighted covariance matrix of samples
    vparameters, cov = weightedcov(res['samples'], res['weights'])

    model.set(**dict(zip(vparam_names, vparameters)))
    res.parameters = model.parameters.copy()
    res.covariance = cov
    res.errors = odict(zip(vparam_names, np.sqrt(np.diagonal(cov))))
    res.param_dict = odict(zip(model.param_names, model.parameters))

    # TODO remove/change in a future release.
    depmsg = ("The `param_names` attribute is deprecated in sncosmo v1.0 "
              "and will be changed in v1.1. Use `vparam_names` instead.")
    res.__dict__['deprecated']['param_names'] = (res.vparam_names, depmsg)

    return res, model
Пример #26
0
def standardize_data(data):
    """Standardize photometric data by converting to a structured numpy array
    with standard column names (if necessary) and sorting entries in order of
    increasing time.

    Parameters
    ----------
    data : `~astropy.table.Table`, `~numpy.ndarray` or dict

    Returns
    -------
    standardized_data : `~numpy.ndarray`
    """
    if isinstance(data, Table):
        data = np.asarray(data)

    if isinstance(data, np.ndarray):
        colnames = data.dtype.names

        # Check if the data already complies with what we want
        # (correct column names & ordered by date)
        if (set(colnames) == set(_photdata_aliases.keys())
                and np.all(np.ediff1d(data['time']) >= 0.)):
            return data

    elif isinstance(data, dict):
        colnames = data.keys()

    else:
        raise ValueError('Unrecognized data type')

    # Create mapping from lowercased column names to originals
    lower_to_orig = dict([(colname.lower(), colname) for colname in colnames])

    # Set of lowercase column names
    lower_colnames = set(lower_to_orig.keys())

    orig_colnames_to_use = []
    for aliases in _photdata_aliases.values():
        i = lower_colnames & aliases
        if len(i) != 1:
            raise ValueError('Data must include exactly one column from {0} '
                             '(case independent)'.format(', '.join(aliases)))
        orig_colnames_to_use.append(lower_to_orig[i.pop()])

    if isinstance(data, np.ndarray):
        new_data = data[orig_colnames_to_use].copy()
        new_data.dtype.names = _photdata_aliases.keys()

    else:
        new_data = odict()
        for newkey, oldkey in zip(_photdata_aliases.keys(),
                                  orig_colnames_to_use):
            new_data[newkey] = data[oldkey]

        new_data = dict_to_array(new_data)

    # Sort by time, if necessary.
    if not np.all(np.ediff1d(new_data['time']) >= 0.):
        new_data.sort(order=['time'])

    return new_data
Пример #27
0
def read_snana_ascii(fname, default_tablename=None):
    """Read an SNANA-format ascii file.

    Such files may contain metadata lines and one or more tables. See Notes
    for a summary of the format.

    Parameters
    ----------
    fname : str
        Filename of object to read.
    default_tablename : str, optional
        Default tablename, or the string that indicates a table row, when
        a table starts with 'NVAR:' rather than 'NVAR_TABLENAME:'.
    array : bool, optional
        If True, each table is converted to a numpy array. If False, each
        table is a dictionary of lists (each list is a column). Default is
        True.

    Returns
    -------
    meta : OrderedDict
        Metadata from keywords.
    tables : dict of `~astropy.table.Table`
        Tables, indexed by table name.

    Notes
    -----
    The file can contain one or more tables, as well as optional metadata.
    Here is an example of the expected format::

        META1: a
        META2: 6
        NVAR_SN: 3
        VARNAMES: A B C
        SN: 1 2.0 x
        SN: 4 5.0 y

    Behavior:

    * Any strings ending in a colon (:) are treated as keywords.
    * The start of a new table is indicated by a keyword starting with
      'NVAR'.
    * If the 'NVAR' is followed by an underscore (e.g., 'NVAR_TABLENAME'),
      then 'TABLENAME' is taken to be the name of the table. Otherwise the
      user *must specify* a ``default_tablename``.  This is because data
      rows are identified by the tablename.
    * After a keyword starting with 'NVAR', the next keyword must be
      'VARNAMES'. The strings following give the column names.
    * Any other keywords anywhere in the file are treated as metadata. The
      first string after the keyword is treated as the value for that keyword.
    * **Note:** Newlines are treated as equivalent to spaces; they do not
      indicate a new row.

    Examples
    --------

    >>> from StringIO import StringIO  # StringIO behaves like a file
    >>> f = StringIO('META1: a\\n'
    ...              'META2: 6\\n'
    ...              'NVAR_SN: 3\\n'
    ...              'VARNAMES: A B C\\n'
    ...              'SN: 1 2.0 x\\n'
    ...              'SN: 4 5.0 y\\n')
    ...
    >>> meta, tables = read_snana_ascii(f)

    The first object is a dictionary of metadata:

    >>> meta
    OrderedDict([('META1', 'a'), ('META2', 6)])

    The second is a dictionary of all the tables in the file:

    >>> tables['SN']
    <Table rows=2 names=('A','B','C')>
    array([(1, 2.0, 'x'), (4, 5.0, 'y')],
          dtype=[('A', '<i8'), ('B', '<f8'), ('C', 'S1')])


    If the file had an 'NVAR' keyword rather than 'NVAR_SN', for example::

        NVAR: 3
        VARNAMES: A B C
        SN: 1 2.0 x
        SN: 4 5.0 y
        SN: 5 8.2 z

    it can be read by supplying a default table name:

    >>> meta, tables = read_snana_ascii(f, default_tablename='SN')
    ...     # doctest: +SKIP

    """

    meta = odict()  # initialize structure to hold metadata.
    tables = {}  # initialize structure to hold data.

    if isinstance(fname, basestring):
        fh = open(fname, 'U')
    else:
        fh = fname
    words = fh.read().split()
    fh.close()

    i = 0
    nvar = None
    tablename = None
    while i < len(words):
        word = words[i]

        # If the word starts with 'NVAR', we are starting a new table.
        if word.startswith('NVAR'):
            nvar = int(words[i + 1])

            # Infer table name. The name will be used to designate a data row.
            if '_' in word:
                pos = word.find('_') + 1
                tablename = word[pos:].rstrip(':')
            elif default_tablename is not None:
                tablename = default_tablename
            else:
                raise ValueError(
                    'Table name must be given as part of NVAR keyword so '
                    'that rows belonging to this table can be identified. '
                    'Alternatively, supply the default_tablename keyword.')
            table = odict()
            tables[tablename] = table

            i += 2

        # If the word starts with 'VARNAMES', the following `nvar` words
        # define the column names of the table.
        elif word.startswith('VARNAMES'):

            # Check that nvar is defined and that no column names are defined
            # for the current table.
            if nvar is None or len(table) > 0:
                raise Exception('NVAR must directly precede VARNAMES')

            # Read the column names
            for j in range(i + 1, i + 1 + nvar):
                table[words[j]] = []
            i += nvar + 1

        # If the word matches the current tablename, we are reading a data row.
        elif word.rstrip(':') == tablename:
            for j, colname in enumerate(table.keys()):
                table[colname].append(words[i + 1 + j])
            i += nvar + 1

        # Otherwise, we are reading metadata or some comment
        # If the word ends with ":", it is metadata.
        elif word[-1] == ':':
            name = word[:-1]  # strip off the ':'
            if len(words) >= i + 2:
                try:
                    val = int(words[i + 1])
                except ValueError:
                    try:
                        val = float(words[i + 1])
                    except ValueError:
                        val = words[i + 1]
                meta[name] = val
            else:
                meta[name] = None
            i += 2
        else:
            # It is some comment; continue onto next word.
            i += 1

    # All values in each column are currently strings. Convert to int or
    # float if possible.
    for table in tables.values():
        for colname, values in table.iteritems():
            try:
                table[colname] = [int(val) for val in values]
            except ValueError:
                try:
                    table[colname] = [float(val) for val in values]
                except ValueError:
                    pass

    # All tables are dictionaries. Convert them to Tables
    for tablename in tables.keys():
        tables[tablename] = Table(tables[tablename])

    return meta, tables
Пример #28
0
def read_snana_fits(head_file, phot_file, snids=None, n=None):
    """Read the SNANA FITS format: two FITS files jointly representing
    metadata and photometry for a set of SNe.

    Parameters
    ----------
    head_file : str
        Filename of "HEAD" ("header") FITS file.
    phot_file : str
        Filename of "PHOT" ("photometry") FITS file.
    snids : list of str, optional
        If given, only return the single entry with the matching SNIDs.
    n : int
        If given, only return the first `n` entries.

    Returns
    -------
    sne : list of `~astropy.table.Table`
        Each item in the list is an astropy Table instance.

    Notes
    -----
    If `head_file` contains a column 'SNID' containing strings, leading and
    trailing whitespace is stripped from all the values in that column.

    If `phot_file` contains a column 'FLT', leading and trailing whitespace
    is stripped from all the values in that column.

    Examples
    --------
    >>> sne = read_snana_fits('HEAD.fits', 'PHOT.fits')  # doctest: +SKIP
    >>> for sn in sne:                                   # doctest: +SKIP
    ...     sn.meta  # Metadata in an OrderedDict.       # doctest: +SKIP
    ...     sn['MJD']  # MJD column                      # doctest: +SKIP

    """

    # Should we memmap? Only if we're going to read only a part of the file
    memmap = (snids is not None or n is not None)

    # Get metadata for all the SNe
    head_data = fits.getdata(head_file, 1, view=np.ndarray)
    phot_data = fits.getdata(phot_file, 1, view=np.ndarray, memmap=memmap)

    # Strip trailing whitespace characters from SNID.
    if 'SNID' in head_data.dtype.names:
        try:
            head_data['SNID'][:] = np.char.strip(head_data['SNID'])
        except TypeError:
            pass

    # Check which indicies to return.
    if snids is None and n is None:
        idx = range(len(head_data))
    elif n is None:
        if 'SNID' not in head_data.dtype.names:
            raise RuntimeError('Specific snids requested, but head file does'
                               ' not contain SNID column')
        idx = []
        for snid in snids:
            i = np.flatnonzero(head_data['SNID'] == snid)
            if len(i) != 1:
                raise RuntimeError('Unique snid requested, but there are '
                                   '{0:d} matching entries'.format(len(i)))
            idx.append(i[0])
    elif snids is None:
        idx = range(n)
    else:
        raise ValueError("cannot specify both 'snids' and 'n' arguments")

    # Loop over SNe in HEAD file
    sne = []
    for i in idx:
        meta = odict(zip(head_data.dtype.names, head_data[i]))

        j0 = head_data['PTROBS_MIN'][i] - 1
        j1 = head_data['PTROBS_MAX'][i]
        data = phot_data[j0:j1]
        if 'FLT' in data.dtype.names:
            data['FLT'][:] = np.char.strip(data['FLT'])
        sne.append(Table(data, meta=meta, copy=False))

    return sne
Пример #29
0
def read_snana_simlib(fname):
    """Read an SNANA 'simlib' (simulation library) ascii file.

    Parameters
    ----------
    fname : str
        Filename.

    Returns
    -------
    meta : `OrderedDict`
        Global meta data, not associated with any one LIBID.
    observation_sets : `OrderedDict` of `astropy.table.Table`
        keys are LIBIDs, values are observation sets.

    Notes
    -----
    * Anything following '#' on each line is ignored as a comment.
    * Keywords are space separated strings ending wth a colon.
    * If a line starts with 'LIBID:', the following lines are associated
      with the value of LIBID, until 'END_LIBID:' is encountered.
    * While reading a given LIBID, lines starting with 'S' or 'T'
      keywords are assumed to contain 12 space-separated values after
      the keyword. These are (1) MJD, (2) IDEXPT, (3) FLT, (4) CCD GAIN,
      (5) CCD NOISE, (6) SKYSIG, (7) PSF1, (8) PSF2, (9) PSF 2/1 RATIO,
      (10) ZPTAVG, (11) ZPTSIG, (12) MAG.
    * Other lines inside a 'LIBID:'/'END_LIBID:' pair are treated as metadata
      for that LIBID.
    * Any other keywords outside a 'LIBID:'/'END_LIBID:' pair are treated
      as global header keywords and are returned in the `meta` dictionary.

    Examples
    --------
    >>> meta, obs_sets = read_snana_simlib('filename') # doctest: +SKIP

    The second object is a dictionary of astropy Tables indexed by LIBID:

    >>> obs_sets.keys()  # doctest: +SKIP
    [0, 1, 2, 3, 4]

    Each table (libid) has metadata:

    >>> obs_sets[0].meta  # doctest: +SKIP
    OrderedDict([('LIBID', 0), ('RA', 52.5), ('DECL', -27.5), ('NOBS', 161),
                 ('MWEBV', 0.0), ('PIXSIZE', 0.27)])

    Each table has the following columns:

    >>> obs_sets[0].colnames  # doctest: +SKIP
    ['SEARCH', 'MJD', 'IDEXPT', 'FLT', 'CCD_GAIN', 'CCD_NOISE', 'SKYSIG',
     'PSF1', 'PSF2', 'PSFRATIO', 'ZPTAVG', 'ZPTSIG', 'MAG']

    """

    from astropy.table import Table

    COLNAMES = ['SEARCH', 'MJD', 'IDEXPT', 'FLT', 'CCD_GAIN', 'CCD_NOISE',
                'SKYSIG', 'PSF1', 'PSF2', 'PSFRATIO', 'ZPTAVG', 'ZPTSIG',
                'MAG']

    # Not used yet... if present in header, add to table.
    SPECIAL = ['FIELD', 'TELESCOPE', 'PIXSIZE']

    meta = odict()  # global metadata
    observation_sets = odict()  # dictionary of tables indexed by LIBID

    reading_obsset = False
    with open(fname, 'r') as infile:
        for line in infile.readlines():

            # strip comments
            idx = line.find('#')
            if idx != -1:
                line = line[0:idx]

            # split on spaces.
            words = line.split()
            if len(words) == 0:
                continue

            # are we currently reading an obsset?
            if not reading_obsset:
                if line[0:6] == 'LIBID:':
                    reading_obsset = True
                    current_meta = _parse_meta_from_line(line)
                    current_data = odict([(key, []) for key in COLNAMES])
                else:
                    meta.update(_parse_meta_from_line(line))

            else:
                if line[0:10] == 'END_LIBID:':
                    reading_obsset = False
                    observation_sets[current_meta['LIBID']] = \
                        Table(current_data, meta=current_meta)
                elif line[0:2] in ['S:', 'T:']:
                    words = line.split()
                    for colname, val in [('SEARCH', words[0] == 'S:'),
                                         ('MJD', float(words[1])),
                                         ('IDEXPT', int(words[2])),
                                         ('FLT', words[3]),
                                         ('CCD_GAIN', float(words[4])),
                                         ('CCD_NOISE', float(words[5])),
                                         ('SKYSIG', float(words[6])),
                                         ('PSF1', float(words[7])),
                                         ('PSF2', float(words[8])),
                                         ('PSFRATIO', float(words[9])),
                                         ('ZPTAVG', float(words[10])),
                                         ('ZPTSIG', float(words[11])),
                                         ('MAG', float(words[12]))]:
                        current_data[colname].append(val)
                else:
                    current_meta.update(_parse_meta_from_line(line))

    return meta, observation_sets
Пример #30
0
def mcmc_lc(data, model, vparam_names, bounds=None, priors=None,
            guess_amplitude=True, guess_t0=True, guess_z=True,
            minsnr=5., modelcov=False, nwalkers=10, nburn=200,
            nsamples=1000, thin=1, a=2.0, flux_cov=None,
            fixed_mcov=None):
    """Run an MCMC chain to get model parameter samples.

    This is a convenience function around `emcee.EnsembleSampler`.
    It defines the likelihood function and makes a heuristic guess at a
    good set of starting points for the walkers. It then runs the sampler,
    starting with a burn-in run.

    If you're not getting good results, you might want to try
    increasing the burn-in, increasing the walkers, or specifying a
    better starting position.  To get a better starting position, you
    could first run `~sncosmo.fit_lc`, then run this function with all
    ``guess_[name]`` keyword arguments set to False, so that the
    current model parameters are used as the starting point.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : iterable
        Model parameters to vary.
    bounds : `dict`, optional
        Bounded range for each parameter. Keys should be parameter
        names, values are tuples. If a bound is not given for some
        parameter, the parameter is unbounded. The exception is
        ``t0``: by default, the minimum bound is such that the latest
        phase of the model lines up with the earliest data point and
        the maximum bound is such that the earliest phase of the model
        lines up with the latest data point.
    priors : `dict`, optional
        Prior probability functions. Keys are parameter names, values are
        functions that return probability given the parameter value.
        The default prior is a flat distribution.
    guess_amplitude : bool, optional
        Whether or not to guess the amplitude from the data. If false, the
        current model amplitude is taken as the initial value. Only has an
        effect when fitting amplitude. Default is True.
    guess_t0 : bool, optional
        Whether or not to guess t0. Only has an effect when fitting t0.
        Default is True.
    guess_z : bool, optional
        Whether or not to guess z (redshift). Only has an effect when fitting
        redshift. Default is True.
    minsnr : float, optional
        When guessing amplitude and t0, only use data with signal-to-noise
        ratio (flux / fluxerr) greater than this value. Default is 5.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    nwalkers : int, optional
        Number of walkers in the EnsembleSampler
    nburn : int, optional
        Number of samples in burn-in phase.
    nsamples : int, optional
        Number of samples in production run.
    thin : int, optional
        Factor by which to thin samples in production run. Output samples
        array will have (nsamples/thin) samples.
    a : float, optional
        Proposal scale parameter passed to the EnsembleSampler.
    flux_cov : NxN matrix [where N = len(data)], optional
        Covariance matrix of fluxes. When used, data["fluxerr"] is ignored
    fixed_mcov : NxN matrix [where N = len(data)], optional
        Overwrite mcov of fit during with this; can be used to fix mcov while 
        fitting

    Returns
    -------
    res : Result
        Has the following attributes:

        * ``param_names``: All parameter names of model, including fixed.
        * ``parameters``: Model parameters, with varied parameters set to
          mean value in samples.
        * ``vparam_names``: Names of parameters varied. Order of parameters
          matches order of samples.
        * ``samples``: 2-d array with shape ``(N, len(vparam_names))``.
          Order of parameters in each row  matches order in
          ``res.vparam_names``.
        * ``covariance``: 2-d array giving covariance, measured from samples.
          Order corresponds to ``res.vparam_names``.
        * ``errors``: dictionary giving square root of diagonal of covariance
          matrix for varied parameters. Useful for ``plot_lc``.
        * ``mean_acceptance_fraction``: mean acceptance fraction for all
          walkers in the sampler.

    est_model : `~sncosmo.Model`
        Copy of input model with varied parameters set to mean value in
        samples.

    """

    try:
        import emcee
    except ImportError:
        raise ImportError("mcmc_lc() requires the emcee package.")

    # Standardize and normalize data.
    data = standardize_data(data)
    data = normalize_data(data)

    # Make a copy of the model so we can modify it with impunity.
    model = copy.copy(model)

    if bounds is None:
        bounds = {}
    if priors is None:
        priors = {}

    # Check that vparam_names isn't empty, check for unknown parameters.
    if len(vparam_names) == 0:
        raise ValueError("no parameters supplied")
    for names in (vparam_names, bounds, priors):
        for name in names:
            if name not in model.param_names:
                raise ValueError("Parameter not in model: " + repr(name))

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]
    ndim = len(vparam_names)

    # Check that 'z' is bounded (if it is going to be fit).
    if 'z' in vparam_names:
        if 'z' not in bounds or None in bounds['z']:
            raise ValueError('z must be bounded if allowed to vary.')
        if guess_z:
            model.set(z=sum(bounds['z']) / 2.)
        if model.get('z') < bounds['z'][0] or model.get('z') > bounds['z'][1]:
            raise ValueError('z out of range.')

    # Cut bands that are not allowed by the wavelength range of the model.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Note that in the parameter guessing below, we assume that the source
    # amplitude is the 3rd parameter of the Model (1st parameter of the Source)

    # Turn off guessing if we're not fitting the parameter.
    if model.param_names[2] not in vparam_names:
        guess_amplitude = False
    if 't0' not in vparam_names:
        guess_t0 = False

    # Make guesses for t0 and amplitude.
    # (we assume amplitude is the 3rd parameter of the model.)
    if guess_amplitude or guess_t0:
        t0, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        if guess_amplitude:
            model.parameters[2] = amplitude
        if guess_t0:
            model.set(t0=t0)

    # Indicies used in probability function.
    # modelidx: Indicies of model parameters corresponding to vparam_names.
    # idxbounds: tuples of (varied parameter index, low bound, high bound).
    # idxpriors: tuples of (varied parameter index, function).
    modelidx = np.array([model.param_names.index(k) for k in vparam_names])
    idxbounds = [(vparam_names.index(k), bounds[k][0], bounds[k][1])
                 for k in bounds]
    idxpriors = [(vparam_names.index(k), priors[k]) for k in priors]

    # Posterior function.
    def lnprob(parameters):
        for i, low, high in idxbounds:
            if not low < parameters[i] < high:
                return -np.inf

        model.parameters[modelidx] = parameters
        logp = -0.5 * _chisq(data, model, modelcov=modelcov, flux_cov=flux_cov,
                             fixed_mcov=fixed_mcov)

        for i, func in idxpriors:
            logp += math.log(func(parameters[i]))

        return logp

    # Heuristic determination of  walker initial positions:
    # distribute walkers in a symmetric gaussian ball, with heuristically
    # determined scale.
    ctr = model.parameters[modelidx]
    scale = np.ones(ndim)
    for i, name in enumerate(vparam_names):
        if name in bounds:
            scale[i] = 0.0001 * (bounds[name][1] - bounds[name][0])
        elif model.get(name) != 0.:
            scale[i] = 0.01 * model.get(name)
        else:
            scale[i] = 0.1
    pos = ctr + scale * np.random.normal(size=(nwalkers, ndim))

    # Run the sampler.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, a=a)
    pos, prob, state = sampler.run_mcmc(pos, nburn)  # burn-in
    sampler.reset()
    sampler.run_mcmc(pos, nsamples, thin=thin)  # production run
    samples = sampler.flatchain

    # Summary statistics.
    vparameters = np.mean(samples, axis=0)
    cov = np.cov(samples, rowvar=0)
    model.set(**dict(zip(vparam_names, vparameters)))
    errors = odict(zip(vparam_names, np.sqrt(np.diagonal(cov))))
    mean_acceptance_fraction = np.mean(sampler.acceptance_fraction)

    res = Result(param_names=copy.copy(model.param_names),
                 parameters=model.parameters.copy(),
                 vparam_names=vparam_names,
                 samples=samples,
                 covariance=cov,
                 errors=errors,
                 mean_acceptance_fraction=mean_acceptance_fraction)

    return res, model
Пример #31
0
#!/usr/bin/env python
import numpy as np
import sncosmo
from astropy.utils import OrderedDict as odict
from astropy.table import Table


model = sncosmo.ObsModel(source='salt2')
model.set(z=0.5, c=0.2, t0=55100., x1=0.5)
model.set_source_peakabsmag(-19.5, 'bessellb', 'ab')

times = np.linspace(55070., 55150., 40)
bands = np.array(10 * ['sdssg', 'sdssr', 'sdssi', 'sdssz'])
zp = 25. * np.ones(40)
zpsys = np.array(40 * ['ab'])

flux = model.bandflux(bands, times, zp=zp, zpsys=zpsys)
fluxerr = (0.05 * np.max(flux)) * np.ones(40, dtype=np.float)
flux += fluxerr * np.random.randn(40)

data = Table(odict([('time', times), ('band', bands), ('flux', flux), 
                    ('fluxerr', fluxerr), ('zp', zp), ('zpsys', zpsys)]),
             meta=dict(zip(model.param_names, model.parameters)))

sncosmo.write_lc(data, 'example_photometric_data.dat')
Пример #32
0
def _read_salt2_old(dirname, **kwargs):
    """Read old-style SALT2 files from a directory.

    A file named 'lightfile' must exist in the directory.
    """

    filenames = kwargs.get('filenames', None)

    # Get list of files in directory.
    if not (os.path.exists(dirname) and os.path.isdir(dirname)):
        raise IOError("Not a directory: '{0}'".format(dirname))
    dirfilenames = os.listdir(dirname)

    # Read metadata from lightfile.
    if 'lightfile' not in dirfilenames:
        raise IOError("no lightfile in directory: '{0}'".format(dirname))
    with open(os.path.join(dirname, 'lightfile'), 'r') as lightfile:
        meta = odict()
        for line in lightfile.readlines():
            line = line.strip()
            if len(line) == 0:
                continue
            try:
                key, val = line.split()
            except ValueError:
                raise ValueError('expected space-separated key value pairs in '
                                 'lightfile: {0}'
                                 .format(os.path.join(dirname, 'lightfile')))
            meta[key] = val

    # Get list of filenames to read.
    if filenames is None:
        filenames = dirfilenames
    if 'lightfile' in filenames:
        filenames.remove('lightfile')  # We already read the lightfile.
    fullfilenames = [os.path.join(dirname, f) for f in filenames]

    # Read data from files.
    data = None
    for fname in fullfilenames:
        with open(fname, 'r') as f:
            filemeta, filedata = _read_salt2(f)

        # Check that all necessary file metadata was defined.
        if not ('INSTRUMENT' in filemeta and 'BAND' in filemeta and
                'MAGSYS' in filemeta):
            raise ValueError('not all necessary global keys (INSTRUMENT, '
                             'BAND, MAGSYS) are defined in file {0}'
                             .format(fname))

        # Add the instrument/band to the file data, in anticipation of
        # aggregating it with other files.
        firstkey = filedata.keys()[0]
        data_length = len(filedata[firstkey])
        filter_name = '{0}::{1}'.format(filemeta.pop('INSTRUMENT'),
                                        filemeta.pop('BAND'))
        filedata['Filter'] = data_length * [filter_name]
        filedata['MagSys'] = data_length * [filemeta.pop('MAGSYS')]

        # If this if the first file, initialize data lists, otherwise if keys
        # match, append this file's data to the main data.
        if data is None:
            data = filedata
        elif set(filedata.keys()) == set(data.keys()):
            for key in data:
                data[key].extend(filedata[key])
        else:
            raise ValueError('column names do not match between files')

        # Append any extra metadata in this file to the master metadata.
        if len(filemeta) > 0:
            meta[filter_name] = filemeta

    return meta, data
"""Convenience functions for photometric data."""
from __future__ import division

import math

import numpy as np
from astropy.utils import OrderedDict as odict
from astropy.table import Table
from astropy.extern import six

from .spectral import get_magsystem, get_bandpass

_photdata_aliases = odict([
    ('time', set(['time', 'date', 'jd', 'mjd', 'mjdobs', 'mjd_obs'])),
    ('band', set(['band', 'bandpass', 'filter', 'flt'])),
    ('flux', set(['flux', 'f'])),
    ('fluxerr', set(['fluxerr', 'fe', 'fluxerror', 'flux_error', 'flux_err'])),
    ('zp', set(['zp', 'zpt', 'zeropoint', 'zero_point'])),
    ('zpsys', set(['zpsys', 'zpmagsys', 'magsys']))
    ])

# Descriptions for docstring only.
_photdata_descriptions = {
    'time': 'Time of observation in days',
    'band': 'Bandpass of observation',
    'flux': 'Flux of observation',
    'fluxerr': 'Gaussian uncertainty on flux',
    'zp': 'Zeropoint corresponding to flux',
    'zpsys': 'Magnitude system for zeropoint'
    }
_photdata_types = {
    'time': 'float',
Пример #34
0
def nest_lc(data, model, vparam_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, ppfs=None, npoints=100, method='single',
            maxiter=None, maxcall=None, modelcov=False, rstate=None,
            verbose=False, **kwargs):
    """Run nested sampling algorithm to estimate model parameters and evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines
        up with the earliest data point and the maximum bound is such
        that the earliest phase of the model lines up with the latest
        data point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing
        amplitude bound. Default is 5.
    priors : `dict`, optional
        Prior probability distribution function for each parameter. The keys
        should be parameter names and the values should be callables that
        accept a float. If a parameter is not in the dictionary, the prior
        defaults to a flat distribution between the bounds.
    ppfs : `dict`, optional
        Prior percent point function (inverse of the cumulative distribution
        function) for each parameter. If a parameter is in this dictionary,
        the ppf takes precedence over a prior pdf specified in ``priors``.
    npoints : int, optional
        Number of active samples to use. Increasing this value increases
        the accuracy (due to denser sampling) and also the time
        to solution.
    method : {'classic', 'single', 'multi'}, optional
        Method used to select new points. Choices are 'classic',
        single-ellipsoidal ('single'), multi-ellipsoidal ('multi'). Default
        is 'single'.
    maxiter : int, optional
        Maximum number of iterations. Iteration may stop earlier if
        termination condition is reached. Default is no limit.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    rstate : `~numpy.random.RandomState`, optional
        RandomState instance. If not given, the global random state of the
        ``numpy.random`` module will be used.
    verbose : bool, optional
        Print running evidence sum on a single line.

    Returns
    -------
    res : Result
        Attributes are:

        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite sampling)
        * ``h``: Bayesian information.
        * ``vparam_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples, nparameters).
          Each row is the parameter values for a single sample. For example,
          ``samples[0, :]`` is the parameter values for the first sample.
        * ``logprior``: 1-d `~numpy.ndarray` (length=nsamples);
          log(prior volume) for each sample.
        * ``logl``: 1-d `~numpy.ndarray` (length=nsamples); log(likelihood)
          for each sample.
        * ``weights``: 1-d `~numpy.ndarray` (length=nsamples);
          Weight corresponding to each sample. The weight is proportional to
          the prior * likelihood for the sample.
        * ``parameters``: 1-d `~numpy.ndarray` of weighted-mean parameter
          values from samples (including fixed parameters). Order corresponds
          to ``model.param_names``.
        * ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indicies correspond to order of ``vparam_names``. Calculated from
          ``samples`` and ``weights``.
        * ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance matrix.
        * ``ndof``: Number of degrees of freedom (len(data) -
          len(vparam_names)).
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).

    estimated_model : `~sncosmo.Model`
        A copy of the model with parameters set to the values in
        ``res.parameters``.
    """

    try:
        import nestle
    except ImportError:
        raise ImportError("nest_lc() requires the nestle package.")

    if "nobj" in kwargs:
        warn("The nobj keyword is deprecated and will be removed in a future "
             "sncosmo release. Use `npoints` instead.")
        npoints = kwargs.pop("nobj")

    # experimental parameters
    tied = kwargs.get("tied", None)

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this b/c we modify it below

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # Drop data that the model doesn't cover.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    if guess_amplitude_bound:
        if model.param_names[2] not in vparam_names:
            raise ValueError("Amplitude bounds guessing enabled but "
                             "amplitude parameter {0!r} is not varied"
                             .format(model.param_names[2]))
        if model.param_names[2] in bounds:
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True"
                             .format(model.param_names[2]))

        # If redshift is bounded, set model redshift to midpoint of bounds
        # when doing the guess.
        if 'z' in bounds:
            model.set(z=sum(bounds['z']) / 2.)
        _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        bounds[model.param_names[2]] = (0., 10. * amplitude)

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    if ppfs is None:
        ppfs = {}
    if tied is None:
        tied = {}

    # Convert bounds/priors combinations into ppfs
    if bounds is not None:
        for key, val in six.iteritems(bounds):
            if key in ppfs:
                continue  # ppfs take priority over bounds/priors
            a, b = val
            if priors is not None and key in priors:
                # solve ppf at discrete points and return interpolating
                # function
                x_samples = np.linspace(0., 1., 101)
                ppf_samples = ppf(priors[key], x_samples, a, b)
                f = Interp1D(0., 1., ppf_samples)
            else:
                f = Interp1D(0., 1., np.array([a, b]))
            ppfs[key] = f

    # NOTE: It is important that iparam_names is in the same order
    # every time, otherwise results will not be reproducible, even
    # with same random seed.  This is because iparam_names[i] is
    # matched to u[i] below and u will be in a reproducible order,
    # so iparam_names must also be.
    iparam_names = [key for key in vparam_names if key in ppfs]
    ppflist = [ppfs[key] for key in iparam_names]
    npdim = len(iparam_names)  # length of u
    ndim = len(vparam_names)  # length of v

    # Check that all param_names either have a direct prior or are tied.
    for name in vparam_names:
        if name in iparam_names:
            continue
        if name in tied:
            continue
        raise ValueError("Must supply ppf or bounds or tied for parameter '{}'"
                         .format(name))

    def prior_transform(u):
        d = {}
        for i in range(npdim):
            d[iparam_names[i]] = ppflist[i](u[i])
        v = np.empty(ndim, dtype=np.float)
        for i in range(ndim):
            key = vparam_names[i]
            if key in d:
                v[i] = d[key]
            else:
                v[i] = tied[key](d)
        return v

    # Indicies of the model parameters in vparam_names
    idx = np.array([model.param_names.index(name) for name in vparam_names])

    def loglike(parameters):
        model.parameters[idx] = parameters
        return -0.5 * _chisq(data, model, modelcov=modelcov)

    t0 = time.time()
    res = nestle.sample(loglike, prior_transform, ndim, npdim=npdim,
                        npoints=npoints, method=method, maxiter=maxiter,
                        maxcall=maxcall, rstate=rstate,
                        callback=(nestle.print_progress if verbose else None))
    elapsed = time.time() - t0

    # estimate parameters and covariance from samples
    vparameters, cov = nestle.mean_and_cov(res.samples, res.weights)

    # update model parameters to estimated ones.
    model.set(**dict(zip(vparam_names, vparameters)))

    # `res` is a nestle.Result object. Collect result into a sncosmo.Result
    # object for consistency, and add more fields.
    res = Result(niter=res.niter,
                 ncall=res.ncall,
                 logz=res.logz,
                 logzerr=res.logzerr,
                 h=res.h,
                 samples=res.samples,
                 weights=res.weights,
                 logvol=res.logvol,
                 logl=res.logl,
                 vparam_names=copy.copy(vparam_names),
                 ndof=len(data) - len(vparam_names),
                 bounds=bounds,
                 time=elapsed,
                 parameters=model.parameters.copy(),
                 covariance=cov,
                 errors=odict(zip(vparam_names, np.sqrt(np.diagonal(cov)))),
                 param_dict=odict(zip(model.param_names, model.parameters)))

    # Deprecated result fields.
    depmsg = ("The `param_names` attribute is deprecated in sncosmo v1.0 "
              "and will be removed in a future release. "
              "Use `vparam_names` instead.")
    res.__dict__['deprecated']['param_names'] = (res.vparam_names, depmsg)

    depmsg = ("The `logprior` attribute is deprecated in sncosmo v1.2 "
              "and will be changed in a future release. "
              "Use `logvol` instead.")
    res.__dict__['deprecated']['logprior'] = (res.logvol, depmsg)

    return res, model
def standardize_data(data):
    """Standardize photometric data by converting to a structured numpy array
    with standard column names (if necessary) and sorting entries in order of
    increasing time.

    Parameters
    ----------
    data : `~astropy.table.Table`, `~numpy.ndarray` or dict

    Returns
    -------
    standardized_data : `~numpy.ndarray`
    """
    if isinstance(data, Table):
        data = np.asarray(data)

    if isinstance(data, np.ndarray):
        colnames = data.dtype.names

        # Check if the data already complies with what we want
        # (correct column names & ordered by date)
        if (set(colnames) == set(_photdata_aliases.keys()) and
                np.all(np.ediff1d(data['time']) >= 0.)):
            return data

    elif isinstance(data, dict):
        colnames = data.keys()

    else:
        raise ValueError('Unrecognized data type')

    # Create mapping from lowercased column names to originals
    lower_to_orig = dict([(colname.lower(), colname) for colname in colnames])

    # Set of lowercase column names
    lower_colnames = set(lower_to_orig.keys())

    orig_colnames_to_use = []
    for aliases in _photdata_aliases.values():
        i = lower_colnames & aliases
        if len(i) != 1:
            raise ValueError('Data must include exactly one column from {0} '
                             '(case independent)'.format(', '.join(aliases)))
        orig_colnames_to_use.append(lower_to_orig[i.pop()])

    if isinstance(data, np.ndarray):
        new_data = data[orig_colnames_to_use].copy()
        new_data.dtype.names = _photdata_aliases.keys()

    else:
        new_data = odict()
        for newkey, oldkey in zip(_photdata_aliases.keys(),
                                  orig_colnames_to_use):
            new_data[newkey] = data[oldkey]

        new_data = dict_to_array(new_data)

    # Sort by time, if necessary.
    if not np.all(np.ediff1d(new_data['time']) >= 0.):
        new_data.sort(order=['time'])

    return new_data
Пример #36
0
def nest_lc(data, model, param_names, bounds, guess_amplitude_bound=False,
            minsnr=5., priors=None, nobj=100, maxiter=10000, verbose=False):
    """Run nested sampling algorithm to estimate model parameters and evidence.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain column names.
    model : `~sncosmo.Model`
        The model to fit.
    param_names : list
        Model parameters to vary in the fit.
    bounds : `dict`
        Bounded range for each parameter. Bounds must be given for
        each parameter, with the exception of ``t0``: by default, the
        minimum bound is such that the latest phase of the model lines
        up with the earliest data point and the maximum bound is such
        that the earliest phase of the model lines up with the latest
        data point.
    guess_amplitude_bound : bool, optional
        If true, bounds for the model's amplitude parameter are determined
        automatically based on the data and do not need to be included in 
        `bounds`. The lower limit is set to zero and the upper limit is 10
        times the amplitude "guess" (which is based on the highest-flux
        data point in any band). Default is False.
    minsnr : float, optional
        Minimum signal-to-noise ratio of data points to use when guessing 
        amplitude bound. Default is 5.
    priors : dict, optional
        Not currently used.
    nobj : int, optional
        Number of objects (e.g., concurrent sample points) to use. Increasing
        nobj increases the accuracy (due to denser sampling) and also the time
        to solution.
    maxiter : int, optional
        Maximum number of iterations. Default is 10000.
    verbose : bool, optional

    Returns
    -------
    res : Result
        Attributes are:
        
        * ``niter``: total number of iterations
        * ``ncall``: total number of likelihood function calls
        * ``time``: time in seconds spent in iteration loop.
        * ``logz``: natural log of the Bayesian evidence Z.
        * ``logzerr``: estimate of uncertainty in logz (due to finite sampling)
        * ``h``: Bayesian information.
        * ``param_names``: list of parameter names varied.
        * ``samples``: 2-d `~numpy.ndarray`, shape is (nsamples, nparameters).
          Each row is the parameter values for a single sample. For example,
          ``samples[0, :]`` is the parameter values for the first sample.
        * ``weights``: 1-d `~numpy.ndarray`, length=nsamples;
          Weight corresponding to each sample. The weight is proportional to
          the prior * likelihood for the sample.
        * ``logprior``: 1-d `~numpy.ndarray`, length=nsamples;
          log(prior volume) for each sample.
        * ``logl``: 1-d `~numpy.ndarray`, length=nsamples; log(likelihood)
          for each sample.
        * ``param_dict``: Dictionary of weighted average of sample parameter
          values (includes fixed parameters).
        * ``errors``: Dictionary of weighted standard deviation of sample
          parameter values (does not include fixed parameters). 
        * ``bounds``: Dictionary of bounds on varied parameters (including
          any automatically determined bounds).
        * ``ndof``: Number of degrees of freedom.
    est_model : `~sncosmo.Model`
        Copy of model with parameters set to the values in ``res.param_dict``.
    """

    data = standardize_data(data)
    model = copy.copy(model)
    bounds = copy.copy(bounds)  # need to copy this dict b/c we modify it below

    # Find t0 bounds to use, if not explicitly given
    if 't0' in param_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    if guess_amplitude_bound:
        if model.param_names[2] in bounds:
            raise ValueError("cannot supply bounds for parameter {0!r}"
                             " when guess_amplitude_bound=True")
        else:
            _, amplitude = guess_t0_and_amplitude(data, model, minsnr)
            bounds[model.param_names[2]] = (0., 10. * amplitude)

    res = _nest_lc(data, model, param_names, bounds=bounds, priors=priors,
                   nobj=nobj, maxiter=maxiter, verbose=verbose)
    
    # Weighted average of samples
    parameters = np.average(res['samples'], weights=res['weights'], axis=0)
    model.set(**dict(zip(param_names, parameters)))
    res.param_dict = dict(zip(model.param_names, model.parameters))

    # Weighted st. dev. of samples
    std = np.sqrt(np.sum(res['weights'][:, np.newaxis] *
                         (res['samples']-parameters)**2, axis=0))
    res.errors = odict(zip(res.param_names, std))
    res.bounds = bounds

    return res, model
Пример #37
0
"""Convenience functions for photometric data."""
from __future__ import division

import math

import numpy as np
from astropy.utils import OrderedDict as odict
from astropy.table import Table
from astropy.extern import six

from .spectral import get_magsystem, get_bandpass

_photdata_aliases = odict([
    ('time', set(['time', 'date', 'jd', 'mjd', 'mjdobs', 'mjd_obs'])),
    ('band', set(['band', 'bandpass', 'filter', 'flt'])),
    ('flux', set(['flux', 'f'])),
    ('fluxerr', set(['fluxerr', 'fe', 'fluxerror', 'flux_error', 'flux_err'])),
    ('zp', set(['zp', 'zpt', 'zeropoint', 'zero_point'])),
    ('zpsys', set(['zpsys', 'zpmagsys', 'magsys']))
])

# Descriptions for docstring only.
_photdata_descriptions = {
    'time': 'Time of observation in days',
    'band': 'Bandpass of observation',
    'flux': 'Flux of observation',
    'fluxerr': 'Gaussian uncertainty on flux',
    'zp': 'Zeropoint corresponding to flux',
    'zpsys': 'Magnitude system for zeropoint'
}
_photdata_types = {
    'time': 'float',
Пример #38
0
def realize_lcs(observations, model, params, thresh=None):
    """Realize data for a set of SNe given a set of observations.

    Parameters
    ----------
    observations : `~astropy.table.Table` or `~numpy.ndarray`
        Table of observations. Must contain the following column names:
        ``band``, ``time``, ``zp``, ``zpsys``, ``gain``, ``skynoise``.
    model : `sncosmo.Model`
        The model to use in the simulation.
    params : list (or generator) of dict
        List of parameters to feed to the model for realizing each light curve.
    thresh : float, optional
        If given, light curves are skipped (not returned) if none of the data
        points have signal-to-noise greater than ``thresh``.

    Returns
    -------
    sne : list of `~astropy.table.Table`
        Table of realized data for each item in ``params``.

    Notes
    -----
    ``skynoise`` is the image background contribution to the flux measurement
    error (in units corresponding to the specified zeropoint and zeropoint
    system). To get the error on a given measurement, ``skynoise`` is added
    in quadrature to the photon noise from the source.

    It is left up to the user to calculate ``skynoise`` as they see fit as the
    details depend on how photometry is done and possibly how the PSF is
    is modeled. As a simple example, assuming a Gaussian PSF, and perfect
    PSF photometry, ``skynoise`` would be ``4 * pi * sigma_PSF * sigma_pixel``
    where ``sigma_PSF`` is the standard deviation of the PSF in pixels and
    ``sigma_pixel`` is the background noise in a single pixel in counts.
    """

    observations = np.asarray(observations)
    lcs = []

    # TODO: copy model so we don't mess up the user's model?

    for p in params:
        model.set(**p)

        flux = model.bandflux(observations['band'],
                              observations['time'],
                              zp=observations['zp'],
                              zpsys=observations['zpsys'])

        fluxerr = np.sqrt(observations['skynoise']**2 +
                          np.abs(flux) / observations['gain'])

        # Scatter fluxes by the fluxerr
        flux = np.random.normal(flux, fluxerr)

        # Check if any of the fluxes are significant
        if thresh is not None and not np.any(flux/fluxerr > thresh):
            continue

        data = odict([('time', observations['time']),
                      ('band', observations['band']),
                      ('flux', flux),
                      ('fluxerr', fluxerr),
                      ('zp', observations['zp']),
                      ('zpsys', observations['zpsys'])])
        lcs.append(Table(data, meta=p))

    return lcs
Пример #39
0
def simulate_vol(obs_sets, model, gen_params, vrate,
                 cosmo=FlatLambdaCDM(H0=70., Om0=0.3),
                 z_range=(0., 1.), default_area=1., nsim=None,
                 nret=10, thresh=5.):
    """Simulate transient photometric data according to observations
    (EXPERIMENTAL).

    .. warning::

       This function is experimental in v0.4

    Parameters
    ----------
    obs_sets : dict of `astropy.table.Table`
        A dictionary of "observation sets". Each observation set is a table
        of observations. See the notes section below for information on what
        the table must contain.
    model : `sncosmo.Model`
        The model to use in the simulation.
    gen_params : callable
        A callable that accepts a single float (redshift) and returns
        a dictionary on each call. Typically the callable would randomly
        select parameters from some underlying distribution on each call.
    vrate : callable
        A callable that returns the SN rate per comoving volume as a
        function of redshift, in units yr^-1 Mpc^-3.
    cosmo : astropy.cosmology.Cosmology, optional
        Cosmology used to determine volume.
        The default is a FlatLambdaCDM cosmology with ``Om0=0.3``, ``H0=70.``.
    z_range : (float, float), optional
        Redshift range in which to generate transients.
    default_area : float, optional
        Area in deg^2 for observation sets that do not have an 'AREA'
        keyword in their metadata.
    nsim : int, optional
        Number of transients to simulate. Cannot set both `nsim` and `nret`.
        Default is `None`.
    nret : int, optional
        Number of transients to return (number simulated that pass
        flux significance threshold). Cannot set both `nsim` and `nret`.
        Default is 10. Set both `nsim` and `nret` to `None` to let the function
        automatically determine the number of SNe based on the area of each
        observation set and the volumetric rate.
    thresh : float, optional
        Minimum flux significant threshold for a transient to be returned.
    
    Returns
    -------
    sne : list of `~astropy.table.Table`
        List of tables where each table is the photometric
        data for a single simulated SN.

    Notes
    -----
    
    Each ``obs_set`` (values in ``obs_sets``) must have the following columns:
    
    * ``MJD``
    * ``FLT``
    * ``CCD_GAIN``
    * ``SKYSIG``
    * ``PSF1``
    * ``ZPTAVG``

    These are currently just what the SIMLIB files from SNANA have. In the
    future these can be more flexible.

    Examples
    --------

    Define a set of just three observations:

    >>> from astropy.table import Table
    >>> obs_set = Table({'MJD': [56176.19, 56188.254, 56207.172],
    ...                  'FLT': ['desg', 'desr', 'desi'],
    ...                  'CCD_GAIN': [1., 1., 1.],
    ...                  'SKYSIG': [91.27, 47.62, 60.40],
    ...                  'PSF1': [2.27, 2.5, 1.45],
    ...                  'ZPTAVG': [32.97, 33.05, 32.49]})
    >>> print obs_set
       MJD    ZPTAVG FLT  PSF1 SKYSIG CCD_GAIN
    --------- ------ ---- ---- ------ --------
     56176.19  32.97 desg 2.27  91.27      1.0
    56188.254  33.05 desr  2.5  47.62      1.0
    56207.172  32.49 desi 1.45   60.4      1.0
    >>> obs_sets = {0: obs_set}

    Get a model and a cosmology

    >>> import sncosmo                                    # doctest: +SKIP
    >>> from astropy import cosmology
    >>> model = sncosmo.Model(source='salt2-extended')    # doctest: +SKIP
    >>> cosmo = cosmology.FlatLambdaCDM(Om0=0.3, H0=70.)

    Get x0 corresponding to apparent mag = -19.1:

    >>> model.source.set_peakmag(-19.1, 'bessellb', 'vega')  # doctest: +SKIP
    >>> x0_0 = model.get('x0')                               # doctest: +SKIP

    Define a function that generates parameters of the model given a redshift:

    >>> from numpy.random import normal
    >>> def gen_params(z):
    ...     x1 = normal(0., 1.)
    ...     c = normal(0., 0.1)
    ...     resid = normal(0., 0.15)
    ...     hubble_offset = -0.13*x1 + 2.5*c + resid
    ...     dm = cosmo.distmod(z).value
    ...     x0 = x0_0 * 10**(-(dm + hubble_offset) / 2.5)
    ...     return {'c': c, 'x1': x1, 'x0': x0}

    Define a volumetric SN rate in SN / yr / Mpc^3:

    >>> def snrate(z):
    ...     return 0.25e-4 * (1. + 2.5 * z)

    Generate simulated SNe:

    >>> sne = simulate_vol(obs_sets, model, gen_params, snrate, cosmo=cosmo,
    ...                    nret=10)  # doctest: +SKIP
    >>> print len(sne)  # doctest: +SKIP
    10
    >>> print sne[0]  # doctest: +SKIP
       date   band      flux        fluxerr      zp  zpsys
    --------- ---- ------------- ------------- ----- -----
     56176.19 desg 780.472570859 2603.54291491 32.97    ab
    56188.254 desr 17206.2994496 1501.37068134 33.05    ab
    56207.172 desi 10323.4485412 1105.34529777 32.49    ab
    >>> print sne[0].meta  # doctest: +SKIP
    {'z': 0.52007602908199813, 'c': -0.09298497453338518,
     'x1': 1.1684716363315284, 'x0': 1.4010952818384196e-05,
     't0': 56200.279703804845}

    """

    if nsim is not None and nret is not None:
        raise ValueError('cannot specify both nsim and nret')

    # Get comoving volume in each redshift shell.
    z_bins = 100  # Good enough for now.
    z_min, z_max = z_range
    z_binedges = np.linspace(z_min, z_max, z_bins + 1) 
    z_binctrs = 0.5 * (z_binedges[1:] + z_binedges[:-1]) 
    sphere_vols = cosmo.comoving_volume(z_binedges) 
    shell_vols = sphere_vols[1:] - sphere_vols[:-1]

    # SN / (observer year) in shell
    shell_snrate = shell_vols * vrate(z_binctrs) / (1. + z_binctrs)

    # SN / (observer year) within z_binedges
    vol_snrate = np.zeros_like(z_binedges)
    vol_snrate[1:] = np.add.accumulate(shell_snrate)

    # Create a ppf (inverse cdf). We'll use this later to get
    # a random SN redshift from the distribution.
    snrate_cdf = vol_snrate / vol_snrate[-1]
    snrate_ppf = Spline1d(snrate_cdf, z_binedges, k=1)

    # Get obs sets' data, time ranges, areas and weights.
    # We do this now so we can weight the location of sne 
    # according to the area and time ranges of the observation sets.
    obs_sets = obs_sets.values()
    obs_sets_data = [np.asarray(obs_set) for obs_set in obs_sets]
    time_ranges = [(obs['MJD'].min() - 10. * (1. + z_max),
                    obs['MJD'].max() + 10. * (1. + z_max))
                   for obs in obs_sets_data]
    areas = [obs_set.meta['AREA'] if 'AREA' in obs_set.meta else default_area
             for obs_set in obs_sets]
    area_time_products = [a * (t[1] - t[0])
                          for a, t in zip(areas, time_ranges)]
    total_area_time = sum(area_time_products)
    weights = [a_t / total_area_time for a_t in area_time_products]
    cumweights = np.add.accumulate(np.array(weights))
    
    # How many to simulate?
    if nsim is not None:
        nret = 0
    elif nret is not None:
        nsim = 0
    else:
        nsim = total_area_time / wholesky_sqdeg * vol_snrate[-1]

    i = 0
    sne = []
    while i < nsim or len(sne) < nret:
        i += 1

        # which obs_set did this occur in?
        j = 0
        x = np.random.rand()
        while cumweights[j] < x:
            j += 1

        obsdata = obs_sets_data[j]
        time_range = time_ranges[j]

        # Get a redshift from the distribution
        z = snrate_ppf(np.random.rand())
        t0 = np.random.uniform(time_range[0], time_range[1])

        # Get rest of parameters from user-defined gen_params():
        params = gen_params(z)
        params.update(z=z, t0=t0)
        model.set(**params)

        # Get model fluxes 
        flux = model.bandflux(obsdata['FLT'], obsdata['MJD'],
                              zp=obsdata['ZPTAVG'], zpsys='ab')

        # Get flux errors
        noise_area = 4. * math.pi * obsdata['PSF1']
        bkgpixnoise = obsdata['SKYSIG']
        fluxerr = np.sqrt((noise_area * bkgpixnoise) ** 2 +
                          np.abs(flux) / obsdata['CCD_GAIN'])

        # Scatter fluxes by the fluxerr
        flux = np.random.normal(flux, fluxerr)

        # Check if any of the fluxes are significant
        if not np.any((flux / fluxerr) > thresh):
            continue

        simulated_data = odict([('date', obsdata['MJD']),
                                ('band', obsdata['FLT']),
                                ('flux', flux),
                                ('fluxerr', fluxerr),
                                ('zp', obsdata['ZPTAVG']),
                                ('zpsys', ['ab'] * len(flux))])
        sne.append(Table(simulated_data, meta=params))

    return sne
Пример #40
0
def read_snana_ascii(fname, default_tablename=None):
    """Read an SNANA-format ascii file.

    Such files may contain metadata lines and one or more tables. See Notes
    for a summary of the format.

    Parameters
    ----------
    fname : str
        Filename of object to read.
    default_tablename : str, optional
        Default tablename, or the string that indicates a table row, when
        a table starts with 'NVAR:' rather than 'NVAR_TABLENAME:'.
    array : bool, optional
        If True, each table is converted to a numpy array. If False, each
        table is a dictionary of lists (each list is a column). Default is
        True.

    Returns
    -------
    meta : OrderedDict
        Metadata from keywords.
    tables : dict of `~astropy.table.Table`
        Tables, indexed by table name.

    Notes
    -----
    The file can contain one or more tables, as well as optional metadata.
    Here is an example of the expected format::

        META1: a
        META2: 6
        NVAR_SN: 3
        VARNAMES: A B C
        SN: 1 2.0 x
        SN: 4 5.0 y

    Behavior:

    * Any strings ending in a colon (:) are treated as keywords.
    * The start of a new table is indicated by a keyword starting with
      'NVAR'.
    * If the 'NVAR' is followed by an underscore (e.g., 'NVAR_TABLENAME'),
      then 'TABLENAME' is taken to be the name of the table. Otherwise the
      user *must specify* a ``default_tablename``.  This is because data
      rows are identified by the tablename.
    * After a keyword starting with 'NVAR', the next keyword must be
      'VARNAMES'. The strings following give the column names.
    * Any other keywords anywhere in the file are treated as metadata. The
      first string after the keyword is treated as the value for that keyword.
    * **Note:** Newlines are treated as equivalent to spaces; they do not
      indicate a new row. This is necessary because some SNANA-format files
      have multiple metadata on a single row or single table rows split over
      multiple lines, making newline characters meaningless.

    Examples
    --------

    >>> from astropy.extern.six import StringIO  # StringIO behaves like a file
    >>> f = StringIO('META1: a\\n'
    ...              'META2: 6\\n'
    ...              'NVAR_SN: 3\\n'
    ...              'VARNAMES: A B C\\n'
    ...              'SN: 1 2.0 x\\n'
    ...              'SN: 4 5.0 y\\n')
    ...
    >>> meta, tables = read_snana_ascii(f)

    The first object is a dictionary of metadata:

    >>> meta
    OrderedDict([('META1', 'a'), ('META2', 6)])

    The second is a dictionary of all the tables in the file:

    >>> tables['SN']                                          # doctest: +SKIP
    <Table rows=2 names=('A','B','C')>
    array([(1, 2.0, 'x'), (4, 5.0, 'y')],
          dtype=[('A', '<i8'), ('B', '<f8'), ('C', 'S1')])


    If the file had an 'NVAR' keyword rather than 'NVAR_SN', for example::

        NVAR: 3
        VARNAMES: A B C
        SN: 1 2.0 x
        SN: 4 5.0 y
        SN: 5 8.2 z

    it can be read by supplying a default table name:

    >>> meta, tables = read_snana_ascii(f, default_tablename='SN')
    ...     # doctest: +SKIP

    """

    meta = odict()  # initialize structure to hold metadata.
    tables = {}  # initialize structure to hold data.

    if isinstance(fname, six.string_types):
        fh = open(fname, 'U')
    else:
        fh = fname
    words = fh.read().split()
    fh.close()

    i = 0
    nvar = None
    tablename = None
    while i < len(words):
        word = words[i]

        # If the word starts with 'NVAR', we are starting a new table.
        if word.startswith('NVAR'):
            nvar = int(words[i + 1])

            # Infer table name. The name will be used to designate a data row.
            if '_' in word:
                pos = word.find('_') + 1
                tablename = word[pos:].rstrip(':')
            elif default_tablename is not None:
                tablename = default_tablename
            else:
                raise ValueError(
                    'Table name must be given as part of NVAR keyword so '
                    'that rows belonging to this table can be identified. '
                    'Alternatively, supply the default_tablename keyword.')
            table = odict()
            tables[tablename] = table

            i += 2

        # If the word starts with 'VARNAMES', the following `nvar` words
        # define the column names of the table.
        elif word.startswith('VARNAMES') or word.startswith('VARLIST'):

            # Check that nvar is defined and that no column names are defined
            # for the current table.
            if nvar is None or len(table) > 0:
                raise Exception('NVAR must directly precede VARNAMES')

            # Read the column names
            for j in range(i + 1, i + 1 + nvar):
                table[words[j]] = []
            i += nvar + 1

        # If the word matches the current tablename, we are reading a data row.
        elif word.rstrip(':') == tablename:
            for j, colname in enumerate(table.keys()):
                table[colname].append(words[i + 1 + j])
            i += nvar + 1

        # Otherwise, we are reading metadata or some comment
        # If the word ends with ":", it is metadata.
        elif word[-1] == ':':
            name = word[:-1]  # strip off the ':'
            if len(words) >= i + 2:
                try:
                    val = int(words[i + 1])
                except ValueError:
                    try:
                        val = float(words[i + 1])
                    except ValueError:
                        val = words[i + 1]
                meta[name] = val
            else:
                meta[name] = None
            i += 2
        else:
            # It is some comment; continue onto next word.
            i += 1

    # All values in each column are currently strings. Convert to int or
    # float if possible.
    for table in tables.values():
        for colname, values in six.iteritems(table):
            try:
                table[colname] = [int(val) for val in values]
            except ValueError:
                try:
                    table[colname] = [float(val) for val in values]
                except ValueError:
                    pass

    # All tables are dictionaries. Convert them to Tables
    for tablename in tables.keys():
        tables[tablename] = Table(tables[tablename])

    return meta, tables
Пример #41
0
def fit_lc(data, model, vparam_names, bounds=None, method='minuit',
           guess_amplitude=True, guess_t0=True, guess_z=True,
           minsnr=5., modelcov=False, verbose=False, maxcall=10000,
           **kwargs):
    """Fit model parameters to data by minimizing chi^2.

    Ths function defines a chi^2 to minimize, makes initial guesses for
    t0 and amplitude, then runs a minimizer.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : list
        Model parameters to vary in the fit.
    bounds : `dict`, optional
        Bounded range for each parameter. Keys should be parameter
        names, values are tuples. If a bound is not given for some
        parameter, the parameter is unbounded. The exception is
        ``t0``: by default, the minimum bound is such that the latest
        phase of the model lines up with the earliest data point and
        the maximum bound is such that the earliest phase of the model
        lines up with the latest data point.
    guess_amplitude : bool, optional
        Whether or not to guess the amplitude from the data. If false, the
        current model amplitude is taken as the initial value. Only has an
        effect when fitting amplitude. Default is True.
    guess_t0 : bool, optional
        Whether or not to guess t0. Only has an effect when fitting t0.
        Default is True.
    guess_z : bool, optional
        Whether or not to guess z (redshift). Only has an effect when fitting
        redshift. Default is True.
    minsnr : float, optional
        When guessing amplitude and t0, only use data with signal-to-noise
        ratio (flux / fluxerr) greater than this value. Default is 5.
    method : {'minuit'}, optional
        Minimization method to use. Currently there is only one choice.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    verbose : bool, optional
        Print messages during fitting.

    Returns
    -------
    res : Result
        The optimization result represented as a ``Result`` object, which is
        a `dict` subclass with attribute access. Therefore, ``res.keys()``
        provides a list of the attributes. Attributes are:

        - ``success``: boolean describing whether fit succeeded.
        - ``message``: string with more information about exit status.
        - ``ncall``: number of function evaluations.
        - ``chisq``: minimum chi^2 value.
        - ``ndof``: number of degrees of freedom
          (len(data) - len(vparam_names)).
        - ``param_names``: same as ``model.param_names``.
        - ``parameters``: 1-d `~numpy.ndarray` of best-fit values
          (including fixed parameters) corresponding to ``param_names``.
        - ``vparam_names``: list of varied parameter names.
        - ``covariance``: 2-d `~numpy.ndarray` of parameter covariance;
          indicies correspond to order of ``vparam_names``.
        - ``errors``: OrderedDict of varied parameter uncertainties.
          Corresponds to square root of diagonal entries in covariance matrix.

    fitmodel : `~sncosmo.Model`
        A copy of the model with parameters set to best-fit values.

    Notes
    -----

    **t0 guess:** If ``t0`` is being fit and ``guess_t0=True``, the
    function will guess the initial starting point for ``t0`` based on
    the data. The guess is made as follows:

    * Evaluate the time and value of peak flux for the model in each band
      given the current model parameters.
    * Determine the data point with maximum flux in each band, for points
      with signal-to-noise ratio > ``minsnr`` (default is 5). If no points
      meet this criteria, the band is ignored (for the purpose of guessing
      only).
    * For each band, compare model's peak flux to the peak data point. Choose
      the band with the highest ratio of data / model.
    * Set ``t0`` so that the model's time of peak in the chosen band
      corresponds to the peak data point in this band.

    **amplitude guess:** If amplitude (assumed to be the first model parameter)
    is being fit and ``guess_amplitude=True``, the function will guess the
    initial starting point for the amplitude based on the data.

    **redshift guess:** If redshift (``z``) is being fit and ``guess_z=True``,
    the function will set the initial value of ``z`` to the average of the
    bounds on ``z``.

    Examples
    --------

    The `~sncosmo.flatten_result` function can be used to make the result
    a dictionary suitable for appending as rows of a table:

    >>> from astropy.table import Table               # doctest: +SKIP
    >>> table_rows = []                               # doctest: +SKIP
    >>> for sn in sne:                                # doctest: +SKIP
    ...     res, fitmodel = sncosmo.fit_lc(           # doctest: +SKIP
    ...          sn, model, ['t0', 'x0', 'x1', 'c'])  # doctest: +SKIP
    ...     table_rows.append(flatten_result(res))    # doctest: +SKIP
    >>> t = Table(table_rows)                         # doctest: +SKIP

    """

    # Standardize and normalize data.
    data = standardize_data(data)
    data = normalize_data(data)

    # Make a copy of the model so we can modify it with impunity.
    model = copy.copy(model)

    # Check that vparam_names isn't empty and contains only parameters
    # known to the model.
    if len(vparam_names) == 0:
        raise ValueError("no parameters supplied")
    for s in vparam_names:
        if s not in model.param_names:
            raise ValueError("Parameter not in model: " + repr(s))

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]

    # initialize bounds
    if bounds is None:
        bounds = {}

    # Check that 'z' is bounded (if it is going to be fit).
    if 'z' in vparam_names:
        if 'z' not in bounds or None in bounds['z']:
            raise ValueError('z must be bounded if fit.')
        if guess_z:
            model.set(z=sum(bounds['z']) / 2.)
        if model.get('z') < bounds['z'][0] or model.get('z') > bounds['z'][1]:
            raise ValueError('z out of range.')

    # Cut bands that are not allowed by the wavelength range of the model.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    # Unique set of bands in data
    bands = set(data['band'].tolist())

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Note that in the parameter guessing below, we assume that the source
    # amplitude is the 3rd parameter of the Model (1st parameter of the Source)

    # Turn off guessing if we're not fitting the parameter.
    if model.param_names[2] not in vparam_names:
        guess_amplitude = False
    if 't0' not in vparam_names:
        guess_t0 = False

    # Make guesses for t0 and amplitude.
    # (For now, we assume it is the 3rd parameter of the model.)
    if (guess_amplitude or guess_t0):
        t0, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        if guess_amplitude:
            model.parameters[2] = amplitude
        if guess_t0:
            model.set(t0=t0)

    # count degrees of freedom
    ndof = len(data) - len(vparam_names)

    if method == 'minuit':
        try:
            import iminuit
        except ImportError:
            raise ValueError("Minimization method 'minuit' requires the "
                             "iminuit package")

        # The iminuit minimizer expects the function signature to have an
        # argument for each parameter.
        def fitchisq(*parameters):
            model.parameters = parameters
            return _chisq(data, model, modelcov=modelcov)

        # Set up keyword arguments to pass to Minuit initializer.
        kwargs = {}
        for name in model.param_names:
            kwargs[name] = model.get(name)  # Starting point.

            # Fix parameters not being varied in the fit.
            if name not in vparam_names:
                kwargs['fix_' + name] = True
                kwargs['error_' + name] = 0.
                continue

            # Bounds
            if name in bounds:
                if None in bounds[name]:
                    raise ValueError('one-sided bounds not allowed for '
                                     'minuit minimizer')
                kwargs['limit_' + name] = bounds[name]

            # Initial step size
            if name in bounds:
                step = 0.02 * (bounds[name][1] - bounds[name][0])
            elif model.get(name) != 0.:
                step = 0.1 * model.get(name)
            else:
                step = 1.
            kwargs['error_' + name] = step

        if verbose:
            print("Initial parameters:")
            for name in vparam_names:
                print(name, kwargs[name], 'step=', kwargs['error_' + name],
                      end=" ")
                if 'limit_' + name in kwargs:
                    print('bounds=', kwargs['limit_' + name], end=" ")
                print()

        m = iminuit.Minuit(fitchisq, errordef=1.,
                           forced_parameters=model.param_names,
                           print_level=(1 if verbose else 0),
                           throw_nan=True, **kwargs)
        d, l = m.migrad(ncall=maxcall)

        # Build a message.
        message = []
        if d.has_reached_call_limit:
            message.append('Reached call limit.')
        if d.hesse_failed:
            message.append('Hesse Failed.')
        if not d.has_covariance:
            message.append('No covariance.')
        elif not d.has_accurate_covar:  # iminuit docs wrong
            message.append('Covariance may not be accurate.')
        if not d.has_posdef_covar:  # iminuit docs wrong
            message.append('Covariance not positive definite.')
        if d.has_made_posdef_covar:
            message.append('Covariance forced positive definite.')
        if not d.has_valid_parameters:
            message.append('Parameter(s) value and/or error invalid.')
        if len(message) == 0:
            message.append('Minimization exited successfully.')
        # iminuit: m.np_matrix() doesn't work

        # numpy array of best-fit values (including fixed parameters).
        parameters = np.array([m.values[name] for name in model.param_names])
        model.parameters = parameters  # set model parameters to best fit.

        # Covariance matrix (only varied parameters) as numpy array.
        if m.covariance is None:
            covariance = None
        else:
            covariance = np.array([
                [m.covariance[(n1, n2)] for n1 in vparam_names]
                for n2 in vparam_names])

        # OrderedDict of errors
        if m.errors is None:
            errors = None
        else:
            errors = odict([(name, m.errors[name]) for name in vparam_names])

        # Compile results
        res = Result(success=d.is_valid,
                     message=' '.join(message),
                     ncall=d.nfcn,
                     chisq=d.fval,
                     ndof=ndof,
                     param_names=model.param_names,
                     parameters=parameters,
                     vparam_names=vparam_names,
                     covariance=covariance,
                     errors=errors)

        # TODO remove cov_names in a future release.
        depmsg = ("The `cov_names` attribute is deprecated in sncosmo v1.0 "
                  "and will be removed in v1.1. Use `vparam_names` instead.")
        res.__dict__['deprecated']['cov_names'] = (vparam_names, depmsg)

    else:
        raise ValueError("unknown method {0:r}".format(method))

    # TODO remove this in a future release.
    if "flatten" in kwargs:
        warnings.warn("The `flatten` keyword is deprecated in sncosmo v1.0 "
                      "and will be removed in v1.1. Use the flatten_result() "
                      "function instead.")
        if kwargs["flatten"]:
            res = flatten_result(res)
    return res, model
Пример #42
0
def _read_salt2_old(dirname, **kwargs):
    """Read old-style SALT2 files from a directory.

    A file named 'lightfile' must exist in the directory.
    """

    filenames = kwargs.get("filenames", None)

    # Get list of files in directory.
    if not (os.path.exists(dirname) and os.path.isdir(dirname)):
        raise IOError("Not a directory: '{0}'".format(dirname))
    dirfilenames = os.listdir(dirname)

    # Read metadata from lightfile.
    if "lightfile" not in dirfilenames:
        raise IOError("no lightfile in directory: '{0}'".format(dirname))
    with open(os.path.join(dirname, "lightfile"), "r") as lightfile:
        meta = odict()
        for line in lightfile.readlines():
            line = line.strip()
            if len(line) == 0:
                continue
            try:
                key, val = line.split()
            except ValueError:
                raise ValueError(
                    "expected space-separated key value pairs in "
                    "lightfile: {0}".format(os.path.join(dirname, "lightfile"))
                )
            meta[key] = _cast_str(val)

    # Get list of filenames to read.
    if filenames is None:
        filenames = dirfilenames
    if "lightfile" in filenames:
        filenames.remove("lightfile")  # We already read the lightfile.
    fullfilenames = [os.path.join(dirname, f) for f in filenames]

    # Read data from files.
    data = None
    for fname in fullfilenames:
        with open(fname, "r") as f:
            filemeta, filedata = _read_salt2(f)

        # Check that all necessary file metadata was defined.
        if not ("INSTRUMENT" in filemeta and "BAND" in filemeta and "MAGSYS" in filemeta):
            raise ValueError(
                "not all necessary global keys (INSTRUMENT, " "BAND, MAGSYS) are defined in file {0}".format(fname)
            )

        # Add the instrument/band to the file data, in anticipation of
        # aggregating it with other files.

        # PY3: next(iter(filedata.vlues()))
        firstcol = six.next(six.itervalues(filedata))
        data_length = len(firstcol)
        filter_name = "{0}::{1}".format(filemeta.pop("INSTRUMENT"), filemeta.pop("BAND"))
        filedata["Filter"] = data_length * [filter_name]
        filedata["MagSys"] = data_length * [filemeta.pop("MAGSYS")]

        # If this if the first file, initialize data lists, otherwise if keys
        # match, append this file's data to the main data.
        if data is None:
            data = filedata
        elif set(filedata.keys()) == set(data.keys()):
            for key in data:
                data[key].extend(filedata[key])
        else:
            raise ValueError("column names do not match between files")

        # Append any extra metadata in this file to the master metadata.
        if len(filemeta) > 0:
            meta[filter_name] = filemeta

    return meta, data
Пример #43
0
def mcmc_lc(data, model, vparam_names, bounds=None, priors=None,
            guess_amplitude=True, guess_t0=True, guess_z=True,
            minsnr=5., modelcov=False, nwalkers=10, nburn=200,
            nsamples=1000, thin=1, a=2.0):
    """Run an MCMC chain to get model parameter samples.

    This is a convenience function around `emcee.EnsembleSampler`.
    It defines the likelihood function and makes a heuristic guess at a
    good set of starting points for the walkers. It then runs the sampler,
    starting with a burn-in run.

    If you're not getting good results, you might want to try
    increasing the burn-in, increasing the walkers, or specifying a
    better starting position.  To get a better starting position, you
    could first run `~sncosmo.fit_lc`, then run this function with all
    ``guess_[name]`` keyword arguments set to False, so that the
    current model parameters are used as the starting point.

    Parameters
    ----------
    data : `~astropy.table.Table` or `~numpy.ndarray` or `dict`
        Table of photometric data. Must include certain columns.
        See the "Photometric Data" section of the documentation for
        required columns.
    model : `~sncosmo.Model`
        The model to fit.
    vparam_names : iterable
        Model parameters to vary.
    bounds : `dict`, optional
        Bounded range for each parameter. Keys should be parameter
        names, values are tuples. If a bound is not given for some
        parameter, the parameter is unbounded. The exception is
        ``t0``: by default, the minimum bound is such that the latest
        phase of the model lines up with the earliest data point and
        the maximum bound is such that the earliest phase of the model
        lines up with the latest data point.
    priors : `dict`, optional
        Prior probability functions. Keys are parameter names, values are
        functions that return probability given the parameter value.
        The default prior is a flat distribution.
    guess_amplitude : bool, optional
        Whether or not to guess the amplitude from the data. If false, the
        current model amplitude is taken as the initial value. Only has an
        effect when fitting amplitude. Default is True.
    guess_t0 : bool, optional
        Whether or not to guess t0. Only has an effect when fitting t0.
        Default is True.
    guess_z : bool, optional
        Whether or not to guess z (redshift). Only has an effect when fitting
        redshift. Default is True.
    minsnr : float, optional
        When guessing amplitude and t0, only use data with signal-to-noise
        ratio (flux / fluxerr) greater than this value. Default is 5.
    modelcov : bool, optional
        Include model covariance when calculating chisq. Default is False.
    nwalkers : int, optional
        Number of walkers in the EnsembleSampler
    nburn : int, optional
        Number of samples in burn-in phase.
    nsamples : int, optional
        Number of samples in production run.
    thin : int, optional
        Factor by which to thin samples in production run. Output samples
        array will have (nsamples/thin) samples.
    a : float, optional
        Proposal scale parameter passed to the EnsembleSampler.

    Returns
    -------
    res : Result
        Has the following attributes:

        * ``param_names``: All parameter names of model, including fixed.
        * ``parameters``: Model parameters, with varied parameters set to
          mean value in samples.
        * ``vparam_names``: Names of parameters varied. Order of parameters
          matches order of samples.
        * ``samples``: 2-d array with shape ``(N, len(vparam_names))``.
          Order of parameters in each row  matches order in
          ``res.vparam_names``.
        * ``covariance``: 2-d array giving covariance, measured from samples.
          Order corresponds to ``res.vparam_names``.
        * ``errors``: dictionary giving square root of diagonal of covariance
          matrix for varied parameters. Useful for ``plot_lc``.
        * ``mean_acceptance_fraction``: mean acceptance fraction for all
          walkers in the sampler.

    est_model : `~sncosmo.Model`
        Copy of input model with varied parameters set to mean value in
        samples.

    """

    try:
        import emcee
    except:
        raise ImportError("mcmc_lc() requires the emcee package.")

    # Standardize and normalize data.
    data = standardize_data(data)
    data = normalize_data(data)

    # Make a copy of the model so we can modify it with impunity.
    model = copy.copy(model)

    if bounds is None:
        bounds = {}
    if priors is None:
        priors = {}

    # Check that vparam_names isn't empty, check for unknown parameters.
    if len(vparam_names) == 0:
        raise ValueError("no parameters supplied")
    for names in (vparam_names, bounds, priors):
        for name in names:
            if name not in model.param_names:
                raise ValueError("Parameter not in model: " + repr(name))

    # Order vparam_names the same way it is ordered in the model:
    vparam_names = [s for s in model.param_names if s in vparam_names]
    ndim = len(vparam_names)

    # Check that 'z' is bounded (if it is going to be fit).
    if 'z' in vparam_names:
        if 'z' not in bounds or None in bounds['z']:
            raise ValueError('z must be bounded if allowed to vary.')
        if guess_z:
            model.set(z=sum(bounds['z']) / 2.)
        if model.get('z') < bounds['z'][0] or model.get('z') > bounds['z'][1]:
            raise ValueError('z out of range.')

    # Cut bands that are not allowed by the wavelength range of the model.
    data = cut_bands(data, model, z_bounds=bounds.get('z', None))

    # Find t0 bounds to use, if not explicitly given
    if 't0' in vparam_names and 't0' not in bounds:
        bounds['t0'] = t0_bounds(data, model)

    # Note that in the parameter guessing below, we assume that the source
    # amplitude is the 3rd parameter of the Model (1st parameter of the Source)

    # Turn off guessing if we're not fitting the parameter.
    if model.param_names[2] not in vparam_names:
        guess_amplitude = False
    if 't0' not in vparam_names:
        guess_t0 = False

    # Make guesses for t0 and amplitude.
    # (we assume amplitude is the 3rd parameter of the model.)
    if guess_amplitude or guess_t0:
        t0, amplitude = guess_t0_and_amplitude(data, model, minsnr)
        if guess_amplitude:
            model.parameters[2] = amplitude
        if guess_t0:
            model.set(t0=t0)

    # Indicies used in probability function.
    # modelidx: Indicies of model parameters corresponding to vparam_names.
    # idxbounds: tuples of (varied parameter index, low bound, high bound).
    # idxpriors: tuples of (varied parameter index, function).
    modelidx = np.array([model.param_names.index(k) for k in vparam_names])
    idxbounds = [(vparam_names.index(k), bounds[k][0], bounds[k][1])
                 for k in bounds]
    idxpriors = [(vparam_names.index(k), priors[k]) for k in priors]

    # Posterior function.
    def lnprob(parameters):
        for i, low, high in idxbounds:
            if not low < parameters[i] < high:
                return -np.inf

        model.parameters[modelidx] = parameters
        logp = -0.5 * _chisq(data, model, modelcov=modelcov)

        for i, func in idxpriors:
            logp += math.log(func(parameters[i]))

        return logp

    # Heuristic determination of  walker initial positions:
    # distribute walkers in a symmetric gaussian ball, with heuristically
    # determined scale.
    ctr = model.parameters[modelidx]
    scale = np.ones(ndim)
    for i, name in enumerate(vparam_names):
        if name in bounds:
            scale[i] = 0.0001 * (bounds[name][1] - bounds[name][0])
        elif model.get(name) != 0.:
            scale[i] = 0.01 * model.get(name)
        else:
            scale[i] = 0.1
    pos = ctr + scale * np.random.normal(size=(nwalkers, ndim))

    # Run the sampler.
    sampler = emcee.EnsembleSampler(nwalkers, ndim, lnprob, a=a)
    pos, prob, state = sampler.run_mcmc(pos, nburn)  # burn-in
    sampler.reset()
    sampler.run_mcmc(pos, nsamples, thin=thin)  # production run
    samples = sampler.flatchain

    # Summary statistics.
    vparameters = np.mean(samples, axis=0)
    cov = np.cov(samples, rowvar=0)
    model.set(**dict(zip(vparam_names, vparameters)))
    errors = odict(zip(vparam_names, np.sqrt(np.diagonal(cov))))
    mean_acceptance_fraction = np.mean(sampler.acceptance_fraction)

    res = Result(param_names=copy.copy(model.param_names),
                 parameters=model.parameters.copy(),
                 vparam_names=vparam_names,
                 samples=samples,
                 covariance=cov,
                 errors=errors,
                 mean_acceptance_fraction=mean_acceptance_fraction)

    return res, model
Пример #44
0
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""Convenience functions for photometric data."""
from __future__ import division

import math
import numpy as np
from astropy.utils import OrderedDict as odict
from astropy.table import Table
from .spectral import get_magsystem, get_bandpass

_photdata_aliases = odict(
    [
        ("time", set(["time", "date", "jd", "mjd", "mjdobs"])),
        ("band", set(["band", "bandpass", "filter", "flt"])),
        ("flux", set(["flux", "f"])),
        ("fluxerr", set(["fluxerr", "fe", "fluxerror", "flux_error", "flux_err"])),
        ("zp", set(["zp", "zpt", "zeropoint", "zero_point"])),
        ("zpsys", set(["zpsys", "zpmagsys", "magsys"])),
    ]
)

# Descriptions for docstring only.
_photdata_descriptions = {
    "time": "Time of observation in days",
    "band": "Bandpass of observation",
    "flux": "Flux of observation",
    "fluxerr": "Gaussian uncertainty on flux",
    "zp": "Zeropoint corresponding to flux",
    "zpsys": "Magnitude system for zeropoint",
}
_photdata_types = {"time": "float", "band": "str", "flux": "float", "fluxerr": "float", "zp": "float", "zpsys": "str"}