Example #1
def write1d(outfile, result, res_desc, CImethod):
    """
    Write the result of a fitting and its evaluation to a CSV file.

    :param str          outfile:  Name of the file to write to
    :param ResultStruct result:   Result of the fitting evaluation
        (e.g. output of :py:func:`fit_evaluation`)
    :param str          res_desc: Description of the residuals
        (in more detail than just the name of the residuals)
    :param str          CImethod: Description of the confidence interval estimation method
    """
    with open(outfile, CSV_WRITE_FLAGS) as f:
        w = csv_writer(f)
        w.writerow(["Function", result.fct.fct.description])
        w.writerow(["Residuals", result.res_name, res_desc])
        w.writerow(["Parameter", "Value"])
        for pn, pv in izip(result.param_names, result.popt):
            w.writerow([pn, "%.20g" % pv])
        #TODO w.writerow(["Regression Evaluation"])
        w.writerow([])
        w.writerow(["Data"])
        w.writerow([
            result.xname, result.yname, result.fct_desc,
            "Residuals: %s" % result.res_name
        ])
        w.writerows(c_[result.xdata, result.ydata, result.yopts, result.res])
        w.writerow([])
        w.writerow(['Model validation'])
        w.writerow(
            [result.yname, 'Normalized residuals', 'Theoretical quantiles'])
        w.writerows(c_[result.sorted_yopts, result.scaled_res, result.normq])
        if result.eval_points is not result.xdata:
            w.writerow([])
            w.writerow(["Interpolated data"])
            w.writerow([result.xname, result.yname])
            w.writerows(c_[result.eval_points, result.interpolation])
        if result.CI:
            w.writerow([])
            w.writerow(["Confidence interval"])
            w.writerow(["Method", CImethod])
            head = ["Parameters"] + \
                list(chain(*[["%g%% - low" % v, "%g%% - high" % v] for v in result.CI]))
            w.writerow(head)
            #print(result.CIs[1])
            for cis in izip(result.param_names, *chain(*result.CIs[1])):
                cistr = [cis[0]] + ["%.20g" % v for v in cis[1:]]
                w.writerow(cistr)
            w.writerow([result.yname])
            head[0] = result.xname
            w.writerow(head)
            w.writerows(c_[tuple(chain([result.eval_points], *result.CIs[0]))])
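A minimal, self-contained sketch of the core pattern used above: numpy's c_ stacks parallel 1-D arrays as columns, so each row of the stacked array becomes one CSV row. The file name and data here are made up for illustration.

import csv
import numpy as np

x = np.linspace(0.0, 1.0, 5)
y = x ** 2
res = y - x

with open("demo.csv", "w", newline="") as f:
    w = csv.writer(f)
    w.writerow(["x", "y", "residual"])
    # c_[x, y, res] has shape (5, 3); writerows emits one line per row.
    w.writerows(np.c_[x, y, res])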
Example #2
def plot1d(result, loc=0, fig=None, res_fig=None):
    """
    Use matplotlib to display the result of a fit, and return the list of plots used

    :rtype: :py:class:`Plot1dResult`
    :returns: handles to the various figures and plots
    """
    if fig is None:
        fig = figure()
    else:
        try:
            figure(fig)
        except TypeError:
            figure(fig.number)

    p_est = plot(result.eval_points, result.interpolation,
                 label='estimated')[0]
    p_data = plot(result.xdata, result.ydata, '+', label='data')[0]
    p_CIs = []
    if result.CI:
        for p, (low, high) in izip(result.CI, result.CIs[0]):
            l = plot(result.eval_points, low, '--', label='%g%% CI' % (p, ))[0]
            # Keyword form: l.get_color() + '--' only parses as a format
            # string for single-letter colours, not hex colours.
            h = plot(result.eval_points, high, '--', color=l.get_color())[0]
            p_CIs += [l, h]
    if result.param_names:
        param_strs = ", ".join(
            "%s=%g" % (n, v) for n, v in izip(result.param_names, result.popt))
    else:
        param_strs = ", ".join("%g" % v for v in result.popt)
    param_strs = "$%s$" % (param_strs, )

    title("Estimated function %s with params %s" %
          (result.fct_desc, param_strs))

    xlabel(result.xname)
    ylabel(result.yname)
    legend(loc=loc)

    plots = {"figure": fig, "estimate": p_est, "data": p_data, "CIs": p_CIs}

    prt = plot_residual_tests(
        result.xdata, result.yopts, result.res,
        "{0} with params {1}".format(result.fct_desc, param_strs),
        result.xname, result.yname, result.res_name, result.sorted_yopts,
        result.scaled_res, result.normq, res_fig)

    plots.update(prt._asdict())

    return Plot1dResult(**plots)
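The CI loop above reuses the lower bound's colour for the upper bound so the two curves read as one band. The same pairing in isolation, with made-up data:

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0.0, 1.0, 50)
low, = plt.plot(x, x ** 2 - 0.1, '--', label='95% CI')
# Reuse the first bound's colour so both bounds visually match.
plt.plot(x, x ** 2 + 0.1, linestyle='--', color=low.get_color())
plt.legend(loc=0)
plt.show()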
Example #3
def sort(columniter):
    with status('Determining sort order'):
        info = datasets.source.columns
        if any(info[column].type not in nononehandling_types
               for column in options.sort_columns):
            # At least one sort column can have unsortable values
            first = True
            iters = []
            for column in options.sort_columns:
                it = columniter(column, status_reporting=first)
                first = False
                if info[column].type not in nononehandling_types:
                    it = filter_unsortable(column, it)
                iters.append(it)
            if len(iters) == 1:
                # Special case to not make tuples when there is only one column.
                lst = list(iters[0])
            else:
                lst = list(izip(*iters))
        else:
            columns = options.sort_columns
            if len(columns) == 1:
                # Special case to not make tuples when there is only one column.
                columns = columns[0]
            lst = list(columniter(columns))
        reverse = (options.sort_order == 'descending')
        with status('Creating sort list'):
            return sorted(range(len(lst)),
                          key=lst.__getitem__,
                          reverse=reverse)
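The final sorted() call is an argsort: it returns the indices that would sort lst rather than sorting lst itself. The same pattern on a plain list:

lst = ['b', 'c', 'a']
order = sorted(range(len(lst)), key=lst.__getitem__)
assert order == [2, 0, 1]
assert [lst[i] for i in order] == ['a', 'b', 'c']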
Example #4
	def __init__(self, slices):
		slices = range(slices)
		self._slices = iter(slices)
		tuple_len = pickle_load("Analysis.tuple")
		if tuple_len is False:
			self._is_tupled = False
		else:
			self._is_tupled = True
			self._loaders = [self._loader(ix, iter(slices)) for ix in range(tuple_len)]
			self._tupled = izip(*self._loaders)
Example #5
def bootstrap_result(worker, start_repeats, end_repeats):
    #print("Starting worker {} from {} to {}".format(worker, start_repeats, end_repeats))
    try:
        for i in irange(start_repeats, end_repeats):
            #print("Worker {} runs iteration {} with fit: {}".format(worker, i, fit))
            new_fit = fit(shuffled_x[..., i % nx, :], shuffled_y[i % ny, :],
                          *fit_args, **fit_kwrds)
            new_fit.fit()
            #print("new_fit = {}".format(new_fit))
            result_array[i + 1] = new_fit(eval_points)
            for ea, attr in izip(extra_arrays, extra_attrs):
                ea[i + 1] = getattr(new_fit, attr)
    except Exception:
        traceback.print_exc(None, sys.stderr)
        raise
Example #6
def getCIs(CI, *arrays):
    #sorted_arrays = [ np.sort(a, axis=0) for a in arrays ]

    if not np.iterable(CI):
        CI = (CI,)

    def make_CI(a):
        return np.zeros((len(CI), 2) + a.shape[1:], dtype=float)
    CIs = tuple(make_CI(a) for a in arrays)
    for i, ci in enumerate(CI):
        ci = (100. - ci) / 2
        for cis, arr in izip(CIs, arrays):
            low = np.percentile(arr, ci, axis=0)
            high = np.percentile(arr, 100 - ci, axis=0)
            cis[i] = [low, high]

    return CIs
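Stripped of its loops, getCIs reduces to two np.percentile calls per confidence level. A runnable sketch of that computation for a single level, with made-up bootstrap data:

import numpy as np

rng = np.random.default_rng(0)
boot = rng.normal(size=(1000, 50))   # 1000 bootstrap replicates of a 50-point curve

ci = 95.0
tail = (100.0 - ci) / 2              # 2.5
low = np.percentile(boot, tail, axis=0)
high = np.percentile(boot, 100.0 - tail, axis=0)
assert low.shape == high.shape == (50,)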
Example #7
def main(n, timer):
    times = []
    for i in xrange(n):
        t0 = timer()
        u = [1] * DEFAULT_N

        for dummy in xrange(10):
            v = eval_AtA_times_u(u)
            u = eval_AtA_times_u(v)

        vBv = vv = 0

        for ue, ve in izip(u, v):
            vBv += ue * ve
            vv  += ve * ve
        tk = timer()
        times.append(tk - t0)
    return times
Example #8
def main(n: int, timer):
    times = []
    for i in xrange(n):
        t0 = timer()
        u = [1] * DEFAULT_N

        for dummy in xrange(10):
            v = eval_AtA_times_u(u)
            u = eval_AtA_times_u(v)

        vBv = vv = 0

        for ue, ve in izip(u, v):
            vBv += ue * ve
            vv  += ve * ve
        tk = timer()
        times.append(tk - t0)
    return times
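Examples #7 and #8 both call eval_AtA_times_u, which is not defined in either snippet. In the classic spectral-norm benchmark these loops come from (the pyperformance variant, where DEFAULT_N is 130), the helpers are usually written as follows; treat this as an assumed reconstruction, not part of the original snippets:

def eval_A(i, j):
    # Entry (i, j) of the infinite matrix A from the spectral-norm benchmark.
    return 1.0 / ((i + j) * (i + j + 1) // 2 + i + 1)

def eval_A_times_u(u):
    return [sum(eval_A(i, j) * u_j for j, u_j in enumerate(u))
            for i in range(len(u))]

def eval_At_times_u(u):
    return [sum(eval_A(j, i) * u_j for j, u_j in enumerate(u))
            for i in range(len(u))]

def eval_AtA_times_u(u):
    return eval_At_times_u(eval_A_times_u(u))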
Example #9
    def _iterate_datasets(to_iter, columns, pre_callback, post_callback,
                          filter_func, translation_func, translators,
                          want_tuple, range, status_reporting):
        skip_ds = None

        def argfixup(func, is_post):
            if func:
                if len(getargspec(func).args) == 1:
                    seen_ds = [None]

                    def wrapper(d, sliceno=None):
                        if d != seen_ds[0]:
                            if is_post:
                                if seen_ds[0] and seen_ds[0] != skip_ds:
                                    func(seen_ds[0])
                            else:
                                func(d)
                            seen_ds[0] = d

                    return wrapper, True
            return func, False

        pre_callback, unsliced_pre_callback = argfixup(pre_callback, False)
        post_callback, unsliced_post_callback = argfixup(post_callback, True)
        if not to_iter:
            return
        if range:
            range_k, (
                range_bottom,
                range_top,
            ) = next(iteritems(range))
            range_check = range_check_function(range_bottom, range_top)
            if range_k in columns and range_k not in translators and not translation_func:
                has_range_column = True
                range_i = columns.index(range_k)
                if want_tuple:
                    range_f = lambda t: range_check(t[range_i])
                else:
                    range_f = range_check
            else:
                has_range_column = False
        if status_reporting:
            from status import status
        else:
            from status import dummy_status as status

        def fmt_dsname(d, sliceno, rehash):
            if rehash:
                return d + ':REHASH'
            else:
                return '%s:%d' % (d, sliceno)

        if len(to_iter) == 1:
            msg_head = 'Iterating ' + fmt_dsname(*to_iter[0])

            def update_status(update, ix, d, sliceno, rehash):
                pass
        else:
            msg_head = 'Iterating %s to %s' % (
                fmt_dsname(*to_iter[0]),
                fmt_dsname(*to_iter[-1]),
            )

            def update_status(update, ix, d, sliceno, rehash):
                update('%s, %d/%d (%s)' % (msg_head, ix, len(to_iter),
                                           fmt_dsname(d, sliceno, rehash)))

        with status(msg_head) as update:
            for ix, (d, sliceno, rehash) in enumerate(to_iter, 1):
                if unsliced_post_callback:
                    post_callback(d)
                update_status(update, ix, d, sliceno, rehash)
                if pre_callback:
                    if d == skip_ds:
                        continue
                    try:
                        pre_callback(d, sliceno)
                    except SkipSlice:
                        if unsliced_pre_callback:
                            skip_ds = d
                        continue
                    except SkipJob:
                        skip_ds = d
                        continue
                it = d._iterator(None if rehash else sliceno, columns)
                for ci, trans in translators.items():
                    it[ci] = imap(trans, it[ci])
                if want_tuple:
                    it = izip(*it)
                else:
                    it = it[0]
                if rehash:
                    it = d._hashfilter(sliceno, rehash, it)
                if translation_func:
                    it = imap(translation_func, it)
                if range:
                    c = d.columns[range_k]
                    if c.min is not None and (not range_check(c.min)
                                              or not range_check(c.max)):
                        if has_range_column:
                            it = ifilter(range_f, it)
                        else:
                            if rehash:
                                filter_it = d._hashfilter(
                                    sliceno, rehash,
                                    d._column_iterator(None, range_k))
                            else:
                                filter_it = d._column_iterator(
                                    sliceno, range_k)
                            it = compress(it, imap(range_check, filter_it))
                if filter_func:
                    it = ifilter(filter_func, it)
                yield it
                if post_callback and not unsliced_post_callback:
                    post_callback(d, sliceno)
            if unsliced_post_callback:
                post_callback(None)
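The compress(it, imap(range_check, filter_it)) step above filters one iterator by a predicate evaluated on a parallel iterator, so the rows themselves never need to carry the checked column. The same technique in isolation:

from itertools import compress

rows = iter([('a', 1), ('b', 5), ('c', 3)])
values = iter([1, 5, 3])              # parallel stream of the range column
in_range = lambda v: 2 <= v <= 4      # stand-in for range_check

# compress() keeps a row exactly when the matching predicate value is true.
assert list(compress(rows, map(in_range, values))) == [('c', 3)]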
Example #10
def setup():
    global TMP_PATH
    TMP_PATH = tempfile.mkdtemp()
    for _, fn in izip(xrange(NUM_FILES), generate_files()):
        with open(fn, "w") as f:
            f.write(fn)
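izip(xrange(NUM_FILES), generate_files()) is the Python 2 spelling of a pattern that caps a (possibly endless) generator at NUM_FILES items, because zip stops at its shortest input. In Python 3 terms, with a made-up generator:

from itertools import count, islice

def generate_files():
    for i in count():                 # endless stream of names, for illustration
        yield 'file_%d.txt' % i

NUM_FILES = 3
names = [fn for _, fn in zip(range(NUM_FILES), generate_files())]
assert names == list(islice(generate_files(), NUM_FILES))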
Example #11
def csvexport(sliceno, filename, labelsonfirstline):
    assert len(options.separator) == 1
    assert options.quote_fields in (
        '',
        "'",
        '"',
    )
    d = datasets.source[0]
    if not options.labels:
        options.labels = sorted(d.columns)
    if options.chain_source:
        if jobids.previous:
            prev_source = job_params(jobids.previous).datasets.source
            assert len(datasets.source) == len(prev_source)
        else:
            prev_source = [None] * len(datasets.source)
        lst = []
        for src, stop in zip(datasets.source, prev_source):
            lst.extend(src.chain(stop_ds=stop))
        datasets.source = lst
    if filename.lower().endswith('.gz'):
        mkwrite = mkwrite_gz
    elif filename.lower().endswith('.csv'):
        mkwrite = mkwrite_uncompressed
    else:
        raise Exception(
            "Filename should end with .gz for compressed or .csv for uncompressed"
        )
    iters = []
    first = True
    for label in options.labels:
        it = d.iterate_list(sliceno,
                            label,
                            datasets.source,
                            status_reporting=first)
        first = False
        t = d.columns[label].type
        if t == 'unicode' and PY2:
            it = imap(enc, it)
        elif t == 'bytes' and PY3:
            it = imap(lambda s: s.decode('utf-8', errors='backslashreplace'),
                      it)
        elif t in ('float32', 'float64', 'number'):
            it = imap(repr, it)
        elif t == 'json':
            it = imap(dumps, it)
        elif t not in ('unicode', 'ascii', 'bytes'):
            it = imap(str, it)
        iters.append(it)
    it = izip(*iters)
    with mkwrite(filename) as write:
        q = options.quote_fields
        sep = options.separator
        if q:
            qq = q + q
            if labelsonfirstline:
                write(
                    enc(
                        sep.join(q + n.replace(q, qq) + q
                                 for n in options.labels)))
            for data in it:
                write(sep.join(q + n.replace(q, qq) + q for n in data))
        else:
            if labelsonfirstline:
                write(enc(sep.join(options.labels)))
            for data in it:
                write(sep.join(data))
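The quoting branch above wraps each field in the quote character and doubles any embedded occurrence, which is the standard CSV quote escape. In isolation:

q = '"'
qq = q + q
fields = ['plain', 'say "hi"', 'a,b']
line = ','.join(q + f.replace(q, qq) + q for f in fields)
assert line == '"plain","say ""hi""","a,b"'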
Example #12
def setup():
    global TMP_PATH
    TMP_PATH = tempfile.mkdtemp()
    for _, fn in izip(xrange(NUM_FILES), generate_files()):
        with open(fn, "w") as f:
            f.write(fn)
Example #13
def bootstrap(fit, xdata, ydata, CI, shuffle_method=bootstrap_residuals,
              shuffle_args=(), shuffle_kwrds={}, repeats=3000,
              eval_points=None, full_results=False, nb_workers=None,
              extra_attrs=(), fit_args=(), fit_kwrds={}):
    """
    This function implement the bootstrap algorithm for a regression algorithm.
    It is capable of spreading the load across many threads using shared memory
    and the :py:mod:`multiprocess` module.

    :type  fit: callable
    :param fit:
        Method used to compute regression. The call is::

            f = fit(xdata, ydata, *fit_args, **fit_kwrds)

        Fit should return an object that evaluates the regression on a
        set of points. It will then be called as::

            f(eval_points)

    :type  xdata: ndarray of shape (N,) or (k,N) for a function with k predictors
    :param xdata: The independent variable where the data is measured

    :type  ydata: ndarray
    :param ydata: The dependent data

    :type  CI: tuple of float
    :param CI: List of percentiles to extract

    :type  shuffle_method: callable
    :param shuffle_method:
        Create a shuffled dataset. The call is::

          shuffle_method(y_fit, xdata, ydata, repeats=repeats, *shuffle_args,
                         **shuffle_kwrds)

        where ``y_fit`` is the fitted estimator returned by ``fit`` on the
        original data.

    :type  shuffle_args: tuple
    :param shuffle_args: List of arguments for the shuffle method

    :type  shuffle_kwrds: dict
    :param shuffle_kwrds: Dictionary of arguments for the shuffle method

    :type  repeats: int
    :param repeats: Number of repeats for the bootstrapping

    :type  eval_points: ndarray or None
    :param eval_points: List of points to evaluate. If None, eval_points
        is set to xdata.

    :type  full_results: bool
    :param full_results: if True, output also the whole set of evaluations

    :type  nb_workers: int or None
    :param nb_workers: Number of worker processes. If None, the number of
        detected CPUs will be used; if it is 1 or less, a single process
        will be used.

    :type  extra_attrs: tuple of str
    :param extra_attrs: List of attributes of the fitting method to extract on
        top of the y values for confidence intervals

    :type  fit_args: tuple
    :param fit_args: List of extra arguments for the fit callable

    :type  fit_kwrds: dict
    :param fit_kwrds: Dictionary of extra named arguments for the fit callable

    :rtype: :py:class:`BootstrapResult`
    :return: Estimated y on the data, on the evaluation points, the requested
        confidence intervals and, if requested, the shuffled X, Y and the full
        estimated distributions.
    """
    xdata = np.asarray(xdata)
    ydata = np.asarray(ydata)
    y_fit = fit(xdata, ydata, *fit_args, **fit_kwrds)
    y_fit.fit()

    shuffled_x, shuffled_y = shuffle_method(y_fit, xdata, ydata,
                                            repeats=repeats,
                                            *shuffle_args, **shuffle_kwrds)
    nx = shuffled_x.shape[-2]
    ny = shuffled_y.shape[0]
    extra_values = []
    for attr in extra_attrs:
        extra_values.append(getattr(y_fit, attr))

    if eval_points is None:
        eval_points = xdata
    if nb_workers is None:
        nb_workers = mp.cpu_count()

    multiprocess = nb_workers > 1

    # Copy everything into shared memory
    if multiprocess:
        ra = sharedmem.zeros((repeats + 1, len(eval_points)), dtype=float)
        result_array = ra.np
        sx = sharedmem.array(shuffled_x)
        sy = sharedmem.array(shuffled_y)
        ep = sharedmem.array(eval_points)

        def make_ea(ev):
            return sharedmem.zeros((repeats + 1, len(ev)), dtype=float)
        eas = [make_ea(ev) for ev in extra_values]
        extra_arrays = [ea.np for ea in eas]
        # Honor the requested worker count rather than always using cpu_count().
        pool = mp.Pool(nb_workers, bootstrap_workers.initialize_shared,
                       (nx, ny, ra, eas, sx, sy, ep, extra_attrs,
                        fit, fit_args, fit_kwrds))
    else:
        result_array = np.empty((repeats + 1, len(eval_points)), dtype=float)

        def make_ea(ev):
            return np.empty((repeats + 1, len(ev)), dtype=float)
        extra_arrays = [make_ea(ev) for ev in extra_values]
        bootstrap_workers.initialize(nx, ny, result_array, extra_arrays,
                                     shuffled_x, shuffled_y, eval_points,
                                     extra_attrs, fit, fit_args, fit_kwrds)

    result_array[0] = y_fit(eval_points)

    for ea, ev in izip(extra_arrays, extra_values):
        ea[0] = ev

    base_repeat = repeats // nb_workers
    if base_repeat * nb_workers < repeats:
        base_repeat += 1

    for i in irange(nb_workers):
        end_repeats = (i + 1) * base_repeat
        if end_repeats > repeats:
            end_repeats = repeats
        if multiprocess:
            pool.apply_async(bootstrap_workers.bootstrap_result,
                             (i, i * base_repeat, end_repeats))
        else:
            bootstrap_workers.bootstrap_result(i, i * base_repeat, end_repeats)

    if multiprocess:
        pool.close()
        pool.join()
    CIs = getCIs(CI, result_array, *extra_arrays)

    # copy the array to not return a view on a larger array
    y_eval = np.array(result_array[0])

    if not full_results:
        shuffled_y = shuffled_x = result_array = None
        extra_arrays = ()
    elif multiprocess:
        result_array = result_array.copy()  # copy into local memory
        extra_arrays = [ea.copy() for ea in extra_arrays]  # call copy(), not the bound method

    return BootstrapResult(y_fit, y_fit(xdata), eval_points, y_eval, tuple(CI), CIs,
                           shuffled_x, shuffled_y, result_array)
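A minimal sketch of an object satisfying the fit protocol the docstring describes: constructed from the data, fitted via .fit(), then called on evaluation points. PolyFit is hypothetical, not part of the original code:

import numpy as np

class PolyFit(object):
    def __init__(self, xdata, ydata, degree=1):
        self.xdata = np.asarray(xdata)
        self.ydata = np.asarray(ydata)
        self.degree = degree
        self.coeffs = None

    def fit(self):
        # Least-squares polynomial fit; called once before evaluation.
        self.coeffs = np.polyfit(self.xdata, self.ydata, self.degree)

    def __call__(self, eval_points):
        return np.polyval(self.coeffs, eval_points)

# e.g.: bootstrap(PolyFit, xdata, ydata, CI=(95,), repeats=100)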
Example #14
    def _iterate_datasets(to_iter, columns, pre_callback, post_callback,
                          filter_func, translation_func, translators,
                          want_tuple, range):
        skip_jobid = None

        def argfixup(func, is_post):
            if func:
                if len(getargspec(func).args) == 1:
                    seen_jobid = [None]

                    def wrapper(jobid, sliceno=None):
                        if jobid != seen_jobid[0]:
                            if is_post:
                                if seen_jobid[0] and seen_jobid[0] != skip_jobid:
                                    func(seen_jobid[0])
                            else:
                                func(jobid)
                            seen_jobid[0] = jobid

                    return wrapper, True
            return func, False

        pre_callback, unsliced_pre_callback = argfixup(pre_callback, False)
        post_callback, unsliced_post_callback = argfixup(post_callback, True)
        if not to_iter:
            return
        if range:
            range_k, (
                range_bottom,
                range_top,
            ) = next(iteritems(range))
            range_check = range_check_function(range_bottom, range_top)
            if range_k in columns and range_k not in translators and not translation_func:
                has_range_column = True
                range_i = columns.index(range_k)
                if want_tuple:
                    range_f = lambda t: range_check(t[range_i])
                else:
                    range_f = range_check
            else:
                has_range_column = False
        starting_at = '%s:%d' % (
            to_iter[0][0],
            to_iter[0][2],
        )
        if len(to_iter) == 1:
            msg = 'Iterating ' + starting_at
        else:
            msg = 'Iterating %d dataset slices starting at %s' % (
                len(to_iter),
                starting_at,
            )
        with status(msg):
            # Start at 1 so the '(%d/%d)' status below counts from 1/len.
            for ix, (jobid, d, sliceno, rehash) in enumerate(to_iter, 1):
                if unsliced_post_callback:
                    post_callback(jobid)
                if pre_callback:
                    if jobid == skip_jobid:
                        continue
                    try:
                        pre_callback(jobid, sliceno)
                    except SkipSlice:
                        if unsliced_pre_callback:
                            skip_jobid = jobid
                        continue
                    except SkipJob:
                        skip_jobid = jobid
                        continue
                it = d._iterator(None if rehash else sliceno, columns)
                # Use a distinct name: ix is still needed in the status message below.
                for ci, trans in translators.items():
                    it[ci] = imap(trans, it[ci])
                if want_tuple:
                    it = izip(*it)
                else:
                    it = it[0]
                if rehash:
                    it = d._hashfilter(sliceno, rehash, it)
                if translation_func:
                    it = imap(translation_func, it)
                if range:
                    c = d.columns[range_k]
                    if c.min is not None and (not range_check(c.min)
                                              or not range_check(c.max)):
                        if has_range_column:
                            it = ifilter(range_f, it)
                        else:
                            if rehash:
                                filter_it = d._hashfilter(
                                    sliceno, rehash,
                                    d._column_iterator(None, range_k))
                            else:
                                filter_it = d._column_iterator(
                                    sliceno, range_k)
                            it = compress(it, imap(range_check, filter_it))
                if filter_func:
                    it = ifilter(filter_func, it)
                with status('(%d/%d) %s:%s' % (
                        ix,
                        len(to_iter),
                        jobid,
                        'REHASH' if rehash else sliceno,
                )):
                    yield it
                if post_callback and not unsliced_post_callback:
                    post_callback(jobid, sliceno)
            if unsliced_post_callback:
                post_callback(None)