Exemplo n.º 1
0
Arquivo: perf.py Projeto: xyshi25/MibS
    def __init__(self, MetricTable):
        """Build the sorted performance-ratio table for every solver.

        Parameters
        ----------
        MetricTable : object
            Must expose ``nprobs``, ``nsolvs`` and ``prob_mets(prob)``; the
            latter returns the per-solver metrics of one problem, with
            failed runs masked.

        Attributes set
        --------------
        ratios : ndarray of shape (nprobs + 1, nsolvs)
            Per-solver ratios sorted in ascending order. Masked (failed)
            entries are replaced by ``1.01 * maxrat``.
        maxrat : float
            Largest unmasked ratio in the table.

        Notes
        -----
        Reads the module-level ``opts.minlimit``.
        """
        nprobs = MetricTable.nprobs
        nsolvs = MetricTable.nsolvs

        # One extra leading row is allocated and never written by the loop
        # below, so it keeps the value 0.0 for every solver.
        self.ratios = ma.masked_array(1.0 * ma.zeros((nprobs + 1, nsolvs)))

        # Compute the performance ratio relative to the best solver for
        # each problem.
        for prob in range(nprobs):
            metrics = MetricTable.prob_mets(prob)
            # ma.min/ma.max reduce over all entries.  The single-argument
            # form ma.minimum(x)/ma.maximum(x) used previously is no longer
            # accepted by NumPy (those are now strictly binary functions).
            best_met = ma.min(metrics)
            if (ma.count(metrics) == nsolvs
                    and ma.max(metrics) <= opts.minlimit):
                # Every solver succeeded and all metrics are below the
                # threshold: treat the solvers as tied.
                self.ratios[prob + 1, :] = 1.0
            else:
                self.ratios[prob + 1, :] = metrics * (1.0 / best_met)

        # Sort each solver's ratios (column-wise); masked entries go last.
        self.ratios = ma.sort(self.ratios, axis=0)

        # Compute the largest ratio and use it to replace failure entries.
        self.maxrat = ma.max(self.ratios)
        self.ratios = ma.filled(self.ratios, 1.01 * self.maxrat)
Exemplo n.º 2
0
    def __init__(self, MetricTable):
        """Build the sorted performance-ratio table for every solver.

        Parameters
        ----------
        MetricTable : object
            Must expose ``nprobs``, ``nsolvs`` and ``prob_mets(prob)``; the
            latter returns the per-solver metrics of one problem, with
            failed runs masked.

        Attributes set
        --------------
        ratios : ndarray of shape (nprobs + 1, nsolvs)
            Per-solver ratios sorted in ascending order. Masked (failed)
            entries are replaced by ``1.01 * maxrat``.
        maxrat : float
            Largest unmasked ratio in the table.

        Notes
        -----
        Reads the module-level ``opts.minlimit``.
        """
        nprobs = MetricTable.nprobs
        nsolvs = MetricTable.nsolvs

        # Row 0 is never written by the loop below and therefore stays 0.0.
        self.ratios = ma.masked_array(1.0 * ma.zeros((nprobs + 1, nsolvs)))

        # Compute the performance ratio relative to the best solver for
        # each problem.  (The original mixed tabs and spaces on the if/else
        # lines, which Python 3 rejects with TabError.)
        for prob in range(nprobs):
            metrics = MetricTable.prob_mets(prob)
            # ma.min/ma.max replace the single-argument ma.minimum(x) /
            # ma.maximum(x) calls, which current NumPy no longer accepts.
            best_met = ma.min(metrics)
            all_solved = ma.count(metrics) == nsolvs
            if all_solved and ma.max(metrics) <= opts.minlimit:
                # All solvers succeeded below the threshold: count as a tie.
                self.ratios[prob + 1, :] = 1.0
            else:
                self.ratios[prob + 1, :] = metrics * (1.0 / best_met)

        # Sort each solver's ratios (column-wise); masked entries go last.
        self.ratios = ma.sort(self.ratios, axis=0)

        # Compute the largest ratio and use it to replace failure entries.
        self.maxrat = ma.max(self.ratios)
        self.ratios = ma.filled(self.ratios, 1.01 * self.maxrat)
Exemplo n.º 3
0
    def __init__(self, MetricTable, opts):
        """Build the sorted performance-ratio table for every solver.

        Parameters
        ----------
        MetricTable : object
            Must expose ``nprobs``, ``nsolvs`` and ``prob_mets(prob)``; the
            latter returns the per-solver metrics of one problem, with
            failed runs masked.
        opts : object
            Only ``opts.cpu`` is read: when truthy, 0.01 is added to every
            metric before the ratios are formed.

        Attributes set
        --------------
        ratios : ndarray of shape (nprobs, nsolvs)
            Per-solver ratios sorted in ascending order. Masked (failed)
            entries are replaced by ``10 * maxrat``.
        maxrat : float
            Largest unmasked ratio in the table.
        """
        epsilon = 0.01 if opts.cpu else 0.0

        nprobs = MetricTable.nprobs
        nsolvs = MetricTable.nsolvs
        # The builtin float replaces the numpy.float alias, which was
        # removed from NumPy.
        self.ratios = ma.zeros((nprobs, nsolvs), dtype=float)

        # Compute the performance ratio relative to the best solver for
        # each problem.
        for prob in range(nprobs):
            metrics = MetricTable.prob_mets(prob) + epsilon
            # ma.min reduces over all entries; the single-argument
            # ma.minimum(x) form is no longer accepted by NumPy.
            best_met = ma.min(metrics)
            self.ratios[prob, :] = metrics * (1.0 / best_met)

        # Sort each solver's ratios (column-wise); masked entries go last.
        self.ratios = ma.sort(self.ratios, axis=0)

        # Compute the largest ratio and use it to replace failure entries.
        self.maxrat = ma.max(self.ratios)
        self.ratios = ma.filled(self.ratios, 10 * self.maxrat)
Exemplo n.º 4
0
    def predict(self, mu, sigma, Ys, model=None):
        """Return predictive mean, variance and (optionally) log density.

        Parameters
        ----------
        mu, sigma :
            Latent mean and variance; ``self.sigma`` is added to ``sigma``.
        Ys : array or None
            Observed targets.  When None, no log predictive density is
            computed and None is returned in its place.
        model :
            Object exposing ``Y``.  Despite the ``None`` default it is
            required: ``model.Y`` is read unconditionally.

        Returns
        -------
        mean :
            Weighted combination of the inverse-warped quadrature points.
        var : array with a trailing axis of length 1
            ``((q90 - q10) / 4) ** 2`` of the inverse-warped quantiles.
        lpd : array with a trailing axis of length 1, or None
            Log predictive density of ``Ys``; None when ``Ys`` is None.
        """
        # --- variance: from the spread between the 0.1 and 0.9 quantiles
        s = sigma + self.sigma
        alpha = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8000, 0.9000])
        q = np.outer(np.sqrt(2 * s), erfinv(2 * alpha - 1)) + mu

        # Sorted warped training targets provide starting points for the
        # numerical inversion of the warp.
        z = self.warp(model.Y)[0]
        I = argsort(z, axis=0)
        sortz = sort(z, axis=0)
        sortt = model.Y[I]

        quant = self.warpinv(q, self._get_initial_points(q, sortz, sortt), 100)
        var = np.square((quant[:, 8] - (quant[:, 0])) / 4)

        # --- mean: 10-point quadrature over the latent Gaussian
        # NOTE(review): the nodes/weights look like 10-point Gauss-Hermite
        # values, for which the outermost weight is usually tabulated as
        # ~7.64e-06 rather than 7.6e-07 -- confirm against a reference table.
        H = np.array([7.6e-07, 0.0013436, 0.0338744, 0.2401386, 0.6108626,
                      0.6108626, 0.2401386, 0.0338744, 0.0013436, 7.6e-07])
        quard = np.array([-3.4361591, -2.5327317, -1.7566836, -1.0366108,
                          -0.3429013, 0.3429013, 1.0366108, 1.7566836,
                          2.5327317, 3.4361591])
        mu_quad = np.outer(np.sqrt(2 * s), quard) + mu
        mean = self.warpinv(mu_quad, self._get_initial_points(mu_quad, sortz, sortt), 100)
        mean = mdot(mean, H[:, np.newaxis]) / np.sqrt(math.pi)

        # --- log predictive density, only when targets are supplied.
        # Previously lpd was indexed even when it was None, so calling
        # predict(..., Ys=None) raised TypeError on the return line.
        lpd = None
        if Ys is not None:
            ts, w = self.warp(Ys)
            lpd = -0.5 * np.log(2 * math.pi * s) - 0.5 * np.square(ts - mu) / s + np.log(w)
            lpd = lpd[:, 0][:, np.newaxis]
        return mean, var[:, np.newaxis], lpd
Exemplo n.º 5
0
 def test_sort(self):
     # ma.sort on the fixture series must agree with indexing the series
     # by its own argsort, both for the values and for `.indices`.
     fixture = self.series
     fixture.thresholds = (-0.5, +0.5)
     fixture.minimum_size = 5
     raw_indices = fixture.indices
     order = fixture.argsort()
     ordered = ma.sort(fixture)
     assert_equal(ordered, fixture[order])
     assert_equal(ordered.indices, raw_indices[order])
Exemplo n.º 6
0
def _lhsmu(N, samples=None, corr=None, random_state=None, M=5):

    if random_state is None:
        random_state = np.random.RandomState()
    elif not isinstance(random_state, np.random.RandomState):
        random_state = np.random.RandomState(random_state)

    if samples is None:
        samples = N

    I = M * samples

    rdpoints = random_state.uniform(size=(I, N))

    dist = spatial.distance.cdist(rdpoints, rdpoints, metric='euclidean')
    D_ij = ma.masked_array(dist, mask=np.identity(I))

    index_rm = np.zeros(I - samples, dtype=int)
    i = 0
    while i < I - samples:
        order = ma.sort(D_ij, axis=1)

        avg_dist = ma.mean(order[:, 0:2], axis=1)
        min_l = ma.argmin(avg_dist)

        D_ij[min_l, :] = ma.masked
        D_ij[:, min_l] = ma.masked

        index_rm[i] = min_l
        i += 1

    rdpoints = np.delete(rdpoints, index_rm, axis=0)

    if (corr is not None):
        #check if covariance matrix is valid
        assert type(corr) == np.ndarray
        assert corr.ndim == 2
        assert corr.shape[0] == corr.shape[1]
        assert corr.shape[0] == N

        norm_u = stats.norm().ppf(rdpoints)
        L = linalg.cholesky(corr, lower=True)

        norm_u = np.matmul(norm_u, L)

        H = stats.norm().cdf(norm_u)
    else:
        H = np.zeros_like(rdpoints, dtype=float)
        rank = np.argsort(rdpoints, axis=0)

        for l in range(samples):
            low = float(l) / samples
            high = float(l + 1) / samples

            l_pos = rank == l
            H[l_pos] = random_state.uniform(low, high, size=N)
    return H
 def test_sort(self):
     # Sorting with ma.sort has to match fancy-indexing the series with its
     # argsort result; the `.indices` attribute must be reordered likewise.
     ts = self.series
     ts.thresholds = (-0.5, +0.5)
     ts.minimum_size = 5
     indices_before = ts.indices
     permutation = ts.argsort()
     sorted_ts = ma.sort(ts)
     assert_equal(sorted_ts, ts[permutation])
     assert_equal(sorted_ts.indices, indices_before[permutation])
Exemplo n.º 8
0
 def test_testCI(self):
     # Test of conversions and indexing.
     # x1 is a plain ndarray; x2/x3/x4 are masked views of the same values
     # with different masks (x4 has no mask at all).
     x1 = np.array([1, 2, 4, 3])
     x2 = array(x1, mask=[1, 0, 0, 0])
     x3 = array(x1, mask=[0, 1, 0, 1])
     x4 = array(x1)
     # Conversion to strings: only checks that neither call raises.
     str(x2)  # raises?
     repr(x2)  # raises?
     # Sorting with the masked entry filled by 0 must match the plain sort.
     assert_(eq(np.sort(x1), sort(x2, fill_value=0)))
     # Indexing: unmasked elements behave like ndarray elements, a masked
     # element is the `masked` singleton.
     assert_(type(x2[1]) is type(x1[1]))
     assert_(x1[1] == x2[1])
     assert_(x2[0] is masked)
     assert_(eq(x1[2], x2[2]))
     assert_(eq(x1[2:5], x2[2:5]))
     assert_(eq(x1[:], x2[:]))
     assert_(eq(x1[1:], x3[1:]))
     # Item and slice assignment applied to both arrays in lockstep.
     x1[2] = 9
     x2[2] = 9
     assert_(eq(x1, x2))
     x1[1:3] = 99
     x2[1:3] = 99
     assert_(eq(x1, x2))
     # Assigning `masked` masks entries; the eq() checks below are expected
     # to keep passing with those entries masked.
     x2[1] = masked
     assert_(eq(x1, x2))
     x2[1:3] = masked
     assert_(eq(x1, x2))
     # Full-slice assignment from a plain array, then re-mask one entry and
     # check the resulting mask.
     x2[:] = x1
     x2[1] = masked
     assert_(allequal(getmask(x2), array([0, 1, 0, 0])))
     # Full-slice assignment from a masked array copies its mask, both into
     # an array that already had a mask (x3) and one that had none (x4).
     x3[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0])
     assert_(allequal(getmask(x3), array([0, 1, 1, 0])))
     x4[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0])
     assert_(allequal(getmask(x4), array([0, 1, 1, 0])))
     assert_(allequal(x4, array([1, 2, 3, 4])))
     # masked_values masks the entries equal to 3.0 and uses 3.0 as the
     # fill value.
     x1 = np.arange(5) * 1.0
     x2 = masked_values(x1, 3.0)
     assert_(eq(x1, x2))
     assert_(allequal(array([0, 0, 0, 1, 0], MaskType), x2.mask))
     assert_(eq(3.0, x2.fill_value))
     # Object arrays: element access returns the stored Python object for
     # both the masked (x1) and the plain (x2) array.
     x1 = array([1, 'hello', 2, 3], object)
     x2 = np.array([1, 'hello', 2, 3], object)
     s1 = x1[1]
     s2 = x2[1]
     assert_equal(type(s2), str)
     assert_equal(type(s1), str)
     assert_equal(s1, s2)
     # An empty slice keeps a 1-D shape of length 0.
     assert_(x1[1:1].shape == (0,))
Exemplo n.º 9
0
 def test_testCI(self):
     # Test of conversions and indexing.
     # x1 is a plain ndarray; x2/x3/x4 are masked views of the same values
     # with different masks (x4 has no mask at all).
     x1 = np.array([1, 2, 4, 3])
     x2 = array(x1, mask=[1, 0, 0, 0])
     x3 = array(x1, mask=[0, 1, 0, 1])
     x4 = array(x1)
     # Conversion to strings: only checks that neither call raises.
     str(x2)  # raises?
     repr(x2)  # raises?
     # Sorting with the masked entry filled by 0 must match the plain sort.
     assert_(eq(np.sort(x1), sort(x2, fill_value=0)))
     # Indexing: unmasked elements behave like ndarray elements, a masked
     # element is the `masked` singleton.
     assert_(type(x2[1]) is type(x1[1]))
     assert_(x1[1] == x2[1])
     assert_(x2[0] is masked)
     assert_(eq(x1[2], x2[2]))
     assert_(eq(x1[2:5], x2[2:5]))
     assert_(eq(x1[:], x2[:]))
     assert_(eq(x1[1:], x3[1:]))
     # Item and slice assignment applied to both arrays in lockstep.
     x1[2] = 9
     x2[2] = 9
     assert_(eq(x1, x2))
     x1[1:3] = 99
     x2[1:3] = 99
     assert_(eq(x1, x2))
     # Assigning `masked` masks entries; the eq() checks below are expected
     # to keep passing with those entries masked.
     x2[1] = masked
     assert_(eq(x1, x2))
     x2[1:3] = masked
     assert_(eq(x1, x2))
     # Full-slice assignment from a plain array, then re-mask one entry and
     # check the resulting mask.
     x2[:] = x1
     x2[1] = masked
     assert_(allequal(getmask(x2), array([0, 1, 0, 0])))
     # Full-slice assignment from a masked array copies its mask, both into
     # an array that already had a mask (x3) and one that had none (x4).
     x3[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0])
     assert_(allequal(getmask(x3), array([0, 1, 1, 0])))
     x4[:] = masked_array([1, 2, 3, 4], [0, 1, 1, 0])
     assert_(allequal(getmask(x4), array([0, 1, 1, 0])))
     assert_(allequal(x4, array([1, 2, 3, 4])))
     # masked_values masks the entries equal to 3.0 and uses 3.0 as the
     # fill value.
     x1 = np.arange(5) * 1.0
     x2 = masked_values(x1, 3.0)
     assert_(eq(x1, x2))
     assert_(allequal(array([0, 0, 0, 1, 0], MaskType), x2.mask))
     assert_(eq(3.0, x2.fill_value))
     # Object arrays: element access returns the stored Python object for
     # both the masked (x1) and the plain (x2) array.
     x1 = array([1, 'hello', 2, 3], object)
     x2 = np.array([1, 'hello', 2, 3], object)
     s1 = x1[1]
     s2 = x2[1]
     assert_equal(type(s2), str)
     assert_equal(type(s1), str)
     assert_equal(s1, s2)
     # An empty slice keeps a 1-D shape of length 0.
     assert_(x1[1:1].shape == (0,))
Exemplo n.º 10
0
def get_pairwise():
    """Build a word-by-topic weight matrix and its pairwise cosine distances.

    Reads the hard-coded ``Nokia6610.twords`` (tab-separated words, first
    line is a header) and ``Nokia6610.twdist`` (numeric matrix) files,
    fills a ``(n_words, ntopic)`` matrix with the matching weights, computes
    the square cosine-distance matrix between words, writes it to
    ``../Data/pairwise.txt`` and prints intermediate results.

    Returns
    -------
    keys : list of str
        Sorted vocabulary (words longer than one character).
    pairwise : ndarray of shape (len(keys), len(keys))
        Cosine distances between the word rows.
    """
    ntopic = 100
    # f = open(r'E:\python_workplace\hai2012\corpus\corpus_NP\corpus_NP.twords', encoding='utf-8')
    # tword_array = loadtxt(r'E:\python_workplace\hai2012\corpus\corpus_NP\corpus_NP.twdist')

    # The context manager guarantees the word file is closed even when
    # loading the weights or parsing a line raises.
    with open(
            r'E:\python_workplace\Opinion_Mining\Data\Nokia 6610\Nokia6610.twords',
            encoding='utf-8') as f:
        tword_array = loadtxt(
            r'E:\python_workplace\Opinion_Mining\Data\Nokia 6610\Nokia6610.twdist')
        # Sort every row in descending order, keep the first 100 columns and
        # transpose so that the array is indexed as [line, column].
        tword_array = -sort(-tword_array, axis=1)
        tword_array = tword_array[:, 0:100].transpose()

        # word -> list of (line index, column index) occurrences
        wdict = {}
        for num, line in enumerate(f):
            if num == 0:
                continue  # skip the header line
            words = re.split("\t", line.strip())
            for dcount, w in enumerate(words):
                if w in wdict:
                    wdict[w].append((num - 1, dcount))
                elif len(w) > 1:
                    # Single-character tokens never start a new entry.
                    wdict[w] = [(num - 1, dcount)]
    print(wdict)
    keys = sorted(wdict)
    print(keys)
    # w_t = numpy.zeros([len(keys), ntopic])
    # A small positive default is used instead of zeros for entries that
    # are never filled in.
    w_t = numpy.ones([len(keys), ntopic]) * 0.000001
    for i, k in enumerate(keys):
        for d in wdict[k]:
            w_t[i, d[1]] = tword_array[d[0]][d[1]]
    print(w_t)
    print(w_t.size)
    pairwise = spatial.distance.squareform(
        spatial.distance.pdist(w_t, metric="cosine"))
    # pairwise = spatial.distance.squareform(spatial.distance.pdist(w_t, lambda i,j: KL_Measure(i, j)))

    pairwise_filename = r'../Data/pairwise.txt'
    savetxt(pairwise_filename, pairwise, fmt='%.8f')
    print(pairwise)
    print(pairwise.size)
    return keys, pairwise
Exemplo n.º 11
0
def idealfourths(data, axis=None):
    """Return an estimate of the lower and upper quartiles of the data
    along the given axis, as computed with the ideal fourths.

    Parameters
    ----------
    data : array_like
        Input data; masked entries are ignored.
    axis : int, optional
        Axis along which the quartiles are estimated.  If None, the data
        are flattened first.

    Returns
    -------
    list of two floats if `axis` is None, otherwise a masked array holding
    the ``[lower, upper]`` pair for every 1-D slice along `axis`.  Slices
    with fewer than three unmasked values yield ``[nan, nan]``.
    """
    def _idf(data):
        x = data.compressed()
        n = len(x)
        if n < 3:
            return [np.nan, np.nan]
        (j, h) = divmod(n / 4. + 5 / 12., 1)
        # divmod on a float returns a float quotient; it has to be an int
        # before it can be used as an index (float indices raise
        # IndexError).
        j = int(j)
        qlo = (1 - h) * x[j - 1] + h * x[j]
        k = n - j
        qup = (1 - h) * x[k] + h * x[k - 1]
        return [qlo, qup]
    # Sort once up front so that _idf can index order statistics directly.
    data = ma.sort(data, axis=axis).view(MaskedArray)
    if axis is None:
        return _idf(data)
    else:
        return ma.apply_along_axis(_idf, axis, data)
Exemplo n.º 12
0
def idealfourths(data, axis=None):
    """Return an estimate of the lower and upper quartiles of the data
    along the given axis, as computed with the ideal fourths.

    Parameters
    ----------
    data : array_like
        Input data; masked entries are ignored.
    axis : int, optional
        Axis along which the quartiles are estimated.  If None, the data
        are flattened first.

    Returns
    -------
    list of two floats if `axis` is None, otherwise a masked array holding
    the ``[lower, upper]`` pair for every 1-D slice along `axis`.  Slices
    with fewer than three unmasked values yield ``[nan, nan]``.
    """
    def _idf(data):
        x = data.compressed()
        n = len(x)
        if n < 3:
            return [np.nan, np.nan]
        (j, h) = divmod(n / 4. + 5 / 12., 1)
        # The quotient from divmod is a float; convert it so it is a valid
        # index (indexing with a float raises IndexError).
        j = int(j)
        qlo = (1 - h) * x[j - 1] + h * x[j]
        k = n - j
        qup = (1 - h) * x[k] + h * x[k - 1]
        return [qlo, qup]
    # Sort once up front so that _idf can index order statistics directly.
    data = ma.sort(data, axis=axis).view(MaskedArray)
    if axis is None:
        return _idf(data)
    else:
        return ma.apply_along_axis(_idf, axis, data)
Exemplo n.º 13
0
def get_pairwise():
    """Build a word-by-topic weight matrix and its pairwise cosine distances.

    Reads the hard-coded ``Nokia6610.twords`` (tab-separated words, first
    line is a header) and ``Nokia6610.twdist`` (numeric matrix) files,
    fills a ``(n_words, ntopic)`` matrix with the matching weights, computes
    the square cosine-distance matrix between words, writes it to
    ``../Data/pairwise.txt`` and prints intermediate results.

    Returns
    -------
    keys : list of str
        Sorted vocabulary (words longer than one character).
    pairwise : ndarray of shape (len(keys), len(keys))
        Cosine distances between the word rows.
    """
    ntopic = 100
    # f = open(r'E:\python_workplace\hai2012\corpus\corpus_NP\corpus_NP.twords', encoding='utf-8')
    # tword_array = loadtxt(r'E:\python_workplace\hai2012\corpus\corpus_NP\corpus_NP.twdist')

    # Using `with` closes the word file on every exit path, including
    # exceptions raised while loading the weights or parsing a line.
    with open(r'E:\python_workplace\Opinion_Mining\Data\Nokia 6610\Nokia6610.twords',
              encoding='utf-8') as f:
        tword_array = loadtxt(r'E:\python_workplace\Opinion_Mining\Data\Nokia 6610\Nokia6610.twdist')
        # Sort every row in descending order, keep the first 100 columns and
        # transpose so that the array is indexed as [line, column].
        tword_array = -sort(-tword_array, axis=1)
        tword_array = tword_array[:, 0:100].transpose()

        # word -> list of (line index, column index) occurrences
        wdict = {}
        for num, line in enumerate(f):
            if num == 0:
                continue  # skip the header line
            words = re.split("\t", line.strip())
            for dcount, w in enumerate(words):
                if w in wdict:
                    wdict[w].append((num - 1, dcount))
                elif len(w) > 1:
                    # Single-character tokens never start a new entry.
                    wdict[w] = [(num - 1, dcount)]
    print(wdict)
    keys = sorted(wdict)
    print(keys)
    # w_t = numpy.zeros([len(keys), ntopic])
    # A small positive default is used instead of zeros for entries that
    # are never filled in.
    w_t = numpy.ones([len(keys), ntopic]) * 0.000001
    for i, k in enumerate(keys):
        for d in wdict[k]:
            w_t[i, d[1]] = tword_array[d[0]][d[1]]
    print(w_t)
    print(w_t.size)
    pairwise = spatial.distance.squareform(spatial.distance.pdist(w_t, metric="cosine"))
    # pairwise = spatial.distance.squareform(spatial.distance.pdist(w_t, lambda i,j: KL_Measure(i, j)))

    pairwise_filename = r'../Data/pairwise.txt'
    savetxt(pairwise_filename, pairwise, fmt='%.8f')
    print(pairwise)
    print(pairwise.size)
    return keys, pairwise
Exemplo n.º 14
0
def equi_n_discretization(array, intervals=5, dim=1):
    """Discretize a 2-D masked array into `intervals` roughly equal-count bins.

    For every slice along `dim` the unmasked values are sorted and split
    into `intervals` groups whose sizes differ by at most one; the largest
    value of each group is its cut point.  Each entry of the result holds
    the bin label of the corresponding input entry; the input mask is
    carried over to the result.
    """
    # Number of unmasked observations in every slice along `dim`.
    valid = ma.sum(ma.array(ma.ones(array.shape, dtype=int), mask=array.mask), dim)
    ordered = ma.sort(array, dim)
    leftover = valid % intervals

    # Cut points are reshaped so they broadcast against `array`.
    cut_shape = list(array.shape)
    cut_shape[dim] = 1

    boundaries = []
    upper = ma.zeros(len(valid), dtype=int)
    for step in range(intervals):
        # The first `leftover` bins of a slice receive one extra element.
        upper = upper + valid // intervals + numpy.ones(len(leftover)) * (leftover > step)
        last = numpy.array(upper, dtype=int) - 1
        lanes = list(range(len(upper)))
        if dim == 1:
            picked = ordered[lanes, last]
        else:
            picked = ordered[last, lanes]
        boundaries.append(picked.reshape(cut_shape))

    # Start from -1 everywhere, then raise the label for every cut point
    # that a value exceeds.
    labels = ma.array(ma.zeros(array.shape) - 1, mask=array.mask)
    labels[ma.nonzero(array <= boundaries[0])] = 0
    for level, bound in enumerate(boundaries, start=1):
        labels[ma.nonzero(array > bound)] = level
    return labels
Exemplo n.º 15
0
def idealfourths(data, axis=None):
    """
    Estimate the lower and upper quartiles with the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array; masked entries are ignored.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the array is
        flattened first.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        The two values that split `data` into four parts, computed on the
        flattened array (if `axis` is None) or along `axis` of `data`.
        ``[nan, nan]`` is produced wherever fewer than three unmasked values
        are available.

    """

    def _fourths(values):
        # `values` is already sorted; only unmasked observations count.
        valid = values.compressed()
        size = len(valid)
        if size < 3:
            return [np.nan, np.nan]
        # Integer part selects the order statistics, fractional part the
        # interpolation weight between neighbours.
        whole, frac = divmod(size / 4. + 5 / 12., 1)
        lo = int(whole)
        hi = size - lo
        lower = (1 - frac) * valid[lo - 1] + frac * valid[lo]
        upper = (1 - frac) * valid[hi] + frac * valid[hi - 1]
        return [lower, upper]

    ordered = ma.sort(data, axis=axis).view(MaskedArray)
    if axis is not None:
        return ma.apply_along_axis(_fourths, axis, ordered)
    return _fourths(ordered)
Exemplo n.º 16
0
    def predict(self, mu, sigma, Ys, model=None):
        """Return predictive mean, variance and (optionally) log density.

        Parameters
        ----------
        mu, sigma :
            Latent mean and variance; ``self.sigma`` is added to ``sigma``.
        Ys : array or None
            Observed targets.  When None, no log predictive density is
            computed and None is returned in its place.
        model :
            Object exposing ``Y``.  Despite the ``None`` default it is
            required: ``model.Y`` is read unconditionally.

        Returns
        -------
        mean :
            Weighted combination of the inverse-warped quadrature points.
        var : array with a trailing axis of length 1
            ``((q90 - q10) / 4) ** 2`` of the inverse-warped quantiles.
        lpd : array with a trailing axis of length 1, or None
            Log predictive density of ``Ys``; None when ``Ys`` is None.
        """
        # --- variance: from the spread between the 0.1 and 0.9 quantiles
        s = sigma + self.sigma
        alpha = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8000, 0.9000])
        q = np.outer(np.sqrt(2 * s), erfinv(2 * alpha - 1)) + mu

        # Sorted warped training targets provide starting points for the
        # numerical inversion of the warp.
        z = self.warp(model.Y)[0]
        I = argsort(z, axis=0)
        sortz = sort(z, axis=0)
        sortt = model.Y[I]

        quant = self.warpinv(q, self._get_initial_points(q, sortz, sortt), 100)
        var = np.square((quant[:, 8] - (quant[:, 0])) / 4)

        # --- mean: 10-point quadrature over the latent Gaussian
        # NOTE(review): the nodes/weights look like 10-point Gauss-Hermite
        # values, for which the outermost weight is usually tabulated as
        # ~7.64e-06 rather than 7.6e-07 -- confirm against a reference table.
        H = np.array(
            [7.6e-07, 0.0013436, 0.0338744, 0.2401386, 0.6108626, 0.6108626, 0.2401386, 0.0338744, 0.0013436, 7.6e-07]
        )
        quard = np.array(
            [
                -3.4361591,
                -2.5327317,
                -1.7566836,
                -1.0366108,
                -0.3429013,
                0.3429013,
                1.0366108,
                1.7566836,
                2.5327317,
                3.4361591,
            ]
        )
        mu_quad = np.outer(np.sqrt(2 * s), quard) + mu
        mean = self.warpinv(mu_quad, self._get_initial_points(mu_quad, sortz, sortt), 100)
        mean = mdot(mean, H[:, np.newaxis]) / np.sqrt(math.pi)

        # --- log predictive density, only when targets are supplied.
        # Previously lpd was indexed even when it was None, so calling
        # predict(..., Ys=None) raised TypeError on the return line.
        lpd = None
        if Ys is not None:
            ts, w = self.warp(Ys)
            lpd = -0.5 * np.log(2 * math.pi * s) - 0.5 * np.square(ts - mu) / s + np.log(w)
            lpd = lpd[:, 0][:, np.newaxis]
        return mean, var[:, np.newaxis], lpd
Exemplo n.º 17
0
def idealfourths(data, axis=None):
    """
    Estimate the lower and upper quartiles with the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array; masked entries are ignored.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the array is
        flattened first.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        The two values that split `data` into four parts, computed on the
        flattened array (if `axis` is None) or along `axis` of `data`.
        ``[nan, nan]`` is produced wherever fewer than three unmasked values
        are available.

    """

    def _quartile_pair(sorted_slice):
        # Only unmasked observations of the (already sorted) slice count.
        obs = sorted_slice.compressed()
        count = len(obs)
        if count < 3:
            return [np.nan, np.nan]
        # Split the fractional rank into the index of the order statistic
        # and the interpolation weight towards its neighbour.
        rank, weight = divmod(count / 4.0 + 5 / 12.0, 1)
        low_idx = int(rank)
        high_idx = count - low_idx
        lower = (1 - weight) * obs[low_idx - 1] + weight * obs[low_idx]
        upper = (1 - weight) * obs[high_idx] + weight * obs[high_idx - 1]
        return [lower, upper]

    ordered = ma.sort(data, axis=axis).view(MaskedArray)
    if axis is not None:
        return ma.apply_along_axis(_quartile_pair, axis, ordered)
    return _quartile_pair(ordered)
Exemplo n.º 18
0
def equi_n_discretization(array, intervals=5, dim=1):
    """Label the entries of a 2-D masked array with equal-frequency bins.

    Along `dim`, the unmasked values of every slice are sorted and divided
    into `intervals` groups whose sizes differ by at most one.  The largest
    value of each group acts as a cut point, and every input entry receives
    the label of the bin it falls into.  The mask of `array` is reused for
    the result.
    """
    # How many unmasked values each slice along `dim` contains.
    n_valid = ma.sum(
        ma.array(ma.ones(array.shape, dtype=int), mask=array.mask), dim)
    sorted_values = ma.sort(array, dim)
    extra = n_valid % intervals

    # Shape that lets a vector of cut points broadcast against `array`.
    broadcast_shape = list(array.shape)
    broadcast_shape[dim] = 1

    cut_points = []
    end = ma.zeros(len(n_valid), dtype=int)
    for k in range(intervals):
        # Bins with index below `extra` take one additional element.
        end = end + n_valid // intervals + numpy.ones(len(extra)) * (extra > k)
        end_idx = numpy.array(end, dtype=int) - 1
        slice_idx = list(range(len(end)))
        if dim == 1:
            edge = sorted_values[slice_idx, end_idx]
        else:
            edge = sorted_values[end_idx, slice_idx]
        cut_points.append(edge.reshape(broadcast_shape))

    # Everything starts at -1; labels are then raised cut point by cut point.
    result = ma.array(ma.zeros(array.shape) - 1, mask=array.mask)
    result[ma.nonzero(array <= cut_points[0])] = 0
    for label, edge in enumerate(cut_points, start=1):
        result[ma.nonzero(array > edge)] = label
    return result
Exemplo n.º 19
0
    def fringe(self, exposure, fringe):
        """Fringe subtraction

        Measures the fringe amplitude at random positions on both the
        science exposure and the fringe frame, fits a scale factor between
        the two sets of measurements (with iterative rejection of outliers)
        and subtracts the scaled fringe frame from the science image in
        place.

        @param exposure Exposure to process
        @param fringe Fringe frame to apply
        """
        assert exposure, "No exposure provided"
        assert fringe, "No fringe provided"
        fringe = self._checkDimensions("fringe", exposure, fringe)

        # XXX This is a first cut at fringe subtraction.  It should be fairly simple to generalise to allow
        # multiple fringe frames (generated from, e.g., Principal Component Analysis) and solve for the linear
        # combination that best reproduces the fringes on the science frame.
        # Optimisations:
        # * Push the whole thing into C++
        # * Persist the fringe measurements along with the fringe frame


        science = exposure.getMaskedImage()
        fringe = fringe.getMaskedImage()

        # XXX Fringe can have mask bits set, because afwMath.statisticsStack propagates them
        fringe.getMask().set(0)

        width, height = exposure.getWidth(), exposure.getHeight()

        # Measurement configuration, read from the 'fringe' section of self.config:
        #   num        - number of random boxes to measure
        #   size       - side length of each box, in pixels
        #   iterations - maximum number of clipping iterations
        #   clip       - rejection threshold for the fit, in units of the robust rms
        #   discard    - initial rejection threshold, in units of the clipped stdev of the science image
        policy = self.config['fringe']
        num = policy['num']
        size = policy['size']
        iterations = policy['iterations']
        clip = policy['clip']
        discard = policy['discard']

        # Random box corners.  random_integers(high, size=...) draws from 1..high inclusive.
        xList = numpy.random.random_integers(width - size, size=num)
        yList = numpy.random.random_integers(height - size, size=num)

        # Global background levels, subtracted from every box measurement below
        bgStats = afwMath.makeStatistics(science, afwMath.MEDIAN | afwMath.STDEVCLIP)
        bgScience = bgStats.getValue(afwMath.MEDIAN)
        sdScience = bgStats.getValue(afwMath.STDEVCLIP)
        bgFringe = afwMath.makeStatistics(fringe, afwMath.MEDIAN).getValue()

        # Background-subtracted median of each box, on both images
        measScience = ma.zeros(num)
        measFringe = ma.zeros(num)
        for i in range(num):
            x, y = int(xList[i]), int(yList[i])
            bbox = afwGeom.Box2I(afwGeom.Point2I(x, y), afwGeom.Point2I(x + size - 1, y + size - 1))

            subScience = science.Factory(science, bbox, afwImage.LOCAL)
            subFringe = fringe.Factory(fringe, bbox, afwImage.LOCAL)

            measScience[i] = afwMath.makeStatistics(subScience, afwMath.MEDIAN).getValue() - bgScience
            measFringe[i] = afwMath.makeStatistics(subFringe, afwMath.MEDIAN).getValue() - bgFringe

        # Immediately discard measurements that aren't in the background 'noise' (which includes the fringe
        # modulation).  These have been corrupted by objects.
        limit = discard * sdScience
        masked = ma.masked_outside(measScience, -limit, limit)
        # The same mask is applied to both arrays so that the pairs stay aligned
        measScience.mask = masked.mask
        measFringe.mask = masked.mask

        self.log.log(self.log.DEBUG, "Fringe discard: %f %d" % (limit, measScience.count()))

        # Least-squares slope of y against x for n points (masked entries are skipped by the sums).
        # NOTE(review): every call below passes n = 2.0 * num rather than the number of unmasked
        # points -- confirm this is intended.
        regression = lambda x, y, n: ((x * y).sum() - x.sum() * y.sum() / n) / ((x**2).sum() - x.sum()**2 / n)

        # Solve for the fringe amplitude, with rejection of bad points
        lastNum = num
        for i in range(iterations):
            slope = regression(measFringe, measScience, 2.0 * num)
            intercept = measScience.mean() - slope * measFringe.mean()

            fit = measFringe * slope + intercept
            resid = measScience - fit
            # Robust scatter estimate: 0.74 times the spread between the residuals found at the
            # 25% and 75% positions of the sorted array (positions are computed from lastNum).
            sort = ma.sort(resid.copy())
            rms = 0.74 * (sort[int(0.75 * lastNum)] - sort[int(0.25 * lastNum)])
            limit = clip * rms

            # Reject residuals beyond the clipping limit and propagate the mask to both arrays
            resid = ma.masked_outside(resid, -limit, limit)
            measScience.mask = resid.mask
            measFringe.mask = resid.mask

            newNum = resid.count()
            self.log.log(self.log.DEBUG, "Fringe iter %d: %f %f %f %d" % (i, slope, intercept, rms, newNum))
            if newNum == lastNum:
                # Iterating isn't buying us anything
                break
            lastNum = newNum

        # Final amplitude from the surviving points; subtract the scaled fringe frame in place
        slope = regression(measFringe, measScience, 2.0 * num)
        self.log.log(self.log.INFO, "Fringe amplitude scaling: %f" % slope)
        science.scaledMinus(slope, fringe)
Exemplo n.º 20
0
def plot_fdc(series, multimode=True, plot_enso=False,
             starting_month=None, lag=6,
             scale='log', xmin=0.0005, xmax=0.9995, ax=None, **kwargs):
    """
    Plots one or several flow duration curves (FDCs) for the series.

    The input series should be 1D or 2D.
    By default, if the series is 1D, one curve only will be plotted, whereas if
    the series is 2D, a curve will be plotted for each line of the series.

    A 1D series can also be converted into an annual series
    with the :keyword:`starting_month` parameter.
    In that case, ``starting_month`` should be an integer between 1 and 12
    specifying the month at which the 12-month period should start.
    For example, to plot the FDCs for each water year (usually from April
    to the following March), use ``starting_month=4``.

    When ``plot_enso=True``, ENSO phases are plotted with different colors.
    When the series is 2D or if it has been converted to an annual frequency,
    the ENSO indices are defined with the ``full_year=True`` option, where an ENSO
    episode lasts at least 12 consecutive months.

    Parameters
    ----------
    series : TimeSeries
        Flow data.
    ax : {None, :class:`matplotlib.axes.Axes`}, optional
        Subplot where to plot the flow duration curves.
        If None, use the current plot.
    multimode : {True, False}, optional
        Whether to interpret a 2D input series as several series or a single one.
        For a 1D input this flag is overridden: it is forced to True when
        ``starting_month`` is given and to False otherwise.
    starting_month : {None, integer}, optional
        First month of each year.
        If None, plots the global flow duration curve.
        Otherwise, ``starting_month`` must be an integer between 1 and 12,
        corresponding to the first month of the water year
        (usually, 4 for April).
    plot_enso : {True, False}, optional
        Whether to plot each ENSO phase with a different color.
    lag : {integer}, optional
        Number of months of lag for the definition of ENSO indices. For example,
        if lag=6, the ENSO phase starting in Oct. 2001 is applied starting on
        Apr. 2002.
        If None, use a lag computed as the time difference between ``starting_month``
        and the first month of the reference season of the ENSO indicator (or October
        if undefined).
    scale : {'log','lin'}, optional
        String indicating whether the x-axis is in log (``'log'``) or linear
        (``'lin'``) scale.  Only the first three characters are used,
        case-insensitively.
        If ``'log'``, the plotting positions are transformed with the
        Gaussian inverse CDF (``norm.ppf``).
    xmin, xmax : float, optional
        Limits forwarded to ``set_normal_limits``.
    other parameters :
        ``marker``, ``markersize``/``ms``, ``linewidth``/``lw``, ``zorder``,
        ``color``/``c`` and ``label`` are read from the extra keywords.

    Returns
    -------
    ax
        The axes the curves were added to.

    Raises
    ------
    InvalidENSOError
        If ``plot_enso=True`` but no ENSO indicator is associated with the
        series.

    ValueError
        * If the series has more than two dimensions.
        * If ``starting_month`` is not between 1 and 12.
        * If ``starting_month`` is defined but the initial series is not 1D.
        * If ``scale`` is neither ``'lin'`` nor ``'log'``.
    """
    if ax is None:
        ax = gca()
    # Make sure we have at most a 2D series ...............
    if series.ndim > 2:
        raise ValueError("The input series should be 2D at most!")
    # Get the ENSO indicator associated w/ the series (if any)
    ensoindicator = getattr(series, 'ensoindicator', None)
    # Check the starting month ............................
    if starting_month is not None:
        # Make sure we have an integer between 1 and 12
        starting_month = int(starting_month)
        if (starting_month < 1) or (starting_month > 12):
            errmsg = "The starting month should be between 1 (Jan.) and "\
                     "12 (Dec.)! (got %s instead)" % starting_month
            raise ValueError(errmsg)

    # Check whether we need to plot the ENSO information ..
    if plot_enso is True:
        # Make sure we have some ENSO information .........
        if ensoindicator is None:
            errmsg = "No ENSO information is associated with the input series."
            raise InvalidENSOError(errmsg)
        # Reset the indices if we have a starting_month ...
        if starting_month is not None:
            if lag is None:
                refmonth = (ensoindicator.reference_season or [10, ])[0]
                lag = (starting_month + 12 - refmonth) % 12
            series.set_ensoindices(full_year=True, lag=lag)
        else:
            # Make sure that the indices are already set
            series.set_ensoindices()
        # Load the default marker colors ..................
        from scikits.hydroclimpy.plotlib.ensotools import ENSOlines, \
                                                          ENSOmarkers, \
                                                          ENSOlabels
    # No ENSO information to plot : get basic lines & markers
    else:
        ENSOlines = {'G':'#cccccc'}
        ENSOmarkers = {'G':'#cccccc'}
    # Check whether we are in multimode or not ............
    ## 1D input
    if series.ndim == 1:
        # Convert to annual if needed
        if starting_month:
            multimode = True
            series = series.convert(FR_ANNSTART[starting_month - 1], func=None)
        else:
            multimode = False
        _series = series.view(ma.MaskedArray)
    ## 2D input
    else:
        #  w/ starting month
        if starting_month is not None:
            # The annual conversion is only defined for 1D input.
            errmsg = "The input series should be 1D! (got %s instead)"
            raise ValueError(errmsg % str(series.shape))
        # _series must be bound in both modes: previously it was only set
        # when multimode was False, so the default 2D call failed with
        # UnboundLocalError on the count below.
        _series = series.view(ma.MaskedArray)
        # w/o multimode: treat the whole array as one flat series
        if not multimode:
            _series = _series.ravel()
    # Get the number of valid data per year (ie, per row)
    n = _series.count(axis= -1)
    # Get the xdata .........
    scale = scale[:3].lower()
    if scale == 'lin':
        if multimode:
            xdata = [np.linspace(1. / (nx + 1), 1 - 1. / (nx + 1), nx)
                     for nx in n]
        else:
            xdata = np.linspace(1. / (n + 1), 1 - 1. / (n + 1), n)
    elif scale == 'log':
        if multimode:
            xdata = [norm.ppf(np.linspace(1. / (nx + 1), 1 - 1. / (nx + 1), nx))
                     for nx in n]
        else:
            xdata = norm.ppf(np.linspace(1. / (n + 1), 1 - 1. / (n + 1), n))
    else:
        # The placeholder used to be a bare '%' and was never substituted.
        raise ValueError("Unrecognized option '%s' for scale: "
                         "should be in ['lin','log'])" % scale)
    # Get some defaults .....
    if multimode:
        lwdefault = 0.8
        zorderdefault = 3
        colordefault = ENSOlines['G']
    else:
        lwdefault = 2
        zorderdefault = 10
        colordefault = 'k'
    marker = kwargs.pop('marker', 'o')
    markersize = kwargs.get('markersize', kwargs.get('ms', 3))
    lw = kwargs.pop('linewidth', kwargs.pop('lw', lwdefault))
    zorder = kwargs.pop('zorder', zorderdefault)
    color = kwargs.pop('color', kwargs.pop('c', colordefault))

    # Multi-mode : one line per year ......................
    if multimode:
        if plot_enso:
            ensoindices = series.ensoindices
            # Take the first column if the indices are 2D
            if ensoindices.ndim > 1:
                ensoindices = ensoindices[:, 0]
            # ENSO mode : different colors for different phases
            for(i, attr) in zip((-1, 0, 1), ('cold', 'neutral', 'warm')):
                key = attr[0].upper()
                label = ENSOlabels[key]
                ydata = series[ensoindices == i]
                ydata = [np.sort(_).compressed()[::-1] for _ in ydata]
                # Materialize the pairs: LineCollection needs sequences,
                # not one-shot zip iterators.
                points = [list(zip(x, y)) for (x, y) in zip(xdata, ydata)]
                collec = LineCollection(points,
                                        label=label,
                                        color=ENSOlines[key],
                                        zorder=zorder, linewidth=lw)
                ax.add_collection(collec, autolim=True)
        else:
            ydata = [np.sort(y.compressed())[::-1] for y in _series]
            points = [list(zip(x, y)) for (x, y) in zip(xdata, ydata)]
            label = kwargs.pop('label', None)
            collec = LineCollection(points, label=label, linewidth=lw,
                                    colors=ENSOlines['G'])
            ax.add_collection(collec, autolim=True)
    # One line for the whole dataset ......................
    else:
        ydata = ma.sort(series.compressed(), endwith=False)[::-1]
        # NOTE(review): relies on the sorted result exposing `._series`
        # (i.e. still being a TimeSeries) -- confirm against the TimeSeries API.
        points = [list(zip(xdata, ydata._series))]
        label = kwargs.pop('label', 'none')
        collec = LineCollection(points, label=label, linewidth=lw,
                                colors=color, zorder=zorder)
        ax.add_collection(collec, autolim=True)
        # If we need to add some colors
        if plot_enso and marker:
            for attr in ('cold', 'neutral', 'warm'):
                key = attr[0].upper()
                label = ENSOlabels[key]
                color = ENSOmarkers[key]
                #ydata = ma.sort(getattr(series, attr), endwith=False)[::-1]
                current = getattr(ydata, attr)._series
                _fdc = ax.plot(xdata, current, ls='', lw=0,
                               marker=marker, ms=markersize,
                               mfc=color, mec=color,
                               label=label, zorder=zorder)
    #........................
    set_normal_limits(ax, xmin=xmin, xmax=xmax, scale=scale)
    ax.set_ylim(_series.min(), _series.max())
    return ax
Exemplo n.º 21
0
def quantile(x,
             probs=DEF_PROBS,
             typ=DEF_TYPE,
             method=DEF_METHOD,
             limit=DEF_LIMIT,
             na_rm=DEF_NARM,
             is_sorted=False):
    """Compute the sample quantiles of a 1D or 2D dataset.

    Usage::

        q = quantile(x, probs=DEF_PROBS, typ=DEF_TYPE, method=DEF_METHOD,
                     limit=DEF_LIMIT, na_rm=DEF_NARM, is_sorted=False)

    Parameters
    ----------
    x : pandas DataFrame/Series, numpy array, or anything accepted by
        ``np.asarray``. At most 2 dimensions.
    probs : list, tuple, numpy array or pandas DataFrame/Series of
        probabilities. Values must lie in [0, 1]; values outside that range
        by no more than ``100 * eps`` are clamped to the nearest endpoint.
    typ : quantile definition; must belong to ``TYPES``. The lookup tables
        below define the values 1 to 11.
    method : ``'DIRECT'`` selects the canonical implementation, ``'INHERIT'``
        the one modelled on scipy's ``mquantiles``.
    limit : list, tuple or array of length 2.
        NOTE(review): this argument is validated but never applied, see the
        note in the "filter the input data" section below.
    na_rm : bool. When True, NaN entries are masked and ignored; when False,
        any NaN in ``x`` raises ``ValueError``.
    is_sorted : bool. When True the data are used as given, otherwise the
        unmasked values are sorted first.

    Returns
    -------
    For 1D input, an array holding one quantile per entry of ``probs``.
    For 2D input, a masked array with one row of quantiles per row of ``x``
    (quantiles are computed along axis 1).

    Raises
    ------
    TypeError
        If an argument has an unsupported type.
    ValueError
        If ``x`` has more than 2 dimensions, ``probs`` is outside [0, 1],
        ``typ``/``method`` are not supported, ``limit`` does not have length
        2, or NaN values are present while ``na_rm`` is False.
    """

    ## various parameter checkings

    # check the data
    if isinstance(x, (pd.DataFrame, pd.Series)):
        try:
            x = x.values
        except Exception:
            raise TypeError("conversion type error for input dataset")
    elif not isinstance(x, np.ndarray):
        try:
            x = np.asarray(x)
        except Exception:
            raise TypeError("wrong type for input dataset")
    ndim = x.ndim
    if ndim > 2:
        raise ValueError("array should be 2D at most !")

    # check the probs
    if isinstance(probs, (pd.DataFrame, pd.Series)):
        try:
            probs = probs.values
        except Exception:
            raise TypeError("conversion type error for input probabilities")
    elif isinstance(probs, (list, tuple)):
        try:
            # Do not pass copy=False: building an array from a list always
            # needs a copy, and NumPy >= 2 raises in that case.
            probs = np.array(probs, ndmin=1)
        except Exception:
            raise TypeError("conversion type error for input probabilities")
    elif not isinstance(probs, np.ndarray):
        raise TypeError("wrong type for input probabilities")
    # the helpers below index and take len() of probs-shaped arrays, so a
    # 0-d array is promoted to 1-d
    probs = np.atleast_1d(probs)
    # adjust the values: this is taken from R implementation, where values up
    # to 2e-14 outside that range are accepted and moved to the nearby endpoint
    eps = 100 * np.finfo(np.double).eps
    if (probs < -eps).any() or (probs > 1 + eps).any():
        raise ValueError("probs values outside [0,1]")
    probs = np.maximum(0, np.minimum(1, probs))

    #weights = np.ones(x)
    ## check the weights
    #if isinstance(weights, (pd.DataFrame,pd.Series)):
    #    try:        weights = weights.values
    #    except:     raise TypeError("conversion type error for input weights")
    #elif not isinstance(weights, np.ndarray):
    #    try:        weights = np.asarray(weights)
    #    except:     raise TypeError("wrong type for input weights")
    #if x.shape != weights.shape:
    #    raise ValueError("the length of data and weights must be the same")

    # check parameter typ value
    if typ not in TYPES:
        raise ValueError(
            "typ should be an integer in range [1,{}]!".format(TYPES))

    # check parameter method value
    if method not in METHODS:
        raise ValueError("method should be in {}!".format(METHODS))

    # check parameter is_sorted
    if not isinstance(is_sorted, bool):
        raise TypeError("wrong type for boolean flag is_sorted!")

    # check parameter na_rm
    if not isinstance(na_rm, bool):
        raise TypeError("wrong type for boolean flag na_rm!")

    # check parameter limit
    if not isinstance(limit, (list, tuple, np.ndarray)):
        raise TypeError("wrong type for parameter limit!")
    if len(limit) != 2:
        raise ValueError("the length of limit must be 2")

    ## algorithm implementation

    def gamma_indice(g, j, typ):
        """Return the weight given to the upper order statistic."""
        gamma = np.zeros(len(j))
        if typ == 1:
            gamma[g > 0] = 1
            # gamma stays 0 where g <= 0
        elif typ == 2:
            gamma[g > 0] = 1
            gamma[g <= 0] = 0.5
        elif typ == 3:
            gamma[np.logical_or(g != 0, j % 2 == 1)] = 1
        elif typ >= 4:
            # continuous definitions interpolate with the fractional part
            gamma = g
        return gamma

    def _canonical_quantile1D(typ, sorted_x, probs):
        """Compute the quantile of a 1D numpy array using the canonical/direct
        approach derived from the original algorithms from Hyndman & Fan, Cunane
        and Filliben.
        """
        # inspired by the _quantiles1D function of mquantiles
        N = len(sorted_x)  # sorted_x.count()
        if N == 0:
            # nothing to interpolate: mirror _mquantile1D and return a fully
            # masked result instead of failing on an empty index
            return np_ma.array(np.empty(len(probs), dtype=float), mask=True)
        # offset m of the selected definition (may depend on probs)
        m = {1: 0, 2: 0, 3: -0.5, 4: 0, 5: 0.5,
             6: probs, 7: 1 - probs, 8: (probs + 1) / 3.,
             9: (2 * probs + 3) / 8.,
             10: .4 + .2 * probs, 11: .3175 + .365 * probs}[typ]
        j = np.int_(np.floor(N * probs + m))
        # adjust for the bounds (indexes start at 0...). Both indexes are
        # clipped on both sides: a negative j (typ 3, small probs) would wrap
        # around to the largest sample, and j - 1 can reach N (typ 6, p = 1).
        j_1 = np.clip(j - 1, 0, N - 1)
        j = np.clip(j, 0, N - 1)
        x1 = sorted_x[j_1]
        x2 = sorted_x[j]
        g = probs * N + m - j
        gamma = gamma_indice(g, j, typ)
        return (1 - gamma) * x1 + gamma * x2

    def _mquantile1D(typ, sorted_x, probs):
        """Compute the quantiles of a 1D numpy array following the implementation
        of the _quantiles1D function of mquantiles.
        source: https://github.com/scipy/scipy/blob/master/scipy/stats/mstats_basic.py
        """
        N = len(sorted_x)  # ndarray's have no 'count' attribute
        if N == 0:
            return np_ma.array(np.empty(len(probs), dtype=float), mask=True)
        elif N == 1:
            return np_ma.array(np.resize(sorted_x, probs.shape),
                               mask=np_ma.nomask)
        # note that, wrt to the original implementation (see source code mentioned
        # above), we also added the definitions of (alphap,betap) for typ in [1,2,3]
        # (float literals keep the fractions exact under integer division too)
        alphap, betap = {1: (0, 1), 2: (0, 1), 3: (-.5, -1.5), 4: (0, 1),
                         5: (.5, .5), 6: (0, 0), 7: (1, 1),
                         8: (1. / 3, 1. / 3), 9: (3. / 8, 3. / 8),
                         10: (.4, .4), 11: (.3175, .3175)}[typ]
        m = alphap + probs * (1. - alphap - betap)
        aleph = (probs * N + m)
        j = np.floor(aleph.clip(1, N - 1)).astype(int)
        g = (aleph - j).clip(0, 1)
        gamma = gamma_indice(g, j, typ)
        return (1. - gamma) * sorted_x[
            (j - 1).tolist()] + gamma * sorted_x[j.tolist()]

    def _wquantile1D(typ, x, probs, weights):  # not used
        """Compute the weighted quantile of a 1D numpy array.
        """
        # Check the data
        ind_sorted = np.argsort(x)
        sorted_x = x[ind_sorted]
        sorted_weights = weights[ind_sorted]
        # Compute the auxiliary arrays
        Sn = np.cumsum(sorted_weights)
        #assert Sn != 0, "The sum of the weights must not be zero"
        Pn = (Sn - 0.5 * sorted_weights) / np.sum(sorted_weights)
        # Get the value of the weighted median
        return np.interp(probs, Pn, sorted_x)

    ## actual calculation

    # select method
    if method == 'DIRECT':
        _quantile1D = _canonical_quantile1D
    elif method == 'INHERIT':
        _quantile1D = _mquantile1D
    else:
        # METHODS accepted a value that has no implementation here
        raise ValueError("unsupported method {!r}!".format(method))

    # define input data
    if na_rm:
        data = np_ma.array(x, copy=True, mask=np.isnan(x))
        # weights = np_ma.array(x, copy=True, mask = np.isnan(x))
    elif np.isnan(x).any():
        raise ValueError(
            "missing values and NaN's not allowed if 'na_rm' is FALSE")
    else:
        data = np_ma.array(x, copy=False)

    # filter the input data
    # NOTE(review): limit is validated above as a 2-element sequence, so this
    # identity test is never true and the filter below never runs. It is left
    # unchanged because enabling it would alter results for every caller that
    # relies on DEF_LIMIT -- confirm the intended behaviour before fixing.
    if limit is True:
        condition = (limit[0] < data) & (data < limit[1])
        data[~condition.filled(True)] = np_ma.masked

    def _row_quantile(values):
        """Quantiles of one 1D sample, sorting it first unless is_sorted."""
        if not is_sorted:
            values = np_ma.sort(values.compressed())
        return _quantile1D(typ, values, probs)

    # Computes quantiles along axis (or globally)
    if ndim == 1:
        return _row_quantile(data)
    # apply_along_axis(func1d, axis, arr): the data array must be the third
    # argument, and each row is sorted on its own so rows are not mixed.
    return np_ma.apply_along_axis(_row_quantile, 1, data)
Exemplo n.º 22
0
import numpy as np
from numpy.ma import sort

# Degrees of freedom of the F distribution that is sampled below.
dfnum = 1.  # numerator: between group degrees of freedom
dfden = 48.  # denominator: within groups degrees of freedom

# Draw 1000 random samples from F(dfnum, dfden).
s = np.random.f(dfnum, dfden, size=1000)

# 10th value from the end of the ascending sort, i.e. the 10th-largest
# sample; the value is only displayed in an interactive session.
sort(s)[-10]