Пример #1
def display_data(X, width=None, save=False):
  """Tile the rows of X as images in one grid and draw it with plt.imshow.

  Each row of X is reshaped to (height, width) and transposed; the tiles are
  joined left-to-right in chunks of display_cols and the resulting rows are
  stacked top-to-bottom.

  X: 2-D array or matrix with one flattened image per row (m rows, n values).
  width: width of one tile; when falsy, round(sqrt(n)) is used instead.
  save: flag tested at the end of the function (see the NOTE there).

  Returns None.
  """
  m, n = X.shape
  width = sp.int_(width or sp.around(sp.sqrt(n)))
  height = sp.int_(n / width)
  # Grid layout: floor(sqrt(m)) rows, and enough columns to hold all m tiles.
  display_rows = sp.int_(sp.floor(sp.sqrt(m)))
  display_cols = sp.int_(sp.ceil(m / display_rows))

  def rightward(acc, curr):
    # Append one tile to the right of the row built so far.
    return sp.hstack([acc, curr])

  def downward(acc, curr):
    # Append one finished row below the image built so far.
    return sp.vstack([acc, curr])

  def merge(func, init):
    # Build a folder: reduce a sequence with func, starting from init.
    return lambda arr: reduce(func, arr, init)

  # Empty seeds that give reduce() something to stack the first piece onto.
  # NOTE(review): each transposed tile has shape (width, height) while these
  # seeds assume `height` rows and `width` columns per tile, so this only
  # lines up for square tiles - confirm.
  init_rightward = sp.matrix([]).reshape([height, 0])
  init_downward = sp.matrix([]).reshape([0, width * display_cols])

  img_list = [X[i].reshape([height, width]).T for i in range(0, m)]
  img_list_split = [img_list[i:i+display_cols] for i in range(0, len(img_list), display_cols)]
  # NOTE(review): when m is not a multiple of display_cols the last row holds
  # fewer tiles than the others; vstack presumably rejects it - confirm.
  img = merge(downward, init_downward)(map(merge(rightward, init_rightward), img_list_split))

  plt.imshow(img, cmap='gray')
  plt.tick_params(labelbottom='off', labelleft='off')
  # NOTE(review): the body of this `if` is missing from the source as
  # received, which is a syntax error. What should happen when save is true
  # (and when it is false) cannot be told from here.
  if save:

  return None
Пример #2
 def tree_sample(self):
     """Draw the sample indices used for one tree.

     With ``self.subsampling`` set, ``int(self.n * self.sampsize)`` distinct
     indices are drawn without replacement; otherwise ``self.n`` indices are
     drawn with replacement.

     Returns:
         1-D integer array of indices taken from ``range(self.n)``.
     """
     if self.subsampling:
         n_sample = SP.int_(self.n*self.sampsize)
         subsample = SP.random.permutation(self.n)[:n_sample]
     else:
         # Without this branch the draw below overwrote the subsample above
         # and left `subsample` unbound when subsampling was off.
         # randint's upper bound is exclusive, so this covers 0 .. n-1 exactly
         # as the deprecated random_integers(0, n-1, n) did.
         subsample = SP.random.randint(0, self.n, self.n)
     return subsample
Пример #3
    def updateW(self, m):
        """Update the weight statistics of factor ``m`` in place.

        Reads ``self.S``, ``self.Eps``, ``self.Alpha``, ``self.Pi`` and the
        data mean selected by ``self.noise``, then overwrites column ``m`` of
        ``self.W.C``, ``self.W.E1``, ``self.W.sigma2`` and ``self.W.E2diag``.

        m: index of the factor (column) to update.

        NOTE(review): the source as received had lost an ``else:`` and parts
        of two expressions (``setMinus`` and ``b``). They are reconstructed
        here from the surviving tokens and the array shapes; confirm against
        the upstream file.
        """
        M = self.components
        if self.noise == 'gauss':
            YmeanX = self.Z.E1
        elif self.noise == 'hurdle' or self.noise == 'poisson':
            YmeanX = self.meanX

        # Prior log-odds; every branch of the original started from this.
        logPi = SP.log(self.Pi[:, m] / (1 - self.Pi[:, m]))
        plain_prior = ((m < self.nKnown) or (m in self.iLatentSparse)
                       or (m in self.iLatent))
        if (not plain_prior and self.nScale > 0
                and self.nScale < YmeanX.shape[0]):
            # Rescale the log-odds of entries that are a priori "off" ...
            isOFF_ = self.Pi[:, m] < .5
            logPi[isOFF_] = (YmeanX.shape[0] / self.nScale) * SP.log(
                self.Pi[isOFF_, m] / (1 - self.Pi[isOFF_, m]))

            # ... and, when requested, boost the ones that are "on".
            isON_ = self.Pi[:, m] > .5
            if self.onF > 1.:
                logPi[isON_] = self.onF * SP.log(self.Pi[isON_, m] /
                                                 (1 - self.Pi[isON_, m]))

        sigma2Sigmaw = (1.0 / self.Eps.E1) * self.Alpha.E1[m]

        # Indices of every factor except m.
        setMinus = SP.int_(
            SP.hstack([list(range(M))[0:m], list(range(M))[m + 1::]]))
        # Inner products of factor m with each of the other factors.
        SmTSk = SP.sum(
            SP.tile(self.S.E1[:, m:m + 1],
                    (1, M - 1)) * self.S.E1[:, setMinus], 0)
        SmTSm = SP.dot(self.S.E1[:, m].transpose(),
                       self.S.E1[:, m]) + self.S.diagSigmaS[:, m].sum()

        # Contribution already explained by the other factors.
        b = SP.dot((self.W.C[:, setMinus, 0] * self.W.E1[:, setMinus]),
                   SmTSk.transpose())
        diff = SP.dot(self.S.E1[:, m].transpose(), YmeanX) - b

        SmTSmSig = SmTSm + sigma2Sigmaw

        #update C and W

        u_qm = logPi + 0.5 * SP.log(sigma2Sigmaw) - 0.5 * SP.log(SmTSmSig) + (
            0.5 * self.Eps.E1) * ((diff**2) / SmTSmSig)
        self.W.C[:, m, 0] = 1. / (1 + SP.exp(-u_qm))

        self.W.C[:, m, 1] = 1 - self.W.C[:, m, 0]
        self.W.E1[:, m] = (diff / SmTSmSig)  #q(w_qm | s_qm=1), q=1,...,Q
        self.W.sigma2[:, m] = (1. / self.Eps.E1) / SmTSmSig
        self.W.E2diag[:, m] = self.W.E1[:, m]**2 + self.W.sigma2[:, m]
Пример #4
    def updateS(self, m):
        """Update the statistics of factor ``m`` in ``self.S`` in place.

        For ``m >= self.nKnown`` both ``self.S.E1[:, m]`` and
        ``self.S.diagSigmaS[:, m]`` are recomputed and the column sum is
        cached in ``self.Eps.diagSigmaS[m]``. For ``m < self.nKnown`` only
        ``self.S.diagSigmaS[:, m]`` is refreshed.

        m: index of the factor (column) to update.

        NOTE(review): the source as received had lost the ``else:`` that
        separates the two cases and part of the ``setMinus`` expression; both
        are reconstructed here - confirm against the upstream file.
        """
        M = self.components
        if m >= self.nKnown:
            if self.noise == 'gauss':
                YmeanX = self.Z.E1
            elif self.noise == 'hurdle' or self.noise == 'poisson':
                YmeanX = self.meanX

            #update S
            SW_sigma = (self.W.C[:, m, 0] * self.W.E1[:, m]) * self.Eps.E1
            SW2_sigma = (self.W.C[:, m, 0] *
                         (self.W.E2diag[:, m])) * self.Eps.E1

            # Indices of every factor except m.
            # NOTE(review): the original first restricted these indices to
            # factors with self.doUpdate == 1 and then rebuilt the full set
            # straight away, so the restriction never took effect. The
            # effective (unrestricted) behaviour is kept; confirm which one
            # was intended.
            setMinus = SP.int_(
                SP.hstack([list(range(M))[0:m], list(range(M))[m + 1::]]))

            # Part of the data already explained by the other factors.
            b0 = SP.dot(self.S.E1[:, setMinus],
                        (self.W.C[:, setMinus, 0] *
                         self.W.E1[:, setMinus]).transpose())
            b = SP.dot(b0, SW_sigma)

            alphaSm = SP.sum(SW2_sigma, 0)
            barmuS = SP.dot(YmeanX, SW_sigma) - b
            self.S.diagSigmaS[:, m] = 1. / (1 + alphaSm)
            self.S.E1[:, m] = barmuS / (1. + alphaSm)

            #keep diagSigmaS
            self.Eps.diagSigmaS[m] = SP.sum(self.S.diagSigmaS[:, m])
        else:
            SW2_sigma = (self.W.C[:, m, 0] *
                         (self.W.E2diag[:, m])) * self.Eps.E1
            alphaSm = SP.sum(SW2_sigma, 0)
            self.S.diagSigmaS[:, m] = 1. / (1 + alphaSm)
Пример #5
    def compute(nn_params):
        """Return the unrolled gradient for the flattened parameters.

        nn_params: 1-D array holding theta_1 followed by theta_2, flattened.

        Returns:
            1-D array: the raveled theta_1 gradient followed by the raveled
            theta_2 gradient, each including the regularization term (the
            first column of each theta is not regularized).

        Uses X, Y, the layer sizes, num_labels, _lambda, forward_prop and
        sigmoid_gradient from the enclosing scope.

        NOTE(review): the one-hot expression for ``b`` was truncated in the
        source as received; ``sp.apply_along_axis(..., 1, Y)`` is a
        reconstruction from the surviving ``lambda n: ..., 1,`` tokens.
        """
        m = Y.shape[0]

        # Reshape nn_params back into the parameters theta_1 and theta_2
        split = hidden_layer_size * (input_layer_size + 1)
        theta_1 = nn_params[0:split]. \
                    reshape([hidden_layer_size, input_layer_size + 1])
        theta_2 = nn_params[split:]. \
                    reshape([num_labels, hidden_layer_size + 1])

        # Copies with the first column zeroed, used only for regularization.
        theta_1_reg = sp.copy(theta_1)
        theta_1_reg[:, 0] = 0
        theta_2_reg = sp.copy(theta_2)
        theta_2_reg[:, 0] = 0

        # Forward propagation
        f = forward_prop(X)(theta_1, theta_2)

        # Per-layer values needed for back propagation
        a_1 = f['a'][0]
        a_2 = f['a'][1]
        a_3 = f['a'][2]
        z_2 = f['z'][0]

        # Transform Y: row i becomes a 0/1 indicator over labels 1..num_labels
        b = sp.matrix(sp.apply_along_axis(
                lambda n: sp.int_(sp.array(range(1, num_labels + 1)) == n), 1,
                Y))

        DEL_1 = sp.matrix(sp.zeros((hidden_layer_size, input_layer_size + 1)))
        DEL_2 = sp.matrix(sp.zeros((num_labels, hidden_layer_size + 1)))

        # Accumulate the per-sample contributions.
        for i in range(0, m):
            del_3 = a_3[i, :].T - b[i, :].T
            del_2 = sp.multiply(theta_2[:, 1:].T * del_3,
                                sigmoid_gradient(z_2[i, :].T))

            DEL_2 = DEL_2 + del_3 * a_2[i, :]
            DEL_1 = DEL_1 + del_2 * a_1[i, :]

        # Regularize
        theta_1_grad = DEL_1 / m + (_lambda / m) * theta_1_reg
        theta_2_grad = DEL_2 / m + (_lambda / m) * theta_2_reg
        grad = sp.concatenate([sp.ravel(theta_1_grad), sp.ravel(theta_2_grad)])

        return grad
Пример #6
def CenteredLagProduct(rawbeams,numtype=sp.complex128,pulse =sp.ones(14)):
    """ This function will create a centered lag product for each range using the
    raw IQ given to it.  It will form each lag for each pulse and then integrate
    all of the pulses.
    Inputs:
        rawbeams - This is a NpxNs complex numpy array where Ns is number of
        samples per pulse and Np is number of pulses
        numtype - The type of numbers used to create the data. Default is sp.complex128
        pulse - Array whose length N sets the number of lags that will be
        created; only its length is used. Default is sp.ones(14), i.e. 14 lags.
    Output:
        acf_cent - This is a NrxNl complex numpy array where Nr is number of
        range gate and Nl is number of lags. Nr equals Ns - N + 1.
    """
    # The lag count comes from the pulse length; N was otherwise undefined.
    N = len(pulse)
    # It will be assumed the data will be pulses vs range
    rawbeams = rawbeams.transpose()
    (Nr, Np) = rawbeams.shape

    # Offsets of the two samples multiplied for each lag: lag k pairs the
    # sample floor(k/2) behind the centre with the one ceil(k/2) ahead.
    arex = sp.arange(0, N/2.0, 0.5)
    arback = sp.array([-sp.int_(sp.floor(k)) for k in arex])
    arfor = sp.array([sp.int_(sp.ceil(k)) for k in arex])

    # figure out how much range space will be kept
    ap = sp.nanmax(abs(arback))
    ep = Nr - sp.nanmax(arfor)
    rng_ar_all = sp.arange(ap, ep)

    acf_cent = sp.zeros((ep-ap, N), dtype=numtype)
    for irng, centre in enumerate(rng_ar_all):
        rng_ar1 = sp.int_(centre) + arback
        rng_ar2 = sp.int_(centre) + arfor
        # form every lag for every pulse, then sum along the pulses
        acf_tmp = sp.conj(rawbeams[rng_ar1, :])*rawbeams[rng_ar2, :]
        acf_cent[irng, :] = sp.sum(acf_tmp, 1)
    return acf_cent
Пример #7
 def compare(self, chunk, tiles):
     """Find the tile and shift whose window is closest to ``chunk``.

     For every shift (row, col) inside ``self.shiftDim`` the window
     ``tiles[:, row:row+S, col:col+S, :]`` (S is the chunk's first dimension)
     is scored against the chunk with ``self.distance``.

     Returns:
         (idx, pos, dist): index of the winning tile, the winning shift
         passed through ``self.translatePos``, and the minimal distance.
     """
     side = chunk.shape[0]
     assert (side == self.compareChunkSize)
     chunk = scipy.int_(chunk)
     n_down, n_across = self.shiftDim[0], self.shiftDim[1]
     # one distance per (vertical shift, horizontal shift, tile)
     distances = scipy.zeros((n_down, n_across, tiles.shape[0]))
     for row in range(n_down):
         rows = slice(row, row + side)
         for col in range(n_across):
             window = tiles[:, rows, col:col + side, :]
             distances[row, col, :] = self.distance(chunk, window)
     best = scipy.unravel_index(scipy.argmin(distances), distances.shape)
     return (best[-1], self.translatePos(best[:-1]), distances[best])
Пример #8
    def test_delta_updating(self):
        # Builds a synthetic data set with a step-shaped fixed effect, Gaussian
        # noise and a kernel-correlated confounder, then fits two MF models on
        # a random training split and predicts on the remaining samples.
        # NOTE(review): several statements below are truncated in the source as
        # received (marked individually); the dropped arguments cannot be
        # recovered from here, and no assertion on the predictions is visible.
        n_sample = 100
        # An n_sample x 2 matrix: evenly spaced values in [0, 1) in column 0,
        # uniform random values in column 1
        X = SP.empty((n_sample, 2))
        X[:, 0] = SP.arange(0, 1, 1.0 / n_sample)
        X[:, 1] = SP.random.rand(n_sample)
        sd_noise = .5
        sd_conf = .5
        noise = SP.random.randn(n_sample, 1) * sd_noise

        # print 'true delta equals', (sd_noise**2)/(sd_conf**2)
        # Here, the observed y is a step function of the first column in X
        # (1 where it exceeds 0.5) plus a little independent gaussian noise
        y_fixed = (X[:, 0:1] > .5) * 1.0
        y_fn = y_fixed + noise

        # Divide into training and test sample using 2/3 of data for training
        # NOTE(review): the start of the indexing statement on the second line
        # below is missing (presumably an assignment into training_sample
        # through a random permutation) - confirm.
        training_sample = SP.zeros(n_sample, dtype='bool')
                        [:SP.int_(.66 * n_sample)]] = True
        test_sample = ~training_sample

        # NOTE(review): the term added to the quadratic kernel after the
        # trailing backslash is missing.
        kernel = utils.getQuadraticKernel(X[:, 0], d=0.0025) +\
        # The confounder is one draw from a zero-mean normal with covariance
        # `kernel`, scaled by sd_conf
        y_conf = sd_conf * SP.random.multivariate_normal(
            SP.zeros(n_sample), kernel, 1).reshape(-1, 1)
        y_tot = y_fn + y_conf
        # Selects rows and columns
        kernel_train = kernel[SP.ix_(training_sample, training_sample)]
        kernel_test = kernel[SP.ix_(test_sample, training_sample)]
        # NOTE(review): the remaining MF arguments and the closing parenthesis
        # are missing.
        lm_forest = MF(kernel=kernel_train,
        # Returns prediction for random effect
        lm_forest.fit(X[training_sample], y_tot[training_sample])
        response_lmf = lm_forest.predict(X[test_sample], k=kernel_test)

        # print 'fitting forest (delta-update)'
        # Second model on the same split
        # NOTE(review): the remaining MF arguments and the closing parenthesis
        # are missing here as well.
        random_forest = MF(kernel=kernel_train,
        random_forest.fit(X[training_sample], y_tot[training_sample])
        response_rf = random_forest.predict(X[test_sample], k=kernel_test)
def get_distribution(list):
    """Plot fitted scipy.stats distributions over a synthetic histogram.

    A von Mises sample with as many points as ``list`` has elements is
    drawn, multiplied by 255 and rounded to integers. Its histogram is drawn
    with one bin per integer in 0..255, and the fitted PDF of each candidate
    distribution (gamma, beta, rayleigh, norm) is overlaid. The figure is
    saved as ``distribuciones.png`` in the current directory.

    list: any sized object; only ``len(list)`` is used.

    Returns None.

    NOTE(review): the values in ``list`` are never read - confirm whether the
    fits were meant to run on that data rather than on a synthetic sample.
    """
    size = len(list)
    x = scipy.arange(size)
    y = scipy.int_(scipy.round_(scipy.stats.vonmises.rvs(5, size=size) * 255))
    plt.hist(y, bins=range(256), color='w')
    # 'rayleigh' used to be listed twice, which fitted and plotted it twice.
    dist_names = ['gamma', 'beta', 'rayleigh', 'norm']
    for dist_name in dist_names:
        dist = getattr(scipy.stats, dist_name)
        param = dist.fit(y)
        # fit() returns the shape parameters followed by loc and scale.
        pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2],
                              scale=param[-1]) * size
        plt.plot(pdf_fitted, label=dist_name)
    plt.xlim(0, 255)
    plt.legend(loc='upper right')
    plt.savefig('distribuciones.png', bbox_inches='tight')
Пример #10
    def test_delta_updating(self):
        # Builds a synthetic data set with a step-shaped fixed effect, Gaussian
        # noise and a kernel-correlated confounder, then fits two MF models on
        # a random training split and predicts on the remaining samples.
        # NOTE(review): several statements below are truncated in the source as
        # received (marked individually); the dropped pieces cannot be
        # recovered from here, and no assertion on the predictions is visible.
        n_sample = 100
        # An n_sample x 2 matrix: evenly spaced values in [0, 1) in column 0,
        # uniform random values in column 1
        X = SP.empty((n_sample, 2))
        X[:, 0] = SP.arange(0, 1, 1.0/n_sample)
        X[:, 1] = SP.random.rand(n_sample)
        sd_noise = .5
        sd_conf = .5
        noise = SP.random.randn(n_sample, 1)*sd_noise

        # print 'true delta equals', (sd_noise**2)/(sd_conf**2)
        # Here, the observed y is a step function of the first column in X
        # (1 where it exceeds 0.5) plus a little independent gaussian noise
        y_fixed = (X[:, 0:1] > .5)*1.0
        y_fn = y_fixed + noise

        # Divide into training and test sample using 2/3 of data for training
        # NOTE(review): the opening of the indexing statement on the second
        # line below is missing (presumably `training_sample[`) - confirm.
        training_sample = SP.zeros(n_sample, dtype='bool')
            SP.random.permutation(n_sample)[:SP.int_(.66*n_sample)]] = True
        test_sample = ~training_sample

        # NOTE(review): the term added to the quadratic kernel after the
        # trailing backslash is missing.
        kernel = utils.getQuadraticKernel(X[:, 0], d=0.0025) +\
        # The confounder is one draw from a zero-mean normal with covariance
        # `kernel`, scaled by sd_conf
        y_conf = sd_conf*SP.random.multivariate_normal(SP.zeros(n_sample),
                                                       kernel, 1).reshape(-1, 1)
        y_tot = y_fn + y_conf
        # Selects rows and columns
        kernel_train = kernel[SP.ix_(training_sample, training_sample)]
        kernel_test = kernel[SP.ix_(test_sample, training_sample)]
        # Model without delta updating, depth limited to 1
        # NOTE(review): the last argument(s) and the closing parenthesis of
        # this call are missing.
        lm_forest = MF(kernel=kernel_train, update_delta=False, max_depth=1,
        # Returns prediction for random effect
        lm_forest.fit(X[training_sample], y_tot[training_sample])
        response_lmf = lm_forest.predict(X[test_sample], k=kernel_test)

        # print 'fitting forest (delta-update)'
        # Model with delta updating, depth limited to 5
        # NOTE(review): the last argument(s) and the closing parenthesis of
        # this call are missing here as well.
        random_forest = MF(kernel=kernel_train, update_delta=True, max_depth=5,
        random_forest.fit(X[training_sample], y_tot[training_sample])
        response_rf = random_forest.predict(X[test_sample], k=kernel_test)
Пример #11
    def compute(nn_params):
        """Return the regularized cost for the flattened parameters.

        nn_params: 1-D array holding theta_1 followed by theta_2, flattened.

        Returns:
            Scalar cost: the mean over samples of the cross-entropy between
            the one-hot labels and the output layer, plus
            ``_lambda / (2 m)`` times the sum of squared weights (the first
            column of each theta is excluded).

        Uses X, Y, the layer sizes, num_labels, _lambda and forward_prop
        from the enclosing scope.

        NOTE(review): the one-hot expression for ``b`` was truncated in the
        source as received; ``sp.apply_along_axis(..., 1, Y)`` is a
        reconstruction from the surviving ``lambda n: ..., 1,`` tokens.
        """
        m = Y.shape[0]

        # Reshape nn_params back into the parameters theta_1 and theta_2
        split = hidden_layer_size * (input_layer_size + 1)
        theta_1 = nn_params[0:split]. \
                    reshape([hidden_layer_size, input_layer_size + 1])
        theta_2 = nn_params[split:]. \
                    reshape([num_labels, hidden_layer_size + 1])

        # Copies with the first column zeroed, used only for regularization.
        theta_1_reg = sp.copy(theta_1)
        theta_1_reg[:, 0] = 0
        theta_2_reg = sp.copy(theta_2)
        theta_2_reg[:, 0] = 0

        # Forward propagation; only the output layer is needed here
        f = forward_prop(X)(theta_1, theta_2)
        a_3 = f['a'][2]

        # Transform Y: row i becomes a 0/1 indicator over labels 1..num_labels
        b = sp.matrix(sp.apply_along_axis(
                lambda n: sp.int_(sp.array(range(1, num_labels + 1)) == n), 1,
                Y))

        total = 0
        for i in range(0, m):
            total = total + (-b[i, :] * sp.log(a_3[i, :].T) -
                             (1 - b[i, :]) * sp.log(1 - a_3[i, :].T))[0, 0]
        # Divide as floats: `1 / m` is 0 under Python 2 integer division.
        J = total / float(m)

        # Regularize
        J = J + (_lambda / (2.0 * m)) * (sp.sum(sp.power(theta_1_reg, 2)) +
                                         sp.sum(sp.power(theta_2_reg, 2))).real

        return J
Пример #12
def findDuplicateVectors(vec, tol=vTol, equivPM=False):
    """
    Find vectors in an array that are equivalent to within
    a specified tolerance

      USAGE:

          eqv, uid = findDuplicateVectors(vec, tol, equivPM)

      INPUT:

          1) vec is n x m, a double array of m horizontally concatenated
                           n-dimensional vectors.
         *2) tol is 1 x 1, a scalar tolerance.  If not specified, the
                           module-level default vTol is used.
         *3) set equivPM to True if vec and -vec are to be treated as equivalent

      OUTPUT:

          1) eqv is 1 x p, a list of p equivalence relationships.
          2) uid is a sorted list of column indices holding the first
                           occurrence of every distinct vector.

      NOTES:

          Two columns are equivalent when the sum of the absolute
          differences of their components is <= tol.

          Each equivalence relationship is a 1 x q vector of indices that
          represent the locations of duplicate columns/entries in the array
          vec.  For example:

                | 1     2     2     2     1     2     7 |
          vec = |                                       |
                | 2     3     5     3     2     3     3 |

          eqv = [[1x2 double]    [1x3 double]], where

          eqv[0] = [0  4]
          eqv[1] = [1  3  5]

          and uid = [0, 1, 2, 6]
    """
    vlen = vec.shape[1]
    vlen0 = vlen

    torid = asarray(range(vlen), dtype="int")  # original index of each column left
    tvec = vec.copy()

    eqv = []     # groups of mutually equivalent columns
    eqvTot = []  # every index that belongs to some group
    uid = []     # first index of each group

    ii = 1
    while vlen > 1 and ii < vlen0:
        # Compare every remaining column against the first remaining one.
        ref = tvec[:, 0:1]
        match = abs(tvec - ref).sum(0)[1:] <= tol
        if equivPM:
            # Also accept columns equal to the negated reference.
            match = match + (abs(tvec + ref).sum(0)[1:] <= tol)

        kick = hstack([True, match])  # pick self too

        if kick.sum() > 1:
            group = torid[kick].tolist()
            eqv.append(group)
            eqvTot.extend(group)
            uid.append(group[0])

        # Drop the reference and everything matched to it.
        cmask = ~kick
        tvec = tvec[:, cmask]
        torid = torid[cmask]
        vlen = tvec.shape[1]

        ii += 1

    # find all single-instance vectors
    singles = sort(setxor1d(eqvTot, range(vlen0)))

    # now construct list of unique vector column indices
    uid = int_(sort(union1d(uid, singles))).tolist()

    return eqv, uid
def var_ksFit(data, npoints, perc, extra=None):
    """Fit a normal curve to the left flank of the data's density and return
    the value at which its cumulative probability equals ``perc``.

    A Gaussian KDE of ``data`` is evaluated on ``npoints`` evenly spaced
    points; three passes then scan candidate sigmas (0.5x .. 1.5x the current
    sigma) and candidate means (0.5x .. 1.5x the current mean). Each pass
    plots the fitted curve and the cut-off over a histogram of ``data`` and
    saves the figure as ``home + '/cell_profiler_hist_' + extra + str(kk) +
    '.pdf'``. Intermediate values are dumped to ``home + '/diag_vksf.mat'``.

    data: sample passed to stats.gaussian_kde, plt.hist and sp.std.
    npoints: number of grid points (also used as the histogram bin count).
    perc: probability handed to ``ppf`` of the fitted normal.
    extra: histogram colour, also concatenated into the figure file name.

    Returns ``ci`` from the last pass.

    NOTE(review): with the default extra=None the file-name concatenation
    before plt.savefig raises TypeError (assuming ``home`` is a str).
    """

    # Diagnostics collected along the way and written to a .mat file.
    diag_vksf = dict()
    diag_vksf['data'] = data
    diag_vksf['npoints'] = npoints
    diag_vksf['perc'] = perc

    sio.savemat(home + '/diag_vksf.mat', diag_vksf)

    # kde_pdf = stats.gaussian_kde(flattened)
    kde_pdf = stats.gaussian_kde(data)

    # xi, dx = sp.linspace(flattened.min(), flattened.max(), npoints, retstep=True)
    xi, dx = sp.linspace(data.min(), data.max(), npoints, retstep=True)
    diag_vksf['xi'] = xi
    diag_vksf['dx'] = dx

    f = kde_pdf(xi)
    diag_vksf['f'] = f

    # plt.hist(flattened, bins=npoints, color=extra)
    plt.hist(data, bins=npoints, color=extra, alpha=0.5)

    # Initial guesses: mean at the density peak, sigma from the sample.
    # mdx is the tuple returned by sp.where, not a plain integer index.
    mdx = sp.where(f == f.max())#[0][0]
    diag_vksf['mdx'] = mdx
    mu = xi[mdx]
    diag_vksf['mu'] = mu
    # sigma = sp.std(flattened)
    sigma = sp.std(data)
    diag_vksf['sigma'] = sigma

    # Only grid points below this index (peak plus half a sigma, in grid
    # steps) enter the error term.
    err_lookforward = sp.int_(sp.floor(mdx + 0.5 * sigma / dx))
    diag_vksf['err_lookforward'] = err_lookforward

    # NOTE(review): these lists are created but never appended to in the
    # visible code.
    diag_vksf['sigma_hat_0'] = list()
    diag_vksf['sigma_hat_1'] = list()
    diag_vksf['mu_hat_0'] = list()
    diag_vksf['mu_hat_1'] = list()
    diag_vksf['local_norm'] = list()
    diag_vksf['y_sigma'] = list()
    diag_vksf['y_mu'] = list()
    diag_vksf['s_sigma'] = list()
    diag_vksf['s_mu'] = list()
    diag_vksf['my_sigma'] = list()
    diag_vksf['my_mu'] = list()
    diag_vksf['delta_sigma'] = list()
    diag_vksf['delta_mu'] = list()
    diag_vksf['ci'] = list()

    for kk in xrange(3):

        # Scan sigma with the mean held fixed.
        sigma_hat = sp.arange(sigma*0.5, sigma*1.5 + sigma/200, sigma/200)

        # NOTE(review): `s` (squared difference of the peak-normalised
        # curves) is computed for every candidate but nothing is appended to
        # `delta`, so delta.min() below would fail on an empty array. Lines
        # appear to be missing from the source as received.
        delta = list()
        for i in xrange(len(sigma_hat)):
            local_norm = stats.norm(mu, sigma_hat[i])
            y = local_norm.pdf(xi)
            my = y.max()
            s = (y[sp.arange(0, err_lookforward)]/my
                 - f[sp.arange(0, err_lookforward)]/f.max()) ** 2
        delta = sp.array(delta)

        # Keep the candidate with the smallest error.
        mx, mdx = delta.min(), sp.where(delta == delta.min())
        diag_vksf['mx_sigma'], diag_vksf['mdx_sigma'] = mx, mdx
        sigma_hat = sigma_hat[mdx]
        sigma = sigma_hat

        # Scan the mean with the chosen sigma held fixed.
        mu_hat = sp.arange(mu * 0.5, mu * 1.5 + mu/200, mu/200)

        # NOTE(review): as above, `delta` is never filled in this loop.
        delta = list()
        for i in xrange(len(mu_hat)):
            local_norm = stats.norm(mu_hat[i], sigma_hat)
            y = local_norm.pdf(xi)
            my = y.max()
            s = (y[sp.arange(0, err_lookforward)]/my
                 - f[sp.arange(0, err_lookforward)]/f.max()) ** 2
        delta = sp.array(delta)

        sio.savemat(home + '/diag_vksf.mat', diag_vksf)

        mx, mdx = delta.min(), sp.where(delta == delta.min())
        diag_vksf['mx_mu'], diag_vksf['mdx_mu'] = mx, mdx
        mu_hat = mu_hat[mdx]
        mu = mu_hat

        local_norm = stats.norm(mu_hat, sigma_hat)
        y = local_norm.pdf(xi)

        # Cut-off at the requested probability of the fitted normal.
        ci = local_norm.ppf(perc)
        sio.savemat(home + '/diag_vksf.mat', diag_vksf)
        # plt.plot(xi, y * f.max()/y.max() * len(flattened) * dx,
        plt.plot(xi, y * f.max()/y.max() * len(data) * dx,
                 marker='', linestyle='--', color='k')
        plt.plot((ci, ci), plt.ylim(), marker='',
                 linestyle='-', color='k')
        plt.savefig(home + '/cell_profiler_hist_' + extra + str(kk) + '.pdf')

    return ci
Пример #14
def findDuplicateVectors(vec, tol=vTol, equivPM=False):
    """
    Find vectors in an array that are equivalent to within
    a specified tolerance

      USAGE:

          eqv, uid = findDuplicateVectors(vec, tol, equivPM)

      INPUT:

          1) vec is n x m, a double array of m horizontally concatenated
                           n-dimensional vectors.
         *2) tol is 1 x 1, a scalar tolerance.  If not specified, the
                           module-level default vTol is used.
         *3) set equivPM to True if vec and -vec are to be treated as equivalent

      OUTPUT:

          1) eqv is 1 x p, a list of p equivalence relationships.
          2) uid is a sorted list of column indices holding the first
                           occurrence of every distinct vector.

      NOTES:

          Two columns are equivalent when the sum of the absolute
          differences of their components is <= tol.

          Each equivalence relationship is a 1 x q vector of indices that
          represent the locations of duplicate columns/entries in the array
          vec.  For example:

                | 1     2     2     2     1     2     7 |
          vec = |                                       |
                | 2     3     5     3     2     3     3 |

          eqv = [[1x2 double]    [1x3 double]], where

          eqv[0] = [0  4]
          eqv[1] = [1  3  5]

          and uid = [0, 1, 2, 6]
    """
    vlen  = vec.shape[1]
    vlen0 = vlen

    torid = asarray(range(vlen), dtype="int")    # original index of each column left
    tvec  = vec.copy()

    eqv    = []    # groups of mutually equivalent columns
    eqvTot = []    # every index that belongs to some group
    uid    = []    # first index of each group

    ii = 1
    while vlen > 1 and ii < vlen0:
        # Compare every remaining column against the first remaining one.
        ref   = tvec[:, 0:1]
        match = abs(tvec - ref).sum(0)[1:] <= tol
        if equivPM:
            # Also accept columns equal to the negated reference.
            match = match + (abs(tvec + ref).sum(0)[1:] <= tol)

        kick = hstack([True, match])    # pick self too

        if kick.sum() > 1:
            group = torid[kick].tolist()
            eqv.append(group)
            eqvTot.extend(group)
            uid.append(group[0])

        # Drop the reference and everything matched to it.
        cmask = ~kick
        tvec  = tvec[:, cmask]
        torid = torid[cmask]
        vlen  = tvec.shape[1]

        ii += 1

    # find all single-instance vectors
    singles = sort( setxor1d( eqvTot, range(vlen0) ) )

    # now construct list of unique vector column indices
    uid = int_( sort( union1d( uid, singles ) ) ).tolist()

    return eqv, uid
Пример #15
def one_vs_all(X, Y, num_labels, _lambda, cost_func, grad_func):
  """Fit one binary classifier per label and stack their parameters.

  A column of ones is prepended to X, then for each label c in
  1..num_labels the cost is minimised with fmin_bfgs (at most 100
  iterations, starting from all ones) against the targets ``Y == c``.

  X: (m, n) array of samples.
  Y: labels, compared element-wise with each c.
  num_labels: number of labels.
  _lambda: regularization strength, passed through to cost_func/grad_func.
  cost_func, grad_func: called as ``f(theta, X, y, _lambda)``.

  Returns:
      (num_labels, n + 1) array; the parameters for label c are stored in
      row ``c % num_labels``, so the last label lands in row 0.
  """
  m, n = X.shape
  X = sp.hstack((sp.ones((m, 1)), X))
  all_theta = sp.zeros([num_labels, n+1])

  for c in range(1, num_labels+1):
    init_theta = sp.ones(n+1)
    # _lambda used to be replaced by a hard-coded 0 here, which silently
    # disabled the regularization the caller asked for.
    all_theta[c%num_labels, :] = fmin_bfgs(
        cost_func, init_theta, fprime=grad_func,
        args=(X, sp.int_(Y==c), _lambda), maxiter=100)

  return all_theta
Пример #16
    def fit(self, X, y, recycle=True, **grow_params):

        """Build a linear mixed forest of trees from the training set (X, y).

        Parameters
        ----------
        X : array-like of shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples] or [n_samples, 1]
            The real valued targets

        recycle : bool
            When true and self.trees is non-empty, the existing trees are
            iterated over instead of a fresh ensemble being initialised.

        **grow_params
            Accepted but not used in the code visible here.

        Returns
        -------
        self : object
            Returns self.

        NOTE(review): this method is truncated in several places in the
        source as received (marked individually below), including the
        closing quotes of this docstring, which are restored here.
        """
        # Resolve the kernel: estimate it from the data, use the identity, or
        # estimate it from a dedicated subset of the columns.
        if self.kernel == 'data':
            self.kernel = SC.estimateKernel(X, maf=1.0/X.shape[0])
        elif self.kernel == 'iid':
            self.kernel = SP.identity(X.shape[0])
        # Use dedicated part of data as background model
        elif self.kernel.size == X.shape[1]:
            tmp_ind = self.kernel
            # NOTE(review): the remaining argument(s) and the closing
            # parenthesis of this call are missing.
            self.kernel = utils.estimateKernel(X[:, self.kernel],
            X = X[:, ~tmp_ind]
        # Extract and reshape data
        self.y = y.reshape(-1, 1)
        self.X = X
        self.n, self.m = self.X.shape
        if self.delta is None:
            self.BLUP = BLUP.BLUP()
            if self.verbose > 1:
                print('fitting BLUP')
            # NOTE(review): the remaining argument(s) and the closing
            # parenthesis of this call are missing.
            self.BLUP.fit(XTrain=self.X, yTrain=self.y, KTrain=self.kernel,
            if self.verbose > 1:
                print('done fitting BLUP')
            # Update delta if it used to be 'None'
            self.delta = self.BLUP.delta
        # At least one feature is always considered.
        self.max_features = SP.maximum(SP.int_(self.ratio_features*self.m), 1)
        self.var_used = SP.zeros(self.m)
        self.log_importance = SP.zeros(self.m)
        self.depth = 0

        if self.verbose > 0:
            print(('log(delta) fitted to ', SP.log(self.delta)))

        # Initialize individual trees
        # NOTE(review): the body of the `for` loop and, presumably, an
        # `else:` before `n_trees = 0` are missing; in the visible code the
        # new trees are also never stored in self.trees.
        if recycle and self.trees != []:
            for tree in self.trees:
            n_trees = 0
            self.trees = []
            while n_trees < self.n_estimators:
                if self.verbose > 1:
                    print(('init. tree number ', n_trees))
                subsample = self.tree_sample()
                tree = MixedForestTree(self, subsample)
                n_trees += 1

        # Fitting with optimal depth constraint
        if self.fit_optimal_depth or self.update_delta:
            self.opt_depth = 0
            self.min_oob_err = self.get_oob_error(self.depth)
            if self.verbose > 0:
                print(('initial oob error is:', self.min_oob_err))
            grow_further = True
            curr_depth = self.depth
            while grow_further:
                # Updating ensemble increasing its depth by one
                # NOTE(review): no statement in the visible loop body changes
                # self.depth; the call that grows the ensemble appears to be
                # missing.
                if self.update_delta:
                    self.delta = self.delta_update()
                    if self.verbose > 0:
                        print(('delta was fitted to', self.delta))
                if self.verbose > 0:
                    print(('depth is:', self.depth))
                oob_err = self.get_oob_error(self.depth)
                if self.verbose > 0:
                    print(('oob error is:', oob_err))
                # Remember the depth with the lowest out-of-bag error so far.
                if oob_err < self.min_oob_err:
                    self.min_oob_err = oob_err
                    self.opt_depth = self.depth
                # Decide whether tree needs to be furthered
                grow_further = (curr_depth < self.depth) and\
                    (self.depth < self.max_depth)
                # Past min_depth, optionally stop as soon as the error no
                # longer matches the best one seen.
                if self.build_to_opt_depth and (self.depth >= self.min_depth):
                    grow_further = grow_further and\
                        (oob_err == self.min_oob_err)
                curr_depth = self.depth
        # Growing full tree one by one
        # NOTE(review): the branch this comment introduces is missing.
        return self
Пример #17
    # [opt_model_params,opt_lml]=GPR.optHyper(gpr_BP,hyperparams,priors=priors,gradcheck=True,Ifilter=Ifilter)

    import pygp.plot.gpr_plot as gpr_plot

    first = True
    [M, S] = gpr_opt_hyper.predict(opt_model_params, X)
    gpr_plot.plot_sausage(X, M[0], SP.sqrt(S[0]))
    gpr_plot.plot_sausage(X, M[1], SP.sqrt(S[1]))
    gpr_plot.plot_training_data(x1, C[1], replicate_indices=x1_rep.reshape(-1))
    gpr_plot.plot_training_data(x2, T[1], replicate_indices=x2_rep.reshape(-1))

    #    norm = PL.Normalize()

    break_lml = []
    plots = SP.int_(SP.sqrt(24) + 1)
    for i, BP in enumerate(x1[0, :]):
        # PL.subplot(plots,plots,i+1)
        _hyper = copy.deepcopy(opt_model_params)
        _logtheta = _hyper["covar"]
        _logtheta = SP.concatenate((_logtheta, [BP, 10]))  # SP.var(y[:,i])]))
        _hyper["covar"] = _logtheta

        priors_BP[3] = [lnpriors.lnGauss, [BP, 3]]
        #        [opt_model_params,opt_lml] = opt_hyper(gpr_BP,_hyper,priors=priors_BP,gradcheck=False,Ifilter=Ifilter_BP)
        # break_lml.append(opt_lml)

            break_lml.append(gpr_BP.LML(_hyper, priors_BP))
            print "Variance: %s" % (_logtheta)
Пример #18
"""
Load a spreadsheet, then overlay fitted scipy.stats PDFs on the histogram of
a synthetic von Mises sample.

@author: james
"""

import matplotlib.pyplot as plt
import scipy
import scipy.stats

# NOTE(review): `pd` is used below but no pandas import is visible in the
# source as received, and the read_excel call is never closed - its remaining
# argument(s) are missing.
df = pd.read_excel(
    r"C:\Users\james\Documents\School\Machine Learning & Data mining\Project1\Data\hprice2.xls",
# First eleven columns of the sheet; X is not used again in the visible code.
cols = range(0, 11)
raw_data = df.get_values()
X = raw_data[:, cols]

# Synthetic sample: von Mises draws multiplied by 47 and rounded to integers.
size = 30000
x = scipy.arange(size)
y = scipy.int_(scipy.round_(scipy.stats.vonmises.rvs(5, size=size) * 47))
h = plt.hist(y, bins=range(48))

dist_names = ['gamma', 'beta', 'rayleigh', 'norm', 'pareto']

# Fit each distribution to the sample and draw its PDF scaled by the sample
# size.
for dist_name in dist_names:
    dist = getattr(scipy.stats, dist_name)
    param = dist.fit(y)
    # fit() returns the shape parameters followed by loc and scale.
    pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2],
                          scale=param[-1]) * size
    plt.plot(pdf_fitted, label=dist_name)
    plt.xlim(0, 47)
plt.legend(loc='upper right')
Пример #19
def best_split_full_model(X, Uy, C, S, U, noderange, delta):
    """Find the feature/threshold split of a node with the best score.

    Every midpoint between two consecutive distinct values of every feature
    (restricted to the samples in ``noderange``) is a candidate. A candidate
    is scored as ``nLLeval`` of the current covariates minus ``nLLeval`` of
    the covariates extended by the candidate's 0/1 indicator.

    X: (samples, features) array of predictors.
    Uy, C, S, U: passed to lmm_fast.nLLeval / used to transform the
        indicator and covariates (presumably the rotated response, the
        covariates, and the eigenvalues/eigenvectors of the kernel - confirm).
    noderange: index of the samples that belong to the node.
    delta: its log is passed to lmm_fast.nLLeval.

    Returns:
        (mBest, sBest, left_mean, right_mean, score_best). When no candidate
        exists the defaults (-1, -inf, None, None, -inf) are returned; when
        the best score is not positive, mBest stays -1 and both means stay
        None.

    NOTE(review): the source as received had lost the ``else:`` after the
    early return (leaving the search unreachable), the lines filling
    ``feature_map`` and ``s``, and the tails of two calls. They are restored
    here; the call tails follow the other copy of this function in the file.
    """
    mBest = -1
    sBest = -float('inf')
    score_best = -float('inf')
    left_mean = None
    right_mean = None
    ldelta = SP.log(delta)

    feature_map = []  # feature index of each candidate
    s = []            # threshold of each candidate
    UXt = []          # transformed 0/1 indicator of each candidate
    for i in range(X.shape[1]):
        lev = SP.unique(X[noderange, i])
        for j in range(lev.size - 1):
            split_point = SP.median(lev[j:j + 2])
            x = SP.int_(X[noderange, i] > split_point)
            UXt.append(SP.dot(U.T[:, noderange], x))
            feature_map.append(i)
            s.append(split_point)
    cnt = len(s)
    UXt = SP.array(UXt).T
    if UXt.size == 0:  #predictors are homogeneous
        return mBest, sBest, left_mean, right_mean, score_best

    # test all transformed predictors
    scores = -NP.ones(cnt) * float('inf')
    UC = SP.dot(U.T, C)
    #finding the best split#
    score_0 = lmm_fast.nLLeval(ldelta, Uy[:, 0], UC, S)
    for snp_cnt in range(cnt):
        UX = SP.hstack((UXt[:, snp_cnt:snp_cnt + 1], UC))
        scores[snp_cnt] = score_0 - lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S)

    ###evaluate the new means###
    kBest = SP.argmax(scores)
    score_best = scores[kBest]
    sBest = s[kBest]
    if score_best > 0:
        UX = SP.hstack((UXt[:, kBest:kBest + 1], UC))
        _, beta, _ = lmm_fast.nLLeval(ldelta, Uy[:, 0], UX, S, MLparams=True)
        mBest = feature_map[kBest]
        # Indicator of the right-hand side, zero outside the node.
        CX = SP.zeros_like(Uy)
        CX[noderange] = SP.int_(X[noderange, mBest:mBest + 1] > sBest)
        C_new = SP.hstack((CX, C))
        mean = SP.dot(C_new,
                      beta.reshape(beta.size,
                                   -1))  #TODO:is this the correct way?
        left_mean = ((mean[noderange])[CX[noderange] == 0])[0]
        right_mean = ((mean[noderange])[CX[noderange] == 1])[0]
    return mBest, sBest, left_mean, right_mean, score_best
Пример #20
def get_ancestors(node_ind, node, parents):
    """Collect the chain of parent indices above a node.

    node_ind: position of the node in ``parents``.
    node: node number; floor(log2(node + 1)) links are followed.
    parents: indexable mapping each position to its parent's position.

    Returns:
        Integer array of the visited parent indices, nearest parent first.
    """
    n_levels = SP.int_(SP.floor(SP.log2(node + 1)))
    chain = SP.empty(n_levels, dtype='int')
    current = node_ind
    for level in range(chain.size):
        current = parents[current]
        chain[level] = current
    return chain
Пример #21
def best_split_full_model(X, Uy, C, S, U, noderange, delta):
    """Find the feature/threshold split of a node with the best score.

    Every midpoint between two consecutive distinct values of every feature
    (restricted to the samples in ``noderange``) is a candidate. A candidate
    is scored as ``nLLeval`` of the current covariates minus ``nLLeval`` of
    the covariates extended by the candidate's 0/1 indicator.

    X: (samples, features) array of predictors.
    Uy, C, S, U: passed to lmm_fast.nLLeval / used to transform the
        indicator and covariates (presumably the rotated response, the
        covariates, and the eigenvalues/eigenvectors of the kernel - confirm).
    noderange: index of the samples that belong to the node.
    delta: its log is passed to lmm_fast.nLLeval.

    Returns:
        (mBest, sBest, left_mean, right_mean, score_best). When no candidate
        exists the defaults (-1, -inf, None, None, -inf) are returned; when
        the best score is not positive, mBest stays -1 and both means stay
        None.

    NOTE(review): the source as received had a truncated parameter list
    (restored from the names the body uses), had lost the ``else:`` after the
    early return (leaving the search unreachable) and the lines filling
    ``feature_map`` and ``s``. Confirm against the upstream file.
    """
    mBest = -1
    sBest = -float('inf')
    score_best = -float('inf')
    left_mean = None
    right_mean = None
    ldelta = SP.log(delta)

    feature_map = []  # feature index of each candidate
    s = []            # threshold of each candidate
    UXt = []          # transformed 0/1 indicator of each candidate
    for i in range(X.shape[1]):
        lev = SP.unique(X[noderange, i])
        for j in range(lev.size-1):
            split_point = SP.median(lev[j:j+2])
            x = SP.int_(X[noderange,i] > split_point)
            UXt.append(SP.dot(U.T[:,noderange], x))
            feature_map.append(i)
            s.append(split_point)
    cnt = len(s)
    UXt = SP.array(UXt).T
    if UXt.size == 0: #predictors are homogeneous
        return mBest, sBest, left_mean, right_mean, score_best

    # test all transformed predictors
    scores = -NP.ones(cnt)*float('inf')
    UC = SP.dot(U.T,C)
    #finding the best split#
    score_0 = lmm_fast.nLLeval(ldelta,Uy[:,0],UC,S)
    for snp_cnt in range(cnt):
        UX = SP.hstack((UXt[:,snp_cnt:snp_cnt+1], UC))
        scores[snp_cnt] = score_0 - lmm_fast.nLLeval(ldelta,Uy[:,0],UX,S)

    ###evaluate the new means###
    kBest = SP.argmax(scores)
    score_best = scores[kBest]
    sBest = s[kBest]
    if score_best > 0:
        UX = SP.hstack((UXt[:,kBest:kBest+1], UC))
        _, beta, _ = lmm_fast.nLLeval(ldelta, Uy[:,0], UX, S, MLparams=True)
        mBest = feature_map[kBest]
        # Indicator of the right-hand side, zero outside the node.
        CX = SP.zeros_like(Uy)
        CX[noderange] = SP.int_(X[noderange,mBest:mBest+1] > sBest)
        C_new = SP.hstack((CX,C))
        mean = SP.dot(C_new,beta.reshape(beta.size, -1)) #TODO:is this the correct way?
        left_mean = ((mean[noderange])[CX[noderange]==0])[0]
        right_mean = ((mean[noderange])[CX[noderange]==1])[0]
    return mBest, sBest, left_mean, right_mean, score_best
Пример #22
def process(pars, data=None):
    """Run the master side of the MPI photo-mosaic pipeline.

    The master sends start-up parameters to the scraper ranks
    (1 .. NScrapers) and the placer ranks (1+NScrapers ..), hands every
    placer one horizontal strip of the cropped target image, collects the
    placers' partial mosaics once per iteration, writes an intermediate
    image per iteration plus a final image, and optionally mails the result.

    Parameters
    ----------
    pars : dict
        Run parameters. Keys read here: 'NPlacers', 'NScrapers', 'iters',
        'MaxTilesVert', 'per_page', 'fidelity', 'poolSize', 'useDB' and
        (when 'useDB' is falsy) 'savepath'.
    data : dict or None
        Optional request. When given, 'search' (tags separated by ', ')
        selects the tags to scrape and 'email' is the recipient of the
        final image.

    Returns
    -------
    None
    """
    #%% load the parameters that CAN be specified from the command line
    NPlacers = pars['NPlacers']
    NScrapers = pars['NScrapers']
    iters = pars['iters']
    MaxTilesVert = pars['MaxTilesVert']
    per_page = pars['per_page']
    fidelity = pars['fidelity']
    poolSize = pars['poolSize']

    # Use the tags of the request when there is one; otherwise fall back to
    # a fixed default set so that `tags` is always defined.
    if data is not None:
        tags = data['search'].split(', ')
    else:
        tags = ('Face','Leuven','Belgium','Computer')

    #%% MPI stuff
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    status = MPI.Status()

    #%% initiate plogger
    logger = plogger.PLogger(rank, host_url=LOGGER_HOST)
    logger.write('Initializing', status=plogger.INIT)

    #%% initialize the photo matcher
    pmPars = {'fidelity': fidelity}
    pm = photo_match.photoMatch(pmPars)

    # remove a save-path left over from an earlier run
    # NOTE(review): nothing here re-creates the directory - confirm that
    # whoever writes into it creates it first.
    if not pars['useDB']:
        if os.path.exists(pars['savepath']):
            shutil.rmtree(pars['savepath'], ignore_errors=True)

    #%% call the scrapers right at the beginning, as it is probably the slowest
    PixPerTile = scipy.array((75,75))
    ComparePixPerTile = scipy.array((fidelity,fidelity))
    scraperPars = {'pm': pm, 'tags': tags, 'PixPerTile': PixPerTile, 'poolSize': poolSize}
    for scraper in range(1, 1+NScrapers):
        comm.send(scraperPars, dest=scraper, tag=0)

    # round the number of tile rows down to a multiple of NPlacers so the
    # image can be split evenly between the placers
    TilesVert = int(MaxTilesVert/NPlacers) * NPlacers

    TargetImg = Image.open('./output/doesnotmatter.jpg')
    TargetSize = TargetImg.size
    # `//` keeps the integer (floor) division these expressions had under
    # Python 2 and gives the same result under Python 3
    TilesHor = (TargetSize[0]*PixPerTile[1]*TilesVert)//(TargetSize[1]*PixPerTile[0])
    Tiles = scipy.array((TilesHor, TilesVert), dtype=int)
    TilesPerNode = scipy.array((TilesHor, TilesVert//NPlacers), dtype=int)
    ratio = 2.0 / 3.0
    TargetChunkPixels = Tiles*scipy.int_(PixPerTile*ratio)
    ComparePixels = Tiles*scipy.int_(ComparePixPerTile*ratio)

    #%% adjust the image to have the correct shape (aspect ratio) for turning it into a mosaic
    # the width of the original size image to yield the correct aspect ratio
    UnscaledWidth = (TargetSize[1]*Tiles[0])//Tiles[1]
    CropMargin = (TargetSize[0] - UnscaledWidth)//2
    CroppedImg = TargetImg.transform((ComparePixels[0],ComparePixels[1]), Image.EXTENT, (CropMargin,0, CropMargin+UnscaledWidth,TargetImg.size[1]))
    CroppedArr = color.rgb2lab(scipy.array(CroppedImg))

    #%% send each placer some parameters
    placerPars = {'TilesPerNode': TilesPerNode, 'UnscaledWidth': UnscaledWidth,
                  'Tiles': Tiles, 'pm': pm, 'iters': iters, 'PixPerTile': PixPerTile, 'ComparePixPerTile' : ComparePixPerTile}
    for placer in range(NPlacers):
        comm.send(placerPars, dest=1+NScrapers+placer, tag=0)

    #%% reduce CroppedArr to NPlacers NodeArrs
    NodeArrs = scipy.split(CroppedArr, NPlacers, axis=0)

    #%% send each of the placers its piece of the picture
    for placer in range(NPlacers):
        comm.send(NodeArrs[placer], dest=1+NScrapers+placer, tag=1)

    #%% create the final image and divide it into one piece per placer
    # now the division has to be accurate!
    FinalArr = scipy.zeros((TargetChunkPixels[1], TargetChunkPixels[0], 3), dtype='i')
    NodeFinalArrs = scipy.split(FinalArr, NPlacers, axis=0)

    #%% listen to the placers' intermediate results
    # buffer for receiving the data, before it is known whence it came
    tempNodeFinalArr = NodeFinalArrs[0].copy()
    for it in range(iters):
        # The master takes part in the scrapers' broadcasts but ignores
        # what it receives.
        dummy_arrs = scipy.zeros((per_page, PixPerTile[1], PixPerTile[0], 3), dtype=scipy.uint8)
        for scraper in range(1, 1+NScrapers):
            comm.Bcast(dummy_arrs, root=scraper)

        logger.write('Listening for placers', status=plogger.RECEIVING)
        for p in range(NPlacers): # listen for the placers
            comm.Recv([tempNodeFinalArr, MPI.INT], source=MPI.ANY_SOURCE, tag=4, status=status)
            # the sender's rank decides which strip of the image was received
            placer = status.Get_source()
            NodeFinalArrs[placer-(1+NScrapers)][:,:,:] = tempNodeFinalArr

        partial_filename = 'output/mosaic_{}.png'.format(it)
        FinalImg = Image.fromarray(scipy.array(FinalArr, dtype=scipy.uint8), 'RGB')
        FinalImg.save(partial_filename) # for fewer output images

    writepars = pars.copy()

    # NOTE(review): '{:d}' raises ValueError for non-integer values (e.g. a
    # 'savepath' string) - confirm which keys are expected in `pars` here.
    strrep = '_'.join(['{}{:d}'.format(item, value) for item, value in sorted(writepars.items())])
    final_filename = 'output/final{}_{}.png'.format(strrep, int(time.time()))
    # Write the final mosaic before touching its permissions; building it
    # from FinalArr also works when `iters` is 0.
    FinalImg = Image.fromarray(scipy.array(FinalArr, dtype=scipy.uint8), 'RGB')
    FinalImg.save(final_filename)
    os.chmod(final_filename, 0o744)
    print("M{}: Final image saved".format(rank))

    shutil.copy('log', 'output/log_'+strrep)

    # email result
    if data is not None:
        msg = MIMEMultipart()
        msg['Subject'] = "KU Leuven openbedrijvendag - uw mozaiek"
        msg['From'] = "SuperPi <*****@*****.**>"
        msg['To'] = data['email']
        with open(final_filename, 'rb') as fp:
            img = MIMEImage(fp.read())
        # without this the message would be sent with no image in it
        msg.attach(img)
        s = smtplib.SMTP('mail4.cs.kuleuven.be')
        try:
            s.sendmail('*****@*****.**', [msg['To']], msg.as_string())
        finally:
            s.quit()

    #%% signal completion
    logger.write('Finished', status=plogger.FINISHED)
Пример #23
# from https://stackoverflow.com/questions/6620471/fitting-empirical-distribution-to-theoretical-ones-with-scipy-python
# Saullo's answer
import matplotlib.pyplot as plt
import scipy
import scipy.stats
size = 20000
x = range(size)
# creating the dummy sample (using beta distribution), rounded to integers
# in 0..47; ndarray methods replace the deprecated scipy.round_/scipy.int_
# aliases of NumPy functions
y = (scipy.stats.beta.rvs(6, 2, size=size) * 47).round().astype(int)
# creating the histogram (normalised, so it is comparable with the pdfs)
h = plt.hist(y, bins=range(48), density=True)

dist_names = ['alpha', 'beta', 'arcsine',
              'weibull_min', 'weibull_max', 'rayleigh']

for dist_name in dist_names:
    dist = getattr(scipy.stats, dist_name)
    # fit() returns the shape parameters followed by loc and scale
    param = dist.fit(y)
    pdf_fitted = dist.pdf(x, *param[:-2], loc=param[-2], scale=param[-1])
    plt.plot(pdf_fitted, label=dist_name)
# the sample only covers 0..47, so restrict the view to that range
plt.xlim(0, 47)
plt.legend(loc='upper left')
plt.show()
#NB : from scipy.stats._continuous_distns import _distn_names #all dist names

#The fit() method mentioned by @Saullo Castro provides maximum likelihood estimates (MLE). The best distribution for your data can be determined in several different ways, such as:
#1, the one that gives you the highest log likelihood.
#2, the one that gives you the smallest AIC, BIC or BICc value (see wiki: http://en.wikipedia.org/wiki/Akaike_information_criterion; these can basically be viewed as the log likelihood adjusted for the number of parameters, as distributions with more parameters are expected to fit better)
#3, the one that maximizes the Bayesian posterior probability. (see wiki: http://en.wikipedia.org/wiki/Posterior_probability)
Пример #24
    Those are the same as python types and may be interchanged.

    Fixed widths:

    - int32 	Integer (-2147483648 to 2147483647)
    - uint32 	Unsigned integer (0 to 4294967295)
    - float32 	Single precision float: sign bit, 8 bits exponent, 23 bits mantissa
    - float64 	Double precision float: sign bit, 11 bits exponent, 52 bits mantissa
    - complex64 	Complex number, represented by two 32-bit floats (real and imaginary components)

    Those have fixed width on all systems, and may not be compatible with the python types.

    assert sp.array_equal(
        sp.array([1, 2, 3], dtype = sp.int_),
        sp.int_([1, 2, 3])

    # Different types evaluate to equal sp.arrays

    assert sp.array_equal(
        sp.array([1, 2, 3], dtype = sp.int_  ),
        sp.array([1, 2, 3], dtype = sp.float_)

    # Get type

    v = sp.array([1,2], dtype = sp.int32)
    assert v.dtype == sp.int32

    # Subtype:
Пример #25
    # _hyperparams['covar'] = _logtheta


    import pygp.plot.gpr_plot as gpr_plot
    first = True
    [M, S] = gpr_opt_hyper.predict(opt_model_params, X)
    gpr_plot.plot_sausage(X, M[0], SP.sqrt(S[0]))
    gpr_plot.plot_sausage(X, M[1], SP.sqrt(S[1]))
    gpr_plot.plot_training_data(x1, C[1], replicate_indices=x1_rep.reshape(-1))
    gpr_plot.plot_training_data(x2, T[1], replicate_indices=x2_rep.reshape(-1))
#    norm = PL.Normalize()

    break_lml = []
    plots = SP.int_(SP.sqrt(24) + 1)
    for i, BP in enumerate(x1[0,:]):
        _hyper = copy.deepcopy(opt_model_params)
        _logtheta = _hyper['covar']
        _logtheta = SP.concatenate((_logtheta, [BP, 10]))#SP.var(y[:,i])]))
        _hyper['covar'] = _logtheta

        priors_BP[3] = [lnpriors.lnGauss, [BP, 3]]
#        [opt_model_params,opt_lml] = opt_hyper(gpr_BP,_hyper,priors=priors_BP,gradcheck=False,Ifilter=Ifilter_BP)
            break_lml.append(gpr_BP.LML(_hyper, priors_BP))
Пример #26
    Those are the same as python types and may be interchanged.

    Fixed widths:

    - int32 	Integer (-2147483648 to 2147483647)
    - uint32 	Unsigned integer (0 to 4294967295)
    - float32 	Single precision float: sign bit, 8 bits exponent, 23 bits mantissa
    - float64 	Double precision float: sign bit, 11 bits exponent, 52 bits mantissa
    - complex64 	Complex number, represented by two 32-bit floats (real and imaginary components)

    Those have fixed width on all systems, and may not be compatible with the python types.

    assert sp.array_equal(sp.array([1, 2, 3], dtype=sp.int_),
                          sp.int_([1, 2, 3]))

    # Different types evaluate to equal sp.arrays

    assert sp.array_equal(sp.array([1, 2, 3], dtype=sp.int_),
                          sp.array([1, 2, 3], dtype=sp.float_))

    # Get type

    v = sp.array([1, 2], dtype=sp.int32)
    assert v.dtype == sp.int32

    # Subtype:

    sp.issubdtype(sp.int32, sp.int_)
Пример #27
def get_ancestors(node_ind, node, parents):
    """Collect the chain of parent indices above a node.

    Starting from ``node_ind``, ``parents`` is followed
    ``floor(log2(node + 1))`` times and every index visited is recorded.

    Parameters
    ----------
    node_ind : index into ``parents`` of the starting node.
    node : node number; only used to derive how many ancestors to collect.
        Presumably a breadth-first (heap-style) numbering with the root at
        0 - TODO confirm against the caller.
    parents : indexable mapping a node index to the index of its parent.

    Returns
    -------
    Integer array of ancestor indices, nearest parent first.
    """
    depth = SP.int_(SP.floor(SP.log2(node + 1)))
    chain = SP.empty(depth, dtype='int')
    current = node_ind
    for level in range(chain.size):
        current = parents[current]
        chain[level] = current
    return chain