Example #1
 def get_num_samples(self, idx):
     """
     Number of samples needed to estimate the population variance within the tolerance limit
     Sample variance is normally distributed http://stats.stackexchange.com/a/105338/71884
     (see warning below).
     Var(s^2) \approx 1/n * (\mu_4 - \sigma^4)
     Adjust n as per the tolerance needed to estimate the sample variance.
     Warning: this does not work for some distributions, e.g. Bernoulli - https://stats.stackexchange.com/a/104911
     Use min_samples to explicitly control the number of samples to be drawn.
     """
     if self.min_samples:
         return self.min_samples
     min_samples = 1000
     tol = 10.0
     required_precision = self.prec / tol
     if not self.scipy_dist:
         return min_samples
     args, kwargs = self.scipy_arg_fn(**self.get_dist_params(idx, wrap_tensor=False))
     try:
         fourth_moment = np.max(self.scipy_dist.moment(4, *args, **kwargs))
         var = np.max(self.scipy_dist.var(*args, **kwargs))
         min_computed_samples = int(math.ceil((fourth_moment - math.pow(var, 2)) / required_precision))
     except (AttributeError, ValueError):
         return min_samples
     return max(min_samples, min_computed_samples)
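
A minimal standalone sketch of the same estimate for a normal distribution, using scipy.stats directly; the function name normal_sample_count and the parameter values are illustrative and not part of the original class:

import math
from scipy import stats

def normal_sample_count(sigma=2.0, prec=0.1, tol=10.0, min_samples=1000):
    # For a normal distribution the fourth central moment is 3*sigma**4, so
    # Var(s^2) ~ (mu_4 - sigma^4)/n = 2*sigma**4/n; solve for n at the required precision.
    required_precision = prec / tol
    dist = stats.norm(loc=0.0, scale=sigma)
    fourth_moment = dist.moment(4)   # 3 * sigma**4 for a normal distribution
    var = dist.var()                 # sigma**2
    n = int(math.ceil((fourth_moment - var ** 2) / required_precision))
    return max(min_samples, n)

print(normal_sample_count())  # about 3200 for sigma=2, prec=0.1, tol=10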
Example #2
def report_statistics(id_sub, stats):
    records = stats['records']
    distance = records['distance']
    delta = records['delta']
    order = scale_score(distance)
    order = order / float(order.size)

    r = Report('stats-%s' % id_sub)
    r.data('records', records)
    f = r.figure()
    
    with f.plot('scatter') as pylab:
        pylab.scatter(delta, distance)
        pylab.xlabel('delta')
        pylab.ylabel('distance')
        pylab.axis((-1, np.max(delta) + 1, -0.05, np.max(distance)))
        
    with f.plot('with_stats', **dp_predstats_fig) as pylab:
        fancy_error_display(pylab, delta, distance, 'g')

    with f.plot('distance_order', **dp_predstats_fig) as pylab:
        fancy_error_display(pylab, delta, order, color='k')
        
    f = r.figure(cols=1)        
    bins = np.linspace(0, np.max(distance), 100)
    for i, d in enumerate(set(delta)):
        with f.plot('conditional%d' % i) as pylab:
            which = delta == d
            pylab.hist(distance[which], bins)

    return r
Example #3
def beta_limiter(r,cfl,theta=0.95,beta=0.66666666666666666):
    r"""
    Modification of CFL Superbee limiter with theta and beta parameters
    
    Additional Input:
     - *theta*
     - *beta*
    """
    a = np.empty((2,len(r)))
    b = np.zeros((2,len(r)))
    
    a[0,:] = 0.001
    a[1,:] = cfl
    cfmod1 = np.max(a,axis=0)
    a[0,:] = 0.999
    cfmod2 = np.min(a,axis=0)
    
    s1 = theta * 2.0 / cfmod1
    s2 = (1.0 + cfl) / 3.0
    phimax = theta * 2.0 / (1.0 - cfmod2)
    
    a[0,:] = s1*r
    a[1,:] = phimax
    b[1,:] = np.min(a,axis=0)
    ultra = np.max(b,axis=0)
    
    a[0,:] = 1.0 + (s2 - beta/2.0) * (r-1.0)
    a[1,:] = 1.0 + (s2 + beta/2.0) * (r-1.0)
    b[0,:] = ultra
    b[1,:] = np.max(a,axis=0)
    a[0,:] = 0.0
    a[1,:] = np.min(b,axis=0)
    
    return np.max(a,axis=0)
Example #4
    def _get_initial_classes(self):
        images = map(lambda f: cv2.imread(path.join(self._root, f)), self._files)
        self._avg_pixels = np.array([], dtype=np.uint8)

        # extract parts from each image for all of our 6 categories
        for i in range(0, self._n_objects):
            rects = self._rects[:, i]
            
            # compute maximum rectangle
            rows = np.max(rects['f2'] - rects['f0'])
            cols = np.max(rects['f3'] - rects['f1'])

            # extract annotated rectangles
            im_rects = map(lambda (im, r): im[r[0]:r[2],r[1]:r[3],:], zip(images, rects))

            # resize all rectangles to the max size & average all the rectangles
            im_rects = np.array(map(lambda im: cv2.resize(im, (cols, rows)), im_rects), dtype=np.float)
            avgs = np.around(np.average(im_rects, axis = 0))

            # average the resulting rectangle to compute its mean per-channel pixel value
            mn = np.around(np.array(cv2.mean(avgs), dtype='float'))[:-1].astype('uint8')

            if(self._avg_pixels.size == 0):
                self._avg_pixels = mn
            else:
                self._avg_pixels = np.vstack((self._avg_pixels, mn))
Example #5
def cada_torrilhon_limiter(r,cfl,epsilon=1.0e-3):
    r"""
    Cada-Torrilhon modified
    
    Additional Input:
     - *epsilon*
    """
    a = np.ones((2,len(r))) * 0.95
    b = np.empty((3,len(r)))

    a[0,:] = cfl
    cfl = np.min(a,axis=0)
    a[1,:] = 0.05
    cfl = np.max(a,axis=0)
    
    # Multiply all parts except b[0,:] by (1.0 - epsilon) as well
    b[0,:] = 1.0 + (1+cfl) / 3.0 * (r - 1)
    b[1,:] = 2.0 * np.abs(r) / (cfl + epsilon)
    b[2,:] = (8.0 - 2.0 * cfl) / (np.abs(r) * (cfl - 1.0 - epsilon)**2)
    b[1:,:] *= (1.0 - epsilon)
    a[0,:] = np.min(b,axis=0)
    a[1,:] = (-2.0 * (cfl**2 - 3.0 * cfl + 8.0) * (1.0-epsilon)
                    / (np.abs(r) * (cfl**3 - cfl**2 - cfl + 1.0 + epsilon)))
    
    return np.max(a,axis=0)
Example #6
def theta_limiter(r,cfl,theta=0.95):
    r"""
    Theta limiter
    
    Additional Input:
     - *theta*
    """
    a = np.empty((2,len(r)))
    b = np.empty((3,len(r)))
    
    a[0,:] = 0.001
    a[1,:] = cfl
    cfmod1 = np.max(a,axis=0)
    a[0,:] = 0.999
    cfmod2 = np.min(a,axis=0)
    s1 = 2.0 / cfmod1
    s2 = (1.0 + cfl) / 3.0
    phimax = 2.0 / (1.0 - cfmod2)
    
    a[0,:] = (1.0 - theta) * s1
    a[1,:] = 1.0 + s2 * (r - 1.0)
    left = np.max(a,axis=0)
    a[0,:] = (1.0 - theta) * phimax * r
    a[1,:] = theta * s1 * r
    middle = np.max(a,axis=0)
    
    b[0,:] = left
    b[1,:] = middle
    b[2,:] = theta*phimax
    
    return np.min(b,axis=0)
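
A minimal call sketch for the limiter above; the ratio and CFL values are arbitrary, and theta_limiter is assumed to be in scope:

import numpy as np

r = np.array([-0.5, 0.0, 0.5, 1.0, 2.0])   # ratios of consecutive slopes
cfl = np.full_like(r, 0.8)                 # local CFL numbers
phi = theta_limiter(r, cfl, theta=0.95)
print(phi.shape)                           # (5,): one limiter value per entry of r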
Example #7
def cfl_superbee_theta(r,cfl,theta=0.95):
    r"""
    CFL-Superbee (Roe's Ultrabee) with theta parameter
    """
    a = np.empty((2,len(r)))
    b = np.zeros((2,len(r)))
    
    a[0,:] = 0.001
    a[1,:] = cfl
    cfmod1 = np.max(a,axis=0)
    a[0,:] = 0.999
    cfmod2 = np.min(a,axis=0)

    s1 = theta * 2.0 / cfmod1
    phimax = theta * 2.0 / (1.0 - cfmod2)

    a[0,:] = s1*r
    a[1,:] = phimax
    b[1,:] = np.min(a,axis=0)
    ultra = np.max(b,axis=0)
    
    a[0,:] = ultra
    b[0,:] = 1.0
    b[1,:] = r
    a[1,:] = np.max(b,axis=0)
    return np.min(a,axis=0)
Example #8
    def work(self, **kwargs):
        self.__dict__.update(kwargs)
        self.worked = True
        samples = LGMM1(rng=self.rng,
                size=(self.n_samples,),
                **self.LGMM1_kwargs)
        samples = np.sort(samples)
        edges = samples[::self.samples_per_bin]
        centers = .5 * edges[:-1] + .5 * edges[1:]
        print edges

        pdf = np.exp(LGMM1_lpdf(centers, **self.LGMM1_kwargs))
        dx = edges[1:] - edges[:-1]
        y = 1 / dx / len(dx)

        if self.show:
            plt.scatter(centers, y)
            plt.plot(centers, pdf)
            plt.show()
        err = (pdf - y) ** 2
        print np.max(err)
        print np.mean(err)
        print np.median(err)
        if not self.show:
            assert np.max(err) < .1
            assert np.mean(err) < .01
            assert np.median(err) < .01
Example #9
def Haffine_from_points(fp, tp):
    '''Compute the homography matrix H of an affine transformation such that tp is obtained from fp by that transform.'''
    if fp.shape != tp.shape:
        raise RuntimeError('number of points do not match')

    # Normalize (condition) the points
    # source points of the mapping
    m = numpy.mean(fp[:2], axis=1)
    maxstd = numpy.max(numpy.std(fp[:2], axis=1)) + 1e-9
    C1 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C1[0, 2] = -m[0] / maxstd
    C1[1, 2] = -m[1] / maxstd
    fp_cond = numpy.dot(C1, fp)

    # corresponding (target) points of the mapping
    m = numpy.mean(tp[:2], axis=1)
    maxstd = numpy.max(numpy.std(tp[:2], axis=1)) + 1e-9
    C2 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C2[0, 2] = -m[0] / maxstd
    C2[1, 2] = -m[1] / maxstd
    tp_cond = numpy.dot(C2, tp)

    # After normalization the point mean is 0, so the translation is 0
    A = numpy.concatenate((fp_cond[:2], tp_cond[:2]), axis=0)
    U, S, V = numpy.linalg.svd(A.T)
    # Build matrices B and C
    tmp = V[:2].T
    B = tmp[:2]
    C = tmp[2:4]

    tmp2 = numpy.concatenate((numpy.dot(C, numpy.linalg.pinv(B)), numpy.zeros((2, 1))), axis=1)
    H = numpy.vstack((tmp2, [0, 0, 1]))

    H = numpy.dot(numpy.linalg.inv(C2), numpy.dot(H, C1))  # undo the conditioning
    return H / H[2, 2]  # normalize and return
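
A minimal round-trip sketch: build tp as an affine image of fp (homogeneous 3 x n coordinates, made-up values) and check that the recovered H maps fp back onto tp; Haffine_from_points is assumed to be in scope:

import numpy

fp = numpy.array([[0., 1., 1., 0.],
                  [0., 0., 1., 1.],
                  [1., 1., 1., 1.]])
M = numpy.array([[2., 0., 1.],
                 [0., 3., -1.],
                 [0., 0., 1.]])          # a known affine transform
tp = numpy.dot(M, fp)

H = Haffine_from_points(fp, tp)
print(numpy.allclose(numpy.dot(H, fp), tp))  # expected: True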
Example #10
    def set_hard_hard_constraints(self, tdata1, tdata2, seeds):
        tdata1[seeds==2] = np.max(tdata1) + 1
        tdata2[seeds==1] = np.max(tdata2) + 1
        tdata1[seeds==1] = 0
        tdata2[seeds==2] = 0

        return tdata1, tdata2
Example #11
    def work(self):
        self.worked = True
        kwargs = dict(
                weights=self.weights,
                mus=self.mus,
                sigmas=self.sigmas,
                low=self.low,
                high=self.high,
                q=self.q,
                )
        samples = GMM1(rng=self.rng,
                size=(self.n_samples,),
                **kwargs)
        samples = np.sort(samples)
        edges = samples[::self.samples_per_bin]
        #print samples

        pdf = np.exp(GMM1_lpdf(edges[:-1], **kwargs))
        dx = edges[1:] - edges[:-1]
        y = 1 / dx / len(dx)

        if self.show:
            plt.scatter(edges[:-1], y)
            plt.plot(edges[:-1], pdf)
            plt.show()
        err = (pdf - y) ** 2
        print np.max(err)
        print np.mean(err)
        print np.median(err)
        if not self.show:
            assert np.max(err) < .1
            assert np.mean(err) < .01
            assert np.median(err) < .01
Example #12
def draw_ohl_graph(ax, data):

  # sort data along args.x_column and make it np.array again
  all_data = sorted(data, key=itemgetter(args.x_column))
  scores = list({e[0] for e in all_data})
  scores.sort()
  print("scores=", scores)

  np_all_data = np.array(all_data)
  all_x = np_all_data[:, args.x_column]
  all_y = np_all_data[:, args.y_column]
  x_max = np.max(all_x)
  x_min = np.min(all_x)
  y_max = np.max(all_y)
  y_min = np.min(all_y)
  # print("ymax=", y_max, "ymin=", y_min)
  y_width = y_max - y_min
  if y_width == 0:
    if y_max == 0:
      y_width = 1.0
    else:
      y_min = 0
      y_width = y_max
  ax.set_xlim(xmax = x_max / args.scale)
  ax.set_xlim(xmin = 0)
  ax.set_ylim(ymax = y_max + y_width * 0.05)
  ax.set_ylim(ymin = y_min - y_width * 0.05)

  for score in scores:
    # print("score=", score)
    data = list(filter(lambda e: e[0] == score, all_data))

    data = np.array(data)

    x = data[:, args.x_column]
    y = data[:, args.y_column]
    x = x / args.scale

    ans = args.ans
    if len(data) < 5:
      ax.plot(x, y, '.', label=str(score))
      continue
    elif len(data) * 0.1 < args.ans:
      ans = int(len(data) * 0.1)
      if ans < 4:
        ans = 4
    # print("ans=", ans)

    weight = np.ones(ans, dtype=np.float)/ans
    y_average = np.convolve(y, weight, 'valid')
    rim = ans - 1
    rim_l = rim // 2
    rim_r = rim - rim_l
    ax.plot(x[rim_l:-rim_r], y_average, label=str(score))

  ax.legend(loc=2)
  ax.set_xlabel(args.xlabel)
  ax.set_ylabel(args.ylabel)

  ax.grid(linewidth=1, linestyle="-", alpha=0.1)
Example #13
    def _crinfo_from_specific_data (self, data, margin):
# find the automatic crop; nonzero gives the indices
        nzi = np.nonzero(data)

        x1 = np.min(nzi[0]) - margin[0]
        x2 = np.max(nzi[0]) + margin[0] + 1
        y1 = np.min(nzi[1]) - margin[0]
        y2 = np.max(nzi[1]) + margin[0] + 1
        z1 = np.min(nzi[2]) - margin[0]
        z2 = np.max(nzi[2]) + margin[0] + 1 

# clamp to the array bounds
        if x1 < 0:
            x1 = 0
        if y1 < 0:
            y1 = 0
        if z1 < 0:
            z1 = 0

        if x2 > data.shape[0]:
            x2 = data.shape[0]-1
        if y2 > data.shape[1]:
            y2 = data.shape[1]-1
        if z2 > data.shape[2]:
            z2 = data.shape[2]-1

# crop
        crinfo = [[x1, x2],[y1,y2],[z1,z2]]
        #dataout = self._crop(data,crinfo)
        #dataout = data[x1:x2, y1:y2, z1:z2]
        return crinfo
Example #14
    def zplane(self, title="", fontsize=18):
        """ Display filter in the complex plane

        Parameters
        ----------

        """
        rb = self.z
        ra = self.p

        t = np.arange(0, 2 * np.pi + 0.1, 0.1)
        plt.plot(np.cos(t), np.sin(t), "k")

        plt.plot(np.real(ra), np.imag(ra), "x", color="r")
        plt.plot(np.real(rb), np.imag(rb), "o", color="b")
        M1 = -10000
        M2 = -10000
        if len(ra) > 0:
            M1 = np.max([np.abs(np.real(ra)), np.abs(np.imag(ra))])
        if len(rb) > 0:
            M2 = np.max([np.abs(np.real(rb)), np.abs(np.imag(rb))])
        M = 1.6 * max(1.2, M1, M2)
        plt.axis([-M, M, -0.7 * M, 0.7 * M])
        plt.title(title, fontsize=fontsize)
        plt.show()
Example #15
def viterbi_decode(score, transition_params):
  """Decode the highest scoring sequence of tags outside of TensorFlow.

  This should only be used at test time.

  Args:
    score: A [seq_len, num_tags] matrix of unary potentials.
    transition_params: A [num_tags, num_tags] matrix of binary potentials.

  Returns:
    viterbi: A [seq_len] list of integers containing the highest scoring tag
        indices.
    viterbi_score: A float containing the score for the Viterbi sequence.
  """
  trellis = np.zeros_like(score)
  backpointers = np.zeros_like(score, dtype=np.int32)
  trellis[0] = score[0]

  for t in range(1, score.shape[0]):
    v = np.expand_dims(trellis[t - 1], 1) + transition_params
    trellis[t] = score[t] + np.max(v, 0)
    backpointers[t] = np.argmax(v, 0)

  viterbi = [np.argmax(trellis[-1])]
  for bp in reversed(backpointers[1:]):
    viterbi.append(bp[viterbi[-1]])
  viterbi.reverse()

  viterbi_score = np.max(trellis[-1])
  return viterbi, viterbi_score
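
A minimal call sketch on a tiny 3-step, 2-tag problem (numbers are arbitrary); viterbi_decode above is assumed to be in scope:

import numpy as np

score = np.array([[1.0, 0.0],
                  [0.0, 1.0],
                  [1.0, 0.5]])
transition_params = np.array([[ 0.5, -0.5],
                              [-0.5,  0.5]])
viterbi, viterbi_score = viterbi_decode(score, transition_params)
print([int(t) for t in viterbi], float(viterbi_score))  # [0, 0, 0] 3.0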
Example #16
def scatter(x, y, equal=False, xlabel=None, ylabel=None, xinvert=False, yinvert=False):
    """
    Plot a scatter with simple formatting options
    """
    plt.scatter(x, y, 200, color=[0.3, 0.3, 0.3], edgecolors="white", linewidth=1, zorder=2)
    sns.despine()
    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        plt.ylabel(ylabel)
    if equal:
        plt.axes().set_aspect("equal")
        plt.plot([0, max([x.max(), y.max()])], [0, max([x.max(), y.max()])], color=[0.6, 0.6, 0.6], zorder=1)
        bmin = min([x.min(), y.min()])
        bmax = max([x.max(), y.max()])
        rng = abs(bmax - bmin)
        plt.xlim([bmin - rng * 0.05, bmax + rng * 0.05])
        plt.ylim([bmin - rng * 0.05, bmax + rng * 0.05])
    else:
        xrng = abs(x.max() - x.min())
        yrng = abs(y.max() - y.min())
        plt.xlim([x.min() - xrng * 0.05, x.max() + xrng * 0.05])
        plt.ylim([y.min() - yrng * 0.05, y.max() + yrng * 0.05])
    if xinvert:
        plt.gca().invert_xaxis()
    if yinvert:
        plt.gca().invert_yaxis()
Example #17
def plot3d(X, Y, Z, point, zlim=None, ax=None, fig=None, xylabelsize=33):
    # Plot
    from matplotlib import cm
    if fig is None:
        fig = plt.figure()
    if ax is None:
        ax = fig.add_subplot(111, projection='3d')
    z_min = np.min(Z) - np.max(Z)/2
    ax.plot_surface(X, Y, Z, rstride=10, cstride=10,
                    #vmin=Z.min(), vmax=Z.max(),
                    cmap=cm.coolwarm,
                    linewidth=1, antialiased=True)
    cset = ax.contourf(X, Y, Z, zdir='z', offset=z_min,
                       #norm=colors.LogNorm(vmin=Z.min(), vmax=Z.max()),
                       cmap=cm.coolwarm)
    argmin = X.ravel()[Z.argmin()], Y.ravel()[Z.argmin()]
    print("argmin", argmin)
    # add point and cross at defined point
    ax.plot([point[0]], [point[1]], 'wo', zs=[z_min], ms=20)
    ax.plot([X.min(), X.max()], [point[1], point[1]], '--w', zs=[z_min, z_min], linewidth=2.0)
    ax.plot([point[0], point[0]], [Y.min(), Y.max()], '--w', zs=[z_min, z_min], linewidth=2.0)
    # add point and cross at argmin
    ax.plot([argmin[0]], [argmin[1]], 'o', color='k', zs=[z_min], ms=20)
    ax.plot([X.min(), X.max()], [argmin[1], argmin[1]], '--', color='k', zs=[z_min, z_min], linewidth=2.0)
    ax.plot([argmin[0], argmin[0]], [Y.min(), Y.max()], '--', color='k', zs=[z_min, z_min], linewidth=2.0)
    #ax.text(argmin[0], argmin[1], z_min, ".  (%.3f, %.3f)" % argmin)
    ax.set_xlabel(r'$\beta_1$', size=xylabelsize)
    ax.set_ylabel(r'$\beta_2$', size=xylabelsize)
    #ax.set_zlabel(r'Error', size=xylabelsize)
    ax.set_zlim(z_min, np.max(Z))
    return ax, z_min, argmin
Example #18
def test_zernike_get_opd():
    zernike_optic = wfe.ZernikeWFE(coefficients=[NWAVES * WAVELENGTH,], radius=RADIUS)
    opd_map = zernike_optic.get_opd(WAVELENGTH, units='meters')
    assert np.max(opd_map) == NWAVES * WAVELENGTH

    opd_map_waves = zernike_optic.get_opd(WAVELENGTH, units='waves')
    assert np.max(opd_map_waves) == NWAVES
Example #19
  def testEncodeUnrelatedAreas(self):
    """
    assert unrelated areas don't share bits
    (outside of chance collisions)
    """
    avgThreshold = 0.3

    maxThreshold = 0.12
    overlaps = overlapsForUnrelatedAreas(1499, 37, 5)
    self.assertLess(np.max(overlaps), maxThreshold)
    self.assertLess(np.average(overlaps), avgThreshold)

    maxThreshold = 0.12
    overlaps = overlapsForUnrelatedAreas(1499, 37, 10)
    self.assertLess(np.max(overlaps), maxThreshold)
    self.assertLess(np.average(overlaps), avgThreshold)

    maxThreshold = 0.17
    overlaps = overlapsForUnrelatedAreas(999, 25, 10)
    self.assertLess(np.max(overlaps), maxThreshold)
    self.assertLess(np.average(overlaps), avgThreshold)

    maxThreshold = 0.25
    overlaps = overlapsForUnrelatedAreas(499, 13, 10)
    self.assertLess(np.max(overlaps), maxThreshold)
    self.assertLess(np.average(overlaps), avgThreshold)
Example #20
 def update_im_clim(self, val, im, slider):
     if np.mean(self.data[self.frame_slice]) < 0:
         self.im.set_clim(np.min(self.data[self.frame_slice]) * (self.sliders[-1]._slider.val / 100),
                          np.max(self.data[self.frame_slice]) * (self.sliders[-2]._slider.val / 100))
     else:
         self.im.set_clim(np.max(self.data[self.frame_slice]) * (self.sliders[-1]._slider.val / 100),
                          np.max(self.data[self.frame_slice]) * (self.sliders[-2]._slider.val / 100))
Example #21
 def max(self, axis=None, out=None, keepdims=False):
     self._prepare_out(out=out)
     try:
         value = np.max(self.value, axis=axis, out=out, keepdims=keepdims)
     except TypeError:  # numpy < 1.7 does not accept the keepdims argument
         value = np.max(self.value, axis=axis, out=out)
     return self._new_view(value)
Example #22
    def quantify(self):
        """Quantify shape of the contours."""
        four_pi = 4. * np.pi
        for edge in self.edges:
            # Positions
            x = edge['x']
            y = edge['y']

            A, perimeter, x_center, y_center, distances = \
                self.get_shape_factor(x, y)

            # Set values.
            edge['area'] = A
            edge['perimeter'] = perimeter
            edge['x_center'] = x_center
            edge['y_center'] = y_center
            # Circle is 1. Rectangle is 0.78. Thread-like is close to zero.
            edge['shape_factor'] = four_pi * edge['area'] / \
                                   edge['perimeter'] ** 2.

            # We take the radius of the edge to be
            # the median value of the distances from the center.
            radius = np.median(distances)
            edge['radius_deviation'] = np.std(distances - radius) / radius

            edge['x_min'] = np.min(x)
            edge['x_max'] = np.max(x)
            edge['y_min'] = np.min(y)
            edge['y_max'] = np.max(y)
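
A quick standalone check of the shape factor 4*pi*A/P**2 quoted in the comment above (a circle gives 1, a square gives pi/4, roughly 0.78); the values are not from the original project:

import numpy as np

A_circle, P_circle = np.pi, 2.0 * np.pi       # unit circle
print(4.0 * np.pi * A_circle / P_circle**2)   # 1.0

A_square, P_square = 1.0, 4.0                 # unit square
print(4.0 * np.pi * A_square / P_square**2)   # about 0.785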
Example #23
	def get_batch(self, model, batch_size):
		len_memory = len(self.memory)
		num_actions = 6
		encouraged_actions = np.zeros(num_actions, dtype=np.int)
		predicted_actions = np.zeros(num_actions, dtype=np.int)
		inputs = np.zeros((min(len_memory, batch_size), 4, 80, 74))
		targets = np.zeros((inputs.shape[0], num_actions))
		q_list = np.zeros(inputs.shape[0])
		for i, idx in enumerate(np.random.randint(0, len_memory, size=inputs.shape[0])):
			input_t, action_t, reward_t, input_tp1 = self.memory[idx][0]
			terminal = self.memory[idx][1]

			inputs[i] = input_t

			targets[i] = model.predict(input_t.reshape(1, 4, 80, 74))[0]
			q_next = np.max(model.predict(input_tp1.reshape(1, 4, 80, 74))[0])

			q_list[i] = np.max(targets[i])
			predicted_actions[np.argmax(targets[i])] += 1

			targets[i, action_t] =  (1. - terminal) * self.discount * q_next + reward_t

			if reward_t > 0. or terminal:
				print "Action %d rewarded with %f (sample #%d)"%(action_t, targets[i, action_t], idx)

			encouraged_actions[np.argmax(targets[i])] += 1

		return inputs, targets, encouraged_actions, predicted_actions, np.average(q_list)
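
The line that sets targets[i, action_t] above is the standard Q-learning bootstrap target: reward plus the discounted maximum over next-state action values, zeroed for terminal transitions. A standalone sketch with made-up numbers (q_target is a hypothetical helper, not part of the original class):

import numpy as np

def q_target(reward_t, q_next_values, terminal, discount=0.99):
    q_next = np.max(q_next_values)               # max over actions at the next state
    return (1.0 - terminal) * discount * q_next + reward_t

print(q_target(1.0, np.array([0.2, 0.7, 0.1]), terminal=0))  # about 1.693
print(q_target(1.0, np.array([0.2, 0.7, 0.1]), terminal=1))  # 1.0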
Example #24
def grid_xyz(xyz, n_x, n_y, **kwargs):
    """ Grid data as a list of X,Y,Z coords into a 2D array

    Parameters
    ----------
    xyz: np.array
        Numpy array of X,Y,Z values, with shape (n_points, 3)
    n_x: int
        Number of points in x direction (fastest varying!)
    n_y: int
        Number of points in y direction

    Returns
    -------
    gridded_data: np.array
        2D array of gridded data, with shape (n_x, n_y)

    Notes
    -----
    'x' is the inner dimension, i.e. image dimensions are (n_y, n_x). This is
    counterintuitive (to me at least) but in line with numpy definitions.
    """
    x, y, z = xyz[:, 0], xyz[:, 1], xyz[:, 2]
    x_ax = np.linspace(np.min(x), np.max(x), n_x)
    y_ax = np.linspace(np.min(y), np.max(y), n_y)

    xg, yg = np.meshgrid(x_ax, y_ax)

    data = griddata(xyz[:, :2], z, (xg, yg), **kwargs)
    return data    
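
A minimal usage sketch on a synthetic point cloud; it assumes grid_xyz above and scipy.interpolate.griddata (imported at module level in the original project) are available in the session:

import numpy as np

n_x, n_y = 50, 40
xg, yg = np.meshgrid(np.linspace(0, 1, n_x), np.linspace(0, 2, n_y))
z = np.sin(xg) * np.cos(yg)
xyz = np.column_stack([xg.ravel(), yg.ravel(), z.ravel()])

gridded = grid_xyz(xyz, n_x, n_y, method='linear')
print(gridded.shape)  # (40, 50), i.e. (n_y, n_x) as the Notes above describe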
Example #25
def makeThresholdMap(image, findCars, scales=[1.5], percentOfHeapmapToToss=.5):
    print("scales:", scales, ", type:", type(scales), "image.shape:", image.shape, ", dtype:", image.dtype, ", percentOfHeapmapToToss:", percentOfHeapmapToToss)
    boundingBoxList=[]
    boundingBoxWeights=[]
    for scale in scales:
        listOfBoundingBoxes, listOfWeights = findCars(image, scale)
        boundingBoxList+=listOfBoundingBoxes
        boundingBoxWeights+=listOfWeights

    if USEBOUNDINGBOXWEIGHTS:
        unNormalizedHeatMap=addWeightedHeat(image.shape, boundingBoxList, boundingBoxWeights)
    else:
        unNormalizedHeatMap=addHeat(image.shape, boundingBoxList)

    if USESTACKOFHEATMAPS:
        unNormalizedHeatMap,_=totalHeatmapStack(unNormalizedHeatMap)


    unNormalizedHeatMapCounts=np.unique(unNormalizedHeatMap, return_counts=True)
    if TESTING: print("makeThresholdMap-unNormalizedHeatMapCounts:", unNormalizedHeatMapCounts, ", len(unNormalizedHeatMapCounts):", len(unNormalizedHeatMapCounts), ", len(unNormalizedHeatMapCounts[0]):", len(unNormalizedHeatMapCounts[0]))
    unNormalizedHeatMapMidpoint=unNormalizedHeatMapCounts[0][int(round(len(unNormalizedHeatMapCounts[0])*percentOfHeapmapToToss))]
    thresholdMap=applyThreshold(unNormalizedHeatMap, unNormalizedHeatMapMidpoint)
    print("makeThresholdMap-max(thresholdMap):", np.max(thresholdMap), ", min(thresholdMap):", np.min(thresholdMap))
    if TESTING: print("makeThresholdMap-thresholdMap counts:", (np.unique(thresholdMap, return_counts=True)), ", len(thresholdMap):", len(thresholdMap), ", len(thresholdMap[0]):", len(thresholdMap[0]))
    normalizedMap=normalizeMap(thresholdMap)
    if TESTING: print("makeThresholdMap-normalizedMap counts:", (np.unique(normalizedMap, return_counts=True)), ", len(normalizedMap):", len(normalizedMap), ", len(normalizedMap[0]):", len(normalizedMap[0]))
    print("makeThresholdMap-max(normalizedMap):", np.max(normalizedMap), ", min(normalizedMap):", np.min(normalizedMap))
    return normalizedMap, boundingBoxList, unNormalizedHeatMap, boundingBoxWeights
Example #26
File: gmm.py Project: kslin/CS181
def gm_assign_to_cluster(X, center_list, cov_list, p_k):
    """Assigns each sample to one of the Gaussian clusters given.
    
    Returns an array with numbers, 0 corresponding to the first cluster in the
    cluster list.
    """
    # Reused code from E-step, should be unified somehow:
    samples = X.shape[0]
    K = len(center_list)
    log_p_Xn_mat = np.zeros((samples, K))
    for k in range(K):
        log_p_Xn_mat[:, k] = logmulnormpdf(X, center_list[k], cov_list[k]) + np.log(p_k[k])
    pmax = np.max(log_p_Xn_mat, axis=1)
    log_p_Xn = pmax + np.log(np.sum(np.exp(log_p_Xn_mat.T - pmax), axis=0).T)
    logL = np.sum(log_p_Xn)

    log_p_nk = np.zeros((samples, K))
    for k in range(K):
        # log_p_nk[:,k] = logmulnormpdf(X, center_list[k], cov_list[k]) + np.log(p_k[k]) - log_p_Xn
        log_p_nk[:, k] = log_p_Xn_mat[:, k] - log_p_Xn

    print log_p_nk
    # Assign to cluster:
    maxP_k = np.c_[np.max(log_p_nk, axis=1)] == log_p_nk
    # print np.max(log_p_nk, axis=1)
    maxP_k = maxP_k * (np.array(range(K)) + 1)
    return np.sum(maxP_k, axis=1) - 1
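
The two lines combining np.max with np.log(np.sum(np.exp(...))) above are the log-sum-exp trick; a minimal standalone sketch of why the maximum is subtracted first (values are arbitrary):

import numpy as np

a = np.array([-1000.0, -1001.0, -1002.0])        # log probabilities
naive = np.log(np.sum(np.exp(a)))                # -inf: exp underflows to 0
m = np.max(a)
stable = m + np.log(np.sum(np.exp(a - m)))       # about -999.59
print(naive, stable)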
Example #27
    def diff_dist_matrix(self, res_range=None, scaled=False):
        if res_range != None: assert(len(res_range) == 2)
        
        dist_matrices = []
        for pdb in self.get_next_pdb():
            ca_xyz = pdb.get_ca_xyz_matrix()
            if res_range != None: ca_xyz = ca_xyz[res_range[0]-1:res_range[1], :]
            dist_matrix = calc_distance_matrix(ca_xyz)
            dist_matrices.append(dist_matrix)

        scaled_diff_dist_matrix = num.zeros(dist_matrices[0].shape, 'd')
        count = 0
        for i in range(len(dist_matrices)):
            for j in range(i+1, len(dist_matrices)):
                diff_dist_matrix = num.abs(dist_matrices[i] - dist_matrices[j])
                if scaled:
                    scale = num.max(diff_dist_matrix)
                    if scale == 0: continue
                    diff_dist_matrix /= scale
                scaled_diff_dist_matrix += diff_dist_matrix
                count += 1
        #print >> sys.stderr, count
        scaled_diff_dist_matrix /= count
        if scaled:
            scaled_diff_dist_matrix /= num.max(scaled_diff_dist_matrix)
        return scaled_diff_dist_matrix
Example #28
 def max(self, axis=None, out=None, keepdims=False):
     self._prepare_out(out=out)
     try:
         value = np.max(self.value, axis=axis, out=out, keepdims=keepdims)
     except TypeError:  # numpy < 1.7 does not accept the keepdims argument
         value = np.max(self.value, axis=axis, out=out)
     return self.__quantity_instance__(value, self.unit, copy=False)
Example #29
def get_spherical_bounding_box(lons, lats):
    """
    Given a collection of points find and return the bounding box,
    as a pair of longitudes and a pair of latitudes.

    Parameters define longitudes and latitudes of a point collection
    respectively in a form of lists or numpy arrays.

    :return:
        A tuple of four items. These items represent western, eastern,
        northern and southern borders of the bounding box respectively.
        Values are floats in decimal degrees.
    :raises ValueError:
        If points collection has the longitudinal extent of more than
        180 degrees (it is impossible to define a single hemisphere
        bound to poles that would contain the whole collection).
    """
    north, south = numpy.max(lats), numpy.min(lats)
    west, east = numpy.min(lons), numpy.max(lons)
    assert (-180 <= west <= 180) and (-180 <= east <= 180)
    if get_longitudinal_extent(west, east) < 0:
        # points are lying on both sides of the international date line
        # (meridian 180). the actual west longitude is the lowest positive
        # longitude and east one is the highest negative.
        west = min(lon for lon in lons if lon > 0)
        east = max(lon for lon in lons if lon < 0)
        if not all((get_longitudinal_extent(west, lon) >= 0
                    and get_longitudinal_extent(lon, east) >= 0)
                   for lon in lons):
            raise ValueError('points collection has longitudinal extent '
                             'wider than 180 deg')
    return west, east, north, south
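
A minimal call sketch with made-up coordinates that do not cross the international date line; get_longitudinal_extent is assumed importable from the same module as the original function:

import numpy

lons = numpy.array([10.0, 12.5, 11.0])
lats = numpy.array([45.0, 46.5, 44.0])
west, east, north, south = get_spherical_bounding_box(lons, lats)
print(west, east, north, south)  # 10.0 12.5 46.5 44.0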
Example #30
def compare_objs(x, y):
    assert type(x) is type(y)
    if type(x) is dict:
        assert sorted(x.keys()) == sorted(y.keys())
        for ky in x:
            compare_objs(x[ky], y[ky])
    elif type(x) is list:
        assert len(x) == len(y)
        for ind in range(len(x)):
            compare_objs(x[ind], y[ind])
    elif type(x) is np.ndarray:
        assert x.shape == y.shape
        if not np.allclose(x, y, atol=1.0e-5, rtol=0.0):
            x = x.reshape(x.size)
            y = y.reshape(y.size)
            dd = x - y
            worst_case = np.max(np.abs(dd))
            print "worst case abs diff = %e" % worst_case
            ind = np.where((x != 0) | (y != 0))
            rel_err = np.abs(np.divide(dd[ind], np.abs(x[ind]) + np.abs(y[ind])))
            worst_case = np.max(rel_err)
            print "worst case rel diff = %e" % worst_case
            assert False
    else:
        assert x == y
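
A minimal usage sketch of compare_objs on nested containers (made-up data); it passes silently because the arrays agree within atol=1.0e-5:

import numpy as np

x = {'w': np.ones((2, 3)), 'meta': [1, 2, 'a']}
y = {'w': np.ones((2, 3)) + 1e-7, 'meta': [1, 2, 'a']}
compare_objs(x, y)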
Example #31
    def hmapsave(self, filename, polar=False):
        '''
        This helper function saves a beam to a fits file.
        The default is to save a rectangular-coordinate beam map to a
        binary table extension.

        Args:
            filename -- file in which to save the data.
            polar -- Save in a FEBECOP-compatible polar-coordinate format.
                default is False.
        '''
        # SAVE HYBRID BEAM MAP (self.beam) INTO FITS
        # FORMAT:
        # self.beam['SQUARE_X']: -hrmax, +hrmax in arcmin per
        #                        pixsize arcsec steps - 1D vector
        # self.beam['SQUARE_Y']
        # self.beam['SQUARE']: associated beam values - 1D vector
        # self.beam['POLAR_X']: cartesian coordinates of spherical phi, theta
        #                       for phi in 0, hrmax per pixsize arcsec steps
        #                       and theta in 0, 359.5 per half a degree steps
        # self.beam['POLAR_Y']
        # self.beam['POLAR']: associated beam values - 1D vector
        fits_keyword_data = {}
        if polar:
            # set up fits keywords
            fits_keyword_data['Mintheta'] = [
                np.min(self.beam['POLAR_Y']) * np.pi / 180,
                'Min polar angle [rad]']
            fits_keyword_data['Maxtheta'] = [
                np.max(self.beam['POLAR_Y']) * np.pi / 180,
                'Max polar angle [rad]']
            fits_keyword_data['Nphi'] = [np.size(self.beam['POLAR_X']),
                                         'Number of points in azimuth angle.']
            fits_keyword_data['Ntheta'] = [np.size(self.beam['POLAR_Y']),
                                           'Number of points in polar angle.']
            # Add a bunch of zeros to the polarized beam since it doesn't exist
            nulldata = np.zeros(np.shape(np.ravel(self.beam['POLAR'])))
            tbhdu = pyfits.BinTableHDU.from_columns(
                [pyfits.Column(name='BEAMDATA', unit='', format='E',
                               array=np.ravel(self.beam['POLAR'])),
                 pyfits.Column(name='BEAMDATAQ', unit='', format='E',
                               array=nulldata),
                 pyfits.Column(name='BEAMDATAU', unit='', format='E',
                               array=nulldata),
                 pyfits.Column(name='BEAMDATAV', unit='', format='E',
                               array=nulldata)])
        else:
            fits_keyword_data['NX'] = [np.size(self.beam['SQUARE_X']),
                                       'Grid X size']
            fits_keyword_data['NY'] = [np.size(self.beam['SQUARE_Y']),
                                       'Grid Y size']
            fits_keyword_data['XDELTA'] = [
                (self.beam['SQUARE_X'][1] - self.beam['SQUARE_X'][0])
                * np.pi / 180 / 60,
                'Grid X step [radians]']
            fits_keyword_data['YDELTA'] = [
                (self.beam['SQUARE_Y'][1] - self.beam['SQUARE_Y'][0])
                * np.pi / 180 / 60,
                'Grid Y step [radians]']
            fits_keyword_data['XCENTRE'] = [np.size(self.beam['SQUARE_X']) / 2,
                                            'Center location (X index)']
            fits_keyword_data['YCENTRE'] = [np.size(self.beam['SQUARE_Y']) / 2,
                                            'Center location (Y index)']
            tbhdu = pyfits.BinTableHDU.from_columns([
                pyfits.Column(name='BEAMDATA', unit='', format='E',
                              array=np.ravel(self.beam['SQUARE']))])

        for kk in fits_keyword_data.keys():
            tbhdu.header.set(kk, fits_keyword_data[kk][0],
                             fits_keyword_data[kk][1])

        tbhdu.writeto(filename)
        return
Example #32
print("n comp: {}".format(env.state.board_config.sum()))

s = State.init_state(config=cfg.vals)
state_features = Features.featurize_state(s)


y_hat = env._predict(state_features, n_samples=500)

print(y_hat)
print(y_hat.shape)

agg_sales = y_hat.sum(axis=1)
agg_mean = np.mean(agg_sales)
agg_lower, agg_upper = np.quantile(agg_sales, q=[.05, .95])

x_lim = [np.min(agg_sales), np.max(agg_sales)]

#plt.hist(agg_sales)
plt.axvline(agg_mean,linestyle='--',c='blue')
plt.axvline(agg_lower,linestyle='dotted',c='red')
plt.axvline(agg_upper,linestyle='dotted',c='red')

sns.distplot(agg_sales, hist=True, kde=True, color = 'blue',
             hist_kws={'edgecolor':'black'}, norm_hist=True)

plt.xlabel("Revenue ($)")
plt.savefig("figs/unseen_state-dist.pdf")
plt.clf()
plt.close()

board_config = s.board_config
Example #33
  def train(self, X, y, learning_rate=1e-3, num_iters=100,
            batch_size=200, verbose=False):
    """
    Train this linear classifier using stochastic gradient descent.

    Inputs:
    - X: A numpy array of shape (N, D) containing training data; there are N
      training samples each of dimension D.
    - y: A numpy array of shape (N,) containing training labels; y[i] = c
      means that X[i] has label 0 <= c < C for C classes.
    - learning_rate: (float) learning rate for optimization.
    - reg: (float) regularization strength.
    - num_iters: (integer) number of steps to take when optimizing
    - batch_size: (integer) number of training examples to use at each step.
    - verbose: (boolean) If true, print progress during optimization.

    Outputs:
    A list containing the value of the loss function at each training iteration.
    """
    num_train, dim = X.shape
    num_classes = np.max(y) + 1 # assume y takes values 0...K-1 where K is number of classes
    if self.W is None:
      # lazily initialize W
      self.W = 0.001 * np.random.randn(dim, num_classes)

    # Run stochastic gradient descent to optimize W
    loss_history = []
    for it in range(num_iters):

      #########################################################################
      # TODO:                                                                 #
      # Sample batch_size elements from the training data and their           #
      # corresponding labels to use in this round of gradient descent.        #
      # Store the data in X_batch and their corresponding labels in           #
      # y_batch; after sampling X_batch should have shape (batch_size, dim)   #
      # and y_batch should have shape (batch_size,)                           #
      #                                                                       #
      # Hint: Use np.random.choice to generate indices. Sampling with         #
      # replacement is faster than sampling without replacement.              #
      #########################################################################
      indices = np.random.choice(np.arange(num_train), size=batch_size)
      X_batch = X[indices,:]
      y_batch = y[indices]
      #########################################################################
      #                       END OF YOUR CODE                                #
      #########################################################################

      # evaluate loss and gradient
      loss, grad = self.loss(X_batch, y_batch)
      loss_history.append(loss)

      # perform parameter update
      #########################################################################
      # TODO:                                                                 #
      # Update the weights using the gradient and the learning rate.          #
      #########################################################################
      self.W -= learning_rate*grad
      #########################################################################
      #                       END OF YOUR CODE                                #
      #########################################################################

      if verbose and it % 100 == 0:
        print('iteration %d / %d: loss %f' % (it, num_iters, loss))

    return loss_history
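
A standalone sketch of just the minibatch sampling step described in the hint above (sampling with replacement via np.random.choice); the array sizes are arbitrary:

import numpy as np

num_train, dim, batch_size = 500, 3073, 200
X = np.random.randn(num_train, dim)
y = np.random.randint(0, 10, size=num_train)

indices = np.random.choice(num_train, size=batch_size, replace=True)
X_batch, y_batch = X[indices], y[indices]
print(X_batch.shape, y_batch.shape)  # (200, 3073) (200,)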
Example #34
estimator = LogisticRegression(x_train, y_train, polynomial_degree,
                               sinusoid_degree)
cost = estimator.train(max_iteration)
labels = estimator.unique_labels
plt.plot(range(len(cost[0])), cost[0], label=labels[0])
plt.plot(range(len(cost[1])), cost[1], label=labels[1])
plt.legend()
plt.show()
y_train_predictions = estimator.predict(x_train)

precision = np.sum(y_train_predictions == y_train) / y_train.shape[0] * 100
print(precision)

x_min = np.min(x_train[:, 0])
x_max = np.max(x_train[:, 0])
y_min = np.min(x_train[:, 1])
y_max = np.max(x_train[:, 1])

X = np.linspace(x_min, x_max, num_examples)
Y = np.linspace(y_min, y_max, num_examples)

Z = np.zeros((num_examples, num_examples))

for x_index, x in enumerate(X):
    for y_index, y in enumerate(Y):
        data = np.array([[x, y]])
        predictions = estimator.predict(data)
        Z[x_index][y_index] = predictions[0, 0]

positives = (y_train == 1).flatten()
Example #35
def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum()
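
Subtracting np.max(x) does not change the result but keeps np.exp from overflowing; a minimal sketch with large scores, assuming softmax above is in scope:

import numpy as np

x = np.array([1000.0, 1001.0, 1002.0])
print(softmax(x))                    # [0.09003057 0.24472847 0.66524096]
print(np.exp(x) / np.exp(x).sum())   # [nan nan nan]: exp(1000.) overflows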
Example #36
	def save(self, trainer):
		"""
		Saves the results of the trainer to a file.
		Args:
			trainer: the trainer whose results will be saved
		"""
		# create results folders and files
		self.create()

		results_dict = {key: None for key in self.columns_list}

		# Launches a comparison before saving results
		trainer.comparator.launch_comparison(50)

		lead_times = []
		for agent in trainer.agents:
			lead_times.append(agent.lead_time.display())

		test_id = self.get_test_id()

		results_dict['ID'] = test_id
		#### GAME PARAMETERS
		results_dict['CLT_DEMAND'] = trainer.env.params['client_demand'].display()
		results_dict['CP_AGENT'] = trainer.params['comparison_agent'].label
		results_dict['AGENTS'] = trainer.get_agents_labels()
		results_dict['USE_BO'] = trainer.params['use_backorders']
		results_dict['T'] = trainer.params['number_periods']
		results_dict['LDT'] = str(lead_times)
		results_dict['HC'] = trainer.params['holding_cost']
		results_dict['SC'] = trainer.params['shortage_cost']
		results_dict['SR'] = trainer.params['TS']
		results_dict['IIL'] = trainer.params['initial_inventory']

		#### AI PARAMETERS

		results_dict['ACTIONS'] = str("(min = "+str(np.min(trainer.params['AI_possible_actions']))+" , max = "+str(np.max(trainer.params['AI_possible_actions']))+")")
		results_dict['m'] = trainer.params['m']
		results_dict['AI_DN'] = str(trainer.params['AI_DN'])
		results_dict['N_ITER'] = trainer.train_iter
		results_dict["TIME_PERF"] = round(trainer.time_per_iteration * 100,2)

		#### RESULTS
		results_dict['AVG_SUM_DEMAND'] = trainer.comparator.AI_performance['sum_demand']
		results_dict['AI_AVG_CUM_COSTS'] = trainer.comparator.AI_performance['costs']
		results_dict['CP_AVG_CUM_COSTS'] = trainer.comparator.CP_performance['costs']
		
		#### AI RESULTS
		results_dict['AI_AVG_CR'] = trainer.comparator.AI_performance['coverage_rate']
		results_dict["AI_AVG_BR"] = trainer.comparator.AI_performance['breakdown_rate']
		results_dict["AI_AVG_SR"] = trainer.comparator.AI_performance['service_rate']
		#### CP RESULTS
		results_dict['CP_AVG_CR'] = trainer.comparator.CP_performance['coverage_rate']
		results_dict["CP_AVG_BR"] = trainer.comparator.CP_performance['breakdown_rate']
		results_dict["CP_AVG_SR"] = trainer.comparator.CP_performance['service_rate']
		#### DATE 
		results_dict["TEST_DATE"] = datetime.datetime.now().replace(second=0, microsecond=0)

		# Convert result dictionary into a list
		results = list(results_dict.values())

		# Load the excel workbook
		wb = load_workbook(Path(self.results_file_path))
		ws = wb.active

		i = 1
		while ws['A'+str(i)].value != None:
			i += 1
		for j, result in enumerate(results):
			cell = ws.cell(column = j+1, row=i, value=result)
			cell.alignment = Alignment(horizontal='center')
		
		wb.save(Path(self.results_file_path))
Example #37
def _plot_categorical(df, xlabel, ylabel, x_keys, y_keys, prefac, ax, cmap, s):
    """
    Plot two categorical variables against each other in a bubble plot.

    Parameters
    ----------
    df : pd.DataFrame
        A pandas DataFrame with the data

    xlabel : str
        The column name for the variable on the x-axis

    ylabel : str
        The column name for the variable on the y-axis

    x_keys : iterable
        A list containing the different categories in df[xlabel]

    y_keys: iterable
        A list containing the different categories in df[ylabel]

    prefac : float
        A pre-factor steering the shading of the bubbles

    ax : matplotlib.Axes object
        The matplotlib.Axes object to plot the bubble plot into

    cmap : matplotlib.cm.colormap
        A matplotlib colormap to use for shading the bubbles

    s : float
        A pre-factor changing the overall size of the bubbles

    Returns
    -------
    ax : matplotlib.Axes object
        The same matplotlib.Axes object for further manipulation

    """
    tuples, counts = [], []
    for i in range(len(x_keys)):
        for j in range(len(y_keys)):
            tuples.append((i,j))

            counts.append(len(df[(df[xlabel] == x_keys[i]) &
                                 (df[ylabel] == y_keys[j])]))

    x, y = list(zip(*tuples))

    cmap = plt.cm.get_cmap(cmap)
    sizes = (np.array(counts)/np.sum(counts))

    ax.scatter(x, y, s=s*1000*sizes, marker='o', linewidths=1, edgecolor='black',
                c=cmap(prefac*sizes/(np.max(sizes)-np.min(sizes))), alpha=0.7)

    ax.set_xticks(np.arange(len(x_keys)))
    ax.set_xticklabels(x_keys)
    ax.set_xlim(np.min(x)-1, np.max(x)+1)
    ax.set_xlabel(xlabel)

    ax.set_yticks(np.arange(len(y_keys)))
    ax.set_yticklabels(y_keys)
    ax.set_ylim(np.min(y)-1, np.max(y)+1)
    ax.set_ylabel(ylabel)

    return ax
Example #38
            y_acc = ["Time Series/Y/Original Record/Acceleration"]
            sa_gmroti50 = ims.gmrotipp(x_acc.value, x_acc.attrs["Time-step"],
                                       y_acc.value, y_acc.attrs["Time-step"],
                                       periods, 50.0)
            # Assumes Pseudo-spectral acceleration
            sa_gmroti50 = sa_gmroti50["PSA"]
    return sa_gmroti50

SPECTRA_FROM_FILE = {"Geometric": get_geometric_mean,
                     "GMRotI50": get_gmroti50,
                     "GMRotD50": get_gmrotd50}


SCALAR_XY = {"Geometric": lambda x, y : np.sqrt(x * y),
             "Arithmetic": lambda x, y : (x + y) / 2.,
             "Larger": lambda x, y: np.max(np.array([x, y])),
             "Vectorial": lambda x, y : np.sqrt(x ** 2. + y ** 2.)}

def get_scalar(fle, i_m, component="Geometric"):
    """
    Retrieves the scalar IM from the database
    :param fle:
        Instance of :class: h5py.File
    :param str i_m:
        Intensity measure
    :param str component:
        Horizontal component of IM
    """
     
    if not "H" in fle["IMS"].keys():
        x_im = fle["IMS/X/Scalar/" + i_m].value[0]
Example #39
    def run(self):
        (
            results_dir,
            ligands_file,
            target_sel,
            flex_sel,
            box_sel,
            ph,
            exhaustiveness,
            num_modes,
            energy_range,
            cpu,
            seed,
        ) = self.args

        self.logEvent.emit("<h2>Preparation</h2>")

        #
        # Check whether the output directory is empty
        #
        if os.listdir(results_dir):
            self.logEvent.emit(f"""
                <br/>
                <font color="red">
                    <b>The output folder is not empty!</b>
                </font>
            """)

        #
        # Create ligand directory
        #

        ligands_dir = results_dir + "/ligands"
        try:
            os.mkdir(ligands_dir)
        except FileExistsError:
            shutil.rmtree(ligands_dir)
            os.mkdir(ligands_dir)

        #
        # Convert SMILES file into PDBQT
        #
        obabel = pymol.plugins.pref_get("DOCKING_OBABEL")
        command = (f'"{obabel}" -i smi "{ligands_file}"'
                   f" -ph {ph} --gen3d -m"
                   f' -O "{ligands_dir}/.pdbqt"')
        output, success = run(command)
        if success:
            self.logEvent.emit(f"""
                <br/>
                <br/><b>Ligands converted to PDBQT.</b>
                <br/><b>OpenBabel command:</b> {command}
            """)
            self.logCodeEvent.emit(output)
        else:
            self.logEvent.emit(f"""
                <br/>
                <br/><b>Ligands conversion to PDBQT failed.</b>
                <br/><b>OpenBabel command:</b> {command}
            """)
            self.logCodeEvent.emit(output)
            self.done.emit(False)
            return

        #
        # Rename PDBQT files accordingly to SMILES
        # Be aware that not every SMILES file has a name column
        #
        count = 0
        lineno = 0
        has_names = True
        with open(ligands_file) as smi:
            for line in smi:
                lineno += 1

                # skip empty lines
                if line.strip() == "":
                    continue

                count += 1
                if has_names:
                    try:
                        # it really has names
                        smiles, name = line.split()
                    except:
                        if count != 1 and has_names:
                            self.logEvent.emit(f"""
                                <br/>
                                <br/><b>Inconsistent SMILES naming at molecule line #{lineno}.</b>
                                <br/><b><i>Please check your SMILES file.</i></b>
                            """)
                            self.done.emit(False)
                            return

                        # first line doesn't have a name
                        # don't rename files
                        has_names = False
                        continue
                    shutil.move(f"{ligands_dir}/{count}.pdbqt",
                                f"{ligands_dir}/{name}.pdbqt")

        if len(glob(f"{ligands_dir}/*.pdbqt")) != count:
            # The number of generated ligands and SMILES differ
            self.logEvent.emit(f"""
                <br/>
                <br/><b>Number of generated PDBQT files and SMILES molecules differ.</b>
                <br/><b>Please check your SMILES file.</b>
            """)
            self.done.emit(False)
            return

        #
        # The number of dockings to do
        #
        n_ligands = count
        self.numSteps.emit(count)

        #
        # Prepare rigid target
        #

        target_pdb = f"{results_dir}/target.pdb"
        cmd.save(target_pdb, target_sel)

        with chdir(dirname(target_pdb)):
            adt_python = pymol.plugins.pref_get("DOCKING_ADT_PYTHON")
            prepare_target = pymol.plugins.pref_get("DOCKING_PREPARE_RECEPTOR")
            command = f'"{adt_python}"' f' "{prepare_target}" -r "{target_pdb}"'
            output, success = run(command)
            if success:
                self.logEvent.emit(f"""
                    <br/>
                    <br/><b>Rigid target prepared.</b>
                    <br/><b>AutoDock command:</b> {command}
                """)
                self.logCodeEvent.emit(output)
            else:
                self.logEvent.emit(f"""
                    <br/>
                    <br/><b>Rigid target preparation failed.</b>
                    <br/><b>AutoDock command:</b> {command}
                """)
                self.logCodeEvent.emit(output)
                self.done.emit(False)
                return

        #
        # Prepare flexible target
        #
        if flex_sel != "":
            #
            # Construct residues string
            #
            flex_residues = set()
            for atom in cmd.get_model(flex_sel).atom:
                flex_residues.add(f"{atom.chain}:{atom.resn}{atom.resi}")

            flex_residues = ",".join(f"target:{res}" for res in flex_residues)

            #
            # Run AutoDock command
            #

            target_pdbqt = f"{results_dir}/target.pdbqt"
            with chdir(dirname(target_pdb)):
                adt_python = pymol.plugins.pref_get("DOCKING_ADT_PYTHON")
                prepare_flexreceptor = pymol.plugins.pref_get(
                    "DOCKING_PREPARE_FLEXRECEPTOR")
                command = (f'"{adt_python}"'
                           f' "{prepare_flexreceptor}"'
                           f' -r "{target_pdbqt}"'
                           f" -s {flex_residues}")
                output, success = run(command)
                if success:
                    self.logEvent.emit(f"""
                        <br/>
                        <br/><b>Flexible target prepared.</b>
                        <br/><b>AutoDock command:</b> {command}
                    """)
                    self.logCodeEvent.emit(output)
                else:
                    self.logEvent.emit(f"""
                        <br/>
                        <br/><b>Flexible target preparation failed.</b>
                        <br/><b>AutoDock command:</b> {command}
                    """)
                    self.logCodeEvent.emit(output)
                    self.done.emit(False)
                    return

        #
        # Create Vina results directory
        #

        output_dir = f"{results_dir}/poses"
        try:
            os.mkdir(output_dir)
        except FileExistsError:
            pass

        #
        # Compute box variables
        #
        box_coords = cmd.get_coords(box_sel)

        max = np.max(box_coords, axis=0)
        min = np.min(box_coords, axis=0)

        half_size = (max - min) / 2
        center = min + half_size

        size_x, size_y, size_z = half_size * 2
        center_x, center_y, center_z = center

        size_x, size_y, size_z = (
            round(float(size_x), 2),
            round(float(size_y), 2),
            round(float(size_z), 2),
        )

        center_x, center_y, center_z = (
            round(float(center_x), 2),
            round(float(center_y), 2),
            round(float(center_z), 2),
        )

        #
        # Project data
        #

        project_file = results_dir + "/docking.json"
        project_data = {}

        project_data.update({
            "program": "vina",
            "results_dir": results_dir,
            "ligands_dir": ligands_dir,
            "output_dir": output_dir,
            "size_x": size_x,
            "size_y": size_y,
            "size_z": size_z,
            "center_x": center_x,
            "center_y": center_y,
            "center_z": center_z,
        })

        if flex_sel == "":
            project_data.update({
                "flexible": False,
                "target_pdbqt": f"{results_dir}/target.pdbqt"
            })
        else:
            project_data.update({
                "flexible":
                True,
                "rigid_pdbqt":
                f"{results_dir}/target_rigid.pdbqt",
                "flex_pdbqt":
                f"{results_dir}/target_flex.pdbqt",
            })
        #
        # Prompt for user confirmation
        #

        vina = pymol.plugins.pref_get("DOCKING_VINA")
        base_command = (f"{vina}"
                        f" --center_x {center_x}"
                        f" --center_y {center_y}"
                        f" --center_z {center_z}"
                        f" --size_x {size_x}"
                        f" --size_y {size_y}"
                        f" --size_z {size_z}"
                        f" --cpu {cpu}"
                        f" --seed {seed}"
                        f" --exhaustiveness {exhaustiveness}"
                        f" --num_modes {num_modes}"
                        f" --energy_range {energy_range}")
        self.logEvent.emit(f"""
            <br/>
            <h2>Docking</h2>
            <br/>
            <b>Vina base command:</b> {base_command}
        """)

        fail_count = 0
        for i, ligand_pdbqt in enumerate(glob(f"{ligands_dir}/*.pdbqt")):
            name, _ = splitext(basename(ligand_pdbqt))
            output_pdbqt = f"{output_dir}/{name}.out.pdbqt"
            log_txt = f"{output_dir}/{name}.log"

            command = base_command + (f' --ligand "{ligand_pdbqt}"'
                                      f' --out "{output_pdbqt}"'
                                      f' --log "{log_txt}"')
            if project_data["flexible"]:
                rigid_pdbqt = project_data["rigid_pdbqt"]
                flex_pdbqt = project_data["flex_pdbqt"]
                command += f' --receptor "{rigid_pdbqt}"' f' --flex "{flex_pdbqt}"'
            else:
                target_pdbqt = project_data["target_pdbqt"]
                command += f' --receptor "{target_pdbqt}"'

            output, success = run(command)
            self.currentStep.emit(i + 1)
            if not success:
                fail_count += 1
                if fail_count <= 10:
                    self.logEvent.emit(f"""
                        <br/>
                        <font color="red">
                            <b>Vina command failed:</b> {command}
                            <br/>
                            <pre>{output}</pre>
                        </font>
                    """)
                elif fail_count == 11:
                    self.logEvent.emit(f"""
                        <br/>
                        <h3>
                            <font color="red">
                                Too many errors. Omitting output.
                            </font>
                        </h3>
                    """)

        done_ligands = len(glob(f"{output_dir}/*.out.pdbqt"))

        self.logEvent.emit("<br/><h2>Summary</h2>")
        summary = f"""
            <br/><b>Total expected runs:</b> {n_ligands}
            <br/><b>Total failures:</b> {fail_count}
            <br/><b>Total found PDBQT files:</b> {done_ligands}
        """
        if done_ligands < n_ligands or fail_count > 0:
            self.logEvent.emit(f"<font color='red'>{summary}</font>")
        else:
            self.logEvent.emit(f"{summary}")

        with open(results_dir + f"/docking.json", "w") as docking_file:
            json.dump(project_data, docking_file, indent=4)
        self.done.emit(True)
Example #40
    def step(self, action):
        # Check action
        action = np.clip(action, self.low_action, self.high_action)
        assert self.action_space.contains(action), "%r (%s) invalid action" % (action, type(action))
        
        # States before simulate
        xpos, ypos, xface, yface = self.state
        rotate = action[0]
        theta = np.arctan2(yface, xface)

        # Simulate
        # update facing
        theta = theta + self.rotate_scale*rotate
        xface = np.cos(theta)
        yface = np.sin(theta)
        # update position
        xpos = xpos + xface*self.speed_scale
        ypos = ypos + yface*self.speed_scale

        # States after simulate
        self.state = [xpos, ypos, xface, yface]
        self.state = np.clip(self.state, self.low_state, self.high_state)
        
        # Record Trajectory
        self.traj.append(self.state[:2])

        # Define reward function
        # Define done
        done = False
        reward = 0
        xpos, ypos, xface, yface = self.state
        # time penalty(distance)
        vec = np.array([xpos, ypos])-self.target_coord[self.task[0]]
        dist = np.linalg.norm(vec)
        reward += -dist
        #print('Distance Reward: {}'.format(reward))
        # time penalty(task)
        reward += self.task_penalty
        if self.task_penalty > 0:
            #print('Task: {}'.format(self.task_penalty))
            self.task_penalty = np.max((0, self.task_penalty-self.speed_scale/3))
        
        done_status = ''
        # hit the target
        for i in range(self.num_targets):
            # skip finished target
            if self.finished_task.count(i) > 0:
                continue
            vec_i = np.array([xpos, ypos])-self.target_coord[i]
            dist_i = np.linalg.norm(vec_i)
            if dist_i < self.target_size:

                if i == self.task[0]:
                    # hit right target
                    if len(self.task) == 1:
                        # finish all tasks
                        done = True
                        reward += 10
                        done_status = 'Finish Task'
                    else:
                        # finish subtask
                        done_status = 'Right Target'
                        # start task penalty
                        self.task_penalty = np.linalg.norm(self.target_coord[self.task[0]]-self.target_coord[self.task[1]])
                        # pop task
                        self.finished_task.append(self.task[0])
                        self.task = self.task[1:]

                else:
                    # hit wrong target
                    done = True
                    done_status = 'Wrong Target'
                break
        
        # hit the wall
        if not done:
            if xpos == 1 or xpos == -1 or ypos == 1 or ypos == -1:
                done = True
                done_status = 'Hit the Wall'
        
        # times up
        self.timesteps += 1
        if not done and self.timesteps >= self.max_timesteps:
            done = True
            done_status = 'Times Up'

        # record
        min_dist_cp = 0
        min_dist_ft = 0
        if done:
            self.traj = np.array(self.traj)
            # find dist closest to checkpoint
            ctcp = np.argmin(np.linalg.norm(self.traj-self.target_coord[self.fixed_task[0]], axis=1))
            ctft = ctcp+np.argmin(np.linalg.norm(self.traj[ctcp:]-self.target_coord[self.fixed_task[1]], axis=1))
            min_dist_cp = np.linalg.norm(self.traj[ctcp]-self.target_coord[self.fixed_task[0]])
            min_dist_ft = np.linalg.norm(self.traj[ctft]-self.target_coord[self.fixed_task[1]])

            # episode
            self.episode = (self.episode + 1) % 10

        return self.get_obs(), reward, done, {'done_status': done_status, 'dist': dist, 'min_dist_cp': min_dist_cp, 'min_dist_ft': min_dist_ft}
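# Usage sketch (not part of the original example): one way an episode might be
# rolled out against the environment whose step() method is shown above. The
# reset() method and action_space attribute are assumed to follow the usual
# Gym-style API; the policy argument is optional.
def rollout(env, policy=None):
    """Run one episode with a random (or supplied) policy and return the episode return."""
    obs = env.reset()
    total_reward, done, info = 0.0, False, {}
    while not done:
        action = env.action_space.sample() if policy is None else policy(obs)
        obs, reward, done, info = env.step(action)
        total_reward += reward
    print("episode finished:", info.get('done_status'), "return:", total_reward)
    return total_reward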
import numpy as np
import matplotlib.pyplot as plt


def generate_quanser_summary(trajectory, residuals, detections):
    #
    # Print reprojection error statistics
    #
    print("summary")
    weights = detections[:, ::3]
    reprojection_errors = []
    for i in range(trajectory.shape[0]):
        valid = np.reshape(residuals[i], [2,-1])[:, weights[i,:] == 1]
        reprojection_errors.extend(np.linalg.norm(valid, axis=0))
    reprojection_errors = np.array(reprojection_errors)
    print('Reprojection error over whole image sequence:')
    print('- Maximum: %.04f pixels' % np.max(reprojection_errors))
    print('- Average: %.04f pixels' % np.mean(reprojection_errors))
    print('- Median: %.04f pixels' % np.median(reprojection_errors))

    #
    # Figure: Reprojection error distribution
    #
    plt.figure(figsize=(8,3))
    plt.hist(reprojection_errors, bins=80, color='k')
    plt.ylabel('Frequency')
    plt.xlabel('Reprojection error (pixels)')
    plt.title('Reprojection error distribution')
    plt.tight_layout()
    plt.savefig('out_histogram.png')

    #
    # Figure: Comparison between logged encoder values and vision estimates
    #
    logs       = np.loadtxt('../data/logs.txt')
    enc_time   = logs[:,0]
    enc_yaw    = logs[:,1]
    enc_pitch  = logs[:,2]
    enc_roll   = logs[:,3]

    # Note: The logs have been time-synchronized with the image sequence,
    # but there will be an offset between the motor angles and the vision
    # estimates. That offset is automatically subtracted here.
    vis_yaw = trajectory[:,0] + enc_yaw[0] - trajectory[0,0]
    vis_pitch = trajectory[:,1] + enc_pitch[0] - trajectory[0,1]
    vis_roll = trajectory[:,2] + enc_roll[0] - trajectory[0,2]

    vis_fps  = 16
    enc_frame = enc_time*vis_fps
    vis_frame = np.arange(trajectory.shape[0])

    fig,axes = plt.subplots(3, 1, figsize=[6,6], sharex='col')
    axes[0].plot(enc_frame, enc_yaw, 'k:', label='Encoder log')
    axes[0].plot(vis_frame, vis_yaw, 'k', label='Vision estimate')
    axes[0].legend()
    axes[0].set_xlim([0, vis_frame[-1]])
    axes[0].set_ylim([-1, 1])
    axes[0].set_ylabel('Yaw (radians)')

    axes[1].plot(enc_frame, enc_pitch, 'k:')
    axes[1].plot(vis_frame, vis_pitch, 'k')
    axes[1].set_xlim([0, vis_frame[-1]])
    axes[1].set_ylim([0.0, 0.6])
    axes[1].set_ylabel('Pitch (radians)')

    axes[2].plot(enc_frame, enc_roll, 'k:')
    axes[2].plot(vis_frame, vis_roll, 'k')
    axes[2].set_xlim([0, vis_frame[-1]])
    axes[2].set_ylim([-0.6, 0.6])
    axes[2].set_ylabel('Roll (radians)')
    axes[2].set_xlabel('Image number')
    plt.tight_layout()
    plt.savefig('out_trajectory.png')
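# Usage sketch (assumption, not part of the original code): the function above
# expects per-frame (yaw, pitch, roll) estimates, stacked residual vectors and
# a detections array whose every third column is a visibility weight, and it
# reads ../data/logs.txt itself. The file names below are illustrative only.
if __name__ == '__main__':
    trajectory = np.loadtxt('../data/trajectory.txt')   # shape (N, 3)
    residuals  = np.loadtxt('../data/residuals.txt')    # shape (N, 2*M)
    detections = np.loadtxt('../data/detections.txt')   # shape (N, 3*M)
    generate_quanser_summary(trajectory, residuals, detections)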
Example #42
0
def extract_structure_information(data,
                                  k,
                                  outlier_threshold,
                                  distance_measure,
                                  minkowski_p=None):
    """
    Extract structure information from the dataset

    :param data: a numpy-matrix. each column represents an attribute; each row a data item
    :param k: the amount of neighbours to use for the initial knn graph
    :param outlier_threshold: the maximum density an outlier can have
    :param distance_measure: a str describing the distance measure:
        ‘braycurtis’,
        ‘canberra’,
        ‘chebyshev’,
        ‘cityblock’,
        ‘correlation’,
        ‘cosine’,
        ‘dice’,
        ‘euclidean’,
        ‘hamming’,
        ‘jaccard’,
        ‘kulsinski’,
        ‘mahalanobis’,
        ‘matching’,
        ‘minkowski’,
        ‘rogerstanimoto’,
        ‘russellrao’,
        ‘seuclidean’,
        ‘sokalmichener’,
        ‘sokalsneath’,
        ‘sqeuclidean’,
        ‘yule’
    :param minkowski_p: The p-norm to apply. Mandatory for the Minkowski distance
    :return: a tuple (cluster supporting objects, cluster outliers, rest, distance matrix, k-nearest neighbours),
        where the first three entries are lists of indices into the data matrix
        * Cluster Supporting Object (CSO): object with density higher than all its neighbors
        * Cluster Outliers: object with density lower than all its neighbors, and lower than a predefined threshold
        * Rest Object: object not assigned to one of the previous groups
        * Distance matrix: a matrix of distances between data points
        * K-Nearest neighbours: a python list of numpy arrays of the nearest neighbours of each element
    """
    # check that a p value exists if minkowski distance is used
    if distance_measure == 'minkowski' and minkowski_p is None:
        raise FlameError(
            "Minkowski distance requires a p value to be supplied!")

    item_count = data.shape[0]

    if k > item_count:
        raise FlameError("More cluster neighbours (" + str(k) +
                         ") requested than data points available! (" +
                         str(item_count) + ")")
    if k <= 0:
        raise FlameError("Requested cluster neighbour count is " + str(k) +
                         "...")

    # get a distance matrix describing our data from scipy, square it so creating the knn graph is easy
    distance_matrix = squareform(pdist(data, distance_measure, p=minkowski_p))

    # creates an adjacency list where each row contains the k nearest neighbours
    # neighbours after the k-nearest-neighbour are appended if they have the same distance as the k-nearest-neighbour
    knn_graph = []
    for i in range(item_count):
        # get the distances and sort them
        distance_matrix_row = distance_matrix[i]
        knns = distance_matrix_row.argsort()[1:]

        # append neighbours with the same distance as the k-nearest-neighbour
        same_distance_k = k
        last_neighbour_distance = distance_matrix_row[knns[k - 1]]
        for j in range(k, item_count):
            if j >= len(knns) or distance_matrix_row[
                    knns[j]] > last_neighbour_distance:
                break
            else:
                same_distance_k += 1

        knn_graph.append(knns[:same_distance_k])

    # calculate the density for each item
    max_distance = numpy.max(distance_matrix)

    densities = numpy.empty((item_count, ), dtype=float)
    for i in range(item_count):
        distance_sum = (numpy.sum(distance_matrix[i].take(knn_graph[i])) /
                        len(knn_graph[i]))
        if distance_sum > 0:
            densities[i] = max_distance / distance_sum
        else:
            densities[i] = sys.float_info.max

    # create item bins
    cluster_supporting_objects = []
    outliers = []
    rest = []

    # sort items
    for i in range(densities.shape[0]):
        knn_densities = densities.take(knn_graph[i])
        item_density = densities[i]
        if item_density <= outlier_threshold and item_density < knn_densities.min(
        ):
            outliers.append(i)
        elif item_density > knn_densities.max():
            cluster_supporting_objects.append(i)
        else:
            rest.append(i)

    return cluster_supporting_objects, outliers, rest, distance_matrix, knn_graph
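# Usage sketch (not part of the original module): the function above relies on
# module-level imports (numpy, sys, scipy.spatial.distance) and a project
# specific FlameError exception; they are repeated/stubbed here as assumptions
# so the example is self-contained.
import sys
import numpy
from scipy.spatial.distance import pdist, squareform

class FlameError(Exception):
    """Stand-in for the project's own exception type."""
    pass

if __name__ == '__main__':
    rng = numpy.random.default_rng(0)
    data = rng.normal(size=(200, 2))
    csos, outliers, rest, distances, knn = extract_structure_information(
        data, k=10, outlier_threshold=2.0,
        distance_measure='minkowski', minkowski_p=2)
    print(len(csos), "CSOs,", len(outliers), "outliers,", len(rest), "rest objects")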
Example #43
0
            marker='o',
            linestyle='None',
            linewidth=2,
            color=col.maroon1)
    f3.set_xlabel('P(fitter)')
    f3.set_ylabel('P(equal fitness)')
    #f3.set_xlim([0,0.6])
    #f3.set_ylim([0,0.3])

    nbins = 20

    # Replaces 0 in A[:,1], so that the log operation works.
    A[np.where(A[:, 1] == 0), 1] = 0.1
    D = np.log(A[:, 1])

    print('D min/max: {0}/{1}'.format(np.min(D), np.max(D)))

    bins = np.linspace(np.min(D), np.max(D), nbins)
    h, b = np.histogram(D, bins)
    f4 = F1.add_subplot(2, 2, 4)
    f4.plot(b[:-1], h, '.', color=col.darkviolet, marker='o', markersize=8)
    f4.set_xlabel('log (P(fitter))')
    f4.set_ylabel('Freq')

    F1.tight_layout()
    figpath = 'figures/prob_fitinc_pOn_' + str(pOn) + '.png'
    plt.savefig(figpath)
    plt.clf()

print("See results in figures/prob_fitinc_pOn*.png")
Example #44
0
import numpy as np
from pymol import cmd

def display_box_sel(name, sel):
    # Bounding box of a PyMOL selection; display_box() is assumed to be defined
    # elsewhere in the same script.
    coords = cmd.get_coords(sel)
    box_max = np.max(coords, axis=0)
    box_min = np.min(coords, axis=0)
    display_box(name, box_max, box_min)
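# Usage sketch (assumption, not from the original script): once the script is
# loaded into a PyMOL session, the helper can be called on any selection, e.g.
#   display_box_sel('grid_box', 'polymer')
# The box and selection names above are illustrative only.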
Example #45
0
def main(filename, zmin=None, zmax=None):

    #Read the parameter file
    params = read_redm_param(filename)
    if zmin is not None and zmax is not None:
        params['zmin'] = zmin
        params['zmax'] = zmax
    print >> sys.stderr, "Parameter file read successful"

    #Read in the data
    print >> sys.stderr, "Reading clusters file..."
    if os.path.isfile(params['cluster_file']) == False:
        print >> sys.stderr, "ERROR:  Cluster catalog file " + params[
            'cluster_file'] + " not found"
        sys.exit(1)
    cat = pyfits.open(params['cluster_file'])
    cat = cat[1].data
    print >> sys.stderr, len(cat), " clusters read in"
    print >> sys.stderr, np.max(cat['z'])
    print >> sys.stderr, "Reading members file..."
    if os.path.isfile(params['member_file']) == False:
        print >> sys.stderr, "ERROR:  Member catalog file " + params[
            'member_file'] + " not found"
        sys.exit(1)
    mem = pyfits.open(params['member_file'])
    mem = mem[1].data
    print >> sys.stderr, len(mem), " galaxies read in"

    #If asked to use zred instead of z_lambda, set up for that
    if int(params['use_zred']) == 1:
        [cat, mem] = set_zred.set_zred(cat, mem)

    #If not asked to use lambda_errors in calculations, set them to zero
    if int(params['use_lambda_err']) == 0:
        cat['lambda_chisq_e'][:] = cat['lambda_chisq_e'][:] * 0.

    #Also read in data to abundance match to, if requested
    if int(params['ABM']) == 1:
        print >> sys.stderr, "Reading in clusters to use for abundance matching..."
        if os.path.isfile(params['abm_file']) == False:
            print >> sys.stderr, "ERROR:  ABM catalog file " + params[
                'abm_file'] + " not found"
            sys.exit(1)
        abm = pyfits.open(params['abm_file'])
        abm = abm[1].data
        print >> sys.stderr, len(abm), " clusters for ABM"
    if params['stellarmass']:
        mag = mem['MSTAR_50']
    else:
        #Read in amag data if it exists, otherwise run kcorrect
        if os.path.isfile(params['kcorr_file']) == False:
            #Running kcorrect -- note this requires running IDL
            #os.system('setenv CATFILE '+params['cluster_file'])
            os.environ['CATFILE'] = params['cluster_file']
            os.environ['MEMFILE'] = params['member_file']
            os.environ['KCORRFILE'] = params['kcorr_file']
            os.environ['USE_DES'] = params['use_des']
            os.environ['NO_UBAND'] = params['no_uband']
            os.environ['LRG'] = params['LRG']
            os.environ['BANDSHIFT'] = params['bandshift']
            os.environ['USE_ZRED'] = params['use_zred']
            os.system(
                '/afs/slac/g/ki/software/idl/idl70/bin/idl < /afs/slac.stanford.edu/u/ki/chto100/code/redmapper_clf/redmapper/rm_kcorr_wrapper.pro'
            )
            if os.path.isfile(params['kcorr_file']) == False:
                print >> sys.stderr, "ERROR:  Kcorrect failed, no file found"
                sys.exit(1)
        kcorr = pyfits.open(params['kcorr_file'])
        kcorr = kcorr[0].data
        print >> sys.stderr, len(kcorr), " galaxies kcorrected"
        if len(kcorr) != len(mem):
            print >> sys.stderr, "ERROR!   Number of galaxies != number of kcorrect values"
            sys.exit(1)

        #Pick up the mags we want for all galaxies
        if (int(params['obs_clf']) == 1) & (int(params['use_lum']) == 0):
            mag = mem['imag']
        else:
            if int(params['use_REFMAG']) == 1:
                if params['Analband'] == "r":
                    print >> sys.stderr, "using r band"
                    mag = mem['REFMAG'] + (kcorr[:, 1] -
                                           mem['model_mag'][:, 3])
                elif params['Analband'] == "i":
                    mag = mem['REFMAG'] + (kcorr[:, 2] -
                                           mem['model_mag'][:, 3])
                else:
                    print >> sys.stderr, "ERRPR: OOPS I don't know the band you specified (because I am too lazy), which is {0}".format(
                        params['Analband'])
            else:
                print(params['use_des'])
                if len(mem['model_mag'][0]) == 4:
                    mag = mem['imag'] + (kcorr[:, 2] - mem['model_mag'][:, 2])
                else:
                    mag = mem['imag'] + (kcorr[:, 2] - mem['model_mag'][:, 3])

    #Pulling limits from the parameters structure
    lm_min = np.array(params['lm_min'])
    lm_max = np.array(params['lm_max'])
    zmin = np.array(params['zmin'])
    zmax = np.array(params['zmax'])
    lm_min = lm_min.astype(float)
    lm_max = lm_max.astype(float)
    zmin = zmin.astype(float)
    zmax = zmax.astype(float)
    #If requested, switch to ABM lambda limits
    if int(params['ABM']) == 1:
        print >> sys.stderr, "Running abundance matching..."
        lm_min = abm_limits(cat, abm, params['area'], params['abm_area'], zmin,
                            zmax, lm_min)
        lm_max = abm_limits(cat, abm, params['area'], params['abm_area'], zmin,
                            zmax, lm_max)
        print "abundance matching: lm_min", lm_min
        print "abundance matching: lm_max", lm_max
    else:
        my_nz = len(zmin)
        lm_min = np.repeat([lm_min], my_nz, axis=0)
        lm_max = np.repeat([lm_max], my_nz, axis=0)

    #Get central absolute mags
    #Will only recalculate the index if it doesn't exist; otherwise, read in the index
    #and get the magnitudes from the mag values already available
    #Updated version -- for redmapper v5.10 and greater -- uses indices supplied
    #in the catalog files for finding the necessary index
    c_names = cat.columns.names
    use_id_cent = False
    #Check to see if the central IDs are available
    for name in c_names:
        if name == 'ID_CENT':
            use_id_cent = True
            break
    ''' 
    if use_id_cent:
        cengalindex = np.zeros_like(cat['id_cent'])
        cenmag = np.zeros_like(cengalindex).astype(float)
        #Hash table taking galaxy ID to index
        offset = np.min(mem['id'])
        g_index = np.zeros(np.max(mem['id'])-offset+1)-1
        g_index[mem['id']-offset] = np.array(range(len(mem)))
#modified by chto to handle that cat['id_cent']=0
        for i in range(len(cengalindex[0])):  
            index = np.where(cat['id_cent'][:,i]-offset>=0)
            cengalindex[index,i] = g_index[cat['id_cent'][index,i]-offset]
            cenmag[index,i] = mag[cengalindex[index,i]]
        if int(params['weight_cen'])==0:
            cenmag = cenmag[0]
            cengalindex = cengalindex[0]
        del g_index
    '''
    #############
    #Algorithm changed by Chto
    #The reason is that np.zeros with a very large size will cause a memory error in Python
    #############
    if use_id_cent:
        cengalindex = np.zeros_like(cat['id_cent'])
        cenmag = np.zeros_like(cengalindex).astype(float)
        #Hash table taking galaxy ID to index
        offset = np.min(mem['id'])
        g_index = dok_matrix((np.max(mem['id']) - offset + 1, 1), dtype=np.int)
        g_index[mem['id'] - offset] = np.array(range(len(mem)))[:, np.newaxis]
        if len(cenmag.shape) == 1:
            index = np.where(cat['id_cent'][:] - offset >= 0)[0]
            cengalindex[index] = g_index[cat['id_cent'][index] -
                                         offset].toarray().flatten()
            cenmag[index] = mag[cengalindex[index].astype(int)]
            cengalindex = cengalindex.astype(int)
            cengalindex = cengalindex.reshape(-1, 1)
            cenmag = cenmag.reshape(-1, 1)
        else:
            for i in range(len(cengalindex[0])):
                index = np.where(cat['id_cent'][:, i] - offset >= 0)[0]
                cengalindex[index, i] = g_index[cat['id_cent'][index, i] -
                                                offset].toarray().flatten()
                cenmag[index, i] = mag[cengalindex[index, i]]
        if int(params['weight_cen']) == 0:
            cenmag = cenmag[:, 0]
            cengalindex = cengalindex[:, 0]
            cengalindex = cengalindex.reshape(-1, 1)
            cenmag = cenmag.reshape(-1, 1)
        del g_index

        gc.collect()
#########################
    else:
        #No central IDs found -- doing it the hard way
        if os.path.isfile(params['cindex_file']) == False:
            print >> sys.stderr, "Getting central magnitudes..."
            [cenmag, cengalindex
             ] = get_central_mag(cat,
                                 mem,
                                 mag,
                                 weight_cen=int(params['weight_cen']))
            cengalindex = cengalindex.astype(long)
            hdu = pyfits.PrimaryHDU(cengalindex)
            hdu.writeto(params['cindex_file'])
        else:
            print >> sys.stderr, "Reading in central magnitudes..."
            cengalindex = pyfits.open(params['cindex_file'])
            cengalindex = cengalindex[0].data
            cengalindex = cengalindex.astype(long)
            cenmag = np.zeros_like(cengalindex).astype(float)
            print len(cenmag)
            for i in range(len(cenmag)):
                cenmag[i] = mag[cengalindex[i]]

    #If available, get the limiting magnitude information
    #Currently set for the more generous cut
    use_limmag = False
    for name in c_names:
        if name == 'LIM_LIMMAG':
            use_limmag = True
            limmag = cat['LIM_LIMMAG']
            break
    #Convert limiting magnitude to absolute if needed; based on central
    #galaxy's k-correction
    if use_limmag:
        if params['dohaloCat']:
            limmag = cat['LIM_LIMMAG_DERED']
        else:
            if int(params['get_limmag_5sigma']) == 1:
                limmag = get_limmag_5sigma(limmag, cat['lim_exptime'])
            if int(params['use_des']) == 1:
                if params['Analband'] == "r":
                    limmag = limmag - (cat['MODEL_MAG'][:, 1] - cenmag[:, 0])
                elif params['Analband'] == "i":
                    limmag = limmag - (cat['MODEL_MAG'][:, 2] - cenmag[:, 0])
                else:
                    print >> sys.stderr, "ERRPR: OOPS I don't know the band you specified (because I am too lazy), which is {0}".format(
                        params['Analband'])
            else:
                limmag = limmag - (cat['imag'] - cenmag[:, 0])
    else:
        limmag = []
    if params['stellarmass']:
        limmag = []

    #For the rest of the calculations, since these are predicated on the
    #given lambda/z thresholds, remove all the clusters we don't care about.
    #This should speed things up significantly/avoid memory issues.
    print >> sys.stderr, "Ultimate max z cut is at: ", float(
        params['zcut_max'])
    clist = np.where(cat['z_lambda'] < float(params['zcut_max']))[0]
    cmlist = np.where(mem['z'] < float(params['zcut_max']))[0]
    cat = cat[clist]
    cenmag = cenmag[clist]
    cengalindex = cengalindex[clist]
    if len(limmag) != 0:
        limmag = limmag[clist]
    #Note that we also must trim galaxies
    mem = mem[cmlist]
    if not params['stellarmass']:
        kcorr = kcorr[cmlist]
    idn_list = np.zeros(len(mag))
    idn_list[cmlist] = np.array(range(len(cmlist)))
    mag = mag[cmlist]
    cengalindex = idn_list[cengalindex]
    cengalindex = cengalindex.astype(long)
    del cmlist, clist
    print "cmlist, clist: ", gc.collect()
    ####
    ##Try to check what the magnitude is.
    #WARNING: Cuts should be VERY generous; otherwise there may be issues with P(z) tails...

    #Make the main output directory
    os.system("mkdir -p " + params['outdir'])

    #Now that we have our magnitudes, add central corrections if necessary
    if int(params['use_dr8_cen_corr']) == 1:
        if int(params['weight_cen']) == 1:
            cenmag[:, 0] = cenmag[:, 0] + correct_dr8_cen([0.213, -0.08],
                                                          cat['z_lambda'])
            cenmag[:, 1] = cenmag[:, 1] + correct_dr8_cen([0.104, -0.036],
                                                          cat['z_lambda'])
            mag[cengalindex[:, 0]] = mag[cengalindex[:, 0]] + correct_dr8_cen(
                [0.213, -0.08], mem['z'][cengalindex[:, 0]])
            corrlist = np.where(cat['ncent_good'] >= 2)[0]
            if len(corrlist) > 0:
                mag[cengalindex[
                    corrlist,
                    1]] = mag[cengalindex[corrlist, 1]] + correct_dr8_cen(
                        [0.104, -0.036], mem['z'][cengalindex[corrlist, 1]])
        else:
            cenmag = cenmag + correct_dr8_cen([0.213, -0.08], cat['z_lambda'])
            mag[cengalindex] = mag[cengalindex] + correct_dr8_cen(
                [0.213, -0.08], cat['z_lambda'])

    #Convert everything to log(L) if requested
    if int(params['use_lum']) == 1:
        #Value currently hard-coded to offset for z=0.3 bandshift in SDSS i-band
        abs_solar = float(params['abs_solar'])
        mag = np.log10(
            mag_to_Lsolar(mag,
                          use_des=int(params['use_des']),
                          abs_solar=abs_solar))
        cenmag = np.log10(
            mag_to_Lsolar(cenmag,
                          use_des=int(params['use_des']),
                          abs_solar=abs_solar))
        if use_limmag:
            limmag = np.log10(
                mag_to_Lsolar(limmag,
                              use_des=int(params['use_des']),
                              abs_solar=abs_solar))
    np.save(params['outdir'] + "limmag.npy", limmag)
    np.save(params['outdir'] + "cengalindex.npy", cengalindex)
    print "I save the limiting magnitude.... HAAH"
    print >> sys.stderr, "Finished converting mags to log(Lsolar)"
    np.save(params['outdir'] + "cen_mag.npy", cenmag)
    #    limmag=[] #so hacky :( this is not my style

    print "magnitude distribution: "
    print np.histogram(mag, bins=np.array(range(35)) * 0.08 + 9.3)
    np.save(params['outdir'] + "mag.npy", mag)
    #Fix the normalization of the p(z) so that triangular integration works okay in pz_utils
    ##Modified by chto@@
    dz = cat['pzbins'][:, 1] - cat['pzbins'][:, 0]
    weight = np.sum(cat['pz'], axis=1) * dz
    cat['pz'] = cat['pz'] / weight[:, None]
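    #At this point np.sum(cat['pz'][i]) * dz[i] == 1 for every cluster, so a
    #simple rectangle-rule integration of p(z) over pzbins yields unity.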
    del dz, weight
    print "dz, weight: ", gc.collect()
    print >> sys.stderr, "Done renormalizing P(z)"

    print >> sys.stderr, "Max lambda: ", np.max(cat['lambda_chisq'])

    #Now produce the galaxy samples and a hash table that takes cluster ID to the first
    #of its listed galaxies
    #Create an array that gives p_cen for all member galaxies
    pcen_all = np.zeros(len(mem))
    if int(params['weight_cen']) == 0:
        pcen_all[cengalindex] = 0 * cengalindex + 1.
    else:
        for i in range(len(cengalindex[0])):
            clist = np.where(cengalindex[:, i] != -1)[0]
            #print len(cengalindex),cengalindex[i][0],clist[0]
            pcen_all[cengalindex[
                clist,
                i]] = pcen_all[cengalindex[clist, i]] + cat['p_cen'][clist, i]

    np.save(params['outdir'] + "pcen_all.py", pcen_all)

    #Reassign p with p_ext if requested
    if int(params['use_p_ext']) > 0:
        mem['p'][:] = pext_correct.pext_correct_full(cat, mem, pcen_all,
                                                     int(params['use_p_ext']),
                                                     int(params['ncolors']))

    #Add a systematic probability offset if requested
    if float(params['p_offset']) != 0:
        mem['p'][:] = mem['p'][:] + float(params['p_offset'])
        #Fix any objects with p>1 or p<0
        plist = np.where(mem['p'] > 1.)[0]
        if len(plist) > 0:
            mem['p'][plist] = 0. * plist + 1.
        plist = np.where(mem['p'] < 0.)[0]
        if len(plist) > 0:
            mem['p'][plist] = 0. * plist

    #Now make the satellite lists
    print >> sys.stderr, "PCEN: ", np.max(pcen_all), len(cat), len(
        np.where(pcen_all > 0)[0]), len(
            np.where(pcen_all > 0.9)[0]), np.sum(pcen_all)

    #Make necessary bootstrap samples
    #Includes redshifts taken from P(z)
    #Only want to do this once
    #Also allows covariance estimates between measurements, which are
    #not currently implemented
    #Number of bootstrap samples -- currently hard-coded
    nboot = dodotune.njack
    bootlist = pz_utils.make_boot_samples_simple(nboot, cat)
    [match_index,
     gboot] = pz_utils.make_boot_samples_gal_full(bootlist,
                                                  cat['mem_match_id'],
                                                  mem['mem_match_id'],
                                                  mem['p'] * (1 - pcen_all))

    print >> sys.stderr, "Done setting up bootstrap samples"
    assert params['jackknife_error']
    if float(params['jackknife_error']) != 0:
        print "doing jackknife CLF"
        njack = dodotune.njack
        jacklist = pz_utils.make_jack_samples_simple(njack, cat)

        match_index_jack = pz_utils.getjackgal(jacklist[0],
                                               cat['mem_match_id'],
                                               mem['mem_match_id'])
        assert (mem['mem_match_id'][match_index_jack[cat['mem_match_id']]
                                    [:, 0]] == cat['mem_match_id']).all()

        #       [match_index_jack, gjack] = pz_utils.make_jack_samples_gal_full(jacklist,cat['mem_match_id'],
        #                                                               mem['mem_match_id'],mem['p']*(1-pcen_all))
        print >> sys.stderr, "Done setting up jackknife samples"

    #If requested, calculate n(z)
    if int(params['do_nz']) == 1:
        print >> sys.stderr, "Calculating n(z)..."
        redm_nz_calc(cat,
                     params['area'],
                     params['outdir'],
                     bootlist,
                     descale=bool(int(params['nz_descale'])))
        print >> sys.stderr, "Done calculating n(z)"

    #If requested, calculate n(lambda)
    if int(params['do_nlambda']) == 1:
        print >> sys.stderr, "Calculating n(lambda)..."
        redm_nlambda_err(cat['lambda_chisq'], cat['lambda_chisq_e'],
                         cat['z_lambda'], cat['pz'], cat['pzbins'], bootlist,
                         params['outdir'], zmin, zmax, params['area'])
        print >> sys.stderr, "Done calculating n(lambda)"

        redm_bigcount(cat['lambda_chisq'], cat['z_lambda'], zmin, zmax,
                      params['outdir'])

        print >> sys.stderr, "Done calculating bonus listing of massive clusters"

    print >> sys.stderr, "lm_min: ", lm_min, "lm_max: ", lm_max, "zmin: ", zmin, "zmax: ", zmax

    #Calculate the CLF
    if int(params['do_clf']) == 1:
        #np.save("mag.npy",mag)
        #np.save("cenmag.npy",cenmag)
        #np.save("cat.npy",cat)
        #np.save("mem.npy", mem)
        print >> sys.stderr, "Calculating CLF..."
        if params["troughNames"] is not None:
            trough_data = np.array([
                hp.read_map(params['troughNames'].format(i)) for i in range(5)
            ])
            nside = hp.npix2nside(trough_data.shape[1])

            def getTroughProb(RaDec):
                ra, dec = RaDec
                return trough_data[:, DeclRaToIndex(ra, dec, nside)]

            positionArray = np.array([cat['RA'], cat['DEC']]).T
            positionArray_mem = np.array([mem['RA'], mem['DEC']]).T
            troughProb = np.array(map(getTroughProb, positionArray))
            troughProb_mem = np.array(map(getTroughProb, positionArray_mem))
            oldmemP = copy.deepcopy(mem['p'])
            oldcatpcen = copy.deepcopy(cat['p_cen'])
            oldcatpsat = copy.deepcopy(cat['p_sat'])
            for i in range(5):
                mem['p'] *= troughProb_mem[:, i]
                cat['p_cen'] *= troughProb[:, i].reshape(-1, 1)
                cat['p_sat'] *= troughProb[:, i].reshape(-1, 1)
                outdir = params['outdir'] + "trough_{0}/".format(i)
                os.system("mkdir -p " + outdir)
                redm_clf.redm_clf(cat,
                                  mem,
                                  mag,
                                  cenmag,
                                  cengalindex,
                                  lm_min,
                                  lm_max,
                                  zmin,
                                  zmax,
                                  pcen_all,
                                  bootlist,
                                  gboot,
                                  match_index,
                                  outdir,
                                  weight_cen=int(params['weight_cen']),
                                  obs_clf=int(params['obs_clf']),
                                  use_lum=int(params['use_lum']),
                                  limmag=limmag,
                                  trough=True,
                                  stellarMass=params['stellarmass'])
                mem['p'] = oldmemP
                cat['p_cen'] = oldcatpcen
                cat['p_sat'] = oldcatpsat
        else:
            #print limmag
            if float(params['jackknife_error']) != 0:
                print "doing jackknife CLF"

                redm_clf.redm_clf(cat,
                                  mem,
                                  mag,
                                  cenmag,
                                  cengalindex,
                                  lm_min,
                                  lm_max,
                                  zmin,
                                  zmax,
                                  pcen_all,
                                  jacklist,
                                  gboot=None,
                                  match_index=match_index_jack,
                                  outdir=params['outdir'],
                                  weight_cen=int(params['weight_cen']),
                                  obs_clf=int(params['obs_clf']),
                                  use_lum=int(params['use_lum']),
                                  limmag=limmag,
                                  stellarMass=params['stellarmass'],
                                  jackknifeerr=int(params['jackknife_error']))
            else:
                print "doing bootstrap CLF"
                redm_clf.redm_clf(cat,
                                  mem,
                                  mag,
                                  cenmag,
                                  cengalindex,
                                  lm_min,
                                  lm_max,
                                  zmin,
                                  zmax,
                                  pcen_all,
                                  bootlist,
                                  gboot,
                                  match_index,
                                  params['outdir'],
                                  weight_cen=int(params['weight_cen']),
                                  obs_clf=int(params['obs_clf']),
                                  use_lum=int(params['use_lum']),
                                  limmag=limmag,
                                  stellarMass=params['stellarmass'])
        print >> sys.stderr, "Done calculating CLF"

    #Plot CLF:
    if int(params['plot_clf']) == 1:
        redm_clf.plot_clf(lm_min, lm_max, zmin, zmax, indir=params['outdir'])


    # assert False

    #Calculate the radial profiles
    if int(params['do_rpr']) == 1:
        print >> sys.stderr, "Calculating radial profiles..."

        #First, get necessary input parameter limits on magnitude
        rpr_minlum = np.array(params['rpr_minlum'])
        rpr_maxlum = np.array(params['rpr_maxlum'])
        rpr_minlum = rpr_minlum.astype(float)
        rpr_maxlum = rpr_maxlum.astype(float)

        #Basic weirdness checking for luminosity limits
        error_check = 0
        elist = np.where(rpr_minlum > rpr_maxlum)[0]
        if len(elist) > 0:
            print >> sys.stderr, "ERROR:  Require rpr_minlum < rpr_maxlum"
            error_check = 1
        if int(params['obs_clf']) == 1 or int(
                params['use_lum']) == 1 and min(rpr_minlum) < 0:
            print >> sys.stderr, "ERROR:  Radial profiles are using app mags or solar lum,"
            print >> sys.stderr, "        but rpr_minlum/rpr_maxlum < 0"
            error_check = 1
        if int(params['obs_clf']) == 0 and int(
                params['use_lum']) == 0 and max(rpr_maxlum) > 0:
            print >> sys.stderr, "ERROR:  Radial profiles are using abs mags,"
            print >> sys.stderr, "        but rpr_minlum/rpr_maxlum > 0"
            error_check = 1

        if error_check == 0:
            redm_rpr.redm_rpr(cat, mem, mag, lm_min, lm_max, zmin, zmax,
                              rpr_minlum, rpr_maxlum, bootlist, gboot,
                              params['outdir'])
        else:
            print >> sys.stderr, "SKIPPING RADIAL PROFILES"

    #Calculate magnitude gaps
    if int(params['do_mgap']) == 1:
        print >> sys.stderr, "Calculating magnitude gaps..."

        redm_mgap.redm_mgap(cat,
                            mem,
                            cenmag,
                            cengalindex,
                            mag,
                            zmin,
                            zmax,
                            lm_min,
                            lm_max,
                            bootlist,
                            gboot,
                            params['outdir'],
                            use_lum=bool(int(params['use_lum'])),
                            use_obs=bool(int(params['obs_clf'])),
                            weight_cen=bool(int(params['weight_cen'])))

    #calculate the probability that the brightest galaxy is not the central galaxy
    if int(params['do_pbcg']) == 1:
        print >> sys.stderr, "Calculating P(BCG!=central)..."

        redm_pbcg.get_p_bcg_not_cen(cat,
                                    cenmag,
                                    cengalindex,
                                    cat['p_cen'],
                                    mem['mem_match_id'],
                                    mag,
                                    mem['p'],
                                    zmin,
                                    zmax,
                                    params['outdir'],
                                    use_lum=int(params['use_lum']),
                                    weight_cen=int(params['weight_cen']))

    #Calculate the distribution of the brightest satellite galaxy
    if int(params['do_bsat']) == 1:
        print >> sys.stderr, "Calculating brightest satellite clf..."
        redm_bright_sat.get_brightest_satellite_all(
            cat,
            mem,
            mag,
            cengalindex,
            lm_min,
            lm_max,
            zmin,
            zmax,
            bootlist,
            gboot,
            match_index,
            params['outdir'],
            weight_cen=int(params['weight_cen']),
            use_lum=int(params['use_lum']),
            obs_clf=int(params['obs_clf']))

        print >> sys.stderr, "Calculating joint brightest sat-central distribution..."
        count_arr = redm_bright_sat.get_bright_sat_cen_all(
            cat,
            mem,
            mag,
            cengalindex,
            cenmag,
            lm_min,
            lm_max,
            zmin,
            zmax,
            bootlist,
            gboot,
            match_index,
            params['outdir'],
            weight_cen=int(params['weight_cen']),
            use_lum=int(params['use_lum']),
            obs_clf=int(params['obs_clf']))
    #output the params
    np.save(params['outdir'] + "params.npy", params)
Example #46
0
import numpy as np


def beta_cont_frac_gsl(a, b, x):
    # This computes B_x(a,b) using a continued fraction approximation.
    # We do require a>0, but b can be negative.
    # Having b<0 and x very near 1 can cause a loss of precision (not enough iterations)
    # However, x near 1 only occurs in the far future for our cosmology application
    # Require 0<=x<1.
    #
    # This python subroutine is adapted from the
    # Gnu Science Library (GSL) specfunc/beta_inc.c code
    # by Daniel Eisenstein (July 2015).
    # Changes were generally to strip down to the case of interest, removing
    # the pre-factor from the complete beta function.  Also vectorized.
    #
    # Original GSL header:
    # Copyright (C) 2007 Brian Gough
    # Copyright (C) 1996, 1997, 1998, 1999, 2000 Gerard Jungman
    #
    # This program is free software; you can redistribute it and/or modify
    # it under the terms of the GNU General Public License as published by
    # the Free Software Foundation; either version 3 of the License, or (at
    # your option) any later version.
    #
    # This program is distributed in the hope that it will be useful, but
    # WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    # General Public License for more details.
    #
    # You should have received a copy of the GNU General Public License
    # along with this program; if not, write to the Free Software
    # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
    #
    # Author:  G. Jungman

    x = np.array(x, copy=False, ndmin=1)
    if np.min(x) < 0 or np.max(x) >= 1:
        raise ValueError("beta_cont_frac_gsl() requires 0 <= x < 1")
    cutoff = 1e-30  #  control the zero cutoff

    # standard initialization for continued fraction
    num_term = 1.0
    den_term = 1.0 - (a + b) * x / (a + 1.0)
    den_term[np.where(np.abs(den_term) < cutoff)] = cutoff
    den_term = 1.0 / den_term
    cf = den_term

    for k in range(1, 200):
        # first step
        coeff = k * (b - k) * x / (((a - 1.0) + 2 * k) * (a + 2 * k))
        den_term = 1.0 + coeff * den_term
        num_term = 1.0 + coeff / num_term
        den_term[np.where(np.abs(den_term) < cutoff)] = cutoff
        num_term[np.where(np.abs(num_term) < cutoff)] = cutoff
        den_term = 1.0 / den_term
        cf *= den_term * num_term

        # second step
        coeff = -(a + k) * (a + b + k) * x / ((a + 2 * k) * (a + 2 * k + 1.0))
        den_term = 1.0 + coeff * den_term
        num_term = 1.0 + coeff / num_term
        den_term[np.where(np.abs(den_term) < cutoff)] = cutoff
        num_term[np.where(np.abs(num_term) < cutoff)] = cutoff
        den_term = 1.0 / den_term
        cf *= den_term * num_term

        # Are we done?
        if (np.max(np.abs(den_term * num_term - 1)) < 1e-12): break
        # End k loop
    # If this ends, we're just accepting the answer even if we haven't converged

    # Include the prefactor
    # We need a>0 so that x=0 doesn't crash.
    cf *= np.power(x, a) * np.power(1 - x, b) / a
    if (len(cf) == 1): return cf[0]  # Get back to a scalar
    else: return cf
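# Verification sketch (not part of the original code): for b > 0 the result can
# be checked against scipy, since B_x(a,b) equals the regularized incomplete
# beta betainc(a, b, x) multiplied by the complete beta function beta(a, b).
from scipy.special import betainc, beta as complete_beta

if __name__ == '__main__':
    a, b = 2.5, 0.7
    x = np.array([0.0, 0.1, 0.5, 0.8])
    approx = beta_cont_frac_gsl(a, b, x)
    exact = betainc(a, b, x) * complete_beta(a, b)
    print(np.max(np.abs(approx - exact)))  # expected to be tiny (~1e-12) for these inputs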
# -*- coding: utf-8 -*-

import pandas as pd
import numpy as np
#%%  import data

data = pd.read_csv("data.csv")
data.drop(["id", "Unnamed: 32"], axis=1, inplace=True)

# %%
data.diagnosis = [1 if each == "M" else 0 for each in data.diagnosis]
y = data.diagnosis.values
x_data = data.drop(["diagnosis"], axis=1)
#%% normalization

x = (x_data - np.min(x_data)) / (np.max(x_data) - np.min(x_data))

# %% train test split
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.15,
                                                    random_state=42)

#%%
from sklearn.tree import DecisionTreeClassifier
dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)

print("score: ", dt.score(x_test, y_test))
Example #48
0
 def prox(self, x, alpha):
     # Proximal operator of f(x) = regcoef * ||x||_1: elementwise soft-thresholding
     # that shrinks each entry of x toward zero by regcoef*alpha.
     return np.sign(x)*np.max([np.abs(x) - self.regcoef*alpha, np.zeros(x.size)], axis=0)
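# Usage sketch (not part of the original class): the L1Reg wrapper below is an
# assumption added so the soft-thresholding prox can be run standalone; it
# simply reproduces the method above with a minimal constructor.
import numpy as np

class L1Reg:
    def __init__(self, regcoef):
        self.regcoef = regcoef

    def prox(self, x, alpha):
        return np.sign(x) * np.max([np.abs(x) - self.regcoef * alpha,
                                    np.zeros(x.size)], axis=0)

if __name__ == '__main__':
    f = L1Reg(regcoef=1.0)
    print(f.prox(np.array([-2.0, -0.3, 0.0, 0.5, 3.0]), alpha=1.0))
    # -> [-1. -0.  0.  0.  2.]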