Example No. 1
def candlestick_trades(samplet, lookback, t, px, sz):
    #requires = ["CONTIGUOUS", "ALIGNED"]

    lib = _load_candlestick_lib()
    lib.c_candlestick.restype = None
    lib.c_candlestick.argtypes = [np.ctypeslib.c_intp,
                                  np.ctypeslib.ndpointer(float,
                                                         flags="aligned, contiguous"),
                                  ctypes.c_double,
                                  np.ctypeslib.c_intp,
                                  np.ctypeslib.ndpointer(float, ndim=1,
                                                         flags="aligned, contiguous"),
                                  np.ctypeslib.ndpointer(float, ndim=1,
                                                         flags="aligned, contiguous"),
                                  np.ctypeslib.ndpointer(float, ndim=1,
                                                         flags="aligned, contiguous"),
                                  np.ctypeslib.ndpointer(float, ndim=1,
                                                         flags="aligned, contiguous,"
                                                               "writeable")]

    # samplet = np.require(samplet, float, requires)
    # c = np.empty_like(a)
    samplelen = np.alen(samplet)
    datalen = np.alen(t)
    res = np.empty(6*samplelen)
    lib.c_candlestick(samplelen, samplet, lookback, datalen, t, px, sz, res)
    return res
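A minimal sketch of the array preparation hinted at by the commented-out np.require call above, assuming the same "aligned, contiguous" layout demanded by the ndpointer declarations; copies are made only when an input does not already satisfy the flags.

import numpy as np

def _as_c_ready(arr):
    # force an aligned, C-contiguous float array; no copy if already compliant
    return np.require(arr, dtype=float, requirements=["ALIGNED", "CONTIGUOUS"])

# samplet, t, px, sz = (_as_c_ready(a) for a in (samplet, t, px, sz))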
Example No. 2
def eccentricity(data, exponent=1.,  metricpar={}, callback=None):
    if data.ndim==1:
        assert metricpar=={}, 'No optional parameter is allowed for a dissimilarity matrix.'
        ds = squareform(data, force='tomatrix')
        if exponent in (np.inf, 'Inf', 'inf'):
            return ds.max(axis=0)
        elif exponent==1.:
            ds = np.power(ds, exponent)
            return ds.sum(axis=0)/float(np.alen(ds))
        else:
            ds = np.power(ds, exponent)
            return np.power(ds.sum(axis=0)/float(np.alen(ds)), 1./exponent)
    else:
        progress = progressreporter(callback)
        N = np.alen(data)
        ecc = np.empty(N)
        if exponent in (np.inf, 'Inf', 'inf'):
            for i in range(N):
                ecc[i] = cdist(data[(i,),:], data, **metricpar).max()
                progress((i+1)*100//N)
        elif exponent==1.:
            for i in range(N):
                ecc[i] = cdist(data[(i,),:], data, **metricpar).sum()/float(N)
                progress((i+1)*100//N)
        else:
            for i in range(N):
                dsum = np.power(cdist(data[(i,),:], data, **metricpar),
                                exponent).sum()
                ecc[i] = np.power(dsum/float(N), 1./exponent)
                progress((i+1)*100//N)
        return ecc
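A minimal usage sketch for the vector-data branch above, assuming eccentricity (and its progressreporter helper, which is expected to tolerate callback=None) is importable from this example's module:

import numpy as np

data = np.random.rand(30, 2)          # 30 points in the plane
e = eccentricity(data, exponent=1.)   # mean distance from each point to all others
assert e.shape == (30,)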
Example No. 3
 def get_data(self, orig):
     data = self.serializer.clipboard_data
     data_len = np.alen(data)
     orig_len = np.alen(orig)
     if data_len > orig_len > 1:
         data_len = orig_len
     return data[0:data_len]
Example No. 4
    def _do_problem(self, problem, integrator, old_api=True, **integrator_params):
        jac = None
        if hasattr(problem, 'jac'):
            jac = problem.jac
        res = problem.res

        ig = dae(integrator, res, jacfn=jac, old_api=old_api)
        ig.set_options(old_api=old_api, **integrator_params)
        z = empty((1+len(problem.stop_t),alen(problem.z0)), float)
        zprime = empty((1+len(problem.stop_t),alen(problem.z0)), float)
        ist = ig.init_step(0., problem.z0, problem.zprime0, z[0], zprime[0])
        i=1
        for time in problem.stop_t:
            soln = ig.step(time, z[i], zprime[i])
            if old_api:
                flag, rt = soln
            else:
                flag = soln.flag
                rt = soln.values.t
            i += 1
            if integrator == 'ida':
                assert flag==0, (problem.info(), flag)
            else:
                assert flag > 0, (problem.info(), flag)

        assert problem.verify(array(z), array(zprime),  [0.]+problem.stop_t), \
                    (problem.info(),)
Example No. 5
 def get_data(self, orig):
     data = self.data
     data_len = np.alen(data)
     orig_len = np.alen(orig)
     if orig_len > data_len:
         reps = (orig_len // data_len) + 1
         data = np.tile(data, reps)
     return data[0:orig_len]
Example No. 6
 def get_data(self, orig):
     bytes = self.data
     data_len = np.alen(bytes)
     orig_len = np.alen(orig)
     if orig_len > data_len:
         reps = (orig_len // data_len) + 1
         bytes = np.tile(bytes, reps)
     return bytes[0:orig_len]
Example No. 7
	def tableSum(self):
		"""docstring for tableSum"""
		self.fsum = 0
		for i in xrange(numpy.alen(self.newx)):
			for j in xrange(numpy.alen(self._angles)):
				self.fsum += self.newx[i]*1.15*self._angles[j]*math.fabs(self.interpedgrid[i,j])
		
		
		return self.fsum
Example No. 8
 def competence(stochastic):
     """
     The competence function for TWalk.
     """
     if stochastic.dtype in float_dtypes and np.alen(stochastic.value) > 4:
         if np.alen(stochastic.value) >=10:
             return 2
         return 1
     return 0
Example No. 9
def create_binary(filename, num, outfile, options):
    """Create patterned binary data with the first 7 characters of the filename
    interleaved with a byte ramp, e.g.  A128   \x00A128   \x01A128   \x02 etc.
    """
    root, _ = outfile.split(".")
    prefix = ("%s        " % root)[0:8]
    a = np.fromstring(prefix, dtype=np.uint8)
    b = np.tile(a, (num / np.alen(a)) + 1)[0:num]
    b[7::8] = np.arange(np.alen(b) / 8, dtype=np.uint8)
    with open(filename, "wb") as fh:
        fh.write(b.tostring())
Example No. 10
    def get_bitplanes(self, segment_viewer, bytes_per_row, nr, count, byte_values, style, colors):
        bitplanes = self.bitplanes
        _, rem = divmod(np.alen(byte_values), bitplanes)
        if rem > 0:
            byte_values = np.append(byte_values, np.zeros(rem, dtype=np.uint8))
            style = np.append(style, np.zeros(rem, dtype=np.uint8))
        pixels_per_row = 8 * bytes_per_row // bitplanes
        bits = np.unpackbits(byte_values).reshape((-1, 8))
        pixels = np.empty((nr * bytes_per_row // bitplanes, pixels_per_row), dtype=np.uint8)
        self.get_bitplane_pixels(bits, pixels, bytes_per_row, pixels_per_row)
        pixels = pixels.reshape((nr, pixels_per_row))
        s = self.get_bitplane_style(style)
        style_per_pixel = s.repeat(8).reshape((-1, pixels_per_row))
        normal = (style_per_pixel & self.ignore_mask) == 0
        highlight = (style_per_pixel & style_bits.selected_bit_mask) == style_bits.selected_bit_mask
        data = (style_per_pixel & style_bits.data_bit_mask) == style_bits.data_bit_mask
        comment = (style_per_pixel & style_bits.comment_bit_mask) == style_bits.comment_bit_mask
        match = (style_per_pixel & style_bits.match_bit_mask) == style_bits.match_bit_mask

        color_registers, h_colors, m_colors, c_colors, d_colors = colors
        bitimage = np.empty((nr, pixels_per_row, 3), dtype=np.uint8)
        for i in range(2**bitplanes):
            color_is_set = (pixels == i)
            bitimage[color_is_set & normal] = color_registers[i]
            bitimage[color_is_set & data] = d_colors[i]
            bitimage[color_is_set & comment] = c_colors[i]
            bitimage[color_is_set & match] = m_colors[i]
            bitimage[color_is_set & highlight] = h_colors[i]
        bitimage[count:,:,:] = segment_viewer.preferences.empty_background_color.Get(False)
        return bitimage
Example No. 11
 def do_change(self, editor, undo):
     self.prepare_data(editor)
     indexes = self.get_clipped_indexes(editor)
     data = self.get_data(self.segment.data[indexes])
     log.debug("orig data: %s" % self.segment.data[indexes])
     log.debug("new data: %s" % data)
     indexes = indexes[0:np.alen(data)]
     log.debug("indexes truncated to data length: %s" % str(indexes))
     s = self.serializer
     if s.clipboard_relative_comment_indexes is not None:
         log.debug("relative comment indexes: %s" % (str(s.clipboard_relative_comment_indexes)))
         subset = s.clipboard_relative_comment_indexes[s.clipboard_relative_comment_indexes < np.alen(indexes)]
         log.debug("comment index subset: %s" % str(subset))
         comment_indexes = indexes[subset]
         log.debug("new comment indexes: %s" % str(comment_indexes))
         clamped_ranges = indexes_to_ranges(indexes)
         log.debug("clamped ranges: %s" % str(clamped_ranges))
         old_comment_info = self.segment.get_comment_restore_data(clamped_ranges)
     else:
         old_comment_info = None
     undo.flags.index_range = indexes[0], indexes[-1]
     undo.flags.select_range = True
     undo.flags.byte_values_changed = True
     old_data = self.segment[indexes].copy()
     self.segment[indexes] = data
     style = self.get_style(data)
     if style is not None:
         old_style = self.segment.style[indexes].copy()
         self.segment.style[indexes] = style
     else:
         old_style = None
     if old_comment_info is not None:
         log.debug("setting comments: %s" % s.clipboard_comments)
         self.segment.set_comments_at_indexes(clamped_ranges, comment_indexes, s.clipboard_comments)
     return (old_data, indexes, old_style, old_comment_info)
Example No. 12
def weightedregionprops(L,ncc,dfore):

    if DEBUG_TRACKINGSETTINGS: print 'in weightedregionprops, ncc = ' + str(ncc) + ', max(L) = ' + str(num.max(L)) + ', nnz(L) = ' + str(num.flatnonzero(L).shape) + ', sum(dfore) = ' + str(num.sum(num.sum(dfore)))
    if DEBUG_TRACKINGSETTINGS:
        for l in range(1,num.max(L)+1):
            print 'nnz(L == %d) = '%l + str(num.alen(num.flatnonzero(L==l)))

    if ncc == 0:
        return []

    # all connected components
    index = range(1,ncc+1)

    time0 = time.time()

    # create the unnormalized weight matrix
    w = dfore
    #w[L==0] = 0

    # compute the normalization terms
    z = num.array(meas.sum(w,L,index),ndmin=1)
    z[z==0] = 1

    # compute the unnormalized centers
    cx = num.array(meas.sum(w*params.GRID.X,L,index),ndmin=1)
    cy = num.array(meas.sum(w*params.GRID.Y,L,index),ndmin=1)

    # normalize centers
    cx /= z
    cy /= z

    # compute unnormalized, uncentered variances
    cx2 = num.array(meas.sum(w*params.GRID.X2,L,index),ndmin=1)
    cy2 = num.array(meas.sum(w*params.GRID.Y2,L,index),ndmin=1)
    cxy = num.array(meas.sum(w*params.GRID.XY,L,index),ndmin=1)

    # normalize variances
    cx2 /= z
    cy2 /= z
    cxy /= z

    # center variances
    cx2 -= cx**2
    cy2 -= cy**2
    cxy -= cx*cy

    # create ellipses
    ellipses = []
    for i in range(len(cx)):
        # compute major, minor, angle from cov
        (sizeH,sizeW,angle) = cov2ell2(cx2[i],cy2[i],cxy[i])
        if (sizeH < .125) or num.isnan(sizeH):
            sizeH = .125
        if (sizeW < .125) or num.isnan(sizeW):
            sizeW = .125
        # compute area
        area = num.pi * sizeW * sizeH * 4
        ellipses.append(Ellipse(cx[i],cy[i],sizeW,sizeH,angle,area,-1))

    return ellipses
Example No. 13
 def fit_model(self):
     if self.similarity_matrix is None:
         self._init_similarity_matrix()
     self.means = []
     for i in xrange(self.dataset.n_items):
         i_ = self.item_user_matrix[i][self.item_user_matrix[i] > 0]
         self.means.append(np.mean(i_) if not np.alen(i_) == 0 else 0)
Example No. 14
def add_xexboot_header(bytes, bootcode=None, title="DEMO", author="an atari user"):
    sec_size = 128
    xex_size = len(bytes)
    num_sectors = (xex_size + sec_size - 1) / sec_size
    padded_size = num_sectors * sec_size
    if xex_size < padded_size:
        bytes = np.append(bytes, np.zeros([padded_size - xex_size], dtype=np.uint8))
    paragraphs = padded_size / 16
    
    if bootcode is None:
        bootcode = np.fromstring(xexboot_header, dtype=np.uint8)
    else:
        # don't insert title or author in user supplied bootcode; would have to
        # assume that the user supplied everything desired in their own code!
        title = ""
        author = ""
    bootsize = np.alen(bootcode)
    v = bootcode[9:11].view(dtype="<u2")
    v[0] = xex_size
    
    bootsectors = np.zeros([384], dtype=np.uint8)
    bootsectors[0:bootsize] = bootcode

    insert_string(bootsectors, 268, title, 0b11000000)
    insert_string(bootsectors, 308, author, 0b01000000)

    image = np.append(bootsectors, bytes)
    return image
Example No. 15
def compressed_submatrix(dm, idx):
    '''
    Extract from a compressed distance matrix the corresponding matrix for
    a subset of points without bringing the matrix into square form first.

    The indices in the list C{idx} must be in increasing order.

    @param dm: compressed distance matrix
    @type dm: numpy.ndarray(N*(N-1)/2, dtype=float)
    @param idx: indices of the subset
    @type idx: numpy.ndarray(n, dtype=int)
    @param N: the number of observations in C{dm} (optional)
    @type N: integer

    @return: compressed distance matrix
    @rtype: numpy.ndarray(n*(n-1)/2, dtype=float)
    '''
    N = n_obs(dm)
    n = np.alen(idx)
    res = np.empty(n*(n-1)//2,dtype=dm.dtype)
    # Shorter Python code, does the same thing.
    # Which variant is faster?
    #
    #for i,c in enumerate(combinations(idx,2)):
    #    res[i] = dm[compressed_idx(N,*c)]
    for r in range(n-1):
        s = (2*n-1-r)*r//2
        t = idx[r]
        i = idx[r+1:] + (2*N-3-t)*t//2-1
        res[s:s+n-1-r] = dm[i]

    return res
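A hedged consistency check, assuming compressed_submatrix (and its n_obs helper) is importable from this example's module; the result should agree with the obvious route through SciPy's square form:

import numpy as np
from scipy.spatial.distance import pdist, squareform

pts = np.random.rand(10, 3)
dm = pdist(pts)                 # compressed distance matrix of length N*(N-1)/2
idx = np.array([1, 4, 7, 9])    # subset indices, in increasing order

sub = compressed_submatrix(dm, idx)
ref = squareform(squareform(dm)[np.ix_(idx, idx)], force='tovector')
assert np.allclose(sub, ref)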
Example No. 16
def distance_to_measure(data, k, metricpar={}, callback=None):
    r'''.. math::

  \mathit{distance\_to\_measure}(x)  = \sqrt{\frac 1k\sum^k_{j=1}d(x,\nu_j(x))^2},

where :math:`\nu_1(x),\ldots,\nu_k(x)` are the :math:`k`  nearest neighbors of :math:`x` in the data set. Again, the first nearest neighbor is :math:`x` itself with distance 0.

Reference: [R4]_.
'''
    if data.ndim==1:
        assert metricpar=={}, ('No optional parameter is allowed for a '
                               'dissimilarity matrix.')
        # dm data
        ds = squareform(data, force='tomatrix')
        N = np.alen(ds)
        r = np.empty(N)
        for i in range(N):
            s = np.sort(ds[i,:])
            assert s[0]==0.
            d = s[1:k]
            r[i] = np.sqrt((d*d).sum()/float(k))
        return r
    else:
        # vector data
        if metricpar=={} or metricpar['metric']=='euclidean':
            from scipy.spatial import cKDTree
            T = cKDTree(data)
            d, j = T.query(data, k+1)
            d = d[:,1:k]
            return np.sqrt((d*d).sum(axis=1)/k)
        else:
            print(metricpar)
            raise ValueError('Not implemented')
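A small usage sketch for the Euclidean vector-data branch above, assuming distance_to_measure is importable from this example's module:

import numpy as np

data = np.random.rand(100, 2)
f = distance_to_measure(data, k=5)   # one filter value per data point
assert f.shape == (100,)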
Example No. 17
def testLBP (format, formatMask, path, output) :
    dataset = pd.read_csv(path)
    idxCls = dataset['idx']
   # cnts = dataset['Cnt']
    fnList = dataset['path']
  #  out = open(output, 'w')
    lbps = list(map(lambda x: local_binary_pattern(cv2.bitwise_and(imread(format.format(x)),imread(formatMask.format(x))), lbpP, lbpR, lbpMethod), fnList))
    histograms = list(map(lambda x:  np.histogram(x, bins=range(int(np.max(lbps)) + 1))[0], lbps))
    distances = prw.pairwise_distances(histograms, metric='l1')
    np.fill_diagonal(distances, math.inf)
    guessedClasses = np.apply_along_axis(lambda x: np.argmin(x), 1, distances)
    scores = np.apply_along_axis(lambda x: np.min(x), 1, distances)
    correct = list(map(lambda i: idxCls[guessedClasses[i]] == idxCls[i], range(0, np.alen(idxCls))))
   # out.write(str(np.average(correct)))
  #  fpr, tpr, thresholds = roc_curve(correct, scores, pos_label=1)
  #  pyplot.plot(tpr, fpr)
   # pyplot.show()
    with open(output + 'lbp_distances.csv', 'w', newline='') as fp:
        a = csv.writer(fp, delimiter=',')
        a.writerows(distances)

    with open(output + 'lbp_guessedClasses.csv', 'w', newline='') as fp:
        a = csv.writer(fp, delimiter=',')
        a.writerow(guessedClasses)

    with open(output + 'lbp_correct.csv', 'w', newline='') as fp:
        a = csv.writer(fp, delimiter=',')
        a.writerow(correct)

    with open(output + 'lbp_real.csv', 'w', newline='') as fp:
        a = csv.writer(fp, delimiter=',')
        a.writerow(idxCls)
Example No. 18
def circ_interp(x, y, num, long_arc=False):
    """
    Given two vectors of angle values in radians, perform circular
    interpolation between them.

    :param x: a vector of angle values (in radians)
    :param y: a vector of angle values (in radians)
    :param num: the number of interpolated values
    :param long_arc: interpolate through the long arc
    :type x: numpy.ndarray
    :type y: numpy.ndarray
    :type num: int
    :type long_arc: bool
    :return: a matrix of original and interpolated values, the original
        vectors x and y are in the first and the last matrix columns
    :rtype: numpy.ndarray
    """

    delta = circ_dist(x, y)
    n = np.alen(x)
    interpolation_mask = np.reshape(
        np.repeat(np.array(range(0, num + 2)), n),
        (num+2, n)).transpose()/float(num + 1)
    result = np.repeat(x, num + 2).reshape(n, num + 2)
    delta = np.repeat(delta, num + 2).reshape(n, num + 2)
    if not long_arc:
        result += delta * interpolation_mask
    else:
        result += -np.sign(delta) * (2*np.pi - np.abs(delta)) * \
            interpolation_mask
    return result
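A small usage sketch, assuming circ_dist is provided by the same module as circ_interp: interpolating two intermediate angles between 0 and pi/2 along the short arc.

import numpy as np

x = np.array([0.0])
y = np.array([np.pi / 2])
path = circ_interp(x, y, num=2)
# path has shape (1, 4): columns are x, two interpolated angles, and y,
# roughly [0, pi/6, pi/3, pi/2].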
Example No. 19
File: prg.py  Project: JohnReid/prg
def recall_gain(tp, fn, fp, tn):
    """Calculates Recall Gain from the contingency table

    This function calculates Recall Gain from the entries of the contingency
    table: number of true positives (TP), false negatives (FN), false positives
    (FP), and true negatives (TN). More information on Precision-Recall-Gain
    curves and how to cite this work is available at
    http://www.cs.bris.ac.uk/~flach/PRGcurves/.

    Args:
        tp (float) or ([float]): True Positives
        fn (float) or ([float]): False Negatives
        fp (float) or ([float]): False Positives
        tn (float) or ([float]): True Negatives
    Returns:
        (float) or ([float])
    """
    n_pos = tp + fn
    n_neg = fp + tn
    with np.errstate(divide='ignore', invalid='ignore'):
        rg = 1. - (n_pos/n_neg) * (fn/tp)
    if np.alen(rg) > 1:
        rg[tn + fn == 0] = 1
    elif tn + fn == 0:
        rg = 1
    return rg
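A worked scalar example of the formula above, rg = 1 - (n_pos/n_neg) * (FN/TP):

tp, fn, fp, tn = 80., 20., 30., 70.
rg = recall_gain(tp, fn, fp, tn)
# n_pos = 100 and n_neg = 100, so rg = 1 - (100/100) * (20/80) = 0.75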
Example No. 20
def window(C_t, type=None, width=None, mirror=False):
  """ Filters (half-sided) time series C(t) through one of two different
      window functions.
      The implemented functions are cos(π/2 t/T) and exp[-1/2 (t/λT)²].
      Appropriate window functions must have an area of unity in frequency
      domain, which corresponds to a value of one at time zero in time domain.
      for Gaussian: width λ = σ / T
  """
  len = np.alen(C_t)
  if width == np.inf: type = None
  if type == 'cos':
    if width == None: width = 1.0
    cut = np.floor(len * width)
    C = C_t * np.append(np.cos(0.5 * np.pi * np.linspace(0., 1., cut)),
                        np.zeros(len - cut))
  elif type == 'exp':
    if width is None: width = 0.3
    C = C_t * np.exp(-0.5 * (np.linspace(0., 1., len) / width)**2)
  elif type is None or type == 'None':
    C = C_t
  else:
    sys.exit("\nError: Window type '{0}' unknown. Exit Program!".format(type))
  if mirror:  # explicit mirroring, e.g. [1., 0.5, 0.] -> [0., 0.5, 1., 0.5]
    return np.append(C[::-1].conj(), C[1:-1])  # C(-t) = C*(t)
  else:
    return C
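A hypothetical usage of window() above, damping a half-sided time series with the Gaussian window (width defaults to 0.3 for type='exp'):

import numpy as np

C_t = np.exp(-np.linspace(0., 5., 200))            # some half-sided C(t)
C_damped = window(C_t, type='exp')                 # Gaussian window, lambda = 0.3
C_mirrored = window(C_t, type='exp', mirror=True)  # also mirror: C(-t) = C*(t)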
Example No. 21
    def fill_masked_regions(self, themap, magic=N.inf):
        """Fill masked regions (defined where values == magic) in themap.
        """
        masked_boxes = N.where(themap == magic) # locations of masked regions
        for i in range(N.size(masked_boxes,1)):
            num_unmasked = 0
            x, y = masked_boxes[0][i], masked_boxes[1][i]
            delx = dely = 1
            while num_unmasked == 0:
                x1 = x - delx
                if x1 < 0: x1 = 0
                x2 = x + 1 + delx
                if x2 > themap.shape[0]: x2 = themap.shape[0]
                y1 = y - dely
                if y1 < 0: y1 = 0
                y2 = y + 1 + dely
                if y2 > themap.shape[1]: y2 = themap.shape[1]

                cutout = themap[x1:x2, y1:y2].ravel()
                goodcutout = cutout[cutout != magic]
                num_unmasked = N.alen(goodcutout)
                if num_unmasked > 0:
                    themap[x, y] = N.nansum(goodcutout)/float(len(goodcutout))
                delx += 1
                dely += 1
        themap[N.where(N.isnan(themap))] = 0.0
        return themap
Example No. 22
def learn_option(option, environment_name, num_episodes, max_steps):
    """
    :param source: the source community
    :type source: int
    :param target: the target community
    :type target: int
    """
    from pyrl.agents.sarsa_lambda import sarsa_lambda
    from pyrl.rlglue import RLGlueLocal as RLGlueLocal
    from pyrl.environments.pinball import PinballRLGlue
    import numpy as np
    import logging
    import pyflann
    import options
    import cPickle
    import random
    import csv

    prefix = 'option-%d-to-%d'%(option.label, option.target)
    score_file = csv.writer(open(prefix + '-score.csv', 'wb'))

    # Create agent and environments
    agent = sarsa_lambda(epsilon=0.01, alpha=0.001, gamma=0.9, lmbda=0.9,
    params={'name':'fourier', 'order':4})

    # Wrap the environment with the option's pseudo-reward
    environment = options.TrajectoryRecorder(options.PseudoRewardEnvironment(PinballRLGlue(environment_name), option, 10000), prefix + '-trajectory')

    # Connect to RL-Glue
    rlglue = RLGlueLocal.LocalGlue(environment, agent)
    rlglue.RL_init()

    # Execute episodes
    if not num_episodes:
        num_episodes = np.alen(option.initial_states)
        print 'Learning %d episodes'%(num_episodes,)

    for i in xrange(num_episodes):
        initial_state = option.initial_state()
        rlglue.RL_env_message('set-start-state %f %f %f %f'
               %(initial_state[0], initial_state[1], initial_state[2], initial_state[3]))

        terminated = rlglue.RL_episode(max_steps)

        total_steps = rlglue.RL_num_steps()
        total_reward = rlglue.RL_return()

        with open(prefix + '-score.csv', 'a') as f:
            writer = csv.writer(f)
            writer.writerow([i, total_steps, total_reward, terminated])

    rlglue.RL_cleanup()

    # Save function approximation
    option.basis = agent.basis
    option.weights = agent.weights[0,:,:]

    cPickle.dump(option, open(prefix + '-policy.pl', 'wb'))

    return option
Example No. 23
def kNN_distance(data, k, metricpar={}, callback=None):
    r'''The distance to the :math:`k`-th nearest neighbor as an (inverse) measure of density.

Note how the number of nearest neighbors is understood: :math:`k=1`, the first neighbor, makes no sense for a filter function since the first nearest neighbor of a data point is always the point itself, and hence this filter function is constantly zero. The parameter :math:`k=2` measures the distance from :math:`x_i` to the nearest data point other than  :math:`x_i` itself.
    '''
    if data.ndim==1:
        assert metricpar=={}, ('No optional parameter is allowed for a '
                               'dissimilarity matrix.')
        # dm data
        ds = squareform(data, force='tomatrix')
        N = np.alen(ds)
        r = np.empty(N)
        for i in range(N):
            s = np.sort(ds[i,:])
            assert s[0]==0.
            r[i] = s[k]
        return r
    else:
        # vector data
        if metricpar=={} or metricpar['metric']=='euclidean':
            from scipy.spatial import cKDTree
            T = cKDTree(data)
            d, j = T.query(data, k+1)
            return d[:,k]
        else:
            print(metricpar)
            raise ValueError('Not implemented')
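A short usage sketch of the Euclidean vector-data branch above, assuming kNN_distance is importable from this example's module:

import numpy as np

data = np.random.rand(50, 3)
dens = kNN_distance(data, k=2)   # one (inverse-density) filter value per point
assert dens.shape == (50,)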
Example No. 24
    def __init__(self, pos):
        vals = np.array([np.float(val) for val in pos.split(";")])
        numOfVariables = (np.alen(vals) - 3) / 4

        """
        self.particlePosition = [np.float64(val) for val in vals[0].split(",")];
        self.velocity = [np.float64(val) for val in vals[1].split(",")];
        self.fitness = [np.float64(val) for val in vals[2].split(",")];
        self.persBestPos = [np.float64(val) for val in vals[3].split(",")];
        self.persBestVal = [np.float64(val) for val in vals[4].split(",")];
        self.globalBestPos = [np.float64(val) for val in vals[5].split(",")];
        self.globalBestVal = [np.float64(val) for val in vals[6].split(",")];
        """
        index = 0
        self.particlePosition = vals[index : index + numOfVariables]
        index += numOfVariables
        self.velocity = vals[index : index + numOfVariables]
        index += numOfVariables
        self.fitness = vals[index : index + 1]
        index += 1
        self.persBestPos = vals[index : index + numOfVariables]
        index += numOfVariables
        self.persBestVal = vals[index : index + 1]
        index += 1
        self.globalBestPos = vals[index : index + numOfVariables]
        index += numOfVariables
        self.globalBestVal = vals[index : index + 1]
Example No. 25
def plot_counts(ax, dictorigin, x_locator, x_formatter, bin_edges_in, snum, enum):
    # compute all data needed
    time = dictorigin["time"]
    cumcounts = np.arange(1, np.alen(time) + 1)
    if len(bin_edges_in) < 2:
        return
    binsize = bin_edges_in[1] - bin_edges_in[0]
    binsize_str = binsizelabel(binsize)

    # plot
    counts, bin_edges_out, patches = ax.hist(
        time, bin_edges_in, cumulative=False, histtype="bar", color="black", edgecolor=None
    )
    ax.grid(True)
    ax.xaxis_date()
    plt.setp(ax.get_xticklabels(), rotation=90, horizontalalignment="center", fontsize=7)
    ax.set_ylabel("# Earthquakes\n%s" % binsize_str, fontsize=8)
    ax.xaxis.set_major_locator(x_locator)
    ax.xaxis.set_major_formatter(x_formatter)
    if snum and enum:
        ax.set_xlim(snum, enum)

    ax2 = ax.twinx()
    p2, = ax2.plot(time, cumcounts, "g", lw=2.5)
    ax2.yaxis.get_label().set_color(p2.get_color())
    ytl_obj = plt.getp(ax2, "yticklabels")  # get the properties for yticklabels
    # plt.getp(ytl_obj)                       # print out a list of properties
    plt.setp(ytl_obj, color="g")  # set the color of yticks to green
    plt.setp(plt.getp(ax2, "yticklabels"), color="g")  # yticklabels: same
    ax2.set_ylabel("Cumulative\n# Earthquakes", fontsize=8)
    ax2.xaxis.set_major_locator(x_locator)
    ax2.xaxis.set_major_formatter(x_formatter)
    if snum and enum:
        ax2.set_xlim(snum, enum)
    return
Example No. 26
    def availability(self):
        availability={}

        for key in self.magnet_sets:
            availability[key]=range(np.alen(self.magnet_sets[key]))
            
        return availability
Example No. 27
File: gmm.py  Project: Tug/mnist
def ExpectationMaximization(dataset):
    # dimension of the space
    N = np.alen(dataset[0])
    m = 10
    minw = 0.01
    minsigma = 0.01
    # mu: esperance
    # sigma2: variance
    # w: mixing weight
    mu, sigma2, w = initParameters(m, N)
    epsi = 0.1
    conv = False
    while not conv:
        Elikelihood = 0
        # for each mixture component
        for j in range(m):
            # Expectation
            # gamma: responsibility values
            gamma = w[j] * gaussian2(dataset, mu[j], sigma2[j], N)
            Nwj = np.sum(gamma)
            gamma = gamma/Nwj
            # Maximization (of the likelihood)
            gammat = np.array([gamma]).T
            mu[j]     = np.sum( gammat * dataset, 0 ) / Nwj
            sigma2[j] = np.sum( gammat * ((dataset - mu[j]) ** 2), 0 ) / Nwj
            w[j]      = Nwj/N
            # prevent variances from reaching 0
            sigma2[j] = map(lambda sig2: sig2 * (sig2 >= minsigma) or minsigma, sigma2[j])
            # prevent mixin coefficient from reaching 0
            if w[j] < minw:
                w[j] = minw
            Elikelihood -= np.log(Nwj)
        print Elikelihood
        conv = np.abs(Elikelihood) < epsi
    return (w, mu, sigma2)
Example No. 28
 def parse_tile_map(self, panel, tile_map):
     sizer = panel.GetSizer()
     sizer.Clear(True)
     self.tile_map = tile_map
     self.categories = []
     self.items = []
     self.pattern_to_item = {}
     for items in tile_map:
         label = items[0]
         t = wx.StaticText(panel, -1, label)
         sizer.Add(t, 0, wx.EXPAND, 0)
         self.categories.append(t)
         w = wx.WrapSizer()
         for tiles in items[1:]:
             for i in np.arange(np.alen(tiles)):
                 data = tiles[i:i+1]
                 bmp = self.segment_viewer.machine.antic_font.get_image(data[0], self.zoom)
                 btn = TileButton(panel, -1, bmp, style=wx.BORDER_NONE|wx.BU_EXACTFIT)
                 btn.SetBackgroundColour(self.bg)
                 btn.tile_data = data
                 btn.Bind(wx.EVT_BUTTON, self.on_tile_clicked)
                 w.Add(btn, 0, wx.ALL, 0)
                 self.items.append(btn)
                 self.pattern_to_item[tuple(data)] = btn
         sizer.Add(w, 0, wx.EXPAND, 0)
     self.Layout()
Example No. 29
File: prg.py  Project: JohnReid/prg
def create_segments(labels, pos_scores, neg_scores):
    n = np.alen(labels)
    # reorder labels and pos_scores by decreasing pos_scores, using increasing neg_scores in breaking ties
    new_order = np.lexsort((neg_scores, -pos_scores))
    labels = labels[new_order]
    pos_scores = pos_scores[new_order]
    neg_scores = neg_scores[new_order]
    # create a table of segments
    segments = {'pos_score': np.zeros(n), 'neg_score': np.zeros(n),
                'pos_count': np.zeros(n), 'neg_count': np.zeros(n)}
    j = -1
    for i, label in enumerate(labels):
        if ((i == 0) or (pos_scores[i-1] != pos_scores[i])
                     or (neg_scores[i-1] != neg_scores[i])):
            j += 1
            segments['pos_score'][j] = pos_scores[i]
            segments['neg_score'][j] = neg_scores[i]
        if label == 0:
            segments['neg_count'][j] += 1
        else:
            segments['pos_count'][j] += 1
    segments['pos_score'] = segments['pos_score'][0:j+1]
    segments['neg_score'] = segments['neg_score'][0:j+1]
    segments['pos_count'] = segments['pos_count'][0:j+1]
    segments['neg_count'] = segments['neg_count'][0:j+1]
    return segments
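A hedged illustration of create_segments on a tiny labelled example; pairs of tied scores collapse into a single segment:

import numpy as np

labels     = np.array([1, 1, 0, 0])
pos_scores = np.array([0.9, 0.8, 0.8, 0.1])
neg_scores = -pos_scores          # one common choice when there is a single scorer

seg = create_segments(labels, pos_scores, neg_scores)
# Three segments remain, with pos_score [0.9, 0.8, 0.1],
# pos_count [1, 1, 0] and neg_count [0, 1, 1].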
Example No. 30
def classify(data, trueclass, traindata, final_set,a):
	X=np.vstack(data[traindata[:,1],:])
	#np.savetxt("parkinsons/foo.csv",x, fmt='%0.5f',delimiter=",")
	b=[]
	b.append(traindata[:,1])
	
	C = np.searchsorted(a, b)
	D = np.delete(np.arange(np.alen(a)), C)
	D= np.array(D)
	D=D.reshape(D.size,-1)
	
	true_labels = np.ravel(np.vstack(trueclass[D[:,0],0]))
	test_data = np.vstack(data[D[:,0],:])
	#print test_data.shape
	#np.savetxt("parkinsons/foo.csv",test_data, fmt='%0.6s')
	y=np.ravel(np.vstack(traindata[:,0]))
	
	clf=svm.SVC(kernel='linear')
	clf.fit(X,y)
	
	labels=clf.predict(test_data) #predicting true labels for the remaining rows 
	predicted_labels = labels.reshape(labels.size,-1)
	np.savetxt("parkinsons/foo%d.csv"%final_set, np.concatenate((test_data, predicted_labels,np.vstack(trueclass[D[:,0],0])), axis=1),fmt='%0.5f',delimiter=",")
	
	print true_labels
	print labels
	misclassify_rate = 1-accuracy_score(true_labels,labels)
	print "Misclassification rate = %f" %misclassify_rate
	return misclassify_rate
Example No. 31
    def clean(self, mask=None, verbose=None):
        """
		Given the mask, we replace the actual problematic pixels with the masked 5x5 median value.
		This mimics what is done in L.A.Cosmic, but it's a bit harder to do in python, as there is no
		readymade masked median. So for now we do a loop...
		Saturated stars, if calculated, are also masked : they are not "cleaned", but their pixels are not
		used for the interpolation.
		
		We will directly change self.cleanimage. Instead of using the self.mask, you can supply your
		own mask as argument. This might be useful to apply this cleaning function iteratively.
		But for the true L.A.Cosmic, we don't use this, i.e. we use the full mask at each iteration.

		"""
        if verbose == None:
            verbose = self.verbose
        if mask == None:
            mask = self.mask

        if verbose:
            print "Cleaning cosmic affected pixels ..."

        # So... mask is a 2D array containing False and True, where True means "here is a cosmic"
        # We want to loop through these cosmics one by one.
        cosmicindices = np.argwhere(mask)
        # This is a list of the indices of cosmic affected pixels.
        #print cosmicindices

        # We put cosmic ray pixels to np.Inf to flag them :
        self.cleanarray[mask] = np.Inf

        # Now we want to have a 2 pixel frame of Inf padding around our image.
        w = self.cleanarray.shape[0]
        h = self.cleanarray.shape[1]
        padarray = np.zeros((w + 4, h + 4)) + np.Inf
        padarray[2:w + 2, 2:h + 2] = self.cleanarray.copy(
        )  # that copy is important, we need 2 independent arrays

        # The medians will be evaluated in this padarray, skipping the np.Inf.
        # Now in this copy called padarray, we also put the saturated stars to np.Inf, if available :
        if self.satstars != None:
            padarray[2:w + 2, 2:h + 2][self.satstars] = np.Inf
            # Viva python, I tested this one, it works...

        # A loop through every cosmic pixel :
        for cosmicpos in cosmicindices:
            x = cosmicpos[0]
            y = cosmicpos[1]
            cutout = padarray[
                x:x + 5,
                y:y + 5].ravel()  # remember the shift due to the padding !
            #print cutout
            # Now we have our 25 pixels, some of them are np.Inf, and we want to take the median
            goodcutout = cutout[cutout != np.Inf]
            #print np.alen(goodcutout)

            if np.alen(goodcutout) >= 25:
                # This never happened, but you never know ...
                raise RuntimeError, "Mega error in clean !"
            elif np.alen(goodcutout) > 0:
                replacementvalue = np.median(goodcutout)
            else:
                # i.e. no good pixels : Shit, a huge cosmic, we will have to improvise ...
                print "OH NO, I HAVE A HUUUUUUUGE COSMIC !!!!!"
                replacementvalue = self.guessbackgroundlevel()

            # We update the cleanarray,
            # but measure the medians in the padarray, so to not mix things up...
            self.cleanarray[x, y] = replacementvalue

        # That's it.
        if verbose:
            print "Cleaning done"

        # FYI, that's how the LACosmic cleaning looks in iraf :
        """
Example No. 32
import numpy as np
from DataHandler import DataHandler

data_handler = DataHandler(16, 4, "state_responses.csv")

status = True
count = 1
while status:
    status, features, labels = data_handler.get_next_batch(20000)
    print status
    print np.alen(features)
    print np.alen(labels)
    print str(count)
    print "\n\n"
    count = count + 1
Example No. 33
            else:
                #Add experience to memory
                memoryS = np.concatenate((memoryS,tempGameS),axis=0)
                memoryRR = np.concatenate((memoryRR,tempGameRR),axis=0)
                memoryA = np.concatenate((memoryA,tempGameA),axis=0)
                memorySA = np.concatenate((memorySA,tempGameSA),axis=0)

                memoryR = np.concatenate((memoryR,tempGameR),axis=0)
                memoryW = np.concatenate((memoryW,tempGameW),axis=0)


                if gameR.mean() > max_game_average :
                    max_game_average = gameR.mean()

            #if memory is full remove first element
            if np.alen(memoryR) >= max_memory_len:
                memorySA = memorySA[gameR.shape[0]:]
                memoryR = memoryR[gameR.shape[0]:]
                memoryA = memoryA[gameR.shape[0]:]
                memoryS = memoryS[gameR.shape[0]:]
                memoryRR = memoryRR[gameR.shape[0]:]
                memoryW = memoryW[gameR.shape[0]:]


        qs=s

        if done and game > num_initial_observation and not PLAY_GAME:
            last_game_average = gameR.mean()
            if game > 3 and game %2 ==0:
                # train on all memory
                print("Experience Replay")
Example No. 34
 def __len__(self):
     return np.alen(self.order)
Example No. 35
def run_case():
    print('--> Load training set')
    x_t = sio.loadmat('../datasets/compact_uber.mat')['x_t']  # training inputs
    y_t = sio.loadmat(
        '../datasets/compact_uber.mat')['y_t'] - np.pi  # training outputs
    n_data = np.alen(y_t)

    print('--> Load validation set')
    x_v = sio.loadmat('../datasets/compact_uber.mat')[
        'x_v']  # validation inputs
    y_v = sio.loadmat(
        '../datasets/compact_uber.mat')['y_v'] - np.pi  # validation outputs

    print('--> Load prediction set')
    x_p = sio.loadmat('../datasets/compact_uber.mat')[
        'x_p']  # prediction inputs

    n_pred = np.alen(x_p)
    n_totp = n_data + n_pred

    print('--> Calculate kernel')
    # Set kernel parameters
    noise = 1E-1
    params = {
        's2': 250.00,
        'ell2': 5.0E+1**2,
    }
    k1 = np.array([[4.0]])
    k2 = np.array([[7.0]])  # previously 7.0

    y_t = y_t.reshape(n_data, 1)
    x_t = x_t.reshape(n_data, 1)
    x_p = x_p.reshape(n_pred, 1)

    x = np.vstack((x_p, x_t))

    # Calculate kernels
    mat_k_cc = kernels.se_iso(x, x, params)
    mat_k_ss = kernels.se_iso(x, x, params)

    mat_k = np.bmat([[mat_k_cc, np.zeros_like(mat_k_cc)],
                     [np.zeros_like(mat_k_ss), mat_k_ss]])
    mat_k = np.asarray(mat_k)
    mat_k += noise * np.eye(mat_k.shape[0])

    # Find inverse
    mat_ell = la.cholesky(mat_k, lower=True)
    mat_kin = la.solve(mat_ell.T, la.solve(mat_ell, np.eye(mat_ell.shape[0])))

    print('--> Initialising model variables')

    psi_p = (2 * np.random.rand(n_pred, 1) - 1) * 2
    mf_k1 = np.log(np.random.rand(n_totp, 1) * 0.1)
    mf_m1 = (2 * np.random.rand(n_totp, 1) - 1) * 2

    n_var = psi_p.shape[0] + mf_k1.shape[0] + mf_m1.shape[0]
    idx = np.arange(0, n_var)

    config = {
        'N_data':
        n_data,
        'N_pred':
        n_pred,
        'c_data':
        np.cos(y_t),
        's_data':
        np.sin(y_t),
        'c_2data':
        np.cos(2 * y_t),
        's_2data':
        np.sin(2 * y_t),
        'Kinv':
        mat_kin,
        'idx_psi_p':
        idx[0:psi_p.shape[0]],
        'idx_mf_k1':
        idx[psi_p.shape[0]:psi_p.shape[0] + mf_k1.shape[0]],
        'idx_mf_m1':
        idx[psi_p.shape[0] + mf_k1.shape[0]:psi_p.shape[0] + mf_k1.shape[0] +
            mf_m1.shape[0]],
        'k1':
        k1,
        'k2':
        k2,
    }

    xin = np.vstack((psi_p, mf_k1, mf_m1))

    print('--> Starting optimisation')
    t0 = time()
    results = mgvm.vi.inference_model_opt(xin, config)
    tf = time()

    print 'Total elapsed time: ' + str(tf - t0) + ' s'

    print results.message

    # Keep all values between -pi and pi
    new_psi_p = uc.cfix(results.x[config['idx_psi_p']])

    # Predictions
    print('--> Saving and displaying results')
    holl_score = 0.
    for ii in xrange(0, n_pred):
        m1_idx = new_psi_p[ii]
        m2_idx = new_psi_p[ii]
        holl_score += uc.holl(y_v[ii], m1_idx, m2_idx, k1, k2)

    print 'HOLL score: ' + str(holl_score)
Example No. 36
    return z


if __name__ == "__main__":
    pkls_path = 'F:/Insula-Gcamp6/record/result_pkl/20_gonogo/after_ex/news/'
    result_path = 'F:/Insula-Gcamp6/record/result_pkl/20_gonogo/after_ex/news/'
    filename = 'F:/Insula-Gcamp6/record/result_pkl/20_gonogo/after_ex/news/l_#f1_gonogo80record__r(red)_left(blue)_190707.pkl'

    #print(filename + ' start!!\r')
    data = unpickle(filename)
    odor1 = data['odor1']
    odor2 = data['odor2']
    print(np.max(odor2), np.min(odor2))
    name = 'l_#f1_gonogo80record__r(red)_left(blue)_190707'
    result_name = result_path + name + '_mean_result'
    for k in range(np.alen(odor1)):
        odor1[k, :] = z_score(odor1[k, 0:100], odor1[k, :])
    for k in range(np.alen(odor2)):
        odor2[k, :] = z_score(odor2[k, 0:100], odor2[k, :])
    cue1_mean_cal = np.zeros(700)
    cue1_error_cal = np.zeros(700)
    cue2_mean_cal = np.zeros(700)
    cue2_error_cal = np.zeros(700)
    tr1, _ = np.shape(odor1)
    tr2, _ = np.shape(odor2)
    for k in range(700):
        cue1_mean_cal[k] = np.mean(odor1[:, k])
        cue1_error_cal[k] = np.std(odor1[:, k]) / np.sqrt(tr1)
        cue2_mean_cal[k] = np.mean(odor2[:, k])
        cue2_error_cal[k] = np.std(odor2[:, k]) / np.sqrt(tr2)
Example No. 37
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test/sparseness%d/test%d.txt' % (spa,
                                                                          case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/loc_ae_values/spa%d' % (spa)

    if isUserAutoEncoder:
        loc_path += '/user_info.txt'
    else:
        loc_path += '/ws_info.txt'

    print('Starting experiment: sparsity=%d, case=%d' % (spa, case))
    print('Loading training data ...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = np.alen(trdata)
    print('Training data loaded in %.2f s, %d records in total\n' % ((time.time() - now), n))

    print('Converting data to matrix ...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('Data-to-matrix conversion finished in %.2f s\n' % ((time.time() - tnow)))

    print('Loading location info ...')
    tnow = time.time()
    lp = Location.LocationProcesser(loc_path)
    print('Location info loaded in %.2f s\n' % ((time.time() - tnow)))

    print('Preprocessing data ...')
    tnow = time.time()
    R = preprocess(R)
    print('Data preprocessing finished in %.2f s\n' % ((time.time() - tnow)))

    #     print ('Selecting data for a specific region ...');
    #     tnow = time.time();
    #     lae = LocBPAE.LocAutoEncoder(lp,40,R,hidden_node,
    #                                  [actfunc1,deactfunc1,
    #                                    actfunc1,deactfunc1],isUserAutoEncoder);
    #
    #     loc_name = None;
    #     loc_index = lae.loc_aes[loc_name][0];
    #     loc_index = np.array(loc_index)-1;
    #     if isUserAutoEncoder:
    #         R = R[loc_index,:];
    #     else:
    #         R = R[:,loc_index];
    #     print ('Region-specific data selection finished in %.2f s\n'%((time.time() - tnow)));

    print('Training model ...')
    tnow = time.time()
    lae = LocBPAE.LocAutoEncoder(lp, oeg, R, hidden_node,
                                 [actfunc1, deactfunc1, actfunc1, deactfunc1],
                                 isUserAutoEncoder)

    if not isUserAutoEncoder:
        R = R.T
    if loadvalues and lae.exitValue(values_path, name_list_train):
        lae.loadValue(values_path, name_list_train)
    if continue_train:
        lae.train_by_names(name_list_train, learn_param, repeat, values_path)

        # lae.saveValues(values_path);

    lae.loadValue(values_path, name_list_pr)
    PR = np.zeros_like(R)

    for i in range(len(name_list_pr) - 1, -1, -1):
        n = name_list_pr[i]
        nind = lae.getIndexByLocName(n)
        tPR = lae.fill(n, R[nind, :])
        PR[nind, :] = tPR
    if not isUserAutoEncoder:
        R = R.T
        PR = PR.T

    print(R)
    print()
    print(PR)
    print()
    ############# restore PR values ###############
    PR = PR * 20.0
    R = R * 20.0
    #     for i in range(PR.shape[0]):
    #         for j in range(PR.shape[1]):
    #             if R[i,j]!=NoneValue:
    #                 PR[i,j]=R[i,j];
    PR = np.where(R != NoneValue, R, PR)
    print(PR)

    ############# restore PR values ###############
    #     if isUserAutoEncoder:
    #         PR = PR.T;
    print('Model training finished in %.2f s\n' % ((time.time() - tnow)))

    print('Loading test data ...')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = np.alen(trdata)
    print('Test data loaded in %.2f s, %d records in total\n' % ((time.time() - tnow), n))

    print('Evaluation started')
    tnow = time.time()
    tmp_vect = []
    lp_vect = []
    for n in name_list_pr:
        tmp_vect.append([
            lae.getIndexByLocName(n),
            0.0,  # mae
            0.0,  # rmse
            0
        ])
        #
    for n in lae.loc_aes:
        lp_vect.append([
            lae.getIndexByLocName(n),
            0.0,  # mae
            0.0,  # rmse
            0,
            n
        ])
        #

    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        uid = int(tc[0])
        sid = int(tc[1])
        if tc[2] <= 0:
            continue
        rt = PR[uid, sid]
        tm = abs(rt - tc[2])
        trm = (rt - tc[2])**2
        mae += tm
        rmse += trm
        cot += 1

        if isUserAutoEncoder:
            tagind = uid
        else:
            tagind = sid
        for v in tmp_vect:
            if tagind not in v[0]:
                continue
            v[1] += tm
            v[2] += trm
            v[3] += 1

        for v in lp_vect:
            if tagind not in v[0]:
                continue
            v[1] += tm
            v[2] += trm
            v[3] += 1

    for v in tmp_vect:
        if v[3] == 0:
            continue
        print('pr_bef->\t', v[1])
        v[1] = v[1] / v[3]
        v[2] = np.sqrt(v[2] / v[3])
    for i in range(len(name_list_pr)):
        print('pr->\t' + name_list_pr[i] + ':\t', tmp_vect[i][1:])

    print()

    for v in lp_vect:
        if v[3] == 0:
            continue
        print('lp_bef->\t\t', v[4], ':\t\t', v[1])
        v[1] = v[1] / v[3]
        v[2] = np.sqrt(v[2] / v[3])
    for i in range(len(lp_vect)):
        print('lp->:\t\t', lp_vect[i][1:])

    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('Evaluation finished in %.2f s\n' % ((time.time() - tnow)))

    print('Experiment finished: total time %.2f s, sparsity=%d, MAE=%.6f, RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))

    print(W)
    print(S)
Example No. 38
def encoder_run(spa):
    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (
        spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (
        spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    SW_path = base_path + '/Dataset/ws/BP_CF_SW_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'
    values_path = base_path + '/Dataset/ae_values_space/spa%d' % (spa)

    mf_values_path = base_path + '/Dataset/mf_baseline_values/spa%d' % (spa)

    print('Starting experiment: sparsity=%d, case=%d' % (spa, case))
    print('Loading training data ...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = np.alen(trdata)
    print('Training data loaded in %.2f s, %d records in total\n' % ((time.time() - now), n))

    print('Converting data to matrix ...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    del trdata, u, s
    print('Data-to-matrix conversion finished in %.2f s\n' % ((time.time() - tnow)))

    print('Preprocessing data ...')
    tnow = time.time()
    Preprocess.removeNoneValue(R)
    oriR = R.copy()
    ############################
    # fill-in preprocessing with the matrix-factorization baseline
    mean = np.sum(R) / np.count_nonzero(R)
    mf = MF_bl(R.shape, f, mean)
    mf.preloadValues(mf_values_path)

    ############################
    Preprocess.preprocessMF_rat(R, mf, rat=cmp_rat)
    print(np.sum(R - oriR))
    R /= 20.0
    oriR /= 20.0
    print('Data preprocessing finished in %.2f s\n' % ((time.time() - tnow)))

    print('Loading geographic location info ...')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    global loc_tab
    loc_tab = loadLocation(loc_path)
    print('Geographic location info loaded in %.2f s, %d records in total\n' %
          ((time.time() - tnow), len(loc_tab)))

    print('Training model ...')
    tnow = time.time()
    tx = us_shape[0]
    if isUserAutoEncoder:
        tx = us_shape[1]
    encoder = BPAE.BPAutoEncoder(tx, hidden_node, actfunc1, deactfunc1,
                                 actfunc1, deactfunc1, check_none)
    if not isUserAutoEncoder:
        R = R.T
    if loadvalues and encoder.exisValues(values_path):
        encoder.preloadValues(values_path)
    if continue_train:
        encoder.train(R, learn_param, repeat, None)
        encoder.saveValues(values_path)

    # R = oriR;
    PR = encoder.calFill(R)
    print(R)
    print()
    print(PR)
    print()
    ############# restore PR values ###############
    PR = PR * 20.0
    R = R * 20
    oriR = oriR * 20
    PR = np.where(R != NoneValue, R, PR)
    print(PR)
    if not isUserAutoEncoder:
        PR = PR.T
        R = R.T


############# restore PR values ###############
    print('Model training finished in %.2f s\n' % ((time.time() - tnow)))

    print('Randomly removing entries ...')
    tnow = time.time()
    Preprocess.random_empty(PR, cut_rate)
    print('Random removal finished in %.2f s\n' % ((time.time() - tnow)))

    global W, S
    print('Computing similarity matrix ...')
    tnow = time.time()
    oR = R
    R = PR
    for i in range(axis0 - 1):
        if i % 50 == 0:
            print('----->step%d' % (i))
        for j in range(i + 1, axis0):
            ws = 0.0
            a = R[i, :]
            b = R[j, :]
            # log =
            deta = np.subtract(a,
                               b,
                               out=np.zeros_like(a),
                               where=((a != NoneValue) & (b != NoneValue)))
            ws += np.sum(deta**2)
            W[i, j] = W[j, i] = 1.0 / math.exp(np.sqrt(ws / axis1))

            # origin W[i,j]=W[j,i]=1.0/(ws ** (1.0/p)+1.0);
            # W[i,j]=W[j,i]=1.0/( ((ws/cot) ** (1.0/p))+1.0);

            # W[i,j]=W[j,i]= 1.0/math.exp(((ws) ** (1.0/p))/cot);
    np.savetxt(W_path, W, '%.30f')

    R = PR.T
    SW = np.zeros((axis1, axis1))

    if os.path.exists(SW_path) and load_SW:
        SW = np.loadtxt(SW_path, np.float64)
    else:
        for i in range(axis1 - 1):
            if i % 50 == 0:
                print('----->step%d' % (i))
            for j in range(i + 1, axis1):
                ws = 0.0
                a = R[i, :]
                b = R[j, :]
                # log =
                deta = np.subtract(a,
                                   b,
                                   out=np.zeros_like(a),
                                   where=((a != NoneValue) & (b != NoneValue)))
                ws += np.sum(deta**2)
                SW[i, j] = SW[j, i] = 1.0 / math.exp(np.sqrt(ws / axis1))

                # origin W[i,j]=W[j,i]=1.0/(ws ** (1.0/p)+1.0);
                # W[i,j]=W[j,i]=1.0/( ((ws/cot) ** (1.0/p))+1.0);

                # W[i,j]=W[j,i]= 1.0/math.exp(((ws) ** (1.0/p))/cot);
        np.savetxt(SW_path, SW, '%.10f')

    R = PR

    print('Similarity matrix computed in %.2f s\n' % ((time.time() - tnow)))

    print('Building similarity lists ...')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    SS = np.argsort(-SW)[:, 0:sk]
    print('Similarity lists built in %.2f s\n' % ((time.time() - tnow)))

    print('Loading test data ...')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = np.alen(trdata)
    print('Test data loaded in %.2f s, %d records in total\n' % ((time.time() - tnow), n))

    print('Evaluation started')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    print('oR', oR)
    print('R', R)
    for tc in trdata:
        if tc[2] <= 0:
            continue
        urt = predict(int(tc[0]), int(tc[1]), R, W, S)
        srt = predict_for_s(int(tc[0]), int(tc[1]), R.T, SW, SS)
        rt = cf_w * urt + (1 - cf_w) * srt
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2])**2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = np.sqrt(rmse / cot)
    print('Evaluation finished in %.2f s\n' % ((time.time() - tnow)))

    print('Experiment finished: total time %.2f s, sparsity=%d, MAE=%.6f, RMSE=%.6f\n' %
          ((time.time() - now), spa, mae, rmse))

    print(W)
Example No. 39
def clean_cosmics(img,
                  mask,
                  badpixmask=None,
                  method='median',
                  boxsize=5,
                  verbose=False,
                  timit=False):
    """
        This routine replaces the flux in the pixels identified as being affected by cosmic rays (from function "identify_cosmics") with either
        a median value of surrounding non-cosmic-affected pixels, or with the value of a surface fit to the surrounding non-affected pixels,
        depending on the "method" kwarg.
        
        INPUT:
        "img"          - a 2-dim image
        "mask"         - a 2-dim boolean mask, where True identifies pixels affected by cosmic rays (this MUST have the same dimensions as "img"!!!)
        "badpixmask"   - a 2-dim mask of otherwise bad pixels (other than cosmics), which are not going to be replaced but are not used in the calculation
                         of the median values or spline interpolation of the replacement values
        
        KWARGS:
        "method"    - 'median' : the flux values in the cosmic-affected pixels are replaced by the median value of the surrounding non-cosmic-affected pixels
                    - 'spline' : a cubic spline interpolation is performed through the surrounding non-cosmic-affected pixels and the flux of the cosmic-affected
                                 pixels is replaced with the interpolated value at their respective locations
        "boxsize"   - the size of the surrounding pixels to be considered. default value is 5, ie a box of 5x5 pixels centred on the affected pixel
        "verbose"   - for debugging...
        "timit"     - boolean - do you want to measure execution run time?

        This routine borrows heavily from the python translation of LACosmic by Malte Tewes!
        
        TODO:
        implement surface fit method
        """

    if timit:
        start_time = time.time()

    #check that img and mask really do have the same size
    if img.shape != mask.shape:
        print(
            "ERROR: Image and cosmic pixel mask do not have the same dimensions!!!"
        )
        quit()
        return

    #if badpixmask is supplied, check that it has the same dimensions as well
    if (badpixmask is not None):
        if badpixmask.shape != mask.shape:
            print(
                'WARNING: Bad pixel mask has different dimension than image and cosmic pixel mask!!!'
            )
            choice = None
            while choice == None:
                choice = raw_input(
                    'Do you want to continue without using the bad pixel mask? ["y"/"n"]     : '
                )
                if choice in ['n', 'N', 'no', 'No']:
                    quit()
                    return
                elif choice in ['y', 'Y', 'yes', 'Yes']:
                    print('OK, ignoring bad pixel mask...')
                else:
                    print('Invalid input! Please try again...')
                    choice = None

    #check that boxsize is an odd number
    while (boxsize % 2) == 0:
        print(
            'ERROR: size of the box for median/interpolation needs to be an odd number, please try again!'
        )
        boxsize = input('Enter an odd number for the box size: ')

    #create a copy of the image which is to be manipulated
    cleaned = img.copy()

    if verbose:
        print("Cleaning cosmic-affected pixels ...")

    # So...mask is a 2D-array containing False and True, where True means "here is a cosmic"
    # We want to loop through these cosmics one by one. This is a list of the indices of cosmic affected pixels:
    cosmicindices = np.argwhere(mask)

    # We put cosmic ray pixels to np.Inf to flag them :
    cleaned[mask] = np.Inf

    # Now we want to have a 2 pixel frame of Inf padding around our image.
    w = cleaned.shape[0]
    h = cleaned.shape[1]
    #padsize = floor(boxsize/2.)   #same thing really...
    padsize = int(boxsize) // 2   # integer division: padsize is used as a slice index below
    #create this "padarray" so that edge effects are taken care of without the need for awkward for-/if-loops around edge pixels
    padarray = np.zeros((w + boxsize - 1, h + boxsize - 1)) + np.Inf
    padarray[padsize:w + padsize, padsize:h + padsize] = cleaned.copy(
    )  # that copy is important, we need 2 independent arrays

    # The medians will be evaluated in this padarray, excluding the infinite values (that are either the edges, the cosmic-affected pixels or the otherwise bad pixels)
    # Now in this copy called padarray, we also put the saturated stars to np.Inf, if available :
    if badpixmask is not None:
        padarray[padsize:w + padsize, padsize:h + padsize][badpixmask] = np.Inf

    # A loop through every cosmic pixel :
    for cosmicpos in cosmicindices:
        x = cosmicpos[0]
        y = cosmicpos[1]
        #             if verbose:
        #                 print('[x,y] = ['+str(x)+','+str(y)+']')
        cutout = padarray[
            x:x + boxsize,
            y:y + boxsize].ravel()  # remember the shift due to the padding !
        # Now we have our cutout pixels, some of them are np.Inf, which will be ignored for calculating median or interpolating
        goodcutout = cutout[cutout != np.Inf]

        if np.alen(goodcutout) >= boxsize * boxsize:
            # This never happened, but you never know ...
            #raise RuntimeError, "Mega error in clean !"
            raise RuntimeError("Mega error in clean !")
        elif np.alen(goodcutout) > 0:
            #WHICH METHOD???
            if method == 'median':
                replacementvalue = np.median(goodcutout)
            elif method == 'spline':
                print('WARNING: THIS IS NOT FULLY IMPLEMENTED YET!!!!!')
                box = padarray[x:x + boxsize, y:y + boxsize]
                goodbox = np.argwhere(box != np.Inf)
                xx = goodbox[:, 1]
                yy = goodbox[:, 0]
                zz = goodcutout
                spline_func = ipol.interp2d(xx, yy, zz, kind='cubic')
                replacementvalue = spline_func(padsize, padsize)
            else:
                #raise RuntimeError, 'invalid kwarg for "method" !'
                raise RuntimeError('invalid kwarg for "method" !')
        else:
            # i.e. no good pixels : Shit, a huge cosmic, we will have to improvise ...
            print(
                "WARNING: Huge cosmic ray encounterd - it fills the entire (" +
                str(boxsize) + "x" + str(boxsize) +
                ")-pixel cutout! Using backup value...")
            replacementvalue = np.median(
                padarray[padarray != np.Inf]
            )  # I don't like this... may need to do something smarter in the future, but I doubt it will ever happen if boxsize is sufficiently large

        # Now update the cleaned array, but remember the median was calculated from the padarray...otherwise it would depend on the order in which the cosmics are treated!!!
        cleaned[x, y] = replacementvalue

    # That's it.
    if verbose:
        #print "Cleaning done!"
        print("Cleaning done!")

    if timit:
        print('Time elapsed: ' + str(np.round(time.time() - start_time, 1)) +
              ' seconds')

    #return the cleaned image
    return cleaned
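
For reference, the median replacement performed in the loop above can be distilled into the following self-contained sketch; the function name, array names and the default 5x5 box are illustrative assumptions rather than part of the original pipeline:

import numpy as np

def median_patch_value(padded, x, y, boxsize=5):
    # `padded` is assumed to already carry an Inf border of boxsize//2 pixels,
    # so (x, y) indexes the top-left corner of the cutout, as in the loop above.
    cutout = padded[x:x + boxsize, y:y + boxsize].ravel()
    good = cutout[np.isfinite(cutout)]
    if good.size == 0:
        # huge cosmic: fall back to the global median of all finite pixels
        return np.median(padded[np.isfinite(padded)])
    return np.median(good)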
Exemplo n.º 40
0
def JCAMP_calc_xsec(jcamp_dict,
                    wavemin=None,
                    wavemax=None,
                    skip_nonquant=True,
                    debug=False):
    '''
    Taking as input a JCAMP spectrum dictionary (as parsed from a JDX file), extract the spectrum
    information and transform the absorption spectrum from its existing units to absorption cross-section.

    This function also corrects for unphysical data (such as negative transmittance values, or
    transmission above 1.0), and calculates absorbance if transmittance given. Instead of a return
    value, the function inserts the information into the input dictionary.

    Note that the conversion assumes that the measurements were collected for gas at a temperature of
    296K (23 degC).

    Parameters
    ----------
    jcamp_dict : dict
        A JCAMP spectrum dictionary.
    wavemin : float, optional
        The shortest wavelength in the spectrum to limit the calculation to.
    wavemax : float, optional
        The longest wavelength in the spectrum to limit the calculation to.
    skip_nonquant: bool
        If True then return "None" if the spectrum is missing quantitative data. If False, then try \
        to fill in missing quantitative values with defaults.
    '''

    x = jcamp_dict['x']
    y = jcamp_dict['y']

    T = 296.0  ## the temperature (23 degC) used by NIST when collecting spectra
    R = 1.0355E-25  ## the constant for converting data (includes the gas constant)

    ## Note: normally when we convert from wavenumber to wavelength units, the ordinate must be nonuniformly
    ## rescaled in order to compensate. But this is only true if we resample the abscissa to a uniform sampling
    ## grid. In this case here, we keep the sampling grid nonuniform in wavelength space, such that each digital
    ## bin retains its proportionality to energy, which is what we want.
    if (jcamp_dict['xunits'].lower() in ('1/cm', 'cm-1', 'cm^-1')):
        jcamp_dict['wavenumbers'] = array(
            x)  ## note that array() always performs a copy
        x = 10000.0 / x
        jcamp_dict['wavelengths'] = x
    elif (jcamp_dict['xunits'].lower()
          in ('micrometers', 'um', 'wavelength (um)')):
        jcamp_dict['wavelengths'] = x
        jcamp_dict['wavenumbers'] = 10000.0 / x
    elif (jcamp_dict['xunits'].lower()
          in ('nanometers', 'nm', 'wavelength (nm)')):
        x = x / 1000.0  ## convert nm to micrometers
        jcamp_dict['wavelengths'] = x
        jcamp_dict['wavenumbers'] = 10000.0 / x
    else:
        raise ValueError(
            'Don\'t know how to convert the spectrum\'s x units ("' +
            jcamp_dict['xunits'] + '") to micrometers.')

    ## Correct for any unphysical negative values.
    y[y < 0.0] = 0.0

    ## Make sure "y" refers to absorbance.
    if (jcamp_dict['yunits'].lower() == 'transmittance'):
        ## If in transmittance, then any y > 1.0 are unphysical.
        y[y > 1.0] = 1.0

        ## Convert to absorbance.
        okay = (y > 0.0)
        y[okay] = log10(1.0 / y[okay])
        y[logical_not(okay)] = nan

        jcamp_dict['absorbance'] = y
    elif (jcamp_dict['yunits'].lower() == 'absorbance'):
        pass
    elif (jcamp_dict['yunits'].lower() == '(micromol/mol)-1m-1 (base 10)'):
        jcamp_dict['yunits'] = 'xsec (m^2)'
        jcamp_dict['xsec'] = y / 2.687e19
        return
    else:
        raise ValueError(
            'Don\'t know how to convert the spectrum\'s y units ("' +
            jcamp_dict['yunits'] + '") to absorbance.')

    ## Determine the effective path length "ell" of the measurement chamber, in meters.
    if ('path length' in jcamp_dict):
        (val, unit) = jcamp_dict['path length'].lower().split()[0:2]
        if (unit == 'cm'):
            ell = float(val) / 100.0
        elif (unit == 'm'):
            ell = float(val)
        elif (unit == 'mm'):
            ell = float(val) / 1000.0
        else:
            ell = 0.1
    else:
        if skip_nonquant:
            return ({'info': None, 'x': None, 'xsec': None, 'y': None})
        ell = 0.1
        if debug:
            print(
                'Path length variable not found. Using 0.1m as a default ...')

    assert (alen(x) == alen(y))

    if ('npoints' in jcamp_dict):
        if (alen(x) != jcamp_dict['npoints']):
            npts_retrieved = str(alen(x))
            msg = '"' + jcamp_dict['title'] + '": Number of data points retrieved (' + npts_retrieved + \
                  ') does not equal the expected length (npoints = ' + str(jcamp_dict['npoints']) + ')!'
            raise ValueError(msg)

    ## For each gas, manually define the pressure "p" at which the measurement was taken (in units of mmHg).
    ## These values are obtained from the NIST Infrared spectrum database, which for some reason did not
    ## put the partial pressure information into the header.
    if ('partial_pressure' in jcamp_dict):
        p = float(jcamp_dict['partial_pressure'].split()[0])
        p_units = jcamp_dict['partial_pressure'].split()[1]
        if (p_units.lower() == 'mmhg'):
            pass
        elif (p_units.lower() == 'ppm'):
            p = p * 759.8 * 1.0E-6  ## scale PPM units at atmospheric pressure to partial pressure in mmHg
    else:
        if debug:
            print('Partial pressure variable value for ' +
                  jcamp_dict['title'] +
                  ' is missing. Using the default p = 150.0 mmHg ...')
        if skip_nonquant:
            return ({'info': None, 'x': None, 'xsec': None, 'y': None})
        p = 150.0

    ## Convert the absorbance units to cross-section in meters squared per molecule.
    xsec = y * T * R / (p * ell)

    ## Add the "xsec" values to the data dictionary.
    jcamp_dict['xsec'] = xsec

    return
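
As a rough sanity check of the conversion above, the same formula can be applied to a single toy transmittance value (the numbers below are illustrative, not taken from any NIST spectrum):

import numpy as np

T_gas = 296.0         # K, the temperature assumed above
R = 1.0355E-25        # the combined conversion constant used above
transmittance = 0.8   # toy measured transmittance
p = 150.0             # mmHg, the default partial pressure above
ell = 0.1             # m, the default path length above

A = np.log10(1.0 / transmittance)   # absorbance
xsec = A * T_gas * R / (p * ell)    # cross-section, m^2 per molecule
print(xsec)                         # roughly 2e-25 m^2 per molecule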
Exemplo n.º 41
0
n_folds = 10
for i, (name, dataset) in enumerate(mldata.datasets.items()):
    print('Dataset number {}'.format(i))
    if name == 'MNIST':
        # TODO get a stratified portion of the validation set [0:60000]
        dataset._data = dataset._data[-10000:]
        dataset._target = dataset._target[-10000:]

    mldata.sumarize_datasets(name)
    for mc in np.arange(mc_iterations):
        skf = StratifiedKFold(dataset.target, n_folds=n_folds, shuffle=True)
        test_folds = skf.test_folds
        for test_fold in np.arange(n_folds):
            x_train, y_train, x_test, y_test = separate_sets(
                dataset.data, dataset.target, test_fold, test_folds)
            n_training = np.alen(y_train)
            for actual_class in dataset.classes:
                tr_class = x_train[y_train == actual_class, :]
                t_labels = (y_test == actual_class).astype(int)
                prior = np.alen(tr_class) / n_training
                if np.alen(tr_class) > 1 and not all(t_labels == 0):
                    n_c = tr_class.shape[1]
                    if n_c > np.alen(tr_class):
                        n_c = np.alen(tr_class)

                    # Train a Density estimator
                    model_mymvn = MyMultivariateNormal(covariance_type='diag')
                    model_mymvn.fit(tr_class)

                    model_mvn = MultivariateNormal(covariance_type='diag')
                    model_mvn.fit(tr_class)
Exemplo n.º 42
0
                              gameR[(gameR.shape[0] - 1) - i][0])

                if memoryR.shape[0] == 1:
                    memorySA = gameSA
                    memoryR = gameR
                    memoryA = gameA
                    memoryS = gameS
                else:
                    #Add experience to memory
                    memorySA = np.concatenate((memorySA, gameSA), axis=0)
                    memoryS = np.concatenate((memoryS, gameS), axis=0)
                    memoryR = np.concatenate((memoryR, gameR), axis=0)
                    memoryA = np.concatenate((memoryA, gameA), axis=0)

                #if memory is full remove first element
                if np.alen(memorySA) >= max_memory_len:
                    memorySA = memorySA[gameR.shape[0]:]
                    memoryR = memoryR[gameR.shape[0]:]
                    memoryA = memoryA[gameR.shape[0]:]
                    memoryS = memoryS[gameR.shape[0]:]

            #Update the states
            qs = s

            #Retrain every X failures after num_initial_observation
            if done and game >= num_initial_observation:
                if game % 5 == 0:
                    print("Training  game# ", game, "momory size",
                          memorySA.shape[0])

                    #training Reward predictor model
Exemplo n.º 43
0
def run_cf(spa):
    global R, W, S, sumS, loc_tab

    train_data = base_path + '/Dataset/ws/train_n/sparseness%d/training%d.txt' % (spa, case)
    test_data = base_path + '/Dataset/ws/test_n/sparseness%d/test%d.txt' % (spa, case)
    W_path = base_path + '/Dataset/ws/BP_CF_W_spa%d_t%d.txt' % (spa, case)
    loc_path = base_path + '/Dataset/ws'

    print('Starting experiment: isICF=%s, sparsity=%d, case=%d' % (isICF, spa, case))
    print('Loading training data ...')
    now = time.time()
    trdata = np.loadtxt(train_data, dtype=float)
    n = np.alen(trdata)
    print('Training data loaded in %.2f s, number of records: %d\n' % ((time.time() - now), n))

    print('Loading location information ...')
    tnow = time.time()
    if isICF:
        loc_path += '/ws_info.txt'
    else:
        loc_path += '/user_info.txt'
    loc_tab = loadLocation(loc_path)
    n = np.alen(trdata)
    print('Location information loaded in %.2f s, number of records: %d\n' % ((time.time() - tnow), n))

    
    print('Converting the data into a matrix ...')
    tnow = time.time()
    u = trdata[:, 0]
    s = trdata[:, 1]
    u = np.array(u, int)
    s = np.array(s, int)
    R = np.full(us_shape, NoneValue, float)
    R[u, s] = trdata[:, 2]
    if isICF:
        R = R.T
    del trdata, u, s
    print('Data converted into a matrix in %.2f s\n' % (time.time() - tnow))


    print('Computing the similarity matrix ...')
    tnow = time.time()
    i = 0
    if readWcache and os.path.exists(W_path):
        W = np.loadtxt(W_path, np.float128)
    else:
        for i in range(axis0):
            if i % 50 == 0:
                print('----->step%d' % (i))
            for j in range(axis0):
                a = R[i, :]
                b = R[j, :]
                alog = a != NoneValue
                blog = b != NoneValue
                # similarity = 1 / exp(Euclidean distance over the co-rated entries)
                delta = np.subtract(a, b, out=np.zeros_like(a), where=alog & blog)
                ws = np.sum(delta**2)
                W[i, j] = 1.0 / math.exp(np.sqrt(ws))

        for i in range(axis0):
            W[i, i] = 0

        np.savetxt(W_path, W, '%.30f')
    print('Similarity matrix computed in %.2f s\n' % (time.time() - tnow))


    print('Building the neighbour lists ...')
    tnow = time.time()
    S = np.argsort(-W)[:, 0:k]
    for i in range(axis0):
        sumS[i] = np.sum(W[i, S[i]])
    print('Neighbour lists built in %.2f s\n' % (time.time() - tnow))

    
    print('Loading test data ...')
    tnow = time.time()
    trdata = np.loadtxt(test_data, dtype=float)
    n = np.alen(trdata)
    print('Test data loaded in %.2f s, number of records: %d\n' % ((time.time() - tnow), n))

    print('Evaluation started ...')
    tnow = time.time()
    mae = 0.0
    rmse = 0.0
    cot = 0
    for tc in trdata:
        if tc[2] <= 0:
            continue
        rt = predict(int(tc[0]), int(tc[1]))
        mae += abs(rt - tc[2])
        rmse += (rt - tc[2])**2
        cot += 1
    mae = mae * 1.0 / cot
    rmse = sqrt(rmse / cot)
    print('Evaluation finished in %.2f s\n' % (time.time() - tnow))

    print('Experiment finished: total time %.2f s, isICF=%s, sparsity=%d, MAE=%.3f, RMSE=%.3f\n'
          % ((time.time() - now), isICF, spa, mae, rmse))
    print('----------------------------------------------------------\n')


    print(W)
    print(S)
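
The predict() function used during the evaluation loop is not shown in this snippet; a similarity-weighted estimate consistent with the R, W and S arrays built here might look like the following sketch (an assumption for illustration, not the original implementation):

def predict_sketch(u, s):
    # Weighted average of the k most similar rows' ratings for column s,
    # skipping neighbours that have no rating (NoneValue).
    num, den = 0.0, 0.0
    for v in S[u]:
        if R[v, s] != NoneValue:
            num += W[u, v] * R[v, s]
            den += W[u, v]
    return num / den if den > 0 else 0.0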
Exemplo n.º 44
0
def bspleval(x, knots, coeffs, order, debug=False):
    '''
    Evaluate a B-spline at a set of points.

    Parameters
    ----------
    x : list or ndarray
        The set of points at which to evaluate the spline.
    knots : list or ndarray
        The set of knots used to define the spline.
    coeffs : list or ndarray
        The set of spline coefficients.
    order : int
        The order of the spline.

    Returns
    -------
    y : ndarray
        The value of the spline at each point in x.
    '''

    k = order
    t = knots
    m = np.alen(t)
    npts = np.alen(x)
    B = np.zeros((m - 1, k + 1, npts))

    if debug:
        print('k=%i, m=%i, npts=%i' % (k, m, npts))
        print('t=', t)
        print('coeffs=', coeffs)

    ## Create the zero-order B-spline basis functions.
    for i in range(m - 1):
        B[i, 0, :] = np.float64(np.logical_and(x >= t[i], x < t[i + 1]))

    if (k == 0):
        B[m - 2, 0, -1] = 1.0

    ## Next iteratively define the higher-order basis functions, working from lower order to higher.
    for j in range(1, k + 1):
        for i in range(m - j - 1):
            if (t[i + j] - t[i] == 0.0):
                first_term = 0.0
            else:
                first_term = ((x - t[i]) / (t[i + j] - t[i])) * B[i, j - 1, :]

            if (t[i + j + 1] - t[i + 1] == 0.0):
                second_term = 0.0
            else:
                second_term = ((t[i + j + 1] - x) /
                               (t[i + j + 1] - t[i + 1])) * B[i + 1, j - 1, :]

            B[i, j, :] = first_term + second_term
        B[m - j - 2, j, -1] = 1.0

    if debug:
        plt.figure()
        for i in range(m - 1):
            plt.plot(x, B[i, k, :])
        plt.title('B-spline basis functions')

    ## Evaluate the spline by multiplying the coefficients with the highest-order basis functions.
    y = np.zeros(npts)
    for i in range(m - k - 1):
        y += coeffs[i] * B[i, k, :]

    if debug:
        plt.figure()
        plt.plot(x, y)
        plt.title('spline curve')
        plt.show()

    return (y)
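
A quick way to sanity-check the evaluator above (assuming scipy is available) is to build a small cubic spline with scipy and compare the two evaluations; this is a sketch added for illustration, not part of the original example:

import numpy as np
from scipy import interpolate

xdata = np.linspace(0.0, 10.0, 50)
ydata = np.sin(xdata)
t, c, k = interpolate.splrep(xdata, ydata, k=3)   # knots, coefficients, order

xs = np.linspace(0.0, 10.0, 200)
y_scipy = interpolate.splev(xs, (t, c, k))
y_own = bspleval(xs, t, c, k)
print(np.abs(y_scipy - y_own).max())   # should be ~0, up to floating-point error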
Exemplo n.º 45
0
def div_by_behav(odor1, odor2, air, pump, lick, delay=2):
    odor1_changed = np.diff(odor1)
    odor2_changed = np.diff(odor2)
    trial_start = 0
    odor1_trial = 0
    odor2_trial = 0
    trials = 0
    odor_list = []
    delay = delay * 100
    for k in range(len(odor1_changed)):
        if ((odor1_changed[k] == 1) and (odor1[k + 20] == 1)):
            trials += 1
            odor1_trial += 1
            odor_list.append(1)
        #			go_trial_position.append(k)
        elif ((odor2_changed[k] == 1) and (odor2[k + 20] == 1)):
            trials += 1
            odor2_trial += 1
            odor_list.append(2)
    #print(odor1_trial)

    print(trials, 'trials')
    print(max(odor_list), odor1_trial, odor2_trial)

    all_lick = np.zeros((trials, 1400))
    all_pump = np.zeros((trials, 1400))
    all_airpuff = np.zeros((trials, 1400))

    now1_trial = -1
    now2_trial = -1
    now_trial = -1
    i = 0
    time = -500
    stop = 0
    while now_trial + 1 < trials:

        while i < len(odor1_changed):
            if now_trial > trials:
                break

            if (odor1_changed[i] == 1) and (odor1[i + 20]
                                            == 1) and (trial_start == 0):
                now_trial += 1
                now1_trial += 1
                odor = 1
                if np.alen(lick[i - delay:i + (1400 - delay)]) == 1400:
                    all_lick[now_trial, :] = lick[i - delay:i + (1400 - delay)]
                    all_pump[now_trial, :] = pump[i - delay:i + (1400 - delay)]
                    all_airpuff[now_trial, :] = air[i - delay:i +
                                                    (1400 - delay)]
                else:
                    l = np.alen(lick[i - delay:i + (1400 - delay)])
                    all_lick[now_trial, 0:l] = lick[i - delay:i + (l - delay)]
                    all_pump[now_trial, 0:l] = pump[i - delay:i + (l - delay)]
                    all_airpuff[now_trial,
                                0:l] = air[i - delay:i + (l - delay)]
                if odor_list[now_trial] != odor:
                    print(odor_list[now_trial], odor, ' diff!!!!')
                i += 1000

                continue
            elif (odor2_changed[i] == 1) and (odor2[i + 20]
                                              == 1) and (trial_start == 0):

                now_trial += 1
                now2_trial += 1

                odor = 2
                if np.alen(lick[i - delay:i + (1400 - delay)]) == 1400:
                    all_lick[now_trial, :] = lick[i - delay:i + (1400 - delay)]
                    all_pump[now_trial, :] = pump[i - delay:i + (1400 - delay)]
                    all_airpuff[now_trial, :] = air[i - delay:i +
                                                    (1400 - delay)]
                else:
                    l = np.alen(lick[i - delay:i + (1400 - delay)])
                    all_lick[now_trial, 0:l] = lick[i - delay:i + (l - delay)]
                    all_pump[now_trial, 0:l] = pump[i - delay:i + (l - delay)]
                    all_airpuff[now_trial,
                                0:l] = air[i - delay:i + (l - delay)]

                if odor_list[now_trial] != odor:
                    print(odor_list[now_trial], odor, ' diff!!!!')
                i += 1000
                continue
            i += 1

    return all_lick, all_pump, all_airpuff, odor_list
Exemplo n.º 46
0
(eigval,eigvect) = np.linalg.eigh(laplasian)
eigval = np.array(eigval)
eigval = eigval.astype(int)

ei = np.argsort(eigval)
# Saving the eigenvalues into a text file.
np.savetxt("eigenvalues.csv", eigval, delimiter=" ")
G = nx.from_numpy_matrix(eigvect)
nx.draw_networkx(G,with_labels=True)

firstkmat = eigvect[ei[::-1][0:4]]
firstkmat = np.transpose(firstkmat)

# Clustering using k-means++; the number of clusters is chosen from the eigenvalue plot.
kmeans = KMeans(n_clusters=np.alen(firstkmat[0]), init='k-means++', max_iter=100, precompute_distances=True)

kmeans.fit(firstkmat)
labels = kmeans.predict(firstkmat)

clusters_dict = {}
def get_key(val):
    for key, value in username_list_dict.items():
         if val == value:
             return key
for i in range(0, len(labels)):
    if labels[i] not in clusters_dict:
        clusters_dict[labels[i]] = []
    ls = clusters_dict[labels[i]]
    if not len(clusters_dict.get(labels[i]))==0 :
Exemplo n.º 47
0
                '''
                    #Add experience to memory
                    memorySA = np.concatenate((memorySA,gameSA),axis=0)
                    #memoryS = np.concatenate((memoryS,gameS),axis=0)
                    memoryR = np.concatenate((memoryR,gameR),axis=0)
                    #memoryA = np.concatenate((memoryA,gameA),axis=0)
                '''
                if memoryR.shape[0] == 1:
                    memoryR = tempGameR
                    memorySA = tempGameSA
                else:
                    memorySA = np.concatenate((memorySA, tempGameSA), axis=0)
                    memoryR = np.concatenate((memoryR, tempGameR), axis=0)

                #if memory is full remove first element
                if np.alen(memoryR) >= max_memory_len:
                    memoryR = memoryR[np.alen(gameR):]
                    memorySA = memorySA[np.alen(gameR):]
                    #print("memory full. mem len ", np.alen(memoryX))
                    #for l in range(np.alen(gameR)):
                    #memorySA = np.delete(memorySA, 0, axis=0)
                    #memoryR = np.delete(memoryR, 0, axis=0)
                    #memoryA = np.delete(memoryA, 0, axis=0)
                    #memoryS = np.delete(memoryS, 0, axis=0)

            #Update the states
            previous_state = np.copy(qs)
            qs = s

            #Retrain every X failures after num_initial_observation
            if done and game >= num_initial_observation:
                if i == (game_y.shape[0] - 1):
                    logging.info(
                        'Training game: #%s steps: %s last reward: %s end score: %s',
                        cur_game, step, reward, game_y[game_epoch][0])

            # Memory is experience
            if memory_x.shape[0] == 1:
                memory_x = game_x
                memory_y = game_y
            # Add experience to memory
            else:
                memory_x = np.concatenate((memory_x, game_x), axis=0)
                memory_y = np.concatenate((memory_y, game_y), axis=0)

            # Clear memory if full
            if np.alen(memory_x) >= max_memory:
                logging.debug('Memory filled at %s. Clearing memory...',
                              np.alen(memory_x))
                for exp in range(np.alen(game_x)):
                    memory_x = np.delete(memory_x, 0, axis=0)
                    memory_y = np.delete(memory_y, 0, axis=0)

        # Update states
        q_state = state

        # Retrain every 10 sets
        if done:
            if cur_game % training_sets == 0:
                logging.info('Training game: #%s Memory: %s', cur_game,
                             memory_x.shape[0])
                history = model.fit(memory_x,
print("Xtrain")
print(Xtrain)
print(Xtrain.shape)
Xtest = X[300:]
print("Xtest")
print(Xtest)
print(Xtest.shape)

T = readExcel(excelfile)
print("T shape:")
print(T.shape)

# convert Y  = 1 and N = 0;
TB = np.full(T.shape, -1).astype(int)
for i, row in enumerate(T):
    for j in range(np.alen(row)):
        if T[i, j] == 'Y':
            # 1 for positive
            TB[i, j] = 1
        else:
            # 0 for negative
            TB[i, j] = 0

# Separate training and testing class labels
Ttrain = TB[:300]
Ttest = TB[300:]
# Prepare the training data
TtrainSmile = Ttrain[:, 0]
TtrainBlink = Ttrain[:, 1]
TtrainGood = Ttrain[:, 2]
print("Traing good:")
Exemplo n.º 50
0
def run(args):
    if args.files:
        files = args.files
    elif args.listfile:
        with open(args.listfile) as f:
            files = list(map(str.strip, f.readlines()))
    else:
        parser.print_usage()
        sys.exit(0)

    failed = False
    for f in files:
        ext = os.path.splitext(f)
        if not os.path.exists(f):
            eprint('Error: %s does not exist.' % f)
            failed = True
        elif not (ext[1] == '.h5' or ext[1] == '.hdf5'):
            eprint('Error: %s is not an HDF5 file.' % f)
            failed = True
        elif not os.path.isfile(f):
            eprint('Error: %s is not a file.' % f)
            failed = True
    if failed:
        sys.exit(1)

    sub = None
    if args.substitute:
        split = args.substitute.split(':')
        if len(split) != 2:
            eprint('Error: invalid substitution syntax "%s". Syntax is "replace:with".' % args.substitute)
            failed = True
        sub = split
    if args.destdir:
        if not os.path.isdir(args.destdir):
            eprint('Error: destination dir "%s" does not exist.' % args.destdir)
            failed = True
    if failed:
        sys.exit(1)

    bases = []
    shifts_fns = []
    for f in files:
        base_name = os.path.splitext(f)[0]

        remove_suffix = '_aligned'
        if base_name.endswith(remove_suffix):
            base_name = base_name[:-len(remove_suffix)]

        shifts_fn = base_name + '_shifts.npy'
        shifts_fns.append(shifts_fn)
        if not args.no_shifts and not os.path.exists(shifts_fn):
            eprint('Error: "%s" does not exist.' % shifts_fn)
            failed = True

        if args.destdir:
            base_name = os.path.join(args.destdir, os.path.basename(base_name))
            bases.append(base_name)
        elif sub:
            if base_name.find(sub[0]) == -1:
                eprint('Error: filename "%s" does not contain "%s" for substitution.' % (f, sub[0]))
                failed = True
            base_name = base_name.replace(*sub)
            bases.append(base_name)
        else:
            bases.append(base_name)
    if failed:
        sys.exit(1)

    necessary = []

    if not args.no_verbose:
        print('Arguments look good. This will be processed:')
    for f, b in zip(files, bases):
        this_necessary = not all([os.path.isfile(b + s) for s in SUFFIXES]) or args.overwrite
        necessary.append(this_necessary)

        if not args.no_verbose:
            print(('' if this_necessary else '[SKIP] ') + f)
            for suffix in SUFFIXES:
                print((' -> ' if this_necessary else '[ALREADY EXISTS] ') + '%s%s' % (b, suffix))
            print()

    necessary_files = [(f, shifts_fn, b) for f, shifts_fn, b, n in zip(files, shifts_fns, bases, necessary) if n]

    if len(necessary_files) == 0:
        print('Nothing to process.')
        sys.exit(0)

    template = segmentation.load_template()
    for f, shifts_fn, b in prog_percent(necessary_files):
        print(f)
        print('='*len(f))

        try:
            base = b
            if not args.no_shifts:
                print('Loading shifts...')
                shifts = np.load(shifts_fn)
                shift_dists = np.sqrt(np.sum(np.square(shifts), axis=1))
            print('Loading stack...')
            #stack = dd.io.load(f)
            with h5py.File(f, "r") as f_:
                # List all groups
                print("Keys: %s" % f_.keys())
                start=time.time()
                stack=f_['data'][()]
                end=time.time()
                print('Time to load file: ',end-start)
            print('Computing std...')

            if not args.no_shifts:
                invalid_frames = [i for i in np.arange(np.alen(stack)) if shift_dists[i] > args.shift_threshold]
            else:
                invalid_frames = []

            valid_frames = segmentation.valid_frames(invalid_frames, length=np.alen(stack))
            std = segmentation.std(stack, valid_frames=valid_frames)
            print('Saving std...')
            io.save(base + STD_DEV_SUFFIX, std, spacing=io.SPACING_JAKOB)
            print('Finding rois...')
            rois = segmentation.find_rois_template(std, template=template)
            print('Saving rois...')
            np.save(base + ROIS_SUFFIX, rois)
            print('Getting traces...')
            traces = segmentation.get_traces(stack, rois, use_radius=5)
            print('Saving traces...')
            np.save(base + TRACES_SUFFIX, traces)

        except Exception as e:
            print('An exception occurred:')
            print(e)
Exemplo n.º 51
0
def div_by_cue(cal_time, cal_data, cue1, cue2, cue_hz, delay=2):
    cue_interval = 1 / cue_hz
    i = 0
    t = 0
    delay = delay * 50
    trial_start = 0
    change_cue1 = np.diff(cue1)
    cue1_trialnum = np.sum(np.abs(change_cue1) / 2)
    #print(int(cue1_trialnum))
    change_cue2 = np.diff(cue2)
    cue2_trialnum = np.sum(np.abs(change_cue2) / 2)
    print("tn", cue1_trialnum, cue2_trialnum)
    all_cue_cal = np.zeros((int(cue1_trialnum + cue2_trialnum), 700))
    cue1_cal = np.zeros((int(cue1_trialnum), 700))
    cue2_cal = np.zeros((int(cue2_trialnum), 700))
    all_cue = 0
    now_cue1 = 0
    now_cue2 = 0
    cue_order = []
    while i < np.alen(change_cue1):
        if change_cue1[i] == 1:
            trial_start = round(i / 20)
            for k in range(trial_start - 20, trial_start + 20):
                if np.sum(cal_time[0:k]) <= t and np.sum(
                        cal_time[0:k + 1]) > t:
                    if len(cal_data[k:k + 600]) < 600:
                        cue1_cal = np.delete(cue1_cal, now_cue1, 0)
                        all_cue_cal = np.delete(all_cue_cal, all_cue, 0)
                        print('cue1' + str(now_cue1))
                        break
                    else:
                        cue1_cal[now_cue1, 0:delay] = cal_data[k - delay:k]
                        cue1_cal[now_cue1,
                                 delay:] = cal_data[k:k + (700 - delay)]
                        all_cue_cal[all_cue, :] = cue1_cal[now_cue1, :]
                        cue_order.append(1)
                        now_cue1 += 1
                        all_cue += 1
                        break

            #print('1!')

        elif change_cue2[i] == 1:
            trial_start = round(i / 20)
            for k in range(trial_start - 20, trial_start + 20):
                if np.sum(cal_time[0:k]) <= t and np.sum(
                        cal_time[0:k + 1]) > t:
                    if len(cal_data[k:k + 600]) < 600:
                        cue2_cal = np.delete(cue2_cal, now_cue2, 0)
                        all_cue_cal = np.delete(all_cue_cal, all_cue, 0)
                        print('cue2' + str(now_cue2))
                        break
                    else:
                        cue2_cal[now_cue2, 0:delay] = cal_data[k - delay:k]
                        cue2_cal[now_cue2,
                                 delay:] = cal_data[k:k + (700 - delay)]
                        all_cue_cal[all_cue, :] = cue2_cal[now_cue2, :]
                        cue_order.append(2)
                        now_cue2 += 1
                        all_cue += 1
                        break

            #print('2!')

        t += cue_interval
        i += 1
    for i in range(np.alen(all_cue_cal)):
        if np.max(all_cue_cal[i, :]) == 0:
            all_cue_cal = np.delete(all_cue_cal, i, 0)
            print('empty trial!', i)
            break

    print(np.shape(all_cue_cal))
    #print(cue1_cal,np.mean(cue1_cal))
    #print(cue2_cal,np.mean(cue2_cal))
    return cue1_cal, cue2_cal, all_cue_cal, cue_order
Exemplo n.º 52
0
def actor_experience_replay():
    tSA = (memorySA)
    tR = (memoryR)
    tX = (memoryS)
    tY = (memoryA)
    tW = (memoryW)

    target = tR.mean() #+ math.fabs( tR.mean() - tR.max()  )/2 #+ math.fabs( tR.mean() - tR.max()  )/4
    train_C = np.arange(np.alen(tR))
    train_C = train_C[tR.flatten()>target]
    tX = tX[train_C,:]
    tY = tY[train_C,:]
    tW = tW[train_C,:]
    tR = tR[train_C,:]

    train_A = np.random.randint(tY.shape[0],size=int(min(experience_replay_size,np.alen(tR) )))

    tX = tX[train_A,:]
    tY = tY[train_A,:]
    tW = tW[train_A,:]
    tR = tR[train_A,:]

    train_B = np.arange(np.alen(tR))

    tX_train = np.zeros(shape=(1,num_env_variables))
    tY_train = np.zeros(shape=(1,num_env_actions))
    for i in range(np.alen(train_B)):
        #pr = predictTotalRewards(tX[i],tY[i])
        ''' YOU CAN"T USE predictTotalRewards
        IF YOU DON"T TRAIN THE QMODEL

        if tR[i][0] < pr:
            tW[i][0] = -1
        else:
        '''
        d = math.fabs( memoryR.max() - target)
        tW[i] =  math.fabs(tR[i]-(target+0.000000000005)) / d
        tW[i] = math.exp(1-(1/tW[i]**2))


        if tW[i]> np.random.rand(1):
            tX_train = np.vstack((tX_train,tX[i]))
            tY_train = np.vstack((tY_train,tY[i]))


            #print ("tW",tW[i],"exp", math.exp(1-(1/tW[i]**2)))
            #tW[i] = math.exp(1-(1/tW[i]**2))
            #tW[i] =  1
        #print("tW[i] %3.1f tR %3.2f pr %3.2f "%(tW[i],tR[i],pr))
    '''
    train_B = train_B[tW.flatten()>0]

    print("%8d were better results than pr"%np.alen(tX_train))

    tX = tX[train_B,:]
    tY = tY[train_B,:]
    tW = tW[train_B,:]
    tR = tR[train_B,:]
    #print("tW",tW)
    '''
    print("%8d were better results than pr"%np.alen(tX_train))
    ''' REMOVE FIRST ELEMENT BEFORE TRAINING '''
    tX_train = tX_train[1:]
    tY_train = tY_train[1:]
    print("%8d were better After removing first element"%np.alen(tX_train))
    if np.alen(tX_train)>0:
        #tW = scale_weights(tR,tW)
        #print("# setps short listed ", np.alen(tR))

        action_predictor_model.fit(tX_train,tY_train, batch_size=mini_batch, nb_epoch=training_epochs,verbose=0)
Exemplo n.º 53
0
                memoryW = tempGameW
            else:
                #Add experience to memory
                memoryS = np.concatenate((memoryS, tempGameS), axis=0)
                memoryRR = np.concatenate((memoryRR, tempGameRR), axis=0)
                memoryA = np.concatenate((memoryA, tempGameA), axis=0)
                memorySA = np.concatenate((memorySA, tempGameSA), axis=0)

                memoryR = np.concatenate((memoryR, tempGameR), axis=0)
                memoryW = np.concatenate((memoryW, tempGameW), axis=0)

                if gameR.mean() > max_game_average:
                    max_game_average = gameR.mean()

            #if memory is full remove first element
            if np.alen(memoryR) >= max_memory_len:
                memorySA = memorySA[gameR.shape[0]:]
                memoryR = memoryR[gameR.shape[0]:]
                memoryA = memoryA[gameR.shape[0]:]
                memoryS = memoryS[gameR.shape[0]:]
                memoryRR = memoryRR[gameR.shape[0]:]
                memoryW = memoryW[gameR.shape[0]:]

        qs = s

        if done and game > num_initial_observation and not PLAY_GAME:
            last_game_average = gameR.mean()
            if game > 3 and game % 2 == 0:
                # train on all memory
                print("Experience Replay")
                #for i in range(3):
Exemplo n.º 54
0
    def propose(self):

        if self.verbose:
            print_(self._id + ' proposing')

        fc = pm.gp.fast_matrix_copy

        eps_p_f = pm.utils.value(self.eps_p_f)
        f = pm.utils.value(self.f_eval)
        for i in xrange(len(self.scratch3)):
            self.scratch3[i] = np.sum(eps_p_f[self.ti[i]] - f[i])

        # Compute Cholesky factor of covariance of eps_p_f, C(x,x) + V
        C_eval_value = pm.utils.value(self.C_eval)
        C_eval_shape = C_eval_value.shape

        # Get the Cholesky factor of C_eval, plus the nugget.
        # I don't think you can use S_eval for speed, unfortunately.
        in_chol = fc(C_eval_value, self.scratch1)

        v_val = pm.utils.value(self.V)
        for i in xrange(pm.utils.value(C_eval_shape)[0]):
            in_chol[i, i] += v_val[i] / np.alen(self.ti[i])

        info = pm.gp.linalg_utils.dpotrf_wrap(in_chol)
        if info > 0:
            raise np.linalg.LinAlgError

        # Compute covariance of f conditional on eps_p_f.
        offdiag = fc(C_eval_value, self.scratch2)
        offdiag = pm.gp.trisolve(in_chol,
                                 offdiag,
                                 uplo='U',
                                 transa='T',
                                 inplace=True)

        C_step = offdiag.T * offdiag
        C_step *= -1
        C_step += C_eval_value

        # Compute mean of f conditional on eps_p_f.
        for i in xrange(len(self.scratch3)):
            self.scratch3[i] = np.mean(eps_p_f[self.ti[i]])
        m_step = pm.utils.value(self.M_eval) + np.dot(
            offdiag.T,
            pm.gp.trisolve(in_chol, (self.scratch3 - self.M_eval.value),
                           uplo='U',
                           transa='T')).view(np.ndarray).ravel()

        sig_step = C_step
        info = pm.gp.linalg_utils.dpotrf_wrap(C_step.T)
        if info > 0:
            warnings.warn(
                'Full conditional covariance was not positive definite.')
            return

        # Update value of f.
        self.f_eval.value = m_step + np.dot(
            sig_step, np.random.normal(size=sig_step.shape[1])).view(
                np.ndarray).ravel()
        # Propose the rest of the field from its conditional prior.
        self.f.rand()
Exemplo n.º 55
0
    def test_alen(self):
        self.assert_deprecated(lambda: np.alen(np.array([1, 2, 3])))
Exemplo n.º 56
0
def process_dicom_series(
    dicom_series_dict,
    series_uid,
    parent_sorting_field="PatientName",
    return_extra=True,
    individual_file=False,
    initial_sop_class_name_default="UNKNOWN",
):
    if not individual_file:
        logger.info(f"  Processing series UID: {series_uid}")
        dicom_file_list = dicom_series_dict[series_uid]
    else:
        logger.info(f"  Processing individual file: {individual_file}")
        dicom_file_list = [individual_file]

    logger.info(f"  Number of DICOM files: {len(dicom_file_list)}")

    initial_dicom = pydicom.read_file(dicom_file_list[0])

    # Get the data in the parent sorting field, clean with RegEx
    parent_sorting_data = re.sub(
        r"[^\w]", "_", str(initial_dicom[parent_sorting_field].value)).upper()

    if parent_sorting_data == "":
        logger.error(
            f"Could not find any data in {parent_sorting_field}. This is very bad, the data cannot be sorted properly."
        )
        """
        ! TO DO
        Implement a routine to let a user correlate a root directory with a name
        """
        parent_sorting_data = "TEMP"

    try:
        initial_dicom_sop_class_name = initial_dicom.SOPClassUID.name
    except AttributeError:
        logger.warning(
            f"Could not find DICOM SOP Class UID, using {initial_sop_class_name_default}."
        )
        initial_dicom_sop_class_name = initial_sop_class_name_default

    try:
        study_uid = initial_dicom.StudyInstanceUID
    except AttributeError:
        study_uid = "00001"
    """
    ! TO DO
    Need to check for secondary capture image storage
    This can include JPEGs with written information on them
    This is typically not very useful
    We can dump it to file
    Or just save the DICOM file in the folder of interest

    Not a big problem, sort out another day
    """

    # Check the potential types of DICOM files
    if ("Image" in initial_dicom_sop_class_name and
            initial_dicom_sop_class_name != "Secondary Capture Image Storage"):
        # Load as a primary image

        sorted_file_list = safe_sort_dicom_image_list(dicom_file_list)

        try:
            image = sitk.ReadImage(sorted_file_list)
        except RuntimeError:
            logger.warning("  Could not read image into SimpleITK.")
            logger.info("  Processing files individually.")

            for dicom_file in dicom_file_list:
                return process_dicom_series(
                    dicom_series_dict,
                    series_uid,
                    parent_sorting_field=parent_sorting_field,
                    return_extra=return_extra,
                    individual_file=dicom_file,
                    initial_sop_class_name_default=
                    initial_sop_class_name_default,
                )

        dicom_file_metadata = {
            "parent_sorting_data": parent_sorting_data,
            "study_uid": study_uid,
        }
        """
        ! TO DO - integrity check
            Read in all the files here, check the slice location and determine if any are missing
        """
        if initial_dicom.Modality == "PT":

            # scaling_factor = get_suv_bw_scale_factor(initial_dicom)
            # image *= scaling_factor

            # !TO DO
            # Work on PET SUV conversion
            None
        """
        ! CHECKPOINT
        Some DCE MRI sequences have the same series UID
        Here we check the sequence name, and split if necessary
        """

        if initial_dicom.Modality == "MR":

            try:
                sequence_names = np.unique([
                    pydicom.read_file(x).SequenceName for x in dicom_file_list
                ])

                sequence_dict = {}
                for dcm_name in dicom_file_list:
                    dcm_obj = pydicom.read_file(dcm_name)
                    var = dcm_obj.SequenceName
                    if var not in sequence_dict.keys():
                        sequence_dict[var] = [dcm_name]
                    else:
                        sequence_dict[var].append(dcm_name)

            except AttributeError:
                try:
                    logger.warning(
                        "    MRI sequence name not found. The SeriesDescription will be used instead."
                    )

                    sequence_names = np.unique([
                        pydicom.read_file(x).SeriesDescription
                        for x in dicom_file_list
                    ])

                    sequence_dict = {}
                    for dcm_name in dicom_file_list:
                        dcm_obj = pydicom.read_file(dcm_name)
                        var = dcm_obj.SeriesDescription
                        if var not in sequence_dict.keys():
                            sequence_dict[var] = [dcm_name]
                        else:
                            sequence_dict[var].append(dcm_name)

                except AttributeError:
                    logger.warning(
                        "    MRI SeriesDescription not found. The AcquisitionComments will be used instead."
                    )

                    sequence_names = np.unique([
                        pydicom.read_file(x).AcquisitionComments
                        for x in dicom_file_list
                    ])

                    sequence_dict = {}
                    for dcm_name in dicom_file_list:
                        dcm_obj = pydicom.read_file(dcm_name)
                        var = dcm_obj.AcquisitionComments
                        if var not in sequence_dict.keys():
                            sequence_dict[var] = [dcm_name]
                        else:
                            sequence_dict[var].append(dcm_name)

            if initial_dicom.Manufacturer == "GE MEDICAL SYSTEMS":
                # GE use the DICOM tag (0019, 10a2) [Raw data run number]
                # in Diffusion weighted MRI sequences
                # We need to separate this out to get the difference sequences

                if initial_dicom.SeriesDescription == "Diffusion Weighted":

                    # num_sequences = int( (initial_dicom[(0x0025, 0x1007)]) / (initial_dicom[(0x0021, 0x104f)]) )
                    # number_of_images / images_per_seq
                    num_images_per_seq = initial_dicom[(0x0021, 0x104F)].value

                    sequence_names = np.unique([
                        f"DWI_{str( ( pydicom.read_file(x)['InstanceNumber'].value - 1) // num_images_per_seq )}"
                        for x in dicom_file_list
                    ])

                    sequence_name_index_dict = {
                        name: index
                        for index, name in enumerate(sequence_names)
                    }

                    sequence_dict = {}
                    for dcm_name in dicom_file_list:
                        dcm_obj = pydicom.read_file(dcm_name)
                        var = f"DWI_{str( ( dcm_obj['InstanceNumber'].value - 1) // num_images_per_seq )}"
                        var_to_index = sequence_name_index_dict[var]

                        if var_to_index not in sequence_dict.keys():
                            sequence_dict[var_to_index] = [dcm_name]
                        else:
                            sequence_dict[var_to_index].append(dcm_name)

                    sequence_names = sorted(sequence_dict.keys())

            if np.alen(sequence_names) > 1:
                logger.warning(
                    "  Two MR sequences were found under a single series UID.")
                logger.warning("  These will be split into separate images.")

                # Split up the DICOM file list by sequence name
                for sequence_name in sequence_names:

                    dicom_file_list_by_sequence = sequence_dict[sequence_name]

                    logger.info(sequence_name)
                    logger.info(len(dicom_file_list_by_sequence))

                    sorted_file_list = safe_sort_dicom_image_list(
                        dicom_file_list_by_sequence)

                    initial_dicom = pydicom.read_file(sorted_file_list[0],
                                                      force=True)

                    image_by_sequence = sitk.ReadImage(sorted_file_list)

                    dicom_file_metadata_by_sequence = {
                        "parent_sorting_data": parent_sorting_data,
                        "study_uid": study_uid,
                    }

                    yield "IMAGES", dicom_file_metadata_by_sequence, initial_dicom, image_by_sequence
                return  # Stop iteration

        yield "IMAGES", dicom_file_metadata, initial_dicom, image

    if "Structure" in initial_dicom_sop_class_name:
        # Load as an RT structure set
        # This should be done individually for each file

        logger.info(f"      Number of files: {len(dicom_file_list)}")
        for index, dicom_file in enumerate(dicom_file_list):
            dicom_object = pydicom.read_file(dicom_file, force=True)

            # We must also read in the corresponding DICOM image
            # This can be found by matching the references series UID to the series UID
            """
            ! TO DO
            What happens if there is an RT structure set with different referenced sequences?
            """

            # Get the "ReferencedFrameOfReferenceSequence", first item
            referenced_frame_of_reference_item = dicom_object.ReferencedFrameOfReferenceSequence[
                0]

            # Get the "RTReferencedStudySequence", first item
            # This retrieves the study UID
            # This might be useful, but would typically match the actual StudyInstanceUID in the
            # DICOM object
            rt_referenced_series_item = (referenced_frame_of_reference_item.
                                         RTReferencedStudySequence[0])

            # Get the "RTReferencedSeriesSequence", first item
            # This retrieves the actual referenced series UID, which we need to match imaging
            # parameters
            rt_referenced_series_again_item = rt_referenced_series_item.RTReferencedSeriesSequence[
                0]

            # Get the appropriate series instance UID
            image_series_uid = rt_referenced_series_again_item.SeriesInstanceUID
            logger.info(
                f"      Item {index}: Matched SeriesInstanceUID = {image_series_uid}"
            )

            # Read in the corresponding image
            sorted_file_list = safe_sort_dicom_image_list(
                dicom_series_dict[image_series_uid])
            image = sitk.ReadImage(sorted_file_list)

            initial_dicom = pydicom.read_file(sorted_file_list[0], force=True)

            (
                structure_name_list,
                structure_image_list,
            ) = transform_point_set_from_dicom_struct(image, dicom_object)

            dicom_file_metadata = {
                "parent_sorting_data": parent_sorting_data,
                "study_uid": study_uid,
                "structure_name_list": structure_name_list,
            }

            yield "STRUCTURES", dicom_file_metadata, dicom_object, structure_image_list

    if "Dose" in initial_dicom_sop_class_name:
        # Load as an RT Dose distribution
        # This should be done individually for each file

        logger.info(f"      Number of files: {len(dicom_file_list)}")
        for index, dicom_file in enumerate(dicom_file_list):
            dicom_object = pydicom.read_file(dicom_file, force=True)
            """
            ! CHECKPOINT
            There should only be a single RT dose file (with each series UID)
            If there are more, yield each
            """

            initial_dicom = pydicom.read_file(dicom_file, force=True)

            dicom_file_metadata = {
                "parent_sorting_data": parent_sorting_data,
                "study_uid": study_uid,
            }

            # We must read the dose in as a float, otherwise the dose grid scaling multiplication later will not work!
            raw_dose_image = sitk.ReadImage(dicom_file, sitk.sitkFloat32)

            dose_grid_scaling = dicom_object.DoseGridScaling

            logger.debug(f"  Dose grid scaling: {dose_grid_scaling} Gy")

            scaled_dose_image = raw_dose_image * dose_grid_scaling

            yield "DOSES", dicom_file_metadata, dicom_object, scaled_dose_image
        """
        ! TO DO
        1. (DONE) Implement conversion of dose files (to NIFTI images)
        2. Implement conversion of RT plan files to text dump
        3. Do something with other files (e.g. Deformable Image Registration stuff)
        """

    return
Exemplo n.º 57
0
def _StartCountStride(elem, shape, dimensions=None, grp=None, datashape=None,\
        put=False, use_get_vars = False):
    """Return start, count, stride and indices needed to store/extract data
    into/from a netCDF variable.

    This function is used to convert a slicing expression into a form that is
    compatible with the nc_get_vars function. Specifically, it needs
    to interpret integers, slices, Ellipses, and 1-d sequences of integers
    and booleans.

    Numpy uses "broadcasting indexing" to handle array-valued indices.
    "Broadcasting indexing" (a.k.a "fancy indexing") treats all multi-valued
    indices together to allow arbitrary points to be extracted. The index
    arrays can be multidimensional, and more than one can be specified in a
    slice, as long as they can be "broadcast" against each other.
    This style of indexing can be very powerful, but it is very hard
    to understand, explain, and implement (and can lead to hard to find bugs).
    Most other python packages and array processing
    languages (such as netcdf4-python, xray, biggus, matlab and fortran)
    use "orthogonal indexing" which only allows for 1-d index arrays and
    treats these arrays of indices independently along each dimension.

    The implementation of "orthogonal indexing" used here requires that
    index arrays be 1-d boolean or integer. If integer arrays are used,
    the index values must be sorted and contain no duplicates.

    In summary, slicing netcdf4-python variable objects with 1-d integer or
    boolean arrays is allowed, but may give a different result than slicing a
    numpy array.

    Numpy also supports slicing an array with a boolean array of the same
    shape. For example x[x>0] returns a 1-d array with all the positive values of x.
    This is also not supported in netcdf4-python, if x.ndim > 1.

    Orthogonal indexing can be used to select netcdf variable slices
    using the dimension variables. For example, you can use v[lat>60,lon<180]
    to fetch the elements of v obeying conditions on latitude and longitude.
    Allowing for this sort of simple variable subsetting is the reason we decided to
    deviate from numpy's slicing rules.

    This function is used both by the __setitem__ and __getitem__ method of
    the Variable class.

    Parameters
    ----------
    elem : tuple of integer, slice, ellipsis or 1-d boolean or integer
    sequences used to slice the netCDF Variable (Variable[elem]).
    shape : tuple containing the current shape of the netCDF variable.
    dimensions : sequence
      The names of the dimensions. Only needed by __setitem__.
    grp  : netCDF Group
      The netCDF group to which the variable being set belongs to.
    datashape : sequence
      The shape of the data that is being stored. Only needed by __setitem__
    put : True|False (default False).  If called from __setitem__, put is True.

    Returns
    -------
    start : ndarray (..., n)
      A starting indices array of dimension n+1. The first n
      dimensions identify different independent data chunks. The last dimension
      can be read as the starting indices.
    count : ndarray (..., n)
      An array of dimension (n+1) storing the number of elements to get.
    stride : ndarray (..., n)
      An array of dimension (n+1) storing the steps between each datum.
    indices : ndarray (..., n)
      An array storing the indices describing the location of the
      data chunk in the target/source array (__getitem__/__setitem__).

    Notes:

    netCDF data is accessed via the function:
       nc_get_vars(grpid, varid, start, count, stride, data)

    Assume that the variable has dimension n, then

    start is a n-tuple that contains the indices at the beginning of data chunk.
    count is a n-tuple that contains the number of elements to be accessed.
    stride is a n-tuple that contains the step length between each element.

    """
    # Adapted from pycdf (http://pysclint.sourceforge.net/pycdf)
    # by Andre Gosselin.
    # Modified by David Huard to handle efficiently fancy indexing with
    # sequences of integers or booleans.
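    # Illustration of the orthogonal-indexing convention described in the
    # docstring (added for clarity, not part of the original code): for a
    # variable v of shape (10, 20), v[[0, 2], [1, 3]] selects the 2x2 block
    # formed by rows 0 and 2 and columns 1 and 3, whereas numpy's fancy
    # indexing on an ndarray would return just the two elements
    # v[0, 1] and v[2, 3].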

    nDims = len(shape)
    if nDims == 0:
        nDims = 1
        shape = (1, )

    # is there an unlimited dimension? (only defined for __setitem__)
    if put:
        hasunlim = False
        unlimd = {}
        if dimensions:
            for i in range(nDims):
                dimname = dimensions[i]
                # is this dimension unlimited?
                # look in current group, and parents for dim.
                dim = _find_dim(grp, dimname)
                unlimd[dimname] = dim.isunlimited()
                if unlimd[dimname]:
                    hasunlim = True
    else:
        hasunlim = False

    # When a single array or (non-tuple) sequence of integers is given
    # as a slice, assume it applies to the first dimension,
    # and use ellipsis for remaining dimensions.
    if np.iterable(elem):
        if type(elem) == np.ndarray or (type(elem) != tuple and \
            np.array([_is_int(e) for e in elem]).all()):
            elem = [elem]
            for n in range(len(elem) + 1, nDims + 1):
                elem.append(slice(None, None, None))
    else:  # Convert single index to sequence
        elem = [elem]

    # ensure there is at most 1 ellipsis
    #  we cannot use elem.count(Ellipsis), as with fancy indexing would occur
    #  np.array() == Ellipsis which gives ValueError: The truth value of an
    #  array with more than one element is ambiguous. Use a.any() or a.all()
    if sum(1 for e in elem if e is Ellipsis) > 1:
        raise IndexError(
            "At most one ellipsis allowed in a slicing expression")

    # replace boolean arrays with sequences of integers.
    newElem = []
    IndexErrorMsg=\
    "only integers, slices (`:`), ellipsis (`...`), and 1-d integer or boolean arrays are valid indices"
    i = 0
    for e in elem:
        # string-like object try to cast to int
        # needs to be done first, since strings are iterable and
        # hard to distinguish from something castable to an iterable numpy array.
        if type(e) in [str, bytes, unicode]:
            try:
                e = int(e)
            except:
                raise IndexError(IndexErrorMsg)
        ea = np.asarray(e)
        # Raise error if multidimensional indexing is used.
        if ea.ndim > 1:
            raise IndexError("Index cannot be multidimensional")
        # set unlim to True if dimension is unlimited and put==True
        # (called from __setitem__)
        if hasunlim and put and dimensions:
            try:
                dimname = dimensions[i]
                unlim = unlimd[dimname]
            except IndexError:  # more slices than dimensions (issue 371)
                unlim = False
        else:
            unlim = False
        # convert boolean index to integer array.
        if np.iterable(ea) and ea.dtype.kind == 'b':
            # check that the boolean array is not too long
            if not unlim and shape[i] != len(ea):
                msg = """
Boolean array must have the same shape as the data along this dimension."""
                raise IndexError(msg)
            ea = np.flatnonzero(ea)
        # an iterable (non-scalar) integer array.
        if np.iterable(ea) and ea.dtype.kind == 'i':
            # convert negative indices in 1d array to positive ones.
            ea = np.where(ea < 0, ea + shape[i], ea)
            if np.any(ea < 0):
                raise IndexError("integer index out of range")
            # if unlim, let integer index be longer than current dimension
            # length.
            if ea.shape != (0, ):
                elen = shape[i]
                if unlim:
                    elen = max(ea.max() + 1, elen)
                if ea.max() + 1 > elen:
                    msg = "integer index exceeds dimension size"
                    raise IndexError(msg)
            newElem.append(ea)
        # integer scalar
        elif ea.dtype.kind == 'i':
            newElem.append(e)
        # slice or ellipsis object
        elif type(e) == slice or type(e) == type(Ellipsis):
            if not use_get_vars and type(e) == slice and e.step not in [None,-1,1] and\
               dimensions is not None and grp is not None:
                # convert strided slice to integer sequence if possible
                # (this will avoid nc_get_vars, which is slow - issue #680).
                start = e.start if e.start is not None else 0
                step = e.step
                if e.stop is None and dimensions is not None and grp is not None:
                    stop = len(_find_dim(grp, dimensions[i]))
                else:
                    stop = e.stop
                    if stop < 0:
                        stop = len(_find_dim(grp, dimensions[i])) + stop
                try:
                    ee = np.arange(start, stop, e.step)
                    if len(ee) > 0:
                        e = ee
                except:
                    pass
            newElem.append(e)
        else:  # castable to a scalar int, otherwise invalid
            try:
                e = int(e)
                newElem.append(e)
            except:
                raise IndexError(IndexErrorMsg)
        if type(e) == type(Ellipsis):
            i += 1 + nDims - len(elem)
        else:
            i += 1
    elem = newElem

    # replace Ellipsis and integer arrays with slice objects, if possible.
    newElem = []
    for e in elem:
        ea = np.asarray(e)
        # Replace ellipsis with slices.
        if type(e) == type(Ellipsis):
            # The ellipsis stands for the missing dimensions.
            newElem.extend(
                (slice(None, None, None), ) * (nDims - len(elem) + 1))
        # Replace sequence of indices with slice object if possible.
        elif np.iterable(e) and len(e) > 1:
            start = e[0]
            stop = e[-1] + 1
            step = e[1] - e[0]
            try:
                ee = range(start, stop, step)
            except ValueError:  # start, stop or step is not valid for a range
                ee = False
            if ee and len(e) == len(ee) and (e == np.arange(start, stop,
                                                            step)).all():
                # don't convert to slice unless abs(stride) == 1
                # (nc_get_vars is very slow, issue #680)
                if not use_get_vars and step not in [1, -1]:
                    newElem.append(e)
                else:
                    newElem.append(slice(start, stop, step))
            else:
                newElem.append(e)
        elif np.iterable(e) and len(e) == 1:
            newElem.append(slice(e[0], e[0] + 1, 1))
        else:
            newElem.append(e)
    elem = newElem

    # If slice doesn't cover all dims, assume ellipsis for rest of dims.
    if len(elem) < nDims:
        for n in range(len(elem) + 1, nDims + 1):
            elem.append(slice(None, None, None))

    # make sure there are not too many dimensions in slice.
    if len(elem) > nDims:
        raise ValueError(
            "slicing expression exceeds the number of dimensions of the variable"
        )

    # Compute the dimensions of the start, count, stride and indices arrays.
    # The number of elements in the first n dimensions corresponds to the
    # number of times the _get method will be called.
    sdim = []
    for i, e in enumerate(elem):
        # at this stage e is a slice, a scalar integer, or a 1d integer array.
        # integer array: one _get call for each index in the array
        if np.iterable(e):
            sdim.append(np.alen(e))
        # Scalar int or slice, just a single _get call
        else:
            sdim.append(1)

    # broadcast data shape when assigned to full variable (issue #919)
    try:
        fullslice = elem.count(slice(None, None, None)) == len(elem)
    except:  # fails if elem contains a numpy array.
        fullslice = False
    if fullslice and datashape and put and not hasunlim:
        datashape = broadcasted_shape(shape, datashape)

    # pad datashape with zeros for dimensions not being sliced (issue #906)
    # only used when data covers slice over subset of dimensions
    if datashape and len(datashape) != len(elem) and\
       len(datashape) == sum(1 for e in elem if type(e) == slice):
        datashapenew = ()
        i = 0
        for e in elem:
            if type(e) != slice and not np.iterable(e):  # scalar integer slice
                datashapenew = datashapenew + (0, )
            else:  # slice object
                datashapenew = datashapenew + (datashape[i], )
                i += 1
        datashape = datashapenew

    # Create the start, count, stride and indices arrays.

    sdim.append(max(nDims, 1))
    start = np.empty(sdim, dtype=int)
    count = np.empty(sdim, dtype=int)
    stride = np.empty(sdim, dtype=int)
    indices = np.empty(sdim, dtype=object)

    for i, e in enumerate(elem):

        ea = np.asarray(e)

        # set unlim to True if dimension is unlimited and put==True
        # (called from __setitem__). Note: grp and dimensions must be set.
        if hasunlim and put and dimensions:
            dimname = dimensions[i]
            unlim = unlimd[dimname]
        else:
            unlim = False

        #    SLICE    #
        if type(e) == slice:

            # determine length parameter for slice.indices.

            # shape[i] can be zero for unlim dim that hasn't been written to
            # yet.
            # length of slice may be longer than current shape
            # if dimension is unlimited (and we are writing, not reading).
            if unlim and e.stop is not None and e.stop > shape[i]:
                length = e.stop
            elif unlim and e.stop is None and datashape != ():
                try:
                    if e.start is None:
                        length = datashape[i]
                    else:
                        length = e.start + datashape[i]
                except IndexError:
                    raise IndexError("shape of data does not conform to slice")
            else:
                if unlim and datashape == () and len(dim) == 0:
                    # writing scalar along unlimited dimension using slicing
                    # syntax (var[:] = 1, when var.shape = ())
                    length = 1
                else:
                    length = shape[i]

            beg, end, inc = e.indices(length)
            n = len(range(beg, end, inc))

            start[..., i] = beg
            count[..., i] = n
            stride[..., i] = inc
            indices[..., i] = slice(None)

        #    ITERABLE    #
        elif np.iterable(e) and np.array(
                e).dtype.kind in 'i':  # Sequence of integers
            start[..., i] = np.apply_along_axis(lambda x: e * x, i,
                                                np.ones(sdim[:-1]))
            indices[...,
                    i] = np.apply_along_axis(lambda x: np.arange(sdim[i]) * x,
                                             i, np.ones(sdim[:-1], int))

            count[..., i] = 1
            stride[..., i] = 1

        #   all that's left is SCALAR INTEGER    #
        else:
            if e >= 0:
                start[..., i] = e
            elif e < 0 and (-e <= shape[i]):
                start[..., i] = e + shape[i]
            else:
                raise IndexError("Index out of range")

            count[..., i] = 1
            stride[..., i] = 1
            indices[..., i] = -1  # Use -1 instead of 0 to indicate that
            # this dimension shall be squeezed.

    return start, count, stride, indices  #, out_shape
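
# A minimal, runnable sketch (not part of the library source) of how the
# start/count/stride arrays returned above can be walked: the leading
# dimensions enumerate the independent data chunks, and the last dimension
# holds the per-dimension start/count/stride values handed to an
# nc_get_vars-style read.  `read_chunk` is a hypothetical stand-in for the
# actual C call.
import numpy as np

def iterate_chunks(start, count, stride):
    # one (start, count, stride) triple per independent chunk
    for idx in np.ndindex(*start.shape[:-1]):
        yield start[idx], count[idx], stride[idx]

def read_chunk(s, c, st):
    # stand-in: a real implementation would call nc_get_vars here
    print("read", c.tolist(), "values starting at", s.tolist(), "step", st.tolist())

# Example: two independent chunks along a single dimension.
start = np.array([[0], [5]])
count = np.array([[3], [3]])
stride = np.array([[1], [1]])
for s, c, st in iterate_chunks(start, count, stride):
    read_chunk(s, c, st)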
Exemplo n.º 58
0
    def sample(self,
               p0,
               model0,
               lnprob0=None,
               lnlike0=None,
               iterations=1,
               thin=1,
               storechain=True):
        """
        Advance the chains ``iterations`` steps as a generator.

        :param p0:
            The initial positions of the walkers.  Shape should be
            ``(ntemps, nwalkers, dim)``.

        :param model0:
            The initial model index for the chain.

        :param lnprob0: (optional)
            The initial posterior values for the ensembles.  Shape
            ``(ntemps, nwalkers)``.

        :param lnlike0: (optional)
            The initial likelihood values for the ensembles.  Shape
            ``(ntemps, nwalkers)``.

        :param iterations: (optional)
            The number of iterations to perform.

        :param thin: (optional)
            The number of iterations to perform between saving the
            state to the internal chain.

        :param storechain: (optional)
            If ``True`` store the iterations in the ``chain``
            property.

        At each iteration, this generator yields

        * ``p``, the current position of the walkers.

        * ``model``, the current model index.

        * ``lnprob``, the current posterior values for the walkers.

        * ``lnlike``, the current likelihood values for the walkers.

        """
        p = np.copy(np.array(p0))

        # If we have no lnprob or logls compute them
        if lnprob0 is None or lnlike0 is None:
            lnprob0, lnlike0 = self._get_lnprob(p, 0)

        lnprob = lnprob0
        lnlike = lnlike0
        model = model0

        # initialize chain variables
        self._chain = [[] for ii in range(self.nmodel)]
        self._lnprob = [[] for ii in range(self.nmodel)]
        self._lnlikelihood = [[] for ii in range(self.nmodel)]
        self._modchain = np.zeros(iterations)

        # do sampling
        self.iterations = 0
        for i in range(iterations):
            self.iterations += 1

            # propose jump in parameter space
            q, newmod = self._get_jump(p, model)
            q = np.array(q)

            # get number of new parameters (can be negative)
            newpar = np.alen(q) - np.alen(p)

            # evaluate posterior
            newlnprob, newlnlike = self._get_lnprob(q, newpar)

            # Metropolis-Hastings step: accept unconditionally if the
            # proposal has a higher posterior; otherwise accept with
            # probability exp(newlnprob - lnprob).
            diff = newlnprob - lnprob

            if diff < 0:
                diff = np.exp(diff) - np.random.rand()

            if diff >= 0:
                p = q
                model = newmod
                lnprob = newlnprob
                lnlike = newlnlike
                self.naccepted[model - 1] += 1

            # add count to specific model
            self._modchain[i] = model
            self._nmod[model - 1] += 1

            # save chain values
            if (i + 1) % thin == 0:
                if storechain:
                    self._chain[model - 1].append(p)
                    self._lnprob[model - 1].append(lnprob)
                    self._lnlikelihood[model - 1].append(lnlike)

            yield p, model, lnprob, lnlike
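
# A minimal, runnable sketch (not part of the sampler source) of how the
# generator above might be driven.  The sampler class and real data are not
# available here, so `toy_sample` is a hypothetical stand-in with the same
# yield signature as sample(); it also mimics the Metropolis acceptance rule
# used in the loop above: accept when u <= exp(newlnprob - lnprob), u ~ U(0, 1).
import numpy as np

def toy_sample(p0, model0, iterations=1):
    rng = np.random.default_rng(0)
    p, model, lnprob = np.asarray(p0, dtype=float), model0, -1.0
    for _ in range(iterations):
        q = p + rng.normal(scale=0.1, size=p.shape)       # propose a jump
        newlnprob = -0.5 * np.sum(q ** 2)                  # toy log-posterior
        if rng.random() <= np.exp(newlnprob - lnprob):     # MH acceptance
            p, lnprob = q, newlnprob
        yield p, model, lnprob, lnprob                     # lnlike == lnprob here

for p, model, lnprob, lnlike in toy_sample([0.0, 1.0], 1, iterations=3):
    print(model, round(lnprob, 3))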
Exemplo n.º 59
0
        good_pump = np.array([])
        good_air = np.array([])
        #
        omission_cals = np.array([])
        omission_odor = []
        omission_lick = np.array([])
        omission_pump = np.array([])
        omission_air = np.array([])

        for i in range(win_num):
            acc_gos = sum(goacc_list[i * 10:(i + 1) * 10])
            go_nums = odor_list[i * 10:(i + 1) * 10].count(1)
            print(go_nums)
            if acc_gos / go_nums <= 0.5:
                print('omi!', i)
                if np.alen(omission_cals) == 0:
                    omission_cals = cal[i * 10:(i + 1) * 10, :]
                    omission_odor = odor_list[i * 10:(i + 1) * 10]
                    omission_lick = lick[i * 10:(i + 1) * 10, :]
                    omission_pump = pump[i * 10:(i + 1) * 10, :]
                    omission_air = airpuff[i * 10:(i + 1) * 10, :]
                else:
                    omission_cals = np.vstack(
                        (omission_cals, cal[i * 10:(i + 1) * 10, :]))
                    for k in range(len(odor_list[i * 10:(i + 1) * 10])):
                        omission_odor.append(odor_list[i * 10 + k])
                    omission_lick = np.vstack(
                        (omission_lick, lick[i * 10:(i + 1) * 10, :]))
                    omission_pump = np.vstack(
                        (omission_pump, pump[i * 10:(i + 1) * 10, :]))
                    omission_air = np.vstack(
                        (omission_air, airpuff[i * 10:(i + 1) * 10, :]))
Exemplo n.º 60
0
def div_by_cue(cal_time, cal_data, cue1, cue2, cue3, cue_hz):
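    """Split a calcium trace into per-trial windows aligned to cue onsets.

    Behavior as inferred from the code below: onsets are detected from the
    diff of each cue trace (sampled at ``cue_hz``); for every onset the trial
    keeps 100 calcium samples before and 600 after it, giving rows of length
    700 in ``cue1_cal``/``cue2_cal``/``cue3_cal``.  ``cue_ordor`` records the
    order in which the cues occurred.
    """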
    cue_interval = 1 / cue_hz
    i = 0
    t = 0
    trial_start = 0
    change_cue1 = np.diff(cue1)
    cue1_trialnum = np.sum(np.abs(change_cue1) / 2)
    #print(int(cue1_trialnum))
    change_cue2 = np.diff(cue2)
    cue2_trialnum = np.sum(np.abs(change_cue2) / 2)
    change_cue3 = np.diff(cue3)
    cue3_trialnum = np.sum(np.abs(change_cue3) / 2)
    print("tn", cue1_trialnum, cue2_trialnum, cue3_trialnum)
    cue1_cal = np.zeros((int(cue1_trialnum), 700))
    cue2_cal = np.zeros((int(cue2_trialnum), 700))
    cue3_cal = np.zeros((int(cue3_trialnum), 700))
    now_cue1 = 0
    now_cue2 = 0
    now_cue3 = 0
    cue_ordor = []
    while i < np.alen(change_cue1):
        if change_cue1[i] == 1:
            trial_start = round(i / 20)
            for k in range(trial_start - 20, trial_start + 20):
                if np.sum(cal_time[0:k]) <= t and np.sum(
                        cal_time[0:k + 1]) > t:
                    if len(cal_data[k:k + 600]) < 600:
                        cue1_cal = np.delete(cue1_cal, now_cue1, 0)
                        print('cue1' + str(now_cue1))
                    else:
                        cue1_cal[now_cue1, 0:100] = cal_data[k - 100:k]
                        cue1_cal[now_cue1, 100:] = cal_data[k:k + 600]

            now_cue1 += 1
            cue_ordor.append(1)
            #print('1!')

        elif change_cue2[i] == 1:
            trial_start = round(i / 20)
            for k in range(trial_start - 20, trial_start + 20):
                if np.sum(cal_time[0:k]) <= t and np.sum(
                        cal_time[0:k + 1]) > t:
                    if len(cal_data[k:k + 600]) < 600:
                        cue2_cal = np.delete(cue2_cal, now_cue2, 0)
                        print('cue2' + str(now_cue2))
                    else:
                        cue2_cal[now_cue2, 0:100] = cal_data[k - 100:k]
                        cue2_cal[now_cue2, 100:] = cal_data[k:k + 600]
                    # 	tw = len(cal_data[k:k + 600])
                    #
                    # except:
                    # 	print(len(cue2_cal[now_cue2, 100:]), len(cal_data[k:k + 600]))
                    #	continue

            now_cue2 += 1
            cue_ordor.append(2)
            #print('2!')

        elif change_cue3[i] == 1:
            trial_start = round(i / 20)
            for k in range(trial_start - 20, trial_start + 20):
                if np.sum(cal_time[0:k]) <= t and np.sum(
                        cal_time[0:k + 1]) > t:
                    if len(cal_data[k:k + 600]) < 600:
                        cue3_cal = np.delete(cue3_cal, now_cue3, 0)
                        print('cue3' + str(now_cue3))
                    else:
                        cue3_cal[now_cue3, 0:100] = cal_data[k - 100:k]
                        cue3_cal[now_cue3, 100:] = cal_data[k:k + 600]

            now_cue3 += 1
            cue_ordor.append(3)
        t += cue_interval
        i += 1
    #print(cue1_cal,np.mean(cue1_cal))
    #print(cue2_cal,np.mean(cue2_cal))
    return cue1_cal, cue2_cal, cue3_cal, cue_ordor