Example #1
def stats(t, snp=None):
    '''Return a record array with imputation statistics.'''
    T = t.sample_index_to_impute
    imputed = t.imputed_data[:, T, :]
    tot_to_impute = 2 * imputed.shape[1]
    snp = snp if snp is not None else np.arange(t.num_snps)
    stats = np.zeros((len(snp),),
                     dtype=[
                            ('dist_cm', 'f4'), # Genetic distance from beginning of chromosome
                            ('count', '(2,)i4'), # Allele count
                            ('frequency', '(2,)f4'), # Allele frequency
                            ('call_rate', 'f4'), # Imputation Call rate
                            ('call_rate_training', 'f4')  # Training set call rate
                            ])
    call_rate_training = 1.0 * np.sum(np.sum(t.imputed_data[:, t.sample_index, :] != 0, axis=2), axis=1)# / (2 * len(t.sample_index))        
    for row, snp_index in enumerate(snp):
        # TODO: replace by a bulk group-by/hist?
        # g = t.__t.training_data[snp_index, :, :]
        i = imputed[snp_index, :]
        (c1, c2) = (len(np.where(i == 1)[0]), len(np.where(i == 2)[0]))
        c = c1 + c2 + SMALL_FLOAT
        f1, f2 = (1.0 * c1) / c, (1.0 * c2) / c
        call_rate = 1.0 * len(i.nonzero()[0]) / tot_to_impute
        # print 'c1 %4d c2 %4d f1 %.2f f2 %.2f call rate %5.2f' % (c1, c2, f1, f2, call_rate)
        stats[row] = (t.snp['dist_cm'][snp_index], [c1, c2], [f1, f2], call_rate, call_rate_training[snp_index])
    return stats
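The record array above uses NumPy sub-array field dtypes such as '(2,)i4'. A minimal, self-contained sketch of that dtype pattern (synthetic values, not tied to the imputation object t):

import numpy as np

# hypothetical two-SNP record array with the same field layout as stats()
rec = np.zeros((2,), dtype=[('dist_cm', 'f4'),
                            ('count', '(2,)i4'),
                            ('frequency', '(2,)f4'),
                            ('call_rate', 'f4'),
                            ('call_rate_training', 'f4')])
rec[0] = (1.25, [10, 30], [0.25, 0.75], 0.95, 0.99)
print(rec['count'])         # shape (2, 2): one (c1, c2) pair per SNP
print(rec['frequency'][0])  # [0.25 0.75]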
Example #2
def f1(u, eigvals, Z10, Z11):
    """
    A component of exact time pdf (Eq. 22, HJC92).

    Parameters
    ----------
    u : float
        u = t - tres
    eigvals : array_like, shape (k,)
        Eigenvalues of -Q matrix.
    Z10, Z11 (or gama10, gama11) : list of array_likes
        Constants for the exact open/shut time pdf. Z10, Z11 for likelihood
        calculation or gama10, gama11 for time distributions.

    Returns
    -------
    f : ndarray
    """

#    f = np.zeros(Z10[0].shape)
#    for i in range(len(eigvals)):
#        f += (Z10[i] + Z11[i] * u) *  math.exp(-eigvals[i] * u)

    if Z10.ndim > 1:
        f = np.sum((Z10 + Z11 * u) *
            np.exp(-eigvals * u).reshape(Z10.shape[0],1,1), axis=0)
    else:
        f = np.sum((Z10 + Z11 * u) * np.exp(-eigvals * u))
    return f
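The vectorized branch is equivalent to the commented-out loop; a quick self-check with synthetic eigvals, Z10, Z11 (shapes assumed, not HJC92 data):

import numpy as np

rng = np.random.default_rng(0)
k, n = 3, 4
eigvals = rng.uniform(1, 5, size=k)
Z10 = rng.normal(size=(k, n, n))
Z11 = rng.normal(size=(k, n, n))
u = 0.7

loop = np.zeros((n, n))
for i in range(k):
    loop += (Z10[i] + Z11[i] * u) * np.exp(-eigvals[i] * u)

vec = np.sum((Z10 + Z11 * u) *
             np.exp(-eigvals * u).reshape(k, 1, 1), axis=0)
assert np.allclose(loop, vec)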
Example #3
def test_matrix_assemble(dim):
    eps = 1000*DOLFIN_EPS

    (u, uu), (v, vv), (U, UU), dPP, bc = _create_dp_problem(dim)

    # Scalar assemble
    mat = assemble(u*v*U*dPP)

    # Create a numpy matrix based on the local size of the vector
    # and populate it with values from local vector
    loc_range = u.vector().local_range()
    vec_mat = np.zeros_like(mat.array())
    vec_mat[range(loc_range[1] - loc_range[0]),
            range(loc_range[0], loc_range[1])] = u.vector().get_local()

    assert np.sum(np.absolute(mat.array() - vec_mat)) < eps

    # Vector assemble
    mat = assemble((uu[0]*vv[0]*UU[0] + uu[1]*vv[1]*UU[1])*dPP)

    # Create a numpy matrix based on the local size of the vector
    # and populate it with values from local vector
    loc_range = uu.vector().local_range()
    vec_mat = np.zeros_like(mat.array())
    vec_mat[range(loc_range[1] - loc_range[0]),
            range(loc_range[0], loc_range[1])] = uu.vector().get_local()

    assert np.sum(np.absolute(mat.array() - vec_mat)) < eps
Example #4
def test_decimate():
    """Test decimation of digitizer headshapes with too many points."""
    # load headshape and convert to meters
    hsp_mm = _get_ico_surface(5)['rr'] * 100
    hsp_m = hsp_mm / 1000.

    # save headshape to a file in mm in temporary directory
    tempdir = _TempDir()
    sphere_hsp_path = op.join(tempdir, 'test_sphere.txt')
    np.savetxt(sphere_hsp_path, hsp_mm)

    # read in raw data using spherical hsp, and extract new hsp
    with warnings.catch_warnings(record=True) as w:
        raw = read_raw_kit(sqd_path, mrk_path, elp_txt_path, sphere_hsp_path)
    assert_true(any('more than' in str(ww.message) for ww in w))
    # collect headshape from raw (should now be in m)
    hsp_dec = np.array([dig['r'] for dig in raw.info['dig']])[8:]

    # with 10242 points and _decimate_points set to resolution of 5 mm, hsp_dec
    # should be a bit over 5000 points. If not, something is wrong or
    # decimation resolution has been purposefully changed
    assert_true(len(hsp_dec) > 5000)

    # should have similar size, distance from center
    dist = np.sqrt(np.sum((hsp_m - np.mean(hsp_m, axis=0))**2, axis=1))
    dist_dec = np.sqrt(np.sum((hsp_dec - np.mean(hsp_dec, axis=0))**2, axis=1))
    hsp_rad = np.mean(dist)
    hsp_dec_rad = np.mean(dist_dec)
    assert_almost_equal(hsp_rad, hsp_dec_rad, places=3)
Example #5
 def measure_objects(self, operand, workspace):
     '''Performs the measurements on the requested objects'''
     objects = workspace.get_objects(operand.operand_objects.value)
     if objects.has_parent_image:
         area_occupied = np.sum(objects.segmented[objects.parent_image.mask]>0)
         perimeter = np.sum(outline(np.logical_and(objects.segmented != 0,objects.parent_image.mask)))
         total_area = np.sum(objects.parent_image.mask)
     else:
         area_occupied = np.sum(objects.segmented > 0)
         perimeter = np.sum(outline(objects.segmented) > 0)
         total_area = np.product(objects.segmented.shape)
     m = workspace.measurements
     m.add_image_measurement(F_AREA_OCCUPIED%(operand.operand_objects.value),
                             np.array([area_occupied], dtype=float ))
     m.add_image_measurement(F_PERIMETER%(operand.operand_objects.value),
                             np.array([perimeter], dtype=float ))
     m.add_image_measurement(F_TOTAL_AREA%(operand.operand_objects.value),
                             np.array([total_area], dtype=float))
     if operand.should_save_image.value:
         binary_pixels = objects.segmented > 0
         output_image = cpi.Image(binary_pixels,
                                  parent_image = objects.parent_image)
         workspace.image_set.add(operand.image_name.value,
                                 output_image)
     return [[operand.operand_objects.value,
              str(area_occupied), str(perimeter), str(total_area)]]
Example #6
def chol_logdet(U):
    if isinstance(U, np.ndarray):
        return 2*np.sum(np.log(np.diag(U)))
    elif isinstance(U, cholmod.Factor):
        return np.sum(np.log(U.D()))
    else:
        raise ValueError("Unknown type of Cholesky factor")
Example #7
def _lmvnpdffull(obs, means, covars):
    """
    Log probability for full covariance matrices.

    WARNING: In certain cases, this function will modify in-place
    some of the covariance matrices

    """
    from scipy import linalg
    if hasattr(linalg, 'solve_triangular'):
        # only in scipy since 0.9
        solve_triangular = linalg.solve_triangular
    else:
        # slower, but works
        solve_triangular = linalg.solve
    n_obs, n_dim = obs.shape
    nmix = len(means)
    log_prob = np.empty((n_obs, nmix))
    for c, (mu, cv) in enumerate(zip(means, covars)):
        try:
            cv_chol = linalg.cholesky(cv, lower=True)
        except linalg.LinAlgError:
            # The model is most probably stuck in a component with too
            # few observations; we need to reinitialize this component
            cv[:] = 10 * np.eye(cv.shape[0])
            cv_chol = cv
        cv_log_det = 2 * np.sum(np.log(np.diagonal(cv_chol)))
        cv_sol = solve_triangular(cv_chol, (obs - mu).T, lower=True).T
        log_prob[:, c] = -.5 * (np.sum(cv_sol ** 2, axis=1) + \
                           n_dim * np.log(2 * np.pi) + cv_log_det)

    return log_prob
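A small sanity check against scipy.stats.multivariate_normal on random inputs (assumes SciPy is available and the function above is defined):

import numpy as np
from scipy import stats

rng = np.random.default_rng(0)
n_obs, n_dim = 6, 3
obs = rng.normal(size=(n_obs, n_dim))
means = rng.normal(size=(2, n_dim))
covars = []
for _ in range(2):
    X = rng.normal(size=(n_dim, n_dim))
    covars.append(X @ X.T + n_dim * np.eye(n_dim))

lp = _lmvnpdffull(obs, means, covars)
for c in range(2):
    ref = stats.multivariate_normal(means[c], covars[c]).logpdf(obs)
    assert np.allclose(lp[:, c], ref)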
Example #8
    def transform_scalars(self, dataset, N=25):
        """Add Poisson noise to tilt images"""
        self.progress.maximum = 1

        tiltSeries = utils.get_array(dataset).astype(float)
        if tiltSeries is None:
            raise RuntimeError("No scalars found!")

        Ndata = tiltSeries.shape[0] * tiltSeries.shape[1]

        self.progress.maximum = tiltSeries.shape[2]
        step = 0
        for i in range(tiltSeries.shape[2]):
            if self.canceled:
                return

            tiltImage = tiltSeries[:, :, i].copy()
            tiltImage = tiltImage / np.sum(tiltSeries[:, :, i]) * (Ndata * N)
            tiltImage = np.random.poisson(tiltImage)
            tiltImage = tiltImage * np.sum(tiltSeries[:, :, i]) / (Ndata * N)

            tiltSeries[:, :, i] = tiltImage.copy()
            step += 1
            self.progress.value = step

        utils.set_array(dataset, tiltSeries)
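The per-image scaling above sets the expected total count to Ndata * N before Poisson sampling, then restores the original intensity scale. A standalone sketch of the same idea on one synthetic image (no tomviz dataset/utils objects):

import numpy as np

rng = np.random.default_rng(0)
image = rng.uniform(0.0, 1.0, size=(64, 64))
N = 25                                  # target mean counts per pixel
Ndata = image.size

total = np.sum(image)
scaled = image / total * (Ndata * N)    # expected total = Ndata * N counts
noisy = rng.poisson(scaled) * total / (Ndata * N)

print(np.mean(np.abs(noisy - image)))   # relative noise shrinks as N grows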
Example #9
def sum_to_shape(X, s):
    """
    Sum axes of the array such that the resulting shape is as given.

    Thus, the shape of the result will be s or an error is raised.
    """
    # First, sum and remove axes that are not in s
    if np.ndim(X) > len(s):
        axes = tuple(range(-np.ndim(X), -len(s)))
    else:
        axes = ()
    Y = np.sum(X, axis=axes)

    # Second, sum axes that are 1 in s but keep the axes
    axes = ()
    for i in range(-np.ndim(Y), 0):
        if s[i] == 1:
            if np.shape(Y)[i] > 1:
                axes = axes + (i,)
        else:
            if np.shape(Y)[i] != s[i]:
                raise ValueError("Shape %s can't be summed to shape %s" %
                                 (np.shape(X), s))
    Y = np.sum(Y, axis=axes, keepdims=True)
    
    return Y
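Example usage: leading axes not present in s are summed away, and axes that are 1 in s are summed but kept with keepdims:

import numpy as np

X = np.arange(2 * 3 * 4).reshape(2, 3, 4)

print(sum_to_shape(X, (3, 4)).shape)   # (3, 4)  -- leading axis summed out
print(sum_to_shape(X, (3, 1)).shape)   # (3, 1)  -- last axis summed, kept as length 1
print(sum_to_shape(X, (1, 1)).shape)   # (1, 1)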
Example #10
def log_sum_exp(x, axis=0):
    """ Calculates the logarithm of the sum of e to the power of input 'x'. The method tries to avoid
        overflows by using the relationship: log(sum(exp(x))) = alpha + log(sum(exp(x-alpha))).
        
    :Parameter:
        x:    data.
             -type: float or numpy array 
          
        axis: Sums along the given axis.
             -type: int
        
    :Return:
        Logarithm of the sum of exp of x. 
       -type: float or numpy array.
        
    """
    alpha = x.max(axis) - numx.log(numx.finfo(numx.float64).max)/2.0
    if axis == 1:
        return numx.squeeze(alpha + numx.log(
                                             numx.sum(
                                                      numx.exp(x.T - alpha)
                                                      , axis=0)))
    else:
        return numx.squeeze(alpha + numx.log(
                                             numx.sum(
                                                      numx.exp(x - alpha)
                                                      , axis=0)))
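The shifted form agrees with the naive log(sum(exp(x))) when no overflow occurs, and stays finite when it would overflow; a quick check (assumes the function above, with NumPy imported as numx):

import numpy as numx

x = numx.array([[1.0, 2.0, 3.0],
                [0.5, 0.5, 0.5]])

assert numx.allclose(log_sum_exp(x, axis=0),
                     numx.log(numx.sum(numx.exp(x), axis=0)))
assert numx.allclose(log_sum_exp(x, axis=1),
                     numx.log(numx.sum(numx.exp(x), axis=1)))

big = numx.array([1000.0, 1000.0, 1000.0])
print(log_sum_exp(big, axis=0))   # ~1001.0986, where the naive form overflows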
Example #11
def torgerson(distances, n_components=2):
    """
    Perform classical mds (Torgerson scaling).

    .. note::
        If the distances are euclidean then this is equivalent to projecting
        the original data points to the first `n` principal components.

    """
    distances = np.asarray(distances)
    assert distances.shape[0] == distances.shape[1]
    N = distances.shape[0]
    # O ^ 2
    D_sq = distances ** 2

    # double center the D_sq
    rsum = np.sum(D_sq, axis=1, keepdims=True)
    csum = np.sum(D_sq, axis=0, keepdims=True)
    total = np.sum(csum)
    D_sq -= rsum / N
    D_sq -= csum / N
    D_sq += total / (N ** 2)
    B = np.multiply(D_sq, -0.5, out=D_sq)

    U, L, _ = np.linalg.svd(B)
    if n_components > N:
        U = np.hstack((U, np.zeros((N, n_components - N))))
        L = np.hstack((L, np.zeros((n_components - N))))
    U = U[:, :n_components]
    L = L[:n_components]
    D = np.diag(np.sqrt(L))
    return np.dot(U, D)
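For Euclidean input distances the embedding reproduces the pairwise distances up to rotation/reflection; a small check on random 2-D points (assumes the function above):

import numpy as np

rng = np.random.default_rng(0)
pts = rng.normal(size=(10, 2))
dist = np.sqrt(((pts[:, None, :] - pts[None, :, :]) ** 2).sum(-1))

emb = torgerson(dist, n_components=2)
dist_emb = np.sqrt(((emb[:, None, :] - emb[None, :, :]) ** 2).sum(-1))
assert np.allclose(dist, dist_emb)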
Example #12
def svm_loss(x, y):
    """
    Computes the loss and gradient for multiclass SVM classification.

    Inputs:
    - x: Input data, of shape (N, C) where x[i, j] is the score for the jth class
    for the ith input.
    - y: Vector of labels, of shape (N,) where y[i] is the label for x[i] and
    0 <= y[i] < C

    Returns a tuple of:
    - loss: Scalar giving the loss
    - dx: Gradient of the loss with respect to x
    """
    N = x.shape[0]
    correct_class_scores = x[np.arange(N), y]
    margins = np.maximum(0, x - correct_class_scores[:, np.newaxis] + 1.0)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N
    num_pos = np.sum(margins > 0, axis=1)
    dx = np.zeros_like(x)
    dx[margins > 0] = 1
    dx[np.arange(N), y] -= num_pos
    dx /= N
    return loss, dx
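A quick numerical gradient check on tiny random inputs (standard centered differences; not tied to any dataset):

import numpy as np

rng = np.random.default_rng(0)
N, C = 5, 4
x = rng.normal(size=(N, C))
y = rng.integers(0, C, size=N)

loss, dx = svm_loss(x, y)

h = 1e-6
num = np.zeros_like(x)
for i in range(N):
    for j in range(C):
        xp = x.copy(); xp[i, j] += h
        xm = x.copy(); xm[i, j] -= h
        num[i, j] = (svm_loss(xp, y)[0] - svm_loss(xm, y)[0]) / (2 * h)

assert np.allclose(dx, num, atol=1e-4)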
Example #13
 def dist_tree(self, other, tol=1e-8):
     if self.ndim == 2:
         from . import d2 as sim
     else:
         from . import d3 as sim
         
     assert self.ndim == other.ndim
     
     if self.gamma != 0 or other.gamma != 0:
         assert np.abs(self.gamma - other.gamma) <= tol
         gamma = (self.gamma + other.gamma) / 2.0
         box = sim.LeesEdwardsBox(sim.Vec(1,1), gamma)
     else:
         box = sim.OriginBox(1.0)
     
     sz1 = self.size_indices()
     assert(len(sz1) == 2)
     cutoff1 = int(np.sum(sz1[0]))
     
     sz2 = other.size_indices()
     assert(len(sz2) == 2)
     cutoff2 = int(np.sum(sz2[0]))
     
     vs1 = [sim.Vec(*xy) for idx in sz1 for xy in self.rs[idx]]
     vs2 = [sim.Vec(*xy) for idx in sz2 for xy in other.rs[idx]]
     
     tree = sim.JammingTreeBD(box, sim.vecvector(vs1), sim.vecvector(vs2), cutoff1, cutoff2)
     return tree
Example #14
    def compute_distances_no_loops(self, X):
        """
        Compute the distance between each test point in X and each training point
        in self.X_train using no explicit loops.

        Input / Output: Same as compute_distances_two_loops
        """
        num_test = X.shape[0]
        num_train = self.X_train.shape[0]
        dists = np.zeros((num_test, num_train)) 
        
        # l2(A, X) = l2([a,b], [x,y]) = ...
        #     sqrt( (a-x)^2 + (b-y)^2 ) = ...
        #     = sqrt( a^2 -2ax + x^2 + b^2 -2by + y^2 ) = 
        #     = sqrt( (a^2 + b^2) + (x^2 + y^2) -2(ax + by) ) = ...
        #     = sqrt( A^2 - 2 * A.dot(X.T) + X^2 ) =
        #     = np.sqrt( np.sum(A ** 2) + np.sum(X ** 2) -2 * A.dot(X.T) )
        
        # -2*A.dot(X)
        res = -2 * X.dot(self.X_train.T)   
        # -2*A.dot(X) + A^2 + X^2
        res = res + np.sum(X ** 2, axis=1).reshape((-1,1))  + np.sum(self.X_train ** 2, axis=1)
        dists = np.sqrt(res)
        
        return dists
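The same expansion can be checked outside the class on plain arrays against an explicit two-loop reference (synthetic data):

import numpy as np

rng = np.random.default_rng(0)
X_test = rng.normal(size=(4, 3))
X_train = rng.normal(size=(6, 3))

# vectorized: sqrt(||a||^2 + ||x||^2 - 2 * a.dot(x))
res = -2 * X_test.dot(X_train.T)
res += np.sum(X_test ** 2, axis=1).reshape(-1, 1) + np.sum(X_train ** 2, axis=1)
dists = np.sqrt(res)

ref = np.zeros((4, 6))
for i in range(4):
    for j in range(6):
        ref[i, j] = np.sqrt(np.sum((X_test[i] - X_train[j]) ** 2))

assert np.allclose(dists, ref)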
Example #15
def dice_(seg, gt):
    intersection = 2. * np.sum(seg * gt)
    denominator = (np.sum(np.square(seg)) + np.sum(np.square(gt)))
    if denominator == 0:
        return 1.
    similarity = intersection / denominator
    return similarity
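Example on two small binary masks; for 0/1 arrays the squared sums in the denominator are just voxel counts:

import numpy as np

seg = np.array([[0, 1, 1],
                [0, 1, 0]])
gt = np.array([[0, 1, 0],
               [1, 1, 0]])

# 2 * |seg & gt| / (|seg| + |gt|) = 2*2 / (3 + 3)
print(dice_(seg, gt))   # 0.666...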
Example #16
def discrepancy(observed, simulated, expected):
    """Calculates Freeman-Tukey statistics (Freeman and Tukey 1950) as
    a measure of discrepancy between observed and r replicates of simulated data. This
    is a convenient method for assessing goodness-of-fit (see Brooks et al. 2000).
    
    D(x|\theta) = \sum_j (\sqrt{x_j} - \sqrt{e_j})^2
    
    :Parameters:
      observed : Iterable of observed values (length n)
      simulated : Iterable of simulated values (length rxn)
      expected : Iterable of expected values (length rxn)
    
    :Returns:
      D_obs : Discrepancy of observed values
      D_sim : Discrepancy of simulated values
    
    """
    try:
        simulated = simulated.astype(float)
    except AttributeError:
        simulated = simulated.trace().astype(float)
    try:
        expected = expected.astype(float)
    except AttributeError:
        expected = expected.trace().astype(float)
    
    D_obs = np.sum([(np.sqrt(observed)-np.sqrt(e))**2 for e in expected], 1)
    D_sim = np.sum([(np.sqrt(s)-np.sqrt(e))**2 for s,e in zip(simulated, expected)], 1)
    
    # Print p-value
    count = sum(s>o for o,s in zip(D_obs,D_sim))
    print_('Bayesian p-value: p=%.3f' % (1.*count/len(D_obs)))
    
    return D_obs, D_sim
Example #17
def get_num_diffs(cummulativeFrames):
  shape = cummulativeFrames.shape
  xMax = shape[0]
  yMax = shape[1]

  numBlocks = (xMax * yMax / PIXINCR)
  print(numBlocks)
  totalDiffs = np.sum(cummulativeFrames)
  totalDiffsAvg = totalDiffs / numBlocks
  print(totalDiffsAvg)

  xPos = 0 
  yPos = 0

  allDiffs = []

  #print "Starting diffs.."
  diff_count = 0
  while yPos < yMax:
    xPos = 0
    while xPos < xMax:
      chunk = cummulativeFrames[xPos:xPos+PIXINCR, yPos:yPos+PIXINCR]
      diffs = np.sum(chunk)
      allDiffs.append(diffs)
      xPos += PIXINCR

    yPos += PIXINCR

  # count blocks whose summed difference exceeds the per-block average
  for diff in allDiffs:
    if diff > totalDiffsAvg:
      diff_count += 1
  return diff_count
Example #18
    def smooth_objective(self, x, mode='both', check_feasibility=False):
        """
        Evaluate a smooth function and/or its gradient

        if mode == 'both', return both function value and gradient
        if mode == 'grad', return only the gradient
        if mode == 'func', return only the function value
        """
        x = self.apply_offset(x)
        exp_x = np.exp(x)

        #TODO: Using transposes to scale the rows of a 2d array - should we use an affine_transform to do this?
        #JT: should be able to do this with np.newaxis

        if mode == 'both':
            ratio = ((self.trials/(1. + np.sum(exp_x, axis=1))) * exp_x.T).T
            f, g = -2. * self.scale(np.sum(self.firstcounts * x) -  np.dot(self.trials, np.log(1. + np.sum(exp_x, axis=1)))), - 2 * self.scale(self.firstcounts - ratio) 
            return f, g
        elif mode == 'grad':
            ratio = ((self.trials/(1. + np.sum(exp_x, axis=1))) * exp_x.T).T
            f, g = None, - 2 * self.scale(self.firstcounts - ratio) 
            return g
        elif mode == 'func':
            f, g = -2. * self.scale(np.sum(self.firstcounts * x) -  np.dot(self.trials, np.log(1. + np.sum(exp_x, axis=1)))), None
            return f
        else:
            raise ValueError("mode incorrectly specified")
 def __init__(self, data, classes, tree_features, n_trees=100):
     self.n_features = np.shape(data)[1]
     n_rows = np.shape(data)[0]
     n_nans = np.sum(np.isnan(data), 0)
     data = data[:, n_nans < n_rows]
     self.n_features = np.shape(data)[1]
     
     n_nans = np.sum(np.isnan(data), 1)
     data = data[n_nans < self.n_features, :]
     self.n_rows = np.shape(data)[0]
     
     if (tree_features > self.n_features):
         tree_features = self.n_features
     
     self.col_list = np.zeros((n_trees, tree_features), dtype='int')
     self.n_trees = n_trees
     self.bags = []
     for i in range(n_trees):
         cols = sample(range(self.n_features), tree_features)
         cols.sort()
         self.col_list[i, :] = cols
         data_temp = data[:, cols]
         n_nans = np.sum(np.isnan(data_temp), 1)
         data_temp = data_temp[n_nans == 0, :]
         classes_temp = classes[n_nans == 0]
         #bag = BaggingClassifier(n_estimators=1, max_features=tree_features)
         bag = RandomForestClassifier(n_estimators=1, max_features=tree_features)
         bag.fit(data_temp, classes_temp)
         self.bags.append(bag)
         print(np.shape(data_temp))
Example #20
    def __update_b_vec(self,cur_obs):
        # convert measurement vector into emission probabilities
        # repeat the observation in columns
        cur_obs_mat = np.tile(cur_obs,(self.V_mat.shape[1],1)).T
        masked_mat = cur_obs_mat == self.V_mat

        # Extract the probability of the observation on each link for each state
        p_obs_given_off_link = np.sum(self.off_links*masked_mat,axis=1)
        p_obs_given_on_link  = np.sum(self.on_links*masked_mat,axis=1)

        # replicate the probability of each measurement on each link for each state
        p_obs_mat_off = np.tile(p_obs_given_off_link,(self.num_states,1)).T
        p_obs_mat_on  = np.tile(p_obs_given_on_link,(self.num_states,1)).T

        # Compute emission probabilities
        tmp1 = self.codewords*p_obs_mat_on
        tmp2 = np.logical_not(self.codewords)*p_obs_mat_off
        tmp3 = tmp1 + tmp2
        
        # divide tmp3 into groups of 4.  Multiply and normalize
        prev = np.ones(self.num_states)
        start_mark = 0
        end_mark = 4
        group = end_mark
        while start_mark < self.num_links:
            current = np.product(tmp3[start_mark:np.minimum(self.num_links,end_mark),:],axis=0)
            current = current/np.sum(current)
            prev = (prev*current)/np.sum(prev*current)
            end_mark += group
            start_mark += group

        # add emission probabilities to the circular buffer
        self.C.add_observation(prev)        
Example #21
 def __set_static_gaus_pmfs(self):
     if np.logical_not(self.off_buff.is_full()):
         print "The long term buffer is not yet full.  This may give undesirable results"
     
     # median RSS of off-state buffer
     cal_med = self.off_buff.get_no_nan_median()
     
     if (np.sum(cal_med == 127) > 0) | (np.sum(np.isnan(cal_med)) > 0):
         sys.stderr.write('At least one link has a median of 127 or is nan\n\n')
         quit()
          
     if (np.sum(np.isnan(self.off_buff.get_nanvar())) > 0):
         sys.stderr.write('the long term buffer has a nan')
         quit()
     
     cal_med_mat = np.tile(cal_med,(self.V_mat.shape[1],1)).T
     
     # variance of RSS during calibration
     cal_var = np.maximum(self.off_buff.get_nanvar(),self.omega) #3.0 
     cal_var_mat = np.tile(cal_var,(self.V_mat.shape[1],1)).T
     
     # Compute the off_link emission probabilities for each link
     x = np.exp(- (self.V_mat - cal_med_mat)**2/(2*cal_var_mat/1.0)) # 1.0
     self.off_links = self.__normalize_pmf(x)
     
     # Compute the on_link emission probabilities for each link
     x = np.exp(- (self.V_mat - (cal_med_mat-self.Delta))**2/(self.eta*2*cal_var_mat)) # 3
     self.on_links = self.__normalize_pmf(x) 
Example #22
def findrotation_kabsch(coords1, coords2, align_com=True):
    '''
    Kabsch, Wolfgang, (1976) "A solution of the best rotation to relate two sets of vectors", Acta Crystallographica 32:922
    '''
    
    # check if arrays are of same size
    if(coords1.size != coords2.size):
        raise BaseException("dimension of arrays does not match")
    
    # reshape the arrays
    x1 = coords1.reshape([-1,3])
    x2 = coords2.reshape([-1,3])
    
    # determine number of atoms
    natoms = x1.shape[0]
    
    # set both com to zero
    if(align_com):
        com1 = np.sum(x1,axis=0) / float(natoms)
        com2 = np.sum(x2,axis=0) / float(natoms)
        x1 -= com1
        x2 -= com2
  
    # calculate covariance matrix
    A = np.dot( x2.transpose(), x1)
    # and do singular value decomposition
    u, s, v = np.linalg.svd(A)
 
    if np.linalg.det(u) * np.linalg.det(v) + 1.0 < 1e-8:
        s[-1] = -s[-1]
        u[:,-1] = -u[:,-1]
 
    return  np.dot(u, v).transpose()
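A quick check: rotate a centered point set by a known proper rotation and confirm the recovered matrix maps the first set onto the second (copies are passed because the function centers its inputs in place when align_com=True):

import numpy as np

rng = np.random.default_rng(0)
x1 = rng.normal(size=(10, 3))
x1 -= x1.mean(axis=0)                  # centered, so align_com is a no-op

Q, _ = np.linalg.qr(rng.normal(size=(3, 3)))
if np.linalg.det(Q) < 0:               # force a proper rotation
    Q[:, 0] = -Q[:, 0]

x2 = x1 @ Q
rot = findrotation_kabsch(x1.copy(), x2.copy())
assert np.allclose(x1 @ rot, x2)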
Example #23
    def compute_size(fonts, layout):
        """Compute width + height for the layout + line boxes + text boxes."""
        for line in layout.children:
            for box in line.children:
                if isinstance(box, TextBox):
                    font = fonts.font(box.style)
                    box.width = font.width(box.text)

                    # Box baseline is the relative offset from the line baseline in canvas coordinates
                    alignment_baseline = box.style["alignment-baseline"]
                    if alignment_baseline == "alphabetic":
                        box.baseline = 0
                    elif alignment_baseline == "central":
                        box.baseline = font.ascent * 0.5
                    elif alignment_baseline == "hanging":
                        box.baseline = font.ascent
                    elif alignment_baseline == "middle":
                        box.baseline = font.ascent * 0.35
                    else:
                        raise ValueError("Unknown alignment-baseline value: %s" % alignment_baseline)

                    # Box top is the relative offset from the line baseline in canvas coordinates
                    box.top = box.baseline - font.ascent
                    # Box bottom is the relative offset from the line baseline in canvas coordinates
                    box.bottom = box.baseline - font.descent

                    box.height = box.bottom - box.top
                elif isinstance(box, MarkerBox):
                    font = fonts.font(box.style)

                    box.baseline = 0
                    box.top = box.baseline - font.ascent
                    box.bottom = box.baseline - font.descent
                    box.height = box.bottom - box.top
                    box.width = box.height
                elif isinstance(box, (PushHyperlink, PopHyperlink)):
                    box.baseline = 0
                    box.top = box.baseline
                    box.bottom = box.baseline
                    box.height = box.bottom - box.top
                    box.width = 0
                else:
                    raise Exception("Unexpected box type: %s" % box) # pragma: no cover

            # Line top is the relative offset from the line baseline in canvas coordinates
            line.top = numpy.min([child.top for child in line.children]) if line.children else 0
            # Line bottom is the relative offset from the line baseline in canvas coordinates
            line.bottom = numpy.max([child.bottom for child in line.children]) if line.children else 0

            actual_line_height = line.bottom - line.top
            explicit_line_height = line.style["line-height"]
            offset = (explicit_line_height - actual_line_height) * 0.5
            if offset > 0:
                line.top -= offset
                line.bottom += offset

            line.width = numpy.sum([child.width for child in line.children]) if line.children else 0
            line.height = line.bottom - line.top

        layout.height = numpy.sum([line.height for line in layout.children]) if layout.children else 0
Example #24
    def fitBrokenToStrips(self):

        """Fits broken-line to strip data"""

        # convenience views to avoid typos
        bStrip = (self.stripCounts > 10)
        if np.sum(bStrip) < 1:
            return

        x = self.stripFeHs[bStrip]
        y = self.stripMedns[bStrip]  # could come back to this later

        # guess set of parameters
        #guess = [-0.4, -40., -240., 40.]
        guess = [-0.3, 0.0, -250., 0.0]

        # Yes this could all be looped through...

        meritStraight = lambda pars, x, y: oneStraight(x, pars) - y
        meritBroken = lambda pars, x, y: twoStraight(x, pars) - y 

        # do the fitting 
        self.parsStraight, self.successStraight = \
            leastsq(meritStraight,guess[0:2] ,args=(x,y))

        self.parsBroken, self.successBroken = \
            leastsq(meritBroken,guess[:] ,args=(x,y))

        # compute the sum of residuals, over the strips (not the
        # stars)
        self.chisqStraight = np.sum(meritStraight(self.parsStraight, x, y)**2)
        self.chisqBroken = np.sum(meritBroken(self.parsBroken, x, y)**2)
Example #25
def decider(pp_matrix, ML_dec, t, R_lim, num_annotators,C):

    pp_matrix2 = np.hstack((pp_matrix, ML_dec.reshape((C,1)))) #concatenate transpose of ML_decision to pp_matrix
    v = np.sum(pp_matrix2, axis=1)/np.sum(np.sum(pp_matrix2)) #create vector of normalized sums of pp_matrix2
    maximum = np.amax(v) #initialize maximum, max value of v
    maxIdx = np.argmax(v) #initialize maxIdx, index of max value of v

    if maximum >= t[maxIdx]: #if maximum is above threshold for given class, retire image

        decision = 1
        print('Image is retired')

    elif num_annotators >= R_lim: #if at least R_lim annotators have looked at the image and no decision was reached, pass to more experienced users

        decision = 2
        print('Image is given to the upper class')

    else: #if fewer than R_lim annotators have looked at image, keep image

        decision = 3
        print('More labels are needed for the image')

    image_class = maxIdx

    return decision, image_class
Example #26
def std(f):
    x = np.array(range(len(f)))
    # normalize; we do not prefer attributes with many values
    x = x / x.mean()
    xf = np.multiply(f, x)
    x2f = np.multiply(f, np.power(x, 2))
    return np.sqrt((np.sum(x2f) - np.power(np.sum(xf), 2) / np.sum(f)) / (np.sum(f) - 1))
Example #27
 def _LMLgrad_beta(self, hyperparams):
     LATildeInfTP = numpy.dot(self._LAtildeInv.T, self._P)
     gBeta = .5 * (self.d * (numpy.trace(self._C) + (self.n - self.m) * self._beta_inv - self._psi_0)
                   - self.TrYY + self._TrPP
                   + self._beta_inv ** 2 * self.d * numpy.sum(self._LAtildeInv * self._LAtildeInv)
                   + self._beta_inv * numpy.sum(LATildeInfTP ** 2))
     return -gBeta  # negative because gradient is w.r.t loglikelihood
Example #28
def cond_entropy(period, data, p_bins=10, m_bins=5):
	'''
	Compute the conditional entropy for the 
	normalized observations
	'''
	if period <= 0:
		return np.PINF
	r = rephase(data, period)
	bins, *_ = np.histogram2d(r[:,0], r[:,1], [p_bins, m_bins],
								[[0,1], [0,1]])
	size = r.shape[0]
	if size > 0:
		divided_bins = bins / size
		arg_positive = divided_bins > 0
		column_sums = np.sum(divided_bins, axis=1)
		column_sums = np.repeat(np.reshape(column_sums,
								(p_bins,1)), m_bins, axis=1)
		
		select_divided_bins = divided_bins[arg_positive]
		select_column_sums  = column_sums[arg_positive]

		A = np.empty((p_bins, m_bins), dtype=float)
		A[ arg_positive] = select_divided_bins \
                         * np.log(select_column_sums \
                         / select_divided_bins)
		A[~arg_positive] = 0

		return np.sum(A)
	else:
		return np.PINF
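The quantity returned is the conditional entropy H(m | phi) of the normalized 2-D histogram; a standalone sketch of the same computation on synthetic phase/magnitude pairs (no rephase dependency):

import numpy as np

rng = np.random.default_rng(0)
phase = rng.uniform(0, 1, size=500)
mag = rng.uniform(0, 1, size=500)

p_bins, m_bins = 10, 5
bins, _, _ = np.histogram2d(phase, mag, [p_bins, m_bins], [[0, 1], [0, 1]])

p_joint = bins / bins.sum()                    # p(phi, m)
p_phase = np.repeat(p_joint.sum(axis=1, keepdims=True), m_bins, axis=1)

mask = p_joint > 0
h = np.sum(p_joint[mask] * np.log(p_phase[mask] / p_joint[mask]))
print(h)   # close to log(m_bins) for uniform, unstructured data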
Example #29
    def _beam_map_single(self, bl_index, f_index):

        p_stokes = [ 0.5 * np.array([[1.0,   0.0], [0.0,  1.0]]),
                     0.5 * np.array([[1.0,   0.0], [0.0, -1.0]]),
                     0.5 * np.array([[0.0,   1.0], [1.0,  0.0]]),
                     0.5 * np.array([[0.0, -1.0J], [1.0J, 0.0]]) ]

        # Get beam maps for each feed.
        feedi, feedj = self.uniquepairs[bl_index]
        beami, beamj = self.beam(feedi, f_index), self.beam(feedj, f_index)

        # Get baseline separation and fringe map.
        uv = self.baselines[bl_index] / self.wavelengths[f_index]
        fringe = visibility.fringe(self._angpos, self.zenith, uv)

        pow_stokes = [ np.sum(beami * np.dot(beamj.conjugate(), polproj), axis=1) * self._horizon for polproj in p_stokes]

        # Calculate the solid angle of each beam
        pxarea = (4*np.pi / beami.shape[0])

        om_i = np.sum(np.abs(beami)**2 * self._horizon[:, np.newaxis]) * pxarea
        om_j = np.sum(np.abs(beamj)**2 * self._horizon[:, np.newaxis]) * pxarea

        omega_A = (om_i * om_j)**0.5

        # Calculate the complex visibility transfer function
        cv_stokes = [ p * (2 * fringe / omega_A) for p in pow_stokes ]

        return cv_stokes
Example #30
	def pick_parents(self):

		# compute fitness
		w = self.calc_fitness()
		# sample parents proportional to their fitness
		males = np.array(np.nonzero(self.sex == 0)).flatten()
		females = np.array(np.nonzero(self.sex == 1)).flatten()
		w_male = w[males]/np.sum(w[males])
		w_female = w[females]/np.sum(w[females])
		males = np.random.choice(males, size = self.size/2, p = w_male)
		females = np.random.choice(females, size = self.size/2, p = w_female)
		assert(len(males) == len(females))

		idx = []
		sexes = []
		for i in range(0, len(males)):
			idx.append(males[i])
			idx.append(females[i])
			sexes.append(0)
			sexes.append(1)

		# make a copy of current population
		newpop = copy.deepcopy(self)
		newpop.sex = np.array(sexes)

		# now copy chromosomes from current population to new one
		for j in range(0, len(self.chrlen)):
			for i in range(0, self.size):
				newpop.tr[i] = self.tr[ idx[i] ]
				newpop.chroms[j][2*i] = self.chroms[j][ 2*idx[i] ]
				newpop.chroms[j][2*i+1] = self.chroms[j][ 2*idx[i]+1 ]

		return newpop
Example #31
    def GetPascalVOCMetrics(
            self,
            boundingboxes,
            IOUThreshold=0.5,
            method=MethodAveragePrecision.EveryPointInterpolation):
        """Get the metrics used by the VOC Pascal 2012 challenge.
        Get
        Args:
            boundingboxes: Object of the class BoundingBoxes representing ground truth and detected
            bounding boxes;
            IOUThreshold: IOU threshold indicating which detections will be considered TP or FP
            (default value = 0.5);
            method (default = EveryPointInterpolation): It can be calculated as the implementation
            in the official PASCAL VOC toolkit (EveryPointInterpolation), or by applying the
            11-point interpolation described in the paper "The PASCAL Visual Object Classes (VOC)
            Challenge" (ElevenPointInterpolation);
        Returns:
            A list of dictionaries. Each dictionary contains information and metrics of each class.
            The keys of each dictionary are:
            dict['class']: class representing the current dictionary;
            dict['precision']: array with the precision values;
            dict['recall']: array with the recall values;
            dict['AP']: average precision;
            dict['interpolated precision']: interpolated precision values;
            dict['interpolated recall']: interpolated recall values;
            dict['total positives']: total number of ground truth positives;
            dict['total TP']: total number of True Positive detections;
            dict['total FP']: total number of False Positive detections;
        """
        ret = [
        ]  # list containing metrics (precision, recall, average precision) of each class
        # List with all ground truths (Ex: [imageName,class,confidence=1, (bb coordinates XYX2Y2)])
        groundTruths = []
        # List with all detections (Ex: [imageName,class,confidence,(bb coordinates XYX2Y2)])
        detections = []
        # Get all classes
        classes = []

        # Loop through all bounding boxes and separate them into GTs and detections
        # print(f'bb {len(boundingboxes.getBoundingBoxes())}')
        for bb in boundingboxes.getBoundingBoxes():

            # [imageName, class, confidence, (bb coordinates XYX2Y2)]
            if bb.getBBType() == BBType.GroundTruth:

                groundTruths.append([
                    bb.getImageName(),
                    bb.getClassId(), 1,
                    bb.getAbsoluteBoundingBox(BBFormat.XYX2Y2)
                ])
            else:
                detections.append([
                    bb.getImageName(),
                    bb.getClassId(),
                    bb.getConfidence(),
                    bb.getAbsoluteBoundingBox(BBFormat.XYX2Y2)
                ])
            # get class

            if bb.getClassId() not in classes:
                classes.append(bb.getClassId())

        classes = sorted(classes)
        print(f'gts {groundTruths}')
        print(f'predict {detections}')

        # Precision x Recall is obtained individually by each class
        # Loop through by classes
        for c in classes:
            # Get only detection of class c

            dects = []
            [dects.append(d) for d in detections if d[1] == c]
            # Get only ground truths of class c
            gts = []
            [gts.append(g) for g in groundTruths if g[1] == c]
            npos = len(gts)
            # sort detections by decreasing confidence
            dects = sorted(dects, key=lambda conf: conf[2], reverse=True)
            print(f'sorted detections {dects}')
            TP = np.zeros(len(dects))
            FP = np.zeros(len(dects))
            # create dictionary with amount of gts for each image
            det = Counter([cc[0] for cc in gts])

            # print (f'det {det}')

            for key, val in det.items():
                det[key] = np.zeros(val)
            # print(det['00003'])
            # print("Evaluating class: %s (%d detections)" % (str(c), len(dects)))
            # Loop through detections
            print(len(dects))
            for d in range(len(dects)):
                # print('dect %s => %s' % (dects[d][0], dects[d][3],))
                # Find ground truth image
                # for gt in gts:
                #     if gt[0] == dects[d][0]:
                #         print (gt)

                gt = [gt for gt in gts if gt[0] == dects[d][0]]

                print(f'checked_gt {gt}')
                iouMax = sys.float_info.min
                print(f'ioumax {iouMax}')
                for j in range(len(gt)):
                    # print('Ground truth gt => %s' % (gt[j][3],))
                    print(f'iou {dects[d][3], gt[j][3]}')
                    iou = Evaluator.iou(dects[d][3], gt[j][3])
                    if iou > iouMax:
                        iouMax = iou
                        jmax = j
                # Assign detection as true positive/don't care/false positive
                if iouMax >= IOUThreshold:
                    print(det[dects[d][0]])
                    if det[dects[d][0]][jmax] == 0:
                        TP[d] = 1  # count as true positive
                        det[dects[d][0]][jmax] = 1  # flag as already 'seen'
                        # print("TP")
                    else:
                        FP[d] = 1  # count as false positive
                        # print("FP")
                # - A detected "cat" is overlaped with a GT "cat" with IOU >= IOUThreshold.
                else:
                    FP[d] = 1  # count as false positive
                    # print("FP")
            print(f'true pos {TP}')
            print(f'false pos {FP}')
            # compute precision, recall and average precision
            acc_FP = np.cumsum(FP)
            acc_TP = np.cumsum(TP)
            rec = acc_TP / npos
            prec = np.divide(acc_TP, (acc_FP + acc_TP))
            # Depending on the method, call the right implementation
            if method == MethodAveragePrecision.EveryPointInterpolation:
                [ap, mpre, mrec,
                 ii] = Evaluator.CalculateAveragePrecision(rec, prec)
            else:
                [ap, mpre, mrec,
                 _] = Evaluator.ElevenPointInterpolatedAP(rec, prec)
            # add class result in the dictionary to be returned
            r = {
                'class': c,
                'precision': prec,
                'recall': rec,
                'AP': ap,
                'interpolated precision': mpre,
                'interpolated recall': mrec,
                'total positives': npos,
                'total TP': np.sum(TP),
                'total FP': np.sum(FP)
            }
            ret.append(r)
        return ret
Example #32
def surf_semisupervised(
        vol_names,
        atlas_vol,
        atlas_seg,
        nb_surface_pts,
        labels=None,
        batch_size=1,
        surf_bidir=True,
        surface_pts_upsample_factor=2,
        smooth_seg_std=1,
        nb_labels_sample=None,
        sdt_vol_resize=1,
        align_segs=False,
        add_feat_axis=True
    ):
    """
    Scan-to-atlas generator for semi-supervised learning using surface point clouds from segmentations.

    Parameters:
        vol_names: List of volume files to load.
        atlas_vol: Atlas volume array.
        atlas_seg: Atlas segmentation array.
        nb_surface_pts: Total number surface points for all structures.
        labels: Label list to include. If None, all labels in atlas_seg are used. Default is None.
        batch_size: Batch size. NOTE some features only implemented for 1. Default is 1.
        surf_bidir: Train with bidirectional surface distance. Default is True.
        surface_pts_upsample_factor: Upsample factor for surface pointcloud. Default is 2.
        smooth_seg_std: Segmentation smoothness sigma. Default is 1.
        nb_labels_sample: Number of labels to sample. Default is None.
        sdt_vol_resize: Resize factor for signed distance transform volumes. Default is 1.
        align_segs: Whether to pass in segmentation image instead. Default is False.
        add_feat_axis: Load volume arrays with added feature axis. Default is True.
    """

    # some input checks
    assert nb_surface_pts > 0, 'number of surface points should be greater than 0'

    # prepare some shapes
    vol_shape = atlas_seg.shape
    sdt_shape = [int(f * sdt_vol_resize) for f in vol_shape]

    # compute labels from atlas, and the number of labels to sample.
    if labels is not None:
        atlas_seg = voxelmorph.py.utils.filter_labels(atlas_seg, labels)
    else:
        labels = np.sort(np.unique(atlas_seg))[1:]

    # use all labels by default
    if nb_labels_sample is None:
        nb_labels_sample = len(labels)

    # prepare keras format atlases
    atlas_vol_bs = np.repeat(atlas_vol[np.newaxis, ..., np.newaxis], batch_size, axis=0)
    atlas_seg_bs = np.repeat(atlas_seg[np.newaxis, ..., np.newaxis], batch_size, axis=0)

    # prepare surface extraction function
    std_to_surf = lambda x, y: voxelmorph.py.utils.sdt_to_surface_pts(x, y, surface_pts_upsample_factor=surface_pts_upsample_factor, thr=(1 / surface_pts_upsample_factor + 1e-5))
    
    # prepare zeros, which will be used for outputs unused in cost functions
    zero_flow = np.zeros((batch_size, *vol_shape, len(vol_shape)))
    zero_surface_values = np.zeros((batch_size, nb_surface_pts, 1))

    # precompute label edge volumes
    atlas_sdt = [None] * len(labels) 
    atlas_label_vols = [None] * len(labels) 
    nb_edges = np.zeros(len(labels))
    for li, label in enumerate(labels):  # if only one label, get surface points here
        atlas_label_vols[li] = atlas_seg == label
        atlas_label_vols[li] = voxelmorph.py.utils.clean_seg(atlas_label_vols[li], smooth_seg_std)
        atlas_sdt[li] = voxelmorph.py.utils.vol_to_sdt(atlas_label_vols[li], sdt=True, sdt_vol_resize=sdt_vol_resize)
        nb_edges[li] = np.sum(np.abs(atlas_sdt[li]) < 1.01)
    layer_edge_ratios = nb_edges / np.sum(nb_edges)

    # if working with all the labels passed in (i.e. no label sampling per batch), 
    # pre-compute the atlas surface points
    atlas_surface_pts = np.zeros((batch_size, nb_surface_pts, len(vol_shape) + 1))
    if nb_labels_sample == len(labels):
        nb_surface_pts_sel = voxelmorph.py.utils.get_surface_pts_per_label(nb_surface_pts, layer_edge_ratios)
        for li, label in enumerate(labels):  # if only one label, get surface points here
            atlas_surface_pts_ = std_to_surf(atlas_sdt[li], nb_surface_pts_sel[li])[np.newaxis, ...]
            # get the surface point stack indexes for this element
            srf_idx = slice(int(np.sum(nb_surface_pts_sel[:li])), int(np.sum(nb_surface_pts_sel[:li + 1])))
            atlas_surface_pts[:, srf_idx, :-1] = np.repeat(atlas_surface_pts_, batch_size, 0)
            atlas_surface_pts[:, srf_idx,  -1] = li

    # generator
    gen = volgen(vol_names, return_segs=True, batch_size=batch_size, add_feat_axis=add_feat_axis)
    
    assert batch_size == 1, 'only batch size 1 supported for now'

    while True:

        # prepare data
        X = next(gen)
        X_img = X[0]
        X_seg = voxelmorph.py.utils.filter_labels(X[1], labels)

        # get random labels
        sel_label_idxs = range(len(labels))  # all labels
        if nb_labels_sample != len(labels):
            sel_label_idxs = np.sort(np.random.choice(range(len(labels)), size=nb_labels_sample, replace=False))
            sel_layer_edge_ratios = [layer_edge_ratios[li] for li in sel_label_idxs]
            nb_surface_pts_sel = voxelmorph.py.utils.get_surface_pts_per_label(nb_surface_pts, sel_layer_edge_ratios)
                
        # prepare signed distance transforms and surface point arrays
        X_sdt_k = np.zeros((batch_size, *sdt_shape, nb_labels_sample))
        atl_dt_k = np.zeros((batch_size, *sdt_shape, nb_labels_sample))
        subj_surface_pts = np.zeros((batch_size, nb_surface_pts, len(vol_shape) + 1))
        if nb_labels_sample != len(labels):
            atlas_surface_pts = np.zeros((batch_size, nb_surface_pts, len(vol_shape) + 1))

        for li, sli in enumerate(sel_label_idxs):
            # get the surface point stack indexes for this element
            srf_idx = slice(int(np.sum(nb_surface_pts_sel[:li])), int(np.sum(nb_surface_pts_sel[:li+1])))

            # get atlas surface points for this label
            if nb_labels_sample != len(labels):
                atlas_surface_pts_ = std_to_surf(atlas_sdt[sli], nb_surface_pts_sel[li])[np.newaxis, ...]
                atlas_surface_pts[:, srf_idx, :-1] = np.repeat(atlas_surface_pts_, batch_size, 0)
                atlas_surface_pts[:, srf_idx,  -1] = sli

            # compute X distance from surface
            X_label = X_seg == labels[sli]
            X_label = voxelmorph.py.utils.clean_seg_batch(X_label, smooth_seg_std)
            X_sdt_k[..., li] = voxelmorph.py.utils.vol_to_sdt_batch(X_label, sdt=True, sdt_vol_resize=sdt_vol_resize)[..., 0]

            if surf_bidir:
                atl_dt = atlas_sdt[li][np.newaxis, ...]
                atl_dt_k[..., li] = np.repeat(atl_dt, batch_size, 0)
                ssp_lst = [std_to_surf(f[...], nb_surface_pts_sel[li]) for f in X_sdt_k[..., li]]
                subj_surface_pts[:, srf_idx, :-1] = np.stack(ssp_lst, 0)
                subj_surface_pts[:, srf_idx,  -1] = li

        # check if returning segmentations instead of images
        # this is a bit hacky for basically building a segmentation-only network (no images)
        X_ret = X_img
        atlas_ret = atlas_vol_bs

        if align_segs:
            assert len(labels) == 1, 'align_seg generator is only implemented for single label'
            X_ret = X_seg == labels[0]
            atlas_ret = atlas_seg_bs == labels[0]

        # finally, output
        if surf_bidir:
            inputs  = [X_ret, atlas_ret, X_sdt_k, atl_dt_k, subj_surface_pts, atlas_surface_pts]
            outputs = [atlas_ret, X_ret, zero_flow, zero_surface_values, zero_surface_values]
        else:
            inputs  = [X_ret, atlas_ret, X_sdt_k, atlas_surface_pts]
            outputs = [atlas_ret, X_ret, zero_flow, zero_surface_values]

        yield (inputs, outputs)
Example #33
        def refresh(i):  # i=3, a full epoch occurs every i=798/48
            if i % self.best_args['update_interval'] == 0:
                z = model.extract_feature(self.feature, args, None, train_iter,
                                          N, self.xpu).values()[0]

                p = np.zeros((z.shape[0], self.best_args['num_centers']))
                self.dec_op.forward([z, args['dec_mu'].asnumpy()], [p])
                # the soft assignments qi (pred)
                y_pred = p.argmax(axis=1)
                print(np.std(np.bincount(y_pred)), np.bincount(y_pred))

                # use a y that only considers the filledbyBC examples
                # compare soft assignments with known labels
                print('\n... Updating i = %f' % i)
                print(np.std(np.bincount(self.best_args['y_dec'].astype(int))),
                      np.bincount(self.best_args['y_dec'].astype(int)))

                # do for overall class B and M
                Z_embedding_tree = sklearn.neighbors.BallTree(z, leaf_size=5)
                # This finds the indices of 5 closest neighbors
                labels = np.asarray(self.best_args['roi_labels'])
                Neg = sum(labels == np.unique(labels)[0])  #for B
                Pos = sum(labels == np.unique(labels)[2])  #for M
                TP = []
                TN = []
                for k in range(z.shape[0]):
                    iclass = labels[k]
                    dist, ind = Z_embedding_tree.query([z[k]], k=6)
                    dist5nn, ind5nn = dist[k != ind], ind[k != ind]
                    class5nn = labels[ind5nn]
                    # exclude U class
                    class5nn = class5nn[class5nn != 'K']
                    if (len(class5nn) > 0):
                        predc = []
                        for c in np.unique(class5nn):
                            predc.append(sum(class5nn == c))
                        # predicion based on majority
                        predclass = np.unique(class5nn)[predc == max(predc)]

                        if (len(predclass) == 1):
                            # compute TP if M
                            if (iclass == 'M'):
                                TP.append(predclass[0] == iclass)
                            # compute TN if B
                            if (iclass == 'B'):
                                TN.append(predclass[0] == iclass)

                        if (len(predclass) == 2):
                            # compute TP if M
                            if (iclass == 'M'):
                                TP.append(predclass[1] == iclass)
                            # compute TN if B
                            if (iclass == 'B'):
                                TN.append(predclass[0] == iclass)

                # compute TPR and TNR
                TPR = sum(TP) / float(Pos)
                TNR = sum(TN) / float(Neg)
                Acc = sum(TP + TN) / float(Pos + Neg)
                print "True Posite Rate (TPR) = %f " % TPR
                print "True Negative Rate (TNR) = %f " % TNR
                print "Accuracy (Acc) = %f " % Acc

                # save best args
                self.best_args['acci'].append(Acc)
                if (Acc >= self.maxAcc):
                    print('Improving maxAcc = {}'.format(Acc))
                    for key, v in args.items():
                        self.best_args[key] = args[key]

                    self.maxAcc = Acc
                    self.best_args['bestacci'].append(Acc)

                if (i % self.best_args['plot_interval'] == 0
                        and self.ploti <= 15):
                    # Visualize the progression of the embedded representation in a subsample of data
                    # For visualization we use t-SNE (van der Maaten & Hinton, 2008) applied to the embedded points zi. It
                    tsne = TSNE(n_components=2,
                                perplexity=self.perplexity,
                                learning_rate=self.learning_rate,
                                init='pca',
                                random_state=0,
                                verbose=2,
                                method='exact')
                    Z_tsne = tsne.fit_transform(z)

                    ax = fig.add_subplot(4, 4, 1 + self.ploti)
                    plot_embedding_unsuper_NMEdist_intenh(
                        Z_tsne,
                        named_y,
                        ax,
                        title="Epoch %d z_tsne iter (%d)" % (self.ploti, i),
                        legend=False)
                    self.ploti = self.ploti + 1

                ## COMPUTING target distributions P
                ## we compute pi by first raising qi to the second power and then normalizing by frequency per cluster:
                weight = 1.0 / p.sum(axis=0)  # p.sum provides fj
                weight *= self.best_args['num_centers'] / weight.sum()
                p = (p**2) * weight
                train_iter.data_list[1][:] = (p.T / p.sum(axis=1)).T
                print(np.sum(y_pred != self.best_args['y_pred']),
                      0.001 * y_pred.shape[0])

                # For the purpose of discovering cluster assignments, we stop our procedure when less than tol% of points change cluster assignment between two consecutive iterations.
                # tol% = 0.001
                if i == self.best_args[
                        'update_interval'] * 200:  # performs 1epoch = 615/3 = 205*1000epochs
                    self.best_args['y_pred'] = y_pred
                    self.best_args['p'] = p
                    self.best_args['z'] = z
                    self.best_args['acci'].append(Acc)
                    return True

                self.best_args['y_pred'] = y_pred
                self.best_args['p'] = p
                self.best_args['z'] = z
Example #34
 def ce(label, pred):
     return np.sum(label * np.log(label /
                                  (pred + 0.000001))) / label.shape[0]
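ce is the batch-averaged KL divergence between label and prediction distributions; a tiny usage sketch with made-up probability rows:

import numpy as np

label = np.array([[0.7, 0.3],
                  [0.5, 0.5]])
pred = np.array([[0.6, 0.4],
                 [0.5, 0.5]])

print(ce(label, pred))   # small positive value; ~0 when pred equals label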
Example #35
    def test_02_03_same_size(self):
        '''Align two images keeping sizes the same'''
        np.random.seed(0)
        shape = (50, 45)
        i, j = np.mgrid[0:shape[0], 0:shape[1]]
        for offset in ((1, 0), (0, 1), (1, 1), (3, 5), (-3, 5), (3, -5),
                       (-3, -5), (0, 5), (3, 0), (0, 0)):
            for mask1 in (None, np.random.uniform(size=shape) > .1):
                for mask2 in (None, np.random.uniform(size=shape) > .1):
                    for method in (A.M_MUTUAL_INFORMATION,
                                   A.M_CROSS_CORRELATION):
                        if method == A.M_CROSS_CORRELATION and (
                            (mask1 is not None) or (mask2 is not None)):
                            continue
                        image1 = np.random.randint(
                            0, 10, size=shape).astype(float) / 10.0
                        image1[np.sqrt(((i - shape[0] / 2)**2 +
                                        (j - shape[1] / 2)**2)) < 20] = .5
                        si1, si2 = self.slice_helper(offset[0],
                                                     image1.shape[0])
                        sj1, sj2 = self.slice_helper(offset[1],
                                                     image1.shape[1])
                        image2 = np.zeros(image1.shape)
                        if method == A.M_MUTUAL_INFORMATION:
                            image2[si2, sj2] = 1 - image1[si1, sj1]
                        else:
                            image2 = image1[(i + shape[0] - offset[0]) %
                                            shape[0],
                                            (j + shape[1] - offset[1]) %
                                            shape[1]]
                            image2 += (np.random.uniform(size=shape) -
                                       .5) * .1 * np.std(image2)
                        if mask1 is not None:
                            image1[~mask1] = np.random.uniform(
                                size=np.sum(~mask1))
                        if mask2 is not None:
                            image2[~mask2] = np.random.uniform(
                                size=np.sum(~mask2))
                        workspace, module = self.make_workspace(
                            (image1, image2), (mask1, mask2))
                        self.assertTrue(isinstance(module, A.Align))
                        module.alignment_method.value = method
                        module.crop_mode.value = A.C_SAME_SIZE
                        module.run(workspace)
                        output = workspace.image_set.get_image('Aligned0')
                        m = workspace.measurements
                        self.assertTrue(isinstance(m, cpmeas.Measurements))
                        off_i0 = -m.get_current_image_measurement(
                            'Align_Yshift_Aligned0')
                        off_j0 = -m.get_current_image_measurement(
                            'Align_Xshift_Aligned0')
                        off_i1 = -m.get_current_image_measurement(
                            'Align_Yshift_Aligned1')
                        off_j1 = -m.get_current_image_measurement(
                            'Align_Xshift_Aligned1')

                        self.assertEqual(off_i0 - off_i1, offset[0])
                        self.assertEqual(off_j0 - off_j1, offset[1])

                        si_in, si_out = self.slice_same(off_i0, shape[0])
                        sj_in, sj_out = self.slice_same(off_j0, shape[1])
                        np.testing.assert_almost_equal(
                            image1[si_in, sj_in], output.pixel_data[si_out,
                                                                    sj_out])
                        if mask1 is not None:
                            self.assertTrue(
                                np.all(output.mask[si_out,
                                                   sj_out] == mask1[si_in,
                                                                    sj_in]))

                        temp = output.mask.copy()
                        temp[si_out, sj_out] = False
                        self.assertTrue(np.all(~temp))

                        output = workspace.image_set.get_image("Aligned1")
                        si_in, si_out = self.slice_same(off_i1, shape[0])
                        sj_in, sj_out = self.slice_same(off_j1, shape[1])
                        np.testing.assert_almost_equal(
                            image2[si_in, sj_in], output.pixel_data[si_out,
                                                                    sj_out])
                        if mask2 is not None:
                            self.assertTrue(
                                np.all(mask2[si_in,
                                             sj_in] == output.mask[si_out,
                                                                   sj_out]))
                        temp = output.mask.copy()
                        temp[si_out, sj_out] = False
                        self.assertTrue(np.all(~temp))
Exemplo n.º 36
0
    def test_02_01_crop(self):
        '''Align two images and crop the result'''
        np.random.seed(0)
        shape = (50, 45)
        i, j = np.mgrid[0:shape[0], 0:shape[1]]
        for offset in ((3, 5), (-3, 5), (3, -5), (-3, -5), (0, 5), (3, 0),
                       (0, 0)):
            #
            # Do something to give the image some information over
            # the distance, 5,5
            #
            for mask1 in (None, np.random.uniform(size=shape) > .1):
                for mask2 in (None, np.random.uniform(size=shape) > .1):
                    for method in (A.M_MUTUAL_INFORMATION,
                                   A.M_CROSS_CORRELATION):
                        if method == A.M_CROSS_CORRELATION and (
                            (mask1 is not None) or (mask2 is not None)):
                            continue

                        image1 = np.random.randint(
                            0, 10, size=shape).astype(float) / 10.0
                        image1[np.sqrt(((i - shape[0] / 2)**2 +
                                        (j - shape[1] / 2)**2)) < 20] = .5
                        si1, si2 = self.slice_helper(offset[0],
                                                     image1.shape[0])
                        sj1, sj2 = self.slice_helper(offset[1],
                                                     image1.shape[1])
                        image2 = np.zeros(image1.shape)
                        if method == A.M_MUTUAL_INFORMATION:
                            image2[si2, sj2] = 1 - image1[si1, sj1]
                        else:
                            image2 = image1[(i + shape[0] - offset[0]) %
                                            shape[0],
                                            (j + shape[1] - offset[1]) %
                                            shape[1]]
                            image2 += (np.random.uniform(size=shape) -
                                       .5) * .1 * np.std(image2)
                        if mask1 is not None:
                            image1[~mask1] = np.random.uniform(
                                size=np.sum(~mask1))
                        if mask2 is not None:
                            image2[~mask2] = np.random.uniform(
                                size=np.sum(~mask2))
                        workspace, module = self.make_workspace(
                            (image1, image2), (mask1, mask2))
                        self.assertTrue(isinstance(module, A.Align))
                        module.alignment_method.value = method
                        module.crop_mode.value = A.C_CROP
                        module.run(workspace)
                        output = workspace.image_set.get_image('Aligned0')
                        m = workspace.measurements
                        self.assertTrue(isinstance(m, cpmeas.Measurements))
                        off_i0 = -m.get_current_image_measurement(
                            'Align_Yshift_Aligned0')
                        off_j0 = -m.get_current_image_measurement(
                            'Align_Xshift_Aligned0')
                        off_i1 = -m.get_current_image_measurement(
                            'Align_Yshift_Aligned1')
                        off_j1 = -m.get_current_image_measurement(
                            'Align_Xshift_Aligned1')

                        self.assertEqual(off_i0 - off_i1, offset[0])
                        self.assertEqual(off_j0 - off_j1, offset[1])
                        out_shape = output.pixel_data.shape
                        self.assertEqual(out_shape[0],
                                         shape[0] - abs(offset[0]))
                        self.assertEqual(out_shape[1],
                                         shape[1] - abs(offset[1]))
                        i_slice = self.single_slice_helper(
                            -off_i0, out_shape[0])
                        j_slice = self.single_slice_helper(
                            -off_j0, out_shape[1])
                        np.testing.assert_almost_equal(
                            image1[i_slice, j_slice], output.pixel_data)
                        if mask1 is not None:
                            self.assertTrue(
                                np.all(output.mask == mask1[i_slice, j_slice]))

                        if offset[0] == 0 and offset[1] == 0:
                            self.assertFalse(output.has_crop_mask)
                        else:
                            temp = output.crop_mask.copy()
                            self.assertEqual(tuple(temp.shape), shape)
                            self.assertTrue(np.all(temp[i_slice, j_slice]))
                            temp[i_slice, j_slice] = False
                            self.assertTrue(np.all(~temp))

                        output = workspace.image_set.get_image("Aligned1")
                        i_slice = self.single_slice_helper(
                            -off_i1, out_shape[0])
                        j_slice = self.single_slice_helper(
                            -off_j1, out_shape[1])

                        np.testing.assert_almost_equal(
                            image2[i_slice, j_slice], output.pixel_data)
                        if mask2 is not None:
                            self.assertTrue(
                                np.all(output.mask == mask2[i_slice, j_slice]))
                        if offset[0] == 0 and offset[1] == 0:
                            self.assertFalse(output.has_crop_mask)
                        else:
                            temp = output.crop_mask.copy()
                            self.assertEqual(tuple(temp.shape), shape)
                            self.assertTrue(np.all(temp[i_slice, j_slice]))
                            temp[i_slice, j_slice] = False
                            self.assertTrue(np.all(~temp))
Exemplo n.º 37
0
def _extract_template_features(
        mmcif_object: mmcif_parsing.MmcifObject, pdb_id: str,
        mapping: Mapping[int, int], template_sequence: str,
        query_sequence: str, template_chain_id: str,
        kalign_binary_path: str) -> Tuple[Dict[str, Any], Optional[str]]:
    """Parses atom positions in the target structure and aligns with the query.

  Atoms for each residue in the template structure are indexed to coincide
  with their corresponding residue in the query sequence, according to the
  alignment mapping provided.

  Args:
    mmcif_object: mmcif_parsing.MmcifObject representing the template.
    pdb_id: PDB code for the template.
    mapping: Dictionary mapping indices in the query sequence to indices in
      the template sequence.
    template_sequence: String describing the amino acid sequence for the
      template protein.
    query_sequence: String describing the amino acid sequence for the query
      protein.
    template_chain_id: String ID describing which chain in the structure proto
      should be used.
    kalign_binary_path: The path to a kalign executable used for template
        realignment.

  Returns:
    A tuple with:
    * A dictionary containing the extra features derived from the template
      protein structure.
    * A warning message if the hit was realigned to the actual mmCIF sequence.
      Otherwise None.

  Raises:
    NoChainsError: If the mmcif object doesn't contain any chains.
    SequenceNotInTemplateError: If the given chain id / sequence can't
      be found in the mmcif object.
    QueryToTemplateAlignError: If the actual template in the mmCIF file
      can't be aligned to the query.
    NoAtomDataInTemplateError: If the mmcif object doesn't contain
      atom positions.
    TemplateAtomMaskAllZerosError: If the mmcif object doesn't have any
      unmasked residues.
  """
    if mmcif_object is None or not mmcif_object.chain_to_seqres:
        raise NoChainsError('No chains in PDB: %s_%s' %
                            (pdb_id, template_chain_id))

    warning = None
    try:
        seqres, chain_id, mapping_offset = _find_template_in_pdb(
            template_chain_id=template_chain_id,
            template_sequence=template_sequence,
            mmcif_object=mmcif_object)
    except SequenceNotInTemplateError:
        # If PDB70 contains a different version of the template, we use the sequence
        # from the mmcif_object.
        chain_id = template_chain_id
        warning = (
            f'The exact sequence {template_sequence} was not found in '
            f'{pdb_id}_{chain_id}. Realigning the template to the actual sequence.'
        )
        logger.warning(warning)
        # This throws an exception if it fails to realign the hit.
        seqres, mapping = _realign_pdb_template_to_query(
            old_template_sequence=template_sequence,
            template_chain_id=template_chain_id,
            mmcif_object=mmcif_object,
            old_mapping=mapping,
            kalign_binary_path=kalign_binary_path)
        logger.info('Sequence in %s_%s: %s successfully realigned to %s',
                    pdb_id, chain_id, template_sequence, seqres)
        # The template sequence changed.
        template_sequence = seqres
        # No mapping offset, the query is aligned to the actual sequence.
        mapping_offset = 0

    try:
        # Essentially set to infinity - we don't want to reject templates unless
        # they're really really bad.
        all_atom_positions, all_atom_mask = _get_atom_positions(
            mmcif_object, chain_id, max_ca_ca_distance=150.0)
    except (CaDistanceError, KeyError) as ex:
        raise NoAtomDataInTemplateError('Could not get atom data (%s_%s): %s' %
                                        (pdb_id, chain_id, str(ex))) from ex

    all_atom_positions = np.split(all_atom_positions,
                                  all_atom_positions.shape[0])
    all_atom_masks = np.split(all_atom_mask, all_atom_mask.shape[0])

    output_templates_sequence = []
    templates_all_atom_positions = []
    templates_all_atom_masks = []

    for _ in query_sequence:
        # Residues in the query_sequence that are not in the template_sequence:
        templates_all_atom_positions.append(
            np.zeros((residue_constants.atom_type_num, 3)))
        templates_all_atom_masks.append(
            np.zeros(residue_constants.atom_type_num))
        output_templates_sequence.append('-')

    for k, v in mapping.items():
        template_index = v + mapping_offset
        templates_all_atom_positions[k] = all_atom_positions[template_index][0]
        templates_all_atom_masks[k] = all_atom_masks[template_index][0]
        output_templates_sequence[k] = template_sequence[v]

    # Alanine (AA with the lowest number of atoms) has 5 atoms (C, CA, CB, N, O).
    if np.sum(templates_all_atom_masks) < 5:
        raise TemplateAtomMaskAllZerosError(
            'Template all atom mask was all zeros: %s_%s. Residue range: %d-%d'
            % (pdb_id, chain_id, min(mapping.values()) + mapping_offset,
               max(mapping.values()) + mapping_offset))

    output_templates_sequence = ''.join(output_templates_sequence)

    templates_aatype = residue_constants.sequence_to_onehot(
        output_templates_sequence, residue_constants.HHBLITS_AA_TO_ID)

    return ({
        'template_all_atom_positions':
        np.array(templates_all_atom_positions),
        'template_all_atom_masks':
        np.array(templates_all_atom_masks),
        'template_sequence':
        output_templates_sequence.encode(),
        'template_aatype':
        np.array(templates_aatype),
        'template_domain_names':
        f'{pdb_id.lower()}_{chain_id}'.encode(),
    }, warning)
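
A minimal, self-contained sketch (not part of the original example) of the gather step above: template atom data are scattered into query-length arrays through the query-to-template index mapping, and unmapped query positions stay as zeros / '-'. The array sizes, the toy mapping and the sequence below are illustrative assumptions.

import numpy as np

ATOM_TYPE_NUM = 37                      # assumed per-residue atom slot count
query_len, template_len = 7, 5
template_positions = np.random.rand(template_len, ATOM_TYPE_NUM, 3)
template_mask = np.ones((template_len, ATOM_TYPE_NUM))
template_seq = 'ACDEF'

# Hypothetical query-index -> template-index mapping (plays the role of `mapping`).
mapping = {0: 0, 1: 1, 3: 2, 4: 3, 6: 4}

# Initialise query-length outputs as gaps, then scatter template residues in.
out_positions = np.zeros((query_len, ATOM_TYPE_NUM, 3))
out_mask = np.zeros((query_len, ATOM_TYPE_NUM))
out_seq = ['-'] * query_len
for q_idx, t_idx in mapping.items():
    out_positions[q_idx] = template_positions[t_idx]
    out_mask[q_idx] = template_mask[t_idx]
    out_seq[q_idx] = template_seq[t_idx]
print(''.join(out_seq))                 # 'AC-DE-F'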
Exemplo n.º 38
0
def generate_h2_pT(h5py_file, LDSC_results_file, output_h2, PS, output_pT,
                   annotation_flag):
    # generate two types of prior files
    ### load the fixed input file ###
    h5f1 = h5py.File('ref/AnnotMatrix/baseline.h5', 'r')
    baseline = h5f1['annot'][:]
    h5f1.close()

    if annotation_flag in ('tier0', 'tier1', 'tier2', 'tier3'):
        h5f1 = h5py.File('ref/AnnotMatrix/%s.h5' % annotation_flag, 'r')
        tier = h5f1['annot'][:]
        h5f1.close()
    else:
        exit("Illegal tier name!")

    annot = np.concatenate((baseline, tier), axis=1)
    h5f2 = h5py.File('ref/AnnotMatrix/1000G_SNP_info.h5', 'r')
    snp_chr = h5f2['snp_chr'][:]
    h5f2.close()
    ### get the snp list from h5py ###
    chromosomes_list = ['chrom_%d' % (x) for x in range(1, 23)]
    chromosomes_list.append('chrom_X')

    df = h5py.File(h5py_file, 'r')
    cord_data_g = df['cord_data']

    SNPids = []
    for chrom_str in chromosomes_list:
        if chrom_str in cord_data_g.keys():
            g = cord_data_g[chrom_str]
            #Filter monomorphic SNPs (SNPs with variance equal to 0)
            snp_stds = g['snp_stds_ref'][...]
            snp_stds = snp_stds.flatten()
            ok_snps_filter = snp_stds > 0
            pval_derived_betas = g['betas'][...]
            pval_derived_betas = pval_derived_betas[ok_snps_filter]
            sids = g['sids'][...]
            SNPids = np.append(SNPids, sids[ok_snps_filter])
    num_snps = len(SNPids)
    ### overlap with SNP in annot files ###
    stt1 = np.in1d(snp_chr[:, 2], SNPids)
    ant1 = annot[stt1]
    snp_chr1 = snp_chr[stt1]
    ### check order ###
    if sum(snp_chr1[:, 2] == SNPids) == len(SNPids):
        print('Good!')
    else:
        print('Order mismatch: sorting ant1 to match the order of SNPids')
        # Reorder ant1 into the order of SNPids by composing argsorts
        # (this trick is sketched after this example).
        O1 = np.argsort(snp_chr1[:, 2])
        O2 = np.argsort(SNPids)
        O3 = np.argsort(O2)
        ant1 = ant1[O1][O3]

    ### load LDSC results ###
    LD_results = np.genfromtxt(LDSC_results_file, dtype=None, names=True)

    tau0 = LD_results['Coefficient']

    ### get heritability  ###
    sig2_0 = np.dot(ant1, tau0)

    ### adjust for minus terms ###
    sig2_0[sig2_0 < 0] = np.repeat(min(sig2_0[sig2_0 > 0]), np.sum(sig2_0 < 0))
    np.sum(sig2_0)

    ### save prior file (h2) ###
    h2_out = []
    for i in range(len(sig2_0)):
        h2_out.append(
            str(snp_chr1[:, 0][i]) + ' ' + str(snp_chr1[:, 2][i]) + ' ' +
            str(sig2_0[i]) + '\n')
    #np.savetxt(output_h2,(snp_chr1[:,0],snp_chr1[:,1],sig2_0),fmt="%s")
    ff = open(output_h2, "w")
    ff.writelines(h2_out)
    ff.close()

    ### start calculating p_T ###
    M = np.empty(annot.shape[1])
    for i in range(len(M)):
        M[i] = np.sum(np.logical_and(annot[:, 0], annot[:, i]))
    bgt = datetime.datetime.now()
    M_T = defaultdict(int)
    for i in range(annot.shape[0]):
        tup_i = tuple(annot[i])
        M_T[tup_i] += 1
    edt = datetime.datetime.now()
    print(edt - bgt)
    bgt = datetime.datetime.now()
    N_T = defaultdict(int)
    for i in range(ant1.shape[0]):
        tup_i = tuple(ant1[i])
        N_T[tup_i] += 1
    edt = datetime.datetime.now()
    print(edt - bgt)

    H0 = np.dot(M, tau0)
    N0 = float(len(SNPids))
    sig2V = np.dot(ant1, tau0)

    # N_T = {x:annotV1.count(x) for x in annotV1}

    M_TV = np.empty(ant1.shape[0])
    N_TV = np.empty(ant1.shape[0])
    for i in range(ant1.shape[0]):
        tup_i = tuple(ant1[i])
        M_TV[i] = M_T[tup_i]
        N_TV[i] = N_T[tup_i]

#    for ps in PS:
    pr_p = (PS * N0 / H0) * M_TV * sig2V / N_TV
    sig2 = M_TV * sig2V / N_TV
    m1 = min(pr_p[pr_p > 0])
    m2 = min(sig2[sig2 > 0])
    pr_p[pr_p < 0] = np.repeat(m1, np.sum(pr_p < 0))
    sig2[sig2 < 0] = np.repeat(m2, np.sum(sig2 < 0))
    pr_p[pr_p > 1] = np.repeat(1, np.sum(pr_p > 1))
    pT_out = []
    for i in range(len(sig2)):
        pT_out.append(
            str(snp_chr1[:, 0][i]) + ' ' + str(snp_chr1[:, 2][i]) + ' ' +
            str(pr_p[i]) + ' ' + str(sig2[i]) + '\n')


#    ff = open(output_pT+'_'+str(ps)+'_file.txt',"w")
    ff = open(output_pT, "w")
    ff.writelines(pT_out)
    ff.close()

    return math.ceil(num_snps / 3000.0)
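
The O1/O2/O3 reorder step above composes argsorts: sort one array of IDs, then apply the inverse permutation of the other array's argsort to land in that array's order. A small hedged sketch with made-up IDs and values (it assumes the two ID arrays contain the same items exactly once):

import numpy as np

ref_ids = np.array(['rs5', 'rs1', 'rs9', 'rs3'])     # target order (like SNPids)
other_ids = np.array(['rs3', 'rs9', 'rs1', 'rs5'])   # current order (like snp_chr1[:, 2])
other_vals = np.array([30, 90, 10, 50])              # rows tied to other_ids

O1 = np.argsort(other_ids)                # puts other_* into sorted-ID order
O3 = np.argsort(np.argsort(ref_ids))      # maps sorted-ID order back to ref order
aligned = other_vals[O1][O3]
print(aligned)                            # [50 10 90 30] -> order rs5, rs1, rs9, rs3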
Exemplo n.º 39
0
    def _compute_norms(self, state):
        self.XYZ = norm(state["position"])
        self.V = norm(state["velocity"])
        self.legs_contact = np.sum(state["leg_contact"])
Exemplo n.º 40
0
    def updateAndGetBeliefStates(self, evidences):
        """
        Given a list of (noised) distances from pacman to ghosts,
        returns a list of belief states about ghosts positions

        Arguments:
        ----------
        - `evidences`: list of (noised) ghost positions at state x_{t}
          where 't' is the current time step

        Return:
        -------
        - A list of Z belief states at state x_{t} about ghost positions
          as N*M numpy matrices of probabilities
          where N and M are respectively width and height
          of the maze layout and Z is the number of ghosts.

        N.B. : [0,0] is the bottom left corner of the maze
        """
        # XXX: Your code here

        # if self.iter < 0:
        #     np.save('Entropy{}_{}'.format(self.w, self.p), self.entropy)
        #     sys.exit()
        #
        # self.iter = self.iter - 1

        if self.m is None or self.n is None:
            self.m = self.walls.height
            self.n = self.walls.width

        if not self.board:
            for x in np.arange(self.n):
                for y in np.arange(self.m):
                    self.board.append((x, y))

        if self.transitionMatrix is None:
            self.transitionMatrix = self.createTransitionMatrix()

        if self.sensorMatrix is None:
            self.sensorMatrix = self.createSensorModel()

        beliefStates = self.beliefGhostStates

        # self.entropy.append(self.entropyF(beliefStates))

        for i, e in enumerate(evidences):
            """
            To manage multiple ghosts.
            """
            col_beliefStates = np.reshape(beliefStates[i, :, :], (-1, 1))

            index = self.board.index(e)
            O_col = self.sensorMatrix[:, index]

            O = np.diag(O_col)
            """
            O = Observation matrix.
            """

            col_bel = np.dot(O, self.transitionMatrix)
            col_beliefStates = np.dot(col_bel, col_beliefStates)

            alpha = 1 / (np.sum(col_beliefStates))
            col_beliefStates = alpha * col_beliefStates

            beliefState = col_beliefStates.reshape((self.n, self.m))
            beliefStates[i, :, :] = beliefState

        # XXX: End of your code
        self.beliefGhostStates = beliefStates
        return beliefStates
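
The per-ghost update above is a standard HMM forward-filtering step: predict with the transition matrix, weight by the observation likelihoods (the diagonal O matrix), and renormalise. A minimal standalone sketch with made-up numbers; the column-stochastic transition matrix and sensor column below are assumptions for illustration only.

import numpy as np

belief = np.array([0.5, 0.3, 0.2])        # belief over 3 states at time t-1
T = np.array([[0.8, 0.2, 0.1],
              [0.1, 0.6, 0.3],
              [0.1, 0.2, 0.6]])            # T[i, j] = P(x_t = i | x_{t-1} = j)
O_col = np.array([0.9, 0.2, 0.1])          # P(evidence | state), one sensor-model column

predicted = T @ belief                     # prediction step
unnormalised = O_col * predicted           # same as np.diag(O_col) @ predicted
belief_t = unnormalised / np.sum(unnormalised)
print(belief_t)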
Exemplo n.º 41
0
        V = np.dot(A, q_i)

    Bq, Br = gram_schmidt_qr(V)
    diagonal_Br = np.diagonal(Br)
    return Bq, diagonal_Br


input_data = np.loadtxt(open('iris1.csv', 'rb'),
                        delimiter=',',
                        usecols=range(0, 4))
n, c = input_data.shape

for i in range(c):
    col_mean = np.mean(input_data[:, i])
    input_data[:, i] = input_data[:, i] - col_mean

A = np.dot(np.transpose(input_data), input_data) / n

eigen_vectors, eigen_values = my_pca(A)

standard_deviation = np.sqrt(eigen_values)
print("Standard deviation")
print(standard_deviation)
proportion_of_variance = eigen_values / np.sum(eigen_values)
print("Proportion of Variance")
print(proportion_of_variance)

Exemplo n.º 42
0
    def fit(self, X, y):
        """Fit a semi-supervised label propagation model based

        All the input data is provided matrix X (labeled and unlabeled)
        and corresponding label matrix y with a dedicated marker value for
        unlabeled samples.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            A {n_samples by n_samples} size matrix will be created from this

        y : array_like, shape = [n_samples]
            n_labeled_samples (unlabeled points are marked as -1)
            All unlabeled samples will be transductively assigned labels

        Returns
        -------
        self : returns an instance of self.
        """
        if sparse.isspmatrix(X):
            self.X_ = X
        else:
            self.X_ = np.asarray(X)

        # actual graph construction (implementations should override this)
        graph_matrix = self._build_graph()

        # label construction
        # construct a categorical distribution for classification only
        classes = np.unique(y)
        classes = (classes[classes != -1])
        self.classes_ = classes

        n_samples, n_classes = len(y), len(classes)

        y = np.asarray(y)
        unlabeled = y == -1
        clamp_weights = np.ones((n_samples, 1))
        clamp_weights[unlabeled, 0] = self.alpha

        # initialize distributions
        self.label_distributions_ = np.zeros((n_samples, n_classes))
        for label in classes:
            self.label_distributions_[y == label, classes == label] = 1

        y_static = np.copy(self.label_distributions_)
        if self.alpha > 0.:
            y_static *= 1 - self.alpha
        y_static[unlabeled] = 0

        l_previous = np.zeros((self.X_.shape[0], n_classes))

        remaining_iter = self.max_iter
        if sparse.isspmatrix(graph_matrix):
            graph_matrix = graph_matrix.tocsr()
        while (_not_converged(self.label_distributions_, l_previous, self.tol)
               and remaining_iter > 1):
            l_previous = self.label_distributions_
            self.label_distributions_ = safe_sparse_dot(
                graph_matrix, self.label_distributions_)
            # clamp
            self.label_distributions_ = np.multiply(
                clamp_weights, self.label_distributions_) + y_static
            remaining_iter -= 1

        normalizer = np.sum(self.label_distributions_, axis=1)[:, np.newaxis]
        self.label_distributions_ /= normalizer
        # set the transduction item
        transduction = self.classes_[np.argmax(self.label_distributions_,
                                               axis=1)]
        self.transduction_ = transduction.ravel()
        return self
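
A compact, dense-only sketch of the propagation loop inside fit above, under simplifying assumptions: a toy 4-node chain graph, hard clamping of the labeled points (instead of the alpha-weighted clamp), and a fixed iteration count rather than a convergence test.

import numpy as np

W = np.array([[0., 1., 0., 0.],
              [1., 0., 1., 0.],
              [0., 1., 0., 1.],
              [0., 0., 1., 0.]])
graph = W / W.sum(axis=1, keepdims=True)      # row-normalised affinities

y = np.array([0, -1, -1, 1])                  # -1 marks unlabeled samples
classes = np.array([0, 1])
dist = (y[:, None] == classes[None, :]).astype(float)
y_static = dist.copy()

for _ in range(30):
    dist = graph @ dist                       # propagate along graph edges
    dist[y != -1] = y_static[y != -1]         # clamp labeled points
dist /= dist.sum(axis=1, keepdims=True)
print(classes[dist.argmax(axis=1)])           # -> [0 0 1 1]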
Exemplo n.º 43
0
def main():
    l_rate = 10
    repeat = 12000
    Lambda = 0.005

    data = []
    for i in range(18):
        data.append([])
    n_row = 0
    text = open(train_path, 'r')  #, encoding='big5')
    row = csv.reader(text, delimiter=",")
    for r in row:
        if n_row != 0:
            for i in range(3, 27):
                if r[i] != "NR":
                    data[(n_row - 1) % 18].append(float(r[i]))
                else:
                    data[(n_row - 1) % 18].append(float(0))
        n_row = n_row + 1
    text.close()
    x = []
    y = []
    for i in range(12):
        for j in range(471):
            x.append([])
            for t in range(18):
                for s in range(9):
                    x[471 * i + j].append(data[t][480 * i + j + s])
            y.append(data[9][480 * i + j + 9])
    x = np.array(x)
    y = np.array(y)
    # pick Variable
    X = [1] * len(x)
    for row in range(len(x)):
        X[row] = x[row][Variable_list]
    x = np.array(X)
    '''
    x_transpose = list(map(list, zip(*x)))
    Varlist = []
    Minlist = []
    for row in range(x.shape[1]):
        Varlist.append(np.std(x_transpose[row]))
        Minlist.append(np.mean(x_transpose[row]))
    for row in range(x.shape[0]):
        for col in range(x.shape[1]):
            x[row][col] = (x[row][col] - Minlist[col]) / Varlist[col]
    '''
    # add square term
    x = np.concatenate((x, x**2), axis=1)
    # add bias
    x = np.concatenate((np.ones((x.shape[0], 1)), x), axis=1)
    w = np.zeros(len(x[0]))
    ## Linear regression
    x_t = x.transpose()
    s_gra = np.zeros(len(x[0]))
    for i in range(repeat):
        hypo = np.dot(x, w)
        loss = hypo - y + Lambda * np.sum(w**2)
        cost = np.sum(loss**2) / len(x)
        cost_a = math.sqrt(cost)
        gra = np.dot(x_t, loss)
        s_gra += gra**2
        ada = np.sqrt(s_gra)
        w = w - l_rate * gra / ada
        if i % 100 == 0:
            print('iteration: %d | Cost: %f ' % (i, cost_a))
    # save model
    np.save('model_best.npy', w)
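
The weight update above is plain Adagrad on the squared-error gradient. A standalone sketch of the same update on a synthetic least-squares problem (the data, learning rate and iteration count are illustrative assumptions, and the regularisation term is omitted):

import numpy as np

np.random.seed(0)
X = np.random.randn(100, 4)
true_w = np.array([1.0, -2.0, 0.5, 3.0])
y = X @ true_w + 0.01 * np.random.randn(100)

w = np.zeros(4)
s_gra = np.zeros(4)                     # running sum of squared gradients
l_rate = 0.5
for _ in range(5000):
    residual = X @ w - y
    gra = X.T @ residual                # gradient of 0.5 * np.sum(residual**2)
    s_gra += gra ** 2
    ada = np.sqrt(s_gra)
    w = w - l_rate * gra / ada          # Adagrad: per-parameter adaptive step
print(w)                                # approaches true_w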
Exemplo n.º 44
0
    def track(self, bb, prev, curr):
        self._n_samples = 100
        self._fb_max_dist = 1
        self._ds_factor = 0.95
        self._min_n_points = 10

        # sample points inside the bounding box
        p0 = np.empty((self._n_samples, 2))
        p0[:, 0] = np.random.randint(bb[0], bb[2] + 1, self._n_samples)
        p0[:, 1] = np.random.randint(bb[1], bb[3] + 1, self._n_samples)

        p0 = p0.astype(np.float32)

        # forward-backward tracking
        p1, st, err = cv2.calcOpticalFlowPyrLK(prev, curr, p0, None, **self.lk_params)
        indx = np.where(st == 1)[0]
        p0 = p0[indx, :]
        p1 = p1[indx, :]
        p0r, st, err = cv2.calcOpticalFlowPyrLK(curr, prev, p1, None, **self.lk_params)
        if err is None:
            return None

        # check forward-backward error and min number of points
        fb_dist = np.abs(p0 - p0r).max(axis=1)
        good = fb_dist < self._fb_max_dist

        # keep half of the points
        err = err[good].flatten()
        if len(err) < self._min_n_points:
            return None

        indx = np.argsort(err)
        half_indx = indx[:len(indx) // 2]
        p0 = (p0[good])[half_indx]
        p1 = (p1[good])[half_indx]

        # estimate displacement
        dx = np.median(p1[:, 0] - p0[:, 0])
        dy = np.median(p1[:, 1] - p0[:, 1])

        # all pairs in prev and curr
        i, j = np.triu_indices(len(p0), k=1)
        pdiff0 = p0[i] - p0[j]
        pdiff1 = p1[i] - p1[j]

        # estimate change in scale
        p0_dist = np.sum(pdiff0 ** 2, axis=1)
        p1_dist = np.sum(pdiff1 ** 2, axis=1)
        ds = np.sqrt(np.median(p1_dist / (p0_dist + 2**-23)))
        ds = (1.0 - self._ds_factor) + self._ds_factor * ds

        # update bounding box
        dx_scale = (ds - 1.0) * 0.5 * (bb[3] - bb[1] + 1)
        dy_scale = (ds - 1.0) * 0.5 * (bb[2] - bb[0] + 1)
        bb_curr = (int(bb[0] + dx - dx_scale + 0.5),
                   int(bb[1] + dy - dy_scale + 0.5),
                   int(bb[2] + dx + dx_scale + 0.5),
                   int(bb[3] + dy + dy_scale + 0.5))

        if bb_curr[0] >= bb_curr[2] or bb_curr[1] >= bb_curr[3]:
            return None

        bb_curr = (min(max(0, bb_curr[0]), curr.shape[1]),
                   min(max(0, bb_curr[1]), curr.shape[0]),
                   min(max(0, bb_curr[2]), curr.shape[1]),
                   min(max(0, bb_curr[3]), curr.shape[0]))

        return bb_curr
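
The scale estimate above is the median-flow idea: the change in scale is the square root of the median ratio of squared pairwise point distances before and after tracking. A tiny hedged sketch of just that computation on synthetic points (the damping by _ds_factor is omitted; all numbers are illustrative):

import numpy as np

np.random.seed(1)
p0 = np.random.rand(20, 2) * 100               # tracked points in the previous frame
p1 = 1.2 * p0 + np.random.randn(20, 2) * 0.5   # same points, scaled by 1.2 plus noise

i, j = np.triu_indices(len(p0), k=1)           # all unordered point pairs
d0_sq = np.sum((p0[i] - p0[j]) ** 2, axis=1)
d1_sq = np.sum((p1[i] - p1[j]) ** 2, axis=1)
ds = np.sqrt(np.median(d1_sq / (d0_sq + 2 ** -23)))
print(ds)                                      # ~1.2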
Exemplo n.º 45
0
fieldnum = 10
fieldsize = 30. #fieldsize in arcseconds
smoothingscale = 2
mapsave = "n"
galaxynum = 20000



gesnum = pointnum*fieldnum
kernelnum1 = int(gesnum)
kernelnum = kernelnum1-1
fieldsize = fieldsize/3600.
startt = time()
print("Filling the D-functions")
Dstar1,D1 = makefield.filld(kernelnum,fieldsize,pointnum)
Dstar1 = Dstar1/np.sum(np.abs(Dstar1))
D1 = D1/np.sum(np.abs(D1))
Dstar1r = Dstar1.real
Dstar1i = Dstar1.imag
D1r = D1.real
D1i = D1.imag
print("Finished filling. Time:",round(time()-startt),"s")

#print (np.sum(Dstar1))
#print (np.sum(D1))
def getgamma(kappa):
    kappa = kappa.astype("complex")
    print("Convolving kappa to gamma...")
    kappar = kappa.real
    kappai = kappa.imag
    gammar = convolve_fft(kappar,D1r,boundary="wrap") - convolve_fft(kappai,D1i,boundary="wrap")
Exemplo n.º 46
0
def loss(desired, final):
    # Sum-of-squares error: square the element-wise differences, then sum.
    return 0.5 * np.sum((desired - final) ** 2)
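
A quick numeric check of the loss above (illustrative vectors):

import numpy as np

desired = np.array([1.0, 0.0, 1.0])
final = np.array([0.8, 0.1, 0.6])
# 0.5 * (0.2**2 + 0.1**2 + 0.4**2) = 0.5 * 0.21 = 0.105
print(loss(desired, final))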
Exemplo n.º 47
0
def create_plot_height_quantile_remove_over_height(in_dir, out_dir, medial_level, gridSize, sensor_d='e'):
    
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
        
    out_sub_dir = os.path.join(out_dir, str(medial_level), str(gridSize))
    if not os.path.isdir(out_sub_dir):
        os.makedirs(out_sub_dir)
    
    d1 = date(2018, 4, 15)  # start date
    d2 = date(2018, 7, 29)  # end date

    delta = d2 - d1         # timedelta
    
    for i in range(delta.days + 1):
        str_date = str(d1 + timedelta(days=i))
        base_name = '{}_heightHist_{}_{}.npy'.format(str_date, sensor_d, str(gridSize)) #2017-06-30_heightHist_e_0.npy
        file_path = os.path.join(in_dir, base_name)
        if not os.path.isfile(file_path):
            continue
        
        heightHist = np.load(file_path)
        heightHist = heightHist[medial_level]
        if np.amax(heightHist) == 0:
            continue
        nonZeroIndex = np.nonzero(heightHist)
        max_y_ind = np.amax(nonZeroIndex[2])
        out_hist = np.zeros((864, 3))
        out_hist[:] = np.nan
        array_ind = 0
        save_flag = False
        for x in range(54):
            for y in range(16):
                if np.amax(heightHist[x,y])>0:
                    if heightHist[x,y,max_y_ind] < 10:
                        save_item = np.zeros(3)
                        save_item[0] = x
                        save_item[1] = y
                        targetHist = heightHist[x,y]
                        targetHist = targetHist/np.sum(targetHist)
                        quantiles = np.cumsum(targetHist)
                        b = np.arange(len(quantiles))
                        c = b[quantiles>0.98]
                        save_item[2] = min(c)
                        out_hist[array_ind] = save_item
                        save_flag = True
                array_ind += 1
                
        if save_flag:
            out_csv_path = os.path.join(out_sub_dir, '{}_heightHist.csv'.format(str_date))
            with open(out_csv_path, 'w') as f:
                for i in range(len(out_hist)):
                    out_np_line = out_hist[i]
                    if np.amax(out_np_line) > 0:
                        out_np_line = out_np_line.astype(int)
                        print_line = ','.join(map(str,out_np_line))
                        f.write(print_line+'\n')
                    else:
                        print_line = ','.join(map(str,out_np_line))
                        f.write(print_line+'\n')
    
    return
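
The quantile step above turns a per-plot height histogram into the index of the first bin whose cumulative mass exceeds 0.98. A small standalone sketch with a made-up histogram:

import numpy as np

counts = np.array([0, 2, 5, 9, 7, 4, 2, 1], dtype=float)   # counts per height bin
hist = counts / np.sum(counts)
quantiles = np.cumsum(hist)
bins = np.arange(len(quantiles))
bin_98 = min(bins[quantiles > 0.98])    # first bin reaching the 98th percentile
print(bin_98)                           # -> 7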
Exemplo n.º 48
0
def convert_model(from_popn, from_model, from_vars, to_popn, to_model, to_vars):
    """ Convert from one model to another model of a different type
        Generally this will involve projecting impulse responses, etc.
        It's hairy business.
    """

    # Idea: Get the state of the GLMs, e.g. the impulse responses, etc.
    #       Project those states onto the parameters of the to-model
    N = from_popn.N
    from_state = from_popn.eval_state(from_vars)
    to_state = to_popn.eval_state(to_vars)

    conv_vars = None
    if from_model['impulse']['type'].lower() == 'basis':
        if to_model['impulse']['type'].lower() == 'normalized' or \
           to_model['impulse']['type'].lower() == 'dirichlet':
            import copy
            conv_vars = copy.deepcopy(to_vars)

            # To convert from basis -> normalized, project the impulse
            # responses onto the normalized basis, divide by the area
            # under the curve to get the weight.
            W = np.zeros((N,N))
            for n2 in np.arange(N):
                B = to_state['glms'][n2]['imp']['basis'].shape[1]
                w_ir_n2 = np.zeros((N,B))
                for n1 in np.arange(N):
                    # Solve a nonnegative least squares problem
                    (w_ir_n1n2p, residp) = nnls(to_state['glms'][n2]['imp']['basis'],
                                                from_state['glms'][n2]['imp']['impulse'][n1,:])
                    (w_ir_n1n2n, residn) = nnls(to_state['glms'][n2]['imp']['basis'],
                                                -1.0*from_state['glms'][n2]['imp']['impulse'][n1,:])

                    # Take the better of the two solutions
                    if residp < residn:
                        Wsgn = 1.0
                        w_ir_n1n2 = w_ir_n1n2p
                    else:
                        Wsgn = -1.0
                        w_ir_n1n2 = w_ir_n1n2n

                    # Normalized weights must be > 0, sum to 1
                    w_ir_n1n2 = np.clip(w_ir_n1n2, 0.001, np.inf)
                    # Normalize the impulse response to get a weight
                    W[n1,n2] = Wsgn*np.sum(w_ir_n1n2)

                    # Set impulse response to normalized impulse response
                    w_ir_n2[n1,:] = w_ir_n1n2 / np.sum(w_ir_n1n2)

                # Update to_vars
                if to_model['impulse']['type'].lower() == 'normalized':
                    conv_vars['glms'][n2]['imp']['w_lng'] = np.log(w_ir_n2.flatten())
                if to_model['impulse']['type'].lower() == 'dirichlet':
                    for n1 in range(N):
                        # Scale up the weights such that the average is preserved
                        alpha = to_popn.glm.imp_model.alpha
                        B = to_popn.glm.imp_model.B
                        conv_vars['glms'][n2]['imp']['g_%d' % n1] = alpha * B * w_ir_n2[n1,:]

            # Update to_vars
            conv_vars['net']['weights']['W'] = W.flatten()

            # Threshold the adjacency matrix to start with the right level of sparsity
            if 'rho' in to_model['network']['graph'].keys():
                W_sorted = np.sort(np.abs(W.ravel()))
                thresh = W_sorted[int(np.floor(
                    (1.0 - 2.0 * to_model['network']['graph']['rho']) * (N**2 - N) - N))]
                conv_vars['net']['graph']['A'] = (np.abs(W) >= thresh).astype(np.int8)
            else:
                conv_vars['net']['graph']['A'] = np.ones((N,N), dtype=np.int8)

    # Copy over the bias
    for n in np.arange(N):
        conv_vars['glms'][n]['bias']['bias'] = from_vars['glms'][n]['bias']['bias']

    # Copy over the background params
    if 'sharedtuningcurves' in to_model['latent'] and \
        from_model['bkgd']['type'] == 'spatiotemporal':
        convert_stimulus_filters_to_sharedtc(from_popn, from_model, from_vars,
                                             to_popn, to_model, conv_vars)

    return conv_vars
Exemplo n.º 49
0
def create_plot_betyCsv(in_dir, out_dir, medial_level, gridSize, para = 0.98, sensor_d='e'):
    
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
        
    out_sub_dir = os.path.join(out_dir, str(medial_level), str(gridSize))
    if not os.path.isdir(out_sub_dir):
        os.makedirs(out_sub_dir)
    
    d1 = date(2018, 4, 25)  # start date
    d2 = date(2018, 8, 1)  # end date

    delta = d2 - d1         # timedelta
    
    (fields, traits) = get_traits_table_height_quantile()
    
    for i in range(delta.days + 1):
        str_date = str(d1 + timedelta(days=i))
        base_name = '{}_heightHist_{}_{}.npy'.format(str_date, sensor_d, str(gridSize)) #2017-06-30_heightHist_e_0.npy
        file_path = os.path.join(in_dir, base_name)
        if not os.path.isfile(file_path):
            continue
        
        heightHist = np.load(file_path)
        heightHist = heightHist[medial_level]
        if np.amax(heightHist) == 0:
            continue
        nonZeroIndex = np.nonzero(heightHist)
        max_y_ind = np.amax(nonZeroIndex[2])
        out_hist = np.zeros((864, 3))
        out_hist[:] = np.nan
        array_ind = 0
        save_flag = False
        for x in range(54):
            for y in range(16):
                if np.amax(heightHist[x,y])>0:
                    if heightHist[x,y,max_y_ind] < 50:
                        save_item = np.zeros(3)
                        save_item[0] = x
                        save_item[1] = y
                        targetHist = heightHist[x,y]
                        targetHist = targetHist/np.sum(targetHist)
                        quantiles = np.cumsum(targetHist)
                        b = np.arange(len(quantiles))
                        c = b[quantiles>para]
                        save_item[2] = min(c)-5
                        out_hist[array_ind] = save_item
                        save_flag = True
                array_ind += 1
                
        if save_flag:
            out_csv_path = os.path.join(out_sub_dir, '{}_99th_quantile.csv'.format(str_date))
            csvHandle = open(out_csv_path, 'w')
            csvHandle.write(','.join(map(str, fields)) + '\n')
            for j in range(len(out_hist)):
                out_np_line = out_hist[j]
                if np.amax(out_np_line) > 0:
                    str_time = str_date+'T12:00:00'
                    traits['local_datetime'] = str_time
                    traits['99th_quantile_canopy_height'] = str(int(out_np_line[2]))
                    traits['site'] = parse_site_from_range_column(out_np_line[0], out_np_line[1], 6)
                    trait_list = generate_traits_list_height(traits)
                    csvHandle.write(','.join(map(str, trait_list)) + '\n')
    
            csvHandle.close()
    #submitToBety(out_file)
    #betydb.submit_traits(out_file, filetype='csv', betykey=betydb.get_bety_key(), betyurl=betydb.get_bety_url())

    return
Exemplo n.º 50
0
def circuit_integrals(u_abs, u, v, w, lon, lat, glon, glat, z, r):
    # Integrate u.dl around the circuit of trajectories
    # 1st and last 2 trajectories are the same so don't double count
    dlambda, dx, dy, dz = [], [], [], []
    for n in range(1, len(u) - 1):
        # dlambda is length along true longitudes to match the direction of
        # the Earth rotation
        dlambda.append(r[n] * np.cos(lat[n]) * 0.5 * (lon[n + 1] - lon[n - 1]))

        # dx and dy are in the direction of the rotated grid which corresponds
        # to the wind fields in the forecast
        dx.append(r[n] * np.cos(glat[n]) * 0.5 * (glon[n + 1] - glon[n - 1]))
        dy.append(r[n] * 0.5 * (glat[n + 1] - glat[n - 1]))

        # dz is independent of grid rotation
        dz.append(0.5 * (z[n + 1] - z[n - 1]))

    dlambda = np.array(dlambda)
    dx = np.array(dx)
    dy = np.array(dy)
    dz = np.array(dz)

    # \int dl: Tracks the errors in each calculation (should be zero)
    dx_tot = np.sum(dx)
    dy_tot = np.sum(dy)
    dz_tot = np.sum(dz)
    dlambda_tot = np.sum(dlambda)

    # \int |dl|
    length = np.sum(np.sqrt(dx ** 2 + dy ** 2 + dz ** 2))

    # u * r cos(phi) dlambda
    circ_u = u[1:-1] * dx

    # v * r dphi
    circ_v = v[1:-1] * dy

    # w * dz
    circ_w = w[1:-1] * dz

    # u_abs * r cos(phi) dlambda
    circ_p = u_abs[1:-1] * dlambda
    """
    r_ave = 0.5 * (r[1:] + r[:-1])

    dlambda = r_ave * np.cos(0.5 * (lat[1:] + lat[:-1])) * (lon[1:] - lon[:-1])
    dx = r_ave * np.cos(0.5 * (glat[1:] + glat[:-1])) * (glon[1:] - glon[:-1])
    dy = r_ave * (glat[1:] - glat[:-1])
    dz = (z[1:] - z[:-1])

    # \int dl
    dx_tot = np.sum(dx)
    dy_tot = np.sum(dy)
    dz_tot = np.sum(dz)
    dlambda_tot = np.sum(dlambda)

    # \int |dl|
    length = np.sum(np.sqrt(dx ** 2 + dy ** 2 + dz ** 2))

    # u * r cos(phi) dlambda
    circ_u = 0.5 * (u[1:] + u[:-1]) * dx

    # v * r dphi
    circ_v = 0.5 * (v[1:] + v[:-1]) * dy

    # w * dz
    circ_w = 0.5 * (w[1:] + w[:-1]) * dz

    # u_abs * r cos(phi) dlambda
    circ_p = 0.5 * (u_abs[1:] + u_abs[:-1]) * dlambda
    """

    rel_circulation = np.sum(circ_u + circ_v + circ_w)
    planetary_circulation = np.sum(circ_p)
    abs_circulation = np.sum(circ_u + circ_v + circ_w + circ_p)

    return (dx_tot, dy_tot, dz_tot, dlambda_tot, length,
            rel_circulation, planetary_circulation, abs_circulation)
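
A hedged sanity check of the discrete line integral used above: the circulation of a solid-body rotation (u = -omega*y, v = omega*x) around a circle of radius R, computed with the same centred differences, should match the analytic value 2*pi*omega*R**2. The flow, radius and point count are illustrative assumptions.

import numpy as np

omega, R, npts = 2.0, 3.0, 400
theta = np.linspace(0, 2 * np.pi, npts, endpoint=False)
x, y = R * np.cos(theta), R * np.sin(theta)
u, v = -omega * y, omega * x

# Centred differences around the closed loop (wrap with np.roll).
dx = 0.5 * (np.roll(x, -1) - np.roll(x, 1))
dy = 0.5 * (np.roll(y, -1) - np.roll(y, 1))
circulation = np.sum(u * dx + v * dy)
print(circulation, 2 * np.pi * omega * R ** 2)   # the two agree closely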
def projections(iswt, iorient, K=20, inc=1, aspace=None, arange=None):
    if (K < 4 or K > 1024):
        print(
            'Invalid average value. Accepted values between 4 and half the size of your image. Setting default value.'
        )
        K = 12

    if (inc > 90 or inc < 0):
        print('Invalid Delta, must be positive and less than 90')
        inc = 1
    # pad the image with zeros so we don't lose anything when we rotate.
    iLength, iWidth = iswt.shape
    iDiag = math.sqrt(iLength**2 + iWidth**2)
    LengthPad = math.ceil(iDiag - iLength) + 1
    WidthPad = math.ceil(iDiag - iWidth) + 1

    padIMG = np.zeros((iLength + LengthPad, iWidth + WidthPad))
    pad1 = int(math.ceil(LengthPad / 2))
    pad2 = int(math.ceil(LengthPad / 2) + iLength)

    pad3 = int(math.ceil(WidthPad / 2))
    pad4 = int(math.ceil(WidthPad / 2) + iWidth)
    padIMG[pad1:pad2, pad3:pad4] = iswt

    padIMGOR = np.zeros((iLength + LengthPad, iWidth + WidthPad))
    padIMGOR[pad1:pad2, pad3:pad4] = iorient
    #
    #  loop over the number of angles, rotate 90-theta (because we can easily sum
    #  if we look at stuff from the top), and then add up.  Don't perform any
    #  interpolation on the rotating.
    #
    #   -90 and 90 are the same, we must remove 90
    THETA = list(range(-90, 90, inc))
    th = np.zeros(len(THETA)) + np.inf
    if (arange):
        for ang in aspace:
            k = ang + 90
            kplus = k + arange
            kminus = k - arange
            if (kplus > 179):
                kplus = 179
            if (kminus < 0):
                kminus = 0
            th[k:kplus] = THETA[k:kplus]
            th[kminus:k] = THETA[kminus:k]
    else:
        th = THETA
    th = np.array(th, dtype=np.float32) * np.pi * (1 / 180.0)

    n = len(THETA)
    PR = np.zeros((padIMG.shape[1], n))
    M = padIMG  # > 0

    iPL, iPW = padIMG.shape
    center = (iPL / 2, iPW / 2)
    for i in range(n):
        if (th[i] != np.inf):
            final = oft(M, K, padIMGOR, th[i])
            Mt = cv2.getRotationMatrix2D(center, -THETA[i], 1.0)
            rotated = cv2.warpAffine(final, Mt, (iPL, iPW))
            PR[:, i] = (np.sum(rotated, axis=0))
        else:
            PR[:, i] = 0

    PR[np.nonzero(PR < 0)] = 0.0
    PR = PR / iDiag
    PR = PR * 10
    PR = np.multiply(PR, PR)
    PR = PR * 0.1
    PR = PR / np.max(PR)
    return PR
Exemplo n.º 52
0
def stereo_hist_to_bety_format(in_dir, out_dir):
    
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
        
    d1 = date(2016, 10, 16)  # start date
    d2 = date(2016, 11, 7)  # end date

    delta = d2 - d1         # timedelta
    
    (fields, traits) = get_traits_table_stereo_height_quantile()
    
    for i in range(delta.days + 1):
        str_date = str(d1 + timedelta(days=i))
        base_name = '{}_stereoHeight.npy'.format(str_date) #2016-10-16_stereoHeight.npy
        file_path = os.path.join(in_dir, base_name)
        if not os.path.isfile(file_path):
            continue
        
        heightHist = np.load(file_path)
        if np.amax(heightHist) == 0:
            continue
        out_hist = np.zeros((864, 3))
        out_hist[:] = np.nan
        array_ind = 0
        save_flag = False
        for p in range(864):
            if np.amax(heightHist[2*p]) > 0:
                save_item = np.zeros(3)
                plot_row, plot_col = plotNum_to_fieldPartition_864(p+1)
                save_item[0] = plot_row-1
                save_item[1] = plot_col-1
                targetHist = heightHist[2*p]
                targetHist = targetHist/np.sum(targetHist)
                quantiles = np.cumsum(targetHist)
                b = np.arange(len(quantiles))
                c = b[quantiles>0.98]
                save_item[2] = min(c)
                out_hist[array_ind] = save_item
                save_flag = True
            array_ind += 1
                
        if save_flag:
            out_csv_path = os.path.join(out_dir, '{}_98th_quantile_stereoHeight.csv'.format(str_date))
            csvHandle = open(out_csv_path, 'w')
            csvHandle.write(','.join(map(str, fields)) + '\n')
            for j in range(len(out_hist)):
                out_np_line = out_hist[j]
                if np.amax(out_np_line) > 0:
                    str_time = str_date+'T12:00:00'
                    traits['local_datetime'] = str_time
                    traits['98th_quantile_canopy_height'] = str(int(out_np_line[2]))
                    traits['site'] = parse_site_from_range_column(out_np_line[0], out_np_line[1], 2)
                    trait_list = generate_traits_list_height(traits)
                    csvHandle.write(','.join(map(str, trait_list)) + '\n')
    
            csvHandle.close()
    #submitToBety(out_file)
    #betydb.submit_traits(out_file, filetype='csv', betykey=betydb.get_bety_key(), betyurl=betydb.get_bety_url())

    return
Exemplo n.º 53
0
def get_posterior(prior, P, outcome):
    n_models = len(prior)
    total_probability = prior * P[:, outcome] # get total_probability[i] = prior[i] * P[i, outcome]
    posterior = total_probability / np.sum(total_probability)
    ## So probability of outcome for model i is just...
    return posterior
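
A tiny worked example of the posterior update above, with two hypothetical models and one observed outcome (all numbers illustrative):

import numpy as np

prior = np.array([0.5, 0.5])            # P(model i)
P = np.array([[0.9, 0.1],               # P(outcome j | model i)
              [0.4, 0.6]])
posterior = get_posterior(prior, P, outcome=0)
print(posterior)                        # [0.45, 0.20] normalised -> ~[0.692, 0.308]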
def discardwrongpeaks(R, positivePairs, negativePairs, gridsize, verbose=True):
    """  Discard wrong peaks  
         Discard wrong peaks is based in the following facts:
          - In a grid you will find positive and negative angles
          - Make pairs of positive and negative angles. We group first positive, then negative
          - If we don't have enough for a square, we finish at that point.
      Otherwise,  we check that the pairs sum 90 degrees complementary, so we
      group by square. For each positive, it must exist a negative that
      complements, and vice versa.
       ------------------------------------------------------------------------
      Function ERRORS:
      - couldn't find enough peaks fitting grid conditions
    """
    goodpeaks = []
    error = 0
    error = enoughpeaks(positivePairs, negativePairs)
    # Error can be 0, 1 or 2.
    anglepos = np.array([peak[1] for peak in positivePairs])
    angleneg = np.array([peak[1] for peak in negativePairs])

    if (error < 0): return -1, [], []
    if error == 1:
        positivePairs = findCrossSequence(anglepos, gridsize)
        negativePairs = predictGrid(positivePairs, R, gridsize)
        if len(negativePairs) == 0 or len(positivePairs) == 0:
            return -1, [], []
    if error == 2:
        negativePairs = findCrossSequence(angleneg, gridsize)
        positivePairs = predictGrid(negativePairs, R, gridsize)
        if len(negativePairs) == 0 or len(positivePairs) == 0:
            return -2, [], []
    # We have 2 exceptions: 0 and -1, in that case, majority wins and is converted
    ### TODO: To be tested
    total_0 = np.sum(angleneg == 0)
    total_1 = np.sum(anglepos == 1)
    total_m1 = np.sum(angleneg == -1)
    total_90 = np.sum([anglepos == 90, anglepos == 89])  # We consider 90 and 89
    total_m89 = np.sum([angleneg == -90, angleneg == -89,
                        angleneg == -88])  # We consider -90, -89 and -88
    # We have to group angles in complementary sets:
    # -1,0,1 can show up together
    # -89,90 can also show up together
    # However, we only admit correspondences such as: (-1,0)+(90,89)  or  (1,2)+(-89,-88)
    # If we have many pairs, we only keep the majority
    if (total_0 > 0 and total_1 > 0) or (total_90 > 0 and total_m89 > 0):
        if (total_90 + total_0 + total_m1 > total_m89 + total_1):
            # Remove all 1's and -89
            anglepos = anglepos[np.where(anglepos != 1)]
            angleneg = angleneg[np.where(angleneg != -89)]
            angleneg = angleneg[np.where(angleneg != -88)]
        else:  # Remove all 0's and 90's
            anglepos = anglepos[np.where(anglepos != 0)]
            angleneg = angleneg[np.where(angleneg != -1)]
            anglepos = anglepos[np.where(anglepos != 89)]
            anglepos = anglepos[np.where(anglepos != 90)]
    # To be tested

    # now make 90 degrees pairs
    # Take first positive angles and compare with negative if they add
    # 90+/-5
    pos_ang = np.unique(anglepos)
    neg_ang = np.unique(angleneg)
    good_angles_pos = set()
    good_angles_neg = set()

    for elp in pos_ang:
        for eln in neg_ang:
            nty = elp + np.abs(eln)
            if (nty > 85 and nty < 95):  #good combination
                good_angles_pos.add(elp)
                good_angles_neg.add(eln)

    good_pos = [
        positivePairs[ind] for ind, angle in enumerate(anglepos)
        if angle in good_angles_pos
    ]
    good_neg = [
        negativePairs[ind] for ind, angle in enumerate(angleneg)
        if angle in good_angles_neg
    ]

    error = enoughpeaks(good_pos, good_neg)
    if error < 0:
        return -3, [], []

    positivePairs = findCrossSequence(good_pos, gridsize)
    negativePairs = findCrossSequence(good_neg, gridsize)

    if len(positivePairs) == 0 and len(negativePairs) == 0:
        return -3, [], []

    error = enoughpeaks(positivePairs, negativePairs)

    if error == 1:
        negativePairs = predictGrid(positivePairs, R, gridsize)
        if len(negativePairs) < 1:
            return -1, [], []
    if error == 2:
        positivePairs = predictGrid(negativePairs, R, gridsize)
        if len(positivePairs) < 1:
            return -2, [], []
    # get angles
    positivePairs = np.squeeze(np.array(positivePairs))
    negativePairs = np.squeeze(np.array(negativePairs))
    total_pos = positivePairs.shape[0]
    total_neg = negativePairs.shape[0]
    topval = 91
    topscore = 0
    for elp in positivePairs:
        for eln in negativePairs:
            val = np.abs((elp[1] - eln[1]) - 90)
            score = elp[2] + eln[2]
            if (val < topval):
                fangpos = elp[1]
                fangneg = eln[1]
                topscore = score
                topval = val
            elif val == topval:
                if score > topscore:
                    fangpos = elp[1]
                    fangneg = eln[1]
                    topscore = score
                    topval = val

    nty = fangpos - fangneg

    for i in range(total_pos):
        positivePairs[i, 1] = fangpos
    for i in range(total_neg):
        negativePairs[i, 1] = fangneg

    if verbose:
        print('Angle sum :' + str(nty))
    if (nty < (85) and nty > (60)) and verbose:
        print('The angle orientations are not 90 degrees. Adjust properly.')
    return 0, positivePairs, negativePairs
def benchmark_mean(y_true, checkMatrix):
   y_true = y_true * checkMatrix
   row_mean = (np.sum(y_true, axis=1) /
               np.sum((checkMatrix == 1.0), axis=1)).reshape(y_true.shape[0], 1)
   return row_mean * np.ones(y_true.shape)
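
A small worked example of benchmark_mean above (illustrative matrices): every entry of a user's row is replaced by that user's mean observed rating.

import numpy as np

y_true = np.array([[5.0, 0.0, 3.0],
                   [0.0, 4.0, 2.0]])
checkMatrix = np.array([[1.0, 0.0, 1.0],     # 1 marks an observed rating
                        [0.0, 1.0, 1.0]])
print(benchmark_mean(y_true, checkMatrix))   # row 0 -> 4.0 everywhere, row 1 -> 3.0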
def findCrossSequence(ipeaks, gridsize=None):
    # Checks which is the best combination of peaks that fits the gridsize
    # The allowed grid sequence can be one strike after another
    # or just one line.
    # Peaks need to be tuples [position,angle,X]
    # Grid size must be in pixels. This findCrossSequence expects a separation of n pixels  ------------*----*--------
    # If we need n,m pixels (periodical)  ---*--*--------*--*------ needs modification
    # If gridsize is None, the pairing is omitted
    # Always return [] if error
    total_peaks = len(ipeaks)
    gpeaks = []
    if gridsize is None: return []

    # Get all possible combinations of peaks
    possible_pairs = list(itertools.combinations(ipeaks, 2))
    error = []
    saved_group = []
    for pair in possible_pairs:
        pair = np.array(pair)
        total_p = pair[pair.argsort(axis=0)[:, 0]]
        terror = 0
        dist_p = np.abs(total_p[0, 0] - total_p[1, 0])
        good = 1
        dif_error = np.abs(dist_p - gridsize[0]) / gridsize[0]
        terror = terror + dif_error
        # first test, distance of spacing
        if (dif_error > 0.35): good = 0

        if (good == 1):
            if (
                    np.abs(total_p[0, 1] - total_p[1, 1]) > 2
            ):  # second test, difference between angles not bigger than 3 deg
                good = 0

        if (good == 1):
            saved_group.append(total_p)
            error.append(terror)

    if len(saved_group) == 0:
        return []
    # For each group check that they are not competing in the same distances
    k = 0
    good_group = []
    if (len(saved_group) == 1): return saved_group

    for i, g1 in enumerate(saved_group):
        for j in range(i + 1, len(saved_group)):
            if (i != j):
                g2 = saved_group[j]
                err_dif = np.abs(error[i] - error[j])
                if (err_dif > 0.1):  # We keep the set with minimum error
                    if (error[i] < error[j]):
                        good_group.append(g1)
                    else:
                        good_group.append(g2)

                else:  #potential candidates, leave them
                    good_group.append(g1)
                    good_group.append(g2)

    if len(good_group) == 0:
        return []

    # Now is time to select the BEST candidate
    # Is going to be the one that sums up the most
    sg = []
    for g in good_group:
        sg.append(np.sum(g[:, 2]))
    maxind = np.argsort(np.array(sg))[::-1]
    return good_group[maxind[0]]
Exemplo n.º 57
0
    def eval(self, x, y, train=False):
        x, y = to_variable(var=(x, y.long()), cuda=self.cuda)
        out, _, _ = self.model(x)
        pred = out.data.max(dim=1, keepdim=False)[1]  # get the index of the max log-probability
        acc = float(np.sum(pred.detach().cpu().numpy() == y.detach().cpu().numpy())) / float(len(y))
        return acc, pred
Exemplo n.º 58
0
def accuracy(out, labels):
    outputs = np.argmax(out, axis=1)
    return np.sum(outputs == labels)
def vector_norm(v_in):
    return np.sqrt(np.sum(np.abs(v_in) ** 2))
random.seed(0)
check = True
while check:
   training_set = checkMatrix.copy()

   for i in range(training_set.shape[1]):
      # use only rated restaurants
      index = list(np.where(training_set[:,i] == 1)[0]) 

      # randomly select 30% of whole dataset
      flag = int(round(len(index)*0.3))

      index_flag = random.sample(index,flag)
      training_set[index_flag,i] = 0  

   if np.sum(training_set,axis=1).min() > 1: 
      check = False

testing_set = checkMatrix - training_set
print(f"Training set: {training_set.sum()}\nTesting set: {testing_set.sum()}\n")

# ---- Use evaluation metrics to get the quantified values of model performances --- #
# Use root-mean-squared-error method for model performance employing linear regression
def root_mean_squared(y_true, y_predicted, R):
   rmse = np.sqrt(mean_squared_error(y_true[R == 1], y_predicted[R == 1]))
   return rmse

# --- Create a benchmark model which returns average predicted rating and use it to compare ratings ----- #

# Function for benchmark model mean 
def benchmark_mean (y_true, checkMatrix):