Example No. 1
    def process_binned(self, counts, x, scale, monitor, vanadium_count=None, vanadium_monitor=None, vcorr=None):
        """Bin the data"""
        binWidth = self.getProperty("BinWidth").value
        bins = np.arange(x.min(), x.max()+binWidth, binWidth) # calculate bin boundaries
        inds = np.digitize(x, bins) # get bin indices

        if vcorr is not None:
            vcorr = np.tile(vcorr, (counts.shape[1], 1)).T
            vcorr_binned = np.bincount(inds, weights=vcorr.ravel(), minlength=len(bins))
        else:
            vanadium_count = np.tile(vanadium_count, (counts.shape[1], 1)).T
            vanadium_binned = np.bincount(inds, weights=vanadium_count.ravel(), minlength=len(bins))
            vanadium_monitor_binned = np.bincount(inds, minlength=len(bins))*vanadium_monitor

        monitor = np.tile(monitor, (counts.shape[0], 1))

        counts_binned = np.bincount(inds, weights=counts.ravel(), minlength=len(bins))
        monitor_binned = np.bincount(inds, weights=monitor.ravel(), minlength=len(bins))
        number_binned = np.bincount(inds, minlength=len(bins))

        old_settings = np.seterr(all='ignore') # otherwise it will complain about divide by zero
        if vcorr is not None:
            y = (counts_binned/vcorr_binned*number_binned/monitor_binned)[1:]
            e = (np.sqrt(1/counts_binned)[1:])*y
        else:
            y = (counts_binned/vanadium_binned*vanadium_monitor_binned/monitor_binned)[1:]
            e = (np.sqrt(1/counts_binned + 1/vanadium_binned + 1/vanadium_monitor + 1/monitor_binned)[1:])*y
        np.seterr(**old_settings)
        x = bins

        return x, np.nan_to_num(y*scale), np.nan_to_num(e*scale)
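The pattern above uses np.digitize to assign each x value a bin index and np.bincount to accumulate weighted sums per bin, with nan_to_num cleaning up empty bins after the division. A minimal standalone sketch of that pattern on made-up numbers (the values below are illustrative only):

import numpy as np

x = np.array([0.1, 0.4, 0.45, 0.9])        # sample positions
counts = np.array([2.0, 3.0, 5.0, 1.0])    # values to accumulate
bin_width = 0.5
bins = np.arange(x.min(), x.max() + bin_width, bin_width)   # bin boundaries
inds = np.digitize(x, bins)                # 1-based bin index per sample

counts_binned = np.bincount(inds, weights=counts, minlength=len(bins))
number_binned = np.bincount(inds, minlength=len(bins))
with np.errstate(invalid='ignore'):
    mean_per_bin = counts_binned / number_binned   # NaN where a bin is empty
print(np.nan_to_num(mean_per_bin)[1:])             # drop the unused 0th slot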
Example No. 2
	def segment_units(self, sfid, verb=False):
		"""
		This function takes the list of unit breakpoints, plus the raw metadata, and assembles 'cooked' segments in the corpus segtable.
		
		Note: currently ignores the amplitude scalars (aside from generating stats)...
		
		"""
		segmented = self.get_sorted_units_list(sfid)
		raw_amps, raw_mfccs, raw_chromas = self.get_raw_metadata(sfid) # , raw_chromas
		amps, reheated = [], []
		
		if verb: print('raw: ', raw_amps)
		amps_stripped = np.nan_to_num(raw_amps)
		if verb: print('amps_stripped: ', amps_stripped)
		mfccs_stripped = np.nan_to_num(raw_mfccs)
		if verb: print('mfccs_stripped: ', mfccs_stripped)
		chromas_stripped = np.nan_to_num(raw_chromas)
		if verb: print('chromas_stripped: ', chromas_stripped)

		for relid, sfu in enumerate(segmented):

			offset = int(math.floor(sfu.onset / self.HOP_SECS))
			dur = int(math.floor(sfu.dur / self.HOP_SECS))
			if verb: print('[[', offset, '|', dur, ']]')
			self.sftree.nodes[sfid].add_metadata_for_relid(relid, powers=self.feat.powers.proc_funcs[0](amps_stripped, offset, dur))
			# WHY ARE THE FUNCTION SIGNATURES DIFFERENT FOR OFFSET AND DUR???
			self.sftree.nodes[sfid].add_metadata_for_relid(relid, mfccs=self.feat.proc_funcs[0](mfccs_stripped[offset:(offset+dur)]))
			if verb: print(self.feat.proc_funcs[1])
			if verb: print(mfccs_stripped[offset:(offset+dur)])
			self.sftree.nodes[sfid].add_metadata_for_relid(relid, mfcc_vars=self.feat.proc_funcs[1](mfccs_stripped[offset:(offset+dur)]))
			self.sftree.nodes[sfid].add_metadata_for_relid(relid, chromas=self.feat.proc_funcs[0](chromas_stripped[offset:(offset+dur)]))
			self.sftree.nodes[sfid].add_metadata_for_relid(relid, chroma_vars=self.feat.proc_funcs[1](chromas_stripped[offset:(offset+dur)]))
Example No. 3
def shifted_corr(reference, image, displacement):
    """Calculate the correlation between the reference and the image shifted
    by the given displacement.

    Parameters
    ----------
    reference : np.ndarray
    image : np.ndarray
    displacement : np.ndarray

    Returns
    -------
    correlation : float

    """

    ref_cuts = np.maximum(0, displacement)
    ref = reference[ref_cuts[0]:, ref_cuts[1]:, ref_cuts[2]:]
    im_cuts = np.maximum(0, -displacement)
    im = image[im_cuts[0]:, im_cuts[1]:, im_cuts[2]:]
    s = np.minimum(im.shape, ref.shape)
    ref = ref[:s[0], :s[1], :s[2]]
    im = im[:s[0], :s[1], :s[2]]
    ref -= nanmean(ref.reshape(-1, ref.shape[-1]), axis=0)
    ref = np.nan_to_num(ref)
    im -= nanmean(im.reshape(-1, im.shape[-1]), axis=0)
    im = np.nan_to_num(im)
    assert np.all(np.isfinite(ref)) and np.all(np.isfinite(im))
    corr = nanmean(
        [old_div(np.sum(i * r), np.sqrt(np.sum(i * i) * np.sum(r * r))) for
         i, r in zip(np.rollaxis(im, -1), np.rollaxis(ref, -1))])
    return corr
Example No. 4
def plotheatmaps(data, title=''):
  local = get_local_full()
  glob = get_global_full()
  gden = [('%4.0f'%float(i)).lstrip('0') for i in glob['density']]
  gcnt = [int(i) for i in glob['count']]
  max_gden = max([float(i) for i in glob['density']])
  for tbin in data.keys():
    c = np.array(data[tbin])
    # gcnt = np.sum(c, axis=1)
    # lcnt = np.sum(c, axis=0)
    lcnt = [int(i) for i in local[tbin]['count']]
    lden = [float(i) for i in local[tbin]['density']]
    lden_norm = [i / sum(lden) for i in lden]
    lden_scaled = [i * max_gden for i in lden_norm]
    denlab = [('%3.0f'%i) for i in lden_scaled]
    print(local[tbin]['volume'])
    glabels = ['%4d/%4s' % i for i in zip(gcnt,gden)]
    llabels = ['%4d/%4s' % i for i in zip(lcnt,denlab)]
    norm_c = np.nan_to_num(c / np.linalg.norm(c, axis=-1)[:, np.newaxis]).T
    P.heatmap(norm_c, glabels, llabels, title+tbin+'_col')
    d = c.T
    norm_r = np.nan_to_num(d / np.linalg.norm(d, axis=-1)[:, np.newaxis])
    P.heatmap(norm_r, glabels, llabels, title+tbin+'_row')

    combined = (norm_c + norm_r) / 2
    P.heatmap(combined, glabels, llabels, title+tbin+'_combined')
    print(combined)
Example No. 5
    def time_std(self):
        if hasattr(self, '_time_std'):
            return self._time_std
        if self.savedir is not None:
            try:
                with open(join(self.savedir, 'time_std.pkl'),
                          'rb') as f:
                    time_std = pickle.load(f)
            except IOError:
                pass
            else:
                # Same protocol as the averages. Make sure the
                # std is a single 4D (zyxc) array and if not just
                # re-calculate the time std.
                if isinstance(time_std, np.ndarray):
                    self._time_std = time_std
                    return self._time_std

        sums = np.zeros(self.frame_shape)
        sums_squares = np.zeros(self.frame_shape)
        counts = np.zeros(self.frame_shape)
        for frame in it.chain.from_iterable(self):
            sums += np.nan_to_num(frame)
            sums_squares += np.square(np.nan_to_num(frame))
            counts[np.isfinite(frame)] += 1
        means = old_div(sums, counts)
        mean_of_squares = old_div(sums_squares, counts)
        std = np.sqrt(mean_of_squares-np.square(means))
        if self.savedir is not None and not self._read_only:
            with open(join(self.savedir, 'time_std.pkl'), 'wb') as f:
                pickle.dump(std, f, pickle.HIGHEST_PROTOCOL)
        self._time_std = std
        return self._time_std
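The loop above computes a NaN-aware mean and standard deviation in one streaming pass by keeping running sums, sums of squares, and a per-pixel count of finite samples. A minimal sketch on two hypothetical 2x2 frames, checked against np.nanstd:

import numpy as np

frames = [np.array([[1.0, np.nan], [2.0, 4.0]]),
          np.array([[3.0, 5.0], [np.nan, 6.0]])]

sums = np.zeros((2, 2))
sums_squares = np.zeros((2, 2))
counts = np.zeros((2, 2))
for frame in frames:
    sums += np.nan_to_num(frame)                    # NaN contributes 0 to the sum
    sums_squares += np.square(np.nan_to_num(frame))
    counts[np.isfinite(frame)] += 1                 # ...and 0 to the sample count

means = sums / counts
std = np.sqrt(sums_squares / counts - np.square(means))
assert np.allclose(std, np.nanstd(np.stack(frames), axis=0))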
Example No. 6
    def uncertaintyMap(self, psf, method='convolve', fitParams=None):
        '''
        return the intensity based uncertainty due to the unsharpness of the image
        as standard deviation
        
        method = ['convolve' , 'unsupervised_wiener']
                    latter one also returns the reconstructed image (deconvolution)
        '''

        #ignore background:
        #img[img<0]=0
        ###noise should not influence sharpness uncertainty:
        ##img = median_filter(img, 3)

        # decrease noise in order not to overestimate result:
        img = scaleSignal(self.img, fitParams=fitParams)

        if method == 'convolve':
            #print 'convolve'
            blurred = convolve2d(img, psf, 'same')
            m = abs(img-blurred) / abs(img + blurred)
            m = np.nan_to_num(m)
            m*=self.std**2
            m[m>1]=1
            self.blur_distortion = m
            np.save('blurred', blurred)
            return m
        else:
            restored = unsupervised_wiener(img, psf)[0]
            m = abs(img-restored) / abs(img + restored)
            m = np.nan_to_num(m)
            m*=self.std**2
            m[m>1]=1
            self.blur_distortion = m
            return m, restored
Example No. 7
    def _to_raw(self, data1, data2):
        from matplotlib import pyplot as plt
        from matplotlib.colors import Normalize
        cmapdir = options.config.get("webgl", "colormaps")
        cmap = plt.imread(os.path.join(cmapdir, "%s.png"%self.cmap))

        norm1 = Normalize(self.vmin, self.vmax)
        norm2 = Normalize(self.vmin2, self.vmax2)
        
        d1 = np.clip(norm1(data1), 0, 1)
        d2 = np.clip(1 - norm2(data2), 0, 1)
        dim1 = np.round(d1 * (cmap.shape[1]-1))
        # Nans in data seemed to cause weird interaction with conversion to uint32
        dim1 = np.nan_to_num(dim1).astype(np.uint32) 
        dim2 = np.round(d2 * (cmap.shape[0]-1))
        dim2 = np.nan_to_num(dim2).astype(np.uint32)

        colored = cmap[dim2.ravel(), dim1.ravel()]
        r, g, b, a = colored.T
        r.shape = dim1.shape
        g.shape = dim1.shape
        b.shape = dim1.shape
        a.shape = dim1.shape
        # Preserve nan values as alpha = 0
        aidx = np.logical_or(np.isnan(data1),np.isnan(data2))
        a[aidx] = 0
        # Code from master, to handle alpha input, prob better here but not tested.
        # # Possibly move this above setting nans to alpha = 0;
        # # Possibly multiply specified alpha by alpha in colormap??
        # if 'alpha' in self.attrs:
        #     # Over-write alpha from colormap / nans with alpha arg if provided.
        #     # Question: Might it be important to keep alpha as an attr?
        #     a = self.attrs.pop('alpha')
        return r, g, b, a
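A minimal sketch of the same two-dimensional colormap lookup with a tiny synthetic RGBA table (the cmap array and sample values below are made up, not from the module above): the two normalized inputs become integer column/row indices, NaNs are forced to index 0 before the uint32 cast, and NaN locations end up fully transparent.

import numpy as np

cmap = np.random.rand(4, 8, 4)                    # rows x cols x RGBA
d1 = np.array([0.0, 0.5, np.nan, 1.0])            # already normalized to [0, 1]
d2 = np.array([1.0, np.nan, 0.25, 0.0])

dim1 = np.nan_to_num(np.round(d1 * (cmap.shape[1] - 1))).astype(np.uint32)
dim2 = np.nan_to_num(np.round(d2 * (cmap.shape[0] - 1))).astype(np.uint32)
colored = cmap[dim2, dim1]                        # one RGBA row per sample
r, g, b, a = colored.T
a[np.isnan(d1) | np.isnan(d2)] = 0                # NaN inputs become transparent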
Example No. 8
def crossEntropy(a,t):
    a=np.array(a)
    t=np.array(t)
    num_samples=len(a[:,0])
    p=np.array([x/np.sum(x) for x in a]) # normalize output to [0,1]
    return np.sum(-t*np.nan_to_num(np.log(p))-
                  (1-t)*np.nan_to_num(np.log(1.-p)))/num_samples
Example No. 9
def __lazy_cost_function__(H, Y):
    result = 0.0
    for i in range(0, Y.shape[0]):
        a = np.nan_to_num(np.log2(H[i]) * Y[i])
        b = np.nan_to_num((1. - Y[i]) * np.log2((1. - H[i])))
        result += a + b
    return result
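Both snippets above wrap their logarithms in np.nan_to_num so that saturated predictions (probabilities of exactly 0 or 1) do not poison the sum: log(0) is -inf, and multiplying -inf by a zero weight would otherwise yield NaN. A small self-contained check of that guard on toy numbers (not taken from either project):

import numpy as np

t = np.array([1.0, 0.0, 1.0])   # targets
p = np.array([1.0, 0.0, 0.5])   # predictions, including exact 0 and 1
with np.errstate(divide='ignore'):
    ce = -t * np.nan_to_num(np.log(p)) - (1 - t) * np.nan_to_num(np.log(1 - p))
print(ce.sum() / len(t))        # finite; only the p=0.5 term contributes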
Example No. 10
    def compute_parameters(input_data, colors=None, rays=None):
        """
        Given a Connectivity matrix (required) and, optionally, two arrays that
        represent the rays and colors of the nodes from the matrix,
        this method builds the parameter dictionary that will be
        sent to the HTML/JS 3D representation of the connectivity matrix.
        """
        if colors is not None:
            color_list = colors.array_data.tolist()
            color_list = ABCDisplayer.get_one_dimensional_list(color_list, input_data.number_of_regions,
                                                               "Invalid input size for Sphere Colors")
            color_list = numpy.nan_to_num(numpy.array(color_list, dtype=numpy.float64)).tolist()
        else:
            color_list = [1.0] * input_data.number_of_regions

        if rays is not None:
            rays_list = rays.array_data.tolist()
            rays_list = ABCDisplayer.get_one_dimensional_list(rays_list, input_data.number_of_regions,
                                                              "Invalid input size for Sphere Sizes")
            rays_list = numpy.nan_to_num(numpy.array(rays_list, dtype=numpy.float64)).tolist()
        else:
            rays_list = [1.0] * input_data.number_of_regions

        params = dict(raysArray=json.dumps(rays_list), rayMin=min(rays_list), rayMax=max(rays_list),
                      colorsArray=json.dumps(color_list), colorMin=min(color_list), colorMax=max(color_list))
        return params, {}
Example No. 11
    def compare_derivatives(self, var_in, var_out, rel_error=False):

        model = self.model

        # Numeric
        Jn = model.calc_gradient(var_in, var_out, mode="fd",
                                 return_format='array')
        #print 'finite diff', Jn

        # Analytic forward
        Jf = model.calc_gradient(var_in, var_out, mode='fwd',
                                 return_format='array')

        #print 'forward', Jf

        if rel_error:
            diff = np.nan_to_num(abs(Jf - Jn) / Jn)
        else:
            diff = abs(Jf - Jn)

        assert_rel_error(self, diff.max(), 0.0, 1e-3)

        # Analytic adjoint
        Ja = model.calc_gradient(var_in, var_out, mode='rev',
                                 return_format='array')

        # print Ja

        if rel_error:
            diff = np.nan_to_num(abs(Ja - Jn) / Jn)
        else:
            diff = abs(Ja - Jn)

        assert_rel_error(self, diff.max(), 0.0, 1e-3)
Example No. 12
    def attractive(self):

        # Potential energy and force r < self.r_c
        r_low = np.arange(0, self.r_c, 0.02)
        with np.errstate(all='ignore'):
            v_att_low = np.zeros_like(r_low) - 1
            v_rep_low = np.zeros_like(r_low) + 4.0*((self.sigma/r_low) ** 12 - (self.sigma/r_low) ** 6 + (1.0 / 4.0))
            force_low = np.zeros_like(r_low) + 4.0*(12*(self.sigma**12)/(r_low**13) - (6*(self.sigma**6)/(r_low**7)))

        # Potential energy and force r_c <= r <= r_c + w_c
        r_mid = np.arange(r_low[len(r_low)-1] + 0.02, self.r_c + self.w_c, 0.02)
        v_att_mid = np.zeros_like(r_mid) - (np.cos(np.pi*(r_mid - self.r_c)/(2.0*self.w_c)))**2
        v_rep_mid = np.zeros_like(r_mid)
        force_mid = np.zeros_like(r_mid) - np.cos(np.pi*(r_mid - self.r_c)/(2.0*self.w_c))*np.sin(np.pi*(r_mid - self.r_c)/(2.0*self.w_c))*(np.pi/self.w_c)

        # For r > r_c + w_c
        r_hi = np.arange(r_mid[len(r_mid)-1] + 0.02, 4.02, 0.02)
        v_att_hi = np.zeros_like(r_hi)
        v_rep_hi = np.zeros_like(r_hi)
        force_hi = np.zeros_like(r_hi)

        # Concatenate for full attractive forces
        r = np.append(np.append(r_low, r_mid), r_hi)
        v_attractive = np.append(np.append((v_att_low + v_rep_low), (v_att_mid + v_rep_mid)), v_att_hi+v_rep_hi)
        force_attractive = np.append(np.append(force_low, force_mid), force_hi)

        force_attractive = np.nan_to_num(force_attractive)
        v_attractive = np.nan_to_num(v_attractive)
        r[0] = 1.0e-6
        return r, force_attractive, v_attractive
Example No. 13
    def costf(self, train_data, train_targets):
        '''The train_data should contain the training inputs and
        train_targets the target vectors. Evaluates the cross entropy cost
        with the current set of data and parameters.'''
        Y = self.Y(train_data)
        J = -sum([dot(t, ly) for t, ly in zip(train_targets, np.nan_to_num(np.log(np.nan_to_num(Y))))])
        return J
Example No. 14
def data_prepare():
    dataset1 = np.loadtxt('1.txt',dtype=float) 
    dataset1 = np.nan_to_num(normalize_cols(dataset1))
    label1 = np.ones((len(dataset1),1))            # generate label1
    dataset1 = np.concatenate((dataset1,label1),axis=1)  
    
    dataset2 = np.loadtxt('2.txt',dtype=float)    
    dataset2 = np.nan_to_num(normalize_cols(dataset2))
    label2 = 2*np.ones((len(dataset2),1))          # generate label2
    dataset2 = np.concatenate((dataset2,label2),axis=1)  
    
    dataset3 = np.loadtxt('3.txt',dtype=float)    
    dataset3 = np.nan_to_num(normalize_cols(dataset3))
    label3 = 3*np.ones((len(dataset3),1))          # generate label3
    dataset3 = np.concatenate((dataset3,label3),axis=1)      
        
    dataset4 = np.loadtxt('4.txt',dtype=float)    
    dataset4 = np.nan_to_num(normalize_cols(dataset4))
    label4 = 4*np.ones((len(dataset4),1))          # generate label4
    dataset4 = np.concatenate((dataset4,label4),axis=1)     

    dataset5 = np.loadtxt('5.txt',dtype=float)    
    dataset5 = np.nan_to_num(normalize_cols(dataset5))
    label5 = 5*np.ones((len(dataset5),1))          # generate label5
    dataset5 = np.concatenate((dataset5,label5),axis=1)     
    
    dataset = np.concatenate((dataset1,dataset2,dataset3,dataset4,dataset5,dataset5,dataset5),axis=0)
    random.shuffle(dataset)                       #random shuffle dataset
    return (dataset) 
Example No. 15
def test_mflist():
    ml = flopy.modflow.Modflow(model_ws=out_dir)
    dis = flopy.modflow.ModflowDis(ml, 10, 10, 10, 10)
    sp_data = {0: [[1, 1, 1, 1.0], [1, 1, 2, 2.0], [1, 1, 3, 3.0]],
               1: [1, 2, 4, 4.0]}
    wel = flopy.modflow.ModflowWel(ml, stress_period_data=sp_data)
    m4ds = ml.wel.stress_period_data.masked_4D_arrays

    sp_data = flopy.utils.MfList.masked4D_arrays_to_stress_period_data \
        (flopy.modflow.ModflowWel.get_default_dtype(), m4ds)
    assert np.array_equal(sp_data[0], ml.wel.stress_period_data[0])
    assert np.array_equal(sp_data[1], ml.wel.stress_period_data[1])
    # the last entry in sp_data (kper==9) should equal the last entry
    # with actual data in the well file (kper == 1)
    assert np.array_equal(sp_data[9], ml.wel.stress_period_data[1])

    pth = os.path.join('..', 'examples', 'data', 'mf2005_test')
    ml = flopy.modflow.Modflow.load(os.path.join(pth, "swi2ex4sww.nam"),
                                    verbose=True)
    m4ds = ml.wel.stress_period_data.masked_4D_arrays

    sp_data = flopy.utils.MfList.masked4D_arrays_to_stress_period_data \
        (flopy.modflow.ModflowWel.get_default_dtype(), m4ds)

    # make a new wel file
    wel = flopy.modflow.ModflowWel(ml, stress_period_data=sp_data)
    flx1 = m4ds["flux"]
    flx2 = wel.stress_period_data.masked_4D_arrays["flux"]

    flx1 = np.nan_to_num(flx1)
    flx2 = np.nan_to_num(flx2)

    assert flx1.sum() == flx2.sum()
Example No. 16
  def transform_row(self, i, df, data_dir):
    """
    Normalizes the data (X, y, w, ...) in a single row.
    """
    row = df.iloc[i]

    if self.transform_X:
      X = load_from_disk(
          os.path.join(data_dir, row['X-transformed']))
      X = np.nan_to_num((X - self.X_means) / self.X_stds)
      save_to_disk(X, os.path.join(data_dir, row['X-transformed']))

    if self.transform_y:

      y = load_from_disk(os.path.join(data_dir, row['y-transformed']))

      # transform tasks as normal
      y = np.nan_to_num((y - self.y_means) / self.y_stds)

      # add 2nd order correction term to gradients
      grad_var = 1/self.y_stds[0]*(self.ydely_means-self.y_means[0]*self.y_means[1:])
      for i in range(y.shape[0]):
        y[i,1:] = y[i,1:] - grad_var*y[i,0]/self.y_stds[0]

      save_to_disk(y, os.path.join(data_dir, row['y-transformed']))
Example No. 17
def AvgQE(x, y, ye, bin, bintype=1, hardlimit=0, binmax=None):
    '''Average values of scatter plot'''
    def HelpMe(kk, BR, ii):
        if len(kk) > 0:
            return np.average(kk)
        elif len(kk) == 0:
            return (BR[ii-1] + BR[ii])/2.


    x = array(x)
    y = array(y)
    w = 1 / array(ye)**2.
    BinNo, BinsReturned = ReturnBins(x, bin, bintype=bintype, hardlimit=hardlimit, binmax=binmax)
    #print BinNo, BinsReturned
    #BinSize = np.max(BinNo)+1
    BinSize = len(BinsReturned)
    xavg = [HelpMe(x[BinNo == i], BinsReturned, i) for i in range(1, BinSize)]
    xstd = [np.std(x[BinNo == i]) for i in range(1, BinSize)]
    yavg = [np.average(y[BinNo == i], weights=w[BinNo == i]) for i in range(1, BinSize)]
    ystd = [np.sqrt(1/np.sum(w[BinNo == i])) for i in range(1, BinSize)]
    N = [y[BinNo == i].shape[0] for i in range(1, BinSize)]
    xavg = np.array(xavg)
    xstd = np.array(xstd)
    yavg = np.array(yavg)
    ystd = np.array(ystd)
    N = np.array(N)
    yavg = np.nan_to_num(yavg)
    ystd = np.nan_to_num(ystd)
    xstd = np.nan_to_num(xstd)
    N = np.nan_to_num(N)
    return xavg, xstd, yavg, ystd, N
Example No. 18
def list_of_tuples_rm_flat_avg_signal(ret_values,lot_list,average_signal,discr_coefficient):
	oper_list=list()
	if len(ret_values[0,:]) >= len(average_signal):
		v_length=len(average_signal)
	else:
		v_length=len(ret_values[0,:])
	for x in ret_values:
		oper_list.append(np.trapz(np.nan_to_num(np.abs(x[:v_length])),x=np.arange(v_length)))

	i=lot_list[0][1][0]
	lot_rm_index=list()
	lotcopy=list()
	count=0
	#rmcount=0
	#rescaling
	average_signal=average_signal[:v_length]
	#integrating average signal
	avg_integral=np.trapz(np.nan_to_num(np.abs(average_signal)),x=np.arange(v_length))

	for a in lot_list:
		j=a[1][1]
		if ( np.abs(avg_integral - oper_list[j])/oper_list[j] > discr_coefficient) :
			lot_rm_index.append(count)
			#rmcount+=1
			count+=1
			continue
		lotcopy.append(a) # i.e. if the coefficient check trips, this row is not written to the new list; otherwise the row (whose oper value agrees) is simply copied over
		count+=1
	#print 'Removed a total of ', rmcount, ' signals'
	#print 'Reached count:', count
	return lotcopy
Example No. 19
  def transform_array(self, X, y, w):
    """Transform the data in a set of (X, y, w) arrays."""
    if self.transform_X:
      X = np.nan_to_num((X - self.X_means) / self.X_stds)
    if self.transform_y:
      y = np.nan_to_num((y - self.y_means) / self.y_stds)
    return (X, y, w)
Example No. 20
def linear_regression(folder):
	statsfldr = folder + statsfldrext
	line_fit_log = [f for f in os.listdir(statsfldr) if re.search("linefit.txt", f)]
	if not len(line_fit_log):
		print "Calculating the linear regression\n"
		av, cols, rows = open_as_array(statsfldr + "/average" + ext)
		avg_array = av.ravel() # ravel converts the 2d array to a 1d array
		avg_array = array(avg_array)
		# Find the linear model for SNR as a function of AVERAGE, y = snr, x = avg
		snr, cols, rows = open_as_array(statsfldr + "/SNR" + ext)
		snr_array = snr.ravel()
		yy = numpy.nan_to_num(snr_array)
		print(len(yy))
		xx = numpy.nan_to_num(avg_array)
		# y = numpy.array([a[:5] for a in yy])
		s = 1 # keep only s per-thousand of the original data
		length = len(yy)
		mini = (length - length * s // 1000) // 2  # integer division keeps the slice indices ints
		maxi = length - mini
		print(mini)
		print(maxi)
		y = yy[mini:maxi]
		print(len(y))
		x = xx[mini:maxi]
		y[y > 4095] = 4095 # clamp any absurdly high values to 4095
		x[x > 4095] = 4095 # clamp any absurdly high values to 4095
		print('this part takes time')
		popt, pcov = curve_fit(fit_func_line, x, y) # curve fit needs a function to call to return the fit
		write_to_log('\t' + str(datetime.datetime.now()) + '    Calculated the linear regression\n')
		with open(statsfldr + "/linefit.txt", "w") as text_file: # "a" is to append, "w" is to overwrite
			text_file.write(str(popt))
		plotting(x,y,popt)
	return
Example No. 21
def costFunctionReg(theta, X, y, lmbda):
    # Initialize some useful values
    m = y.shape[0]  # number of training examples

    # You need to return the following variables correctly
    J = 0
    grad = np.zeros(theta.shape)

    # ====================== YOUR CODE HERE ======================

    def h(X, theta):
        return X.dot(theta)

    J = float(-y.T * np.nan_to_num(np.log(sigmoid(h(X, theta))).T) -
              (1 - y).T * np.nan_to_num(np.log(1 - sigmoid(h(X, theta))).T)) / m
    reg_cost = theta.copy()
    reg_cost[0] = 0
    J += (lmbda * reg_cost.T.dot(reg_cost)) / (2 * m)

    grad = (sigmoid(h(X, theta)) - y.T).dot(X) / m
    reg_grad = theta * (float(lmbda) / m)
    reg_grad[0] = 0
    grad = grad.A1 + reg_grad

    # =============================================================

    return (J, grad)
Example No. 22
    def test_unity_3x3_withnan(self, boundary):
        '''
        Test that a 3x3 unit kernel returns the same array (except when
        boundary is None). This version includes a NaN value in the original
        array.
        '''

        x = np.array([[1., 2., 3.],
                      [4., np.nan, 6.],
                      [7., 8., 9.]], dtype='>f8')

        y = np.array([[0., 0., 0.],
                      [0., 1., 0.],
                      [0., 0., 0.]], dtype='>f8')

        z = convolve(x, y, boundary=boundary, nan_treatment='fill',
                     preserve_nan=True)

        assert np.isnan(z[1, 1])
        x = np.nan_to_num(z)
        z = np.nan_to_num(z)

        if boundary is None:
            assert np.all(z == np.array([[0., 0., 0.],
                                         [0., 0., 0.],
                                         [0., 0., 0.]], dtype='>f8'))
        else:
            assert np.all(z == x)
Example No. 23
  def write_data_array(self,output_file_name,times):
    # find distance to next nearest time                         
    f = h5py.File(output_file_name,'w')
    names = self.keys()
    lon = [self[n].meta['longitude'] for n in names]
    lat = [self[n].meta['latitude'] for n in names]
    positions = np.array([lon,lat]).transpose()
    f['position'] = positions
    f['name'] = names
    f['time'] = times
    f.create_dataset('mean',shape=(len(times),len(names),3),dtype=float)
    f.create_dataset('mask',shape=(len(times),len(names)),dtype=bool)
    f.create_dataset('covariance',shape=(len(times),len(names),3,3),dtype=float)
    f.create_dataset('variance',shape=(len(times),len(names),3),dtype=float)
    f.create_dataset('sigma',shape=(len(times),len(names),3),dtype=float)
    for i,n in enumerate(names):
      logger.info('writing displacement data for station %s' % n)
      mean,sigma = self[n](times)
      f['mean'][:,i,:] = mean
      f['mask'][:,i] = np.any(np.isinf(sigma),axis=1)
      f['covariance'][:,i,:,:] = np.array([np.diag(v) for v in np.nan_to_num(sigma**2)])
      f['variance'][:,i,:] = np.nan_to_num(sigma**2)
      f['sigma'][:,i,:] = np.nan_to_num(sigma)

    f.close()
Example No. 24
def decayCoefObjectiveFn(x, Y, EX2): 
	"""
	Computes the objective function for terms involving lambda in the M-step. 
	Checked. 
	Input: 
	x: value of lambda
	Y: the matrix of observed values
	EX2: the matrix of values of EX2 estimated in the E-step. 
	Returns: 
	obj: value of objective function
	grad: gradient
	"""
	with warnings.catch_warnings():
		warnings.simplefilter("ignore")
		y_squared = Y ** 2
		Y_is_zero = np.abs(Y) < 1e-6
		exp_Y_squared = np.exp(-x * y_squared)
		log_exp_Y = np.nan_to_num(np.log(1 - exp_Y_squared))
		exp_ratio = np.nan_to_num(exp_Y_squared / (1 - exp_Y_squared))
		obj = sum(sum(Y_is_zero * (-EX2*x) + (1 - Y_is_zero) * log_exp_Y))
		grad = sum(sum(Y_is_zero * (-EX2) + (1 - Y_is_zero) * y_squared * exp_ratio))
		if type(obj) is np.float64:
			obj = -np.array([obj])
		if type(grad) is np.float64:
			grad = -np.array([grad])
		return obj, grad
Example No. 25
def compare_csv(csv1, csv2, column_headers=True, eps=1e-3):
    """
    """
    column_types = DTYPE_MAP[os.path.basename(csv1)]
    da1 = read_csv(csv1, column_headers, column_types)
    da2 = read_csv(csv2, column_headers, column_types)
    # compare column names
    ret = da1.dtype.names == da2.dtype.names
    # compare all string columns
    fields = [field for field in da1.dtype.fields
              if da1.dtype[field].kind == 'S']
    if fields:
        ret = ret and np.array_equal(da1[fields], da2[fields])
    # compare all integer fields
    fields = [field for field in da1.dtype.fields
              if da1.dtype[field].kind == 'i']
    if fields:
        ret = ret and np.array_equal(da1[fields], da2[fields])
    # compare all float type columns (with epsilon)
    fields = [field for field in da1.dtype.fields
              if da1.dtype[field].kind == 'f']
    # make copy of float view, so that we can safely replace nan's
    if fields:
        fa1 = np.nan_to_num(da1[fields].view((float, len(fields))))
        fa2 = np.nan_to_num(da2[fields].view((float, len(fields))))
        ret = ret and np.allclose(fa1, fa2, rtol=0, atol=eps)
    return ret
Example No. 26
def compute_homogeneous_statistics(unit_statistic, unit_statistic_permutation, p_value_threshold, homogeneous_statistic='normalized MMD2u', verbose=True):
    """Compute p_values from permutations and create homogeneous statistics.
    """
    # Compute p-values for each unit    
    print("Homogeneous statistic: %s" % homogeneous_statistic)
    print("Computing MMD2u thresholds for each unit with p-value=%f" % p_value_threshold)
    mmd2us_threshold = compute_statistic_threshold(unit_statistic_permutation, p_value_threshold)
    print("Computing actual p-values at each unit on the original (unpermuted) data")
    p_value = compute_pvalues_from_permutations(unit_statistic, unit_statistic_permutation)
    print("Computing the p-value of each permutation of each unit.")
    p_value_permutation = compute_pvalues_of_permutations(unit_statistic_permutation)

    # Here we try to massage the unit statistic so that it becomes homogeneous across different units, to compute the cluster statistic later on
    if homogeneous_statistic == '1-p_value': # Here we use (1-p_value) instead of the MMD2u statistic : this is perfectly homogeneous across units because the p_value is uniformly distributed, by definition
        unit_statistic_permutation_homogeneous = 1.0 - p_value_permutation
        unit_statistic_homogeneous = 1.0 - p_value
    elif homogeneous_statistic == 'normalized MMD2u': # Here we use a z-score of MMD2u, which is good if its distribution is normal or approximately normal
        mmd2us_mean = unit_statistic_permutation.mean(1)
        mmd2us_std = unit_statistic_permutation.std(1)
        unit_statistic_permutation_homogeneous = np.nan_to_num((unit_statistic_permutation - mmd2us_mean[:,None]) / mmd2us_std[:,None])
        unit_statistic_homogeneous = np.nan_to_num((unit_statistic - mmd2us_mean) / mmd2us_std)
    elif homogeneous_statistic == 'unit_statistic': # Here we use the unit statistic assuming that it is homogeneous across units (this is not much true)
        unit_statistic_permutation_homogeneous = unit_statistic_permutation
        unit_statistic_homogeneous = unit_statistic
    elif homogeneous_statistic == 'p_value': # Here we use p_value instead of the MMD2u statistic : this is perfectly homogeneous across units because the p_value is uniformly distributed, by definition
        unit_statistic_permutation_homogeneous = p_value_permutation
        unit_statistic_homogeneous = p_value
    else:
        raise Exception

    return p_value, p_value_permutation, unit_statistic_homogeneous, unit_statistic_permutation_homogeneous
Example No. 27
    def setup_measureCrosstalk(self, isTrimmed=False, nSources=8):
        """Generate a simulated set of exposures and test the measured
        crosstalk matrix.

        Parameters
        ----------
        isTrimmed : `bool`, optional
            Should the simulation use trimmed or untrimmed raw
            exposures?
        nSources : `int`, optional
            Number of random simulated sources to generate in the
            simulated exposures.

        Returns
        -------
        coeffErr : `np.ndarray`
            Array of booleans indicating if the measured and expected
            crosstalk ratios are smaller than the measured uncertainty
            in the crosstalk ratio.
        """
        config = isrMock.IsrMockConfig()
        config.rngSeed = 12345
        config.doAddCrosstalk = True
        config.doAddSky = True
        config.doAddSource = True
        config.skyLevel = 0.0
        config.readNoise = 0.0
        mcConfig = MeasureCrosstalkConfig()
        mcConfig.threshold = 4000
        mct = MeasureCrosstalkTask(config=mcConfig)
        fullResult = []

        config.isTrimmed = isTrimmed

        # Generate simulated set of exposures.
        for idx in range(0, 10):
            config.rngSeed = 12345 + idx * 1000

            # Allow each simulated exposure to have nSources random
            # bright sources.
            config.sourceAmp = (np.random.randint(8, size=nSources)).tolist()
            config.sourceFlux = ((np.random.random(size=nSources) * 25000.0 + 20000.0).tolist())
            config.sourceX = ((np.random.random(size=nSources) * 100.0).tolist())
            config.sourceY = ((np.random.random(size=nSources) * 50.0).tolist())

            exposure = isrMock.CalibratedRawMock(config=config).run()
            result = mct.run(exposure, dataId=None)
            fullResult.append(result)

        # Generate the final measured CT ratios, uncertainties, pixel counts.
        coeff, coeffSig, coeffNum = mct.reduce(fullResult)

        # Needed because measureCrosstalk cannot find coefficients equal to 0.0
        coeff = np.nan_to_num(coeff)
        coeffSig = np.nan_to_num(coeffSig)

        # Compare result against expectation used to create the simulation.
        expectation = isrMock.CrosstalkCoeffMock().run()
        coeffErr = abs(coeff - expectation) <= coeffSig
        return coeffErr
Example No. 28
    def test_unity_3_withnan(self, boundary, nan_treatment,
                             normalize_kernel, preserve_nan):
        '''
        Test that a unit kernel with three elements returns the same array
        (except when boundary is None). This version includes a NaN value in
        the original array.
        '''

        x = np.array([1., np.nan, 3.], dtype='>f8')

        y = np.array([0., 1., 0.], dtype='>f8')

        z = convolve(x, y, boundary=boundary, nan_treatment=nan_treatment,
                     normalize_kernel=normalize_kernel,
                     preserve_nan=preserve_nan)

        if preserve_nan:
            assert np.isnan(z[1])

        x = np.nan_to_num(z)
        z = np.nan_to_num(z)

        if boundary is None:
            assert np.all(z == np.array([0., 0., 0.], dtype='>f8'))
        else:
            assert np.all(z == x)
Example No. 29
def forward_procedure(A, B, PI, O, wx, pubmsg):
    T = len(O)
    N = len(B)
    alpha = numpy.zeros((N,  T))
    C = numpy.zeros(T)

    alpha[:,0] = PI * [B[i](O[0]) for i in range(N)]

    C[0] = 1.0/numpy.sum(alpha[:,0])
    alpha[:,0] = C[0] * alpha[:,0]


    ITERATIONS = T*4
    count = 2*T
    for t in range(1, T):
        #B[i](O[:,t])  =>  numpy.prod(B[i](O[:,t]))
        #b_o = numpy.array([numpy.prod(B[i](O[:,t])) for i in range(N)])
        b_o = [B[i](O[t]) for i in range(N)]

        alpha[:,t] = numpy.dot(alpha[:,t-1], A) * b_o

        C[t] = numpy.nan_to_num(1.0/numpy.sum(alpha[:,t]))
        alpha[:,t] = numpy.nan_to_num(alpha[:,t] * C[t])

        if numpy.sum(alpha[:,t]) == 0:
            alpha[:,t] = 0.0000000000001

        if wx: wx.CallAfter(pubmsg, "hmm", msg="Running HMM Method... %2.0f%%" % (100.0*(count-1)/(ITERATIONS)))
        count+=1
        #print t, O[:,t], alpha[:,t]

    log_Prob_Obs = - (numpy.sum(numpy.log(C)))
    return(( log_Prob_Obs, alpha, C ))
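The forward procedure above rescales each alpha column so it sums to one and recovers the log-likelihood as -sum(log C). A minimal self-contained sketch of that scaling trick with a made-up 2-state HMM over discrete symbols (all matrices below are hypothetical):

import numpy as np

A = np.array([[0.9, 0.1], [0.2, 0.8]])   # transition probabilities
B = np.array([[0.7, 0.3], [0.1, 0.9]])   # B[state, symbol] emission probabilities
PI = np.array([0.5, 0.5])                # initial state distribution
O = [0, 1, 1, 0]                         # observed symbol sequence

alpha = PI * B[:, O[0]]
C = [1.0 / alpha.sum()]
alpha = alpha * C[0]
for o in O[1:]:
    alpha = alpha.dot(A) * B[:, o]       # same recursion as alpha[:, t] above
    C.append(1.0 / alpha.sum())
    alpha = alpha * C[-1]

log_prob_obs = -np.sum(np.log(C))        # log P(O | model), as returned above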
Example No. 30
    def test_unity_3x3x3_withnan(self, boundary, nan_treatment):
        '''
        Test that a 3x3x3 unit kernel returns the same array (except when
        boundary is None). This version includes a NaN value in the original
        array.
        '''

        x = np.array([[[1., 2., 1.], [2., 3., 1.], [3., 2., 5.]],
                      [[4., 3., 1.], [5., np.nan, 2.], [6., 1., 1.]],
                      [[7., 0., 2.], [8., 2., 3.], [9., 2., 2.]]], dtype='>f8')

        y = np.zeros((3, 3, 3), dtype='>f8')
        y[1, 1, 1] = 1.

        z = convolve(x, y, boundary=boundary, nan_treatment=nan_treatment,
                     preserve_nan=True)

        assert np.isnan(z[1, 1, 1])
        x = np.nan_to_num(z)
        z = np.nan_to_num(z)

        if boundary is None:
            assert np.all(z == np.array([[[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
                                         [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]],
                                         [[0., 0., 0.], [0., 0., 0.], [0., 0., 0.]]], dtype='>f8'))
        else:
            assert np.all(z == x)
Example No. 31
# preprocess data
feature_unscaled = feature.values.reshape(feature.shape[0],-1)
#feature2_unscaled = feature2.values.reshape(feature.shape[0],-1)

#feature_unscaled = np.concatenate((feature_unscaled, feature2_unscaled), axis=1)
label_unscaled = label.values.reshape(label.shape[0],-1)


scaler_f = StandardScaler()
Xorg = scaler_f.fit_transform(feature_unscaled)

scaler_l = StandardScaler()
yorg = scaler_l.fit_transform(label_unscaled)

Xall = np.nan_to_num(Xorg)
yall = np.nan_to_num(yorg)

# shift
shift = 3
for lead in [3, 6, 9, 12, 15, 0]:
    print_header(f'Lead time: {lead} month')

    y = yall[lead+shift:]
    X = Xall[:-lead-shift]
    timey = oni.index[lead+shift:]

    for decade in [60, 70, 80, 90, 100, 110]:
        print_header(f'Test period: {1902+decade}-01-01 till {1911+decade}-12-01')
        K.clear_session()
Example No. 32
def automatic_community_detector(G, 
                                 method = 'un_louvain',
                                 scale = 'log',
                                 figsize = (15,15),
                                 dpi = 100.,
                                 ylabel = 'Presynaptic Neuron ID',
                                 xlabel = 'Postsynaptic Neuron ID',
                                 xticklabels = None,
                                 yticklabels = None,
                                 title = 'Community-Ordered Connectivity Matrix (Log Scale)',
                                 vmax = None,
                                 export_format = None,
                                 figname = 'example_automatic_community'):
    """ Automatically detects communities for a large networkx graph.
    
    # Arguments:
        G (network.Graph): A networkx graph or subclass object, including flybrainlab.graph.NeuronGraph.
        method (str): Method to use. One of un_louvain, label_propagation, leiden, louvain, walktrap or infomap.
        scale (str): 'linear', 'log' or 'scaledlog'. Use linear or log scale to cluster. 'scaledlog' uses 50th percentile of nonzero entries as vmax.
        figsize (tuple): size of the figure.
        dpi (float): dpi of the figure.
        xlabel (str): Name of the x label.
        ylabel (str): Name of the y label.
        xticklabels (list): x tick labels to have.
        yticklabels (list): y tick labels to have.
        title (str): Title for the diagram.
        vmax (float): Maximum value for the diagram.
        export_format (str): if specified, file format to export the diagram.
        figname (str): Name for the diagram.
    
    # Returns:
        np.ndarray: community-ordered connectivity matrix in linear scale
        list: a list of list of node ids for each group member
    """
    Gun, all_list_nodes, all_nodes  = community_detection(G, method = method)
    
    all_pre_nodes = [i for i in all_list_nodes]
    all_post_nodes = [i for i in all_list_nodes]
    B = nx.adjacency_matrix(G).todense()[np.ix_(all_pre_nodes,all_post_nodes)].copy()

    if xticklabels is None:
        if isinstance(G, NeuronGraph):
            xticklabels = sum(nodes_to_unames(G, all_nodes),[])
        else:
            xticklabels = sum(all_nodes, [])
    if yticklabels is None:
        if isinstance(G, NeuronGraph):
            yticklabels = sum(nodes_to_unames(G, all_nodes),[])
        else:
            yticklabels = sum(all_nodes, [])
    if scale == 'log':
        Bd = np.log10(1.+B)
        # Bd[Bd>np.percentile(Bd, 90) = np.percentile(Bd, 90)
        # Bd[Bd<np.percentile(Bd, 10) = np.percentile(Bd, 10)
    elif scale == 'scaledlog':
        print('Min B:', np.min(B))
        Bd = np.log10(1.+B)
        # Bd[Bd>np.percentile(Bd, 90)] = np.percentile(Bd, 90)
    else:
        Bd = B
        if title == 'Community-Ordered Connectivity Matrix (Log Scale)':
            title = 'Community-Ordered Connectivity Matrix'

    if scale == 'scaledlog':
        Bd_s = np.array(Bd)
        Bd_s = Bd_s[Bd_s>0.]
        vmax = np.percentile(Bd_s, 50)
        Bd = np.nan_to_num(Bd)
        print('Scaled vmax:', vmax)
    sizes = np.cumsum([len(i) for i in all_nodes])
    gen_heatmap(Bd, figsize = figsize, dpi = dpi, xlabel = xlabel, ylabel = ylabel,
                xticklabels = xticklabels, yticklabels = yticklabels,
                hlines = sizes, vlines = sizes, title = title, vmax = vmax, vmin=0.,
                export_format = export_format, figname = figname)
    return B, all_nodes
Example No. 33
def highly_variable_genes(adata,
                          min_disp=None,
                          max_disp=None,
                          min_mean=None,
                          max_mean=None,
                          n_top_genes=None,
                          n_bins=20,
                          flavor='seurat',
                          binning_method='equal_width',
                          subset=False,
                          inplace=True):
    """Annotate highly variable genes [Satija15]_ [Zheng17]_.

    Expects logarithmized data.

    Depending on `flavor`, this reproduces the R-implementations of Seurat
    [Satija15]_ and Cell Ranger [Zheng17]_.

    The normalized dispersion is obtained by scaling with the mean and standard
    deviation of the dispersions for genes falling into a given bin for mean
    expression of genes. This means that for each bin of mean expression, highly
    variable genes are selected.

    Parameters
    ----------
    adata : :class:`~anndata.AnnData`
        The annotated data matrix of shape `n_obs` × `n_vars`. Rows correspond
        to cells and columns to genes.
    min_mean : `float`, optional (default: 0.0125)
        If `n_top_genes` is not `None`, this and all other cutoffs for the means and the
        normalized dispersions are ignored.
    max_mean : `float`, optional (default: 3)
        If `n_top_genes` is not `None`, this and all other cutoffs for the means and the
        normalized dispersions are ignored.
    min_disp : `float`, optional (default: 0.5)
        If `n_top_genes` is not `None`, this and all other cutoffs for the means and the
        normalized dispersions are ignored.
    max_disp : `float`, optional (default: `None`)
        If `n_top_genes` is not `None`, this and all other cutoffs for the means and the
        normalized dispersions are ignored.
    n_top_genes : `int` or `None`, optional (default: `None`)
        Number of highly-variable genes to keep.
    n_bins : `int`, optional (default: 20)
        Number of bins for binning the mean gene expression. Normalization is
        done with respect to each bin. If just a single gene falls into a bin,
        the normalized dispersion is artificially set to 1. You'll be informed
        about this if you set `settings.verbosity = 4`.
    flavor : `{'seurat', 'cell_ranger'}`, optional (default: 'seurat')
        Choose the flavor for computing normalized dispersion. In their default
        workflows, Seurat passes the cutoffs whereas Cell Ranger passes
        `n_top_genes`.
    binning_method : `{'equal_width', 'equal_frequency'}`, optional (default: 'equal_width')
        Choose the binning method for the means. In `equal_width`, each bin covers the same width.
        For `equal_frequency`, each bin has an equal number of genes. 
    subset : `bool`, optional (default: `False`)
        Inplace subset to highly-variable genes if `True` otherwise merely indicate
        highly variable genes.
    inplace : `bool`, optional (default: `True`)
        Whether to place calculated metrics in `.var` or return them.

    Returns
    -------
    :class:`~numpy.recarray`, `None`
        Depending on `inplace` returns calculated metrics (:class:`~numpy.recarray`) or
        updates `.var` with the following fields

        * `highly_variable` - boolean indicator of highly-variable genes
        * `means` - means per gene
        * `dispersions` - dispersions per gene
        * `dispersions_norm` - normalized dispersions per gene

    Notes
    -----
    This function replaces :func:`~scanpy.pp.filter_genes_dispersion`.
    """
    logg.msg('extracting highly variable genes', r=True, v=4)

    if not isinstance(adata, AnnData):
        raise ValueError(
            '`pp.highly_variable_genes` expects an `AnnData` argument, '
            'pass `inplace=False` if you want to return a `np.recarray`.')

    if n_top_genes is not None and not all([
            min_disp is None, max_disp is None, min_mean is None,
            max_mean is None
    ]):
        logg.info('If you pass `n_top_genes`, all cutoffs are ignored.')
    if min_disp is None: min_disp = 0.5
    if min_mean is None: min_mean = 0.0125
    if max_mean is None: max_mean = 3

    X = np.expm1(adata.X) if flavor == 'seurat' else adata.X

    mean, var = materialize_as_ndarray(_get_mean_var(X))
    # now actually compute the dispersion
    mean[mean == 0] = 1e-12  # set entries equal to zero to small value
    dispersion = var / mean
    if flavor == 'seurat':  # logarithmized mean as in Seurat
        dispersion[dispersion == 0] = np.nan
        dispersion = np.log(dispersion)
        mean = np.log1p(mean)
    # all of the following quantities are "per-gene" here
    df = pd.DataFrame()
    df['mean'] = mean
    df['dispersion'] = dispersion
    if flavor == 'seurat':
        if binning_method == 'equal_width':
            df['mean_bin'] = pd.cut(df['mean'], bins=n_bins)
        elif binning_method == 'equal_frequency':
            df['mean_bin'] = pd.qcut(df['mean'], q=n_bins, duplicates='drop')
        else:
            raise ValueError(
                '`binning_method` needs to be "equal_width" or "equal_frequency"'
            )
        disp_grouped = df.groupby('mean_bin')['dispersion']
        disp_mean_bin = disp_grouped.mean()
        disp_std_bin = disp_grouped.std(ddof=1)
        # retrieve those genes that have a NaN std; these are the ones where
        # only a single gene fell in the bin, and implicitly set them to have
        # a normalized dispersion of 1
        one_gene_per_bin = disp_std_bin.isnull()
        gen_indices = np.where(
            one_gene_per_bin[df['mean_bin'].values])[0].tolist()
        if len(gen_indices) > 0:
            logg.msg(
                'Gene indices {} fell into a single bin: their '
                'normalized dispersion was set to 1.\n    '
                'Decreasing `n_bins` will likely avoid this effect.'.format(
                    gen_indices),
                v=4)
        # Circumvent pandas 0.23 bug. Both sides of the assignment have dtype==float32,
        # but there’s still a dtype error without “.value”.
        disp_std_bin[one_gene_per_bin.values] = disp_mean_bin[
            one_gene_per_bin.values].values
        disp_mean_bin[one_gene_per_bin.values] = 0
        # actually do the normalization
        df['dispersion_norm'] = ((
            df['dispersion'].values  # use values here as index differs
            - disp_mean_bin[df['mean_bin'].values].values) /
                                 disp_std_bin[df['mean_bin'].values].values)
    elif flavor == 'cell_ranger':
        from statsmodels import robust
        df['mean_bin'] = pd.cut(
            df['mean'],
            np.r_[-np.inf,
                  np.percentile(df['mean'], np.linspace(10, 100, n_bins - 1)),
                  np.inf])
        disp_grouped = df.groupby('mean_bin')['dispersion']
        disp_median_bin = disp_grouped.median()
        # the next line raises the warning: "Mean of empty slice"
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            disp_mad_bin = disp_grouped.apply(robust.mad)
        df['dispersion_norm'] = (
            np.abs(df['dispersion'].values -
                   disp_median_bin[df['mean_bin'].values].values) /
            disp_mad_bin[df['mean_bin'].values].values)
    else:
        raise ValueError('`flavor` needs to be "seurat" or "cell_ranger"')
    dispersion_norm = df['dispersion_norm'].values.astype('float32')
    if n_top_genes is not None:
        dispersion_norm = dispersion_norm[~np.isnan(dispersion_norm)]
        dispersion_norm[::-1].sort(
        )  # interestingly, np.argpartition is slightly slower
        disp_cut_off = dispersion_norm[n_top_genes - 1]
        gene_subset = np.nan_to_num(
            df['dispersion_norm'].values) >= disp_cut_off
        logg.msg(
            'the {} top genes correspond to a normalized dispersion cutoff of {}'.
            format(n_top_genes, disp_cut_off),
            v=5,
        )
    else:
        max_disp = np.inf if max_disp is None else max_disp
        dispersion_norm[np.isnan(dispersion_norm)] = 0  # similar to Seurat
        gene_subset = np.logical_and.reduce((
            mean > min_mean,
            mean < max_mean,
            dispersion_norm > min_disp,
            dispersion_norm < max_disp,
        ))

    logg.msg('    finished', time=True, v=4)

    if inplace or subset:
        logg.hint('added\n'
                  '    \'highly_variable\', boolean vector (adata.var)\n'
                  '    \'means\', float vector (adata.var)\n'
                  '    \'dispersions\', float vector (adata.var)\n'
                  '    \'dispersions_norm\', float vector (adata.var)')
        adata.var['highly_variable'] = gene_subset
        adata.var['means'] = df['mean'].values
        adata.var['dispersions'] = df['dispersion'].values
        adata.var['dispersions_norm'] = df['dispersion_norm'].values.astype(
            'float32', copy=False)
        if subset:
            adata._inplace_subset_var(gene_subset)
    else:
        arrays = (gene_subset, df['mean'].values, df['dispersion'].values,
                  df['dispersion_norm'].values.astype('float32', copy=False))
        dtypes = [
            ('highly_variable', np.bool_),
            ('means', 'float32'),
            ('dispersions', 'float32'),
            ('dispersions_norm', 'float32'),
        ]
        return np.rec.fromarrays(arrays, dtype=dtypes)
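The 'seurat' branch above amounts to a per-bin z-score of the dispersions: genes are bucketed by mean expression and each dispersion is centred and scaled with its own bin's statistics. A stripped-down sketch of that step on toy values, using groupby/transform instead of the explicit index lookups above (column names are illustrative):

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({'mean': rng.random(100), 'dispersion': rng.random(100)})
df['mean_bin'] = pd.cut(df['mean'], bins=5)          # equal-width binning
grouped = df.groupby('mean_bin', observed=True)['dispersion']
df['dispersion_norm'] = ((df['dispersion'] - grouped.transform('mean'))
                         / grouped.transform('std'))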
Example No. 34
for ii in hypR:
    # compute policies corresponding to each R in hypR
    # P is assumed to be known
    Q = VI(P,ii[:,np.newaxis],ns,na,l)
    hypQ[:,:,ic]=Q2OptAct(Q)
    ic+=1

D = []
# initial likelihood of each hyp
likhyp = np.ones((ns,))/ns

# active phase
for ii in range(0,ns):    
    #weight votes
    p = np.sum((hypQ*likhyp),axis=2)
    e = np.nan_to_num(-p*np.log(p),0)
    we = np.sum(e,axis=1)    
    #print(np.nonzero(we == np.max(we))[0])
    ss = random.choice(np.nonzero(we == np.max(we))[0])
    a = polO[ss]
    # add element (ss,a) to the demonstration
    D.append((ss,a))
    likhyp *= np.exp(5*hypQ[ss,a,:]-1)
    likhyp = likhyp/np.sum(likhyp)
    #print(ss,a,likhyp)
    # if the likelihood of a given hypothesis is bigger than 50%, then
    # it is probably the best one
    if np.max(likhyp>0.5):
        print("Final Dataset",D)
        break
Example No. 35
def get_mean(X_raw):
    # use masked array to ignore -1
    means = np.apply_along_axis(
        lambda x: [np.mean(np.ma.array(x, mask=(x == -1)))], 1,
        np.array(X_raw))
    return np.nan_to_num(means, nan=0.0)
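A quick usage sketch for the helper above on a toy input (assuming numpy is imported as np and get_mean is in scope): -1 entries are masked out of each row's mean, and the nan=0.0 argument (available in NumPy 1.17+) maps any row that produced NaN to 0.

X_raw = [[1.0, -1.0, 3.0],
         [2.0, 4.0, -1.0]]
print(get_mean(X_raw))   # [[2.], [3.]] -- only the entries that are not -1 are averaged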
Example No. 36
arr_sl[arr_sl < int(0)] = np.nan  # here negative values as NAs
arr_dem[arr_dem == int(65536)] = np.nan  # here 65535 as nas
arr_thp[arr_thp == int(65535)] = np.nan

print("mean slope", round(np.nanmean(arr_sl), 2))
print("max slope", round(np.nanmax(arr_sl), 2))
print("min slope", round(np.nanmin(arr_sl), 2))
print("mean dem", round(np.nanmean(arr_dem), 2))
print("max dem", round(np.nanmax(arr_dem), 2))
print("min dem", round(np.nanmin(arr_dem), 2))

# task 2
# binary raster where elevation < 1000 m and slope < 30 deg
# slope < 30 deg

arr_sl_bin = np.nan_to_num(arr_sl)
arr_sl_bin[arr_sl_bin < float(30.00)] = 1
arr_sl_bin[arr_sl_bin >= float(30.00)] = 0

np.unique(arr_sl_bin)

# elevation < 1000m
arr_dem_bin = np.nan_to_num(arr_dem)
arr_dem_bin[arr_dem_bin < float(1000.00)] = 1
arr_dem_bin[arr_dem_bin >= float(1000.00)] = 0

np.unique(arr_dem_bin)

# combine the two arrays
arr_comb = arr_dem_bin + arr_sl_bin
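As a hedged alternative sketch for the same task (assuming arr_sl and arr_dem as loaded above, and NumPy 1.17+ for the nan keyword): mapping NoData to +inf before the comparison keeps NaN cells out of the "passes the threshold" class, whereas nan_to_num with its default of 0 makes NoData satisfy both the < 30 and < 1000 tests. The _bin2/_comb2 names are just to avoid clobbering the arrays above.

arr_sl_bin2 = np.where(np.nan_to_num(arr_sl, nan=np.inf) < 30.0, 1, 0)
arr_dem_bin2 = np.where(np.nan_to_num(arr_dem, nan=np.inf) < 1000.0, 1, 0)
arr_comb2 = arr_dem_bin2 + arr_sl_bin2   # 2 where both conditions hold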
Example No. 37
    def step(self, dataloader, mode):
        print('Start {}'.format(mode))
        # self.model = self.prev_model
        if mode == 'train':
            self.model.train()
        elif mode == 'val' or mode == 'test':
            self.model.eval()

        loss_sum = 0
        confidence_loss_sum = 0
        depth_loss_sum = 0
        rotation_loss_sum = 0
        rotation_loss_count = 0

        for index, (hp_data, depth_image, camera_info_path, hp_data_gt,
                    annotation_data) in tqdm.tqdm(enumerate(dataloader),
                                                  total=len(dataloader),
                                                  desc='{} epoch={}'.format(
                                                      mode, self.epo),
                                                  leave=False):

            # if index == 0:
            #     self.model = self.prev_model

            self.cameramodel\
                = cameramodels.PinholeCameraModel.from_yaml_file(
                    camera_info_path[0])
            self.cameramodel.target_size = self.target_size

            depth_image = hp_data.numpy().copy()[0, 0, ...]
            depth_image = np.nan_to_num(depth_image)
            depth_image = unnormalize_depth(depth_image, self.depth_range[0],
                                            self.depth_range[1])
            hp_data = hp_data.to(self.device)

            depth_image_bgr = colorize_depth(depth_image,
                                             ignore_value=self.depth_range[0])

            if mode == 'train':
                confidence, depth, rotation = self.model(hp_data)
            elif mode == 'val' or mode == 'test':
                with torch.no_grad():
                    confidence, depth, rotation = self.model(hp_data)

            confidence_np = confidence[0, ...].cpu().detach().numpy().copy()
            confidence_np[confidence_np >= 1] = 1.
            confidence_np[confidence_np <= 0] = 0.
            confidence_vis = cv2.cvtColor(confidence_np[0, ...] * 255,
                                          cv2.COLOR_GRAY2BGR)

            if mode != 'test':
                pos_weight = hp_data_gt.detach().numpy().copy()
                pos_weight = pos_weight[:, 0, ...]
                zeroidx = np.where(pos_weight < 0.5)
                nonzeroidx = np.where(pos_weight >= 0.5)
                pos_weight[zeroidx] = 0.5
                pos_weight[nonzeroidx] = 1.0
                pos_weight = torch.from_numpy(pos_weight)
                pos_weight = pos_weight.to(self.device)

                hp_data_gt = hp_data_gt.to(self.device)
                confidence_gt = hp_data_gt[:, 0:1, ...]
                rois_list_gt, rois_center_list_gt = find_rois(confidence_gt)

                criterion = HPNETLoss(self.use_coords).to(self.device)

                if self.model.rois_list is None or rois_list_gt is None:
                    return None, None

                annotated_rois = annotate_rois(self.model.rois_list,
                                               rois_list_gt, annotation_data)

                confidence_loss, depth_loss, rotation_loss = criterion(
                    confidence, hp_data_gt, pos_weight, depth, rotation,
                    annotated_rois)

                if self.train_depth:
                    loss = confidence_loss + rotation_loss + depth_loss
                else:
                    loss = confidence_loss + rotation_loss

                if torch.isnan(loss):
                    print('loss is nan!!')
                    self.model = self.prev_model
                    self.optimizer = torch.optim.Adam(self.model.parameters(),
                                                      lr=self.lr,
                                                      betas=(0.9, 0.999),
                                                      eps=1e-10,
                                                      weight_decay=0,
                                                      amsgrad=False)
                    self.optimizer.load_state_dict(
                        self.prev_optimizer.state_dict())
                    continue
                else:
                    self.prev_model = copy.deepcopy(self.model)
                    self.prev_optimizer = copy.deepcopy(self.optimizer)

                if mode == 'train':
                    self.optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5)
                    self.optimizer.step()

                axis_gt = depth_image_bgr.copy()

                confidence_gt_vis = cv2.cvtColor(
                    confidence_gt[0, 0, ...].cpu().detach().numpy().copy() *
                    255, cv2.COLOR_GRAY2BGR)

                # Visualize gt axis and roi
                for roi, roi_c in zip(rois_list_gt[0], rois_center_list_gt[0]):
                    if roi.tolist() == [0, 0, 0, 0]:
                        continue
                    roi = roi.cpu().detach().numpy().copy()
                    cx = roi_c[0]
                    cy = roi_c[1]

                    depth_and_rotation_gt = get_value_gt([cx, cy],
                                                         annotation_data[0])
                    rotation_gt = depth_and_rotation_gt[1:]
                    depth_gt_val = depth_and_rotation_gt[0]
                    unnormalized_depth_gt_val = unnormalize_depth(
                        depth_gt_val, self.depth_range[0], self.depth_range[1])

                    hanging_point_pose = np.array(
                        self.cameramodel.project_pixel_to_3d_ray(
                            [int(cx), int(cy)])) \
                        * unnormalized_depth_gt_val * 0.001

                    if self.use_coords:
                        rot = quaternion2matrix(rotation_gt)

                    else:
                        v = np.matmul(quaternion2matrix(rotation_gt),
                                      [1, 0, 0])
                        rot = rotation_matrix_from_axis(v, [0, 1, 0], 'xy')
                    try:
                        draw_axis(axis_gt, rot, hanging_point_pose,
                                  self.cameramodel.K)
                    except Exception:
                        print('Failed to draw axis')

                    confidence_gt_vis = draw_roi(confidence_gt_vis,
                                                 roi,
                                                 val=depth_gt_val,
                                                 gt=True)
                    axis_gt = draw_roi(axis_gt, roi, val=depth_gt_val, gt=True)

            # Visualize pred axis and roi
            axis_pred = depth_image_bgr.copy()

            for i, (roi, roi_c) in enumerate(
                    zip(self.model.rois_list[0],
                        self.model.rois_center_list[0])):

                if roi.tolist() == [0, 0, 0, 0]:
                    continue
                roi = roi.cpu().detach().numpy().copy()
                cx = roi_c[0]
                cy = roi_c[1]

                dep = depth[i].cpu().detach().numpy().copy()
                normalized_dep_pred = float(dep)
                dep = unnormalize_depth(dep, self.depth_range[0],
                                        self.depth_range[1])

                confidence_vis = draw_roi(confidence_vis,
                                          roi,
                                          val=normalized_dep_pred)
                axis_pred = draw_roi(axis_pred, roi, val=normalized_dep_pred)

                if mode != 'test':
                    if annotated_rois[i][2]:
                        confidence_vis = draw_roi(confidence_vis,
                                                  annotated_rois[i][0],
                                                  val=annotated_rois[i][1][0],
                                                  gt=True)
                        axis_pred = draw_roi(axis_pred,
                                             annotated_rois[i][0],
                                             val=annotated_rois[i][1][0],
                                             gt=True)

                hanging_point_pose = np.array(
                    self.cameramodel.project_pixel_to_3d_ray(
                        [int(cx), int(cy)])) * float(dep * 0.001)

                if self.use_coords:
                    # have not check this yet
                    q = rotation[i].cpu().detach().numpy().copy()
                    q /= np.linalg.norm(q)
                    rot = quaternion2matrix(q)

                else:
                    v = rotation[i].cpu().detach().numpy()
                    v /= np.linalg.norm(v)
                    rot = rotation_matrix_from_axis(v, [0, 1, 0], 'xy')

                try:
                    draw_axis(axis_pred, rot, hanging_point_pose,
                              self.cameramodel.K)
                except Exception:
                    print('Failed to draw axis')

            axis_pred = cv2.cvtColor(axis_pred, cv2.COLOR_BGR2RGB)
            confidence_vis = cv2.cvtColor(confidence_vis, cv2.COLOR_BGR2RGB)

            if self.config['use_bgr']:
                if self.config['use_bgr2gray']:
                    in_gray = hp_data.cpu().detach().numpy().copy()[0, 1:2,
                                                                    ...] * 255
                    in_gray = in_gray.transpose(1, 2, 0).astype(np.uint8)
                    in_gray = cv2.cvtColor(in_gray, cv2.COLOR_GRAY2RGB)
                    in_gray = in_gray.transpose(2, 0, 1)
                    in_img = in_gray
                else:
                    in_bgr = hp_data.cpu().detach().numpy().copy()[
                        0, 3:, ...].transpose(1, 2, 0)
                    in_rgb = cv2.cvtColor(in_bgr, cv2.COLOR_BGR2RGB).transpose(
                        2, 0, 1)
                    in_img = in_rgb

            if mode != 'test':
                confidence_loss_sum += confidence_loss.item()

                axis_gt = cv2.cvtColor(axis_gt, cv2.COLOR_BGR2RGB)
                confidence_gt_vis = cv2.cvtColor(confidence_gt_vis,
                                                 cv2.COLOR_BGR2RGB)

                if rotation_loss.item() > 0:
                    depth_loss_sum += depth_loss.item()
                    rotation_loss_sum += rotation_loss.item()
                    loss_sum = loss_sum \
                        + confidence_loss.item() \
                        + rotation_loss.item()
                    rotation_loss_count += 1

                if np.mod(index, 1) == 0:
                    print(
                        'epoch {}, {}/{},{} loss is confidence:{} rotation:{} depth:{}'
                        .format(  # noqa
                            self.epo, index, len(dataloader), mode,
                            confidence_loss.item(), rotation_loss.item(),
                            depth_loss.item()))

                self.vis.images(
                    [axis_gt.transpose(2, 0, 1),
                     axis_pred.transpose(2, 0, 1)],
                    win='{} axis'.format(mode),
                    opts=dict(title='{} axis'.format(mode)))
                self.vis.images(
                    [
                        confidence_gt_vis.transpose(2, 0, 1),
                        confidence_vis.transpose(2, 0, 1)
                    ],
                    win='{}_confidence_roi'.format(mode),
                    opts=dict(title='{} confidence(GT, Pred)'.format(mode)))

                if self.config['use_bgr']:
                    self.vis.images([in_img],
                                    win='{} in_gray'.format(mode),
                                    opts=dict(title='{} in_gray'.format(mode)))
            else:
                if self.config['use_bgr']:
                    self.vis.images(
                        [
                            in_img,
                            confidence_vis.transpose(2, 0, 1),
                            axis_pred.transpose(2, 0, 1)
                        ],
                        win='{}-{}'.format(mode, index),
                        opts=dict(
                            title='{}-{} hanging_point_depth (pred)'.format(
                                mode, index)))
                else:
                    self.vis.images(
                        [
                            confidence_vis.transpose(2, 0, 1),
                            axis_pred.transpose(2, 0, 1)
                        ],
                        win='{}-{}'.format(mode, index),
                        opts=dict(
                            title='{}-{} hanging_point_depth (pred)'.format(
                                mode, index)))

            if np.mod(index, 1000) == 0:
                save_file = osp.join(
                    self.save_dir,
                    'hpnet_latestmodel_' + self.time_now + '.pt')
                print('save {}'.format(save_file))
                torch.save(self.model.state_dict(),
                           save_file,
                           _use_new_zipfile_serialization=False)

        if mode != 'test':
            if len(dataloader) > 0:
                avg_confidence_loss\
                    = confidence_loss_sum / len(dataloader)
                if rotation_loss_count > 0:
                    avg_rotation_loss\
                        = rotation_loss_sum / rotation_loss_count
                    avg_depth_loss\
                        = depth_loss_sum / rotation_loss_count
                    avg_loss\
                        = loss_sum / rotation_loss_count
                else:
                    avg_rotation_loss = 1e10
                    avg_depth_loss = 1e10
                    avg_loss = 1e10
            else:
                avg_loss = loss_sum
                avg_confidence_loss = confidence_loss_sum
                avg_rotation_loss = rotation_loss_sum
                avg_depth_loss = depth_loss_sum

            self.vis.line(X=np.array([self.epo]),
                          Y=np.array([avg_confidence_loss]),
                          opts={'title': 'confidence'},
                          win='confidence loss',
                          name='{}_confidence_loss'.format(mode),
                          update='append')
            if rotation_loss_count > 0:
                self.vis.line(X=np.array([self.epo]),
                              Y=np.array([avg_rotation_loss]),
                              opts={'title': 'rotation loss'},
                              win='rotation loss',
                              name='{}_rotation_loss'.format(mode),
                              update='append')
                self.vis.line(X=np.array([self.epo]),
                              Y=np.array([avg_depth_loss]),
                              opts={'title': 'depth loss'},
                              win='depth loss',
                              name='{}_depth_loss'.format(mode),
                              update='append')
                self.vis.line(X=np.array([self.epo]),
                              Y=np.array([avg_loss]),
                              opts={'title': 'loss'},
                              win='loss',
                              name='{}_loss'.format(mode),
                              update='append')

            if mode == 'val':
                if np.mod(self.epo, self.save_model_interval) == 0:
                    save_file = osp.join(
                        self.save_dir,
                        'hpnet_latestmodel_' + self.time_now + '.pt')
                    print('save {}'.format(save_file))
                    torch.save(self.model.state_dict(),
                               save_file,
                               _use_new_zipfile_serialization=False)

                if self.best_loss > avg_loss:
                    print('update best model {} -> {}'.format(
                        self.best_loss, avg_loss))
                    self.best_loss = avg_loss
                    save_file = osp.join(
                        self.save_dir,
                        'hpnet_bestmodel_' + self.time_now + '.pt')
                    print('save {}'.format(save_file))
                    # For ros(python 2, torch 1.4)
                    torch.save(self.model.state_dict(),
                               save_file,
                               _use_new_zipfile_serialization=False)
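# The training branch above guards the optimizer step with a NaN check and, when the
# loss diverges, rolls back to deep copies of the previous model and optimizer.
# A minimal standalone sketch of that pattern (hedged: it restores state_dicts instead
# of rebuilding the optimizer as the code above does; the names are generic):
import copy

import torch


def guarded_update(model, optimizer, loss, prev_model, prev_optimizer):
    """Skip the update and restore the last good snapshot if the loss is NaN."""
    if torch.isnan(loss):
        model.load_state_dict(prev_model.state_dict())
        optimizer.load_state_dict(prev_optimizer.state_dict())
        return prev_model, prev_optimizer, False
    # Loss is finite: snapshot the current state, then take the step.
    prev_model = copy.deepcopy(model)
    prev_optimizer = copy.deepcopy(optimizer)
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 5)
    optimizer.step()
    return prev_model, prev_optimizer, True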
Exemplo n.º 38
0
    def step(self, action):
        import numpy as np
        # Sanitize the action: replace NaN/inf values, then clip to the action space.
        action = np.nan_to_num(action)
        action = np.clip(action, self.action_space.low, self.action_space.high)
        return self.env.step(action)
    def sentence_similarity(self,
                            wnsimilarity,
                            sentence1,
                            sentence2,
                            icneed=False):
        """ compute the sentence similarity using Wordnet """
        # Tokenize and tag
        sentence1 = pos_tag(word_tokenize(sentence1))
        sentence2 = pos_tag(word_tokenize(sentence2))

        # Get the synsets for the tagged words
        synsets1 = [
            self.tagged_to_synset(*tagged_word) for tagged_word in sentence1
        ]
        synsets2 = [
            self.tagged_to_synset(*tagged_word) for tagged_word in sentence2
        ]

        # Filter out the Nones
        synsets1 = [ss for ss in synsets1 if ss]
        synsets2 = [ss for ss in synsets2 if ss]

        score, count = 0.0, 0
        # For each word in the first sentence
        for synset in synsets1:

            # Get the similarity value of the most similar word in the other sentence
            score_list = []
            if icneed:
                for ss in synsets2:
                    try:
                        temp = wnsimilarity(synset, ss, self.brown_ic)
                        score_list.append(temp)
                    except:
                        continue

            else:
                for ss in synsets2:
                    try:
                        temp = wnsimilarity(synset, ss)
                        score_list.append(temp)
                    except:
                        continue

            score_list = np.array(score_list, dtype=np.float64)
            score_list = np.nan_to_num(score_list)
            if len(score_list) > 0:
                best_score = np.nanmax(score_list)
            else:
                best_score = 0.0

            # Check that the similarity could have been computed
            if best_score is not None:
                score = score + best_score
                count = count + 1

        # Average the values over the words that could be scored; guard against
        # sentences where no synset pair yielded a similarity.
        if count == 0:
            return 0.0
        score /= count
        return score
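# Hedged illustration of the building blocks used by sentence_similarity() above:
# look up synsets for two words and score them with the same WordNet measures that
# can be passed in as `wnsimilarity`. path_similarity needs no information content,
# while res_similarity needs a corpus IC, which is what icneed=True enables.
from nltk.corpus import wordnet as wn
from nltk.corpus import wordnet_ic

dog = wn.synset('dog.n.01')
cat = wn.synset('cat.n.01')
brown_ic = wordnet_ic.ic('ic-brown.dat')
print(wn.path_similarity(dog, cat))            # no IC needed
print(wn.res_similarity(dog, cat, brown_ic))   # IC-based (Resnik) similarity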
Exemplo n.º 40
0
def tSNEgen(MF,
            CDs,
            tol_fact,
            filter,
            metric='chebyshev',
            fetch_ann='online',
            p=30,
            ea=12):
    """Performs tSNE analysis on the molecular data collected using spaceM.
    The documentation page of the sklearn implementation of tSNE:
    http://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html

    Args:
        MF (str): path to the Main Folder.
        CDs (list): correlation distance tresholds used for filtering background annotation images, only used when
            filter is 'correlation'. Default value is 0.75.
        tol_fact (float): tolerance factor to use for the filter 'mean'.
        filter (str): filter strategy to select background and on-sample annotation images:
            'mean': compare the mean ion intensity from off and on-sample pixels. Consider annotation as coming from
                the sample if  mean on-sample intensity > tol_fact * mean off-sample intensity.
            'correlation': compute the correlation distance between the intensity thresholded annotation image and
                the cell distribution binary mask. The annotation is considered as coming from the sample if the
                correlation distance is inferior to CDs[i]. The cell distribution mask has pixel equal to 1 if its
                corresponding ablation mark is touching a cell and 0 if not. The treshold value to binarize the
                annotation image is found using an optimizer which minimzes the correlation distance with the cell
                distribution mask. This removes the negative effect that an uneven ion intensity distribution will
                have on the correlation distance with the cell distribution mask.
        metric (str): The metric to use when calculating distance between instances in a feature array. Metric value
            must be one of the options allowed by scipy.spatial.distance.pdist for its metric parameter, or a metric
            listed in pairwise.PAIRWISE_DISTANCE_FUNCTIONS.
        fetch_ann (str): method for fetching annotations:
            'online': (default) queries metaspace using the name of the .imzml data present in the MALDI input folder
                as dataset name,
            'offline': reads annotation images from a provided dataframe.
        p (float): perplexity value to use for the tSNE algorithm. The perplexity is related to the number of nearest
            neighbors that is used in other manifold learning algorithms. Larger datasets usually require a larger
            perplexity. Consider selecting a value between 5 and 50. The choice is not extremely critical since t-SNE
            is quite insensitive to this parameter.
        ea (float): early exaggeration value to use for the tSNE algorithm. Controls how tight natural clusters in the
            original space are in the embedded space and how much space will be between them. For larger values,
            the space between natural clusters will be larger in the embedded space. Again, the choice of this
            parameter is not very critical. If the cost function increases during initial optimization, the early
            exaggeration factor or the learning rate might be too high.

    """

    if fetch_ann == 'online' and filter == 'correlation':
        MOLcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/CD={}/MOLonlyData.csv'.format(
            CDs[0])
        MOLallcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/CD={}/MOLallData.csv'.format(
            CDs[0])
    elif fetch_ann == 'online' and filter == 'mean':
        MOLcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/tol_fact={}/MOLonlyData.csv'.format(
            tol_fact)
        MOLallcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/tol_fact={}/MOLallData.csv'.format(
            tol_fact)
    if fetch_ann == 'offline':
        MOLcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/offline/MOLonlyData.csv'
        MOLallcsv_p = MF + 'Analysis/scAnalysis/Molecular_features/offline/MOLallData.csv'
    MOLdf = pd.read_csv(MOLcsv_p)
    fluo_data = pd.read_csv(MOLallcsv_p).fluoMarksMean_lu.values
    tsne_input = np.nan_to_num(np.log10(MOLdf.iloc[:, 2:].values))
    # perp = [5,10,15,20,25,30,40,50,75,100]
    # for p in perp:
    # p = 30
    dist = pairwise_distances(tsne_input, metric=metric)
    tsne = manifold.TSNE(n_components=2,
                         metric='precomputed',
                         early_exaggeration=ea,
                         perplexity=p)
    X_tsne = tsne.fit_transform(np.nan_to_num(dist))
    x_min, x_max = np.min(X_tsne, 0), np.max(X_tsne, 0)
    X2_tsne = (X_tsne - x_min) / (x_max - x_min)
    tsne_colors_i = fluo_data

    contrast_cut = 5
    pc_top = np.percentile(tsne_colors_i, 100 - contrast_cut)
    pc_down = np.percentile(tsne_colors_i, contrast_cut)

    tsne_colors_f = []
    for i in tsne_colors_i:
        if i >= pc_top:
            val = pc_top
        elif i <= pc_down:
            val = pc_down
        else:
            val = i
        tsne_colors_f = np.append(tsne_colors_f, val)

    plt.figure()
    plt.scatter(X2_tsne[:, 0],
                X2_tsne[:, 1],
                50,
                np.log10(tsne_colors_f),
                cmap='viridis',
                edgecolors='none')
    plt.xlabel('tSNE dim 1', fontsize=20)
    plt.ylabel('tSNE dim 2', fontsize=20)
    plt.axis('equal')
    coords = pd.DataFrame({
        'tSNE1': X2_tsne[:, 0],
        'tSNE2': X2_tsne[:, 1],
        'ObjectNumber': MOLdf['ObjectNumber_lu']
    })
    coords.to_csv(MF +
                  'Analysis/tSNE/metric={}_perp={}_KLD='.format(metric, p) +
                  str(tsne.kl_divergence_)[:5] + '_' + fetch_ann + '.csv',
                  index=False)
    plt.savefig(MF + 'Analysis/tSNE/metric={}_perp={}_KLD='.format(metric, p) +
                str(tsne.kl_divergence_)[:5] + '_' + fetch_ann + '.png',
                dpi=200)
    plt.close('all')
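# Minimal sketch (not the spaceM implementation) of the 'mean' filter rule described
# in the docstring above: an annotation image is kept as on-sample when its mean
# on-sample intensity exceeds tol_fact times its mean off-sample intensity.
# `ann_img` and `on_sample_mask` are hypothetical inputs.
import numpy as np


def is_on_sample(ann_img, on_sample_mask, tol_fact):
    on_mean = np.nanmean(ann_img[on_sample_mask])
    off_mean = np.nanmean(ann_img[~on_sample_mask])
    return on_mean > tol_fact * off_mean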
Exemplo n.º 41
0
def register2Fragments(id1, id2, keyptspath, descpath, resultpath, logpath,
                       gtLog, desc_name, inlier_ratio, distance_threshold):
    """
    Register point cloud {id1} and {id2} using the keypts location and descriptors.
    """
    cloud_bin_s = f'cloud_bin_{id1}'
    cloud_bin_t = f'cloud_bin_{id2}'
    write_file = f'{cloud_bin_s}_{cloud_bin_t}.rt.txt'
    if os.path.exists(os.path.join(resultpath, write_file)):
        return 0, 0, 0
    source_keypts = get_keypts(keyptspath, cloud_bin_s)
    target_keypts = get_keypts(keyptspath, cloud_bin_t)
    source_desc = get_desc(descpath, cloud_bin_s, desc_name)
    target_desc = get_desc(descpath, cloud_bin_t, desc_name)
    source_desc = np.nan_to_num(source_desc)
    target_desc = np.nan_to_num(target_desc)
    # Select {num_keypts} points based on the scores. The descriptors and keypts are already sorted based on the detection score.
    num_keypts = 250
    source_keypts = source_keypts[-num_keypts:, :]
    source_desc = source_desc[-num_keypts:, :]
    target_keypts = target_keypts[-num_keypts:, :]
    target_desc = target_desc[-num_keypts:, :]
    # Select {num_keypts} points randomly.
    # num_keypts = 250
    # source_indices = np.random.choice(range(source_keypts.shape[0]), num_keypts)
    # target_indices = np.random.choice(range(target_keypts.shape[0]), num_keypts)
    # source_keypts = source_keypts[source_indices, :]
    # source_desc = source_desc[source_indices, :]
    # target_keypts = target_keypts[target_indices, :]
    # target_desc = target_desc[target_indices, :]
    key = f'{cloud_bin_s.split("_")[-1]}_{cloud_bin_t.split("_")[-1]}'
    if key not in gtLog.keys():
        # skip the pairs that have less than 30% overlap.
        num_inliers = 0
        inlier_ratio = 0
        gt_flag = 0
    else:
        # build correspondence set in feature space.
        corr = build_correspondence(source_desc, target_desc)

        # calculate the inlier ratio, this is for Feature Matching Recall.
        gt_trans = gtLog[key]
        frag1 = source_keypts[corr[:, 0]]
        frag2_pc = open3d.PointCloud()
        frag2_pc.points = open3d.utility.Vector3dVector(target_keypts[corr[:,
                                                                           1]])
        frag2_pc.transform(gt_trans)
        frag2 = np.asarray(frag2_pc.points)
        distance = np.sqrt(np.sum(np.power(frag1 - frag2, 2), axis=1))
        num_inliers = np.sum(distance < distance_threshold)
        if num_inliers / len(distance) < inlier_ratio:
            print(key)
            print("num_corr:", len(corr), "inlier_ratio:",
                  num_inliers / len(distance))
        inlier_ratio = num_inliers / len(distance)
        gt_flag = 1

        # calculate the transformation matrix using RANSAC, this is for Registration Recall.
        source_pcd = open3d.PointCloud()
        source_pcd.points = open3d.utility.Vector3dVector(source_keypts)
        target_pcd = open3d.PointCloud()
        target_pcd.points = open3d.utility.Vector3dVector(target_keypts)
        s_desc = open3d.registration.Feature()
        s_desc.data = source_desc.T
        t_desc = open3d.registration.Feature()
        t_desc.data = target_desc.T
        result = open3d.registration_ransac_based_on_feature_matching(
            source_pcd, target_pcd, s_desc, t_desc, 0.05,
            open3d.TransformationEstimationPointToPoint(False), 3, [
                open3d.CorrespondenceCheckerBasedOnEdgeLength(0.9),
                open3d.CorrespondenceCheckerBasedOnDistance(0.05)
            ], open3d.RANSACConvergenceCriteria(50000, 1000))

        # write the transformation matrix into .log file for evaluation.
        with open(os.path.join(logpath, f'{desc_name}_{timestr}.log'),
                  'a+') as f:
            trans = result.transformation
            trans = np.linalg.inv(trans)
            s1 = f'{id1}\t {id2}\t  37\n'
            f.write(s1)
            f.write(
                f"{trans[0,0]}\t {trans[0,1]}\t {trans[0,2]}\t {trans[0,3]}\t \n"
            )
            f.write(
                f"{trans[1,0]}\t {trans[1,1]}\t {trans[1,2]}\t {trans[1,3]}\t \n"
            )
            f.write(
                f"{trans[2,0]}\t {trans[2,1]}\t {trans[2,2]}\t {trans[2,3]}\t \n"
            )
            f.write(
                f"{trans[3,0]}\t {trans[3,1]}\t {trans[3,2]}\t {trans[3,3]}\t \n"
            )

    # write the result into resultpath so that it can be re-shown.
    s = f"{cloud_bin_s}\t{cloud_bin_t}\t{num_inliers}\t{inlier_ratio:.8f}\t{gt_flag}"
    with open(os.path.join(resultpath, f'{cloud_bin_s}_{cloud_bin_t}.rt.txt'),
              'w+') as f:
        f.write(s)
    return num_inliers, inlier_ratio, gt_flag
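# build_correspondence() is called above but not defined in this listing. A minimal
# sketch under the assumption that it matches each source descriptor to its nearest
# target descriptor in feature space (mutual-nearest checks omitted):
import numpy as np


def build_correspondence_sketch(source_desc, target_desc):
    # Pairwise Euclidean distances between the two descriptor sets.
    dists = np.linalg.norm(source_desc[:, None, :] - target_desc[None, :, :], axis=2)
    nearest = np.argmin(dists, axis=1)          # nearest target index per source point
    return np.stack([np.arange(len(source_desc)), nearest], axis=1)   # shape (N, 2)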
def optimization(generate_opt_data=True, read_opt_data=False, beta_fin=4, x_max=5, 
                 potential=harmonic_potential, potential_string='harmonic_potential',
                 nx_min=50, nx_max=1000, nx_sampling=50, N_iter_min=1, N_iter_max=20,
                 save_opt_data=False, opt_data_file_name=None, plot=True,
                 show_plot=True, save_plot=True, opt_plot_file_name=None):
    """
    Uso:    calcula diferentes valores de error usando calc_error() para encontrar valores de
            dx y beta_ini óptimos para correr el alcoritmo (óptimos = que minimicen error)
    
    Recibe:
        generate_opt_data: bool ->  decide si genera datos para optimización.
        read_opt_data: bool     ->  decide si lee datos para optimización.
        Nota: generate_opt_data y read_opt_data son excluyentes. Se evalúa primero la primera. 
        nx_min: int  
        nx_max: int             ->  se relaciona  con dx = 2*x_max/(nx-1).
        nx_sampling: int        ->  se generan nx mediante range(nx_max,nx_min,-1*nx_sampling).
        N_iter_min: int  
        N_iter_max: int         ->  se relaciona con beta_ini = beta_fin **(-N_iter). Se gereran
                                    valores de N_iter con range(N_iter_max,N_iter_min-1,-1).
        save_opt_data: bool     ->  decide si guarda datos de optimización en archivo CSV.
        opt_data_file_name: str ->  nombre de archivo para datos de optimización.
        plot: bool              ->  decide si grafica optimización.
        show_plot: bool         ->  decide si muestra optimización.
        save_plot: bool         ->  decide si guarda optimización. 
        opt_plot_file_name: str ->  nombre de gráfico de optimización. Si valor es None, se
                                    guarda con nombre conveniente según parámetros relevantes.
    
    Devuelve: 
        error: list, shape=(nb,ndx) ->  valores de calc_error para diferentes valores de dx y
                                        beta_ini. dx incrementa de izquierda a derecha en lista
                                        y beta_ini incrementa de arriba a abajo.
        dx_grid: list, shape=(ndx,)         -> valores de dx para los que se calcula error.
        beta-ini_grid: list, shape=(nb,)    -> valores de beta_ini para los que se calcula error.
    """
    
    t_0 = time()
    
    # Decide whether to generate or read the data.
    if generate_opt_data:
        N_iter_min = int(N_iter_min)
        N_iter_max = int(N_iter_max)
        nx_min = int(nx_min)
        nx_max = int(nx_max)

        if nx_min%2==1:
            nx_min -= 1
        if nx_max%2==0:
            nx_max += 1
        
        # Create nx and N_iter values (equivalent to generating dx and beta_ini values)
        nx_values = range(nx_max,nx_min,-1*nx_sampling)
        N_iter_values = range(N_iter_max,N_iter_min-1,-1)

        dx_grid = [2*x_max/(nx-1) for nx in nx_values]
        beta_ini_grid = [beta_fin * 2**(-N_iter) for N_iter in N_iter_values]
        
        error = []

        # Compute the error for each specified value of nx and N_iter
        # (equivalently, dx and beta_ini).
        for N_iter in N_iter_values:
            row = []
            for nx in nx_values:
                rho,trace_rho,grid_x = run_pi_x_sq_trotter(x_max, nx, N_iter, beta_fin,
                                                           potential, potential_string,
                                                           False, False, None, None, False,
                                                           False, False)
                grid_x = np.array(grid_x)
                dx = grid_x[1]-grid_x[0]
                rho_normalized = np.copy(rho)/trace_rho
                pi_x = np.diag(rho_normalized)
                theoretical_pi_x = QHO_canonical_ensemble(grid_x,beta_fin)
                error_comp_theo = calc_error(pi_x,theoretical_pi_x,dx)
                row.append(error_comp_theo)
            error.append(row)
        #error = np.array(error)

    elif read_opt_data:
        error =  pd.read_csv(opt_data_file_name, index_col=0, comment='#')
        dx_grid = error.columns.to_numpy()
        beta_ini_grid = error.index.to_numpy()
        error = error.to_numpy()

    else:
        raise Exception('Choose whether to generate or read data in optimization().')
    
    #print(error)

    error = np.array(error)

    # Take the error values coming from the Z computation (nan and inf) and replace
    # them with the largest error value in the plot.
    try:
        error = np.where(np.isinf(error),0,error)
        error = np.where(np.isnan(error),0,error)
        nan_value = 1.3*np.max(error)
        error = np.where(error==0, float('nan'), error)
    except:
        nan_value = 0
    error = np.nan_to_num(error, nan=nan_value, posinf=nan_value, neginf=nan_value)
    
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Save the data (only if it was generated and saving was requested)
    if generate_opt_data and save_opt_data:
        
        if opt_data_file_name is None:
            opt_data_file_name = ('pi_x-ms-opt-%s-beta_fin_%.3f'%(potential_string, beta_fin)
                                  + '-x_max_%.3f-nx_min_%d-nx_max_%d'%(x_max, nx_min, nx_max)
                                  + '-nx_sampling_%d-N_iter_min_%d'%(nx_sampling, N_iter_min)
                                  + '-N_iter_max_%d.csv'%(N_iter_max))
        
        opt_data_file_name = script_dir + '/' + opt_data_file_name
        
        relevant_info = ['Optimization of parameters dx and beta_ini of matrix squaring'
                         + ' algorithm', '%s   beta_fin = %.3f   '%(potential_string, beta_fin)
                         + 'x_max = %.3f   nx_min = %d   nx_max = %d   '%(x_max, nx_min, nx_max)
                         + 'nx_sampling = %d N_iter_min = %d   '%(nx_sampling, N_iter_min)
                         + 'N_iter_max = %d'%(N_iter_max)]
        
        save_csv(error, dx_grid, beta_ini_grid, opt_data_file_name, relevant_info)
    
    t_1 = time()

    # Plot
    if plot:

        fig, ax = plt.subplots(1, 1)

        DX, BETA_INI = np.meshgrid(dx_grid, beta_ini_grid)
        cp = plt.contourf(DX,BETA_INI,error)
        plt.colorbar(cp)
        
        ax.set_ylabel(u'$\\beta_{ini}$')
        ax.set_xlabel('$dx$')
        plt.tight_layout()
        
        if save_plot:
            
            if opt_plot_file_name is None:
                opt_plot_file_name = \
                   ('pi_x-ms-opt-plot-%s-beta_fin_%.3f'%(potential_string, beta_fin)
                    + '-x_max_%.3f-nx_min_%d-nx_max_%d'%(x_max, nx_min, nx_max)
                    + '-nx_sampling_%d-N_iter_min_%d'%(nx_sampling, N_iter_min)
                    + '-N_iter_max_%d.eps'%(N_iter_max))
            
            opt_plot_file_name = script_dir + '/' + opt_plot_file_name

            plt.savefig(opt_plot_file_name)

        if show_plot:
            plt.show()

        plt.close()
    comp_time = t_1 - t_0
    return error, dx_grid, beta_ini_grid, comp_time
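# Quick check of the parameter relationships described in the docstring above:
# dx = 2*x_max/(nx - 1) and beta_ini = beta_fin * 2**(-N_iter). With the default
# x_max=5 and beta_fin=4:
x_max, beta_fin = 5, 4
dx_finest = 2 * x_max / (1000 - 1)         # nx = 1000   ->  dx ~ 0.0100
beta_ini_smallest = beta_fin * 2 ** (-20)  # N_iter = 20 ->  beta_ini ~ 3.8e-06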
Exemplo n.º 43
0
#sns.distplot(GCP, bins=100)
#sns.distplot(MET, bins=100)
#plt.legend(['GCP', 'Metabolomics'])
"""## 1d. Train on GCP to predict metabolism
First, let's split the data into training and test sets.
"""

from sklearn.model_selection import train_test_split
"""Convert to Numpy array."""

GCP = GCP.to_numpy()
MET = MET.to_numpy()
"""Let's ensure all NaNs are 0."""

# Ensure all values are finite
GCP = np.nan_to_num(GCP, nan=0)
MET = np.nan_to_num(MET, nan=0)
"""Split the data into validation (30%) and training (70%) data."""

# Split the CCLE data into a validation set
Xtrain, Xval, Ytrain, Yval = train_test_split(GCP,
                                              MET,
                                              test_size=0.3,
                                              random_state=0)
"""Print $X_{train}$ and $Y_{train}$."""

import sys
np.set_printoptions(threshold=sys.maxsize)
#print(Xtrain)
#print(Ytrain)
"""Print shape of $X_{train}$ and $Y_{train}$."""
def ElasticNet_OptimalAlpha_KFold(Training_Data, Training_Score, Fold_Quantity,
                                  Alpha_Range, L1_ratio_Range, ResultantFolder,
                                  Parallel_Quantity):
    #
    # Select optimal regularization parameter using nested cross-validation
    #
    # Training_Data:
    #     n*m matrix, n is subjects quantity, m is features quantity
    # Training_Score:
    #     n*1 vector, n is subjects quantity
    # Fold_Quantity:
    #     Fold quantity for the cross-validation
    #     5 or 10 is generally recommended; the smaller the value, the better accepted by the community, but the results may be worse as fewer training samples are available
    # Alpha_Range:
    #     Range of alpha, the regularization parameter balancing the training error and L2 penalty
    #     Our previous paper used (2^(-10), 2^(-9), ..., 2^4, 2^5), see Cui and Gong (2018), NeuroImage
    # L1_ratio_Range:
    #     Range of l1 ratio, the parameter balancing l1 and l2 penalty
    #     Our previous paper used 10 values in the range [0.2, 1], see Cui et al. (2018), Cerebral Cortex
    # ResultantFolder:
    #     Path of the folder storing the results
    # Parallel_Quantity:
    #     Parallel multi-cores on one single computer, at least 1
    #

    Subjects_Quantity = len(Training_Score)
    # Sort the subjects score
    Sorted_Index = np.argsort(Training_Score)
    Training_Data = Training_Data[Sorted_Index, :]
    Training_Score = Training_Score[Sorted_Index]

    Inner_EachFold_Size = int(
        np.fix(np.divide(Subjects_Quantity, Fold_Quantity)))
    MaxSize = Inner_EachFold_Size * Fold_Quantity
    EachFold_Max = np.ones(Fold_Quantity, dtype=int) * MaxSize
    tmp = np.arange(Fold_Quantity - 1, -1, -1)
    EachFold_Max = EachFold_Max - tmp
    Remain = np.mod(Subjects_Quantity, Fold_Quantity)
    for j in np.arange(Remain):
        EachFold_Max[j] = EachFold_Max[j] + Fold_Quantity

    Parameter_Combination_Quantity = len(Alpha_Range) * len(L1_ratio_Range)
    Inner_Corr = np.zeros((Fold_Quantity, Parameter_Combination_Quantity))
    Inner_MAE_inv = np.zeros((Fold_Quantity, Parameter_Combination_Quantity))

    for k in np.arange(Fold_Quantity):

        Inner_Fold_K_Index = np.arange(k, EachFold_Max[k], Fold_Quantity)
        Inner_Fold_K_Data_test = Training_Data[Inner_Fold_K_Index, :]
        Inner_Fold_K_Score_test = Training_Score[Inner_Fold_K_Index]
        Inner_Fold_K_Data_train = np.delete(Training_Data,
                                            Inner_Fold_K_Index,
                                            axis=0)
        Inner_Fold_K_Score_train = np.delete(Training_Score,
                                             Inner_Fold_K_Index)

        Scale = preprocessing.MinMaxScaler()
        Inner_Fold_K_Data_train = Scale.fit_transform(Inner_Fold_K_Data_train)
        Inner_Fold_K_Data_test = Scale.transform(Inner_Fold_K_Data_test)

        Parallel(n_jobs=Parallel_Quantity, backend="threading")(
            delayed(ElasticNet_SubAlpha)
            (Inner_Fold_K_Data_train, Inner_Fold_K_Score_train,
             Inner_Fold_K_Data_test, Inner_Fold_K_Score_test, Alpha_Range,
             L1_ratio_Range, l, ResultantFolder)
            for l in np.arange(Parameter_Combination_Quantity))
        for l in np.arange(Parameter_Combination_Quantity):
            print(l)
            Fold_l_Mat_Path = ResultantFolder + '/Alpha_' + str(l) + '.mat'
            Fold_l_Mat = sio.loadmat(Fold_l_Mat_Path)
            Inner_Corr[k, l] = Fold_l_Mat['Corr'][0][0]
            Inner_MAE_inv[k, l] = Fold_l_Mat['MAE_inv']
            os.remove(Fold_l_Mat_Path)

        Inner_Corr = np.nan_to_num(Inner_Corr)

    Inner_Corr_Mean = np.mean(Inner_Corr, axis=0)
    Inner_Corr_Mean = (Inner_Corr_Mean -
                       np.mean(Inner_Corr_Mean)) / np.std(Inner_Corr_Mean)
    Inner_MAE_inv_Mean = np.mean(Inner_MAE_inv, axis=0)
    Inner_MAE_inv_Mean = (Inner_MAE_inv_Mean - np.mean(Inner_MAE_inv_Mean)
                          ) / np.std(Inner_MAE_inv_Mean)
    Inner_Evaluation = Inner_Corr_Mean + Inner_MAE_inv_Mean

    Inner_Evaluation_Mat = {
        'Inner_Corr': Inner_Corr,
        'Inner_MAE_inv': Inner_MAE_inv,
        'Inner_Evaluation': Inner_Evaluation
    }
    sio.savemat(ResultantFolder + '/Inner_Evaluation.mat',
                Inner_Evaluation_Mat)

    Optimal_Combination_Index = np.argmax(Inner_Evaluation)

    Optimal_Alpha_Index = np.int64(
        np.ceil((Optimal_Combination_Index + 1) / len(L1_ratio_Range))) - 1
    Optimal_Alpha = Alpha_Range[Optimal_Alpha_Index]
    Optimal_L1_ratio_Index = np.mod(Optimal_Combination_Index,
                                    len(L1_ratio_Range))
    Optimal_L1_ratio = L1_ratio_Range[Optimal_L1_ratio_Index]
    return (Optimal_Alpha, Optimal_L1_ratio)
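# Hedged usage sketch of the nested-CV search above, using the parameter ranges
# mentioned in its comments (alpha = 2^-10 ... 2^5, ten l1 ratios in [0.2, 1]).
# Training_Data, Training_Score and the output folder are placeholders.
import numpy as np

Alpha_Range = 2.0 ** np.arange(-10, 6)
L1_ratio_Range = np.linspace(0.2, 1.0, 10)
# Optimal_Alpha, Optimal_L1_ratio = ElasticNet_OptimalAlpha_KFold(
#     Training_Data, Training_Score, Fold_Quantity=5,
#     Alpha_Range=Alpha_Range, L1_ratio_Range=L1_ratio_Range,
#     ResultantFolder='/tmp/elasticnet_cv', Parallel_Quantity=4)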
Exemplo n.º 45
0
    def fn(a, y):
        # Cross-entropy cost; nan_to_num guards the 0 * log(0) corner cases.
        return np.sum(np.nan_to_num(-y * np.log(a) - (1 - y) * np.log(1 - a)))
def create_features(seg_id, seg, X, st, end):
    """
    create features including fft features, statistical features and time series features
    :param seg_id: the ID for a sample
    :param seg: s signal segment
    :param X: train set features before creating these features
    :param st: the start index of the signal segment
    :param end: the end index of the signal segment
    :return: train set features after creating these features
    """
    try:
        # the test set won't create these features because its seg_id is a string
        X.loc[seg_id, 'seg_id'] = np.int32(seg_id)
        X.loc[seg_id, 'seg_start'] = np.int32(st)
        X.loc[seg_id, 'seg_end'] = np.int32(end)
    except ValueError:
        pass

    xc = pd.Series(seg['acoustic_data'].values)
    xcdm = xc - np.mean(xc)

    b, a = des_bw_filter_lp(cutoff=18000)
    xcz = sg.lfilter(b, a, xcdm)

    zc = np.fft.fft(xcz)
    zc = zc[:MAX_FREQ]

    # FFT transform values
    realFFT = np.real(zc)
    imagFFT = np.imag(zc)

    freq_bands = [x for x in range(0, MAX_FREQ, FREQ_BAND)]
    magFFT = np.sqrt(realFFT ** 2 + imagFFT ** 2)
    phzFFT = np.arctan(imagFFT / realFFT)
    phzFFT[phzFFT == -np.inf] = -np.pi / 2.0
    phzFFT[phzFFT == np.inf] = np.pi / 2.0
    phzFFT = np.nan_to_num(phzFFT)

    for freq in freq_bands:
        X.loc[seg_id, 'FFT_Mag_01q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_BAND], 0.01)
        X.loc[seg_id, 'FFT_Mag_10q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_BAND], 0.1)
        X.loc[seg_id, 'FFT_Mag_90q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_BAND], 0.9)
        X.loc[seg_id, 'FFT_Mag_99q%d' % freq] = np.quantile(magFFT[freq: freq + FREQ_BAND], 0.99)
        X.loc[seg_id, 'FFT_Mag_mean%d' % freq] = np.mean(magFFT[freq: freq + FREQ_BAND])
        X.loc[seg_id, 'FFT_Mag_std%d' % freq] = np.std(magFFT[freq: freq + FREQ_BAND])
        X.loc[seg_id, 'FFT_Mag_max%d' % freq] = np.max(magFFT[freq: freq + FREQ_BAND])

        X.loc[seg_id, 'FFT_Phz_mean%d' % freq] = np.mean(phzFFT[freq: freq + FREQ_BAND])
        X.loc[seg_id, 'FFT_Phz_std%d' % freq] = np.std(phzFFT[freq: freq + FREQ_BAND])

    X.loc[seg_id, 'FFT_Rmean'] = realFFT.mean()
    X.loc[seg_id, 'FFT_Rstd'] = realFFT.std()
    X.loc[seg_id, 'FFT_Rmax'] = realFFT.max()
    X.loc[seg_id, 'FFT_Rmin'] = realFFT.min()
    X.loc[seg_id, 'FFT_Imean'] = imagFFT.mean()
    X.loc[seg_id, 'FFT_Istd'] = imagFFT.std()
    X.loc[seg_id, 'FFT_Imax'] = imagFFT.max()
    X.loc[seg_id, 'FFT_Imin'] = imagFFT.min()

    X.loc[seg_id, 'FFT_Rmean_first_6000'] = realFFT[:6000].mean()
    X.loc[seg_id, 'FFT_Rstd__first_6000'] = realFFT[:6000].std()
    X.loc[seg_id, 'FFT_Rmax_first_6000'] = realFFT[:6000].max()
    X.loc[seg_id, 'FFT_Rmin_first_6000'] = realFFT[:6000].min()
    X.loc[seg_id, 'FFT_Rmean_first_18000'] = realFFT[:18000].mean()
    X.loc[seg_id, 'FFT_Rstd_first_18000'] = realFFT[:18000].std()
    X.loc[seg_id, 'FFT_Rmax_first_18000'] = realFFT[:18000].max()
    X.loc[seg_id, 'FFT_Rmin_first_18000'] = realFFT[:18000].min()

    del xcz
    del zc

    b, a = des_bw_filter_lp(cutoff=2500)
    xc0 = sg.lfilter(b, a, xcdm)

    b, a = des_bw_filter_bp(low=2500, high=5000)
    xc1 = sg.lfilter(b, a, xcdm)

    b, a = des_bw_filter_bp(low=5000, high=7500)
    xc2 = sg.lfilter(b, a, xcdm)

    b, a = des_bw_filter_bp(low=7500, high=10000)
    xc3 = sg.lfilter(b, a, xcdm)

    b, a = des_bw_filter_bp(low=10000, high=12500)
    xc4 = sg.lfilter(b, a, xcdm)

    b, a = des_bw_filter_bp(low=12500, high=15000)
    xc5 = sg.lfilter(b, a, xcdm)

    b, a = des_bw_filter_bp(low=15000, high=17500)
    xc6 = sg.lfilter(b, a, xcdm)

    b, a = des_bw_filter_bp(low=17500, high=20000)
    xc7 = sg.lfilter(b, a, xcdm)

    b, a = des_bw_filter_hp(cutoff=20000)
    xc8 = sg.lfilter(b, a, xcdm)

    sigs = [xc, pd.Series(xc0), pd.Series(xc1), pd.Series(xc2), pd.Series(xc3),
            pd.Series(xc4), pd.Series(xc5), pd.Series(xc6), pd.Series(xc7), pd.Series(xc8)]

    for i, sig in enumerate(sigs):
        X.loc[seg_id, 'mean_%d' % i] = sig.mean()
        X.loc[seg_id, 'std_%d' % i] = sig.std()
        X.loc[seg_id, 'max_%d' % i] = sig.max()
        X.loc[seg_id, 'min_%d' % i] = sig.min()

        X.loc[seg_id, 'mean_change_abs_%d' % i] = np.mean(np.diff(sig))
        X.loc[seg_id, 'mean_change_rate_%d' % i] = calc_mean_change_rate(sig)
        X.loc[seg_id, 'abs_max_%d' % i] = np.abs(sig).max()

        X.loc[seg_id, 'std_first_50000_%d' % i] = sig[:50000].std()
        X.loc[seg_id, 'std_last_50000_%d' % i] = sig[-50000:].std()
        X.loc[seg_id, 'std_first_10000_%d' % i] = sig[:10000].std()
        X.loc[seg_id, 'std_last_10000_%d' % i] = sig[-10000:].std()

        X.loc[seg_id, 'avg_first_50000_%d' % i] = sig[:50000].mean()
        X.loc[seg_id, 'avg_last_50000_%d' % i] = sig[-50000:].mean()
        X.loc[seg_id, 'avg_first_10000_%d' % i] = sig[:10000].mean()
        X.loc[seg_id, 'avg_last_10000_%d' % i] = sig[-10000:].mean()

        X.loc[seg_id, 'min_first_50000_%d' % i] = sig[:50000].min()
        X.loc[seg_id, 'min_last_50000_%d' % i] = sig[-50000:].min()
        X.loc[seg_id, 'min_first_10000_%d' % i] = sig[:10000].min()
        X.loc[seg_id, 'min_last_10000_%d' % i] = sig[-10000:].min()

        X.loc[seg_id, 'max_first_50000_%d' % i] = sig[:50000].max()
        X.loc[seg_id, 'max_last_50000_%d' % i] = sig[-50000:].max()
        X.loc[seg_id, 'max_first_10000_%d' % i] = sig[:10000].max()
        X.loc[seg_id, 'max_last_10000_%d' % i] = sig[-10000:].max()

        X.loc[seg_id, 'max_to_min_%d' % i] = sig.max() / np.abs(sig.min())
        X.loc[seg_id, 'max_to_min_diff_%d' % i] = sig.max() - np.abs(sig.min())
        X.loc[seg_id, 'count_big_%d' % i] = len(sig[np.abs(sig) > 500])

        X.loc[seg_id, 'mean_change_rate_first_50000_%d' % i] = calc_mean_change_rate(sig[:50000])
        X.loc[seg_id, 'mean_change_rate_last_50000_%d' % i] = calc_mean_change_rate(sig[-50000:])
        X.loc[seg_id, 'mean_change_rate_first_10000_%d' % i] = calc_mean_change_rate(sig[:10000])
        X.loc[seg_id, 'mean_change_rate_last_10000_%d' % i] = calc_mean_change_rate(sig[-10000:])

        X.loc[seg_id, 'q95_%d' % i] = np.quantile(sig, 0.95)
        X.loc[seg_id, 'q99_%d' % i] = np.quantile(sig, 0.99)
        X.loc[seg_id, 'q05_%d' % i] = np.quantile(sig, 0.05)
        X.loc[seg_id, 'q01_%d' % i] = np.quantile(sig, 0.01)

        X.loc[seg_id, 'abs_q95_%d' % i] = np.quantile(np.abs(sig), 0.95)
        X.loc[seg_id, 'abs_q99_%d' % i] = np.quantile(np.abs(sig), 0.99)
        X.loc[seg_id, 'abs_q05_%d' % i] = np.quantile(np.abs(sig), 0.05)
        X.loc[seg_id, 'abs_q01_%d' % i] = np.quantile(np.abs(sig), 0.01)

        X.loc[seg_id, 'trend_%d' % i] = add_trend_feature(sig)
        X.loc[seg_id, 'abs_trend_%d' % i] = add_trend_feature(sig, abs_values=True)
        X.loc[seg_id, 'abs_mean_%d' % i] = np.abs(sig).mean()
        X.loc[seg_id, 'abs_std_%d' % i] = np.abs(sig).std()

        X.loc[seg_id, 'mad_%d' % i] = sig.mad()
        X.loc[seg_id, 'kurt_%d' % i] = sig.kurtosis()
        X.loc[seg_id, 'skew_%d' % i] = sig.skew()
        X.loc[seg_id, 'med_%d' % i] = sig.median()

        X.loc[seg_id, 'Hilbert_mean_%d' % i] = np.abs(hilbert(sig)).mean()
        X.loc[seg_id, 'Hann_window_mean'] = (convolve(xc, hann(150), mode='same') / sum(hann(150))).mean()

        X.loc[seg_id, 'classic_sta_lta1_mean_%d' % i] = classic_sta_lta(sig, 500, 10000).mean()
        X.loc[seg_id, 'classic_sta_lta2_mean_%d' % i] = classic_sta_lta(sig, 5000, 100000).mean()
        X.loc[seg_id, 'classic_sta_lta3_mean_%d' % i] = classic_sta_lta(sig, 3333, 6666).mean()
        X.loc[seg_id, 'classic_sta_lta4_mean_%d' % i] = classic_sta_lta(sig, 10000, 25000).mean()

        X.loc[seg_id, 'Moving_average_700_mean_%d' % i] = sig.rolling(window=700).mean().mean(skipna=True)
        X.loc[seg_id, 'Moving_average_1500_mean_%d' % i] = sig.rolling(window=1500).mean().mean(skipna=True)
        X.loc[seg_id, 'Moving_average_3000_mean_%d' % i] = sig.rolling(window=3000).mean().mean(skipna=True)
        X.loc[seg_id, 'Moving_average_6000_mean_%d' % i] = sig.rolling(window=6000).mean().mean(skipna=True)

        ewma = pd.Series.ewm
        X.loc[seg_id, 'exp_Moving_average_300_mean_%d' % i] = ewma(sig, span=300).mean().mean(skipna=True)
        X.loc[seg_id, 'exp_Moving_average_3000_mean_%d' % i] = ewma(sig, span=3000).mean().mean(skipna=True)
        X.loc[seg_id, 'exp_Moving_average_30000_mean_%d' % i] = ewma(sig, span=30000).mean().mean(skipna=True)

        no_of_std = 3
        X.loc[seg_id, 'MA_700MA_std_mean_%d' % i] = sig.rolling(window=700).std().mean()
        X.loc[seg_id, 'MA_700MA_BB_high_mean_%d' % i] = (
                X.loc[seg_id, 'Moving_average_700_mean_%d' % i] + no_of_std * X.loc[
            seg_id, 'MA_700MA_std_mean_%d' % i]).mean()
        X.loc[seg_id, 'MA_700MA_BB_low_mean_%d' % i] = (
                X.loc[seg_id, 'Moving_average_700_mean_%d' % i] - no_of_std * X.loc[
            seg_id, 'MA_700MA_std_mean_%d' % i]).mean()
        X.loc[seg_id, 'MA_400MA_std_mean_%d' % i] = sig.rolling(window=400).std().mean()
        X.loc[seg_id, 'MA_400MA_BB_high_mean_%d' % i] = (
                X.loc[seg_id, 'Moving_average_700_mean_%d' % i] + no_of_std * X.loc[
            seg_id, 'MA_400MA_std_mean_%d' % i]).mean()
        X.loc[seg_id, 'MA_400MA_BB_low_mean_%d' % i] = (
                X.loc[seg_id, 'Moving_average_700_mean_%d' % i] - no_of_std * X.loc[
            seg_id, 'MA_400MA_std_mean_%d' % i]).mean()
        X.loc[seg_id, 'MA_1000MA_std_mean_%d' % i] = sig.rolling(window=1000).std().mean()

        X.loc[seg_id, 'iqr_%d' % i] = np.subtract(*np.percentile(sig, [75, 25]))
        X.loc[seg_id, 'q999_%d' % i] = np.quantile(sig, 0.999)
        X.loc[seg_id, 'q001_%d' % i] = np.quantile(sig, 0.001)
        X.loc[seg_id, 'ave10_%d' % i] = stats.trim_mean(sig, 0.1)

        X.loc[seg_id, 'num_peaks_10_%d' % i] = feature_calculators.number_peaks(sig, 10)
        X.loc[seg_id, 'cid_ce_1_%d' % i] = feature_calculators.cid_ce(sig, 1)  # time series complexity
        X.loc[seg_id, 'count_1000_0_%d' % i] = feature_calculators.range_count(sig, -1000, 0)
        X.loc[seg_id, 'binned_entropy_5_%d' % i] = feature_calculators.binned_entropy(sig, 5)
        X.loc[seg_id, 'binned_entropy_15_%d' % i] = feature_calculators.binned_entropy(sig, 15)

    # The sliding window is itself a kind of filter, so this code sits outside the band-pass loop
    for windows in [10, 100, 1000]:
        x_roll_std = xc.rolling(windows).std().dropna()
        x_roll_mean = xc.rolling(windows).mean().dropna()

        X.loc[seg_id, 'ave_roll_std_' + str(windows)] = x_roll_std.mean()
        X.loc[seg_id, 'std_roll_std_' + str(windows)] = x_roll_std.std()
        X.loc[seg_id, 'max_roll_std_' + str(windows)] = x_roll_std.max()
        X.loc[seg_id, 'min_roll_std_' + str(windows)] = x_roll_std.min()
        X.loc[seg_id, 'q01_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.01)
        X.loc[seg_id, 'q05_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.05)
        X.loc[seg_id, 'q95_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.95)
        X.loc[seg_id, 'q99_roll_std_' + str(windows)] = np.quantile(x_roll_std, 0.99)
        X.loc[seg_id, 'av_change_abs_roll_std_' + str(windows)] = np.mean(np.diff(x_roll_std))
        X.loc[seg_id, 'av_change_rate_roll_std_' + str(windows)] = calc_mean_change_rate(x_roll_std)
        X.loc[seg_id, 'abs_max_roll_std_' + str(windows)] = np.abs(x_roll_std).max()

        X.loc[seg_id, 'ave_roll_mean_' + str(windows)] = x_roll_mean.mean()
        X.loc[seg_id, 'std_roll_mean_' + str(windows)] = x_roll_mean.std()
        X.loc[seg_id, 'max_roll_mean_' + str(windows)] = x_roll_mean.max()
        X.loc[seg_id, 'min_roll_mean_' + str(windows)] = x_roll_mean.min()
        X.loc[seg_id, 'q01_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.01)
        X.loc[seg_id, 'q05_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.05)
        X.loc[seg_id, 'q95_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.95)
        X.loc[seg_id, 'q99_roll_mean_' + str(windows)] = np.quantile(x_roll_mean, 0.99)
        X.loc[seg_id, 'av_change_abs_roll_mean_' + str(windows)] = np.mean(np.diff(x_roll_mean))
        X.loc[seg_id, 'av_change_rate_roll_mean_' + str(windows)] = calc_mean_change_rate(x_roll_mean)
        X.loc[seg_id, 'abs_max_roll_mean_' + str(windows)] = np.abs(x_roll_mean).max()

    return X
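# Hedged usage sketch for create_features(): slice the raw signal into fixed-size
# segments and accumulate one feature row per segment. `train_df` (a DataFrame with
# an 'acoustic_data' column) and the 150_000-sample window are assumptions made for
# illustration only.
import pandas as pd

rows = 150_000
X_features = pd.DataFrame()
for seg_id in range(len(train_df) // rows):
    st, end = seg_id * rows, seg_id * rows + rows
    seg = train_df.iloc[st:end]
    X_features = create_features(seg_id, seg, X_features, st, end)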
Exemplo n.º 47
0
def make_gpwg(Mgg, reference_point, xyz_cid0, grid_cps, coords, log):
    """
    Calculates the Grid Point Weight Generator (GPWG) table.

    Parameters
    ----------
    reference_point : (3, ) float ndarray
        the reference point
    grid_point : int
        0->origin, x>0, that grid point
    Mgg : (N, N) matrix
        the mass matrix
    xyz_cid0 : (ngrids, 3) float ndarray
        the xyz coordinates of the grids
    grid_cps : (ngrids, ) int ndarray
        array of cp values corresponding to xyz_cid0
    coords : dict[cp] : Coord()
        dict of cp values corresponding to the Cp coordinate systems
    log : logger()
        logging object

    Returns
    -------
    Mo : (6, 6) float ndarray
        the rigid body mass matrix in the basic coordinate system
    S : (3, 3) float ndarray
        the scalar partition matrix (also known as the principal mass axes)
    mass : (3, ) float ndarray
        the mass in the 3 pricincipal (basic) axes
    cg : (3, 3) float ndarray
        the cg in the 3 principal (basic) axes
    II : (3, 3) float ndarray
        inertias relative to the CG
        also called I(S)
    IQ : (3, ) float ndarray
        principal moments of inertia about the CG
        also called I(Q)
    Q : (3, 3) float ndarray
        the coordinate transformation between the S axes and the Q axes

    .. todo:: doesn't consider SPOINTs/EPOINTs
    .. todo:: hasn't been tested
    """
    nnodes = xyz_cid0.shape[0]
    D = np.zeros((nnodes * 6, 6), dtype='float32')

    # we subtract ref point so as to not change xyz_cid0
    for i, node in enumerate(xyz_cid0 - reference_point):
        r1, r2, r3 = node
        j = i * 6
        Tr = np.array([[0., r3, -r2], [-r3, 0., r1], [r2, -r1, 0.]],
                      dtype='float32')
        #print('Tr[%i]=\n%s\n' % (i+1, Tr))

        cp = grid_cps[i]
        Ti = coords[cp].beta()
        if not np.array_equal(Ti, np.eye(3)):
            log.info('Ti[%i]=\n%s\n' % (i + 1, Ti))
        TiT = Ti.T
        d = np.zeros((6, 6), dtype='float32')
        d[:3, :3] = TiT
        d[3:, 3:] = TiT
        d[:3, 3:] = TiT @ Tr
        D[j:j + 6, :] = d

    Mo = np.zeros((6, 6), dtype='float32')
    #print('D=\n%s\n' % D)
    # translation

    Mo = triple(D, Mgg)
    log.info('Mgg=\n%s\n' % Mgg)
    log.info('Mo=\n%s\n' % Mo)

    # t-translation; r-rotation
    Mt_bar = Mo[:3, :3]
    Mtr_bar = Mo[:3, 3:]
    #Mrt_bar = Mo[3:, :3]
    Mr_bar = Mo[3:, 3:]

    #print('dinner =', diag(Mt_bar))
    delta = np.linalg.norm(np.diag(Mt_bar))
    #print('einner =', Mt_bar - diag(Mt_bar))
    epsilon = np.linalg.norm([
        Mt_bar[0, 1],
        Mt_bar[0, 2],
        Mt_bar[1, 2],
    ])
    if epsilon / delta > 0.001:
        # user warning 3042
        pass

    log.info('Mt_bar (correct) =\n%s\n' % Mt_bar)
    log.info('delta=%s' % delta)
    log.info('epsilon=%s' % epsilon)
    log.info('e/d=%s\n' % (epsilon / delta))

    # hermitian eigenvectors
    omega, S = np.linalg.eigh(Mt_bar)
    log.info('omega=%s' % omega)
    log.info('S (right, but not correct order) =\n%s\n' % S)

    Mt = triple(S, Mt_bar)
    Mtr = triple(S, Mtr_bar)
    Mr = triple(S, Mr_bar)

    # 4. determine the principal axis & cg in the principal mass axis system
    # eq G-18
    Mx = Mt[0, 0]
    My = Mt[1, 1]
    Mz = Mt[2, 2]
    mass = np.diag(Mt)
    log.info('mass = %s' % mass)
    #if min(mass) == 0.:
    #raise RuntimeError('mass = %s' % mass)
    cg = np.array([
        [Mtr[0, 0], -Mtr[0, 2], Mtr[0, 1]],
        [Mtr[1, 2], Mtr[1, 1], -Mtr[1, 0]],
        [-Mtr[2, 1], Mtr[2, 0], Mtr[2, 2]],
    ],
                  dtype='float32')
    if mass[0] != 0.:
        cg[0, :] /= Mx
    if mass[1] != 0.:
        cg[1, :] /= My
    if mass[2] != 0.:
        cg[2, :] /= Mz
    #cg = nan_to_num(cg)

    log.info('cg=\n%s\n' % cg)
    #xx = cg[0, 0]
    yx = cg[0, 1]
    zx = cg[0, 2]

    xy = cg[1, 0]
    #yy = cg[1, 1]
    zy = cg[1, 2]

    xz = cg[2, 0]
    yz = cg[2, 1]
    #zz = cg[2, 2]
    I11 = Mr[0, 0] - My * zy**2 - Mz * yz**2
    I21 = I12 = -Mr[0, 1] - Mz * xz * yz
    I13 = I31 = -Mr[0, 2] - My * xy * zy
    I22 = Mr[1, 1] - Mz * xz**2 - Mx * zx**2
    I23 = -Mr[1, 2] - Mx * yx * zx
    I32 = I23
    I33 = Mr[2, 2] - Mx * yx**2 - My * xy**2
    II = np.array([
        [I11, I12, I13],
        [I21, I22, I23],
        [I31, I32, I33],
    ],
                  dtype='float32')
    II = np.nan_to_num(II)

    log.info('I(S)=\n%s\n' % II)

    # 6. Reverse the sign of the off diagonal terms
    II = -II
    np.fill_diagonal(II, -np.diag(II))
    #print('I~=\n%s\n' % II)
    if np.isnan(II).any():
        omegaQ = np.zeros(3, dtype='float32')
        Q = np.zeros((3, 3), dtype='float32')
    else:
        omegaQ, Q = np.linalg.eig(II)
    #i = argsort(omegaQ)
    log.info('omegaQ = %s' % omegaQ)
    log.info('Q -> wrong =\n%s\n' % Q)
    IQ = triple(Q, II)
    #print('I(Q) -> wrong =\n%s\n' % IQ)

    return Mo, S, mass, cg, II, IQ, Q
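# The helper triple() used above is not defined in this listing. A minimal sketch
# under the assumption that it forms the congruence product A^T @ M @ A, which is
# the D^T Mgg D reduction that make_gpwg() needs:
import numpy as np


def triple_sketch(A, M):
    """Congruence (triple) product A^T * M * A."""
    return A.T @ M @ A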
    def save_tensor_stats(self, tensor, tag, id, tensors_q={}, force_global_min_max=False):
        # ignore FC or 1x1 case
        if len(tensor.shape) < 3 or (tensor.shape[2] == 1 and tensor.shape[3] == 1):
            return

        # Assume activation dimensions [N, C, H, W]
        t = tensor.transpose(0, 1).contiguous()  # [C, N, H, W]
        t = t.view(t.shape[0], -1) # [C, NxHxW]

        mean_ = t.mean(-1)
        std_ = torch.std(t, dim=-1, unbiased=True)
        for sn in self.stats_names:
            if sn == 'kurtosis':
                st = torch.mean(((t - mean_.unsqueeze(-1)) / std_.unsqueeze(-1))**4, dim=-1) - 3
            elif sn == 'b':
                st = torch.mean(torch.abs(t - mean_.unsqueeze(-1)), dim=-1)
            elif sn == 'std':
                st = std_
            elif sn == 'std_pos':
                t_relu = torch.nn.functional.relu(t)
                st = torch.std(t_relu, dim=-1, unbiased=True)
            elif sn == 'mean':
                st = mean_
            elif sn == 'max':
                if force_global_min_max:
                    st = t.max(-1)[0]
                else:
                    st = torch.mean(tensor.view(tensor.shape[0], tensor.shape[1], -1).max(dim=-1)[0], dim=0) \
                        if self.batch_avg else t.max(-1)[0]
            elif sn == 'min':
                if force_global_min_max:
                    st = t.min(-1)[0]
                else:
                    st = torch.mean(tensor.view(tensor.shape[0], tensor.shape[1], -1).min(dim=-1)[0], dim=0) if self.batch_avg else \
                        torch.min(tensor.view(tensor.shape[0], tensor.shape[1], -1).min(dim=-1)[0], dim=0)[0]
            elif 'mse' in sn:
                if len(tensors_q) > 0:
                    t_orig = tensors_q['orig']
                    t_q = tensors_q[sn.split('_')[1]]
                    st = torch.mean(torch.mean(((t_orig - t_q)**2).view(t_orig.shape[0], t_orig.shape[1], -1), dim=-1), dim=0)
                else:
                    continue
            # elif 'mae' in sn:
            #     if len(tensors_q) > 0:
            #         t = tensors_q['orig'].view(t.shape)
            #         t_q = tensors_q[sn.split('_')[1]].view(t.shape)
            #         st = torch.mean(torch.abs(t - t_q), dim=-1)
            #     else:
            #         continue
            elif 'cos' in sn:
                if len(tensors_q) > 0:
                    t_orig = tensors_q['orig'].view(tensor.shape[0], tensor.shape[1], -1)
                    t_q = tensors_q[sn.split('_')[1]].view(tensor.shape[0], tensor.shape[1], -1)
                    st = cos_sim(t_orig, t_q, dims=[-1, 0])
                else:
                    continue
            # elif 'ang' in sn:
            #     if len(tensors_q) > 0:
            #         t = tensors_q['orig'].view(t.shape)
            #         t_q = tensors_q[sn.split('_')[1]].view(t.shape)
            #         cos = cos_sim(t, t_q)
            #         st = torch.acos(cos)
            #     else:
            #         continue
            else:
                # unknown stat name; skip it so a stale 'st' from a previous iteration is not stored
                continue

            st = st.cpu().numpy()
            if 'cos' in sn:
                st = np.nan_to_num(st)
                st[st == 0] = 1.

            if id not in self.stats:
                self.stats[id] = {}
            if sn not in self.stats[id]:
                self.stats[id][sn] = st
            else:
                # if len(st.shape) > 1:
                self.stats[id][sn] = np.vstack([self.stats[id][sn], st])
Exemplo n.º 49
0
def plot_community_conn_mat(conn_matrix,
                            labels,
                            out_path_fig_comm,
                            community_aff,
                            cmap,
                            dpi_resolution=300):
    """
    Plot a community-parcellated connectivity matrix.

    Parameters
    ----------
    conn_matrix : array
        NxN matrix.
    labels : list
        List of string labels corresponding to ROI nodes.
    out_path_fig_comm : str
        File path to save the community-parcellated connectivity matrix image
        as a .png figure.
    community_aff : array
        Community-affiliation vector.
    """
    import warnings
    warnings.filterwarnings("ignore")
    import matplotlib
    import mplcyberpunk
    from matplotlib import pyplot as plt
    matplotlib.use("agg")
    plt.style.use("cyberpunk")
    import matplotlib.patches as patches
    import matplotlib.ticker as mticker
    from nilearn.plotting import plot_matrix
    from pynets.core import thresholding

    plt.style.use("cyberpunk")

    conn_matrix_bin = thresholding.binarize(conn_matrix)
    conn_matrix = thresholding.standardize(conn_matrix)
    conn_matrix_plt = np.nan_to_num(np.multiply(conn_matrix, conn_matrix_bin))

    sorting_array = sorted(range(len(community_aff)),
                           key=lambda k: community_aff[k])
    sorted_conn_matrix = conn_matrix[sorting_array, :]
    sorted_conn_matrix = sorted_conn_matrix[:, sorting_array]
    rois_num = sorted_conn_matrix.shape[0]
    if rois_num < 100:
        try:
            plot_matrix(
                conn_matrix_plt,
                figure=(10, 10),
                labels=labels,
                vmax=np.percentile(conn_matrix_plt[conn_matrix_plt > 0], 95),
                vmin=0,
                reorder=False,
                auto_fit=True,
                grid=False,
                colorbar=False,
                cmap=cmap,
            )
        except RuntimeWarning:
            print("Connectivity matrix too sparse for plotting...")
    else:
        try:
            plot_matrix(
                conn_matrix_plt,
                figure=(10, 10),
                vmax=np.abs(np.max(conn_matrix_plt)),
                vmin=0,
                auto_fit=True,
                grid=False,
                colorbar=False,
                cmap=cmap,
            )
        except RuntimeWarning:
            print("Connectivity matrix too sparse for plotting...")

    ax = plt.gca()
    total_size = 0
    for community in np.unique(community_aff):
        size = int(np.sum(community_aff == community))
        ax.add_patch(
            patches.Rectangle(
                (total_size, total_size),
                size,
                size,
                fill=False,
                edgecolor="white",
                alpha=None,
                linewidth=1,
            ))
        total_size += size

    if len(labels) > 500:
        tick_interval = 5
    elif len(labels) > 100:
        tick_interval = 4
    elif len(labels) > 50:
        tick_interval = 2
    else:
        tick_interval = 1

    plt.axes().yaxis.set_major_locator(mticker.MultipleLocator(tick_interval))
    plt.axes().xaxis.set_major_locator(mticker.MultipleLocator(tick_interval))
    for param in ['figure.facecolor', 'axes.facecolor', 'savefig.facecolor']:
        plt.rcParams[param] = '#000000'
    plt.savefig(out_path_fig_comm, dpi=dpi_resolution)
    plt.close()
    return
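
A minimal usage sketch for the function above, with toy inputs invented for illustration (assumes pynets, nilearn, matplotlib and mplcyberpunk are installed):

import numpy as np

# hypothetical toy inputs, for illustration only
n_rois = 30
rng = np.random.default_rng(0)
conn_matrix = rng.random((n_rois, n_rois))
conn_matrix = (conn_matrix + conn_matrix.T) / 2       # symmetric weights
labels = ['ROI_%i' % i for i in range(n_rois)]
community_aff = rng.integers(0, 3, n_rois)            # three communities

plot_community_conn_mat(conn_matrix, labels, 'community_conn_mat.png',
                        community_aff, cmap='viridis')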
Exemplo n.º 50
0
    def train_ctax_path(self, stepsize, number_of_weights):
        """
        Here the weights for each ctax step is calculated

        Input: the linear and random ctax paths, including the reductions from TIMER.
        Output: b values (weights) for the given ctax levels.
        """
        self.stepsize = stepsize
        self.number_of_weights = number_of_weights

        # for some reason, some values are not exactly rounded
        self.train_path[:, -1] = np.round(self.train_path[:, -1])

        # get lin paths based on final ctax
        self.lin_train = [
            self.lin_path[self.lin_path[:, -1] == path[-1]]
            for path in self.train_path
        ]
        self.lin_train = np.vstack(self.lin_train)
        self.lin_train = self.lin_train[self.lin_train[:, -1].argsort()]
        self.train_path = self.train_path[self.train_path[:, -1].argsort()]

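        # deviation of each random path from its matching linear path, normalised by the final ctax level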
        delta_cs = self.lin_train - self.train_path
        final_ctax = self.lin_train[:, -1]
        delta_c_norm = delta_cs / final_ctax[:, None]
        delta_c_norm = np.nan_to_num(delta_c_norm)  # for first two rows
        delta_c_dict = [{
            'delta_c': delta_c_norm[i],
            'final ctax': final_ctax[i]
        } for i in range(len(delta_c_norm))]

        # stepsize is the number of paths used to calculate the weights; 20 is the dollar step used in TIMER
        for index in range(0, 200 + stepsize, stepsize):

            stepsize_ctax = index * 20

            delta_c_step = [
                ctax['delta_c'] for ctax in delta_c_dict
                if ctax['final ctax'] <= stepsize_ctax
                and ctax['final ctax'] >= stepsize_ctax - (stepsize * 20)
            ]

            # get number of weights wanted
            self.count_weights = int(len(delta_c_step[0]) / number_of_weights)
            delta_c_slice = [
                np.mean(delta_c[1:].reshape(-1, self.count_weights), axis=1)
                for delta_c in delta_c_step
            ]

            lin_reduction_step = self.df_combined_lin[
                (self.df_combined_lin[self.year] <= stepsize_ctax)
                & (self.df_combined_lin[self.year] >= stepsize_ctax -
                   (stepsize * 20))]

            train_reduction_step = self.df_combined_train[
                (self.df_combined_train[self.year] <= stepsize_ctax)
                & (self.df_combined_train[self.year] >= stepsize_ctax -
                   (stepsize * 20))]

            train_reduction_step[self.year] = train_reduction_step[
                self.year].round(0)
            lin_reduction_step[self.year] = lin_reduction_step[
                self.year].round(0)

            lin_reduction_step = pd.merge(lin_reduction_step,
                                          train_reduction_step,
                                          on=self.year)
            lin_reduction_step = lin_reduction_step[['reduction_x']].values
            train_reduction_step = train_reduction_step[['reduction']].values

            #            print(len(delta_c_slice), len(lin_reduction_step), len(train_reduction_step))

            # set initial values to 0
            x0 = [0. for _ in delta_c_slice[0]]

            res = minimize(self.objective,
                           x0,
                           args=(delta_c_slice, lin_reduction_step,
                                 train_reduction_step))

            weights = pd.DataFrame([[x for x in res.x] + [index * 20]])
            weights.columns = [*weights.columns[:-1], 'ctax']

            self.weights = pd.concat([self.weights, weights])
            self.weights = self.weights.reset_index(drop=True)

        print('weights dataframe:', '\n', self.weights)

        # quick vis of paths found
        weights_columns = weights.columns.values
        weights_columns = weights_columns[:-1]

        fig1, ax1 = plt.subplots()
        for column in weights_columns:
            ax1.plot(self.weights['ctax'], self.weights[column], label=column)
        ax1.set_xlabel('final ctax')
        ax1.set_ylabel('weight')
        ax1.legend()
Exemplo n.º 51
0
    def backward_G(self, epoch, seg_criterion=None, A_gt=False):
        # self.loss_G_A = torch.zeros(1).cuda()
        if not self.multi_D:
            pred_fake = self.netD_A.forward(self.fake_B)
            if self.opt.use_wgan:
                self.loss_G_A = -pred_fake.mean()
            elif self.opt.use_ragan:
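                # relativistic average GAN: each logit is judged relative to the mean logit of the opposite set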
                pred_real = self.netD_A.forward(self.real_B)
                self.loss_G_A = (
                    self.criterionGAN(pred_real - torch.mean(pred_fake), False)
                    + self.criterionGAN(pred_fake - torch.mean(pred_real),
                                        True)) / 2
            else:
                self.loss_G_A = self.criterionGAN(pred_fake, True)
        else:
            self.loss_G_A = 0
            for c in range(5):
                # select by category; if empty: tensor([])
                if (self.category == c).nonzero().size(0) == 0: continue
                pred_fake = self.netD_As[c].forward(
                    torch.index_select(
                        self.fake_B, 0,
                        (self.category == c).nonzero().view(-1).type(
                            torch.cuda.LongTensor)))
                if self.opt.use_wgan:
                    self.loss_G_A += -pred_fake.mean()
                elif self.opt.use_ragan:
                    pred_real = self.netD_As[c].forward(
                        torch.index_select(
                            self.real_B, 0,
                            (self.category == c).nonzero().view(-1).type(
                                torch.cuda.LongTensor)))
                    self.loss_G_A += (self.criterionGAN(
                        pred_real - torch.mean(pred_fake),
                        False) + self.criterionGAN(
                            pred_fake - torch.mean(pred_real), True)) / 2
                else:
                    self.loss_G_A += self.criterionGAN(pred_fake, True)

        loss_G_A = 0
        if self.opt.patchD:
            pred_fake_patch = self.netD_P.forward(self.fake_patch)
            if self.opt.hybrid_loss:
                loss_G_A += self.criterionGAN(pred_fake_patch, True)
            else:
                pred_real_patch = self.netD_P.forward(self.real_patch)
                loss_G_A += (self.criterionGAN(
                    pred_real_patch - torch.mean(pred_fake_patch), False) +
                             self.criterionGAN(
                                 pred_fake_patch - torch.mean(pred_real_patch),
                                 True)) / 2
        if self.opt.patchD_3 > 0:
            for i in range(self.opt.patchD_3):
                pred_fake_patch_1 = self.netD_P.forward(self.fake_patch_1[i])
                if self.opt.hybrid_loss:
                    loss_G_A += self.criterionGAN(pred_fake_patch_1, True)
                else:
                    pred_real_patch_1 = self.netD_P.forward(
                        self.real_patch_1[i])
                    loss_G_A += (self.criterionGAN(
                        pred_real_patch_1 - torch.mean(pred_fake_patch_1),
                        False) + self.criterionGAN(
                            pred_fake_patch_1 - torch.mean(pred_real_patch_1),
                            True)) / 2

            if not self.opt.D_P_times2:
                self.loss_G_A += loss_G_A / float(self.opt.patchD_3 + 1)
            else:
                self.loss_G_A += loss_G_A / float(self.opt.patchD_3 + 1) * 2
        else:
            if not self.opt.D_P_times2:
                self.loss_G_A += loss_G_A
            else:
                self.loss_G_A += loss_G_A * 2

        self.loss_G = self.loss_G_A
        if epoch < 0:
            vgg_w = 0
        else:
            if seg_criterion is None: vgg_w = 1
            else: vgg_w = 0
        if vgg_w > 0:
            if self.opt.vgg > 0:
                self.loss_vgg_b = self.vgg_loss.compute_vgg_loss(
                    self.vgg, self.fake_B,
                    self.real_A) * self.opt.vgg if self.opt.vgg > 0 else 0
                if self.opt.patch_vgg:
                    if not self.opt.IN_vgg:
                        loss_vgg_patch = self.vgg_loss.compute_vgg_loss(
                            self.vgg, self.fake_patch,
                            self.input_patch) * self.opt.vgg
                    else:
                        loss_vgg_patch = self.vgg_patch_loss.compute_vgg_loss(
                            self.vgg, self.fake_patch,
                            self.input_patch) * self.opt.vgg
                    if self.opt.patchD_3 > 0:
                        for i in range(self.opt.patchD_3):
                            if not self.opt.IN_vgg:
                                loss_vgg_patch += self.vgg_loss.compute_vgg_loss(
                                    self.vgg, self.fake_patch_1[i],
                                    self.input_patch_1[i]) * self.opt.vgg
                            else:
                                loss_vgg_patch += self.vgg_patch_loss.compute_vgg_loss(
                                    self.vgg, self.fake_patch_1[i],
                                    self.input_patch_1[i]) * self.opt.vgg
                        self.loss_vgg_b += loss_vgg_patch / float(
                            self.opt.patchD_3 + 1)
                    else:
                        self.loss_vgg_b += loss_vgg_patch
                self.loss_G = self.loss_G_A + self.loss_vgg_b * vgg_w
            elif self.opt.fcn > 0:
                self.loss_fcn_b = self.fcn_loss.compute_fcn_loss(
                    self.fcn, self.fake_B,
                    self.real_A) * self.opt.fcn if self.opt.fcn > 0 else 0
                if self.opt.patchD:
                    loss_fcn_patch = self.fcn_loss.compute_vgg_loss(
                        self.fcn, self.fake_patch,
                        self.input_patch) * self.opt.fcn
                    if self.opt.patchD_3 > 0:
                        for i in range(self.opt.patchD_3):
                            loss_fcn_patch += self.fcn_loss.compute_vgg_loss(
                                self.fcn, self.fake_patch_1[i],
                                self.input_patch_1[i]) * self.opt.fcn
                        self.loss_fcn_b += loss_fcn_patch / float(
                            self.opt.patchD_3 + 1)
                    else:
                        self.loss_fcn_b += loss_fcn_patch
                self.loss_G = self.loss_G_A + self.loss_fcn_b * vgg_w
            # self.loss_G = self.L1_AB + self.L1_BA

        ## Seg Loss ################################
        if seg_criterion is not None:
            # mIoU of enhanced image
            inter, union = utils_seg.batch_intersection_union(
                self.fake_B_Seg.data, self.mask, 19)
            idx = union > 0
            IoU = 1.0 * inter[idx] / (np.spacing(1) + union[idx])
            self.mIoU = np.nan_to_num(IoU.mean())

            with torch.no_grad():
                # mIoU of origin image by pretrained Seg Model
                inter, union = utils_seg.batch_intersection_union(
                    self.real_A_Seg.data, self.mask, 19)
                idx = union > 0
                IoU = 1.0 * inter[idx] / (np.spacing(1) + union[idx])
                self.mIoU_ori = np.nan_to_num(IoU.mean())
                self.mIoU_delta_mean = 0.8 * self.mIoU_delta_mean + 0.2 * np.round(
                    self.mIoU - self.mIoU_ori, 3)

                # mIoU of origin image by Generator
                inter, union = utils_seg.batch_intersection_union(
                    self.seg_real_A.data, self.mask, 19)
                idx = union > 0
                IoU = 1.0 * inter[idx] / (np.spacing(1) + union[idx])
                print("mIoU_generator", np.round(np.nan_to_num(IoU.mean()), 3))

            print("G:", self.loss_G.data[0], "mIoU gain:",
                  np.round(self.mIoU - self.mIoU_ori, 3), "mean:",
                  np.round(self.mIoU_delta_mean, 3), "lum:",
                  255 * (1 - self.input_A_gray).mean(), "epoch:", epoch)

            lambd = 3
            self.loss_Seg = seg_criterion(
                self.fake_B_Seg,
                self.mask) + lambd * seg_criterion(self.seg_real_A, self.mask)
            self.loss_G += self.loss_Seg
        ############################################
        ## GAN_GT Loss ################################
        if A_gt:
            # msssim = msssim_loss((self.fake_B.clamp(-1, 1)+1)/2*255, (self.A_gt+1)/2*255, weight_map=self.A_boundary)
            l1 = (F.l1_loss((self.fake_B + 1) / 2 * 255,
                            (self.A_gt + 1) / 2 * 255,
                            reduction='none') * self.A_boundary).mean()
            # self.loss_gt = 3 * msssim + 0.16 * l1
            self.loss_gt = 0.1 * l1
            print("loss_gt", self.loss_gt.data[0])
            self.loss_G += self.loss_gt
        ############################################

        self.loss_G.backward(retain_graph=True)
Exemplo n.º 52
0
def main():
    usage = '''
Usage:
------------------------------------------------
Run the iterated MAD algorithm on two multispectral images   

python %s [OPTIONS] filename1 filename2
    
Options:

   -h           this help
   -i  <int>    maximum iterations (default 50)
   -d  <list>   spatial subset list e.g. -d [0,0,500,500]
   -p  <list>   band positions list e.g. -p [1,2,3]
   -l  <float>  regularization (default 0)
   -n           suppress graphics
   -c           append canonical variates to output
    
    
The output MAD variate file has the same format
as filename1 and is named

      path/MAD_filebasename1-filebasename2.ext1
      
where filename1 = path/filebasename1.ext1
      filename2 = path/filebasename2.ext2

For ENVI files, ext1 or ext2 is the empty string.       
-----------------------------------------------------''' % sys.argv[0]
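    # example call (script and file names are illustrative):
    #   python iMad.py -i 30 -p [1,2,3] -d [0,0,400,400] scene1.tif scene2.tif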
    options, args = getopt.getopt(sys.argv[1:], 'hncl:p:i:d:')
    pos = None
    dims = None
    niter = 50
    graphics = True
    cvs = False
    lam = 0.0
    for option, value in options:
        if option == '-h':
            print(usage)
            return
        elif option == '-n':
            graphics = False
        elif option == '-c':
            cvs = True
        elif option == '-p':
            pos = eval(value)
        elif option == '-d':
            dims = eval(value)
        elif option == '-i':
            niter = eval(value)
        elif option == '-l':
            lam = eval(value)
    if len(args) != 2:
        print('Incorrect number of arguments')
        print(usage)
        return
    gdal.AllRegister()
    fn1 = args[0]
    fn2 = args[1]
    path = os.path.dirname(fn1)
    basename1 = os.path.basename(fn1)
    root1, ext1 = os.path.splitext(basename1)
    basename2 = os.path.basename(fn2)
    root2, _ = os.path.splitext(basename2)
    outfn = path + '/' + 'MAD_%s-%s%s' % (root1, root2, ext1)
    inDataset1 = gdal.Open(fn1, GA_ReadOnly)
    inDataset2 = gdal.Open(fn2, GA_ReadOnly)
    try:
        cols = inDataset1.RasterXSize
        rows = inDataset1.RasterYSize
        bands = inDataset1.RasterCount
        cols2 = inDataset2.RasterXSize
        rows2 = inDataset2.RasterYSize
        bands2 = inDataset2.RasterCount
    except Exception as e:
        print('Error: %s  --Images could not be read.' % e)
        sys.exit(1)
    if (bands != bands2) or (cols != cols2) or (rows != rows2):
        sys.stderr.write("Size mismatch")
        sys.exit(1)
    if pos is None:
        pos = range(1, bands + 1)
    else:
        bands = len(pos)
    if dims is None:
        x0 = 0
        y0 = 0
    else:
        x0, y0, cols, rows = dims
# if second image is warped, assume it has same dimensions as dims
    if root2.find('_warp') != -1:
        x2 = 0
        y2 = 0
    else:
        x2 = x0
        y2 = y0
    print('------------IRMAD -------------')
    print(time.asctime())
    print('first scene:  ' + fn1)
    print('second scene: ' + fn2)
    start = time.time()
    #  iteration of MAD
    cpm = auxil.Cpm(2 * bands)
    delta = 1.0
    oldrho = np.zeros(bands)
    itr = 0
    tile = np.zeros((cols, 2 * bands))
    sigMADs = 0
    means1 = 0
    means2 = 0
    A = 0
    B = 0
    rasterBands1 = []
    rasterBands2 = []
    rhos = np.zeros((niter, bands))
    for b in pos:
        rasterBands1.append(inDataset1.GetRasterBand(b))
    for b in pos:
        rasterBands2.append(inDataset2.GetRasterBand(b))
    while (delta > 0.001) and (itr < niter):
        #      spectral tiling for statistics
        for row in range(rows):
            for k in range(bands):
                tile[:, k] = rasterBands1[k].ReadAsArray(x0, y0 + row, cols, 1)
                tile[:, bands + k] = rasterBands2[k].ReadAsArray(
                    x2, y2 + row, cols, 1)
#          eliminate no-data pixels
            tile = np.nan_to_num(tile)
            tst1 = np.sum(tile[:, 0:bands], axis=1)
            tst2 = np.sum(tile[:, bands::], axis=1)
            idx1 = set(np.where((tst1 != 0))[0])
            idx2 = set(np.where((tst2 != 0))[0])
            idx = list(idx1.intersection(idx2))
            if itr > 0:
                mads = np.asarray((tile[:, 0:bands] - means1) * A -
                                  (tile[:, bands::] - means2) * B)
                chisqr = np.sum((mads / sigMADs)**2, axis=1)
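                # no-change weights: under the no-change hypothesis the standardised
                # MADs are chi-square distributed with 'bands' degrees of freedom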
                wts = 1 - stats.chi2.cdf(chisqr, [bands])
                cpm.update(tile[idx, :], wts[idx])
            else:
                cpm.update(tile[idx, :])
#      weighted covariance matrices and means
        S = cpm.covariance()
        means = cpm.means()
        #      reset prov means object
        cpm.__init__(2 * bands)
        s11 = S[0:bands, 0:bands]
        s11 = (1 - lam) * s11 + lam * np.identity(bands)
        s22 = S[bands:, bands:]
        s22 = (1 - lam) * s22 + lam * np.identity(bands)
        s12 = S[0:bands, bands:]
        s21 = S[bands:, 0:bands]
        c1 = s12 * linalg.inv(s22) * s21
        b1 = s11
        c2 = s21 * linalg.inv(s11) * s12
        b2 = s22
        #      solution of generalized eigenproblems
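        #      (canonical correlation analysis: A solves s12*s22^-1*s21*a = mu^2*s11*a,
        #      B the mirrored problem; the eigenvalues mu^2 are the squared correlations)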
        if bands > 1:
            mu2a, A = auxil.geneiv(c1, b1)
            mu2b, B = auxil.geneiv(c2, b2)
            #          sort a
            idx = np.argsort(mu2a)
            A = (A[:, idx])[:, ::-1]
            #          sort b
            idx = np.argsort(mu2b)
            B = (B[:, idx])[:, ::-1]
            mu2 = (mu2b[idx])[::-1]
        else:
            mu2 = c1 / b1
            A = 1 / np.sqrt(b1)
            B = 1 / np.sqrt(b2)
#      canonical correlations
        mu = np.sqrt(mu2)
        a2 = np.diag(A.T * A)
        b2 = np.diag(B.T * B)
        sigma = np.sqrt((2 - lam * (a2 + b2)) / (1 - lam) - 2 * mu)
        rho = mu * (1 - lam) / np.sqrt((1 - lam * a2) * (1 - lam * b2))
        #      stopping criterion
        delta = max(abs(rho - oldrho))
        rhos[itr, :] = rho
        oldrho = rho
        #      tile the sigmas and means
        sigMADs = np.tile(sigma, (cols, 1))
        means1 = np.tile(means[0:bands], (cols, 1))
        means2 = np.tile(means[bands::], (cols, 1))
        #      ensure sum of positive correlations between X and U is positive
        D = np.diag(1 / np.sqrt(np.diag(s11)))
        s = np.ravel(np.sum(D * s11 * A, axis=0))
        A = A * np.diag(s / np.abs(s))
        #      ensure positive correlation between each pair of canonical variates
        cov = np.diag(A.T * s12 * B)
        B = B * np.diag(cov / np.abs(cov))
        itr += 1


#  canonical correlations
    print('rho: %s' % str(rho))

    #  write results to disk
    driver = inDataset1.GetDriver()
    outBands = []
    if cvs:
        outDataset = driver.Create(outfn, cols, rows, 3 * bands + 1,
                                   GDT_Float32)
        for k in range(3 * bands + 1):
            outBands.append(outDataset.GetRasterBand(k + 1))
    else:
        outDataset = driver.Create(outfn, cols, rows, bands + 1, GDT_Float32)
        for k in range(bands + 1):
            outBands.append(outDataset.GetRasterBand(k + 1))
    projection = inDataset1.GetProjection()
    geotransform = inDataset1.GetGeoTransform()
    if geotransform is not None:
        gt = list(geotransform)
        gt[0] = gt[0] + x0 * gt[1]
        gt[3] = gt[3] + y0 * gt[5]
        outDataset.SetGeoTransform(tuple(gt))
    if projection is not None:
        outDataset.SetProjection(projection)
    for row in range(rows):
        for k in range(bands):
            tile[:, k] = rasterBands1[k].ReadAsArray(x0, y0 + row, cols, 1)
            tile[:, bands + k] = rasterBands2[k].ReadAsArray(
                x2, y2 + row, cols, 1)
        cv1 = (tile[:, 0:bands] - means1) * A
        cv2 = (tile[:, bands::] - means2) * B
        mads = np.asarray(cv1 - cv2)
        chisqr = np.sum((mads / (sigMADs))**2, axis=1)
        for k in range(bands):
            outBands[k].WriteArray(np.reshape(mads[:, k], (1, cols)), 0, row)
        outBands[bands].WriteArray(np.reshape(chisqr, (1, cols)), 0, row)
        if cvs:
            for k in range(bands + 1, 2 * bands + 1):
                outBands[k].WriteArray(
                    np.reshape(cv1[:, k - bands - 1], (1, cols)), 0, row)
            for k in range(2 * bands + 1, 3 * bands + 1):
                outBands[k].WriteArray(
                    np.reshape(cv2[:, k - 2 * bands - 1], (1, cols)), 0, row)
    for outBand in outBands:
        outBand.FlushCache()
    outDataset = None
    inDataset1 = None
    inDataset2 = None
    print('result written to: ' + outfn)
    print('elapsed time: %s' % str(time.time() - start))
    x = np.array(range(itr - 1))
    if graphics:
        plt.plot(x, rhos[0:itr - 1, :])
        plt.title('Canonical correlations')
        plt.xlabel('Iteration')
        plt.show()
        cm1 = (s11 * A - s12 * B) * D * np.diag(1 / sigma)
        ax = plt.subplot(111)
        for i in range(bands):
            ax.plot(range(1, bands + 1), cm1[:, i], label='MAD' + str(i + 1))
        plt.title('iMAD correlations with first scene')
        plt.xlabel('Band')
        ax.legend()
        plt.show()
Exemplo n.º 53
0
    def makekeyspec(samplekeydata, targetpercent):
        def makekeyaverage(data):
            b = np.array(data[0]) * 0
            for i in data:
                j = np.array(i)
                b = b + j
            average = b // len(data)
            return average

        havengsample = 1
        ngfileadr = []

        while havengsample == 1:
            print('-' * 19 + ' screening key samples for good parts ' + '-' * 19)
            print('sample count:', len(samplekeydata))
            samplekeyaverage = makekeyaverage(samplekeydata)
            percentarray = []
            diffvaluearray = []

            for data in samplekeydata:
                specvalue = abs((((np.array(data)) / samplekeyaverage) - 1))
                percentarray.append(specvalue)

                diffvalue = abs((np.array(data)) - samplekeyaverage)
                diffvaluearray.append(diffvalue)

            testsamplenumber = 0
            samplenumber = 0
            ngsamplenumber = []
            havengsample = 0

            percentarray = np.nan_to_num(percentarray)
            diffvaluearray = np.nan_to_num(diffvaluearray)

            for samplepercent in percentarray:
                maxpercent = np.max(samplepercent)

                if maxpercent >= targetpercent:
                    # locations (per axis) of the largest relative deviation
                    maxlocation = np.where(
                        samplepercent == np.max(samplepercent))

                    # absolute differences from the average at those locations
                    diffarray = diffvaluearray[testsamplenumber][maxlocation]

                    maxdiff = np.max(diffarray)
                    if (maxdiff <= 5):
                        samplenumber += 1
                        break
                    else:
                        havengsample = 1
                        ngsamplenumber.append(testsamplenumber)
                        del samplekeydata[samplenumber]

                    testsamplenumber += 1

                else:
                    samplenumber += 1
                    testsamplenumber += 1

            if havengsample == 1:
                for ng in ngsamplenumber:
                    ngfileadr.append(L[ng])
        print('*' * 19 + ' defective key samples ' + '*' * 19)
        print(ngfileadr)
        print('\n')
        return samplekeyaverage
Exemplo n.º 54
0
def plot_conn_mat(conn_matrix,
                  labels,
                  out_path_fig,
                  cmap,
                  binarized=False,
                  dpi_resolution=300):
    """
    Plot a connectivity matrix.

    Parameters
    ----------
    conn_matrix : array
        NxN matrix.
    labels : list
        List of string labels corresponding to ROI nodes.
    out_path_fig : str
        File path to save the connectivity matrix image as a .png figure.
    """
    import warnings
    warnings.filterwarnings("ignore")
    import matplotlib
    import mplcyberpunk
    matplotlib.use('Agg')
    from matplotlib import pyplot as plt
    plt.style.use("cyberpunk")
    from nilearn.plotting import plot_matrix
    from pynets.core import thresholding
    import matplotlib.ticker as mticker

    conn_matrix = thresholding.standardize(conn_matrix)
    conn_matrix_bin = thresholding.binarize(conn_matrix)
    conn_matrix_plt = np.nan_to_num(np.multiply(conn_matrix, conn_matrix_bin))

    try:
        plot_matrix(
            conn_matrix_plt,
            figure=(10, 10),
            labels=labels,
            vmax=np.percentile(conn_matrix_plt[conn_matrix_plt > 0], 95),
            vmin=0,
            reorder="average",
            auto_fit=True,
            grid=False,
            colorbar=True,
            cmap=cmap,
        )
    except RuntimeWarning:
        print("Connectivity matrix too sparse for plotting...")

    if len(labels) > 500:
        tick_interval = 5
    elif len(labels) > 100:
        tick_interval = 4
    elif len(labels) > 50:
        tick_interval = 2
    else:
        tick_interval = 1

    plt.axes().yaxis.set_major_locator(mticker.MultipleLocator(tick_interval))
    plt.axes().xaxis.set_major_locator(mticker.MultipleLocator(tick_interval))
    for param in ['figure.facecolor', 'axes.facecolor', 'savefig.facecolor']:
        plt.rcParams[param] = '#000000'
    plt.savefig(out_path_fig, dpi=dpi_resolution)
    plt.close()
    return
Exemplo n.º 55
0
import json

import numpy as np
import plotly.graph_objects as go
from scipy import signal

max_rows = 7000000
resample_factor = 25
# get data
url = 'https://mapcore-bucket1.s3-us-west-2.amazonaws.com/ISAN/csv-data/use-case-2/Sample_1_18907001_channel_1.csv'
bigfile = "C:\\Users\\jkho021\\Downloads\\bigdata.csv"
data = np.loadtxt(bigfile,
                  skiprows=1,
                  usecols=tuple(range(0, 3)),
                  delimiter=",",
                  max_rows=max_rows)

data_array = data.view()
data_array = data_array.transpose()
data_array = np.nan_to_num(data_array)

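# FFT-based resampling of data_array[1] down to max_rows / resample_factor points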
data_resampled = signal.resample(data_array[1], int(max_rows / resample_factor))

with open('resampled.json', 'w') as f:
    json.dump({'data': data_resampled.tolist()}, f)

# Create figure
fig = go.Figure()

fig.add_trace(
    go.Scattergl(
        x=np.linspace(0, len(data_array[1]), len(data_resampled)),
        y=data_resampled,
        mode='lines',
        name='resampled',
    ))
Exemplo n.º 56
0
def apply_telcal(st,
                 data,
                 threshold=1 / 10.,
                 onlycomplete=True,
                 sign=+1,
                 savesols=False,
                 returnsoltime=False):
    """ Wrap all telcal functions to parse telcal file and apply it to data
    sign defines if calibration is applied (+1) or backed out (-1).
    assumes dual pol and that each spw has same nch and chansize.
    Threshold is minimum ratio of gain amp to median gain amp.
    If no solution found, it will blank the data to zeros.
    """

    assert sign in [-1, +1], 'sign must be +1 or -1'

    if st.gainfile is None:
        return data
    else:
        if (not os.path.exists(st.gainfile)) or (not os.path.isfile(
                st.gainfile)):
            logger.warning(
                '{0} is not a valid gain file. No calibration applied.'.format(
                    st.gainfile))
            return data
        else:
            sols = getsols(st,
                           threshold=threshold,
                           onlycomplete=onlycomplete,
                           savesols=savesols)
            reffreq, nchan, chansize = st.metadata.spw_sorted_properties
            skyfreqs = np.around([
                reffreq[i] + (chansize[i] * nchan[i] // 2)
                for i in range(len(nchan))
            ], -6) / 1e6  # GN skyfreq is band center
            if len(sols):
                pols = np.array([0, 1])
                solskyfreqs = np.unique(sols['skyfreq'])
                logger.info(
                    "Applying solutions from frequencies {0} to data frequencies {1}"
                    .format(solskyfreqs, np.unique(skyfreqs)))
                gaindelay = np.nan_to_num(calcgaindelay(sols,
                                                        st.blarr,
                                                        skyfreqs,
                                                        pols,
                                                        chansize[0] / 1e6,
                                                        nchan[0],
                                                        sign=sign),
                                          copy=False).take(st.chans, axis=1)
            else:
                logger.info(
                    "No calibration solutions found for data freqs {0}".format(
                        np.unique(skyfreqs)))
                gaindelay = np.zeros_like(data)

        # check for repeats or bad values
        repeats = [(item, count) for item, count in Counter(
            gaindelay[:, ::nchan[0]].flatten()).items() if count > 1]
        if len(repeats):
            for item, count in repeats:
                if item == 0j:
                    logger.info(
                        "{0} of {1} telcal solutions zeroed or flagged".format(
                            count, gaindelay[:, ::nchan[0]].size))
                    if gaindelay.any():
                        blinds, chans, pols = np.where(
                            gaindelay[:, ::nchan[0]] == 0)
                        if len(blinds):
                            counts = list(
                                zip(*np.histogram(
                                    st.blarr[np.unique(blinds)].flatten(),
                                    bins=np.arange(
                                        1, 1 + max(st.blarr[np.unique(
                                            blinds)].flatten())))))

                            logger.info('Flagged solutions for: {0}'.format(
                                ', '.join([
                                    'Ant {1}: {0}'.format(a, b)
                                    for (a, b) in counts
                                ])))
                else:
                    logger.warning(
                        "Repeated telcal solutions ({0}: {1}) found. Likely a parsing error!"
                        .format(item, count))

        if returnsoltime:
            soltime = np.unique(sols['mjd'])
            return data * gaindelay, soltime
        else:
            return data * gaindelay
Exemplo n.º 57
0
    def _entropy(self, values):
        p = values.sum() / values.shape[0]
        q = 1.0 - p
        return -np.nan_to_num(p * np.log2(p)) - np.nan_to_num(q * np.log2(q))
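
The helper above computes the binary (Shannon) entropy of what appears to be a 0/1 label vector; np.nan_to_num maps the indeterminate 0*log2(0) term to 0. A quick standalone check of the same arithmetic, with made-up values:

import numpy as np

values = np.array([1, 1, 0, 0])      # balanced 0/1 labels, for illustration
p = values.sum() / values.shape[0]   # 0.5
q = 1.0 - p
H = -np.nan_to_num(p * np.log2(p)) - np.nan_to_num(q * np.log2(q))
print(H)  # 1.0 bit; an all-ones or all-zeros vector gives 0.0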
Exemplo n.º 58
0
    def makespec(testsampledata, targetpercent):
        def makeaverage(sampledata2):
            b = (np.array(sampledata2[0])) * 0
            for i in sampledata2:
                j = np.array(i)
                b = b + j
            average = b // (len(sampledata2))
            return average

        havengsample = 1
        ngfileadr = []

        while havengsample == 1:
            print('-' * 19 + ' screening samples for good parts ' + '-' * 19)
            print('sample count:', len(testsampledata))
            print('\n')
            sampleaverage = makeaverage(testsampledata)
            percentarray = []
            diffvaluearray = []

            for data in testsampledata:
                specvalue = abs(((np.array(data)) / sampleaverage) - 1)
                percentarray.append(specvalue)

                diffvalue = abs((np.array(data) - sampleaverage))
                diffvaluearray.append(diffvalue)

            testsamplenumber = 0
            samplenumber = 0
            ngsamplenumber = []
            havengsample = 0
            percentarray = np.nan_to_num(percentarray)
            diffvaluearray = np.nan_to_num(diffvaluearray)

            for samplepercent in percentarray:
                maxpercent = np.max(samplepercent)
                if maxpercent >= targetpercent:

                    # flatten the 2-D sample data to 1-D for easier comparison
                    singellinepercent = samplepercent.flatten()
                    # flatten the differences from the average in the same way
                    singellinediff = (
                        diffvaluearray[testsamplenumber]).flatten()
                    b = np.arange(len(singellinepercent))
                    # c holds the indices where this sample deviates by more than targetpercent
                    c = b[singellinepercent >= targetpercent]

                    for i in range(len(c)):
                        if singellinediff[c[i]] > 5:
                            havengsample = 1
                            ngsamplenumber.append(testsamplenumber)
                            del testsampledata[samplenumber]
                            samplenumber -= 1
                            break

                testsamplenumber += 1
                samplenumber += 1

            if havengsample == 1:
                for ng in ngsamplenumber:
                    ngfileadr.append(L[ng])
        print('*' * 19 + ' defective VA-area samples ' + '*' * 19)
        print(ngfileadr)
        print('total defective VA-area samples:', len(ngfileadr))
        print('\n')
        '''print(sampleaverage)'''
        return sampleaverage
Exemplo n.º 59
0
def main():
    parser = _build_args_parser()
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)

    # make sure the input files exist
    if not isfile(args.time_series):
        parser.error('The file "{0}" must exist.'.format(args.time_series))

    if not isfile(args.mesh):
        parser.error('The file "{0}" must exist.'.format(args.mesh))

    # make sure files are not accidently overwritten
    if isfile(args.output):
        if args.overwrite:
            logging.info('Overwriting "{0}".'.format(args.output))
        else:
            parser.error('The file "{0}" already exists. Use -f to overwrite it.'.format(args.output))

    # load mapping
    mesh = np.load(args.mesh, allow_pickle=True)

    mapping = mesh['mapping']
    shape = mesh['shape']

    logging.info('Loading timeseries data.')

    # load time series for left and right hemispheres
    full_time_series_data = np.load(args.time_series)
    time_series_data = np.concatenate((full_time_series_data['lh_time_series'], 
                                       full_time_series_data['rh_time_series']))

    logging.info('TS length:' + str(time_series_data.shape))
    logging.info('Calculating mean signal for ' + str(shape[0]) + ' vertices.')

    # initialise an array to fill in the loop
    mean_time_series = np.empty([shape[0], time_series_data.shape[1]], dtype=np.float64)

    # calculate the mean signal at each
    # vertex given the current mapping
    for i in range(shape[0]):
        vertex = mapping[i]
        mean_time_series[i, :] = np.mean(time_series_data[vertex, :], axis=0)

    logging.info('Calculating FC.')

    n = shape[0]
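    # n*(n+1)/2 entries: the upper triangle of the FC matrix, diagonal included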
    fc = np.ones(((n * (n+1)) / 2))
    index = 0

    # calculate the FC using an upper triangular indexing scheme
    for i in xrange(n-1):
        index += 1
        offset = n - i

        # calculate the correlation between the current vertex all the vertices in the vector that come after it
        fc[index:(index+offset-1)] = corr2(mean_time_series[i:(i+1), :], mean_time_series[(i+1):, :]).ravel()

        # used for upper triangular indices
        index += offset-1

    result = np.zeros((n,n))
    result[np.triu_indices(n)] = fc

    # replace all nans with 0s
    result = np.nan_to_num(result)

    sub_sub_fc = None
    sub_surf_fc = None

    # calculate subcortical FC
    if args.sub_rois is not None:
        logging.info('Calculating FC for subcortical regions.')
         
        labels = ['label_' + lbl for lbl in map(str, args.sub_rois)]
        n = len(labels)

        sub_mean_time_series = np.empty([n, time_series_data.shape[1]], dtype=np.float64)
        
        for i in range(n):
            sub_mean_time_series[i,:] = np.mean(full_time_series_data[labels[i]], axis=0)
   
        fc = np.ones(((n * (n+1)) / 2))
        index = 0

        # calculate the subcortico-subcortical FC using an upper triangular indexing scheme
        for i in xrange(n-1):
            index += 1
            offset = n - i

            fc[index:(index+offset-1)] = corr2(sub_mean_time_series[i:(i+1), :], 
                                               sub_mean_time_series[(i+1):, :]).ravel()

            # used for upper triangular indices
            index += offset-1

        sub_sub_fc = np.zeros((n,n))
        sub_sub_fc[np.triu_indices(n)] = fc

        # replace all nans with 0s
        sub_sub_fc = np.nan_to_num(sub_sub_fc)

        # calculate the cortico-subcortical FC
        sub_surf_fc = np.ones((n, shape[0]))

        # calculate the subcortico-subcortical FC using an upper triangular indexing scheme
        for i in range(n):
            sub_surf_fc[i,:] = corr2(sub_mean_time_series[i:(i+1), :], mean_time_series).ravel()

        # replace all nans with 0s
        sub_surf_fc = np.nan_to_num(sub_surf_fc)

    # save the results
    if args.sub_rois is not None:
        scio.savemat(args.output, {'fc': result, 
                                   'sub_sub_fc': sub_sub_fc, 
                                   'sub_surf_fc': sub_surf_fc})
    else:
        scio.savemat(args.output, {'fc': result})
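
corr2 is not defined in this excerpt; a plausible row-wise Pearson correlation helper matching the call pattern above (an assumed sketch, not the original implementation) could look like this:

import numpy as np

def corr2(a, b):
    # correlate each row of `a` (m, T) with each row of `b` (k, T) -> (m, k)
    a = a - a.mean(axis=1, keepdims=True)
    b = b - b.mean(axis=1, keepdims=True)
    num = np.dot(a, b.T)
    den = np.sqrt((a ** 2).sum(axis=1, keepdims=True) * (b ** 2).sum(axis=1))
    return num / den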
Exemplo n.º 60
0
noxh = noxh[7:(len(noxh) - 14)]
noxh[noxh < 0] = np.nan
co = []
hc = []
ben = []
nox = []
no2 = []

for i in range(23, len(no2h) + 23, 24):
    co.append(np.nanmean(coh[(i - 23):i]))
    hc.append(np.nanmean(hch[(i - 23):i]))
    ben.append(np.nanmean(bh[(i - 23):i]))
    nox.append(np.nanmean(noxh[(i - 23):i]))
    no2.append(np.nanmean(no2h[(i - 23):i]))

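# fill missing daily means: NaNs become 0 via nan_to_num, then zeros are replaced
# with a random fraction of each pollutant's mean so the series has no gaps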
co = np.nan_to_num(co)
co[co == 0] = np.random.random() * np.nanmean(co)
hc = np.nan_to_num(hc)
ben = np.nan_to_num(ben)
ben[ben == 0] = np.random.random() * np.nanmean(ben)
nox = np.nan_to_num(nox)
nox[nox == 0] = np.random.random() * np.nanmean(nox)
no2 = np.nan_to_num(no2)
no2[no2 == 0] = np.random.random() * np.nanmean(no2)

dias = range(1, len(co) + 1)

fig = plt.figure()
plt.plot(dias, no2)
plt.show()