def mk_center_dictionary(positions, data, before=49, after=80):
    """Computes cluster 'centers' or templates and associated data.

    Cluster centers should be built such that they can be used for
    subtraction; this implies that we should make them long enough, on
    both sides of the peak, to see them return to baseline. The formal
    parameters before and after below should therefore be set to larger
    values than the ones used for clustering.

    Parameters
    ----------
    positions : a vector of spike times, that should all come from the
                same cluster and correspond to reasonably 'clean' events.
    data : a data matrix.
    before : the number of sampling points to keep before the peak.
    after : the number of sampling points to keep after the peak.

    Returns
    -------
    A dictionary with the following components:
      center: the estimate of the center (obtained from the median).
      centerD: the estimate of the center's derivative (obtained from
               the median of events cut on the derivative of data).
      centerDD: the estimate of the center's second derivative (obtained
                from the median of events cut on the second derivative
                of data).
      centerD_norm2: the squared norm of the center's derivative.
      centerDD_norm2: the squared norm of the center's second derivative.
      centerD_dot_centerDD: the scalar product of the center's first and
                            second derivatives.
      center_idx: an array of indices generated by
                  np.arange(-before, after + 1).
    """
    from scipy.signal import fftconvolve
    from numpy import apply_along_axis as apply
    # First and second discrete derivatives of each recording channel,
    # estimated with a central-difference kernel.
    dataD = apply(lambda x: fftconvolve(x, np.array([1, 0, -1]) / 2.,
                                        'same'), 1, data)
    dataDD = apply(lambda x: fftconvolve(x, np.array([1, 0, -1]) / 2.,
                                         'same'), 1, dataD)
    # Cut the events on the data and on its two derivatives.
    evts = mk_events(positions, data, before, after)
    evtsD = mk_events(positions, dataD, before, after)
    evtsDD = mk_events(positions, dataDD, before, after)
    # Point-wise medians give robust template estimates.
    evts_median = apply(np.median, 0, evts)
    evtsD_median = apply(np.median, 0, evtsD)
    evtsDD_median = apply(np.median, 0, evtsDD)
    return {"center": evts_median,
            "centerD": evtsD_median,
            "centerDD": evtsDD_median,
            "centerD_norm2": np.dot(evtsD_median, evtsD_median),
            "centerDD_norm2": np.dot(evtsDD_median, evtsDD_median),
            "centerD_dot_centerDD": np.dot(evtsD_median, evtsDD_median),
            "center_idx": np.arange(-before, after + 1)}
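# A minimal usage sketch, not part of the original code: the toy channels and
# spike positions below are made up purely to illustrate the shapes returned by
# mk_center_dictionary (it relies on the module's mk_events helper).
import numpy as np
rng = np.random.default_rng(0)
toy_data = rng.normal(size=(2, 3000))          # two fake recording channels
toy_positions = np.array([500, 1500, 2500])    # fake 'clean' spike times
template = mk_center_dictionary(toy_positions, toy_data, before=49, after=80)
print(template["center"].shape)         # (260,): 2 channels x (49 + 80 + 1) points, glued
print(template["center_idx"][[0, -1]])  # array([-49, 80])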
def good_evts_fct(samp, thr=3):
    # Point-wise median and MAD over the sample of events.
    samp_med = apply(np.median, 0, samp)
    samp_mad = apply(swp.mad, 0, samp)
    # Only the coordinates where the median event is non-positive are used.
    above = samp_med > 0
    samp_r = samp.copy()
    for i in range(samp.shape[0]):
        samp_r[i, above] = 0
    samp_med[above] = 0
    # Keep events whose deviation from the median, in MAD units, stays
    # below thr everywhere.
    res = apply(lambda x: np.all(abs((x - samp_med) / samp_mad) < thr),
                1, samp_r)
    return res
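# A hedged usage sketch (assumed names): `evtsE` would be an events matrix such
# as the one built with swp.mk_events further down; good_evts_fct then flags
# the events whose deviation from the median event stays within `thr` MADs.
good_evts = good_evts_fct(evtsE, thr=8)
clean_evtsE = evtsE[good_evts, :]
print("%d events out of %d kept" % (clean_evtsE.shape[0], evtsE.shape[0]))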
def generate_profiles(raw_data, bins, halo_center):
    singletons = ["r_in", "r_out", "Volume", "MASS"]
    quantities = list(raw_data.keys()) + extra_quantities + singletons
    bin_values = dict((q, []) for q in quantities)
    # Re-centre the particle positions on the halo centre.
    raw_data["POS "] = raw_data["POS "] - halo_center
    for i, out_bin in enumerate(bins[1:]):
        print(i)
        in_bin = bins[i]  # enumerate over bins[1:] shifts the index by one
        bin_data = select_bin(raw_data, in_bin, out_bin)
        # Populate the bins used as weighting parameters.
        bin_values["r_in"].append(in_bin)
        bin_values["r_out"].append(out_bin)
        bin_values["Volume"].append(4. / 3. * pi * (out_bin**3 - in_bin**3))
        bin_values["MASS"].append(np.sum(bin_data["MASS"]))
        for q in quantities:
            if q in mass_weighted_properties:
                bin_values[q].append(np.sum(bin_data[q] * bin_data["MASS"])
                                     / bin_values["MASS"][-1])
            elif q in volume_weighted_properties:
                bin_values[q].append(np.sum(bin_data[q])
                                     / bin_values["Volume"][-1])
            elif q == "T_sl":
                # Spectroscopic-like temperature of gas hotter than 1e6 K:
                # sum(rho^2 T^(1/4)) / sum(rho^2 T^(-3/4)).
                mask = bin_data["T"] > 1e6
                T_cut = np.array([bin_data["T"][mask],
                                  bin_data["RHO "][mask]]).T
                numerator = np.sum(np.apply_along_axis(
                    calc_spec_temperature, 1, T_cut, 0.25))
                denominator = np.sum(np.apply_along_axis(
                    calc_spec_temperature, 1, T_cut, -0.75))
                bin_values["T_sl"].append(numerator / denominator)
            else:
                continue
    for q in quantities:
        bin_values[q] = np.array(bin_values[q])
    return bin_values
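# generate_profiles calls a select_bin helper that is not shown in this
# snippet; a minimal sketch, under the assumption that raw_data["POS "] holds
# the already-centred particle positions and every entry of raw_data is a
# per-particle array:
import numpy as np

def select_bin(raw_data, in_bin, out_bin):
    # Radial distance of each particle from the halo centre.
    radii = np.sqrt(np.sum(raw_data["POS "]**2, axis=1))
    shell = (radii >= in_bin) & (radii < out_bin)
    # Same dictionary structure, restricted to the particles in the shell.
    return {key: value[shell] for key, value in raw_data.items()}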
def _build(self, train: np.ndarray, depth: int = 1) -> dict:
    # Split rows on feature i at value v: rows below v and rows at or above v.
    _split = lambda i, v: (train[np.where(train[:, i] < v)],
                           train[np.where(train[:, i] >= v)])
    # Information gain: negative weighted impurity of the groups (larger is better).
    _gain = lambda gs: -sum([self.metric(g) * len(g) / len(list(chain(*gs)))
                             for g in gs])
    # _split on every candidate value and score the result with the metric.
    _apply = np.vectorize(lambda v, i: _gain(_split(i, v)))
    # Row index of the candidate value with the best score.
    _mini = lambda uni, idx, i: idx[_apply(uni, i).argmax()]
    # Terminal node: class counts of the remaining rows.
    _t = lambda x: Counter(x[:, -1]).most_common()

    m = apply(lambda i: _mini(*np.unique(train[:, i], True), i),
              1, np.array([self.features]).T)
    idx, row = max(zip(self.features, m),
                   key=lambda t: _gain(_split(t[0], train[t[1]][t[0]])))
    left, right = _split(idx, train[row][idx])
    node = {
        'index': idx,
        'value': train[row][idx],
        'left': left,
        'right': right,
    }
    if not left.size or not right.size:
        node['left'] = node['right'] = _t(np.concatenate([left, right]))
    elif depth >= self.max_depth or -_gain([left, right]) < self.min_gain:
        node['left'], node['right'] = _t(left), _t(right)
    else:
        node['left'] = (_t(left) if len(left) <= self.min_size
                        else self._build(left, depth + 1))
        node['right'] = (_t(right) if len(right) <= self.min_size
                         else self._build(right, depth + 1))
    return node
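# _build scores groups with self.metric, which is not part of this snippet; a
# common choice, sketched here as an assumption, is the Gini impurity of the
# class labels stored in the last column of each group.
import numpy as np

def gini_impurity(group: np.ndarray) -> float:
    if len(group) == 0:
        return 0.0
    _, counts = np.unique(group[:, -1], return_counts=True)
    proportions = counts / len(group)
    return 1.0 - float(np.sum(proportions**2))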
def predict(self, X: np.ndarray) -> List[float]:
    """Predict with the fitted random forest on input X.

    @param: X test X
    """
    # Collect each estimator's predictions, then take a majority vote per sample.
    pred = np.array([e.predict(X) for e in self.estimators])
    return apply(lambda y: Counter(y).most_common()[0][0], 1, pred.T)
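# The majority vote above can be checked in isolation; the three estimator
# outputs below are made up for illustration only.
import numpy as np
from collections import Counter
from numpy import apply_along_axis as apply

votes = np.array([[0, 1],
                  [0, 1],
                  [1, 1]])   # rows: estimators, columns: samples
majority = apply(lambda y: Counter(y).most_common()[0][0], 1, votes.T)
print(majority)              # -> [0 1]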
def kmeans_t(points, k, iterations):
    centroids = initialize_centroids(points, k)
    for i in range(iterations):
        # Assign each point to its closest centroid, then move every
        # centroid to the mean of its assigned points.
        centroids = np.vstack(
            move_centroids(points, closest_centroid(points, centroids),
                           centroids))
    return centroids
def kmeans(points, iterations, initial_centroids, enable_output):
    centroids = initial_centroids
    for i in range(iterations):
        if enable_output:
            print("centroids in iteration ", i, ": ", centroids)
        # Assign each point to its closest centroid, then move every
        # centroid to the mean of its assigned points.
        centroids = np.vstack(
            move_centroids(points, closest_centroid(points, centroids),
                           centroids))
    return centroids
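# Both k-means variants above rely on closest_centroid and move_centroids,
# which are not part of this snippet; standard NumPy versions, sketched here as
# assumptions (every centroid is assumed to keep at least one point):
import numpy as np

def closest_centroid(points, centroids):
    # Index of the nearest centroid for every point.
    distances = np.linalg.norm(points[:, None, :] - centroids[None, :, :],
                               axis=2)
    return np.argmin(distances, axis=1)

def move_centroids(points, closest, centroids):
    # New centroid = mean of the points currently assigned to it; np.vstack in
    # the callers stacks the list back into a (k, d) array.
    return [points[closest == k].mean(axis=0)
            for k in range(centroids.shape[0])]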
def predict(self, X: np.ndarray) -> float:
    """Predict with the fitted decision tree on input X.

    @param: X test X
    """
    # Route every row of X through the tree, starting from the root node.
    return apply(partial(self._predict, self.tree), 1, X)
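# predict relies on a _predict method that is not shown here; a sketch that is
# consistent with the node dictionaries produced by _build (internal nodes are
# dicts, leaves are Counter.most_common() lists) could look like this inside
# the same class:
def _predict(self, node, row):
    # Follow the split: rows with row[index] < value go left, others go right.
    branch = node['left'] if row[node['index']] < node['value'] else node['right']
    if isinstance(branch, dict):
        return self._predict(branch, row)
    return branch[0][0]   # most common class label stored at the leaf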
def get_is_occupied(self):
    """Return a boolean array where True marks occupied states and False virtual ones."""
    # Apply the per-vector check to every row of the momenta array and
    # compare against the Fermi momentum.
    norms = np.apply_along_axis(self.is_k_vec_virtual, axis=1, arr=self.momenta)
    return norms < self.parameters.k_fermi
for i, y in enumerate(dataQsd):
    plt.plot(qq, y, color=colors[i], linestyle='dashed')
plt.xlabel('Normal quantiles')
plt.ylabel('Empirical quantiles')

# Detect peaks
from scipy.signal import fftconvolve
from numpy import apply_along_axis as apply

# Smooth and threshold data
data_filtered = apply(
    lambda x: fftconvolve(x, np.array([1, 1, 1, 1, 1]) / 5.0, 'same'),
    1, np.array(data))
data_filtered = (data_filtered.transpose() /
                 apply(swp.mad, 1, data_filtered)).transpose()
data_filtered[data_filtered < 4] = 0

plt.plot(tt, data[0], color='black')
plt.axhline(y=4, color='blue', linestyle='dashed')
plt.plot(tt, data_filtered[0, :], color='red')
plt.xlim([0, 0.2])
plt.ylim([-5, 10])
plt.xlabel('Time (s)')

# Get peaks
spikes0 = swp.peak(data_filtered.sum(0))
def apply_rotation(rot, p):
    # p is a homogeneous image point ~ (x, y, 1)
    # Back-project the pixel to a viewing direction, rotate it, and
    # re-project it with the camera intrinsics KK.
    direction_vector = np.dot(invKK, p)
    rotated_direction = np.dot(rot, direction_vector)
    p_prime = np.dot(KK, rotated_direction)
    return p_prime
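# A small self-contained check of apply_rotation; the intrinsic matrix, the
# rotation, and the pixel below are made-up values for illustration only.
import numpy as np

KK = np.array([[800., 0., 320.],
               [0., 800., 240.],
               [0., 0., 1.]])
invKK = np.linalg.inv(KK)
theta = np.deg2rad(5.)
rot = np.array([[np.cos(theta), -np.sin(theta), 0.],
                [np.sin(theta),  np.cos(theta), 0.],
                [0., 0., 1.]])
p = np.array([400., 300., 1.])
p_prime = apply_rotation(rot, p)
print(p_prime / p_prime[2])   # normalized back to (x, y, 1)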
import numpy as np
from numpy import apply_along_axis as apply
from scipy.signal import fftconvolve
import sorting_with_python as swp
from load_data import load_data
from numpy.linalg import svd
from pandas.plotting import scatter_matrix
import pandas as pd
import csv
from sklearn.cluster import KMeans

raw_data, data_len = load_data()
tt = np.arange(0, data_len) / 1.5e4
data = list(map(lambda x: (x - np.median(x)) / swp.mad(x), raw_data))
data_filtered = apply(
    lambda x: fftconvolve(x, np.array([1, 1, 1, 1, 1]) / 5., 'same'),
    1, np.array(data))
data_filtered = (data_filtered.transpose() /
                 apply(swp.mad, 1, data_filtered)).transpose()
data_filtered[data_filtered < 4] = 0
sp0 = swp.peak(data_filtered.sum(0))
sp0E = sp0[sp0 <= data_len / 2.]
sp0L = sp0[sp0 > data_len / 2.]
evtsE = swp.mk_events(sp0E, np.array(data), 49, 50)
evtsE_median = apply(np.median, 0, evtsE)
evtsE_mad = apply(swp.mad, 0, evtsE)
evtsE = swp.mk_events(sp0E, np.array(data), 14, 30)
def normalize_unit_sd(array):
    def temp(v):
        return v / np.std(v)
    return np.array(apply(temp, 0, array))
def mk_aligned_events(positions, data, before=14, after=30):
    """Align events on the central event using first or second order
    Taylor expansion.

    Parameters
    ----------
    positions: a vector of indices with the positions of the detected
               events.
    data: a matrix whose rows contain the recording channels.
    before: an integer, how many points should be within the cut before
            the reference index / time given by positions.
    after: an integer, how many points should be within the cut after
           the reference index / time given by positions.

    Returns
    -------
    A tuple whose elements are:
      A matrix with as many rows as events and whose rows are the cuts
      on the different recording sites glued one after the other. These
      events have been jitter corrected using the second order Taylor
      expansion.
      A vector of events positions where "actual" positions have been
      rounded to the nearest index.
      A vector of jitter values.

    Details
    -------
    (1) The data first and second derivatives are estimated first.
    (2) Events are cut next on each of the three versions of the data.
    (3) The global median event for each of the three versions is
        obtained.
    (4) Each event is then aligned on the median using a first order
        Taylor expansion.
    (5) If this alignment decreases the squared norm of the event,
    (6) an improvement is looked for using a second order expansion.
        If this second order expansion still decreases the squared norm
        and if the estimated jitter is larger than 1, the whole
        procedure is repeated after re-cutting the event at a better
        peak position (7).
    """
    from scipy.signal import fftconvolve
    from numpy import apply_along_axis as apply
    from scipy.spatial.distance import squareform
    n_evts = len(positions)
    new_positions = positions.copy()
    jitters = np.zeros(n_evts)
    # Details (1)
    dataD = apply(lambda x: fftconvolve(x, np.array([1, 0, -1]) / 2.,
                                        'same'), 1, data)
    dataDD = apply(lambda x: fftconvolve(x, np.array([1, 0, -1]) / 2.,
                                         'same'), 1, dataD)
    # Details (2)
    evts = mk_events(positions, data, before, after)
    evtsD = mk_events(positions, dataD, before, after)
    evtsDD = mk_events(positions, dataDD, before, after)
    # Details (3)
    center = apply(np.median, 0, evts)
    centerD = apply(np.median, 0, evtsD)
    centerD_norm2 = np.dot(centerD, centerD)
    centerDD = apply(np.median, 0, evtsDD)
    centerDD_norm2 = np.dot(centerDD, centerDD)
    centerD_dot_centerDD = np.dot(centerD, centerDD)
    # Details (4)
    for evt_idx in range(n_evts):
        # Details (5)
        evt = evts[evt_idx, :]
        evt_pos = positions[evt_idx]
        h = evt - center
        h_order0_norm2 = sum(h**2)
        h_dot_centerD = np.dot(h, centerD)
        jitter0 = h_dot_centerD / centerD_norm2
        h_order1_norm2 = sum((h - jitter0 * centerD)**2)
        if h_order0_norm2 > h_order1_norm2:
            # Details (6)
            h_dot_centerDD = np.dot(h, centerDD)
            first = -2 * h_dot_centerD + \
                2 * jitter0 * (centerD_norm2 - h_dot_centerDD) + \
                3 * jitter0**2 * centerD_dot_centerDD + \
                jitter0**3 * centerDD_norm2
            second = 2 * (centerD_norm2 - h_dot_centerDD) + \
                6 * jitter0 * centerD_dot_centerDD + \
                3 * jitter0**2 * centerDD_norm2
            jitter1 = jitter0 - first / second
            h_order2_norm2 = sum((h - jitter1 * centerD -
                                  jitter1**2 / 2 * centerDD)**2)
            if h_order1_norm2 <= h_order2_norm2:
                jitter1 = jitter0
        else:
            jitter1 = 0
        if abs(round(jitter1)) > 0:
            # Details (7)
            evt_pos -= int(round(jitter1))
            evt = cut_sgl_evt(evt_pos, data=data,
                              before=before, after=after)
            h = evt - center
            h_order0_norm2 = sum(h**2)
            h_dot_centerD = np.dot(h, centerD)
            jitter0 = h_dot_centerD / centerD_norm2
            h_order1_norm2 = sum((h - jitter0 * centerD)**2)
            if h_order0_norm2 > h_order1_norm2:
                h_dot_centerDD = np.dot(h, centerDD)
                first = -2 * h_dot_centerD + \
                    2 * jitter0 * (centerD_norm2 - h_dot_centerDD) + \
                    3 * jitter0**2 * centerD_dot_centerDD + \
                    jitter0**3 * centerDD_norm2
                second = 2 * (centerD_norm2 - h_dot_centerDD) + \
                    6 * jitter0 * centerD_dot_centerDD + \
                    3 * jitter0**2 * centerDD_norm2
                jitter1 = jitter0 - first / second
                h_order2_norm2 = sum((h - jitter1 * centerD -
                                      jitter1**2 / 2 * centerDD)**2)
                if h_order1_norm2 <= h_order2_norm2:
                    jitter1 = jitter0
            else:
                jitter1 = 0
        if sum(evt**2) > sum((h - jitter1 * centerD -
                              jitter1**2 / 2 * centerDD)**2):
            evts[evt_idx, :] = evt - jitter1 * centerD - \
                jitter1**2 / 2 * centerDD
        new_positions[evt_idx] = evt_pos
        jitters[evt_idx] = jitter1
    return (evts, new_positions, jitters)
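# The first-order jitter used above is the least-squares solution of
# h ~ jitter * centerD, i.e. jitter0 = <h, centerD> / ||centerD||**2; a tiny
# numeric check with made-up vectors:
import numpy as np

centerD_demo = np.array([0., 1., 0., -1.])
h_demo = 0.3 * centerD_demo + np.array([0.01, -0.02, 0.0, 0.01])
jitter_demo = np.dot(h_demo, centerD_demo) / np.dot(centerD_demo, centerD_demo)
print(round(jitter_demo, 3))   # close to the true shift of 0.3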
import matplotlib.pylab as plt
import numpy as np
from numpy import apply_along_axis as apply
from scipy.signal import fftconvolve
import sorting_with_python as swp
from load_data import load_data

raw_data, data_len = load_data()
tt = np.arange(0, data_len) / 1.5e4
data = list(map(lambda x: (x - np.median(x)) / swp.mad(x), raw_data))
data_filtered = apply(
    lambda x: fftconvolve(x, np.array([1, 1, 1, 1, 1]) / 5., 'same'),
    1, np.array(data))
data_filtered = (data_filtered.transpose() /
                 apply(swp.mad, 1, data_filtered)).transpose()
data_filtered[data_filtered < 4] = 0


def print_stats(title, sp):
    print("Stats for %s" % title)
    print("giving %d spikes" % len(sp))
    print("a mean inter-event interval of %f sampling points"
          % round(np.mean(np.diff(sp))))
    print("a standard deviation of %f sampling points"
          % round(np.std(np.diff(sp))))
    print("a smallest inter-event interval of %f sampling points"
          % np.min(np.diff(sp)))
    print("and a largest of %f sampling points" % np.max(np.diff(sp)))
    print("")
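# Example call, assuming the spike vector is obtained from the filtered traces
# as in the detection step above:
sp0 = swp.peak(data_filtered.sum(0))
print_stats("the whole recording", sp0)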