def calculate_averages(self):
    """Returns the clustering averages for prediction.

    :return: row_avg (array): 1 x n_row array with the averages per row.
    :return: col_avg (array): 1 x n_col array with the averages per column.
    :return: row_cltr_avg (array): 1 x n_cltr_r array with the averages per row cluster.
    :return: col_cltr_avg (array): 1 x n_cltr_c array with the averages per column cluster.
    :return: co_cltr_avg (array): n_cltr_r x n_cltr_c array with the averages per co-cluster.
    """
    # Row and column averages
    row_avg = nanmean(self.Z, 1)
    col_avg = nanmean(self.Z, 0)
    # Initialize empty average arrays
    row_cltr_avg = np.zeros(self.n_cltr_r, np.double)
    col_cltr_avg = np.zeros(self.n_cltr_c, np.double)
    co_cltr_avg = np.zeros((self.n_cltr_r, self.n_cltr_c), np.double)
    # Initialize empty count arrays
    row_cltr_count = np.zeros(self.n_cltr_r, np.double)
    col_cltr_count = np.zeros(self.n_cltr_c, np.double)
    co_cltr_count = np.zeros((self.n_cltr_r, self.n_cltr_c), np.double)
    # Initialize empty sum arrays
    row_cltr_sum = np.zeros(self.n_cltr_r, np.double)
    col_cltr_sum = np.zeros(self.n_cltr_c, np.double)
    co_cltr_sum = np.zeros((self.n_cltr_r, self.n_cltr_c), np.double)
    # Compute sums, counts, and averages for row clusters
    for cluster in range(0, self.n_cltr_r):
        for row in range(0, self.n_row):
            if self.row_cltr[row, cluster] == 1.0:
                # Increment count via self.W: if one of the n values in the
                # row is missing, the count contribution is 1 - 1/n
                row_cltr_count[cluster] += nanmean(self.W[row, :])
                row_cltr_sum[cluster] += nanmean(self.Z[row])
    row_cltr_avg = np.divide(row_cltr_sum, row_cltr_count)
    # Compute sums, counts, and averages for column clusters
    for cluster in range(0, self.n_cltr_c):
        for col in range(0, self.n_col):
            if self.col_cltr[col, cluster] == 1.0:
                # Increment count via self.W: if one of the n values in the
                # column is missing, the count contribution is 1 - 1/n
                col_cltr_count[cluster] += self.W[:, col].mean()
                col_cltr_sum[cluster] += self.Z[:, col].mean()
    col_cltr_avg = np.divide(col_cltr_sum, col_cltr_count)
    # Compute sums, counts, and averages for co-clusters
    for rc in range(0, self.n_cltr_r):
        for row in range(0, self.n_row):
            if self.row_cltr[row, rc] == 1.0:
                for cc in range(0, self.n_cltr_c):
                    for col in range(0, self.n_col):
                        if self.col_cltr[col, cc] == 1.0:
                            # Increment count via self.W: if the value is
                            # missing, W is 0 there, so count += 0
                            co_cltr_count[rc, cc] += self.W[row, col]
                            co_cltr_sum[rc, cc] += self.Z[row, col]
    co_cltr_avg = np.divide(co_cltr_sum, co_cltr_count)
    return row_avg, col_avg, row_cltr_avg, col_cltr_avg, co_cltr_avg
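# The nested co-cluster loops above can be sanity-checked against a small
# vectorized sketch. This is a hypothetical standalone example (toy matrices,
# not the class's data): it assumes one-hot membership matrices and missing
# entries zeroed in Z, with W marking observed entries.
import numpy as np

Z = np.array([[1., 2.], [3., 4.]])          # data matrix (missing entries zeroed)
W = np.array([[1., 0.], [1., 1.]])          # 1 = observed, 0 = missing
row_cltr = np.array([[1., 0.], [0., 1.]])   # one-hot row-cluster memberships
col_cltr = np.array([[1., 0.], [0., 1.]])   # one-hot column-cluster memberships

co_sum = row_cltr.T @ (Z * W) @ col_cltr    # sums of observed entries per co-cluster
co_count = row_cltr.T @ W @ col_cltr        # counts of observed entries per co-cluster
co_avg = np.divide(co_sum, co_count)        # NaN where a co-cluster has no observations
print(co_avg)  # [[ 1. nan] [ 3.  4.]]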
def original_ratings():
    "Function returning the automatic specificity scores along with the human ratings."
    data = loadmat('./original_data/specificity_automated.mat')
    automatic = data['specificity_automated'][0]
    data = loadmat('./original_data/specificity_scores_MEM5S.mat')
    ratings = data['scores']
    ratings = [nanmean([nanmean(row) for row in image]) for image in ratings]
    return automatic, ratings
def impute_non_finite(data, data_is_fin=None):
    '''Replace non-finite entries with mean of columns.'''
    if data_is_fin is None:
        data_is_fin = np.isfinite(data)
    col_means = sp.nanmean(data, 0)
    nan_ridx, nan_cidx = np.where(np.invert(data_is_fin))
    data[nan_ridx, nan_cidx] = col_means[nan_cidx]
def replace_nans(data, col_thold=.7, row_thold=.9):
    '''Replace nan entries in each row with mean of column'''
    data_is_fin = np.isfinite(data)
    print "Stats before filtering"
    print_nan_stats(data, data_is_fin)

    print "Removing columns with < %d%% finite values" % int(100 * col_thold)
    cols_pct_fin = np.mean(data_is_fin, 0)
    assert cols_pct_fin.size == data_is_fin.shape[1]
    col_mask = cols_pct_fin > col_thold
    data = data[:, col_mask]
    data_is_fin = data_is_fin[:, col_mask]
    print_nan_stats(data, data_is_fin)
    print ""

    print "Removing rows with < %d%% finite values" % int(100 * row_thold)
    rows_pct_fin = np.mean(data_is_fin, 1)
    row_mask = rows_pct_fin > row_thold
    data = data[row_mask]
    data_is_fin = data_is_fin[row_mask]
    print_nan_stats(data, data_is_fin)
    print ""

    print "Replacing nans with average of column"
    col_means = sp.nanmean(data, 0)
    nan_ridx, nan_cidx = np.where(np.invert(data_is_fin))
    data[nan_ridx, nan_cidx] = col_means[nan_cidx]
    return data, row_mask, col_mask
def replace_nans(psi, col_thold=.7, row_thold=.9):
    '''Replace nan entries in each row with mean of column

    Rows are samples, cols are events.
    '''
    psi_is_fin = np.isfinite(psi)
    print "Stats before filtering"
    run_preproc_tests(psi, psi_is_fin)

    print "Removing columns with < %d%% finite values" % int(100 * col_thold)
    cols_pct_fin = np.mean(psi_is_fin, 0)
    assert cols_pct_fin.size == psi_is_fin.shape[1]
    col_mask = cols_pct_fin > col_thold
    psi = psi[:, col_mask]
    psi_is_fin = psi_is_fin[:, col_mask]
    run_preproc_tests(psi, psi_is_fin)
    print ""

    print "Removing rows with < %d%% finite values" % int(100 * row_thold)
    rows_pct_fin = np.mean(psi_is_fin, 1)
    row_mask = rows_pct_fin > row_thold
    psi = psi[row_mask]
    psi_is_fin = psi_is_fin[row_mask]
    run_preproc_tests(psi, psi_is_fin)
    print ""

    print "Replacing nans with average of column"
    col_means = sp.nanmean(psi, 0)
    nan_ridx, nan_cidx = np.where(np.invert(psi_is_fin))
    psi[nan_ridx, nan_cidx] = col_means[nan_cidx]
    return psi, row_mask, col_mask
def downSample(data, sampleRate=20000, dsType='mean'):
    """ Function that downsamples data.

    :param data: list including syllables with sample data
    :param sampleRate: desired sample rate
    :param dsType: type of interpolation used for downsampling; can be 'mean'
        or 'IIR', which uses an order 8 Chebyshev type 1 filter (default = 'mean')
    :returns syllables: downsampled data, in the same format as the input data
    """
    syllables = []
    for syllable in data:
        samples = []
        for sample in syllable:
            SR = int(np.round(sample[1] / float(sampleRate)))
            if dsType == 'mean':
                pad_size = int(math.ceil(float(sample[0].size) / SR) * SR - sample[0].size)
                s_padded = np.append(sample[0], np.zeros(pad_size) * np.NaN)
                s_new = sp.nanmean(s_padded.reshape(-1, SR), axis=1)
            elif dsType == 'IIR':
                s_new = ss.decimate(sample[0], SR)
            samples.append([s_new, sampleRate])
        syllables.append(samples)
    return syllables
def updateW(x1, x2, tau, y, mu0_ws, var0_w):
    w1 = (mu0_ws / var0_w + nandot(x1, (y.T).T * tau)) / (nandot(x2, tau) + 1.0 / var0_w)
    w2 = w1**2 + 1.0 / (nandot(x2, tau) + 1.0 / var0_w)
    LOG.debug("After W update, <w>=%.1f, mean absolute error=%.3f" %
              (w1.mean(), SP.nanmean(abs(y.T - SP.outer(w1, x1))).mean()))
    return w1, w2
def load_unicef_data():
    """Loads Unicef data from CSV file.

    Retrieves a matrix of all rows and columns from Unicef child mortality
    dataset.

    Args:
      none

    Returns:
      Country names, feature names, and matrix of values as a tuple
      (countries, features, values).

      countries: vector of N country names
      features: vector of F feature names
      values: matrix N-by-F
    """
    fname = 'SOWC_combined_simple.csv'

    # Uses pandas to help with string-NaN-numeric data.
    data = pd.read_csv(fname, na_values='_', encoding='latin1')
    # Strip countries title from feature names.
    features = data.axes[1][1:]
    # Separate country names from feature values.
    countries = data.values[:, 0]
    values = data.values[:, 1:]
    # Convert to numpy matrix for real.
    values = np.asmatrix(values, dtype='float64')
    # Modify NaN values (missing values).
    mean_vals = nanmean(values, axis=0)
    inds = np.where(np.isnan(values))
    values[inds] = np.take(mean_vals, inds[1])
    return (countries, features, values)
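# A minimal sketch of the column-mean imputation idiom used above, on a toy
# array (illustrative values; np.nanmean stands in for the nanmean import
# assumed by the snippet).
import numpy as np

values = np.array([[1.0, np.nan],
                   [3.0, 4.0]])
mean_vals = np.nanmean(values, axis=0)      # per-column means: [2., 4.]
inds = np.where(np.isnan(values))           # (row indices, column indices) of NaNs
values[inds] = np.take(mean_vals, inds[1])  # each NaN gets its column's mean
print(values)  # [[1. 4.] [3. 4.]]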
def rebin(a, newLength):
    """rebin(old array, new number of bins)

    This is a very general downsampling rebinner, but it has for loops and if
    statements, hence it is slower than down_sample().
    """
    #TODO Make this code run faster. Vectorize
    newBins = np.linspace(0, a.size, newLength, endpoint=False)
    width = math.ceil(a.size / newLength)
    # Using NaN means that we do not have extra zeros in the array that would
    # get averaged
    a_rebin = np.zeros((newLength, width)) * np.nan
    row = 0
    column = 0
    for ii in range(0, a.size):
        if ii < (newBins[row] + newBins[1]):
            a_rebin[row, column] = a[ii]
            column += 1
        else:
            column = 0
            row += 1
            a_rebin[row, column] = a[ii]
            column += 1
    a_rebinned = sp.nanmean(a_rebin, axis=1)  # nanmean does not count NaNs in the total
    return a_rebinned  # * np.amax(a) / np.amax(a_rebinned)
def _make_ribbon_mesh(self, x, y, w, h):
    signal_power = np.square(self.data)

    frames_per_pixel = int(self.frames_per_beat / HORIZ_SCALE)
    scale_factor = frames_per_pixel * 2

    pad_size = math.ceil(float(signal_power.size) / scale_factor) * scale_factor - signal_power.size
    signal_power = np.append(signal_power, np.zeros(pad_size) * np.NaN)
    print signal_power.shape
    signal_power = signal_power.reshape(-1, scale_factor)
    print signal_power.shape
    signal_power = scipy.nanmean(signal_power, axis=1)
    print signal_power.shape

    signal_power /= np.max(signal_power)
    print 'signal power', len(signal_power)
    print signal_power[100:200]
    print np.max(signal_power)

    segments = self.blah_width
    mesh = Mesh()

    # create indices
    mesh.indices = range(segments * 2 + 2)

    # create vertices with evenly spaced texture coordinates
    span = np.linspace(0.0, 1.0, segments + 1)
    verts = []
    mid_y = y + h / 2
    y_scale = h / 2
    idx = 0
    for s in span:
        height = y_scale * signal_power[idx]
        verts += [x + s * w, mid_y - height, s, 0,
                  x + s * w, mid_y + height, s, 1]
        idx += 1
    mesh.vertices = verts

    # # animate a sine wave by setting the vert positions every frame:
    # theta = 3.0 * self.time
    # y = 300 + 50 * np.sin(np.linspace(theta, theta + 2 * np.pi, self.segments + 1))
    # self.mesh.vertices[5::8] = y
    # seems that you have to reassign the entire verts list in order for the
    # change to take effect.
    mesh.vertices = mesh.vertices

    # # assign texture
    # if tex_file:
    #     mesh.texture = Image(tex_file).texture

    # standard triangle strip mode
    mesh.mode = 'triangle_strip'
    return mesh
def infer_JACKS_meanfc(gene_index, testdata, ctrldata):
    results = {}
    for gene in gene_index:
        Ig = gene_index[gene]
        y = (testdata[Ig, :, 0] - ctrldata[Ig, :, 0])
        w1 = SP.nanmean(y, axis=0)
        results[gene] = (y, -1.0, -1.0, -1.0, w1, -1.0)
    return results
def image_specificity(descriptions, vectorizer, analyzer, model):
    "Compute image specificity."
    similarities = [
        sentence_similarity(sent1, sent2, vectorizer, analyzer, model)
        for sent1, sent2 in combinations(descriptions, 2)
    ]
    specificity = nanmean(similarities)
    return specificity
def downsample(signal, factor):
    # fill with NaN till the size is divisible by the factor
    pad_size = np.ceil(float(signal.size) / factor) * factor - signal.size
    pad_size = np.int(pad_size)
    b_padded = np.append(signal, np.zeros(pad_size) * np.NaN)
    # reshape by the factor and take the mean
    factor = np.int(factor)
    return nanmean(b_padded.reshape(-1, factor), axis=1)
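# The pad-with-NaN idiom above is the pattern most snippets in this collection
# share; a minimal worked example (np.nanmean used here, since modern SciPy no
# longer exposes a top-level nanmean):
import numpy as np

signal = np.arange(10, dtype=float)
factor = 4                                       # does not divide 10 evenly
pad_size = int(np.ceil(signal.size / float(factor)) * factor - signal.size)
padded = np.append(signal, np.full(pad_size, np.nan))
print(np.nanmean(padded.reshape(-1, factor), axis=1))
# [1.5 5.5 8.5] -- means of [0..3], [4..7], and the partial block [8, 9]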
def makehistsingle(testpath, npulses):
    """ Make a histogram from a single collection of data. """
    sns.set_style("whitegrid")
    sns.set_context("notebook")
    params = ['Ne', 'Te', 'Ti', 'Vi']
    paramsLT = ['N_e', 'T_e', 'T_i', 'V_i']
    datadict, er1, er2, edatadict = makehistdata(params, testpath)
    (figmplf, axmat) = plt.subplots(2, 2, figsize=(12, 8), facecolor='w')
    axvec = axmat.flatten()
    histlims = [[4e10, 2e11], [1200., 3000.], [300., 1900.], [-250., 250.]]
    histvecs = [sp.linspace(ipm[0], ipm[1], 100) for ipm in histlims]
    linehand = []
    lablist = ['Histogram', 'Variance', 'Error']
    for iax, iparam in enumerate(params):
        mu = PVALS[iax]
        curvals = datadict[iparam]
        mu = sp.nanmean(curvals.real)
        RMSE = sp.sqrt(sp.nanvar(curvals))
        Error_mean = sp.sqrt(sp.nanmean(sp.power(edatadict[iparam], 2)))
        curhist, x = sp.histogram(curvals, bins=histvecs[iax])
        dx = x[1] - x[0]
        curhist_norm = curhist.astype(float) / (curvals.size * dx)
        plthand = axvec[iax].plot(x[:-1], curhist_norm, 'r-', label='Histogram'.format(npulses))[0]
        linehand.append(plthand)
        rmsedist = sp.stats.norm.pdf((x - mu) / RMSE) / RMSE
        plthand = axvec[iax].plot(x, rmsedist, label='Var'.format(npulses))[0]
        linehand.append(plthand)
        emeandist = sp.stats.norm.pdf((x - mu) / Error_mean) / Error_mean
        plthand = axvec[iax].plot(x, emeandist, label='Error'.format(npulses))[0]
        linehand.append(plthand)
        axvec[iax].set_xlabel(r'$' + paramsLT[iax] + '$')
        axvec[iax].set_title(r'Distributions for $' + paramsLT[iax] + '$')
    leg = figmplf.legend(linehand[:len(lablist)], lablist)
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    spti = figmplf.suptitle('Pulses J = {:d}'.format(npulses), fontsize=18)
    return (figmplf, axvec, linehand)
def down_sample(ar, fact):
    """down_sample(ar, fact)

    down sample array, ar, by downsampling factor, fact
    """
    #TODO this is fast, but not as general as possible
    downsampled = ar.reshape(-1, fact).mean(axis=1)
    return downsampled
def shorten(feature, n_frames):
    """Compute neighbourhood mean to shorten the feature vector."""
    assert feature.shape[0] >= n_frames
    scale = int(feature.shape[0] / n_frames)
    pad_size = int(np.ceil(float(feature.shape[0]) / scale) * scale - feature.shape[0])
    feature = np.vstack([feature, np.zeros((pad_size, feature.shape[1])) * np.nan])
    return scipy.nanmean(feature.reshape(-1, scale, feature.shape[1]), axis=1)[:n_frames]
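# Toy check of shorten() above: 7 frames of 2-D features block-averaged down
# to 3 frames (values illustrative; np.nanmean replaces the snippet's
# scipy.nanmean so the NaN padding is ignored).
import numpy as np

feature = np.arange(14, dtype=float).reshape(7, 2)
n_frames = 3
scale = feature.shape[0] // n_frames             # 2 frames per block
pad = int(np.ceil(feature.shape[0] / float(scale)) * scale) - feature.shape[0]
padded = np.vstack([feature, np.full((pad, feature.shape[1]), np.nan)])
out = np.nanmean(padded.reshape(-1, scale, feature.shape[1]), axis=1)[:n_frames]
print(out)  # rows are the means of frame pairs (0,1), (2,3), (4,5)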
def decim(x, q):
    # decimate a 1 x n array
    # x: 1xn matrix (float)
    # q: int (decimate ratio), 0 < q <= x.size
    assert (x.size >= q and q > 0)
    pad_size = int(math.ceil(float(x.size) / q) * q - x.size)
    pad = np.empty(pad_size)
    pad[:] = np.nan
    x_padded = np.append(x, pad)
    return sp.nanmean(x_padded.reshape(-1, q), axis=1)
def npq(plight, pdark):
    """Try to compute NPQ (Non-photochemical quenching) from pulse values.

    Parameters
    ----------
    plight : dict
        light-adapted pulse.
    pdark : dict
        dark-adapted pulse.

    Returns
    -------
    NPQ : float
    light-adapted pulse : np.array
    dark-adapted pulse : np.array

    Examples
    --------
    from PyPAM.parse import raw_extract
    curves, pulses = raw_extract('file.rpt')
    plight = pulses[0]
    pdark = pulses[1]
    npq, lightpulses, darkpulses = npq(plight, pdark)
    """
    ppl = np.array([plight['Fm1'], plight['Fm2'], plight['Fm3'], plight['Fm4']])
    ppd = np.array([pdark['Fm1'], pdark['Fm2'], pdark['Fm3'], pdark['Fm4']])
    Fm_ = nanmean(nanmean(ppl))
    Fm = nanmean(nanmean(ppd))
    npq = (Fm - Fm_) / Fm_
    return npq, ppl, ppd
def updateX(w1, w2, tau, y, mu0_x, var0_x):
    x1 = (mu0_x / var0_x + nandot((y.T).T * tau, w1)) / (nandot(tau, w2) + 1.0 / var0_x)
    x2 = x1**2 + 1.0 / (nandot(tau, w2) + 1.0 / var0_x)
    wadj = 0.5 / len(x1)
    # Normalize by the median-emphasized mean of x
    x1m = x1.mean() + 2 * wadj * np.nanmedian(x1) - wadj * x1.max() - wadj * x1.min()
    LOG.debug("After X update, <x>=%.1f, mean absolute error=%.3f" %
              (x1.mean(), SP.nanmean(abs(y.T - SP.outer(w1, x1))).mean()))
    return x1 / x1m, x2 / x1m / x1m
def load_psd():
    """ Resamples advLIGO noise PSD to 4096 Hz """
    # psd has freq resolution = 1/3 with 6145 samples
    psd = np.loadtxt("ZERO_DET_high_P_PSD.txt")[:, 1]
    down_factor = 3
    pad_size = int(np.ceil(float(psd.size) / down_factor) * down_factor - psd.size)
    psd_padded = np.append(psd, np.zeros(pad_size) * np.NaN)
    psd = sp.nanmean(psd_padded.reshape(-1, down_factor), axis=1)
    # now dF = 1
    # length of psd = 2048
    return psd
def fixed_meanVector(vec, chunk):
    size = (vec.size * chunk)   # target output size
    R = int(vec.size / size)    # window size (cast to int for reshape below)
    pad_size = int(math.ceil(float(vec.size) / R) * R - vec.size)
    vec_padded = np.append(vec, np.zeros(pad_size) * np.NaN)
    print "Org Vector: ", vec.size, "output Size: ", size, "Windows Size: ", R, "Padding size", pad_size
    newVec = scipy.nanmean(vec_padded.reshape(-1, R), axis=1)
    #print "New Vector shape: ", newVec.shape
    #print newVec
    return newVec
def downSampleChannel(self, R, channelSignal):
    dsSignal = []
    # from 140 samples we make 140/R
    # pad zeros at the end so the downsampling works; the end of the array is
    # not important anyway
    pad_size = int(math.ceil(float(len(channelSignal)) / R) * R - len(channelSignal))
    b_padded = np.append(channelSignal, np.zeros(pad_size) * np.NaN)
    dsTmp = sc.nanmean(b_padded.reshape(-1, R), axis=1)
    dsSignal.append(dsTmp)
    return dsSignal
def runinversion(basedir, configfile, acfdir='ACF', invtype='tik'):
    """ """
    costdir = os.path.join(basedir, 'Cost')
    pname = os.path.join(costdir, 'cost{0}-{1}.pickle'.format(acfdir, invtype))
    pickleFile = open(pname, 'rb')
    alpha_arr = pickle.load(pickleFile)[-1]
    pickleFile.close()

    ionoinfname = os.path.join(basedir, acfdir, '00lags.h5')
    ionoin = IonoContainer.readh5(ionoinfname)

    dirio = ('Spectrums', 'Mat', 'ACFMat')
    inputdir = os.path.join(basedir, dirio[0])
    dirlist = glob.glob(os.path.join(inputdir, '*.h5'))
    (listorder, timevector, filenumbering, timebeg, time_s) = IonoContainer.gettimes(dirlist)
    Ionolist = [dirlist[ikey] for ikey in listorder]

    if acfdir.lower() == 'acf':
        ionosigname = os.path.join(basedir, acfdir, '00sigs.h5')
        ionosigin = IonoContainer.readh5(ionosigname)
        nl, nt, np1, np2 = ionosigin.Param_List.shape
        sigs = ionosigin.Param_List.reshape((nl * nt, np1, np2))
        sigsmean = sp.nanmean(sigs, axis=0)
        sigdiag = sp.diag(sigsmean)
        sigsout = sp.power(sigdiag / sigdiag[0], .5).real
        alpha_arr = sp.ones_like(alpha_arr) * alpha_arr[0]
        acfloc = 'ACFInv'
    elif acfdir.lower() == 'acfmat':
        mattype = 'matrix'
        acfloc = 'ACFMatInv'
    mattype = 'sim'
    RSTO = RadarSpaceTimeOperator(Ionolist, configfile, timevector, mattype=mattype)

    # note: the original tested `or 'SimpData'`, which is always true;
    # the intended substring test is restored here
    if 'perryplane' in basedir.lower() or 'simpdata' in basedir.lower():
        rbounds = [-500, 500]
    else:
        rbounds = [0, 500]

    ionoout = invertRSTO(RSTO, ionoin, alpha_list=alpha_arr, invtype=invtype, rbounds=rbounds)[0]

    outfile = os.path.join(basedir, acfloc, '00lags{0}.h5'.format(invtype))
    ionoout.saveh5(outfile)
    if acfdir == 'ACF':
        lagsDatasum = ionoout.Param_List
        # !!! This is done to speed up development
        lagsNoisesum = sp.zeros_like(lagsDatasum)
        Nlags = lagsDatasum.shape[-1]
        pulses_s = RSTO.simparams['Tint'] / RSTO.simparams['IPP']
        Ctt = makeCovmat(lagsDatasum, lagsNoisesum, pulses_s, Nlags)
        outfile = os.path.join(basedir, acfloc, '00sigs{0}.h5'.format(invtype))
        ionoout.Param_List = Ctt
        ionoout.Param_Names = sp.repeat(ionoout.Param_Names[:, sp.newaxis], Nlags, axis=1)
        ionoout.saveh5(outfile)
def upper_bound(fm, nr_subs=None, scale_factor=1):
    """
    compute the inter-subject consistency upper bound for a fixmat.

    Input:
        fm : a fixmat instance
        nr_subs : the number of subjects used for the prediction. Defaults
                  to the total number of subjects in the fixmat minus 1
        scale_factor : the scale factor of the FDMs. Default is 1.
    Returns:
        A list of scores; the list contains one dictionary for each measure.
        Each dictionary contains one key for each category and the
        corresponding value is an array with scores for each subject.
    """
    nr_subs_total = len(np.unique(fm.SUBJECTINDEX))
    if not nr_subs:
        nr_subs = nr_subs_total - 1
    assert (nr_subs < nr_subs_total)
    # initialize output structure; every measure gets one dict with
    # category numbers as keys and numpy-arrays as values
    intersub_scores = []
    for measure in range(len(measures.scores)):
        res_dict = {}
        result_vectors = [np.empty(nr_subs_total) + np.nan
                          for _ in np.unique(fm.category)]
        res_dict.update(list(zip(np.unique(fm.category), result_vectors)))
        intersub_scores.append(res_dict)
    # compute inter-subject scores for every stimulus, with leave-one-out
    # over subjects
    for fm_cat in fm.by_field('category'):
        cat = fm_cat.category[0]
        for (sub_counter, sub) in enumerate(np.unique(fm_cat.SUBJECTINDEX)):
            image_scores = []
            for fm_single in fm_cat.by_field('filenumber'):
                predicting_subs = (np.setdiff1d(
                    np.unique(fm_single.SUBJECTINDEX), [sub]))
                np.random.shuffle(predicting_subs)
                predicting_subs = predicting_subs[0:nr_subs]
                predicting_fm = fm_single[(ismember(fm_single.SUBJECTINDEX,
                                                    predicting_subs))]
                predicted_fm = fm_single[fm_single.SUBJECTINDEX == sub]
                try:
                    predicting_fdm = compute_fdm(predicting_fm,
                                                 scale_factor=scale_factor)
                except RuntimeError:
                    predicting_fdm = None
                image_scores.append(
                    measures.prediction_scores(predicting_fdm, predicted_fm))
            for (measure, score) in enumerate(nanmean(image_scores, 0)):
                intersub_scores[measure][cat][sub_counter] = score
    return intersub_scores
def get_best(self):
    best = 0
    best_res = np.inf
    for i, trial in enumerate(self.trials):
        res = np.NaN
        if trial['result'] == trial['result']:  # NaN check: result is not NaN
            res = trial['result']
        elif np.isfinite(trial['instance_results']).any():
            res = scipy.nanmean(trial['instance_results'])
        else:
            continue
        # compare scores rather than the trial dict itself (the original
        # compared res against self.trials[best], a dictionary)
        if res < best_res:
            best = i
            best_res = res
    return self.trials[best]
def crop_and_downsample(source_array, downsample_ratio, average=True):
    ys, xs = source_array.shape
    cropped_array = source_array[:ys - (ys % int(downsample_ratio)),
                                 :xs - (xs % int(downsample_ratio))]
    if average:
        zoomed_array = scipy.nanmean(numpy.concatenate(
            [[cropped_array[i::downsample_ratio, j::downsample_ratio]
              for i in range(downsample_ratio)]
             for j in range(downsample_ratio)]), axis=0)
    else:
        zoomed_array = cropped_array[::downsample_ratio, ::downsample_ratio]
    return zoomed_array
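# Toy run of crop_and_downsample() above (assuming the function as defined,
# with its numpy/scipy imports and an old SciPy that still provides
# scipy.nanmean): each 2x2 block of a 4x4 array is averaged via the stacked
# strided slices.
import numpy as np

a = np.arange(16, dtype=float).reshape(4, 4)
print(crop_and_downsample(a, 2))
# [[ 2.5  4.5]
#  [10.5 12.5]]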
def add_ratings(filmography_dict):
    for name, films in filmography_dict.iteritems():
        new = []
        nums = []
        for film in films:
            if str(film) in set(ratings_list):
                rating = ratings_list[film]
            else:
                rating = sp.nan
            new.append([film, rating])
            nums.append(rating)
        new.append(['Average Rating', sp.nanmean(nums)])
        filmography_dict[name] = new
    return filmography_dict
def down_sample(self, output_dir, samples):
    # For each csv file
    for filename in self.getCsvDataset():
        # Read file
        with open(self.dir + filename, "r") as f:
            reader = csv.reader(f, delimiter=',')
            vals = list(reader)
            result = numpy.array(vals).astype('float')
            R = int(1.0 * result[:, 0].size / samples)
            a = numpy.zeros((samples - 1, int(result[0, :].size)))
            # Sampling
            for i in range(0, samples - 1):
                start = i * R
                end = ((i + 1) * R)
                a[i, 0] = scipy.nanmean(result[start:end, 0])
                a[i, 1] = scipy.nanmean(result[start:end, 1])
                a[i, 2] = scipy.nanmean(result[start:end, 2])
            # Save file
            numpy.savetxt(output_dir + filename, a, delimiter=',')
def get_ratings(name, filmography_dict):
    '''Get the ratings for every film by the writer or director.

    filmography_dict = director_films or writer_films'''
    ratings = []
    nums = []
    films = filmography_dict[name]
    for film in films:
        if film in set(ratings_list):
            rating = float(ratings_list[film])
        else:
            rating = sp.nan  # figure out why some ratings are not found
        ratings.append([film, rating])
        nums.append(rating)
    ratings.append(['Average Rating', sp.nanmean(nums)])
    return ratings
def get_pressure_timeseries_data(sample_id):
    raw_data = Device.get_raw_data(sample_id)
    data = list()
    for line in raw_data:
        value = float(line) / 0.0000241395
        data.append(value)
        # data.append(0 - value)
    original = np.array(data)
    R = len(data) / 1000
    pad_size = math.ceil(float(original.size) / R) * R - original.size
    original_padded = np.append(original, np.zeros(pad_size) * np.NaN)
    downsampled = scipy.nanmean(original_padded.reshape(-1, R), axis=1)
    xf = np.linspace(0.0, len(data) / 10, num=1000)
    result = zip(xf.tolist(), downsampled.tolist())
    return result
def averaged_sequence(s: np.ndarray, R: int) -> np.ndarray:
    '''sampling, with averaging
        r2 = R // 2
        return [sum(s[i-r2:i+r2]) for i in range(0, len(s), R)]
    true sampling, no averaging
        return [s[i] for i in range(0, len(s), R)]
    sampling done with np - first pad, then reshape/sample, then unpad'''
    pad_size = int(math.ceil(float(s.size) / R) * R - s.size)
    s_padded = np.append(s, np.zeros(pad_size) * np.NaN)
    sampled = sp.nanmean(s_padded.reshape(-1, R), axis=1)
    # guard: [:-0] would return an empty array when no padding was needed
    sampled_nopad = sampled[:-pad_size] if pad_size else sampled
    retval = sampled_nopad.reshape((len(sampled_nopad), ))
    return retval
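# Quick check of averaged_sequence() above with R = 3 on 8 samples (assumes
# the snippet's math/np/sp imports and an old SciPy with sp.nanmean). Note
# that the unpad step drops pad_size output bins, so with pad_size > 1 more
# than just the partial bin is discarded.
import numpy as np

s = np.arange(8, dtype=float)
print(averaged_sequence(s, 3))  # [1. 4.] -- means of [0,1,2] and [3,4,5]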
def parse_intercept(self):
    """ Parse intercept factor """

    # Sanity checks
    # TO-DO: CHECK THAT MODEL_OPTS AND DATA_OPTS ARE PROPERLY DEFINED
    K = self.dimensionalities["K"]
    M = self.dimensionalities["M"]
    N = self.dimensionalities["N"]

    # If we want to learn the intercept, we add a constant covariate of 1s
    if self.model_opts['learnIntercept']:
        if self.data_opts['covariates'] is not None:
            self.data_opts['covariates'] = s.insert(
                self.data_opts['covariates'], obj=0, values=1., axis=1)
            self.data_opts['scale_covariates'].insert(0, False)
        else:
            self.data_opts['covariates'] = s.ones((N, 1))
            self.data_opts['scale_covariates'] = [False]

        # Parse intercept
        # self.model_opts['factors'] += 1
        # self.dimensionalities["K"] += 1

        # Remove sparsity from the Intercept factor
        # TO-DO: CHECK THAT THE MODEL IS ALREADY NOT SPARSE
        # TO-DO: RECHECK THIS, ITS UGLY
        # stop if not self.model_opts["learnIntercept"] == TRUE
        for m in range(M):
            # Weights
            # if self.model_opts["likelihoods"][m]=="gaussian":
            self.model_opts["initSW"]["mean_S1"][m][:, 0] = s.nanmean(self.data[m], axis=0)
            self.model_opts["initSW"]["var_S1"][m][:, 0] = 1e-10
            # Theta
            self.model_opts['sparsity'][m][0] = 0.
            self.model_opts["initSW"]["Theta"][m][:, 0] = 1.
            self.model_opts["priorTheta"]['a'][m][0] = s.nan
            self.model_opts["priorTheta"]['b'][m][0] = s.nan
            self.model_opts["initTheta"]["a"][m][0] = s.nan
            self.model_opts["initTheta"]["b"][m][0] = s.nan
            self.model_opts["initTheta"]["E"][m][0] = 1.
def rebin(ar, newlen):
    """rebin(ar, newlen)

    down sample array, ar, to newlen number of bins
    This is a general downsampling rebinner, but is slower than down_sample().
    'ar' must be a 1-d array
    """
    newBins = np.linspace(0, ar.size, newlen, endpoint=False)
    stride = newBins[1] - newBins[0]
    maxWid = int(np.ceil(stride))
    ar_new = np.empty((newlen, maxWid))  # init empty array
    ar_new.fill(np.nan)                  # fill with NaNs (no extra 0s in mean)
    for ii, lbin in enumerate(newBins):
        rbin = int(np.ceil(lbin + stride))
        lbin = int(np.ceil(lbin))
        ar_new[ii, 0:rbin - lbin] = ar[lbin:rbin]
    return sp.nanmean(ar_new, axis=1)  # ignore NaNs in mean
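# Toy run of rebin() above: 10 samples into 4 bins of stride 2.5; partial
# bins stay NaN-padded, so each mean uses only the samples present (assumes
# the snippet's np/sp imports and an old SciPy with sp.nanmean).
import numpy as np

print(rebin(np.arange(10, dtype=float), 4))
# bins cover [0, 2.5), [2.5, 5), [5, 7.5), [7.5, 10) -> [1. 3.5 6. 8.5]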
def downsample(y, R):
    """
    Simple downsampling scheme using the mean within the downsampling window.

    Parameters
    ----------
    y: np.array
        signal to downsample
    R: int
        decimate-factor

    Returns
    -------
    y2: np.array
        downsampled data
    """
    pad_size = int(math.ceil(float(y.size) / R) * R - y.size)
    y_padded = np.append(y, np.zeros(pad_size) * np.NaN)
    y2 = scipy.nanmean(y_padded.reshape(-1, R), axis=1)
    return y2
def mean(self, dat):
    if self.n_avg == 1:
        return dat[..., 0]
    if np.isnan(dat).any():
        return nanmean(dat, axis=-1)
    return np.mean(dat, axis=-1)
def decimate(self, R):
    pad_size = int(math.ceil(float(self.x.size) / R) * R - self.x.size)
    arr_x_padded = np.append(self.x, np.zeros(pad_size) * np.NaN)
    self.x = nanmean(arr_x_padded.reshape(-1, R), axis=1)
    arr_y_padded = np.append(self.y, np.zeros(pad_size) * np.NaN)
    self.y = nanmean(arr_y_padded.reshape(-1, R), axis=1)
def downsample(rows, downsample_factor):
    # simple downsampling method used to reduce the dimension of the spectral
    # plot input
    rows = np.array(rows)
    pad_size = int(math.ceil(float(rows.size) / downsample_factor) * downsample_factor - rows.size)
    rows_padded = np.append(rows, np.zeros(pad_size) * np.NaN)
    # assign the block means (the original dropped this result and returned
    # the padded array unchanged)
    rows_downsampled = scipy.nanmean(rows_padded.reshape(-1, downsample_factor), axis=1)
    return list(rows_downsampled)
def invertRSTO(RSTO, Iono, alpha_list=1e-2, invtype='tik', rbounds=[100, 200], Nlin=0):
    """ This will run the inversion program given an ionocontainer, an alpha and """

    nlout, ntout, nl = Iono.Param_List.shape
    if Nlin != 0:
        nl = Nlin
    nlin = len(RSTO.Cart_Coords_In)
    time_out = RSTO.Time_Out
    time_in = RSTO.Time_In
    overlaps = RSTO.overlaps
    xin, yin, zin = RSTO.Cart_Coords_In.transpose()
    z_u = sp.unique(zin)
    rplane = sp.sqrt(xin**2 + yin**2) * sp.sign(xin)
    r_u = sp.unique(rplane)
    n_z = z_u.size
    n_r = r_u.size
    dims = [n_r, n_z]

    rin, azin, elin = RSTO.Sphere_Coords_In.transpose()

    anglist = RSTO.simparams['angles']
    ang_vec = sp.array([[i[0], i[1]] for i in anglist])

    # trim out cruft
    zmin, zmax = [150, 500]
    rpmin, rpmax = rbounds  # [-50,100] # [100,200]
    altlog = sp.logical_and(zin > zmin, zin < zmax)
    rplog = sp.logical_and(rplane > rpmin, rplane < rpmax)
    allrng = RSTO.simparams['Rangegatesfinal']
    dR = allrng[1] - allrng[0]
    nldir = sp.ceil(int(nl) / 2.)
    posang_log1 = sp.logical_and(ang_vec[:, 0] <= 180., ang_vec[:, 0] >= 0)
    negang_log1 = sp.logical_or(ang_vec[:, 0] > 180., ang_vec[:, 0] < 0)
    azin_pos = sp.logical_and(azin <= 180., azin >= 0)
    azin_neg = sp.logical_or(azin > 180., azin < 0)
    minangpos = 0
    minangneg = 0

    if sp.any(posang_log1):
        minangpos = ang_vec[posang_log1, 1].min()
    if sp.any(negang_log1):
        minangneg = ang_vec[negang_log1, 1].min()

    rngbounds = [allrng[0] - nldir * dR, allrng[-1] + nldir * dR]
    rng_log = sp.logical_and(rin > rngbounds[0], rin < rngbounds[1])
    elbounds_pos = sp.logical_and(azin_pos, elin > minangpos)
    elbounds_neg = sp.logical_and(azin_neg, elin > minangneg)
    elbounds = sp.logical_or(elbounds_pos, elbounds_neg)
    keeplog = sp.logical_and(sp.logical_and(rng_log, elbounds), sp.logical_and(altlog, rplog))
    keeplist = sp.where(keeplog)[0]
    nlin_red = len(keeplist)

    # set up derivative matrix
    dx, dy = diffmat(dims)
    dx_red = dx[keeplist][:, keeplist]
    dy_red = dy[keeplist][:, keeplist]
    # need the sparse vstack to make sure things stay sparse
    D = sp.sparse.vstack((dx_red, dy_red))
    # New parameter matrix
    new_params = sp.zeros((nlin, len(time_out), nl), dtype=Iono.Param_List.dtype)
    if isinstance(alpha_list, numbers.Number):
        alpha_list = [alpha_list] * nl
    ave_datadif = sp.zeros((len(time_out), nl))
    ave_data_const = sp.zeros_like(ave_datadif)
    q = 1e10
    for itimen, itime in enumerate(time_out):
        print('Making Outtime {0:d} of {1:d}'.format(itimen + 1, len(time_out)))
        # allovers = overlaps[itimen]
        # curintimes = [i[0] for i in allovers]
        # for it_in_n, it in enumerate(curintimes):
        #     print('\t Making Intime {0:d} of {1:d}'.format(it_in_n + 1, len(curintimes)))
        #     A = RSTO.RSTMat[itimen*nlout:(itimen+1)*nlout, it*nlin:(it+1)*nlin]
        A = RSTO.RSTMat[itimen * nlout:(itimen + 1) * nlout, itimen * nlin:(itimen + 1) * nlin]
        Acvx = cvx.Constant(A[:, keeplist])
        for ip in range(nl):
            alpha = alpha_list[ip] * 2
            print('\t\t Making Lag {0:d} of {1:d}'.format(ip + 1, nl))
            datain = Iono.Param_List[:, itimen, ip]
            xr = cvx.Variable(nlin_red)
            xi = cvx.Variable(nlin_red)
            if invtype.lower() == 'tik':
                constr = alpha * cvx.norm(xr, 2)
                consti = alpha * cvx.norm(xi, 2)
            elif invtype.lower() == 'tikd':
                constr = alpha * cvx.norm(D * xr, 2)
                consti = alpha * cvx.norm(D * xi, 2)
            elif invtype.lower() == 'tv':
                constr = alpha * cvx.norm(D * xr, 1)
                consti = alpha * cvx.norm(D * xi, 1)
            br = datain.real / q
            bi = datain.imag / q
            if ip == 0:
                objective = cvx.Minimize(cvx.norm(Acvx * xr - br, 2) + constr)
                constraints = [xr >= 0]
                # pass the non-negativity constraint to the problem (the
                # original built the list but never used it)
                prob = cvx.Problem(objective, constraints)
                result = prob.solve(verbose=True, solver=cvx.SCS, use_indirect=True, max_iters=4000)
                # new_params[keeplog, it, ip] = xr.value.flatten()
                xcomp = sp.array(xr.value).flatten() * q
            else:
                objective = cvx.Minimize(cvx.norm(Acvx * xr - br, 2) + constr)
                prob = cvx.Problem(objective)
                result = prob.solve(verbose=True, solver=cvx.SCS, use_indirect=True, max_iters=4000)

                objective = cvx.Minimize(cvx.norm(Acvx * xi - bi, 2) + consti)
                prob = cvx.Problem(objective)
                result = prob.solve(verbose=True, solver=cvx.SCS, use_indirect=True, max_iters=4000)
                xcomp = sp.array(xr.value + 1j * xi.value).flatten() * q
            # new_params[keeplog, it, ip] = xcomp
            new_params[keeplog, itimen, ip] = xcomp
            ave_datadif[itimen, ip] = sp.sqrt(sp.nansum(sp.absolute(A[:, keeplist].dot(xcomp) - datain)**2))
            if invtype.lower() == 'tik':
                sumconst = sp.sqrt(sp.nansum(sp.power(sp.absolute(xcomp), 2)))
            elif invtype.lower() == 'tikd':
                dx = D.dot(xcomp)
                sumconst = sp.sqrt(sp.nansum(sp.power(sp.absolute(dx), 2)))
            elif invtype.lower() == 'tv':
                dx = D.dot(xcomp)
                sumconst = sp.nansum(sp.absolute(dx))
            ave_data_const[itimen, ip] = sumconst
        # set up nans
        new_params[sp.logical_not(keeplog), itimen] = sp.nan

    datadif = sp.nanmean(ave_datadif, axis=0)
    constval = sp.nanmean(ave_data_const, axis=0)
    ionoout = IonoContainer(coordlist=RSTO.Cart_Coords_In, paramlist=new_params,
                            times=time_out, sensor_loc=sp.zeros(3), ver=0,
                            coordvecs=['x', 'y', 'z'],
                            paramnames=Iono.Param_Names[:Nlin])
    return (ionoout, datadif, constval)
def fitsurfaceplot(paramdict, plotvals, configfile, y_acf, yerr=None,
                   filetemplate="fitsurfs", suptitle="Fit Surfaces"):
    """ This will create a fit surface plot.

    Inputs
        paramdict - A dictionary with the following key value pairs.
            Ne - Array of possible electron density values.
            Te - Array of possible electron temperature values.
            Ti - Array of possible ion temperature values.
            frac - Array of possible fraction shares of the ion make up.
        plotvals - A dictionary with key value pairs.
            setparam - A string that describes the parameter that is set.
            xparam - The parameter that's varied along the x axis of the image.
            yparam - The parameter that's varied along the y axis of the image.
            indx - The index from the paramdict for the set variable.
        configfile - The file that's used for the simulation.
        y_acf - The complex ACF used to create the errors.
        yerr - The standard deviation of the acf measurement.
        filetemplate - The template on how the file will be named.
        suptitle - The super title for the plots.
    """
    sns.set_style("whitegrid")
    sns.set_context("notebook")
    (sensdict, simparams) = readconfigfile(configfile)
    specs = simparams["species"]
    nspecs = len(specs)

    # make param lists
    paramlist = [[]] * (2 * nspecs + 1)
    paramlist[2 * (nspecs - 1)] = paramdict["Ne"]
    paramlist[2 * (nspecs - 1) + 1] = paramdict["Te"]
    if "frac" in paramdict.keys():
        frac = paramdict["frac"]
    else:
        frac = [[1.0 / (nspecs - 1)]] * (nspecs - 1)

    for ispec in range(nspecs - 1):
        paramlist[2 * ispec] = frac[ispec]
        paramlist[2 * ispec + 1] = paramdict["Ti"][ispec]

    if "Vi" in paramdict.keys():
        paramlist[-1] = paramdict["Vi"]
    else:
        paramlist[-1] = [0.0]

    pvals = {"Ne": 2 * (nspecs - 1), "Te": 2 * (nspecs - 1) + 1, "Ti": 1, "frac": 0}
    fitsurfs = makefitsurf(paramlist, y_acf, sensdict, simparams, yerr)
    quad = (3, 3)
    i_fig = 0
    for iplt, idict in enumerate(plotvals):
        iaxn = sp.mod(iplt, sp.prod(quad))
        if iaxn == 0:
            (figmplf, axmat) = plt.subplots(quad[0], quad[1], figsize=(20, 15), facecolor="w")
            axvec = axmat.flatten()
        setstr = idict["setparam"]
        xstr = idict["xparam"]
        ystr = idict["yparam"]
        mloc = pvals[setstr]
        xdim = pvals[xstr]
        ydim = pvals[ystr]
        # index the list with the integer position (the original used the
        # string key setstr, which is not a valid list index)
        setval = paramlist[mloc][idict["indx"]]
        transarr = sp.arange(2 * nspecs + 1).tolist()
        transarr.remove(mloc)
        transarr.remove(xdim)
        transarr.remove(ydim)
        transarr = [mloc, ydim, xdim] + transarr
        fitupdate = sp.transpose(fitsurfs, transarr)
        while fitupdate.ndim > 3:
            fitupdate = sp.nanmean(fitupdate, axis=-1)  # nanmean takes axis, not dim
        Z1 = fitupdate[idict["indx"]]
        iax = axvec[iaxn]
        xvec = paramdict[xstr]
        yvec = paramdict[ystr]
        [Xmat, Ymat] = sp.meshgrid(xvec, yvec)
        iax.pcolor(Xmat, Ymat, Z1, norm=colors.LogNorm(vmin=Z1.min(), vmax=Z1.max()))
        iax.set_xlabel(xstr)
        iax.set_ylabel(ystr)
        iax.set_title("{0} at {1}".format(setstr, setval))
        if iaxn == sp.prod(quad) - 1:
            figmplf.suptitle(suptitle, fontsize=20)
            fname = filetemplate + "_{0:0>4}.png".format(i_fig)
            plt.savefig(fname)
            plt.close(figmplf)
            i_fig += 1
def downsample(a, fact):
    print 'downsample', a.size, 'by', fact, 'to', a.size / fact
    pad_size = math.ceil(float(a.size) / fact) * fact - a.size
    a_padded = np.append(a, np.zeros(pad_size) * np.NaN)
    return scipy.nanmean(a_padded.reshape(-1, fact), axis=1)
# Need to use underlying numpy arrays for singleton expansion ('broadcasting')
# and form new DataFrame using appropriate column names.
# TODO use DataFrame.sub() instead:
# TODO wcl_foldch = np.log2(wcl[wcl_exp]).sub(np.log2(wcl[wcl_ctrl]))
wcl_foldch = pd.DataFrame(
    np.log2(wcl[wcl_exp]).values - np.log2(wcl[wcl_ctrl]).values,
    columns=wcl_exp, index=names
)
wclp_foldch = pd.DataFrame(
    np.log2(wclp[wclp_exp]).values - np.log2(wclp[wclp_ctrl]).values,
    columns=wclp_exp, index=names
)
ub_foldch = pd.DataFrame(
    np.log2(ub[ub_exp]).values - np.log2(ub[ub_ctrl]).values,
    columns=ub_exp, index=names
)
ubp_foldch = pd.DataFrame(
    np.log2(ubp[ubp_exp]).values - np.log2(ubp[ubp_ctrl]).values,
    columns=ubp_exp, index=names
)

wcl_st = (wcl - sp.nanmean(wcl)) / sp.nanstd(wcl)
wclp_st = (wclp - sp.nanmean(wclp)) / sp.nanstd(wclp)
ub_st = (ub - sp.nanmean(ub)) / sp.nanstd(ub)
ubp_st = (ubp - sp.nanmean(ubp)) / sp.nanstd(ubp)
import math
import struct
import sys

import numpy as np
import scipy

# open in binary mode so struct.unpack gets raw bytes
with open(sys.argv[1], 'rb') as file:
    data = file.read()
    file.seek(0, 2)
    size = file.tell()

converted = struct.unpack("<{}h".format(int(size / 2)), data)
arr = np.array(converted)
padding = math.ceil(float(arr.size) / 45) * 45 - arr.size
arr = np.append(arr, np.zeros(int(padding)) * np.NaN)
reshaped = scipy.nanmean(arr.reshape(-1, 45), axis=1)
reshaped = reshaped / (2.0**16)
reshaped = reshaped * 255

pbm = np.zeros((1000, 255))
for i in range(1000):
    for j in range(255):
        if np.abs(reshaped[i]) < j:
            pbm[i][j] = 1

with open('test.pbm', 'w') as file:
    file.write('P1\n')
    file.write('255 1000\n')
    for i in range(1000):
        file.write(str(pbm[i, :]) + '\n')
def lower_bound(fm, nr_subs=None, nr_imgs=None, scale_factor=1):
    """
    Compute the spatial bias lower bound for a fixmat.

    Input:
        fm : a fixmat instance
        nr_subs : the number of subjects used for the prediction. Defaults
                  to the total number of subjects in the fixmat minus 1
        nr_imgs : the number of images used for prediction. If given, the
                  same number will be used for every category. If not given,
                  leave-one-out will be used in all categories.
        scale_factor : the scale factor of the FDMs. Default is 1.
    Returns:
        A list of spatial bias scores; the list contains one dictionary for
        each measure. Each dictionary contains one key for each category and
        the corresponding value is an array with scores for each subject.
    """
    nr_subs_total = len(np.unique(fm.SUBJECTINDEX))
    if nr_subs is None:
        nr_subs = nr_subs_total - 1
    assert (nr_subs < nr_subs_total)
    # initialize output structure; every measure gets one dict with
    # category numbers as keys and numpy-arrays as values
    sb_scores = []
    for measure in range(len(measures.scores)):
        res_dict = {}
        result_vectors = [np.empty(nr_subs_total) + np.nan
                          for _ in np.unique(fm.category)]
        res_dict.update(list(zip(np.unique(fm.category), result_vectors)))
        sb_scores.append(res_dict)
    # compute mean spatial bias predictive power for all subjects in all
    # categories
    for fm_cat in fm.by_field('category'):
        cat = fm_cat.category[0]
        nr_imgs_cat = len(np.unique(fm_cat.filenumber))
        if not nr_imgs:
            nr_imgs_current = nr_imgs_cat - 1
        else:
            nr_imgs_current = nr_imgs
        assert (nr_imgs_current < nr_imgs_cat)
        for (sub_counter, sub) in enumerate(np.unique(fm.SUBJECTINDEX)):
            image_scores = []
            for fm_single in fm_cat.by_field('filenumber'):
                # Iterating by field filenumber makes filenumbers in
                # fm_single unique: just take the first one to get the
                # filenumber for this fixmat
                fn = fm_single.filenumber[0]
                predicting_subs = (np.setdiff1d(
                    np.unique(fm_cat.SUBJECTINDEX), [sub]))
                np.random.shuffle(predicting_subs)
                predicting_subs = predicting_subs[0:nr_subs]
                predicting_fns = (np.setdiff1d(
                    np.unique(fm_cat.filenumber), [fn]))
                np.random.shuffle(predicting_fns)
                predicting_fns = predicting_fns[0:nr_imgs_current]
                predicting_fm = fm_cat[
                    (ismember(fm_cat.SUBJECTINDEX, predicting_subs)) &
                    (ismember(fm_cat.filenumber, predicting_fns))]
                predicted_fm = fm_single[fm_single.SUBJECTINDEX == sub]
                try:
                    predicting_fdm = compute_fdm(predicting_fm,
                                                 scale_factor=scale_factor)
                except RuntimeError:
                    predicting_fdm = None
                image_scores.append(measures.prediction_scores(
                    predicting_fdm, predicted_fm))
            for (measure, score) in enumerate(nanmean(image_scores, 0)):
                sb_scores[measure][cat][sub_counter] = score
    return sb_scores
def parametersweep(basedir, configfile, acfdir='ACF', invtype='tik'):
    """
    This function will run the inversion numerous times with different
    constraint parameters. It will create a directory called Cost and place
    the results there.
    Input
        basedir - The directory that holds all of the data for the simulator.
        configfile - The ini file for the simulation.
        acfdir - The directory within basedir that holds the acfs to be inverted.
        invtype - The inversion method that will be tested. Can be tik, tikd, and tv.
    """
    alpha_sweep = sp.logspace(-3.5, sp.log10(7), 25)
    costdir = os.path.join(basedir, 'Cost')
    ionoinfname = os.path.join(basedir, acfdir, '00lags.h5')
    ionoin = IonoContainer.readh5(ionoinfname)

    dirio = ('Spectrums', 'Mat', 'ACFMat')
    inputdir = os.path.join(basedir, dirio[0])

    dirlist = glob.glob(os.path.join(inputdir, '*.h5'))
    (listorder, timevector, filenumbering, timebeg, time_s) = IonoContainer.gettimes(dirlist)
    Ionolist = [dirlist[ikey] for ikey in listorder]

    RSTO = RadarSpaceTimeOperator(Ionolist, configfile, timevector, mattype='Sim')

    npts = RSTO.simparams['numpoints']
    ionospec = makeionocombined(dirlist)
    if npts == ionospec.Param_List.shape[-1]:
        tau, acfin = spect2acf(ionospec.Param_Names, ionospec.Param_List)
        nloc, ntimes = acfin.shape[:2]
        # get the original acf
        ambmat = RSTO.simparams['amb_dict']['WttMatrix']
        np = ambmat.shape[0]
        acfin_amb = sp.zeros((nloc, ntimes, np), dtype=acfin.dtype)
        for iloc, locarr in enumerate(acfin):
            for itime, acfarr in enumerate(locarr):
                acfin_amb[iloc, itime] = sp.dot(ambmat, acfarr)
        acfin_amb = acfin_amb[:, 0]
    else:
        acfin_amb = ionospec.Param_List[:, 0]

    if not os.path.isdir(costdir):
        os.mkdir(costdir)
    # pickle file stuff
    pname = os.path.join(costdir, 'cost{0}-{1}.pickle'.format(acfdir, invtype))

    alpha_list = []
    errorlist = []
    errorlaglist = []
    datadiflist = []
    constlist = []
    # note: the original tested `or 'SimpData'`, which is always true;
    # the intended substring test is restored here
    if 'perryplane' in basedir.lower() or 'simpdata' in basedir.lower():
        rbounds = [-500, 500]
    else:
        rbounds = [0, 500]

    alpha_list_new = alpha_sweep.tolist()
    for i in alpha_list:
        if i in alpha_list_new:
            alpha_list_new.remove(i)

    for i in alpha_list_new:
        ionoout, datadif, constdif = invertRSTO(RSTO, ionoin, alpha_list=i,
                                                invtype=invtype,
                                                rbounds=rbounds, Nlin=1)
        datadiflist.append(datadif)
        constlist.append(constdif)
        acfout = ionoout.Param_List[:, 0]
        alpha_list.append(i)
        outdata = sp.power(sp.absolute(acfout - acfin_amb), 2)
        aveerror = sp.sqrt(sp.nanmean(outdata, axis=0))
        errorlaglist.append(aveerror)
        errorlist.append(sp.nansum(aveerror))

    pickleFile = open(pname, 'wb')
    pickle.dump([alpha_list, errorlist, datadiflist, constlist, errorlaglist], pickleFile)
    pickleFile.close()
    mkalphalist(pname)
    alphaarr = sp.array(alpha_list)
    errorarr = sp.array(errorlist)
    errorlagarr = sp.array(errorlaglist)
    datadif = sp.array(datadiflist)
    constdif = sp.array(constlist)
    fig, axlist, axmain = plotalphaerror(alphaarr, errorarr, errorlagarr)
    fig.savefig(os.path.join(costdir, 'cost{0}-{1}.png'.format(acfdir, invtype)))
    fig, axlist = plotLcurve(alphaarr, datadif, constdif)
    fig.savefig(os.path.join(costdir, 'lcurve{0}-{1}.png'.format(acfdir, invtype)))
def _stica(space_pcs, time_pcs, mu=0.01, n_components=30, path=None):
    """Perform spatio-temporal ICA given spatial and temporal Principal
    Components

    Parameters
    ----------
    space_pcs : array
        The spatial representations of the PCs.
        Shape: (num_rows, num_columns, num_pcs).
    time_pcs : array
        The temporal representations of the PCs.
        Shape: (num_times, num_pcs).
    mu : float
        Weighting parameter for the trade off between spatial and temporal
        information. Must be between 0 and 1. Low values give higher weight
        to temporal information. Default: 0.01
    n_components : int
        The maximum number of ICA components to generate. Default: 30
    path : str
        Directory for saving or loading stICA results.

    Returns
    -------
    st_components : array
        stICA components
        Shape: (num_rows, num_columns, n_components)
    """

    # attempt to retrieve the stICA data from a save file
    ret = None
    if path is not None:
        try:
            data = np.load(path)
        except IOError:
            pass
        else:
            if data['st_components'].shape[2] == n_components and \
                    data['mu'].item() == mu and \
                    data['num_pcs'] == time_pcs.shape[1]:
                ret = data['st_components']
            data.close()
    if ret is not None:
        return ret

    # preprocess the PCA data
    for i in range(space_pcs.shape[2]):
        space_pcs[:, :, i] = mu * \
            (space_pcs[:, :, i] - nanmean(space_pcs[:, :, i])) / np.max(space_pcs)
    for i in range(time_pcs.shape[1]):
        time_pcs[:, i] = (1 - mu) * \
            (time_pcs[:, i] - nanmean(time_pcs[:, i])) / np.max(time_pcs)

    # concatenate the space and time PCs
    y = np.concatenate((space_pcs.reshape(
        space_pcs.shape[0] * space_pcs.shape[1], space_pcs.shape[2]),
        time_pcs))

    # execute the FastICA algorithm
    ica = FastICA(n_components=n_components, max_iter=1500)
    st_components = np.real(np.array(ica.fit_transform(y)))

    # pull out the spatial portion of the st_components
    st_components = st_components[:(space_pcs.shape[0] * space_pcs.shape[1]), :]
    st_components = st_components.reshape(space_pcs.shape[0],
                                          space_pcs.shape[1],
                                          st_components.shape[1])

    # normalize the ica results
    for i in range(st_components.shape[2]):
        st_component = st_components[:, :, i]
        st_component = abs(st_component - np.mean(st_component))
        st_component = st_component / np.max(st_component)
        st_components[:, :, i] = st_component

    # save the ica components if a path has been provided
    if path is not None:
        np.savez(path, st_components=st_components, mu=mu,
                 num_pcs=time_pcs.shape[1])

    return st_components
def downsample(array, factor):
    pad_size = np.ceil(old_div(float(array.size), factor)) * factor - array.size
    array_padded = np.append(array, np.zeros([pad_size.astype(np.int64)]) * np.NaN)
    return scipy.nanmean(array_padded.reshape(-1, factor), axis=1)
x_dat = []
y_dat = []
for line in f:
    line = line.strip()
    columns = line.split()
    x_dat.append(float(columns[0]))
    y_dat.append(float(columns[2]))
f.close()

y_dat = np.array(y_dat)
x_dat = np.array(x_dat)

# let the user provide the window size
R = int(sys.argv[2])
pad_size = int(math.ceil(float(y_dat.size) / R) * R - y_dat.size)
# note: the original stored the downsampled y-data in x and vice versa;
# the names are swapped here so x comes from x_dat and y from y_dat
a_padded = np.append(x_dat, np.zeros(pad_size) * np.NaN)
x = scipy.nanmean(a_padded.reshape(-1, R), axis=1)
b_padded = np.append(y_dat, np.zeros(pad_size) * np.NaN)
y = scipy.nanmean(b_padded.reshape(-1, R), axis=1)

# #no_input = int(sys.argv[2]) + 1
# on_i = []
#
# for i in xrange(2 * no_input):
#     on_i.append(float(sys.argv[3 + i]))
#
# noise = []
#
# ## chopper
def reduce_dim(x, d):
    pad_size = int(math.ceil(float(x.size) / d) * d - x.size)
    x_padded = np.append(x, np.zeros(pad_size) * np.NaN)
    # note axis=0: this averages strided elements, producing exactly d outputs
    return sp.nanmean(x_padded.reshape(-1, d), axis=0)
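# Note the axis=0 in reduce_dim() above: unlike the block averages elsewhere
# in this collection (axis=1), element j of its result is the mean of
# x[j], x[j+d], x[j+2d], ..., a strided (comb) average with exactly d entries.
# A quick illustration with np.nanmean:
import numpy as np

x = np.arange(6, dtype=float)
print(np.nanmean(x.reshape(-1, 3), axis=0))  # [1.5 2.5 3.5]
print(np.nanmean(x.reshape(-1, 3), axis=1))  # [1. 4.]  (block average, by contrast)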
def stat(x):
    return [round(nanmean(x), 5), round(nanstd(x), 5)]