def _testBasic(self, dtype):
    indices = np.asarray([0, 2, -1, 1], dtype=np.int64)
    depth = 3
    on_value = np.asarray(1.0, dtype=dtype)
    off_value = np.asarray(-1.0, dtype=dtype)

    truth = np.asarray(
        [[1.0, -1.0, -1.0],
         [-1.0, -1.0, 1.0],
         [-1.0, -1.0, -1.0],
         [-1.0, 1.0, -1.0]],
        dtype=dtype)

    # axis == -1
    self._testBothOneHot(
        indices=indices,
        depth=depth,
        on_value=on_value,
        off_value=off_value,
        dtype=dtype,
        truth=truth)

    # axis == 0
    self._testBothOneHot(
        indices=indices,
        depth=depth,
        on_value=on_value,
        off_value=off_value,
        axis=0,
        dtype=dtype,
        truth=truth.T)  # Output is transpose version in this case
def resample(oldrate, newrate, x, n, dtype, factor):
    print("Resampling from", oldrate, "Hz to", newrate, "Hz, amplification factor", factor)
    rategcd = gcd(oldrate, newrate)
    uprate = newrate // rategcd
    dnrate = oldrate // rategcd
    oldcount = len(x)
    midcount = oldcount * uprate
    newcount = midcount // dnrate
    print("Upsampling by", uprate)
    if uprate == 1:
        yout = np.asarray(x, dtype=dtype)
    else:
        # Zero-stuff: place each scaled input sample every `uprate` slots
        yout = np.zeros(midcount, dtype=dtype)
        for i in range(oldcount):
            yout[i * uprate] = x[i] * uprate
    wl = min(1.0 / uprate, 1.0 / dnrate)
    print("Antialias filtering at", wl)
    midrate = oldrate * uprate
    filt = firfilter(0, (midrate * wl) / 2.0, midrate, n)
    y = signal.lfilter(filt, 1, yout)
    print("Downsampling by", dnrate)
    if dnrate == 1:
        yout = np.asarray(y, dtype=dtype)
    else:
        # Keep every `dnrate`-th filtered sample, scaled by the amplification factor
        yout = np.zeros(newcount, dtype=dtype)
        for i in range(newcount):
            yout[i] = y[i * dnrate] * factor
    return yout
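# Not part of the original module: a minimal cross-check sketch, assuming SciPy
# is available. scipy.signal.resample_poly performs the same
# upsample -> antialias-filter -> downsample pipeline as the hand-rolled
# resampler above, so it is handy for sanity-checking output lengths.
import numpy as np
from math import gcd
from scipy import signal

def resample_scipy(oldrate, newrate, x):
    g = gcd(oldrate, newrate)
    return signal.resample_poly(np.asarray(x, dtype=float), newrate // g, oldrate // g)

y = resample_scipy(44100, 48000, np.sin(2 * np.pi * 440 * np.arange(4410) / 44100.0))
print(len(y))  # expect roughly 4800 samples (4410 * 160 / 147) at the new rate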
def compress(data):
    """
    Convert 4-byte integer values to semi-logarithmic 2-byte integers.

    The storage format for numbers up to 32767 is the number itself.
    The storage format for numbers above 32767 is -(mantissa + 10000*power),
    where the mantissa is 4 digits and power is 1, 2 or 3.
    An extra integer is inserted at positions 0, 1022, 2*1022, ...
    """
    data = numpy.asarray(data.flatten(), 'int32')
    assert len(data) == 16384
    # Logarithmic compression
    base = 10000
    erridx = data > 2767000
    idx = data > 32767
    power = numpy.ceil(numpy.log10(data[idx])) - 4
    mantissa = data[idx] // (10**power)
    data[idx] = numpy.asarray(-(mantissa + power*base), data.dtype)
    data[erridx] = -777
    # Add values at 0, 1022, 2*1022, ...
    fulldata = numpy.zeros(16384 + 17, 'i')
    idx = numpy.arange(len(fulldata), dtype='i') % 1022 != 0
    fulldata[idx] = data
    return fulldata
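# Illustrative only (not part of the original module): a scalar pack/unpack
# pair that applies the same rule compress() above applies element-wise, to
# make the semi-logarithmic format concrete.
import numpy as np

def pack_value(v):
    if v <= 32767:
        return v                               # small values stored as-is
    power = int(np.ceil(np.log10(v))) - 4      # 1, 2 or 3
    mantissa = v // 10**power                  # 4-digit mantissa
    return -(mantissa + 10000 * power)

def unpack_value(s):
    if s >= 0:
        return s
    power, mantissa = divmod(-s, 10000)
    return mantissa * 10**power

assert pack_value(50000) == -15000             # mantissa 5000, power 1
assert unpack_value(-15000) == 50000           # round-trips exactly in this case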
def _testDefaultBasic(self, dtype):
    indices = np.asarray([0, 2, -1, 1], dtype=dtype)
    depth = 3

    truth = np.asarray(
        [[1.0, 0.0, 0.0],
         [0.0, 0.0, 1.0],
         [0.0, 0.0, 0.0],
         [0.0, 1.0, 0.0]],
        dtype=dtype)

    # axis == -1
    self._testBothOneHot(
        indices=indices,
        depth=depth,
        dtype=dtype,
        truth=truth)

    # axis == 0
    self._testBothOneHot(
        indices=indices,
        depth=depth,
        axis=0,
        dtype=dtype,
        truth=truth.T)  # Output is transpose version in this case
def load_adm_sat_school_data(return_X_y=False):
    with open("./merged_adm_sat_data.csv") as csv_file:
        data_file = csv.reader(csv_file)
        temp = next(data_file)
        n_samples = int(temp[0])
        n_features = int(temp[1])
        target_names = np.array(temp[2:])

    df = pd.read_csv("./merged_adm_sat_data.csv", sep=",", usecols=(0, 1, 2, 3), skiprows=0)
    data = np.empty((n_samples, n_features), dtype=int)
    target = np.ma.empty((n_samples,), dtype=int)

    for index, row in df.iterrows():
        data[index] = np.asarray([df.iloc[index][0], df.iloc[index][1], df.iloc[index][2]],
                                 dtype=float)
        target[index] = np.asarray(df.iloc[index][3], dtype=int)

    feature_names = np.array(['ACT_AVG', 'SAT_AVG', 'GRAD_DEBT', 'REGION'])

    if return_X_y:
        return data, target

    return datasets.base.Bunch(data=data, target=target,
                               target_names=target_names,
                               DESCR='School Data set',
                               feature_names=feature_names)
def test_layer_mul(self):
    # Ensure layer multiplication gives the correct output
    layer_o = self.layer6 * self.layer7
    array1 = np.asarray(layer_o.get_nparray())
    res = np.asarray([[5]*3]*3)
    self.assertEqual(np.all(array1 == 5), True)
    self.assertTrue(allequal(layer_o._data, res))
def time_column(table, ifo=None):
    """Extract the 'time' column from the given table.

    This function uses time_func to determine the correct column to use as a
    proxy for 'time' and returns that column. The following mappings are used:

    - `sngl_inspiral` -> 'end' time
    - `sngl_burst` -> 'peak' time
    - `sngl_ringdown` -> 'start' time

    @param table any `LIGO_LW` table
    @param ifo an interferometer prefix if you want single-detector times

    @returns a numpy array object with a 'time' element for each row in the table
    """
    if hasattr(table, "get_time"):
        return numpy.asarray(table.get_time())
    func = time_func(ligolw_table.StripTableName(table.tableName))
    if hasattr(table, func.__name__):
        return numpy.asarray(getattr(table, func.__name__)())
    else:
        # Fall back to applying the row-wise time function to each row
        return numpy.asarray([func(row) for row in table])
def test_layer_add(self):
    # Ensure layer addition gives the correct output
    layer_o = self.layer6 + self.layer7
    array1 = np.asarray(layer_o.get_nparray())
    res = np.asarray([[6]*3]*3)
    self.assertEqual(np.all(array1 == 6), True)
    self.assertTrue(allequal(layer_o._data, res))
def test_layer_sub(self):
    # Ensure layer subtraction gives the correct output
    layer_o = self.layer6 - self.layer7
    array1 = np.asarray(layer_o.get_nparray())
    res = np.asarray([[-4]*3]*3)
    self.assertEqual(np.all(array1 == -4), True)
    self.assertTrue(allequal(layer_o._data, res))
def learn(tuned_parameters,model): # produceFeature(trainfile) dataset = genfromtxt(open('Data/'+trainfile,'r'), delimiter=',',dtype='f8')[0:] target = [x[0] for x in dataset] train = [x[1:] for x in dataset] # print train[1:10] # print target # print len(train) # produceFeature(testfile) test = genfromtxt(open('Data/'+testfile,'r'),delimiter=',',dtype='f8')[0:] test_target = [x[1:] for x in test] # X, y = digits.data, digits.target trainnp = np.asarray(train) targetnp = np.asarray(target) # turn the data in a (samples, feature) matrix: X, y = trainnp, targetnp # X = digits.images.reshape((n_samples, -1)) # y = digits.target # Split the dataset in two equal parts X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.5, random_state=0) scores = ['precision', 'recall'] for score in scores: print("# Tuning hyper-parameters for %s" % score) print() clf = GridSearchCV(model, tuned_parameters, cv=5, scoring='%s_weighted' % score) clf.fit(X_train, y_train) print("Best parameters set found on development set:") print() print(clf.best_params_) print() print("Grid scores on development set:") print() for params, mean_score, scores in clf.grid_scores_: print("%0.3f (+/-%0.03f) for %r" % (mean_score, scores.std() * 2, params)) print() print("Detailed classification report:") print() print("The model is trained on the full development set.") print("The scores are computed on the full evaluation set.") print() y_true, y_pred = y_test, clf.predict(X_test) print(classification_report(y_true, y_pred)) print()
def test_layer_div(self):
    # Ensure layer division gives the correct output
    layer_o = self.layer6 / self.layer7
    array1 = np.asarray(layer_o.get_nparray())
    res = np.asarray([[0.2]*3]*3)
    self.assertEqual(np.all(array1 == 0.2), True)
    self.assertTrue(allequal(layer_o._data, res))
def sample_every_two_correlation_times(energy_data, magnetization_data, correlation_time, no_of_sites):
    """Sample the given data every 2 correlation times and determine value and error."""
    magnet_samples = []
    energy_samples = []
    for t in np.arange(0, len(energy_data), 2 * int(np.ceil(correlation_time))):
        magnet_samples.append(magnetization_data[t])
        energy_samples.append(energy_data[t])
    magnet_samples = np.asarray(magnet_samples)
    energy_samples = np.asarray(energy_samples)

    abs_magnetization = np.mean(np.absolute(magnet_samples))
    abs_magnetization_error = calculate_error(magnet_samples)
    print("<m> (<|M|/N>) = {0} +/- {1}".format(abs_magnetization, abs_magnetization_error))

    magnetization = np.mean(magnet_samples)
    magnetization_error = calculate_error(magnet_samples)
    print("<M/N> = {0} +/- {1}".format(magnetization, magnetization_error))

    energy = np.mean(energy_samples)
    energy_error = calculate_error(energy_samples)
    print("<E/N> = {0} +/- {1}".format(energy, energy_error))

    magnetization_squared = np.mean((magnet_samples * no_of_sites)**2)
    magnetization_squared_error = calculate_error((magnet_samples * no_of_sites)**2)
    print("<M^2> = {0} +/- {1}".format(magnetization_squared, magnetization_squared_error))
def GetAllData(self): Cal = self.GetCal() pars = self.query('CALC:PAR:CAT?') pars = pars.strip('\n').strip("'").split(',') parnames = pars[1::2] pars = pars[::2] names = ['Frequency (Hz)'] alltrc = [self.GetFrequency()] for pp in parnames: names.append('%sre ()' % pp) names.append('%sim ()' % pp) if Cal: for pp in parnames: names.append('%sre unc ()' % pp) names.append('%sim unc ()' % pp) for par in pars: yy = self.query("CALC:DATA:TRAC? '%s', SDAT" % par) yy = np.asarray([float(xx) for xx in yy.split(',')]) yyre = yy[::2] yyim = yy[1::2] alltrc.append(yyre) alltrc.append(yyim) if Cal: for par in pars: yy = self.query("CALC:DATA:TRAC? '%s', NCD" % par) yy = np.asarray([float(xx) for xx in yy.split(',')]) yyre = yy[::2] yyim = yy[1::2] alltrc.append(yyre) alltrc.append(yyim) final = OrderedDict() for name,data in zip(names,alltrc): final[name]=data return final
def add_lines(self, levels, colors, linewidths, erase=True): """ Draw lines on the colorbar. *colors* and *linewidths* must be scalars or sequences the same length as *levels*. Set *erase* to False to add lines without first removing any previously added lines. """ y = self._locate(levels) igood = (y < 1.001) & (y > -0.001) y = y[igood] if cbook.iterable(colors): colors = np.asarray(colors)[igood] if cbook.iterable(linewidths): linewidths = np.asarray(linewidths)[igood] N = len(y) x = np.array([0.0, 1.0]) X, Y = np.meshgrid(x, y) if self.orientation == "vertical": xy = [list(zip(X[i], Y[i])) for i in range(N)] else: xy = [list(zip(Y[i], X[i])) for i in range(N)] col = collections.LineCollection(xy, linewidths=linewidths) if erase and self.lines: for lc in self.lines: lc.remove() self.lines = [] self.lines.append(col) col.set_color(colors) self.ax.add_collection(col)
def save_nodes_to_store(self, store, queue): for node_id, node in self.nodes.items(): features = {} features['neighbors'] = node['neighbors'] if 'soft_label' in self.nodes_features: features['soft_label'] = node['soft_label'] if 'size' in self.nodes_features: features['size'] = len(node['pos']) if 'pos' in self.nodes_features: features['pos'] = np.asarray(node['pos']) if features['pos'].shape == (0,): features['pos'] = np.zeros(shape=(0,3)) if 'mesh' in self.nodes_features: #Because ml incluedes the overlap is possible #That a node has a mesh in the overlap #But not a single voxel in the non-overlap region vertices, triangles = mesh.marche_cubes( node_id , self.ml ) vertices += np.asarray(self.start).astype(np.uint16) * 2 #translate mesh features['mesh'] = mesh.get_adjacent( vertices, triangles ) if 'semantic_sum' in self.nodes_features: features['semantic_sum'] = node['semantic_sum'] features['tree'] = Tree(node_id) existent_node_features = store.get_node(node_id) if existent_node_features: features = self.sum_nodes_features(existent_node_features, features ) store.put_node(node_id, features)
def estimate_transition_matrix(count_matrix):
    """
    Simple Maximum Likelihood estimator of transition matrix.

    Parameters
    ----------
    count_matrix : array or sparse matrix
        A square matrix of transition counts

    Returns
    -------
    tProb : array or sparse matrix
        Most likely transition matrix given `tCount`
    """
    # 1. Make sure you don't modify tCounts.
    # 2. Make sure you handle both floats and ints
    if scipy.sparse.isspmatrix(count_matrix):
        C = scipy.sparse.csr_matrix(count_matrix).asfptype()
        weights = np.asarray(C.sum(axis=1)).flatten()
        inv_weights = np.zeros(len(weights))
        inv_weights[weights != 0] = 1.0 / weights[weights != 0]
        D = scipy.sparse.dia_matrix((inv_weights, 0), C.shape).tocsr()
        tProb = D.dot(C)
    else:
        tProb = np.asarray(count_matrix.astype(float))  # astype creates a copy
        weights = tProb.sum(axis=1)
        inv_weights = np.zeros(len(weights))
        inv_weights[weights != 0] = 1.0 / weights[weights != 0]
        tProb = tProb * inv_weights.reshape((weights.shape[0], 1))

    return tProb
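# Illustrative usage (assumes the estimate_transition_matrix above is in
# scope): row-normalize a small 2-state count matrix; the dense and sparse
# branches give the same answer.
import numpy as np
import scipy.sparse

counts = np.array([[9, 1],
                   [2, 8]])
print(estimate_transition_matrix(counts))
# [[0.9 0.1]
#  [0.2 0.8]]
print(estimate_transition_matrix(scipy.sparse.csr_matrix(counts)).toarray())
# same matrix, computed through the sparse branch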
def shared(data):
    """
    Place the data into shared variables. This allows Theano to copy
    the data to the GPU, if one is available.
    """
    shared_x = theano.shared(
        numpy.asarray(data[:, 0].tolist(), dtype=theano.config.floatX), borrow=True)
    shared_y = theano.shared(
        numpy.asarray(data[:, 1].tolist(), dtype=theano.config.floatX), borrow=True)
    return shared_x, T.cast(shared_y, "int32")
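# Hedged usage sketch (assumes Theano is installed and shared() above is in
# scope): `data` is expected to be a 2-column object array where column 0
# holds feature vectors and column 1 holds integer labels. The toy data below
# is purely illustrative.
import numpy
data = numpy.empty((3, 2), dtype=object)
for i in range(3):
    data[i, 0] = [0.0, 1.0, 2.0]   # toy feature vector
    data[i, 1] = i % 2             # toy label
train_x, train_y = shared(data)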
def Seuil_var(img):
    """
    This function computes a threshold value. First the image's histogram is
    calculated. The threshold is set to the first index of the histogram which
    satisfies the following criteria: DH > 0, DH(i)/H(i) > 0.1, H(i) < 0.01 * Norm.

    In : img : ipl Image : image to be processed
    Out: seuil : Int : value of the threshold
    """
    dim = 255
    MaxValue = np.amax(np.asarray(img[:]))
    Norm = np.asarray(img[:]).shape[0] * np.asarray(img[:]).shape[1]
    scale = MaxValue / dim
    Wdim = dim * scale
    bins = [float(x) for x in range(dim)]
    hist, bin_edges = np.histogram(np.asarray(img[:]), bins)
    Norm = Norm - hist[0]
    median = np.median(hist)
    mean = 0
    var = 0
    i = 1
    som = 0
    while (som < 0.8 * Norm and i < len(hist) - 1):
        som = som + hist[i]
        i = i + 1
    while ((hist[i] - hist[i-1] < 0 or (hist[i] - hist[i-1]) / hist[i-1] > 0.1
            or hist[i] > 0.01 * Norm) and i < len(hist) - 1):
        i = i + 1
    if i == len(hist) - 1:
        # Ran off the end of the histogram: no index satisfied the criteria
        seuil = 0
    else:
        seuil = i
    return seuil
def test_point_in_poly3(point):
    """ tests points that should be in the polygon """
    assert point_in_poly(poly2_ccw, np.asarray(point, dtype=np.float64))
    assert point_in_poly(poly2_cw, np.asarray(point, dtype=np.float64))
def Draw(self, nrb=None, MeshColor=None, NurbsColor=None, PointsColor=None, alpha=ALPHA, blend=False): if NurbsColor is None: if self.NurbsColor is None: NurbsColor = list(asarray(Theme().color_viewer("default_patch")).copy()) else: NurbsColor = list(asarray(self.NurbsColor).copy()) if self.show: if nrb is not None: list_nrb = [nrb] else: list_nrb = self._list for i in range(0, len(list_nrb)): nrb = list_nrb[i] nrbInfo = self.list_patchInfo[i] if nrbInfo.show: _NurbsColor = asarray(NurbsColor).copy() if nrbInfo.NurbsColor is not None: _NurbsColor = asarray(nrbInfo.NurbsColor).copy() NurbsSteps = nrbInfo.steps evaluator = self.GetEvaluator( nrb, MeshColor=MeshColor, NurbsColor=_NurbsColor, alpha=alpha, steps=NurbsSteps ) showMesh = self.showMesh or nrbInfo.showMesh evaluator.draw(mesh=showMesh, nurbs=True, blend=blend) if self.showPoints or nrbInfo.showPoints: # Draw control points self.DrawControlPoints(nrb, PointsColor=PointsColor, alpha=alpha, blend=blend)
def _threshold_brier_score_vectorized(observations, forecasts, thresholds): observations = np.asarray(observations) thresholds = np.asarray(thresholds) forecasts = np.asarray(forecasts) def exceedances(x): # NaN safe calculation of threshold exceedances # add an extra dimension to `x` and broadcast `thresholds` so that it # varies along that new dimension with suppress_warnings('invalid value encountered in greater'): exceeds = (x[..., np.newaxis] > thresholds.reshape((1,) * x.ndim + (-1,)) ).astype(float) if x.ndim == 0 and np.isnan(x): exceeds[:] = np.nan else: exceeds[np.where(np.isnan(x))] = np.nan return exceeds binary_obs = exceedances(observations) if observations.shape == forecasts.shape: prob_forecast = exceedances(forecasts) elif observations.shape == forecasts.shape[:-1]: # axis=-2 should be the 'realization' axis, after swapping that axes # to the end of forecasts and inserting one extra axis with suppress_warnings('Mean of empty slice'): prob_forecast = np.nanmean(exceedances(forecasts), axis=-2) else: raise AssertionError return brier_score(binary_obs, prob_forecast)
def test_cross_validator_with_default_indices():
    n_samples = 4
    n_unique_labels = 4
    n_folds = 2
    p = 2
    n_iter = 10  # (the default value)
    X = np.array([[1, 2], [3, 4], [5, 6], [7, 8]])
    X_1d = np.array([1, 2, 3, 4])
    y = np.array([1, 1, 2, 2])
    labels = np.array([1, 2, 3, 4])
    loo = LeaveOneOut()
    lpo = LeavePOut(p)
    kf = KFold(n_folds)
    skf = StratifiedKFold(n_folds)
    lolo = LeaveOneLabelOut()
    lopo = LeavePLabelOut(p)
    ss = ShuffleSplit(random_state=0)
    ps = PredefinedSplit([1, 1, 2, 2])  # n_splits = np of unique folds = 2
    n_splits = [n_samples, comb(n_samples, p), n_folds, n_folds,
                n_unique_labels, comb(n_unique_labels, p), n_iter, 2]

    for i, cv in enumerate([loo, lpo, kf, skf, lolo, lopo, ss, ps]):
        # Test if get_n_splits works correctly
        assert_equal(n_splits[i], cv.get_n_splits(X, y, labels))

        # Test if the cross-validator works as expected even if
        # the data is 1d
        np.testing.assert_equal(list(cv.split(X, y, labels)),
                                list(cv.split(X_1d, y, labels)))

        # Test that train, test indices returned are integers
        for train, test in cv.split(X, y, labels):
            assert_equal(np.asarray(train).dtype.kind, 'i')
            assert_equal(np.asarray(test).dtype.kind, 'i')
def encode_doc(doc, max_len):
    if doc is None:
        return np.asarray([])
    # enc = np.asarray([max(min(ord(c), max_char-1), 0) for c in doc[:max_len]])
    enc = np.asarray([vocab.token2id.get(c, default_id)
                      for c in itertools.islice(gensim.utils.tokenize(doc, to_lower=True), max_len)])
    return enc
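# Hedged usage sketch: `vocab` and `default_id` are module-level globals in
# the original; the stand-ins built here with gensim only illustrate the call
# shape, assuming encode_doc and these globals live in the same module.
import gensim
import numpy as np

docs = ["the quick brown fox", "the lazy dog"]
vocab = gensim.corpora.Dictionary(
    [list(gensim.utils.tokenize(d, to_lower=True)) for d in docs])
default_id = len(vocab.token2id)   # id used for out-of-vocabulary tokens

print(encode_doc("the quick dog jumps", max_len=3))
# e.g. array of the ids for "the", "quick", "dog"; actual ids depend on the corpus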
def set_data(self, x, y, A):
    A = cbook.safe_masked_invalid(A)
    if x is None:
        x = np.arange(0, A.shape[1]+1, dtype=np.float64)
    else:
        x = np.asarray(x, np.float64).ravel()
    if y is None:
        y = np.arange(0, A.shape[0]+1, dtype=np.float64)
    else:
        y = np.asarray(y, np.float64).ravel()

    if A.shape[:2] != (y.size-1, x.size-1):
        print(A.shape)
        print(y.size)
        print(x.size)
        raise ValueError("Axes don't match array shape")
    if A.ndim not in [2, 3]:
        raise ValueError("A must be 2D or 3D")
    if A.ndim == 3 and A.shape[2] == 1:
        A.shape = A.shape[:2]
    self.is_grayscale = False
    if A.ndim == 3:
        if A.shape[2] in [3, 4]:
            if (A[:, :, 0] == A[:, :, 1]).all() and (A[:, :, 0] == A[:, :, 2]).all():
                self.is_grayscale = True
        else:
            raise ValueError("3D arrays must have RGB or RGBA as last dim")
    self._A = A
    self._Ax = x
    self._Ay = y
    self._rgbacache = None
def test_sym_matrix_to_vec(): sym = np.ones((3, 3)) sqrt2 = 1. / sqrt(2.) vec = np.array([sqrt2, 1., sqrt2, 1., 1., sqrt2]) assert_array_almost_equal(sym_matrix_to_vec(sym), vec) vec = np.array([1., 1., 1.]) assert_array_almost_equal(sym_matrix_to_vec(sym, discard_diagonal=True), vec) # Check sym_matrix_to_vec is the inverse function of vec_to_sym_matrix n = 5 p = n * (n + 1) // 2 rand_gen = np.random.RandomState(0) # when diagonal is included vec = rand_gen.rand(p) sym = vec_to_sym_matrix(vec) assert_array_almost_equal(sym_matrix_to_vec(sym), vec) # when diagonal given separately diagonal = rand_gen.rand(n + 1) sym = vec_to_sym_matrix(vec, diagonal=diagonal) assert_array_almost_equal(sym_matrix_to_vec(sym, discard_diagonal=True), vec) # multiple matrices case when diagonal is included vecs = np.asarray([vec, 2. * vec, 0.5 * vec]) syms = vec_to_sym_matrix(vecs) assert_array_almost_equal(sym_matrix_to_vec(syms), vecs) # multiple matrices case when diagonal is given seperately diagonals = np.asarray([diagonal, 3. * diagonal, -diagonal]) syms = vec_to_sym_matrix(vecs, diagonal=diagonals) assert_array_almost_equal(sym_matrix_to_vec(syms, discard_diagonal=True), vecs)
def _testTensorArrayWriteConcat(self, tf_dtype): dtype = tf_dtype.as_numpy_dtype() with self.test_session(use_gpu=self._use_gpu): ta = tensor_array_ops.TensorArray( dtype=tf_dtype, tensor_array_name="foo", size=3, infer_shape=False) if tf_dtype == tf.string: # In Python3, np.str is unicode, while we always want bytes convert = lambda x: np.asarray(x).astype("|S") else: convert = lambda x: np.asarray(x).astype(dtype) w0 = ta.write(0, convert([[4.0, 5.0], [104.0, 105.0], [204.0, 205.0]])) w1 = w0.write(1, convert([[6.0, 7.0], [106.0, 107.0]])) w2 = w1.write(2, convert([[8.0, 9.0]])) c0 = w2.concat() self.assertAllEqual( convert([[4.0, 5.0], [104.0, 105.0], [204.0, 205.0], [6.0, 7.0], [106.0, 107.0], [8.0, 9.0]]), c0.eval())
def segment_haar(cnarr): """Do segmentation for CNVkit. Calculate copy number segmentation by HaarSeg (http://haarseg.r-forge.r-project.org/) Input: log2 coverage data in Nexus 'basic' format Output: the CBS data table """ chrom_tables = [] # Segment each chromosome individually # ENH - skip large gaps (segment chrom. arms separately) for chrom, subprobes in cnarr.by_chromosome(): # echo(chrom, ':') # DBG segtable = haarSeg(subprobes['log2']) chromtable = pd.DataFrame({ 'chromosome': chrom, 'start': np.asarray(subprobes['start']).take(segtable['start']), 'end': np.asarray(subprobes['end'] ).take(segtable['start']+segtable['size']-1), 'gene': '.', 'log2': segtable['log2'], 'probes': segtable['size'], }) # echo(chromtable) # DBG chrom_tables.append(chromtable) result = pd.concat(chrom_tables) echo("haar: Found", len(result), "segments") segarr = cnarr.as_dataframe(result) segarr.sort_columns() return segarr
def __init__(self, qdir='GM', verbosity=1, filepattern=None): """ qdir ... (opt) 'GM' or 'GK' for symmetry direction of q verbosity ... (opt) 0 silent, 1 minimal output, 3 debug, >3 debug interpolation filepattern.. (opt) read eps2D from files matching filepattern qdir has no effect in this case """ self.qdir = qdir; self.verbosity = verbosity; # read DP-output files containing EPS2D (sorted by momentum transfer) if filepattern is None: self.path = os.path.dirname(os.path.abspath(__file__))+'/data/'+qdir; filepattern = self.path+'/CUTOFF_R12.6_grapheneAA-2d0-HIGH-RPA*-high-%s-q*_outlf.eps'%(self.qdir); self.spectra= dp_mdf.GetMDF(filepattern); self.spectra.sort(key=lambda mdf: np.linalg.norm(mdf.get_q('cc','au'))); # check that we have eps2D assert len(self.spectra)>0 for mdf in self.spectra: assert mdf.param['quantity']=='mdf'; assert (''.join(mdf.param['comment'])).find('eps2D'); # extract data self.eps2D = np.asarray([ mdf.eps for mdf in self.spectra ]); q = [ np.linalg.norm(mdf.get_q('cc','au')) for mdf in self.spectra ]; self.q = np.asarray(q, dtype=float); # in 1/bohr self.E = self.spectra[0].get_E(); # in eV self.calc_param = deepcopy(self.spectra[0].param); self.set_qprecision();
def access_Measurement(lat, long, year): path_ccsm4 = '/Users/DavidKMYang/ClimateResearch/WBGT/ccsm4_tasmax_nepal/' os.chdir(path_ccsm4) file_names_ccsm4 = glob.glob("tasmax_" + str(year)+"*.mat") for i in range(len(file_names_ccsm4)): lat_index = 0 long_index = 0 print (file_names_ccsm4[i]) tempData = scipy.io.loadmat(path_ccsm4 + file_names_ccsm4[i]) tempData = tempData[file_names_ccsm4[i][:-4]][0] tempLatList = [] for k in range(len(tempData[0])): tempLatList.append(tempData[0][k][0]) tempLatList = np.asarray(tempLatList) lat_index = find_nearest(tempLatList, lat) tempLongList = tempData[1][0] tempLongList = np.asarray(tempLongList) long_index = find_nearest(tempLongList, long) print (tempLatList[lat_index]) print (tempLongList[long_index]) print (tempData[2][lat_index][long_index]) access_Measurement(25, 30, 2001)
def test_stratified_shuffle_split_init():
    X = np.arange(7)
    y = np.asarray([0, 1, 1, 1, 2, 2, 2])
    # Check that error is raised if there is a class with only one sample
    assert_raises(ValueError, next, StratifiedShuffleSplit(3, 0.2).split(X, y))

    # Check that error is raised if the test set size is smaller than n_classes
    assert_raises(ValueError, next, StratifiedShuffleSplit(3, 2).split(X, y))
    # Check that error is raised if the train set size is smaller than
    # n_classes
    assert_raises(ValueError, next, StratifiedShuffleSplit(3, 3, 2).split(X, y))

    X = np.arange(9)
    y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])
    # Check that errors are raised if there is not enough samples
    assert_raises(ValueError, StratifiedShuffleSplit, 3, 0.5, 0.6)
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, 8, 0.6).split(X, y))
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(3, 0.6, 8).split(X, y))

    # Train size or test size too small
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(train_size=2).split(X, y))
    assert_raises(ValueError, next,
                  StratifiedShuffleSplit(test_size=2).split(X, y))
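# Hedged note (not part of the test above): newer scikit-learn releases take
# keyword arguments (n_splits, test_size, train_size). A minimal
# self-contained example of the splitter the test exercises:
import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit

X = np.arange(9).reshape(9, 1)
y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])
sss = StratifiedShuffleSplit(n_splits=3, test_size=0.33, random_state=0)
for train_idx, test_idx in sss.split(X, y):
    # each test fold keeps one sample per class
    print(sorted(y[test_idx]))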
def extract_emission_features(sentence_tokens: List[Token], word_index: int, tag: str, feature_indexer: Indexer, words_to_tag_counters, tf_idf_score, add_to_indexer: bool): """ Extracts emission features for tagging the word at word_index with tag. :param sentence_tokens: sentence to extract over :param word_index: word index to consider :param tag: the tag that we're featurizing for :param feature_indexer: Indexer over features :param add_to_indexer: boolean variable indicating whether we should be expanding the indexer or not. This should be True at train time (since we want to learn weights for all features) and False at test time (to avoid creating any features we don't have weights for). :return: an ndarray """ feats = [] # curr_word = lemmatizer.lemmatize(sentence_tokens[word_index].word) curr_word = sentence_tokens[word_index].word # Lexical and POS features on this word, the previous, and the next (Word-1, Word0, Word1) for idx_offset in range(-1, 2): if word_index + idx_offset < 0: active_word = "<s>" elif word_index + idx_offset >= len(sentence_tokens): active_word = "</s>" else: active_word = sentence_tokens[word_index + idx_offset].word if word_index + idx_offset < 0: active_pos = "<S>" elif word_index + idx_offset >= len(sentence_tokens): active_pos = "</S>" else: active_pos = sentence_tokens[word_index + idx_offset].pos maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":Word" + repr(idx_offset) + "=" + active_word) maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":Pos" + repr(idx_offset) + "=" + active_pos) # Character n-grams of the current word max_ngram_size = 3 for ngram_size in range(1, max_ngram_size + 1): start_ngram = curr_word[0:min(ngram_size, len(curr_word))] maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":StartNgram=" + start_ngram) end_ngram = curr_word[max(0, len(curr_word) - ngram_size):] maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":EndNgram=" + end_ngram) # Look at a few word shape features maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":IsCap=" + repr(curr_word[0].isupper())) # Compute word shape new_word = [] for i in range(0, len(curr_word)): if curr_word[i].isupper(): new_word += "X" elif curr_word[i].islower(): new_word += "x" elif curr_word[i].isdigit(): new_word += "0" else: new_word += "?" maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":WordShape=" + repr(new_word)) maybe_add_feature( feats, feature_indexer, add_to_indexer, tag + ":WordCount=" + repr(words_to_tag_counters[curr_word])) if tf_idf_score >= 0.75: maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":TF-IDF=" + "1-TFIDF") elif tf_idf_score >= 0.5: maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":TF-IDF=" + "0.75-TFIDF") elif tf_idf_score >= 0.25: maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":TF-IDF=" + "0.5-TFIDF") else: maybe_add_feature(feats, feature_indexer, add_to_indexer, tag + ":TF-IDF=" + "0.25-TFIDF") return np.asarray(feats, dtype=int)
def get_kmeans_random_batch(model, X, X_noisy, X_adv, dataset, k=10, batch_size=100, pca=False): """ Get the mean distance of each Xi in X_adv to its k nearest neighbors. :param model: :param X: normal images :param X_noisy: noisy images :param X_adv: advserial images :param dataset: 'mnist', 'cifar', 'svhn', has different DNN architectures :param k: the number of nearest neighbours for LID estimation :param batch_size: default 100 :param pca: using pca or not, if True, apply pca to the referenced sample and a minibatch of normal samples, then compute the knn mean distance of the referenced sample. :return: kms_normal: kmean of normal images (num_examples, 1) kms_noisy: kmean of normal images (num_examples, 1) kms_adv: kmean of adv images (num_examples, 1) """ # get deep representations funcs = [K.function([model.layers[0].input, K.learning_phase()], [model.layers[-2].output])] km_dim = len(funcs) print("Number of layers to use: ", km_dim) def estimate(i_batch): start = i_batch * batch_size end = np.minimum(len(X), (i_batch + 1) * batch_size) n_feed = end - start km_batch = np.zeros(shape=(n_feed, km_dim)) km_batch_adv = np.zeros(shape=(n_feed, km_dim)) km_batch_noisy = np.zeros(shape=(n_feed, km_dim)) for i, func in enumerate(funcs): X_act = func([X[start:end], 0])[0] X_act = np.asarray(X_act, dtype=np.float32).reshape((n_feed, -1)) # print("X_act: ", X_act.shape) X_adv_act = func([X_adv[start:end], 0])[0] X_adv_act = np.asarray(X_adv_act, dtype=np.float32).reshape((n_feed, -1)) # print("X_adv_act: ", X_adv_act.shape) X_noisy_act = func([X_noisy[start:end], 0])[0] X_noisy_act = np.asarray(X_noisy_act, dtype=np.float32).reshape((n_feed, -1)) # print("X_noisy_act: ", X_noisy_act.shape) # Maximum likelihood estimation of local intrinsic dimensionality (LID) if pca: km_batch[:, i] = kmean_pca_batch(X_act, X_act, k=k) else: km_batch[:, i] = kmean_batch(X_act, X_act, k=k) # print("lid_batch: ", lid_batch.shape) if pca: km_batch_adv[:, i] = kmean_pca_batch(X_act, X_adv_act, k=k) else: km_batch_adv[:, i] = kmean_batch(X_act, X_adv_act, k=k) # print("lid_batch_adv: ", lid_batch_adv.shape) if pca: km_batch_noisy[:, i] = kmean_pca_batch(X_act, X_noisy_act, k=k) else: km_batch_noisy[:, i] = kmean_batch(X_act, X_noisy_act, k=k) # print("lid_batch_noisy: ", lid_batch_noisy.shape) return km_batch, km_batch_noisy, km_batch_adv kms = [] kms_adv = [] kms_noisy = [] n_batches = int(np.ceil(X.shape[0] / float(batch_size))) for i_batch in tqdm(range(n_batches)): km_batch, km_batch_noisy, km_batch_adv = estimate(i_batch) kms.extend(km_batch) kms_adv.extend(km_batch_adv) kms_noisy.extend(km_batch_noisy) # print("kms: ", kms.shape) # print("kms_adv: ", kms_noisy.shape) # print("kms_noisy: ", kms_noisy.shape) kms = np.asarray(kms, dtype=np.float32) kms_noisy = np.asarray(kms_noisy, dtype=np.float32) kms_adv = np.asarray(kms_adv, dtype=np.float32) return kms, kms_noisy, kms_adv
def get_lids_random_batch(model, X, X_noisy, X_adv, dataset, k=10, batch_size=100): """ Get the local intrinsic dimensionality of each Xi in X_adv estimated by k close neighbours in the random batch it lies in. :param model: :param X: normal images :param X_noisy: noisy images :param X_adv: advserial images :param dataset: 'mnist', 'cifar', 'svhn', has different DNN architectures :param k: the number of nearest neighbours for LID estimation :param batch_size: default 100 :return: lids: LID of normal images of shape (num_examples, lid_dim) lids_adv: LID of advs images of shape (num_examples, lid_dim) """ # get deep representations funcs = [K.function([model.layers[0].input, K.learning_phase()], [out]) for out in get_layer_wise_activations(model, dataset)] lid_dim = len(funcs) print("Number of layers to estimate: ", lid_dim) def estimate(i_batch): start = i_batch * batch_size end = np.minimum(len(X), (i_batch + 1) * batch_size) n_feed = end - start lid_batch = np.zeros(shape=(n_feed, lid_dim)) lid_batch_adv = np.zeros(shape=(n_feed, lid_dim)) lid_batch_noisy = np.zeros(shape=(n_feed, lid_dim)) for i, func in enumerate(funcs): X_act = func([X[start:end], 0])[0] X_act = np.asarray(X_act, dtype=np.float32).reshape((n_feed, -1)) # print("X_act: ", X_act.shape) X_adv_act = func([X_adv[start:end], 0])[0] X_adv_act = np.asarray(X_adv_act, dtype=np.float32).reshape((n_feed, -1)) # print("X_adv_act: ", X_adv_act.shape) X_noisy_act = func([X_noisy[start:end], 0])[0] X_noisy_act = np.asarray(X_noisy_act, dtype=np.float32).reshape((n_feed, -1)) # print("X_noisy_act: ", X_noisy_act.shape) # random clean samples # Maximum likelihood estimation of local intrinsic dimensionality (LID) lid_batch[:, i] = mle_batch(X_act, X_act, k=k) # print("lid_batch: ", lid_batch.shape) lid_batch_adv[:, i] = mle_batch(X_act, X_adv_act, k=k) # print("lid_batch_adv: ", lid_batch_adv.shape) lid_batch_noisy[:, i] = mle_batch(X_act, X_noisy_act, k=k) # print("lid_batch_noisy: ", lid_batch_noisy.shape) return lid_batch, lid_batch_noisy, lid_batch_adv lids = [] lids_adv = [] lids_noisy = [] n_batches = int(np.ceil(X.shape[0] / float(batch_size))) for i_batch in tqdm(range(n_batches)): lid_batch, lid_batch_noisy, lid_batch_adv = estimate(i_batch) lids.extend(lid_batch) lids_adv.extend(lid_batch_adv) lids_noisy.extend(lid_batch_noisy) # print("lids: ", lids.shape) # print("lids_adv: ", lids_noisy.shape) # print("lids_noisy: ", lids_noisy.shape) lids = np.asarray(lids, dtype=np.float32) lids_noisy = np.asarray(lids_noisy, dtype=np.float32) lids_adv = np.asarray(lids_adv, dtype=np.float32) return lids, lids_noisy, lids_adv
def test3(self): #define nodal points and triangles of a small test grid #got this grid from http://matplotlib.org/examples/pylab_examples/triplot_demo.html xy = numpy.asarray([[-0.101, 0.872], [-0.080, 0.883], [-0.069, 0.888], [-0.054, 0.890], [-0.045, 0.897], [-0.057, 0.895], [-0.073, 0.900], [-0.087, 0.898], [-0.090, 0.904], [-0.069, 0.907], [-0.069, 0.921], [-0.080, 0.919], [-0.073, 0.928], [-0.052, 0.930], [-0.048, 0.942], [-0.062, 0.949], [-0.054, 0.958], [-0.069, 0.954], [-0.087, 0.952], [-0.087, 0.959], [-0.080, 0.966], [-0.085, 0.973], [-0.087, 0.965], [-0.097, 0.965], [-0.097, 0.975], [-0.092, 0.984], [-0.101, 0.980], [-0.108, 0.980], [-0.104, 0.987], [-0.102, 0.993], [-0.115, 1.001], [-0.099, 0.996], [-0.101, 1.007], [-0.090, 1.010], [-0.087, 1.021], [-0.069, 1.021], [-0.052, 1.022], [-0.052, 1.017], [-0.069, 1.010], [-0.064, 1.005], [-0.048, 1.005], [-0.031, 1.005], [-0.031, 0.996], [-0.040, 0.987], [-0.045, 0.980], [-0.052, 0.975], [-0.040, 0.973], [-0.026, 0.968], [-0.020, 0.954], [-0.006, 0.947], [0.003, 0.935], [0.006, 0.926], [0.005, 0.921], [0.022, 0.923], [0.033, 0.912], [0.029, 0.905], [0.017, 0.900], [0.012, 0.895], [0.027, 0.893], [0.019, 0.886], [0.001, 0.883], [-0.012, 0.884], [-0.029, 0.883], [-0.038, 0.879], [-0.057, 0.881], [-0.062, 0.876], [-0.078, 0.876], [-0.087, 0.872], [-0.030, 0.907], [-0.007, 0.905], [-0.057, 0.916], [-0.025, 0.933], [-0.077, 0.990], [-0.059, 0.993]]) triangles = numpy.asarray([[67, 66, 1], [65, 2, 66], [1, 66, 2], [64, 2, 65], [63, 3, 64], [60, 59, 57], [2, 64, 3], [3, 63, 4], [0, 67, 1], [62, 4, 63], [57, 59, 56], [59, 58, 56], [61, 60, 69], [57, 69, 60], [4, 62, 68], [6, 5, 9], [61, 68, 62], [69, 68, 61], [9, 5, 70], [6, 8, 7], [4, 70, 5], [8, 6, 9], [56, 69, 57], [69, 56, 52], [70, 10, 9], [54, 53, 55], [56, 55, 53], [68, 70, 4], [52, 56, 53], [11, 10, 12], [69, 71, 68], [68, 13, 70], [10, 70, 13], [51, 50, 52], [13, 68, 71], [52, 71, 69], [12, 10, 13], [71, 52, 50], [71, 14, 13], [50, 49, 71], [49, 48, 71], [14, 16, 15], [14, 71, 48], [17, 19, 18], [17, 20, 19], [48, 16, 14], [48, 47, 16], [47, 46, 16], [16, 46, 45], [23, 22, 24], [21, 24, 22], [17, 16, 45], [20, 17, 45], [21, 25, 24], [27, 26, 28], [20, 72, 21], [25, 21, 72], [45, 72, 20], [25, 28, 26], [44, 73, 45], [72, 45, 73], [28, 25, 29], [29, 25, 31], [43, 73, 44], [73, 43, 40], [72, 73, 39], [72, 31, 25], [42, 40, 43], [31, 30, 29], [39, 73, 40], [42, 41, 40], [72, 33, 31], [32, 31, 33], [39, 38, 72], [33, 72, 38], [33, 38, 34], [37, 35, 38], [34, 38, 35], [35, 37, 36]]) num_elems = len(triangles) elements = UnstructuredGrid(num_elems) elements.n1 = triangles[:, 0] - 1 elements.n2 = triangles[:, 1] - 1 elements.n3 = triangles[:, 2] - 1 nodes = UnstructuredGrid(len(xy)) nodes.lon = (xy[:, 0] | units.rad) nodes.lat = (xy[:, 1] | units.rad) grid = StaggeredGrid(elements, nodes) values = numpy.random.random(num_elems) print values elements.values = values nodes.values = grid.map_elements_to_nodes(values) print nodes.values remapped_values = grid.map_nodes_to_elements(nodes.values) print remapped_values before_sum = values.sum() after_sum = remapped_values.sum() print 'before', before_sum, 'after', after_sum self.assertAlmostEquals( after_sum, before_sum, msg="Sum of values before and after remapping should be the same")
def _symbols_to_ints(symbols, unit_dict):
    ints = [unit_dict[symbol] for symbol in symbols]
    return np.asarray(ints, dtype=np.int32)
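# Illustrative call (the unit dictionary below is a stand-in, not from the
# original code); assumes numpy is imported as np and the helper above is in scope.
unit_dict = {"m": 0, "s": 1, "kg": 2}
print(_symbols_to_ints(["kg", "m", "m", "s"], unit_dict))
# [2 0 0 1]  (int32)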
import os
import cv2
import matplotlib.image as mpimg
import numpy as np
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier
from sklearn.externals import joblib

path = 'Dataset'
imagePaths = [os.path.join(path, f) for f in os.listdir(path)]
print(imagePaths)

faces = []
IDs = []
data = []
for imagePath in imagePaths:
    if imagePath != 'Dataset/.DS_Store':
        faceImg = Image.open(imagePath).convert('L')
        faceNp = np.array(faceImg, 'uint8')
        faceNp = cv2.resize(faceNp, (40, 40))
        dataNp = faceNp.reshape((1600,))
        ID = int(os.path.split(imagePath)[-1].split('.')[1])
        faces.append(faceNp)
        IDs.append(ID)
        data.append(dataNp)

final_images = np.asarray(faces)
final_data = np.asarray(data)
n_features = final_data.shape[1]
y = np.asarray(IDs)
n_classes = 3

np.save('final_data', final_data)
np.save('final_labels', y)
def main(): parser = argparse.ArgumentParser() # filname of image parser.add_argument('-f', action='store', dest='fname', help='Image to detect edges on.', required=True) # user inputs what sigma value they want to use for Gaussian kernel parser.add_argument('-s', action='store', dest='sigma', type=float, help='Sigma value for the Gaussian kernel.', required=True) # low value for hysteresis thresholding parser.add_argument('-L', action='store', dest='low', type=int, help='Lower end of the threshold.', required=True) # high value for hysteresis thresholding parser.add_argument('-H', action='store', dest='high', type=int, help='Higher end of the threshold.', required=True) # thredhold to determine if R score belongs to a corner or not parser.add_argument('-R', action='store', dest='rval', type=int, help='Threshold for the R scores.', required=True) # desired dimensions of the kernel parser.add_argument('-S', action='store', dest='size', type=int, default=5, help='Size of the Gaussian kernel. Default is 5x5 kernel.') args = parser.parse_args() if(args.low >= args.high): raise ValueError("High value threshold must be greater than low value!") if(args.high > 255): raise ValueError("High threshold cannot be greater than 255!") if(args.low < 0): raise ValueError("Low threshold cannot be lower than 0!") img = Image.open(args.fname) # need image to be gray scale for algorithm to work img = np.asarray( ImageOps.grayscale(img) ) # get spatially separated kernel dGx, dGy = gaussian_kernel_d(args.sigma, args.size) # get x and y gradients Ix = convolve(img, dGx) Iy = convolve(img, dGy) # get the R scores for the image R = harris_scores(Ix, Iy, args.sigma) # will colour the spots that are greater than the R threshold as white corn = show_corners(R, img, args.rval) # calclate the magnitudes of the gradient at each pixel dG = np.sqrt(Ix**2 + Iy**2) # this calculates the angle of the gradient at each pixel angles = np.arctan2(Iy, Ix) # we only want the largest value along the gradient to be visible nm = non_max_spr(dG, angles) corn_sup = non_max_spr(corn, angles) # exacts the most important pixels based on the threshold values thresholding(nm, args.low, args.high) edgeImg = Image.fromarray(nm) cornerImg = Image.fromarray(corn_sup) # show edges edgeImg.show() # show corners cornerImg.show()
validation2_labels = validation2[:, 1] validation2 = train_scaler.transform((validation2[:, 0]).reshape(-1,1)) test_labels = test[:, 1] test = train_scaler.transform((test[:, 0]).reshape(-1,1)) # prepare sequence data and labels X_train, y_train = prepare_seq2seq_data(train, look_back, look_ahead) X_validation1, y_validation1 = prepare_seq2seq_data(validation1, look_back, look_ahead) X_validation2, y_validation2 = prepare_seq2seq_data(validation2, look_back, look_ahead) X_validation2_labels, y_validation2_labels = prepare_seq2seq_data(validation2_labels, look_back, look_ahead) X_test, y_test = prepare_seq2seq_data(test, look_back, look_ahead) X_test_labels, y_test_labels = prepare_seq2seq_data(test_labels, look_back, look_ahead) return train_scaler, X_train, y_train, X_validation1, y_validation1, X_validation2, y_validation2, y_validation2_labels, X_test, y_test, y_test_labels if __name__ == "__main__": print 22 arr = np.array([[1,2,3,4,5],[2,3,4,5,6],[3,4,5,6,7],[4,5,6,7,8],[5,6,7,8,9],[6,7,8,9,10],[7.1,8.1,9.1,10,11]]) train_diagonals = get_diagonals(arr) # diagonals contains a reading's values calculated at different points in time # the top left and bottom right predictions do not contain predictions for all timesteps # fill the missing prediction values in diagonals. curenttly using the first predicted value for all missing timesteps for idx, diag in enumerate(train_diagonals): diag = diag.flatten() # missing value filled with the first value train_diagonals[idx] = np.hstack((diag, np.full(5 - len(diag), diag[0]))) train_diagonals = np.asarray(train_diagonals) print train_diagonals
def __call__(self, a):
    self.variables = (a,)
    self.back = np.asarray(a > 0, dtype=a.dtype)
    return a.data * self.back
model.load_state_dict(torch.load(best_path)) data_loader_test = DataLoader(TID_test,batch_size=16,num_workers=2) batch_test_losses = [] model.eval() i=0 mos_idx = torch.arange(10) mos_op_list = [] mos_gt_list = [] mos_idx = mos_idx.to(device) for i,data in enumerate(data_loader_test): images = data['img'].to(device) labels = data['label'].to(device) with torch.no_grad(): outputs = model(images) outputs = outputs.view(-1, 10) test_loss = emd_loss(labels, outputs) mos_op = torch.mean(outputs*mos_idx,1) mos_gt = torch.mean(labels*mos_idx,1) mos_op_list.extend(mos_op.cpu().detach().numpy()) mos_gt_list.extend(mos_gt.cpu().detach().numpy()) batch_test_losses.append((test_loss.item())*(images.size()[0])) avg_test_loss = sum(batch_test_losses)/len(TID_test) print('Test EMD loss is {:.4f}'.format(avg_test_loss)) mos_op_list = np.asarray(mos_op_list) mos_gt_list = np.asarray(mos_gt_list) r,_= scipy.stats.spearmanr(mos_op_list, mos_gt_list, axis=0) num = mos_gt_list.shape[0] upper,lower = srcc_conf_bounds(r,num) print(r) print("SRCC 95 % confidence interval, lower {} upper {}".format(lower, upper))
def check_array_indexer(array: AnyArrayLike, indexer: Any) -> Any: """ Check if `indexer` is a valid array indexer for `array`. For a boolean mask, `array` and `indexer` are checked to have the same length. The dtype is validated, and if it is an integer or boolean ExtensionArray, it is checked if there are missing values present, and it is converted to the appropriate numpy array. Other dtypes will raise an error. Non-array indexers (integer, slice, Ellipsis, tuples, ..) are passed through as is. .. versionadded:: 1.0.0 Parameters ---------- array : array-like The array that is being indexed (only used for the length). indexer : array-like or list-like The array-like that's used to index. List-like input that is not yet a numpy array or an ExtensionArray is converted to one. Other input types are passed through as is. Returns ------- numpy.ndarray The validated indexer as a numpy array that can be used to index. Raises ------ IndexError When the lengths don't match. ValueError When `indexer` cannot be converted to a numpy ndarray to index (e.g. presence of missing values). See Also -------- api.types.is_bool_dtype : Check if `key` is of boolean dtype. Examples -------- When checking a boolean mask, a boolean ndarray is returned when the arguments are all valid. >>> mask = pd.array([True, False]) >>> arr = pd.array([1, 2]) >>> pd.api.indexers.check_array_indexer(arr, mask) array([ True, False]) An IndexError is raised when the lengths don't match. >>> mask = pd.array([True, False, True]) >>> pd.api.indexers.check_array_indexer(arr, mask) Traceback (most recent call last): ... IndexError: Boolean index has wrong length: 3 instead of 2. NA values in a boolean array are treated as False. >>> mask = pd.array([True, pd.NA]) >>> pd.api.indexers.check_array_indexer(arr, mask) array([ True, False]) A numpy boolean mask will get passed through (if the length is correct): >>> mask = np.array([True, False]) >>> pd.api.indexers.check_array_indexer(arr, mask) array([ True, False]) Similarly for integer indexers, an integer ndarray is returned when it is a valid indexer, otherwise an error is (for integer indexers, a matching length is not required): >>> indexer = pd.array([0, 2], dtype="Int64") >>> arr = pd.array([1, 2, 3]) >>> pd.api.indexers.check_array_indexer(arr, indexer) array([0, 2]) >>> indexer = pd.array([0, pd.NA], dtype="Int64") >>> pd.api.indexers.check_array_indexer(arr, indexer) Traceback (most recent call last): ... ValueError: Cannot index with an integer indexer containing NA values For non-integer/boolean dtypes, an appropriate error is raised: >>> indexer = np.array([0., 2.], dtype="float64") >>> pd.api.indexers.check_array_indexer(arr, indexer) Traceback (most recent call last): ... 
IndexError: arrays used as indices must be of integer or boolean type """ from pandas.core.construction import array as pd_array # whatever is not an array-like is returned as-is (possible valid array # indexers that are not array-like: integer, slice, Ellipsis, None) # In this context, tuples are not considered as array-like, as they have # a specific meaning in indexing (multi-dimensional indexing) if is_list_like(indexer): if isinstance(indexer, tuple): return indexer else: return indexer # convert list-likes to array if not is_array_like(indexer): indexer = pd_array(indexer) if len(indexer) == 0: # empty list is converted to float array by pd.array indexer = np.array([], dtype=np.intp) dtype = indexer.dtype if is_bool_dtype(dtype): if is_extension_array_dtype(dtype): indexer = indexer.to_numpy(dtype=bool, na_value=False) else: indexer = np.asarray(indexer, dtype=bool) # GH26658 if len(indexer) != len(array): raise IndexError( f"Boolean index has wrong length: " f"{len(indexer)} instead of {len(array)}" ) elif is_integer_dtype(dtype): try: indexer = np.asarray(indexer, dtype=np.intp) except ValueError as err: raise ValueError( "Cannot index with an integer indexer containing NA values" ) from err else: raise IndexError("arrays used as indices must be of integer or boolean type") return indexer
def naive_bayes_multiclass(samples_file, outfile, mkplots=False): """Naive Bayes training function for two or more classes from sampled pixel RGB values Inputs: samples_file = Input text file containing sampled pixel RGB values for each training class. The file should be a tab-delimited table with one training class per column. The required first row must contain header labels for each class. The row values for each class must be comma-delimited RGB values. See the file plantcv/tests/data/sampled_rgb_points.txt for an example. outfile = Name of the output text file that will store the color channel probability density functions. mkplots = Make PDF plots (True or False). :param samples_file: str :param outfile: str :param mkplots: bool """ # Initialize a dictionary to store sampled RGB pixel values for each input class sample_points = {} # Open the sampled points text file f = open(samples_file, "r") # Read the first line and use the column headers as class labels header = f.readline() header = header.rstrip("\n") class_list = header.split("\t") # Initialize a dictionary for the red, green, and blue channels for each class for cls in class_list: sample_points[cls] = {"red": [], "green": [], "blue": []} # Loop over the rest of the data in the input file for row in f: # Remove newlines and quotes row = row.rstrip("\n") row = row.replace('"', '') # If this is not a blank line, parse the data if len(row) > 0: # Split the row into a list of points per class points = row.split("\t") # For each point per class for i, point in enumerate(points): if len(point) > 0: # Split the point into red, green, and blue integer values red, green, blue = map(int, point.split(",")) # Append each intensity value into the appropriate class list sample_points[class_list[i]]["red"].append(red) sample_points[class_list[i]]["green"].append(green) sample_points[class_list[i]]["blue"].append(blue) f.close() # Initialize a dictionary to store probability density functions per color channel in HSV colorspace pdfs = {"hue": {}, "saturation": {}, "value": {}} # For each class for cls in class_list: # Create a blue, green, red-formatted image ndarray with the class RGB values bgr_img = cv2.merge((np.asarray(sample_points[cls]["blue"], dtype=np.uint8), np.asarray(sample_points[cls]["green"], dtype=np.uint8), np.asarray(sample_points[cls]["red"], dtype=np.uint8))) # Convert the BGR ndarray to an HSV ndarray hsv_img = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2HSV) # Split the HSV ndarray into the component HSV channels hue, saturation, value = cv2.split(hsv_img) # Create an HSV channel dictionary that stores the channels as lists (horizontally stacked ndarrays) channels = {"hue": np.hstack(hue), "saturation": np.hstack(saturation), "value": np.hstack(value)} # For each channel for channel in channels.keys(): # Create a kernel density estimator for the channel values (Gaussian kernel) kde = stats.gaussian_kde(channels[channel]) # Use the KDE to calculate a probability density function for the channel # Sample at each of the possible 8-bit values pdfs[channel][cls] = kde(range(0, 256)) if mkplots: # If mkplots is True, generate a density curve plot per channel for each class for channel, cls in pdfs.items(): _plot_pdf(channel, os.path.dirname(outfile), **cls) # Write the PDFs to a text file out = open(outfile, "w") # Write the column labels out.write("class\tchannel\t" + "\t".join(map(str, range(0, 256))) + "\n") # For each channel for channel, cls in pdfs.items(): # For each class for class_name, pdf in cls.items(): # Each row is the 
PDF for the given class and color channel out.write(class_name + "\t" + channel + "\t" + "\t".join(map(str, pdf)) + "\n")
def dsm_gen(dsms):
    """Generator for DSMs"""
    for dsm in dsms:
        yield np.asarray(dsm)
def hotspot2sine(self, element, x_size, y_size, hs_group, posMat, data, thetas): ''' aligns projections to a sine curve based on hotspot information Variables ----------- element: int element index x_size: int ROI pixel dimension in x y_size: int ROI pixel dimension in y hs_group: int hotspot group number posMat: ndarray position matrix. 2 data: ndarray 4D xrf dataset ndarray [elements, theta, y,x] thetas: ndarray sorted projection angle list ''' self.posMat = posMat self.posMat[0] = posMat[0] + x_size//2 self.posMat[1] = posMat[1] + y_size//2 hs_x_pos, hs_y_pos, firstPosOfHotSpot, hotSpotX, hotSpotY, data = self.alignment_parameters(element, x_size, y_size, hs_group, self.posMat, data) #**************** num_projections = data.shape[1] y_shifts = np.zeros(num_projections) x_shifts = np.zeros(num_projections) thetas = np.asarray(thetas) for j in range(num_projections): if hs_x_pos[j] != 0 and hs_y_pos[j] != 0: xxshift = int(round(x_size//2 - hotSpotX[j])) yyshift = int(round(y_size//2 - hotSpotY[j])) if hs_x_pos[j] == 0: xxshift = 0 if hs_y_pos[j] == 0: yyshift = 0 x_shifts[j] = xxshift y_shifts[j] = yyshift hotspotXPos = np.zeros(num_projections, dtype=np.int) hotspotYPos = np.zeros(num_projections, dtype=np.int) for i in range(num_projections): hotspotYPos[i] = int(round(hs_y_pos[i])) hotspotXPos[i] = int(round(hs_x_pos[i])) hotspotProj = np.where(hotspotXPos != 0)[0] theta_tmp = thetas[hotspotProj] com = hotspotXPos[hotspotProj] if hs_group == 0: self.fitCenterOfMass(com, x=theta_tmp) else: self.fitCenterOfMass2(com, self.centers, x=theta_tmp) self.alignCenterOfMass2(hotspotProj, data) ## yfit for i in hotspotProj: y_shifts[i] = int(hotspotYPos[hotspotProj[0]]) - int(hotspotYPos[i]) # data[:, i] = np.roll(data[:, i], y_shifts[i], axis=1) data = self.shiftProjection(data, 0,y_shifts[i],i) #update reconstruction slider value # self.recon.sld.setValue(self.centers[2]) print("align done") self.centers = list(np.round(self.centers)) return data, x_shifts, y_shifts
def deepLPI_train(dataset, dataset_str, lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len): train_bags = dataset['train'] test_bags = dataset['test'] train_mRNA_bags = dataset['train_mRNA'] test_mRNA_bags = dataset['test_mRNA'] train_lncRNA_bags = dataset['train_lncRNA'] test_lncRNA_bags = dataset['test_lncRNA'] train_bags_nm = dataset['train_bags_nm'] train_ins_nm = dataset['train_ins_nm'] test_bags_nm = dataset['test_bags_nm'] test_ins_nm = dataset['test_ins_nm'] train_bags_str = dataset_str['train'] test_bags_str = dataset_str['test'] train_mRNA_bags_str = dataset_str['train_mRNA'] test_mRNA_bags_str = dataset_str['test_mRNA'] train_lncRNA_bags_str = dataset_str['train_lncRNA'] test_lncRNA_bags_str = dataset_str['test_lncRNA'] train_bags_nm_str = dataset_str['train_bags_nm'] train_ins_nm_str = dataset_str['train_ins_nm'] test_bags_nm_str = dataset_str['test_bags_nm'] test_ins_nm_str = dataset_str['test_ins_nm'] # convert bag to batch train_mRNA_set = convertToBatch(train_mRNA_bags) test_mRNA_set = convertToBatch(test_mRNA_bags) train_lncRNA_set = convertToBatch(train_lncRNA_bags) test_lncRNA_set = convertToBatch(test_lncRNA_bags) train_set = convertToBatch(train_bags) test_set = convertToBatch(test_bags) dimension = train_set[0][0].shape[0] train_mRNA_set_str = convertToBatch(train_mRNA_bags_str) test_mRNA_set_str = convertToBatch(test_mRNA_bags_str) train_lncRNA_set_str = convertToBatch(train_lncRNA_bags_str) test_lncRNA_set_str = convertToBatch(test_lncRNA_bags_str) train_set_str = convertToBatch(train_bags_str) test_set_str = convertToBatch(test_bags_str) dimension_str = train_set_str[0][0].shape[0] model = model_func(lncRNA_len, mRNA_len, lncRNA_struct_len, mRNA_struct_len) # train model t1 = time.time() num_batch = len(train_set) all_auc=[] all_auprc=[] iso_expr_data_all=get_expr_data("./dataset/isoform_expression_data.txt") lnc_expr_data_all=get_expr_data("./dataset/lncRNA_expression_data.txt") lncRNA_feature_colum=188 #small dataset for epoch in range(args.max_epoch): #Training initial_score_all = np.array([]) crf_bag_index=[] y_all=np.array([]) lnc_expr_data=[] iso_expr_data=[] num_train_batch = len(train_set) train_loss = np.zeros((num_train_batch, 1), dtype=float) train_acc = np.zeros((num_train_batch, 1), dtype=float) for ibatch, batch in enumerate(train_mRNA_set): if train_set[ibatch][0].shape[0]!=train_set_str[ibatch][0].shape[0]: continue y_all=np.hstack((y_all, train_mRNA_set[ibatch][1])) initial_score_all_ = model.predict_on_batch([train_lncRNA_set[ibatch][0], train_mRNA_set[ibatch][0], train_lncRNA_set_str[ibatch][0], train_mRNA_set_str[ibatch][0]]) initial_score_all = np.hstack((initial_score_all, np.transpose(initial_score_all_)[0])) i=0 for i in range(train_mRNA_set[ibatch][0].shape[0]): crf_bag_index.append(ibatch) ibag_name=train_bags_nm[ibatch].encode('ascii','ignore').strip() ibag_name.replace("'","-") if len(ibag_name.split('-'))>2: lncRNA_name=ibag_name.split('-')[0]+'-'+ibag_name.split('-')[1] else: lncRNA_name=ibag_name.split('-')[0] for ins in train_ins_nm[ibatch]: if lncRNA_name in lnc_expr_data_all: lnc_expr_data.append(lnc_expr_data_all[lncRNA_name]) else: lnc_expr_data.append([0] * lncRNA_feature_colum) iso_expr_data.append(iso_expr_data_all[ins.encode('ascii','ignore').strip()])#nicodedata.normalize("NFKD", ins)]) y_all=np.asarray(y_all, dtype=np.int) #WGCNA for isoform expression data iso_expr_data=np.asarray(iso_expr_data) co_exp_net=np.corrcoef(iso_expr_data) # Set nan to be zero nan_where = np.isnan(co_exp_net) co_exp_net[nan_where] = 0 # 
Diagnal to be zero for ii in range(co_exp_net.shape[0]): co_exp_net[ii, ii] = 0 # Apply soft threshold co_exp_net = np.fabs(co_exp_net) co_exp_net = pow(co_exp_net, 6) co_exp_net_isoform=co_exp_net #WGCNA for lncRNA expression data lnc_expr_data=np.asarray(lnc_expr_data) lnc_co_exp_net=np.corrcoef(lnc_expr_data) # Set nan to be zero lnc_nan_where = np.isnan(lnc_co_exp_net) lnc_co_exp_net[lnc_nan_where] = 0 # Diagnal to be zero for ii in range(lnc_co_exp_net.shape[0]): lnc_co_exp_net[ii, ii] = 0 # Apply soft threshold lnc_co_exp_net = np.fabs(lnc_co_exp_net) lnc_co_exp_net = pow(lnc_co_exp_net, 6) co_exp_net_lncRNA=lnc_co_exp_net crf_bag_index=np.asarray(crf_bag_index) K_training_size=y_all.shape[0] K_testing_size=0 theta = np.array([1.0, 1.0]) new_label, theta, pos_prob_crf, unary_potential, pairwise_potential = run_crf(epoch, initial_score_all, y_all, crf_bag_index, co_exp_net_isoform, co_exp_net_lncRNA, K_training_size, K_testing_size, theta, sigma=0.1) if epoch > 0: s_index=0 updated_train_label=[] for ibatch, batch in enumerate(train_mRNA_set): e_index=s_index+train_lncRNA_set[ibatch][1].shape[0] updated_train_label.append((train_lncRNA_set[ibatch][0], np.asarray(new_label[s_index:e_index]))) s_index=e_index train_lncRNA_set=updated_train_label for ibatch, batch in enumerate(train_mRNA_set): if train_set[ibatch][0].shape[0]!=train_set_str[ibatch][0].shape[0] : continue if train_set[ibatch][0].shape[0]!=train_lncRNA_set[ibatch][1].shape[0]: continue result = model.train_on_batch([train_lncRNA_set[ibatch][0], train_mRNA_set[ibatch][0], train_lncRNA_set_str[ibatch][0], train_mRNA_set_str[ibatch][0]], train_lncRNA_set[ibatch][1]) train_loss[ibatch] = result[0] train_acc[ibatch] = result[1] model, mean_train_loss, mean_train_acc = model, np.mean(train_loss), np.mean(train_acc) return model
import pickle import numpy as np # db = pickle.load(open('bert_fine_tune.p', 'rb')) from utils import Config, safe_pickle_dump, strip_version db = pickle.load(open(Config.db_path, 'rb')) orig = pickle.load(open('elmo_embed.p', 'rb')) # db = pickle.load(open('bert_out.p', 'rb')) # print(len(db)) # X = np.array(list(db.values())) # normalization X = orig / np.linalg.norm(orig, axis=1, keepdims=1) # print(X.shape) pids = list(db.keys()) # B = N ds = -np.asarray(np.dot(X, X.T)) #NxD * DxB => NxB # print(ds[0][0]) IX = np.argsort(ds, axis=0) # NxB # pid = '1407.2515' # pid = '1904.05856' # pid = '1904.07460' # ID = pids.index(pid) # print(IX.shape) ARXIV_PATH = 'https://arxiv.org/abs/' # print(ARXIV_PATH + pids[ID]) # print(orig[ID]) # for i in range(0,6): # # print(IX[ID][i]) # # print(orig[IX[i][ID]]) # # print(1+ds[ID][IX[i][ID]], end=' ') # sim_pid = pids[IX[i][ID]] # print(ARXIV_PATH + sim_pid)
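The script above ranks papers by cosine similarity: rows are L2-normalised, so sorting the negated dot products puts the most similar papers first. A small hedged sketch of the same ranking logic with toy vectors (the ids and embedding values below are placeholders, not the contents of the real pickles):

import numpy as np

embeddings = np.random.rand(5, 8)                 # toy: 5 papers, 8-dim vectors
pids = ["1407.2515", "1904.05856", "a", "b", "c"]  # placeholder ids

X = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)
sims = X @ X.T                                     # cosine similarity matrix
order = np.argsort(-sims, axis=1)                  # most similar first, per row

query = 0
for j in order[query][:3]:                         # top-3 neighbours (includes itself)
    print(pids[j], round(float(sims[query, j]), 3))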
mindata1 = halfmin[str(table.stock1[pair])][32:332].values mindata2 = halfmin[str(table.stock2[pair])][32:332].values # print(mindata1) # print(mindata2) mindata1 = preprocessing.scale(mindata1) mindata2 = preprocessing.scale(mindata2) new_spread[1,106:406] = mindata1 new_spread[2,106:406] = mindata2 whole_day.append(new_spread) whole_day = np.asarray(whole_day) whole_day = torch.FloatTensor(whole_day) output = model(whole_day) if table.shape[0] == 1: _, predicted = torch.max(output,0) else: _, predicted = torch.max(output,1) action_choose = predicted.cpu().numpy() action_choose = action_choose.tolist() table["action"] = pd.Series(action_choose)
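The dimension passed to torch.max above depends on whether the batch holds a single spread or several; taking the argmax over the class dimension gives the predicted action per spread. A small sketch (the class count and shapes are illustrative only):

import torch

logits = torch.randn(4, 3)                 # 4 spreads, 3 candidate actions
_, predicted = torch.max(logits, dim=1)    # argmax over the class dimension
actions = predicted.cpu().numpy().tolist()
print(actions)                             # e.g. [2, 0, 1, 1]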
matData = scipy.io.loadmat('rat3_all.mat') matrixA = matData['EEGandEMG'] matrixB = matData['labels'] matrixRowSize = matrixA.shape[1] A = matrixA B = matrixB A = A.T B = B.T train_len = 10000 x_train = A[0:train_len,:] y_train = B[0:train_len,:] x_test = A[train_len:matrixRowSize,:] y_test = B[train_len:matrixRowSize,:] x_train = np.asarray(x_train).flatten() y_train = np.asarray(y_train).flatten() x_train = x_train.reshape(train_len, 1, 4000) y_train = y_train.reshape(train_len, 1, 6) x_test = np.asarray(x_test).flatten() x_test = x_test.reshape(matrixRowSize-train_len, 1, 4000) y_test = y_test.reshape(matrixRowSize-train_len, 1, 6) from keras.layers import Dense, Dropout, Activation, Flatten, Reshape, LSTM, regularizers from keras.optimizers import SGD as SGD from keras.optimizers import Adam print('LSTMaspiration2') model = Sequential() model.add(LSTM(6, return_sequences = True, input_shape = (1, 4000), activation = 'softsign')) model.add(Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
name_array = [] mean_array = [] sd_array = [] median_array = [] diff = False scal = False #looking through each directory for dir in directoryname_list: print("First argument is", sys.argv[1]) #change directory to each directory in argument os.chdir(dir) #create a list of all the led fits files we are interested in initial_img_list = np.asarray(glob.glob("*exp_"+exposure_time+"_t00*_940nm_led.*.1.1.fits")) final_img_list = np.asarray(glob.glob("*exp_"+exposure_time+"_t00*_940nm_led.*.1.103.fits")) print(initial_img_list) print(final_img_list) name = initial_img_list[0].split('.')[0] initial_list = name+"_initial.lis" final_list = name+"_final.lis" sub_list = name+"_sub_image.lis" gain_sub_list = name+"_gain_sub_image.lis" np.savetxt(initial_list,initial_img_list,fmt='%s') np.savetxt(name+"_final.lis",final_img_list,fmt='%s') os.system("awk '{gsub(/.fits/,\".diff.fits\");print$0}' "+initial_list+" > "+sub_list)
other_outputs = [] with torch.no_grad(): for f in tqdm(listdir(test_dir)): if '.png' in f: img = cv2.imread(path.join(test_dir, f), cv2.IMREAD_COLOR) img2 = cv2.imread(path.join(test_dir2, f), cv2.IMREAD_COLOR) img3 = cv2.imread(path.join(test_dir3, f), cv2.IMREAD_COLOR) img = np.concatenate([img, img2, img3], axis=2) img = cv2.copyMakeBorder(img, 14, 14, 14, 14, cv2.BORDER_REFLECT_101) inp = [] inp.append(img) inp = np.asarray(inp, dtype='float') inp = preprocess_inputs(inp) inp = torch.from_numpy(inp.transpose((0, 3, 1, 2))).float() inp = Variable(inp).cuda() nadir, cat_inp, coord_inp = parse_img_id(f) nadir = torch.from_numpy(np.asarray([nadir / 60.0 ]).copy()).float() cat_inp = torch.from_numpy(cat_inp.copy()[np.newaxis, ...]).float() coord_inp = torch.from_numpy(coord_inp.copy()[np.newaxis, ...]).float()
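The inference loop above stacks three BGR crops into a 9-channel image and transposes the batch from NHWC to the NCHW layout PyTorch expects. A hedged sketch of just that conversion with a dummy image (preprocess_inputs is project-specific, so plain scaling stands in for it):

import numpy as np
import torch

img = np.random.randint(0, 256, (128, 128, 9), dtype=np.uint8)   # 3 stacked BGR images
batch = np.asarray([img], dtype="float") / 255.0                  # stand-in for preprocess_inputs
tensor = torch.from_numpy(batch.transpose((0, 3, 1, 2))).float()  # NHWC -> NCHW
print(tensor.shape)                                               # torch.Size([1, 9, 128, 128])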
W = [0] W = W + [np.deg2rad(angle) for angle in range(-180, 180, 5)] # heading/angular velocity # print('actions:', len(W)) stepSize = 0.5 # velocity tau = 0.1 # discretization U = [0] U = U + [stepSize] * (len(W) - 1) # print('vel:', len(U)) control_in = [] for i in range(len(U)): control_in.append([W[i], U[i]]) control_in = np.asarray( control_in) # convention: the first control action should correspond to "null" action (stay idle) # obstacle_space = [[2, 8], [35, 15], [5, 65], [95, 2]] # points occupied by obstacles ############################################------ Main Algorithm------- ############################# count = 0 replan_step = 10 # how often to replan the trajectory robot_pose = p_init robot_ori = theta_init actual_path = [] future_step = 20 # initiate the ros node for publishing trajectory msg
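range(-180, 180, 5) contributes 72 headings, so with the leading null action both W and U have 73 entries and control_in has shape (73, 2), with row 0 the idle action. A quick check of that layout:

import numpy as np

W = [0] + [np.deg2rad(a) for a in range(-180, 180, 5)]  # null heading + 72 headings
U = [0] + [0.5] * (len(W) - 1)                          # matching velocities
control_in = np.asarray(list(zip(W, U)))
print(len(W), control_in.shape)                         # 73 (73, 2)
print(control_in[0])                                    # [0. 0.] -> the idle action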
MAX_NUM_WORDS = 2000 NUM_CATEGORIES = dp.categories_size() # ---------------------- # prepare dataset: # ---------------------- tokenizer = Tokenizer(num_words=MAX_NUM_WORDS) tokenizer.fit_on_texts(dp.fetch_dataset_train().data) word_index = tokenizer.word_index # integer encode documents X_train = tokenizer.texts_to_matrix(dp.fetch_dataset_train().data, mode='tfidf') X_test = tokenizer.texts_to_matrix(dp.fetch_dataset_test().data, mode='tfidf') y_train = dp.fetch_dataset_train().target y_train = to_categorical(np.asarray(y_train)) y_test = dp.fetch_dataset_test().target y_test = to_categorical(np.asarray(y_test)) keras_model = Sequential() keras_model.add(Embedding(MAX_NUM_WORDS, 512, input_length=MAX_NUM_WORDS)) keras_model.add(Dense(64, activation='relu')) keras_model.add(Flatten()) keras_model.add(Dense(NUM_CATEGORIES, activation='softmax')) keras_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) print(keras_model.summary()) keras_model.fit(X_train, y_train, epochs=10, batch_size=128) loss, accuracy = keras_model.evaluate(X_test, y_test, batch_size=128)
img = cv2.resize(img, dsize=None, fx=f, fy=f) # scale proportionally, keeping the aspect ratio data = np.zeros([max_size, max_size, 3]) w_new, h_new, c = img.shape s_x = int((max_size - w_new) / 2) s_y = int((max_size - h_new) / 2) data[s_x:s_x + w_new, s_y:s_y + h_new, :] = img return data if __name__== "__main__": labels = {'NILM': 0, 'HSIL': 1} img = cv2.imread('./2017-07-24-16_51_57_0.png') img = size_process(img, 224) data = img[:, :, ::-1] data = np.asarray(data, dtype=np.float32) data = data_process(data) data = np.expand_dims(data, axis=0) fr = open('./checkpoint_dir/weights.pkl', 'rb') weights = pickle.load(fr) with tf.Session() as sess: model = ResNet50_fpn(sess=sess, inputs=(None, 224, 224, 3), blocks=[3, 4, 6, 3], weights=weights) init = tf.global_variables_initializer() sess.run(init) print(model.var_list) saver = tf.train.Saver(var_list=model.var_list) save_path = saver.save(sess, "./checkpoint_dir/MyModel") output = model.predict(data) print(output)
def find_nearest(centers, point): centers = np.asarray(centers) # for center in centers: idx = np.array((centers - point) ** 2).sum(axis=1).argmin() return centers[idx]
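find_nearest returns the centre with the smallest squared Euclidean distance to the query point. A quick usage example, assuming the function above is in scope:

import numpy as np

centers = [[0.0, 0.0], [5.0, 5.0], [10.0, 0.0]]
print(find_nearest(centers, np.array([4.0, 4.5])))  # -> [5. 5.]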
def sampleZ(self, NUMBER_OF_MCMC_SAMPLES_TOTAL): invEst = numpy.linalg.inv(self.X.transpose() @ self.X + 1.0 * numpy.eye(self.p)) ridgeBetaEst = (invEst @ self.X.transpose()) @ self.y z = numpy.zeros(self.p, dtype=numpy.int) z[numpy.absolute(ridgeBetaEst) > self.delta] = 1 beta = ridgeBetaEst # get a sparse initial solution in order to ensure faster convergence maxNrInitialSelectedVars = int(self.p * 0.01) if maxNrInitialSelectedVars > 0 and numpy.sum( z) > maxNrInitialSelectedVars: largestIds = numpy.argsort( -numpy.absolute(ridgeBetaEst))[0:maxNrInitialSelectedVars] z = numpy.zeros(self.p, dtype=numpy.int) z[largestIds] = 1 beta[z == 0] = 0 sigmaSquareR = numpy.mean(numpy.square(self.y - self.X @ beta)) print("beta = ") print(beta) print("sigmaSquareR = ") print(sigmaSquareR) print("z = ") print(z) BURN_IN_SAMPLES = int(0.1 * NUMBER_OF_MCMC_SAMPLES_TOTAL) assert (BURN_IN_SAMPLES >= 1) NUMBER_OF_MCMC_SAMPLES_USED = NUMBER_OF_MCMC_SAMPLES_TOTAL - BURN_IN_SAMPLES print("BURN_IN_SAMPLES = ", BURN_IN_SAMPLES) print("NUMBER_OF_MCMC_SAMPLES_USED = ", NUMBER_OF_MCMC_SAMPLES_USED) posteriorAssignments = numpy.zeros( (NUMBER_OF_MCMC_SAMPLES_USED, self.p)) averagePosteriorBeta = numpy.zeros(self.p) averageSigmaSquareR = 0.0 spikeAndSlabVar = numpy.asarray([self.sigmaSquare0, self.etaSquare1]) print("spikeAndSlabVar = ", spikeAndSlabVar) for mcmcIt in range(NUMBER_OF_MCMC_SAMPLES_TOTAL): print("mcmcIt = ", mcmcIt) # if self.delta > 0: for j in range(self.p): # sample p(z_j | beta, z_-j, y, sigmaSquareR, X) z[j] = self.sampleZjConditionedOnRest(sigmaSquareR, spikeAndSlabVar, beta, z, j) # sample p(beta_j | beta_-j, z, y, sigmaSquareR, X) meanTilde, sigmaSquareTilde, _ = self.getMeanAndVarOfBetaConditional( sigmaSquareR, spikeAndSlabVar, beta, z, j) beta[j] = scipy.stats.norm.rvs( loc=meanTilde, scale=numpy.sqrt(sigmaSquareTilde)) if self.delta == 0: # safety check for delta == 0 assert (numpy.all(beta[z == 0] == 0) and numpy.all(beta[z == 1] != 0)) # sample p(sigmaSquareR | beta, z, y, X) etaSquareForsigmaSquareR = ( SpikeAndSlabProposedModelSearch.NU_R * SpikeAndSlabProposedModelSearch.ETA_SQUARE_R + numpy.sum(numpy.square(self.y - numpy.matmul(self.X, beta))) ) / (SpikeAndSlabProposedModelSearch.NU_R + self.n) sigmaSquareR = samplingHelper.getScaledInvChiSquareSample( nu=SpikeAndSlabProposedModelSearch.NU_R + self.n, etaSquare=etaSquareForsigmaSquareR, numberOfSamples=1)[0] # sample p(sigmaSquare_0 | beta, z, y, X) and p(sigmaSquare_1 | beta, z, y, X) spikeAndSlabVar[1] = self.sampleSigmaSquareConditional(beta, z) print("slab variance = ", spikeAndSlabVar[1]) if mcmcIt >= BURN_IN_SAMPLES: posteriorAssignments[mcmcIt - BURN_IN_SAMPLES] = z averagePosteriorBeta += beta averageSigmaSquareR += sigmaSquareR averagePosteriorBeta = averagePosteriorBeta / float( NUMBER_OF_MCMC_SAMPLES_USED) averageSigmaSquareR = averageSigmaSquareR / float( NUMBER_OF_MCMC_SAMPLES_USED) # print("posteriorAssignments = ") # print(posteriorAssignments) # print("averagePosteriorBeta = ") # print(averagePosteriorBeta) countAssignments = defaultdict(lambda: 0) for mcmcIt in range(NUMBER_OF_MCMC_SAMPLES_USED): nonZeroPos = numpy.where(posteriorAssignments[mcmcIt] != 0)[0] nonZeroPosAsStr = [str(num) for num in nonZeroPos] nonZeroPosAsStr = " ".join(nonZeroPosAsStr) countAssignments[nonZeroPosAsStr] += 1 sortedAssignmentsByFrequency = sorted(countAssignments.items(), key=lambda kv: kv[1], reverse=True) print("sortedAssignmentsByFrequency = ") print(sortedAssignmentsByFrequency) mostFrequentAssignment = 
showResultsText.getNumpyArray( sortedAssignmentsByFrequency[0][0]) # print("mostFrequentAssignment = ", mostFrequentAssignment) # see "Optimal predictive model selection", 2004 assignmentProbs = numpy.mean(posteriorAssignments, axis=0) medianProbabilityModel = numpy.where(assignmentProbs > 0.5)[0] # print("assignmentProbs = ", assignmentProbs) # print("medianProbabilityModel = ", medianProbabilityModel) return mostFrequentAssignment, medianProbabilityModel, assignmentProbs, averagePosteriorBeta, averageSigmaSquareR, sortedAssignmentsByFrequency
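The two model summaries returned above differ: the most frequent assignment is the single configuration of z sampled most often, while the median probability model (the 2004 "Optimal predictive model selection" criterion referenced in the code comment) keeps every covariate whose posterior inclusion probability exceeds 0.5. A minimal sketch of both summaries from a toy matrix of posterior z samples:

import numpy as np
from collections import defaultdict

z_samples = np.array([[1, 0, 1],   # toy posterior inclusion indicators,
                      [1, 0, 1],   # one row per retained MCMC sample
                      [1, 1, 0],
                      [1, 0, 1]])

# median probability model: covariates included in more than half the samples
inclusion_probs = z_samples.mean(axis=0)
median_model = np.where(inclusion_probs > 0.5)[0]

# most frequent assignment: the configuration sampled most often
counts = defaultdict(int)
for row in z_samples:
    counts[tuple(np.where(row != 0)[0])] += 1
most_frequent = max(counts.items(), key=lambda kv: kv[1])[0]

print(inclusion_probs, median_model, most_frequent)  # [1. 0.25 0.75] [0 2] (0, 2)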
def main(): save_plots = False ### Set parameters ### nugget = 1.e-10 all_n_clusters = [1,2] corr_kernel = 'exponential_periodic' GCP_mapWithNoise= False sampling_model = 'GCP' integratedPrediction = False coef_latent_mapping = 0.1 prediction_size = 1000 ### Set parameters ### parameter_bounds = np.asarray( [[0,400]] ) training_size = 40 if (save_plots): if not os.path.exists('data_UCB'): os.mkdir('data_UCB') abs = np.atleast_2d(range(0,400)).T f_plot = [scoring_function(i) for i in abs[:,0]] x_training = [] y_training = [] for i in range(training_size): x = np.random.uniform(0,400) x_training.append(x) y_training.append(scoring_function(x)) x_training = np.atleast_2d(x_training).T candidates = [] real_y = [] for i in range(prediction_size): x = [np.random.uniform(0,400)] candidates.append(x) real_y.append(scoring_function(x[0])) real_y = np.asarray(real_y) candidates = np.asarray(candidates) count = -1 fig = plt.figure() for n_clusters in all_n_clusters: count += 2 ax = fig.add_subplot(len(all_n_clusters),2,count) ax.set_title("GCP prediction") gcp = GaussianCopulaProcess(nugget = nugget, corr = corr_kernel, random_start = 5, n_clusters = n_clusters, coef_latent_mapping = coef_latent_mapping, mapWithNoise = GCP_mapWithNoise, useAllNoisyY = False, model_noise = None, try_optimize = True) gcp.fit(x_training,y_training) print '\nGCP fitted' print 'Likelihood', np.exp(gcp.reduced_likelihood_function_value_) predictions,MSE,boundL,boundU = \ gcp.predict(candidates, eval_MSE=True, eval_confidence_bounds=True, coef_bound = 1.96, integratedPrediction=integratedPrediction) pred_error = np.mean( (predictions - np.asarray(real_y) ) **2. ) print 'SMSE', pred_error / (np.std(real_y) **2.) idx = np.argsort(candidates[:,0]) s_candidates = candidates[idx,0] s_boundL = boundL[idx] s_boundU = boundU[idx] pred,MSE_bis = gcp.predict(np.atleast_2d(s_candidates).T, eval_MSE=True, transformY=False, eval_confidence_bounds=False, coef_bound = 1.96) gp_boundL = pred - 1.96*np.sqrt(MSE_bis) gp_boundU = pred + 1.96*np.sqrt(MSE_bis) t_f_plot = [gcp.mapping(abs[i],f_plot[i],normalize=True) for i in range(len(f_plot))] t_y_training = [gcp.mapping(x_training[i],y_training[i],normalize=True) for i in range(len(y_training))] if(save_plots): save_data = np.asarray([s_candidates,boundL,boundU,predictions,f_plot]).T np.savetxt('data_UCB/data_plot.csv',save_data,delimiter=',') ax.plot(abs,f_plot) l1, = ax.plot(candidates,predictions,'r+',label='GCP predictions') l3, = ax.plot(x_training,y_training,'bo',label='Training points') ax.fill(np.concatenate([s_candidates,s_candidates[::-1]]),np.concatenate([s_boundL,s_boundU[::-1]]),alpha=.5, fc='c', ec='None') ax = fig.add_subplot(len(all_n_clusters),2,count+1) ax.set_title('GP space') ax.plot(abs,t_f_plot) ax.plot(s_candidates,pred,'r+',label='GCP predictions') ax.plot(x_training,t_y_training,'bo',label='Training points') ax.fill(np.concatenate([s_candidates,s_candidates[::-1]]),np.concatenate([gp_boundL,gp_boundU[::-1]]),alpha=.5, fc='c', ec='None') if(save_plots): t_save_data = np.asarray([s_candidates,gp_boundL,gp_boundU,pred,np.asarray(t_f_plot)[:,0]]).T np.savetxt('data_UCB/gpspace_data_plot.csv',t_save_data,delimiter=',') training_points = np.asarray([x_training[:,0],y_training,np.asarray(t_y_training)[:,0]]).T np.savetxt('data_UCB/train_data_plot.csv',training_points,delimiter=',') plt.legend() plt.show()
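The SMSE printed above is the mean squared prediction error standardised by the variance of the true responses, so a value near 1 means the model does no better than predicting the mean. A tiny sketch of that metric:

import numpy as np

def smse(predictions, targets):
    """Standardised mean squared error: MSE divided by the target variance."""
    predictions, targets = np.asarray(predictions), np.asarray(targets)
    return np.mean((predictions - targets) ** 2) / np.var(targets)

print(smse([1.1, 1.9, 3.2], [1.0, 2.0, 3.0]))  # small value -> good fit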
class TestiLocBaseIndependent: """Tests Independent Of Base Class""" @pytest.mark.parametrize( "key", [ slice(None), slice(3), range(3), [0, 1, 2], Index(range(3)), np.asarray([0, 1, 2]), ], ) @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager): frame = DataFrame({0: range(3)}, dtype=object) cat = Categorical(["alpha", "beta", "gamma"]) if not using_array_manager: assert frame._mgr.blocks[0]._can_hold_element(cat) df = frame.copy() orig_vals = df.values indexer(df)[key, 0] = cat overwrite = isinstance(key, slice) and key == slice(None) if overwrite or using_array_manager: # TODO(ArrayManager) we always overwrite because ArrayManager takes # the "split" path, which still overwrites # TODO: GH#39986 this probably shouldn't behave differently expected = DataFrame({0: cat}) assert not np.shares_memory(df.values, orig_vals) else: expected = DataFrame({0: cat}).astype(object) if not using_array_manager: assert np.shares_memory(df[0].values, orig_vals) tm.assert_frame_equal(df, expected) # check we dont have a view on cat (may be undesired GH#39986) df.iloc[0, 0] = "gamma" if overwrite: assert cat[0] != "gamma" else: assert cat[0] != "gamma" # TODO with mixed dataframe ("split" path), we always overwrite the column frame = DataFrame({0: np.array([0, 1, 2], dtype=object), 1: range(3)}) df = frame.copy() orig_vals = df.values indexer(df)[key, 0] = cat expected = DataFrame({0: cat, 1: range(3)}) tm.assert_frame_equal(df, expected) # TODO(ArrayManager) does not yet update parent @td.skip_array_manager_not_yet_implemented @pytest.mark.parametrize("box", [array, Series]) def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager): # GH#38952 Case with not setting a full column # IntegerArray without NAs arr = array([1, 2, 3, 4]) obj = frame_or_series(arr.to_numpy("i8")) if frame_or_series is Series or not using_array_manager: values = obj.values else: values = obj[0].values obj.iloc[:2] = box(arr[2:]) expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8")) tm.assert_equal(obj, expected) # Check that we are actually in-place if frame_or_series is Series: assert obj.values is values else: if using_array_manager: assert obj[0].values is values else: assert obj.values.base is values.base and values.base is not None def test_is_scalar_access(self): # GH#32085 index with duplicates doesn't matter for _is_scalar_access index = Index([1, 2, 1]) ser = Series(range(3), index=index) assert ser.iloc._is_scalar_access((1,)) df = ser.to_frame() assert df.iloc._is_scalar_access((1, 0)) def test_iloc_exceeds_bounds(self): # GH6296 # iloc should allow indexers that exceed the bounds df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE")) # lists of positions should raise IndexError! 
msg = "positional indexers are out-of-bounds" with pytest.raises(IndexError, match=msg): df.iloc[:, [0, 1, 2, 3, 4, 5]] with pytest.raises(IndexError, match=msg): df.iloc[[1, 30]] with pytest.raises(IndexError, match=msg): df.iloc[[1, -30]] with pytest.raises(IndexError, match=msg): df.iloc[[100]] s = df["A"] with pytest.raises(IndexError, match=msg): s.iloc[[100]] with pytest.raises(IndexError, match=msg): s.iloc[[-100]] # still raise on a single indexer msg = "single positional indexer is out-of-bounds" with pytest.raises(IndexError, match=msg): df.iloc[30] with pytest.raises(IndexError, match=msg): df.iloc[-30] # GH10779 # single positive/negative indexer exceeding Series bounds should raise # an IndexError with pytest.raises(IndexError, match=msg): s.iloc[30] with pytest.raises(IndexError, match=msg): s.iloc[-30] # slices are ok result = df.iloc[:, 4:10] # 0 < start < len < stop expected = df.iloc[:, 4:] tm.assert_frame_equal(result, expected) result = df.iloc[:, -4:-10] # stop < 0 < start < len expected = df.iloc[:, :0] tm.assert_frame_equal(result, expected) result = df.iloc[:, 10:4:-1] # 0 < stop < len < start (down) expected = df.iloc[:, :4:-1] tm.assert_frame_equal(result, expected) result = df.iloc[:, 4:-10:-1] # stop < 0 < start < len (down) expected = df.iloc[:, 4::-1] tm.assert_frame_equal(result, expected) result = df.iloc[:, -10:4] # start < 0 < stop < len expected = df.iloc[:, :4] tm.assert_frame_equal(result, expected) result = df.iloc[:, 10:4] # 0 < stop < len < start expected = df.iloc[:, :0] tm.assert_frame_equal(result, expected) result = df.iloc[:, -10:-11:-1] # stop < start < 0 < len (down) expected = df.iloc[:, :0] tm.assert_frame_equal(result, expected) result = df.iloc[:, 10:11] # 0 < len < start < stop expected = df.iloc[:, :0] tm.assert_frame_equal(result, expected) # slice bounds exceeding is ok result = s.iloc[18:30] expected = s.iloc[18:] tm.assert_series_equal(result, expected) result = s.iloc[30:] expected = s.iloc[:0] tm.assert_series_equal(result, expected) result = s.iloc[30::-1] expected = s.iloc[::-1] tm.assert_series_equal(result, expected) # doc example def check(result, expected): str(result) result.dtypes tm.assert_frame_equal(result, expected) dfl = DataFrame(np.random.randn(5, 2), columns=list("AB")) check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index)) check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]]) check(dfl.iloc[4:6], dfl.iloc[[4]]) msg = "positional indexers are out-of-bounds" with pytest.raises(IndexError, match=msg): dfl.iloc[[4, 5, 6]] msg = "single positional indexer is out-of-bounds" with pytest.raises(IndexError, match=msg): dfl.iloc[:, 4] @pytest.mark.parametrize("index,columns", [(np.arange(20), list("ABCDE"))]) @pytest.mark.parametrize( "index_vals,column_vals", [ ([slice(None), ["A", "D"]]), (["1", "2"], slice(None)), ([datetime(2019, 1, 1)], slice(None)), ], ) def test_iloc_non_integer_raises(self, index, columns, index_vals, column_vals): # GH 25753 df = DataFrame( np.random.randn(len(index), len(columns)), index=index, columns=columns ) msg = ".iloc requires numeric indexers, got" with pytest.raises(IndexError, match=msg): df.iloc[index_vals, column_vals] @pytest.mark.parametrize("dims", [1, 2]) def test_iloc_getitem_invalid_scalar(self, dims): # GH 21982 if dims == 1: s = Series(np.arange(10)) else: s = DataFrame(np.arange(100).reshape(10, 10)) with pytest.raises(TypeError, match="Cannot index by location index"): s.iloc["a"] def test_iloc_array_not_mutating_negative_indices(self): # GH 21867 array_with_neg_numbers = np.array([1, 2, 
-1]) array_copy = array_with_neg_numbers.copy() df = DataFrame( {"A": [100, 101, 102], "B": [103, 104, 105], "C": [106, 107, 108]}, index=[1, 2, 3], ) df.iloc[array_with_neg_numbers] tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy) df.iloc[:, array_with_neg_numbers] tm.assert_numpy_array_equal(array_with_neg_numbers, array_copy) def test_iloc_getitem_neg_int_can_reach_first_index(self): # GH10547 and GH10779 # negative integers should be able to reach index 0 df = DataFrame({"A": [2, 3, 5], "B": [7, 11, 13]}) s = df["A"] expected = df.iloc[0] result = df.iloc[-3] tm.assert_series_equal(result, expected) expected = df.iloc[[0]] result = df.iloc[[-3]] tm.assert_frame_equal(result, expected) expected = s.iloc[0] result = s.iloc[-3] assert result == expected expected = s.iloc[[0]] result = s.iloc[[-3]] tm.assert_series_equal(result, expected) # check the length 1 Series case highlighted in GH10547 expected = Series(["a"], index=["A"]) result = expected.iloc[[-1]] tm.assert_series_equal(result, expected) def test_iloc_getitem_dups(self): # GH 6766 df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) df = concat([df1, df2], axis=1) # cross-sectional indexing result = df.iloc[0, 0] assert isna(result) result = df.iloc[0, :] expected = Series([np.nan, 1, 3, 3], index=["A", "B", "A", "B"], name=0) tm.assert_series_equal(result, expected) def test_iloc_getitem_array(self): df = DataFrame( [ {"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}, {"A": 1000, "B": 2000, "C": 3000}, ] ) expected = DataFrame([{"A": 1, "B": 2, "C": 3}]) tm.assert_frame_equal(df.iloc[[0]], expected) expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}]) tm.assert_frame_equal(df.iloc[[0, 1]], expected) expected = DataFrame([{"B": 2, "C": 3}, {"B": 2000, "C": 3000}], index=[0, 2]) result = df.iloc[[0, 2], [1, 2]] tm.assert_frame_equal(result, expected) def test_iloc_getitem_bool(self): df = DataFrame( [ {"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}, {"A": 1000, "B": 2000, "C": 3000}, ] ) expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}]) result = df.iloc[[True, True, False]] tm.assert_frame_equal(result, expected) expected = DataFrame( [{"A": 1, "B": 2, "C": 3}, {"A": 1000, "B": 2000, "C": 3000}], index=[0, 2] ) result = df.iloc[lambda x: x.index % 2 == 0] tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("index", [[True, False], [True, False, True, False]]) def test_iloc_getitem_bool_diff_len(self, index): # GH26658 s = Series([1, 2, 3]) msg = f"Boolean index has wrong length: {len(index)} instead of {len(s)}" with pytest.raises(IndexError, match=msg): s.iloc[index] def test_iloc_getitem_slice(self): df = DataFrame( [ {"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}, {"A": 1000, "B": 2000, "C": 3000}, ] ) expected = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 100, "B": 200, "C": 300}]) result = df.iloc[:2] tm.assert_frame_equal(result, expected) expected = DataFrame([{"A": 100, "B": 200}], index=[1]) result = df.iloc[1:2, 0:2] tm.assert_frame_equal(result, expected) expected = DataFrame( [{"A": 1, "C": 3}, {"A": 100, "C": 300}, {"A": 1000, "C": 3000}] ) result = df.iloc[:, lambda df: [0, 2]] tm.assert_frame_equal(result, expected) def test_iloc_getitem_slice_dups(self): df1 = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]) df2 = DataFrame( np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"] ) # axis=1 df = concat([df1, df2], 
axis=1) tm.assert_frame_equal(df.iloc[:, :4], df1) tm.assert_frame_equal(df.iloc[:, 4:], df2) df = concat([df2, df1], axis=1) tm.assert_frame_equal(df.iloc[:, :2], df2) tm.assert_frame_equal(df.iloc[:, 2:], df1) exp = concat([df2, df1.iloc[:, [0]]], axis=1) tm.assert_frame_equal(df.iloc[:, 0:3], exp) # axis=0 df = concat([df, df], axis=0) tm.assert_frame_equal(df.iloc[0:10, :2], df2) tm.assert_frame_equal(df.iloc[0:10, 2:], df1) tm.assert_frame_equal(df.iloc[10:, :2], df2) tm.assert_frame_equal(df.iloc[10:, 2:], df1) def test_iloc_setitem(self): df = DataFrame( np.random.randn(4, 4), index=np.arange(0, 8, 2), columns=np.arange(0, 12, 3) ) df.iloc[1, 1] = 1 result = df.iloc[1, 1] assert result == 1 df.iloc[:, 2:3] = 0 expected = df.iloc[:, 2:3] result = df.iloc[:, 2:3] tm.assert_frame_equal(result, expected) # GH5771 s = Series(0, index=[4, 5, 6]) s.iloc[1:2] += 1 expected = Series([0, 1, 0], index=[4, 5, 6]) tm.assert_series_equal(s, expected) def test_iloc_setitem_list(self): # setitem with an iloc list df = DataFrame( np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"] ) df.iloc[[0, 1], [1, 2]] df.iloc[[0, 1], [1, 2]] += 100 expected = DataFrame( np.array([0, 101, 102, 3, 104, 105, 6, 7, 8]).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"], ) tm.assert_frame_equal(df, expected) def test_iloc_setitem_pandas_object(self): # GH 17193 s_orig = Series([0, 1, 2, 3]) expected = Series([0, -1, -2, 3]) s = s_orig.copy() s.iloc[Series([1, 2])] = [-1, -2] tm.assert_series_equal(s, expected) s = s_orig.copy() s.iloc[Index([1, 2])] = [-1, -2] tm.assert_series_equal(s, expected) def test_iloc_setitem_dups(self): # GH 6766 # iloc with a mask aligning from another iloc df1 = DataFrame([{"A": None, "B": 1}, {"A": 2, "B": 2}]) df2 = DataFrame([{"A": 3, "B": 3}, {"A": 4, "B": 4}]) df = concat([df1, df2], axis=1) expected = df.fillna(3) inds = np.isnan(df.iloc[:, 0]) mask = inds[inds].index df.iloc[mask, 0] = df.iloc[mask, 2] tm.assert_frame_equal(df, expected) # del a dup column across blocks expected = DataFrame({0: [1, 2], 1: [3, 4]}) expected.columns = ["B", "B"] del df["A"] tm.assert_frame_equal(df, expected) # assign back to self df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] tm.assert_frame_equal(df, expected) # reversed x 2 df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) tm.assert_frame_equal(df, expected) def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( self, using_array_manager ): # Same as the "assign back to self" check in test_iloc_setitem_dups # but on a DataFrame with multiple blocks df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"]) df.iloc[:, 0] = df.iloc[:, 0].astype("f8") if not using_array_manager: assert len(df._mgr.blocks) == 2 expected = df.copy() # assign back to self df.iloc[[0, 1], [0, 1]] = df.iloc[[0, 1], [0, 1]] tm.assert_frame_equal(df, expected) # TODO: GH#27620 this test used to compare iloc against ix; check if this # is redundant with another test comparing iloc against loc def test_iloc_getitem_frame(self): df = DataFrame( np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0, 8, 2) ) result = df.iloc[2] exp = df.loc[4] tm.assert_series_equal(result, exp) result = df.iloc[2, 2] exp = df.loc[4, 4] assert result == exp # slice result = df.iloc[4:8] expected = df.loc[8:14] tm.assert_frame_equal(result, expected) result = df.iloc[:, 2:3] expected = df.loc[:, 4:5] tm.assert_frame_equal(result, expected) # list of 
integers result = df.iloc[[0, 1, 3]] expected = df.loc[[0, 2, 6]] tm.assert_frame_equal(result, expected) result = df.iloc[[0, 1, 3], [0, 1]] expected = df.loc[[0, 2, 6], [0, 2]] tm.assert_frame_equal(result, expected) # neg indices result = df.iloc[[-1, 1, 3], [-1, 1]] expected = df.loc[[18, 2, 6], [6, 2]] tm.assert_frame_equal(result, expected) # dups indices result = df.iloc[[-1, -1, 1, 3], [-1, 1]] expected = df.loc[[18, 18, 2, 6], [6, 2]] tm.assert_frame_equal(result, expected) # with index-like s = Series(index=range(1, 5), dtype=object) result = df.iloc[s.index] expected = df.loc[[2, 4, 6, 8]] tm.assert_frame_equal(result, expected) def test_iloc_getitem_labelled_frame(self): # try with labelled frame df = DataFrame( np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") ) result = df.iloc[1, 1] exp = df.loc["b", "B"] assert result == exp result = df.iloc[:, 2:3] expected = df.loc[:, ["C"]] tm.assert_frame_equal(result, expected) # negative indexing result = df.iloc[-1, -1] exp = df.loc["j", "D"] assert result == exp # out-of-bounds exception msg = "single positional indexer is out-of-bounds" with pytest.raises(IndexError, match=msg): df.iloc[10, 5] # trying to use a label msg = ( r"Location based indexing can only have \[integer, integer " r"slice \(START point is INCLUDED, END point is EXCLUDED\), " r"listlike of integers, boolean array\] types" ) with pytest.raises(ValueError, match=msg): df.iloc["j", "D"] def test_iloc_getitem_doc_issue(self, using_array_manager): # multi axis slicing issue with single block # surfaced in GH 6059 arr = np.random.randn(6, 4) index = date_range("20130101", periods=6) columns = list("ABCD") df = DataFrame(arr, index=index, columns=columns) # defines ref_locs df.describe() result = df.iloc[3:5, 0:2] str(result) result.dtypes expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=columns[0:2]) tm.assert_frame_equal(result, expected) # for dups df.columns = list("aaaa") result = df.iloc[3:5, 0:2] str(result) result.dtypes expected = DataFrame(arr[3:5, 0:2], index=index[3:5], columns=list("aa")) tm.assert_frame_equal(result, expected) # related arr = np.random.randn(6, 4) index = list(range(0, 12, 2)) columns = list(range(0, 8, 2)) df = DataFrame(arr, index=index, columns=columns) if not using_array_manager: df._mgr.blocks[0].mgr_locs result = df.iloc[1:5, 2:4] str(result) result.dtypes expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4]) tm.assert_frame_equal(result, expected) def test_iloc_setitem_series(self): df = DataFrame( np.random.randn(10, 4), index=list("abcdefghij"), columns=list("ABCD") ) df.iloc[1, 1] = 1 result = df.iloc[1, 1] assert result == 1 df.iloc[:, 2:3] = 0 expected = df.iloc[:, 2:3] result = df.iloc[:, 2:3] tm.assert_frame_equal(result, expected) s = Series(np.random.randn(10), index=range(0, 20, 2)) s.iloc[1] = 1 result = s.iloc[1] assert result == 1 s.iloc[:4] = 0 expected = s.iloc[:4] result = s.iloc[:4] tm.assert_series_equal(result, expected) s = Series([-1] * 6) s.iloc[0::2] = [0, 2, 4] s.iloc[1::2] = [1, 3, 5] result = s expected = Series([0, 1, 2, 3, 4, 5]) tm.assert_series_equal(result, expected) def test_iloc_setitem_list_of_lists(self): # GH 7551 # list-of-list is set incorrectly in mixed vs. 
single dtyped frames df = DataFrame( {"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")} ) df.iloc[2:4] = [[10, 11], [12, 13]] expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]}) tm.assert_frame_equal(df, expected) df = DataFrame( {"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")} ) df.iloc[2:4] = [["x", 11], ["y", 13]] expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]}) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])]) @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) def test_iloc_setitem_with_scalar_index(self, indexer, value): # GH #19474 # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated # elementwisely, not using "setter('A', ['Z'])". df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) df.iloc[0, indexer] = value result = df.iloc[0, 0] assert is_scalar(result) and result == "Z" def test_iloc_mask(self): # GH 3631, iloc with a mask (of a series) should raise df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"]) mask = df.a % 2 == 0 msg = "iLocation based boolean indexing cannot use an indexable as a mask" with pytest.raises(ValueError, match=msg): df.iloc[mask] mask.index = range(len(mask)) msg = "iLocation based boolean indexing on an integer type is not available" with pytest.raises(NotImplementedError, match=msg): df.iloc[mask] # ndarray ok result = df.iloc[np.array([True] * len(mask), dtype=bool)] tm.assert_frame_equal(result, df) # the possibilities locs = np.arange(4) nums = 2 ** locs reps = [bin(num) for num in nums] df = DataFrame({"locs": locs, "nums": nums}, reps) expected = { (None, ""): "0b1100", (None, ".loc"): "0b1100", (None, ".iloc"): "0b1100", ("index", ""): "0b11", ("index", ".loc"): "0b11", ("index", ".iloc"): ( "iLocation based boolean indexing cannot use an indexable as a mask" ), ("locs", ""): "Unalignable boolean Series provided as indexer " "(index of the boolean Series and of the indexed " "object do not match).", ("locs", ".loc"): "Unalignable boolean Series provided as indexer " "(index of the boolean Series and of the " "indexed object do not match).", ("locs", ".iloc"): ( "iLocation based boolean indexing on an " "integer type is not available" ), } # UserWarnings from reindex of a boolean mask with catch_warnings(record=True): simplefilter("ignore", UserWarning) for idx in [None, "index", "locs"]: mask = (df.nums > 2).values if idx: mask = Series(mask, list(reversed(getattr(df, idx)))) for method in ["", ".loc", ".iloc"]: try: if method: accessor = getattr(df, method[1:]) else: accessor = df answer = str(bin(accessor[mask]["nums"].sum())) except (ValueError, IndexingError, NotImplementedError) as e: answer = str(e) key = ( idx, method, ) r = expected.get(key) if r != answer: raise AssertionError( f"[{key}] does not match [{answer}], received [{r}]" ) def test_iloc_non_unique_indexing(self): # GH 4017, non-unique indexing (on the axis) df = DataFrame({"A": [0.1] * 3000, "B": [1] * 3000}) idx = np.arange(30) * 99 expected = df.iloc[idx] df3 = concat([df, 2 * df, 3 * df]) result = df3.iloc[idx] tm.assert_frame_equal(result, expected) df2 = DataFrame({"A": [0.1] * 1000, "B": [1] * 1000}) df2 = concat([df2, 2 * df2, 3 * df2]) with pytest.raises(KeyError, match="with any missing labels"): df2.loc[idx] def test_iloc_empty_list_indexer_is_ok(self): df = tm.makeCustomDataframe(5, 2) # vertical empty tm.assert_frame_equal( df.iloc[:, []], df.iloc[:, :0], check_index_type=True, 
check_column_type=True, ) # horizontal empty tm.assert_frame_equal( df.iloc[[], :], df.iloc[:0, :], check_index_type=True, check_column_type=True, ) # horizontal empty tm.assert_frame_equal( df.iloc[[]], df.iloc[:0, :], check_index_type=True, check_column_type=True ) def test_identity_slice_returns_new_object(self, using_array_manager): # GH13873 original_df = DataFrame({"a": [1, 2, 3]}) sliced_df = original_df.iloc[:] assert sliced_df is not original_df # should be a shallow copy original_df["a"] = [4, 4, 4] if using_array_manager: # TODO(ArrayManager) verify it is expected that the original didn't change # setitem is replacing full column, so doesn't update "viewing" dataframe assert not (sliced_df["a"] == 4).all() else: assert (sliced_df["a"] == 4).all() original_series = Series([1, 2, 3, 4, 5, 6]) sliced_series = original_series.iloc[:] assert sliced_series is not original_series # should also be a shallow copy original_series[:3] = [7, 8, 9] assert all(sliced_series[:3] == [7, 8, 9]) def test_indexing_zerodim_np_array(self): # GH24919 df = DataFrame([[1, 2], [3, 4]]) result = df.iloc[np.array(0)] s = Series([1, 2], name=0) tm.assert_series_equal(result, s) def test_series_indexing_zerodim_np_array(self): # GH24919 s = Series([1, 2]) result = s.iloc[np.array(0)] assert result == 1 @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457") def test_iloc_setitem_categorical_updates_inplace(self): # Mixed dtype ensures we go through take_split_path in setitem_with_indexer cat = Categorical(["A", "B", "C"]) df = DataFrame({1: cat, 2: [1, 2, 3]}) # This should modify our original values in-place df.iloc[:, 0] = cat[::-1] expected = Categorical(["C", "B", "A"]) tm.assert_categorical_equal(cat, expected) def test_iloc_with_boolean_operation(self): # GH 20627 result = DataFrame([[0, 1], [2, 3], [4, 5], [6, np.nan]]) result.iloc[result.index <= 2] *= 2 expected = DataFrame([[0, 2], [4, 6], [8, 10], [6, np.nan]]) tm.assert_frame_equal(result, expected) result.iloc[result.index > 2] *= 2 expected = DataFrame([[0, 2], [4, 6], [8, 10], [12, np.nan]]) tm.assert_frame_equal(result, expected) result.iloc[[True, True, False, False]] *= 2 expected = DataFrame([[0, 4], [8, 12], [8, 10], [12, np.nan]]) tm.assert_frame_equal(result, expected) result.iloc[[False, False, True, True]] /= 2 expected = DataFrame([[0.0, 4.0], [8.0, 12.0], [4.0, 5.0], [6.0, np.nan]]) tm.assert_frame_equal(result, expected) def test_iloc_getitem_singlerow_slice_categoricaldtype_gives_series(self): # GH#29521 df = DataFrame({"x": Categorical("a b c d e".split())}) result = df.iloc[0] raw_cat = Categorical(["a"], categories=["a", "b", "c", "d", "e"]) expected = Series(raw_cat, index=["x"], name=0, dtype="category") tm.assert_series_equal(result, expected) def test_iloc_getitem_categorical_values(self): # GH#14580 # test iloc() on Series with Categorical data ser = Series([1, 2, 3]).astype("category") # get slice result = ser.iloc[0:2] expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get list of indexes result = ser.iloc[[0, 1]] expected = Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get boolean array result = ser.iloc[[True, False, False]] expected = Series([1]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("value", [None, NaT, np.nan]) def test_iloc_setitem_td64_values_cast_na(self, value): # GH#18586 series = Series([0, 1, 2], dtype="timedelta64[ns]") 
series.iloc[0] = value expected = Series([NaT, 1, 2], dtype="timedelta64[ns]") tm.assert_series_equal(series, expected) def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): idx = Index([]) obj = DataFrame(np.random.randn(len(idx), len(idx)), index=idx, columns=idx) nd3 = np.random.randint(5, size=(2, 2, 2)) msg = f"Cannot set values with ndim > {obj.ndim}" with pytest.raises(ValueError, match=msg): obj.iloc[nd3] = 0 @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) def test_iloc_getitem_read_only_values(self, indexer): # GH#10043 this is fundamentally a test for iloc, but test loc while # we're here rw_array = np.eye(10) rw_df = DataFrame(rw_array) ro_array = np.eye(10) ro_array.setflags(write=False) ro_df = DataFrame(ro_array) tm.assert_frame_equal(indexer(rw_df)[[1, 2, 3]], indexer(ro_df)[[1, 2, 3]]) tm.assert_frame_equal(indexer(rw_df)[[1]], indexer(ro_df)[[1]]) tm.assert_series_equal(indexer(rw_df)[1], indexer(ro_df)[1]) tm.assert_frame_equal(indexer(rw_df)[1:3], indexer(ro_df)[1:3]) def test_iloc_getitem_readonly_key(self): # GH#17192 iloc with read-only array raising TypeError df = DataFrame({"data": np.ones(100, dtype="float64")}) indices = np.array([1, 3, 6]) indices.flags.writeable = False result = df.iloc[indices] expected = df.loc[[1, 3, 6]] tm.assert_frame_equal(result, expected) result = df["data"].iloc[indices] expected = df["data"].loc[[1, 3, 6]] tm.assert_series_equal(result, expected) # TODO(ArrayManager) setting single item with an iterable doesn't work yet # in the "split" path @td.skip_array_manager_not_yet_implemented def test_iloc_assign_series_to_df_cell(self): # GH 37593 df = DataFrame(columns=["a"], index=[0]) df.iloc[0, 0] = Series([1, 2, 3]) expected = DataFrame({"a": [Series([1, 2, 3])]}, columns=["a"], index=[0]) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("klass", [list, np.array]) def test_iloc_setitem_bool_indexer(self, klass): # GH#36741 df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) indexer = klass([True, False, False]) df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("indexer", [[1], slice(1, 2)]) def test_iloc_setitem_pure_position_based(self, indexer): # GH#22046 df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]}) df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}) df2.iloc[:, indexer] = df1.iloc[:, [0]] expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]}) tm.assert_frame_equal(df2, expected) def test_iloc_setitem_dictionary_value(self): # GH#37728 df = DataFrame({"x": [1, 2], "y": [2, 2]}) rhs = {"x": 9, "y": 99} df.iloc[1] = rhs expected = DataFrame({"x": [1, 9], "y": [2, 99]}) tm.assert_frame_equal(df, expected) # GH#38335 same thing, mixed dtypes df = DataFrame({"x": [1, 2], "y": [2.0, 2.0]}) df.iloc[1] = rhs expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]}) tm.assert_frame_equal(df, expected) def test_iloc_getitem_float_duplicates(self): df = DataFrame( np.random.randn(3, 3), index=[0.1, 0.2, 0.2], columns=list("abc") ) expect = df.iloc[1:] tm.assert_frame_equal(df.loc[0.2], expect) expect = df.iloc[1:, 0] tm.assert_series_equal(df.loc[0.2, "a"], expect) df.index = [1, 0.2, 0.2] expect = df.iloc[1:] tm.assert_frame_equal(df.loc[0.2], expect) expect = df.iloc[1:, 0] tm.assert_series_equal(df.loc[0.2, "a"], expect) df = DataFrame( np.random.randn(4, 3), index=[1, 0.2, 0.2, 1], columns=list("abc") ) expect = df.iloc[1:-1] 
tm.assert_frame_equal(df.loc[0.2], expect) expect = df.iloc[1:-1, 0] tm.assert_series_equal(df.loc[0.2, "a"], expect) df.index = [0.1, 0.2, 2, 0.2] expect = df.iloc[[1, -1]] tm.assert_frame_equal(df.loc[0.2], expect) expect = df.iloc[[1, -1], 0] tm.assert_series_equal(df.loc[0.2, "a"], expect) def test_iloc_setitem_custom_object(self): # iloc with an object class TO: def __init__(self, value): self.value = value def __str__(self) -> str: return f"[{self.value}]" __repr__ = __str__ def __eq__(self, other) -> bool: return self.value == other.value def view(self): return self df = DataFrame(index=[0, 1], columns=[0]) df.iloc[1, 0] = TO(1) df.iloc[1, 0] = TO(2) result = DataFrame(index=[0, 1], columns=[0]) result.iloc[1, 0] = TO(2) tm.assert_frame_equal(result, df) # remains object dtype even after setting it back df = DataFrame(index=[0, 1], columns=[0]) df.iloc[1, 0] = TO(1) df.iloc[1, 0] = np.nan result = DataFrame(index=[0, 1], columns=[0]) tm.assert_frame_equal(result, df) def test_iloc_getitem_with_duplicates(self): df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab")) result = df.iloc[0] assert isinstance(result, Series) tm.assert_almost_equal(result.values, df.values[0]) result = df.T.iloc[:, 0] assert isinstance(result, Series) tm.assert_almost_equal(result.values, df.values[0]) def test_iloc_getitem_with_duplicates2(self): # GH#2259 df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=[1, 1, 2]) result = df.iloc[:, [0]] expected = df.take([0], axis=1) tm.assert_frame_equal(result, expected) def test_iloc_interval(self): # GH#17130 df = DataFrame({Interval(1, 2): [1, 2]}) result = df.iloc[0] expected = Series({Interval(1, 2): 1}, name=0) tm.assert_series_equal(result, expected) result = df.iloc[:, 0] expected = Series([1, 2], name=Interval(1, 2)) tm.assert_series_equal(result, expected) result = df.copy() result.iloc[:, 0] += 1 expected = DataFrame({Interval(1, 2): [2, 3]}) tm.assert_frame_equal(result, expected)
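Most of the tests above hinge on the same contract: .iloc is purely position based while .loc is label based, and the two only coincide when the labels happen to be 0..n-1. A short illustration of that distinction (not taken from the test suite itself):

import numpy as np
import pandas as pd

df = pd.DataFrame(np.arange(12).reshape(4, 3),
                  index=[10, 20, 30, 40], columns=list("abc"))

print(df.iloc[1, 2])        # position (row 1, col 2) -> 5
print(df.loc[20, "c"])      # same cell addressed by label -> 5
print(df.iloc[-1, 0])       # negative positions reach the last row -> 9
print(df.iloc[:2, [0, 2]])  # slices and lists of positions, as in the tests above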
def posteriorParameterSamples(self, z, NUMBER_OF_MCMC_SAMPLES_TOTAL, fixedSlabVar, fixedSigmaSquareR, numberOfFreeBeta, fixedBetaPart): assert (fixedSlabVar is None or fixedSlabVar > 0.0) assert (fixedSigmaSquareR is None or fixedSigmaSquareR > 0.0) assert (numberOfFreeBeta + fixedBetaPart.shape[0] == self.p) invEst = numpy.linalg.inv(self.X.transpose() @ self.X + 1.0 * numpy.eye(self.p)) ridgeBetaEst = (invEst @ self.X.transpose()) @ self.y beta = ridgeBetaEst beta[numberOfFreeBeta:self.p] = fixedBetaPart if fixedSigmaSquareR is None: sigmaSquareR = numpy.mean( numpy.square(self.y - self.X @ ridgeBetaEst)) else: sigmaSquareR = fixedSigmaSquareR # print("z = ") # print(z) # print("beta = ") # print(beta) # assert(False) # print("sigmaSquareR = ") # print(sigmaSquareR) BURN_IN_SAMPLES = int(0.1 * NUMBER_OF_MCMC_SAMPLES_TOTAL) assert (BURN_IN_SAMPLES >= 1) NUMBER_OF_MCMC_SAMPLES_USED = NUMBER_OF_MCMC_SAMPLES_TOTAL - BURN_IN_SAMPLES # print("BURN_IN_SAMPLES = ", BURN_IN_SAMPLES) # print("NUMBER_OF_MCMC_SAMPLES_USED = ", NUMBER_OF_MCMC_SAMPLES_USED) posteriorBeta = numpy.zeros((NUMBER_OF_MCMC_SAMPLES_USED, self.p)) posteriorSigmaSquareR = numpy.zeros(NUMBER_OF_MCMC_SAMPLES_USED) posteriorSlabVar = numpy.zeros(NUMBER_OF_MCMC_SAMPLES_USED) spikeAndSlabVar = numpy.asarray([self.sigmaSquare0, self.etaSquare1]) if fixedSlabVar is not None: spikeAndSlabVar[1] = fixedSlabVar for mcmcIt in range(NUMBER_OF_MCMC_SAMPLES_TOTAL): print("mcmcIt = ", mcmcIt) for j in range(numberOfFreeBeta): # sample p(beta_j | beta_-j, z, y, sigmaSquareR, X) meanTilde, sigmaSquareTilde, _ = self.getMeanAndVarOfBetaConditional( sigmaSquareR, spikeAndSlabVar, beta, z, j) beta[j] = scipy.stats.norm.rvs( loc=meanTilde, scale=numpy.sqrt(sigmaSquareTilde)) if fixedSigmaSquareR is None: # sample p(sigmaSquareR | beta, z, y, X) etaSquareForsigmaSquareR = ( SpikeAndSlabProposedModelSearch.NU_R * SpikeAndSlabProposedModelSearch.ETA_SQUARE_R + numpy.sum( numpy.square(self.y - numpy.matmul(self.X, beta)))) / ( SpikeAndSlabProposedModelSearch.NU_R + self.n) sigmaSquareR = samplingHelper.getScaledInvChiSquareSample( nu=SpikeAndSlabProposedModelSearch.NU_R + self.n, etaSquare=etaSquareForsigmaSquareR, numberOfSamples=1)[0] if fixedSlabVar is None: # sample p(sigmaSquare_1 | beta, z, y, X) spikeAndSlabVar[1] = self.sampleSigmaSquareConditional(beta, z) if mcmcIt >= BURN_IN_SAMPLES: posteriorBeta[mcmcIt - BURN_IN_SAMPLES] = beta posteriorSigmaSquareR[mcmcIt - BURN_IN_SAMPLES] = sigmaSquareR posteriorSlabVar[mcmcIt - BURN_IN_SAMPLES] = spikeAndSlabVar[1] return posteriorBeta, posteriorSigmaSquareR, posteriorSlabVar
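Both Gibbs samplers draw the residual variance from a scaled inverse chi-square distribution via samplingHelper.getScaledInvChiSquareSample. That helper is project code, but the standard construction is nu * etaSquare / X with X ~ ChiSquare(nu); a hedged sketch of what such a helper presumably does:

import numpy as np
from scipy import stats

def scaled_inv_chi_square_samples(nu, eta_square, size=1):
    """Draw from Scale-Inv-ChiSq(nu, eta_square) as nu * eta_square / ChiSq(nu)."""
    return nu * eta_square / stats.chi2.rvs(df=nu, size=size)

samples = scaled_inv_chi_square_samples(nu=5.0, eta_square=2.0, size=10000)
print(samples.mean())  # roughly nu * eta_square / (nu - 2) = 10/3 for nu > 2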
k=k+20 j=j+20 for l in range(20): hx = np.histogram(dx[l,0:],7) hy = np.histogram(dy[l,0:],7) hz = np.histogram(dz[l,0:],7) #normalize HX[l,:]=hx[0]/T1 HY[l,:] =hy[0]/T1 HZ[l,:] =hz[0]/T1 rx=HX.reshape(1,140) ry=HY.reshape(1,140) rz=HZ.reshape(1,140) rx=(np.append(rx,ry)).reshape(1,280) rx=(np.append(rx,rz)).reshape(1,420) rx= (np.asarray(rx)) vectors[dd,:]=rx dd=dd+1 vectors = vectors.reshape((48,420)) np.savetxt('hjpd_d1.t', vectors, delimiter=' ')
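The loop packs 20 joints times 7 histogram bins per axis into a 140-dim block, then concatenates the x, y and z blocks into one 420-dim descriptor per sequence. A compact sketch of that layout with toy displacement data:

import numpy as np

n_joints, n_bins, T1 = 20, 7, 100
dx, dy, dz = (np.random.randn(n_joints, T1) for _ in range(3))  # toy displacements

blocks = []
for d in (dx, dy, dz):
    h = np.stack([np.histogram(d[j], n_bins)[0] / T1 for j in range(n_joints)])
    blocks.append(h.reshape(-1))        # 20 joints x 7 bins -> 140 values per axis

feature = np.concatenate(blocks)        # x, y, z blocks -> 420-dim descriptor
print(feature.shape)                    # (420,)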
from __future__ import absolute_import from __future__ import division from __future__ import print_function import pickle, os, time import numpy as np import tensorflow as tf import matplotlib.pyplot as plt from numpy import linalg as LA init_time= time.time() #################################### Loading the Dataset ads_data_list= pickle.load( open("Ads400_3keyframes_july19.p", "rb") ) ads_data= np.asarray(ads_data_list) ## 12k samples of 112x112x3 vid_data_list= pickle.load( open("Vid500_10shots_3keyframes.p", "rb") ) vid_data= np.asarray(vid_data_list) ## 12k samples of 112x112x3 ads_audio_list= pickle.load( open("Ads400_audio_ad_mfcc_july19.p", "rb") ) ads_audio= np.asarray(ads_audio_list) ## MFCC audio features vid_audio_list= pickle.load( open("Vid500_10shots_mfcc.p", "rb") ) vid_audio= np.asarray(vid_audio_list) ## MFCC audio features ################################### Audio normalization and repeating for i in range(ads_audio.shape[0]): ads_audio[i,:]= ( ads_audio[i,:]- np.mean(ads_audio[i,:]) ) / ( np.std(ads_audio[i,:])+ 1e-6 ) for i in range(vid_audio.shape[0]): vid_audio[i,:]= ( vid_audio[i,:]- np.mean(vid_audio[i,:]) ) / ( np.std(vid_audio[i,:])+ 1e-6 ) ads_audio_3x= np.zeros([ads_audio.shape[0]*3, ads_audio.shape[1]])