def figure_2_1():
    """Replicate figure 2.1 of Sutton and Barto's book."""
    print('Running figure 2.1 simulation ...')
    np.random.seed(1234)
    epsilons = (0.1, 0.01, 0)
    ars, pos = [], []
    for epsilon in epsilons:
        ar, po = run_experiment(2000, 1000, epsilon)
        ars.append(np.mean(ar, 0))
        pos.append(np.mean(po, 0))
    # plot the results
    plt.close('all')
    f, (ax1, ax2) = plt.subplots(2)
    for i, epsilon in enumerate(epsilons):
        ax1.plot(ars[i].T, label='$\epsilon$=%.2f' % epsilon)
        ax2.plot(pos[i].T, label='$\epsilon$=%.2f' % epsilon)
    ax1.legend(loc='lower right')
    ax1.set_ylabel('Average reward')
    ax1.set_xlim(xmin=-10)
    ax2.legend(loc='lower right')
    ax2.set_xlabel('Plays')
    ax2.set_ylabel('% Optimal action')
    ax2.set_xlim(xmin=-20)
    plt.savefig('fig_2_1.pdf')
    plt.show()
def get_tracedata(self, format='AmpPha', single=False):
    '''
    Get the data of the current trace

    Input:
        format (string) : 'AmpPha': Amp in dB and Phase, 'RealImag'

    Output:
        'AmpPha': Amplitude and Phase
    '''
    #data = self._visainstrument.ask_for_values(':FORMAT REAL,32;*CLS;CALC1:DATA:NSW? SDAT,1;*OPC',format=1)
    data = self._visainstrument.ask_for_values('FORM:DATA REAL; FORM:BORD SWAPPED; CALC%i:SEL:DATA:SDAT?' % (self._ci), format=visa.double)
    data_size = numpy.size(data)
    datareal = numpy.array(data[0:data_size:2])
    dataimag = numpy.array(data[1:data_size:2])

    if format.upper() == 'REALIMAG':
        if self._zerospan:
            return numpy.mean(datareal), numpy.mean(dataimag)
        else:
            return datareal, dataimag
    elif format.upper() == 'AMPPHA':
        if self._zerospan:
            datareal = numpy.mean(datareal)
            dataimag = numpy.mean(dataimag)
            dataamp = numpy.sqrt(datareal*datareal + dataimag*dataimag)
            datapha = numpy.arctan(dataimag/datareal)
            return dataamp, datapha
        else:
            dataamp = numpy.sqrt(datareal*datareal + dataimag*dataimag)
            datapha = numpy.arctan2(dataimag, datareal)
            return dataamp, datapha
    else:
        raise ValueError('get_tracedata(): Format must be AmpPha or RealImag')
def testNormalizeLike(self):
    a = np.empty((10, 3))
    a[:, 0] = np.random.random(10)
    a[:, 1] = np.random.random(10)
    a[:, 2] = np.random.random(10)
    b = np.empty((10, 3))
    b[:, 0] = np.random.random(10)
    b[:, 1] = np.random.random(10)
    b[:, 2] = np.random.random(10)
    b = b * 2
    c = normalizeArrayLike(b, a)
    # Should be normalized like a
    mean = []
    std = []
    mean.append(np.mean(a[:, 0]))
    mean.append(np.mean(a[:, 1]))
    mean.append(np.mean(a[:, 2]))
    std.append(np.std(a[:, 0]))
    std.append(np.std(a[:, 1]))
    std.append(np.std(a[:, 2]))
    # Check all values
    for col in xrange(b.shape[1]):
        for bval, cval in zip(b[:, col].flat, c[:, col].flat):
            print cval, (bval - mean[col]) / std[col]
            print cval, bval
            assert cval == (bval - mean[col]) / std[col]
    print ("TestNormalizeLike success")
def figure_2_4():
    """Replicate figure 2.4 of Sutton and Barto's book."""
    print('Running figure 2.4 simulation ...')
    np.random.seed(1234)
    epsilons = (0.1, 0)
    q_inits = (0, 5)
    ars, pos = [], []
    for epsilon, q_init in zip(epsilons, q_inits):
        ar, po = run_experiment(2000, 1000, epsilon=epsilon, Q_init=q_init, alpha=0.1)
        ars.append(np.mean(ar, 0))
        pos.append(np.mean(po, 0))
    # plot the results
    plt.close('all')
    f, (ax1, ax2) = plt.subplots(2)
    labels = ('$\epsilon$-greedy', 'optimistic')
    for i, label in enumerate(labels):
        ax1.plot(ars[i].T, label=label)
        ax2.plot(pos[i].T, label=label)
    ax1.legend(loc='lower right')
    ax1.set_ylabel('Average reward')
    ax1.set_xlim(xmin=-10)
    ax2.legend(loc='lower right')
    ax2.set_xlabel('Plays')
    ax2.set_ylabel('% Optimal action')
    ax2.set_xlim(xmin=-20)
    plt.savefig('fig_2_4.pdf')
    plt.show()
def add_noise_evoked(evoked, noise, snr, tmin=None, tmax=None):
    """Adds noise to evoked object with specified SNR.

    SNR is computed in the interval from tmin to tmax.

    Parameters
    ----------
    evoked : Evoked object
        An instance of evoked with signal
    noise : Evoked object
        An instance of evoked with noise
    snr : float
        signal to noise ratio in dB. It corresponds to
        10 * log10( var(signal) / var(noise) )
    tmin : float
        start time before event
    tmax : float
        end time after event

    Returns
    -------
    evoked_noise : Evoked object
        An instance of evoked corrupted by noise
    """
    evoked = copy.deepcopy(evoked)
    tmask = _time_mask(evoked.times, tmin, tmax)
    tmp = 10 * np.log10(np.mean((evoked.data[:, tmask] ** 2).ravel()) /
                        np.mean((noise.data ** 2).ravel()))
    noise.data = 10 ** ((tmp - float(snr)) / 20) * noise.data
    evoked.data += noise.data
    return evoked
def mean_quadratic_weighted_kappa(kappas, weights=None):
    """
    Calculates the mean of the quadratic weighted kappas after applying
    Fisher's r-to-z transform, which is approximately a variance-stabilizing
    transformation. This transformation is undefined if one of the kappas is
    1.0, so all kappa values are capped in the range (-0.999, 0.999). The
    reverse transformation is then applied before returning the result.

    mean_quadratic_weighted_kappa(kappas), where kappas is a vector of
    kappa values

    mean_quadratic_weighted_kappa(kappas, weights), where weights is a vector
    of weights that is the same size as kappas. Weights are applied in the
    z-space.
    """
    kappas = np.array(kappas, dtype=float)
    if weights is None:
        weights = np.ones(np.shape(kappas))
    else:
        weights = weights / np.mean(weights)

    # ensure that kappas are in the range [-.999, .999]
    kappas = np.array([min(x, .999) for x in kappas])
    kappas = np.array([max(x, -.999) for x in kappas])

    z = 0.5 * np.log((1 + kappas) / (1 - kappas)) * weights
    z = np.mean(z)
    return (np.exp(2 * z) - 1) / (np.exp(2 * z) + 1)
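# Minimal usage sketch for mean_quadratic_weighted_kappa above; the kappa and
# weight values are made-up illustration data, not from the original source.
import numpy as np

example_kappas = np.array([0.61, 0.74, 0.82])
example_weights = np.array([1.0, 2.0, 1.0])
print(mean_quadratic_weighted_kappa(example_kappas))                   # unweighted mean in z-space
print(mean_quadratic_weighted_kappa(example_kappas, example_weights))  # weighted mean in z-space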
def getIdealWins(errors, testErrors, p=0.01):
    """
    Figure out whether the ideal error obtained using the test set is an
    improvement over model selection using CV.
    """
    winsShape = list(errors.shape[1:-1])
    winsShape.append(3)
    stdWins = numpy.zeros(winsShape, numpy.int)

    for i in range(len(sampleSizes)):
        for j in range(foldsSet.shape[0]):
            s1 = errors[:, i, j, 0]
            s2 = testErrors[:, i]
            s1Mean = numpy.mean(s1)
            s2Mean = numpy.mean(s2)

            t, prob = scipy.stats.wilcoxon(s1, s2)
            if prob < p:
                if s1Mean > s2Mean:
                    stdWins[i, j, 2] = 1
                elif s1Mean < s2Mean:
                    stdWins[i, j, 0] = 1
            else:
                print("Test draw samplesize:" + str(sampleSizes[i]) + " folds " + str(foldsSet[j]))
                stdWins[i, j, 1] = 1

    return stdWins
def test_mat_output(self):
    samples = GMM1([.9999, .0001], [0.0, 1.0], [0.000001, 0.000001],
                   rng=self.rng, size=[40, 20])
    assert samples.shape == (40, 20)
    assert -.001 < np.mean(samples) < .001, np.mean(samples)
    assert np.var(samples) < .0001, np.var(samples)
def Haffine_from_points(fp, tp):
    '''Compute the homography H of an affine transformation such that tp is
    fp transformed by that affine transformation.'''
    if fp.shape != tp.shape:
        raise RuntimeError('number of points do not match')

    # condition (normalize) the points
    # --- source points ---
    m = numpy.mean(fp[:2], axis=1)
    maxstd = numpy.max(numpy.std(fp[:2], axis=1)) + 1e-9
    C1 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C1[0, 2] = -m[0] / maxstd
    C1[1, 2] = -m[1] / maxstd
    fp_cond = numpy.dot(C1, fp)

    # --- corresponding points ---
    m = numpy.mean(tp[:2], axis=1)
    maxstd = numpy.max(numpy.std(tp[:2], axis=1)) + 1e-9
    C2 = numpy.diag([1/maxstd, 1/maxstd, 1])
    C2[0, 2] = -m[0] / maxstd
    C2[1, 2] = -m[1] / maxstd
    tp_cond = numpy.dot(C2, tp)

    # after conditioning, the point means are zero, so the translation is zero
    A = numpy.concatenate((fp_cond[:2], tp_cond[:2]), axis=0)
    U, S, V = numpy.linalg.svd(A.T)

    # build matrices B and C
    tmp = V[:2].T
    B = tmp[:2]
    C = tmp[2:4]

    tmp2 = numpy.concatenate((numpy.dot(C, numpy.linalg.pinv(B)), numpy.zeros((2, 1))), axis=1)
    H = numpy.vstack((tmp2, [0, 0, 1]))

    # decondition
    H = numpy.dot(numpy.linalg.inv(C2), numpy.dot(H, C1))

    return H / H[2, 2]  # normalize, then return
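# Minimal usage sketch for Haffine_from_points above: fp_pts and tp_pts are
# made-up 3xN homogeneous point arrays (last row of ones), with tp_pts an
# affine warp of fp_pts by an assumed matrix A_true.
import numpy

fp_pts = numpy.array([[0., 1., 1., 0.],
                      [0., 0., 1., 1.],
                      [1., 1., 1., 1.]])
A_true = numpy.array([[1.2,  0.1,  2.0],
                      [-0.1, 0.9, -1.0],
                      [0.0,  0.0,  1.0]])
tp_pts = numpy.dot(A_true, fp_pts)
H_est = Haffine_from_points(fp_pts, tp_pts)
print(H_est)  # expected to be close to A_true up to numerical error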
def work(self): self.worked = True kwargs = dict( weights=self.weights, mus=self.mus, sigmas=self.sigmas, low=self.low, high=self.high, q=self.q, ) samples = GMM1(rng=self.rng, size=(self.n_samples,), **kwargs) samples = np.sort(samples) edges = samples[::self.samples_per_bin] #print samples pdf = np.exp(GMM1_lpdf(edges[:-1], **kwargs)) dx = edges[1:] - edges[:-1] y = 1 / dx / len(dx) if self.show: plt.scatter(edges[:-1], y) plt.plot(edges[:-1], pdf) plt.show() err = (pdf - y) ** 2 print np.max(err) print np.mean(err) print np.median(err) if not self.show: assert np.max(err) < .1 assert np.mean(err) < .01 assert np.median(err) < .01
def work(self, **kwargs): self.__dict__.update(kwargs) self.worked = True samples = LGMM1(rng=self.rng, size=(self.n_samples,), **self.LGMM1_kwargs) samples = np.sort(samples) edges = samples[::self.samples_per_bin] centers = .5 * edges[:-1] + .5 * edges[1:] print edges pdf = np.exp(LGMM1_lpdf(centers, **self.LGMM1_kwargs)) dx = edges[1:] - edges[:-1] y = 1 / dx / len(dx) if self.show: plt.scatter(centers, y) plt.plot(centers, pdf) plt.show() err = (pdf - y) ** 2 print np.max(err) print np.mean(err) print np.median(err) if not self.show: assert np.max(err) < .1 assert np.mean(err) < .01 assert np.median(err) < .01
def run_svm_evaluation(self, svmtype, inputdata, outputdata, k):
    """ Run SVM on training data to evaluate classifier. Return f1scores, gamma and C"""
    if svmtype == 'rbf':
        # Parameter grid
        param_grid = [{'C': np.logspace(1, 5, 5), 'gamma': np.logspace(-3, 0, 5), 'kernel': ['rbf']}]
    if svmtype == 'ln':
        param_grid = [{'C': np.logspace(1, 5, 5)}]
    score_func = metrics.f1_score

    # Cross validation
    cv = cross_validation.KFold(inputdata.shape[0], n_folds=k, indices=True, shuffle=True)
    f1_scores = []
    for traincv, testcv in cv:
        # TODO: multithreading of cross validation.
        (f1_score, gamma1, c) = self.do_cross_validation(param_grid, svmtype, score_func,
                                                         inputdata[traincv], outputdata[traincv],
                                                         inputdata[testcv], outputdata[testcv])
        f1_scores.append(f1_score)

    print "score average: " + str(np.mean(f1_scores))
    print f1_scores
    average_score = np.mean(f1_scores)
    tuples = (average_score, f1_scores)
    return (tuples, gamma1, c)
def sample_every_two_correlation_times(energy_data, magnetization_data, correlation_time, no_of_sites):
    """Sample the given data every 2 correlation times and determine value and error."""
    magnet_samples = []
    energy_samples = []
    for t in np.arange(0, len(energy_data), 2 * int(np.ceil(correlation_time))):
        magnet_samples.append(magnetization_data[t])
        energy_samples.append(energy_data[t])
    magnet_samples = np.asarray(magnet_samples)
    energy_samples = np.asarray(energy_samples)

    abs_magnetization = np.mean(np.absolute(magnet_samples))
    abs_magnetization_error = calculate_error(magnet_samples)
    print("<m> (<|M|/N>) = {0} +/- {1}".format(abs_magnetization, abs_magnetization_error))

    magnetization = np.mean(magnet_samples)
    magnetization_error = calculate_error(magnet_samples)
    print("<M/N> = {0} +/- {1}".format(magnetization, magnetization_error))

    energy = np.mean(energy_samples)
    energy_error = calculate_error(energy_samples)
    print("<E/N> = {0} +/- {1}".format(energy, energy_error))

    magnetization_squared = np.mean((magnet_samples * no_of_sites)**2)
    magnetization_squared_error = calculate_error((magnet_samples * no_of_sites)**2)
    print("<M^2> = {0} +/- {1}".format(magnetization_squared, magnetization_squared_error))
def summarize_features_mfcc(mfccs, v=False):
    """
    Given an MFCC matrix, return a summary feature vector for a window
    :param mfccs: NxM matrix
        mfcc matrix
    :param v: bool
        verbose flag
    :return: 1xL array
        feature vector
    """
    # Summarize features
    features = np.max(mfccs, axis=1)
    features = np.append(features, np.mean(mfccs, axis=1))
    features = np.append(features, np.std(mfccs, axis=1))
    d_mfccs = np.diff(mfccs, axis=1)
    features = np.append(features, np.mean(d_mfccs, axis=1))
    features = np.append(features, np.std(d_mfccs, axis=1))
    d_d_mfccs = np.diff(d_mfccs, axis=1)
    features = np.append(features, np.mean(d_d_mfccs, axis=1))
    features = np.append(features, np.std(d_d_mfccs, axis=1))
    # print np.shape(d_d_mfccs)
    # print np.shape(features)
    return np.reshape(features, (1, len(features)))
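# Minimal usage sketch for summarize_features_mfcc above: a random 13x100
# matrix stands in for real MFCC coefficients, just to show the output shape.
import numpy as np

fake_mfccs = np.random.randn(13, 100)
feat_vec = summarize_features_mfcc(fake_mfccs)
print(feat_vec.shape)  # (1, 91): per-coefficient max/mean/std plus delta and delta-delta mean/std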
def trainer(model, data, epochs, validate_period, model_path, prob_lm=0.1, runid=''): def valid_loss(): result = dict(lm=[], visual=[]) for item in data.iter_valid_batches(): result['lm'].append(model.lm.loss_test(*model.lm.args(item))) result['visual'].append(model.visual.loss_test(*model.visual.args(item))) return result costs = Counter(dict(cost_v=0.0, N_v=0.0, cost_t=0.0, N_t=0.0)) print "LM: {} parameters".format(count_params(model.lm.params())) print "Vi: {} parameters".format(count_params(model.visual.params())) for epoch in range(1,epochs+1): for _j, item in enumerate(data.iter_train_batches()): j = _j +1 if random.random() <= prob_lm: cost_t = model.lm.train(*model.lm.args(item)) costs += Counter(dict(cost_t=cost_t, N_t=1)) else: cost_v = model.visual.train(*model.visual.args(item)) costs += Counter(dict(cost_v=cost_v, N_v=1)) print epoch, j, j*data.batch_size, "train", \ numpy.divide(costs['cost_v'], costs['N_v']),\ numpy.divide(costs['cost_t'], costs['N_t']) if j % validate_period == 0: result = valid_loss() print epoch, j, 0, "valid", \ numpy.mean(result['visual']),\ numpy.mean(result['lm']) sys.stdout.flush() model.save(path='model.r{}.e{}.zip'.format(runid, epoch)) model.save(path='model.zip')
def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10): assert(embeddings1.shape[0] == embeddings2.shape[0]) assert(embeddings1.shape[1] == embeddings2.shape[1]) nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) nrof_thresholds = len(thresholds) k_fold = KFold(n_splits=nrof_folds, shuffle=False) val = np.zeros(nrof_folds) far = np.zeros(nrof_folds) diff = np.subtract(embeddings1, embeddings2) dist = np.sum(np.square(diff),1) indices = np.arange(nrof_pairs) for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): # Find the threshold that gives FAR = far_target far_train = np.zeros(nrof_thresholds) for threshold_idx, threshold in enumerate(thresholds): _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) if np.max(far_train)>=far_target: f = interpolate.interp1d(far_train, thresholds, kind='slinear') threshold = f(far_target) else: threshold = 0.0 val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) val_mean = np.mean(val) far_mean = np.mean(far) val_std = np.std(val) return val_mean, val_std, far_mean
def getClass(imageWindow, models, z):
    hasLabel = False
    label = 999
    for k in models.keys():
        m = models[k]
        l1 = m[0]
        l2 = m[1]
        l3 = m[2]
        h1 = m[3]
        h2 = m[4]
        h3 = m[5]
        ch1 = numpy.mean(imageWindow[:, :, 0])
        ch2 = numpy.mean(imageWindow[:, :, 1])
        ch3 = numpy.mean(imageWindow[:, :, 2])
        #print "checking if ", ch1, ch2, ch3, " is between ", h1, l1, h2, l2, h3, l3
        if (l1 < ch1 < h1 and l2 < ch2 < h2 and l3 < ch3 < h3):
            if (not hasLabel):
                label = k
                print "got label ", z[k]
                hasLabel = True
            else:
                print "error, relabeling as :", z[k]
                return 999
    if (not hasLabel):
        return 999
    else:
        return label
def updateBackgroundCutoff(fit_data):
    residual_bg = estimateBackground(fit_data.residual)
    mean_residual_bg = numpy.mean(residual_bg)
    fit_data.residual -= residual_bg
    fit_data.residual += mean_residual_bg
    fit_data.background = numpy.mean(fit_data.residual)
    fit_data.cutoff = fit_data.background + fit_data.cur_threshold
def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10): assert(embeddings1.shape[0] == embeddings2.shape[0]) assert(embeddings1.shape[1] == embeddings2.shape[1]) nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) nrof_thresholds = len(thresholds) k_fold = KFold(n_splits=nrof_folds, shuffle=False) tprs = np.zeros((nrof_folds,nrof_thresholds)) fprs = np.zeros((nrof_folds,nrof_thresholds)) accuracy = np.zeros((nrof_folds)) diff = np.subtract(embeddings1, embeddings2) dist = np.sum(np.square(diff),1) indices = np.arange(nrof_pairs) for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): # Find the best threshold for the fold acc_train = np.zeros((nrof_thresholds)) for threshold_idx, threshold in enumerate(thresholds): _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) best_threshold_index = np.argmax(acc_train) for threshold_idx, threshold in enumerate(thresholds): tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) tpr = np.mean(tprs,0) fpr = np.mean(fprs,0) return tpr, fpr, accuracy
def run_epoch(self, session, input_data, input_labels, shuffle=True, verbose=True): orig_X, orig_y = input_data, input_labels dp = self.config.dropout # We're interested in keeping track of the loss and accuracy during training total_loss = [] total_correct_examples = 0 total_processed_examples = 0 total_steps = len(orig_X) / self.config.batch_size for step, (x, y) in enumerate( data_iterator(orig_X, orig_y, batch_size=self.config.batch_size, label_size=self.config.label_size, shuffle=shuffle)): feed = self.create_feed_dict(input_batch=x, dropout=dp, label_batch=y) loss, total_correct, _ = session.run( [self.loss, self.correct_predictions, self.train_op], feed_dict=feed) total_processed_examples += len(x) total_correct_examples += total_correct total_loss.append(loss) ## if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : loss = {}'.format( step, total_steps, np.mean(total_loss))) sys.stdout.flush() if verbose: sys.stdout.write('\r') sys.stdout.flush() return np.mean(total_loss), total_correct_examples / float(total_processed_examples)
def main():
    road = Road(number_of_cars=30)
    number_of_runs = 100
    seconds_in_run = 60
    road.place_cars()
    speed_limit_list = []
    positions_list = []
    speeds_list = []
    mean_speeds = []
    st_devs = []
    for _ in range(number_of_runs):
        speeds, positions = road.simulate_n_seconds(seconds_in_run)
        mean = np.mean(speeds)
        stdv = np.std(speeds)
        speed_limit_list.append(mean + stdv)
        mean_speeds.append(mean)
        st_devs.append(stdv)
        if _ in {0, 9, 34, 74, 99}:
            positions_list.append(positions[:])
            speeds_list.append(speeds)
    return (int(np.mean(speed_limit_list)), positions_list,
            speeds_list, mean_speeds, st_devs)
def test_decimate(): """Test decimation of digitizer headshapes with too many points.""" # load headshape and convert to meters hsp_mm = _get_ico_surface(5)['rr'] * 100 hsp_m = hsp_mm / 1000. # save headshape to a file in mm in temporary directory tempdir = _TempDir() sphere_hsp_path = op.join(tempdir, 'test_sphere.txt') np.savetxt(sphere_hsp_path, hsp_mm) # read in raw data using spherical hsp, and extract new hsp with warnings.catch_warnings(record=True) as w: raw = read_raw_kit(sqd_path, mrk_path, elp_txt_path, sphere_hsp_path) assert_true(any('more than' in str(ww.message) for ww in w)) # collect headshape from raw (should now be in m) hsp_dec = np.array([dig['r'] for dig in raw.info['dig']])[8:] # with 10242 points and _decimate_points set to resolution of 5 mm, hsp_dec # should be a bit over 5000 points. If not, something is wrong or # decimation resolution has been purposefully changed assert_true(len(hsp_dec) > 5000) # should have similar size, distance from center dist = np.sqrt(np.sum((hsp_m - np.mean(hsp_m, axis=0))**2, axis=1)) dist_dec = np.sqrt(np.sum((hsp_dec - np.mean(hsp_dec, axis=0))**2, axis=1)) hsp_rad = np.mean(dist) hsp_dec_rad = np.mean(dist_dec) assert_almost_equal(hsp_rad, hsp_dec_rad, places=3)
def modulate_image(gabor_def, visuals, spacials, position, min_contrast=0.0, frequency_data=None, use_local_rms=False): (pixels_per_degree, gabor_diameter, xf, yf, gaussian, ramp, grating, g) = frequency_data if isinstance(frequency_data, FREQ_DATA) else load_spacial_data(visuals, spacials) import time st = time.time() top_left_pos = (position[0] - (gabor_diameter / 2.0), position[1] - (gabor_diameter / 2.0)) patch = gabor_def.rms_matrix[top_left_pos[0] : top_left_pos[0] + gabor_diameter, top_left_pos[1] : top_left_pos[1] + gabor_diameter, :] if use_local_rms: patch_avg = gabor_def.avg_matrix[top_left_pos[0] : top_left_pos[0] + gabor_diameter, top_left_pos[1] : top_left_pos[1] + gabor_diameter] R = (patch_avg / 127.0) - 1 R = R / (numpy.max(numpy.abs(R))) / 2.0 rms_measure = numpy.std(R + 0.5) / numpy.mean(R + 0.5) print rms_measure if min_contrast > 0: rms_measure = max(rms_measure, min_contrast) g = g * (255.0 * rms_measure) else: g = g * (255.0 * gabor_def.rms_measure) g = g - numpy.mean(g) gabor = numpy.transpose(numpy.tile(g, (3,1,1)), (1,2,0)) print "took {0}".format((time.time() - st) * 1000.0) return GABOR_DATA._make([top_left_pos, gabor_diameter, gabor_diameter / 2.0, patch, numpy.clip(patch + gabor, 0, 255).astype('uint8')])
def testPdfOfSampleMultiDims(self): student = student_t.StudentT(df=[7., 11.], loc=[[5.], [6.]], scale=3.) self.assertAllEqual([], student.event_shape) self.assertAllEqual([], self.evaluate(student.event_shape_tensor())) self.assertAllEqual([2, 2], student.batch_shape) self.assertAllEqual([2, 2], self.evaluate(student.batch_shape_tensor())) num = 50000 samples = student.sample(num, seed=123456) pdfs = student.prob(samples) sample_vals, pdf_vals = self.evaluate([samples, pdfs]) self.assertEqual(samples.get_shape(), (num, 2, 2)) self.assertEqual(pdfs.get_shape(), (num, 2, 2)) self.assertNear(5., np.mean(sample_vals[:, 0, :]), err=.03) self.assertNear(6., np.mean(sample_vals[:, 1, :]), err=.03) self._assertIntegral(sample_vals[:, 0, 0], pdf_vals[:, 0, 0], err=0.02) self._assertIntegral(sample_vals[:, 0, 1], pdf_vals[:, 0, 1], err=0.02) self._assertIntegral(sample_vals[:, 1, 0], pdf_vals[:, 1, 0], err=0.02) self._assertIntegral(sample_vals[:, 1, 1], pdf_vals[:, 1, 1], err=0.02) if not stats: return self.assertNear( stats.t.var(7., loc=0., scale=3.), # loc d.n. effect var np.var(sample_vals[:, :, 0]), err=.4) self.assertNear( stats.t.var(11., loc=0., scale=3.), # loc d.n. effect var np.var(sample_vals[:, :, 1]), err=.4)
def testEpsilon_MOEA_NegativeDTLZ2(self): random = pyotl.utility.Random(1) problemGen = lambda: pyotl.problem.real.NegativeDTLZ2(3) problem = problemGen() pathProblem = os.path.join(self.pathData, type(problem).__name__.replace('Negative', ''), str(problem.GetNumberOfObjectives())) crossover = pyotl.crossover.real.SimulatedBinaryCrossover(random, 1, problem.GetBoundary(), 20) mutation = pyotl.mutation.real.PolynomialMutation(random, 1 / float(len(problem.GetBoundary())), problem.GetBoundary(), 20) epsilon = pyotl.utility.PyList2Vector_Real([0.06] * problem.GetNumberOfObjectives()) pfList = [] for _ in range(self.repeat): problem = problemGen() initial = pyotl.initial.real.BatchUniform(random, problem.GetBoundary(), 100) optimizer = pyotl.optimizer.couple_couple.real.Epsilon_MOEA(random, problem, initial, crossover, mutation, epsilon) while optimizer.GetProblem().GetNumberOfEvaluations() < 30000: optimizer() pf = pyotl.utility.PyListList2VectorVector_Real( [list(solution.objective_) for solution in optimizer.GetSolutionSet()]) for objective in pf: problem.Fix(objective) pfList.append(pf) pathCrossover = os.path.join(pathProblem, type(crossover).__name__) pathOptimizer = os.path.join(pathCrossover, type(optimizer).__name__) pfTrue = pyotl.utility.PyListList2VectorVector_Real(numpy.loadtxt(os.path.join(pathProblem, 'PF.csv')).tolist()) # GD indicator = pyotl.indicator.real.DTLZ2GD() metricList = [indicator(pf) for pf in pfList] rightList = numpy.loadtxt(os.path.join(pathOptimizer, 'GD.csv')).tolist() self.assertGreater(scipy.stats.ttest_ind(rightList, metricList)[1], 0.05, [numpy.mean(rightList), numpy.mean(metricList), metricList]) # IGD indicator = pyotl.indicator.real.InvertedGenerationalDistance(pfTrue) metricList = [indicator(pf) for pf in pfList] rightList = numpy.loadtxt(os.path.join(pathOptimizer, 'IGD.csv')).tolist() self.assertGreater(scipy.stats.ttest_ind(rightList, metricList)[1], 0.05, [numpy.mean(rightList), numpy.mean(metricList), metricList])
def EN_CID(y):
    """
    CID measure from Batista, G. E. A. P. A., Keogh, E. J., Tataw, O. M. &
    de Souza, V. M. A. CID: an efficient complexity-invariant distance for
    time series. Data Min Knowl. Disc. 28, 634-669 (2014).

    Arguments
    ---------
    y: a nitime time-series object, or numpy vector
    """
    # Make the input a row vector of numbers:
    y = makeRowVector(vectorize(y))

    # Prepare the output dictionary
    out = {}

    # Original definition (in Table 2 of the paper cited above)
    # sum -> mean to deal with non-equal time-series lengths
    out['CE1'] = np.sqrt(np.mean(np.power(np.diff(y), 2)))

    # Definition corresponding to the line segment example in Fig. 9 of the
    # paper cited above (using Pythagoras's theorem):
    out['CE2'] = np.mean(np.sqrt(1 + np.power(np.diff(y), 2)))

    return out
def SB_MotifTwo(y, binarizeHow='diff'):
    """
    Looks at local motifs in a binary symbolization of the time series, which
    is performed by a given binarization method.

    Arguments
    ---------
    y: a nitime time-series object, or numpy vector
    """
    # Make the input a row vector of numbers:
    y = makeRowVector(vectorize(y))

    # Make binarization on incremental differences:
    if binarizeHow == 'diff':
        yBin = ((np.sign(np.diff(y))) + 1.) / 2.
    else:
        raise ValueError(binarizeHow)

    # Initialize output dictionary
    out = {}

    # Where the difference is 0, 1
    r0 = yBin == 0
    r1 = yBin == 1

    out['u'] = np.mean(r1)
    out['d'] = np.mean(r0)
    out['h'] = -(out['u']*np.log2(out['u']) + out['d']*np.log2(out['d']))

    return out
def update(self, y): L = Loss().MSE(self.output, y) # stopping criteria self.errors[self.epoch%5] = numpy.mean(L.E**2)**0.5 score = numpy.mean(self.errors) # stop when error starts to diverge too much print " " , self.bestScore self.stop = score/self.bestScore > 1e60 # save the best weights if score < self.bestScore: self.bestW = self.W self.bestScore = score self.bestEpoch = self.epoch norm_W = numpy.linalg.norm(self.W) sys.stdout.write( "\rEpoch %d: RMSE: %2.3f, Norm(W): %2.2f"%(self.epoch, numpy.mean((y-self.output)**2)**0.5, norm_W) ) sys.stdout.flush() # gradients grad_outputs = L.dE_dY*(1 - self.output**2) dE_dK = numpy.dot(self.hidden.reshape(self.n_hidden, 1), grad_outputs.reshape(1, self.n_output)) transfer = numpy.dot(grad_outputs, self.K.T) # hidden layer grad_hidden = transfer * (1 - self.hidden**2) dE_dW = numpy.dot(self.X.T , grad_hidden) # updating weights self.K -= 1.2*self.alpha*dE_dK self.W -= self.alpha*dE_dW
def softmax_experiment():
    """Run softmax experiment."""
    print('Running softmax experiment.')
    taus = [0.01, 0.1, 1]
    ars, pos = [], []
    for tau in taus:
        ar, po = run_experiment(2000, 1000, tau=tau, alpha=0.1)
        ars.append(np.mean(ar, 0))
        pos.append(np.mean(po, 0))
    # plot the results
    plt.close('all')
    f, (ax1, ax2) = plt.subplots(2)
    for i, tau in enumerate(taus):
        ax1.plot(ars[i].T, label='$\\tau$ = %.2f' % tau)
        ax2.plot(pos[i].T, label='$\\tau$ = %.2f' % tau)
    ax1.legend(loc='lower right')
    ax1.set_ylabel('Average reward')
    ax1.set_xlim(xmin=-10)
    ax2.legend(loc='lower right')
    ax2.set_xlabel('Plays')
    ax2.set_ylabel('% Optimal action')
    ax2.set_xlim(xmin=-20)
    plt.savefig('softmax_experiment.pdf')
    plt.show()
def svm_SVR_C( xM, yV, c_l, graph = True): """ SVR is performed iteratively with different C values until all C in the list are used. """ r2_l, sd_l = [], [] for C in c_l: print('sklearn.svm.SVR(C={})'.format( C)) clf = svm.SVR( C = C) clf.fit( xM, yV.A1) yV_pred = clf.predict(xM) r2, sd = regress_show( yV, np.mat( yV_pred).T, graph = graph) for X, x in [[r2_l, r2], [sd_l, sd]]: X.append( x) print('average r2, sd are', np.mean( r2_l), np.mean( sd_l)) if graph: pdw = pd.DataFrame( { 'log10(C)': np.log10(c_l), 'r2': r2_l, 'sd': sd_l}) pdw.plot( x = 'log10(C)') return r2_l, sd_l
inputs = Variable(inputs).cuda() labels = Variable(labels).cuda() #print('input_shape',inputs.shape) # zero the parameter gradients optimizer.zero_grad() # forward + backward + optimize outputs = net(inputs) loss = criterion(outputs, labels) loss.backward() optimizer.step() prediction = outputs.data.max(1)[1] accuracy = ( float( prediction.eq(labels.data).sum() ) /float(batch_size))*100.0 train_accu.append(accuracy) accuracy_epoch = np.mean(train_accu) print(epoch, accuracy_epoch) if (epoch%5==4): correct = 0 total = 0 for data in testloader: inputs, labels = data inputs, labels = Variable(inputs).cuda(), Variable(labels).cuda() outputs = net(inputs) _, predicted = torch.max(outputs.data, 1) total += labels.size(0) #correct += (predicted == labels).sum().item() correct+=predicted.eq(labels.data).sum() print('Accuracy of the network on the 10000 test images: %d %%' % (100.0 * float(correct) / float(total)))
with tf.device('/gpu:0'):
    n_epochs = 50
    N_train = len(q_train)
    n_batches = N_train // batch_size + 1
    for epoch in range(n_epochs):
        epoch_loss = []
        times = 0.
        indexes = np.arange(N_train)
        np.random.shuffle(indexes)
        q_train = q_train[indexes]
        a_train = a_train[indexes]
        for idx in range(n_batches):
            tic = time()
            if idx % (n_batches // 10) == 0:
                print("Epoch %d - %d/%d : loss = %1.4f - time = %1.3fs" % (
                    epoch, idx, n_batches, np.mean(epoch_loss),
                    times / ((N_train // 10) * batch_size)))
                times = 0.
            begin = idx * batch_size
            end = min((idx + 1) * batch_size, N_train)
            Q, mask, A = get_batch(begin, end, q_train, a_train, batch_size, max_q, Na)
            _, l, l_s = sess.run([model_outputs['train_op'],
                                  model_outputs['loss'],
                                  model_outputs['loss_summary']],
                                 feed_dict={model_outputs['question']: Q,
                                            model_outputs['mask']: mask,
                                            model_outputs['answer']: A})
            epoch_loss.append(l)
            writer.add_summary(l_s, idx + epoch * n_batches)
            times += time() - tic

with tf.device('/cpu:0'):
def _SSIMForMultiScale(img1, img2, max_val=255, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): """Return the Structural Similarity Map between `img1` and `img2`. This function attempts to match the functionality of ssim_index_new.m by Zhou Wang: http://www.cns.nyu.edu/~lcv/ssim/msssim.zip Arguments: img1: Numpy array holding the first RGB image batch. img2: Numpy array holding the second RGB image batch. max_val: the dynamic range of the images (i.e., the difference between the maximum the and minimum allowed values). filter_size: Size of blur kernel to use (will be reduced for small images). filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced for small images). k1: Constant used to maintain stability in the SSIM calculation (0.01 in the original paper). k2: Constant used to maintain stability in the SSIM calculation (0.03 in the original paper). Returns: Pair containing the mean SSIM and contrast sensitivity between `img1` and `img2`. Raises: RuntimeError: If input images don't have the same shape or don't have four dimensions: [batch_size, height, width, depth]. """ if img1.shape != img2.shape: raise RuntimeError( 'Input images must have the same shape (%s vs. %s).', img1.shape, img2.shape) if img1.ndim != 4: raise RuntimeError('Input images must have four dimensions, not %d', img1.ndim) img1 = img1.astype(np.float64) img2 = img2.astype(np.float64) _, height, width, _ = img1.shape # Filter size can't be larger than height or width of images. size = min(filter_size, height, width) # Scale down sigma if a smaller filter size is used. sigma = size * filter_sigma / filter_size if filter_size else 0 if filter_size: window = np.reshape(_FSpecialGauss(size, sigma), (1, size, size, 1)) mu1 = signal.fftconvolve(img1, window, mode='valid') mu2 = signal.fftconvolve(img2, window, mode='valid') sigma11 = signal.fftconvolve(img1 * img1, window, mode='valid') sigma22 = signal.fftconvolve(img2 * img2, window, mode='valid') sigma12 = signal.fftconvolve(img1 * img2, window, mode='valid') else: # Empty blur kernel so no need to convolve. mu1, mu2 = img1, img2 sigma11 = img1 * img1 sigma22 = img2 * img2 sigma12 = img1 * img2 mu11 = mu1 * mu1 mu22 = mu2 * mu2 mu12 = mu1 * mu2 sigma11 -= mu11 sigma22 -= mu22 sigma12 -= mu12 # Calculate intermediate values used by both ssim and cs_map. c1 = (k1 * max_val)**2 c2 = (k2 * max_val)**2 v1 = 2.0 * sigma12 + c2 v2 = sigma11 + sigma22 + c2 ssim = np.mean((((2.0 * mu12 + c1) * v1) / ((mu11 + mu22 + c1) * v2))) cs = np.mean(v1 / v2) return ssim, cs
def plot_collapse(flat_list, gamma_shape, min_rep=10, min_d=4, ax=None, str_leg=None, extrapolate=False, color='r', show_subplots=True): #Definitions interp_points = 1000 if ax is None: plt.figure() else: plt.sca(ax) #Flattens list of reps #flat_list = np.array([item for sublist in shape_list for item in sublist]) flat_list = np.array(flat_list) #List of avalanche sizes shape_size = np.zeros(len(flat_list)) for i in range(len(flat_list)): shape_size[i] = flat_list[i].size max_size = shape_size.max() #Avalanche size count shape_count, _ = np.histogram(shape_size, bins=np.arange(0, max_size + 2)) #Censors data by size censor_d_keep = np.arange(0, max_size + 1) >= min_d censor_rep_keep = shape_count >= min_rep censor_index = np.where( [a and b for a, b in zip(censor_d_keep, censor_rep_keep)])[0] #Defines average size matrix average_shape = np.zeros((censor_index.size, interp_points)) #Defines bottom interpolation range from data, to prevent extrapolation bias #x_min = 1/censor_index[0] if extrapolate is True: x_min = 0 elif extrapolate is False: x_min = 1 / censor_index[0] else: error('extrapolate is not binary.') x_range = np.linspace(x_min, 1, num=interp_points) #Averages shape for each duration and interpolates results y_min = 100 for i in range(len(censor_index)): #Calculates average shape size_i = censor_index[i] avg_shape_i_y = np.mean(flat_list[shape_size == size_i]) / np.power( size_i, gamma_shape - 1) avg_shape_i_x = np.arange(1, size_i + 1) / size_i if np.min(avg_shape_i_y) < y_min: y_min = np.min(avg_shape_i_y) #Interpolates results fx = InterpolatedUnivariateSpline(avg_shape_i_x, avg_shape_i_y) average_shape[i, :] = fx(x_range) #Plots transparent subplots if show_subplots: ax.plot(avg_shape_i_x, avg_shape_i_y, alpha=0.2, color=color) #Plots interpolated average curve if show_subplots: color_collapse = 'k' else: color_collapse = color plot_line, = ax.plot(x_range, np.mean(average_shape, axis=0), color=color_collapse, linewidth=2, label=str_leg) ax.legend([plot_line], [str_leg]) plt.legend() #Beautifies plot ax.set_xlabel('Scaled time') ax.set_ylabel('Scaled activity') plt.xlim([0, 1])
def run_analysis(data, newFig=True, label='Data', color='k'): #Sets up figure if newFig is True: fig = plt.figure(figsize=(18, 12)) gs = fig.add_gridspec(2, 2) ax_pS = fig.add_subplot(gs[0, 0]) ax_pD = fig.add_subplot(gs[0, 1]) ax_avgS = fig.add_subplot(gs[1, 0]) ax_shape = fig.add_subplot(gs[1, 1]) else: fig = plt.gcf() if len(fig.get_axes()) != 4: ValueError('Current figure does not have a 2x2 layout.') ax_pS, ax_pD, ax_avgS, ax_shape = fig.get_axes() #Analyzes avalanches avalanches = get_avalanches(data) S_list = [avalanches[i]['S'] for i in avalanches.keys()] D_list = [avalanches[i]['D'] for i in avalanches.keys()] shape_list = [avalanches[i]['shape'] for i in avalanches.keys()] #Calculates S_avg S_avg = np.zeros((np.max(D_list), 3)) for i in range(np.max(D_list)): S_avg[i, 0] = i + 1 S_D = [ avalanches[j]['S'] for j in avalanches.keys() if avalanches[j]['D'] == i + 1 ] S_avg[i, 1] = np.mean(S_D) S_avg[i, 2] = np.std(S_D) #Plots p(S) fit_pS = powerlaw.Fit(S_list, xmin=1) str_label = label + r': $\alpha$ = {:0.3f}'.format(fit_pS.power_law.alpha) fit_pS.plot_pdf(ax=ax_pS, color=color, **{'label': str_label}) fit_pS.power_law.plot_pdf(ax=ax_pS, color='k', linestyle='--') #Plots p(D) fit_pD = powerlaw.Fit(D_list, xmin=1) str_label = label + r': $\beta$ = {:0.3f}'.format(fit_pD.power_law.alpha) fit_pD.plot_pdf(ax=ax_pD, color=color, **{'label': str_label}) fit_pD.power_law.plot_pdf(ax=ax_pD, color='k', linestyle='--') #Plots <S>(D) fit_gamma, _, _ = fit_powerlaw(S_avg[:, 0], S_avg[:, 1], S_avg[:, 2], loglog=True) str_label = label + r': $\gamma$ = {:0.3f}'.format(fit_gamma) ax_avgS.plot(S_avg[:, 0], S_avg[:, 1], label=str_label, color=color) ax_avgS.plot(S_avg[:, 0], np.power(S_avg[:, 0], fit_gamma), color='k', linestyle='--') #Fits and plots the average avalanche shape fit_gamma_shape = fit_collapse(shape_list, 4, 20, extrapolate=True) str_leg = label + r': $\gamma_s$ = {:0.2f}'.format(fit_gamma_shape) plot_collapse(shape_list, fit_gamma_shape, 4, 20, ax_shape, str_leg, True, color, show_subplots=False) print('== Exponents for {:s} =='.format(label)) print('alpha = {:0.3f}'.format(fit_pS.power_law.alpha)) print('beta = {:0.3f}'.format(fit_pD.power_law.alpha)) print('gamma_scaling = {:0.3f}'.format( (fit_pD.power_law.alpha - 1) / (fit_pS.power_law.alpha - 1))) print('gamma = {:0.3f}'.format(fit_gamma)) print('gamma_shape = {:0.3f}'.format(fit_gamma_shape)) #Beautifies plots plt.sca(ax_pS) plt.legend(loc='upper right') plt.xlabel('S') plt.ylabel('p(S)') plt.sca(ax_pD) plt.legend(loc='upper right') plt.xlabel('D') plt.ylabel('p(D)') plt.sca(ax_avgS) plt.legend(loc='upper left') plt.xlabel('D') plt.ylabel(r'$\langle S \rangle$ (D)') plt.xlim([1, 1e3]) plt.ylim([1, 1e5]) ax_avgS.set_xscale('log') ax_avgS.set_yscale('log')
def fit_collapse(flat_list, min_d, min_rep, extrapolate=False): #Definitions interp_points = 1000 gamma_x0 = 0.5 opt_bounds = (-1, 5) #Flattens list of reps #flat_list = np.array([item for sublist in shape_list for item in sublist]) flat_list = np.array(flat_list) #List of avalanche sizes shape_size = np.zeros(len(flat_list)) for i in range(len(flat_list)): shape_size[i] = flat_list[i].size max_size = shape_size.max() #Avalanche size count shape_count, _ = np.histogram(shape_size, bins=np.arange(0, max_size + 2)) #Censors data by size censor_d_keep = np.arange(0, max_size + 1) >= min_d censor_rep_keep = shape_count >= min_rep censor_index = np.where( [a and b for a, b in zip(censor_d_keep, censor_rep_keep)])[0] #Defines average size matrix average_shape = np.zeros((censor_index.size, interp_points)) #Defines bottom interpolation range from data, to prevent extrapolation bias if extrapolate is True: x_min = 0 elif extrapolate is False: x_min = 1 / censor_index[0] else: error('extrapolate is not binary.') x_range = np.linspace(x_min, 1, num=interp_points) #Averages shape for each duration and interpolates results for i in range(len(censor_index)): #Calculates average shape size_i = censor_index[i] avg_shape_i_y = np.mean(flat_list[shape_size == size_i]) avg_shape_i_x = np.arange(1, size_i + 1) / size_i #Interpolates results fx = InterpolatedUnivariateSpline(avg_shape_i_x, avg_shape_i_y) average_shape[i, :] = fx(x_range) #Error function for optimization def _error(gamma_shape, *params): average_shape, censor_index = params shape_scaled = np.zeros((censor_index.size, interp_points)) for i in range(censor_index.size): shape_scaled[i, :] = average_shape[i, :] / np.power( censor_index[i], gamma_shape) err = np.mean(np.var(shape_scaled, axis=0)) / np.power( (np.max(np.max(shape_scaled)) - np.min(np.min(shape_scaled))), 2) return err #Minimizes error minimize_obj = minimize(_error, x0=[gamma_x0], args=(average_shape, censor_index), bounds=[opt_bounds]) return minimize_obj.x[0] + 1
session.run(tf.global_variables_initializer()) for iteration in xrange(ITERS): start_time = time.time() _input_noise = np.random.normal(size=(BATCH_SIZE, NOISE_DIM)) _dis_cost = [] for i in xrange(CRITIC_ITERS): _data = inf_train_gen().next() _dis_cost_, _ = session.run([dis_cost, dis_train_op], feed_dict={real_data: _data, input_noise: _input_noise}) _dis_cost.append(_dis_cost_) if clip_dis_weights: _ = session.run(clip_dis_weights) _dis_cost = np.mean(_dis_cost) _ = session.run(gen_train_op, feed_dict={input_noise: _input_noise}) _inv_cost, _ = session.run([inv_cost, inv_train_op], feed_dict={input_noise: _input_noise}) lib.plot.plot('train discriminator cost', _dis_cost) lib.plot.plot('train invertor cost', _inv_cost) lib.plot.plot('time', time.time() - start_time) if iteration % 1000 == 999: test_dis_costs = [] for test_instances, _ in gen(X_test, y_test): _test_dis_cost = session.run(dis_cost, feed_dict={real_data: test_instances, input_noise: _input_noise})
def isMaxWhite(plate):
    avg = np.mean(plate)
    if (avg >= 115):
        return True
    else:
        return False
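# Minimal usage sketch for isMaxWhite above: synthetic grayscale patches; the
# 115 threshold comes from the function itself, not from any dataset.
import numpy as np

bright_plate = np.full((40, 120), 200, dtype=np.uint8)
dark_plate = np.full((40, 120), 30, dtype=np.uint8)
print(isMaxWhite(bright_plate))  # True, mean intensity 200 >= 115
print(isMaxWhite(dark_plate))    # False, mean intensity 30 < 115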
results = cross_validate(neural_network, clinicalInput, clinicalOutput, cv=10, scoring=("accuracy", "f1", "recall", "precision")) import matplotlib.pyplot as plt plt.plot(results["test_accuracy"], color="c") plt.plot(results["test_f1"], color="m") plt.plot(results["test_recall"], color="y") plt.plot(results["test_precision"], color="k") plt.title("Model Information (RNN)") plt.ylabel("Model Performance") plt.xlabel("Number of Folds") plt.legend(["Accuracy", "F1-Score", "Recall", "Precision"], loc="lower right") plt.show() #Determine the prediction y_pred = cross_val_predict(neural_network, clinicalInput, clinicalOutput, cv=10) #Provide AUC score from sklearn.metrics import roc_auc_score print("Accuracy result: ", np.mean(results["test_accuracy"])) print("Recall result: ", np.mean(results["test_recall"])) print("Precision result: ", np.mean(results["test_precision"])) print("F1 result: ", np.mean(results["test_f1"])) print("ROC: ", roc_auc_score(clinicalOutput, y_pred))
def run_test(title, test, setup, repeats=10):
    print '{:>60}'.format(title + ':'),
    x = timeit.Timer(test, setup=setup).repeat(repeats, 1)
    print '{:>9.3f} {:>9.3f} {:>9.3f} {:9d}'.format(min(x), np.mean(x), max(x), repeats)
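# Minimal usage sketch for run_test above (Python 2, matching the print
# statements in the function); the timed statements are illustrative only.
import numpy as np
import timeit

setup_stmt = 'import numpy as np; a = np.random.rand(10000)'
run_test('np.mean over 10k floats', 'np.mean(a)', setup_stmt)
run_test('python sum/len over 10k floats', 'sum(a) / len(a)', setup_stmt)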
# For computing reasons I'm limiting the dataframe length to 15,000 users merged_df=merged_df[['user_id', 'name', 'user_rating']] merged_subdf= merged_df[merged_df.user_id <= 15000] merged_subdf.head() #Create a matrix of Users vs Animes wih User Ratings as the values piv_table = merged_subdf.pivot_table(index=['user_id'], columns=['name'], values='user_rating') print(piv_table.shape) piv_table.head() # Standardization is being done here. # All users with only one rating or who had rated everything the same will be dropped # Normalize the values norm_piv_table = piv_table.apply(lambda x: (x-np.mean(x))/(np.max(x)-np.min(x)), axis=1) # Drop all columns containing only zeros. These represent users who did not rate norm_piv_table.fillna(0, inplace=True) norm_piv_table = norm_piv_table.T norm_piv_table = norm_piv_table.loc[:, (norm_piv_table != 0).any(axis=0)] # Our data needs to be in a sparse matrix format to be read by the following functions sparse_matrix = sp.sparse.csr_matrix(norm_piv_table.values) #Calculate item-item similarity and user-user similarity item_similarity = cosine_similarity(sparse_matrix) user_similarity = cosine_similarity(sparse_matrix.T) # Convert the similarity matrices into dataframes item_similarity_df = pd.DataFrame(item_similarity, index = norm_piv_table.index, columns = norm_piv_table.index)
def rms_flat(a):
    """Return the root mean square of all the elements of *a*, flattened out."""
    return numpy.sqrt(numpy.mean(numpy.absolute(a)**2))
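# Minimal usage sketch for rms_flat above: the RMS of a unit-amplitude sine
# sampled over whole periods should be close to 1/sqrt(2).
import numpy

t = numpy.linspace(0, 1, 10000, endpoint=False)
sine = numpy.sin(2 * numpy.pi * 50 * t)
print(rms_flat(sine))        # approximately 0.7071
print(rms_flat(2.0 * sine))  # approximately 1.4142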
def download_one_rib_before_unix(self, my_date, unix): # my_date for deciding month tmp_month = my_date[0:4] + '.' + my_date[4:6] if self.co.startswith('rrc'): web_location = rrc_root + self.co + '/' + tmp_month + '/' else: web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/' web_location = web_location.replace('//', '/') try: webraw = cmlib.get_weblist('http://' + web_location) print 'Getting list from ' + 'http://' + web_location except: return -1 cmlib.make_dir(datadir + web_location) #---------------------------------------------------------------- # select a RIB file right before the unix and with reasonable (not strange) file size rib_list = webraw.split('\n') filter(lambda a: a != '', rib_list) filter(lambda a: a != '\n', rib_list) rib_list = [ item for item in rib_list if 'rib' in item or 'bview' in item ] sizelist = list() for line in rib_list: size = line.split()[-1] fsize = cmlib.parse_size(size) sizelist.append(fsize) avg = np.mean(sizelist) ok_rib_list = list() # RIBs whose size is OK for line in rib_list: fsize = cmlib.parse_size(line.split()[-1]) if fsize > 0.9 * avg: ok_rib_list.append(line) target_line = None # the RIB closest to unix min = 9999999999 for line in ok_rib_list: fdate = line.split()[0].split('.')[-3] ftime = line.split()[0].split('.')[-2] dtstr = fdate + ftime objdt = datetime.datetime.strptime(dtstr, '%Y%m%d%H%M') runix = time_lib.mktime( objdt.timetuple()) + 8 * 60 * 60 # F**k! Time zone! print objdt, runix, unix if runix <= unix and unix - runix < min: min = unix - runix print 'min changed to ', min target_line = line print 'Selected RIB:', target_line if target_line == None: return -1 size = target_line.split()[-1] # claimed RIB file size fsize = cmlib.parse_size(size) filename = target_line.split()[0] full_loc = datadir + web_location + filename # .bz2/.gz if os.path.exists(full_loc + '.txt'): # only for clearer logic os.remove(full_loc + '.txt') #------------------------------------------------------------------ # Download the RIB if os.path.exists(full_loc + '.txt.gz'): print 'existed!!!!!!!!!!!!' return full_loc + '.txt.gz' # Do not download if os.path.exists(full_loc): cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize) cmlib.pack_gz(full_loc + '.txt') return full_loc + '.txt.gz' cmlib.force_download_file('http://' + web_location, datadir + web_location, filename) cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize) cmlib.pack_gz(full_loc + '.txt') os.remove(full_loc) # remove the original file return full_loc + '.txt.gz'
def test_objective(x): """ An ackley defined on a low D space. """ xs = x.reshape([1, x.shape[0]]) z = model(tf.cast(xs, tf.float32)).numpy().reshape(R) # Reshape according to the extent of the low D points. return (myackley(z)) # Entropy max initial design N = N_init design = neural_maxent(N, P, L, H, R, net_weights=model.get_weights())['design'] response_us = np.apply_along_axis(test_objective, 1, design) y_mu = np.mean(response_us) y_sig = np.std(response_us) response = (response_us - y_mu) / y_sig design, response, explored = seq_design(design=design, response=response, model=model, objective=test_objective, seq_steps=seq_steps, explore_starts=explore_starts, verbose=True) design_tf = tf.Variable(design) ## Contour plot delta = 0.025 x = np.arange(extent[0], extent[1], delta)
# # print("") # # print(negatives_CD_costs[i]) # # exit() # ##Append positives # idx_selected = np.where(np.isin(neg_candidate_idxs[i], np.array(positives)))[0] # [positives_CD_costs[i].append(positive_costs[idx_selected[x]]) for x in range(idx_selected.shape[0])] # ######################################################## num_pos = np.array(num_pos) num_neg = np.array(num_neg) log_string(str(len(positives_idx))) log_string("Num models no positives: "+str(num_models_no_positives)) log_string("Num models no negatives: "+str(num_models_no_negatives)) log_string("Average number of positives: "+str(np.mean(num_pos))) log_string("Average number of negatives: "+str(np.mean(num_neg))) # dict_value = {"positives_cost": positives_CD_costs, "negatives_cost":negatives_CD_costs} # filename = 'arap_tripletv4_' + DATA_SPLIT + '_'+OBJ_CAT+'_cost.pickle' dict_value = {"positives": positives_idx, "negatives":negatives_idx} filename = 'arap_triplet_' + DATA_SPLIT + '_'+OBJ_CAT+'.pickle' log_string("Filename: "+filename) with open(filename, 'w') as handle: pickle.dump(dict_value, handle, protocol=pickle.HIGHEST_PROTOCOL) LOG_FOUT.close()
def download_one_rib(self, my_date): tmp_month = my_date[0:4] + '.' + my_date[4:6] if self.co.startswith('rrc'): web_location = rrc_root + self.co + '/' + tmp_month + '/' else: web_location = rv_root + self.co + '/bgpdata/' + tmp_month + '/RIBS/' web_location = web_location.replace('//', '/') webraw = cmlib.get_weblist('http://' + web_location) cmlib.make_dir(datadir + web_location) #---------------------------------------------------------------- # select a RIB file with reasonable (not strange) file size rib_list = webraw.split('\n') filter(lambda a: a != '', rib_list) filter(lambda a: a != '\n', rib_list) rib_list = [ item for item in rib_list if 'rib' in item or 'bview' in item ] sizelist = list() for line in rib_list: size = line.split()[-1] fsize = cmlib.parse_size(size) sizelist.append(fsize) avg = np.mean(sizelist) target_line = None # stores the RIB file for downloading largest_line = None max = -1 closest = 99999 for line in rib_list: fdate = line.split()[0].split('.')[-3] size = line.split()[-1] fsize = cmlib.parse_size(size) if fsize > max: max = fsize largest_line = line diff = abs(int(fdate) - int(my_date)) # >0 # XXX logic here not clear (but seems effective) if diff <= closest and fsize > 0.9 * avg and fsize < 1.1 * avg: target_line = line closest = diff if target_line is None: assert largest_line is not None print 'Failed. Resort to downloading the largest RIB...' target_line = largest_line # work-around for a special case print 'Selected RIB:', target_line size = target_line.split()[-1] # claimed RIB file size fsize = cmlib.parse_size(size) filename = target_line.split()[0] full_loc = datadir + web_location + filename # .bz2/.gz if os.path.exists(full_loc + '.txt'): # only for clearer logic os.remove(full_loc + '.txt') #------------------------------------------------------------------ # Download the RIB if os.path.exists(full_loc + '.txt.gz'): print 'existed size & original size:', os.path.getsize( full_loc + '.txt.gz'), fsize if os.path.getsize(full_loc + '.txt.gz') > 0.6 * fsize: # 0.6 is good enough return full_loc + '.txt.gz' # Do not download else: os.remove(full_loc + '.txt.gz') # too small to be complete if os.path.exists(full_loc): if os.path.getsize(full_loc) <= 0.95 * fsize: os.remove(full_loc) else: # Good! cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize) cmlib.pack_gz(full_loc + '.txt') return full_loc + '.txt.gz' cmlib.force_download_file('http://' + web_location, datadir + web_location, filename) cmlib.parse_mrt(full_loc, full_loc + '.txt', fsize) cmlib.pack_gz(full_loc + '.txt') os.remove(full_loc) # remove the original file return full_loc + '.txt.gz'
def getPvals(self, geneNames, num_cores=1): """Get p-Values for the the list of genes, one for each column in the ratios matrix Keyword arguments: geneNames -- A list of genes in the cluster singleCore -- Set to True to use a single core. False may not work. """ pVals = {} relGenes = list(set(geneNames) & set(self.ratios.row_names)) curGeneMatrix = self.ratios.submatrix_by_rows( self.ratios.row_indexes_for(relGenes)) noVarNs = [] #These three matrices should have a matched order noVarRats = [] #It would be better to have a single list noVarCns = [] #With 3 named elements in each list item for cn in self.ratios.column_names: colIdx = curGeneMatrix.column_indexes_for(column_names=[cn]) geneVect = curGeneMatrix.column_values(column=colIdx) geneVect = [x for x in geneVect if not math.isnan(x)] n = len(geneVect) if not self.allVars.get(cn, False): self.allVars[cn] = {} #For loop: efficiently use multicore by precalculating additional numbers of genes i_s = [n] if num_cores > 1: i_s = [n - 3, n - 2, n - 1, n, n + 1, n + 2, n + 3] for i in i_s: if not self.allVars[cn].get(str(i), False) and i >= 0: ratioVect = self.ratios.column_values( column=self.ratios.column_indexes_for( column_names=[cn])) noVarNs.append(i) noVarRats.append(ratioVect.tolist()) noVarCns.append(cn) # 2) Use a pool of workers to calculate a distribution for each of the tuples if len(noVarNs) > 0: logging.info("Calculating some backgrounds for about %d genes", len(geneNames)) if self.useChi2: logging.info("\tFitting variance samples to Chi2 distribution") if num_cores > 1: newargs = [] for i in range(0, len(noVarNs)): newargs.append([ noVarRats[i], noVarNs[i], self.tolerance, self.maxTime, self.chunkSize, self.verbose, noVarCns[i] ]) pool = mp.Pool(num_cores) newVars = pool.map(getVarianceMeanSDvect_mp_wrapper, newargs) pool.close() pool.join() else: tolerance = np.repeat(self.tolerance, len(noVarNs)).tolist() maxTime = np.repeat(self.maxTime, len(noVarNs)).tolist() chunkSize = np.repeat(self.chunkSize, len(noVarNs)).tolist() verbose = np.repeat(self.verbose, len(noVarNs)).tolist() newVars = list( map(getVarianceMeanSDvect, noVarRats, noVarNs, tolerance, maxTime, chunkSize, verbose, noVarCns)) # 3) Assign the new values into the empty slots for idx in range(0, len(noVarCns)): cn = noVarCns[idx] curN = str(noVarNs[idx]) if self.useChi2: curVars = newVars[idx] self.allVars[cn][curN] = sp.stats.chi2.fit(curVars, df=int(curN)) else: self.allVars[cn][curN] = newVars[idx] # 4) Calculate the p-Values pVals = {} for cn in self.ratios.column_names: colIdx = curGeneMatrix.column_indexes_for(column_names=[cn]) geneVect = curGeneMatrix.column_values(column=colIdx) geneVect = [x for x in geneVect if not math.isnan(x)] n = str(len(geneVect)) if len(geneVect) <= 1 or np.any(np.isnan( self.allVars[cn][str(n)])): pVals[cn] = 1 else: curVar = np.var(geneVect) if self.useChi2: [df, loc, scale] = self.allVars[cn][str(n)] pVals[cn] = 1 - sp.stats.chi2.sf( curVar, df=df, loc=loc, scale=scale) else: pVals[cn] = np.mean(self.allVars[cn][str(n)] < curVar) return pVals
def make_side_view(self, axis_name): scene = getattr(self, 'scene_%s' % axis_name) scene.scene.parallel_projection = True ipw_3d = getattr(self, 'ipw_3d_%s' % axis_name) # We create the image_plane_widgets in the side view using a # VTK dataset pointing to the data on the corresponding # image_plane_widget in the 3D view (it is returned by # ipw_3d._get_reslice_output()) side_src = ipw_3d.ipw._get_reslice_output() ipw = mlab.pipeline.image_plane_widget( side_src, plane_orientation='z_axes', vmin=self.data.min(), vmax=self.data.max(), figure=scene.mayavi_scene, name='Cut view %s' % axis_name, ) setattr(self, 'ipw_%s' % axis_name, ipw) # Extract the spacing of the side_src to convert coordinates # into indices spacing = side_src.spacing # Make left-clicking create a crosshair ipw.ipw.left_button_action = 0 x, y, z = self.position cursor = mlab.points3d( x, y, z, mode='axes', color=(0, 0, 0), scale_factor=2 * max(self.data.shape), figure=scene.mayavi_scene, name='Cursor view %s' % axis_name, ) self.cursors[axis_name] = cursor # Add a callback on the image plane widget interaction to # move the others this_axis_number = self._axis_names[axis_name] def move_view(obj, evt): # Disable rendering on all scene position = list(obj.GetCurrentCursorPosition() * spacing)[:2] position.insert(this_axis_number, self.position[this_axis_number]) # We need to special case y, as the view has been rotated. if axis_name is 'y': position = position[::-1] self.position = position ipw.ipw.add_observer('InteractionEvent', move_view) ipw.ipw.add_observer('StartInteractionEvent', move_view) # Center the image plane widget ipw.ipw.slice_position = 0.5 * self.data.shape[ self._axis_names[axis_name]] # 2D interaction: only pan and zoom scene.scene.interactor.interactor_style = \ tvtk.InteractorStyleImage() scene.scene.background = (0, 0, 0) # Some text: mlab.text(0.01, 0.8, axis_name, width=0.08) # Choose a view that makes sens views = dict(x=(0, 0), y=(90, 180), z=(0, 0)) mlab.view(views[axis_name][0], views[axis_name][1], focalpoint=0.5 * np.array(self.data.shape), figure=scene.mayavi_scene) scene.scene.camera.parallel_scale = 0.52 * np.mean(self.data.shape)
# Compute output test_hidden, test_output = net(test_secret, test_cover) # Calculate loss test_loss, loss_cover, loss_secret = customized_loss(test_output, test_hidden, test_secret, test_cover, beta) # diff_S, diff_C = np.abs(np.array(test_output.data[0]) - np.array(test_secret.data[0])), np.abs(np.array(test_hidden.data[0]) - np.array(test_cover.data[0])) # print (diff_S, diff_C) if idx in [1,2,3,4]: print ('Total loss: {:.2f} \nLoss on secret: {:.2f} \nLoss on cover: {:.2f}'.format(test_loss.data, loss_secret.data, loss_cover.data)) # Creates img tensor print("11111111111111") imgs = [test_secret.data, test_output.data, test_cover.data, test_hidden.data] print("22222222222222") imgs_tsor = torch.cat(imgs, 0) print("33333333333333") # Prints Images imshow(utils.make_grid(imgs_tsor), idx+1, learning_rate=learning_rate, beta=beta) print("444444444444444") test_losses.append(test_loss.data) mean_test_loss = np.mean(test_losses) print ('Average loss on test set: {:.2f}'.format(mean_test_loss))
def getVarianceMeanSDvect(ratioVect, n, tolerance=0.01, maxTime=600, chunkSize=200, verbose=False, expName=None): """Given a ratios matrix and a number of genes, figure out the expected distribution of variances Will sample background until the mean and sd converge or the operation times out Will return a list of variances to be used for statistical tests, or return nan if only nan values in ratioVect Keyword arguments: ratioVect -- A a vector of ratios n -- The number of genes to sample tolerance -- The fraction tolance to use as a stopping condition (DEFAULT: 0.01) maxTime -- The approximate maximum time to run in seconds (DEFAULT: 600) chunkSize -- The number of samples to add between test (DEFAULT: 200) verbose -- Set to false to suppress output (DEFAULT: False) expName -- Set to echo this name if verbose = True (DEFAULT: None) Useage: varDist = getVarianceMeanSD(ratioVect, n) """ ratioVect = [x for x in ratioVect if not math.isnan(x)] if verbose: logging.info("Calculating background for %d sampled from %d in %s", n, len(ratioVect), expName) if n <= 1 or n > len(ratioVect): return [np.nan] varList = [] repeat = True startTime = dt.datetime.now() while repeat: newVars = [] for i in range(0, chunkSize): curSample = random.sample(ratioVect, n) try: newVar = np.var(curSample) except: newVar = 0 newVars.append(newVar) if len(varList) > 0: #True if past the first sample oldMean = np.mean(varList) oldVar = np.var(varList) varList = varList + newVars newMean = np.mean(varList) newVar = np.var(varList) meanWinTol = abs(newMean - oldMean) < tolerance * abs(oldMean) varWinTol = abs(oldVar - newVar) < tolerance * abs(oldVar) if meanWinTol and varWinTol: repeat = False else: varList = varList + newVars curTime = dt.datetime.now() if (curTime - startTime).seconds > maxTime: repeat = False return varList
fname = sic_dir + "sic_1992_2019.nc" f6 = Nio.open_file(fname) sic = f6.variables["sic"][:, :, :] del (fname) fname = v_ocn_dir + "v200_ocn_1992_2019.nc" f3 = Nio.open_file(fname) v_ocn = f3.variables["v_ocn"][:, :, :] del (fname) fname = u_atm_dir + "u_atm_1992_2019.nc" f4 = Nio.open_file(fname) u_atm = f4.variables["u_atm"][:, :, :] del (fname) u_atm_clim = np.mean(u_atm, axis=0) fname = v_atm_dir + "v_atm_1992_2019.nc" f5 = Nio.open_file(fname) v_atm = f5.variables["v_atm"][:, :, :] del (fname) v_atm_clim = np.mean(v_atm, axis=0) # Pick the desired time period u_ocn = u_ocn[0:3286, :, :] v_ocn = v_ocn[0:3286, :, :] u_atm = u_atm[0:3286, :, :] v_atm = v_atm[0:3286, :, :] sst = sst[0:3286, :, :] sic = sic[0:3286, :, :] time = time[0:3286]
def create_output_images(Rover): # Create a scaled map for plotting and clean up obs/nav pixels a bit if np.max(Rover.worldmap[:,:,2]) > 0: nav_pix = Rover.worldmap[:,:,2] > 0 navigable = Rover.worldmap[:,:,2] * (255 / np.mean(Rover.worldmap[nav_pix, 2])) else: navigable = Rover.worldmap[:,:,2] if np.max(Rover.worldmap[:,:,0]) > 0: obs_pix = Rover.worldmap[:,:,0] > 0 obstacle = Rover.worldmap[:,:,0] * (255 / np.mean(Rover.worldmap[obs_pix, 0])) else: obstacle = Rover.worldmap[:,:,0] likely_nav = navigable >= obstacle obstacle[likely_nav] = 0 plotmap = np.zeros_like(Rover.worldmap) plotmap[:, :, 0] = obstacle plotmap[:, :, 2] = navigable plotmap = plotmap.clip(0, 255) # Overlay obstacle and navigable terrain map with ground truth map map_add = cv2.addWeighted(plotmap, 1, Rover.ground_truth, 0.5, 0) # Check whether any rock detections are present in worldmap rock_world_pos = Rover.worldmap[:,:,1].nonzero() # If there are, we'll step through the known sample positions # to confirm whether detections are real samples_located = 0 if rock_world_pos[0].any(): rock_size = 2 for idx in range(len(Rover.samples_pos[0])): test_rock_x = Rover.samples_pos[0][idx] test_rock_y = Rover.samples_pos[1][idx] rock_sample_dists = np.sqrt((test_rock_x - rock_world_pos[1])**2 + \ (test_rock_y - rock_world_pos[0])**2) # If rocks were detected within 3 meters of known sample positions # consider it a success and plot the location of the known # sample on the map if np.min(rock_sample_dists) < 3: samples_located += 1 map_add[test_rock_y-rock_size:test_rock_y+rock_size, test_rock_x-rock_size:test_rock_x+rock_size, :] = 255 # Calculate some statistics on the map results # First get the total number of pixels in the navigable terrain map tot_nav_pix = np.float(len((plotmap[:,:,2].nonzero()[0]))) # Next figure out how many of those correspond to ground truth pixels good_nav_pix = np.float(len(((plotmap[:,:,2] > 0) & (Rover.ground_truth[:,:,1] > 0)).nonzero()[0])) # Next find how many do not correspond to ground truth pixels bad_nav_pix = np.float(len(((plotmap[:,:,2] > 0) & (Rover.ground_truth[:,:,1] == 0)).nonzero()[0])) # Grab the total number of map pixels tot_map_pix = np.float(len((Rover.ground_truth[:,:,1].nonzero()[0]))) # Calculate the percentage of ground truth map that has been successfully found perc_mapped = round(100*good_nav_pix/tot_map_pix, 1) # Calculate the number of good map pixel detections divided by total pixels # found to be navigable terrain if tot_nav_pix > 0: fidelity = round(100*good_nav_pix/(tot_nav_pix), 1) else: fidelity = 0 # Flip the map for plotting so that the y-axis points upward in the display map_add = np.flipud(map_add).astype(np.float32) # Add some text about map and rock sample detection results cv2.putText(map_add,"Time: "+str(np.round(Rover.total_time, 1))+' s', (0, 10), cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1) cv2.putText(map_add,"Mapped: "+str(perc_mapped)+'%', (0, 25), cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1) cv2.putText(map_add,"Mode: " + Rover.mode, (0,150),cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1) cv2.putText(map_add,"Fidelity: "+str(fidelity)+'%', (0, 40), cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1) cv2.putText(map_add,"Rocks", (0, 55), cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1) cv2.putText(map_add," Located: "+str(samples_located), (0, 70), cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1) cv2.putText(map_add," Collected: "+str(Rover.samples_collected), (0, 85), cv2.FONT_HERSHEY_COMPLEX, 0.4, (255, 255, 255), 1) # Convert map 
and vision image to base64 strings for sending to server pil_img = Image.fromarray(map_add.astype(np.uint8)) buff = BytesIO() pil_img.save(buff, format="JPEG") encoded_string1 = base64.b64encode(buff.getvalue()).decode("utf-8") pil_img = Image.fromarray(Rover.vision_image.astype(np.uint8)) buff = BytesIO() pil_img.save(buff, format="JPEG") encoded_string2 = base64.b64encode(buff.getvalue()).decode("utf-8") return encoded_string1, encoded_string2
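The two map statistics reduce to simple ratios: perc_mapped compares correctly mapped navigable pixels to the ground-truth total, while fidelity compares them to everything the rover marked as navigable. A tiny worked example with made-up pixel counts:

# Made-up pixel counts, only to illustrate the two ratios computed above.
good_nav_pix = 9000.0    # navigable pixels that agree with ground truth
tot_nav_pix = 12000.0    # all pixels the rover marked as navigable
tot_map_pix = 15000.0    # all ground-truth navigable pixels

perc_mapped = round(100 * good_nav_pix / tot_map_pix, 1)   # 60.0 %
fidelity = round(100 * good_nav_pix / tot_nav_pix, 1)      # 75.0 %
print(perc_mapped, fidelity)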
from sklearn.metrics import log_loss import lightgbm as lgb import pandas as pd from sklearn.model_selection import train_test_split import numpy as np train = pd.read_csv('../titanic/train.csv') test = pd.read_csv('../titanic/test.csv') sub = pd.read_csv('../titanic/gender_submission.csv') data = pd.concat([train, test], sort=False) data['Sex'].replace(['male', 'female'], [0, 1], inplace=True) data['Embarked'].fillna('S', inplace=True) data['Embarked'] = data['Embarked'].map({'S': 0, 'C': 1, 'Q': 2}).astype(int) data['Fare'].fillna(np.mean(data['Fare']), inplace=True) age_avg = data['Age'].mean() age_std = data['Age'].std() data['Age'].fillna(np.random.randint(age_avg - age_std, age_avg + age_std), inplace=True) delete_columns = ['Name', 'PassengerId', 'SibSp', 'Parch', 'Ticket', 'Cabin'] data.drop(delete_columns, axis=1, inplace=True) print(data.head()) train = data[:len(train)] test = data[len(train):] X_train = train.drop('Survived', axis=1) X_test = test.drop('Survived', axis=1) y_train = train['Survived']
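The preprocessing above stops at building X_train, X_test and y_train. A hedged sketch of one way to continue with the already-imported LightGBM and log_loss; the split ratio, parameters and 0.5 threshold are illustrative assumptions, not the original script's choices:

# A minimal continuation sketch (not necessarily the original settings):
# hold out part of the training data and fit a LightGBM classifier.
X_tr, X_val, y_tr, y_val = train_test_split(X_train, y_train,
                                            test_size=0.3,
                                            random_state=0,
                                            stratify=y_train)

clf = lgb.LGBMClassifier(objective='binary', n_estimators=1000,
                         learning_rate=0.05)
clf.fit(X_tr, y_tr, eval_set=[(X_val, y_val)])

print('validation log loss:', log_loss(y_val, clf.predict_proba(X_val)[:, 1]))
y_pred = (clf.predict_proba(X_test)[:, 1] > 0.5).astype(int)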
def MSRCR(self, image):
    """
    MSRCR (Multi-Scale Retinex with Color Restoration) is a retinex-based
    algorithm that uses logarithmic compression and spatial convolution.
    MSRCR combines the dynamic range compression and color constancy of the
    MSR with a color 'restoration' filter that provides excellent color
    rendition.

    Input:
    - image: image array
    Output:
    - image_out: treated image array
    """
    self.message.toprint('IMAGE_APPLY_MSRCR')
    image_original = np.float32(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Distribute the scale interactions
    max_scale = self.settings.RETINEX_MAX_SCALE
    nr_scale = self.settings.RETINEX_NR_SCALE
    scales = ScalesDistribution.apply(max_scale, nr_scale)

    # Zero-filled stacks, one layer per scale
    image_blur = np.zeros(shape=[len(scales),
                                 image_original.shape[0],
                                 image_original.shape[1],
                                 image_original.shape[2]])
    image_mlog = np.zeros(shape=[len(scales),
                                 image_original.shape[0],
                                 image_original.shape[1],
                                 image_original.shape[2]])

    # Do for each channel
    for channel in range(3):
        # Do for each distributed scale
        for scale_count, scale in enumerate(scales):
            # If sigma==0, it will be automatically calculated based on scale
            image_blur[scale_count, :, :, channel] = cv2.GaussianBlur(
                image_original[:, :, channel], (0, 0), scale)
            image_mlog[scale_count, :, :, channel] = np.log(
                image_original[:, :, channel] + 1.) - np.log(
                    image_blur[scale_count, :, :, channel] + 1.)

    image_retinex = np.mean(image_mlog, 0)

    alpha = self.settings.RETINEX_ALPHA
    gain = self.settings.RETINEX_GAIN
    offset = self.settings.RETINEX_OFFSET
    image_retinex = ColorRestoration.apply(image_original=image_original,
                                           image_retinex=image_retinex,
                                           alpha=alpha,
                                           gain=gain,
                                           offset=offset)

    # Mean of the color-restored retinex image
    image_mean = np.mean(image_retinex)
    # Standard deviation of the color-restored retinex image
    image_std = np.std(image_retinex)

    # Transmission map
    # The processing consists of applying, using the transmission map's mean
    # and standard deviation, a transformation of the type:
    # * newT = (oldT - mini) / (maxi - mini)
    # * with mini = mean - k * standard deviation
    # * with maxi = mean + k * standard deviation
    # where k = retinex_dynamic = contrast (variance) is decisive in the
    # image rendering:
    # * low values will increase the apparent contrast,
    # * high values will make the image more natural, with fewer artefacts
    #   and less haze.
    k = self.settings.RETINEX_DYNAMIC
    image_mini = image_mean - k * image_std
    image_maxi = image_mean + k * image_std
    image_maxi_mini = image_maxi - image_mini
    image_oldT_mini = image_retinex - image_mini
    image_out = np.uint8(
        np.clip(image_oldT_mini / image_maxi_mini * 255, 0, 255))
    image_out = cv2.cvtColor(image_out, cv2.COLOR_RGB2BGR)
    self.message.toprint('IMAGE_APPLIED_MSRCR')
    return image_out
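The final contrast stretch above is a mean ± k·std window followed by scaling to 0-255 and clipping. A small self-contained sketch of the same transformation on a random array; k = 3 is an arbitrary stand-in for RETINEX_DYNAMIC:

import numpy as np

rng = np.random.default_rng(0)
retinex = rng.normal(0.0, 1.0, (4, 4, 3))   # stand-in for image_retinex

k = 3.0                                      # stand-in for RETINEX_DYNAMIC
mini = retinex.mean() - k * retinex.std()
maxi = retinex.mean() + k * retinex.std()

# newT = (oldT - mini) / (maxi - mini), scaled to 8-bit and clipped
stretched = np.uint8(np.clip((retinex - mini) / (maxi - mini) * 255, 0, 255))
print(stretched.min(), stretched.max())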
import numpy as np
import sys

##MG = ['F5','F6']
MG = []
box = str(sys.argv[1])
dir_1 = str(sys.argv[2])  #; dir_2 = '/home/jarmijo/HOD_galaxies/'
dir_2 = str(sys.argv[3])
# print '\ndirectory:' + dir_1 + 'and' + dir_2 + '\n'
mark = ['0.1']
#chi_M = np.zeros((len(mark),len(MG)),dtype=float)
for p in range(len(mark)):
    m = np.loadtxt(dir_1 + 'marks/GR_mark_p' + mark[p] + '_' + box + '.txt')
    mean_m_GR = np.mean(m)
    print "mean mark (p = " + mark[p] + ") in GR is:" + str(mean_m_GR) + "\n"
    mean_m_GR2 = np.mean(
        np.loadtxt(dir_2 + 'marks/GR_mark_p' + mark[p] + '_' + box + '.txt'))
    DD_GR = np.loadtxt(dir_1 + 'pairs/D1D2_JK64_GR_' + box + '.txt')
    mm_GR = np.loadtxt(dir_1 + 'pairs/m1m2_JK64_GR_mark_p' + mark[p] + '_' +
                       box + '.txt')
    dGR = mm_GR / (DD_GR * mean_m_GR * mean_m_GR2)
    GR_mean = np.mean(dGR, axis=0)
    n = len(dGR)
    N = len(GR_mean)
    # Jackknife covariance matrix of the marked correlation estimate
    C = np.zeros((N, N))
    for i in range(N):
        for j in range(N):
            for k in range(n):
                C[i][j] += (n - 1) / float(n) * (dGR[k][i] - GR_mean[i]) * (
                    dGR[k][j] - GR_mean[j])
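The element-wise triple loop at the end builds a jackknife covariance matrix. Assuming it follows the standard (n-1)/n jackknife form shown above, an equivalent vectorised sketch is:

import numpy as np

def jackknife_covariance(d):
    # Vectorised form of the element-wise loop above:
    # C[i][j] = (n - 1) / n * sum_k (d[k][i] - mean[i]) * (d[k][j] - mean[j])
    n = d.shape[0]
    dev = d - d.mean(axis=0)
    return (n - 1) / float(n) * np.dot(dev.T, dev)

# Quick check on random data shaped like dGR (64 jackknife resamplings x N bins).
np.random.seed(0)
dGR_demo = np.random.normal(1.0, 0.1, size=(64, 20))
print(jackknife_covariance(dGR_demo).shape)   # (20, 20)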
def compute_mean_d(self): return np.mean([self.compute_d(i) for i in self.x_list])
def train_model_regression(X, X_test, y, params, folds, model_type='lgb', eval_metric='mae', columns=None, plot_feature_importance=False, model=None, verbose=10000, early_stopping_rounds=200, n_estimators=50000, mol_type=-1, fold_group=None, skip_folds=None, phase_mark="", skipped_mark=[]): """ A function to train a variety of regression models. Returns dictionary with oof predictions, test predictions, scores and, if necessary, feature importances. :params: X - training data, can be pd.DataFrame or np.ndarray (after normalizing) :params: X_test - test data, can be pd.DataFrame or np.ndarray (after normalizing) :params: y - target :params: folds - folds to split data :params: model_type - type of model to use :params: eval_metric - metric to use :params: columns - columns to use. If None - use all columns :params: plot_feature_importance - whether to plot feature importance of LGB :params: model - sklearn model, works only for "sklearn" model type """ assert isinstance(skip_folds, list) or skip_folds is None print(f"skip_folds :{skip_folds}") columns = X.columns if columns is None else columns X_test = X_test[columns] # to set up scoring parameters metrics_dict = { 'mae': { 'lgb_metric_name': 'mae', 'catboost_metric_name': 'MAE', 'sklearn_scoring_function': metrics.mean_absolute_error }, 'group_mae': { 'lgb_metric_name': 'mae', 'catboost_metric_name': 'MAE', 'scoring_function': group_mean_log_mae }, 'mse': { 'lgb_metric_name': 'mse', 'catboost_metric_name': 'MSE', 'sklearn_scoring_function': metrics.mean_squared_error } } result_dict = {} # out-of-fold predictions on train data oof = np.zeros(len(X)) # averaged predictions on train data prediction = np.zeros(len(X_test)) # list of scores on folds scores = [] feature_importance = pd.DataFrame() model_list = [] # split and train on folds for fold_n, (train_index, valid_index) in enumerate(folds.split(X, groups=fold_group)): if skip_folds is not None and fold_n in skip_folds and phase_mark in skipped_mark: print(f'Fold {fold_n + 1} is skipped!!! 
at {time.ctime()}') oof = unpickle(mid_path / f"oof_cv{phase_mark}_{fold_n}.pkl", ) y_pred = unpickle( mid_path / f"prediction_cv{phase_mark}_{fold_n}.pkl", ) model = unpickle(mid_path / f"model_cv{phase_mark}_{fold_n}.pkl", ) fold_importance = unpickle( mid_path / f"importance_cv{phase_mark}_{fold_n}.pkl", ) feature_importance = pd.concat( [feature_importance, fold_importance], axis=0) prediction += y_pred model_list += [model] continue print(f'Fold {fold_n + 1} started at {time.ctime()}') if type(X) == np.ndarray: X_train, X_valid = X[columns][train_index], X[columns][valid_index] y_train, y_valid = y[train_index], y[valid_index] else: X_train, X_valid = X[columns].iloc[train_index], X[columns].iloc[ valid_index] y_train, y_valid = y.iloc[train_index], y.iloc[valid_index] if model_type == 'lgb': model = lgb.LGBMRegressor(**params, n_estimators=n_estimators, n_jobs=-1, importance_type='gain') print(model) model.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_valid, y_valid)], eval_metric=metrics_dict[eval_metric]['lgb_metric_name'], verbose=verbose, early_stopping_rounds=early_stopping_rounds) y_pred_valid = model.predict(X_valid) y_pred = model.predict(X_test, num_iteration=model.best_iteration_) if model_type == 'xgb': train_data = xgb.DMatrix(data=X_train, label=y_train, feature_names=X.columns) valid_data = xgb.DMatrix(data=X_valid, label=y_valid, feature_names=X.columns) watchlist = [(train_data, 'train'), (valid_data, 'valid_data')] params["objective"] = "reg:linear" params["eval_metric"] = metrics_dict[eval_metric][ 'lgb_metric_name'] model = xgb.train(dtrain=train_data, num_boost_round=20000, evals=watchlist, early_stopping_rounds=200, verbose_eval=verbose, params=params) y_pred_valid = model.predict(xgb.DMatrix(X_valid, feature_names=X.columns), ntree_limit=model.best_ntree_limit) y_pred = model.predict(xgb.DMatrix(X_test, feature_names=X.columns), ntree_limit=model.best_ntree_limit) if model_type == 'sklearn': model = model model.fit(X_train, y_train) y_pred_valid = model.predict(X_valid).reshape(-1, ) score = metrics_dict[eval_metric]['sklearn_scoring_function']( y_valid, y_pred_valid) print(f'Fold {fold_n}. 
{eval_metric}: {score:.4f}.') print('') y_pred = model.predict(X_test).reshape(-1, ) if model_type == 'cat': model = CatBoostRegressor( iterations=20000, eval_metric=metrics_dict[eval_metric]['catboost_metric_name'], **params, loss_function=metrics_dict[eval_metric] ['catboost_metric_name']) model.fit(X_train, y_train, eval_set=(X_valid, y_valid), cat_features=[], use_best_model=True, verbose=False) y_pred_valid = model.predict(X_valid) y_pred = model.predict(X_test) oof[valid_index] = y_pred_valid.reshape(-1, ) if eval_metric != 'group_mae': scores.append( metrics_dict[eval_metric]['sklearn_scoring_function']( y_valid, y_pred_valid)) else: scores.append(metrics_dict[eval_metric]['scoring_function']( y_valid, y_pred_valid, X_valid['type'])) prediction += y_pred if model_type == 'lgb' and plot_feature_importance: # feature importance fold_importance = pd.DataFrame() fold_importance["feature"] = columns fold_importance["importance"] = model.feature_importances_ fold_importance["fold"] = fold_n + 1 try: fold_importance.to_csv(mid_path / f"importance_cv_{fold_n}.csv") except Exception as e: print("failed to save importance...") print(e) feature_importance = pd.concat( [feature_importance, fold_importance], axis=0) model_list += [model] try: to_pickle(mid_path / f"oof_cv{phase_mark}_{fold_n}.pkl", oof) to_pickle(mid_path / f"prediction_cv{phase_mark}_{fold_n}.pkl", y_pred) to_pickle(mid_path / f"model_cv{phase_mark}_{fold_n}.pkl", model) to_pickle(mid_path / f"importance_cv{phase_mark}_{fold_n}.pkl", fold_importance) except Exception as e: print("failed to save intermediate data...") print(e) if model_type == 'lgb' and plot_feature_importance: result_dict['importance'] = feature_importance prediction /= folds.n_splits try: cv_score_msg = f'{DATA_VERSION}_{TRIAL_NO}' + ' CV mean score: {0:.4f}, std: {1:.4f}.'.format( np.mean(scores), np.std(scores)) print(cv_score_msg) send_message(cv_score_msg) except Exception as e: print(e) pass result_dict["models"] = model_list result_dict['oof'] = oof result_dict['prediction'] = prediction result_dict['scores'] = scores return result_dict
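train_model_regression expects a fold splitter plus the project's helpers (mid_path, to_pickle, unpickle, send_message, group_mean_log_mae) defined elsewhere. A hedged usage sketch with made-up data, group ids and parameters, assuming a LightGBM version that accepts verbose/early_stopping_rounds in fit() as the function does:

import numpy as np
import pandas as pd
from sklearn.model_selection import GroupKFold

# Synthetic data only; the real feature sets and parameters are not shown here.
X_demo = pd.DataFrame(np.random.rand(1000, 5),
                      columns=['f%d' % i for i in range(5)])
X_test_demo = pd.DataFrame(np.random.rand(200, 5), columns=X_demo.columns)
y_demo = pd.Series(np.random.rand(1000))
groups_demo = np.random.randint(0, 50, 1000)   # e.g. molecule ids

params_demo = {'objective': 'regression', 'learning_rate': 0.1,
               'num_leaves': 31}

result = train_model_regression(X_demo, X_test_demo, y_demo, params_demo,
                                folds=GroupKFold(n_splits=4),
                                model_type='lgb', eval_metric='mae',
                                fold_group=groups_demo,
                                n_estimators=500, verbose=100)
print(np.mean(result['scores']))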
# Determine errors and error rate
e = y_test_est != y_test
error_rate = (sum(e).type(torch.float) / len(y_test)).data.numpy()
errors.append(error_rate)  # store error rate for current CV fold

# Display the learning curve for the best net in the current fold
h, = summaries_axes[0].plot(learning_curve, color=color_list[k])
h.set_label('CV fold {0}'.format(k + 1))
summaries_axes[0].set_xlabel('Iterations')
summaries_axes[0].set_xlim((0, max_iter))
summaries_axes[0].set_ylabel('Loss')
summaries_axes[0].set_title('Learning curves')

# Display the error rate across folds
summaries_axes[1].bar(np.arange(1, K + 1), np.squeeze(np.asarray(errors)),
                      color=color_list)
summaries_axes[1].set_xlabel('Fold')
summaries_axes[1].set_xticks(np.arange(1, K + 1))
summaries_axes[1].set_ylabel('Error rate')
summaries_axes[1].set_title('Test misclassification rates')

print('Diagram of best neural net in last fold:')
weights = [net[i].weight.data.numpy().T for i in [0, 2]]
biases = [net[i].bias.data.numpy() for i in [0, 2]]
tf = [str(net[i]) for i in [1, 3]]
draw_neural_net(weights, biases, tf, attribute_names=attributeNames)

# Print the average classification error rate
print('\nGeneralization error/average error rate: {0}%'.format(
    round(100 * np.mean(errors), 4)))
#print('Ran Exercise 8.2.5')
ss_values=ss_values) else: all_sampling_dates = [45, 190, 300, 360] all_LHs = [0.6, 0.7, 0.8, 1.0] sampling_date_names = ['Feb. 14', 'Jul. 9', 'Oct. 27', 'Dec. 26'] # calculate the average prevalence (equally weighted across all days of the year) for each LH scenario print('calculating average prevalences...') ave_prevs = [None] * len(all_LHs) for lh in range(len(all_LHs)): with open( "simOutputs_DTK/prevalenceData_xLH%i_%s.p" % (round(all_LHs[lh] * 100), filename_suffix), "rb") as f: prev_list = pickle.load(f) ave_prevs[lh] = np.mean(prev_list[0]) # Plots if need_to_generate_plots == True: print('creating plot panels...') # To greatly reduce the difficulty of this problem, we assume the population size is constant at the mean value across # all DTK simulations. This assumption may possibly give rise to some biases and will ideally be explored through # a sensitivity analysis in the future (something as easy as using the max and min values and seeing whether # results change substantially). # load the population sizes across all simulations and take average with open("simOutputs_DTK/pop_size_sim_all_%s.p" % filename_suffix, "rb") as f: pop_size_sim = pickle.load(f) pop_size = int(round(np.mean(pop_size_sim)))
def pearSim(A, B):
    meanA = np.mean(A)
    meanB = np.mean(B)
    A_B = sum((A[i] - meanA) * (B[i] - meanB) for i in range(len(A)))
    pear = A_B / (np.linalg.norm(A - meanA) * np.linalg.norm(B - meanB))
    return 0.5 + 0.5 * pear
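Since the raw Pearson coefficient lies in [-1, 1], the 0.5 + 0.5 * pear rescaling maps the similarity into [0, 1]; a quick check on small illustrative vectors:

import numpy as np

a = np.array([1.0, 2.0, 3.0, 4.0])
print(pearSim(a, a))                                # identical vectors -> 1.0
print(pearSim(a, -a))                               # anti-correlated -> 0.0
print(pearSim(a, np.array([1.0, 2.0, 3.0, 5.0])))   # close to 1.0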