def plot_validation_curve(estimator, title, X, y, param_name, param_range,
                          cv=10, scoring='accuracy', n_jobs=2):
    """Plot training and cross-validation scores against one hyper-parameter.

    ``validation_curve`` is evaluated for ``param_name`` over ``param_range``.
    For the training folds and the held-out folds the mean score is drawn on
    a logarithmic x axis, with a shaded band of one standard deviation
    (taken across folds). The figure is displayed with ``plt.show()``.

    Parameters
    ----------
    estimator : object
        Estimator handed to ``validation_curve``.
    title : str
        Figure title.
    X, y : array-like
        Data and targets, passed through unchanged.
    param_name : str
        Name of the estimator parameter that is varied.
    param_range : sequence
        Values tried for ``param_name``; also used as x coordinates.
    cv, scoring, n_jobs
        Forwarded to ``validation_curve``.
    """
    # ``sklearn.learning_curve`` no longer exists in current scikit-learn;
    # prefer the current location and fall back for old installations.
    try:
        from sklearn.model_selection import validation_curve
    except ImportError:
        from sklearn.learning_curve import validation_curve

    # Keywords are accepted by both old and new signatures, whereas newer
    # releases reject these two arguments when passed positionally.
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range,
        cv=cv, scoring=scoring, n_jobs=n_jobs)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    plt.figure()
    plt.title(title)
    plt.xlabel(param_name)
    plt.ylabel("Score")
    plt.ylim(0.0, 1.1)
    plt.semilogx(param_range, train_scores_mean,
                 label="Training score", color="r")
    plt.fill_between(param_range,
                     train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std,
                     alpha=0.2, color="r")
    plt.semilogx(param_range, test_scores_mean,
                 label="Cross-validation score", color="g")
    plt.fill_between(param_range,
                     test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std,
                     alpha=0.2, color="g")
    plt.legend(loc="best")
    plt.show()
def plot_hourly_values(data, forecast_hours, apply_fun, handle=None,
                       color='b', spread=True):
    """Plot per-forecast-hour values with an optional +/- 2x spread band.

    ``get_hourly_values(data, apply_fun, forecast_hours)`` must return a 2-D
    array: column 0 is drawn as the curve, column 1 is used as the half-width
    unit of the band (the band is column 0 +/- 2 * column 1). Rows where
    column 0 is NaN are dropped; column 1 must be NaN in exactly those rows.

    Parameters
    ----------
    data, apply_fun
        Passed straight to ``get_hourly_values``.
    forecast_hours : numpy.ndarray
        X coordinates; must support boolean-mask indexing.
    handle : figure or None
        Existing figure to draw into (selected through ``handle.number``).
        When None the current figure is used and ``plt.show()`` is called.
    color : str
        Passed as the format argument of ``plt.plot`` and as the band colour.
    spread : bool
        Whether to draw the shaded band.

    Returns
    -------
    The unfiltered array returned by ``get_hourly_values``.
    """
    hourly_values = get_hourly_values(data, apply_fun, forecast_hours)

    centre_col = hourly_values[:, 0]
    spread_col = hourly_values[:, 1]

    # Both columns are expected to be missing in exactly the same rows.
    usable = ~np.isnan(centre_col)
    spread_usable = ~np.isnan(spread_col)
    assert all(usable == spread_usable)

    if handle is not None:
        plt.figure(handle.number)

    hours = forecast_hours[usable]
    centre = hourly_values[usable, 0]
    plt.plot(hours, centre, color)

    if spread:
        half_width = 2 * hourly_values[usable, 1]
        plt.fill_between(hours, centre - half_width, centre + half_width,
                         color=color, alpha=0.2)

    plt.xlabel("Forecast hour")
    plt.grid(True)

    # Only take over display when the caller did not supply a figure.
    if handle is None:
        plt.show()
    return hourly_values
def meanPlot(self, scale, xIndex=0, yIndex=1):
    """Plot the binned median of one logged series with a quartile band.

    ``self.log[xIndex]`` supplies x values and ``self.log[yIndex]`` the
    matching y values. Bin ``num`` gathers samples whose x is at least
    ``(num - 1) * scale`` and is closed when a sample with x greater than
    ``(num + 1) * scale`` is met, so consecutive bins overlap. A closed,
    non-empty bin contributes the point ``(num * scale, median)`` and the
    25th/75th percentiles for the shaded band.

    NOTE(review): the loop assumes x values are non-decreasing, and the bin
    still open when the data runs out is never emitted - confirm both are
    intended.

    Parameters
    ----------
    scale : number
        Bin spacing in x units.
    xIndex, yIndex : int
        Positions of the x and y series inside ``self.log``.
    """
    i = 0
    nxti = 0          # index where the current bin's samples start
    num = 0           # current bin number
    numItems = 0
    setNxt = False
    x = []
    y = []
    error = [[], []]  # [lower quartiles, upper quartiles]
    tmp = []

    while i < len(self.log[xIndex]):
        if self.log[xIndex][i] > (num + 1) * scale:
            if numItems != 0:
                low, median, high = np.percentile(tmp, [25, 50, 75])
                x.append(num * scale)
                y.append(median)
                error[0].append(low)
                error[1].append(high)
            # Rewind so the next (overlapping) bin re-reads shared samples.
            i = nxti
            num += 1
            tmp = []
            numItems = 0
            setNxt = False
        if self.log[xIndex][i] >= (num - 1) * scale:
            tmp.append(self.log[yIndex][i])
            numItems += 1
            if not setNxt:
                setNxt = True
                nxti = i
        i += 1

    c = plt.plot(x, y, zorder=10)[0].get_color()
    # alpha must be numeric; the former string "0.25" is rejected by
    # matplotlib.
    plt.fill_between(x, error[0], error[1], color=c, alpha=0.25, zorder=0)
    plt.show(block=False)
def plotOverlapMatrix(O):
    """Plot the probability of observing a sample from state i (row) in
    state j (column) and save it as ``O_MBAR.pdf``.

    Each cell is shaded in proportion to its value relative to the largest
    entry of ``O`` and annotated with the value (two decimals, leading zero
    stripped; values below 0.005 are left blank). For convenience, the
    neighboring state cells are fringed in bold.

    The number of states ``K`` is read from the enclosing/module scope.

    Parameters
    ----------
    O : 2-D array supporting ``O[j, i]`` indexing and ``.max()``.
    """
    max_prob = O.max()
    fig = pl.figure(figsize=(K/2., K/2.))
    fig.add_subplot(111, frameon=False, xticks=[], yticks=[])

    for i in range(K):
        if i != 0:
            # Thin grid lines between cells.
            pl.axvline(x=i, ls='-', lw=0.5, color='k', alpha=0.25)
            pl.axhline(y=i, ls='-', lw=0.5, color='k', alpha=0.25)
        for j in range(K):
            if O[j, i] < 0.005:
                ii = ''
            else:
                ii = ("%.2f" % O[j, i])[1:]
            alf = O[j, i]/max_prob
            # Row j is drawn from the top, hence the K - j flip.
            pl.fill_between([i, i+1], [K-j, K-j], [K-(j+1), K-(j+1)],
                            color='k', alpha=alf)
            pl.annotate(ii, xy=(i, j), xytext=(i+0.5, K-(j+0.5)), size=8,
                        textcoords='data', va='center', ha='center',
                        color=('k' if alf < 0.5 else 'w'))

    # Staircase coordinates 0,0,1,1,...,K,K. ``range`` must be materialised
    # before repetition: ``2*range(...)`` is a TypeError on Python 3.
    cx = sorted(2*list(range(K+1)))
    cy = sorted(2*list(range(K+1)), reverse=True)
    pl.plot(cx[2:-1], cy[1:-2], 'k-', lw=2.0)
    pl.plot(numpy.array(cx[2:-3])+1, cy[1:-4], 'k-', lw=2.0)
    pl.plot(cx[1:-2], numpy.array(cy[:-3])-1, 'k-', lw=2.0)
    pl.plot(cx[1:-4], numpy.array(cy[:-5])-2, 'k-', lw=2.0)

    pl.xlim(0, K)
    pl.ylim(0, K)
    pl.savefig('O_MBAR.pdf', bbox_inches='tight', pad_inches=0.0)
    pl.close(fig)
    return
def _plot_scores(tuo_location_and_influence_score, marking_locations,
                 no_of_bins_for_influence_score, smooth=True):
    """Plot the normalised histogram of influence scores as a filled curve.

    Parameters
    ----------
    tuo_location_and_influence_score : sequence of (location, score) pairs
    marking_locations : iterable
        Locations to highlight; each one found among the pairs is printed
        with its score and marked with a magenta dot just above the x axis,
        the others are only printed.
    no_of_bins_for_influence_score : int or sequence
        ``bins`` argument for ``np.histogram``.
    smooth : bool
        When True the curve is passed through ``splineSmooth`` first.

    Returns
    -------
    The last y tick position reported by ``plt.yticks()`` before the ticks
    are reduced to the single second-to-last one.
    """
    figure = plt.figure()
    size = figure.get_size_inches()
    # Twice as wide and half as tall as the default figure.
    figure.set_size_inches((size[0]*2, size[1]*0.5))

    # A comprehension instead of ``zip(*pairs)[1]``, which is not
    # subscriptable on Python 3.
    influence_scores = [score
                        for _, score in tuo_location_and_influence_score]
    no_of_influence_scores = len(influence_scores)
    hist_influence_score, bin_edges_influence_score = np.histogram(
        influence_scores, no_of_bins_for_influence_score)
    normed_hist_influence_score = [
        (count + 0.) / no_of_influence_scores
        for count in hist_influence_score
    ]
    bin_edges_influence_score = list(bin_edges_influence_score)

    # Pad with zero-height points at both ends so the curve closes on the
    # x axis.
    bin_edges_influence_score = ([bin_edges_influence_score[0]]
                                 + bin_edges_influence_score
                                 + [bin_edges_influence_score[-1]])
    normed_hist_influence_score = [0.0] + normed_hist_influence_score + [0.0]
    x_bin_edges_influence_score = bin_edges_influence_score[:-1]
    y_normed_hist_influence_score = normed_hist_influence_score

    if smooth:
        x_bin_edges_influence_score, y_normed_hist_influence_score = \
            splineSmooth(x_bin_edges_influence_score,
                         y_normed_hist_influence_score)

    plt.plot(x_bin_edges_influence_score, y_normed_hist_influence_score,
             lw=1, color='#FF9E05')
    plt.fill_between(x_bin_edges_influence_score,
                     y_normed_hist_influence_score,
                     color='#FF9E05', alpha=0.3)

    mf_neighbor_location_to_influence_score = dict(
        tuo_location_and_influence_score)
    for marking_location in marking_locations:
        if marking_location in mf_neighbor_location_to_influence_score:
            score = mf_neighbor_location_to_influence_score[marking_location]
            # Formatted explicitly so the output is the same under the
            # Python 2 print statement and the Python 3 print function.
            print('%s %s' % (marking_location, score))
            plt.scatter([score], [0.0005], s=20, lw=0, color='m', alpha=1.,
                        label=marking_location)
        else:
            print('%s' % (marking_location,))

    (ticks, labels) = plt.yticks()
    plt.yticks([ticks[-2]])
    # ``bottom`` replaces the ``ymin`` keyword, which newer matplotlib no
    # longer accepts.
    plt.ylim(bottom=0.0)
    return ticks[-1]
def plot_rolling_auto_home(df_attack=None, df_defence=None, window=5, nstd=1,
                           detected_events_home=None,
                           detected_events_away=None, sky_events=None):
    """Plot the home team's attack series with a rolling band and events.

    Draws ``df_attack``, a grey band of rolling mean +/- ``nstd`` rolling
    standard deviations, the local maxima of the series (starred when they
    fall outside mean +/- one rolling std, crossed when inside), the negated
    ``df_defence`` series, and a vertical text label for every detected
    event whose category is not ``'OTHER'``. The figure is saved as
    ``<HOME>attack_<AWAY>_plain.pdf``; team names and week come from the
    module-level ``all_matches[0]``.

    Parameters
    ----------
    df_attack, df_defence : pandas Series
        Required despite the ``None`` defaults.
    window : int
        Rolling window length (used in the title as minutes).
    nstd : number
        Width of the shaded band in rolling standard deviations.
    detected_events_home, detected_events_away : sequence or None
        Records convertible by ``pd.DataFrame`` with ``category`` and
        ``event`` fields. ``None`` or empty means "no events".
    sky_events
        Currently unused.

    Returns
    -------
    The local maxima of ``df_attack`` (before filtering).
    """
    sns.set_context("notebook", font_scale=1.8,
                    rc={"lines.linewidth": 3.5, "figure.figsize": (18, 12)})
    plt.subplots_adjust(bottom=0.85)

    # NOTE(review): pd.rolling_mean / pd.rolling_std / .ix are kept as-is to
    # stay on the pandas generation this file was written for; newer pandas
    # has removed them - confirm the supported version before upgrading.
    mean = pd.rolling_mean(df_attack, center=True, window=window)
    std = pd.rolling_std(df_attack, center=True, window=window)

    detected_plot_extrema = df_attack.ix[
        argrelextrema(df_attack.values, np.greater)]

    # Maxima inside mean +/- 1 std are treated as noise, the rest are kept.
    within_band = (df_attack > mean - std) & (df_attack < mean + std)
    df_filt_noise = df_attack[within_band]
    df_filt_noise = df_filt_noise.ix[detected_plot_extrema.index].dropna()
    df_filt_keep = df_attack[~within_band]
    df_filt_keep = df_filt_keep.ix[detected_plot_extrema.index].dropna()

    match = all_matches[0]
    home_team = match['home_team']
    away_team = match['away_team']

    plt.plot(df_attack, color='#4CA64C',
             label='{} Attack'.format(home_team.title()))
    plt.fill_between(df_attack.index, (mean - nstd*std), (mean + nstd*std),
                     interpolate=False, alpha=0.4, color='#B2B2B2',
                     # Raw string: same text, no invalid escape sequences.
                     label=r'$\mu + {} \times \sigma$'.format(nstd))
    plt.scatter(df_filt_keep.index, df_filt_keep.values, marker='*', s=120,
                color='#000000', zorder=10,
                label='Selected maxima post-filtering')
    plt.scatter(df_filt_noise.index, df_filt_noise.values, marker='x', s=120,
                color='#000000', zorder=10,
                label='Unselected maxima post-filtering')
    df_defence.apply(lambda x: -1*x).plot(
        color='#000000', label='{} Defence'.format(home_team.title()))

    # (events, team name, label template, box colour); home is drawn first.
    annotations = (
        (detected_events_home, home_team, "{} {} \n{}", 'green'),
        (detected_events_away, away_team, "{} {} {}", 'red'),
    )
    for events, team, template, facecolor in annotations:
        # Missing or empty event lists used to crash (len(None)) or leave
        # the event frame undefined; they now simply add no labels.
        if events is None or len(events) == 0:
            continue
        for _, row in pd.DataFrame(events).iterrows():
            if row.category == 'OTHER':
                continue
            plt.text(row.event, df_attack.max(),
                     template.format(team.upper(), row.category, row.event),
                     rotation='vertical', color='black',
                     bbox=dict(facecolor=facecolor, alpha=0.2))

    plt.legend(loc=4)
    ax = plt.gca()
    ax.set_xlabel('time')
    plt.ylabel('Tweet frequency')
    plt.title('{} vs. {} (WK {}) - rolling averages window={} mins'.format(
        home_team.title(), away_team.title(), match['dbname'], window))
    plt.savefig('{}attack_{}_plain.pdf'.format(
        home_team.upper(), away_team.upper()))
    return detected_plot_extrema
def plot_validation_curve(model, X, y, scorer, param_name,
                          param_range=np.linspace(0.1, 1, 5), cv=None,
                          n_jobs=5, ylim=None,
                          title="Xval. validation curve"):
    """Plot a validation curve, expressed as error, for ``model``.

    ``validation_curve`` is run for ``param_name`` over ``param_range``; the
    mean score of each fold family is converted to ``1 - score`` and drawn
    on a logarithmic x axis with a band of one standard deviation.

    Parameters
    ----------
    model, X, y : passed through to ``validation_curve``.
    scorer : forwarded as ``scoring``.
    param_name : name of the parameter that is varied.
    param_range : values tried; also the x coordinates.
    cv, n_jobs : forwarded to ``validation_curve``.
    ylim : optional (ymin, ymax) pair for the y axis.
    title : figure title.

    Returns
    -------
    (df, plt) where ``df`` has the columns ``param_range``, ``train_mean``,
    ``train_std``, ``test_mean`` and ``test_std``.
    """
    df = pd.DataFrame()
    df['param_range'] = param_range

    train_scores, test_scores = validation_curve(
        model, X, y,
        param_name=param_name, param_range=param_range,
        cv=cv, scoring=scorer, n_jobs=n_jobs,
    )

    # Scores become errors; the spread is unaffected by the 1 - x flip.
    for split, scores in (('train', train_scores), ('test', test_scores)):
        df[split + '_mean'] = 1 - np.mean(scores, axis=1)
        df[split + '_std'] = np.std(scores, axis=1)

    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Parameter value")
    plt.ylabel("Error (1-score)")
    plt.grid()

    series = (
        (df.train_mean, df.train_std, "r", "Training"),
        (df.test_mean, df.test_std, "g", "Test"),
    )
    for centre, spread, shade, name in series:
        plt.semilogx(param_range, centre, color=shade, label=name)
        plt.fill_between(param_range, centre - spread, centre + spread,
                         alpha=0.1, color=shade)

    plt.legend(loc="best")
    plt.show()
    return df, plt
def main():
    """Report feature importances and plot a random-forest validation curve.

    Loads the training data named in ``config.paths``, fits a single-tree
    ``RandomForestClassifier`` to print its feature importances (and the
    index/value of the largest one), then evaluates an entropy-criterion
    forest over a range of ``n_estimators`` with ``validation_curve`` and
    plots mean training/test scores with one-standard-deviation bands.
    """
    S, col_names_S = load_data(config.paths.training_data,
                               config.paths.cache_folder)
    Xs, Ys, col_names_S = extract_xy(S, col_names_S)
    # Ys is converted with .toarray(); flatten it once for both uses.
    targets = Ys.toarray().ravel()

    a = RandomForestClassifier(n_estimators=1)
    a.fit(Xs.toarray(), targets)
    best_features = a.feature_importances_
    max_ind, max_val = max(enumerate(best_features),
                           key=operator.itemgetter(1))

    # print(...) with a single argument behaves the same as the former
    # Python 2 print statements and also runs on Python 3.
    print(best_features)
    print('%s %s' % (max_ind, max_val))
    print(Xs.shape)
    print(Ys.shape)

    param_range = [1, 3, 5, 7, 10, 15, 20, 30, 60, 80]
    train_scores, test_scores = validation_curve(
        RandomForestClassifier(criterion='entropy'), Xs, targets,
        'n_estimators', param_range)
    print(train_scores)
    print(test_scores)

    train_mean = np.mean(train_scores, axis=1)
    train_std = np.std(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)
    test_std = np.std(test_scores, axis=1)

    plt.title("Validation Curve for Random Forest")
    plt.xlabel("Number of Trees")
    plt.ylabel("Score")
    plt.plot(param_range, train_mean, label="Training Score", color='r')
    plt.fill_between(param_range, train_mean - train_std,
                     train_mean + train_std, alpha=0.2, color='r')
    plt.plot(param_range, test_mean, label="Test Score", color='b')
    plt.fill_between(param_range, test_mean - test_std,
                     test_mean + test_std, alpha=0.2, color='b')
    plt.legend(loc="best")
    plt.show()
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
    """
    source: http://scikit-learn.org/stable/auto_examples/plot_learning_curve.html

    Generate a simple plot of the test and training learning curve.

    Parameters
    ----------
    estimator : object
        Estimator passed to ``learning_curve``.

    title : string
        Title for the chart.

    X : array-like
        Training data, passed to ``learning_curve``.

    y : array-like
        Target relative to X, passed to ``learning_curve``.

    ylim : tuple, shape (ymin, ymax), optional
        Defines minimum and maximum y values plotted.

    cv : optional
        Cross-validation specification forwarded to ``learning_curve``.

    n_jobs : integer, optional
        Number of jobs to run in parallel (default 1).

    train_sizes : array-like, optional
        Training-set sizes forwarded to ``learning_curve``.

    Returns
    -------
    The ``plt`` module, so the caller can show or save the figure.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)

    # (mean, std, colour, legend label) per curve, statistics across folds.
    curves_to_draw = [
        (np.mean(scores, axis=1), np.std(scores, axis=1), shade, name)
        for scores, shade, name in (
            (train_scores, "r", "Training score"),
            (test_scores, "g", "Cross-validation score"),
        )
    ]

    plt.grid()
    # Bands first, then the lines, matching the original drawing order.
    for centre, spread, shade, _ in curves_to_draw:
        plt.fill_between(train_sizes, centre - spread, centre + spread,
                         alpha=0.1, color=shade)
    for centre, _, shade, name in curves_to_draw:
        plt.plot(train_sizes, centre, 'o-', color=shade, label=name)

    plt.legend(loc="best")
    return plt
def plot_conv(all_JSD, all_JSDs, rest_type):
    """Plot JSD convergence per restraint type and save ``convergence.pdf``.

    One subplot is drawn per entry of ``rest_type``. Each shows, against the
    percentage of the dataset used, the value from ``all_JSD`` (connected
    markers) and the mean of the matching column of ``all_JSDs`` (markers
    only) with a band of one standard deviation.

    Parameters
    ----------
    all_JSD : sequence
        ``all_JSD[f][r]`` is the value for data fraction ``f`` and
        restraint ``r``.
    all_JSDs : sequence of 2-D arrays
        ``all_JSDs[f][:, r]`` holds the repeated values for fraction ``f``
        and restraint ``r``.
    rest_type : sequence of str
        Legend labels, one per restraint.
    """
    fold = len(all_JSD)
    n_rest = len(rest_type)

    new_JSD = [[all_JSD[f][r] for f in range(fold)] for r in range(n_rest)]
    JSD_dist = [[np.mean(all_JSDs[f][:, r]) for f in range(fold)]
                for r in range(n_rest)]
    JSD_std = [[np.std(all_JSDs[f][:, r]) for f in range(fold)]
               for r in range(n_rest)]

    plt.figure(figsize=(10, 5*n_rest))
    # One x position per fold at 100/fold, 200/fold, ..., 100 percent. The
    # former arange used ``fold`` as step, which only has ``fold`` points
    # when fold == 10.
    x = np.arange(1, fold + 1) * (100. / fold)
    colors = ['red', 'blue', 'green', 'black', 'magenta', 'gold', 'navy']

    for i in range(n_rest):
        # Cycle the palette instead of failing beyond seven restraints.
        shade = colors[i % len(colors)]
        centre = np.array(JSD_dist[i])
        spread = np.array(JSD_std[i])
        plt.subplot(n_rest, 1, i+1)
        # No plt.hold(True): it was removed from matplotlib, and drawing
        # onto the same axes is the default behaviour.
        plt.plot(x, new_JSD[i], 'o-', color=shade, label=rest_type[i])
        plt.plot(x, JSD_dist[i], 'o', color=shade, label=rest_type[i])
        plt.fill_between(x, centre + spread, centre - spread,
                         color=shade, alpha=0.2)
        plt.xlabel('dataset (%)')
        plt.ylabel('JSD')
        plt.legend(loc='best')

    plt.tight_layout()
    plt.savefig('convergence.pdf')
def plot_forecast(fc, data=None, test=None, loc='upper left'):
    '''
    Plots a forecast and its prediction intervals.

    Args:
      fc: Pandas Data Frame from converters.prediction_intervals,
          or an R forecast object
      data: the data for the forecast period as a Pandas Series, or None
          if fc is an R forecast
      test: optional data for the forecast period as a Pandas Series
      loc: legend location. Default is 'upper left', since plots often go
          up and right. For other values see matplotlib.pyplot.legend().

    Output:
      a plot of the series, the mean forecast, and the prediction intervals,
      and optionally, the data for the forecast period, if provided.
    '''
    fc, data, test = converters.to_forecast(fc, data, test)
    plt.style.use('ggplot')

    # First column is the point forecast; the remaining columns alternate
    # lower bound, upper bound for each interval.
    columns = list(fc.columns)
    point_col = columns[0]
    interval_cols = zip(columns[1::2], columns[2::2])

    history_idx = converters.flatten_index(data.index)
    forecast_idx = converters.flatten_index(fc.index)

    plt.plot(history_idx, data, color='black')
    plt.plot(forecast_idx, fc[point_col], color='blue')

    # Each successive interval is drawn more transparently.
    for rank, (low, up) in enumerate(interval_cols, 1):
        plt.fill_between(forecast_idx, fc[low], fc[up],
                         color='grey', alpha=0.5/rank)

    labels = ['data', 'forecast']
    if test is not None:
        # Only as many test points as there are forecast steps.
        n = min(len(fc.index), len(test))
        plt.plot(forecast_idx[:n], list(test[:n]), color='green')
        labels.append('test')

    plt.legend(labels, loc=loc)
    plt.show()
def q_plot(self, Q):
    """Show a grid plot of the Q-function.

    For every state ``s`` in ``self.states`` and every action ``a`` in
    ``self.actions(s)`` a triangle inside the state's unit cell is filled
    with a colour between red and green derived from ``(Q[s, a] + 1) / 2``
    and annotated with the first six characters of the value. When drawing
    an action fails and ``s`` is in ``self.terminals``, the whole cell is
    filled from ``Q[s, None]`` instead. The figure is shown; nothing is
    returned.

    Parameters
    ----------
    Q : mapping indexed as ``Q[s, a]``.
    """
    col = max([s[0] for s in self.states]) + 1
    rows = max([s[1] for s in self.states]) + 1
    ax = pl.axes()

    def colorlerp(a, b, t):
        # Returns a real list: a ``map`` object (Python 3) is not a valid
        # matplotlib colour, and the resulting error used to be swallowed
        # by the except clause below, leaving action cells undrawn.
        return [x + (y - x) * t * t for x, y in zip(a, b)]

    green = [0., .8, 0.]
    red = [.8, 0., 0.]

    for s in self.states:
        for a in self.actions(s):
            try:
                pl.fill_between(
                    [s[0] + max(a[0], 0), s[0] + 0.5, s[0] + 1 + min(a[0], 0)],
                    [s[1] + min(1 + a[1], 1)] * 3,
                    [s[1] + max(a[1], 0), s[1] + 0.5,
                     s[1] + abs(a[0]) + max(a[1], 0)],
                    color=colorlerp(red, green, (Q[s, a] + 1) / 2.))
                ax.text(s[0] + 0.3 + 0.25 * a[0], s[1] + 0.45 + 0.4 * a[1],
                        str(Q[s, a])[0:6])
            except Exception:
                # Best-effort fallback kept from the original, but no longer
                # a bare ``except`` that would also trap KeyboardInterrupt.
                if s in self.terminals:
                    pl.fill_between(
                        [s[0], s[0] + 1], [s[1] + 1, s[1] + 1], s[1],
                        color=colorlerp(red, green, (Q[s, None] + 1) / 2.))
                    ax.text(s[0] + 0.3, s[1] + 0.45, str(Q[s, None])[0:6])

    ax.set_xticks(range(col))
    ax.set_yticks(range(rows))
    ax.set_xticklabels([])
    ax.set_yticklabels([])
    pl.grid()
    pl.show()
    return
def ModelComplexity(X, y):
    """
    Plot training and validation R^2 of a Ridge model as ``alpha`` varies.

    Scores come from ``curves.validation_curve`` over alpha values
    0.1, 0.2, ..., 0.9 using 10 shuffled splits with 20% held out; the mean
    of each curve is drawn with a band of one standard deviation.
    """
    # 10 shuffled train/test splits, fixed seed for repeatability.
    cv = ShuffleSplit(X.shape[0], n_iter = 10, test_size = 0.2,
                      random_state = 0)

    alpha_range = np.arange(0.1, 1, 0.1)
    train_scores, test_scores = curves.validation_curve(
        Ridge(), X, y, param_name = "alpha", param_range = alpha_range,
        cv = cv, scoring = 'r2')

    # (mean, std, colour, label) per curve; statistics are across splits.
    summaries = [
        (np.mean(scores, axis=1), np.std(scores, axis=1), shade, name)
        for scores, shade, name in (
            (train_scores, 'r', 'Training Score'),
            (test_scores, 'g', 'Validation Score'),
        )
    ]

    pl.figure(3)
    pl.title('LinearRegression Complexity Performance')
    for centre, _, shade, name in summaries:
        pl.plot(alpha_range, centre, 'o-', color = shade, label = name)
    for centre, spread, shade, _ in summaries:
        pl.fill_between(alpha_range, centre - spread, centre + spread,
                        alpha = 0.15, color = shade)

    # Visual aesthetics
    pl.legend(loc = 'lower right')
    pl.xlabel('alpha_range')
    pl.ylabel('Score')
    pl.ylim([0.5000,1.0000])
    pl.show()
def teardown(self):
    """At the end of the run, save a stacked-area plot of the type counts.

    Nothing is done unless both ``self.epochs`` and ``self.typecounts`` are
    non-empty. Each type is drawn as a band stacked on the previous ones,
    coloured from the cell class's ``type_colors``; the x range runs from
    ``self.epoch_start`` to the configured number of epochs (or the last
    recorded epoch when that setting is falsy).
    """
    if len(self.epochs) == 0 or len(self.typecounts) == 0:
        return

    cell_class = self.experiment.population._cell_class
    num_types = cell_class.max_types

    plt.figure()
    plt.xlabel("Time (epoch)")
    plt.ylabel("Abundance (cells)")

    # Running lower edge of the stack, one value per epoch.
    floor = [0] * len(self.epochs)
    for t in range(num_types):
        ceiling = [counts[t] + floor[z]
                   for z, counts in enumerate(self.typecounts)]
        plt.fill_between(self.epochs, floor, ceiling,
                         color=cell_class.type_colors[t])
        floor = ceiling

    end_epoch = (self.experiment.config.getint('Experiment', 'epochs')
                 or max(self.epochs))
    plt.xlim([self.epoch_start, end_epoch])

    data_file = self.datafile_path(self.filename)
    plt.savefig(data_file)
def plot_layer(self, layer):
    """Draw a smoothed curve for ``layer`` and, optionally, its band.

    Only aesthetics listed in ``self.VALID_AES`` are kept, then overridden
    by ``self.manual_aes``. ``x``, ``y``, ``se``, ``span`` and ``method``
    are taken out of the layer; whatever remains is forwarded to
    ``plt.plot``. ``method`` selects the smoother: ``"lm"``, ``"ma"``, or
    lowess for anything else. The band returned by the smoother is shaded
    when ``se == True``.
    """
    layer = {key: val for key, val in layer.items()
             if key in self.VALID_AES}
    layer.update(self.manual_aes)

    if 'x' in layer:
        x = layer.pop('x')
    if 'y' in layer:
        y = layer.pop('y')
    se = layer.pop('se', None)
    # ``span`` is not used below, but must not reach plt.plot as a keyword.
    layer.pop('span', None)
    method = layer.pop('method', None)

    if method == "lm":
        smooth = smoothers.lm
    elif method == "ma":
        smooth = smoothers.mavg
    else:
        smooth = smoothers.lowess
    y, y1, y2 = smooth(x, y)

    # Sort every series by x so the line and band are drawn left to right.
    order = np.argsort(x)
    x, y, y1, y2 = [np.array(values)[order] for values in (x, y, y1, y2)]

    plt.plot(x, y, **layer)
    if se == True:
        plt.fill_between(x, y1, y2, alpha=0.2, color="grey")
def SetAxes(legend=False):
    """Decorate the current axes for a gas-fraction versus radius plot.

    Draws a dashed horizontal line at ``f_b - f_star`` with a grey band of
    the combined (quadrature) uncertainty, labels the axes, switches x to a
    log scale with named ticks, and optionally adds a legend.

    Parameters
    ----------
    legend : bool
        When True, draw a two-column legend with small text.
    """
    f_b, err_b = 0.164, 0.006
    f_star, err_star = 0.01, 0.004

    f_gas = f_b - f_star
    # Uncertainties of the two fractions add in quadrature.
    err_gas = np.sqrt(err_b**2 + err_star**2)

    plt.axhline(y=f_gas, ls='--', c='k', label='', zorder=-1)
    band_x = np.linspace(.0, 2., 1000)
    plt.fill_between(band_x, y1=f_gas - err_gas, y2=f_gas + err_gas,
                     color='k', alpha=0.3, zorder=-1)
    plt.text(.6, f_gas+0.006, r'f$_{gas}$',
             verticalalignment='bottom', size='large')

    plt.xlabel(r'r/r$_{vir}$', size='x-large')
    plt.ylabel(r'f$_{gas}$ ($<$ r)', size='x-large')
    plt.xscale('log')

    tick_positions = [1./1.9, 1.33/1.9, 1, 1.5, 2.]
    tick_labels = [r'r$_{500}$', r'r$_{200}$', 1, 1.5, 2]
    plt.xticks(tick_positions, tick_labels, size='large')

    plt.tick_params(length=10, which='major')
    plt.tick_params(length=5, which='minor')
    plt.xlim([0.4,1.5])
    plt.minorticks_on()

    if legend:
        plt.legend(loc=0, prop={'size':'small'}, markerscale=0.7,
                   numpoints=1, ncol=2)
def __spatialaverages__(cr, path, selector=lambda x: x.yhead,
                        ylabel='y (cm)', xlim=None, ylim=None):
    """Save one trajectory/spatial-average figure per subject and session.

    Trials come from ``getballistictrials(cr)`` restricted to ``trial > 0``
    and grouped by the ``subject`` and ``session`` index levels. For each
    group the session's activity table is read, the trajectories are
    plotted, the band ``y +/- yerr`` from ``activitytables.spatialaverage``
    is filled, and the figure is written to
    ``<path>/<subject>/<subject>_session_<session>_trajectories.png``.

    Parameters
    ----------
    cr : input for ``getballistictrials``.
    path : output root directory (created when missing).
    selector : callable picking the plotted quantity from the activity data.
    ylabel : y-axis label.
    xlim, ylim : optional axis limits.
    """
    if not os.path.exists(path):
        os.makedirs(path)

    trials = getballistictrials(cr)
    by_session = trials[trials.trial > 0].groupby(
        level=['subject', 'session'])

    for (subject, session), group in by_session:
        fig = plt.figure()

        subjectpath = os.path.join(activitymovies.datafolder, subject)
        sact = activitytables.read_subjects(subjectpath, days=[session])
        x, y, yerr = activitytables.spatialaverage(sact, group, selector)

        activityplots.trajectoryplot(sact, group, alpha=0.2, flip=True,
                                     selector=selector)
        plt.fill_between(x, y - yerr, y + yerr)

        if xlim is not None:
            plt.xlim(xlim)
        if ylim is not None:
            plt.ylim(ylim)
        plt.xlabel('x (cm)')
        plt.ylabel(ylabel)
        plt.title('{0} (session {1})'.format(subject, session))

        # One output folder per subject.
        subject_dir = os.path.join(path, subject)
        if not os.path.exists(subject_dir):
            os.makedirs(subject_dir)
        fname = "{0}_session_{1}_trajectories.png".format(subject, session)
        plt.savefig(os.path.join(subject_dir, fname))
        plt.close(fig)
def plot_parameter_sweep_gender_proportion(self, number_of_runs, param,
                                           llim, ulim, number_of_steps):
    """Run a parameter sweep on every model and plot the results together.

    Each model in ``self.mlist`` runs ``run_parameter_sweep`` with the given
    arguments; the first entry of its ``parameter_sweep_array`` is then read
    as columns (x, centre, spread) and drawn as a line with a
    +/- 1.96 * spread band.

    NOTE(review): ``line_colors`` is not defined in this function and is
    expected to exist at module scope - confirm.

    Parameters
    ----------
    number_of_runs, param, llim, ulim, number_of_steps
        Forwarded unchanged to ``run_parameter_sweep``; ``param`` is also
        the x-axis label.
    """
    for model in self.mlist:
        model.run_parameter_sweep(number_of_runs, param, llim, ulim,
                                  number_of_steps)

    for idx, model in enumerate(self.mlist):
        sweep = model.parameter_sweep_array[0]
        xs = sweep[:, 0]
        centre = sweep[:, 1]
        half_width = 1.96 * sweep[:, 2]
        shade = line_colors[idx]

        plt.plot(xs, centre, label=model.label, linewidth=2.0, color=shade)
        plt.fill_between(xs, centre + half_width, centre - half_width,
                         alpha=0.5, color=shade, facecolor=shade)

    plt.title('Parameter Sweep for Gender Proportion over '
              + str(self.mlist[0].duration) + ' years')
    plt.xlabel(param)
    plt.ylabel('Percentage of the Department that is Women')
    plt.legend(loc='upper right', shadow=True)
    plt.show()
def plot_comparison_department_size(self, number_of_runs=10):
    """Compare department size over time across all models.

    Every model in ``self.mlist`` is run ``number_of_runs`` times through
    ``run_multiple``; the ``'mean'`` column of its ``dept_size_matrix`` is
    then plotted against the year index with a +/- 1.96 * ``'std'`` band.

    Parameters
    ----------
    number_of_runs : int
        Number of simulation runs per model.
    """
    line_colors = ['#7fc97f', '#beaed4', '#fdc086',
                   '#386cb0', '#f0027f', '#ffff99']

    for mod in self.mlist:
        mod.run_multiple(number_of_runs)

    for k, mod in enumerate(self.mlist):
        # Cycle the palette rather than failing beyond six models.
        shade = line_colors[k % len(line_colors)]
        years = range(mod.duration)
        centre = mod.dept_size_matrix['mean']
        half_width = 1.96 * mod.dept_size_matrix['std']

        # The mean is drawn once. A second, unstyled plot of the same data
        # used to sit on top of this line in a default colour, hiding the
        # colour the legend refers to.
        plt.plot(years, centre, color=shade, label=mod.label, linewidth=2.0)
        plt.fill_between(years, centre + half_width, centre - half_width,
                         color=shade, alpha=0.5)

    plt.title('Department Size over Time: ' + self.name)
    plt.xlabel('Years')
    plt.ylabel('Total Department Size')
    plt.legend(loc='upper right', shadow=True)
    plt.show()
def plot_trade(buy_date, sell_date):
    """Highlight the holding period of one trade on the closing-price plot.

    Reads the module-level ``tsla_df``; its ``key`` column is used as the
    positional index of each date.

    Parameters
    ----------
    buy_date, sell_date
        Index labels of ``tsla_df`` marking the start and end of the trade.
    """
    # Positional index (``key`` column) of the buy and sell dates.
    start = tsla_df[tsla_df.index == buy_date].key.values[0]
    end = tsla_df[tsla_df.index == sell_date].key.values[0]
    holding = slice(start, end)

    # Draw the closing-price series only (a single curve).
    plot_demo(just_series=True)

    close = tsla_df['close']
    # Tint the whole series faintly in blue; the low alpha (0.08) lets any
    # region marked afterwards with a higher alpha stand out clearly.
    plt.fill_between(tsla_df.index, 0, close, color='blue', alpha=.08)
    # Mark the holding period in green with a stronger alpha (0.38 > 0.08).
    plt.fill_between(tsla_df.index[holding], 0, close[holding],
                     color='green', alpha=.38)

    # Set the y range explicitly; otherwise the axis would start at 0 and
    # the plot would be hard to read.
    plt.ylim(np.min(close) - 5, np.max(close) + 5)
    plt.legend(['close'], loc='best')
def plot_golden():
    """Plot the closing price with the 0.382 and 0.618 bands.

    Uses the module-level ``tsla_df``, ``sp382``, ``sp382_stats``, ``sp618``
    and ``sp618_stats``. For each level the two variants (the comments in
    the original call them "visual" and "statistical") are drawn as
    horizontal lines and the area between them is shaded.

    Returns
    -------
    A ``golden`` namedtuple with fields ``above618``, ``below618``,
    ``above382`` and ``below382``.
    """
    # Upper and lower edge of each band: the larger and smaller of the
    # two variants.
    above618 = np.maximum(sp618, sp618_stats)
    below618 = np.minimum(sp618, sp618_stats)
    above382 = np.maximum(sp382, sp382_stats)
    below382 = np.minimum(sp382, sp382_stats)

    # Closing price.
    plt.plot(tsla_df.close)

    # Horizontal lines: 382 first variant, 382 stats, 618 first variant,
    # 618 stats.
    for level, shade in ((sp382, 'r'), (sp382_stats, 'm'),
                         (sp618, 'g'), (sp618_stats, 'k')):
        plt.axhline(level, c=shade)

    # Shade the 618 band in red and the 382 band in green.
    plt.fill_between(tsla_df.index, above618, below618,
                     alpha=0.5, color="r")
    plt.fill_between(tsla_df.index, above382, below382,
                     alpha=0.5, color="g")

    # Wrap the edges in a namedtuple for convenient access.
    golden = namedtuple('golden', ['above618', 'below618',
                                   'above382', 'below382'])
    return golden(above618, below618, above382, below382)
def dummy():
    """Plot the averaged false-positive fraction and save it as a PDF.

    ``execute_for_part`` is run for every ``(a, p, v)`` triple in
    ``self.values``. The predicted-standard-deviation results are averaged
    element-wise for the x axis; the false-positive results are averaged
    for the curve and their standard deviation gives the shaded band.

    NOTE(review): ``self`` is not a parameter of this function, so it must
    be resolvable from an enclosing or module scope - confirm.
    """
    pstd_runs = []
    fp_runs = []
    for a, p, v in self.values:
        pstd, fp = execute_for_part(a, p, v)
        pstd_runs.append(pstd)
        fp_runs.append(fp)

    fp_stack = np.array(fp_runs)
    all_pstd = np.mean(np.array(pstd_runs), axis=0)
    fp = np.mean(fp_stack, axis=0)
    fps = np.std(fp_stack, axis=0)

    pp.close()
    pp.fill_between(all_pstd, fp - fps, fp + fps,
                    alpha=0.8, facecolor='0.75')
    pp.plot(all_pstd, fp, 'b.-')

    # Tighten x to the data range while keeping the automatic y range.
    _, _, y_low, y_high = pp.axis()
    pp.axis([np.min(all_pstd), np.max(all_pstd), y_low, y_high])

    pp.grid()
    pp.xlabel('Average Predicted Standard Deviation',fontsize=18)
    pp.ylabel('Fraction of false positive peptides',fontsize=18)
    pp.savefig('./plots/Overall_PErr_vs_PSTD.pdf')
def plot_noisy_means(graph_title, means, bands, series, xvals=None,
                     xlabel=None, ylabel=None, subtitle=None, data=None,
                     filename='results.pdf'):
    """Plot several mean curves with symmetric bands and save the figure.

    Parameters
    ----------
    graph_title : str
        Title (drawn as figure text when ``subtitle`` is given).
    means, bands : 2-D numpy arrays of identical shape
        One row per series; each band is drawn as ``mean +/- band``.
    series : sequence of str
        Legend label per row.
    xvals : 1-D numpy array or None
        X coordinates, one per column of ``means``. Defaults to
        ``0 .. means.shape[1] - 1``.
    xlabel, ylabel, subtitle : str or None
        Optional labels.
    data
        Unused.
    filename : str
        Output path passed to ``plt.savefig``.

    At most eight series are supported (one colour each).
    """
    colors = ['blue', 'red', 'green', 'black',
              'orange', 'purple', 'brown', 'yellow']  # max 8 lines
    assert(means.shape == bands.shape)
    assert(xvals is None or xvals.shape[0] == means.shape[1])
    assert(means.shape[0] <= len(colors))

    if xvals is None:
        # One x value per column (sample), not per row (series): the
        # previous shape[0] only worked for square inputs.
        xvals = np.arange(means.shape[1])

    ax = plt.axes([.1, .1, .8, .7])
    plt.ticklabel_format(axis='y', style='plain', useOffset=False)

    for i, mean in enumerate(means):
        plt.plot(xvals, mean, label=series[i], color=colors[i])
        plt.fill_between(xvals, mean + bands[i], mean - bands[i],
                         facecolor=colors[i], alpha=0.2)

    if xlabel is not None:
        plt.xlabel(xlabel)
    if ylabel is not None:
        plt.ylabel(ylabel)

    if subtitle is not None:
        plt.figtext(.40, .9, graph_title, fontsize=18, ha='center')
        plt.figtext(.40, .85, subtitle, fontsize=10, ha='center')
    else:
        plt.title('{0}'.format(graph_title))

    # Shrink current axis by 20% to leave room for the legend.
    box = ax.get_position()
    ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
    # Put a legend to the right of the current axis.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), fontsize=12)

    plt.savefig(filename)
    plt.clf()
def plot_prior_1D(Xtest, test_cov, Ytest=None):
    """Plot a zero-mean 1-D prior: mean, 2-sigma band and ten samples.

    The upper panel shows the zero mean, a band of +/- 2 standard
    deviations (square root of the diagonal of ``test_cov``) and,
    optionally, ``Ytest``. The lower panel shows ten random draws with
    covariance ``test_cov``.

    Parameters
    ----------
    Xtest : array
        Test inputs; flattened with ``np.hstack`` for plotting.
    test_cov : 2-D array
        Covariance matrix over the test inputs (treated as symmetric).
    Ytest : array or None
        Optional true function values to overlay.
    """
    # Manipulate X for plotting
    X = np.hstack(Xtest)

    # Prior mean is zero everywhere; s is the pointwise standard deviation.
    mean = np.reshape(np.zeros(Xtest.shape), (-1,))
    s = np.sqrt(np.diag(test_cov))

    # True function, mean function and uncertainty
    ax1 = plt.subplot(211)
    plt.xlim(min(X), max(X))
    plt.ylim(min(mean-(2*s)-(s/2)), max(mean+(2*s)+(s/2)))
    if Ytest is not None:
        plt.plot(X, Ytest, 'b-', label='Y')
    plt.plot(X, mean, 'r--', lw=2, label='mean')
    plt.fill_between(X, mean-(2*s), mean+(2*s), color='#87cefa')
    plt.legend()

    # Draws from the prior. A sample with covariance K is mean + A z with
    # A A^T = K; multiplying by K itself (as before) yields covariance K^2.
    # The factor comes from an eigendecomposition, with tiny negative
    # eigenvalues clipped, so semi-definite matrices are handled too.
    eigvals, eigvecs = np.linalg.eigh(test_cov)
    factor = eigvecs * np.sqrt(np.maximum(eigvals, 0.0))
    mean = mean.reshape(X.shape[0], 1)
    f = mean + np.dot(factor, np.random.normal(size=(X.shape[0], 10)))

    plt.subplot(212, sharex=ax1)
    plt.plot(X, f)
    plt.title('Ten samples')
    plt.tight_layout()
    plt.show()
def plot_posterior_1D(Xtest, Xtrain, Ytrain, p_mean, p_sd, cov_post,
                      Ytest=None):
    """Plot a 1-D posterior: mean, 2-sigma band, training data, ten samples.

    The upper panel shows the training points, the predicted mean, a band
    of +/- 2 ``p_sd`` and, optionally, ``Ytest``. The lower panel shows ten
    random draws around ``p_mean`` together with the training points.

    Parameters
    ----------
    Xtest : array
        Test inputs; flattened with ``np.hstack`` for plotting.
    Xtrain, Ytrain : arrays
        Training inputs and targets, drawn as red crosses.
    p_mean : array
        Predicted mean; must broadcast against an (n_test, 10) array.
    p_sd : array
        Predicted standard deviation per test input.
    cov_post : 2-D array
        Presumably the posterior covariance matrix (treated as symmetric)
        - confirm against the caller.
    Ytest : array or None
        Optional true function values to overlay.
    """
    # Manipulate data for plotting
    mean_f = np.ravel(p_mean)
    p_sd = np.reshape(p_sd, (-1,))
    Xtest = np.hstack(Xtest)

    # True function, predicted mean and uncertainty (2s), training points
    ax1 = plt.subplot(211)
    plt.plot(Xtrain, Ytrain, 'r+', ms=20)  # training points
    plt.xlim(min(Xtest), max(Xtest))
    plt.ylim(min(mean_f-(2*p_sd)-(p_sd/2)), max(mean_f+(2*p_sd)+(p_sd/2)))
    if Ytest is not None:
        plt.plot(Xtest, Ytest, 'b', label='Y')  # true function
    plt.plot(Xtest, mean_f, 'r--', lw=2, label='mean')  # mean function
    plt.fill_between(Xtest, mean_f-(2*p_sd), mean_f+(2*p_sd),
                     color='#87cefa')  # uncertainty
    plt.legend()

    # Ten draws from the posterior. A sample with covariance K is
    # mean + A z with A A^T = K; multiplying by K itself (as before) yields
    # covariance K^2. The factor comes from an eigendecomposition, with
    # tiny negative eigenvalues clipped, so semi-definite matrices work.
    eigvals, eigvecs = np.linalg.eigh(cov_post)
    factor = eigvecs * np.sqrt(np.maximum(eigvals, 0.0))
    f = p_mean + np.dot(factor, np.random.normal(size=(Xtest.shape[0], 10)))

    plt.subplot(212, sharex=ax1)
    plt.xlim(min(Xtest), max(Xtest))
    plt.plot(Xtest, f)
    plt.plot(Xtrain, Ytrain, 'r+', ms=20)  # training points again
    plt.title('Ten samples')
    plt.tight_layout()
    plt.show()
def m_errorplot(x, Y, L, U):
    """Draw one error-band subplot per row of ``Y``.

    ``Y``, ``L`` and ``U`` are promoted to at least 2-D. For each row ``i``
    the region between ``Y[i] - L[i]`` and ``Y[i] + U[i]`` is filled in
    grey and ``Y[i]`` is drawn on top in black; the subplot's y label is
    the row index.

    Parameters
    ----------
    x : array-like
        Shared x coordinates.
    Y : array-like
        Curves, one per row.
    L, U : array-like
        Lower and upper error extents, same layout as ``Y``.
    """
    Y, L, U = [np.atleast_2d(arr) for arr in (Y, L, U)]
    M = Y.shape[-2]

    grey = (0.6, 0.6, 0.6, 1)
    transparent = (0, 0, 0, 0)
    black = (0, 0, 0, 1)

    for row in range(M):
        plt.subplot(M, 1, row + 1)
        curve = Y[row]
        lower = curve - L[row]
        upper = curve + U[row]
        plt.fill_between(x, upper, lower,
                         facecolor=grey,
                         edgecolor=transparent,
                         linewidth=0,
                         interpolate=True)
        plt.plot(x, curve, color=black)
        plt.ylabel(str(row))
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=1, train_sizes=np.linspace(0.1, 1.0, 5)):
    """Plot training and cross-validation learning curves.

    ``learning_curve`` is evaluated for the requested training-set sizes;
    each curve is drawn as the mean score across folds with a band of one
    standard deviation.

    Parameters
    ----------
    estimator, X, y : passed through to ``learning_curve``.
    title : figure title.
    ylim : optional (ymin, ymax) pair for the y axis.
    cv, n_jobs, train_sizes : forwarded to ``learning_curve``.

    Returns
    -------
    The ``plt`` module, so the caller can show or save the figure.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes
    )

    def summarise(scores):
        # Mean and spread across the cross-validation folds.
        return np.mean(scores, axis=1), np.std(scores, axis=1)

    train_mean, train_std = summarise(train_scores)
    test_mean, test_std = summarise(test_scores)
    bands = ((train_mean, train_std, "r"), (test_mean, test_std, "g"))
    lines = ((train_mean, "r", "Training score"),
             (test_mean, "g", "Cross-validation score"))

    plt.grid()
    for centre, spread, shade in bands:
        plt.fill_between(train_sizes, centre - spread, centre + spread,
                         alpha=0.1, color=shade)
    for centre, shade, name in lines:
        plt.plot(train_sizes, centre, "o-", color=shade, label=name)

    plt.legend(loc="best")
    return plt
def run_test():
    """Fit a 1-D kriging model to six samples of ``func`` and plot it.

    The model is evaluated on 100 points in [0, 1]; the plot shows the
    sample points, the exact function, the kriging prediction and a grey
    band of +/- 0.5 times the second value returned by the model for each
    point. The sum of those second values is printed.
    """
    x = np.array([0, 0.2, 0.4, 0.6, 0.8, 1.0])
    y = np.array([func(_x) for _x in x])

    xp = np.linspace(0, 1, 100)
    yp = func(xp)

    krig = Kriging1D(x, y)
    rbf = Rbf(x, y)

    xk = np.linspace(0, 1, 100)
    yk = np.zeros(len(xk))
    yr = np.zeros(len(xk))  # RBF prediction; computed but not plotted
    dk = np.zeros(len(xk))
    for i, xx in enumerate(xk):
        yk[i], dk[i] = krig(xx)
        yr[i] = rbf(xx)

    # Single-argument print(...) works on both Python 2 and 3.
    print(sum(dk))

    plt.figure(1)
    plt.title('Kriging test')
    # No plt.hold(True): it was removed from matplotlib, and drawing onto
    # the same axes is the default behaviour.
    plt.plot(x, y, 'rs')
    plt.plot(xp, yp, 'b-')
    plt.plot(xk, yk, 'r-')
    plt.fill_between(xk, yk+0.5*dk, yk-0.5*dk, color='#dddddd')
    plt.grid(True)
    plt.axis([0, 1, -10, 20])
    # loc must be a keyword: two positional arguments are interpreted as
    # (handles, labels) by current matplotlib.
    plt.legend(['sample point', 'exact function', 'kriging'],
               loc='upper left')
    plt.show()
def plot(self):
    """
    Plot input current, recorded voltage, voltage after AEC (if applicable)
    and detected spike times (if applicable).

    The lower panel is additionally shaded over the samples that are not
    selected by ``self.getROI()``.
    """
    time = self.getTime()

    plt.figure(figsize=(10,4), facecolor='white')

    # Top panel: input current.
    plt.subplot(2,1,1)
    plt.plot(time, self.I, 'gray')
    plt.ylabel('I (nA)')

    # Bottom panel: voltage traces and spikes.
    plt.subplot(2,1,2)
    plt.plot(time, self.V_rec, 'black')
    if self.AEC_flag:
        plt.plot(time, self.V, 'red')
    if self.spks_flag:
        spike_baseline = np.zeros(len(self.spks))
        plt.plot(self.getSpikeTimes(), spike_baseline, '.', color='blue')

    # ROI (region selected for performing operations): the vector is +100
    # everywhere except at ROI samples, where it equals the -100 baseline,
    # so the fill only covers samples outside the ROI.
    ROI_vector = 100.0*np.ones(len(self.V))
    ROI_vector[self.getROI()] = -100.0
    plt.fill_between(self.getTime(), ROI_vector, -100.0, color='0.2')

    plt.ylim([min(self.V)-5.0, max(self.V)+5.0])
    plt.ylabel('V rec (mV)')
    plt.xlabel('Time (ms)')
    plt.show()
def _plot_graph_plot(ax, plot_data, **kwargs): plot_args = [] plot_kwargs = {} thumb = kwargs.get('thumb', False) plot_args = plot_data['data'] xaxis, yaxis = plot_args color = plot_data['color'] for prop in [ 'label', 'linewidth', 'zorder']: if prop not in plot_data: continue value = plot_data[prop] if isroutine(value): value = value(thumb) plot_kwargs[prop] = value ax.plot(xaxis, yaxis, color, **plot_kwargs) if plot_data.get('fill', False): where = [ True for x in xaxis ] alpha = plot_data.get('fillalpha', 1.0) plt.fill_between(xaxis, yaxis, where=where, interpolate=True, color=color, alpha=alpha)
def main():
    """Evaluate four classifiers for vertex nomination of one attribute value.

    Parses the command line, then tries to reload previously aggregated
    precision data from CSV. If that raises ``OSError``, the experiment is
    run instead: for each of ``num_samples`` seeds a train/test split is
    drawn, a random forest, AdaBoost, logistic regression and Gaussian naive
    Bayes are fitted, the test nodes are ranked by predicted probability and
    the precision at every rank is recorded. Means and standard errors over
    the samples are written to CSV, and the attributes of the top nominees
    (each node's vote split equally among its attributes) are written to a
    text file. With ``-v`` the mean precision curves with +/- 2 standard
    error bands and the chance rate are saved as a PNG.
    """
    p = optparse.OptionParser()
    p.add_option('--attr', '-a', type=str, help='attribute')
    p.add_option('--attr_type', '-t', type=str, help='attribute type')
    p.add_option('--num_train_each', '-n', type=int, help='number of training samples of True and False for the attribute (for total of 2n training samples)')
    p.add_option('--embedding', '-e', type=str, help='embedding (adj, normlap, regnormlap)')
    p.add_option('-k', type=int, help='number of eigenvalues')
    p.add_option('--sphere', '-s', action='store_true', default=False, help='normalize in sphere')
    p.add_option('--num_samples', '-S', type=int, default=50, help='number of Monte Carlo samples')
    p.add_option('-v', action='store_true', default=False, help='save plot')
    p.add_option('--jobs', '-j', type=int, default=-1, help='number of jobs')
    opts, args = p.parse_args()

    attr, attr_type, num_train_each = opts.attr, opts.attr_type, opts.num_train_each
    embedding, k, sphere = opts.embedding, opts.k, opts.sphere
    num_samples, save_plot, jobs = opts.num_samples, opts.v, opts.jobs

    # All three output files share one stem and differ only in the suffix.
    folder = 'gplus0_lcc/baseline5/'
    stem = folder + '%s_%s_n%d_%s_k%d%s' % (attr_type, attr, num_train_each, embedding, k, '_normalize' if sphere else '')
    agg_precision_filename = stem + '_precision.csv'
    plot_filename = stem + '_precision.png'
    top_attrs_filename = stem + '_top_attrs.txt'

    print("\nNominating nodes with whose '%s' attribute is '%s' (%d pos/neg seeds)..." % (attr_type, attr, num_train_each))
    print("\nLoading AttributeAnalyzer...")
    a = AttributeAnalyzer(load_data=False)
    sqrt_samples = np.sqrt(num_samples)

    # Short names double as column suffixes ('probs_<name>', 'mean_<name>_prec').
    names = ('rfc', 'boost', 'logreg', 'gnb')

    try:
        agg_precision_df = pd.read_csv(agg_precision_filename)
        print("\nLoaded data from '%s'." % agg_precision_filename)
        selected_attrs = pd.read_csv('selected_attrs.csv')
        if attr in list(selected_attrs['attribute']):
            row = selected_attrs[selected_attrs['attribute'] == attr].iloc[0]
            num_true_in_test = row['freq'] - num_train_each
            num_test = row['totalKnown'] - 2 * num_train_each
        else:
            ind = a.get_attribute_indicator(attr, attr_type)
            num_true_in_test = len(ind[ind == 1]) - num_train_each
            num_test = ind.count() - 2 * num_train_each
    except OSError:
        print("\nLoading attribute data...")
        timeit(a.load_data)()
        a.make_joint_attr_embedding_matrix(attr_type, sim=sim, embedding=embedding, delta=delta, tau=tau, k=k, sphere=2 if sphere else 0)

        # get attribute indicator for all the nodes
        attr_indicator = a.get_attribute_indicator(attr, attr_type)

        # prepare the classifiers (same construction order as the names)
        models = {
            'rfc': RandomForestClassifier(n_estimators=num_rf_trees, n_jobs=jobs),
            'boost': AdaBoostClassifier(n_estimators=num_boost_trees),
            'logreg': LogisticRegression(n_jobs=jobs),
            'gnb': GaussianNB(),
        }
        training_messages = {
            'rfc': "\nTraining %d random forest trees..." % num_rf_trees,
            'boost': "\nTraining %d AdaBoost trees..." % num_boost_trees,
            'logreg': "\nTraining logistic regression...",
            'gnb': "\nTraining Naive Bayes...",
        }
        # One frame per classifier: rows are ranks, column s is sample s.
        precision_dfs = {name: pd.DataFrame(columns=range(num_samples)) for name in names}

        # maintain top nominee attributes dictionary
        top_attrs = defaultdict(float)

        for s in range(num_samples):
            print("\nSEED = %d" % s)
            np.random.seed(s)
            print("\nObtaining feature vectors for random training and test sets...")
            ((train_in, train_out), (test_in, test_out)) = timeit(a.get_joint_PMI_training_and_test)(attr, attr_type, num_train_each)

            # train and predict, one classifier after the other
            probs = {}
            for name in names:
                print(training_messages[name])
                timeit(models[name].fit)(train_in, train_out)
                print("\nPredicting probabilities...")
                probs[name] = timeit(models[name].predict_proba)(test_in)[:, 1]

            test_df = pd.DataFrame(columns=['test'] + ['probs_%s' % name for name in names])
            test_df['test'] = test_out
            for name in names:
                test_df['probs_%s' % name] = probs[name]

            # do vertex nomination: precision at rank r is the fraction of
            # true labels among the r highest-scored test nodes
            ranks = np.arange(1.0, len(test_out) + 1.0)
            for name in names:
                test_df = test_df.sort_values(by='probs_%s' % name, ascending=False)
                precision_dfs[name][s] = np.asarray(test_df['test']).cumsum() / ranks

            # determine top attributes from the classifier with the best
            # precision at rank topN_nominees
            best_i, best_prec = -1, -1.0
            for i, name in enumerate(names):
                prec = precision_dfs[name][s][topN_nominees - 1]
                if prec > best_prec:
                    best_i, best_prec = i, prec
            # Rank by the winning classifier, not by whichever one the loop
            # variable happened to end on.
            test_df = test_df.sort_values(by='probs_%s' % names[best_i], ascending=False)
            for node in test_df.index[:topN_nominees]:
                attrs = a.attrs_by_node_by_type[attr_type][node]
                for at in attrs:
                    top_attrs[at] += 1.0 / len(attrs)  # divide the vote equally among all attributes

            sys.stdout.flush()  # flush the output buffer

        # compute means and standard errors over all the samples
        agg_columns = []
        for name in names:
            agg_columns += ['mean_%s_prec' % name, 'stderr_%s_prec' % name]
        agg_columns.append('max_mean_prec')
        agg_precision_df = pd.DataFrame(columns=agg_columns)
        for name in names:
            agg_precision_df['mean_%s_prec' % name] = precision_dfs[name].mean(axis=1)
            agg_precision_df['stderr_%s_prec' % name] = precision_dfs[name].std(axis=1) / sqrt_samples
        agg_precision_df['max_mean_prec'] = agg_precision_df[['mean_%s_prec' % name for name in names]].max(axis=1)

        # save the aggregate data frames
        N_save = min(len(test_out), topN_save)
        agg_precision_df = agg_precision_df[:N_save]
        agg_precision_df.to_csv(agg_precision_filename, index=False)

        top_attrs_df = pd.DataFrame(list(top_attrs.items()), columns=['attribute', 'voteProportion'])
        top_attrs_df = top_attrs_df.set_index('attribute')
        top_attrs_df['voteProportion'] /= top_attrs_df['voteProportion'].sum()
        top_attrs_df = top_attrs_df.sort_values(by='voteProportion', ascending=False)
        # Context manager so the file is flushed and closed deterministically.
        with open(top_attrs_filename, 'w') as top_attrs_file:
            top_attrs_file.write(str(top_attrs_df))

        num_true_in_test = test_out.sum()
        num_test = len(test_out)

    # plot the nomination precision
    if save_plot:
        N_plot = min(len(agg_precision_df), topN_plot)
        curve_styles = (
            ('rfc', 'green', 'Random Forest'),
            ('boost', 'blue', 'AdaBoost'),
            ('logreg', 'red', 'Logistic Regression'),
            ('gnb', 'orange', 'Naive Bayes'),
        )
        legend_handles = []
        for name, color, label in curve_styles:
            mean = agg_precision_df['mean_%s_prec' % name]
            stderr = agg_precision_df['stderr_%s_prec' % name]
            plt.fill_between(agg_precision_df.index, mean - 2 * stderr, mean + 2 * stderr, color=color, alpha=0.25)
            line, = plt.plot(agg_precision_df.index, mean, color=color, linewidth=2, label=label)
            legend_handles.append(line)

        # Horizontal reference: fraction of positives in the test set.
        guess_rate = num_true_in_test / num_test
        guess, = plt.plot([guess_rate] * N_plot, linestyle='dashed', linewidth=2, color='black', label='Guess')
        legend_handles.append(guess)

        plt.xlabel('rank')
        plt.ylabel('precision')
        plt.xlim((0.0, N_plot))
        plt.ylim((0.0, 1.0))
        plt.title('Vertex Nomination Precision')
        plt.legend(handles=legend_handles)
        plt.savefig(plot_filename)

    print("\nDone!")
dtype=float) dist_boot /= np.sum(dist_boot) dJS_boot[b, t] = d(dist_boot, dist_t[strain_ref][:, t]) plt.figure(201 + 10 * nst) plt.clf() plt.plot(dJS_boot, '.') plt.title('bootstrapping - ref=%s' % strain_ref) # significance level: s_boot = np.percentile(dJS_boot, 99, axis=0) plt.figure(202 + 10 * nst) plt.clf() plt.fill_between(range(tbins), 0, s_boot, color='lightgray') for strain in strains: plt.plot(ent_t_ref[strain], label=strain, linewidth=2) ids = np.where(ent_t_ref[strain] >= s_boot)[0] if len(ids) > 0: plt.plot(ids, ent_t_ref[strain][ids], '*k', markersize=10) plt.legend() plt.xlim(-0.5, tbins - 0.5) plt.title('dJS bootstrapping - ref=%s' % strain_ref) # fit a beta distribution to distributions import scipy.stats as stats pdf_par = {} for strain in strains: pdf_par[strain] = np.zeros((2, tbins))
def ROC(y_true, y_prob, path='ROC'):
    """Draw the precision-recall and ROC curves side by side and save them.

    The left panel shows the precision-recall step curve (legend: average
    precision) with the point of maximum F1 highlighted; a table of
    precision, recall, F1 and threshold is printed for every threshold.
    The right panel shows the ROC curve (legend: AUC) and the diagonal.
    The figure is written to ``'results/<path>'``.

    Parameters
    ----------
    y_true, y_prob
        Labels and scores, passed to the scikit-learn metric functions.
    path : str
        File name (under ``results/``) for the saved figure.
    """
    from inspect import signature
    from sklearn.metrics import (
        auc,
        average_precision_score,
        precision_recall_curve,
        roc_curve,
    )

    plt.figure(figsize=(10, 4))

    # precision-recall curve
    plt.subplot(1, 2, 1)
    precision, recall, thresholds = precision_recall_curve(y_true, y_prob)
    F1 = [_F1(p, r) for p, r in zip(precision, recall)]

    # One row per threshold; zip stops at the shortest input.
    rows = list(zip(precision, recall, F1, thresholds))
    rule = '------------------------------'
    print('precision | recall | F1 | threshold')
    print(rule)
    for row in rows:
        print('%.3f | %.3f | %.3f | %.3f' % row)
    print(rule)

    best_precision, best_recall, best_f1, best_threshold = max(
        rows, key=lambda row: row[2])
    ap = average_precision_score(y_true, y_prob)

    # In matplotlib < 1.5, plt.fill_between does not have a 'step' argument
    if 'step' in signature(plt.fill_between).parameters:
        step_kwargs = {'step': 'post'}
    else:
        step_kwargs = {}

    plt.step(recall, precision, color='b', alpha=0.2, where='post')
    plt.fill_between(recall, precision, alpha=0.2, color='b',
                     label='AP={0:0.2f}'.format(ap), **step_kwargs)
    plt.plot(best_recall, best_precision, marker='.', markersize=20,
             color='orange',
             label='mF1: %.2f - th: %.2f' % (best_f1, best_threshold))
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Precision-Recall curve')
    plt.legend(loc="upper right")

    # ROC curve
    plt.subplot(1, 2, 2)
    fpr, tpr, thresholds = roc_curve(y_true, y_prob)
    roc_label = 'AUC = %0.2f' % auc(fpr, tpr)
    plt.plot(fpr, tpr, alpha=0.2, label=roc_label)
    plt.fill_between(fpr, tpr, alpha=0.2, color='b')
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('Receiver operating characteristic')
    plt.legend(loc="upper left")

    plt.savefig('results/%s' % path)
"""
Estimate the integral of y = x^2 over the interval [0, 2].
"""
import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 2, 1000)  # 1000 evenly spaced values from 0 to 2
y = x**2
plt.plot(x, y)
plt.fill_between(x, y, where=(y > 0), color='red', alpha=0.5)  # shade the region

# The red region lies inside a 2 x 4 rectangle. Monte Carlo method: draw a
# large number (N) of random points inside that rectangle and count how many
# (count) fall inside the red region (condition y < x^2). count / N is the
# fraction of the rectangle covered, so multiplying it by the rectangle's
# area gives the integral, i.e. the area of the red region.
N = 1000
points = [[u * 2, v * 4] for u, v in np.random.rand(N, 2)]
plt.scatter([pt[0] for pt in points], [pt[1] for pt in points],
            s=5, c=np.random.rand(N), alpha=0.5)
plt.show()

# Share of the points that landed in the red region.
count = sum(1 for px, py in points if py < px**2)
print((count / N) * (2 * 4))
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=1, train_sizes=np.linspace(.1, 1.0, 5)):
    """
    Generate a simple plot of the test and training learning curve.

    Parameters
    ----------
    estimator : object
        Estimator handed to ``learning_curve``.

    title : string
        Title for the chart.

    X : array-like
        Training data handed to ``learning_curve``.

    y : array-like
        Target handed to ``learning_curve``.

    ylim : tuple, shape (ymin, ymax), optional
        Defines minimum and maximum y values plotted.

    cv : optional
        Cross-validation setting, forwarded unchanged to ``learning_curve``.

    n_jobs : integer, optional
        Number of jobs to run in parallel (default 1), forwarded unchanged
        to ``learning_curve``.

    train_sizes : array-like, optional
        Training-set sizes, forwarded unchanged to ``learning_curve``
        (default ``np.linspace(.1, 1.0, 5)``).

    Returns
    -------
    The ``plt`` object the chart was drawn with.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)

    # Per-size mean and spread across the cross-validation folds.
    fit_mean, fit_std = np.mean(train_scores, axis=1), np.std(train_scores, axis=1)
    cv_mean, cv_std = np.mean(test_scores, axis=1), np.std(test_scores, axis=1)

    plt.grid()

    # Translucent +/- one standard deviation bands.
    band_style = dict(alpha=0.1)
    plt.fill_between(train_sizes, fit_mean - fit_std, fit_mean + fit_std,
                     color="r", **band_style)
    plt.fill_between(train_sizes, cv_mean - cv_std, cv_mean + cv_std,
                     color="g", **band_style)

    # Mean curves on top of the bands.
    plt.plot(train_sizes, fit_mean, 'o-', color="r", label="Training score")
    plt.plot(train_sizes, cv_mean, 'o-', color="g",
             label="Cross-validation score")

    plt.legend(loc="best")
    return plt
# Finish the preceding figure (accuracy vs MSE vs loss): label it, save it
# to disk and display it.
plt.title('ACCURACY vs MSE vs LOSS')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('Accuracy, loss, mse')
plt.savefig(r'C:\Users\hp\Desktop\priyasoftweb\STM32\sierra project\cnn_lstm-xai-output\vis\13a.png')
plt.show()

# Area chart: accuracy and MSE per epoch.
import matplotlib.pyplot as plt
# NOTE(review): an earlier comment here read "Training vs Validation Loss
# Plot", but the code below plots accuracy and mean squared error.
acc = history['acc']
mse = history['mean_squared_error']
epochs = range(nb_epochs)
plt.figure()
# Each series is drawn as a filled area (no second curve is given to
# fill_between) with an outline on top.
plt.fill_between(epochs, acc, label='accuracy', color="skyblue")
plt.plot(epochs, acc, color="blue")
plt.fill_between(epochs, mse, label='mse', color="green")
plt.plot(epochs, mse, color="red")
# NOTE(review): title and y-label say "loss" while the second series is MSE.
plt.title('ACCURACY vs loss')
plt.legend()
plt.xlabel('epoch')
plt.ylabel('Accuracy and loss')
plt.savefig(r'C:\Users\hp\Desktop\priyasoftweb\STM32\sierra project\cnn_lstm-xai-output\vis\14a.png')
plt.show()

# Area chart: all metrics together (continues after this chunk).
import matplotlib.pyplot as plt
def plot_learing_curve(pipeline, title):
    """Fit ``pipeline`` on the training news data and show its learning curve.

    The pipeline is fitted on ``DataPrep.train_news`` ("Statement" as input,
    "Label" as target); ``learning_curve`` is then run on five training-set
    fractions from 10% to 100%. Mean training and cross-validation scores
    are plotted with +/- one standard deviation bands and the figure is
    displayed.

    Parameters
    ----------
    pipeline : estimator
        Object with ``fit``, handed to ``learning_curve``.
    title : str
        Title of the figure.
    """
    size = 10000
    cv = KFold(size, shuffle=True)

    X = DataPrep.train_news["Statement"]
    y = DataPrep.train_news["Label"]

    pl = pipeline
    pl.fit(X, y)

    train_sizes, train_scores, test_scores = learning_curve(
        pl, X, y, n_jobs=-1, cv=cv,
        train_sizes=np.linspace(.1, 1.0, 5), verbose=0)

    train_scores_mean = np.mean(train_scores, axis=1)
    train_scores_std = np.std(train_scores, axis=1)
    test_scores_mean = np.mean(test_scores, axis=1)
    test_scores_std = np.std(test_scores, axis=1)

    plt.figure()
    plt.title(title)
    plt.xlabel("Training examples")
    plt.ylabel("Score")
    # NOTE(review): the explicit plt.ylim(-.1, 1.1) at the end sets the
    # limits in ascending order, which appears to undo this inversion.
    plt.gca().invert_yaxis()

    # box-like grid
    plt.grid()

    # plot the std deviation as a transparent range at each training set size
    plt.fill_between(train_sizes, train_scores_mean - train_scores_std,
                     train_scores_mean + train_scores_std,
                     alpha=0.1, color="r")
    plt.fill_between(train_sizes, test_scores_mean - test_scores_std,
                     test_scores_mean + test_scores_std,
                     alpha=0.1, color="g")

    # plot the average training and test score lines at each training set size
    plt.plot(train_sizes, train_scores_mean, 'o-', color="r",
             label="Training score")
    plt.plot(train_sizes, test_scores_mean, 'o-', color="g",
             label="Cross-validation score")

    # The legend is built from the labelled artists that exist when it is
    # created, so it has to come after the plot calls.
    plt.legend(loc="best")

    # sizes the window for readability and displays the plot
    # shows scores from -0.1 to 1.1
    plt.ylim(-.1, 1.1)
    plt.show()
# Thin red traces, one per measured series (t_3 .. t_15), all with the same
# styling.
for series in (t_3, t_4, t_5, t_6, t_7, t_8, t_9, t_10, t_11, t_12, t_13,
               t_14, t_15):
    plt.plot(t, series, linewidth=.1, linestyle="-", c="red")

# Shading the theoretical distribution region: a darker band between curves
# 1 and 2, and a lighter band on either side (1 to 3 and 2 to 4).
plt.fill_between(t, expected_T_a_1, expected_T_a_2, color='DimGray', alpha=.7)
plt.fill_between(t, expected_T_a_1, expected_T_a_3, color='DimGray', alpha=.35)
plt.fill_between(t, expected_T_a_2, expected_T_a_4, color='DimGray', alpha=.35)

# Formatting the plot. Raw strings keep the TeX backslashes literal without
# relying on Python passing unknown escapes such as "\p" through unchanged.
plt.ylabel(r'$T(\phi,a)$')
plt.xlabel(r'$log_2(|\phi|)$')
plt.title(r'$T(\phi,a)$ in SMALLPRESENT-4 with all zero key upto 3 rounds')
plt.text(
    5.2, 78,
    r'For all $\phi_1,\phi_2$ if $|\phi_1|=|\phi_2|$ then $\phi_1 = \phi_2$')
plt.text(
    5.2, 85,
    r'For all $\phi_1,\phi_2$ if $|\phi_1| < |\phi_2|$ then $\phi_1 \subset \phi_2$'
)
def run(self, random_guess=0.1, num_iter=3):
    '''Run the exploration simulation repeatedly and plot the statistics.

    ``single_run`` is executed ``num_iter`` times with the given
    ``random_guess`` and a further ``num_iter`` times with
    ``random_guess=1.0`` as the random baseline. Two figures are produced
    and saved under ``<root_path>/figures``:

    * ``accuracy.pdf`` - mean prediction accuracy with a min/max band,
      red for the guided runs and blue for the baseline;
    * ``reactvity_stats.pdf`` - stacked bars of the average number of
      unreactive and reactive mixtures at ten evenly spaced points of
      the guided runs.

    Parameters
    ----------
    random_guess : float
        Passed to ``single_run`` for the guided runs.
    num_iter : int
        Number of repetitions of each kind of run.
    '''
    def _repeat(guess):
        # Run single_run num_iter times and collect its four outputs.
        steps, unreactive, reactive, acc = [], [], [], []
        for _ in tqdm(range(num_iter)):
            i_steps, i_unreactive, i_reactive, i_acc = \
                self.single_run(random_guess=guess)
            steps.append(i_steps)
            unreactive.append(i_unreactive)
            reactive.append(i_reactive)
            acc.append(i_acc)
        return steps, unreactive, reactive, acc

    # Guided runs
    time_step, num_unreactive, num_reactive, accuracy = _repeat(random_guess)
    min_accuracy = np.min(accuracy, axis=0)
    max_accuracy = np.max(accuracy, axis=0)
    mean_accuracy = np.mean(accuracy, axis=0)

    # Baseline: randomly keep guessing the whole space
    r_time_step, _, _, r_accuracy = _repeat(1.0)
    r_min_accuracy = np.min(r_accuracy, axis=0)
    r_max_accuracy = np.max(r_accuracy, axis=0)
    r_mean_accuracy = np.mean(r_accuracy, axis=0)

    # The time axis of the first run of each kind is used for all runs.
    plt.plot(time_step[0], mean_accuracy, color='red')
    plt.fill_between(time_step[0], max_accuracy, min_accuracy,
                     alpha=0.25, color='red')
    plt.plot(r_time_step[0], r_mean_accuracy, color='blue')
    plt.fill_between(r_time_step[0], r_max_accuracy, r_min_accuracy,
                     alpha=0.25, color='blue')
    plt.xlabel('% of space explored')
    plt.xlim(0, 110)
    plt.ylabel('prediction accuracy [%]')
    plt.ylim(40, 105)
    plt.title(
        'Reactvity prediction accuracy for\n unexplored reaction space'
    )
    path = os.path.join(root_path, 'figures', 'accuracy.pdf')
    plt.savefig(path)
    plt.show()
    plt.close()

    # Prepare bar graph comparing reactive and unreactive
    avg_num_unreactive = np.mean(num_unreactive, axis=0)
    avg_num_reactive = np.mean(num_reactive, axis=0)

    # Indices at 10%, 20%, ..., 100% of the recorded time steps.
    num_idxs = len(time_step[0])
    idxs = [int(num_idxs * (float(i) / 10)) - 1 for i in range(1, 11)]

    width = [9] * len(idxs)
    bar_space = [time_step[0][i] - 5 for i in idxs]
    bar_reactive = [avg_num_reactive[i] for i in idxs]
    bar_unreactive = [avg_num_unreactive[i] for i in idxs]

    plt.xlabel('% of space explored')
    plt.ylabel('total number of mixtures')
    plt.title('Statistics of Reactivity')
    plt.xlim(0, 105)
    plt.xticks([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
    plt.ylim(0, 1000)
    p1 = plt.bar(bar_space, bar_unreactive, color='b', width=width)
    p2 = plt.bar(bar_space, bar_reactive, bottom=bar_unreactive,
                 width=width, color='r')
    plt.legend((p1[0], p2[0]), ('Unreactive', 'Reactive'), loc='upper left')

    # Save before showing, as for the first figure: once the window has
    # been shown and closed there may be nothing left to write.
    path = os.path.join(root_path, 'figures', 'reactvity_stats.pdf')
    plt.savefig(path)
    plt.show()
    plt.close()
def cumulative_contributors(project="fury-gl/fury", show=True):
    """Calculate total contributors as new contributors join with time.

    The contributor statistics of ``project`` are fetched from the GitHub
    API. Each contributor's join date is the earliest week in which they
    have at least one commit; contributors with no commits are ignored.

    Parameters
    ----------
    project : str
        Repository in ``"owner/name"`` form, inserted into the
        ``/repos/<project>/stats/contributors`` URL.
    show : bool
        When True (and there is data), plot the cumulative count, save the
        figure to ``fury_cumulative_contributors.png`` and display it.

    Returns
    -------
    list of (int, int)
        ``(week, total)`` pairs sorted by week, where ``week`` is the ``'w'``
        value of a join week and ``total`` the number of contributors who
        had joined up to and including that week.

    Notes
    -----
    The fetched data is expected to look like::

        [
            {
                'weeks': [
                    {'w': 1254009600, 'a': 5, 'c': 2, 'd': 9},
                ],
                .....
            },
        ]
    """
    url = "https://api.github.com/repos/{0}/stats/contributors".format(project)
    r_json = get_json_from_url(url)

    contributors_join_date = {}
    for contributor in r_json:
        active_weeks = [week['w'] for week in contributor["weeks"]
                        if week["c"] > 0]
        if not active_weeks:
            continue
        # Only the first active week counts as the join date; counting every
        # active week would add the same contributor again and again.
        join_date = min(active_weeks)
        contributors_join_date[join_date] = \
            contributors_join_date.get(join_date, 0) + 1

    cumulative_list = []
    cumulative = 0
    for time in sorted(contributors_join_date):
        cumulative += contributors_join_date[time]
        cumulative_list.append((time, cumulative))

    # Nothing to unpack or plot when no contributor has any commit.
    if show and cumulative_list:
        from datetime import datetime
        import matplotlib.patches as mpatches
        import matplotlib.pyplot as plt
        import numpy as np

        years, c_cum = zip(*cumulative_list)
        years_ticks = np.linspace(min(years), max(years), 15)
        years_labels = []
        for y in years_ticks:
            # Tick label of the form "Q<quarter> - <year>".
            date = datetime.utcfromtimestamp(int(y))
            date_str = "Q{} - ".format((date.month - 1) // 3 + 1)
            date_str += date.strftime('%Y')
            years_labels.append(date_str)

        plt.fill_between(years, c_cum, color="skyblue", alpha=0.4)
        plt.plot(years, c_cum, color="Slateblue", alpha=0.6, linewidth=2)
        plt.tick_params(labelsize=12)
        plt.xticks(years_ticks, years_labels, rotation=45, fontsize=8)
        plt.yticks(fontsize=8)
        plt.xlabel('Date', size=12)
        plt.ylabel('Contributors', size=12)
        plt.ylim(bottom=0)
        plt.grid(True)
        plt.legend([
            mpatches.Patch(color='skyblue'),
        ], [
            'Contributors',
        ], bbox_to_anchor=(0.5, 1.1), loc='upper center')
        plt.savefig('fury_cumulative_contributors.png', dpi=150)
        plt.show()

    return cumulative_list
# Run BOCPD via SMC bocpd = BOCPD(tt) bocpd.apply() print(bocpd.changepoints) plt.rcParams.update({'font.size': 24}) fig = plt.figure(figsize=(15, 10)) # Plot index against time process and analysis ax = plt.subplot(2, 1, 1) ax1 = plt.plot(tt, 'b-', label='data') ax2 = plt.plot(bocpd.pred_mean, 'r-', label='predicted mean') ax3 = plt.plot(bocpd.percentile_r, 'g-.', label='credible interval') plt.plot(bocpd.percentile_l, 'g-.') plt.fill_between(np.arange(len(tt)), bocpd.percentile_l, bocpd.percentile_r, alpha = 0.05,\ color = 'g') for cp in bocpd.changepoints: ax3 = plt.axvline(x=cp, color='r', linestyle='--', label='changepoint') plt.xlabel('number of events', fontsize=24) plt.ylabel('time', fontsize=24) handles, labels = ax.get_legend_handles_labels() ax.legend(handles[:4], labels[:4], loc='upper left', fontsize=18) # Plot counting process and changepoints ax = plt.subplot(2, 1, 2) plt.step(tt, np.arange(len(tt)), label='counting process') for cp in bocpd.changepoints: plt.axvline(x=tt[cp], color='r', linestyle='--', label='infection time')
# The relevant precision.
# NOTE(review): the name `repr` shadows the built-in repr() from here on.
# The unary "+" in the print calls below leaves the number unchanged.
repr = (rere) / (rere + irre)
print('The relevant precision of Decision tree classifier is :', +repr)

# The irrelevant precision
irpr = (irir) / (irir + reir)
print('The irrelevant precision of Decision tree classifier is :', +irpr)

# ROC curve points for the predictions.
from sklearn.metrics import roc_curve
fpr, tpr, thresholds = roc_curve(actual, predictions)
print(fpr)
print(tpr)
print(thresholds)

# Area under the ROC curve.
from sklearn.metrics import roc_auc_score
auc = roc_auc_score(actual, predictions)
print("The auc:", +auc)


def class_logloss(actual, predicted, eps=1e-15):
    """Return -sum(actual * log(predicted)) / number of rows.

    ``predicted`` is clipped to [eps, 1 - eps] before the logarithm so that
    log(0) cannot occur. The row count is ``actual.shape[0]``.

    NOTE(review): only the ``actual * log(p)`` term is summed; there is no
    ``(1 - actual) * log(1 - p)`` term, so this presumably expects one-hot
    targets - confirm against the callers.
    """
    clip = np.clip(predicted, eps, 1 - eps)
    rows = actual.shape[0]
    vsota = np.sum(actual * np.log(clip))
    return -1.0 / rows * vsota


print("logloss: %0.3f " % class_logloss(yvalid, predictions))

# Draw the ROC curve and shade the area under it (tpr >= 0 selects every
# point where the true positive rate is non-negative).
import matplotlib.pyplot as plt
plt.plot(fpr, tpr)
plt.title("Decision tree-opportunity")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.fill_between(fpr, tpr, where=(tpr >= 0), color='Green', alpha=0.5)
plt.show()
np.percentile(winexp_arr[:, t], 90) for t in range(0, T) ] final_exp3 = np.array([exp3[i] for i in range(min_num_rounds, max_num_rounds)]) exp3_arr = np.array(exp3_regrets) #size repetitions x T exp3_10_percentile = [np.percentile(exp3_arr[:, t], 10) for t in range(0, T)] exp3_90_percentile = [np.percentile(exp3_arr[:, t], 90) for t in range(0, T)] matplotlib.rcParams.update({'font.size': 17}) plt.style.use('ggplot') fig = plt.figure() fig.set_figheight(10) fig.set_figwidth(10) plt.figure(1, figsize=(10, 10)) plt.plot(rounds, final_winexp, 'r', linewidth=2, label='WIN-EXP') plt.fill_between(rounds, winexp_10_percentile, winexp_90_percentile, facecolor='#db3236', alpha=0.4) plt.plot(rounds, final_exp3, 'b', linewidth=2, label='EXP3') plt.fill_between(rounds, exp3_10_percentile, exp3_90_percentile, facecolor='#4885ed', alpha=0.4) plt.legend(loc='best') plt.xlabel('number of rounds') plt.ylabel('regret') plt.title('Regret Performance of WIN-EXP vs EXP3') plt.savefig('win_adv_10000.png')
# Used plt.clf() usedDF, meansUsedDF, x, fit, fit_fn = regression(dataArrangedByCondition, 3, 'Used') meansUsedDF.plot(x='endTime', y='price', figsize=(12, 6), label='Used Price', color='blue') plt.plot(meansUsedDF['endTime'], fit_fn(x), linestyle='-', color='blue') plt.xticks(meansUsedDF.index, meansUsedDF['endTime'], rotation=90) ma = meansUsedDF['price'].rolling(5).mean() mstd = meansUsedDF['price'].rolling(5).std() plt.fill_between(mstd.index, ma - 2 * mstd, ma + 2 * mstd, color='k', alpha=0.2) plt.suptitle('Price Evolution of Used iPhone 6s 16GB') plt.savefig('Used Evolution.png') # Seller Refurbished plt.clf() refurbSellerDF, meansRefurbSellerDF, x, fit, fit_fn = regression( dataArrangedByCondition, 5, 'Seller refurbished') meansRefurbSellerDF.plot(x='endTime', y='price', figsize=(12, 6), label='Seller Refurb Price', color='orange') plt.plot(meansRefurbSellerDF['endTime'],
# Convert the requested time window into a number of rows after startIdx.
endIdx = startIdx + int(opts.time_window / tInt)
# Clip the window to the rows actually available.
if endIdx > waterfall.shape[0]:
    print 'Warning: time window (-w) in conjunction with start time (-s) results in a window extending beyond the filterbank file, clipping to maximum size'
    endIdx = waterfall.shape[0]
waterfall = waterfall[startIdx:endIdx, :]

# Reduce along axis 0 (the rows selected above) to one value per column.
meanBandpass = np.mean(waterfall, axis=0)
# NOTE(review): the names are swapped relative to the functions called -
# maxBandpass holds the minimum and minBandpass the maximum. The fill
# below covers the same region either way.
maxBandpass = np.min(waterfall, axis=0)
minBandpass = np.max(waterfall, axis=0)

if not opts.nodisplay or opts.savefig:
    if opts.minmax:
        # Faint band spanning the per-column extremes.
        plt.fill_between(fil.freqs, minBandpass, maxBandpass, alpha=0.1, edgecolor='none')
    plt.plot(fil.freqs, meanBandpass)
    plt.xlim(fil.freqs[0], fil.freqs[-1])

if opts.write:
    # write bandpass to a text file
    outFn = opts.prefix + str(fbIdx) + '.dat'
    print 'Writing bandpass of %s to %s' % (fbFn, outFn)
    # NOTE(review): `bandpass` is not assigned anywhere in this chunk;
    # confirm it is defined earlier (meanBandpass may have been intended).
    np.savetxt(outFn, bandpass, fmt='%.10f')

if not opts.nodisplay or opts.savefig:
    plt.title('Bandpass')
    plt.xlabel('Freq. (MHz)')
    plt.ylabel('Amp')
try: current_date = datetime.strptime( row[0], '%Y-%m-%d' ) high = int(row[1]) low = int(row[2]) except ValueError: print(current_date, 'Missing data') else: dates.append(current_date) highs.append(high) lows.append(low) # 绘图 fig = plt.figure(figsize=(10, 6)) plt.plot(dates, highs, c='red', alpha=0.5) plt.plot(dates, lows, c='blue', alpha=0.5) # 两个函数之间上色 plt.fill_between( dates, highs, lows, facecolor='blue', alpha=0.1 ) # 设置格式 plt.title('Daily high temperature, July 2014', fontsize=20) plt.xlabel('', fontsize=16) fig.autofmt_xdate() plt.ylabel('Temperature(F)', fontsize=16) plt.tick_params(axis='both', which='major', labelsize=16) plt.show()
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=None, train_sizes=np.linspace(.1, 1.0, 5)):
    """
    Generate a simple plot of the test and training learning curve.

    Parameters
    ----------
    estimator : object
        Estimator handed to ``learning_curve``.

    title : string
        Title for the chart.

    X : array-like
        Training data handed to ``learning_curve``.

    y : array-like
        Target handed to ``learning_curve``.

    ylim : tuple, shape (ymin, ymax), optional
        Defines minimum and maximum y values plotted.

    cv : optional
        Cross-validation setting, forwarded unchanged to ``learning_curve``.

    n_jobs : int or None, optional (default=None)
        Number of parallel jobs, forwarded unchanged to ``learning_curve``.

    train_sizes : array-like
        Training-set sizes, forwarded unchanged to ``learning_curve``
        (default: np.linspace(0.1, 1.0, 5)).

    Returns
    -------
    The ``plt`` object the chart was drawn with.
    """
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)

    def _mean_and_std(scores):
        # Collapse the per-fold scores of each training size.
        return np.mean(scores, axis=1), np.std(scores, axis=1)

    stats = [_mean_and_std(train_scores), _mean_and_std(test_scores)]
    colours = ["r", "g"]
    labels = ["Training score", "Cross-validation score"]

    plt.grid()

    # First both +/- one standard deviation bands ...
    for (centre, spread), colour in zip(stats, colours):
        lower = centre - spread
        upper = centre + spread
        plt.fill_between(train_sizes, lower, upper, alpha=0.1, color=colour)

    # ... then both mean curves.
    for (centre, _), colour, label in zip(stats, colours, labels):
        plt.plot(train_sizes, centre, 'o-', color=colour, label=label)

    plt.legend(loc="best")
    return plt
fig = plt.figure() ax = fig.add_axes([0, 0, 1, 1]) df_collisions_new_standardized = ( df_collisions_new - df_collisions_new.mean()) / df_collisions_new.std() lower_range = 100 upper_range = 1000 x = df['episode'][lower_range:upper_range] y = df_collisions_new_standardized['collisions_DDPG'][lower_range:upper_range] error = (df_collisions_new['collisions_DDPG'].std()) / 1000 plt.plot(x, y, 'k', color='#CC4F1B', label='collisions DDPG') plt.fill_between(x, y - error, y + error, alpha=0.5, edgecolor='#CC4F1B', facecolor='#FF9848') y = df_collisions_new_standardized['collisions_MADDPG'][ lower_range:upper_range] error = (df_collisions_new['collisions_MADDPG'].std()) / 1000 plt.plot(x, y, 'k', color='#1B2ACC', label='collisions MADDPG') plt.fill_between(x, y - error, y + error, alpha=0.2, edgecolor='#1B2ACC', facecolor='#089FFF', linewidth=4, linestyle='dashdot',
def adjust_feature_size(self, selected_feature_size=None):
    '''Plot the score curve and optionally switch to a custom feature size.

    The mean score for every feature count (``self.grid_scores_``) is
    plotted together with the band returned by ``std_interval`` and a
    marker at the best-scoring feature count.  When
    ``selected_feature_size`` is valid and
    ``self.active_feature_size_selection`` is true, the ``ranking_``,
    ``support_`` and ``n_features_`` attributes are recomputed from
    ``self.ranking_original`` and the chosen size is marked as well.

    Parameters
    ----------
    selected_feature_size : int, (default=None)
        The chosen number of features, from 1 to the full feature size.
        If it is None or lies outside that range, no attribute is
        changed and only the optimal feature size is shown.

    Examples
    --------
    The following example shows how to choose a custom feature size
    with the adjust_feature_size function (minimal feature size first)

        # Import library
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.datasets import make_classification

        # Make data
        X, y = make_classification(n_samples=50, n_features=30,
                                   n_informative=5, n_redundant=0,
                                   n_repeated=0, n_classes=2,
                                   random_state=0, shuffle=False)

        # Build a random forest model and
        # perform the RFE with 5-fold cross validation
        clf = RandomForestClassifier(random_state=0)
        rfecv = RFECV(estimator=clf, step=1, cv=StratifiedKFold(5),
                      scoring='roc_auc', n_jobs=-1,
                      active_feature_size_selection=True)
        rfecv.fit(X, y)

        # Adjust the feature size to suit your need
        rfecv.adjust_feature_size(selected_feature_size=26)
        rfecv.adjust_feature_size(selected_feature_size=15)

        # Access the updated attribute
        print(rfecv.ranking_)
    '''
    n_sizes = len(self.grid_scores_)
    best_score = np.max(self.grid_scores_)
    # First position holding the best mean score; feature counts are 1-based.
    optimal_feature_size = list(self.grid_scores_).index(best_score) + 1
    # self.scores is transposed and reversed before std_interval is applied.
    # NOTE(review): presumably this lines the rows up with grid_scores_ -
    # confirm against where self.scores is filled.
    score_lower, score_upper = zip(
        *[std_interval(row) for row in list(zip(*self.scores))[::-1]])
    n_features_selected = range(1, n_sizes + 1)

    # Visualize
    plt.figure(figsize=(9, 6))
    plt.plot(n_features_selected, self.grid_scores_, color='#14213d',
             label=r'Mean score', lw=2, alpha=.8)
    plt.fill_between(n_features_selected, score_lower, score_upper,
                     color='#90a8c3', alpha=.2, label=r'$\pm$ 1 std. dev.')

    # An out-of-range request must fall back to the optimal-only plot, as
    # the docstring promises, instead of raising IndexError (too large) or
    # silently wrapping around (negative).
    size_is_valid = (selected_feature_size is not None
                     and 1 <= selected_feature_size <= n_sizes)

    if size_is_valid and self.active_feature_size_selection:
        # Re-rank: ranks up to the chosen size collapse to 1 (selected),
        # larger ranks are shifted down so they continue from 2.
        original_ranks = np.asarray(self.ranking_original)
        self.ranking_ = np.where(
            original_ranks > selected_feature_size,
            original_ranks - selected_feature_size + 1,
            1)
        self.support_ = self.ranking_ == 1
        self.n_features_ = int(np.count_nonzero(self.support_))

        # plot
        selected_size_score = self.grid_scores_[selected_feature_size - 1]
        plt.axvline(x=selected_feature_size, linestyle='--', lw=2,
                    color='#b2182b',
                    label='Selected feature size \n'
                          '(score = %0.3f) \n(feature size = %i)' %
                          (selected_size_score, selected_feature_size),
                    alpha=.8)
        plt.axvline(x=optimal_feature_size, linestyle='--', lw=2,
                    color='#d7b9d5',
                    label='Optimal feature size \n'
                          '(score = %0.3f) \n(feature size = %i)' %
                          (best_score, optimal_feature_size),
                    alpha=.8)
    else:
        if not self.active_feature_size_selection \
                and selected_feature_size:
            print('In order to select feature size, '
                  'please set active_feature_size_selection to True')
        plt.axvline(x=optimal_feature_size, linestyle='--', lw=2,
                    color='#b2182b',
                    label='Optimal feature size \n'
                          '(score = %0.3f) \n(feature size = %i)' %
                          (best_score, optimal_feature_size),
                    alpha=.8)

    plt.xlabel("Number of features selected")
    plt.ylabel("Cross validation score (nb of correct classifications)")
    plt.title('Recursive Feature Elimination with Cross Validation')
    plt.legend(loc="lower right", labelspacing=1.3)
    plt.show()
wf_collect=True, ecutwfc=40.0, ecutrho=500.0, occupations='smearing', smearing='mp', degauss=0.01, nspin=2, kpts=(6, 6, 6), walltime='24:00:00', ppn=4) as calc: fermi = calc.get_fermi_level() dos = EspressoDos(efermi=fermi) energies = dos.get_energies() occupied = (energies < 0) & (energies > -10) for orb in orbitals: ind = (energies < 5) & (energies > -10) d = dos.get_site_dos(0, orb) plt.plot(energies[ind], d[ind], c=colors[orb][0], label=orb) plt.fill_between(x=energies[occupied], y1=d[occupied], y2=np.zeros(energies[occupied].shape), color=colors[orb][1], label=orb) plt.xlabel('Energy (eV)') plt.ylabel('DOS (arbitrary units)') plt.ylim(0, 6) plt.savefig('figures/Ni-spin-proj-DOS.png') plt.legend() plt.show()
# Plot number of finished reps plt.figure() plt.plot(xscale * grid_vals, n_finished_g, marker='o') plt.grid(True) plt.xlabel(xlabel) plt.xlabel("# finished reps") # Plot reward ci_factor = 1.96 / np.sqrt(n_finished_g) # 95% confidence interval factor plt.figure() plt.plot(xscale * grid_vals, mean_reward, marker='o') plt.fill_between(xscale * grid_vals, mean_reward - std_reward * ci_factor, mean_reward + std_reward * ci_factor, color='blue', alpha=0.2) plt.grid(True) plt.xlabel(xlabel) if y_lim: plt.ylim(y_lim) if x_lim: plt.xlim(x_lim) plt.ylabel('Average Episode Return') if save_PDF: plt.title(title_prefix) save_fig(args.run_name) else: plt.title( f'{title_prefix}, reps_finished: {min(n_finished_g)} - {max(n_finished_g)}\n {args.result_dir}',
def vis_sinusoid_traj(trajs_c, times_c, trajs_t, times_t, pred_c, pred_t,
                      sigma_c, sigma_t, epoch, exp_name, val=None):
    """Plot context/target points against the prediction and save a PNG.

    The context (``*_c``) and target (``*_t``) arrays are concatenated and
    sorted by time, then drawn as ground truth, predicted trajectory and a
    ``pred +/- sigma`` band; the raw context and target points are
    scattered on top.  The figure is saved to
    ``./vis<exp_name>train/<epoch>.png`` (``val/`` instead of ``train/``
    when ``val`` is not None) and then closed.

    Parameters
    ----------
    trajs_c, times_c, trajs_t, times_t, pred_c, pred_t, sigma_c, sigma_t
        Objects supporting ``.cpu().detach().numpy()``.
        NOTE(review): presumably 1-D torch tensors of matching length per
        context/target group - confirm against the caller.
    epoch
        Epoch number; its string form is left-padded with zeros to at
        least three characters for the file name.
    exp_name : str
        Appended verbatim to ``'./vis'`` to form the output directory, so
        it is expected to carry its own path separators.
    val
        ``None`` selects the ``train/`` sub-directory, anything else
        selects ``val/``.
    """
    def _to_numpy(tensor):
        # Move to host memory and drop the autograd graph before converting.
        return tensor.cpu().detach().numpy()

    trajs_c, times_c, trajs_t, times_t = map(
        _to_numpy, (trajs_c, times_c, trajs_t, times_t))
    pred_c, pred_t, sigma_c, sigma_t = map(
        _to_numpy, (pred_c, pred_t, sigma_c, sigma_t))

    # Merge context and target samples and order everything by time so the
    # line plots and the band are drawn left to right.
    times = np.concatenate([times_c, times_t])
    order = np.argsort(times)
    times = times[order]
    trajs = np.concatenate([trajs_c, trajs_t])[order]
    pred = np.concatenate([pred_c, pred_t])[order]
    sigma = np.concatenate([sigma_c, sigma_t])[order]

    plt.xlim(-2, 2)
    plt.ylim(-4, 4)
    plt.plot(times, trajs, color='black', label='Ground truth', alpha=0.6)
    plt.plot(times, pred, color='red', label='Predictive trajectory',
             alpha=0.6)
    plt.fill_between(times, pred - sigma, pred + sigma, alpha=0.5,
                     label='Confidence')
    plt.scatter(times_c, trajs_c, color='black', label='Context point',
                alpha=0.6)
    plt.scatter(times_t, trajs_t, color='red', label='Target point',
                alpha=0.6)
    plt.legend(fontsize='xx-small', bbox_to_anchor=(0, 1.02, 1, 0.2),
               loc="lower left", mode="expand", borderaxespad=0, ncol=5)
    plt.title(f'Num contexts: {len(trajs_c)}, Num targets: {len(trajs_t)}',
              fontsize='xx-small', y=-0.1)

    out_dir = './vis' + exp_name + ('train/' if val is None else 'val/')
    # makedirs creates every missing level in one call and tolerates a
    # directory that already exists, so nested experiment names and
    # concurrent runs no longer fail the way chained os.mkdir calls did.
    os.makedirs(out_dir, exist_ok=True)

    file_stem = str(epoch).rjust(3, '0')
    plt.savefig(out_dir + file_stem + '.png', dpi=300)
    plt.close()
def ext_plot(M, E, R, M_err, color):
    """Draw ``M + E * extinction_func(x, R)**2`` with a ``+/- M_err`` band.

    The curve is sampled on ``x = 0, 0.05, ..., < 10`` and drawn as a thin
    dashed line in ``color`` over a faint band of the same colour.
    """
    grid = np.arange(0, 10.0, 0.05)
    # extinction_func is evaluated one sample at a time.
    ext_values = np.asarray([extinction_func(x_val, R) for x_val in grid])
    curve = M + E * ext_values**2

    lower = curve - M_err
    upper = curve + M_err
    plt.fill_between(grid, lower, upper, facecolor=color, alpha=0.1)
    plt.plot(grid, curve, '--', color=color, linewidth=0.7)
import sys
# -*- coding: utf-8 -*-
import numpy as np
from sympy import *
import matplotlib.pyplot as plt
from scipy.stats import norm

# --- Figure 1: bell curve exp(-x**2 / 2), filled down to y = 0 -------------
fig = plt.gcf()
fig.set_size_inches(8, 5)

var('x')


def f(x):
    # ``exp`` is the one brought in by the sympy wildcard import above.
    return exp(-x**2 / 2)


x = np.linspace(-4, 4, 100)
# Evaluate point by point, then coerce the results to a float array.
y = np.array(list(map(f, x)), dtype='float')

plt.grid(True)
plt.title('Gaussian Curve')
plt.xlabel('X')
plt.ylabel('Y')
plt.plot(x, y, color='red')
plt.fill_between(x, y, 0, color='#123456')
plt.show()

# --- Figure 2: standard normal pdf between its 1% and 99% quantiles --------
fig, ax = plt.subplots(1, 1)
lower_q = norm.ppf(0.01)
upper_q = norm.ppf(0.99)
x = np.linspace(lower_q, upper_q, 100)
# The same pdf is drawn twice: a wide translucent red line under a thin
# black one.  Labels are set but no legend is drawn.
ax.plot(x, norm.pdf(x), 'r-', lw=5, alpha=0.6, label='norm pdf')
ax.plot(x, norm.pdf(x), 'k-', lw=2, label='frozen pdf')
plt.show()
ap_dictionary[class_name] = ap n_images = counter_images_per_class[class_name] lamr, mr, fppi = log_average_miss_rate(np.array(rec), np.array(fp), n_images) lamr_dictionary[class_name] = lamr """ Draw plot """ if draw_plot: plt.plot(rec, prec, '-o') # add a new penultimate point to the list (mrec[-2], 0.0) # since the last line segment (and respective area) do not affect the AP value area_under_curve_x = mrec[:-1] + [mrec[-2]] + [mrec[-1]] area_under_curve_y = mprec[:-1] + [0.0] + [mprec[-1]] plt.fill_between(area_under_curve_x, 0, area_under_curve_y, alpha=0.2, edgecolor='r') # set window title fig = plt.gcf() # gcf - get current figure fig.canvas.set_window_title('AP ' + class_name) # set plot title plt.title('class: ' + text) #plt.suptitle('This is a somewhat long figure title', fontsize=16) # set axis titles plt.xlabel('Recall') plt.ylabel('Precision') # optional - set axes axes = plt.gca() # gca - get current axes axes.set_xlim([0.0,1.0]) axes.set_ylim([0.0,1.05]) # .05 to give some extra space # Alternative option -> wait for button to be pressed # while not plt.waitforbuttonpress(): pass # wait for key display
def plot_data_predictions(
    data, preds, y_valid, y_dates_valid, scaler, title, forecast_data, n_loops
):
    """Plot prices, validation-window predictions and the forecast.

    Parameters
    ----------
    data
        Price history; ``data.index`` is used for x and ``data.values``
        for y.
    preds, y_valid
        Indexable collections of predicted / actual windows; each window
        must support ``reshape`` and ``ravel``.
    y_dates_valid
        Indexable collection of x-values, one entry per window.
    scaler
        Object with ``inverse_transform``; when truthy, every window is
        mapped back through it before plotting.  Falsy means the windows
        are plotted as given.
    title : str
        Figure title.
    forecast_data
        Forecast with an ``index``; when ``n_loops`` is not 1 it must also
        support ``median(axis=1)`` and ``quantile(q, axis=1)``.
        NOTE(review): presumably a pandas Series/DataFrame - confirm.
    n_loops : int
        ``1`` plots ``forecast_data.values`` directly; any other value
        plots the row-wise median with a 25%-75% quantile band.
    """

    def _to_price_scale(window):
        """Flatten *window*, undoing the scaler first when one is given."""
        if scaler:
            return scaler.inverse_transform(window.reshape(-1, 1)).ravel()
        return window.ravel()

    plt.figure(figsize=plot_autoscale(), dpi=PLOT_DPI)
    plt.plot(
        data.index,
        data.values,
        "-ob",
        lw=1,
        ms=2,
        label="Prices",
    )

    # Every window except the last: thin prediction line, shaded red where
    # the prediction is below the actual value and green where it is above.
    for i in range(len(y_valid) - 1):
        y_pred = _to_price_scale(preds[i])
        y_act = _to_price_scale(y_valid[i])
        plt.plot(
            y_dates_valid[i],
            y_pred,
            "r",
            lw=1,
        )
        plt.fill_between(
            y_dates_valid[i],
            y_pred,
            y_act,
            where=(y_pred < y_act),
            color="r",
            alpha=0.2,
        )
        plt.fill_between(
            y_dates_valid[i],
            y_pred,
            y_act,
            where=(y_pred > y_act),
            color="g",
            alpha=0.2,
        )

    # Leave this one out of the loop so that the legend doesn't get
    # overpopulated with "Predictions"
    final_pred = _to_price_scale(preds[-1])
    final_valid = _to_price_scale(y_valid[-1])
    plt.plot(
        y_dates_valid[-1],
        final_pred,
        "r",
        lw=2,
        label="Predictions",
    )
    plt.fill_between(
        y_dates_valid[-1],
        final_pred,
        final_valid,
        color="k",
        alpha=0.2,
    )

    # The forecast region starts one day before the first forecast point.
    forecast_start = forecast_data.index[0] - timedelta(days=1)
    plt.axvspan(
        forecast_start,
        forecast_data.index[-1],
        facecolor="tab:orange",
        alpha=0.2,
    )
    _, _, ymin, ymax = plt.axis()
    # ``colors`` alone is enough; the duplicate ``color="k"`` was redundant.
    plt.vlines(
        forecast_start,
        ymin,
        ymax,
        colors="k",
        linewidth=3,
        linestyle="--",
    )

    if n_loops == 1:
        plt.plot(
            forecast_data.index,
            forecast_data.values,
            "-og",
            ms=3,
            label="Forecast",
        )
    else:
        plt.plot(
            forecast_data.index,
            forecast_data.median(axis=1).values,
            "-og",
            ms=3,
            label="Forecast",
        )
        plt.fill_between(
            forecast_data.index,
            forecast_data.quantile(0.25, axis=1).values,
            forecast_data.quantile(0.75, axis=1).values,
            color="c",
            alpha=0.3,
        )

    plt.legend(loc=0)
    plt.xlim(data.index[0], forecast_data.index[-1] + timedelta(days=1))
    plt.xlabel("Time")
    plt.ylabel("Value")
    # The on/off flag is passed positionally: the ``b=`` keyword was renamed
    # to ``visible=`` in Matplotlib 3.5 and later removed, while the
    # positional form works on old and new releases alike.
    plt.grid(True, which="major", color="#666666", linestyle="-")
    plt.title(title)
    if gtff.USE_ION:
        plt.ion()
    plt.show()
def Print(self):
    """Draw the TAD distance map and write map and contact files to disk.

    With ``self.printAllFrames`` set, the current-frame quantities
    (``sqDist``, ``contHist``) are used and the outputs are a PNG plus one
    contact table per stride, named with the frame index.  Otherwise the
    cumulative quantities are used (the distance divided by the number of
    frames read), and the outputs are the type file, a PDF map and one
    contact table per stride.
    """
    reader = self.reader
    perFrame = self.printAllFrames

    if perFrame:
        tadDist = self.sqDist**0.5
        tadContact = self.contHist
    else:
        tadDist = self.cumulSqDist**0.5 / (reader.frame - reader.initFrame)
        tadContact = self.cumulContHist

    tadMap = squareform(tadDist)

    # Normalise over the first two axes.
    # NOTE(review): ``/=`` operates on the object stored on self, so for a
    # NumPy array the stored histogram is normalised too - confirm intended.
    tadContact /= tadContact.sum(axis=(0, 1), keepdims=True)

    # Reuse an open figure if there is one, otherwise create a new one.
    if not plt.get_fignums():
        plt.figure()
    else:
        plt.clf()

    nTad = reader.nTad
    dMap = plt.imshow(tadMap, extent=(0, nTad, 0, nTad), origin='lower',
                      norm=LogNorm())
    plt.colorbar(dMap)

    # Shade two regions for every non-empty domain.
    for d in reader.polyDomains:
        if len(d) == 0:
            continue

        first, last = d[0], d[-1]
        plt.fill_between(x=[first, last, nTad],
                         y1=[first, last, last],
                         y2=[first, first, first],
                         color='red', alpha=0.5, lw=0)
        plt.fill_between(x=[first, last],
                         y1=[first, last],
                         color='red', alpha=0.5, lw=0)

    plt.xlim([0, nTad])
    plt.ylim([0, nTad])

    if not perFrame:
        np.savetxt(self.typeFile, self.polyType)
        plt.savefig(self.mapFile, format="pdf", transparent=True)

        print("\033[1;32mPrinted TAD type(s) to '%s'\033[0m" % self.typeFile)
        print("\033[1;32mPrinted distance map to '%s'\033[0m" % self.mapFile)

        for stride in range(self.nStride):
            contactFile_ = self.contactFile % (stride + 1)
            np.savetxt(contactFile_, tadContact[:, :, stride])
    else:
        # File names carry the index of the frame that was just processed.
        frameIdx = reader.frame - 1
        mapFile_ = self.mapFile % frameIdx
        plt.savefig(mapFile_, format="png", dpi=300)

        for stride in range(self.nStride):
            contactFile_ = self.contactFile % (stride + 1, frameIdx)
            np.savetxt(contactFile_, tadContact[:, :, stride])

        print("\033[1;32mPrinted distance map to '%s'\033[0m" % mapFile_)
# Power spectrum from the raw FFT, plotted in ascending frequency order.
idx = np.argsort(freqs2)
ps2 = np.abs(np.fft.fft(data))**2
plt.figure()
plt.plot(freqs2[idx], ps2[idx])
plt.title('Power spectrum (np.fft.fft)')

# Define delta lower and upper limits
low, high = 0.5, 4

# Find intersecting values in frequency vector
idx_delta = np.logical_and(freqs >= low, freqs <= high)

# Plot the power spectral density and fill the delta area
plt.figure(figsize=(7, 4))
plt.plot(freqs, psd, lw=2, color='k')
plt.fill_between(freqs, psd, where=idx_delta, color='skyblue')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Power spectral density (uV^2 / Hz)')
plt.xlim([0, 10])
plt.ylim([0, psd.max() * 1.1])
plt.title("Welch's periodogram")
# Remove the spines before the figure is displayed; calling this after
# plt.show() is too late to affect what was shown.
sns.despine()
plt.show()

# ``simps`` was removed from recent SciPy releases in favour of
# ``simpson``.  Prefer the new function but keep it bound to the old name
# so any later code that calls ``simps`` still works.
try:
    from scipy.integrate import simpson as simps
except ImportError:  # older SciPy that only provides the old name
    from scipy.integrate import simps

# Frequency resolution: spacing between adjacent entries of ``freqs``.
# The original note gives 1 / 4 = 0.25 and says this step was unclear to
# the author.
# NOTE(review): presumably 0.25 Hz from a 4 s Welch window - confirm
# against where ``freqs`` / ``psd`` are computed.
freq_res = freqs[1] - freqs[0]

# Compute the absolute power by approximating the area under the curve
delta_power = simps(psd[idx_delta], dx=freq_res)
if np.max(altitude.alt[nightmask]) > maxalt * u.deg: iic = np.mod(ic, 7) lab = name + ", " + sp + ", mA=" + str( mass1) + ", mB=" + str(mass2) + ", $\\theta$= " + str( round(theta, 1)) + " mas," + " H=" + str( round(float(dat["Hmag"][i]), 1)) plt.plot(delta_midnight, altitude.alt, label=lab, color="C" + str(iic), ls=lsarr[int(ic / 7)]) ic = ic + 1 plt.fill_between(delta_midnight.to('hr').value, 0, 90, sunaltazs.alt < -0 * u.deg, color='0.5', zorder=0) plt.fill_between(delta_midnight.to('hr').value, 0, 90, sunaltazs.alt < -18 * u.deg, color='k', zorder=0) plt.ylim(0., 90) plt.xlim(-12, 12) plt.xticks(np.arange(13) * 2 - 12) plt.ylim(10, 90) plt.axhline(30.0, color="gray", lw=1) plt.xlabel('Hours from Midnight = ' + midlocal.iso) plt.ylabel('Altitude [deg]')