def test_hist_with_legend_raises(self, by):
    """Series.hist must refuse ``legend=True`` combined with ``label``.

    ``by`` is forwarded unchanged to ``Series.hist``; it is presumably
    supplied by a parametrization outside this block.
    """
    # GH 6279 - Series histogram with legend and label raises
    group_labels = ["1"] * 15 + ["2"] * 15
    ser = Series(np.random.randn(30), index=group_labels, name="a")
    ser.index.name = "b"

    expected_message = "Cannot use both legend and label"
    with pytest.raises(ValueError, match=expected_message):
        ser.hist(legend=True, by=by, label="c")
def two_histogram(x: pd.Series, y: pd.Series) -> None:
    """
    Draw the histograms of two samples on the same picture.

    Every observation is weighted by ``1 / len(sample)``, so bar heights are
    relative frequencies and samples of different sizes stay comparable.
    The mean of each sample is marked with a dashed vertical line (red for
    ``x``, blue for ``y``) and a legend is built from the series names.

    Parameters
    ----------
    x, y : pd.Series
        The two samples; their ``name`` attributes become the legend labels.
    """
    x.hist(alpha=0.5, weights=[1. / len(x)] * len(x))
    # The second histogram must be drawn from ``y`` with weights based on
    # ``len(y)``; plotting ``x`` twice left ``y`` out of the figure entirely.
    y.hist(alpha=0.5, weights=[1. / len(y)] * len(y))
    plt.axvline(x.mean(), color="red", alpha=0.8, linestyle="dashed")
    plt.axvline(y.mean(), color="blue", alpha=0.8, linestyle="dashed")
    plt.legend([x.name, y.name])
def plot_histogram(column: pd.Series, title: str):
    """Show a density histogram of ``column`` with a KDE curve and IQR fences.

    The number of bins is ``1 + int(log2(n))`` where ``n`` is the length of
    the column. Two red vertical lines mark ``Q1 - 1.5 * IQR`` and
    ``Q3 + 1.5 * IQR``, with quartiles computed ignoring NaN values.

    Parameters
    ----------
    column : pd.Series
        Values to plot.
    title : str
        Title set on the current axes before drawing.
    """
    plt.title(title)

    bin_count = 1 + int(np.log2(column.shape[0]))
    column.hist(bins=bin_count, density=True, grid=True)
    column.plot.kde()

    first_quartile, third_quartile = np.nanquantile(column, q=[0.25, 0.75])
    iqr = third_quartile - first_quartile
    for fence in (first_quartile - 1.5 * iqr, third_quartile + 1.5 * iqr):
        plt.axvline(fence, color='red')

    plt.show()
def show_data_dist(lines):
    """Save a 50-bin histogram of the fourth field of every record.

    Element ``[3]`` of each item in ``lines`` is collected (the original
    code calls these values "angles"), the number of values is printed and
    the histogram is drawn on figure 0 and written to
    ``./images_doc/dist.png``.
    """
    angle_series = Series([record[3] for record in lines])
    print("Total data: ", len(angle_series))

    plt.figure(0)
    angle_series.hist(bins=50)
    plt.title("data distribution")
    plt.savefig("./images_doc/dist.png")
def test_hist_no_overlap(self):
    """Histograms drawn into two subplots must leave exactly two axes."""
    from matplotlib.pyplot import subplot, gcf

    x = Series(randn(2))
    y = Series(randn(2))
    for position, sample in ((121, x), (122, y)):
        subplot(position)
        sample.hist()

    fig = gcf()
    # The axes list is read via the attribute or the getter depending on
    # the ``mpl_ge_1_5_0`` flag of the test class.
    if self.mpl_ge_1_5_0:
        axes = fig.axes
    else:
        axes = fig.get_axes()
    assert len(axes) == 2
def test_hist_no_overlap(self):
    """Two histograms in two subplots must produce two axes on the figure."""
    from matplotlib.pyplot import subplot, gcf

    left_sample, right_sample = Series(randn(2)), Series(randn(2))

    subplot(121)
    left_sample.hist()
    subplot(122)
    right_sample.hist()

    axes_count = len(gcf().get_axes())
    self.assertEqual(axes_count, 2)
def test_hist_no_overlap(self):
    """Drawing one histogram per subplot must not create extra axes."""
    from matplotlib.pyplot import gcf, subplot

    first = Series(np.random.randn(2))
    second = Series(np.random.randn(2))

    # One histogram in each half of a 1x2 grid.
    subplot(121)
    first.hist()
    subplot(122)
    second.hist()

    figure_axes = gcf().axes
    assert len(figure_axes) == 2
def visualize_hist(array_1d, title='Histogram', precision=50):
    """Show a histogram of ``array_1d`` in its own, randomly numbered figure.

    A random id between 1 and 1000 is appended to ``title`` and reused as
    the Matplotlib figure number. The full title is printed before the
    figure is shown.

    Parameters
    ----------
    array_1d : array-like
        Values wrapped in a ``Series`` and plotted.
    title : str
        Base title; ``' id:<n>'`` is appended to it.
    precision : int
        Number of histogram bins.
    """
    r = randint(1, 1000)
    title = title + ' id:' + str(r)
    # Call form of print: valid on Python 3 and prints the same text on
    # Python 2, unlike the former ``print title`` statement.
    print(title)
    plt.figure(r)
    plt.title(title)
    series = Series(array_1d)
    series.hist(bins=precision)
    plt.show()
def plots(self):
    """Draw a normalized histogram of a bimodal sample with its KDE curve.

    The sample concatenates 1000 draws from N(0, 1) and 1000 draws from a
    normal distribution with mean 10 and standard deviation 2.
    """
    # Example inputs for a line plot and a bar plot. This method does not
    # draw them; they are kept so the sequence of random draws is unchanged.
    s = Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
    df = DataFrame(np.random.randn(10, 4).cumsum(0),
                   columns=['A', 'B', 'C', 'D'],
                   index=np.arange(0, 100, 10))

    comp1 = np.random.normal(0, 1, size=1000)  # N(0,1)
    comp2 = np.random.normal(10, 2, size=1000)  # N(10,4)
    values = Series(np.concatenate([comp1, comp2]))

    # ``density=True`` replaces the ``normed=True`` keyword, which current
    # Matplotlib releases no longer accept.
    values.hist(bins=500, alpha=0.3, density=True, color='orange')
    values.plot(kind='kde', style='k--')
def plot_temperature(temp: pandas.Series, name: str):
    """Plot a room's temperature as a histogram and as a time series.

    Two separate figures are created: the first holds the histogram of the
    non-missing readings, the second plots the readings against the index
    of the series.

    Parameters
    ----------
    temp : pandas.Series
        Temperature readings; missing values are dropped before plotting.
    name : str
        Room name, used in both plot titles.
    """
    temp = temp.dropna()  # drop empty readings

    temperature_label = r"Temperature [$^\circ$C]"
    plot_title = f"{name} temperature"

    hist_ax = plt.figure().gca()
    temp.hist(ax=hist_ax)
    hist_ax.set_ylabel("# of occurences")
    hist_ax.set_xlabel(temperature_label)
    hist_ax.set_title(plot_title)

    line_ax = plt.figure().gca()
    line_ax.plot(temp.index, temp.values)
    line_ax.set_xlabel("time")
    line_ax.set_ylabel(temperature_label)
    line_ax.set_title(plot_title)
def pandas_draw_hist_kde():
    """Show a normalized histogram and a KDE curve on the same axes.

    A histogram is a bar chart of discretized value frequencies: the data
    points are split into evenly spaced bins and the number of points per
    bin is drawn. A density plot is an estimate of a continuous probability
    distribution that could have generated the observed data.
    """
    # Sample for the stand-alone histogram / density examples. Those plots
    # are not drawn here; the sample is kept so random draws are unchanged.
    data = Series(np.random.randn(1000))

    # Draw the histogram and the density plot together.
    comp1 = np.random.normal(0, 1, size=200)
    comp2 = np.random.normal(10, 2, size=200)
    values = Series(np.concatenate([comp1, comp2]))  # stack end to end

    # ``density=True`` replaces the ``normed=True`` keyword, which current
    # Matplotlib releases no longer accept.
    values.hist(bins=100, alpha=0.3, color='k', density=True)
    values.plot(kind='kde', style='k--')
    plt.show()
def hist_distribute(x: pd.Series, title: str, nbin=10):
    '''
    Build a histogram figure for a series and return it after closing it.

    A new figure sized by the module-level ``figure_size`` is opened, the
    histogram is drawn in a desaturated "indianred", the title is set and
    all pyplot figures are closed before the figure object is returned.

    :param x: pandas series
    :param title: plot name
    :param nbin: number of histogram bins
    :return: matplot figure
    '''
    plt.figure(figsize=figure_size)
    bar_colour = sns.desaturate("indianred", .8)
    hist_axes = x.hist(color=bar_colour, bins=nbin)
    figure = hist_axes.get_figure()
    plt.title(title)
    plt.close('all')
    return figure
def plot_noise():
    """Plot a seeded 50-point white-noise series three ways.

    The series is drawn from a standard normal distribution with seed 30.
    Its summary statistics are printed, then a 2x2 grid of axes shows the
    line plot (top left), the histogram (top right) and the autocorrelation
    plot (bottom left). The bottom-right axes are left empty.
    """
    # Fix the random number generator so the series is reproducible.
    seed(30)
    noise = Series([gauss(0.0, 1.0) for _ in range(50)])

    # Summary statistics.
    print(noise.describe())

    _, grid = plt.subplots(nrows=2, ncols=2)
    line_ax, hist_ax, acf_ax = grid[0, 0], grid[0, 1], grid[1, 0]

    noise.plot(ax=line_ax)
    line_ax.set_title('White Noise')

    noise.hist(ax=hist_ax)
    hist_ax.set_title('Noise Histogram')

    from pandas.plotting import autocorrelation_plot
    autocorrelation_plot(noise, ax=acf_ax)

    plt.tight_layout()
    plt.show()
def plot_residuals(residuals: pd.Series):
    """
    Show four diagnostic plots of the residuals, one figure after another:

    * histogram
    * kernel density estimate
    * QQ plot
    * autocorrelation plot

    Parameters
    ----------
    residuals : pd.Series
        observed values - forecasted values
    """
    diagnostics = (
        residuals.hist,
        lambda: residuals.plot(kind="kde"),
        lambda: qqplot(residuals),
        lambda: autocorrelation_plot(residuals),
    )
    # Each diagnostic is displayed before the next one is drawn.
    for draw in diagnostics:
        draw()
        plt.show()
def condition_stat(start_date, end_date, index_code, condition_num):
    """
    Collect opening and full-day price changes of an index on selected dates.

    The dates are the index of the frame returned by
    ``find_condition_date_usa(start_date, end_date, condition_num)``
    (per the original note: dates matching a condition value on US-market
    moves; ``index_code`` designates the domestic index - confirm against
    the helpers). For every date ``trading_day_state`` supplies the pair
    (opening change, day change).

    Both series are drawn as histograms, their summary statistics and the
    number of positive day changes are printed, and the two series are
    returned as ``(open_price_change_series, day_price_change_series)``.
    """
    conn = connect_data_source()
    doom_data = find_condition_date_usa(start_date, end_date, condition_num)

    daily_changes = [
        trading_day_state(index_code, selected_date, conn)
        for selected_date in doom_data.index
    ]
    open_price_change_series = Series(
        [open_change for open_change, _ in daily_changes])
    day_price_change_series = Series(
        [day_change for _, day_change in daily_changes])

    open_price_change_series.hist()
    day_price_change_series.hist()

    print(open_price_change_series.describe())
    print(day_price_change_series.describe())
    print(sum(day_price_change_series > 0))

    return open_price_change_series, day_price_change_series
def _plot_price_histogram(price_data: pd.Series, title: str,
                          x_tick_interval: int, **kwargs):
    """
    Draw a 20-bin price histogram annotated with the median and sample size.

    Parameters
    ----------
    price_data : pd.Series
        price data; missing values are dropped before plotting
    title : str
        plot title
    x_tick_interval : int
        spacing between major ticks on the x axis
    **kwargs
        forwarded to ``Series.hist``
    """
    prices = price_data.dropna()
    n_obs = len(prices)

    with plt.style.context('bmh'):
        n_bins = 20
        ax = prices.hist(bins=n_bins, alpha=0.9, **kwargs)
        ax.grid(linewidth=0.5)
        for side in ('right', 'top'):
            ax.spines[side].set_visible(False)
        plt.title(title)
        plt.xlabel("price")
        plt.ylabel("number of offers")

        # X axis: rotated labels, fixed tick spacing, thousands separators.
        plt.xticks(rotation=45)
        x_axis = ax.xaxis
        x_axis.set_major_locator(
            ticker.MultipleLocator(base=x_tick_interval))
        x_axis.set_major_formatter(ticker.StrMethodFormatter('{x:,.0f}'))

        # Median marker; bin counts are recomputed to position the labels.
        median_price = prices.median()
        bin_counts, bin_edges = np.histogram(prices, bins=n_bins)
        plt.axvline(median_price, color='midnightblue', linewidth=2)
        plt.text(median_price,
                 np.quantile(bin_counts, 0.25),
                 s=f"Median price={median_price:,.0f} z\u0142",
                 rotation=90,
                 horizontalalignment="right",
                 verticalalignment="bottom")

        # Sample size, placed near the top-left corner of the histogram.
        plt.text(bin_edges.min(),
                 bin_counts.max() * 0.9,
                 s=f"Total number of offers={n_obs}",
                 horizontalalignment="left")
def histogram(data):
    """Show a 50-bin, half-transparent black histogram of ``data``."""
    values = Series(data)
    hist_style = {'color': 'k', 'alpha': 0.5, 'bins': 50}

    plt.figure()
    values.hist(**hist_style)
    plt.show()
plt.show() ####保存文件 plt.savefig('name.png',bbox_inches='tight') #第二个参数将周围多余的空白区域裁剪掉 ##线形图 %matplotlib inline series.plot() df.plot() #注意参数,不识别中文。 ##柱状图 df.plot(kind='bar') #垂直方向 df.plot(kind='barh') #水平方向 ##直方图 Series.hist() #注意参数 Series.hist(bins=100) #100个间隔 Series.plot(kind = 'kde') #曲线图 s1 = np.random.normal(0,2,100) s2 = np.random.normal(0,2,100) nd = np.concatenate([s1,s2]) s = Series(nd) s.hist(bins = 100,normed = True) s.plot(kind = 'kde') #画在一张图上 ##散点图 df.plot('X','Y',kind = 'scatter') #输入列索引 pd.plotting.scatter_matrix(nd,diagonal='kde') #直方图 plt.hist(data)
# Generate a white-noise series and inspect it with three plots.
from random import gauss, seed

from pandas import Series
from pandas.plotting import autocorrelation_plot
from matplotlib import pyplot

# Fix the random number generator so the series is reproducible.
seed(1)

# 1000 independent draws from a standard normal distribution.
series = Series([gauss(0.0, 1.0) for _ in range(1000)])

# Summary statistics.
print(series.describe())

# Line plot, histogram and autocorrelation plot, each shown on its own.
for draw in (series.plot, series.hist, lambda: autocorrelation_plot(series)):
    draw()
    pyplot.show()
x100_1 = st.chi2.rvs(size=100, df=1)
st.probplot(x100_1, plot=plt)
plt.title("n=100, v=1")

# Leave room between the subplots.
plt.tight_layout()

# -------------------------
# Exercise 5.2
# -------------------------

# a) 1000 draws with replacement from {0, 10, 11}: histogram and Q-Q plot.
values = np.array([0, 10, 11])
sim = Series(np.random.choice(values, size=1000, replace=True))

plt.subplot(4, 2, 1)
bin_edges = [0, 1, 10, 11, 12]
sim.hist(bins=bin_edges, edgecolor="black")
plt.title("Original")

plt.subplot(4, 2, 2)
st.probplot(sim, plot=plt)
plt.title("Normal Q-Q Plot")

# b) Means of n draws each: the draws form an (n, 1000) table and
#    DataFrame.mean() averages every column.
n = 5
sim = DataFrame(
    np.random.choice(values, size=n * 1000, replace=True).reshape(n, 1000))
sim_mean = sim.mean()
print(sim_mean)

plt.subplot(4, 2, 3)
sim_mean.hist(edgecolor="black")
plt.title("Mean of %d observations" % n)
columns=['day','1','2','3','4','5','6'])
df1.set_index('day',inplace=True) # set_index replaces the index with the 'day' column (in place)
df1.plot(kind='bar')
df.plot(kind = "bar", colormap = "rainbow") # colormap = "rainbow" draws the bars in rainbow colours

# Bar chart 3: the same data with rows and columns swapped
df1.stack() # flatten to one dimension first
df1.stack().unstack(level = 0) # turn the first index level into column names
df1.stack().unstack(level = 0).plot(kind = 'bar')

# 3. Histogram: hist draws a histogram, a special kind of bar chart used to
#    show density (translation of the string below).
'''3.绘制直方图---hist绘制直方图,它是一种特殊的柱状图,该图用来表示密度'''
nd = np.random.randint(0,100,size = 100)
s = Series(nd)
s.hist(bins = 300) # bins is the number of bins (300 here); fewer bins give wider bars

# 4. Density plot of the random numbers via .plot(kind='kde')
#    (translation of the string below).
'''4.绘制随机数百分比密度图---.plot(kind='kde')'''
s.plot(kind='kde')

# 5. Histogram and density plot drawn into one figure; the original author
#    remarks that this is not truly a single chart (translation of the
#    string below).
'''5.直方图,和密度图绘制到一个图形中(有点无聊,不是真正意义上的一张图)'''
n1 = np.random.normal(loc = 0,scale=1,size = 100) # normal sample: loc=0 is the mean, scale=1 the standard deviation
n2 = np.random.normal(loc = 10,scale = 2,size = 100)
nd = np.concatenate([n1,n2]) # NumPy concatenation; not to be confused with pandas concat
s = Series(nd)
s.hist(bins = 100) # draw the histogram
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

# Bar charts from a DataFrame
df = DataFrame(np.random.rand(6, 4),
               index=['one', 'two', 'three', 'four', 'five', 'six'],
               columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
df.plot(kind='bar')
df.plot(kind='barh', stacked=True)

comp1 = np.random.normal(0, 1, size=200)  # N(0,1)
comp2 = np.random.normal(10, 2, size=200)  # N(10,4)
# np.concatenate joins the second sample onto the end of the first.
values = Series(np.concatenate([comp1, comp2]))
values.hist(bins=100, alpha=0.3, color='k', density=True)
values.plot(kind='kde', style='k--')

## 109
obj = Series(range(4), index=['d', 'a', 'b', 'c'])
obj.sort_index()  # replace F

frame = DataFrame(np.arange(8).reshape((2, 4)),
                  index=['three', 'one'],
                  columns=['d', 'a', 'b', 'c'])
frame.sort_index()
# Sort by column labels. ``axis`` is passed by keyword because pandas 2
# rejects positional arguments to ``sort_index``.
frame.sort_index(axis=1)
frame.sort_index(axis=1, ascending=False)

frame2 = DataFrame({'b': [4, 7, 3, 2], 'a': [4, 9, 2, 5], 'c': [5, 3, 7, 9]})
frame2.sort_values(by='a')
# In[5]:
# Vertical bar chart of the frame.
df.plot.bar()

# In[10]:
# Horizontal bar chart of the same frame.
df.plot.barh()

# In[6]:
# Ten random integers in [0, 5) wrapped in a Series.
nd = np.random.randint(0, 5, size=10)
s = Series(nd)

# In[9]:
# Notebook cell that displays the raw array.
nd

# In[7]:
s.hist()

# In[18]:
# 50x5 table of random integers in [0, 50); scatter plot of Y against X.
nd1 = np.random.randint(0, 50, size=(50, 5))
df1 = DataFrame(nd1, columns=['X', 'Y', 'A', 'B', 'C'])
df1.plot.scatter(x='X', y='Y')

# In[19]:
# Pairwise scatter plots with KDE curves on the diagonal.
pd.plotting.scatter_matrix(df1, diagonal='kde')
# Plot histogram of tip_pct
tips['tip_pct'].hist(bins=50, alpha=0.3, color='r')
# Plot density plot (KDE = kernel density estimation)
tips['tip_pct'].plot(kind='kde')

# Bimodal example: 200 draws from N(0, 1) joined with 200 from N(10, 4)
fig = plt.figure()
comp1 = np.random.normal(0, 1, size=200)  # N(0, 1)
comp2 = np.random.normal(10, 2, size=200)  # N(10, 4)
values = Series(np.concatenate([comp1, comp2]))
# ``density=True`` replaces the ``normed=True`` keyword, which current
# Matplotlib releases no longer accept.
values.hist(bins=100, alpha=0.3, color='g', density=True)
values.plot(kind='kde', style='r-')
draw()

# Scatterplot of the differenced log series
plt.figure()
macro = pd.read_csv('../../pydata-book/ch08/macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
trans_data = np.log(data).diff().dropna()
plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title('Changes in log %s vs log %s' % ('m1', 'unemp'))

# Scatter matrix. The function lives in ``pandas.plotting``; the top-level
# ``pd.scatter_matrix`` alias is not available in current pandas.
pd.plotting.scatter_matrix(trans_data, diagonal='kde', color='b', alpha=0.3)
tips = pd.read_csv('D:\\Github\\pydata-book-master\\ch08\\tips.csv')
# Cross-tabulate day against party size. The columns are selected with
# brackets because ``tips.size`` is the DataFrame's element count, not the
# 'size' column (assumes the CSV has 'day' and 'size' columns - confirm).
party_counts = pd.crosstab(tips['day'], tips['size'])
party_counts


# In[19]:

# Keep the columns labelled 2 through 5 (label-based, both ends included).
# ``.loc`` replaces the removed ``.ix`` indexer; this assumes the column
# labels are the integer party sizes - confirm.
party_counts = party_counts.loc[:, 2:5]
# Normalize every row so its entries sum to 1.
party_pcts = party_counts.div(party_counts.sum(axis=1).astype(float), axis=0)
party_pcts


# In[20]:

comp1 = np.random.normal(0, 1, size=200)  # N(0,1)
comp2 = np.random.normal(10, 2, size=200)  # N(10,4)
values = Series(np.concatenate([comp1, comp2]))
# ``density=True`` replaces the ``normed=True`` keyword, which current
# Matplotlib releases no longer accept.
values.hist(bins=100, alpha=0.3, color='k', density=True)
values.plot(kind='kde', style='k--')


# In[23]:

macro = pd.read_csv('D:\\Github\\pydata-book-master\\ch08\\macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# Differences of the logged series, with the leading NaN row dropped.
trans_data = np.log(data).diff().dropna()
trans_data


# In[29]:

plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title(' Changes in log%s vs. log%s ' % ('m1', 'unemp'))


# In[30]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Histograms and normal Q-Q plots for draws from {0, 10, 11} and their means."""

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
from pandas import Series, DataFrame
from math import sqrt

values = np.array([0, 10, 11])

# a) 1000 draws with replacement: histogram and normal Q-Q plot.
sim = Series(np.random.choice(values, size=1000, replace=True))

plt.subplot(4, 2, 1)
bin_edges = [0, 1, 10, 11, 12]
sim.hist(bins=bin_edges, edgecolor='black')
plt.title('Original')

plt.subplot(4, 2, 2)
st.probplot(sim, plot=plt)
plt.title('Normal Q-Q Plot')

# b) Means of n draws each: the draws are arranged as an (n, 1000) table
#    and DataFrame.mean() averages every column.
n = 5
sim = DataFrame(
    np.random.choice(values, size=n * 1000, replace=True).reshape(n, 1000))
sim_mean = sim.mean()

plt.subplot(4, 2, 3)
sim_mean.hist(edgecolor='black')
plt.title('Mittelwerte von 5 Beobachtungen')

plt.subplot(4, 2, 4)
st.probplot(sim_mean, plot=plt)
plt.title('Normal Q-Q Plot')
def slide_12_2():
    """Draw a normalized histogram of a bimodal sample with its KDE curve.

    The sample joins 200 draws from N(0, 1) with 200 draws from a normal
    distribution with mean 10 and standard deviation 2.
    """
    comp1 = np.random.normal(0, 1, size=200)
    comp2 = np.random.normal(10, 2, size=200)
    values = Series(np.concatenate([comp1, comp2]))
    # ``density=True`` replaces the ``normed=True`` keyword, which current
    # Matplotlib releases no longer accept.
    values.hist(bins=100, alpha=0.3, color='k', density=True)
    values.plot(kind='kde', style='k--')
def test_histtype_argument(self, histtype, expected):
    """Check that ``histtype`` is honoured by ``Series.hist``.

    The patches of the returned axes are checked with
    ``_check_patches_all_filled`` against the ``expected`` fill state.
    """
    # GH23992 Verify functioning of histtype argument
    random_value = np.random.randint(1, 10)
    ser = Series(random_value)
    hist_axes = ser.hist(histtype=histtype)
    self._check_patches_all_filled(hist_axes, filled=expected)
p3_2 = normed[1] - normed[0]
np.allclose(p3_1, p3_2)

# p88: binomial probability P(X = 6) for B(10, 0.5) against the normal
# approximation over [5.5, 6.5] with matching mean and standard deviation.
p4_binom = scipy.stats.binom.pmf(6, n=10, p=0.5)
params4 = {"loc": 10 * 0.5, "scale": np.sqrt(10 * 0.5 * (1 - 0.5))}
norm4 = norm.cdf([5.5, 6.5], **params4)
p4_norm = norm4[1] - norm4[0]

# ``np.linspace`` replaces ``scipy.linspace``, a deprecated alias of the
# NumPy function.
x4 = np.linspace(0, 10, 11)
data4_norm = Series(norm.pdf(x4, **params4), index=x4)
data4_norm.plot(ax=axes[0][1], kind="bar", width=1,
                title="B(10,0.5) and N(%.0f, %0.2f)"
                % (params4["loc"], params4["scale"]))
data4_binom = Series(scipy.stats.binom.pmf(x4, n=10, p=0.5), index=x4)
data4_binom.plot(ax=axes[0][1], color="r")

# p90: distribution of the means of 200 samples of size 10 drawn from a
# Student t distribution with 5 degrees of freedom.
means_5 = []
for _ in range(200):
    samples = np.random.standard_t(5, 10)
    means_5.append(samples.mean())
data5 = Series(means_5)
# ``np.ceil`` returns a float, but the bin count must be an integer.
n5 = int(np.ceil(1 + np.log2(data5.size)))
axes[1][1].set_title("Random samples with mean 0, variance 1.67")
# ``density=True`` replaces the ``normed=True`` keyword, which current
# Matplotlib releases no longer accept.
data5.hist(bins=n5, ax=axes[1][1], density=True)
plt.show()
freqs = dict() total = float(sum(counts.values())) for ipos, count in counts.items(): freqs[ipos] = count/total return freqs def sequence_entropy(sequence): counts = count_positions(sequence) freqs = relative_frequency(counts) entropy = 0.0 for ipos, freq in freqs.items(): entropy += freq * math.log(1/freq, 2) return entropy def main(sequences): sequences = map(lambda x: Sequence(x.split("[")[0]), sequences) entropies = map(sequence_entropy, sequences) return entropies if __name__ == '__main__': import sys import os sequence_file = sys.argv[1] seqs = open(sequence_file).readlines() entropies = main(seqs) from matplotlib import pyplot as plt from pandas import Series es = Series(entropies) es.hist() plt.savefig(os.path.splitext(sequence_file)[0] + "_entropy_hist.png")
def plot(self):
    """Draw a histogram of ``self.summary`` and return its axes.

    The axes title is ``"<metric name> Histogram"``, taken from
    ``self.metric.name``.
    """
    hist_axes = Series(self.summary).hist()
    heading = "%s Histogram" % self.metric.name
    hist_axes.set_title(heading)
    return hist_axes
p3_2 = normed[1] - normed[0]
np.allclose(p3_1, p3_2)

# p88: binomial probability P(X = 6) for B(10, 0.5) against the normal
# approximation over [5.5, 6.5] with matching mean and standard deviation.
p4_binom = scipy.stats.binom.pmf(6, n=10, p=0.5)
params4 = {
    "loc": 10 * 0.5,
    "scale": np.sqrt(10 * 0.5 * (1 - 0.5)),
}
norm4 = norm.cdf([5.5, 6.5], **params4)
p4_norm = norm4[1] - norm4[0]

# ``np.linspace`` replaces ``scipy.linspace``, a deprecated alias of the
# NumPy function.
x4 = np.linspace(0, 10, 11)
data4_norm = Series(norm.pdf(x4, **params4), index=x4)
data4_norm.plot(ax=axes[0][1], kind="bar", width=1,
                title="B(10,0.5) and N(%.0f, %0.2f)"
                % (params4["loc"], params4["scale"]))
data4_binom = Series(scipy.stats.binom.pmf(x4, n=10, p=0.5), index=x4)
data4_binom.plot(ax=axes[0][1], color="r")

# p90: distribution of the means of 200 samples of size 10 drawn from a
# Student t distribution with 5 degrees of freedom.
means_5 = []
for _ in range(200):
    samples = np.random.standard_t(5, 10)
    means_5.append(samples.mean())
data5 = Series(means_5)
# ``np.ceil`` returns a float, but the bin count must be an integer.
n5 = int(np.ceil(1 + np.log2(data5.size)))
axes[1][1].set_title("Random samples with mean 0, variance 1.67")
# ``density=True`` replaces the ``normed=True`` keyword, which current
# Matplotlib releases no longer accept.
data5.hist(bins=n5, ax=axes[1][1], density=True)
plt.show()
def series_hist(series: pd.Series):
    """Draw a 70-bin histogram of ``series`` on a new 10x4 inch figure."""
    figure_size = (10, 4)
    bin_count = 70

    plt.figure(figsize=figure_size)
    series.hist(bins=bin_count)