Пример #1
0
    def test_hist_with_legend_raises(self, by):
        """Check that ``Series.hist`` rejects ``legend=True`` combined with ``label``.

        Regression test for GH 6279: the call must raise ``ValueError`` for the
        supplied ``by`` value.
        """
        labels = ["1"] * 15 + ["2"] * 15
        ser = Series(np.random.randn(30), index=labels, name="a")
        ser.index.name = "b"

        expected_msg = "Cannot use both legend and label"
        with pytest.raises(ValueError, match=expected_msg):
            ser.hist(legend=True, by=by, label="c")
Пример #2
0
def two_histogram(x: pd.Series, y: pd.Series) -> None:
    """
    Draw the histograms of two samples on the same figure.

    Each histogram is weighted by ``1 / len(sample)`` so that bar heights are
    relative frequencies. A dashed vertical line marks the mean of each sample
    (red for ``x``, blue for ``y``) and a legend listing both series names is
    added.

    :param x: first sample
    :param y: second sample
    """
    # Plot BOTH samples; the weights must be sized per sample because the two
    # series may differ in length.
    for sample in (x, y):
        sample.hist(alpha=0.5, weights=[1. / len(sample)] * len(sample))
    plt.axvline(x.mean(), color="red", alpha=0.8, linestyle="dashed")
    plt.axvline(y.mean(), color="blue", alpha=0.8, linestyle="dashed")
    plt.legend([x.name, y.name])
Пример #3
0
def plot_histogram(column: pd.Series, title: str):
    """Show a density histogram of ``column`` with a KDE curve and IQR fences.

    The number of bins is ``1 + int(log2(n))`` where ``n`` is the number of
    rows. Two red vertical lines mark ``Q1 - 1.5 * IQR`` and
    ``Q3 + 1.5 * IQR``; the quartiles are computed ignoring NaN values.
    """
    plt.title(title)
    n_bins = 1 + int(np.log2(column.shape[0]))
    column.hist(bins=n_bins, density=True, grid=True)
    column.plot.kde()

    q1, q3 = np.nanquantile(column, q=[0.25, 0.75])
    iqr = q3 - q1
    for fence in (q1 - 1.5 * iqr, q3 + 1.5 * iqr):
        plt.axvline(fence, color='red')
    plt.show()
Пример #4
0
def show_data_dist(lines):
    """Plot a 50-bin histogram of element 3 of every entry and save it.

    Prints the number of collected values, draws into figure 0 and writes the
    image to ``./images_doc/dist.png``.
    """
    se = Series([entry[3] for entry in lines])
    print("Total data: ", len(se))
    plt.figure(0)
    se.hist(bins=50)
    plt.title("data distribution")
    plt.savefig("./images_doc/dist.png")
Пример #5
0
 def test_hist_no_overlap(self):
     """Histograms drawn into two separate subplots must leave exactly two axes."""
     from matplotlib.pyplot import subplot, gcf
     left, right = Series(randn(2)), Series(randn(2))
     for position, sample in ((121, left), (122, right)):
         subplot(position)
         sample.hist()
     fig = gcf()
     # pick the accessor according to the matplotlib version flag on the test class
     if self.mpl_ge_1_5_0:
         axes = fig.axes
     else:
         axes = fig.get_axes()
     assert len(axes) == 2
Пример #6
0
 def test_hist_no_overlap(self):
     """Histograms drawn into two separate subplots must leave exactly two axes."""
     from matplotlib.pyplot import subplot, gcf
     left, right = Series(randn(2)), Series(randn(2))
     for position, sample in ((121, left), (122, right)):
         subplot(position)
         sample.hist()
     fig = gcf()
     # pick the accessor according to the matplotlib version flag on the test class
     if self.mpl_ge_1_5_0:
         axes = fig.axes
     else:
         axes = fig.get_axes()
     assert len(axes) == 2
Пример #7
0
 def test_hist_no_overlap(self):
     """Histograms drawn into two separate subplots must leave exactly two axes."""
     from matplotlib.pyplot import subplot, gcf
     left, right = Series(randn(2)), Series(randn(2))
     for position, sample in ((121, left), (122, right)):
         subplot(position)
         sample.hist()
     axes = gcf().get_axes()
     self.assertEqual(len(axes), 2)
 def test_hist_no_overlap(self):
     """Histograms drawn into two separate subplots must leave exactly two axes."""
     from matplotlib.pyplot import subplot, gcf
     left, right = Series(randn(2)), Series(randn(2))
     for position, sample in ((121, left), (122, right)):
         subplot(position)
         sample.hist()
     axes = gcf().get_axes()
     self.assertEqual(len(axes), 2)
Пример #9
0
    def test_hist_no_overlap(self):
        """Histograms drawn into two separate subplots must leave exactly two axes."""
        from matplotlib.pyplot import gcf, subplot

        left = Series(np.random.randn(2))
        right = Series(np.random.randn(2))
        for position, sample in ((121, left), (122, right)):
            subplot(position)
            sample.hist()
        assert len(gcf().axes) == 2
Пример #10
0
def visualize_hist(array_1d, title='Histogram', precision=50):
    """Show a histogram of ``array_1d`` in its own, randomly numbered figure.

    A random id obtained from ``randint(1, 1000)`` is appended to the title,
    printed, and used as the matplotlib figure number.

    :param array_1d: values to plot (wrapped in a ``Series``)
    :param title: base title; the random id is appended to it
    :param precision: number of histogram bins
    """
    r = randint(1, 1000)
    title = title + ' id:' + str(r)
    # function-call form of print works on both Python 2 and Python 3
    print(title)
    plt.figure(r)
    plt.title(title)
    series = Series(array_1d)
    series.hist(bins=precision)
    # axes = plt.gca()
    # axes.set_xlim([-0.4, 0.4])
    # axes.set_ylim([0, 500])
    plt.show()
Пример #11
0
    def plots(self):
        """Draw a density histogram with a KDE overlay for a bimodal sample.

        The sample concatenates 1000 draws from N(0, 1) with 1000 draws from a
        normal with mean 10 and standard deviation 2. ``s`` and ``df`` are only
        used by the commented-out example calls.
        """
        s = Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
        #s.plot()

        df = DataFrame(np.random.randn(10, 4).cumsum(0),
                       columns=['A', 'B', 'C', 'D'],
                       index=np.arange(0, 100, 10))
        #df.plot(kind='bar') #barh for horizontal bars

        comp1 = np.random.normal(0, 1, size=1000)  # N(0,1)
        comp2 = np.random.normal(10, 2, size=1000)  # N(10,4)
        values = Series(np.concatenate([comp1, comp2]))
        # `normed` was removed from matplotlib's hist; `density` is its replacement
        values.hist(bins=500, alpha=0.3, density=True, color='orange')
        values.plot(kind='kde', style='k--')
Пример #12
0
def plot_temperature(temp: pandas.Series, name: str):
    """
    Draw two figures for a room: a histogram of its temperatures and the
    temperature plotted against the series index.
    """
    # empty readings are a nuisance for both plots, so drop them up front
    temp = temp.dropna()
    heading = f"{name} temperature"
    temperature_label = r"Temperature [$^\circ$C]"

    hist_ax = plt.figure().gca()
    temp.hist(ax=hist_ax)
    hist_ax.set_ylabel("# of occurences")
    hist_ax.set_xlabel(temperature_label)
    hist_ax.set_title(heading)

    line_ax = plt.figure().gca()
    line_ax.plot(temp.index, temp.values)
    line_ax.set_xlabel("time")
    line_ax.set_ylabel(temperature_label)
    line_ax.set_title(heading)
Пример #13
0
def pandas_draw_hist_kde():
    """Show a density histogram and a KDE curve of a bimodal sample together.

    The sample concatenates 200 draws from N(0, 1) with 200 draws from a
    normal with mean 10 and standard deviation 2.
    """
    # Histogram: a bar chart giving a discretised view of value frequencies.
    # The data points are split into discrete, evenly spaced bins and the
    # number of points in each bin is drawn.
    data = Series(np.random.randn(1000))
    # data.hist(bins=50)
    # plt.show()

    # Density plot: produced by estimating a continuous probability
    # distribution that could have generated the observed data.
    # data.plot(kind='kde')
    # plt.show()

    # Draw the histogram and the density plot together.
    comp1 = np.random.normal(0, 1, size=200)
    comp2 = np.random.normal(10, 2, size=200)
    values = Series(np.concatenate([comp1, comp2]))  # joined end to end
    # `normed` was removed from matplotlib's hist; `density` is its replacement
    values.hist(bins=100, alpha=0.3, color='k', density=True)
    values.plot(kind='kde', style='k--')
    plt.show()
Пример #14
0
def hist_distribute(x: pd.Series, title: str, nbin=10):
    '''
    Render a histogram of ``x`` on a new figure and return that figure.

    All pyplot figures are closed before returning.

    :param x: pandas series
    :param title: plot name
    :param nbin: number of histogram bins
    :return: matplot figure
    '''
    plt.figure(figsize=figure_size)
    bar_color = sns.desaturate("indianred", .8)
    hist_axes = x.hist(color=bar_color, bins=nbin)
    fig = hist_axes.get_figure()
    plt.title(title)
    plt.close('all')
    return fig
Пример #15
0
def plot_noise():
    """Print summary statistics of a 50-sample white-noise series and plot it.

    A 2x2 grid is created; the line plot, the histogram and the
    autocorrelation plot fill three of its cells.
    """
    from pandas.plotting import autocorrelation_plot

    # fixed seed so the generated noise is reproducible
    seed(30)
    series = Series([gauss(0.0, 1.0) for _ in range(50)])
    print(series.describe())

    _, grid = plt.subplots(nrows=2, ncols=2)

    line_ax = grid[0, 0]
    series.plot(ax=line_ax)
    line_ax.set_title('White Noise')

    hist_ax = grid[0, 1]
    series.hist(ax=hist_ax)
    hist_ax.set_title('Noise Histogram')

    autocorrelation_plot(series, ax=grid[1, 0])
    plt.tight_layout()
    plt.show()
Пример #16
0
def plot_residuals(residuals: pd.Series):
    """
    Show four diagnostic plots of the residuals, one after another:

    * histogram
    * kernel density estimate
    * QQ plot
    * autocorrelation plot

    Parameters
    ----------
    residuals : pd.Series
        observed values - forecasted values
    """
    drawing_steps = (
        residuals.hist,
        lambda: residuals.plot(kind="kde"),
        lambda: qqplot(residuals),
        lambda: autocorrelation_plot(residuals),
    )
    # each plot is displayed on its own before the next one is drawn
    for draw in drawing_steps:
        draw()
        plt.show()
Пример #17
0
def condition_stat(start_date, end_date, index_code, condition_num):
    """
    Collect opening and daily price changes of ``index_code`` on selected dates.

    The dates are the index of ``find_condition_date_usa(start_date, end_date,
    condition_num)``; for each of them ``trading_day_state`` supplies the pair
    of changes. Both resulting series are plotted as histograms, described on
    stdout, and the number of positive daily changes is printed.

    NOTE(review): per the original comment, the dates are those on which the
    US market met the given condition and ``index_code`` is a domestic index;
    confirm against the helpers.

    :return: ``(open_price_change_series, day_price_change_series)``
    """
    conn = connect_data_source()
    doom_data = find_condition_date_usa(start_date, end_date, condition_num)

    open_changes, day_changes = [], []
    for selected_date in doom_data.index:
        open_change, day_change = trading_day_state(
            index_code, selected_date, conn)
        open_changes.append(open_change)
        day_changes.append(day_change)

    open_price_change_series = Series(open_changes)
    day_price_change_series = Series(day_changes)
    both = (open_price_change_series, day_price_change_series)
    for changes in both:
        changes.hist()
    for changes in both:
        print(changes.describe())
    print(sum(day_price_change_series > 0))
    return open_price_change_series, day_price_change_series
Пример #18
0
    def _plot_price_histogram(price_data: pd.Series,
                              title: str,
                              x_tick_interval: int,
                              **kwargs):
        """ Plot a 20-bin histogram of prices annotated with median and count

        Missing prices are dropped first. The plot uses the 'bmh' style, marks
        the median with a vertical line and a rotated label, and prints the
        number of observations near the top-left of the histogram.

        Parameters
        ----------
        price_data : pd.Series
            price data
        title : str
            plot title
        x_tick_interval : int
            interval for x axis
        **kwargs
            forwarded to ``Series.hist``
        """
        prices = price_data.dropna()
        bin_count = 20

        with plt.style.context('bmh'):
            ax = prices.hist(bins=bin_count, alpha=0.9, **kwargs)

            ax.grid(linewidth=0.5)
            for side in ('right', 'top'):
                ax.spines[side].set_visible(False)

            plt.title(title)
            plt.xlabel("price")
            plt.ylabel("number of offers")

            # X axis: rotated labels, fixed tick spacing, thousands separators
            plt.xticks(rotation=45)
            tick_locator = ticker.MultipleLocator(base=x_tick_interval)
            tick_formatter = ticker.StrMethodFormatter('{x:,.0f}')
            ax.xaxis.set_major_locator(tick_locator)
            ax.xaxis.set_major_formatter(tick_formatter)

            # Median marker; the bin counts are recomputed to position the texts
            median_price = prices.median()
            counts, edges = np.histogram(prices, bins=bin_count)
            plt.axvline(median_price, color='midnightblue', linewidth=2)
            plt.text(median_price, np.quantile(counts, 0.25),
                     s=f"Median price={median_price:,.0f} z\u0142",
                     rotation=90,
                     horizontalalignment="right",
                     verticalalignment="bottom")

            # Observation count, anchored at the left edge near the top
            plt.text(edges.min(), counts.max() * 0.9,
                     s=f"Total number of offers={len(prices)}",
                     horizontalalignment="left")
Пример #19
0
def histogram(data):
    """Show a 50-bin, half-transparent black histogram of ``data`` in a new figure."""
    values = Series(data)
    plt.figure()
    values.hist(bins=50, color='k', alpha=0.5)
    plt.show()
        
Пример #20
0
plt.show()
#### Save to a file
plt.savefig('name.png',bbox_inches='tight') # the second argument trims the surplus whitespace around the figure
## Line plot
%matplotlib inline
series.plot()

df.plot() # mind the arguments; Chinese text is not recognised.

## Bar chart
df.plot(kind='bar')  # vertical
df.plot(kind='barh') # horizontal

## Histogram
Series.hist() # mind the arguments
Series.hist(bins=100) # 100 bins
Series.plot(kind = 'kde') # density curve

s1 = np.random.normal(0,2,100)
s2 = np.random.normal(0,2,100)
nd = np.concatenate([s1,s2])
s = Series(nd)
# NOTE(review): `normed` was removed in Matplotlib 3.1; newer versions need `density = True`
s.hist(bins = 100,normed = True)
s.plot(kind = 'kde')            # drawn on the same figure

## Scatter plot
df.plot('X','Y',kind = 'scatter') # pass the column labels
# NOTE(review): `nd` is a NumPy array here; scatter_matrix is documented for a DataFrame - confirm
pd.plotting.scatter_matrix(nd,diagonal='kde') 
# Histogram
plt.hist(data)
# calculate and plot a white noise series
from random import gauss
from random import seed
from pandas import Series
from pandas.plotting import autocorrelation_plot
from matplotlib import pyplot
# fix the seed so the generated noise is reproducible
seed(1)
# white noise: 1000 independent draws from a standard Gaussian
series = Series([gauss(0.0, 1.0) for _ in range(1000)])
# summary stats
print(series.describe())
# line plot, shown on its own
series.plot()
pyplot.show()
# histogram, shown on its own
series.hist()
pyplot.show()
# autocorrelation, shown on its own
autocorrelation_plot(series)
pyplot.show()
Пример #22
0
x100_1 = st.chi2.rvs(df=1, size=100)
st.probplot(x100_1, plot=plt)
plt.title("n=100, v=1")

# keep the subplots from overlapping
plt.tight_layout()

# -------------------------
# Exercise 5.2
# -------------------------
# a) 1000 draws with replacement from the three possible values
values = np.array([0, 10, 11])
sim = Series(np.random.choice(values, size=1000, replace=True))

plt.subplot(4, 2, 1)
sim.hist(edgecolor="black", bins=[0, 1, 10, 11, 12])
plt.title("Original")

plt.subplot(4, 2, 2)
st.probplot(sim, plot=plt)
plt.title("Normal Q-Q Plot")

# b) column means of an (n, 1000) table of draws, i.e. 1000 means of n values
n = 5
sim = DataFrame(
    np.random.choice(values, size=n * 1000, replace=True).reshape((n, 1000))
)
sim_mean = sim.mean()
print(sim_mean)
plt.subplot(4, 2, 3)
sim_mean.hist(edgecolor="black")
plt.title(f"Mean of {n:d} observations")
Пример #23
0
                columns=['day','1','2','3','4','5','6'])
df1.set_index('day',inplace=True)  # set_index makes the 'day' column the index (in place)
df1.plot(kind='bar')
df.plot(kind = "bar", colormap = "rainbow")  # colormap = "rainbow" draws the bars in rainbow colours

# Bar chart 3: --- the same data with the two axes swapped
df1.stack()  # first flatten to one dimension
df1.stack().unstack(level = 0)  # turn the first index level into the column names
df1.stack().unstack(level = 0).plot(kind = 'bar')



'''3.绘制直方图---hist绘制直方图,它是一种特殊的柱状图,该图用来表示密度'''
nd = np.random.randint(0,100,size = 100)
s = Series(nd)
s.hist(bins = 300)  # bins sets the number of bars; the fewer bins, the wider each bar



'''4.绘制随机数百分比密度图---.plot(kind='kde')'''
s.plot(kind='kde')



'''5.直方图,和密度图绘制到一个图形中(有点无聊,不是真正意义上的一张图)'''
n1 = np.random.normal(loc = 0,scale=1,size = 100)  # normal samples with loc 0 and scale 1
n2 = np.random.normal(loc = 10,scale = 2,size = 100)
nd = np.concatenate([n1,n2])  # NumPy concatenation; not to be confused with pandas' concat
s = Series(nd)

s.hist(bins = 100)  # draw the histogram
Пример #24
0
ax.add_patch(rect)
ax.add_patch(circ)
ax.add_patch(pgon)

# Bar charts from a DataFrame
df = DataFrame(np.random.rand(6, 4),
               index=['one', 'two', 'three', 'four', 'five', 'six'],
               columns=pd.Index(['A', 'B', 'C', 'D'], name='Genus'))
df.plot(kind='bar')
df.plot(kind='barh', stacked=True)

comp1 = np.random.normal(0, 1, size=200)  # N(0, 1)
comp2 = np.random.normal(10, 2, size=200)  # N(10, 4)
# np.concatenate joins the two sample arrays end to end
values = Series(np.concatenate([comp1, comp2]))
values.hist(bins=100, alpha=0.3, color='k', density=True)
values.plot(kind='kde', style='k--')

## 109
obj = Series(range(4), index=['d', 'a', 'b', 'c'])
obj.sort_index()  # result is not assigned; obj itself keeps its order

frame = DataFrame(np.arange(8).reshape((2, 4)),
                  index=['three', 'one'],
                  columns=['d', 'a', 'b', 'c'])
frame.sort_index()
# sort by column labels; `axis` is passed by keyword because current pandas
# no longer accepts it positionally
frame.sort_index(axis=1)
frame.sort_index(axis=1, ascending=False)

frame2 = DataFrame({'b': [4, 7, 3, 2], 'a': [4, 9, 2, 5], 'c': [5, 3, 7, 9]})
frame2.sort_values(by='a')
Пример #25
0
# In[5]:

# vertical bar chart of df
df.plot(kind='bar')

# In[10]:

# horizontal bar chart of df
df.plot(kind='barh')

# In[6]:

# ten random integers from 0 to 4, wrapped in a Series
nd = np.random.randint(0, 5, size=10)
s = Series(nd)

# In[9]:

# bare expression: a notebook cell that displays the array
nd

# In[7]:

# histogram of the random integers
s.hist()

# In[18]:

# 50x5 table of random integers below 50; scatter plot of column X against Y
nd1 = np.random.randint(0, 50, size=(50, 5))
df1 = DataFrame(nd1, columns=list('XYABC'))
df1.plot(x='X', y='Y', kind='scatter')

# In[19]:

# pairwise scatter plots of all columns with KDE curves on the diagonal
pd.plotting.scatter_matrix(df1, diagonal='kde')
Пример #26
0
# Plot histogram of tip_pct
tips['tip_pct'].hist(bins=50, alpha=0.3, color='r')

# plot density plot (KDE = kernel density estimation)
tips['tip_pct'].plot(kind='kde')



# Bimodal example
fig = plt.figure()
comp1 = np.random.normal(0, 1, size=200)  # N(0, 1)
comp2 = np.random.normal(10, 2, size=200)  # N(10, 4)
values = Series(np.concatenate([comp1, comp2]))

# `normed` was removed from matplotlib's hist; `density` is its replacement
values.hist(bins=100, alpha=0.3, color='g', density=True)
values.plot(kind='kde', style='r-')
draw()

# Scatterplot
plt.figure()
macro = pd.read_csv('../../pydata-book/ch08/macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# period-over-period change of the logged series; the first row is NaN and dropped
trans_data = np.log(data).diff().dropna()

plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title('Changes in log %s vs log %s' % ('m1', 'unemp'))

# scatter matrix (the function lives in pandas.plotting; the top-level alias was removed)
pd.plotting.scatter_matrix(trans_data, diagonal='kde', color='b', alpha=0.3)
tips = pd.read_csv('D:\\Github\\pydata-book-master\\ch08\\tips.csv')
# index with ['size']: attribute access `tips.size` returns DataFrame.size
# (the element count), not the 'size' column
party_counts = pd.crosstab(tips.day, tips['size'])
party_counts

# In[19]:

# keep the columns labelled 2 through 5 (label-based, inclusive); `.ix` was
# removed from pandas, `.loc` is the label-based replacement
party_counts = party_counts.loc[:, 2:5]
# normalise every row so that it sums to 1
party_pcts = party_counts.div(party_counts.sum(1).astype(float), axis=0)
party_pcts

# In[20]:

comp1 = np.random.normal(0, 1, size=200)  #N(0,1)
comp2 = np.random.normal(10, 2, size=200)  #N(10,4)
values = Series(np.concatenate([comp1, comp2]))
# `normed` was removed from matplotlib's hist; `density` is its replacement
values.hist(bins=100, alpha=0.3, color='k', density=True)
values.plot(kind='kde', style='k--')

# In[23]:

macro = pd.read_csv('D:\\Github\\pydata-book-master\\ch08\\macrodata.csv')
data = macro[['cpi', 'm1', 'tbilrate', 'unemp']]
# period-over-period change of the logged series; the first row is NaN and dropped
trans_data = np.log(data).diff().dropna()
trans_data

# In[29]:

plt.scatter(trans_data['m1'], trans_data['unemp'])
plt.title(' Changes in log%s vs. log%s ' % ('m1', 'unemp'))

# In[30]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as st
from pandas import Series, DataFrame
from math import sqrt

values = np.array([0, 10, 11])

# a) 1000 draws with replacement, their histogram and a normal Q-Q plot
sim = Series(np.random.choice(values, size=1000, replace=True))
plt.subplot(421)
sim.hist(edgecolor='black', bins=[0, 1, 10, 11, 12])
plt.title('Original')
plt.subplot(422)
st.probplot(sim, plot=plt)
plt.title('Normal Q-Q Plot')

# b) column means of an (n, 1000) table of draws, i.e. 1000 means of n values
n = 5
sim = DataFrame(
    np.random.choice(values, size=n * 1000, replace=True).reshape((n, 1000))
)
sim_mean = sim.mean()
plt.subplot(423)
sim_mean.hist(edgecolor='black')
plt.title('Mittelwerte von 5 Beobachtungen')
plt.subplot(424)
st.probplot(sim_mean, plot=plt)
plt.title('Normal Q-Q Plot')
Пример #29
0
def slide_12_2():
    """Draw a density histogram with a dashed KDE overlay for a bimodal sample.

    The sample concatenates 200 draws from N(0, 1) with 200 draws from a
    normal with mean 10 and standard deviation 2.
    """
    comp1 = np.random.normal(0, 1, size=200)
    comp2 = np.random.normal(10, 2, size=200)
    values = Series(np.concatenate([comp1, comp2]))
    # `normed` was removed from matplotlib's hist; `density` is its replacement
    values.hist(bins=100, alpha=0.3, color='k', density=True)
    values.plot(kind='kde', style='k--')
Пример #30
0
 def test_histtype_argument(self, histtype, expected):
     """GH23992: ``histtype`` must be honoured; checks whether patches are filled."""
     hist_axes = Series(np.random.randint(1, 10)).hist(histtype=histtype)
     self._check_patches_all_filled(hist_axes, filled=expected)
Пример #31
0
p3_2 = normed[1] - normed[0]
np.allclose(p3_1, p3_2)

# p88
# exact binomial probability P(X = 6) for B(10, 0.5) ...
p4_binom = scipy.stats.binom.pmf(6, n=10, p=0.5)
# ... and its normal approximation with matching mean and standard deviation,
# integrated over [5.5, 6.5]
params4 = {"loc": 10 * 0.5, "scale": np.sqrt(10 * 0.5 * (1 - 0.5))}
norm4 = norm.cdf([5.5, 6.5], **params4)
p4_norm = norm4[1] - norm4[0]
# np.linspace replaces scipy.linspace, a deprecated alias of the NumPy function
x4 = np.linspace(0, 10, 11)
data4_norm = Series(norm.pdf(x4, **params4), index=x4)
data4_norm.plot(ax=axes[0][1],
                kind="bar",
                width=1,
                title="B(10,0.5) and N(%.0f, %0.2f)" %
                (params4["loc"], params4["scale"]))
data4_binom = Series(scipy.stats.binom.pmf(x4, n=10, p=0.5), index=x4)
data4_binom.plot(ax=axes[0][1], color="r")

# p90
# 200 sample means, each over 10 draws from a Student t with 5 degrees of freedom
means_5 = []
for _ in range(200):
    samples = np.random.standard_t(5, 10)
    means_5.append(samples.mean())

data5 = Series(means_5)
# bin count 1 + log2(n), rounded up; converted to int because np.ceil returns
# a float and the bin count must be an integer
n5 = int(np.ceil(1 + np.log2(data5.size)))
axes[1][1].set_title("Random samples with mean 0, variance 1.67")
# `normed` was removed from matplotlib's hist; `density` is its replacement
data5.hist(bins=n5, ax=axes[1][1], density=True)

plt.show()
Пример #32
0
def slide_12_2():
    # Draws a density histogram with a dashed KDE overlay for a bimodal
    # sample: 200 draws from N(0, 1) joined with 200 draws from a normal with
    # mean 10 and standard deviation 2.
    comp1 = np.random.normal(0, 1, size=200)
    comp2 = np.random.normal(10, 2, size=200)
    values = Series(np.concatenate([comp1, comp2]))
    # `normed` was removed from matplotlib's hist; `density` is its replacement
    values.hist(bins=100, alpha=0.3, color='k', density=True)
    values.plot(kind='kde', style='k--')
    freqs = dict()
    total = float(sum(counts.values()))
    for ipos, count in counts.items():
        freqs[ipos] = count/total
    return freqs

def sequence_entropy(sequence):
    """Return the base-2 entropy of the relative frequencies of ``sequence``.

    The frequencies come from ``relative_frequency(count_positions(sequence))``;
    each frequency ``f`` contributes ``f * log2(1 / f)``.
    """
    freqs = relative_frequency(count_positions(sequence))
    total = 0.0
    for freq in freqs.values():
        total += freq * math.log(1/freq, 2)
    return total

def main(sequences):
    """Map every input line to the entropy of the sequence it encodes.

    Only the text before the first ``[`` of each line is passed to ``Sequence``.
    """
    parsed = (Sequence(raw.split("[")[0]) for raw in sequences)
    return map(sequence_entropy, parsed)

if __name__ == '__main__':
    # Usage: <script> SEQUENCE_FILE
    # Writes a histogram of the per-line entropies next to the input file as
    # "<input name without extension>_entropy_hist.png".
    import sys
    import os
    sequence_file = sys.argv[1]
    # the context manager closes the input file even if reading fails
    with open(sequence_file) as handle:
        seqs = handle.readlines()
    entropies = main(seqs)
    from matplotlib import pyplot as plt
    from pandas import Series
    # main() may return a lazy map object; materialise it before building the Series
    es = Series(list(entropies))
    es.hist()
    plt.savefig(os.path.splitext(sequence_file)[0] + "_entropy_hist.png")
Пример #34
0
 def plot(self):
     """Draw a histogram of ``self.summary`` titled after the metric and return its axes."""
     hist_axes = Series(self.summary).hist()
     hist_axes.set_title("%s Histogram" % self.metric.name)
     return hist_axes
Пример #35
0
p3_2 = normed[1] - normed[0]
np.allclose(p3_1, p3_2)

# p88
# exact binomial probability P(X = 6) for B(10, 0.5) ...
p4_binom = scipy.stats.binom.pmf(6, n=10, p=0.5)
# ... and its normal approximation with matching mean and standard deviation,
# integrated over [5.5, 6.5]
params4 = {
    "loc": 10 * 0.5,
    "scale": np.sqrt(10 * 0.5 * (1 - 0.5))
}
norm4 = norm.cdf([5.5, 6.5], **params4)
p4_norm = norm4[1] - norm4[0]
# np.linspace replaces scipy.linspace, a deprecated alias of the NumPy function
x4 = np.linspace(0, 10, 11)
data4_norm = Series(norm.pdf(x4, **params4), index=x4)
data4_norm.plot(ax=axes[0][1], kind="bar", width=1,
                title="B(10,0.5) and N(%.0f, %0.2f)" % (params4["loc"], params4["scale"]))
data4_binom = Series(scipy.stats.binom.pmf(x4, n=10, p=0.5), index=x4)
data4_binom.plot(ax=axes[0][1], color="r")

# p90
# 200 sample means, each over 10 draws from a Student t with 5 degrees of freedom
means_5 = []
for _ in range(200):
    samples = np.random.standard_t(5, 10)
    means_5.append(samples.mean())

data5 = Series(means_5)
# bin count 1 + log2(n), rounded up; converted to int because np.ceil returns
# a float and the bin count must be an integer
n5 = int(np.ceil(1 + np.log2(data5.size)))
axes[1][1].set_title("Random samples with mean 0, variance 1.67")
# `normed` was removed from matplotlib's hist; `density` is its replacement
data5.hist(bins=n5, ax=axes[1][1], density=True)

plt.show()
Пример #36
0
def series_hist(series: pd.Series):
    """Draw a 70-bin histogram of ``series`` on a new 10x4 figure."""
    plt.figure(figsize=(10, 4))
    series.hist(bins=70)