def draw_lag_plots(lmp_curves_list, hub_names, lag=24):
    """
    Draw one lag plot per hub to evaluate the autocorrelation of its
    time-series data.

    Parameters
    ----------
    lmp_curves_list : list of arrays
        A list comprised of historic price curves, paired positionally with
        ``hub_names``.
    hub_names : list of strings
        Names of the CAISO hubs; one subplot is created per name.
    lag : int
        The difference in time of the two datapoints that are being
        evaluated for correlation.
    """
    # squeeze=False keeps ``axs`` two-dimensional, so indexing also works
    # when only a single hub is supplied.
    fig, axs = plt.subplots(nrows=1, ncols=len(hub_names), sharey=True,
                            figsize=(20, 6), squeeze=False)
    for ax, curve, hub in zip(axs[0], lmp_curves_list, hub_names):
        # Use the requested lag; it used to be hard-coded to 24 here and in
        # the title, so the ``lag`` argument had no effect.
        lag_plot(curve, ax=ax, c='green', alpha=0.5, lag=lag)
        ax.set_title(f"{hub} Lag Plot - {lag} Hours", fontsize=16,
                     fontweight='bold')
    plt.tight_layout()
    plt.show()
def plot_lags_and_auto(df, size=250):
    """Draw a lag plot and an autocorrelation plot for every column of ``df``.

    One figure with two side-by-side axes is created per column; only the
    last ``size`` rows of the column are plotted.

    Parameters
    ----------
    df : DataFrame
        Data whose columns are plotted one by one.
    size : int
        Number of trailing rows of each column to use.
    """
    for col in df.columns:
        # Slice once and reuse the same window for both plots.
        recent = df[col].tail(size)
        fig, ax = plt.subplots(1, 2, figsize=(20, 5))
        fig.suptitle(col, fontsize=16)
        lag_plot(recent, ax=ax[0])
        autocorrelation_plot(recent, ax=ax[1])
def lag_plot(self, column=None, lag_list=None):
    """
    Show a grid of lag plots for one column of ``self.data``.

    :param column: label of the column to plot; ``self.data_column`` is used
        when it is None.
    :param lag_list: sequence of integer lags, one subplot per lag. When it
        is empty or None, nine lags spread evenly between 1 and
        ``len(series) - 1`` are used.
    :return: None
    """
    # ``is None`` (rather than truthiness) so falsy labels such as 0 are
    # still honoured.
    series = self.data[self.data_column if column is None else column]
    if lag_list:
        lag_list = list(lag_list)
    else:
        # The largest useful lag is len(series) - 1: with lag == len(series)
        # there is no (y(t), y(t + lag)) pair left to draw.
        max_lag = max(len(series) - 1, 1)
        lag_list = [int(i) for i in np.linspace(1, max_lag, 9)]
    # Keep the 3x3 layout, adding rows only when more than nine lags are
    # requested (ceil division without importing math).
    rows = max(3, -(-len(lag_list) // 3))
    f = plt.figure(figsize=(15, 15))
    for index, lag in enumerate(lag_list):
        f.add_subplot(rows, 3, index + 1)
        plt.title("lag {}".format(lag))
        # NOTE(review): presumably resolves to a module-level ``lag_plot``
        # (e.g. pandas.plotting.lag_plot), not this method -- confirm.
        lag_plot(series, lag=lag)
    plt.show()
def arma_fit(option, dataframes, TICK, dayly_returns, pplotacf, ppltopacf,
             destranform_returns, freq_des, lag_acf, lag_pacf, def_seasonal,
             p, q, arma_sintrans):
    """Run an ADF test on ``dataframes[option]`` and dispatch the ARMA fit.

    The p-value of ``adfuller`` decides the path: above 0.05 the series is
    handed to ``modelo_altranformarlo``, otherwise to ``arma_sintrans``.
    When the test itself fails, a message is printed and the function
    returns without fitting anything.

    ``ppltopacf``, ``destranform_returns``, ``freq_des`` and ``def_seasonal``
    are accepted but not used in this function.
    """
    # First check whether the untransformed data is stationary.
    try:
        print(f'p value {TICK} Price: ', adfuller(abs(dataframes[option]))[1])
        test = adfuller(dataframes[option])[1]
    except Exception:
        print(
            "For the nature of the data it is not possible to predict these series with ARMA"
        )
        # No p-value is available, so neither branch below can be chosen;
        # previously execution fell through and failed with a NameError.
        return
    # Plot the lags.
    lag_plot(dataframes[option])
    if test > .05:
        # Grid search
        modelo_altranformarlo(dayly_returns, pplotacf, dataframes, option,
                              plot_lags, lag_acf, lag_pacf, p, q)
    else:
        arma_sintrans(dataframes, lag_acf, lag_pacf, ARMA, option, dataframes,
                      p, q)
def lag_plott(df, features, crypto_name, output_path):
    """Save one lag plot per feature as a PNG file.

    Each file is written to
    ``output_path + crypto_name + "_" + feature + ".png"``.

    Parameters
    ----------
    df : DataFrame
        Data containing a column for every entry of ``features``.
    features : iterable of str
        Column names to plot.
    crypto_name : str
        Name used in the plot title and the file name.
    output_path : str
        Prefix of the output file path (concatenated as-is).
    """
    for feature in features:
        # Drop missing values for this feature only. Reassigning ``df`` here
        # used to remove rows cumulatively, so later features lost every row
        # in which an earlier feature was NaN.
        series = df[feature].dropna()
        plt.figure(figsize=(5, 5))
        plt.title("lag_plot_" + feature + "_" + crypto_name)
        lag_plot(series)
        plt.savefig(output_path + crypto_name + "_" + feature + ".png", dpi=120)
        # Release the figure once it is on disk so figures do not pile up.
        plt.close()
def autocorrelation(df, ticker):
    """Show lag plots of ``df.Mean`` for lags 1 to 7 in a single row.

    Updates the global matplotlib rcParams (left y ticks off, title padding
    of 10) before drawing, and titles the figure with ``ticker``.
    """
    plt.rcParams.update({'ytick.left' : False, 'axes.titlepad':10})

    fig, axes = plt.subplots(1, 7, figsize=(12,2), sharex=True, sharey=True, dpi=100)
    panels = axes.flatten()[:7]
    for lag, panel in enumerate(panels, start=1):
        lag_plot(df.Mean, lag=lag, ax=panel, c='green')
        panel.set_title(f'Lag {lag}')

    fig.suptitle(f'The {ticker} stock', y=1.15)
    plt.show()
def plot_Model_Identify(DataSet, frequency=1, acf_lag=12, pacf_lag=12):
    """
    Show a 3x4 grid of model-identification plots for the first column.

    DataSet   : dataframe whose first column holds the series to analyse
    frequency : int, seasonal component period (in time steps) passed to
                the seasonal decomposition
    acf_lag   : number of lags shown in the ACF panel
    pacf_lag  : number of lags shown in the PACF panel
    """
    fig, ax = plt.subplots(3, 4)
    series = DataSet.iloc[:, 0]

    # Column 0: observed data, autocorrelation, Q-Q plot.
    observed_ax = ax[0, 0]
    DataSet.plot(ax=observed_ax)
    observed_ax.set_title('Observed Value')
    observed_ax.set_xlabel("")

    autocorrelation_plot(series, ax=ax[1, 0])
    ax[1, 0].set_title('Autocorrelation')

    qqplot(series, ax=ax[2, 0])
    ax[2, 0].set_title('Q-Q Plot')

    # Column 1: lag plot, ACF, PACF.
    lag_ax = ax[0, 1]
    lag_plot(series, ax=lag_ax)
    lag_ax.set_title('Lag Plot')
    lag_ax.set_ylabel("")
    lag_ax.set_xlabel("")

    tsa.plot_acf(series, ax=ax[1, 1], lags=acf_lag, alpha=0.05)
    ax[1, 1].set_title('ACF')

    tsa.plot_pacf(series, ax=ax[2, 1], lags=pacf_lag, alpha=0.05)
    ax[2, 1].set_title('PACF')

    # Columns 2 and 3: seasonal decomposition components.
    parts = sm.tsa.seasonal_decompose(series, freq=frequency)
    parts.resid.plot(ax=ax[0, 2])
    parts.resid.plot(ax=ax[0, 3], kind='kde')
    parts.seasonal.plot(ax=ax[1, 2])
    parts.trend.plot(ax=ax[2, 2])
    panel_titles = (
        ((0, 2), 'Residual'),
        ((1, 2), 'Seasonal'),
        ((2, 2), 'Trend'),
        ((0, 3), 'Residual Prob. Distrib'),
    )
    for position, heading in panel_titles:
        ax[position].set_title(heading)

    plt.show()
def noise_check(my_data):
    """Show lag, autocorrelation and spectrum plots for a selected time window.

    The window bounds are read from the module-level ``combo_*`` widgets
    (year, month, day, hour, minute, second for start and stop) and used to
    slice ``my_data``. Lag plots are drawn for ``back_be`` and ``pump_pr``;
    the autocorrelation plot and the FFT magnitude spectrum use ``pump_pr``.
    """
    # ``pd.datetime`` was only an alias of the standard-library class and is
    # no longer available in current pandas, so use the class directly.
    from datetime import datetime

    def _selected(*combos):
        """Return the integer currently selected in each widget."""
        return [int(combo.get()) for combo in combos]

    start = datetime(*_selected(combo_Y1, combo_M1, combo_D1,
                                combo_h1, combo_m1, combo_s1))
    stop = datetime(*_selected(combo_Y2, combo_M2, combo_D2,
                               combo_h2, combo_m2, combo_s2))
    the_data = my_data[start:stop]

    # Two stacked lag plots; each lag_plot call draws on the current subplot.
    plt.figure(figsize=(10, 10))
    plt.suptitle('LAG PLOTS')
    plt.subplot(211)
    lag_plot(the_data['back_be'], c='r')
    plt.subplot(212)
    lag_plot(the_data['pump_pr'])

    plt.figure(figsize=(8, 8))
    autocorrelation_plot(the_data.pump_pr)

    # Magnitude spectrum of pump_pr.
    x = the_data['pump_pr'].values
    # NOTE(review): a sampling rate of 1 is assumed here -- confirm that the
    # "Hz" axis label matches the real sample spacing.
    f_s = 1
    X = fftpack.fft(x)
    freqs = fftpack.fftfreq(len(x)) * f_s

    fig, ax = plt.subplots()
    ax.plot(freqs, np.abs(X))
    ax.set_xlabel('Frequency in Hertz [Hz]')
    ax.set_ylabel('Frequency Domain (Spectrum) Magnitude')
    ax.set_xlim(0, f_s / 2)
    plt.show()
def setup_arima(train_data, price_col, time_col, **kwargs):
    """Show a lag plot of the price column, then the price over time.

    Two figures are displayed one after the other. ``kwargs`` is accepted
    but not used.
    """
    prices = train_data[price_col]

    # Figure 1: lag plot of the prices.
    plt.figure()
    lag_plot(prices)
    plt.title('Amazon Stock (Dev Data) - Autocorrelation Plot')
    plt.show()

    # Figure 2: price against time, with date-friendly tick labels.
    fig, ax = plt.subplots()
    ax.plot(train_data[time_col], prices)
    ax.set_xlabel('Time')
    ax.set_ylabel('Stock Price')
    ax.set_title('Amazon Stock (Dev Data) - Minute-by-Minute Closing Prices')
    fig.autofmt_xdate()
    plt.show()
def plot_lag(self, lag=1, ax=None):
    """
    Draw a lag plot for every series yielded by ``self.power_series()``.

    All series are drawn on the same axes.
    http://www.itl.nist.gov/div898/handbook/eda/section3/lagplot.htm

    Parameters
    ----------
    lag : int
        Lag passed to ``lag_plot``.
    ax : matplotlib axes, optional
        Axes to draw on; the current axes are used when omitted.

    Returns
    -------
    matplotlib.axis
    """
    target = plt.gca() if ax is None else ax
    for series in self.power_series():
        lag_plot(series, lag, ax=target)
    return target
def lag_plots(data_df):
    """Draw side-by-side lag plots of the first two columns of ``data_df``.

    Each panel is titled with its column name and labelled y_t / y_{t+1}.
    """
    f, panels = plt.subplots(1, 2, figsize=(15, 5))
    for position, panel in enumerate(panels):
        name = data_df.columns[position]
        lag_plot(data_df[name], ax=panel)
        panel.set_title(name)
        panel.set_ylabel('$y_{t+1}$')
        panel.set_xlabel('$y_t$')
    plt.tight_layout()
def scatter_lag_plots(self, data, no_of_lags, col, district):
    """Show lag plots of ``data[col]`` for lags 1..``no_of_lags``.

    The plots are laid out on a fixed 2x4 grid, so only the first
    ``no_of_lags`` of its eight axes are drawn on. Returns the figure.
    """
    fig, axes = plt.subplots(2, 4, figsize=(15, 8), sharex=True, sharey=True,
                             dpi=100)
    used_panels = axes.flatten()[:no_of_lags]
    for lag, panel in enumerate(used_panels, start=1):
        lag_plot(data[col], lag=lag, ax=panel, c='red')
        panel.set_title(f'Lag {lag}')

    heading = 'Lag Analysis for Sales with {} lags at {}'.format(
        no_of_lags, district)
    fig.suptitle(heading, weight='bold')
    plt.show()
    return fig
def show_1d(s, log_scale=False):
    """Draw a swarm plot, a lag plot and an autocorrelation plot of ``s``.

    With ``log_scale`` set, the swarm plot's y axis and both axes of the lag
    plot switch to the 'symlog' scale.
    """
    data_name = ''
    fig, ax = plt.subplots(1, 3, figsize=FIG_SIZE)
    overview_ax, lag_ax, acf_ax = ax[0], ax[1], ax[2]

    sns.swarmplot(y=s, ax=overview_ax)
    lag_plot(s, ax=lag_ax, alpha=0.5)
    autocorrelation_plot(s, acf_ax, alpha=0.5)

    if log_scale:
        overview_ax.set(yscale='symlog')
        lag_ax.set(yscale='symlog', xscale='symlog')

    # Panel titles (Chinese): overall plot, lag plot, autocorrelation plot.
    overview_ax.set_title(f"{data_name}总体图")
    lag_ax.set_title(f"{data_name}时滞图")
    acf_ax.set_title(f"{data_name}自相关图")
def plot_lag_plots(df, column):
    """
    Show lag plots of ``df[column]`` for lags 1 to 4 in one row.

    Lag plots: the wider and more scattered the points become as the lag
    grows, the weaker the correlation.
    """
    fig, axes = plt.subplots(1, 4, figsize=(10, 3), sharex=True, sharey=True,
                             dpi=100)
    panels = axes.flatten()[:4]
    for lag, panel in enumerate(panels, start=1):
        lag_plot(df[column], lag=lag, ax=panel)
        panel.set_title(f"Lag {lag}")
    plt.show()
def autocorrelation(file):
    '''Check for autocorrelation within the mood variable, user by user.

    Reads the CSV at ``file`` (columns ``id``, ``time`` and ``mood`` are
    required) and, for every distinct ``id``, shows an autocorrelation plot
    followed by a lag plot of that user's ``mood`` values.
    '''
    df = pd.read_csv(file)
    df = df[["id", "time", "mood"]]
    for user_id in df["id"].unique():
        series = df.loc[df["id"] == user_id, "mood"]

        autocorrelation_plot(series)
        # f-strings format any id type; ``"..." + id`` raised TypeError
        # whenever the ids were parsed as numbers.
        plt.title(f"Autocorrelation plot for user {user_id}")
        plt.show()

        lag_plot(series)
        plt.xlabel("Mood at current timepoint")
        plt.ylabel("Mood at next timepoint")
        plt.title(f"Lag plot for user {user_id}")
        plt.show()
def plot_autocorr(args: argparse.Namespace, column: str, df: pd.DataFrame):
    """Show one autocorrelation plot and four lag plots of ``df[column]``.

    The autocorrelation plot spans the top row of a 3x2 grid; the four
    panels below show lags 100, 200, 400 and 500. Unless ``args.dry_run`` is
    set, the figure is also saved as a PNG inside ``args.output_dir``.
    """

    def soften_frame(axes):
        """Grey out the left/bottom spines and hide the right/top ones."""
        for side in ("left", "bottom"):
            axes.spines[side].set_color("gray")
        for side in ("right", "top"):
            axes.spines[side].set_visible(False)

    fig = plt.figure(figsize=(8, 9), constrained_layout=True)
    gs = fig.add_gridspec(3, 2)

    top = fig.add_subplot(gs[0, :])
    autocorrelation_plot(df[column], c=colors[0], ax=top)
    soften_frame(top)

    # (grid row, grid column, lag) for the four lag panels, in drawing order.
    lag_panels = ((1, 0, 100), (1, 1, 200), (2, 0, 400), (2, 1, 500))
    for color_index, (row, col_pos, lag) in enumerate(lag_panels, start=1):
        panel = fig.add_subplot(gs[row, col_pos])
        lag_plot(df[column], lag=lag, c=colors[color_index], ax=panel, ec="k")
        soften_frame(panel)

    plt.suptitle(f"ELM ID:46, BES {column}", fontsize=18)
    plt.tight_layout()
    if not args.dry_run:
        target = os.path.join(
            args.output_dir,
            f"auto_correlation_plots_elm_id_46_{column}.png",
        )
        plt.savefig(target, dpi=150)
    plt.show()
def plot(file):
    '''
    Visualise the temperature dataset stored in a CSV file.

    Parameters
    ----------
    file: str
        Path of the CSV file to load. The first column is used as the
        (date-parsed) index and a ``meanT`` column is required.

    Returns
    ---------
    None. Every figure is displayed with plt.show().
    '''
    df = pd.read_csv(file, index_col=0, parse_dates=True)

    # Missing-value counts per column.
    plt.bar(x='Nan', height=df.isna().sum())
    plt.show()

    # Full temperature profile.
    temp = df['meanT']
    plt.plot(df.index, temp)
    plt.xlabel('dates')
    plt.ylabel('Mean Temp')
    plt.title('Temperature profile in Berlin')
    plt.show()

    # The last 700 observations.
    recent = temp.iloc[-700:]
    plt.plot(recent.index, recent)
    plt.xlabel('dates')
    plt.ylabel('Mean Temperature')
    plt.title('Temperature profile in Berlin 2021')
    plt.show()

    # The correlation plots are defined for a single series, so pass the
    # temperature column rather than the whole frame (which would mix in any
    # additional columns).
    lag_plot(temp)
    plt.show()
    autocorrelation_plot(temp)
    plt.show()
    plot_acf(temp, lags=30)
    plt.show()
def showLagPlot(df, numero_hogar, fichero, titulo):
    """Draw, save and show a combined lag plot of the four time-range columns.

    The columns 'Rango 00-06', 'Rango 06-12', 'Rango 12-18' and
    'Rango 18-00' of ``df`` are drawn on the same axes in different colours.
    The image is saved as ``<fichero>_dispersion.png`` inside
    ``RUTA_ACTUAL/DIRECTORIO_GUARDADO_IMG``.

    Parameters
    ----------
    df : DataFrame holding the four range columns.
    numero_hogar : str, appended to the title.
    fichero : str, base name of the output image.
    titulo : str, start of the plot title.
    """
    # Build the chart: one lag scatter per time range.
    coloured_ranges = [
        (df['Rango 00-06'], "blue"),
        (df['Rango 06-12'], "orange"),
        (df['Rango 12-18'], "green"),
        (df['Rango 18-00'], "red"),
    ]
    for series, color in coloured_ranges:
        lag_plot(series, c=color)

    pyplot.title(titulo + numero_hogar, fontsize=16)
    # lag_plot puts y(t) on the x-axis and y(t + 1) on the y-axis; the two
    # labels used to be swapped.
    pyplot.xlabel('y (t)', fontsize=14)
    pyplot.ylabel('y (t + 1)', fontsize=14)

    fichero = os.path.join(RUTA_ACTUAL, DIRECTORIO_GUARDADO_IMG,
                           fichero + '_dispersion.png')
    pyplot.savefig(fichero)
    pyplot.show()
# Moving averages: plot the close price with its 10/20/50-day moving
# averages and add the figure to the open ``pdf`` report.
fig = plotFigures(
    df_tickername[[
        "close", "MA for 10 days", "MA for 20 days", "MA for 50 days"
    ]],
    f"{ticker_name} Moving Average",
    "Time (in days)",
    "Closing Price",
    f"{ticker_name}_Moving_Average",
)
pdf.savefig(fig)
plt.show()
plt.close()

# Auto-Correlation
# A lag plot (lag of 5) of the open price; pdf.savefig() without an argument
# stores the current figure.
plt.figure(figsize=(10, 10))
lag_plot(df_tickername["open"], lag=5)
plt.title(f"{ticker_name} Autocorrelation Plot")
# plt.savefig(f_getFilePath(f'reports\\figures\\{ticker_name}_Autocorrelation_Plot.png'))
pdf.savefig()
plt.show()
plt.close()

# Volatility
fig = plotFigures(
    df_tickername["Volatility"],
    f"{ticker_name} Volatility",
    "Time (in days)",
    "Historical Volatility",
    f"{ticker_name}_Volatility",
)
pdf.savefig(fig)
# Script: lag plot of the 'Open' price, then a walk-forward ARIMA forecast
# over the 'Close' price of the consolidated stock CSV.
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
# NOTE(review): ``datetime`` is not used in the visible part of the script,
# and this import path is not available in recent pandas -- confirm.
from pandas import datetime
# NOTE(review): ``arima_model.ARIMA`` is the legacy statsmodels API (newer
# releases provide statsmodels.tsa.arima.model.ARIMA) -- confirm the pinned
# statsmodels version.
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
import os

root_dir = "/home/charan/Documents/workspaces/python_workspaces/Data/BDA_Project"
data_path = "stocks_data/final_stock_consolidated.csv"
data_path = os.path.join(root_dir, data_path)
df = pd.read_csv(data_path)

# Lag plot of the opening price with a lag of 3.
plt.figure()
lag_plot(df['Open'], lag=3)
plt.title('IBM Stock - Autocorrelation plot with lag = 3')
plt.show()

# Chronological 70 % / 30 % train/test split on the closing price.
train_data, test_data = df[0:int(len(df) * 0.7)], df[int(len(df) * 0.7):]
training_data = train_data['Close'].values
test_data = test_data['Close'].values

history = [x for x in training_data]
model_predictions = []
N_test_observations = len(test_data)
# One ARIMA(4, 1, 0) fit and one-step forecast per test observation.
# NOTE(review): ``history`` is never extended inside the visible loop body,
# so every iteration fits the same data; the step that appends the observed
# value may lie beyond this chunk -- confirm.
for time_point in range(N_test_observations):
    model = ARIMA(history, order=(4, 1, 0))
    model_fit = model.fit(disp=0)
    output = model_fit.forecast()
    yhat = output[0]
    model_predictions.append(yhat)
# Notebook-style tour of pandas.plotting helpers. ``np`` and ``pd`` are
# expected to be imported earlier in the file.

# Andrews curves of the iris dataset, grouped by species.
from pandas.plotting import andrews_curves
from pydataset import data
iris = data('iris')
iris.head()
andrews_curves(iris, 'Species')

#Parallel Coordinates
from pandas.plotting import parallel_coordinates
from pydataset import data
iris = data('iris')
parallel_coordinates(iris, 'Species')

#Lag Plot
from pandas.plotting import lag_plot
spacing = np.linspace(-99 * np.pi, 99 * np.pi, num=1000)
spacing
# From here on ``data`` is rebound to a Series and no longer refers to the
# pydataset loader imported above.
data = pd.Series(0.1 * np.random.rand(1000) + 0.9 * np.sin(spacing))
data
lag_plot(data)

#Autocorrelation Plot
from pandas.plotting import autocorrelation_plot
spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
data = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
autocorrelation_plot(data)

#Bootstrap Plot
from pandas.plotting import bootstrap_plot
data = pd.Series(np.random.rand(1000))
bootstrap_plot(data, size=50, samples=500, color='grey')
stocks.head() shelter_outcomes = pd.read_csv( "../input/austin-animal-center-shelter-outcomes-and/aac_shelter_outcomes.csv", parse_dates=['date_of_birth', 'datetime']) shelter_outcomes = shelter_outcomes[[ 'outcome_type', 'age_upon_outcome', 'datetime', 'animal_type', 'breed', 'color', 'sex_upon_outcome', 'date_of_birth' ]] shelter_outcomes.head() shelter_outcomes['date_of_birth'].value_counts().sort_values().plot.line() shelter_outcomes['date_of_birth'].value_counts().resample( 'Y').sum().plot.line() stocks['volume'].resample('Y').mean().plot.bar() from pandas.plotting import lag_plot lag_plot(stocks['volume'].tail(250)) from pandas.plotting import autocorrelation_plot autocorrelation_plot(stocks['volume']) import pandas as pd crypto = pd.read_csv("../input/all-crypto-currencies/crypto-markets.csv") crypto = crypto[crypto['name'] == 'Bitcoin'] crypto['date'] = pd.to_datetime(crypto['date']) crypto.head() from IPython.display import HTML HTML(""" <ol> <li>Time-series data is really a special case of interval data.</li> <br/>
# Finish the 3-D surface plot started earlier in the file.
ax.plot_surface(x, y, z)
ax.set_xlabel('CRIM')
ax.set_ylabel('MEDV')
ax.set_zlabel('ZN')
ax.set_title("재광's")
plt.show()

# 1.5 Read the boston_train.csv file and draw a lag plot
df = pd.read_csv(
    'C:/Users/CPB06GameN/PycharmProjects/GitHub/bigdata/bigdata/파이썬빅데이터분석/boston_train.csv'
)

# 1) Solution
# Both lag plots are drawn on the same current axes before showing.
from pandas.plotting import lag_plot
lag_plot(np.log(df['MEDV']))
lag_plot(np.log(df['CRIM']))
plt.show()

# 2) Solution
lag_plot(np.log(df['MEDV']))
plt.show()

# 1.6 Read the boston_train.csv file and draw an autocorrelation plot
df = pd.read_csv(
    'C:/Users/CPB06GameN/PycharmProjects/GitHub/bigdata/bigdata/파이썬빅데이터분석/boston_train.csv'
)

from pandas.plotting import autocorrelation_plot
autocorrelation_plot(np.log(df['MEDV']))
plt.show()
# Box-and-whisker plot of the yearly columns on ax4.
years.boxplot(column=["2001", "2002", "2003"], ax=ax4)
ax4.set(xlabel="Years", ylabel="Sales", title="Box and whisker plot")

# Heatmap plot
img5 = ax5.matshow(years, interpolation=None, aspect='auto')
xaxis = [2000, 2001, 2002, 2003]
yaxis = range(-1, 13, 2)
ax5.set(xlabel="Year", ylabel="Month", xticklabels=xaxis, yticklabels=yaxis, title="Heatmap plot")
ax5.xaxis.tick_bottom()
fig.colorbar(img5, ax=ax5, aspect=5)

# Lag plot
lag_plot(series_shampoo, ax=ax6)
# Dashed y = x reference line spanning the integer range of the series.
diagonal = range(int(series_shampoo.min()), int(series_shampoo.max()))
ax6.plot(diagonal, diagonal, '--k')
ax6.set(xlabel="Sales(t)", ylabel="Sales(t+1)", title="Lag plot")

# Autocorrelation plot
autocorrelation_plot(series_shampoo, ax=ax7)
ax7.set(title="Autocorrelation plot", ylim=(-1, 1))

# Drop the unused eighth axes and add vertical space between the rows.
ax8.remove()
fig.subplots_adjust(hspace=0.6)
# plt.tight_layout()
plt.show()
def plot_lagplot(df):
    """Show the lag plot of the ``confirmed`` column of ``df``."""
    lag_plot(df['confirmed'])
    plt.show()
def plot(input_ts='-', columns=None, start_date=None, end_date=None,
         clean=False, skiprows=None, index_type='datetime', names=None,
         ofilename='plot.png', type='time', xtitle='', ytitle='', title='',
         figsize='10,6.0', legend=None, legend_names=None, subplots=False,
         sharex=True, sharey=False, colors='auto', linestyles='auto',
         markerstyles=' ', style='auto', logx=False, logy=False,
         xaxis='arithmetic', yaxis='arithmetic', xlim=None, ylim=None,
         secondary_y=False, mark_right=True, scatter_matrix_diagonal='kde',
         bootstrap_size=50, bootstrap_samples=500, norm_xaxis=False,
         norm_yaxis=False, lognorm_xaxis=False, lognorm_yaxis=False,
         xy_match_line='', grid=False, label_rotation=None, label_skip=1,
         force_freq=None, drawstyle='default', por=False, invert_xaxis=False,
         invert_yaxis=False, round_index=None, plotting_position='weibull',
         source_units=None, target_units=None, lag_plot_lag=1):
    r"""Plot data."""
    # Overview: reads the input through ``tsutils``, normalises the styling
    # options, dispatches on ``type`` to draw the requested plot, applies the
    # common decorations (labels, grid, title) and finally either returns the
    # pyplot module (``ofilename is None``) or saves the figure to
    # ``ofilename``.
    #
    # NOTE(review): ``pd.np`` is used throughout this function; that alias is
    # not available in recent pandas releases -- confirm the supported pandas
    # version.

    # Need to work around some old option defaults with the implementation of
    # mando
    legend = bool(legend == '' or legend == 'True' or legend is None)

    # The 'Agg' backend is selected before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FixedLocator

    tsd = tsutils.common_kwds(tsutils.read_iso_ts(input_ts,
                                                  skiprows=skiprows,
                                                  names=names,
                                                  index_type=index_type),
                              start_date=start_date,
                              end_date=end_date,
                              pick=columns,
                              round_index=round_index,
                              dropna='all',
                              source_units=source_units,
                              target_units=target_units,
                              clean=clean)

    # These plot types accept exactly one time-series.
    if type in ['bootstrap', 'heatmap', 'autocorrelation', 'lag_plot']:
        if len(tsd.columns) != 1:
            raise ValueError(""" * * The '{1}' plot can only work with 1 time-series in the DataFrame. * The DataFrame that you supplied has {0} time-series. 
* """.format(len(tsd.columns), type))

    if por is True:
        tsd = tsutils.common_kwds(tsutils.read_iso_ts(tsd),
                                  start_date=start_date,
                                  end_date=end_date,
                                  round_index=round_index,
                                  dropna='no')

    # This is to help pretty print the frequency
    try:
        try:
            pltfreq = str(tsd.index.freq, 'utf-8').lower()
        except TypeError:
            pltfreq = str(tsd.index.freq).lower()
        if pltfreq.split(' ')[0][1:] == '1':
            beginstr = 3
        else:
            beginstr = 1
        if pltfreq == 'none':
            short_freq = ''
        else:
            # short freq string (day) OR (2 day)
            short_freq = '({0})'.format(pltfreq[beginstr:-1])
    except AttributeError:
        # The index has no ``freq`` attribute.
        short_freq = ''

    # Optional renaming of the columns for the legend.
    if legend_names:
        lnames = tsutils.make_list(legend_names)
        if len(lnames) != len(set(lnames)):
            raise ValueError(""" * * Each name in legend_names must be unique. * """)
        if len(tsd.columns) == len(lnames):
            renamedict = dict(list(zip(tsd.columns, lnames)))
        elif type == 'xy' and len(tsd.columns) // 2 == len(lnames):
            renamedict = dict(list(zip(tsd.columns[2::2], lnames[1:])))
            renamedict[tsd.columns[1]] = lnames[0]
        else:
            raise ValueError(""" * * For 'legend_names' you must have the same number of comma * separated names as columns in the input data. The input * data has {0} where the number of 'legend_names' is {1}. * * If 'xy' type you need to have legend names as x,y1,y2,y3,... * """.format(len(tsd.columns), len(lnames)))
        tsd.rename(columns=renamedict, inplace=True)
    else:
        lnames = tsd.columns

    # 'auto' selects the module-level default lists.
    if colors == 'auto':
        colors = color_list
    else:
        colors = tsutils.make_list(colors)

    if linestyles == 'auto':
        linestyles = line_list
    else:
        linestyles = tsutils.make_list(linestyles)

    if markerstyles == 'auto':
        markerstyles = marker_list
    else:
        markerstyles = tsutils.make_list(markerstyles)
        if markerstyles is None:
            markerstyles = ' '

    # An explicit ``style`` replaces colors/markerstyles/linestyles: each
    # style string is split into color, optional marker, and linestyle.
    if style != 'auto':
        nstyle = tsutils.make_list(style)
        if len(nstyle) != len(tsd.columns):
            raise ValueError(""" * * You have to have the same number of style strings as time-series to plot. * You supplied '{0}' for style which has {1} style strings, * but you have {2} time-series. 
* """.format(style, len(nstyle), len(tsd.columns)))
        colors = []
        markerstyles = []
        linestyles = []
        for st in nstyle:
            colors.append(st[0])
            if len(st) == 1:
                markerstyles.append(' ')
                linestyles.append('-')
                continue
            if st[1] in marker_list:
                markerstyles.append(st[1])
                try:
                    linestyles.append(st[2:])
                except IndexError:
                    linestyles.append(' ')
            else:
                markerstyles.append(' ')
                linestyles.append(st[1:])
    if linestyles is None:
        linestyles = [' ']
    else:
        linestyles = [' ' if i == ' ' else i for i in linestyles]
    markerstyles = [' ' if i is None else i for i in markerstyles]

    # One combined style string per column, cycling through the lists.
    icolors = itertools.cycle(colors)
    imarkerstyles = itertools.cycle(markerstyles)
    ilinestyles = itertools.cycle(linestyles)
    style = ['{0}{1}{2}'.format(next(icolors),
                                next(imarkerstyles),
                                next(ilinestyles))
             for i in list(range(len(tsd.columns)))]

    # reset to beginning of iterator
    icolors = itertools.cycle(colors)
    imarkerstyles = itertools.cycle(markerstyles)
    ilinestyles = itertools.cycle(linestyles)

    if (logx is True or
            logy is True or
            norm_xaxis is True or
            norm_yaxis is True or
            lognorm_xaxis is True or
            lognorm_yaxis is True):
        warnings.warn(""" * * The --logx, --logy, --norm_xaxis, --norm_yaxis, --lognorm_xaxis, and * --lognorm_yaxis options are deprecated. * * For --logx use --xaxis="log" * For --logy use --yaxis="log" * For --norm_xaxis use --type="norm_xaxis" * For --norm_yaxis use --type="norm_yaxis" * For --lognorm_xaxis use --type="lognorm_xaxis" * For --lognorm_yaxis use --type="lognorm_yaxis" * """)

    if xaxis == 'log':
        logx = True
    if yaxis == 'log':
        logy = True

    # Probability-axis plot types override a log setting on that axis.
    if type in ['norm_xaxis', 'lognorm_xaxis', 'weibull_xaxis']:
        xaxis = 'normal'
        if logx is True:
            logx = False
            warnings.warn(""" * * The --type={1} cannot also have the xaxis set to {0}. * The {0} setting for xaxis is ignored. * """.format(xaxis, type))

    if type in ['norm_yaxis', 'lognorm_yaxis', 'weibull_yaxis']:
        yaxis = 'normal'
        if logy is True:
            logy = False
            warnings.warn(""" * * The --type={1} cannot also have the yaxis set to {0}. 
* The {0} setting for yaxis is ignored. * """.format(yaxis, type))

    xlim = _know_your_limits(xlim, axis=xaxis)
    ylim = _know_your_limits(ylim, axis=yaxis)

    figsize = tsutils.make_list(figsize)

    # For a non-datetime index, the index is inserted as the first column.
    if not isinstance(tsd.index, pd.DatetimeIndex):
        tsd.insert(0, tsd.index.name, tsd.index)

    if type in ['xy', 'double_mass']:
        if tsd.shape[1] % 2 != 0:
            raise AttributeError(""" * * The 'xy' and 'double_mass' types must have an even number of columns * arranged as x,y pairs. You supplied {0} columns. * """.format(tsd.shape[1]))
        colcnt = tsd.shape[1] // 2
    elif type in ['norm_xaxis',
                  'norm_yaxis',
                  'lognorm_xaxis',
                  'lognorm_yaxis',
                  'weibull_xaxis',
                  'weibull_yaxis']:
        colcnt = tsd.shape[1]

    # These types draw with raw matplotlib; ``plotdict`` maps the
    # (logx, logy) pair to the matching axes plotting method.
    if type in ['xy',
                'double_mass',
                'norm_xaxis',
                'norm_yaxis',
                'lognorm_xaxis',
                'lognorm_yaxis',
                'weibull_xaxis',
                'weibull_yaxis',
                'heatmap']:
        _, ax = plt.subplots(figsize=figsize)
        plotdict = {(False, True): ax.semilogy,
                    (True, False): ax.semilogx,
                    (True, True): ax.loglog,
                    (False, False): ax.plot}

    # ---- Dispatch on the plot type ----
    if type == 'time':
        ax = tsd.plot(legend=legend, subplots=subplots, sharex=sharex,
                      sharey=sharey, style=None, logx=logx, logy=logy,
                      xlim=xlim, ylim=ylim, secondary_y=secondary_y,
                      mark_right=mark_right, figsize=figsize,
                      drawstyle=drawstyle)
        # Apply the per-column style strings to the drawn lines.
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        xtitle = xtitle or 'Time'
        if legend is True:
            plt.legend(loc='best')
    elif type in ['taylor']:
        from .. skill_metrics import centered_rms_dev
        from .. skill_metrics import taylor_diagram
        # The first column is the reference; the others are compared to it.
        ref = tsd.iloc[:, 0]
        std = [pd.np.std(ref)]
        ccoef = [1.0]
        crmsd = [0.0]
        for col in range(1, len(tsd.columns)):
            std.append(pd.np.std(tsd.iloc[:, col]))
            ccoef.append(pd.np.corrcoef(tsd.iloc[:, col], ref)[0][1])
            crmsd.append(centered_rms_dev(tsd.iloc[:, col].values,
                                          ref.values))
        taylor_diagram(pd.np.array(std),
                       pd.np.array(crmsd),
                       pd.np.array(ccoef))
    elif type in ['target']:
        from .. skill_metrics import centered_rms_dev
        from .. skill_metrics import rmsd
        from .. skill_metrics import bias
        from .. skill_metrics import target_diagram
        biases = []
        rmsds = []
        crmsds = []
        # The first column is the reference; the others are compared to it.
        ref = tsd.iloc[:, 0].values
        for col in range(1, len(tsd.columns)):
            biases.append(bias(tsd.iloc[:, col].values, ref))
            crmsds.append(centered_rms_dev(tsd.iloc[:, col].values, ref))
            rmsds.append(rmsd(tsd.iloc[:, col].values, ref))
        target_diagram(pd.np.array(biases),
                       pd.np.array(crmsds),
                       pd.np.array(rmsds))
    elif type in ['xy', 'double_mass']:
        # PANDAS was not doing the right thing with xy plots
        # if you wanted lines between markers.
        # Fell back to using raw matplotlib.
        # Boy I do not like matplotlib.
        for colindex in range(colcnt):
            # Columns are consumed as consecutive (x, y) pairs.
            ndf = tsd.iloc[:, colindex*2:colindex*2 + 2]
            if type == 'double_mass':
                ndf = ndf.dropna().cumsum()
            oxdata = pd.np.array(ndf.iloc[:, 0])
            oydata = pd.np.array(ndf.iloc[:, 1])
            plotdict[(logx, logy)](oxdata, oydata,
                                   linestyle=next(ilinestyles),
                                   color=next(icolors),
                                   marker=next(imarkerstyles),
                                   label=lnames[colindex],
                                   drawstyle=drawstyle)
        ax.set_xlim(xlim)
        ax.set_ylim(ylim)
        if legend is True:
            ax.legend(loc='best')
        if type == 'double_mass':
            xtitle = xtitle or 'Cumulative {0}'.format(tsd.columns[0])
            ytitle = ytitle or 'Cumulative {0}'.format(tsd.columns[1])
    elif type in ['norm_xaxis',
                  'norm_yaxis',
                  'lognorm_xaxis',
                  'lognorm_yaxis',
                  'weibull_xaxis',
                  'weibull_yaxis']:
        # The distribution name is the part of ``type`` before the first '_'.
        ppf = tsutils.set_ppf(type.split('_')[0])
        ys = tsd.iloc[:, :]
        for colindex in range(colcnt):
            # Values sorted in descending order against plotting positions.
            oydata = pd.np.array(ys.iloc[:, colindex].dropna())
            oydata = pd.np.sort(oydata)[::-1]
            n = len(oydata)
            norm_axis = ax.xaxis
            oxdata = ppf(tsutils.set_plotting_position(n, plotting_position))

            if type in ['norm_yaxis', 'lognorm_yaxis', 'weibull_yaxis']:
                oxdata, oydata = oydata, oxdata
                norm_axis = ax.yaxis

            plotdict[(logx, logy)](oxdata, oydata,
                                   linestyle=next(ilinestyles),
                                   color=next(icolors),
                                   marker=next(imarkerstyles),
                                   label=lnames[colindex],
                                   drawstyle=drawstyle)

        # Make it pretty
        xtmaj = pd.np.array([0.01, 0.1, 0.5, 0.9, 0.99])
        xtmaj_str = ['1', '10', '50', '90', '99']
        xtmin = pd.np.concatenate([pd.np.linspace(0.001, 0.01, 10),
                                   pd.np.linspace(0.01, 0.1, 10),
                                   pd.np.linspace(0.1, 0.9, 9),
                                   pd.np.linspace(0.9, 0.99, 10),
                                   pd.np.linspace(0.99, 0.999, 10)])
        xtmaj = ppf(xtmaj)
        xtmin = ppf(xtmin)

        norm_axis.set_major_locator(FixedLocator(xtmaj))
        norm_axis.set_minor_locator(FixedLocator(xtmin))

        if type in ['norm_xaxis', 'lognorm_xaxis', 'weibull_xaxis']:
            ax.set_xticklabels(xtmaj_str)
            ax.set_ylim(ylim)
            ax.set_xlim(ppf(xlim))

        elif type in ['norm_yaxis', 'lognorm_yaxis', 'weibull_yaxis']:
            ax.set_yticklabels(xtmaj_str)
            ax.set_xlim(xlim)
            ax.set_ylim(ppf(ylim))

        if type in ['norm_xaxis', 'norm_yaxis']:
            xtitle = xtitle or 'Normal Distribution'
            ytitle = ytitle or tsd.columns[0]
        elif type in ['lognorm_xaxis', 'lognorm_yaxis']:
            xtitle = xtitle or 'Log Normal Distribution'
            ytitle = ytitle or tsd.columns[0]
        elif type in ['weibull_xaxis', 'weibull_yaxis']:
            xtitle = xtitle or 'Weibull Distribution'
            ytitle = ytitle or tsd.columns[0]

        # The *_yaxis variants put the distribution on the y axis.
        if type in ['norm_yaxis', 'lognorm_yaxis', 'weibull_yaxis']:
            xtitle, ytitle = ytitle, xtitle

        if legend is True:
            ax.legend(loc='best')
    elif type in ['kde', 'probability_density']:
        ax = tsd.plot(kind='kde', legend=legend, subplots=subplots,
                      sharex=sharex, sharey=sharey, style=None, logx=logx,
                      logy=logy, xlim=xlim, ylim=ylim,
                      secondary_y=secondary_y,
                      figsize=figsize)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        ytitle = ytitle or 'Density'
        if legend is True:
            plt.legend(loc='best')
    elif type == 'kde_time':
        # NOTE(review): ``scipy.stats.kde`` is a legacy import path --
        # confirm it exists in the supported SciPy version.
        from scipy.stats.kde import gaussian_kde
        # Narrow density panel (ax0) beside the time-series panel (ax1),
        # sharing the y axis.
        _, (ax0, ax1) = plt.subplots(nrows=1,
                                     ncols=2,
                                     sharey=True,
                                     figsize=figsize,
                                     gridspec_kw={'width_ratios': [1, 4]})
        tsd.plot(legend=legend, subplots=subplots, sharex=sharex,
                 sharey=sharey, style=None, logx=logx, logy=logy, xlim=xlim,
                 ylim=ylim, secondary_y=secondary_y, mark_right=mark_right,
                 figsize=figsize, drawstyle=drawstyle, ax=ax1)
        for index, line in enumerate(ax1.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        xtitle = xtitle or 'Time'
        ylimits = ax1.get_ylim()
        ny = pd.np.linspace(ylimits[0], ylimits[1], 1000)
        for col in range(len(tsd.columns)):
            xvals = tsd.iloc[:, col].dropna().values
            pdf = gaussian_kde(xvals)
            ax0.plot(pdf(ny), ny,
                     linestyle=style[col][2:],
                     color=style[col][0],
                     marker=style[col][1],
                     label=tsd.columns[col],
                     drawstyle=drawstyle)
        ax0.set(xlabel='Probability Density', ylabel=ytitle)
    elif type == 'boxplot':
        tsd.boxplot(figsize=figsize)
    elif type == 'scatter_matrix':
        from pandas.plotting import scatter_matrix
        # The misspelled value is accepted and mapped to 'kde'.
        if scatter_matrix_diagonal == 'probablity_density':
            scatter_matrix_diagonal = 'kde'
        scatter_matrix(tsd, diagonal=scatter_matrix_diagonal,
                       figsize=figsize)
    elif type == 'lag_plot':
        from pandas.plotting import lag_plot
        lag_plot(tsd, lag=lag_plot_lag)
        xtitle = xtitle or 'y(t)'
        # NOTE(review): the default y title is built from ``short_freq`` (or
        # 1) and does not reflect ``lag_plot_lag`` -- confirm this is
        # intended.
        ytitle = ytitle or 'y(t+{0})'.format(short_freq or 1)
    elif type == 'autocorrelation':
        from pandas.plotting import autocorrelation_plot
        autocorrelation_plot(tsd)
        xtitle = xtitle or 'Time Lag {0}'.format(short_freq)
    elif type == 'bootstrap':
        from pandas.plotting import bootstrap_plot
        bootstrap_plot(tsd, size=bootstrap_size, samples=bootstrap_samples,
                       color='gray')
    elif type == 'heatmap':
        # Find beginning and end years
        byear = tsd.index[0].year
        eyear = tsd.index[-1].year
        tsd = tsutils.asbestfreq(tsd)
        if tsd.index.freqstr != 'D':
            raise ValueError(""" * * The "heatmap" plot type can only work with daily time series. 
* """)
        # Pad the series to whole calendar years.
        dr = pd.date_range('{0}-01-01'.format(byear),
                           '{0}-12-31'.format(eyear),
                           freq='D')
        ntsd = tsd.reindex(index=dr)
        # NOTE(review): ``pd.TimeGrouper`` is not available in recent pandas
        # releases -- confirm the supported pandas version.
        groups = ntsd.iloc[:, 0].groupby(pd.TimeGrouper('A'))
        years = pd.DataFrame()
        for name, group in groups:
            ngroup = group.values
            # 365-value years get one trailing NaN so every year has the
            # same length.
            if len(group.values) == 365:
                ngroup = pd.np.append(group.values, [pd.np.nan])
            years[name.year] = ngroup
        years = years.T
        plt.imshow(years, interpolation=None, aspect='auto')
        plt.colorbar()
        yticks = list(range(byear, eyear + 1))
        skip = len(yticks)//20 + 1
        plt.yticks(range(0, len(yticks), skip), yticks[::skip])
        mnths = [0, 30, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]
        mnths_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        plt.xticks(mnths, mnths_labels)
        grid = False
    elif (type == 'bar' or
          type == 'bar_stacked' or
          type == 'barh' or
          type == 'barh_stacked'):
        stacked = False
        if type[-7:] == 'stacked':
            stacked = True
        kind = 'bar'
        if type[:4] == 'barh':
            kind = 'barh'
        ax = tsd.plot(kind=kind, legend=legend, stacked=stacked,
                      style=style, logx=logx, logy=logy, xlim=xlim,
                      ylim=ylim, figsize=figsize)
        for index, line in enumerate(ax.lines):
            plt.setp(line, color=style[index][0])
            plt.setp(line, marker=style[index][1])
            plt.setp(line, linestyle=style[index][2:])
        freq = tsutils.asbestfreq(tsd, force_freq=force_freq).index.freqstr
        if freq is not None:
            # Number of leading characters of each tick label to keep,
            # chosen from the frequency string.
            if 'A' in freq:
                endchar = 4
            elif 'M' in freq:
                endchar = 7
            elif 'D' in freq:
                endchar = 10
            elif 'H' in freq:
                endchar = 13
            else:
                endchar = None
            nticklabels = []
            if kind == 'bar':
                taxis = ax.xaxis
            else:
                taxis = ax.yaxis
            # Keep every ``label_skip``-th label, blank out the others.
            for index, i in enumerate(taxis.get_majorticklabels()):
                if index % label_skip:
                    nticklabels.append(' ')
                else:
                    nticklabels.append(i.get_text()[:endchar])
            taxis.set_ticklabels(nticklabels)
            plt.setp(taxis.get_majorticklabels(), rotation=label_rotation)
        if legend is True:
            plt.legend(loc='best')
    elif type == 'histogram':
        tsd.hist(figsize=figsize)
    else:
        raise ValueError(""" * * Plot 'type' {0} is not supported. 
* """.format(type))

    # Optional 1:1 line drawn across the current axis limits.
    # NOTE(review): ``ax`` is only bound by some of the branches above, so
    # this looks like it would raise NameError for the other plot types --
    # confirm.
    if xy_match_line:
        if isinstance(xy_match_line, str):
            xymsty = xy_match_line
        else:
            xymsty = 'g--'
        nxlim = ax.get_xlim()
        nylim = ax.get_ylim()
        maxt = max(nxlim[1], nylim[1])
        mint = min(nxlim[0], nylim[0])
        ax.plot([mint, maxt], [mint, maxt], xymsty, zorder=1)
        # Restore the limits captured before the line was drawn.
        ax.set_ylim(nylim)
        ax.set_xlim(nxlim)

    plt.xlabel(xtitle)
    plt.ylabel(ytitle)

    if invert_xaxis is True:
        plt.gca().invert_xaxis()
    if invert_yaxis is True:
        plt.gca().invert_yaxis()

    plt.grid(grid)
    plt.title(title)
    plt.tight_layout()
    # Without an output file name the pyplot module itself is returned.
    if ofilename is None:
        return plt
    plt.savefig(ofilename)
st.write( 'We will transform our Data because it is not stacionary, we will apply de following transformation:' ) st.image(Image.open('./imag/trans.png'), caption='Continuous returns') st.write( 'After the tranformation we will apply de adfuller test again.') dayly_returns(dataframes, option) st.write('P-Value ', TICK, 'Price: ', adfuller(abs(dataframes['trans']))[1]) U = arma1_fortrans(adfuller, dataframes) st.write(U) st.write( 'Based on the following plots choose the p and q for the ARMA model' ) lag_plot(dataframes[option]) lag_acf = st.slider( 'Would you choose the lags for the autocorrlation ', 0, 10, 50) lag_pacf = st.slider( 'Would you choose the lags for the partial autocallation', 0, 10, 50) #plot_lags(lag_acf,lag_pacf,dataframes) o = 'trans' try: m11 = pplotacf(dataframes, lag_acf, o) plt.savefig('./imag/acf.png') image11 = Image.open('./imag/acf.png') st.image(image11, caption='')
if valid['Predictions'][i] < valid['Predictions'][i + 1]: valid['Predicted'][i + 1] = "Increase" else: valid['Predicted'][i + 1] = "Decrease" for i in range(1, len(valid)): if valid['Actual'][i] == valid['Predicted'][i]: valid['Recommendation'][i] = "Correct" else: valid['Recommendation'][i] = "Incorrect" print(valid.tail(10)) # # Autocorrelation plt.figure(figsize=(10, 10)) lag_plot(df['Close'], lag=5) plt.title(f'{ticker} Autocorrelation plot') train_data, test_data = df[0:int(len(df) * 0.8)], df[int(len(df) * 0.8):] plt.figure(figsize=(16, 8)) plt.title(f'{ticker} Stock Price') plt.xlabel('Dates') plt.ylabel('Prices') plt.plot(df['Close'], 'blue', label='Training Data') plt.plot(test_data['Close'], 'green', label='Testing Data') plt.legend() # # Arima
# Heatmap of the ``corr`` matrix (titled as Granger causality test results).
plt.figure(figsize=(24, 16))
ax = plt.axes()
ax = sns.heatmap(corr, ax = ax)
ax.set_title('Heatmap - Granger Causality Test Results')
plt.xlabel("Time Series")
plt.ylabel("Time Series")
plt.show()

# In[23]:

## lag plot
# NOTE(review): ``datat`` is column-sliced with ``iloc`` below, so it looks
# like a multi-column frame is passed to lag_plot here -- confirm that is
# intended.
plt.figure()
lag_plot(datat)

# In[24]:

print(datat.shape)

# In[25]:

# Keep the first ``i`` columns.
i = 10
datatp = datat.iloc[:, 0:i]
# Use the first column as the index, then plot the Henry Hub spot price.
print(df.head())
df = df.set_index(df.columns[0])
print(df.head())

df['hh_sp'].plot(figsize = (14,6), grid=True)
plt.title('Henry Hub Spot Price')
plt.ylabel('Price(Dollars per Million Btu)')
plt.show()

"""We are dealing with discrete parameter process of time series."""

# Plot
# Lag plots for lags 1 to 4 on a shared-axes row.
# NOTE(review): the whole frame ``df`` (not ``df['hh_sp']``) is passed to
# lag_plot here and below -- presumably it has a single column; confirm.
fig, axes = plt.subplots(1, 4, figsize=(10,3), sharex=True, sharey=True, dpi=100)
for i, ax in enumerate(axes.flatten()[:4]):
    lag_plot(df, lag=i+1, ax=ax)
    ax.set_title('Lag ' + str(i+1))

fig.suptitle('Lag Plots of Natural Gas \n(Points get wide and scattered with increasing lag -> lesser correlation)\n', y=1.15)
plt.show()

lag_plot(df)
plt.show()

"""Running the example plots the data (t) on the x-axis against the data on the previous day (t-1) on the y-axis. We can see a large ball of observations along a diagonal line of the plot. It clearly shows a relationship or some correlation."""

from pandas.plotting import autocorrelation_plot
autocorrelation_plot(df.hh_sp)
plt.show()
def do_lag_plot(X):
    """Save lag plots of ``X`` for lags 1 to 4 as PNG files.

    The files are written under the module-level ``folder_out`` prefix and
    numbered from zero, so lag 1 goes to ``lag_plot_00.png`` and lag 4 to
    ``lag_plot_03.png``.
    """
    for lag in range(1, 5):
        # Start from a clean current figure for every lag.
        plt.clf()
        lag_plot(X, lag=lag, c='firebrick')
        file_name = 'lag_plot_%02d.png' % (lag - 1)
        plt.savefig(folder_out + file_name)