def do_main():
    """Show how gaps in incomplete price data are filled before analysis.

    Loads BTC, ETH and LTC close prices in EUR and plots them three times:
    as loaded, after a forward fill, and after an additional backward fill.

    Missing values have to be filled so that NaNs do not break later
    calculations. Interpolating is avoided because it would use future
    values; instead the last known value is carried forward, and gaps at the
    very beginning are filled backwards from the first known value.
    """
    crypto_codes = ['BTC', 'ETH', 'LTC']
    currency_code = 'EUR'
    # Original author's note: this range covers the time ETH started, so
    # part of its data is missing.
    dates = pd.date_range('2017-04-01', '2017-08-01')
    df = get_gdax_data(crypto_codes, currency_code, dates)

    # Data as loaded, gaps included.
    ax = df.plot(title="LTH Close stats", label='LTH')
    ax.set_xlabel("Date")
    ax.set_ylabel("Close Price")
    ax.legend(loc='upper left')
    plt.show()

    # Forward fill first. DataFrame.ffill replaces the deprecated
    # fillna(method='ffill') spelling and fills identically.
    df.ffill(inplace=True)
    ax = df.plot(title="LTH Close stats", label='LTH')
    plt.show()

    # Backward fill second, for leading gaps that have no earlier value.
    df.bfill(inplace=True)
    ax = df.plot(title="LTH Close stats", label='LTH')
    plt.show()
def do_main():
    """Print value and risk/return statistics for a fixed three-coin portfolio.

    Statistics reported:

    * portfolio value     -- total worth of the holdings, per day
    * cumulative return   -- value[-1] / value[0] - 1
    * average daily return -- related to performance (higher is better)
    * std of daily return -- related to risk (deviation / volatility)
    * Sharpe ratio        -- return adjusted for risk:
      mean(daily_ret - daily_rf) / std(daily_ret) * k, with
      k = sqrt(samples per year); the higher the better. The risk-free
      return (e.g. a bank account) is taken as 0.
    """
    currency_code = 'EUR'
    crypto_codes = ['BTC', 'LTC', 'ETH']
    date_span = pd.date_range('2017-06-01', '2017-08-25')
    prices = get_gdax_data(crypto_codes, currency_code, date_span)

    # Split the starting money across the assets by relative allocation.
    portfolio_money = 1000
    relative_allocation = np.array([0.4, 0.3, 0.3])
    money_per_asset = relative_allocation * portfolio_money
    value = get_portfolio_value(prices, money_per_asset)
    print("portfolio_value")
    print(value.tail())

    # Per the original note, the first daily return is 0, so it is dropped.
    all_returns = compute_daily_returns(value)
    returns = all_returns[1:]
    print("daily_return")
    print(returns.tail())

    total_return = get_portfolio_cumulative_return(value)
    print("cumulative_return=", total_return)

    mean_return = returns.mean()
    print("avg_daily_ret=", mean_return)

    return_spread = returns.std()
    print("std_daily_ret=", return_spread)

    # get_portfolio_sharp_ratio is defined elsewhere; the arguments are
    # presumably (returns, risk-free return, samples per year) -- confirm.
    # NOTE(review): 252 is the "active days a year" figure from the original
    # comment; verify it suits this data set.
    risk_free = 0
    samples_per_year = 252
    sharpe = get_portfolio_sharp_ratio(returns, risk_free, samples_per_year)
    print("sharpe_ratio=", sharpe)
def do_main():
    """Optimise portfolio allocations with SciPy's SLSQP solver.

    Loads BTC, LTC and ETH close prices in EUR, runs ``spo.minimize`` on
    ``minimize_function`` starting from a fixed initial guess, then prints
    the optimised allocations, the final portfolio value and the Sharpe
    ratio, and plots the daily returns of the optimised portfolio.

    ``minimize_function`` is defined elsewhere; per the original header this
    optimises for a higher Sharpe ratio (presumably by minimising its
    negative -- confirm against that function).

    See https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html
    """
    # Get the data.
    crypto_codes = ['BTC', 'LTC', 'ETH']
    currency_code = 'EUR'
    dates = pd.date_range('2017-08-01', '2017-09-01')
    df = get_gdax_data(crypto_codes, currency_code, dates)

    # Initial guess: one relative allocation per asset.
    portfolio_allocations_0 = np.array([0.4, 0.3, 0.3])

    # Equality constraint: the function must return 0, i.e. the allocations
    # must sum to 1.0.
    cons = {'type': 'eq', 'fun': lambda x: 1 - x.sum()}
    # Every allocation is limited to [0, 1]; one bound per entry of the
    # initial guess so the two cannot drift apart.
    bnds = tuple((0, 1.0) for _ in portfolio_allocations_0)

    result = spo.minimize(
        minimize_function,
        portfolio_allocations_0,
        args=(df, ),
        method='SLSQP',
        options={'disp': True},
        bounds=bnds,
        constraints=cons)

    print("Optimized Allocations:")
    print(result.x)

    optimized_portfolio_value = get_portfolio_value(df, result.x)
    # Use explicit positional access: plain [-1] on a Series with a
    # non-integer index relies on a deprecated positional fallback.
    print("Max profit=", optimized_portfolio_value.iloc[-1])

    # Per the original note, the first daily return is 0, so it is dropped.
    optimized_daily_return = compute_daily_returns(optimized_portfolio_value)[1:]
    sharpe_ratio = get_portfolio_sharp_ratio(optimized_daily_return, 0, 252)
    print("Max Sharpe Ratio=", sharpe_ratio)

    optimized_daily_return.plot(title='Daily Returns')
    plt.show()
def do_main():
    """Plot BTC close prices with rolling statistics, then daily returns.

    Prints the mean, median and standard deviation of the loaded frame,
    overlays a 20-sample rolling mean and the bands returned by
    ``get_bollinger_bands`` on the price plot, and finally plots the daily
    returns.

    Rolling statistics are statistics over a "window" of data, computed for
    every point. Per the original notes, Bollinger bands are limiting
    thresholds at 2 sigma (rolling standard deviation); a price reaching
    them is treated as a buy/sell opportunity. The band computation itself
    lives in ``get_bollinger_bands``, defined elsewhere.

    References:
    http://pandas.pydata.org/pandas-docs/stable/api.html#api-dataframe-stats
    http://pandas.pydata.org/pandas-docs/stable/computation.html?highlight=rolling%20statistics#moving-rolling-statistics-moments
    """
    crypto_codes = ['BTC']
    currency_code = 'EUR'
    dates = pd.date_range('2017-05-01', '2017-08-24')
    df = get_gdax_data(crypto_codes, currency_code, dates)

    ax = df.plot(title="BTC Close stats", label='BTC')
    ax.set_xlabel("Date")
    ax.set_ylabel("Close Price")

    # Whole-frame statistics.
    print("--mean--")
    print(df.mean())
    print("--median--")
    print(df.median())
    print("--std--")
    print(df.std())

    # Rolling statistics over a window of 20 (days, per the original note).
    rm_BTC = get_rolling_mean(df['BTC close'], 20)
    rstd_BTC = get_rolling_std(df['BTC close'], 20)
    b_upper_band, b_lower_band = get_bollinger_bands(rm_BTC, rstd_BTC)

    # Draw on the same axes object as the price plot.
    rm_BTC.plot(label='Rolling mean', ax=ax)
    b_upper_band.plot(label='Upper band', ax=ax)
    b_lower_band.plot(label='Lower band', ax=ax)
    # Build the legend only after every labelled line has been drawn;
    # a legend created earlier would not list the lines added afterwards.
    ax.legend(loc='upper left')
    plt.show()

    # Daily returns: how much the price moved on a day, i.e. today's price
    # relative to yesterday's.
    daily_returns = compute_daily_returns(df)
    daily_returns.plot(title="Daily Returns")
    plt.show()
def _evaluate_learner(tag, learner, x_training, y_training, x_test, y_test):
    """Train ``learner``, query it on the test inputs and print its scores.

    Prints the predictions, the value returned by ``get_rms_error`` (root
    mean squared error, per the original comments) and the ``np.corrcoef``
    matrix between predictions and ``y_test``. ``tag`` is the suffix used in
    the printed labels (e.g. ``"lr"`` gives ``"rms_error_lr: "``).
    """
    learner.train(x_training, y_training)
    y_predicted = learner.query(x_test)
    print("y_predicted_" + tag + ":")
    print(y_predicted)

    # Root Mean Squared Error
    rms_error = get_rms_error(y_predicted, y_test)
    print("rms_error_" + tag + ": " + str(rms_error))

    # Correlation
    correlation = np.corrcoef(y_predicted, y_test)
    print("correlation_" + tag + ": " + str(correlation))


def do_main():
    """Compare several learners on BTC/EUR close prices.

    Trains on July 2017 data and tests on August 2017 data, then reports
    predictions, RMS error and correlation for a linear regression, a KNN
    (constructed with 3), a polynomial regression (constructed with 2) and
    an ensemble learner, in that order.
    """
    # Get the data.
    crypto_codes = ['BTC']
    currency_code = 'EUR'
    training_dates = pd.date_range('2017-07-01', '2017-08-01')
    df_training = get_gdax_data(crypto_codes, currency_code, training_dates)
    test_dates = pd.date_range('2017-08-01', '2017-09-01')
    df_test = get_gdax_data(crypto_codes, currency_code, test_dates)

    # calculate_df_input_output_data is defined elsewhere; the meaning of
    # the second argument (5) is not visible here -- TODO confirm.
    x_training, y_training = calculate_df_input_output_data(df_training, 5)
    x_test, y_test = calculate_df_input_output_data(df_test, 5)
    print("y_test:")
    print(y_test)

    data = (x_training, y_training, x_test, y_test)

    _evaluate_learner("lr", LinRegLearner(), *data)

    # Original author's note: KNN does very badly here because almost all
    # x_test values fall outside the x_training range (the price shot up)
    # and KNN cannot extrapolate.
    _evaluate_learner("knn", KNNLearner(3), *data)

    _evaluate_learner("poly", PolyRegLearner(2), *data)

    # Ensemble (lesson 24)
    _evaluate_learner("ensemble", EnsembleLearner(), *data)
def do_main():
    """Plot histograms and scatter plots of daily returns.

    Histograms: the distribution of occurrences, which usually resembles a
    normal/Gaussian distribution. Mean, standard deviation and kurtosis can
    be measured on it. Kurtosis describes how the tails differ from a
    Gaussian: > 0 means "fat tails" (more occurrences at the ends), < 0
    means "skinny tails" (fewer).

    Scatter plots: one asset's daily returns against another's. A line is
    commonly fitted by linear regression:

    * slope (beta) -- how reactive the asset is to the reference one;
      slope does not mean correlation.
    * intercept (alpha) -- if positive, the asset performs better than the
      reference.
    * correlation -- how tightly the points cluster around the line.

    The original notes describe the reference as the market (SPY); in this
    code BTC is used as the x axis for LTC and ETH.
    """
    crypto_codes = ['BTC']
    currency_code = 'EUR'
    dates = pd.date_range('2017-01-01', '2017-08-01')
    df = get_gdax_data(crypto_codes, currency_code, dates)
    ax = df.plot(title="Close stats", label='Close')
    plt.show()

    daily_returns = compute_daily_returns(df)
    daily_returns.plot(title="Daily Returns", label='Daily Returns')
    plt.show()

    # --histogram--
    daily_returns.hist(bins=40)  # use 40 bins

    # Add mean and std (vertical lines) to the histogram.
    mean = daily_returns['BTC close'].mean()
    print("mean=", mean)
    std = daily_returns['BTC close'].std()
    print("std=", std)
    plt.axvline(mean, color='w', linestyle='dashed', linewidth=1)
    # NOTE(review): the std lines are drawn at +/-std around zero, not
    # around the mean -- confirm that this is intended.
    plt.axvline(std, color='r', linestyle='dashed', linewidth=1)
    plt.axvline(-std, color='r', linestyle='dashed', linewidth=1)
    plt.show()

    # Compute kurtosis.
    print("kurtosis=", daily_returns.kurtosis())

    # Compare two histograms.
    crypto_codes = ['LTC', 'ETH']
    dates = pd.date_range('2017-06-01', '2017-08-25')
    df = get_gdax_data(crypto_codes, currency_code, dates)
    ax = df.plot(title="Close stats", label='Close')
    plt.show()

    daily_returns = compute_daily_returns(df)
    daily_returns.hist(bins=40)
    plt.show()

    # Plot both histograms together on the same axes.
    daily_returns['LTC close'].hist(bins=20, label='LTC')
    daily_returns['ETH close'].hist(bins=20, label='ETH')
    # The labels above are only displayed once a legend is drawn.
    plt.legend(loc='upper right')
    plt.show()

    # --Scatterplots--
    crypto_codes = ['BTC', 'LTC', 'ETH']
    df = get_gdax_data(crypto_codes, currency_code, dates)
    ax = df.plot(title="Close stats", label='Close')
    plt.show()

    daily_returns = compute_daily_returns(df)

    # Scatter plot BTC vs LTC, with a degree-1 polynomial (line) fitted by
    # numpy.
    daily_returns.plot(kind='scatter', x='BTC close', y='LTC close')
    beta_LTC, alpha_LTC = np.polyfit(daily_returns['BTC close'],
                                     daily_returns['LTC close'], 1)
    print("beta_LTC=", beta_LTC)
    print("alpha_LTC=", alpha_LTC)
    plt.plot(daily_returns['BTC close'],
             beta_LTC*daily_returns['BTC close'] + alpha_LTC,
             '-', color='r')
    plt.show()

    # Scatter plot BTC vs ETH, same treatment.
    daily_returns.plot(kind='scatter', x='BTC close', y='ETH close')
    beta_ETH, alpha_ETH = np.polyfit(daily_returns['BTC close'],
                                     daily_returns['ETH close'], 1)
    print("beta_ETH=", beta_ETH)
    print("alpha_ETH=", alpha_ETH)
    plt.plot(daily_returns['BTC close'],
             beta_ETH*daily_returns['BTC close'] + alpha_ETH,
             '-', color='r')
    plt.show()

    # Correlation; Pearson is the most common method to calculate it.
    # Written as a function call, like every other print in this function
    # (the bare print statement is a SyntaxError on Python 3).
    print(daily_returns.corr(method='pearson'))