def clean_sort_data(self):
    """Prompt for a ticker, download its prices and derive daily signals.

    Side effects:
        * Reads the ticker from standard input and stores it on
          ``self.ticker``.
        * Stores the price frame returned by ``tcbs_market.stock_prices`` on
          ``self.data``, with the adjusted open/close columns renamed to
          ``'Open'``/``'Close'`` and the rows sorted by ``'dateReport'``
          ascending.
        * Adds ``'%Δ Daily Returns'``: the simple close-to-close return of
          each row versus the previous row.
        * Adds ``'DailyPriceSignal'``: ``1`` when that return is positive
          (buy), ``-1`` otherwise (sell). The first row has no previous
          close, so its return is NaN and its signal is ``-1``.

    Returns:
        None.
    """
    # The person running the script types the ticker on the command line.
    self.ticker = input('Ticker: ')

    # Fetch the price history for that ticker from the TCBS market API.
    self.data = tcbs_market.stock_prices([self.ticker], period=2000)

    # Standardise the column names so the rest of the code does not depend
    # on this particular data source ("price" means the adjusted close).
    self.data = self.data.rename(
        columns={'openPriceAdjusted': 'Open', 'closePriceAdjusted': 'Close'}
    )
    self.data = self.data.sort_values('dateReport', ascending=True)

    # Simple (arithmetic) daily return: close / previous close - 1.
    # Note this is NOT a log return.
    daily_returns = self.data['Close'] / self.data['Close'].shift(1) - 1
    self.data['%Δ Daily Returns'] = daily_returns

    # Vectorised signal. A per-row ``.loc[label, col]`` lookup is slow and
    # returns a Series (ambiguous truth value) when index labels repeat.
    # NaN > 0 is False, so the first row maps to -1.
    self.data['DailyPriceSignal'] = daily_returns.gt(0).map({True: 1, False: -1})
def clean_data(self):
    """Download prices for ``user_ticker`` and store a tidy frame on ``self.df``.

    The ticker is read from the outer-scope name ``user_ticker``. The frame
    stored on ``self.df`` has its adjusted open/close columns renamed to
    ``'Open'``/``'Close'``, gains ``'year'`` and ``'quarter'`` columns derived
    from its index, and is sorted by index ascending. The final frame is
    printed.

    Returns:
        None.
    """
    # The TCBS API is queried with a list of tickers; the result is indexed
    # by ticker to get that ticker's frame.
    price_frames = tcbs_market.stock_prices([user_ticker], period=2000)
    frame = price_frames[user_ticker].rename(
        columns={'OpenPrice_Adjusted': 'Open', 'ClosePrice_Adjusted': 'Close'}
    )

    # Calendar buckets taken from the index, used later for grouping/sorting.
    # NOTE(review): assumes the index holds date-like values -- confirm.
    frame['year'] = pd.DatetimeIndex(frame.index).year
    frame['quarter'] = pd.DatetimeIndex(frame.index).quarter

    self.df = frame.sort_index(ascending=True)
    print(self.df)
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from scipy.stats import norm
from datetime import datetime
import tcdata.stock.llv.finance as tcbs
import tcdata.stock.llv.market as tcbsm

# NOTE(review): `pd` is used below but pandas is not imported in this
# section -- confirm it is imported earlier in the file.

# ----------- ASK THE USER WHERE TO SAVE GRAPHS AND WHICH TICKER TO LOAD
figure_save = input('Where do you want your graphs saved to: ')
ticker = input('Ticker: ')  # ticker symbol typed by the user

# ----------- IMPORT DATA FROM TCBS DATABASE
# Rename the adjusted open/close columns for convenience, parse the report
# date and use it as the index.
df = (
    tcbsm.stock_prices([str(ticker)], 60)
    .rename(columns={'openPriceAdjusted': 'Open', 'closePriceAdjusted': 'Price'})
    .assign(dateReport=lambda frame: pd.to_datetime(frame['dateReport']))
    .set_index('dateReport')
)

# ----------- CALCULATE RETURNS AND % RETURNS
# 'Price_1' holds the price of the NEXT row.
# NOTE(review): whether "next row" is the following or the preceding trading
# day depends on the row order returned by the API -- confirm, because it
# decides the sign of 'Daily Returns'.
df['Price_1'] = df['Price'].shift(-1)
df['Daily Returns'] = df['Price_1'].sub(df['Price'])

# shift(-1) leaves the last row without a value; drop every row that
# contains a NaN so the graphs do not fail on missing data.
df = df.dropna()

# Return expressed as a percentage of 'Price_1'.
df['%Returns'] = df['Daily Returns'].div(df['Price_1']).mul(100)

# ----------- LOG RETURNS AND DISTRIBUTION OF RETURNS
def analyse_aggregate_feature(ticker):
    """Regress quarterly price swings on pairs of year-on-year FA changes.

    Steps:
        1. Download the price history for ``ticker`` and, for every quarter
           of 2012-2020 that has data, compute the swing between the
           quarterly high and low: ``max/min - 1`` when the high comes after
           the low, ``min/max - 1`` otherwise.
        2. Download the financial ratios, compute each item's percentage
           change versus four rows earlier, and join those changes to the
           quarterly swings on (Year, Quarter).
        3. For every pair of features in ``features_list``, fit a
           ``LinearRegression`` on the first 80% of rows (no shuffling), plot
           train/test points plus the fitted plane in 3D, and save the figure
           under the outer-scope directory ``local``.
        4. Append the ticker, feature pair and train R^2 of the best-scoring
           pair to the outer-scope ``ticker_sample``, ``feature_sample`` and
           ``score_sample``.

    Args:
        ticker: Ticker symbol passed to the TCBS data calls; also used in
            file names, so it must be a string.

    Returns:
        None. Results are delivered through saved figures, printed output and
        the three outer-scope sample collections.
    """
    from itertools import combinations  # stdlib; local so the block stands alone

    # ------ CLEANING AND PROCESSING DATA
    df = tcbs_market.stock_prices([ticker], period=2000)
    df = df.rename(
        columns={'openPriceAdjusted': 'Open', 'closePriceAdjusted': 'Price'}
    )
    df['dateReport'] = pd.to_datetime(df['dateReport'])
    df.reset_index(inplace=True)
    df['year'] = df['dateReport'].dt.year
    df['quarter'] = df['dateReport'].dt.quarter
    # Oldest first, so positions returned by argmax/argmin follow time order.
    df = df.sort_values('dateReport', ascending=True)

    years = [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
    quarters = [1, 2, 3, 4]

    # One row per (year, quarter) that has price data.
    rows = []
    for y in years:
        in_year = df.loc[df['year'] == y, :]
        for q in quarters:
            # Build the mask from the year slice itself, not from the full
            # frame, so mask and data share the same index.
            in_quarter = in_year.loc[in_year['quarter'] == q, :]
            if in_quarter.empty:
                continue
            prices = in_quarter['Price']
            # Positive when the quarterly high occurs after the quarterly low.
            x_pos = prices.argmax() - prices.argmin()
            if x_pos > 0:
                position = 1
                r_i = prices.max() / prices.min() - 1  # upswing
            else:
                position = -1
                r_i = prices.min() / prices.max() - 1  # downswing (<= 0)
            rows.append((y, q, r_i, position))

    # Rows were appended in ascending (year, quarter) order already.
    result = pd.DataFrame(
        rows,
        columns=['Year', 'Quarter', 'Ri (Max/Min)', 'ArgMax/Min Position'],
    )

    # ------ FA Analysis
    fa_df = tcbs_ticker.ratio(ticker, period_type=0, period=40)
    fa_df = fa_df.rename(columns={'YearReport': 'Year', 'LengthReport': 'Quarter'})
    fa_df = fa_df.sort_values(['Year', 'Quarter'], ascending=True)
    fa_list = [
        'revenue', 'operationProfit', 'netProfit', 'provision',
        'creditGrowth', 'cash', 'liability', 'equity', 'asset',
        'customerCredit', 'priceToEarning', 'priceToBook', 'roe',
        'bookValuePerShare', 'earningPerShare', 'profitMargin',
        'provisionOnBadDebt', 'badDebtPercentage', 'loanOnDeposit',
        'nonInterestOnToi',
    ]

    # Percentage change versus four rows earlier (year-on-year when the rows
    # are consecutive quarters).
    yoy = fa_df[['Year', 'Quarter']].copy()
    for item in fa_list:
        previous = fa_df[item].shift(4)
        yoy[item] = (fa_df[item] - previous) / previous * 100

    # Join on the period keys. Assigning the FA Series straight into `result`
    # would align on row labels, pairing price quarters with unrelated FA
    # quarters whenever the two frames do not share the same index.
    result = result.merge(yoy, on=['Year', 'Quarter'], how='left')
    result.dropna(inplace=True)  # quarters without complete FA data
    print(result.head())

    # ------ TRAINING MODEL - MULTIVARIATE REGRESSION TO FIND THE FEATURE
    # ------ PAIR THAT BEST EXPLAINS THE MAXIMUM POTENTIAL CHANGE
    features_list = [
        'revenue', 'priceToEarning', 'priceToBook', 'roe', 'earningPerShare'
    ]
    outcomes = result[['Ri (Max/Min)']]

    # (ticker, feature pair, train R^2) for every fitted model; collected
    # across ALL pairs so the best one can be selected after the loop.
    scored = []

    # Every unordered pair, in the same order as nested "item, later item"
    # loops would produce.
    for f, f2 in combinations(features_list, 2):
        features = result[[f, f2]]
        # shuffle=False: keep time order so the test set is the latest data.
        x_train, x_test, y_train, y_test = train_test_split(
            features, outcomes, train_size=0.8, shuffle=False
        )

        model = LinearRegression()
        model.fit(x_train, y_train)
        coefs = model.coef_
        intercepts = model.intercept_
        score = model.score(x_train, y_train)  # train R^2

        # Grid over the training values, used to draw the fitted plane.
        xx_pred, yy_pred = np.meshgrid(x_train[f], x_train[f2])
        model_vis = np.array([xx_pred.flatten(), yy_pred.flatten()]).T
        predictions = model.predict(model_vis)

        print(ticker)
        print(score)

        sns.set()
        fig = plt.figure(figsize=[10, 10])
        ax = fig.add_subplot(projection='3d')
        ax.scatter(x_train[[f]], x_train[[f2]], y_train,
                   color='forestgreen', alpha=0.8)  # train data
        ax.scatter(x_test[[f]], x_test[[f2]], y_test,
                   color='magenta', alpha=0.5)  # test data
        ax.scatter(xx_pred.flatten(), yy_pred.flatten(), predictions,
                   facecolor='red', s=30, edgecolor='red', alpha=0.3)  # plane
        ax.set_xlabel(
            str(f) + '\n' + 'y = ' + str(round(coefs[0][0], 5)) + '*x'
            + ' + ' + str(round(intercepts[0], 3))
        )
        ax.set_ylabel(
            str(f2) + '\n' + 'y = ' + str(round(coefs[0][1], 3)) + '*x'
            + ' + ' + str(round(intercepts[0], 3))
        )
        ax.set_zlabel('Maximum Return Potential in %')
        ax.set_title(
            'Correlation between Maximum Potential Returns, ' + str(f)
            + ' and ' + str(f2) + '. \n R-Squared: = ' + str(score)
        )
        plt.savefig(local + '/' + str(f) + str(f2) + ticker + '.png')
        # Release the figure; otherwise one figure per pair stays open.
        plt.close(fig)

        scored.append((ticker, f + f2, score))

    # Pick the pair with the highest train R^2 and publish it.
    total = pd.DataFrame(scored, columns=['ticker', 'feature', 'score'])
    mx_score = total['score'].max()
    list_result = total.loc[total['score'] == mx_score].values.tolist()
    ticker_sample.append(list_result[0][0])
    feature_sample.append(list_result[0][1])
    score_sample.append(list_result[0][2])
def analyse_single_feature(ticker):
    """Regress quarterly price swings on each year-on-year FA change alone.

    Steps:
        1. Download the price history for ``ticker`` and, for every quarter
           of 2012-2020 that has data, compute the swing between the
           quarterly high and low: ``max/min - 1`` when the high comes after
           the low, ``min/max - 1`` otherwise.
        2. Download the financial ratios, compute each item's percentage
           change versus four rows earlier, and join those changes to the
           quarterly swings on (Year, Quarter).
        3. For every feature, fit a one-variable ``LinearRegression`` on the
           first 80% of rows (no shuffling), plot the training points and the
           fitted line, and save the figure under the outer-scope directory
           ``local``.
        4. Append the ticker, feature and train R^2 of the best-scoring
           feature to the outer-scope ``ticker_sample``, ``feature_sample``
           and ``score_sample``.

    Args:
        ticker: Ticker symbol passed to the TCBS data calls; also used in
            plot titles and file names, so it must be a string.

    Returns:
        None. Results are delivered through saved figures, printed output and
        the three outer-scope sample collections.
    """
    # ------ CLEANING AND PROCESSING DATA
    # Here the market call is indexed by ticker to obtain the ticker's data.
    dict_data = tcbs_market.stock_prices([ticker], period=2000)
    df = pd.DataFrame.from_dict(dict_data[ticker])
    print(df)

    # The code below reads a 'Price' column. If the source does not provide
    # one, fall back to renaming the adjusted columns.
    # NOTE(review): assumes this source uses the 'openPriceAdjusted' /
    # 'closePriceAdjusted' names -- confirm against the API response.
    if 'Price' not in df.columns:
        df = df.rename(
            columns={'openPriceAdjusted': 'Open', 'closePriceAdjusted': 'Price'}
        )

    df['dateReport'] = pd.to_datetime(df['dateReport'])
    df.reset_index(inplace=True)
    df['year'] = df['dateReport'].dt.year
    df['quarter'] = df['dateReport'].dt.quarter
    # Oldest first, so positions returned by argmax/argmin follow time order.
    df = df.sort_values('dateReport', ascending=True)

    years = [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
    quarters = [1, 2, 3, 4]

    # ------ QUARTERLY MAXIMUM POTENTIAL
    # One row per (year, quarter) that has price data.
    rows = []
    for y in years:
        in_year = df.loc[df['year'] == y, :]
        for q in quarters:
            # Build the mask from the year slice itself, not from the full
            # frame, so mask and data share the same index.
            in_quarter = in_year.loc[in_year['quarter'] == q, :]
            if in_quarter.empty:
                continue
            prices = in_quarter['Price']
            # Positive when the quarterly high occurs after the quarterly low.
            x_pos = prices.argmax() - prices.argmin()
            if x_pos > 0:
                position = 1
                r_i = prices.max() / prices.min() - 1  # upswing
            else:
                position = -1
                r_i = prices.min() / prices.max() - 1  # downswing (<= 0)
            rows.append((y, q, r_i, position))

    # Rows were appended in ascending (year, quarter) order already.
    result = pd.DataFrame(
        rows,
        columns=['Year', 'Quarter', 'Ri (Max/Min)', 'ArgMax/Min Position'],
    )

    # ------ IMPORT FA DATA
    fa_df = tcbs_ticker.ratio(ticker, period_type=0, period=40)
    fa_df = fa_df.rename(columns={'YearReport': 'Year', 'LengthReport': 'Quarter'})
    fa_df = fa_df.sort_values(['Year', 'Quarter'], ascending=True)
    fa_list = [
        'revenue', 'operationProfit', 'netProfit', 'cash', 'liability',
        'equity', 'asset', 'priceToEarning', 'priceToBook', 'roe',
        'bookValuePerShare', 'earningPerShare', 'profitMargin',
        'provisionOnBadDebt', 'badDebtPercentage', 'loanOnDeposit',
        'nonInterestOnToi',
    ]

    # Percentage change versus four rows earlier (year-on-year when the rows
    # are consecutive quarters).
    yoy = fa_df[['Year', 'Quarter']].copy()
    for item in fa_list:
        previous = fa_df[item].shift(4)
        yoy[item] = (fa_df[item] - previous) / previous * 100

    # Join on the period keys. Assigning the FA Series straight into `result`
    # would align on row labels, pairing price quarters with unrelated FA
    # quarters whenever the two frames do not share the same index.
    result = result.merge(yoy, on=['Year', 'Quarter'], how='left')
    result.dropna(inplace=True)  # quarters without complete FA data
    print(result.head())

    # ------ TRAINING MODEL
    # Single-feature models: every FA item is tried on its own.
    features_list = list(fa_list)
    outcomes = result[['Ri (Max/Min)']]

    # (ticker, feature, train R^2) for every fitted model.
    scored = []

    for f in features_list:
        features = result[[f]]
        # shuffle=False: keep time order so the test set is the latest data.
        x_train, x_test, y_train, y_test = train_test_split(
            features, outcomes, train_size=0.8, shuffle=False
        )

        model = LinearRegression()
        model.fit(x_train, y_train)
        score = model.score(x_train, y_train)  # train R^2
        predictions = model.predict(x_train)   # fitted line over train data
        coefs = model.coef_
        intercepts = model.intercept_

        sns.set()
        fig = plt.figure(figsize=[10, 10])
        plt.scatter(x_train, y_train, color='darkcyan', alpha=0.4)
        plt.plot(x_train, predictions, color='darkorange')
        plt.title(
            'R^2 of ' + f + ' for ' + ticker + "\n" + 'y = '
            + str(round(coefs[0][0], 5)) + '*x' + ' + '
            + str(round(intercepts[0], 3))
        )
        plt.xlabel('% Δ ' + f + ' YoY')
        plt.ylabel('Maximum Stocks Potential/Quarter')
        plt.savefig(local + "/" + f + ticker + '.png')
        # Release the figure; otherwise one figure per feature stays open.
        plt.close(fig)

        scored.append((ticker, f, score))
        print(ticker)
        print(f)
        print(score)

    # Pick the feature with the highest train R^2 and publish it.
    total = pd.DataFrame(scored, columns=['ticker', 'feature', 'score'])
    mx_score = total['score'].max()
    list_result = total.loc[total['score'] == mx_score].values.tolist()
    print(list_result)
    ticker_sample.append(list_result[0][0])
    feature_sample.append(list_result[0][1])
    score_sample.append(list_result[0][2])