示例#1
0
    def clean_sort_data(self):
        """Prompt for a ticker, load its price history and derive daily signals.

        Reads the ticker from standard input and stores it on ``self.ticker``.
        The frame returned by ``tcbs_market.stock_prices`` is stored on
        ``self.data`` after:

        * renaming ``openPriceAdjusted``/``closePriceAdjusted`` to
          ``Open``/``Close``;
        * sorting rows by ``dateReport`` ascending;
        * adding ``%Δ Daily Returns``: the simple close-to-close return
          ``Close[t] / Close[t-1] - 1`` (not a log return);
        * adding ``DailyPriceSignal``: ``1`` when that return is positive,
          ``-1`` otherwise. The first row has no previous close, so its
          return is NaN and its signal is ``-1``.

        Returns:
            None. Results are left on ``self.ticker`` and ``self.data``.
        """
        # The person running the script types the ticker on the command line.
        self.ticker = input('Ticker: ')

        # Fetch data: the ticker is looked up through the tcbs market module
        # to retrieve its historical prices.
        self.data = tcbs_market.stock_prices([self.ticker], period=2000)

        # Clean / sort data. Columns are renamed to standardised names so the
        # frame stays usable if another data source is plugged in later
        # ("price" means the close here).
        self.data = self.data.rename(columns={
            'openPriceAdjusted': 'Open',
            'closePriceAdjusted': 'Close',
        })
        self.data = self.data.sort_values('dateReport', ascending=True)

        # Daily returns, then a buy/sell marker based on the day's move.
        close = self.data['Close']
        self.data['%Δ Daily Returns'] = close / close.shift(1) - 1

        # Vectorised comparison instead of one scalar .loc lookup per row:
        # faster, and it does not fail when index labels repeat.
        # NaN > 0 is False, so the first row maps to -1 exactly as before.
        moved_up = self.data['%Δ Daily Returns'] > 0
        self.data['DailyPriceSignal'] = moved_up.map({True: 1, False: -1})  # 1 = buy, -1 = sell
    def clean_data(self):
        """Load the price table for ``user_ticker`` into ``self.df`` and print it.

        ``tcbs_market.stock_prices`` is indexed here by ticker, and the entry
        for ``user_ticker`` (a name defined outside this method) becomes
        ``self.df``. The adjusted open/close columns are renamed to
        ``Open``/``Close``, ``year`` and ``quarter`` columns are derived from
        the index, and the rows are sorted by index in ascending order.

        Returns:
            None. The prepared frame is stored on ``self.df`` and printed.
        """
        # Requires the tcbs api; the result is looked up by ticker below.
        frames_by_ticker = tcbs_market.stock_prices([user_ticker], period=2000)

        # Shorter column names for convenience.
        short_names = {
            'OpenPrice_Adjusted': 'Open',
            'ClosePrice_Adjusted': 'Close',
        }
        self.df = frames_by_ticker[user_ticker]
        self.df = self.df.rename(columns=short_names)

        # Calendar columns used later for sorting/grouping; the index is
        # interpreted as dates.
        for calendar_part in ('year', 'quarter'):
            self.df[calendar_part] = getattr(
                pd.DatetimeIndex(self.df.index), calendar_part)

        self.df = self.df.sort_index(ascending=True)
        print(self.df)
示例#3
0
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from scipy.stats import norm
from datetime import datetime
import tcdata.stock.llv.finance as tcbs
import tcdata.stock.llv.market as tcbsm

# ----------- ASK THE USER FOR THE OUTPUT FOLDER AND THE TICKER
#file_path = input('What is your file path: ')
figure_save = input('Where do you want your graphs saved to: ')
ticker = input('Ticker: ')  # ticker symbol typed by the user

# ----------- LOAD PRICES FROM THE TCBS DATABASE
# Shorter column names for convenience; 'dateReport' is parsed to datetimes
# and becomes the index.
df = tcbsm.stock_prices([str(ticker)], 60).rename(
    columns={
        'openPriceAdjusted': 'Open',
        'closePriceAdjusted': 'Price'
    })
df = df.assign(dateReport=pd.to_datetime(df['dateReport'])).set_index(
    'dateReport')

# ----------- ABSOLUTE AND PERCENTAGE RETURNS
# 'Price_1' holds the price found one row further down the frame.
df['Price_1'] = df['Price'].shift(-1)
df['Daily Returns'] = df['Price_1'].sub(df['Price'])
# Drop every row containing a NaN in any column so graphing does not fail.
# NOTE(review): shift(-1) leaves the NaN in the LAST row, not the first as
# the original comment said; whether 'Price_1' is the previous or the next
# trading day depends on the row order delivered by the API - confirm.
df = df.dropna()
df['%Returns'] = df['Daily Returns'].div(df['Price_1']).mul(100)

# ----------- LOG RETURNS AND DISTRIBUTION OF RETURNS
示例#4
0
def analyse_aggregate_feature(ticker):
    """Regress quarterly price swings on pairs of year-over-year fundamentals.

    Steps:

    1. Load the price history for ``ticker`` from ``tcbs_market`` and, for
       every (year, quarter) of 2012-2020 that has rows, compute the swing
       between the quarter's highest and lowest ``Price``: ``max/min - 1``
       when the high occurs after the low, ``min/max - 1`` otherwise.
    2. Load the ratio table from ``tcbs_ticker``, turn each item of
       ``fa_list`` into a percentage change against the value four rows
       earlier (year-over-year, assuming one row per quarter - TODO confirm),
       and join it to the swings on (Year, Quarter).
    3. For every unordered pair taken from ``features_list``, fit a
       ``LinearRegression`` on the first 80% of the rows (no shuffling, to
       respect time order), save a 3-D scatter of train/test points and the
       fitted surface to ``local``, and record the pair's training R^2.

    Args:
        ticker: Ticker symbol passed to the TCBS data functions and used in
            file names and plot output.

    Returns:
        None. One PNG per feature pair is written under ``local`` and one
        entry per pair is appended to the module-level ``ticker_sample``,
        ``feature_sample`` and ``score_sample`` lists.
    """
    from itertools import combinations

    # ------ CLEANING AND PROCESSING DATA
    df = tcbs_market.stock_prices([ticker], period=2000)
    df = df.rename(columns={
        'openPriceAdjusted': 'Open',
        'closePriceAdjusted': 'Price'
    })  # familiar column names
    df['dateReport'] = pd.to_datetime(df['dateReport'])
    df.reset_index(inplace=True)

    # Calendar columns used to slice the history by quarter.
    report_dates = pd.DatetimeIndex(df['dateReport'])
    df['year'] = report_dates.year
    df['quarter'] = report_dates.quarter

    df = df.sort_values('dateReport', ascending=True)  # earliest first

    # NOTE(review): the analysed years are hard-coded; rows outside
    # 2012-2020 are ignored.
    years = [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
    quarters = [1, 2, 3, 4]

    # One (Year, Quarter, Ri, position) tuple per quarter that has data.
    swing_rows = []
    for y in years:
        in_year = df.loc[df['year'] == y, :]
        for q in quarters:
            # The mask is built from the same frame it filters.
            prices = in_year.loc[in_year['quarter'] == q, 'Price']
            if prices.empty:
                continue
            # Positional distance between the high and the low:
            # > 0 means the high occurred after the low.
            x_pos = prices.argmax() - prices.argmin()
            high, low = prices.max(), prices.min()
            if x_pos > 0:
                position, r_i = 1, high / low - 1  # upswing
            else:
                position, r_i = -1, low / high - 1  # downturn (<= 0)
            swing_rows.append((y, q, r_i, position))

    result = pd.DataFrame(
        swing_rows,
        columns=['Year', 'Quarter', 'Ri (Max/Min)', 'ArgMax/Min Position'])
    result = result.sort_values(['Year', 'Quarter'], ascending=True)

    # ------ FA Analysis
    fa_df = tcbs_ticker.ratio(ticker, period_type=0, period=40)
    fa_df = fa_df.rename(columns={
        'YearReport': 'Year',
        'LengthReport': 'Quarter'
    })  # same key names as `result`
    fa_df = fa_df.sort_values(['Year', 'Quarter'], ascending=True)

    fa_list = [
        'revenue', 'operationProfit', 'netProfit', 'provision', 'creditGrowth',
        'cash', 'liability', 'equity', 'asset', 'customerCredit',
        'priceToEarning', 'priceToBook', 'roe', 'bookValuePerShare',
        'earningPerShare', 'profitMargin', 'provisionOnBadDebt',
        'badDebtPercentage', 'loanOnDeposit', 'nonInterestOnToi'
    ]

    # Percentage change against the value four rows earlier.
    current = fa_df[fa_list]
    year_ago = current.shift(4)
    yoy = (current - year_ago) / year_ago * 100

    # Join on (Year, Quarter). Assigning the YoY Series column by column
    # would align on index labels instead, pairing each price quarter with
    # whichever ratio row happens to share its row label.
    # Assumes Year/Quarter in the ratio table are integer-valued like the
    # price-derived keys - TODO confirm.
    yoy_keyed = pd.concat([fa_df[['Year', 'Quarter']], yoy], axis=1)
    result = result.merge(yoy_keyed, on=['Year', 'Quarter'], how='inner')

    result.dropna(inplace=True)  # incomplete rows would break the fit
    print(result.head())

    # ------ TRAINING MODEL - TWO-FEATURE LINEAR REGRESSIONS ON THE SWING
    features_list = [
        'revenue', 'priceToEarning', 'priceToBook', 'roe', 'earningPerShare'
    ]  # taken from fa_list; extend as needed
    outcomes = result[['Ri (Max/Min)']]  # target: the quarterly swing

    sns.set()  # styling is loop-invariant
    # Each feature is paired with every feature listed after it.
    for f, f2 in combinations(features_list, 2):
        features = result[[f, f2]]

        # shuffle=False keeps the time order: train on the earlier rows.
        x_train, x_test, y_train, y_test = train_test_split(
            features, outcomes, train_size=0.8, shuffle=False)

        model = LinearRegression()
        model.fit(x_train, y_train)
        coefs = model.coef_  # one coefficient per feature
        intercepts = model.intercept_
        score = model.score(x_train, y_train)  # R^2 on the training rows

        # Grid over the training values on which the fitted plane is drawn.
        # The grid is wrapped in a DataFrame with the training column names
        # so predict() receives the same feature names the model was fitted
        # with.
        xx_pred, yy_pred = np.meshgrid(x_train[f], x_train[f2])
        model_vis = pd.DataFrame({
            f: xx_pred.flatten(),
            f2: yy_pred.flatten()
        })
        predictions = model.predict(model_vis)

        print(ticker)
        print(score)

        fig = plt.figure(figsize=[10, 10])
        ax = fig.add_subplot(projection='3d')
        ax.scatter(x_train[[f]],
                   x_train[[f2]],
                   y_train,
                   color='forestgreen',
                   alpha=0.8)  # training rows
        ax.scatter(x_test[[f]],
                   x_test[[f2]],
                   y_test,
                   color='magenta',
                   alpha=0.5)  # held-out rows
        ax.scatter(xx_pred.flatten(),
                   yy_pred.flatten(),
                   predictions,
                   facecolor='red',
                   s=30,
                   edgecolor='red',
                   alpha=0.3)  # fitted plane

        # Axis labels carry the fitted coefficient and the intercept.
        intercept_text = str(round(intercepts[0], 3))
        ax.set_xlabel(
            str(f) + '\n' + 'y = ' + str(round(coefs[0][0], 5)) + '*x' +
            ' + ' + intercept_text)
        ax.set_ylabel(
            str(f2) + '\n' + 'y = ' + str(round(coefs[0][1], 3)) + '*x' +
            ' + ' + intercept_text)
        ax.set_zlabel('Maximum Return Potential in %')
        ax.set_title('Correlation between Maximum Potential Returns, ' +
                     str(f) + ' and ' + str(f2) + '. \n R-Squared: = ' +
                     str(score))
        plt.savefig(local + '/' + str(f) + str(f2) + ticker + '.png')
        # Release the figure; otherwise every pair leaves a 10x10 figure
        # open for the lifetime of the process.
        plt.close(fig)

        # Record this pair in the module-level result lists.
        # NOTE(review): every pair is recorded. The previous one-row table
        # and "max score" lookup always selected that single row, so no
        # best-pair selection ever took place - confirm whether only the
        # best pair per ticker was intended.
        print(ticker)
        print(score)
        ticker_sample.append(ticker)
        feature_sample.append(f + f2)
        score_sample.append(float(score))
def analyse_single_feature(ticker):
    """Regress quarterly price swings on each year-over-year fundamental.

    Steps:

    1. Load the price history for ``ticker`` from ``tcbs_market`` and, for
       every (year, quarter) of 2012-2020 that has rows, compute the swing
       between the quarter's highest and lowest ``Price``: ``max/min - 1``
       when the high occurs after the low, ``min/max - 1`` otherwise.
    2. Load the ratio table from ``tcbs_ticker``, turn each item of
       ``fa_list`` into a percentage change against the value four rows
       earlier (year-over-year, assuming one row per quarter - TODO confirm),
       and join it to the swings on (Year, Quarter).
    3. For each feature, fit a one-variable ``LinearRegression`` on the first
       80% of the rows (no shuffling), save a scatter plot with the fitted
       line to ``local``, and keep the training R^2.
    4. Append the feature with the highest training R^2 to the module-level
       ``ticker_sample``, ``feature_sample`` and ``score_sample`` lists.

    Args:
        ticker: Ticker symbol passed to the TCBS data functions and used in
            file names and plot titles.

    Returns:
        None. One PNG per feature is written under ``local`` and the best
        feature is appended to the module-level sample lists.
    """
    # ------ CLEANING AND PROCESSING DATA
    # The market call is indexed by ticker here (replaceable with data from
    # yfinance according to the original author).
    dict_data = tcbs_market.stock_prices([ticker], period=2000)
    df = pd.DataFrame.from_dict(dict_data[ticker])
    print(df)
    # NOTE(review): the openPriceAdjusted/closePriceAdjusted -> Open/Price
    # rename is disabled in this function, so the frame must already carry a
    # 'Price' column - confirm.
    df['dateReport'] = pd.to_datetime(df['dateReport'])
    df.reset_index(inplace=True)

    # Calendar columns used to slice the history by quarter.
    report_dates = pd.DatetimeIndex(df['dateReport'])
    df['year'] = report_dates.year
    df['quarter'] = report_dates.quarter

    df = df.sort_values('dateReport', ascending=True)  # earliest first

    # NOTE(review): the analysed years are hard-coded; rows outside
    # 2012-2020 are ignored.
    years = [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020]
    quarters = [1, 2, 3, 4]

    # One (Year, Quarter, Ri, position) tuple per quarter that has data.
    swing_rows = []
    for y in years:
        in_year = df.loc[df['year'] == y, :]
        for q in quarters:
            # The mask is built from the same frame it filters.
            prices = in_year.loc[in_year['quarter'] == q, 'Price']
            if prices.empty:
                continue
            # Positional distance between the high and the low:
            # > 0 means the high occurred after the low.
            x_pos = prices.argmax() - prices.argmin()
            high, low = prices.max(), prices.min()
            if x_pos > 0:
                position, r_i = 1, high / low - 1  # upswing
            else:
                position, r_i = -1, low / high - 1  # downturn (<= 0)
            swing_rows.append((y, q, r_i, position))

    result = pd.DataFrame(
        swing_rows,
        columns=['Year', 'Quarter', 'Ri (Max/Min)', 'ArgMax/Min Position'])
    result = result.sort_values(['Year', 'Quarter'], ascending=True)

    # ------ IMPORT FA DATA
    fa_df = tcbs_ticker.ratio(ticker, period_type=0, period=40)
    fa_df = fa_df.rename(columns={
        'YearReport': 'Year',
        'LengthReport': 'Quarter'
    })  # same key names as `result`
    fa_df = fa_df.sort_values(['Year', 'Quarter'], ascending=True)

    fa_list = [
        'revenue', 'operationProfit', 'netProfit', 'cash', 'liability',
        'equity', 'asset', 'priceToEarning', 'priceToBook', 'roe',
        'bookValuePerShare', 'earningPerShare', 'profitMargin',
        'provisionOnBadDebt', 'badDebtPercentage', 'loanOnDeposit',
        'nonInterestOnToi'
    ]  # the FA items to examine

    # Percentage change against the value four rows earlier.
    current = fa_df[fa_list]
    year_ago = current.shift(4)
    yoy = (current - year_ago) / year_ago * 100

    # Join on (Year, Quarter). Assigning the YoY Series column by column
    # would align on index labels instead, pairing each price quarter with
    # whichever ratio row happens to share its row label.
    # Assumes Year/Quarter in the ratio table are integer-valued like the
    # price-derived keys - TODO confirm.
    yoy_keyed = pd.concat([fa_df[['Year', 'Quarter']], yoy], axis=1)
    result = result.merge(yoy_keyed, on=['Year', 'Quarter'], how='inner')

    result.dropna(inplace=True)  # incomplete rows would break the fit
    print(result.head())

    # ------ TRAINING MODEL
    # Single-feature models: every FA item is tried on its own.
    features_list = list(fa_list)
    outcomes = result[['Ri (Max/Min)']]

    feature_list = []
    score_list = []
    ticker_list = []

    sns.set()  # styling is loop-invariant
    for f in features_list:
        features = result[[f]]

        # shuffle=False keeps the time order: train on the earlier rows.
        x_train, x_test, y_train, y_test = train_test_split(
            features, outcomes, train_size=0.8, shuffle=False)

        model = LinearRegression()
        model.fit(x_train, y_train)
        score = model.score(x_train, y_train)  # R^2 on the training rows
        predictions = model.predict(x_train)  # fitted line over train x
        coefs = model.coef_
        intercepts = model.intercept_

        fig = plt.figure(figsize=[10, 10])
        plt.scatter(x_train, y_train, color='darkcyan', alpha=0.4)
        plt.plot(x_train, predictions, color='darkorange')
        plt.title('R^2 of ' + f + ' for ' + ticker + "\n" + 'y = ' +
                  str(round(coefs[0][0], 5)) + '*x' + ' + ' +
                  str(round(intercepts[0], 3)))
        plt.xlabel('% Δ ' + f + ' YoY')
        plt.ylabel('Maximum Stocks Potential/Quarter')
        plt.savefig(local + "/" + f + ticker + '.png')
        # Release the figure; otherwise every feature leaves a 10x10 figure
        # open for the lifetime of the process.
        plt.close(fig)

        ticker_list.append(ticker)
        print(ticker)
        feature_list.append(f)
        print(f)
        score_list.append(score)
        print(score)

    # The score table is only read after the loop, so it is built once here
    # rather than on every iteration.
    total = pd.DataFrame({
        'ticker': ticker_list,
        'feature': feature_list,
        'score': score_list
    })

    # Keep the feature with the highest training R^2 (first one on ties).
    mx_score = total['score'].max()
    list_result = total.loc[total['score'] == mx_score].values.tolist()
    print(list_result)
    # Rows come back as [ticker, feature, score].
    ticker_sample.append(list_result[0][0])
    feature_sample.append(list_result[0][1])
    score_sample.append(list_result[0][2])