Exemplo n.º 1
0
    def __init__(self, path, ipo_path=None):
        """Load the financial data CSV, clean it, and list statement columns.

        The CSV at ``path`` is read with ``pd.read_csv`` and passed through
        the ``data_cleaning`` helpers. The result is indexed by the ``fyear``
        column, missing values are replaced with ``0.0``, and the frame is
        stored on ``self.df_fin_all``. The column-name lists for the balance
        sheet, income statement and cash flow statement are stored on
        ``self.b_sheet``, ``self.i_sheet`` and ``self.c_sheet``.

        Args:
            path: Location of the CSV file to load.
            ipo_path: Location of the IPO-year CSV handed to
                ``data_cleaning.remove_pre_IPO_data``. When omitted, the
                previously hard-coded location is used, so existing callers
                behave exactly as before.
        """
        # The IPO file location used to be fixed to one developer's machine;
        # it is now only the fallback when the caller does not supply one.
        if ipo_path is None:
            ipo_path = "D:\\FA\\data\\stock_stats\\IPO_year.csv"

        self.path = path
        df = pd.read_csv(self.path)

        # Preprocess data: run the cleaning steps in their original order.
        dc = data_cleaning()
        # remove data with missing ticker symbols
        df = dc.remove_missing_tic_data(df)
        # remove pre IPO data
        df = dc.remove_pre_IPO_data(ipo_path, df)
        # remove rows with missing year information
        df = dc.remove_missing_year_data(df)

        # Index by fiscal-year column, then fill remaining gaps with 0.0.
        self.df_fin_all = df.set_index('fyear').fillna(0.)

        # Select financial statements variables
        # Balance Sheet
        # Assets
        assets_current = ['che', 'rect', 'invt', 'aco', 'act']
        assets_rest = ['ppent', 'ivaeq', 'ivao', 'intan', 'ao', 'at']

        # Liabilities
        liabilities_current = ['dlc', 'ap', 'txp', 'lco', 'lct']
        liabilities_rest = [
            'dltt', 'txditc', 'lo', 'lt', 'mib', 'pstk', 'ceq', 'seq'
        ]

        self.b_sheet = (
            assets_current
            + assets_rest
            + liabilities_current
            + liabilities_rest
        )

        # Income Statement
        revenue = ['revt']
        costs = ['cogs', 'xsga']
        operations = ['oibdp', 'oiadp', 'xint', 'nopi']
        income = ['spi', 'pi', 'txt', 'ib', 'niadj', 'epspx', 'epsfx']

        self.i_sheet = revenue + costs + operations + income

        # Cash Flow Statement
        cash_oper = [
            'ibc', 'xidoc', 'dpc', 'txdc', 'esub', 'sppiv', 'fopo', 'recch',
            'invch', 'apalch', 'txach', 'aoloch', 'oancf'
        ]

        cash_investment = [
            'ivch', 'siv', 'ivstch', 'capx', 'sppe', 'aqc', 'ivaco', 'ivncf'
        ]

        cash_finance = [
            'sstk', 'txbcof', 'prstkc', 'dv', 'dltis', 'dltr', 'dlcch', 'fiao',
            'fincf'
        ]

        self.c_sheet = cash_oper + cash_investment + cash_finance
Exemplo n.º 2
0
    def __init__(self, path, ipo_path=None):
        """Load the financial data CSV, clean it, and list the extra columns.

        The CSV at ``path`` is read with ``pd.read_csv`` and passed through
        the ``data_cleaning`` helpers. The result is indexed by the ``fyear``
        column, missing values are replaced with ``0.0``, and the frame is
        stored on ``self.df_fin_all``. ``self.others`` holds the list of
        additional column names selected by this class.

        Args:
            path: Location of the CSV file to load.
            ipo_path: Location of the IPO-year CSV handed to
                ``data_cleaning.remove_pre_IPO_data``. When omitted, the
                previously hard-coded location is used, so existing callers
                behave exactly as before.
        """
        # The IPO file location used to be fixed to one user's home
        # directory; it is now only the fallback when none is supplied.
        if ipo_path is None:
            ipo_path = "/Users/liqiran/Desktop/ml_fa/data/stock_stats/IPO_year.csv"

        self.path = path
        df = pd.read_csv(self.path)

        # Preprocess data: run the cleaning steps in their original order.
        dc = data_cleaning()
        # remove data with missing ticker symbols
        df = dc.remove_missing_tic_data(df)
        # remove pre IPO data
        df = dc.remove_pre_IPO_data(ipo_path, df)
        # remove rows with missing year information
        df = dc.remove_missing_year_data(df)

        # Index by fiscal-year column, then fill remaining gaps with 0.0.
        self.df_fin_all = df.set_index('fyear').fillna(0.)

        # Select financial statements variables
        self.others = [
            're', 'wcapch', 'wcapc', 'unwcc', 'nim', 'citotal', 'cga', 'mrc1',
            'mrc2', 'mrc3', 'mrc4', 'mrc5'
        ]
Exemplo n.º 3
0
    def __init__(self, path, ipo_path=None):
        """Load the financial data CSV, clean it, and list market columns.

        The CSV at ``path`` is read with ``pd.read_csv`` and passed through
        the ``data_cleaning`` helpers. The result is indexed by the ``fyear``
        column and stored on ``self.df_fin_all``; missing values are left
        untouched here. ``self.market_data`` holds the market-data column
        names.

        Args:
            path: Location of the CSV file to load.
            ipo_path: Location of the IPO-year CSV handed to
                ``data_cleaning.remove_pre_IPO_data``. When omitted, the
                previously hard-coded location is used, so existing callers
                behave exactly as before.
        """
        # The IPO file location used to be fixed to one user's home
        # directory; it is now only the fallback when none is supplied.
        if ipo_path is None:
            ipo_path = "/Users/liqiran/Desktop/ml_fa/data/stock_stats/IPO_year.csv"

        self.path = path
        df = pd.read_csv(self.path)

        # Preprocess data: run the cleaning steps in their original order.
        dc = data_cleaning()
        # remove data with missing ticker symbols
        df = dc.remove_missing_tic_data(df)
        # remove pre IPO data
        df = dc.remove_pre_IPO_data(ipo_path, df)
        # remove rows with missing year information
        df = dc.remove_missing_year_data(df)

        self.df_fin_all = df.set_index('fyear')

        # Select market data for the stock
        self.market_data = ['csho', 'mkvalt', 'prcc_c', 'prcc_f']
Exemplo n.º 4
0
    def __init__(self, path, ipo_path=None):
        """Load the financial data CSV, clean it, and list market columns.

        The CSV at ``path`` is read with ``pd.read_csv`` and passed through
        the ``data_cleaning`` helpers. The result is indexed by the ``fyear``
        column and stored on ``self.df_fin_all``; missing values are left
        untouched here. ``self.market_data`` holds the market-data column
        names.

        Args:
            path: Location of the CSV file to load.
            ipo_path: Location of the IPO-year CSV handed to
                ``data_cleaning.remove_pre_IPO_data``. When omitted, the
                original relative location ``../data/stock_stats/IPO_year.csv``
                is used (resolved against the current working directory), so
                existing callers behave exactly as before.
        """
        # The default is relative to the process working directory, so
        # callers running from elsewhere can now pass an explicit location.
        if ipo_path is None:
            ipo_path = os.path.join('..', 'data', 'stock_stats', 'IPO_year.csv')

        self.path = path
        df = pd.read_csv(self.path)

        # Preprocess data: run the cleaning steps in their original order.
        dc = data_cleaning()
        # remove data with missing ticker symbols
        df = dc.remove_missing_tic_data(df)
        # remove pre IPO data
        df = dc.remove_pre_IPO_data(ipo_path, df)
        # remove rows with missing year information
        df = dc.remove_missing_year_data(df)

        self.df_fin_all = df.set_index('fyear')

        # Select market data for the stock
        self.market_data = ['csho', 'mkvalt', 'prcc_c', 'prcc_f']