Пример #1
0
def fileCompare(src_fullpath, tar_fullpath):
    '''
    Compare two CSV files cell by cell and print every difference.

    Both files are loaded with u.read_csv (assumed to return a pandas
    DataFrame, since the result is used through .columns and .iloc).
    Row counts are compared first, then column counts; only when both agree
    are the cells compared by position. Column labels are not compared.

    Two cells that are both missing (NaN) are treated as equal. A plain
    ``!=`` reports NaN as different from itself, which would make a file
    containing missing values look different even from an identical copy.

    Parameters
    ----------
    src_fullpath : full path of the source file
    tar_fullpath : full path of the target file

    Returns
    -------
    bool
        True when the two files have the same shape and every pair of
        cells matches, False otherwise.

    Raises
    ------
    SystemExit
        When the source or the target file does not exist.
    '''
    # Function-scope import so the missing-value check below does not depend
    # on what the surrounding module happens to import.
    import pandas as pd

    # Check File Existence (source first, then target)
    for label, fullpath in (('Source', src_fullpath),
                            ('Target', tar_fullpath)):
        if not u.hasFile(fullpath):
            print('%s File %s Does Not Exist' % (label, fullpath))
            raise SystemExit

    # Load Data Files
    src = u.read_csv(src_fullpath)
    tar = u.read_csv(tar_fullpath)
    row_number = len(src)
    col_number = len(src.columns)

    matched = True
    print('File Compare Start: %s vs %s' % (src_fullpath, tar_fullpath))
    if row_number != len(tar):
        matched = False
        print('Row Number Un-matched')
    elif col_number != len(tar.columns):
        matched = False
        print('Col Number Un-matched')
    else:
        # Shapes agree: report every differing cell, not just the first one.
        for i in range(row_number):
            for j in range(col_number):
                src_cell = src.iloc[i, j]
                tar_cell = tar.iloc[i, j]
                both_missing = pd.isna(src_cell) and pd.isna(tar_cell)
                if src_cell != tar_cell and not both_missing:
                    matched = False
                    print('Element(%s,%s) Un-matched' % (i, j))
    print('File Compare End: %s' % ('Matched' if matched else 'Un-Matched'))

    # Return Result
    return matched
Пример #2
0
def extractCommodityPrice(code, column):
    '''
    Split the price data of one commodity into one CSV file per market.

    The distinct values found in ``column`` are treated as markets. For each
    of them the matching rows are written to a file named after the
    commodity code and a running market number (Market_1, Market_2, ...).

    Parameters
    ----------
    code : commodity code, substituted into the configured file names
    column : name of the column whose distinct values identify the markets

    Returns
    -------
    None. A notice is printed and nothing else happens when the commodity
    data file is missing.
    '''
    # Pre-requisite: the commodity data file has to exist already.
    source_path = c.fullpath_dict['commodity'] % code
    if not u.hasFile(source_path):
        print('Require File Exists:', source_path)
        return

    # Load the commodity data and index it by its publication-time column.
    prices = loadCommodityPrice(code)
    prices.set_index(u'发布时间', inplace=True)
    print(prices.head(10))

    # Distinct values of the requested column, in order of first appearance.
    markets = prices[column].drop_duplicates()
    print('Market Number:', len(markets))
    print('Markets:', markets)

    # Write one file per market, numbered from 1.
    for seq, market in enumerate(markets, start=1):
        print('Market %s: %s' % (seq, market))
        market_rows = prices[prices[column].isin([market])]
        if u.isNoneOrEmpty(market_rows):
            continue
        u.to_csv(market_rows, c.path_dict['commodity'],
                 c.file_dict['commodity_m'] % (code, 'Market_%s' % seq))
    '''
Пример #3
0
def loadFinanceSummary(stock_id):
    '''
    Load the finance summary file of a stock.

    Parameters
    ----------
    stock_id : stock code, substituted into the configured 'finsum' path

    Returns
    -------
    The result of u.read_csv for the file, or None (after printing a notice)
    when the file does not exist.
    '''
    summary_path = c.fullpath_dict['finsum'] % stock_id
    if u.hasFile(summary_path):
        return u.read_csv(summary_path)

    # Data file is not available
    print('Require Finance Summary of %s!' % summary_path)
    return None
Пример #4
0
def loadStockBasics():
    '''
    Load the stock basics file.

    Returns
    -------
    The result of u.read_csv for the configured 'basics' path, or None
    (after printing a notice) when the file does not exist.
    '''
    basics_path = c.fullpath_dict['basics']
    if u.hasFile(basics_path):
        return u.read_csv(basics_path)

    # Data file is not available
    print('Require Stock Basics: %s!' % basics_path)
    return None
Пример #5
0
def loadDailyHFQ(stock_id, is_index):
    '''
    Load the LSHQ data file of a stock or an index.

    Parameters
    ----------
    stock_id : stock (or index) code
    is_index : passed through to u.stockFileName together with stock_id to
        build the file name

    Returns
    -------
    The result of u.read_csv for the file, or None (after printing a notice)
    when the file does not exist.
    '''
    lshq_path = c.fullpath_dict['lshq'] % u.stockFileName(stock_id, is_index)

    # Read the data file when it is available
    if u.hasFile(lshq_path):
        return u.read_csv(lshq_path)

    print('Require LSHQ of %s!' % u.stockFileName(stock_id, is_index))
    return None
Пример #6
0
def load_component(index_name):
    '''
    Load the component file of an index.

    Parameters
    ----------
    index_name : index name, substituted into the configured 'index_c'
        file name

    Returns
    -------
    The result of u.read_csv for the file, or None (after printing a notice)
    when the file does not exist.
    '''
    component_file = c.file_dict['index_c'] % index_name
    component_path = c.path_dict['index'] + component_file

    # Read the data file when it is available
    if u.hasFile(component_path):
        return u.read_csv(component_path)

    print('Require Index Component of %s!' % index_name)
    return None
Пример #7
0
def validDailyHFQ(stock_id, is_index, force_update):
    '''
    Tell whether the stored LSHQ file of a stock or index can be reused.

    Returns False whenever force_update equals True; otherwise returns what
    u.hasFile reports for the LSHQ file of the given stock/index.
    '''
    # The explicit comparison is kept on purpose: only a value equal to True
    # forces an update, other truthy values fall through to the file check.
    return False if force_update == True else u.hasFile(
        c.fullpath_dict['lshq'] % u.stockFileName(stock_id, is_index))
Пример #8
0
def calc_hpe(stock_id, period, ratio):
    '''
    Purpose:
    --------
    Calculate the historical P/E (or E/P) ratio of a stock, period by period.
    Assumes the per-period forward-adjusted (QFQ) price data and the Finance
    Summary data have already been downloaded or calculated and stored as
    CSV files.

    Parameters:
    --------
    stock_id : string, stock code e.g. 600036
    period : string, sampling period, one of 'W', 'M', 'Q'
    ratio : string, 'PE' for price/earnings or 'EP' for earnings/price

    Returns:
    --------
    DataFrame indexed by 'date' (the period end date, e.g. 2005-03-31), all
    values rounded to two decimals:
        high        highest price of the period
        close       closing price of the period
        low         lowest price of the period
        eps         EPS at the end of the quarter holding the period (may be
                    missing)
        eps_filled  eps, with missing quarters estimated from another
                    quarter of the same year
        eps_rolling eps_filled scaled to a full-year figure
        pe_high / pe_close / pe_low   (ratio == 'PE') price / eps_rolling
        ep_high / ep_close / ep_low   (ratio == 'EP') eps_rolling / price
    Any further column of the QFQ file other than open, volume and amount is
    passed through as well.

    Raises:
    --------
    SystemExit
        On invalid arguments, on a missing QFQ or Finance Summary file, and
        on an empty QFQ or Finance Summary data set.
    '''

    # Check Input Parameters
    if not isinstance(stock_id, str) or not isinstance(period, str):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit

    # Check Period
    period_types = ['W', 'M', 'Q']
    if period not in period_types:
        print('Un-supported period type - should be one of:', period_types)
        raise SystemExit

    # Check Ratio
    ratio_types = ['PE', 'EP']
    if ratio not in ratio_types:
        print('Un-supported ratio type - should be one of:', ratio_types)
        raise SystemExit

    # Ensure Stock QFQ Data File is Available
    qfq_path = c.path_dict['qfq'] % period
    qfq_file = c.file_dict['qfq'] % (period, stock_id)
    qfq_fullpath = qfq_path + qfq_file
    if not u.hasFile(qfq_fullpath):
        print('Require stock QFQ file:', qfq_fullpath)
        raise SystemExit

    # Ensure Stock Finance Summary Data File is Available
    fs_fullpath = c.fullpath_dict['finsum'] % stock_id
    if not u.hasFile(fs_fullpath):
        print('Require stock finance summary file:', fs_fullpath)
        raise SystemExit

    #
    # Load QFQ Data
    #

    qfq = u.read_csv(qfq_fullpath)
    qfq.set_index('date', inplace=True)
    qfq.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(qfq.head(10))

    # Check empty QFQ data
    qfq_number = len(qfq)
    if qfq_number == 0:
        print('Stock QFQ data length is 0!')
        raise SystemExit

    # Handle stop-trading periods (close is NaN) by copying the previous
    # period's row. Rows are visited in ascending date order, so a run of
    # stop-trading periods inherits the last traded row; leading
    # stop-trading periods have nothing to inherit and stay untouched.
    # Cells are written through a single .iloc[row, col] call: a chained
    # qfq.iloc[i][column] = ... may assign to a temporary copy and be lost.
    qfq_column_number = len(qfq.columns)
    for i in range(1, qfq_number):
        close = qfq['close'].iloc[i]
        if not np.isnan(close):
            continue
        if gs.is_debug:
            print('close = ', close)
        if np.isnan(qfq['close'].iloc[i - 1]):
            continue  # Ignore leading stop-trading periods
        for j in range(qfq_column_number):
            qfq.iloc[i, j] = qfq.iloc[i - 1, j]

    #
    # Load Finance Summary Data
    #

    fs = u.read_csv(fs_fullpath)
    fs.set_index('date', inplace=True)
    fs.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(fs.head(10))

    # Check empty Finance Summary data
    fs_number = len(fs)
    if fs_number == 0:
        print('Stock finance summary data length is 0!')
        raise SystemExit

    #
    # Generate Rolling EPS for Each Quarter
    #

    # One row per quarter of every year covered by the QFQ data.
    date_start = u.dateFromStr(qfq.index[0])  # First element
    date_end = u.dateFromStr(qfq.index[-1])  # Last element
    years = range(date_start.year, date_end.year + 1)
    quarters = range(1, 5)
    eps_index = [
        u.quarterDateStr(year, quarter) for year in years
        for quarter in quarters
    ]
    if gs.is_debug:
        print(eps_index)

    # Init all elements to NaN
    eps_columns = ['eps', 'eps_filled', 'eps_rolling']
    eps = pd.DataFrame(np.full((len(eps_index), len(eps_columns)), np.nan),
                       index=eps_index,
                       columns=eps_columns)

    # Inherit EPS from finance summary; quarters without data stay NaN
    for index in eps.index:
        if index in fs.index:
            eps.loc[index, 'eps'] = fs.loc[index, 'eps']

    # For a missing quarter: the quarters to estimate it from, in order of
    # preference, with the factor applied to the source quarter's EPS. Each
    # factor is (missing quarter / source quarter), i.e. EPS is assumed to
    # build up evenly over the year.
    fill_rules = {
        1: ((2, 0.5), (3, 0.3333333333333333), (4, 0.25)),
        2: ((1, 2.0), (3, 0.6666666666666667), (4, 0.5)),
        3: ((2, 1.5), (1, 3.0), (4, 0.75)),
        4: ((3, 1.333333333333333), (2, 2.0), (1, 4.0)),
    }
    # Factor turning the EPS at the end of quarter N into a full-year figure
    rolling_ratio = {1: 4.0, 2: 2.0, 3: 1.333333333333333, 4: 1.0}

    # Fill the Missing EPS Data and Calculate Rolling EPS
    for year in years:
        labels = {q: u.quarterDateStr(year, q) for q in quarters}
        reported = {q: eps.loc[labels[q], 'eps'] for q in quarters}
        if gs.is_debug:
            print('eps_q1 =', reported[1], 'eps_q2 =', reported[2],
                  'eps_q3 =', reported[3], 'eps_q4 =', reported[4])

        # Estimates are always derived from reported values, never from
        # other estimates.
        filled = dict(reported)
        for q in quarters:
            if not np.isnan(reported[q]):
                continue
            for source, factor in fill_rules[q]:
                if not np.isnan(reported[source]):
                    filled[q] = reported[source] * factor
                    break
        if gs.is_debug:
            print('eps_q1_filled =', filled[1], 'eps_q2_filled =',
                  filled[2], 'eps_q3_filled =', filled[3],
                  'eps_q4_filled =', filled[4])

        for q in quarters:
            eps.loc[labels[q], 'eps_filled'] = filled[q]
            eps.loc[labels[q], 'eps_rolling'] = filled[q] * rolling_ratio[q]

    if gs.is_debug:
        print(eps.head(10))

    #
    # Calculate HPE based on given period
    #

    # Drop un-used columns
    hpe = qfq.drop(['open', 'volume', 'amount'], axis=1)

    # Add columns to hpe: pe_* for P/E, ep_* for E/P
    prefix = 'pe' if ratio == 'PE' else 'ep'
    price_columns = ['high', 'close', 'low']
    ratio_columns = ['%s_%s' % (prefix, price) for price in price_columns]
    for column in eps_columns + ratio_columns:
        hpe[column] = np.nan

    # Attach to every period the EPS figures of the quarter it falls in
    for index in hpe.index:  # 'YYYY-mm-dd'
        index_date = u.dateFromStr(index)  # datetime.date(YYYY-mm-dd)
        index_quarter = u.quarterDateStr(
            index_date.year, u.quarterOfDate(index_date))  # 'YYYY-mm-dd'
        for column in eps_columns:
            hpe.loc[index, column] = eps.loc[index_quarter, column]

    # Calculate Historical P/E or E/P Ratio
    for price, column in zip(price_columns, ratio_columns):
        if ratio == 'PE':
            hpe[column] = hpe[price] / hpe['eps_rolling']
        else:
            hpe[column] = hpe['eps_rolling'] / hpe[price]

    # Format columns: round every value to two decimals
    for column in hpe.columns:
        hpe[column] = hpe[column].map(lambda x: '%.2f' % x).astype(float)

    return hpe
Пример #9
0
def plot_HPE(stock_id, period, ratio):
    '''
    Plot the historical P/E (or E/P) ratio of a stock above its price.

    The figure has two sub-figures sharing the x axis: the closing-price
    based ratio on top and the closing price below. The figure is shown and
    then handed to u.saveFigure with the configured figure path and name.

    Parameters
    ----------
    stock_id : string, stock code e.g. 600036
    period : string, sampling period, one of 'W', 'M', 'Q'
    ratio : string, 'PE' for price/earnings or 'EP' for earnings/price

    Raises
    ------
    SystemExit
        On invalid arguments or when the HPE data file does not exist.
    '''
    # Check Input Parameters
    if not isinstance(stock_id, str) or not isinstance(period, str):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit

    # Check Period
    period_types = ['W', 'M', 'Q']
    if period not in period_types:
        print('Un-supported period type - should be one of:', period_types)
        raise SystemExit

    # Check Ratio
    ratio_types = ['PE', 'EP']
    if ratio not in ratio_types:
        print('Un-supported ratio type - should be one of:', ratio_types)
        raise SystemExit

    is_pe = ratio == 'PE'

    # Check Pre-requisite: HPE File
    key = 'hpe' if is_pe else 'hep'
    hpe_path = c.path_dict[key] % period
    hpe_file = c.file_dict[key] % (period, stock_id)
    hpe_fullpath = hpe_path + hpe_file
    if not u.hasFile(hpe_fullpath):
        print('Require File Exists:', hpe_fullpath)
        raise SystemExit

    # Load Data File
    hpe = loadHPE(stock_id=stock_id, period=period, ratio=ratio)

    # Plot Figure
    fig, (ax1, ax2) = plt.subplots(2, sharex=True)
    fig.set_size_inches(32, 18)

    # Plot Sub-figure 1: ratio based on the closing price.
    # ratio_name already ends in 'Ratio', so the title must not repeat it.
    ratio_name = 'P/E Ratio' if is_pe else 'E/P Ratio'
    ax1.set_title('%s of Stock %s' % (ratio_name, stock_id), fontsize=14)
    ax1.set_xlabel('')
    ax1.set_ylabel(ratio_name)
    hpe.plot(x='date', y='pe_close' if is_pe else 'ep_close', ax=ax1)

    # Plot Sub-figure 2: closing price
    ax2.set_title('Price of Stock %s' % stock_id, fontsize=14)
    ax2.set_xlabel('')
    ax2.set_ylabel('Price')
    hpe.plot(x='date', y='close', ax=ax2)

    # Common Format for Both Sub-figures
    for ax in [ax1, ax2]:
        ax.grid(True)
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.setp(plt.gca().get_xticklabels(), rotation=30)
    plt.show()

    # Save Figure
    fig_key = 'fig_hpe' if is_pe else 'fig_hep'
    fig_path = c.path_dict[fig_key] % period
    fig_file = c.file_dict[fig_key] % (period, stock_id)
    u.saveFigure(fig, fig_path, fig_file)
Пример #10
0
def validFinanceSummary(stock_id, force_update):
    '''
    Tell whether the stored finance summary file of a stock can be reused.

    Returns False whenever force_update equals True; otherwise returns what
    u.hasFile reports for the finance summary file of the stock.
    '''
    # The explicit comparison is kept on purpose: only a value equal to True
    # forces an update, other truthy values fall through to the file check.
    return False if force_update == True else u.hasFile(
        c.fullpath_dict['finsum'] % stock_id)
Пример #11
0
def validStockBasics(force_update):
    '''
    Tell whether the stored stock basics file can be reused.

    Returns False whenever force_update equals True; otherwise returns what
    u.hasFile reports for the configured 'basics' file.
    '''
    # The explicit comparison is kept on purpose: only a value equal to True
    # forces an update, other truthy values fall through to the file check.
    return False if force_update == True else u.hasFile(
        c.fullpath_dict['basics'])
Пример #12
0
def calc_qfq(stock_id, period):
    '''
    Purpose:
    --------
    Calculate forward-adjusted (QFQ) historical price data period by period,
    for use in the historical P/E calculation.
    Assumes the historical quotes (LSHQ) have already been downloaded and
    stored as a CSV file.

    Parameters:
    --------
    stock_id : string, stock code e.g. 600036
    period : string, resampling period, one of 'W', 'M', 'Q'

    Returns:
    --------
    DataFrame indexed by 'date' (the period end date, e.g. 2005-03-31):
        open    first opening price of the period, divided by the factor
        high    highest price of the period, divided by the factor
        close   last closing price of the period, divided by the factor
        low     lowest price of the period, divided by the factor
        volume  total volume of the period (not adjusted)
        amount  total amount of the period (not adjusted)
    The factor is the last 'factor' value of the period; the 'factor' column
    itself is removed from the result. Any other column of the LSHQ file is
    kept with the first value of each period.

    Raises:
    --------
    SystemExit
        On invalid arguments or when the LSHQ file does not exist.
    '''

    # Check Input Parameters
    if not isinstance(stock_id, str) or not isinstance(period, str):
        print('Incorrect type of one or more input parameters!')
        raise SystemExit

    # Check Period
    period_types = ['W', 'M', 'Q']
    if period not in period_types:
        print('Un-supported period type - should be one of:', period_types)
        raise SystemExit

    # Ensure Stock LSHQ Data File is Available
    lshq_fullpath = c.fullpath_dict['lshq'] % stock_id
    if not u.hasFile(lshq_fullpath):
        print('Require LSHQ of Stock %s!' % stock_id)
        raise SystemExit

    df = u.read_csv(lshq_fullpath)
    # An explicit unit is required: recent pandas versions refuse to cast
    # to the unit-less np.datetime64.
    df['date'] = df['date'].astype('datetime64[ns]')
    df.set_index('date', inplace=True)
    df.sort_index(ascending=True, inplace=True)
    if gs.is_debug:
        print(df.head(10))

    # Start from the first value of every column, then overwrite the columns
    # that need a different aggregation.
    df_resample = df.resample(period).first()

    df_resample['open'] = df['open'].resample(period).first()
    df_resample['high'] = df['high'].resample(period).max()
    df_resample['close'] = df['close'].resample(period).last()
    df_resample['low'] = df['low'].resample(period).min()
    df_resample['volume'] = df['volume'].resample(period).sum()
    df_resample['amount'] = df['amount'].resample(period).sum()
    df_resample['factor'] = df['factor'].resample(period).last()

    # Adjust the prices by the factor; volume and amount are not touched.
    # Whole columns are assigned at once: a chained
    # df_resample.iloc[i][column] = ... may assign to a temporary copy and
    # leave the frame unadjusted.
    for column in ['open', 'high', 'close', 'low']:
        df_resample[column] = df_resample[column] / df_resample['factor']

    df_resample.drop('factor', axis=1, inplace=True)

    # Return Resampled Dataframe
    return df_resample
Пример #13
0
def validCommodityPrice(code, force_update):
    '''
    Tell whether the stored price file of a commodity can be reused.

    Returns False whenever force_update equals True; otherwise returns what
    u.hasFile reports for the commodity file of the given code.
    '''
    # The explicit comparison is kept on purpose: only a value equal to True
    # forces an update, other truthy values fall through to the file check.
    return False if force_update == True else u.hasFile(
        c.fullpath_dict['commodity'] % code)