Python Utilities.createDataFrame примеры, Common.Utilities.createDataFrame Python примеры использования

Пример #1

0

Показать файл

Файл: CoefficientStrategy.py Проект: dxcv/fQuant

def extractRollingBeta(postfix):
    """Extract the rolling beta columns of a coefficient file and average them.

    Reads the strategy CSV selected by ``postfix``, copies its ``date`` column
    and every column whose name starts with ``beta`` into a new frame, stores
    the per-row mean of the non-missing beta values in the ``beta`` column and
    writes the frame as the strategy CSV for ``postfix + '_Beta'``.

    Args:
        postfix: Value substituted into ``c.file_dict['strategy']`` to build
            the name of the coefficient file.

    Returns:
        False when the coefficient file is missing or empty, True once the
        beta file has been written.
    """
    # Load Rolling Coefficient
    fullpath = c.path_dict['strategy'] + c.file_dict['strategy'] % postfix
    coef = u.read_csv(fullpath)
    if u.isNoneOrEmpty(coef):
        print('Require Coefficient File: %s!' % fullpath)
        return False

    # Extract Rolling Beta
    beta = u.createDataFrame(len(coef), ['date', 'beta'])
    beta['date'] = coef['date']
    for column in coef.columns:
        if column.startswith('beta'):
            beta[column] = coef[column]

    # Calculate Rolling Beta Average
    # Everything after the leading 'date' and 'beta' columns is a per-stock
    # beta series copied above.
    beta_columns = beta.columns[2:]
    if len(beta_columns) > 0:
        valid_count = beta[beta_columns].count(axis=1)  # non-missing betas per row
        has_beta = valid_count > 0
        # Rows without a single valid beta keep whatever createDataFrame put
        # into the 'beta' column.
        beta.loc[has_beta, 'beta'] = (
            beta.loc[has_beta, beta_columns].sum(axis=1) /
            valid_count[has_beta])

    beta.set_index('date', inplace=True)
    beta_postfix = '_'.join([postfix, 'Beta'])
    u.to_csv(beta, c.path_dict['strategy'],
             c.file_dict['strategy'] % beta_postfix)

    return True

Пример #2

0

Показать файл

Файл: CoefficientStrategy.py Проект: dxcv/fQuant

def histogramAlpha(postfix, completeness_threshold):
    """Count the loaded coefficients per alpha bucket and save the histogram.

    Loads the coefficient table through ``loadCoefficient``, classifies every
    value of its ``alpha`` column into one of ten buckets of width 0.1 and
    writes the resulting one-row frame (indexed by ``Total``) as a strategy
    CSV whose postfix is ``<postfix>_<completeness_threshold>_HistogramAlpha``.

    Args:
        postfix: Postfix of the coefficient file, forwarded to
            ``loadCoefficient``.
        completeness_threshold: Forwarded to ``loadCoefficient`` and embedded
            in the output file name.

    Returns:
        False when no coefficient data could be loaded, True otherwise.
    """
    # Load Coefficient File
    allcoef = loadCoefficient(postfix, completeness_threshold)
    if u.isNoneOrEmpty(allcoef):
        return False

    # Calculate Coefficient Histogram
    # Inclusive lower bound of every bucket, highest first.
    buckets = [
        (0.4, 'Very High'),  # [0.4, +Infinity)
        (0.3, 'High'),  # [0.3, 0.4)
        (0.2, 'Medium'),  # [0.2, 0.3)
        (0.1, 'Low'),  # [0.1, 0.2)
        (0.0, 'Very Low'),  # [0.0, 0.1)
        (-0.1, 'Negative Very Low'),  # [-0.1, 0.0)
        (-0.2, 'Negative Low'),  # [-0.2, -0.1)
        (-0.3, 'Negative Medium'),  # [-0.3, -0.2)
        (-0.4, 'Negative High'),  # [-0.4, -0.3)
    ]
    lowest_bucket = 'Negative Very High'  # (-Infinity, -0.4)
    columns = ['Total'] + [name for _, name in buckets] + [lowest_bucket]
    histogram = u.createDataFrame(1, columns, 0)
    # The single histogram row is addressed by label 0, as the original
    # `.ix[0, ...]` access did.
    histogram.loc[0, 'Total'] = len(allcoef)
    for alpha in allcoef['alpha']:
        # NOTE(review): a NaN alpha fails every comparison and is therefore
        # counted in the lowest bucket; this matches the previous behaviour.
        bucket = lowest_bucket
        for lower_bound, name in buckets:
            if alpha >= lower_bound:
                bucket = name
                break
        histogram.loc[0, bucket] = histogram.loc[0, bucket] + 1

    # Save to CSV File
    histogram.set_index('Total', inplace=True)
    # str() keeps the join working when the threshold is passed as a number.
    file_postfix = '_'.join(
        [postfix, str(completeness_threshold), 'HistogramAlpha'])
    u.to_csv(histogram, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)

    return True

Пример #3

0

Показать файл

Файл: CoefficientStrategy.py Проект: dxcv/fQuant

def histogramCorrelation(postfix, completeness_threshold):
    """Count the loaded coefficients per correlation bucket and save them.

    Loads the coefficient table through ``loadCoefficient``, classifies every
    value of its ``correlation`` column into one of ten buckets of width 0.2
    and writes the resulting one-row frame (indexed by ``Total``) as a
    strategy CSV whose postfix is
    ``<postfix>_<completeness_threshold>_HistogramCorrelation``.

    Args:
        postfix: Postfix of the coefficient file, forwarded to
            ``loadCoefficient``.
        completeness_threshold: Forwarded to ``loadCoefficient`` and embedded
            in the output file name.

    Returns:
        False when no coefficient data could be loaded, True otherwise.
    """
    # Load Coefficient File
    allcoef = loadCoefficient(postfix, completeness_threshold)
    if u.isNoneOrEmpty(allcoef):
        return False

    # Calculate Coefficient Histogram
    # (lower bound, bound is inclusive, bucket name), highest first. Only the
    # 'Very Weak' bucket includes its lower bound so that 0.0 counts as
    # non-negative.
    buckets = [
        (0.8, False, 'Very Strong'),  # (0.8, 1.0]
        (0.6, False, 'Strong'),  # (0.6, 0.8]
        (0.4, False, 'Medium'),  # (0.4, 0.6]
        (0.2, False, 'Weak'),  # (0.2, 0.4]
        (0.0, True, 'Very Weak'),  # [0.0, 0.2]
        (-0.2, False, 'Negative Very Weak'),  # (-0.2, 0.0)
        (-0.4, False, 'Negative Weak'),  # (-0.4, -0.2]
        (-0.6, False, 'Negative Medium'),  # (-0.6, -0.4]
        (-0.8, False, 'Negative Strong'),  # (-0.8, -0.6]
    ]
    lowest_bucket = 'Negative Very Strong'  # [-1.0, -0.8]
    columns = ['Total'] + [name for _, _, name in buckets] + [lowest_bucket]
    histogram = u.createDataFrame(1, columns, 0)
    # The single histogram row is addressed by label 0, as the original
    # `.ix[0, ...]` access did.
    histogram.loc[0, 'Total'] = len(allcoef)
    for correlation in allcoef['correlation']:
        # NOTE(review): a NaN correlation fails every comparison and is
        # therefore counted in the lowest bucket; this matches the previous
        # behaviour.
        bucket = lowest_bucket
        for lower_bound, inclusive, name in buckets:
            if correlation > lower_bound or (inclusive and
                                             correlation == lower_bound):
                bucket = name
                break
        histogram.loc[0, bucket] = histogram.loc[0, bucket] + 1

    # Save to CSV File
    histogram.set_index('Total', inplace=True)
    # str() keeps the join working when the threshold is passed as a number.
    file_postfix = '_'.join(
        [postfix, str(completeness_threshold), 'HistogramCorrelation'])
    u.to_csv(histogram, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)

    return True

Пример #4

0

Показать файл

Файл: GenerateIndex.py Проект: dxcv/fQuant

def generateIndexStatistics(index_names, series_name):
    """Summarise the latest result row of several indexes into one CSV.

    For every name in ``index_names`` the result frame returned by
    ``load_index_result`` is read and the values of its last row (date, index,
    ratio, benchmark index, benchmark ratio) are collected together with the
    component / suspended counts from ``generateComponentStatistics``. The
    table is printed and written with ``c.file_dict['index_s'] % series_name``
    unless it is empty.

    Args:
        index_names: Sequence of index names to summarise.
        series_name: Value substituted into the output file name.
    """
    # Create Statistics DataFrame
    columns = [
        'name', 'date', 'c_number', 's_number', 'index', 'ratio', 'b_index',
        'b_ratio', 'delta_ratio'
    ]
    stat = u.createDataFrame(len(index_names), columns)

    # Calculate Index Statistics
    loaded_rows = []  # row labels that were actually filled
    for i, index_name in enumerate(index_names):
        result = load_index_result(index_name)
        if u.isNoneOrEmpty(result):
            continue
        # Assumes `result` carries the default 0..n-1 index, which the
        # original `.ix[len - 1, ...]` access relied on as well.
        last_row = len(result) - 1
        latest_date = result.loc[last_row, 'date']
        component_number, suspended_number = generateComponentStatistics(
            index_name, latest_date)
        ratio = result.loc[last_row, 'ratio']
        b_ratio = result.loc[last_row, 'b_ratio']
        stat.loc[i, 'name'] = index_name
        stat.loc[i, 'date'] = latest_date
        stat.loc[i, 'c_number'] = component_number
        stat.loc[i, 's_number'] = suspended_number
        stat.loc[i, 'index'] = result.loc[last_row, 'index']
        stat.loc[i, 'ratio'] = ratio
        stat.loc[i, 'b_index'] = result.loc[last_row, 'b_index']
        stat.loc[i, 'b_ratio'] = b_ratio
        stat.loc[i, 'delta_ratio'] = ratio - b_ratio
        loaded_rows.append(i)

    # Rows of skipped indexes still hold createDataFrame's placeholder values
    # (presumably NaN), which cannot go through the int / '%.2f' conversions
    # below, so only the filled rows are kept.
    stat = stat.loc[loaded_rows].copy()

    # Format Statistics DataFrame
    for column in ['name', 'date']:
        stat[column] = stat[column].astype(str)
    for column in ['c_number', 's_number']:
        stat[column] = stat[column].astype(int)
    for column in ['index', 'b_index']:
        # Round to two decimals by formatting and parsing back.
        stat[column] = stat[column].map(lambda x: '%.2f' % x)
        stat[column] = stat[column].astype(float)
    for column in ['ratio', 'b_ratio', 'delta_ratio']:
        stat[column] = stat[column].map(lambda x: '%.2f%%' % (x * 100.0))
    stat.set_index('name', inplace=True)

    # Save to CSV File
    if not u.isNoneOrEmpty(stat):
        print(stat)
        u.to_csv(stat, c.path_dict['index'],
                 c.file_dict['index_s'] % series_name)

Пример #5

0

Показать файл

Файл: GenerateIndex.py Проект: dxcv/fQuant

def generateIndexSeries(index_names, series_name):
    """Merge the result series of several indexes into one wide CSV.

    The output has a ``date`` column, the benchmark columns
    (``index_benchmark``, ``ratio_benchmark``) taken from the first index and
    an ``index_<name>`` / ``ratio_<name>`` pair per index. Ratio columns store
    ``1.0 + ratio``. All numeric columns are rounded to two decimals and the
    frame is written with ``c.file_dict['index_r'] % series_name``.

    Args:
        index_names: Sequence of index names; the first one defines the dates,
            the benchmark columns and the number of rows.
        series_name: Value substituted into the output file name.
    """
    if len(index_names) == 0:
        print('No Index Name to Generate Index Series!')
        return

    # Generate Index Series DataFrame
    columns = ['date', 'index_benchmark', 'ratio_benchmark']
    str_columns = ['date']
    float_columns = ['index_benchmark', 'ratio_benchmark']
    for index_name in index_names:
        for prefix in ('index_', 'ratio_'):
            columns.append(prefix + index_name)
            float_columns.append(prefix + index_name)
    index = load_index_result(index_names[0])
    if u.isNoneOrEmpty(index):
        print('Require Index Result: %s!' % index_names[0])
        return
    row_number = len(index)
    series = u.createDataFrame(row_number, columns)
    print(series)  # debug output kept from the original implementation

    # Init Series with Common Columns
    # Values are copied by position; the first index has exactly row_number
    # rows, so no truncation is needed here.
    series['date'] = index['date'].values
    series['index_benchmark'] = index['b_index'].values
    series['ratio_benchmark'] = 1.0 + index['b_ratio'].values

    # Merge Separate Index Data into Index Series
    for index_name in index_names:
        index = load_index_result(index_name)
        # Only the first row_number rows are used; an index with fewer rows
        # fails here with a length mismatch instead of a per-cell KeyError.
        series['index_' + index_name] = index['index'].values[:row_number]
        series['ratio_' +
               index_name] = 1.0 + index['ratio'].values[:row_number]

    # Format Columns
    for column in str_columns:
        series[column] = series[column].astype(str)
    for column in float_columns:
        # Round to two decimals by formatting and parsing back.
        series[column] = series[column].map(lambda x: '%.2f' % x)
        series[column] = series[column].astype(float)
    series.set_index('date', inplace=True)

    # Save to CSV File
    if not u.isNoneOrEmpty(series):
        u.to_csv(series, c.path_dict['index'],
                 c.file_dict['index_r'] % series_name)

Пример #6

0

Показать файл

def strategyPriceFollow(stock_id, is_index, trend_threshold):
    """Label every trading day with an up/down trend that follows the price.

    Walks through the daily close prices returned by ``loadDailyQFQ``. An up
    trend turns into a down trend once the close drops more than
    ``trend_threshold`` (as a fraction) below the highest close of the trend;
    a down trend turns up once the close rises more than ``trend_threshold``
    above the lowest close of the trend. Per row the function records:

    * ``trend``: 'Up' or 'Down'
    * ``trend_high`` / ``trend_low`` / ``trend_ref``: running extreme of the
      current trend and the extreme of the previous one
    * ``predict``: value in [-1.0, 1.0], linear in how far the close has moved
      from the trend extreme towards the reversal threshold
    * ``confirm``: 1.0 / -1.0 written retroactively once a new extreme or a
      turning point settles the direction of the preceding rows
    * ``trend_price``: close prices linearly interpolated between consecutive
      trend extremes

    Two CSV files are written: ``Timing_<stock>_<threshold>.csv`` with the
    turning points and the strategy file ``PriceFollow_<stock>_<threshold>``
    with the annotated price table.

    Args:
        stock_id: Stock or index code, forwarded to ``loadDailyQFQ``.
        is_index: Flag forwarded to ``loadDailyQFQ`` / ``u.stockFileName``.
        trend_threshold: Relative price move that reverses a trend.

    Raises:
        SystemExit: If no daily data could be loaded.
    """
    # Load Stock Daily QFQ Data
    lshq = loadDailyQFQ(stock_id, is_index)
    if u.isNoneOrEmpty(lshq):
        raise SystemExit

    # Calculate Trend, Trend High, Trend Low, Trend Ref
    # NOTE: rows are addressed by integer label, i.e. `lshq` is assumed to
    # carry the default 0..n-1 index (the original `.ix` access needed that
    # as well).
    lshq['trend'] = 'Up'
    for column in ['trend_high', 'trend_low', 'trend_ref']:
        lshq[column] = 0.0
    for column in ['predict', 'confirm']:
        lshq[column] = np.nan
    lshq_number = len(lshq)
    trends = []  # direction of every trend, in order
    trend_turning_points = []  # row at which each trend was detected
    trend_index_highs = []  # row of the extreme of every up trend
    trend_index_lows = []  # row of the extreme of every down trend
    index_high = 0
    index_low = 0
    for i in range(lshq_number):
        if i == 0:  # Initialization
            lshq.loc[i, 'trend'] = 'Up'
            trends.append('Up')
            trend_turning_points.append(i)
            for column in ['trend_high', 'trend_low', 'trend_ref']:
                lshq.loc[i, column] = lshq.loc[i, 'close']
        else:
            trend = lshq.loc[i - 1, 'trend']
            trend_high = lshq.loc[i - 1, 'trend_high']
            trend_low = lshq.loc[i - 1, 'trend_low']
            trend_ref = lshq.loc[i - 1, 'trend_ref']
            trend_cur = lshq.loc[i, 'close']
            if trend == 'Up':
                if (1.0 - trend_cur / trend_high) > trend_threshold:
                    # Up trend reverses
                    lshq.loc[i, 'trend'] = 'Down'
                    trends.append('Down')
                    trend_turning_points.append(i)
                    trend_index_highs.append(index_high)
                    lshq.loc[i, 'predict'] = -1.0
                    # Turning point confirms all trades since last high to be
                    # down-trend (label slice, end inclusive)
                    lshq.loc[index_high + 1:i, 'confirm'] = -1.0
                    lshq.loc[i, 'trend_ref'] = trend_high
                    lshq.loc[i, 'trend_low'] = trend_cur
                    index_low = i
                else:
                    # Up trend continues
                    lshq.loc[i, 'trend'] = 'Up'
                    if trend_cur > trend_high:
                        lshq.loc[i, 'predict'] = 1.0
                        # New high confirms all trades since last high to be
                        # up-trend
                        lshq.loc[index_high + 1:i, 'confirm'] = 1.0
                        lshq.loc[i, 'trend_high'] = trend_cur
                        index_high = i
                    else:
                        ratio = (1.0 -
                                 trend_cur / trend_high) / trend_threshold
                        lshq.loc[i, 'predict'] = 1.0 * (1.0 - ratio) + (
                            -1.0) * ratio  # Map to [1.0, -1.0]
                        lshq.loc[i, 'trend_high'] = trend_high
                    lshq.loc[i, 'trend_ref'] = trend_ref
            else:
                if (trend_cur / trend_low - 1.0) > trend_threshold:
                    # Down trend reverses
                    lshq.loc[i, 'trend'] = 'Up'
                    trends.append('Up')
                    trend_turning_points.append(i)
                    trend_index_lows.append(index_low)
                    lshq.loc[i, 'predict'] = 1.0
                    # Turning point confirms all trades since last low to be
                    # up-trend
                    lshq.loc[index_low + 1:i, 'confirm'] = 1.0
                    lshq.loc[i, 'trend_ref'] = trend_low
                    lshq.loc[i, 'trend_high'] = trend_cur
                    index_high = i
                else:
                    # Down trend continues
                    lshq.loc[i, 'trend'] = 'Down'
                    if trend_cur < trend_low:
                        lshq.loc[i, 'predict'] = -1.0
                        # New low confirms all trades since last low to be
                        # down-trend
                        lshq.loc[index_low + 1:i, 'confirm'] = -1.0
                        lshq.loc[i, 'trend_low'] = trend_cur
                        index_low = i
                    else:
                        ratio = (trend_cur / trend_low - 1.0) / trend_threshold
                        lshq.loc[i, 'predict'] = (-1.0) * (1.0 - ratio) + (
                            1.0) * ratio  # Map to [1.0, -1.0]
                        lshq.loc[i, 'trend_low'] = trend_low
                    lshq.loc[i, 'trend_ref'] = trend_ref
        # Handle Last Trend: the still-open trend ends at the last row. This
        # check also has to run for i == 0, otherwise a single-row table
        # leaves the extreme lists empty and the trend-price step fails.
        if i == lshq_number - 1:
            if lshq.loc[i, 'trend'] == 'Up':
                trend_index_highs.append(i)
            else:
                trend_index_lows.append(i)

    # Calculate Trend Price
    lshq['trend_price'] = 0.0
    trend_number = len(trends)
    print('Trend # =', trend_number)
    index_ref = 0  # row where the current interpolation segment starts
    idx_high = 0
    idx_low = 0
    for i in range(trend_number):
        if trends[i] == 'Up':
            index_tar = trend_index_highs[idx_high]
            idx_high = idx_high + 1
        else:
            index_tar = trend_index_lows[idx_low]
            idx_low = idx_low + 1
        price_ref = lshq.loc[index_ref, 'close']
        price_tar = lshq.loc[index_tar, 'close']
        # Linear interpolation over [index_ref, index_tar); the target row is
        # written as the start of the next segment.
        for index in range(index_ref, index_tar):
            ratio = float(index - index_ref) / float(index_tar - index_ref)
            lshq.loc[
                index,
                'trend_price'] = price_ref * (1.0 - ratio) + price_tar * ratio
        index_ref = index_tar
        # Handle Last Trend
        if i == trend_number - 1:
            lshq.loc[index_tar, 'trend_price'] = price_tar

    # Record Timing Data
    timing = u.createDataFrame(trend_number, ['date', 'trend'])
    for i in range(trend_number):
        timing.loc[i, 'date'] = lshq.loc[trend_turning_points[i], 'date']
        timing.loc[i, 'trend'] = trends[i]
    timing.set_index('date', inplace=True)
    timing.sort_index(ascending=True, inplace=True)

    # Save to CSV File
    file_postfix = 'Timing_%s_%s' % (u.stockFileName(
        stock_id, is_index), trend_threshold)
    u.to_csv(timing,
             c.path_dict['strategy'],
             file_postfix + '.csv',
             encoding='gbk')

    # Format Data Frame
    for column in ['trend_high', 'trend_low', 'trend_ref', 'trend_price']:
        # Round to three decimals by formatting and parsing back.
        lshq[column] = lshq[column].map(lambda x: '%.3f' % x)
        lshq[column] = lshq[column].astype(float)
    lshq.set_index('date', inplace=True)
    lshq.sort_index(ascending=True, inplace=True)

    # Save to CSV File
    file_postfix = 'PriceFollow_%s_%s' % (u.stockFileName(
        stock_id, is_index), trend_threshold)
    u.to_csv(lshq, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)

Пример #7

0

Показать файл

def mergePriceFollow(stock_list, is_index, threshold_list):
    """Merge the PriceFollow results of several thresholds per stock.

    For every stock the ``PriceFollow_<stock>_<threshold>`` strategy files are
    read and their ``trend``, ``trend_price``, ``predict`` and ``confirm``
    columns are placed side by side (suffixed with the threshold). For every
    threshold that has both a lower and an upper neighbour in
    ``threshold_list`` a threshold-weighted average of the three ``predict``
    columns (``wpredict_<t>``) and the derived direction (``wtrend_<t>``) are
    added. The merged table is written as ``PriceFollow_<stock>_All`` and the
    last row of every stock is collected in ``PriceFollow_Statistics``.

    Args:
        stock_list: Indexable sequence of stock / index codes.
        is_index: Flag forwarded to ``u.stockFileName``.
        threshold_list: Indexable sequence of numeric thresholds, in the order
            in which neighbours should be combined.

    Raises:
        SystemExit: If ``stock_list`` or ``threshold_list`` is empty.
    """
    stock_number = len(stock_list)
    if stock_number < 1:
        print('Stock Number:', stock_number)
        raise SystemExit

    threshold_number = len(threshold_list)
    if threshold_number < 1:
        print('Threshold Number:', threshold_number)
        raise SystemExit

    # Positions of the thresholds that have a neighbour on both sides.
    inner_positions = range(1, threshold_number - 1)

    # Init Price Follow Statistics for All Indexes
    stats_columns = ['date', 'index']
    for i in inner_positions:
        stats_columns.append('wpredict_%s' % threshold_list[i])
        stats_columns.append('wtrend_%s' % threshold_list[i])
    stats = u.createDataFrame(stock_number, stats_columns)

    for s in range(stock_number):
        stock_id = stock_list[s]
        stock_file = u.stockFileName(stock_id, is_index)
        # Load Results from Different Threshold
        dfs = []
        for threshold in threshold_list:
            file_postfix = 'PriceFollow_%s_%s' % (stock_file, threshold)
            fullpath = c.path_dict[
                'strategy'] + c.file_dict['strategy'] % file_postfix
            dfs.append(u.read_csv(fullpath))
        # Compose Final Results
        drop_columns = [
            'trend', 'trend_high', 'trend_low', 'trend_ref', 'trend_price',
            'predict', 'confirm'
        ]
        df = dfs[0].drop(drop_columns, axis=1)
        # Grouped by column kind first, threshold second, which fixes the
        # column order of the output file.
        for column in ['trend', 'trend_price', 'predict', 'confirm']:
            for threshold, source in zip(threshold_list, dfs):
                df[column + '_%s' % threshold] = source[column]
        # Weighted Predict Columns
        cutoff = 0.0  # Optimized cutoff for weighted predict
        for i in inner_positions:
            t_prev = threshold_list[i - 1]
            t_curr = threshold_list[i]
            t_next = threshold_list[i + 1]
            t_total = t_prev + t_curr + t_next
            column_postfix = '_%s' % t_curr
            wpredict = 0.0
            for t in [t_prev, t_curr, t_next]:
                wpredict = wpredict + t * df['predict' + '_%s' % t]
            wpredict = wpredict / t_total
            df['wpredict' + column_postfix] = wpredict
            # A NaN prediction compares False and therefore maps to 'Down'.
            # astype(object) keeps the column string-friendly even when the
            # frame is empty.
            df['wtrend' + column_postfix] = wpredict.map(
                lambda w: 'Up' if w >= cutoff else 'Down').astype(object)
            # The first row is left unset, as in the row-by-row version that
            # started at row 1.
            if len(df) > 0:
                first_row = df.index[0]
                df.loc[first_row, 'wpredict' + column_postfix] = np.nan
                df.loc[first_row, 'wtrend' + column_postfix] = np.nan

        # Fill One Row of Statistics
        # Assumes `df` carries the default 0..n-1 index produced by reading
        # the CSV (the original `.ix` access needed that as well).
        last_index = len(df) - 1
        stats.loc[s, 'date'] = df.loc[last_index, 'date']
        stats.loc[s, 'index'] = stock_id
        for i in inner_positions:
            column_postfix = '_%s' % threshold_list[i]
            for name in ('wpredict', 'wtrend'):
                stats.loc[s, name + column_postfix] = df.loc[
                    last_index, name + column_postfix]

        # Format Columns
        df.set_index('date', inplace=True)
        # Save to CSV File
        file_postfix = 'PriceFollow_%s_All' % stock_file
        u.to_csv(df, c.path_dict['strategy'],
                 c.file_dict['strategy'] % file_postfix)

    # Format Columns
    stats.set_index('date', inplace=True)
    # Save to CSV File
    file_postfix = 'PriceFollow_Statistics'
    u.to_csv(stats, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)

Пример #8

0

Показать файл

def strategyRelativity(benchmark_id, date_start, date_end, period, stock_ids,
                       is_index, stock_name):
    '''
    Purpose:
    --------
    For the given start/end dates and sampling frequency, compute the relative
    strength of all market indexes against the performance benchmark.
    Assumption: the market-wide index list, the historical forward-adjusted
    (QFQ) index data and the historical forward-adjusted benchmark data have
    already been fetched and stored as CSV files.

    Parameters:
    --------
    benchmark_id : string, index code e.g. '000300'
    date_start : string, start date e.g. '2005-01-01'
    date_end : string, end date e.g. '2016-12-31'
    period : string, sampling period e.g. 'M'
    stock_ids : pandas.Series or list, list of stocks/indexes
    is_index : boolean, stock/index flag
    stock_name : string, stock/index name

    Returns:
    --------
    True/False : boolean, whether the strategy run completed

    Data file
        Strategy_Relativity_DateStart_DateEnd_Period_StockName_vs_Benchmark.csv : relative strength of all stocks/indexes taking part in the calculation
    '''
    # Check Period
    if not checkPeriod(period):
        return False

    # Sample Prices
    price = samplePrice(benchmark_id, stock_ids, is_index, date_start,
                        date_end, period)
    if u.isNoneOrEmpty(price):
        return False

    # Output Data
    df = price.copy()

    # Calculate Relativity
    # NOTE: rows are addressed by integer label, i.e. `price` and the frame
    # from createDataFrame are assumed to carry the default 0..n-1 index (the
    # original `.ix` access needed that as well).
    date_number = len(price)
    column_number = len(price.columns)
    relativity = u.createDataFrame(date_number, price.columns, np.nan)
    relativity['date'] = price['date']
    value_columns = relativity.columns[1:column_number]  # Skip 'date'
    stock_columns = relativity.columns[
        2:column_number]  # Skip 'date' and 'close'

    # 1. Turn Prices to Ratios
    for column in value_columns:
        for i in range(date_number):
            # The first row is compared with itself, which yields a ratio of 0.
            prev_price = price.loc[max(i - 1, 0), column]
            curr_price = price.loc[i, column]
            if not (np.isnan(prev_price)
                    or np.isnan(curr_price)):  # Both are valid prices
                relativity.loc[i, column] = (
                    curr_price - prev_price) / prev_price  # Turn price to ratio
        # The output column name uses the last six characters of the source
        # column name.
        df['ratio_' + column[-6:]] = relativity[column]

    # 2. Turn Ratios to Deltas
    for column in stock_columns:
        for i in range(date_number):
            ratio_stock = relativity.loc[i, column]
            ratio_bench = relativity.loc[i, 'close']
            if not (np.isnan(ratio_stock)
                    or np.isnan(ratio_bench)):  # Both are valid ratios
                relativity.loc[
                    i,
                    column] = ratio_stock - ratio_bench  # Turn ratio to delta
        df['delta_' + column[-6:]] = relativity[column]

    # 3. Turn Deltas to Accumulated Deltas
    for column in stock_columns:
        for i in range(date_number):
            # The previous row has already been accumulated in place, so this
            # builds a running sum; a NaN row leaves the current value as is,
            # which restarts the sum after the gap.
            prev_delta = relativity.loc[max(i - 1, 0), column]
            curr_delta = relativity.loc[i, column]
            if not (np.isnan(prev_delta)
                    or np.isnan(curr_delta)):  # Both are valid deltas
                relativity.loc[
                    i,
                    column] = prev_delta + curr_delta  # Turn delta to accumulated delta
        df['accumu_' + column[-6:]] = relativity[column]

    # Save to CSV File
    df.set_index('date', inplace=True)
    file_postfix = '_'.join([
        'Relativity', date_start, date_end, period, stock_name, 'vs',
        benchmark_id
    ])
    u.to_csv(df, c.path_dict['strategy'],
             c.file_dict['strategy'] % file_postfix)

    return True

Пример #9

0

Показать файл

Файл: CoefficientStrategy.py Проект: dxcv/fQuant

def calculateCoefficient(price, ignore_number, min_period_number,
                         ratio_method):
    """Compute completeness, alpha, beta and correlation of stocks vs benchmark.

    ``price`` is expected to hold a date column, the benchmark close in its
    second column and one ``close_<code>`` column per stock after that. Every
    price series is converted with ``dataToRatio``; for a stock the result is
    additionally passed through ``ignoreData`` with ``ignore_number``. Rows
    where either ratio is missing are dropped before the statistics are
    computed.

    Args:
        price: DataFrame laid out as described above.
        ignore_number: Forwarded to ``ignoreData`` (number of leading valid
            values to ignore, per the original comment "since IPO").
        min_period_number: Minimum number of paired ratios required; stocks
            with fewer keep the placeholder values from ``u.createDataFrame``
            for alpha, beta and correlation.
        ratio_method: Forwarded to ``dataToRatio``.

    Returns:
        DataFrame indexed by stock code with the columns ``completeness``
        (percentage string), ``alpha``, ``beta`` and ``correlation`` (rounded
        to three decimals), or None when ``price`` has no stock columns.
    """
    # Create Coefficient Data Frame
    stocks_number = len(price.columns) - 2  # Remove 'date', 'close_benchmark'
    if stocks_number <= 0:
        print('No Stock Data to Calculate Coefficient!')
        return None
    coef = u.createDataFrame(
        stocks_number,
        columns=['code', 'completeness', 'alpha', 'beta', 'correlation'])

    # Calculate Coefficients
    # Correlation, beta and alpha are derived from the same cleaned ratio
    # pairs, so they are computed in a single pass per stock.
    # NOTE(review): assumes dataToRatio / ignoreData do not modify their
    # inputs - confirm against their definitions.
    benchmark = price[price.columns[1]]
    bench_ratio = dataToRatio(benchmark, ratio_method)
    for i in range(stocks_number):
        column = price.columns[i + 2]
        # Turn price to ratio
        stock_ratio = dataToRatio(price[column].copy(), ratio_method)
        # Manually ignore a given number of valid data (since IPO)
        stock_ratio = ignoreData(stock_ratio, ignore_number)
        # Compose data frame and drop NaN
        df = pd.DataFrame({
            'bench_ratio': bench_ratio,
            'stock_ratio': stock_ratio
        })
        df = df.dropna(axis=0, how='any').reset_index(drop=True)

        # Calculate completeness: share of non-missing prices of the stock
        coef.loc[i, 'code'] = column.replace('close_', '')
        null_count = price[column].isnull().sum()
        coef.loc[i,
                 'completeness'] = 1.0 - float(null_count) / len(price[column])

        if len(df) < min_period_number:
            # Not enough data, e.g. a recent IPO
            continue

        b_ratio = df['bench_ratio']
        s_ratio = df['stock_ratio']
        coef.loc[i, 'correlation'] = b_ratio.corr(s_ratio)

        # Beta w.r.t. benchmark: co-moment of the two ratios divided by the
        # second moment of the benchmark ratio (equivalent to
        # b_ratio.cov(s_ratio) / b_ratio.var()).
        b_mean = b_ratio.mean()
        s_mean = s_ratio.mean()
        b_dev = b_ratio - b_mean
        beta = (b_dev * (s_ratio - s_mean)).sum() / (b_dev * b_dev).sum()
        coef.loc[i, 'beta'] = beta
        # Calculate Alpha
        coef.loc[i, 'alpha'] = s_mean - beta * b_mean

    # Format Columns
    coef.set_index('code', inplace=True)
    for column in ['alpha', 'beta', 'correlation']:
        # Round to three decimals by formatting and parsing back.
        coef[column] = coef[column].map(lambda x: '%.3f' % x)
        coef[column] = coef[column].astype(float)
    coef['completeness'] = coef['completeness'].map(lambda x:
                                                    ('%.2f' % (x * 100)) + '%')

    return coef

Пример #10

0

Показать файл

Файл: CoefficientStrategy.py Проект: dxcv/fQuant

def calculateCoefficientRolling(price, rolling_number, min_period_number,
                                ratio_method):
    """Calculate rolling alpha, beta, correlation and completeness per stock.

    For every row ``i`` that has at least ``rolling_number`` rows of history,
    the window ``price.iloc[i + 1 - rolling_number:i + 1]`` is converted to
    ratios with ``dataToRatio`` and every stock column is compared against
    the benchmark (the second column of ``price``).

    Args:
        price: Data frame providing a 'date' column, a 'close' column, the
            benchmark series in column position 1 (presumably the same
            'close' column - verify against the caller) and one column per
            stock from position 2 onwards, expected to be named
            'close_<stock_id>'.
        rolling_number: Number of rows in each rolling window.
        min_period_number: Minimum number of rows that must remain after
            dropping NaN ratios for alpha, beta and correlation to be
            computed for a window.
        ratio_method: Passed through unchanged to ``dataToRatio``.

    Returns:
        A data frame indexed by 'date' that holds 'close' and, per stock,
        'close_<id>', 'completeness_<id>' (percentage string),
        'alpha_<id>', 'beta_<id>' and 'correlation_<id>' (rounded to three
        decimals). Returns None when ``price`` has no stock columns.
    """
    # Create Coefficient Data Frame
    stock_columns = list(price.columns[2:])  # Skip 'date' and the benchmark
    if not stock_columns:
        print('No Stock Data to Calculate Coefficient!')
        return None
    stock_ids = [column.replace('close_', '') for column in stock_columns]
    date_number = len(price)
    coef_columns = ['date', 'close']
    for stock_id in stock_ids:
        for item in ['close', 'completeness', 'alpha', 'beta', 'correlation']:
            coef_columns.append('_'.join([item, stock_id]))
    coef = u.createDataFrame(date_number, columns=coef_columns)
    coef['date'] = price['date']
    coef['close'] = price['close']
    for column in stock_columns:
        coef[column] = price[column]

    # Calculate Coefficients - No need to interpolate price_rolling for stop
    # trading. Correlation, alpha and beta all derive from the same window
    # ratios, so each window is processed exactly once.
    for i in range(max(rolling_number - 1, 0), date_number):
        price_rolling = price.iloc[i + 1 - rolling_number:i + 1, :]
        price_rolling = price_rolling.reset_index(drop=True)
        window_size = len(price_rolling)
        if window_size == 0:  # Only possible when rolling_number <= 0
            continue
        benchmark = price_rolling[price_rolling.columns[1]]
        bench_ratio = dataToRatio(benchmark, ratio_method)
        for column, stock_id in zip(stock_columns, stock_ids):
            # Calculate completeness of this window. The denominator is the
            # window length (not the full history length), otherwise the
            # value would not describe the window at all.
            null_count = price_rolling[column].isnull().sum()
            coef.loc[i, '_'.join(['completeness', stock_id])] = (
                1.0 - float(null_count) / window_size)

            # Turn price to ratio (copy so the window itself stays untouched)
            stock_ratio = dataToRatio(price_rolling[column].copy(),
                                      ratio_method)
            # Compose data frame and drop NaN
            df = pd.DataFrame({
                'bench_ratio': bench_ratio,
                'stock_ratio': stock_ratio
            })
            df = df.dropna(axis=0, how='any')
            if len(df) < min_period_number:  # Insufficient data
                continue

            b_ratio = df['bench_ratio']
            s_ratio = df['stock_ratio']
            coef.loc[i, '_'.join(['correlation', stock_id])] = \
                b_ratio.corr(s_ratio)

            # Compute Beta w.r.t. Benchmark:
            #   beta = sum((b - mean_b) * (s - mean_s)) / sum((b - mean_b)^2)
            # which equals b_ratio.cov(s_ratio) / b_ratio.var().
            b_mean = b_ratio.mean()
            s_mean = s_ratio.mean()
            b_deviation = b_ratio - b_mean
            covariance_sum = (b_deviation * (s_ratio - s_mean)).sum()
            variance_sum = (b_deviation * b_deviation).sum()
            if variance_sum == 0:
                # Beta is undefined for a constant (or empty) benchmark
                # ratio; leave alpha and beta as NaN instead of dividing by 0.
                continue
            beta = covariance_sum / variance_sum
            coef.loc[i, '_'.join(['beta', stock_id])] = beta
            # Calculate Alpha
            coef.loc[i, '_'.join(['alpha', stock_id])] = s_mean - beta * b_mean

    # Format Columns
    coef.set_index('date', inplace=True)
    for stock_id in stock_ids:
        for item in ['alpha', 'beta', 'correlation']:
            column = '_'.join([item, stock_id])
            # Round to three decimals via string formatting, then back to float
            coef[column] = coef[column].map(lambda x: '%.3f' % x)
            coef[column] = coef[column].astype(float)
        column = '_'.join(['completeness', stock_id])
        # Render as a percentage string; rows without a window stay NaN
        coef[column] = coef[column].map(
            lambda x: ('%.2f' % (x * 100)) + '%' if not np.isnan(x) else np.nan)

    return coef

Пример #11

0

Показать файл

Файл: PlotCoefficient.py Проект: dxcv/fQuant

def plot_coefficient_price(stock_ids, allprice, postfix, series_name,
                           benchmark_name):
    """Plot stock prices against a benchmark, relative to first valid price.

    Each series is passed through ``normalize_price`` and then rescaled so
    that its first non-NaN value becomes 1.0. The benchmark is drawn as a
    dashed grey line and every stock as its own line on the same axes. The
    figure is shown and then saved through ``u.saveFigure``.

    Args:
        stock_ids: Iterable of stock identifiers; each is converted with
            ``u.stockID`` and looked up in ``allprice`` as 'close_<id>'.
        allprice: Data frame with 'date', 'close' (benchmark) and one
            'close_<id>' column per requested stock.
        postfix: Leading component of the saved figure's file name.
        series_name: Name of the stock series, used in the title and file
            name.
        benchmark_name: Name of the benchmark, used as column label, in the
            title and in the file name.
    """
    # If want to debug benchmark only (without stocks), set below flag to True.
    debug_benchmark_only = False

    # Extract Stock Prices and Normalize Them
    row_number = len(allprice)
    stock_columns = []
    if not debug_benchmark_only:
        # Resolve every stock id once and reuse it for columns and plotting
        stock_columns = [u.stockID(stock_id) for stock_id in stock_ids]
    columns = ['date', benchmark_name] + stock_columns
    prices = u.createDataFrame(row_number, columns)
    prices['date'] = allprice['date']
    prices[benchmark_name] = allprice['close']
    for stock_id in stock_columns:
        prices[stock_id] = allprice['close_' + stock_id]
    if debug_benchmark_only:
        print('Original Price')
        print(prices)

    # Normalize Price
    value_columns = columns[1:]  # Everything except 'date'
    for column in value_columns:
        prices[column] = normalize_price(prices[column])
    if debug_benchmark_only:
        print('Normalized Price')
        print(prices)

    # Calculate Relative Price w.r.t. First Valid Price
    for column in value_columns:
        series = prices[column]
        valid = series.notnull().values
        if not valid.any():  # No valid price at all, nothing to rescale
            continue
        row = int(valid.argmax())  # Position of the first valid price
        if debug_benchmark_only:
            print('Row =', row)
        # Cache the reference first: it becomes 1.0 once the column is rescaled
        ref_price = series.iloc[row]
        # NaN entries stay NaN; rows before the first valid price are NaN too
        prices[column] = 1.0 + (series - ref_price) / ref_price
    if debug_benchmark_only:
        print('Relative Price')
        print(prices)

    # Plot Figure
    fig = plt.figure(figsize=(32, 18), dpi=72, facecolor="white")
    axes = plt.subplot(111)
    axes.cla()  # Clear Axes

    # Define Font
    font = {
        'family': 'serif',
        'color': 'black',
        'weight': 'normal',
        'size': 18,
    }

    # Plot Sub-figure 1
    title = '%s vs. %s' % (series_name, benchmark_name)
    plt.title(title, fontdict=font)
    axes.set_xlabel('', fontdict=font)
    axes.set_ylabel('Ratio', fontdict=font)
    prices.plot(x='date',
                y=benchmark_name,
                ax=axes,
                color='grey',
                lw=2.0,
                ls='--')
    for column in stock_columns:
        prices.plot(x='date', y=column, ax=axes)

    # Common Format for Both Sub-figures
    axes.grid(True)
    fig.autofmt_xdate()
    fig.tight_layout()
    plt.setp(plt.gca().get_xticklabels(), rotation=30)
    plt.show()

    # Save Figure
    fig_key = 'fig_coef'
    fig_path = c.path_dict[fig_key]
    fig_name = '_'.join(
        [postfix, series_name, 'vs', benchmark_name,
         u.dateToStr(u.today())])
    fig_file = c.file_dict[fig_key] % fig_name
    u.saveFigure(fig, fig_path, fig_file)

Python Utilities.createDataFrame примеры использования