示例#1
0
    def breadth_computing(self):
        """Replace each factor value with a quantile-based score.

        Does nothing unless ``self.breadth_computing_method`` is
        ``'quantile'``. In that case the method:

        1. reads the quantile levels from
           ``self.breadth_computing_param['score_levels']`` and stores a
           copy, sorted from highest to lowest, in ``self.score_levels``;
        2. computes those quantiles of ``self.depth_df`` grouped by index
           level 1 and stores them in ``self.quantile`` (index levels are
           renamed to ``timestamp`` / ``score``);
        3. scores every column listed in ``self.factors``: a value gets the
           highest level whose quantile it reaches, or ``0`` when it is
           below the quantile of the lowest level;
        4. re-indexes the frame with ``index_df_with_security_time``, keeps
           only the factor columns, drops duplicated index entries (first
           occurrence wins), stores it in ``self.result_df`` and finally
           calls ``self.fill_gap()``.
        """
        if self.breadth_computing_method != 'quantile':
            return

        # Sort a copy: sorting in place would silently reorder the list held
        # by the caller-supplied parameter dict.
        self.score_levels = sorted(
            self.breadth_computing_param['score_levels'], reverse=True)

        self.quantile = self.depth_df.groupby(level=1).quantile(
            self.score_levels)
        self.quantile.index.names = ['timestamp', 'score']

        self.logger.info('factor:{},quantile:\n{}'.format(
            self.factor_name, self.quantile))

        self.result_df = self.depth_df.copy()
        self.result_df.reset_index(inplace=True, level='security_id')
        self.result_df['quantile'] = None
        for timestamp in self.quantile.index.levels[0]:
            # Build the row mask once per timestamp and reuse it.
            row_mask = self.result_df.index == timestamp
            length = int(row_mask.sum())
            # Every row of this timestamp receives the same
            # {column: {level: quantile value}} mapping.
            self.result_df.loc[row_mask, 'quantile'] = [
                self.quantile.loc[timestamp].to_dict()
            ] * length

        self.logger.info('factor:{},df with quantile:\n{}'.format(
            self.factor_name, self.result_df))

        def calculate_score(df, factor_name, quantile):
            """Return the score of one row's ``factor_name`` value.

            ``df`` is a single row and ``quantile`` the mapping stored in
            that row's ``quantile`` column.
            """
            original_value = df[factor_name]
            score_map = quantile.get(factor_name)
            min_score = self.score_levels[-1]

            if original_value < score_map.get(min_score):
                return 0

            # Walk ALL levels from high to low. Stopping before the lowest
            # level would leave values between the lowest and the second
            # lowest quantile without any score (None).
            for score in self.score_levels:
                if original_value >= score_map.get(score):
                    return score

        for factor in self.factors:
            self.result_df[factor] = self.result_df.apply(
                lambda x: calculate_score(x, factor, x['quantile']),
                axis=1)

        self.result_df = self.result_df.reset_index()
        self.result_df = index_df_with_security_time(self.result_df)
        self.result_df = self.result_df.loc[:, self.factors]

        self.result_df = self.result_df.loc[~self.result_df.index.
                                            duplicated(keep='first')]

        self.logger.info('factor:{},df:\n{}'.format(
            self.factor_name, self.result_df))

        self.fill_gap()
示例#2
0
    def on_category_data_added(self, category, added_data: pd.DataFrame):
        """Compute indicators for newly added rows and append them to ``depth_df``.

        The last ``self.valid_window + len(added_data)`` rows of
        ``self.data_df`` for ``category`` are used as calculation window, the
        configured indicators are computed on it, and only the trailing
        ``len(added_data)`` rows are appended to ``self.depth_df``.

        :param category: first-level index key of ``self.data_df`` whose rows
            were extended
        :param added_data: the newly added rows; only their count is used
        """
        size = len(added_data)
        if size == 0:
            # ``iloc[-0:]`` selects every row, so without this guard the whole
            # calculation window would be appended again as duplicates.
            return

        # Work on a copy: the indicator columns are assigned below and the
        # slice must not be treated as a view of ``self.data_df``.
        df = self.data_df.loc[category].iloc[-self.valid_window - size:].copy()

        # Stocks use the 'qfq_close' column, everything else uses 'close'.
        if self.security_type == SecurityType.stock:
            price_column = 'qfq_close'
        else:
            price_column = 'close'

        for idx, indicator in enumerate(self.indicators):
            if indicator == 'ma':
                window = self.indicators_param[idx].get('window')

                df['ma{}'.format(window)] = ma(df[price_column], window=window)
            elif indicator == 'macd':
                slow = self.indicators_param[idx].get('slow')
                fast = self.indicators_param[idx].get('fast')
                n = self.indicators_param[idx].get('n')

                df['diff'], df['dea'], df['m'] = macd(df[price_column], slow=slow, fast=fast, n=n)

        # Keep only the rows that correspond to the newly added data.
        df = df.iloc[-size:]
        df = df.reset_index()
        df[self.category_field] = category
        df = index_df_with_security_time(df)

        # ``DataFrame.append`` was removed in pandas 2.0; ``pd.concat`` is the
        # equivalent that works on old and new versions alike.
        self.depth_df = pd.concat([self.depth_df, df])
        self.depth_df = self.depth_df.sort_index(level=[0, 1])
示例#3
0
文件: factor.py 项目: Scorpi000/zvt
    def __init__(self,
                 security_type=SecurityType.stock,
                 exchanges=['sh', 'sz'],
                 codes=None,
                 the_timestamp=None,
                 window=None,
                 window_func='mean',
                 start_timestamp=None,
                 end_timestamp=None,
                 keep_all_timestamp=False,
                 fill_method='ffill',
                 columns=[],
                 filters=None,
                 provider='eastmoney') -> None:
        """Load the factor's raw data and optionally aggregate it over a window.

        The first ten arguments are forwarded unchanged to the parent
        initializer. The raw data is fetched with ``get_data`` into
        ``self.original_df``; ``self.data_df`` is either that frame or, when
        ``self.window`` is set, its per-security time-based rolling
        aggregation, finally restricted to
        ``[self.start_timestamp, self.end_timestamp]``.

        :param columns: schema columns to load; the schema's ``security_id``
            and ``timestamp`` columns are always added. Each item's ``key``
            becomes an entry of ``self.factors``.
        :param filters: passed through to ``get_data``
        :param provider: data provider name passed to ``get_data``
        :raises ValueError: if a window is set and ``self.window_func`` is
            neither ``'mean'`` nor ``'count'``
        """
        # NOTE(review): ``exchanges`` and ``columns`` have mutable list
        # defaults; they are only read here, never mutated.
        super().__init__(security_type, exchanges, codes, the_timestamp,
                         window, window_func, start_timestamp, end_timestamp,
                         keep_all_timestamp, fill_method)

        self.columns = set(columns) | {
            self.data_schema.security_id, self.data_schema.timestamp
        }
        self.factors = [item.key for item in columns]
        self.provider = provider

        self.original_df = get_data(data_schema=self.data_schema,
                                    provider=self.provider,
                                    codes=self.codes,
                                    columns=self.columns,
                                    start_timestamp=self.fetch_start_timestamp,
                                    end_timestamp=self.end_timestamp,
                                    filters=filters)

        self.original_df = index_df_with_security_time(self.original_df)

        self.logger.info(self.original_df)

        if self.window:
            # Fail loudly: an unsupported function would otherwise skip the
            # aggregation and the index handling below would silently drop
            # the security level of the index.
            if self.window_func not in ('mean', 'count'):
                raise ValueError(
                    'unsupported window_func: {}'.format(self.window_func))

            # TODO:better way to handle window function
            rolling = self.original_df.reset_index(
                level='timestamp').groupby(level=0).rolling(
                    window='{}D'.format(self.window.days), on='timestamp')
            if self.window_func == 'mean':
                self.data_df = rolling.mean()
            else:
                self.data_df = rolling.count()

            # The grouped rolling result carries the group key as an extra
            # index level; drop it and restore 'timestamp' as an index level.
            self.data_df = self.data_df.reset_index(level=0, drop=True)
            self.data_df = self.data_df.set_index('timestamp', append=True)
            self.logger.info(self.data_df)
        else:
            self.data_df = self.original_df

        self.data_df = self.data_df.loc[(
            slice(None), slice(self.start_timestamp, self.end_timestamp)), :]

        self.logger.info(self.data_df)
示例#4
0
文件: factor.py 项目: woolf-wen/zvt
    def move_on(self, to_timestamp, touching_timestamp):
        """Fetch the data up to ``to_timestamp`` for every known security.

        For each security in ``self.original_df`` the method waits until
        ``touching_timestamp`` has been reached, then queries ``get_data``
        from the security's latest recorded timestamp to ``to_timestamp``.
        Rows newer than the recorded timestamp are appended to
        ``self.original_df`` and reported through ``self.on_data_added``.
        The query is retried until new rows arrive or the current time is
        more than half a ``self.level`` interval past ``touching_timestamp``.

        :param to_timestamp: end timestamp of the query
        :param touching_timestamp: time before which no query is attempted
        """
        df = self.original_df.reset_index(level='timestamp')
        recorded_timestamps = df.groupby(level=0)['timestamp'].max()

        self.logger.info('current_timestamps:\n{}'.format(recorded_timestamps))

        # ``Series.iteritems`` was removed in pandas 2.0; ``items`` is the
        # equivalent available in old and new versions.
        for security_id, recorded_timestamp in recorded_timestamps.items():
            while True:
                now_timestamp = now_pd_timestamp()
                if touching_timestamp > now_timestamp:
                    # ``total_seconds()`` covers the whole interval;
                    # ``.seconds`` is only the sub-day component and would
                    # under-sleep whenever the wait exceeds one day.
                    delta = (touching_timestamp -
                             now_timestamp).total_seconds()
                    self.logger.info(
                        'want to get {} {} kdata for {},now is:{},waiting:{}sencods'
                        .format(to_timestamp, touching_timestamp, security_id,
                                now_timestamp, delta))
                    time.sleep(delta)

                added = get_data(data_schema=self.data_schema,
                                 provider=self.provider,
                                 security_id=security_id,
                                 columns=self.columns,
                                 start_timestamp=recorded_timestamp,
                                 end_timestamp=to_timestamp,
                                 filters=self.filters,
                                 level=self.level)

                if (added is not None) and not added.empty:
                    # The query starts at the recorded timestamp, so the row
                    # already held must be filtered out.
                    would_added = added[
                        added['timestamp'] != recorded_timestamp]
                    if not would_added.empty:
                        would_added = index_df_with_security_time(would_added)
                        self.logger.info(
                            'would_added:\n{}'.format(would_added))

                        # ``DataFrame.append`` was removed in pandas 2.0.
                        self.original_df = pd.concat(
                            [self.original_df, would_added])
                        self.original_df = self.original_df.sort_index(
                            level=[0, 1])
                        self.on_data_added(security_id=security_id,
                                           size=len(would_added))
                        break
                    else:
                        self.logger.info(
                            'touching_timestamp:{} now_timestamp:{} kdata for {} not ready'
                            .format(touching_timestamp, now_pd_timestamp(),
                                    security_id))

                # NOTE(review): there is no pause between retries, so the
                # query is repeated back-to-back until this deadline passes.
                if now_timestamp > touching_timestamp + pd.Timedelta(
                        seconds=self.level.to_second() / 2):
                    self.logger.warning(
                        'now_timestamp:{},still could not get {} {} kdata for {}'
                        .format(now_timestamp, to_timestamp,
                                touching_timestamp, security_id))
                    break
示例#5
0
def finance_score(data_schema,
                  security_id=None,
                  codes=None,
                  provider='eastmoney',
                  fields=None,
                  timestamp=None,
                  report_count=20):
    """Score securities by the quantile bucket of their averaged report fields.

    Loads ``fields`` (plus ``security_id``, ``timestamp`` and
    ``report_date``) via ``get_data``, keeps the rows belonging to the last
    ``report_count`` distinct report dates, averages them per security and
    replaces every averaged value by a score: the highest of
    0.9 / 0.7 / 0.5 / 0.3 / 0.1 whose cross-security quantile the value
    strictly exceeds, or 0 if it exceeds none.

    :param data_schema: schema passed to ``get_data``
    :param security_id: passed to ``get_data``
    :param codes: passed to ``get_data``
    :param provider: data provider name passed to ``get_data``
    :param fields: column names to score; ``None`` is treated as no extra
        columns
    :param timestamp: end timestamp of the query; ``None`` means the current
        time, resolved on every call
    :param report_count: number of most recent distinct report dates to use
    :return: the scored data frame, one row per security (it is also printed)
    """
    # Resolve "now" per call: a ``now_pd_timestamp()`` default in the
    # signature would be evaluated only once, when the module is imported.
    if timestamp is None:
        timestamp = now_pd_timestamp()

    # Build a new list so neither ``None`` nor the caller's list is touched.
    columns = list(fields or []) + ['security_id', 'timestamp', 'report_date']

    data_df = get_data(data_schema=data_schema,
                       security_id=security_id,
                       codes=codes,
                       provider=provider,
                       columns=columns,
                       end_timestamp=timestamp)

    time_series = data_df['report_date'].drop_duplicates()
    time_series = time_series[-report_count:]

    data_df = index_df_with_security_time(data_df)

    idx = pd.IndexSlice

    # All securities (first index level), selected report dates only.
    df = data_df.loc[idx[:, time_series], :]
    print(df)

    df = df.groupby(df['security_id']).mean()
    print(df)

    score_levels = [0.1, 0.3, 0.5, 0.7, 0.9]
    quantile = df.quantile(score_levels)

    def evaluate_score(s, column):
        """Return the highest level whose quantile ``s`` strictly exceeds."""
        for level in reversed(score_levels):
            if s > quantile.loc[level, column]:
                return level
        return 0

    for item in quantile.columns:
        df[item] = df[item].apply(evaluate_score, args=(item,))

    print(df)
    return df
示例#6
0
文件: factor.py 项目: Scorpi000/zvt
    def run(self):
        """Replace each factor value in ``data_df`` with a quantile-based score.

        Computes the ``self.score_levels`` quantiles of ``self.data_df``
        grouped by index level 1 (stored in ``self.quantile``), then scores
        every column listed in ``self.factors``: a value gets the first
        level in ``self.score_levels`` whose quantile it reaches, or ``0``
        when it is below the quantile of the last level. The result,
        re-indexed with ``index_df_with_security_time`` and reduced to the
        factor columns, is stored in ``self.df`` before ``self.fill_gap()``
        is called.

        NOTE(review): the scoring assumes ``self.score_levels`` is ordered
        from highest to lowest — confirm where it is assigned.
        """
        self.quantile = self.data_df.groupby(level=1).quantile(
            self.score_levels)
        self.quantile.index.names = ['timestamp', 'score']

        self.logger.info(self.quantile)

        self.df = self.data_df.copy()
        self.df.reset_index(inplace=True, level='security_id')
        self.df['quantile'] = None
        for timestamp in self.quantile.index.levels[0]:
            # Build the row mask once per timestamp and reuse it.
            row_mask = self.df.index == timestamp
            length = int(row_mask.sum())
            # Every row of this timestamp receives the same
            # {column: {level: quantile value}} mapping.
            self.df.loc[row_mask,
                        'quantile'] = [self.quantile.loc[timestamp].to_dict()
                                       ] * length

        self.logger.info(self.df)

        def calculate_score(df, factor_name, quantile):
            """Return the score of one row's ``factor_name`` value.

            ``df`` is a single row and ``quantile`` the mapping stored in
            that row's ``quantile`` column.
            """
            original_value = df[factor_name]
            score_map = quantile.get(factor_name)
            min_score = self.score_levels[-1]

            if original_value < score_map.get(min_score):
                return 0

            # Walk ALL levels, including the last one. Skipping it would
            # leave values between the last and the second-to-last quantile
            # without any score (None).
            for score in self.score_levels:
                if original_value >= score_map.get(score):
                    return score

        for factor in self.factors:
            self.df[factor] = self.df.apply(
                lambda x: calculate_score(x, factor, x['quantile']), axis=1)

        self.df = self.df.reset_index()
        self.df = index_df_with_security_time(self.df)
        self.df = self.df.loc[:, self.factors]

        self.logger.info(self.df)

        self.fill_gap()
示例#7
0
    def on_data_added(self, security_id, size):
        """Compute indicators for newly added rows and append them to ``data_df``.

        The last ``self.valid_window + size`` rows of ``self.original_df``
        for ``security_id`` are used as calculation window, the configured
        indicators are computed on it, and only the trailing ``size`` rows
        are appended to ``self.data_df``.

        :param security_id: first-level index key of ``self.original_df``
            whose rows were extended
        :param size: number of rows that were added for that security
        """
        # Local import: ``pd`` is not visibly imported where this method is
        # defined, and ``DataFrame.append`` (removed in pandas 2.0) has to be
        # replaced by ``pd.concat``.
        import pandas as pd

        if not size:
            # ``iloc[-0:]`` selects every row, so without this guard the whole
            # calculation window would be appended again as duplicates.
            return

        # Work on a copy: the indicator columns are assigned below and the
        # slice must not be treated as a view of ``self.original_df``.
        df = self.original_df.loc[security_id].iloc[
            -self.valid_window - size:].copy()

        # Stocks use the 'qfq_close' column, everything else uses 'close'.
        if self.security_type == SecurityType.stock:
            price_column = 'qfq_close'
        else:
            price_column = 'close'

        for idx, indicator in enumerate(self.indicators):
            if indicator == 'ma':
                window = self.indicators_param[idx].get('window')

                df['ma{}'.format(window)] = ma(df[price_column],
                                               window=window)
            elif indicator == 'macd':
                slow = self.indicators_param[idx].get('slow')
                fast = self.indicators_param[idx].get('fast')
                n = self.indicators_param[idx].get('n')

                df['diff'], df['dea'], df['m'] = macd(df[price_column],
                                                      slow=slow,
                                                      fast=fast,
                                                      n=n)

        # Keep only the rows that correspond to the newly added data.
        df = df.iloc[-size:]
        df = df.reset_index()
        df['security_id'] = security_id
        df = index_df_with_security_time(df)

        self.data_df = pd.concat([self.data_df, df])
        self.data_df = self.data_df.sort_index(level=[0, 1])
示例#8
0
文件: factor.py 项目: woolf-wen/zvt
    def __init__(self,
                 data_schema,
                 security_list=None,
                 security_type=SecurityType.stock,
                 exchanges=['sh', 'sz'],
                 codes=None,
                 the_timestamp=None,
                 start_timestamp=None,
                 end_timestamp=None,
                 keep_all_timestamp=False,
                 fill_method='ffill',
                 columns=[],
                 filters=None,
                 provider='eastmoney',
                 level=TradingLevel.LEVEL_1DAY,
                 effective_number=10) -> None:
        """Load the factor's raw data into ``self.original_df``.

        Everything except ``data_schema``, ``columns``, ``filters``,
        ``provider`` and ``level`` is forwarded to the parent initializer.
        The data is then fetched with ``get_data`` — selected by
        ``self.security_list`` when it is truthy, otherwise by
        ``self.codes`` — and indexed with ``index_df_with_security_time``.

        :param data_schema: schema to query
        :param columns: schema columns to load; when non-empty the schema's
            ``security_id`` and ``timestamp`` columns are added and each
            item's ``key`` becomes an entry of ``self.factors``. When empty,
            ``self.columns`` is ``None`` and ``self.factors`` is not
            assigned here.
        :param filters: passed through to ``get_data``
        :param provider: data provider name passed to ``get_data``
        :param level: trading level passed to ``get_data``
        :raises Exception: if no data is returned
        """
        super().__init__(security_list, security_type, exchanges, codes,
                         the_timestamp, start_timestamp, end_timestamp,
                         keep_all_timestamp, fill_method, effective_number)

        self.data_schema = data_schema
        self.provider = provider
        self.level = level
        self.filters = filters

        if not columns:
            self.columns = None
        else:
            requested = set(columns)
            self.columns = requested | {
                self.data_schema.security_id, self.data_schema.timestamp
            }
            self.factors = [column.key for column in columns]

        # Prefer the explicit security list; fall back to codes otherwise.
        if self.security_list:
            selector = {'security_list': self.security_list}
        else:
            selector = {'codes': self.codes}

        self.original_df = get_data(data_schema=self.data_schema,
                                    provider=self.provider,
                                    columns=self.columns,
                                    start_timestamp=self.start_timestamp,
                                    end_timestamp=self.end_timestamp,
                                    filters=self.filters,
                                    level=self.level,
                                    **selector)

        fetched = self.original_df
        if fetched is None or fetched.empty:
            message = 'no data for: {} {} level:{} from: {} to: {}'.format(
                self.security_list, self.codes, self.level,
                self.start_timestamp, self.end_timestamp)
            raise Exception(message)

        self.original_df = index_df_with_security_time(fetched)

        self.logger.info('factor:{},original_df:\n{}'.format(
            self.factor_name, self.original_df))