def breadth_computing(self):
    """Score each security per timestamp against cross-sectional quantiles.

    Only the ``'quantile'`` method is implemented: for every timestamp the
    factor quantiles at ``score_levels`` are computed across securities, and
    each security's factor value is mapped to the highest score level whose
    quantile threshold it reaches. The scored frame is stored in
    ``self.result_df`` (indexed by security/time, columns = ``self.factors``).
    """
    if self.breadth_computing_method == 'quantile':
        self.score_levels = self.breadth_computing_param['score_levels']
        # highest score first so the scoring loop finds the best match first
        self.score_levels.sort(reverse=True)
        # cross-sectional quantiles per timestamp (index level 1)
        self.quantile = self.depth_df.groupby(level=1).quantile(
            self.score_levels)
        self.quantile.index.names = ['timestamp', 'score']
        self.logger.info('factor:{},quantile:\n{}'.format(
            self.factor_name, self.quantile))

        self.result_df = self.depth_df.copy()
        self.result_df.reset_index(inplace=True, level='security_id')
        # attach each timestamp's quantile table to every row of that timestamp
        self.result_df['quantile'] = None
        for timestamp in self.quantile.index.levels[0]:
            length = len(self.result_df.loc[self.result_df.index == timestamp,
                                            'quantile'])
            self.result_df.loc[self.result_df.index == timestamp,
                               'quantile'] = [
                                   self.quantile.loc[timestamp].to_dict()
                               ] * length
        self.logger.info('factor:{},df with quantile:\n{}'.format(
            self.factor_name, self.result_df))

        def calculate_score(df, factor_name, quantile):
            # Map a factor value to the highest score level whose quantile
            # threshold it reaches; values below the lowest threshold score 0.
            original_value = df[factor_name]
            score_map = quantile.get(factor_name)
            min_score = self.score_levels[-1]
            if original_value < score_map.get(min_score):
                return 0
            for score in self.score_levels[:-1]:
                if original_value >= score_map.get(score):
                    return score
            # FIX: the value reached the lowest threshold but none of the
            # higher ones; previously this fell through and returned None
            return min_score

        for factor in self.factors:
            self.result_df[factor] = self.result_df.apply(
                lambda x: calculate_score(x, factor, x['quantile']), axis=1)

        self.result_df = self.result_df.reset_index()
        self.result_df = index_df_with_security_time(self.result_df)
        self.result_df = self.result_df.loc[:, self.factors]
        # drop duplicated (security, timestamp) rows, keeping the first
        self.result_df = self.result_df.loc[
            ~self.result_df.index.duplicated(keep='first')]
        self.logger.info('factor:{},df:\n{}'.format(self.factor_name,
                                                    self.result_df))
        self.fill_gap()
def on_category_data_added(self, category, added_data: pd.DataFrame):
    """Recompute the configured indicators for *category* over a trailing
    window and append the freshly-arrived rows (with indicator columns) to
    ``self.depth_df``.

    The window is ``valid_window`` plus the number of added rows, so rolling
    indicators (ma/macd) have enough history to be valid on the new rows.
    """
    size = len(added_data)
    window_df = self.data_df.loc[category].iloc[-self.valid_window - size:]
    # stocks are computed on the forward-adjusted close, others on raw close
    price_col = 'qfq_close' if self.security_type == SecurityType.stock else 'close'
    for idx, indicator in enumerate(self.indicators):
        params = self.indicators_param[idx]
        if indicator == 'ma':
            win = params.get('window')
            window_df['ma{}'.format(win)] = ma(window_df[price_col],
                                               window=win)
        if indicator == 'macd':
            window_df['diff'], window_df['dea'], window_df['m'] = macd(
                window_df[price_col],
                slow=params.get('slow'),
                fast=params.get('fast'),
                n=params.get('n'))
    # keep only the newly-added rows, re-key them and merge into depth_df
    fresh = window_df.iloc[-size:, ]
    fresh = fresh.reset_index()
    fresh[self.category_field] = category
    fresh = index_df_with_security_time(fresh)
    self.depth_df = self.depth_df.append(fresh)
    self.depth_df = self.depth_df.sort_index(level=[0, 1])
def __init__(self,
             security_type=SecurityType.stock,
             exchanges=None,
             codes=None,
             the_timestamp=None,
             window=None,
             window_func='mean',
             start_timestamp=None,
             end_timestamp=None,
             keep_all_timestamp=False,
             fill_method='ffill',
             columns=None,
             filters=None,
             provider='eastmoney') -> None:
    """Load the factor's source data and apply the optional rolling window.

    :param columns: schema columns to fetch; ``security_id`` and ``timestamp``
           are always added so the result can be (security, time) indexed
    :param window: optional lookback period; when set, ``window_func``
           ('mean' or 'count') is applied per security over that many days
    """
    # FIX: list defaults were mutable objects shared across all instances
    if exchanges is None:
        exchanges = ['sh', 'sz']
    if columns is None:
        columns = []
    super().__init__(security_type, exchanges, codes, the_timestamp, window,
                     window_func, start_timestamp, end_timestamp,
                     keep_all_timestamp, fill_method)
    self.columns = set(columns) | {
        self.data_schema.security_id, self.data_schema.timestamp
    }
    self.factors = [item.key for item in columns]
    self.provider = provider
    self.original_df = get_data(data_schema=self.data_schema,
                                provider=self.provider,
                                codes=self.codes,
                                columns=self.columns,
                                start_timestamp=self.fetch_start_timestamp,
                                end_timestamp=self.end_timestamp,
                                filters=filters)
    self.original_df = index_df_with_security_time(self.original_df)
    self.logger.info(self.original_df)
    if self.window:
        self.data_df = self.original_df.reset_index(level='timestamp')
        # TODO:better way to handle window function
        if self.window_func == 'mean':
            self.data_df = self.data_df.groupby(level=0).rolling(
                window='{}D'.format(self.window.days),
                on='timestamp').mean()
        elif self.window_func == 'count':
            self.data_df = self.data_df.groupby(level=0).rolling(
                window='{}D'.format(self.window.days),
                on='timestamp').count()
        self.data_df = self.data_df.reset_index(level=0, drop=True)
        self.data_df = self.data_df.set_index('timestamp', append=True)
        # FIX: stray debug print() replaced with the logger used elsewhere
        self.logger.info(self.data_df)
    else:
        self.data_df = self.original_df
    # trim the (possibly wider) fetched range down to the requested window
    self.data_df = self.data_df.loc[(
        slice(None), slice(self.start_timestamp, self.end_timestamp)), :]
    self.logger.info(self.data_df)
def move_on(self, to_timestamp, touching_timestamp):
    """Advance ``original_df`` to *to_timestamp* for every recorded security.

    For each security, poll ``get_data`` for rows newer than its latest
    recorded timestamp. If *touching_timestamp* lies in the future, sleep
    until then before fetching. Give up on a security once ``now`` exceeds
    *touching_timestamp* by half a trading-level interval.
    """
    df = self.original_df.reset_index(level='timestamp')
    # newest stored kdata timestamp per security
    recorded_timestamps = df.groupby(level=0)['timestamp'].max()
    self.logger.info('current_timestamps:\n{}'.format(recorded_timestamps))
    for security_id, recorded_timestamp in recorded_timestamps.iteritems():
        while True:
            now_timestamp = now_pd_timestamp()
            if touching_timestamp > now_timestamp:
                # FIX: use total_seconds(); Timedelta.seconds drops the days
                # component and truncates sub-second parts, so the sleep could
                # be far too short
                delta = (touching_timestamp - now_timestamp).total_seconds()
                self.logger.info(
                    'want to get {} {} kdata for {},now is:{},waiting:{}seconds'
                    .format(to_timestamp, touching_timestamp, security_id,
                            now_timestamp, delta))
                time.sleep(delta)
            added = get_data(data_schema=self.data_schema,
                             provider=self.provider,
                             security_id=security_id,
                             columns=self.columns,
                             start_timestamp=recorded_timestamp,
                             end_timestamp=to_timestamp,
                             filters=self.filters,
                             level=self.level)
            if (added is not None) and not added.empty:
                # the fetch is inclusive of recorded_timestamp; drop that row
                would_added = added[
                    added['timestamp'] != recorded_timestamp]
                if not would_added.empty:
                    would_added = index_df_with_security_time(would_added)
                    self.logger.info(
                        'would_added:\n{}'.format(would_added))
                    self.original_df = self.original_df.append(would_added)
                    self.original_df = self.original_df.sort_index(
                        level=[0, 1])
                    self.on_data_added(security_id=security_id,
                                       size=len(would_added))
                break
            else:
                self.logger.info(
                    'touching_timestamp:{} now_timestamp:{} kdata for {} not ready'
                    .format(touching_timestamp, now_pd_timestamp(),
                            security_id))
                # stop waiting once half a level interval past the target
                if now_timestamp > touching_timestamp + pd.Timedelta(
                        seconds=self.level.to_second() / 2):
                    self.logger.warning(
                        'now_timestamp:{},still could not get {} {} kdata for {}'
                        .format(now_timestamp, to_timestamp,
                                touching_timestamp, security_id))
                    break
def finance_score(data_schema,
                  security_id=None,
                  codes=None,
                  provider='eastmoney',
                  fields=None,
                  timestamp=None,
                  report_count=20):
    """Rank securities by the mean of their recent financial-report fields.

    Fetches up to *report_count* report periods ending at *timestamp*,
    averages each field per security, and replaces every field with a
    quantile-bucket score in {0, 0.1, 0.3, 0.5, 0.7, 0.9}.

    :param fields: schema field names to score; id/timestamp/report_date
           columns are always fetched in addition
    :param timestamp: cutoff for report data; defaults to "now" at call time
    """
    # FIX: the default used to be now_pd_timestamp() evaluated once at import
    # time, freezing "now" for the lifetime of the process
    if timestamp is None:
        timestamp = now_pd_timestamp()
    # FIX: fields=None previously raised TypeError on list concatenation
    fields = (fields or []) + ['security_id', 'timestamp', 'report_date']
    data_df = get_data(data_schema=data_schema,
                       security_id=security_id,
                       codes=codes,
                       provider=provider,
                       columns=fields,
                       end_timestamp=timestamp)
    # keep only the most recent report_count report periods
    time_series = data_df['report_date'].drop_duplicates()
    time_series = time_series[-report_count:]
    data_df = index_df_with_security_time(data_df)
    idx = pd.IndexSlice
    df = data_df.loc[idx[:, time_series], ]
    print(df)
    # average every field across the selected reports, per security
    df = df.groupby(df['security_id']).mean()
    print(df)
    quantile = df.quantile([0.1, 0.3, 0.5, 0.7, 0.9])

    def evaluate_score(s, column):
        # highest quantile bucket the value exceeds; below the 10% quantile
        # scores 0
        the_column = column
        if s > quantile.loc[0.9, the_column]:
            return 0.9
        if s > quantile.loc[0.7, the_column]:
            return 0.7
        if s > quantile.loc[0.5, the_column]:
            return 0.5
        if s > quantile.loc[0.3, the_column]:
            return 0.3
        if s > quantile.loc[0.1, the_column]:
            return 0.1
        return 0

    for item in quantile.columns:
        df[item] = df[item].apply(lambda x: evaluate_score(x, item))
    print(df)
def run(self):
    """Score each security per timestamp against cross-sectional quantiles.

    Computes factor quantiles at ``score_levels`` across securities for every
    timestamp, then maps each security's factor value to the highest score
    level whose quantile threshold it reaches. Stores the result in
    ``self.df`` and fills index gaps afterwards.
    """
    # cross-sectional quantiles per timestamp (index level 1)
    self.quantile = self.data_df.groupby(level=1).quantile(
        self.score_levels)
    self.quantile.index.names = ['timestamp', 'score']
    self.logger.info(self.quantile)

    self.df = self.data_df.copy()
    self.df.reset_index(inplace=True, level='security_id')
    # attach each timestamp's quantile table to every row of that timestamp
    self.df['quantile'] = None
    for timestamp in self.quantile.index.levels[0]:
        length = len(self.df.loc[self.df.index == timestamp, 'quantile'])
        self.df.loc[self.df.index == timestamp,
                    'quantile'] = [self.quantile.loc[timestamp].to_dict()
                                   ] * length
    self.logger.info(self.df)

    def calculate_score(df, factor_name, quantile):
        # Map a factor value to the highest score level whose quantile
        # threshold it reaches; values below the lowest threshold score 0.
        original_value = df[factor_name]
        score_map = quantile.get(factor_name)
        min_score = self.score_levels[-1]
        if original_value < score_map.get(min_score):
            return 0
        for score in self.score_levels[:-1]:
            if original_value >= score_map.get(score):
                return score
        # FIX: the value reached the lowest threshold but none of the higher
        # ones; previously this fell through and returned None
        return min_score

    for factor in self.factors:
        self.df[factor] = self.df.apply(
            lambda x: calculate_score(x, factor, x['quantile']), axis=1)

    self.df = self.df.reset_index()
    self.df = index_df_with_security_time(self.df)
    self.df = self.df.loc[:, self.factors]
    self.logger.info(self.df)
    self.fill_gap()
def on_data_added(self, security_id, size):
    """Recompute the configured indicators for *security_id* over a trailing
    window and append the freshly-arrived rows (with indicator columns) to
    ``self.data_df``.

    The window is ``valid_window`` plus the number of added rows, so rolling
    indicators (ma/macd) have enough history to be valid on the new rows.
    """
    tail = self.original_df.loc[security_id].iloc[-self.valid_window - size:]
    # stocks are computed on the forward-adjusted close, others on raw close
    price_col = 'qfq_close' if self.security_type == SecurityType.stock else 'close'
    for idx, indicator in enumerate(self.indicators):
        params = self.indicators_param[idx]
        if indicator == 'ma':
            win = params.get('window')
            tail['ma{}'.format(win)] = ma(tail[price_col], window=win)
        if indicator == 'macd':
            tail['diff'], tail['dea'], tail['m'] = macd(
                tail[price_col],
                slow=params.get('slow'),
                fast=params.get('fast'),
                n=params.get('n'))
    # keep only the newly-added rows, re-key them and merge into data_df
    fresh = tail.iloc[-size:, ]
    fresh = fresh.reset_index()
    fresh['security_id'] = security_id
    fresh = index_df_with_security_time(fresh)
    self.data_df = self.data_df.append(fresh)
    self.data_df = self.data_df.sort_index(level=[0, 1])
def __init__(self,
             data_schema,
             security_list=None,
             security_type=SecurityType.stock,
             exchanges=None,
             codes=None,
             the_timestamp=None,
             start_timestamp=None,
             end_timestamp=None,
             keep_all_timestamp=False,
             fill_method='ffill',
             columns=None,
             filters=None,
             provider='eastmoney',
             level=TradingLevel.LEVEL_1DAY,
             effective_number=10) -> None:
    """Load the factor's source data for the given securities and level.

    :param columns: schema columns to fetch; when given, ``security_id`` and
           ``timestamp`` are always added so the result can be
           (security, time) indexed; when empty/None, all columns are fetched
    :raises Exception: when no data is available for the requested range
    """
    # FIX: the exchange list default was a mutable object shared across
    # all instances
    if exchanges is None:
        exchanges = ['sh', 'sz']
    super().__init__(security_list, security_type, exchanges, codes,
                     the_timestamp, start_timestamp, end_timestamp,
                     keep_all_timestamp, fill_method, effective_number)
    self.data_schema = data_schema
    if columns:
        self.columns = set(columns) | {
            self.data_schema.security_id, self.data_schema.timestamp
        }
        self.factors = [item.key for item in columns]
    else:
        self.columns = None
    self.provider = provider
    self.level = level
    self.filters = filters
    # use security_list if possible, otherwise fall back to codes; the two
    # get_data calls only differed in this one keyword
    if self.security_list:
        selector = {'security_list': self.security_list}
    else:
        selector = {'codes': self.codes}
    self.original_df = get_data(data_schema=self.data_schema,
                                provider=self.provider,
                                columns=self.columns,
                                start_timestamp=self.start_timestamp,
                                end_timestamp=self.end_timestamp,
                                filters=self.filters,
                                level=self.level,
                                **selector)
    if self.original_df is None or self.original_df.empty:
        raise Exception(
            'no data for: {} {} level:{} from: {} to: {}'.format(
                self.security_list, self.codes, self.level,
                self.start_timestamp, self.end_timestamp))
    self.original_df = index_df_with_security_time(self.original_df)
    self.logger.info('factor:{},original_df:\n{}'.format(
        self.factor_name, self.original_df))