Пример #1
0
    def load_data(self):
        """Query the configured schema into ``self.data_df`` and notify listeners.

        The query is narrowed by ``self.security_list`` when it is truthy and
        by ``self.codes`` otherwise. A non-null result is re-indexed with
        ``index_df_with_category_time`` (category taken from
        ``self.category_field``). Every listener in ``self.data_listeners``
        then receives the frame through ``on_data_loaded``, whether or not it
        was re-indexed.
        """
        # Only the selector differs between the two query flavours.
        if self.security_list:
            selector = {'security_list': self.security_list}
        else:
            selector = {'codes': self.codes}

        self.data_df = get_data(data_schema=self.data_schema,
                                provider=self.provider,
                                columns=self.columns,
                                start_timestamp=self.start_timestamp,
                                end_timestamp=self.end_timestamp,
                                filters=self.filters,
                                level=self.level,
                                **selector)

        if df_is_not_null(self.data_df):
            self.data_df = index_df_with_category_time(self.data_df,
                                                       category=self.category_field)

        for subscriber in self.data_listeners:
            subscriber.on_data_loaded(self.data_df)
Пример #2
0
def common_data(data_schema, security_id=None, codes=None, level=None, provider='eastmoney', columns=None,
                start_timestamp=None, end_timestamp=None, filters=None, session=None, order=None, limit=None):
    """Fetch data frames for one security id or for each code separately.

    :param data_schema: schema forwarded to ``get_data``
    :param security_id: when truthy, a single query is made for it
    :param codes: when ``security_id`` is falsy, one query is made per code
    :return: a one-element list for ``security_id``, a list with one frame per
        code for ``codes``, or ``None`` when neither selector is truthy
    """

    def fetch(**selector):
        # All non-selector arguments are shared by every query.
        return get_data(data_schema=data_schema, level=level, provider=provider, columns=columns,
                        return_type='df', start_timestamp=start_timestamp, end_timestamp=end_timestamp,
                        filters=filters, session=session, order=order, limit=limit, **selector)

    if security_id:
        return [fetch(security_id=security_id, codes=None)]
    if codes:
        return [fetch(security_id=None, codes=[code]) for code in codes]
    return None
Пример #3
0
def get_manager_trading(provider='eastmoney', security_id=None, codes=None, columns=None,
                        return_type='df', session=None, start_timestamp=None, end_timestamp=None,
                        filters=None, order=None, limit=None):
    """Query ``ManagerTrading`` records through ``get_data``.

    Every argument is forwarded unchanged; ``level`` is always passed as
    ``None``.
    """
    return get_data(
        data_schema=ManagerTrading,
        security_id=security_id,
        codes=codes,
        level=None,
        provider=provider,
        columns=columns,
        return_type=return_type,
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        filters=filters,
        session=session,
        order=order,
        limit=limit,
    )
Пример #4
0
    def evaluate_start_end_size_timestamps(self, security_item):
        """
        evaluate the size for recording data

        :param security_item: the security to record; its ``id`` and ``timestamp`` attributes are read
        :return: (start, end, size, timestamps); end and timestamps are always None here and
            size is always ``self.default_size``. start is the timestamp of the latest stored
            record, else ``security_item.timestamp``, else None when that is falsy too
        :rtype:(pd.Timestamp,pd.Timestamp,int,None)
        """

        # newest stored record for this security (at most one item)
        newest = get_data(security_id=security_item.id,
                          provider=self.provider,
                          data_schema=self.data_schema,
                          order=self.data_schema.timestamp.desc(),
                          limit=1,
                          return_type='domain',
                          session=self.session)

        latest_timestamp = newest[0].timestamp if newest else security_item.timestamp

        # any falsy timestamp is reported as None
        start = latest_timestamp if latest_timestamp else None
        return start, None, self.default_size, None
Пример #5
0
def get_kdata(security_id,
              level=TradingLevel.LEVEL_1DAY.value,
              provider='eastmoney',
              columns=None,
              return_type='df',
              start_timestamp=None,
              end_timestamp=None,
              filters=None,
              session=None,
              order=None,
              limit=None):
    """Query kdata for one security through ``get_data``.

    The schema is resolved by ``get_kdata_schema`` from the security type
    decoded out of ``security_id`` and from ``level``; all other arguments are
    forwarded to ``get_data`` unchanged.
    """
    # decode_security_id yields three parts; only the security type is needed.
    security_type, _exchange, _code = decode_security_id(security_id)
    kdata_schema = get_kdata_schema(security_type, level=level)

    query = dict(security_id=security_id,
                 level=level,
                 provider=provider,
                 columns=columns,
                 return_type=return_type,
                 start_timestamp=start_timestamp,
                 end_timestamp=end_timestamp,
                 filters=filters,
                 session=session,
                 order=order,
                 limit=limit)
    return get_data(data_schema=kdata_schema, **query)
Пример #6
0
def get_account(trader_name=None,
                return_type='df',
                start_timestamp=None,
                end_timestamp=None,
                filters=None,
                session=None,
                order=None,
                limit=None):
    """Query ``SimAccount`` records from the ``'zvt'`` provider.

    When ``trader_name`` is truthy, a ``SimAccount.trader_name == trader_name``
    condition is added to a new filter list (the caller's list is not
    modified). Security id, codes, level and columns are always passed as
    ``None``.
    """
    if trader_name:
        name_filter = [SimAccount.trader_name == trader_name]
        filters = filters + name_filter if filters else name_filter

    fixed = dict(data_schema=SimAccount,
                 security_id=None,
                 codes=None,
                 level=None,
                 provider='zvt',
                 columns=None)
    return get_data(return_type=return_type,
                    start_timestamp=start_timestamp,
                    end_timestamp=end_timestamp,
                    filters=filters,
                    session=session,
                    order=order,
                    limit=limit,
                    **fixed)
Пример #7
0
    def evaluate_start_end_size_timestamps(self, security_item):
        """
        evaluate which of the known timestamps of a security still need recording

        The candidate timestamps come from ``self.security_timestamps_map`` (filled on demand by
        ``self.init_timestamps``). Timestamps not later than the latest stored record are dropped.

        :param security_item: the security to record; its ``id`` attribute is read
        :return: (start, end, size, timestamps) for the remaining timestamps, or
            (None, None, 0, None) when everything has been recorded already
        :raises AssertionError: when no timestamps could be obtained for the security
        """
        the_timestamps = self.security_timestamps_map.get(security_item.id)
        if not the_timestamps:
            self.init_timestamps(security_item)
            the_timestamps = self.security_timestamps_map.get(security_item.id)

        if not the_timestamps:
            message = "could not get time series for:{}".format(security_item.id)
            # No exception is being handled here, so log with error() instead of exception().
            self.logger.error(message)
            # Raise explicitly: a bare `assert False` is stripped when running with `python -O`.
            raise AssertionError(message)

        timestamps = sorted(to_pd_timestamp(t) for t in the_timestamps)

        self.logger.info(
            'security_id:{},init timestamps start:{},end:{}'.format(security_item.id, timestamps[0], timestamps[-1]))

        latest_record = get_data(security_id=security_item.id,
                                 provider=self.provider,
                                 data_schema=self.data_schema,
                                 order=self.data_schema.timestamp.desc(), limit=1,
                                 return_type='domain',
                                 session=self.session)

        if latest_record:
            latest_timestamp = latest_record[0].timestamp
            self.logger.info('latest record timestamp:{}'.format(latest_timestamp))
            # keep only what is newer than the latest stored record
            timestamps = [t for t in timestamps if t > latest_timestamp]

            if not timestamps:
                return None, None, 0, None

        return timestamps[0], timestamps[-1], len(timestamps), timestamps
Пример #8
0
    def generate_domain(self, security_item, original_data):
        """
        build or update the data_schema instance for one piece of original_data

        An already stored item with the generated id is reused when ``self.force_update`` is set and
        skipped otherwise; when none is stored a new instance is created. The instance is then
        filled from original_data using ``self.get_data_map()``.

        :param security_item: the security the data belongs to; ``id`` and ``code`` are read
        :param original_data: the raw record, indexable by the timestamp field name
        :return: the filled domain item, or None when the data was saved before and is not updated
        """
        the_id = self.generate_domain_id(security_item, original_data)

        stored = get_data(data_schema=self.data_schema, session=self.session, provider=self.provider,
                          security_id=security_item.id,
                          filters=[self.data_schema.id == the_id],
                          return_type='domain')

        if stored:
            if not self.force_update:
                self.logger.info('ignore the data {}:{} saved before'.format(self.data_schema, the_id))
                return None
            domain_item = stored[0]
        else:
            raw_timestamp = original_data[self.get_timestamp_field()]
            try:
                timestamp = to_pd_timestamp(raw_timestamp)
            except Exception as e:
                # an unparsable timestamp is logged and stored as None
                self.logger.exception(e)
                timestamp = None

            domain_item = self.data_schema(id=the_id,
                                           code=security_item.code,
                                           security_id=security_item.id,
                                           timestamp=timestamp)

        fill_domain_from_dict(domain_item, original_data, self.get_data_map())
        return domain_item
Пример #9
0
def get_trader(trader_name=None,
               return_type='df',
               start_timestamp=None,
               end_timestamp=None,
               filters=None,
               session=None,
               order=None,
               limit=None) -> List[business.Trader]:
    """Query ``business.Trader`` records from the ``'zvt'`` provider.

    When ``trader_name`` is truthy, a ``business.Trader.trader_name ==
    trader_name`` condition is added to a new filter list (the caller's list
    is not modified). Security id, codes, level and columns are always passed
    as ``None``; the result is whatever ``get_data`` returns for
    ``return_type``.
    """
    if trader_name:
        name_filter = [business.Trader.trader_name == trader_name]
        filters = filters + name_filter if filters else name_filter

    query = dict(return_type=return_type,
                 start_timestamp=start_timestamp,
                 end_timestamp=end_timestamp,
                 filters=filters,
                 session=session,
                 order=order,
                 limit=limit)
    return get_data(data_schema=business.Trader,
                    security_id=None,
                    codes=None,
                    level=None,
                    provider='zvt',
                    columns=None,
                    **query)
Пример #10
0
def get_top_ten_tradable_holder(provider='eastmoney', security_id=None, codes=None, columns=None,
                                return_type='df', session=None, start_timestamp=None, end_timestamp=None,
                                filters=None, order=None, limit=None):
    """Query ``TopTenTradableHolder`` records through ``get_data``.

    Every argument is forwarded unchanged; ``level`` is always passed as
    ``None``.
    """
    forwarded = {
        'security_id': security_id,
        'codes': codes,
        'provider': provider,
        'columns': columns,
        'return_type': return_type,
        'start_timestamp': start_timestamp,
        'end_timestamp': end_timestamp,
        'filters': filters,
        'session': session,
        'order': order,
        'limit': limit,
    }
    return get_data(data_schema=TopTenTradableHolder, level=None, **forwarded)
Пример #11
0
    def __init__(self,
                 security_type=SecurityType.stock,
                 exchanges=['sh', 'sz'],
                 codes=None,
                 the_timestamp=None,
                 window=None,
                 window_func='mean',
                 start_timestamp=None,
                 end_timestamp=None,
                 keep_all_timestamp=False,
                 fill_method='ffill',
                 columns=[],
                 filters=None,
                 provider='eastmoney') -> None:
        """Load the schema's data and optionally apply a rolling window to it.

        The data is fetched for ``self.codes`` from ``self.fetch_start_timestamp``
        to ``self.end_timestamp`` and indexed by security and time into
        ``self.original_df``. When ``self.window`` is truthy, ``self.data_df``
        is the per-security rolling ``mean`` or ``count`` over
        ``self.window.days`` days (other ``window_func`` values apply no
        rolling function); otherwise it is ``self.original_df`` itself. Finally
        ``self.data_df`` is sliced to ``start_timestamp``..``end_timestamp``.

        ``columns`` are schema columns; their ``key`` attributes become
        ``self.factors``, and the schema's ``security_id`` and ``timestamp``
        columns are always added to the queried columns.
        """
        super().__init__(security_type, exchanges, codes, the_timestamp,
                         window, window_func, start_timestamp, end_timestamp,
                         keep_all_timestamp, fill_method)

        requested = set(columns)
        self.columns = requested | {self.data_schema.security_id,
                                    self.data_schema.timestamp}
        self.factors = [column.key for column in columns]
        self.provider = provider

        fetched = get_data(data_schema=self.data_schema,
                           provider=self.provider,
                           codes=self.codes,
                           columns=self.columns,
                           start_timestamp=self.fetch_start_timestamp,
                           end_timestamp=self.end_timestamp,
                           filters=filters)
        self.original_df = index_df_with_security_time(fetched)

        self.logger.info(self.original_df)

        if not self.window:
            self.data_df = self.original_df
        else:
            frame = self.original_df.reset_index(level='timestamp')

            # TODO:better way to handle window function
            if self.window_func == 'mean' or self.window_func == 'count':
                rolling = frame.groupby(level=0).rolling(
                    window='{}D'.format(self.window.days), on='timestamp')
                frame = rolling.mean() if self.window_func == 'mean' else rolling.count()

            # drop the group level added by groupby, then restore the time level
            frame = frame.reset_index(level=0, drop=True)
            frame = frame.set_index('timestamp', append=True)
            print(frame)
            self.data_df = frame

        time_range = slice(self.start_timestamp, self.end_timestamp)
        self.data_df = self.data_df.loc[(slice(None), time_range), :]

        self.logger.info(self.data_df)
Пример #12
0
def df_to_db(df, data_schema, provider):
    """Append the rows of ``df`` whose ``id`` is not stored yet to the schema's table.

    :param df: frame to persist; must contain an ``id`` column
    :param data_schema: schema whose ``__tablename__`` names the target table
    :param provider: provider used both to pick the db engine and to look up
        the ids that are already stored
    """
    store_category = get_store_category(data_schema)
    db_engine = get_db_engine(provider, store_category=store_category)

    # Read the existing ids from the same provider the rows are written to;
    # the query previously relied on get_data's own default provider.
    current = get_data(data_schema=data_schema,
                       columns=[data_schema.id],
                       provider=provider)

    # Nothing stored yet means there is nothing to de-duplicate against.
    if current is not None and not current.empty:
        df = df[~df['id'].isin(current['id'])]

    df.to_sql(data_schema.__tablename__,
              db_engine,
              index=False,
              if_exists='append')
Пример #13
0
    def move_on(self, to_timestamp, touching_timestamp):
        """Extend ``self.original_df`` with data newer than what each security already has.

        For every security in ``self.original_df`` the method waits until
        ``touching_timestamp`` if it is still in the future, then queries the
        data between the security's latest recorded timestamp and
        ``to_timestamp``. Rows other than the already recorded timestamp are
        appended, the frame is re-sorted and ``self.on_data_added`` is called.
        The query is retried until data arrives or the current time is more
        than half a level interval past ``touching_timestamp``.

        NOTE(review): retries are issued back to back without a pause.

        :param to_timestamp: upper bound of the queried time range
        :param touching_timestamp: moment from which the new data is expected
        """
        df = self.original_df.reset_index(level='timestamp')
        recorded_timestamps = df.groupby(level=0)['timestamp'].max()

        self.logger.info('current_timestamps:\n{}'.format(recorded_timestamps))

        # Series.items() works on old and new pandas; iteritems() was removed in pandas 2.0.
        for security_id, recorded_timestamp in recorded_timestamps.items():
            while True:
                now_timestamp = now_pd_timestamp()
                if touching_timestamp > now_timestamp:
                    # total_seconds() covers the whole wait; `.seconds` is only the
                    # within-day component and drops days and fractions of a second.
                    delta = (touching_timestamp - now_timestamp).total_seconds()
                    self.logger.info(
                        'want to get {} {} kdata for {},now is:{},waiting:{}sencods'
                        .format(to_timestamp, touching_timestamp, security_id,
                                now_timestamp, delta))
                    time.sleep(delta)

                added = get_data(data_schema=self.data_schema,
                                 provider=self.provider,
                                 security_id=security_id,
                                 columns=self.columns,
                                 start_timestamp=recorded_timestamp,
                                 end_timestamp=to_timestamp,
                                 filters=self.filters,
                                 level=self.level)

                if (added is not None) and not added.empty:
                    # the query range includes the already recorded timestamp; skip it
                    would_added = added[
                        added['timestamp'] != recorded_timestamp]
                    if not would_added.empty:
                        would_added = index_df_with_security_time(would_added)
                        self.logger.info(
                            'would_added:\n{}'.format(would_added))

                        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
                        self.original_df = pd.concat(
                            [self.original_df, would_added])
                        self.original_df = self.original_df.sort_index(
                            level=[0, 1])
                        self.on_data_added(security_id=security_id,
                                           size=len(would_added))
                        break
                    else:
                        self.logger.info(
                            'touching_timestamp:{} now_timestamp:{} kdata for {} not ready'
                            .format(touching_timestamp, now_pd_timestamp(),
                                    security_id))

                # give up once half a level interval has passed since touching_timestamp
                if now_timestamp > touching_timestamp + pd.Timedelta(
                        seconds=self.level.to_second() / 2):
                    self.logger.warning(
                        'now_timestamp:{},still could not get {} {} kdata for {}'
                        .format(now_timestamp, to_timestamp,
                                touching_timestamp, security_id))
                    break
Пример #14
0
    def evaluate_start_end_size_timestamps(self, security_item):
        """
        evaluate the size for recording data

        :param security_item: the security to record; its ``id`` and ``timestamp`` attributes are read
        :return: (start, end, size, timestamps) where end and timestamps are always None here;
            size=0 means finish recording. Levels other than 1DAY, 5MIN and 1HOUR yield a bare None
            unless the closing-time check already returned
        :rtype:(pd.Timestamp,pd.Timestamp,int,None)
        """

        # newest stored record for this security and level (at most one item)
        newest = get_data(security_id=security_item.id,
                          provider=self.provider,
                          data_schema=self.data_schema, level=self.level.value,
                          order=self.data_schema.timestamp.desc(), limit=1,
                          return_type='domain',
                          session=self.session)

        latest_timestamp = newest[0].timestamp if newest else security_item.timestamp

        if not latest_timestamp:
            return latest_timestamp, None, self.default_size, None

        current_time = pd.Timestamp.now()
        time_delta = current_time - latest_timestamp
        elapsed_days = time_delta.days

        if self.level == TradingLevel.LEVEL_1DAY:
            size = 0 if is_same_date(current_time, latest_timestamp) else elapsed_days + 1
            return latest_timestamp, None, size, None

        close_hour, close_minute = get_close_time(security_item.id)

        # to today,check closing time
        at_close = latest_timestamp.hour == close_hour and latest_timestamp.minute == close_minute
        if elapsed_days == 0 and at_close:
            return latest_timestamp, None, 0, None

        # length of one bar in minutes for the supported intraday levels
        if self.level == TradingLevel.LEVEL_5MIN:
            bar_minutes = 5
        elif self.level == TradingLevel.LEVEL_1HOUR:
            bar_minutes = 60
        else:
            return None

        if elapsed_days > 0:
            minutes = (elapsed_days + 1) * get_one_day_trading_minutes(security_item.id)
            bars = math.ceil(minutes / bar_minutes)
        else:
            bars = math.ceil(time_delta.total_seconds() / (bar_minutes * 60))
        return latest_timestamp, None, int(bars) + 1, None
Пример #15
0
def finance_score(data_schema,
                  security_id=None,
                  codes=None,
                  provider='eastmoney',
                  fields=None,
                  timestamp=None,
                  report_count=20):
    """Score each security's averaged fields by the quantile band they fall into.

    The data up to ``timestamp`` is loaded, limited to the last
    ``report_count`` distinct report dates, averaged per security and each
    averaged value is replaced by the highest of the 0.9/0.7/0.5/0.3/0.1
    quantile levels it exceeds (0 when it exceeds none). Intermediate frames
    and the result are printed.

    :param data_schema: schema forwarded to ``get_data``
    :param security_id: forwarded to ``get_data``
    :param codes: forwarded to ``get_data``
    :param provider: forwarded to ``get_data``
    :param fields: columns to score; ``security_id``, ``timestamp`` and
        ``report_date`` are always added. ``None`` is treated as no extra fields
    :param timestamp: upper bound of the query; ``None`` means the current time,
        resolved on every call rather than once when the function is defined
    :param report_count: number of latest distinct report dates to use
    :return: the scored frame (also printed)
    """
    if timestamp is None:
        timestamp = now_pd_timestamp()

    # copy so the caller's sequence is never modified, and tolerate fields=None
    fields = list(fields or []) + ['security_id', 'timestamp', 'report_date']

    data_df = get_data(data_schema=data_schema,
                       security_id=security_id,
                       codes=codes,
                       provider=provider,
                       columns=fields,
                       end_timestamp=timestamp)

    time_series = data_df['report_date'].drop_duplicates()
    time_series = time_series[-report_count:]

    data_df = index_df_with_security_time(data_df)

    idx = pd.IndexSlice

    df = data_df.loc[idx[:, time_series], ]
    print(df)

    df = df.groupby(df['security_id']).mean()
    print(df)

    quantile = df.quantile([0.1, 0.3, 0.5, 0.7, 0.9])

    def evaluate_score(s, column):
        # highest quantile level whose value is exceeded by s, 0 when none is
        for level in (0.9, 0.7, 0.5, 0.3, 0.1):
            if s > quantile.loc[level, column]:
                return level
        return 0

    for item in quantile.columns:
        # pass the column explicitly instead of capturing the loop variable in a lambda
        df[item] = df[item].apply(evaluate_score, args=(item,))

    print(df)
    return df
Пример #16
0
def get_securities(security_list: List[str] = None,
                   security_type: Union[SecurityType, str] = 'stock',
                   exchanges: List[str] = None,
                   codes: List[str] = None,
                   columns: List = None,
                   return_type: str = 'df',
                   session: Session = None,
                   start_timestamp: Union[str, pd.Timestamp] = None,
                   end_timestamp: Union[str, pd.Timestamp] = None,
                   filters: List = None,
                   order: object = None,
                   limit: int = None,
                   provider: Union[str, Provider] = 'eastmoney',
                   index: str = 'code',
                   index_is_time: bool = False) -> object:
    """Query the securities of one type through ``get_data``.

    The schema is resolved by ``get_security_schema(security_type)``. When
    ``order`` is ``None`` the result is ordered by code ascending, and a truthy
    ``exchanges`` adds an ``exchange IN (...)`` condition to the filters. All
    remaining arguments are forwarded to ``get_data`` unchanged; ``security_id``
    and ``level`` are always ``None``.

    :return: whatever ``get_data`` returns for ``return_type``
    """
    data_schema = get_security_schema(security_type)

    # Compare with None rather than truth-testing: `order` is a SQL expression
    # and such clauses are not meant to be evaluated as booleans.
    if order is None:
        order = data_schema.code.asc()

    if exchanges:
        exchange_filter = data_schema.exchange.in_(exchanges)
        # Build a new list so the caller's `filters` list is left untouched.
        filters = list(filters) + [exchange_filter] if filters else [exchange_filter]

    return get_data(data_schema=data_schema,
                    security_list=security_list,
                    security_id=None,
                    codes=codes,
                    level=None,
                    provider=provider,
                    columns=columns,
                    return_type=return_type,
                    start_timestamp=start_timestamp,
                    end_timestamp=end_timestamp,
                    filters=filters,
                    session=session,
                    order=order,
                    limit=limit,
                    index=index,
                    index_is_time=index_is_time)
Пример #17
0
def get_cash_flow_statement(provider='eastmoney', security_id=None, codes=None, columns=None,
                            return_type='df', session=None, start_timestamp=None, end_timestamp=None,
                            filters=None, order=None, limit=None):
    """Query ``CashFlowStatement`` records through ``get_data``.

    Every argument is forwarded unchanged; ``level`` is always passed as
    ``None``.
    """
    selection = dict(security_id=security_id, codes=codes, columns=columns, filters=filters)
    time_range = dict(start_timestamp=start_timestamp, end_timestamp=end_timestamp)
    return get_data(data_schema=CashFlowStatement,
                    level=None,
                    provider=provider,
                    return_type=return_type,
                    session=session,
                    order=order,
                    limit=limit,
                    **selection,
                    **time_range)
Пример #18
0
    def move_on(self,
                to_timestamp: Union[str, pd.Timestamp] = None,
                timeout: int = 20) -> bool:
        """
        get the data happened before to_timestamp,if not set,get all the data which means to now

        When ``self.data_df`` is still null the data is loaded through ``load_data`` and False
        is returned. Otherwise each category is queried from its latest recorded timestamp on,
        new rows are appended to ``self.data_df`` and the listeners are notified.

        Parameters
        ----------
        to_timestamp :
            upper bound of the queried time range
        timeout : the time waiting the data ready in seconds
            the clock starts once before the first category, so the budget is shared by all
            categories

        Returns
        -------
        whether got data
        """
        if not df_is_not_null(self.data_df):
            self.load_data()
            return False

        df = self.data_df.reset_index(level='timestamp')
        recorded_timestamps = df.groupby(level=0)['timestamp'].max()

        self.logger.info('level:{},current_timestamps:\n{}'.format(
            self.level, recorded_timestamps))

        changed = False
        # FIXME:we suppose history data should be there at first
        start_time = time.time()
        # Series.items() works on old and new pandas; iteritems() was removed in pandas 2.0.
        for category, recorded_timestamp in recorded_timestamps.items():
            # the filters are the same for every retry of this category
            category_filter = [self.category_column == category]
            if self.filters:
                filters = self.filters + category_filter
            else:
                filters = category_filter

            while True:
                added = get_data(data_schema=self.data_schema,
                                 provider=self.provider,
                                 columns=self.columns,
                                 start_timestamp=recorded_timestamp,
                                 end_timestamp=to_timestamp,
                                 filters=filters,
                                 level=self.level)

                if df_is_not_null(added):
                    # the query range includes the already recorded timestamp; skip it
                    would_added = added[
                        added['timestamp'] != recorded_timestamp].copy()
                    if not would_added.empty:
                        added = index_df_with_category_time(
                            would_added, category=self.category_field)
                        self.logger.info('category:{},added:\n{}'.format(
                            category, added))

                        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
                        self.data_df = pd.concat([self.data_df, added])
                        self.data_df = self.data_df.sort_index(level=[0, 1])

                        for listener in self.data_listeners:
                            listener.on_category_data_added(category=category,
                                                            added_data=added)
                        changed = True
                        # if got data,just move to another category
                        break

                cost_time = time.time() - start_time
                if cost_time > timeout:
                    self.logger.warning(
                        'category:{} level:{} getting data timeout,to_timestamp:{},now:{}'
                        .format(category, self.level, to_timestamp,
                                now_pd_timestamp()))
                    break

        if changed:
            for listener in self.data_listeners:
                listener.on_data_changed(self.data_df)

        return changed
Пример #19
0
    def __init__(self,
                 data_schema,
                 security_list=None,
                 security_type=SecurityType.stock,
                 exchanges=['sh', 'sz'],
                 codes=None,
                 the_timestamp=None,
                 start_timestamp=None,
                 end_timestamp=None,
                 keep_all_timestamp=False,
                 fill_method='ffill',
                 columns=[],
                 filters=None,
                 provider='eastmoney',
                 level=TradingLevel.LEVEL_1DAY,
                 effective_number=10) -> None:
        """Load the data of ``data_schema`` into ``self.original_df``.

        The query is narrowed by ``self.security_list`` when it is truthy and
        by ``self.codes`` otherwise; the result is indexed by security and
        time. A truthy ``columns`` is queried together with the schema's
        ``security_id`` and ``timestamp`` columns and its ``key`` attributes
        become ``self.factors``; otherwise ``self.columns`` is ``None`` and
        ``self.factors`` is not assigned here.

        Raises ``Exception`` when the query returns ``None`` or an empty frame.
        """
        super().__init__(security_list, security_type, exchanges, codes,
                         the_timestamp, start_timestamp, end_timestamp,
                         keep_all_timestamp, fill_method, effective_number)

        self.data_schema = data_schema

        if not columns:
            self.columns = None
        else:
            requested = set(columns)
            self.columns = requested | {self.data_schema.security_id,
                                        self.data_schema.timestamp}
            self.factors = [column.key for column in columns]

        self.provider = provider
        self.level = level
        self.filters = filters

        # use security_list if possible
        if self.security_list:
            selector = {'security_list': self.security_list}
        else:
            selector = {'codes': self.codes}

        self.original_df = get_data(data_schema=self.data_schema,
                                    provider=self.provider,
                                    columns=self.columns,
                                    start_timestamp=self.start_timestamp,
                                    end_timestamp=self.end_timestamp,
                                    filters=self.filters,
                                    level=self.level,
                                    **selector)

        nothing_loaded = self.original_df is None or self.original_df.empty
        if nothing_loaded:
            raise Exception(
                'no data for: {} {} level:{} from: {} to: {}'.format(
                    self.security_list, self.codes, self.level,
                    self.start_timestamp, self.end_timestamp))
        self.original_df = index_df_with_security_time(self.original_df)

        self.logger.info('factor:{},original_df:\n{}'.format(
            self.factor_name, self.original_df))
Пример #20
0
def get_finance_factor(provider='eastmoney', security_id=None, codes=None, columns=None,
                       return_type='df', session=None, start_timestamp=None, end_timestamp=None,
                       filters=None, order=None, limit=None):
    """Query ``FinanceFactor`` records through ``get_data``.

    Every argument is forwarded unchanged; ``level`` is always passed as
    ``None``.
    """
    return get_data(
        data_schema=FinanceFactor,
        security_id=security_id,
        codes=codes,
        level=None,
        provider=provider,
        columns=columns,
        return_type=return_type,
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        filters=filters,
        session=session,
        order=order,
        limit=limit,
    )
Пример #21
0
    def evaluate_start_end_size_timestamps(self, security_item):
        """
        evaluate the size for recording data

        May block: unless ``self.one_shot`` is set, the method sleeps for half of the waiting time
        reported by ``self.level.count_from_timestamp`` when that is more than 30 seconds.

        :param security_item: the security to record; its ``id`` and ``timestamp`` attributes are read
        :return: (start, end, size, timestamps) where end and timestamps are always None here;
            size=0 means finish recording
        :rtype:(pd.Timestamp,pd.Timestamp,int,None)
        """

        # get latest record
        latest_record = get_data(security_id=security_item.id,
                                 provider=self.provider,
                                 data_schema=self.data_schema,
                                 level=self.level.value,
                                 order=self.data_schema.timestamp.desc(),
                                 limit=1,
                                 return_type='domain',
                                 session=self.session)

        if latest_record:
            latest_timestamp = latest_record[0].timestamp
        else:
            latest_timestamp = security_item.timestamp

        if not latest_timestamp:
            return latest_timestamp, None, self.default_size, None

        current_time = pd.Timestamp.now()
        time_delta = current_time - latest_timestamp

        if self.level == TradingLevel.LEVEL_1DAY:
            if is_same_date(current_time, latest_timestamp):
                return latest_timestamp, None, 0, None
            return latest_timestamp, None, time_delta.days + 1, None

        close_hour, close_minute = get_close_time(security_item.id)

        # to today,check closing time
        # 0,0 means never stop,e.g,coin
        # Only the exact pair (0, 0) disables the check; requiring both parts to be
        # non-zero would also skip it for close times such as 15:00.
        has_close_time = (close_hour, close_minute) != (0, 0)
        if has_close_time and time_delta.days == 0:
            if latest_timestamp.hour == close_hour and latest_timestamp.minute == close_minute:
                return latest_timestamp, None, 0, None

        if self.kdata_use_begin_time:
            # shift by one level interval when kdata timestamps use the begin time
            touching_timestamp = latest_timestamp + pd.Timedelta(
                seconds=self.level.to_second())
        else:
            touching_timestamp = latest_timestamp

        waiting_seconds, size = self.level.count_from_timestamp(
            touching_timestamp,
            one_day_trading_minutes=get_one_day_trading_minutes(
                security_item.id))
        if not self.one_shot and waiting_seconds and (waiting_seconds > 30):
            t = waiting_seconds / 2
            self.logger.info(
                'level:{},recorded_time:{},touching_timestamp:{},current_time:{},next_ok_time:{},just sleep:{} seconds'
                .format(
                    self.level.value, latest_timestamp, touching_timestamp,
                    current_time, touching_timestamp +
                    pd.Timedelta(seconds=self.level.to_second()), t))
            time.sleep(t)

        return latest_timestamp, None, size, None
Пример #22
0
def get_rights_issue_detail(provider='eastmoney', security_id=None, codes=None, columns=None,
                            return_type='df', session=None, start_timestamp=None, end_timestamp=None,
                            filters=None, order=None, limit=None):
    """Query ``RightsIssueDetail`` records through ``get_data``.

    Every argument is forwarded unchanged; ``level`` is always passed as
    ``None``.
    """
    forwarded = dict(
        security_id=security_id,
        codes=codes,
        provider=provider,
        columns=columns,
        return_type=return_type,
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        filters=filters,
        session=session,
        order=order,
        limit=limit,
    )
    return get_data(data_schema=RightsIssueDetail, level=None, **forwarded)