Пример #1
0
def cal_AV(data, start, end, dim='rate'):
    """
    Calculate the annualised volatility of a return column over a period.

    The day count and the variance are both taken from the same
    ``data[start:end]`` slice, so the result only depends on the rows of the
    requested period.

    :param data: time-indexed data containing the return column ``dim``
    :param start: start date, used as the lower bound of the label slice
    :param end: end date, used as the upper bound of the label slice
    :param dim: name of the column holding the returns
    :returns: ``sqrt(variance * 365 / num_days)`` for the period
    :raises: nothing explicitly; if the computed ``num_days`` is 0 the
      division fails
    """
    # slice once so the day count and the variance describe the same rows
    period = data[start:end]

    # length of the period in days, minus one as in the original formula
    start_date = util.time_2_string(period.index.min())
    end_date = util.time_2_string(period.index.max())
    num_days = util.num_days_between(start_date, end_date) - 1

    # scale the variance of the period to a year, then take the square root
    AV = (period[dim].var() * (365 / num_days))**0.5
    return AV
Пример #2
0
def cal_APR(data, start, end, dim='value', dividends=0):
    """
    Calculate the annual percentage rate of a holding period.

    The holding-period return reported by ``cal_HPR`` is divided by the
    length of the period expressed in years (days / 365).

    :param data: time-indexed data passed through to ``cal_HPR``
    :param start: start date of the period
    :param end: end date of the period
    :param dim: price column forwarded to ``cal_HPR``
    :param dividends: dividends forwarded to ``cal_HPR``
    :returns: holding-period return per year
    :raises: nothing explicitly
    """
    # return earned over the whole period
    holding_return = cal_HPR(data, start, end, dim, dividends)

    # first and last timestamps actually present inside the period
    period = data[start:end]
    first_day = util.time_2_string(period.index.min())
    last_day = util.time_2_string(period.index.max())

    # express the period length in years and scale the return by it
    years = util.num_days_between(first_day, last_day) / 365.0
    return holding_return / years
Пример #3
0
def cal_EAR(data, start, end, dim='value', dividends=0):
    """
    Calculate the effective annual rate of a holding period.

    The growth factor ``HPR + 1`` is compounded to one year:
    ``(HPR + 1) ** (1 / years) - 1`` with ``years = days / 365``.

    :param data: time-indexed data passed through to ``cal_HPR``
    :param start: start date of the period
    :param end: end date of the period
    :param dim: price column forwarded to ``cal_HPR``
    :param dividends: dividends forwarded to ``cal_HPR``
    :returns: effective annual rate
    :raises: nothing explicitly
    """
    # growth factor over the whole period (1 + holding-period return)
    growth_factor = cal_HPR(data, start, end, dim, dividends) + 1

    # first and last timestamps actually present inside the period
    period = data[start:end]
    first_day = util.time_2_string(period.index.min())
    last_day = util.time_2_string(period.index.max())
    years = util.num_days_between(first_day, last_day) / 365.0

    # compound the growth factor to a one-year horizon
    return growth_factor ** (1 / years) - 1
Пример #4
0
  def __init__(self, data, start_date=None, end_date=None, num_days=365, load_local_data=True):
    """
    Store the input data, seed the per-symbol records and fix the date range.

    :param data: container with a ``copy`` method and a 'ta_data' entry that
      maps '<symbol>_<suffix>' keys to copyable objects
    :param start_date: start of the range; derived from end_date when omitted
    :param end_date: end of the range; derived from start_date when omitted,
      or set to today when both are omitted
    :param num_days: length of the range used to derive a missing bound
    :param load_local_data: forwarded to ``self.init_record``
    :returns: none
    :raises: nothing explicitly
    """
    # keep a copy of the input container
    self.data = data.copy()

    # one record per symbol; the symbol is the key part before the first '_'
    # NOTE(review): self.record is not created here, presumably it is defined
    # on the class -- confirm
    for key, ta_frame in data['ta_data'].items():
      self.record[key.split('_')[0]] = ta_frame.copy()

    # initialize record (runs before the date range is stored)
    self.init_record(load_local_data=load_local_data)

    # derive whichever bound of the date range was not supplied
    if end_date is None:
      if start_date is None:
        end_date = util.time_2_string(datetime.datetime.today().date())
        start_date = util.string_plus_day(string=end_date, diff_days=-num_days)
      else:
        end_date = util.string_plus_day(string=start_date, diff_days=num_days)
    elif start_date is None:
      start_date = util.string_plus_day(string=end_date, diff_days=-num_days)

    self.start_date, self.end_date = start_date, end_date
Пример #5
0
  def analyze(self, sort=True):
    """
    Summarise the record of every symbol listed in self.sec_list.

    For each symbol with a non-empty record the first/last index dates, the
    first/last 'value', the EAR, the sharp ratio and the max drawdown are
    collected. When more than one non-benchmark symbol is present, a 'mean'
    and a 'total' row are added; their money columns are the mean/sum of the
    non-benchmark rows, while EAR, sharp ratio and max drawdown of both rows
    are computed from self.record['portfolio'].

    :param sort: sort the per-symbol rows by 'EAR' in descending order
    :returns: dataframe with columns start_date, end_date, start_money,
      end_money, profit, HPR, EAR, sharp_ratio, max_drawndown, rounded to
      2 decimals
    :raises: nothing explicitly; symbols with an empty record are reported
      with a print and skipped
    """
    # get records for self.sec_list
    records = {key: value for key, value in self.record.items() if key in self.sec_list}

    # init dict for storing results, one list per output column
    analysis = {
        'symbol': [],
        'start_date': [],
        'end_date': [],
        'start_money': [],
        'end_money': [],
        'EAR': [],
        'sharp_ratio': [],
        'max_drawndown': []
      }

    # go through each stock
    for symbol, record_data in records.items():

      if len(record_data) == 0:
        print(f'no record for {symbol}')
        continue

      min_idx = record_data.index.min()
      max_idx = record_data.index.max()

      # period boundaries and the money held at each of them
      analysis['symbol'].append(symbol)
      analysis['start_date'].append(util.time_2_string(min_idx.date()))
      analysis['end_date'].append(util.time_2_string(max_idx.date()))
      analysis['start_money'].append(record_data.loc[min_idx, 'value'])
      analysis['end_money'].append(record_data.loc[max_idx, 'value'])

      # performance metrics of the symbol
      EAR = finance_util.cal_EAR(data=record_data, start=min_idx.date(), end=max_idx.date(), dim='value', dividends=0)
      analysis['EAR'].append(EAR)

      sharp_ratio = finance_util.cal_sharp_ratio(data=record_data, start=None, end=None, price_dim='value')
      analysis['sharp_ratio'].append(sharp_ratio)

      max_drawndown = finance_util.cal_max_drawndown(data=record_data)
      analysis['max_drawndown'].append(max_drawndown)

    # transform dict to dataframe
    analysis = pd.DataFrame(analysis).set_index('symbol')
    if sort:
      analysis = analysis.sort_values('EAR', ascending=False)

    # calculate sum and mean for non benchmark stocks
    non_benchmark_list = [x for x in analysis.index.tolist() if x != 'benchmark']
    non_benchmark_analysis = analysis.loc[non_benchmark_list, analysis.columns].copy()
    if len(non_benchmark_analysis) > 1:

      # only the money columns are aggregated; the date columns hold strings,
      # which a frame-wide mean() rejects on recent pandas versions
      money = non_benchmark_analysis[['start_money', 'end_money']]
      analysis_mean = money.mean()
      analysis_sum = money.sum()

      # metrics of the whole portfolio
      value_sum = self.record['portfolio'].copy()
      value_sum['rate'] = value_sum['value'].pct_change().fillna(0)
      total_ear = finance_util.cal_EAR(data=value_sum, dim='value', start=None, end=None)
      total_max_drawndown = finance_util.cal_max_drawndown(data=value_sum, dim='value')
      total_sharp_ratio = finance_util.cal_sharp_ratio(data=value_sum, price_dim='value', rate_dim='rate', start=None, end=None)

      # move the benchmark row to the end, but only if there is one to move
      if self.benchmark is not None and 'benchmark' in analysis.index:
        right_order = non_benchmark_list + ['benchmark']
        analysis = analysis.loc[right_order].copy()

      # add the 'mean' and 'total' rows; pd.concat replaces DataFrame.append,
      # which no longer exists in pandas 2.x
      summary_rows = []
      for label, money_row in (('mean', analysis_mean), ('total', analysis_sum)):
        summary_rows.append(pd.DataFrame({
            'start_date': '',
            'end_date': '',
            'start_money': money_row['start_money'],
            'end_money': money_row['end_money'],
            'EAR': total_ear,
            'sharp_ratio': total_sharp_ratio,
            'max_drawndown': total_max_drawndown
          }, index=[label]))
      analysis = pd.concat([analysis] + summary_rows)

    # post process
    analysis['profit'] = analysis['end_money'] - analysis['start_money']
    analysis['HPR'] = analysis['profit'] / analysis['start_money']
    analysis = analysis[['start_date', 'end_date', 'start_money', 'end_money', 'profit', 'HPR', 'EAR', 'sharp_ratio', 'max_drawndown']].round(2)

    return analysis
Пример #6
0
  def recalculate_data(self, sec_list, mode=None, start_date=None, end_date=None):
    """
    Rebuild self.record for the symbols in sec_list over a date range.

    Symbols whose record already covers the range are handled according to
    mode: None only cuts the record to the range, 'signal' recalculates the
    signals and then cuts, 'trend' recalculates everything from the raw data.
    Symbols whose record does not cover the range, or which have no record
    yet, are always recalculated from the raw data. Finally
    self.init_record(load_local_data=False) is called.

    :param sec_list: symbols to process
    :param mode: None, 'signal' or 'trend'
    :param start_date: start of the range, defaults to self.start_date
    :param end_date: end of the range, defaults to self.end_date
    :returns: none
    :raises: nothing explicitly; an unknown mode is reported with a print
    """
    # verify value of mode
    if mode not in ['trend', 'signal', None]:
      print(f'Unknown mode: {mode}')
      return None

    # copy sec_data so the lookups below work on a private mapping
    sec_data = self.data['sec_data'].copy()

    # set start_date/end_date for recalculation
    start_date = self.start_date if start_date is None else start_date
    end_date = self.end_date if end_date is None else end_date

    # move start_date to the earliest date on/after it that any symbol has;
    # symbols without data after start_date are ignored so that an empty
    # slice (whose minimum is NaT) cannot distort the comparison
    global_min_date = None
    for raw_data in sec_data.values():
      available = raw_data[start_date:]
      if len(available) == 0:
        continue
      min_date = available.index.min()
      if global_min_date is None or min_date < global_min_date:
        global_min_date = min_date
    if global_min_date is not None:
      start_date = util.time_2_string(global_min_date)

    # set recalculate mode for each symbol
    cut_data = set()
    recalculate_signal = set()
    recalculate_trend = set()
    for symbol, tmp_data in self.record.items():

      # skip symbols which not in sec_list
      if symbol not in sec_list:
        continue

      # range covered by the current record
      min_idx = util.time_2_string(tmp_data.index.min())
      max_idx = util.time_2_string(tmp_data.index.max())
      covers_range = (min_idx <= start_date) and (max_idx >= end_date)

      # records not covering the range are always recalculated from trend,
      # otherwise the requested mode decides (mode was validated above)
      if not covers_range or mode == 'trend':
        recalculate_trend.add(symbol)
      elif mode == 'signal':
        recalculate_signal.add(symbol)
      else:
        cut_data.add(symbol)

    # for symbols just need to be cut
    for symbol in cut_data:
      self.record[symbol] = self.record[symbol][start_date:end_date].copy()

    # for symbols need to recalculate signals
    for symbol in recalculate_signal:
      self.record[symbol] = ta_util.calculate_ta_signal(df=self.record[symbol])[start_date:end_date]

    # symbols without any record yet also need trend and signal
    recalculate_trend.update(x for x in sec_list if x not in self.record)

    # report symbols that have no raw daily data to recalculate from
    for symbol in recalculate_trend:
      if f'{symbol}_day' not in sec_data:
        print(f'Simulator does not have raw data for {symbol}, not able to recalculate trend')

    for symbol_interval, raw_data in sec_data.items():
      # the symbol is the key part before the first '_'
      symbol = symbol_interval.split('_')[0]
      if symbol not in recalculate_trend:
        continue

      period_data = raw_data[start_date:end_date]
      if len(period_data) > 0:
        self.record[symbol] = ta_util.calculation(df=period_data, symbol=symbol)
      else:
        print(f'{symbol} has no data, remove it from record')
        # the symbol may never have had a record, so do not insist on one
        self.record.pop(symbol, None)

    # reset record
    self.init_record(load_local_data=False)
Пример #7
0
def download_stock_data_from_tiger(sec_code,
                                   time_col='time',
                                   quote_client=None,
                                   download_limit=1200,
                                   start_date=None,
                                   end_date=None,
                                   file_path='drive/My Drive/stock_data_us/',
                                   file_format='.csv',
                                   is_return=False,
                                   is_print=True):
    """
    Download bars for one security through a quote client and merge them
    into the local data file of that security.

    Bars are requested page by page (``download_limit`` rows per call),
    walking backwards in time, until a page comes back shorter than the
    limit. New rows are renamed to the Open/High/Low/Close/Volume/Date
    layout, appended to the existing data, de-duplicated on the date (the
    newest row wins), sorted by date and written to
    ``file_path + sec_code + file_format``.

    Any exception raised while loading, downloading or saving is caught and
    printed together with the stage it happened in.

    :param sec_code: security code, also used as the file name
    :param time_col: name of the timestamp column in the downloaded bars;
      it is switched to 'Date' once new bars have been processed
    :param quote_client: object providing ``get_bars``
    :param download_limit: number of rows requested per ``get_bars`` call
    :param start_date: start date; replaced by the latest date of the
      existing file when that file has data
    :param end_date: end date; defaults to the current time
    :param file_path: directory prefix of the data file
    :param file_format: suffix of the data file
    :param is_return: return the merged data as a time series
    :param is_print: print a summary of the date range and the new rows
    :returns: the merged data when ``is_return`` is true, otherwise None
    :raises: nothing from the guarded section; see above
    """
    # build the file name of the stock data
    filename = file_path + sec_code + file_format

    # start downloading
    stage = 'downloading_started'
    try:
        # load the already downloaded file if there is one, else start empty
        stage = 'loading_existed_data'
        data = pd.DataFrame()
        if os.path.exists(filename):
            data = read_stock_data(sec_code,
                                   file_path=file_path,
                                   file_format=file_format,
                                   time_col='Date')

        # remember the original row count and resume from the latest date
        init_len = len(data)
        if init_len > 0:
            start_date = util.time_2_string(data.index.max(),
                                            date_format='%Y-%m-%d')

        # download data from the Tiger API
        stage = 'downloading_new_data'

        # convert the start/end dates to millisecond timestamps
        if start_date is not None:
            begin_time = round(
                time.mktime(util.string_2_time(start_date).timetuple()) * 1000)
        else:
            begin_time = 0
        if end_date is not None:
            end_time = round(
                time.mktime(util.string_2_time(end_date).timetuple()) * 1000)
        else:
            end_time = round(time.time() * 1000)

        # fetch page by page, moving end_time back to the oldest bar received
        tmp_len = download_limit
        pages = []
        while tmp_len >= download_limit:
            tmp_data = quote_client.get_bars([sec_code],
                                             begin_time=begin_time,
                                             end_time=end_time,
                                             limit=download_limit)
            tmp_len = len(tmp_data)
            if tmp_len == 0:
                # an empty page has no oldest bar to continue from
                break
            pages.append(tmp_data)
            end_time = int(tmp_data.time.min())

        # later pages hold older bars, so they go first in the result
        new_data = pd.concat(pages[::-1]) if pages else pd.DataFrame()

        # process the downloaded data
        stage = 'processing_new_data'
        if len(new_data) > 0:
            new_data = new_data.drop('symbol', axis=1)
            new_data[time_col] = new_data[time_col].apply(
                lambda x: util.timestamp_2_time(x).date())
            new_data = new_data.rename(columns={
                'open': 'Open',
                'high': 'High',
                'low': 'Low',
                'close': 'Close',
                'volume': 'Volume',
                'time': 'Date'
            })
            new_data['Adj Close'] = new_data['Close']
            time_col = 'Date'
            new_data = util.df_2_timeseries(df=new_data, time_col=time_col)

            # attach to the existing data
            data = pd.concat([data, new_data], sort=False)

            # drop duplicated dates, sort and save
            stage = 'saving_data'
            data = data.reset_index().drop_duplicates(subset=time_col,
                                                      keep='last')
            # sort_values returns a new frame, so the result must be kept
            data = data.sort_values(by=time_col)
            data.to_csv(filename, index=False)

        # report the change in the number of records
        # NOTE(review): when nothing new was downloaded, data has not been
        # reset_index'ed and time_col was not switched to 'Date', so the
        # column lookup below presumably fails and ends up in the except
        # handler -- confirm
        if is_print:
            final_len = len(data)
            diff_len = final_len - init_len
            print(
                '[From Tiger]%(sec_code)s: %(first_date)s - %(latest_date)s, 新增记录 %(diff_len)s/%(final_len)s, '
                % dict(diff_len=diff_len,
                       final_len=final_len,
                       first_date=data[time_col].min().date(),
                       latest_date=data[time_col].max().date(),
                       sec_code=sec_code))

    except Exception as e:
        print(sec_code, stage, e)

    # return the data
    if is_return:
        data = util.df_2_timeseries(data, time_col=time_col)
        return data
Пример #8
0
def download_stock_data_from_yahoo(sec_code,
                                   time_col='Date',
                                   start_date=None,
                                   end_date=None,
                                   file_path='drive/My Drive/stock_data_us/',
                                   file_format='.csv',
                                   is_return=False,
                                   is_print=True):
    """
    Download data for one security through ``web.DataReader`` ('yahoo'
    source) and merge it into the local data file of that security.

    New rows are appended to the existing data, de-duplicated on
    ``time_col`` (the newest row wins) and written to
    ``file_path + sec_code + file_format``.

    Any exception raised while loading, downloading or saving is caught and
    printed together with the stage it happened in.

    :param sec_code: security code, also used as the file name
    :param time_col: name of the date column
    :param start_date: start date; replaced by the latest date of the
      existing file when that file has data
    :param end_date: end date passed to the reader
    :param file_path: directory prefix of the data file
    :param file_format: suffix of the data file
    :param is_return: return the merged data as a time series
    :param is_print: print a summary of the date range and the new rows
    :returns: the merged data when ``is_return`` is true, otherwise None
    :raises: nothing from the guarded section; see above
    """
    # build the file name of the stock data
    filename = file_path + sec_code + file_format

    # start downloading
    stage = 'downloading_started'
    try:
        # load the already downloaded file if there is one, else start empty
        stage = 'loading_existed_data'
        data = pd.DataFrame()
        if os.path.exists(filename):
            data = read_stock_data(sec_code,
                                   file_path=file_path,
                                   file_format=file_format,
                                   time_col=time_col)

        # remember the original row count and resume from the latest date
        init_len = len(data)
        if init_len > 0:
            start_date = util.time_2_string(data.index.max(),
                                            date_format='%Y-%m-%d')

        # download the new data and append it to the existing data
        stage = 'appending_new_data'
        tmp_data = web.DataReader(sec_code,
                                  'yahoo',
                                  start=start_date,
                                  end=end_date)
        if len(tmp_data) > 0:
            # pd.concat replaces DataFrame.append, removed in pandas 2.x
            data = pd.concat([data, tmp_data], sort=False)

            # drop duplicated dates and save
            stage = 'saving_data'
            data = data.reset_index().drop_duplicates(subset=time_col,
                                                      keep='last')
            data.to_csv(filename, index=False)

        # report the change in the number of records
        if is_print:
            final_len = len(data)
            diff_len = final_len - init_len
            print(
                '%(sec_code)s: %(first_date)s - %(latest_date)s, 新增记录 %(diff_len)s/%(final_len)s, '
                % dict(diff_len=diff_len,
                       final_len=final_len,
                       first_date=data[time_col].min().date(),
                       latest_date=data[time_col].max().date(),
                       sec_code=sec_code))
    except Exception as e:
        print(sec_code, stage, e)

    # return the data
    if is_return:
        data = util.df_2_timeseries(data, time_col=time_col)
        return data