Exemplo n.º 1
0
 def info(self):
     '''
     Collect industry-analysis data for the stocks in ``self.names``.

     Iterates over ``self.get_data()``; the i-th item is labelled with
     ``self.names[i]``.

     Returns a dict with five lists, each built in iteration order:
         industry_info:  ``{name: item['hyzx']}`` per stock.
         growth_info:    ``item['czxbj']['data']`` per stock (not labelled).
         valuation_info: ``{name: item['gzbj']['data']}`` per stock.
         dupont_info:    ``{name: item['dbfxbj']['data']}`` per stock.
         market_size:    four ``{name + '——' + ranking: ...}`` dicts per stock.
     '''
     sprint('Getting industry analysis data...')
     # (label suffix, payload key) for each company-size ranking, in the
     # order they are appended to ``market_size``.
     size_rankings = (
         ('按总市值排名', 'gsgmzsz'),    # by total market value
         ('按流通市值排名', 'gsgmltsz'),  # by circulating market value
         ('按营业收入排名', 'gsgmyysr'),  # by operating revenue
         ('按净利润排名', 'gsgmjlr'),    # by net profit
     )
     collected = {
         'industry_info': [],
         'growth_info': [],
         'valuation_info': [],
         'dupont_info': [],
         'market_size': [],
     }
     for position, payload in enumerate(self.get_data()):
         label = self.names[position]
         collected['industry_info'].append({label: payload['hyzx']})
         collected['growth_info'].append(payload['czxbj']['data'])
         collected['valuation_info'].append({label: payload['gzbj']['data']})
         collected['dupont_info'].append({label: payload['dbfxbj']['data']})
         for suffix, key in size_rankings:
             collected['market_size'].append(
                 {label + '——' + suffix: payload[key]})
     return collected
Exemplo n.º 2
0
 def __init__(
     self,
     industry='银行',
     compare_stocks=[
         '中证银行',
         '沪深300指数',
     ],
     start_date='2019-01-01',
     end_date='2020-03-01',
 ):
     '''
     Prepare an industry comparison.

     Parameters:
         industry: value matched against the ``industry`` column of
             ``ConstituentStocks().stock_industry()``.
         compare_stocks: names whose data is loaded for comparison.
         start_date, end_date: date range passed to ``StockData``.

     Sets ``self.names`` to the first two ``code_name`` entries of the
     matching industry rows and ``self.compare_stocks_data`` to the result
     of ``StockData(...).stocks_data()`` for ``compare_stocks``.
     '''
     sprint('Please make sure your industry is present in the market!')
     stock_industry = ConstituentStocks().stock_industry()
     self.start_date = start_date
     self.end_date = end_date
     in_industry = stock_industry['industry'] == industry
     self.names = stock_industry[in_industry]['code_name'][0:2]
     # Copy: the default list is shared between calls, so storing it
     # directly would let one instance's mutation leak into the next.
     self.compare_stocks = list(compare_stocks)
     sprint('Initializing...')
     compare_stocks_data = StockData(names=self.compare_stocks,
                                     start_date=self.start_date,
                                     end_date=self.end_date)
     self.compare_stocks_data = compare_stocks_data.stocks_data()
Exemplo n.º 3
0
 def __init__(self,
              names=['比亚迪', '阳光电源', '璞泰来', '紫光国微', '盛新锂能'],
              start_date='2021-05-01',
              end_date='2021-11-01',
              frequency='d',
              rfr=0.023467,
              funds=10000000,
              path='.\\Markovitz cache\\'):
     '''
     Load price data for ``names`` and derive per-stock price/lot limits.

     Parameters:
         names: stock names to load.
         start_date, end_date: date range passed to ``StockData``.
         frequency: 'd', 'w' or 'm'; forwarded to ``StockData`` and used to
             scale ``rfr``.
         rfr: risk-free rate as a fraction (presumably annual -- confirm);
             stored in ``self.rfr`` as a percentage per period.
         funds: available capital, used to bound the number of lots.
         path: cache directory. When truthy, a cached
             ``stock data\\stocks_data.csv`` below it is preferred over a
             fresh download.

     Raises:
         KeyError: if ``frequency`` is not 'd', 'w' or 'm'.
     '''
     # Copy: the default list is shared between calls and must not be
     # aliased by the instance.
     self.names = list(names)
     self.lens = len(self.names)
     self.start_date = start_date
     self.end_date = end_date
     self.frequency = frequency
     # Rate in percent per period. The divisor is the number of periods in
     # a year, so the monthly entry is 12 (it used to be 30, which is the
     # number of days in a month, not months in a year).
     periods_per_year = {'d': 365, 'w': 52, 'm': 12}
     self.rfr = (rfr * 100) / periods_per_year[frequency]
     self.funds = funds
     self.path = path
     if self.path:
         makedir(self.path, '')
     sprint('Initializing...')
     if not self.path:
         sd = StockData(names=self.names,
                        start_date=self.start_date,
                        end_date=self.end_date,
                        frequency=self.frequency)
         self.datas = sd.stocks_data()
     else:
         try:
             self.datas = pd.read_csv(
                 f'{self.path}\\stock data\\stocks_data.csv')
         except (OSError, ValueError):
             # Cache missing or unreadable (pandas' parser errors derive
             # from ValueError): download instead. Anything else, such as
             # KeyboardInterrupt, is no longer swallowed.
             sd = StockData(names=self.names,
                            start_date=self.start_date,
                            end_date=self.end_date,
                            frequency=self.frequency,
                            path=self.path)
             self.datas = sd.stocks_data()
     self.datas.index = self.datas['name']
     self.data = self.datas.reset_index(drop=True)
     unique_dates = self.data.date.unique()
     # Keep only the YYYY-MM-DD part; works for both str and datetime values.
     self.date = [str(day)[:10] for day in unique_dates]
     self.first_date = self.date[0]
     self.last_date = self.date[-1]
     # Opening price on the first day, keyed by stock name.
     first_day = self.data[self.data.date == unique_dates[0]]
     self.first_price = first_day[['open', 'name'
                                   ]].set_index('name').to_dict()['open']
     # Closing price on the last day, keyed by stock name.
     last_day = self.data[self.data.date == unique_dates[-1]]
     self.last_price = last_day[['close', 'name'
                                 ]].set_index('name').to_dict()['close']
     # Maximum number of 100-share lots of each stock that ``funds`` can buy
     # at the last closing price.
     self.max_shares_dict = {
         name: math.floor(self.funds / (price * 100))
         for name, price in self.last_price.items()
     }
Exemplo n.º 4
0
def GetGoodStock(page=5):
    '''
    Rank stocks by how often they appear in the holdings of top funds.

    Fetches ``page`` pages of the eastmoney fund ranking, reads the holdings
    table of every listed fund page and aggregates by stock name.

    Parameters:
        page: number of ranking pages to fetch (pages 1..page).

    Returns:
        DataFrame indexed by '股票名称' with the columns '平均持仓占比' (mean
        holding percentage) and '出现次数' (number of funds holding the
        stock), sorted by '出现次数' in descending order.

    Fund pages that cannot be downloaded or parsed are reported and skipped.
    '''
    import ast

    sprint('Getting data from http://fund.eastmoney.com/data/rankhandler.aspx ...')
    url = "http://fund.eastmoney.com/data/rankhandler.aspx"
    headers = {
        "Host": "fund.eastmoney.com",
        "Referer": "http://fund.eastmoney.com/data/fundranking.html",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36 Edg/88.0.705.63"
    }
    today = time.strftime("%Y-%m-%d", time.localtime())

    def get_urls(page_index):
        # Return the fund detail URLs listed on one ranking page.
        params = {
            "op": "ph",
            "sc": "6yzf",
            "sd": today,
            "ed": today,
            "pi": str(page_index),
            "dx": "1",
        }
        response = requests.get(url, headers=headers, params=params,
                                timeout=100)
        response.encoding = response.apparent_encoding
        raw = re.findall('var rankData = {datas:(.*),allRe', response.text)[0]
        # SECURITY: this text comes from the network. It used to be passed to
        # eval(); literal_eval accepts the same list-of-strings literal but
        # cannot execute code.
        records = ast.literal_eval(raw)
        return [
            'http://fund.eastmoney.com/' +
            re.findall(r'(\d*),', record)[0] + '.html' for record in records
        ]

    urls = []
    for page_index in range(1, page + 1):
        urls.extend(get_urls(page_index))

    def get_stock(fund_url):
        # Holdings table of one fund page (sixth table on the page).
        tables = pd.read_html(fund_url)
        return tables[5][['股票名称', '持仓占比']]

    stocks = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        submitted = [(fund_url, executor.submit(get_stock, fund_url))
                     for fund_url in urls]
        # Collect in submission order so the result is deterministic.
        for fund_url, future in submitted:
            try:
                stocks.append(future.result())
            except Exception as exc:
                # Best effort, as before: one unreadable fund page must not
                # abort the run. The failure is now reported, not lost.
                sprint(f'Skipping {fund_url}: {exc}')

    stock = pd.concat(stocks)
    stock['持仓占比'] = stock['持仓占比'].map(lambda x: x.replace('%', ''))
    stock = stock.replace('暂无数据', 0)
    stock['持仓占比'] = stock['持仓占比'].astype('float')
    group = stock.groupby('股票名称')
    df1 = group.mean().rename(columns={'持仓占比': '平均持仓占比'})
    df2 = group.count().rename(columns={'持仓占比': '出现次数'})
    df = pd.merge(df1, df2, how='outer', on='股票名称')
    return df.sort_values(by='出现次数', ascending=False)
Exemplo n.º 5
0
    def __init__(self,
                 names=['贵州茅台', '隆基股份', '五粮液'],
                 weights=False,
                 start_date='2021-05-01',
                 end_date='2021-11-01',
                 frequency='d',
                 rfr=0.023467,
                 market_index='沪深300指数',
                 path='.\\Port cache\\'):
        '''
        Load data for a portfolio and its market index and fix the weights.

        Parameters:
            names: stock names in the portfolio.
            weights: falsy to take the weights from ``self.optimization()``,
                or a dict whose keys equal ``names`` (same order). Values
                that do not sum to 1 are rescaled so that they do.
            start_date, end_date: date range passed to ``StockData``.
            frequency: forwarded to ``StockData``.
            rfr: risk-free rate, stored unchanged in ``self.rfr``.
            market_index: name of the index loaded alongside the stocks.
            path: cache directory. When truthy, a cached
                ``stock data\\stocks_data.csv`` below it is preferred over a
                fresh download.

        Raises:
            ValueError: if ``weights`` is truthy but not a dict, if its keys
                differ from ``names``, or if its values sum to 0.
        '''
        # Copy: the default list is shared between calls and must not be
        # aliased by the instance.
        self.names = list(names)
        self.lens = len(self.names)
        self.start_date = start_date
        self.end_date = end_date
        self.frequency = frequency
        self.rfr = rfr
        self.market_index = market_index
        self.path = path
        sprint('Initializing...')

        def download():
            # Fetch stocks and index together; both branches below need it.
            sd = StockData(names=self.names + [market_index],
                           start_date=self.start_date,
                           end_date=self.end_date,
                           frequency=self.frequency,
                           path=self.path)
            return sd.stocks_data()

        if not self.path:
            self.datas = download()
        else:
            try:
                self.datas = pd.read_csv(
                    f'{self.path}\\stock data\\stocks_data.csv')
            except (OSError, ValueError):
                # Cache missing or unreadable (pandas' parser errors derive
                # from ValueError): download instead.
                self.datas = download()

        self.datas.index = self.datas['name']
        self.data = self.datas.loc[self.names].reset_index(drop=True)
        self.Rm_data = self.datas.loc[self.market_index].reset_index(drop=True)
        self.date = [str(day)[:10] for day in self.datas.date.unique()]
        if not weights:
            self.weights_dict = self.optimization()['weights']
        elif isinstance(weights, dict):
            if list(weights.keys()) != self.names:
                raise ValueError('参数weights的keys必须与names相同!')
            # The previous check wrapped dict_values in np.array(), which
            # yields a 0-d object array, so the comparison with 1 was always
            # true. Sum the actual values.
            total = np.sum(list(weights.values()))
            if total == 0:
                raise ValueError('参数weights的values之和不能为0!')
            if total != 1:
                weights = {
                    name: value / total
                    for name, value in weights.items()
                }
            self.weights_dict = weights
        else:
            raise ValueError('参数weights必须为dict!')
Exemplo n.º 6
0
 def boundary_scatter_data(self, number=500):
     '''
     Generate minimum-variance portfolios for random target returns.

     For each of ``number`` draws, a target return is sampled uniformly
     between the smallest and largest ``rate`` of the scatter data, and the
     weights minimising the variance are searched subject to: weights in
     [0, 1], weights summing to 1, and the expected return equal to the
     target.

     Parameters:
         number: how many boundary points to generate (default 500).

     Returns:
         DataFrame with the columns ``weights``, ``risk``, ``rate`` and
         ``sharpe``, sorted by ``sharpe`` in descending order. When
         ``self.path`` is set it is also written to
         ``scatter data\\boundary_scatter_data.csv``.
     '''
     if self.path:
         try:
             # ``index=False`` was passed here before; read_csv has no such
             # parameter, so the call always raised and the cached file was
             # never used.
             df_scatter = pd.read_csv(
                 f'{self.path}\\scatter data\\scatter_data.csv')
         except (OSError, ValueError):
             # Cache missing or unreadable: regenerate.
             df_scatter = self.scatter_data()
     else:
         df_scatter = self.scatter_data()
     data_dict = self.calculate()
     data_mean = data_dict['mean']
     data_cov = data_dict['cov']
     scatter_list = []
     sprint('Searching for boundary scatter...')
     # The sampling range does not change inside the loop.
     rate_min = df_scatter.rate.min()
     rate_max = df_scatter.rate.max()
     initial_weights = np.ones(self.lens) / self.lens
     bounds = tuple((0, 1) for _ in range(self.lens))
     for _ in trange(number):
         random_rate = random.uniform(rate_min, rate_max)
         constraints = ({
             'type': 'eq',
             'fun': lambda weights: weights.sum() - 1
         }, {
             'type':
             'eq',
             'fun':
             lambda weights: data_mean.dot(weights.T)['pctChg'] -
             random_rate
         })
         opts = sco.minimize(
             fun=lambda weights: weights.dot(data_cov).dot(weights.T),
             x0=initial_weights,
             bounds=bounds,
             constraints=constraints)
         scatter_list.append([opts.x, np.sqrt(opts.fun), random_rate])
     df_boundary_scatter = pd.DataFrame(scatter_list,
                                        columns=['weights', 'risk', 'rate'])
     df_boundary_scatter['sharpe'] = (df_boundary_scatter.rate -
                                      self.rfr) / df_boundary_scatter.risk
     df_boundary_scatter = df_boundary_scatter.sort_values(by='sharpe',
                                                           ascending=False)
     if self.path:
         makedir(self.path, 'scatter data')
         df_boundary_scatter.to_csv(
             f'{self.path}\\scatter data\\boundary_scatter_data.csv')
     return df_boundary_scatter
Exemplo n.º 7
0
 def stocks_data(self):
     '''
     Return a DataFrame containing the K-line data of all stocks.

     Daily data ('d') has the columns
     date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,peTTM,psTTM,pcfNcfTTM,pbMRQ,isST
     and weekly/monthly data ('w'/'m') has
     date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg
     plus, in both cases, a ``name`` column with the stock name.
     See the link below for the field definitions:
     http://baostock.com/baostock/index.php/A股K线数据

     When ``self.path`` is set, stock codes are taken from the cached
     ``stock data\\stocks_info.csv`` if it can be read, and the result is
     written to ``stock data\\stocks_data.csv``.

     Raises:
         ValueError: if ``self.frequency`` is not 'd', 'w' or 'm'.
     '''
     daily_fields = 'date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,peTTM,psTTM,pcfNcfTTM,pbMRQ,isST'
     periodic_fields = 'date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg'
     fields_by_frequency = {
         'd': daily_fields,
         'w': periodic_fields,
         'm': periodic_fields,
     }
     # Fail early and clearly; previously an unknown frequency left the
     # query result unbound and surfaced as a NameError inside the loop.
     if self.frequency not in fields_by_frequency:
         raise ValueError(
             f"Unsupported frequency {self.frequency!r}; expected 'd', 'w' or 'm'.")
     fields = fields_by_frequency[self.frequency]

     if not self.path:
         stocks_info = self.stocks_info()
     else:
         try:
             stocks_info = pd.read_csv(
                 f'{self.path}\\stock data\\stocks_info.csv').set_index('name').T.to_dict()
         except (OSError, ValueError, KeyError):
             # Cache missing, unreadable or without a 'name' column.
             stocks_info = self.stocks_info()
     df_list = []
     sprint('Loading stocks data...')
     for name in tqdm(self.names):
         code = stocks_info[name]['code']
         if stocks_info[name]['ipoDate'] > self.start_date:
             sprint(
                 f"{name}'s ipo date is {stocks_info[name]['ipoDate']}, which is after {self.start_date}.")
         rs = bs.query_history_k_data_plus(code,
                                           fields,
                                           start_date=self.start_date,
                                           end_date=self.end_date,
                                           frequency=self.frequency,
                                           adjustflag=self.adjustflag)
         df = get_data(rs)
         df['name'] = name
         df_list.append(df)
     df = pd.concat(df_list).apply(pd.to_numeric, errors='ignore')
     df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
     if self.path:
         df.to_csv(f'{self.path}\\stock data\\stocks_data.csv', index=False)
     return df
Exemplo n.º 8
0
 def request(self, urls):
     '''
     Download every URL and stack the parsed tables into one DataFrame.

     Each response body is treated as comma-separated text: carriage
     returns and tabs are removed, empty lines are dropped, the first field
     of every line becomes a column label (the table is transposed), and a
     '名称' column is filled with ``self.names[i]`` for the i-th URL.

     Parameters:
         urls: iterable of URLs, in the same order as ``self.names``.

     Returns:
         The concatenation of the per-URL DataFrames.
     '''
     sprint('Getting data...')
     frames = []
     for position, url in enumerate(tqdm(urls)):
         reply = requests.get(url, timeout=100)
         reply.encoding = reply.apparent_encoding
         cleaned = reply.text.replace('\r', '').replace('\t', '')
         rows = [line.split(',') for line in cleaned.split('\n') if line != '']
         table = pd.DataFrame(rows).set_index(0).T
         table['名称'] = self.names[position]
         frames.append(table)
     return pd.concat(frames)
Exemplo n.º 9
0
 def tree(self):
     '''
     Branch-and-bound style search for the best integer share allocation.

     Starting from the candidates ``self.port(self.init_tree(), near=1)``,
     each iteration scores every candidate with ``self.exam``, drops those
     scoring 0 or below the best sharpe of the previous iteration, and asks
     ``self.port`` for the next candidates around the survivors.

     Returns a dict with the keys ``shares`` and ``sharpe`` as soon as
     exactly one candidate survives the filter.
     '''
     # Initial integer combination
     exam_tree = pd.DataFrame()
     exam_tree['weights'] = self.port(self.init_tree(), near=1)
     max_sharpe = -9999999
     sprint('Searching for the integer shares')
     n = 0
     flag = False
     near = 1
     while True:
         n += 1
         tree_list = []
         print(f'第{n}次迭代:')
         for i in tqdm(list(exam_tree.itertuples())):
             examed_sharpe = self.exam(i.weights)
             # A result of 0 is discarded (presumably exam's marker for an
             # infeasible candidate -- confirm against exam).
             if examed_sharpe != 0:
                 tree_list.append([i.weights, examed_sharpe])
         df_exam = pd.DataFrame(tree_list,
                                columns=['shares', 'sharpe'
                                         ]).sort_values(by='sharpe',
                                                        ascending=False)
         # Keep only candidates at least as good as the previous best, which
         # reduces the amount of computation
         df_exam = df_exam[df_exam['sharpe'] >= max_sharpe]
         if len(df_exam) == 1:
             return df_exam.iloc[0].to_dict()
         # Best sharpe of this iteration (rows are sorted descending)
         max_sharpe = df_exam['sharpe'].iloc[0]
         print(
             f'max_sharpe:{max_sharpe}\nshares:{df_exam["shares"].iloc[0]}\n'
             + '-' * 100)
         # Choose the neighbourhood for the next candidates: 1 after the
         # first iteration, then alternating 2, 1, 2, 1, ...
         if flag:
             near = 1
             flag = False
         elif n > 1:
             near = 2
             flag = True
         exam_tree = pd.DataFrame()
         exam_tree['weights'] = self.port(df_exam, near=near)
Exemplo n.º 10
0
    def cml(self, show=True):
        '''
        Plot the capital market line together with the efficient frontier.

        Random portfolios are drawn in blue, boundary portfolios in red and
        the capital market line (through ``(0, self.rfr)`` with the maximum
        sharpe ratio as slope) in magenta.

        When ``self.path`` is set, cached scatter CSVs are used if both can
        be read, a ``boundary`` flag column is added to both frames and
        their concatenation is written to
        ``scatter data\\all_scatter_data.csv``.

        Parameters:
            show: display the figure when true, otherwise save it as
                ``cml.svg`` below ``self.path``.

        Returns:
            The concatenation of the scatter and boundary-scatter frames.
        '''
        if self.path:
            try:
                df_scatter = pd.read_csv(
                    f'{self.path}\\scatter data\\scatter_data.csv')
                df_boundary_scatter = pd.read_csv(
                    f'{self.path}\\scatter data\\boundary_scatter_data.csv')
            except (OSError, ValueError):
                # Either cache file missing or unreadable: regenerate both.
                df_scatter = self.scatter_data()
                df_boundary_scatter = self.boundary_scatter_data()
            df_scatter['boundary'] = False
            df_boundary_scatter['boundary'] = True
        else:
            df_scatter = self.scatter_data()
            df_boundary_scatter = self.boundary_scatter_data()

        all_scatter = pd.concat([df_scatter, df_boundary_scatter])
        if self.path:
            all_scatter.to_csv(
                f'{self.path}\\scatter data\\all_scatter_data.csv')

        max_sharpe = self.optimization()['sharpe']
        sprint(f'max sharpe: {max_sharpe}')
        plt.cla()
        try:
            plt.style.use('seaborn-paper')
        except OSError:
            # Newer Matplotlib releases only ship this style under the
            # 'seaborn-v0_8-' prefix.
            plt.style.use('seaborn-v0_8-paper')
        plt.scatter(df_scatter.risk, df_scatter.rate, s=10, marker=".", c='b')
        plt.scatter(df_boundary_scatter.risk,
                    df_boundary_scatter.rate,
                    s=10,
                    marker=".",
                    c='r')
        plt.axline(xy1=(0, self.rfr), slope=max_sharpe, c='m')
        plt.xlim(df_scatter.risk.min() * 0.8, df_scatter.risk.max() * 1.2)
        plt.ylim(df_scatter.rate.min() * 0.8, df_scatter.rate.max() * 1.2)
        plt.xlabel('Risk')
        plt.ylabel('Yield')
        if show:
            plt.show()
        else:
            plt.savefig(f'{self.path}\\cml.svg', format='svg')
        return all_scatter
Exemplo n.º 11
0
    def __init__(self,
                 names=['比亚迪', '阳光电源', '璞泰来', '紫光国微', '盛新锂能'],
                 start_date='2021-05-01',
                 end_date='2021-11-01',
                 frequency='w',
                 rfr=0.023467,
                 market_index='沪深300指数',
                 path='.\\CAPM cache\\'):
        '''
        Load price data for ``names`` and for the market index.

        Parameters:
            names: stock names to analyse.
            start_date, end_date: date range passed to ``StockData``.
            frequency: 'd', 'w' or 'm'; forwarded to ``StockData`` and used
                to scale ``rfr``.
            rfr: risk-free rate as a fraction (presumably annual --
                confirm); stored in ``self.rfr`` as a percentage per period.
            market_index: name of the index loaded alongside the stocks.
            path: cache directory. When truthy, a cached
                ``stock data\\stocks_data.csv`` below it is preferred over a
                fresh download.

        Sets ``self.data`` (rows of the stocks) and ``self.Rm_data`` (rows
        of the market index).

        Raises:
            KeyError: if ``frequency`` is not 'd', 'w' or 'm'.
        '''
        # Copy: the default list is shared between calls and must not be
        # aliased by the instance.
        self.names = list(names)
        self.lens = len(self.names)
        self.start_date = start_date
        self.end_date = end_date
        self.frequency = frequency
        # Rate in percent per period. The divisor is the number of periods
        # in a year, so the monthly entry is 12 (it used to be 30, which is
        # the number of days in a month, not months in a year).
        periods_per_year = {'d': 365, 'w': 52, 'm': 12}
        self.rfr = (rfr * 100) / periods_per_year[frequency]
        self.market_index = market_index
        self.path = path
        if self.path:
            makedir(self.path, '')
        sprint('Initializing...')
        if not self.path:
            sd = StockData(names=self.names + [market_index],
                           start_date=self.start_date,
                           end_date=self.end_date,
                           frequency=self.frequency)
            self.datas = sd.stocks_data()
        else:
            try:
                self.datas = pd.read_csv(
                    f'{self.path}\\stock data\\stocks_data.csv')
            except (OSError, ValueError):
                # Cache missing or unreadable (pandas' parser errors derive
                # from ValueError): download instead.
                sd = StockData(names=self.names + [market_index],
                               start_date=self.start_date,
                               end_date=self.end_date,
                               frequency=self.frequency,
                               path=self.path)
                self.datas = sd.stocks_data()

        self.datas.index = self.datas['name']
        self.data = self.datas.loc[self.names].reset_index(drop=True)
        self.Rm_data = self.datas.loc[self.market_index].reset_index(drop=True)
Exemplo n.º 12
0
 def weight_tests(self, number=5):
     '''
     Optimise every portfolio made of ``number`` of the stocks.

     For each combination of ``number`` names out of ``self.names`` the
     instance is temporarily narrowed to that subset, ``self.optimization()``
     is run and the standard deviation, minimum and maximum of the resulting
     weights are recorded. Results whose smallest weight exceeds 0.02 are
     printed as they are found.

     Parameters:
         number: size of each combination (default 5).

     Returns:
         DataFrame with the columns ``weights``, ``sharpe``, ``std``,
         ``min`` and ``max``, one row per combination. When ``self.path`` is
         set it is also written to ``weight_test.csv`` below it.
     '''
     lists = []
     # The loop below repoints names/data/lens at each subset. Remember the
     # full universe so the instance is not left stuck on the last subset.
     full_names, full_data, full_lens = self.names, self.data, self.lens
     try:
         for port in tqdm(list(combinations(full_names, number))):
             self.names = list(port)
             self.data = self.datas.loc[self.names].reset_index(drop=True)
             self.lens = len(self.names)
             test_dict = self.optimization()
             weight_array = np.array(list(test_dict['weights'].values()))
             # The +1 shift does not change the standard deviation; kept so
             # the reported values stay the same.
             test_dict['std'] = np.std(weight_array + 1)
             test_dict['min'] = np.min(weight_array)
             test_dict['max'] = np.max(weight_array)
             lists.append(test_dict)
             if test_dict['min'] > 0.02:
                 sprint(test_dict)
     finally:
         self.names, self.data, self.lens = full_names, full_data, full_lens
     df_test = pd.DataFrame(
         lists, columns=['weights', 'sharpe', 'std', 'min', 'max'])
     if self.path:
         df_test.to_csv(f'{self.path}\\weight_test.csv', index=False)
     return df_test
Exemplo n.º 13
0
 def stocks_info(self):
     '''
     Look up the code and IPO date of every stock in ``self.names``.

     Returns a dict keyed by stock name, for example
     {
         '贵州茅台': {'code': 'sh.600519', 'ipoDate': '2001-08-27'},
         '隆基股份': {'code': 'sh.601012', 'ipoDate': '2012-04-11'},
         ...
     }
     When ``self.path`` is set, the same information is also written to
     ``stock data\\stocks_info.csv`` below it.
     '''
     sprint('Loading stocks information...')
     info = {}
     for stock_name in tqdm(self.names):
         basics = get_data(bs.query_stock_basic(code_name=stock_name))
         info[stock_name] = dict(code=basics['code'][0],
                                 ipoDate=basics['ipoDate'][0])
     if not self.path:
         return info
     # Persist one row per stock, with the name as a regular column.
     makedir(self.path, 'stock data')
     table = pd.DataFrame(info).T
     table['name'] = table.index
     table.to_csv(f'{self.path}\\stock data\\stocks_info.csv', index=False)
     return info
Exemplo n.º 14
0
 def scl(self, name='', show=True):
     '''
     Plot the security characteristic line of one asset.

     The asset's ``pctChg`` is plotted against the market index's ``pctChg``
     together with the line whose intercept and slope are the ``alpha_ols``
     and ``beta_ols`` entries returned by ``self.ls_beta``.

     Parameters:
         name: asset to plot. If it is not in ``self.names`` a warning is
             printed and a random asset from ``self.names`` is used.
         show: display the figure when true, otherwise save it as
             ``scl\\<name>.svg`` below ``self.path``.
     '''
     if name not in self.names:
         sprint(f'name参数值未给定,或参数值{name}不在给定风险资产范围内!已重新随机选择一种给定风险资产!',
                color='red')
         name = random.choice(self.names)
     Ri = self.data[self.data.name == name]['pctChg']
     Rm = self.Rm_data['pctChg']
     ls_dict = self.ls_beta(Ri)
     plt.cla()
     plt.axline(xy1=(0, ls_dict['alpha_ols']),
                slope=ls_dict['beta_ols'],
                c='m')
     # The line above is Ri = alpha + beta * Rm, so the market return belongs
     # on the x axis and the asset return on the y axis. The points and
     # labels used to be drawn the other way round, so they did not match
     # the line.
     # NOTE(review): assumes ls_beta regresses the asset on the market (the
     # usual meaning of alpha/beta) -- confirm against ls_beta.
     plt.scatter(Rm, Ri, s=10, marker=".", c='b')
     plt.xlabel(f'{self.market_index}收益率(%)')
     plt.ylabel(f'{name}收益率(%)')
     if show:
         plt.show()
     else:
         makedir(self.path, 'scl')
         plt.savefig(f'{self.path}\\scl\\{name}.svg', format='svg')
Exemplo n.º 15
0
 def save(self, path, func_name, dic):
     '''
     Write several DataFrames to one Excel workbook, one sheet each.

     Parameters:
         path: directory that receives the workbook.
         func_name: file name without extension; the file is
             ``<path>/<func_name>.xlsx``.
         dic: mapping of sheet name to an object with a ``to_excel``
             method (e.g. a DataFrame).
     '''
     # The context manager saves and closes the workbook, also when a sheet
     # fails to write. ``ExcelWriter.save()`` no longer exists in pandas 2.
     with pd.ExcelWriter(f'{path}/{func_name}.xlsx') as writer:
         for name, data in dic.items():
             data.to_excel(writer, sheet_name=name)
     sprint(f'Saved in {path}.')