def info(self):
    '''
    Mortals, do not overstep!
    '''
    sprint('Getting industry analysis data...')
    industry_info = []
    growth_info = []
    valuation_info = []
    dupont_info = []
    market_size = []
    for i, data in enumerate(self.get_data()):
        industry_info.append({self.names[i]: data['hyzx']})  # industry news
        growth_info.append(data['czxbj']['data'])  # growth comparison
        valuation_info.append({self.names[i]: data['gzbj']['data']})  # valuation comparison
        dupont_info.append({self.names[i]: data['dbfxbj']['data']})  # DuPont analysis comparison
        market_size.append(
            {self.names[i] + '——' + '按总市值排名': data['gsgmzsz']})  # ranked by total market cap
        market_size.append(
            {self.names[i] + '——' + '按流通市值排名': data['gsgmltsz']})  # ranked by float market cap
        market_size.append(
            {self.names[i] + '——' + '按营业收入排名': data['gsgmyysr']})  # ranked by operating revenue
        market_size.append(
            {self.names[i] + '——' + '按净利润排名': data['gsgmjlr']})  # ranked by net profit
    return {
        'industry_info': industry_info,
        'growth_info': growth_info,
        'valuation_info': valuation_info,
        'dupont_info': dupont_info,
        'market_size': market_size,
    }
def __init__(
    self,
    industry='银行',
    compare_stocks=[
        '中证银行',
        '沪深300指数',
    ],
    start_date='2019-01-01',
    end_date='2020-03-01',
):
    sprint('Please make sure your industry is present in the market!')
    stock_industry = ConstituentStocks().stock_industry()
    self.start_date = start_date
    self.end_date = end_date
    # Only the first two constituent stocks of the industry are kept
    self.names = stock_industry[stock_industry['industry']
                                == industry]['code_name'][0:2]
    self.compare_stocks = compare_stocks
    sprint('Initializing...')
    global StockData
    # stock_data = StockData(names=self.names, start_date=self.start_date,
    #                        end_date=self.end_date)
    # self.stocks_valuation = stock_data.stocks_valuation()[['name', 'date', 'close', 'peTTM']]
    # self.dates = self.stocks_valuation.date.unique()
    compare_stocks_data = StockData(names=self.compare_stocks,
                                    start_date=self.start_date,
                                    end_date=self.end_date)
    self.compare_stocks_data = compare_stocks_data.stocks_data()
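# --- Usage sketch (assumption): the constructor above appears to belong to an
# industry-analysis class whose actual name is not shown here, so `IndustryAnalysis`
# below is only a hypothetical placeholder.
# ia = IndustryAnalysis(industry='银行', start_date='2019-01-01', end_date='2020-03-01')
# result = ia.info()   # dict with industry_info / growth_info / valuation_info / dupont_info / market_size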
def __init__(self,
             names=['比亚迪', '阳光电源', '璞泰来', '紫光国微', '盛新锂能'],
             start_date='2021-05-01',
             end_date='2021-11-01',
             frequency='d',
             rfr=0.023467,
             funds=10000000,
             path='.\\Markovitz cache\\'):
    self.names = names
    self.lens = len(names)
    self.start_date = start_date
    self.end_date = end_date
    self.frequency = frequency
    # Convert the annual risk-free rate to a per-period rate (in %)
    self.rfr = (rfr * 100) / \
        {'d': 365, 'w': 52, 'm': 30}[frequency]
    self.funds = funds
    self.path = path
    if self.path:
        makedir(self.path, '')
    sprint('Initializing...')
    if not self.path:
        sd = StockData(names=self.names,
                       start_date=self.start_date,
                       end_date=self.end_date,
                       frequency=self.frequency)
        self.datas = sd.stocks_data()
    else:
        try:
            self.datas = pd.read_csv(
                f'{self.path}\\stock data\\stocks_data.csv')
        except:
            sd = StockData(names=self.names,
                           start_date=self.start_date,
                           end_date=self.end_date,
                           frequency=self.frequency,
                           path=self.path)
            self.datas = sd.stocks_data()
    self.datas.index = self.datas['name']
    self.data = self.datas.reset_index(drop=True)
    self.date = list(map(lambda x: str(x)[:10], self.data.date.unique()))
    self.first_date = self.date[0]
    self.last_date = self.date[-1]
    # Opening price on the first trading day
    self.first_price = self.data[self.data.date == self.data.date.unique()[0]][[
        'open', 'name'
    ]].set_index('name').to_dict()['open']
    # Closing price on the last trading day
    self.last_price = self.data[self.data.date == self.data.date.unique()[-1]][[
        'close', 'name'
    ]].set_index('name').to_dict()['close']
    # Maximum number of board lots (100 shares each) affordable for each stock
    self.max_shares_dict = {
        name: math.floor(self.funds / (shares * 100))
        for name, shares in self.last_price.items()
    }
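# --- Usage sketch (assumption): judging from the cache path, this constructor
# belongs to a class named something like `Markovitz`; that name is inferred, not confirmed.
# mk = Markovitz(names=['比亚迪', '阳光电源'], start_date='2021-05-01',
#                end_date='2021-11-01', frequency='d', funds=10000000)
# mk.cml(show=False)   # plot the efficient frontier and capital market line (see cml below)
# best = mk.tree()     # branch-and-bound search for the best integer share allocation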
def GetGoodStock(page=5):
    sprint('Getting data from http://fund.eastmoney.com/data/rankhandler.aspx ...')
    url = "http://fund.eastmoney.com/data/rankhandler.aspx"
    headers = {
        "Host": "fund.eastmoney.com",
        "Referer": "http://fund.eastmoney.com/data/fundranking.html",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36 Edg/88.0.705.63"
    }
    urls = []

    def get_urls(page):
        params = {
            "op": "ph",
            "sc": "6yzf",
            "sd": time.strftime("%Y-%m-%d", time.localtime()),
            "ed": time.strftime("%Y-%m-%d", time.localtime()),
            "pi": str(page),
            "dx": "1",
        }
        response = requests.get(url, headers=headers, params=params)
        response.encoding = response.apparent_encoding
        data = re.findall('var rankData = {datas:(.*),allRe', response.text)[0]
        data = eval(data)
        # Each record starts with the fund code; build the fund's detail-page URL from it
        fund_urls = ['http://fund.eastmoney.com/' +
                     re.findall(r'(\d*),', i)[0] + '.html' for i in data]
        urls.extend(fund_urls)

    for i in range(1, page + 1):
        get_urls(i)

    def get_stock(url):
        # Table 5 on the fund page holds the top holdings: stock name and weight
        df = pd.read_html(url)
        return df[5][['股票名称', '持仓占比']]

    stocks = []

    def main(url):
        stocks.append(get_stock(url))

    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        for url in urls:
            executor.submit(main, url)
    stock = pd.concat(stocks)
    stock['持仓占比'] = stock['持仓占比'].map(lambda x: x.replace('%', ''))
    stock = stock.replace('暂无数据', 0)  # '暂无数据' means "no data yet"
    stock['持仓占比'] = stock['持仓占比'].astype('float')
    group = stock.groupby('股票名称')
    df1 = group.mean()
    df2 = group.count()
    df1 = df1.rename(columns={'持仓占比': '平均持仓占比'})
    df2 = df2.rename(columns={'持仓占比': '出现次数'})
    df = pd.merge(df1, df2, how='outer', on='股票名称')
    df = df.sort_values(by='出现次数', ascending=False)
    return df
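# --- Usage sketch: scrape the first 3 ranking pages and list the stocks that
# top-performing funds hold most often (network access and an unchanged table
# layout on the fund detail pages are assumed).
# df = GetGoodStock(page=3)
# print(df.head(10))   # stocks sorted by how many funds hold them ('出现次数')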
def __init__(self,
             names=['贵州茅台', '隆基股份', '五粮液'],
             weights=False,
             start_date='2021-05-01',
             end_date='2021-11-01',
             frequency='d',
             rfr=0.023467,
             market_index='沪深300指数',
             path='.\\Port cache\\'):
    self.names = names
    self.lens = len(names)
    self.start_date = start_date
    self.end_date = end_date
    self.frequency = frequency
    self.rfr = rfr
    self.market_index = market_index
    self.path = path
    sprint('Initializing...')
    if not self.path:
        sd = StockData(names=self.names + [market_index],
                       start_date=self.start_date,
                       end_date=self.end_date,
                       frequency=self.frequency,
                       path=self.path)
        self.datas = sd.stocks_data()
    else:
        try:
            self.datas = pd.read_csv(
                f'{self.path}\\stock data\\stocks_data.csv')
        except:
            sd = StockData(names=self.names + [market_index],
                           start_date=self.start_date,
                           end_date=self.end_date,
                           frequency=self.frequency,
                           path=self.path)
            self.datas = sd.stocks_data()
    self.datas.index = self.datas['name']
    self.data = self.datas.loc[self.names].reset_index(drop=True)
    self.Rm_data = self.datas.loc[self.market_index].reset_index(drop=True)
    self.date = list(map(lambda x: str(x)[:10], self.datas.date.unique()))
    if not weights:
        # No weights given: fall back to the optimised (max-sharpe) weights
        self.weights_dict = self.optimization()['weights']
    elif isinstance(weights, dict):
        if list(weights.keys()) != self.names:
            raise ValueError('The keys of weights must be identical to names!')
        elif np.sum(list(weights.values())) != 1:
            # Rescale the weights so that they sum to 1
            weights = dict(
                zip(self.names, [
                    i / np.sum(list(weights.values()))
                    for i in weights.values()
                ]))
        self.weights_dict = weights
    else:
        raise ValueError('weights must be a dict!')
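# --- Usage sketch (assumption): the cache path suggests a portfolio class, called
# `Port` here only as a placeholder. Weights that do not sum to 1 are rescaled
# proportionally, e.g. {'贵州茅台': 1, '隆基股份': 1, '五粮液': 2}
# becomes {'贵州茅台': 0.25, '隆基股份': 0.25, '五粮液': 0.5}.
# p = Port(names=['贵州茅台', '隆基股份', '五粮液'],
#          weights={'贵州茅台': 1, '隆基股份': 1, '五粮液': 2})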
def boundary_scatter_data(self, number=500):
    '''
    Boundary (efficient-frontier) scatter data; 500 points are generated by default
    '''
    if self.path:
        try:
            df_scatter = pd.read_csv(
                f'{self.path}\\scatter data\\scatter_data.csv')
        except:
            df_scatter = self.scatter_data()
    else:
        df_scatter = self.scatter_data()
    data_dict = self.calculate()
    data_mean = data_dict['mean']
    data_cov = data_dict['cov']
    scatter_list = []
    sprint('Searching for boundary scatter...')
    for i in trange(number):
        # Draw a random target return within the range spanned by the random portfolios
        random_rate = random.uniform(df_scatter.rate.min(),
                                     df_scatter.rate.max())
        constraints = ({
            'type': 'eq',
            'fun': lambda weights: weights.sum() - 1
        }, {
            'type': 'eq',
            'fun': lambda weights: data_mean.dot(weights.T)['pctChg'] - random_rate
        })
        # Minimise the portfolio variance for the given target return
        opts = sco.minimize(
            fun=lambda weights: weights.dot(data_cov).dot(weights.T),
            x0=np.ones(self.lens) / self.lens,
            bounds=tuple((0, 1) for x in range(self.lens)),
            constraints=constraints)
        scatter_list.append([opts.x, np.sqrt(opts.fun), random_rate])
    df_boundary_scatter = pd.DataFrame(scatter_list,
                                       columns=['weights', 'risk', 'rate'])
    df_boundary_scatter['sharpe'] = (df_boundary_scatter.rate -
                                     self.rfr) / df_boundary_scatter.risk
    df_boundary_scatter = df_boundary_scatter.sort_values(by='sharpe',
                                                          ascending=False)
    if self.path:
        makedir(self.path, 'scatter data')
        df_boundary_scatter.to_csv(
            f'{self.path}\\scatter data\\boundary_scatter_data.csv')
    return df_boundary_scatter
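# --- Note on the optimisation above (a restatement of what the solver does, not
# new behaviour): for each randomly drawn target return r it solves
#     min_w  w' Σ w   subject to   sum_i w_i = 1,  μ' w = r,  0 <= w_i <= 1,
# where Σ is the covariance matrix and μ the mean 'pctChg' returns. The stored
# 'risk' is the square root of the minimised variance, so the (risk, rate) pairs
# trace out the efficient frontier.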
def stocks_data(self):
    '''
    Return a DataFrame containing all the stocks data

    date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,peTTM,psTTM,pcfNcfTTM,pbMRQ,isST

    See the link below for details:
    http://baostock.com/baostock/index.php/A股K线数据
    '''
    if not self.path:
        stocks_info = self.stocks_info()
    else:
        try:
            stocks_info = pd.read_csv(
                f'{self.path}\\stock data\\stocks_info.csv').set_index('name').T.to_dict()
        except:
            stocks_info = self.stocks_info()
    df_list = []
    sprint('Loading stocks data...')
    for name in tqdm(self.names):
        code = stocks_info[name]['code']
        if stocks_info[name]['ipoDate'] > self.start_date:
            sprint(
                f"{name}'s ipo date is {stocks_info[name]['ipoDate']}, which is after {self.start_date}.")
        if self.frequency == 'd':
            rs = bs.query_history_k_data_plus(
                code,
                'date,code,open,high,low,close,preclose,volume,amount,adjustflag,turn,tradestatus,pctChg,peTTM,psTTM,pcfNcfTTM,pbMRQ,isST',
                start_date=self.start_date,
                end_date=self.end_date,
                frequency='d',
                adjustflag=self.adjustflag)
        elif self.frequency == 'w':
            rs = bs.query_history_k_data_plus(
                code,
                'date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg',
                start_date=self.start_date,
                end_date=self.end_date,
                frequency='w',
                adjustflag=self.adjustflag)
        elif self.frequency == 'm':
            rs = bs.query_history_k_data_plus(
                code,
                'date,code,open,high,low,close,volume,amount,adjustflag,turn,pctChg',
                start_date=self.start_date,
                end_date=self.end_date,
                frequency='m',
                adjustflag=self.adjustflag)
        df = get_data(rs)
        df['name'] = name
        df_list.append(df)
    df = pd.concat(df_list).apply(pd.to_numeric, errors='ignore')
    df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')
    if self.path:
        df.to_csv(f'{self.path}\\stock data\\stocks_data.csv', index=False)
    return df
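# --- Usage sketch: fetch daily bars for two stocks (an active baostock session
# is assumed; if the library does not log in elsewhere, call bs.login() first).
# sd = StockData(names=['贵州茅台', '隆基股份'],
#                start_date='2021-05-01', end_date='2021-11-01', frequency='d')
# df = sd.stocks_data()
# print(df[['name', 'date', 'close', 'pctChg']].head())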
def request(self, urls):
    '''
    How dare you! No peeking!
    '''
    sprint('Getting data...')
    result_list = []
    for n, url in enumerate(tqdm(urls)):
        response = requests.get(url, timeout=100)
        response.encoding = response.apparent_encoding
        # Split the comma-separated payload into lines; after transposing,
        # the first field of each line becomes a column name
        data = response.text.replace(
            '\r', '').replace('\t', '').split('\n')
        df = pd.DataFrame(
            [i.split(',') for i in data if i != '']).set_index(0).T
        df['名称'] = self.names[n]
        result_list.append(df)
    df = pd.concat(result_list)
    return df
def tree(self):
    '''
    Branch and bound
    Returns the optimal integer solution and its sharpe ratio
    '''
    # Initial integer combinations
    exam_tree = pd.DataFrame()
    exam_tree['weights'] = self.port(self.init_tree(), near=1)
    max_sharpe = -9999999
    sprint('Searching for the integer shares')
    n = 0
    flag = False
    near = 1
    while True:
        n += 1
        tree_list = []
        print(f'Iteration {n}:')
        for i in tqdm(list(exam_tree.itertuples())):
            examed_sharpe = self.exam(i.weights)
            if examed_sharpe != 0:
                tree_list.append([i.weights, examed_sharpe])
        df_exam = pd.DataFrame(tree_list,
                               columns=['shares', 'sharpe'
                                        ]).sort_values(by='sharpe',
                                                       ascending=False)
        # Prune: keep only candidates at least as good as the current best
        df_exam = df_exam[df_exam['sharpe'] >= max_sharpe]
        if len(df_exam) == 1:
            return df_exam.iloc[0].to_dict()
        # Best sharpe ratio found in this iteration
        max_sharpe = df_exam['sharpe'].iloc[0]
        print(
            f'max_sharpe:{max_sharpe}\nshares:{df_exam["shares"].iloc[0]}\n'
            + '-' * 100)
        # Alternate the neighbourhood radius used to generate the next candidates
        if flag:
            near = 1
            flag = False
        elif n > 1:
            near = 2
            flag = True
        exam_tree = pd.DataFrame()
        exam_tree['weights'] = self.port(df_exam, near=near)
def cml(self, show=True):
    '''
    Capital market line & efficient frontier
    '''
    if self.path:
        try:
            df_scatter = pd.read_csv(
                f'{self.path}\\scatter data\\scatter_data.csv')
            df_boundary_scatter = pd.read_csv(
                f'{self.path}\\scatter data\\boundary_scatter_data.csv')
        except:
            df_scatter = self.scatter_data()
            df_boundary_scatter = self.boundary_scatter_data()
        df_scatter['boundary'] = False
        df_boundary_scatter['boundary'] = True
        pd.concat([
            df_scatter, df_boundary_scatter
        ]).to_csv(f'{self.path}\\scatter data\\all_scatter_data.csv')
    else:
        df_scatter = self.scatter_data()
        df_boundary_scatter = self.boundary_scatter_data()
    max_sharpe = self.optimization()['sharpe']
    sprint(f'max sharpe: {max_sharpe}')
    plt.cla()
    plt.style.use('seaborn-paper')
    # Random portfolios (blue) and efficient-frontier portfolios (red)
    plt.scatter(df_scatter.risk, df_scatter.rate, s=10, marker=".", c='b')
    plt.scatter(df_boundary_scatter.risk,
                df_boundary_scatter.rate,
                s=10,
                marker=".",
                c='r')
    # Capital market line: intercept at the risk-free rate, slope equal to the max sharpe
    plt.axline(xy1=(0, self.rfr), slope=max_sharpe, c='m')
    plt.xlim(df_scatter.risk.min() * 0.8, df_scatter.risk.max() * 1.2)
    plt.ylim(df_scatter.rate.min() * 0.8, df_scatter.rate.max() * 1.2)
    plt.xlabel('Risk')
    plt.ylabel('Yield')
    if show:
        plt.show()
    else:
        plt.savefig(f'{self.path}\\cml.svg', format='svg')
    return pd.concat([df_scatter, df_boundary_scatter])
def __init__(self,
             names=['比亚迪', '阳光电源', '璞泰来', '紫光国微', '盛新锂能'],
             start_date='2021-05-01',
             end_date='2021-11-01',
             frequency='w',
             rfr=0.023467,
             market_index='沪深300指数',
             path='.\\CAPM cache\\'):
    self.names = names
    self.lens = len(names)
    self.start_date = start_date
    self.end_date = end_date
    self.frequency = frequency
    # Convert the annual risk-free rate to a per-period rate (in %)
    self.rfr = (rfr * 100) / \
        {'d': 365, 'w': 52, 'm': 30}[frequency]
    self.market_index = market_index
    self.path = path
    if self.path:
        makedir(self.path, '')
    sprint('Initializing...')
    if not self.path:
        sd = StockData(names=self.names + [market_index],
                       start_date=self.start_date,
                       end_date=self.end_date,
                       frequency=self.frequency)
        self.datas = sd.stocks_data()
    else:
        try:
            self.datas = pd.read_csv(
                f'{self.path}\\stock data\\stocks_data.csv')
        except:
            sd = StockData(names=self.names + [market_index],
                           start_date=self.start_date,
                           end_date=self.end_date,
                           frequency=self.frequency,
                           path=self.path)
            self.datas = sd.stocks_data()
    self.datas.index = self.datas['name']
    self.data = self.datas.loc[self.names].reset_index(drop=True)
    self.Rm_data = self.datas.loc[self.market_index].reset_index(drop=True)
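# --- Usage sketch (assumption): the cache path suggests a CAPM class; the name
# `CAPM` below is inferred from that path, not confirmed by the source.
# capm = CAPM(names=['比亚迪', '阳光电源'], frequency='w',
#             start_date='2021-05-01', end_date='2021-11-01')
# capm.scl(name='比亚迪')   # security characteristic line for one asset (see scl below)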
def weight_tests(self, number=5):
    '''
    Build every portfolio containing exactly `number` of the given stocks
    '''
    lists = []
    for port in tqdm(list(combinations(self.names, number))):
        # Note: self.names, self.data and self.lens are rebound to each combination
        self.names = list(port)
        self.data = self.datas.loc[self.names].reset_index(drop=True)
        self.lens = len(self.names)
        test_dict = self.optimization()
        weight_array = np.array(list(test_dict['weights'].values()))
        test_dict['std'] = np.std(weight_array)
        test_dict['min'] = np.min(weight_array)
        test_dict['max'] = np.max(weight_array)
        lists.append(test_dict)
        if test_dict['min'] > 0.02:
            sprint(test_dict)
    df_test = pd.DataFrame(
        lists, columns=['weights', 'sharpe', 'std', 'min', 'max'])
    if self.path:
        df_test.to_csv(f'{self.path}\\weight_test.csv', index=False)
    return df_test
def stocks_info(self):
    '''
    Return a dict containing stock names, codes and ipoDate
    {
        '贵州茅台': {'code': 'sh.600519', 'ipoDate': '2001-08-27'},
        '隆基股份': {'code': 'sh.601012', 'ipoDate': '2012-04-11'},
        ...
    }
    '''
    info = {}
    sprint('Loading stocks information...')
    for name in tqdm(self.names):
        rs = bs.query_stock_basic(code_name=name)
        stock_info = get_data(rs)
        info[name] = {'code': stock_info['code'][0],
                      'ipoDate': stock_info['ipoDate'][0]}
    if self.path:
        makedir(self.path, 'stock data')
        df_info = pd.DataFrame(info).T
        df_info['name'] = df_info.index
        df_info.to_csv(
            f'{self.path}\\stock data\\stocks_info.csv', index=False)
    return info
def scl(self, name='', show=True):
    '''
    Security characteristic line of the given asset
    '''
    if name not in self.names:
        sprint(f'The name parameter was not given, or {name} is not among the given risky assets! '
               'A risky asset has been chosen at random instead!', color='red')
        name = random.choice(self.names)
    Ri = self.data[self.data.name == name]['pctChg']
    Rm = self.Rm_data['pctChg']
    ls_dict = self.ls_beta(Ri)
    plt.cla()
    # Regression line Ri = alpha + beta * Rm
    plt.axline(xy1=(0, ls_dict['alpha_ols']),
               slope=ls_dict['beta_ols'],
               c='m')
    # Market return on the x-axis, asset return on the y-axis, matching the regression line
    plt.scatter(Rm, Ri, s=10, marker=".", c='b')
    plt.xlabel(f'{self.market_index} return (%)')
    plt.ylabel(f'{name} return (%)')
    if show:
        plt.show()
    else:
        makedir(self.path, 'scl')
        plt.savefig(f'{self.path}\\scl\\{name}.svg', format='svg')
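# --- Usage sketch: plot (or save) the characteristic line of one asset; the name
# must be among those passed to the constructor above.
# capm.scl(name='阳光电源', show=True)    # display interactively
# capm.scl(name='紫光国微', show=False)   # save to '<path>\\scl\\紫光国微.svg'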
def save(self, path, func_name, dic):
    # Write each DataFrame in `dic` to its own sheet of a single Excel workbook
    with pd.ExcelWriter(f'{path}/{func_name}.xlsx') as writer:
        for name, data in dic.items():
            data.to_excel(writer, sheet_name=name)
    sprint(f'Saved in {path}.')