def output_high_score_funds(each_query=None, quarter_index=None):
    """Export the high-score funds of one quarter to an Excel workbook.

    Args:
        each_query: Query object exposing ``select_high_score_funds``.
            A new ``FundQuery`` is created when omitted.
        quarter_index: Quarter identifier forwarded to the query. Defaults
            to the value returned by ``get_last_quarter_str()``.
    """
    if each_query is None:
        each_query = FundQuery()
    if quarter_index is None:
        quarter_index = get_last_quarter_str()
    print("quarter_index", quarter_index)

    high_score_funds = each_query.select_high_score_funds(
        quarter_index=quarter_index)

    # Column headers written to the sheet; they are applied positionally to
    # the rows returned by select_high_score_funds.
    columns = [
        '代码', '名称', '投资风格', '基金经理', '现任经理管理起始时间', '成立时间',
        '三年晨星评级', '五年晨星评级', '夏普比率', '股票仓位', '十大持股仓位',
        '两年风险评级', '三年风险评级', '五年风险评级', '阿尔法系数', '贝塔系数',
        '标准差', '总资产', '数据更新时间',
    ]
    df_high_score_funds = pd.DataFrame(high_score_funds, columns=columns)

    path = './outcome/数据整理/funds/high-score-funds.xlsx'
    update_xlsx_file(path, df_high_score_funds, quarter_index)
def acquire_fund_base():
    """Crawl base information for funds and insert it into the database.

    The funds to process come from
    ``FundQuery.get_fund_from_snapshot_table_no_exist``; the work is handed to
    ``bootstrap_thread`` which runs ``crawlData`` over ``record_total`` records
    with 4 as its third argument. Codes of funds whose detail page could not
    be opened, or that have no founding date, are collected and printed at
    the end.
    """
    lock = Lock()
    each_fund_query = FundQuery()
    each_fund_insert = FundInsert()
    # Number of records to process.
    record_total = each_fund_query.get_fund_count_from_snapshot_no_exist()
    idWorker = IdWorker()
    print('record_total', record_total)
    # Codes of funds whose detail page was abnormal.
    error_funds = []

    def crawlData(start, end):
        """Process the records in [start, end) with a dedicated browser."""
        login_url = 'https://www.morningstar.cn/membership/signin.aspx'
        chrome_driver = login_morning_star(login_url, False)
        try:
            page_start = start
            page_limit = 10
            # Walk the fund list page by page.
            while page_start < end:
                results = each_fund_query.get_fund_from_snapshot_table_no_exist(
                    page_start, page_limit)
                for record in results:
                    each_fund = FundSpider(record[0], record[1], record[2],
                                           chrome_driver)
                    # Refresh the fund from the Morningstar site.
                    is_normal = each_fund.go_fund_url()
                    # The return contract of go_fund_url is not visible here,
                    # so the explicit comparison is kept as-is.
                    if is_normal == False:
                        with lock:
                            error_funds.append(each_fund.fund_code)
                        continue
                    each_fund.get_fund_base_info()
                    # Skip funds without a founding date.
                    if each_fund.found_date == '-':
                        with lock:
                            error_funds.append(each_fund.fund_code)
                        continue
                    # The id generator is shared between worker threads.
                    with lock:
                        snow_flake_id = idWorker.get_id()
                    base_dict = {
                        'id': snow_flake_id,
                        'fund_code': each_fund.fund_code,
                        'morning_star_code': each_fund.morning_star_code,
                        'fund_name': each_fund.fund_name,
                        'fund_cat': each_fund.fund_cat,
                        'company': each_fund.company,
                        'found_date': each_fund.found_date,
                    }
                    each_fund_insert.insert_fund_base_info(base_dict)
                page_start = page_start + page_limit
                print('page_start', page_start)
        finally:
            # Always release the browser, even when a page raises.
            chrome_driver.close()

    bootstrap_thread(crawlData, record_total, 4)
    print('error_funds', error_funds)
def __init__(self):
    """Set ``quarter_index`` (``'<year>-Q<n>'``) and create the query object.

    The quarter is derived from the local date 90 days (3 x 30) before now.
    """
    # NOTE(review): 90 days is an approximation of "one quarter back"; close
    # to a quarter's end the shifted date may still fall in the current
    # quarter (e.g. 30 June - 90 days = 1 April) - confirm this is acceptable.
    last_quarter_time = time.localtime(time.time() - 3 * 30 * 24 * 3600)
    year = time.strftime("%Y", last_quarter_time)
    date = time.strftime("%m-%d", last_quarter_time)
    index = get_season_index(date)
    self.quarter_index = year + '-Q' + str(index)
    self.each_query = FundQuery()
def update_archive_status(self):
    """Flag funds as archived when their purchase status says so.

    Reads up to 15000 funds via ``select_quarter_fund(0, 15000)``, fetches
    each one's analysis info from the ``zh_fund`` platform and calls
    ``update_archive_status(1, ...)`` for those whose ``buy_status`` is one of
    the two archived states; every other fund is just printed.
    """
    archived_states = ('已清盘', '终止上市')
    query = FundQuery()
    updater = FundUpdate()

    funds = query.select_quarter_fund(0, 15000)
    print("funds's len", len(funds))

    for row in funds:
        code = row[0]
        api = FundApier(code, platform='zh_fund')
        api.get_analyse_info_zh()
        if api.buy_status in archived_states:
            updater.update_archive_status(1, fund_code=code)
        else:
            print('没有归档基金状态:', code, api.buy_status, api.sell_status)
def update_fund_total_asset(self, end_date='2021-06-11'):
    """Fill in the total asset of funds returned by ``select_total_asset_is_null``.

    Each fund is first looked up on the ``ai_fund`` platform; when that yields
    ``None`` the ``zh_fund`` platform is tried. A truthy result is written
    back with ``FundUpdate.update_fund_total_asset``.

    Args:
        end_date: Date string forwarded to ``FundApier``. Defaults to the
            previously hard-coded ``'2021-06-11'``.
    """
    fund_query = FundQuery()
    each_fund_update = FundUpdate()
    all_total_asset_is_null_results = fund_query.select_total_asset_is_null()
    for fund_item in all_total_asset_is_null_results:
        fund_code = fund_item[0]
        each_fund = FundApier(fund_code, end_date=end_date, platform='ai_fund')
        total_asset = each_fund.get_total_asset()
        # Not found on ai_fund: fall back to zh_fund.
        if total_asset is None:
            print("fund_code", fund_code)
            each_fund = FundApier(fund_code, end_date=end_date,
                                  platform='zh_fund')
            total_asset = each_fund.get_total_asset()
        if total_asset:
            each_fund_update.update_fund_total_asset(fund_code, total_asset)
class FundStatistic:
    """Statistics over the top-ten stock holdings of funds."""

    def __init__(self):
        """Set ``quarter_index`` (``'<year>-Q<n>'``) and create the query object.

        The quarter is derived from the local date 90 days (3 x 30) before now.
        """
        # NOTE(review): 90 days only approximates "one quarter back"; near a
        # quarter's end the shifted date can stay in the current quarter.
        last_quarter_time = time.localtime(time.time() - 3 * 30 * 24 * 3600)
        year = time.strftime("%Y", last_quarter_time)
        date = time.strftime("%m-%d", last_quarter_time)
        index = get_season_index(date)
        self.quarter_index = year + '-Q' + str(index)
        self.each_query = FundQuery()

    def all_stock_fund_count(self, *, quarter_index=None, filter_count=100):
        """Count, per stock name, how many top-ten slots hold that stock.

        Args:
            quarter_index: Quarter to query; falls back to
                ``self.quarter_index`` when falsy.
            filter_count: Only stocks whose count is strictly greater than
                this value are returned.

        Returns:
            list[tuple[str, int]]: ``(stock name, count)`` pairs sorted by
            count in descending order.
        """
        quarter_index = quarter_index if quarter_index else self.quarter_index
        results = self.each_query.select_top_10_stock(quarter_index)

        holder_counts = dict()
        for result in results:
            # Each row is read as one leading field followed by
            # (code, name) pairs.
            for index in range(1, len(result), 2):
                code = result[index]
                name = result[index + 1]
                # Empty holding slots must not be counted (previously they
                # ended up under the literal key 'None', and a None code
                # reset an existing count to 1).
                if code is None or name is None:
                    continue
                # Keyed by stock name only, to cover both HK and A shares.
                key = str(name)
                holder_counts[key] = holder_counts.get(key, 0) + 1

        filtered = {
            key: count
            for key, count in holder_counts.items() if count > filter_count
        }
        return sorted(filtered.items(), key=lambda x: x[1], reverse=True)

    # Per-quarter count of funds holding one particular stock.
    def item_stock_fund_count(self, stock_name):
        """Return ``select_special_stock_fund_count(stock_name)`` unchanged."""
        return self.each_query.select_special_stock_fund_count(stock_name)
class FundStatistic:
    """Statistics over the top-ten stock holdings of funds."""

    def __init__(self):
        """Set ``quarter_index`` (``'<year>-Q<n>'``) and create the query objects.

        The quarter is derived from the local date 90 days (3 x 30) before now.
        """
        # NOTE(review): 90 days only approximates "one quarter back"; near a
        # quarter's end the shifted date can stay in the current quarter.
        last_quarter_time = time.localtime(time.time() - 3 * 30 * 24 * 3600)
        year = time.strftime("%Y", last_quarter_time)
        date = time.strftime("%m-%d", last_quarter_time)
        index = get_quarter_index(date)
        self.quarter_index = year + '-Q' + str(index)
        self.each_query = FundQuery()
        self.stock_query = StockQuery()

    def all_stock_fund_count(self,
                             *,
                             quarter_index=None,
                             fund_code_pool=None,
                             filter_count=100):
        """Summarise the top-ten holdings of one quarter per stock.

        Args:
            quarter_index (string, optional): Quarter to query. Falls back to
                ``self.quarter_index`` when falsy.
            fund_code_pool (string[], optional): Fund pool forwarded to the
                query; ``None`` is forwarded unchanged.
            filter_count (int, optional): Only stocks whose count is strictly
                greater than this value are returned. Defaults to 100.

        Returns:
            tuple[]: ``(key, {'count': ..., 'holder_asset': ...})`` pairs
            sorted by count, descending. New keys have the form
            ``'<code>-<name>'``.
        """
        quarter_index = quarter_index if quarter_index else self.quarter_index
        results = self.each_query.select_top_10_stock(quarter_index,
                                                      fund_code_pool)
        code_dict = dict()
        for result in results:
            total_asset = result[2]
            # Holdings are read as (code, name, portion) triples from index 4.
            for index in range(4, len(result), 3):
                code = result[index]
                name = result[index + 1]
                portion = result[index + 2]
                if code is None or name is None:
                    continue
                # Reuse an existing key that the helper matches to this code.
                key = fisrt_match_condition_from_list(list(code_dict), code)
                holder_asset = round(portion * total_asset / 100,
                                     4) if total_asset and portion else 0
                if key is None and code and name:
                    key = str(code) + '-' + str(name)
                if key in code_dict:
                    previous = code_dict[key]
                    code_dict[key] = {
                        'count': previous['count'] + 1,
                        'holder_asset':
                        previous['holder_asset'] + holder_asset,
                    }
                else:
                    code_dict[key] = {'count': 1, 'holder_asset': holder_asset}

        filtered = {
            key: value
            for key, value in code_dict.items()
            if value['count'] > filter_count and key is not None
        }
        return sorted(filtered.items(),
                      key=lambda x: x[1]['count'],
                      reverse=True)

    def all_stock_fund_count_and_details(self,
                                         *,
                                         quarter_index=None,
                                         fund_code_pool=None,
                                         filter_count=100):
        """Summarise the top-ten holdings of one quarter, with holder details.

        Args:
            quarter_index (string, optional): Quarter to query. Falls back to
                ``self.quarter_index`` when falsy.
            fund_code_pool (string[], optional): Fund pool forwarded to the
                query; ``None`` is forwarded unchanged.
            filter_count (int, optional): Accepted for signature parity with
                ``all_stock_fund_count`` but not applied by this method.

        Returns:
            tuple[]: ``(key, {'count': ..., 'fund_list': [...]})`` pairs in
            insertion order (unsorted, unfiltered).
        """
        quarter_index = quarter_index if quarter_index else self.quarter_index
        results = self.each_query.select_top_10_stock(quarter_index,
                                                      fund_code_pool)
        code_dict = dict()
        for result in results:
            fund_info = {
                '基金代码': result[0],
                '基金名称': result[1],
                '基金规模': result[2],
                '股票总仓位': result[3],
            }
            total_asset = result[2]
            for index in range(4, len(result), 3):
                code = result[index]
                name = result[index + 1]
                portion = result[index + 2]
                if code is None or name is None:
                    continue
                key = fisrt_match_condition_from_list(list(code_dict), code)
                if key is None and code and name:
                    key = str(code) + '-' + str(name)
                holder_asset = round(portion * total_asset / 100,
                                     4) if total_asset and portion else 0
                holder = {
                    **fund_info,
                    '仓位占比': portion,
                    '持有市值(亿元)': holder_asset,
                    # index 4, 7, 10, ... -> rank 1, 2, 3, ...
                    '仓位排名': index // 3,
                }
                if key in code_dict:
                    code_dict[key]['count'] = code_dict[key]['count'] + 1
                    code_dict[key]['fund_list'].append(holder)
                else:
                    code_dict[key] = {'count': 1, 'fund_list': [holder]}
        return list(code_dict.items())

    # Per-quarter count of funds holding one particular stock.
    def item_stock_fund_count(self, stock_code, fund_code_pool=None):
        """Return ``select_special_stock_fund_count(...)`` unchanged."""
        return self.each_query.select_special_stock_fund_count(
            stock_code, fund_code_pool)

    def select_special_stock_special_quarter_info(self,
                                                  stock_code,
                                                  quarter_index=None,
                                                  fund_code_pool=None):
        """Sum up what the queried funds hold of one stock (to be deprecated).

        Returns:
            dict: ``{'count': number of rows returned by the query,
            'holder_asset': summed holding value}``.
        """
        result = self.each_query.select_special_stock_special_quarter_info(
            stock_code, quarter_index, fund_code_pool)
        target_stock_dict = {'count': len(result)}
        total_holder_asset = 0
        for holders in result:
            total_asset = holders[1]
            # Holdings are read as (code, portion) pairs from index 2; only
            # the first slot matching stock_code is counted per row.
            for index in range(2, len(holders), 2):
                if holders[index] == stock_code:
                    portion = holders[index + 1]
                    holder_asset = round(portion * total_asset / 100,
                                         4) if total_asset and portion else 0
                    total_holder_asset = total_holder_asset + holder_asset
                    break
        target_stock_dict['holder_asset'] = total_holder_asset
        return target_stock_dict

    def select_fund_pool(self,
                         *,
                         morning_star_rating_5="",
                         morning_star_rating_3="",
                         **args):
        """Forward all conditions to ``select_certain_condition_funds``."""
        return self.each_query.select_certain_condition_funds(
            morning_star_rating_5=morning_star_rating_5,
            morning_star_rating_3=morning_star_rating_3,
            **args)

    def select_stock_pool_industry(self, fund_code_pool):
        """Return ``StockQuery.query_stock_industry(fund_code_pool)``."""
        return self.stock_query.query_stock_industry(fund_code_pool)

    def select_special_fund_info(self, code, quarter_index=None):
        """Return ``FundQuery.select_special_fund_info(code, quarter_index)``."""
        return self.each_query.select_special_fund_info(code, quarter_index)

    def summary_special_funds_stock_detail(self,
                                           fund_code_pool,
                                           quarter_index=None):
        """List the six-digit-coded holdings of the given funds with industry.

        For every fund in ``fund_code_pool`` the holdings are read as
        (code, name, portion) triples starting at index 7 of the row returned
        by ``select_special_fund_info``. Only holdings whose code is a string
        of exactly six digits are kept. Empty slots and funds without a row
        are skipped. A holding whose industry lookup returns nothing is kept
        with ``None`` industry names.

        Returns:
            list[list]: One row per kept holding: the seven leading fund
            fields, stock code, name, portion, rank, then the third-,
            second- and first-level industry names.
        """
        six_digit_code = re.compile(r"^\d{6}$")
        holder_stock_industry_list = []
        for pool_fund_code in fund_code_pool:
            fund_info = self.select_special_fund_info(pool_fund_code,
                                                      quarter_index)
            if not fund_info:
                continue
            fund_code = fund_info[0]
            fund_name = fund_info[1]
            fund_cat = fund_info[2]
            fund_manager = fund_info[3]
            fund_total_asset = fund_info[4]
            fund_total_portion = fund_info[5]
            fund_ten_portion = fund_info[6]
            for index in range(7, len(fund_info), 3):
                stock_code = fund_info[index]
                stock_name = fund_info[index + 1]
                stock_portion = fund_info[index + 2]
                # index 7, 10, 13, ... -> rank 1, 2, 3, ...
                stock_index = int((index - 4) / 3)
                # Guard against empty slots before running the regex.
                if not isinstance(stock_code, str):
                    continue
                if not six_digit_code.search(stock_code):
                    continue
                industry_rows = self.select_stock_pool_industry([stock_code])
                industry = industry_rows[0] if industry_rows else {}
                holder_stock_industry_list.append([
                    fund_code, fund_name, fund_cat, fund_manager,
                    fund_total_asset, fund_total_portion, fund_ten_portion,
                    stock_code, stock_name, stock_portion, stock_index,
                    industry.get('industry_name_third'),
                    industry.get('industry_name_second'),
                    industry.get('industry_name_first'),
                ])
        return holder_stock_industry_list

    def query_all_stock_industry_info(self):
        """Return ``StockQuery.query_all_stock()``."""
        return self.stock_query.query_all_stock()
Author: [email protected] ----- Copyright (c) 2021 Camel Lu ''' from pprint import pprint from db.connect import connect from time import sleep import os from sql_model.fund_query import FundQuery from fund_info.api import FundApier if __name__ == '__main__': page_start = 3600 page_limit = 10000 fund_query = FundQuery() # 获取所有的A类基金 all_a_results = fund_query.select_all_a_class_fund(page_start, page_limit) # 获取查询的所有记录 for i in range(0, len(all_a_results)): # pprint(result[1]) name = all_a_results[i] c_class_result = fund_query.select_c_class_fund(name[1]) if c_class_result: fund_code = c_class_result[0] fund_name = c_class_result[1] platform = 'zh_fund' if '封闭' in fund_name else 'ai_fund' each_fund = FundApier(fund_code, '2021-05-07', platform) total_asset = each_fund.get_total_asset() # 如果在爱基金平台找不到,则到展恒基金找
# Fetch the total asset of a same-family fund through the API.
def get_total_asset(fund_code, platform):
    """Return the total asset reported for ``fund_code``.

    The fund is looked up on ``platform`` first. When that yields ``None``
    and ``platform`` is ``'ai_fund'``, the ``zh_fund`` platform is tried.

    Args:
        fund_code: Code of the fund to look up.
        platform: Platform name forwarded to ``FundApier``.

    Returns:
        Whatever ``FundApier.get_total_asset()`` returned last; ``None`` when
        no lookup produced a value.
    """
    each_fund = FundApier(fund_code, '2021-05-07', platform)
    total_asset = each_fund.get_total_asset()
    # Not found on ai_fund: fall back to zh_fund.
    if total_asset is None and platform == 'ai_fund':
        print("fund_code", total_asset, fund_code)
        each_fund = FundApier(fund_code, '2021-05-10', 'zh_fund')
        total_asset = each_fund.get_total_asset()
    # The value was previously computed but never handed back to the caller.
    return total_asset


if __name__ == '__main__':
    each_fund_query = FundQuery()
    # Number of records to process.
    record_total = each_fund_query.get_crawler_quarter_fund_total()
    IdWorker = IdWorker()
    page_limit = 5
    page_start = 0
    # error_funds = []
    # Write the CSV headers.
    result_dir = './output/'
    fund_csv = FundCSV(result_dir)
    if page_start == 0:
        fund_csv.write_season_catch_fund(True)
        fund_csv.write_abnormal_url_fund(True)
    # df = pandas.read_csv(
输出高分基金 """ if each_query == None: each_query = FundQuery() if quarter_index == None: quarter_index = get_last_quarter_str() print("quarter_index", quarter_index) high_score_funds = each_query.select_high_score_funds( quarter_index=quarter_index) columns_bk = [ '代码', '名称', '季度', '总资产', '现任基金经理管理起始时间', '投资风格', '三月最大回撤', '六月最大回撤', '夏普比率', '阿尔法系数', '贝塔系数', 'R平方', '标准差', '风险系数', '两年风险评级', '三年风险评级', '五年风险评级', '五年晨星评级', '三年晨星评级', '股票仓位', '十大持股仓位' ] columns = [ '代码', '名称', '投资风格', '基金经理', '现任经理管理起始时间', '成立时间', '三年晨星评级', '五年晨星评级', '夏普比率', '股票仓位', '十大持股仓位', '两年风险评级', '三年风险评级', '五年风险评级', '阿尔法系数', '贝塔系数', '标准差', '总资产', '数据更新时间' ] df_high_score_funds = pd.DataFrame(high_score_funds, columns=columns) # pprint(df_high_score_funds) path = './outcome/数据整理/funds/high-score-funds.xlsx' update_xlsx_file(path, df_high_score_funds, quarter_index) if __name__ == '__main__': each_query = FundQuery() output_high_score_funds(each_query)
def acquire_fund_quarter():
    """Crawl the quarterly data of funds and store it in the database.

    The work is handed to ``bootstrap_thread``, which runs ``crawlData`` over
    ``record_total`` records with 8 as its third argument. Funds whose detail
    page cannot be opened, or whose crawl raised the spider's catch flag, are
    logged to CSV files under ``./output/``. The process exits when the
    crawl has finished.
    """
    lock = Lock()
    each_fund_query = FundQuery()
    # Number of records to process.
    record_total = each_fund_query.select_quarter_fund_total()
    print('record_total', record_total)
    idWorker = IdWorker()
    result_dir = './output/'
    fund_csv = FundCSV(result_dir)
    # Called with True once up front, with (False, line) for each logged fund.
    fund_csv.write_season_catch_fund(True)
    fund_csv.write_abnormal_url_fund(True)

    def crawlData(start, end):
        """Process the records in [start, end) with a dedicated browser."""
        login_url = 'https://www.morningstar.cn/membership/signin.aspx'
        chrome_driver = login_morning_star(login_url, False)
        try:
            page_start = start
            page_limit = 10
            while page_start < end:
                results = each_fund_query.select_quarter_fund(
                    page_start, page_limit)
                for record in results:
                    sleep(1)
                    each_fund = FundSpider(record[0], record[1], record[2],
                                           chrome_driver)
                    is_normal = each_fund.go_fund_url()
                    # Detail page unreachable: log to CSV and move on. The
                    # return contract of go_fund_url is not visible here, so
                    # the explicit comparison is kept as-is.
                    if is_normal == False:
                        fund_infos = [
                            each_fund.fund_code, each_fund.morning_star_code,
                            each_fund.fund_name, record[3], page_start,
                            '页面跳转有问题'
                        ]
                        output_line = ', '.join(
                            str(x) for x in fund_infos) + '\n'
                        fund_csv.write_abnormal_url_fund(False, output_line)
                        continue

                    # Skip the fund when the page's quarter differs from the
                    # quarter being crawled.
                    quarter_index = each_fund.get_quarter_index()
                    if quarter_index != each_fund.quarter_index:
                        print('quarter_index', quarter_index,
                              each_fund.update_date, each_fund.fund_code,
                              each_fund.fund_name)
                        continue

                    each_fund.get_fund_season_info()  # basic quarterly data
                    each_fund.get_fund_manager_info()  # fund manager section
                    each_fund.get_fund_morning_rating()  # Morningstar rating
                    each_fund.get_fund_qt_rating()  # risk rating
                    # Crawl the asset composition only when stocks are held.
                    if (each_fund.stock_position['total'] != '0.00'
                            and each_fund.total_asset is not None):
                        each_fund.get_asset_composition_info()
                    # Record funds whose crawl raised the catch flag.
                    if each_fund._is_trigger_catch == True:
                        fund_infos = [
                            each_fund.fund_code, each_fund.morning_star_code,
                            each_fund.fund_name, record[3],
                            each_fund.stock_position['total'], page_start,
                            each_fund._catch_detail
                        ]
                        output_line = ', '.join(
                            str(x) for x in fund_infos) + '\n'
                        fund_csv.write_season_catch_fund(False, output_line)

                    # The id generator is shared between worker threads.
                    with lock:
                        snow_flake_id = idWorker.get_id()

                    fund_insert = FundInsert()
                    # Fund manager.
                    if each_fund.manager.get('id'):
                        manager_dict = {
                            'id': snow_flake_id,
                            'manager_id': each_fund.manager.get('id'),
                            'name': each_fund.manager.get('name'),
                            'brife': each_fund.manager.get('brife')
                        }
                        fund_insert.insert_fund_manger_info(manager_dict)

                    quarterly_dict = {
                        'id': snow_flake_id,
                        'quarter_index': each_fund.quarter_index,
                        'fund_code': each_fund.fund_code,
                        'investname_style': each_fund.investname_style,
                        'total_asset': each_fund.total_asset,
                        'manager_id': each_fund.manager.get('id'),
                        'manager_start_date': each_fund.manager.get('start_date'),
                        'three_month_retracement': each_fund.three_month_retracement,
                        'june_month_retracement': each_fund.june_month_retracement,
                        'risk_statistics_alpha': each_fund.risk_statistics.get('alpha'),
                        'risk_statistics_beta': each_fund.risk_statistics.get('beta'),
                        'risk_statistics_r_square': each_fund.risk_statistics.get('r_square'),
                        'risk_assessment_standard_deviation': each_fund.risk_assessment.get('standard_deviation'),
                        'risk_assessment_risk_coefficient': each_fund.risk_assessment.get('risk_coefficient'),
                        'risk_assessment_sharpby': each_fund.risk_assessment.get('sharpby'),
                        'risk_rating_2': each_fund.risk_rating.get(2),
                        'risk_rating_3': each_fund.risk_rating.get(3),
                        'risk_rating_5': each_fund.risk_rating.get(5),
                        'risk_rating_10': each_fund.risk_rating.get(10),
                        'stock_position_total': each_fund.stock_position.get('total'),
                        'stock_position_ten': each_fund.stock_position.get('ten'),
                        'bond_position_total': each_fund.bond_position.get('total'),
                        'bond_position_five': each_fund.bond_position.get('five'),
                        'morning_star_rating_3': each_fund.morning_star_rating.get(3),
                        'morning_star_rating_5': each_fund.morning_star_rating.get(5),
                        'morning_star_rating_10': each_fund.morning_star_rating.get(10),
                    }
                    fund_insert.fund_quarterly_info(quarterly_dict)

                    # Store the top-ten stock holdings.
                    stock_position_total = each_fund.stock_position.get(
                        'total', '0.00')
                    has_stock_position = float(stock_position_total) > 0
                    if has_stock_position:
                        stock_dict = {
                            'id': snow_flake_id,
                            'quarter_index': each_fund.quarter_index,
                            'fund_code': each_fund.fund_code,
                            'stock_position_total': each_fund.stock_position.get('total'),
                        }
                        for index, temp_stock in enumerate(
                                each_fund.ten_top_stock_list):
                            prefix = 'top_stock_' + str(index) + '_'
                            stock_dict[prefix + 'code'] = temp_stock['stock_code']
                            stock_dict[prefix + 'name'] = temp_stock['stock_name']
                            stock_dict[prefix + 'portion'] = temp_stock['stock_portion']
                            stock_dict[prefix + 'market'] = temp_stock['stock_market']
                        fund_insert.fund_stock_info(stock_dict)

                    # For an "A" share class, store the same data for the
                    # funds returned by select_similar_fund, replacing only
                    # the code, id and total asset.
                    if each_fund.fund_name.endswith('A'):
                        similar_name = each_fund.fund_name[0:-1]
                        similar_funds = each_fund_query.select_similar_fund(
                            similar_name)
                        platform = 'zh_fund' if '封闭' in similar_name else 'ai_fund'
                        for i, item in enumerate(similar_funds):
                            item_code = item[0]
                            total_asset = get_total_asset(item_code, platform)
                            quarterly_dict['fund_code'] = item_code
                            quarterly_dict['total_asset'] = total_asset
                            quarterly_dict['id'] = snow_flake_id + i + 1
                            fund_insert.fund_quarterly_info(quarterly_dict)
                            if has_stock_position:
                                stock_dict['fund_code'] = item_code
                                stock_dict['id'] = snow_flake_id + i + 1
                                fund_insert.fund_stock_info(stock_dict)
                page_start = page_start + page_limit
                print(current_thread().name, 'page_start', page_start)
                sleep(3)
        finally:
            # Always release the browser, even when a page raises.
            chrome_driver.close()

    bootstrap_thread(crawlData, record_total, 8)
    exit()