def QA_SU_save_report_calendar_day(client=DATABASE, ui_log=None, ui_progress=None):
    """Save the financial report calendar for recent quarters.

    Builds the list of report dates between 12 months ago and 3 months ago
    (via ``QA_util_getBetweenQuarter``), fetches the report calendar of each
    date and inserts it into ``client.report_calendar``.

    :param client: database handle exposing a ``report_calendar`` collection
    :param ui_log: forwarded to ``QA_util_log_info``
    :param ui_progress: forwarded to ``QA_util_log_info``
    :return: None
    """
    END_DATE = QA_util_datetime_to_strdate(
        QA_util_add_months(QA_util_today_str(), -3))
    START_DATE = QA_util_datetime_to_strdate(
        QA_util_add_months(QA_util_today_str(), -12))
    # Second column of the transposed quarter table is used as the report
    # date -- presumably the quarter end; verify against
    # QA_util_getBetweenQuarter.
    date_list = list(
        pd.DataFrame.from_dict(
            QA_util_getBetweenQuarter(START_DATE, END_DATE)).T.iloc[:, 1])
    report_calendar = client.report_calendar
    report_calendar.create_index([("code", pymongo.ASCENDING),
                                  ("report_date", pymongo.ASCENDING)],
                                 unique=True)
    err = []

    def __saving_work(report_date, report_calendar):
        try:
            QA_util_log_info(
                '##JOB01 Now Saving Report_Calendar==== {}'.format(
                    str(report_date)), ui_log)
            report_calendar.insert_many(
                QA_util_to_json_from_pandas(
                    QA_fetch_get_financial_calendar(report_date)),
                ordered=False)
        except Exception as error0:
            # Best effort: remember the failing date and keep going.
            print(error0)
            err.append(str(report_date))

    total = len(date_list)
    # enumerate() replaces list.index(item), which was O(n) per iteration
    # and reported the wrong position for duplicated entries.
    for position, item in enumerate(date_list, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))
        percent = float(position / total * 100)
        strProgressToLog = 'DOWNLOAD PROGRESS {}'.format(
            str(percent)[0:4] + '%')
        intProgressToLog = int(percent)
        QA_util_log_info(strProgressToLog,
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=intProgressToLog)
        __saving_work(item, report_calendar)

    if len(err) < 1:
        QA_util_log_info('SUCCESS save report calendar ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
def date_func(date):
    """Return the number of days between today and ``date``.

    ``date`` is a ``%Y%m%d`` string. ``None``, ``'None'``, ``0`` and ``'0'``
    are treated as "today", which yields a difference of zero days.
    """
    missing = date is None or date in ['None', 0, '0']
    reference_text, reference_format = (
        (QA_util_today_str(), "%Y-%m-%d") if missing else (date, "%Y%m%d"))
    reference_day = datetime.datetime.strptime(reference_text,
                                               reference_format)
    today = datetime.datetime.strptime(QA_util_today_str(), "%Y-%m-%d")
    return (today - reference_day).days
def QA_SU_save_stock_shares_day(client=DATABASE, ui_log=None, ui_progress=None):
    """Save share-capital change records (SINA source) for every stock.

    For each code returned by ``QA_fetch_stock_list_adv`` the result of
    ``QA_fetch_get_stock_shares_sina`` is inserted into
    ``client.stock_shares``.

    :param client: database handle exposing a ``stock_shares`` collection
    :param ui_log: forwarded to ``QA_util_log_info``
    :param ui_progress: forwarded to ``QA_util_log_info``
    :return: None
    """
    # The former END_DATE / START_DATE locals were computed but never used.
    code = list(QA_fetch_stock_list_adv()['code'])
    stock_shares = client.stock_shares
    stock_shares.create_index([("code", pymongo.ASCENDING),
                               ("begin_date", pymongo.ASCENDING),
                               ('total_shares', pymongo.DESCENDING),
                               ('reason', pymongo.DESCENDING),
                               ('send_date', pymongo.DESCENDING)],
                              unique=True)
    err = []

    def __saving_work(code, stock_shares):
        try:
            QA_util_log_info(
                '##JOB01 Now Saving SSINA shares change==== {}'.format(
                    str(code)), ui_log)
            stock_shares.insert_many(
                QA_util_to_json_from_pandas(
                    QA_fetch_get_stock_shares_sina(code)),
                ordered=False)
            gc.collect()
        except Exception as error0:
            # Best effort: remember the failing code and keep going.
            print(error0)
            err.append(str(code))

    total = len(code)
    # enumerate() replaces list.index(item), which was O(n) per iteration
    # and reported the wrong position for duplicated entries.
    for position, item in enumerate(code, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))
        percent = float(position / total * 100)
        strProgressToLog = 'DOWNLOAD PROGRESS {}'.format(
            str(percent)[0:4] + '%')
        intProgressToLog = int(percent)
        QA_util_log_info(strProgressToLog,
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=intProgressToLog)
        __saving_work(item, stock_shares)

    if len(err) < 1:
        QA_util_log_info('SUCCESS save SINA shares change ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
def QA_fetch_interest_rate_adv(start="all", end=None, format='pd'):
    """Fetch interest-rate data for a date range.

    :param start: first date; ``"all"`` selects 1991-01-01 through today
        (any ``end`` given is then overridden)
    :param end: last date; defaults to ``start`` when omitted
    :param format: accepted for signature compatibility; not forwarded to
        ``QA_fetch_interest_rate``
    :return: whatever ``QA_fetch_interest_rate(start, end)`` returns
    """
    end = start if end is None else end
    # Only the first 10 characters (YYYY-MM-DD) of each bound are kept.
    start = str(start)[0:10]
    end = str(end)[0:10]

    if start == 'all':
        start = '1991-01-01'
        end = QA_util_today_str()
    # The former "if end is None" fallback was unreachable: ``end`` is
    # always a string at this point.
    return QA_fetch_interest_rate(start, end)
def QA_fetch_financial_TTM_adv(code,
                               start="all",
                               end=None,
                               format='pd',
                               collections=DATABASE.financial_TTM):
    """Fetch TTM financial report data wrapped in ``QA_DataStruct_Financial``.

    :param code: stock code(s), forwarded to ``QA_fetch_financial_TTM``
    :param start: first date; ``"all"`` selects 2001-01-01 through today
        (any ``end`` given is then overridden)
    :param end: last date; defaults to ``start`` when omitted
    :param format: accepted for signature compatibility; unused here
    :param collections: accepted for signature compatibility; unused here
    :return: ``QA_DataStruct_Financial`` built from the query result
    """
    end = start if end is None else end
    # Only the first 10 characters (YYYY-MM-DD) of each bound are kept.
    start = str(start)[0:10]
    end = str(end)[0:10]

    if start == 'all':
        start = '2001-01-01'
        end = QA_util_today_str()

    # ``end`` is always a string here, so the old ``end is None`` branch was
    # unreachable. The ``series`` / ``timerange`` values built from
    # ``month_data`` were never used and have been dropped.
    return QA_DataStruct_Financial(QA_fetch_financial_TTM(code, start, end))
def QA_fetch_stock_fianacial_adv(
        code,
        start='all',
        end=None,
        if_drop_index=True,
):
    """Query stock financial data and wrap it in ``QA_DataStruct_Stock_day``.

    ``start='all'`` selects 1990-01-01 through today. When ``end`` is
    omitted it defaults to ``start``. The result is indexed by
    ``(date, code)``; ``if_drop_index`` is passed as ``drop`` to
    ``set_index``. Returns ``None`` (after printing a message) when the
    underlying query returns ``None``.
    """
    if end is None:
        end = start
    start, end = str(start)[0:10], str(end)[0:10]

    if start == 'all':
        start = '1990-01-01'
        end = QA_util_today_str()

    res = QA_fetch_stock_fianacial(code, start, end, format='pd')
    if res is not None:
        indexed = res.set_index(['date', 'code'], drop=if_drop_index)
        return QA_DataStruct_Stock_day(indexed)

    # TODO: report whether the code or the date range was invalid.
    print(
        "QA Error QA_fetch_stock_fianacial_adv parameter code=%s , start=%s, end=%s call QA_fetch_stock_fianacial_adv return None"
        % (code, start, end))
    return None
def get_stock_report_ths(code):
    """Download the 'cash', 'benefit' and 'debt' report exports for ``code``.

    For each report type a Chrome instance is pointed at the 10jqka export
    URL until the expected ``.xls`` file appears in ``D:\\``. Retries wait
    one second longer each time. The file is then read, appended to the
    result and deleted.

    :param code: stock code used in the export URL and the file name
    :return: DataFrame with the first column renamed to ``report_date`` plus
        ``code`` and ``crawl_date`` columns
    """
    data = pd.DataFrame()
    # NOTE(review): download directory and proxy are hard-coded for one
    # Windows machine.
    for report_type in ['cash', 'benefit', 'debt']:
        excelFile = r'D:\{code}_{type}_report.xls'.format(code=code,
                                                          type=report_type)
        seconds = 1
        headers = {
            'Accept':
                'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language':
                'zh-CN,zh;q=0.9',
            'Cache-Control':
                'max-age=0',
            'User-Agent':
                'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
            'Connection':
                'keep-alive',
            '--proxy-server':
                'http://202.20.16.82:10152'
        }
        # Every retry opens a new browser; track them all so none is leaked
        # (previously only the last one was quit, and only on success).
        drivers = []
        try:
            while not os.path.exists(excelFile):
                options = webdriver.ChromeOptions()
                for key, value in headers.items():
                    options.add_argument('%s="%s"' % (key, value))
                prefs = {
                    'profile.default_content_settings.popups': 0,
                    'download.default_directory': 'd:\\'
                }
                options.add_experimental_option('prefs', prefs)
                driver = webdriver.Chrome(chrome_options=options)
                drivers.append(driver)
                driver.get(
                    'http://basic.10jqka.com.cn/api/stock/export.php?export={type}&type=report&code={code}'
                    .format(code=code, type=report_type))
                sleep(seconds)
                seconds = seconds + 1

            try:
                df1 = pd.DataFrame(
                    pd.read_excel(excelFile,
                                  sheet_name='Worksheet')).T.reset_index()
                # pd.concat replaces DataFrame.append (removed in pandas 2).
                data = pd.concat([data, df1.T])
            except Exception:
                print('Error for reading')
            else:
                try:
                    os.remove(excelFile)
                    print("Success Delete {code} {type} report file".format(
                        code=code, type=report_type))
                except OSError:
                    print("NO {code} {type} report file to Delete".format(
                        code=code, type=report_type))
        finally:
            for opened in drivers:
                try:
                    opened.quit()
                except Exception as error0:
                    print(error0)

    res = data.T.iloc[1:, ]
    new_index = data.T[0:1].values.tolist()[0]
    new_index[0] = "report_date"
    res.columns = new_index
    res["code"] = code
    res['crawl_date'] = QA_util_today_str()
    return (res)
def QA_SU_save_stock_fianacial_momgo(start_date=None, end_date=None, ui_log=None, ui_progress=None):
    """Save per-day stock financial analysis data into MongoDB.

    Date defaults:
      * neither bound given   -> today only
      * only ``end_date``     -> from '2008-01-01' to ``end_date``
      * only ``start_date``   -> from ``start_date`` to today

    For every calendar day in the range, ``QA_util_etl_stock_quant`` is
    called and its result inserted into
    ``DATABASE.stock_financial_analysis``.

    :param start_date: first date (string) or None
    :param end_date: last date (string) or None
    :param ui_log: forwarded to ``QA_util_log_info``
    :param ui_progress: accepted for signature compatibility; unused here
    :return: None
    """
    if start_date is None:
        if end_date is None:
            start_date = QA_util_today_str()
            end_date = start_date
        else:
            start_date = '2008-01-01'
    elif end_date is None:
        end_date = QA_util_today_str()
    elif end_date < start_date:
        # Only a warning: the empty date range below makes this a no-op.
        print('end_date should large than start_date')

    col = DATABASE.stock_financial_analysis
    col.create_index([("CODE", ASCENDING), ("date_stamp", ASCENDING)],
                     unique=True)

    deal_date_list = [
        str(t.date()) for t in pd.date_range(start_date, end_date)
    ]
    # The original tested ``is None``, which a list can never satisfy.
    if not deal_date_list:
        print('not a trading day')
        return

    for deal_date in deal_date_list:
        data = QA_util_etl_stock_quant(deal_date)
        if data is None:
            QA_util_log_info(
                '##JOB01 No Data stock_fianacial_data ============== {deal_date} '
                .format(deal_date=deal_date), ui_log)
            continue

        data = data.drop_duplicates((['CODE', 'date']))
        QA_util_log_info(
            '##JOB01 Pre Data stock financial data ============== {deal_date} '
            .format(deal_date=deal_date), ui_log)
        data = QA_util_to_json_from_pandas(data)
        print("got stock financial data ============== {deal_date}".format(
            deal_date=deal_date))
        QA_util_log_info(
            '##JOB02 Got Data stock financial data ============== {deal_date}'
            .format(deal_date=deal_date), ui_log)
        try:
            col.insert_many(data, ordered=False)
            QA_util_log_info(
                '##JOB03 Now stock financial data saved ============== {deal_date} '
                .format(deal_date=deal_date), ui_log)
        except MemoryError:
            col.insert_many(data, ordered=True)
        except pymongo.bulk.BulkWriteError:
            # Bulk write errors were deliberately ignored by the original
            # code -- presumably duplicate keys from re-runs.
            pass
        except Exception as e:
            # Previously swallowed silently; still non-fatal, now visible.
            QA_util_log_info(
                'QA Error saving stock financial data {deal_date}: {error}'
                .format(deal_date=deal_date, error=e), ui_log)
def QA_SU_save_stock_divyield_his(client=DATABASE, ui_log=None, ui_progress=None):
    """Save historical stock dividend data.

    Builds the list of report dates between 2007-01-01 and three months ago
    (via ``QA_util_getBetweenQuarter``), fetches the dividend data of each
    date and inserts it into ``client.stock_divyield``.

    :param client: database handle exposing a ``stock_divyield`` collection
    :param ui_log: forwarded to ``QA_util_log_info``
    :param ui_progress: forwarded to ``QA_util_log_info``
    :return: None
    """
    START_DATE = '2007-01-01'
    END_DATE = QA_util_datetime_to_strdate(
        QA_util_add_months(QA_util_today_str(), -3))
    date_list = list(
        pd.DataFrame.from_dict(
            QA_util_getBetweenQuarter(START_DATE, END_DATE)).T.iloc[:, 1])
    stock_divyield = client.stock_divyield
    stock_divyield.create_index([("a_stockcode", pymongo.ASCENDING),
                                 ("report_date", pymongo.ASCENDING),
                                 ("reg_date", pymongo.ASCENDING)],
                                unique=True)
    err = []

    def __saving_work(report_date, stock_divyield):
        try:
            QA_util_log_info(
                '##JOB01 Now Saving stock_divyield==== {}'.format(
                    str(report_date)), ui_log)
            stock_divyield.insert_many(
                QA_util_to_json_from_pandas(
                    QA_fetch_get_stock_divyield(report_date)),
                ordered=False)
        except Exception as error0:
            # Best effort: remember the failing date and keep going.
            print("error : {code}".format(code=error0))
            err.append(str(report_date))

    total = len(date_list)
    # enumerate() replaces list.index(item), which was O(n) per iteration
    # and reported the wrong position for duplicated entries.
    for position, item in enumerate(date_list, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))
        percent = float(position / total * 100)
        strProgressToLog = 'DOWNLOAD PROGRESS {}'.format(
            str(percent)[0:4] + '%')
        intProgressToLog = int(percent)
        QA_util_log_info(strProgressToLog,
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=intProgressToLog)
        __saving_work(item, stock_divyield)

    if len(err) < 1:
        QA_util_log_info('SUCCESS save stock divyield ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
def QA_fetch_stock_divyield_adv(code,
                                start="all",
                                end=None,
                                format='pd',
                                type='crawl',
                                collections=DATABASE.stock_divyield):
    """Fetch stock dividend data wrapped in ``QA_DataStruct_Financial``.

    :param code: stock code(s), forwarded to ``QA_fetch_stock_divyield``
    :param start: first date; ``"all"`` selects 2007-01-01 through today
        (any ``end`` given is then overridden)
    :param end: last date; defaults to ``start`` when omitted
    :param format: accepted for signature compatibility; unused here
    :param type: forwarded to ``QA_fetch_stock_divyield``
    :param collections: accepted for signature compatibility; unused here
    :return: ``QA_DataStruct_Financial`` built from the query result
    """
    end = start if end is None else end
    # Only the first 10 characters (YYYY-MM-DD) of each bound are kept.
    start = str(start)[0:10]
    end = str(end)[0:10]

    if start == 'all':
        start = '2007-01-01'
        end = QA_util_today_str()
    # The former "if end is None" fallback was unreachable: ``end`` is
    # always a string at this point.
    return QA_DataStruct_Financial(
        QA_fetch_stock_divyield(code, start, end, type=type))
def QA_SU_save_financial_report_day(client=DATABASE, ui_log=None, ui_progress=None):
    """Save THS financial reports for the codes on today's report calendar.

    The code list comes from ``QA_fetch_stock_financial_calendar_adv``
    queried with all stock codes and today's date. For each code the result
    of ``QA_fetch_get_stock_report_ths`` is inserted into
    ``client.stock_financial``.

    :param client: database handle exposing a ``stock_financial`` collection
    :param ui_log: forwarded to ``QA_util_log_info``
    :param ui_progress: forwarded to ``QA_util_log_info``
    :return: None
    """
    code = list(
        QA_fetch_stock_financial_calendar_adv(
            list(QA_fetch_stock_list_adv()['code']),
            QA_util_today_str()).data['code'])
    stock_financial = client.stock_financial
    stock_financial.create_index([("code", pymongo.ASCENDING),
                                  ("report_date", pymongo.ASCENDING)],
                                 unique=True)
    err = []

    def __saving_work(code, stock_financial):
        try:
            QA_util_log_info(
                '##JOB01 Now Saving THS financial_report==== {}'.format(
                    str(code)), ui_log)
            stock_financial.insert_many(
                QA_util_to_json_from_pandas(
                    QA_fetch_get_stock_report_ths(code)),
                ordered=False)
            gc.collect()
        except Exception as error0:
            # Best effort: remember the failing code and keep going.
            print(error0)
            err.append(str(code))

    total = len(code)
    # enumerate() replaces list.index(item), which was O(n) per iteration
    # and reported the wrong position for duplicated entries.
    for position, item in enumerate(code, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))
        percent = float(position / total * 100)
        strProgressToLog = 'DOWNLOAD PROGRESS {}'.format(
            str(percent)[0:4] + '%')
        intProgressToLog = int(percent)
        QA_util_log_info(strProgressToLog,
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=intProgressToLog)
        __saving_work(item, stock_financial)

    if len(err) < 1:
        QA_util_log_info('SUCCESS save THS financial_report ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
def get_stock_report_wy(code):
    """Download and merge the 'lrb', 'zcfzb' and 'xjllb' CSV reports for ``code``.

    For each report type a Chrome instance is pointed at the
    quotes.money.163.com service URL until the expected CSV file appears in
    ``D:\\``. The file is then read, transposed, indexed by the report-date
    column and concatenated column-wise onto the running result, after which
    the file is deleted.

    :param code: stock code used in the download URL and the file name
    :return: DataFrame with a ``report_date`` column plus ``code`` and
        ``crawl_date`` columns; rows whose ``report_date`` contains
        'Unnamed' or is not 10 characters long are dropped
    """
    data = pd.DataFrame()
    # NOTE(review): the loop variable shadows the builtin ``type``.
    for type in ['lrb','zcfzb','xjllb']:
        # NOTE(review): download directory is hard-coded to the D: drive.
        excelFile = r'D:\{type}{code}.csv'.format(code = code, type=type)
        seconds = 1
        headers = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
                   'Request URL': 'http://quotes.money.163.com/service/{type}_{code}.html'.format(code=code,type=type),
                   'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
                   'Remote Address':'59.111.160.246:80',
                   'Referrer Policy':'no-referrer-when-downgrade'
                   }
        # Retry until the file exists, waiting one second longer each time.
        # NOTE(review): every retry opens a new Chrome instance and only the
        # last one is quit below -- earlier ones appear to be leaked.
        while (os.path.exists(excelFile) != True):
            options = webdriver.ChromeOptions()
            for (key,value) in headers.items():
                options.add_argument('%s="%s"' % (key, value))
            #options.add_argument('headless')
            prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': 'd:\\'}
            options.add_experimental_option('prefs', prefs)
            driver = webdriver.Chrome(chrome_options=options)
            driver.get('http://quotes.money.163.com/service/{type}_{code}.html'.format(code = code, type=type))
            sleep(seconds)
            seconds = seconds + 1
        if os.path.exists(excelFile) == True:
            try:
                # Transpose so each original column becomes a row.
                df1 = pd.read_csv(excelFile,encoding='ANSI', na_values=["--"," --"," "],header=0).T
                res = df1.reset_index().iloc[1:,:]
                # First transposed row supplies the column names, with the
                # unit suffix and spaces removed.
                res.columns= [x.replace('(万元)','').replace(' ','').strip() for x in df1.reset_index().iloc[:1].values.tolist()[0]]
                if type == 'xjllb':
                    # Suffix three 'xjllb' column names with 'C' -- presumably
                    # to avoid clashing with same-named columns of the other
                    # reports in the concat below.
                    res.columns= [x+'C' if x in ['财务费用', '净利润', '少数股东损益'] else x.replace('(万元)','').replace(' ','').strip() for x in list(res.columns)]
                res = res.set_index('报告日期')
                data = pd.concat([data,res],axis=1,sort=False).fillna(0)
                driver.quit()
                try:
                    os.remove(excelFile)
                    print("Success Delete {code} {type} report file".format(code=code, type=type))
                except:
                    print("NO {code} {type} report file to Delete".format(code=code, type=type))
            except:
                # NOTE(review): bare except hides the real failure reason.
                print('Error for reading')
    # Scale by 10000 -- presumably converting the stripped '(万元)' unit
    # (ten-thousand yuan) to yuan; confirm against the data source.
    data = data * 10000
    res = data.reset_index()
    new_index = list(res.columns)
    new_index[0] = "report_date"
    res.columns = new_index
    res["code"] = code
    res['crawl_date']=QA_util_today_str()
    # Keep only rows whose report_date is a 10-character value and does not
    # contain 'Unnamed'.
    res = res[res['report_date'].str.contains('Unnamed')==0]
    res = res[res['report_date'].apply(len) == 10]
    return(res)
def QA_fetch_financial_code(ndays=10):
    """Return codes with recent report-calendar entries or recent listings.

    ``START`` is the trade date ``ndays`` before today. The result is the
    codes whose ``TM`` (listing date string) is ``>= START``, followed by
    the codes returned by ``QA_fetch_stock_financial_calendar`` from
    ``START``.
    """
    START = str(QA_util_get_pre_trade_date(QA_util_today_str(), ndays))

    all_codes = QA.QA_fetch_stock_list_adv().code.tolist()
    calendar = QA_fetch_stock_financial_calendar(all_codes, start=START)
    code = list(calendar['code'])

    basic_info = pd.DataFrame(QA_fetch_stock_basic_info_tushare())
    market_day = basic_info[['code', 'timeToMarket']]

    def _listing_day(x):
        # Non-positive values become the string 'None'.
        # NOTE(review): 'None' compares >= any date string, so such rows
        # pass the filter below -- confirm this is intended.
        if x > 0:
            listed = QA_util_add_months(QA_util_date_int2str(int(x)), 0)
        else:
            listed = None
        return str(listed)[0:10]

    market_day['TM'] = market_day['timeToMarket'].apply(_listing_day)
    recently_listed = market_day[market_day['TM'] >= START]['code'].values
    code = list(recently_listed) + code
    return code
def QA_fetch_stock_financial_calendar_adv(
        code,
        start="all",
        end=None,
        type='day',
        format='pd',
        collections=DATABASE.report_calendar):
    """Fetch the stock financial report calendar.

    :param code: stock code(s), forwarded to
        ``QA_fetch_stock_financial_calendar``
    :param start: first date; ``"all"`` selects 2007-01-01 through today
        (any ``end`` given is then overridden)
    :param end: last date; defaults to today when omitted
    :param type: forwarded to ``QA_fetch_stock_financial_calendar``
    :param format: accepted for signature compatibility; unused here
    :param collections: accepted for signature compatibility; unused here
    :return: ``QA_DataStruct_Financial`` built from the query result
    """
    if start == 'all':
        start = '2007-01-01'
        end = QA_util_today_str()
    if end is None:
        end = QA_util_today_str()
    # A later "end = start if end is None else end" was dead code: ``end``
    # is never None past this point.
    start = str(start)[0:10]
    end = str(end)[0:10]

    return QA_DataStruct_Financial(
        QA_fetch_stock_financial_calendar(code, start, end, type=type))
def QA_SU_save_financial_report_his(client=DATABASE, ui_log=None, ui_progress=None):
    """Save historical SINA financial reports for every stock.

    For each code returned by ``QA_fetch_stock_list_adv`` the result of
    ``QA_fetch_get_stock_report_sina(code, YEARS)`` is inserted into
    ``client.stock_financial_sina``. ``YEARS`` holds the values of
    ``QA_util_getBetweenYear('2002-01-01', today)``.

    :param client: database handle exposing a ``stock_financial_sina``
        collection
    :param ui_log: forwarded to ``QA_util_log_info``
    :param ui_progress: forwarded to ``QA_util_log_info``
    :return: None
    """
    YEARS = list(
        QA_util_getBetweenYear('2002-01-01', QA_util_today_str()).values())
    code = list(QA_fetch_stock_list_adv()['code'])
    stock_financial_sina = client.stock_financial_sina
    stock_financial_sina.create_index([("code", pymongo.ASCENDING),
                                       ("report_date", pymongo.ASCENDING)],
                                      unique=True)
    err = []

    def __saving_work(code, stock_financial_sina):
        try:
            QA_util_log_info(
                '##JOB01 Now Saving SINA financial_report==== {}'.format(
                    str(code)), ui_log)
            stock_financial_sina.insert_many(
                QA_util_to_json_from_pandas(
                    QA_fetch_get_stock_report_sina(code, YEARS)),
                ordered=False)
        except Exception as error0:
            # Best effort: remember the failing code and keep going.
            print(error0)
            err.append(str(code))

    total = len(code)
    # enumerate() replaces list.index(item), which was O(n) per iteration
    # and reported the wrong position for duplicated entries.
    for position, item in enumerate(code, start=1):
        QA_util_log_info('The {} of Total {}'.format(position, total))
        percent = float(position / total * 100)
        strProgressToLog = 'DOWNLOAD PROGRESS {}'.format(
            str(percent)[0:4] + '%')
        intProgressToLog = int(percent)
        QA_util_log_info(strProgressToLog,
                         ui_log=ui_log,
                         ui_progress=ui_progress,
                         ui_progress_int_value=intProgressToLog)
        __saving_work(item, stock_financial_sina)

    if len(err) < 1:
        QA_util_log_info('SUCCESS save SINA financial_report ^_^', ui_log)
    else:
        QA_util_log_info(' ERROR CODE \n ', ui_log)
        QA_util_log_info(err, ui_log)
def get_financial_report_date(report_date,
                              headers=None,
                              psize=2000,
                              vname="plsj",
                              page=1):
    """Read the report-date table for ``report_date`` and keep disclosed rows.

    The frame returned by ``read_financial_report_date`` gets fixed column
    names plus ``report_date`` and ``crawl_date`` columns. Only rows whose
    ``real_date`` is non-empty are returned.
    """
    column_names = [
        'code', 'name', 'pre_date', 'first_date', 'second_date',
        'third_date', 'real_date', 'codes'
    ]
    # The second element (page count) is not needed here.
    data, _ = read_financial_report_date(report_date, headers, psize, vname,
                                         page)
    data.columns = column_names
    data['report_date'] = report_date
    data['crawl_date'] = QA_util_today_str()
    has_real_date = data["real_date"].apply(lambda x: len(x) != 0)
    return data[has_real_date]
def read_stock_divyield(report_date, headers=None, page=1):
    """Scrape the dividend table for ``report_date`` from stock.jrj.com.cn.

    :param report_date: report date used in the URL and stored on each row
    :param headers: optional request headers; a default set is used when
        omitted. When ``page == 1`` they are passed through ``get_headers``.
    :param page: page number; only controls whether ``get_headers`` is called
    :return: ``(data, page_num)``, or ``(None, None)`` when the payload has
        no rows
    """
    if headers is None:
        headers = {
            'Accept':
                'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
            'Accept-Language':
                'zh-CN,zh;q=0.9',
            'Cache-Control':
                'max-age=0',
            'User-Agent':
                'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
            'Connection':
                'keep-alive'
        }
    args = {
        "report_date": report_date,
        # Millisecond timestamp in the query string -- presumably a cache
        # buster.
        "unixstamp": int(round(time.time() * 1000))
    }
    strUrl1 = "http://stock.jrj.com.cn/report/js/sz/{report_date}.js?ts={unixstamp}".format(
        **args)
    if page == 1:
        headers = get_headers(report_date, headers)

    options = webdriver.ChromeOptions()
    for key, value in headers.items():
        options.add_argument('%s="%s"' % (key, value))
    driver = webdriver.Chrome(chrome_options=options)
    try:
        driver.get(strUrl1)
        soup = BeautifulSoup(driver.page_source, "html.parser").body.text
    finally:
        # Always release the browser, even when loading or parsing fails.
        driver.quit()

    start_str = 'var fhps = '
    # NOTE(review): str.strip() treats ``start_str`` as a set of characters,
    # not a prefix. Kept as-is because it works while the payload starts
    # with '{'.
    payload = soup.strip(start_str).strip(';')
    # Empty array slots are filled with 0 so the text can be decoded.
    payload = payload.replace(', , ,', ',0,0,').replace(', ,', ',0,')
    res = demjson.decode(payload)

    data = pd.DataFrame(res['data'])
    if data.shape[0] > 0:
        page_num = res['summary']['total']
        data = data.drop_duplicates(keep='first')
        data.columns = [
            'a_stockcode', 'a_stocksname', 'div_info', 'div_type_code',
            'bonus_shr', 'cash_bt', 'cap_shr', 'epsp', 'ps_cr', 'ps_up',
            'reg_date', 'dir_dcl_date', 'a_stockcode1', 'ex_divi_date', 'prg'
        ]
        data['report_date'] = report_date
        data['crawl_date'] = QA_util_today_str()
        return (data, page_num)
    else:
        print("No divyield data for today")
        return (None, None)
def QA_fetch_index_quant_pre_adv(code, start="all", end=None, format='pd'):
    """Query index quant (machine-learning) data for a date range.

    :param code: index code(s), forwarded to ``QA_fetch_index_quant_pre``
    :param start: first date; ``"all"`` or ``None`` selects 2008-01-01
        through today (any ``end`` given is then overridden)
    :param end: last date; defaults to ``start`` when omitted
    :param format: accepted for signature compatibility; unused here
    :return: ``QA_DataStruct_Stock_day`` built from the query result
    """
    # The None test must run before str(): the original compared the
    # already-stringified value with None, which could never match.
    if start is None or str(start)[0:10] == 'all':
        start = '2008-01-01'
        end = QA_util_today_str()
    else:
        end = start if end is None else end
        start = str(start)[0:10]
        end = str(end)[0:10]

    data = QA_fetch_index_quant_pre(code, start, end)
    return QA_DataStruct_Stock_day(data)
def QA_fetch_interest_rate(start,
                           end=None,
                           format='pd',
                           collections=DATABASE.interest_rate):
    """Query interest-rate records whose ``date_stamp`` lies in [start, end].

    :param start: first date; ``None`` means '1999-01-01'
    :param end: last date; ``None`` means today
    :param format: output format -- 'P'/'p'/'pandas'/'pd' (DataFrame),
        'json'/'dict', 'n'/'N'/'numpy' or 'list'/'l'/'L'
    :param collections: collection to query
    :return: data in the requested format; ``None`` when ``end`` is not a
        valid date or ``format`` is unknown. For the pandas format ``None``
        is also returned when the bookkeeping columns cannot be dropped
        (e.g. an empty result).
    """
    if end is None:
        end = QA_util_today_str()
    if start is None:
        start = '1999-01-01'

    if not QA_util_date_valid(end):
        QA_util_log_info(
            'QA Error Interest Rate data parameter start=%s end=%s is not right'
            % (start, end))
        return None

    cursor = collections.find(
        {
            "date_stamp": {
                "$lte": QA_util_date_stamp(end),
                "$gte": QA_util_date_stamp(start)
            }
        }, {"_id": 0},
        batch_size=10000)
    res = pd.DataFrame(list(cursor))
    try:
        res = res.drop(columns=['crawl_date', 'date_stamp'])
    except KeyError:
        # Columns are missing (e.g. no rows matched).
        res = None

    if format in ['P', 'p', 'pandas', 'pd']:
        return res
    elif format in ['json', 'dict']:
        return QA_util_to_json_from_pandas(res)
    elif format in ['n', 'N', 'numpy']:
        return numpy.asarray(res)
    elif format in ['list', 'l', 'L']:
        return numpy.asarray(res).tolist()
    else:
        print(
            "QA Error Interest Rate format parameter %s is none of  \"P, p, pandas, pd , json, dict , n, N, numpy, list, l, L, !\" "
            % format)
        return None
def get_stock_shares_sina(code):
    """Read share-capital data for ``code`` from SINA via headless Chrome.

    :param code: stock code, forwarded to ``read_data_from_sina``
    :return: the frame returned by ``read_data_from_sina`` with an added
        ``crawl_date`` column, or ``None`` when nothing was read
    """
    headers = {
        'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language':
            'zh-CN,zh;q=0.9',
        'Cache-Control':
            'max-age=0',
        'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
        'Connection':
            'keep-alive'
    }
    options = webdriver.ChromeOptions()
    for key, value in headers.items():
        options.add_argument('%s="%s"' % (key, value))
    options.add_argument('headless')

    res = read_data_from_sina(code, options)
    # Check for None before touching the result: the original assigned
    # ``crawl_date`` first and raised TypeError on a None result.
    if res is None:
        return None
    res['crawl_date'] = QA_util_today_str()
    return res
def QA_fetch_stock_target_adv(code,
                              start="all",
                              end=None,
                              type='close',
                              format='pd'):
    """Query stock target (machine-learning label) data for a date range.

    :param code: stock code(s), forwarded to ``QA_fetch_stock_target``
    :param start: first date; ``"all"`` or ``None`` selects 2008-01-01
        through today (any ``end`` given is then overridden)
    :param end: last date; defaults to ``start`` when omitted
    :param type: forwarded to ``QA_fetch_stock_target``
    :param format: accepted for signature compatibility; unused here
    :return: ``QA_DataStruct_Stock_day`` built from the query result
    """
    # The None test must run before str(): the original compared the
    # already-stringified value with None, which could never match.
    if start is None or str(start)[0:10] == 'all':
        start = '2008-01-01'
        end = QA_util_today_str()
    else:
        end = start if end is None else end
        start = str(start)[0:10]
        end = str(end)[0:10]

    data = QA_fetch_stock_target(code, start, end, type=type)
    return QA_DataStruct_Stock_day(data)
def read_stock_report_wy(code):
    """Read and join the 'zcfzb', 'lrb' and 'xjllb' reports for ``code``.

    Each report is read with ``read_data_data_from_wy`` and joined onto the
    previous ones on the index.

    :param code: stock code
    :return: joined frame with ``code`` and ``crawl_date`` columns and the
        former index exposed as ``report_date``; ``None`` when any report
        could not be read
    """
    res1 = pd.DataFrame()
    for report_type in ['zcfzb', 'lrb', 'xjllb']:
        res = read_data_data_from_wy(code, report_type)
        if res is None:
            # The original only checked for None after the loop, by which
            # point a None result had already crashed on ``.shape`` / ``join``.
            return None
        if res1.shape[0] == 0:
            res1 = res
        else:
            res1 = res1.join(res)

    res1['code'] = code
    res1['crawl_date'] = QA_util_today_str()
    res = res1.reset_index()
    try:
        res.columns = [
            'report_date' if x == 'index' else x for x in list(res.columns)
        ]
    except Exception:
        # Best-effort rename, as in the original: keep default names on
        # failure.
        pass
    return (res)
def get_interest_rate():
    """Build a date-indexed table of deposit and loan rates from tushare.

    Deposit rows whose type starts with '定期存款整存整取' and loan rows whose
    type starts with '短期贷款' or '中长期贷款' are pivoted by date. Column
    labels are shortened and mapped to fixed codes, the two tables are
    concatenated side by side, gaps are forward-filled and a ``crawl_date``
    column is added.
    """
    deposit = ts.get_deposit_rate()
    loan = ts.get_loan_rate()

    deposit_mask = [
        x.startswith('定期存款整存整取') for x in deposit['deposit_type']
    ]
    data = deposit[deposit_mask].pivot_table(values="rate",
                                             index=['date'],
                                             columns='deposit_type',
                                             aggfunc=sum)

    loan_mask = [
        x.startswith('短期贷款') or x.startswith('中长期贷款')
        for x in loan['loan_type']
    ]
    data1 = loan[loan_mask].pivot_table(values="rate",
                                        index=['date'],
                                        columns='loan_type',
                                        aggfunc=sum)

    # Cleaned deposit term label -> column code.
    deposit_codes = {
        '一年': 'DOYEAR',
        '三个月': 'DTMON',
        '三年': 'DTYEAR',
        '二年': 'DSYEAR',
        '五年': 'DFYEAR',
        '半年': 'DHYEAR',
    }
    deposit_labels = [
        x.strip().replace('定期存款整存整取', '').replace('(', '').replace(')', '')
        for x in data.columns
    ]
    data.columns = [
        deposit_codes.get(label, label) for label in deposit_labels
    ]

    # Cleaned loan term label -> column code.
    loan_codes = {
        '一至三年': 'SYEAR',
        '三至五年': 'LTMON',
        '五年以上': 'LFYEAR',
        '六个月以内': 'LHYEAR',
        '六个月至一年': 'LOYEAR',
    }
    loan_labels = [
        x.strip().replace('短期贷款', '').replace('中长期贷款',
                                              '').replace('(',
                                                          '').replace(')', '')
        for x in data1.columns
    ]
    data1.columns = [loan_codes.get(label, label) for label in loan_labels]

    combined = pd.concat([data1, data], axis=1)
    res = combined.reset_index().fillna(method='ffill')
    res.columns = ['date' if x == 'index' else x for x in res.columns]
    res['crawl_date'] = QA_util_today_str()
    return res
def QA_fetch_index_alpha_adv(code,
                             start="all",
                             end=None,
                             format='pd',
                             collections=DATABASE.index_alpha):
    """Query index alpha data for a date range.

    :param code: index code(s), forwarded to ``QA_fetch_index_alpha``
    :param start: first date; ``"all"`` or ``None`` selects 2005-01-01
        through today (any ``end`` given is then overridden)
    :param end: last date; defaults to ``start`` when omitted
    :param format: accepted for signature compatibility; unused here
    :param collections: accepted for signature compatibility; unused here
    :return: ``QA_DataStruct_Stock_day`` built from the query result
    """
    # The None test must run before str(): the original compared the
    # already-stringified value with None, which could never match.
    if start is None or str(start)[0:10] == 'all':
        start = '2005-01-01'
        end = QA_util_today_str()
    else:
        end = start if end is None else end
        start = str(start)[0:10]
        end = str(end)[0:10]

    data = QA_fetch_index_alpha(code, start, end)
    return QA_DataStruct_Stock_day(data)
def get_stock_report_sina(code, report_year):
    """Read SINA financial statements for ``code`` over several years.

    For every entry of ``report_year`` the 'BalanceSheet', 'ProfitStatement'
    and 'CashFlow' tables are read with ``read_data_from_sina`` and joined
    column-wise. The yearly frames are then stacked row-wise.

    :param code: stock code
    :param report_year: iterable of years to read
    :return: stacked frame with ``code`` and ``crawl_date`` columns and the
        former index exposed as ``report_date``
    """
    headers = {
        'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Language':
            'zh-CN,zh;q=0.9',
        'Cache-Control':
            'max-age=0',
        'User-Agent':
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36',
        'Connection':
            'keep-alive'
    }
    options = webdriver.ChromeOptions()
    for key, value in headers.items():
        options.add_argument('%s="%s"' % (key, value))
    options.add_argument('headless')

    res2 = pd.DataFrame()
    for years in report_year:
        res1 = pd.DataFrame()
        for report_type in ['BalanceSheet', 'ProfitStatement', 'CashFlow']:
            table_name = '{report_type}NewTable0'.format(
                report_type=report_type)
            if report_type == 'CashFlow':
                # The cash-flow request uses the profit-statement table id,
                # as in the original code -- presumably a quirk of the
                # page; verify against the site.
                table_name = 'ProfitStatementNewTable0'
            res = read_data_from_sina(code, years, report_type, table_name,
                                      options)
            res1 = pd.concat([res1, res], axis=1)
        # pd.concat replaces DataFrame.append (removed in pandas 2).
        res2 = pd.concat([res2, res1])

    # ``res2`` is always a DataFrame here; the old ``is None`` branch was
    # dead.
    res2['code'] = code
    res2['crawl_date'] = QA_util_today_str()
    res = res2.reset_index()
    try:
        res.columns = [
            'report_date' if x == 'index' else x for x in list(res.columns)
        ]
    except Exception:
        # Best-effort rename, as in the original: keep default names on
        # failure.
        pass
    return (res)
def QA_SU_save_financial_files():
    """Download financial data files and store them in ``DATABASE.financial``.

    Every file in ``download_path`` whose name starts with 'gpcw' is parsed
    and inserted, unless the collection already holds at least 3600
    documents for that file's report date.
    """
    download_financialzip()
    coll = DATABASE.financial
    coll.create_index([("code", ASCENDING), ("report_date", ASCENDING)],
                      unique=True)
    for item in os.listdir(download_path):
        if item[0:4] != 'gpcw':
            print(
                "file ", item,
                " is not start with gpcw , seems not a financial file , ignore!"
            )
            continue

        # The last 8 characters of the file stem are the report date.
        date = int(item.split('.')[0][-8:])
        print('QUANTAXIS NOW SAVING {}'.format(date))
        # Query once instead of twice (the original repeated the same count).
        # NOTE(review): documents are stored with ``report_date`` converted
        # by QA_util_date_stamp, while this filter uses the raw integer --
        # confirm the two representations can actually match.
        saved_count = coll.find({'report_date': date}).count()
        if saved_count < 3600:
            print(saved_count)
            data = parse_filelist([item]).reset_index().drop_duplicates(
                subset=['code', 'report_date']).sort_index()
            data["crawl_date"] = QA_util_today_str()
            data = data.assign(report_date=data['report_date'].apply(
                lambda x: QA_util_date_stamp(QA_util_date_int2str(x))))
            data = data.assign(crawl_date=data['crawl_date'].apply(
                lambda x: QA_util_date_stamp(QA_util_date_int2str(x))))
            data = QA_util_to_json_from_pandas(data)
            try:
                coll.insert_many(data, ordered=False)
            except MemoryError:
                coll.insert_many(data, ordered=True)
            except pymongo.bulk.BulkWriteError:
                # Bulk write errors were deliberately ignored by the
                # original code -- presumably duplicate keys from re-runs.
                pass
            except Exception as e:
                # Previously swallowed silently; still non-fatal, now visible.
                print('QUANTAXIS ERROR SAVING {}: {}'.format(date, e))
        else:
            print('ALL READY IN DATABASE')

    print('SUCCESSFULLY SAVE/UPDATE FINANCIAL DATA')
def QA_fetch_stock_quant_data_adv(code,
                                  start="all",
                                  end=None,
                                  block=True,
                                  format='pd',
                                  collections=DATABASE.stock_quant_data):
    """Query the final stock quant (machine-learning) indicators, V1.

    :param code: stock code(s), forwarded to ``QA_fetch_stock_quant_data``
    :param start: first date; ``"all"`` or ``None`` selects 2008-01-01
        through today (any ``end`` given is then overridden)
    :param end: last date; defaults to ``start`` when omitted
    :param block: forwarded to ``QA_fetch_stock_quant_data``
    :param format: accepted for signature compatibility; unused here
    :param collections: accepted for signature compatibility; unused here
    :return: ``QA_DataStruct_Stock_day`` built from the query result
    """
    # The None test must run before str(): the original compared the
    # already-stringified value with None, which could never match.
    if start is None or str(start)[0:10] == 'all':
        start = '2008-01-01'
        end = QA_util_today_str()
    else:
        end = start if end is None else end
        start = str(start)[0:10]
        end = str(end)[0:10]

    data = QA_fetch_stock_quant_data(code, start, end, block)
    return QA_DataStruct_Stock_day(data)
def ETL_stock_day(codes, start=None, end=None):
    """Build per-code daily features from raw and forward-adjusted prices.

    Loads daily bars starting 100 trade dates before ``start``, joins the
    forward-adjusted columns (suffixed ``_qfq``), applies ``pct`` per code
    and returns the rows for the requested dates indexed by
    ``(date, code)``. Infinite values become 0 and nulls become ``None``.

    ``start`` defaults to '2008-01-01' and ``end`` to today.
    """
    start = '2008-01-01' if start is None else start
    end = QA_util_today_str() if end is None else end

    # Single day -> one date string; otherwise a Series of date strings.
    if start == end:
        rng = str(start)[0:10]
    else:
        days = pd.Series(pd.date_range(start, end, freq='D'))
        rng = days.apply(lambda x: str(x)[0:10])

    # Extra leading history -- presumably the look-back window ``pct`` needs.
    start_date = QA_util_get_pre_trade_date(start, 100)
    data = QA_fetch_stock_day_adv(codes, start_date, end)

    res1 = data.to_qfq().data
    res1.columns = [x + '_qfq' for x in res1.columns]
    data = data.data.join(res1).fillna(0).reset_index()

    res = data.groupby('code').apply(pct)
    res = res.reset_index(level=0, drop=True).reset_index()
    res = res.set_index(['date', 'code']).loc[rng]
    res = res.replace([np.inf, -np.inf], 0)
    res = res.where((pd.notnull(res)), None)
    return res
def __init__(self):
    """Create ``self.info`` with today's date and three empty status dicts."""
    self.info = {}
    self.info.update(
        date=QA_util_today_str(),
        train_status={},
        test_status={},
        rng_status={},
    )
# Private sentinel meaning "end_date was not supplied". An explicit ``None``
# is still forwarded unchanged.
_END_DATE_TODAY = object()


def QA_SU_save_stock_fianacial_momgo_his(start_date=None, end_date=_END_DATE_TODAY):
    """Save historical stock financial analysis data.

    Thin wrapper around
    ``save_stock_financial.QA_SU_save_stock_fianacial_momgo``.

    :param start_date: first date, forwarded as-is
    :param end_date: last date; defaults to today, resolved at call time.
        The original default was evaluated once at import time and went
        stale in long-running processes.
    :return: whatever the wrapped function returns
    """
    if end_date is _END_DATE_TODAY:
        end_date = QA_util_today_str()
    return save_stock_financial.QA_SU_save_stock_fianacial_momgo(
        start_date, end_date)