def getmaxreportdate(dbCntInfo, destTable) -> dict : ''' 获取每个产品最大的reportdate :param dbCntInfo: :return: ''' # 获取产品基础信息 productBasicInfodf = pb.getAllProductBasicInfo(dbCntInfo) if productBasicInfodf.empty: print("表中无正在上市的数据,提前正常结束!") return maxreportdatedict = {} for rowIndex in productBasicInfodf.index: oneProductTuple = productBasicInfodf.iloc[rowIndex] productCode = oneProductTuple["product_code"] ## 产品代码 maxReportDateSql = sc.COMPANYMAXREPORTDATE_SQL%(destTable, productCode) destDbBase = dbCntInfo.getDBCntInfoByTableName(tablename=destTable,productcode=productCode) destRetList = destDbBase.execSelectSmallSql(maxReportDateSql) maxReportDate = destRetList[0]['maxreportdate'] print(rowIndex) print(productCode+maxReportDateSql) maxreportdatedict[productCode] = maxReportDate if len(maxreportdatedict) == 0: raise Exception("getmaxreportdate return value is empty!") return maxreportdatedict
def getProductBasicInfo(dbCntInfo): ''' productbasicinfo表信息更新 :param dbCntInfo: :return: ''' sourceTable = "stock_basics" getStockBasics(dbCntInfo) destTable = "productbasicinfo" souceDbBase = dbCntInfo.getDBCntInfoByTableName(sourceTable) destDbBase = dbCntInfo.getDBCntInfoByTableName(destTable) proctBaseInfoDf = pb.getAllProductBasicInfo(dbCntInfo, ipostatus="A") proctBaseInfoDf.set_index("product_code") while (True): sourceRetList = souceDbBase.execSelectManySql(sc.PRODUCTBASICINFO_SQL) if len(sourceRetList) == 0: break # 数据插入到另外一个库里面 newsourceListTuple = [] for oneList in sourceRetList: productcode = oneList["product_code"] if productcode not in proctBaseInfoDf.index: print(oneList) oneList['market_type'] = dc.DICTCONS_CODETOMARKETTYPE[oneList['market_type']] newsourceListTuple.append(tuple(oneList.values())) destDbBase.execInsertManySql(sc.PRODUCTBASICINFO_INSERTSQL, newsourceListTuple) souceDbBase.closeDBConnect() destDbBase.closeDBConnect()
def getalltradeproductdate(dbCntInfo) -> pd.DataFrame: ''' 获取需要获取交易数据的产品的数据 :param dbCntInfo: :return: ''' # 获取产品基础信息 productBasicInfodf = pb.getAllProductBasicInfo(dbCntInfo) if productBasicInfodf.empty: print("表中无正在上市的数据,提前正常结束!") return # 获取当日日期 finanalWorkDate = pb.getTodayDate() curHour = time.localtime().tm_hour if curHour < 17: finanalWorkDate = pb.getYesterday() destTable = "producttradedata" productcodelist = [] startdatelist = [] enddatelist = [] for rowIndex in productBasicInfodf.index: oneProductTuple = productBasicInfodf.iloc[rowIndex] productCode = oneProductTuple["product_code"] ## 产品代码 listedDate = oneProductTuple["listed_date"] ## 上市日期 # 获取产品的起始日期,产品可能已经存在部分行情 maxTradeDateSql = sc.PRODUCTMAXTRADEDATE_SQL % productCode destDbBase = dbCntInfo.getDBCntInfoByTableName(tablename=destTable, productcode=productCode) destRetList = destDbBase.execSelectSmallSql(maxTradeDateSql) maxTradeDate = destRetList[0]['maxtradedate'] print(rowIndex) print(productCode + maxTradeDateSql) startDate = listedDate if maxTradeDate > listedDate: startDate = pb.getnextnday(maxTradeDate, 1) endDate = ((int(startDate / 10000)) + 10) * 10000 + 1231 if int(listedDate / 10000) == int(finanalWorkDate / 10000): endDate = finanalWorkDate if endDate > finanalWorkDate: endDate = finanalWorkDate if startDate > finanalWorkDate: continue print("startdate(%d),enddate(%d)" % (startDate, endDate)) productcodelist.append(productCode) startdatelist.append(startDate) enddatelist.append(endDate) if len(productcodelist) > 0: dfdict = { "productcode": productcodelist, "startdate": startdatelist, "enddate": enddatelist } df = pd.DataFrame(dfdict) df = df.set_index("productcode") return df return
def getProductBasicInfo(): ''' productbasicinfo表信息更新 :param dbCntInfo: :return: ''' dbCntInfo = dbcnt.getInitDbCntInfo() sourceTable = "stock_basics" getStockBasics(dbCntInfo) destTable = "productbasicinfo" souceDbBase = dbCntInfo.getDBCntInfoByTableName(sourceTable) destDbBase = dbCntInfo.getDBCntInfoByTableName(destTable) proctBaseInfoDf = pb.getAllProductBasicInfo(dbCntInfo, ipostatus="N", producttype='1') productcodelist = proctBaseInfoDf["product_code"].tolist() while (True): sourceRetList = souceDbBase.execSelectManySql(sc.PRODUCTBASICINFO_SQL, " order by code") if len(sourceRetList) == 0: break curworkday = pb.getlastworkday() # 数据插入到另外一个库里面 newsourceListTuple = [] for oneList in sourceRetList: productcode = oneList["product_code"] if productcode not in productcodelist: print(oneList) oneList['market_type'] = dc.DICTCONS_CODETOMARKETTYPE[ oneList['market_type']] oneList['symbol_code'] = pb.code_to_symbol(productcode) newsourceListTuple.append(tuple(oneList.values())) if len(newsourceListTuple) != 0: destDbBase.execInsertManySql(sc.PRODUCTBASICINFO_INSERTSQL, newsourceListTuple) proctBaseInfoDf = getDelistStockBasics(dbCntInfo) proctBaseInfoDf = proctBaseInfoDf.set_index("symbol") delistListTuple = [] for productcode, itemvalue in proctBaseInfoDf.iterrows(): if productcode in productcodelist: # print("%s has delisted!" % productcode) # print(itemvalue) delistListTuple.append( (itemvalue["delist_date"], pb.code_to_symbol(productcode))) if len(delistListTuple) != 0: # print(delistListTuple) destDbBase.execupdatemanysql(sc.PRODUCTBASICINFO_UPDATEDELISTSQL, delistListTuple)
def getProductFinanceInfo(dbCntInfo,sourcetable,desctable): ''' 获取产品的公司财务基础数据 :param dbCntInfo: :param sourcetable: tusharepro数据填写到该表 histincome histcastflow histbalance :param desctable: 移植到该正式表 company_income company_cashflow company_balance_sheet :return: ''' productInfoDf = pb.getAllProductBasicInfo(dbCntInfo) if productInfoDf.empty: print("表中无正在上市的数据,提前正常结束!") return pro = ts.pro_api('00f0c017db5d284d992f78f0971c73c9ecba4aa03dee2f38e71e4d9c') sourceTable = sourcetable # 比如"histincome" destTable = desctable # 比如"company_income" # 获取每个产品已经获取到的最大reportdate productReportDateDict = getmaxreportdate(dbCntInfo, destTable) engine = dbCntInfo.getEngineByTableName(sourceTable) # 获取tusharepro中的数据 for rowIndex in productInfoDf.index: # if rowIndex >= 0: # continue if rowIndex % 200 == 0 and rowIndex != 0: time.sleep(60) oneProductInfo = productInfoDf.iloc[rowIndex] productCode = oneProductInfo["product_code"] maxreportdate = productReportDateDict[productCode] maxreportdate = maxreportdate + 1 print("%d %s begin to get %s data from intenert..."%(rowIndex, productCode,destTable)) symbolProcuctCode = pb.code_to_symbol(productCode) df = pd.DataFrame() try : if destTable in "company_income": if maxreportdate == 1: df = pro.income(ts_code=symbolProcuctCode) else : df = pro.income(ts_code=symbolProcuctCode, start_date=str(maxreportdate)) elif destTable in "company_balance_sheet": if maxreportdate == 1: df = pro.balancesheet(ts_code=symbolProcuctCode) else : df = pro.balancesheet(ts_code=symbolProcuctCode, start_date=str(maxreportdate)) elif destTable in "company_cashflow": if maxreportdate == 1: df = pro.cashflow(ts_code=symbolProcuctCode) else : df = pro.cashflow(ts_code=symbolProcuctCode, start_date=str(maxreportdate)) else : raise Exception("destTable is exception!") except Exception as e: print(symbolProcuctCode + " connect time out!") time.sleep(120) if destTable in "company_income": df = pro.income(ts_code=symbolProcuctCode) elif destTable in "company_balance_sheet": df = pro.balancesheet(ts_code=symbolProcuctCode) elif destTable in "company_cashflow": df = pro.cashflow(ts_code=symbolProcuctCode) else: raise Exception("destTable is exception!") # 数据去重 # 数据相同删除重复的数据保留第一个数据 dfdup = df.drop_duplicates(subset=["ts_code","end_date"], keep='first', inplace=False) realSourTable = sourceTable + productCode basicdf = dfdup.reset_index(drop=True) basicdf.to_sql(realSourTable, engine, if_exists="replace", index=False) print("%s get data finish!" % (productCode)) time.sleep(1.5) # 获取表中存在的数据。 for rowIndex in productInfoDf.index: oneProductInfo = productInfoDf.iloc[rowIndex] productCode = oneProductInfo["product_code"] maxreportdate = productReportDateDict[productCode] realSourTable = sourceTable + productCode print("%d %s begin to insert table %s data..." % (rowIndex, realSourTable, destTable)) selectsql = sc.COMPANYFINANCE_SELECTSQL[destTable]%(realSourTable,maxreportdate) print(selectsql) insertsql = sc.COMPANYFINANCE_INSERTSQL[destTable] pb.insertNormalDbByCurProductCode(dbCntInfo, sourceTable, destTable, selectsql, insertsql,productCode) print("%s insert table %s finish!" % (realSourTable, destTable)) return
def getProfitData(dbCntInfo): ''' 在执行前需要首先用下列语句创建表 USE stocknotdealmarket; DROP TABLE IF EXISTS histprofitdata; CREATE TABLE histprofitdata ( ts_code text DEFAULT NULL, end_date text DEFAULT NULL, ann_date text DEFAULT NULL, div_proc text DEFAULT NULL, stk_div double DEFAULT NULL, stk_bo_rate double DEFAULT NULL, stk_co_rate double DEFAULT NULL, cash_div double DEFAULT NULL, cash_div_tax double DEFAULT NULL, record_date text DEFAULT NULL, ex_date text DEFAULT NULL, pay_date text DEFAULT NULL, div_listdate text DEFAULT NULL, imp_ann_date text DEFAULT NULL ) 数据来源:http://f10.eastmoney.com/BonusFinancing/Index?type=web&code=sh601199 中财网:http://data.cfi.cn/cfidata.aspx?sortfd=&sortway=&curpage=1&ndk=A0A1934A1939A1957A1966A1983&xztj=&mystock= 分红产品分红数据获取 ''' # 获取产品基础信息 productBasicInfodf = pb.getAllProductBasicInfo(dbCntInfo) if productBasicInfodf.empty: print("表中无正在上市的数据,提前正常结束!") return # 获取当日日期 # finanalWorkDate = pb.getTodayDate() sourceTable = "histprofitdata" # 分红数据 # destTable = "profitschema" # 产品分红方案 # 建立连接与tusharepro pro = ts.pro_api('00f0c017db5d284d992f78f0971c73c9ecba4aa03dee2f38e71e4d9c') engine = dbCntInfo.getEngineByTableName(sourceTable) for rowIndex in productBasicInfodf.index: oneProductTuple = productBasicInfodf.iloc[rowIndex] productCode = oneProductTuple["product_code"] ## 产品代码 print("%d %s begin to get prifit data ..." % (rowIndex,productCode)) symbolProcuctCode = pb.code_to_symbol(productCode) try: df = pro.dividend(ts_code=symbolProcuctCode) basicdf = df.reset_index(drop=True) basicdf.to_sql(sourceTable, engine, if_exists="append", index=False) except Exception as e: print(productCode + " connect time out!") time.sleep(30) df = pro.dividend(ts_code=symbolProcuctCode) basicdf = df.reset_index(drop=True) basicdf.to_sql(sourceTable, engine, if_exists="append", index=False) print(productCode + " begin to get data finish ...") time.sleep(1) if rowIndex % 100 == 0: time.sleep(10) if rowIndex % 500 == 0: time.sleep(30) print("all productcode profitschema finish download!") dbCntInfo.closeAllDBConnect()