def _basic_init(self):
    """
    Download the fund page and populate the basic attributes of the instance.

    The page is fetched with ``rget(self._url)`` and kept on ``self._page``.
    ``self.rate``, ``self.name`` and ``self.price`` are filled from the
    javascript variables embedded in that page. Unless ``self.priceonly`` is
    truthy, ``self._feepreprocess()`` is called as the last step.

    :raises ParserFailure: the page answers with HTTP 404, or no price row is
        left after filtering the dates with ``opendate``.
    :raises FundTypeError: the first 800 characters of the page contain the
        ``Data_millionCopiesIncome`` marker.
    """
    page = rget(self._url)
    self._page = page
    if page.status_code == 404:
        raise ParserFailure("Unrecognized fund, please check fund code you input.")
    html = page.text
    if "Data_millionCopiesIncome" in html[:800]:
        raise FundTypeError("This code seems to be a mfund, use mfundinfo instead")

    def grab(pattern):
        # first capture group of ``pattern`` matched against the whole page;
        # [\s\S]* is used in the patterns because .* does not match \n
        return re.match(pattern, html).groups()[0]

    # NOTE(security): eval() below runs on text downloaded from the web.
    # No fund with null net values has been seen so far; if one exists,
    # other places will very likely break as well.
    worth = eval(
        grab(r"[\s\S]*Data_netWorthTrend = ([^;]*);[\s\S]*").replace("null", "None")
    )
    # fund 096001 does have null inside its accumulated value data
    accum = eval(
        grab(r"[\s\S]*Data_ACWorthTrend = ([^;]*);[\s\S]*").replace("null", "None")
    )

    # timestamp transform: tzinfo must be taken into consideration
    tz_bj = dt.timezone(dt.timedelta(hours=8))
    infodict = {}
    infodict["date"] = [
        dt.datetime.fromtimestamp(int(point["x"]) / 1e3, tz=tz_bj).replace(tzinfo=None)
        for point in worth
    ]
    infodict["netvalue"] = [float(point["y"]) for point in worth]
    infodict["comment"] = [_nfloat(point["unitMoney"]) for point in worth]
    # guard against mismatched amounts of accumulated and net value data,
    # known affected fund: 502010
    if len(accum) == len(worth):
        infodict["totvalue"] = [pair[1] for pair in accum]

    try:
        rate = float(eval(grab(r"[\s\S]*fund_Rate=([^;]*);[\s\S]*")))
    except ValueError:
        # known cases: ETF
        rate = 0
        logger.info("warning: this fund has no data for rate")
    name = eval(grab(r"[\s\S]*fS_name = ([^;]*);[\s\S]*"))

    # shengou rate in tiantianjijin, daeshengou rate discount is not considered
    self.rate = rate
    # the name of the fund
    self.name = name

    table = pd.DataFrame(data=infodict)
    table = table[table["date"].isin(opendate)].reset_index(drop=True)
    if table.empty:
        raise ParserFailure("no price table found for this fund %s" % self.code)
    self.price = table[table["date"] <= yesterdaydash()]

    # deal with the redemption fee attrs finally
    if self.priceonly:
        return
    self._feepreprocess()
def _basic_init(self):
    """
    Download the money-fund page and populate ``self.name`` and ``self.price``.

    The page is fetched with ``rget(self._url)`` and kept on ``self._page``.
    The daily income per ten thousand copies found in
    ``Data_millionCopiesIncome`` is compounded into a synthetic net value
    series: every value is the previous one times ``1 + rate * 1e-4``,
    starting from 1. That series is stored in both the ``netvalue`` and
    ``totvalue`` columns, and ``comment`` is 0 on every row.

    :raises ParserFailure: the page answers with HTTP 404, or no price row is
        left after filtering the dates with ``opendate``.
    :raises FundTypeError: the first 800 characters of the page contain the
        ``Data_fundSharesPositions`` marker.
    """
    self._page = rget(self._url)
    # A 404 page carries none of the javascript variables parsed below, so
    # fail early with a readable error instead of an AttributeError raised
    # by ``re.match(...).groups()`` on ``None``.
    if self._page.status_code == 404:
        raise ParserFailure("Unrecognized fund, please check fund code you input.")
    text = self._page.text
    if text[:800].find("Data_fundSharesPositions") >= 0:
        raise FundTypeError("This code seems to be a fund, use fundinfo instead")

    # NOTE(security): eval() runs on text downloaded from the web.
    income = eval(
        re.match(
            r"[\s\S]*Data_millionCopiesIncome = ([^;]*);[\s\S]*", text
        ).groups()[0]
    )
    self.name = re.match(
        r"[\s\S]*fS_name = \"([^;]*)\";[\s\S]*", text
    ).groups()[0]

    # the timestamps are in milliseconds; convert to naive Beijing time
    tz_bj = dt.timezone(dt.timedelta(hours=8))
    datel = [
        dt.datetime.fromtimestamp(int(d[0]) / 1e3, tz=tz_bj).replace(tzinfo=None)
        for d in income
    ]
    ratel = [float(d[1]) for d in income]

    # compound the daily rates; the initial level 1 itself is not a data point
    netvalue = []
    level = 1
    for dailyrate in ratel:
        level = level * (1 + dailyrate * 1e-4)
        netvalue.append(level)

    df = pd.DataFrame(
        data={
            "date": datel,
            "netvalue": netvalue,
            "totvalue": netvalue,
            "comment": [0 for _ in datel],
        }
    )
    df = df[df["date"].isin(opendate)]
    if len(df) == 0:
        raise ParserFailure("no price table for %s" % self.code)
    df = df.reset_index(drop=True)
    self.price = df[df["date"] <= yesterdaydash()]
def vtradevolume(cftable, freq="D", rendered=True):
    """
    aid function on visualization of trade summary

    :param cftable: cftable (pandas.DataFrame) with at least date and cash columns
    :param freq: one character string, frequency label, now supporting D for date,
        W for week and M for month, namely the trade volume is shown based on the time unit
    :param rendered: bool. If true, the result of ``bar.render_notebook()`` is
        returned, otherwise the Bar object itself.
    :returns: the Bar object, or its notebook rendering when ``rendered`` is true
    :raises ParserFailure: ``freq`` is none of D, W, M
    """
    ### WARN: datazoom and time conflict, sliding till 1970..., need further look into pyeacharts
    ### very unsatisfied about current visualize effect, and it seems the order of add and set option matters a lot
    dates = cftable["date"]
    if freq == "D":
        datedata = [d.to_pydatetime() for d in dates]
        merged = [(d.to_pydatetime(), c) for d, c in zip(dates, cftable["cash"])]
    elif freq == "W":
        # Every trade is bucketed on the Thursday of its own Monday-to-Sunday
        # week, i.e. the anchor day of the ISO week. Computing the anchor from
        # the weekday avoids parsing ISO week numbers with the non-ISO "%W"
        # directive and avoids mixing the calendar year with an ISO week
        # around New Year.
        thursday = (
            dates + pd.to_timedelta(3 - dates.dt.weekday, unit="D")
        ).dt.normalize()
        cfmerge = cftable["cash"].groupby(thursday).sum()
        merged = [(d.to_pydatetime(), c) for d, c in cfmerge.items()]
        datedata = [d for d, _ in merged]
    elif freq == "M":
        cfmerge = cftable.groupby([dates.dt.year, dates.dt.month])["cash"].sum()
        # monthly buckets are drawn on the 15th of the month
        merged = [
            (dt.datetime(int(y), int(m), 15), c) for (y, m), c in cfmerge.items()
        ]
        datedata = [d for d, _ in merged]
    else:
        raise ParserFailure("no such freq tag supporting")

    # positive cash goes to the sell series, negative cash to the buy series
    buydata = [[d, round(x, 1)] for d, x in merged if x < 0]
    selldata = [[d, round(x, 1)] for d, x in merged if x > 0]

    bar = Bar()
    bar.add_xaxis(datedata)
    # buydata should before selldata, since emptylist in the first line would
    # make the output fig empty: may be bug in pyecharts
    bar.add_yaxis(series_name="买入", yaxis_data=buydata, category_gap="90%")
    bar.add_yaxis(series_name="卖出", yaxis_data=selldata, category_gap="90%")
    bar.set_global_opts(
        xaxis_opts=opts.AxisOpts(type_="time"),
        datazoom_opts=[opts.DataZoomOpts(range_start=99, range_end=100)],
    )
    if rendered:
        return bar.render_notebook()
    else:
        return bar
def set_display(env=""):
    """
    Toggle the display mode of DataFrame; only effective in Jupyter Notebook.

    :param env: str, default "". If env="notebook" (or "jupyter", "ipython"),
        pd.DataFrame will be shown in fantastic web language. A falsy value
        removes the ``_repr_javascript_`` attribute from ``pd.DataFrame``
        again, if it is present.
    :return: None
    :raises ParserFailure: ``env`` is a non-empty value that is not recognized
    """
    if env and env not in ["notebook", "jupyter", "ipython"]:
        raise ParserFailure("unknown env %s" % env)
    if env:
        _set_display_notebook()
        return
    # switching off: the hook may never have been installed, which is fine
    try:
        delattr(pd.DataFrame, "_repr_javascript_")
    except AttributeError:
        pass
def get_sh_status(category="cb", date=None):
    """
    Query query.sse.com.cn and return the ``result`` field as a DataFrame.

    :param category: str. "cb" or "kzz" selects the COMMON_BOND_KZZFLZ_ALL
        query, "fund" or "fs" selects the COMMON_SSE_FUND_LOF_SCALE_CX_S query.
    :param date: str, only used by the fund query. "/" and "-" are stripped
        before it is sent as FILEDATE. When falsy, ``today_obj()`` formatted
        as %Y%m%d is used.
    :return: pd.DataFrame built from ``r["result"]`` of the json response
    :raises ParserFailure: ``category`` is not one of the values above
    """
    base = "http://query.sse.com.cn/commonQuery.do?jsonCallBack=&"
    if category in ("cb", "kzz"):
        query = "isPagination=false&sqlId=COMMON_BOND_KZZFLZ_ALL&KZZ=1"
    elif category in ("fund", "fs"):
        day = date if date else today_obj().strftime("%Y%m%d")
        day = day.replace("/", "").replace("-", "")
        query = "&sqlId=COMMON_SSE_FUND_LOF_SCALE_CX_S&pageHelp.pageSize=10000&FILEDATE={date}".format(
            date=day
        )
    else:
        raise ParserFailure("unrecoginzed category %s" % category)

    request_headers = {
        "user-agent": "Mozilla/5.0",
        "Host": "query.sse.com.cn",
        "Referer": "http://www.sse.com.cn/market/bonddata/data/convertible/",
    }
    payload = rget_json(base + query, headers=request_headers)
    return pd.DataFrame(payload["result"])
def v(self, y="lne"):
    """
    Visualize total assets or total profit together with the fitted curve.

    For ``y="roe"`` only the roe column of ``self.df`` is plotted against
    date. Otherwise the column ``y`` is plotted against ``date_count`` and
    the fitted line (or its exponential for "e" and "b") is drawn on the same
    axes.

    :param y: str. one of lne, lnb, e, b, roe
    :return: the object returned by ``df.plot``
    :raises ParserFailure: ``y`` is none of the values above
    """
    frame = self.df
    if y == "roe":
        return frame.plot(x="date", y="roe")

    # sample the fit every 10 units of date_count up to the last row
    fitx = np.arange(0, frame.iloc[-1]["date_count"], 10)
    if y in ("lne", "e"):
        line = self.intercept_e + self.slope_e * fitx
    elif y in ("lnb", "b"):
        line = self.intercept_b + self.slope_b * fitx
    else:
        raise ParserFailure("Unrecogized y %s" % y)
    # the fit lives in log space; "e" and "b" are shown in linear space
    fity = np.exp(line) if y in ("e", "b") else line

    axes = frame.plot(x="date_count", y=y)
    axes.plot(fitx, fity)
    return axes
def _addrow(self):
    """
    Return cashflow table with one more line or raise an exception if there is no more line to add
    The same logic also applies to rem table

    Notes on several operations on one fund falling on the same trading day:
    the case where the very first historical purchase is also a dividend day
    cannot be handled; in fact this case does not occur. Several buys/sells
    in one day cannot be handled. Having both a sell and a buy on the same day
    is unrealistic; several purchases can only be merged into one record in
    the csv, which may introduce an error of 0.01 in the share calculation.
    Buying or selling on a dividend day can be handled.
    On the share conversion day of a structured fund the fund is closed and
    cannot be purchased, so the program simply ignores trades on that day.
    Hence several operations never coexist.

    Despite the first sentence nothing is returned: one row is appended to
    both ``self.cftable`` (date, cash, share) and ``self.remtable`` (date, rem).

    :raises Exception: there is no further nonzero record or special date up
        to ``yesterdayobj()``
    :raises TradeBehaviorError: the first nonzero record is not positive
    :raises ParserFailure: the ``comment`` of a special date is not a float
    """
    # the design on data remtable is disaster, it is very dangerous though works now
    code = self.aim.code
    if len(self.cftable) == 0:
        # first row: locate the first nonzero entry of this fund in the status table
        if len(self.status[self.status[code] != 0]) == 0:
            raise Exception("no other info to be add into cashflow table")
        i = 0
        while self.status.iloc[i].loc[code] == 0:
            i += 1
        value = self.status.iloc[i].loc[code]
        date = self.status.iloc[i].date
        if value > 0:
            rdate, cash, share = self.aim.shengou(value, date)
            rem = rm.buy([], share, rdate)
        else:
            raise TradeBehaviorError("You cannot sell first when you never buy")
    elif len(self.cftable) > 0:
        recorddate = list(self.status.date)
        lastdate = self.cftable.iloc[-1].date + pd.Timedelta(1, unit="d")
        # walk forward day by day until a special date or a record date with
        # a nonzero entry for this fund is reached
        while (lastdate not in self.aim.specialdate) and (
            (lastdate not in recorddate)
            or (
                (lastdate in recorddate)
                and (
                    self.status[self.status["date"] == lastdate].loc[:, code].any()
                    == 0
                )
            )
        ):
            lastdate += pd.Timedelta(1, unit="d")
            if (lastdate - yesterdayobj()).days >= 1:
                raise Exception("no other info to be add into cashflow table")
        date = lastdate
        label = self.aim.dividend_label  # cash dividend 0, dividend reinvestment 1
        cash = 0
        share = 0
        rem = self.remtable.iloc[-1].rem
        rdate = date
        if (date in recorddate) and (date not in self.aim.zhesuandate):
            # deal with buy and sell and label the fenhongzaitouru, namely one label a 0.05 in the original table to label fenhongzaitouru
            value = self.status[self.status["date"] == date].iloc[0].loc[code]
            fenhongmark = round(10 * value - int(10 * value), 1)
            # a 0.05 marker flips the dividend label for this row and is then
            # rounded away from the value
            if fenhongmark == 0.5 and label == 0:
                label = 1  # fenhong reinvest
                value = round(value, 1)
            elif fenhongmark == 0.5 and label == 1:
                label = 0
                value = round(value, 1)

            if value > 0:  # value stands for purchase money
                rdate, dcash, dshare = self.aim.shengou(value, date)
                rem = rm.buy(rem, dshare, rdate)
            elif value < -0.005:  # value stands for redemp share
                rdate, dcash, dshare = self.aim.shuhui(
                    -value, date, self.remtable.iloc[-1].rem
                )
                _, rem = rm.sell(rem, -dshare, rdate)
            elif value >= -0.005 and value < 0:
                # value now stands for the ratio to be sold in terms of remain positions, -0.005 stand for sell 100%
                remainshare = sum(self.cftable.loc[:, "share"])
                ratio = -value / 0.005
                rdate, dcash, dshare = self.aim.shuhui(
                    remainshare * ratio, date, self.remtable.iloc[-1].rem
                )
                _, rem = rm.sell(rem, -dshare, rdate)
            else:  # in case value=0, when specialday is in record day
                rdate, dcash, dshare = date, 0, 0

            cash += dcash
            share += dshare
        if date in self.aim.specialdate:  # deal with fenhong and xiazhe
            comment = (
                self.aim.price[self.aim.price["date"] == date]
                .iloc[0]
                .loc["comment"]
            )
            if isinstance(comment, float):
                # NOTE(review): a float comment that is neither < 0 nor > 0
                # (0.0 or nan) leaves dcash2/dshare2 unbound below - confirm
                # such values cannot occur on a special date
                if comment < 0:
                    dcash2, dshare2 = (
                        0,
                        sum([myround(sh * (-comment - 1)) for _, sh in rem]),
                    )  # xiazhe are seperately carried out based on different purchase date
                    rem = rm.trans(rem, -comment, date)
                    # myround(sum(cftable.loc[:,'share'])*(-comment-1))
                elif comment > 0 and label == 0:
                    # label 0: cash amount is total share times comment
                    dcash2, dshare2 = (
                        myround(sum(self.cftable.loc[:, "share"]) * comment),
                        0,
                    )
                    rem = rm.copy(rem)
                elif comment > 0 and label == 1:
                    # label 1: the same amount divided by the net value of the
                    # day is added as share
                    dcash2, dshare2 = (
                        0,
                        myround(
                            sum(self.cftable.loc[:, "share"])
                            * (
                                comment
                                / self.aim.price[self.aim.price["date"] == date]
                                .iloc[0]
                                .netvalue
                            )
                        ),
                    )
                    rem = rm.buy(rem, dshare2, date)

                cash += dcash2
                share += dshare2
            else:
                raise ParserFailure("comments not recoginized")

    # NOTE(review): DataFrame.append was removed in pandas 2.0 - TODO confirm
    # the pandas version this file targets
    self.cftable = self.cftable.append(
        pd.DataFrame([[rdate, cash, share]], columns=["date", "cash", "share"]),
        ignore_index=True,
    )
    self.remtable = self.remtable.append(
        pd.DataFrame([[rdate, rem]], columns=["date", "rem"]), ignore_index=True
    )
def _addrow(self):
    """
    Return cashflow table with one more line or raise an exception if there is no more line to add
    The same logic also applies to rem table

    Notes on several operations on one fund falling on the same trading day:
    the case where the very first historical purchase is also a dividend day
    cannot be handled; in fact this case does not occur. Several buys/sells
    in one day cannot be handled. Having both a sell and a buy on the same day
    is unrealistic; several purchases can only be merged into one record in
    the csv, which may introduce an error of 0.01 in the share calculation.
    Buying or selling on a dividend day can be handled.
    On the share conversion day of a structured fund the fund is closed and
    cannot be purchased, so the program simply ignores trades on that day.
    Hence several operations never coexist.

    Despite the first sentence nothing is returned: one row is appended to
    both ``self.cftable`` (date, cash, share) and ``self.remtable`` (date, rem),
    and ``self.lastdate`` is updated to remember where the next call resumes.

    :raises Exception: there is no further nonzero record or special date up
        to ``yesterdayobj()``
    :raises TradeBehaviorError: the first nonzero record is not positive
    :raises AssertionError: the custom purchase fee decoded from the first
        record is negative
    :raises ParserFailure: the ``comment`` of a special date is not a float
    """
    # the design on data remtable is disaster, it is very dangerous though works now
    # possibly failing cases include:
    # the recorded trade day is a holiday and the postponed date happens to be a
    # conversion day (purchase/redemption theoretically impossible) or a dividend day
    # (possibly not accounted for because date and rdate are misaligned);
    # or e.g. one purchase recorded on Sunday and one on Monday: the cash flow of the
    # Sunday record is booked on Monday, the cashflow table then continues from
    # Tuesday and the Monday record is dropped
    code = self.aim.code
    if len(self.cftable) == 0:
        # first row: locate the first nonzero entry of this fund in the status table
        if len(self.status[self.status[code] != 0]) == 0:
            raise Exception("no other info to be add into cashflow table")
        i = 0
        while self.status.iloc[i].loc[code] == 0:
            i += 1
        value = self.status.iloc[i].loc[code]
        date = self.status.iloc[i].date
        self.lastdate = date
        # move to the first priced date on/after the record date, else the last one before it
        if len(self.price[self.price["date"] >= date]) > 0:
            date = self.price[self.price["date"] >= date].iloc[0]["date"]
        else:
            date = self.price[self.price["date"] <= date].iloc[-1]["date"]
        # lastdate is not handled here as carefully as in the branch below; hopefully
        # no other odd problem shows up, revisit when a case appears
        # https://github.com/refraction-ray/xalpha/issues/47
        # intuitively, the handling here may well have other issues
        if value > 0:
            # the third and further decimals of value encode a custom fee:
            # a 5 in the third decimal marks it, the digits after carry the fee
            feelabel = 100 * value - int(100 * value)
            if round(feelabel, 1) == 0.5:
                # binary encoding, 10000.005 is actually 10000.0050...1, see issue #59
                feelabel = feelabel - 0.5
                if abs(feelabel) < 1e-4:
                    feelabel = 0
                else:
                    feelabel *= 100
            else:
                feelabel = None
            value = int(value * 100) / 100
            assert feelabel is None or feelabel >= 0.0, "自定义申购费必须为正值"
            rdate, cash, share = self.aim.shengou(value, date, fee=feelabel)
            rem = rm.buy([], share, rdate)
        else:
            raise TradeBehaviorError(
                "You cannot sell first when you never buy")
    elif len(self.cftable) > 0:
        # recorddate = list(self.status.date)
        # resume from the remembered lastdate when available, else from the last cashflow row
        if not getattr(self, "lastdate", None):
            lastdate = self.cftable.iloc[-1].date + pd.Timedelta(1, unit="d")
        else:
            lastdate = self.lastdate + pd.Timedelta(1, unit="d")
        # walk forward day by day until a special date or a record date with
        # a nonzero entry for this fund is reached
        while (lastdate not in self.aim.specialdate) and (
                (lastdate not in self.recorddate_set) or
                ((lastdate in self.recorddate_set) and
                 (self.status[self.status["date"] == lastdate].loc[:, code].any() == 0))):
            lastdate += pd.Timedelta(1, unit="d")
            if (lastdate - yesterdayobj()).days >= 1:
                raise Exception(
                    "no other info to be add into cashflow table")
        # same bound again, for the case where the loop body never ran
        if (lastdate - yesterdayobj()).days >= 1:
            raise Exception("no other info to be add into cashflow table")
        date = lastdate
        # on a day without net value shift forward first; if that is impossible, shift backward
        # recording accurate dates is still advised, otherwise errors that cannot be
        # handled perfectly may appear
        if len(self.price[self.price["date"] >= date]) > 0:
            date = self.price[self.price["date"] >= date].iloc[0]["date"]
        else:
            date = self.price[self.price["date"] <= date].iloc[-1]["date"]
        if date != lastdate and date in list(self.status.date):
            # the date was shifted onto another record day, very likely to cause problems!
            logger.warning(
                "账单日期 %s 非 %s 的净值记录日期,日期智能平移后 %s 与账单其他日期重合!交易处理极可能出现问题!! "
                "靠后日期的记录被覆盖" % (lastdate, self.code, date))
        self.lastdate = lastdate
        if date > lastdate:
            self.lastdate = date
        # see https://github.com/refraction-ray/xalpha/issues/27, begin new date from last one in df is not reliable
        label = self.aim.dividend_label  # cash dividend 0, dividend reinvestment 1
        cash = 0
        share = 0
        rem = self.remtable.iloc[-1].rem
        rdate = date
        if (lastdate in self.recorddate_set) and (date not in self.aim.zhesuandate):
            # deal with buy and sell and label the fenhongzaitouru, namely one label a 0.05 in the original table to label fenhongzaitouru
            value = self.status[
                self.status["date"] <= lastdate].iloc[-1].loc[code]
            if date in self.aim.fenhongdate:  # the 0.05 dividend marker is only effective on dividend days
                fenhongmark = round(10 * value - int(10 * value), 1)
                # the marker flips the dividend label for this row and is
                # then removed from the value, keeping its sign
                if fenhongmark == 0.5 and label == 0:
                    label = 1  # fenhong reinvest
                    value = value - math.copysign(0.05, value)
                elif fenhongmark == 0.5 and label == 1:
                    label = 0
                    value = value - math.copysign(0.05, value)
            if value > 0:  # value stands for purchase money
                feelabel = 100 * value - int(100 * value)
                if int(10 * feelabel) == 5:
                    feelabel = (feelabel - 0.5) * 100
                else:
                    feelabel = None
                value = int(value * 100) / 100
                rdate, dcash, dshare = self.aim.shengou(
                    value, date, fee=feelabel
                )  # shengou fee is in the unit of percent, different than shuhui case
                rem = rm.buy(rem, dshare, rdate)
            elif value < -0.005:  # value stands for redemp share
                feelabel = int(100 * value) - 100 * value
                if int(10 * feelabel) == 5:
                    feelabel = feelabel - 0.5
                else:
                    feelabel = None
                value = int(value * 100) / 100
                rdate, dcash, dshare = self.aim.shuhui(
                    -value, date, self.remtable.iloc[-1].rem, fee=feelabel)
                _, rem = rm.sell(rem, -dshare, rdate)
            elif value >= -0.005 and value < 0:
                # value now stands for the ratio to be sold in terms of remain positions, -0.005 stand for sell 100%
                remainshare = sum(self.cftable[
                    self.cftable["date"] <= date].loc[:, "share"])
                ratio = -value / 0.005
                rdate, dcash, dshare = self.aim.shuhui(
                    remainshare * ratio, date, self.remtable.iloc[-1].rem, 0)
                _, rem = rm.sell(rem, -dshare, rdate)
            else:  # in case value=0, when specialday is in record day
                rdate, dcash, dshare = date, 0, 0

            cash += dcash
            share += dshare
        if date in self.aim.specialdate:  # deal with fenhong and xiazhe
            comment = self.price[self.price["date"] == date].iloc[0].loc["comment"]
            if isinstance(comment, float):
                # NOTE(review): a float comment that is neither < 0 nor > 0
                # (0.0 or nan) leaves dcash2/dshare2 unbound below - confirm
                # such values cannot occur on a special date
                if comment < 0:
                    dcash2, dshare2 = (
                        0,
                        sum([
                            myround(sh * (-comment - 1)) for _, sh in rem
                        ]),
                    )  # xiazhe are seperately carried out based on different purchase date
                    rem = rm.trans(rem, -comment, date)
                    # myround(sum(cftable.loc[:,'share'])*(-comment-1))
                elif comment > 0 and label == 0:
                    # label 0: cash amount is total share times comment
                    dcash2, dshare2 = (
                        myround(
                            sum(self.cftable.loc[:, "share"]) * comment),
                        0,
                    )
                    rem = rm.copy(rem)
                elif comment > 0 and label == 1:
                    # label 1: the same amount divided by the net value of the
                    # day is added as share
                    dcash2, dshare2 = (
                        0,
                        myround(
                            sum(self.cftable.loc[:, "share"]) *
                            (comment /
                             self.price[self.price["date"] == date].iloc[0].netvalue)),
                    )
                    rem = rm.buy(rem, dshare2, date)

                cash += dcash2
                share += dshare2
            else:
                raise ParserFailure("comments not recognized")

    # NOTE(review): DataFrame.append was removed in pandas 2.0 - TODO confirm
    # the pandas version this file targets
    self.cftable = self.cftable.append(
        pd.DataFrame([[rdate, cash, share]],
                     columns=["date", "cash", "share"]),
        ignore_index=True,
    )
    self.remtable = self.remtable.append(pd.DataFrame(
        [[rdate, rem]], columns=["date", "rem"]),
        ignore_index=True)
def vtradevolume(cftable, freq="D", rendered=True):
    """
    aid function on visualization of trade summary

    :param cftable: cftable (pandas.DataFrame) with at least date and cash columns
    :param freq: one character string, frequency label, now supporting D for date,
        W for week and M for month, namely the trade volume is shown based on the time unit
    :param rendered: bool. If true, the result of ``bar.render_notebook()`` is
        returned, otherwise the Bar object itself.
    :returns: the Bar object, or its notebook rendering when ``rendered`` is true
    :raises ParserFailure: ``freq`` is none of D, W, M
    """
    ### WARN: datazoom and time conflict, sliding till 1970..., need further look into pyeacharts
    dates = cftable["date"]
    startdate = cftable.iloc[0]["date"]
    if freq == "D":
        datedata = pd.date_range(startdate, yesterdayobj(), freq="D")
        merged = [(d.to_pydatetime(), c) for d, c in zip(dates, cftable["cash"])]
    elif freq == "W":
        # Every trade is bucketed on the Thursday of its own Monday-to-Sunday
        # week, i.e. the anchor day of the ISO week. Computing the anchor from
        # the weekday keeps ISO year and ISO week consistent around New Year
        # and needs neither the deprecated ``.dt.week`` nor strptime.
        thursday = (
            dates + pd.to_timedelta(3 - dates.dt.weekday, unit="D")
        ).dt.normalize()
        cfmerge = cftable["cash"].groupby(thursday).sum()
        # Start the axis at the bucket of the first trade: when that trade is
        # on Friday to Sunday its Thursday lies before ``startdate`` and would
        # otherwise be missing from the axis.
        datedata = pd.date_range(
            thursday.iloc[0], yesterdayobj() + pd.Timedelta(days=7), freq="W-THU"
        )
        merged = [(d.to_pydatetime(), c) for d, c in cfmerge.items()]
    elif freq == "M":
        cfmerge = cftable.groupby([dates.dt.year, dates.dt.month])["cash"].sum()
        # Monthly buckets sit on the first day of the month, so the axis has
        # to start at the first day of the month of the first trade.
        monthstart = pd.Timestamp(startdate).normalize().replace(day=1)
        datedata = pd.date_range(
            monthstart, yesterdayobj() + pd.Timedelta(days=31), freq="MS"
        )
        merged = [
            (dt.datetime(int(y), int(m), 1), c) for (y, m), c in cfmerge.items()
        ]
    else:
        raise ParserFailure("no such freq tag supporting")

    # positive cash goes to the sell series, negative cash to the buy series
    buydata = [[d, round(x, 1)] for d, x in merged if x < 0]
    selldata = [[d, round(x, 1)] for d, x in merged if x > 0]

    bar = Bar()
    datedata = list(datedata)
    bar.add_xaxis(xaxis_data=datedata)
    # buydata should before selldata, since emptylist in the first line would
    # make the output fig empty: may be bug in pyecharts
    bar.add_yaxis(series_name="买入", yaxis_data=buydata)
    bar.add_yaxis(series_name="卖出", yaxis_data=selldata)
    bar.set_global_opts(
        tooltip_opts=opts.TooltipOpts(
            is_show=True,
            trigger="axis",
            trigger_on="mousemove",
            axis_pointer_type="cross",
        ),
        datazoom_opts=[opts.DataZoomOpts(range_start=90, range_end=100)],
    )
    if rendered:
        return bar.render_notebook()
    else:
        return bar
def get_fund_holdings(code, year="", season="", month="", category="jjcc"):
    """
    Fetch the detailed underlying holdings of a fund.

    :param code: str. 6-digit fund code; a leading "F" is stripped.
    :param year: int. eg. 2019
    :param season: int, 1,2,3,4
    :param month: Optional[int]. Specifying season is enough, normally this
        can be ignored. When empty and season is given, ``3 * season`` is used.
    :param category: str. stock for stock holdings, bond for bond holdings.
        Holdings of overseas funds cannot be processed automatically from
        tiantianjijin, and domestic fund holdings of FOF are not supported yet.
    :return: pd.DataFrame or None. None is returned when there is no
        corresponding holding, or the requested month is not published yet.
    :raises ParserFailure: ``category`` is not recognized
    """
    if not month and season:
        month = 3 * int(season)
    if category in ["stock", "stocks", "jjcc", "", "gp", "s"]:
        category = "jjcc"
    elif category in ["bond", "bonds", "zq", "zqcc", "b"]:
        category = "zqcc"
    else:
        raise ParserFailure("unrecognized category %s" % category)
    if code.startswith("F"):
        code = code[1:]
    r = rget(
        "http://fundf10.eastmoney.com/FundArchivesDatas.aspx?type={category}&code={code}&topline=10&"
        "year={year}&month={month}".format(
            year=str(year), month=str(month), code=code, category=category
        ),
        headers={
            "Host": "fundf10.eastmoney.com",
            "Referer": "http://fundf10.eastmoney.com/ccmx_{code}.html".format(
                code=code
            ),
        },
    )
    if len(r.text) < 50:
        # this fund has no holdings on stock or bonds in this period
        return
    # raw string: "\s" inside a normal string literal is an invalid escape
    s = BeautifulSoup(
        re.match(r"[\s\S]*apidata={ content:(.*),arryear:", r.text).groups()[0],
        "lxml",
    )
    if len(s.text) < 30:
        # this fund has no holdings on stock or bonds in this period
        return
    timeline = [
        i.string for i in s.findAll("font", class_="px12") if i.text.startswith("2")
    ]
    ind = 0
    if month:
        # compare as integers so that "09" equals 9 while 2 does not match 12
        wanted = int(month)
        for i, d in enumerate(timeline):
            if int(d.split("-")[1]) == wanted:
                ind = i
                break
        else:
            return  # not update to this month
    t1 = s.findAll("table")[ind]
    rows = t1.findAll("tr")
    main = [[j.text for j in i.contents] for i in rows[1:]]
    cols = [j.text for j in rows[0].contents if j.text.strip()]
    # default column positions, overridden by whatever the header row says
    icode = 1
    iname = 2
    iratio = 4
    ishare = 5
    ivalue = 6
    for j, col in enumerate(cols):
        if col.endswith("代码"):
            icode = j
        elif col.endswith("名称"):
            iname = j
        elif col.endswith("比例"):
            iratio = j
        elif col.startswith("持股数"):
            ishare = j
        elif col.startswith("持仓市值"):
            ivalue = j
    if category == "jjcc":
        result = {"code": [], "name": [], "ratio": [], "share": [], "value": []}
        for row in main:
            result["code"].append(row[icode])
            result["name"].append(row[iname])
            # the ratio cell ends with one trailing character that is dropped
            result["ratio"].append(float(row[iratio][:-1]))
            result["share"].append(_float(row[ishare]))
            result["value"].append(_float(row[ivalue]))
    elif category == "zqcc":
        # the bond table is read with fixed column positions
        result = {"code": [], "name": [], "ratio": [], "value": []}
        for row in main:
            result["code"].append(row[1])
            result["name"].append(row[2])
            result["ratio"].append(float(row[3][:-1]))
            result["value"].append(_float(row[4]))
    return pd.DataFrame(result)