def web_getXTxt001k(bs):
    """Collect the de-duplicated text of a parsed page's ``<p>`` elements.

    When the page has no ``<p>`` element at all, ``<div>`` elements are used
    instead.

    Args:
        bs: Parsed document exposing ``find_all(tag)`` (e.g. a BeautifulSoup
            object). Every returned element must expose a ``.text`` string.

    Returns:
        The kept text fragments, each preceded by a newline, or ``''`` when
        fewer than 200 characters were collected in total.
    """
    elements = bs.find_all('p')
    if not elements:
        elements = bs.find_all('div')

    tss = ''
    for elem in elements:
        if elem is None:
            continue
        raw = elem.text
        # Skip text that is already part of the output (e.g. nested elements
        # repeating their parent's text).
        # NOTE(review): the check uses the raw text while the stored text is
        # the filtered one, so a duplicate that the filter rewrites is not
        # detected -- confirm this is intended.
        if raw in tss:
            continue
        css = zstr.str_fltHtm(raw)
        # Fragments of 10 characters or fewer are dropped.
        if len(css) > 10:
            tss = ''.join([tss, '\n', css])

    # Too little text overall is treated as "nothing extracted".
    if len(tss) < 200:
        tss = ''
    return tss
def web_getXTxt001div(bs, claSgn):
    """Collect the de-duplicated text of ``<div>`` elements of a given class.

    Every ``<div>`` of the document is searched for its first descendant
    ``<div>`` whose class matches ``claSgn``; the text of each such match is
    filtered and appended to the result.

    Args:
        bs: Parsed document exposing ``find_all(tag)`` (e.g. a BeautifulSoup
            object). Every returned element must expose
            ``find(name, class_=...)``.
        claSgn: Class value passed as ``class_`` to the inner ``find`` call.

    Returns:
        The kept text fragments, each preceded by a newline, or ``''`` when
        fewer than 200 characters were collected in total.
    """
    tss = ''
    for outer in bs.find_all('div'):
        if outer is None:
            continue
        inner = outer.find('div', class_=claSgn)
        if inner is None:
            continue
        raw = inner.text
        # Several ancestors can resolve to the same inner <div>; skip text
        # that is already part of the output.
        # NOTE(review): the check uses the raw text while the stored text is
        # the filtered one -- confirm this is intended.
        if raw in tss:
            continue
        tss = ''.join([tss, '\n', zstr.str_fltHtm(raw)])

    # Too little text overall is treated as "nothing extracted".
    if len(tss) < 200:
        tss = ''
    return tss
def fb_gid_get4htm(htm):
    """Parse a match-list HTML page into a DataFrame with one row per match.

    Each element selected by ``zweb.bs_get_ktag`` yields one row whose
    columns are ``tfsys.gidSgn`` and whose defaults are ``tfsys.gidNil``.
    Fields filled from the element:

    * ``gid``   -- second ``_``-separated part of the ``id`` of the first
      ``<span>`` inside the ``align="right"`` element
    * ``gset``  -- the element's ``gamename`` attribute
    * ``mplay`` / ``gplay`` -- text of the ``align="right"`` /
      ``align="left"`` element
    * ``qj`` / ``qs`` -- the two sides of the ``-``-separated score, only
      when non-empty
    * ``kend``  -- ``'1'`` when the deadline element's text is ``'完场'``
      (match finished)
    * ``tsell`` -- the part of the deadline element's ``title`` between the
      first and second ``:``
    * ``tplay`` -- the part of ``tsell`` before the first space
    * ``tweek`` -- the element's ``name`` attribute
    * ``kwin``  -- ``str(fb_kwin4qnum(int(qj), int(qs)))``

    Args:
        htm: HTML source of the page.

    Returns:
        A DataFrame with columns ``tfsys.gidSgn``, without the rows whose
        ``gid`` is ``'-1'``.

    Side effects:
        Sets ``zsys.bs_get_ktag_kstr`` to ``'matchid'``.
    """
    bs = BeautifulSoup(htm, 'html5lib')  # 'lxml' was the noted alternative

    # Presumably read by zweb.bs_get_ktag to select the per-match elements.
    # NOTE(review): the value is set on zsys while the filter lives in
    # zweb -- confirm both refer to the same setting.
    zsys.bs_get_ktag_kstr = 'matchid'

    # Rows are collected and the frame is built once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    rows = []
    for x in bs.find_all(zweb.bs_get_ktag):
        ds = pd.Series(tfsys.gidNil, index=tfsys.gidSgn, dtype=str)

        home_team = x.find(attrs={'align': 'right'})
        guest_team = x.find(attrs={'align': 'left'})
        ds['gid'] = home_team.find('span')['id'].split('_')[1]
        ds['gset'] = x['gamename']
        ds['mplay'] = home_team.text
        ds['gplay'] = guest_team.text

        # The score is the sixth text field of the element; an empty side
        # leaves the default from tfsys.gidNil in place.
        clst = zt.lst4objs_txt2(zstr.str_fltHtm(x.text), ['\n', '\t', '%'])
        score = clst[5].split('-')
        if score[0]:
            ds['qj'] = score[0]
        if score[1]:
            ds['qs'] = score[1]

        # The deadline element is found by its title ("截止时间" = deadline).
        date = x.find(attrs={'title': re.compile("截止时间:*")})
        if date.text == '完场':
            ds['kend'] = '1'
        # NOTE(review): split(':')[1] stops at the second colon, so a title
        # holding an HH:MM time would lose its minutes -- confirm format.
        ds['tsell'] = date['title'].split(':')[1]
        ds['tplay'] = ds['tsell'].split(' ')[0]
        ds['tweek'] = x['name']

        kwin = fb_kwin4qnum(int(ds['qj']), int(ds['qs']))
        ds['kwin'] = str(kwin)
        rows.append(ds)

    if rows:
        # reset_index mirrors the ignore_index=True of the former append.
        df = pd.DataFrame(rows, columns=tfsys.gidSgn).reset_index(drop=True)
    else:
        df = pd.DataFrame(columns=tfsys.gidSgn, dtype=str)

    # Drop rows whose gid is the '-1' placeholder.
    df = df[df['gid'] != '-1']
    return df