예제 #1
0
def web_getXTxt001k(bs):
    """Gather the text of a parsed page's ``<p>`` elements into one string.

    Args:
        bs: Parsed document exposing ``find_all(tag)`` (used like a
            BeautifulSoup object); each returned element exposes ``.text``.

    Returns:
        The collected fragments, each preceded by a newline, or ``''`` when
        the collected text is shorter than 200 characters.

    Notes:
        * If no ``<p>`` element is found, ``<div>`` elements are used instead.
        * A fragment is skipped when its raw text already occurs inside the
          text collected so far (this also skips empty text).
        * Each kept fragment is passed through ``zstr.str_fltHtm`` (project
          helper; presumably strips HTML residue -- confirm) and dropped when
          the result is 10 characters or shorter.
    """
    nodes = bs.find_all('p')
    if nodes == []:
        # No paragraphs at all: fall back to the <div> elements.
        nodes = bs.find_all('div')

    collected = ''
    for node in nodes:
        if node is None:
            continue
        raw = node.text
        # The duplicate test uses the raw text, before filtering.
        if raw in collected:
            continue
        cleaned = zstr.str_fltHtm(raw)
        if len(cleaned) <= 10:
            continue
        collected = collected + '\n' + cleaned

    # Too little text is treated as "nothing usable found".
    return '' if len(collected) < 200 else collected
예제 #2
0
def web_getXTxt001div(bs, claSgn):
    """Gather the text of ``<div class=claSgn>`` elements nested in ``<div>``s.

    Args:
        bs: Parsed document exposing ``find_all(tag)`` (used like a
            BeautifulSoup object).
        claSgn: CSS class passed as ``class_`` when searching each ``<div>``
            for a nested ``<div>``.

    Returns:
        The collected fragments, each preceded by a newline, or ``''`` when
        the collected text is shorter than 200 characters.

    Notes:
        * Only the first matching nested ``<div>`` of each outer ``<div>`` is
          used (``find``, not ``find_all``).
        * A fragment is skipped when its raw text already occurs inside the
          text collected so far (this also skips empty text).
        * Each kept fragment is passed through ``zstr.str_fltHtm`` (project
          helper; presumably strips HTML residue -- confirm).
    """
    collected = ''
    for outer in bs.find_all('div'):
        if outer is None:
            continue
        inner = outer.find('div', class_=claSgn)
        if inner is None:
            continue
        raw = inner.text
        # The duplicate test uses the raw text, before filtering.
        if raw in collected:
            continue
        collected = collected + '\n' + zstr.str_fltHtm(raw)

    # Too little text is treated as "nothing usable found".
    return '' if len(collected) < 200 else collected
예제 #3
0
def fb_gid_get4htm(htm):
    """Parse a match-list HTML page into a DataFrame with one row per match.

    Args:
        htm: HTML source of the page, parsed with BeautifulSoup/``html5lib``.

    Returns:
        A ``pandas.DataFrame`` whose columns start with ``tfsys.gidSgn``.
        Each row starts from the defaults in ``tfsys.gidNil`` and is filled
        with:

        * ``gid``   - second ``_``-separated part of the ``id`` of the first
          ``<span>`` inside the ``align="right"`` element
        * ``gset``  - the element's ``gamename`` attribute
        * ``mplay`` / ``gplay`` - text of the ``align="right"`` /
          ``align="left"`` elements
        * ``qj`` / ``qs`` - the two sides of the ``-``-separated score token
        * ``kend``  - ``'1'`` when the deadline element's text is ``'完场'``
          (Chinese for "match finished")
        * ``tsell`` / ``tplay`` - taken from the deadline element's ``title``
        * ``tweek`` - the element's ``name`` attribute
        * ``kwin``  - ``str(fb_kwin4qnum(int(qj), int(qs)))``

        Rows whose ``gid`` is still ``'-1'`` are removed before returning.

    Raises:
        AttributeError, KeyError, IndexError, TypeError or ValueError when a
        match element lacks the expected children, attributes or score
        layout; malformed elements are not skipped.
    """
    bs = BeautifulSoup(htm, 'html5lib')  # 'lxml'
    # Empty template frame: fixes the column set (and order) of the result.
    df = pd.DataFrame(columns=tfsys.gidSgn, dtype=str)

    #---1# select the match elements
    # NOTE(review): the selector key is written to zsys but the filter is
    # taken from zweb; presumably zweb.bs_get_ktag reads
    # zsys.bs_get_ktag_kstr -- confirm.
    zsys.bs_get_ktag_kstr = 'matchid'
    # '截止时间' is Chinese for "deadline"; compiled once, outside the loop.
    deadline_title = re.compile("截止时间:*")

    rows = []
    for x in bs.find_all(zweb.bs_get_ktag):
        # Fresh record per match, pre-filled with the default values.
        ds = pd.Series(tfsys.gidNil, index=tfsys.gidSgn, dtype=str)

        home_team = x.find(attrs={'align': 'right'})
        guest_team = x.find(attrs={'align': 'left'})
        ds['gid'] = home_team.find('span')['id'].split('_')[1]
        ds['gset'] = x['gamename']
        ds['mplay'] = home_team.text
        ds['gplay'] = guest_team.text

        # The score is read from the sixth token of the flattened text.
        clst = zt.lst4objs_txt2(zstr.str_fltHtm(x.text), ['\n', '\t', '%'])
        score = clst[5].split('-')
        # Empty sides leave the tfsys.gidNil default in place.
        if score[0]:
            ds['qj'] = score[0]
        if score[1]:
            ds['qs'] = score[1]

        date = x.find(attrs={'title': deadline_title})
        if date.text == '完场':
            ds['kend'] = '1'
        # NOTE(review): split(':')[1] keeps only the text between the first
        # and second ASCII colon of the title; if the title carries an HH:MM
        # time the minutes are dropped -- confirm this is intended.
        ds['tsell'] = date['title'].split(':')[1]
        ds['tplay'] = ds['tsell'].split(' ')[0]
        ds['tweek'] = x['name']

        kwin = fb_kwin4qnum(int(ds['qj']), int(ds['qs']))
        ds['kwin'] = str(kwin)

        rows.append(ds)

    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call; build the rows once and concatenate a single time.
    if rows:
        df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)

    #---5# drop records that never received a real match id
    df = df[df['gid'] != '-1']
    return df