Пример #1
0
def fb_gid_get_nday(xtfb, timStr, fgExt=False):
    """Fetch the per-day game pages for the last N days and merge them into ``tfsys.gids``.

    Starting at ``timStr`` (or ``xtfb.tim_now`` when ``timStr`` is empty) the
    function walks backwards one day at a time for ``tfsys.xnday_down`` days,
    stopping early once the date falls before ``xtfb.tim0_gid``. Each day's
    page is parsed with ``fb_gid_get4htm`` and the resulting rows are merged
    into ``tfsys.gids``, keeping the most recently added row per ``gid``.

    Args:
        xtfb: Object providing ``tim_now`` (default start date) and
            ``tim0_gid`` (earliest date to process).
        timStr: Start date as a string accepted by ``arrow.get``; ``''``
            means "use ``xtfb.tim_now``".
        fgExt: When true, ``fb_gid_getExtPool`` is called on each day's rows.

    Side effects:
        Rebinds ``tfsys.gids``; writes a log line per day via ``zt.f_addLog``;
        prints progress; when ``tfsys.gidsFN`` is non-empty, writes
        ``tfsys.gids`` to that path as CSV (gb18030 encoded, no index).
    """
    # Local import: this block did not previously reference pandas by name,
    # and pd.concat replaces DataFrame.append (removed in pandas 2.0).
    import pandas as pd

    ktim = xtfb.tim_now if timStr == '' else arrow.get(timStr)

    for tc in range(tfsys.xnday_down):
        xtim = ktim.shift(days=-tc)
        xtimStr = xtim.format('YYYY-MM-DD')

        zt.f_addLog(str(tc) + '#,' + xtimStr + ',@' + zt.get_fun_nam())

        # Dates only get older from here on, so nothing later can qualify.
        if xtim < xtfb.tim0_gid:
            print('#brk;')
            break

        fss = tfsys.rghtm + xtimStr + '.htm'
        uss = tfsys.us0_gid + xtimStr
        print(timStr, tc, '# update--', fss)

        # NOTE(review): presumably fetches URL `uss` using `fss` as a local
        # file copy -- confirm against zweb.web_get001txtFg.
        htm = zweb.web_get001txtFg(uss, fss)
        # Pages of 5000 characters or fewer are skipped (presumably too short
        # to hold real game data -- TODO confirm the threshold's origin).
        if len(htm) <= 5000:
            continue

        df = fb_gid_get4htm(htm)
        if len(df['gid']) == 0:
            continue

        # Newly parsed rows go last so keep='last' prefers them on a gid clash.
        tfsys.gids = pd.concat([tfsys.gids, df]).drop_duplicates(
            subset='gid', keep='last')

        if fgExt:
            fb_gid_getExtPool(df)

    if tfsys.gidsFN != '':
        print('+++++')
        print(tfsys.gids.tail())
        tfsys.gids.to_csv(tfsys.gidsFN, index=False, encoding='gb18030')
Пример #2
0
def fb_gid_get_nday(xtfb, timStr, fgExt=False):
    """Download league data, then fetch and merge the last N days of game pages.

    Step 1 calls ``fb_download_league_data`` for every entry in
    ``tfsys.league`` and appends each result to ``tfsys.league_sc``.

    Step 2 starts at ``timStr`` (or ``xtfb.tim_now`` when ``timStr`` is empty)
    and walks backwards one day at a time for ``tfsys.xnday_down`` days,
    stopping early once the date falls before ``xtfb.tim0_gid``. Each day's
    page is parsed with ``fb_gid_get4htm`` and merged into ``tfsys.gids``,
    keeping the most recently added row per ``gid``.

    Args:
        xtfb: Object providing ``tim_now`` (default start date) and
            ``tim0_gid`` (earliest date to process).
        timStr: Start date as a string accepted by ``arrow.get``; ``''``
            means "use ``xtfb.tim_now``".
        fgExt: When true, ``fb_gid_getExt`` is called on each day's rows.

    Side effects:
        Rebinds ``tfsys.league_sc`` and ``tfsys.gids``; writes a log line per
        day via ``zt.f_addLog``; prints progress; when ``tfsys.gidsFN`` is
        non-empty, writes ``tfsys.gids`` to that path as CSV (gb18030 encoded,
        no index).
    """
    # 1. Download the data for each league.
    for league in tfsys.league:
        sc = fb_download_league_data(league)
        tfsys.league_sc = pd.concat([tfsys.league_sc, sc], ignore_index=True)

    # 2. Download the games for the requested period.
    ktim = xtfb.tim_now if timStr == '' else arrow.get(timStr)

    for tc in range(tfsys.xnday_down):
        xtim = ktim.shift(days=-tc)
        xtimStr = xtim.format('YYYY-MM-DD')

        zt.f_addLog(str(tc) + '#,' + xtimStr + ',@' + zt.get_fun_nam())

        # Dates only get older from here on, so nothing later can qualify.
        if xtim < xtfb.tim0_gid:
            print('#brk;')
            break

        fss = tfsys.rghtm + xtimStr + '.htm'
        uss = tfsys.us0_gid + xtimStr
        print(timStr, tc, '#', fss)

        # NOTE(review): presumably fetches URL `uss` using `fss` as a local
        # file copy -- confirm against zweb.web_get001txtFg.
        htm = zweb.web_get001txtFg(uss, fss)
        # Pages of 5000 characters or fewer are skipped (presumably too short
        # to hold real game data -- TODO confirm the threshold's origin).
        if len(htm) <= 5000:
            continue

        df = fb_gid_get4htm(htm)  # extract that day's games
        if len(df['gid']) == 0:
            continue

        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        # Newly parsed rows go last so keep='last' prefers them on a gid clash.
        tfsys.gids = pd.concat([tfsys.gids, df]).drop_duplicates(
            subset='gid', keep='last')

        # Single-threaded variant; fb_gid_getExtPool is the multi-threaded one.
        if fgExt:
            fb_gid_getExt(df)

    if tfsys.gidsFN != '':
        print('+++++')
        print(tfsys.gids.tail())
        tfsys.gids.to_csv(tfsys.gidsFN, index=False, encoding='gb18030')
Пример #3
0
def _fb_gid_fill_team_ids(bs, df, n_games, css_class, column):
    """Fill ``df[column]`` with team ids taken from ``<td class=css_class>`` links.

    Cells are matched to rows of ``df`` by position, so nothing is written
    unless the number of cells equals ``n_games``. ``df`` is modified in place.
    """
    cells = bs.find_all('td', class_=css_class)
    if len(cells) != n_games:
        return
    for row, cell in enumerate(cells):
        href = cell.a['href']
        # Links containing '/team//' are skipped, as in the original logic
        # (presumably they carry no team id -- confirm).
        if '/team//' in href:
            continue
        team_id = zstr.str_xmid(href, '/team/', '/')
        df.loc[row, column] = team_id
        if team_id == '':
            zt.f_addLog('tid-' + column + ',nil,' + href + ',gid,'
                        + df.at[row, 'gid'])


def fb_gid_get4htm(htm):
    """Parse one day's game-list HTML page into a DataFrame of games.

    The page is processed in five passes:

    1. Every tag selected by ``zweb.bs_get_ktag`` (with
       ``zsys.bs_get_ktag_kstr`` set to ``'isend'``) becomes one row, with
       ``gid``, ``gset``, ``mplay``, ``gplay``, ``kend``, ``tweek``, ``tplay``
       and ``tsell`` read from the tag's attributes. All other columns start
       at the defaults in ``tfsys.gidNil``.
    2. Every ``<a class="score">`` link whose href yields a ``gid`` matching
       exactly one row fills ``qj``/``qs`` from the link text split on ``':'``
       and ``kwin`` from ``fb_kwin4qnum``.
    3. ``<td class="left_team">`` links fill ``mtid``.
    4. ``<td class="right_team">`` links fill ``gtid``.
    5. Rows whose ``gid`` is ``'-1'`` are dropped.

    Args:
        htm: HTML text of the page.

    Returns:
        pandas.DataFrame with the columns of ``tfsys.gidSgn``; empty when the
        page contains no matching tags.

    Side effects:
        Sets ``zsys.bs_get_ktag_kstr``; may log via ``zt.f_addLog``.
    """
    bs = BeautifulSoup(htm, 'html5lib')
    # Default values for every column; each game row starts as a copy of this.
    template = pd.Series(tfsys.gidNil, index=tfsys.gidSgn, dtype=str)

    # --- 1: one row per game tag ---
    zsys.bs_get_ktag_kstr = 'isend'
    games = bs.find_all(zweb.bs_get_ktag)
    rows = []
    for tag in games:
        row = template.to_dict()
        row['gid'] = tag['fid']
        row['gset'] = zstr.str_fltHtmHdr(tag['lg'])
        row['mplay'] = zstr.str_fltHtmHdr(tag['homesxname'])
        row['gplay'] = zstr.str_fltHtmHdr(tag['awaysxname'])
        row['kend'] = tag['isend']
        # Only the part of 'gdate' before the first space is passed on.
        row['tweek'] = fb_tweekXed(tag['gdate'].split(' ')[0])
        row['tplay'] = tag['pdate']
        row['tsell'] = tag['pendtime']
        rows.append(row)

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # row-by-row appending is quadratic anyway.
    if rows:
        df = pd.DataFrame(rows)
    else:
        df = pd.DataFrame(columns=tfsys.gidSgn, dtype=str)

    # --- 2: scores ---
    for link in bs.find_all('a', class_='score'):
        gid = zstr.str_xmid(link['href'], 'ju-', '.sh')
        goals = link.text.split(':')
        hit = df.index[df['gid'] == gid]
        if len(hit) != 1:
            continue
        # .loc writes to df itself; chained df[col][idx] assignment is not
        # guaranteed to (and does nothing under pandas Copy-on-Write).
        df.loc[hit, 'qj'] = goals[0]
        df.loc[hit, 'qs'] = goals[1]
        df.loc[hit, 'kwin'] = str(fb_kwin4qnum(int(goals[0]), int(goals[1])))

    # --- 3 / 4: team ids from the left_team and right_team cells ---
    _fb_gid_fill_team_ids(bs, df, len(games), 'left_team', 'mtid')
    _fb_gid_fill_team_ids(bs, df, len(games), 'right_team', 'gtid')

    # --- 5: drop rows whose gid is '-1' ---
    df = df[df['gid'] != '-1']
    return df