def fb_gid_getExt010(x10):
    """Download the az/oz/tz extension pages of one match and refresh its .dat files.

    Args:
        x10: Values of one match record, labelled with ``tfsys.gidSgn``.
            The ``'gid'`` field selects the match; the whole record is
            forwarded to the page parsers.

    Returns:
        Tuple ``(az_dat, oz_dat, tz_dat)`` with the paths of the three
        ``.dat`` files under ``tfsys.rxdat``.
    """
    bars = pd.Series(x10, index=tfsys.gidSgn, dtype=str)
    gid = bars['gid']

    def _needs_refresh(size):
        # Re-parse when the data file is small (< 1000) or when
        # tfsys.xnday_down is below 10.
        return (size < 1000) or (tfsys.xnday_down < 10)

    # Cache-file / URL pairs for the three pages.
    cache_az = tfsys.rhtmYazhi + gid + '_az.htm'
    url_az = tfsys.us0_extYazhi + gid + '.shtml'
    cache_oz = tfsys.rhtmOuzhi + gid + '_oz.htm'
    url_oz = tfsys.us0_extOuzhi + gid + '.shtml'
    cache_tz = tfsys.rhtmTouzhu + gid + '_tz.htm'
    url_tz = tfsys.us0_extTouzhu + gid + '.shtml'

    # Phase 1: fetch all three pages (az, oz, tz -- in that order).
    page_az = zweb.web_get001txtFg(url_az, cache_az)
    page_oz = zweb.web_get001txtFg(url_oz, cache_oz)
    page_tz = zweb.web_get001txtFg(url_tz, cache_tz)

    # Phase 2: locate the output data files and measure them.
    dat_az = tfsys.rxdat + gid + '_az.dat'
    dat_oz = tfsys.rxdat + gid + '_oz.dat'
    dat_tz = tfsys.rxdat + gid + '_tz.dat'
    size_az = zt.f_size(dat_az)
    size_oz = zt.f_size(dat_oz)
    size_tz = zt.f_size(dat_tz)

    # Phase 3: parse each page into its data file where a refresh is due.
    if _needs_refresh(size_az):
        fb_gid_getExt_az4htm(page_az, bars, ftg=dat_az)
    if _needs_refresh(size_oz):
        fb_gid_getExt_oz4htm(page_oz, bars, ftg=dat_oz)
    if _needs_refresh(size_tz):
        fb_gid_getExt_tz4htm(page_tz, bars, ftg=dat_tz)

    return dat_az, dat_oz, dat_tz
def fb_gid_getExt010(x10):
    """Download the "oz" extension page of one match and refresh its .dat file.

    Args:
        x10: Values of one match record, labelled with ``tfsys.gidSgn``.
            The ``'gid'`` field selects the match; the whole record is
            forwarded to the page parser.

    Returns:
        Path of the ``<gid>_oz.dat`` file under ``tfsys.rxdat``.
    """
    bars = pd.Series(x10, index=tfsys.gidSgn, dtype=str)
    gid = bars['gid']

    # Fetch the page (local cache file first argument pair: URL, cache path).
    cache_path = tfsys.rhtmOuzhi + gid + '_oz.htm'
    page_url = tfsys.us0_extOuzhi + gid + '.shtml'
    page = zweb.web_get001txtFg(page_url, cache_path)

    # Re-parse when the data file is small (< 1000) or when
    # tfsys.xnday_down is below 10.
    dat_path = tfsys.rxdat + gid + '_oz.dat'
    dat_size = zt.f_size(dat_path)
    refresh_due = (dat_size < 1000) or (tfsys.xnday_down < 10)
    if refresh_due:
        fb_gid_getExt_oz4htm(page, bars, ftg=dat_path)

    # A disabled snippet (kept as a bare string literal) used to build the
    # cache-path / URL pairs for the '_az' and '_sj' pages at this point.
    return dat_path
def fb_gid_get_nday(xtfb, timStr, fgExt=False):
    """Download the daily match lists for the last ``tfsys.xnday_down`` days.

    Walks backwards one day at a time from the start date, fetches each
    day's page, merges the matches found into ``tfsys.gids`` (keeping the
    newest row per ``gid``) and finally writes the table to
    ``tfsys.gidsFN``.

    Args:
        xtfb: Object providing ``tim_now`` (default start date) and
            ``tim0_gid`` (earliest date to process).
        timStr: Start date parsed with ``arrow.get``; an empty string
            means ``xtfb.tim_now``.
        fgExt: When true, ``fb_gid_getExtPool`` is also called with each
            day's matches.

    Returns:
        None. Results are stored in ``tfsys.gids`` and, when
        ``tfsys.gidsFN`` is non-empty, saved as a gb18030-encoded CSV.
    """
    ktim = xtfb.tim_now if timStr == '' else arrow.get(timStr)

    for tc in range(tfsys.xnday_down):
        xtim = ktim.shift(days=-tc)
        xtimStr = xtim.format('YYYY-MM-DD')

        # NOTE(review): get_fun_nam() presumably reports the calling
        # function's name -- keep this call directly in this body.
        zt.f_addLog(str(tc) + '#,' + xtimStr + ',@' + zt.get_fun_nam())

        # Stop as soon as the walk goes past the earliest supported date.
        if xtim < xtfb.tim0_gid:
            print('#brk;')
            break

        fss = tfsys.rghtm + xtimStr + '.htm'
        uss = tfsys.us0_gid + xtimStr
        print(timStr, tc, '# update--', fss)

        htm = zweb.web_get001txtFg(uss, fss)
        # Pages of 5000 characters or fewer are skipped
        # (presumably too short to hold a match list).
        if len(htm) <= 5000:
            continue

        df = fb_gid_get4htm(htm)
        if len(df['gid']) == 0:
            continue

        # DataFrame.append was removed in pandas 2.0; pd.concat with default
        # arguments is its documented equivalent.
        tfsys.gids = pd.concat([tfsys.gids, df])
        tfsys.gids.drop_duplicates(subset='gid', keep='last', inplace=True)

        if fgExt:
            fb_gid_getExtPool(df)

    if tfsys.gidsFN != '':
        print('+++++')
        print(tfsys.gids.tail())
        tfsys.gids.to_csv(tfsys.gidsFN, index=False, encoding='gb18030')
def fb_gid_get_nday(xtfb, timStr, fgExt=False):
    """Download league data, then the daily match lists for recent days.

    Step 1 calls ``fb_download_league_data`` for every entry of
    ``tfsys.league`` and concatenates the results into ``tfsys.league_sc``.
    Step 2 walks backwards one day at a time over ``tfsys.xnday_down``
    days, merges each day's matches into ``tfsys.gids`` (keeping the
    newest row per ``gid``) and finally writes the table to
    ``tfsys.gidsFN``.

    Args:
        xtfb: Object providing ``tim_now`` (default start date) and
            ``tim0_gid`` (earliest date to process).
        timStr: Start date parsed with ``arrow.get``; an empty string
            means ``xtfb.tim_now``.
        fgExt: When true, ``fb_gid_getExt`` (the single-threaded variant)
            is also called with each day's matches.

    Returns:
        None. Results are stored in ``tfsys.league_sc`` / ``tfsys.gids``
        and, when ``tfsys.gidsFN`` is non-empty, saved as a
        gb18030-encoded CSV.
    """
    # 1. Download the data of every league.
    # NOTE(review): the fb_download_league_data definition visible in this
    # file takes (league, season, fgSample=False) and has no return
    # statement -- confirm which signature this call is meant to target.
    for league in tfsys.league:
        sc = fb_download_league_data(league)
        tfsys.league_sc = pd.concat([tfsys.league_sc, sc], ignore_index=True)

    # 2. Download the matches of the current period.
    ktim = xtfb.tim_now if timStr == '' else arrow.get(timStr)

    for tc in range(tfsys.xnday_down):
        xtim = ktim.shift(days=-tc)
        xtimStr = xtim.format('YYYY-MM-DD')

        # NOTE(review): get_fun_nam() presumably reports the calling
        # function's name -- keep this call directly in this body.
        zt.f_addLog(str(tc) + '#,' + xtimStr + ',@' + zt.get_fun_nam())

        # Stop as soon as the walk goes past the earliest supported date.
        if xtim < xtfb.tim0_gid:
            print('#brk;')
            break

        fss = tfsys.rghtm + xtimStr + '.htm'
        uss = tfsys.us0_gid + xtimStr
        print(timStr, tc, '#', fss)

        htm = zweb.web_get001txtFg(uss, fss)
        # Pages of 5000 characters or fewer are skipped
        # (presumably too short to hold a match list).
        if len(htm) <= 5000:
            continue

        # Extract that day's matches.
        df = fb_gid_get4htm(htm)
        if len(df['gid']) == 0:
            continue

        # DataFrame.append was removed in pandas 2.0; pd.concat with default
        # arguments is its documented equivalent (and is already used above).
        tfsys.gids = pd.concat([tfsys.gids, df])
        tfsys.gids.drop_duplicates(subset='gid', keep='last', inplace=True)

        if fgExt:
            fb_gid_getExt(df)  # single-threaded variant

    if tfsys.gidsFN != '':
        print('+++++')
        print(tfsys.gids.tail())
        tfsys.gids.to_csv(tfsys.gidsFN, index=False, encoding='gb18030')
def fb_download_league_data(league, season, fgSample=False):
    """Download one league-season data file and merge its teams into ``tfsys.teamIds``.

    Args:
        league: Key into ``tfsys.leagueId`` and ``tfsys.subleagueId``.
        season: Season string used in both the cache file name and the URL
            (a disabled line in the original shows ``'2018-2019'``).
        fgSample: When true, ``fb_league_gids`` is also called with the
            downloaded text and the league key.

    Returns:
        None.
    """
    league_id = tfsys.leagueId[league]
    subleague_id = tfsys.subleagueId[league]

    cache_path = tfsys.lghtm + season + '_' + league_id + '.js'
    page_url = (tfsys.us0_league + season + '/s' + league_id
                + subleague_id + '.js')
    page = zweb.web_get001txtFg(page_url, cache_path)

    # 1. Current fundamentals of every team in the league: goals scored and
    #    conceded, points, ranking, etc.
    # NOTE(review): the second result (the scores table) is never used and
    # nothing is returned -- confirm whether callers expect it.
    team_map, scores_df = fb_get_team_dataset(page)

    # Merge into the global team table; entries from this download win.
    merged = dict(tfsys.teamIds)
    merged.update(**team_map)
    tfsys.teamIds = merged

    # 2. Past-season match IDs, odds and the head-to-head records at the
    #    time (used to train the algorithm).
    if fgSample:
        fb_league_gids(page, league)
def fb_gid_getExt010(x10):
    """Download the pages of one match, build its feature row and save it.

    Three pages are fetched (betting volume, odds, analysis), turned into
    feature tables by the ``fb_get_*_features`` helpers, concatenated
    column-wise, prefixed with an ``'FTR'`` column taken from
    ``bars['kwin']`` and written to ``<gid>_ftr.dat`` under ``tfsys.rxdat``.

    Args:
        x10: Values of one match record, labelled with ``tfsys.gidSgn``.
            The ``'gid'`` and ``'kwin'`` fields are read here; the whole
            record is forwarded to the feature helpers.

    Returns:
        * the string ``'######### can not download the html ##########'``
          when the betting-volume page yields ``'404'`` on all 5 attempts;
        * ``None`` when the betting-volume features are empty;
        * otherwise the cache path of the odds page (``fss_oz``).
    """
    bars = pd.Series(x10, index=tfsys.gidSgn, dtype=str)
    gid = bars['gid']
    print('gid:', gid)

    # 1. Download the betting-volume page; betting-volume ratio features.
    url_template_tz = tfsys.us0_extTouzhu + gid + '.htm?%s'
    fss_tz = tfsys.rhtmTouzhu + gid + '.htm'
    fxdat_tz = tfsys.rxdat + gid + '_tz.dat'

    # Retry up to 5 times while the download reports '404'. The URL is
    # rebuilt from the untouched template on every attempt: formatting the
    # already-formatted URL again (as before) raises TypeError on attempt 2.
    downloaded = False
    for _attempt in range(5):
        # The current time is appended as the query string.
        stamp = time.mktime(datetime.datetime.now().timetuple())
        uss_tz = url_template_tz % stamp
        htm_tz = zweb.web_get001txtFg(uss_tz, fss_tz)
        if htm_tz != '404':
            downloaded = True
            break
    if not downloaded:
        return '######### can not download the html ##########'

    volumes_feature = fb_get_volumes_features(htm_tz, bars, ftg=fxdat_tz)
    # Explicit length check: the helper's result may be a DataFrame, whose
    # truth value is ambiguous.
    if len(volumes_feature) == 0:
        print('投注数据不足,不预测该场比赛')
        return None

    # 2. Download the odds page; odds features.
    uss_oz = tfsys.us0_extOuzhi + gid + '.js'
    fss_oz = tfsys.rhtmOuzhi + gid + '.js'
    fxdat_oz_1 = tfsys.rxdat + gid + '_oz_1.dat'
    htm_oz = zweb.web_get001txtFg(uss_oz, fss_oz)
    odds_feature = fb_get_odds_features(htm_oz, bars, ftg=fxdat_oz_1)

    # 3. Download the analysis page; head-to-head and recent home/away
    #    match features.
    uss_fx = tfsys.us0_extFenxi + gid + '.htm'
    fss_fx = tfsys.rhtmFenxi + gid + '.htm'
    fxdat_fx = tfsys.rxdat + gid + '_fx.dat'
    htm_fx = zweb.web_get001txtFg(uss_fx, fss_fx)
    score_feature = fb_get_score_features(htm_fx, bars, ftg=fxdat_fx)

    # 4. Concatenate all features and save them.
    features = pd.concat([score_feature, odds_feature, volumes_feature],
                         axis=1)
    features.insert(0, 'FTR', bars['kwin'])
    fss_features = tfsys.rxdat + gid + '_ftr.dat'
    features.to_csv(fss_features, index=False, encoding='gb18030')
    return fss_oz