Пример #1
0
def lst4objs_txt_az(xobjs, fltLst=None):
    """Turn a sequence of text-bearing objects into an 8-slot odds list.

    Every object's ``get_text()`` is passed through ``zstr.str_flt`` with
    ``fltLst``, split on single spaces, and the empty tokens as well as the
    '升' / '降' markers are discarded.  Only the first three and the last
    three remaining tokens are used.

    Args:
        xobjs: iterable of objects exposing ``get_text()``.
        fltLst: filter argument forwarded to ``zstr.str_flt``; ``None``
            (the default) means an empty list.

    Returns:
        A list of eight items:
          [0], [1]  first and third token (kept as strings),
          [2], [3]  third-from-last and last token (kept as strings),
          [4], [5]  opening line: its absolute value goes to [4] when the
                    looked-up value is negative, otherwise to [5],
          [6], [7]  closing line, same convention ([6] negative, [7] not).
        Slots that are not written stay at the integer 0.

    Raises:
        IndexError: fewer than six usable tokens were produced.
        KeyError: a line token is missing from ``tfsys.pan``.
    """
    # A fresh list per call: a literal ``[]`` default would be one object
    # shared by every call of this function.
    flt = [] if fltLst is None else fltLst
    skipped = ('', '升', '降')

    tokens = []
    for obj in xobjs:
        text = zstr.str_flt(obj.get_text(), flt)
        tokens.extend(tok for tok in text.split(' ') if tok not in skipped)

    cl = tokens[0:3] + tokens[-3:]

    odds = [0] * 8
    odds[0] = cl[0]
    odds[1] = cl[2]
    odds[2] = cl[3]
    odds[3] = cl[5]

    # tfsys.pan maps the textual line to something float() accepts.
    # NOTE(review): the original carried a commented-out alternative that
    # scaled these values by 0.125 "for normalisation"; it is not applied.
    begin_pan = float(tfsys.pan[cl[1]])
    end_pan = float(tfsys.pan[cl[4]])

    if begin_pan >= 0:
        odds[5] = begin_pan
    else:
        odds[4] = -begin_pan

    if end_pan >= 0:
        odds[7] = end_pan
    else:
        odds[6] = -end_pan

    return odds
Пример #2
0
def lst4objs_txt2(text, fltLst=None):
    """Filter ``text`` with ``zstr.str_flt`` and return its non-empty tokens.

    Args:
        text: the raw string to clean.
        fltLst: filter argument forwarded to ``zstr.str_flt``; ``None``
            (the default) means an empty list.

    Returns:
        The filtered text split on single spaces, with empty strings
        (produced by consecutive spaces) removed.
    """
    # Build the default per call instead of sharing one list object
    # between all calls.
    flt = [] if fltLst is None else fltLst
    cleaned = zstr.str_flt(text, flt)
    return [tok for tok in cleaned.split(' ') if tok != '']
Пример #3
0
def lst4objs_txt(xobjs, fltLst=None):
    """Collect the non-empty text tokens of a sequence of objects.

    Each object's ``get_text()`` result is passed through ``zstr.str_flt``
    with ``fltLst`` and split on single spaces; empty tokens are dropped.

    Args:
        xobjs: iterable of objects exposing ``get_text()``.
        fltLst: filter argument forwarded to ``zstr.str_flt``; ``None``
            (the default) means an empty list.

    Returns:
        A flat list of tokens, in document order.
    """
    # Build the default per call instead of sharing one list object
    # between all calls.
    flt = [] if fltLst is None else fltLst

    tokens = []
    for obj in xobjs:
        cleaned = zstr.str_flt(obj.get_text(), flt)
        tokens.extend(tok for tok in cleaned.split(' ') if tok != '')
    return tokens
Пример #4
0
def fb_get_team_dataset(htm):
    """Extract the team-name map and the standings table from a page.

    Two JavaScript array literals embedded in ``htm`` are parsed:
    ``var arrTeam = [[...],[...]]`` and ``var totalScore = [[...],[...]]``.

    Args:
        htm: page source as a string.

    Returns:
        ``(teamDict, scoresDf)`` where ``teamDict`` maps team id (string) to
        team name (single quotes stripped via ``zstr.str_flt``) and
        ``scoresDf`` holds one row per ``totalScore`` entry.  The
        ``goal_score`` and ``goal_conceded`` columns are divided by their
        respective column maximum before returning.

    Raises:
        IndexError: one of the two arrays is not present in ``htm`` or an
            entry has fewer fields than expected.
        ZeroDivisionError: an entry reports 0 rounds played (field 4).
    """
    ### 1. Teams taking part in the league
    teamDict = {}

    result = re.findall(r"var arrTeam = \[.*\]", htm)
    res = result[0].split(' = ')
    teams = re.findall(r"\[\[(.*)\]\]", res[1])
    teams = teams[0].split('],[')

    for team in teams:
        fields = team.split(',')
        teamDict[fields[0]] = zstr.str_flt(fields[1], '\'')

    ### 2. Points, goals scored/conceded, recent results per team
    # Default value for every standings column, taken from tfsys.
    defaults = pd.Series(tfsys.scNil, index=tfsys.scSgn, dtype=str).to_dict()

    result = re.findall(r"var totalScore = \[.*\]", htm)
    res = result[0].split(' = ')
    scores = re.findall(r"\[\[(.*)\]\]", res[1])
    scores = scores[0].split('],[')

    # Rows are collected first and the frame is built once: DataFrame.append
    # no longer exists in pandas >= 2.0 and re-copied the frame every time.
    rows = []
    for score in scores:
        res = score.split(',')
        row = dict(defaults)
        row['teamPL'] = int(res[1])  # league position
        row['team_id'] = res[2]  # team id
        row['MW'] = int(res[4])  # rounds played
        row['wins'] = int(res[5])  # wins
        row['draws'] = int(res[6])  # draws
        row['losts'] = int(res[7])  # losses
        row['goal_score'] = float(res[8])  # goals scored
        row['goal_conceded'] = float(res[9])  # goals conceded
        row['goal_diff'] = float(res[10])  # goal difference
        row['GS'] = float(res[14])  # goals scored per match
        row['GC'] = float(res[15])  # goals conceded per match
        row['TP'] = float(res[16]) / int(res[4])  # points per match
        row['M1'] = get_sign(res[24])  # result 1 round ago
        row['M2'] = get_sign(res[23])  # result 2 rounds ago
        row['M3'] = get_sign(res[22])  # result 3 rounds ago
        row['M4'] = get_sign(res[21])  # result 4 rounds ago
        row['M5'] = get_sign(res[20])  # result 5 rounds ago
        rows.append(row)

    scoresDf = pd.DataFrame(rows)

    # Scale both goal columns by the largest value in the table.
    gs_max = (scoresDf['goal_score']).max()
    gc_max = (scoresDf['goal_conceded']).max()
    scoresDf['goal_score'] = scoresDf['goal_score'] / gs_max
    scoresDf['goal_conceded'] = scoresDf['goal_conceded'] / gc_max

    return teamDict, scoresDf
Пример #5
0
def lst4objs_txt_az(xobjs, fltLst=None):
    """Return the first and last three odds tokens with the lines decoded.

    Every object's ``get_text()`` is passed through ``zstr.str_flt`` with
    ``fltLst``, split on single spaces, and the empty tokens as well as the
    '升' / '降' markers are discarded.

    Args:
        xobjs: iterable of objects exposing ``get_text()``.
        fltLst: filter argument forwarded to ``zstr.str_flt``; ``None``
            (the default) means an empty list.

    Returns:
        A list made of the first three and the last three tokens, in which
        items 1 and 4 have been replaced by their ``tfsys.pan`` lookup.

    Raises:
        IndexError: too few usable tokens were produced.
        KeyError: a line token is missing from ``tfsys.pan``.
    """
    # Build the default per call instead of sharing one list object
    # between all calls.
    flt = [] if fltLst is None else fltLst
    skipped = ('', '升', '降')

    tokens = []
    for obj in xobjs:
        cleaned = zstr.str_flt(obj.get_text(), flt)
        tokens.extend(tok for tok in cleaned.split(' ') if tok not in skipped)

    cl = tokens[0:3] + tokens[-3:]
    cl[1] = tfsys.pan[cl[1]]
    cl[4] = tfsys.pan[cl[4]]
    return cl
Пример #6
0
def fb_league_gids(htm, league, fgExt=True):
    """Parse the per-round fixture arrays of a league page into game rows.

    The page is expected to contain one ``jh["R_<k>"] = [[...],[...]]``
    assignment per round.  Rounds ``R_6`` up to the number of such
    assignments are read (the loop index starts at 5 and ``n + 1`` is used
    as the round number).  Every game becomes one row; the rows are merged
    into ``tfsys.gids`` (later rows win on duplicate ``gid``), handed to
    ``fb_gid_getExt`` / ``fb_gid_getExtPool`` and, when ``tfsys.gidsFN`` is
    non-empty, ``tfsys.gids`` is written to that CSV file.

    Args:
        htm: page source as a string.
        league: value stored in the ``gset`` column of every row.
        fgExt: ``True`` calls ``fb_gid_getExt`` (single-threaded per the
            original comment), ``False`` calls ``fb_gid_getExtPool``
            (multi-threaded per the original comment).

    Returns:
        None.  ``tfsys.gids`` is updated as a side effect; the merge is
        done once, after all rounds were parsed successfully.

    Raises:
        IndexError: a round array is missing, or a score has no '-'.
        ValueError: a score part is not an integer.
        KeyError: a team id is missing from ``tfsys.teamIds``.
    """
    # Default value for every game column, taken from tfsys.
    defaults = pd.Series(tfsys.gidNil, index=tfsys.gidSgn, dtype=str).to_dict()

    # Number of rounds announced by the page.
    n_round = len(re.findall(r"jh\[\"R_.*\"\] = \[.*\]", htm))

    rows = []
    for n in range(5, n_round):
        round_no = str(n + 1)
        pattern = r'jh\["R_' + round_no + r'"\] = \[.*\]'
        result = re.findall(pattern, htm)
        res = result[0].split(' = ')
        games = re.findall(r"\[\[(.*)\]\]", res[1])
        games = games[0].split('],[')

        for game in games:
            res = game.split(',')
            row = dict(defaults)
            row['gid'] = res[0]
            row['gset'] = league
            row['kend'] = '1'  # match is finished

            # Field 6 is a quoted "<a>-<b>" score string.
            score_parts = zstr.str_flt(res[6], '\'').split('-')
            row['qj'] = score_parts[0]
            row['qs'] = score_parts[1]
            qj = int(row['qj'])
            qs = int(row['qs'])
            if qj > qs:
                row['kwin'] = '3'
            elif qj < qs:
                row['kwin'] = '0'
            else:
                row['kwin'] = '1'

            row['mplay'] = tfsys.teamIds[res[4]]
            row['gplay'] = tfsys.teamIds[res[5]]
            row['mtid'] = res[4]
            row['gtid'] = res[5]
            row['tplay'] = res[3]
            row['tweek'] = round_no  # round number of this game
            row['tsell'] = round_no
            rows.append(row)

    if rows:
        df = pd.DataFrame(rows)
        # One merge after parsing gives the same final table as appending
        # the growing frame after every game and de-duplicating each time,
        # without the repeated copying (and without DataFrame.append, which
        # was removed in pandas 2.0).
        tfsys.gids = pd.concat([tfsys.gids, df])
        tfsys.gids.drop_duplicates(subset='gid', keep='last', inplace=True)
    else:
        df = pd.DataFrame(columns=tfsys.gidSgn, dtype=str)

    if fgExt:
        fb_gid_getExt(df)
    else:
        fb_gid_getExtPool(df)

    if tfsys.gidsFN != '':
        print('+++++')
        print(tfsys.gids.tail())
        tfsys.gids.to_csv(tfsys.gidsFN, index=False, encoding='gb18030')
Пример #7
0
def get_score_data(htm, keyword):
    """Build a one-row frame of home/away standing features from a page.

    The home and away team ids are read from the ``var h2h_home = ...`` and
    ``var h2h_away = ...`` assignments.  ``keyword`` is a regular expression
    whose first match in ``htm`` is a ``","``-separated list of records;
    inside each record the part starting at the first ``|`` is split on
    ``|`` and read by position.

    Args:
        htm: page source as a string.
        keyword: regular expression locating the standings records.

    Returns:
        ``(scoreDF, MW)``: ``scoreDF`` is a one-row DataFrame with the
        columns HTGS, ATGS, HTGC, ATGC (divided by the largest value of
        field 15 / 16 over all records), HTP, ATP (field 17 divided by the
        rounds played), HTGD, ATGD, DiffPts and DiffLP.  ``MW`` is the
        rounds-played value of the LAST record parsed, not necessarily of
        either team.

        A team that does not appear in the records contributes 0.0 to all
        of its values, and a maximum of 0 yields 0.0 instead of dividing.

    Raises:
        IndexError: a required assignment or the ``keyword`` match is absent.
        ValueError: a record contains no ``|`` or a non-numeric field.
    """
    cols = [
        'HTGS', 'ATGS', 'HTGC', 'ATGC', 'HTP', 'ATP', 'HTGD', 'ATGD',
        'DiffPts', 'DiffLP'
    ]
    strip_chars = [';', ' ', '\n', '\t', '\r']

    home = re.findall(r"var h2h_home = .*", htm)
    home = zstr.str_flt(home[0], strip_chars)
    homeId = home.split('=')[1]

    away = re.findall(r"var h2h_away = .*", htm)
    away = away[0].split('=')[1]
    awayId = zstr.str_flt(away, strip_chars)

    # Every accumulator starts at 0.0 so that a team missing from the
    # records yields zeros instead of an UnboundLocalError further down.
    HTGS = HTGC = HTP = HTGD = HomeTeamLP = 0.0
    ATGS = ATGC = ATP = ATGD = AwayTeamLP = 0.0
    GS_MAX = 0
    GC_MAX = 0

    datas = re.findall(keyword, htm)
    datas = datas[0].split('\",\"')
    for data in datas:
        # Drop everything before the first '|'; res[0] is therefore ''.
        res = data[data.index('|'):].split('|')
        teamId = res[2]
        MW = float(res[5])  # rounds played
        scored = float(res[15])
        conceded = float(res[16])
        if teamId == homeId:
            HomeTeamLP = float(res[1])
            HTGS = scored
            HTGC = conceded
            HTP = float(res[17]) / MW
            HTGD = HTGS - HTGC
        if teamId == awayId:
            AwayTeamLP = float(res[1])
            ATGS = scored
            ATGC = conceded
            ATP = float(res[17]) / MW
            ATGD = ATGS - ATGC
        if scored > GS_MAX:
            GS_MAX = scored
        if conceded > GC_MAX:
            GC_MAX = conceded

    row = {
        # A zero maximum means every value is zero too; avoid 0/0.
        'HTGS': HTGS / GS_MAX if GS_MAX else 0.0,
        'ATGS': ATGS / GS_MAX if GS_MAX else 0.0,
        'HTGC': HTGC / GC_MAX if GC_MAX else 0.0,
        'ATGC': ATGC / GC_MAX if GC_MAX else 0.0,
        'HTP': HTP,
        'ATP': ATP,
        'HTGD': HTGD,
        'ATGD': ATGD,
        'DiffPts': HTP - ATP,
        'DiffLP': HomeTeamLP - AwayTeamLP,
    }

    # Built directly: DataFrame.append was removed in pandas 2.0.
    scoreDF = pd.DataFrame([row], columns=cols)

    return scoreDF, MW
Пример #8
0
def fb_get_features(htm, bars, ftg=''):
    """Compute one feature row for a match page and add it to tfsys.samples.

    The game id and both team ids are read from the ``ScheduleID=``,
    ``hometeamID=`` and ``guestteamID=`` assignments in ``htm``; the two
    teams are then looked up by ``team_id`` in ``tfsys.league_sc`` and the
    features are derived from those two standings rows.  The row is passed
    through ``HM_one_hot_encoder``, tagged with a ``gid`` column and
    concatenated onto ``tfsys.samples``.

    Args:
        htm: page source as a string.
        bars: unused by this function.
        ftg: unused by this function.

    Returns:
        None.  Nothing is added when either team is absent from
        ``tfsys.league_sc``.

    Raises:
        IndexError: one of the three assignments is absent from ``htm``.
    """
    cols = [
        'HTGS', 'ATGS', 'HTGC', 'ATGC', 'HTP', 'ATP', 'HM1', 'HM2', 'HM3',
        'HM4', 'HM5', 'AM1', 'AM2', 'AM3', 'AM4', 'AM5', 'HTWinStreak3',
        'HTWinStreak5', 'HTLossStreak3', 'HTLossStreak5', 'ATWinStreak3',
        'ATWinStreak5', 'ATLossStreak3', 'ATLossStreak5', 'HTGD', 'ATGD',
        'DiffPts', 'DiffFormPts', 'teamPL'
    ]
    strip_chars = [';', ' ', '\n', '\t', '\r']

    game = re.findall(r"var ScheduleID=.*", htm)
    if len(game) == 0:
        game = re.findall(r"ScheduleID=.*", htm)
    game = zstr.str_flt(game[0], strip_chars)
    gid = game.split('=')[1]

    home = re.findall(r"var hometeamID=.*", htm)
    home = zstr.str_flt(home[0], strip_chars)
    homeId = home.split('=')[1]

    away = re.findall(r"var guestteamID=.*", htm)
    away = away[0].split('=')[1]
    awayId = zstr.str_flt(away, strip_chars)

    league_sc = tfsys.league_sc
    home_hits = np.where(league_sc['team_id'] == homeId)[0]
    away_hits = np.where(league_sc['team_id'] == awayId)[0]
    # Both rows are needed below, so bail out when EITHER is missing (the
    # former "and" let a single missing team fall through to an IndexError).
    if len(home_hits) == 0 or len(away_hits) == 0:
        return

    # np.where yields positions, so they are used with .iloc directly
    # instead of being converted to index labels first.
    hrow = league_sc.iloc[home_hits[0]]
    arow = league_sc.iloc[away_hits[0]]

    HTFormPtsStr = hrow.M1 + hrow.M2 + hrow.M3 + hrow.M4 + hrow.M5
    ATFormPtsStr = arow.M1 + arow.M2 + arow.M3 + arow.M4 + arow.M5

    HTFormPts = get_form_points(HTFormPtsStr)
    ATFormPts = get_form_points(ATFormPtsStr)

    fea = {
        'HTGS': hrow.goal_score,
        'ATGS': arow.goal_score,
        'HTGC': hrow.goal_conceded,
        'ATGC': arow.goal_conceded,
        'HTP': hrow.TP / hrow.MW,
        # Away value is scaled by the away team's own rounds played, like
        # ATGD below (it used the home team's MW before).
        'ATP': arow.TP / arow.MW,
        'HM1': hrow.M1,
        'HM2': hrow.M2,
        'HM3': hrow.M3,
        'HM4': hrow.M4,
        'HM5': hrow.M5,
        'AM1': arow.M1,
        'AM2': arow.M2,
        'AM3': arow.M3,
        'AM4': arow.M4,
        'AM5': arow.M5,
        'HTWinStreak3': get_3game_ws(HTFormPtsStr),
        'HTWinStreak5': get_5game_ws(HTFormPtsStr),
        'HTLossStreak3': get_3game_ls(HTFormPtsStr),
        'HTLossStreak5': get_5game_ls(HTFormPtsStr),
        'ATWinStreak3': get_3game_ws(ATFormPtsStr),
        'ATWinStreak5': get_5game_ws(ATFormPtsStr),
        'ATLossStreak3': get_3game_ls(ATFormPtsStr),
        'ATLossStreak5': get_5game_ls(ATFormPtsStr),
        'HTGD': hrow.goal_diff / hrow.MW,
        'ATGD': arow.goal_diff / arow.MW,
        # Both differences are scaled by the away team's rounds played.
        'DiffPts': (hrow.TP - arow.TP) / arow.MW,
        'DiffFormPts': (HTFormPts - ATFormPts) / arow.MW,
        'teamPL': hrow.teamPL - arow.teamPL,
    }

    # Built directly / concatenated: DataFrame.append was removed in
    # pandas 2.0.
    features = pd.DataFrame([fea], columns=cols)

    features = HM_one_hot_encoder(features)

    features['gid'] = gid

    tfsys.samples = pd.concat([tfsys.samples, features])