Exemplo n.º 1
0
def _build_power(games, outcomes, coerce_fn, acc=0.0001, alpha=1.0, snap=True):
    """Build a relative power ranking from a set of related games.

    The games should all come from the same competition, for example.
    A logistic regression model is fitted through
    ``world_cup.build_model_logistic``; its exponentiated coefficients
    (minus the intercept) are rescaled linearly onto [0, 1].

    Args:
        games: Game data, handed unchanged to the model builder.
        outcomes: Iterable of raw outcomes, one per game. Each value is
            passed through ``coerce_fn`` before fitting.
        coerce_fn: Callable that converts one raw outcome into the value
            the model is fitted on.
        acc: Forwarded unchanged to the model builder.
        alpha: Forwarded unchanged to the model builder.
        snap: If true, bucket the rescaled rankings instead of returning
            them at full precision. The cut points are the 20th, 40th,
            60th and 80th percentiles, so the possible values are
            0.0, 0.25, 0.5, 0.75 and 1.0. This is useful because we may
            only have rough estimates of the power rating and do not
            want to suggest a false specificity.

    Returns:
        A dict mapping each remaining model parameter label (the team id)
        to its power ranking between 0 and 1, or None when no usable
        parameter is left or the parameters span less than 0.0001.
    """
    outcomes = pd.Series([coerce_fn(val) for val in outcomes])
    model = world_cup.build_model_logistic(outcomes,
                                           games,
                                           acc=acc,
                                           alpha=alpha)

    # np.exp returns a new Series, so model.params itself is not modified.
    params = np.exp(model.params).drop('intercept')
    # exp(0) == 1.0: discard parameters whose fitted coefficient was
    # exactly zero.
    params = params[params != 1.0]
    if params.empty:
        return None

    min_param = params.min()
    param_range = params.max() - min_param
    if param_range < 0.0001:
        # Practically no spread between teams; rescaling would only
        # amplify noise.
        return None

    # Min-max rescale onto [0, 1].
    params = (params - min_param) / param_range
    cuts = np.percentile(params, [20, 40, 60, 80])

    def _snap(val):
        """Map one rescaled ranking to the value of its bucket."""
        for idx, cut in enumerate(cuts):
            if cut > val:
                return idx * 0.25
        return 1.0

    if snap:
        # Snap power data to rough buckets.
        return params.apply(_snap).to_dict()
    return params.to_dict()
Exemplo n.º 2
0
def _build_power(games, outcomes, coerce_fn, acc=0.0001, alpha=1.0, snap=True):
    """Build a relative power ranking from a set of related games.

    All of the games should come from the same tournament or
    competition, for example. Given the games and their outcomes, this
    function fits a logistic regression model through
    ``world_cup.build_model_logistic`` and derives from it a ranking of
    the teams relative to one another.

    Args:
        games: Game data; must provide ``to_csv`` (presumably a pandas
            DataFrame). Handed unchanged to the model builder.
        outcomes: Iterable of raw outcomes, one per game. Each value is
            passed through ``coerce_fn`` before fitting.
        coerce_fn: Callable that converts one raw outcome into the value
            the model is fitted on.
        acc: Forwarded unchanged to the model builder.
        alpha: Forwarded unchanged to the model builder.
        snap: If true, bucket the rescaled rankings instead of returning
            them at full precision. The cut points are the 20th, 40th,
            60th and 80th percentiles, so the possible values are
            0.0, 0.25, 0.5, 0.75 and 1.0. This is useful because the
            ranking is only a rough estimate, and the apparent precision
            of a single exact number could be misleading.

    Returns:
        A dict mapping each remaining model parameter label (the team id)
        to its power index between 0 and 1, or None when no usable
        parameter is left or the parameters span less than 0.0001.
    """
    outcomes = pd.Series([coerce_fn(val) for val in outcomes])
    # NOTE(review): these dumps look like debugging leftovers. They
    # overwrite fixed files under /tmp on every call; kept unchanged in
    # case something downstream reads them.
    games.to_csv('/tmp/games.csv', index=None)
    outcomes.to_csv('/tmp/outcomes.csv', index=None)
    model = world_cup.build_model_logistic(outcomes,
                                           games,
                                           acc=acc,
                                           alpha=alpha)

    # np.exp returns a new object, so model.params itself is not modified.
    params = np.exp(model.params)
    del params['intercept']
    # exp(0) == 1.0: discard parameters whose fitted coefficient was
    # exactly zero.
    params = params[params != 1.0]
    if len(params) == 0:
        return None

    min_param = params.min()
    param_range = params.max() - min_param
    if param_range < 0.0001:
        # Practically no spread between teams; nothing meaningful to rank.
        return None

    # Min-max rescale onto [0, 1].
    params = params.sub(min_param).div(param_range)
    cuts = np.percentile(params, [20, 40, 60, 80])

    def _snap(val):
        """Map one rescaled ranking to the value of its bucket."""
        # enumerate replaces the Python 2-only xrange(len(...)) loop and
        # runs on both Python 2 and Python 3.
        for idx, cut in enumerate(cuts):
            if cut > val:
                return idx * 0.25
        return 1.0

    if snap:
        # Snap power data to rough buckets.
        return params.apply(_snap).to_dict()
    return params.to_dict()
Exemplo n.º 3
0
def _build_power(games, outcomes, coerce_fn, acc=0.0001, alpha=1.0, snap=True):
    """Build a relative power ranking from a set of related games.

    All of the games should come from the same tournament or
    competition, for example. Given the games and their outcomes, this
    function fits a logistic regression model through
    ``world_cup.build_model_logistic`` and derives from it a ranking of
    the teams relative to one another.

    Args:
        games: Game data; must provide ``to_csv`` (presumably a pandas
            DataFrame). Handed unchanged to the model builder.
        outcomes: Iterable of raw outcomes, one per game. Each value is
            passed through ``coerce_fn`` before fitting.
        coerce_fn: Callable that converts one raw outcome into the value
            the model is fitted on.
        acc: Forwarded unchanged to the model builder.
        alpha: Forwarded unchanged to the model builder.
        snap: If true, bucket the rescaled rankings instead of returning
            them at full precision. The cut points are the 20th, 40th,
            60th and 80th percentiles, so the possible values are
            0.0, 0.25, 0.5, 0.75 and 1.0. This is useful because the
            ranking is only a rough estimate, and the apparent precision
            of a single exact number could be misleading.

    Returns:
        A dict mapping each remaining model parameter label (the team id)
        to its power index between 0 and 1, or None when no usable
        parameter is left or the parameters span less than 0.0001.
    """
    coerced = [coerce_fn(val) for val in outcomes]
    outcomes = pd.Series(coerced)
    # NOTE(review): these dumps look like debugging leftovers. They
    # overwrite fixed files under /tmp on every call; kept unchanged in
    # case something downstream reads them.
    games.to_csv('/tmp/games.csv', index=None)
    outcomes.to_csv('/tmp/outcomes.csv', index=None)
    model = world_cup.build_model_logistic(outcomes, games,
                                           acc=acc, alpha=alpha)

    # np.exp returns a new object, so model.params itself is not modified.
    params = np.exp(model.params)
    del params['intercept']
    # exp(0) == 1.0: discard parameters whose fitted coefficient was
    # exactly zero.
    params = params[params != 1.0]
    if len(params) == 0:
        return None

    lowest = params.min()
    spread = params.max() - lowest
    if spread < 0.0001:
        # Practically no spread between teams; nothing meaningful to rank.
        return None

    # Min-max rescale onto [0, 1].
    params = params.sub(lowest)
    params = params.div(spread)
    thresholds = np.percentile(params, [20, 40, 60, 80])

    def _snap(val):
        """Map one rescaled ranking to the value of its bucket."""
        # enumerate replaces the Python 2-only xrange(len(...)) loop and
        # runs on both Python 2 and Python 3.
        for bucket, threshold in enumerate(thresholds):
            if threshold > val:
                return bucket * 0.25
        return 1.0

    if not snap:
        return params.to_dict()
    # Snap power data to rough buckets.
    return params.apply(_snap).to_dict()
Exemplo n.º 4
0
def _build_power(games, outcomes, coerce_fn, acc=0.0001, alpha=1.0, snap=True):
    """Build a power model over a set of related games.

    The games should all be from the same competition, for example.
    Given a series of games and their outcomes, a logistic regression
    model is fitted through ``world_cup.build_model_logistic`` and used
    to compute a relative ranking for the teams.

    Args:
        games: Game data; must provide ``to_csv`` (presumably a pandas
            DataFrame). Handed unchanged to the model builder.
        outcomes: Iterable of raw outcomes, one per game. Each value is
            passed through ``coerce_fn`` before fitting.
        coerce_fn: Callable that converts one raw outcome into the value
            the model is fitted on.
        acc: Forwarded unchanged to the model builder.
        alpha: Forwarded unchanged to the model builder.
        snap: If true, bucket the rescaled rankings instead of returning
            them at full precision. The cut points are the 20th, 40th,
            60th and 80th percentiles, so the possible values are
            0.0, 0.25, 0.5, 0.75 and 1.0. This is useful because we may
            only have rough estimates of the power rating and do not
            want to suggest a false specificity.

    Returns:
        A dict of team id (model parameter label) to power ranking
        between 0 and 1, or None when no usable parameter is left or the
        parameters span less than 0.0001.
    """
    outcomes = pd.Series([coerce_fn(val) for val in outcomes])
    # NOTE(review): these dumps look like debugging leftovers. They
    # overwrite fixed files under /tmp on every call; kept unchanged in
    # case something downstream reads them.
    games.to_csv('/tmp/games.csv', index=None)
    outcomes.to_csv('/tmp/outcomes.csv', index=None)
    model = world_cup.build_model_logistic(outcomes, games,
                                           acc=acc, alpha=alpha)

    # np.exp returns a new object, so model.params itself is not modified.
    params = np.exp(model.params)
    del params['intercept']
    # exp(0) == 1.0: discard parameters whose fitted coefficient was
    # exactly zero.
    params = params[params != 1.0]
    if len(params) == 0:
        return None

    min_param = params.min()
    max_param = params.max()
    param_range = max_param - min_param
    if param_range < 0.0001:
        # Practically no spread between teams; nothing meaningful to rank.
        return None

    # Min-max rescale onto [0, 1].
    params = params.sub(min_param)
    params = params.div(param_range)
    qqs = np.percentile(params, [20, 40, 60, 80])

    def _snap(val):
        """Snap a value to the bucket it falls in."""
        # enumerate replaces the Python 2-only xrange(len(...)) loop and
        # runs on both Python 2 and Python 3.
        for idx, boundary in enumerate(qqs):
            if boundary > val:
                return idx * 0.25
        return 1.0

    if snap:
        # Snap power data to rough buckets.
        return params.apply(_snap).to_dict()
    return params.to_dict()