Пример #1
0
def read_input_data(banditOutFile, configFile):
    '''
    Turn the output file of a contextual bandit run into a regression-ready
    dataframe.

    banditOutFile is parsed with pandas using the file's second line as the
    header row. The number of contextual variables comes from the
    ContextualStructure built from the configuration loaded from configFile;
    the contextual variables are taken by position from columns
    1..numContextualVars of the parsed file.

    Each contextual variable is dummy coded with the prefix 'cv<i>', and the
    first dummy column of every variable is dropped.

    Returns a dataframe whose leading columns are the dummy-coded contextual
    variables, whose second-to-last column is the H_ALGO_ACTION column (the
    action as recorded in the file) and whose last column is the
    H_ALGO_OBSERVED_REWARD column.
    '''
    banditDf = pd.read_csv(banditOutFile, header=1)

    config = generate_contextual_data.loadConfiguration(configFile)
    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()

    # Positional slice holding only the raw contextual variable columns.
    rawContext = banditDf.iloc[:, 1:(numContextualVars + 1)]

    dummyBlocks = []
    for varIndex in range(numContextualVars):
        allLevels = pd.get_dummies(rawContext.iloc[:, varIndex],
                                   prefix='cv' + str(varIndex))
        # Drop the first level so it acts as the reference category.
        dummyBlocks.append(allLevels.iloc[:, 1:])
    contextDummies = pd.concat(dummyBlocks, axis=1)

    # Append the action and the response variable on the right.
    actionCol = banditDf.loc[:, H_ALGO_ACTION]
    rewardCol = banditDf.loc[:, H_ALGO_OBSERVED_REWARD]
    return pd.concat([contextDummies, actionCol, rewardCol], axis=1)
Пример #2
0
def makeActionInteractions(banditOutFile, configFile, useFlat=False):
    '''
    Build the interaction-coded design matrix for a contextual bandit run.

    banditOutFile is parsed with pandas using the file's second line as the
    header row. For every row, the action stored in the H_ALGO_ACTION column
    (flat across experiments) is translated, via
    getActionsToConditionsDictionary(config), into one condition per
    experiment. E.g., action 1 in the outfile might correspond to action 0 in
    experiment 1 and action 1 in experiment 2.

    A row produced by makeAllZerosRow is then filled in: for each
    (experiment, condition) pair the intercept column is set to 1, and for
    each contextual variable (read by position from columns
    1..numContextualVars) the matching interaction column is set to 1 if that
    column is one of the interaction headers.

    configFile is passed to generate_contextual_data.loadConfiguration; the
    resulting configuration supplies the number of contextual variables, the
    action-to-condition mapping and the interaction headers.

    useFlat is accepted but not used yet. Future implementation: if true, use
    the flat action structure without separating by experiment.

    Returns a dataframe whose columns are the interaction headers followed by
    the H_ALGO_OBSERVED_REWARD column as the last column.
    '''
    banditDf = pd.read_csv(banditOutFile, header=1)
    config = generate_contextual_data.loadConfiguration(configFile)
    flatActionToConditions = getActionsToConditionsDictionary(config)

    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()
    headers = getInteractionHeaders(config)

    codedRows = []
    for rowIndex in range(len(banditDf)):
        flatAction = banditDf.loc[rowIndex, H_ALGO_ACTION]
        perExperimentConditions = flatActionToConditions[flatAction]
        codedRow = makeAllZerosRow(headers)
        for expIndex, condition in enumerate(perExperimentConditions):
            # Intercept term for this experiment/condition pair.
            codedRow[getInterceptHeaderForExpCondition(expIndex,
                                                       condition)] = 1
            # Contextual variables live in file columns 1..numContextualVars,
            # while getHeaderForCV expects a zero-based variable index.
            for cvIndex in range(numContextualVars):
                cvValue = banditDf.iloc[rowIndex, cvIndex + 1]
                cvHeader = getHeaderForCV(cvValue, cvIndex, expIndex,
                                          condition)
                # No header exists when the cv value is 0, because that case
                # is baked into the intercept.
                if cvHeader in headers:
                    codedRow[cvHeader] = 1
        codedRows.append(codedRow)

    codedDf = pd.DataFrame(codedRows, columns=headers)
    rewardCol = banditDf.loc[:, H_ALGO_OBSERVED_REWARD]
    return pd.concat([codedDf, rewardCol], axis=1)
Пример #3
0
def countNumSamplesByContextualVariableCombination(banditOutFile,
                                                   experimentIndex, config):
    '''
    Counts, for one experiment, how often each condition was chosen for every
    observed value of every contextual variable.

    banditOutFile is parsed with pandas using the file's second line as the
    header row; contextual variables are read by position from columns
    1..numContextualVars, and the chosen (flat) action from the H_ALGO_ACTION
    column. The flat action is mapped to the condition of experimentIndex via
    getActionsToConditionsDictionary(config).

    experimentIndex selects the experiment inside config["conditions"]; the
    number of entries for that experiment determines the length of each count
    list.

    Returns a list with one dictionary per contextual variable. Each
    dictionary maps an observed value of that variable to a list of counts,
    indexed by condition within the experiment. Note that, despite the
    function name, counts are kept per individual variable, not per joint
    combination of variable values.
    '''
    df = pd.read_csv(banditOutFile, header=1)
    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()
    numActions = len(config["conditions"][experimentIndex])
    actionsToConditionsDict = getActionsToConditionsDictionary(config)
    contextualVarValuesToActions = [{} for _ in range(numContextualVars)]
    for row in range(df.shape[0]):
        contextualVars = tuple(df.iloc[row, 1:(numContextualVars + 1)])
        # The chosen action (and hence the condition in this experiment) is
        # the same for every contextual variable of the row, so look it up
        # once per row instead of once per variable.
        action = df.iloc[row, :].loc[H_ALGO_ACTION]
        condition = actionsToConditionsDict[action][experimentIndex]
        for i in range(numContextualVars):
            # First sighting of a value starts with a zero count per condition.
            counts = contextualVarValuesToActions[i].setdefault(
                contextualVars[i], [0] * numActions)
            counts[condition] += 1

    return contextualVarValuesToActions
Пример #4
0
def plotDifferencesInProbabilitiesOfReward(fittedModel, config, ax):
    '''
    Draws, on ax, the difference between fittedModel's estimated probability
    of reward and the actual probability of reward (estimated minus actual)
    for every combination of contextual variable values and condition vector.
    Assumes that all contextual variables are categorical.

    The actual probabilities come from a generate_contextual_data.LogisticModel
    built from config["conditions"]; the estimated ones are the second entry
    of fittedModel.predict_proba for the interaction-coded input.

    Besides the red bars, three annotations are added below the axes: the mean
    (and, in parentheses, median) pointwise absolute difference, the Euclidean
    norm of the differences, and the proportion of contextual combinations in
    which some action whose estimated probability exceeds 98% of the best
    estimate also has an actual probability exceeding 98% of the best actual
    probability.
    '''
    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()
    conditionVectors = generate_contextual_data.makeConditionVectorsFromConfig(
        config)
    cvCombinations = structure.getAllContextualCombinations()
    headers = getInteractionHeaders(config)

    experiments = []
    for experimentConditions in config["conditions"]:
        experiments.append(
            generate_contextual_data.Experiment([
                generate_contextual_data.Condition(coeff)
                for coeff in experimentConditions
            ]))
    trueModel = generate_contextual_data.LogisticModel(0, experiments,
                                                       structure)

    tol = .98
    estimatedProbs = []
    actualProbs = []
    numDistWhereMaxProbIsCorrect = 0
    # One pair of series (estimated / actual) per combination of contextual
    # variable values; each series has one entry per condition vector.
    for varValues in cvCombinations:
        estSeries = []
        actualSeries = []
        for conditionVector in conditionVectors:
            features = makeActionInteraction(varValues, conditionVector,
                                             numContextualVars, headers)
            classProbs = fittedModel.predict_proba(
                features.values.reshape(1, -1))
            estSeries.append(classProbs.flatten()[1])  # success probability
            actualSeries.append(
                trueModel.getSuccessProbability(varValues, conditionVector))
        estimatedProbs.append(estSeries)
        actualProbs.append(actualSeries)

        # Does any near-best estimated action fall within tol of the best
        # actual action?
        estArr = np.asarray(estSeries)
        actualArr = np.asarray(actualSeries)
        closeToBestActual = actualArr > np.max(actualArr) * tol
        closeToBestEstimate = estArr > np.max(estArr) * tol
        if any(closeToBestActual[closeToBestEstimate]):
            numDistWhereMaxProbIsCorrect += 1

    # Plotting: one group of bars per condition vector, one bar per
    # contextual combination within the group.
    barWidth = 1 / (len(estimatedProbs) + 1)
    for comboIndex, (estSeries, actualSeries) in enumerate(
            zip(estimatedProbs, actualProbs)):
        difference = np.array(estSeries) - np.array(actualSeries)
        offset = comboIndex * barWidth
        barPositions = [pos + offset for pos in np.arange(len(difference))]
        ax.bar(barPositions, difference, width=barWidth, color='r')

    ax.get_xaxis().set_ticks([])
    ax.set_xlabel("Condition and Cont. Var. Value Combos")
    ax.set_ylabel("Est. - Actual")

    # Summary distances over all (combination, condition) pairs.
    estimated = np.asarray(estimatedProbs).flatten()
    actual = np.asarray(actualProbs).flatten()
    roundingFigs = 4

    meanAbsDiff = str(
        round(np.average(abs(estimated - actual)), roundingFigs))
    medianAbsDiff = str(
        round(np.median(abs(estimated - actual)), roundingFigs))
    ax.annotate("Pointwise L1: " + meanAbsDiff + "(" + medianAbsDiff + ")",
                xy=(-.4, -.35),
                xycoords='axes fraction')

    euclidean = str(round(np.linalg.norm(estimated - actual), roundingFigs))
    ax.annotate("Euclidean: " + euclidean,
                xy=(-.4, -.48),
                xycoords='axes fraction')

    proportionClose = str(
        round(numDistWhereMaxProbIsCorrect / len(cvCombinations),
              roundingFigs))
    ax.annotate("Proportion close to max: " + proportionClose,
                xy=(-.4, -.61),
                xycoords='axes fraction')
Пример #5
0
def plotProbabilitiesOfReward(fittedModel, config, ax):
    '''
    Plots the fittedModel's estimated probabilities of reward next to the
    actual probability of reward for each combination of the contextual
    variables. Assumes that all contextual variables are categorical.

    fittedModel must provide predict_proba; the second entry of its output for
    a single interaction-coded row is taken as the estimated success
    probability. The actual probabilities come from a
    generate_contextual_data.LogisticModel built from config["conditions"].

    On ax, bars are grouped by condition vector; within a group there is one
    estimated bar (EST_COLOR) and one actual bar (ACTUAL_COLOR) per contextual
    combination. The x ticks are labelled with the condition vectors. The
    contextual combination and its two probability series are also printed to
    stdout.
    '''
    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()
    conditionVectors = generate_contextual_data.makeConditionVectorsFromConfig(
        config)
    contextualVariableCombinations = structure.getAllContextualCombinations()
    interactionHeaders = getInteractionHeaders(config)
    conditions = config["conditions"]
    experiments = [
        generate_contextual_data.Experiment([
            generate_contextual_data.Condition(coeff)
            for coeff in curConditions
        ]) for curConditions in conditions
    ]
    model = generate_contextual_data.LogisticModel(0, experiments, structure)
    estimatedProbs = []
    actualProbs = []
    # 2 series for each combination of contextual variable values (one for
    # the estimated probabilities and one for actual). Each series is plotted
    # separately, so we loop over contextual variable value combinations
    # first, and make a list of the values for each condition combination.
    for varValues in contextualVariableCombinations:
        curEstProb = []
        curActualProb = []
        for conditionVector in conditionVectors:
            features = makeActionInteraction(varValues, conditionVector,
                                             numContextualVars,
                                             interactionHeaders)
            estProb = fittedModel.predict_proba(
                features.values.reshape(1, -1)).flatten()[1]  # success prob
            curEstProb.append(estProb)
            actualProb = model.getSuccessProbability(varValues,
                                                     conditionVector)
            curActualProb.append(actualProb)
        estimatedProbs.append(curEstProb)
        actualProbs.append(curActualProb)

    # Now we need to do the plotting
    barWidth = 1 / (2 * len(estimatedProbs) + 1)
    for cvValuesIndex, (curEstProb, curActualProb) in enumerate(
            zip(estimatedProbs, actualProbs)):
        # Estimated and actual bars alternate inside each condition group.
        xEst = [
            x + 2 * cvValuesIndex * barWidth
            for x in np.arange(len(curEstProb))
        ]
        xActual = [
            x + (2 * cvValuesIndex + 1) * barWidth
            for x in np.arange(len(curEstProb))
        ]
        print("CVs: " + str(contextualVariableCombinations[cvValuesIndex]))
        print(curEstProb)
        print(curActualProb)

        ax.bar(xEst, curEstProb, width=barWidth, color=EST_COLOR)
        ax.bar(xActual, curActualProb, width=barWidth, color=ACTUAL_COLOR)

    # Add xticks on the middle of the group bars. The tick count is taken
    # from the condition vectors rather than from a loop-local variable, so
    # this also works when there are no contextual combinations.
    tickLabels = [str(vector) for vector in conditionVectors]
    tickPositions = [
        r + len(estimatedProbs) * barWidth for r in range(len(tickLabels))
    ]
    # Positions and labels are set in two calls: before matplotlib 3.5 the
    # second positional parameter of set_xticks is `minor`, not `labels`.
    ax.set_xticks(tickPositions)
    ax.set_xticklabels(tickLabels)