def read_input_data(banditOutFile, configFile):
    '''
    Loads the output file of a contextual bandit run and turns it into a
    single dataframe suitable for model fitting.

    banditOutFile is read as csv using the file's second line as the header
    row. configFile is loaded to find out how many contextual variables the
    run used; those variables are taken from columns 1..numContextualVars of
    the file (column 0 is skipped).

    In the returned dataframe:
      * columns 0:-2 are the dummy coded contextual variables (prefix 'cv<i>',
        with the first dummy column of every variable dropped),
      * column -2 is the action (flat action numbering),
      * column -1 is the observed reward.
    '''
    rawDf = pd.read_csv(banditOutFile, header=1)
    config = generate_contextual_data.loadConfiguration(configFile)

    # The configuration tells us how many of the leading columns are contextual variables
    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()
    contextualVarCols = rawDf.iloc[:, 1:(numContextualVars + 1)]

    dummyFrames = []
    for varIndex in range(numContextualVars):
        indicators = pd.get_dummies(contextualVarCols.iloc[:, varIndex],
                                    prefix='cv' + str(varIndex))
        # drop the first indicator column so it acts as the reference level
        dummyFrames.append(indicators.iloc[:, 1:])
    contextualVarDf = pd.concat(dummyFrames, axis=1)

    # Design columns first, then the action, then the response variable
    return pd.concat([
        contextualVarDf,
        rawDf.loc[:, H_ALGO_ACTION],
        rawDf.loc[:, H_ALGO_OBSERVED_REWARD],
    ], axis=1)
def makeActionInteractions(banditOutFile, configFile, useFlat=False):
    '''
    Loads the output file of a contextual bandit run and returns a dataframe
    of 0/1 indicator columns plus the observed reward.

    Columns 0:-1 are the headers given by getInteractionHeaders(config): an
    intercept column per (experiment, condition) and interaction columns
    between contextual variable values and experiment conditions. Column -1
    is the observed reward.

    configFile is used to find out how many contextual variables there were
    and for the mapping between action numbers in the outfile (which are flat
    across experiments) and the condition chosen in each experiment. E.g.,
    action 1 in the outfile might correspond to condition 0 in experiment 1
    and condition 1 in experiment 2.

    useFlat is currently ignored. Future implementation: if useFlat is true,
    the flat action structure is used (so nothing is separated by experiment).
    '''
    banditDf = pd.read_csv(banditOutFile, header=1)
    config = generate_contextual_data.loadConfiguration(configFile)
    actionToConditions = getActionsToConditionsDictionary(config)
    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()
    interactionHeaders = getInteractionHeaders(config)

    interactionRows = []
    for rowIndex in range(banditDf.shape[0]):
        chosenAction = banditDf.loc[rowIndex, H_ALGO_ACTION]
        conditionPerExperiment = actionToConditions[chosenAction]
        indicatorRow = makeAllZerosRow(interactionHeaders)
        for expIndex, condition in enumerate(conditionPerExperiment):
            # intercept term for this experiment's chosen condition
            indicatorRow[getInterceptHeaderForExpCondition(expIndex,
                                                           condition)] = 1
            # Contextual variables live in file columns 1..numContextualVars;
            # the action and outcome columns are deliberately not visited.
            for cvColumn in range(1, numContextualVars + 1):
                header = getHeaderForCV(banditDf.iloc[rowIndex, cvColumn],
                                        cvColumn - 1, expIndex, condition)
                # header won't appear if the cv value is 0 because that case
                # is baked into the intercept
                if header in interactionHeaders:
                    indicatorRow[header] = 1
        interactionRows.append(indicatorRow)

    interactionDf = pd.DataFrame(interactionRows, columns=interactionHeaders)
    return pd.concat([interactionDf, banditDf.loc[:, H_ALGO_OBSERVED_REWARD]],
                     axis=1)
def countNumSamplesByContextualVariableCombination(banditOutFile,
                                                   experimentIndex, config):
    '''
    Counts, for one experiment, how often each condition was chosen for every
    observed value of every contextual variable.

    banditOutFile is read as csv using the file's second line as the header
    row; contextual variables are taken from columns 1..numContextualVars.
    experimentIndex selects which experiment's conditions are counted, and
    config is the already-loaded configuration (not a filename).

    Returns a list with one dictionary per contextual variable. Each
    dictionary maps a value seen for that variable to a list of length
    len(config["conditions"][experimentIndex]), whose k-th entry is the number
    of rows where the variable had that value and the row's action mapped to
    condition k of the experiment.

    Note that, despite the function name, counts are kept separately per
    contextual variable rather than per joint combination of values.
    '''
    banditDf = pd.read_csv(banditOutFile, header=1)
    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()
    numActions = len(config["conditions"][experimentIndex])
    actionsToConditionsDict = getActionsToConditionsDictionary(config)

    contextualVarValuesToActions = [{} for _ in range(numContextualVars)]
    for rowIndex in range(banditDf.shape[0]):
        contextualVars = tuple(
            banditDf.iloc[rowIndex, 1:(numContextualVars + 1)])
        # The action (and therefore the condition) is a property of the row,
        # so look it up once instead of once per contextual variable.
        action = banditDf.iloc[rowIndex, :].loc[H_ALGO_ACTION]
        conditionIndex = actionsToConditionsDict[action][experimentIndex]
        for varIndex in range(numContextualVars):
            countsForValue = contextualVarValuesToActions[varIndex].setdefault(
                contextualVars[varIndex], [0] * numActions)
            countsForValue[conditionIndex] += 1
    return contextualVarValuesToActions
def plotDifferencesInProbabilitiesOfReward(fittedModel, config, ax):
    '''
    Plots, on ax, the difference between the fittedModel's estimated
    probability of reward and the actual probability of reward (estimated
    minus actual) for each combination of contextual variable values and each
    condition vector. Assumes that all contextual variables are categorical.

    fittedModel must provide predict_proba; the actual probabilities come from
    a generate_contextual_data.LogisticModel built from config["conditions"].

    Besides the bars, three summaries are annotated below the axes: the mean
    (and, in parentheses, median) absolute pointwise difference, the Euclidean
    distance between the estimated and actual probabilities, and the
    proportion of contextual combinations for which some condition vector
    whose estimated probability is within tolerance of the best estimate is
    also within tolerance of the best actual probability.
    '''
    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()
    conditionVectors = generate_contextual_data.makeConditionVectorsFromConfig(
        config)
    contextualVariableCombinations = structure.getAllContextualCombinations()
    interactionHeaders = getInteractionHeaders(config)

    # Rebuild the generating model so actual success probabilities can be queried
    experiments = []
    for experimentConditions in config["conditions"]:
        wrappedConditions = [
            generate_contextual_data.Condition(coeff)
            for coeff in experimentConditions
        ]
        experiments.append(
            generate_contextual_data.Experiment(wrappedConditions))
    trueModel = generate_contextual_data.LogisticModel(0, experiments,
                                                       structure)

    # A probability counts as "close to the max" when it exceeds this fraction of the max
    closeFraction = .98
    estimatedProbs = []
    actualProbs = []
    numCombosWithGoodTopAction = 0

    # One series per combination of contextual variable values; each series
    # holds one value per condition vector.
    for varValues in contextualVariableCombinations:
        estimatedRow = []
        actualRow = []
        for conditionVector in conditionVectors:
            features = makeActionInteraction(varValues, conditionVector,
                                             numContextualVars,
                                             interactionHeaders)
            classProbs = fittedModel.predict_proba(
                features.values.reshape(1, -1))
            # second entry is taken as the success probability
            estimatedRow.append(classProbs.flatten()[1])
            actualRow.append(
                trueModel.getSuccessProbability(varValues, conditionVector))
        estimatedProbs.append(estimatedRow)
        actualProbs.append(actualRow)

        # Is any near-best estimated action also a near-best actual action?
        estimatedArr = np.asarray(estimatedRow)
        actualArr = np.asarray(actualRow)
        nearBestActual = actualArr > np.max(actualArr) * closeFraction
        nearBestEstimated = estimatedArr > np.max(estimatedArr) * closeFraction
        if any(nearBestActual[nearBestEstimated]):
            numCombosWithGoodTopAction += 1

    # Now the plotting: within each condition vector's unit-wide slot, the
    # bars of the different contextual combinations sit side by side.
    barWidth = 1 / (len(estimatedProbs) + 1)
    for comboIndex, (estimatedRow, actualRow) in enumerate(
            zip(estimatedProbs, actualProbs)):
        difference = np.array(estimatedRow) - np.array(actualRow)
        xPositions = np.arange(len(difference)) + comboIndex * barWidth
        ax.bar(xPositions, difference, width=barWidth, color='r')

    ax.get_xaxis().set_ticks([])
    ax.set_xlabel("Condition and Cont. Var. Value Combos")
    ax.set_ylabel("Est. - Actual")

    # Distance summaries, written underneath the axes
    estimated = np.asarray(estimatedProbs).flatten()
    actual = np.asarray(actualProbs).flatten()
    roundingFigs = 4
    absoluteError = abs(estimated - actual)
    meanAbsError = round(np.average(absoluteError), roundingFigs)
    medianAbsError = round(np.median(absoluteError), roundingFigs)
    euclideanDist = round(np.linalg.norm(estimated - actual), roundingFigs)
    proportionClose = round(
        numCombosWithGoodTopAction / len(contextualVariableCombinations),
        roundingFigs)

    ax.annotate("Pointwise L1: " + str(meanAbsError) + "(" +
                str(medianAbsError) + ")",
                xy=(-.4, -.35),
                xycoords='axes fraction')
    ax.annotate("Euclidean: " + str(euclideanDist),
                xy=(-.4, -.48),
                xycoords='axes fraction')
    ax.annotate("Proportion close to max: " + str(proportionClose),
                xy=(-.4, -.61),
                xycoords='axes fraction')
def plotProbabilitiesOfReward(fittedModel, config, ax):
    '''
    Plots, on ax, the fittedModel's estimated probabilities of reward next to
    the actual probability of reward for each combination of the contextual
    variables and each condition vector. Assumes that all contextual
    variables are categorical.

    fittedModel must provide predict_proba; the actual probabilities come from
    a generate_contextual_data.LogisticModel built from config["conditions"].
    Estimated bars are drawn in EST_COLOR and actual bars in ACTUAL_COLOR, and
    the x axis gets one labelled tick per condition vector.

    As a side effect, the contextual values and both probability lists are
    printed for every combination.
    '''
    structure = generate_contextual_data.ContextualStructure(config)
    numContextualVars = structure.getNumberOfVariables()
    conditionVectors = generate_contextual_data.makeConditionVectorsFromConfig(
        config)
    contextualVariableCombinations = structure.getAllContextualCombinations()
    interactionHeaders = getInteractionHeaders(config)

    # Rebuild the generating model so actual success probabilities can be queried
    experiments = [
        generate_contextual_data.Experiment([
            generate_contextual_data.Condition(coeff)
            for coeff in curConditions
        ]) for curConditions in config["conditions"]
    ]
    model = generate_contextual_data.LogisticModel(0, experiments, structure)

    # 2 series for each combination of contextual variable values (one for
    # the estimated probabilities and one for actual); each series holds one
    # value per condition vector.
    estimatedProbs = []
    actualProbs = []
    for varValues in contextualVariableCombinations:
        curEstProb = []
        curActualProb = []
        for conditionVector in conditionVectors:
            features = makeActionInteraction(varValues, conditionVector,
                                             numContextualVars,
                                             interactionHeaders)
            # second entry is taken as the success probability
            estProb = fittedModel.predict_proba(
                features.values.reshape(1, -1)).flatten()[1]
            curEstProb.append(estProb)
            curActualProb.append(
                model.getSuccessProbability(varValues, conditionVector))
        estimatedProbs.append(curEstProb)
        actualProbs.append(curActualProb)

    # One label (and one unit-wide slot on the x axis) per condition vector.
    # Deriving the count from here rather than from a variable left over from
    # the loops keeps the function working when there are no combinations.
    conditionLabels = [str(vector) for vector in conditionVectors]
    numConditionVectors = len(conditionLabels)

    # Now we need to do the plotting
    numCombinations = len(estimatedProbs)
    barWidth = 1 / (2 * numCombinations + 1)
    for cvValuesIndex, (curEstProb, curActualProb) in enumerate(
            zip(estimatedProbs, actualProbs)):
        xEst = [
            x + 2 * cvValuesIndex * barWidth
            for x in np.arange(len(curEstProb))
        ]
        xActual = [
            x + (2 * cvValuesIndex + 1) * barWidth
            for x in np.arange(len(curEstProb))
        ]
        print("CVs: " + str(contextualVariableCombinations[cvValuesIndex]))
        print(curEstProb)
        print(curActualProb)
        ax.bar(xEst, curEstProb, width=barWidth, color=EST_COLOR)
        ax.bar(xActual, curActualProb, width=barWidth, color=ACTUAL_COLOR)

    # Add xticks on the middle of the group bars. Positions and labels are set
    # with separate calls: on matplotlib releases before 3.5 the second
    # positional argument of set_xticks is `minor`, so passing the labels
    # there would create minor ticks and show no labels at all.
    tickPositions = [
        r + numCombinations * barWidth for r in range(numConditionVectors)
    ]
    ax.set_xticks(tickPositions)
    ax.set_xticklabels(conditionLabels)