-
Notifications
You must be signed in to change notification settings - Fork 0
/
libcause.py
225 lines (184 loc) · 9.71 KB
/
libcause.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
from numpy import *
from scipy.stats import *
from libpgm.nodedata import NodeData
from libpgm.graphskeleton import GraphSkeleton
from libpgm.discretebayesiannetwork import DiscreteBayesianNetwork
from libpgm.tablecpdfactorization import TableCPDFactorization
import itertools
import matplotlib.pyplot as plt
import matplotlib
import time
import json
class BinaryNetworkBandit(object):
    """Discrete Bayesian network wrapped as a Bernoulli bandit for Thompson sampling.

    Every arm is a single-variable intervention ``{variable: value}`` on a
    non-target variable; an arm pays off when the target variable is sampled
    as 1. Currently only supports interventions on a single variable at a time.
    """

    def __init__(self, bayesnetfile, targetVar='Y', prior=(1.0, 1.0)):
        """Load the network from *bayesnetfile* and initialise all counters.

        bayesnetfile -- path handed to NodeData.load / GraphSkeleton.load
        targetVar    -- name of the reward variable
        prior        -- (alpha, beta) pseudo-counts of the Beta prior used for
                        every posterior built by this class
        """
        self.file = bayesnetfile
        # Grid of theta values on which the posterior densities are plotted.
        self.theta_range = linspace(0.0001, 0.9999, 100)
        self.set_bayesnet()
        self.y = targetVar
        self.reset(prior=prior)

    def getCPD(self):
        """Return a TableCPDFactorization built on a freshly loaded network.

        The network is reloaded on every call because querying a
        TableCPDFactorization modifies the underlying Bayesian network
        (refresh() appears broken).
        """
        self.set_bayesnet()
        return TableCPDFactorization(self.bn)

    def set_bayesnet(self):
        """(Re)load the network from self.file and store it in self.bn."""
        nd = NodeData()
        skel = GraphSkeleton()
        nd.load(self.file)
        skel.load(self.file)
        skel.toporder()
        self.bn = DiscreteBayesianNetwork(skel, nd)

    def reset(self, prior=(1.0, 1.0)):
        """Clear all data on samples taken so far but keep the graph structure.

        You can optionally specify a new prior. As a side effect the
        probability of the target being "1" under each single intervention is
        queried from the network and printed.
        """
        self.prior = prior
        self.interventions = []           # arm index -> {variable: value}
        self.variables = []               # non-target variables; defines an ordering
        self.intervention_to_arm = {}     # (variable, value) -> arm index
        self.variable_name_to_index = {}  # variable -> position in self.variables
        values = []                       # per variable, the list of its possible values

        # .items() (rather than .iteritems()) behaves the same here on
        # Python 2 and Python 3.
        for variable, data in self.bn.Vdata.items():
            if variable == self.y:
                continue
            self.variable_name_to_index[variable] = len(self.variables)
            self.variables.append(variable)
            vals = data.get("vals")
            values.append(vals)
            for value in vals:
                self.intervention_to_arm[(variable, value)] = len(self.interventions)
                self.interventions.append({variable: value})

        # Calculate and print the actual value of theta for each arm (since we
        # know it). A fresh factorization is used for every query because
        # querying modifies the network.
        truth = [self.getCPD().specificquery({self.y: "1"}, intervention)
                 for intervention in self.interventions]
        # Single pre-formatted string: prints identically on Python 2 and 3.
        print("THETA %s" % (truth,))
        # Reset the network to its original state after the queries above.
        self.set_bayesnet()

        # Generate all possible assignments (intervention on all variables)
        # to the non-target variables.
        combinations = list(itertools.product(*values))
        self.assignements = [dict(zip(self.variables, combo)) for combo in combinations]
        num_assignments = len(self.assignements)
        # Map from the string form of an assignment's value list to its index.
        self.assingment_map = dict(
            (str(list(combo)), position) for position, combo in enumerate(combinations))
        self.atrials = zeros(shape=(num_assignments,), dtype=int)     # how often each assignment occurred
        self.asuccesses = zeros(shape=(num_assignments,), dtype=int)  # how often each assignment paid off

        self.num_arms = len(self.interventions)
        self.trials = zeros(shape=(self.num_arms,), dtype=int)     # how often each arm was selected
        self.successes = zeros(shape=(self.num_arms,), dtype=int)  # how often each arm paid off

        # Now here I'm going to assume models where X1 ... Xn are mutually
        # independent causes of Y. Record distributions for P(X1), P(X2) ... -
        # they update only when we observe Xn, not when we "do" it.
        # NOTE: the four arrays below are allocated with num_arms entries but
        # are indexed by variable index, so only the first len(self.variables)
        # entries are ever written.
        self.observed_trials = zeros(shape=(self.num_arms,), dtype=int)
        self.observed_true = zeros(shape=(self.num_arms,), dtype=int)
        # Records how many times each variable was set by intervention, and
        # how many of those interventions set it to 1.
        self.intervened_trials = zeros(shape=(self.num_arms,), dtype=int)
        self.intervened_true = zeros(shape=(self.num_arms,), dtype=int)

    def sample(self, n, plot=-1):
        """Pull n arms chosen by Thompson sampling, updating the counters in place.

        n    -- number of interventions to perform
        plot -- when > 0, the arm posteriors are plotted every *plot* iterations

        Returns None; results are accumulated in the counter arrays.
        """
        for i in range(n):
            do_plot = plot > 0 and i % plot == 0
            arm = self.get_recommendation(do_plot)
            intervention = self.interventions[arm]
            # Note: evidence is equivalent to do() in libpgm.
            # randomsample returns a list of dicts holding a value per variable.
            result = self.bn.randomsample(1, evidence=intervention)[0]
            reward = int(result.get(self.y))

            # Update the counts for the pulled arm (P(Y|X?=?)).
            self.trials[arm] += 1
            if reward == 1:
                self.successes[arm] += 1

            # For the variable we intervened on, record that we intervened.
            # ASSUMING SINGLE INTERVENTIONS AT THIS POINT.
            assert len(intervention) == 1
            do_variable, do_value = next(iter(intervention.items()))
            do_variable_indx = self.variable_name_to_index[do_variable]
            self.intervened_trials[do_variable_indx] += 1
            # Count only interventions that forced the variable to 1, mirroring
            # how observed_true is maintained below.
            if int(do_value) == 1:
                self.intervened_true[do_variable_indx] += 1

            # For all variables we did not intervene on, update observed.
            values = []
            for indx, v in enumerate(self.variables):
                value = result[v]
                values.append(value)
                if v not in intervention:
                    self.observed_trials[indx] += 1
                    if int(value) == 1:
                        self.observed_true[indx] += 1

            # TODO: an unfinished, syntactically invalid draft used to live
            # here. It was meant to also credit the arms matching the observed
            # values of the non-intervened variables, with fractional weights
            # derived from the observed_* and intervened_* counts (kept in
            # separate trials_c / successes_c arrays that were never created).
            # It was removed so that the module can be imported; the weighting
            # scheme still has to be designed.

            # Update the relevant exact assignment.
            a = self.assingment_map[str(values)]
            self.atrials[a] += 1
            if reward == 1:
                self.asuccesses[a] += 1

    def plot_observed(self):
        """Plot, per non-target variable, the Beta posterior over P(variable = 1)
        built from the observational (non-intervened) counts."""
        # TODO: put labels under each plot.
        # squeeze=False keeps the axes two-dimensional even for a single variable.
        f, sp = plt.subplots(1, len(self.variables), sharey=False,
                             figsize=(15, 5), squeeze=False)
        for i, axis in enumerate(sp[0]):
            dist = beta(self.prior[0] + self.observed_true[i],
                        self.prior[1] + self.observed_trials[i] - self.observed_true[i])
            axis.plot(self.theta_range, dist.pdf(self.theta_range))
        plt.show()

    def plot_assignments(self):
        """Print the per-assignment counts and plot the Beta posterior of the
        reward probability for every full assignment of the non-target variables."""
        print(self.atrials)
        print(self.asuccesses)
        f, sp = plt.subplots(1, len(self.assingment_map), sharey=False,
                             figsize=(15, 5), squeeze=False)
        # TODO: get rid of the unicode tags so the titles fit - the dirty way
        # is s.encode('ascii').
        titles = [json.dumps(x) for x in self.assignements]
        for i, axis in enumerate(sp[0]):
            # Same posterior form as everywhere else in the class; the stray
            # extra "+1" on alpha that used to be here has been dropped.
            dist = beta(self.prior[0] + self.asuccesses[i],
                        self.prior[1] + self.atrials[i] - self.asuccesses[i])
            axis.set_title(titles[i])
            axis.plot(self.theta_range, dist.pdf(self.theta_range))
        plt.show()

    def get_recommendation(self, do_plot=False):
        """Recommend which arm to pull next, proportional to the estimated
        probability that it is the optimal one.

        One value of theta is drawn from each arm's Beta posterior and the
        index of the arm with the largest draw is returned. With do_plot set,
        the posteriors are also plotted.
        """
        sampled_theta = []
        if do_plot:
            f, sp = plt.subplots(1, self.num_arms, sharey=False,
                                 figsize=(15, 5), squeeze=False)
        for i in range(self.num_arms):
            # Construct the Beta distribution for the posterior of arm i.
            dist = beta(self.prior[0] + self.successes[i],
                        self.prior[1] + self.trials[i] - self.successes[i])
            if do_plot:
                sp[0][i].plot(self.theta_range, dist.pdf(self.theta_range))
            # Draw a sample from the posterior.
            sampled_theta.append(dist.rvs())
        # TODO: alternately calculate P(Y|X1) as sum(P(Y|X1,X2)P(X2)) here.
        if do_plot:
            plt.show()
        # Return the index of the sample with the largest value.
        return sampled_theta.index(max(sampled_theta))

    def regret(self, bestprob):
        """Regret as the ratio between the achieved reward rate and the expected
        reward had we always selected the best arm: 1 - reward / bestprob.

        bestprob -- success probability of the best arm

        Returns NaN when no arm has been pulled yet.
        """
        total_trials = float(sum(self.trials))
        if total_trials == 0:
            # Undefined without data; return NaN explicitly instead of relying
            # on a 0/0 floating-point division.
            return float('nan')
        reward = sum(self.successes) / total_trials
        return 1 - reward / bestprob
# Demo run, executed when the module is loaded: use a small font so the
# subplot titles fit, build a bandit from "bayesnet.json", take 500 Thompson
# samples (plotting the arm posteriors on iteration 0 only), then report the
# per-arm counts and show the summary plots.
matplotlib.rcParams['font.size'] = 8

bandit = BinaryNetworkBandit("bayesnet.json")
bandit.sample(500, plot=500)

# Parenthesised single-argument form prints the same text on Python 2 and 3.
print(bandit.trials)
print(bandit.successes)

bandit.plot_observed()
bandit.plot_assignments()