class TestBeliefState(object):
    def setup(self):
        d = Dataset('test_pascal_trainval', force=True)
        d2 = Dataset('test_pascal_test', force=True)
        config = {'detectors': ['csc_default']}
        self.dp = DatasetPolicy(d, d2, **config)
        self.bs = BeliefState(d, self.dp.actions)

    def test_featurization(self):
        ff = self.bs.compute_full_feature()
        np.set_printoptions(precision=2)
        print self.bs.block_out_action(ff, -1)
        print self.bs.block_out_action(ff, 0)
        print self.bs.block_out_action(ff, 3)
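test_featurization above prints the full belief-state feature with different actions blocked out. For context only, the snippet below is a toy sketch of block-coded features for a linear policy (one block per action); it is an assumption for illustration and does not reproduce the actual layout of BeliefState.compute_full_feature or block_out_action.

import numpy as np

# Toy illustration of a block-coded feature for a linear policy (an assumption
# for context, not the actual BeliefState layout): the state feature is copied
# into the block belonging to the chosen action and all other blocks stay zero,
# so one weight vector can score every (state, action) pair.
def block_code(state_feature, action_ind, num_actions):
    block = np.asarray(state_feature, dtype=float)
    coded = np.zeros(block.size * num_actions)
    if action_ind >= 0:  # treat a negative index as "no block selected"
        coded[action_ind * block.size:(action_ind + 1) * block.size] = block
    return coded

# e.g. block_code([0.2, 0.8], action_ind=1, num_actions=3)
# -> array([0. , 0. , 0.2, 0.8, 0. , 0. ])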
def run_on_image(self, image, dataset, verbose=False, epsilon=0.01):
    """
    Return
    - list of detections in the image, with each row as self.det_columns
    - list of multi-label classification outputs, with each row as
      self.get_cls_cols()
    - list of <s,a,r,s',dt> samples.
    """
    gt = image.get_det_gt(with_diff=True)
    self.tt.tic('run_on_image')

    all_detections = []
    all_clses = []
    samples = []
    prev_ap = 0
    img_ind = dataset.get_img_ind(image)

    # If we have previously run_on_image(), then we already have a reference
    # to an inf_model. Otherwise, we make a new one and store a reference to
    # it, to keep it alive.
    if hasattr(self, 'inf_model'):
        b = BeliefState(self.train_dataset, self.actions, self.inference_mode,
                        self.bounds, self.inf_model, self.fastinf_model_name)
    else:
        b = BeliefState(self.train_dataset, self.actions, self.inference_mode,
                        self.bounds, fastinf_model_name=self.fastinf_model_name)
    self.b = b
    self.inf_model = b.model

    self.update_actions(b)
    action_ind = self.select_action(b, epsilon)
    step_ind = 0
    initial_clses = np.array(b.get_p_c().tolist() + [img_ind, 0])
    entropy_prev = np.mean(b.get_entropies())
    while True:
        # Populate the sample with stuff we know
        sample = Sample()
        sample.step_ind = step_ind
        sample.img_ind = img_ind
        sample.state = b.compute_full_feature()
        sample.action_ind = action_ind
        # TODO: this is incorrect, and results in samples at t=0 already
        # having detections
        sample.t = b.t

        # Prepare for AUC reward stuff
        # TODO: should set time_to_deadline to -Inf if no bounds
        time_to_deadline = 0
        if self.bounds:
            # this should never be less than zero, except when running oracle
            time_to_deadline = max(0, self.bounds[1] - b.t)
        sample.auc_ap_raw = 0
        sample.auc_ap = 0

        # Take the action and get the observations as a dict
        action = self.actions[action_ind]
        obs = action.obj.get_observations(image)
        dt = obs['dt']

        # If observations include detections, compute the relevant
        # stuff for the sample collection
        sample.det_naive_ap = 0
        sample.det_actual_ap = 0
        if 'dets' not in obs:
            all_detections.append(np.array([]))
        else:
            det = action.obj
            dets = obs['dets']
            cls_ind = dataset.classes.index(det.cls)
            if dets.shape[0] > 0:
                c_vector = np.tile(cls_ind, (np.shape(dets)[0], 1))
                i_vector = np.tile(img_ind, (np.shape(dets)[0], 1))
                detections = np.hstack((dets, c_vector, i_vector))
            else:
                detections = np.array([])
            dets_table = Table(detections, det.columns + ['cls_ind', 'img_ind'])

            # compute the 'naive' det AP increase,
            # as if we were adding dets to an empty set
            #ap,rec,prec = self.ev.compute_det_pr(dets_table,gt)
            ap = self.ev.compute_det_map(dets_table, gt)
            sample.det_naive_ap = ap

            # TODO: am I needlessly recomputing this table?
            all_detections.append(detections)
            nonempty_dets = [dets for dets in all_detections if dets.shape[0] > 0]
            all_dets_table = Table(np.array([]), dets_table.cols)
            if len(nonempty_dets) > 0:
                all_dets_table = Table(np.concatenate(nonempty_dets, 0),
                                       dets_table.cols)

            # compute the actual AP increase: adding dets to dets so far
            #ap,rec,prec = self.ev.compute_det_pr(all_dets_table,gt)
            ap = self.ev.compute_det_map(all_dets_table, gt)
            ap_diff = ap - prev_ap
            sample.det_actual_ap = ap_diff

            # Compute detector AUC reward.
            # If the action took longer than we have time, benefit is 0
            # (which is already set above)
            if dt <= time_to_deadline:
                midway_point = b.t + dt / 2.
                if midway_point > self.bounds[0]:
                    length = max(0, self.bounds[1] - midway_point)
                else:
                    length = self.bounds[1] - self.bounds[0]
                auc_ap = 1. * ap_diff * length
                sample.auc_ap_raw = auc_ap

                # Now divide by the potential gain to compute the "normalized" reward.
                # Note that there are two cases: the curve goes up, or it turns down.
                # In the first case, the normalizing area should be the area to ap=1.
                # In the second case, the normalizing area should be the area to ap=0.
                if ap_diff < 0:
                    divisor = time_to_deadline * prev_ap
                else:
                    divisor = time_to_deadline * (1. - prev_ap)
                if divisor < 0:
                    divisor = 0
                auc_ap = 1 if divisor == 0 else auc_ap / divisor
                assert (auc_ap >= -1 and auc_ap <= 1)
                sample.auc_ap = auc_ap
            prev_ap = ap

        # Update the belief state with the observations
        if action.name == 'gist':
            b.update_with_gist(action_ind, obs['scores'])
        else:
            b.update_with_score(action_ind, obs['score'])

        # mean entropy
        entropy = np.mean(b.get_entropies())
        dh = entropy_prev - entropy  # this is actually -dh :)
        sample.entropy = dh

        auc_entropy = time_to_deadline * dh - dh * dt / 2
        divisor = (time_to_deadline * entropy_prev)
        if divisor == 0:
            auc_entropy = 1
        else:
            auc_entropy /= divisor
        if dt > time_to_deadline:
            auc_entropy = 0
        if not (auc_entropy >= -1 and auc_entropy <= 1):
            auc_entropy = 0
        sample.auc_entropy = auc_entropy
        entropy_prev = entropy

        # TODO: the below line of code should occur before the state is
        # stored in the sample
        b.t += dt
        sample.dt = dt
        samples.append(sample)
        step_ind += 1

        # The updated belief state posterior over C is our classification result
        clses = b.get_p_c().tolist() + [img_ind, b.t]
        all_clses.append(clses)

        # Update action values and pick the next action
        self.update_actions(b)
        action_ind = self.select_action(b, epsilon)

        # check for stopping conditions
        if action_ind < 0:
            break
        if self.bounds and self.policy_mode != 'oracle':
            if b.t > self.bounds[1]:
                break

    # in case of 'oracle' mode, re-sort the detections and times in order of
    # AP contributions, and actually re-gather p_c's for clses.
    action_inds = [s.action_ind for s in samples]
    if self.policy_mode == 'oracle':
        naive_aps = np.array([s.det_naive_ap for s in samples])
        sorted_inds = np.argsort(-naive_aps, kind='merge')  # order-preserving
        all_detections = np.take(all_detections, sorted_inds)
        sorted_action_inds = np.take(action_inds, sorted_inds)

        # actually go through the whole thing again, getting new p_c's
        b.reset()
        all_clses = []
        for action_ind in sorted_action_inds:
            action = self.actions[action_ind]
            obs = action.obj.get_observations(image)
            b.t += obs['dt']
            if action.name == 'gist':
                b.update_with_gist(action_ind, obs['scores'])
            else:
                b.update_with_score(action_ind, obs['score'])
            clses = b.get_p_c().tolist() + [img_ind, b.t]
            all_clses.append(clses)

    # now construct the final dets array, with correct times
    times = [s.dt for s in samples]
    #assert(len(all_detections)==len(all_clses)==len(times))
    cum_times = np.cumsum(times)
    all_times = []
    all_nonempty_detections = []
    for i, dets in enumerate(all_detections):
        num_dets = dets.shape[0]
        if num_dets > 0:
            all_nonempty_detections.append(dets)
            t_vector = np.tile(cum_times[i], (num_dets, 1))
            all_times.append(t_vector)
    if len(all_nonempty_detections) > 0:
        all_detections = np.concatenate(all_nonempty_detections, 0)
        all_times = np.concatenate(all_times, 0)
        # appending 'time' column at end, as promised
        all_detections = np.hstack((all_detections, all_times))
        # we probably went over deadline with the oracle mode, so trim it down
        if self.bounds:
            if np.max(all_times) > self.bounds[1]:
                first_overdeadline_ind = np.flatnonzero(
                    all_times > self.bounds[1])[0]
                all_detections = all_detections[:first_overdeadline_ind, :]
    else:
        all_detections = np.array([])
    all_clses = np.array(all_clses)

    if verbose:
        print("DatasetPolicy on image with ind %d took %.3f s" % (
            img_ind, self.tt.qtoc('run_on_image')))

    # TODO: temp debug thing
    if False:
        print("Action sequence was: %s" % [s.action_ind for s in samples])
        print("here's an image:")
        X = np.vstack((all_clses[:, :-2], image.get_cls_ground_truth()))
        np.set_printoptions(precision=2, suppress=True)
        print X
        plt.pcolor(np.flipud(X))
        plt.show()

    return (all_detections, all_clses, samples)
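The normalized AUC-AP reward computed inside the loop of run_on_image can be traced in isolation. The helper below is a minimal sketch of that computation; the name auc_ap_reward is hypothetical and not part of DatasetPolicy, and it assumes bounds = (start, deadline) and that t is the belief-state time before the action, as in the loop above.

# Minimal sketch of the AUC-AP reward above (hypothetical helper, not part of
# DatasetPolicy). The raw reward is the AP change times the time left between
# the action's midway point and the deadline; it is then normalized by the
# best (or worst) achievable area so the result lies in [-1, 1].
def auc_ap_reward(t, dt, bounds, prev_ap, ap_diff):
    start, deadline = bounds
    time_to_deadline = max(0, deadline - t)
    if dt > time_to_deadline:
        return 0., 0.  # the action does not fit before the deadline
    midway_point = t + dt / 2.
    if midway_point > start:
        length = max(0, deadline - midway_point)
    else:
        length = deadline - start
    raw = ap_diff * length
    # normalize by the area to ap=1 if the curve goes up, to ap=0 if it goes down
    divisor = time_to_deadline * (prev_ap if ap_diff < 0 else 1. - prev_ap)
    normalized = 1. if divisor == 0 else raw / divisor
    return raw, normalized

# e.g. with bounds=(0, 20), t=5, dt=2, prev_ap=0.2, ap_diff=0.1:
# raw = 0.1 * (20 - 6) = 1.4 and normalized = 1.4 / (15 * 0.8) ~= 0.12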
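The entropy-based reward in the loop follows the same area-under-the-curve idea: the drop in mean class entropy, weighted by the time remaining after the action's midpoint and normalized by the area obtained if entropy fell to zero immediately. Again a minimal sketch with a hypothetical helper name, mirroring the logic above.

# Minimal sketch of the entropy-based AUC reward above (hypothetical helper,
# not part of DatasetPolicy). dh is the drop in mean per-class entropy caused
# by the action; the reward is the area gained under the entropy curve from
# the action's midpoint to the deadline, normalized by the area that would be
# gained if entropy dropped to zero immediately.
def auc_entropy_reward(t, dt, bounds, entropy_prev, entropy):
    deadline = bounds[1]
    time_to_deadline = max(0, deadline - t)
    dh = entropy_prev - entropy  # positive when the action reduced uncertainty
    reward = time_to_deadline * dh - dh * dt / 2.
    divisor = time_to_deadline * entropy_prev
    reward = 1. if divisor == 0 else reward / divisor
    if dt > time_to_deadline or not (-1 <= reward <= 1):
        reward = 0.
    return reward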