import sys
import time
import numpy

from util import read_file, MaxHeap

budget = 20
graph_type = int(sys.argv[1])
stats = [[] for _ in range(budget)]
ans = [[] for _ in range(budget)]

for i in range(1, 51):
    filename = "../DATA/mass_data/input" + str(graph_type) + "-" + str(i) + ".txt"
    G = read_file(filename)
    start_time = time.time()
    G.preprocess()

    # push every node with a sentinel priority larger than any attainable value
    PQ = MaxHeap()
    for v in range(G.N):
        PQ.update(v, G.N * G.N)

    S = set()
    cur_val = G.N * G.N
    used = set()
    for curr_budget in range(budget):
        while True:
            cur, val, aux = PQ.pop()
            if cur == -1:
                break
            if cur in used:
                continue
            new_val = G.avg_dist(S.union(set([cur])))
            # (the remainder of the selection loop is truncated in the source)
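# None of these scripts ship the MaxHeap they import, so the following is a
# hedged stand-in inferred purely from the call sites above and below
# (update / pop / get_max, pop returning a (key, priority, aux) triple with a
# -1 key when empty). The real util.MaxHeap may well differ; in particular, the
# replay-memory class below builds it over a power-of-two array and expects
# update() to replace a key's priority in place, while this sketch is a simple
# insert-only binary heap that relies on the caller to skip stale entries.
import heapq
import itertools

class MaxHeap:
    def __init__(self, capacity=None):
        self._heap = []                    # entries: (-priority, tiebreak, key)
        self._counter = itertools.count()  # tiebreak so keys are never compared

    def update(self, key, priority):
        # insert-only; stale duplicates are filtered by the caller's `used` set
        heapq.heappush(self._heap, (-priority, next(self._counter), key))

    def pop(self):
        if not self._heap:
            return -1, None, None          # sentinel matching `if cur == -1: break`
        neg_p, aux, key = heapq.heappop(self._heap)
        return key, -neg_p, aux

    def get_max(self):
        # priority of the current top element (0 when empty)
        return -self._heap[0][0] if self._heap else 0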
import numpy as np

# SumTree (the proportional sampler) and MaxHeap are constructed in __init__
# below; their implementations are not included in this snippet.

class ExperienceMemory:
    def __init__(self, max_size, width, height, num_channels, alpha):
        self.max_size = max_size
        # frames/images are stored in sequence, i.e. frames[i+1] is the state
        # obtained by executing action actions[i+1] in state frames[i]
        self.frames = np.zeros([max_size, width, height, num_channels], dtype=np.float32)
        self.actions = np.zeros(max_size, dtype=int)
        self.rewards = np.zeros([max_size, 1], dtype=np.float32)
        self.terminal = np.zeros([max_size, 1], dtype=int)
        self.size = 0
        self.next_index = 0
        # use the next power of 2 as the size for the sum tree/max heap;
        # this simplifies their implementation
        power_2_size = self.get_next_power(max_size)
        self.sum_tree = SumTree(power_2_size, alpha)
        self.max_heap = MaxHeap(power_2_size)
        # additive constant to keep td-values above 0
        self.td_epsilon = 1e-9

    def add(self, s, a, r, t):
        i = self.next_index
        self.frames[i, :, :, :] = s
        self.actions[i] = a
        self.rewards[i] = r
        self.terminal[i] = t
        # a new element gets maximum priority value; this ensures that each
        # transition is replayed at least once (with high probability)
        p_value = 1
        if self.size > 0:
            p_value = self.max_heap.get_max()
        self.sum_tree.update(i, p_value)
        self.max_heap.update(i, p_value)
        if self.size < self.max_size:
            self.size += 1
        self.next_index = (self.next_index + 1) % self.max_size

    def sample(self, n):
        # if the replay memory is not full we should not sample frame 0,
        # since it has no predecessor frame
        min_index = 1
        max_index = self.size
        if self.size == self.max_size:
            min_index = 0
        i, p_values = self.sum_tree.sample(n)
        i_minus = (i - 1) % self.size
        s = self.frames[i_minus, :, :, :]
        a = self.actions[i]
        r = self.rewards[i]
        s2 = self.frames[i, :, :, :]
        t = self.terminal[i]
        return s, a, r, s2, t, i, p_values

    # update priority value of replayed transitions with their new td-error
    def update_p(self, indices, td):
        for i in range(len(indices)):
            self.sum_tree.update(indices[i], td[i] + self.td_epsilon)
            self.max_heap.update(indices[i], td[i] + self.td_epsilon)

    # returns y >= x such that y is a power of 2
    def get_next_power(self, x):
        result = 1
        if x > 0:
            x_int = int(x - 1)
            while x_int > 0:
                x_int = x_int >> 1
                result = result << 1
        return result
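# A minimal smoke-test sketch of the intended add -> sample -> update_p cycle,
# assuming SumTree and MaxHeap implementations are available; random frames and
# random stand-in TD errors take the place of a real environment and Q-network.
memory = ExperienceMemory(max_size=1024, width=84, height=84,
                          num_channels=4, alpha=0.6)

# store each *resulting* frame together with the action that produced it,
# matching the frames[i]/actions[i] convention documented in __init__
for step in range(256):
    frame = np.random.rand(84, 84, 4).astype(np.float32)
    memory.add(frame, np.random.randint(4), 0.0, 0)

s, a, r, s2, t, idx, p_values = memory.sample(32)
td_errors = np.abs(np.random.randn(32))   # stand-in for |TD error| per transition
memory.update_p(idx, td_errors)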
from util import read_file, MaxHeap

G = read_file("../OLD/input.txt")
G.preprocess()

budget = 10
seed = set()
PQ = MaxHeap()
last_seed = None
cur_best = None
cur_val = G.N * G.N
INIT_VAL = G.N * G.N

# initialize: compute each node's marginal value and push it on the heap,
# caching mg2, the value with the current best node also included
for i in range(G.N):
    mg1 = G.avg_dist(set([i]))
    prev_best = cur_best
    if cur_best is not None:
        mg2 = G.avg_dist(set([i]).union(set([cur_best])))
    else:
        mg2 = mg1
    flag = 0
    PQ.update((i, mg1, prev_best, mg2, flag), mg1)
    if mg1 < cur_val:
        cur_val = mg1
        cur_best = i

# update: lazily re-evaluate marginal values until the budget is spent
while len(seed) < budget:
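    # The body of this loop is truncated in the source. What follows is a
    # hedged reconstruction in the CELF++ lazy-evaluation style that the
    # (i, mg1, prev_best, mg2, flag) tuples above suggest; PQ.pop() is assumed
    # to return a (key, priority, aux) triple as in the scripts above. None of
    # this bookkeeping is confirmed by the snippet.
    node_info, _, _ = PQ.pop()
    node, mg1, prev_best, mg2, flag = node_info
    if flag == len(seed):
        # marginal value is current w.r.t. the seed set: commit the node
        seed.add(node)
        cur_val = mg1
        last_seed = node
    elif prev_best == last_seed:
        # CELF++ shortcut: mg2 was computed against exactly this seed set,
        # so it replaces mg1 without touching the graph
        PQ.update((node, mg2, last_seed, mg2, len(seed)), mg2)
    else:
        # stale entry: recompute both values against the current seed set
        mg1 = G.avg_dist(seed.union({node}))
        mg2 = G.avg_dist(seed.union({node, last_seed})) if last_seed is not None else mg1
        PQ.update((node, mg1, last_seed, mg2, len(seed)), mg1)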