Exemplo n.º 1
0
 def __init__(self, max_size, width, height, num_channels, alpha):
     """Create empty storage and priority structures for max_size transitions.

     Args:
         max_size: Number of slots in the memory.
         width: Second dimension of the frame array.
         height: Third dimension of the frame array.
         num_channels: Last dimension of the frame array.
         alpha: Forwarded unchanged to SumTree (its meaning is defined
             there; presumably the prioritization exponent -- confirm).
     """
     # Ring-buffer bookkeeping: capacity, current fill level, next write slot.
     self.max_size = max_size
     self.size = 0
     self.next_index = 0

     # Frames/images are stored in sequence: frames[i + 1] is the state
     # obtained by executing action actions[i + 1] in state frames[i].
     frame_shape = (max_size, width, height, num_channels)
     column_shape = (max_size, 1)
     self.frames = np.zeros(frame_shape, dtype=np.float32)
     self.actions = np.zeros(max_size, dtype=int)
     self.rewards = np.zeros(column_shape, dtype=np.float32)
     self.terminal = np.zeros(column_shape, dtype=int)

     # Additive constant to keep td-values above 0.
     self.td_epsilon = 1e-9

     # The sum tree / max heap are sized to the next power of 2, which
     # simplifies their implementation.
     tree_capacity = self.get_next_power(max_size)
     self.sum_tree = SumTree(tree_capacity, alpha)
     self.max_heap = MaxHeap(tree_capacity)
Exemplo n.º 2
0
import time
import numpy
import sys

# Number of iterations of the per-graph selection loop below.
budget = 20
# First command-line argument; only used here to build the input filename.
graph_type = int(sys.argv[1])

# One initially empty list per budget step (populated outside this excerpt).
stats = [[] for _ in range(budget)]
ans = [[] for _ in range(budget)]

# NOTE(review): read_file and MaxHeap are not imported in the visible import
# lines -- confirm they are brought into scope elsewhere in this script.
for i in range(1, 51):
    # "%d" formatting replaces the Python 2-only backtick repr syntax, which
    # is a SyntaxError on Python 3; for ints the resulting path is identical.
    filename = "../DATA/mass_data/input%d-%d.txt" % (graph_type, i)
    G = read_file(filename)
    start_time = time.time()
    G.preprocess()
    PQ = MaxHeap()

    # Seed every node with the same initial key G.N * G.N.  The loop variable
    # is named `node` so it no longer shadows the outer file index `i`.
    for node in range(G.N):
      PQ.update(node, G.N * G.N)

    S = set()
    cur_val = G.N * G.N

    used = set()

    for curr_budget in range(budget):
      while True:
        cur, val, aux = PQ.pop()
        # NOTE(review): -1 presumably signals an exhausted queue -- confirm
        # against MaxHeap.pop.
        if cur == -1:
          break
        # Skip entries that were already consumed.
        if cur in used:
          continue
        # Value of the current set S extended by the candidate node.
        new_val = G.avg_dist(S | {cur})
Exemplo n.º 3
0
class ExperienceMemory:
    """Fixed-capacity ring buffer of transitions with per-slot priorities.

    Frames, actions, rewards and terminal flags are kept in preallocated
    NumPy arrays that are overwritten in ring order once ``max_size`` entries
    have been added.  Every slot's priority is mirrored into a ``SumTree``
    (queried by ``sample``) and a ``MaxHeap`` (queried by ``add`` for the
    current maximum priority).
    """

    def __init__(self, max_size, width, height, num_channels, alpha):
        """Allocate empty storage for ``max_size`` transitions.

        Args:
            max_size: Number of slots in the memory.
            width: Second dimension of the frame array.
            height: Third dimension of the frame array.
            num_channels: Last dimension of the frame array.
            alpha: Forwarded unchanged to ``SumTree`` (presumably the
                prioritization exponent -- confirm against ``SumTree``).
        """
        self.max_size = max_size
        # Frames/images are stored in sequence: frames[i + 1] is the state
        # obtained by executing action actions[i + 1] in state frames[i].
        self.frames = np.zeros([max_size, width, height, num_channels],
                               dtype=np.float32)
        self.actions = np.zeros(max_size, dtype=int)
        self.rewards = np.zeros([max_size, 1], dtype=np.float32)
        self.terminal = np.zeros([max_size, 1], dtype=int)
        self.size = 0
        self.next_index = 0
        # Use the next power of 2 as the size for the sum tree/max heap; this
        # simplifies their implementation.
        power_2_size = self.get_next_power(max_size)
        self.sum_tree = SumTree(power_2_size, alpha)
        self.max_heap = MaxHeap(power_2_size)
        # Additive constant to keep td-values above 0.
        self.td_epsilon = 1e-9

    def add(self, s, a, r, t):
        """Write one transition into the next slot, overwriting when full.

        Args:
            s: Frame stored at the slot.
            a: Action stored at the slot.
            r: Reward stored at the slot.
            t: Terminal flag stored at the slot.
        """
        i = self.next_index
        self.frames[i, :, :] = s
        self.actions[i] = a
        self.rewards[i] = r
        self.terminal[i] = t

        # A new element gets the maximum priority value; this ensures that
        # each transition is replayed at least once (with high probability).
        # The very first element has nothing to compare against and gets 1.
        p_value = self.max_heap.get_max() if self.size > 0 else 1
        self.sum_tree.update(i, p_value)
        self.max_heap.update(i, p_value)

        if self.size < self.max_size:
            self.size += 1

        # Advance the write position, wrapping around at capacity.
        self.next_index = (self.next_index + 1) % self.max_size

    def sample(self, n):
        """Draw ``n`` transitions through the sum tree.

        Args:
            n: Number of transitions requested from ``SumTree.sample``.

        Returns:
            Tuple ``(s, a, r, s2, t, i, p_values)`` where ``i`` and
            ``p_values`` come from ``SumTree.sample``, ``s2``/``a``/``r``/``t``
            are read at ``i`` and ``s`` is the frame at ``(i - 1) % size``.
        """
        # NOTE(review): the original intent was to never sample frame 0 while
        # the memory is not yet full, since it has no predecessor frame.  No
        # index bound is passed to the sum tree, so that exclusion is not
        # enforced here; for i == 0 the predecessor wraps to size - 1.
        i, p_values = self.sum_tree.sample(n)
        i_minus = (i - 1) % self.size

        s = self.frames[i_minus, :, :, :]
        a = self.actions[i]
        r = self.rewards[i]
        s2 = self.frames[i, :, :, :]
        t = self.terminal[i]

        return s, a, r, s2, t, i, p_values

    def update_p(self, indices, td):
        """Update priorities of replayed transitions with their new td-error.

        Args:
            indices: Slot indices whose priority changes.
            td: New td-values, aligned position by position with ``indices``.
        """
        for position, index in enumerate(indices):
            # td_epsilon keeps the stored priority above 0.
            priority = td[position] + self.td_epsilon
            self.sum_tree.update(index, priority)
            self.max_heap.update(index, priority)

    def get_next_power(self, x):
        """Return the smallest power of 2 ``y`` such that ``y >= x``.

        Values of ``x`` up to 1 (including zero and negatives) yield 1.
        Fractional ``x`` is rounded up first so that the ``y >= x`` guarantee
        also holds for non-integers (e.g. 4.5 -> 8).
        """
        if x <= 1:
            return 1
        ceil_x = int(x)
        if ceil_x < x:
            ceil_x += 1
        # (ceil_x - 1).bit_length() is the number of doublings needed to
        # reach or exceed ceil_x when starting from 1.
        return 1 << (ceil_x - 1).bit_length()
Exemplo n.º 4
0
from util import (
  read_file,
  MaxHeap
)
# Load the graph and run its preprocessing step before querying distances.
G = read_file("../OLD/input.txt")
G.preprocess()

# State for the selection loop that follows this initialization.
budget = 10
seed = set()
PQ = MaxHeap()
last_seed = None
cur_best = None
cur_val = G.N * G.N

INIT_VAL = G.N * G.N

# Initialize: queue every node keyed by mg1, its single-node avg_dist value,
# while tracking the node with the smallest value seen so far.
for i in range(G.N):
  mg1 = G.avg_dist({i})
  prev_best = cur_best
  # mg2 is the value of node i together with the best node seen before it;
  # with no best node yet it falls back to mg1.
  mg2 = mg1 if cur_best is None else G.avg_dist({i, cur_best})
  flag = 0
  PQ.update((i, mg1, prev_best, mg2, flag), mg1)
  if mg1 < cur_val:
    cur_val, cur_best = mg1, i

# update
while len(seed) < budget: