Example #1
    def create_csp(self, graph_file, domain_size):
        self.CNET = constraintnet.ConstraintNet()
        gac = GAC(self.CNET)
        f = open(graph_file, 'r')
        number_of_vertices, number_of_edges = [int(x) for x in f.readline().strip().split(' ')]

        for i in range(number_of_vertices):
            index, x, y = f.readline().strip().split(' ')
            vertex = cspvariable.Variable(int(index), float(x), float(y))
            gac.variables.append(vertex)

        for j in range(number_of_edges):
            i1, i2 = [int(i) for i in f.readline().strip().split(' ')]
            this_vertex = gac.variables[i1]
            other_vertex = gac.variables[i2]
            constraint = cspconstraint.Constraint([this_vertex, other_vertex], "x!=y")
            self.CNET.add_constraint(this_vertex, constraint)
            self.CNET.add_constraint(other_vertex, constraint)

        for k in gac.variables:
            gac.domains[k] = [self.colors[x] for x in range(domain_size)]

        f.close()
        return gac
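A sketch of the graph-file layout create_csp expects, inferred from the reads above (the format itself is an inference, not documented in these excerpts): the first line holds the vertex and edge counts, followed by one "index x y" line per vertex and one "i1 i2" line per edge. An illustrative 4-vertex, 3-edge file:

4 3
0 0.0 0.0
1 1.0 0.0
2 0.0 1.0
3 1.0 1.0
0 1
1 2
2 3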
Example #2
def make_csp(path, colors):
    gac = GAC()

    f = open(path, "r")

    NV = 0
    NE = 0

    count = 1
    for line in f:
        l = parse_line(line)
        print(l)
        if count == 1:
            NV = int(l[0])
            NE = int(l[1])

        elif count > 1 and count <= NV + 1:
            print("Making var...")
            name = "v{}".format(l[0])
            print(name)
            print("")
            domain = [i for i in range(colors)]
            gac.add_variable(Vc_var(name, domain, float(l[1]), float(l[2])))

        else:
            print("Making constraint...\n")
            gac.add_constraint(gen_constraint(l[0], l[1]))

        count += 1

    f.close()
    return gac
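This and several later excerpts call a parse_line helper that is not shown anywhere on this page. A minimal sketch, assuming it simply tokenizes a whitespace-separated line (the helper in the source repos may differ):

def parse_line(line):
    # Assumed behavior: strip the trailing newline and split on whitespace.
    return line.strip().split()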
Example #3
    def __init__(self, columns, rows):
        tk.Tk.__init__(self)
        self.columns = columns
        self.rows = rows
        self.gac = GAC(columns, rows)
        self.search = Search(self.gac)
        solution = self.search.a_star()
        "ASDASDJKASHDKAJSD"
        self.canvas = tk.Canvas(self, width=800, height=800, borderwidth=0)
        self.canvas.pack(side="top", fill="both", expand="true")

        menubar = tk.Menu(self)

        mapMenu = tk.Menu(menubar)
        # one entry per scenario file; the default argument pins the filename
        # at definition time, avoiding the late-binding lambda pitfall
        for i in range(7):
            mapMenu.add_command(
                label="Scenario {}".format(i),
                command=lambda f='scenario{}.txt'.format(i): self.changeMap(f))
        menubar.add_cascade(label="Maps", menu=mapMenu)
        self.config(menu=menubar)
Example #4
def make(path):
    f = open(path, "r")
    count = 0
    size = 0
    colors = 0
    grid = []  # the board; named to avoid shadowing the builtin map

    # generate the board
    for line in f:
        l = parse_line(line)
        if count == 0:
            size = int(l[0])
            colors = int(l[1])
            for i in range(size):
                grid.append([0] * size)
        else:
            # each line: a color index, then the (x, y) endpoints of that color
            grid[int(l[2])][int(l[1])] = int(l[0]) + 1
            grid[int(l[4])][int(l[3])] = int(l[0]) + 1

        count += 1
    f.close()

    for row in grid:
        print(row)
    print()
    FFNode.size = size
    gac = GAC()
    gen_variables(gac, grid, colors)
    gen_constraints(gac, grid)

    return gac, size
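The file layout implied by the indexing above (an inference, not documented in the source): the first line gives the board size and the number of colors; each following line gives a zero-based color index and the (x, y) coordinates of that color's two endpoint cells, stored in the grid as color + 1. An illustrative file:

5 3
0 0 0 4 4
1 1 0 3 3
2 2 1 2 4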
Example #5
def make(path):
    f = open(path, "r")
    width = 0
    height = 0

    #generate map
    count = 0
    rows = []
    columns = []

    for line in f:
        l = util.parse_line(line)
        if count == 0:
            width = int(l[0])  #columns
            height = int(l[1])  #rows
            print(l)

        elif (count <= height):
            row = []
            for segment in l:
                row.append(int(segment))
            rows.append(row)
            print(row)

        else:
            col = []
            for segment in l:
                col.append(int(segment))
            columns.append(col)
            print(col)
        count += 1
    f.close()

    bitmap = util.get_bitmap_vector(max(width, height))
    print()

    gac = GAC()
    #gen variables
    vars = gen_variables(rows, columns, width, height, bitmap)
    for var in vars:
        gac.add_variable(var)

    #gen constraints
    constraints = gen_constraints(width, height)
    gac.constraints = constraints

    return gac, width, height
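The layout this parser implies (an inference from the branches above): the first line is "width height", the next height lines are row clues, and the remaining width lines are column clues. An illustrative 3x2 instance with made-up clues:

3 2
1 1
3
1
2
1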
Example #7
	def __init__(self, domains, constraint_list):
		'''
		Initializes the values used by astar and gac.
		'''
		#A* INFO
		self.h = 0
		self.g = 0
		self.f = 0
		self.predecessor = None
		self.neighbours = []	

		#GAC INFO
		self.constraints = Constraints("x!=y", ["x", "y"], constraint_list)
		self.domains = domains

		#init gac
		self.gac = GAC()
Example #8
    def __init__(self, world_size, args):
        if args.env_name == 'L2M2019Env':
            env = L2M2019Env(visualize=False, difficulty=args.difficulty)
            obs_dim = 99
        else:
            env = gym.make(args.env_name)
            obs_dim = env.observation_space.shape[0]

        act_dim = env.action_space.shape[0]

        self.device = torch.device(args.device)

        self.args = args
        self.world_size = world_size

        self.actor_critic = MLPActorCritic(obs_dim,
                                           act_dim,
                                           hidden_sizes=args.hidden_sizes).to(
                                               self.device)
        self.replay_buffer = [
            ReplayBuffer(obs_dim, act_dim, args.buffer_size)
            for _ in range(1, world_size)
        ]

        self.gac = GAC(self.actor_critic,
                       self.replay_buffer,
                       device=self.device,
                       gamma=args.gamma,
                       alpha_start=args.alpha_start,
                       alpha_min=args.alpha_min,
                       alpha_max=args.alpha_max)

        self.test_len = 0.0
        self.test_ret = 0.0

        self.ob_rrefs = []
        for ob_rank in range(1, world_size):
            ob_info = rpc.get_worker_info(OBSERVER_NAME.format(ob_rank))
            self.ob_rrefs.append(remote(ob_info, Observer, args=(args, )))

        self.agent_rref = RRef(self)
Example #10
class Probleminstance:

	'''
	A CSP problem instance searched with A*, with GAC domain filtering.
	'''
	def __init__(self, domains, constraint_list):
		'''
		Initializes the values used by astar and gac.
		'''
		#A* INFO
		self.h = 0
		self.g = 0
		self.f = 0
		self.predecessor = None
		self.neighbours = []	

		#GAC INFO
		self.constraints = Constraints("x!=y", ["x", "y"], constraint_list)
		self.domains = domains

		#init gac
		self.gac = GAC()

	def initialize(self):
		'''
		Initializes by running the first domain filtering loop.
		'''

		self.gac.initialize(self.domains, self.constraints)
		self.domains = self.gac.domain_filtering_loop()
		self.astar = Astar(self)

	def solve(self):
		'''
		Runs one iteration of the astar algorithm
		'''
		self.current = self.astar.solve("A*")
		return [self, self.current, self.astar.prev_current]

	def is_solution(self):
		'''
		Returns True if this is a solution state, False if not.
		'''
		for domain in self.domains:
			if len(self.domains[domain]) != 1:
				return False
		return True

	def get_neighbours(self):
		'''
		Returns the neighbours of this node. 
		'''
		minlen = float("inf")
		current_domain = None
		neighbours = list()
		for domain in range(len(self.domains)):
			if len(self.domains[domain]) == 1:
				continue
			if (len(self.domains[domain])) < minlen:
				minlen = len(self.domains[domain])
				current_domain = domain
		for color in self.domains[current_domain]:
			copy_domains = copy.deepcopy(self.domains)
			copy_domains[current_domain] = [color]
			copy_domains = self.gac.rerun(copy_domains, current_domain, self.constraints)
			pi = Probleminstance(copy_domains, self.constraints.involved)
			neighbours.append(pi)
		self.neighbours = neighbours
		return neighbours

	def get_arc_cost(self):
		'''
		Returns the cost of moving from this node.
		'''
		return 1

	def get_h(self):
		'''
		Sets and returns the h value
		'''
		h = 0
		for domain in self.domains:
			h += len(self.domains[domain]) - 1
		self.h = h
		return self.h

	def is_illegal(self):
		'''
		Returns True if any constraints are broken in this state, False otherwise.
		'''
		for node in self.constraints.involved:
			for edge in self.constraints.involved[node]:
				if (len(self.domains[node]) == 1) and (len(self.domains[edge]) == 1) and (self.domains[node][0] == self.domains[edge][0]):
					return True
		return False

	def __lt__(self, other):
		'''
		Less than comparison method. Compares on f-value primarily, h value
		if f are equal.
		'''
		if self.f == other.f:
			return self.h < other.h
		return self.f < other.f

	def __eq__(self, other):
		'''
		Equality comparison method. Compares on the equality of domains.
		'''
		return self.domains == other.domains

	def __str__(self):
		'''
		Print method for this object, returns its domains.
		'''
		return str(self.domains)
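A minimal driving sketch for Probleminstance, assuming integer-keyed domains and a constraint_list that maps each variable to its neighbours (the shape is_illegal walks); the values here are illustrative:

domains = {0: [0, 1, 2], 1: [0, 1, 2], 2: [0, 1, 2]}
constraint_list = {0: [1, 2], 1: [0, 2], 2: [0, 1]}  # a triangle graph
pi = Probleminstance(domains, constraint_list)
pi.initialize()                        # first GAC domain-filtering pass
state, current, previous = pi.solve()  # one A* iteration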
Example #11
    def __init__(self, variables, domains, expressions):
        super(Astar_GAC, self).__init__()
        self.cnet = CNET(variables, domains, expressions)
        self.currentState = self.initializeState(self.cnet)
        self.gac = GAC(self.currentState)
        self.Astar = AStar(self)
Example #12
class Astar_GAC(Graph):
    """Astar_GAC integrates Astar and GAC"""
    #both
    def __init__(self, variables, domains, expressions):
        super(Astar_GAC, self).__init__()
        self.cnet = CNET(variables, domains, expressions)
        self.currentState = self.initializeState(self.cnet)
        self.gac = GAC(self.currentState)
        self.Astar = AStar(self)


    @abstractmethod
    def createNewState( self, variables, constraints):
        pass 


    def initializeState(self, cnet):
        """in initState each variable has its full domain. It will be set as root node
        initilizes cnet"""
        s = self.createNewState( cnet.variables, cnet.constraints )
        s.update('start')
        self.startNode = s
        self.stateCounter = 0
        self.nofAssumption = 0
        self.nofExpanded = 0
        return s 


    def search(self):
        self.currentState = self.gac.domainFiltering(self.currentState)
        self.stateCounter += 1
        
        if self.currentState.isSolution():
            self.printStatistics(self.currentState)
            return self.currentState

        return self.iterateSearch()


    def iterateSearch(self):
        prev = self.currentState
        if prev.isSolution():
            return prev

        self.currentState = self.Astar.iterateAStar()
        # self.currentState.updateColors()
        self.stateCounter += 1
        self.currentState.parent = prev  # used to backtrack the 'shortest path' for statistics
        self.nofExpanded = self.Astar.nofExpandedNodes
        if self.currentState.isSolution():
            self.printStatistics(self.currentState)
            return self.currentState

        self.currentState = self.gac.domainFiltering(self.currentState)
        return self.currentState


    def makeAssumption(self, newVI, parentState):
        """Generate one successor, and make sure all pointers are correct"""

        newVertices = {}
        newVIList = []
        for vi in parentState.viList:
            viID = vi.getID()
            tmpVI = VI(viID, vi.domain.copy())
            newVertices[viID] = tmpVI
            newVIList.append( tmpVI )

        for vi in parentState.viList:
            viID = vi.getID()
            for neighbor in vi.neighbors:
                n = newVertices[ neighbor.getID() ]
                newVertices[viID].add_neighbor( n )

        # swap newVI in by ID; enumerate avoids mutating a list while iterating it
        for i, vi in enumerate(newVIList):
            if vi.getID() == newVI.getID():
                newVIList[i] = newVI
            else:
                for j, vi_n in enumerate(vi.neighbors):
                    if vi_n.getID() == newVI.getID():
                        vi.neighbors[j] = newVI

        succ = self.createNewState(newVIList, parentState.constraintList)
        succ.parent = parentState
        succ.updateUndecided() # maybe not needed

        succ.ciList = []
        constraints = self.cnet.getConstraints()
        for v in succ.undecidedVariables:
            for n in v.neighbors:
                for c in constraints:
                    succ.ciList.append( CI(c,[v,n]) )

        return succ


    def generateSucc(self, state):
        """ make a guess. start gussing value for variables with min. domain length"""
        succStates = []
        finishedVIs = []
        varsCopy = state.undecidedVariables.copy()

        if not len(varsCopy):
            return []

        otherVIs = sorted(varsCopy, key=lambda v: len(v.domain), reverse=True)
        betterVI = otherVIs.pop()

        if betterVI.domain:
            initID = betterVI.getID()
            for d in betterVI.domain:
                newVI = VI( initID, [d])
                newVI.neighbors = betterVI.neighbors.copy()
                successor = self.makeAssumption(newVI, state)

                succStates.append( self.gac.rerun(successor) )
            return succStates
        else:
            return []


# Not complete : TODO
    def printStatistics(self, state):
        print('The number of unsatisfied constraints =', self.countUnsatisfiedConstraints(state), '\n')
        print('The total number of vertices without a color assignment =', self.countColorLess(state), '\n')
        print('The total number of nodes in the search tree =', self.stateCounter, '\n')
        print('The total number of nodes popped from the agenda and expanded =', self.nofExpanded, '\n')
        print('The length of the path =', self.nofAssumption, '\n')


    def countColorLess(self, state):
        nofColorLess = 0
        for vi in state.viList:
            if len(vi.domain) != 1:
                nofColorLess += 1
        return nofColorLess


    # def countUnsatisfiedConstraints(self, state):
    #     unsatisfied = 0
    #     varList = state.viList
    #     for c in state.ciList:
    #         for var in varList: 
    #             if var in c.variables:
    #                 if self.countInconsistentDomainValues(var, c) or not len(var.domain):
    #                     unsatisfied += 1
    #     return unsatisfied

    def countUnsatisfiedConstraints(self, state):
        unsatisfied = 0
        varList = state.viList
        for var in varList:
            if len(var.domain) != 1:
                unsatisfied += 1
        return unsatisfied


    # Needed for Graph
    def getGoal(self):
        return None


    def countInconsistentDomainValues(self, x, c):
        pairs = []
        nofInconsistency = 0
        for k in c.variables:
            for value in x.domain:
                pairs.extend( list(itertools.product([value], k.domain)) )
        
        for p in pairs :
            if not c.constraint( p[0], p[1] ):
                nofInconsistency += 1

        return nofInconsistency
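Because createNewState is declared @abstractmethod, Astar_GAC must be subclassed before it can run. A minimal sketch, given suitable variables, domains and expressions, with a hypothetical State class standing in for the project's concrete search-state type:

class ColoringAstarGAC(Astar_GAC):
    def createNewState(self, variables, constraints):
        # State is a placeholder for the concrete state class the project
        # supplies; it must offer update(), isSolution(), viList, etc.
        return State(variables, constraints)

solver = ColoringAstarGAC(variables, domains, expressions)
solution = solver.search()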
Example #13
class Agent:
    def __init__(self, world_size, args):
        if args.env_name == 'L2M2019Env':
            env = L2M2019Env(visualize=False, difficulty=args.difficulty)
            obs_dim = 99
        else:
            env = gym.make(args.env_name)
            obs_dim = env.observation_space.shape[0]

        act_dim = env.action_space.shape[0]

        self.device = torch.device(args.device)

        self.args = args
        self.world_size = world_size

        self.actor_critic = MLPActorCritic(obs_dim,
                                           act_dim,
                                           hidden_sizes=args.hidden_sizes).to(
                                               self.device)
        self.replay_buffer = [
            ReplayBuffer(obs_dim, act_dim, args.buffer_size)
            for _ in range(1, world_size)
        ]

        self.gac = GAC(self.actor_critic,
                       self.replay_buffer,
                       device=self.device,
                       gamma=args.gamma,
                       alpha_start=args.alpha_start,
                       alpha_min=args.alpha_min,
                       alpha_max=args.alpha_max)

        self.test_len = 0.0
        self.test_ret = 0.0

        self.ob_rrefs = []
        for ob_rank in range(1, world_size):
            ob_info = rpc.get_worker_info(OBSERVER_NAME.format(ob_rank))
            self.ob_rrefs.append(remote(ob_info, Observer, args=(args, )))

        self.agent_rref = RRef(self)

    def select_action(self, obs, deterministic=False):
        obs = torch.FloatTensor(obs.reshape(1, -1)).to(self.device)
        a = self.actor_critic.act(obs, deterministic)
        return a

    def add_memory(self, ob_id, o, a, r, o2, d):
        self.replay_buffer[ob_id - 1].store(o, a, r, o2, d)

    def run_episode(self, n_steps=0, random=False):
        futs = []
        for ob_rref in self.ob_rrefs:
            # make async RPC to kick off an episode on all observers
            futs.append(
                rpc_async(ob_rref.owner(),
                          _call_method,
                          args=(Observer.run_episode, ob_rref, self.agent_rref,
                                n_steps, random)))

        # wait until all observers have finished this episode
        for fut in futs:
            fut.wait()

    def add_test_data(self, ret, length):
        self.test_ret += ret
        self.test_len += length

    def test_episode(self):
        futs, self.test_ret, self.test_len = [], 0.0, 0.0
        for ob_rref in self.ob_rrefs:
            # make async RPC to kick off an episode on all observers
            futs.append(
                rpc_async(ob_rref.owner(),
                          _call_method,
                          args=(Observer.test_episode, ob_rref,
                                self.agent_rref)))

        # wait until all observers have finished this episode
        for fut in futs:
            fut.wait()

        self.test_ret /= (self.world_size - 1)
        self.test_len /= (self.world_size - 1)
        return self.test_ret, self.test_len

    def update(self):
        for _ in range(self.args.steps_per_update):
            loss_a, loss_c, alpha = self.gac.update(self.args.batch_size)
        self.gac.update_beta()
        print(
            "loss_actor = {:<22}, loss_critic = {:<22}, alpha = {:<20}, beta = {:<20}"
            .format(loss_a, loss_c, alpha, self.gac.beta))
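A minimal launch sketch following the usual torch.distributed.rpc pattern (rank 0 hosts the agent, the remaining ranks host observers); the "agent" worker name and the driver calls are illustrative, not taken from the source:

import torch.distributed.rpc as rpc

def run_worker(rank, world_size, args):
    if rank == 0:
        rpc.init_rpc("agent", rank=rank, world_size=world_size)
        agent = Agent(world_size, args)
        agent.run_episode(random=True)  # fill the replay buffers
        agent.update()
    else:
        # Observers sit idle and serve the RPCs issued by the agent.
        rpc.init_rpc(OBSERVER_NAME.format(rank), rank=rank, world_size=world_size)
    rpc.shutdown()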
Example #14
    def __init__(self, graph):
        tk.Tk.__init__(self)
        self.graph = Graph(graph[0], graph[1], graph[2], graph[3])
        self.gac = GAC(self.graph)
        self.search = Search(self.gac)
        self.graph_size = 800.0
        self.vertex_size = 10.0

        self.ixy, self.x_size, self.y_size = self.getIXY()

        self.canvas = tk.Canvas(self,
                                width=self.graph_size + 50,
                                height=self.graph_size + 50,
                                borderwidth=0)
        self.canvas.pack(side="top", fill="both", expand="true")

        menubar = tk.Menu(self)

        commandmenu = tk.Menu(menubar)
        commandmenu.add_command(label="solve", command=self.drawSolution)
        commandmenu.add_command(label="start animation",
                                command=self.startAnimation)
        commandmenu.add_command(label="increment",
                                command=self.incrementSolution)
        commandmenu.add_command(label="reset", command=self.resetGraph)
        menubar.add_cascade(label="Commands", menu=commandmenu)

        execmenu = tk.Menu(menubar)
        # entries for 2 through 10 colors; the default argument pins the value
        for n in range(2, 11):
            execmenu.add_command(
                label="{} Colors".format(n),
                command=lambda c=str(n): self.changeColors(c))
        menubar.add_cascade(label="Colors", menu=execmenu)

        mapMenu = tk.Menu(menubar)
        maps = [("graph-color-2", 'graph-color-2.txt'),
                ("rand-50", 'rand-50-4-color1.txt'),
                ("test", 'test.txt'),
                ("spiral-500", 'spiral-500-4-color1.txt'),
                ("graph-color-1", 'graph-color-1.txt'),
                ("rand-100-6", 'rand-100-6-color1.txt'),
                ("rand-100-4", 'rand-100-4-color1.txt')]
        for label, filename in maps:
            mapMenu.add_command(label=label,
                                command=lambda f=filename: self.changeMap(f))
        menubar.add_cascade(label="Maps", menu=mapMenu)
        self.config(menu=menubar)

        self.oval = {}

        for edge in self.graph.edges:

            x1 = (self.ixy[edge[0]][1] *
                  (self.graph_size / self.x_size)) + (self.vertex_size / 2)
            y1 = (self.ixy[edge[0]][2] *
                  (self.graph_size / self.y_size)) + (self.vertex_size / 2)
            x2 = (self.ixy[edge[1]][1] *
                  (self.graph_size / self.x_size)) + (self.vertex_size / 2)
            y2 = (self.ixy[edge[1]][2] *
                  (self.graph_size / self.y_size)) + (self.vertex_size / 2)

            self.canvas.create_line(x1, y1, x2, y2)

        for vertex in self.ixy:
            x1 = vertex[1] * (self.graph_size / self.x_size)
            y1 = vertex[2] * (self.graph_size / self.y_size)

            x2 = x1 + self.vertex_size
            y2 = y1 + self.vertex_size

            self.oval[vertex[1],
                      vertex[2]] = self.canvas.create_oval(x1,
                                                           y1,
                                                           x2,
                                                           y2,
                                                           outline="black",
                                                           fill="gray80",
                                                           tag="oval")
Example #15
    def __init__(self):
        '''
        Initializes the general GAC.
        '''
        GAC.__init__(self)
Example #17
def main(args):

    if 'L2M2019Env' in args.env_name:
        env = L2M2019Env(visualize=False, difficulty=args.difficulty)
        test_env = L2M2019Env(visualize=False, difficulty=args.difficulty)
    else:
        env = gym.make(args.env_name)
        test_env = gym.make(args.env_name)
    device = torch.device(args.device)

    data = np.load('./official_obs_scaler.npz')
    obs_mean, obs_std = data['mean'], data['std']

    # 1.Set some necessary seed.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    env.seed(args.seed)
    test_env.seed(args.seed + 999)

    # 2.Create actor, critic, EnvSampler() and PPO.
    if 'L2M2019Env' in args.env_name:
        obs_dim = 99
    else:
        obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    act_high = env.action_space.high
    act_low = env.action_space.low

    actor_critic = MLPActorCritic(obs_dim,
                                  act_dim,
                                  hidden_sizes=args.hidden_sizes).to(device)

    replay_buffer = ReplayBuffer(obs_dim, act_dim, args.buffer_size)

    gac = GAC(actor_critic,
              replay_buffer,
              device=device,
              gamma=args.gamma,
              alpha_start=args.alpha_start,
              alpha_min=args.alpha_min,
              alpha_max=args.alpha_max)

    def act_encoder(y):
        # y = [min, max] ==> x = [-1, 1]
        # if args.env_name == 'L2M2019Env':
        #     return y
        return (y - act_low) / (act_high - act_low) * 2.0 - 1.0

    def act_decoder(x):
        # x = [-1, 1] ==> y = [min, max] (inverse of act_encoder)
        # if args.env_name == 'L2M2019Env':
        #     return np.abs(x)
        return (x + 1.0) / 2.0 * (act_high - act_low) + act_low

    def get_observation(env):
        obs = np.array(env.get_observation()[242:])

        obs = (obs - obs_mean) / obs_std

        state_desc = env.get_state_desc()
        p_body = [
            state_desc['body_pos']['pelvis'][0],
            -state_desc['body_pos']['pelvis'][2]
        ]
        v_body = [
            state_desc['body_vel']['pelvis'][0],
            -state_desc['body_vel']['pelvis'][2]
        ]
        v_tgt = env.vtgt.get_vtgt(p_body).T

        return np.append(obs, v_tgt)

    def get_reward(env):
        reward = 10.0

        # Reward for not falling down
        state_desc = env.get_state_desc()
        p_body = [
            state_desc['body_pos']['pelvis'][0],
            -state_desc['body_pos']['pelvis'][2]
        ]
        v_body = [
            state_desc['body_vel']['pelvis'][0],
            -state_desc['body_vel']['pelvis'][2]
        ]
        v_tgt = env.vtgt.get_vtgt(p_body).T

        vel_penalty = np.linalg.norm(v_body - v_tgt)

        muscle_penalty = 0
        for muscle in sorted(state_desc['muscles'].keys()):
            muscle_penalty += np.square(
                state_desc['muscles'][muscle]['activation'])

        ret_r = reward - (vel_penalty * 3 + muscle_penalty * 1)

        if vel_penalty < 0.3:
            ret_r += 10

        return ret_r

    # 3.Start training.
    def get_action(o, deterministic=False):
        o = torch.FloatTensor(o.reshape(1, -1)).to(device)
        a = actor_critic.act(o, deterministic)
        return a

    def test_agent():
        test_ret, test_len = 0, 0
        for j in range(args.epoch_per_test):
            _, d, ep_ret, ep_len = test_env.reset(), False, 0, 0
            o = get_observation(test_env)
            while not (d or (ep_len == args.max_ep_len)):
                # Take deterministic actions at test time
                a = get_action(o, True)
                a = act_decoder(a)

                for _ in range(args.frame_skip):
                    _, r, d, _ = test_env.step(a)
                    ep_ret += r
                    ep_len += 1
                    if d: break

                o = get_observation(test_env)

            test_ret += ep_ret
            test_len += ep_len
        return test_ret / args.epoch_per_test, test_len / args.epoch_per_test

    total_step = args.total_epoch * args.steps_per_epoch
    _, d, ep_len = env.reset(), False, 0
    o = get_observation(env)
    for t in range(1, total_step + 1):
        if t <= args.start_steps:
            a = act_encoder(env.action_space.sample())
        else:
            a = get_action(o, deterministic=False)

        a = act_decoder(a)

        r = 0.0
        for _ in range(args.frame_skip):
            _, _, d, _ = env.step(a)
            r += get_reward(env)
            ep_len += 1
            if d: break

        o2 = get_observation(env)

        # Ignore the "done" signal if it comes from hitting the time
        # horizon (that is, when it's an artificial terminal signal
        # that isn't based on the agent's state)

        d = False if ep_len == args.max_ep_len else d

        # if not d:
        #     new_o, new_r, new_o2 = generate_success(o, o2)
        #     replay_buffer.store(new_o, a, new_r * args.reward_scale, new_o2, d)

        # Store experience to replay buffer
        replay_buffer.store(o, a, r * args.reward_scale, o2, d)

        o = o2
        if d or (ep_len == args.max_ep_len):
            _, ep_len = env.reset(obs_as_dict=False), 0
            o = get_observation(env)

        if t >= args.update_after and t % args.steps_per_update == 0:
            for _ in range(args.steps_per_update):
                loss_a, loss_c, alpha = gac.update(args.batch_size)
            gac.update_beta()
            print(
                "loss_actor = {:<22}, loss_critic = {:<22}, alpha = {:<20}, beta = {:<20}"
                .format(loss_a, loss_c, alpha, gac.beta))

        # End of epoch handling
        if t >= args.update_after and t % args.steps_per_epoch == 0:
            test_ret, test_len = test_agent()
            print("Step {:>10}: test_ret = {:<20}, test_len = {:<20}".format(
                t, test_ret, test_len))
            print(
                "-----------------------------------------------------------")
            yield t, test_ret, test_len, actor_critic
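The yield at the end makes main() a generator that emits one tuple per test epoch. A hypothetical driver that checkpoints the actor-critic after each epoch (assuming MLPActorCritic is a torch.nn.Module):

for step, test_ret, test_len, ac in main(args):
    torch.save(ac.state_dict(), 'gac_step{}.pt'.format(step))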