Example #1
File: nchecker.py Project: nrc/N
  def checkJudge(self, name):
    pTree = self.world.judgeTrees[name][0]
    judge = JudgeDef(pTree)
    judge.name = pTree.val
    judge.envs = map(lambda x: self.strToSym(x, pTree.syntax), pTree.syntax.envs)
    judge.args = map(lambda x: self.strToSym(x, pTree.syntax), pTree.syntax.children)
    freshGen = Env()
    judge.envLabels = map(lambda x: freshGen.freshOld(x), judge.envs)
    judge.argLabels = map(lambda x: freshGen.freshOld(x), judge.args)
    
    
    self.world.judges[pTree.val] = judge
    
    for i in range(len(pTree.children)):
      self.checkJCase(pTree.children[i], judge, i)
      
    curIndex = len(pTree.children)
    
    #add any other asts which hold parts of the judgement
    for oTree in self.world.judgeTrees[name][1:]:
      #check the shapes match (ignore any latex or label)
      envs = map(lambda x: self.strToSym(x, oTree.syntax), oTree.syntax.envs)
      args = map(lambda x: self.strToSym(x, oTree.syntax), oTree.syntax.children)
      check = reduce(lambda x,y: x and y, map(lambda (x,y): x == y, zip(args, judge.args)), True)
      check = check and reduce(lambda x,y: x and y, map(lambda (x,y): x == y, zip(envs, judge.envs)), True)
      if not check:
        expectStr = "; ".join(map(lambda x: x.name, judge.envs)) + " |- " + "; ".join(map(lambda x: x.name, judge.args))
        foundStr = "; ".join(map(lambda x: x.name, envs)) + " |- " + "; ".join(map(lambda x: x.name, args))
        self.addErr("Shape of repeated judgment does not match original judgement: found: " + foundStr + ", expected: " + expectStr, oTree)

      #add any cases
      for i in range(len(oTree.children)):
        self.checkJCase(oTree.children[i], judge, curIndex + i)
      curIndex += len(oTree.children)
Example #2
class Function(Exp):

    def __init__(self, params, cmd):
        self.params = params
        self.cmd = cmd
        self.this = None
        self.env = None

    def eval(self, env):
        if self.env is None:
            self.env = Env(env)
            self.env.declare('this', self.this)
        return self

    def set_this(self, this):
        self.this = this

    def call(self, args, env):
        if len(args) != len(self.params):
            raise Exception("Invalid count of parameters. Should be %s, is %s."  % (len(self.params), len(args)))
        new_env = Env(self.env)
        values = zip(self.params, args)
        for val in values:
            new_env.declare(val[0], val[1])
        return self.cmd.eval(new_env)

    def __str__(self):
        return "Function(%s, %s)" % (self.params, self.cmd)
Example #3
def EVAL(ast, env):
        #print("EVAL %s" % printer._pr_str(ast))
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        if len(ast) == 0: return ast
        a0 = ast[0]
        if not isinstance(a0, MalSym):
            raise Exception("attempt to apply on non-symbol")

        if u"def!" == a0.value:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif u"let*" == a0.value:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            return EVAL(a2, let_env)
        else:
            el = eval_ast(ast, env)
            f = el.values[0]
            if isinstance(f, MalFunc):
                return f.apply(el.values[1:])
            else:
                raise Exception("%s is not callable" % f)
Example #4
class Field(ScatterPlane):
    '''
    This is the Field which will contain cells.
    '''
    agent_widget = ObjectProperty(None)
    total_reward = NumericProperty(0)

    def __init__(self, cell_size=25, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.cell_size = cell_size

        # At __init__ time the height and width, and consequently the center,
        # may not be established yet due to layout logic.
        Clock.schedule_once(self._init_after)

        Clock.schedule_interval(self.update, 0.1)

    def _init_after(self, dt):
        ''' Perform initializations after the layout is finalized. '''
        self.env = Env()
        # TODO: Move params to config file
        with open('sarsa.pickle', 'rb') as fd:
            self.sarsa = pickle.load(fd)
        self.grid = Grid(self.canvas, 'line_loop', Color(), self.cell_size,
                         self.to_local(*self.center))
        self.state = self.env.reset(self.grid)
        self._place_agent(self.state.cell)

    def _place_agent(self, cell):
        self.agent_widget.center = self.grid.pixcenter(cell.q, cell.r)
        # FIXME
        for _ in self.grid.neighbors(cell.q, cell.r):
            pass

    def on_touch_down(self, touch):
        super().on_touch_down(touch)

        x, y = self.to_local(touch.x, touch.y)
        q, r = self.grid.pixel_to_hex(x, y)

        if (q, r) in self.grid:
            print("Touched ({}, {}) in {}.".format(q, r, (x, y)))
            print("env tvisited", self.env.tvisited[q, r])
            print("state food", self.state.food)
        else:
            self.grid.init(q, r)
            for _ in self.grid.neighbors(q, r):
                pass

        return True

    # TODO: Shouldn't this fit better in SwarmApp?
    def update(self, dt):
        action = self.sarsa.policy(self.state, explore=False)
        next_state, reward, done = self.env.step(action)
        self.sarsa.adapt_policy(self.state, action, next_state, reward)

        self.state = next_state
        self.total_reward += int(reward)
        self._place_agent(self.state.cell)
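
Note: this Field widget, like the later reinforcement-learning snippets in this listing, treats Env as a Gym-style environment: reset(...) yields an initial state and step(action) yields a (next_state, reward, done) tuple (exact signatures vary per project; here reset also takes the grid). A toy sketch of that assumed contract, purely for illustration:

class Env:
    """Toy corridor environment showing the reset()/step() contract (illustrative only)."""

    def __init__(self, size=10):
        self.size = size
        self.pos = 0

    def reset(self):
        # Begin a new episode and return the initial state.
        self.pos = 0
        return self.pos

    def step(self, action):
        # Move right for a truthy action, left otherwise; reward on reaching the end.
        self.pos = max(0, min(self.size, self.pos + (1 if action else -1)))
        done = self.pos == self.size
        reward = 1.0 if done else 0.0
        return self.pos, reward, done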
Example #5
def EVAL(ast, env):
    while True:
        #print("EVAL %s" % printer._pr_str(ast))
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        ast = macroexpand(ast, env)
        if not types._list_Q(ast):
            return eval_ast(ast, env)
        if len(ast) == 0: return ast
        a0 = ast[0]

        if "def!" == a0:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif "let*" == a0:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            ast = a2
            env = let_env
            # Continue loop (TCO)
        elif "quote" == a0:
            return ast[1]
        elif "quasiquote" == a0:
            ast = quasiquote(ast[1]);
            # Continue loop (TCO)
        elif 'defmacro!' == a0:
            func = EVAL(ast[2], env)
            func._ismacro_ = True
            return env.set(ast[1], func)
        elif 'macroexpand' == a0:
            return macroexpand(ast[1], env)
        elif "do" == a0:
            eval_ast(ast[1:-1], env)
            ast = ast[-1]
            # Continue loop (TCO)
        elif "if" == a0:
            a1, a2 = ast[1], ast[2]
            cond = EVAL(a1, env)
            if cond is None or cond is False:
                if len(ast) > 3: ast = ast[3]
                else:            ast = None
            else:
                ast = a2
            # Continue loop (TCO)
        elif "fn*" == a0:
            a1, a2 = ast[1], ast[2]
            return types._function(EVAL, Env, a2, env, a1)
        else:
            el = eval_ast(ast, env)
            f = el[0]
            if hasattr(f, '__ast__'):
                ast = f.__ast__
                env = f.__gen_env__(el[1:])
            else:
                return f(*el[1:])
Example #6
def entry_point(argv):
    repl_env = Env()
    def REP(str, env):
        return PRINT(EVAL(READ(str), env))

    # core.py: defined using python
    for k, v in core.ns.items():
        repl_env.set(_symbol(unicode(k)), MalFunc(v))

    # core.mal: defined using the language itself
    REP("(def! not (fn* (a) (if a false true)))", repl_env)

    while True:
        try:
            line = mal_readline.readline("user> ")
            if line == "": continue
            print(REP(line, repl_env))
        except EOFError as e:
            break
        except reader.Blank:
            continue
        except types.MalException as e:
            print(u"Error: %s" % printer._pr_str(e.object, False))
        except Exception as e:
            print("Error: %s" % e)
            #print("".join(traceback.format_exception(*sys.exc_info())))
    return 0
Example #7
 def call(self, args, env):
     if len(args) != len(self.params):
         raise Exception("Invalid count of parameters. Should be %s, is %s."  % (len(self.params), len(args)))
     new_env = Env(self.env)
     values = zip(self.params, args)
     for val in values:
         new_env.declare(val[0], val[1])
     return self.cmd.eval(new_env)
Example #8
def EVAL(ast, env):
    while True:
        if type(ast) == list:
            if ast[0] == "def!":
                val = EVAL(ast[2], env)
                env.set(ast[1], val)
                return val

            elif ast[0] == "let*":
                new_env = Env(env)
                bindings = ast[1]
                for i in range(0, len(bindings), 2):
                    val = EVAL(bindings[i+1], new_env)
                    new_env.set(bindings[i], val)
                # return EVAL(ast[2], new_env)
                ast = ast[2]
                env = new_env
                continue

            elif ast[0] == "do":
                # elements = [eval_ast(e, env) for e in ast[1:]]
                # return elements[-1]
                [eval_ast(e, env) for e in ast[1:-1]]
                ast = ast[-1]
                continue

            elif ast[0] == "if":
                cond = EVAL(ast[1], env)
                if cond != None and cond != False:
                    # cond was true
                    ast = ast[2]
                else:
                    if len(ast) > 3:
                        ast = ast[3]
                    else:
                        return None
                continue

            elif ast[0] == "fn*":
                # def func(*params):
                #     new_env = Env(env, ast[1], params)
                #     res = EVAL(ast[2], new_env)
                #     return res;
                return maltypes.Function(ast[2], ast[1], env)

            else:
                l = eval_ast(ast, env)
                f = l[0]
                if type(f) == maltypes.Function:
                    ast = f.ast
                    new_env = Env(f.env, f.params, l[1:])
                    env = new_env
                else:
                    return f(*l[1:])

        else:
            res = eval_ast(ast, env)
            return res
Example #9
def EVAL(ast, env):
    while True:
        #print("EVAL %s" % printer._pr_str(ast))
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        if len(ast) == 0: return ast
        a0 = ast[0]
        if isinstance(a0, MalSym):
            a0sym = a0.value
        else:
            a0sym = u"__<*fn*>__"

        if u"def!" == a0sym:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif u"let*" == a0sym:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            ast = a2
            env = let_env # Continue loop (TCO)
        elif u"quote" == a0sym:
            return ast[1]
        elif u"quasiquote" == a0sym:
            ast = quasiquote(ast[1]) # Continue loop (TCO)
        elif u"do" == a0sym:
            if len(ast) == 0:
                return nil
            elif len(ast) > 1:
                eval_ast(ast.slice2(1, len(ast)-1), env)
            ast = ast[-1] # Continue loop (TCO)
        elif u"if" == a0sym:
            a1, a2 = ast[1], ast[2]
            cond = EVAL(a1, env)
            if cond is nil or cond is false:
                if len(ast) > 3: ast = ast[3] # Continue loop (TCO)
                else:            return nil
            else:
                ast = a2 # Continue loop (TCO)
        elif u"fn*" == a0sym:
            a1, a2 = ast[1], ast[2]
            return MalFunc(None, a2, env, a1, EVAL)
        else:
            el = eval_ast(ast, env)
            f = el.values[0]
            if isinstance(f, MalFunc):
                if f.ast:
                    ast = f.ast
                    env = f.gen_env(el.rest()) # Continue loop (TCO) 
                else:
                    return f.apply(el.rest())
            else:
                raise Exception("%s is not callable" % f)
Example #10
def EVAL(ast: MalType, env: Env) -> MalType:
    while True:
        if isinstance(ast, MalVector):
            return MalVector(EVAL(member, env) for member in ast)
        if isinstance(ast, MalHashmap):
            return MalHashmap([ast[0], EVAL(ast[1], env)])
        if not isinstance(ast, MalList):  # not a list
            return eval_ast(ast, env)

        if isinstance(ast, MalList):
            if len(ast) == 0:  # an empty list
                return ast
            else:  # a list
                if ast[0] == 'def!':
                    return env.set(ast[1], EVAL(ast[2], env))
                elif ast[0] == 'let*':
                    let_env = Env(outer=env)
                    param1 = iter(ast[1])
                    for symbol, value in zip(param1, param1):
                        let_env.set(symbol, EVAL(value, env=let_env))
                    # return EVAL(ast[2], env=let_env)
                    ast, env = ast[2], let_env
                    continue
                elif ast[0] == 'do':
                    # value = nil
                    # for element in ast[1:]:
                    #     value = EVAL(element, env)
                    # return value
                    for ele in ast[1:-1]:
                        eval_ast(ele, env)
                    ast = ast[-1]
                    continue
                elif ast[0] == 'if':
                    cond = EVAL(ast[1], env)
                    if cond != nil and MalBool(cond):
                        # return EVAL(ast[2], env)
                        ast = ast[2]
                        continue
                    elif len(ast) == 4:
                        # return EVAL(ast[3], env)
                        ast = ast[3]
                        continue
                    else:
                        return nil
                elif ast[0] == 'fn*':
                    return MalFunction(ast=ast[2], params=ast[1], env=env,
                                       eval_fn=EVAL)
                else:
                    f, *args = eval_ast(ast, env)
                    if isinstance(f, MalFunction):
                        env = Env(binds=f.params, exprs=args, outer=f.env)
                        ast = f.ast
                        continue
                    else:
                        return f(*args)
Example #11
    def eval(self, env):
        new_env = Env(env)
        # put object in heap
        addr = heap.alloc()
        heap[addr] = self

        for decl in self.decls:
            decl.exp.set_this(addr)
            decl.eval(new_env)
        for key in new_env:
            if new_env.directly_defined(key):
                self.env[key] = new_env[key]
        return addr
Example #12
def interpret(code, print_ast=False):
    ast = parse(tokenize(code))
    if print_ast:
        print(ast)
    env = Env()
    env.declare("alloc", Alloc())
    env.declare("readline", ReadLine())
    env.declare("true", 1)
    env.declare("false", 0)
    ast.eval(env)
Example #13
def eval_ast(ast: MalType, env: Env):
    if isinstance(ast, MalSymbol):
        return env.get(ast)
    elif isinstance(ast, MalList):
        return MalList(EVAL(child, env) for child in ast)
    else:
        return ast
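
Note: the mal-style EVAL/eval_ast snippets in this listing assume an Env keyed by symbols, with set/get and an optional outer scope; the fn* branches also pass binds/exprs so a call can map parameters to arguments. A minimal sketch of that assumed interface follows (symbol keys treated as plain hashable values; the real mal ports differ in detail, e.g. variadic '&' binds are omitted here):

class Env:
    """Minimal mal-style environment sketch (assumed interface, not a specific port)."""

    def __init__(self, outer=None, binds=None, exprs=None):
        self.outer = outer
        self.data = {}
        # Bind function parameters to call arguments, as in
        # Env(outer=fn.env, binds=fn.params, exprs=args) in the snippets above.
        if binds is not None:
            for i, b in enumerate(binds):
                self.data[b] = exprs[i]

    def set(self, key, value):
        # Define or overwrite a binding in this scope and return the value,
        # which lets EVAL write `return env.set(a1, res)` for def!.
        self.data[key] = value
        return value

    def find(self, key):
        # Return the innermost environment defining key, or None.
        if key in self.data:
            return self
        return self.outer.find(key) if self.outer is not None else None

    def get(self, key):
        env = self.find(key)
        if env is None:
            raise Exception("'%s' not found" % key)
        return env.data[key]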
Example #14
def EVAL(mt, env):
    if type(mt) == list:
        if mt[0] == "def!":
            val = EVAL(mt[2], env)
            env.set(mt[1], val)
            return val

        elif mt[0] == "let*":
            new_env = Env(env)
            bindings = mt[1]
            for i in range(0, len(bindings), 2):
                val = EVAL(bindings[i+1], new_env)
                new_env.set(bindings[i], val)
            return EVAL(mt[2], new_env)

        elif mt[0] == "do":
            elements = [eval_ast(e, env) for e in mt[1:]]
            return elements[-1]

        elif mt[0] == "if":
            cond = EVAL(mt[1], env)
            if cond != None and cond != False:
                # cond was true
                res = EVAL(mt[2], env)
            else:
                if len(mt) > 3:
                    res = EVAL(mt[3], env)
                else:
                    res = maltypes.Nil()
            return res

        elif mt[0] == "fn*":
            def func(*params):
                new_env = Env(env, mt[1], params)
                res = EVAL(mt[2], new_env)
                return res;
            return func

        else:
            l = eval_ast(mt, env)
            func = l[0]
            return func(*l[1:])

    else:
        res = eval_ast(mt, env)
        return res
Example #15
def EVAL(ast, env):
        #print("EVAL %s" % printer._pr_str(ast))
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        if len(ast) == 0: return ast
        a0 = ast[0]
        if isinstance(a0, MalSym):
            a0sym = a0.value
        else:
            a0sym = u"__<*fn*>__"

        if u"def!" == a0sym:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif u"let*" == a0sym:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            return EVAL(a2, let_env)
        elif u"do" == a0sym:
            el = eval_ast(ast.rest(), env)
            return el.values[-1]
        elif u"if" == a0sym:
            a1, a2 = ast[1], ast[2]
            cond = EVAL(a1, env)
            if cond is nil or cond is false:
                if len(ast) > 3: return EVAL(ast[3], env)
                else:            return nil
            else:
                return EVAL(a2, env)
        elif u"fn*" == a0sym:
            a1, a2 = ast[1], ast[2]
            return MalFunc(None, a2, env, a1, EVAL)
        else:
            el = eval_ast(ast, env)
            f = el.values[0]
            if isinstance(f, MalFunc):
                return f.apply(el.rest())
            else:
                raise Exception("%s is not callable" % f)
Example #16
File: step5_tco.py Project: wbrown/mal
def EVAL(ast, env):
    while True:
        #print("EVAL %s" % ast)
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        if len(ast) == 0: return ast
        a0 = ast[0]

        if "def!" == a0:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif "let*" == a0:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            return EVAL(a2, let_env)
        elif "do" == a0:
            eval_ast(ast[1:-1], env)
            ast = ast[-1]
            # Continue loop (TCO)
        elif "if" == a0:
            a1, a2 = ast[1], ast[2]
            cond = EVAL(a1, env)
            if cond is None or cond is False:
                if len(ast) > 3: ast = ast[3]
                else:            ast = None
            else:
                ast = a2
            # Continue loop (TCO)
        elif "fn*" == a0:
            a1, a2 = ast[1], ast[2]
            return types._function(EVAL, Env, a2, env, a1)
        else:
            el = eval_ast(ast, env)
            f = el[0]
            if hasattr(f, '__ast__'):
                ast = f.__ast__
                env = f.__gen_env__(el[1:])
            else:
                return f(*el[1:])
Example #17
def EVAL(ast, env):
    if type(ast) == List and len(ast) > 0:
        function = ast[0]
        if function == 'fn*':
            bindings = ast[1]
            body = ast[2]
            return Function(Env, bindings, env, body, EVAL)
        elif function == 'let*':
            scoped_env = Env(env)
            bindings = ast[1]
            for i in range(0, len(bindings), 2):
                symbol = Symbol(bindings[i])
                value = EVAL(bindings[i+1], scoped_env)
                scoped_env.set(symbol, value)
            expression = ast[-1]
            return EVAL(expression, scoped_env)
        elif function == 'def!':
            symbol = Symbol(ast[1])
            value = EVAL(ast[2], env)
            env.set(symbol, value)
            return value
        elif function == 'do':
            return_val = None
            for exp in ast[1:]:
                return_val = EVAL(exp, env)
            return return_val
        elif function == 'if':
            condition = EVAL(ast[1], env)
            if_branch = ast[2]
            if condition is not False and condition is not None:
                return EVAL(if_branch, env)
            else:
                else_branch = None
                try:
                    else_branch = ast[3]
                except IndexError as e:
                    pass
                return EVAL(else_branch, env)
        else:
            evaluated = eval_ast(ast, env)
            return evaluated[0](*evaluated[1:])
    evaluated = eval_ast(ast, env)
    return evaluated
Example #18
 def _init_after(self, dt):
     ''' Perform initializations after the layout is finalized. '''
     self.env = Env()
     # TODO: Move params to config file
     with open('sarsa.pickle', 'rb') as fd:
         self.sarsa = pickle.load(fd)
     self.grid = Grid(self.canvas, 'line_loop', Color(), self.cell_size,
                      self.to_local(*self.center))
     self.state = self.env.reset(self.grid)
     self._place_agent(self.state.cell)
Example #19
 def _mako_render(path,meta):
     makedirs(dirname(path))
     f=open(path,'w+')
     content=Env.get_template(template).render(**meta)
     f.write(content)
     try:
         print path
     except:
         pass
     f.close()
Example #20
File: dev.py Project: ankitcha/minion
def provision_env(**optionals):
    vpc_cidr_block = '10.6.0.0/16'
    zone = valid_zones[0]
    private_subnet_cidr_block = '10.6.0.0/19'
    public_subnet_cidr_block = '10.6.32.0/20'
    dev_zones = [valid_zones[0]]

    env = Env(
        env='dev',
        vpc_cidr_block=vpc_cidr_block,
        **optionals
    )

    env.provision_zone(
        zone=zone,
        private_subnet_cidr_block=private_subnet_cidr_block,
        public_subnet_cidr_block=public_subnet_cidr_block
    )

    env.provision_resources(zones=dev_zones, zk_cluster_size=1, flink_num_jobmanagers=1, flink_num_taskmanagers=1)
Example #21
def EVAL(mt, env):
    if type(mt) == list:
        if mt[0] == "def!":
            val = EVAL(mt[2], env)
            env.set(mt[1], val)
            return val

        elif mt[0] == "let*":
            new_env = Env(env)
            bindings = mt[1]
            for i in range(0, len(bindings), 2):
                val = EVAL(bindings[i+1], new_env)
                new_env.set(bindings[i], val)
            return EVAL(mt[2], new_env)

        else:
            l = eval_ast(mt, env)
            f = l[0]
            return f(*l[1:])

    else:
        return eval_ast(mt, env)
Example #22
def repl():
    # set up base environment
    env = Env(outer=None)
    for k, v in ns.items():
        env.set(k, v)

    env.set(SYMBOL_EVAL, lambda ast: EVAL(ast, env=env))  # `eval` added to ns

    s = """
    (def! load-file
      (fn* (f)
        (eval (read-string (str "(do " (slurp f) ")")))))
    """
    EVAL(READ(s), env=env)

    # read-eval-print loop
    while True:
        try:
            s = input("=> ")
            rep(s, env)
        except Exception as e:
            print("error: {0}".format(e))
Example #23
def EVAL(ast, env):
    if type(ast) == List and len(ast) > 0:
        function = ast[0]
        if function == 'let*':
            scoped_env = Env(env)
            bindings = ast[1]
            for i in range(0, len(bindings), 2):
                symbol = Symbol(bindings[i])
                value = EVAL(bindings[i+1], scoped_env)
                scoped_env.set(symbol, value)
            expression = ast[-1]
            return EVAL(expression, scoped_env)
        elif function == 'def!':
            symbol = Symbol(ast[1])
            value = EVAL(ast[2], env)
            env.set(symbol, value)
            return value
        else:
            evaluated = eval_ast(ast, env)
            return evaluated[0](*evaluated[1:])
    evaluated = eval_ast(ast, env)
    return evaluated
Example #24
def EVAL(ast, env):
        #print("EVAL %s" % printer._pr_str(ast))
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        if len(ast) == 0: return ast
        a0 = ast[0]

        if "def!" == a0:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif "let*" == a0:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            return EVAL(a2, let_env)
        else:
            el = eval_ast(ast, env)
            f = el[0]
            return f(*el[1:])
Example #25
def _get_options(sys,parser):
    options = None
    if len(sys.argv[1:]) == 0:
        options = parser.parse_args(['auto'])

    elif len(sys.argv[1:]) == 1:
        if sys.argv[1] == "_conditioncheck":
            options = condition_check.parse_args(sys.argv[2:])
            env = Env(options)
            env.check_env()
            CheckCondition(modules_pool=None,
                           options=options,
                           env=env).run()
            quit()
        elif sys.argv[1] == "--help" or sys.argv[1] == "-h":
            options = parser.parse_args(sys.argv[1:])
        elif sys.argv[1].startswith('-'):
            options = parser.parse_args(["auto"]+sys.argv[1:])
        else:
            options = parser.parse_args(sys.argv[1:])
    else:
        options = parser.parse_args(sys.argv[1:])
    return options
Example #26
def EVAL(ast: MalType, env: Env) -> MalType:
    if isinstance(ast, MalVector):
        return MalVector(EVAL(member, env) for member in ast)
    if isinstance(ast, MalHashmap):
        return MalHashmap([ast[0], EVAL(ast[1], env)])
    if not isinstance(ast, MalList):  # not a list
        return eval_ast(ast, env)

    if isinstance(ast, MalList):
        if len(ast) == 0:  # an empty list
            return ast
        else:  # a list
            if ast[0] == 'def!':
                return env.set(ast[1], EVAL(ast[2], env))
            elif ast[0] == 'let*':
                let_env = Env(outer=env)
                param1 = iter(ast[1])
                for symbol, value in zip(param1, param1):
                    let_env.set(symbol, EVAL(value, env=let_env))
                return EVAL(ast[2], env=let_env)
            else:
                f, *args = eval_ast(ast, env)
                return f(*args)
Example #27
def EVAL(ast, env):
        #print("EVAL %s" % ast)
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        if len(ast) == 0: return ast
        a0 = ast[0]

        if "def!" == a0:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif "let*" == a0:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            return EVAL(a2, let_env)
        elif "do" == a0:
            el = eval_ast(ast[1:], env)
            return el[-1]
        elif "if" == a0:
            a1, a2 = ast[1], ast[2]
            cond = EVAL(a1, env)
            if cond is None or cond is False:
                if len(ast) > 3: return EVAL(ast[3], env)
                else:            return None
            else:
                return EVAL(a2, env)
        elif "fn*" == a0:
            a1, a2 = ast[1], ast[2]
            return types._function(EVAL, Env, a2, env, a1)
        else:
            el = eval_ast(ast, env)
            f = el[0]
            return f(*el[1:])
Example #28
File: stepA_mal.py Project: kanaka/mal
def entry_point(argv):
    repl_env = Env()
    def REP(str, env):
        return PRINT(EVAL(READ(str), env))

    # core.py: defined using python
    for k, v in core.ns.items():
        repl_env.set(_symbol(unicode(k)), MalFunc(v))
    repl_env.set(types._symbol(u'eval'),
                 MalEval(None, env=repl_env, EvalFunc=EVAL))
    mal_args = []
    if len(argv) >= 3:
        for a in argv[2:]: mal_args.append(MalStr(unicode(a)))
    repl_env.set(_symbol(u'*ARGV*'), MalList(mal_args))

    # core.mal: defined using the language itself
    REP("(def! *host-language* \"rpython\")", repl_env)
    REP("(def! not (fn* (a) (if a false true)))", repl_env)
    REP("(def! load-file (fn* (f) (eval (read-string (str \"(do \" (slurp f) \")\")))))", repl_env)
    REP("(defmacro! cond (fn* (& xs) (if (> (count xs) 0) (list 'if (first xs) (if (> (count xs) 1) (nth xs 1) (throw \"odd number of forms to cond\")) (cons 'cond (rest (rest xs)))))))", repl_env)
    REP("(def! inc (fn* [x] (+ x 1)))", repl_env)
    REP("(def! gensym (let* [counter (atom 0)] (fn* [] (symbol (str \"G__\" (swap! counter inc))))))", repl_env)
    REP("(defmacro! or (fn* (& xs) (if (empty? xs) nil (if (= 1 (count xs)) (first xs) (let* (condvar (gensym)) `(let* (~condvar ~(first xs)) (if ~condvar ~condvar (or ~@(rest xs)))))))))", repl_env)

    if len(argv) >= 2:
        REP('(load-file "' + argv[1] + '")', repl_env)
        return 0

    REP("(println (str \"Mal [\" *host-language* \"]\"))", repl_env)
    while True:
        try:
            line = mal_readline.readline("user> ")
            if line == "": continue
            print(REP(line, repl_env))
        except EOFError as e:
            break
        except reader.Blank:
            continue
        except types.MalException as e:
            print(u"Error: %s" % printer._pr_str(e.object, False))
        except Exception as e:
            print("Error: %s" % e)
            if IS_RPYTHON:
                llop.debug_print_traceback(lltype.Void)
            else:
                print("".join(traceback.format_exception(*sys.exc_info())))
    return 0
Example #29
def entry_point(argv):
    repl_env = Env()
    def REP(str, env):
        return PRINT(EVAL(READ(str), env))

    # core.py: defined using python
    for k, v in core.ns.items():
        repl_env.set(_symbol(unicode(k)), MalFunc(v))
    repl_env.set(types._symbol(u'eval'),
                 MalEval(None, env=repl_env, EvalFunc=EVAL))
    mal_args = []
    if len(argv) >= 3:
        for a in argv[2:]: mal_args.append(MalStr(unicode(a)))
    repl_env.set(_symbol(u'*ARGV*'), MalList(mal_args))

    # core.mal: defined using the language itself
    REP("(def! not (fn* (a) (if a false true)))", repl_env)
    REP("(def! load-file (fn* (f) (eval (read-string (str \"(do \" (slurp f) \")\")))))", repl_env)

    if len(argv) >= 2:
        REP('(load-file "' + argv[1] + '")', repl_env)
        return 0

    while True:
        try:
            line = mal_readline.readline("user> ")
            if line == "": continue
            print(REP(line, repl_env))
        except EOFError as e:
            break
        except reader.Blank:
            continue
        except types.MalException as e:
            print(u"Error: %s" % printer._pr_str(e.object, False))
        except Exception as e:
            print("Error: %s" % e)
            #print("".join(traceback.format_exception(*sys.exc_info())))
    return 0
Example #30
File: terp.py Project: darius/pother
 def to_call(arg, k):
     return expr.eval(Env(variable, arg, env), k)
Example #31
from functools import partial
import torch
from torch import autograd, optim
from torch.distributions import Independent, Normal
from torch.distributions.kl import kl_divergence
from torch.nn.utils import parameters_to_vector, vector_to_parameters
from tqdm import tqdm
from env import Env
from hyperparams import BACKTRACK_COEFF, BACKTRACK_ITERS, ON_POLICY_BATCH_SIZE as BATCH_SIZE, CONJUGATE_GRADIENT_ITERS, DAMPING_COEFF, DISCOUNT, HIDDEN_SIZE, INITIAL_POLICY_LOG_STD_DEV, KL_LIMIT, LEARNING_RATE, MAX_STEPS, TRACE_DECAY, VALUE_EPOCHS
from models import ActorCritic
from utils import plot

env = Env()
agent = ActorCritic(env.observation_space.shape[0],
                    env.action_space.shape[0],
                    HIDDEN_SIZE,
                    initial_policy_log_std_dev=INITIAL_POLICY_LOG_STD_DEV)
critic_optimiser = optim.Adam(agent.critic.parameters(), lr=LEARNING_RATE)


def hessian_vector_product(d_kl, x):
    g = parameters_to_vector(
        autograd.grad(d_kl, agent.actor.parameters(), create_graph=True))
    return parameters_to_vector(
        autograd.grad((g * x.detach()).sum(),
                      agent.actor.parameters(),
                      retain_graph=True)) + DAMPING_COEFF * x


def conjugate_gradient(Ax, b):
    x = torch.zeros_like(b)
Example #32
class SingleThread:
    def __init__(self,
                 sess,
                 thread_index,
                 global_network,
                 initial_learning_rate,
                 grad_applier,
                 max_global_time_step,
                 action_size,
                 env_name,
                 device='/CPU:0'):

        self.thread_index = thread_index
        self.global_network = global_network
        self.initial_learning_rate = initial_learning_rate
        self.grad_applier = grad_applier
        self.max_global_time_step = max_global_time_step
        self.device = device
        self.action_size = action_size
        self.env = Env(env_name)

        # prepare model
        self.local_network = A3CLSTM(action_size, self.thread_index,
                                     self.device)
        self.local_network.loss_calculate_scaffold()

        # get gradients for local network
        v_ref = [v for v in self.local_network.get_vars()]
        self.gradients = tf.gradients(self.local_network.total_loss,
                                      v_ref,
                                      colocate_gradients_with_ops=False,
                                      gate_gradients=False,
                                      aggregation_method=None)
        # self.apply_gradients = grad_applier.apply_gradient(self.global_network.get_vars(),
        #                                                     self.gradients)

        self.apply_gradients = tf.train.RMSPropOptimizer(
            initial_learning_rate).apply_gradients(
                zip(self.gradients, self.global_network.get_vars()))

        self.sync = self.local_network.sync_from(self.global_network)

        # initialize states
        self.episode_reward = 0
        self.done = False
        self.state = self.env.reset()

    def choose_action(self, policy):
        return np.random.choice(range(len(policy)), p=policy)

    def _anneal_learning_rate(self, global_time_step):
        learning_rate = self.initial_learning_rate * (
            self.max_global_time_step -
            global_time_step) / self.max_global_time_step
        if learning_rate < 0.0:
            learning_rate = 0.0
        return learning_rate

    def write_summary(self, summary, train_writer, global_step):
        if self.thread_index == 0 and global_step % 10 == 0:
            train_writer.add_summary(summary, global_step)

    def process(self, sess, summary_op, train_writer, score, global_step):
        states = []
        values = []
        rewards = []
        discounted_rewards = []
        actions = []

        deltas = []
        gaes = []

        # first we sync local network with global network
        sess.run(self.sync)

        initial_lstm_state = self.local_network.lstm_state_output

        if self.done:
            self.state = self.env.reset()
            self.done = False

        # now our local network is the same as global network
        for i in range(0, LOCAL_MAX_STEP):
            #self.env.render()
            policy, value = self.local_network.get_policy_value(
                sess, self.state)
            action = self.choose_action(policy)

            states.append(self.state)
            actions.append(action)

            self.state, reward, self.done = self.env.step(action)
            rewards.append(reward)

            values.append(value[0])

            self.episode_reward += reward

            if self.done:
                print('Episode reward: {}'.format(self.episode_reward))

                self.episode_reward = 0
                self.state = self.env.reset()
                self.local_network.reset_lstm_state()

                break

        R = 0.0
        gae = 0.0

        if self.done is False:
            _, R = self.local_network.get_policy_value(
                sess, self.state)  # run and get the last value
            R = R[0]
            #states.append(self.state)

        a = []
        action_batch = []
        for i in reversed(range(len(rewards))):
            R = R * gamma + rewards[i]
            #R = R - values[i] # this is temporal difference
            discounted_rewards.append(R)
            a = np.zeros(self.action_size)
            a[actions[i]] = 1

            action_batch.append(a)

            #delta = rewards[i] + gamma * values[i+1] - values[i]
            #deltas.append(delta)

            #gae = gamma * tau * gae + delta
            #gaes.append(gae)
        #gaes = np.expand_dims(gaes, 1)

        states.reverse()
        states = np.array(states).reshape(-1, 47, 47, 1)
        discounted_rewards = np.array(discounted_rewards).reshape(-1, 1)
        #rewards.reverse()

        _, summary = sess.run(
            [self.apply_gradients, summary_op],
            feed_dict={
                self.local_network.s:
                states,
                #self.local_network.rewards: rewards,
                #self.local_network.values: values,
                self.local_network.step_size: [len(states)],
                #self.local_network.deltas: deltas,
                # self.local_network.gaes: gaes,
                #self.local_network.td: td,
                self.local_network.a:
                action_batch,
                self.local_network.discounted_rewards:
                discounted_rewards,
                self.local_network.LSTMState:
                initial_lstm_state,
                score:
                self.episode_reward
            })

        self.write_summary(summary, train_writer, global_step)

        time.sleep(2)
Example #33
 def fn(*exprs):
     new_env = Env(outer=env, binds=ast[1], exprs=exprs)
     return EVAL(ast[2], new_env)
Example #34
            el = eval_ast(ast, env)
            f = el[0]
            if hasattr(f, '__ast__'):
                ast = f.__ast__
                env = f.__gen_env__(el[1:])
            else:
                return f(*el[1:])


# print
def PRINT(exp):
    return printer._pr_str(exp)


# repl
repl_env = Env()


def REP(str):
    return PRINT(EVAL(READ(str), repl_env))


# core.py: defined using python
for k, v in core.ns.items():
    repl_env.set(types._symbol(k), v)
repl_env.set(types._symbol('eval'), lambda ast: EVAL(ast, repl_env))
repl_env.set(types._symbol('*ARGV*'), types._list(*sys.argv[2:]))

# core.mal: defined using the language itself
REP("(def! *host-language* \"python\")")
REP("(def! not (fn* (a) (if a false true)))")
Example #35
def EVAL(ast, env):
    while True:
        #print("EVAL %s" % printer._pr_str(ast))
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        ast = macroexpand(ast, env)
        if not types._list_Q(ast): return ast
        if len(ast) == 0: return ast
        a0 = ast[0]
        if isinstance(a0, MalSym):
            a0sym = a0.value
        else:
            a0sym = u"__<*fn*>__"

        if u"def!" == a0sym:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif u"let*" == a0sym:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i+1], let_env))
            ast = a2
            env = let_env # Continue loop (TCO)
        elif u"quote" == a0sym:
            return ast[1]
        elif u"quasiquote" == a0sym:
            ast = quasiquote(ast[1]) # Continue loop (TCO)
        elif u"defmacro!" == a0sym:
            func = EVAL(ast[2], env)
            func.ismacro = True
            return env.set(ast[1], func)
        elif u"macroexpand" == a0sym:
            return macroexpand(ast[1], env)
        elif u"try*" == a0sym:
            a1, a2 = ast[1], ast[2]
            a20 = a2[0]
            if isinstance(a20, MalSym):
                if a20.value == u"catch*":
                    try:
                        return EVAL(a1, env);
                    except types.MalException as exc:
                        exc = exc.object
                        catch_env = Env(env, _list(a2[1]), _list(exc))
                        return EVAL(a2[2], catch_env)
                    except Exception as exc:
                        exc = MalStr(unicode("%s" % exc))
                        catch_env = Env(env, _list(a2[1]), _list(exc))
                        return EVAL(a2[2], catch_env)
            return EVAL(a1, env);
        elif u"do" == a0sym:
            if len(ast) == 0:
                return nil
            elif len(ast) > 1:
                eval_ast(ast.slice2(1, len(ast)-1), env)
            ast = ast[-1] # Continue loop (TCO)
        elif u"if" == a0sym:
            a1, a2 = ast[1], ast[2]
            cond = EVAL(a1, env)
            if cond is nil or cond is false:
                if len(ast) > 3: ast = ast[3] # Continue loop (TCO)
                else:            return nil
            else:
                ast = a2 # Continue loop (TCO)
        elif u"fn*" == a0sym:
            a1, a2 = ast[1], ast[2]
            return MalFunc(None, a2, env, a1, EVAL)
        else:
            el = eval_ast(ast, env)
            f = el.values[0]
            if isinstance(f, MalFunc):
                if f.ast:
                    ast = f.ast
                    env = f.gen_env(el.rest()) # Continue loop (TCO) 
                else:
                    return f.apply(el.rest())
            else:
                raise Exception("%s is not callable" % f)
Example #36
    model_dir = os.path.abspath(os.path.join(data_dir, "oak_model/model"))
    cont_res_dir = os.path.abspath(os.path.join(data_dir, "eval_result/cont_res_things_15"))

    scene_file_name = "7_things_3_same.json"
    graph_file_name = "7_things_3_same.pkl"
    dataset_name = "7_things_3_same.pt"

    cmd_args.graph_file_name = graph_file_name
    cmd_args.scene_file_name = scene_file_name
    cmd_args.dataset_name = dataset_name
    print(cmd_args)

    raw_path = os.path.abspath(os.path.join(data_dir, "./processed_dataset/raw"))
    scenes_path = os.path.abspath(os.path.join(raw_path, scene_file_name))
    graphs_path = os.path.join(raw_path, graph_file_name)

    graphs, scene_dataset = create_dataset(data_dir, scenes_path, graphs_path)
    embedding_layer = nn.Embedding(len(scene_dataset.attr_encoder.lookup_list), cmd_args.hidden_dim)
    gnn = GNNGL(scene_dataset, embedding_layer)

    # --- Finished Load dataset , construct decoder ---- #
    decoder = NodeDecoder()
    ref = [0, 1]

    dataloader = DataLoader(scene_dataset)
    for data_point in dataloader:
        graph = graphs[data_point.graph_id]
        env = Env(data_point, graph, config, scene_dataset.attr_encoder)
        graph_embedding = gnn(data_point)
        probs, clauses = decoder.unroll(graph_embedding, graph, ref, eps=0)
Example #37
def test():
    tf.reset_default_graph()
    policy_network = PolicyNetwork(scope='supervised_policy')

    f = open(relationPath)
    all_data = f.readlines()
    f.close()

    test_data = all_data
    test_num = len(test_data)

    success = 0

    saver = tf.train.Saver()
    path_found = []
    path_relation_found = []
    path_set = set()

    with tf.Session() as sess:
        saver.restore(sess, 'models/policy_retrained' + relation)
        print('Model reloaded')

        if test_num > 500:
            test_num = 500

        for episode in xrange(test_num):
            print('Test sample %d: %s' % (episode, test_data[episode][:-1]))
            env = Env(dataPath, test_data[episode])
            sample = test_data[episode].split()
            state_idx = [env.entity2id_[sample[0]], env.entity2id_[sample[1]], 0]

            transitions = []

            for t in count():
                state_vec = env.idx_state(state_idx)
                action_probs = policy_network.predict(state_vec)

                action_probs = np.squeeze(action_probs)

                action_chosen = np.random.choice(np.arange(action_space), p=action_probs)
                reward, new_state, done = env.interact(state_idx, action_chosen)
                new_state_vec = env.idx_state(new_state)
                transitions.append(
                    Transition(state=state_vec, action=action_chosen, next_state=new_state_vec, reward=reward))

                if done or t == max_steps_test:
                    if done:
                        success += 1
                        print("Success\n")
                        path = path_clean(' -> '.join(env.path))
                        path_found.append(path)
                    else:
                        print('Episode ends due to step limit\n')
                    break
                state_idx = new_state

            if done:
                if len(path_set) != 0:
                    path_found_embedding = [env.path_embedding(path.split(' -> ')) for path in path_set]
                    curr_path_embedding = env.path_embedding(env.path_relations)
                    path_found_embedding = np.reshape(path_found_embedding, (-1, embedding_dim))
                    cos_sim = cosine_similarity(path_found_embedding, curr_path_embedding)
                    diverse_reward = -np.mean(cos_sim)
                    print('diverse_reward', diverse_reward)
                    # total_reward = 0.1*global_reward + 0.8*length_reward + 0.1*diverse_reward
                    state_batch = []
                    action_batch = []
                    for t, transition in enumerate(transitions):
                        if transition.reward == 0:
                            state_batch.append(transition.state)
                            action_batch.append(transition.action)
                    policy_network.update(np.reshape(state_batch, (-1, state_dim)), 0.1 * diverse_reward, action_batch)
                path_set.add(' -> '.join(env.path_relations))

    for path in path_found:
        rel_ent = path.split(' -> ')
        path_relation = []
        for idx, item in enumerate(rel_ent):
            if idx % 2 == 0:
                path_relation.append(item)
        path_relation_found.append(' -> '.join(path_relation))

    # path_stats = collections.Counter(path_found).items()
    relation_path_stats = collections.Counter(path_relation_found).items()
    relation_path_stats = sorted(relation_path_stats, key=lambda x: x[1], reverse=True)

    ranking_path = []
    for item in relation_path_stats:
        path = item[0]
        length = len(path.split(' -> '))
        ranking_path.append((path, length))

    ranking_path = sorted(ranking_path, key=lambda x: x[1])
    print('Success percentage:', success / test_num)

    f = open(dataPath + 'tasks/' + relation + '/' + 'path_to_use.txt', 'w')
    for item in ranking_path:
        f.write(item[0] + '\n')
    f.close()
    print('path to use saved')
    return
Example #38
            return pickle.load(pickle_file)
    else:
        with bz2.open(memory_path, 'rb') as zipped_pickle_file:
            return pickle.load(zipped_pickle_file)


def save_memory(memory, memory_path, disable_bzip):
    if disable_bzip:
        with open(memory_path, 'wb') as pickle_file:
            pickle.dump(memory, pickle_file)
    else:
        with bz2.open(memory_path, 'wb') as zipped_pickle_file:
            pickle.dump(memory, zipped_pickle_file)

# Environment
env = Env(args)
env.train()
action_space = env.action_space()

# Agent
dqn_list = []
for _ in range(args.num_ensemble):
    dqn = Agent(args, env)
    dqn_list.append(dqn)

# If a model is provided and evaluate is false, presumably we want to resume, so try to load memory
if args.model is not None and not args.evaluate:
    if not args.memory:
        raise ValueError('Cannot resume training without memory save path. Aborting...')
    elif not os.path.exists(args.memory):
        raise ValueError('Could not find memory file at {path}. Aborting...'.format(path=args.memory))
Example #39
# modules
import datetime

# Packages
from flask import Flask, jsonify, request
import pytz

# Files
from env import Env

env = Env('Api')

# __name__ is '__main__' if run as the main program
# else __name__ will be the file name
app = Flask(__name__)


@app.route('/', methods=['GET'])
def landing():
    return jsonify({'success': True})


@app.route('/show/<name>', methods=['GET'])
def show(name):
    return jsonify({'machine': name})


# It's as if the interpreter inserts this at the top
# of your module when run as the main program.
if __name__ == '__main__':
    print(
Example #40
File: training.py Project: xamm/DRL_HFV
EMBED_TYPE = 'conv1d'
LOG_INTERVAL = 200

#------------------------SET LOGS WRITER--------------------------

time_id = datetime.now().strftime("%d_%m_%Y")
filename = "experiment1"
log_dir = os.path.join('tensorboardLogs', filename)
writer = SummaryWriter(log_dir=log_dir)

#----------------INITIALIZE ENVIRONMENT AND POLICIES----------------

env = Env(seed=SEED,
          batch_size=BATCH_SIZE,
          capacity=CAPACITY,
          n_nodes=N_NODES,
          n_depot=N_DEPOT,
          max_demand=MAX_DEMAND,
          n_agents=N_VEHICLES)

env_test = Env(seed=SEED + 2,
               batch_size=BATCH_SIZE,
               capacity=CAPACITY,
               n_nodes=N_NODES,
               n_depot=N_DEPOT,
               max_demand=MAX_DEMAND,
               n_agents=N_VEHICLES)

policy = [
    PolicyNet(batch_size=BATCH_SIZE,
              n_nodes=N_NODES,
Example #41
                if (sa_value >= best_sa_value):
                    pi[s] = action
                    best_sa_value = sa_value


def count_rewards():
    rewards_sum = 0
    for m in memories:
        rewards_sum += m['reward']
    return rewards_sum
    # print(rewards_sum)


gamma = 1
epsilon = 0.1
env = Env(10)
wins = 0
loses = 0
state_space = [(x, y, z) for x in range(52) for y in range(52)
               for z in range(52)]
action_space = env.action_space
Q = init_Q(state_space, action_space)
returns = init_returns()
pi = init_pi()
T = 10
best_score = 0
epochs = range(20)
for e in epochs:
    memories = epoch()
    update_returns()
    update_Q()
Example #42
def mal_eval(ast, environ):
    while True:
        ast = macroexpand(ast, environ)
        if not isinstance(ast, list):
            return eval_ast(ast, environ)
        elif len(ast) == 0:
            return ast
        else:
            if isinstance(ast[0], Symbol):
                if ast[0].getVal() == 'def!':
                    environ.set(ast[1].getVal(), mal_eval(ast[2], environ))
                    return environ.get(ast[1].getVal())
                elif ast[0].getVal() == 'quote':
                    return ast[1]
                elif ast[0].getVal() == 'quasiquote':
                    ast = quasiquote(ast[1])
                    continue
                elif ast[0].getVal() == 'macroexpand':
                    return macroexpand(ast[1], environ)
                elif ast[0].getVal() == 'let*':
                    e = Env(environ)
                    update_env(e, ast[1])
                    environ = e
                    ast = ast[2]
                    continue
                elif ast[0].getVal() == 'if':
                    b = mal_eval(ast[1], environ)
                    if b != SpecialToken.NIL and b is not False:
                        ast = ast[2]
                    else:
                        if len(ast) < 4:
                            ast = SpecialToken.NIL
                        else:
                            ast = ast[3]
                    continue
                elif ast[0].getVal() == 'do':
                    for i in ast[1:len(ast) - 1]:
                        mal_eval(i, environ)
                    ast = ast[len(ast) - 1]
                    continue
                elif ast[0].getVal() == 'fn*':
                    return Function(
                        ast[2], ast[1].getVal(), environ, lambda *x: mal_eval(
                            ast[2], Env(environ, ast[1].getVal(), x)))
                elif ast[0].getVal() == 'defn':
                    f = Function(
                        ast[3], ast[2].getVal(), environ, lambda *x: mal_eval(
                            ast[3], Env(environ, ast[2].getVal(), x)))
                    environ.set(ast[1].getVal(), f)
                    return f
                elif ast[0].getVal() == 'defmacro!':
                    f = Function(
                        ast[3], ast[2].getVal(), environ, lambda *x: mal_eval(
                            ast[3], Env(environ, ast[2].getVal(), x[0])), True)
                    environ.set(ast[1].getVal(), f)
                    return f

            eval_list = eval_ast(ast, environ)
            if isinstance(eval_list[0], Function):
                ast = eval_list[0].get_ast_body()
                environ = Env(eval_list[0].get_env(),
                              eval_list[0].get_params(), eval_list[1:])
                continue
            else:
                fn = eval_list[0]
                args = eval_list[1:]
                return fn(*args)
Example #43
def main(unused_argv):
    '''
    check path
    '''
    if FLAGS.data_dir == '' or not os.path.exists(FLAGS.data_dir):
        raise ValueError('invalid data directory {}'.format(FLAGS.data_dir))

    if FLAGS.output_dir == '':
        raise ValueError('invalid output directory {}'.format(
            FLAGS.output_dir))
    elif not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    event_log_dir = os.path.join(FLAGS.output_dir, '')
    checkpoint_path = os.path.join(FLAGS.output_dir, 'model.ckpt')
    '''
    setup summaries
    '''
    summ = Summaries()
    '''
    setup the game environment
    '''

    filenames_train = glob.glob(
        os.path.join(FLAGS.data_dir, 'train-{}'.format(FLAGS.sampling_rate),
                     '*.mat'))
    filenames_val = glob.glob(
        os.path.join(FLAGS.data_dir, 'val-{}'.format(FLAGS.sampling_rate),
                     '*.mat'))

    game_env_train = Env(decay=FLAGS.decay)
    game_env_val = Env(decay=FLAGS.decay)

    game_actions = list(game_env_train.actions.keys())
    '''
    setup the transition table for experience replay
    '''

    stateDim = [FLAGS.num_chans, FLAGS.num_points]

    transition_args = {
        'batchSize': FLAGS.batch_size,
        'stateDim': stateDim,
        'numActions': len(game_actions),
        'maxSize': FLAGS.replay_memory,
    }

    transitions = TransitionMemory(transition_args)
    '''
    setup agent
    '''
    s_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size] + stateDim,
                                   's_placeholder')
    s2_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size] + stateDim,
                                    's2_placeholder')
    a_placeholder = tf.placeholder(tf.int32, [FLAGS.batch_size],
                                   'a_placeholder')
    r_placeholder = tf.placeholder(tf.float32, [FLAGS.batch_size],
                                   'r_placeholder')

    pcont_t = tf.constant(FLAGS.discount, tf.float32, [FLAGS.batch_size])

    network = Model(FLAGS.batch_size, len(game_actions), FLAGS.num_chans, FLAGS.sampling_rate, \
                    FLAGS.num_filters, FLAGS.num_recurs, FLAGS.pooling_stride, name = "network")

    target_network = Model(FLAGS.batch_size, len(game_actions), FLAGS.num_chans, FLAGS.sampling_rate,\
                           FLAGS.num_filters, FLAGS.num_recurs, FLAGS.pooling_stride, name = "target_n")

    q = network(s_placeholder)
    q2 = target_network(s2_placeholder)
    q_selector = network(s2_placeholder)

    loss, q_learning = trfl.double_qlearning(q, a_placeholder, r_placeholder,
                                             pcont_t, q2, q_selector)
    synchronizer = Synchronizer(network, target_network)
    sychronize_ops = synchronizer()

    training_variables = network.variables

    opt = Adam(FLAGS.learning_rate,
               lr_decay=FLAGS.lr_decay,
               lr_decay_steps=FLAGS.lr_decay_steps,
               lr_decay_factor=FLAGS.lr_decay_factor,
               clip=True)

    reduced_loss = tf.reduce_mean(loss)

    graph_regularizers = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    total_regularization_loss = tf.reduce_sum(graph_regularizers)

    total_loss = reduced_loss + total_regularization_loss

    update_op = opt(total_loss, var_list=training_variables)

    summ_loss_op = tf.summary.scalar('loss', total_loss)

    state_placeholder = tf.placeholder(tf.float32, [1] + stateDim,
                                       'state_placeholder')
    decayed_ep_placeholder = tf.placeholder(tf.float32, [],
                                            'decayed_ep_placeholder')

    action_tensor_egreedy = eGreedy(state_placeholder, network,
                                    len(game_actions), decayed_ep_placeholder,
                                    FLAGS.debug)

    action_tensor_greedy = greedy(state_placeholder, network)
    '''
    setup reward tracking summaries
    '''
    episode_reward_placeholder = tf.placeholder(tf.float32, [],
                                                "episode_reward_placeholder")
    average_reward_placeholder = tf.placeholder(tf.float32, [],
                                                "average_reward_placeholder")

    summ.register('train', 'episode_reward_train', episode_reward_placeholder)
    summ.register('train', 'average_reward_train', average_reward_placeholder)

    summ.register('val', 'episode_reward_val', episode_reward_placeholder)
    summ.register('val', 'average_reward_val', average_reward_placeholder)

    total_reward_train = 0
    average_reward_train = 0

    total_reward_val = 0
    average_reward_val = 0
    '''
    gathering summary operators
    '''
    train_summ_op = summ('train')
    val_summ_op = summ('val')
    '''
    setup the training process
    '''
    transitions.empty()
    # print("game_actions -> {}".format(game_actions))

    writer = tf.summary.FileWriter(event_log_dir, tf.get_default_graph())

    saver = tf.train.Saver(training_variables)

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)

    assert (FLAGS.gpus != ''), 'invalid GPU specification'
    config.gpu_options.visible_device_list = FLAGS.gpus

    with tf.Session(config=config) as sess:
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])

        val_step = 0

        for step in range(FLAGS.steps):
            print("Iteration: {}".format(step))

            game_env_train.reset(filenames_train[np.random.randint(
                0, len(filenames_train))])

            last_state = None
            last_state_assigned = False
            episode_reward = 0
            action_index = (len(game_actions) >> 2)

            for estep in range(FLAGS.eval_steps):
                # print("Evaluation step: {}".format(estep))

                # print("{} - measured RT: {}".format(estep, game_env_train.measured_rt))
                # print("{} - predicted RT: {}".format(estep, game_env_train.predicted_rt))
                # print("{} - action -> {}".format(estep, game_actions[action]))

                state, reward, terminal = game_env_train.step(
                    game_actions[action_index])

                # game over?
                if terminal:
                    break

                episode_reward += reward

                # Store transition s, a, r, t
                # if last_state_assigned and reward:
                if last_state_assigned:
                    # print("reward -> {}".format(reward))
                    # print("action -> {}".format(game_actions[last_action]))
                    transitions.add(last_state, last_action, reward,
                                    last_terminal)

                # Select action
                # decayed_ep = FLAGS.testing_ep

                decayed_ep = max(0.1,
                                 (FLAGS.steps - step) / FLAGS.steps * FLAGS.ep)

                if not terminal:
                    action_index = sess.run(action_tensor_egreedy,
                                            feed_dict={
                                                state_placeholder:
                                                np.expand_dims(state, axis=0),
                                                decayed_ep_placeholder:
                                                decayed_ep
                                            })
                else:
                    action_index = 0

                # Do some Q-learning updates
                if estep > FLAGS.learn_start and estep % FLAGS.update_freq == 0:
                    summ_str = None
                    for _ in range(FLAGS.n_replay):
                        if transitions.size > FLAGS.batch_size:
                            s, a, r, s2 = transitions.sample()

                            summ_str, _ = sess.run(
                                [summ_loss_op, update_op],
                                feed_dict={
                                    s_placeholder: s,
                                    a_placeholder: a,
                                    r_placeholder: r,
                                    s2_placeholder: s2
                                })

                    if summ_str:
                        writer.add_summary(summ_str,
                                           step * FLAGS.eval_steps + estep)

                last_state = state
                last_state_assigned = True

                last_action = action_index
                last_terminal = terminal

                if estep > FLAGS.learn_start and estep % FLAGS.target_q == 0:
                    # print("duplicate model parameters")
                    sess.run(sychronize_ops)

            total_reward_train += episode_reward
            average_reward_train = total_reward_train / (step + 1)

            train_summ_str = sess.run(train_summ_op,
                                      feed_dict={
                                          episode_reward_placeholder:
                                          episode_reward,
                                          average_reward_placeholder:
                                          average_reward_train
                                      })
            writer.add_summary(train_summ_str, step)

            if FLAGS.validation and step % FLAGS.validation_interval == 0:
                game_env_val.reset(filenames_val[0])

                episode_reward = 0
                count = 0
                action_index = (len(game_actions) >> 2)

                while True:
                    # print("Evaluation step: {}".format(count))
                    # print("action -> {}".format(game_actions[action_index]))

                    state, reward, terminal = game_env_val.step(
                        game_actions[action_index])

                    # game over?
                    if terminal:
                        break

                    episode_reward += reward

                    if not terminal:
                        action_index = sess.run(action_tensor_greedy,
                                                feed_dict={
                                                    state_placeholder:
                                                    np.expand_dims(state,
                                                                   axis=0)
                                                })
                        action_index = np.squeeze(action_index)

                        # print('state -> {}'.format(state))
                        # print('action_index -> {}'.format(action_index))

                    else:
                        action_index = 0

                    count += 1

                total_reward_val += episode_reward
                average_reward_val = total_reward_val / (val_step + 1)
                val_step += 1

                val_summ_str = sess.run(val_summ_op,
                                        feed_dict={
                                            episode_reward_placeholder:
                                            episode_reward,
                                            average_reward_placeholder:
                                            average_reward_val
                                        })
                writer.add_summary(val_summ_str, step)

        tf.logging.info('Saving model.')
        saver.save(sess, checkpoint_path)
        tf.logging.info('Training complete')

    writer.close()
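A note on the exploration schedule used in the training loop above: decayed_ep is annealed linearly from FLAGS.ep toward a floor of 0.1 as step approaches FLAGS.steps. A minimal standalone sketch of that schedule (the helper name linear_epsilon is ours, not part of the original script):

def linear_epsilon(step, total_steps, ep_start, ep_floor=0.1):
    # Anneal epsilon linearly from ep_start down to ep_floor over total_steps.
    return max(ep_floor, (total_steps - step) / total_steps * ep_start)

# linear_epsilon(0, 1000, 1.0) == 1.0; linear_epsilon(900, 1000, 1.0) == 0.1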
Example #44
0
            # RL take action and get next observation and reward
            s_, reward, done = E.step(action)
            print(action, reward)
            # RL learn from this transition
            RL.learn(s, action, reward, s_)
            # swap observation
            s = s_
            # break while loop when end of this episode
            if done:
                #RL.epsilon += 0.001
                break
        if episode % 10 == 0:
            RL.dump_model = copy.copy(RL.model)
            
        
    E = Env()
    print("---------------test---------------")
    RL.m.bias_noisy = False
    RL.m.weight_noisy = False
    for i in range(E.final_step):
        q_table = RL.model.predict([i])
        E.step(np.argmax(q_table))
        print(np.argmax(q_table))
    print(E.score)

if __name__ == "__main__":
    env = Env()
    RL = NoisyQ(actions=list(range(env.n_actions)))
    update()
    
    
Example #45
0
def REINFORCE(training_pairs, policy_nn, num_episodes):
    train = training_pairs

    success = 0

    # path_found = set()
    path_found_entity = []
    path_relation_found = []

    for i_episode in range(num_episodes):
        start = time.time()
        print('Episode %d' % i_episode)
        print('Training sample: ', train[i_episode][:-1])

        env = Env(dataPath, train[i_episode])

        sample = train[i_episode].split()
        state_idx = [env.entity2id_[sample[0]], env.entity2id_[sample[1]], 0]

        episode = []
        state_batch_negative = []
        action_batch_negative = []
        for t in count():
            state_vec = env.idx_state(state_idx)
            action_probs = policy_nn.predict(state_vec)
            action_chosen = np.random.choice(np.arange(action_space), p=np.squeeze(action_probs))
            reward, new_state, done = env.interact(state_idx, action_chosen)

            if reward == -1:  # the action fails for this step
                state_batch_negative.append(state_vec)
                action_batch_negative.append(action_chosen)

            new_state_vec = env.idx_state(new_state)
            episode.append(Transition(state=state_vec, action=action_chosen, next_state=new_state_vec, reward=reward))

            if done or t == max_steps:
                break

            state_idx = new_state

        # Discourage the agent when it chooses an invalid step
        if len(state_batch_negative) != 0:
            print('Penalty to invalid steps:', len(state_batch_negative))
            policy_nn.update(np.reshape(state_batch_negative, (-1, state_dim)), -0.05, action_batch_negative)

        print('----- FINAL PATH -----')
        print('\t'.join(env.path))
        print('PATH LENGTH', len(env.path))
        print('----- FINAL PATH -----')

        # If the agent succeeds, do one optimization
        if done == 1:
            print('Success')

            path_found_entity.append(path_clean(' -> '.join(env.path)))

            success += 1
            path_length = len(env.path)
            length_reward = 1 / path_length
            global_reward = 1

            # if len(path_found) != 0:
            # 	path_found_embedding = [env.path_embedding(path.split(' -> ')) for path in path_found]
            # 	curr_path_embedding = env.path_embedding(env.path_relations)
            # 	path_found_embedding = np.reshape(path_found_embedding, (-1,embedding_dim))
            # 	cos_sim = cosine_similarity(path_found_embedding, curr_path_embedding)
            # 	diverse_reward = -np.mean(cos_sim)
            # 	print 'diverse_reward', diverse_reward
            # 	total_reward = 0.1*global_reward + 0.8*length_reward + 0.1*diverse_reward
            # else:
            # 	total_reward = 0.1*global_reward + 0.9*length_reward
            # path_found.add(' -> '.join(env.path_relations))

            total_reward = 0.1 * global_reward + 0.9 * length_reward
            state_batch = []
            action_batch = []
            for t, transition in enumerate(episode):
                if transition.reward == 0:
                    state_batch.append(transition.state)
                    action_batch.append(transition.action)
            policy_nn.update(np.reshape(state_batch, (-1, state_dim)), total_reward, action_batch)
        else:
            global_reward = -0.05
            # length_reward = 1/len(env.path)

            state_batch = []
            action_batch = []
            total_reward = global_reward
            for t, transition in enumerate(episode):
                if transition.reward == 0:
                    state_batch.append(transition.state)
                    action_batch.append(transition.action)
            policy_nn.update(np.reshape(state_batch, (-1, state_dim)), total_reward, action_batch)

            print('Failed, Do one teacher guideline')
            try:
                good_episodes = teacher(sample[0], sample[1], 1, env, graphpath)
                for item in good_episodes:
                    teacher_state_batch = []
                    teacher_action_batch = []
                    total_reward = 0.0 * 1 + 1 * 1 / len(item)
                    for t, transition in enumerate(item):
                        teacher_state_batch.append(transition.state)
                        teacher_action_batch.append(transition.action)
                    policy_nn.update(np.squeeze(teacher_state_batch), 1, teacher_action_batch)

            except Exception as e:
                print('Teacher guideline failed')
        print('Episode time: ', time.time() - start)
        print('\n')
    print('Success percentage:', success / num_episodes)

    for path in path_found_entity:
        rel_ent = path.split(' -> ')
        path_relation = []
        for idx, item in enumerate(rel_ent):
            if idx % 2 == 0:
                path_relation.append(item)
        path_relation_found.append(' -> '.join(path_relation))

    relation_path_stats = collections.Counter(path_relation_found).items()
    relation_path_stats = sorted(relation_path_stats, key=lambda x: x[1], reverse=True)

    f = open(dataPath + 'tasks/' + relation + '/' + 'path_stats.txt', 'w')
    for item in relation_path_stats:
        f.write(item[0] + '\t' + str(item[1]) + '\n')
    f.close()
    print('Path stats saved')

    return
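REINFORCE above stores each step as a Transition with state, action, next_state and reward fields, but the definition lies outside this excerpt. A minimal compatible sketch (an assumption based on how it is used, not the project's own code):

import collections

# Field names match how Transition is constructed and read in the loop above.
Transition = collections.namedtuple(
    'Transition', ['state', 'action', 'next_state', 'reward'])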
Example #46
0
 def test_step8_is_macro(self):
     self.assertEqual(False, MalFunctionCompiled(lambda a: MalInt(1)).is_macro())
     self.assertEqual(
         False,
         MalFunctionRaw(core.ns["+"], MalInt(1), MalList([]), Env(None)).is_macro(),
     )
Example #47
0
def EVAL(ast, env):
    while True:
        #print("EVAL %s" % printer._pr_str(ast))
        if not types._list_Q(ast):
            return eval_ast(ast, env)

        # apply list
        ast = macroexpand(ast, env)
        if not types._list_Q(ast):
            return eval_ast(ast, env)
        if len(ast) == 0: return ast
        a0 = ast[0]

        if "def!" == a0:
            a1, a2 = ast[1], ast[2]
            res = EVAL(a2, env)
            return env.set(a1, res)
        elif "let*" == a0:
            a1, a2 = ast[1], ast[2]
            let_env = Env(env)
            for i in range(0, len(a1), 2):
                let_env.set(a1[i], EVAL(a1[i + 1], let_env))
            ast = a2
            env = let_env
            # Continue loop (TCO)
        elif "quote" == a0:
            return ast[1]
        elif "quasiquote" == a0:
            ast = quasiquote(ast[1])
            # Continue loop (TCO)
        elif 'defmacro!' == a0:
            func = EVAL(ast[2], env)
            func._ismacro_ = True
            return env.set(ast[1], func)
        elif 'macroexpand' == a0:
            return macroexpand(ast[1], env)
        elif "py!*" == a0:
            exec(compile(ast[1], '', 'single'), globals())
            return None
        elif "py*" == a0:
            return types.py_to_mal(eval(ast[1]))
        elif "." == a0:
            el = eval_ast(ast[2:], env)
            f = eval(ast[1])
            return f(*el)
        elif "try*" == a0:
            if len(ast) < 3:
                return EVAL(ast[1], env)
            a1, a2 = ast[1], ast[2]
            if a2[0] == "catch*":
                err = None
                try:
                    return EVAL(a1, env)
                except types.MalException as exc:
                    err = exc.object
                except Exception as exc:
                    err = exc.args[0]
                catch_env = Env(env, [a2[1]], [err])
                return EVAL(a2[2], catch_env)
            else:
                return EVAL(a1, env)
        elif "do" == a0:
            eval_ast(ast[1:-1], env)
            ast = ast[-1]
            # Continue loop (TCO)
        elif "if" == a0:
            a1, a2 = ast[1], ast[2]
            cond = EVAL(a1, env)
            if cond is None or cond is False:
                if len(ast) > 3: ast = ast[3]
                else: ast = None
            else:
                ast = a2
            # Continue loop (TCO)
        elif "fn*" == a0:
            a1, a2 = ast[1], ast[2]
            return types._function(EVAL, Env, a2, env, a1)
        else:
            el = eval_ast(ast, env)
            f = el[0]
            if hasattr(f, '__ast__'):
                ast = f.__ast__
                env = f.__gen_env__(el[1:])
            else:
                return f(*el[1:])
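The while True loop above is the usual mal tail-call optimization: instead of recursing into the tail expression, EVAL rebinds ast and env and continues, so the Python stack stays flat no matter how deep the tail calls go. A tiny self-contained illustration of the same loop-and-rebind pattern (just the control flow, unrelated to the mal types):

def countdown_recursive(n):
    # naive tail recursion: raises RecursionError for large n
    return 'done' if n == 0 else countdown_recursive(n - 1)

def countdown_tco(n):
    # the loop-and-rebind pattern used by EVAL above: constant stack depth
    while True:
        if n == 0:
            return 'done'
        n = n - 1

# countdown_tco(10**6) returns 'done'; countdown_recursive(10**6) blows the stack.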
Example #48
0
def test(model, a2c, config, args, **kwargs):
    env = Env(config.move_range)
    env.set_param(**kwargs)

    test_loader = torch.utils.data.DataLoader(dataset=MRIDataset(
        root=args.root, image_set='test', transform=False),
                                              batch_size=config.batch_size,
                                              shuffle=False,
                                              num_workers=config.workers,
                                              pin_memory=False)

    start = time.time()
    reward_sum = 0
    PSNR_list = []
    SSIM_list = []
    for i, (ori_image, image) in enumerate(test_loader):
        ori_image = ori_image.numpy()
        image = image.numpy()
        previous_image = image.copy()
        env.reset(ori_image=ori_image, image=image)

        for j in range(config.episode_len):
            image_input = Variable(torch.from_numpy(image).cuda(),
                                   volatile=True)
            pout, vout = model(image_input)
            actions = a2c.act(pout, deterministic=True)
            image, reward = env.step(actions)
            image = np.clip(image, 0, 1)

            reward_sum += np.mean(reward)

        for ii in range(image.shape[0]):
            PSNR_list.append(
                computePSNR(ori_image[ii, 0], previous_image[ii, 0], image[ii,
                                                                           0]))
            SSIM_list.append(
                computeSSIM(ori_image[ii, 0], previous_image[ii, 0], image[ii,
                                                                           0]))

        if i == 100:
            i += 1
            actions = actions.astype(np.uint8)
            total = actions.size
            a0 = actions[0]
            B = image[0, 0].copy()
            for a in range(config.num_actions):
                print(a, 'actions', np.sum(actions == a) / total)
                A = np.zeros((*B.shape, 3))
                #print(A, B)
                A[..., 0] += B * 255
                A[..., 1] += B * 255
                A[..., 2] += B * 255
                A[a0 == a, 0] += 250
                cv2.imwrite('actions/' + str(a) + '.jpg', A)

            break

    psnr_res = np.mean(np.array(PSNR_list), axis=0)
    ssim_res = np.mean(np.array(SSIM_list), axis=0)

    print('PSNR', psnr_res)
    print('SSIM', ssim_res)

    avg_reward = reward_sum / i
    print('test finished: reward ', avg_reward)

    return avg_reward, psnr_res, ssim_res
Example #49
0
device = "gpu"

dt = 0.25
seed = 4
#alpha=0.25

net_size = 50
epochs = 10000
bptt_steps = seq_size = 50
le_size = 10
lrate = 0.0001
decay_rate = 1.0 #0.999

forecast_step = 1

env = Env("piano")



source_data_file_list = []

for f in sorted(os.listdir(env.dataset())):
    if f.endswith("sparse_acoustic_data.dump"):
        print "Considering {} as input".format(f)
        source_data_file_list.append(env.dataset(f))


data_file_list = source_data_file_list[:]

max_t, input_size = 0, None
Example #50
0
def train_filter(model, a2c):
    args = parse()
    config = Config('filter_config.yml')

    torch.backends.cudnn.benchmark = True

    #log_dir = os.path.expanduser(args.log_dir)

    env = Env(config.move_range, reward_method=config.reward_method)
    #model = MyFcn(num_actions=config.num_actions)
    #model = torch.nn.DataParallel(model, device_ids=args.gpu).cuda()
    #a2c = PixelWiseA2C(model=None, optimizer=None, t_max=100000, gamma=config.gamma, beta=1e-2)
    filter_model = FilterModel()
    filter_model = filter_model.cuda()
    optimizer = torch.optim.SGD(filter_model.parameters(),
                                config.base_lr,
                                momentum=0)

    train_loader = torch.utils.data.DataLoader(dataset=MRIDataset(
        root=args.root, image_set='train', transform=True),
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=False)

    writer = SummaryWriter('./filter_logs')

    #for lp in [0, 0.01, 0.02, 0.08, 0.09, 0.095, 0.1, 0.105, 0.11]:
    #    print('lp', lp)
    #    avg_reward, psnr_res, ssim_res = test(model, a2c, config, args, laplace_param=lp)

    for sobel_v1 in [0, 0.01, 0.02, 0.08, 0.09, 0.095, 0.1, 0.105, 0.11]:
        print('sobel_v1', sobel_v1)
        avg_reward, psnr_res, ssim_res = test(model,
                                              a2c,
                                              config,
                                              args,
                                              sobel_v1_param=sobel_v1)

    episodes = 0
    while episodes < config.num_episodes:

        for i, (ori_image, image) in enumerate(train_loader):
            learning_rate = adjust_learning_rate(
                optimizer,
                episodes,
                config.base_lr,
                policy=config.lr_policy,
                policy_parameter=config.policy_parameter)
            ori_image_input = Variable(ori_image).cuda()
            ori_image = ori_image.numpy()
            image = image.numpy()
            env.reset(ori_image=ori_image, image=image)

            reward = np.zeros((1))
            loss = Variable(torch.zeros(1)).cuda()

            for j in range(config.episode_len):
                image_input = Variable(torch.from_numpy(image).cuda(),
                                       volatile=True)
                #reward_input = Variable(torch.from_numpy(reward).cuda())
                pout, vout = model(image_input)
                actions = a2c.act(pout, deterministic=True)
                #print(actions)
                mask_laplace = (actions == 6)[:, np.newaxis]
                action_mask = Variable(
                    torch.from_numpy(mask_laplace.astype(np.float32))).cuda()
                print(action_mask.mean())
                image_input = Variable(torch.from_numpy(image).cuda())
                output_laplace = filter_model(image_input)
                ll = torch.abs(ori_image_input - output_laplace) * action_mask
                #print(ll.shape)
                loss += ll.mean()
                previous_image = image
                image, reward = env.step(actions)
                #print(ori_image_input.shape, action_mask.shape, actions.shape, output_laplace.shape)

                if i % 40 == 0:
                    print('reward', j, np.mean(reward))
                    print(
                        computeSSIM(ori_image[0, 0], previous_image[0, 0],
                                    image[0, 0]))
                    print('diff', (
                        torch.abs(ori_image_input.data -
                                  torch.from_numpy(image).cuda()) -
                        torch.abs(ori_image_input.data - output_laplace.data) *
                        action_mask.data).mean())
                image = np.where(mask_laplace,
                                 output_laplace.cpu().data.numpy(), image)
                image = np.clip(image, 0, 1)

            #loss = a2c.stop_episode_and_compute_loss(reward=Variable(torch.from_numpy(reward).cuda()), done=True) / config.iter_size
            loss.backward()

            if not (episodes % config.iter_size):
                optimizer.step()
                optimizer.zero_grad()
                lw = float(filter_model.state_dict()
                           ['conv_laplace.weight'].cpu().numpy())
                print('loss:', ll.mean(), 'weight:', lw)
                writer.add_scalar('weight', lw, episodes)

            episodes += 1
            if episodes % config.display == 0:
                print('episode: ', episodes, 'loss: ', loss.data)

            if not (episodes % config.save_episodes):
                #torch.save(model.module.state_dict(), 'model/' + str(episodes) + '.pth')
                print('model saved')

            if not (episodes % config.test_episodes):
                avg_reward, psnr_res, ssim_res = test(model, a2c, config, args)
                #writer.add_scalar('psnr_ref', psnr_res[0], episodes)
                #writer.add_scalar('psnr', psnr_res[1], episodes)
                #writer.add_scalar('ssim_ref', ssim_res[0], episodes)
                #writer.add_scalar('ssim', ssim_res[1], episodes)

            if episodes == config.num_episodes:
                writer.close()
                break
Example #51
0
def sarsa():
    grid_size = 4
    env = Env(grid_size)
    policy = EspionGreedyPolicy(env.actions(), range(grid_size**2))
    Q = defaultdict(float)
    for i in range(5000):
        s0 = env.init()
        if env.is_t(s0):
            continue
        a0 = policy.get_a(s0)
        while not env.is_t(s0):
            s, r = env.step(a0)
            a = policy.get_a(s)
            Q[(s0, a0)] += 0.9 * (r + 0.9 * Q[(s, a)] - Q[(s0, a0)])
            s0 = s
            a0 = a
            mm = [(x, Q[(s0, x)]) for x in env.actions()]
            action = max(mm, key=lambda x:x[1])[0]
            policy.set_max(s0, action)

    Pi = {}
    for i in range(grid_size**2):
        Pi[i] = policy.get_m(i)
    for t in env.get_t():
        Pi[t] = 'ter'

    env.render(Pi)
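sarsa() relies on an EspionGreedyPolicy exposing get_a, set_max and get_m, which is not shown in this excerpt. A minimal sketch of a compatible epsilon-greedy policy (our reading of the interface, not the project's implementation):

import random

class EpsilonGreedyPolicySketch:
    def __init__(self, actions, states, epsilon=0.1):
        self.actions = list(actions)
        self.epsilon = epsilon
        # current greedy action per state, initialised arbitrarily
        self.greedy = {s: self.actions[0] for s in states}

    def get_a(self, state):
        # explore with probability epsilon, otherwise exploit the stored greedy action
        if random.random() < self.epsilon:
            return random.choice(self.actions)
        return self.greedy[state]

    def set_max(self, state, action):
        # record the new greedy action for this state
        self.greedy[state] = action

    def get_m(self, state):
        # report the greedy action, used when rendering the final policy
        return self.greedy[state]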
Example #52
0
class MulControl:
    def __init__(self):
        # Initialize the environment
        self.global_arg = arg.init_global_arg()
        env_arg = arg.init_env_arg(self.global_arg)
        # Add a load operation for the NK model
        self.main_env = Env(env_arg)
        for model_type in ['st', 'ed']:
            if all_config['checkpoint']['env'][model_type]['enable']:
                self.main_env.nkmodel_load(all_config['checkpoint']['env']['path'], model_type)
            self.main_env.nkmodel_save(all_config["nkmodel_path"][model_type], model_type)
        # Initialize the individual agents
        self.agents = []
        csv_head_agent = ['agent_no'] + ['st_state'] + ['st_value'] + ['insight'] + ['xplr'] + ['xplt'] + ['enable']
        moniter.AppendToCsv(csv_head_agent, all_config['agent_csv_path'])
        for i in range(self.global_arg["Nagent"]):
            # Random initial position for each agent
            start_st_label = [randint(0, self.main_env.P - 1) for j in range(self.main_env.N)]
            state_start = State(start_st_label)
            self.agents.append(Agent(arg.init_agent_arg(self.global_arg,
                                                        self.main_env.arg),
                                     self.main_env))
            self.agents[i].state_now = deepcopy(state_start)
            self.agents[i].agent_id = i

            # Instead of giving a global area at the start, add a single-point area containing the start state
            start_area = Area(self.agents[i].state_now, [False] * self.main_env.N, 0)
            start_area.info = get_area_sample_distr(env=self.main_env, area=start_area, state=self.agents[i].state_now,
                                                    T_stmp=0, sample_num=1, dfs_r=1)
            start_area.sign = Sign(i, 0, 'start')
            self.agents[i].renew_m_info(start_area, 0)
            self.a_plan = None
            logging.info("state:%s, st_value:%s,insight:%.5s ,xplr:%.5s, xplt:%.5s, enable:%.5s" % (
                str(self.agents[i].state_now),
                self.main_env.getValue(self.agents[i].state_now, 0),
                self.agents[i].agent_arg['a']['insight'],
                self.agents[i].agent_arg['a']['xplr'],
                self.agents[i].agent_arg['a']['xplt'],
                self.agents[i].agent_arg['a']['enable']))
            # Record the agent's info
            csv_info_agent = ['agent%d' % i] \
                             + [self.agents[i].state_now] \
                             + [self.main_env.getValue(self.agents[i].state_now, 0)] \
                             + [self.agents[i].agent_arg['a']['insight']] \
                             + [self.agents[i].agent_arg['a']['xplr']] \
                             + [self.agents[i].agent_arg['a']['xplt']] \
                             + [self.agents[i].agent_arg['a']['enable']]
            moniter.AppendToCsv(csv_info_agent, all_config['agent_csv_path'])

        # Initialize the social network
        soclnet_arg = arg.init_soclnet_arg(self.global_arg, env_arg)
        self.socl_net = SoclNet(soclnet_arg)
        self.socl_net.new_flat_init()  # revised initialization method
        # self.socl_net.flat_init()
        if all_config['checkpoint']['socl_network']['enable']:
            self.socl_net.power_load(all_config['checkpoint']['socl_network']['power'])
            self.socl_net.relat_load(all_config['checkpoint']['socl_network']['relat'])
        self.record = Record()

        self.metric = metrics.register_all_metrics(metrics.Metrics())

    def run_meet_frame(self, Ti, Tfi, meet_name, member, host, up_info):
        # Hold the meeting specified by meet_name
        logging.debug("m_name:%s, member:%s, host:%s" % (meet_name, member, host))
        self.agents, self.socl_net = meeting.meet_map[meet_name](env=self.main_env,
                                                                 agents=self.agents,
                                                                 member=member,
                                                                 host=host,
                                                                 socl_net=self.socl_net,
                                                                 record=self.record,
                                                                 T=Ti, Tfi=Tfi)

    def run_all_frame(self, Ti, Tfi, meet_req, up_info):
        # Copy in each agent's initial state from the previous frame
        for i in range(len(self.agents)):
            last_arg = deepcopy(self.agents[i].frame_arg)
            # logging.debug("agent %d, %s"%(i,"{}".format(self.agents[i].frame_arg)))
            self.agents[i].frame_arg = arg.init_frame_arg(
                global_arg=self.global_arg,
                env_arg=self.main_env.arg,
                agent_arg=self.agents[i].agent_arg,
                stage_arg=self.agents[i].stage_arg,
                last_arg=last_arg,
                Tp=Ti,
                PSMfi=self.main_env.getValue(self.agents[i].state_now, Ti)
            )
        logging.debug("agent copy finished")
        # Clear each agent's action and meeting records
        for i in range(len(self.agents)):
            self.agents[i].meeting_now = ''
            self.agents[i].policy_now = ''

        # NOTE cid: apply natural decay to the SoclNet relations
        self.socl_net.relat_cd(self.socl_net.arg['re_decr_r'])

        # Read the collective actions initiated earlier
        all_host = set()
        all_meet_info = {}
        new_meet_req = {}
        # Gather the hosts of every kind of meeting first and add them to the corresponding meet_info
        # The structure of meet_req is roughly:
        # meet_req = {
        #    "m_name1": {agent}
        #    "m_name2": {agent}
        # }
        # m_name is the name of a collective action, e.g. information exchange (xxjl)

        for m_name in meet_req:
            all_host = all_host.union(meet_req[m_name])
            all_meet_info[m_name] = {"member": deepcopy(meet_req[m_name]),
                                     "host": deepcopy(meet_req[m_name])}
        # Ask each agent whether it joins
        logging.debug("all host:%s" % (all_host))
        for m_name in all_meet_info:
            logging.debug("before m_name:%s, member:%s, host:%s" % (
                m_name, all_meet_info[m_name]['member'], all_meet_info[m_name]['host']))
        for i in range(len(self.agents)):
            #            logging.debug("all_host:{}".format(all_host))
            # Skip all hosts
            if i in all_host:
                continue
            # Returns whether the agent joins a collective action: if not, its individual action has been executed; if so, it is handled in the subsequent run_meet_frame
            if self.global_arg['mul_agent']:
                # logging.info("using mul_act")
                self.agents[i], self.socl_net, meet_info = brain.mul_agent_act(env=self.main_env,
                                                                               soc_net=self.socl_net,
                                                                               agent=self.agents[i],
                                                                               Ti=Ti, Tfi=Tfi, agent_no=i,
                                                                               record=self.record,
                                                                               meet_req=meet_req)
            else:
                self.agents[i], self.socl_net, meet_info = brain.sgl_agent_act(env=self.main_env,
                                                                               soc_net=self.socl_net,
                                                                               agent=self.agents[i],
                                                                               Ti=Ti, Tfi=Tfi, agent_no=i,
                                                                               record=self.record,
                                                                               meet_req=meet_req)

            if meet_info is None:
                continue
            # The agent chose to attend a meeting, so add it to the member list
            if meet_info['type'] == 'commit':
                all_meet_info[meet_info['name']]["member"].add(i)
            # The agent chose to initiate a new meeting
            if meet_info['type'] == 'req':
                if not meet_info['name'] in new_meet_req:
                    new_meet_req[meet_info['name']] = set()
                new_meet_req[meet_info['name']].add(i)
        # Once every host has gathered its members, hold the meetings one by one
        for m_name in all_meet_info:
            logging.debug("after m_name:%s, member:%s, host:%s" % (
                m_name, all_meet_info[m_name]['member'], all_meet_info[m_name]['host']))
            self.run_meet_frame(Ti, Tfi, m_name,
                                all_meet_info[m_name]['member'],
                                all_meet_info[m_name]['host'],
                                up_info)
        self.metric.calc_metric(['frame'], Ti + Tfi,
                                socl_net=self.socl_net,
                                agents=self.agents,
                                env=self.main_env)
        return new_meet_req

    def run_stage(self, Ti, meet_req, up_info):
        # Copy over each agent's final state from the previous stage
        for i in range(len(self.agents)):
            last_arg = deepcopy(self.agents[i].stage_arg)
            self.agents[i].stage_arg = arg.init_stage_arg(self.global_arg,
                                                          self.main_env.arg,
                                                          self.agents[i].agent_arg,
                                                          last_arg,
                                                          Ti)
        meet_req = {}
        #  NOTE cid: pass up_info in to avoid traversing the model repeatedly
        self.record.add_env_record(self.main_env, Ti, up_info)
        self.record.add_socl_net_record(self.socl_net, Ti)
        for i in range(self.global_arg['Ts']):
            logging.info("frame %3d , Ti:%3d" % (i, Ti))
            self.record.add_agents_record(self.main_env, self.agents, Ti + i)
            # Run the frame and record the meeting requests it generates
            meet_req = self.run_all_frame(Ti, i, meet_req, up_info)

            # Output detailed info for each agent
            for k in range(self.global_arg["Nagent"]):
                tmp_goal = ''
                tmp_goal_value = ''
                if not self.agents[k].a_plan is None:
                    tmp_goal = self.agents[k].a_plan.goal
                    tmp_goal_value = self.agents[k].a_plan.goal_value

                csv_info_result = [
                    Ti + i,
                    str(self.agents[k].state_now),
                    self.main_env.getValue(self.agents[k].state_now, Ti),
                    self.agents[k].get_max_area().info['max'],
                    str(self.agents[k].get_max_area().center),
                    str(self.agents[k].policy_now) + '&' + str(self.agents[k].meeting_now),
                    str(tmp_goal),
                    tmp_goal_value
                ]
                moniter.AppendToCsv(csv_info_result, all_config['result_csv_path'][k])

            # Output the current values
            agent_value = [self.main_env.getValue(self.agents[k].state_now, Ti) for k in
                           range(self.global_arg["Nagent"])]
            agent_avg = sum(agent_value) / len(agent_value)

            csv_info_value = [Ti + i] \
                             + agent_value \
                             + [agent_avg, max(agent_value), min(agent_value)] \
                             + [up_info['nkinfo'][key] for key in ['max', 'min', 'avg']] \
                             + [(agent_avg - up_info['nkinfo']['min']) / (
                    up_info['nkinfo']['max'] - up_info['nkinfo']['min'])]
            moniter.AppendToCsv(csv_info_value, all_config['value_csv_path'][-1])

            # Output max_area
            agent_max_area = [self.agents[k].get_max_area().info['max'] for k in
                              range(self.global_arg["Nagent"])]
            csv_info_area = [Ti + i] \
                            + agent_max_area \
                            + [sum(agent_max_area) / len(agent_max_area)] \
                            + [up_info['nkinfo']['max']]
            moniter.AppendToCsv(csv_info_area, all_config['area_csv_path'])

            # NOTE cid: add act info (with corresponding new fields in the Agent class)
            act_list = [self.agents[k].policy_now + '&' + self.agents[k].meeting_now for k in
                        range(self.global_arg["Nagent"])]
            csv_info_act = [Ti + i] \
                           + act_list
            moniter.AppendToCsv(csv_info_act, all_config['act_csv_path'])

            # Per-stage output
        if self.global_arg['mul_agent']:
            # net_title, net_data = self.record.output_socl_net_per_frame(Ti + i)
            power_save_path = os.path.join(all_config['network_csv_path'], "power_%04d.csv" % (Ti))
            relat_save_path = os.path.join(all_config['network_csv_path'], "relat_%04d.csv" % (Ti))
            self.socl_net.power_save(power_save_path)
            self.socl_net.relat_save(relat_save_path)
            #  P1-05: add output of the Social Network results
        self.metric.calc_metric(['stage'], Ti,
                                socl_net=self.socl_net,
                                agents=self.agents,
                                env=self.main_env)
        return meet_req

    def run_exp(self):
        up_info = {}
        # Result table for each individual agent
        for k in range(self.global_arg["Nagent"]):
            csv_head = ['frame', 'state', 'value', 'area_v', 'area_center', 'act', 'goal', 'goal_value']
            moniter.AppendToCsv(csv_head, all_config['result_csv_path'][k])
        # Aggregated result table
        # Add agent max and agent min columns
        csv_head_value = ['frame'] \
                         + ["agent%d" % (k) for k in range(self.global_arg['Nagent'])] \
                         + ["agent_avg", "agent_max", "agent_min"] \
                         + ['peakmax', 'peakmin', 'peakavg'] \
                         + ['adj_avg']
        moniter.AppendToCsv(csv_head_value, all_config['value_csv_path'][-1])
        csv_head_area = ['frame'] \
                        + ["agent%d" % (k) for k in range(self.global_arg['Nagent'])] \
                        + ["agent_avg"] \
                        + ['nkmax']
        moniter.AppendToCsv(csv_head_area, all_config['area_csv_path'])

        csv_head_act = ['frame'] \
                       + ["agent%d" % (k) for k in range(self.global_arg['Nagent'])]
        moniter.AppendToCsv(csv_head_act, all_config['act_csv_path'])

        stage_num = self.global_arg['T'] // self.global_arg['Ts']

        # self.main_env.getModelDistri()  # for plotting only; call this only when testing!!
        up_info['nkinfo'] = self.main_env.getModelPeakDistri()  # nkinfo now holds the peak values
        # all_peak_value = self.main_env.getAllPeakValue()
        # moniter.DrawHist(all_peak_value, all_config['peak_hist'])

        meet_req = {}
        for i in range(stage_num):
            Ti = i * self.global_arg['Ts'] + 1
            logging.info("stage %3d, Ti:%3d" % (i, Ti))
            self.main_env.T_clock = Ti
            # Traverse the current model once per stage to get its distribution info
            # To reduce computation, only do this for the first frame
            # up_info['nkinfo'] = self.main_env.getModelDistri()
            # logging.debug("max_value:{max}".format(**up_info['nkinfo']))
            # Run one stage; Ti is the first frame of each stage
            meet_req = self.run_stage(Ti, meet_req, up_info)
        moniter.DumpToJson(self.metric.get_data(), all_config['metrics_json_path'])
        moniter.DumpToJson(leadership_bill.leader_bill.to_json(), all_config['leadership_bill_json_path'])
Example #53
0
File: verify.py  Project: Seraphli/gomoku
from env import Env
import random

env = Env()
for dx in range(-1, 2):
    for dy in range(-1, 2):
        if dx == 0 and dy == 0:
            continue
        for x in range(0, 15):
            for y in range(0, 15):
                if dx < 0 and x < 5:
                    continue
                if dx > 0 and x > 10:
                    continue
                if dy < 0 and y < 5:
                    continue
                if dy > 0 and y > 10:
                    continue
                for m in range(100):
                    env.reset()
                    actions = []
                    for i in range(5):
                        actions.append((x + dx * i, y + dy * i))
                    for i in range(4):
                        action = random.choice(env.actions)
                        while action in actions:
                            action = random.choice(env.actions)
                        env.take_action(actions[i])
                        assert not env.game_over
                        env.take_action(action)
                        assert not env.game_over
Example #54
0
import numpy as np

from tqdm import tqdm

from models import Stochastic
from models import RoundRobin
from models import Greedy
from models import learn_local
from models import learn_hierarchical


if __name__ == '__main__':
    logger = get_logger(args.note)
    logger.info(str(args))

    env = Env()

    models = [
        RoundRobin(act_size=args.n_servers),

        Stochastic(act_size=args.n_servers),

        Greedy(act_size=args.n_servers, n_servers=args.n_servers),

        learn_local(
            env=env,
            total_epoches=args.total_epoches,
            n_servers=args.n_servers,
            l_ob_size=args.l_obsize + 1,
            l_act_size=args.l_actsize,
            l_latents=args.l_latents,
Example #55
0
import reader
import printer
import mal_types
from env import Env
from core import ns
import sys

repl_env = Env()
repl_env.set('eval', lambda ast: EVAL(ast, repl_env))
for k, v in ns.items():
    repl_env.set(k, v)


def READ(string):
    return reader.read_str(string)


def EVAL(ast, env):
    while True:
        if not isinstance(ast, mal_types.list_types):
            return eval_ast(ast, env)
        elif not ast:
            return ast
        elif isinstance(ast, mal_types.list_types):
            if len(ast) == 0:
                return ast
            if isinstance(ast[0], mal_types.MalSymbol):
                if ast[0].data == 'def!':
                    value = EVAL(ast[2], env)
                    env.set(ast[1].data, value)
                    return value
Example #56
0
from keras.optimizers import Adam
from rl.agents import NAFAgent
from rl.memory import SequentialMemory
from rl.random import OrnsteinUhlenbeckProcess
from rl.core import Processor


class PendulumProcessor(Processor):
    def process_reward(self, reward):
        # The magnitude of the reward can be important. Since each step yields a relatively
        # high reward, we reduce the magnitude by two orders.
        return reward / 100.


N = 5
env = Env(N)
nb_actions = N

processor = PendulumProcessor()

memory = SequentialMemory(limit=100000, window_length=1)

random_process = OrnsteinUhlenbeckProcess(theta=.15,
                                          mu=0.,
                                          sigma=.3,
                                          size=nb_actions)

agent = NAFAgent(covariance_mode='diag',
                 nb_actions=nb_actions,
                 V_model=Vmodel(N),
                 L_model=Lmodel(N),
Example #57
0
from env import Env
import time

# set the environment
env = Env((8, 8), (130, 90), default_rewards=0)

def game_map_1(environment):
	environment.add_item('yellow_star', (3, 3), pickable=True)
	environment.add_item('yellow_star', (0, 7), pickable=True)
	environment.add_item('red_ball', (5, 6), terminal=True, label="Exit")

# select a game
game_map_1(env)
for _ in range(100):
	action = env.action_space.sample()
	print(action)
	reward, next, end = env.step(action)
	print(reward, next, end)
	time.sleep(0.2)
env.reset()
Example #58
0
np.random.seed(1)


#### epsilon-greedy action selection
def epsilon_greedy(Q, state):
    n = np.random.uniform()

    if (n > 1 - EPSILON) or ((Q[state, :] == 0).all()):
        action = np.random.randint(0, 4)  # 0~3
    else:
        action = Q[state, :].argmax()
    return action


e = Env()
#### Q table: Q(s, a)
Q = np.zeros((e.state_num, 4))

for i in range(EPOCH):
    e = Env()
    #### E table (eligibility traces): E(s, a)
    ### 1. mainly records the path experienced so far
    ### 2. i.e. the resulting reward change is related to every state-action visited along the path
    E = np.zeros((e.state_num, 4))
    #### e.is_end !=False
    while ((e.is_end == 0) and (e.step < MAX_STEP)):
        action = epsilon_greedy(Q, e.present_state)
        state = e.present_state
        reward = e.interact(action)
        new_state = e.present_state
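The E table initialised above points to a SARSA(λ)-style eligibility-trace update, but the excerpt stops before the learning step. A minimal sketch of such an update over the numpy Q and E tables (alpha, gamma and lambda_ are assumed hyperparameters, not values taken from this code):

def sarsa_lambda_update(Q, E, state, action, reward, new_state, new_action,
                        alpha=0.1, gamma=0.9, lambda_=0.8):
    # Q and E are numpy arrays of shape (state_num, 4), as initialised above.
    # TD error for the transition (state, action) -> (new_state, new_action)
    delta = reward + gamma * Q[new_state, new_action] - Q[state, action]
    # bump the trace of the pair that was just visited
    E[state, action] += 1
    # every previously visited pair shares in the update, weighted by its trace
    Q += alpha * delta * E
    # decay all traces
    E *= gamma * lambda_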
Example #59
0
    sys.stderr.write('       hash_type   = stream | historic\n')
    sys.stderr.write('       hash        = the hash\n')
    sys.stderr.write('       name        = a friendly name for the subscription\n')
    sys.stderr.write('       key=val     = output_type-specific arguments\n')
    sys.stderr.write('\n')
    sys.stderr.write('Example\n')
    sys.stderr.write('       PushFromHash http stream <hash> \"Push Name\" delivery_frequency=10 \\\n')
    sys.stderr.write('                    url=http://www.example.com/push_endpoint auth.type=none\n')
    sys.stderr.write('\n')

    sys.stderr.write('\n')
    if exit:
        sys.exit(1)

# Set up the environment
env = Env(sys.argv)

# Make sure we have enough arguments
if env.get_arg_count() < 4:
    usage()

# Get the args
output_type = env.get_arg(0)
hash_type   = env.get_arg(1)
hash        = env.get_arg(2)
name        = env.get_arg(3)

try:
    # Create the Push definition
    pushdef = env.get_user().create_push_definition()
    pushdef.set_output_type(output_type)
Example #60
0
        if t > 10 * 60:  # eval takes too long
            self.eval_episode = int(self.eval_episode * 0.94)
        self.trainer.monitors.put_scalar('farmer_win_rate', farmer_win_rate)
        self.trainer.monitors.put_scalar('lord_win_rate', 1 - farmer_win_rate)


if __name__ == '__main__':
    # encoding = np.load('encoding.npy')
    # print(encoding.shape)
    # env = Env()
    # stat = StatCounter()
    # init_cards = np.arange(21)
    # # init_cards = np.append(init_cards[::4], init_cards[1::4])
    # for _ in range(10):
    #     fw = play_one_episode(env, lambda b: np.random.rand(1, 1, 100) if b[1][0] else np.random.rand(1, 1, 21), [100, 21])
    #     stat.feed(int(fw))
    # print('lord win rate: {}'.format(1. - stat.average))
    env = Env()
    stat = StatCounter()
    for i in range(100):
        env.reset()
        print('begin')
        env.prepare()
        r = 0
        while r == 0:
            role = env.get_role_ID()
            intention, r, _ = env.step_auto()
            # print('lord gives' if role == 2 else 'farmer gives', to_char(intention))
        stat.feed(int(r < 0))
    print(stat.average)