Example #1
def main():
	from fireplace import cards
	cards.db.initialize()
	from utils import Agent,play_set_of_games,play_MechaHunterGames
	from hearthstone.enums import CardClass
	Human=Agent("Human",None,myClass=CardClass.MAGE)
	StandardRandom=Agent("Maya",None) # Defaults to HUNTER if no class is specified
	
	# Full-depth Monte Carlo lookahead
	## Maya=Agent("Maya",None)

	# Standard vector-evaluation agent: pass a 34-dimensional weight vector as its option
	#from agent_Standard import StandardStep1
	#import random
	#opt = []
	#for i in range(34):
	#	opt.append(random.randint(1,10))
	#GhostCatPlayer=Agent("GhostCat", StandardStep1,myOption=opt,myClass=CardClass.WARRIOR)
	# Just for fun
	#PIPlayer=Agent("Pi", StandardStep1,myOption=[3,1,4,1,5,9,2,6,5,3,5,8,9,7,9,3,2,3,8,4,6,2,6,4,3,3,8,3,2,7,9,5,0,2,8],myClass=CardClass.PRIEST)

	# Agent that builds its strategy from words, by Ahara
	#from agent_word_strategy import WS, agent_word_strategy
	#WSplayer = Agent("WS", agent_word_strategy,\
	#	myOption=[WS.ミニョンで敵ヒーローの体力を削る, WS.呪文を使えるなら呪文, WS.ランダムにプレー],\
	#	myClass=CardClass.PRIEST)

	#AngryCat by Ahara: an algorithm that removes pointless actions from the candidate moves
	#from agent_AngryCat import AngryCatAI
	#AngryCatPlayer = Agent("AngryCat", AngryCatAI)

	# Play the games
	play_set_of_games(Human, StandardRandom, gameNumber=1, debugLog=True) 
	# Match using Hunter-only decks (Mecha Hunter)
	#play_MechaHunterGames(StandardPlayer, AngryCatPlayer, gameNumber=1, debugLog=True)

	# Round-robin tournament
	#from competition import play_round_robin_competition
	#play_round_robin_competition([StandardRandom,PIPlayer,AngryCatPlayer],matchNumber=5)

	# Investigate the synergy between two specific cards (idea by Maya)
	#from card_pair import investigate_card_pair, find_card_pair
	#investigate_card_pair()
	# Loosely search for card pairs that have synergy
	#find_card_pair(1)
	#print("test_branch_yamadamaya")

	pass
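
# A minimal sketch of how this script would typically be invoked; the entry-point
# guard below is an assumption, since the snippet above only defines main().
if __name__ == "__main__":
	main()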
Example #2
def getTransportistas(agentType, directoryAgent, sender, messageCount):
    gmess = Graph()
    # Build the search message for the directory agent
    gmess.bind('foaf', FOAF)
    gmess.bind('dso', DSO)
    ask_obj = agn[sender.name + '-Search']

    gmess.add((ask_obj, RDF.type, DSO.Search))
    gmess.add((ask_obj, DSO.AgentType, agentType))
    gr = send_message(
        build_message(gmess,
                      perf=ACL.request,
                      sender=sender.uri,
                      receiver=directoryAgent.uri,
                      msgcnt=messageCount,
                      content=ask_obj), directoryAgent.address)
    dic = get_message_properties(gr)
    content = dic['content']
    agents = []
    for (s, p, o) in gr.triples((content, None, None)):
        if str(p).startswith('http://www.w3.org/1999/02/22-rdf-syntax-ns#_'):
            address = gr.value(subject=o, predicate=DSO.Address)
            url = gr.value(subject=o, predicate=DSO.Uri)
            name = gr.value(subject=o, predicate=FOAF.name)
            agent = Agent(name, url, address, None)
            agents += [agent]

    return agents
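
# Hypothetical usage sketch (not taken from the original project): the
# DSO.TransportAgent type, the two Agent instances, and their addresses are
# assumptions made only to illustrate the call above.
DirectoryAgent = Agent('DirectoryAgent', agn.Directory, 'http://localhost:9000/comm', None)
Messenger = Agent('Messenger', agn.Messenger, 'http://localhost:9010/comm', None)
transportistas = getTransportistas(DSO.TransportAgent, DirectoryAgent, Messenger, messageCount=0)
for a in transportistas:
    print(a.name, a.address)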
Example #3
class MyForm(BoxLayout):  # Although this class definition looks empty, the GUI "logic" defined in my.kv is bound to it, so it is effectively defined here
    text_input = ObjectProperty()  # Property matching the name referenced in the kv file; it ends up pointing at the widget with the corresponding id
    label_output = ObjectProperty()  # Property matching the name referenced in the kv file; it ends up pointing at the widget with the corresponding id
    agent = Agent()
    translate_res = None

    # Load font resources (for displaying Chinese text)
    kivy.resources.resource_add_path("./fonts")
    font_zh = kivy.resources.resource_find("msyh.ttc")
    # Register the font via LabelBase
    LabelBase.register("msyh_labelBase", "msyh.ttc")
    kivy.core.text.Label.register("msyh_label", "msyh.ttc")
    
    def button_act(self, action=None):
        print('Start ...')
        if action is None:
            self.translate_res = self.text_input.text  # Get the text value of the widget that text_input points to
        elif action == 'Translate':
            t = self.target_lang.text
            if t == 'Target':
                self.translate_res = self.text_input.text
            else:
                self.translate_res = self.agent.translate(text=self.text_input.text, source='auto', target=t)
        elif action == 'Audio':
            text = self.agent.transcribe()
            t = self.target_lang.text
            if t == 'Target':
                t = 'en'  # default
            self.translate_res = self.agent.translate(text=text, source='auto', target=t)
        elif action == 'Speech':
            # Speak the translation result aloud
            try:
                assert self.translate_res is not None
                self.agent.speech(self.translate_res)
                print('Finished speaking ...')
            except Exception:
                print('No content to speak.')
        else:
            raise NotImplementedError

        print(self.translate_res)  # Print the result to the console
        if self.translate_res is not None and action != 'Speech':
            # Show the translation result in the UI
            self.label_output.text = self.translate_res
            print('Finished translating ...')

        return
    
    def clean_label(self):
        # Clear the label text
        self.label_output.text = ""  # (Waiting ...)
        self.translate_res = None
        return
Example #4
def simulate_hunterschoice_policy_from_func(policy, N=1000):
    grounds = create_small_hunting_environment()
    env = HuntingMDPWrapper(grounds)
    agent = Agent(env, policy)

    successes = 0
    episode_lengths = []
    for i in range(N):
        rewards = run_agent(agent)
        if rewards[-1] == 1:
            successes += 1
        episode_lengths.append(len(rewards))

    return successes / N, episode_lengths
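
# Minimal sketch of calling the simulator with a hand-written policy function.
# Assumption: a policy maps an observed state to a discrete action index; the
# constant action 0 below is purely illustrative.
def always_first_action_policy(state):
    return 0

success_rate, lengths = simulate_hunterschoice_policy_from_func(always_first_action_policy, N=500)
print(f"success rate: {success_rate:.2%}, mean episode length: {sum(lengths) / len(lengths):.1f}")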
Example #5
def simulate_lake_policy_from_func(policy, N=1000, size="small"):
    if size == "small":
        env = get_small_lake()
    else:
        env = get_large_lake()
    agent = Agent(env, policy)

    successes = 0
    episode_lengths = []
    for i in range(N):
        rewards = run_agent(agent)
        if rewards[-1] == 1:
            successes += 1
        episode_lengths.append(len(rewards))

    return successes / N, episode_lengths
Example #6
def simulate_hunterschoice_policy(policy_location, N=1000):
    print(f"Simulating Policy {policy_location}")
    with open(policy_location, "rb") as f:
        solver = pickle.load(f)

    grounds = create_small_hunting_environment()
    policy = create_hunterschoice_policy(solver.policy)

    agent = Agent(grounds, policy)

    successes = 0
    episode_lengths = []
    for i in range(N):
        rewards = run_agent(agent)
        if rewards[-1] == 1:
            successes += 1
        episode_lengths.append(len(rewards))

    return successes / N, episode_lengths
Example #7
def simulate_frozenlake_policy(policy_location, N=1000):
    print(f"Simulating Policy {policy_location}")
    with open(policy_location, "rb") as f:
        solver = pickle.load(f)

    if "small" in policy_location:
        lake = get_small_lake()
    else:
        lake = get_large_lake()

    policy = create_frozenlake_policy(solver.policy)
    agent = Agent(lake, policy)

    successes = 0
    episode_lengths = []
    for i in range(N):
        rewards = run_agent(agent)
        if rewards[-1] == 1:
            successes += 1
        episode_lengths.append(len(rewards))

    return successes / N, episode_lengths
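
# Hypothetical call: the pickle path below, and the convention that "small" in the
# filename selects the small lake, are assumptions made only for illustration.
rate, lengths = simulate_frozenlake_policy("policies/frozenlake_small_vi.pkl", N=2000)
print(f"success rate over 2000 episodes: {rate:.2%}")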
Example #8
def getAgentInfo(agentType, directoryAgent, sender, messageCount):
    gmess = Graph()
    gmess.bind('foaf', FOAF)
    gmess.bind('dso', DSO)
    ask_obj = agn[sender.name + '-Search']

    gmess.add((ask_obj, RDF.type, DSO.Search))
    gmess.add((ask_obj, DSO.AgentType, agentType))
    gr = send_message(
        build_message(gmess,
                      perf=ACL.request,
                      sender=sender.uri,
                      receiver=directoryAgent.uri,
                      msgcnt=messageCount,
                      content=ask_obj), directoryAgent.address)
    dic = get_message_properties(gr)
    content = dic['content']

    address = gr.value(subject=content, predicate=DSO.Address)
    url = gr.value(subject=content, predicate=DSO.Uri)
    name = gr.value(subject=content, predicate=FOAF.name)

    return Agent(name, url, address, None)
Example #9
RUN_ID = (
    "cliff-GAMMA(%.2f)-ALPHA(%f)-MEMSIZE(%d)-EPISODES(%d)-BATCHSIZE(%d)-REPLACE_AFTER(%d)-PER(%r)"
    % (GAMMA, ALPHA, MAX_MEMORY_SIZE, NUM_EPISODES, BATCH_SIZE, REPLACE_AFTER,
       USE_PER))

time_started = datetime.now().strftime("%d.%m.%Y-%H:%M:%S")
if USE_TB: writer = SummaryWriter("runs/%s-%s" % (RUN_ID, time_started))

env = CliffWalking1D(9)

agent = Agent(Net=Net,
              input_size=1,
              output_size=env.action_space.n,
              gamma=GAMMA,
              alpha=ALPHA,
              max_memory_size=MAX_MEMORY_SIZE,
              num_episodes=NUM_EPISODES,
              replace_after=REPLACE_AFTER,
              use_per=USE_PER,
              per_a=1)
# Filling memory
print("Filling replay memory")
state = env.reset()
if USE_TB:
    writer.add_graph(agent.Q_loc, torch.FloatTensor([state]).to(device))

for t in range(200):
    action = env.action_space.sample()
    next_state, reward, done, info = env.step(action)

    agent.store_transition(state, action, reward, next_state, done)
    # Advance to the next state; restart the episode when it terminates
    state = env.reset() if done else next_state
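
# Sketch of the training loop that would typically follow the warm-up above.
# The choose_action() and learn() method names are assumptions about this Agent
# class; only store_transition() actually appears in the snippet.
for episode in range(NUM_EPISODES):
    state = env.reset()
    done = False
    while not done:
        action = agent.choose_action(state)
        next_state, reward, done, info = env.step(action)
        agent.store_transition(state, action, reward, next_state, done)
        agent.learn()
        state = next_state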
Example #10
EPS_DECAY = 200  # tokyo uni value = 200
TARGET_UPDATE = 10  # for updating the target network

# for the experiments bash script:
record_file = "../exp/rl_results_" + args.data_name + ".csv"

# File for recording episode durations - original code:
# record_file = "../exp/rl_results.csv"

# Random Seed
for seed in range(1, 101):  # original range (1,6)
    random.seed(seed)
    torch.manual_seed(0)

    # Initialize Agent
    agent = Agent()
    # Get number of actions
    # n_actions = len(agent.action_space)
    n_actions = res_dict["num_words"]
    print(f"n_actions: {n_actions}")
    # Get size of state
    state_size = agent.get_state().size(1)

    # Initialize policy and target network
    hidden_size = 32
    policy_net = DQN(state_size, n_actions, hidden_size).to(device)
    target_net = DQN(state_size, n_actions, hidden_size).to(device)
    target_net.load_state_dict(policy_net.state_dict())
    target_net.eval()

    learning_rate = 1e-2  # tokyo uni value = 1e-2
Example #11
def get_agent_any_type(type_opps, name, policy_type, env):
    if type_opps == "zoo":
        return load_agent(name, policy_type, "zoo_ant_policy_2", env, 1)
    elif type_opps == "const":
        trained_agent = constant_agent_sampler()
        trained_agent.load(name)
        return trained_agent
    elif type_opps == "lstm":
        policy = LSTMPolicy(scope="agent_new",
                            reuse=False,
                            ob_space=env.observation_space.spaces[0],
                            ac_space=env.action_space.spaces[0],
                            hiddens=[128, 128],
                            normalize=True)

        def get_action(observation):
            return policy.act(stochastic=True, observation=observation)[0]

        trained_agent = Agent(get_action, policy.reset)

        with open(name, "rb") as file:
            values_from_save = pickle.load(file)

        for key, value in values_from_save.items():
            var = tf.get_default_graph().get_tensor_by_name(key)
            sess.run(tf.assign(var, value))

        return trained_agent
    elif type_opps == "our_mlp":
        #TODO DO ANYTHING BUT THIS.  THIS IS VERY DIRTY AND SAD :(
        def make_env(id):
            # TODO: seed (not currently supported)
            # TODO: VecNormalize? (typically good for MuJoCo)
            # TODO: baselines logger?
            # TODO: we're loading identical policy weights into different
            # variables, this is to work-around design choice of Agent's
            # having state stored inside of them.
            sess = utils.make_session()
            with sess.as_default():
                multi_env = env

                attacked_agent = constant_agent_sampler(act_dim=8,
                                                        magnitude=100)

                single_env = Gymify(
                    MultiToSingle(CurryEnv(multi_env, attacked_agent)))
                single_env.spec = gym.envs.registration.EnvSpec('Dummy-v0')

                # TODO: upgrade Gym so we don't have to do this
                single_env.observation_space.dtype = np.dtype(np.float32)
            return single_env
            # TODO: close session?

        #TODO DO NOT EVEN READ THE ABOVE CODE :'(

        denv = SubprocVecEnv([functools.partial(make_env, 0)])

        model = ppo2.learn(network="mlp",
                           env=denv,
                           total_timesteps=1,
                           seed=0,
                           nminibatches=4,
                           log_interval=1,
                           save_interval=1,
                           load_path=name)

        stateful_model = StatefulModel(denv, model)
        trained_agent = utils.Agent(action_selector=stateful_model.get_action,
                                    reseter=stateful_model.reset)

        return trained_agent
    raise Exception('Agent type unrecognized')