Example #1
File: train.py Project: yhu9/RCAN
    def load(self, args):

        if args.model_dir != "":
            loadedparams = torch.load(args.model_dir, map_location=self.device)
            #self.agent = agent.Agent(args,chkpoint=loadedparams)
            self.agent = agent.Agent(args)
        else:
            self.agent = agent.Agent(args)
        self.SRmodels = []
        self.SRoptimizers = []
        self.schedulers = []
        for i in range(args.action_space):

            #CREATE THE ARCH
            if args.model == 'basic':
                model = arch.RRDBNet(3, 3, 32, args.d, gc=8)
            elif args.model == 'ESRGAN':
                model = arch.RRDBNet(3, 3, 64, 23, gc=32)
            elif args.model == 'RCAN':
                torch.manual_seed(args.seed)
                checkpoint = utility.checkpoint(args)
                if checkpoint.ok:
                    module = import_module('model.rcan')
                    model = module.make_model(args).to(self.device)
                    kwargs = {}
                else:
                    print('error loading RCAN model. QUITTING')
                    quit()

            #LOAD THE WEIGHTS
            if args.model_dir != "":
                model.load_state_dict(loadedparams["sisr" + str(i)])
                print('continuing training')
            elif args.random:
                print('random init')
                model.apply(init_weights)
            elif args.model == 'ESRGAN':
                model.load_state_dict(torch.load(args.ESRGAN_PATH),
                                      strict=True)
            elif args.model == 'RCAN':
                print('RCAN loaded!')
                model.load_state_dict(torch.load(args.pre_train, **kwargs),
                                      strict=True)
            elif args.model == 'basic':
                if args.d == 1:
                    model.load_state_dict(torch.load(args.basicpath_d1),
                                          strict=True)
                elif args.d == 2:
                    model.load_state_dict(torch.load(args.basicpath_d2),
                                          strict=True)
                elif args.d == 4:
                    model.load_state_dict(torch.load(args.basicpath_d4),
                                          strict=True)
                elif args.d == 8:
                    model.load_state_dict(torch.load(args.basicpath_d8),
                                          strict=True)
                else:
                    print(
                        'no pretrained model available. Random initialization of basic block'
                    )

            self.SRmodels.append(model)
            self.SRmodels[-1].to(self.device)

            #self.SRoptimizers.append(torch.optim.Adam(model.parameters(),lr=1e-5))
            self.SRoptimizers.append(
                torch.optim.Adam(model.parameters(), lr=1e-5))
            scheduler = torch.optim.lr_scheduler.StepLR(self.SRoptimizers[-1],
                                                        1000,
                                                        gamma=0.5)

            self.schedulers.append(scheduler)
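A note on the snippet above: it applies an init_weights helper via model.apply(), but that helper is defined elsewhere in train.py. A minimal sketch of such an initializer (an assumption using Kaiming initialization, not the project's actual code) could look like:

import torch.nn as nn

def init_weights(m):
    # Re-initialize convolutional and linear layers; leave other modules untouched.
    if isinstance(m, (nn.Conv2d, nn.Linear)):
        nn.init.kaiming_normal_(m.weight)
        if m.bias is not None:
            nn.init.zeros_(m.bias)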
Example #2
def Testing():
    print('Testing')

    ## Get dataset

    print("Get dataset")
    loader = Generator()

    ## Get agent and model

    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
    else:
        lane_agent = agent.Agent()
        lane_agent.load_weights(804, "tensor(0.5786)")

    ## testing

    print('Testing loop')
    lane_agent.evaluate_mode()

    if p.mode == 0:  # check model with test data
        for _, _, _, test_image in loader.Generate():
            _, _, ti = test(lane_agent, np.array([test_image]))
            cv2.imshow("test", ti[0])
            cv2.waitKey(0)

    elif p.mode == 1:  # check model with video
        cap = cv2.VideoCapture(
            "/Users/minootaghavi/Desktop/GA/Capstone-Project-1/test/IMG_1398.mp4"
        )
        writer = cv2.VideoWriter('filename.avi',
                                 cv2.VideoWriter_fourcc(*'MJPG'), 10,
                                 (1280, 800))
        while (cap.isOpened()):
            ret, frame = cap.read()
            #torch.cuda.synchronize()
            prevTime = time.time()
            frame = cv2.resize(frame, (512, 256)) / 255.0
            frame = np.rollaxis(frame, axis=2, start=0)
            _, _, ti = test(lane_agent, np.array([frame]))
            curTime = time.time()
            sec = curTime - prevTime
            fps = 1 / (sec)
            s = "FPS : " + str(fps)
            ti[0] = cv2.resize(ti[0], (1280, 800))
            cv2.putText(ti[0], s, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 0))
            cv2.imshow('frame', ti[0])
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
            writer.write(ti[0])

        cap.release()
        writer.release()
        cv2.destroyAllWindows()

    elif p.mode == 2:  # check model with a picture
        test_image = cv2.imread(
            "/Users/minootaghavi/Desktop/GA/tusimple-trained model/minoo/Deep Neural Networks/data/test_set/clips/0530/1492626047222176976_0/20.img"
        )
        test_image = cv2.resize(test_image, (512, 256)) / 255.0
        test_image = np.rollaxis(test_image, axis=2, start=0)
        _, _, ti = test(lane_agent, np.array([test_image]))
        cv2.imwrite(
            '/Users/minootaghavi/Desktop/GA/tusimple-trained model/minoo/Deep Neural Networks/save_test/image2_result.png',
            ti[0])
        cv2.imshow("test", ti[0])
        cv2.waitKey(0)

    elif p.mode == 3:  #evaluation
        print("evaluate")
        evaluation(loader, lane_agent)
Example #3
import numpy as np
import agent as ag
import sumoenv as se

env_train = se.SumoEnv(gui_f=False)
env_test = se.SumoEnv(gui_f=True)
agent = ag.Agent()

EPS = 20

for ieps in range(EPS):
    for i in range(20):
        state = env_train.reset()
        done = False
        while not done:
            action = agent.policy(state)
            next_state, reward, done, rewards = env_train.step_d(action)

            agent.train(state, action, reward, 0.001, [1, 1, done, 1, 1])

            state = next_state
        env_train.close()

    state = env_test.reset()
    done = False
    while not done:
        action = agent.policy(state)
        next_state, reward, done, rewards = env_test.step_d(action)
        print(state)

        state = next_state
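The Agent used above exposes a policy()/train() interface whose implementation is not shown. As a rough sketch of one way such an interface could be realized (tabular epsilon-greedy Q-learning over hashable states; the class name, signature, and hyperparameters are illustrative assumptions, not the project's agent.py):

import random
from collections import defaultdict

class TabularAgent:
    # Illustrative epsilon-greedy Q-learning agent, not the SUMO project's Agent.
    def __init__(self, n_actions, epsilon=0.1, gamma=0.95):
        self.q = defaultdict(lambda: [0.0] * n_actions)
        self.n_actions = n_actions
        self.epsilon = epsilon
        self.gamma = gamma

    def policy(self, state):
        key = tuple(state)
        if random.random() < self.epsilon:
            return random.randrange(self.n_actions)
        values = self.q[key]
        return values.index(max(values))

    def train(self, state, action, reward, next_state, lr=0.001):
        key, next_key = tuple(state), tuple(next_state)
        target = reward + self.gamma * max(self.q[next_key])
        self.q[key][action] += lr * (target - self.q[key][action])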
Example #4
# tas 23.10.19
#

import environment
import agent
import logging
import sys
import stateinfo

logging.basicConfig(
    format='%(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
    level=logging.DEBUG)
# or e.g.:  .WARNING

env = environment.Environment()
agent = agent.Agent(env.action_count)

# -------------------------------------------------------------
# Helper routines for the lowest-level functions in environment
# -------------------------------------------------
s = env.coord2state(0, 3)
if s != 3:
    logging.error("env.coord2state(): s = %d", s)
    sys.exit(0)

s = env.coord2state(2, 1)
if s != 35:
    logging.error("env.coord2state(): s = %d", s)
    sys.exit(0)

y, x = env.state2coord(5)
Example #5
File: test.py Project: timsean/PINet_new
def Testing():
    print('Testing')

    #########################################################################
    ## Get dataset
    #########################################################################
    print("Get dataset")
    loader = Generator()

    ##############################
    ## Get agent and model
    ##############################
    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
    else:
        lane_agent = agent.Agent()
        lane_agent.load_weights(804, "tensor(0.5786)")

    ##############################
    ## Check GPU
    ##############################
    print('Setup GPU mode')
    if torch.cuda.is_available():
        lane_agent.cuda()

    cudnn.benchmark = True
    cudnn.fastest = True

    ##############################
    ## testing
    ##############################
    print('Testing loop')
    lane_agent.evaluate_mode()

    if p.mode == 0:  # check model with test data
        for _, _, _, test_image in loader.Generate():
            _, _, ti = test(lane_agent, np.array([test_image]))
            cv2.imshow("test", ti[0])
            cv2.waitKey(0)

    elif p.mode == 1:  # check model with video
        cap = cv2.VideoCapture(
            "/home/tim/Codes-for-Lane-Detection/ERFNet-CULane-PyTorch/data/day2.MOV"
        )
        while (cap.isOpened()):
            ret, frame = cap.read()
            torch.cuda.synchronize()
            prevTime = time.time()
            #            frame = frame[:-489, :, :]
            frame = cv2.resize(frame, (512, 256)) / 255.0
            frame = np.rollaxis(frame, axis=2, start=0)
            _, _, ti = test(lane_agent, np.array([frame]))
            curTime = time.time()
            sec = curTime - prevTime
            fps = 1 / (sec)
            s = "FPS : " + str(fps)
            ti[0] = cv2.resize(ti[0], (1280, 800))
            cv2.putText(ti[0], s, (0, 100), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 0))
            cv2.imshow('frame', ti[0])
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
        cap.release()
        cv2.destroyAllWindows()

    elif p.mode == 2:  # check model with a picture
        test_image = cv2.imread(p.test_root_url +
                                "clips/0530/1492720840345996040_0/20.jpg")
        test_image = cv2.resize(test_image, (512, 256)) / 255.0
        test_image = np.rollaxis(test_image, axis=2, start=0)
        _, _, ti = test(lane_agent, np.array([test_image]))
        cv2.imshow("test", ti[0])
        cv2.waitKey(0)

    elif p.mode == 3:  #evaluation
        print("evaluate")
        evaluation(loader, lane_agent)
Example #6
# Implementation of the solver
import cube as c
import agent as a

# Get a new agent and a new cube
cube = c.Cube()
agent = a.Agent(cube)

# Start
Example #7
File: run.py Project: NLPatVCU/NER-OUS
def train_network_analysis(train_batch_container, file_sentence_dict, config, supplemental_batch=None):
    """
    Trains a neural network model and reports analysis using k-fold cross validation.

    :param train_batch_container: A BatchContainer object containing the data to be trained.
    :param file_sentence_dict: Map containing SentenceStructures of all files in memory. Used for generating analysis.
    :param config: A configuration instance from configparser.
    :param supplemental_batch: A BatchContainer object containing optional data to be transfer learned. Defaults to None.
    :return: Nothing.
    """
    buckets = int(config['CONFIGURATION']['BUCKETS'])
    epochs = int(config['CONFIGURATION']['EPOCHS'])

    #Setup Buckets for k fold cross validation
    batch_x, batch_y, seq_len, batch_to_file_map = kfold_bucket_generator(train_batch_container.bx, train_batch_container.by, train_batch_container.bs, buckets)
        
    #TODO(Jeff) Clean up supplemental_batch information.
    if supplemental_batch:
        sup_batch_x, sup_batch_y, sup_seq_len, _ = kfold_bucket_generator(supplemental_batch.bx, supplemental_batch.by, supplemental_batch.bs, epochs)

    #Create and train the model for kFoldCrossValidation
    pre_correction_confusion_matrix_list = []
    phrase_matrix_list = []
    post_correction_confusion_matrix = None
    
    if buckets > 1:
        for k in range(0, buckets):
            trainer = agent.Agent(config['NUM_FEATURES'], len(config['CLASS_LIST'])+1, int(config['CONFIGURATION']['MAX_SENTENCE_LENGTH']))

            #Train supplemental for j epochs.
            if supplemental_batch:
                for j in range(0, epochs):
                    for l in range(0, epochs):
                        trainer.train(sup_batch_x[l], sup_batch_y[l], sup_seq_len[l])

            #Train normal for j epochs.
            for j in range(0, epochs):
                loss = 0

                #Train each bucket where l != current K
                for l in range(0, buckets):
                    if l == k:
                        continue
                    loss += trainer.train(batch_x[l], batch_y[l], seq_len[l])

                print("Loss for Epoch " + str(j) + " is " + str(loss) + ".")

            #Evaluate after training and store debugging files.
            cm = trainer.eval_token_level(batch_x[k], batch_y[k], seq_len[k])
            pre_correction_confusion_matrix_list.append(cm)

            file = open("./outCF", 'a')
            outstr = np.array2string(cm)
            file.write(outstr)
            file.write("\n")
            file.close()

            pm = trainer.eval_phrase_level(batch_x[k], seq_len[k], k, train_batch_container.mapping, batch_to_file_map, file_sentence_dict, config)
            phrase_matrix_list.append(pm)
            file = open("./outCFS", 'a')
            outstr = np.array2string(pm)
            file.write(outstr)
            file.write("\n")
            file.close()

            trainer.clean_up()
    else:
        trainer = agent.Agent(config['NUM_FEATURES'], len(config['CLASS_LIST'])+1, int(config['CONFIGURATION']['MAX_SENTENCE_LENGTH']))
        
        #Train supplemental for j epochs.
        if supplemental_batch:
            for j in range(0, epochs):
                trainer.train(sup_batch_x[0], sup_batch_y[0], sup_seq_len[0])
                    
        #Train normal for j epochs.
        for j in range(0, epochs):
            loss = trainer.train(batch_x[0], batch_y[0], seq_len[0])
            print("Loss for Epoch " + str(j) + " is " + str(loss) + ".")
        

    post_correction_confusion_matrix = agent.eval_token_level_from_dict(file_sentence_dict, config)
    
    #Run analysis generation.
    generate_analysis_file(pre_correction_confusion_matrix_list, post_correction_confusion_matrix, phrase_matrix_list, config)
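kfold_bucket_generator is called above but not shown; a minimal sketch of the bucketing step it presumably performs (partitioning the parallel batch lists into k roughly equal folds; the batch-to-file map returned by the real helper is project-specific and omitted here):

import numpy as np

def split_into_buckets(batch_x, batch_y, seq_len, k):
    # Illustrative sketch only: split parallel lists into k roughly equal folds
    # for k-fold cross validation.
    folds = np.array_split(np.arange(len(batch_x)), k)
    bx = [[batch_x[i] for i in fold] for fold in folds]
    by = [[batch_y[i] for i in fold] for fold in folds]
    bs = [[seq_len[i] for i in fold] for fold in folds]
    return bx, by, bs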
Example #8
def main(args):
    """Trains an agent to play Atari games."""

    env = environment.AtariWrapper(args.env_name,
                                   args.max_episode_length,
                                   args.replay_memory_capacity,
                                   args.observations_per_state,
                                   args.action_space)
    test_env = environment.AtariWrapper(args.env_name,
                                        args.max_episode_length,
                                        100 * args.observations_per_state,
                                        args.observations_per_state,
                                        args.action_space)

    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    checkpoint_dir = os.path.join(args.log_dir, 'checkpoint')
    summary_dir = os.path.join(args.log_dir, 'summary')
    summary_writer = tf.summary.FileWriter(summary_dir)

    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory_alloc

    with tf.Session(config=config) as sess:
        player = agent.Agent(env,
                             args.start_epsilon,
                             args.end_epsilon,
                             args.anneal_duration,
                             args.train_interval,
                             args.target_network_reset_interval,
                             args.batch_size,
                             args.learning_rate,
                             args.max_gradient_norm,
                             args.discount)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=args.num_epochs)

        if args.load_path:
            saver.restore(sess, args.load_path)
            LOGGER.info('Restored model from "%s".', args.load_path)

        LOGGER.info('Accumulating %d experiences before training...', args.wait_before_training)

        for _ in range(args.wait_before_training):
            env.step(env.sample_action())

        env.reset()
        LOGGER.info('Accumulated %d experiences.', args.wait_before_training)

        for epoch_i in range(args.num_epochs):
            for _ in range(args.epoch_length):
                player.train()

                if args.render:
                    env.render()

                if env.done:
                    LOGGER.info('Finished episode. Total reward: %d. Length: %d.',
                                env.episode_reward,
                                env.episode_length)

                    summary = tf.Summary()
                    summary.value.add(tag='training/episode_length',
                                      simple_value=env.episode_length)
                    summary.value.add(tag='training/episode_reward',
                                      simple_value=env.episode_reward)
                    summary.value.add(tag='training/fps', simple_value=env.fps)
                    summary.value.add(tag='training/epsilon', simple_value=player.epsilon)

                    total_time_steps = args.train_interval * player.global_step.eval()
                    summary_writer.add_summary(summary, total_time_steps)
                    summary_writer.flush()

            file_name = '{}.{:05d}-of-{:05d}'.format(args.env_name, epoch_i, args.num_epochs)
            model_path = os.path.join(checkpoint_dir, file_name)
            saver.save(sess, model_path)
            LOGGER.info('Saved model to "%s".', model_path)

            # Evaluate the model.
            total_reward = 0
            min_reward = 1e7
            max_reward = -1e7
            total_Q = 0
            summed_min_Qs = 0
            min_Q = 1e7
            summed_max_Qs = 0
            max_Q = -1e7
            time_step = 0
            num_games_finished = 0

            while time_step < args.test_length:
                local_total_reward = 0
                local_total_Q = 0
                local_min_Q = 1e7
                local_max_Q = -1e7
                local_time_step = 0
                test_env.reset()

                while not test_env.done and time_step + local_time_step < args.test_length:
                    local_time_step += 1
                    state = test_env.get_state()

                    # Occasionally try a random action (explore).
                    if random.random() < args.test_epsilon:
                        action = test_env.sample_action()
                    else:
                        action = player.get_action(state)

                    # Cast NumPy scalar to float.
                    Q = float(player.dqn.get_optimal_action_value(state))

                    # Record statistics.
                    local_total_reward += test_env.step(action)
                    local_total_Q += Q
                    local_min_Q = min(local_min_Q, Q)
                    local_max_Q = max(local_max_Q, Q)

                if not test_env.done:
                    # Discard unfinished game.
                    break

                num_games_finished += 1
                time_step += local_time_step
                total_reward += local_total_reward
                min_reward = min(min_reward, local_total_reward)
                max_reward = max(max_reward, local_total_reward)
                total_Q += local_total_Q
                summed_min_Qs += local_min_Q
                summed_max_Qs += local_max_Q
                min_Q = min(min_Q, local_min_Q)
                max_Q = max(max_Q, local_max_Q)

            # Save results.
            if num_games_finished > 0:
                # Extract more statistics.
                avg_reward = total_reward / num_games_finished
                avg_Q = total_Q / time_step
                avg_min_Q = summed_min_Qs / num_games_finished
                avg_max_Q = summed_max_Qs / num_games_finished

                summary = tf.Summary()
                summary.value.add(tag='testing/num_games_finished', simple_value=num_games_finished)
                summary.value.add(tag='testing/average_reward', simple_value=avg_reward)
                summary.value.add(tag='testing/minimum_reward', simple_value=min_reward)
                summary.value.add(tag='testing/maximum_reward', simple_value=max_reward)
                summary.value.add(tag='testing/average_Q', simple_value=avg_Q)
                summary.value.add(tag='testing/average_minimum_Q', simple_value=avg_min_Q)
                summary.value.add(tag='testing/minimum_Q', simple_value=min_Q)
                summary.value.add(tag='testing/average_maximum_Q', simple_value=avg_max_Q)
                summary.value.add(tag='testing/maximum_Q', simple_value=max_Q)

                summary_writer.add_summary(summary, epoch_i)
                summary_writer.flush()
Example #9
def Training():
    print('Training')

    ####################################################################
    ## Hyper parameter
    ####################################################################
    print('Initializing hyper parameter')

    vis = visdom.Visdom(port='2020')
    loss_window = vis.line(X=torch.zeros((1, )).cpu(),
                           Y=torch.zeros((1)).cpu(),
                           opts=dict(xlabel='50 steps',
                                     ylabel='Loss',
                                     title='Training Loss',
                                     legend=['Loss']))

    #########################################################################
    ## Get dataset
    #########################################################################
    print("Get dataset")
    loader = Generator()

    ##############################
    ## Get agent and model
    ##############################
    print('Get agent')
    if p.model_path == "":
        lane_agent = agent.Agent()
        p.model_epoch = 0
    else:
        lane_agent = agent.Agent(p.model_epoch + 1)
        lane_agent.load_weights(p.model_epoch, p.model_loss)

    ##############################
    ## Check GPU
    ##############################
    print('Setup GPU mode')
    if torch.cuda.is_available():
        lane_agent.cuda()
        #torch.backends.cudnn.benchmark=True

    ##############################
    ## Loop for training
    ##############################
    print('Training loop')
    step = int(p.model_epoch * loader.size_train / p.batch_size)
    sampling_list = None
    for epoch in range(p.model_epoch + 1, p.n_epoch):
        lane_agent.training_mode()
        for inputs, target_lanes, target_h, test_image, data_list in loader.Generate(
                sampling_list):
            #training
            #util.visualize_points(inputs[0], target_lanes[0], target_h[0])
            print("epoch : " + str(epoch))
            print("step : " + str(step))
            loss_p = lane_agent.train(inputs, target_lanes, target_h, epoch,
                                      lane_agent, data_list)
            torch.cuda.synchronize()
            loss_p = loss_p.cpu().data

            if step % 50 == 0:
                vis.line(X=torch.ones((1, 1)).cpu() * int(step / 50),
                         Y=torch.Tensor([loss_p]).unsqueeze(0).cpu(),
                         win=loss_window,
                         update='append')

            step += 1

        lane_agent.save_model(epoch, loss_p)
        testing(lane_agent, test_image, step, loss_p)

        sampling_list = copy.deepcopy(lane_agent.get_data_list())
        lane_agent.sample_reset()

        #evaluation
        if p.do_eval and epoch >= 0 and epoch % 1 == 0:
            print("evaluation")
            lane_agent.evaluate_mode()
            th_list = [0.8]
            index = [3]
            lane_agent.save_model(int(step / 100), loss_p)

            for idx in index:
                print("generate result")
                test.evaluation(loader,
                                lane_agent,
                                index=idx,
                                name="test_result_" + str(epoch) + "_" +
                                str(idx) + ".json")

            for idx in index:
                print("compute score")
                with open("eval_results/eval_result2_" + str(idx) + "_.txt",
                          'a') as make_file:
                    make_file.write("epoch : " + str(epoch) + " loss : " +
                                    str(loss_p.cpu().data))
                    make_file.write(
                        evaluation.LaneEval.bench_one_submit(
                            "test_result_" + str(epoch) + "_" + str(idx) +
                            ".json", "test_label.json"))
                    make_file.write("\n")
                with open("eval_results/eval_result_" + str(idx) + "_.txt",
                          'a') as make_file:
                    make_file.write("epoch : " + str(epoch) + " loss : " +
                                    str(loss_p.cpu().data))
                    make_file.write(
                        evaluation.LaneEval.bench_one_submit(
                            "test_result_" + str(epoch) + "_" + str(idx) +
                            ".json", "test_label.json"))
                    make_file.write("\n")

        if int(step) > 700000:
            break
Example #10
def SGD(params, lr):
    for param in params:
        param[:] = param - lr * param.grad


def tderror(rt, qval2, qval1, l):
    return nd.nansum((rt + l * qval2 - qval1)**2)


# Setup
num_episodes = 20

# Create main loop
env = gym.make('SuperMarioBros-1-1-v0')
env.reset()
a = agent.Agent(env.observation_space.shape, env.action_space.shape)
for episode in range(num_episodes):
    observation, reward, done, info = env.step([0] * 6)
    observation = preprocess(observation)
    # env.render()
    for epoch in range(10):
        #action = env.action_space.sample() # your agent here (this takes random actions)
        with autograd.record():
            action1, max_ind1, qval1 = a.action_nd(observation)
            observation, reward, done, info = env.step(action1)
            if done:
                print('Epoch {}: Resetting environment\n'.format(epoch))
                break
            observation = preprocess(observation)
            action2, max_ind2, qval2 = a.action(observation)
            tdloss = tderror(reward, qval2, qval1, 0.99)
Example #11
                                               is_db_v2=is_db_v2)
        name = given_weight[:-3]
    else:
        if network == 'NN':
            env = environment.Environment(path=path_data,
                                          path_weights=name + '_weights.h5',
                                          is_db_v2=is_db_v2)
        elif network == 'LSTM':
            env = environment_LSTM.Environment(path=path_data,
                                               path_weights=name +
                                               '_weights.h5',
                                               is_db_v2=is_db_v2)

    if network == 'NN':
        if is_db_v2:
            agent = agent.Agent(env, (25, ))  # 27
        else:
            agent = agent.Agent(env, (24, ))  # 26

    elif network == 'LSTM':
        agent = agent_LSTM.Agent(env)

    list_users = os.listdir(env.path)

    if initial_range != "-1" and final_range != "-1":
        list_users = list_users[int(initial_range):int(final_range)]
    elif initial_range != "-1":
        list_users = list_users[int(initial_range):]
    elif final_range != "-1":
        list_users = list_users[:int(final_range)]
Example #12
def main():
    #delete old game files
    i = 0
    while True:
        try:
            os.remove(options.path + "/quackgame-%i.gcg" % i)
            os.remove(options.path + "/cs221game-%i" % i)
            i += 1
        except OSError:
            break

    #start quackle game in background
    print "starting quackle..."
    quackle = subprocess.Popen(
        "./test  --repetitions=%i lexicon=cs221 --mode=cs221 --quiet" %
        options.numgames,
        cwd=options.path,
        shell=True)

    sleep(1)
    print "done."

    for i in xrange(0, options.numgames):
        sleep(1)
        you = open(options.path + "/quackgame-%i.gcg" % i, 'r')
        me = open(options.path + "/cs221game-%i" % i, 'w+')

        b = board.Board()
        AI = agent.Agent(b,
                         quackle=True,
                         montecarlo=True,
                         heuristic=weights_MC)
        scoreYou = 0
        scoreMe = 0
        if not options.silent: print b
        OK = True
        yourMove = ""
        myMove = ""
        print "-------------------------------------------------"
        print "playing game %i of %i" % (i + 1, options.numgames)
        print "-------------------------------------------------"

        while True:
            y = you.readline().strip()
            #m = me.readline().strip()
            if y != yourMove and y != "":
                yourMove = y.split()
                #print "yourMove",yourMove
                player = yourMove[0]
                if player == "quackle":
                    if not options.silent: print "quackle move", yourMove
                    else: print "q",
                    orientation = yourMove[1]
                    loc = (int(yourMove[2]), int(yourMove[3]))
                    if len(yourMove) == 5:
                        word = yourMove[4].upper()
                        if not options.silent:
                            print "word, loc, score:", word, loc, orientation, scoreYou
                        scoreYou += b.insertWord(word,
                                                 loc,
                                                 orientation,
                                                 debug=False)
                    elif len(yourMove) > 5:  #abort
                        print yourMove
                        print "breaking"
                        break
                    else:
                        if not options.silent: print "quackle pass?"

                elif player == "cs221":
                    rack = yourMove[-1]
                    #wildcard tiles, add a vowel if we have none
                    #otherwise pick a random letter
                    wildcard = ''
                    if sum([1 for v in vowels if v in rack]) > 0:
                        wildcard = vowels[randint(0, 5)]
                    else:
                        wildcard = alphabet[randint(0, 25)]
                    rack.replace('?', wildcard)
                    move = AI.move([t for t in rack])
                    if not options.silent: print "\ncs221 move", move
                    else: print "c",
                    if move != None:  #write move to file
                        (word, pos, orientation, usedTiles, score) = move
                        if len(word) > 0:
                            row, col = pos
                            scoreMe += score
                            if wildcard in usedTiles:
                                word = list(word)
                                word[word.index(wildcard)] = wildcard.lower()
                                word = ''.join(word)
                                #print "wildcard used",wildcard,word,rack
                            me.write("%s %s %s %s %s\n" %
                                     (word, row, col, orientation, score))
                            me.flush()
                        else:  #tile exchange
                            tile = pos
                            me.write("%s %s\n" % ("exchange", tile))
                            me.flush()
                    else:  #write pass to file
                        me.write("pass\n")
                        me.flush()
                elif player == "Game":  #game over
                    print "Game over!"
                    break
                else:  #TODO: this shouldn't happen, fix it!
                    print "file %s in a bad state, ending" % you
                    me.write("end\n")
                    me.flush()
                    OK = False
                    break
                if not options.silent:
                    print b
                    print "CS221: %s, Quackle: %s" % (scoreMe, scoreYou)
Example #13
import agent

agent = agent.Agent(load_model=True)
print agent.test(verbose=True)
Example #14
    def on_init(self):

        self._running = True

        # Switches for features
        self.training = True
        self.testing = False

        self.whiskers_on = True
        self.smell_on = False

        self.progress_bar = False

        # Neural net diagnostics
        # General
        self.model_filepath_load = './models/testing/smell_lr_0001/model'
        self.counts_per_epoch = 100
        self.count = 0
        self.epoch = 0

        # Training
        self.model_filepath_save = './models/testing/smell_lr_0001/model'
        self.epoch_train = 200

        # Testing
        self.epoch_test = 200
        self.reward_total = 0
        self.loss_array = np.zeros(self.epoch_test)
        self.actions_array = np.zeros(self.counts_per_epoch * self.epoch_test)

        # Initialise the pygame display and define its surface parameters
        pg.init()
        self._display_surf = pg.display.set_mode(self.size,
                                                 pg.HWSURFACE | pg.DOUBLEBUF)

        # Add animals to the ecosystem
        if self.training:
            self.animals = np.array([
                agent.Agent(self._display_surf,
                            whiskers_on=self.whiskers_on,
                            smell_on=self.smell_on)
                for i in range(self.nanimals)
            ])
        else:
            self.animals = np.array([
                agent.Agent(
                    self._display_surf,
                    model_filepath=self.model_filepath_load + '_%03d' % i,
                    whiskers_on=self.whiskers_on,
                    smell_on=self.smell_on) for i in range(self.nanimals)
            ])

        # Add plants to the environment
        self.environment = environment.Environment(self._display_surf,
                                                   n_plants=self.nplants,
                                                   smell_on=self.smell_on)

        # Initialise agents
        self.on_render()
        for animal in self.animals:
            animal.state_previous = animal.sense(
                self._display_surf, smell_map=self.environment.smell_map)
Example #15
    def __init__(self, **kwargs):
        Default.__init__(self, **kwargs)

        self.model = self.agent.model
        self.rlConfig = self.model.rlConfig

        if self.dump:
            try:
                import zmq
            except ImportError as err:
                print("ImportError: {0}".format(err))
                sys.exit("Install pyzmq to dump experiences")

            context = zmq.Context()

            self.socket = context.socket(zmq.PUSH)
            self.sock_addr = "tcp://%s:%d" % (self.dump,
                                              util.port(self.model.name))
            print("Connecting to " + self.sock_addr)
            self.socket.connect(self.sock_addr)

            self.dump_size = self.rlConfig.experience_length
            self.dump_state_actions = (self.dump_size *
                                       ssbm.SimpleStateAction)()

            self.dump_frame = 0
            self.dump_count = 0

        self.first_frame = True
        self.action_counter = 0
        self.toggle = False

        self.user = os.path.expanduser(self.user)

        self.state = ssbm.GameMemory()
        # track players 1 and 2 (pids 0 and 1)
        self.sm = state_manager.StateManager([0, 1])
        self.write_locations()

        if self.tag is not None:
            random.seed(self.tag)

        self.pids = [1]
        self.agents = {1: self.agent}
        self.characters = {1: self.agent.char or self.p2}

        reload_every = self.rlConfig.experience_length
        self.agent.reload_every = reload_every

        enemy = None
        if self.self_play:
            enemy = agent.Agent(reload_every=self.self_play * reload_every,
                                swap=True,
                                **kwargs)
        elif self.enemy:
            with open(self.enemy + 'agent', 'r') as f:
                import json
                enemy_kwargs = json.load(f)
            enemy_kwargs.update(reload_every=None,
                                swap=True,
                                dump=None,
                                path=self.enemy)
            enemy = agent.Agent(**enemy_kwargs)

        if enemy:
            self.pids.append(0)
            self.agents[0] = enemy
            self.characters[0] = enemy.char or self.p1

        self.menu_managers = {
            i: MenuManager(characters[c], pid=i)
            for i, c in self.characters.items()
        }

        print('Creating MemoryWatcher.')
        mwType = memory_watcher.MemoryWatcher
        if self.zmq:
            mwType = memory_watcher.MemoryWatcherZMQ
        self.mw = mwType(self.user + '/MemoryWatcher/MemoryWatcher')

        pipe_dir = self.user + '/Pipes/'
        print('Creating Pads at %s. Open dolphin now.' % pipe_dir)
        util.makedirs(self.user + '/Pipes/')

        paths = [pipe_dir + 'phillip%d' % i for i in self.pids]
        self.get_pads = util.async_map(Pad, paths)

        self.init_stats()

        # sets the game mode and random stage
        self.movie = movie.Movie(movie.endless_netplay +
                                 movie.stages[self.stage])
Example #16
# ***************************************

env = gw.make_env(config.DEFAULT_ENV_NAME)
writer = SummaryWriter(comment="-" + config.DEFAULT_ENV_NAME)

# the main DQN neural network that we are going to train
net = dqn.DQN(env.observation_space.shape, env.action_space.n).to(device)
print(net)

target_net = dqn.DQN(env.observation_space.shape,
                     env.action_space.n).to(device)

# create the experience replay buffer of the required size and pass
#  it to the agent
buffer = xr.ExperienceReplay(config.replay_size)
agent = ag.Agent(env, buffer)

epsilon = config.eps_start

# create an optimizer, a buffer for full episode rewards, a counter of
# frames and a variable to track the best mean reward reached (because
# every time the mean reward beats the record, we will save the model
# in a file)
optimizer = optim.Adam(net.parameters(), lr=config.learning_rate)
total_rewards = []
frame_idx = 0

best_mean_reward = None

print(">>>Training starts at ", datetime.datetime.now())
while True:  # while not converged
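    # NOTE: the original snippet is cut off here. In this style of DQN trainer the
    # loop body typically decays epsilon, steps the agent through the environment,
    # and periodically syncs the target network before a gradient step. The lines
    # below are a hedged sketch only: play_step, calc_loss, and the extra config
    # fields are assumptions, not this project's API.
    frame_idx += 1
    epsilon = max(config.eps_final,
                  config.eps_start - frame_idx / config.eps_decay_last_frame)

    reward = agent.play_step(net, epsilon, device=device)  # one env step, stores the transition
    if reward is not None:                                  # an episode just finished
        total_rewards.append(reward)

    if len(buffer) >= config.replay_start_size:
        if frame_idx % config.sync_target_frames == 0:
            target_net.load_state_dict(net.state_dict())    # sync the target network
        optimizer.zero_grad()
        batch = buffer.sample(config.batch_size)
        loss = calc_loss(batch, net, target_net, device=device)
        loss.backward()
        optimizer.step()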
Example #17
#!/usr/bin/env python3
#coding: utf-8

import world
import robot
import agent

import numpy as np

import math
import sys

if __name__ == "__main__":

    world = world.World(10, 0.1)

    agent1 = agent.Agent(0.2, 0.0)
    agent2 = agent.Agent(0.2, 10.0 * math.pi / 180.0)

    robot1 = robot.Robot("robot_1", np.array([0.0, 0.0, 0.0]), 0.2, "black",
                         agent1)

    robot2 = robot.Robot("robot_2", np.array([1.0, 2.0, math.pi / 2.0]), 0.2,
                         "red", agent2)

    world.add_robot(robot1)
    world.add_robot(robot2)

    world.draw()
Example #18
                time.sleep(2)

    # Loop until mission starts:
    print("Waiting for the mission to start ", end=' ')
    world_state = agent_host.getWorldState()
    while not world_state.has_mission_begun:
        print(".", end="")
        time.sleep(0.1)
        world_state = agent_host.getWorldState()
        for error in world_state.errors:
            print("Error:", error.text)

    print("Mission running ", end=' ')

    agent_host.sendCommand("chat /time set day")
    agent = ag.Agent(agent_host)

    # Loop until mission ends:
    while not agent.finished:
        state = transform_farm(copy.deepcopy(agent.state))
        action = select_action(state, net)
        reward = np.array(agent.run(action + 1))
        memory.push(state, action, transform_farm(copy.deepcopy(agent.state)),
                    reward)

        sample = memory.sample(1)[0]



        target = sample.reward + discount_rate * \
                np.max(net.predict(np.expand_dims(sample.next_state, axis=0)))
Example #19
def run_worker(args):
    """Starts a worker thread that learns how to play the specified Atari game."""

    cluster_def = get_cluster_def(args.num_threads)
    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=2)
    server = tf.train.Server(cluster_def,
                             'thread',
                             args.worker_index,
                             config=config)

    # Configure the supervisor.
    is_chief = args.worker_index == 0
    checkpoint_dir = os.path.join(args.log_dir, 'checkpoint')
    thread_dir = os.path.join(args.log_dir,
                              'thread-{}'.format(args.worker_index))
    summary_writer = tf.summary.FileWriter(thread_dir)
    global_variables_initializer = tf.global_variables_initializer()
    init_fn = lambda sess: sess.run(global_variables_initializer)

    # Initialize the model.
    env = environment.AtariWrapper(args.env_name, environment.TRAINING,
                                   args.action_space)
    player = agent.Agent(args.worker_index, env, args.render,
                         args.num_local_steps, args.learning_rate,
                         args.entropy_regularization, args.max_gradient_norm,
                         args.discount, summary_writer,
                         args.summary_update_interval)

    # Local copies of the model will not be saved.
    model_variables = [
        var for var in tf.global_variables()
        if not var.name.startswith('local')
    ]

    supervisor = tf.train.Supervisor(
        ready_op=tf.report_uninitialized_variables(model_variables),
        is_chief=is_chief,
        init_op=tf.variables_initializer(model_variables),
        logdir=checkpoint_dir,
        summary_op=None,
        saver=tf.train.Saver(model_variables),
        global_step=player.global_step,
        save_summaries_secs=30,
        save_model_secs=30,
        summary_writer=summary_writer,
        init_fn=init_fn)

    config = tf.ConfigProto(device_filters=[
        '/job:master', '/job:thread/task:{}/cpu:0'.format(args.worker_index)
    ])

    LOGGER.info('Starting worker. This may take a while.')
    with supervisor.managed_session(server.target,
                                    config=config) as sess, sess.as_default():
        global_step = 0
        while not supervisor.should_stop(
        ) and global_step < args.num_global_steps:
            global_step = player.train(sess)

    supervisor.stop()
    LOGGER.info('Stopped after %d global steps.', player.global_step)
Example #20
 def post(self):
     board = tornado.escape.json_decode(self.request.body)
     move = agent.Agent(board).next_move()
     self.write(tornado.escape.json_encode(move))
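A client for such a handler would POST the board as JSON and read the move back; for instance (the URL, port, and board encoding below are hypothetical):

import json
import urllib.request

board = [[0, 0, 0], [0, 1, 0], [0, 0, -1]]  # hypothetical board encoding
req = urllib.request.Request(
    "http://localhost:8888/move",            # hypothetical endpoint
    data=json.dumps(board).encode("utf-8"),
    headers={"Content-Type": "application/json"})
with urllib.request.urlopen(req) as resp:
    move = json.loads(resp.read().decode("utf-8"))
print(move)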
Example #21
# Cross-validation
for episodes in params['episodes']:
    for epsilon_start in params['epsilon_start']:
        for epsilon_end in params['epsilon_end']:
            for alpha_fixed in params['alpha_fixed']:
                if (alpha_fixed):
                    for alpha in params['alpha']:
                        # alpha and epsilon profile
                        alpha = np.ones(episodes) * alpha
                        epsilon = np.linspace(epsilon_start, epsilon_end,
                                              episodes)

                        # initialize the agent
                        learner = agent.Agent((x * y),
                                              5,
                                              discount,
                                              max_reward=1,
                                              softmax=softmax,
                                              sarsa=sarsa)
                        # perform the training
                        rewards = []
                        for index in range(0, episodes):
                            # start from a random state (but avoid barrier and mountain)
                            barrier_x = [0, 1, 2, 3, 4, 6, 7, 8, 9]
                            barrier_y = [4, 5]
                            while (True):
                                initial = [
                                    np.random.randint(0, x),
                                    np.random.randint(0, y)
                                ]
                                if (not (initial[0] in barrier_y
                                         and initial[1] in barrier_x)):
Example #22
        print "----------------"

        if game.done:
            break

        time.sleep(0.25)


#############

if __name__ == "__main__":

    game = game.Game(AREA_WIDTH, AREA_HEIGHT)
    user_play(game)

    agent = agent.Agent(ACTION_SIZE, DQN_MEMSIZE)

    stats = stats.Stats()

    score_sum = 0.0
    time_sum = 0.0
    score_cnt = 0.0
    steps_wo_r = 0
    quality_max = 0.0

    for e in range(EPISODES):
        game.reset()
        state = game.get_state()
        for t in range(MAX_STEPS):
            action = agent.act(state)
            key = action2key[game.key][action]
Example #23
    def cbComputeActionGA3C(self, event):
        feasible_actions = copy.deepcopy(self.feasible_actions)
        if self.operation_mode.mode != self.operation_mode.NN:
            print 'Not in NN mode'
            print self.operation_mode.mode
            return
        if len(feasible_actions.angles) == 0 \
            or len(feasible_actions.path_lengths)==0:
            print 'Invalid Feasible Actions'
            # print feasible_actions
            return

        # construct agent_state
        x = self.pose.pose.position.x
        y = self.pose.pose.position.y
        v_x = self.vel.x
        v_y = self.vel.y
        radius = self.veh_data['radius']
        turning_dir = 0.0
        heading_angle = self.psi
        pref_speed = self.veh_data['pref_speed']
        goal_x = self.goal.pose.position.x
        goal_y = self.goal.pose.position.y
        # in case current speed is larger than desired speed
        v = np.linalg.norm(np.array([v_x, v_y]))
        if v > pref_speed:
            v_x = v_x * pref_speed / v
            v_y = v_y * pref_speed / v

        host_agent = agent.Agent(x, y, goal_x, goal_y, radius, pref_speed,
                                 heading_angle, 0)
        host_agent.vel_global_frame = np.array([v_x, v_y])
        # host_agent.print_agent_info()

        other_agents_state = copy.deepcopy(self.other_agents_state)
        obs = host_agent.observe(other_agents_state)[1:]
        obs = np.expand_dims(obs, axis=0)
        # print "obs:", obs
        predictions = self.nn.predict_p(obs, None)[0]
        # print "predictions:", predictions
        # print "best action index:", np.argmax(predictions)
        raw_action = copy.deepcopy(self.actions[np.argmax(predictions)])
        action = np.array(
            [pref_speed * raw_action[0],
             util.wrap(raw_action[1] + self.psi)])
        # print "raw_action:", raw_action
        # print "action:", action

        # feasible_actions
        angles = (np.array(feasible_actions.angles) + np.pi) % (2 *
                                                                np.pi) - np.pi
        max_ranges = np.array(feasible_actions.max_speeds) - 0.3
        path_lengths = np.array(feasible_actions.path_lengths)
        # Sort the feasible actions by increasing angle
        order_inds = np.argsort(angles)
        max_ranges = max_ranges[order_inds]
        angles = angles[order_inds]
        path_lengths = path_lengths[order_inds]

        # Find which index corresponds to straight in front, and 90 deg each side
        zero_ind = np.digitize([self.psi + 0.01], angles) - 1
        self.d_min = max_ranges[zero_ind]
        # self.d_min = 100.0

        # if close to goal
        kp_v = 0.5
        kp_r = 1

        if host_agent.dist_to_goal < 2.0:  # and self.percentComplete>=0.9:
            # print "somewhat close to goal"
            pref_speed = max(
                min(kp_v * (host_agent.dist_to_goal - 0.1), pref_speed), 0.0)
            action[0] = min(raw_action[0], pref_speed)
            turn_amount = max(min(kp_r * (host_agent.dist_to_goal - 0.1), 1.0),
                              0.0) * raw_action[1]
            action[1] = util.wrap(turn_amount + self.psi)
        if host_agent.dist_to_goal < 0.3:
            self.stop_moving_flag = True
        else:
            self.stop_moving_flag = False

        # print 'chosen action (rel angle)', action[0], action[1]
        self.update_action(action)
Example #24
import importlib

parser = argparse.ArgumentParser()
parser.add_argument("dir1", type=str, help="Directory to agent 1 to be tested.")
parser.add_argument("dir2", type=str, default=None, nargs="?",
                    help="Directory to agent 2 to be tested. If empty, SimpleAI is used instead.")
parser.add_argument("--render", "-r", action="store_true", help="Render the competition.")
parser.add_argument("--games", "-g", type=int, default=100, help="number of games.")

args = parser.parse_args()

sys.path.insert(0, args.dir1)
import agent
orig_wd = os.getcwd()
os.chdir(args.dir1)
agent1 = agent.Agent()
agent1.load_model()
os.chdir(orig_wd)
del sys.path[0]

if args.dir2:
    sys.path.insert(0, args.dir2)
    importlib.reload(agent)
    os.chdir(args.dir2)
    agent2 = agent.Agent()
    agent2.load_model()
    os.chdir(orig_wd)
    del sys.path[0]
else:
    agent2 = None
Example #25
 def __init__(self, instrument):
     self.instrument = instrument
     self.agent = agent.Agent(None, None, [instrument])
Example #26
import agent

agent = agent.Agent()
agent.train()
Example #27
    def __init__(
            self,
            num_nodes=100,
            avg_node_degree=3,
            # taipei : 1.92
            # telaviv : 2.16
            # tallinn : 2.20,
            engagement=0.49,
            trustability=0.21,
            influenceability=0.53,
            recovery=0.63,
            experience=1,
            initial_opinion=0,
            opinion=0,
            public_sector_opinion=1,
            corpo_opinion=1,
            startup_opinion=1,
            academic_opinion=-1,
            civil_opinion=-1,
            media_opinion=-1):
        # set network layout
        self.num_nodes = num_nodes
        prob = avg_node_degree / self.num_nodes
        self.G = nx.erdos_renyi_graph(n=self.num_nodes, p=prob)
        # set space and time of the model
        self.grid = NetworkGrid(self.G)
        self.schedule = RandomActivation(self)
        # set model parameters
        self.engagement = engagement
        self.trustability = trustability
        self.influenceability = influenceability
        self.recovery = recovery
        self.experience = experience
        self.initial_opinion = initial_opinion
        self.opinion = initial_opinion
        self.public_sector_opinion = public_sector_opinion
        self.corpo_opinion = corpo_opinion
        self.startup_opinion = startup_opinion
        self.academic_opinion = academic_opinion
        self.civil_opinion = civil_opinion
        self.media_opinion = media_opinion
        # set data collection
        self.datacollector = DataCollector({
            "Negative":
            num_negative,
            "Neutral":
            num_neutral,
            "Positive":
            num_positive,
            "Total Engagement":
            total_engagement,
            "Total Trustability":
            total_trustability,
            "Total Recovery":
            total_recovery,
            "Total Experience":
            total_experience,
        })

        # create agents with average parameters taken on #city tweets
        for i, node in enumerate(self.G.nodes()):
            a = agent.Agent(
                i,
                self,
                self.engagement,
                self.trustability,
                self.influenceability,
                self.recovery,
                self.experience,
                self.initial_opinion,  # fixed by interface
                self.opinion)
            self.schedule.add(a)
            # add the undetermined agents to the network
            self.grid.place_agent(a, node)

        # create 1 representative of each stakeholder category
        public_sector = self.random.sample(self.G.nodes(), 1)
        for a in self.grid.get_cell_list_contents(public_sector):
            a.engagement = 0.57
            a.trustability = 0.53
            a.influenceability = 0.59
            a.recovery = 0.70
            a.experience = 1
            a.initial_opinion = public_sector_opinion  # fixed by interface
            a.opinion = a.initial_opinion

        corporate = self.random.sample(self.G.nodes(), 1)
        for a in self.grid.get_cell_list_contents(corporate):
            a.engagement = 0.75
            a.trustability = 0.49
            a.influenceability = 0.68
            a.recovery = 0.73
            a.experience = 1
            a.initial_opinion = corpo_opinion  # fixed by interface
            a.opinion = a.initial_opinion

        startup = self.random.sample(self.G.nodes(), 1)
        for a in self.grid.get_cell_list_contents(startup):
            a.engagement = 0.69
            a.trustability = 0.29
            a.influenceability = 0.68
            a.recovery = 0.97
            a.experience = 1
            a.initial_opinion = startup_opinion  # fixed by interface
            a.opinion = a.initial_opinion

        academic = self.random.sample(self.G.nodes(), 1)
        for a in self.grid.get_cell_list_contents(academic):
            a.engagement = 0.49
            a.trustability = 0.20
            a.influenceability = 0.65
            a.recovery = 0.75
            a.experience = 1
            a.initial_opinion = academic_opinion  # fixed by interface
            a.opinion = a.initial_opinion

        civil = self.random.sample(self.G.nodes(), 1)
        for a in self.grid.get_cell_list_contents(civil):
            a.engagement = 0.43
            a.trustability = 0.21
            a.influenceability = 0.69
            a.recovery = 0.72
            a.experience = 1
            a.initial_opinion = civil_opinion  # fixed by interface
            a.opinion = a.initial_opinion

        media = self.random.sample(self.G.nodes(), 1)
        for a in self.grid.get_cell_list_contents(media):
            a.engagement = 0.50
            a.trustability = 0.23
            a.influenceability = 0.65
            a.recovery = 0.71
            a.experience = 1
            a.initial_opinion = media_opinion  # fixed by interface
            a.opinion = a.initial_opinion

        self.running = True
        self.datacollector.collect(self)
        print('Finished initialising model, network has %s nodes' %
              self.G.number_of_nodes())
        nx.draw_networkx(self.G)
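The DataCollector above refers to model-level reporters (num_negative, num_neutral, num_positive, total_engagement, and so on) that are defined elsewhere in the project. A hedged sketch of what such reporters might look like, assuming opinion is negative, zero, or positive and the totals sum agent attributes:

def num_positive(model):
    # Count agents currently holding a positive opinion.
    return sum(1 for a in model.schedule.agents if a.opinion > 0)

def num_negative(model):
    return sum(1 for a in model.schedule.agents if a.opinion < 0)

def num_neutral(model):
    return sum(1 for a in model.schedule.agents if a.opinion == 0)

def total_engagement(model):
    # Sum the engagement attribute over all agents.
    return sum(a.engagement for a in model.schedule.agents)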
Example #28
File: run.py Project: uusama/RL
    # print("acc_reward:", acc_reward)
    return acc_reward, i, loss


n_actions = env.action_space.n
state_dim = env.observation_space.high.shape[0]
print("n_actions:", n_actions, "state_dim", state_dim)
batch_size = 64
checkpoint_path = "/tmp/my_dqn.ckpt"
qvalue_model = Qvalue.Qvalue(state_dim=state_dim,
                             n_actions=n_actions,
                             batch_size=64,
                             h1_n=512,
                             h2_n=256,
                             checkpoint_path=checkpoint_path)
agent = agent.Agent(actions=n_actions, q_value_model=qvalue_model)
memory = memory.RandomMemory(max_size=1024)

discount = .95
rewards = []
episodes_end = []
losses = []
eps = .9
reward, episode_end, loss = 0., 0., 0.
render = False
print(reward)
while reward < 20.:
    for episode_i in range(1000):
        # print("episode_i:", episode_i)
        if episode_i % 100 == 0:
            eps = epslons[int(episode_i / 100)]
Example #29
        self.ACTION_NUM = agent.dim_actions
        self.STATE_NUM = agent.dim_states
        self.RLMemory_num = 20
        self.SLMemory_num = 20
        self.RLMemory = deque(maxlen=self.RLMemory_num)
        self.SLMemory = deque(maxlen=self.SLMemory_num)
        # self.Q = DQN.DQN_DouDiZhu(self.ACTION_NUM, self.STATE_NUM, self.RLMemory, self.RLMemory_num, self.player)
        # self.Pi = SLN.Pi(self.ACTION_NUM, self.STATE_NUM, self.SLMemory, self.SLMemory_num, self.player)
        self.EPSILON = 0.06
        self.ETA = 0.1
        self.EPISODE_NUM = 5000000
        self.Q_enable = False


if __name__ == '__main__':
    agent = ag.Agent(models=["rl", "rl", "rl"])
    runAgent1 = RunAgent(agent, 'player1')
    runAgent2 = RunAgent(agent, 'player2')
    runAgent3 = RunAgent(agent, 'player3')
    Q = DQN.DQN_DouDiZhu(runAgent1.ACTION_NUM, runAgent1.STATE_NUM, runAgent1.RLMemory, runAgent1.RLMemory_num)
    Pi = SLN.Pi(runAgent1.ACTION_NUM, runAgent1.STATE_NUM, runAgent1.SLMemory, runAgent1.SLMemory_num)

    for i in range(runAgent1.EPISODE_NUM):
        print('=========== episode:', i, '============')
        if random.random() < runAgent1.ETA:
            runAgent1.Q_enable = True
            print('player1 ' + 'Q network is working')
        else:
            runAgent1.Q_enable = False
            print('player1 ' + 'Pi network is working')
Example #30
    def initialise_particle_data_set(self, unknown_agent, sim):
        # 1. Generating initial data (particles)

        none_count, none_threshold = 0, 500
        x, y, direction = unknown_agent.position[0], unknown_agent.position[
            1], unknown_agent.direction
        tmp_agent = agent.Agent(x, y, direction, self.type, -1)

        tmp_agent.set_parameters(sim, sim.agents[0].level,
                                 sim.agents[0].radius, sim.agents[0].angle)

        # 4. Defining route
        tmp_sim = sim.copy()
        tmp_agent = tmp_sim.move_a_agent(tmp_agent)
        target = tmp_agent.get_memory()
        route_actions = tmp_agent.route_actions
        particle = {}

        # 5. Adding to the data set
        if route_actions is not None:
            particle['target'] = target
            particle['choose_target_state'] = tmp_sim
            particle['parameter'] = [
                sim.agents[0].level, sim.agents[0].radius, sim.agents[0].angle
            ]
            particle['succeeded_steps'] = 1
            particle['failed_steps'] = 0
            particle['index'] = len(self.data_set)
            particle['cts_type'] = 'e'
            self.data_set.append(particle)

        while len(self.data_set) < self.generated_data_number:
            if none_count == none_threshold:
                break
            else:
                particle = {}

            # 2. Random uniform parameter sampling
            tmp_radius = random.uniform(radius_min, radius_max)  # 'radius'
            tmp_angle = random.uniform(angle_min, angle_max)  # 'angle'
            tmp_level = random.uniform(level_min, level_max)  # 'level'

            # 3. Creating the temporary agent
            x, y, direction = unknown_agent.position[
                0], unknown_agent.position[1], unknown_agent.direction
            tmp_agent = agent.Agent(x, y, direction, self.type, -1)
            tmp_agent.set_parameters(sim, tmp_level, tmp_radius, tmp_angle)

            # 4. Calculating route
            tmp_sim = sim.copy()
            tmp_agent = tmp_sim.move_a_agent(tmp_agent)
            target = tmp_agent.get_memory()
            route_actions = tmp_agent.route_actions

            # 5. Adding to the data set
            if route_actions is not None:
                particle['target'] = target
                particle['choose_target_state'] = tmp_sim
                particle['parameter'] = [tmp_level, tmp_radius, tmp_angle]
                particle['succeeded_steps'] = 1
                particle['failed_steps'] = 0
                particle['index'] = len(self.data_set)
                particle['cts_type'] = 'e'
                self.data_set.append(particle)
            else:
                none_count += 1