Python DDPG примеры, model.ddpg.ddpg.DDPG Python примеры использования

Пример #1

0

Показать файл

    def restore(self):
        """Rebuild the actor/critic networks and load the saved DDPG weights.

        Calls ``self.start_session()``, derives the action and state
        dimensions from ``len(self.testing_stocks) + 1`` and
        ``self.window_length``, builds the networks inside the variable scope
        named by ``get_variable_scope`` and stores the weight-loaded ``DDPG``
        instance on ``self.model``. The model save path is printed on the way.
        """
        self.start_session()

        asset_count = len(self.testing_stocks) + 1
        action_dim = [asset_count]
        state_dim = [asset_count, self.window_length]

        scope_name = get_variable_scope(self.window_length,
                                        self.predictor_type,
                                        self.use_batch_norm)
        with tf.variable_scope(scope_name):
            # Keyword arguments that the actor and the critic both receive.
            shared_kwargs = dict(
                sess=self.sess,
                feature_number=self.feature_number,
                state_dim=state_dim,
                action_dim=action_dim,
                tau=self.tau,
                predictor_type=self.predictor_type,
                use_batch_norm=self.use_batch_norm,
                activation_function=self.activation_function)
            training_cfg = self.config["training"]

            actor = StockActor(
                action_bound=self.action_bound,
                learning_rate=training_cfg["actor learning rate"],
                decay_rate=training_cfg["actor decay rate"],
                decay_steps=training_cfg["actor decay steps"],
                weight_decay=training_cfg["actor weight decay"],
                batch_size=self.batch_size,
                **shared_kwargs)

            critic = StockCritic(
                learning_rate=training_cfg["critic learning rate"],
                decay_rate=training_cfg["critic decay rate"],
                decay_steps=training_cfg["critic decay steps"],
                weight_decay=training_cfg["critic weight decay"],
                num_actor_vars=actor.get_num_trainable_vars(),
                **shared_kwargs)

            exploration_noise = OrnsteinUhlenbeckActionNoise(
                mu=np.zeros(action_dim))
            print(self.model_save_path)

            # No environment is attached (env=None); the model is only
            # initialised from the saved weights here.
            restored_model = DDPG(env=None,
                                  sess=self.sess,
                                  actor=actor,
                                  critic=critic,
                                  actor_noise=exploration_noise,
                                  obs_normalizer=obs_normalizer,
                                  config=self.config,
                                  model_save_path=self.model_save_path,
                                  summary_path=self.summary_path)
            restored_model.initialize(load_weights=True, verbose=False)
            self.model = restored_model

Пример #2

0

Показать файл

Файл: stock_trading.py Проект: ylcoder/drl-portfolio-management

def predict_next_day(env, sess, actor, critic, actor_noise, norm_func=None):
    """Load the saved DDPG weights and print the action for the last window.

    A ``DDPG`` model is built around the supplied ``env``/``sess``/networks
    and initialised with ``load_weights=True``. A separate ``PortfolioEnv``
    is then created from the module-level ``last_history`` and
    ``target_stocks``; its last observation is passed to
    ``predict_single`` and the resulting action is printed.

    Args:
        env: environment handed to the ``DDPG`` constructor.
        sess: session handed to the ``DDPG`` constructor.
        actor: actor network.
        critic: critic network.
        actor_noise: exploration-noise object.
        norm_func: forwarded as ``obs_normalizer``; defaults to ``None``.

    Returns:
        None. The history and the action are printed.
    """
    model = DDPG(
        env,
        sess,
        actor,
        critic,
        actor_noise,
        obs_normalizer=norm_func,
        config_file='config/stock.json',
        model_save_path=model_save_path,
        summary_path=summary_path,
    )
    model.initialize(load_weights=True)

    # The observation comes from a dedicated environment, not from the `env`
    # argument that was given to the model above.
    last_window_env = PortfolioEnv(
        last_history,
        target_stocks,
        steps=0,
        window_length=window_length,
        start_idx=0,
        trading_cost=0.0,
        sample_start_date='2019-12-26',
    )
    print("data=", last_history)

    latest_observation = last_window_env.get_last_observation()
    predicted_action = model.predict_single(latest_observation)
    print("action=", predicted_action)

Пример #3

0

Показать файл

                                     use_batch_norm)
    # Result/summary path for this window length, predictor type and
    # batch-norm setting.
    summary_path = get_result_path(window_length, predictor_type,
                                   use_batch_norm)

    # Variable-scope name derived from the same three settings.
    variable_scope = get_variable_scope(window_length, predictor_type,
                                        use_batch_norm)

    with tf.variable_scope(variable_scope):
        sess = tf.Session()
        # Actor is built positionally; 1e-4 is passed between action_bound and
        # tau (presumably the learning rate — confirm against StockActor).
        actor = StockActor(sess, state_dim, action_dim, action_bound, 1e-4,
                           tau, batch_size, predictor_type, use_batch_norm)
        # Critic receives the actor's trainable-variable count via
        # num_actor_vars.
        critic = StockCritic(sess=sess,
                             state_dim=state_dim,
                             action_dim=action_dim,
                             tau=1e-3,
                             learning_rate=1e-3,
                             num_actor_vars=actor.get_num_trainable_vars(),
                             predictor_type=predictor_type,
                             use_batch_norm=use_batch_norm)
        ddpg_model = DDPG(env,
                          sess,
                          actor,
                          critic,
                          actor_noise,
                          obs_normalizer=obs_normalizer,
                          config_file='config/stock.json',
                          model_save_path=model_save_path,
                          summary_path=summary_path)
        # Initialise without loading saved weights, then train.
        ddpg_model.initialize(load_weights=False)
        ddpg_model.train()

Пример #4

0

Показать файл

if __name__ == '__main__':
    # Environment and TensorFlow session shared by every component below.
    env = gym.make('Pendulum-v0')
    sess = tf.Session()

    # Dimensions and hyper-parameters handed to the actor and the critic.
    action_dim, state_dim = [1], [3]
    batch_size = 64
    action_bound = 2.
    tau = 1e-3

    actor = PendulumActor(
        sess, state_dim, action_dim, action_bound, 1e-4, tau, batch_size)
    critic = PendulumCritic(
        sess=sess,
        state_dim=state_dim,
        action_dim=action_dim,
        tau=tau,
        learning_rate=1e-3,
        num_actor_vars=actor.get_num_trainable_vars(),
    )
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

    ddpg_options = dict(
        action_processor=None,
        config_file='config/pendulum.json',
        model_save_path='weights/pendulum/checkpoint.ckpt',
        summary_path='results/pendulum/',
    )
    ddpg_model = DDPG(env, sess, actor, critic, actor_noise, **ddpg_options)

    # Weights are loaded from the checkpoint and the model is only evaluated;
    # no ddpg_model.train() call is made in this script.
    ddpg_model.initialize(load_weights=True)
    test_model(env, ddpg_model, 10)

Пример #5

0

Показать файл

Файл: cart_pole-v0.py Проект: ylcoder/drl-portfolio-management


if __name__ == '__main__':
    # TensorFlow session and environment shared by every component below.
    sess = tf.Session()
    env = gym.make('CartPole-v0')

    # Dimensions and hyper-parameters handed to the actor and the critic.
    action_dim, state_dim = [2], [4]
    batch_size = 64
    tau = 1e-3

    actor = CartPoleActor(
        sess, state_dim, action_dim, 1., 1e-4, tau, batch_size)
    critic = CartPoleCritic(
        sess=sess,
        state_dim=state_dim,
        action_dim=action_dim,
        tau=tau,
        learning_rate=1e-3,
        num_actor_vars=actor.get_num_trainable_vars(),
    )
    actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

    # np.argmax is supplied as the action processor.
    ddpg_options = dict(
        action_processor=np.argmax,
        model_save_path='weights/cartpole/checkpoint.ckpt',
        summary_path='results/cartpole/',
    )
    ddpg_model = DDPG(env, sess, actor, critic, actor_noise, **ddpg_options)

    # Start from fresh weights, train, then evaluate.
    ddpg_model.initialize(load_weights=False)
    ddpg_model.train()
    test_model(env, ddpg_model, 10)

Пример #6

0

Показать файл

def visualise_Data():
    """Plot the stock price history and evaluate the restored DDPG models.

    The function runs like a script, in this order:

    1. Load the pickled history and ticker files, keep the first four
       features and print the history of three hand-picked target stocks.
    2. Reload the data set and split every stock at ``num_training_time``
       (1095 steps) into a target (training) and a testing history.
    3. Show one figure per target stock (feature index 1).
    4. Build the imitation LSTM/CNN models and the DDPG models for every
       window length in ``window_length_lst``, loading saved weights.
    5. Run the DDPG models through ``test_model_multiple`` in a
       ``MultiActionPortfolioEnv`` built from the target history.

    Takes no arguments and returns ``None``; results are printed or plotted.
    """
    # NOTE(review): pickle.load can execute arbitrary code. These are data set
    # files shipped with the project; never point this at untrusted input.
    with open('utils/datasets/all_eqw', 'rb') as fr:
        history = pickle.load(fr, encoding='latin1')

    with open('utils/datasets/stock_names', 'rb') as fr:
        abbreviation = pickle.load(fr, encoding='latin1')

    history = history[:, :, :4]
    num_training_time = history.shape[1]
    num_testing_time = history.shape[1]

    # get target history
    target_stocks = ['BLK UN EQUITY', 'GS UN EQUITY', 'USB UN EQUITY']
    target_history = np.empty(shape=(len(target_stocks), num_training_time,
                                     history.shape[2]))
    for i, stock in enumerate(target_stocks):
        target_history[i] = history[
            abbreviation.index(stock), :num_training_time, :]
        print(target_history[i])

    # collect testing data
    testing_stocks = [
        'AMG UN EQUITY',
        'BRK/B UN EQUITY',
        'MTB UN EQUITY',
    ]
    testing_history = np.empty(shape=(len(testing_stocks), num_testing_time,
                                      history.shape[2]))
    # Fixed: this loop used to iterate target_stocks, which filled the testing
    # array with the target tickers instead of the testing ones.
    for i, stock in enumerate(testing_stocks):
        testing_history[i] = history[
            abbreviation.index(stock), :num_testing_time, :]

    # dataset for 16 stocks by splitting timestamp
    # NOTE(review): the result of read_stock_history is overwritten by the
    # pickle loads right below; the call is kept so behaviour is unchanged.
    history, abbreviation = read_stock_history(
        filepath='utils/datasets/stocks_history_target.h5')
    with open('utils/datasets/all_eqw', 'rb') as fr:
        history = pickle.load(fr, encoding='latin1')

    with open('utils/datasets/stock_names', 'rb') as fr:
        abbreviation = pickle.load(fr, encoding='latin1')
    history = history[:, :, :4]

    # 16 stocks are all involved. We choose first 3 years as training data
    num_training_time = 1095
    target_stocks = abbreviation
    target_history = np.empty(shape=(len(target_stocks), num_training_time,
                                     history.shape[2]))

    for i, stock in enumerate(target_stocks):
        target_history[i] = history[
            abbreviation.index(stock), :num_training_time, :]
    print((target_history.shape))

    # and last 2 years as testing data.
    testing_stocks = abbreviation
    testing_history = np.empty(shape=(len(testing_stocks),
                                      history.shape[1] - num_training_time,
                                      history.shape[2]))
    for i, stock in enumerate(testing_stocks):
        testing_history[i] = history[abbreviation.index(stock),
                                     num_training_time:, :]

    print((testing_history.shape))

    nb_classes = len(target_stocks) + 1
    print(target_history.shape)
    print(testing_history.shape)

    # One figure per target stock.
    date_list = [index_to_date(i) for i in range(target_history.shape[1])]
    x = range(target_history.shape[1])
    for i, stock_name in enumerate(target_stocks):
        plt.figure(i)
        # open, high, low, close = [0, 1, 2, 3]
        plt.plot(x, target_history[i, :, 1])
        plt.xticks(x[::200], date_list[::200], rotation=30)
        plt.title(stock_name)
        plt.show()

    # common settings
    batch_size = 64
    action_bound = 1.
    tau = 1e-3
    models = []
    model_names = []
    window_length_lst = [3, 7, 14, 21]
    predictor_type_lst = ['cnn', 'lstm']
    use_batch_norm = True

    for window_length in window_length_lst:
        name = 'imit_LSTM%3A window = {}'.format(window_length)
        model_name = 'imitation_lstm_window_{}'.format(window_length)
        model_names.append(model_name)
        # instantiate LSTM model
        lstm_model = StockLSTM(nb_classes,
                               window_length,
                               weights_file='weights/' + name + '.h5')
        lstm_model.build_model(load_weights=True)
        models.append(lstm_model)

        name = 'imit_CNN%3A window = {}'.format(window_length)
        model_name = 'imitation_cnn_window_{}'.format(window_length)
        model_names.append(model_name)
        # instantiate CNN model
        cnn_model = StockCNN(nb_classes,
                             window_length,
                             weights_file='weights/' + name + '.h5')
        cnn_model.build_model(load_weights=True)
        models.append(cnn_model)

    # Everything appended from here on is a DDPG model. Remember where they
    # start instead of hard-coding the index (8 with four window lengths).
    first_ddpg_index = len(models)

    for window_length in window_length_lst:
        for predictor_type in predictor_type_lst:
            name = 'DDPG_window_{}_predictor_{}'.format(
                window_length, predictor_type)
            model_names.append(name)
            # Each DDPG model gets a fresh graph and its own session.
            tf.reset_default_graph()
            sess = tf.Session()
            tflearn.config.init_training_mode()
            action_dim = [nb_classes]
            state_dim = [nb_classes, window_length]
            variable_scope = get_variable_scope(window_length, predictor_type,
                                                use_batch_norm)
            with tf.variable_scope(variable_scope):
                actor = StockActor(sess, state_dim, action_dim, action_bound,
                                   1e-4, tau, batch_size, predictor_type,
                                   use_batch_norm)
                critic = StockCritic(
                    sess=sess,
                    state_dim=state_dim,
                    action_dim=action_dim,
                    tau=1e-3,
                    learning_rate=1e-3,
                    num_actor_vars=actor.get_num_trainable_vars(),
                    predictor_type=predictor_type,
                    use_batch_norm=use_batch_norm)
                actor_noise = OrnsteinUhlenbeckActionNoise(
                    mu=np.zeros(action_dim))

                model_save_path = get_model_path(window_length, predictor_type,
                                                 use_batch_norm)
                summary_path = get_result_path(window_length, predictor_type,
                                               use_batch_norm)

                ddpg_model = DDPG(None,
                                  sess,
                                  actor,
                                  critic,
                                  actor_noise,
                                  obs_normalizer=obs_normalizer,
                                  config_file='config/stock.json',
                                  model_save_path=model_save_path,
                                  summary_path=summary_path)
                ddpg_model.initialize(load_weights=True, verbose=False)
                models.append(ddpg_model)

    # Only the DDPG models are evaluated; the imitation models are skipped.
    env = MultiActionPortfolioEnv(target_history,
                                  target_stocks,
                                  model_names[first_ddpg_index:],
                                  steps=500,
                                  sample_start_date='2012-10-30')

    test_model_multiple(env, models[first_ddpg_index:])

Пример #7

0

Показать файл

    def train_model(self):
        """Train a DDPG agent from scratch on the configured training period.

        Fetches the price history for ``self.target_stocks``, builds a
        ``PortfolioEnv`` plus the actor and critic networks from
        ``self.config``, trains the model with ``load_weights=False`` and
        closes the session afterwards, even when training raises.

        Side effects: overwrites ``self.target_history``,
        ``self.training_start_time``, ``self.training_end_time`` and
        ``self.sess``. When ``config["training"]["max_step"]`` is not positive
        it is replaced in place by ``history length - window_length - 1``.

        Returns:
            ``self.train_id``.
        """
        print("training period is from %s to %s"%(self.training_start_time,self.training_end_time))

        (self.target_history, _,
         self.training_start_time,
         self.training_end_time) = fetch_data(
             start_time=self.training_start_time,
             end_time=self.training_end_time,
             window_length=self.window_length,
             stocks=self.target_stocks)

        # NOTE(review): this prints start - end, which is negative whenever
        # end > start; presumably end - start was intended — confirm against
        # what fetch_data returns before changing it.
        print("total training example is %d" %(self.training_start_time-self.training_end_time))
        print("self.target_history shape is", self.target_history.shape)

        training_cfg = self.config["training"]
        if training_cfg["max_step"] <= 0:
            # Non-positive max_step means "use the whole history".
            training_cfg["max_step"] = (
                self.target_history.shape[1] - self.window_length - 1)

        env = PortfolioEnv(history=self.target_history,
                           abbreviation=self.target_stocks,
                           steps=training_cfg["max_step"],
                           trading_cost=self.trading_cost,
                           time_cost=self.time_cost,
                           window_length=self.window_length,
                           reward_function=self.config["input"]["reward_function"])

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(self.action_dim))

        variable_scope = get_variable_scope(self.window_length,
                                            self.predictor_type,
                                            self.use_batch_norm)
        device_res = "/cpu:0" if self.config["device"] == "cpu" else "/gpu:0"
        print("device is ", device_res)

        with tf.device(device_res):
            self.sess = self.start_session()
            try:
                with tf.variable_scope(variable_scope):
                    actor = StockActor(
                        sess=self.sess,
                        feature_number=self.feature_number,
                        state_dim=self.state_dim,
                        action_dim=self.action_dim,
                        action_bound=self.action_bound,
                        learning_rate=training_cfg["actor learning rate"],
                        decay_rate=training_cfg["actor decay rate"],
                        decay_steps=training_cfg["actor decay steps"],
                        weight_decay=training_cfg["actor weight decay"],
                        tau=self.tau,
                        batch_size=self.batch_size,
                        predictor_type=self.predictor_type,
                        use_batch_norm=self.use_batch_norm,
                        activation_function=self.activation_function)
                    critic = StockCritic(
                        sess=self.sess,
                        feature_number=self.feature_number,
                        state_dim=self.state_dim,
                        action_dim=self.action_dim,
                        tau=self.tau,
                        learning_rate=training_cfg["critic learning rate"],
                        decay_rate=training_cfg["critic decay rate"],
                        decay_steps=training_cfg["critic decay steps"],
                        weight_decay=training_cfg["critic weight decay"],
                        num_actor_vars=actor.get_num_trainable_vars(),
                        predictor_type=self.predictor_type,
                        use_batch_norm=self.use_batch_norm,
                        activation_function=self.activation_function)
                    ddpg_model = DDPG(env=env,
                                      sess=self.sess,
                                      actor=actor,
                                      critic=critic,
                                      actor_noise=actor_noise,
                                      obs_normalizer=obs_normalizer,
                                      config=self.config,
                                      model_save_path=self.model_save_path,
                                      summary_path=self.summary_path)
                    ddpg_model.initialize(load_weights=False)
                    ddpg_model.train()
            finally:
                # Release the session even if building or training failed;
                # previously an exception skipped close_session().
                self.close_session()
        return self.train_id

Пример #8

0

Показать файл

Файл: stock_trading.py Проект: yrxwin/ddpg_model_intern

        # Actor network; its learning rate is read from the "training"
        # section of config.
        actor = StockActor(
            sess=sess,
            state_dim=state_dim,
            action_dim=action_dim,
            action_bound=action_bound,
            learning_rate=config["training"]["actor learning rate"],
            tau=tau,
            batch_size=batch_size,
            predictor_type=predictor_type,
            use_batch_norm=use_batch_norm)
        # Critic network; receives the actor's trainable-variable count via
        # num_actor_vars and its own learning rate from config.
        critic = StockCritic(
            sess=sess,
            state_dim=state_dim,
            action_dim=action_dim,
            tau=tau,
            learning_rate=config["training"]["critic learning rate"],
            num_actor_vars=actor.get_num_trainable_vars(),
            predictor_type=predictor_type,
            use_batch_norm=use_batch_norm)
        # DDPG wrapper around the environment, session and both networks.
        ddpg_model = DDPG(env=env,
                          sess=sess,
                          actor=actor,
                          critic=critic,
                          actor_noise=actor_noise,
                          obs_normalizer=obs_normalizer,
                          config=config,
                          model_save_path=model_save_path,
                          summary_path=summary_path)
        # Initialise without loading saved weights, then train.
        ddpg_model.initialize(load_weights=False)
        ddpg_model.train()

Пример #9

0

Показать файл

        # Critic network; receives the actor's trainable-variable count via
        # num_actor_vars, plus the use_previous flag and the auxiliary
        # commission setting.
        critic = StockCritic(sess=sess,
                             state_dim=state_dim,
                             action_dim=action_dim,
                             tau=critic_tau,
                             learning_rate=1e-3,
                             num_actor_vars=actor.get_num_trainable_vars(),
                             predictor_type=predictor_type,
                             use_batch_norm=use_batch_norm,
                             use_previous=True,
                             auxiliary_commission=auxil_commission)
        # This DDPG variant takes its training hyper-parameters as individual
        # keyword arguments rather than a config file or dict.
        ddpg_model = DDPG(train_env,
                          sess,
                          actor,
                          critic,
                          actor_noise,
                          obs_normalizer=obs_normalizer,
                          gamma=gamma,
                          training_episodes=training_episodes,
                          max_rollout_steps=max_rollout_steps,
                          buffer_size=buffer_size,
                          seed=seed,
                          batch_size=batch_size,
                          model_save_path=model_save_path,
                          summary_path=summary_path,
                          infer_path=infer_path,
                          infer_train_env=infer_train_env,
                          infer_test_env=infer_test_env,
                          learning_steps=learning_steps)
        # Whether saved weights are loaded is decided by load_weights.
        ddpg_model.initialize(load_weights=load_weights, verbose=False)
        ddpg_model.train()

Пример #10

0

Показать файл

    def init_model(self):
        """Build every imitation and DDPG model and return them as one list.

        For each window length in ``[3, 7, 14, 21]`` an LSTM and a CNN
        imitation model are built with ``load_weights=True``. Afterwards one
        DDPG model per (window length, predictor type) pair is created in a
        freshly reset TensorFlow graph and initialised with
        ``load_weights=True``. The collected model names are printed.

        Returns:
            list: the imitation models followed by the DDPG models.
        """
        # common settings
        batch_size = 64
        action_bound = 1.
        tau = 1e-3
        use_batch_norm = True
        nb_classes = 17
        window_lengths = [3, 7, 14, 21]
        predictor_types = ['cnn', 'lstm']

        restored = []
        names = []

        # (weights-file stem, reported name, model class), in load order.
        imitation_specs = (
            ('imit_LSTM%3A window = {}', 'imitation_lstm_window_{}',
             StockLSTM),
            ('imit_CNN%3A window = {}', 'imitation_cnn_window_{}',
             StockCNN),
        )
        for window_length in window_lengths:
            for stem_fmt, name_fmt, model_cls in imitation_specs:
                stem = stem_fmt.format(window_length)
                names.append(name_fmt.format(window_length))
                imitation_model = model_cls(
                    nb_classes, window_length,
                    weights_file='weights/' + stem + '.h5')
                imitation_model.build_model(load_weights=True)
                restored.append(imitation_model)

        # One DDPG model per (window length, predictor type) pair, each in
        # its own graph and session.
        for window_length in window_lengths:
            for predictor_type in predictor_types:
                names.append('DDPG_window_{}_predictor_{}'.format(
                    window_length, predictor_type))

                tf.reset_default_graph()
                sess = tf.Session()
                tflearn.config.init_training_mode()

                action_dim = [nb_classes]
                state_dim = [nb_classes, window_length]
                scope_name = get_variable_scope(
                    window_length, predictor_type, use_batch_norm)
                with tf.variable_scope(scope_name):
                    actor = StockActor(
                        sess, state_dim, action_dim, action_bound, 1e-4,
                        tau, batch_size, predictor_type, use_batch_norm)
                    critic = StockCritic(
                        sess=sess,
                        state_dim=state_dim,
                        action_dim=action_dim,
                        tau=tau,
                        learning_rate=1e-3,
                        num_actor_vars=actor.get_num_trainable_vars(),
                        predictor_type=predictor_type,
                        use_batch_norm=use_batch_norm)
                    exploration_noise = OrnsteinUhlenbeckActionNoise(
                        mu=np.zeros(action_dim))

                    weights_path = get_model_path(
                        window_length, predictor_type, use_batch_norm)
                    results_path = get_result_path(
                        window_length, predictor_type, use_batch_norm)

                    ddpg_model = DDPG(
                        None, sess, actor, critic, exploration_noise,
                        obs_normalizer=obs_normalizer,
                        config_file='config/stock.json',
                        model_save_path=weights_path,
                        summary_path=results_path)
                    ddpg_model.initialize(load_weights=True, verbose=False)
                    restored.append(ddpg_model)

        print("model names",names)

        return restored

Пример #11

0

Показать файл

Файл: stock_trading.py Проект: ylcoder/drl-portfolio-management

def _load_model(norm_func=None):
    """Build a ``DDPG`` model from module-level objects and load its weights.

    Args:
        norm_func: forwarded as ``obs_normalizer``; defaults to ``None``.

    Returns:
        The ``DDPG`` instance after ``initialize(load_weights=True)``.
    """
    # env, sess, actor, critic, actor_noise and both paths are taken from the
    # enclosing module scope.
    loaded = DDPG(
        env,
        sess,
        actor,
        critic,
        actor_noise,
        obs_normalizer=norm_func,
        config_file='config/stock.json',
        model_save_path=model_save_path,
        summary_path=summary_path,
    )
    loaded.initialize(load_weights=True)
    return loaded

Python DDPG примеры использования