Example #1
from utils.logging import default_logger as logger
from utils.image import create_gif_subproc
from utils.tensor_board import global_board
from utils.helper_classes import Counter, Timer
from utils.conf import Config
from utils.save_env import SaveEnv
from utils.prep import prep_args

from env.walker.single_walker import BipedalWalker

# definitions
observe_dim = 24
action_dim = 4

# configs
c = Config()
# c.restart_from_trial = "2020_05_06_21_50_57"
c.max_episodes = 5000
c.max_steps = 1000
c.replay_size = 500000

# or: explore_noise_params = [(0, 0.2)] * action_dim
c.explore_noise_params = (0, 0.2)
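# presumably (mean, std, clip_min, clip_max) for TD3-style target policy smoothing noise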
c.policy_noise_params = (0, 1.0, -0.5, 0.5)
c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/walker/naive_ddpg_td3/"

# train configs
# lr: learning rate, int: interval
# warm-up should be less than one epoch
c.ddpg_update_batch_size = 100
Example #2
# -*- coding: utf-8 -*-
"""
Created on 2018.12.05

@author: zhangjun
"""

import tensorflow as tf
from utils.model import load_model_predict, load_model_raw_predict
from utils.conf import Config
import glob

config = Config(base_dir='../conf')


def test_load_model_predict():
    sess = tf.Session()
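    # example feature row; multi-valued columns such as col6 appear to use '#' as the value separator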
    feature_dict_example = {'col1': 2.0,
                            'col2': 3.0,
                            'col3': 5.0,
                            'col4': "a",
                            'col5': 3.0,
                            'col6': "r#w#k",
                            'col7': "f",
                            'col8': "e"}

    model_path_list = glob.glob(config.get_model_prop('model_export_dir') + '/*')
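    # export directories are presumably timestamp-named, so max() picks the most recent one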
    export_path = max(model_path_list)

    p = load_model_predict(export_path=export_path,
                           feature_dict=feature_dict_example,
Example #3
from utils.image import create_gif_subproc
from utils.tensor_board import global_board
from utils.helper_classes import Counter, Timer
from utils.conf import Config
from utils.save_env import SaveEnv
from utils.prep import prep_args
from utils.parallel import get_context, Pool

from env.walker.carrier import BipedalMultiCarrier

# definitions
observe_dim = 28
action_dim = 4

# configs
c = Config()
# c.restart_from_trial = "2020_05_09_15_00_31"
c.max_episodes = 50000
c.max_steps = 1000
c.replay_size = 50000

c.agent_num = 1
c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/mcarrier/naive_ppo_parallel/"

# train configs
# lr: learning rate, int: interval
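# presumably the number of parallel rollout worker processes (cf. the Pool import above)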
c.workers = 5
c.discount = 0.99
c.learning_rate = 3e-4
c.entropy_weight = None
Example #4
from utils.logging import default_logger as logger
from utils.image import create_gif_subproc
from utils.tensor_board import global_board
from utils.helper_classes import Counter, Timer
from utils.conf import Config
from utils.save_env import SaveEnv
from utils.prep import prep_args

from env.walker.carrier import BipedalMultiCarrier

# definitions
observe_dim = 28
action_dim = 4

# configs
c = Config()
# c.restart_from_trial = "2020_05_06_21_50_57"
c.max_episodes = 20000
c.max_steps = 2000
c.replay_size = 500000

c.agent_num = 3
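# presumably the number of sub-policies per agent (values > 1 would give MADDPG-style policy ensembles)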
c.sub_policy_num = 1
c.explore_noise_params = (0, 0.2)
c.q_increase_rate = 1
c.q_decrease_rate = 1
c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/mcarrier/maddpg/"

# train configs
# lr: learning rate, int: interval
Example #5
import numpy as np

from utils.logging import default_logger as logger
from utils.tensor_board import global_board
from utils.helper_classes import Counter, Timer, Object
from utils.conf import Config
from utils.save_env import SaveEnv
from utils.prep import prep_args
from utils.parallel import get_context, Pool, mark_static_module

from env.magent_helper import *

from .utils import draw_agent_num_figure

mark_static_module(magent)

# configs
c = Config()

c.map_size = 50
c.agent_ratio = 0.04
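# round the target agent count to a perfect square, presumably so agents can be laid out on a square grid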
agent_num = int(np.sqrt(c.map_size * c.map_size * c.agent_ratio)) ** 2
c.neighbor_num = 3

# c.restart_from_trial = "2020_05_09_15_00_31"
c.max_episodes = 5000
c.max_steps = 500
c.replay_size = 20000

c.device = "cuda:0"
c.storage_device = "cpu"
c.root_dir = "/data/AI/tmp/multi_agent/magent/naive_ppo_parallel/"
Example #6
from utils.logging import default_logger as logger
from utils.image import create_gif_subproc
from utils.tensor_board import global_board
from utils.helper_classes import Counter, Timer
from utils.conf import Config
from utils.save_env import SaveEnv
from utils.prep import prep_args

from env.walker.single_walker import BipedalWalker

# definitions
observe_dim = 24
action_dim = 4

# configs
c = Config()
c.max_episodes = 5000
c.max_steps = 1000
c.replay_size = 500000

c.explore_noise_params = (0, 0.2)
c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/walker/naive_ddpg/"

c.ddpg_update_batch_size = 100

if __name__ == "__main__":
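    # c.restart_from_trial is not set in this example; Config presumably defaults it to None, starting a fresh trial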
    save_env = SaveEnv(c.root_dir, restart_use_trial=c.restart_from_trial)
    prep_args(c, save_env)

    # save_env.remove_trials_older_than(diff_hour=1)
Example #7
import numpy as np

from models.frames.algorithms.hddpg import HDDPG
from models.naive.env_magent_ddpg import Actor, Critic

from utils.logging import default_logger as logger
from utils.tensor_board import global_board
from utils.helper_classes import Counter, Timer, Object
from utils.conf import Config
from utils.save_env import SaveEnv
from utils.prep import prep_args

from env.magent_helper import *

from .utils import draw_agent_num_figure

# configs
c = Config()

c.map_size = 50
c.agent_ratio = 0.04
c.neighbor_num = 3
agent_num = int(np.sqrt(c.map_size * c.map_size * c.agent_ratio))**2

# c.restart_from_trial = "2020_05_06_21_50_57"
c.max_episodes = 20000
c.max_steps = 2000
c.replay_size = 500000

c.agent_num = 3
c.q_increase_rate = 1
c.q_decrease_rate = 1
c.device = "cuda:0"
Example #8
import gym

from utils.tensor_board import global_board, normalize_seq_length
from utils.helper_classes import Counter, Timer
from utils.conf import Config
from utils.save_env import SaveEnv
from utils.prep import prep_args
from utils.gym_env import disable_view_window

# definitions
env_name = "LunarLander-v2"
env = gym.make(env_name)
disable_view_window()
observe_dim = env.observation_space.shape[0]
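# LunarLander-v2 has a discrete action space with 4 actions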
action_dim = 4

# configs
c = Config()
# c.restart_from_trial = "2020_05_09_15_00_31"
c.max_episodes = 50000
c.max_steps = 300
c.replay_size = 10000

c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/lunar_lander/naive_ppo/"

# train configs
# lr: learning rate, int: interval
c.discount = 0.99
c.learning_rate = 1e-3
c.entropy_weight = 1e-2
c.ppo_update_batch_size = 100
c.ppo_update_times = 4
Example #9
# -*- coding: utf-8 -*-
"""
Created on 2018.12.05

@author: zhangjun
"""

import tensorflow as tf
from utils.util import list_files

from utils.conf import Config
from utils.data import dataProcess
from utils.feature import map_more_feature

config = Config(base_dir='../conf')


def input_fn(data_path, num_epochs, mode, batch_size):

    sequence_cols = config.SEQUENCE_COLS

    def squence_split(raw_features):
        if len(sequence_cols) > 0:
            for col, sep in sequence_cols:
                raw_features = several_values_columns_to_array(raw_features, col, sep)
        return raw_features

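    # split a delimiter-separated string column into a dense string tensor, padding missing entries with ''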
    def several_values_columns_to_array(raw_features, feature_name, sep):
        raw_features[feature_name] = tf.sparse_tensor_to_dense(
            tf.string_split(raw_features[feature_name], sep),
            default_value='')