Example #1
    def __init__(self):

        self.num_time_steps = 300  #keep - number of sequences that will be fed into the model

        # Data cluster
        self.dataset = 'realmix'
        self.data_cluster = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=NUM_STOCKS,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        self.collection = self.data_cluster.collection
        (st_shape, lt_shape) = self.data_cluster.get_model_shape()

        # Agent
        self.agent = Agent(model_shape=(st_shape, lt_shape),
                           num_time_steps=self.num_time_steps)
        self.agent.pre_train(self.collection,
                             cached_data=False,
                             epochs=PT_EPOCHS,
                             sample_size=SAMPLE_SIZE,
                             lr_preTrain=1e-3)

        # Save the model
        self.agent.model.save(Path.cwd() / 'pre_trained_models' /
                              str(int(time.time())))
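The timestamped folder saved above is the hand-off point to the training stage: a later run builds the same Agent and loads those weights back. A minimal sketch, assuming PT_NETWORK is set to the folder name written by the save call (the same constant the Trader example later in this section passes to load_network):

agent = Agent(model_shape=(st_shape, lt_shape), num_time_steps=300)
agent.load_network(PT_NETWORK)  # PT_NETWORK would point at the timestamped folder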
Example #2
    def make_world(self):
        world = World()
        # set any world properties first
        world.dim_c = 2
        num_good_agents = 1
        num_adversaries = 2
        num_agents = num_adversaries + num_good_agents
        # add agents
        world.agents = [Agent() for _ in range(num_agents)]
        for i, agent in enumerate(world.agents):
            agent.name = 'agent %d' % i
            agent.survived = True
            # agent.game_time = 0
            agent.leader = (i == 0)
            # agent.silent = True if i > 0 else False
            agent.silent = True
            agent.adversary = (i < num_adversaries)
            agent.accel = 1 if agent.adversary else 0.5
            agent.max_speed = 4 if agent.adversary else 2.5
        self.reset_world(world)
        return world
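To exercise this scenario method outside its original codebase, World and Agent only need to be plain attribute containers. A minimal sketch under that assumption (the real classes are expected to carry far more simulation state):

class Agent:
    """Bare attribute container standing in for the project's Agent class."""
    def __init__(self):
        self.name = ''
        self.survived = True
        self.leader = False
        self.silent = True
        self.adversary = False
        self.accel = 0.0
        self.max_speed = 0.0

class World:
    """Bare attribute container standing in for the project's World class."""
    def __init__(self):
        self.dim_c = 0    # communication channel dimensionality, set by make_world
        self.agents = []  # populated by make_world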
Example #3
def save_yaml_config(config_path: Path, args, role_type: str,
                     agent: Agent) -> None:
    class Dumper(yaml.Dumper):
        def increase_indent(self, flow=False, *_, **__):
            return super().increase_indent(flow=flow, indentless=False)

    if role_type not in {'actor', 'learner'}:
        raise ValueError('Invalid role type')

    with open(config_path, 'w') as f:
        args_config = {
            k: v
            for k, v in vars(args).items()
            if not k.endswith('path') and k != 'agent_config' and k != 'config'
        }
        yaml.dump({role_type: args_config}, f, sort_keys=False, Dumper=Dumper)
        f.write('\n')
        yaml.dump({'agent': agent.export_config()},
                  f,
                  sort_keys=False,
                  Dumper=Dumper)
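A possible invocation of the function above, shown as a sketch; the argparse fields and the stub agent's export_config() payload are illustrative assumptions, not taken from the original project:

import argparse
from pathlib import Path

class StubAgent:
    """Stand-in exposing the export_config() hook the function relies on."""
    def export_config(self):
        return {'gamma': 0.99, 'learning_rate': 1e-3}

args = argparse.Namespace(env_name='CartPole-v1', seed=0,
                          config='run.yaml', model_path='out/model')
save_yaml_config(Path('actor.yaml'), args, 'actor', StubAgent())
# actor.yaml now holds the filtered CLI arguments under 'actor:' followed,
# after a blank line, by the agent's exported settings under 'agent:'.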
Example #4
import gym
from gym import Env
from gridworld import WindyGridWorld  # import the custom windy grid-world environment
from core import Agent  # import the custom Agent base class
from utils import learning_curve  # plotting helper

env = WindyGridWorld()
env.reset()
env.render()

agent = Agent(env, capacity=10000)  # an agent with a memory capacity of 10000
data = agent.learning(max_episode_num=180, display=True)
learning_curve(data,
               2,
               0,
               title="learning curve",
               x_name="episode",
               y_name="time steps")
env.close()
Example #5
import fixer
import os

from core import Agent
from configparser import ConfigParser

__version__ = "0.3.0"

environments_object = ConfigParser()
environments_object.read("/opt/sixfab/.env")
pms_environments = environments_object["pms"]

configs = {
    "version": __version__,
    "feeder_interval": int(pms_environments.get("INTERVAL", 10)),
    "experimental_enabled": True if pms_environments.get("EXPERIMENTAL", False) == "True" else False,
    "environments": pms_environments,
    "environments_object": environments_object,
    "firmware_update_repository": "https://git.sixfab.com/sixfab-power/firmwares.git"
}

if __name__ == "__main__":
 
    agent = Agent(
        pms_environments["TOKEN"],
        configs=configs
    )
    agent.loop()
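For local experimentation, the /opt/sixfab/.env file read above only needs a pms section with the keys this script accesses; a sketch that writes such a file with placeholder values (the real token comes from the device setup):

from configparser import ConfigParser

cfg = ConfigParser()
cfg["pms"] = {
    "TOKEN": "<device-token>",   # required: passed to Agent()
    "INTERVAL": "10",            # optional: feeder_interval falls back to 10
    "EXPERIMENTAL": "False",     # optional: only the literal string "True" enables it
}
with open("pms.env", "w") as env_file:  # on a real device this would be /opt/sixfab/.env
    cfg.write(env_file)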
Example #6
File: main.py  Project: Niwood/DQL-Trader
class Trader:
    def __init__(self):

        self.num_time_steps = 300  #keep - number of sequences that will be fed into the model

        # Data cluster
        self.dataset = 'realmix'
        self.data_cluster = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=num_stocks,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        self.collection = self.data_cluster.collection
        (st_shape, lt_shape) = self.data_cluster.get_model_shape()

        # Agent
        self.agent = Agent(model_shape=(st_shape, lt_shape),
                           num_time_steps=self.num_time_steps)
        self.agent.load_network(PT_NETWORK)

        # Environment
        self.env = StockTradingEnv2()

        # Epsilon schedule: hold at 1.0 for the first plateau fraction of episodes,
        # decay linearly to MIN_EPSILON, then hold at MIN_EPSILON for the final plateau
        self.epsilon_steps = \
            [1.0] * int(EPISODES * epsilon_plateau) + \
            list(np.linspace(1, MIN_EPSILON, EPISODES - int(EPISODES * epsilon_plateau) * 2 + 1)) + \
            [MIN_EPSILON] * int(EPISODES * epsilon_plateau)

        # Statistics
        self.epsilon = epsilon
        stats = [
            'avgReward', 'buyAndHold', 'netWorthChng', 'buyTrigger',
            'sellTrigger', 'holdTrigger', 'epsilon', 'totTrainTime',
            'amountBalance', 'amountAsset', 'holdReward', 'sellReward',
            'buyReward', 'avgAmount', 'episodeTime'
        ]
        self.estats = pd.DataFrame(np.nan,
                                   index=np.arange(1, EPISODES + 1),
                                   columns=stats)
        self.estats.index.name = 'Episode'

        # Assessment stats are logged at episode 1 and then every EPOCH_SIZE episodes
        astats_index = np.append(np.array([1]),
                                 np.arange(0, EPISODES + 1, EPOCH_SIZE)[1:])
        self.astats = pd.DataFrame(np.nan,
                                   index=astats_index,
                                   columns=[
                                       'lastReward', 'buyAndHold',
                                       'netWorthChng', 'buyTrigger',
                                       'sellTrigger', 'holdTrigger',
                                       'avgAmount', 'networkName'
                                   ])
        self.astats.index.name = 'Episode'

        # Create model folder and model ID
        self.model_id = int(time.time())
        self.agent.model._name = str(self.model_id)
        self.folder = Path.cwd() / 'models' / str(self.model_id)
        self.folder.mkdir(exist_ok=False)

        # Save architecture
        self.agent.model._name = str(self.model_id)
        with open(self.folder / 'arch.txt', 'w') as fh:
            self.agent.model.summary(print_fn=lambda x: fh.write(x + '\n'))

        # Save metadata
        metadata = {
            'model_id': self.model_id,
            'note': ' '.join([str(x) for x in sys.argv[1::]]),
            'date': str(datetime.now()),
            'episodes': EPISODES,
            'epoch_size': EPOCH_SIZE,
            'time_steps': self.num_time_steps,
            'data': {
                'dataset': self.dataset,
                'length': len(self.collection),
            },
            'optimizer': {
                'algorithm': K.eval(self.agent.model.optimizer._name),
                'learning_rate': float(K.eval(self.agent.model.optimizer.lr))
            },
            'agent': {
                'discount': self.agent.DISCOUNT,
                'replay_memory_size': self.agent.REPLAY_MEMORY_SIZE,
                'min_replay_memory_size': self.agent.MIN_REPLAY_MEMORY_SIZE,
                'minibatch_size': self.agent.MINIBATCH_SIZE,
                'update_target_network_every': self.agent.UPDATE_TARGET_EVERY
            },
            'pre_training': {
                'conf_mat': str(self.agent.conf_mat)
            }
        }
        with open(self.folder / 'metadata.json', 'w') as outfile:
            json.dump(metadata, outfile)

        # Run
        self.run()

    def run(self):

        # Define iterator and track elapsed time
        episode_iter = tqdm(range(1, EPISODES + 1), ascii=True, unit='episode')
        last_iteration_time = datetime.fromtimestamp(episode_iter.start_t)
        self.episode_times = list()

        # Iterate over episodes
        for episode in episode_iter:

            # Slice estats for this episode for simplicity
            self._estats = self.estats.loc[episode]

            # Timer to track train time
            self.train_time = 0

            # Save actions
            self.actions = list()

            # Update tensorboard step every episode
            self.agent.tensorboard.step = episode

            # Reset episode reward and step number
            self.episode_reward = list()
            self.reward_action = [0, 0, 0]
            step = 1

            # Reset environment and get initial state
            current_state, _ = self.env.reset()

            # Reset flag and start iterating until episode ends
            done = False

            while not done:

                # Epsilon-greedy action selection
                if random.random() > self.epsilon:
                    # Exploit: take the action with the highest predicted Q value
                    action = np.argmax(self.agent.get_qs(current_state))
                else:
                    # Explore: take a random action
                    action = np.random.randint(0, self.env.ACTION_SPACE_SIZE)

                # Step the environment
                new_state, reward, done = self.env.step(action)

                # Save reward and action
                self.reward_action[action] += reward
                self.actions.append(action)

                # Accumulate the reward for this step
                self.episode_reward.append(reward)

                # Every step we update replay memory and train main network
                self.agent.update_replay_memory(
                    (current_state, action, reward, new_state, done))
                self.agent.train(done, step)
                self.train_time += self.agent.elapsed

                current_state = new_state
                step += 1

            max_action_errors = {
                0: [max(self.agent.action_errors[0], default=0)],
                1: [max(self.agent.action_errors[1], default=0)],
                2: [max(self.agent.action_errors[2], default=0)]
            }
            min_action_errors = {
                0: [min(self.agent.action_errors[0], default=0)],
                1: [min(self.agent.action_errors[1], default=0)],
                2: [min(self.agent.action_errors[2], default=0)]
            }
            mean_action_errors = {
                0: [np.mean(self.agent.action_errors[0])],
                1: [np.mean(self.agent.action_errors[1])],
                2: [np.mean(self.agent.action_errors[2])]
            }
            print('max_action_errors: ', max_action_errors)
            print('min_action_errors: ', min_action_errors)
            print('mean_action_errors: ', mean_action_errors)

            # Save model
            if not episode % EPOCH_SIZE or episode == 1:
                self._save_model(episode)
            else:
                # For non-checkpoint episodes, only record the accumulated training time
                self._estats.loc['totTrainTime'] = round(self.train_time, 1)

            # Decay epsilon
            self.epsilon = self.epsilon_steps[episode]

            # Time tracking
            iteration_time = datetime.fromtimestamp(episode_iter.last_print_t)
            datetime_delta = iteration_time - last_iteration_time
            self.delta_time = datetime_delta.seconds + datetime_delta.microseconds / 1e6
            last_iteration_time = iteration_time

            # Render
            if not episode % AGGREGATE_STATS_EVERY:
                self._render(episode)

            # Free memory
            gc.collect()

    def _render(self, episode):
        ''' Render stats for a given episode '''
        print('=' * 20)

        # Env to render
        self.env.render(stats=self._estats)

        # Episode aggregated stats
        self._estats.loc['epsilon'] = round(self.epsilon, 2)
        self._estats.loc['avgReward'] = round(mean(self.episode_reward), 3)
        self._estats.loc['totTrainTime'] = round(self.train_time, 1)
        self._estats.loc['holdTrigger'] = round(
            self.actions.count(0) / len(self.actions), 3)
        self._estats.loc['buyTrigger'] = round(
            self.actions.count(1) / len(self.actions), 3)
        self._estats.loc['sellTrigger'] = round(
            self.actions.count(2) / len(self.actions), 3)
        self._estats.loc['holdReward'] = round(
            safe_div(self.reward_action[0], self.actions.count(0)), 3)
        self._estats.loc['buyReward'] = round(
            safe_div(self.reward_action[1], self.actions.count(1)), 3)
        self._estats.loc['sellReward'] = round(
            safe_div(self.reward_action[2], self.actions.count(2)), 3)
        self._estats.loc['episodeTime'] = self.delta_time

        # Print episode stats
        self.estats.loc[episode] = self._estats
        print(self.estats.loc[episode - 10:episode])
        print(
            f'Replay memory allocation: {self.agent.replay_memory_allocation * 100}%'
        )

        # Pickle and save episode stats
        self.estats.loc[1:episode].to_pickle(self.folder / 'estats.pkl')

    def _save_model(self, episode):
        ''' At each epoch checkpoint, save the model and make a sample inference with epsilon=0 '''
        epoch_id = int(time.time())
        self.last_model_name = f'{epoch_id}_EPS{episode}of{EPISODES}.model'
        self.agent.model.save(self.folder / self.last_model_name)
        try:
            self._model_assessment(episode)
        except Exception:
            # Model assessment is best-effort; never let it abort training
            pass

    def _model_assessment(self, episode):
        ''' Make a model predict on a sample with epsilon=0 '''

        # Load data
        dc = DataCluster(
            dataset=self.dataset,
            remove_features=['close', 'high', 'low', 'open', 'volume'],
            num_stocks=1,
            verbose=False,
            wavelet_scales=WAVELET_SCALES,
            num_time_steps=self.num_time_steps)
        collection = dc.collection
        (st_shape, lt_shape) = dc.get_model_shape()

        # Model assessment
        ma = ModelAssessment(collection=collection,
                             model_shape=(st_shape, lt_shape),
                             num_time_steps=self.num_time_steps)
        ma.astats = self.astats.loc[episode]
        ma.load_model(model_name=self.folder / self.last_model_name)
        ma.simulate()
        ma.render()
        self.astats.loc[episode] = ma.astats

        # Save stats for the last model name
        self.astats.loc[episode, 'networkName'] = self.last_model_name

        # Print assessment stats
        print(self.astats.loc[episode - 10:episode])

        # Pickle and save assessment stats and simulation run
        self.astats.loc[1:episode].to_pickle(self.folder / 'astats.pkl')
        ma.sim.to_pickle(self.folder /
                         f'sim_{ma.ticker}_EPS{episode}of{EPISODES}.pkl')
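The epsilon schedule built in Trader.__init__ above is a plateau at 1.0, a linear decay, and a second plateau at MIN_EPSILON. A standalone sketch with placeholder values for the module-level constants (EPISODES, MIN_EPSILON, epsilon_plateau) shows its shape:

import numpy as np

EPISODES, MIN_EPSILON, epsilon_plateau = 10, 0.1, 0.2  # placeholder values

plateau = int(EPISODES * epsilon_plateau)
epsilon_steps = (
    [1.0] * plateau
    + list(np.linspace(1, MIN_EPSILON, EPISODES - plateau * 2 + 1))
    + [MIN_EPSILON] * plateau
)
print([round(float(e), 2) for e in epsilon_steps])
# [1.0, 1.0, 1.0, 0.85, 0.7, 0.55, 0.4, 0.25, 0.1, 0.1, 0.1]
# Length is EPISODES + 1, so indexing with the 1-based episode counter stays in range.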
Example #7
class DialogueManager(object):

    def __init__(self, log_dir=os.path.join(dir_path, 'log')):
        self.user_dict = dict()
        tmp = os.getcwd()  # os.curdir is only '.', which would not restore the original cwd
        os.chdir(dir_path)
        interpreter = self.interpreter = Interpreter.load(
            'models/default/model_20190514-121130')
        os.chdir(tmp)
        self.agent = MyAgent(interpreter)
        self.log_dir = log_dir

    def chat(self, text, uid):
        """chat with user

        Args:
            text (str): user reply
            uid (str): user id to manage dialogue

        Returns:
            str: bot response string
        """
        if uid not in self.user_dict:
            user = User(uid)
            self.user_dict[uid] = user

        user = self.user_dict.get(uid)

        response = self.agent.handle_message(text, uid)
        return response

    def get_response_json(self, text, uid):

        response = self.chat(text, uid)
        ret = {
            'build': uid,
            'answer': response
        }

        # TODO patch here
        if self.agent.get_status_code(uid) == 1:
            ret.update(self.agent._get_user_state_tracker(uid).get_ask_answer_content())
        return ret

    def finish_chat(self, uid, tel_number, record_log=True):

        if record_log:

            log = self.get_log(uid)
            cur_time = time.strftime('%Y-%m-%d %H:%M:%S')
            user_dir = os.path.join(self.log_dir, tel_number)

            if not os.path.isdir(user_dir):
                os.makedirs(user_dir)
            with open(os.path.join(user_dir, cur_time + '.txt'), 'wb') as f:
                if log is not None:
                    f.write(log.encode('utf-8'))
        # remove user from DialogueManager
        if uid in self.user_dict:

            self.user_dict.pop(uid)

        self.agent.finish_conversation(uid)

    def get_info(self, uid):
        return self.agent.get_user_info(uid)

    def get_status_code(self, uid):
        return self.agent.get_status_code(uid)

    def hang_up(self, uid):
        self.agent.hang_up(uid)

    def get_log(self, uid):
        return self.agent.get_logger(uid)

    def get_pack(self, uid, tel_number):
        pack = self.agent.pack_info(uid, tel_number)
        self.finish_chat(uid, tel_number)
        return pack
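A minimal interaction sketch for the manager above; the user id and telephone number are placeholders, and it assumes dir_path and the trained NLU model referenced in __init__ exist on disk:

dm = DialogueManager()
print(dm.get_response_json('你好', 'user-001'))        # one chat turn
dm.finish_chat('user-001', tel_number='13800000000')   # write the log and drop the user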
Example #8
import os
import sys
path = os.path.dirname(__file__)
sys.path.append(path)
import gym
from gym import Env
from gridworld import WindyGridWorld
from core import Agent

env = WindyGridWorld()  # create the windy grid-world environment
env.reset()  # reset the environment
env.render()  # display the environment's visual interface

agent = Agent(env, capacity=10000)
data = agent.learning(max_episode_num=180, display=False)