Example #1
def reset(self):
    self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(2,))
    # print(str({"metric": "epoch_reward", "value": self.epoch_reward, "step": self.total_steps}))
    print('{"metric": "epoch_reward", "value": '+str(self.epoch_reward)+', "step":'+str(self.total_steps)+'}')
    logbook().record_epoch_reward(self.epoch_reward)
    self.epoch_reward = 0
    return np.array(self.state)
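The print in the example above emits one metric per line as a JSON string (the format logged to Floydhub elsewhere in this code). A minimal sketch of the same line built with the standard json module instead of string concatenation; the helper name log_metric is hypothetical, not part of the project:

import json

def log_metric(name, value, step):
    # One JSON object per line, e.g. {"metric": "epoch_reward", "value": 3.5, "step": 1000}
    print(json.dumps({"metric": name, "value": value, "step": step}))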
Example #2
def reset(self):
    # We used to reset the state each time - don't see how this is sensible as it's a continuation in the simulation backend.
    # Still worthwhile having regular epochs, as we want it to think about maximising daily bidding (or some other period),
    # but resetting the state is a little incomprehensible.
    # Edge case here is the first instance, whereby the state is set to obs_low.
    # self.state = self.np_random.uniform(low=0, high=0.05, size=(8,))

    # print(str({"metric": "epoch_reward", "value": self.epoch_reward, "step": self.total_steps}))
    print('{"metric": "epoch_reward", "value": '+str(self.epoch_reward)+', "step":'+str(self.total_steps)+'}')
    logbook().record_epoch_reward(self.epoch_reward)
    self.epoch_reward = 0
    return np.array(self.state)
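The comments above argue that the simulation state should carry over between epochs rather than being re-randomised, with the first reset as the only special case. A minimal sketch of that idea as a standalone gym-style environment (an illustration under those assumptions, not the project's actual environment):

import numpy as np
import gym

class ContinuingEnv(gym.Env):
    # Hypothetical env whose state persists across resets.
    observation_space = gym.spaces.Box(low=-1.0, high=1.0, shape=(2,))

    def reset(self):
        if not hasattr(self, 'state'):
            # First reset only: seed the state at obs_low, as the comments describe.
            self.state = self.observation_space.low.copy()
        # Later resets keep the state, so each epoch continues the same simulation.
        return np.array(self.state)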
Example #3
    def reset(self):
        # We used to reset the state each time - don't see how this is sensible as it's a continuation in the simulation backend.
        # Still worthwhile having regular epochs, as we want it to think about maximising daily bidding (or some other period),
        # but resetting the state is a little incomprehensible.
        # Edge case here is the first instance, whereby the state is set to obs_low.
        # self.state = self.np_random.uniform(low=0, high=0.05, size=(8,))

        # print(str({"metric": "epoch_reward", "value": self.epoch_reward, "step": self.total_steps}))
        print('{"metric": "epoch_reward", "value": ' + str(self.epoch_reward) +
              ', "step":' + str(self.total_steps) + '}')

        print('{"metric": "unique_bids", "value": ' +
              str(logbook().get_num_unique_bids(previous_steps=50)) +
              ', "step":' + str(self.total_steps) + '}')

        logbook().record_epoch_reward(self.epoch_reward)
        self.epoch_reward = 0

        # Every X steps, write results to file in case of dramatic failure.
        if self.total_steps % 20000 == 0:
            # if self.total_steps % 100 == 0:
            logbook().save_json(label=str(self.label))

        return np.array(self.state)
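The modulo check above periodically flushes results to disk so that a crash late in training loses little work. A minimal sketch of the same checkpointing pattern using only the standard library; the function and file names are hypothetical, not the logbook API:

import json

def maybe_checkpoint(records, step, label, every=20000):
    # Every `every` steps, dump the accumulated records to disk.
    if step % every == 0:
        with open(label + '_checkpoint.json', 'w') as f:
            json.dump(records, f)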
Example #4
def render(self, mode='human'):
    # Log bid/value in Floydhub
    # print('{"metric": "bid", "value": '+str(self.last_action)+', "step":'+str(self.total_steps)+'}')

    # Log in logbook suite
    logbook().record_price(self._state_dict['price'], self.total_steps)
    logbook().record_demand(self._state_dict['demand'], self.total_steps)
    # Log bidstack in logbook suite.
    for bid in self._state_dict['all_bids']:
        logbook().record_bid(bid['label'], bid['price'], bid['quantity'], self.total_steps)
    return None
Example #5
    sys.exit()
# Make sure that the participant name is one of the allowed ones.
elif sys.argv[1] not in market_config['PARTICIPANTS']:
    print(
        'Error: Participant not in list of possible participants. Must be one of:'
    )
    [print(" -" + p) for p in market_config['PARTICIPANTS']]
    sys.exit()
else:
    participant_name = sys.argv[1]
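The branch above validates the participant name passed on the command line against market_config['PARTICIPANTS'] and prints the allowed values on a mismatch. A minimal sketch of an equivalent check with argparse, assuming market_config is already loaded (an alternative illustration, not the project's CLI):

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('participant', choices=market_config['PARTICIPANTS'],
                    help='Name of the market participant to run.')
participant_name = parser.parse_args().participant  # argparse exits and lists the allowed choices on error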

# ENV_NAME = 'CartPole-v0'
# ENV_NAME = 'MultiBidMarket-v0'
ENV_NAME = 'MultiBidMarketEfficient-v0'

logbook().record_metadata('Environment', ENV_NAME)
logbook().record_metadata('datetime', pendulum.now().isoformat())
for param in market_config:
    logbook().record_metadata('Market: ' + param, market_config[param])

# # Set the tensorflow memory growth to auto - this is important when running two simultaneous models
# # Otherwise, the first process hogs all the memory and the second (the one that we watch the output of)
# # gets a CUDA_ERROR_OUT_OF_MEMORY message and crashes. Instead of most of the code below, you can also use config.gpu_options.allow_growth = True  # Set automatically - takes some time.
# config = tf.ConfigProto()
# config.gpu_options.per_process_gpu_memory_fraction = 0.95 / float(len(market_config['PARTICIPANTS'])) # Alternatively, allocate as a fraction of the available memory:
# sess = tf.Session(config=config)
# K.set_session(sess)
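The commented-out block above names config.gpu_options.allow_growth = True as the simpler alternative to fixed per-process memory fractions. A minimal sketch of that variant, assuming TensorFlow 1.x with the Keras backend used in the block:

import tensorflow as tf
from keras import backend as K

config = tf.ConfigProto()
config.gpu_options.allow_growth = True  # claim GPU memory on demand rather than all up front
K.set_session(tf.Session(config=config))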

# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
# Wrap so that we have a discrete action space - maps the internal MultiDiscrete action space to a Discrete action space.
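The wrapper referred to in the comment above is not included in this snippet. A minimal sketch of such a mapping as a gym ActionWrapper, assuming the env exposes a MultiDiscrete action space; the class name FlattenMultiDiscrete is hypothetical:

import numpy as np
import gym

class FlattenMultiDiscrete(gym.ActionWrapper):
    def __init__(self, env):
        super().__init__(env)
        self.nvec = env.action_space.nvec
        # One Discrete action for every combination of MultiDiscrete sub-actions.
        self.action_space = gym.spaces.Discrete(int(np.prod(self.nvec)))

    def action(self, act):
        # Unravel the flat index back into one sub-action per MultiDiscrete dimension.
        return np.array(np.unravel_index(act, self.nvec))

# e.g. env = FlattenMultiDiscrete(gym.make(ENV_NAME))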
Example #6
from marketsim.logbook.logbook import logbook
import pendulum

logbook().set_label("TEST" + " " + pendulum.now().format('ddd D/M HH:mm'))
logbook().record_hyperparameter('alpha', 1)
logbook().record_hyperparameter('beta', 2)
logbook().submit()