Пример #1
0
def test_td_agent_does_not_perform_invalid_actions_when_filtered(env):
    """With an ActionFilter in place the agent must never pick action 1,
    so its Q-value stays at the initializer default of 0."""
    table = QTable([0, 1, 2], initializer=lambda: 0)
    policy = EpsilonGreedyPolicy(0.2, ActionFilter(env))
    learner = TDAgent(policy, table, 1, 0.9, 0.1)
    session = RatesExchangeSession(env, learner)
    runs = 0
    while runs < 100:
        session.run()
        runs += 1

    assert table[[0, 0.0, 1.0, 0.0], 1] == 0
Пример #2
0
def test_td_agent_produces_sensible_q_values(env):
    """After 100 episodes the tabular Q-function should rank action 2
    above actions 0 and 1 for the probed state."""
    table = QTable([0, 1, 2])
    learner = TDAgent(EpsilonGreedyPolicy(0.2), table, 1, 0.9, 0.1)
    session = RatesExchangeSession(env, learner)
    runs = 0
    while runs < 100:
        session.run()
        runs += 1

    probe = [0, 0.0, 1.0, 0.0]
    assert table[probe, 2] > table[probe, 0]
    assert table[probe, 2] > table[probe, 1]
Пример #3
0
def test_td_agent_produces_sensible_regression_model_predictions(env):
    """After training, the neural Q-approximator should rank action 2
    above actions 0 and 1 for the probed state."""
    net = QRegressionModel(5, [100], 0.1)
    q_fn = QNeuronal(net, 3, 10)
    current_episode = 0
    # Epsilon decays as 1/(episode + 1): the lambda reads the enclosing
    # variable, so rebinding it in the loop below shrinks exploration.
    policy = NormalEpsilonGreedyPolicy(lambda: 1 / (current_episode + 1))
    learner = TDAgent(policy, q_fn, 1, 0.9, 0.1)
    session = RatesExchangeSession(env, learner)
    for current_episode in range(100):
        session.run()

    probe = [0, 0.0, 1.0, 0.0]
    assert q_fn[probe, 2] > q_fn[probe, 0]
    assert q_fn[probe, 2] > q_fn[probe, 1]
Пример #4
0
from pythia.core.environment.exchange_trading_environment import ExchangeTradingEnvironment
from pythia.core.sessions.rates_exchange_session import RatesExchangeSession
from pythia.core.streams.shape_shift_rates import ShapeShiftRates, SUPPORTED_COINS
from pythia.core.utils.profiling import clock_block
from pythia.core.visualization.coin_exchange_visualizer import CoinExchangeVisualizer

# Every supported coin except BTC (the token we hold) is a trade target.
targets = [coin for coin in SUPPORTED_COINS]
targets.remove("BTC")
agent = AnalyticalAgent('0.1', '0', 2, targets)

if __name__ == '__main__':
    # Use the bundled recording unless a path is supplied on the command
    # line. BUG FIX: sys.argv[0] is the script name itself, so a user-given
    # path was previously ignored — the first real argument is sys.argv[1].
    path = "../data/recordings/2018-02-28-shapeshift-exchange-records.json" if len(
        sys.argv) == 1 else sys.argv[1]
    with open(path) as stream:
        with clock_block("Initialization"):
            rates = ShapeShiftRates(stream, preload=True)
            vis = CoinExchangeVisualizer(rates)
            env = ExchangeTradingEnvironment(rates, "BTC", "0.1")
            # Record every exchange so the visualizer can plot them later.
            env.register_listener(vis.record_exchange)
            sess = RatesExchangeSession(env, agent)

        with clock_block("Running"):
            sess.run()

        # Typo fix in the report message: "crated" -> "created".
        print("The analytical agent created a token difference of: {0}".format(
            sess.difference()))
        print("Current balance: {0} {1}".format(env.amount, env.token))
        print("Exchange actions: {0}".format(vis.actions))

        # Rewind the rate stream before rendering so the chart covers the
        # whole recording.
        rates.reset()
        vis.render("BTC_GAME")
Пример #5
0
with tf.Session():
    exchange = "{}_{}".format(CASH_TOKEN, TOKEN)
    rates = DataFrameStream(train, name=TOKEN)
    vis = CoinExchangeVisualizer(rates)
    env_train = ExchangeTradingEnvironment(
        rates,
        CASH_TOKEN,
        start_amount=1000,
        window=201,
        state_transform=DQTRatioStateTransformer(exchange),
        reward_calculator=DQTRewardCalc(100, exchange))
    env_train.register_listener(vis.record_exchange)
    agent = DQTAgent(build_model, [CASH_TOKEN, None, TOKEN], 1, 0.85, 64, 64,
                     tau)
    sess_train = RatesExchangeSession(env_train, agent)

    for e in range(TRAIN_EPISODES):
        with clock_block("Running"):
            sess_train.run()
        print("Training episode {} finished.".format(e))
        print("Token difference after training: {0}".format(
            sess_train.difference()))
        #vis.render(exchange)

    env_test = ExchangeTradingEnvironment(
        DataFrameStream(test, name=TOKEN),
        CASH_TOKEN,
        start_amount=1000,
        window=201,
        state_transform=DQTRatioStateTransformer(exchange),
def make_session(env, agent):
    """Factory helper: wrap *env* and *agent* in a RatesExchangeSession."""
    session = RatesExchangeSession(env, agent)
    return session
Пример #7
0
def run_shares_dqn_regression_model(holding_tokens,
                                    buying_tokens,
                                    starting_balance,
                                    window,
                                    hidden_layers,
                                    learning_rate,
                                    memory_size,
                                    epsilon_episode_start,
                                    num_steps,
                                    gamma,
                                    alpha,
                                    episodes,
                                    output_dir=None):
    """Train and evaluate a TD agent backed by a Q regression model on share rates.

    :param holding_tokens: path naming the held token; None means plain "CURRENCY"
    :param buying_tokens: path to the rate stream of the token being traded
        (local file or gs:// object)
    :param starting_balance: initial balance handed to the environment
    :param window: number of look-back rate steps included in the state
    :param hidden_layers: hidden-layer sizes for QRegressionModel
    :param learning_rate: learning rate of the Q regression model
    :param memory_size: replay-memory size of QNeuronal
    :param epsilon_episode_start: numerator of the 1/(episode+1) epsilon decay
    :param num_steps: TD look-ahead steps
    :param gamma: discount factor
    :param alpha: TD learning rate
    :param episodes: number of training episodes
    :param output_dir: if given, model checkpoints and an "effectiveness"
        summary are written there
    """
    token_h = "CURRENCY" if holding_tokens is None else os.path.basename(holding_tokens)
    token_b = os.path.basename(buying_tokens)

    # Streams on Google Cloud Storage must be opened through TF's file API.
    open_fn = open
    if buying_tokens.startswith("gs://"):
        open_fn = lambda name: file_io.FileIO(name, 'r')

    with open_fn(buying_tokens) as stream, tf.Session() as sess:
        with clock_block("Initialization"):
            rates = ShareRates(Symbol(token_b, stream))
            env = ExchangeTradingAiEnvironment(rates, token_h, starting_balance, window, {1: token_h, 2: token_b},
                                               TotalBalanceReward())

            # Input size: 3 scalar features plus two values per window step
            # — presumably; confirm against the environment's state layout.
            model = QRegressionModel(3 + window * 2, hidden_layers, learning_rate)
            saver, ckpt = None, None
            if output_dir is not None:
                saver, ckpt = tf.train.Saver(), os.path.join(output_dir, "model.ckpt")
            # Resume from a previous run when a checkpoint already exists.
            if ckpt is not None and tf.train.checkpoint_exists(ckpt):
                saver.restore(sess, ckpt)

            Q = QNeuronal(model, n=3, memory_size=memory_size)
            episode = 0
            # The lambda reads `episode` from this scope, so epsilon decays
            # as the training loop below advances the counter.
            policy = NormalEpsilonGreedyPolicy(lambda: epsilon_episode_start / (episode + 1), ActionFilter(env))
            agent = TDAgent(policy, Q, num_steps, gamma, alpha)
            rates_sess = RatesExchangeSession(env, agent)

        for e in range(episodes):
            episode = e
            with clock_block("Running"):
                rates_sess.run()
            print("Episode {} finished.".format(episode))
            # Typo fix in the progress message: "crated" -> "created".
            print("The td agent created a token difference of: {0}".format(rates_sess.difference()))
            if output_dir is not None:
                saver.save(sess, ckpt)

        with clock_block("Evaluation"):
            # Average the balance difference over EVAL_STEPS extra runs.
            diff = 0.0
            for _ in range(EVAL_STEPS):
                rates_sess.run()
                diff += float(rates_sess.difference())
            effectiveness = diff / float(EVAL_STEPS)
            if output_dir is not None:
                summary = Summary(value=[Summary.Value(tag="effectiveness", simple_value=effectiveness)])
                eval_path = os.path.join(output_dir, "effectiveness")
                summary_writer = tf.summary.FileWriter(eval_path)
                summary_writer.add_summary(summary)
                summary_writer.flush()
                # Close the writer so the event-file handle is released
                # (it was previously leaked).
                summary_writer.close()

        print("Current balance: {0} {1}".format(env.amount, env.token))
        print("Effectiveness: {0}".format(effectiveness))