def update_prediction(db):
    # Load the trained models.
    predict_days = [7, 14, 30]
    nets = []
    # A sample of the environment's observation shape is needed,
    # so arbitrarily load the KQ003380 ticker.
    sample_prices_list, _valid_list = lib.data.load_prices(["KQ003380"])
    for pdays in predict_days:
        file_path = "data/v3.0-phase3-{}.data".format(pdays)
        env = pdenviron.PredEnv(prices_list=sample_prices_list,
                                predict_days=pdays)
        net = models.SimpleFFDQN(env.observation_space.shape[0],
                                 env.action_space.n)
        models.load_model(file_path, net)
        nets.append(net)

    today = datetime.datetime.now().date()
    with closing(db.cursor()) as cur:
        cur.execute("select scode from stocks")
        for row in cur.fetchall():
            scode = row[0]
            prices_list, val_prices_list = lib.data.load_prices([scode])
            if len(prices_list[0].open) < 60:
                continue
            try:
                with closing(db.cursor()) as cur2:
                    for i in range(len(nets)):
                        pdays = predict_days[i]
                        env = pdenviron.PredEnv(prices_list=prices_list,
                                                predict_days=pdays)
                        # Use the last date as the offset.
                        obs = env.reset(0, len(prices_list[0].open) - 1)
                        values = environ.apply_model_from_state(obs, nets[i])
                        # Save the prediction results.
                        cur2.execute(
                            stmt4,
                            (scode, today, pdays, values[0], values[1],
                             values[2], values[3], values[4]))
            except Exception as ex:
                logger.error('update_prediction() failed: ' + str(ex))
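
# --- Usage sketch (hedged): how update_prediction() might be driven. ---
# Assumptions, not part of the original source: the database is reachable
# through any DB-API 2.0 connection exposing .cursor() (sqlite3 is used
# here purely for illustration), the "stocks" table exists, and stmt4 is
# the module-level INSERT statement the function executes.
if __name__ == "__main__":
    import sqlite3

    db = sqlite3.connect("stocks.db")  # hypothetical database file
    try:
        update_prediction(db)
        db.commit()  # persist the rows written through stmt4
    finally:
        db.close()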
# Select the data source: a single CSV file or a directory of files.
if os.path.isfile(args.data):
    stock_data = {"YNDX": data.load_relative(args.data)}
    env = environ.StocksEnv(stock_data, bars_count=BARS_COUNT,
                            reset_on_close=True, state_1d=False,
                            volumes=False)
    env_tst = environ.StocksEnv(stock_data, bars_count=BARS_COUNT,
                                reset_on_close=True, state_1d=False)
elif os.path.isdir(args.data):
    env = environ.StocksEnv.from_dir(args.data, bars_count=BARS_COUNT,
                                     reset_on_close=True, state_1d=False)
    env_tst = environ.StocksEnv.from_dir(args.data, bars_count=BARS_COUNT,
                                         reset_on_close=True, state_1d=False)
else:
    raise RuntimeError("No data to train on")

env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)
val_data = {"YNDX": data.load_relative(args.valdata)}
env_val = environ.StocksEnv(val_data, bars_count=BARS_COUNT,
                            reset_on_close=True, state_1d=False)

writer = SummaryWriter(comment="-simple-" + args.run)
net = models.SimpleFFDQN(env.observation_space.shape[0],
                         env.action_space.n).to(device)
tgt_net = ptan.agent.TargetNet(net)
selector = ptan.actions.EpsilonGreedyActionSelector(EPSILON_START)
agent = ptan.agent.DQNAgent(net, selector, device=device)
exp_source = ptan.experience.ExperienceSourceFirstLast(
    env, agent, GAMMA, steps_count=REWARD_STEPS)
buffer = ptan.experience.ExperienceReplayBuffer(exp_source, REPLAY_SIZE)
optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

# main training loop
step_idx = 0
eval_states = None
best_mean_val = None

with common.RewardTracker(writer, np.inf, group_rewards=100) as reward_tracker:
    while True:
        step_idx += 1
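        # The loop body is truncated here; it follows the same pattern as
        # train_model() below: populate the replay buffer, anneal epsilon
        # linearly, then periodically evaluate, checkpoint, and sync the
        # target network. A minimal sketch of the first two steps, assuming
        # EPSILON_STOP and EPSILON_STEPS are module-level constants defined
        # alongside EPSILON_START (assumed names, mirroring
        # config.epsilon_stop / config.epsilon_steps in train_model()):
        buffer.populate(1)
        selector.epsilon = max(
            EPSILON_STOP, EPSILON_START - step_idx / EPSILON_STEPS)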
def train_model(cuda, phase, premodel, pdays):
    """
    cuda : True / False
    phase : 1~3
    premodel: data/phase1_model.data
    pdays: integer
    """
    device = torch.device("cuda" if cuda else "cpu")
    phase = int(phase)
    if phase == 1:
        config = sconfig
    elif phase == 2:
        config = mconfig
    elif phase == 3:
        config = pconfig

    run_name = "v" + config.version + "-phase" + str(phase)
    saves_path = os.path.join("saves", run_name)
    os.makedirs(saves_path, exist_ok=True)
    save_name = ""
    writer = SummaryWriter(comment=run_name)

    prices_list, val_prices_list = data.load_prices(config.choices)
    predict_days = None  # stays None for phases 1 and 2
    if phase == 1:
        s_env = environ.StocksEnvS(prices_list)
        stock_env = s_env
        val_stock_env = environ.StocksEnvS(val_prices_list)
        save_name = "{}.data".format(run_name)
    elif phase == 2:
        # Load the phase-1 network graph.
        s_env = environ.StocksEnvS(prices_list)
        prenet = models.SimpleFFDQN(s_env.observation_space.shape[0],
                                    s_env.action_space.n)  # .to(device)
        models.load_model(premodel, prenet)
        # Create the phase-2 environment.
        stock_env = environ.StocksEnvM(prices_list, prenet)
        val_stock_env = environ.StocksEnvM(val_prices_list, prenet)
        save_name = "{}.data".format(run_name)
    elif phase == 3:
        predict_days = int(pdays)
        stock_env = pdenviron.PredEnv(prices_list=prices_list,
                                      predict_days=predict_days)
        val_stock_env = pdenviron.PredEnv(prices_list=val_prices_list,
                                          predict_days=predict_days)
        save_name = "{}-{}.data".format(run_name, predict_days)

    net = models.SimpleFFDQN(stock_env.observation_space.shape[0],
                             stock_env.action_space.n).to(device)
    tgt_net = ptan.agent.TargetNet(net)
    selector = ptan.actions.EpsilonGreedyActionSelector(config.epsilon_start)
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(
        stock_env, agent, config.gamma, steps_count=config.reward_steps)
    buffer = ptan.experience.ExperienceReplayBuffer(exp_source,
                                                    config.replay_size)
    optimizer = optim.Adam(net.parameters(), lr=config.learning_rate)

    # main training loop
    step_idx = 0
    eval_states = None
    best_mean_val = None

    with common.RewardTracker(writer, np.inf,
                              group_rewards=100) as reward_tracker:
        while step_idx < config.end_step:
            step_idx += 1
            buffer.populate(1)
            selector.epsilon = max(
                config.epsilon_stop,
                config.epsilon_start - step_idx / config.epsilon_steps)

            new_rewards = exp_source.pop_rewards_steps()
            if new_rewards:
                reward_tracker.reward(new_rewards[0], step_idx,
                                      selector.epsilon)

            if len(buffer) < config.replay_initial:
                continue

            if eval_states is None:
                print("Initial buffer populated, start training")
                eval_states = buffer.sample(config.states_to_evaluate)
                eval_states = [np.array(transition.state, copy=False)
                               for transition in eval_states]
                eval_states = np.array(eval_states, copy=False)

            if step_idx % config.eval_every_step == 0:
                mean_val = common.calc_values_of_states(eval_states, net,
                                                        device=device)
                writer.add_scalar("values_mean", mean_val, step_idx)
                if best_mean_val is None or best_mean_val < mean_val:
                    if best_mean_val is not None:
                        print("%d: Best mean value updated %.3f -> %.3f" % (
                            step_idx, best_mean_val, mean_val))
                    best_mean_val = mean_val
                    # torch.save(net.state_dict(), os.path.join(
                    #     saves_path, "mean_val-%.3f.data" % mean_val))

            optimizer.zero_grad()
            batch = buffer.sample(config.batch_size)
            loss_v = common.calc_loss(batch, net, tgt_net.target_model,
                                      config.gamma ** config.reward_steps,
                                      device=device)
            loss_v.backward()
            optimizer.step()

            if step_idx % config.target_net_sync == 0:
                tgt_net.sync()

            if step_idx % config.checkpoint_every_step == 0:
                idx = step_idx // config.checkpoint_every_step
                torch.save(net.state_dict(),
                           os.path.join(saves_path,
                                        "checkpoint-%d.data" % idx))

            if step_idx % config.validation_every_step == 0:
                res = validation.validation_run(stock_env, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_test", val, step_idx)
                res = validation.validation_run(val_stock_env, net,
                                                device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_val", val, step_idx)

    models.save_model(os.path.join(saves_path, save_name), net,
                      {"predict_days": predict_days})
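
# --- Usage sketch (hedged): invoking train_model() per its docstring. ---
# The argument values below are illustrative only; phase 2 expects a
# phase-1 checkpoint path in `premodel`, and `pdays` only matters for
# phase 3's prediction horizon.
if __name__ == "__main__":
    train_model(cuda=True, phase=3,
                premodel="data/phase1_model.data", pdays=7)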
    config = mconfig
elif phase == 3:
    config = pconfig

run_name = "v" + config.version + "-phase" + str(phase)
saves_path = os.path.join("saves", run_name)
save_name = ""
writer = SummaryWriter(comment=run_name)

prices_list, val_prices_list = data.load_prices(config.choices)
predict_days = int(args.pdays)
stock_env = pdenviron.PredEnv(prices_list=prices_list,
                              predict_days=predict_days)

net = models.SimpleFFDQN(stock_env.observation_space.shape[0],
                         stock_env.action_space.n)  # .to(device)
# models.load_model(args.model, net)
net.load_state_dict(
    torch.load(args.model, map_location=lambda storage, loc: storage))

for _ in range(10):
    done = False
    obs = stock_env.reset()
    while not done:
        values = environ.apply_model_from_state(obs, net)
        action = pdenviron.PredAction(np.argmax(values, axis=0))
        obs, reward, done, info = stock_env.step(action)
        print("action:{}, netprice:{}, reward:{}, values:{}".format(
            action.value, info["net_price"], reward, values))
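
# --- Reference sketch (assumption): environ.apply_model_from_state. ---
# The helper itself is not shown in this section; from its call sites (one
# observation in, a Q-value vector out that feeds np.argmax and the
# values[0]..values[4] columns in update_prediction()), it presumably runs
# a single gradient-free forward pass. A minimal sketch under that
# assumption, reusing the numpy/torch imports already present here:
def apply_model_from_state_sketch(obs, net):
    # Batch the single observation, run the network without gradients,
    # and return the Q-values as a flat numpy array.
    obs_v = torch.tensor(np.array([obs], dtype=np.float32))
    with torch.no_grad():
        q_v = net(obs_v)  # shape: (1, n_actions)
    return q_v.squeeze(0).cpu().numpy()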