def train_input_fn(data, monthly_means, monthly_stds, disc, seq_length=10, batch_size=64):
    """Build a shuffled, batched tf.data pipeline of normalized sequence windows.

    Several phase-shifted suffixes of the raw series are monthified,
    normalized, windowed into (seq_length + 1)-step chunks, split via
    ``splitter_fn(disc)``, and concatenated into one shuffled dataset, so
    windows starting at different offsets all contribute examples.

    Args:
        data: raw time-series array; suffixes of it are fed to ``monthify``.
        monthly_means: per-feature values subtracted from the monthified data.
        monthly_stds: per-feature values the monthified data is divided by.
        disc: forwarded to ``splitter_fn`` to build the window-splitting map.
        seq_length: training sequence length (one extra step is windowed so
            the splitter can form input/target pairs).
        batch_size: size of the final training batches.

    Returns:
        A ``(dataset, examples_per_epoch)`` tuple; the count is the total
        number of monthified rows accumulated across all offsets.
    """
    window = seq_length + 1
    total_rows = 0
    pipeline = None
    # Step through phase offsets of the raw series; each offset yields an
    # independently windowed shard that is concatenated onto the pipeline.
    for offset in range(0, seq_length * MONTH, 21 // MONTH):
        monthly = monthify(data[offset:]).astype('float32')
        monthly -= monthly_means
        monthly /= monthly_stds
        total_rows += len(monthly)
        shard = tf.data.Dataset.from_tensor_slices(monthly)
        shard = shard.batch(window, drop_remainder=True)
        shard = shard.map(splitter_fn(disc))
        pipeline = shard if pipeline is None else pipeline.concatenate(shard)
    pipeline = pipeline.shuffle(10000)
    pipeline = pipeline.batch(batch_size, drop_remainder=True)
    return pipeline, total_rows
# --- Script section: report data shapes, restore the trained model from its
# --- latest checkpoint, and prepare normalized test windows for generation.

# Number of input features per timestep (columns of the test matrix).
n_features = tests.shape[1]
print("MONTH:", MONTH)
print("n_tests:", len(tests))
print("n_features:", n_features)

# LOAD MODEL
# Rebuild the network architecture and restore the most recent checkpoint
# weights from the training run's checkpoint directory.
model = gd.build_model(n_features=n_features, n_disc=N_DISC, rnn_units=1024, batch_size=1000)
model.load_weights(tf.train.latest_checkpoint(gd.checkpoint_dir))
model.build(tf.TensorShape([1, None]))

# GENERATING
# Normalize the monthified test series with the precomputed monthly
# mean/std statistics (same transform the model was trained with).
mtests = monthify(tests).astype('float32')
mtests -= monthly_means
mtests /= monthly_stds

starts = []
trues = []
# Slide over the normalized test series; each step extracts a history window
# (`start`) and the window that actually followed it (`true`).
# NOTE(review): nothing visible here appends to `starts`/`trues`; the loop
# body presumably continues beyond this chunk — confirm against the full file.
for st_idx in range(0, len(mtests) - 2 * SEQ_LENGTH, 21 // MONTH):
    start = mtests[st_idx:st_idx + SEQ_LENGTH].copy()  # this is the real end of the history
    true = mtests[st_idx + SEQ_LENGTH:st_idx + 2 * SEQ_LENGTH].copy()
    # De-normalize both windows back to the original data scale.
    start *= monthly_stds
    start += monthly_means
    true *= monthly_stds
    true += monthly_means
def load_all(split=0.8):
    """Load all input series, align them on their common date range, and
    return train/test feature matrices plus monthly normalization stats.

    All nine series are loaded and jointly define the overlapping date
    range, but only funds, NYSE, unemployment, WIG20, GDP and interest
    rates become feature columns (USD, USDPLN and VIX are deliberately
    excluded, matching the previously commented-out concatenation).

    Args:
        split: fraction of the aligned daily series used for training.

    Returns:
        ``(train, tests, monthly_means, monthly_stds)`` — ``train``/``tests``
        are contiguous slices of the daily-interpolated feature matrix;
        the stats are the per-feature mean and (post-centering) std of the
        monthified matrix.

    NOTE(review): ``monthly_means``/``monthly_stds`` are computed over the
    WHOLE series (train + test), so test data leaks into the normalization
    statistics — confirm whether that is intended before changing it.
    """
    # Load every source; even the series excluded from the feature matrix
    # participate in determining the common date range.
    frames = {
        "funds": load_one(FUNDS),
        "nyse": load_one(NYSE),
        "unem": load_one(UNEMPLOYMENT),
        "wig": load_one(WIG20),
        "gdp": load_one(GDP),
        "irates": load_one(IRATES),
        "usd": load_one(USD),
        "usdpln": load_one(USDPLN),
        "vix": load_one(VIX),
    }

    # Overlap of all series: latest start date, earliest end date.
    start_date = max(df["date"].min() for df in frames.values())
    end_date = min(df["date"].max() for df in frames.values())
    n_days = (end_date - start_date).days

    def recalc_date(df):
        # Convert absolute dates to float day-offsets from start_date (in place).
        df["date"] = list(map(lambda d: float(d.days), df["date"] - start_date))
        return df

    for key in frames:
        frames[key] = recalc_date(frames[key])

    # The funds series defines the sampling grid; keep only in-range days.
    time_grid = frames["funds"]["date"]
    time_grid = time_grid[time_grid <= n_days]
    time_grid = time_grid[time_grid >= 0]
    time_grid = np.array(time_grid)

    def interpolate(df):
        # Resample one series onto the funds time grid via linear interpolation.
        time = np.array(df["date"])
        vals = np.array(df.loc[:, df.columns != "date"])
        inter_fn = interp1d(time, vals.T)
        return inter_fn(time_grid).T

    # Only these six series are interpolated into features (the unused
    # usd/usdpln/vix interpolations from the original are dropped — they
    # never affected the returned values). Tuple order fixes column order.
    feature_keys = ("funds", "nyse", "unem", "wig", "gdp", "irates")
    all_arr = np.concatenate([interpolate(frames[k]).T for k in feature_keys]).T

    # Per-feature normalization statistics of the monthified data: std is
    # taken after centering, which equals the std of the raw monthified data.
    m_all_arr = monthify(all_arr)
    monthly_means = np.mean(m_all_arr, axis=0)
    m_all_arr = m_all_arr - monthly_means
    monthly_stds = np.std(m_all_arr, axis=0)

    # Chronological split: first `split` fraction trains, remainder tests.
    s = int(len(all_arr) * split)
    train = all_arr[:s]
    tests = all_arr[s:]
    return train, tests, monthly_means, monthly_stds