예제 #1
0
def train_input_fn(data, monthly_means, monthly_stds, disc,
                   seq_length=10,
                   batch_size=64):
    """Build the shuffled, batched training dataset from raw rows.

    For every start offset in ``range(0, seq_length * MONTH, 21 // MONTH)``
    the rows from that offset onwards are passed through ``monthify``, cast
    to float32 and standardised with ``monthly_means`` / ``monthly_stds``.
    Each result is cut into consecutive windows of ``seq_length + 1`` rows
    (an incomplete trailing window is dropped) and every window is mapped
    through ``splitter_fn(disc)``. The windows of all offsets are chained
    together, shuffled and grouped into batches.

    Args:
        data: Sliceable input handed to ``monthify``.
        monthly_means: Subtracted from the monthified rows.
        monthly_stds: Divisor applied after the subtraction.
        disc: Forwarded unchanged to ``splitter_fn``.
        seq_length: Window length minus one.
        batch_size: Windows per batch; an incomplete last batch is dropped.

    Returns:
        Tuple ``(dataset, examples_per_epoch)`` where ``examples_per_epoch``
        is the summed length of the monthified arrays over all offsets.
    """
    window = seq_length + 1
    shuffle_buffer = 10000

    per_offset = []
    row_total = 0
    for offset in range(0, seq_length * MONTH, 21 // MONTH):
        rows = monthify(data[offset:]).astype('float32')
        # In-place updates: the float32 array is modified, not rebound.
        rows -= monthly_means
        rows /= monthly_stds

        windows = (tf.data.Dataset.from_tensor_slices(rows)
                   .batch(window, drop_remainder=True)
                   .map(splitter_fn(disc)))
        per_offset.append(windows)
        row_total += len(rows)

    # Chain the per-offset datasets one after another, in offset order.
    combined = per_offset[0]
    for extra in per_offset[1:]:
        combined = combined.concatenate(extra)

    combined = combined.shuffle(shuffle_buffer)
    combined = combined.batch(batch_size, drop_remainder=True)
    return combined, row_total
예제 #2
0
# Evaluation script fragment: restores the latest checkpoint and prepares
# (history, ground-truth continuation) windows from the test rows.

# Size of the second dimension of the test array.
n_features = tests.shape[1]

# Echo the run configuration.
print("MONTH:", MONTH)
print("n_tests:", len(tests))
print("n_features:", n_features)

# Rebuild the model and restore the most recent checkpoint from
# gd.checkpoint_dir.
# NOTE(review): the model is constructed with batch_size=1000 but built
# below with a leading dimension of 1 -- confirm which one is intended.
model = gd.build_model(n_features=n_features,
                       n_disc=N_DISC,
                       rnn_units=1024,
                       batch_size=1000)
model.load_weights(tf.train.latest_checkpoint(gd.checkpoint_dir))
model.build(tf.TensorShape([1, None]))

# Standardise the monthified test rows with the supplied statistics
# (in place, on a float32 array).
mtests = monthify(tests).astype('float32')
mtests -= monthly_means
mtests /= monthly_stds

starts = []
trues = []
# Slide over the standardised rows in steps of 21 // MONTH; each position
# needs 2 * SEQ_LENGTH rows: SEQ_LENGTH of history plus SEQ_LENGTH that follow.
for st_idx in range(0, len(mtests) - 2 * SEQ_LENGTH, 21 // MONTH):
    # History window (copied so the rescaling below leaves mtests untouched).
    start = mtests[st_idx:st_idx + SEQ_LENGTH].copy()
    # this is the true ending of the history
    true = mtests[st_idx + SEQ_LENGTH:st_idx + 2 * SEQ_LENGTH].copy()

    # Undo the standardisation on the history copy.
    start *= monthly_stds
    start += monthly_means

    # Undo the standardisation on the ground-truth copy.
    true *= monthly_stds
    true += monthly_means
    # NOTE(review): the visible excerpt ends here; nothing is appended to
    # `starts` / `trues` within these lines.
예제 #3
0
def load_all(split=0.8):
    """Load all series, align them on the funds timeline and split the rows.

    Nine series are loaded with ``load_one`` and used to find the date range
    they have in common (latest first date to earliest last date). Dates are
    expressed as float day offsets from the start of that range, and each
    feature series is interpolated with ``interp1d`` onto the funds sampling
    days that fall inside the range.

    Only funds, NYSE, unemployment, WIG20, GDP and interest rates become
    feature columns (in that order). USD, USD/PLN and VIX still narrow the
    common date range but are not part of the output, so they are not
    interpolated. The frames returned by ``load_one`` are not modified.

    Args:
        split: Fraction of the rows (in funds row order) that goes into the
            training part; the remaining rows form the test part.

    Returns:
        Tuple ``(train, tests, monthly_means, monthly_stds)``: the first
        ``int(n_rows * split)`` rows, the remaining rows, and the column-wise
        mean and standard deviation of ``monthify`` applied to the full
        (unsplit) array.
    """
    # Series that become feature columns, listed in output column order.
    features = [load_one(source)
                for source in (FUNDS, NYSE, UNEMPLOYMENT, WIG20, GDP, IRATES)]
    # Loaded only because their coverage limits the common date range.
    range_only = [load_one(source) for source in (USD, USDPLN, VIX)]
    frames = features + range_only

    start_date = max(df["date"].min() for df in frames)
    end_date = min(df["date"].max() for df in frames)
    n_days = (end_date - start_date).days

    def day_offsets(df):
        # Float days since start_date, computed without touching the frame.
        return np.array([float(delta.days)
                         for delta in df["date"] - start_date])

    offsets = [day_offsets(df) for df in features]

    # Target timeline: funds sampling days inside the common range.
    funds_days = offsets[0]
    timeline = funds_days[(funds_days <= n_days) & (funds_days >= 0)]

    def interpolate(df, days):
        # One row per value column, one column per timeline entry.
        values = np.array(df.loc[:, df.columns != "date"])
        return interp1d(days, values.T)(timeline)

    # Stack the per-series rows, then transpose to (time, feature).
    all_arr = np.concatenate(
        [interpolate(df, days) for df, days in zip(features, offsets)]).T

    # NOTE(review): the statistics below are computed on the unsplit array,
    # i.e. they include the rows that end up in the test part -- confirm
    # that this is intended.
    m_all_arr = monthify(all_arr)
    monthly_means = np.mean(m_all_arr, axis=0)
    # Centre first, then take the deviation, as the original code did.
    monthly_stds = np.std(m_all_arr - monthly_means, axis=0)

    cut = int(len(all_arr) * split)
    return all_arr[:cut], all_arr[cut:], monthly_means, monthly_stds