Example No. 1
# assumed imports for these examples (stripped in this listing);
# functions, create_simdata, manual_preprocessing, one_hot_encoding,
# xgboost_forecast, fit_lstm, fit_cond_rnn, is_enough and smape are
# project-local helpers assumed to be in scope
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

def main(iteration, name):

    smape_dict = {}

    print("xgboost with redefine is running")
    start = time.perf_counter()

    #loading the data
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    #70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]
    setback = len(train)

    predictions = []
    ground_truth = []

    for i in range(len(test)):
        #get breakpoints for train
        history = functions.ada_preprocessing(train)

        #save the final set of breakpoints
        bkp = None
        if i == len(test) - 1:
            bkp = history["concept"]

        history = one_hot_encoding(history)

        #add new test observation to train series
        train.append(test[i])

        #pass the last point of the history dataframe so the same concept dummies can be extracted
        test_df = manual_preprocessing(train, history.tail(1))

        ground_truth.append(train[-1])

        #training data = history
        prediction = xgboost_forecast(history, test_df.loc[:, "t-1":])
        predictions.append(prediction)

    end = time.perf_counter()
    print("Time spent on xgboost with redefine: {:.2f}m".format(
        (end - start) / 60))

    error = smape(np.asarray(predictions), np.asarray(ground_truth))
    smape_dict[name] = error
    #     print("SMAPE: {:.4f}".format(error))
    #plot_save(predictions, ground_truth, bkp, "results/xgboost/redefine/"+name, setback)

    dict_path = "results/xgboost/redefine/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
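
The smape helper used for the error metric is not shown in the listing. A minimal sketch, assuming the standard symmetric MAPE definition; the project's version may handle scaling or zeros differently:

import numpy as np

def smape(yhat, y):
    # symmetric mean absolute percentage error, in percent;
    # yhat and y are equal-length 1-D arrays (undefined where both are zero)
    denom = (np.abs(y) + np.abs(yhat)) / 2.0
    return np.mean(np.abs(y - yhat) / denom) * 100
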
Example No. 2
def main(iteration, name):
	print("xgboost with retrain is running")
	
	smape_dict = {}
	#loading the data
	#print("xgboost with retrain is alive")
	data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

	#note: this is only used to get the lagged values; the concept columns and the rest are dropped subsequently
	data = functions.ada_preprocessing(data)
	data = data.loc[:, "t":"t-5"]

	#train/test split
	n = len(data)
	train, test = data[:int(0.7*n)], data[int(0.7*n):]    

	#fitting and plotting with concept
	start = time.perf_counter()
	error, y, yhat = walk_forward_validation(train, test)
	end = time.perf_counter()
	print("Time spent on xgboost with retrain: {:.2f}s".format((end-start)))

	smape_dict[name] = error
	# print("SMAPE: {:.4f}".format(error))
	#plt.plot(y, label = "Expected", color = "black")
	#plt.plot(yhat, label = "Predicted", color = "red")
	#plt.legend()
	#plt.title(name)

	#saving the plots
	#image_path = "results/xgboost/retrain/"+name+".png"
	#plt.savefig(image_path)
	#plt.clf()
	# plt.show()

	#saving the dictionary containing errors
	dict_path = "results/xgboost/retrain/errors/error"+str(iteration)+name+".txt"
	with open(dict_path, 'w') as file:
		for key in smape_dict.keys():
			file.write("%s,%s\n"%(key,smape_dict[key]))
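
walk_forward_validation is likewise project-local. A plausible sketch matching how it is called here, assuming an XGBoost regressor refit on all accumulated rows at every step, with "t" as the target and "t-1" to "t-5" as lag features; the real implementation may differ:

import numpy as np
import pandas as pd
from xgboost import XGBRegressor

def walk_forward_validation(train, test):
    history = train.copy()
    y, yhat = [], []
    for _, row in test.iterrows():
        # refit from scratch on everything seen so far
        model = XGBRegressor(objective="reg:squarederror")
        model.fit(history.loc[:, "t-1":"t-5"], history["t"])
        pred = model.predict(row["t-1":"t-5"].to_frame().T)[0]
        y.append(row["t"])
        yhat.append(pred)
        # the test row joins the training history for the next step
        history = pd.concat([history, row.to_frame().T])
    # smape as sketched after Example No. 1
    return smape(np.asarray(yhat), np.asarray(y)), y, yhat
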
Example No. 3

lin1_abrupt = create_simdata.linear1_abrupt()
lin1_abrupt = functions.preprocess_timeseries(lin1_abrupt)

series = pd.DataFrame({"t":lin1_abrupt})
series = functions.autocorrelations_in_window(10, series)
series = functions.partial_autocorrelations_in_window(10, series)
series = functions.features_in_window(10, series)
series = functions.oscillation_behaviour_in_window(10, series)

timeseries = create_simdata.linear1_abrupt()

timeseries = functions.ada_preprocessing(timeseries, delay_correction=2)

nonlinear2_abrupt_raw = create_simdata.nonlinear3_abrupt()  # note: despite the variable name, this calls the nonlinear3 generator
nonlinear2_abrupt = functions.preprocess_timeseries(nonlinear2_abrupt_raw, windowsize=20)

plt.plot(nonlinear2_abrupt)

lin1_abrupt = functions.preprocess_timeseries(lin1_abrupt)  # note: lin1_abrupt was already preprocessed above, so this applies the preprocessing a second time
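
The window-feature helpers above (autocorrelations_in_window and friends) are also project-local. As an illustration of the idea only, a sketch of a single rolling autocorrelation feature; the column name acf_lag1 and the single lag are inventions here, and the real helper presumably adds one column per lag:

import pandas as pd

def autocorrelations_in_window(windowsize, series):
    # hypothetical illustration: lag-1 autocorrelation of the "t" column
    # over a rolling window, appended as a new feature column
    series = series.copy()
    series["acf_lag1"] = series["t"].rolling(windowsize).apply(
        lambda w: w.autocorr(lag=1), raw=False)
    return series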

Example No. 4
def main(iteration, name):

    smape_dict = {}

    print("xgboost with discard is running")
    start = time.perf_counter()

    #loading the data
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    #70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]
    setback = len(train)

    #get breakpoints for train set
    history = functions.ada_preprocessing(train)

    #note the last concept that appeared
    last_num_concepts = max(list(history["concept"]))

    predictions = []
    points = 0
    bkp = None
    for i in range(len(test)):
        #add new test observation to train series
        train.append(test[i])

        #pass all the values available in series up to and including the new test point
        test_df = manual_preprocessing(train)

        #training data = history
        prediction = xgboost_forecast(history.loc[:, "t":"t-5"],
                                      test_df.loc[:, "t-1":"t-5"])
        predictions.append(prediction)

        #new dataframe with the predicted test observation already appended
        history = functions.ada_preprocessing(train)
        if i == len(test) - 1:
            bkp = history["concept"]

        #note the real concept for the test observation
        new_num_concepts = max(list(history["concept"]))

        #if the number of concepts changes, check whether the new concept has enough datapoints
        if new_num_concepts > last_num_concepts:
            points = is_enough(history)
            #if the new concept has at least 20 points, keep them and drop the
            #rest of the data; otherwise keep using the same dataset
            if points >= 20:
                history = history.tail(points)
                last_num_concepts = new_num_concepts
                points = 0

    end = time.perf_counter()
    print("Time spent on xgboost with discard: {:.2f}m".format(
        (end - start) / 60))

    error = smape(np.asarray(predictions), np.asarray(test))
    smape_dict[name] = error
    # print("SMAPE: {:.4f}".format(error))
    #plot_save(predictions, ground_truth, bkp, "results/xgboost/discard/"+name, setback)

    dict_path = "results/xgboost/discard/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
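
is_enough is not shown either. From how it is used, it appears to return the number of recent observations already labeled with the newest concept; a sketch under that assumption, and under the further assumption that the newest concept forms a contiguous block at the end of the frame:

def is_enough(history):
    # count the rows carrying the newest (highest-numbered) concept label
    newest = history["concept"].max()
    return int((history["concept"] == newest).sum())
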
Example No. 5
def main(iteration, name):
    print("lstm with discard is running")
    smape_dict = {}
    # load the data
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    #70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]
    setback = len(train)

    #get breakpoints for train set
    history = functions.ada_preprocessing(train)

    #note the last concept that appeared
    last_num_concepts = max(list(history["concept"]))

    model = fit_lstm(history.loc[:, "t":"t-5"])

    predictions = []
    points = 0
    bkp = None

    start = time.perf_counter()

    for i in range(0, len(test)):
        #get test observation into necessary shape
        train.append(test[i])
        test_row = manual_preprocessing(train)

        X = test_row.loc[:, "t-1":"t-5"]
        X_arrays = np.asarray(X)
        test_X = np.hstack(X_arrays).reshape(X.shape[0], 1, X.shape[1])

        #get predictions for new test observation
        prediction = model.predict(test_X)
        predictions.append(prediction)

        #new dataframe with the predicted test observation already appended
        history = functions.ada_preprocessing(train)
        if i == len(test) - 1:
            bkp = history["concept"]

        #note the real concept for the test observation
        new_num_concepts = max(list(history["concept"]))

        #if the number of concepts changes, check whether the new concept has enough datapoints
        if new_num_concepts > last_num_concepts:
            #if the new concept has at least 20 points, keep them and drop the rest of the data
            points = is_enough(history)
            if points >= 20:
                print("found {} points from new concept".format(points))
                history = history.tail(points)
                last_num_concepts = new_num_concepts
                points = 0
                # retrain the model
                model = fit_lstm(history.loc[:, "t":"t-5"])
        #otherwise just keep using the same dataset

    end = time.perf_counter()
    print("Time spent: {:.2f}h".format((end - start) / 3600))

    #inverting predictions to original scale
    #     predictions = scaler.inverse_transform(np.asarray(predictions).reshape([-1,1]))

    error = smape(np.asarray(predictions), np.asarray(test))
    smape_dict[name] = error
    print("SMAPE: {:.4f}".format(error))

    #plot_save(np.asarray(predictions), test, bkp, "results/lstm/discard/"+name, setback)

    #saving the dictionary containing errors
    dict_path = "results/lstm/discard/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
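
fit_lstm is project-local as well. A plausible Keras sketch, consistent with the (samples, 1 timestep, 5 features) shape that model.predict receives above; the layer size, epochs, and batch size are guesses:

import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

def fit_lstm(df):
    # "t" is the target; the five lags become a single-timestep sequence
    X = df.loc[:, "t-1":"t-5"].values.reshape(len(df), 1, 5)
    y = df["t"].values
    model = Sequential([LSTM(50, input_shape=(1, 5)), Dense(1)])
    model.compile(optimizer="adam", loss="mse")
    model.fit(X, y, epochs=50, batch_size=32, verbose=0)
    return model
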
Example No. 6
def main(iteration, name):
    smape_dict = {}
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    #70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]
    setback = len(train)
    bkp = None

    predictions = []

    start = time.perf_counter()

    # need to do first iteration outside the loop to avoid retraining the model
    history = functions.ada_preprocessing(train)

    history.drop(["transition", "steps_to_bkp", "steps_since_bkp"],
                 axis=1,
                 inplace=True)

    #get the dataframe for new test observation
    train.append(test[0])
    test_row = manual_preprocessing(train, history.tail(1))

    #change train and test into form appropriate for CondRNN
    train_X_input, train_X_aux, test_X_input, test_X_aux, train_y, test_y = forecast_preprocessing(
        history, test_row)

    model = fit_cond_rnn(train_X_input, train_X_aux, train_y)

    #get predictions for new test observation
    prediction = model.predict([test_X_input, test_X_aux])
    predictions.append(prediction)

    #we've got the first prediction, so now start from 1
    for i in range(1, len(test)):
        #get breakpoints for train dataset
        history = functions.ada_preprocessing(train)
        if i == len(test) - 1:
            bkp = history["concept"]
        history.drop(["transition", "steps_to_bkp", "steps_since_bkp"],
                     axis=1,
                     inplace=True)

        #get the dataframe for new test observation
        train.append(test[i])
        test_row = manual_preprocessing(train, history.tail(1))

        #change train and test into form appropriate for CondRNN
        train_X_input, train_X_aux, test_X_input, test_X_aux, train_y, test_y = forecast_preprocessing(
            history, test_row)

        #get predictions for new test observation
        prediction = model.predict([test_X_input, test_X_aux])
        predictions.append(prediction)

    end = time.perf_counter()
    print("Time spent on cond_rnn: {:.2f}h".format((end - start) / 3600))

    #inverting predictions to original scale
    #     predictions = scaler.inverse_transform(np.asarray(predictions).reshape([-1,1]))

    error = smape(np.asarray(predictions), np.asarray(test))
    smape_dict[name] = error
    #plot_save(np.asarray(predictions), test, bkp, "results/cond_rnn/"+name, setback)

    dict_path = "results/cond_rnn/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
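
forecast_preprocessing is not shown; from the call above it must split both frames into a recurrent input and an auxiliary conditioning input. A sketch under the assumption that the lag columns feed the sequence input and one-hot concept dummies feed the condition; in practice the dummy columns would have to be aligned between the two frames:

import numpy as np
import pandas as pd

def forecast_preprocessing(history, test_row):
    def split(df):
        # sequence input: (samples, 1 timestep, 5 lag features)
        X_input = df.loc[:, "t-1":"t-5"].values.reshape(len(df), 1, 5)
        # conditioning input: one indicator column per concept
        X_aux = pd.get_dummies(df["concept"]).values.astype("float32")
        y = df["t"].values
        return X_input, X_aux, y

    train_X_input, train_X_aux, train_y = split(history)
    test_X_input, test_X_aux, test_y = split(test_row)
    return train_X_input, train_X_aux, test_X_input, test_X_aux, train_y, test_y
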
Example No. 7
def main(iteration, name):
    print("lstm with oneshot is running")
    smape_dict = {}

    #loading the data
    data = pd.read_csv("data/" + name, usecols=[iteration]).iloc[:, 0].to_list()

    #70/30 train/test split
    split = int(0.7 * len(data))
    train, test = data[:split], data[split:]

    predictions = []

    # train the model outside the for-loop

    history = functions.ada_preprocessing(train)
    history = history.loc[:, "t":"t-5"]

    model = fit_lstm(history)

    start = time.perf_counter()
    for i in range(0, len(test)):
        print("lstm with oneshot is alive")
        #get test observation into necessary shape
        train.append(test[i])
        test_row = manual_preprocessing(train)

        X = test_row.loc[:, "t-1":"t-5"]
        X_arrays = np.asarray(X)
        test_X = np.hstack(X_arrays).reshape(X.shape[0], 1, X.shape[1])

        #get predictions for new test observation
        prediction = model.predict(test_X)
        predictions.append(prediction)

        #get breakpoints for train dataset
        #note: in the one-shot variant the model is never retrained, so this
        #recomputed history is not actually used
        history = functions.ada_preprocessing(train)
        history = history.loc[:, "t":"t-5"]

    end = time.perf_counter()
    print("Time spent: {:.2f}h".format((end - start) / 3600))

    #inverting predictions to original scale
    #     predictions = scaler.inverse_transform(np.asarray(predictions).reshape([-1,1]))

    error = smape(np.asarray(predictions), np.asarray(test))
    smape_dict[name] = error
    print("SMAPE: {:.4f}".format(error))

    plt.plot(test, label="expected", color="black")
    plt.plot(np.asarray(predictions).reshape([-1, 1]),
             label="predicted",
             color="red")
    plt.title(name)
    plt.legend()
    image_path = "results/lstm/oneshot/" + name + ".png"
    plt.savefig(image_path)
    plt.clf()

    #saving the dictionary containing errors
    dict_path = "results/lstm/oneshot/errors/error" + str(iteration) + name + ".txt"
    with open(dict_path, 'w') as file:
        for key in smape_dict.keys():
            file.write("%s,%s\n" % (key, smape_dict[key]))
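
Finally, manual_preprocessing, used throughout, is also not shown. In its single-argument form it appears to build the lag frame for the newest observation; a sketch under that assumption (the two-argument variant in Examples No. 1 and No. 6, which copies concept dummies from history.tail(1), is omitted):

import pandas as pd

def manual_preprocessing(series):
    # "t" is the current value, "t-1"..."t-5" the five preceding ones;
    # only the row for the newest observation is returned
    frame = pd.DataFrame({"t": series})
    for lag in range(1, 6):
        frame["t-" + str(lag)] = frame["t"].shift(lag)
    return frame.tail(1)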