def fetch(timesteps, future_vision, time_interval, test_percentage=10, epochs=1,
          Permutation='0 6 2 1 1 0 0 0', nodes1=128, patience=0, multipred=1,
          smooth=0):
    """Load a previously trained model together with its held-out test split.

    Parameters mirror the ones used to name the model file on disk; the model
    is looked up under the hard-coded models directory.

    Returns:
        (model, testx, testy, pdt_index) on success.

    Exits the process (after printing a message) if the model file does not
    exist yet.
    """
    datax, datay, pdt_index, permutation = load_perm(
        time_interval, future_vision, timesteps, Permutation,
        smooth=smooth, multipred=multipred)
    # Chronological split: the last test_percentage% of samples are the test set.
    test_cursor = int((1 - test_percentage / 100) * datax.shape[0])
    testy = datay[test_cursor + timesteps:]
    testx = datax[test_cursor + timesteps:]
    # Insert a singleton axis right after the sample dimension.
    testx = testx.reshape(
        (list(testx.shape[0:1]) + [1] + list(testx.shape[1:])))
    if multipred == 0:
        pdt_index = 0
    model_name = '/home/josephkn/Documents/Fortum/master/models/'
    model_name += '%dtimesteps_%dtimeinterval_%dforward_%de_%dn_%s_multipred=%d' % (
        timesteps, time_interval, future_vision, epochs, nodes1, Permutation,
        multipred)
    try:
        # FIX: the original read `except: FileNotFoundError:` -- a bare except
        # followed by a no-op annotation statement -- so *any* failure
        # (including tf load errors) was misreported as a missing model.
        # Catch FileNotFoundError explicitly.
        with open(model_name, 'r'):
            pass
        model = tf.keras.models.load_model(model_name)
        return model, testx, testy, pdt_index
    except FileNotFoundError:
        print('model does not exist, go train it. exiting')
        exit()
def train(
        timesteps,
        future_vision,
        time_interval,
        batch_size=64,
        validation_percentage=5,
        test_percentage=20,
        activation='relu',
        epochs=1,
        Permutation='none',  # e.g. '0 6 2 1 1 0 0 0'
        place="none",
        savemod=1,
        save_img=0,
        save_bench=1,
        nodes1=128,
        nodes2=128,
        patience=0,
        multipred=1,
        preserve=1,
        weather=0,
        smooth=0,
        time=0,
        differentiate=0,
        method='f_score',
        optimized_features=0,
        k=20,
        categorical=0,
        catkeys=None,  # FIX: was a mutable default (["weekday"])
        holiday=0,
        k2=0,
        special=0,
        lr=0.001,
        b1=0.9,
        b2=0.999):
    """Train an LSTM forecaster on the loaded series and benchmark it.

    Loads the feature arrays (load_features/load_perm), optionally differences
    and always standardises them, builds supervised (timesteps -> future_vision)
    windows, trains a CuDNNLSTM model (with an optional embedding branch for
    categorical time/holiday features), restores the best-validation-MAE
    checkpoint, and appends error statistics to the relevant benchmark file.

    Returns:
        0 if the model file already exists and preserve == 1, otherwise
        (model, testx, testy, pdt_index).

    Side effects: trains on GPU, writes model files, checkpoint file,
    benchmark text files and (optionally) a loss-curve image.
    """
    import os  # stdlib; local import keeps the file's import header untouched

    if catkeys is None:
        catkeys = ["weekday"]

    model_file = (
        '/home/josephkn/Documents/Fortum/master/models/'
        '%dtimesteps_%dtimeinterval_%dforward_%de_%dn_%s_multipred=%d'
        % (timesteps, time_interval, future_vision, epochs, nodes1,
           Permutation, multipred))
    # FIX: the original probed the file with open() inside a bare except and
    # used a deliberate NameError (`injpiu`) to fall through to training; an
    # explicit existence check expresses the same control flow.
    if os.path.exists(model_file):
        print('you have this model')
        if preserve == 1:
            return 0
    print('Initializing training of permutation %s' % Permutation)

    # ---- load the raw series ------------------------------------------------
    if optimized_features and multipred:
        arrays, tags, pdt_index, pdt_tag, permutation, start, end = \
            load_features(time_interval, place, method, k)
        if special:
            specialarrays, specialtags = load_special_features(
                time_interval, place, method, k2)
            for tag in specialtags:
                if tag not in tags:
                    arrays[tag] = specialarrays[tag]
            del specialarrays
    else:
        arrays, tags, pdt_index, pdt_tag, permutation, start, end = load_perm(
            time_interval, Permutation, place, smooth, multipred, weather)

    # Keep an unscaled copy of the target series for the error checks below.
    pdt_original = arrays[pdt_tag].copy()

    # ---- optional differencing, then stack into a (features, time) matrix ---
    if differentiate:
        Arrays = make_differential_data(arrays, future_vision, tags)
    else:
        Arrays = np.zeros((len(tags), len(arrays[tags[0]])), dtype=np.float32)
        for i in range(len(tags)):
            Arrays[i] = arrays[tags[i]].squeeze()
    del arrays

    # ---- per-feature standardisation (means/stds kept for un-scaling) -------
    dims = list(range(Arrays.shape[0]))
    means = []
    stds = []
    for i in dims:
        mean = Arrays[i].mean()
        std = Arrays[i].std()
        Arrays[i] -= mean
        Arrays[i] /= std
        means.append(mean)
        stds.append(std)

    # ---- optional time features ---------------------------------------------
    if time:
        timedata = create_time_variables(start, end, time_interval)
        if categorical == 0:
            # Non-categorical time features are appended as extra feature rows.
            noncat = noncategorical_timedata(timedata)
            for key in noncat.keys():
                if differentiate:
                    Arrays = np.vstack((Arrays, noncat[key][:-future_vision]))
                else:
                    Arrays = np.vstack((Arrays, noncat[key]))
        else:
            # Categorical time features feed a separate embedding input.
            cat = categorical_timedata(timedata, catkeys)
            if holiday:
                holidaysArr = holidays(time_interval)[:, None]
                cat = np.concatenate((cat, holidaysArr), axis=1)
            if differentiate:
                cat = cat[:-future_vision]
            print("categorical shape: ", cat.shape)
        dims = list(range(Arrays.shape[0]))
    if categorical == 0 and holiday == 1:
        cat = holidays(time_interval)[:, None]

    # ---- build supervised windows -------------------------------------------
    indices = extract_indices2(timesteps, future_vision, [], Arrays)
    indices = np.asarray(indices)
    ranger = np.arange(len(indices))
    # FIX: np.float is a removed NumPy alias for the builtin float (float64).
    datax = np.zeros((len(indices), timesteps, Arrays.shape[0]),
                     dtype=np.float64)
    datay = np.zeros((len(indices), Arrays.shape[0]), dtype=np.float32)
    # extract_samples2 fills datax/datay in place, one feature row at a time.
    X = [
        extract_samples2(x, timesteps, indices, datax, datay, ranger, Arrays[x])
        for x in dims
    ]
    del X, Arrays, ranger, indices
    print(datax.shape)

    # ---- chronological train/test split -------------------------------------
    test_cursor = int(
        (1 - test_percentage / 100) * datax.shape[0])  # where to split the data
    print(test_cursor)
    testx = datax[test_cursor:].copy()
    testy = datay[test_cursor:].copy()
    datax = datax[:test_cursor]
    datay = datay[:test_cursor]
    # Only the target column is predicted.
    datay = datay[:, pdt_index][:, np.newaxis]
    testy = testy[:, pdt_index][:, np.newaxis]
    if holiday or categorical:
        cat_test = cat[(future_vision + timesteps + test_cursor - 1):]
        catx = cat[:test_cursor]

    # ---- model --------------------------------------------------------------
    input1 = Input(shape=(datax.shape[1:]))
    lstm = CuDNNLSTM(nodes1,
                     kernel_initializer='glorot_normal',
                     input_shape=datax.shape[1:],
                     return_sequences=False)(input1)
    if categorical or holiday:
        # Second input: embedded categorical features merged with the LSTM.
        input2 = Input(shape=(cat.shape[-1], ))
        embedding = Embedding(batch_size, 64)(input2)
        embeddingflat = Flatten()(embedding)
        embed_dense = Dense(32,
                            kernel_initializer='glorot_normal',
                            activation=activation)(embeddingflat)
        marge = Concatenate()([lstm, embed_dense])
        dropout = Dropout(0.25)(marge)
        outputs = Dense(1,
                        kernel_initializer='glorot_normal',
                        activation='linear')(dropout)
        model = Model(inputs=[input1, input2], outputs=outputs)
    else:
        dropout = Dropout(0.25)(lstm)
        outputs = Dense(1,
                        kernel_initializer='glorot_normal',
                        activation='linear')(dropout)
        model = Model(inputs=input1, outputs=outputs)

    # Checkpoint the epoch with the best validation MAE; restored after fit.
    filepath = '/home/josephkn/Documents/Fortum/master/models/best.h5'
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_mean_absolute_error',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')
    optimizer = tf.keras.optimizers.Adam(lr=lr, beta_1=b1, beta_2=b2)
    model.compile(loss='mean_squared_error',
                  optimizer=optimizer,
                  metrics=['mse', 'mae'])
    if holiday or categorical:
        datax = [datax, catx]
        testx = [testx, cat_test]
        del cat
    history = model.fit(datax,
                        datay,
                        validation_split=validation_percentage / 100,
                        epochs=epochs,
                        batch_size=batch_size,
                        verbose=1,
                        shuffle=0,
                        callbacks=[checkpoint])
    model.load_weights(filepath)
    model.compile(loss='mean_squared_error',
                  optimizer='adam',
                  metrics=['mse', 'mae'])

    # ---- evaluation / benchmarking ------------------------------------------
    scores = model.evaluate(x=testx, y=testy, verbose=0)
    print('average mse:', scores)
    kk = model.predict(testx).squeeze()  # predictions for testy
    if savemod:
        path = '/home/josephkn/Documents/Fortum/master/models/'
        model.save(path + 'forecast%d' % future_vision)

    if save_bench and multipred and place == 'none':
        with open('bench/benchmarks_%dinterval_multipred.txt' % (time_interval),
                  'a') as f:
            # FIX: scores is the [loss, mse, mae] list from evaluate(); the
            # original passed the whole list to %f (TypeError). Log the loss.
            f.write('%f %d %d %d %s %d %d %d\n' %
                    (scores[0], patience, nodes1, timesteps, Permutation,
                     future_vision, weather, time))

    if save_bench and multipred and place != 'none':
        method_ = ''
        if optimized_features:
            bench_dir = "bench_multi_lstm"  # renamed from `dir` (builtin shadow)
            additional = '_k=%d' % k
            method_ = method
        else:
            bench_dir = "bench_place_sparse"
            additional = ''
        permutation = ''.join((str(i) + ',') for i in permutation.tolist())
        print(model.metrics_names)
        mse, mae = scores[1:]
        errors = (kk - testy.squeeze())
        abserrors = abs(errors)
        error_sort = np.sort(abserrors)
        # Rescale normalised errors back to physical units via the target std.
        median = np.median(error_sort) * stds[pdt_index]
        physical_error = abserrors.mean() * stds[pdt_index]
        if differentiate:
            dataX, dataY, copy_last, manual_physical = differencial_error_check(
                testy.shape[0], timesteps, future_vision, pdt_original,
                pdt_index, kk, physical_error, stds, means, dims)
        else:
            dataX, dataY, copy_last, manual_physical = error_check(
                testy.shape[0], timesteps, future_vision, pdt_original,
                pdt_index, kk, physical_error, stds, means, dims)
        print(stds[pdt_index], 'std')
        print(abs(errors).mean(), 'mae')
        print(median, 'median abs')
        print(physical_error, 'physical')
        print(bench_dir + '/benchmarks_%s_%dinterval_%dsteps_multipred%s.txt' %
              (place, time_interval, timesteps, additional))
        with open(
                bench_dir +
                '/benchmarks_%s_%dinterval_%dsteps_multipred%s.txt' %
                (place, time_interval, timesteps, additional), 'a') as f:
            f.write('%f %f %f %f %d %d %d %s %d %d %d %s %d %d\n' %
                    (physical_error, mse, median, copy_last, patience, nodes1,
                     timesteps, permutation[:-1], future_vision, weather, time,
                     method_, differentiate, k2))

    if save_bench and multipred == 0:
        errors = (testy.squeeze() - kk)
        error_sort = np.sort(abs(errors))
        mse, mae = scores[1:]
        physical_errors = (np.abs(errors) * stds[pdt_index])  # shortcut
        physical_error = physical_errors.mean()
        median = np.median(np.sort(physical_errors))
        print(physical_error)
        if differentiate:
            dataX, dataY, copy_last, manual_physical = differencial_error_check(
                testy.shape[0], timesteps, future_vision, pdt_original,
                pdt_index, kk, physical_error, stds, means, dims)
        else:
            dataX, dataY, copy_last, manual_physical = error_check(
                testy.shape[0], timesteps, future_vision, pdt_original,
                pdt_index, kk, physical_error, stds, means, dims)
        print(physical_error, " physical error (shortcut)")
        print(manual_physical, " physical error (manual)")
        print(copy_last, " copy last step error")
        bench_dir = "alternative_bench/lstm_dense"
        if activation == "sigmoid":
            bench_dir += "2"
        elif activation == "None":
            bench_dir = bench_dir.split("dense")[0][:-1]
        with open(bench_dir + '/benchmarks_%dinterval_unipred.txt' %
                  (time_interval), 'a') as f:
            f.write('%f %f %f %f %d %d %d %s %d %d\n' %
                    (physical_error, mse, median, copy_last, patience, nodes1,
                     timesteps, Permutation, future_vision, differentiate))

    if save_img:
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('Model Loss')
        plt.ylabel('MSE')
        plt.xlabel('Epoch')
        plt.legend(['Training Error', 'Validation Error'], loc='upper left')
        plt.savefig('imgs/%s.jpg' % Permutation)

    del datax, datay, history, test_cursor
    return model, testx, testy, pdt_index
# Standalone experiment script: pull one target series via load_perm and
# inspect it (a Prophet fit was started but left commented out).
from fbprophet import Prophet
from load_multi_pred import load_perm, load_features

# Fixed experiment configuration.
time_interval = 60  # minutes per sample
future_vision = 1   # steps ahead to forecast
timesteps = 20      # history window length
Permutation = "none"
place = "VIK"
# NOTE(review): this 4-argument call matches linreg_uni's usage but differs
# from the 6-argument call in train() -- presumably a different load_perm
# variant or module version; confirm against load_multi_pred.
arrays, tags, pdt_index, pdt_tag, permutation, start, end = load_perm(
    time_interval, timesteps, Permutation, place)
df = arrays[pdt_tag]  # the series selected by pdt_tag
print(df)
#m = Prophet()
#m.fit(df)
def linreg_uni(time_interval, future_vision, timesteps, differentiate):
    """Benchmark a plain linear-regression forecaster on the "VIK" series.

    Loads the series, optionally differences and always standardises them,
    builds (timesteps -> future_vision) supervised windows, fits a
    LinearRegression on the first 80% and appends physical-unit error
    statistics for the last 20% to linreg_bench/.

    Args:
        time_interval: sampling interval passed to load_perm.
        future_vision: forecast horizon in steps.
        timesteps: length of the input history window.
        differentiate: truthy to train on differenced series.

    Side effect: appends one line to the benchmark text file; returns None.
    """
    Permutation = "none"
    place = "VIK"
    arrays, tags, pdt_index, pdt_tag, permutation, start, end = load_perm(
        time_interval, timesteps, Permutation, place)
    # Keep an unscaled copy of the target for the error checks below.
    pdt_original = arrays[pdt_tag].copy()
    tags = list(arrays.keys())
    if differentiate:
        # FIX: the original pre-allocated a zeros matrix here and immediately
        # overwrote it; make_differential_data builds the matrix itself.
        Arrays = make_differential_data(arrays, future_vision, tags)
    else:
        Arrays = np.zeros((len(tags), len(arrays[tags[0]])), dtype=np.float32)
        for i in range(len(tags)):
            Arrays[i] = arrays[tags[i]].squeeze()
    del arrays

    # Per-feature standardisation; means/stds kept for un-scaling the errors.
    dims = list(range(Arrays.shape[0]))
    means = []
    stds = []
    for i in dims:
        mean = Arrays[i].mean()
        std = Arrays[i].std()
        Arrays[i] -= mean
        Arrays[i] /= std
        means.append(mean)
        stds.append(std)

    # Build supervised windows; extract_samples2 fills datax/datay in place.
    indices = extract_indices2(timesteps, future_vision, [], Arrays)
    indices = np.asarray(indices)
    ranger = np.arange(len(indices))
    # FIX: np.float is a removed NumPy alias for the builtin float (float64).
    datax = np.zeros((len(indices), timesteps, Arrays.shape[0]),
                     dtype=np.float64)
    datay = np.zeros((len(indices), Arrays.shape[0]), dtype=np.float32)
    X = [
        extract_samples2(x, timesteps, indices, datax, datay, ranger, Arrays[x])
        for x in dims
    ]
    del X, Arrays, ranger, indices

    # Chronological split: last 20% of samples are the test set.
    test_percentage = 20
    test_cursor = int((1 - test_percentage / 100) * datax.shape[0])
    testx = datax[test_cursor:].squeeze()
    testy = datay[test_cursor:]
    datax = datax[:test_cursor].squeeze()
    datay = datay[:test_cursor]
    # Only the target column is predicted.
    datay = datay[:, pdt_index][:, np.newaxis]
    testy = testy[:, pdt_index][:, np.newaxis]

    linreg = LinearRegression()
    linreg.fit(datax, datay)
    ypredict = linreg.predict(testx).squeeze()
    # Errors are on standardised data; multiply by the target std to get
    # physical units.
    err = np.abs(ypredict - testy.squeeze())
    median = np.median(stds[pdt_index] * err)
    physerr = stds[pdt_index] * np.mean(err)
    pdt_original = pdt_original.squeeze()
    print(physerr)
    if differentiate:
        dataX, dataY, copy_last, manual_physical = differencial_error_check(
            testy.shape[0], timesteps, future_vision, pdt_original,
            pdt_index, ypredict, physerr, stds, means, dims)
    else:
        dataX, dataY, copy_last, manual_physical = error_check(
            testy.shape[0], timesteps, future_vision, pdt_original,
            pdt_index, ypredict, physerr, stds, means, dims)
    bench_dir = "linreg_bench"  # renamed from `dir` (builtin shadow)
    with open(bench_dir + '/benchmarks_%dinterval_unipred.txt' %
              (time_interval), 'a') as f:
        f.write('%f %f %f %f %d %d %d\n' %
                (physerr, np.mean(err), median, copy_last, timesteps,
                 future_vision, differentiate))