import random

import numpy as np
import tensorflow as tf  # used by the second variant's batch bookkeeping
from keras.models import Sequential
from keras.layers import GRU, Dense
from keras.optimizers import Adam as adam  # lowercase alias to match the original calls
from tqdm import tqdm

# Project-local helpers assumed available elsewhere in this repo:
# readDataSet, DataProcessor, getX, x_cols, differenceSets,
# error_ignore_first, errors, LossCallback, configure_batches.

# NOTE: two revisions of run_gru follow. In a single module the second
# definition would shadow the first, so keep only the variant under test.


# First variant: windowed normalization, periodic online retraining with
# batch reconfiguration; returns NRMSE.
def run_gru(s):
    x_dims = len(x_cols[s.dataSet]) if s.dataSet in x_cols else s.lookback
    random.seed(6)
    np.random.seed(6)
    rnn = Sequential()
    rnn.add(GRU(s.nodes,
                input_shape=(None, x_dims),
                kernel_initializer='he_uniform',
                stateful=False))
    #rnn.add(Dropout(0.15))
    rnn.add(Dense(1, kernel_initializer='he_uniform'))
    opt = adam(lr=s.lr, decay=0.0)  #1e-3)
    rnn.compile(loss='mae', optimizer=opt)

    # prepare dataset as pyBrain sequential dataset
    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s)
    if s.limit_to:
        sequence = sequence[:s.limit_to]

    dp = DataProcessor()
    # standardize data by subtracting mean and dividing by std
    #(meanSeq, stdSeq) = dp.normalize('data', sequence)
    dp.windowed_normalize(sequence)

    for key in sequence.keys():
        if key != "data":
            dp.normalize(key, sequence)

    predictedInput = np.zeros((len(sequence),))
    targetInput = np.zeros((len(sequence),))
    trueData = np.zeros((len(sequence),))

    if s.dataSet in differenceSets:
        predictedInputNodiff = np.zeros((len(sequence),))
        targetInputNodiff = np.zeros((len(sequence),))

    if s.dataSet in differenceSets:
        backup_sequence = sequence
        sequence = dp.difference(sequence, s.lookback)

    allX = getX(sequence, s)
    allY = np.array(sequence['data'])
    allX = allX[48:]
    allY = allY[48:]
    #if s.dataSet not in x_cols:
    #    allY = allY[s.lookback:]
    trainX = allX[0:s.nTrain]
    trainY = allY[s.predictionStep:s.nTrain + s.predictionStep]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    curBatch = 1.0
    callback = LossCallback()
    temp_set = np.array(sequence['data'])[:48 + s.nTrain + 5]
    configure_batches(48, s.batch_size,
                      np.reshape(temp_set, (temp_set.shape[0], 1, 1)))
    rnn.fit(trainX,
            trainY,
            epochs=s.epochs,
            batch_size=s.batch_size,
            verbose=min(s.max_verbosity, 2),
            callbacks=[callback])
    for i in xrange(0, s.nTrain):
        targetInput[i] = allY[i + s.predictionStep]
    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)),
                  disable=s.max_verbosity == 0):
        if i % s.retrain_interval == 0 and i > s.numLags + s.nTrain and s.online:
            trainX = allX[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainY = allY[i - s.nTrain:i]
            trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
            temp_set = np.array(sequence['data'])[i - s.nTrain - s.predictionStep - 48:i]
            configure_batches(48, s.batch_size,
                              np.reshape(temp_set, (temp_set.shape[0], 1, 1)))
            rnn.fit(trainX,
                    trainY,
                    epochs=s.epochs,
                    batch_size=s.batch_size,
                    verbose=2,
                    callbacks=[callback])

        targetInput[i] = allY[i + s.predictionStep]
        predictedInput[i] = rnn.predict(np.reshape(allX[i], (1, 1, x_dims)))
        if s.dataSet in differenceSets:
            predictedInputNodiff[i] = predictedInput[i]
            targetInputNodiff[i] = targetInput[i]
            predictedInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                      predictedInput[i], i - 1)
            targetInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                   targetInput[i], i - 1)
            predictedInput[0] = 0
        trueData[i] = sequence['data'][i]

    #predictedInput = dp.denormalize(predictedInput, meanSeq, stdSeq)
    #targetInput = dp.denormalize(targetInput, meanSeq, stdSeq)
    dp.windowed_denormalize(predictedInput, targetInput)
    if s.dataSet in differenceSets:
        # predictedInputNodiff = dp.denormalize(predictedInputNodiff)
        # targetInputNodiff = dp.denormalize(targetInputNodiff)
        pass
    #trueData = (trueData * stdSeq) + meanSeq

    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru',
                        s.predictionStep, s.max_verbosity)
    skipTrain = error_ignore_first[s.dataSet]
    from plot import computeSquareDeviation
    squareDeviation = computeSquareDeviation(predictedInput, targetInput)
    squareDeviation[:skipTrain] = None
    nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
    if s.max_verbosity > 0:
        print "", s.nodes, "NRMSE {}".format(nrmse)
    mae = np.nanmean(np.abs(targetInput - predictedInput))
    if s.max_verbosity > 0:
        print "MAE {}".format(mae)
    if s.dataSet in differenceSets:
        dp.saveResultToFile(s.dataSet, predictedInputNodiff,
                            targetInputNodiff, 'gru_nodiff', s.predictionStep,
                            s.max_verbosity)
        squareDeviation = computeSquareDeviation(predictedInputNodiff,
                                                 targetInputNodiff)
        squareDeviation[:skipTrain] = None
        nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInputNodiff)
        if s.max_verbosity > 0:
            print "", s.nodes, "NRMSE {}".format(nrmse)
        mae = np.nanmean(np.abs(targetInputNodiff - predictedInputNodiff))
        if s.max_verbosity > 0:
            print "MAE {}".format(mae)
    mase = errors.get_mase(predictedInput, targetInput,
                           np.roll(targetInput, 24))
    if s.max_verbosity > 0:
        print "MASE {}".format(mase)
    return nrmse
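
# `errors.get_mase` is a project-local helper. As a reference point, here is a
# minimal sketch of the standard seasonal-naive MASE it presumably computes;
# the name, signature, and nan-handling below are assumptions, not the
# project's actual code.
def _mase_sketch(predicted, actual, baseline):
    # MASE = model MAE / baseline MAE; nanmean skips the NaN-masked
    # training prefix that the evaluation above also excludes.
    return (np.nanmean(np.abs(actual - predicted)) /
            np.nanmean(np.abs(actual - baseline)))

# Usage mirroring the call above: np.roll(targetInput, 24) yields
# baseline[i] = targetInput[i - 24], a 24-step seasonal-naive forecast
# (the first 24 entries wrap around, exactly as in the original call):
# mase = _mase_sketch(predictedInput, targetInput, np.roll(targetInput, 24))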
# Second variant: global standardization using training-set statistics, TF
# bookkeeping variables for batch state; returns (MASE, closer_rate).
def run_gru(s):
    global global_step
    global increment_global_step_op
    global reset_global_step_op
    global batches
    global images_placeholder
    global batches_op
    global_step = tf.Variable(0,
                              name='global_step',
                              trainable=False,
                              dtype=tf.int32)
    increment_global_step_op = tf.assign(global_step, global_step + 1)
    reset_global_step_op = tf.assign(global_step, 0)
    batches = tf.get_variable(
        "batches", [s.nTrain / int(s.batch_size), s.batch_size, 1, 1],
        dtype=tf.float32,
        initializer=tf.zeros_initializer)
    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(s.nTrain / int(s.batch_size),
                                               s.batch_size, 1, 1))
    batches_op = tf.assign(batches, images_placeholder)

    x_dims = len(x_cols[s.dataSet]) if s.dataSet in x_cols else s.lookback
    random.seed(6)
    np.random.seed(6)
    rnn = Sequential()
    rnn.add(GRU(s.nodes,
                input_shape=(None, x_dims),
                kernel_initializer='he_uniform',
                stateful=False))
    #rnn.add(Dropout(0.15))
    rnn.add(Dense(1, kernel_initializer='he_uniform'))
    opt = adam(lr=s.lr, decay=0.0)  #1e-3)
    rnn.compile(loss='mae', optimizer=opt)

    # prepare dataset as pyBrain sequential dataset
    sequence = readDataSet(s.dataSet, s.dataSetDetailed, s)
    if s.limit_to:
        sequence = sequence[:s.limit_to]

    dp = DataProcessor()
    # standardize data by subtracting mean and dividing by std
    (meanSeq, stdSeq) = dp.normalize('data', sequence, s.nTrain)
    #dp.windowed_normalize(sequence)

    for key in sequence.keys():
        if key != "data":
            dp.normalize(key, sequence, s.nTrain)

    if s.dataSet in differenceSets:
        predictedInputNodiff = np.zeros((len(sequence),))
        targetInputNodiff = np.zeros((len(sequence),))

    if s.dataSet in differenceSets:
        backup_sequence = sequence
        sequence = dp.difference(sequence, s.lookback)

    seq_full = sequence['data'].values
    seq_actual = seq_full[s.front_buffer:]
    allX = getX(seq_full, s)
    allY = seq_actual[s.predictionStep - 1:]
    predictedInput = np.full((len(allY),), np.nan)
    #if s.dataSet not in x_cols:
    #    allY = allY[s.lookback:]
    trainX = allX[:s.nTrain]
    trainY = allY[:s.nTrain]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    rnn.fit(trainX,
            trainY,
            epochs=s.epochs,
            batch_size=s.batch_size,
            verbose=min(s.max_verbosity, 2))
    #for i in xrange(0, s.nTrain):
    #    targetInput[i] = allY[i + s.predictionStep]
    targetInput = allY
    pred_diffs = []
    pred_closer_to_actual = []
    isFirst = True
    for i in tqdm(xrange(s.nTrain + s.predictionStep, len(allX)),
                  disable=s.max_verbosity == 0):
        #for i in tqdm(xrange(0, len(allX)), disable=s.max_verbosity == 0):
        if i % s.retrain_interval == 0 and i > s.numLags + s.nTrain and s.online:
            trainX = allX[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainY = allY[i - s.nTrain - s.predictionStep:i - s.predictionStep]
            trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
            rnn.fit(trainX,
                    trainY,
                    epochs=s.epochs,
                    batch_size=s.batch_size,
                    verbose=0)

        #targetInput[i] = allY[i]
        predictedInput[i] = rnn.predict(np.reshape(allX[i], (1, 1, x_dims)))
        if isFirst:
            print predictedInput[i]
            isFirst = False
        #predictedInput[i] = targetInput[i-1440]
        pred_diffs.append(abs(predictedInput[i] - allX[i][-1]))
        pred_closer_to_actual.append(
            abs(predictedInput[i] - targetInput[i]) <
            abs(predictedInput[i] - allX[i][-1]))
        if s.dataSet in differenceSets:
            predictedInputNodiff[i] = predictedInput[i]
            targetInputNodiff[i] = targetInput[i]
            predictedInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                      predictedInput[i], i - 1)
            targetInput[i] = dp.inverse_difference(backup_sequence['data'],
                                                   targetInput[i], i - 1)

    for i in range(s.nTrain + s.predictionStep):
        predictedInput[i] = np.nan
    predictedInput = dp.denormalize(predictedInput, meanSeq, stdSeq)
    targetInput = dp.denormalize(targetInput, meanSeq, stdSeq)
    #dp.windowed_denormalize(predictedInput, targetInput)
    print "FINAL", predictedInput[-1], targetInput[-1], \
        len(predictedInput), len(targetInput)
    if s.dataSet in differenceSets:
        # predictedInputNodiff = dp.denormalize(predictedInputNodiff)
        # targetInputNodiff = dp.denormalize(targetInputNodiff)
        pass

    dp.saveResultToFile(s.dataSet, predictedInput, targetInput, 'gru',
                        s.predictionStep, s.max_verbosity)
    skipTrain = error_ignore_first[s.dataSet]
    from plot import computeSquareDeviation
    squareDeviation = computeSquareDeviation(predictedInput, targetInput)
    squareDeviation[:skipTrain] = None
    nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInput)
    if s.max_verbosity > 0:
        print "", s.nodes, "NRMSE {}".format(nrmse)
    mae = np.nanmean(np.abs(targetInput - predictedInput))
    if s.max_verbosity > 0:
        print "MAE {}".format(mae)
    mase = errors.get_mase(predictedInput, targetInput,
                           np.roll(targetInput, s.season))
    if s.max_verbosity > 0:
        print "MASE {}".format(mase)
    if s.dataSet in differenceSets:
        dp.saveResultToFile(s.dataSet, predictedInputNodiff,
                            targetInputNodiff, 'gru_nodiff', s.predictionStep,
                            s.max_verbosity)
        squareDeviation = computeSquareDeviation(predictedInputNodiff,
                                                 targetInputNodiff)
        squareDeviation[:skipTrain] = None
        nrmse = np.sqrt(np.nanmean(squareDeviation)) / np.nanstd(targetInputNodiff)
        if s.max_verbosity > 0:
            print "", s.nodes, "NRMSE {}".format(nrmse)
        mae = np.nanmean(np.abs(targetInputNodiff - predictedInputNodiff))
        if s.max_verbosity > 0:
            print "MAE {}".format(mae)
    closer_rate = pred_closer_to_actual.count(True) / float(
        len(pred_closer_to_actual))
    if s.max_verbosity > 0:
        pred_diffs.sort()
        print pred_diffs[0], pred_diffs[-1], pred_diffs[int(0.9 * len(pred_diffs))]
        print "Good results:", closer_rate
    return mase, closer_rate
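
# The settings object `s` is defined outside this excerpt. Below is a
# hypothetical sketch of the attributes the two variants read; the class name,
# dataset key, and every value are illustrative assumptions, not the project's
# defaults.
class _SettingsSketch(object):
    dataSet = 'nyc_taxi'        # assumed dataset key understood by readDataSet
    dataSetDetailed = None
    lookback = 50               # input width when dataSet has no x_cols entry
    nodes = 20                  # GRU units
    lr = 0.001                  # Adam learning rate
    epochs = 10
    batch_size = 32
    nTrain = 5000               # initial training span
    predictionStep = 5          # forecast horizon
    retrain_interval = 1000     # online retraining period
    numLags = 100
    online = True               # enable periodic retraining in the main loop
    limit_to = None             # optional cap on sequence length
    max_verbosity = 1
    front_buffer = 50           # second variant: rows consumed building features
    season = 24                 # second variant: seasonal lag for the MASE baseline

# e.g. mase, closer_rate = run_gru(_SettingsSketch())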