# Imports: helpers, modelprovider, loss, and crps are project-local modules;
# numpy_path, logdir, and expname (and, further below, batchsize,
# learning_rate, epochs, initial_epochs, and train_model) are config globals
# assumed to be defined elsewhere in this repo, as are convert_dataset,
# convert_dataset_feature_importance, and build_model.
import os
from datetime import datetime

import numpy as np
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

import helpers
import modelprovider
import loss
import crps


def main():
    start = datetime.now()

    # get the data
    test_data = helpers.load_data(numpy_path, 'test_set.npy')
    test_data_labels = np.array([item[0] for item in test_data[:, 2]])
    test_data_countries = np.array([item[0] for item in test_data[:, 0]])

    # convert the data
    test_dataset = convert_dataset(test_data, batchsize=1000)

    # checkpoint dir
    checkpoint_dir = os.path.join(logdir, expname, 'checkpoints/')

    print('[INFO] Starting feature importance')
    predictions = []
    predictions_feature = []
    test_dataset_feature = convert_dataset_feature_importance(test_data)

    for i in range(1, 11):
        # print('Round number: ' + str(i))
        model = modelprovider.build_multi_input_model((15,), (2, 19))
        model.compile(loss=loss.crps_cost_function, optimizer=Adam())
        model.load_weights(
            os.path.join(checkpoint_dir,
                         'round-' + str(i) + '/best_checkpoint')).expect_partial()
        # the datasets are already batched, so predict() takes no batch_size
        predictions.append(model.predict(test_dataset, verbose=0))
        predictions_feature.append(
            model.predict(test_dataset_feature, verbose=0))

    predictions = np.array(predictions)
    predictions_feature = np.array(predictions_feature)

    # make sure std is positive
    predictions[:, :, 1] = np.abs(predictions[:, :, 1])
    predictions_feature[:, :, 1] = np.abs(predictions_feature[:, :, 1])

    # mean over the 10 training rounds
    mean_predictions = np.mean(predictions, 0)
    mean_predictions_feature = np.mean(predictions_feature, 0)

    # CRPS on the unmodified and on the feature-permuted test set
    test_crps = crps.norm_data(test_data_labels, mean_predictions)
    test_crps_feature = crps.norm_data(test_data_labels,
                                       mean_predictions_feature)
    print(round(test_crps_feature.mean(), 2))
    print(round(test_crps.mean(), 2))

    # relative change of the mean CRPS in percent
    test_score = round(
        (1 - test_crps_feature.mean() / test_crps.mean()) * 100, 2)
    print(test_score)
    print(datetime.now() - start)
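# Permuting a single input feature and comparing the resulting CRPS against
# the baseline is the usual permutation-importance recipe, which matches the
# skill score computed above. A minimal sketch of what
# convert_dataset_feature_importance could look like under that assumption;
# the real implementation lives elsewhere in this repo, and feature_index is
# a hypothetical parameter:
def permute_feature_dataset(test_data, feature_index, batchsize=1000):
    data = np.copy(test_data)
    # shuffle one feature column across samples: its marginal distribution
    # is preserved, but its relationship with the labels is destroyed
    data[:, feature_index] = np.random.permutation(data[:, feature_index])
    return convert_dataset(data, batchsize=batchsize)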
def printIntMonth(test_data_labels, test_data_month, mean_predictions):
    """Print the mean CRPS of the ensemble prediction for each month."""
    test_crps = crps.norm_data(test_data_labels, mean_predictions)
    for i in range(1, 13):
        mask = test_data_month == i  # do not shadow the builtin `filter`
        month_crps = test_crps[mask]
        if len(month_crps) > 0:
            item = (i, round(month_crps.mean(), 2))
        else:
            item = (i, 0)
        print(item)
def printIntCountries(test_data_labels, test_data_countries, mean_predictions):
    """Print the overall mean CRPS and the mean CRPS for five selected
    country IDs as a single '&'-separated row."""
    test_crps = crps.norm_data(test_data_labels, mean_predictions)
    result = str(round(test_crps.mean(), 2))
    for i in [8, 16, 2, 5, 20]:
        mask = test_data_countries == i
        country_crps = test_crps[mask]
        item = round(country_crps.mean(), 2) if len(country_crps) > 0 else 0
        result += '&{:.2f}'.format(item)
    print(result)
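# Usage sketch: the '&' separators make the row paste directly into a LaTeX
# table, overall score first, then the five selected countries. With made-up
# values the output looks like:
#
#   printIntCountries(test_data_labels, test_data_countries, mean_predictions)
#   # -> 0.85&0.79&0.88&0.91&0.83&0.86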
def main():
    start = datetime.now()

    # get the data
    train_data = helpers.load_data(numpy_path, 'train_set.npy')
    valid_data = helpers.load_data(numpy_path, 'valid_set.npy')
    test_data = helpers.load_data(numpy_path, 'test_set.npy')

    # filter the data
    test_data_labels = np.array([item[0] for item in test_data[:, 2]])
    test_data_countries = np.array([item[0] for item in test_data[:, 0]])
    test_data_month = test_data[:, 5]

    # convert the data
    train_dataset, train_shape = convert_dataset(
        train_data, batchsize=batchsize, shuffle=1000, shape=True)
    valid_dataset = convert_dataset(valid_data, batchsize=1000, shuffle=100)
    test_dataset = convert_dataset(test_data, batchsize=1000)

    # build the model
    model = build_model(train_shape[1], train_shape[2])

    # print the model
    # modelprovider.printModel(model, dir=os.path.join(
    #     logdir, expname), name=expname + ".png")

    # compile the model
    lossfn = loss.crps_cost_function
    model.compile(loss=lossfn,
                  optimizer=Adam(learning_rate=learning_rate, amsgrad=True))

    # checkpoint dir
    checkpoint_dir = os.path.join(logdir, expname, 'checkpoints/')

    # train 10 models, each starting from fresh initial weights
    print('[INFO] Starting training')
    predictions = []
    for i in range(1, 11):
        print('Round number: ' + str(i))
        model = build_model(train_shape[1], train_shape[2])
        # compile with a fresh optimizer so Adam's state is not shared
        # between rounds
        model.compile(loss=lossfn,
                      optimizer=Adam(learning_rate=learning_rate,
                                     amsgrad=True))

        # checkpoint callbacks
        # save a checkpoint after every epoch
        cp_callback_versuch = tf.keras.callbacks.ModelCheckpoint(
            os.path.join(checkpoint_dir, 'round-' + str(i) + '/')
            + 'checkpoint_{epoch}',
            monitor='val_loss', save_weights_only=True, mode='min', verbose=0)
        # keep only the best checkpoint
        cp_callback = tf.keras.callbacks.ModelCheckpoint(
            os.path.join(checkpoint_dir, 'round-' + str(i) + '/checkpoint'),
            monitor='val_loss', save_weights_only=True, mode='min',
            save_best_only=True, verbose=0)

        # train the model; the datasets are already batched, so fit() takes
        # neither batch_size nor validation_batch_size
        if train_model:
            model.fit(
                train_dataset,
                epochs=epochs,
                initial_epoch=initial_epochs,
                verbose=1,
                validation_data=valid_dataset,
                callbacks=[cp_callback, cp_callback_versuch],
            )

        # load the best checkpoint of round i and predict on the test set
        model.load_weights(
            os.path.join(checkpoint_dir,
                         'round-' + str(i) + '/checkpoint')).expect_partial()
        predictions.append(model.predict(test_dataset, verbose=0))

    # convert to numpy array
    predictions = np.array(predictions)
    # make sure std is positive
    predictions[:, :, 1] = np.abs(predictions[:, :, 1])
    # calculate the mean over the 10 rounds
    mean_predictions = np.mean(predictions, 0)
    # calculate the score for each record in the test set
    test_crps = crps.norm_data(test_data_labels, mean_predictions)

    # print the results with filters
    printIntCountries(test_data_labels, test_data_countries, mean_predictions)
    helpers.printHist(helpers.datasetPIT(mean_predictions, test_data_labels))

    np.save(os.path.join(logdir, expname, 'prediction'), predictions)
    print(datetime.now() - start)
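# Both loss.crps_cost_function and crps.norm_data are assumed to evaluate the
# CRPS of a Gaussian forecast N(mu, sigma) against the observation y, for
# which a closed form exists (Gneiting et al., 2005):
#
#   CRPS = sigma * (z * (2 * Phi(z) - 1) + 2 * phi(z) - 1 / sqrt(pi)),
#   z = (y - mu) / sigma
#
# A minimal NumPy sketch of that formula, assuming mean_predictions carries
# the columns [mu, sigma] as the indexing above suggests:
from scipy.stats import norm


def crps_normal(labels, predictions):
    mu, sigma = predictions[:, 0], predictions[:, 1]
    z = (labels - mu) / sigma
    return sigma * (z * (2 * norm.cdf(z) - 1)
                    + 2 * norm.pdf(z) - 1 / np.sqrt(np.pi))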