def kfold(k=5):
    subjects = PARTICIPANT_LIST
    w_lengths = [100, 50, 25, 5]
    for _end in w_lengths:
        print(_end)
        for _subj in subjects:
            # if True:  # total
            # if _subj.find("pen") >= 0:
            # if _subj.find("umbr") >= 0:
            if _subj.find("pen") < 0 and _subj.find("umbr") < 0:  # fh
                print(f"subj: {_subj} len: {_end}")
                params = {"end": _end, "dir": _subj, "set": INPUT_SET}
                x, y = DataLoader().load(params)
                x = scale_input(x)
                kf = KFold(n_splits=k, shuffle=True, random_state=293)
                for train_index, test_index in kf.split(x):
                    x_train, x_test = x[train_index], x[test_index]
                    y_train, y_test = y[train_index], y[test_index]
                    y_train = to_categorical(y_train)
                    x_train = reshape_for_cnn(x_train)
                    x_test = reshape_for_cnn(x_test)
                    cnn = get_cnn_adv(x_train[0], len(INPUT_SET))
                    model, accuracy, cm = test_model(cnn, x_train, x_test, y_train, y_test)
                    print(accuracy)
                    backend.clear_session()

def train_models_main():
    subjects = [PARTICIPANT_LIST[0]]
    _set = BUZZ_SET
    w_lengths = [WINDOW_LENGTHS[0]]
    total_accuracy = 0.0
    count = 0
    for _subj in subjects:
        for _end in w_lengths:
            params = {"end": _end, "dir": _subj, "set": _set}
            print(f"CNN {_subj}, {_end}")
            x, y = DataLoader().load(params)
            x = scale_input(x)
            x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
            y_train = to_categorical(y_train)
            x_train = reshape_for_cnn(x_train)
            x_test = reshape_for_cnn(x_test)
            cnn = get_cnn_adv(x_train[0], len(_set))
            # test_model returns (model, accuracy, confusion_matrix) in the
            # surrounding code; only the first two are needed here
            model, accuracy, _ = test_model(cnn, x_train, x_test, y_train, y_test)
            model_name = _subj + "_" + str(_end)
            model.save(model_name)
            count += 1
            total_accuracy += accuracy
    print("Average accuracy is " + str(total_accuracy / count))
    return model_name

def one_hot_encode(*labels, class_amount=10):
    encoded_labels = []
    for label_set in labels:
        encoded_labels.append(np_utils.to_categorical(label_set, class_amount))
    return tuple(encoded_labels)

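A minimal usage sketch for the helper above; the label arrays are invented for illustration:

# Hypothetical example: encode two integer label arrays in one call.
train_labels = [0, 3, 9]
test_labels = [1, 1, 4]
train_ohe, test_ohe = one_hot_encode(train_labels, test_labels, class_amount=10)
print(train_ohe.shape, test_ohe.shape)  # (3, 10) (3, 10)
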
def encode_categories(y):
    # encode class values as integers
    encoder = LabelEncoder()
    encoded_Y = encoder.fit_transform(y)
    # convert integers to dummy variables (i.e. one-hot encoded)
    dummy_y = to_categorical(encoded_Y).astype(int)
    # print(dummy_y)
    return dummy_y

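For reference, a quick sketch of what encode_categories produces for string class labels (the labels here are made up):

# Hypothetical input: LabelEncoder maps the sorted class names to integers
# (bird=0, cat=1, dog=2), then to_categorical expands them to one-hot rows.
y = ["cat", "dog", "cat", "bird"]
print(encode_categories(y))
# [[0 1 0]
#  [0 0 1]
#  [0 1 0]
#  [1 0 0]]
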
def test_model_new(model, x_train, x_test, y_train, y_test, epochs=KERAS_EPOCHS):
    model.fit(x_train, y_train,
              validation_data=(x_test, to_categorical(y_test)),
              batch_size=KERAS_BATCH_SIZE, epochs=epochs, verbose=1)
    y_pred = model.predict(x_test.astype('float32'))
    # collapse per-class probabilities to the predicted class index
    y_pred = [np.argmax(p) for p in y_pred]
    accuracy = metrics.accuracy_score(y_test, y_pred)
    plot_confusion_matrix(y_true=np.asarray(y_test).astype(int),
                          y_pred=np.asarray(y_pred).astype(int),
                          title=str(accuracy), normalize=True,
                          classes=[str(i + 1) for i in range(len(y_train[0]))])
    return model, accuracy

def generator_data():
    # `ds` is expected to be a global iterable of dicts with 'features' and
    # 'label' entries (e.g. a tf.data dataset); the trailing [None] is a
    # sample-weight placeholder for Keras.
    for data in ds:
        image = data['features'].numpy()
        label = data['label'].numpy()
        # image = image.reshape([32, 28, 28, 1])
        # label = label.reshape([32, 1])
        # print(label)
        label = np_utils.to_categorical(label, num_classes=3)
        print(image.shape, label.shape)
        yield image, label, [None]

def key_test():
    merger = KeyfileMerger()
    merger.load_files()
    merger.merge()
    merger.analyse()
    x, y = merger.cut_trials()
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1,
                                                        shuffle=True, random_state=42)
    y_train = to_categorical(y_train)
    x_train = reshape_for_cnn(x_train)
    x_test = reshape_for_cnn(x_test)
    cnn = get_cnn(x_train[0], len(y_train[0]))
    model, accuracy, cm = test_model(cnn, x_train, x_test, y_train, y_test)
    model.save(KeyConstants.MODEL_PATH)

def to_integer_base(notes, sequence_size):
    vocab_size = len(set(notes))
    pitchnames = sorted(set(notes))
    notes_to_int = dict((note, number) for number, note in enumerate(pitchnames))
    network_input = []
    network_output = []
    # slide a window of `sequence_size` notes over the piece; the note that
    # follows each window is the prediction target
    for i in range(0, len(notes) - sequence_size, 1):
        sequence_in = notes[i:i + sequence_size]
        sequence_out = notes[i + sequence_size]
        network_input.append([notes_to_int[char] for char in sequence_in])
        network_output.append(notes_to_int[sequence_out])
    patterns = len(network_input)
    network_input = np.reshape(network_input, (patterns, sequence_size, 1))
    network_input = network_input / float(vocab_size)
    network_output = np_utils.to_categorical(network_output)
    return network_input, network_output, vocab_size, pitchnames

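A quick sketch of feeding to_integer_base; the note list is a stand-in, not real data (real input would come from parsed MIDI):

# Hypothetical note sequence for illustration only.
notes = ["C4", "E4", "G4", "C4", "E4", "G4", "A4", "C4"]
net_in, net_out, vocab, names = to_integer_base(notes, sequence_size=3)
print(net_in.shape)   # (5, 3, 1) - 5 windows of 3 normalized pitch indices
print(net_out.shape)  # (5, 4)    - one-hot targets over the 4-note vocabulary
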
def generate_sample(size, n_patterns, parameter=None):
    X, y = list(), list()
    for i in range(n_patterns):
        # print("gen{}/{}".format(i, n_patterns))
        frames, labels = build_frames2(size)
        code = np.array(labels)
        label_encoder = LabelEncoder()
        vec = label_encoder.fit_transform(code)
        X.append(frames)
        y.append(vec)
    # resize as [samples, timesteps, width, height, channels]
    X = np.array(X).reshape(n_patterns, len(X[0]), size, size, 1)
    y = np.array(y).reshape(n_patterns, 4)
    labels = to_categorical(y, 4)
    return X, labels

def train_data(features, labels):
    features = features / 255.0
    y_labels = np_utils.to_categorical(labels)
    # grid search over network depth and width
    dense_layers = [0, 1, 2, 3, 4, 5]
    sizes_layers = [32, 64, 128, 256]
    conv_layers = [1, 2, 3, 4]
    for dense in dense_layers:
        for size in sizes_layers:
            for conv in conv_layers:
                name_model = 'Training_Model_{}_Dense_{}_Size_{}_Conv_{}'.format(
                    dense, size, conv, int(time.time()))
                tensorboard = TensorBoard(log_dir='logs\\{}'.format(name_model))
                model = Sequential()
                model.add(Conv2D(size, (3, 3), input_shape=features.shape[1:]))
                model.add(Activation("relu"))
                model.add(MaxPool2D(pool_size=(2, 2)))
                for layer in range(conv - 1):
                    model.add(Conv2D(size, (3, 3)))
                    model.add(Activation("relu"))
                    model.add(MaxPool2D(pool_size=(2, 2)))
                model.add(Flatten())
                for layer in range(dense):
                    model.add(Dense(size))
                    model.add(Activation('relu'))
                model.add(Dense(7))
                model.add(Activation('softmax'))
                model.compile(loss='categorical_crossentropy', optimizer='adam',
                              metrics=['accuracy'])
                print(name_model)
                print(model.summary())
                model.fit(features, y_labels, batch_size=32, epochs=25,
                          validation_split=0.1, callbacks=[tensorboard])

def generate_DB_A(size, n_patterns, parameter=None):
    X, y = list(), list()
    for i in range(n_patterns):
        print("gen{}/{}".format(i, n_patterns))
        frames, labels = build_frames_DB_A(size=size, shuff=parameter['shuff'][0])
        code = np.array(labels)
        label_encoder = LabelEncoder()
        vec = label_encoder.fit_transform(code)
        X.append(frames)
        y.append(vec)
    # resize as [samples, timesteps, width, height, channels]
    # XX = np.array(X)
    # XX.shape = (n_patterns, len(X[0]), size, size, 1)
    X = np.array(X).reshape(n_patterns, len(X[0]), size, size, 1)
    # y = np.array(y).reshape(n_patterns, 8)
    labels = to_categorical(y, 5)
    return X, labels

def get_tflite():
    _set = INPUT_SET
    _subj = "kirillpen"
    w_lengths = [100, 50, 25, 10]
    for _end in w_lengths:
        params = {"end": _end, "dir": _subj, "set": _set}
        x, y = DataLoader().load(params)
        x = scale_input(x)
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
        y_train = to_categorical(y_train)
        x_train = reshape_for_cnn(x_train)
        x_test = reshape_for_cnn(x_test)
        cnn = get_cnn_adv(x_train[0], len(_set))
        model, accuracy, cm = test_model(cnn, x_train, x_test, y_train, y_test)
        model_name = f"{_subj}_{_end}"
        model.save(model_name)
        # convert the saved Keras model to TensorFlow Lite (TF 1.x converter API)
        converter = TFLiteConverter.from_keras_model_file(model_name)
        tflite_model = converter.convert()
        with open(model_name + '.tflite', "wb") as f:
            f.write(tflite_model)

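TFLiteConverter.from_keras_model_file is the TF 1.x entry point; under TF 2.x the same conversion (assuming the in-memory `model` and `model_name` from the loop above) would look roughly like:

# TF 2.x sketch - assumes `model` and `model_name` as defined above.
import tensorflow as tf

converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
with open(model_name + '.tflite', 'wb') as f:
    f.write(tflite_model)
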
def train(self, number_of_epochs=100, test_size=0.2, callbacks=None):
    # avoid a mutable default argument: the list is appended to below
    callbacks = list(callbacks) if callbacks else []
    self.log_name = self.log_name + 'epochs=' + str(number_of_epochs) + 'test=' + str(test_size)
    log_dir = "logs\\new_results\\" + self.log_name + '_' + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    csv_callback = tf.keras.callbacks.CSVLogger(filename=log_dir + 'logs.csv')
    callbacks.append(tensorboard_callback)
    callbacks.append(csv_callback)
    x = []
    y = []
    # build fixed-length, normalized input windows; each window predicts the next event
    for track in self.data_set:
        for j in range(0, len(track) - self.sequence_length):
            input_vector = []
            for i in range(self.sequence_length):
                input_vector.append(track[i + j] / self.unique_events_list.get_event_list_size())
            x.append(input_vector)
            y.append(track[j + self.sequence_length])
    self.prepare_model(self.unique_events_list.get_event_list_size())
    x = np.reshape(x, (len(x), self.sequence_length, 1))
    y = np_utils.to_categorical(y)
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size,
                                                        random_state=1)
    history = self.model.fit(x_train, y_train, epochs=number_of_epochs, batch_size=32,
                             callbacks=callbacks, validation_data=(x_test, y_test))
    pyplot.plot(history.history['loss'])
    pyplot.plot(history.history['val_loss'])
    pyplot.show()

def get_gcm():
    subjects = PARTICIPANT_LIST
    _set = INPUT_SET
    _end = 50
    cum_cm = np.zeros((len(INPUT_SET), len(INPUT_SET)))
    print(f'cm for length {_end}')
    count = 0
    for _subj in subjects:
        # if _subj.find("pen") >= 0:
        # if _subj.find("umbr") >= 0:
        if _subj.find("pen") < 0 and _subj.find("umbr") < 0:  # fh
            count += 1
            params = {"end": _end, "dir": _subj, "set": _set}
            x, y = DataLoader().load(params)
            x = scale_input(x)
            x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, shuffle=True)
            y_train = to_categorical(y_train)
            x_train = reshape_for_cnn(x_train)
            x_test = reshape_for_cnn(x_test)
            cnn = get_cnn_adv(x_train[0], len(_set))
            model, accuracy, cm = test_model(cnn, x_train, x_test, y_train, y_test)
            cum_cm += cm
            print(accuracy)
    ax = plt.axes()
    ax.set_ylabel("Target")
    mx = cum_cm / count  # average the confusion matrices across subjects
    disp = ConfusionMatrixDisplay(confusion_matrix=mx,
                                  display_labels=["suggestion", "top", "mid", "bottom", "rest"])
    disp.plot(include_values=True, ax=ax, cmap='Blues')
    plt.show()

def windDir(location):
    filename = location + "_preprocessed.csv"
    df = pd.read_csv(filename)
    data = pd.DataFrame(columns=['day', 'month', 'WindDir'])
    data['day'] = df['day']
    data['month'] = df['month']
    data['WindDir'] = df['WindDir']
    # encode the compass-sector labels as one-hot targets
    le_pred = LabelEncoder()
    y = le_pred.fit_transform(data.WindDir)
    y = np_utils.to_categorical(y)
    y = y.astype('int32')
    out_classes = y.shape[1]
    model = Sequential()
    model.add(Dense(units=16, input_dim=2, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=64, activation='relu'))
    model.add(Dense(units=64, activation='relu'))
    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(units=128, activation='relu'))
    model.add(Dense(units=64, activation='relu'))
    model.add(Dense(units=64, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=32, activation='relu'))
    model.add(Dense(units=out_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(data.iloc[:, :-1], y, epochs=200, batch_size=512)
    # build (day, month) inputs for the next 90 days
    last_date = df['date'].iloc[-1]
    last_date = datetime.datetime.strptime(last_date, "%Y-%m-%d").date()
    pred_date = [last_date + datetime.timedelta(days=i) for i in range(1, 91)]
    pred_input = [[d.day, d.month] for d in pred_date]
    sample = pd.DataFrame(columns=['day', 'month'], data=pred_input)
    sample_p = model.predict(sample)
    sample_pred = np.argmax(sample_p, axis=1)
    # work on a Python list so the integer headings below are not truncated
    # by the fixed-width string dtype of the inverse-transformed array
    directions = le_pred.inverse_transform(sample_pred).tolist()
    # map each predicted compass sector to a random heading in degrees
    # (np.random.randint truncates the float bounds to integers)
    for i in range(len(directions)):
        if directions[i] == 'N':
            c = np.random.randint(0, 2)  # pick one of N's two sectors around 0 deg
            if c == 0:
                directions[i] = np.random.randint(0, 22.5)
            else:
                directions[i] = np.random.randint(337.5, 360)
        elif directions[i] == 'NE':
            directions[i] = np.random.randint(22.5, 67.5)
        elif directions[i] == 'E':
            directions[i] = np.random.randint(67.5, 112.5)
        elif directions[i] == 'SE':
            directions[i] = np.random.randint(112.5, 157.5)
        elif directions[i] == 'S':
            directions[i] = np.random.randint(157.5, 202.5)
        elif directions[i] == 'SW':
            directions[i] = np.random.randint(202.5, 247.5)
        elif directions[i] == 'W':
            directions[i] = np.random.randint(247.5, 292.5)
        elif directions[i] == 'NW':
            directions[i] = np.random.randint(292.5, 337.5)
    pred_data = pd.DataFrame(columns=['date', 'direction', 'speed'])
    pred_data['date'] = pred_date
    pred_data['direction'] = directions
    pred_filename = location + ".csv"
    filepath = "data/" + pred_filename
    pred_data.to_csv(filepath, mode='a', header=False, index=False)

def single_target(EXPERIMENT_PATH, DATA_PATH, TENSOR_DATA_PATH, window_sequences,
                  list_num_neurons, learning_rate, features_to_use, DROPOUT, EPOCHS,
                  PATIENCE, BATCH_SIZE, test_set):
    #################### FOLDER SETUP ####################
    MODELS_PATH = "models"
    RESULT_PATH = "result"
    # starting from the testing set
    for crypto_name in os.listdir(DATA_PATH):
        # create a folder for data in tensor format
        folder_creator(TENSOR_DATA_PATH + "/" + crypto_name, 0)
        # create folders for models and results
        folder_creator(EXPERIMENT_PATH + "/" + MODELS_PATH + "/" + crypto_name, 0)
        folder_creator(EXPERIMENT_PATH + "/" + RESULT_PATH + "/" + crypto_name, 0)
        for window, num_neurons in product(window_sequences, list_num_neurons):
            print('Current configuration: ')
            print("Crypto: ", crypto_name, "\t", "Window_sequence: ", window, "\t",
                  "Neurons: ", num_neurons)
            predictions_file = {'symbol': [], 'date': [], 'observed_class': [],
                                'predicted_class': []}
            macro_avg_recall_file = {'symbol': [], 'macro_avg_recall': []}
            # new folders for this configuration: best model checkpoint and statistics
            configuration_name = "LSTM_" + str(num_neurons) + "_neurons_" + str(window) + "_days"
            statistics = "stats"
            model_path = EXPERIMENT_PATH + "/" + MODELS_PATH + "/" + crypto_name + "/" + configuration_name + "/"
            results_path = EXPERIMENT_PATH + "/" + RESULT_PATH + "/" + crypto_name + "/" + configuration_name + "/" + statistics + "/"
            folder_creator(model_path, 0)
            folder_creator(results_path, 0)
            for date_to_predict in test_set:
                # format of dataset name: Crypto_DATE_TO_PREDICT.csv
                dataset_name = crypto_name + "_" + str(date_to_predict) + ".csv"
                dataset, features_without_date = prepare_input_forecasting(
                    os.path.join(DATA_PATH, crypto_name), dataset_name, features_to_use)
                dataset_tensor_format = fromtemporal_totensor(
                    np.array(dataset), window,
                    TENSOR_DATA_PATH + "/" + crypto_name + "/",
                    crypto_name + "_" + date_to_predict)
                # train, validation, test = get_training_validation_testing_set(dataset_tensor_format, date_to_predict)
                train, test = get_training_validation_testing_set(dataset_tensor_format,
                                                                  date_to_predict)
                index_of_target_feature = features_without_date.index('trend')
                x_train = train[:, :-1, :index_of_target_feature]
                y_train = train[:, -1, index_of_target_feature]
                x_test = test[:, :-1, :index_of_target_feature]
                y_test = test[:, -1, index_of_target_feature]
                # change the data type, from object to float
                x_train = x_train.astype('float')
                x_test = x_test.astype('float')
                # one-hot encode y
                y_train = to_categorical(y_train)
                y_test = to_categorical(y_test)
                # batch size must be a factor of the number of training elements
                if BATCH_SIZE is None:
                    BATCH_SIZE = x_train.shape[0]
                model, history = train_single_target_model(
                    x_train, y_train, num_neurons=num_neurons, learning_rate=learning_rate,
                    dropout=DROPOUT, epochs=EPOCHS, batch_size=BATCH_SIZE, patience=PATIENCE,
                    num_categories=len(y_train[0]), date_to_predict=date_to_predict,
                    model_path=model_path)
                # plot the neural network's architecture
                plot_model(model, to_file=model_path + "neural_network.png", show_shapes=True,
                           show_layer_names=True, expand_nested=True, dpi=150)
                # (disabled) plot the loss and accuracy curves:
                # filename = "model_train_val_loss_bs_" + str(BATCH_SIZE) + "_target_" + str(date_to_predict)
                # plot_train_and_validation_loss(pd.Series(history.history['loss']),
                #                                pd.Series(history.history['val_loss']),
                #                                model_path, filename)
                # filename = "model_train_val_accuracy_bs_" + str(BATCH_SIZE) + "_target_" + str(date_to_predict)
                # plot_train_and_validation_accuracy(pd.Series(history.history['accuracy']),
                #                                    pd.Series(history.history['val_accuracy']),
                #                                    model_path, filename)
                # predict for the current date in the test set
                test_prediction = model.predict(x_test)
                # this is important: free the session and memory between configurations
                K.clear_session()
                tf_core.random.set_seed(42)
                gc.collect()
                del model
                del dataset_tensor_format
                del dataset
                print("Num of entries for training: ", x_train.shape[0])
                # invert the one-hot encoding: np.argmax returns the index of the highest value
                print("Predicting for: ", date_to_predict)
                print("Predicted: ", np.argmax(test_prediction))
                print("Actual: ", np.argmax(y_test))
                print("\n")
                # save the predictions in the dictionary
                predictions_file['symbol'].append(crypto_name)
                predictions_file['date'].append(date_to_predict)
                predictions_file['observed_class'].append(np.argmax(y_test))
                predictions_file['predicted_class'].append(np.argmax(test_prediction))
            save_results(macro_avg_recall_file, crypto_name, predictions_file, results_path)
    return

import matplotlib.pyplot as plt
# use the public tf.keras API consistently; the original mixed it with the
# private tensorflow_core.python.keras paths, which can produce incompatible objects
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dropout, Dense
from tensorflow.keras.utils import to_categorical

(X_train, Y_train), (X_test, Y_test) = mnist.load_data()
print(f'X_train: {X_train.shape}, Y_train: {Y_train.shape}')
print(f'X_test: {X_test.shape}, Y_test: {Y_test.shape}')

# add a channel dimension and scale pixel values to [0, 1]
X_train = X_train.reshape(*X_train.shape, 1).astype('float16') / 255
X_test = X_test.reshape(*X_test.shape, 1).astype('float16') / 255
Y_train = to_categorical(Y_train, 10, dtype='float16')
Y_test = to_categorical(Y_test, 10, dtype='float16')
print(f'X_train: {X_train.shape}, Y_train: {Y_train.shape}')
print(f'X_test: {X_test.shape}, Y_test: {Y_test.shape}')

model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(28, 28, 1)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=2))
model.add(Dropout(rate=0.25))
model.add(Flatten())
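The snippet ends at Flatten(); a plausible completion with a standard classification head is sketched below. The head sizes, batch size, and epoch count are assumptions, not taken from the original; it does, however, put the EarlyStopping and matplotlib imports above to use.

# Assumed continuation: dense head, compile, and training with early stopping.
model.add(Dense(128, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(10, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
history = model.fit(X_train, Y_train, validation_data=(X_test, Y_test),
                    batch_size=128, epochs=20, callbacks=[early_stop])

# plot the training curves with the matplotlib import from the top of the snippet
plt.plot(history.history['loss'], label='train loss')
plt.plot(history.history['val_loss'], label='val loss')
plt.legend()
plt.show()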