        norms = np.array(sorted(norms, key=lambda x: x[1]), dtype=object)
        # majority vote among the k nearest neighbours
        counts = np.bincount(norms[:self.k, 0].astype('int32'))
        return np.argmax(counts)


def plot_knn_results(data, k):
    knn_classifier = KNN(k=k)
    knn_classifier.fit(*data)

    # bounding box of the data (assumed integer-valued for the grid below)
    sup_x = (np.min(data[0][:, 0]), np.max(data[0][:, 0]))
    sup_y = (np.min(data[0][:, 1]), np.max(data[0][:, 1]))

    # predict the class of every grid point inside the bounding box
    results = np.array(
        [[knn_classifier.predict((x, y)) for y in range(*sup_y)]
         for x in range(*sup_x)])

    # colour points by their true label and the background by the prediction
    points, labels = data
    colors_dict = {0: 'red', 1: 'blue', 2: 'green'}
    brush = np.vectorize(lambda x: colors_dict[x])
    colors = brush(labels)
    background = brush(results)

    plt.pcolormesh(background)
    plt.scatter(points[:, 0], points[:, 1], c=colors)
    plt.show()


if __name__ == '__main__':
    data = data_processing.data_generator(
        10, 3, 2, centers=((0, 0), (5, 5), (-5, 5)))
    plot_knn_results(data, 7)
        for i in range(epochs):
            # stochastic update: draw one random training sample per step
            x, y = list(zip(data, pretty_labels))[np.random.randint(
                0, data.shape[0])]
            x = np.append(x, 1)  # append the bias term
            self.beta -= learning_rate(i) * self.gradient(x, y)

    def predict(self, x):
        x = np.append(x, 1)  # append the bias term
        return 1 if self.kernel(x, self.beta) >= 0 else -1


if __name__ == '__main__':
    # np.random.seed(1)
    data = data_processing.data_generator(no_samples_per_class=10,
                                          no_classes=2,
                                          no_dimensions=2,
                                          centers=((0, 0), (15, 5)))
    # data_processing.plot_data(data)

    # svm = SVM(softing_parameter=10,
    #           kernel=lambda x, y: (np.dot(x, y) + 1000)**10)
    svm = SVM(softing_parameter=10000, kernel=lambda x, y: np.dot(x, y))
    # print(svm.predict(np.array([15, 5])))

    # two-class subset of the iris data set
    data, target = datasets.load_iris(as_frame=True, return_X_y=True)
    data['target'] = target
    data = data[data['target'] < 2]
    data = shuffle(data)
    print(data.shape)

    training_data = data.iloc[:30, :]
    test_data = data.iloc[30:, :]
    print(len(test_data))
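    # --- Possible continuation (a sketch, not part of the original file) ---
    # The split above is never used in the snippet. One way to finish the
    # workflow: train the linear SVM on the training rows and report test
    # accuracy. The fit(features, labels) call is an assumption modelled on
    # the training loop above, and labels are remapped to {-1, +1} for
    # predict().
    features = training_data.drop(columns='target').to_numpy()
    labels = 2 * training_data['target'].to_numpy() - 1   # {0, 1} -> {-1, +1}
    svm.fit(features, labels)                              # assumed signature

    test_features = test_data.drop(columns='target').to_numpy()
    test_labels = 2 * test_data['target'].to_numpy() - 1
    predictions = np.array([svm.predict(x) for x in test_features])
    print("test accuracy: {:.2f}".format(np.mean(predictions == test_labels)))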
def train(sess, data, *, input_shape=None, epochs=10, batch_size=32,
          learning_rate=1e-4, weight_decay=0.0, save_dir='./saved_fcn8s',
          rootname='fcn8s', finalize_dir=None):
    """Train the neural network and print out the loss during training.

    :param sess: TF Session
    :param data: Data object
        Data set.
    :param input_shape: tuple, (w, h)
        Input shape for the neural network.
    :param epochs: int
        Number of epochs.
    :param batch_size: int
        Batch size.
    :param learning_rate: float
        Learning rate.
    :param weight_decay: float
        L2 regularization strength.
    :param save_dir: string
        Directory of the saved model and weights.
    :param rootname: string
        Rootname for the saved file.
    :param finalize_dir: string
        If given, directory into which a restored model is exported as a
        SavedModel.
    """
    input_ts, output_ts, keep_prob_ts = build_model(sess, data.n_classes)

    # cross entropy loss
    logits_ts = tf.reshape(output_ts, (-1, data.n_classes))
    labels_ts = tf.placeholder(tf.float32, (None, None, None, data.n_classes))
    cross_entropy_loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits=logits_ts,
            labels=tf.reshape(labels_ts, (-1, data.n_classes))))

    # L2 regularization (biases excluded)
    trainable_vars = tf.trainable_variables()
    l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in trainable_vars
                        if 'bias' not in v.name]) * weight_decay
    total_loss = cross_entropy_loss + l2_loss

    # Optimizer for training
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(total_loss)

    # Initialization
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()
    loss_history_file = os.path.join(save_dir, rootname + '_loss_history.pkl')
    loss_history = dict()
    loss_history['loss'] = []
    loss_history['vali_loss'] = []

    # load the model if it exists
    if os.path.exists(save_dir):
        try:
            print("--- Loading saved model! ---")
            saver.restore(sess, os.path.join(save_dir, rootname))
            if finalize_dir is not None:
                builder = tf.saved_model.builder.SavedModelBuilder(finalize_dir)
                builder.add_meta_graph_and_variables(
                    sess, ['fcn8s'],
                    signature_def_map={
                        "model": tf.saved_model.signature_def_utils.predict_signature_def(
                            inputs={"input": input_ts},
                            outputs={"output": output_ts})
                    }
                )
                builder.save()
                return
        except Exception:
            print("Cannot load existing model!")
    else:
        os.mkdir(save_dir)
        if finalize_dir is not None:
            print("Cannot finalize a saved model!")
            return

    # train the model
    print("--- Training ---")
    pbar = tqdm(total=epochs)
    for i in range(epochs):
        gen = data_generator(
            data.image_files_train,
            data.label_files_train,
            data.label_colors if data.label_colors else data.background_color,
            batch_size=batch_size,
            input_shape=input_shape)

        # running sums for the average training loss of this epoch
        epoch_loss = 0
        count = 0
        for X, Y in gen:
            _, loss = sess.run([optimizer, cross_entropy_loss],
                               feed_dict={keep_prob_ts: 1.0,
                                          input_ts: X,
                                          labels_ts: Y})
            count += X.shape[0]
            epoch_loss += loss * X.shape[0]
            loss_history['loss'].append(loss)
            print("mini-batch loss: {:.4f}".format(loss), end='\r')
        avg_loss = epoch_loss / count

        # validation
        if data.image_files_vali is not None:
            vali_count = 0
            total_vali_loss = 0
            gen = data_generator(
                data.image_files_vali,
                data.label_files_vali,
                data.label_colors if data.label_colors else data.background_color,
                batch_size=batch_size,
                input_shape=input_shape,
                is_training=False)
            for X, Y in gen:
                vali_loss = sess.run(cross_entropy_loss,
                                     feed_dict={keep_prob_ts: 1.0,
                                                input_ts: X,
                                                labels_ts: Y})
                vali_count += X.shape[0]
                total_vali_loss += vali_loss * X.shape[0]
                loss_history['vali_loss'].append(vali_loss)
            avg_vali_loss = total_vali_loss / vali_count
        else:
            avg_vali_loss = None

        update_description(pbar, avg_loss, avg_vali_loss)
        pbar.update()

        # save the model and the loss history after each epoch
        saver.save(sess, os.path.join(save_dir, rootname))
        save_history(loss_history, loss_history_file)
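# --- Usage sketch (not part of the original module) ---
# Illustrates how train() might be invoked. The Data stand-in below is an
# assumption: it only mimics the attributes the function actually reads
# (n_classes, image_files_train / label_files_train, image_files_vali /
# label_files_vali, label_colors, background_color). Paths and
# hyper-parameter values are placeholders.
if __name__ == '__main__':
    import glob
    from types import SimpleNamespace

    data = SimpleNamespace(
        n_classes=2,
        image_files_train=sorted(glob.glob('./data/train/images/*.png')),
        label_files_train=sorted(glob.glob('./data/train/labels/*.png')),
        image_files_vali=None,   # no validation set in this sketch
        label_files_vali=None,
        label_colors=None,
        background_color=(255, 0, 0))  # assumed colour encoding of background

    with tf.Session() as sess:
        train(sess, data,
              input_shape=(576, 160),
              epochs=20,
              batch_size=8,
              learning_rate=1e-4,
              weight_decay=1e-5)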
def getGenerator(csv_path, length, k_folds, dim, target_height, target_width,
                 batch_size):
    """Read data from a csv file and construct data generators.

    Args:
        csv_path: path of the csv file
        length: length of the sequence data to be read
        k_folds: number of folds for training and validation
        dim: the dimension of samples used for training or validation;
            selectable values are 1, 2 or 3
        target_height, target_width: target height and width of input samples
        batch_size: the size of a data batch

    Returns:
        two lists holding the training and validation generators,
        two lists holding train_steps_per_epoch and valid_steps_per_epoch,
        and the test generator.

    Remark:
        random_state is set at every shuffle operation to ensure that all
        models use the same training and validation sets.
    """
    # loading data
    dataset = csv_read(csv_path, length)
    dataset = shuffle(dataset, random_state=0)
    train_data, test_data = train_test_split(dataset, test_size=0.2,
                                             shuffle=True, random_state=10)
    test_generator = data_generator(Data=test_data, dim=dim,
                                    target_height=target_height,
                                    target_width=target_width,
                                    batch_size=len(test_data),
                                    random_state=20)

    train_steps, valid_steps = [], []
    train_generators, valid_generators = [], []

    # get data generator
    if k_folds == 1:
        train_set, valid_set = train_test_split(train_data, test_size=0.2,
                                                shuffle=True, random_state=30)
        train_steps_per_epoch = len(train_set) // batch_size
        valid_steps_per_epoch = len(valid_set) // batch_size
        train_steps.append(train_steps_per_epoch)
        valid_steps.append(valid_steps_per_epoch)
        train_generator = data_generator(Data=train_set, dim=dim,
                                         target_height=target_height,
                                         target_width=target_width,
                                         batch_size=batch_size)
        valid_generator = data_generator(Data=valid_set, dim=dim,
                                         target_height=target_height,
                                         target_width=target_width,
                                         batch_size=batch_size)
        train_generators.append(train_generator)
        valid_generators.append(valid_generator)
    elif k_folds >= 2:
        # note: the k folds are drawn from the full dataset
        kfold = KFold(n_splits=k_folds, shuffle=True, random_state=40)
        for train_index, valid_index in kfold.split(dataset):
            train_set = np.array(dataset)[train_index]
            valid_set = np.array(dataset)[valid_index]
            train_steps_per_epoch = len(train_set) // batch_size
            valid_steps_per_epoch = len(valid_set) // batch_size
            train_steps.append(train_steps_per_epoch)
            valid_steps.append(valid_steps_per_epoch)
            train_generator = data_generator(Data=train_set, dim=dim,
                                             target_height=target_height,
                                             target_width=target_width,
                                             batch_size=batch_size)
            valid_generator = data_generator(Data=valid_set, dim=dim,
                                             target_height=target_height,
                                             target_width=target_width,
                                             batch_size=batch_size)
            # the data_generator object has been verified to be immutable,
            # so it is safe to keep one instance per fold
            train_generators.append(train_generator)
            valid_generators.append(valid_generator)
    else:
        raise ValueError("Undefined k_folds value!")

    return (train_generators, train_steps, valid_generators, valid_steps,
            test_generator)
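# --- Usage sketch (not part of the original module) ---
# Shows how the generators returned by getGenerator() could drive k-fold
# training with a Keras model. The csv path, the model_builder() factory and
# the hyper-parameter values are assumptions made for illustration only.
if __name__ == '__main__':
    (train_generators, train_steps,
     valid_generators, valid_steps,
     test_generator) = getGenerator(csv_path='./data/samples.csv',  # assumed path
                                    length=16,
                                    k_folds=5,
                                    dim=2,
                                    target_height=224,
                                    target_width=224,
                                    batch_size=8)

    for fold, (train_gen, valid_gen) in enumerate(zip(train_generators,
                                                      valid_generators)):
        model = model_builder()  # hypothetical factory returning a compiled Keras model
        model.fit(train_gen,
                  steps_per_epoch=train_steps[fold],
                  validation_data=valid_gen,
                  validation_steps=valid_steps[fold],
                  epochs=30)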