def build_dataloader(args):
    """Build the training and validation data loaders.

    Args:
        args (argparse.Namespace): parsed command-line options; must
            provide max_seq_len, valid_rate, batch_size, data_json_path,
            wv_name and formated_data_path.

    Returns:
        tuple: (train DataLoader, valid DataLoader, corpus Vocab).
    """
    # Load (and, if needed, preprocess) the corpus.
    print('Getting the data...')
    corpus = get_data(args.data_json_path, args.wv_name,
                      args.formated_data_path, args.max_seq_len)

    # Pair every input sequence with its label and split train/valid.
    samples = gluon.data.SimpleDataset(
        [list(pair) for pair in zip(corpus['x'], corpus['y'])])
    train_split, valid_split = nlp.data.train_valid_split(
        samples, args.valid_rate)

    return (get_dataloader(train_split, args.batch_size, is_train=True),
            get_dataloader(valid_split, args.batch_size, is_train=False),
            corpus['vocab'])
__author__ = 'joseph'

import prepare_data as prepare
import evaluate
import io_helper
from sklearn.neighbors import KNeighborsClassifier

# Load the pre-split datasets plus per-user metadata from the project helper.
train_data, validation_data, test_data, basic_users_info = prepare.get_data()

# Shared label encoder dict: presumably get_exclude_ndf_x fills it on the
# first call so validation labels map to the same integer ids -- TODO confirm.
label_encoder = {}
train_x, train_y = prepare.get_exclude_ndf_x(train_data, basic_users_info, label_encoder)
validation_x, validation_y = prepare.get_exclude_ndf_x(validation_data, basic_users_info, label_encoder)

# NOTE(review): one-off grid search over k in [1, 100) for
# KNeighborsClassifier, scored with NDCG on the validation split; results
# were written to a CSV.  Kept for reference; best observed result is the
# tuple recorded on the last line.
# max_ndcg = 0
# max_k = 0
# k_ndcg = {}
# for k in range(1, 100):
#     neighbor_classifier = KNeighborsClassifier(n_neighbors=k)
#     neighbor_classifier.fit(train_x, train_y)
#     validation_predict = neighbor_classifier.predict(validation_x)
#     predict_list = [[predict] for predict in validation_predict]
#
#     ndcg = evaluate.ndcg(predict_list, validation_data)
#     k_ndcg.setdefault(k, ndcg)
#     if ndcg > max_ndcg:
#         max_ndcg = ndcg
#         max_k = k
#     print(max_ndcg, max_k)
#
# io_helper.write_map_data(k_ndcg, '../records/k_neighbors_classifier.csv')
# (0.8724597056762439, 25)
# NOTE(review): this chunk starts inside a feature-extraction helper whose
# `def` line is above the visible text -- the loop maps each raw image to a
# 6-element hand-crafted feature vector.
    for image in images:
        extracted_images.append(
            np.array([
                average_intensity(image),
                detect_vertical_line(image),
                detect_horizontal_line(image),
                enclosed_space(image),
                average_horizontal_std(image),
                average_vertical_std(image)
            ]))
    return extracted_images


# load data set
(training_images, training_labels, validation_images, validation_labels,
 test_images, test_labels) = prepare_data.get_data()

# create classifier and fit to training data
# Three candidate decision trees: an untuned baseline, one with depth/leaf
# limits (values presumably found by an earlier search -- TODO confirm),
# and one intended for the hand-crafted features above.
baseline_classifier = tree.DecisionTreeClassifier(criterion="entropy")
tuned_classifier = tree.DecisionTreeClassifier(criterion="entropy",
                                               max_depth=9,
                                               min_samples_leaf=6)
feature_extracted_classifier = tree.DecisionTreeClassifier(criterion="entropy")

# -----------------------------------------------------------
# Select the classifier to be used (comment out all but one)
classifier = baseline_classifier
# classifier = tuned_classifier
# classifier = feature_extracted_classifier
import sys
sys.path.append('..')

from util import *
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

from prepare_data import get_data

# Cached feature matrix, labels and metadata for the whole dataset.
X_all, y_all, groups, feature_names, subjects, labels, class_names = get_data(
    use_precomputed=True)

# Pick the single feature column to inspect.  Alternatives that were tried
# are kept below together with their informal quality notes.
try:
    feature_index = [('max of eog v' in x) for x in feature_names].index(True)  # super great
    # feature_index = [('90' in x and 'deriv' in x and 'eog h' in x) for x in feature_names].index(True)
    # feature_index = [('min' in x and 'deriv' in x and 'eog l' in x) for x in feature_names].index(True)
    # feature_index = [('std dev of eog h' in x) for x in feature_names].index(True)  # meh
    # feature_index = [('std dev of eog v' in x) for x in feature_names].index(True)  # quite good
    # feature_index = [('std dev of eog r' in x) for x in feature_names].index(True)  # same as previous
    # feature_index = [('std dev of eog l' in x) for x in feature_names].index(True)  # same as previous
    # feature_index = [('energy of eog l' in x) for x in feature_names].index(True)  # different std devs
    # feature_index = [('max of gyro z' in x) for x in feature_names].index(True)  # very bad
except ValueError:
    # list.index(True) raises ValueError when no feature name matched.
    # Fixes vs. original: report on stderr, use sys.exit with a failure
    # status instead of the site-injected exit() builtin, and drop the
    # unused `as e` binding.
    print("Feature not found. Exiting.", file=sys.stderr)
    sys.exit(1)
import sys
sys.path.append('..')

from util import *
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.mixture import GaussianMixture
from sklearn.cluster import SpectralClustering, KMeans

from prepare_data import get_data

# Full feature matrix plus metadata; the trailing flags presumably describe
# how the features were built (moving data, EOG/IMU channels) -- TODO
# confirm against prepare_data.get_data.
X_all, y_all, groups, feature_names, subjects, labels, class_names, is_moving_data, include_eog, include_imu = get_data(use_precomputed=True)


def rand_jitter(arr):
    """Return *arr* with small Gaussian noise added (stdev = 5% of the
    value range) so overlapping scatter points become distinguishable."""
    stdev = 0.05*(max(arr)-min(arr))
    return arr + np.random.randn(len(arr)) * stdev


# Project the features to 2-D for visualisation.
print("reducing dimensions with PCA")
plt.figure(1)
pca = PCA(n_components=2)
pca.fit(X_all)
X_all_reduced = pca.transform(X_all)

# Scatter each class separately; NOTE(review): the loop body continues
# beyond this chunk.
for i,label in enumerate(labels):
    mask = np.where(y_all == label)
use_response_similarity = False # Can't use with discussion use_book_similarity = True # --- # Defining some key variables that will be used later on in the training MAX_LEN = 128 TRAIN_BATCH_SIZE = 4 VALID_BATCH_SIZE = 2 N_EPOCHS = 16 LEARNING_RATE = 1e-05 print("DistilBertTokenizer") tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased') # Get data mes_train, mes_test, class_train, class_test, book_idx_train, book_idx_test, response_link_train, response_link_test, class_dict = get_data( sheet, use_response_similarity, use_book_similarity) # intialise data of lists. data_train = {'Message': mes_train, 'ENCODE_CAT': class_train} data_test = {'Message': mes_test, 'ENCODE_CAT': class_test} # Create DataFrame df_train = pd.DataFrame(data_train) df_test = pd.DataFrame(data_test) # Creating the dataset and dataloader for the neural network train_dataset = df_train test_dataset = df_test if len(test_dataset) % 2 != 0: test_dataset = test_dataset[:-1]
def train():
    """Classify first, then propose.

    Two-stage training of a shared backbone: (1) fit the classification
    head, (2) freeze the conv blocks and fit the bbox-regression head,
    then save a visualisation of predictions on one frame per class.
    """
    # Timestamped run name (microseconds stripped) used for both log dirs.
    log_name = str(datetime.datetime.now()).replace(' ', '_')[:-7]
    # os.mkdir raises if the dir exists, so a rerun in the same second
    # cannot silently overwrite earlier logs.
    os.mkdir(f'./clf/logs/{log_name}')
    os.mkdir(f'./regress/logs/{log_name}')
    callbacks_clf = [
        keras.callbacks.TensorBoard(f'./clf/logs/{log_name}'),
        keras.callbacks.EarlyStopping(patience=4, restore_best_weights=True)
    ]
    callbacks_regress = [
        keras.callbacks.TensorBoard(f'./regress/logs/{log_name}'),
        keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
    ]
    data_classify, data_regress, n = prepare_data.get_data()

    # Shared model with two outputs: [0] = class logits/probs, [1] = box.
    model = models.bbox_regressor((128, 128, 3), logits_output=False)

    # ---- Stage 1: classification head only -------------------------------
    model_clf = keras.Model(inputs=model.inputs, outputs=model.outputs[0])
    model_clf.compile(
        optimizer=keras.optimizers.Adam(3e-6),  # 1e-5
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy()])
    train_data_classify, val_data_classify = prepare_data.split_data(
        data_classify, n)
    # epochs=10000 is effectively "until EarlyStopping triggers".
    model_clf.fit(train_data_classify,
                  epochs=10000,
                  validation_data=val_data_classify,
                  callbacks=callbacks_clf)
    # model.save('./clf/first_model.h5')
    # imgs = []
    # for label in prepare_data_v2.classes:
    #     imgs.append(plt.imread(f'./tiny_vid/{label}/000001.JPEG'))
    # imgs = np.array(imgs)
    # rsts = model_clf.predict(imgs)
    # for i, img, label in zip(range(1, imgs.shape[0] + 1), imgs, rsts):
    #     plt.subplot(3, 2, i)
    #     plt.imshow(img)
    #     label = np.argmax(label)
    #     plt.title(prepare_data_v2.classes[label])
    # plt.savefig(f'clf_full_{log_name}.jpg')
    # plt.show()
    #
    # freeze conv layers
    for layer in model.layers:
        name = layer.name
        if 'block' in name:
            layer.trainable = False

    # ---- Stage 2: bbox regression head on the frozen backbone -------------
    model_regress = keras.Model(inputs=model.inputs, outputs=model.outputs[1])
    model_regress.compile(
        optimizer=keras.optimizers.Adam(5e-7),  # too large
        loss=keras.losses.MeanSquaredError(),
        metrics=[keras.metrics.MeanAbsoluteError()])
    train_data_regress, val_data_regress = prepare_data.split_data(
        data_regress, n)
    model_regress.fit(train_data_regress, epochs=10000,
                      validation_data=val_data_regress,
                      callbacks=callbacks_regress)

    # Visualize: one frame per class with predicted class title and box.
    imgs = []
    for label in prepare_data.classes:
        imgs.append(plt.imread(f'./tiny_vid/{label}/000111.JPEG'))
    imgs = np.array(imgs)
    rsts = model.predict(imgs)
    for i, img, label, cor in zip(range(1, imgs.shape[0] + 1), imgs, rsts[0], rsts[1]):
        plt.subplot(3, 2, i)
        plt.imshow(img)
        # utils.plot_box_from_xywh(cor)
        utils.plot_box_from_min_max(cor)
        label = np.argmax(label)
        plt.xticks([]), plt.yticks([])
        plt.title(prepare_data.classes[label])
    plt.savefig(f'full_{log_name}.jpg')
    plt.show()
    pass
# NOTE(review): this chunk starts inside an SGD training loop of a method
# whose header is above the visible text -- presumably train_with_sgd-style
# annealing that halves the learning rate when the loss increased.
            learning_rate=0.5*learning_rate
            print('Setting learning rate to %f'%learning_rate)
            sys.stdout.flush()
        # For each training example ...
        for i in range(len(y_train)):
            self.numpy_sdg_step(X_train[i],y_train[i],learning_rate)
            num_examples_seen+=1
    return losses


if __name__=='__main__':
    src_root = '/Users/jinzixiang/Documents/workspace/python/rnn'
    vocabulary_size = 4000
    X_train,y_train =prepare_data.get_data(src_root,vocabulary_size)
    np.random.seed(10)

    # Sanity check: with random weights, the expected cross-entropy loss is
    # log(vocabulary_size); the actual loss should be close to it.
    model=RNNNumpy(vocabulary_size)
    predictions=model.predict(X_train[0][0:100])
    print(predictions)
    print("Expected Loss for random predictions: %f" % np.log(vocabulary_size))
    print("Actual loss: %f" % model.calculate_loss(X_train[:1000], y_train[:1000]))

    # Numerical gradient check on a tiny vocabulary (full check on 4000
    # words would be far too slow).
    grade_check_vocabulary_size=100
    model=RNNNumpy(grade_check_vocabulary_size,hidden_dim=10,bptt_truncate=1000)
    model.gradient_check([0,1,2,3],[1,2,3,4])
from keras.layers import Input, Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.utils import np_utils
from matplotlib import pyplot as plt
import tensorflow as tf
from keras import backend as K
from keras.callbacks import TensorBoard, EarlyStopping
from keras import regularizers
import prepare_data as pd
import numpy as np

sess = tf.Session()
K.set_session(sess)

folder = "D:\PickledData\\"
# Raw image splits plus integer class labels from the pickled data.
x_train, x_test, Y_train, Y_test, categories = pd.get_data(folder)
print(x_train.shape)

input_img = Input(shape=(5001, 1608, 1))

# Normalise pixel values to [0, 1] (per-split max).
X_train = x_train.astype('float32')
X_test = x_test.astype('float32')
X_train /= np.amax(X_train)  # - 0.5
X_test /= np.amax(X_test)  # - 0.5

# Add the single channel axis expected by Conv2D (channels_last layout).
x_train = np.reshape(X_train, (len(X_train), 5001, 1608, 1))  # adapt this if using `channels_first` image data format
x_test = np.reshape(X_test, (len(X_test), 5001, 1608, 1))  # adapt this if using `channels_first` image data format
# X_train = X_train.reshape((-1,784))
# X_test = X_test.reshape((-1,784))
print(X_train.shape)

# BUG FIX: the original called to_categorical(y_train, 10) and
# to_categorical(y_test, 10) with lowercase names that are never defined
# (NameError at runtime).  The integer label arrays returned by get_data
# are Y_train/Y_test, so one-hot encode those in place.
# NOTE(review): the class count is hard-coded to 10; len(categories) may be
# the intended value -- confirm before changing.
Y_train = np_utils.to_categorical(Y_train, 10)
Y_test = np_utils.to_categorical(Y_test, 10)