from keras.applications.resnet50 import ResNet50, preprocess_input


def getfeatures(folder, target_size, fraction=0.8, model='Resnet50'):
    """Extract ResNet50 bottleneck features and labels for a train/test split.

    `fraction` is the share of images per class used for training (assumed
    default of 0.8); only the ResNet50 backbone is currently supported.
    """
    base_model = ResNet50(weights="imagenet", include_top=False)
    images = getFilesInDir(folder)
    images_train, images_test = test_train_split(images, fraction)

    # Convert every image path into a 4-D tensor the network can consume.
    tensors_train = []
    tensors_test = []
    for _, paths in images_train.items():
        for path in paths:
            tensors_train.append(img_to_tensor(path, target_size=target_size))
    for _, paths in images_test.items():
        for path in paths:
            tensors_test.append(img_to_tensor(path, target_size=target_size))

    preprocessed_tensors_train = [preprocess_input(t) for t in tensors_train]
    preprocessed_tensors_test = [preprocess_input(t) for t in tensors_test]
    print("Total Training Tensors created: " + str(len(tensors_train)))
    print("Total Testing Tensors created: " + str(len(tensors_test)))

    labels_train = create_labels(images_train, output_classes=21)
    labels_test = create_labels(images_test, output_classes=21)
    print("Total Training Labels created: " + str(len(labels_train)))
    print("Total Testing Labels created: " + str(len(labels_test)))

    # Run each preprocessed tensor through the convolutional base.
    features_list_train = [base_model.predict(x) for x in preprocessed_tensors_train]
    features_list_test = [base_model.predict(x) for x in preprocessed_tensors_test]
    return features_list_train, labels_train, features_list_test, labels_test
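# The snippet above relies on an img_to_tensor helper that is not shown here.
# A minimal sketch of what such a helper could look like, built on the standard
# Keras image utilities (the real implementation in the source project may differ):
import numpy as np
from keras.preprocessing import image


def img_to_tensor_sketch(img_path, target_size):
    # Load the image at the requested resolution and add a batch dimension,
    # giving a (1, height, width, 3) array that ResNet50 can consume.
    img = image.load_img(img_path, target_size=target_size)
    x = image.img_to_array(img)
    return np.expand_dims(x, axis=0)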
from pathlib import Path
from typing import Dict, List


def find_best_acc_and_thresh(labels_csv: Path, inference_folder: Path,
                             classes: List[str]) -> Dict[str, float]:
    """
    Find the thresholds that give the best average class accuracy.

    Args:
        labels_csv: CSV file containing the ground truth labels.
        inference_folder: Folder containing the predicted labels.
        classes: Names of the classes in the dataset.

    Returns:
        A dictionary mapping class names to the best thresholds.
    """
    gt_labels = create_labels(csv_path=labels_csv)
    prediction_csv_paths = get_csv_paths(folder=inference_folder)

    best_acc = 0
    best_thresholds = None
    best_csv = None
    for prediction_csv_path in prediction_csv_paths:
        prediction_labels = create_labels(csv_path=prediction_csv_path)
        avg_class_acc, conf_matrix = get_scores(gt_labels=gt_labels,
                                                prediction_labels=prediction_labels,
                                                classes=classes)
        print(f"thresholds {parse_thresholds(csv_path=prediction_csv_path)} "
              f"has average class accuracy {avg_class_acc:.5f}")
        if best_acc < avg_class_acc:
            best_acc = avg_class_acc
            best_csv = prediction_csv_path
            best_thresholds = parse_thresholds(csv_path=prediction_csv_path)

    print(f"view these predictions in {best_csv}")
    return best_thresholds
def print_final_test_results(labels_csv: Path, inference_folder: Path,
                             classes: List[str]) -> None:
    """
    Print the final accuracy and confusion matrix.

    Args:
        labels_csv: CSV file containing the ground truth labels.
        inference_folder: Folder containing the predicted labels.
        classes: Names of the classes in the dataset.
    """
    gt_labels = create_labels(csv_path=labels_csv)
    prediction_csv_paths = get_csv_paths(folder=inference_folder)
    for prediction_csv_path in prediction_csv_paths:
        prediction_labels = create_labels(csv_path=prediction_csv_path)
        avg_class_acc, conf_matrix = get_scores(gt_labels=gt_labels,
                                                prediction_labels=prediction_labels,
                                                classes=classes)
        print(f"test set has final avg class acc: {avg_class_acc:.5f}"
              f"\n{conf_matrix}")
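# Hypothetical usage of the two evaluation helpers above; the file names, folder
# names and class list are placeholders, not values from the original project:
if __name__ == "__main__":
    class_names = ["classA", "classB", "classC"]
    best_thresholds = find_best_acc_and_thresh(labels_csv=Path("val_labels.csv"),
                                               inference_folder=Path("inference_val"),
                                               classes=class_names)
    print(f"best thresholds: {best_thresholds}")
    print_final_test_results(labels_csv=Path("test_labels.csv"),
                             inference_folder=Path("inference_test"),
                             classes=class_names)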
import os

import torch


def inference():
    # Inference Path #
    make_dirs(config.inference_path)

    # Prepare Data Loader #
    test_loader = get_celeba_loader('test', config.batch_size, config.selected_attrs)

    # Prepare Generator #
    G = Generator(num_classes=len(config.selected_attrs)).to(device)
    G.load_state_dict(
        torch.load(
            os.path.join(config.weights_path,
                         'StarGAN_Generator_Epoch_{}.pkl'.format(config.num_epochs))))

    # Test #
    print("StarGAN | Generating Aligned CelebA Images started...")
    for i, (image, label) in enumerate(test_loader):

        # Prepare Data #
        image = image.to(device)
        fixed_labels = create_labels(label, selected_attrs=config.selected_attrs)

        # Generate Fake Images #
        x_fake_list = [image]
        for c_fixed in fixed_labels:
            x_fake_list.append(G(image, c_fixed))
        x_concat = torch.cat(x_fake_list, dim=3)

        # Save Images #
        save_image(denorm(x_concat.data.cpu()),
                   os.path.join(config.inference_path,
                                'StarGAN_Aligned_CelebA_Results_%04d.png' % (i + 1)),
                   nrow=1,
                   padding=0)

    make_gifs_test("StarGAN", config.inference_path)
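# denorm above is a project helper that is not shown. In StarGAN-style code it
# typically maps generator outputs from the tanh range [-1, 1] back to [0, 1]
# before saving; a minimal sketch under that assumption:
def denorm_sketch(x):
    # Shift and rescale from [-1, 1] to [0, 1], clamping numerical overshoot.
    out = (x + 1) / 2
    return out.clamp_(0, 1)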
def flow(self):
    i = 0
    while True:
        images_batch = []
        labels_batch = []
        for b in range(self.batch_size):
            # Wrap around once every image in the dataset has been used.
            if i == len(self.images):
                i = 0

            image = cv2.imread(self.images[i])
            image = resize(image, resize=self.resize)
            image = crop(image, ROI=self.ROI)

            label_img = cv2.imread(self.masks[i], cv2.IMREAD_GRAYSCALE)
            label_img = resize(label_img, resize=self.resize)
            label_img = crop(label_img, ROI=self.ROI)

            images_batch.append(norm_data(image))
            labels_batch.append(create_labels(label_img, self.classes))
            i += 1

        yield np.array(images_batch, dtype=np.float32), np.array(labels_batch, dtype=np.float32)
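# create_labels above is a project helper that is not shown. Given how it is
# used (a grayscale mask plus self.classes, stacked into a float32 batch), it
# presumably one-hot encodes the mask per pixel. A minimal sketch under that
# assumption, where `classes` is the list of pixel values assigned to each class:
import numpy as np


def create_labels_mask_sketch(label_img, classes):
    # Build a (H, W, num_classes) array with 1.0 in the channel whose class
    # value matches the pixel value of the mask.
    label = np.zeros(label_img.shape + (len(classes),), dtype=np.float32)
    for channel, class_value in enumerate(classes):
        label[..., channel] = (label_img == class_value).astype(np.float32)
    return label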
clip_list_temp.extend(all_selec)

# create variable buttons
num_vars_prev = num_vars
clip_list, num_vars = check_and_create(clip_list_temp, clip_list_prev, frame,
                                       1, num_vars, '', button_font)

# create function buttons
num_funcs_prev = num_funcs
func_clip_list, num_funcs = check_and_create(clip_list_func_temp, clip_list_func_prev,
                                             frame, 2, num_funcs, 'Control', button_font)

# create labels
create_labels([num_vars, num_funcs], [num_vars_prev, num_funcs_prev], frame, button_font)

# update tk
root.update_idletasks()
root.update()
from keras import layers
from keras.models import Sequential

# Flag any characters that fall outside the handled code-point range [32, 122].
for i in test_string:
    if ord(i) < 32 or ord(i) > 122:
        print(i, ord(i), chr(ord(i)), train_string.find(i))
        # print(ord(i))

X_train = create_input_array(train_string)
print(X_train.shape)
X_test = create_input_array(test_string)
print(X_test.shape)
print(X_train[0])

Y_train = create_labels(train_string, hashmap)
print(Y_train.shape)
Y_test = create_labels(test_string, hashmap)
print(Y_test.shape)

encrypter = Sequential()
encrypter.add(layers.Dense(91, input_shape=(91,)))
encrypter.add(layers.LeakyReLU())
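# create_input_array and create_labels are not shown. The range check above
# (ord values 32..122, i.e. 91 codes) and the Dense(91, input_shape=(91,)) layer
# suggest a one-hot encoding per character; a minimal sketch of that idea, with
# the helper name being an assumption:
import numpy as np


def one_hot_chars_sketch(text):
    # One row per character, one column per code point in [32, 122].
    out = np.zeros((len(text), 91), dtype=np.float32)
    for row, ch in enumerate(text):
        out[row, ord(ch) - 32] = 1.0
    return out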
Later this will become a zero vector in which the listed indices are set to one.
- bag_of_words: similar to naive, but unknown words are ignored. Later this
  will become a zero vector in which the listed indices are set to one or
  more (if a word is present X times, the value will be X).
- tfidf: similar to bag_of_words, but instead of discrete counts, the
  importance of each word is computed with TF-IDF (widely used in
  Information Extraction).
- word_embeddings: list of indices from word_to_index_we. Later these
  indices are mapped to the embeddings of index_we_to_emb.
- sentence_embeddings: vector of 600 dimensions representing the sentence
  embeddings.
'''
X_naive = transform_for_naive(data, word_to_index)
X_bow = transform_for_bag_of_words(data, word_to_index)
X_tfidf = transform_for_tfidf(data)
X_we = transform_for_word_embeddings(data, word_to_index_we, index_we_to_emb)
X_se = transform_for_sentence_embeddings(data)
X_topics = transform_for_topics(data)
Y = create_labels(data)

'''
# Visualize using only one kind of features
visualize_tsne(X_naive, Y, 'naive')
visualize_tsne(X_bow, Y, 'bow')
visualize_tsne(X_tfidf, Y, 'tfidf')
visualize_tsne(X_we, Y, 'word_emb', index_we_to_emb)
visualize_tsne(X_se, Y, 'sent_emb')
visualize_tsne(X_topics, Y, 'topics')
#'''

X_bow_se = np.concatenate([X_bow, X_se], axis=1)
X_bow_topics = np.concatenate([X_bow, X_topics], axis=1)
X_tfidf_se = np.concatenate([X_tfidf, X_se], axis=1)
X_tfidf_topics = np.concatenate([X_tfidf, X_topics], axis=1)
X_se_topics = np.concatenate([X_se, X_topics], axis=1)
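# transform_for_bag_of_words is described in the docstring above but not shown.
# A minimal sketch matching that description (whitespace tokenization and the
# exact signature are assumptions; unknown words are simply skipped):
import numpy as np


def bag_of_words_sketch(sentences, word_to_index):
    # One row per sentence, one column per vocabulary word; cell values count
    # how many times the word occurs in the sentence.
    X = np.zeros((len(sentences), len(word_to_index)), dtype=np.float32)
    for row, sentence in enumerate(sentences):
        for word in sentence.split():
            if word in word_to_index:
                X[row, word_to_index[word]] += 1.0
    return X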
from os.path import isfile
from random import shuffle  # assumed source of shuffle(); np.random.shuffle would also work

import numpy as np
import torch


def train():
    data = np.load("../dataset/dev.npy")
    labels = np.load("../dataset/dev_transcripts.npy")

    # temporary dataset
    data = data[0:2]
    labels = labels[0:2]
    # temporary dataset

    vocab = create_vocab(labels)
    labels = create_labels(labels, vocab)
    shuffle_index = np.arange(len(data))
    shuffle(shuffle_index)
    batch_size = cfg.BATCH_SIZE
    learning_rate = cfg.LEARNING_RATE

    # Load saved weights if they exist, otherwise build fresh modules.
    # my_listener = Listener(40, 256, 0.0)
    # my_speller = Speller(33, 512, 512, 256, 3)
    if isfile("../weights/listener.pt"):
        with open("../weights/listener.pt", 'rb') as fl:
            my_listener = torch.load(fl)
        with open("../weights/speller.pt", 'rb') as fs:
            my_speller = torch.load(fs)
        print("model loading completed.")
    else:
        my_listener = Listener(40, 256, 0.0)
        my_speller = Speller(33, 512, 512, 256, 3)

    loss_fn = torch.nn.CrossEntropyLoss(reduce=False)
    my_optimizer = torch.optim.Adam([{'params': my_speller.parameters()},
                                     {'params': my_listener.parameters()}],
                                    lr=cfg.LEARNING_RATE)

    start_index = 0
    for epoch in range(cfg.EPOCH):
        losses = 0.0
        start_index = 0
        while start_index + batch_size <= len(data):
            batch_data = data[shuffle_index[start_index:start_index + batch_size]]
            batch_labels = labels[shuffle_index[start_index:start_index + batch_size]]
            batch_data, batch_labels, batch_lengths, batch_label_lengths = preprocess(
                batch_data, batch_labels)
            one_hot_batch_labels = OneHot(batch_labels, 33)

            listener_output = my_listener(batch_data, batch_lengths)
            speller_output = my_speller(batch_labels.size(1), listener_output,
                                        one_hot_batch_labels)

            # Per-token cross entropy, masked so padded positions do not contribute.
            batch_loss = loss_fn(speller_output[0].contiguous().view(-1, 33),
                                 torch.autograd.Variable(batch_labels).view(-1, ))
            batch_loss = batch_loss.view(speller_output[0].size(0),
                                         speller_output[0].size(1))
            mask = torch.zeros(batch_loss.size())
            for i in range(batch_label_lengths.size(0)):
                mask[i, :batch_label_lengths[i]] = 1.0
            batch_loss = torch.mul(batch_loss, torch.autograd.Variable(mask))
            batch_loss = torch.sum(batch_loss) / torch.sum(mask)
            print("epoch {} batch_loss == {:.5f}".format(epoch, batch_loss.data[0]))

            my_optimizer.zero_grad()  # clear accumulated gradients before backprop
            batch_loss.backward()
            losses += batch_loss.data.cpu().numpy()
            my_optimizer.step()
            start_index += batch_size
            # break

        if epoch % 3 == 0:
            with open("../weights/listener.pt", 'wb') as fl:
                torch.save(my_listener, fl)
            with open("../weights/speller.pt", 'wb') as fs:
                torch.save(my_speller, fs)
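# OneHot above is a project helper that is not shown. Given the call site
# (an integer label tensor plus a vocabulary size of 33), it presumably expands
# each index into a one-hot vector; a minimal sketch under that assumption:
def one_hot_sketch(labels, num_classes):
    # labels: LongTensor of shape (batch, seq_len) with values in [0, num_classes).
    # Returns a FloatTensor of shape (batch, seq_len, num_classes).
    out = torch.zeros(labels.size(0), labels.size(1), num_classes)
    out.scatter_(2, labels.unsqueeze(2), 1.0)
    return out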
import numpy as np


def cross_validation(k, X, y, params, regression):
    """
    Perform regression using k-fold cross-validation.

    This function trains and evaluates a model, given data, a regression
    function and a set of parameters.

    Args:
        k (int): number of folds for cross-validation
        X (np.ndarray): training samples of shape N x D
        y (np.ndarray): training labels of shape N
        params (dict): dictionary of hyper-parameters for the regression function
        regression (function): regression function

    Returns:
        float: mean loss on the validation folds
        float: mean accuracy on the validation folds

    Raises:
        ValueError: if the regression function raises an error
    """
    # Cross-validation
    k_indices = build_k_indices(y, k)
    accuracies = []
    losses = []
    # print(f"(max_iters: {params['max_iters']}, gamma: {params['gamma']}, lambda: {params['lambda_']})")

    # each iteration for each split of training and validation
    for k_iteration in range(k):
        # split the data accordingly into training and validation
        X_train, Y_train, X_val, Y_val = cross_validation_iter(y, X, k_indices, k_iteration)

        # initial weights (D = number of features)
        W_init = np.random.rand(X_train.shape[1])

        # initialize dictionary for the training regression model
        args_train = {
            "tx": X_train,
            "y": Y_train,
            "initial_w": W_init,
            "max_iters": params["max_iters"],
            "gamma": params["gamma"],
            "lambda_": params["lambda_"]
        }

        # try to train the model; if it diverges, report and bail out
        try:
            W, loss_tr = regression(**args_train)
        except ValueError:
            print("Regression diverged with these parameters.")
            return None, None

        # logistic variants output probabilities, linear ones raw scores
        # (assumes the regression function's name identifies logistic variants)
        f_name = regression.__name__
        if "Logistic" in f_name:
            prediction_val_regression = sigmoid(X_val @ W)
        else:
            prediction_val_regression = X_val @ W

        # calculate prediction for the validation dataset
        prediction_val = create_labels(prediction_val_regression)

        # calculate corresponding loss and accuracy
        loss_val = calculate_mse_loss(Y_val, prediction_val)
        acc_val = calculate_acc(Y_val, prediction_val)
        losses.append(loss_val)
        accuracies.append(acc_val)

    # finally, generate the means
    mean_loss_val = np.array(losses).mean()
    mean_acc_val = np.array(accuracies).mean()
    return mean_loss_val, mean_acc_val
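# create_labels above maps the continuous regression/sigmoid outputs to discrete
# class labels before scoring. Its implementation is not shown; a minimal sketch
# of the usual thresholding step, assuming binary labels in {-1, 1} and a 0.5
# cut-off on sigmoid outputs (a cut-off of 0 on raw linear scores would be analogous):
def create_labels_threshold_sketch(predictions, threshold=0.5):
    labels = np.ones_like(predictions)
    labels[predictions < threshold] = -1
    return labels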