예제 #1
0
def load_facedisguise(plot=False):
    """Load the cropped face images and split them by disguise annotation.

    Every entry of ``os.listdir(CROPPED_PATH)`` is read as a grayscale image,
    resized to ``img_size`` and paired with the annotation that
    ``load_truth`` returns for the matching file under ``TRUTH_PATH`` (same
    name with ``jpg`` replaced by ``txt`` and all spaces removed).

    Bucketing rules, based on the BEARD and GLASSES annotation flags:

    * neither flag equals 1            -> ``no_disguise``
    * beard == 1 and glasses == 0      -> ``beard`` and ``disguise``
    * beard == 0 and glasses == 1      -> ``glasses`` (only used for plotting)
    * any other flagged image (e.g. both flags equal to 1) is discarded

    Args:
        plot: When truthy, pass up to four beard, four glasses and four
            undisguised images to ``plot_figures`` on a 3 x 4 grid.

    Returns:
        Tuple ``(disguise, disguise_label, no_disguise, no_disguise_label)``
        of NumPy arrays built from the collected lists.
    """
    glasses = []
    beard = []

    disguise = []
    disguise_label = []

    no_disguise = []
    no_disguise_label = []

    beard_key = TRUTH_MAP['BEARD']
    glasses_key = TRUTH_MAP['GLASSES']

    for fname in os.listdir(CROPPED_PATH):
        full_path = os.path.join(CROPPED_PATH, fname)

        # NOTE(review): scipy.ndimage.imread and scipy.misc.imresize were
        # deprecated in SciPy 1.0 and removed in later releases; this code
        # needs an old SciPy (with Pillow) or a port to another image library.
        img = scipy.ndimage.imread(full_path, mode='L')
        img = scipy.misc.imresize(img, img_size)

        # The replacements apply to the whole joined path, not just the
        # file name, exactly as before.
        truth_file = os.path.join(TRUTH_PATH, fname).replace('jpg', 'txt').replace(' ', '')
        truth = load_truth(truth_file)

        # Read each flag once; the comparisons below stay explicit (== 1 /
        # == 0) so values other than 0 and 1 keep their original treatment.
        beard_flag = truth[beard_key]
        glasses_flag = truth[glasses_key]

        if beard_flag == 1 or glasses_flag == 1:
            if beard_flag == 1 and glasses_flag == 0:
                beard.append(img)
                disguise.append(img)
                # NOTE(review): the second entry tests glasses == 0, so every
                # label stored here is [True, True] -- confirm this is the
                # encoding the consumer expects.
                disguise_label.append([beard_flag == 1, glasses_flag == 0])
            elif beard_flag == 0 and glasses_flag == 1:
                # Glasses-only images are kept for plotting but are not part
                # of the returned disguise set.
                glasses.append(img)
        else:
            no_disguise.append(img)
            # Both comparisons are False on this path, so the label is
            # always [False, False].
            no_disguise_label.append([beard_flag == 1, glasses_flag == 1])

    print('Number of imges with no disguse: {}'.format(len(no_disguise)))
    print('Number of imges that have disguse: {}'.format(len(disguise)))

    if plot:
        figures = beard[:4] + glasses[:4] + no_disguise[:4]
        plot_figures(figures, 3, 4)

    return np.array(disguise), np.array(disguise_label), np.array(no_disguise), np.array(no_disguise_label)
예제 #2
0
    def build_train_model(self, train_ratio=0.8, epochs=80, batch_size=32, model_save_name='models/multi_day_lstm.h5'):
        """Preprocess the data, then build, train, plot and save the multi-day LSTM.

        Steps: split ``self.data`` with ``self.preprocess_data``, build the
        model with ``self.build_model``, compile it with MSE loss and the
        RMSprop optimizer, fit it, plot the loss curves, predict on the test
        split, plot prediction against ground truth and optionally save the
        model.

        Args:
            train_ratio: Forwarded to ``self.preprocess_data``.
            epochs: Number of training epochs passed to ``fit``.
            batch_size: Batch size passed to ``fit``.
            model_save_name: File path for ``save_model``. ``None`` skips
                saving. Missing parent directories are created.

        Returns:
            The trained model.
        """
        print('-- Preprocessing data --\n')

        (X_train, X_train_external, y_train), (X_test, X_test_external, y_test) = self.preprocess_data(self.data, train_ratio=train_ratio)

        print(f'Training set: ({X_train.shape} - {y_train.shape})')
        print(f'Testing set: ({X_test.shape} - {y_test.shape})')

        LSTM_INPUT_SHAPE = (X_train.shape[1], X_train.shape[2])
        # The parentheses do not create a tuple: this is a plain int, passed
        # through unchanged to self.build_model.
        EXTENSIVE_INPUT_SHAPE = (X_train_external.shape[1])

        print('-- Build LSTM model --\n')

        lstm_model = self.build_model(LSTM_INPUT_SHAPE, EXTENSIVE_INPUT_SHAPE)
        lstm_model.compile(loss='mse', optimizer='rmsprop')

        print('-- Train LSTM model --\n')

        # NOTE(review): the tf.keras docs describe use_multiprocessing as
        # applying to generator / Sequence inputs only; confirm it is needed
        # here and still accepted by the installed Keras version.
        history = lstm_model.fit(
            [X_train, X_train_external],
            y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
            use_multiprocessing=True
        )

        print('-- Plotting LOSS figure --\n')

        plot_figures(
            data=[history.history['loss'], history.history['val_loss']],
            y_label='Loss',
            legend=['loss', 'val_loss'],
            title='LSTM multi day training and validating loss',
            file_name='figures/lstm_loss_multi_day.png'
        )

        y_predicted = lstm_model.predict([X_test, X_test_external])
        # Flatten the test targets into one series: the first column of every
        # row followed by the remaining columns of the last row.
        # (assumes y_test rows are overlapping multi-day windows -- TODO confirm)
        y_test_plot = np.concatenate((y_test[:, 0], y_test[-1, 1:]))

        y_predicted_inverse = self.target_scaler.inverse_transform(y_predicted)
        # NOTE(review): y_test_plot is 1-D here; scikit-learn scalers expect
        # 2-D input, so verify this call against the actual target_scaler.
        y_test_inverse = self.target_scaler.inverse_transform(y_test_plot)

        print('-- Plotting LSTM stock prediction vs Real closing stock price figure --\n')

        self.plot_multi_day_prediction(
            y_test_inverse,
            y_predicted_inverse,
            file_name='figures/lstm_prediction_multi_day.png'
        )

        print('-- Save LSTM model --\n')

        if model_save_name is not None:
            # Local import keeps this method self-contained.
            import os

            # Writing the model file fails when its directory is missing, so
            # create the parent directory of the requested path first.
            save_dir = os.path.dirname(model_save_name)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            save_model(lstm_model, filepath=model_save_name)

        return lstm_model
예제 #3
0
    def on_epoch_end(self, epoch, logs=None):
        """Score the validation batches and apply early stopping on mean F1.

        For every batch in ``self.validation`` the model predicts class
        probabilities (first output) and a second output named depth here.
        The arg-max predictions and targets whose target value is below
        ``self.classes`` are appended to ``self.pred`` / ``self.targ``.
        Per-class F1, precision and recall (in percent, rounded to two
        decimals) are printed, and the mean F1 is stored in
        ``logs["mean_f1"]``.

        If the mean F1 beats ``self.best`` the current weights are recorded;
        otherwise ``self.wait`` is incremented and, once it reaches
        ``self.patience``, training is stopped and the best weights restored.

        NOTE(review): ``self.pred`` and ``self.targ`` are only extended here,
        never cleared; unless they are reset elsewhere, the metrics cover all
        epochs seen so far rather than just the current one.

        Args:
            epoch: Zero-based epoch index (printed as ``epoch + 1``).
            logs: Metrics dict updated in place with ``"mean_f1"``. Defaults
                to a fresh dict when omitted.
        """
        # A literal ``{}`` default would be shared between calls and is
        # mutated below, so create the fallback dict per call instead.
        if logs is None:
            logs = {}

        for batch_index in range(len(self.validation)):
            # Fetch the batch once so inputs and targets are guaranteed to
            # come from the same __getitem__ call.
            batch = self.validation[batch_index]
            val_inputs = batch[0]
            val_targ = batch[1][0]
            val_pred = self.model.predict(val_inputs)
            val_prob = val_pred[0]
            val_depth = val_pred[1]
            val_predict = np.argmax(val_prob, axis=-1)
            if batch_index == 0:
                # Only the first batch is visualised each epoch.
                plot_figures(val_inputs, val_targ,
                             val_predict, val_prob, val_depth,
                             batch[1][1],
                             self.model_dir, epoch, self.classes, 'val')

            # Drop positions whose target is not a real class index.
            keep = val_targ < self.classes
            val_predict = val_predict[keep]
            val_targ = val_targ[keep]
            self.pred.extend(val_predict)
            self.targ.extend(val_targ)

        f1 = np.round(f1_score(self.targ, self.pred, average=None) * 100, 2)
        precision = np.round(
            precision_score(self.targ, self.pred, average=None) * 100, 2)
        recall = np.round(
            recall_score(self.targ, self.pred, average=None) * 100, 2)

        # Update the logs dictionary. The sum is divided by self.classes,
        # not by the number of scores actually returned.
        mean_f1 = np.sum(f1) / self.classes
        logs["mean_f1"] = mean_f1

        print(
            f' — val_f1: {f1}\n — val_precision: {precision}\n — val_recall: {recall}'
        )
        print(f' — mean_f1: {mean_f1}')

        current = logs.get("mean_f1")
        if np.less(self.best, current):
            self.best = current
            self.wait = 0
            print("Found best weights at epoch {}".format(epoch + 1))
            # Record the weights when the current mean F1 is higher than the
            # best value seen so far.
            self.best_weights = self.model.get_weights()
        else:
            self.wait += 1
            if self.wait >= self.patience:
                self.stopped_epoch = epoch
                self.model.stop_training = True
                print(
                    "Restoring model weights from the end of the best epoch.")
                self.model.set_weights(self.best_weights)
예제 #4
0
    def build_train_model(self,
                          train_ratio=0.8,
                          epochs=80,
                          batch_size=32,
                          model_save_name='models/single_day_lstm.h5'):
        """
        Preprocess the data, then build, train, evaluate and save the
        single-day LSTM.

        The model receives two inputs: the sequences returned by
        ``self.preprocess_data`` and the features that
        ``self.preprocess_moving_average`` derives from them. It is compiled
        with MSE loss and the Adam optimizer, fitted with a 20% validation
        split, evaluated on the test split (mean absolute error after
        inverse-scaling with ``self.target_scaler``) and plotted.

        Keyword Arguments:
            train_ratio {float} -- forwarded to ``self.preprocess_data``
                (default: {0.8})
            epochs {int} -- number of training epochs (default: {80})
            batch_size {int} -- batch size used by ``fit`` (default: {32})
            model_save_name {str} -- file path for ``save_model``; ``None``
                skips saving. Missing parent directories are created.
                (default: {'models/single_day_lstm.h5'})

        Returns:
            The trained model.
        """

        print('-- Preprocessing data --\n')

        (X_train,
         y_train), (X_test,
                    y_test) = self.preprocess_data(self.data, train_ratio)

        X_train_ma = self.preprocess_moving_average(X_train)
        X_test_ma = self.preprocess_moving_average(X_test)

        print(f'Training set: ({X_train.shape} - {y_train.shape})')
        print(f'Testing set: ({X_test.shape} - {y_test.shape})')

        print(f'Extensive training MA: ({X_train_ma.shape})')
        print(f'Extensive testing MA: ({X_test_ma.shape})\n')

        LSTM_INPUT_SHAPE = (X_train.shape[1], X_train.shape[2])
        # The parentheses do not create a tuple: this is a plain int, passed
        # through unchanged to self.build_model.
        EXTENSIVE_INPUT_SHAPE = (X_train_ma.shape[1])

        print('-- Build LSTM model --\n')

        lstm_model = self.build_model(LSTM_INPUT_SHAPE, EXTENSIVE_INPUT_SHAPE)
        lstm_model.compile(loss='mse', optimizer='adam')

        print('-- Train LSTM model --\n')

        history = lstm_model.fit(x=[X_train, X_train_ma],
                                 y=y_train,
                                 epochs=epochs,
                                 batch_size=batch_size,
                                 verbose=2,
                                 shuffle=True,
                                 validation_split=0.2)

        print('-- Plotting LOSS figure --\n')

        plot_figures(
            data=[history.history['loss'], history.history['val_loss']],
            y_label='Loss',
            legend=['loss', 'val_loss'],
            title='LSTM single day training and validating loss',
            file_name='figures/lstm_loss_single_day.png')

        print('-- Evaluating on Test set --')

        y_predicted = lstm_model.predict([X_test, X_test_ma])
        y_predicted_inverse = self.target_scaler.inverse_transform(y_predicted)
        y_test_inverse = self.target_scaler.inverse_transform(y_test)

        # Sum of absolute errors divided by the number of test rows.
        mae_inverse = np.sum(
            np.abs(y_predicted_inverse - y_test_inverse)) / len(y_test)
        print(f'Mean Absolute Error - Testing = {mae_inverse}\n')

        print(
            '-- Plotting LSTM stock prediction vs Real closing stock price figure --\n'
        )

        plot_figures(
            data=[y_predicted_inverse, y_test_inverse],
            y_label='Close',
            legend=['y_predict', 'y_test'],
            title='Real Close stock price vs LSTM prediction on 1 day period',
            file_name='figures/lstm_prediction_single_day.png')

        print('-- Save LSTM model --\n')

        if model_save_name is not None:
            # Create the directory the model is actually written to (not a
            # fixed './models'), and only when a save was requested.
            # exist_ok avoids the check-then-create race.
            save_dir = os.path.dirname(model_save_name)
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            save_model(lstm_model, filepath=model_save_name)

        return lstm_model
예제 #5
0
def train(plot=False):
    """Train the dependency-parsing model and return it.

    Loads ``train.labeled`` / ``test.labeled``, builds vocabularies and
    pretrained-embedding tables for both splits, then trains ``emptyModel``
    with Adam and cross-entropy loss for ``EPOCHS`` epochs using gradient
    accumulation over ``accumulate_grad_steps`` batches. After every epoch
    the UAS on both splits is computed with ``evaluation.evaluate``;
    whenever the test UAS exceeds the best value so far (starting from
    0.905) the model is saved and the curves are plotted.

    Args:
        plot: When truthy, plot the loss / UAS curves once more after the
            final epoch.

    Returns:
        The trained model.
    """
    # Data loading
    train_path = "train.labeled"
    test_path = "test.labeled"
    train_sentences_df = split(train_path)
    test_sentences_df = split(test_path)
    word_to_ix, tag_to_ix, ix_to_word = generateVocabs(train_sentences_df)
    word_to_ix_test, _, ix_to_word_test = generateVocabs(test_sentences_df)
    word_dict_train = wordFrequency(train_sentences_df)
    word_dict_test = wordFrequency(test_sentences_df)
    train_pretrained_embeds = init_word_embeddings(word_dict_train)
    test_pretrained_embeds = init_word_embeddings(word_dict_test)

    # Named *_dataset so the locals do not shadow this function's own name.
    train_dataset = KiperwasserDataset(word_to_ix,
                                       tag_to_ix,
                                       train_sentences_df,
                                       tp='train_')
    train_dataloader = DataLoader(train_dataset, shuffle=True)
    test_dataset = KiperwasserDataset(word_to_ix,
                                      tag_to_ix,
                                      test_sentences_df,
                                      tp='test',
                                      word_embed_to_ix=word_to_ix_test)
    test_dataloader = DataLoader(test_dataset, shuffle=False)

    word_vocab_size = len(train_dataset.word_to_ix)
    tag_vocab_size = len(train_dataset.tag_to_ix)

    loss_function = nn.CrossEntropyLoss()  # NLLLoss
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    model = emptyModel(word_vocab_size, tag_vocab_size)

    if use_cuda:
        model.cuda()

    optimizer = optim.Adam(model.parameters(),
                           betas=(0.9, 0.9),
                           lr=LEARNING_RATE,
                           weight_decay=WEIGHT_DECAY)

    # Training start
    print("Training Started")
    UAS_train_list = []
    UAS_test_list = []
    loss_list = []
    # Checkpoints are only written once the test UAS exceeds this value.
    best_UAS_sf = 0.905
    for epoch in range(EPOCHS):
        current_loss = 0  # To keep track of the loss value
        # Switch to training mode once per epoch (enables dropout); the
        # evaluation at the end of the previous epoch may have changed it.
        model.train()
        batch_count = 0
        for batch_count, input_data in enumerate(train_dataloader, start=1):
            words_idx_tensor, pos_idx_tensor, gold = input_data
            external_embeds = get_pretrained_vector(train_pretrained_embeds,
                                                    words_idx_tensor,
                                                    ix_to_word)
            gold = gold.squeeze(0).to(device)
            scores = model(words_idx_tensor, pos_idx_tensor, external_embeds,
                           gold)
            # The first row / first gold entry is skipped
            # (presumably the artificial ROOT token -- TODO confirm).
            loss = loss_function(scores[1:, :], gold[1:])
            # Scale so that the accumulated gradient is an average.
            loss = loss / accumulate_grad_steps
            loss.backward()
            if batch_count % accumulate_grad_steps == 0:
                optimizer.step()
                model.zero_grad()
            current_loss += loss.item()

        # Apply the gradients of a trailing partial accumulation window so
        # they are neither carried into the next epoch nor lost after the
        # final one.
        if batch_count % accumulate_grad_steps != 0:
            optimizer.step()
            model.zero_grad()

        # below used for plotting
        # (the summed losses were already divided by accumulate_grad_steps)
        current_loss = current_loss / len(train_dataset)
        loss_list.append(float(current_loss))
        UAS_train = evaluation.evaluate(train_dataloader, model,
                                        train_pretrained_embeds, ix_to_word)
        UAS_test = evaluation.evaluate(test_dataloader, model,
                                       test_pretrained_embeds, ix_to_word_test)
        UAS_train_list.append(UAS_train)
        UAS_test_list.append(UAS_test)
        if UAS_test > best_UAS_sf:
            model.save('advanced_final_' + str(epoch))
            best_UAS_sf = UAS_test
            plot_figures(loss_list, UAS_train_list, UAS_test_list,
                         'advanced_' + str(epoch))
        print(
            f"Epoch {epoch + 1}, \tLoss: {current_loss:.7f}, \t UAS_train: {UAS_train:.4f}, \tUAS_test: {UAS_test:.4f}"
        )
    if plot:
        plot_figures(loss_list, UAS_train_list, UAS_test_list, 'advanced')
    return model