Example #1
    def fit(self):
        # Build the template model on the CPU so the multi-GPU wrapper
        # below can replicate its weights across devices.
        with tf.device("/cpu:0"):
            self.init_pt_model()

        parallel_pt_model = ModelMGPU(self.pt_model, gpus=cnt.USE_NUM_GPUS)

        adam = optimizers.Adam(lr=0.001)
        parallel_pt_model.compile(optimizer=adam,
                                  loss="categorical_crossentropy",
                                  metrics=['accuracy'])

        callbacks = [
            ModelCheckpoint(filepath=cnt.PT_MODEL_PATH,
                            monitor='val_loss',
                            save_best_only=True),
        ]

        parallel_pt_model.fit_generator(
            self.data_generator(self.num_train, 'train', 'pt'),
            callbacks=callbacks,
            steps_per_epoch=shutils.get_num_batches(self.num_train,
                                                    cnt.BATCH_SIZE),
            validation_data=self.data_generator(self.num_test, 'test', 'pt'),
            validation_steps=shutils.get_num_batches(self.num_test,
                                                     cnt.BATCH_SIZE),
            epochs=cnt.NUM_EPOCHS,
            verbose=1,
            use_multiprocessing=True)

        with tf.device("/cpu:0"):
            self.init_color_model()

        parallel_color_model = ModelMGPU(self.color_model,
                                         gpus=cnt.USE_NUM_GPUS)

        adam = optimizers.Adam(lr=0.001)
        parallel_color_model.compile(optimizer=adam,
                                     loss="categorical_crossentropy",
                                     metrics=['accuracy'])

        callbacks = [
            ModelCheckpoint(filepath=cnt.COLOR_MODEL_PATH,
                            monitor='val_loss',
                            save_best_only=True),
        ]

        parallel_color_model.fit_generator(
            self.data_generator(self.num_train, 'train', 'color'),
            callbacks=callbacks,
            steps_per_epoch=shutils.get_num_batches(self.num_train,
                                                    cnt.BATCH_SIZE),
            validation_data=self.data_generator(self.num_test, 'test',
                                                'color'),
            validation_steps=shutils.get_num_batches(self.num_test,
                                                     cnt.BATCH_SIZE),
            epochs=cnt.NUM_EPOCHS,
            verbose=1,
            use_multiprocessing=True)
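
ModelMGPU is not defined in this snippet. A common implementation (an assumption here, not the author's confirmed code) subclasses the Keras Model, wraps keras.utils.multi_gpu_model for training, and delegates save/load to the single-device template, so that ModelCheckpoint writes weights loadable on one GPU or the CPU:

from keras.models import Model
from keras.utils import multi_gpu_model


class ModelMGPU(Model):
    """Hypothetical multi-GPU wrapper: trains on the replicated model,
    but saves/loads weights through the CPU template model."""

    def __init__(self, ser_model, gpus):
        pmodel = multi_gpu_model(ser_model, gpus=gpus)
        self.__dict__.update(pmodel.__dict__)
        self._smodel = ser_model

    def __getattribute__(self, attrname):
        # Redirect save*/load* calls to the template so checkpoints
        # contain single-device weights.
        if 'load' in attrname or 'save' in attrname:
            return getattr(self._smodel, attrname)
        return super(ModelMGPU, self).__getattribute__(attrname)
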
Example #2
    def fit(self):
        curr_best_validation_loss = float("Inf")

        for epoch in range(cnt.NUM_EPOCHS):
            train_iter = self.data_generator(self.num_train, 'train')
            valid_iter = self.data_generator(self.num_valid, 'valid')

            steps_per_epoch_train = shutils.get_num_batches(
                self.num_train, cnt.BATCH_SIZE)
            steps_per_epoch_valid = shutils.get_num_batches(
                self.num_valid, cnt.BATCH_SIZE)

            total_loss = 0

            for batch in range(steps_per_epoch_train):
                src, trg = next(train_iter)
                batch_size = src.shape[0]
                batch_loss = train_step(src,
                                        trg,
                                        self.trg_lang,
                                        self.encoder,
                                        self.decoder,
                                        self.optimizer,
                                        batch_size,
                                        type='train')
                total_loss += batch_loss

                if batch % 100 == 0:
                    print('Epoch {} Batch {} Loss {:.8f}'.format(
                        epoch + 1, batch, batch_loss.numpy()))

            print('Epoch {} Mean Training Loss {:.8f}'.format(
                epoch + 1, total_loss / self.num_train))

            # Reset the accumulator so the validation loss does not
            # include the training loss summed above.
            total_loss = 0

            for batch in range(steps_per_epoch_valid):
                src, trg = next(valid_iter)
                batch_size = src.shape[0]
                batch_loss = train_step(src,
                                        trg,
                                        self.trg_lang,
                                        self.encoder,
                                        self.decoder,
                                        self.optimizer,
                                        batch_size,
                                        type='valid')
                total_loss += batch_loss

            validation_loss = total_loss / self.num_valid

            if validation_loss < curr_best_validation_loss:
                curr_best_validation_loss = validation_loss
                self.checkpoint.save(file_prefix=cnt.MODEL_PATH)

            print('Epoch {} Mean Validation Loss {:.8f}'.format(
                epoch + 1, validation_loss))
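
train_step is defined elsewhere. Assuming it follows the standard TensorFlow eager seq2seq recipe (encoder/decoder are Keras models, trg_lang.word_index maps tokens to ids, and a '<start>' token exists), a minimal sketch looks like this; the type flag decides whether gradients are applied:

import tensorflow as tf


def train_step(src, trg, trg_lang, encoder, decoder, optimizer,
               batch_size, type='train'):
    # Sketch only: names and interfaces are assumptions, not the
    # author's verified implementation.
    loss = 0.0
    with tf.GradientTape() as tape:
        enc_output, enc_hidden = encoder(src)
        dec_hidden = enc_hidden
        dec_input = tf.expand_dims(
            [trg_lang.word_index['<start>']] * batch_size, 1)
        for t in range(1, trg.shape[1]):
            predictions, dec_hidden, _ = decoder(dec_input, dec_hidden,
                                                 enc_output)
            loss += tf.reduce_mean(
                tf.keras.losses.sparse_categorical_crossentropy(
                    trg[:, t], predictions, from_logits=True))
            # Teacher forcing: feed the ground-truth token to the next step.
            dec_input = tf.expand_dims(trg[:, t], 1)
    if type == 'train':
        variables = (encoder.trainable_variables +
                     decoder.trainable_variables)
        gradients = tape.gradient(loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))
    return loss / int(trg.shape[1])
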
Example #3
    def fit(self):
        self.init_model()

        callbacks = [
            ModelCheckpoint(filepath=cnt.MODEL_PATH,
                            monitor='val_loss',
                            save_best_only=True),
        ]

        self.model.fit_generator(
            self.data_generator(self.num_train, 'train'),
            callbacks=callbacks,
            steps_per_epoch=shutils.get_num_batches(self.num_train,
                                                    cnt.BATCH_SIZE),
            validation_data=self.data_generator(self.num_test, 'test'),
            validation_steps=shutils.get_num_batches(self.num_test,
                                                     cnt.BATCH_SIZE),
            epochs=cnt.NUM_EPOCHS,
            verbose=1,
            use_multiprocessing=True)
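
shutils.get_num_batches is used by every example here; presumably it is just ceiling division of the sample count by the batch size:

import math


def get_num_batches(num_data, batch_size):
    # The last, partially filled batch still counts as one batch.
    return int(math.ceil(float(num_data) / batch_size))
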
Example #4
    def scoring(self, type='pt', save_imgs=False, save_cams=False):
        test_labels, pred_labels = [], []
        total_batches = shutils.get_num_batches(self.num_test, cnt.BATCH_SIZE)

        if type == 'pt':
            encoder = shutils.load_data_pkl(cnt.PT_ENCODER_PATH)
            pred_out_dir = cnt.PT_PREDS_PATH
            cam_dir = cnt.PT_CAMS_PATH
            self.init_pt_model()
            model = self.pt_model
            model.load_weights(cnt.PT_MODEL_PATH)

        else:
            encoder = shutils.load_data_pkl(cnt.COLOR_ENCODER_PATH)
            pred_out_dir = cnt.COLOR_PREDS_PATH
            cam_dir = cnt.COLOR_CAMS_PATH
            self.init_color_model()
            model = self.color_model
            model.load_weights(cnt.COLOR_MODEL_PATH)

        num_batches, start = 0, 0

        for batch_data, batch_labels in self.data_generator(
                self.num_test, 'test', type):
            test_labels += batch_labels.tolist()
            predictions = self.predict(batch_data, type)
            pred_labels += predictions
            num_batches += 1

            indices = [start + i for i in range(len(batch_labels))]

            if save_imgs:
                utils.save_imgs(batch_data, indices, np.array(batch_labels),
                                np.array(predictions), encoder, pred_out_dir)

            if save_cams:
                utils.cam(model, batch_data, indices, np.array(batch_labels),
                          np.array(predictions), encoder, cam_dir)

            start += len(batch_labels)

            if num_batches == total_batches:
                break

        # Keep only samples with at least one predicted label so that
        # inverse_transform sees non-empty prediction rows.
        h = np.sum(np.array(pred_labels), axis=1)
        idx = np.nonzero(h > 0)[0]

        t_labels = encoder.inverse_transform(np.array(test_labels)[idx])
        p_labels = encoder.inverse_transform(np.array(pred_labels)[idx])

        print(classification_report(t_labels, p_labels))
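
The h > 0 mask above suggests encoder is a multilabel binarizer: rows of pred_labels with no predicted label are dropped before inverse_transform. A minimal illustration, assuming sklearn's MultiLabelBinarizer:

import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer

encoder = MultiLabelBinarizer().fit([('red',), ('blue',)])
pred_labels = np.array([[1, 0], [0, 0], [1, 1]])  # row 2 predicts nothing
idx = np.nonzero(np.sum(pred_labels, axis=1) > 0)[0]
print(encoder.inverse_transform(pred_labels[idx]))
# [('blue',), ('blue', 'red')]
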
Example #5
    def scoring(self):
        test_labels, pred_labels = [], []
        total_batches = shutils.get_num_batches(self.num_test, cnt.BATCH_SIZE)

        num_batches = 0
        for batch_data, batch_labels in self.data_generator(
                self.num_test, 'test'):
            test_labels += batch_labels.tolist()
            pred_labels += self.predict(batch_data).tolist()
            num_batches += 1
            if num_batches == total_batches:
                break

        print(classification_report(test_labels, pred_labels))
Example #6
def get_data_as_generator(num_data, prefix='train'):
    random.seed(42)

    word_vector_model = utils.get_vector_model(cnt.VECTOR_MODEL,
                                               char_tokens=False)
    char_vector_model = utils.get_vector_model(cnt.VECTOR_MODEL,
                                               char_tokens=True)

    data_pairs = shutils.load_data_pkl(
        os.path.join(cnt.PERSISTENCE_PATH, prefix + "_data_pairs.pkl"))
    random.shuffle(data_pairs)

    num_batches = shutils.get_num_batches(num_data, cnt.BATCH_SIZE)

    batch_num = 0

    while True:
        m = batch_num % num_batches

        start, end = m * cnt.BATCH_SIZE, min((m + 1) * cnt.BATCH_SIZE,
                                             num_data)

        word_tokens1, word_tokens2, char_tokens1, char_tokens2, labels = zip(
            *data_pairs[start:end])
        labels = np.array(labels)
        labels = np.expand_dims(labels, -1)

        word_data_1 = shutils.get_vectors(word_vector_model, word_tokens1,
                                          cnt.WORD_VECTOR_DIM)
        word_data_2 = shutils.get_vectors(word_vector_model, word_tokens2,
                                          cnt.WORD_VECTOR_DIM)

        char_data_1 = np.array([
            shutils.get_vectors(char_vector_model, x, cnt.CHAR_VECTOR_DIM)
            for x in char_tokens1
        ])
        char_data_2 = np.array([
            shutils.get_vectors(char_vector_model, x, cnt.CHAR_VECTOR_DIM)
            for x in char_tokens2
        ])

        batch_num += 1

        yield [word_data_1, word_data_2, char_data_1, char_data_2], labels
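
Because the generator loops with while True, it never raises StopIteration; the consumer has to bound iteration itself, either through fit_generator's steps_per_epoch or with an explicit batch count, e.g. (num_train is an assumed sample count):

gen = get_data_as_generator(num_train, prefix='train')
for _ in range(shutils.get_num_batches(num_train, cnt.BATCH_SIZE)):
    [word_1, word_2, char_1, char_2], labels = next(gen)
    # labels has shape (batch, 1) thanks to the expand_dims above
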
Example #7
    def scoring(self):
        test_labels, pred_labels = [], []
        total_batches = shutils.get_num_batches(self.num_test, cnt.BATCH_SIZE)
        encoder = shutils.load_data_pkl(cnt.ENCODER_PATH)

        num_batches = 0

        for batch_data, batch_labels in self.data_generator(
                self.num_test, 'test'):
            test_labels += batch_labels.tolist()
            predictions = self.predict(batch_data)
            pred_labels += predictions
            num_batches += 1

            if num_batches == total_batches:
                break

        t_labels = encoder.inverse_transform(np.array(test_labels))
        p_labels = encoder.inverse_transform(np.array(pred_labels))

        print(
            classification_report(t_labels,
                                  p_labels,
                                  target_names=encoder.classes_))
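
Here encoder is presumably a fitted sklearn LabelEncoder: inverse_transform maps integer class ids back to the original labels, and classes_ supplies the label ordering used for target_names. For example:

from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder().fit(['cat', 'dog', 'bird'])
print(encoder.classes_)                      # ['bird' 'cat' 'dog']
print(encoder.inverse_transform([2, 0, 1]))  # ['dog' 'bird' 'cat']
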
Example #8
    def fit(self):
        with self.sess.as_default():
            optimizer, cost, accuracy = get_loss_accuracy(
                self.input_img, self.input_txt, self.output, self.num_classes,
                self.vocab_size, self.training)

            train_summary_writer = tf.summary.FileWriter(
                cnt.TF_TRAIN_SUMMARY_PATH, self.sess.graph)
            test_summary_writer = tf.summary.FileWriter(
                cnt.TF_TEST_SUMMARY_PATH, self.sess.graph)

            saver = tf.train.Saver()

            self.sess.run(tf.global_variables_initializer())

            steps_per_epoch_train = shutils.get_num_batches(
                self.num_train, cnt.BATCH_SIZE)
            steps_per_epoch_test = shutils.get_num_batches(
                self.num_test, cnt.BATCH_SIZE)

            min_test_loss = float("Inf")

            for i in range(cnt.NUM_EPOCHS):
                train_iter = self.data_generator(self.num_train, 'train')
                test_iter = self.data_generator(self.num_test, 'test')

                train_c_loss, train_c_acc, self.sess = process_batches(
                    steps_per_epoch_train,
                    train_iter,
                    self.input_img,
                    self.input_txt,
                    self.output,
                    self.training,
                    optimizer,
                    cost,
                    accuracy,
                    self.sess,
                    mode='train')

                train_summary = tf.Summary()
                train_summary.value.add(tag="Accuracy",
                                        simple_value=train_c_acc)
                train_summary.value.add(tag="Loss", simple_value=train_c_loss)

                train_summary_writer.add_summary(train_summary, i)

                test_c_loss, test_c_acc, self.sess = process_batches(
                    steps_per_epoch_test,
                    test_iter,
                    self.input_img,
                    self.input_txt,
                    self.output,
                    self.training,
                    optimizer,
                    cost,
                    accuracy,
                    self.sess,
                    mode='test')

                test_summary = tf.Summary()
                test_summary.value.add(tag="Accuracy", simple_value=test_c_acc)
                test_summary.value.add(tag="Loss", simple_value=test_c_loss)

                test_summary_writer.add_summary(test_summary, i)

                if cnt.SAVE_BEST_LOSS_MODEL:
                    if test_c_loss < min_test_loss:
                        min_test_loss = test_c_loss
                        saver.save(self.sess, cnt.MODEL_PATH)
                else:
                    saver.save(self.sess, cnt.MODEL_PATH)

                print("Iter " + str(i) + ", Training Loss= " +
                      "{:.6f}".format(train_c_loss) + ", Training Accuracy= " +
                      "{:.5f}".format(train_c_acc))
                print("Iter " + str(i) + ", Validation Loss= " +
                      "{:.6f}".format(test_c_loss) +
                      ", Validation Accuracy= " + "{:.5f}".format(test_c_acc))
                print()

            train_summary_writer.close()
            test_summary_writer.close()
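
process_batches is not shown. Under TF 1.x it presumably iterates the generator, builds a feed_dict, and runs the optimizer op only in train mode; a rough sketch with assumed names and data layout:

def process_batches(steps, data_iter, input_img, input_txt, output,
                    training, optimizer, cost, accuracy, sess,
                    mode='train'):
    # Hypothetical helper: returns mean loss/accuracy over the epoch
    # along with the session, matching the call sites above.
    total_loss, total_acc = 0.0, 0.0
    for _ in range(steps):
        (img_batch, txt_batch), label_batch = next(data_iter)
        feed = {input_img: img_batch, input_txt: txt_batch,
                output: label_batch, training: mode == 'train'}
        if mode == 'train':
            _, c, a = sess.run([optimizer, cost, accuracy], feed_dict=feed)
        else:
            c, a = sess.run([cost, accuracy], feed_dict=feed)
        total_loss += c
        total_acc += a
    return total_loss / steps, total_acc / steps, sess
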