Пример #1
0
def train(aug_no, model_mode='mymodel', set_epochs=10, do_es=False):
    """Train a model on the ``da_concat_<aug_no>`` data set and export logs.

    The function relies on names that are not defined in this block and must
    exist at module level: ``cwd``, ``validation_dir``, ``inputDataCreator``,
    ``ModelHandler`` and ``EarlyStopping``.

    The trained model (``*_model.h5``) and the pickled training history
    (``*_history.pkl``) are written to ``<cwd>/log/<aug_no>_<model_mode>/``,
    or to ``<cwd>/log_with_es/...`` when ``do_es`` is true.

    Args:
        aug_no: Identifier used to build the training directory name and the
            names of the exported files.
        model_mode: ``'mymodel'`` (``ModelHandler.buildMyModel``) or
            ``'tlearn'`` (``ModelHandler.buildTlearnModel(base='mnv1')``).
        set_epochs: Number of epochs passed to ``model.fit``.
        do_es: When true, train with an ``EarlyStopping`` callback that
            monitors ``val_loss`` with a patience of 5.

    Raises:
        ValueError: If ``model_mode`` is not one of the supported values.
    """
    # Fail fast: an unknown mode used to surface only after all data had been
    # loaded, as an UnboundLocalError on ``model``.
    if model_mode not in ('mymodel', 'tlearn'):
        raise ValueError(
            "unknown model_mode: {!r} (expected 'mymodel' or 'tlearn')".format(
                model_mode))

    train_dir = os.path.join(cwd, "da_concat_{}".format(aug_no))

    train_data, train_label = inputDataCreator(train_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)

    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         224,
                                                         normalize=True,
                                                         one_hot=True)

    print("train data shape : ", train_data.shape)
    print("train label shape : ", train_label.shape)

    # Model input geometry is taken from the loaded data (axis 1 and axis 3).
    INPUT_SIZE = train_data.shape[1]
    print("INPUT_SIZE: ", INPUT_SIZE)

    CHANNEL = train_data.shape[3]
    print("set channel : ", CHANNEL)

    batch_size = 10
    print("set batch_size : ", batch_size)

    mh = ModelHandler(INPUT_SIZE, CHANNEL)

    if model_mode == 'mymodel':
        model = mh.buildMyModel()
    else:  # 'tlearn' (validated above)
        model = mh.buildTlearnModel(base='mnv1')

    model.summary()

    callbacks = None
    if do_es:
        callbacks = [
            EarlyStopping(monitor='val_loss',
                          patience=5,
                          verbose=1,
                          mode='auto')
        ]

    history = model.fit(train_data,
                        train_label,
                        batch_size=batch_size,
                        epochs=set_epochs,
                        validation_data=(validation_data, validation_label),
                        callbacks=callbacks,
                        verbose=1)

    # make log dir -----
    log_dir = os.path.join(cwd, 'log_with_es' if do_es else 'log')
    child_log_dir = os.path.join(log_dir, '{}_{}'.format(aug_no, model_mode))
    # makedirs creates the parent ``log_dir`` as well.
    os.makedirs(child_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_log_dir,
                              '{}_{}_model.h5'.format(aug_no, model_mode))
    model.save(model_file)

    # save history
    history_file = os.path.join(child_log_dir,
                                '{}_{}_history.pkl'.format(aug_no, model_mode))
    with open(history_file, 'wb') as p:
        pickle.dump(history.history, p)

    print("export logs in ", child_log_dir)
Пример #2
0
def main():
    """Train, evaluate and log a model on the ``small_721`` data set.

    Images are streamed from ``<prj_root>/datasets/small_721/{train,
    validation,test}`` where ``prj_root`` is three directory levels above the
    current working directory.  The model is trained with early stopping,
    evaluated on the test generator, saved to
    ``<sub_prj>/outputs/models/<YYYYMMDD>/model.h5`` and a summary of the run
    is written to ``<sub_prj>/outputs/logs/<YYYYMMDD>/result.csv``
    (``sub_prj`` is the parent of the current working directory).

    Relies on module-level names not defined in this block: ``target_size``,
    ``batch_size``, ``set_epochs``, ``input_size``, ``channel``,
    ``ImageDataGenerator``, ``ModelHandler`` and ``EarlyStopping``.
    """
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)
    sub_prj_root = os.path.dirname(sub_prj)
    prj_root = os.path.dirname(sub_prj_root)

    data_dir = os.path.join(prj_root, "datasets")

    data_src = os.path.join(data_dir, "small_721")
    print("\ndata source: ", data_src)

    # Switch to the augmented training directory by flipping this flag.
    use_da_data = False
    train_dir = os.path.join(data_src,
                             "train_with_aug" if use_da_data else "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    data_gen = ImageDataGenerator(rescale=1. / 255)

    train_generator = data_gen.flow_from_directory(train_dir,
                                                   target_size=target_size,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   class_mode='categorical')

    validation_generator = data_gen.flow_from_directory(
        validation_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical')

    # The test generator is not shuffled so predictions keep a stable order.
    test_generator = data_gen.flow_from_directory(test_dir,
                                                  target_size=target_size,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  class_mode='categorical')

    data_checker, label_checker = next(train_generator)

    print("train data shape (in batch): ", data_checker.shape)
    print("train label shape (in batch): ", label_checker.shape)

    # build model ----------
    mh = ModelHandler(input_size, channel)
    model = mh.buildMyModel()
    model.summary()

    # instance EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()
    history = model.fit_generator(train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=set_epochs,
                                  validation_data=validation_generator,
                                  validation_steps=validation_steps,
                                  callbacks=[es],
                                  verbose=1)
    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    # evaluate ----------
    print("\nevaluate sequence...")

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']
    print("last val_acc: ", val_accs[-1])

    test_steps = test_generator.n // batch_size
    eval_res = model.evaluate_generator(test_generator,
                                        steps=test_steps,
                                        verbose=1)

    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # logging and detail outputs -----
    # make log directories
    log_dir = os.path.join(sub_prj, "outputs", "logs")
    os.makedirs(log_dir, exist_ok=True)
    model_log_dir = os.path.join(sub_prj, "outputs", "models")
    # The original created ``log_dir`` a second time here (copy-paste slip).
    os.makedirs(model_log_dir, exist_ok=True)

    now = datetime.datetime.now()
    date_tag = "{0:%Y%m%d}".format(now)
    child_log_dir = os.path.join(log_dir, date_tag)
    os.makedirs(child_log_dir, exist_ok=True)
    child_model_log_dir = os.path.join(model_log_dir, date_tag)
    os.makedirs(child_model_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_model_log_dir, "model.h5")
    model.save(model_file)
    print("\nexport model in ", child_model_log_dir)

    print("\npredict sequence...")
    pred = model.predict_generator(test_generator, steps=test_steps, verbose=1)

    # Collect the labels of the same number of batches that were predicted;
    # stacking once avoids re-copying the array on every batch.
    label_batches = [next(test_generator)[1] for _ in range(test_steps)]
    test_label = np.vstack(label_batches) if label_batches else []

    # Column 0 of the one-hot label is reported as 'cat', column 1 as 'dog'.
    label_name_list = []
    for row in test_label:
        if row[0] == 1:
            label_name_list.append('cat')
        elif row[1] == 1:
            label_name_list.append('dog')

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[~df_pred['collect']].index.tolist()
    collect = df_pred[df_pred['collect']].index.tolist()

    print(df_pred)
    print("\nwrong recognized indeices are ", confuse)
    print("  wrong recognized amount is ", len(confuse))
    print("\ncollect recognized indeices are ", collect)
    print("  collect recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    # save history
    save_dict = {
        'last_loss': losses[-1],
        'last_acc': accs[-1],
        'last_val_loss': val_losses[-1],
        'last_val_acc': val_accs[-1],
        'n_confuse': len(confuse),
        'eval_loss': eval_res[0],
        'eval_acc': eval_res[1],
        'elapsed_time': elapsed_time,
    }

    print(save_dict)

    df_result = pd.DataFrame(list(save_dict.values()),
                             index=list(save_dict.keys()))

    csv_file = os.path.join(child_log_dir, "result.csv")
    df_result.to_csv(csv_file)
    print("\nexport history in ", csv_file)
Пример #3
0
def main(N,
         LEARN_PATH,
         MODE,
         BUILD_MODEL,
         EPOCHS=60,
         BATCH_SIZE=20,
         FINE_TUNE_AT=81):
    """Run 5-fold stratified cross-validation and export the results as CSV.

    For every fold the held-out part is used as test data and the remainder
    is split 3:1 into training and validation data.  Per-fold metrics are
    collected into one DataFrame (one column per fold) which is written to
    ``<parent of cwd>/flog/sample_<N>_<MODE>_<BUILD_MODEL>_result.csv``.

    Relies on module-level names not defined in this block: ``cwd``,
    ``inputDataCreator``, ``dataSplit``, ``ModelHandler``, ``EarlyStopping``,
    ``StratifiedKFold`` and ``keras``.

    Args:
        N: Identifier used only in the name of the exported CSV file.
        LEARN_PATH: Directory containing the ``natural`` sub-directory and,
            for ``MODE == 'auged'``, the ``auged`` sub-directory.
        MODE: ``'auged'`` appends augmented samples to the training data and
            halves ``EPOCHS``; any other value trains on natural data only.
        BUILD_MODEL: ``'mymodel'`` or ``'tlearn'`` (warm-up followed by fine
            tuning of a MobileNet-v1 style base built by ``ModelHandler``).
        EPOCHS: Number of epochs for the main training run.
        BATCH_SIZE: Batch size passed to ``model.fit``.
        FINE_TUNE_AT: Passed to ``ModelHandler.setFineTune`` in ``'tlearn'``
            mode.

    Raises:
        ValueError: If ``BUILD_MODEL`` is not one of the supported values.
    """
    # Fail fast instead of hitting an UnboundLocalError inside the first fold.
    if BUILD_MODEL not in ('mymodel', 'tlearn'):
        raise ValueError(
            "unknown BUILD_MODEL: {!r} (expected 'mymodel' or 'tlearn')".
            format(BUILD_MODEL))

    # Labels are loaded as class indices (not one-hot) because
    # StratifiedKFold.split is given them as the stratification target.
    total_data, total_label = inputDataCreator(os.path.join(
        LEARN_PATH, "natural"),
                                               224,
                                               normalize=True)

    print("\ntotal_data shape: ", total_data.shape)
    print("total_label shape: ", total_label.shape)

    if MODE == 'auged':
        auged_dir = os.path.join(LEARN_PATH, "auged")
        EPOCHS = EPOCHS // 2

        total_auged_data, total_auged_label = inputDataCreator(auged_dir,
                                                               224,
                                                               normalize=True,
                                                               one_hot=True)
        print("\n  total auged_data : ", total_auged_data.shape)

    input_size = total_data.shape[1]
    channel = total_data.shape[3]

    mh = ModelHandler(input_size, channel)

    skf = StratifiedKFold(n_splits=5)

    # The log directory does not depend on the fold, so create it once.
    log_dir = os.path.join(os.path.dirname(cwd), "flog")
    os.makedirs(log_dir, exist_ok=True)

    for k, (traval_idx, test_idx) in enumerate(
            skf.split(total_data, total_label)):
        print("\nK-Fold Cross-Validation k:{} ==========".format(k))

        print("\ntrain indices: \n", traval_idx)
        print("\ntest indices: \n", test_idx)

        test_data = total_data[test_idx]
        test_label = total_label[test_idx]

        print("-----*-----*-----")

        traval_data = total_data[traval_idx]
        traval_label = total_label[traval_idx]

        # Convert class indices to 2-class one-hot vectors.
        traval_label = np.identity(2)[traval_label.astype(np.int8)]
        test_label = np.identity(2)[test_label.astype(np.int8)]

        train_data, train_label, validation_data, validation_label, _, _ = dataSplit(
            traval_data,
            traval_label,
            train_rate=3 / 4,
            validation_rate=1 / 4,
            test_rate=0)

        if MODE == 'auged':
            print("\nadd auged data to train_data...")

            # NOTE(review): indexing with the natural-data fold indices
            # assumes the augmented arrays are ordered like the natural
            # ones - confirm against inputDataCreator.
            auged_traval_data = total_auged_data[traval_idx]
            auged_traval_label = total_auged_label[traval_idx]

            auged_train_data, auged_train_label, _, _, _, _ = dataSplit(
                auged_traval_data,
                auged_traval_label,
                train_rate=3 / 4,
                validation_rate=1 / 4,
                test_rate=0)

            print("  append auged data: ", auged_train_data.shape)
            print("\n  concatnate auged data with native data...")
            train_data = np.vstack((train_data, auged_train_data))
            train_label = np.vstack((train_label, auged_train_label))
            print("    Done.")

        print("\ntrain data shape: ", train_data.shape)
        print("train label shape: ", train_label.shape)
        print("\nvalidation data shape: ", validation_data.shape)
        print("validation label shape: ", validation_label.shape)
        print("\ntest data shape: ", test_data.shape)
        print("test label shape: ", test_label.shape)

        es = EarlyStopping(monitor='val_loss',
                           patience=5,
                           verbose=1,
                           restore_best_weights=True,
                           mode='auto')

        print("set epochs: ", EPOCHS)

        if BUILD_MODEL == 'mymodel':
            model = mh.buildMyModel()

            # normal train ----------
            print("\ntraining sequence start .....")
            start = time.time()

            # The keyword was misspelled ``vlidation_data`` in the original,
            # which made this call fail.
            history = model.fit(train_data,
                                train_label,
                                batch_size=BATCH_SIZE,
                                epochs=EPOCHS,
                                validation_data=(validation_data,
                                                 validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start

        else:  # 'tlearn' (validated above)
            # Keep a handle on base_model so its weights can be unfrozen
            # later for fine tuning.
            base_model = mh.buildMnv1Base()
            base_model.trainable = False

            model = mh.addChead(base_model)

            print("\ntraining sequence start .....")
            start = time.time()

            # warm up -----
            print("\nwarm up sequence .....")
            model.summary()
            # The warm-up history is not used afterwards, so it is not kept.
            model.fit(train_data,
                      train_label,
                      batch_size=BATCH_SIZE,
                      epochs=10,
                      validation_data=(validation_data, validation_label),
                      callbacks=[es],
                      verbose=2)

            # fine tuning -----
            print("\nfine tuning.....")
            mh.setFineTune(base_model, model, FINE_TUNE_AT)
            model.summary()

            history = model.fit(train_data,
                                train_label,
                                batch_size=BATCH_SIZE,
                                epochs=EPOCHS,
                                validation_data=(validation_data,
                                                 validation_label),
                                callbacks=[es],
                                verbose=2)
            elapsed_time = time.time() - start

        # training end
        accs = history.history['accuracy']
        losses = history.history['loss']
        val_accs = history.history['val_accuracy']
        val_losses = history.history['val_loss']

        # NOTE: per-fold export of the model and pickled history was already
        # disabled in the original code and is not performed here.

        print("\npredict sequence...")
        pred = model.predict(test_data, batch_size=10, verbose=2)

        # Column 0 of the one-hot label is reported as 'cat', column 1 as
        # 'dog'.
        label_name_list = []
        for row in test_label:
            if row[0] == 1:
                label_name_list.append('cat')
            elif row[1] == 1:
                label_name_list.append('dog')

        df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
        df_pred['class'] = df_pred.idxmax(axis=1)
        df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
        df_pred['collect'] = (df_pred['class'] == df_pred['label'])

        confuse = df_pred[~df_pred['collect']].index.tolist()
        collect = df_pred[df_pred['collect']].index.tolist()

        print(df_pred)
        print("\nwrong recognized indeices are ", confuse)
        print("  wrong recognized amount is ", len(confuse))
        print("\ncollect recognized indeices are ", collect)
        print("  collect recognized amount is ", len(collect))
        print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

        print("\nevaluate sequence...")

        eval_res = model.evaluate(test_data,
                                  test_label,
                                  batch_size=10,
                                  verbose=2)

        print("result loss: ", eval_res[0])
        print("result score: ", eval_res[1])

        # ----------
        save_dict = {
            'last_loss': losses[-1],
            'last_acc': accs[-1],
            'last_val_loss': val_losses[-1],
            'last_val_acc': val_accs[-1],
            'n_confuse': len(confuse),
            'eval_loss': eval_res[0],
            'eval_acc': eval_res[1],
            'elapsed_time': elapsed_time,
        }

        print(save_dict)

        # One column per fold: column 0 is created with the frame, later
        # folds are added as columns named after the fold number.
        if k == 0:
            df_result = pd.DataFrame(list(save_dict.values()),
                                     index=list(save_dict.keys()))
        else:
            df_result[k] = pd.Series(save_dict)
        print(df_result)

        # Release per-fold arrays and the model before the next fold.
        del traval_data, traval_label

        if MODE == 'auged':
            del auged_traval_data, auged_traval_label
            del auged_train_data, auged_train_label

        del train_data, train_label
        del validation_data, validation_label
        del test_data, test_label

        del model
        # Only ``history`` is deleted: the original also deleted
        # ``_history``, which is unbound in 'mymodel' mode.
        del history

        # clear session against OOM Error
        keras.backend.clear_session()
        gc.collect()

    csv_file = os.path.join(
        log_dir, "sample_{}_{}_{}_result.csv".format(N, MODE, BUILD_MODEL))
    df_result.to_csv(csv_file)

    print("\nexport {}  as CSV.".format(csv_file))
Пример #4
0
def main(data_mode, model_mode, no, set_epochs=60, do_es=False):
    """Train and evaluate one model on experiment ``no`` and return metrics.

    Relies on module-level names not defined in this block: ``cwd``,
    ``inputDataCreator``, ``dataSplit``, ``ImageDataGenerator``,
    ``ModelHandler``, ``EarlyStopping`` and ``keras``.

    Args:
        data_mode: ``'native'`` loads ``<cwd>/experiment_<no>`` into memory
            and splits it with ``dataSplit``; ``'auged'`` streams images from
            ``<cwd>/concat_experiment_<no>/{train,validation,test}`` and
            halves ``set_epochs``.
        model_mode: ``'mymodel'`` or ``'tlearn'``.
        no: Experiment number used to build the data directory name.
        set_epochs: Number of training epochs (halved in ``'auged'`` mode).
        do_es: When true, train with an ``EarlyStopping`` callback
            (``val_loss``, patience 5) and use ``log_with_es`` as the log
            directory instead of ``log``.

    Returns:
        dict: Last-epoch loss/accuracy (train and validation), the number of
        misclassified test samples, test loss/accuracy and the training time
        in seconds.

    Raises:
        ValueError: If ``data_mode`` or ``model_mode`` is not supported.
    """
    # Fail fast instead of hitting an UnboundLocalError further down.
    if data_mode not in ('native', 'auged'):
        raise ValueError(
            "unknown data_mode: {!r} (expected 'native' or 'auged')".format(
                data_mode))
    if model_mode not in ('mymodel', 'tlearn'):
        raise ValueError(
            "unknown model_mode: {!r} (expected 'mymodel' or 'tlearn')".format(
                model_mode))

    batch_size = 10

    if data_mode == 'native':
        data_dir = os.path.join(cwd, "experiment_{}".format(no))

        total_data, total_label = inputDataCreator(data_dir,
                                                   224,
                                                   normalize=True,
                                                   one_hot=True)

        train_data, train_label, validation_data, validation_label, test_data, test_label = dataSplit(
            total_data, total_label)

        # No rescale here: inputDataCreator was called with normalize=True.
        datagen = ImageDataGenerator()

        train_generator = datagen.flow(train_data,
                                       train_label,
                                       batch_size=batch_size,
                                       shuffle=True)
        validation_generator = datagen.flow(validation_data,
                                            validation_label,
                                            batch_size=batch_size,
                                            shuffle=True)
        # Not shuffled: predictions are compared row by row with
        # ``test_label``, so the generator must keep the array order.
        test_generator = datagen.flow(test_data,
                                      test_label,
                                      batch_size=batch_size,
                                      shuffle=False)

    else:  # 'auged' (validated above)
        set_epochs = int(set_epochs / 2)
        data_dir = os.path.join(cwd, "concat_experiment_{}".format(no))

        train_dir = os.path.join(data_dir, "train")
        validation_dir = os.path.join(data_dir, "validation")
        test_dir = os.path.join(data_dir, "test")

        datagen = ImageDataGenerator(rescale=1 / 255.0)

        train_generator = datagen.flow_from_directory(train_dir,
                                                      target_size=(224, 224),
                                                      batch_size=batch_size,
                                                      shuffle=True,
                                                      class_mode='categorical')

        validation_generator = datagen.flow_from_directory(
            validation_dir,
            target_size=(224, 224),
            batch_size=batch_size,
            shuffle=True,
            class_mode='categorical')
        # Not shuffled: the labels are read from the generator in a second
        # pass and must line up with the predictions.
        test_generator = datagen.flow_from_directory(test_dir,
                                                     target_size=(224, 224),
                                                     batch_size=batch_size,
                                                     shuffle=False,
                                                     class_mode='categorical')

    # Peek at one batch to derive the model input geometry.
    data_sample, label_sample = next(train_generator)
    input_size = data_sample.shape[1]
    channel = data_sample.shape[3]
    print("input_size: {} | channel: {}".format(input_size, channel))
    print("set epochs: ", set_epochs)

    mh = ModelHandler(input_size, channel)

    if model_mode == 'mymodel':
        model = mh.buildMyModel()
    else:  # 'tlearn'
        model = mh.buildTlearnModel(base='mnv1')

    model.summary()

    callbacks = None
    if do_es:
        callbacks = [
            EarlyStopping(monitor='val_loss',
                          patience=5,
                          verbose=1,
                          mode='auto')
        ]

    print("\ntraining sequence start .....")

    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()

    # The original built the EarlyStopping callback but never passed it on,
    # so ``do_es`` had no effect on training.
    history = model.fit_generator(train_generator,
                                  steps_per_epoch=steps_per_epoch,
                                  epochs=set_epochs,
                                  validation_data=validation_generator,
                                  validation_steps=validation_steps,
                                  callbacks=callbacks,
                                  verbose=1)

    elapsed_time = time.time() - start

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']

    log_dir = os.path.join(cwd, "log_with_es" if do_es else "log")
    os.makedirs(log_dir, exist_ok=True)
    # NOTE: export of the model and pickled history was already disabled in
    # the original code and is not performed here.

    print("\npredict sequence...")

    pred = model.predict_generator(test_generator, verbose=1)

    if data_mode == 'auged':
        # Read the labels of every batch (including a trailing partial one)
        # starting from the first batch, so there is one label per
        # prediction row.
        test_generator.reset()
        test_label = np.vstack(
            [next(test_generator)[1] for _ in range(len(test_generator))])

    # Column 0 of the one-hot label is reported as 'cat', column 1 as 'dog'.
    label_name_list = []
    for row in test_label:
        if row[0] == 1:
            label_name_list.append('cat')
        elif row[1] == 1:
            label_name_list.append('dog')

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    confuse = df_pred[~df_pred['collect']].index.tolist()
    collect = df_pred[df_pred['collect']].index.tolist()

    print(df_pred)
    print("\nwrong recognized indeices are ", confuse)
    print("  wrong recognized amount is ", len(confuse))
    print("\ncollect recognized indeices are ", collect)
    print("  collect recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    print("\nevaluate sequence...")

    eval_res = model.evaluate(test_generator, verbose=1)

    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # ----------
    save_dict = {
        'last_loss': losses[-1],
        'last_acc': accs[-1],
        'last_val_loss': val_losses[-1],
        'last_val_acc': val_accs[-1],
        'n_confuse': len(confuse),
        'eval_loss': eval_res[0],
        'eval_acc': eval_res[1],
        'elapsed_time': elapsed_time,
    }

    print(save_dict)

    # Drop references to generators and the model before clearing the Keras
    # session, so repeated calls do not accumulate memory.
    del datagen
    del train_generator, validation_generator, test_generator
    del model
    del history

    keras.backend.clear_session()
    gc.collect()

    return save_dict
Пример #5
0
def main(data_mode, model_mode, no, set_epochs=60, do_es=False):
    """Run 10-fold stratified cross-validation on experiment ``no``.

    For every fold the held-out part is used as test data and the remainder
    is split 2:1 into training and validation data.  Per-fold metrics are
    collected into one DataFrame (one column per fold) which is written to
    ``<cwd>/rlog/<data_mode>_<model_mode>_<no>/<data_mode>_<model_mode>_result.csv``
    (``rlog_with_es`` instead of ``rlog`` when ``do_es`` is true).

    Relies on module-level names not defined in this block:
    ``inputDataCreator``, ``dataSplit``, ``ModelHandler``, ``EarlyStopping``,
    ``StratifiedKFold`` and ``keras``.

    Args:
        data_mode: ``'auged'`` appends samples from
            ``<cwd>/auged_experiment_<no>`` to the training data and halves
            ``set_epochs``; any other value trains on the native data only.
        model_mode: ``'mymodel'`` or ``'tlearn'``.
        no: Experiment number used to build directory names.
        set_epochs: Number of training epochs (halved in ``'auged'`` mode).
        do_es: When true, train with an ``EarlyStopping`` callback
            (``val_loss``, patience 5).

    Raises:
        ValueError: If ``model_mode`` is not one of the supported values.
    """
    # Fail fast instead of hitting an UnboundLocalError inside the first fold.
    if model_mode not in ('mymodel', 'tlearn'):
        raise ValueError(
            "unknown model_mode: {!r} (expected 'mymodel' or 'tlearn')".format(
                model_mode))

    cwd = os.getcwd()
    data_dir = os.path.join(cwd, "experiment_{}".format(no))

    # Labels are loaded as class indices (not one-hot) because
    # StratifiedKFold.split is given them as the stratification target.
    total_data, total_label = inputDataCreator(
        data_dir,
        224,
        normalize=True,
    )
    print("\ntotal_data shape: ", total_data.shape)
    print("total_label shape: ", total_label.shape)

    if data_mode == 'auged':
        base_dir, data_dir_name = os.path.split(data_dir)
        data_dir_name = "auged_" + data_dir_name
        auged_dir = os.path.join(base_dir, data_dir_name)
        set_epochs = int(set_epochs / 2)

        total_auged_data, total_auged_label = inputDataCreator(auged_dir,
                                                               224,
                                                               normalize=True,
                                                               one_hot=True)
        print("\n  total auged_data : ", total_auged_data.shape)

    input_size = total_data.shape[1]
    channel = total_data.shape[3]
    mh = ModelHandler(input_size, channel)

    # Fold-independent settings are prepared once, outside the loop.
    batch_size = 10
    log_dir = os.path.join(cwd, "rlog_with_es" if do_es else "rlog")
    child_log_dir = os.path.join(
        log_dir, "{}_{}_{}".format(data_mode, model_mode, no))
    # makedirs creates the parent ``log_dir`` as well.
    os.makedirs(child_log_dir, exist_ok=True)

    skf = StratifiedKFold(n_splits=10)

    for k, (traval_idx, test_idx) in enumerate(
            skf.split(total_data, total_label)):
        print("\nK-Fold Cross-Validation k:{} ==========".format(k))

        print("\ntrain indices: \n", traval_idx)
        print("\ntest indices: \n", test_idx)

        test_data = total_data[test_idx]
        test_label = total_label[test_idx]

        print("-----*-----*-----")

        traval_data = total_data[traval_idx]
        traval_label = total_label[traval_idx]

        # Convert class indices to 2-class one-hot vectors.
        traval_label = np.identity(2)[traval_label.astype(np.int8)]
        test_label = np.identity(2)[test_label.astype(np.int8)]

        train_data, train_label, validation_data, validation_label, _, _ = dataSplit(
            traval_data,
            traval_label,
            train_rate=2 / 3,
            validation_rate=1 / 3,
            test_rate=0)

        if data_mode == 'auged':
            print("\nadd auged data to train_data...")

            # NOTE(review): indexing with the native-data fold indices
            # assumes the augmented arrays are ordered like the native ones
            # - confirm against inputDataCreator.
            auged_traval_data = total_auged_data[traval_idx]
            auged_traval_label = total_auged_label[traval_idx]

            auged_train_data, auged_train_label, _, _, _, _ = dataSplit(
                auged_traval_data,
                auged_traval_label,
                train_rate=2 / 3,
                validation_rate=1 / 3,
                test_rate=0)
            print("  append auged data: ", auged_train_data.shape)
            print("\n  concatnate auged data with native data...")
            train_data = np.vstack((train_data, auged_train_data))
            train_label = np.vstack((train_label, auged_train_label))
            print("    Done.")

        print("\ntrain data shape: ", train_data.shape)
        print("train label shape: ", train_label.shape)
        print("\nvalidation data shape: ", validation_data.shape)
        print("validation label shape: ", validation_label.shape)
        print("\ntest data shape: ", test_data.shape)
        print("test label shape: ", test_label.shape)

        if model_mode == 'mymodel':
            model = mh.buildMyModel()
        else:  # 'tlearn' (validated above)
            model = mh.buildTlearnModel(base='mnv1')

        model.summary()

        # A fresh callback per fold, as in the original.
        callbacks = None
        if do_es:
            callbacks = [
                EarlyStopping(monitor='val_loss',
                              patience=5,
                              verbose=1,
                              mode='auto')
            ]

        print("set epochs: ", set_epochs)

        print("\ntraining sequence start .....")
        start = time.time()
        history = model.fit(train_data,
                            train_label,
                            batch_size=batch_size,
                            epochs=set_epochs,
                            validation_data=(validation_data,
                                             validation_label),
                            callbacks=callbacks,
                            verbose=1)

        elapsed_time = time.time() - start

        accs = history.history['accuracy']
        losses = history.history['loss']
        val_accs = history.history['val_accuracy']
        val_losses = history.history['val_loss']

        # NOTE: per-fold export of the model and pickled history was already
        # disabled in the original code and is not performed here.

        print("\npredict sequence...")
        pred = model.predict(test_data, batch_size=10, verbose=1)

        # Column 0 of the one-hot label is reported as 'cat', column 1 as
        # 'dog'.
        label_name_list = []
        for row in test_label:
            if row[0] == 1:
                label_name_list.append('cat')
            elif row[1] == 1:
                label_name_list.append('dog')

        df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
        df_pred['class'] = df_pred.idxmax(axis=1)
        df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
        df_pred['collect'] = (df_pred['class'] == df_pred['label'])

        confuse = df_pred[~df_pred['collect']].index.tolist()
        collect = df_pred[df_pred['collect']].index.tolist()

        print(df_pred)
        print("\nwrong recognized indeices are ", confuse)
        print("  wrong recognized amount is ", len(confuse))
        print("\ncollect recognized indeices are ", collect)
        print("  collect recognized amount is ", len(collect))
        print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

        print("\nevaluate sequence...")

        eval_res = model.evaluate(test_data,
                                  test_label,
                                  batch_size=10,
                                  verbose=1)

        print("result loss: ", eval_res[0])
        print("result score: ", eval_res[1])

        # ----------
        save_dict = {
            'last_loss': losses[-1],
            'last_acc': accs[-1],
            'last_val_loss': val_losses[-1],
            'last_val_acc': val_accs[-1],
            'n_confuse': len(confuse),
            'eval_loss': eval_res[0],
            'eval_acc': eval_res[1],
            'elapsed_time': elapsed_time,
        }

        print(save_dict)

        # One column per fold: column 0 is created with the frame, later
        # folds are added as columns named after the fold number.
        if k == 0:
            df_result = pd.DataFrame(list(save_dict.values()),
                                     index=list(save_dict.keys()))
        else:
            df_result[k] = pd.Series(save_dict)
        print(df_result)

        # Release per-fold arrays and the model before the next fold.
        del traval_data, traval_label

        if data_mode == 'auged':
            del auged_traval_data, auged_traval_label
            del auged_train_data, auged_train_label

        del train_data, train_label
        del validation_data, validation_label
        del test_data, test_label

        del model
        del history
        del pred
        del df_pred, label_name_list, confuse, collect
        del eval_res
        del accs, losses, val_accs, val_losses

        # clear session against OOM Error
        keras.backend.clear_session()
        gc.collect()

    csv_file = os.path.join(child_log_dir,
                            "{}_{}_result.csv".format(data_mode, model_mode))
    df_result.to_csv(csv_file)

    print("\nexport {}  as CSV.".format(csv_file))

    # Remaining locals are released when the function returns; an explicit
    # collection is kept so the large arrays are reclaimed promptly.
    gc.collect()
Пример #6
0
def main():
    """Train the custom CNN from directory generators, evaluate and save it.

    Images are read from ``<parent of cwd>/datasets/medium_721``, using its
    ``train_with_aug`` (or ``train``), ``validation`` and ``test``
    sub-directories through ``ImageDataGenerator.flow_from_directory``.

    The function takes no arguments; it reads the module-level settings
    ``target_size``, ``batch_size``, ``input_size``, ``channel`` and
    ``set_epochs``.  After training it prints the test loss/score and a
    binary confusion matrix, then writes the model as an ``.h5`` file into
    ``<parent of cwd>/outputs/models`` (created if missing).
    """
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)
    data_dir = os.path.join(sub_prj, "datasets")

    data_src = os.path.join(data_dir, "medium_721")
    print("\ndata source: ", data_src)

    # Toggle between the augmented and the plain training set.
    use_da_data = True
    train_dir = os.path.join(data_src,
                             "train_with_aug" if use_da_data else "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)

    # data load ----------
    data_gen = ImageDataGenerator(rescale=1. / 255)

    train_generator = data_gen.flow_from_directory(train_dir,
                                                   target_size=target_size,
                                                   batch_size=batch_size,
                                                   shuffle=True,
                                                   class_mode='categorical')

    validation_generator = data_gen.flow_from_directory(
        validation_dir,
        target_size=target_size,
        batch_size=batch_size,
        shuffle=True,
        class_mode='categorical')

    # shuffle=False: test batches must come out in directory order so the
    # predictions can be compared with test_generator.classes below.
    test_generator = data_gen.flow_from_directory(test_dir,
                                                  target_size=target_size,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  class_mode='categorical')

    # Pull a single batch only to report the tensor shapes.
    data_checker, label_checker = next(train_generator)

    print("train data shape (in batch): ", data_checker.shape)
    print("train label shape (in batch): ", label_checker.shape)

    # build model ----------
    mh = ModelHandler(input_size, channel)
    model = mh.buildMyModel()
    model.summary()

    # instance EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    steps_per_epoch = train_generator.n // batch_size
    validation_steps = validation_generator.n // batch_size
    print(steps_per_epoch, " [steps / epoch]")
    print(validation_steps, " (validation steps)")

    start = time.time()
    model.fit_generator(train_generator,
                        steps_per_epoch=steps_per_epoch,
                        epochs=set_epochs,
                        validation_data=validation_generator,
                        validation_steps=validation_steps,
                        callbacks=[es],
                        verbose=1)
    print("elapsed time (for train): {} [sec]".format(time.time() - start))

    print("\nevaluate sequence...")
    # Round up so the final, possibly smaller, batch is evaluated too;
    # floor division would silently drop those samples (and give 0 steps
    # when the test set is smaller than one batch).
    test_steps = (test_generator.n + batch_size - 1) // batch_size
    eval_res = model.evaluate_generator(test_generator,
                                        steps=test_steps,
                                        verbose=1)

    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # confusion matrix -----
    print("\nconfusion matrix")
    pred = model.predict_generator(test_generator, steps=test_steps, verbose=3)

    # Ground-truth class indices come straight from the iterator instead of
    # re-reading batches with next(), so they do not depend on how far the
    # generator has already been advanced.
    idx_label = np.asarray(test_generator.classes)[:len(pred)]
    idx_pred = np.argmax(pred, axis=-1)  # class probabilities => best class

    cm = confusion_matrix(idx_label, idx_pred)

    # The 4-way unpacking only works for a 2x2 (binary) confusion matrix.
    tn, fp, fn, tp = cm.ravel()

    print("  | T  | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # save model -----
    save_location = os.path.join(sub_prj, "outputs", "models")
    # model.save() does not create missing directories.
    os.makedirs(save_location, exist_ok=True)

    save_file = os.path.join(
        save_location, "mymodel_auged.h5" if use_da_data else "mymodel.h5")
    model.save(save_file)
    print("\nmodel has saved in", save_file)
Пример #7
0
def main(N, LEARN_PATH, DATA_MODE, BUILD_MODEL, EPOCHS=60):
    """Train one model on ``sample_<N>`` and return a summary of the run.

    Args:
        N: Sample index; data is read from ``LEARN_PATH/sample_<N>``, which
            must contain ``train``, ``validation`` and ``test`` directories.
        LEARN_PATH: Directory that holds the ``sample_<N>`` folders.
        DATA_MODE: Not used by this function; kept so existing callers keep
            working.
        BUILD_MODEL: ``'mymodel'`` (``ModelHandler.buildMyModel``) or
            ``'tlearn'`` (``ModelHandler.buildTlearnModel(base='mnv1')``).
        EPOCHS: Maximum number of epochs; early stopping on ``val_loss``
            (patience 5) may end training sooner.

    Returns:
        dict with the keys ``last_loss``, ``last_acc``, ``last_val_loss``,
        ``last_val_acc``, ``n_confuse``, ``eval_loss``, ``eval_acc`` and
        ``elapsed_time``.

    Raises:
        ValueError: If ``BUILD_MODEL`` is not one of the supported names.
    """
    # Fail fast: an unknown name used to surface only after all datasets
    # were loaded, as an unbound ``model`` variable.
    if BUILD_MODEL not in ('mymodel', 'tlearn'):
        raise ValueError(
            "unsupported BUILD_MODEL: {!r} (expected 'mymodel' or 'tlearn')"
            .format(BUILD_MODEL))

    sample_dir = os.path.join(LEARN_PATH, "sample_{}".format(N))

    use_da_data = False
    train_dir = os.path.join(sample_dir,
                             "train_with_aug" if use_da_data else "train")
    validation_dir = os.path.join(sample_dir, "validation")
    test_dir = os.path.join(sample_dir, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)

    # data load ----------
    train_data, train_label = inputDataCreator(train_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)

    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         224,
                                                         normalize=True,
                                                         one_hot=True)

    test_data, test_label = inputDataCreator(test_dir,
                                             224,
                                             normalize=True,
                                             one_hot=True)

    print("\ntrain data shape: ", train_data.shape)
    print("train label shape: ", train_label.shape)
    print("\nvalidation data shape: ", validation_data.shape)
    print("validation label shape: ", validation_label.shape)

    input_size = train_data.shape[1]
    channel = train_data.shape[3]
    batch_size = 10  # shared by fit, predict and evaluate
    print("set epochs: ", EPOCHS)

    mh = ModelHandler(input_size, channel)

    if BUILD_MODEL == 'mymodel':
        model = mh.buildMyModel()
    else:  # 'tlearn' (validated at the top of the function)
        model = mh.buildTlearnModel(base='mnv1')

    model.summary()

    # early stopping
    es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                       patience=5,
                                       restore_best_weights=True)

    print("\ntraining sequence start .....")
    start = time.time()
    history = model.fit(train_data,
                        train_label,
                        batch_size=batch_size,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        callbacks=[es],
                        verbose=2)

    elapsed_time = time.time() - start
    print("elapsed time (for train): {} [sec]".format(elapsed_time))

    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']

    print("\npredict sequence...")
    pred = model.predict(test_data, batch_size=batch_size, verbose=1)

    # Translate each one-hot row into its class name (column 0 = cat,
    # column 1 = dog); rows that match neither are skipped.
    label_name_list = []
    for one_hot in test_label:
        if one_hot[0] == 1:
            label_name_list.append('cat')
        elif one_hot[1] == 1:
            label_name_list.append('dog')

    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.Series(label_name_list)
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    # Row indices of the wrong / right predictions.
    confuse = df_pred.index[~df_pred['collect']].tolist()
    collect = df_pred.index[df_pred['collect']].tolist()

    print(df_pred)
    print("\nwrong recognized indeices are ", confuse)
    print("  wrong recognized amount is ", len(confuse))
    print("\ncollect recognized indeices are ", collect)
    print("  collect recognized amount is ", len(collect))
    print("\nwrong rate: ", 100 * len(confuse) / len(test_label), " %")

    print("\nevaluate sequence...")

    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=batch_size,
                              verbose=1)

    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])

    # ----------
    save_dict = {
        'last_loss': losses[-1],
        'last_acc': accs[-1],
        'last_val_loss': val_losses[-1],
        'last_val_acc': val_accs[-1],
        'n_confuse': len(confuse),
        'eval_loss': eval_res[0],
        'eval_acc': eval_res[1],
        'elapsed_time': elapsed_time,
    }

    print(save_dict)

    # Drop the heavy objects and reset the Keras session before returning,
    # so repeated calls do not pile up memory.
    del model
    del history

    keras.backend.clear_session()
    gc.collect()

    return save_dict
Пример #8
0
def main(LEARN_PATH, INPUT_SIZE, CHANNEL, BATCH_SIZE, EPOCHS):
    """Train the custom model on LEARN_PATH, then evaluate, save and report.

    Args:
        LEARN_PATH: Dataset root; its ``train``, ``validation`` and ``test``
            sub-directories are loaded with ``inputDataCreator``.
        INPUT_SIZE: Size passed to ``inputDataCreator`` and ``ModelHandler``.
        CHANNEL: Channel count passed to ``ModelHandler``.
        BATCH_SIZE: Batch size used for ``fit`` and ``predict``
            (``evaluate`` uses a hard-coded 10).
        EPOCHS: Maximum number of epochs; early stopping on ``val_loss``
            with patience 5 is installed.

    Side effects:
        Saves ``model.h5`` under ``<parent of cwd>/outputs/models/<YYYYMMDD>``
        and ``result.csv`` under ``<parent of cwd>/outputs/logs/<YYYYMMDD>``,
        and prints per-sample predictions, a binary confusion matrix,
        precision and recall.
    """

    # NOTE(review): target_size and input_shape are not referenced again in
    # this part of the function.
    target_size = (INPUT_SIZE, INPUT_SIZE)
    input_shape = (INPUT_SIZE, INPUT_SIZE, CHANNEL)

    data_src = LEARN_PATH
    print("\ndata source: ", data_src)

    # Hard-coded switch between the plain and the augmented training set.
    use_da_data = False
    if use_da_data:
        train_dir = os.path.join(data_src, "train_with_aug")
    else:
        train_dir = os.path.join(data_src, "train")
    validation_dir = os.path.join(data_src, "validation")
    test_dir = os.path.join(data_src, "test")

    print("train_dir: ", train_dir)
    print("validation_dir: ", validation_dir)
    print("test_dir: ", test_dir)


    # data load ----------
    train_data, train_label = inputDataCreator(train_dir,
                                               INPUT_SIZE,
                                               normalize=True,
                                               one_hot=True)
    validation_data, validation_label = inputDataCreator(validation_dir,
                                                         INPUT_SIZE,
                                                         normalize=True,
                                                         one_hot=True)
    test_data, test_label = inputDataCreator(test_dir,
                                             INPUT_SIZE,
                                             normalize=True,
                                             one_hot=True)
    """
    total_data, total_label = inputDataCreator(data_dir,
                                               224,
                                               normalize=True,
                                               one_hot=True)

    train_data, train_label, validation_data, validation_label, test_data, test_label = dataSplit(total_data, total_label)
    """

    # NOTE(review): the messages say "(in batch)" but these are the shapes
    # of the full arrays returned by inputDataCreator.
    print("train data shape (in batch): ", train_data.shape)
    print("train label shape (in batch): ", train_label.shape)
    # print("validation data shape:", validation_data.shape)
    # print("validation label shape:", validation_label.shape)
    # print("test data shape:", test_data.shape)
    # print("test label shape:", test_label.shape)


    # build model ----------
    mh = ModelHandler(INPUT_SIZE, CHANNEL)
    model = mh.buildMyModel()
    model.summary()


    # instantiate EarlyStopping -----
    es = EarlyStopping(monitor='val_loss',
                       # monitor='val_accuracy',
                       patience=5,
                       verbose=1,
                       restore_best_weights=True)


    print("\ntraining sequence start .....")
    start = time.time() 
    history = model.fit(train_data,
                        train_label,
                        batch_size=BATCH_SIZE,
                        epochs=EPOCHS,
                        validation_data=(validation_data, validation_label),
                        callbacks=[es],
                        verbose=2)
    elapsed_time = time.time() - start
    print( "elapsed time (for train): {} [sec]".format(elapsed_time) )
    
    # Per-epoch metric curves recorded by fit().
    accs = history.history['accuracy']
    losses = history.history['loss']
    val_accs = history.history['val_accuracy']
    val_losses = history.history['val_loss']
    print("last val_acc: ", val_accs[len(val_accs)-1])


    # evaluate ----------
    print("\nevaluate sequence...")
    # NOTE(review): batch size is hard-coded to 10 here while fit/predict
    # use BATCH_SIZE.
    eval_res = model.evaluate(test_data,
                              test_label,
                              batch_size=10,
                              verbose=2)
    print("result loss: ", eval_res[0])
    print("result score: ", eval_res[1])


    # logging and detail outputs -----

    # make log directory
    cwd = os.getcwd()
    sub_prj = os.path.dirname(cwd)

    log_dir = os.path.join(sub_prj, "outputs", "logs")
    os.makedirs(log_dir, exist_ok=True)
    model_log_dir = os.path.join(sub_prj, "outputs", "models")
    # NOTE(review): this repeats makedirs on log_dir; model_log_dir itself
    # is only created indirectly, as a parent of child_model_log_dir below.
    os.makedirs(log_dir, exist_ok=True)

    # One sub-directory per calendar day (YYYYMMDD).
    now = datetime.datetime.now()
    child_log_dir = os.path.join(log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_log_dir, exist_ok=True)
    child_model_log_dir = os.path.join(model_log_dir, "{0:%Y%m%d}".format(now))
    os.makedirs(child_model_log_dir, exist_ok=True)

    # save model & weights
    model_file = os.path.join(child_model_log_dir, "model.h5")
    model.save(model_file)
    print("\nexport model in ", child_model_log_dir)



    # predict -> confusion matrix ----------
    print("\npredict sequence...")
    pred = model.predict(test_data,
                         batch_size=BATCH_SIZE,
                         verbose=2)

    # Turn each one-hot label row into a class name: column 0 is 'cat',
    # column 1 is 'dog'; a row matching neither appends nothing.
    label_name_list = []
    for i in range(len(test_label)):
        if test_label[i][0] == 1:
            label_name_list.append('cat')
        elif test_label[i][1] == 1:
            label_name_list.append('dog')
        

    # print("result: ", pred)
    # One row per test sample: the two class scores, the predicted class
    # name, the true label name and whether they match ('collect').
    df_pred = pd.DataFrame(pred, columns=['cat', 'dog'])
    df_pred['class'] = df_pred.idxmax(axis=1)
    df_pred['label'] = pd.DataFrame(label_name_list, columns=['label'])
    df_pred['collect'] = (df_pred['class'] == df_pred['label'])

    # Row indices of the misclassified / correctly classified samples.
    confuse = df_pred[df_pred['collect'] == False].index.tolist()
    collect = df_pred[df_pred['collect'] == True].index.tolist()

    print(df_pred)
    print("\nwrong recognized indeices are ", confuse)
    print("  wrong recognized amount is ", len(confuse))
    print("\ncollect recognized indeices are ", collect)
    print("  collect recognized amount is ", len(collect))
    print("\nwrong rate: ", 100*len(confuse)/len(test_label), " %")


    # collect the summary of this run (final-epoch metrics, test metrics)
    save_dict = {}
    save_dict['last_loss'] = losses[len(losses)-1]
    save_dict['last_acc'] = accs[len(accs)-1]
    save_dict['last_val_loss'] = val_losses[len(val_losses)-1]
    save_dict['last_val_acc'] = val_accs[len(val_accs)-1]
    save_dict['n_confuse'] = len(confuse)
    save_dict['eval_loss'] = eval_res[0]
    save_dict['eval_acc'] = eval_res[1]
    save_dict['elapsed_time'] = elapsed_time

    print("save result dict:", save_dict)

    # One-column frame indexed by metric name, exported as CSV.
    df_result = pd.DataFrame(save_dict.values(), index=save_dict.keys())

    csv_file = os.path.join( child_log_dir, "result.csv" )
    df_result.to_csv(csv_file)
    print("\nexport result in ", csv_file)


    # confusion matrix -----
    #           Predict
    #           0  | 1
    #       --+----+-----
    #       0 | TN | FP
    # label -------+-----
    #       1 | FN | TP
    print("\nconfusion matrix")

    idx_label = np.argmax(test_label, axis=-1)  # one_hot => class index
    idx_pred = np.argmax(pred, axis=-1)  # per-class scores => highest-scoring class
    cm = confusion_matrix(idx_label, idx_pred)

    # Calculate Precision and Recall
    # The 4-way unpacking requires a 2x2 (binary) confusion matrix.
    tn, fp, fn, tp = cm.ravel()

    print("  | T  | F ")
    print("--+----+---")
    print("N | {} | {}".format(tn, fn))
    print("--+----+---")
    print("P | {} | {}".format(tp, fp))

    # Precision:
    #   sklearn.metrics also provides precision_score() for this.
    #       # its inputs are (idx_label, idx_pred)
    #       "Of the samples predicted positive, the fraction that were
    #       actually correct."
    #       Also called PPV (positive predictive value).
    # NOTE(review): no guard for tp + fp == 0 (no positive predictions).
    precision = tp/(tp+fp)
    print("Precision of the model is {}".format(precision))
    
    # Recall:
    #    sklearn.metrics also provides recall_score() for this.
    #       # its inputs are (idx_label, idx_pred)
    #    "Of the samples that are actually positive, the fraction that were
    #    predicted correctly."
    #        Also called sensitivity, hit rate or
    #        TPR (true positive rate).
    #         # Very roughly speaking, it is
    #         #    an accuracy-like measure computed for class 1 only
    #         #    (how well the negative side does is "don't care").
    #         # Conversely, TN / (TN + FP) is
    #         #    the accuracy for class 0 only.
    # NOTE(review): no guard for tp + fn == 0 (no positive labels).
    recall = tp/(tp+fn)
    print("Recall of the model is {}".format(recall))