Example #1
def q5b2():
    logger.log('Analyzing Q5(b2)...')
    compare = {
        'acc_converge': {
            3: acc_converge_epoch(histories['Q4']['optimal']['val_accuracy']),
            4: acc_converge_epoch(histories['Q5']['val_accuracy'])
        },

        'loss_converge': {
            3: loss_converge_epoch(histories['Q4']['optimal']['val_loss']),
            4: loss_converge_epoch(histories['Q5']['val_loss'])
        },

        'final_acc': {
            3: training_result(histories['Q4']['optimal']['val_accuracy']),
            4: training_result(histories['Q5']['val_accuracy'])
        },

        'final_loss': {
            3: training_result(histories['Q4']['optimal']['val_loss'], mode='loss'),
            4: training_result(histories['Q5']['val_loss'], mode='loss')
        }
    }
    compare_df = pd.DataFrame(compare)
    compare_df.to_csv('result/AQ5_result.csv')
    logger.log('Saved result to \"result/AQ5_result.csv\"')
Example #2
def compare():
    logger.log('Compare all features, 6 features and 5 features')

    mse_all = hist1['Q1']['a']['val_mse']
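    # Keys of hist2['Q2'] are comma-joined indices of removed features (see
    # the rfe() snippet in Example #4), so one removed index leaves 6
    # features and two removed indices leave 5.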
    best_6 = val_mse[[c for c in df.columns
                      if len(c.split(",")) == 1]].idxmin()
    mse_6 = hist2['Q2'][best_6]['val_mse']
    best_5 = val_mse[[c for c in df.columns
                      if len(c.split(",")) == 2]].idxmin()
    mse_5 = hist2['Q2'][best_5]['val_mse']

    f, ax = plt.subplots(figsize=(10, 5))
    plt.plot(mse_all, label='all features')
    plt.plot(mse_6, label='6 features')
    plt.plot(mse_5, label='5 features')
    plt.xlabel('epoch', fontsize=18)
    plt.ylabel('mean squared error', fontsize=18)
    plt.legend(loc='upper right', fontsize=18)
    plt.title('MSE for Different Numbers of Input Features',
              fontsize=20,
              pad=20)
    plt.xlim(1000, 5000)
    plt.ylim(0.0058, 0.010)
    plt.xticks(fontsize=12, wrap=True)
    plt.yticks(fontsize=12, wrap=True)

    plt.tight_layout()
    plt.savefig('result/BQ2_compare.png')
    logger.log('Saved result to \"result/BQ2_compare.png\"')

    final = {
        'mse_converge': {
            "all_features": loss_converge_epoch(mse_all),
            "6_features": loss_converge_epoch(mse_6),
            "5_features": loss_converge_epoch(mse_5)
        },
        'final_mse': {
            "all_features": training_result(mse_all, mode='loss'),
            "6_features": training_result(mse_6, mode='loss'),
            "5_features": training_result(mse_5, mode='loss')
        }
    }
    final_df = pd.DataFrame(final)
    final_df.to_csv('result/BQ2_result.csv')
    logger.log('Saved result to \"result/BQ2_result.csv\"')

    # update hyperparameter
    result = min(final['final_mse'].keys(),
                 key=lambda x: final['final_mse'][x])
    if result == 'all_features':
        removed = []
    elif result == '6_features':
        removed = list(map(int, best_6.split(",")))
    elif result == '5_features':
        removed = list(map(int, best_5.split(",")))

    hyperparameters['input_shape'] = (len(columns) - len(removed), )
    hyperparameters['removed'] = removed
    write_dict_to_json(hyperparameters, args.params)
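
# read_json_to_dict / write_dict_to_json come from utils.dict_json and are not
# shown here; thin json wrappers like these (a sketch, not the original code)
# would satisfy every call site in these examples:
import json


def read_json_to_dict(path):
    with open(path) as f:
        return json.load(f)


def write_dict_to_json(d, path):
    with open(path, 'w') as f:
        json.dump(d, f, indent=2)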
Example #3
def compare():
    df = pd.DataFrame(histories['Q3'])
    val_mse = df[df.index == 'val_mse'].apply(
        lambda x: x.explode()).reset_index(drop=True)
    f, ax = plt.subplots(1, 1, figsize=(15, 5))
    val_mse.plot(ax=ax)  # plot on the created axes so figsize takes effect
    plt.xlabel('epoch', fontsize=15)
    plt.ylabel('mean squared error', fontsize=15)
    plt.legend(loc='upper right', fontsize=12)
    plt.title('MSE for Different Models', fontsize=15, pad=20)
    plt.xlim(500, 10000)
    plt.ylim(0.0055, 0.0115)
    plt.xticks(fontsize=12)
    plt.yticks(fontsize=12)
    plt.tight_layout()
    plt.savefig('result/BQ3_compare.png')
    logger.log('Saved result to \"result/BQ3_compare.png\"')

    df.applymap(lambda x: training_result(x, mode='loss')).to_csv(
        'result/BQ3_compare.csv')
    logger.log('Saved result to \"result/BQ3_compare.csv\"')

    df.applymap(lambda x: loss_converge_epoch(x)).to_csv(
        'result/BQ3_converge_epoch.csv')
    logger.log('Saved result to \"result/BQ3_converge_epoch.csv\"')
Example #4
def rfe(n_to_remove, removed=None):
    # Avoid the mutable-default-argument pitfall; start from an empty list.
    removed = removed if removed is not None else []
    if n_to_remove == 0:
        return

    logger.log('RFE %s' % (n_to_remove))
    results = dict()

    for i in range(X_train.shape[1]):
        if i in removed:
            continue

        logger.log('Removed %s' % (removed + [i]))

        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(X_train.shape[1] -
                                                    len(removed) - 1, )),
            tf.keras.layers.Dense(
                num_neurons,
                activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(l=beta)),
            tf.keras.layers.Dense(
                1,
                activation='sigmoid',
                kernel_regularizer=tf.keras.regularizers.l2(l=beta))
        ])

        model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                      loss=tf.keras.losses.MeanSquaredError(),
                      metrics=['mse'])

        with tqdm(total=epochs) as pbar:
            update = tf.keras.callbacks.LambdaCallback(
                on_epoch_end=lambda epoch, logs: pbar.update(1))

            hist = model.fit(x=remove_features(X_train, removed + [i]),
                             y=y_train,
                             batch_size=batch_size,
                             epochs=epochs,
                             validation_data=(remove_features(
                                 X_test, removed + [i]), y_test),
                             verbose=0,
                             callbacks=[update])

        key = ",".join(map(str, removed + [i]))
        histories['Q2'][key] = {
            'mse': hist.history['mse'],
            'val_mse': hist.history['val_mse'],
            'loss': hist.history['loss'],
            'val_loss': hist.history['val_loss']
        }
        results[i] = training_result(hist.history['val_mse'], mode='loss')

    i = min(results.keys(), key=lambda x: results[x])
    logger.log('Feature %s is removed!' % (i))
    rfe(n_to_remove - 1, removed + [i])
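
# remove_features is called above but never defined in this snippet. A minimal
# sketch, assuming X is a 2-D NumPy array and `removed` lists the column
# indices to drop:
def remove_features(X, removed):
    keep = [i for i in range(X.shape[1]) if i not in removed]
    return X[:, keep]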
Example #5
def Q2(x_train, y_train, x_test, y_test):
    logger.log('Start Q2')
    num_ch_c1 = [10, 30, 50, 70, 90]
    num_ch_c2 = [20, 40, 60, 80, 100]

    epochs = EPOCHS
    batch_size = BATCH_SIZE
    learning_rate = LR
    use_dropout = False

    histories = {c1: {c2: None for c2 in num_ch_c2} for c1 in num_ch_c1}

    for c1 in num_ch_c1:
        for c2 in num_ch_c2:
            logger.log(f'Train C1={c1} C2={c2}')
            model = make_model(c1, c2, use_dropout)
            loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
            optimizer_ = 'SGD'
            optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
            model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

            with tqdm(total=epochs) as pbar:
                update = tf.keras.callbacks.LambdaCallback(
                    on_epoch_end=lambda epoch, logs: pbar.update(1)
                )
                logs = tf.keras.callbacks.CSVLogger(
                    f'./logs/{c1}_{c2}_{optimizer_}_no_dropout', separator=',', append=False
                )
                history = model.fit(
                    x_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    shuffle=True,
                    validation_data=(x_test, y_test),
                    verbose=0,
                    callbacks=[update, logs]
                )
            model.save(f'./models/{c1}_{c2}_{optimizer_}_no_dropout.h5')
            histories[c1][c2] = history.history['val_accuracy']

    hist_df = pd.DataFrame(histories)
    hist_df = hist_df.applymap(lambda x: training_result(x, mode='acc'))
    hist_df.to_csv('./results/c1c2_test_accuracy.csv', header=True, index=True)
    opt_c1 = hist_df.max().idxmax()
    opt_c2 = hist_df[opt_c1].idxmax()
    logger.log(f'Best C1={opt_c1} C2={opt_c2}')
    logger.end('Done Q2')

    return opt_c1, opt_c2
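
# make_model is external to this snippet. A hedged sketch consistent with the
# call make_model(c1, c2, use_dropout): two convolution blocks with c1 and c2
# channels feeding a softmax head. The input shape and class count below are
# illustrative assumptions, not values from the original code.
import tensorflow as tf


def make_model(c1, c2, use_dropout, input_shape=(32, 32, 3), num_classes=10):
    layers = [
        tf.keras.layers.InputLayer(input_shape=input_shape),
        tf.keras.layers.Conv2D(c1, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(c2, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
    ]
    if use_dropout:
        layers.append(tf.keras.layers.Dropout(0.5))
    layers.append(tf.keras.layers.Dense(num_classes, activation='softmax'))
    return tf.keras.Sequential(layers)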
Example #6
                    required=True)
args = parser.parse_args()

logger = Logger()
logger.log('Starting q2_analyze.py...')

logger.log('Loading \"' + args.data[0] + '\" and \"' + args.data[1] + '\"')
hist1 = read_json_to_dict(args.data[0])
hist2 = read_json_to_dict(args.data[1])

# Hyperparameters
hyperparameters = read_json_to_dict(args.params)

# Setup data to be used
df = pd.DataFrame(
    hist2['Q2']).applymap(lambda x: training_result(x, mode='loss'))
columns = hist2['columns']


def decode_col_name(col):
    idx = set(map(int, col.split(",")))
    # Keep the original column order; a raw set difference would scramble it.
    remaining_cols = [columns[i] for i in range(len(columns)) if i not in idx]
    return ", ".join(remaining_cols)


val_mse = df[df.index == 'val_mse'].squeeze()


def corr_coef():
    logger.log('Plotting correlation coefficient of features...')
Example #7
import argparse

import pandas as pd

from utils.logger import Logger
from utils.dict_json import read_json_to_dict
from utils.acc_loss import acc_converge_epoch, loss_converge_epoch, training_result

parser = argparse.ArgumentParser()
parser.add_argument('-D', '--data', help='Path to result json file', required=True)
args = parser.parse_args()

logger = Logger()
logger.log('Starting q2_analyze.py...')

logger.log('Loading \"' + args.data + '\"')
histories = read_json_to_dict(args.data)
batch_epoch = pd.DataFrame(histories['Q2']['cv']['accuracy'])
time_per_epoch = pd.DataFrame(histories['Q2']['cv']['time'])
epoch_to_converge = batch_epoch.applymap(lambda x: acc_converge_epoch(x))
cv_accuracy = batch_epoch.applymap(lambda x: training_result(x))

total_time_to_converge = (epoch_to_converge * time_per_epoch).mean()
print(epoch_to_converge)
epoch_to_converge = epoch_to_converge.mean()
print(cv_accuracy)
cv_accuracy = cv_accuracy.mean()
batch_size = histories['Q2']['optimal']['optimal_batch']

def q2a1():
    logger.log('Analyzing Q2(a1)...')
    
    f, ax = plt.subplots(5, 1, figsize=(10, 25))
    for i, batch in enumerate(batch_epoch.columns, start=0):
        ax[i].plot(batch_epoch[batch][0][1:], label='Fold 1')
        ax[i].plot(batch_epoch[batch][1][1:], label='Fold 2')
Example #8
def cross_validation():
    input_shape = hyperparameters['input_shape']
    num_classes = hyperparameters['num_classes']
    epochs = 500
    batch_sizes = [4, 8, 16, 32, 64]
    num_neurons = hyperparameters['num_neurons']
    alpha = hyperparameters['alpha']

    histories['Q2'] = {
        'cv': {
            'accuracy': {batch: [] for batch in batch_sizes},
            'time': {batch: [] for batch in batch_sizes},
        },
        'optimal': dict()
    }

    logger.log('Starting cross validation...')
    X, y = dataset.get_train()
    X_test, y_test = dataset.get_test()

    for fold, (train_index, valid_index) in enumerate(dataset.get_kfold(), start=1):
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]
        
        for batch in batch_sizes:
            logger.log('Fold %s Batch Size %s' % (fold, batch))
            with tqdm(total=epochs, desc='Fold %s Batch Size %s' % (fold, batch)) as pbar:
                # Name the callback arg 'epoch' so it does not shadow the
                # enclosing 'batch' loop variable.
                update = tf.keras.callbacks.LambdaCallback(
                    on_epoch_end=lambda epoch, logs: pbar.update(1)
                )
            
                model = tf.keras.Sequential([
                    tf.keras.layers.InputLayer(input_shape=input_shape),
                    tf.keras.layers.Dense(num_neurons, activation='relu'),
                    tf.keras.layers.Dense(num_classes, activation='softmax')
                ])

                model.compile(
                    optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                    metrics=['accuracy']
                )

                start = time.time()
                history = model.fit(
                    x=X_train,
                    y=y_train,
                    batch_size=batch,
                    epochs=epochs,
                    validation_data=(X_valid, y_valid),
                    verbose=0,
                    callbacks=[update]
                )
                end = time.time()
            
                histories['Q2']['cv']['accuracy'][batch].append(history.history['val_accuracy'])
                histories['Q2']['cv']['time'][batch].append((end - start) / epochs)
    

    batch_epoch = pd.DataFrame(histories['Q2']['cv']['accuracy'])
    time_per_epoch = pd.DataFrame(histories['Q2']['cv']['time'])
    epoch_to_converge = batch_epoch.applymap(lambda x: acc_converge_epoch(x))
    cv_accuracy = batch_epoch.applymap(lambda x: training_result(x)).mean()
    total_time_to_converge = (epoch_to_converge * time_per_epoch).mean()

    def standardize(arr):
        return (arr - arr.mean()) / arr.std()

    def deciding_factor(total_time, acc):
        time_score = np.exp(-standardize(total_time))
        acc_score = np.exp(standardize(acc))
        print(acc_score * time_score)
        return int((acc_score * time_score).idxmax())

    hyperparameters['batch_size'] = deciding_factor(total_time_to_converge, cv_accuracy)
    logger.log('Optimal batch size: %s' % hyperparameters['batch_size'])
    write_dict_to_json(hyperparameters, args.params)

    logger.log('Done cross validation')
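
# The `dataset` object (get_train / get_test / get_kfold) is created elsewhere.
# A stand-in sketch of the interface exercised above, built on scikit-learn;
# the class name and defaults below are assumptions:
from sklearn.model_selection import KFold, train_test_split


class Dataset:
    def __init__(self, X, y, test_ratio=0.3, fold=5):
        (self.X_train, self.X_test,
         self.y_train, self.y_test) = train_test_split(X, y, test_size=test_ratio)
        self.fold = fold

    def get_train(self):
        return self.X_train, self.y_train

    def get_test(self):
        return self.X_test, self.y_test

    def get_kfold(self):
        # Yields (train_index, valid_index) pairs over the training split,
        # matching the enumerate(dataset.get_kfold(), start=1) loop above.
        return KFold(n_splits=self.fold, shuffle=True).split(self.X_train)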
Example #9
def cross_validation():
    input_shape = hyperparameters['input_shape']
    num_classes = hyperparameters['num_classes']
    epochs = 500
    batch_size = hyperparameters['batch_size']
    num_neurons = hyperparameters['num_neurons']
    alpha = hyperparameters['alpha']
    beta = [0, 1e-3, 1e-6, 1e-9, 1e-12]

    histories['Q4'] = {
        'cv': {
            'accuracy': {b: [] for b in beta},
            'time': {b: [] for b in beta},
        },
        'optimal': dict()
    }

    logger.log('Starting cross validation...')
    X, y = dataset.get_train()
    X_test, y_test = dataset.get_test()

    for fold, (train_index, valid_index) in enumerate(dataset.get_kfold(), start=1):
        X_train, X_valid = X[train_index], X[valid_index]
        y_train, y_valid = y[train_index], y[valid_index]
        
        for b in beta:
            logger.log('Fold %s Decay %s' % (fold, b))
            with tqdm(total=epochs, desc='Fold %s Decay %s' % (fold, b)) as pbar:
                update = tf.keras.callbacks.LambdaCallback(
                    on_epoch_end=lambda epoch, logs: pbar.update(1)
                )
            
                model = tf.keras.Sequential([
                    tf.keras.layers.InputLayer(input_shape=input_shape),
                    tf.keras.layers.Dense(num_neurons, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l=b)),
                    tf.keras.layers.Dense(num_classes, activation='softmax', kernel_regularizer=tf.keras.regularizers.l2(l=b))
                ])

                model.compile(
                    optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                    metrics=['accuracy']
                )

                start = time.time()
                history = model.fit(
                    x=X_train,
                    y=y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(X_valid, y_valid),
                    verbose=0,
                    callbacks=[update]
                )
                end = time.time()
            
                histories['Q4']['cv']['accuracy'][b].append(history.history['val_accuracy'])
                histories['Q4']['cv']['time'][b].append((end - start) / epochs)
    
    beta_epoch = pd.DataFrame(histories['Q4']['cv']['accuracy'])
    cv_accuracy = beta_epoch.applymap(lambda x: training_result(x)).mean()
    hyperparameters['beta'] = float(cv_accuracy.idxmax())
    write_dict_to_json(hyperparameters, args.params)
    logger.log('Done cross validation')
Example #10
def rfe(n_features, features_left=None):
    # Avoid the mutable-default-argument pitfall; start from an empty list.
    features_left = features_left if features_left is not None else []
    if len(features_left) <= n_features:
        return

    logger.log('RFE %s' % (len(features_left) - 1))
    results = dict()

    for i, col in enumerate(features_left):

        # All remaining features except the i-th candidate for removal.
        columns = list(features_left[:i]) + list(features_left[i + 1:])
        dataset = PreprocessDataset(df=df,
                                    feature_columns=columns,
                                    label_column=df.columns[-1],
                                    test_ratio=0.3,
                                    fold=5)
        X_train, y_train = dataset.get_train()
        X_test, y_test = dataset.get_test()

        logger.log('Features %s' % (", ".join(columns)))

        model = tf.keras.Sequential([
            tf.keras.layers.InputLayer(input_shape=(len(columns), )),
            tf.keras.layers.Dense(
                num_neurons,
                activation='relu',
                kernel_regularizer=tf.keras.regularizers.l2(l=beta)),
            tf.keras.layers.Dense(
                1,
                activation='sigmoid',
                kernel_regularizer=tf.keras.regularizers.l2(l=beta))
        ])

        model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=alpha),
                      loss=tf.keras.losses.MeanSquaredError(),
                      metrics=['mse'])

        with tqdm(total=epochs) as pbar:
            update = tf.keras.callbacks.LambdaCallback(
                on_epoch_end=lambda epoch, logs: pbar.update(1))

            hist = model.fit(x=X_train,
                             y=y_train,
                             batch_size=batch_size,
                             epochs=epochs,
                             validation_data=(X_test, y_test),
                             verbose=0,
                             callbacks=[update])

        key = ", ".join(columns)
        histories['Q2'][key] = {
            'mse': hist.history['mse'],
            'val_mse': hist.history['val_mse'],
            'loss': hist.history['loss'],
            'val_loss': hist.history['val_loss']
        }
        results[key] = training_result(hist.history['val_mse'], mode='loss')

    cols = min(results.keys(), key=lambda x: results[x])
    logger.log('Features left %s' % (cols))
    rfe(n_features, cols.split(", "))
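
# Hypothetical entry point for the recursive elimination above: start from
# every candidate feature column and recurse until n_features remain. The
# column selection is a placeholder.
# rfe(n_features=5, features_left=list(df.columns[:-1]))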
Example #11
def Q5():
    models = {
        'char_cnn_Adam_no_dropout': 'Char CNN No Dropout',
        'char_gru_Adam_no_dropout': 'Char GRU No Dropout',
        'word_cnn_Adam_no_dropout': 'Word CNN No Dropout',
        'word_gru_Adam_no_dropout': 'Word GRU No Dropout',
        'char_cnn_Adam_dropout': 'Char CNN Dropout',
        'char_gru_Adam_dropout': 'Char GRU Dropout',
        'word_cnn_Adam_dropout': 'Word CNN Dropout',
        'word_gru_Adam_dropout': 'Word GRU Dropout'
    }

    time = pd.read_csv('./results/time.csv', index_col=0)
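    # time.csv presumably stores total training time per model; dividing by
    # the 250-epoch run length gives the per-epoch times plotted below.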
    time = time.div(250)

    f, ax = plt.subplots()
    time.plot(kind='barh', ax=ax)
    ax.set_title('Model Timing')
    ax.set_xlabel('time per epoch (s)')
    ax.set_ylabel('model')
    x_extra = (max(time.max()) - min(time.min())) * 0.2
    ax.set_xlim((max(0, min(time.min()) - x_extra), max(time.max()) + x_extra))
    for p in ax.patches:
        ax.annotate("{:.2f}".format(p.get_width()),
                    (p.get_width() + 0.1, p.get_y() + p.get_height() / 2),
                    va='center')
    plt.tight_layout()
    plt.savefig('./results/Q5_time.png')
    plt.close()

    results = {
        model: pd.read_csv('./logs/' + model)
        for model in models.keys()
    }
    acc_df = pd.DataFrame({
        'No Dropout': {
            models[model].replace(' No Dropout', ''):
            results[model]['val_accuracy']
            for model in models if 'no_dropout' in model
        },
        'Dropout': {
            models[model].replace(' Dropout', ''):
            results[model]['val_accuracy']
            for model in models
            if 'dropout' in model and 'no_dropout' not in model
        }
    })
    loss_df = pd.DataFrame({
        'No Dropout': {
            models[model].replace(' No Dropout', ''):
            results[model]['val_loss']
            for model in models if 'no_dropout' in model
        },
        'Dropout': {
            models[model].replace(' Dropout', ''): results[model]['val_loss']
            for model in models
            if 'dropout' in model and 'no_dropout' not in model
        }
    })

    acc_df = acc_df.applymap(lambda x: training_result(x, mode='acc'))
    f, ax = plt.subplots()
    acc_df.plot(kind='barh', ax=ax)
    ax.set_title('Model Test Accuracies Comparison')
    ax.set_xlabel('accuracy')
    ax.set_ylabel('model')
    x_extra = (max(acc_df.max()) - min(acc_df.min())) * 0.2
    ax.set_xlim(
        (max(0,
             min(acc_df.min()) - x_extra), max(acc_df.max()) + x_extra))
    for p in ax.patches:
        ax.annotate("{:.5f}".format(p.get_width()),
                    (p.get_width() + 0.005, p.get_y() + p.get_height() / 2),
                    va='center')
    plt.tight_layout()
    plt.savefig('./results/Q5_accuracy_comparison.png')
    plt.close()

    loss_df = loss_df.applymap(lambda x: training_result(x, mode='loss'))
    f, ax = plt.subplots()
    loss_df.plot(kind='barh', ax=ax)
    ax.set_title('Model Test Loss Comparison')
    ax.set_xlabel('loss')
    ax.set_ylabel('model')
    x_extra = (max(loss_df.max()) - min(loss_df.min())) * 0.2
    ax.set_xlim(
        (max(0,
             min(loss_df.min()) - x_extra), max(loss_df.max()) + x_extra))
    for p in ax.patches:
        ax.annotate("{:.5f}".format(p.get_width()),
                    (p.get_width() + 0.05, p.get_y() + p.get_height() / 2),
                    va='center')
    plt.tight_layout()
    plt.savefig('./results/Q5_loss_comparison.png')
    plt.close()

    acc_results = pd.DataFrame(
        {models[model]: results[model]['val_accuracy']
         for model in results})
    acc_results.plot()
    plt.title('Model Test Accuracies')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig('./results/Q5_accuracy_epoch.png')
    plt.close()

    loss_results = pd.DataFrame(
        {models[model]: results[model]['val_loss']
         for model in results})
    loss_results.plot()
    plt.title('Model Test Loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig('./results/Q5_loss_epoch.png')
    plt.close()
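
# The patch-annotation loop above is repeated for every bar chart in Examples
# #11 and #12; a small helper (not in the original code) could factor it out:
def annotate_bars(ax, fmt='{:.5f}', pad=0.005):
    # Write each bar's width just past its end, vertically centered.
    for p in ax.patches:
        ax.annotate(fmt.format(p.get_width()),
                    (p.get_width() + pad, p.get_y() + p.get_height() / 2),
                    va='center')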
Example #12
def Q6():
    models = {
        'char_gru_Adam_no_dropout': 'Char GRU',
        'char_vanilla_Adam': 'Char Vanilla',
        'char_lstm_Adam': 'Char LSTM',
        'char_gru_2_layers_Adam': 'Char 2-Layer GRU',
        'char_gru_Adam_gradient_clipping': 'Char GRU with\nGradient Clipping',
        'word_gru_Adam_no_dropout': 'Word GRU',
        'word_vanilla_Adam': 'Word Vanilla',
        'word_lstm_Adam': 'Word LSTM',
        'word_gru_2_layers_Adam': 'Word 2-Layer GRU',
        'word_gru_Adam_gradient_clipping': 'Word GRU with\nGradient Clipping'
    }
    for model in models:
        LossAccPlot('./logs/' + model, models[model])

    results = {
        model: pd.read_csv('./logs/' + model)
        for model in models.keys()
    }
    acc_df = pd.DataFrame({
        'Char': {
            models[model].replace('Char ', ''): results[model]['val_accuracy']
            for model in models if 'char' in model
        },
        'Word': {
            models[model].replace('Word ', ''): results[model]['val_accuracy']
            for model in models if 'word' in model
        }
    })
    loss_df = pd.DataFrame({
        'Char': {
            models[model].replace('Char ', ''): results[model]['val_loss']
            for model in models if 'char' in model
        },
        'Word': {
            models[model].replace('Word ', ''): results[model]['val_loss']
            for model in models if 'word' in model
        }
    })

    acc_df = acc_df.applymap(lambda x: training_result(x, mode='acc'))
    f, ax = plt.subplots(figsize=(7.4, 5.8))
    acc_df.plot(kind='barh', ax=ax)
    ax.set_title('Model Test Accuracies Comparison')
    ax.set_xlabel('accuracy')
    ax.set_ylabel('model')
    x_extra = (max(acc_df.max()) - min(acc_df.min())) * 0.2
    ax.set_xlim(
        (max(0,
             min(acc_df.min()) - x_extra), max(acc_df.max()) + x_extra))
    for p in ax.patches:
        ax.annotate("{:.5f}".format(p.get_width()),
                    (p.get_width() + 0.005, p.get_y() + p.get_height() / 2),
                    va='center')
    ax.legend(loc='right', bbox_to_anchor=(1.0, 0.3))
    plt.tight_layout()
    plt.savefig('./results/Q6_accuracy_comparison.png')
    plt.close()

    loss_df = loss_df.applymap(lambda x: training_result(x, mode='loss'))
    f, ax = plt.subplots(figsize=(7.4, 5.8))
    loss_df.plot(kind='barh', ax=ax)
    ax.set_title('Model Test Loss Comparison')
    ax.set_xlabel('loss')
    ax.set_ylabel('model')
    x_extra = (max(loss_df.max()) - min(loss_df.min())) * 0.2
    ax.set_xlim(
        (max(0,
             min(loss_df.min()) - x_extra), max(loss_df.max()) + x_extra))
    for p in ax.patches:
        ax.annotate("{:.5f}".format(p.get_width()),
                    (p.get_width() + 0.05, p.get_y() + p.get_height() / 2),
                    va='center')
    plt.tight_layout()
    plt.savefig('./results/Q6_loss_comparison.png')
    plt.close()

    char_acc_results = pd.DataFrame({
        models[model].replace('Char ', ''): results[model]['val_accuracy']
        for model in results if 'char' in model
    })
    char_acc_results.plot()
    plt.title('Char Model Test Accuracies')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig('./results/Q6_char_accuracy_epoch.png')
    plt.close()

    char_loss_results = pd.DataFrame({
        models[model].replace('Char ', ''): results[model]['val_loss']
        for model in results if 'char' in model
    })
    char_loss_results.plot()
    plt.title('Char Model Test Loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig('./results/Q6_char_loss_epoch.png')
    plt.close()

    word_acc_results = pd.DataFrame({
        models[model].replace('Word ', ''): results[model]['val_accuracy']
        for model in results if 'word' in model
    })
    word_acc_results.plot()
    plt.title('Word Model Test Accuracies')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig('./results/Q6_word_accuracy_epoch.png')
    plt.close()

    word_loss_results = pd.DataFrame({
        models[model].replace('Word ', ''): results[model]['val_loss']
        for model in results if 'word' in model
    })
    word_loss_results.plot()
    plt.title('Word Model Test Loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend()
    plt.tight_layout()
    plt.savefig('./results/Q6_word_loss_epoch.png')
    plt.close()
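
# LossAccPlot is imported from elsewhere and not shown. A hedged sketch,
# assuming it reads a Keras CSVLogger file and saves paired accuracy/loss
# curves; the output path is an assumption:
import pandas as pd
import matplotlib.pyplot as plt


def LossAccPlot(log_path, title):
    log = pd.read_csv(log_path)
    f, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
    log[['accuracy', 'val_accuracy']].plot(ax=ax1, title=title + ' Accuracy')
    log[['loss', 'val_loss']].plot(ax=ax2, title=title + ' Loss')
    plt.tight_layout()
    plt.savefig('./results/' + title.replace('\n', ' ') + '.png')
    plt.close()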
Example #13
import argparse

import pandas as pd
import matplotlib.pyplot as plt

from utils.logger import Logger
from utils.dict_json import read_json_to_dict
from utils.acc_loss import acc_converge_epoch, loss_converge_epoch, smooth_curve, training_result

parser = argparse.ArgumentParser()
parser.add_argument('-D', '--data', help='Path to result json file', required=True)
args = parser.parse_args()

logger = Logger()
logger.log('Starting q3_analyze.py...')

logger.log('Loading \"' + args.data + '\"')
histories = read_json_to_dict(args.data)
neuron_epoch = pd.DataFrame(histories['Q3']['cv']['accuracy'])
cv_accuracy = neuron_epoch.applymap(lambda x: training_result(x))
print(cv_accuracy)
cv_accuracy = cv_accuracy.mean()
num_neurons = histories['Q3']['optimal']['optimal_num']

def q3a1():
    logger.log('Analyzing Q3(a1)...')
    
    f, ax = plt.subplots(5, 1, figsize=(10, 25))
    for i, num in enumerate(neuron_epoch.columns, start=0):
        ax[i].plot(neuron_epoch[num][0][1:], label='Fold 1')
        ax[i].plot(neuron_epoch[num][1][1:], label='Fold 2')
        ax[i].plot(neuron_epoch[num][2][1:], label='Fold 3')
        ax[i].plot(neuron_epoch[num][3][1:], label='Fold 4')
        ax[i].plot(neuron_epoch[num][4][1:], label='Fold 5')