Example #1
def show(args):
    # Display samples from the train or test split, starting at index params["start"].
    params = vars(args)
    data_bunch = load_data(params["datadir"], 1, 1)
    if params["type"] == "train":
        indices = np.arange(params["start"],
                            data_bunch["train_data"].shape[0] - 1)
        show_data(data_bunch["train_data"][params["start"]:],
                  data_bunch["train_label"][params["start"]:], indices)
    else:
        indices = np.arange(params["start"],
                            data_bunch["test_data"].shape[0] - 1)
        show_data(data_bunch["test_data"][params["start"]:],
                  data_bunch["test_label"][params["start"]:], indices)
def main():
    # Basic usage: python train.py data_directory
    # (Assumes argparse, json, torch and torchvision.models are imported, and that
    #  `th` is a training-helper module providing load_data, transform_data, etc.)
    parser = argparse.ArgumentParser(description='Optional add-ons.')
    parser.add_argument('data_directory')
    parser.add_argument('--save_dir', action='store')
    parser.add_argument('--arch', action='store')
    parser.add_argument('--learning_rate', action='store', type=float)
    parser.add_argument('--hidden_units', action='store', type=int)
    parser.add_argument('--epochs', action='store', type=int)
    parser.add_argument('--gpu', action='store_true')
    args = parser.parse_args()

    # get category class mapping to names for flowers
    with open('cat_to_name.json', 'r') as f:
        cat_to_name = json.load(f)

    # capture train and validation data and transform
    train_data, valid_data = th.load_data(args.data_directory)
    trainloader, validloader, class_to_idx = th.transform_data(
        train_data, valid_data)

    # Use GPU if it was requested and is available
    device = torch.device(
        'cuda' if (args.gpu and torch.cuda.is_available()) else 'cpu')

    print('Building model...')
    # set default to vgg11
    args.arch = args.arch if args.arch else 'vgg11'
    model = models.__dict__[args.arch](pretrained=True)
    model.arch = args.arch

    # update model classifier to customize for our problem (102 category probability calcs)
    hidden_units = args.hidden_units if args.hidden_units else 4098
    model.classifier = th.update_classifier(model, hidden_units)

    print('Training model...')
    epochs = args.epochs if args.epochs else 5
    learning_rate = args.learning_rate if args.learning_rate else 0.001
    th.train_model(model, trainloader, validloader, epochs, learning_rate,
                   device)

    # add class_to_idx attribute to model
    model.class_to_idx = class_to_idx

    print('Saving model...')
    save_dir = args.save_dir if args.save_dir else 'saved_models'
    th.save_model(model, save_dir)

    print('Trained model saved!')
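The example stops after main() is defined; a conventional entry-point guard would complete it (the sample command-line values below are illustrative, not from the source):

if __name__ == '__main__':
    # Example invocation (values are illustrative):
    #   python train.py flowers --arch vgg13 --hidden_units 512 --epochs 8 --gpu
    main()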
device = 'cpu'
if gpu:
    if torch.cuda.is_available():
        device = 'cuda'
        print("GPU mode enabled\n")
    else:
        print("Device doesn't support CUDA\n")
        exit(1)
else:
    device = 'cpu'
    print("Training will run on the CPU; enable GPU mode for faster training\n")

print("Selected Device: ", device, "\n")

# load the datasets
data, loader = load_data(data_dir)

# make model
model = make_model(arch, hidden_units, drop)
model.to(device)

# set optimizer state according to arch 
if model.name == 'vgg16' or model.name == 'densenet121':
    optimizer = optim.Adam(model.classifier.parameters(), lr=lr)
elif model.name == 'resnet50':
    optimizer = optim.Adam(model.fc.parameters(), lr=lr)
    
# train model, get new state of optimizer in order to save it in checkpoint
trained_model, optimizer = train_model(model, optimizer, epochs, device, data, loader)

# check accuracy
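The snippet ends at the accuracy check; a minimal sketch of what it could look like, assuming a validation DataLoader named validloader that yields (images, labels) batches (adapt to however `loader` is structured above):

# Sketch of an accuracy check; `validloader` is an assumed validation DataLoader.
trained_model.eval()
correct, total = 0, 0
with torch.no_grad():
    for images, labels in validloader:
        images, labels = images.to(device), labels.to(device)
        preds = trained_model(images).argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)
print("Validation accuracy: %.3f" % (correct / total))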
Example #4
from tensorflow.keras import models
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, LSTM
from tensorflow.keras.utils import to_categorical, normalize
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l1, l2
import numpy as np
from train_helper import load_data, preprocess_data, plot_learncurve
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from time import process_time
import matplotlib.pyplot as plt

data_dir = '/home/kangle/dataset/PedBicCarData'
train_data, train_label, test_data, test_label = load_data(
    data_dir, 2, 2, 6, 1)
train_data, train_label, test_data, test_label = preprocess_data(
    train_data, train_label, test_data, test_label, 'rnn_a')

# train_data = np.squeeze(train_data)
# test_data = np.squeeze(test_data)
# train_data = np.transpose(train_data, (0,2,1))
# test_data = np.transpose(test_data, (0,2,1))

train_data, val_data, train_label, val_label = train_test_split(
    train_data, train_label, test_size=0.1, random_state=42)
print("Split training data into training and validation data:\n")
print("training data: %d" % train_data.shape[0])
print("validation data: %d" % val_data.shape[0])

model = models.Sequential()
model.add(LSTM(64, input_shape=(train_data.shape[1], train_data.shape[2])))
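The example stops after the first LSTM layer; one possible continuation is sketched below, assuming preprocess_data produced one-hot labels (if the labels are integer-encoded, use sparse_categorical_crossentropy instead):

# Hypothetical continuation: classification head, compile and fit.
num_classes = train_label.shape[1]  # assumes one-hot encoded labels
model.add(Dropout(0.3))
model.add(Dense(num_classes, activation='softmax'))
model.compile(optimizer=Adam(learning_rate=1e-3),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
history = model.fit(train_data, train_label,
                    validation_data=(val_data, val_label),
                    epochs=20, batch_size=64)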
Example #5
def train(args):
    # Train and evaluate a classifier according to the JSON config referenced by args.
    params = vars(args)
    with open(params["config"], mode='r') as f:
        paramset = json.load(f)
    data_dir = paramset["root_dir"]

    if "sample_rate" in paramset:
        samp_rate_t = paramset["sample_rate"]["sample_rate_t"]
        samp_rate_f = paramset["sample_rate"]["sample_rate_f"]
    else:
        samp_rate_t = 1
        samp_rate_f = 1
    data_bunch = load_data(data_dir, samp_rate_t, samp_rate_f)
    test_data_visual = np.copy(data_bunch["test_data"])

    if "dimension_reduction" in paramset:
        dim_reducer = paramset["dimension_reduction"]["method"]
        num_components = paramset["dimension_reduction"]["n_components"]
        algo_map[dim_reducer]["parameters"]["n_components"] = num_components
        print('\nbegin dimensionality reduction process.')
        module = importlib.import_module(algo_map[dim_reducer]["module"])
        reducer = getattr(module, algo_map[dim_reducer]["function"])(
            **algo_map[dim_reducer]["parameters"])
        data_bunch = preprocess_data(data_bunch, dim_reducer)
        reducer.fit(data_bunch["train_data"])
        data_bunch["train_data"] = reducer.transform(data_bunch["train_data"])
        data_bunch["test_data"] = reducer.transform(data_bunch["test_data"])
        print('\nafter dimensionality reduction:')
        print(data_bunch["train_data"].shape)
        print(data_bunch["test_data"].shape)

    classify_method = paramset["classifier"]["method"]
    classify_parameter = paramset["classifier"]["parameter"]
    if classify_method in nn_type:
        data_bunch = preprocess_data(data_bunch, classify_method)
        classifier = load_model(paramset["classifier"]["model"],
                                data_bunch["train_data"].shape)
        history = nnet_fit(data_bunch, classifier,
                           paramset["classifier"]["parameter"])
        plot_learncurve(classify_method, history)
    else:
        module = importlib.import_module(algo_map[classify_method]["module"])
        classifier = getattr(
            module,
            algo_map[classify_method]["function"])(**classify_parameter)
        classifier.fit(data_bunch["train_data"], data_bunch["train_label"])
        plot_learncurve(classify_method,
                        estimator=classifier,
                        data=data_bunch["train_data"],
                        label=data_bunch["train_label"],
                        train_sizes=np.linspace(0.05, 0.2, 5))

    print('\npredict for test data.')
    test_pred = classifier.predict(data_bunch["test_data"])
    train_pred = classifier.predict(data_bunch["train_data"])
    train_label = data_bunch["train_label"]
    test_label = data_bunch["test_label"]
    if len(test_pred.shape) > 1:
        test_pred = np.argmax(test_pred, axis=1)
        train_pred = np.argmax(train_pred, axis=1)
        test_label = np.argmax(data_bunch["test_label"], axis=1)
        train_label = np.argmax(data_bunch["train_label"], axis=1)

    print('\nevaluate the prediction (train data).')
    train_conf = confusion_matrix(train_label, train_pred)
    train_precision = precision_score(train_label, train_pred, average=None)
    train_recall = recall_score(train_label, train_pred, average=None)
    print(train_conf)
    print(train_precision)
    print(train_recall)

    print('\nevaluate the prediction (test data).')
    test_conf = confusion_matrix(test_label, test_pred)
    test_precision = precision_score(test_label, test_pred, average=None)
    test_recall = recall_score(test_label, test_pred, average=None)
    print(test_conf)
    print(test_precision)
    print(test_recall)

    pred_result = {
        "train_conf": train_conf,
        "train_precision": train_precision,
        "train_recall": train_recall,
        "test_conf": test_conf,
        "test_precision": test_precision,
        "test_recall": test_recall
    }

    print('\ngenerate report file.')
    if classify_method in nn_type:
        log_file = write_log(paramset, pred_result, classifier, history)
    else:
        log_file = write_log(paramset, pred_result)
    print(log_file)

    if params["show_misclassified"]:
        indices = [
            i for i in range(len(data_bunch["test_label"]))
            if test_pred[i] != data_bunch["test_label"][i]
        ]
        show_data(test_data_visual[indices], data_bunch["test_label"][indices],
                  indices, test_pred[indices])
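train() expects parsed arguments with config and show_misclassified entries; a sketch of the command-line wiring that could drive it (flag names are inferred from the params keys, not taken from the source):

import argparse

# Hypothetical CLI wiring; flag names mirror the params keys used in train().
parser = argparse.ArgumentParser(description='Train a classifier from a JSON config.')
parser.add_argument('--config', required=True, help='path to the JSON parameter file')
parser.add_argument('--show_misclassified', action='store_true')
train(parser.parse_args())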
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.manifold import LocallyLinearEmbedding
from sklearn.metrics import confusion_matrix, precision_score, recall_score
import matplotlib.pyplot as plt
import numpy as np
from train_helper import load_data
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV



data_dir = '/home/kangle/dataset/PedBicCarData'
train_data, train_label, test_data, test_label = load_data(data_dir, 4, 5)

print("Data sample distribution in training set: %d %d %d %d %d\n" % (np.count_nonzero(train_label==1),
      np.count_nonzero(train_label==2), np.count_nonzero(train_label==3),
      np.count_nonzero(train_label==4), np.count_nonzero(train_label==5)))
print("Data sample distribution in test set: %d %d %d %d %d\n" % (np.count_nonzero(test_label==1),
      np.count_nonzero(test_label==2), np.count_nonzero(test_label==3),
      np.count_nonzero(test_label==4), np.count_nonzero(test_label==5)))

scaler = StandardScaler()
train_data = scaler.fit_transform(train_data)
test_data = scaler.transform(test_data)

# print('\nbegin LLE process.')
# lle = LocallyLinearEmbedding(n_components=20,n_neighbors=10,n_jobs=-1)
# train_feature = lle.fit_transform(train_data)
# print(train_feature.shape)
#
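The snippet ends before a classifier is fit; given the GridSearchCV and GradientBoostingClassifier imports above, a plausible next step is a grid search such as the sketch below (the parameter grid is illustrative):

# Illustrative grid search over GradientBoostingClassifier (values are examples).
param_grid = {
    'n_estimators': [100, 200],
    'learning_rate': [0.05, 0.1],
    'max_depth': [3, 5],
}
search = GridSearchCV(GradientBoostingClassifier(), param_grid, cv=3, n_jobs=-1)
search.fit(train_data, train_label)
print(search.best_params_)

test_pred = search.predict(test_data)
print(confusion_matrix(test_label, test_pred))
print(precision_score(test_label, test_pred, average=None))
print(recall_score(test_label, test_pred, average=None))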
Example #7
#!/usr/bin/env python

import train_helper

n_epochs = 20
batch_size = 32

dataloaders, class_names = train_helper.load_data('../../data',
                                                  batch_size=batch_size)
model, criterion, optimizer, scheduler = train_helper.get_model(
    dataloaders, n_epochs)
model = train_helper.train_model(model,
                                 criterion,
                                 optimizer,
                                 scheduler,
                                 dataloaders,
                                 n_epochs=n_epochs)

train_helper.save_model(model, 'model.pt')
train_helper.save_mobile_model(model, 'model_mobile.pt')
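train_helper itself is not shown; a minimal sketch of what its load_data could look like for an image-folder dataset, matching the (dataloaders, class_names) return used above (this is an assumption, not the actual helper):

# Hypothetical load_data: ImageFolder datasets plus DataLoaders per split.
import torch
from torchvision import datasets, transforms

def load_data(data_dir, batch_size=32):
    tfm = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    image_datasets = {
        split: datasets.ImageFolder(f"{data_dir}/{split}", transform=tfm)
        for split in ('train', 'val')
    }
    dataloaders = {
        split: torch.utils.data.DataLoader(ds, batch_size=batch_size,
                                           shuffle=(split == 'train'))
        for split, ds in image_datasets.items()
    }
    return dataloaders, image_datasets['train'].classes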
from tensorflow.keras import models
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, AveragePooling2D, Input, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical, normalize
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.regularizers import l1, l2
from tensorflow.keras.initializers import RandomUniform, GlorotUniform, he_uniform
from tensorflow.keras.callbacks import LearningRateScheduler, ReduceLROnPlateau, EarlyStopping, TensorBoard, ModelCheckpoint, Callback
from tensorflow.keras.backend import get_value, set_value
from train_helper import load_data, preprocess_data, plot_learncurve
from sklearn.model_selection import train_test_split
from time import process_time
from lr_finder import LRFinder
from sgdr import SGDRScheduler
from cyclic_lr import CyclicLR

data_dir = '/home/kangle/dataset/PedBicCarData'
data_bunch = load_data(data_dir, 2, 2, 6, 1)
data_bunch = preprocess_data(data_bunch, 'cnn')

model = models.Sequential()
regularizer = None  #l2(1e-4)
initializer = he_uniform()
model.add(
    Conv2D(32, [3, 3],
           input_shape=data_bunch["train_data"].shape[1:],
           activation='relu',
           kernel_initializer=initializer,
           kernel_regularizer=regularizer,
           padding='same',
           name='conv_1'))
model.add(MaxPooling2D())
model.add(