import numpy as np
from os.path import isdir

import autoencoder
import conv_autoencoder


def create_model(model_num, model_type='sda', data_norm_type='area', overwrite=False,
                 layer_sizes=[50, 50, 50], corruption_levels=[0.3, 0.3, 0.3],
                 learning_rate=0.01):
    # Load spectrogram features with the requested normalisation
    specs = get_spectrograms(norm_type=data_norm_type)
    
    output_folder = 'lung-sound-deep-learning-models/{}_{}'.format(model_type,model_num)
    if isdir(output_folder) and not overwrite:  # Skip if the model already exists
        print('Model already exists: ' + output_folder)
        return

    if model_type == 'sda':
        specs = np.expand_dims(specs, 1)
        autoencoder.train_autoencoder(specs, output_folder, batch_size=20,
                                      learning_rate=learning_rate, num_epochs=10000,
                                      momentum_flag=True, momentum=0.9,
                                      layer_sizes=layer_sizes,
                                      corruption_levels=corruption_levels,
                                      nonlinearity='sigmoid')
    elif model_type == 'conv':
        specs = np.expand_dims(specs, 1)
        conv_autoencoder.train_conv_autoencoder(specs, output_folder, (20, 49),
                                                learning_rate=learning_rate)
    else:
        print('Model type not implemented: ' + model_type)
        return
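
A minimal usage sketch; the model numbers and the alternative hyperparameters below are hypothetical, shown only to illustrate the call:

# Hypothetical calls: train an SdA and a convolutional variant on the same data
create_model(1, model_type='sda', layer_sizes=[100, 50, 25],
             corruption_levels=[0.2, 0.2, 0.2], learning_rate=0.005)
create_model(2, model_type='conv', learning_rate=0.01)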
Example #2
def train_mnist_sda_model():
    train_set_x = load_data()
    # Reshape flat MNIST vectors to (N, 28, 28), then add a channel axis -> (N, 1, 28, 28)
    train_set_x = np.reshape(train_set_x, (train_set_x.shape[0], 28, 28))
    train_set_x = np.expand_dims(train_set_x, 1)
    output_folder = 'lung-sound-deep-learning-models/mnist/sda0'
    autoencoder.train_autoencoder(train_set_x, output_folder)
Example #3
def trainStages():
    for stage in STAGES:
        # Load the pickled frames for this stage and train a per-stage autoencoder
        with open("data/training/stages/stage%s" % stage, "rb") as f:
            stageFrames = pickle.load(f)
        autoencoder.train_autoencoder(stageFrames, stage)
Example #4
############# Helper functions
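
Two helpers used in this example, unison_shuffle and train_autoencoder, are not shown; below are minimal sketches of what they plausibly do. The layer sizes, loss, and training settings are assumptions, chosen only so that model.layers[2] matches the bottleneck read by embedding_fn further down.

import numpy as np
import keras

def unison_shuffle(a, b):
    # Shuffle two same-length sequences with a single shared permutation
    assert len(a) == len(b)
    perm = np.random.permutation(len(a))
    return np.asarray(a)[perm], np.asarray(b)[perm]

def train_autoencoder(x_train, x_test, path_to_model):
    # Hypothetical dense autoencoder; the second Dense layer (model.layers[2])
    # is the bottleneck whose activations serve as the embedding
    inp = keras.layers.Input(shape=(x_train.shape[1],))    # model.layers[0]
    h = keras.layers.Dense(128, activation='relu')(inp)    # model.layers[1]
    code = keras.layers.Dense(32, activation='relu')(h)    # model.layers[2]
    h = keras.layers.Dense(128, activation='relu')(code)
    out = keras.layers.Dense(x_train.shape[1])(h)
    model = keras.models.Model(inp, out)
    model.compile(optimizer='adam', loss='mse')
    model.fit(x_train, x_train, validation_data=(x_test, x_test),
              epochs=50, batch_size=32)
    model.save(path_to_model + '.h5')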


path_to_model = './models/autoencoder'

# Flatten each example into a 1-D feature vector, then shuffle data and labels together
x = np.array([example.flatten() for example in allData])
x, y = unison_shuffle(x, stringLabels)

## Split into test/train
num_training_samples = int(0.7 * len(x))
x_train = x[:num_training_samples]
y_train = y[:num_training_samples]
x_test = x[num_training_samples:]
y_test = y[num_training_samples:]

train_autoencoder(x_train, x_test, path_to_model)


## Calculate encodings
model = keras.models.load_model(path_to_model + '.h5')
# Map the input layer to the bottleneck (layers[2]) activations; the extra 0
# passed below is the Keras learning-phase flag (0 = test mode)
embedding_fn = keras.backend.function([model.layers[0].input], [model.layers[2].output])
all_sample_encodings = embedding_fn([x, 0])[0]
train_samples_encodings = embedding_fn([x_train, 0])[0]
test_sample_encodings = embedding_fn([x_test, 0])[0]
letter_encodings = compute_letter_encodings(train_samples_encodings, y_train)

## Calculate train/test accuracy
train_accuracy = get_overall_accuracy(letter_encodings, train_samples_encodings, y_train)
test_accuracy = get_overall_accuracy(letter_encodings, test_sample_encodings, y_test)

print()
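
compute_letter_encodings and get_overall_accuracy are not shown either; plausible minimal sketches, assuming a nearest-centroid classifier in the embedding space:

def compute_letter_encodings(encodings, labels):
    # Hypothetical helper: average embedding (centroid) per letter class
    labels = np.asarray(labels)
    return {label: encodings[labels == label].mean(axis=0)
            for label in set(labels)}

def get_overall_accuracy(letter_encodings, encodings, labels):
    # Hypothetical helper: classify each sample by the nearest letter centroid
    letters = list(letter_encodings)
    centroids = np.stack([letter_encodings[l] for l in letters])
    dists = np.linalg.norm(encodings[:, None, :] - centroids[None, :, :], axis=-1)
    predicted = [letters[i] for i in dists.argmin(axis=1)]
    return float(np.mean([p == t for p, t in zip(predicted, labels)]))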
Example #5
import numpy as np
import pandas as pd

import autoencoder

# Show all columns when printing DataFrames
pd.set_option('display.max_columns', None)

train = pd.read_csv('./train.csv')
test = pd.read_csv('./test.csv')

all_cols = list(train.columns.values)
feat_cols = [x for x in all_cols if x not in ['id', 'target']]

x_train = train[feat_cols]
x_test = test[feat_cols]
y_train = train['target']

# Learn an unsupervised encoding of the features, then append the encoded
# columns to the raw features for both train and test
encoder = autoencoder.train_autoencoder(x_test, x_train)
x_test_encode = autoencoder.encode_features(x_test, encoder)
x_train_encode = autoencoder.encode_features(x_train, encoder)

x_train = pd.concat([x_train, x_train_encode], axis=1)
x_test = pd.concat([x_test, x_test_encode], axis=1)

params = {
    'max_depth': 7,
    'eta': 0.05,
    'silent': 1,
    'objective': 'binary:logistic',
    'alpha': 4,
    'lambda': 10,
    'min_child_weight': 1,
    'gamma': 2,
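
The example is cut off mid-dict; given the parameter names (eta, objective='binary:logistic'), it presumably continues by training an XGBoost booster. A minimal sketch under that assumption, using the standard xgboost API:

import xgboost as xgb

# Hypothetical continuation, assuming the params dict above is completed
dtrain = xgb.DMatrix(x_train, label=y_train)
booster = xgb.train(params, dtrain, num_boost_round=500)
test_pred = booster.predict(xgb.DMatrix(x_test))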
Example #6
def preprocess(X,
               y,
               X_val,
               test_data,
               verbose=True,
               scale=True,
               autoencoder=True,
               qda=True,
               knn=False,
               xgb=False):
    """Preprocess the data by adding features and scaling it.

    For each method, we train the model on the training data using the
    corresponding labels, then apply the same transformation to
    validation and test data.

    Args:
        X (numpy ndarray): Training data
        y (numpy ndarray): Training labels
        X_val (numpy ndarray): Validation data
        test_data (numpy ndarray): Test data for submission
        verbose (bool): print progress messages
        scale (bool): scale the data
        autoencoder (bool): use autoencoder feature
        qda (bool): use Quadratic Discriminant Analysis feature
        knn (bool): use k-nearest neighbours feature
        xgb (bool): use XGBoost feature

    Returns:
        The dataset appropriately transformed by the selected methods.
    """
    if autoencoder:
        if verbose:
            print("## Autoencoder")
            print("### Train...", end=" ", flush=True)
        ae = train_autoencoder(X, size=32, epochs=20, verbose=1 if verbose else 0)
        if verbose:
            print("done.")
            print("### Evaluate...", end=" ", flush=True)
        ae.eval()
        X_ae = ae.layer1(Variable(torch.Tensor(X))).data
        X = np.c_[X, X_ae]
        X_val_ae = ae.layer1(Variable(torch.Tensor(X_val))).data
        X_val = np.c_[X_val, X_val_ae]
        test_data_ae = ae.layer1(Variable(torch.Tensor(test_data))).data
        test_data = np.c_[test_data, test_data_ae]
        if verbose:
            print("done.")

    if qda:
        if verbose:
            print("## Quadratic Discriminant Analysis...", end=" ", flush=True)
        qdaclf = QuadraticDiscriminantAnalysis(reg_param=0.02)
        qdaclf.fit(X, y)
        X_qda = qdaclf.predict_proba(X)
        X = np.c_[X, X_qda[:, 1]]
        X_val_qda = qdaclf.predict_proba(X_val)
        X_val = np.c_[X_val, X_val_qda[:, 1]]
        test_data_qda = qdaclf.predict_proba(test_data)
        test_data = np.c_[test_data, test_data_qda[:, 1]]
        if verbose:
            print("done.")

    if knn:
        if verbose:
            print("## K-Nearest Neighbours...", end=" ", flush=True)
        knnclf = KNeighborsClassifier(n_neighbors=10, p=2, n_jobs=-1)
        knnclf.fit(X, y)
        X_knn = knnclf.predict_proba(X)
        X = np.c_[X, X_knn[:, 1]]
        X_val_knn = knnclf.predict_proba(X_val)
        X_val = np.c_[X_val, X_val_knn[:, 1]]
        test_data_knn = knnclf.predict_proba(test_data)
        test_data = np.c_[test_data, test_data_knn[:, 1]]
        if verbose:
            print("done.")

    if xgb:
        if verbose:
            print("## XGBoost...", end=" ", flush=True)
        xgbclf = XGBClassifier(max_depth=3,
                               learning_rate=0.1,
                               n_estimators=1000,
                               gamma=10,
                               min_child_weight=10,
                               objective='binary:logistic',
                               n_jobs=4)
        xgbclf.fit(X, y)
        X_xgb = xgbclf.predict_proba(X)
        X_val_xgb = xgbclf.predict_proba(X_val)
        X = np.c_[X, X_xgb[:, 1]]
        X_val = np.c_[X_val, X_val_xgb[:, 1]]
        test_data_xgb = xgbclf.predict_proba(test_data)
        test_data = np.c_[test_data, test_data_xgb[:, 1]]
        if verbose:
            print("done.")

    if scale:
        if verbose:
            print("## Scaling...", end=" ", flush=True)
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        X_val = scaler.transform(X_val)
        test_data = scaler.transform(test_data)
        if verbose:
            print("done.")

    return X, y, X_val, test_data
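
A minimal usage sketch of the whole pipeline; the feature arrays are placeholders for whatever the caller has loaded:

# Hypothetical call: append autoencoder and QDA features, then standardise
X, y, X_val, test_data = preprocess(X, y, X_val, test_data,
                                    scale=True, autoencoder=True,
                                    qda=True, knn=False, xgb=False)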
Example #7
                predictions.append(output_indices)

                # Compute token-level relevance scores for the predicted spans
                explainer = BertExplainer(model)
                relevance, attentions, self_attentions = explainer.explain(
                    input_ids, segment_ids, input_mask,
                    [o["span"] for o in output_indices.values()])
                # Weight the BERT encoder inputs by their summed relevance and
                # use the summed relevance itself as the reconstruction target
                input_tensor = torch.stack([
                    r.sum(-1).unsqueeze(-1) *
                    explainer.layer_values_global["bert.encoder"]["input"][0]
                    for r in relevance
                ], 0)
                target_tensor = torch.stack(relevance, 0).sum(-1)
                loss = train_autoencoder(input_tensor,
                                         target_tensor,
                                         encoder1,
                                         decoder1,
                                         encoder_optimizer,
                                         decoder_optimizer,
                                         criterion,
                                         max_length=13)

                print('Encoder loss: %.4f' % loss)

            # Print the per-example results, grouping by example_id
            index = None
            for example in examples:
                if index != example.example_id:
                    pp.pprint(example.para_text)
                    index = example.example_id
                    print('\n')
                    print(
                        colored(