Example #1
import gzip
from time import time

import numpy as np

# one_hot_encoding, neural_network, fit, sgd and validate_accuracy are
# project-local helpers defined elsewhere in the surrounding project.


def main():
    start_time = time()
    print("---------- main1 --------------")
    f0 = gzip.open('/home/luca/data/mnist/train-images-idx3-ubyte.gz', 'r')
    f1 = gzip.open('/home/luca/data/mnist/t10k-images-idx3-ubyte.gz', 'r')
    l0 = gzip.open('/home/luca/data/mnist/train-labels-idx1-ubyte.gz', 'r')
    l1 = gzip.open('/home/luca/data/mnist/t10k-labels-idx1-ubyte.gz', 'r')
    X_train = np.frombuffer(f0.read(), dtype=np.uint8,
                            offset=16).reshape(-1, 28 * 28)
    X_test = np.frombuffer(f1.read(), dtype=np.uint8,
                           offset=16).reshape(-1, 28 * 28)
    y_train = np.frombuffer(l0.read(), dtype=np.uint8, offset=8)
    y_test = np.frombuffer(l1.read(), dtype=np.uint8, offset=8)

    y_train = one_hot_encoding(y_train)
    y_label = one_hot_encoding(y_test)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_train, X_test = X_train - mean, X_test - mean
    X_train, X_test = X_train / std, X_test / std

    model = neural_network((89, 'TanH'), (10, 'Sigmoid'),
                           input_nodes=784,
                           seed=20190119)
    model = fit(x_train=X_train,
                y_train=y_train,
                x_test=X_test,
                y_test=y_label,
                model=model,
                optimizer=sgd(epochs=50,
                              eta=0.35,
                              etaN=0.15,
                              decay_type='exponential'),
                batch_size=60,
                eval_every=5,
                early_stop=True,
                seed=20190119)

    validate_accuracy(x_test=X_test, y_test=y_test, model=model)

    print("--- %s seconds ---" % (time() - start_time))
Example #2
# Same imports and project-local helpers as in Example #1.
def main():
    start_time = time()
    print("---------- main5 --------------")
    f0 = gzip.open('/home/luca/data/mnist/train-images-idx3-ubyte.gz', 'r')
    f1 = gzip.open('/home/luca/data/mnist/t10k-images-idx3-ubyte.gz', 'r')
    l0 = gzip.open('/home/luca/data/mnist/train-labels-idx1-ubyte.gz', 'r')
    l1 = gzip.open('/home/luca/data/mnist/t10k-labels-idx1-ubyte.gz', 'r')
    X_train = np.frombuffer(f0.read(), dtype=np.uint8,
                            offset=16).reshape(-1, 28 * 28)
    X_test = np.frombuffer(f1.read(), dtype=np.uint8,
                           offset=16).reshape(-1, 28 * 28)
    y_train = np.frombuffer(l0.read(), dtype=np.uint8, offset=8)
    y_test = np.frombuffer(l1.read(), dtype=np.uint8, offset=8)

    y_train = one_hot_encoding(y_train)
    y_label = one_hot_encoding(y_test)
    mean = np.mean(X_train)
    std = np.std(X_train)
    X_train, X_test = X_train - mean, X_test - mean
    X_train, X_test = X_train / std, X_test / std

    model = neural_network((89, 'TanH'), (10, 'Softmax'),
                           input_nodes=784,
                           seed=20190119,
                           weight_init='scaled')
    model = fit(x_train=X_train,
                y_train=y_train,
                x_test=X_test,
                y_test=y_label,
                model=model,
                optimizer=sgd(epochs=50,
                              eta=0.15,
                              etaN=0.05,
                              decay_type='exponential',
                              beta=0.85),
                batch_size=60,
                eval_every=5,
                early_stop=True,
                loss_function='cross-entropy',
                seed=20190119,
                dropout=0.8)

    validate_accuracy(x_test=X_test, y_test=y_test, model=model)

    print("--- %s seconds ---" % (time() - start_time))
Example #4
        # (fragment: the start of this example is truncated; this is the
        # tail of the first dataset_option branch)
        print(train_set_x.shape)
        print(test_set_x_orig.shape)
        test_set_x = test_set_x_orig
        print(test_set_x.shape)
        num_px = train_set_x_orig.shape[1]

        X = train_set_x / 255
        Y = train_set_y
        X_test = test_set_x / 255
        Y_test = test_set_y

        print(Y)
        print(Y_test)
        print("Y_test.shape : " + str(Y_test.shape))
        #print("X_test.shape : " + str(X_test.shape))
        dict = one_hot_encoding(dict={"Y": Y, "Y_test": Y_test})
        Y = dict["Y"]
        Y_test = dict["Y_test"]
        print(Y)
        print(Y_test)

        print("Y.shape : " + str(Y.shape))
        print("X.shape : " + str(X.shape))
        print("Y_test.shape : " + str(Y_test.shape))
        print("X_test.shape : " + str(X_test.shape))

    elif dataset_option == "S":
        train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset_SIGNS(
        )
        train_set_x = train_set_x_orig
        print(train_set_x.shape)
        X_test = test_set_x / 255
        Y_test = test_set_y
        print(Y_test)

        # Normalize inputs: zero-center each feature
        X -= np.mean(X, axis=0)
        X_test -= np.mean(X_test, axis=0)

        # Scale each feature to unit variance (divide by the standard deviation)
        X /= np.std(X, axis=0)
        X_test /= np.std(X_test, axis=0)

        # One Hot Encoding
        labels = {'Y': Y, 'Y_test': Y_test}
        encoded = one_hot_encoding(labels)
        Y = encoded['Y']
        Y_test = encoded['Y_test']
        print(Y)

        print("Y.shape : " + str(Y.shape))
        print("X.shape : " + str(X.shape))
        print("Y_test.shape : " + str(Y_test.shape))
        print("X_test.shape : " + str(X_test.shape))

    elif dataset_option == "N":

        #test = sio.loadmat('datasets/Digit_Classification-BigDataset.mat')
        #X = test['X'][:]
        #Y = test['Y'][:]
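Unlike the MNIST examples, this snippet calls one_hot_encoding with a dict of label arrays and reads the encoded arrays back by key. A minimal sketch consistent with that call site; the column-per-sample layout of the returned arrays is an assumption:

import numpy as np

def one_hot_encoding(dict):  # the call site passes the keyword `dict=`
    encoded = {}
    for key, labels in dict.items():
        flat = np.asarray(labels).reshape(-1).astype(int)
        num_classes = int(flat.max()) + 1
        # Column-per-sample layout, shape (num_classes, m) -- an assumption.
        encoded[key] = np.eye(num_classes)[flat].T
    return encoded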
Example #5
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb

# `dataset` is the California-housing DataFrame loaded earlier in the
# script; `f` is a project-local helper module.
tot_bedrooms = dataset['total_bedrooms']
tot_bedrooms_clear = tot_bedrooms[~np.isnan(tot_bedrooms)]  # drop NaN values
median = np.median(tot_bedrooms_clear)  # median of the distribution
tot_bedrooms.fillna(median, inplace=True)  # replace NaNs with the median
dataset['total_bedrooms'] = tot_bedrooms  # write the fixed column back

# second problem: ocean_proximity has categorical values --> one-hot encoding
drop = True

if drop:
    dataset_drop = f.one_hot_encoding(dataset, drop=drop)

    # put the label column at the end of the dataset
    label = dataset_drop['median_house_value']
    dataset_drop.pop('median_house_value')
    dataset_drop['median_house_value'] = label

    # show the correlation between variables
    plt.figure(figsize=(15, 8))
    corr = dataset_drop.corr()
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True
    sb.heatmap(abs(corr),
               linewidths=.5,
               annot=True,
               mask=mask)
    plt.show()
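f.one_hot_encoding(dataset, drop=drop) is a project-local helper; a sketch of equivalent behaviour with plain pandas, assuming drop maps to dropping the first indicator column per categorical feature:

import pandas as pd

def one_hot_encoding(dataset, drop=False):
    # Expand the categorical ocean_proximity column into 0/1 indicator
    # columns; drop=True removes one redundant category.
    return pd.get_dummies(dataset, columns=['ocean_proximity'],
                          drop_first=drop, dtype=float)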