Exemplo n.º 1
0
def test_from_data_frame(spark_context):
    """Round-trip non-categorical data through a data frame and compare shapes.

    Only the shapes of the arrays returned by ``adapter.from_data_frame`` are
    checked against the inputs given to ``adapter.to_data_frame``.
    """
    expected_x = np.ones((2, 10))
    expected_y = np.asarray([2.0, 1.0])

    df = adapter.to_data_frame(
        spark_context, expected_x, expected_y, categorical=False
    )
    actual_x, actual_y = adapter.from_data_frame(df, categorical=False)

    assert expected_x.shape == actual_x.shape
    assert expected_y.shape == actual_y.shape
Exemplo n.º 2
0
def test_from_data_frame_cat(spark_context):
    """Round-trip categorical (three-class, one-hot) data and compare shapes.

    Only the shapes of the arrays returned by ``adapter.from_data_frame`` are
    checked against the inputs given to ``adapter.to_data_frame``.
    """
    expected_x = np.ones((2, 10))
    expected_y = np.asarray([[0, 0, 1.0], [0, 1.0, 0]])

    df = adapter.to_data_frame(
        spark_context, expected_x, expected_y, categorical=True
    )
    actual_x, actual_y = adapter.from_data_frame(
        df, categorical=True, nb_classes=3
    )

    assert expected_x.shape == actual_x.shape
    assert expected_y.shape == actual_y.shape
# Report how many rows the source data frame holds.
print(image_df.count())

# Scaling goes here

# Copy "imgnpa" into a "features" column and "contaminated" into "label".
renamed_df = (
    image_df
    .withColumn("features", col("imgnpa"))
    .withColumn("label", col("contaminated"))
)

# Split into train/test using weights 0.8/0.2 and seed 314.
splits = renamed_df.randomSplit([0.8, 0.2], 314)
train_df, test_df = splits

print(train_df.count())
print(test_df.count())

# Convert each split into a (features, labels) pair.
features_train, labels_train = from_data_frame(train_df, True, 2)
features_test, labels_test = from_data_frame(test_df, True, 2)

import numpy as np
from math import sqrt


# Change features to right shape
def raise_dim(l, width=32):
    """Split an interleaved flat sequence into three stacked planes.

    Elements at positions 0, 3, 6, ... form the first plane, positions
    1, 4, 7, ... the second and positions 2, 5, 8, ... the third (the
    original code named them r, g and b). Each plane is reshaped into rows
    of ``width`` elements.

    Args:
        l: Array-like holding the interleaved values.
        width: Number of columns in each plane. Defaults to 32, the value
            that used to be hard-coded.

    Returns:
        A numpy array with the three planes stacked along the first axis;
        for a 1-D input this has shape ``(3, rows, width)``.

    Raises:
        ValueError: Propagated from ``reshape`` when a plane's length is not
            a multiple of ``width``.
    """
    x = np.array(l)
    # The row count is inferred by reshape(-1, width); no square-root guess
    # of the side length is needed (the previous one was unused and did not
    # account for the three interleaved channels).
    return np.array([x[offset::3].reshape(-1, width) for offset in range(3)])
Exemplo n.º 4
0
#scaler = StandardScaler(inputCol="vector_images", outputCol="scaled_features", withStd=True, withMean=False)
#fitted_scaler = scaler.fit(vector_length_df)
#scaled_df = fitted_scaler.transform(vector_length_df)

#import numpy as np
#from sklearn import preprocessing
#scale = F.udf(lambda x: x / 255.0, T.Vec)
#scaled_df = vector_length_df.withColumn("scaled_features", scale(col("vector_images")))

# "features" is a magic name in the prediction code
#scaled_df = vector_length_df.withColumn("features", col("vector_images"))
# Copy "imgnpa" into a "features" column and "contaminated" into "label".
scaled_df = (
    image_df
    .withColumn("features", col("imgnpa"))
    .withColumn("label", col("contaminated"))
)

# Convert the data frame into a (features, labels) pair.
features, labels = from_data_frame(scaled_df, True, 2)

import numpy as np


# Change features to right shape
def raise_dim(l):
    """Split an interleaved flat sequence into three 2-column planes.

    Every third element starting at offsets 0, 1 and 2 forms one plane; each
    plane is reshaped into rows of two elements. The planes are stacked and
    a leading axis of length 1 is added to the result.
    """
    flat = np.array(l)
    planes = np.array([flat[offset::3].reshape(-1, 2) for offset in range(3)])
    return np.expand_dims(planes, axis=0)


# Apply raise_dim to every entry of features and collect the results.
f2 = np.array(list(map(raise_dim, features)))