Example No. 1
    def _encode_numerical_feature(
        feature: KerasTensor,
        name: str,
        dataset: Optional[BatchDataset],
    ) -> KerasTensor:
        """Normalize numerical features.

        Args:
            - feature: The input layer of the feature.
            - name: The feature's name (its column name in the original dataframe).
            - dataset: The training data. If not specified, a generic placeholder tensor is returned.

        Returns:
            The normalized tensor of the input feature.

        """
        # Return a generic placeholder tensor for the tuner initialization
        if dataset is None:
            return KerasTensor(type_spec=TensorSpec(
                shape=(None, 1), dtype=tf.float32, name=None))

        # Create a Normalization layer for our feature
        normalizer = Normalization()

        # Prepare a Dataset that only yields our feature
        feature_ds = dataset.map(lambda x, y: x[name])
        feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

        # Learn the statistics of the data
        normalizer.adapt(feature_ds)

        # Normalize the input feature
        encoded_feature = normalizer(feature)

        return encoded_feature
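
A minimal usage sketch for the helper above, assuming it is available at module level. The tiny dataframe, the "age" column, and the batch size are illustrative assumptions, not part of the original example:

import pandas as pd
import tensorflow as tf
from tensorflow import keras

# Hypothetical data: one numeric "age" column and a binary "target" label.
df = pd.DataFrame({"age": [22.0, 38.0, 26.0, 35.0], "target": [0, 1, 1, 0]})
labels = df.pop("target")
train_ds = tf.data.Dataset.from_tensor_slices((dict(df), labels)).batch(2)

age_input = keras.Input(shape=(1,), name="age")
age_encoded = _encode_numerical_feature(age_input, "age", train_ds)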
Example No. 2
def cifar_standardization(x, mode='FEATURE_NORMALIZE', data_samples=None):
    mode = mode.upper()
    assert mode in ['FEATURE_NORMALIZE', 'PIXEL_MEAN_SUBTRACT']

    if mode == 'PIXEL_MEAN_SUBTRACT' and data_samples is None:
        raise ValueError('`data_samples` argument should not be `None`, '
                         'when `mode="PIXEL_MEAN_SUBTRACT"`.')

    if mode == 'FEATURE_NORMALIZE':
        cifar_mean = tf.cast(CIFAR_MEAN, tf.float32).numpy()
        cifar_std = tf.cast(CIFAR_STD, tf.float32).numpy()

        x = Rescaling(scale=1. / cifar_std,
                      offset=-(cifar_mean / cifar_std),
                      name='mean_normalization')(x)
    elif mode == 'PIXEL_MEAN_SUBTRACT':
        mean_subtraction_layer = Normalization(axis=[1, 2, 3],
                                               name='pixel_mean_subtraction')
        mean_subtraction_layer.adapt(data_samples)

        # set values of variance = 1. and keep mean values as is
        mean_pixels = mean_subtraction_layer.get_weights()[0]
        mean_subtraction_layer.set_weights(
            [mean_pixels, tf.ones_like(mean_pixels)])

        x = mean_subtraction_layer(x)
        x = Rescaling(scale=1 / 255., name='rescaling')(x)
    return x
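
A hedged usage sketch for cifar_standardization. The CIFAR_MEAN / CIFAR_STD values below are placeholder assumptions standing in for constants defined elsewhere in the original module:

import tensorflow as tf
from tensorflow import keras

CIFAR_MEAN = [125.3, 123.0, 113.9]  # assumed per-channel pixel means
CIFAR_STD = [63.0, 62.1, 66.7]      # assumed per-channel pixel standard deviations

inputs = keras.Input(shape=(32, 32, 3))
x = cifar_standardization(inputs, mode='FEATURE_NORMALIZE')
# mode='PIXEL_MEAN_SUBTRACT' would additionally require a `data_samples`
# batch of images for the Normalization layer to adapt on.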
def min_normalizer():
    """ Normalizer with minimalistic data."""
    adapt_data = np.array([
        [1., 2.],
        [2., 3.],
    ], dtype=np.float32)
    normalizer = Normalization()
    normalizer.adapt(adapt_data)
    return normalizer
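
For reference, the normalizer returned above was adapted on per-column means of [1.5, 2.5] and variances of [0.25, 0.25], so an input equal to the column means maps to (approximately) zeros:

import numpy as np

normalizer = min_normalizer()
print(normalizer(np.array([[1.5, 2.5]], dtype=np.float32)))
# -> approximately [[0., 0.]]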
Example No. 4
def encode_numerical_feature(feature, name, dataset):
    normalizer = Normalization()

    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    normalizer.adapt(feature_ds)

    encoded_feature = normalizer(feature)
    return encoded_feature
Example No. 5
def encode_numerical_feature(feature, name, dataset):
    # Create a Keras Normalization Layer for the input feature passed as argument
    normalizer = Normalization()
    # Prepare a Dataset containing only the feature
    feature_dset = dataset.map(lambda x, y: x[name])
    feature_dset = feature_dset.map(lambda x: tf.expand_dims(x, -1))
    # Learn the statistics of the data and normalise the input feature
    normalizer.adapt(feature_dset)
    encoded_feature = normalizer(feature)
    return encoded_feature
def load_data():
    data = pd.read_csv(
        "https://storage.googleapis.com/tf-datasets/titanic/train.csv")
    from sklearn.model_selection import train_test_split
    labels = data.pop('survived')
    label_names = ["Not survived", "Survived"]
    features = {}

    # Convert each dataframe column into a symbolic Keras Input

    for name, column in data.items():
        dtype = column.dtype
        if dtype == object:
            dtype = tf.string
        else:
            dtype = tf.float32
        features[name] = Input(shape=(1, ), name=name, dtype=dtype)

    # Extracting and normalizing numeric features
    numeric_features = {
        name: feature
        for name, feature in features.items() if feature.dtype == tf.float32
    }

    x = Concatenate()(list(numeric_features.values()))
    norm = Normalization()
    norm.adapt(np.array(data[numeric_features.keys()]))
    numeric_features = norm(x)

    processed_features = [numeric_features]
    # Extracting and normalizing non-numeric features

    for name, feature in features.items():
        if feature.dtype == tf.float32:
            continue
        word = StringLookup(vocabulary=np.unique(data[name]))
        one_hot = CategoryEncoding(max_tokens=word.vocab_size())

        x = word(feature)
        x = one_hot(x)
        processed_features.append(x)

    processed_features = Concatenate()(processed_features)
    processed_features = Model(features, processed_features)

    utils.plot_model(model=processed_features,
                     rankdir='LR',
                     dpi=72,
                     show_shapes=True)

    feature_dict = {name: np.array(value) for name, value in data.items()}

    train_features, test_features, train_labels, test_labels = train_test_split(
        processed_features(feature_dict).numpy(), labels, test_size=0.2)
    return train_features, train_labels, test_features, test_labels
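
A hedged sketch of how the arrays returned by load_data might be consumed; the small classifier and its hyperparameters are assumptions, not part of the original snippet:

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

train_features, train_labels, test_features, test_labels = load_data()

model = Sequential([
    Dense(64, activation="relu"),
    Dense(1, activation="sigmoid"),
])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
model.fit(train_features, train_labels, epochs=5,
          validation_data=(test_features, test_labels))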
Example No. 7
def encodeMyFeature(individualFeature, name, dataset):
    # Normalize the data
    normalizer = Normalization()
    # Pull out a dataset that only yields this feature.
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Built-in method to learn the statistics of the data
    normalizer.adapt(feature_ds)
    encodedFeature = normalizer(individualFeature)
    return encodedFeature
Example No. 8
def encode_numerical_feature(feature, name, dataset):
    # Create a Normalization layer for our feature
    normalizer = Normalization()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    encoded_feature = normalizer(feature)
    return encoded_feature
Example No. 9
class Data:
    def __init__(self, filename, m_input='u', m_output='y'):
        # Load data
        data = loadmat(filename)

        # Normalize input data
        self.input = data[m_input]
        self.layernorminput = Normalization()
        self.layernorminput.adapt(self.input)
        self.input_norm = self.layernorminput(self.input)

        # Normalize output data
        self.output = data[m_output]
        self.layernormoutput = Normalization()
        self.layernormoutput.adapt(self.output)
        self.output_norm = self.layernormoutput(self.output)
Example No. 10
def train(data):

    types = set()
    for file in data["files"]:
        types.add(file["type"])

    # Try to get >100 of each type
    training_input, training_output = convert_data_to_np(data, list(types))

    # n = tf.keras.utils.normalize(training_input, axis=-1, order=2)
    normalizer = Normalization(axis=-1)
    normalizer.adapt(training_input)
    normalized_data = normalizer(training_input)
    print("var: %.4f" % np.var(normalized_data))
    print("mean: %.4f" % np.mean(normalized_data))

    dense = keras.layers.Dense(units=16)
# Unknown n-grams are encoded via an "out-of-vocabulary" token.
integer_data = vectorizer(training_data)
print(integer_data)
"""
**Example: normalizing features**

"""

from tensorflow.keras.layers.experimental.preprocessing import Normalization

# Example image data, with values in the [0, 255] range
training_data = np.random.randint(0, 256,
                                  size=(64, 200, 200, 3)).astype("float32")

normalizer = Normalization(axis=-1)
normalizer.adapt(training_data)

normalized_data = normalizer(training_data)
print("var: %.4f" % np.var(normalized_data))
print("mean: %.4f" % np.mean(normalized_data))
"""
**Example: rescaling & center-cropping images**

Both the `Rescaling` layer and the `CenterCrop` layer are stateless, so it isn't
necessary to call `adapt()` in this case.
"""

from tensorflow.keras.layers.experimental.preprocessing import CenterCrop
from tensorflow.keras.layers.experimental.preprocessing import Rescaling

# Example image data, with values in the [0, 255] range
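# (The rest of this example appears to have been truncated; below is a minimal
# sketch of the two stateless layers in use. The 200x200 input size and the
# 150x150 crop size are illustrative assumptions.)
training_data = np.random.randint(0, 256,
                                  size=(64, 200, 200, 3)).astype("float32")

cropper = CenterCrop(height=150, width=150)
scaler = Rescaling(scale=1.0 / 255)

output_data = scaler(cropper(training_data))
print("shape:", output_data.shape)
print("min:", np.min(output_data))
print("max:", np.max(output_data))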
"""
# normalize in range [0, 1]
scaling_layer = Rescaling(1.0 / 255)
# normalize in range [-1, 1]
input_ = tf.keras.Input(shape=(32, 32, 3))
norm_neg_one_to_one = Normalization()
x = norm_neg_one_to_one(input_)
import numpy as np
mean = np.array([127.5] * 3)
var = mean ** 2
norm_neg_one_to_one.set_weights([mean, var])
norm_neg_one_to_one.get_weights()

# normalize with mean 0 and std 1
norm_mean_std = Normalization()
norm_mean_std.adapt(x_train[0])

model_ = Sequential([
    tf.keras.Input(shape=(32, 32, 3)),
    norm_mean_std,
    model
])

model_.compile(
optimizer="Adam",
loss="sparse_categorical_crossentropy",
metrics=['accuracy'],
)
model_.fit(x_train, y_train, epochs=1, batch_size=16)
"""
When you don't have a large image dataset, it's a good practice
Example No. 13
class CALIHOUSING:
    def __init__(self,
                 test_size: float = 0.2,
                 validation_size: float = 0.33) -> None:
        # User-defined constants
        self.num_targets = 1
        self.batch_size = 128
        # Load the data set
        dataset = fetch_california_housing()
        self.x, self.y = dataset.data, dataset.target
        self.feature_names = dataset.feature_names
        self.description = dataset.DESCR
        # Split the dataset
        x_train, x_test, y_train, y_test = train_test_split(
            self.x, self.y, test_size=test_size)
        x_train, x_val, y_train, y_val = train_test_split(
            x_train, y_train, test_size=validation_size)
        # Preprocess x data
        self.x_train = x_train.astype(np.float32)
        self.x_test = x_test.astype(np.float32)
        self.x_val = x_val.astype(np.float32)
        # Preprocess y data
        self.y_train = np.reshape(y_train,
                                  (-1, self.num_targets)).astype(np.float32)
        self.y_test = np.reshape(y_test,
                                 (-1, self.num_targets)).astype(np.float32)
        self.y_val = np.reshape(y_val,
                                (-1, self.num_targets)).astype(np.float32)
        # Dataset attributes
        self.train_size = self.x_train.shape[0]
        self.test_size = self.x_test.shape[0]
        self.num_features = self.x_train.shape[1]
        self.num_targets = self.y_train.shape[1]
        # Normalization variables
        self.normalization_layer = Normalization()
        self.normalization_layer.adapt(self.x_train)
        # tf.data Datasets
        self.train_dataset = tf.data.Dataset.from_tensor_slices(
            (self.x_train, self.y_train))
        self.test_dataset = tf.data.Dataset.from_tensor_slices(
            (self.x_test, self.y_test))
        self.val_dataset = tf.data.Dataset.from_tensor_slices(
            (self.x_val, self.y_val))
        self.train_dataset = self._prepare_dataset(self.train_dataset,
                                                   shuffle=True)
        self.test_dataset = self._prepare_dataset(self.test_dataset)
        self.val_dataset = self._prepare_dataset(self.val_dataset)

    def get_train_set(self) -> tf.data.Dataset:
        return self.train_dataset

    def get_test_set(self) -> tf.data.Dataset:
        return self.test_dataset

    def get_val_set(self) -> tf.data.Dataset:
        return self.val_dataset

    def _prepare_dataset(self,
                         dataset: tf.data.Dataset,
                         shuffle: bool = False) -> tf.data.Dataset:
        dataset = dataset.map(
            map_func=lambda x, y: (tf.reshape(self.normalization_layer(
                tf.reshape(x, shape=(1, self.num_features)), training=False),
                                              shape=(self.num_features, )), y),
            num_parallel_calls=tf.data.experimental.AUTOTUNE)

        if shuffle:
            dataset = dataset.shuffle(buffer_size=1_000)

        dataset = dataset.batch(batch_size=self.batch_size)

        return dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
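
A brief usage sketch for the CALIHOUSING wrapper above; the small regression model is an illustrative assumption, not part of the original class:

from tensorflow import keras

cal = CALIHOUSING()
model = keras.Sequential([
    keras.Input(shape=(cal.num_features,)),
    keras.layers.Dense(64, activation="relu"),
    keras.layers.Dense(cal.num_targets),
])
model.compile(optimizer="adam", loss="mse")
model.fit(cal.get_train_set(), validation_data=cal.get_val_set(), epochs=1)
model.evaluate(cal.get_test_set())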
Example No. 14
print(dept_targets)

dataset = keras.preprocessing.image_dataset_from_directory('main_directory',
                                                           batch_size=64,
                                                           image_size=(2000,
                                                                       2000))
for data, labels in dataset:
    print(data.shape)
    print(data.dtype)
    print(labels.shape)
    print(labels.dtype)
# Normalization
training_data = np.random.randint(0, 256,
                                  size=(64, 200, 200, 3)).astype("float32")
normalization = Normalization(axis=-1)  # normalize along the last axis (per channel)
normalization.adapt(training_data)
normalized_data = normalization(training_data)
print("var:%.4f" % np.var(normalized_data))  # np.var computes the variance
print("mean:%.4f" % np.mean(normalized_data))
# Rescaling and center-cropping images
cropper = CenterCrop(height=150, width=150)
scaler = Rescaling(scale=1.0 / 255)
# Both the Rescaling and CenterCrop layers are stateless, so there is no need
# to call adapt() in this case.
output_data = scaler(cropper(training_data))
print(output_data.shape)
print("min:", np.min(output_data))

# Build a model with the Keras Functional API
dense = keras.layers.Dense(units=16)  # it maps its input to a 16-dimensional feature space:

# But an input for RGB images of any size will have shape (None, None, 3).
Example No. 15
inputWords = [[gloveDict.get(w, [0] * dims) for w in t] for t in inputWords] #vectorise words: if the word is unique to MTG it just gets set to 0
print("Vectorised oracle text")

#split data into training and test datasets
testNames = [data[i]["name"] for i in range(len(data)) if data[i]["set"] == "m20"]
trainVecs = [inputVecs[i] for i in range(len(inputVecs)) if not data[i]["set"] == "m20"]
testVecs = [inputVecs[i] for i in range(len(inputVecs)) if data[i]["set"] == "m20"]
trainWords = tf.ragged.constant([inputWords[i] for i in range(len(inputWords)) if not data[i]["set"] == "m20"])
testWords = tf.ragged.constant([inputWords[i] for i in range(len(inputWords)) if data[i]["set"] == "m20"])
trainCorRars = [corRars[i] for i in range(len(corRars)) if not data[i]["set"] == "m20"]
testCorRars = [corRars[i] for i in range(len(corRars)) if data[i]["set"] == "m20"]

#normalise input vectors
trainVecs = np.array(trainVecs).astype("float32") #convert to numpy format
normalizer = Normalization()
normalizer.adapt(trainVecs)
trainVecs = normalizer(trainVecs)
testVecs = normalizer(testVecs)
print("Normalised numerical data")

#build keras model
wordIn = layers.Input(shape = (None, len(inputWords[0][0]))) #input layer for var-length word vec data
numIn = layers.Input(shape = (len(inputVecs[0]), )) #input layer for fixed-length numerical data
rnn = layers.LSTM(32)(wordIn) #RNN layer for word vec data
numLayer = layers.Dense(20, activation = "relu")(numIn) #layer for numerical data
merge = layers.concatenate([rnn, numLayer]) #combine the two vectors
combLayer = layers.Dense(64, activation = "relu")(merge) #hidden intermediate layer for combined data
out = layers.Dense(3, activation = "softmax")(combLayer) #final layer: softmax ensures output is a set of probabilities
model = DualModel(inputs = [numIn, wordIn], outputs = [out])
model.compile(loss = "sparse_categorical_crossentropy", metrics = "sparse_categorical_accuracy", optimizer = "adam") #I have no idea whether these ones are the best ones to use
print("Built model")