Example #1
  def fit(self, x, y, **kwargs):
    """Constructs a new model with `build_fn` & fit the model to `(x, y)`.

    Arguments:
        x : array-like, shape `(n_samples, n_features)`
            Training samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
            True labels for `x`.
        **kwargs: dictionary arguments
            Legal arguments are the arguments of `Sequential.fit`.

    Returns:
        history : object
            Details about the training history at each epoch.
    """
    if self.build_fn is None:
      self.model = self.__call__(**self.filter_sk_params(self.__call__))
    elif (not isinstance(self.build_fn, types.FunctionType) and
          not isinstance(self.build_fn, types.MethodType)):
      self.model = self.build_fn(
          **self.filter_sk_params(self.build_fn.__call__))
    else:
      self.model = self.build_fn(**self.filter_sk_params(self.build_fn))

    if (losses.is_categorical_crossentropy(self.model.loss) and
        len(y.shape) != 2):
      y = to_categorical(y)

    fit_args = copy.deepcopy(self.filter_sk_params(Sequential.fit))
    fit_args.update(kwargs)

    history = self.model.fit(x, y, **fit_args)

    return history
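A minimal sketch of the label handling in `fit` above, assuming the TensorFlow Keras `to_categorical` utility: integer labels are expanded to one-hot rows only when the compiled loss expects 2-D targets, which is exactly what the `len(y.shape) != 2` check guards.

import numpy as np
from tensorflow.keras.utils import to_categorical

y = np.array([0, 2, 1, 2])        # shape (4,): integer class labels
y_onehot = to_categorical(y)      # shape (4, 3): one row per sample
assert y_onehot.shape == (4, 3)
assert (y_onehot.argmax(axis=-1) == y).all()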
Example #2
def Dataset(split_num, one_hot=True):
    (train_images, train_labels), (test_images,
                                   test_labels) = datasets.mnist.load_data()
    train_images = train_images / 255.0
    if one_hot:
        train_labels = to_categorical(train_labels, 10)
    random_order = list(range(len(train_images)))
    # np.random.shuffle(random_order)
    # Pad with the first few indices so the data splits evenly.
    remainder = len(train_images) % split_num
    if remainder != 0:
        random_order.extend(random_order[:split_num - remainder])
    train_images = train_images[random_order]
    train_labels = train_labels[random_order]
    res = []
    for x, y in zip(np.split(train_images, split_num),
                    np.split(train_labels, split_num)):
        res.append(client_data(x, y))
    return res
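The remainder-padding trick above in isolation, a sketch using plain NumPy: when the dataset does not divide evenly into `split_num` shards, the first few indices are appended again so that `np.split` receives an evenly divisible length.

import numpy as np

split_num, n = 4, 10
order = list(range(n))
remainder = n % split_num                  # 2 samples left over
if remainder != 0:
    order.extend(order[:split_num - remainder])
shards = np.split(np.array(order), split_num)
assert all(len(s) == 3 for s in shards)    # 12 indices across 4 shards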
Example #3
    def test_subclassed_model_with_feature_columns(self):
        col_a = fc.numeric_column('a')
        col_b = fc.numeric_column('b')

        dnn_model = TestDNNModel([col_a, col_b], 20)

        dnn_model.compile(optimizer='rmsprop',
                          loss='categorical_crossentropy',
                          metrics=['accuracy'],
                          run_eagerly=testing_utils.should_run_eagerly())

        x = {'a': np.random.random((10, 1)), 'b': np.random.random((10, 1))}
        y = np.random.randint(20, size=(10, 1))
        y = np_utils.to_categorical(y, num_classes=20)
        dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
        dnn_model.fit(x=x, y=y, epochs=1, batch_size=5)
        dnn_model.evaluate(x=x, y=y, batch_size=5)
        dnn_model.predict(x=x, batch_size=5)
Example #4
    def test_ensemble(self, config):
        """Tests using a prebuilt model in an ensemble learner."""
        loader, model, build_fn, ensembles = CONFIG[config]
        data = loader()
        x_train, y_train = data.data[:100], data.target[:100]

        n_classes_ = np.unique(y_train).size
        # make y the same shape as will be used by .fit
        if config != "MLPRegressor":
            y_train = to_categorical(y_train)
            meta = {
                "n_classes_": n_classes_,
                "target_type_": "multiclass",
                "n_features_in_": x_train.shape[1],
                "n_outputs_expected_": 1,
            }
            keras_model = build_fn(
                meta=meta,
                hidden_layer_sizes=(100, ),
                compile_kwargs={
                    "optimizer": "adam",
                    "loss": None,
                    "metrics": None,
                },
            )
        else:
            meta = {
                "n_outputs_": 1,
                "n_features_in_": x_train.shape[1],
            }
            keras_model = build_fn(
                meta=meta,
                hidden_layer_sizes=(100, ),
                compile_kwargs={
                    "optimizer": "adam",
                    "loss": None,
                    "metrics": None,
                },
            )

        base_estimator = model(model=keras_model)
        for ensemble in ensembles:
            estimator = ensemble(base_estimator=base_estimator, n_estimators=2)
            basic_checks(estimator, loader)
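A hedged sketch of what this test exercises, assuming the SciKeras `KerasClassifier` wrapper and scikit-learn's bagging ensemble (the concrete `model`, `build_fn`, and `ensembles` objects come from the test's CONFIG table, which is not shown here):

from sklearn.ensemble import BaggingClassifier
from scikeras.wrappers import KerasClassifier

clf = KerasClassifier(model=keras_model)   # keras_model built as above
ensemble = BaggingClassifier(base_estimator=clf, n_estimators=2)
# ensemble.fit(X, y) / ensemble.predict(X) then work like any sklearn estimator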
Example #5
    def test_vector_classification_shared_model(self):
        # Test that Sequential models that feature internal updates
        # and internal losses can be shared.
        np.random.seed(1337)
        (x_train, y_train), _ = testing_utils.get_test_data(train_samples=100,
                                                            test_samples=0,
                                                            input_shape=(10, ),
                                                            num_classes=2)
        y_train = np_utils.to_categorical(y_train)

        base_model = testing_utils.get_model_from_layers(
            [
                keras.layers.Dense(
                    16,
                    activation='relu',
                    kernel_regularizer=keras.regularizers.l2(1e-5),
                    bias_regularizer=keras.regularizers.l2(1e-5)),
                keras.layers.BatchNormalization()
            ],
            input_shape=x_train.shape[1:])
        x = keras.layers.Input(x_train.shape[1:])
        y = base_model(x)
        y = keras.layers.Dense(y_train.shape[-1], activation='softmax')(y)
        model = keras.models.Model(x, y)
        model.compile(loss='categorical_crossentropy',
                      optimizer=keras.optimizer_v2.adam.Adam(0.005),
                      metrics=['acc'],
                      run_eagerly=testing_utils.should_run_eagerly(),
                      experimental_run_tf_function=testing_utils.
                      should_run_tf_function())
        if not testing_utils.should_run_eagerly():
            self.assertEqual(len(model.get_losses_for(None)), 2)
            self.assertEqual(len(model.get_updates_for(x)), 2)
        history = model.fit(x_train,
                            y_train,
                            epochs=10,
                            batch_size=10,
                            validation_data=(x_train, y_train),
                            verbose=2)
        self.assertGreater(history.history['val_acc'][-1], 0.7)
        _, val_acc = model.evaluate(x_train, y_train)
        self.assertAlmostEqual(history.history['val_acc'][-1], val_acc)
        predictions = model.predict(x_train)
        self.assertEqual(predictions.shape, (x_train.shape[0], 2))
Example #6
    def test_nested_model_with_tensor_input(self):
        gpus = 2
        input_dim = 10
        shape = (input_dim, )
        num_samples = 16
        num_classes = 10

        if not check_if_compatible_devices(gpus=gpus):
            self.skipTest('multi gpu only')

        with ops.Graph().as_default(), self.cached_session():
            input_shape = (num_samples, ) + shape
            x_train = np.random.randint(0, 255, input_shape)
            y_train = np.random.randint(0, num_classes, (input_shape[0], ))

            y_train = np_utils.to_categorical(y_train, num_classes)

            x_train = x_train.astype('float32')
            y_train = y_train.astype('float32')

            dataset = data.Dataset.from_tensor_slices((x_train, y_train))
            dataset = dataset.repeat()
            dataset = dataset.batch(4)
            iterator = data.make_one_shot_iterator(dataset)

            inputs, targets = iterator.get_next()

            input_tensor = keras.layers.Input(tensor=inputs)

            model = keras.models.Sequential()
            model.add(keras.layers.Dense(3, input_shape=(input_dim, )))
            model.add(keras.layers.Dense(num_classes))

            output = model(input_tensor)
            outer_model = keras.Model(input_tensor, output)
            parallel_model = multi_gpu_utils.multi_gpu_model(outer_model,
                                                             gpus=gpus)

            parallel_model.compile(loss='categorical_crossentropy',
                                   optimizer=optimizer_v1.RMSprop(lr=0.0001,
                                                                  decay=1e-6),
                                   metrics=['accuracy'],
                                   target_tensors=[targets])
            parallel_model.fit(epochs=1, steps_per_epoch=3)
Example #7
    def test_lstm_v2_feature_parity_with_canonical_lstm(self):
        input_shape = 10
        rnn_state_size = 8
        timestep = 4
        batch = 20

        (x_train,
         y_train), _ = testing_utils.get_test_data(train_samples=batch,
                                                   test_samples=0,
                                                   input_shape=(timestep,
                                                                input_shape),
                                                   num_classes=rnn_state_size,
                                                   random_seed=87654321)
        y_train = np_utils.to_categorical(y_train, rnn_state_size)
        # For the last two samples in the test data, zero out the final
        # timestep to simulate variable-length sequences and exercise masking.
        x_train[-2:, -1, :] = 0.0
        y_train[-2:] = 0

        inputs = keras.layers.Input(shape=[timestep, input_shape],
                                    dtype=dtypes.float32)
        masked_input = keras.layers.Masking()(inputs)
        lstm_layer = rnn_v1.LSTM(rnn_state_size,
                                 recurrent_activation='sigmoid')
        output = lstm_layer(masked_input)
        lstm_model = keras.models.Model(inputs, output)
        weights = lstm_model.get_weights()
        y_1 = lstm_model.predict(x_train)
        lstm_model.compile('rmsprop', 'mse')
        lstm_model.fit(x_train, y_train)
        y_2 = lstm_model.predict(x_train)

        with testing_utils.device(should_use_gpu=True):
            cudnn_layer = rnn.LSTM(rnn_state_size)
            cudnn_model = keras.models.Model(inputs, cudnn_layer(masked_input))
        cudnn_model.set_weights(weights)
        y_3 = cudnn_model.predict(x_train)
        cudnn_model.compile('rmsprop', 'mse')
        cudnn_model.fit(x_train, y_train)
        y_4 = cudnn_model.predict(x_train)

        self.assertAllClose(y_1, y_3, rtol=1e-5, atol=2e-5)
        self.assertAllClose(y_2, y_4, rtol=1e-5, atol=2e-5)
Example #8
    def final_prepare(self, story_raw, tokenizer):
        all_words = []
        for story in story_raw:
            s = story.split()
            all_words.append(s)
        all_words_flat = [item for sublist in all_words for item in sublist]

        X = []
        Y = []
        for i in range(len(all_words_flat) - self.interval):
            X.append(all_words_flat[i:i + self.interval])
            Y.append(all_words_flat[i + self.interval])

        X = tokenizer.texts_to_sequences(X)
        Y = tokenizer.texts_to_sequences(Y)
        Y = np_utils.to_categorical(Y,
                                    num_classes=len(tokenizer.word_index) + 1)

        return X, Y
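The sliding-window construction above, reduced to a sketch: each window of `interval` consecutive tokens becomes one input sample, and the token immediately after the window becomes its target.

interval = 3
tokens = ['a', 'b', 'c', 'd', 'e']
X = [tokens[i:i + interval] for i in range(len(tokens) - interval)]
Y = [tokens[i + interval] for i in range(len(tokens) - interval)]
# X == [['a', 'b', 'c'], ['b', 'c', 'd']] and Y == ['d', 'e']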
Example #9
    def test_sequential_model(self):
        columns = [fc.numeric_column('a')]
        model = keras.models.Sequential([
            df.DenseFeatures(columns),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(20, activation='softmax')
        ])
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'],
                      run_eagerly=testing_utils.should_run_eagerly())

        x = {'a': np.random.random((10, 1))}
        y = np.random.randint(20, size=(10, 1))
        y = np_utils.to_categorical(y, num_classes=20)
        model.fit(x, y, epochs=1, batch_size=5)
        model.fit(x, y, epochs=1, batch_size=5)
        model.evaluate(x, y, batch_size=5)
        model.predict(x, batch_size=5)
Example #10
  def testNumericEquivalenceForAmsgrad(self):
    if context.executing_eagerly():
      self.skipTest(
          'v1 optimizer does not run in eager mode')
    np.random.seed(1331)
    with test_util.use_gpu():
      train_samples = 20
      input_dim = 3
      num_classes = 2
      (x, y), _ = testing_utils.get_test_data(
          train_samples=train_samples,
          test_samples=10,
          input_shape=(input_dim,),
          num_classes=num_classes)
      y = np_utils.to_categorical(y)

      num_hidden = 5
      model_k_v1 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2 = testing_utils.get_small_sequential_mlp(
          num_hidden=num_hidden, num_classes=num_classes, input_dim=input_dim)
      model_k_v2.set_weights(model_k_v1.get_weights())

      opt_k_v1 = optimizers.Adam(amsgrad=True)
      opt_k_v2 = adam.Adam(amsgrad=True)

      model_k_v1.compile(
          opt_k_v1,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())
      model_k_v2.compile(
          opt_k_v2,
          loss='categorical_crossentropy',
          metrics=[],
          run_eagerly=testing_utils.should_run_eagerly())

      hist_k_v1 = model_k_v1.fit(x, y, batch_size=5, epochs=10, shuffle=False)
      hist_k_v2 = model_k_v2.fit(x, y, batch_size=5, epochs=10, shuffle=False)

      self.assertAllClose(model_k_v1.get_weights(), model_k_v2.get_weights())
      self.assertAllClose(opt_k_v1.get_weights(), opt_k_v2.get_weights())
      self.assertAllClose(hist_k_v1.history['loss'], hist_k_v2.history['loss'])
Example #11
 def test_to_categorical(self):
   num_classes = 5
   shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)]
   expected_shapes = [(1, num_classes), (3, num_classes), (4, 3, num_classes),
                      (5, 4, 3, num_classes), (3, num_classes),
                      (3, 2, num_classes)]
   labels = [np.random.randint(0, num_classes, shape) for shape in shapes]
   one_hots = [
       np_utils.to_categorical(label, num_classes) for label in labels]
   for label, one_hot, expected_shape in zip(labels,
                                             one_hots,
                                             expected_shapes):
     # Check shape
     self.assertEqual(one_hot.shape, expected_shape)
     # Make sure there is only one 1 in a row
     self.assertTrue(np.all(one_hot.sum(axis=-1) == 1))
     # Get original labels back from one hots
     self.assertTrue(np.all(
         np.argmax(one_hot, -1).reshape(label.shape) == label))
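The shape rule this test pins down, shown directly: `to_categorical` appends a `num_classes` axis, but a trailing axis of size 1 is squeezed away first, so `(3, 1)` becomes `(3, 5)` rather than `(3, 1, 5)`.

import numpy as np
from tensorflow.keras.utils import to_categorical

labels = np.random.randint(0, 5, size=(3, 1))
one_hot = to_categorical(labels, num_classes=5)
assert one_hot.shape == (3, 5)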
Example #12
  def testRNNWithStackKerasCell(self):
    with self.cached_session() as sess:
      input_shape = 10
      output_shape = 5
      timestep = 4
      batch = 100
      (x_train, y_train), _ = testing_utils.get_test_data(
          train_samples=batch,
          test_samples=0,
          input_shape=(timestep, input_shape),
          num_classes=output_shape)
      y_train = np_utils.to_categorical(y_train)
      cell = keras.layers.StackedRNNCells(
          [keras.layers.LSTMCell(2 * output_shape),
           keras.layers.LSTMCell(output_shape)])

      inputs = array_ops.placeholder(
          dtypes.float32, shape=(None, timestep, input_shape))
      predict = array_ops.placeholder(
          dtypes.float32, shape=(None, output_shape))

      outputs, state = rnn.dynamic_rnn(
          cell, inputs, dtype=dtypes.float32)
      self.assertEqual(outputs.shape.as_list(), [None, timestep, output_shape])
      self.assertEqual(len(state), 2)
      state = nest.flatten(state)
      self.assertEqual(len(state), 4)
      self.assertEqual(state[0].shape.as_list(), [None, 2 * output_shape])
      self.assertEqual(state[1].shape.as_list(), [None, 2 * output_shape])
      self.assertEqual(state[2].shape.as_list(), [None, output_shape])
      self.assertEqual(state[3].shape.as_list(), [None, output_shape])
      loss = losses.softmax_cross_entropy(predict, state[2])
      train_op = training.GradientDescentOptimizer(0.001).minimize(loss)

      sess.run([variables_lib.global_variables_initializer()])
      _, outputs, state = sess.run(
          [train_op, outputs, state], {inputs: x_train, predict: y_train})

      self.assertEqual(len(outputs), batch)
      self.assertEqual(len(state), 4)
      for s in state:
        self.assertEqual(len(s), batch)
Example #13
def input_data_for_model(input_shape):

    # Import the data
    input_data = load_data()
    # Preprocess the data
    data_processing()
    # Load the saved dictionaries
    with open(CONSTANTS[1], 'rb') as f:
        word_dictionary = pickle.load(f)
    with open(CONSTANTS[2], 'rb') as f:
        inverse_word_dictionary = pickle.load(f)
    with open(CONSTANTS[3], 'rb') as f:
        label_dictionary = pickle.load(f)
    with open(CONSTANTS[4], 'rb') as f:
        output_dictionary = pickle.load(f)
    vocab_size = len(word_dictionary.keys())
    label_size = len(label_dictionary.keys())

    # Process the input data
    aggregate_function = lambda input: [
        (word, pos, label)
        for word, pos, label in zip(input['word'].values.tolist(), input[
            'pos'].values.tolist(), input['tag'].values.tolist())
    ]

    grouped_input_data = input_data.groupby('sent_no').apply(
        aggregate_function)
    sentences = [sentence for sentence in grouped_input_data]

    x = [[word_dictionary[word[0]] for word in sent] for sent in sentences]
    x = sequence.pad_sequences(maxlen=input_shape,
                               sequences=x,
                               padding='post',
                               value=0)
    y = [[label_dictionary[word[2]] for word in sent] for sent in sentences]
    y = sequence.pad_sequences(maxlen=input_shape,
                               sequences=y,
                               padding='post',
                               value=0)
    y = [to_categorical(label, num_classes=label_size + 1) for label in y]

    return x, y, output_dictionary, vocab_size, label_size, inverse_word_dictionary
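The target preparation above in miniature: label sequences are padded to a fixed length with the value 0, then each timestep is one-hot encoded, which is why `num_classes` is `label_size + 1` (one extra slot for the padding value).

import numpy as np
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.utils import to_categorical

label_size = 3                    # assumed tag vocabulary size
y = [[1, 2], [3]]
y = sequence.pad_sequences(maxlen=4, sequences=y, padding='post', value=0)
y = np.array([to_categorical(seq, num_classes=label_size + 1) for seq in y])
assert y.shape == (2, 4, label_size + 1)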
Example #14
    def testPrunesModel(self, model_type):
        model = test_utils.build_mnist_model(model_type, self.params)
        if model_type == 'layer_list':
            model = keras.Sequential(
                prune.prune_low_magnitude(model, **self.params))
        elif model_type in ['sequential', 'functional']:
            model = prune.prune_low_magnitude(model, **self.params)

        model.compile(loss='categorical_crossentropy',
                      optimizer='sgd',
                      metrics=['accuracy'])
        test_utils.assert_model_sparsity(self, 0.0, model)
        model.fit(np.random.rand(32, 28, 28, 1),
                  np_utils.to_categorical(np.random.randint(10, size=(32, 1)),
                                          10),
                  callbacks=[pruning_callbacks.UpdatePruningStep()])

        test_utils.assert_model_sparsity(self, 0.5, model)

        self._check_strip_pruning_matches_original(model, 0.5)
Example #15
def get_label(path, num_classes=2):
    train_data_dir = path
    labels = os.listdir(train_data_dir)
    num = 0
    for label in labels:
        image_names_train = os.listdir(os.path.join(train_data_dir, label))
        num = num + len(image_names_train)
    lab = np.zeros((num, ), dtype='uint8')
    i = 0
    j = 0
    for label in labels:
        image_names_train = os.listdir(os.path.join(train_data_dir, label))
        for image_name in image_names_train:
            lab[i] = j
            i += 1
        j += 1

    lab = np_utils.to_categorical(lab[:num], num_classes)

    return lab
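A more compact equivalent of the two counting loops above, a sketch assuming the same one-subdirectory-per-class layout; `get_label_compact` is a hypothetical name, not part of the original code.

import os
import numpy as np
from tensorflow.keras.utils import to_categorical

def get_label_compact(path, num_classes=2):
    # One label index per class subdirectory, repeated once per image file.
    counts = [len(os.listdir(os.path.join(path, d)))
              for d in os.listdir(path)]
    lab = np.repeat(np.arange(len(counts)), counts)
    return to_categorical(lab, num_classes)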
Example #16
    def test_ensemble(self, config):
        """Tests using a prebuilt model in an ensemble learner."""
        loader, model, build_fn, ensembles = CONFIG[config]
        data = loader()
        x_train, y_train = data.data[:100], data.target[:100]

        n_classes_ = np.unique(y_train).size
        # make y the same shape as will be used by .fit
        if config != "MLPRegressor":
            y_train = to_categorical(y_train)
            keras_model = build_fn(
                X=x_train, n_classes_=n_classes_, n_outputs_=1
            )
        else:
            keras_model = build_fn(X=x_train, n_outputs_=1)

        base_estimator = model(build_fn=keras_model)
        for ensemble in ensembles:
            estimator = ensemble(base_estimator=base_estimator, n_estimators=2)
            check(estimator, loader)
Example #17
def convert_inputs(descriptions, headlines, number_words_to_replace, model,
                   is_training):

    # length of headlines and descriptions should be equal
    assert len(descriptions) == len(headlines)

    X, y = [], []
    for each_desc, each_headline in zip(descriptions, headlines):
        # print('each ', type(each_headline[0]))

        input_headline_idx = sentence2idx(each_headline[0], True,
                                          parameters.max_len_head, True)
        predicted_headline_idx = sentence2idx(each_headline[0], True,
                                              parameters.max_len_head, False)
        desc_idx = sentence2idx(each_desc[0], False, parameters.max_len_desc)

        # assert size checks
        assert len(input_headline_idx) == parameters.max_len_head - 1
        assert len(predicted_headline_idx) == parameters.max_len_head
        assert len(desc_idx) == parameters.max_len_desc + 1

        X.append(desc_idx + input_headline_idx)
        y.append(predicted_headline_idx)

    X, y = np.array(X), np.array(y)
    if is_training:
        #print("Length of X before flipping",len(X))
        X = flip_words_randomly(X, number_words_to_replace, model)
        # One hot encoding of y
        vocab_size = word2vec.shape[0]
        length_of_data = len(headlines)
        Y = np.zeros((length_of_data, parameters.max_len_head, vocab_size))
        for i, each_y in enumerate(y):
            Y[i, :, :] = np_utils.to_categorical(each_y, vocab_size)
        #check equal lengths
        assert len(X) == len(Y)
        return X, Y
    else:
        # Testing does not require the OHE form of the headlines; flipping is
        # also not needed, since the BLEU score compares words, not OHE vectors.
        return X, headlines
Example #18
def CNNclassifier(train_data, train_label, test_data, test_labels):
    t1 = process_time()
    inputECG = Input(batch_shape=(None, 140, 1))
    x = layers.Conv1D(64, 3, activation='relu', padding='valid')(inputECG)
    x1 = layers.MaxPooling1D(2)(x)
    x2 = layers.Conv1D(32, 3, activation='relu', padding='valid')(x1)
    x3 = layers.MaxPooling1D(2)(x2)
    flat = layers.Flatten()(x3)
    encoded = Dense(32, activation='relu')(flat)

    model_encoder = Model(inputECG, encoded)
    model = Sequential()
    # Reuse all of the encoder's layers, from the input through the
    # 32-unit Dense bottleneck.
    for layer in model_encoder.layers:
        model.add(layer)
    model.add(layers.Dense(2, activation='softmax'))
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    train_data = train_data[..., None]
    test_data = test_data[..., None]

    model.fit(train_data, train_label, epochs=100, batch_size=100)
    pred = model.predict(test_data)
    pred = np.argmax(pred, axis=1)
    # Pass num_classes so the one-hot width stays 2 even if only one class
    # is ever predicted.
    pred = to_categorical(pred, num_classes=2)

    f1 = f1_score(test_labels, pred, average='macro')
    precision = precision_score(test_labels, pred, average='macro')
    recall = recall_score(test_labels, pred, average='macro')
    accuracy = accuracy_score(test_labels, pred)
    t2 = process_time()
    time = t2 - t1
    return round(f1, 5), round(precision, 5), round(recall, 5), accuracy, time
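The prediction post-processing above, isolated: softmax outputs are collapsed to class indices with argmax and then re-expanded to one-hot so they can be compared against one-hot test labels; passing `num_classes` explicitly keeps the one-hot width fixed even when only one class is ever predicted.

import numpy as np
from tensorflow.keras.utils import to_categorical

probs = np.array([[0.9, 0.1], [0.2, 0.8]])
pred = to_categorical(np.argmax(probs, axis=1), num_classes=2)
# pred == [[1., 0.], [0., 1.]]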
Example #19
    def test_subclassed_model_with_feature_columns_with_ds_input(self):
        col_a = fc.numeric_column('a')
        col_b = fc.numeric_column('b')

        dnn_model = TestDNNModel([col_a, col_b], 20)

        dnn_model.compile(optimizer='rmsprop',
                          loss='categorical_crossentropy',
                          metrics=['accuracy'],
                          run_eagerly=testing_utils.should_run_eagerly())

        y = np.random.randint(20, size=(100, 1))
        y = np_utils.to_categorical(y, num_classes=20)
        x = {'a': np.random.random((100, 1)), 'b': np.random.random((100, 1))}
        ds1 = dataset_ops.Dataset.from_tensor_slices(x)
        ds2 = dataset_ops.Dataset.from_tensor_slices(y)
        ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
        dnn_model.fit(ds, steps_per_epoch=1)
        dnn_model.fit(ds, steps_per_epoch=1)
        dnn_model.evaluate(ds, steps=1)
        dnn_model.predict(ds, steps=1)
Example #20
    def createTestAndTrain():
        X_train, Y_train = getDataWithLabel()
        # Normalize the data
        X_train = X_train.astype('float16')
        X_train = X_train / 255.0
        print("Data was normalized..")
        print("Data shape: ", X_train.shape)
        # Reshape into an image tensor
        X_train = X_train.values.reshape(-1, 240, 240, 3)
        print("Data was reshaped..")
        # Label-encode the class names
        #labels = preprocessing.LabelEncoder().fit_transform(labels)
        Y_train = fitLabelEncoder(Y_train)
        print("Data was encoded..")
        # Integer labels to one-hot vectors
        Y_train = to_categorical(Y_train, num_classes=10)
        #train and test data split

        #X_train, X_test, Y_train, Y_test= train_test_split(X_train, Y_train, test_size=0.1, random_state=42)
        #return X_train, X_test, Y_train, Y_test;
        return X_train, Y_train
Example #21
def apply_model_to_sap_data_timeseries(model, node_features, node_types):
    item_feature_count = len(node_features) + len(node_types)
    sap_extractor = SapExtractor(cfg.STORAGE_BASE_SAP_DATA)
    db, min_time, max_time = sap_extractor.extract()

    timeseries_extractor = TimeseriesExtractor(db=db,
                                               max_simulation_time=max_time,
                                               min_simulation_time=min_time)

    timeseries, labels, _, names = timeseries_extractor.generate_timeseries(
        cfg.SAP_DATA_WINDOW_DURATION,
        node_features,
        node_types,
        window_stride=cfg.SAP_DATA_WINDOW_STRIDE)

    timeseries[:, :, len(node_types):item_feature_count] = (
        (timeseries - np.nanmean(timeseries, axis=(0, 1))) /
        np.nanstd(timeseries, axis=(0, 1)))[:, :,
                                            len(node_types):item_feature_count]
    np.nan_to_num(timeseries, copy=False, nan=-1)

    all_gt = to_categorical(labels)

    raw_predictions = model.predict(timeseries)
    all_predictions = K.argmax(raw_predictions).numpy().tolist()
    print(all_predictions)

    all_predictions_df = pd.DataFrame({
        "Slice": names,
        "Predicted is Fraud": all_predictions,
        "RAW": raw_predictions.tolist(),
        "Labels": labels
    })
    all_predictions_df.to_csv(cfg.STORAGE_ROOT_PATH +
                              rf'\results_all_sap_rnn.csv',
                              sep=';')

    plot_confusion_matrix(
        'Confusion Matrix - SAP Data', all_predictions, labels,
        cfg.STORAGE_BASE_THESIS_IMG + rf'\conf_matrix_all_rnn_sap.pdf')
Example #22
def load_dataset(dirname, classname, scores=[1, 3, 5]):
    """
    Load the coach dataset and split it into train and test sets on the fly.

    :param dirname: path to the dataset
    :param classname: evaluation metric (subdirectory name)
    :param scores: the concrete score classes
    :return:
    """
    X = []
    y = []
    label_encoder = LabelEncoder()
    encoded = label_encoder.fit_transform(scores)

    for i, score in enumerate(scores):
        fileList = os.listdir(os.path.join(dirname, classname, str(score)))

        for file in fileList:
            # print(file)
            data = load_file(os.path.join(dirname, classname, str(score),
                                          file),
                             isBeginner=False)
            circleList = to_circleList(data.copy())
            for a, circle in enumerate(circleList):
                circleList[a] = padding(circle)
            for circle in circleList:
                # print(circle.shape)
                X.append(circle)
                y.append([encoded[i]])

    X = np.array(X)
    y = np.array(y)

    y = to_categorical(y)

    # Randomly split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    return X_train, X_test, y_train, y_test
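The label pipeline above in isolation: raw scores are mapped to contiguous integer codes by `LabelEncoder`, and `to_categorical` then one-hot encodes those codes.

import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

codes = LabelEncoder().fit_transform([1, 3, 5])   # array([0, 1, 2])
y = to_categorical(np.array([0, 2, 1, 0]))        # shape (4, 3)
assert y.shape == (4, 3)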
Example #23
def validate_model_for_all_classes(model: Model) -> dict:
    class_labels = dict()
    class_entities = dict()
    results = dict()
    validation_labels = globals()['VALIDATION_LABELS']
    validation_data = globals()['VALIDATION_DATA']
    for i in range(len(validation_labels)):
        label = validation_labels[i]
        if label not in class_labels:
            class_labels[label] = list()
            class_entities[label] = list()
        class_labels[label].append(label)
        class_entities[label].append(validation_data[i])

    for key in class_labels.keys():
        # Append a sentinel '9' so that to_categorical sees the full 0-9
        # label range, then drop the sentinel row again with [:-1].
        label = np.asarray(class_labels[key] + ['9'])
        entities = np.asarray(class_entities[key])

        y_validate = to_categorical(label)[:-1]
        print(y_validate.shape)

        loss, acc, recall, precision = model.evaluate(x=entities, y=y_validate, batch_size=32)
        results[key] = [loss, acc, recall, precision]

    return results
Example #24
    def test_sequential_model_with_ds_input(self):
        columns = [fc.numeric_column('a')]
        model = keras.models.Sequential([
            df.DenseFeatures(columns),
            keras.layers.Dense(64, activation='relu'),
            keras.layers.Dense(20, activation='softmax')
        ])
        model.compile(optimizer='rmsprop',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'],
                      run_eagerly=testing_utils.should_run_eagerly())

        y = np.random.randint(20, size=(100, 1))
        y = np_utils.to_categorical(y, num_classes=20)
        x = {'a': np.random.random((100, 1))}
        ds1 = dataset_ops.Dataset.from_tensor_slices(x)
        ds2 = dataset_ops.Dataset.from_tensor_slices(y)
        ds = dataset_ops.Dataset.zip((ds1, ds2)).batch(5)
        model.fit(ds, steps_per_epoch=1)
        model.fit(ds, steps_per_epoch=1)
        model.evaluate(ds, steps=1)
        model.predict(ds, steps=1)
Example #25
  def score(self, x, y, **kwargs):
    """Returns the mean accuracy on the given test data and labels.

    Arguments:
        x: array-like, shape `(n_samples, n_features)`
            Test samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
            True labels for `x`.
        **kwargs: dictionary arguments
            Legal arguments are the arguments of `Sequential.evaluate`.

    Returns:
        score: float
            Mean accuracy of predictions on `x` wrt. `y`.

    Raises:
        ValueError: If the underlying model isn't configured to
            compute accuracy. You should pass `metrics=["accuracy"]` to
            the `.compile()` method of the model.
    """
    y = np.searchsorted(self.classes_, y)
    kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)

    loss_name = self.model.loss
    if hasattr(loss_name, '__name__'):
      loss_name = loss_name.__name__
    if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
      y = to_categorical(y)

    outputs = self.model.evaluate(x, y, **kwargs)
    if not isinstance(outputs, list):
      outputs = [outputs]
    for name, output in zip(self.model.metrics_names, outputs):
      if name == 'acc':
        return output
    raise ValueError('The model is not configured to compute accuracy. '
                     'You should pass `metrics=["accuracy"]` to '
                     'the `model.compile()` method.')
Example #26
    def score(self, x, y, **kwargs):
        """Returns the mean accuracy on the given test data and labels.

    Arguments:
        x: array-like, shape `(n_samples, n_features)`
            Test samples where `n_samples` is the number of samples
            and `n_features` is the number of features.
        y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)`
            True labels for `x`.
        **kwargs: dictionary arguments
            Legal arguments are the arguments of `Sequential.evaluate`.

    Returns:
        score: float
            Mean accuracy of predictions on `x` wrt. `y`.

    Raises:
        ValueError: If the underlying model isn't configured to
            compute accuracy. You should pass `metrics=["accuracy"]` to
            the `.compile()` method of the model.
    """
        y = np.searchsorted(self.classes_, y)
        kwargs = self.filter_sk_params(Sequential.evaluate, kwargs)

        loss_name = self.model.loss
        if hasattr(loss_name, '__name__'):
            loss_name = loss_name.__name__
        if loss_name == 'categorical_crossentropy' and len(y.shape) != 2:
            y = to_categorical(y)

        outputs = self.model.evaluate(x, y, **kwargs)
        if not isinstance(outputs, list):
            outputs = [outputs]
        for name, output in zip(self.model.metrics_names, outputs):
            if name in ['accuracy', 'acc']:
                return output
        raise ValueError('The model is not configured to compute accuracy. '
                         'You should pass `metrics=["accuracy"]` to '
                         'the `model.compile()` method.')
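How the `np.searchsorted(self.classes_, y)` line above maps arbitrary label values back to class indices, assuming `classes_` is sorted (scikit-learn stores it that way):

import numpy as np

classes_ = np.array([10, 20, 30])
y = np.array([20, 10, 30])
idx = np.searchsorted(classes_, y)   # array([1, 0, 2])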
Example #27
def test_model(model_path,
               test_sound_file_path,
               label,
               repetition,
               signal_length,
               take_components=None,
               training_path=None):
    if not os.path.isfile(model_path):
        raise Exception("No model file found")
    if not os.path.isfile(test_sound_file_path):
        raise Exception("No test sound file found")
    loaded_model = load_model(model_path)

    # Select random samples (and their features) from the test file, with
    # the signal length matching the trained length.
    x_test = []
    signal, sr = librosa.load(test_sound_file_path)
    for i in range(repetition):
        start_index = random.randint(0, len(signal) - signal_length - 1)
        new_signal = signal[start_index:start_index + signal_length]
        x_test.append([new_signal, sr])
    x_test = data_processing.get_features(x_test)

    # Transform the test set with PCA to match the loaded model's input dim.
    # PCA must be fitted on the original training set so the same components
    # are used.
    if take_components is not None:
        x_train = np.load(training_path, mmap_mode=None, allow_pickle=True)
        pca = PCA(n_components=take_components)
        pca.fit(x_train)
        x_test = pca.transform(x_test)

    # expand dims
    x_test = np.expand_dims(x_test, axis=2)

    # create the categorical vectors with expected label
    y_test = to_categorical(np.full((len(x_test)), label), num_classes=2)

    # evaluate
    _, accuracy = loaded_model.evaluate(x_test, y_test, batch_size=16)
Example #28
    def test_timeseries_classification_sequential_tf_rnn(self):
        np.random.seed(1337)
        (x_train, y_train), _ = testing_utils.get_test_data(train_samples=100,
                                                            test_samples=0,
                                                            input_shape=(4,
                                                                         10),
                                                            num_classes=2)
        y_train = np_utils.to_categorical(y_train)

        with base_layer.keras_style_scope():
            model = keras.models.Sequential()
            model.add(
                keras.layers.RNN(rnn_cell.LSTMCell(5),
                                 return_sequences=True,
                                 input_shape=x_train.shape[1:]))
            model.add(
                keras.layers.RNN(
                    rnn_cell.GRUCell(y_train.shape[-1],
                                     activation='softmax',
                                     dtype=dtypes.float32)))
            model.compile(loss='categorical_crossentropy',
                          optimizer=keras.optimizer_v2.adam.Adam(0.005),
                          metrics=['acc'],
                          run_eagerly=testing_utils.should_run_eagerly(),
                          experimental_run_tf_function=testing_utils.
                          should_run_tf_function())

        history = model.fit(x_train,
                            y_train,
                            epochs=15,
                            batch_size=10,
                            validation_data=(x_train, y_train),
                            verbose=2)
        self.assertGreater(history.history['val_acc'][-1], 0.7)
        _, val_acc = model.evaluate(x_train, y_train)
        self.assertAlmostEqual(history.history['val_acc'][-1], val_acc)
        predictions = model.predict(x_train)
        self.assertEqual(predictions.shape, (x_train.shape[0], 2))
Example #29
    def __init__(self, config):
        distorted_paths = config.distorted
        undistorted_paths = config.undistorted

        print(undistorted_paths)
        print(distorted_paths)

        dist = []
        undist = []

        for path in distorted_paths:
            dist += glob.glob(path, recursive=True)
        for path in undistorted_paths:
            undist += glob.glob(path, recursive=True)

        print("Total items in dataset:{}".format(len(dist) + len(undist)))

        print("Debug mode: {}".format(config.debug))
        if config.debug:
            dist = dist[:100]
            undist = undist[:100]

        labels_dist = np.ones(len(dist)).astype('int')
        labels_undist = np.zeros(len(undist)).astype('int')

        data = dist + undist
        labels = np.concatenate((labels_dist, labels_undist), axis=0)
        # binary classification
        categories = np_utils.to_categorical(labels, 2)

        X_train, X_test, Y_train, Y_test = train_test_split(
            data,
            categories,
            test_size=config.validation_split,
            random_state=2)

        self.X_train = load_imgs(data, (500, 500, 1), config.color_mode)
        self.Y_train = categories
Example #30
    def test_invalid_ionames_error(self):
        (x_train,
         y_train), (_,
                    _) = testing_utils.get_test_data(train_samples=_TRAIN_SIZE,
                                                     test_samples=100,
                                                     input_shape=(10, ),
                                                     num_classes=2)
        y_train = np_utils.to_categorical(y_train)

        def invalid_input_name_input_fn():
            input_dict = {'invalid_input_name': x_train}
            return input_dict, y_train

        def invalid_output_name_input_fn():
            input_dict = {'input_layer': x_train}
            output_dict = {'invalid_output_name': y_train}
            return input_dict, output_dict

        model = simple_functional_model()
        model.compile(loss='categorical_crossentropy',
                      optimizer='adam',
                      metrics=['acc'])
        est_keras = keras_lib.model_to_estimator(keras_model=model,
                                                 config=self._config)

        regexp_pattern = r'{} keys:(\s|.)*{}(\s|.)*Missed keys:(\s|.)*{}'

        with self.assertRaisesRegexp(
                keras_lib.FormattedKeyError,
                regexp_pattern.format('features', 'invalid_input_name',
                                      'input_layer')):
            est_keras.train(input_fn=invalid_input_name_input_fn, steps=100)

        with self.assertRaisesRegexp(
                keras_lib.FormattedKeyError,
                regexp_pattern.format('labels', 'invalid_output_name',
                                      'dense_1')):
            est_keras.train(input_fn=invalid_output_name_input_fn, steps=100)
Example #31
def create_dataset(sentences: Series, targets: Series, tokenizer: MyTokenizer):

    assert len(sentences) == len(
        targets
    ), 'Error - create_dataset - sentence and target series have different length'

    length_samples = len(sentences)

    x_sentence, y_sentence = [], []

    for i in range(length_samples):
        sentence = sentences.iloc[i]
        target = targets.iloc[i]

        x_sentence.append([
            tokenizer.word_to_index(word) for word in sentence.split(sep=' ')
            if word != ''
        ])
        y_sentence.append(
            [tokenizer.label_to_index(t) for t in str(target).split(sep=' ')])

    x_dataset = pad_sequences(sequences=x_sentence,
                              maxlen=MAX_WORD_SENTENCE,
                              padding='post',
                              value=tokenizer.word_to_index('PAD'))

    num_classes = tokenizer.n_labels
    y_dataset = []
    for sentence_targets in y_sentence:
        # Sum the one-hot vectors so a sentence carrying several labels gets
        # a k-hot target.
        cat_target = np.zeros(num_classes)
        for target in sentence_targets:
            cat_target += to_categorical(target, num_classes=num_classes)

        y_dataset.append(cat_target)

    y_dataset = np.array(y_dataset)

    return x_dataset, y_dataset
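The multi-label trick above, reduced to one sample: summing the one-hot vectors of each active label yields a k-hot target over the label set (note that a repeated label would push its entry above 1).

import numpy as np
from tensorflow.keras.utils import to_categorical

num_classes = 4
active = [0, 2]                   # two labels active on one sentence
k_hot = sum(to_categorical(t, num_classes=num_classes) for t in active)
# k_hot == [1., 0., 1., 0.]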
Example #32
File: gru_test.py  Project: MFChunga/poo
    def test_reset_after_GRU(self):
        num_samples = 2
        timesteps = 3
        embedding_dim = 4
        units = 2

        (x_train,
         y_train), _ = testing_utils.get_test_data(train_samples=num_samples,
                                                   test_samples=0,
                                                   input_shape=(timesteps,
                                                                embedding_dim),
                                                   num_classes=units)
        y_train = np_utils.to_categorical(y_train, units)

        inputs = keras.layers.Input(shape=[timesteps, embedding_dim])
        gru_layer = keras.layers.GRU(units, reset_after=True)
        output = gru_layer(inputs)
        gru_model = keras.models.Model(inputs, output)
        gru_model.compile('rmsprop',
                          'mse',
                          run_eagerly=testing_utils.should_run_eagerly())
        gru_model.fit(x_train, y_train)
        gru_model.predict(x_train)