Exemplo n.º 1
0
 def fit(self, X, y, **param):
     """Configure the layer layout, build the skflow estimator and train it.

     Args:
         X: Training inputs. ``X.shape[0]`` sizes the K-fold splitter and,
             when no ``neural_shape`` is given, ``len(X[0])`` is used as the
             input width.
         y: Training targets. When no ``neural_shape`` is given,
             ``len(y[0])`` is used as the output width.
         **param: Optional settings.
             neural_shape: Layer sizes ordered input, hidden..., output.
                 When absent the layout is built from ``self.hidden_nodes``.
             weights_matrix: Initial weights handed to ``self.set_weights``;
                 when absent the parameters come from ``initialize_param``.

     Returns:
         This object, so calls can be chained.
     """
     # ``in`` works on both Python 2 and 3; ``dict.has_key`` is Python 2 only.
     if 'neural_shape' in param:
         self.neural_shape = param.get("neural_shape")
         self.n_hidden = self.neural_shape[1:-1]
     else:
         self.neural_shape = self.hidden_nodes.tolist()
         self.neural_shape.insert(0, len(X[0]))
         self.neural_shape.append(len(y[0]))
         self.n_hidden = self.hidden_nodes
     # Derive the sizes from the final layout so both branches leave the
     # same set of attributes behind.
     self.n_input = self.neural_shape[0]
     self.n_output = self.neural_shape[-1]
     self.number_of_layers = len(self.neural_shape)

     self.kFold = KFold(X.shape[0], n_folds=5)
     self.weights_matrix = param.get('weights_matrix')

     # One (fan_in, fan_out) pair per weight matrix, one bias size per layer.
     self.weight_layers = list(zip(self.neural_shape[:-1],
                                   self.neural_shape[1:]))
     self.bias_layers = list(self.neural_shape[1:])
     # Materialised so the attribute is still usable after being iterated
     # (a bare ``zip`` is a one-shot iterator on Python 3).
     self.total_nodes_per_layer = list(
         zip(self.weight_layers, self.bias_layers))
     # Each layer contributes fan_in * fan_out weights plus fan_out biases.
     self.total_nodes = sum((fan_in + 1) * fan_out
                            for fan_in, fan_out in self.weight_layers)

     # ``is None`` rather than ``== None``: comparing an array of weights
     # with ``==`` is element-wise and cannot be used as a condition.
     if self.weights_matrix is None:
         self.W, self.b = initialize_param(self.weight_layers,
                                           self.bias_layers,
                                           self.uniform_init)
     else:
         self.W, self.b = self.set_weights(self.weights_matrix)
     # Iterators over the per-layer weights and biases.
     self.W_iter = iter(self.W)
     self.b_iter = iter(self.b)

     # Graph placeholders sized by the first and last layer.
     self.X = tf.placeholder("float", [None, self.neural_shape[0]],
                             name="input")
     self.y = tf.placeholder("float", [None, self.neural_shape[-1]],
                             name="output")
     self.network = skflow.TensorFlowEstimator(
         model_fn=self.model_fn,
         n_classes=0,
         steps=self.steps,
         learning_rate=self.learning_rate,
         batch_size=self.batch_size,
         optimizer=self.optimize,
         verbose=self.verbose,
         continue_training=True)

     if self.cross_validation:
         # Only the training indices of each fold are used.
         for train, test in self.kFold:
             self.network.fit(X[train], y[train])
     else:
         self.network.fit(X, y)
     return self
Exemplo n.º 2
0
def train(DIR="", saveDIR="/Users/xxximgs/DRNmodel"):
    """Fit a ``res_net`` classifier on the images under ``DIR``, forever.

    Prints the sub-directory names of ``DIR`` (skipping ``.DS_Store``), loads
    the data through ``convData`` and splits it into train and test parts.
    Each pass of the endless loop fits the classifier, prints the test
    accuracy and saves the model to ``saveDIR``. The function never returns.
    """
    skipped = set(['.DS_Store'])
    class_dirs = [entry for entry in os.listdir(DIR) if entry not in skipped]
    print('trainingLABELs... paste it to predictFunc!!\n', class_dirs)

    images, labels = convData(DIR)
    split = cross_validation.train_test_split(images, labels)
    data_train, data_test, label_train, label_test = split

    # Train a resnet classifier
    classifier = skflow.TensorFlowEstimator(model_fn=res_net,
                                            n_classes=10,
                                            batch_size=100,
                                            steps=20000,
                                            learning_rate=0.001)
    while True:
        classifier.fit(data_train, label_train, logdir='/Users/xxxx')
        predicted = classifier.predict(data_test)
        score = metrics.accuracy_score(label_test, predicted)
        print('Accuracy: {0:f}'.format(score))
        classifier.save(saveDIR)
Exemplo n.º 3
0
def dnn(nn_lr=0.1, nn_steps=5000, hidden_units=None):
    """Build an unfitted regression estimator with tanh hidden layers.

    Args:
        nn_lr: Learning rate passed to the estimator.
        nn_steps: Number of training steps passed to the estimator.
        hidden_units: Sizes of the hidden layers; defaults to ``[30, 30]``.

    Returns:
        A ``skflow.TensorFlowEstimator`` configured for regression
        (``n_classes=0``) with a batch size of 100.
    """
    # A fresh list per call: a list literal as the default would be one
    # object shared by every call and by every model closure built from it.
    layer_sizes = [30, 30] if hidden_units is None else list(hidden_units)

    def tanh_dnn(X, y):
        features = skflow.ops.dnn(X,
                                  hidden_units=layer_sizes,
                                  activation=skflow.tf.tanh)
        return skflow.models.linear_regression(features, y)

    regressor = skflow.TensorFlowEstimator(model_fn=tanh_dnn,
                                           n_classes=0,
                                           steps=nn_steps,
                                           learning_rate=nn_lr,
                                           batch_size=100)
    return regressor
def custom_dnn_model(X_train, y_train):
    """Fit a two-class DNN classifier and return the result of ``fit``.

    The model stacks three hidden layers of 50 units (``keep_prob=0.5``)
    and finishes with logistic regression.
    """
    def _model_fn(X, y):
        hidden = skflow.ops.dnn(X, [50, 50, 50], keep_prob=0.5)
        return skflow.models.logistic_regression(hidden, y)

    estimator_args = dict(model_fn=_model_fn,
                          n_classes=2,
                          batch_size=50,
                          learning_rate=1.5,
                          steps=2000)
    classifier = skflow.TensorFlowEstimator(**estimator_args)
    fitted = classifier.fit(X_train, y_train)
    return fitted
Exemplo n.º 5
0
 def testCustomModel(self):
     """A saved custom-model estimator restores and still predicts iris."""
     random.seed(42)
     save_path = '/tmp/tmp.saver2'
     iris = datasets.load_iris()

     def custom_model(X, y):
         return skflow.models.logistic_regression(X, y)

     original = skflow.TensorFlowEstimator(model_fn=custom_model,
                                           n_classes=3)
     original.fit(iris.data, iris.target)
     original.save(save_path)

     restored = skflow.TensorFlowEstimator.restore(save_path)
     self.assertEqual(type(restored), type(original))

     predicted = restored.predict(iris.data)
     score = accuracy_score(iris.target, predicted)
     failure_message = "Failed with score = {0}".format(score)
     self.assertGreater(score, 0.5, failure_message)
Exemplo n.º 6
0
def train(DIR="/XXXXXX", logdir='/XXXXXX'):
    """Train the CNN classifier on the images under ``DIR`` in an endless loop.

    Args:
        DIR: Directory whose sub-directory names are printed as the label
            list and which is handed to ``conv_data`` to load the data.
        logdir: Directory passed to ``classifier.fit`` as ``logdir``.

    Each pass of the loop fits the classifier, prints the accuracy on the
    held-out 20% split and saves the model. The function never returns.
    """
    ignored = set(['.DS_Store'])
    print('trainingLABELs... paste it to predictFunc!!\n',
          [clsdir for clsdir in os.listdir(DIR) if clsdir not in ignored])
    images, labels = conv_data(DIR)
    data_train, data_test, label_train, label_test = (
        cross_validation.train_test_split(images, labels,
                                          test_size=0.2, random_state=42))
    classifier = skflow.TensorFlowEstimator(
        model_fn=cnn_model, n_classes=NUM_CLASSES, batch_size=10, steps=1000,
        learning_rate=1e-4, optimizer='Adam', continue_training=True)
    while True:
        classifier.fit(data_train, label_train, logdir=logdir)
        score = metrics.accuracy_score(label_test,
                                       classifier.predict(data_test))
        print('Accuracy: {0:f}'.format(score))
        # NOTE(review): save_dir is not defined in this function; presumably
        # a module-level name -- confirm, otherwise this raises NameError.
        classifier.save(save_dir)
Exemplo n.º 7
0
    def fit(self, X, y, **param):
        """Build the network from ``neural_shape`` and train the estimator.

        Args:
            X: Training inputs, passed to the estimator's ``fit``.
            y: Training targets, passed to the estimator's ``fit``.
            **param: Settings.
                neural_shape: Required. Layer sizes ordered input,
                    hidden..., output.
                weights_matrix: Optional initial weights handed to
                    ``self.set_weights``; when absent the parameters come
                    from ``initialize_param``.

        Returns:
            Whatever the underlying estimator's ``fit`` returns.

        Raises:
            TypeError: If ``neural_shape`` is not supplied.
        """
        self.neural_shape = param.get("neural_shape")
        self.weights_matrix = param.get('weights_matrix')
        if self.neural_shape is None:
            # Indexing None below would raise the same exception type, just
            # with a message that does not name the missing argument.
            raise TypeError("fit() requires a 'neural_shape' keyword argument")

        self.n_input = self.neural_shape[0]
        self.n_output = self.neural_shape[-1]
        self.n_hidden = self.neural_shape[1]
        self.number_of_layers = len(self.neural_shape)

        # One (fan_in, fan_out) pair per weight matrix, one bias size per
        # layer.
        self.weight_layers = list(zip(self.neural_shape[:-1],
                                      self.neural_shape[1:]))
        self.bias_layers = list(self.neural_shape[1:])
        # Materialised so the attribute is still usable after being iterated
        # (a bare ``zip`` is a one-shot iterator on Python 3).
        self.total_nodes_per_layer = list(
            zip(self.weight_layers, self.bias_layers))
        # Each layer contributes fan_in * fan_out weights plus fan_out biases.
        self.total_nodes = sum((fan_in + 1) * fan_out
                               for fan_in, fan_out in self.weight_layers)

        # ``is None`` rather than ``== None``: comparing an array of weights
        # with ``==`` is element-wise and cannot be used as a condition.
        if self.weights_matrix is None:
            self.W, self.b = initialize_param(self.weight_layers,
                                              self.bias_layers,
                                              self.uniform_init)
        else:
            self.W, self.b = self.set_weights(self.weights_matrix)
        # Iterators over the per-layer weights and biases.
        self.W_iter = iter(self.W)
        self.b_iter = iter(self.b)

        # Graph placeholders sized by the first and last layer.
        self.X = tf.placeholder("float", [None, self.neural_shape[0]],
                                name="input")
        self.y = tf.placeholder("float", [None, self.neural_shape[-1]],
                                name="output")
        self.config_addon = skflow.addons.ConfigAddon(num_cores=4,
                                                      gpu_memory_fraction=0.6)
        self.network = skflow.TensorFlowEstimator(
            model_fn=self.model_fn,
            n_classes=0,
            steps=self.steps,
            learning_rate=self.learning_rate,
            batch_size=self.batch_size,
            optimizer=self.optimize,
            config_addon=self.config_addon,
            verbose=0)
        return self.network.fit(X, y)
Exemplo n.º 8
0
# Fit the estimator bound to `tflr` (created before this excerpt) and print
# its accuracy on the test split.
tflr.fit(X_train, y_train)
print(accuracy_score(tflr.predict(X_test), y_test))

# 3 layer neural network with rectified linear activation.

# Seed Python's RNG before building the classifier.
random.seed(42)
classifier = skflow.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                            n_classes=2,
                                            batch_size=128,
                                            steps=500,
                                            learning_rate=0.05)
# Train, then print the accuracy on the test split.
classifier.fit(X_train, y_train)
print(accuracy_score(classifier.predict(X_test), y_test))

# 3 layer neural network with hyperbolic tangent activation.


def dnn_tanh(X, y):
    """Model function: hidden layers of 10, 20 and 10 units with tanh
    activation, followed by logistic regression."""
    hidden_sizes = [10, 20, 10]
    hidden_out = skflow.ops.dnn(X, hidden_sizes, tf.tanh)
    return skflow.models.logistic_regression(hidden_out, y)


# Seed Python's RNG, then wrap the custom `dnn_tanh` model function in a
# generic estimator for a two-class problem.
random.seed(42)
classifier = skflow.TensorFlowEstimator(model_fn=dnn_tanh,
                                        n_classes=2,
                                        batch_size=128,
                                        steps=500,
                                        learning_rate=0.05)
# Train, then print the accuracy on the test split.
classifier.fit(X_train, y_train)
print(accuracy_score(classifier.predict(X_test), y_test))
Exemplo n.º 9
0
    vocab_processor = skflow.preprocessing.ByteProcessor(
        max_document_length=MAX_DOCUMENT_LENGTH)
    X_iter = ["some sentence", "some other sentence"]
    X_pred = ["some sentence", "some other sentence"]
    Y_iter = ["some sentence", "some other sentence"]

    x_iter = vocab_processor.transform(X_iter)
    y_iter = vocab_processor.transform(Y_iter)
    xpred = np.array(list(vocab_processor.transform(X_pred)))
    PATH = '/tmp/tf_examples/ntm/'

    if os.path.exists(PATH):
        translator = skflow.TensorFlowEstimator.restore(PATH)
    else:
        translator = skflow.TensorFlowEstimator(model_fn=translate_model,
                                                n_classes=256,
                                                continue_training=True)
    # print(zip(xpred, xpred_inp, predictions, text_outputs))
    translator.fit(x_iter, y_iter, logdir=PATH)
    # translator.save(PATH)

    predictions = translator.predict(xpred, axis=2)
    xpred_inp = vocab_processor.reverse(xpred)
    text_outputs = vocab_processor.reverse(predictions)
    try:
        for inp_data, input_text, pred, output_text in zip(
                xpred, xpred_inp, predictions, text_outputs):
            print(input_text, output_text)
            print(inp_data, pred)
    except Exception as e:
        print(e)
Exemplo n.º 10
0
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.

import random

import skflow
from sklearn import datasets, metrics, cross_validation

# Load the iris dataset and hold out 20% of it for testing; the fixed
# random_state makes the split reproducible.
iris = datasets.load_iris()
X_train, X_test, y_train, y_test = cross_validation.train_test_split(iris.data, iris.target,
    test_size=0.2, random_state=42)

# Seed Python's RNG before the model is built.
random.seed(42)

def my_model(X, y):
    """DNN with hidden layers of 10, 20 and 10 units and keep_prob=0.5,
    followed by logistic regression."""
    hidden_sizes = [10, 20, 10]
    hidden_out = skflow.ops.dnn(X, hidden_sizes, keep_prob=0.5)
    return skflow.models.logistic_regression(hidden_out, y)

# Wrap `my_model` in an estimator for the three iris classes, train it on
# the training split and print the accuracy on the test split.
classifier = skflow.TensorFlowEstimator(model_fn=my_model, n_classes=3)
classifier.fit(X_train, y_train)
score = metrics.accuracy_score(classifier.predict(X_test), y_test)
print('Accuracy: {0:f}'.format(score))

Exemplo n.º 11
0
def average_model(X, y):
    """Embed the word ids, reduce over axis 1 and classify the result.

    Despite the name, the reduction is ``tf.reduce_max`` (a maximum over
    axis 1), not a mean.
    """
    embedded = skflow.ops.categorical_variable(X,
                                               n_classes=n_words,
                                               embedding_size=EMBEDDING_SIZE,
                                               name='words')
    pooled = tf.reduce_max(embedded, reduction_indices=1)
    return skflow.models.logistic_regression(pooled, y)


def rnn_model(X, y):
    """Embed the word ids, run a GRU over them and classify the last encoding."""
    embedded = skflow.ops.categorical_variable(X,
                                               n_classes=n_words,
                                               embedding_size=EMBEDDING_SIZE,
                                               name='words')
    # Split along axis 1 into MAX_DOCUMENT_LENGTH pieces and drop that axis
    # from each piece.
    pieces = tf.split(1, MAX_DOCUMENT_LENGTH, embedded)
    word_list = []
    for piece in pieces:
        word_list.append(tf.squeeze(piece, [1]))
    gru = rnn_cell.GRUCell(EMBEDDING_SIZE)
    _, encoding = rnn.rnn(gru, word_list, dtype=tf.float32)
    last_encoding = encoding[-1]
    return skflow.models.logistic_regression(last_encoding, y)


# Estimator around `rnn_model` for 15 classes; continue_training=True is set
# so that the repeated fit() calls below keep training the same model.
classifier = skflow.TensorFlowEstimator(model_fn=rnn_model,
                                        n_classes=15,
                                        steps=1000,
                                        continue_training=True)

# Continuously train for 1000 steps & predict on test set.
# The loop has no exit condition; it runs until the process is interrupted.
while True:
    classifier.fit(X_train, y_train)
    score = metrics.accuracy_score(classifier.predict(X_test), y_test)
    print("Accuracy: %f" % score)
Exemplo n.º 12
0
# Create random dataset: 100 sorted points in [-100, 100) and two targets
# per point, pi*sin(x) and pi*cos(x).
rng = np.random.RandomState(1)
X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T

# Fit regression DNN models, one per hidden-layer configuration.
regressors = []
options = [[2], [10, 10], [20, 20]]
for hidden_units in options:
    # NOTE(review): tanh_dnn reads `hidden_units` from the enclosing scope
    # when it is called, not when it is defined. Each regressor is fitted
    # inside this iteration, so the value is the expected one here -- confirm
    # nothing calls the model function again after the loop has moved on.
    def tanh_dnn(X, y):
        features = skflow.ops.dnn(X, hidden_units=hidden_units,
          activation=skflow.tf.tanh)
        return skflow.models.linear_regression(features, y)

    regressor = skflow.TensorFlowEstimator(model_fn=tanh_dnn, n_classes=0,
        steps=500, learning_rate=0.1, batch_size=100)
    regressor.fit(X, y)
    # Error is measured on the training data itself.
    score = mean_squared_error(regressor.predict(X), y)
    print("Mean Squared Error for {0}: {1:f}".format(str(hidden_units), score))
    regressors.append(regressor)

# Predict on an evenly spaced grid from -100 to 100 in steps of 0.1.
X_test = np.arange(-100.0, 100.0, 0.1)[:, np.newaxis]
y_1 = regressors[0].predict(X_test)
y_2 = regressors[1].predict(X_test)
y_3 = regressors[2].predict(X_test)

# Plot the results
plt.figure()
plt.scatter(y[:, 0], y[:, 1], c="k", label="data")
plt.scatter(y_1[:, 0], y_1[:, 1], c="g",
Exemplo n.º 13
0
    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
        embedding_size=EMBEDDING_SIZE, name='words')
    in_X, in_y, out_y = skflow.ops.seq2seq_inputs(
        word_list, y, MAX_DOCUMENT_LENGTH, MAX_DOCUMENT_LENGTH)
    cell = tf.nn.rnn_cell.OutputProjectionWrapper(tf.nn.rnn_cell.GRUCell(HIDDEN_SIZE), 256)
    decoding, _, sampling_decoding, _ = skflow.ops.rnn_seq2seq(in_X, in_y, cell)
    return skflow.ops.sequence_classifier(decoding, out_y, sampling_decoding)


# Directory used both as the training logdir and as the save/restore target.
PATH = '/tmp/tf_examples/ntm_words/'

# Resume from a previously saved model when the directory exists; otherwise
# build a new estimator.
if os.path.exists(PATH):
    translator = skflow.TensorFlowEstimator.restore(PATH)
else:
    translator = skflow.TensorFlowEstimator(model_fn=translate_model,
        n_classes=n_words,
        optimizer='Adam', learning_rate=0.01, batch_size=128,
        continue_training=True)

# Train, save and print sample translations, repeatedly. The loop has no
# exit condition; it runs until the process is interrupted.
while True:
    translator.fit(X_train, y_train, logdir=PATH)
    translator.save(PATH)

    predictions = translator.predict(xpred, axis=2)
    # Map the id sequences back to text with the matching vocabulary.
    xpred_inp = X_vocab_processor.reverse(xpred)
    text_outputs = y_vocab_processor.reverse(predictions)
    for inp_data, input_text, pred, output_text, gold in zip(xpred, xpred_inp,
        predictions, text_outputs, ygold):
        print('English: %s. French (pred): %s, French (gold): %s' %
            (input_text, output_text, gold.decode('utf-8')))
        print(inp_data, pred)
Exemplo n.º 14
0
 def testUnfitted(self):
     """predict() and save() raise NotFittedError before fit() is called."""
     unfitted = skflow.TensorFlowEstimator(model_fn=None, n_classes=1)
     self.assertRaises(base.NotFittedError, unfitted.predict, [1, 2, 3])
     self.assertRaises(base.NotFittedError, unfitted.save, '/tmp/path')
Exemplo n.º 15
0
        except IndexError:
            pass


    net = tf.nn.avg_pool(net,
                         ksize=[1, net.get_shape().as_list()[1],
                                net.get_shape().as_list()[2], 1],
                         strides=[1, 1, 1, 1], padding='VALID')
    net = tf.reshape(
        net,
        [-1, net.get_shape().as_list()[1] *
         net.get_shape().as_list()[2] *
         net.get_shape().as_list()[3]])

    return skflow.models.logistic_regression(net, y)


# Download and load MNIST data.
mnist = input_data.read_data_sets('MNIST_data')

# Train a resnet classifier
classifier = skflow.TensorFlowEstimator(
    model_fn=res_net, n_classes=10, batch_size=100, steps=20000,
    learning_rate=0.001)

classifier.fit(mnist.train.images, mnist.train.labels)

# Calculate accuracy on the MNIST test split.
score = metrics.accuracy_score(mnist.test.labels, classifier.predict(mnist.test.images))
print('Accuracy: {0:f}'.format(score))
Exemplo n.º 16
0
# Seed Python's RNG.
random.seed(42)

# Load dataset and split it into train / test subsets.

digits = datasets.load_digits()
# The image form of the data is used (not the flattened `data` attribute),
# since the model below adds a channel axis and convolves over it.
X = digits.images
y = digits.target

# Hold out 20% for testing; the fixed random_state makes the split
# reproducible.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.2, random_state=42)

# TensorFlow model using Scikit Flow ops


def conv_model(X, y):
    """One 3x3 convolution with 12 filters, max-reduced over axes 1 and 2,
    then logistic regression."""
    n_filters = 12
    # Append a trailing axis at position 3 before convolving.
    X = tf.expand_dims(X, 3)
    conv_out = skflow.ops.conv2d(X, n_filters, [3, 3])
    pooled = tf.reduce_max(conv_out, [1, 2])
    features = tf.reshape(pooled, [-1, n_filters])
    return skflow.models.logistic_regression(features, y)


# Create a classifier, train and predict.
classifier = skflow.TensorFlowEstimator(model_fn=conv_model,
                                        n_classes=10,
                                        steps=500,
                                        learning_rate=0.05,
                                        batch_size=128)
classifier.fit(X_train, y_train)
# Accuracy on the test split.
score = metrics.accuracy_score(classifier.predict(X_test), y_test)
print('Accuracy: {0:f}'.format(score))
Exemplo n.º 17
0
    with tf.variable_scope('conv2'):
        conv2 = skflow.ops.conv2d(pool1,
                                  n_filters=64,
                                  filter_shape=[5, 5],
                                  bias=True,
                                  activation=tf.nn.relu)
        pool2 = max_pool_2x2(conv2)

    pool2_flat = tf.reshape(pool2, [-1, 7 * 7 * 64])
    fc1 = skflow.ops.dnn(pool2_flat, [1024],
                         activation=tf.nn.relu,
                         keep_prob=0.5)
    return skflow.models.logistic_regression(fc1, y)


# Train the CNN defined by `conv_model`.
classifier_cnn = skflow.TensorFlowEstimator(model_fn=conv_model,
                                            n_classes=10,
                                            batch_size=100,
                                            steps=20000,
                                            learning_rate=0.001)
classifier_cnn.fit(X_train, y_train)
# Predict in slices of 100 rows, covering rows 0..27999 of X_test.
# np.append returns a new array each time, so the results are re-copied on
# every iteration.
cnn_y_predict = []
for i in range(100, 28001, 100):
    cnn_y_predict = np.append(cnn_y_predict,
                              classifier_cnn.predict(X_test[i - 100:i]))
# Build the ImageId/Label table (ids 1..28000) and write it as CSV.
cnn_submission = pd.DataFrame({
    'ImageId': range(1, 28001),
    'Label': np.int32(cnn_y_predict)
})
# NOTE(review): to_csv also writes the DataFrame index as a first column by
# default -- confirm the consumer of this file expects that.
cnn_submission.to_csv('cnn_submission.csv')
Exemplo n.º 18
0
	return skflow.models.logistic_regression(h_fc1, y, class_weight=None)
def CNNmodel(X, y):
    """Model function wrapper: call ``conv_model`` with a new float32
    placeholder as its ``keep_prob``."""
    dropout_keep = tf.placeholder(tf.float32)
    model = conv_model(X, y, keep_prob=dropout_keep)
    return model
### Linear classifier.
# classifier = skflow.TensorFlowLinearClassifier(
#     n_classes=NUM_CLASSES, batch_size=100, steps=1000, learning_rate=0.01)
# classifier.fit(data_train, label_train)
# score = metrics.accuracy_score(label_test, classifier.predict(data_test))
# print('Accuracy: {0:f}'.format(score))
def train(DIR="", saveDIR="/Users/xxxx"):
    """Train the ``CNNmodel`` classifier on the images under ``DIR`` forever.

    Args:
        DIR: Directory whose sub-directory names are printed as the label
            list and which is handed to ``convData`` to load the data.
        saveDIR: Path the classifier is saved to after every training round.

    Each pass of the loop fits the classifier, prints the accuracy on the
    held-out split and saves the model. The function never returns.
    """
    ignored = set(['.DS_Store'])
    print('trainingLABELs... paste it to predictFunc!!\n',
          [clsdir for clsdir in os.listdir(DIR) if clsdir not in ignored])
    images, labels = convData(DIR)
    data_train, data_test, label_train, label_test = (
        cross_validation.train_test_split(images, labels))
    classifier = skflow.TensorFlowEstimator(
        model_fn=CNNmodel, n_classes=NUM_CLASSES, batch_size=10, steps=200,
        learning_rate=1e-4, optimizer='Adam', continue_training=True)
    while True:
        # NOTE(review): logdir is not defined in this function; presumably a
        # module-level name -- confirm, otherwise this raises NameError.
        classifier.fit(data_train, label_train, logdir=logdir)
        score = metrics.accuracy_score(label_test,
                                       classifier.predict(data_test))
        print('Accuracy: {0:f}'.format(score))
        classifier.save(saveDIR)
 # ['chino', 'eri', 'hanayo', 'honoka', 'kotori', 'maki', 'niko', 'nozomi', 'rin', 'umi']
def predictAns(filename="rin/show.png", isShow=True, model='/Users/xxxx'):
    """Restore a saved classifier and predict the label for one image file.

    Args:
        filename: Image path handed to ``openCVmod.FaceRecognition``.
        isShow: Forwarded to ``openCVmod.FaceRecognition`` as ``isShow``.
        model: Path the classifier is restored from.
    """
    classifier = skflow.TensorFlowEstimator.restore(model)
    img, altfilename, frame, FACEflag = openCVmod.FaceRecognition(
        filename, isShow=isShow, saveStyle='whole', workDIR='')
    # Resize to 28x28 before prediction.
    img = openCVmod.adjustIMG(img, isHC=True, K=0, size=(28, 28))
    result = classifier.predict(img)
    # NOTE(review): `label` is not defined in this function (presumably a
    # module-level lookup of class names) and `anslabel` is never returned or
    # used in the visible code -- the function looks truncated; confirm.
    anslabel = label[result]
Exemplo n.º 19
0
        yield u"какое-то приложение"
        yield u"какое-то другое приложение"


# Translation model

hidden_size = 10


def translate_model(X, y):
    """Placeholder model function: print both input shapes, return ``(X, y)``.

    The seq2seq implementation is still disabled (see the commented sketch
    below), so the inputs are passed straight through.
    """
    # Formatted into one string so the output is the same under Python 2 and
    # Python 3 (the original used the Python 2 print statement).
    print("%s %s" % (X.get_shape(), y.get_shape()))

    #    in_X, in_y, out_y = skflow.ops.seq2seq_inputs(X, y)
    #    cell = tf.rnn_cell.GRUCell(hidden_size)
    #    decoding = seq2seq.basic_rnn_seq2seq(in_X, in_y, cell)
    #    return skflow.ops.sequence_classifier(decoding, out_y)
    return X, y


# Build a vocabulary over the two sample sentences; documents are limited to
# three tokens.
vocab_processor = skflow.preprocessing.VocabularyProcessor(
    max_document_length=3)
vocab_processor.fit(["some sentence", "some other sentence"])

# Encode the source and target sentence streams as id sequences.
xiter = vocab_processor.transform(X_iter())
yiter = vocab_processor.transform(y_iter())

translator = skflow.TensorFlowEstimator(model_fn=translate_model,
                                        n_classes=128)
translator.fit(xiter, yiter)
# Call form of print: valid on Python 3 and, with a single argument,
# equivalent to the Python 2 statement.
print(translator.predict(X_iter()))
Exemplo n.º 20
0
    net = tf.reshape(net, [-1, net_shape[1] * net_shape[2] * net_shape[3]])

    return skflow.models.logistic_regression(net, y)


# Download and load MNIST data.
mnist = input_data.read_data_sets('MNIST_data')

# Restore model if graph is saved into a folder.
if os.path.exists("models/resnet/graph.pbtxt"):
    classifier = skflow.TensorFlowEstimator.restore("models/resnet/")
else:
    # Create a new resnet classifier.
    classifier = skflow.TensorFlowEstimator(model_fn=res_net,
                                            n_classes=10,
                                            batch_size=100,
                                            steps=100,
                                            learning_rate=0.001,
                                            continue_training=True)

# Alternate 100-step training rounds with an accuracy report. The loop has
# no exit condition; it runs until the process is interrupted.
while True:
    # Train model and save summaries into logdir.
    classifier.fit(mnist.train.images,
                   mnist.train.labels,
                   logdir="models/resnet/")

    # Calculate accuracy on the test split, predicting in batches of 64.
    score = metrics.accuracy_score(
        mnist.test.labels, classifier.predict(mnist.test.images,
                                              batch_size=64))
    print('Accuracy: {0:f}'.format(score))
        h_pool3 = max_pool_2x2(h_conv3)
        # reshape tensor into a batch of vectors
        h_pool3_flat = tf.reshape(
            h_pool3, [-1, IMAGE_SIZE // 8 * IMAGE_SIZE // 8 * 128])
    # densely connected layer with 1024 neurons
    h_fc1 = skflow.ops.dnn(h_pool3_flat, [500, 500],
                           activation=tf.nn.relu,
                           keep_prob=0.5)
    return skflow.models.linear_regression(h_fc1, y)


# Regression estimator (n_classes=0) around `cnn_model`. continue_training
# is set so that the repeated fit() calls below keep training one model.
estimator = skflow.TensorFlowEstimator(
    model_fn=cnn_model,
    n_classes=0,
    batch_size=BATCH_SIZE,
    early_stopping_rounds=EARLY_STOP_PATIENCE,
    steps=10,
    optimizer='Adam',
    learning_rate=exp_decay,
    continue_training=True)

title = 'learning curve for cnn'
# to plot the learning curve, continue_training must be set False
# generate_learning_curve(estimator, title, 'mean_squared_error', train_dataset, train_labels)

# Train in 100 rounds of 10 steps each (steps=10 above) and report the
# validation error after every round.
# `range` instead of `xrange` so the loop also runs on Python 3.
for i in range(100):
    estimator.fit(train_dataset, train_labels, logdir='log')
    score = metrics.mean_squared_error(validation_labels,
                                       estimator.predict(validation_dataset))
    print('mean squared error: {0:f}'.format(score))
Exemplo n.º 22
0
def train(DataSet,
          saveDIR="/XXXXXX",
          logdir='/tmp/tf_examples/word_rnn',
          splittype='char'):
    """Train a three-class GRU text classifier on ``DataSet`` in an endless loop.

    ``DataSet`` is a sequence of items whose element 0 is the text and whose
    element 1 is the target. With ``splittype == 'ma'`` the text is tokenised
    through ``natural_language_processing.MA.get_mecabCP``; otherwise it is
    split into single characters. Each pass of the loop fits the classifier,
    saves it to ``saveDIR`` and prints the test accuracy. The function never
    returns.
    """
    MAX_DOCUMENT_LENGTH = 140
    EMBEDDING_SIZE = 50

    def average_model(X, y):
        embedded = skflow.ops.categorical_variable(
            X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
        pooled = tf.reduce_max(embedded, reduction_indices=1)
        return skflow.models.logistic_regression(pooled, y)

    def rnn_model(X, y):
        """Recurrent model mapping a sequence of word ids to a class."""
        # Look up an embedding for every word id.
        embedded = skflow.ops.categorical_variable(
            X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
        # One tensor per position, with the document-length axis removed.
        word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH,
                                             embedded)
        # GRU cell whose hidden size equals the embedding size, run over
        # every position.
        gru = rnn_cell.GRUCell(EMBEDDING_SIZE)
        _, encoding = rnn.rnn(gru, word_list, dtype=tf.float32)
        # The last element of the encoding is the feature vector for the
        # logistic regression.
        return skflow.models.logistic_regression(encoding[-1], y)

    # Choose the tokeniser once, then rewrite every text as space-joined
    # tokens.
    if splittype == 'ma':
        def tokenize(text):
            return [
                w[0] for w in natural_language_processing.MA.get_mecabCP(text)
            ]
    else:
        tokenize = list
    DataSet = [(' '.join(tokenize(pair[0])), pair[1]) for pair in DataSet]

    texts = [item[0] for item in DataSet]
    targets = [item[1] for item in DataSet]
    X_train, X_test, y_train, y_test = cross_validation.train_test_split(
        texts, targets)

    # Fit the vocabulary on the training texts only, then encode both splits.
    vocab_processor = skflow.preprocessing.VocabularyProcessor(
        MAX_DOCUMENT_LENGTH)
    X_train = np.array(list(vocab_processor.fit_transform(X_train)))
    X_test = np.array(list(vocab_processor.transform(X_test)))
    n_words = len(vocab_processor.vocabulary_)
    print('Total words: %d' % n_words)

    classifier = skflow.TensorFlowEstimator(model_fn=rnn_model,
                                            n_classes=3,
                                            steps=10,
                                            optimizer='Adam',
                                            learning_rate=0.01,
                                            continue_training=True)
    while True:
        classifier.fit(X_train, y_train, logdir=logdir)
        classifier.save(saveDIR)
        predicted = classifier.predict(X_test)
        score = metrics.accuracy_score(y_test, predicted)
        print('Accuracy: {0:f}'.format(score))
Exemplo n.º 23
0
    # maps word indexes of the sequence into [batch_size, sequence_length,
    # EMBEDDING_SIZE].
    word_vectors = skflow.ops.categorical_variable(
        X, n_classes=n_words, embedding_size=EMBEDDING_SIZE, name='words')
    # Split into list of embedding per word, while removing doc length dim.
    # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE].
    word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
    # Create an unrolled Recurrent Neural Networks to length of
    # MAX_DOCUMENT_LENGTH and passes word_list as inputs for each unit.
    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
    # Given encoding of RNN, take encoding of last step (e.g hidden size of the
    # neural network of last step) and pass it as features for logistic
    # regression over output classes.
    return skflow.models.logistic_regression(encoding[-1], y)


# Estimator around `rnn_model` for 15 classes; continue_training=True is set
# so that the repeated fit() calls below keep training the same model.
classifier = skflow.TensorFlowEstimator(model_fn=rnn_model,
                                        n_classes=15,
                                        steps=1000,
                                        optimizer='Adam',
                                        learning_rate=0.01,
                                        continue_training=True)

# Continuously train for 1000 steps & predict on test set.
# The loop has no exit condition; it runs until the process is interrupted.
while True:
    classifier.fit(X_train, y_train, logdir='/tmp/tf_examples/word_rnn')
    score = metrics.accuracy_score(classifier.predict(X_test), y_test)
    print('Accuracy: {0:f}'.format(score))
Exemplo n.º 24
0
        in_X, in_y, encoder_cell, decoder_cell)
    return skflow.ops.sequence_classifier(decoding, out_y, sampling_decoding)


def get_language_model(hidden_size):
    """Return a model function for a language model with ``hidden_size`` GRU units."""
    def language_model(X, y):
        # One-hot encode the inputs over 256 values, then split inputs and
        # targets into MAX_DOC_LENGTH per-position pieces along axis 1.
        one_hot = skflow.ops.one_hot_matrix(X, 256)
        step_inputs = skflow.ops.split_squeeze(1, MAX_DOC_LENGTH, one_hot)
        step_targets = skflow.ops.split_squeeze(1, MAX_DOC_LENGTH, y)
        # GRU cell wrapped so that its output is projected to size 256.
        gru = tf.nn.rnn_cell.GRUCell(hidden_size)
        projected_cell = tf.nn.rnn_cell.OutputProjectionWrapper(gru, 256)
        step_outputs, _ = tf.nn.rnn(projected_cell, step_inputs,
                                    dtype=tf.float32)
        return skflow.ops.sequence_classifier(step_outputs, step_targets)

    return language_model


### Training model.

# Estimator around the language model built for HIDDEN_SIZE units, with 256
# output classes.
estimator = skflow.TensorFlowEstimator(
    model_fn=get_language_model(HIDDEN_SIZE),
    n_classes=256,
    optimizer='Adam',
    learning_rate=0.01,
    steps=1000,
    batch_size=64,
    continue_training=True)

# Train for the configured 1000 steps.
estimator.fit(X, y)