def main(unused_argv):
    """Train two iris DNN classifiers and print whether the monitored one wins.

    The iris data is split 80/20 into train/test, and the training part is
    split 80/20 again into train/validation (both splits use
    ``random_state=42``). ``classifier1`` is fitted without any monitor;
    ``classifier2`` is fitted with a ``ValidationMonitor`` built on the
    validation split. Both are scored on the test split and
    ``score2 > score1`` is printed.

    Args:
        unused_argv: Ignored.
    """
    iris = datasets.load_iris()

    # First split holds out the test set, second split carves the validation
    # set out of what is left for training.
    x_fit, x_test, y_fit, y_test = train_test_split(
        iris.data, iris.target, test_size=0.2, random_state=42)
    x_fit, x_val, y_fit, y_val = train_test_split(
        x_fit, y_fit, test_size=0.2, random_state=42)

    val_monitor = learn.monitors.ValidationMonitor(
        x_val, y_val, early_stopping_rounds=200)

    # classifier with early stopping on training data
    classifier1 = learn.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir='/tmp/iris_model/')
    classifier1.fit(x=x_fit, y=y_fit, steps=2000)
    predictions1 = classifier1.predict(x_test)
    score1 = metrics.accuracy_score(y_test, predictions1)

    # classifier with early stopping on validation data, save frequently for
    # monitor to pick up new checkpoints.
    frequent_checkpoints = tf.contrib.learn.RunConfig(save_checkpoints_secs=1)
    classifier2 = learn.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=3,
        model_dir='/tmp/iris_model_val/',
        config=frequent_checkpoints)
    classifier2.fit(x=x_fit, y=y_fit, steps=2000, monitors=[val_monitor])
    predictions2 = classifier2.predict(x_test)
    score2 = metrics.accuracy_score(y_test, predictions2)

    # In many applications, the score is improved by using early stopping
    print(score2 > score1)
def train(self, train, *args, **kwargs):
    """Fit a skflow DNN classifier on the vectorised questions in ``train``.

    Missing keyword options are filled with defaults (``hidden_units``,
    ``n_classes``, ``steps``) and every effective value is logged. All
    keyword arguments are then forwarded to ``TensorFlowDNNClassifier``.

    Args:
        train: Iterable of questions that also exposes ``target()`` for the
            labels. Each question is passed to ``self.create_vector``.
        *args: Accepted but not used.
        **kwargs: Options forwarded to ``TensorFlowDNNClassifier``.

    Returns:
        The fitted classifier.
    """
    import numpy as np
    from tensorflow.contrib import learn as skflow

    # (option name, default value, log message) in the order they are logged.
    option_defaults = (
        ('hidden_units', [10, 20, 10], 'Hidden Units = {}'),
        ('n_classes', 5, 'n_classes = {}'),
        ('steps', 5000, 'Number of steps = {}'),
    )
    for option, fallback, message in option_defaults:
        kwargs.setdefault(option, fallback)
        self.logger.info(message.format(kwargs[option]))

    train_labels = train.target()
    self.logger.info('Loading dictionary from {}'.format(self.glove_data))
    self.logger.info('Creating vectors for each question')
    question_vectors = [self.create_vector(question) for question in train]
    x_train = np.asarray(question_vectors)
    y_train = self.encoder.transform(train_labels)
    self.logger.info('Encoded classes = {}'.format(self.encoder.classes_))

    model = skflow.TensorFlowDNNClassifier(**kwargs)
    self.logger.info('Fitting model')
    model.fit(x_train, y_train)
    return model
def run_training():
    """Read one batch from the training CSV through TF queues and fit a DNN on it.

    Relies on module-level names that are not defined in this function:
    ``file_train``, ``file_test``, ``FEATURES``, ``batch_size``, ``tf`` and
    ``skflow``.

    Each CSV row is decoded as 13 float columns followed by one integer
    column. The first 13 columns are packed into the feature tensor and the
    last column, minus one, is used as the label.

    The original version referenced ``coord`` and ``threads`` without ever
    creating them and never started the queue runners, so dequeuing a batch
    could not complete. This version creates a ``Coordinator``, starts the
    queue runners, initialises the local variables as well as the global
    ones, and always stops and joins the threads.
    """
    # 13 float feature columns followed by one integer label column.
    record_defaults = [[1.] for _ in range(13)] + [[1]]

    # Training input pipeline.
    filename_queue = tf.train.string_input_producer(
        [file_train], num_epochs=1, shuffle=False)
    reader = tf.TextLineReader(skip_header_lines=0)
    _, csv_row = reader.read(filename_queue)
    train_columns = tf.decode_csv(csv_row, record_defaults=record_defaults)
    features = tf.pack(train_columns[:13])
    raw_label = train_columns[13]

    # Test input pipeline. NOTE(review): it is built but never consumed in
    # this function; kept so the graph matches the original.
    filename_queue_test = tf.train.string_input_producer(
        [file_test], num_epochs=1, shuffle=False)
    reader = tf.TextLineReader(skip_header_lines=0)
    _, csv_row_test = reader.read(filename_queue_test)
    test_columns = tf.decode_csv(csv_row_test,
                                 record_defaults=record_defaults)
    features_test = tf.pack(test_columns[:13])

    min_after_dequeue = 100000
    capacity = min_after_dequeue + FEATURES * batch_size
    # The label column is shifted down by one before batching.
    images_batch, label_batch = tf.train.batch(
        [features, raw_label - 1],
        batch_size=batch_size,
        capacity=capacity,
        num_threads=1)

    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=4,
        optimizer=tf.train.GradientDescentOptimizer(learning_rate=0.1))

    # string_input_producer(num_epochs=...) keeps its epoch counter in a
    # local variable, so local variables must be initialised too.
    init = [tf.initialize_all_variables(), tf.initialize_local_variables()]

    with tf.Session() as sess:
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            print('hola')
            for _ in range(1):
                example, label = sess.run([images_batch, label_batch])
                classifier.fit(example, label)
            print('hola')
        finally:
            # Always shut the reader threads down, even if fitting fails.
            coord.request_stop()
            coord.join(threads)
def __init__(self, X: np.array, Y: np.array, tune_parameters=False):
    """Set up a skflow DNN classifier sized to the labels in ``Y``.

    Args:
        X: Feature matrix; it must provide ``todense()``.
        Y: Labels; the number of distinct values becomes ``n_classes``.
        tune_parameters: When true, a random-search parameter distribution
            is stored on ``self.param_dist_random``.

    NOTE(review): the base initializer is always called with
    ``tune_parameters=False`` regardless of this argument - confirm that
    this is intentional.
    """
    super().__init__(X, Y, tune_parameters=False)

    # TensorFlow/Skflow doesn't support sparse matrices
    self.X = X.todense()
    n_distinct_labels = len(np.unique(Y))

    if tune_parameters:
        # NOTE(review): which `random` module this is cannot be seen from
        # here; verify that `random.random(100)` is valid for it.
        self.param_dist_random = {
            'learning_rate': random.random(100),
            'optimizer': ['Adam'],
            'hidden_units': [sp_randint(50, 500), sp_randint(50, 500)],
        }

    # hidden_units, steps, learning_rate and optimizer are read from
    # attributes that are not assigned in this method.
    build_classifier = skflow.TensorFlowDNNClassifier
    self.clf = build_classifier(
        hidden_units=self.hidden_units,
        n_classes=n_distinct_labels,
        steps=self.steps,
        learning_rate=self.learning_rate,
        verbose=0,
        optimizer=self.optimizer,
    )
def question2():
    """Train a DNN classifier on the ``weight`` column and print its RMSE.

    Reads ``./data/submit-hanmingli-prog2q1.csv``, index-encodes ``weight``,
    splits 75/25 into train/test, fits a 10-20-10 network with a validation
    monitor on the test split, and prints the root mean squared error
    between the predictions and the test labels.
    """
    print()
    print("***Question 2***")

    # Read dataset
    csv_path = os.path.join("./data/", "submit-hanmingli-prog2q1.csv")
    df = pd.read_csv(csv_path, na_values=['NA', '?'])

    weight_classes = encode_text_index(df, "weight")

    # Create x (predictors) and y (expected outcome)
    x, y = to_xy(df, 'weight')
    num_classes = len(weight_classes)

    # Split into train/test
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=0.25, random_state=45)

    # Create a deep neural network with 3 hidden layers of 10, 20, 10
    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=num_classes,
        steps=10000)

    # Early stopping
    early_stop = skflow.monitors.ValidationMonitor(
        x_test, y_test,
        early_stopping_rounds=10000,
        print_steps=100,
        n_classes=num_classes)

    # Fit/train neural network
    classifier.fit(x_train, y_train, monitor=early_stop)

    # Measure accuracy
    predictions = classifier.predict(x_test)
    rmse = np.sqrt(metrics.mean_squared_error(predictions, y_test))
    print("Final score (RMSE): {}".format(rmse))
def main():
    """Run 10-fold cross-validation of a DNN on the letter-recognition data.

    Reads ``letter-recognition.csv`` (no header): column 0 is the letter
    label and the remaining columns are the features. Letters are mapped
    to integers ('A' -> 0, 'B' -> 1, ...). Prints the list of per-fold
    accuracies and the mean accuracy as a percentage.
    """
    n_folds = 10
    n_classes = 26

    # read in the input data (named `frame` to avoid shadowing builtin input)
    frame = pd.read_csv('letter-recognition.csv', header=None)
    data = np.array(frame.values[:, 1:])

    # transform letters to numbers ('A'->0, 'B'->1, and so on)
    labels = np.array(
        [ord(letter) - ord('A') for letter in frame.values[:, 0]])

    # define the classifier with 3 layers (100 units on each layer) and
    # 20000 steps
    # NOTE(review): the same estimator instance is refit on every fold;
    # confirm that fit() restarts training from scratch.
    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[100, 100, 100],
        n_classes=n_classes,
        learning_rate=0.05,
        steps=20000)

    scores = []
    # define the stratified k-fold cross validation
    skf = StratifiedKFold(labels, n_folds=n_folds)
    for train_index, test_index in skf:
        # get the data and labels for both training set and test set
        train_data, train_labels = data[train_index], labels[train_index]
        test_data, test_labels = data[test_index], labels[test_index]

        # fit data, compute the score
        classifier.fit(train_data, train_labels)
        predictions = classifier.predict(test_data)
        scores.append(metrics.accuracy_score(test_labels, predictions))

    # print out average score; the divisor is tied to n_folds instead of
    # a hard-coded "* 10" that is only correct for exactly ten folds.
    print(scores)
    print("Accuracy: %.5f%%" % (100.0 * sum(scores) / n_folds))
def train():
    """Train the titanic DNN classifier, report accuracy and save the model.

    Reads the CSV at ``data_path``, uses the ``Sex``, ``Age``, ``SibSp`` and
    ``Fare`` columns to predict ``Survived``, evaluates on a 20% hold-out
    split, then writes the classifier and the label encoder to
    ``model_path`` after removing any directory already there.
    """
    print("Training dnn_titanic")

    # load and clean the dataset
    df = pandas.read_csv(data_path)
    X, y = df[['Sex', 'Age', 'SibSp', 'Fare']], df['Survived']

    # chain: fillna for str with 'NA', num with 0
    X = preprocess.MultiFillna(X)

    # Label Encoder; will always encode str columns into integers
    mle = preprocess.MultiLabelEncoder(columns=[])
    X = mle.fit_transform(X)

    # random-split into train (80%), test data (20%)
    split = cross_validation.train_test_split(
        X, y, test_size=0.2, random_state=42)
    X_train, X_test, y_train, y_test = split

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    # Allows to be trained continuously.
    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=2,
        steps=500,
        learning_rate=0.01,
        continue_training=True)

    # Fit and save model for deployment.
    classifier.fit(X_train, y_train)
    predictions = classifier.predict(X_test)
    score = metrics.accuracy_score(y_test, predictions)
    print('Accuracy: {0:f}'.format(score))  # should be around 0.74

    # Clean checkpoint folder if exists
    try:
        shutil.rmtree(model_path)
    except OSError:
        # Nothing to clean up (or it could not be removed); carry on.
        pass

    # save the model and label encoder for use
    classifier.save(model_path)
    mle.save(model_path)
    print('Model saved to', model_path)
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from sklearn import metrics, cross_validation
from tensorflow.contrib import learn

# Load dataset and hold out 20% of it for testing.
iris = learn.datasets.load_dataset('iris')
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=42,
)

# Build 3 layer DNN with 10, 20, 10 units respectively.
classifier = learn.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=3,
    steps=200,
)

# Fit on the training split, then score the predictions for the test split.
classifier.fit(X_train, y_train)
predictions = classifier.predict(X_test)
score = metrics.accuracy_score(y_test, predictions)
print('Accuracy: {0:f}'.format(score))
# http://terrytangyuan.github.io/2016/03/14/scikit-flow-intro/
import tensorflow.contrib.learn as skflow
from sklearn import datasets, metrics

iris = datasets.load_iris()

# 10-20-10 DNN for the three iris classes.
classifier = skflow.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=3,
)

# The model is fitted and scored on the same data (no hold-out split).
classifier.fit(iris.data, iris.target)
predictions = classifier.predict(iris.data)
score = metrics.accuracy_score(iris.target, predictions)
print("Accuracy: %f" % score)
sess.run(v2.initializer)

# Has the same effect as the two lines above.
sess.run(tf.global_variables_initializer())

# Running a variable means evaluating the tensor operation inside it.
print(sess.run([v1, v2]))
print(sess.run([v1._variable, v2._variable]))

import tensorflow as tf
from tensorflow.contrib import learn as skflow

# The keyword was misspelled as `hidden_unitts`, which the constructor does
# not accept; the intended argument is `hidden_units`.
classifier = skflow.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=2,
    batch_size=128,
    steps=500,
    learning_rate=0.05)

import tensorflow as tf

# (1 x 3) row vector times (3 x 1) column vector.
x = tf.constant([[1.0, 2.0, 3.0]])
w = tf.constant([[2.0], [2.0], [2.,]])
y = tf.matmul(x, w)
print(x.get_shape())

sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
# Compare an iris DNN fitted without a monitor against one fitted with a
# validation monitor, and print whether the monitored one scores higher.
iris = datasets.load_iris()

# Hold out 20% for testing, then 20% of the remainder for validation.
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42)

val_monitor = learn.monitors.ValidationMonitor(
    X_val, y_val, early_stopping_rounds=200)

# classifier with early stopping on training data
classifier1 = learn.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=3,
    model_dir='/tmp/iris_model/')
classifier1.fit(X_train, y_train, steps=2000)
score1 = metrics.accuracy_score(y_test, classifier1.predict(X_test))

# classifier with early stopping on validation data
classifier2 = learn.TensorFlowDNNClassifier(
    hidden_units=[10, 20, 10],
    n_classes=3,
    model_dir='/tmp/iris_model_val/')
# The monitor must be passed by keyword as a list. Passing it as the third
# positional argument next to `steps=2000` does not bind it to `monitors`.
classifier2.fit(X_train, y_train, steps=2000, monitors=[val_monitor])
score2 = metrics.accuracy_score(y_test, classifier2.predict(X_test))

# In many applications, the score is improved by using early stopping
print(score2 > score1)
def question5():
    """Cross-validate a DNN classifier for ``cylinders`` and write the results.

    Reads ``auto-mpg.csv`` from the directory named by ``path``, fills
    missing ``horsepower`` values with the median, z-score encodes the
    numeric columns, shuffles the rows with a fixed seed, and runs 5-fold
    cross-validation. The out-of-sample predictions are written next to the
    features and car names in ``submit-hanmingli-prog2q5.csv``.

    ``path`` is not defined in this function; it must be available at module
    level.

    The ``name`` column is now split off *after* the shuffle. Previously it
    was extracted before the rows were permuted and re-indexed, so the final
    ``pd.concat(..., axis=1)`` (which aligns on the index) paired every row
    with the name of a different car.
    """
    print()
    print("***Question 5***")

    filename_read = os.path.join(path, "auto-mpg.csv")
    filename_write = os.path.join(path, "submit-hanmingli-prog2q5.csv")
    df = pd.read_csv(filename_read, na_values=['NA', '?'])

    # create feature vector
    missing_median(df, 'horsepower')
    for column in ('mpg', 'horsepower', 'weight', 'displacement',
                   'acceleration', 'origin'):
        encode_numeric_zscore(df, column)

    # Shuffle
    np.random.seed(42)
    df = df.reindex(np.random.permutation(df.index))
    df.reset_index(inplace=True, drop=True)

    # Remove the text column from the features, but keep it (in shuffled
    # order, sharing df's new index) for the output file.
    tem = df['name']
    df.drop('name', axis=1, inplace=True)

    # Encode to a 2D matrix for training
    x, y = to_xy(df, 'cylinders')

    # Cross validate
    kf = KFold(len(x), n_folds=5)

    oos_y = []
    oos_pred = []
    for fold, (train, test) in enumerate(kf, start=1):
        print("Fold #{}".format(fold))

        x_train, y_train = x[train], y[train]
        x_test, y_test = x[test], y[test]

        # Create a deep neural network with 3 hidden layers of 10, 20, 10
        classifier = skflow.TensorFlowDNNClassifier(
            hidden_units=[10, 20, 10], n_classes=9, steps=500)

        # Early stopping
        early_stop = skflow.monitors.ValidationMonitor(
            x_test, y_test,
            early_stopping_rounds=200,
            print_steps=50,
            n_classes=9)

        # Fit/train neural network
        classifier.fit(x_train, y_train, monitor=early_stop)

        # Add the predictions to the oos prediction list
        pred = classifier.predict(x_test)
        oos_y.append(y_test)
        oos_pred.append(pred)

        # Measure accuracy
        score = np.sqrt(metrics.mean_squared_error(pred, y_test))
        print("Fold score: {}".format(score))

    # Build the oos prediction list and calculate the error.
    oos_y = np.concatenate(oos_y)
    oos_pred = np.concatenate(oos_pred)
    score = np.sqrt(metrics.mean_squared_error(oos_pred, oos_y))
    print("Final, out of sample score: {}".format(score))

    # Write the cross-validated prediction
    oos_y = pd.DataFrame(oos_y)
    oos_pred = pd.DataFrame(oos_pred)
    oos_y.columns = ['ideal']
    oos_pred.columns = ['predict']
    oosDF = pd.concat([df, tem, oos_y, oos_pred], axis=1)
    oosDF.to_csv(filename_write, index=False)
# Per-fold metric accumulators. `fold_accuracy` and `fold_precision` are
# appended to below but are created outside this fragment.
fold_recall = []
fold_MSE = []
fold_RMSE = []
fold_MAE = []
fold_confusion_matrix = np.array([[0, 0], [0, 0]])

fold_number = 1
for train_index, test_index in skf:
    print("Fold Number:", fold_number)
    fold_number += 1

    x_train1, x_test1 = df_data_NM[train_index], df_data_NM[test_index]
    y_train1, y_test1 = (df_label_NM_1D[train_index],
                         df_label_NM_1D[test_index])
    print(len(y_test1))

    # Build a DNN with a single hidden layer of 30 units.
    classifier = skflow.TensorFlowDNNClassifier(
        hidden_units=[30], n_classes=2, steps=50000)

    # Fit
    classifier.fit(x_train1, y_train1)

    # Predict once per fold and reuse the result for every metric instead
    # of running the network three times on the same test data.
    y_pred1 = classifier.predict(x_test1)

    score_accracy = metrics.accuracy_score(y_test1, y_pred1)
    fold_accuracy.append(score_accracy)

    score_precision = metrics.precision_score(y_test1, y_pred1)
    fold_precision.append(score_precision)

    score_recall = metrics.recall_score(y_test1, y_pred1)
    fold_recall.append(score_recall)
def get_classifier(self, X, y):
    """Return a new two-class skflow DNN classifier with hidden layers 5 and 3.

    Args:
        X: Not used.
        y: Not used.

    Returns:
        An unfitted ``skflow.TensorFlowDNNClassifier``.
    """
    layer_sizes = [5, 3]
    model = skflow.TensorFlowDNNClassifier(
        hidden_units=layer_sizes,
        n_classes=2,
    )
    return model
# Train and score an iris DNN classifier on data round-tripped through HDF5.
iris = learn.datasets.load_dataset('iris')
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42)

# Note that we are saving and loading iris data as h5 format as a simple
# demonstration here. The context manager closes the file even when a
# write fails.
with h5py.File('test_hdf5.h5', 'w') as h5f:
    h5f.create_dataset('X_train', data=x_train)
    h5f.create_dataset('X_test', data=x_test)
    h5f.create_dataset('y_train', data=y_train)
    h5f.create_dataset('y_test', data=y_test)

# The datasets are read through the open file handle, so everything that
# consumes them stays inside the `with` block. The handle, which was
# previously never closed, is released when the block exits.
with h5py.File('test_hdf5.h5', 'r') as h5f:
    x_train = h5f['X_train']
    x_test = h5f['X_test']
    y_train = h5f['y_train']
    y_test = h5f['y_test']

    # Build 3 layer DNN with 10, 20, 10 units respectively.
    feature_columns = learn.infer_real_valued_columns_from_input(x_train)
    classifier = learn.TensorFlowDNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[10, 20, 10],
        n_classes=3,
        steps=200)

    # Fit and predict.
    classifier.fit(x_train, y_train)
    score = metrics.accuracy_score(y_test, classifier.predict(x_test))

print('Accuracy: {0:f}'.format(score))
labels, test_size=0.2, random_state=42)
# NOTE(review): the line above is the tail of a call whose opening lies
# outside the visible chunk; presumably it is the train/dev split that
# defines X_train, y_train, X_dev and y_dev - confirm.

# Remove this exit to move on
# exit()

''' Part 4 : Simple DNN ------------------------------------------------------------------------------------------------------------------ '''

# Now we'll create a simple deep neural network tensorflow graph
# For regression you can use learn.TensorFlowDNNRegressor
# `y_classes` is not defined in this fragment; it must be set earlier in
# the script.
classifier = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                           n_classes=y_classes,
                                           batch_size=1,
                                           steps=100,
                                           optimizer="Adam",
                                           learning_rate=0.01,
                                           dropout=0.6) #,feature_columns=list(range(88))) # learn.TensorFlowDNNClassifier

# Here we'll train our DNN
classifier.fit(X_train, y_train, logdir='/tmp/tf_learn/')

# and evaluate it on our dev data
predictions = classifier.predict(X_dev)
score = metrics.accuracy_score(y_dev, predictions)
print("Accuracy: %f" % score)