def load_dbpedia(size='small', test_with_fake_data=False):
    """Get DBpedia datasets from CSV files."""
    if not test_with_fake_data:
        data_dir = os.path.join(os.getenv('TF_EXP_BASE_DIR', ''), 'dbpedia_data')
        maybe_download_dbpedia(data_dir)

        train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv')
        test_path = os.path.join(data_dir, 'dbpedia_csv', 'test.csv')

        if size == 'small':
            # Reduce the size of original data by a factor of 1000.
            base.shrink_csv(train_path, 1000)
            base.shrink_csv(test_path, 1000)
            train_path = train_path.replace('train.csv', 'train_small.csv')
            test_path = test_path.replace('test.csv', 'test_small.csv')
    else:
        module_path = os.path.dirname(__file__)
        train_path = os.path.join(module_path, 'data', 'text_train.csv')
        test_path = os.path.join(module_path, 'data', 'text_test.csv')

    train = base.load_csv_without_header(
        train_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)
    test = base.load_csv_without_header(
        test_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
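# A minimal usage sketch for load_dbpedia above -- an assumption-laden
# illustration, not part of the original: it presumes the contrib-era imports
# (os, numpy as np, and base from tensorflow.contrib.learn.python.learn.datasets)
# plus network access for the first download of the DBpedia archive.
dbpedia = load_dbpedia(size='small')
print('train examples:', len(dbpedia.train.data))
print('test examples:', len(dbpedia.test.data))
print('first label:', dbpedia.train.target[0])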
def load_data():
    module_path = os.path.dirname(__file__)
    #train_path = os.path.join(module_path, 'nrf_data', 'nrf-traindata.csv')
    #test_path = os.path.join(module_path, 'nrf_data', 'nrf-testdata.csv')
    #train_path = os.path.join(module_path, 'nrf_data', 'train_10000.csv')
    #test_path = os.path.join(module_path, 'nrf_data', 'eval_10000.csv')
    #train_path = os.path.join(module_path, 'nrf_data', 'train_10000_only_one_objective.csv')
    #test_path = os.path.join(module_path, 'nrf_data', 'eval_10000_only_one_objective.csv')
    train_path = os.path.join(module_path, 'nrf_data', 'train_10000_processed.csv')
    test_path = os.path.join(module_path, 'nrf_data', 'eval_10000_processed.csv')

    train = base.load_csv_without_header(
        train_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)
    test = base.load_csv_without_header(
        test_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
def load_origin_data():
    module_path = os.path.dirname(__file__)
    train_path = os.path.join(module_path, 'nrf_data', 'nrf-traindata.csv')
    test_path = os.path.join(module_path, 'nrf_data', 'nrf-testdata.csv')

    train = base.load_csv_without_header(
        train_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)
    test = base.load_csv_without_header(
        test_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
def train(data_file, model_path, num_class):
    ds = load_csv_without_header(data_file, np.int32, np.float32, 19)

    for dropi in range(7):
        group = groups[dropi]
        num_feature = len(group)
        dsplit = DataSplit(fetch(ds.data, group), ds.target, 0.75)
        dtrain = dsplit.getTrain()
        dtest = dsplit.getTest()

        x, label, train_step, accuracy, prediction = build_graph(
            num_feature, num_class)

        # Build Signature to save to model
        signature = tf.saved_model.signature_def_utils.build_signature_def(
            inputs={'input': tf.saved_model.utils.build_tensor_info(x)},
            outputs={'output': tf.saved_model.utils.build_tensor_info(prediction)},
            method_name=tf.saved_model.PREDICT_METHOD_NAME)

        shutil.rmtree(model_path, ignore_errors=True)
        best_test = 0

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            # Start training loop
            for i in range(5000):
                batch = dtrain.next_batch(50)
                if batch is None:
                    break
                if i % 100 == 0:
                    train_accuracy = accuracy.eval(feed_dict={x: batch[0], label: batch[1]})
                    # print('step %d, training accuracy %g' % (i, train_accuracy))
                    test_accuracy = accuracy.eval(feed_dict={x: dtest.data, label: dtest.label})
                    # print('step %d, test accuracy %g' % (i, test_accuracy))
                    if best_test < test_accuracy:
                        best_test = test_accuracy
                        # print("Current best, saving model")
                        shutil.rmtree(model_path, ignore_errors=True)
                        builder = tf.saved_model.builder.SavedModelBuilder(model_path)
                        builder.add_meta_graph_and_variables(
                            sess, [tf.saved_model.SERVING],
                            signature_def_map={
                                tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature
                            })
                        builder.save()
                        ## Print the value just saved
                train_step.run(feed_dict={x: batch[0], label: batch[1]})

        print("Final Result: %d, %g" % (dropi, best_test))
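# A hedged sketch of reloading the best model that train() above exports.
# Assumptions flagged: the 'input'/'output' keys match the signature built in
# train(), model_path is the same directory passed to it, and the TF 1.x
# SavedModel loader API is available; this is illustrative, not from the original.
def load_and_predict(model_path, features):
    with tf.Session(graph=tf.Graph()) as sess:
        # Load the serving graph and variables saved by SavedModelBuilder.
        meta_graph = tf.saved_model.loader.load(
            sess, [tf.saved_model.SERVING], model_path)
        sig = meta_graph.signature_def[
            tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
        # Resolve the input and output tensors recorded in the signature.
        x = sess.graph.get_tensor_by_name(sig.inputs['input'].name)
        prediction = sess.graph.get_tensor_by_name(sig.outputs['output'].name)
        return sess.run(prediction, feed_dict={x: features})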
def load_full_data():
    module_path = os.path.dirname(__file__)
    #train_path = os.path.join(module_path, 'nrf_data', 'nrf-traindata.csv')
    #test_path = os.path.join(module_path, 'nrf_data', 'nrf-testdata.csv')
    train_path = os.path.join(module_path, 'nrf_data', 'train_10000.csv')
    test_path = os.path.join(module_path, 'nrf_data', 'eval_10000.csv')

    train = base.load_csv_without_header(
        train_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)
    test = base.load_csv_without_header(
        test_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)

    data = np.concatenate((train.data, test.data), axis=0)
    target = np.concatenate((train.target, test.target), axis=0)
    return Dataset(data=np.array(data), target=np.array(target).astype(np.int32))
def loadDbpedia(size='small'):
    """Get DBpedia datasets from CSV files."""
    data_dir = '../data/dbpedia_data'
    train_path = os.path.join(data_dir, 'dbpedia_csv', 'train.csv')
    test_path = os.path.join(data_dir, 'dbpedia_csv', 'test.csv')

    if size == 'small':
        # Use the pre-shrunk files (original data reduced by a factor of 1000).
        train_path = train_path.replace('train.csv', 'train_small.csv')
        test_path = test_path.replace('test.csv', 'test_small.csv')

    train = base.load_csv_without_header(
        train_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)
    test = base.load_csv_without_header(
        test_path, target_dtype=np.int32, features_dtype=np.str, target_column=0)

    return base.Datasets(train=train, validation=None, test=test)
def main(args):
    # Load datasets
    abalone_train, abalone_test, abalone_predict = maybe_download(
        FLAGS.train_data, FLAGS.test_data, FLAGS.predict_data)

    # Training examples
    training_set = load_csv_without_header(
        filename=abalone_train, target_dtype=np.int, features_dtype=np.float64)

    # Test examples
    test_set = tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=abalone_test, target_dtype=np.int, features_dtype=np.float64)

    # Set of 7 examples for which to predict abalone ages
    prediction_set = tf.contrib.learn.datasets.base.load_csv_without_header(
        filename=abalone_predict, target_dtype=np.int, features_dtype=np.float64)

    # Set model params
    model_params = {"learning_rate": LEARNING_RATE}

    # Instantiate Estimator
    nn = tf.contrib.learn.Estimator(model_fn=model_fn, params=model_params)

    def get_train_inputs():
        x = tf.constant(training_set.data)
        y = tf.constant(training_set.target)
        return x, y

    # Fit
    nn.fit(input_fn=get_train_inputs, steps=5000)

    # Score accuracy
    def get_test_inputs():
        x = tf.constant(test_set.data)
        y = tf.constant(test_set.target)
        return x, y

    ev = nn.evaluate(input_fn=get_test_inputs, steps=1)
    print("Loss: %s" % ev["loss"])
    print("Root Mean Squared Error: %s" % ev["rmse"])

    # Print out predictions
    predictions = nn.predict(x=prediction_set.data, as_iterable=True)
    for i, p in enumerate(predictions):
        print("Prediction %s: %s" % (i + 1, p["ages"]))
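# main() above references a model_fn that is not shown. The sketch below
# follows the tf.contrib.learn "abalone" tutorial that main() appears to be
# based on: two ReLU hidden layers and a linear regression output predicting
# age. Treat the layer sizes and optimizer as tutorial defaults, not a
# confirmed part of this snippet's original code.
def model_fn(features, targets, mode, params):
    """Model function for Estimator (contrib-era signature)."""
    # Two hidden layers with ReLU, then a single linear regression output.
    first_hidden_layer = tf.contrib.layers.relu(features, 10)
    second_hidden_layer = tf.contrib.layers.relu(first_hidden_layer, 10)
    output_layer = tf.contrib.layers.linear(second_hidden_layer, 1)

    # Reshape to a 1-D tensor and expose it under the "ages" key used by main().
    predictions = tf.reshape(output_layer, [-1])
    predictions_dict = {"ages": predictions}

    # Mean squared error loss, plus RMSE as the extra eval metric main() prints.
    loss = tf.losses.mean_squared_error(targets, predictions)
    eval_metric_ops = {
        "rmse": tf.metrics.root_mean_squared_error(
            tf.cast(targets, tf.float64), predictions)
    }

    train_op = tf.contrib.layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params["learning_rate"],
        optimizer="SGD")

    return tf.contrib.learn.ModelFnOps(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)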
def train(data_file, model_path, num_class):
    ds = load_csv_without_header(data_file, np.int32, np.float32, 19)
    dsplit = DataSplit(ds.data, ds.target, 0.75)
    dtrain = dsplit.getTrain()
    dtest = dsplit.getTest()

    x, label, train_step, accuracy, prediction = build_graph(num_class)
    builder = tf.saved_model.builder.SavedModelBuilder(model_path)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(50000):
            batch = dtrain.next_batch(50)
            if batch is None:
                break
            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict={x: batch[0], label: batch[1]})
                print('step %d, training accuracy %g' % (i, train_accuracy))
                test_accuracy = accuracy.eval(feed_dict={x: dtest.data, label: dtest.label})
                print('step %d, test accuracy %g' % (i, test_accuracy))
            train_step.run(feed_dict={x: batch[0], label: batch[1]})

        # Build Signature to save to model
        signature = tf.saved_model.signature_def_utils.build_signature_def(
            inputs={'input': tf.saved_model.utils.build_tensor_info(x)},
            outputs={'output': tf.saved_model.utils.build_tensor_info(prediction)},
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)

        builder.add_meta_graph_and_variables(
            sess, [tf.saved_model.tag_constants.SERVING],
            signature_def_map={
                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY:
                    signature
            })
        builder.save()
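# build_graph() is referenced by both train() variants above but never shown.
# Below is a hedged sketch of a compatible graph builder: a single softmax
# layer returning the five values train() unpacks. The num_feature default of
# 19 (matching the target_column used when loading the CSV), the optimizer,
# and the learning rate are all assumptions for illustration.
def build_graph(num_class, num_feature=19):
    x = tf.placeholder(tf.float32, [None, num_feature])
    label = tf.placeholder(tf.float32, [None, num_class])

    # Single linear layer with softmax cross-entropy loss.
    W = tf.Variable(tf.zeros([num_feature, num_class]))
    b = tf.Variable(tf.zeros([num_class]))
    logits = tf.matmul(x, W) + b
    prediction = tf.argmax(logits, 1)

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits))
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    correct = tf.equal(prediction, tf.argmax(label, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
    return x, label, train_step, accuracy, prediction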
import numpy as np
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.datasets import base

INPUTNODE = 100
OUTPUTNODE = 2
LAYER1 = 10

filenametrain = "/Users/hhy/Desktop/vectortrain.csv"
train = base.load_csv_without_header(
    filename=filenametrain, target_dtype=np.int, features_dtype=np.int)

filenametest = "/Users/hhy/Desktop/vectortest.csv"
test = base.load_csv_without_header(
    filename=filenametest, target_dtype=np.int, features_dtype=np.int)


def add_layer(inputs, in_size, out_size, activation_function=None):
    Weights = tf.Variable(tf.random_normal([in_size, out_size]))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    Wx_plus_b = tf.matmul(inputs, Weights) + biases
    if activation_function is None:
        outputs = Wx_plus_b
    else:
        outputs = activation_function(Wx_plus_b)
    return outputs


x_train = train.data.reshape(24780, 100)
#y_train=train.target.reshape(24780,1)
y_train = np.array(train.target).reshape(24780, 2)
#y_train=tf.one_hot(y_train1,2,1,0)
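# A hedged sketch of how the snippet above might continue: wiring add_layer
# into a 100 -> 10 -> 2 network via the INPUTNODE/LAYER1/OUTPUTNODE constants
# and training with softmax cross-entropy. The loss choice, learning rate, and
# step count are assumptions, not from the original code.
xs = tf.placeholder(tf.float32, [None, INPUTNODE])
ys = tf.placeholder(tf.float32, [None, OUTPUTNODE])

hidden = add_layer(xs, INPUTNODE, LAYER1, activation_function=tf.nn.relu)
logits = add_layer(hidden, LAYER1, OUTPUTNODE)

loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=ys, logits=logits))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step in range(1000):
        # Full-batch training on the reshaped arrays built above.
        sess.run(train_step, feed_dict={xs: x_train, ys: y_train})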
from tensorflow.contrib.learn.python.learn.datasets import base
import tensorflow as tf
import numpy as np

# print TensorFlow version
print('TF Version: ', tf.__version__)

# data file which we will train on
TRAIN = "candles_train.txt"
# data file which we will test against to determine accuracy
TEST = "candles_test.txt"

# training set
train_set = base.load_csv_without_header(
    filename=TRAIN, features_dtype=np.double, target_dtype=np.double)
# test set
test_set = base.load_csv_without_header(
    filename=TEST, features_dtype=np.double, target_dtype=np.double)

# print train data set
# print(train_set.data)
# print test data set
# print(test_set.data)

# add feature columns so TensorFlow will know what we need to train on
feature_name = "stock_data_features"
feature_columns = [tf.feature_column.numeric_column(feature_name, shape=[1])]

# our classifier will do the training as well as keep track of the state if we need to use it again
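# The snippet ends at the classifier comment. Below is a hedged sketch of one
# plausible continuation using tf.estimator.LinearClassifier; the model_dir,
# class count, step counts, and the assumption that the file has a single
# feature column (matching shape=[1] above) are all illustrative guesses.
classifier = tf.estimator.LinearClassifier(
    feature_columns=feature_columns,
    n_classes=2,
    model_dir="/tmp/candles_model")

def make_input_fn(dataset):
    # Wrap a loaded Dataset as the features/labels pair the estimator expects.
    def _input_fn():
        features = {feature_name: tf.constant(dataset.data, dtype=tf.float32)}
        labels = tf.constant(dataset.target.astype(np.int32))
        return features, labels
    return _input_fn

classifier.train(input_fn=make_input_fn(train_set), steps=1000)
print(classifier.evaluate(input_fn=make_input_fn(test_set), steps=1))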