def testIrisES(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                      y_train,
                                                      test_size=0.2,
                                                      random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(
        x_val, y_val, every_n_steps=50, early_stopping_rounds=100,
        early_stopping_metric='accuracy', early_stopping_metric_minimize=False)

    # classifier without early stopping - overfitting
    classifier1 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                n_classes=3, steps=1000)
    classifier1.fit(x_train, y_train)
    _ = accuracy_score(y_test, classifier1.predict(x_test))

    # Full 1000 steps, 11 summaries and no evaluation summary.
    # 11 summaries = first + every 100 out of 1000 steps.
    self.assertEqual(11, len(_get_summary_events(classifier1.model_dir)))
    with self.assertRaises(ValueError):
        _get_summary_events(classifier1.model_dir + '/eval')

    # classifier with early stopping - improved accuracy on testing set
    classifier2 = learn.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10], n_classes=3, steps=2000,
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier2.fit(x_train, y_train, monitors=[val_monitor])
    _ = accuracy_score(y_val, classifier2.predict(x_val))
    _ = accuracy_score(y_test, classifier2.predict(x_test))

    # Note, this test is unstable, so not checking for equality.
    # See stability_test for examples of stability issues.
    if val_monitor.early_stopped:
        self.assertLess(val_monitor.best_step, 2000)
        # Note, because the validation monitor stops after the best score occurs,
        # the accuracy at the current checkpoint is lower.
        # TODO(ipolosukhin): Time machine for restoring old checkpoints?
        # Flaky: best_value is still not always better than the score2 value.
        # self.assertGreater(val_monitor.best_value, score2_val)

    # Early stopped, unstable, so checking only < max.
    self.assertLess(len(_get_summary_events(classifier2.model_dir)), 21)
    self.assertLess(
        len(_get_summary_events(classifier2.model_dir + '/eval')), 4)
def testIrisES(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)
    x_train, x_val, y_train, y_val = train_test_split(
        x_train, y_train, test_size=0.2)
    val_monitor = learn.monitors.ValidationMonitor(x_val, y_val,
                                                   early_stopping_rounds=100)

    # classifier without early stopping - overfitting
    classifier1 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                n_classes=3, steps=1000)
    classifier1.fit(x_train, y_train)
    accuracy_score(y_test, classifier1.predict(x_test))

    # classifier with early stopping - improved accuracy on testing set
    classifier2 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                n_classes=3, steps=1000)
    classifier2.fit(x_train, y_train, monitors=[val_monitor])
    accuracy_score(y_test, classifier2.predict(x_test))
def testIrisES(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                      y_train,
                                                      test_size=0.2,
                                                      random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(
        x_val,
        y_val,
        every_n_steps=50,
        early_stopping_rounds=100,
        early_stopping_metric="accuracy",
        early_stopping_metric_minimize=False,
    )

    # classifier without early stopping - overfitting
    classifier1 = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                                n_classes=3, steps=1000)
    classifier1.fit(x_train, y_train)
    _ = accuracy_score(y_test, classifier1.predict(x_test))

    # Full 1000 steps, 12 summaries and no evaluation summary.
    # 12 summaries = global_step + first + every 100 out of 1000 steps.
    self.assertEqual(12, len(_get_summary_events(classifier1.model_dir)))
    with self.assertRaises(ValueError):
        _get_summary_events(classifier1.model_dir + "/eval")

    # classifier with early stopping - improved accuracy on testing set
    classifier2 = learn.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        n_classes=3,
        steps=2000,
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1),
    )
    classifier2.fit(x_train, y_train, monitors=[val_monitor])
    _ = accuracy_score(y_val, classifier2.predict(x_val))
    _ = accuracy_score(y_test, classifier2.predict(x_test))

    # Note, this test is unstable, so not checking for equality.
    # See stability_test for examples of stability issues.
    if val_monitor.early_stopped:
        self.assertLess(val_monitor.best_step, 2000)
        # Note, because the validation monitor stops after the best score occurs,
        # the accuracy at the current checkpoint is lower.
        # TODO(ipolosukhin): Time machine for restoring old checkpoints?
        # Flaky: best_value is still not always better than the score2 value.
        # self.assertGreater(val_monitor.best_value, score2_val)

    # Early stopped, unstable, so checking only < max.
    self.assertLess(len(_get_summary_events(classifier2.model_dir)), 21)
    # Eval typically has ~6 events, but it varies based on the run.
    self.assertLess(
        len(_get_summary_events(classifier2.model_dir + "/eval")), 8)
def testIrisMomentum(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    def custom_optimizer(learning_rate):
        return tf.train.MomentumOptimizer(learning_rate, 0.9)

    cont_features = [tf.contrib.layers.real_valued_column("", dimension=4)]
    classifier = learn.TensorFlowDNNClassifier(
        feature_columns=cont_features,
        hidden_units=[10, 20, 10],
        n_classes=3,
        steps=400,
        learning_rate=0.01,
        optimizer=custom_optimizer)
    classifier.fit(x_train, y_train)
    score = accuracy_score(y_test, classifier.predict(x_test))

    self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
def get_datasets(csv_path, test_size=0.1, image_size=28, color=True):
    """
    Build the training and test datasets.
    @param csv_path   dataset csv
           test_size  fraction of the dataset held out for testing
           image_size number of pixels per image side
           color      color-image flag
    @return x_train   training dataset (features)
            x_test    test dataset (features)
            y_train   training dataset (target labels)
            y_test    test dataset (target labels)
    """
    csv_reader = csv.reader(io.open(csv_path, 'r', encoding='utf-8'),
                            delimiter=',')
    labels = get_labels(csv_path)
    X = []
    y = []
    for row in csv_reader:
        # Vectorized image
        X.append(image_to_vector(row[0], image_size=image_size, color=color))
        # Build the target label in one-of-k (one-hot) encoding
        one_of_k = np.zeros(len(labels))
        one_of_k.put(labels.index(row[1]), 1)
        y.append(one_of_k)
    return train_test_split(np.array(X), np.array(y), test_size=test_size,
                            random_state=42)
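# A minimal usage sketch of get_datasets. The csv path below is a hypothetical
# example, not part of the original code; it assumes a manifest that get_labels
# and image_to_vector can read.
x_train, x_test, y_train, y_test = get_datasets('data/dataset.csv',
                                                test_size=0.1,
                                                image_size=28,
                                                color=True)
print(x_train.shape, y_train.shape)  # feature vectors and one-hot labels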
def testIrisMomentum(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    def custom_optimizer(learning_rate):
        return tf.train.MomentumOptimizer(learning_rate, 0.9)

    classifier = learn.TensorFlowDNNClassifier(
        hidden_units=[10, 20, 10],
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        n_classes=3,
        steps=400,
        learning_rate=0.01,
        optimizer=custom_optimizer)
    classifier.fit(x_train, y_train)
    score = accuracy_score(y_test, classifier.predict(x_test))

    self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
def testIrisMomentum(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    # setup exponential decay function
    def exp_decay(global_step):
        return tf.train.exponential_decay(learning_rate=0.1,
                                          global_step=global_step,
                                          decay_steps=100,
                                          decay_rate=0.001)

    def custom_optimizer(learning_rate):
        return tf.train.MomentumOptimizer(learning_rate, 0.9)

    classifier = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                               n_classes=3,
                                               steps=400,
                                               learning_rate=exp_decay,
                                               optimizer=custom_optimizer)
    classifier.fit(x_train, y_train)
    score = accuracy_score(y_test, classifier.predict(x_test))

    self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
def testIrisMomentum(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    def custom_optimizer():
        return momentum_lib.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

    classifier = learn.DNNClassifier(
        hidden_units=[10, 20, 10],
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        n_classes=3,
        optimizer=custom_optimizer,
        config=learn.RunConfig(tf_random_seed=1))
    classifier.fit(x_train, y_train, steps=400)
    predictions = np.array(list(classifier.predict_classes(x_test)))
    score = accuracy_score(y_test, predictions)

    self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
def testIrisMomentum(self):
    random.seed(42)

    iris = datasets.load_iris()
    X_train, X_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    # setup exponential decay function
    def exp_decay(global_step):
        return tf.train.exponential_decay(learning_rate=0.1,
                                          global_step=global_step,
                                          decay_steps=100,
                                          decay_rate=0.001)

    custom_optimizer = lambda learning_rate: tf.train.MomentumOptimizer(
        learning_rate, 0.9)

    classifier = learn.TensorFlowDNNClassifier(hidden_units=[10, 20, 10],
                                               n_classes=3,
                                               steps=800,
                                               learning_rate=exp_decay,
                                               optimizer=custom_optimizer)
    classifier.fit(X_train, y_train)
    score = accuracy_score(y_test, classifier.predict(X_test))

    self.assertGreater(score, 0.7, "Failed with score = {0}".format(score))
def train(tokenizer, model_dir):
    word_index, embeddings_matrix = generate_embeddings(tokenizer)

    x_train, x_validate, y_train, y_validate = train_test_split(
        data['content'], data['label'], test_size=0.1)

    list_tokenized_train = tokenizer.texts_to_sequences(x_train)
    input_train = sequence.pad_sequences(list_tokenized_train, maxlen=maxlen)

    list_tokenized_validation = tokenizer.texts_to_sequences(x_validate)
    input_validation = sequence.pad_sequences(list_tokenized_validation,
                                              maxlen=maxlen)

    y_train = keras.utils.to_categorical(y_train, num_classes=3)
    y_validate = keras.utils.to_categorical(y_validate, num_classes=3)

    model1 = CNN().model(embeddings_matrix, maxlen, word_index)

    file_path = model_dir + model_name % "{epoch:02d}"
    checkpoint = ModelCheckpoint(file_path, verbose=2, save_weights_only=True)
    metrics = Metrics()
    callbacks_list = [checkpoint, metrics]

    model1.fit(input_train, y_train,
               batch_size=batch_size,
               epochs=epochs,
               validation_data=(input_validation, y_validate),
               callbacks=callbacks_list,
               verbose=2)
    del model1
def dividirDataset(dataset):
    X = dataset[:, 0:21]
    Y = dataset[:, 21]

    X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                        test_size=0.35,
                                                        random_state=0)
    print(X_train.shape)
    print(X_test.shape)
    print(Y_train.shape)
    print(Y_test.shape)

    return (X_train, X_test, Y_train, Y_test)
def split_data(self, x, y):
    x_train, x_validate, y_train, y_validate = train_test_split(
        x, y, test_size=0.1)

    tokenizer = Tokenizer(num_words=self.max_words)
    tokenizer.fit_on_texts(x_train)
    train_seq = tokenizer.texts_to_sequences(x_train)
    x_train = sequence.pad_sequences(train_seq, maxlen=self.max_len)

    tokenizer.fit_on_texts(x_validate)
    val_seq = tokenizer.texts_to_sequences(x_validate)
    x_validate = sequence.pad_sequences(val_seq, self.max_len)

    # One-hot encode the labels
    y_train = keras.utils.to_categorical(y_train, num_classes=3)
    y_validate = keras.utils.to_categorical(y_validate, num_classes=3)

    return x_train, y_train, x_validate, y_validate
def train_model(training_data):
    X = np.array([i[0] for i in training_data]).reshape(
        -1, len(training_data[0][0]))
    y = np.array([i[1] for i in training_data]).reshape(
        -1, len(training_data[0][1]))

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    print("Shapes: X_train {}, X_test {}".format(X_train.shape, X_test.shape))

    model = build_model(input_size=len(X[0]), output_size=len(y[0]))
    history = model.fit(X_train, y_train,
                        batch_size=512,
                        epochs=200,
                        verbose=0,
                        validation_data=(X_test, y_test),
                        shuffle=True)

    return model, history
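# A minimal sketch of the build_model helper referenced above; its real
# definition is not shown here, so the layer sizes, activations and compile
# settings below are assumptions rather than the author's implementation.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


def build_model(input_size, output_size):
    # Small fully connected network sized by the training-data shapes.
    model = Sequential([
        Dense(128, activation='relu', input_shape=(input_size,)),
        Dense(64, activation='relu'),
        Dense(output_size, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model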
def testIrisMomentum(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    def custom_optimizer():
        return tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

    classifier = learn.DNNClassifier(
        hidden_units=[10, 20, 10],
        feature_columns=learn.infer_real_valued_columns_from_input(x_train),
        n_classes=3,
        optimizer=custom_optimizer,
        config=learn.RunConfig(tf_random_seed=1),
    )
    classifier.fit(x_train, y_train, steps=400)
    score = accuracy_score(y_test, classifier.predict(x_test))

    self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
def __init__(self, load=False):
    self.model = Sequential([
        Embedding(1000, 32),
        LSTM(128, dropout=0.2, recurrent_dropout=0.2),
        Dense(128, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    if load:
        self.model.load_weights('sms_model')
    else:
        self.model.compile(metrics=['acc'],
                           optimizer='adam',
                           loss='binary_crossentropy')

        df = pd.read_csv('14 spam data.csv', delimiter=',',
                         encoding='latin-1')[['v1', 'v2']]
        Y = (df.v1 == 'ham').astype('int').values
        X = df.v2

        max_words = 1000
        max_len = 150
        tok = Tokenizer(num_words=max_words)
        tok.fit_on_texts(X)
        sequences = tok.texts_to_sequences(X)
        X = sequence.pad_sequences(sequences, maxlen=max_len)

        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=0.1, random_state=42)

        for i in range(5):
            self.model.fit(X_train, y_train, epochs=1, batch_size=32,
                           verbose=1)
            self.model.save_weights('sms_model')

        print(self.model.evaluate(X_test, y_test))
def testIrisMomentum(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    def custom_optimizer():
        return tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)

    cont_features = [tf.contrib.layers.real_valued_column("", dimension=4)]
    classifier = learn.DNNClassifier(feature_columns=cont_features,
                                     hidden_units=[10, 20, 10],
                                     n_classes=3,
                                     optimizer=custom_optimizer,
                                     config=learn.RunConfig(tf_random_seed=1))
    classifier.fit(x_train, y_train, steps=400)
    score = accuracy_score(y_test, classifier.predict(x_test))

    self.assertGreater(score, 0.65, "Failed with score = {0}".format(score))
def train_model(X_data, y_data):
    # https://keras.io/examples/generative/lstm_character_level_text_generation/
    # Clean the dataset and generate the data we need
    print('-' * 5 + ' ' * 3 + "Cleaning the dataset" + ' ' * 3 + '-' * 5)
    X_doc, y_doc = generate_data(X_data, y_data)
    output_example_data(X_doc, y_doc)

    # ----------------------------------------------------------------------
    print('-' * 5 + ' ' * 3 + "Padding the dataset" + ' ' * 3 + '-' * 5)
    X_seq = pad_sequences(X_doc, maxlen=max_len, padding='post')
    y_seq = y_doc

    # ----------------------------------------------------------------------
    print('-' * 5 + ' ' * 3 + "Splitting the dataset" + ' ' * 3 + '-' * 5)
    X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq,
                                                        random_state=seed,
                                                        stratify=y_seq)
    print("Training set (train_data): %d samples; test set (test_data): %d samples"
          % (len(y_train), len(y_test)))

    # ----------------------------------------------------------------------
    # Build the model
    print('-' * 5 + ' ' * 3 + "Building the network model" + ' ' * 3 + '-' * 5)
    model = construct_model()
    print(model.summary())

    # ----------------------------------------------------------------------
    # Report the training results
    def output_result():
        print("Model prediction -->", end='')
        print("loss = {}, accuracy = {}".format(results[0], results[1]))
        if label_name == 'age':
            np_argmax = np.argmax(predictions, 1)
            # print("First 30 input samples =", np.array(X_test[:30], dtype=int))
            print("First 30 true targets =", np.array(y_test[:30], dtype=int))
            print("First 30 predicted targets =",
                  np.array(np.argmax(predictions[:30], 1), dtype=int))
            print("First 30 raw predictions =")
            print(predictions[:30])
            for i in range(10):
                print("Class {0}: true count = {1}, predicted count = {2}".format(
                    i, sum(y_test == i), sum(np_argmax == i)))
        elif label_name == 'gender':
            predict_gender = np.array(predictions > 0.5, dtype=int)
            print("sum(abs(predictions>0.5-y_test_scaled))/sum(y_test_scaled) = error% =",
                  sum(abs(predict_gender - y_test)) / sum(y_test) * 100, '%')
            print("First 100 true targets =", np.array(y_test[:100], dtype=int))
            print("First 100 predicted targets =",
                  np.array(predict_gender[:100], dtype=int))
            print("sum(predictions>0.5) =", sum(predict_gender))
            print("sum(y_test) =", sum(y_test))
            print("sum(abs(predictions-y_test)) = error_number =",
                  sum(abs(predict_gender - y_test)))
        else:
            print("Invalid label name:", label_name)

    # ----------------------------------------------------------------------
    # Train the network model
    # With a validation split
    print('-' * 5 + ' ' * 3 + "Training with a validation split" + ' ' * 3 + '-' * 5)
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
              validation_split=0.2, use_multiprocessing=True, verbose=2)
    results = model.evaluate(X_test, y_test, verbose=0)
    predictions = model.predict(X_test).squeeze()
    output_result()

    # ----------------------------------------------------------------------
    # Without a validation split, with half as many epochs
    print('-' * 5 + ' ' * 3 + "Training without a validation split, half the epochs"
          + ' ' * 3 + '-' * 5)
    model.fit(X_train, y_train, epochs=epochs // 2, batch_size=batch_size,
              use_multiprocessing=True, verbose=2)
    results = model.evaluate(X_test, y_test, verbose=0)
    predictions = model.predict(X_test).squeeze()
    output_result()
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError("Your machine has only %d gpus "
                         "which is less than the requested --num_gpus=%d." %
                         (len(gpus), FLAGS.num_gpus))

    i = 0
    data = []
    L = file_name(FLAGS.data_path)
    for dir in L:
        if i == 0:
            data = np.loadtxt(dir)
        else:
            tmp = np.loadtxt(dir)
            data = np.vstack((data, tmp))
        i += 1
    random.shuffle(data)

    train_x, test_x, train_y, test_y = train_test_split(data[:, :-1],
                                                        data[:, -1],
                                                        test_size=0.2,
                                                        random_state=0)
    train_x, valid_x, train_y, valid_y = train_test_split(train_x[:, :],
                                                          train_y[:],
                                                          test_size=0.2,
                                                          random_state=42)

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1

    # with tf.Graph().as_default():
    initializer = tf.random_uniform_initializer(-config.init_scale,
                                                config.init_scale)

    with tf.name_scope("Train"):
        train_input = PTBInput(config=config, x_data=train_x, y_data=train_y,
                               name="TrainInput")
        with tf.variable_scope("Model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config, input_=train_input)
        tf.summary.scalar("Training Loss", m.cost)
        tf.summary.scalar("Learning Rate", m.lr)

    with tf.name_scope("Valid"):
        valid_input = PTBInput(config=config, x_data=valid_x, y_data=valid_y,
                               name="ValidInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config,
                              input_=valid_input)
        tf.summary.scalar("Validation Loss", mvalid.cost)

    with tf.name_scope("Test"):
        test_input = PTBInput(config=eval_config, x_data=test_x, y_data=test_y,
                              name="TestInput")
        with tf.variable_scope("Model", reuse=True, initializer=initializer):
            mtest = PTBModel(is_training=False, config=eval_config,
                             input_=test_input)

    # models = {"Train": m, "Valid": mvalid, "Test": mtest}
    # for name, model in models.items():
    #     model.export_ops(name)
    # metagraph = tf.train.export_meta_graph()
    # if tf.__version__ < "1.1.0" and FLAGS.num_gpus > 1:
    #     raise ValueError("num_gpus > 1 is not supported for TensorFlow versions "
    #                      "below 1.1.0")
    soft_placement = False
    # if FLAGS.num_gpus > 1:
    #     soft_placement = True
    #     util.auto_parallel(metagraph, m)
    #
    # with tf.Graph().as_default():
    #     tf.train.import_meta_graph(metagraph)
    #     # tf.train.import_meta_graph("models/model.ckpt-0.meta")
    #     for model in models.values():
    #         model.import_ops()

    sv = tf.train.Supervisor(logdir=FLAGS.save_path)
    # sv = tf.train.Supervisor()
    config_proto = tf.ConfigProto(allow_soft_placement=soft_placement)
    with sv.managed_session(config=config_proto) as session:
        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay**max(i + 1 - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity, cost = run_epoch(session, m, eval_op=m.train_op,
                                               verbose=True)
            print("Epoch: %d Train Perplexity: %.3f Cost: %.3f" %
                  (i + 1, train_perplexity, cost))
            valid_perplexity, cost = run_epoch(session, mvalid)
            print("Epoch: %d Valid Perplexity: %.3f Cost: %.3f" %
                  (i + 1, valid_perplexity, cost))

        test_perplexity, cost = run_epoch(session, mtest)
        print("Test Perplexity: %.3f accuracy: %.3f" %
              (test_perplexity, 100 - cost))

        if FLAGS.save_path:
            print("Saving model to %s." % FLAGS.save_path)
            sv.saver.save(session, FLAGS.save_path, global_step=sv.global_step)
def main(_):
    if not FLAGS.data_path:
        raise ValueError("Must set --data_path to PTB data directory")
    gpus = [
        x.name for x in device_lib.list_local_devices()
        if x.device_type == "GPU"
    ]
    if FLAGS.num_gpus > len(gpus):
        raise ValueError(
            "Your machine has only %d gpus "
            "which is less than the requested --num_gpus=%d." %
            (len(gpus), FLAGS.num_gpus))

    i = 0
    data = []
    L = file_name(FLAGS.data_path)
    for dir in L:
        if i == 0:
            data = np.loadtxt(dir)
        else:
            tmp = np.loadtxt(dir)
            data = np.vstack((data, tmp))
        i += 1
    random.shuffle(data)

    train_x, test_x, train_y, test_y = train_test_split(data[:, :-1],
                                                        data[:, -1],
                                                        test_size=0.2,
                                                        random_state=0)
    train_x, valid_x, train_y, valid_y = train_test_split(train_x[:, :],
                                                          train_y[:],
                                                          test_size=0.2,
                                                          random_state=42)

    config = get_config()
    eval_config = get_config()
    eval_config.batch_size = 1

    train_epoch = len(train_y) // config.batch_size
    valid_epoch = len(valid_y) // config.batch_size
    test_epoch = len(test_y) // eval_config.batch_size

    '''
    train_x1 = tf.convert_to_tensor(train_x1, name="train_datax1", dtype=tf.float32)
    train_y1 = tf.convert_to_tensor(train_y1, name="train_datay1", dtype=tf.int32)
    valid_x1 = tf.convert_to_tensor(valid_x1, name="valid_datax1", dtype=tf.float32)
    valid_y1 = tf.convert_to_tensor(valid_y1, name="valid_datay1", dtype=tf.int32)
    test_x1 = tf.convert_to_tensor(test_x1, name="test_datax1", dtype=tf.float32)
    test_y1 = tf.convert_to_tensor(test_y1, name="test_datay1", dtype=tf.int32)
    '''

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.name_scope("Train"):
            with tf.variable_scope("Model", reuse=tf.AUTO_REUSE,
                                   initializer=initializer):
                m = PTB(config=config, is_training=True, name="Train")
            tf.summary.scalar("Training Loss", m.cost)
            tf.summary.scalar("Learning Rate", m.lr)
            summary_op_m = tf.summary.merge_all()

        with tf.name_scope("Valid"):
            with tf.variable_scope("Model", reuse=True,
                                   initializer=initializer):
                mvalid = PTB(config=config, is_training=False, name="Valid")

        with tf.name_scope("Test"):
            with tf.variable_scope("Model", reuse=True,
                                   initializer=initializer):
                mtest = PTB(config=eval_config, is_training=False, name="Test")

        summary_writer = tf.summary.FileWriter('./lstm_logs', session.graph)
        tf.initialize_all_variables().run()  # initialize the model variables

        for i in range(config.max_max_epoch):
            lr_decay = config.lr_decay ** max(i + 1 - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)

            print("Epoch: %d Learning rate: %.3f" % (i + 1, session.run(m.lr)))
            train_perplexity, cost, accuracy = run_epoch(session, m, train_x,
                                                         train_y, train_epoch,
                                                         eval_op=m.train_op,
                                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f Cost: %.3f Accuracy: %.3f" %
                  (i + 1, train_perplexity, cost, accuracy))
            valid_perplexity, cost, accuracy = run_epoch(session, mvalid,
                                                         valid_x, valid_y,
                                                         valid_epoch)
            print("Epoch: %d Valid Perplexity: %.3f Cost: %.3f Accuracy: %.3f" %
                  (i + 1, valid_perplexity, cost, accuracy))
            test_perplexity, cost, accuracy = run_epoch(session, mtest, test_x,
                                                        test_y, test_epoch)
            print("Test Perplexity: %.3f accuracy: %.3f" %
                  (test_perplexity, accuracy))

            saver = tf.train.Saver()
            saver.save(session, './model/model.ckpt', global_step=i)

            summary_str = run_epoch_summary(session, m, summary_op_m, train_x,
                                            train_y, eval_op=m.train_op)
            summary_writer.add_summary(summary_str, i)
import numpy

from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split
from tensorflow.python.keras import Sequential
from tensorflow.python.keras.layers import Dense, Dropout

numpy.random.seed(2)

# load the Pima Indians diabetes dataset, past 5 years of medical history
dataset = numpy.loadtxt("prima-indians-diabetes.csv", delimiter=",")

# split into input (X) and output (Y) variables, splitting csv data
X = dataset[:, 0:8]
Y = dataset[:, 8]

# split X, Y into a train and test set
x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.2,
                                                     random_state=42)

# create model, add dense layers one by one specifying activation function
model = Sequential()
model.add(Dense(15, input_dim=8, activation='relu'))  # input layer requires input_dim param
model.add(Dense(10, activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dropout(.2))
model.add(Dense(1, activation='sigmoid'))  # sigmoid instead of relu for final probability between 0 and 1

# compile the model, adam gradient descent (optimized)
model.compile(loss="binary_crossentropy", optimizer="adam",
def run():
    # Load the data from the local cache
    title_count, title_set, genres2int, features, targets_values, \
        ratings, users, movies, data, movies_orig, users_orig = pickle.load(
            open('preprocess.p', mode='rb'))

    # Basic parameters
    # Dimension of the embedding matrices
    embed_dim = 32
    # Number of user IDs
    # take: the first argument is the column, the second is the axis
    uid_max = max(features.take(0, 1)) + 1  # 6040
    # Number of genders
    gender_max = max(features.take(2, 1)) + 1  # 1 + 1 = 2
    # Number of age categories
    age_max = max(features.take(3, 1)) + 1  # 6 + 1 = 7
    # Number of occupations
    job_max = max(features.take(4, 1)) + 1  # 20 + 1 = 21
    # Number of movie IDs
    movie_id_max = max(features.take(1, 1)) + 1  # 3952
    # Number of movie genres
    movie_categories_max = max(genres2int.values()) + 1  # 18 + 1 = 19
    # Number of words in movie titles
    movie_title_max = len(title_set)  # 5216
    # Combiner for the genre embeddings; taking the mean was considered but not implemented
    combiner = "sum"
    # Movie title length
    sentences_size = title_count  # = 15
    # Text-convolution sliding windows of 2, 3, 4 and 5 words
    window_sizes = {2, 3, 4, 5}
    # Number of text-convolution filters
    filter_num = 8
    # Map movie ID to row index; IDs and row indices differ in the dataset,
    # e.g. the movie ID in row 5 is not necessarily 5
    movieid2idx = {val[0]: i for i, val in enumerate(movies.values)}

    # Hyperparameters
    # Number of Epochs
    num_epochs = 5
    # Batch Size
    batch_size = 256
    dropout_keep = 0.5
    # Learning Rate
    learning_rate = 0.0001
    # Show stats for every n number of batches
    show_every_n_batches = 20
    save_dir = './save'

    def get_user_embedding(uid, user_gender, user_age, user_job):
        """
        Define the user embedding matrices.
        :param uid:
        :param user_gender:
        :param user_age:
        :param user_job:
        :return:
        """
        with tf.name_scope("user_embedding"):
            # Create a uid_max x embed_dim matrix
            uid_embed_matrix = tf.Variable(
                tf.random_uniform([uid_max, embed_dim], -1, 1),
                name="uid_embed_matrix")
            uid_embed_layer = tf.nn.embedding_lookup(uid_embed_matrix, uid,
                                                     name="uid_embed_layer")

            gender_embed_matrix = tf.Variable(
                tf.random_uniform([gender_max, embed_dim // 2], -1, 1),
                name="gender_embed_matrix")
            gender_embed_layer = tf.nn.embedding_lookup(
                gender_embed_matrix, user_gender, name="gender_embed_layer")

            age_embed_matrix = tf.Variable(
                tf.random_uniform([age_max, embed_dim // 2], -1, 1),
                name="age_embed_matrix")
            age_embed_layer = tf.nn.embedding_lookup(age_embed_matrix, user_age,
                                                     name="age_embed_layer")

            job_embed_matrix = tf.Variable(
                tf.random_uniform([job_max, embed_dim // 2], -1, 1),
                name="job_embed_matrix")
            job_embed_layer = tf.nn.embedding_lookup(job_embed_matrix, user_job,
                                                     name="job_embed_layer")
        return uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer

    def get_user_feature_layer(uid_embed_layer, gender_embed_layer,
                               age_embed_layer, job_embed_layer):
        """
        Build the user feature network.
        :param uid_embed_layer:
        :param gender_embed_layer:
        :param age_embed_layer:
        :param job_embed_layer:
        :return:
        """
        with tf.name_scope("user_fc"):
            # First fully connected layer
            uid_fc_layer = tf.layers.dense(uid_embed_layer, embed_dim,
                                           name="uid_fc_layer",
                                           activation=tf.nn.relu)
            gender_fc_layer = tf.layers.dense(gender_embed_layer, embed_dim,
                                              name="gender_fc_layer",
                                              activation=tf.nn.relu)
            age_fc_layer = tf.layers.dense(age_embed_layer, embed_dim,
                                           name="age_fc_layer",
                                           activation=tf.nn.relu)
            job_fc_layer = tf.layers.dense(job_embed_layer, embed_dim,
                                           name="job_fc_layer",
                                           activation=tf.nn.relu)

            # Second fully connected layer over the concatenated outputs above
            # (?, 1, 128); 0: batch, 1: height, 2: width
            user_combine_layer = tf.concat([uid_fc_layer, gender_fc_layer,
                                            age_fc_layer, job_fc_layer], 2)
            user_combine_layer = tf.contrib.layers.fully_connected(
                user_combine_layer, 200, tf.tanh)  # (?, 1, 200)

            # Reshape the output
            user_combine_layer_flat = tf.reshape(user_combine_layer, [-1, 200])
        return user_combine_layer, user_combine_layer_flat

    def get_movie_id_embed_layer(movie_id):
        """
        Movie ID embedding layer.
        :param movie_id:
        :return:
        """
        with tf.name_scope("movie_embedding"):
            movie_id_embed_matrix = tf.Variable(
                tf.random_uniform([movie_id_max, embed_dim], -1, 1),  # -1, 1 are the min and max values
                name="movie_id_embed_matrix")
            movie_id_embed_layer = tf.nn.embedding_lookup(
                movie_id_embed_matrix, movie_id, name="movie_id_embed_layer")
        return movie_id_embed_layer

    def get_movie_categories_layers(movie_categories):
        """
        Movie genre embedding layer.
        :param movie_categories:
        :return:
        """
        with tf.name_scope("movie_categories_layers"):
            movie_categories_embed_matrix = tf.Variable(
                tf.random_uniform([movie_categories_max, embed_dim], -1, 1),
                name="movie_categories_embed_matrix")
            movie_categories_embed_layer = tf.nn.embedding_lookup(
                movie_categories_embed_matrix, movie_categories,
                name="movie_categories_embed_layer")
            # Either sum or average here
            if combiner == "sum":
                # Sum over each row, i.e. add up the features of all the categories
                movie_categories_embed_layer = tf.reduce_sum(
                    movie_categories_embed_layer, axis=1, keep_dims=True)
            # elif combiner == "mean":
        return movie_categories_embed_layer

    def get_movie_cnn_layer(movie_titles):
        """
        Movie title embedding and convolution.
        :param movie_titles:
        :return:
        """
        # Look up the embedding vector of each word in the movie title
        with tf.name_scope("movie_embedding"):
            movie_title_embed_matrix = tf.Variable(
                tf.random_uniform([movie_title_max, embed_dim], -1, 1),
                name="movie_title_embed_matrix")
            movie_title_embed_layer = tf.nn.embedding_lookup(
                movie_title_embed_matrix, movie_titles,
                name="movie_title_embed_layer")
            # Add a channels dimension
            movie_title_embed_layer_expand = tf.expand_dims(
                movie_title_embed_layer, -1)  # -1 means the last dimension

        # Convolve the text embedding with filters of different sizes and max-pool
        pool_layer_lst = []
        for window_size in window_sizes:
            with tf.name_scope("movie_txt_conv_maxpool_{}".format(window_size)):
                # Truncated normal distribution
                filter_weights = tf.Variable(
                    tf.truncated_normal([window_size, embed_dim, 1, filter_num],
                                        stddev=0.1),
                    name="filter_weights")
                filter_bias = tf.Variable(tf.constant(0.1, shape=[filter_num]),
                                          name="filter_bias")

                conv_layer = tf.nn.conv2d(movie_title_embed_layer_expand,
                                          filter_weights, [1, 1, 1, 1],
                                          padding="VALID", name="conv_layer")
                relu_layer = tf.nn.relu(tf.nn.bias_add(conv_layer, filter_bias),
                                        name="relu_layer")

                maxpool_layer = tf.nn.max_pool(
                    relu_layer,
                    [1, sentences_size - window_size + 1, 1, 1],
                    [1, 1, 1, 1], padding="VALID", name="maxpool_layer")
                pool_layer_lst.append(maxpool_layer)

        # Dropout layer
        with tf.name_scope("pool_dropout"):
            pool_layer = tf.concat(pool_layer_lst, 3, name="pool_layer")
            max_num = len(window_sizes) * filter_num
            pool_layer_flat = tf.reshape(pool_layer, [-1, 1, max_num],
                                         name="pool_layer_flat")
            dropout_layer = tf.nn.dropout(pool_layer_flat, dropout_keep_prob,
                                          name="dropout_layer")
        return pool_layer_flat, dropout_layer

    def get_movie_feature_layer(movie_id_embed_layer,
                                movie_categories_embed_layer, dropout_layer):
        """
        Build the movie feature network.
        :param movie_id_embed_layer:
        :param movie_categories_embed_layer:
        :param dropout_layer:
        :return:
        """
        with tf.name_scope("movie_fc"):
            # First fully connected layer
            movie_id_fc_layer = tf.layers.dense(movie_id_embed_layer, embed_dim,
                                                name="movie_id_fc_layer",
                                                activation=tf.nn.relu)
            movie_categories_fc_layer = tf.layers.dense(
                movie_categories_embed_layer, embed_dim,
                name="movie_categories_fc_layer", activation=tf.nn.relu)

            # Second fully connected layer
            movie_combine_layer = tf.concat([movie_id_fc_layer,
                                             movie_categories_fc_layer,
                                             dropout_layer], 2)  # (?, 1, 96)
            movie_combine_layer = tf.contrib.layers.fully_connected(
                movie_combine_layer, 200, tf.tanh)  # (?, 1, 200)

            movie_combine_layer_flat = tf.reshape(movie_combine_layer, [-1, 200])
        return movie_combine_layer, movie_combine_layer_flat

    # Build the computation graph
    tf.reset_default_graph()
    train_graph = tf.Graph()
    with train_graph.as_default():
        # Get the input placeholders
        uid, user_gender, user_age, user_job, movie_id, movie_categories, \
            movie_titles, targets, lr, dropout_keep_prob = get_inputs()
        # Get the 4 user embedding vectors
        uid_embed_layer, gender_embed_layer, age_embed_layer, job_embed_layer = \
            get_user_embedding(uid, user_gender, user_age, user_job)
        # Get the user features
        user_combine_layer, user_combine_layer_flat = get_user_feature_layer(
            uid_embed_layer, gender_embed_layer, age_embed_layer,
            job_embed_layer)
        # Get the movie ID embedding vector
        movie_id_embed_layer = get_movie_id_embed_layer(movie_id)
        # Get the movie genre embedding vector
        movie_categories_embed_layer = get_movie_categories_layers(movie_categories)
        # Get the movie title feature vector
        pool_layer_flat, dropout_layer = get_movie_cnn_layer(movie_titles)
        # Get the movie features
        movie_combine_layer, movie_combine_layer_flat = get_movie_feature_layer(
            movie_id_embed_layer, movie_categories_embed_layer, dropout_layer)

        # Compute the rating. Note that the two alternatives give the inference
        # op different names; when recommending later, fetch the tensor by name.
        with tf.name_scope("inference"):
            # Option 1: concatenate the user and movie features and pass them
            # through a fully connected layer that outputs a single value
            # inference_layer = tf.concat([user_combine_layer_flat, movie_combine_layer_flat], 1)  # (?, 200)
            # inference = tf.layers.dense(inference_layer, 1,
            #                             kernel_initializer=tf.truncated_normal_initializer(stddev=0.01),
            #                             kernel_regularizer=tf.nn.l2_loss, name="inference")
            # Option 2: simply multiply the user and movie features to get a predicted rating
            # inference = tf.matmul(user_combine_layer_flat, tf.transpose(movie_combine_layer_flat))
            inference = tf.reduce_sum(
                user_combine_layer_flat * movie_combine_layer_flat, axis=1)
            inference = tf.expand_dims(inference, axis=1)

        with tf.name_scope("loss"):
            # MSE loss: regress the prediction toward the rating
            cost = tf.losses.mean_squared_error(targets, inference)
            loss = tf.reduce_mean(cost)

        # Optimize the loss
        # train_op = tf.train.AdamOptimizer(lr).minimize(loss)  # cost
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.AdamOptimizer(lr)
        gradients = optimizer.compute_gradients(loss)  # cost
        train_op = optimizer.apply_gradients(gradients, global_step=global_step)

    import time
    import datetime

    losses = {'train': [], 'test': []}

    with tf.Session(graph=train_graph) as sess:
        # Collect data for TensorBoard
        # Keep track of gradient values and sparsity
        grad_summaries = []
        for g, v in gradients:
            if g is not None:
                grad_hist_summary = tf.summary.histogram(
                    "{}/grad/hist".format(v.name.replace(':', '_')), g)
                sparsity_summary = tf.summary.scalar(
                    "{}/grad/sparsity".format(v.name.replace(':', '_')),
                    tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        grad_summaries_merged = tf.summary.merge(grad_summaries)

        # Output directory for models and summaries
        timestamp = str(int(time.time()))
        out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
        print("Writing to {}\n".format(out_dir))

        # Summaries for loss and accuracy
        loss_summary = tf.summary.scalar("loss", loss)

        # Train Summaries
        train_summary_op = tf.summary.merge([loss_summary, grad_summaries_merged])
        train_summary_dir = os.path.join(out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, sess.graph)

        # Inference summaries
        inference_summary_op = tf.summary.merge([loss_summary])
        inference_summary_dir = os.path.join(out_dir, "summaries", "inference")
        inference_summary_writer = tf.summary.FileWriter(inference_summary_dir,
                                                         sess.graph)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        for epoch_i in range(num_epochs):
            # Split into training and test sets; the random seed is not fixed
            train_X, test_X, train_y, test_y = train_test_split(
                features, targets_values, test_size=0.2, random_state=0)

            train_batches = get_batches(train_X, train_y, batch_size)
            test_batches = get_batches(test_X, test_y, batch_size)

            # Training iterations; record the training loss
            for batch_i in range(len(train_X) // batch_size):
                x, y = next(train_batches)

                categories = np.zeros([batch_size, 18])
                for i in range(batch_size):
                    categories[i] = x.take(6, 1)[i]

                titles = np.zeros([batch_size, sentences_size])
                for i in range(batch_size):
                    titles[i] = x.take(5, 1)[i]

                feed = {
                    uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                    user_gender: np.reshape(x.take(2, 1), [batch_size, 1]),
                    user_age: np.reshape(x.take(3, 1), [batch_size, 1]),
                    user_job: np.reshape(x.take(4, 1), [batch_size, 1]),
                    movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                    movie_categories: categories,  # x.take(6,1)
                    movie_titles: titles,  # x.take(5,1)
                    targets: np.reshape(y, [batch_size, 1]),
                    dropout_keep_prob: dropout_keep,  # dropout_keep
                    lr: learning_rate}

                step, train_loss, summaries, _ = sess.run(
                    [global_step, loss, train_summary_op, train_op], feed)  # cost

                losses['train'].append(train_loss)
                train_summary_writer.add_summary(summaries, step)

                # Show every <show_every_n_batches> batches
                if (epoch_i * (len(train_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                    time_str = datetime.datetime.now().isoformat()
                    print('{}: Epoch {:>3} Batch {:>4}/{} train_loss = {:.3f}'.format(
                        time_str, epoch_i, batch_i,
                        (len(train_X) // batch_size), train_loss))

            # Iterate over the test data
            for batch_i in range(len(test_X) // batch_size):
                x, y = next(test_batches)

                categories = np.zeros([batch_size, 18])
                for i in range(batch_size):
                    categories[i] = x.take(6, 1)[i]

                titles = np.zeros([batch_size, sentences_size])
                for i in range(batch_size):
                    titles[i] = x.take(5, 1)[i]

                feed = {
                    uid: np.reshape(x.take(0, 1), [batch_size, 1]),
                    user_gender: np.reshape(x.take(2, 1), [batch_size, 1]),
                    user_age: np.reshape(x.take(3, 1), [batch_size, 1]),
                    user_job: np.reshape(x.take(4, 1), [batch_size, 1]),
                    movie_id: np.reshape(x.take(1, 1), [batch_size, 1]),
                    movie_categories: categories,  # x.take(6,1)
                    movie_titles: titles,  # x.take(5,1)
                    targets: np.reshape(y, [batch_size, 1]),
                    dropout_keep_prob: 1,
                    lr: learning_rate}

                step, test_loss, summaries = sess.run(
                    [global_step, loss, inference_summary_op], feed)  # cost

                # Record the test loss
                losses['test'].append(test_loss)
                inference_summary_writer.add_summary(summaries, step)

                time_str = datetime.datetime.now().isoformat()
                if (epoch_i * (len(test_X) // batch_size) + batch_i) % show_every_n_batches == 0:
                    print('{}: Epoch {:>3} Batch {:>4}/{} test_loss = {:.3f}'.format(
                        time_str, epoch_i, batch_i,
                        (len(test_X) // batch_size), test_loss))

        # Save Model
        saver.save(sess, save_dir)  # , global_step=epoch_i
        print('Model Trained and Saved')
        # save_params((save_dir))
        # load_dir = load_params()

    plt.plot(losses['train'], label='Training loss')
    plt.legend()
    _ = plt.ylim()
nos = [
    filename for filename in os.listdir('originals/no')
    if filename.lower().endswith('.jpg')
]
for filename in nos:
    image = cv2.imread(f'originals/no/{filename}')
    image = cv2.resize(image, (299, 299))
    image = img_to_array(image)
    data.append(image)
    labels.append(0)  # 0 for no firetruck

data = np.array(data, dtype='float') / 255
labels = np.array(labels)

train_x, test_x, train_y, test_y = train_test_split(data, labels,
                                                    test_size=0.25,
                                                    random_state=24601)
train_y = to_categorical(train_y, num_classes=2)
test_y = to_categorical(test_y, num_classes=2)

# train the model on the new data for a few epochs
history1 = model.fit(train_x, train_y, batch_size=64, epochs=3,
                     validation_data=(test_x, test_y))

with open('history_new_layers.json', 'w') as f:
    json.dump(history1.history, f)

# at this point, the top layers are well trained and we can start fine-tuning
def testIrisES(self):
    random.seed(42)

    iris = datasets.load_iris()
    x_train, x_test, y_train, y_test = train_test_split(iris.data,
                                                        iris.target,
                                                        test_size=0.2,
                                                        random_state=42)

    x_train, x_val, y_train, y_val = train_test_split(x_train,
                                                      y_train,
                                                      test_size=0.2,
                                                      random_state=42)
    val_monitor = learn.monitors.ValidationMonitor(
        x_val, y_val, every_n_steps=50, early_stopping_rounds=100,
        early_stopping_metric='loss', early_stopping_metric_minimize=False)

    feature_columns = learn.infer_real_valued_columns_from_input(iris.data)

    # classifier without early stopping - overfitting
    classifier1 = learn.DNNClassifier(feature_columns=feature_columns,
                                      hidden_units=[10, 20, 10],
                                      n_classes=3)
    classifier1.fit(x_train, y_train, steps=1000)
    _ = accuracy_score(y_test, classifier1.predict(x_test))

    # Full 1000 steps, 19 summaries and no evaluation summary:
    # 1 summary of net at step 1
    # 9 x (1 summary of net and 1 summary of global step) for steps 101, 201,...
    self.assertEqual(19, len(_get_summary_events(classifier1.model_dir)))
    with self.assertRaises(ValueError):
        _get_summary_events(classifier1.model_dir + '/eval')

    # classifier with early stopping - improved accuracy on testing set
    classifier2 = learn.DNNClassifier(
        hidden_units=[10, 20, 10],
        feature_columns=feature_columns,
        n_classes=3,
        config=tf.contrib.learn.RunConfig(save_checkpoints_secs=1))
    classifier2.fit(x_train, y_train, monitors=[val_monitor], steps=2000)
    _ = accuracy_score(y_val, classifier2.predict(x_val))
    _ = accuracy_score(y_test, classifier2.predict(x_test))

    # Note, this test is unstable, so not checking for equality.
    # See stability_test for examples of stability issues.
    if val_monitor.early_stopped:
        self.assertLess(val_monitor.best_step, 2000)
        # Note, because the validation monitor stops after the best score occurs,
        # the accuracy at the current checkpoint is lower.
        # TODO(ipolosukhin): Time machine for restoring old checkpoints?
        # Flaky: best_value is still not always better than the score2 value.
        # self.assertGreater(val_monitor.best_value, score2_val)

    # Early stopped, unstable, so checking only < max.
    self.assertLess(len(_get_summary_events(classifier2.model_dir)), 21)
    # Eval typically has ~6 events, but it varies based on the run.
    self.assertLess(
        len(_get_summary_events(classifier2.model_dir + '/eval')), 8)
import os
import sys

import matplotlib.pyplot as plt
import numpy
import tensorflow as tf
from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Conv1D, MaxPooling1D, Dropout, Flatten, Dense

seed = 0
numpy.random.seed(seed)
tf.set_random_seed(seed)

npz_file = numpy.load('NPInter.npz')
npz_x_list = numpy.hstack([npz_file['XP'], npz_file['XR']])

X_train, X_test, Y_train, Y_test = train_test_split(npz_x_list, npz_file['Y'],
                                                    test_size=0.1,
                                                    random_state=seed)

model = Sequential()
model.add(Conv1D(32, kernel_size=4, input_shape=(739, 1), activation='relu'))
model.add(Conv1D(64, 4, activation='relu'))
model.add(MaxPooling1D(pool_size=2))
model.add(Dropout(0.25))
model.add(Flatten())  # 2D -> 1D
model.add(Dense(128, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam',
    return np.squeeze(H.T)


f = open("train.pkl", 'rb')
x_train, y_train = pkl.load(f)

f1 = open("hogged.pkl", 'rb')
x_train = pkl.load(f1)
f1.close()

x_train = np.asarray(x_train)
x_train = x_train.squeeze()
y_train = y_train.transpose()
y_train = y_train[0]

x_train, x_test, y_train, y_test = train_test_split(x_train, y_train,
                                                    test_size=0.1)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

name = "Bmodel2.h5"
layers = [
    Dense(1000, activation="relu", input_shape=(x_train[0].shape[0], )),
    Dense(500, activation="relu"),
    Dense(100, activation="relu"),
    Dense(36, activation="softmax")
]
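# A minimal sketch, not the original continuation: assembling the layer list
# above into a Sequential model and training it. The optimizer, loss, batch
# size and epoch count are illustrative assumptions.
from keras.models import Sequential

model = Sequential(layers)
model.compile(loss="categorical_crossentropy", optimizer="adam",
              metrics=["accuracy"])
model.fit(x_train, y_train, batch_size=128, epochs=20,
          validation_data=(x_test, y_test))
model.save(name)  # e.g. "Bmodel2.h5"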
batch_size = 100
learning_rate = 0.01
samples_per_epoch = 100  # Assuming 10,000 images
nb_epoch = 25
test_size = 0.3
keep_prob = 0.5
# </editor-fold>

# <editor-fold desc="Load Data">
data_df = pd.read_csv(os.path.join(dataset_dir + data, 'driving_log.csv'))
X = data_df[['center', 'left', 'right']].values
y = data_df['steering'].values
X_train, X_valid, y_train, y_valid = train_test_split(X, y,
                                                      test_size=test_size,
                                                      random_state=0)
# </editor-fold>


class Model():
    def __init__(self, INPUT_SHAPE, keep_prob):
        self.model = self.load(INPUT_SHAPE, keep_prob)

    def load(self, INPUT_SHAPE, keep_prob):
        model = Sequential()
        model.add(Lambda(lambda x: x / 127.5 - 1.0, input_shape=INPUT_SHAPE))
        model.add(Conv2D(24, 5, 5, activation='elu', subsample=(2, 2)))
        model.add(Conv2D(36, 5, 5, activation='elu', subsample=(2, 2)))
        if dir.startswith('.'):
            continue
        img = cv2.imread(os.path.join(root, file), cv2.COLOR_BGR2GRAY)
        img = cv2.resize(img, (28, 28), interpolation=cv2.INTER_CUBIC)
        labels.append(dir)
        datasets.append(img)

datasets = np.array(datasets)
labels = np.array(labels)

# Preprocess the dataset
datasets = datasets.astype('float32')
datasets /= 255
datasets = datasets.reshape(datasets.shape[0], 28, 28, 1)

x_train, x_test, y_train, y_test = train_test_split(datasets, labels,
                                                    test_size=0.3,
                                                    random_state=0)
y_prepare_train = []
for obj in y_train:
    y_prepare_train.append(int(re.sub(r"\D", "", obj)) - 1)

y_prepare_test = []
for obj in y_test:
    y_prepare_test.append(int(re.sub(r"\D", "", obj)) - 1)

y_train = keras.utils.to_categorical(y_prepare_train, type_num)
y_test = keras.utils.to_categorical(y_prepare_test, type_num)

model = Sequential()
model.add(
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.model_selection import cross_val_predict
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tensorflow.contrib.learn.python.learn.estimators._sklearn import train_test_split

dota2results = np.loadtxt('data/dota2Dataset/dota2Train.csv', delimiter=',')
dota2x = dota2results[:, 1:]
dota2y = dota2results[:, 0]

spamx = spambase[:, :57]
spamy = spambase[:, 57]

X_train, X_test, Y_train, Y_test = train_test_split(spamx, spamy,
                                                    test_size=0.1,
                                                    random_state=0)

spamx = spambase[:, :57]
spamy = spambase[:, 57]

# Standardize the training data
ss = StandardScaler()
# X_train = ss.fit_transform(X_train)  # fit the data first, then standardize

lr = LogisticRegressionCV(multi_class="ovr", fit_intercept=True,
                          Cs=np.logspace(-2, 2, 20), cv=2, penalty="l2",
                          solver="lbfgs", tol=0.01)
re = lr.fit(X_train, Y_train)

# # Predict
# X_test = ss.transform(X_test)  # standardize the data
# Y_predict = lr.predict(X_test)  # predict

# Dota2 data
dx_train, dx_test, dy_train, dy_test = train_test_split(dota2x, dota2y,
                                                        test_size=0.1,
                                                        random_state=0)
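# A short evaluation sketch (assumption, not part of the original snippet):
# the metrics imported above can score the fitted spam classifier on the
# held-out split.
Y_predict = lr.predict(X_test)
print("precision:", precision_score(Y_test, Y_predict))
print("recall:", recall_score(Y_test, Y_predict))
print("f1:", f1_score(Y_test, Y_predict))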