def linear_regression():
    """Fit y = w*x + b to a tiny hard-coded dataset with per-sample SGD.

    Builds a TF1 graph, trains for 100 epochs (one sample at a time),
    then prints predictions against targets and shows a matplotlib plot.
    """
    x_train = np.asarray([1, 2, 3, 4, 5, 6, 7, 8, 9, 11])
    y_train = np.asarray([0.1, 0.2, 0.32, 0.43, 0.54, 0.65, 0.77, 0.88, 0.94, 1])
    num_samples = x_train.shape[0]

    # Scalar placeholders: one (x, y) pair is fed per training step.
    x_ = tf.placeholder(tf.float32, name="x")
    y_ = tf.placeholder(tf.float32, name="y")
    w = tf.get_variable("weights", initializer=tf.constant(0.0))
    b = tf.get_variable("bias", initializer=tf.constant(0.0))

    y_predict = w * x_ + b
    loss = tf.square(y_ - y_predict, name='loss')
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)

    # Dump the (already complete) graph for TensorBoard, then close immediately.
    writer = tf.summary.FileWriter("./graphs", tf.get_default_graph())
    writer.close()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(100):
            total_loss = 0
            for sample_x, sample_y in zip(x_train, y_train):
                _, _loss = sess.run([optimizer, loss],
                                    feed_dict={x_: sample_x, y_: sample_y})
                total_loss += _loss
            print(f"Epoch {i}: {total_loss / num_samples}")
        w_out, b_out = sess.run([w, b])

    # Re-evaluate the fitted line in NumPy and compare with the targets.
    y_predict = x_train * w_out + b_out
    for i, j in zip(y_predict, y_train):
        print(f"{i} : {j}")

    plt.plot(x_train, y_predict, "r-", label="predict")
    plt.plot(x_train, y_train, "go", label="data")
    plt.title("ABC")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.show()
def __post_init__(self):
    """Load encoder/hparams, build the sampling graph, and restore weights.

    Side effects: sets self.enc, self.hparams, self.sess, self.context,
    self.output; seeds NumPy and TF RNGs from self.seed.
    Raises ValueError when self.length exceeds the model's context window.
    """
    if self.batch_size is None:
        self.batch_size = 1
    # NOTE(review): assert is stripped under `python -O`; kept for
    # interface compatibility, but a ValueError would be more robust.
    assert self.nsamples % self.batch_size == 0
    self.enc = encoder.get_encoder(self.models_dir, self.model_name)
    self.hparams = model.default_hparams()
    with open(
            os.path.join(self.models_dir, self.model_name, 'hparams.json')) as f:
        self.hparams.override_from_dict(json.load(f))
    if self.length is None:
        # Default to half the context window.
        self.length = self.hparams.n_ctx // 2
    elif self.length > self.hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         self.hparams.n_ctx)
    self.sess = tf.Session()
    # BUG FIX: removed two tf.global_variables_initializer() runs that
    # executed here — before sample_sequence() creates any variables —
    # so they initialized nothing; saver.restore() below supplies all
    # variable values from the checkpoint.
    self.context = tf.placeholder(tf.int32, [self.batch_size, None])
    np.random.seed(self.seed)
    tf.set_random_seed(self.seed)
    self.output = sample.sample_sequence(
        hparams=self.hparams, length=self.length,
        context=self.context,
        batch_size=self.batch_size,
        temperature=self.temperature, top_k=self.top_k, top_p=self.top_p
    )
    saver = tf.train.Saver()
    print(f"MODEL DIR {self.models_dir}")
    print(f"MODEL NAME {self.model_name}")
    print(f"PWD {os.getcwd()}")
    print(f"MODEL DIR ABS {Path(self.models_dir).absolute()}")
    ckpt = tf.train.latest_checkpoint(
        os.path.join(self.models_dir, self.model_name))
    saver.restore(self.sess, ckpt)
def build_learning_model(self):
    """Create the Double-DQN agent in its own variable scope and init vars."""
    with tf.variable_scope("Double-DQN"):
        self.double_dqn = DoubleDQN(
            self.action_space,
            self.n_features,
            memory_size=self.memory_size,
            double_q=True,
            sess=self.sess,
            e_greedy_increment=0.001,
            output_graph=True,
        )
    # Initialize everything the agent just added to the graph.
    self.sess.run(tf.global_variables_initializer())
def __init__(self, n_state, n_action, learning_rate, gamma,
             replay_buffer_size=3000, sess: tf.Session = None):
    """Build the eval/target networks, target-sync ops, and replay buffer.

    Args:
        n_state: dimensionality of the state vector.
        n_action: number of discrete actions.
        learning_rate: optimizer step size.
        gamma: reward discount factor.
        replay_buffer_size: number of transitions kept in memory.
        sess: optional externally managed session; a new one is created
            when None.
    """
    self.n_state = n_state
    self.n_action = n_action
    self.fai_s_size = 512
    # shape: (state_size, action_size)
    self.w = np.zeros([n_state])
    self.learning_rate = learning_rate
    self.gamma = gamma
    # One row per transition: [s, a, r, s'] flattened.
    self.replay_buffer = np.zeros(
        [replay_buffer_size, self.n_state * 2 + 2])
    self.memory_size = replay_buffer_size
    self.memory_count = 0
    self.state = tf.placeholder(tf.float32, [None, self.n_state])
    self.state_hat = tf.placeholder(tf.float32, [None, self.n_state])
    self.state_ = tf.placeholder(tf.float32, [None, self.n_state])
    self.rs_p = tf.placeholder(tf.float32, [None, 1])
    if sess is None:
        self.sess = tf.Session()
    else:
        self.sess = sess
    self.eval_collection_name = [
        'eval_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
    ]
    self.target_collection_name = [
        'target_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
    ]
    # BUG FIX: rmtree raised FileNotFoundError on a fresh checkout when
    # ./log did not exist yet.
    shutil.rmtree("./log", ignore_errors=True)
    os.mkdir("./log")
    with tf.variable_scope('eval_net'):
        self.eval_fai, self.eval_s_hat, self.eval_r_s, self.eval_M = self._build_net(
            self.eval_collection_name)
    with tf.variable_scope('target_net'):
        # BUG FIX: the target-net outputs previously overwrote
        # self.eval_fai; they now land on self.target_fai.
        self.target_fai, self.target_s_hat, self.target_r_s, self.target_M = self._build_net(
            self.target_collection_name)
    # BUG FIX: the sync ops were built BEFORE the networks existed, so
    # both collections were empty and assign_op was always []. They are
    # now built after _build_net has populated the collections, and the
    # eval lookup uses the collection key 'eval_net_collection' instead
    # of the literal string 'eval_collection_name'.
    with tf.variable_scope('assign_op'):
        e_params = tf.get_collection('eval_net_collection')
        t_params = tf.get_collection('target_net_collection')
        self.assign_op = [
            tf.assign(t, e) for t, e in zip(t_params, e_params)
        ]
    tf.summary.FileWriter("./log", self.sess.graph)
    self.sess.run(tf.global_variables_initializer())
def __init__(self,
             n_actions,
             n_features,
             learning_rate=0.005,
             reward_decay=0.9,
             replace_decay=0.9,
             e_greedy=0.9,
             replace_target_iter=200,
             memory_size=3000,
             batch_size=32,
             e_greedy_increment=None,
             output_graph=False,
             double_q=True,
             sess: tf.Session = None):
    """(Double) DQN agent: hyper-parameters, replay memory, and networks.

    When `e_greedy_increment` is given, epsilon starts at 0 and anneals
    toward `e_greedy`; otherwise it is fixed at `e_greedy`. When `sess`
    is None a private session is created and initialized here; an
    externally supplied session is assumed to be initialized by the
    caller.
    """
    self.n_actions = n_actions
    self.n_features = n_features
    self.learning_rate = learning_rate
    self.gamma = reward_decay
    self.replace_decay = replace_decay
    self.replace_target_iter = replace_target_iter
    self.epsilon_max = e_greedy
    self.memory_size = memory_size
    self.batch_size = batch_size
    self.e_greedy_increment = e_greedy_increment
    self.output_graph = output_graph
    self.double_q = double_q

    self.memory_counter = 0
    self.learn_step_counter = 0
    # One row per transition: [s, a, r, s'] flattened.
    self.memory = np.zeros((self.memory_size, self.n_features * 2 + 2))
    self.epsilon = 0 if self.e_greedy_increment is not None else self.epsilon_max

    self._build_net()

    # Ops that copy eval-net weights into the target net.
    eval_vars = tf.get_collection('eval_net_params')
    target_vars = tf.get_collection('target_net_params')
    with tf.variable_scope("assign_op"):
        self.replace_target_op = [
            tf.assign(dst, src) for dst, src in zip(target_vars, eval_vars)
        ]

    if sess is not None:
        self.sess = sess
    else:
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    if output_graph:
        tf.summary.FileWriter("./logs/", self.sess.graph)

    self.cost_his = []  # history of training losses
def train(x_train, y_train):
    """Train an L1-regularized logistic-regression model with mini-batch
    proximal gradient descent, then hard-shrink the learned weights.

    Args:
        x_train: (n_samples, n_features) float matrix.
        y_train: (n_samples, 1) labels; the log(1+exp(-y*f(x))) loss
            implies labels in {-1, +1}.
    Returns:
        (curr_w, curr_b): the shrunken weight vector and the bias.
    Relies on module-level `learn_rate`, `n_epochs`, `batch_size`, and
    `threshold` constants.
    """
    n_samples, n_features = x_train.shape
    # BUG FIX: the weight shape used `input_dim`, an undefined name
    # (NameError at runtime); it must match the feature count unpacked
    # from x_train.shape.
    w = tf.Variable(np.random.rand(n_features, 1).astype(dtype='float32'),
                    name="weight")
    b = tf.Variable(0.0, dtype=tf.float32, name="bias")
    x = tf.placeholder(dtype=tf.float32, name='x')
    y = tf.placeholder(dtype=tf.float32, name='y')
    predictions = tf.matmul(x, w) + b
    # Logistic loss: mean(log(1 + exp(-y * f(x)))).
    loss = tf.reduce_mean(
        tf.log(1 + tf.exp(tf.multiply(-1.0 * y, predictions))))
    # optimizer = tf.train.GradientDescentOptimizer(learn_rate).minimize(loss)
    optimizer = tf.train.ProximalGradientDescentOptimizer(
        learning_rate=learn_rate,
        l1_regularization_strength=0.1).minimize(loss)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for epoch in range(n_epochs):
            for idx in range(0, n_samples, batch_size):
                iE = min(n_samples, idx + batch_size)
                x_batch = x_train[idx:iE, :]
                y_batch = y_train[idx:iE, :]
                sess.run([optimizer], feed_dict={x: x_batch, y: y_batch})
            curr_w, curr_b = sess.run([w, b])
            # NOTE(review): this shifts every weight toward zero by
            # `threshold` but never zeroes the |w| < threshold band, so
            # it is not a true soft-threshold; confirm intent before
            # changing the numerics.
            for idx in range(len(curr_w)):
                if curr_w[idx] < threshold * -1:
                    curr_w[idx] += threshold
                else:
                    curr_w[idx] -= threshold
            # Write the shrunken weights back into the graph variable.
            sess.run([tf.assign(w, curr_w)])
    return curr_w, curr_b
def trainingProcess(self):
    """Run the training loop until self.maxEpoch epochs or early stop.

    Periodically (every self.displayStep steps) reports the running
    average loss and current batch accuracy; every self.validStep steps
    runs validation and stops early when self.patience hits 0.
    Side effects: creates self.saver; consumes batches from self.texti.
    """
    init = tf.global_variables_initializer()
    config = tf.ConfigProto()
    # Grow GPU memory on demand instead of grabbing it all up front.
    config.gpu_options.allow_growth = True
    avgLoss = 0.0
    self.saver = tf.train.Saver()
    with tf.Session(config=config) as sess:
        print("init...")
        sess.run(init)
        print("init finished!")
        step = 0
        # self.texti.epoch advances as nextBatch() wraps around the data.
        while self.texti.epoch < self.maxEpoch:
            batchX, batchImage, batchWord, batchY = self.texti.nextBatch()
            if step % self.displayStep == 0:
                # Evaluation pass (train flag False) on the current batch.
                acc, p = sess.run([self.PredAcc, self.p],
                                  feed_dict=self.make_feed_dict(
                                      batchX, batchImage, batchWord,
                                      batchY, False))
                self.display(avgLoss / self.displayStep, acc, p, batchY,
                             self.texti.epoch)
                avgLoss = 0.0
            # Skip validation during the first epoch (epoch > 0).
            if step % self.validStep == 0 and self.texti.epoch > 0:
                self.valid(sess, step)
                # Early stopping: valid() decrements patience on no improvement.
                if self.patience == 0:
                    break
            # Training pass (train flag True).
            _, tmpLoss = sess.run([self.trainOP, self.loss],
                                  feed_dict=self.make_feed_dict(
                                      batchX, batchImage, batchWord,
                                      batchY, True))
            avgLoss += tmpLoss
            step += 1
            # NOTE(review): this condition is always true (epoch starts >= 0);
            # presumably a leftover guard around the global-step increment.
            if self.texti.epoch > -1:
                sess.run(self.addGlobal)
# Policy head: softmax over actions; weights and bias start at zero.
action_output_raw = tf.layers.dense(hidden_1, action_output_count, tf.nn.softmax,
                                    use_bias=True,
                                    kernel_initializer=tf.zeros_initializer,
                                    bias_initializer=tf.zeros_initializer,
                                    name="action_output_node")
# Round the probabilities to 5 decimal places (scale, round, unscale) for
# the exported prediction tensor; the raw softmax is kept for the loss.
action_output_ = tf.multiply(action_output_raw, 100000)
action_output_ = tf.round(action_output_)
action_output = tf.div(action_output_, 100000)
# Exported output: Q-value(s) concatenated with the rounded action
# distribution, exposed under the stable name "prediction_node".
prediction = tf.concat([Q_output, action_output], 1, name="concat_node")
prediction_identity = tf.identity(prediction, name="prediction_node")
# Joint objective: MSE on the value head + cross-entropy on the policy head.
Q_loss = tf.keras.losses.mean_squared_error(y_true=Q_target, y_pred=Q_output_raw)
policy_loss = tf.keras.losses.categorical_crossentropy(y_true=action_target,
                                                       y_pred=action_output_raw)
total_loss = Q_loss + policy_loss
train_op = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                  name="Optimizer").minimize(total_loss,
                                                             name='optimize_node')
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
# Write the graph for TensorBoard, then close the writer immediately.
train_writer = tf.summary.FileWriter(path_to_store + "/summary", sess.graph)
train_writer.close()
# NOTE(review): this serializes the GraphDef only — variable values are
# NOT frozen into the .pb; loading it yields an uninitialized graph.
with open(os.path.join(path_to_store, model_name + '.pb'), 'wb') as f:
    f.write(tf.get_default_graph().as_graph_def().SerializeToString())
# builder = tf.saved_model.builder.SavedModelBuilder("C:/Users/Snurka/init_model")
# builder.add_meta_graph_and_variables(
#     sess,
def main():
    """Fine-tune a GPT-2 model on a text dataset.

    Builds the training (and optional validation) graph, restores a
    checkpoint, then loops: periodically saves, generates samples, and
    validates, training one batch (or one accumulated group of batches)
    per step. Ctrl-C saves a final checkpoint before exiting.
    """
    args = parser.parse_args()
    enc = encoder.get_encoder(CHECKPOINT_DIR, args.model_name)
    hparams = model.default_hparams()
    with open(os.path.join(CHECKPOINT_DIR, args.model_name, 'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))
    if args.sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)
    if args.model_name == '345M':
        # args.memory_saving_gradients = True
        # The full 345M model does not fit Adam state for all variables;
        # restrict training to the transformer layers.
        if args.optimizer == 'adam':
            args.only_train_transformer_layers = True
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
    with tf.Session(config=config) as sess:
        # Language-model loss: predict token t+1 from tokens <= t.
        context = tf.placeholder(tf.int32, [args.batch_size, None])
        context_in = randomize(context, hparams, args.noise)
        output = model.model(hparams=hparams, X=context_in)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))
        if args.val_every > 0:
            # Separate graph path for validation (no input noise).
            val_context = tf.placeholder(tf.int32, [args.val_batch_size, None])
            val_output = model.model(hparams=hparams, X=val_context)
            val_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=val_context[:, 1:],
                    logits=val_output['logits'][:, :-1]))
            val_loss_summary = tf.summary.scalar('val_loss', val_loss)
        tf_sample = sample.sample_sequence(hparams=hparams,
                                           length=args.sample_length,
                                           context=context,
                                           batch_size=args.batch_size,
                                           temperature=1.0,
                                           top_k=args.top_k,
                                           top_p=args.top_p)
        all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        # '/h' matches the transformer blocks (model/h0, model/h1, ...).
        train_vars = [v for v in all_vars if '/h' in v.name
                      ] if args.only_train_transformer_layers else all_vars
        if args.optimizer == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        elif args.optimizer == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=args.learning_rate)
        else:
            exit('Bad optimizer:', args.optimizer)
        if args.accumulate_gradients > 1:
            if args.memory_saving_gradients:
                exit(
                    "Memory saving gradients are not implemented for gradient accumulation yet."
                )
            # Wrap the optimizer so gradients accumulate across several
            # compute calls before one apply.
            opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
            opt_reset = opt.reset()
            opt_compute = opt.compute_gradients(loss)
            opt_apply = opt.apply_gradients()
            # NOTE(review): relies on AccumulatingOptimizer.apply_gradients()
            # returning the (averaged) loss tensor.
            summary_loss = tf.summary.scalar('loss', opt_apply)
        else:
            if args.memory_saving_gradients:
                opt_grads = memory_saving_gradients.gradients(loss, train_vars)
            else:
                opt_grads = tf.gradients(loss, train_vars)
            opt_grads = list(zip(opt_grads, train_vars))
            opt_apply = opt.apply_gradients(opt_grads)
            summary_loss = tf.summary.scalar('loss', loss)
        summary_lr = tf.summary.scalar('learning_rate', args.learning_rate)
        summaries = tf.summary.merge([summary_lr, summary_loss])
        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))
        saver = tf.train.Saver(var_list=all_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())
        # Resolve which checkpoint to start from.
        if args.restore_from == 'latest':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tf.train.latest_checkpoint(
                    os.path.join(CHECKPOINT_DIR, args.model_name))
        elif args.restore_from == 'fresh':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.model_name))
        else:
            ckpt = tf.train.latest_checkpoint(args.restore_from)
        print('Loading checkpoint', ckpt)
        saver.restore(sess, ckpt)
        print('Loading dataset...')
        chunks = load_dataset(enc,
                              args.dataset,
                              args.combine,
                              encoding=args.encoding)
        data_sampler = Sampler(chunks)
        if args.val_every > 0:
            if args.val_dataset:
                val_chunks = load_dataset(enc,
                                          args.val_dataset,
                                          args.combine,
                                          encoding=args.encoding)
            else:
                # No separate validation set: validate on the training data.
                val_chunks = chunks
        print('dataset has', data_sampler.total_size, 'tokens')
        print('Training...')
        if args.val_every > 0:
            # Sample from validation set once with fixed seed to make
            # it deterministic during training as well as across runs.
            val_data_sampler = Sampler(val_chunks, seed=1)
            val_batches = [[
                val_data_sampler.sample(1024)
                for _ in range(args.val_batch_size)
            ] for _ in range(args.val_batch_count)]
        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            # Checkpoint the model and persist the step counter.
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')

        def generate_samples():
            # Decode args.sample_num samples (args.batch_size at a time)
            # and write them under SAMPLE_DIR.
            print('Generating samples...')
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < args.sample_num:
                out = sess.run(
                    tf_sample,
                    feed_dict={context: args.batch_size * [context_tokens]})
                for i in range(min(args.sample_num - index, args.batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(
                        index + 1, text)
                    all_text.append(text)
                    index += 1
                print(text)
            maketree(os.path.join(SAMPLE_DIR, args.run_name))
            with open(os.path.join(SAMPLE_DIR, args.run_name,
                                   'samples-{}').format(counter),
                      'w',
                      encoding=args.encoding) as fp:
                fp.write('\n'.join(all_text))

        def validation():
            # Mean loss over the fixed validation batches; logged to
            # TensorBoard via the val_loss placeholder trick.
            print('Calculating validation loss...')
            losses = []
            for batch in tqdm.tqdm(val_batches):
                losses.append(
                    sess.run(val_loss, feed_dict={val_context: batch}))
            v_val_loss = np.mean(losses)
            v_summary = sess.run(val_loss_summary,
                                 feed_dict={val_loss: v_val_loss})
            summary_log.add_summary(v_summary, counter)
            summary_log.flush()
            print('[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'.
                  format(counter=counter,
                         time=time.time() - start_time,
                         loss=v_val_loss))

        def sample_batch():
            return [data_sampler.sample(1024) for _ in range(args.batch_size)]

        # avg_loss is an exponentially weighted (numerator, denominator) pair.
        avg_loss = (0.0, 0.0)
        start_time = time.time()
        try:
            while counter < 1000:
                if counter % args.save_every == 0:
                    save()
                if counter % args.sample_every == 0:
                    generate_samples()
                if args.val_every > 0 and (counter % args.val_every == 0
                                           or counter == 1):
                    validation()
                if args.accumulate_gradients > 1:
                    # Accumulate gradients over several batches, then apply once.
                    sess.run(opt_reset)
                    for _ in range(args.accumulate_gradients):
                        sess.run(opt_compute,
                                 feed_dict={context: sample_batch()})
                    (v_loss, v_summary) = sess.run((opt_apply, summaries))
                else:
                    (_, v_loss, v_summary) = sess.run(
                        (opt_apply, loss, summaries),
                        feed_dict={context: sample_batch()})
                summary_log.add_summary(v_summary, counter)
                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)
                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))
                counter += 1
        except KeyboardInterrupt:
            print('interrupted')
            save()
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x

    Args:
      - data_x: original data with missing values (NaN marks missing)
      - gain_parameters: GAIN network parameters:
        - batch_size: Batch size
        - hint_rate: Hint rate
        - alpha: Hyperparameter
        - iterations: Iterations

    Returns:
      - imputed_data: imputed data
    '''
    # Define mask matrix (1 = observed, 0 = missing)
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization (min-max); parameters kept for renormalization at the end
    norm_data, norm_parameters = normalization(data_x)
    # NOTE(review): the second positional argument of np.nan_to_num is
    # `copy`, so this passes copy=0 (in-place); NaNs still become 0.0.
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # Input placeholders
    # Data vector
    tf.disable_v2_behavior()
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs
    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    #Generator variables
    # Data + Mask as inputs (Random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))
    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))
    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator: 2 ReLU layers + sigmoid output in [0, 1] (min-max scale)
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator: predicts, per component, probability of being observed
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Combine with observed data: keep observed entries, fill missing ones
    Hat_X = X * M + G_sample * (1 - M)

    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss (1e-8 guards the logs against 0)
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8) +
                                  (1 - M) * tf.log(1. - D_prob + 1e-8))
    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))
    # Reconstruction error on observed components only
    MSE_loss = tf.reduce_mean((M * X - M * G_sample)**2) / tf.reduce_mean(M)
    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Start Iterations: alternate one D step and one G step per iteration
    for it in tqdm(range(iterations)):
        # Sample batch
        batch_idx = sample_batch_index(no, batch_size)
        X_mb = norm_data_x[batch_idx, :]
        M_mb = data_m[batch_idx, :]
        # Sample random vectors
        Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
        # Sample hint vectors
        H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
        H_mb = M_mb * H_mb_temp

        # Combine random vectors with observed vectors
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                  feed_dict={
                                      M: M_mb,
                                      X: X_mb,
                                      H: H_mb
                                  })
        _, G_loss_curr, MSE_loss_curr = \
            sess.run([G_solver, G_loss_temp, MSE_loss],
                     feed_dict={X: X_mb, M: M_mb, H: H_mb})

    ## Return imputed data: one full-dataset generator pass
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]

    # Keep observed values; use generator output only where data was missing
    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
# Load CIFAR-10 resized to the network's expected input resolution.
print('Reading CIFAR-10...')
X_train, Y_train, X_test, Y_test = read_cifar_10(image_width=INPUT_WIDTH,
                                                 image_height=INPUT_HEIGHT)

# Build the AlexNet graph from the module-level hyper-parameter constants.
alexnet = AlexNet(input_width=INPUT_WIDTH,
                  input_height=INPUT_HEIGHT,
                  input_channels=INPUT_CHANNELS,
                  num_classes=NUM_CLASSES,
                  learning_rate=LEARNING_RATE,
                  momentum=MOMENTUM,
                  keep_prob=KEEP_PROB)

with tf.Session() as sess:
    print('Training dataset...')
    print()

    # TensorBoard: graph plus whatever summaries the model registered.
    file_writer = tf.summary.FileWriter(logdir='./log', graph=sess.graph)
    summary_operation = tf.summary.merge_all()

    sess.run(tf.global_variables_initializer())

    for i in range(EPOCHS):
        # Accuracies are measured BEFORE each epoch's training pass, so the
        # first report reflects the untrained network.
        print('Calculating accuracies...')
        train_accuracy = alexnet.evaluate(sess, X_train, Y_train, BATCH_SIZE)
        test_accuracy = alexnet.evaluate(sess, X_test, Y_test, BATCH_SIZE)
        print(f'Train Accuracy = {train_accuracy:.3f}')
        print(f'Test Accuracy = {test_accuracy:.3f}')
        print()

        print('Training epoch', i + 1, '...')
        alexnet.train_epoch(sess, X_train, Y_train, BATCH_SIZE, file_writer,
                            summary_operation, i)
        print()
def main(trainModel=True,
         buildConfusionMatrix=True,
         restore=False,
         buildClassifiedMatrix=True):
    """Build a small two-conv-layer classifier for 28x28 inputs, then
    optionally train it, draw a confusion matrix, and draw the images
    each class is most confidently assigned to.

    Args:
        trainModel: run the training loop.
        buildConfusionMatrix: evaluate and draw a test-set confusion matrix.
        restore: restore weights from SAVE_PATH before anything else.
        buildClassifiedMatrix: collect, per (true class, predicted class),
            the test image with the highest predicted probability.
    """
    tf.disable_v2_behavior()
    input_images = tf.placeholder(tf.float32, [None, 28, 28], name="Input")
    real = tf.placeholder(tf.float32, [None, CLASSES], name="real_classes")

    layer1 = create_conv_layer(tf.reshape(input_images, [-1, 28, 28, 1]),
                               1, 28, [5, 5], [2, 2], name="conv_no_pool")
    layer2 = create_conv_layer(layer1, 28, 56, [5, 5], [2, 2],
                               name='conv_with_pool')
    # Two 2x2 poolings reduce 28x28 to 7x7 with 56 channels.
    conv_result = tf.reshape(layer2, [-1, 7 * 7 * 56])

    relu_layer_weight = tf.Variable(tf.truncated_normal([7 * 7 * 56, 1000],
                                                        stddev=STDDEV * 2),
                                    name='relu_layer_weight')
    rely_layer_bias = tf.Variable(tf.truncated_normal([1000],
                                                      stddev=STDDEV / 2),
                                  name='rely_layer_bias')
    relu_layer = tf.matmul(conv_result, relu_layer_weight) + rely_layer_bias
    relu_layer = tf.nn.relu(relu_layer)
    # NOTE(review): dropout uses the constant DROPOUT with no train/test
    # switch, so it stays active at evaluation time too — confirm intent.
    relu_layer = tf.nn.dropout(relu_layer, DROPOUT)

    final_layer_weight = tf.Variable(tf.truncated_normal([1000, CLASSES],
                                                         stddev=STDDEV * 2),
                                     name='final_layer_weight')
    final_layer_bias = tf.Variable(tf.truncated_normal([CLASSES],
                                                       stddev=STDDEV / 2),
                                   name='final_layer_bias')
    final_layer = tf.matmul(relu_layer, final_layer_weight) + final_layer_bias

    predicts = tf.nn.softmax(final_layer)
    # Clip before log to avoid log(0) in the cross-entropy below.
    predicts_for_log = tf.clip_by_value(predicts, 1e-9, 0.999999999)
    loss = -tf.reduce_mean(
        tf.reduce_sum(real * tf.log(predicts_for_log) +
                      (1 - real) * tf.log(1 - predicts_for_log),
                      axis=1),
        axis=0)
    optimiser = tf.train.GradientDescentOptimizer(
        learning_rate=LEARNING_RATE).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(real, axis=1),
                                  tf.argmax(predicts, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    confusion_matrix = tf.confusion_matrix(labels=tf.argmax(real, axis=1),
                                           predictions=tf.argmax(predicts, axis=1),
                                           num_classes=CLASSES)

    saver = tf.train.Saver()
    # dataset = get_mnist_dataset()
    dataset = get_fashion_dataset()

    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        if restore:
            saver.restore(session, SAVE_PATH)
        if trainModel:
            train(input_images, real, session, optimiser, loss, accuracy,
                  saver, dataset)
        if buildConfusionMatrix:
            test_cm = session.run(confusion_matrix,
                                  feed_dict={
                                      input_images: dataset.test_x,
                                      real: dataset.test_y
                                  })
            draw_confusion_matrix(test_cm)
        if buildClassifiedMatrix:
            all_probs = session.run(predicts,
                                    feed_dict={
                                        input_images: dataset.test_x,
                                        real: dataset.test_y
                                    })
            max_failure_picture_index = [[(-1, -1.0)] * CLASSES
                                         for _ in range(CLASSES)]
            for i in range(len(all_probs)):
                # BUG FIX: this loop previously rebound the name `real`,
                # clobbering the placeholder defined above; any later use
                # of the placeholder would silently feed an int instead.
                true_class = np.argmax(dataset.test_y[i])
                for j in range(CLASSES):
                    if max_failure_picture_index[true_class][j][1] < all_probs[i][j]:
                        max_failure_picture_index[true_class][j] = (i, all_probs[i][j])
            draw_max_failure_pictures(dataset.test_x,
                                      max_failure_picture_index)