def test_set_weights_without_biases():
    my_cnn = CNN()
    image_size = (np.random.randint(32, 100), np.random.randint(20, 100), np.random.randint(3, 10))
    number_of_conv_layers = np.random.randint(2, 10)
    my_cnn.add_input_layer(shape=image_size, name="input")
    previous_depth = image_size[2]
    for k in range(number_of_conv_layers):
        number_of_filters = np.random.randint(3, 100)
        kernel_size = np.random.randint(3, 9)
        my_cnn.append_conv2d_layer(num_of_filters=number_of_filters,
                                   kernel_size=(kernel_size, kernel_size),
                                   padding="same", activation='linear')
        w = my_cnn.get_weights_without_biases(layer_number=k + 1)
        w_set = np.full_like(w, 0.2)
        my_cnn.set_weights_without_biases(w_set, layer_number=k + 1)
        w_get = my_cnn.get_weights_without_biases(layer_number=k + 1)
        assert w_get.shape == w_set.shape
        previous_depth = number_of_filters
    pool_size = np.random.randint(2, 5)
    my_cnn.append_maxpooling2d_layer(pool_size=pool_size, padding="same", strides=2, name="pool1")
    my_cnn.append_flatten_layer(name="flat1")
    my_cnn.append_dense_layer(num_nodes=10)
    number_of_dense_layers = np.random.randint(2, 10)
    previous_nodes = 10
    for k in range(number_of_dense_layers):
        number_of_nodes = np.random.randint(3, 100)
        kernel_size = np.random.randint(3, 9)
        my_cnn.append_dense_layer(num_nodes=number_of_nodes)
        w = my_cnn.get_weights_without_biases(layer_number=k + number_of_conv_layers + 4)
        w_set = np.full_like(w, 0.8)
        my_cnn.set_weights_without_biases(w_set, layer_number=k + number_of_conv_layers + 4)
        w_get = my_cnn.get_weights_without_biases(layer_number=k + number_of_conv_layers + 4)
        assert w_get.shape == w_set.shape
        previous_nodes = number_of_nodes
def __init__(self, embed_size, vocab: VocabEntry):
    """ Init the Embedding layer for one language
    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    ## A4 code
    # pad_token_idx = vocab.src['<pad>']
    # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
    ## End A4 code

    ### YOUR CODE HERE for part 1j
    self.char_embed_size = 50
    self.embed_size = embed_size
    self.char_embedding = nn.Embedding(len(vocab.char2id), self.char_embed_size, padding_idx=0)
    self.cnn = CNN(char_embed_size=self.char_embed_size, word_embed_size=embed_size, kernel_size=5)
    self.highway = Highway(embed_size=embed_size)
    self.dropout = nn.Dropout(0.3)
def __init__(self, embed_size, vocab):
    """ Init the Embedding layer for one language
    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    ## A4 code
    # pad_token_idx = vocab.src['<pad>']
    # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
    ## End A4 code

    ### YOUR CODE HERE for part 1j
    self.pad_token_idx = vocab.char2id['<pad>']
    self.e_char = self.char_embed_size = 50
    self.e_word = self.embed_size = embed_size  # e_word
    self.embeddings = nn.Embedding(len(vocab.char2id), self.char_embed_size,
                                   padding_idx=self.pad_token_idx)
    self.vocab = vocab  # vocab object
    self.dropoutp = 0.3
    self.dropout = nn.Dropout(self.dropoutp)
    self.cnnlay = CNN(self.e_char, f=self.e_word)
    self.highwaylay = Highway(self.e_word)
def __init__(self, embed_size, vocab):
    """ Init the Embedding layer for one language
    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    ## A4 code
    # pad_token_idx = vocab.src['<pad>']
    # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
    ## End A4 code

    ### YOUR CODE HERE for part 1j
    self.CHAR_EMBED_SIZE = 50
    self.dropout_rate = 0.3
    self.embed_size = embed_size
    pad_token_idx = vocab['<pad>']
    self.conv = CNN(self.CHAR_EMBED_SIZE, embed_size)
    self.highway = Highway(embed_size, embed_size)
    self.charEmbedding = nn.Embedding(num_embeddings=len(vocab.char2id),
                                      embedding_dim=self.CHAR_EMBED_SIZE,
                                      padding_idx=pad_token_idx)
    self.dropout = nn.Dropout(self.dropout_rate)
def __init__(self, settings):
    super().__init__()
    self.enc_type = settings['conv_type']
    self.out_c = settings['out_c']
    if self.enc_type == 'CNN':
        self.enc = CNN(settings)
    else:
        self.enc = PCNN(settings)
        self.out_c *= 3
    self.out_feature_size = self.out_c
    self.n_rel = settings['n_rel']
    self.r_embed = nn.Parameter(torch.zeros(self.n_rel, self.out_feature_size), requires_grad=True)
    self.r_bias = nn.Parameter(torch.zeros(self.n_rel), requires_grad=True)

    # attention module
    self.att_sm = nn.Softmax(dim=-1)
    eye = torch.eye(self.out_feature_size, self.out_feature_size)
    # self.att_W = nn.Parameter(eye.expand(self.n_rel, self.out_c, self.out_c), requires_grad=True)
    # n_rel * out_feature_size * out_c
    self.att_W = nn.Parameter(eye.unsqueeze(0).repeat([self.n_rel, 1, 1]), requires_grad=True)
    # out_feature_size * out_c
    self.att_W_small = nn.Parameter(eye, requires_grad=True)

    # pcnn
    # self.linear = nn.Linear(settings['out_feature_size'] * 3, settings['n_rel'])
    self.linear = nn.Linear(self.out_feature_size, 1)
    self.dropout = nn.Dropout(p=settings['dropout_p'])

    # con = math.sqrt(6.0/(self.out_feature_size + self.n_rel))
    con = 0.01
    nn.init.uniform_(self.r_embed, a=-con, b=con)
    nn.init.uniform_(self.r_bias, a=-con, b=con)
    nn.init.uniform_(self.att_W_small, a=-con, b=con)
def question_1i_sanity_check():
    """ Sanity check for model_embeddings.py
        basic shape check
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1i: Convolutional Network")
    print("-" * 80)
    char_embed_size = 50
    max_word_length = 21
    kernel_size = 5
    inpt = torch.randn(BATCH_SIZE, char_embed_size, max_word_length, dtype=torch.float32)
    conv = CNN(char_embed_size, EMBED_SIZE, kernel_size)
    output = conv(inpt)
    output_expected_size = [BATCH_SIZE, EMBED_SIZE]
    assert (
        list(output.size()) == output_expected_size
    ), "output shape is incorrect: it should be:\n {} but is:\n{}".format(
        output_expected_size, list(output.size()))
    print("Sanity Check Passed for Question 1i: Convolutional Network!")
    print("-" * 80)
def play(OPTIONS):
    """main method"""
    trained_agent = lambda x: np.argmax(mcts.get_raw_action_prob(x))
    random_player = lambda x: np.random.choice(
        np.where(x.get_valid_moves() == 1)[0])
    num_games = 100
    if OPTIONS.optimal:
        opponent = get_optimal_move
    elif OPTIONS.suboptimal:
        opponent = (
            lambda x: get_optimal_move(x)
            if random.random() > Config.suboptimality else random_player(x))
    elif OPTIONS.suboptimal:  # NOTE: duplicate condition; as written this branch is unreachable
        opponent = trained_agent
    else:
        opponent = random_player
    network = CNN(Config.levels)
    network.load_checkpoint("./temp/", "best.pth.tar")
    mcts = MCTS(network)
    a = gym.Gym(trained_agent, opponent, potential=0.99)
    print(a.play_games(num_games, mode=2, verbose=True))
def __init__(self, word_embed_size, vocab):
    """
    Init the Embedding layer for one language
    @param word_embed_size (int): Embedding size (dimensionality) for the output word
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.

    Hints: - You may find len(self.vocab.char2id) useful when create the embedding
    """
    super(ModelEmbeddings, self).__init__()

    ### YOUR CODE HERE for part 1h
    self.word_embed_size = word_embed_size
    self.char_embed_size = 50
    self.vocab = vocab
    self.char_embed = nn.Embedding(len(self.vocab.char2id), self.char_embed_size,
                                   padding_idx=self.vocab.char_pad)
    self.cnn = CNN(self.char_embed_size, self.word_embed_size, kernel_size=5, padding=1)
    self.highway = Highway(self.word_embed_size, self.word_embed_size, dropout_rate=0.3)
def question_1g_sanity_check():
    """ Sanity check for the class `CNN`.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g: CNN")
    print("-" * 80)
    SENTENCE_LENGTH = 20
    BATCH_SIZE = 5
    E_CHAR = 50
    M_WORD = 21
    F = 3
    # model = CNN(f=F, e_char=E_CHAR, m_word=M_WORD)
    model = CNN(f=F, e_char=E_CHAR)
    x_reshaped = torch.randn((SENTENCE_LENGTH, BATCH_SIZE, E_CHAR, M_WORD))
    print("Running test on a batch of x_reshaped")
    x_conv_out = model.forward(x_reshaped)
    assert list(x_conv_out.size()) == [
        SENTENCE_LENGTH, BATCH_SIZE, F
    ], "Output size should be: {}, but got {}".format(
        (SENTENCE_LENGTH, BATCH_SIZE, F), x_conv_out.size())
    print("Sanity Check Passed for Question 1g: CNN!")
    print("-" * 80)
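# A minimal sketch of a CNN module that would satisfy the shape check above. This is an
# assumption for illustration, not the assignment's released solution: it flattens the
# (sentence_length, batch) dimensions, runs a 1-D convolution over the character dimension,
# and max-pools over the remaining time axis.
import torch
import torch.nn as nn


class CNN(nn.Module):
    def __init__(self, f, e_char, kernel_size=5):
        super().__init__()
        # e_char input channels, f output filters, convolving along the word-length axis
        self.conv = nn.Conv1d(e_char, f, kernel_size=kernel_size, padding=1)

    def forward(self, x_reshaped):
        # x_reshaped: (sent_len, batch, e_char, m_word)
        sent_len, batch, e_char, m_word = x_reshaped.shape
        x = x_reshaped.view(sent_len * batch, e_char, m_word)
        x_conv = torch.relu(self.conv(x))       # (N, f, m_word - k + 1 + 2*padding)
        x_out, _ = x_conv.max(dim=-1)           # max-pool over the time dimension
        return x_out.view(sent_len, batch, -1)  # (sent_len, batch, f)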
def __init__(self, embed_size, vocab):
    """ Init the Embedding layer for one language
    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    # A4 code
    # pad_token_idx = vocab.src['<pad>']
    # self.embeddings = nn.Embedding(
    #     len(vocab.src), embed_size, padding_idx=pad_token_idx)
    # End A4 code

    # YOUR CODE HERE for part 1j
    self.char_embed_size = 50
    self.embed_size = embed_size
    pad_token_idx = vocab.char2id['<pad>']
    self.embeddings = nn.Embedding(len(vocab.char2id), self.char_embed_size,
                                   padding_idx=pad_token_idx)
    # print(embed_size)
    self.convolution = CNN(self.char_embed_size, self.embed_size)
    self.highway_layer = Highway(self.embed_size, dropout_rate=0.3)
def __init__(self, embed_size, vocab):
    """ Init the Embedding layer for one language
    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    ## A4 code
    # pad_token_idx = vocab.src['<pad>']
    # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
    ## End A4 code

    self.embed_size = embed_size
    self.vocab = vocab
    self.num_char = len(vocab.char2id)
    self.char_embed_size = 50
    self.max_word_length = 21
    self.dropout_rate = 0.3
    self.kernel_size = 5
    self.embeddings = nn.Embedding(self.num_char, self.char_embed_size,
                                   padding_idx=self.vocab.char2id['<pad>'])
    self.cnn = CNN(self.kernel_size, self.char_embed_size, self.embed_size, self.max_word_length)
    self.highway = Highway(self.embed_size, self.dropout_rate)
def __init__(self, embed_size, vocab):
    """ Init the Embedding layer for one language
    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    ## A4 code
    # pad_token_idx = vocab.src['<pad>']
    # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
    ## End A4 code

    ### YOUR CODE HERE for part 1j
    e_char = 50  # character embedding size
    max_word_length = 21
    dropout_rate = 0.3
    self.embed_size = embed_size
    pad_token_idx = vocab.char2id['<pad>']
    self.char_embed_layer = nn.Embedding(len(vocab.char2id), e_char, padding_idx=pad_token_idx)
    self.cnn_layer = CNN(max_word_length, e_char, self.embed_size)
    self.highway_layer = Highway(self.embed_size, dropout_rate)
def forward(self, input):
    """
    Looks up character-based CNN embeddings for the words in a batch of sentences.
    @param input: Tensor of integers of shape (sentence_length, batch_size, max_word_length)
        where each integer is an index into the character vocabulary
    @param output: Tensor of shape (sentence_length, batch_size, embed_size), containing the
        CNN-based embeddings for each word of the sentences in the batch
    """
    ## A4 code
    # output = self.embeddings(input)
    # return output
    ## End A4 code

    ### YOUR CODE HERE for part 1j
    # x_char_embedding = []
    # for x_padded in input:
    x_padded = input
    x_embedded = self.embeddings(x_padded)
    x_padded_dim = list(x_embedded.size())
    # print(x_embedded.size())
    # need to convert 4d to 3d
    x_embedded = x_embedded.reshape(-1, x_padded_dim[3], x_padded_dim[2])
    # print(x_embedded.size())
    x_conv_out = CNN(in_channel=self.embed_size, out_channel=self.embed_size).forward(x_embedded)
    x_conv_out = torch.squeeze(x_conv_out, -1)
    # print(x_conv_out.size())
    x_conv_out = x_conv_out.reshape(x_padded_dim[0], x_padded_dim[1], -1)
    # print(x_conv_out.size())
    x_highway = Highway(self.embed_size).forward(x_conv_out)
    # print(x_highway.size())
    x_dout = self.dropout(x_highway)
    # x_char_embedding.append(x_dout)
    # x_char_embedding = torch.stack(x_char_embedding)
    return x_dout
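# Note: the forward above rebuilds CNN and Highway on every call, so their weights are
# re-initialized each time and never trained. A minimal sketch of the more common pattern,
# under the assumption that the submodules were created once in __init__ (e.g. self.cnn,
# self.highway, self.dropout, as in the other __init__ variants in this collection) and that
# the character axis is moved with permute rather than reshape:
def forward(self, input):
    # input: (sentence_length, batch_size, max_word_length) of character indices
    sent_len, batch_size, max_word_len = input.size()
    x_emb = self.embeddings(input)                      # (sent_len, batch, m_word, e_char)
    x_reshaped = x_emb.view(-1, max_word_len, x_emb.size(-1)).permute(0, 2, 1)  # (N, e_char, m_word)
    x_conv_out = self.cnn(x_reshaped)                   # (N, e_word)
    x_word_emb = self.dropout(self.highway(x_conv_out)) # (N, e_word)
    return x_word_emb.view(sent_len, batch_size, -1)    # (sent_len, batch, e_word)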
def __init__(self, embed_size, vocab):
    """ Init the Embedding layer for one language
    @param embed_size (int): Embedding size (dimensionality) for the output
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.
    """
    super(ModelEmbeddings, self).__init__()

    ## A4 code
    # pad_token_idx = vocab.src['<pad>']
    # self.embeddings = nn.Embedding(len(vocab.src), embed_size, padding_idx=pad_token_idx)
    ## End A4 code

    ### YOUR CODE HERE for part 1j
    char_emb_dim = 50
    max_sentence_len = 21
    p_drop = 0.3
    kernel_size = 5
    pad_token_idx = vocab.char2id['<pad>']
    self.vocab = vocab
    self.embed_size = embed_size
    self.char_emb = nn.Embedding(len(vocab.char2id), char_emb_dim, pad_token_idx)
    self.cnn = CNN(kernel_size, embed_size, char_emb_dim, max_sentence_len)
    self.hwy = Highway(embed_size, p_drop)
def test_get_weights_without_biases_3():
    my_cnn = CNN()
    image_size = (np.random.randint(32, 100), np.random.randint(20, 100), np.random.randint(3, 10))
    number_of_conv_layers = np.random.randint(2, 10)
    my_cnn.add_input_layer(shape=image_size, name="input")
    previous_depth = image_size[2]
    for k in range(number_of_conv_layers):
        number_of_filters = np.random.randint(3, 100)
        kernel_size = np.random.randint(3, 9)
        my_cnn.append_conv2d_layer(num_of_filters=number_of_filters,
                                   kernel_size=(kernel_size, kernel_size),
                                   padding="same", activation='linear')
        actual = my_cnn.get_weights_without_biases(layer_number=k + 1)
        assert actual.shape == (kernel_size, kernel_size, previous_depth, number_of_filters)
        previous_depth = number_of_filters
    actual = my_cnn.get_weights_without_biases(layer_number=0)
    assert actual is None
    pool_size = np.random.randint(2, 5)
    my_cnn.append_maxpooling2d_layer(pool_size=pool_size, padding="same", strides=2, name="pool1")
    actual = my_cnn.get_weights_without_biases(layer_name="pool1")
    assert actual is None
    my_cnn.append_flatten_layer(name="flat1")
    actual = my_cnn.get_weights_without_biases(layer_name="flat1")
    assert actual is None
    my_cnn.append_dense_layer(num_nodes=10)
    number_of_dense_layers = np.random.randint(2, 10)
    previous_nodes = 10
    for k in range(number_of_dense_layers):
        number_of_nodes = np.random.randint(3, 100)
        kernel_size = np.random.randint(3, 9)
        my_cnn.append_dense_layer(num_nodes=number_of_nodes)
        actual = my_cnn.get_weights_without_biases(layer_number=k + number_of_conv_layers + 4)
        # assert actual.shape == (previous_nodes, number_of_nodes)
        previous_nodes = number_of_nodes
def __init__(self, word_embed_size, vocab, char_embed_size=50, dropout_prob=0.3):
    """
    Init the Embedding layer for one language
    @param word_embed_size (int): Embedding size (dimensionality) for the output word
    @param vocab (VocabEntry): VocabEntry object. See vocab.py for documentation.

    Hints: - You may find len(self.vocab.char2id) useful when create the embedding
    """
    super(ModelEmbeddings, self).__init__()

    ### YOUR CODE HERE for part 1h
    self.word_embed_size = word_embed_size  # for the autograder
    self.e_word = word_embed_size
    self.e_char = char_embed_size
    self.dropout_prob = dropout_prob
    self.embedding = nn.Embedding(len(vocab.char2id), self.e_char, vocab.char_pad)
    self.cnn = CNN(self.e_char, self.e_word)
    self.highway = Highway(self.e_word)
def test_remove_last_layer():
    from tensorflow.keras.datasets import cifar10
    batch_size = 32
    num_classes = 10
    epochs = 100
    data_augmentation = True
    num_predictions = 20
    save_dir = os.path.join(os.getcwd(), 'saved_models')
    model_name = 'keras_cifar10_trained_model.h5'
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()
    number_of_train_samples_to_use = 100
    X_train = X_train[0:number_of_train_samples_to_use, :]
    y_train = y_train[0:number_of_train_samples_to_use]
    my_cnn = CNN()
    my_cnn.add_input_layer(shape=(32, 32, 3), name="input")
    my_cnn.append_conv2d_layer(num_of_filters=16, kernel_size=(3, 3), padding="same",
                               activation='linear', name="conv1")
    my_cnn.append_maxpooling2d_layer(pool_size=2, padding="same", strides=2, name="pool1")
    my_cnn.append_conv2d_layer(num_of_filters=8, kernel_size=3, activation='relu', name="conv2")
    my_cnn.append_flatten_layer(name="flat1")
    my_cnn.append_dense_layer(num_nodes=10, activation="relu", name="dense1")
    my_cnn.append_dense_layer(num_nodes=2, activation="relu", name="dense2")
    out = my_cnn.predict(X_train)
    assert out.shape == (number_of_train_samples_to_use, 2)
    my_cnn.remove_last_layer()
    out = my_cnn.predict(X_train)
    assert out.shape == (number_of_train_samples_to_use, 10)
import mnist
import numpy as np
from cnn import CNN
import matplotlib.pyplot as plt
import pickle

np.set_printoptions(edgeitems=100, linewidth=200000)

cnn = CNN(6, 12)

train_images = (mnist.train_images() / 255) - 0.5
train_labels = mnist.train_labels()
test_images = (mnist.test_images() / 255) - 0.5
test_labels = mnist.test_labels()

stats = cnn.train(train_images[:10000], train_labels[:10000],
                  test_images[:1000], test_labels[:1000], 10, 0.005)
epochs = stats[0]
avg_losses = stats[1]
accuracies = stats[2]

with open("artifacts/model.bin", "wb") as f:
    pickle.dump(cnn, f)

fig = plt.figure()
plt.subplots_adjust(hspace=0.5)
g1 = fig.add_subplot(2, 1, 1, ylabel="Loss", xlabel="Epoch")
g1.plot(epochs, avg_losses, label="Avg loss", color="red")
g1.legend(loc="center")
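# The script above collects `accuracies` and reserves a 2x1 subplot grid but only draws the
# loss panel. A minimal continuation that fills the second panel and shows the figure -- an
# assumption about the intended layout, not the original author's plotting code:
g2 = fig.add_subplot(2, 1, 2, ylabel="Accuracy", xlabel="Epoch")
g2.plot(epochs, accuracies, label="Test accuracy", color="blue")
g2.legend(loc="center")
plt.show()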
session_conf = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
sess = tf.Session(config=session_conf)
with sess.as_default():
    # embed()
    if (MODEL_TO_RUN == 0):
        model = CNN_LSTM(x_train.shape[1], y_train.shape[1], len(vocab_processor.vocabulary_),
                         embedding_dim, filter_sizes, num_filters, l2_reg_lambda)
    elif (MODEL_TO_RUN == 1):
        model = LSTM_CNN(x_train.shape[1], y_train.shape[1], len(vocab_processor.vocabulary_),
                         embedding_dim, filter_sizes, num_filters, l2_reg_lambda)
    elif (MODEL_TO_RUN == 2):
        model = CNN(x_train.shape[1], y_train.shape[1], len(vocab_processor.vocabulary_),
                    embedding_dim, filter_sizes, num_filters, l2_reg_lambda)
    elif (MODEL_TO_RUN == 3):
        model = LSTM(x_train.shape[1], y_train.shape[1], len(vocab_processor.vocabulary_),
                     embedding_dim)
    else:
        print("PLEASE CHOOSE A VALID MODEL!\n0 = CNN_LSTM\n1 = LSTM_CNN\n2 = CNN\n3 = LSTM\n")
        exit()

    # Define Training procedure
    global_step = tf.Variable(0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer(1e-3)
    grads_and_vars = optimizer.compute_gradients(model.loss)
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
def train(learning_rate, learning_rate_decay, dropout_rate, mini_batch_size, epochs,
          optimizer, random_seed, model_directory, model_filename, log_directory):
    np.random.seed(random_seed)

    if not os.path.exists(log_directory):
        os.makedirs(log_directory)

    # Load CIFAR10 dataset
    cifar10 = CIFAR10()
    x_train = cifar10.x_train
    y_train = cifar10.y_train
    y_train_onehot = cifar10.y_train_onehot
    x_valid = cifar10.x_valid
    y_valid = cifar10.y_valid
    y_valid_onehot = cifar10.y_valid_onehot
    num_classes = cifar10.num_classes
    input_size = cifar10.input_size
    print('CIFAR10 Input Image Size: {}'.format(input_size))

    model = CNN(input_size=input_size, num_classes=num_classes, optimizer=optimizer)

    train_accuracy_log = list()
    valid_accuracy_log = list()
    train_loss_log = list()

    for epoch in range(epochs):
        print('Epoch: %d' % epoch)
        learning_rate *= learning_rate_decay

        # Prepare mini batches on train set
        shuffled_idx = np.arange(len(x_train))
        np.random.shuffle(shuffled_idx)
        mini_batch_idx = [
            shuffled_idx[k:k + mini_batch_size]
            for k in range(0, len(x_train), mini_batch_size)
        ]

        # Validate on validation set
        valid_prediction_onehot = model.test(data=x_valid)
        valid_prediction = np.argmax(valid_prediction_onehot, axis=1).reshape((-1, 1))
        valid_accuracy = model_accuracy(label=y_valid, prediction=valid_prediction)
        print('Validation Accuracy: %f' % valid_accuracy)
        valid_accuracy_log.append(valid_accuracy)

        # Train on train set
        for i, idx in enumerate(mini_batch_idx):
            train_loss = model.train(data=x_train[idx], label=y_train_onehot[idx],
                                     learning_rate=learning_rate, dropout_rate=dropout_rate)
            if i % 200 == 0:
                train_prediction_onehot = model.test(data=x_train[idx])
                train_prediction = np.argmax(train_prediction_onehot, axis=1).reshape((-1, 1))
                train_accuracy = model_accuracy(label=y_train[idx], prediction=train_prediction)
                print('Training Loss: %f, Training Accuracy: %f' % (train_loss, train_accuracy))
            if i == 0:
                train_accuracy_log.append(train_accuracy)
                train_loss_log.append(train_loss)

    model.save(directory=model_directory, filename=model_filename)
    print('Trained model saved successfully')

    model.save_as_pb(directory=model_directory, filename=model_filename)
    print('Trained model saved as pb successfully')

    # The directory should not exist before calling this method
    signature_dir = os.path.join(model_directory, 'signature')
    assert (not os.path.exists(signature_dir))
    model.save_signature(directory=signature_dir)
    print('Trained model with signature saved successfully')

    plot_curve(train_losses=train_loss_log, train_accuracies=train_accuracy_log,
               valid_accuracies=valid_accuracy_log,
               filename=os.path.join(log_directory, 'training_curve.png'))
def test_add_input_layer():
    model = CNN()
    out = model.add_input_layer(shape=(256, 256, 3), name="input0")
    # no tests for this?
    assert True
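# A possible strengthening of the placeholder test above, assuming the same
# get_weights_without_biases API exercised by the other tests in this suite (layer_number=0
# is the input layer, which has no trainable weights, so it returns None). The test name is
# hypothetical:
def test_add_input_layer_has_no_weights():
    model = CNN()
    model.add_input_layer(shape=(256, 256, 3), name="input0")
    assert model.get_weights_without_biases(layer_number=0) is None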
def init_CNN(self):
    net = CNN(load_glove=False)
    net.num_words = self.num_words
    net.glove_dim = INPUT_DIM
    return net
def main(model_name, new_scan=False, preprocess=True):
    config = Config()
    plot = config.plot
    cut = config.cut
    bandpass = config.bandpass
    resample = config.resample

    # read data folders
    file_list = os.listdir(config.root + '/data/after')
    file_list.sort()
    if new_scan == True:
        print('start new scan!')
        start_point = 0
        event_num = 0
        try:
            os.system('rm -rf %s/event_detect/detect_result/cut/*' % config.root)
            os.system('rm -rf %s/event_detect/detect_result/png/*' % config.root)
            os.system('rm -rf %s/event_detect/detect_result/png2/*' % config.root)
            os.system('rm -rf %s/event_detect/detect_result/cnn/*.csv' % config.root)
        except:
            pass
        # file_list_len = len(file_list)
    else:
        with open(config.root + '/event_detect/detect_result/' + model_name + '/checkpoint') as file:
            start_point = int(file.readline())
            event_num = int(file.readline())
        file_list = file_list[start_point:]
        # file_list_len = len(file_list)
        print('restart from {}'.format(file_list[0]))

    # load CNN model
    if model_name == 'cnn':
        from cnn import CNN
        import tensorflow as tf
        from tflib.models import Model
        model = CNN()
        # sess = tf.Session(config=tf.ConfigProto(device_count={"CPU": 20}, inter_op_parallelism_threads=0, intra_op_parallelism_threads=0))
        sess = tf.Session()
        saver, global_step = Model.continue_previous_session(
            sess, model_file='cnn',
            ckpt_file=config.root + '/event_detect/saver/cnn/checkpoint')

    # read group info
    group = []
    with open(config.root + '/config/group_info', 'r') as f:
        for line in f.readlines():
            if line != '\n':
                if line[0] == '#':
                    group.append([])
                else:
                    group[-1].append(line.split()[0])

    # read data & detect eq
    for file in file_list:
        sac_file_name = [[], [], []]
        all_group_sta_num = [0] * len(group)
        path = os.path.join(config.root + '/data/after', file)
        begin = datetime.datetime.now()
        group_E = [[] for _ in range(len(group))]
        group_N = [[] for _ in range(len(group))]
        group_Z = [[] for _ in range(len(group))]
        print('Start reading data: %s.' % file)
        for i in range(len(group)):
            for sta in group[i]:
                if len(glob.glob(path + '/' + '*' + sta + '.*')) == 3:
                    all_group_sta_num[i] += 1
                    sacfile_E = glob.glob(path + '/' + '*' + sta + '.*' + 'E')[0]
                    sacfile_N = glob.glob(path + '/' + '*' + sta + '.*' + 'N')[0]
                    sacfile_Z = glob.glob(path + '/' + '*' + sta + '.*' + 'Z')[0]
                    sac_file_name[0].append(sacfile_E.split('/')[-1])
                    sac_file_name[1].append(sacfile_N.split('/')[-1])
                    sac_file_name[2].append(sacfile_Z.split('/')[-1])
                    group_E[i].append(obspy.read(sacfile_E))
                    group_N[i].append(obspy.read(sacfile_N))
                    group_Z[i].append(obspy.read(sacfile_Z))

        flatten_group_E = [st for each_group in group_E for st in each_group]
        flatten_group_N = [st for each_group in group_N for st in each_group]
        flatten_group_Z = [st for each_group in group_Z for st in each_group]
        st_E = reduce(lambda st1, st2: st1 + st2, flatten_group_E)
        st_N = reduce(lambda st1, st2: st1 + st2, flatten_group_N)
        st_Z = reduce(lambda st1, st2: st1 + st2, flatten_group_Z)
        st_all = st_E + st_N + st_Z
        all_sta_num = len(flatten_group_Z)

        if resample:
            st_all = st_all.resample(sampling_rate=resample)
        if bandpass:
            st_all = st_all.filter('bandpass', freqmin=bandpass[0], freqmax=bandpass[1],
                                   corners=4, zerophase=True)

        endtime = st_all[0].stats.endtime
        start_flag = -1
        end_flag = -1
        event_list = []
        confidence_total = {}
        start_total = []
        end_total = []
        pos_num_total = []
        samples = 1.0 / st_all[0].stats.delta
        # npts = st_all[0].stats.npts
        print('Finish reading data.')
        print('Start detection.')

        for windowed_st in st_all.slide(window_length=(config.winsize - 1) / samples,
                                        step=config.winlag / samples):
            cur_sta = 0
            len_group_conf = 0
            group_class, group_conf = [], []
            # windowed_E = windowed_st[:all_sta_num]
            # windowed_N = windowed_st[all_sta_num:2*all_sta_num]
            # windowed_Z = windowed_st[2*all_sta_num:]
            start = len(windowed_st) / 3 * 2
            end = len(windowed_st)
            group_max_conf = 0
            for i in range(len(group)):
                data_input = [[], [], []]
                group_sta_num = all_group_sta_num[i]
                if group_sta_num > 0:
                    for j in range(cur_sta, cur_sta + group_sta_num):
                        if len(windowed_st[j].data) < config.winsize:
                            windowed_st[j].data = np.concatenate([
                                windowed_st[j].data,
                                np.zeros(config.winsize - len(windowed_st[j].data))
                            ])
                        data_input[0].append(windowed_st[j].data[:config.winsize])
                        # print(j, windowed_st[j])
                    for j in range(all_sta_num + cur_sta, all_sta_num + cur_sta + group_sta_num):
                        if len(windowed_st[j].data) < config.winsize:
                            windowed_st[j].data = np.concatenate([
                                windowed_st[j].data,
                                np.zeros(config.winsize - len(windowed_st[j].data))
                            ])
                        data_input[1].append(windowed_st[j].data[:config.winsize])
                        # print(j, windowed_st[j])
                    for j in range(2 * all_sta_num + cur_sta, 2 * all_sta_num + cur_sta + group_sta_num):
                        if len(windowed_st[j].data) < config.winsize:
                            windowed_st[j].data = np.concatenate([
                                windowed_st[j].data,
                                np.zeros(config.winsize - len(windowed_st[j].data))
                            ])
                        data_input[2].append(windowed_st[j].data[:config.winsize])
                        # print(j, windowed_st[j])
                    plot_b = 2 * all_sta_num + cur_sta
                    plot_e = 2 * all_sta_num + cur_sta + group_sta_num
                    cur_sta += group_sta_num

                    if preprocess:
                        for i in range(3):
                            for j in range(group_sta_num):
                                data_input[i][j] = data_preprocess(data_input[i][j])

                    data_input = np.array(data_input)
                    if len(data_input[0][0]) < config.winsize:
                        concat = np.zeros([3, group_sta_num,
                                           config.winsize - len(data_input[0][0])])
                        data_input = np.concatenate([data_input, concat], axis=2)
                    else:
                        data_input = data_input[:, :, :config.winsize]
                    data_input = data_input.transpose((1, 2, 0))

                    j = 0
                    while j < len(data_input):
                        if np.max(data_input[j]) == 0 or np.isnan(np.max(data_input[j])):
                            data_input = np.delete(data_input, j, axis=0)
                        else:
                            j += 1

                    if len(data_input) >= 3:
                        len_group_conf += 1
                        class_pred, confidence = model.classify(sess=sess, input_=[data_input])
                        group_class.append(class_pred)
                        group_conf.append(confidence[0])
                        if confidence[0] > group_max_conf:
                            start = plot_b
                            end = plot_e
                            group_max_conf = confidence[0]
                    else:
                        group_class.append(0)
                        group_conf.append(0)
                else:
                    group_class.append(0)
                    group_conf.append(0)

            # consider the result of multiple groups
            pos_num = 0
            for each in group_class:
                if each == 1:
                    pos_num += 1
            if pos_num >= config.group_num_thrd:
                class_pred = 1
            else:
                class_pred = 0
            confidence = sum(group_conf) / len_group_conf if len_group_conf else 0

            # calculate the window range
            if class_pred == 1:
                confidence_total[confidence] = [group_max_conf, start, end]
                start_total.append(windowed_st[0].stats.starttime)
                end_total.append(windowed_st[0].stats.endtime)
                pos_num_total.append(pos_num)
                if start_flag == -1:
                    start_flag = windowed_st[0].stats.starttime
                    end_flag = windowed_st[0].stats.endtime
                else:
                    end_flag = windowed_st[0].stats.endtime

            print("{} {} {} {} {:.8f} {:.8f}".format(class_pred, start_flag, end_flag,
                                                     windowed_st[0].stats.starttime,
                                                     confidence, group_max_conf))

            if class_pred == 0 and start_flag != -1:  # end_flag < windowed_st[0].stats.starttime:
                confidence = max(list(confidence_total.keys()))
                # for j in range(len(confidence_total)):
                #     if confidence == confidence_total[j]:
                #         break
                # start_local = start_total[j]
                # end_local = end_total[j]
                # event = [file, start_flag, end_flag, confidence, start_local, end_local]
                event_num += 1
                group_max_conf = confidence_total[confidence][0]
                start = confidence_total[confidence][1]
                end = confidence_total[confidence][2]
                event = [event_num, file, start_flag, end_flag, confidence,
                         max(pos_num_total), start, end, group_max_conf]
                confidence_total = {}
                start_total = []
                end_total = []
                pos_num_total = []
                event_list.append(event)
                # print(event_list)
                start_flag = -1
                end_flag = -1

            if class_pred == 1 and end_flag + config.winlag / samples >= endtime:
                confidence = max(list(confidence_total.keys()))
                # for j in range(len(confidence_total)):
                #     if confidence == confidence_total[j]:
                #         break
                # start_local = start_total[j]
                # end_local = end_total[j]
                # event = [file.split('/')[-2], start_flag, endtime, confidence, start_total, end_total]
                event_num += 1
                group_max_conf = confidence_total[confidence][0]
                start = confidence_total[confidence][1]
                end = confidence_total[confidence][2]
                event = [event_num, file, start_flag, endtime, confidence,
                         max(pos_num_total), start, end, group_max_conf]
                event_list.append(event)
                start_flag = -1
                end_flag = -1

        if event_list:
            new_event_list = [event_list[0]]
            for i in range(1, len(event_list)):
                if event_list[i][1] > new_event_list[-1][1] and \
                        event_list[i][1] < new_event_list[-1][1] + 1000 / (config.resample if config.resample else 200):
                    # if event_list[i][1] > new_event_list[-1][1] and event_list[i][1] < new_event_list[-1][2]:
                    new_event_list[-1][2] = event_list[i][2]
                else:
                    new_event_list.append(event_list[i])
        else:
            new_event_list = []

        # write event list
        if len(event_list) != 0:
            with open(config.root + '/event_detect/detect_result/' + model_name + '/events_list.csv',
                      mode='a', newline='') as f:
                csvwriter = csv.writer(f)
                for event in event_list:
                    csvwriter.writerow(event)
                f.close()

        if plot:
            print('Plot detected events.')
            for event in new_event_list:
                plot_traces = st_Z
                event_num, _, start_flag, end_flag, confidence, pos_num, start, end, group_max_conf = event
                name = config.root + '/event_detect/detect_result/png/' \
                    + str(int(event_num)) + '_' + str(confidence)[:4] + '.png'
                plot_traces.plot(starttime=start_flag, endtime=end_flag, size=(800, 800),
                                 automerge=False, equal_scale=False, linewidth=0.8, outfile=name)
                plot_traces2 = st_all[start:end]
                name2 = config.root + '/event_detect/detect_result/png2/' \
                    + str(int(event_num)) + '_' + str(group_max_conf)[:4] + '.png'
                plot_traces2.plot(starttime=start_flag, endtime=end_flag, size=(800, 800),
                                  automerge=False, equal_scale=False, linewidth=0.8, outfile=name2)

        ## cut use Obspy, processed data
        # if cut:
        #     print('Cut detected events.')
        #     for event in new_event_list:
        #         event_num, _, start_flag, end_flag, confidence, pos_num, start, end, group_max_conf = event
        #         slice_E = st_E.slice(start_flag, end_flag)
        #         slice_N = st_N.slice(start_flag, end_flag)
        #         slice_Z = st_Z.slice(start_flag, end_flag)
        #         save_path = config.root + '/event_detect/detect_result/cut/' \
        #             + str(int(event_num)) + '_' + str(confidence)[:4]
        #         os.system('mkdir %s' % save_path)
        #         for i in range(len(slice_E)):
        #             slice_E[i].write(save_path + '/' + sac_file_name[0][i], format='SAC')
        #             slice_N[i].write(save_path + '/' + sac_file_name[1][i], format='SAC')
        #             slice_Z[i].write(save_path + '/' + sac_file_name[2][i], format='SAC')

        ## cut use SAC, raw data
        if cut:
            print('Cut detected events.')
            for event in new_event_list:
                event_num, _, start_flag, end_flag, confidence, pos_num, start, end, group_max_conf = event
                save_path = config.root + '/event_detect/detect_result/cut/' \
                    + str(int(event_num)) + '_' + str(confidence)[:4] + '/'
                os.system('mkdir %s' % save_path)
                cut_b = 60 * 60 * int(start_flag.hour) + 60 * int(start_flag.minute) + float(start_flag.second)
                cut_e = 60 * 60 * int(end_flag.hour) + 60 * int(end_flag.minute) + float(end_flag.second)
                ## SAC
                os.putenv("SAC_DISPLAY_COPYRIGHT", "0")
                p = subprocess.Popen(['sac'], stdin=subprocess.PIPE)
                s = ''
                s += "cut %s %s \n" % (cut_b, cut_e)
                s += "r %s/* \n" % (config.root + '/data/after/' + file)
                s += "w dir %s over \n" % (save_path)
                s += "quit \n"
                p.communicate(s.encode())

        start_point += 1
        with open(config.root + '/event_detect/detect_result/' + model_name + '/checkpoint',
                  mode='w') as f:
            f.write(str(start_point) + '\n')
            f.write(str(event_num))
        end = datetime.datetime.now()
        print('{} completed, num {}, time {}.'.format(file, start_point, end - begin))
        print('Checkpoint saved.')
# logger.info("%s, evaluation loss:%s, acc:%s" % (timestr, total_loss / step, total_right / (total_right + total_wrong)))
# ---------------------------------- execute valid model end --------------------------------------

# ----------------------------------- begin to train -----------------------------------
with tf.Graph().as_default():
    with tf.device("/gpu:3"):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_options)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        with tf.Session(config=session_conf).as_default() as sess:
            cnn = CNN(FLAGS.sequence_len, embedding, FLAGS.embedding_size,
                      filter_sizes, FLAGS.num_filters)
            global_step = tf.Variable(0, name="global_step", trainable=False)
            # optimizer = tf.train.AdamOptimizer(5e-2)
            optimizer = tf.train.GradientDescentOptimizer(1e-1)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
            sess.run(tf.initialize_all_variables())

            # ori_quests, cand_quests = zip(*train_quests)
            # valid_ori_quests, valid_cand_quests = zip(*valid_quests)
            for ori_train, cand_train, neg_train in batch_iter(
                    ori_quests, cand_quests, FLAGS.batch_size, FLAGS.epoches):
                run_step(sess, ori_train, cand_train, neg_train, cnn,
def main():
    print('Starting process...')
    SEED = 111
    torch.manual_seed(SEED)
    torch.backends.cudnn.deterministic = True

    TEXT = torchtext.data.Field(tokenize='spacy', batch_first=True)
    LABEL = torchtext.data.LabelField(dtype=torch.float)
    train_data, test_data = datasets.IMDB.splits(TEXT, LABEL)
    train_data, valid_data = train_data.split(random_state=random.seed(SEED))

    max_vocab_size = 25_000
    TEXT.build_vocab(
        train_data,
        max_size=max_vocab_size,
        vectors="glove.6B.100d",
        unk_init=torch.Tensor.normal_,
    )
    LABEL.build_vocab(train_data)

    train_iter, valid_iter, test_iter = torchtext.data.BucketIterator.splits(
        (train_data, valid_data, test_data),
        batch_size=ARGS.batch_size,
        device=DEVICE)

    vocab_size = len(TEXT.vocab)
    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    filter_sizes = np.array(ARGS.filter_sizes.split(','), dtype=int)

    model = CNN(vocab_size, ARGS.binary_neuron, ARGS.embed_dim, ARGS.n_filters,
                filter_sizes, ARGS.output_dim, ARGS.dropout_rate, pad_idx)

    pretrained_embeddings = TEXT.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)
    UNK_IDX = TEXT.vocab.stoi[TEXT.unk_token]
    model.embedding.weight.data[UNK_IDX] = torch.zeros(ARGS.embed_dim)
    model.embedding.weight.data[pad_idx] = torch.zeros(ARGS.embed_dim)

    criterion = torch.nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters())
    model.to(DEVICE)
    criterion.to(DEVICE)

    min_valid_loss = float('inf')
    for epoch in range(1, ARGS.epochs + 1):
        start_time = time.time()
        model.train()
        train_loss, train_acc, train_p, train_tn, train_fp, train_fn = run_epoch(
            model, train_iter, criterion, optimizer)
        model.eval()
        with torch.no_grad():
            valid_loss, valid_acc, val_p, val_tn, val_fp, val_fn = run_epoch(
                model, valid_iter, criterion)
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        if valid_loss < min_valid_loss:
            min_valid_loss = valid_loss
            torch.save(model.state_dict(), 'model.pt')
        print(
            f'Epoch: {epoch:02} | Epoch Time: {epoch_mins}m {epoch_secs}s\n'
            f'\tTrain Loss: {train_loss:.3f} | Train Acc: {train_acc*100:.2f}%\n'
            f'\tVal. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}%'
        )

    test_loss, test_acc, test_tp, test_tn, test_fp, test_fn = run_epoch(
        model, test_iter, criterion)
    print(f'Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

    sns.heatmap(np.array([[test_tp, test_fp], [test_fn, test_tn]]),
                vmax=.5, linewidth=0.5, cmap="Blues",
                xticklabels=["Positive", "Negative"],
                yticklabels=["True", "False"])
    print(np.array([[test_tp, test_fp], [test_fn, test_tn]]))
    plt.show()
print(n(m(char_embedding_i)).size())
x = n(m(char_embedding_i))
x = x.squeeze(2, 3)
print(x.size())
"""

## cnn = CNN()
learning_rate = 1.0
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

for i in range(0, 44376 - 1, 35):
    # print("charembed")
    # print(char_embed[:, i:i+35, :])
    # print(char_embed[:, i:i+35, :].size())
    cnn = CNN()
    char_embedx = char_embed[:, i:i + 35, :].contiguous().view(-1, 1, 21, 15)
    # print("char_embedx")
    # print(char_embedx)
    # print(char_embedx.size())
    cnnoutput = cnn(char_embedx)
    # print('CNN')
    # print(cnnoutput)
    # print(cnnoutput.size())
    input_size = cnnoutput.size()[1]
    output_size = cnnoutput.size()[1]
    highway = Highway(input_size, output_size)
    highwayoutput = highway(cnnoutput)
    # print("highway")
    # print(highwayoutput)
def main():
    # loads, encodes and normalizes the dataset
    X_train, y_train, X_test, y_test, N_class = load_data()
    encoder = OneHotEncoder(sparse=True)
    y_train = encoder.fit_transform(y_train.reshape(-1, 1)).toarray()
    y_test = encoder.transform(y_test.reshape(-1, 1)).toarray()
    X_train = normalize(X_train)
    X_train, y_train = shuffle(X_train, y_train)
    X_test = normalize(X_test)
    X_test, X_validation, y_test, y_validation = train_test_split(X_test, y_test,
                                                                  test_size=0.50,
                                                                  random_state=0)

    """ IN THIS SECTION WE COMPARE DIFFERENT REGULARIZATIONS """
    nn1 = CNN(
        name="No_regularization",
        imageWidth=100,
        imageHeight=100,
        hiddenSize=256,
        outputSize=N_class,
        filters=[(3, 3, 3, 20), (3, 3, 20, 50)],
        poolSize=(2, 2),
        initialization="xavier_glorot",
        regularization="None"
    )
    cost1, accuracy1 = nn1.train(X_train, y_train, X_validation, y_validation,
                                 batchSize=128, epochs=20)

    nn2 = CNN(
        name="dropout_regularization",
        imageWidth=100,
        imageHeight=100,
        hiddenSize=256,
        outputSize=N_class,
        filters=[(3, 3, 3, 20), (3, 3, 20, 50)],
        poolSize=(2, 2),
        initialization="xavier_glorot",
        regularization="dropout"
    )
    cost2, accuracy2 = nn2.train(X_train, y_train, X_validation, y_validation,
                                 batchSize=128, epochs=20)

    nn3 = CNN(
        name="l1_regularization",
        imageWidth=100,
        imageHeight=100,
        hiddenSize=256,
        outputSize=N_class,
        filters=[(3, 3, 3, 20), (3, 3, 20, 50)],
        poolSize=(2, 2),
        initialization="xavier_glorot",
        regularization="l1"
    )
    cost3, accuracy3 = nn3.train(X_train, y_train, X_validation, y_validation,
                                 batchSize=128, epochs=20)

    nn4 = CNN(
        name="l2_regularization",
        imageWidth=100,
        imageHeight=100,
        hiddenSize=256,
        outputSize=N_class,
        filters=[(3, 3, 3, 20), (3, 3, 20, 50)],
        poolSize=(2, 2),
        initialization="xavier_glorot",
        regularization="l2"
    )
    cost4, accuracy4 = nn4.train(X_train, y_train, X_validation, y_validation,
                                 batchSize=128, epochs=20)

    plt.xlabel("Epochs")
    plt.ylabel("Cost")
    plt.plot(cost1, label='None')
    plt.plot(cost2, label='dropout')
    plt.plot(cost3, label='l1')
    plt.plot(cost4, label='l2')
    plt.legend(loc='upper left')
    plt.show()

    plt.xlabel("Epochs")
    plt.ylabel("Validation Accuracy")
    plt.plot(accuracy1, label='None')
    plt.plot(accuracy2, label='dropout')
    plt.plot(accuracy3, label='l1')
    plt.plot(accuracy4, label='l2')
    plt.legend(loc='upper left')
    plt.show()

    count1 = np.zeros((4, 4), dtype=int)
    count2 = np.zeros((4, 4), dtype=int)
    count3 = np.zeros((4, 4), dtype=int)
    count4 = np.zeros((4, 4), dtype=int)
    for i in range(len(X_test)):
        k = np.argmax(y_test[i])
        j1 = nn1.predictOne(X_test[i])
        j2 = nn2.predictOne(X_test[i])
        j3 = nn3.predictOne(X_test[i])
        j4 = nn4.predictOne(X_test[i])
        if j1 != k:
            count1[k][j1] += 1
        if j2 != k:
            count2[k][j2] += 1
        if j3 != k:
            count3[k][j3] += 1
        if j4 != k:
            count4[k][j4] += 1

    for i in range(N_class):
        p = np.zeros(N_class)
        p[i] = 1
        print(i, ':', encoder.inverse_transform([p]))

    print("Test phase")
    print("------")
    print("None:")
    print("mistakes")
    print(count1)
    print("Test accuracy:", (len(X_test) - np.sum(count1)) / len(X_test))
    print("------")
    print("dropout:")
    print("mistakes")
    print(count2)
    print("Test accuracy:", (len(X_test) - np.sum(count2)) / len(X_test))
    print("------")
    print("l1:")
    print("mistakes")
    print(count3)
    print("Test accuracy:", (len(X_test) - np.sum(count3)) / len(X_test))
    print("------")
    print("l2:")
    print("mistakes")
    print(count4)
    print("Test accuracy:", (len(X_test) - np.sum(count4)) / len(X_test))
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_))) print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev))) # Training # ================================================== with tf.Graph().as_default(): session_conf = tf.ConfigProto( allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement) sess = tf.Session(config=session_conf) with sess.as_default(): cnn = CNN(sequence_length=x_train.shape[1], num_classes=y_train.shape[1], vocab_size=len(vocab_processor.vocabulary_), embedding_size=FLAGS.embedding_dim, filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))), num_filters=FLAGS.num_filters, l2_reg_lambda=FLAGS.l2_reg_lambda) # Define Training procedure global_step = tf.Variable(0, name="global_step", trainable=False) optimizer = tf.train.AdamOptimizer(1e-3) grads_and_vars = optimizer.compute_gradients(cnn.loss) train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step) # Keep track of gradient values and sparsity (optional) grad_summaries = [] for g, v in grads_and_vars: if g is not None:
FilesList = LoadList(cfg.TEST_LIST)

WND_SHIFT = WND_WIDTH - 2
VEC_PER_WND = WND_WIDTH / math.pow(2, MPoolLayers_H)

phase_train = tf.Variable(True, name='phase_train')

x = tf.placeholder(tf.float32, shape=[None, WND_HEIGHT, WND_WIDTH])
SeqLens = tf.placeholder(shape=[cfg.BatchSize], dtype=tf.int32)

x_expanded = tf.expand_dims(x, 3)

Inputs = CNN(x_expanded, phase_train, 'CNN_1')
logits = RNN(Inputs, SeqLens, 'RNN_1')

# CTC Beam Search Decoder to decode pred string from the prob map
decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, SeqLens)

# Reading test data...
InputListTest, SeqLensTest, _ = ReadData(cfg.TEST_LOCATION, cfg.TEST_LIST, cfg.TEST_NB,
                                         WND_HEIGHT, WND_WIDTH, WND_SHIFT, VEC_PER_WND, '')

print('Initializing...')
session = tf.Session()
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--batch-size', type=int, default=64, metavar='N',
                        help='input batch size for training (default: 64)')
    parser.add_argument('--test-batch-size', type=int, default=1000, metavar='N',
                        help='input batch size for testing (default: 1000)')
    parser.add_argument('--epochs', type=int, default=10, metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.01)')
    parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                        help='SGD momentum (default: 0.5)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--optimizer', type=str, default='sgd',
                        help='which optimizer to use in training. Valid options are ' +
                             '\'sgd\' or \'kfac\'.')
    parser.add_argument('--save-model', action='store_true', default=False,
                        help='For Saving the current Model')
    parser.add_argument('--save-stats', type=str, default=None,
                        help='name of file to save training loss and test loss and accuracy.')
    parser.add_argument('--test-every', type=int, default=None,
                        help='test the model roughly every n examples')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('.', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('.', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)

    model = CNN().to(device)
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    elif args.optimizer == 'kfac':
        optimizer = KFAC(model, F.nll_loss)

    train_stats = {}
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, test_loader, optimizer, epoch, train_stats)
        test(args, model, device, test_loader)

    if (args.save_model):
        torch.save(model.state_dict(), "mnist_cnn.pt")