def main(args):
    torch.manual_seed(args.seed)
    train_loader, test_loader = data_generator(args.data_dir, args.batch_size)

    for m in range(len(models)):
        if models[m] == "Transformer":
            model = Transformer(args.NumFeatures, args.NumTimeSteps, args.n_layers,
                                args.heads, args.dropout, args.n_classes,
                                time=args.NumTimeSteps)
        elif models[m] == "TCN":
            channel_sizes = [args.nhid] * args.levels
            model = TCN(args.NumFeatures, args.n_classes, channel_sizes,
                        kernel_size=args.ksize, dropout=args.dropout)
        elif models[m] == "LSTMWithInputCellAttention":
            model = LSTMWithInputCellAttention(args.NumFeatures, args.nhid,
                                               args.n_classes, args.dropout,
                                               args.attention_hops, args.d_a)
        elif models[m] == "LSTM":
            model = LSTM(args.NumFeatures, args.nhid, args.n_classes, args.dropout)

        model.to(device)
        model_name = "model_{}_NumFeatures_{}".format(models[m], args.NumFeatures)
        model_filename = args.model_dir + 'm_' + model_name + '.pt'

        lr = args.lr
        optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)
        best_test_loss = 100

        for epoch in range(1, args.epochs + 1):
            model, optimizer = train(args, epoch, model, train_loader, optimizer)
            test_loss, test_acc = test(args, model, test_loader)
            if test_loss < best_test_loss:
                best_test_loss = test_loss
                save(model, model_filename)
            if test_acc >= 99:
                # stop early once test accuracy reaches 99%
                break
            if epoch % 10 == 0:
                # decay the learning rate by 10x every 10 epochs
                lr /= 10
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr
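# The loop above relies on a save(model, filename) helper that is not shown here.
# A minimal sketch of what such a helper typically looks like, assuming plain
# torch serialization of the full module (the original implementation may differ):
import torch

def save(model, filename):
    # persist the best-so-far model so it can be reloaded later for evaluation
    with open(filename, "wb") as f:
        torch.save(model, f)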
def optimize(lr, clip): print("Optimizing with " + str(lr) + "lr, " + str(args.epochs) + " epochs, " + str(clip) + " clip") num_chans = [args.nhid] * (args.levels - 1) + [args.emsize] model = TCN(args, n_words, num_chans) if args.cuda: model.cuda() print("Parameters: " + str(sum(p.numel() for p in model.parameters()))) torch.backends.cudnn.benchmark = True # This makes dilated conv much faster for CuDNN 7.5 optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr) # Start training loop best_model_name = "model_" + args.experiment_name + ".pt" best_vloss = 1e8 all_vloss = [] for epoch in range(1, args.epochs+1): epoch_start_time = time.time() try: train(model, optimizer, lr, epoch, clip) except OverflowError: return {'status': 'fail'} print("Validating...") val_loss = evaluate(model, val_data) if np.isnan(val_loss) or val_loss > 100: return {'status' : 'fail'} print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | ' 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time), val_loss, math.exp(val_loss))) print('-' * 89) # Save the model if the validation loss is the best we've seen so far. if val_loss < best_vloss: with open(best_model_name, 'wb') as f: print('Save model!\n') torch.save(model, f) best_vloss = val_loss # Anneal the learning rate if the validation loss plateaus if epoch > 10 and val_loss >= max(all_vloss[-5:]): lr = lr / 2. for param_group in optimizer.param_groups: param_group['lr'] = lr all_vloss.append(val_loss) return {"status" : "ok", "loss" : best_vloss, "model_name" : best_model_name}
def optimize(lr, clip): print("Optimizing with " + str(lr) + "lr, " + str(args.epochs) + " epochs, " + str(clip) + " clip") # Set the random seed manually for reproducibility. torch.manual_seed(args.seed) if torch.cuda.is_available(): if not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) print(args) n_channels = [args.nhid] * args.levels model = TCN(args.model, input_size, input_size, n_channels, args.ksize, dropout=args.dropout) print('Parameter count: ', str(sum(p.numel() for p in model.parameters()))) if args.cuda: model.cuda() #summary(model, (193, 88)) optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr) best_vloss = 1e8 vloss_list = [] model_name = "model_" + str(args.data) + "_" + str( args.experiment_name) + ".pt" for ep in range(1, args.epochs + 1): train(model, ep, lr, optimizer, clip) vloss = evaluate(model, X_valid, name='Validation') if np.isnan(vloss) or vloss > 1000: return {'status': 'fail'} if vloss < best_vloss: with open(model_name, "wb") as f: torch.save(model, f) print("Saved model!\n") best_vloss = vloss if ep > 10 and vloss > max(vloss_list[-10:]): lr /= 2 for param_group in optimizer.param_groups: param_group['lr'] = lr vloss_list.append(vloss) return {'status': 'ok', 'loss': best_vloss, 'model_name': model_name}
def optimize(lr, clip): print("Optimizing with " + str(lr) + "lr, " + str(args.epochs) + " epochs, " + str(clip) + " clip") num_chans = [args.nhid] * (args.levels - 1) + [args.emsize] model = TCN(args, n_characters, num_chans) if args.cuda: model.cuda() print("Parameters: " + str(sum(p.numel() for p in model.parameters()))) torch.backends.cudnn.benchmark = True # This makes dilated conv much faster for CuDNN 7.5 optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr) # Start training loop all_losses = [] best_vloss = 1e7 for epoch in range(1, args.epochs + 1): try: train(model, optimizer, clip, lr, epoch) except OverflowError: return {'status': 'fail'} vloss = evaluate(model, val_data) if np.isnan(vloss) or vloss > 1000: return {'status': 'fail'} print('-' * 89) print('| End of epoch {:3d} | valid loss {:5.3f} | valid bpc {:8.3f}'. format(epoch, vloss, vloss / math.log(2))) if epoch > 10 and vloss > max(all_losses[-5:]): lr = lr / 2. for param_group in optimizer.param_groups: param_group['lr'] = lr all_losses.append(vloss) if vloss < best_vloss: print("Saving...") with open("model_" + args.experiment_name + ".pt", "wb") as f: torch.save(model, f) print("Saved model!\n") best_vloss = vloss return { "status": "ok", "loss": best_vloss, "model_name": "model_" + args.experiment_name + ".pt" }
print("train_data.shape:", train_data.shape, ", train_labels.shape:", train_labels.shape) train_dataset = tf.data.Dataset.from_tensor_slices( (train_data, train_labels)).shuffle(n_train).batch(batch_size) test_data, test_labels = data_generator(T, seq_len, n_test) test_data, test_labels = tf.convert_to_tensor(test_data), tf.convert_to_tensor( test_labels) # Build model # Note: We use a very simple setting here (assuming all levels have the same # of channels. print("Building model...") channel_sizes = [args.nhid] * args.levels kernel_size = args.ksize dropout = args.dropout model = TCN(n_classes, channel_sizes, kernel_size=kernel_size, dropout=dropout) # Optimizer optimizer = tf.train.RMSPropOptimizer(lr) # RUN for epoch in range(epochs): for batch, (train_x, train_y) in enumerate(train_dataset): # print(train_x.shape, train_y.shape) # assert train_x.shape == (batch_size, n_steps, 1) # assert train_y.shape == (batch_size, n_steps) # loss with tf.GradientTape() as tape: y = model(train_x, training=True) # assert y.shape == (batch_size, n_steps, 10) loss_np = tf.nn.sparse_softmax_cross_entropy_with_logits(
def load_network(network, network_label, epoch_label):
    load_filename = 'net_epoch_%d_id_%s.pth' % (epoch_label, network_label)
    load_path = os.path.join(save_dir, load_filename)
    assert os.path.exists(load_path), \
        'Weights file %s not found. Have you trained a model!? We are not providing one' % load_path
    network.load_state_dict(torch.load(load_path))
    print('loaded net: %s' % load_path)


model_T = TCN(input_channels_T, n_classes, channel_sizes,
              kernel_size=kernel_size, dropout=args.dropout)
model_E = TCN(input_channels_E, n_classes, channel_sizes,
              kernel_size=kernel_size, dropout=args.dropout)
model_G = TCN(input_channels_G, n_classes, channel_sizes,
              kernel_size=kernel_size, dropout=args.dropout)

if args.cuda:
    model_T.cuda()
file, file_len, valfile, valfile_len, testfile, testfile_len, corpus = data_generator(
    args)
n_characters = len(corpus.dict)
train_data = batchify(char_tensor(corpus, file), args.batch_size, args)
val_data = batchify(char_tensor(corpus, valfile), 1, args)
test_data = batchify(char_tensor(corpus, testfile), 1, args)
print("Corpus size: ", n_characters)

num_chans = [args.nhid] * (args.levels - 1) + [args.emsize]
k_size = args.ksize
dropout = args.dropout
emb_dropout = args.emb_dropout
model = TCN(args.emsize, n_characters, num_chans, args.seq_len, args.emsize,
            kernel_size=k_size, dropout=dropout, emb_dropout=emb_dropout)

lr = args.lr
total_step = 0


def evaluate(source):
    total_loss = 0
    count = 0
    source_len = source.shape[1]
    for batch, i in enumerate(range(0, source_len - 1, args.validseqlen)):
        if i + args.seq_len - args.validseqlen >= source_len:
            break
        inp, target = get_batch(source, i, args)
def create_model(num_channels, in_channels, kernel_size, dropout, out_features):
    model = TCN(num_channels, in_channels, kernel_size, dropout, out_features)
    return model
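# Example of building a small model with the factory above; the argument values are
# illustrative only (channel widths, kernel size, etc. depend on the task at hand):
num_channels = [64, 64, 64, 64]   # four residual levels, 64 channels each
model = create_model(num_channels, in_channels=1, kernel_size=7,
                     dropout=0.1, out_features=10)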
iters = args.iters
T = args.blank_len
n_steps = T + (2 * seq_len)
n_classes = 10  # Digits 0 - 9
n_train = 10000
n_test = 1000

print(args)
print("Preparing data...")
train_x, train_y = data_generator(T, seq_len, n_train)
test_x, test_y = data_generator(T, seq_len, n_test)

channel_sizes = [args.nhid] * args.levels
kernel_size = args.ksize
dropout = args.dropout
model = TCN(1, n_classes, channel_sizes, kernel_size, dropout=dropout)


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


total_params = count_parameters(model)
print("Total params are ", total_params)

if args.cuda:
    model.cuda()
    train_x = train_x.cuda()
    train_y = train_y.cuda()
    test_x = test_x.cuda()
    test_y = test_y.cuda()
levelfrac = fsolve(funcLevel, 30, args=(sequence_length, kernel_size, d))
level = int(np.ceil(levelfrac))
channel_sizes = [nhid] * level
seq_length = calc_seq_length(kernel_size, d, level)
last_dilation = int(
    np.ceil((sequence_length - calc_seq_length(kernel_size, d, level - 1)) /
            (2 * (kernel_size - 1))))
dilation_sizes = (d**np.arange(level - 1)).tolist() + [last_dilation]
# dilation_sizes = (d**np.arange(level)).tolist()
seq_length = calc_seq_length(kernel_size, dilation_sizes, level)
# print(d, seq_length, level)
# continue
model = TCN(input_channels, n_classes, channel_sizes,
            dilation_size=dilation_sizes, kernel_size=kernel_size,
            dropout=dropout)
s, sp = summary(model, input_size=(input_channels, 2 * seq_length),
                batch_size=batch_size, noprint=True)
total_sizes += [sp['total_size']]
seq_lengths += [seq_length]
print(d, seq_length, level, sp['total_size'], last_dilation)

# level = 8
# dilation_size = 2
# seq_length = calc_seq_length(kernel_size, dilation_size, level)
# channel_sizes = [nhid] * level
# print("****Setup 1****")
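# calc_seq_length above is assumed to return the receptive field of a TCN whose residual
# blocks each contain two dilated convolutions: 1 + sum_i 2*(kernel_size - 1)*d_i
# (funcLevel then solves for the level count whose receptive field covers sequence_length).
# A minimal sketch that, like the calls above, accepts either a dilation base or an
# explicit list of dilations; the original helper may differ:
import numpy as np

def calc_seq_length_sketch(kernel_size, dilation, level):
    if np.isscalar(dilation):
        dilations = dilation ** np.arange(level)   # 1, d, d^2, ...
    else:
        dilations = np.asarray(dilation)
    return int(1 + 2 * (kernel_size - 1) * dilations.sum())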
input_channels = 6
output_size = 1
batch_size = args.batch_size
seq_length = args.seq_len
epochs = args.epochs

print(args)
print("Producing data...")

# Note: We use a very simple setting here (assuming all levels have the same number of channels).
num_channels = [args.nhid] * args.levels
kernel_size = args.ksize
dropout = args.dropout
model = TCN(input_channels, output_size, num_channels, kernel_size=kernel_size,
            dropout=dropout)

# a = torch.rand(3, 6, 10)
# aa = model(train_x)
# print(aa)

if args.cuda:
    model.cuda()
    train_x = train_x.cuda()
    train_y = train_y.cuda()
    test_x = test_x.cuda()
    test_y = test_y.cuda()

lr = args.lr
bn_switch = tf.placeholder(dtype=tf.bool)
dropout_switch = tf.placeholder(dtype=tf.float32)
input_layer = tf.placeholder(dtype=tf.int32, shape=(None, args.seq_len))
labels = tf.placeholder(dtype=tf.int32, shape=(None, args.seq_len))
one_hot = tf.one_hot(labels, depth=n_characters, axis=-1, dtype=tf.float32)

num_chans = [args.nhid] * (args.levels - 1) + [args.emsize]
k_size = args.ksize
dropout = args.dropout
emb_dropout = args.emb_dropout

output = TCN(input_layer, n_characters, num_chans, args.emsize,
             kernel_size=k_size, dropout=dropout_switch, bn_switch=bn_switch)

eff_history = args.seq_len - args.validseqlen
final_output = tf.reshape(output[:, eff_history:, :], (-1, n_characters))
final_target = tf.reshape(one_hot[:, eff_history:, :], (-1, n_characters))
# Treat the one-hot targets as constants: softmax_cross_entropy_with_logits_v2
# would otherwise backpropagate into the labels as well.
final_target = tf.stop_gradient(final_target)

loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=final_output,
                                                  labels=final_target)
cross_entropy_mean = tf.reduce_mean(loss, name='cross_entropy')
# loss_for_minimization = cross_entropy_mean
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_threds,
    drop_last=False)

test_dataset = MitbinDataset(args, is_for_train=False)
test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_threds,
    drop_last=False)

channel_sizes = [args.nhid] * args.levels
kernel_size = args.ksize

model_T = TCN(input_channels_T, n_classes, channel_sizes,
              kernel_size=kernel_size, dropout=args.dropout)
model_E = TCN(input_channels_E, n_classes, channel_sizes,
              kernel_size=kernel_size, dropout=args.dropout)
model_G = TCN(input_channels_G, n_classes, channel_sizes,
              kernel_size=kernel_size, dropout=args.dropout)

if args.cuda:
    model_T.cuda()
    model_E.cuda()
    model_G.cuda()

# One optimizer with a separate learning rate per sub-model, via parameter groups
optimizer = getattr(optim, args.optim)([
    {'params': model_T.parameters(), 'lr': args.lr_T},
    {'params': model_E.parameters(), 'lr': args.lr_E},
    {'params': model_G.parameters(), 'lr': args.lr_G}
])  # , momentum=0.9)


def save_network(network, network_label, epoch_label):
    save_filename = 'net_epoch_%d_id_%s.pth' % (epoch_label, network_label)
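# The save_network body is truncated above. A minimal sketch of how it likely continues,
# assuming it mirrors the load_network helper (same filename pattern, state_dict
# serialization); this is an illustration, not the repository's exact code:
def save_network_sketch(network, network_label, epoch_label, save_dir):
    save_filename = 'net_epoch_%d_id_%s.pth' % (epoch_label, network_label)
    save_path = os.path.join(save_dir, save_filename)
    torch.save(network.state_dict(), save_path)  # persist the weights only
    print('saved net: %s' % save_path)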
file, file_len, valfile, valfile_len, testfile, testfile_len, corpus = data_generator(
    args)
n_characters = len(corpus.dict)
train_data = batchify(char_tensor(corpus, file), args.batch_size, args)
val_data = batchify(char_tensor(corpus, valfile), 1, args)
test_data = batchify(char_tensor(corpus, testfile), 1, args)
print("Corpus size: ", n_characters)

num_chans = [args.nhid] * (args.levels - 1) + [args.emsize]
k_size = args.ksize
dropout = args.dropout
emb_dropout = args.emb_dropout
model = TCN(args.emsize, n_characters, num_chans, args.seq_len, args.emsize,
            kernel_size=k_size)

lr = args.lr
total_step = 0


def evaluate(source):
    total_loss = 0
    count = 0
    source_len = source.shape[1]
    for batch, i in enumerate(range(0, source_len - 1, args.validseqlen)):
        if i + args.seq_len - args.validseqlen >= source_len:
            break
        inp, target = get_batch(source, i, args)
        eff_history = args.seq_len - args.validseqlen
print("train_data.shape:", train_data.shape) print("eval_data.shape:", eval_data.shape) # Build model print("Building model...") channel_sizes = [args.nhid] * args.levels kernel_size = args.ksize dropout = args.dropout emsize = args.emsize emb_dropout = args.emb_dropout vocab_size = corpus.vocab_size model = TCN(output_size=vocab_size, num_channels=channel_sizes, kernel_size=kernel_size, dropout=dropout, embedding_dim=emsize, sequence_length=seq_len, emb_dropout=emb_dropout) # 优化 learning_rate = tf.Variable(lr, name="learning_rate") optimizer = tf.train.GradientDescentOptimizer(learning_rate) # last_loss, best_loss = None, None for epoch in range(epochs): # range把原来的sequence截成多部分,分别训练 for batch, i in enumerate( range(0, train_data.shape[1] - args.seq_len, args.validseqlen)):
training_dataset = TCNDataset(training=True)
training_dataloader = torch.utils.data.DataLoader(training_dataset,
                                                  collate_fn=collate_fn_padd,
                                                  batch_size=batch_size,
                                                  shuffle=True,
                                                  drop_last=False)
test_dataset = TCNDataset(training=False)
test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              collate_fn=collate_fn_padd,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              drop_last=False)

single_TCN = TCN()
single_TCN = single_TCN.to(device)
single_TCN_optimizer = torch.optim.Adam(single_TCN.parameters(), lr=0.001)

multi_stage_TCN = MultiStageTCN()
multi_stage_TCN = multi_stage_TCN.to(device)
multi_stage_TCN_optimizer = torch.optim.Adam(multi_stage_TCN.parameters(), lr=0.001)

multi_stage_TCN_video_loss = MultiStageTCN()
multi_stage_TCN_video_loss = multi_stage_TCN_video_loss.to(device)
# separate optimizer for the video-loss variant
multi_stage_TCN_video_loss_optimizer = torch.optim.Adam(
    multi_stage_TCN_video_loss.parameters(), lr=0.001)

parallel_TCNs = ParallelTCNs()
parallel_TCNs = parallel_TCNs.to(device)
        pin_memory=torch.cuda.is_available()) for split in splits
}
symbols = datasets['train'].symbols

# TCN model
embedding_size = 300  # dimension of character embeddings
dropout_rate = 0.1
emb_dropout_rate = 0.1
levels = 3  # number of levels
nhid = 450  # number of hidden units per layer
num_chans = [nhid] * (levels - 1) + [embedding_size]
model = TCN(vocab_size=datasets['train'].vocab_size,
            embed_size=embedding_size,
            num_channels=num_chans,
            bos_idx=symbols['<bos>'],
            eos_idx=symbols['<eos>'],
            pad_idx=symbols['<pad>'],
            dropout=dropout_rate,
            emb_dropout=emb_dropout_rate)
model = model.to(device)
print(model)

# folder to save model
save_path = 'model'
if not os.path.exists(save_path):
    os.makedirs(save_path)

# objective function: sum (rather than average) the loss over tokens, ignoring padding
learning_rate = 4
criterion = nn.CrossEntropyLoss(reduction='sum', ignore_index=symbols['<pad>'])
(x_train, y_train), (x_test, y_test) = data_generator(permute=args.permute)
print("train_data.shape:", x_train.shape, ", train_labels.shape:", y_train.shape)
train_dataset = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).shuffle(60000).batch(batch_size)
test_data, test_labels = tf.convert_to_tensor(x_test), tf.convert_to_tensor(y_test)

# build model
# Note: We use a very simple setting here (assuming all levels have the same number of channels).
print("Building model...")
channel_sizes = [args.nhid] * args.levels
kernel_size = args.ksize
dropout = args.dropout
model = TCN(n_classes, channel_sizes, kernel_size=kernel_size, dropout=dropout)

# optimizer
optimizer = tf.train.AdamOptimizer(lr)

# run
for epoch in range(epochs):
    for batch, (train_x, train_y) in enumerate(train_dataset):
        # assert train_x.shape == (batch_size, seq_length, 1)
        # assert train_y.shape == (batch_size,)

        # loss
        with tf.GradientTape() as tape:
            y = model(train_x, training=True)
            # assert y.shape == (batch_size, 10)
            loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(labels=train_y,
iters = args.iters
n_classes = 1  # output size
n_train = 10000
n_test = 1000
model_para_path = './model/02-512'

dataset = load_data(file_path)
# The scaler rescales values into the range 0-1
reframed, scaler = normalize_and_make_series(dataset, look_back)
train_x, train_y, test_x, test_y = split_data(dataset, reframed, look_back,
                                              split_time)

channel_sizes = [args.nhid] * args.levels
kernel_size = args.ksize
dropout = args.dropout
model = TCN(look_back, n_classes, channel_sizes, kernel_size, dropout=dropout)
model.double()

if args.cuda:
    model.cuda()
    train_x = train_x.cuda()
    train_y = train_y.cuda()
    test_x = test_x.cuda()
    test_y = test_y.cuda()

criterion = nn.MSELoss()
lr = args.lr
optimizer = getattr(optim, args.optim)(model.parameters(), lr=lr)


def evaluate():
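# The evaluate() body is truncated above. A minimal sketch of a typical evaluation step
# for this regression setup (MSE on the held-out split, no gradient tracking), relying on
# the model, test_x, test_y and criterion defined above; the original function may differ:
def evaluate_sketch():
    model.eval()
    with torch.no_grad():
        output = model(test_x)
        test_loss = criterion(output, test_y)
    model.train()
    return test_loss.item()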
        'input_channels': input_channels,
        'n_classes': n_classes,
        'channel_sizes': channel_sizes,
        'kernel_size': kernel_size,
        'dropout': dropout
    }
else:
    input_channels = modelContext['model_parameters']['input_channels']
    n_classes = modelContext['model_parameters']['n_classes']
    channel_sizes = modelContext['model_parameters']['channel_sizes']
    kernel_size = modelContext['model_parameters']['kernel_size']
    dropout = modelContext['model_parameters']['dropout']

# Generate the model
model = TCN(input_channels, n_classes, channel_sizes, kernel_size=kernel_size,
            dropout=dropout)

# Creating a backup of the model that we can use for early stopping
modelBEST = model

### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###
### ~~~~~~~~~~~~~~~~~ LOAD DATA INTO CUDA ~~~~~~~~~~~~~~~~~~~ ###
if args.cuda:
    torch.cuda.set_device(cuda_device)
    model.cuda()
    modelBEST.cuda()
    # If we are not just testing then load everything into cuda
    if not testSession:
if args.lmodel == 1:
    corpus = data_generator1(args)

eval_batch_size = 10
train_data = list(zip(corpus.train_embeddings, corpus.train_labels))
valid_data = list(zip(corpus.valid_embeddings, corpus.valid_labels))
test_data = list(zip(corpus.test_embeddings, corpus.test_labels))

num_chans = [args.nhid] * (args.levels)
k_size = args.ksize
dropout = args.dropout
emb_dropout = args.emb_dropout
tied = args.tied

if args.model == 0:
    model = TCN(args.emsize, 1, num_chans, dropout=dropout, kernel_size=k_size)
if args.model == 1:
    model = LSTM_classifier(input_size=args.emsize, output_size=1,
                            hidden_size=args.nhid)
if args.model == 2:
    model = LSTM_classifier_bidirectional(input_size=args.emsize, output_size=1,
                                          hidden_size=args.nhid)
if args.model == 3:
    model = GRU_classifier(input_size=args.emsize, output_size=1,
                           hidden_size=args.nhid)
if args.model == 4:
    model = GRU_classifier_bidirectional(input_size=args.emsize, output_size=1,
    Data.testData, args.eval_batch_size, isLabel=False)
test_label = Data.batchify(args, Data.testLabel, args.eval_batch_size,
                           isLabel=True)
print('anomaly rate:', Data.testLabel.mean())  # print anomaly rate

###############################################################################
# Build the model
###############################################################################
feature_dim = Data.trainData.size(1)
model = TCN(input_size=feature_dim,
            output_size=feature_dim,
            num_channels=[32, 32, 32, 32, 32, 32, 32],
            kernel_size=7,
            dropout=0.2).to(args.device)
model.load_state_dict(checkpoint['state_dict'])

try:
    '''load mean and covariance if they are pre-calculated, if not calculate them.'''
    # Mean and covariance are calculated on train dataset.
    if 'mean' in checkpoint.keys() and 'cov' in checkpoint.keys():
        print('=> loading pre-calculated mean and covariance')
        mean, cov = checkpoint['mean'], checkpoint['cov']
    else:
        print('=> calculating mean and covariance')
        mean, cov = fit_norm_distribution_param(args, model, train_data, feature_dim)

    '''calculate anomaly scores'''
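# A minimal sketch of how anomaly scores are typically derived from the fitted Gaussian:
# the Mahalanobis distance of each prediction-error vector from the training-error
# distribution. The names (prediction_errors, mahalanobis_scores) are illustrative;
# the repository's own scoring helper may differ in detail.
import torch

def mahalanobis_scores(prediction_errors, mean, cov):
    # prediction_errors: (T, feature_dim) tensor of per-step errors on the test data
    cov_inv = torch.inverse(cov)
    centered = prediction_errors - mean                       # (T, feature_dim)
    # score_t = (e_t - mean)^T cov^{-1} (e_t - mean)
    return torch.einsum('ti,ij,tj->t', centered, cov_inv, centered)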
file, file_len, valfile, valfile_len, testfile, testfile_len, corpus = data_generator(
    args)
n_characters = len(corpus.dict)
train_data = batchify(char_tensor(corpus, file), args.batch_size, args)
val_data = batchify(char_tensor(corpus, valfile), 1, args)
test_data = batchify(char_tensor(corpus, testfile), 1, args)
print("Corpus size: ", n_characters)

num_chans = [args.nhid] * (args.levels - 1) + [args.emsize]
k_size = args.ksize
dropout = args.dropout
emb_dropout = args.emb_dropout
model = TCN(args.emsize, n_characters, num_chans, kernel_size=k_size,
            dropout=dropout, emb_dropout=emb_dropout)


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


total_params = count_parameters(model)
print("Total params are ", total_params)

if args.cuda:
    model.cuda()

criterion = nn.CrossEntropyLoss()