def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")
    batch_size = args["batch_size"]
    lr = args["learning_rate"]
    momentum = args["momentum"]
    epochs = args["train_epochs"]
    train_split = args["split_train"]

    loader = processData(args, stageFor="train", indices=labeled)

    net = NeuralNet()
    net = net.to(device=device)

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=float(lr), momentum=momentum)

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        net.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    net.train()

    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):
        running_loss = 0
        for i, batch in enumerate(loader, start=0):
            data, labels = batch
            data = data.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = net(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # log the averaged running loss every 1000 batches
            if (i + 1) % 1000 == 0:
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 1000
                    ),
                    end="\r",
                )
                running_loss = 0

    print("Finished Training. Saving the model as {}".format(ckpt_file))

    ckpt = {"model": net.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))

    return
def train(cpu, args):
    rank = args.nr * args.cpus + cpu
    dist.init_process_group(
        backend="gloo",
        init_method="file:///C:/Users/Tung/Desktop/HK201/CN/Extra/setup.txt",
        world_size=args.world_size,
        rank=rank)
    torch.manual_seed(0)

    # Hyperparameters:
    batch_size = 100    # NOTE: If you run out of memory, try changing this value to 64 or 32
    learning_rate = 0.0001

    # Create model:
    model = NeuralNet()

    # Define loss function and optimizer:
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), learning_rate)

    # Wrap the model for DDP:
    model = nn.parallel.DistributedDataParallel(model, device_ids=None)

    # Data loading:
    train_dataset = torchvision.datasets.MNIST(root='./data',
                                               train=True,
                                               transform=transforms.ToTensor(),
                                               download=True)

    # Distributed sampler so each process works on its own shard of the data:
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset,
        num_replicas=args.world_size,
        rank=rank)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=False,
                                               num_workers=0,
                                               pin_memory=True,
                                               sampler=train_sampler)

    start = datetime.now()
    total_step = len(train_loader)
    for epoch in range(args.epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Forward pass:
            outputs = model(images)
            loss = loss_fn(outputs, labels)

            # Backward pass:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # For logging (only from the first process):
            if (i + 1) % batch_size == 0 and cpu == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(
                    epoch + 1, args.epochs, i + 1, total_step, loss.item()))

    if cpu == 0:
        print("Training completed in: " + str(datetime.now() - start))
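The worker above expects to be launched with one process per CPU, receiving the local process index as `cpu`. A minimal launcher sketch using torch.multiprocessing.spawn, assuming an argparse namespace that supplies the nr, cpus, world_size, and epochs fields the worker reads; the flag names here are illustrative, not taken from the original script:

import argparse
import torch.multiprocessing as mp

def launch():
    parser = argparse.ArgumentParser()
    parser.add_argument('--nodes', type=int, default=1)
    parser.add_argument('--cpus', type=int, default=2, help='processes per node')
    parser.add_argument('--nr', type=int, default=0, help='rank of this node')
    parser.add_argument('--epochs', type=int, default=2)
    args = parser.parse_args()
    args.world_size = args.cpus * args.nodes
    # spawn one training process per local worker; each call receives its local index first
    mp.spawn(train, nprocs=args.cpus, args=(args,))

if __name__ == '__main__':
    launch()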
def main():
    # check if CUDA is available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # create DataLoader
    train_loader = DataLoader(dataset=dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=0)

    # create model and push it to device if available
    model = NeuralNet(input_size, hidden_size, output_size).to(device)

    # loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    return device, train_loader, model, criterion, optimizer
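Several of the snippets here construct NeuralNet(input_size, hidden_size, output_size) and pair it with nn.CrossEntropyLoss, but the class itself is not shown. A minimal sketch of what such a model typically looks like, assuming two hidden ReLU layers and raw logits as output (the actual definition may differ):

import torch.nn as nn

class NeuralNet(nn.Module):
    """Minimal feed-forward classifier sketch; returns raw logits so it can be
    paired with nn.CrossEntropyLoss, which applies log-softmax internally."""
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, output_size),  # no softmax here
        )

    def forward(self, x):
        return self.net(x)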
def main():
    max_features = 21128
    maxlen = 50
    BATCH_SIZE = 128
    INPUT_DIM = max_features
    EMB_DIM = 128
    HID_DIM_1 = 60
    HID_DIM_2 = 16
    OUTPUT_DIM = 3

    h_data, text_data, y_data = load_smart_eyes_data()
    logger.info('text train shape: {}'.format(text_data.shape))

    x_train, x_val, y_train, y_val = train_test_split(text_data, y_data,
                                                      test_size=0.2,
                                                      random_state=42)

    model = NeuralNet(INPUT_DIM, EMB_DIM, HID_DIM_1, HID_DIM_2, OUTPUT_DIM, maxlen)
    optimizer = torch.optim.Adam(model.parameters())

    trainer = Trainer(batch_size=BATCH_SIZE, model=model, optimizer=optimizer, epochs=30)
    trainer.fit(x_train, y_train, x_val, y_val)
def train():
    nodes = retrieve('nodes')

    all_words = []
    ids = []
    xy = []

    # loop through each sentence in our node patterns
    for node in nodes:
        # add to id list
        ids.append(node['id'])
        for pattern in node['patterns']:
            # tokenize each word in the sentence
            w = tokenize(pattern)
            # add to our words list
            all_words.extend(w)
            # add to xy pair
            xy.append((w, node['id']))

    # stem and lower each word and remove stop words
    ignore_words = ['?', '.', '!', '(', ')']
    stop_words = retrieve('stop_words')
    all_words = [w for w in all_words if not w.lower() in stop_words]
    all_words = [stem(w) for w in all_words if w not in ignore_words]

    # remove duplicates and sort
    all_words = sorted(set(all_words))
    ids = sorted(set(ids))

    # create training data
    x_train = []
    y_train = []
    for (pattern_sentence, id) in xy:
        # X: bag of words for each pattern_sentence
        bag = bag_of_words(pattern_sentence, all_words)
        x_train.append(bag)
        # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
        y_train.append(ids.index(id))

    x_train = np.array(x_train)
    y_train = np.array(y_train)

    # Hyper-parameters
    num_epochs = 1000
    batch_size = 8
    learning_rate = 0.001
    input_size = len(x_train[0])
    hidden_size = 8
    output_size = len(ids)

    class ChatDataset(Dataset):
        def __init__(self):
            self.n_samples = len(x_train)
            self.x_data = x_train
            self.y_data = y_train

        # support indexing such that dataset[i] can be used to get the i-th sample
        def __getitem__(self, index):
            return self.x_data[index], self.y_data[index]

        # we can call len(dataset) to return the size
        def __len__(self):
            return self.n_samples

    dataset = ChatDataset()
    train_loader = DataLoader(dataset=dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=0)

    device = torch.device('cpu')
    model = NeuralNet(input_size, hidden_size, output_size).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    for epoch in range(num_epochs):
        for (words, labels) in train_loader:
            words = words.to(device)
            labels = labels.to(dtype=torch.long).to(device)

            # Forward pass
            outputs = model(words)
            # if y were one-hot, we would have to apply
            # labels = torch.max(labels, 1)[1]
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    data = {
        "model_state": model.state_dict(),
        "input_size": input_size,
        "hidden_size": hidden_size,
        "output_size": output_size,
        "all_words": all_words,
        "ids": ids
    }

    torch.save(data, "data.pth")
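The chatbot-style training functions above and below rely on tokenize, stem, and bag_of_words utilities that are defined elsewhere. A minimal NLTK-based sketch of what they usually do, offered as an assumption rather than the original helpers:

import numpy as np
import nltk
from nltk.stem.porter import PorterStemmer

stemmer = PorterStemmer()

def tokenize(sentence):
    # split a sentence into word/punctuation tokens
    return nltk.word_tokenize(sentence)

def stem(word):
    # reduce a word to its lowercased stem, e.g. "Organizing" -> "organ"
    return stemmer.stem(word.lower())

def bag_of_words(tokenized_sentence, all_words):
    # 1.0 for every known word that occurs in the sentence, 0.0 otherwise
    sentence_words = [stem(w) for w in tokenized_sentence]
    bag = np.zeros(len(all_words), dtype=np.float32)
    for idx, w in enumerate(all_words):
        if w in sentence_words:
            bag[idx] = 1.0
    return bag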
def train(num_epochs=500, learning_rate=0.001):
    global intents

    # Use a JSON file to read through the training data
    with open("intents.json", "r", encoding="UTF-8") as f:
        intents = json.load(f)

    # Will hold every word to tokenize and stem
    all_words = []
    # Will hold every tag to classify the words
    tags = []
    # Will hold patterns and tags
    xy = []

    # the JSON file is treated like a dictionary, therefore we have to use a key for the loop
    for intent in intents["intents"]:
        tag = intent["tag"]
        tags.append(tag)
        for pattern in intent["patterns"]:
            w = tokenize(pattern)
            # We don't want lists inside the all_words list, therefore we extend instead of appending
            all_words.extend(w)
            # to be able to link the words to the different tags
            xy.append((w, tag))

    # setting up the excluded characters
    ignore_words = ["?", "!", ".", ","]
    all_words = [stem(w) for w in all_words if w not in ignore_words]

    # getting an alphabetically sorted list without duplicate words (set removes duplicates)
    all_words = sorted(set(all_words))
    tags = sorted(set(tags))

    X_train = []
    Y_train = []
    for pattern_sentence, tag in xy:
        bag = bag_of_words(pattern_sentence, all_words)
        X_train.append(bag)
        # Get the index of the tag in the tags list
        label = tags.index(tag)
        Y_train.append(label)  # CrossEntropyLoss expects class indices

    # Convert the training data to NumPy arrays
    X_train = np.array(X_train)
    Y_train = np.array(Y_train)

    # Dataset class to train it easily
    class ChatDataSet(Dataset):
        def __init__(self):
            self.n_samples = len(X_train)
            self.x_data = X_train
            self.y_data = Y_train

        def __getitem__(self, index):
            return self.x_data[index], self.y_data[index]

        def __len__(self):
            return self.n_samples

    # Hyperparameters
    batch_size = 8
    hidden_size = 80
    output_size = len(tags)
    input_size = len(all_words)

    # Creating a custom data set to feed into the neural network
    dataset = ChatDataSet()
    train_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

    # Working on the CPU here (switch to "cuda" if a GPU is available)
    device = torch.device("cpu")

    # Defining the model and using it for training
    model = NeuralNet(input_size, hidden_size, output_size).to(device)

    # loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        for words, labels in train_loader:
            words = words.to(device)
            labels = labels.to(device, torch.int64)

            # forward
            outputs = model(words)
            loss = criterion(outputs, labels)

            # backward and optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch % 100 == 0:
            print("Epoch " + str(epoch) + " finished! "
                  + f"loss={loss.item():.4}" + "\n "
                  + str(num_epochs - epoch) + " remaining!")

    data = {
        "model_state": model.state_dict(),
        "input_size": input_size,
        "output_size": output_size,
        "hidden_size": hidden_size,
        "all_words": all_words,
        "tags": tags
    }

    FILE = "Terra-Speak.pth"
    torch.save(data, FILE)

    print(f"Training complete! Model named {FILE} saved.")
def main():
    # 1> open the appropriate files
    # Open the intents.json file in read mode
    with open('intents.json', 'r') as f:
        intents = json.load(f)
    # open the attribute file as attributes
    with open('attributes.json', 'r') as p:
        attributes = json.load(p)

    # 2> create collections from both files
    collection1, allWords1, tags1 = createCollection(intents, 1, "tags")
    collection2, allWords2, tags2 = createCollection(attributes, 2, "attr")

    # 3> concatenate these to create the main collection and word list
    collection1.extend(collection2)
    allWords1.extend(allWords2)
    tags1.extend(tags2)

    # 4> define xTrain and yTrain
    xTrain = []
    yTrain = []

    # 5> build the bag of words for each pattern and the label for each tag
    for (patternSentence, tag) in collection1:
        bag = BagOfWords(patternSentence, allWords1)
        xTrain.append(bag)
        label = tags1.index(tag)
        yTrain.append(label)

    # 6> turn the training data into arrays / tensors
    xTrain = numpy.array(xTrain)
    yTrain = numpy.array(yTrain)
    yTrain = torch.tensor(yTrain, dtype=torch.long)

    # 7> Hyperparameters for training
    batchSize = 8
    # input size equals the bag-of-words length; each row of xTrain is one bag of words
    inputSize = len(xTrain[0])
    hiddenSize = 8
    outputSize = len(tags1)
    learningRate = 0.001
    numEpoch = 1000

    # 8> dataset and trainLoader for data loading and preparation
    dataSet = ChatDataset(xTrain, yTrain)
    trainLoader = DataLoader(dataset=dataSet, batch_size=batchSize, shuffle=True)

    # 9> build the model
    model = NeuralNet(inputSize, hiddenSize, outputSize)

    # 10> loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learningRate)

    # 11> start the training loop
    for epoch in range(numEpoch):
        for (words, label) in trainLoader:
            # forward pass
            outputs = model(words)
            loss = criterion(outputs, label)

            # backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 100 == 0:
            print(f'epoch {epoch+1}/{numEpoch}, loss = {loss.item():.4f}')

    # print the final loss
    print(f'final loss = {loss.item():.4f}')

    # save the model attributes to a file with a .pth extension
    data = {
        "modelState": model.state_dict(),
        "input_size": inputSize,
        "output_size": outputSize,
        "hidden_size": hiddenSize,
        "allWords_size": allWords1,
        "tags": tags1,
    }

    FILE = "data.pth"
    torch.save(data, FILE)
    print(f'training complete. file saved to {FILE}')
decoder = NeuralNet(10, 100,
                    config.getint("dataset", "patch_size") * config.getint("dataset", "patch_size") * 3,
                    activation=nn.Tanh, activate_last=False)

# move to gpu if needed
if use_gpu:
    encoder = encoder.to(torch.device("cuda:0"))
    decoder = decoder.to(torch.device("cuda:0"))

# optimisers
lr = config.getfloat("training", "lr")
optim_encoder = optim.Adam(encoder.parameters(), lr=lr)
optim_decoder = optim.Adam(decoder.parameters(), lr=lr)

# dataset
dataset = Patchifier(config.get("training", "data_path"),
                     int(config.get("dataset", "patch_size")),
                     whiten=config.getboolean("dataset", "whiten"),
                     stride=int(config.get("training", "stride")))

dataloader = DataLoader(dataset,
                        batch_size=int(config.get("training", "batch_size")),
                        shuffle=config.getboolean("dataset", "shuffle"),
                        num_workers=int(config.get("training", "num_workers")))

# loss
criterion = nn.MSELoss()
def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")
    batch_size = args["batch_size"]
    lr = args["learning_rate"]
    momentum = args["momentum"]
    epochs = args["train_epochs"]
    train_split = args["split_train"]

    CSV_FILE = "./data/mushrooms.csv"
    dataset = MushroomDataset(CSV_FILE)

    train_dataset = torch.utils.data.Subset(
        dataset, list(range(int(train_split * len(dataset))))
    )

    train_subset = Subset(train_dataset, labeled)
    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)

    net = NeuralNet()
    net = net.to(device=device)

    criterion = torch.nn.BCELoss()
    optimizer = optim.SGD(net.parameters(), lr=float(lr), momentum=momentum)

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        net.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    net.train()

    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):
        running_loss = 0
        for i, batch in enumerate(train_loader, start=0):
            data, labels = batch
            data = data.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            output = net(data)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            # log the averaged running loss every 1000 batches
            if (i + 1) % 1000 == 0:
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 1000
                    ),
                    end="\r",
                )
                running_loss = 0

    print("Finished Training. Saving the model as {}".format(ckpt_file))

    ckpt = {"model": net.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))

    return
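A possible follow-up sketch for reloading the checkpoint saved above and scoring it, assuming the same NeuralNet and MushroomDataset definitions; since training uses BCELoss, the network presumably emits sigmoid probabilities, so a 0.5 threshold turns them into class predictions. The evaluate name and threshold argument are illustrative, not part of the original pipeline:

def evaluate(args, ckpt_file, threshold=0.5):
    # rebuild the network and restore the weights saved by train()
    net = NeuralNet().to(device)
    ckpt = torch.load(os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"), map_location=device)
    net.load_state_dict(ckpt["model"])
    net.eval()

    dataset = MushroomDataset("./data/mushrooms.csv")
    loader = DataLoader(dataset, batch_size=args["batch_size"], shuffle=False)

    correct, total = 0, 0
    with torch.no_grad():
        for data, labels in loader:
            probs = net(data.to(device)).view(-1)   # sigmoid outputs, as implied by BCELoss
            preds = (probs > threshold).float().cpu()
            correct += (preds == labels.view(-1).float()).sum().item()
            total += labels.numel()
    return correct / total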
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)  # 2 if everything is fine, else 0

i_size = len(X_train[0])
h_size = 8
o_size = len(tags)

model = NeuralNet(i_size, h_size, o_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

n_epoch = 1000
for epoch in range(n_epoch):
    for words, labels in train_loader:
        words = words.to(device)
        labels = labels.to(device)

        outputs = model(words)
        # labels = torch.max(labels, 1)[1]
        loss = criterion(outputs, labels.long())

        # Backward and optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
class DQNAgent(BaseAgent):
    def __init__(self, board, piece, LR, train):
        super().__init__(board, piece, LR, train)
        self.model = NeuralNet()
        self.optimizer = th.optim.Adam(self.model.parameters(), lr=LR)
        self.loss_func = th.nn.MSELoss()

    def get_moves_with_rewards(self):
        valid_moves = self.get_all_moves()
        moves = []
        for move in valid_moves:
            inp = flatten_sa_pair((self.board.values, move)).float()
            reward = self.model(inp)
            moves.append((move, reward.item()))
        return moves

    # def get_best_move(self):
    #     moves = self.get_moves_with_rewards()
    #     if len(moves) == 0:
    #         raise Exception
    #     best_move = max(moves, key=lambda x: x[1])
    #     return best_move[0]

    def learn(self):
        self.optimizer.zero_grad()
        data = th.tensor(self.data).float()
        n_features = data.shape[1] - 1
        features, target = th.split(data, [n_features, 1], dim=1)

        pred = self.model(features)
        target = target.reshape(-1, 1).to(self.model.device)

        loss = self.loss_func(pred, target)
        loss.backward()
        total_loss = loss.item()
        self.optimizer.step()

        # Decrease epsilon to do more exploitation
        self.epsilon -= self.eps_dec
        self.epsilon = max(self.epsilon, self.eps_min)

        return total_loss / len(self.data)

    def test(self, test_file):
        if not self.test_data:
            dataset = Dataset(test_file, featuresCols=range(84), targetCol=[84])
            self.test_data = th.utils.data.DataLoader(dataset, batch_size=20)

        total_loss = 0
        for data, target in self.test_data:
            data = data.float()
            pred = self.model(data)
            target = target.float().reshape(-1, 1).to(self.model.device)
            loss = self.loss_func(pred, target)
            total_loss += loss.item()

        # divide by the dataset size; the local `dataset` variable only exists
        # the first time the loader is built, so read it from the loader instead
        return total_loss / len(self.test_data.dataset)
if argvs[1] == 'CE':
    model = CE(input_size, hidden_size, num_layers, num_classes, vocab_size, emb_size,
               embedding, dropout_rate, cosine_similarity, device).to(device)
if argvs[1] == 'ACE':
    model = ACE(input_size, hidden_size, num_layers, num_classes, vocab_size, emb_size,
                embedding, dropout_rate, cosine_similarity, device).to(device)
if argvs[1] == 'WACE':
    model = WACE(input_size, hidden_size, num_layers, num_classes, vocab_size, emb_size,
                 embedding, dropout_rate, cosine_similarity, entailment, device).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                             lr=learning_rate, betas=(0.9, 0.999), weight_decay=weight_decay)

c_train = math.ceil(len(token_data["train"]["Stance"]) / train_batch)
# c_dev = math.ceil(len(token_data["dev"]["Stance"]) / dev_batch)
c_test = math.ceil(len(token_data["test"]["Stance"]) / test_batch)

# train
print("Start Train")
for epoch in range(num_epoch):
    model.train()
    loss_sum = 0
    accuracy_sum = 0
    for premise, hypothesis, label, sim, entail in batcher(
            token_data["train"], tfidf_cossim["train"],
batch_size = 8
hidden_size = 8
output_size = len(tags)
input_size = len(X_train[0])  # same as len(all_words)
learning_rate = 0.001
num_epochs = 1000

dataset = ChatDataset(X_train, y_train)
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch_device('cuda' if torch_cuda.is_available() else 'cpu')

model = NeuralNet(input_size, hidden_size, output_size).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=learning_rate)

# Our training loop
for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device)

        # Forward
        outputs = model(words)
        loss = criterion(outputs, labels.long())

        # Backward and optimizer steps
        optimizer.zero_grad()
        loss.backward()  # calculate backpropagation
        optimizer.step()
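Unlike the earlier snippets that capture X_train/y_train from an enclosing scope, this one (like the attributes example above) passes the arrays into the dataset constructor. A minimal sketch of such a ChatDataset, assuming bag-of-words feature rows and integer labels; the real class may differ:

from torch.utils.data import Dataset

class ChatDataset(Dataset):
    """Minimal map-style dataset wrapping pre-built feature/label arrays."""
    def __init__(self, x_data, y_data):
        self.n_samples = len(x_data)
        self.x_data = x_data
        self.y_data = y_data

    def __getitem__(self, index):
        # one (bag-of-words, label) pair per sample
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n_samples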
    pathlib.Path(f'{args.samples_path}/valid').glob('sample_*.pkl'))

log(f"{len(train_files)} training samples", logfile)
log(f"{len(valid_files)} validation samples", logfile)

train_files = [str(x) for x in train_files]
valid_files = [str(x) for x in valid_files]

valid_data = LazyDataset(valid_files)
valid_data = DataLoader(valid_data, batch_size=valid_batch_size)

model = NeuralNet(device).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss().to(device)
if args.optimizer == 'Adam':
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
elif args.optimizer == 'RMSprop':
    optimizer = torch.optim.RMSprop(model.parameters(), lr=lr)
else:
    raise Exception('Invalid optimizer')

# Should set lr *= 0.2 when .step() is called
lr_scheduler = ExponentialLR(optimizer, 0.2)

### TRAINING LOOP ###
best_loss = np.inf
plateau_count = 0

for epoch in range(max_epochs + 1):
    log(f"EPOCH {epoch}...", logfile)

    # TRAIN
    if epoch > 0:
def main(read_dir="high.json", write_dir="high.pth"):
    base_json_dir = "resource/jsonFile/" + read_dir
    base_pth_dir = "resource/pthFile/" + write_dir

    with open(base_json_dir, "r", encoding="UTF-8") as file:
        intents = json.load(file)

    all_words = []
    tags = []
    xy = []

    for intent in intents['intents']:
        tag = intent['tag']
        tags.append(tag)
        for pattern in intent['patterns']:
            w = tokenize(pattern)
            all_words.extend(w)
            xy.append((w, tag))

    # characters to ignore; a regular expression would be cleaner here
    ignore_word = [",", ".", "'", '"', "?", "!", "^", "@", "#", "_", "-", "~"]
    all_words = [stem(w) for w in all_words if w not in ignore_word]  # clearer than using map
    all_words = sorted(set(all_words))
    tags = sorted(set(tags))  # sorted for a stable label order

    X_train = []
    Y_train = []
    for (pattern_sentence, tag) in xy:
        bag = bag_of_words(pattern_sentence, all_words)
        X_train.append(bag)
        label = tags.index(tag)
        Y_train.append(label)

    X_train = np.array(X_train)
    Y_train = np.array(Y_train)

    # Hyper-parameters
    num_epochs = 1000
    batch_size = 8
    learning_rate = 0.001
    input_size = len(X_train[0])
    hidden_size = 8
    output_size = len(tags)

    class ChatDataset(Dataset):
        def __init__(self):
            self.n_samples = len(X_train)
            self.x_data = X_train
            self.y_data = Y_train

        # support indexing such that dataset[i] can be used to get the i-th sample
        def __getitem__(self, index):
            return self.x_data[index], self.y_data[index]

        # we can call len(dataset) to return the size
        def __len__(self):
            return self.n_samples

    dataset = ChatDataset()
    train_loader = DataLoader(dataset=dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=0)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = NeuralNet(input_size, hidden_size, output_size).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    for epoch in range(num_epochs):
        for (words, labels) in train_loader:
            words = words.to(device)
            labels = labels.to(dtype=torch.long).to(device)

            # Forward pass
            outputs = model(words)
            # if y were one-hot, we would have to apply
            # labels = torch.max(labels, 1)[1]
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    print(f'final loss: {loss.item():.4f}')

    data = {
        "model_state": model.state_dict(),
        "input_size": input_size,
        "hidden_size": hidden_size,
        "output_size": output_size,
        "all_words": all_words,
        "tags": tags
    }

    torch.save(data, base_pth_dir)

    print(f'training complete. model saved to {base_pth_dir}')
EPOCHS = 4

dataset = Dataset('experience-goat.txt')
dataloader = th.utils.data.DataLoader(dataset, batch_size=GAME_LENGTH)

device = th.device('cuda:0' if th.cuda.is_available() else 'cpu')

# LRs = [0.001, 0.01, 0.0025]
LRs = [0.0025]

for LR in LRs:
    model = NeuralNet().to(device)
    # model = th.load('tigerModel-learn.pt')
    optimizer = th.optim.Adam(model.parameters(), lr=LR)
    loss_func = th.nn.MSELoss()

    avg_loss = []
    for _ in tqdm(range(EPOCHS)):
        total_loss = 0
        for inp, target in tqdm(dataloader, leave=False):
            # print(inp)
            # print(type(th.tensor(inp)))
            # break
            optimizer.zero_grad()

            inp = inp.float().to(device)
            pred = model.layers(inp)
            target = target.float().reshape(-1, 1)
def train(self):
    # create_training_data first
    self.create_training_data()

    # Hyper-parameters
    num_epochs = 800
    batch_size = 8
    learning_rate = 0.001
    input_size = len(self.x_train[0])
    hidden_size = 8
    output_size = len(self.tags)

    dataset = IntentDataset(self.x_train, self.y_train)
    train_loader = DataLoader(dataset=dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              num_workers=2)
    # if using Python 3.8, set num_workers=0. Python 3.8 has a spawn vs
    # fork issue that causes this to fail if num_workers > 0

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model = NeuralNet(input_size, hidden_size, output_size).to(device)

    # Loss and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    # Train the model
    for epoch in range(num_epochs):
        for (words, labels) in train_loader:
            words = words.to(device)
            labels = labels.to(device)

            # Forward pass
            outputs = model(words)
            # if y were one-hot, we would have to apply
            # labels = torch.max(labels, 1)[1]
            loss = criterion(outputs, labels)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    print(f'final loss: {loss.item():.4f}')

    data = {
        "model_state": model.state_dict(),
        "input_size": input_size,
        "hidden_size": hidden_size,
        "output_size": output_size,
        "all_words": self.all_words,
        "tags": self.tags
    }

    FILE = bumblebee_root + "models/" + self.model_name + ".pth"
    torch.save(data, FILE)

    print(f'training complete. file saved to {FILE}')
args = parser.parse_args()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# set the seed for generating random numbers
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed(args.seed)

# get train loader
train_loader = _get_train_loader(args.batch_size, args.data_dir)  # data_dir from above

## TODO: Build the model by passing in the input params
# To get params from the parser, call args.argument_name, e.g. args.epochs or args.hidden_dim
# Don't forget to move your model with .to(device) to move it to the GPU, if appropriate
model = NeuralNet(args.input_dim, args.hidden_dim, args.output_dim).to(device)

# Given: save the parameters used to construct the model
save_model_params(model, args.model_dir)

## TODO: Define an optimizer and loss function for training
optimizer = optim.Adam(model.parameters(), lr=args.lr)
criterion = nn.MSELoss()

# Trains the model (given line of code, which calls the above training function)
# This function *also* saves the model state dictionary
train(model, train_loader, args.epochs, optimizer, criterion, device)
import torch
import torch.nn as nn
from torch import optim
from sklearn.datasets import load_digits
from random import randint

from model import NeuralNet, loss_fn, device

digits = load_digits()
X = torch.tensor(digits['data'], dtype=torch.float32).to(device)
Y = torch.tensor(digits['target'], dtype=torch.int64).to(device)

# move the model to the same device as the data
model = NeuralNet().to(device)
optimizer = optim.Adam(model.parameters())

i = 100
for epoch in range(i):
    optimizer.zero_grad()
    y_predict = model(X)
    loss = loss_fn(y_predict, Y)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, i, loss.item()))
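A short follow-up sketch for checking the fit on the digits data after the loop above; it assumes loss_fn is a classification loss over per-class scores (as the int64 targets suggest) and reports accuracy on the training set only, since the snippet holds out no test split:

with torch.no_grad():
    scores = model(X)                        # per-class scores for all samples
    predictions = scores.argmax(dim=1)
    accuracy = (predictions == Y).float().mean().item()
print('Training accuracy: {:.4f}'.format(accuracy))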
def train():
    for i, (train_idx, valid_idx) in enumerate(splits):
        # split data into train / validation according to the KFold indices
        # also, convert them to torch tensors and store them on the GPU (done with .cuda())
        x_train = np.array(x_train)
        y_train = np.array(y_train)
        features = np.array(features)

        x_train_fold = torch.tensor(x_train[train_idx.astype(int)], dtype=torch.long).cuda()
        y_train_fold = torch.tensor(y_train[train_idx.astype(int), np.newaxis], dtype=torch.float32).cuda()

        kfold_X_features = features[train_idx.astype(int)]
        kfold_X_valid_features = features[valid_idx.astype(int)]

        x_val_fold = torch.tensor(x_train[valid_idx.astype(int)], dtype=torch.long).cuda()
        y_val_fold = torch.tensor(y_train[valid_idx.astype(int), np.newaxis], dtype=torch.float32).cuda()

        # model = BiLSTM(lstm_layer=2, hidden_dim=40, dropout=DROPOUT).cuda()
        model = NeuralNet()

        # make sure everything in the model is running on the GPU
        model.cuda()

        # define binary cross entropy loss
        # note that the model returns logits to take advantage of the log-sum-exp trick
        # for numerical stability in the loss
        loss_fn = torch.nn.BCEWithLogitsLoss(reduction='sum')

        step_size = 300
        base_lr, max_lr = 0.001, 0.003
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                     lr=max_lr)

        scheduler = CyclicLR(optimizer, base_lr=base_lr, max_lr=max_lr,
                             step_size=step_size, mode='exp_range', gamma=0.99994)

        train = torch.utils.data.TensorDataset(x_train_fold, y_train_fold)
        valid = torch.utils.data.TensorDataset(x_val_fold, y_val_fold)

        train = MyDataset(train)
        valid = MyDataset(valid)

        # No need to shuffle the data again here. Shuffling happens when splitting for kfolds.
        train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
        valid_loader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=False)

        print(f'Fold {i + 1}')

        for epoch in range(n_epochs):
            # set train mode of the model. This enables operations which are only applied
            # during training, like dropout
            start_time = time.time()
            model.train()

            avg_loss = 0.
            for i, (x_batch, y_batch, index) in enumerate(train_loader):
                # Forward pass: compute predicted y by passing x to the model.
                f = kfold_X_features[index]
                y_pred = model([x_batch, f])

                if scheduler:
                    scheduler.batch_step()

                # Compute and print loss.
                loss = loss_fn(y_pred, y_batch)

                # Before the backward pass, use the optimizer object to zero all of the
                # gradients for the Tensors it will update (which are the learnable weights
                # of the model)
                optimizer.zero_grad()

                # Backward pass: compute gradient of the loss with respect to model parameters
                loss.backward()

                # Calling the step function on an Optimizer makes an update to its parameters
                optimizer.step()

                avg_loss += loss.item() / len(train_loader)

            # set evaluation mode of the model. This disables operations which are only
            # applied during training, like dropout
            model.eval()

            # predict all the samples in y_val_fold batch per batch
            valid_preds_fold = np.zeros((x_val_fold.size(0)))
            test_preds_fold = np.zeros((len(df_test)))

            avg_val_loss = 0.
            for i, (x_batch, y_batch, index) in enumerate(valid_loader):
                f = kfold_X_valid_features[index]
                y_pred = model([x_batch, f]).detach()

                avg_val_loss += loss_fn(y_pred, y_batch).item() / len(valid_loader)
                valid_preds_fold[i * batch_size:(i + 1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

            elapsed_time = time.time() - start_time
            print('Epoch {}/{} \t loss={:.4f} \t val_loss={:.4f} \t time={:.2f}s'.format(
                epoch + 1, n_epochs, avg_loss, avg_val_loss, elapsed_time))

        avg_losses_f.append(avg_loss)
        avg_val_losses_f.append(avg_val_loss)

        # predict all samples in the test set batch per batch
        for i, (x_batch,) in enumerate(test_loader):
            f = test_features[i * batch_size:(i + 1) * batch_size]
            y_pred = model([x_batch, f]).detach()
            test_preds_fold[i * batch_size:(i + 1) * batch_size] = sigmoid(y_pred.cpu().numpy())[:, 0]

        train_preds[valid_idx] = valid_preds_fold
        test_preds += test_preds_fold / len(splits)

    print('All \t loss={:.4f} \t val_loss={:.4f} \t '.format(np.average(avg_losses_f), np.average(avg_val_losses_f)))
input_size = len(X_train[0])
learning_rate = 0.001
num_epochs = 1000

dataset = ChatDataset()
train_loader = DataLoader(dataset=dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=0)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = NeuralNet(input_size, hidden_size, output_size).to(device)

# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for (words, labels) in train_loader:
        words = words.to(device)
        labels = labels.to(device, dtype=torch.long)

        # forward
        outputs = model(words)
        loss = criterion(outputs, labels)

        # backward and optimizer
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
import torch
import torch.optim as optim

from model import NeuralNet
from train import train, test, test_dataset
from visualizer import show

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = NeuralNet().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)

if __name__ == '__main__':
    for epoch in range(1, 10 + 1):
        train(epoch, model, optimizer, device)
        test(model, device)
        show(model, test_dataset, device)