def main():
    """Parse CLI arguments, derive the data directories, and load the
    requested pre-trained torchvision model, printing its classifier
    input size.

    Fixes vs. original: removed `input = get_args()` (shadowed the
    builtin and called an undefined name), replaced typographic quotes
    (syntax errors) with ASCII quotes, converted the broken if/if/if-else
    chain (whose else fired for every non-alexnet arch) into an
    if/elif/else chain, and dropped the trailing references to the
    undefined `model_dict`/`pretrained_model`/`inputsize_dict`.
    """
    from torchvision import models

    parser = train_args.get_args()
    kg_args = parser.parse_args()

    # define data directories
    data_dir = kg_args.data_dir
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    # Load the requested architecture and determine the input size of its
    # classifier head (attribute location differs per model family).
    if kg_args.arch == 'densenet201':
        model = models.densenet201()
        input_size = model.classifier.in_features
    elif kg_args.arch == 'resnet101':
        model = models.resnet101()
        input_size = model.fc.in_features
    elif kg_args.arch == 'alexnet':
        model = models.alexnet()
        input_size = model.classifier[1].in_features
    else:
        print("Sorry, the model architecture that you provided is not supported.")
        return
    print(input_size)
def main():
    """Entry point: parse args, build data loaders, and kick off training."""
    args = train_args.get_args()
    print(args)

    # Dataset and loader for each split directory under --data_dir.
    training_set = load_datasets(args.data_dir + '/train')
    validation_set = load_datasets(args.data_dir + '/valid')
    training_loader = define_dataloaders(training_set)
    validation_loader = define_dataloaders(validation_set)

    # Peek at one batch so the tensor shape shows up in the log.
    first_inputs, _ = next(iter(training_loader))
    print(first_inputs.size())

    # "--hidden_layers 512,256" -> [512, 256]
    layer_sizes = [int(size) for size in args.hidden_layers.split(',')]
    classifier = define_classifier(layer_sizes)

    print("Loading pre-trained network")
    model = load_network(args.arch)
    print(model)

    train_model(args.arch, model, args.learning_rate, args.gpu, classifier,
                args.epochs, training_loader, validation_loader,
                training_set, args.save_dir)
def main():
    """Train a transfer-learning flower classifier and save a checkpoint.

    Fixes vs. original: the classifier input size was hard-coded to 25088
    (valid only for VGG) although DenseNet archs were accepted; the nested
    train/validate helpers used the long-deprecated `Variable`/`volatile`
    API (replaced with device moves and `torch.no_grad()`); `validate`
    left the model in eval mode, silently disabling dropout for all
    training batches after the first validation pass; the output layer now
    uses the category count instead of a hard-coded 102; dead
    commented-out code was removed.
    """
    data_dir = 'flowers'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    parser = train_args.get_args()
    cli_args = parser.parse_args()

    # check for data directory
    if not os.path.isdir(cli_args.data_directory):
        print(f'Data directory {cli_args.data_directory} was not found.')
        exit(1)

    # check for save directory
    if not os.path.isdir(cli_args.save_dir):
        print(f'Directory {cli_args.save_dir} does not exist. Creating...')
        os.makedirs(cli_args.save_dir)

    # load categories
    with open(cli_args.categories_json, 'r') as f:
        cat_to_name = json.load(f)

    # set output to the number of categories
    output_size = len(cat_to_name)
    print(f"Images are labeled with {output_size} categories.")

    # ImageNet normalization shared by all splits; validation/testing use
    # the same deterministic resize+crop pipeline.
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    eval_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    data_transforms = {
        'training': transforms.Compose([
            transforms.RandomRotation(35),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]),
        'validation': eval_transform,
        'testing': eval_transform,
    }

    # NOTE(review): training reads from the CLI directory while valid/test
    # use the hard-coded 'flowers' tree — confirm this split is intended.
    image_datasets = {
        'training': datasets.ImageFolder(cli_args.data_directory,
                                         transform=data_transforms['training']),
        'testing': datasets.ImageFolder(test_dir,
                                        transform=data_transforms['testing']),
        'validation': datasets.ImageFolder(valid_dir,
                                           transform=data_transforms['validation']),
    }
    dataloaders = {
        'training': torch.utils.data.DataLoader(image_datasets['training'],
                                                batch_size=64, shuffle=True),
        'testing': torch.utils.data.DataLoader(image_datasets['testing'],
                                               batch_size=64, shuffle=False),
        'validation': torch.utils.data.DataLoader(image_datasets['validation'],
                                                  batch_size=64, shuffle=True),
    }

    # Make model
    if not cli_args.arch.startswith("vgg") and not cli_args.arch.startswith(
            "densenet"):
        print("Only supporting VGG and DenseNet")
        exit(1)
    print(f"Using a pre-trained {cli_args.arch} network.")
    model = models.__dict__[cli_args.arch](pretrained=True)

    # Derive the classifier input size from the loaded model: VGG has a
    # Sequential classifier (first Linear carries it); DenseNet has a
    # single Linear classifier.
    if cli_args.arch.startswith("vgg"):
        input_size = model.classifier[0].in_features
    else:
        input_size = model.classifier.in_features

    classifier = nn.Sequential(
        nn.Sequential() if False else nn.Sequential())  # placeholder removed below
    classifier = nn.Sequential(
        OrderedDict([
            ('fc1', nn.Linear(input_size, 4096)),   # First layer
            ('relu', nn.ReLU()),                    # Activation
            ('fc2', nn.Linear(4096, output_size)),  # Output layer
            ('output', nn.LogSoftmax(dim=1))        # Log-probabilities for NLLLoss
        ]))

    # Freeze the feature extractor, then attach the fresh (trainable) head.
    for param in model.parameters():
        param.requires_grad = False
    model.classifier = classifier

    use_gpu = torch.cuda.is_available()
    device = torch.device("cuda" if use_gpu else "cpu")

    def validate(model, criterion, data_loader):
        """Return (avg loss, avg accuracy) of `model` over `data_loader`."""
        model.eval()
        accuracy = 0
        test_loss = 0
        with torch.no_grad():  # replaces deprecated volatile=True Variables
            for inputs, labels in data_loader:
                inputs = inputs.to(device)
                labels = labels.to(device)
                output = model(inputs)
                test_loss += criterion(output, labels).item()
                ps = torch.exp(output)
                equality = (labels == ps.max(1)[1])
                accuracy += equality.float().mean().item()
        model.train()  # restore training mode (original left eval mode on)
        return test_loss / len(data_loader), accuracy / len(data_loader)

    def train(model, epochs, learning_rate, criterion, optimizer,
              training_loader, validation_loader):
        """Run the optimization loop, logging every `print_every` batches."""
        model.to(device)
        model.train()
        print_every = 40
        steps = 0
        for epoch in range(epochs):
            running_loss = 0
            for inputs, labels in training_loader:
                steps += 1
                inputs = inputs.to(device)
                labels = labels.to(device)
                # zero out the gradient, otherwise it keeps accumulating
                optimizer.zero_grad()
                output = model(inputs)
                loss = criterion(output, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()
                if steps % print_every == 0:
                    validation_loss, accuracy = validate(
                        model, criterion, validation_loader)
                    print(
                        "Epoch: {}/{} ".format(epoch + 1, epochs),
                        "Training Loss: {:.3f} ".format(running_loss / print_every),
                        "Validation Loss: {:.3f} ".format(validation_loss),
                        "Validation Accuracy: {:.3f}".format(accuracy))

    print("Success")

    epochs = cli_args.epochs
    learning_rate = cli_args.learning_rate
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.classifier.parameters(), lr=learning_rate)
    train(model, epochs, learning_rate, criterion, optimizer,
          dataloaders['training'], dataloaders['validation'])

    # Save checkpoint (move to CPU first so it loads on any machine).
    model.class_to_idx = image_datasets['training'].class_to_idx
    model.cpu()
    torch.save(
        {
            'arch': cli_args.arch,
            'state_dict': model.state_dict(),  # all weights and biases
            'class_to_idx': model.class_to_idx
        }, 'checkpoint.pth')
    print("Checkpoint saved.")
# NOTE(review): this chunk begins mid-method — `save_path`/`self.agent.save`
# belong to an enclosing Learner method whose `def` lies outside this view,
# and these `def`s are Learner methods (the class header is not visible).
# Periodic snapshot: save the agent under .../model_every_100_episodes/episodes-<T>.
save_path = os.path.join(
    logger.get_dir(), 'model_every_100_episodes/episodes-{}'.format(T))
self.agent.save(save_path)

def restore(self, model_path):
    """Reload agent parameters from a previously saved checkpoint path."""
    logger.info('restore model from {}'.format(model_path))
    self.agent.restore(model_path)

def add_episode_rpm(self, episode_rpm):
    """Append every (obs, act, reward, next_obs, terminal) transition of
    one episode to the replay memory."""
    for x in episode_rpm:
        self.rpm.append(
            obs=x[0], act=x[1], reward=x[2], next_obs=x[3], terminal=x[4])

def pred_batch(self, obs):
    """Predict an action for a single observation.

    The observation is wrapped in a batch of size 1 for the agent; the
    model lock guards prediction against concurrent parameter updates.
    """
    batch_obs = np.expand_dims(obs, axis=0)
    with self.model_lock:
        action = self.agent.predict(batch_obs.astype('float32'))
    action = np.squeeze(action, axis=0)  # unwrap the size-1 batch
    return action


if __name__ == '__main__':
    from train_args import get_args

    args = get_args()
    # Redirect the logger before the Learner starts writing output.
    if args.logdir is not None:
        logger.set_dir(args.logdir)
    learner = Learner(args)
elif unchanged > unchanged_limit: create_metric_figure(ofp_fname, loss_ls, loss_ls_s, loss_ls_qa, loss_valid_ls, qa_f1, sent_f1, cur_used_ls_mean, total_used, total_s, mean_seg_len) return else: unchanged += 1 create_metric_figure(ofp_fname, loss_ls, loss_ls_s, loss_ls_qa, loss_valid_ls, qa_f1, sent_f1, cur_used_ls_mean, total_used, total_s, mean_seg_len) args = parse.get_args() batch_size = args.batch_size sent_len = args.sent_len if args.train: ofp_fname = create_output_name(args) data_loader_valid, num_val, used_b_id, x_for_rouge, all_sent_align = create_iterator( data_split='valid', max_len=sent_len, max_size=-1, batch_size=batch_size, balance=None, bert_model=args.bert_model,
def main():
    """Train a flower classifier via transfer learning and save a checkpoint.

    Fixes vs. original: the classifier input size was read via
    `model.classifier[0]`, which raises for DenseNet archs whose classifier
    is a single Linear (not subscriptable); the checkpoint was written into
    --save_dir without ensuring the directory exists.
    """
    import os  # local import: only needed for the save-dir fix below

    args = train_args.get_args()

    # ImageNet statistics expected by all torchvision pre-trained models.
    means = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    image_size = 224
    batch_size = 32

    train_transforms = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(image_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(means, std),
    ])
    valid_transforms = transforms.Compose([
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(means, std),
    ])

    data_dir = args.data_directory
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'

    training_dataset = datasets.ImageFolder(train_dir, transform=train_transforms)
    training_dataloader = torch.utils.data.DataLoader(
        training_dataset, batch_size=batch_size, shuffle=True)
    valid_dataset = datasets.ImageFolder(valid_dir, transform=valid_transforms)
    valid_dataloader = torch.utils.data.DataLoader(valid_dataset, batch_size=64)

    # Start with CPU
    device = torch.device("cpu")
    print("Cuda: " + str(torch.cuda.is_available()))
    # Requested GPU
    if args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        print("Using CPU.")

    model = models.__dict__[args.arch](pretrained=True)

    # Derive the classifier input size for both classifier layouts:
    # Sequential (VGG/AlexNet: first Linear) and bare Linear (DenseNet).
    if isinstance(model.classifier, nn.Sequential):
        input_size = model.classifier[0].in_features
    else:
        input_size = model.classifier.in_features

    # Freeze parameters so we don't backprop through them
    for param in model.parameters():
        param.requires_grad = False

    model_classifier = nn.Sequential(
        nn.Linear(input_size, args.hidden_units),
        nn.ReLU(),
        nn.Dropout(0.5),
        nn.Linear(args.hidden_units, 102),
        nn.LogSoftmax(dim=1))
    model.classifier = model_classifier

    criterion = nn.NLLLoss()
    # Only train the classifier parameters, feature parameters are frozen
    optimizer = optim.Adam(model.classifier.parameters(), lr=args.learning_rate)
    model.to(device)

    epochs = args.epochs
    steps = 0
    running_loss = 0
    print_every = 5
    for epoch in range(epochs):
        for inputs, labels in training_dataloader:
            steps += 1
            # Move input and label tensors to the default device
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            logps = model.forward(inputs)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            if steps % print_every == 0:
                val_loss = 0
                accuracy = 0
                model.eval()
                with torch.no_grad():
                    for inputs, labels in valid_dataloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        logps = model.forward(inputs)
                        batch_loss = criterion(logps, labels)
                        val_loss += batch_loss.item()
                        # Calculate accuracy
                        ps = torch.exp(logps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
                print(f"Epoch {epoch + 1}/{epochs}.. "
                      f"Train loss: {running_loss / print_every:.3f}.. "
                      f"Validation loss: {val_loss / len(valid_dataloader):.3f}.. "
                      f"Validation accuracy: {accuracy / len(valid_dataloader):.3f}")
                running_loss = 0
                model.train()

    print('Saving')
    # Ensure the checkpoint directory exists before writing into it.
    os.makedirs(args.save_dir, exist_ok=True)
    model.class_to_idx = training_dataset.class_to_idx
    checkpoint = {'input_size': input_size,
                  'output_size': 102,
                  'arch': args.arch,
                  'learning_rate': args.learning_rate,
                  'batch_size': 64,
                  'classifier': model_classifier,
                  'optimizer': optimizer.state_dict(),
                  'state_dict': model.state_dict(),
                  'class_to_idx': model.class_to_idx}
    torch.save(checkpoint, f'{args.save_dir}/checkpoint_cli.pth')
def main():
    """Load job specs, configure output/Tensorboard dirs, and run each job
    (in parallel via a process pool or sequentially), then notify the user
    and optionally sync results to S3 and shut the instance down.
    """
    # See train_args.py for the list of args.
    args = train_args.get_args()

    # Get the config files, expanding globs and directories (*) if necessary.
    # jobs = config_reader.process_config_files(args.config)
    # assert jobs, 'No jobs found from config.'

    # Dynamically import an extra module named on the command line.
    # NOTE(review): exec/eval below run arbitrary code from CLI args and
    # local files — acceptable only because both are operator-controlled.
    if args.include:
        exec("import " + args.include)

    # Scan job sets
    job_set = []
    for i in range(0, 1):
        file_name = 'job_data_mjrl_%d.txt' % i
        with open(file_name, 'rb') as f:
            s = f.read()
        # The job file holds a Python literal (dict of parameter lists).
        job_set.append(eval(s))

    # create jobs: one job per combination in the cartesian product of the
    # parameter lists, each given a unique numeric suffix.
    jobs = []
    ctr = 0
    for j in job_set:
        for j2 in product_dict(**j):
            j2['job_name'] = j2['job_name'] + "_" + str(ctr)
            jobs.append(j2)
            ctr += 1

    # Create the output directory if not present.
    output_dir = args.output_dir or os.getcwd()
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    output_dir = os.path.abspath(output_dir)

    # If the log directory is given, enable Tensorboard logging.
    # Create the Tensorboard log directory if not present.
    if args.tensorboard:
        tb_output_path = os.path.join(output_dir, 'tensorboard')
        if not os.path.isdir(tb_output_path):
            os.mkdir(tb_output_path)
        tensorboard.enable(tb_output_path)

    # Hardware/robot settings shared by all jobs.
    robot_args = {
        'is_hardware': args.hardware,
        'legacy': args.legacy,
        'device_name': args.device,
        'overlay': args.overlay,
        'calibration_mode': args.calibration_mode,
    }

    for index, job in enumerate(jobs):
        # Modify the job name to include the job number.
        assert 'job_name' in job
        if len(jobs) > 1:
            job['job_name'] = '{}_{}'.format(job['job_name'], index)
        # Add additional parameters to the job.
        job['output_dir'] = os.path.join(output_dir, job['job_name'])
        # Set the robot configuration.
        if 'robot' not in job:
            job['robot'] = {}
        # Only keep the entries where the arg is given.
        job['robot'].update({k: v for k, v in robot_args.items() if v})
        # Override num_cpu if the args.num_cpu is given or if we're running
        # on hardware (hardware runs are forced single-process).
        job['num_cpu'] = 1 if args.hardware else (args.num_cpu or job['num_cpu'])

    print('Running {} jobs {}'.format(
        len(jobs), 'in parallel' if args.parallel else 'sequentially'))

    # execute jobs
    t1 = timer.time()
    if args.parallel:
        # processes: Number of processes to create
        # maxtasksperchild: the number of tasks a worker process can complete
        # before it will exit and be replaced with a fresh worker process
        pool = mp.Pool(processes=len(jobs), maxtasksperchild=1)
        parallel_runs = [
            pool.apply_async(single_process, args=(job, )) for job in jobs
        ]
        try:
            max_process_time = 360000  # process time out in seconds
            results = [p.get(timeout=max_process_time) for p in parallel_runs]
        except Exception as e:
            notify_user("exception thrown in " + jobs[0]['job_name'], str(e))
            print('exception thrown')
            print(str(e))
            traceback.print_exc()
        pool.close()
        pool.terminate()
        pool.join()
    else:
        for job in jobs:
            try:
                time_taken = single_process(job)
            except Exception as e:
                notify_user("exception thrown in " + jobs[0]['job_name'], str(e))
                print('exception thrown')
                print(str(e))
                traceback.print_exc()
    t2 = timer.time()

    # Send notifcation to the user
    msg_subject = "{}:: {} finished".format(os.uname()[1], jobs[0]['job_name'])
    msg_body = 'Total time taken = %f sec' % (t2 - t1)
    # msg_body += ":: CPU-" + str(psutil.cpu_percent()) + str(psutil.virtual_memory())
    print(msg_subject + ": " + msg_body)
    if args.email:
        send_message(args.email, msg_subject, msg_body)
    if args.sms:
        send_message(args.sms, msg_subject, msg_body)

    # Upload to S3 buckets, then power the machine off (cloud workflow).
    if args.upload == 's3':
        print("Uploading Project folder to S3")
        os.system("aws s3 sync ~/Projects/r3l/ s3://r3l/")
        print("Stopping instance")
        os.system("sudo shutdown -h now")
    return
# Module-level setup: imports, CLI parsing, and image transforms are all
# executed at import time.
import matplotlib.pyplot as plt
import torch
import numpy as np
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from PIL import Image
import json

import train_args

# Parse CLI arguments and echo them into the training log.
args = train_args.get_args()
print(args)

# Split directories under --data_dir.
train_dir = args.data_dir + '/train'
valid_dir = args.data_dir + '/valid'
test_dir = args.data_dir + '/test'

# Training augmentation + ImageNet normalization statistics.
train_transforms = transforms.Compose([transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],
                                                            [0.229, 0.224, 0.225])])
# Deterministic resize/center-crop pipeline for validation.
valid_transforms = transforms.Compose([transforms.Resize(256),
                                       transforms.CenterCrop(224),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406],
                                                            [0.229, 0.224, 0.225])])
def main():
    """Train an AlexNet-based transfer-learning classifier from CLI args.

    Fixes vs. original: `save_dir`/`save_name` were only bound inside the
    `if cli_args.save_dir:`/`if cli_args.save_name:` guards but referenced
    unconditionally afterwards, raising NameError when either option was
    empty — they are now initialized up front.
    """
    parser = train_args.get_args()
    cli_args = parser.parse_args()

    if cli_args.arch != 'alexnet':
        print('Currently, we support only AlexNet.')
        exit(1)

    use_cuda = False
    epochs = cli_args.epochs

    # Initialize so the combined check below never sees unbound names.
    save_dir = None
    save_name = None
    checkpoint_name = 'checkpoint.pt'
    if cli_args.save_dir:
        save_dir = cli_args.save_dir
    if cli_args.save_name:
        save_name = cli_args.save_name
    if save_dir and save_name:
        checkpoint_name = f'{cli_args.save_dir}/{cli_args.save_name}.pt'

    # check if CUDA is available and if we want to use it
    if cli_args.use_gpu and torch.cuda.is_available():
        use_cuda = True
    else:
        print("GPU is not available. Using CPU.")

    hidden_units = cli_args.hidden_units

    # check for data directory
    if not os.path.isdir(cli_args.data_directory):
        print(f'Data directory {cli_args.data_directory} was not found.')
        exit(1)

    # check for save directory
    if not os.path.isdir(cli_args.save_dir):
        print(f'Directory {cli_args.save_dir} does not exist. Creating...')
        os.makedirs(cli_args.save_dir)

    # load the directory
    # NOTE(review): the validation path is hard-coded while training comes
    # from the CLI — confirm this is intentional.
    train_dir = cli_args.data_directory
    valid_dir = 'flowers/valid'

    # ImageNet normalization shared by both pipelines.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.RandomRotation(15),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    valid_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    data_transforms = {'train': train_transform, 'valid': valid_transform}

    train_dataset = ImageFolder(train_dir, transform=train_transform)
    valid_dataset = ImageFolder(valid_dir, transform=valid_transform)
    image_datasets = {'train': train_dataset, 'valid': valid_dataset}

    batch_size = 20
    num_workers = 0
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=True)
    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=batch_size,
                                               num_workers=num_workers,
                                               shuffle=False)
    dataloaders = {'train': train_loader, 'valid': valid_loader}

    # Category index -> flower-name mapping.
    with open('cat_to_name.json', 'r') as f:
        cat_to_name = json.load(f)

    model_transfer, criterion_transfer, optimizer_transfer = get_model(
        use_cuda=use_cuda, hidden_units=hidden_units)
    model_transfer = train(dataloaders, model_transfer, optimizer_transfer,
                           criterion_transfer, checkpoint_name, epochs,
                           use_cuda, train_dataset)
def main():
    """
    Image Classification Network Trainer

    Parses CLI args, builds an augmented training loader, attaches a fresh
    classifier head to a pre-trained VGG/DenseNet backbone, trains it, and
    saves a checkpoint.

    Fixes vs. original: `hidden_sizes.insert(0, input_size)` mutated
    `cli_args.hidden_units` in place *before* it was printed, so the log
    misreported both the hidden-layer count and the sizes; the hidden
    sizes are now built as a copy and logged first.  The output layer also
    uses `hidden_sizes[-1]` instead of a leaked loop variable, which
    raised NameError when no hidden units were given.
    """
    parser = train_args.get_args()
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + __version__ + ' by ' + __author__)
    cli_args = parser.parse_args()

    # check for data directory
    if not os.path.isdir(cli_args.data_directory):
        print(f'Data directory {cli_args.data_directory} was not found.')
        exit(1)

    # check for save directory
    if not os.path.isdir(cli_args.save_dir):
        print(f'Directory {cli_args.save_dir} does not exist. Creating...')
        os.makedirs(cli_args.save_dir)

    # load categories
    with open(cli_args.categories_json, 'r') as f:
        cat_to_name = json.load(f)

    # set output to the number of categories
    output_size = len(cat_to_name)
    print(f"Images are labeled with {output_size} categories.")

    # prep data loader
    expected_means = [0.485, 0.456, 0.406]
    expected_std = [0.229, 0.224, 0.225]
    max_image_size = 224
    batch_size = 32
    training_transforms = transforms.Compose([
        transforms.RandomHorizontalFlip(p=0.25),
        transforms.RandomRotation(25),
        transforms.RandomGrayscale(p=0.02),
        transforms.RandomResizedCrop(max_image_size),
        transforms.ToTensor(),
        transforms.Normalize(expected_means, expected_std)
    ])
    training_dataset = datasets.ImageFolder(cli_args.data_directory,
                                            transform=training_transforms)
    training_dataloader = torch.utils.data.DataLoader(training_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True)

    # Make model
    if not cli_args.arch.startswith("vgg") and not cli_args.arch.startswith(
            "densenet"):
        print("Only supporting VGG and DenseNet")
        exit(1)
    print(f"Using a pre-trained {cli_args.arch} network.")
    nn_model = models.__dict__[cli_args.arch](pretrained=True)

    # DenseNet classifier input sizes; unlike VGG these are not read off a
    # Sequential classifier's first Linear layer.
    densenet_input = {
        'densenet121': 1024,
        'densenet169': 1664,
        'densenet161': 2208,
        'densenet201': 1920
    }
    input_size = 0
    # Input size from current classifier if VGG
    if cli_args.arch.startswith("vgg"):
        input_size = nn_model.classifier[0].in_features
    if cli_args.arch.startswith("densenet"):
        input_size = densenet_input[cli_args.arch]

    # Prevent back propagation on the pre-trained feature parameters.
    for param in nn_model.parameters():
        param.requires_grad = False

    # Log the user-requested sizes BEFORE prepending input_size, and work
    # on a copy so cli_args.hidden_units is never mutated.
    print(f"Building a {len(cli_args.hidden_units)} hidden layer classifier "
          f"with inputs {cli_args.hidden_units}")
    hidden_sizes = [input_size] + list(cli_args.hidden_units)

    # Linear -> ReLU -> Dropout per hidden layer, then the output head.
    od = OrderedDict()
    for i in range(len(hidden_sizes) - 1):
        od['fc' + str(i + 1)] = nn.Linear(hidden_sizes[i], hidden_sizes[i + 1])
        od['relu' + str(i + 1)] = nn.ReLU()
        od['dropout' + str(i + 1)] = nn.Dropout(p=0.15)
    od['output'] = nn.Linear(hidden_sizes[-1], output_size)
    od['softmax'] = nn.LogSoftmax(dim=1)
    classifier = nn.Sequential(od)

    # Replace classifier
    nn_model.classifier = classifier
    # Start clean by setting gradients of all parameters to zero.
    nn_model.zero_grad()
    # The negative log likelihood loss as criterion.
    criterion = nn.NLLLoss()
    # Adam: A Method for Stochastic Optimization
    # https://arxiv.org/abs/1412.6980
    print(f"Setting optimizer learning rate to {cli_args.learning_rate}.")
    optimizer = optim.Adam(nn_model.classifier.parameters(),
                           lr=cli_args.learning_rate)

    # Start with CPU
    device = torch.device("cpu")
    # Requested GPU
    if cli_args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        print("GPU is not available. Using CPU.")
    print(f"Sending model to device {device}.")
    nn_model = nn_model.to(device)

    data_set_len = len(training_dataloader.batch_sampler)
    chk_every = 50
    print(f'Using the {device} device to train.')
    print(f'Training on {data_set_len} batches of {training_dataloader.batch_size}.')
    print(f'Displaying average loss and accuracy for epoch every {chk_every} batches.')

    for e in range(cli_args.epochs):
        e_loss = 0
        prev_chk = 0
        total = 0
        correct = 0
        print(f'\nEpoch {e+1} of {cli_args.epochs}\n----------------------------')
        for ii, (images, labels) in enumerate(training_dataloader):
            # Move images and labels to the preferred device
            # if they are not already there
            images = images.to(device)
            labels = labels.to(device)
            # Set gradients of all parameters to zero.
            optimizer.zero_grad()
            # Propagate forward and backward
            outputs = nn_model.forward(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Keep a running total of loss for this epoch
            e_loss += loss.item()
            # Accuracy
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            itr = (ii + 1)
            if itr % chk_every == 0:
                avg_loss = f'avg. loss: {e_loss/itr:.4f}'
                acc = f'accuracy: {(correct/total) * 100:.2f}%'
                print(f' Batches {prev_chk:03} to {itr:03}: {avg_loss}, {acc}.')
                prev_chk = (ii + 1)

    print('Done... Saving')
    nn_model.class_to_idx = training_dataset.class_to_idx
    model_state = {
        'epoch': cli_args.epochs,
        'state_dict': nn_model.state_dict(),
        'optimizer_dict': optimizer.state_dict(),
        'classifier': nn_model.classifier,
        'class_to_idx': nn_model.class_to_idx,
        'arch': cli_args.arch
    }
    save_location = f'{cli_args.save_dir}/{cli_args.save_name}.pth'
    print(f"Saving checkpoint to {save_location}")
    torch.save(model_state, save_location)
def main():
    """Prepare datasets and a pre-trained backbone for flower classification.

    Fixes vs. original: removed a stray `dataloaders[...] = process(...)`
    line that ran before `dataloaders` existed and called an undefined
    `process`; the model was assigned to `nn.model` (an attribute on the
    torch.nn module!) instead of a local; `inputsize_dict[pretrained_model]`
    referenced undefined names; AlexNet's flattened feature size is 9216,
    not 9218; the --version string gained the ' by ' separator used by the
    sibling trainers; the redundant eager loading of all three pretrained
    networks was dropped.
    """
    parser = train_args.get_args()
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + __version__ + ' by ' + __author__)
    kg_args = parser.parse_args()

    # check for data directory
    if not os.path.isdir(kg_args.data_dir):
        print(f'Directory {kg_args.data_dir} was not found.')
        exit(1)

    # check for save directory and create if needed
    if not os.path.isdir(kg_args.save_dir):
        print(f'Directory {kg_args.save_dir} does not exist. Creating now....')
        os.makedirs(kg_args.save_dir)

    # load categories for training
    with open(kg_args.category_names, 'r') as f:
        cat_to_name = json.load(f)

    # set output to number of categories
    output_size = len(cat_to_name)
    print(f"Image classifier has {output_size} categories.")

    # define data directories
    data_dir = kg_args.data_dir
    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/valid'
    test_dir = data_dir + '/test'

    # define transforms for the training, validation, and testing sets
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    eval_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize])
    data_transforms = {
        'train': transforms.Compose([
            transforms.RandomRotation(30),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize]),
        'valid': eval_transform,
        'test': eval_transform,
    }

    image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x),
                                              data_transforms[x])
                      for x in ['train', 'valid', 'test']}
    dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                  batch_size=64,
                                                  shuffle=True)
                   for x in ['train', 'valid', 'test']}
    dataset_sizes = {x: len(image_datasets[x])
                     for x in ['train', 'valid', 'test']}
    class_names = image_datasets['train'].classes

    # determine model details
    if not (kg_args.arch.startswith("vgg")
            or kg_args.arch.startswith("resnet")
            or kg_args.arch.startswith("alexnet")):
        print("Only supporting VGG, Resnet, and Alexnet")
        exit(1)
    print(f"Using a pre-trained {kg_args.arch} model.")
    model = models.__dict__[kg_args.arch](pretrained=True)

    # Classifier input size per architecture family.
    input_dict = {"vgg": 25088, "resnet": 512, "alexnet": 9216}
    family = next(p for p in input_dict if kg_args.arch.startswith(p))
    input_size = input_dict[family]
#Using the image datasets and the trainforms, define the dataloaders trainloader = torch.utils.data.DataLoader(image_datasets[0], batch_size=64, shuffle=True) validloader = torch.utils.data.DataLoader(image_datasets[1], batch_size=32) testloader = torch.utils.data.DataLoader(image_datasets[2], batch_size=32) dataloaders = [trainloader, validloader, testloader] with open('cat_to_name.json', 'r') as f: cat_to_name = json.load(f) parser = train_args.get_args() args = parser.parse_args() def get_model(): if args.vgg == 1: model = models.vgg11(pretrained=True) elif args.vgg == 2: model = models.vgg13(pretrained=True) elif args.vgg == 3: model = models.vgg16(pretrained=True) elif args.vgg == 4: model = models.vgg19(pretrained=True) if args.alexnet: model = models.alexnet(pretrained=True)
def main():
    """
    Image Classification Network Trainer

    Student: Louis Bove
    Credit for assistance: https://github.com/cjimti/aipnd-project &
    https://github.com/DMells/Convolutional-Neural-Networks-Project

    Fix vs. original: the training batches were moved to a hard-coded
    `torch.device("cuda:0")` instead of the computed `device`, crashing on
    CPU-only machines even though a CPU fallback was selected above.
    """
    parser = train_args.get_args()
    cli_args = parser.parse_args()

    # checking for data directory
    if not os.path.isdir(cli_args.data_directory):
        print(f'Data directory {cli_args.data_directory} was not found.')
        exit(1)

    # checking for save directory and then making it if not already created
    if not os.path.isdir(cli_args.save_dir):
        print(
            f'Directory {cli_args.save_dir} does not exist. Making directory, stand by.'
        )
        os.makedirs(cli_args.save_dir)

    # loading categories
    with open(cli_args.categories_json, 'r') as f:
        cat_to_name = json.load(f)

    # Base output on category number
    output_size = len(cat_to_name)

    # Define batch size
    batch_size = 32

    # Utilizing transform function to modify image
    training_trans = transforms.Compose([
        transforms.RandomRotation(30),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Defining data set and data loader
    training_dataset = datasets.ImageFolder(cli_args.data_directory,
                                            transform=training_trans)
    training_dataloader = torch.utils.data.DataLoader(training_dataset,
                                                      batch_size=batch_size,
                                                      shuffle=True)

    # Check nn are only vgg
    if not cli_args.arch.startswith("vgg"):
        print("Program only supports VGG")
        exit(1)
    print(f"Running {cli_args.arch} network.")
    nn_model = models.__dict__[cli_args.arch](pretrained=True)

    input_size = 0
    # Input size for VGG: first Linear layer of the stock classifier.
    if cli_args.arch.startswith("vgg"):
        input_size = nn_model.classifier[0].in_features

    # IOT prevent backpropagation through the pre-trained features
    for param in nn_model.parameters():
        param.requires_grad = False

    # Create hidden sizes based on input size
    hidden_sizes = cli_args.hidden_units
    hidden_sizes.insert(0, input_size)

    # Ensure the order is kept (https://pymotw.com/3/collections/ordereddict.html)
    od = OrderedDict()
    for i in range(len(hidden_sizes) - 1):
        od['fc' + str(i + 1)] = nn.Linear(hidden_sizes[i], hidden_sizes[i + 1])
        od['relu' + str(i + 1)] = nn.ReLU()
        od['dropout' + str(i + 1)] = nn.Dropout(p=0.15)
    od['output'] = nn.Linear(hidden_sizes[i + 1], output_size)
    od['softmax'] = nn.LogSoftmax(dim=1)
    classifier = nn.Sequential(od)

    # Replace classifier (https://pytorch.org/docs/stable/cuda.html)
    nn_model.classifier = classifier
    # Zero out parameters (https://discuss.pytorch.org/t/zero-grad-optimizer-or-net/1887/5)
    nn_model.zero_grad()
    # The negative log likelihood loss as criterion.
    criterion = nn.NLLLoss()
    # Only train the classifier parameters (https://pytorch.org/docs/stable/optim.html)
    print(f"Optimizer learn rate - {cli_args.learning_rate}.")
    optimizer = optim.Adam(nn_model.classifier.parameters(),
                           lr=cli_args.learning_rate)

    # Start with CPU
    device = torch.device("cpu")
    # Requested GPU
    if cli_args.use_gpu and torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        print("GPU is not available. Only CPU available.")

    # Send model to device
    nn_model = nn_model.to(device)

    chk_every = 1
    data_set_len = len(training_dataloader.batch_sampler)
    print(f'Training on {data_set_len} batches of {training_dataloader.batch_size}.')
    print(f'Displaying average loss and accuracy for epoch every {chk_every} batches.')

    for e in range(cli_args.epochs):
        e_loss = 0
        prev_chk = 0
        correct = 0
        total = 0
        print(f'\nEpoch {e+1} of {cli_args.epochs}\n----------------------------')
        for ii, (images, labels) in enumerate(training_dataloader):
            # Move batch to the SELECTED device (was hard-coded "cuda:0").
            images = images.to(device)
            labels = labels.to(device)
            # Zero out gradients
            optimizer.zero_grad()
            # Propagate both forward and backward
            outputs = nn_model.forward(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            # Running total
            e_loss += loss.item()
            # Accuracy calculation
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            # Running total
            itr = (ii + 1)
            if itr % chk_every == 0:
                avg_loss = f'avg. loss: {e_loss/itr:.4f}'
                acc = f'accuracy: {(correct/total) * 100:.2f}%'
                print(f' Batche {prev_chk:03}-{itr:03}: {avg_loss}, {acc}.')
                prev_chk = (ii + 1)

    print('Training Complete')
    nn_model.class_to_idx = training_dataset.class_to_idx
    model_state = {
        'epoch': cli_args.epochs,
        'state_dict': nn_model.state_dict(),
        'optimizer_dict': optimizer.state_dict(),
        'classifier': nn_model.classifier,
        'class_to_idx': nn_model.class_to_idx,
        'arch': cli_args.arch
    }
    save_location = f'{cli_args.save_dir}/{cli_args.save_name}.pth'
    print(f"Checkpoint saved to {save_location}")
    torch.save(model_state, save_location)
def main():
    """Set up data, a pre-trained VGG/DenseNet backbone, and a custom
    classifier head.

    NOTE(review): this function appears truncated at the end of the chunk —
    it stops right after `my_model.zero_grad()` with no training loop or
    checkpoint save visible.
    """
    parser = train_args.get_args()
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s ' + __version__ + ' by ' + __author__)
    cli_args = parser.parse_args()

    # directory
    # First check
    if not os.path.isdir(cli_args.data_directory):
        print(f'Data directory {cli_args.data_directory} not found.')
        exit(1)

    # Then save directory
    if not os.path.isdir(cli_args.save_dir):
        print(f'Directory {cli_args.save_dir} does not exist. Creating...')
        os.makedirs(cli_args.save_dir)

    # Category index -> name mapping; its length fixes the output layer size.
    with open(cli_args.categories_json, 'r') as f:
        cat_to_name = json.load(f)
    output_size = len(cat_to_name)

    # ImageNet normalization statistics and loader settings.
    expected_means = [0.485, 0.456, 0.406]
    expected_std = [0.229, 0.224, 0.225]
    max_image_size = 224
    batch_size = 32

    # train_transform
    tr_transform = transforms.Compose([transforms.RandomHorizontalFlip(p=0.25),
                                       transforms.RandomRotation(25),
                                       transforms.RandomGrayscale(p=0.02),
                                       transforms.RandomResizedCrop(max_image_size),
                                       transforms.ToTensor(),
                                       transforms.Normalize(expected_means,
                                                            expected_std)])
    # train_dataset
    tr_dataset = datasets.ImageFolder(cli_args.data_directory,
                                      transform=tr_transform)
    # tr_dataloader
    tr_dataloader = torch.utils.data.DataLoader(tr_dataset,
                                                batch_size=batch_size,
                                                shuffle=True)

    # model
    if not cli_args.arch.startswith("vgg") and not cli_args.arch.startswith("densenet"):
        print("Only supporting VGG and DenseNet")
        exit(1)
    print(f"Using a pre-trained {cli_args.arch} network.")
    my_model = models.__dict__[cli_args.arch](pretrained=True)

    # DenseNet classifier input sizes (not readable off a Sequential head
    # the way VGG's is).
    densenet_input = {
        'densenet121': 1024,
        'densenet169': 1664,
        'densenet161': 2208,
        'densenet201': 1920
    }
    input_size = 0
    if cli_args.arch.startswith("vgg"):
        input_size = my_model.classifier[0].in_features
    if cli_args.arch.startswith("densenet"):
        input_size = densenet_input[cli_args.arch]

    # Freeze the pre-trained feature extractor.
    for param in my_model.parameters():
        param.requires_grad = False

    od = OrderedDict()
    hidden_sizes = cli_args.hidden_units
    # NOTE(review): this insert mutates cli_args.hidden_units in place
    # BEFORE the print below, so the printed count/sizes include
    # input_size — presumably unintended; verify.
    hidden_sizes.insert(0, input_size)
    print(f"Building a {len(cli_args.hidden_units)} hidden layer classifier with inputs {cli_args.hidden_units}")
    # Linear -> ReLU -> Dropout per hidden layer, then the output head.
    for i in range(len(hidden_sizes) - 1):
        od['fc' + str(i + 1)] = nn.Linear(hidden_sizes[i], hidden_sizes[i + 1])
        od['relu' + str(i + 1)] = nn.ReLU()
        od['dropout' + str(i + 1)] = nn.Dropout(p=0.15)
    od['output'] = nn.Linear(hidden_sizes[i + 1], output_size)
    od['softmax'] = nn.LogSoftmax(dim=1)
    classifier = nn.Sequential(od)

    # Replace the classifier
    my_model.classifier = classifier
    my_model.zero_grad()