import os
import tqdm
import nltk
import multiprocessing
import pickle
import numpy as np
import collections

from utils import parameters

params = parameters.Parameters()


def ptb_data_read(corpus_file, sent_file):
    """Read PTB sentences from corpus_file, caching the tokenized result in sent_file."""
    # Reuse the cached pickle if it already exists
    if os.path.exists(sent_file):
        print("Loading sentences file")
        with open(sent_file, 'rb') as rf:
            sentences = pickle.load(rf)
        return sentences

    # Make sure the embeddings directory exists before anything is written into it
    if not os.path.exists("./trained_embeddings_valid_" + params.name):
        os.makedirs("./trained_embeddings_valid_" + params.name)

    # Wrap each whitespace-tokenized line with the start ('3') and end ('4') marker tokens
    sentences = []
    with open(corpus_file) as rf:
        for line in rf:
            sentences.append(['3'] + line.strip().split(' ') + ['4'])

    # Cache the tokenized sentences so subsequent runs can skip the parsing step
    with open(sent_file, 'wb') as wf:
        pickle.dump(sentences, wf)
    return sentences
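# ---------------------------------------------------------------------------
# Usage sketch for ptb_data_read. Both paths below are illustrative
# assumptions, not paths taken from this repository: the first call tokenizes
# the corpus and writes the pickle cache, and later calls load the cache.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    sentences = ptb_data_read(
        './data/ptb.valid.txt',  # assumed corpus location
        './trained_embeddings_valid_' + params.name + '/sentences.pickle')
    print('Loaded', len(sentences), 'sentences')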
# Create the experiment name
experiment = f'{args["dataset"]}_train' if args['name'] == '' else args['name']

# Define the compute device (either GPU or CPU)
compute_device = torch.device(args['gpu'] if torch.cuda.is_available() else 'cpu')

# Ensure we don't accidentally overwrite anything by checking how many
# previous experiments share the same name
if not args['continue_training']:
    directories = [name for name in os.listdir(os.path.abspath(args['network_dir']))
                   if os.path.isdir(f'{args["network_dir"]}{name}') and experiment in name]
    num = len(directories)
    experiment = f'{experiment}_{num}'
    del directories

# Set up a parameters object for saving hyperparameters, etc.
parameters = parameters.Parameters(experiment, 'train', **args)
if args['continue_training']:
    with open(os.path.abspath(f'{args["network_dir"]}{experiment}_parameters.pkl'), 'rb') as f:
        parameters = pickle.load(f)

# Create the data transforms for each respective set
train_transform = transforms.Compose([transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                                           std=[0.5, 0.5, 0.5])])
test_transform = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                                          std=[0.5, 0.5, 0.5])])

# Retrieve the datasets
train_dataset, val_dataset, _ = retrieve_dataset(args['dataset'], args['image_dir'],
                                                 train_transform, test_transform,
                                                 test_equals_val=True)
train_dataloader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=args['batch_size'], shuffle=False)

# Create the network, (potentially) load network state dictionary, and send
# the network to the compute device
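# ---------------------------------------------------------------------------
# A minimal sketch of the network-creation step named by the comment above.
# It borrows retrieve_network and the num_classes() accessor from the test
# script; the constructor signature and the checkpoint file name are
# assumptions, not confirmed by this excerpt.
# ---------------------------------------------------------------------------
num_classes = train_dataset.num_classes()
network_class = retrieve_network(args['dataset'], args['network'])
network = network_class(num_classes)  # assumed constructor signature
if args['continue_training']:
    # Assumed checkpoint name; resume from the weights saved under this experiment
    state_dict = torch.load(os.path.abspath(f'{args["network_dir"]}{experiment}.pth'),
                            map_location=compute_device)
    network.load_state_dict(state_dict)
network = network.to(compute_device)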
if __name__ == '__main__':
    # Seed every RNG if reproducibility is requested
    if args['seed'] is not None:
        torch.manual_seed(args['seed'])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(args['seed'])

    # Create the experiment name
    experiment = f'{args["dataset"]}_test' if args['name'] == '' else args['name']

    # Define the compute device (either GPU or CPU)
    compute_device = torch.device(args['gpu'] if torch.cuda.is_available() else 'cpu')

    # Set up a parameters object for saving hyperparameters, etc., then replace
    # it with the parameters saved by the corresponding training run
    parameters = parameters.Parameters(experiment, 'test', **args)
    with open(os.path.abspath(f'{args["network_dir"]}{experiment}_parameters.pkl'), 'rb') as f:
        parameters = pickle.load(f)

    # Create the data transforms for each respective set
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                                         std=[0.5, 0.5, 0.5])])

    # Retrieve the datasets
    _, val_dataset, test_dataset = retrieve_dataset(args['dataset'], args['image_dir'],
                                                    transform, transform,
                                                    test_equals_val=True)
    val_dataloader = DataLoader(val_dataset, batch_size=args['batch_size'], shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False)

    # Create the network, (potentially) load network state dictionary, and send
    # the network to the compute device
    num_classes = val_dataset.num_classes()
    loader = retrieve_network(args['dataset'], args['network'])
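    # -----------------------------------------------------------------------
    # A minimal sketch of the evaluation step that would follow. The
    # constructor signature, checkpoint name, and (images, labels) batch
    # format are assumptions, not confirmed by this excerpt.
    # -----------------------------------------------------------------------
    network = loader(num_classes)  # assumed constructor signature
    state_dict = torch.load(os.path.abspath(f'{args["network_dir"]}{experiment}.pth'),
                            map_location=compute_device)  # assumed checkpoint name
    network.load_state_dict(state_dict)
    network = network.to(compute_device)
    network.eval()

    # Accuracy over the test split
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_dataloader:
            images, labels = images.to(compute_device), labels.to(compute_device)
            predictions = network(images).argmax(dim=1)
            correct += (predictions == labels).sum().item()
            total += labels.size(0)
    print(f'Test accuracy: {correct / total:.4f}')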
    if args['seed'] is not None:
        args['seed'] = int(args['seed'])
        torch.manual_seed(args['seed'])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(args['seed'])

    return args


if __name__ == '__main__':
    # Get arguments
    args = arguments()

    # Set up a parameters object for saving hyperparameters, etc.
    parameters = parameters.Parameters(args['name'], **args)
    if args['continue_training']:
        with open(os.path.abspath(f'{args["network_dir"]}{args["name"]}_parameters.pkl'),
                  'rb') as f:
            parameters = pickle.load(f)

    # Create the data transforms for each respective set
    if args['dataset'] == 'tomato':
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            RotationTransform(angles=[0, 90, 180, 270]),
            GammaJitter(low=0.9, high=1.1),
            BrightnessJitter(low=0.9, high=1.1),