Пример #1
0
import os
import tqdm
import nltk
import multiprocessing
import pickle
import numpy as np
import collections
from utils import parameters

params = parameters.Parameters()


def ptb_data_read(corpus_file, sent_file):
    """Return the corpus as a list of token lists, using a pickle cache.

    If ``sent_file`` already exists it is unpickled and returned as-is,
    and ``corpus_file`` is not read. Otherwise every line of
    ``corpus_file`` is stripped, split on single spaces and wrapped
    between the string tokens ``'3'`` and ``'4'``; the result is pickled
    to ``sent_file`` before being returned.

    Args:
        corpus_file: Path of the text corpus, one sentence per line.
        sent_file: Path of the pickle cache to load from or write to.

    Returns:
        A list with one list of string tokens per corpus line.
    """
    # Fast path: a previously pickled result wins over re-reading the corpus.
    if os.path.exists(sent_file):
        print("Loading sentences file")
        with open(sent_file, 'rb') as cache:
            return pickle.load(file=cache)

    # Side effect on the slow path only: make sure the embeddings directory
    # for this run (named after the module-level ``params``) exists.
    embeddings_dir = "./trained_embeddings_valid_" + params.name
    if not os.path.exists(embeddings_dir):
        os.makedirs(embeddings_dir)

    # NOTE(review): '3' and '4' are presumably begin/end-of-sentence
    # markers -- confirm against the vocabulary used downstream.
    with open(corpus_file) as corpus:
        sentences = [
            ['3'] + raw_line.strip().split(' ') + ['4']
            for raw_line in corpus
        ]

    with open(sent_file, 'wb') as cache:
        pickle.dump(sentences, file=cache)
    return sentences
Пример #2
0
    # Create the experiment name
    experiment = f'{args["dataset"]}_train' if args['name'] == '' else args['name']

    # Define the compute device (either GPU or CPU)
    compute_device = torch.device(args['gpu'] if torch.cuda.is_available() else 'cpu')

    # Ensure we don't accidentally overwrite anything by checking how many previous experiments share the same name
    if not args['continue_training']:
        directories = [name for name in os.listdir(os.path.abspath(args['network_dir'])) if os.path.isdir(f'{args["network_dir"]}{name}') and experiment in name]
        num = len(directories)
        experiment = f'{experiment}_{num}'
        del directories

    # Set up a parameters object for saving hyperparameters, etc.
    parameters = parameters.Parameters(experiment, 'train', **args)
    if args['continue_training']:
        with open(os.path.abspath(f'{args["network_dir"]}{experiment}_parameters.pkl'), 'rb') as f:
            parameters = pickle.load(f)

    # Create the data transforms for each respective set
    train_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])
    test_transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])])

    # Retrieve the datasets
    train_dataset, val_dataset, _ = retrieve_dataset(args['dataset'], args['image_dir'], train_transform, test_transform, test_equals_val=True)

    train_dataloader = DataLoader(train_dataset, batch_size=args['batch_size'], shuffle=True)
    val_dataloader = DataLoader(val_dataset, batch_size=args['batch_size'], shuffle=False)

    # Create the network, (potentially) load network state dictionary, and send the network to the compute device
Пример #3
0
if __name__ == '__main__':
    # Optional reproducibility: when a seed is supplied, seed torch and NumPy
    # and switch cuDNN to deterministic, non-benchmarking mode.
    if args['seed'] is not None:
        torch.manual_seed(args['seed'])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(args['seed'])

    # Experiment name: fall back to "<dataset>_test" when none was given.
    if args['name'] == '':
        experiment = f'{args["dataset"]}_test'
    else:
        experiment = args['name']

    # Pick the requested GPU only if CUDA is available; otherwise use the CPU.
    if torch.cuda.is_available():
        compute_device = torch.device(args['gpu'])
    else:
        compute_device = torch.device('cpu')

    # Build a parameters object, then replace it with the one pickled for
    # this experiment under the network directory.
    # NOTE(review): the freshly constructed object is overwritten right away;
    # presumably it is kept for constructor side effects -- confirm.
    parameters = parameters.Parameters(experiment, 'test', **args)
    with open(
            os.path.abspath(
                f'{args["network_dir"]}{experiment}_parameters.pkl'),
            'rb') as f:
        parameters = pickle.load(f)

    # One shared transform: convert to a tensor, then normalize each of the
    # three channels with mean 0.5 and std 0.5.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ])

    # Only the validation and test splits are needed here; the first returned
    # value is ignored.
    _, val_dataset, test_dataset = retrieve_dataset(
        args['dataset'],
        args['image_dir'],
        transform,
        transform,
        test_equals_val=True,
    )

    # Evaluation loaders keep the dataset order (no shuffling).
    val_dataloader = DataLoader(
        val_dataset,
        batch_size=args['batch_size'],
        shuffle=False,
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=args['batch_size'],
        shuffle=False,
    )

    # Create the network, (potentially) load network state dictionary, and send the network to the compute device
    num_classes = val_dataset.num_classes()
    loader = retrieve_network(args['dataset'], args['network'])
Пример #4
0
    if args['seed'] is not None:
        args['seed'] = int(args['seed'])
        torch.manual_seed(args['seed'])
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
        np.random.seed(args['seed'])

    return args


if __name__ == '__main__':
    # Get arguments
    args = arguments()

    # Set up a parameters object for saving hyperparameters, etc.
    parameters = parameters.Parameters(args['name'], **args)
    if args['continue_training']:
        with open(
                os.path.abspath(
                    f'{args["network_dir"]}{args["name"]}_parameters.pkl'),
                'rb') as f:
            parameters = pickle.load(f)

    # Create the data transforms for each respective set
    if args['dataset'] == 'tomato':
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomVerticalFlip(p=0.5),
            RotationTransform(angles=[0, 90, 180, 270]),
            GammaJitter(low=0.9, high=1.1),
            BrightnessJitter(low=0.9, high=1.1),