def main():
    # Check whether GPU is available and can be used
    # if CUDA is found then device is set accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    save_path = './runs/' + strftime("%Y-%m-%d_%H-%M-%S", gmtime())
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    log_file = os.path.join(save_path, "stdout")
    log = open(log_file, "a")

    # TODO: gives interrupted sys call error
    # log_file = os.path.join(save_path, "stdout")
    # sys.stdout = Logger(log_file)

    # Command line options
    args = parser.parse_args()
    print("Command line options:")
    for arg in vars(args):
        print(arg, getattr(args, arg))
        log.write(arg + ':' + str(getattr(args, arg)) + '\n')
    log.close()

    # Initialize the weights of the model
    print("Initializing network with: " + args.weight_init)
    WeightInitializer = WeightInit(args.weight_init)

    # Dataset loading
    # TODO: hard-coded file paths
    patch_size = args.patch_size
    data_init_method = getattr(datasets, args.dataset)
    dataset = data_init_method(torch.cuda.is_available(), args)

    gen = QLearner(state_space_parameters, 1, WeightInitializer, device, args, save_path,
                   qstore=args.qstore_path, replaydict=args.replay_dict_path)

    if args.continue_epsilon not in np.array(state_space_parameters.epsilon_schedule)[:, 0]:
        raise ValueError('continue-epsilon {} not in epsilon schedule!'.format(args.continue_epsilon))

    for episode in state_space_parameters.epsilon_schedule:
        epsilon = episode[0]
        M = episode[1]

        for ite in range(1, M + 1):
            if epsilon == args.continue_epsilon and args.continue_ite > M:
                raise ValueError('continue-ite {} not within range of continue-epsilon {} in epsilon schedule!'
                                 .format(args.continue_ite, epsilon))
            if (epsilon == args.continue_epsilon and ite >= args.continue_ite) or \
                    (epsilon < args.continue_epsilon):
                print('ite:{}, epsilon:{}'.format(ite, epsilon))
                gen.generate_net(epsilon, dataset)

    gen.replay_dictionary.to_csv(os.path.join(save_path, 'replayDictFinal.csv'))
    gen.qstore.save_to_csv(os.path.join(save_path, 'qValFinal.csv'))
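
# The commented-out `sys.stdout = Logger(log_file)` above refers to a Logger class that is not
# shown in this snippet. A minimal sketch of such a stdout tee, duplicating console output into
# the log file, could look like the following; this is an assumption, not the original helper.

import sys


class LoggerSketch(object):
    """Duplicates everything written to stdout into a log file (hypothetical sketch)."""

    def __init__(self, log_file):
        self.terminal = sys.stdout
        self.log = open(log_file, "a")

    def write(self, message):
        # write to the console and to the file
        self.terminal.write(message)
        self.log.write(message)

    def flush(self):
        # keep both streams in sync when print() flushes
        self.terminal.flush()
        self.log.flush()
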
def main():
    # Command line options
    args = parser.parse_args()
    print("Command line options:")
    for arg in vars(args):
        print(arg, getattr(args, arg))

    if args.cross_dataset and not args.incremental_data:
        raise ValueError('cross-dataset training possible only if incremental-data flag set')

    # Check whether GPU is available and can be used
    # if CUDA is found then device is set accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Launch a writer for the tensorboard summary writer instance
    save_path = 'runs/' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) + '_' + args.dataset + '_' + args.architecture + \
                '_variational_samples_' + str(args.var_samples) + '_latent_dim_' + str(args.var_latent_dim)

    # add option specific naming to separate tensorboard log files later
    if args.autoregression:
        save_path += '_pixelcnn'
    if args.incremental_data:
        save_path += '_incremental'
        if args.train_incremental_upper_bound:
            save_path += '_upper_bound'
        if args.generative_replay:
            save_path += '_genreplay'
        if args.openset_generative_replay:
            save_path += '_opensetreplay'
        if args.cross_dataset:
            save_path += '_cross_dataset_' + args.dataset_order

    # if we are resuming a previous training, note it in the name
    if args.resume:
        save_path = save_path + '_resumed'
    writer = SummaryWriter(save_path)

    # saving the parsed args to file
    log_file = os.path.join(save_path, "stdout")
    log = open(log_file, "a")
    for arg in vars(args):
        log.write(arg + ':' + str(getattr(args, arg)) + '\n')

    # Dataset loading
    data_init_method = getattr(datasets, args.dataset)
    dataset = data_init_method(torch.cuda.is_available(), args)
    # get the number of classes from the class dictionary
    num_classes = dataset.num_classes

    # we set an epoch multiplier to 1 for isolated training and increase it proportional to amount of tasks in CL
    epoch_multiplier = 1

    if args.incremental_data:
        from lib.Datasets.incremental_dataset import get_incremental_dataset

        # get the method to create the incremental dataset (inherits from the chosen data loader)
        inc_dataset_init_method = get_incremental_dataset(data_init_method, args)

        # different options for class incremental vs. cross-dataset experiments
        if args.cross_dataset:
            # if a task order file is specified, load the task order from it
            if args.load_task_order:
                # check if file exists and if file ends with extension '.txt'
                if os.path.isfile(args.load_task_order) and len(args.load_task_order) >= 4 \
                        and args.load_task_order[-4:] == '.txt':
                    print("=> loading task order from '{}'".format(args.load_task_order))
                    with open(args.load_task_order, 'rb') as fp:
                        task_order = pickle.load(fp)
                # if no file is found default to cmd line task order
                else:
                    # parse and split string at commas
                    task_order = args.dataset_order.split(',')
                    for i in range(len(task_order)):
                        # remove blank spaces in dataset names
                        task_order[i] = task_order[i].replace(" ", "")
            # use task order as specified in command line
            else:
                # parse and split string at commas
                task_order = args.dataset_order.split(',')
                for i in range(len(task_order)):
                    # remove blank spaces in dataset names
                    task_order[i] = task_order[i].replace(" ", "")

            # just for getting the number of classes in the first dataset
            num_classes = 0
            for i in range(args.num_base_tasks):
                temp_dataset_init_method = getattr(datasets, task_order[i])
                temp_dataset = temp_dataset_init_method(torch.cuda.is_available(), args)
                num_classes += temp_dataset.num_classes
                del temp_dataset

            # multiply epochs by number of tasks
            if args.num_increment_tasks:
                epoch_multiplier = ((len(task_order) - args.num_base_tasks) / args.num_increment_tasks) + 1
            else:
                # this branch will get active if num_increment_tasks is set to zero. This is useful when training
                # any isolated upper bound with all datasets present from the start.
                epoch_multiplier = 1.0
        else:  # class incremental
            # if specified load task order from file
            if args.load_task_order:
                if os.path.isfile(args.load_task_order):
                    print("=> loading task order from '{}'".format(args.load_task_order))
                    task_order = np.load(args.load_task_order).tolist()
                else:
                    # if no file is found a random task order is created
                    print("=> no task order found. Creating randomized task order")
                    task_order = np.random.permutation(num_classes).tolist()
            else:
                # if randomize task order is specified create a random task order, else task order is sequential
                task_order = []
                for i in range(dataset.num_classes):
                    task_order.append(i)

                if args.randomize_task_order:
                    task_order = np.random.permutation(num_classes).tolist()

            # save the task order
            np.save(os.path.join(save_path, 'task_order.npy'), task_order)
            # set the number of classes to base tasks + 1 because base tasks is always one less.
            # E.g. if you have 2 classes it's one task. This is a little inconsistent from the naming point of view,
            # but we wanted a single variable to work for both class incremental and cross-dataset experiments.
            num_classes = args.num_base_tasks + 1
            # multiply epochs by number of tasks
            epoch_multiplier = ((len(task_order) - (args.num_base_tasks + 1)) / args.num_increment_tasks) + 1

        print("Task order: ", task_order)
        # log the task order into the text file
        log.write('task_order:' + str(task_order) + '\n')
        args.task_order = task_order

        # this is a little weird, but it needs to be here because the below method pops items from task_order
        args_to_tensorboard(writer, args)

        assert epoch_multiplier.is_integer(), "uneven task division, make sure number of tasks are integers."

        # Get the incremental dataset
        dataset = inc_dataset_init_method(torch.cuda.is_available(), device, task_order, args)
    else:
        # add command line options to TensorBoard
        args_to_tensorboard(writer, args)

    log.close()

    # Get a sample input from the data loader to infer color channels/size
    net_input, _ = next(iter(dataset.train_loader))
    # get the amount of color channels in the input images
    num_colors = net_input.size(1)

    # import model from architectures class
    net_init_method = getattr(architectures, args.architecture)

    # if we are not building an autoregressive model the number of output channels of the model is equivalent to
    # the amount of input channels. For an autoregressive model we set the number of output channels of the
    # non-autoregressive decoder portion according to the command line option below
    if not args.autoregression:
        args.out_channels = num_colors

    # build the model
    model = net_init_method(device, num_classes, num_colors, args)

    # optionally add the autoregressive decoder
    if args.autoregression:
        model.pixelcnn = PixelCNN(device, num_colors, args.out_channels, args.pixel_cnn_channels,
                                  num_layers=args.pixel_cnn_layers, k=args.pixel_cnn_kernel_size,
                                  padding=args.pixel_cnn_kernel_size // 2)

    # Parallel container for multi GPU use and cast to available device
    model = torch.nn.DataParallel(model).to(device)
    print(model)

    # Initialize the weights of the model, by default according to He et al.
    print("Initializing network with: " + args.weight_init)
    WeightInitializer = WeightInit(args.weight_init)
    WeightInitializer.init_model(model)

    # Define optimizer and loss function (criterion)
    optimizer = torch.optim.Adam(model.parameters(), args.learning_rate)

    epoch = 0
    best_prec = 0
    best_loss = random.getrandbits(128)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            epoch = checkpoint['epoch']
            best_prec = checkpoint['best_prec']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # optimize until the final amount of epochs is reached. The final amount of epochs is determined through the
    # epoch multiplier, i.e. the number of epochs per task times the number of tasks.
    while epoch < (args.epochs * epoch_multiplier):
        # visualize the latent space before each task increment and at the end of training if it is 2-D
        if (epoch % args.epochs == 0 and epoch > 0) or (epoch + 1) % (args.epochs * epoch_multiplier) == 0:
            if model.module.latent_dim == 2:
                print("Calculating and visualizing dataset embedding")
                # infer the number of current tasks to plot the different classes in the embedding
                if args.incremental_data:
                    if args.cross_dataset:
                        num_tasks = sum(dataset.num_classes_per_task[:len(dataset.seen_tasks)])
                    else:
                        num_tasks = len(dataset.seen_tasks)
                else:
                    num_tasks = num_classes

                zs = get_latent_embedding(model, dataset.train_loader, num_tasks, device)
                visualize_dataset_in_2d_embedding(writer, zs, args.dataset, save_path, task=num_tasks)

        # continual learning specific part
        if args.incremental_data:
            # at the end of each task increment
            if epoch % args.epochs == 0 and epoch > 0:
                print('Saving the last checkpoint from the previous task ...')
                save_task_checkpoint(save_path, epoch // args.epochs)

                print("Incrementing dataset ...")
                dataset.increment_tasks(model, args.batch_size, args.workers, writer, save_path,
                                        is_gpu=torch.cuda.is_available(),
                                        upper_bound_baseline=args.train_incremental_upper_bound,
                                        generative_replay=args.generative_replay,
                                        openset_generative_replay=args.openset_generative_replay,
                                        openset_threshold=args.openset_generative_replay_threshold,
                                        openset_tailsize=args.openset_weibull_tailsize,
                                        autoregression=args.autoregression)

                # grow the classifier and increment the variable for number of overall classes so we can use it later
                if args.cross_dataset:
                    grow_classifier(model.module.classifier,
                                    sum(dataset.num_classes_per_task[:len(dataset.seen_tasks)])
                                    - model.module.num_classes, WeightInitializer)
                    model.module.num_classes = sum(dataset.num_classes_per_task[:len(dataset.seen_tasks)])
                else:
                    model.module.num_classes += args.num_increment_tasks
                    grow_classifier(model.module.classifier, args.num_increment_tasks, WeightInitializer)

                # reset moving averages etc. of the optimizer
                optimizer = torch.optim.Adam(model.parameters(), args.learning_rate)

            # change the number of seen classes
            if epoch % args.epochs == 0:
                model.module.seen_tasks = dataset.seen_tasks

        # train
        train(dataset, model, criterion, epoch, optimizer, writer, device, args)

        # evaluate on validation set
        prec, loss = validate(dataset, model, criterion, epoch, writer, device, save_path, args)

        # remember best prec@1 and save checkpoint
        is_best = loss < best_loss
        best_loss = min(loss, best_loss)
        best_prec = max(prec, best_prec)
        save_checkpoint({'epoch': epoch,
                         'arch': args.architecture,
                         'state_dict': model.state_dict(),
                         'best_prec': best_prec,
                         'best_loss': best_loss,
                         'optimizer': optimizer.state_dict()},
                        is_best, save_path)

        # increment epoch counters
        epoch += 1

        # if a new task begins reset the best prec so that a new best model can be stored.
        if args.incremental_data and epoch % args.epochs == 0:
            best_prec = 0
            best_loss = random.getrandbits(128)

    writer.close()
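
# `grow_classifier` is imported from elsewhere in the repository and is not shown here. A rough
# sketch of what such a helper typically does (widen the final linear layer by `class_increment`
# outputs while keeping the already trained weights) follows. The function name, the assumption
# that the classifier is an nn.Sequential ending in an nn.Linear, and the He initialization of the
# new rows are assumptions for illustration; the original delegates initialization to WeightInit.

import torch


def grow_classifier_sketch(classifier, class_increment):
    # assumes the last module of `classifier` (an nn.Sequential) is the nn.Linear output layer
    old_layer = classifier[-1]
    new_layer = torch.nn.Linear(old_layer.in_features,
                                old_layer.out_features + class_increment,
                                bias=old_layer.bias is not None).to(old_layer.weight.device)
    # initialize the enlarged layer, then copy the previously learned weights back in
    torch.nn.init.kaiming_normal_(new_layer.weight, nonlinearity='relu')
    with torch.no_grad():
        new_layer.weight[:old_layer.out_features] = old_layer.weight
        if old_layer.bias is not None:
            new_layer.bias.zero_()
            new_layer.bias[:old_layer.out_features] = old_layer.bias
    classifier[-1] = new_layer
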
def main():
    # Command line options
    args = parser.parse_args()
    print("Command line options:")
    for arg in vars(args):
        print(arg, getattr(args, arg))

    if args.debug:
        pdb.set_trace()

    # Check whether GPU is available and can be used
    # if CUDA is found then device is set accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Launch a writer for the tensorboard summary writer instance
    save_path = 'runs/' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) + '_' + args.dataset + '_' + args.architecture

    # if we are resuming a previous training, note it in the name
    if args.resume:
        save_path = save_path + '_resumed'
    writer = SummaryWriter(save_path)

    # saving the parsed args to file
    log_file = os.path.join(save_path, "stdout")
    log = open(log_file, "a")
    for arg in vars(args):
        log.write(arg + ':' + str(getattr(args, arg)) + '\n')

    # Dataset loading
    data_init_method = getattr(datasets, args.dataset)
    dataset = data_init_method(torch.cuda.is_available(), args)
    # get the number of classes from the class dictionary
    num_classes = dataset.num_classes

    # we set an epoch multiplier to 1 for isolated training and increase it proportional to amount of tasks in CL
    epoch_multiplier = 1

    # add command line options to TensorBoard
    args_to_tensorboard(writer, args)

    log.close()

    # build the model
    model = architectures.Inos_model(args.num_class, args)

    # Parallel container for multi GPU use and cast to available device
    model = torch.nn.DataParallel(model).to(device)
    print(model)

    if not args.pretrained:
        # Initialize the weights of the model, by default according to He et al.
        print("Initializing network with: " + args.weight_init)
        WeightInitializer = WeightInit(args.weight_init)
        WeightInitializer.init_model(model)

    # Define optimizer and loss function (criterion)
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=0.9, weight_decay=2e-4)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30, 60, 80, 100], gamma=0.5)

    epoch = 0
    best_prec = 0
    best_loss = random.getrandbits(128)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            epoch = checkpoint['epoch']
            best_prec = checkpoint['best_prec']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # optimize until the final amount of epochs is reached. The final amount of epochs is determined through the
    # epoch multiplier.
    while epoch < (args.epochs * epoch_multiplier):
        # train
        train(dataset, model, criterion, epoch, optimizer, writer, device, args)

        # evaluate on validation set
        prec, loss = validate(dataset, model, criterion, epoch, writer, device, save_path, args)

        # evaluate on test set
        prec_t, loss_t = test(dataset, model, criterion, epoch, writer, device, save_path, args)

        # remember best prec@1 and save checkpoint
        is_best = loss < best_loss
        best_loss = min(loss, best_loss)
        best_prec = max(prec, best_prec)
        save_checkpoint({'epoch': epoch,
                         'arch': args.architecture,
                         'state_dict': model.state_dict(),
                         'best_prec': best_prec,
                         'best_loss': best_loss,
                         'optimizer': optimizer.state_dict()},
                        is_best, save_path)

        # increment epoch counters
        epoch += 1
        scheduler.step()

    writer.close()
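
# `save_checkpoint` is used throughout these scripts but defined elsewhere. It presumably follows
# the common PyTorch ImageNet-example pattern; a minimal sketch under that assumption (the file
# names and the helper's signature are illustrative, not taken from the repository):

import os
import shutil

import torch


def save_checkpoint_sketch(state, is_best, save_path, filename='checkpoint.pth.tar'):
    # always store the latest state and additionally copy it if it is the best seen so far
    checkpoint_path = os.path.join(save_path, filename)
    torch.save(state, checkpoint_path)
    if is_best:
        shutil.copyfile(checkpoint_path, os.path.join(save_path, 'model_best.pth.tar'))
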
def main():
    # Command line options
    args = parser.parse_args()
    print("Command line options:")
    for arg in vars(args):
        print(arg, getattr(args, arg))

    # Check whether GPU is available and can be used
    # if CUDA is found then device is set accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    cudnn.benchmark = True
    num_GPUs = torch.cuda.device_count()

    # If save directory for runs doesn't exist then create it
    if not os.path.exists('runs'):
        os.mkdir('runs')

    # Create a time-stamped save path for individual experiment
    save_path = 'runs/' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) + \
                ';' + args.dataset + ';' + args.architecture
    os.mkdir(save_path)

    # List of values to log to csv
    columns_list = ['Filters', 'Parameters', 'Mean', 'Variance', 'Skew', 'BestVal', 'BestValsTrain',
                    'BestEpoch', 'LastValPrec', 'LastTrainPrec', 'AllTrain', 'AllVal']
    df = pd.DataFrame(columns=columns_list)

    # Dataset loading
    data_init_method = getattr(datasets, args.dataset)
    dataset = data_init_method(torch.cuda.is_available(), args)

    # get the amount of color channels in the input images
    net_input, _ = next(iter(dataset.train_loader))
    num_colors = net_input.size(1)

    # import model from architectures class
    net_init_method = getattr(architectures, args.architecture)

    # Get the parameters for all valid skewed models
    SNModels = SkewNormalModels(depth=args.vgg_depth, num_classes=dataset.num_classes,
                                patch_size=args.patch_size)
    skew_model_params = SNModels.get_valid_models()
    print("Total number of models: ", len(skew_model_params["filters"]))

    # Weight-init method
    WeightInitializer = WeightInit(args.weight_init)

    # Optionally resume a previous experiment
    current_id = args.resume_model_id
    for i in range(len(skew_model_params["filters"]) - current_id):
        print("Model filters: ", skew_model_params["filters"][i + current_id])
        print("Model parameters: ", skew_model_params["total_params"][i + current_id],
              " mean: ", skew_model_params["means"][i + current_id],
              " var: ", skew_model_params["vars"][i + current_id],
              " skew: ", skew_model_params["skews"][i + current_id])

        model = net_init_method(device, dataset.num_classes, num_colors, args,
                                skew_model_params["filters"][i + current_id], custom_filters=True)

        # Parallel container for multi GPU use and cast to available device
        model = torch.nn.DataParallel(model).to(device)
        print(model)

        # Initialize the weights of the model
        print("Initializing networks with: " + args.weight_init)
        WeightInitializer.init_model(model)

        # Define criterion and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)

        # Initialize SGDWR learning rate scheduler
        lr_scheduler = LearningRateScheduler(args.lr_wr_epochs, len(dataset.train_loader.dataset),
                                             args.batch_size, args.learning_rate,
                                             args.lr_wr_mul, args.lr_wr_min)

        # Get estimated GPU memory usage of the model and split batch if too little memory is available
        if torch.cuda.is_available():
            GPUMemory = GPUMem(torch.cuda.is_available())
            print('available:{}'.format(
                (GPUMemory.total_mem - GPUMemory.total_mem * GPUMemory.get_mem_util()) / 1024.))
            print('required per gpu with buffer: {}'.format(
                (4. / float(num_GPUs) * model.module.gpu_usage) + 1.))

            # calculate smaller chunk size to split batch into sequential computations
            mem_scale_factor = 4.0  # TODO: WEIRD factor... why is this necessary and where does it come from?
            # TODO: the + 1 Gb should be taken from the cache allocator
            if ((GPUMemory.total_mem - GPUMemory.total_mem * GPUMemory.get_mem_util()) / 1024.) < \
                    ((mem_scale_factor / float(num_GPUs) * model.module.gpu_usage) + 1.):
                # code for variable batch size implementation as per gpu constraint; remove for old code
                approx_small_batch_size = \
                    (((GPUMemory.total_mem - GPUMemory.total_mem * GPUMemory.get_mem_util()) / 1024. - 1.)
                     * float(num_GPUs) / mem_scale_factor) // \
                    (model.module.gpu_usage / float(args.batch_size))

                diff = float('inf')
                temp_small_batch_size = approx_small_batch_size
                for j in range(1, (args.batch_size // 2) + 1):
                    if args.batch_size % j == 0 and abs(j - approx_small_batch_size) < diff:
                        diff = abs(j - approx_small_batch_size)
                        temp_small_batch_size = j
                batch_seq_split_size = temp_small_batch_size
            else:
                batch_seq_split_size = args.batch_size
        else:
            batch_seq_split_size = args.batch_size

        # Get training and validation dataset loaders
        dataset.train_loader, dataset.val_loader = dataset.get_dataset_loader(batch_seq_split_size,
                                                                              args.workers, device)
        print('sequential batch size split size:{}'.format(batch_seq_split_size))

        epoch = 0
        best_epoch = 0
        best_prec = 0
        best_val_train_prec = 0
        all_train = []
        all_val = []

        while epoch < args.epochs:
            # train for one epoch
            train_prec = train(dataset.train_loader, model, criterion, epoch, optimizer,
                               lr_scheduler, device, batch_seq_split_size, args)

            # evaluate on validation set
            prec = validate(dataset.val_loader, model, criterion, epoch, device, args)

            all_train.append(train_prec)
            all_val.append(prec)

            # remember best prec@1 and save checkpoint
            is_best = prec > best_prec
            if is_best:
                best_epoch = epoch
                best_val_train_prec = train_prec
                best_prec = prec

            # if architecture doesn't train at all skip it
            if epoch == args.lr_wr_epochs - 1 and train_prec < (2 * 100.0 / dataset.num_classes):
                break

            # increment epoch counters
            epoch += 1
            lr_scheduler.scheduler_epoch += 1

        # append architecture results to csv
        df = df.append(pd.DataFrame([[skew_model_params["filters"][i + current_id],
                                      skew_model_params["total_params"][i + current_id],
                                      skew_model_params["means"][i + current_id],
                                      skew_model_params["vars"][i + current_id],
                                      skew_model_params["skews"][i + current_id],
                                      best_prec, best_val_train_prec, best_epoch, prec, train_prec,
                                      all_train, all_val]], columns=columns_list),
                       ignore_index=True)
        df.to_csv(save_path + '/model_%03d' % (i + 1 + current_id) + '.csv')

        del model
        del optimizer
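
# The `LearningRateScheduler` above is the repository's SGD warm-restart scheduler and is not shown
# in this snippet. Assuming it follows Loshchilov & Hutter's SGDR cosine annealing, the per-step
# learning rate it computes would look roughly like the sketch below; the function name and
# parameters (lr_max, lr_min, t_cur, t_restart) are illustrative, not the original interface.

import math


def sgdr_learning_rate(lr_max, lr_min, t_cur, t_restart):
    # cosine anneal from lr_max down to lr_min over one restart cycle of length t_restart,
    # where t_cur is the (possibly fractional) number of epochs since the last warm restart
    return lr_min + 0.5 * (lr_max - lr_min) * (1. + math.cos(math.pi * t_cur / t_restart))
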
def main():
    # set device for torch computations
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    save_path = './runs/' + strftime("%Y-%m-%d_%H-%M-%S", gmtime())
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # parse command line arguments
    args = parser.parse_args()
    print("Command line options:")
    for arg in vars(args):
        print(arg, getattr(args, arg))

    # create log file
    log_file = os.path.join(save_path, "stdout")

    # write parsed args to log file
    log = open(log_file, "a")
    for arg in vars(args):
        print(arg, getattr(args, arg))
        log.write(arg + ':' + str(getattr(args, arg)) + '\n')
    log.close()

    # instantiate the weight initializer
    print("Initializing network with: " + args.weight_init)
    weight_initializer = WeightInit(args.weight_init)

    # instantiate dataset object
    data_init_method = getattr(datasets, args.dataset)
    dataset = data_init_method(torch.cuda.is_available(), args)

    # instantiate a tabular Q-learner
    q_learner = QLearner(args, dataset.num_classes, save_path)

    # start new architecture search
    if int(args.task) == 1:
        if args.continue_search is True:
            # raise exceptions if the requirements to continue a search are not met
            if args.continue_epsilon not in np.array(state_space_parameters.epsilon_schedule)[:, 0]:
                raise ValueError('continue-epsilon {} not in epsilon schedule!'.format(args.continue_epsilon))
            if (args.replay_buffer_csv_path is None) or (not os.path.exists(args.replay_buffer_csv_path)):
                raise ValueError('specify correct path to replay buffer to continue search')
            if (args.q_values_csv_path is None) or (not os.path.exists(args.q_values_csv_path)):
                raise ValueError('wrong path is specified for Q-values')

        # iterate as per the epsilon-greedy schedule
        for episode in state_space_parameters.epsilon_schedule:
            epsilon = episode[0]
            m = episode[1]

            # raise exception if the net number to continue from is greater than the number of nets for the
            # continue_epsilon
            if epsilon == args.continue_epsilon and args.continue_ite > m:
                raise ValueError('continue-ite {} not within range of continue-epsilon {} in epsilon schedule!'
                                 .format(args.continue_ite, epsilon))

            # iterate through the number of nets for an epsilon
            for ite in range(1, m + 1):
                # check conditions to generate and train an architecture
                if (epsilon == args.continue_epsilon and ite >= args.continue_ite) or \
                        (epsilon < args.continue_epsilon):
                    print('ite:{}, epsilon:{}'.format(ite, epsilon))

                    # generate net states for search
                    q_learner.generate_search_net_states(epsilon)

                    # check if net has already been trained before
                    search_net_in_replay_dict = q_learner.check_search_net_in_replay_buffer()

                    # add to the end of the replay buffer if net has already been trained before
                    if search_net_in_replay_dict:
                        q_learner.add_search_net_to_replay_buffer(search_net_in_replay_dict, verbose=True)
                    # train net if net has not been trained before
                    else:
                        # train/val search net
                        mem_fit, spp_size, hard_best_val, hard_val_all_epochs, soft_best_val, soft_val_all_epochs, \
                            train_flag, hard_best_background, hard_best_crack, hard_best_spallation, \
                            hard_best_exposed_bars, hard_best_efflorescence, hard_best_corrosion_stain = \
                            train_val_net(q_learner.state_list, dataset, weight_initializer, device, args, save_path)

                        # check if net fits memory
                        while mem_fit is False:
                            print("net failed mem check even with batch splitting, sampling again!")
                            q_learner.generate_search_net_states(epsilon)
                            net_in_replay_dict = q_learner.check_search_net_in_replay_buffer()

                            if net_in_replay_dict:
                                q_learner.add_search_net_to_replay_buffer(net_in_replay_dict)
                                break
                            else:
                                mem_fit, spp_size, hard_best_val, hard_val_all_epochs, soft_best_val, \
                                    soft_val_all_epochs, train_flag, hard_best_background, hard_best_crack, \
                                    hard_best_spallation, hard_best_exposed_bars, hard_best_efflorescence, \
                                    hard_best_corrosion_stain = \
                                    train_val_net(q_learner.state_list, dataset, weight_initializer, device,
                                                  args, save_path)

                        # add new net and performance measures to replay buffer if it fits in memory after
                        # splitting the batch
                        if mem_fit:
                            reward = q_learner.accuracies_to_reward(hard_val_all_epochs)
                            q_learner.add_search_net_to_replay_buffer(
                                search_net_in_replay_dict, spp_size=spp_size, reward=reward,
                                hard_best_val=hard_best_val, hard_val_all_epochs=hard_val_all_epochs,
                                soft_best_val=soft_best_val, soft_val_all_epochs=soft_val_all_epochs,
                                train_flag=train_flag, hard_best_background=hard_best_background,
                                hard_best_crack=hard_best_crack, hard_best_spallation=hard_best_spallation,
                                hard_best_exposed_bars=hard_best_exposed_bars,
                                hard_best_efflorescence=hard_best_efflorescence,
                                hard_best_corrosion_stain=hard_best_corrosion_stain, verbose=True)

                    # sample nets from replay buffer, update Q-values and save partially filled replay buffer and
                    # Q-values
                    q_learner.update_q_values_and_save_partial()

        # save fully filled replay buffer and final Q-values
        q_learner.save_final()

    # load single architecture config from replay buffer and train till convergence
    elif int(args.task) == 2:
        # raise exceptions if the requirements to train a fixed net are not met
        if (args.replay_buffer_csv_path is None) or (not os.path.exists(args.replay_buffer_csv_path)):
            raise ValueError('wrong path specified for replay buffer')
        if int(args.fixed_net_index_no) < 0:
            raise ValueError('specify a non negative integer for fixed net index')

        # generate states for fixed net from a complete search
        q_learner.generate_fixed_net_states()

        # train/val fixed net exhaustively
        mem_fit, spp_size, hard_best_val, hard_val_all_epochs, soft_best_val, soft_val_all_epochs, train_flag, \
            hard_best_background, hard_best_crack, hard_best_spallation, hard_best_exposed_bars, \
            hard_best_efflorescence, hard_best_corrosion_stain = \
            train_val_net(q_learner.state_list, dataset, weight_initializer, device, args, save_path)

        # add fixed net and performance measures to a data frame and save it
        q_learner.add_fixed_net_to_fixed_net_buffer(
            spp_size=spp_size, hard_best_val=hard_best_val, hard_val_all_epochs=hard_val_all_epochs,
            soft_best_val=soft_best_val, soft_val_all_epochs=soft_val_all_epochs,
            hard_best_background=hard_best_background, hard_best_crack=hard_best_crack,
            hard_best_spallation=hard_best_spallation, hard_best_exposed_bars=hard_best_exposed_bars,
            hard_best_efflorescence=hard_best_efflorescence, hard_best_corrosion_stain=hard_best_corrosion_stain)

        # save fixed net buffer
        q_learner.save_final()

    # raise exception if no matching task
    else:
        raise NotImplementedError('Given task no. not implemented.')
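
# As used above, `state_space_parameters.epsilon_schedule` is a list of [epsilon, number_of_nets]
# pairs that defines the epsilon-greedy exploration schedule: episode[0] is the exploration rate
# and episode[1] the number of architectures sampled at that rate. The concrete values below are
# purely illustrative (MetaQNN-style schedules look roughly like this), not the ones used here.

epsilon_schedule_example = [
    [1.0, 100],  # sample architectures fully at random
    [0.7, 10],   # then mix greedy choices from the Q-table with random exploration
    [0.5, 10],
    [0.3, 10],
    [0.1, 10],   # finally, mostly exploit the learned Q-values
]
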
def main():
    # Command line options
    args = parser.parse_args()
    print("Command line options:")
    for arg in vars(args):
        print(arg, getattr(args, arg))

    # import the correct loss and training functions depending on which model to optimize
    # TODO: these could easily be refactored into one function, but we kept it this way for modularity
    if args.train_var:
        if args.joint:
            from lib.Training.train import train_var_joint as train
            from lib.Training.validate import validate_var_joint as validate
            from lib.Training.loss_functions import var_loss_function_joint as criterion
        else:
            from lib.Training.train import train_var as train
            from lib.Training.validate import validate_var as validate
            from lib.Training.loss_functions import var_loss_function as criterion
    else:
        if args.joint:
            from lib.Training.train import train_joint as train
            from lib.Training.validate import validate_joint as validate
            from lib.Training.loss_functions import loss_function_joint as criterion
        else:
            from lib.Training.train import train as train
            from lib.Training.validate import validate as validate
            from lib.Training.loss_functions import loss_function as criterion

    # Check whether GPU is available and can be used
    # if CUDA is found then device is set accordingly
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Launch a writer for the tensorboard summary writer instance
    save_path = 'runs/' + strftime("%Y-%m-%d_%H-%M-%S", gmtime()) + '_' + args.dataset + '_' + args.architecture + \
                '_dropout_' + str(args.dropout)
    if args.train_var:
        save_path += '_variational_samples_' + str(args.var_samples) + '_latent_dim_' + str(args.var_latent_dim)
    if args.joint:
        save_path += '_joint'

    # if we are resuming a previous training, note it in the name
    if args.resume:
        save_path = save_path + '_resumed'
    writer = SummaryWriter(save_path)

    # saving the parsed args to file
    log_file = os.path.join(save_path, "stdout")
    log = open(log_file, "a")
    for arg in vars(args):
        log.write(arg + ':' + str(getattr(args, arg)) + '\n')

    # Dataset loading
    data_init_method = getattr(datasets, args.dataset)
    dataset = data_init_method(torch.cuda.is_available(), args)

    # get the number of classes from the class dictionary
    num_classes = dataset.num_classes

    # add command line options to TensorBoard
    args_to_tensorboard(writer, args)

    log.close()

    # Get a sample input from the data loader to infer color channels/size
    net_input, _ = next(iter(dataset.train_loader))
    # get the amount of color channels in the input images
    num_colors = net_input.size(1)

    # import model from architectures class
    net_init_method = getattr(architectures, args.architecture)

    # build the model
    model = net_init_method(device, num_classes, num_colors, args)

    # Parallel container for multi GPU use and cast to available device
    model = torch.nn.DataParallel(model).to(device)
    print(model)

    # Initialize the weights of the model, by default according to He et al.
    print("Initializing network with: " + args.weight_init)
    WeightInitializer = WeightInit(args.weight_init)
    WeightInitializer.init_model(model)

    # Define optimizer and loss function (criterion)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay=args.weight_decay)

    epoch = 0
    best_prec = 0
    best_loss = random.getrandbits(128)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            epoch = checkpoint['epoch']
            best_prec = checkpoint['best_prec']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # optimize until the final amount of epochs is reached.
    while epoch < args.epochs:
        # train
        train(dataset, model, criterion, epoch, optimizer, writer, device, args)

        # evaluate on validation set
        prec, loss = validate(dataset, model, criterion, epoch, writer, device, args)

        # remember best prec@1 and save checkpoint
        is_best = loss < best_loss
        best_loss = min(loss, best_loss)
        best_prec = max(prec, best_prec)
        save_checkpoint({'epoch': epoch,
                         'arch': args.architecture,
                         'state_dict': model.state_dict(),
                         'best_prec': best_prec,
                         'best_loss': best_loss,
                         'optimizer': optimizer.state_dict()},
                        is_best, save_path)

        # increment epoch counters
        epoch += 1

    writer.close()