Example #1
0
 def log_record(self, config):
     """Create the log directory and initialise the experiment loggers.

     Stores the combined logger and its file path on the instance, then
     writes the supplied configuration into that log file.

     :param config: configuration object to be recorded in the log
     :return: None
     """
     # Per-experiment log directory, e.g. ``log_AI_GAN``.
     directory = "log_{}".format('AI_GAN')
     tl.files.exists_or_mkdir(directory)
     # Remember both the logger object and where the log file lives.
     self.log_all, self.log_all_filename = utils.logging_setup(directory)
     utils.log_config(self.log_all_filename, config)
Example #2
0
# NOTE(review): this snippet compares the output of several Rasa NLU
# tokenizers on one sample sentence; ``spacy_nlp`` and ``logging_setup``
# come from a project-local ``utils`` module that is not shown here.
from utils import spacy_nlp, logging_setup

from rasa.nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer
from rasa.nlu.tokenizers.spacy_tokenizer import SpacyTokenizer
from rasa.nlu.tokenizers.mitie_tokenizer import MitieTokenizer
from rasa.nlu.tokenizers.convert_tokenizer import ConveRTTokenizer
from rasa.nlu.tokenizers.lm_tokenizer import LanguageModelTokenizer
from rasa.nlu.utils.hugging_face.hf_transformers import HFTransformersNLP
from rasa.nlu.training_data import Message
from rasa.nlu.constants import (TEXT, SPACY_DOCS)

logger = logging_setup()

# Sample sentence fed through every tokenizer below.
test_input = "Okay, pick up this yellow banana for me."
message = Message(test_input)

# Whitespace tokenizer: splits the raw text attribute.
tk = WhitespaceTokenizer()
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('Whitespace: {}'.format([t.text for t in tokens]))

tk = SpacyTokenizer()

# The spaCy tokenizer reads a pre-computed spaCy doc, so attach it first.
message.set(SPACY_DOCS[TEXT], spacy_nlp(test_input))
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('SpaCy: {}'.format([t.text for t in tokens]))

tk = MitieTokenizer()
tokens = tk.tokenize(message, attribute=TEXT)
logger.info('Mitie: {}'.format([t.text for t in tokens]))

tk = ConveRTTokenizer()
# NOTE(review): snippet is truncated here — the ConveRT tokenize/log calls
# that presumably followed are not visible.
Example #3
0
 def log_record(self,config):
     """Create the log directory and initialise the experiment loggers.

     Stores the combined logger and its file path on the instance and
     writes the supplied configuration into the log file.

     :param config: configuration object to be recorded in the log
     :return: None
     """
     # Per-model log directory, e.g. ``log_BrainQuantAI_Part_one``.
     directory = "log_{}".format('BrainQuantAI_Part_one')
     tl.files.exists_or_mkdir(directory)
     self.log_all, self.log_all_filename = utils.logging_setup(directory)
     utils.log_config(self.log_all_filename, config)
Example #4
0
# NOTE(review): ``log`` is unused in the visible portion of this snippet —
# presumably used further down; confirm before removing.
from logging import log
import os
import sys
from argparse import ArgumentParser

# Make the project's "framework" directory importable.
sys.path.append(os.path.realpath("framework"))

from nydus_anchor import NydusAnchor
from nydusify import Nydusify
from utils import logging_setup

# Configure logging for the whole script (project-local helper).
logging_setup()


# alpine:3.10.2 fedora:30 rethinkdb:2.3.6 postgres:13.1 redis:5.0.5 mariadb:10.5 python:3.9 golang:1.12.9 gcc:10.2.0 jruby:9.2.8.0
# perl:5.30 php:7.3.8 pypy:3.5 r-base:3.6.1 drupal:8.7.6 jenkins:2.60.3 node:13.13.0 tomcat:10.0.0-jdk15-openjdk-buster wordpress:5.7

if __name__ == "__main__":

    parser = ArgumentParser()
    # One or more source image references to process.
    parser.add_argument(
        "--sources",
        nargs="+",
        type=str,
        default="",
    )

    parser.add_argument(
        "--backend",
        type=str,
        default="",
        # NOTE(review): snippet is truncated here — this add_argument call is
        # cut off before its closing parenthesis.
Example #5
0
# NOTE(review): this snippet starts mid-script — ``parser``, ``time``,
# ``utils``, ``glob``, ``os`` and ``json`` (plus the args referenced in
# ``args.save`` below) are defined in earlier, unseen lines.
parser.add_argument('--label_smooth', type=float, default=0.1, help='label smoothing')
parser.add_argument('--gamma', type=float, default=0.97, help='learning rate decay')
parser.add_argument('--decay_period', type=int, default=1, help='epochs between two learning rate decays')
parser.add_argument('--parallel', action='store_true', default=False, help='data parallelism')
parser.add_argument('--ops', type=str, default='OPS', help='which operations to use, options are OPS and DARTS_OPS')
parser.add_argument('--primitives', type=str, default='PRIMITIVES',
                    help='which primitive layers to use inside a cell search space,'
                         ' options are PRIMITIVES and DARTS_PRIMITIVES')
parser.add_argument('--flops', action='store_true', default=False, help='count flops and exit, aka floating point operations.')
args = parser.parse_args()

# Unique experiment directory named by timestamp, dataset and architecture;
# the current *.py sources are copied in for reproducibility.
args.save = 'eval-{}-{}-{}-{}'.format(time.strftime("%Y%m%d-%H%M%S"), args.save, args.dataset, args.arch)
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

log_file_path = os.path.join(args.save, 'log.txt')
logger = utils.logging_setup(log_file_path)
# Persist the exact command-line arguments next to the logs.
params_path = os.path.join(args.save, 'commandline_args.json')
with open(params_path, 'w') as f:
    json.dump(vars(args), f)

# Number of output classes (ImageNet).
CLASSES = 1000

class CrossEntropyLabelSmooth(nn.Module):
  """Cross-entropy loss module parameterised for label smoothing.

  NOTE(review): the snippet is truncated — the ``forward`` method is not
  visible here, so the exact smoothing computation cannot be confirmed;
  presumably ``epsilon`` mass is spread uniformly over ``num_classes``.
  """

  def __init__(self, num_classes, epsilon):
    """Store the smoothing parameters and the log-softmax layer."""
    super(CrossEntropyLabelSmooth, self).__init__()
    self.num_classes = num_classes
    self.epsilon = epsilon
    # Log-softmax over the class dimension (dim=1).
    self.logsoftmax = nn.LogSoftmax(dim=1)
Example #6
0
def main():
  """Train and evaluate a CNN selected by ``--arch`` on an image dataset.

  Parses the command line, builds the network described by ``--arch`` /
  ``--ops`` / ``--primitives``, then either evaluates a saved model
  (``--evaluate``) or trains it with a cosine power annealing learning-rate
  schedule, checkpointing the weights whenever validation accuracy improves.
  Depends on module-level helpers (``utils``, ``dataset``, ``genotypes``,
  ``operations``, model classes, ``train``/``infer``/``evaluate``) imported
  elsewhere in this file.
  """
  parser = argparse.ArgumentParser("Common Argument Parser")
  parser.add_argument('--data', type=str, default='../data', help='location of the data corpus')
  parser.add_argument('--dataset', type=str, default='cifar10', help='which dataset:\
                      cifar10, mnist, emnist, fashion, svhn, stl10, devanagari')
  parser.add_argument('--batch_size', type=int, default=64, help='batch size')
  parser.add_argument('--learning_rate', type=float, default=0.025, help='init learning rate')
  parser.add_argument('--learning_rate_min', type=float, default=1e-8, help='min learning rate')
  parser.add_argument('--lr_power_annealing_exponent_order', type=float, default=2,
                      help='Cosine Power Annealing Schedule Base, larger numbers make '
                           'the exponential more dominant, smaller make cosine more dominant, '
                           '1 returns to standard cosine annealing.')
  parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
  parser.add_argument('--weight_decay', '--wd', dest='weight_decay', type=float, default=3e-4, help='weight decay')
  parser.add_argument('--partial', default=1/8, type=float, help='partially adaptive parameter p in Padam')
  parser.add_argument('--report_freq', type=float, default=50, help='report frequency')
  parser.add_argument('--gpu', type=int, default=0, help='gpu device id')
  parser.add_argument('--epochs', type=int, default=2000, help='num of training epochs')
  parser.add_argument('--start_epoch', default=1, type=int, metavar='N',
                      help='manual epoch number (useful for restarts)')
  parser.add_argument('--warmup_epochs', type=int, default=5, help='num of warmup training epochs')
  parser.add_argument('--warm_restarts', type=int, default=20, help='warm restarts of cosine annealing')
  parser.add_argument('--init_channels', type=int, default=36, help='num of init channels')
  parser.add_argument('--mid_channels', type=int, default=32, help='C_mid channels in choke SharpSepConv')
  parser.add_argument('--layers', type=int, default=20, help='total number of layers')
  parser.add_argument('--model_path', type=str, default='saved_models', help='path to save the model')
  parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower')
  parser.add_argument('--mixed_auxiliary', action='store_true', default=False, help='Learn weights for auxiliary networks during training. Overrides auxiliary flag')
  parser.add_argument('--auxiliary_weight', type=float, default=0.4, help='weight for auxiliary loss')
  parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
  parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
  parser.add_argument('--autoaugment', action='store_true', default=False, help='use cifar10 autoaugment https://arxiv.org/abs/1805.09501')
  parser.add_argument('--random_eraser', action='store_true', default=False, help='use random eraser')
  parser.add_argument('--drop_path_prob', type=float, default=0.2, help='drop path probability')
  parser.add_argument('--save', type=str, default='EXP', help='experiment name')
  parser.add_argument('--seed', type=int, default=0, help='random seed')
  parser.add_argument('--arch', type=str, default='DARTS', help='which architecture to use')
  parser.add_argument('--ops', type=str, default='OPS', help='which operations to use, options are OPS and DARTS_OPS')
  parser.add_argument('--primitives', type=str, default='PRIMITIVES',
                      help='which primitive layers to use inside a cell search space,'
                           ' options are PRIMITIVES, SHARPER_PRIMITIVES, and DARTS_PRIMITIVES')
  parser.add_argument('--optimizer', type=str, default='sgd', help='which optimizer to use, options are padam and sgd')
  parser.add_argument('--load', type=str, default='',  metavar='PATH', help='load weights at specified location')
  parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')
  parser.add_argument('--flops', action='store_true', default=False, help='count flops and exit, aka floating point operations.')
  parser.add_argument('-e', '--evaluate', dest='evaluate', type=str, metavar='PATH', default='',
                      help='evaluate model at specified path on training, test, and validation datasets')
  parser.add_argument('--multi_channel', action='store_true', default=False, help='perform multi channel search, a completely separate search space')
  parser.add_argument('--load_args', type=str, default='',  metavar='PATH',
                      help='load command line args from a json file, this will override '
                           'all currently set args except for --evaluate, and arguments '
                           'that did not exist when the json file was originally saved out.')
  parser.add_argument('--layers_of_cells', type=int, default=8, help='total number of cells in the whole network, default is 8 cells')
  parser.add_argument('--layers_in_cells', type=int, default=4,
                      help='Total number of nodes in each cell, aka number of steps,'
                           ' default is 4 nodes, which implies 8 ops')
  parser.add_argument('--weighting_algorithm', type=str, default='scalar',
                    help='which operations to use, options are '
                         '"max_w" (1. - max_w + w) * op, and scalar (w * op)')
  # TODO(ahundt) remove final path and switch back to genotype
  parser.add_argument('--load_genotype', type=str, default=None, help='Name of genotype to be used')
  parser.add_argument('--simple_path', default=True, action='store_false', help='Final model is a simple path (MultiChannelNetworkModel)')
  args = parser.parse_args()

  # Creates the save directory and attaches log/stats file paths to args.
  args = utils.initialize_files_and_args(args)

  logger = utils.logging_setup(args.log_file_path)

  # Training requires CUDA; exit immediately when no GPU is available.
  if not torch.cuda.is_available():
    logger.info('no gpu device available')
    sys.exit(1)

  # Seed numpy and torch (CPU + CUDA) for reproducibility.
  np.random.seed(args.seed)
  torch.cuda.set_device(args.gpu)
  cudnn.benchmark = True
  torch.manual_seed(args.seed)
  cudnn.enabled=True
  torch.cuda.manual_seed(args.seed)
  logger.info('gpu device = %d' % args.gpu)
  logger.info("args = %s", args)

  # Dataset-specific lookup tables provided by the project's dataset module.
  DATASET_CLASSES = dataset.class_dict[args.dataset]
  DATASET_CHANNELS = dataset.inp_channel_dict[args.dataset]
  DATASET_MEAN = dataset.mean_dict[args.dataset]
  DATASET_STD = dataset.std_dict[args.dataset]
  logger.info('output channels: ' + str(DATASET_CLASSES))

  # # load the correct ops dictionary
  # NOTE(review): eval() on --ops/--primitives/--arch executes arbitrary
  # expressions — acceptable only because the command line is trusted input.
  op_dict_to_load = "operations.%s" % args.ops
  logger.info('loading op dict: ' + str(op_dict_to_load))
  op_dict = eval(op_dict_to_load)

  # load the correct primitives list
  primitives_to_load = "genotypes.%s" % args.primitives
  logger.info('loading primitives:' + primitives_to_load)
  primitives = eval(primitives_to_load)
  logger.info('primitives: ' + str(primitives))

  genotype = eval("genotypes.%s" % args.arch)
  # create the neural network

  criterion = nn.CrossEntropyLoss()
  criterion = criterion.cuda()
  # Choose the model class: multi-channel search space, ImageNet, or CIFAR.
  if args.multi_channel:
    final_path = None
    if args.load_genotype is not None:
      genotype = getattr(genotypes, args.load_genotype)
      print(genotype)
      if type(genotype[0]) is str:
        logger.info('Path :%s', genotype)
    # TODO(ahundt) remove final path and switch back to genotype
    cnn_model = MultiChannelNetwork(
      args.init_channels, DATASET_CLASSES, layers=args.layers_of_cells, criterion=criterion, steps=args.layers_in_cells,
      weighting_algorithm=args.weighting_algorithm, genotype=genotype)
    flops_shape = [1, 3, 32, 32]
  elif args.dataset == 'imagenet':
      cnn_model = NetworkImageNet(args.init_channels, DATASET_CLASSES, args.layers, args.auxiliary, genotype, op_dict=op_dict, C_mid=args.mid_channels)
      flops_shape = [1, 3, 224, 224]
  else:
      cnn_model = NetworkCIFAR(args.init_channels, DATASET_CLASSES, args.layers, args.auxiliary, genotype, op_dict=op_dict, C_mid=args.mid_channels)
      flops_shape = [1, 3, 32, 32]
  cnn_model = cnn_model.cuda()

  logger.info("param size = %fMB", utils.count_parameters_in_MB(cnn_model))
  # With --flops, only report model complexity and exit without training.
  if args.flops:
    logger.info('flops_shape = ' + str(flops_shape))
    logger.info("flops = " + utils.count_model_flops(cnn_model, data_shape=flops_shape))
    return

  optimizer = torch.optim.SGD(
      cnn_model.parameters(),
      args.learning_rate,
      momentum=args.momentum,
      weight_decay=args.weight_decay
      )

  # Get preprocessing functions (i.e. transforms) to apply on data
  train_transform, valid_transform = utils.get_data_transforms(args)
  if args.evaluate:
    # evaluate the train dataset without augmentation
    train_transform = valid_transform

  # Get the training queue, use full training and test set
  train_queue, valid_queue = dataset.get_training_queues(
    args.dataset, train_transform, valid_transform, args.data, args.batch_size, train_proportion=1.0, search_architecture=False)

  test_queue = None
  if args.dataset == 'cifar10':
    # evaluate best model weights on cifar 10.1
    # https://github.com/modestyachts/CIFAR-10.1
    test_data = cifar10_1.CIFAR10_1(root=args.data, download=True, transform=valid_transform)
    test_queue = torch.utils.data.DataLoader(
      test_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=8)

  if args.evaluate:
    # evaluate the loaded model, print the result, and return
    logger.info("Evaluating inference with weights file: " + args.load)
    eval_stats = evaluate(
      args, cnn_model, criterion, args.load,
      train_queue=train_queue, valid_queue=valid_queue, test_queue=test_queue)
    with open(args.stats_file, 'w') as f:
      arg_dict = vars(args)
      arg_dict.update(eval_stats)
      json.dump(arg_dict, f)
    logger.info("flops = " + utils.count_model_flops(cnn_model))
    logger.info(utils.dict_to_log_string(eval_stats))
    logger.info('\nEvaluation of Loaded Model Complete! Save dir: ' + str(args.save))
    return

  # Precompute the per-epoch learning rates for the whole run.
  lr_schedule = cosine_power_annealing(
    epochs=args.epochs, max_lr=args.learning_rate, min_lr=args.learning_rate_min,
    warmup_epochs=args.warmup_epochs, exponent_order=args.lr_power_annealing_exponent_order)
  epochs = np.arange(args.epochs) + args.start_epoch
  # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))
  epoch_stats = []

  stats_csv = args.epoch_stats_file
  stats_csv = stats_csv.replace('.json', '.csv')
  # Main training loop; best weights (by validation accuracy) are kept on disk.
  with tqdm(epochs, dynamic_ncols=True) as prog_epoch:
    best_valid_acc = 0.0
    best_epoch = 0
    best_stats = {}
    stats = {}
    epoch_stats = []
    weights_file = os.path.join(args.save, 'weights.pt')
    for epoch, learning_rate in zip(prog_epoch, lr_schedule):
      # update the drop_path_prob augmentation
      cnn_model.drop_path_prob = args.drop_path_prob * epoch / args.epochs
      # update the learning rate
      for param_group in optimizer.param_groups:
        param_group['lr'] = learning_rate
      # scheduler.get_lr()[0]

      train_acc, train_obj = train(args, train_queue, cnn_model, criterion, optimizer)

      val_stats = infer(args, valid_queue, cnn_model, criterion)
      stats.update(val_stats)
      stats['train_acc'] = train_acc
      stats['train_loss'] = train_obj
      stats['lr'] = learning_rate
      stats['epoch'] = epoch

      if stats['valid_acc'] > best_valid_acc:
        # new best epoch, save weights
        utils.save(cnn_model, weights_file)
        best_epoch = epoch
        best_stats.update(copy.deepcopy(stats))
        best_valid_acc = stats['valid_acc']
        best_train_loss = train_obj
        best_train_acc = train_acc
      # else:
      #   # not best epoch, load best weights
      #   utils.load(cnn_model, weights_file)
      logger.info('epoch, %d, train_acc, %f, valid_acc, %f, train_loss, %f, valid_loss, %f, lr, %e, best_epoch, %d, best_valid_acc, %f, ' + utils.dict_to_log_string(stats),
                  epoch, train_acc, stats['valid_acc'], train_obj, stats['valid_loss'], learning_rate, best_epoch, best_valid_acc)
      stats['train_acc'] = train_acc
      stats['train_loss'] = train_obj
      # Persist per-epoch stats after every epoch (JSON and CSV).
      epoch_stats += [copy.deepcopy(stats)]
      with open(args.epoch_stats_file, 'w') as f:
        json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
      utils.list_of_dicts_to_csv(stats_csv, epoch_stats)

    # get stats from best epoch including cifar10.1
    eval_stats = evaluate(args, cnn_model, criterion, weights_file, train_queue, valid_queue, test_queue)
    with open(args.stats_file, 'w') as f:
      arg_dict = vars(args)
      arg_dict.update(eval_stats)
      json.dump(arg_dict, f, cls=utils.NumpyEncoder)
    with open(args.epoch_stats_file, 'w') as f:
      json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
    logger.info(utils.dict_to_log_string(eval_stats))
    logger.info('Training of Final Model Complete! Save dir: ' + str(args.save))
Example #7
0
def main():
    """Train and evaluate a network on the CoSTAR block-stacking dataset.

    Supports optional distributed training (NCCL via WORLD_SIZE), resuming
    from a checkpoint, and a pure evaluation mode (``--evaluate``).  Uses a
    cosine power annealing learning-rate schedule and checkpoints the model
    whenever the validation combined error improves.  Relies on the
    module-level globals ``best_combined_error``, ``args`` and ``logger``
    plus project helpers (``utils``, ``dataset``, model classes, ``train``,
    ``validate``, ``save_checkpoint``) defined elsewhere in this file.
    """
    global best_combined_error, args, logger

    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    # commented because it is now set as an argparse param.
    # args.gpu = 0
    args.world_size = 1

    if args.distributed:
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    # note the gpu is used for directory creation and log files
    # which is needed when run as multiple processes
    args = utils.initialize_files_and_args(args)
    logger = utils.logging_setup(args.log_file_path)

    if args.fp16 if hasattr(args, 'fp16') else False:
        pass

    # # load the correct ops dictionary
    # NOTE(review): eval() on --ops/--primitives/--arch executes arbitrary
    # expressions — acceptable only because the command line is trusted input.
    op_dict_to_load = "operations.%s" % args.ops
    logger.info('loading op dict: ' + str(op_dict_to_load))
    op_dict = eval(op_dict_to_load)

    # load the correct primitives list
    primitives_to_load = "genotypes.%s" % args.primitives
    logger.info('loading primitives:' + primitives_to_load)
    primitives = eval(primitives_to_load)
    logger.info('primitives: ' + str(primitives))
    # get the number of output channels
    classes = dataset.costar_class_dict[args.feature_mode]

    if args.arch == 'NetworkResNetCOSTAR':
        # baseline model for comparison
        model = NetworkResNetCOSTAR(args.init_channels,
                                    classes,
                                    args.layers,
                                    args.auxiliary,
                                    None,
                                    vector_size=VECTOR_SIZE,
                                    op_dict=op_dict,
                                    C_mid=args.mid_channels)
    else:
        # create model
        genotype = eval("genotypes.%s" % args.arch)
        # create the neural network
        model = NetworkCOSTAR(args.init_channels,
                              classes,
                              args.layers,
                              args.auxiliary,
                              genotype,
                              vector_size=VECTOR_SIZE,
                              op_dict=op_dict,
                              C_mid=args.mid_channels)

    model.drop_path_prob = 0.0
    # if args.pretrained:
    #     logger.info("=> using pre-trained model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch](pretrained=True)
    # else:
    #     logger.info("=> creating model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch]()

    if args.sync_bn:
        import apex
        logger.info("using apex synced BN")
        model = apex.parallel.convert_syncbn_model(model)

    model = model.cuda()
    if args.distributed:
        # By default, apex.parallel.DistributedDataParallel overlaps communication with
        # computation in the backward pass.
        # model = DDP(model)
        # delay_allreduce delays all communication to the end of the backward pass.
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.MSELoss().cuda()
    # NOTE(rexxarchl): MSLE loss, indicated as better for rotation in costar_hyper/costar_block_stacking_train_regression.py
    #                  is not available in PyTorch by default

    # Scale learning rate based on global batch size
    args.learning_rate = args.learning_rate * float(
        args.batch_size * args.world_size) / 256.
    init_lr = args.learning_rate / args.warmup_lr_divisor
    optimizer = torch.optim.SGD(model.parameters(),
                                init_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # epoch_count = args.epochs - args.start_epoch
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(epoch_count))
    # scheduler = warmup_scheduler.GradualWarmupScheduler(
    #     optimizer, args.warmup_lr_divisor, args.warmup_epochs, scheduler)

    # Optionally resume from a checkpoint
    if args.resume or args.evaluate:
        if args.evaluate:
            args.resume = args.evaluate
        # Use a local scope to avoid dangling references
        def resume():
            # BUGFIX: without this declaration the assignment below created a
            # variable local to resume(), silently discarding the restored
            # best error; write it back to the module-level global instead.
            global best_combined_error
            if os.path.isfile(args.resume):
                logger.info("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(
                    args.resume,
                    map_location=lambda storage, loc: storage.cuda(args.gpu))
                args.start_epoch = checkpoint['epoch'] + 1
                if 'best_combined_error' in checkpoint:
                    best_combined_error = checkpoint['best_combined_error']
                model.load_state_dict(checkpoint['state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer'])
                # TODO(ahundt) make sure scheduler loading isn't broken
                # NOTE(review): `scheduler` is never constructed in this scope
                # (its creation above is commented out), so this branch would
                # raise NameError if a checkpoint contains 'lr_scheduler'.
                if 'lr_scheduler' in checkpoint:
                    scheduler.load_state_dict(checkpoint['lr_scheduler'])
                elif 'lr_schedule' in checkpoint:
                    lr_schedule = checkpoint['lr_schedule']
                logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            else:
                logger.info("=> no checkpoint found at '{}'".format(
                    args.resume))

        resume()

    # Get preprocessing functions (i.e. transforms) to apply on data
    # normalize_as_tensor = False because we normalize and convert to a
    # tensor in our custom prefetching function, rather than as part of
    # the transform preprocessing list.
    # train_transform, valid_transform = utils.get_data_transforms(args, normalize_as_tensor=False)
    train_transform = valid_transform = None  # NOTE(rexxarchl): data transforms are not applicable for CoSTAR BSD at the moment
    evaluate = True if args.evaluate else False
    # Get the training queue, select training and validation from training set
    train_loader, val_loader = dataset.get_training_queues(
        args.dataset,
        train_transform,
        valid_transform,
        args.data,
        args.batch_size,
        train_proportion=1.0,
        collate_fn=fast_collate,
        distributed=args.distributed,
        num_workers=args.workers,
        costar_set_name=args.set_name,
        costar_subset_name=args.subset_name,
        costar_feature_mode=args.feature_mode,
        costar_version=args.version,
        costar_num_images_per_example=args.num_images_per_example,
        costar_output_shape=(224, 224, 3),
        costar_random_augmentation=None,
        costar_one_hot_encoding=True,
        evaluate=evaluate)

    if args.evaluate:
        # Load the test set
        test_loader = dataset.get_costar_test_queue(
            args.data,
            costar_set_name=args.set_name,
            costar_subset_name=args.subset_name,
            collate_fn=fast_collate,
            costar_feature_mode=args.feature_mode,
            costar_version=args.version,
            costar_num_images_per_example=args.num_images_per_example,
            costar_output_shape=(224, 224, 3),
            costar_random_augmentation=None,
            costar_one_hot_encoding=True)

        # Evaluate on all splits, without any augmentation
        validate(train_loader,
                 model,
                 criterion,
                 args,
                 prefix='evaluate_train_')
        validate(val_loader, model, criterion, args, prefix='evaluate_val_')
        validate(test_loader, model, criterion, args, prefix='evaluate_test_')
        return

    # Precompute the per-epoch learning rates for the whole run.
    lr_schedule = cosine_power_annealing(
        epochs=args.epochs,
        max_lr=args.learning_rate,
        min_lr=args.learning_rate_min,
        warmup_epochs=args.warmup_epochs,
        exponent_order=args.lr_power_annealing_exponent_order,
        restart_lr=args.restart_lr)
    epochs = np.arange(args.epochs) + args.start_epoch

    stats_csv = args.epoch_stats_file
    stats_csv = stats_csv.replace('.json', '.csv')
    # Main training loop; only local rank 0 logs and saves checkpoints.
    with tqdm(epochs,
              dynamic_ncols=True,
              disable=args.local_rank != 0,
              leave=False,
              initial=args.start_epoch) as prog_epoch:
        best_stats = {}
        stats = {}
        epoch_stats = []
        best_epoch = 0
        logger.info('Initial Learning Rate: ' + str(lr_schedule[0]))
        for epoch, learning_rate in zip(prog_epoch, lr_schedule):
            if args.distributed and train_loader.sampler is not None:
                train_loader.sampler.set_epoch(int(epoch))
            # if args.distributed:
            # train_sampler.set_epoch(epoch)
            # update the learning rate
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            # scheduler.step()
            model.drop_path_prob = args.drop_path_prob * float(epoch) / float(
                args.epochs)
            # train for one epoch
            train_stats = train(train_loader, model, criterion, optimizer,
                                int(epoch), args)
            if args.prof:
                break
            # evaluate on validation set
            combined_error, val_stats = validate(val_loader, model, criterion,
                                                 args)
            stats.update(train_stats)
            stats.update(val_stats)
            # stats['lr'] = '{0:.5f}'.format(scheduler.get_lr()[0])
            stats['lr'] = '{0:.5f}'.format(learning_rate)
            stats['epoch'] = epoch

            # remember best combined_error and save checkpoint
            if args.local_rank == 0:
                is_best = combined_error < best_combined_error
                best_combined_error = min(combined_error, best_combined_error)
                stats['best_combined_error'] = '{0:.3f}'.format(
                    best_combined_error)
                if is_best:
                    best_epoch = epoch
                    best_stats = copy.deepcopy(stats)
                stats['best_epoch'] = best_epoch

                stats_str = utils.dict_to_log_string(stats)
                logger.info(stats_str)
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_combined_error': best_combined_error,
                        'optimizer': optimizer.state_dict(),
                        # 'lr_scheduler': scheduler.state_dict()
                        'lr_schedule': lr_schedule,
                        'stats': best_stats
                    },
                    is_best,
                    path=args.save)
                prog_epoch.set_description(
                    'Overview ***** best_epoch: {0} best_valid_combined_error: {1:.2f} ***** Progress'
                    .format(best_epoch, best_combined_error))
            # Persist per-epoch stats after every epoch (JSON and CSV).
            epoch_stats += [copy.deepcopy(stats)]
            with open(args.epoch_stats_file, 'w') as f:
                json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
            utils.list_of_dicts_to_csv(stats_csv, epoch_stats)
        stats_str = utils.dict_to_log_string(best_stats, key_prepend='best_')
        logger.info(stats_str)
        with open(args.stats_file, 'w') as f:
            arg_dict = vars(args)
            arg_dict.update(best_stats)
            json.dump(arg_dict, f, cls=utils.NumpyEncoder)
        with open(args.epoch_stats_file, 'w') as f:
            json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
        utils.list_of_dicts_to_csv(stats_csv, epoch_stats)
        logger.info('Training of Final Model Complete!')

        # Do a final evaluation
        logger.info('Final evaluation')
        # Load the best model
        best_model_path = os.path.join(args.save, 'model_best.pth.tar')
        best_model = torch.load(
            best_model_path,
            map_location=lambda storage, loc: storage.cuda(args.gpu))
        model.load_state_dict(best_model['state_dict'])
        # optimizer.load_state_dict(best_model['optimizer'])
        logger.info("=> loaded best_model '{}' from epoch {}".format(
            best_model_path, best_model['epoch']))

        # Get the train and validation set in evaluate mode
        train_loader, val_loader = dataset.get_training_queues(
            args.dataset,
            train_transform,
            valid_transform,
            args.data,
            args.batch_size,
            train_proportion=1.0,
            collate_fn=fast_collate,
            distributed=args.distributed,
            num_workers=args.workers,
            costar_set_name=args.set_name,
            costar_subset_name=args.subset_name,
            costar_feature_mode=args.feature_mode,
            costar_version=args.version,
            costar_num_images_per_example=args.num_images_per_example,
            costar_output_shape=(224, 224, 3),
            costar_random_augmentation=None,
            costar_one_hot_encoding=True,
            evaluate=evaluate)

        # Get the test set
        test_loader = dataset.get_costar_test_queue(
            args.data,
            costar_set_name=args.set_name,
            costar_subset_name=args.subset_name,
            collate_fn=fast_collate,
            costar_feature_mode=args.feature_mode,
            costar_version=args.version,
            costar_num_images_per_example=args.num_images_per_example,
            costar_output_shape=(224, 224, 3),
            costar_random_augmentation=None,
            costar_one_hot_encoding=True)

        # Evaluate on all splits, without any augmentation
        validate(train_loader,
                 model,
                 criterion,
                 args,
                 prefix='best_final_train_')
        validate(val_loader, model, criterion, args, prefix='best_final_val_')
        validate(test_loader,
                 model,
                 criterion,
                 args,
                 prefix='best_final_test_')
        # BUGFIX: the original line had broken quoting — the concatenation
        # (' + str(args.save)') was inside the string literal, so the save
        # directory was never interpolated into the log message.
        logger.info('Final evaluation complete! Save dir: ' + str(args.save))
Example #8
0
# NOTE(review): snippet starts mid-script — ``config``, ``tl`` (TensorLayer),
# ``utils``, ``log_config``, ``tf``, ``BATCH_SIZE``, ``crop_size_FE/PE`` and
# ``validation_FE/PE`` are defined in earlier, unseen lines.
NUM_CHANNELS = config.NUM_CHANNELS
MODEL_NAME = config.MODEL_NAME
tfrecord_filename = config.tfrecord_filename
TESTING_NUM = config.TESTING_NUM
Test_Batch_size = config.Test_Batch_size
### filename
# NOTE(review): ``tfrecord_filename`` is assigned twice with the same value;
# the second assignment below is redundant.
tfrecord_filename = config.tfrecord_filename
save_model_filename = config.save_model_filename
save_model_filename_best = config.save_model_filename_best
restore_model_filename = config.restore_model_filename
tl.files.exists_or_mkdir(save_model_filename)
tl.files.exists_or_mkdir(save_model_filename_best)
# log
# Per-model log directory with separate "all" and "eval" loggers; the full
# configuration is recorded into both log files.
log_dir = "log_{}".format(MODEL_NAME)
tl.files.exists_or_mkdir(log_dir)
log_all, log_eval, log_all_filename, log_eval_filename = utils.logging_setup(
    log_dir)
log_config(log_all_filename, config)
log_config(log_eval_filename, config)

# Create Input and Output
with tf.name_scope('input'):
    # Low-resolution training input (TF1-style placeholder).
    low_res_holder = tf.placeholder(
        tf.float32,
        shape=[BATCH_SIZE, crop_size_FE, crop_size_PE, NUM_CHANNELS],
        name='low')
    # High-resolution ground-truth target with the same crop shape.
    high_res_holder = tf.placeholder(
        tf.float32,
        shape=[BATCH_SIZE, crop_size_FE, crop_size_PE, NUM_CHANNELS])
    # Validation-time low-resolution input with its own batch size and shape.
    low_res_holder_validation = tf.placeholder(
        tf.float32,
        shape=[Test_Batch_size, validation_FE, validation_PE, NUM_CHANNELS])
Example #9
0
def main():
    """Build, optionally resume, then train or evaluate the final model.

    Configuration comes from the module-level ``args`` namespace and results
    are written under ``args.save``.  Supports distributed (NCCL) training,
    apex fp16 / synced batch norm, FLOPs counting, checkpoint resume, and an
    evaluation-only mode.  Mutates the module globals ``best_top1``, ``args``
    and ``logger``.
    """
    global best_top1, args, logger

    # torch.distributed.launch exports WORLD_SIZE; more than one process
    # means we run distributed.
    args.distributed = False
    if 'WORLD_SIZE' in os.environ:
        args.distributed = int(os.environ['WORLD_SIZE']) > 1

    # commented because it is now set as an argparse param.
    # args.gpu = 0
    args.world_size = 1

    if args.distributed:
        # Pin each process to one GPU based on its local rank.
        args.gpu = args.local_rank % torch.cuda.device_count()
        torch.cuda.set_device(args.gpu)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        args.world_size = torch.distributed.get_world_size()

    # note the gpu is used for directory creation and log files
    # which is needed when run as multiple processes
    args = utils.initialize_files_and_args(args)
    logger = utils.logging_setup(args.log_file_path)

    if args.fp16:
        assert torch.backends.cudnn.enabled, "fp16 mode requires cudnn backend to be enabled."

    if args.static_loss_scale != 1.0:
        if not args.fp16:
            logger.info(
                "Warning:  if --fp16 is not used, static_loss_scale will be ignored."
            )

    # Resolve the ops dictionary / primitives / genotype by name.  eval() is
    # only ever fed identifiers composed from command-line choices here.
    op_dict_to_load = "operations.%s" % args.ops
    logger.info('loading op dict: ' + str(op_dict_to_load))
    op_dict = eval(op_dict_to_load)

    # load the correct primitives list
    primitives_to_load = "genotypes.%s" % args.primitives
    logger.info('loading primitives:' + primitives_to_load)
    primitives = eval(primitives_to_load)
    logger.info('primitives: ' + str(primitives))
    # create model
    genotype = eval("genotypes.%s" % args.arch)
    # get the number of output channels
    classes = dataset.class_dict[args.dataset]
    # create the neural network
    if args.dataset == 'imagenet':
        model = NetworkImageNet(args.init_channels,
                                classes,
                                args.layers,
                                args.auxiliary,
                                genotype,
                                op_dict=op_dict,
                                C_mid=args.mid_channels)
        flops_shape = [1, 3, 224, 224]
    else:
        model = NetworkCIFAR(args.init_channels,
                             classes,
                             args.layers,
                             args.auxiliary,
                             genotype,
                             op_dict=op_dict,
                             C_mid=args.mid_channels)
        flops_shape = [1, 3, 32, 32]
    model.drop_path_prob = 0.0
    # if args.pretrained:
    #     logger.info("=> using pre-trained model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch](pretrained=True)
    # else:
    #     logger.info("=> creating model '{}'".format(args.arch))
    #     model = models.__dict__[args.arch]()

    if args.flops:
        # Report parameter count and FLOPs, then exit without training.
        model = model.cuda()
        logger.info("param size = %fMB", utils.count_parameters_in_MB(model))
        logger.info("flops_shape = " + str(flops_shape))
        logger.info("flops = " +
                    utils.count_model_flops(model, data_shape=flops_shape))
        return

    if args.sync_bn:
        import apex
        logger.info("using apex synced BN")
        model = apex.parallel.convert_syncbn_model(model)

    model = model.cuda()
    if args.fp16:
        model = network_to_half(model)
    if args.distributed:
        # By default, apex.parallel.DistributedDataParallel overlaps communication with
        # computation in the backward pass.
        # model = DDP(model)
        # delay_allreduce delays all communication to the end of the backward pass.
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    # Scale learning rate based on global batch size
    args.learning_rate = args.learning_rate * float(
        args.batch_size * args.world_size) / 256.
    init_lr = args.learning_rate / args.warmup_lr_divisor
    optimizer = torch.optim.SGD(model.parameters(),
                                init_lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # epoch_count = args.epochs - args.start_epoch
    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(epoch_count))
    # scheduler = warmup_scheduler.GradualWarmupScheduler(
    #     optimizer, args.warmup_lr_divisor, args.warmup_epochs, scheduler)

    if args.fp16:
        optimizer = FP16_Optimizer(optimizer,
                                   static_loss_scale=args.static_loss_scale,
                                   dynamic_loss_scale=args.dynamic_loss_scale)

    # Optionally resume from a checkpoint
    if args.resume or args.evaluate:
        if args.evaluate:
            args.resume = args.evaluate
        # Use a local scope to avoid dangling references
        def resume():
            # BUGFIX: best_top1 must be declared global here as well;
            # previously the assignment below only created a local that was
            # silently discarded, so the restored best accuracy was lost.
            global best_top1
            if os.path.isfile(args.resume):
                logger.info("=> loading checkpoint '{}'".format(args.resume))
                checkpoint = torch.load(
                    args.resume,
                    map_location=lambda storage, loc: storage.cuda(args.gpu))
                args.start_epoch = checkpoint['epoch']
                if 'best_top1' in checkpoint:
                    best_top1 = checkpoint['best_top1']
                model.load_state_dict(checkpoint['state_dict'])
                # An FP16_Optimizer instance's state dict internally stashes the master params.
                optimizer.load_state_dict(checkpoint['optimizer'])
                if 'lr_scheduler' in checkpoint:
                    # BUGFIX: no `scheduler` object exists any more (its
                    # creation is commented out above), so the original
                    # scheduler.load_state_dict() call raised NameError.
                    # Warn and skip instead.
                    logger.info(
                        "Warning: checkpoint contains 'lr_scheduler' state "
                        "but no scheduler is configured; ignoring it.")
                elif 'lr_schedule' in checkpoint:
                    # NOTE(review): binds a local only; the lr schedule is
                    # recomputed below regardless — confirm intent.
                    lr_schedule = checkpoint['lr_schedule']
                logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                    args.resume, checkpoint['epoch']))
            else:
                logger.info("=> no checkpoint found at '{}'".format(
                    args.resume))

        resume()

    # # Data loading code
    # traindir = os.path.join(args.data, 'train')
    # valdir = os.path.join(args.data, 'val')

    # if(args.arch == "inception_v3"):
    #     crop_size = 299
    #     val_size = 320 # I chose this value arbitrarily, we can adjust.
    # else:
    #     crop_size = 224
    #     val_size = 256

    # train_dataset = datasets.ImageFolder(
    #     traindir,
    #     transforms.Compose([
    #         transforms.RandomResizedCrop(crop_size),
    #         transforms.RandomHorizontalFlip(),
    #         autoaugment.ImageNetPolicy(),
    #         # transforms.ToTensor(),  # Too slow, moved to data_prefetcher()
    #         # normalize,
    #     ]))
    # val_dataset = datasets.ImageFolder(valdir, transforms.Compose([
    #         transforms.Resize(val_size),
    #         transforms.CenterCrop(crop_size)
    #     ]))

    # train_sampler = None
    # val_sampler = None
    # if args.distributed:
    #     train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    #     val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)

    # train_loader = torch.utils.data.DataLoader(
    #     train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
    #     num_workers=args.workers, pin_memory=True, sampler=train_sampler, collate_fn=fast_collate)

    # val_loader = torch.utils.data.DataLoader(
    #     val_dataset,
    #     batch_size=args.batch_size, shuffle=False,
    #     num_workers=args.workers, pin_memory=True,
    #     sampler=val_sampler,
    #     collate_fn=fast_collate)

    # Get preprocessing functions (i.e. transforms) to apply on data
    # normalize_as_tensor = False because we normalize and convert to a
    # tensor in our custom prefetching function, rather than as part of
    # the transform preprocessing list.
    train_transform, valid_transform = utils.get_data_transforms(
        args, normalize_as_tensor=False)
    # Get the training queue, select training and validation from training set
    train_loader, val_loader = dataset.get_training_queues(
        args.dataset,
        train_transform,
        valid_transform,
        args.data,
        args.batch_size,
        train_proportion=1.0,
        collate_fn=fast_collate,
        distributed=args.distributed,
        num_workers=args.workers)

    if args.evaluate:
        if args.dataset == 'cifar10':
            # evaluate best model weights on cifar 10.1
            # https://github.com/modestyachts/CIFAR-10.1
            train_transform, valid_transform = utils.get_data_transforms(args)
            # Get the training queue, use full training and test set
            train_queue, valid_queue = dataset.get_training_queues(
                args.dataset,
                train_transform,
                valid_transform,
                args.data,
                args.batch_size,
                train_proportion=1.0,
                search_architecture=False)
            test_data = cifar10_1.CIFAR10_1(root=args.data,
                                            download=True,
                                            transform=valid_transform)
            test_queue = torch.utils.data.DataLoader(
                test_data,
                batch_size=args.batch_size,
                shuffle=False,
                pin_memory=True,
                num_workers=args.workers)
            eval_stats = evaluate(args,
                                  model,
                                  criterion,
                                  train_queue=train_queue,
                                  valid_queue=valid_queue,
                                  test_queue=test_queue)
            with open(args.stats_file, 'w') as f:
                # TODO(ahundt) fix "TypeError: 1869 is not JSON serializable" to include arg info, see train.py
                # arg_dict = vars(args)
                # arg_dict.update(eval_stats)
                # json.dump(arg_dict, f)
                json.dump(eval_stats, f)
            logger.info("flops = " + utils.count_model_flops(model))
            logger.info(utils.dict_to_log_string(eval_stats))
            logger.info('\nEvaluation of Loaded Model Complete! Save dir: ' +
                        str(args.save))
        else:
            validate(val_loader, model, criterion, args)
        return

    # Cosine power annealing schedule: one learning rate per epoch,
    # including the warmup ramp.
    lr_schedule = cosine_power_annealing(
        epochs=args.epochs,
        max_lr=args.learning_rate,
        min_lr=args.learning_rate_min,
        warmup_epochs=args.warmup_epochs,
        exponent_order=args.lr_power_annealing_exponent_order,
        restart_lr=args.restart_lr)
    epochs = np.arange(args.epochs) + args.start_epoch

    stats_csv = args.epoch_stats_file
    stats_csv = stats_csv.replace('.json', '.csv')
    with tqdm(epochs,
              dynamic_ncols=True,
              disable=args.local_rank != 0,
              leave=False) as prog_epoch:
        best_stats = {}
        stats = {}
        epoch_stats = []
        best_epoch = 0
        for epoch, learning_rate in zip(prog_epoch, lr_schedule):
            if args.distributed and train_loader.sampler is not None:
                # Reshuffle shards per epoch in distributed mode.
                train_loader.sampler.set_epoch(int(epoch))
            # update the learning rate from the precomputed schedule
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            # Linearly ramp drop-path probability over the run.
            model.drop_path_prob = args.drop_path_prob * float(epoch) / float(
                args.epochs)
            # train for one epoch
            train_stats = train(train_loader, model, criterion, optimizer,
                                int(epoch), args)
            if args.prof:
                break
            # evaluate on validation set
            top1, val_stats = validate(val_loader, model, criterion, args)
            stats.update(train_stats)
            stats.update(val_stats)
            stats['lr'] = '{0:.5f}'.format(learning_rate)
            stats['epoch'] = epoch

            # remember best top1 and save checkpoint (rank 0 only)
            if args.local_rank == 0:
                is_best = top1 > best_top1
                best_top1 = max(top1, best_top1)
                stats['best_top1'] = '{0:.3f}'.format(best_top1)
                if is_best:
                    best_epoch = epoch
                    best_stats = copy.deepcopy(stats)
                stats['best_epoch'] = best_epoch

                stats_str = utils.dict_to_log_string(stats)
                logger.info(stats_str)
                save_checkpoint(
                    {
                        'epoch': epoch,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'best_top1': best_top1,
                        'optimizer': optimizer.state_dict(),
                        'lr_schedule': lr_schedule,
                        'stats': best_stats
                    },
                    is_best,
                    path=args.save)
                prog_epoch.set_description(
                    'Overview ***** best_epoch: {0} best_valid_top1: {1:.2f} ***** Progress'
                    .format(best_epoch, best_top1))
            # Persist per-epoch stats every epoch so a crash loses at most
            # the current epoch.
            epoch_stats += [copy.deepcopy(stats)]
            with open(args.epoch_stats_file, 'w') as f:
                json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
            utils.list_of_dicts_to_csv(stats_csv, epoch_stats)
        # Final summary: best stats merged with the full arg namespace.
        stats_str = utils.dict_to_log_string(best_stats, key_prepend='best_')
        logger.info(stats_str)
        with open(args.stats_file, 'w') as f:
            arg_dict = vars(args)
            arg_dict.update(best_stats)
            json.dump(arg_dict, f, cls=utils.NumpyEncoder)
        with open(args.epoch_stats_file, 'w') as f:
            json.dump(epoch_stats, f, cls=utils.NumpyEncoder)
        utils.list_of_dicts_to_csv(stats_csv, epoch_stats)
        logger.info('Training of Final Model Complete! Save dir: ' +
                    str(args.save))
Пример #10
0
    data_json['annotations'] = annotations
    data_json['categories'] = categories
    return data_json


def save_json(json_data, output_path):
    """Serialize *json_data* to *output_path* as pretty-printed JSON.

    Missing parent directories are created on demand; logs the destination
    path when done.

    Args:
        json_data: Any JSON-serializable object (typically a dict).
        output_path: Destination file path; may include directories that do
            not exist yet.
    """
    dir_path = os.path.dirname(output_path)
    if dir_path:
        # exist_ok avoids the check-then-create race of the original
        # os.path.exists() guard when several processes save concurrently.
        os.makedirs(dir_path, exist_ok=True)

    with open(output_path, 'w') as fp:
        json.dump(json_data, fp, indent=4)
    logging.info(f'Json was saved. File path: {output_path}')


# Module-level logger configured by the project's shared logging helper.
logger = utils.logging_setup(__name__)  # create logger


def main():
    args = parser.parse_args()
    categories = get_categories(args)
    maper_label_to_categoty_id = get_mapper_from_label_to_category_id(
        categories)
    if not args.split:
        images, annotations = process_data(args, maper_label_to_categoty_id)
        coco_json_data = create_coco_json(images, annotations, categories)
        save_json(coco_json_data, args.output_path)

    else:
        images_train, annotations_train, images_test, annotations_test = process_data_split_test_train(
            args, maper_label_to_categoty_id)
Пример #11
0
def main():
	"""Entry point for the atxpkg package-manager CLI.

	Parses docopt arguments, configures logging and the platform-specific
	database / repo-list / cache paths, then dispatches to the requested
	sub-command (install, update, merge_config, remove, list_available,
	list_installed, show_untracked, clean_cache).

	Returns:
		0 on success.  Raises Exception for user-level errors such as a
		missing or already-installed package.
	"""
	args = docopt.docopt(__doc__, version=__version__)
	log_level = 'DEBUG' if args['--debug'] else 'INFO'
	if sys.platform == 'win32':
		log_fn = 'c:/atxpkg/atxpkg.log'
	else:
		log_fn = '/tmp/atxpkg/atxpkg.log'
	utils.logging_setup(log_level, log_fn, print_=True)
	logging.info('*' * 40)
	logging.info('starting atxpkg v%s' % __version__)
	logging.debug('args: %s' % dict(args))
	# Platform-specific locations of the installed-package DB, repo list,
	# install prefix and download cache.
	if sys.platform == 'win32':
		logging.debug('detected win32')
		db_fn = 'c:/atxpkg/installed.json'
		repos_fn = 'c:/atxpkg/repos.txt'
		prefix = 'c:'
		cache_dir = 'c:/atxpkg/cache'
	else:
		logging.debug('detected non-win32')
		db_fn = '/tmp/atxpkg/installed.json'
		repos_fn = '/tmp/atxpkg/repos.txt'
		prefix = ''
		cache_dir = '/tmp/atxpkg/cache'
	repos = utils.get_repos(repos_fn)
	# The local cache doubles as a repository of last resort.
	repos.append(cache_dir)
	#logging.debug(str(args))
	# NOTE(review): without --prefix this resets prefix to '' even on win32,
	# where it was just set to 'c:' above — confirm this is intended.
	prefix = args['--prefix'] if args['--prefix'] else ''
	# First run: create an empty DB file and the cache directory.
	if not os.path.isfile(db_fn):
		logging.info('%s not found, creating empty one' % db_fn)
		with open(db_fn, 'w') as f:
			f.write('{}')
	if not os.path.isdir(cache_dir):
		logging.info('%s not found, creating empty one' % cache_dir)
		os.makedirs(cache_dir)
	installed_packages = utils.get_installed_packages(db_fn)
	force = args['--force']
	yes, no = args['--yes'], args['--no']
	if args['install']:
		# Validate every requested package first, then prompt, then install.
		available_packages = utils.get_available_packages(repos)
		for package in args['<package>']:
			package_name = utils.get_package_name(package)
			if package_name not in available_packages:
				raise Exception('unable to find package %s' % package_name)
			if package_name in installed_packages and not force:
				raise Exception('package %s already installed' % package_name)
		for package in args['<package>']:
			package_name = utils.get_package_name(package)
			package_version = utils.get_package_version(package)
			if package_version:
				url = utils.get_specific_version_url(available_packages[package_name], package_version)
			else:
				url = utils.get_max_version_url(available_packages[package_name])
			ver = utils.get_package_version(utils.get_package_fn(url))
			print('install %s-%s' % (package_name, ver))
		if no or not (yes or utils.yes_no('continue?', default='y')):
			return
		for package in args['<package>']:
			package_name = utils.get_package_name(package)
			package_version = utils.get_package_version(package)
			if package_version:
				url = utils.get_specific_version_url(available_packages[package_name], package_version)
			else:
				url = utils.get_max_version_url(available_packages[package_name])
			local_fn = utils.download_package(url, cache_dir)
			if not args['--downloadonly']:
				# Persist the DB after each package so a crash mid-batch
				# leaves already-installed packages recorded.
				package_info = utils.install_package(local_fn, prefix, force)
				installed_packages[package_name] = package_info
				utils.save_installed_packages(installed_packages, db_fn)
	elif args['update']:
		# A package spec may be "old..new" to rename during the update.
		available_packages = utils.get_available_packages(repos)
		if args['<package>']:
			packages = args['<package>']
			for package in packages:
				if '..' in package:
					package_old, package_new = package.split('..')
					package_name_old = utils.get_package_name(package_old)
					package_name_new = utils.get_package_name(package_new)
				else:
					package_name_old = package_name_new = utils.get_package_name(package)

				if package_name_old not in installed_packages:
					raise Exception('package %s not installed' % package_name_old)
		else:
			packages = installed_packages.keys()
		# Dry-run pass: collect and print what would be updated.
		packages_to_update = set()
		for package in packages:
			if '..' in package:
				package_old, package_new = package.split('..')
				package_name_old = utils.get_package_name(package_old)
				package_name_new = utils.get_package_name(package_new)
				package_version = utils.get_package_version(package_new)
			else:
				package_name_old = package_name_new = utils.get_package_name(package)
				package_version = utils.get_package_version(package)
			if package_name_new not in available_packages:
				logging.warning('%s not available in any repository' % package_name_new)
				continue
			if package_version:
				url = utils.get_specific_version_url(available_packages[package_name_new], package_version)
			else:
				url = utils.get_max_version_url(available_packages[package_name_new])
			ver_cur = installed_packages[package_name_old]['version']
			ver_avail = utils.get_package_version(utils.get_package_fn(url))
			if package_name_old != package_name_new or ver_avail != ver_cur or force:
				print('update %s-%s -> %s-%s' % (package_name_old, ver_cur, package_name_new, ver_avail))
				packages_to_update.add(package)
		if not packages_to_update:
			print('nothing to update')
			return
		if no or not (yes or utils.yes_no('continue?', default='y')):
			return
		# Apply pass: re-resolve each package and perform the update.
		for package in packages_to_update:
			if '..' in package:
				package_old, package_new = package.split('..')
				package_name_old = utils.get_package_name(package_old)
				package_name_new = utils.get_package_name(package_new)
				package_version = utils.get_package_version(package_new)
			else:
				package_name_old = package_name_new = utils.get_package_name(package)
				package_version = utils.get_package_version(package)
			if package_version:
				url = utils.get_specific_version_url(available_packages[package_name_new], package_version)
			else:
				url = utils.get_max_version_url(available_packages[package_name_new])
			ver_cur = installed_packages[package_name_old]['version']
			ver_avail = utils.get_package_version(utils.get_package_fn(url))
			if package_name_old != package_name_new or ver_avail != ver_cur or force:
				local_fn = utils.download_package(url, cache_dir)
				if not args['--downloadonly']:
					package_info = utils.update_package(local_fn, package_name_old, installed_packages[package_name_old], prefix, force)
					del installed_packages[package_name_old]
					installed_packages[package_name_new] = package_info
					utils.save_installed_packages(installed_packages, db_fn)
	elif args['merge_config']:
		# Merge pending config files for the given (or all) installed packages.
		if args['<package>']:
			packages = args['<package>']
			for package in packages:
				package_name = utils.get_package_name(package)
				if package_name not in installed_packages:
					raise Exception('package %s not installed' % package_name)
		else:
			packages = installed_packages.keys()
		for package in packages:
			package_name = utils.get_package_name(package)
			if package_name not in installed_packages:
				raise Exception('package %s not installed' % package_name)
		for package in packages:
			utils.mergeconfig_package(package, installed_packages, prefix)
	elif args['remove']:
		for package_name in args['<package>']:
			if package_name not in installed_packages:
				raise Exception('package %s not installed' % package_name)
		for package_name in args['<package>']:
			package_version = installed_packages[package_name]['version']
			print('remove %s-%s' % (package_name, package_version))
		# Removal defaults to "no" at the prompt, unlike install/update.
		if no or not (yes or utils.yes_no('continue?', default='n')):
			return
		for package_name in args['<package>']:
			utils.remove_package(package_name, installed_packages, prefix)
			del installed_packages[package_name]
			utils.save_installed_packages(installed_packages, db_fn)
	elif args['list_available']:
		available_packages = utils.get_available_packages(repos)
		for package_name in sorted(available_packages.keys()):
			print(package_name)
	elif args['list_installed']:
		for package_name, package_info in installed_packages.items():
			package_version = package_info['version']
			print('%s-%s' % (package_name, package_version))
	elif args['show_untracked']:
		# Print files under the tracked directories that belong to no package.
		recursive = args['--recursive']
		fn_to_package_name = utils.gen_fn_to_package_name_mapping(installed_packages, prefix)
		if args['<path>']:
			paths = set([args['<path>'], ])
		else:
			paths = set()
			for fn in fn_to_package_name.keys():
				paths.add(os.path.dirname(fn))
		while paths:
			for path in paths.copy():
				for fn in os.listdir(path):
					if os.path.isdir('%s/%s' % (path, fn)) and not os.path.islink('%s/%s' % (path, fn)):
						if recursive:
							paths.add('%s/%s' % (path, fn))
						else:
							continue
					if '%s/%s' % (path, fn) in fn_to_package_name:
						continue
					print('%s/%s' % (path, fn))
				paths.remove(path)
	elif args['clean_cache']:
		utils.clean_cache(cache_dir)
	logging.debug('exit')
	return 0
Пример #12
0
def main():
    """Entry point for the atxpkg package-manager CLI (reformatted variant).

    Parses docopt arguments, configures logging and the platform-specific
    database / repo-list / cache paths, then dispatches to the requested
    sub-command (install, update, merge_config, remove, list_available,
    list_installed, show_untracked, clean_cache, check).

    Returns:
        0 on success.  Raises Exception for user-level errors such as a
        missing or already-installed package.
    """
    args = docopt.docopt(__doc__, version=__version__)
    log_level = 'DEBUG' if args['--debug'] else 'INFO'
    if sys.platform == 'win32':
        log_fn = 'c:/atxpkg/atxpkg.log'
    else:
        log_fn = '/tmp/atxpkg/atxpkg.log'
    utils.logging_setup(log_level, log_fn, print_=True)
    logging.info('*' * 40)
    logging.info('starting atxpkg v%s' % __version__)
    logging.debug('args: %s' % dict(args))
    # Platform-specific locations of the installed-package DB, repo list,
    # install prefix and download cache.
    if sys.platform == 'win32':
        logging.debug('detected win32')
        db_fn = 'c:/atxpkg/installed.json'
        repos_fn = 'c:/atxpkg/repos.txt'
        prefix = 'c:'
        cache_dir = 'c:/atxpkg/cache'
    else:
        logging.debug('detected non-win32')
        db_fn = '/tmp/atxpkg/installed.json'
        repos_fn = '/tmp/atxpkg/repos.txt'
        prefix = ''
        cache_dir = '/tmp/atxpkg/cache'
    repos = utils.get_repos(repos_fn)
    # The local cache doubles as a repository of last resort.
    repos.append(cache_dir)
    #logging.debug(str(args))
    # NOTE(review): without --prefix this resets prefix to '' even on win32,
    # where it was just set to 'c:' above — confirm this is intended.
    prefix = args['--prefix'] if args['--prefix'] else ''
    # First run: create an empty DB file and the cache directory.
    if not os.path.isfile(db_fn):
        logging.info('%s not found, creating empty one' % db_fn)
        with open(db_fn, 'w') as f:
            f.write('{}')
    if not os.path.isdir(cache_dir):
        logging.info('%s not found, creating empty one' % cache_dir)
        os.makedirs(cache_dir)
    installed_packages = utils.get_installed_packages(db_fn)
    force = args['--force']
    yes, no = args['--yes'], args['--no']
    if args['install']:
        # Validate every requested package first, then prompt, then install.
        available_packages = utils.get_available_packages(repos)
        for package in args['<package>']:
            package_name = utils.get_package_name(package)
            if package_name not in available_packages:
                raise Exception('unable to find package %s' % package_name)
            if package_name in installed_packages and not force:
                raise Exception('package %s already installed' % package_name)
        for package in args['<package>']:
            package_name = utils.get_package_name(package)
            package_version = utils.get_package_version(package)
            if package_version:
                url = utils.get_specific_version_url(
                    available_packages[package_name], package_version)
            else:
                url = utils.get_max_version_url(
                    available_packages[package_name])
            ver = utils.get_package_version(utils.get_package_fn(url))
            print('install %s-%s' % (package_name, ver))
        if no or not (yes or utils.yes_no('continue?', default='y')):
            return
        for package in args['<package>']:
            package_name = utils.get_package_name(package)
            package_version = utils.get_package_version(package)
            if package_version:
                url = utils.get_specific_version_url(
                    available_packages[package_name], package_version)
            else:
                url = utils.get_max_version_url(
                    available_packages[package_name])
            local_fn = utils.download_package(url, cache_dir)
            if not args['--downloadonly']:
                # Persist the DB after each package so a crash mid-batch
                # leaves already-installed packages recorded.
                package_info = utils.install_package(local_fn, prefix, force)
                installed_packages[package_name] = package_info
                utils.save_installed_packages(installed_packages, db_fn)
                # NOTE(review): package_version is None when no explicit
                # version was requested, so this logs "<name>-None";
                # presumably the resolved version was meant — confirm.
                logging.info('%s-%s is now installed' %
                             (package_name, package_version))
    elif args['update']:
        # A package spec may be "old..new" to rename during the update.
        available_packages = utils.get_available_packages(repos)
        if args['<package>']:
            packages = args['<package>']
            for package in packages:
                if '..' in package:
                    package_old, package_new = package.split('..')
                    package_name_old = utils.get_package_name(package_old)
                    package_name_new = utils.get_package_name(package_new)
                else:
                    package_name_old = package_name_new = utils.get_package_name(
                        package)

                if package_name_old not in installed_packages:
                    raise Exception('package %s not installed' %
                                    package_name_old)
        else:
            packages = installed_packages.keys()
        # Dry-run pass: collect and print what would be updated.
        packages_to_update = set()
        for package in packages:
            if '..' in package:
                package_old, package_new = package.split('..')
                package_name_old = utils.get_package_name(package_old)
                package_name_new = utils.get_package_name(package_new)
                package_version = utils.get_package_version(package_new)
            else:
                package_name_old = package_name_new = utils.get_package_name(
                    package)
                package_version = utils.get_package_version(package)
            if package_name_new not in available_packages:
                logging.warning('%s not available in any repository' %
                                package_name_new)
                continue
            if package_version:
                url = utils.get_specific_version_url(
                    available_packages[package_name_new], package_version)
            else:
                url = utils.get_max_version_url(
                    available_packages[package_name_new])
            ver_cur = installed_packages[package_name_old]['version']
            ver_avail = utils.get_package_version(utils.get_package_fn(url))
            if package_name_old != package_name_new or ver_avail != ver_cur or force:
                print('update %s-%s -> %s-%s' %
                      (package_name_old, ver_cur, package_name_new, ver_avail))
                packages_to_update.add(package)
        if not packages_to_update:
            print('nothing to update')
            return
        if no or not (yes or utils.yes_no('continue?', default='y')):
            return
        # Apply pass: re-resolve each package and perform the update.
        for package in packages_to_update:
            if '..' in package:
                package_old, package_new = package.split('..')
                package_name_old = utils.get_package_name(package_old)
                package_name_new = utils.get_package_name(package_new)
                package_version = utils.get_package_version(package_new)
            else:
                package_name_old = package_name_new = utils.get_package_name(
                    package)
                package_version = utils.get_package_version(package)
            if package_version:
                url = utils.get_specific_version_url(
                    available_packages[package_name_new], package_version)
            else:
                url = utils.get_max_version_url(
                    available_packages[package_name_new])
            ver_cur = installed_packages[package_name_old]['version']
            ver_avail = utils.get_package_version(utils.get_package_fn(url))
            if package_name_old != package_name_new or ver_avail != ver_cur or force:
                local_fn = utils.download_package(url, cache_dir)
                if not args['--downloadonly']:
                    package_info = utils.update_package(
                        local_fn, package_name_old,
                        installed_packages[package_name_old], prefix, force)
                    del installed_packages[package_name_old]
                    installed_packages[package_name_new] = package_info
                    utils.save_installed_packages(installed_packages, db_fn)
                    logging.info('%s-%s updated to %s-%s' %
                                 (package_name_old, ver_cur, package_name_new,
                                  ver_avail))
    elif args['merge_config']:
        # Merge pending config files for the given (or all) installed packages.
        if args['<package>']:
            packages = args['<package>']
            for package in packages:
                package_name = utils.get_package_name(package)
                if package_name not in installed_packages:
                    raise Exception('package %s not installed' % package_name)
        else:
            packages = installed_packages.keys()
        for package in packages:
            package_name = utils.get_package_name(package)
            if package_name not in installed_packages:
                raise Exception('package %s not installed' % package_name)
        for package in packages:
            utils.mergeconfig_package(package, installed_packages, prefix)
    elif args['remove']:
        for package_name in args['<package>']:
            if package_name not in installed_packages:
                raise Exception('package %s not installed' % package_name)
        for package_name in args['<package>']:
            package_version = installed_packages[package_name]['version']
            print('remove %s-%s' % (package_name, package_version))
        # Removal defaults to "no" at the prompt, unlike install/update.
        if no or not (yes or utils.yes_no('continue?', default='n')):
            return
        for package_name in args['<package>']:
            utils.remove_package(package_name, installed_packages, prefix)
            del installed_packages[package_name]
            utils.save_installed_packages(installed_packages, db_fn)
    elif args['list_available']:
        available_packages = utils.get_available_packages(repos)
        for package_name in sorted(available_packages.keys()):
            print(package_name)
    elif args['list_installed']:
        for package_name, package_info in installed_packages.items():
            package_version = package_info['version']
            print('%s-%s' % (package_name, package_version))
    elif args['show_untracked']:
        # Print files under the tracked directories that belong to no package.
        recursive = args['--recursive']
        fn_to_package_name = utils.gen_fn_to_package_name_mapping(
            installed_packages, prefix)
        if args['<path>']:
            paths = set([
                args['<path>'],
            ])
        else:
            paths = set()
            for fn in fn_to_package_name.keys():
                paths.add(os.path.dirname(fn))
        while paths:
            for path in paths.copy():
                for fn in os.listdir(path):
                    if os.path.isdir(
                            '%s/%s' %
                        (path, fn)) and not os.path.islink('%s/%s' %
                                                           (path, fn)):
                        if recursive:
                            paths.add('%s/%s' % (path, fn))
                        else:
                            continue
                    if '%s/%s' % (path, fn) in fn_to_package_name:
                        continue
                    print('%s/%s' % (path, fn))
                paths.remove(path)
    elif args['clean_cache']:
        utils.clean_cache(cache_dir)
    elif args['check']:
        # Verify md5 sums of all files belonging to the selected packages.
        if args['<package>']:
            packages = args['<package>']
            for package in packages:
                if not package in installed_packages.keys():
                    packages = []
                    print('%s not installed' % package)
        else:
            packages = installed_packages.keys()
        if packages:
            for package in packages:
                for fn in installed_packages[package]['md5sums']:
                    if not os.path.isfile('%s/%s' % (prefix, fn)):
                        logging.info('%s/%s does not exist' % (prefix, fn))
                    else:
                        if utils.get_md5sum(
                                '%s/%s' % (prefix, fn)
                        ) != installed_packages[package]['md5sums'][fn]:
                            logging.info('sum of %s/%s differs' % (prefix, fn))
                print('check of %s complete' % package)
                logging.info('check of %s complete' % package)
    logging.debug('exit')
    return 0
Пример #13
0

def main(args, parallel_func):
    """Run *parallel_func* over every tfrecord file using a process pool.

    Args:
        args: Parsed CLI namespace; must provide ``output_path`` and
            ``num_workers`` plus whatever ``parallel_func`` itself reads.
        parallel_func: Callable invoked as ``parallel_func(args, path)``;
            it is partially applied with ``args`` before dispatch.
    """
    if not os.path.exists(args.output_path):
        logger.info(f'Create folder to save images: {args.output_path}')
        os.makedirs(args.output_path)

    tfrecord_paths = get_all_tfrecords_path_files(args)
    # Bind `args` up front so the pool only has to ship the tfrecord path.
    parallel_func = partial(parallel_func, args)
    outputs_codes = []
    tasks = []
    with Pool(processes=args.num_workers) as pool:
        for path_to_tfrecord in tqdm(tfrecord_paths):
            # Worker errors are logged via the callback instead of
            # silently killing the whole run at submission time.
            tasks.append(
                pool.apply_async(parallel_func,
                                 args=(path_to_tfrecord, ),
                                 error_callback=lambda e: logger.info(e)))
        # Wait for every task; .get() re-raises any worker exception here.
        for task in tasks:
            task.wait()
            outputs_codes.append(task.get())
        pool.close()
        pool.join()
    logger.info('FINISH')


# Module-level logger shared by main() and the worker error callbacks.
logger = logging_setup(__name__)

# Script entry point: parse CLI args and process all tfrecords.
if __name__ == '__main__':
    args = build_argparser().parse_args()
    main(args, process_data)