Example #1
def get_env_info():
    import sys
    import torch
    print('Python version={}'.format(sys.version))
    print('PyTorch version={}'.format(torch.__version__))

    flag = torch.cuda.is_available()
    print('torch.cuda.is_available()={}'.format(flag))
    if flag:
        from torch.backends import cudnn
        cudnn.enabled = True
        cudnn.benchmark = False  # False: no autotuning; slower, but reproducible
        cudnn.deterministic = True  # True: deterministic results; False: faster, but results may vary slightly across runs
        # os.environ["CUDA_VISIBLE_DEVICES"] = '1' # choose which device to use
        # torch.set_default_tensor_type(torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor) # be careful if use
        print('torch.cuda.current_device()={}'.format(
            torch.cuda.current_device()))
        print('torch.cuda.device_count()={}'.format(torch.cuda.device_count()))
        print('torch.cuda.get_device_name(0)={}'.format(
            torch.cuda.get_device_name(0)))
        print('torch.backends.cudnn.version()={}'.format(cudnn.version()))
        print('torch.version.cuda={}'.format(torch.version.cuda))
        print('Memory Usage:')
        print('Allocated:', round(torch.cuda.memory_allocated(0) / 1024**3, 1),
              'GB')
        print('Cached:   ', round(torch.cuda.memory_cached(0) / 1024**3, 1),
              'GB')
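
Note on Example #1: torch.cuda.memory_cached was deprecated in PyTorch 1.4 and renamed to torch.cuda.memory_reserved. A minimal version-tolerant sketch (the helper name is an assumption):

import torch

def cuda_reserved_gb(device=0):
    # memory_cached() became memory_reserved() in PyTorch 1.4; fall back for older installs.
    reserved = getattr(torch.cuda, 'memory_reserved', None) or torch.cuda.memory_cached
    return round(reserved(device) / 1024 ** 3, 1)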
Example #2
import os
import platform

import GPUtil
from torch.backends import cudnn
from torch.utils import collect_env
# pip internal API; the module path has moved across pip versions
from pip._internal.operations.freeze import freeze


def dump_system_info(file_path: str):
    if os.path.isfile(file_path):
        os.remove(file_path)
    with open(file_path, 'w+') as o:
        o.write(headline('torch collect_env'))
        o.write(collect_env.get_pretty_env_info())

        o.write(headline('system info'))
        o.write('platform: %s\n' % platform.platform())
        o.write('python: %s\n' % platform.python_version())

        o.write(headline('gpus'))
        try:
            for i, gpu in enumerate(GPUtil.getGPUs()):
                o.write('gpu %d\n' % i)
                for k in ['id', 'driver', 'name', 'memoryTotal']:
                    o.write('\t%s=%s\n' % (k, gpu.__dict__[k]))
        except ValueError as e:
            o.write("%s" % repr(e))

        o.write(headline('cuda / cudnn'))
        o.write('cuda via cat: %s\n' %
                get_command_result('cat /usr/local/cuda/version.txt'))
        o.write('cuda via dpkg: %s\n' %
                get_command_result('dpkg -l | grep cuda-toolkit'))
        o.write('cuda via nvcc: %s\n' % get_command_result('nvcc --version'))
        o.write('cudnn version: %s\n' % cudnn.version())
        # o.write('\nnvidia-smi:\n%s\n' % get_command_result('nvidia-smi'))

        o.write(headline('pip freeze'))
        for r in freeze(local_only=True):
            o.write('%s\n' % r)
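
Example #2 also calls two helpers, headline and get_command_result, that are not shown. A minimal sketch of plausible implementations; the names match the call sites above, but the bodies are assumptions:

import subprocess

def headline(title: str) -> str:
    # Section divider, e.g. "===== torch collect_env =====".
    return '\n===== {} =====\n'.format(title)

def get_command_result(cmd: str) -> str:
    # Run a shell command and return its output; swallow failures so a
    # missing tool (e.g. nvcc) does not abort the whole dump.
    try:
        return subprocess.check_output(
            cmd, shell=True, stderr=subprocess.STDOUT).decode().strip()
    except (subprocess.CalledProcessError, OSError) as e:
        return 'n/a ({})'.format(e)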
Example #3
    def _setup_gpus(self, seed: float, detect_anomaly: bool):
        utils.setup_cuda(seed, self.local_rank)

        torch.autograd.set_detect_anomaly(detect_anomaly)
        self._log_info({
            'set_detect_anomaly': detect_anomaly,
            'is_anomaly_enabled': torch.is_anomaly_enabled()
        })

        self._log_info({
            'gpu_names':
            utils.cuda_device_names(),
            'gpu_count':
            torch.cuda.device_count(),
            'CUDA_VISIBLE_DEVICES':
            os.environ['CUDA_VISIBLE_DEVICES']
            if 'CUDA_VISIBLE_DEVICES' in os.environ else 'NotSet',
            'cudnn.enabled':
            cudnn.enabled,
            'cudnn.benchmark':
            cudnn.benchmark,
            'cudnn.deterministic':
            cudnn.deterministic,
            'cudnn.version':
            cudnn.version()
        })
        self._log_info({'memory': str(psutil.virtual_memory())})
        self._log_info({'CPUs': str(psutil.cpu_count())})
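
utils.setup_cuda in Example #3 is project-specific and not shown. A hypothetical sketch of what such a helper typically does; the per-rank seeding policy is an assumption:

import torch
from torch.backends import cudnn

def setup_cuda(seed: int, local_rank: int) -> None:
    # Offset the seed per process so distributed workers draw different streams.
    torch.manual_seed(seed + local_rank)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed + local_rank)
        cudnn.enabled = True
        cudnn.benchmark = True  # autotune conv algorithms; faster, less reproducible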
Example #4
def run(args):
    print("OS: {}, pytorch version: {}".format(os.name, torch.__version__))
    if torch.cuda.is_available():
        from torch.backends import cudnn
        name = torch.cuda.get_device_name(torch.cuda.current_device())
        print("Device: {}, CUDA: {}, CuDNN: {}".format(name, torch.version.cuda,
                                                       cudnn.version()))
    print("Test setup: ({},{},{})->({},{},{})".format(
        args.batch_len, args.batch_size, args.dim_in, args.batch_len,
        args.batch_size, args.dim_out))
    starttime = time.time()
    if (not args.no_gpu) and torch.cuda.is_available():
        print("GPU Results")
        time_speeds(args, cuda=True, number=args.gpu_number)

    if not args.no_cpu:
        print("CPU Results")
        time_speeds(args, cuda=False, number=args.cpu_number)
    endtime = time.time()
    elapsed = endtime - starttime
    res = "Testing took {} sec".format(elapsed)
    print(res)
    with open(args.logfile, 'a') as f:
        f.write(res)
        f.write('\n')
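
CUDA kernels launch asynchronously, so the time.time() bracketing in Example #4 only measures GPU work correctly if time_speeds synchronizes internally. A minimal sketch of the usual pattern (helper name assumed):

import time
import torch

def timed(fn, *args, **kwargs):
    # Synchronize before and after so the wall clock brackets the GPU work itself.
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    start = time.time()
    result = fn(*args, **kwargs)
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return result, time.time() - start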
Example #5
def main():

    # Init logger
    #args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = open(
        os.path.join(args.save_path,
                     'seed-{:}-log.txt'.format(args.manualSeed)), 'w')
    print_log('Save Path      : {:}'.format(args.save_path), log)
    state = {k: v for k, v in args._get_kwargs()}
    print_log(state, log)
    print_log("Random Seed    : {:}".format(args.manualSeed), log)
    print_log("Python version : {:}".format(sys.version.replace('\n', ' ')),
              log)
    print_log("Torch  version : {:}".format(torch.__version__), log)
    print_log("CUDA   version : {:}".format(torch.version.cuda), log)
    print_log("cuDNN  version : {:}".format(cudnn.version()), log)
    print_log("Num of GPUs    : {:}".format(torch.cuda.device_count()), log)
    args.dataset = args.dataset.lower()

    config = load_config(args.model_config)
    genotype = models[args.arch]
    print_log('configuration : {:}'.format(config), log)
    print_log('genotype      : {:}'.format(genotype), log)
    # clear GPU cache
    torch.cuda.empty_cache()
    if args.dataset == 'imagenet':
        main_procedure_imagenet(config, args.data_path, args, genotype,
                                args.init_channels, args.layers, None, log)
    else:
        main_procedure(config, args.dataset, args.data_path, args, genotype,
                       args.init_channels, args.layers, None, log)
    log.close()
Example #6
def train(args):
    if torch.cuda.is_available():
        torch.distributed.init_process_group(backend='NCCL',
                                             init_method='env://')

    cudnn.enabled = True
    cudnn.benchmark = True
    cudnn.deterministic = True
    print(torch.__version__)
    print(torch.version.cuda)
    print(cudnn.version())

    init_seed(123456)

    batch_size = 32

    output_path = data_path + base_output_path

    vocab2id, id2vocab, id2freq = load_vocab(data_path + 'holl_input_output.' +
                                             version + '.vocab',
                                             t=min_vocab_freq)

    if not os.path.exists(data_path + 'glove.6B.300d.txt' + '.dat'):
        prepare_embeddings(data_path + 'glove.6B.300d.txt')
    emb_matrix = load_embeddings(data_path + 'glove.6B.300d.txt', id2vocab,
                                 embedding_size)

    if os.path.exists(data_path + 'holl-train.' + version + '.pkl'):
        train_dataset = torch.load(data_path + 'holl-train.' + version +
                                   '.pkl')
    else:
        train_dataset = GLKSDataset(
            [data_path + 'holl-train.' + version + '.json'], vocab2id,
            min_window_size, num_windows, knowledge_len)

    model = GLKS(min_window_size,
                 num_windows,
                 embedding_size,
                 hidden_size,
                 vocab2id,
                 id2vocab,
                 max_dec_len=70,
                 beam_width=1,
                 emb_matrix=emb_matrix)
    init_params(model, escape='embedding')

    model_optimizer = optim.Adam(model.parameters())

    trainer = DefaultTrainer(model, args.local_rank)

    # for i in range(10):
    #     trainer.train_epoch('ds_train', train_dataset, collate_fn, batch_size, i, model_optimizer)

    for i in range(20):
        if i == 5:
            train_embedding(model)
        trainer.train_epoch('ds_mle_mcc_train', train_dataset, collate_fn,
                            batch_size, i, model_optimizer)
        trainer.serialize(i, output_path=output_path)
Example #7
def test(args):
    cudnn.enabled = True
    cudnn.benchmark = True
    cudnn.deterministic = True
    print(torch.__version__)
    print(torch.version.cuda)
    print(cudnn.version())

    init_seed(123456)

    batch_size = 64

    output_path = data_path + base_output_path

    vocab2id, id2vocab, id2freq = load_vocab(data_path + 'holl_input_output.' +
                                             version + '.vocab',
                                             t=min_vocab_freq)

    if os.path.exists(data_path + 'holl-dev.' + version + '.pkl'):
        dev_dataset = torch.load(data_path + 'holl-dev.' + version + '.pkl')
    else:
        dev_dataset = GLKSDataset(
            [data_path + 'holl-dev.' + version + '.json'], vocab2id,
            min_window_size, num_windows, knowledge_len)
    if os.path.exists(data_path + 'holl-test.' + version + '.pkl'):
        test_dataset = torch.load(data_path + 'holl-test.' + version + '.pkl')
    else:
        test_dataset = GLKSDataset(
            [data_path + 'holl-test.' + version + '.json'], vocab2id,
            min_window_size, num_windows, knowledge_len)

    for i in range(20):
        print('epoch', i)
        file = output_path + 'model/' + str(i) + '.pkl'

        if os.path.exists(file):
            model = GLKS(min_window_size,
                         num_windows,
                         embedding_size,
                         hidden_size,
                         vocab2id,
                         id2vocab,
                         max_dec_len=70,
                         beam_width=1)
            model.load_state_dict(torch.load(file))
            trainer = DefaultTrainer(model, None)
            trainer.test('test',
                         dev_dataset,
                         collate_fn,
                         batch_size,
                         i,
                         output_path=output_path)
            trainer.test('test',
                         test_dataset,
                         collate_fn,
                         batch_size,
                         100 + i,
                         output_path=output_path)
Example #8
def init_torch_seeds(seed=0):
    import torch.backends.cudnn as cudnn
    torch.manual_seed(seed)
    if seed == 0:  # slower, more reproducible
        cudnn.benchmark, cudnn.deterministic = False, True
    else:  # faster, less reproducible
        cudnn.benchmark, cudnn.deterministic = True, False

    print('PyTorch version {}'.format(torch.__version__))
    print('CUDA version {}'.format(torch.version.cuda))
    print('cuDNN version {}'.format(cudnn.version()))
    print('cuDNN deterministic {}'.format(cudnn.deterministic))
    print('cuDNN benchmark {}'.format(cudnn.benchmark))
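
On PyTorch 1.8 and later, reproducibility can be tightened beyond the two cuDNN flags in Example #8 with torch.use_deterministic_algorithms. A hedged sketch (helper name assumed):

import torch
from torch.backends import cudnn

def seed_everything(seed=0, deterministic=True):
    # torch.manual_seed also seeds all CUDA devices on modern PyTorch.
    torch.manual_seed(seed)
    cudnn.benchmark = not deterministic
    cudnn.deterministic = deterministic
    if deterministic and hasattr(torch, 'use_deterministic_algorithms'):
        # Raises on ops that lack a deterministic implementation (PyTorch >= 1.8).
        torch.use_deterministic_algorithms(True)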
Example #9
def backward_weight(fn, input, hx, output, weight, grad_weight):
    with torch.cuda.device_of(input):
        is_input_packed = fn.batch_sizes is not None
        handle = cudnn.get_handle()

        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
        else:
            cx = None

        if fn.batch_first and not is_input_packed:
            input = input.transpose(0, 1)
            output = output.transpose(0, 1)
        input_size = _input_size(fn, input)
        hidden_size = _hidden_size(fn)
        if not fn.requires_grad:
            raise RuntimeError(
                'backward_weight can only be called when the function requires grad!'
            )
        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError(
                'dropout supported only in cudnn v 5.1 and above')
        if tuple(input.size()) != input_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                input_size, tuple(input.size())))
        if tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected hidden size {}, got {}'.format(
                hidden_size, hx.size()))

        assert hx.is_contiguous()
        assert cx is None or cx.is_contiguous()
        x = input.contiguous()
        y = output
        dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()

        with torch.cuda.device_of(input):
            workspace = torch.cuda.ByteTensor(fn.workspace_size)
        check_error(
            cudnn.lib.cudnnRNNBackwardWeights(
                handle, fn.rnn_desc, fn.seq_length, fn.x_descs,
                ctypes.c_void_p(x.data_ptr()), fn.hx_desc,
                ctypes.c_void_p(hx.data_ptr()), fn.y_descs,
                ctypes.c_void_p(y.data_ptr()),
                ctypes.c_void_p(workspace.data_ptr()), workspace.size(0),
                fn.w_desc, ctypes.c_void_p(dw.data_ptr()),
                ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)))

        # copy the weights from the weight_buf into grad_weight
        grad_params = get_parameters(fn, handle, dw)
        _copyParams(grad_params, grad_weight)
        return grad_weight
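
Examples #9, #11, #15, #16, #19 and #20 are lifted from very old PyTorch internals that drove cuDNN's RNN API directly through ctypes. In any recent PyTorch the same cuDNN path is reached through the public torch.nn.LSTM, with no descriptor or workspace management in user code. A minimal sketch:

import torch
import torch.nn as nn

# cuDNN handles this LSTM automatically when module and input live on the GPU.
rnn = nn.LSTM(input_size=32, hidden_size=64, num_layers=2, batch_first=True)
if torch.cuda.is_available():
    rnn = rnn.cuda()
    x = torch.randn(8, 20, 32, device='cuda')  # (batch, seq, feature)
    out, (h, c) = rnn(x)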
Example #10
def setting(cfg: argparse.Namespace):
    cudnn.benchmark = True
    logger = get_logger()
    logger('==> args: {}'.format(cfg))
    logger('==> the results path: {}'.format(cfg.output))
    if not hasattr(cfg, 'seed') or cfg.seed < 0:
        cfg.seed = int(time.time())
    random.seed(cfg.seed)
    torch.manual_seed(cfg.seed)
    logger('==> seed: {}'.format(cfg.seed))
    logger('==> PyTorch version: {}, cudnn version: {}'.format(torch.__version__, cudnn.version()))
    git_version = os.popen('git log --pretty=oneline | head -n 1').readline()[:-1]
    logger('==> git version: {}'.format(git_version))
    return
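
os.popen with a shell pipeline, as in Example #10, gives no error handling; a hedged alternative via subprocess (helper name assumed, git assumed on PATH):

import subprocess

def git_head():
    # Same hash as `git log --pretty=oneline | head -n 1`, without a shell.
    try:
        return subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
    except (subprocess.CalledProcessError, OSError):
        return 'unknown'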
Example #11
def backward_weight(fn, input, hx, output, weight, grad_weight):
    with torch.cuda.device_of(input):
        is_input_packed = fn.batch_sizes is not None
        handle = cudnn.get_handle()

        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
        else:
            cx = None

        if fn.batch_first and not is_input_packed:
            input = input.transpose(0, 1)
            output = output.transpose(0, 1)
        input_size = _input_size(fn, input)
        hidden_size = _hidden_size(fn)
        if not fn.requires_grad:
            raise RuntimeError('backward_weight can only be called when the function requires grad!')
        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
        if tuple(input.size()) != input_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                input_size, tuple(input.size())))
        if tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected hidden size {}, got {}'.format(
                hidden_size, hx.size()))

        assert hx.is_contiguous()
        assert cx is None or cx.is_contiguous()
        x = input.contiguous()
        y = output
        dw = fn.weight_buf.new().resize_as_(fn.weight_buf).zero_()

        with torch.cuda.device_of(input):
            workspace = torch.cuda.ByteTensor(fn.workspace_size)
        check_error(cudnn.lib.cudnnRNNBackwardWeights(
            handle,
            fn.rnn_desc,
            fn.seq_length,
            fn.x_descs, ctypes.c_void_p(x.data_ptr()),
            fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
            fn.y_descs, ctypes.c_void_p(y.data_ptr()),
            ctypes.c_void_p(workspace.data_ptr()), workspace.size(0),
            fn.w_desc, ctypes.c_void_p(dw.data_ptr()),
            ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
        ))

        # copy the weights from the weight_buf into grad_weight
        grad_params = get_parameters(fn, handle, dw)
        _copyParams(grad_params, grad_weight)
        return grad_weight
Example #12
    def _update_output(self, input, weight, bias):
        self.use_cudnn = cudnn.is_acceptable(input)
        if self.use_cudnn and cudnn.version() < 6000:
            self.use_cudnn = not self.is_dilated()
        if self.use_cudnn:
            output = input.new(*self._output_size(input, weight))
            if self.transposed:
                self._cudnn_info = (
                    torch._C._cudnn_convolution_transpose_full_forward(
                        input, weight, bias, output, self.padding, self.stride, self.dilation,
                        self.groups, cudnn.benchmark))
            else:
                self._cudnn_info = torch._C._cudnn_convolution_full_forward(
                    input, weight, bias, output, self.padding, self.stride, self.dilation,
                    self.groups, cudnn.benchmark)
            return output

        self._bufs = [[] for g in range(self.groups)]
        return self._thnn('update_output', input, weight, bias)
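
torch._C._cudnn_convolution_full_forward in Example #12 is a private binding from pre-0.2 PyTorch and no longer exists. The supported route to the same cuDNN-backed convolution (the benchmark flag is still honored) is the public functional API; a sketch:

import torch
import torch.nn.functional as F
from torch.backends import cudnn

cudnn.benchmark = True  # autotune convolution algorithms, as the old binding did
if torch.cuda.is_available():
    x = torch.randn(1, 3, 32, 32, device='cuda')
    w = torch.randn(8, 3, 3, 3, device='cuda')
    b = torch.zeros(8, device='cuda')
    # Dispatches to cuDNN when inputs are CUDA tensors and cudnn.enabled is True.
    y = F.conv2d(x, w, b, stride=1, padding=1, dilation=1, groups=1)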
Example #13
def main():

  # Init logger
  args.save_path = os.path.join(args.save_path, 'seed-{:}'.format(args.manualSeed))
  if not os.path.isdir(args.save_path):
    os.makedirs(args.save_path)
  log = open(os.path.join(args.save_path, 'log-seed-{:}-{:}.txt'.format(args.manualSeed, time_file_str())), 'w')
  print_log('save path : {:}'.format(args.save_path), log)
  state = {k: v for k, v in args._get_kwargs()}
  print_log(state, log)
  print_log("Random Seed: {}".format(args.manualSeed), log)
  print_log("Python version : {}".format(sys.version.replace('\n', ' ')), log)
  print_log("Torch  version : {}".format(torch.__version__), log)
  print_log("CUDA   version : {}".format(torch.version.cuda), log)
  print_log("cuDNN  version : {}".format(cudnn.version()), log)
  print_log("Num of GPUs    : {}".format(torch.cuda.device_count()), log)
  print_log("Num of CPUs    : {}".format(multiprocessing.cpu_count()), log)

  config = load_config(args.config_path)
  genotype = Networks[args.arch]

  main_procedure(config, genotype, args.save_path, args.print_freq, log)
  log.close()
Example #14
from logging import Logger
from pprint import pformat
from typing import Any

import torch
import ignite.distributed as idist


def log_basic_info(logger: Logger, config: Any) -> None:
    """Logging about pytorch, ignite, configurations, gpu system
    distributed settings.

    Parameters
    ----------
    logger
        Logger instance for logging
    config
        config object to log
    """
    import ignite

    logger.info("PyTorch version: %s", torch.__version__)
    logger.info("Ignite version: %s", ignite.__version__)
    if torch.cuda.is_available():
        # explicitly import cudnn as
        # torch.backends.cudnn can not be pickled with hvd spawning procs
        from torch.backends import cudnn

        logger.info("GPU device: %s", torch.cuda.get_device_name(idist.get_local_rank()))
        logger.info("CUDA version: %s", torch.version.cuda)
        logger.info("CUDNN version: %s", cudnn.version())

    logger.info("Configuration: %s", pformat(vars(config)))

    if idist.get_world_size() > 1:
        logger.info("distributed configuration: %s", idist.model_name())
        logger.info("backend: %s", idist.backend())
        logger.info("device: %s", idist.device().type)
        logger.info("hostname: %s", idist.hostname())
        logger.info("world size: %s", idist.get_world_size())
        logger.info("rank: %s", idist.get_rank())
        logger.info("local rank: %s", idist.get_local_rank())
        logger.info("num processes per node: %s", idist.get_nproc_per_node())
        logger.info("num nodes: %s", idist.get_nnodes())
        logger.info("node rank: %s", idist.get_node_rank())
Example #15
def forward(fn, input, hx, weight, output, hy):
    with torch.cuda.device_of(input):
        lib = cudnn.lib
        handle = cudnn.get_handle()
        fn.datatype = cudnn._typemap[input.type()]
        is_input_packed = fn.batch_sizes is not None

        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
            hy, cy = hy
        else:
            cx, cy = None, None

        if fn.batch_first and not is_input_packed:
            input = input.transpose(0, 1)

        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError('dropout supported only in cudnn v5.1 and above')

        if is_input_packed:
            fn.seq_length = len(fn.batch_sizes)
            fn.mini_batch = fn.batch_sizes[0]
            fn.input_size = input.size(-1)
        else:
            fn.seq_length, fn.mini_batch, fn.input_size = input.size()
        hidden_size = _hidden_size(fn)
        output_size = _output_size(fn, input)

        assert hx.is_contiguous()
        assert cx is None or cx.is_contiguous()
        x = input.contiguous()
        output.resize_(*output_size)
        hy.resize_(*hidden_size)
        if cy is not None:
            cy.resize_(*hidden_size)
        y = output

        # init descriptors
        fn.rnn_desc = init_rnn_descriptor(fn, handle)
        if is_input_packed:
            fn.x_descs = cudnn.descriptor_sequence(x, fn.batch_sizes)
            fn.y_descs = cudnn.descriptor_sequence(y, fn.batch_sizes)
        else:
            fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
            fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
        fn.hx_desc = cudnn.descriptor(hx)
        fn.hy_desc = cudnn.descriptor(hx)
        fn.cx_desc = cudnn.descriptor(cx) if cx is not None else None
        fn.cy_desc = cudnn.descriptor(cx) if cx is not None else None

        # create the weight buffer and copy the weights into it
        if fn.weight_buf is None:
            num_weights = get_num_weights(
                handle, fn.rnn_desc, fn.x_descs[0], fn.datatype)
            fn.weight_buf = x.new(num_weights)
            fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
            w = fn.weight_buf
            # this zero might not seem necessary, but it is in the case
            # where biases are disabled; then they won't be copied and must be zero'd.
            # Alternatively, _copyParams could be written more carefully.
            w.zero_()
            params = get_parameters(fn, handle, w)
            _copyParams(weight, params)
        else:
            fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
            w = fn.weight_buf

        if cx is not None and tuple(cx.size()) != hidden_size:
            raise RuntimeError('Expected cell size {}, got {}'.format(
                hidden_size, tuple(cx.size())))

        workspace_size = ctypes.c_long()
        check_error(lib.cudnnGetRNNWorkspaceSize(
            handle,
            fn.rnn_desc,
            fn.seq_length,
            fn.x_descs,
            ctypes.byref(workspace_size)
        ))
        fn.workspace_size = workspace_size.value
        with torch.cuda.device_of(input):
            workspace = torch.cuda.ByteTensor(fn.workspace_size)
        if fn.requires_grad:
            reserve_size = ctypes.c_long()
            check_error(lib.cudnnGetRNNTrainingReserveSize(
                handle,
                fn.rnn_desc,
                fn.seq_length,
                fn.x_descs,
                ctypes.byref(reserve_size)
            ))
            fn.reserve = torch.cuda.ByteTensor(reserve_size.value)

            check_error(lib.cudnnRNNForwardTraining(
                handle,
                fn.rnn_desc,
                fn.seq_length,
                fn.x_descs, ctypes.c_void_p(x.data_ptr()),
                fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
                fn.cx_desc, ctypes.c_void_p(cx.data_ptr()) if cx is not None else None,
                fn.w_desc, ctypes.c_void_p(w.data_ptr()),
                fn.y_descs, ctypes.c_void_p(y.data_ptr()),
                fn.hy_desc, ctypes.c_void_p(hy.data_ptr()),
                fn.cy_desc, ctypes.c_void_p(cy.data_ptr()) if cx is not None else None,
                ctypes.c_void_p(workspace.data_ptr()), workspace.size(0),
                ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
            ))
        else:  # inference
            check_error(lib.cudnnRNNForwardInference(
                handle,
                fn.rnn_desc,
                fn.seq_length,
                fn.x_descs, ctypes.c_void_p(x.data_ptr()),
                fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
                fn.cx_desc, ctypes.c_void_p(cx.data_ptr()) if cx is not None else None,
                fn.w_desc, ctypes.c_void_p(w.data_ptr()),
                fn.y_descs, ctypes.c_void_p(y.data_ptr()),
                fn.hy_desc, ctypes.c_void_p(hy.data_ptr()),
                fn.cy_desc, ctypes.c_void_p(cy.data_ptr()) if cx is not None else None,
                ctypes.c_void_p(workspace.data_ptr()), workspace.size(0)
            ))

        if fn.batch_first and not is_input_packed:
            output.transpose_(0, 1)
Example #16
def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy, grad_input, grad_hx):
    with torch.cuda.device_of(input):
        is_input_packed = fn.batch_sizes is not None
        handle = cudnn.get_handle()

        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
            grad_hx, grad_cx = grad_hx
            grad_hy, grad_cy = grad_hy
        else:
            cx, grad_cx, grad_cy = None, None, None

        if fn.batch_first and not is_input_packed:
            input = input.transpose(0, 1)
            grad_output = grad_output.transpose(0, 1)
            output = output.transpose(0, 1)

        input_size = _input_size(fn, input)
        hidden_size = _hidden_size(fn)
        output_size = _output_size(fn, input)

        assert hx.is_contiguous()
        assert cx is None or cx.is_contiguous()
        x = input.contiguous()
        dy = grad_output.contiguous()
        y = output
        w = fn.weight_buf
        dx = grad_input.resize_as_(input)
        dhy = grad_hy.contiguous().view(*hidden_size)
        dcy = grad_cy.contiguous().view(*hidden_size) if grad_cy is not None else None
        dhx = grad_hx.resize_(*hidden_size)
        dcx = grad_cx.resize_(*hidden_size) if grad_cx is not None else None

        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError('dropout supported only in cudnn v 5.1 and above')
        if not fn.requires_grad:
            raise RuntimeError('backward_grad can only be called when the function requires grad!')
        if tuple(input.size()) != input_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                input_size, tuple(input.size())))
        if tuple(output.size()) != output_size:
            raise RuntimeError('Expected output size {}, got {}'.format(
                output_size, output.size()))
        if hx is not None and tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected hidden size {}, got {}'.format(
                hidden_size, hx.size()))
        if cx is not None and tuple(cx.size()) != hidden_size:
            raise RuntimeError('Expected cell size {}, got {}'.format(
                hidden_size, cx.size()))
        if dhy is not None and tuple(dhy.size()) != hidden_size:
            raise RuntimeError('Expected d_hidden size {}, got {}'.format(
                hidden_size, dhy.size()))
        if dcy is not None and tuple(dcy.size()) != hidden_size:
            raise RuntimeError('Expected d_cell size {}, got {}'.format(
                hidden_size, dcy.size()))
        if not dhy.is_cuda or not dy.is_cuda or (dcy is not None and not dcy.is_cuda):
            raise RuntimeError('Gradients aren\'t CUDA tensors')

        with torch.cuda.device_of(input):
            workspace = torch.cuda.ByteTensor(fn.workspace_size)
        check_error(cudnn.lib.cudnnRNNBackwardData(
            handle,
            fn.rnn_desc,
            fn.seq_length,
            fn.y_descs, ctypes.c_void_p(y.data_ptr()),
            fn.y_descs, ctypes.c_void_p(dy.data_ptr()),
            fn.hy_desc, ctypes.c_void_p(dhy.data_ptr()),
            fn.cy_desc, ctypes.c_void_p(dcy.data_ptr()) if cx is not None else None,
            fn.w_desc, ctypes.c_void_p(w.data_ptr()),
            fn.hx_desc, ctypes.c_void_p(hx.data_ptr()),
            fn.cx_desc, ctypes.c_void_p(cx.data_ptr()) if cx is not None else None,
            fn.x_descs, ctypes.c_void_p(dx.data_ptr()),
            fn.hx_desc, ctypes.c_void_p(dhx.data_ptr()),
            fn.cx_desc, ctypes.c_void_p(dcx.data_ptr()) if cx is not None else None,
            ctypes.c_void_p(workspace.data_ptr()), workspace.size(0),
            ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)
        ))

        if fn.batch_first and not is_input_packed:
            grad_input = grad_input.transpose_(0, 1)
Example #17
import os

import numpy as np
import torch
from parser import create_parser
from torch.backends import cudnn
from dataset.dataloader import create_dataset
from utils.evaluation_metric import confusion_matrix, mIoU, per_cls_iou
from utils.data_visualization import visualize_segmap
from utils.file_op import mkdir
from PIL import Image

np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
torch.set_default_tensor_type(torch.FloatTensor)

TORCH_VERSION = torch.__version__
TORCH_CUDA_VERSION = torch.version.cuda
CUDNN_VERSION = str(cudnn.version())
DEVICE_NAME = (torch.cuda.get_device_name()
               if torch.cuda.is_available() else 'cpu')

# cudnn.benchmark = True
cudnn.deterministic = True
os.environ['CUDA_VISIBLE_DEVICES'] = '1'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
parser = create_parser()
dataset_name = 'Cityscapes'
ckpt_name = 'Semantic_Segmentation_Cityscapes_27.pth.tar'


def evaluation(model, dataloader):
    # result dir
    result_dir = parser.result_dir
    # label dir
Example #18
def dual_train(args):
    cudnn.enabled = True
    cudnn.benchmark = True
    cudnn.deterministic = True

    print("torch_version:{}".format(torch.__version__))
    print("CUDA_version:{}".format(torch.version.cuda))
    print("cudnn_version:{}".format(cudnn.version()))

    init_seed(123456)

    data_path = args.base_data_path + args.dataset + '/'

    print("Load BERT vocab")
    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    print('--Vocabulary size', len(vocab2id))

    print("Load dataset")
    # load dataset
    query = torch.load(data_path + 'query_DukeNet.pkl')
    train_samples = torch.load(data_path + 'train_DukeNet.pkl')
    passage = torch.load(data_path + 'passage_DukeNet.pkl')
    print("--The number of train_samples:", len(train_samples))

    print("Establish model and load parameters")
    saved_model_path = os.path.join(args.base_output_path + args.name + "/",
                                    'model/')
    with open(saved_model_path + "checkpoints.json", 'r',
              encoding='utf-8') as r:
        checkpoints = json.load(r)
    last_epoch = checkpoints["time"][-1]
    fuse_dict = torch.load(
        os.path.join(saved_model_path, '.'.join([str(last_epoch), 'pkl'])))
    model = DukeNet(vocab2id, id2vocab, args)
    model.load_state_dict(fuse_dict["model"])
    # freeze the encoder parameters to reduce GPU memory cost.
    freeze_params(model, "enc")

    if torch.cuda.is_available():
        model = model.cuda()
    model.train()
    print('--Loading success, last_epoch is {}'.format(last_epoch))

    print("Create optimizer")
    A_optimizer = optim.Adam(model.shifter.parameters(), args.A_lr)
    B_optimizer = optim.Adam(model.posterior_tracker.parameters(), args.B_lr)
    All_optimizer = optim.Adam(model.parameters(), args.ALL_lr)

    A_optimizer.zero_grad()
    B_optimizer.zero_grad()
    All_optimizer.zero_grad()

    print("Define loss")
    loss_nll = torch.nn.NLLLoss(reduction='none')
    KLDLoss = nn.KLDivLoss(reduction='batchmean')

    if isinstance(last_epoch, int):
        last_epoch = -1
    else:
        last_epoch = int(last_epoch.split('_')[1])

    # epoch start ======================================================================================================
    for epoch in range(last_epoch + 1, args.epoches):
        print("Epoch:", epoch)
        print("Create dataloader")
        train_dataset = Dataset("train", train_samples, query, passage,
                                vocab2id, args.max_knowledge_pool_when_train,
                                args.max_knowledge_pool_when_inference,
                                args.context_len, args.knowledge_sentence_len,
                                args.max_dec_length)
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            collate_fn=collate_fn,
            batch_size=args.dual_train_batch_size,
            shuffle=True)

        # each example
        for j, data in enumerate(train_loader, 0):
            if torch.cuda.is_available():
                data_cuda = dict()
                for key, value in data.items():
                    if isinstance(value, torch.Tensor):
                        data_cuda[key] = value.cuda()
                    else:
                        data_cuda[key] = value
                data = data_cuda

                # A(shifting)============================================================================================
                # (N,K)
                #print("Start from shifting")
                encoded_state = model.encoding_layer(data)

                # get label shifting knowledge
                N, K, H = encoded_state['knowledge_tracking_pool_encoded'][
                    1].size()
                offsets = torch.arange(
                    N, device=data["knowledge_tracking_label"].device
                ) * K + data["knowledge_tracking_label"]  # N
                # knowledge_tracking_pool_use (N K E)->(N*K,E)
                flatten_knowledge_tracking_pool_use = encoded_state[
                    'knowledge_tracking_pool_encoded'][1].view(N * K, -1)
                label_tracked_knowledge_use = flatten_knowledge_tracking_pool_use[
                    offsets]  # (N E)

                # N K
                knowledge_shifting_score, _, _, _, _ = model.shifter(
                    encoded_state['contexts_encoded'],
                    label_tracked_knowledge_use,  #
                    encoded_state['knowledge_shifting_pool_encoded'],
                    encoded_state['knowledge_shifting_pool_mask'],
                    data["shifting_ck_mask"],
                    data["knowledge_shifting_label"],
                    data['knowledge_shifting_pool'],
                    mode="inference")

                knowledge_shifting_prob = F.softmax(knowledge_shifting_score,
                                                    -1)

                logist = Categorical(knowledge_shifting_prob)
                inferred_shifted_knowledge_index = logist.sample()  # N

                N, K, H = encoded_state['knowledge_shifting_pool_encoded'][
                    1].size()  # (N K E)
                offsets = torch.arange(
                    N, device=inferred_shifted_knowledge_index.device
                ) * K + inferred_shifted_knowledge_index  # N
                # knowledge_shifting_pool_use (N K E)->(N*K,E)
                flatten_knowledge_shifting_pool_use = encoded_state[
                    'knowledge_shifting_pool_encoded'][1].view(N * K, -1)
                inferred_shifted_knowledge_use = flatten_knowledge_shifting_pool_use[
                    offsets]  # (N E)

                # action prob
                # N
                action_prob_loss = loss_nll(
                    torch.log(knowledge_shifting_prob + 1e-10),
                    inferred_shifted_knowledge_index)

                # B
                with torch.no_grad():
                    # (N,K)
                    knowledge_tracking_score, _, _, _ = model.posterior_tracker(
                        encoded_state['contexts_encoded'],
                        inferred_shifted_knowledge_use,
                        encoded_state['knowledge_tracking_pool_encoded'],
                        encoded_state['knowledge_tracking_pool_mask'],
                        data['tracking_ck_mask'],
                        data["knowledge_tracking_label"],
                        mode="inference")

                    # N
                    reward = -loss_nll(
                        F.log_softmax(knowledge_tracking_score, 1),
                        data["knowledge_tracking_label"])
                    # N
                    norm_reward = (reward -
                                   torch.mean(reward)) / torch.std(reward)

                A_loss = torch.mean(action_prob_loss * norm_reward)
                A_optimizer.zero_grad()
                A_loss.backward()
                torch.nn.utils.clip_grad_norm_(model.shifter.parameters(), 0.4)
                A_optimizer.step()

                print_A_loss = A_loss.cpu().item()

                # B tracking===========================================================================================
                #print("Start from tracking")
                encoded_state = model.encoding_layer(data)
                # get label shifting knowledge
                N, K, H = encoded_state['knowledge_shifting_pool_encoded'][
                    1].size()
                offsets = torch.arange(
                    N, device=data["knowledge_shifting_label"].device
                ) * K + data["knowledge_shifting_label"]  # N
                # knowledge_shifting_pool_use (N K E)->(N*K,E)
                flatten_knowledge_shifting_pool_use = encoded_state[
                    'knowledge_shifting_pool_encoded'][1].view(N * K, -1)
                label_shifted_knowledge_use = flatten_knowledge_shifting_pool_use[
                    offsets]  # (N E)

                # N K
                knowledge_tracking_score, _, _, _ = model.posterior_tracker(
                    encoded_state['contexts_encoded'],
                    label_shifted_knowledge_use,
                    encoded_state['knowledge_tracking_pool_encoded'],
                    encoded_state['knowledge_tracking_pool_mask'],
                    data['tracking_ck_mask'],
                    data["knowledge_tracking_label"],
                    mode="inference")

                # N K
                knowledge_tracking_prob = F.softmax(knowledge_tracking_score,
                                                    -1)

                logist = Categorical(knowledge_tracking_prob)
                # N
                inferred_tracked_knowledge_index = logist.sample()  # batch

                N, K, H = encoded_state['knowledge_tracking_pool_encoded'][
                    1].size()  # (N K E)
                offsets = torch.arange(
                    N, device=inferred_tracked_knowledge_index.device
                ) * K + inferred_tracked_knowledge_index  # N
                flatten_knowledge_tracking_pool_use = encoded_state[
                    'knowledge_tracking_pool_encoded'][1].view(N * K, -1)
                inferred_tracked_knowledge_use = flatten_knowledge_tracking_pool_use[
                    offsets]  # (N E)

                # action prob
                # N
                action_prob_loss = loss_nll(
                    torch.log(knowledge_tracking_prob + 1e-10),
                    inferred_tracked_knowledge_index)

                with torch.no_grad():
                    knowledge_shifting_score, _, _, _, _ = model.shifter(
                        encoded_state['contexts_encoded'],
                        inferred_tracked_knowledge_use,  # label tracked knowledge
                        encoded_state['knowledge_shifting_pool_encoded'],
                        encoded_state['knowledge_shifting_pool_mask'],
                        data["shifting_ck_mask"],
                        data["knowledge_shifting_label"],
                        data['knowledge_shifting_pool'],
                        mode="inference")

                    reward = -loss_nll(
                        F.log_softmax(knowledge_shifting_score, -1),
                        data["knowledge_shifting_label"])
                    norm_reward = (reward -
                                   torch.mean(reward)) / torch.std(reward)

                B_loss = torch.mean(action_prob_loss * norm_reward)
                B_optimizer.zero_grad()
                B_loss.backward()
                torch.nn.utils.clip_grad_norm_(
                    model.posterior_tracker.parameters(), 0.4)
                B_optimizer.step()
                print_B_loss = B_loss.cpu().item()

                # ALL====================================================================================================
                encoded_state = model.encoding_layer(data)
                interaction_outputs = model.dual_knowledge_interaction_layer(
                    data, encoded_state)
                rg = model.decoding_layer(data, interaction_outputs)

                _, pri_tracking_pred = interaction_outputs[
                    'prior_knowledge_tracking_score'].detach().max(1)
                pri_tracking_acc = (
                    pri_tracking_pred == data['knowledge_tracking_label']
                ).float().mean()

                _, pos_tracking_pred = interaction_outputs[
                    'posterior_knowledge_tracking_score'].detach().max(1)
                pos_tracking_acc = (
                    pos_tracking_pred == data['knowledge_tracking_label']
                ).float().mean()

                _, shifting_pred = interaction_outputs[
                    'knowledge_shifting_score'].detach().max(1)
                shifting_acc = (
                    shifting_pred == data['knowledge_shifting_label']
                ).float().mean()

                # As with NLLLoss, the input given is expected to contain log-probabilities
                # The targets are given as probabilities (i.e. without taking the logarithm).
                pri_2_pos = KLDLoss(
                    F.log_softmax(
                        interaction_outputs['prior_knowledge_tracking_score'],
                        1),
                    F.softmax(
                        interaction_outputs[
                            'posterior_knowledge_tracking_score'], 1).detach())

                loss_pos_tracking = F.nll_loss(
                    F.log_softmax(
                        interaction_outputs[
                            'posterior_knowledge_tracking_score'], -1),
                    data['knowledge_tracking_label'].view(-1))

                loss_shifting = F.nll_loss(
                    F.log_softmax(
                        interaction_outputs['knowledge_shifting_score'], -1),
                    data['knowledge_shifting_label'].view(-1))

                loss_g = F.nll_loss(
                    (rg[0] + 1e-8).log().reshape(-1, rg[0].size(-1)),
                    data['response'].reshape(-1),
                    ignore_index=0)

                ALL_loss = pri_2_pos + (loss_pos_tracking + loss_shifting +
                                        loss_g) * 0.5

                All_optimizer.zero_grad()
                ALL_loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.4)
                All_optimizer.step()

                if j % 10 == 0:
                    print('Training: %s' % "Dual_Game_DukeNet")
                    print(
                        "Epoch:{}, Batch:{}, Loss_A:{}, Loss_B:{}, KLDLoss:{}, Pos_Tra_Loss:{}, Shi_Loss:{}, Gen_Loss:{}, Pri_T_ACC:{}, Pos_T_ACC:{}, Shi_ACC:{}"
                        .format(epoch, j, rounder(print_A_loss, 4),
                                rounder(print_B_loss, 4),
                                rounder(pri_2_pos.cpu().item(), 4),
                                rounder(loss_pos_tracking.cpu().item(), 4),
                                rounder(loss_shifting.cpu().item(), 4),
                                rounder(loss_g.cpu().item(), 4),
                                rounder(pri_tracking_acc.cpu().item(), 2),
                                rounder(pos_tracking_acc.cpu().item(), 2),
                                rounder(shifting_acc.cpu().item(), 2)))

        # save model====================================================================================================
        fuse_dict = {"model": model.state_dict()}
        torch.save(
            fuse_dict,
            os.path.join(saved_model_path, '.'.join(["d_" + str(epoch),
                                                     'pkl'])))
        print("Saved epoch {} model".format("d_" + str(epoch)))
        with open(saved_model_path + "checkpoints.json", 'r',
                  encoding='utf-8') as r:
            checkpoints = json.load(r)
        checkpoints["time"].append("d_" + str(epoch))
        with open(saved_model_path + "checkpoints.json", 'w',
                  encoding='utf-8') as w:
            json.dump(checkpoints, w)
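
The A/B updates in Example #18 follow a REINFORCE-style dual game: each module samples an action from its categorical distribution, the other module scores it, and the negative NLL of the ground-truth label serves as the reward, normalized per batch as a variance-reduction baseline. The core pattern reduced to a sketch (all names hypothetical):

import torch
from torch.distributions import Categorical

def reinforce_loss(action_scores, reward):
    # action_scores: (N, K) unnormalized logits; reward: (N,) from the other module.
    dist = Categorical(logits=action_scores)
    action = dist.sample()  # (N,)
    # Batch-normalized reward acts as a simple baseline, reducing gradient variance.
    norm_reward = (reward - reward.mean()) / (reward.std() + 1e-8)
    # Policy gradient: ascend E[log p(action) * reward].
    return -(dist.log_prob(action) * norm_reward).mean()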
Example #19
def forward(fn, input, hx, weight, output, hy):
    with torch.cuda.device_of(input):
        lib = cudnn.lib
        handle = cudnn.get_handle()
        fn.datatype = cudnn._typemap[input.type()]

        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
            hy, cy = hy
        else:
            cx, cy = None, None

        if fn.batch_first:
            input = input.transpose(0, 1)

        if input.dim() != 3:
            raise RuntimeError('input must have 3 dimensions, got {}'.format(
                input.dim()))
        if fn.input_size != input.size(2):
            raise RuntimeError(
                'input.size(2) must be equal to input_size. Expected {}, got {}'
                .format(fn.input_size, input.size(2)))
        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError(
                'dropout supported only in cudnn v5.1 and above')

        fn.seq_length, fn.mini_batch, fn.input_size = input.size()
        hidden_size = _hidden_size(fn)
        output_size = _output_size(fn)

        assert hx.is_contiguous()
        assert cx is None or cx.is_contiguous()
        x = input.contiguous()
        output.resize_(*output_size)
        hy.resize_(*hidden_size)
        if cy is not None:
            cy.resize_(*hidden_size)
        y = output

        # init descriptors
        if ('desc' not in fn.dropout_state) or (fn.dropout_state['desc'].get()
                                                is None):
            fn.dropout_state['desc'] = Unserializable(
                init_dropout_descriptor(fn, handle))
        fn.rnn_desc = init_rnn_descriptor(fn, handle)
        fn.x_descs = cudnn.descriptor(x[0], fn.seq_length)
        fn.y_descs = cudnn.descriptor(y[0], fn.seq_length)
        fn.hx_desc = cudnn.descriptor(hx)
        fn.hy_desc = cudnn.descriptor(hx)
        fn.cx_desc = cudnn.descriptor(cx) if cx is not None else None
        fn.cy_desc = cudnn.descriptor(cx) if cx is not None else None

        # create the weight buffer and copy the weights into it
        num_weights = get_num_weights(handle, fn.rnn_desc, fn.x_descs[0],
                                      fn.datatype)
        fn.weight_buf = input.new(num_weights)
        fn.w_desc = init_weight_descriptor(fn, fn.weight_buf)
        w = fn.weight_buf
        # this zero might not seem necessary, but it is in the case
        # where biases are disabled; then they won't be copied and must be zero'd.
        # Alternatively, _copyParams could be written more carefully.
        w.zero_()
        params = get_parameters(fn, handle, w)
        _copyParams(weight, params)

        if tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected hidden size {}, got {}'.format(
                hidden_size, tuple(hx.size())))
        if cx is not None and tuple(cx.size()) != hidden_size:
            raise RuntimeError('Expected cell size {}, got {}'.format(
                hidden_size, tuple(cx.size())))

        workspace_size = ctypes.c_long()
        check_error(
            lib.cudnnGetRNNWorkspaceSize(handle, fn.rnn_desc, fn.seq_length,
                                         fn.x_descs,
                                         ctypes.byref(workspace_size)))
        fn.workspace = torch.cuda.ByteTensor(workspace_size.value)
        if fn.train:
            reserve_size = ctypes.c_long()
            check_error(
                lib.cudnnGetRNNTrainingReserveSize(handle, fn.rnn_desc,
                                                   fn.seq_length, fn.x_descs,
                                                   ctypes.byref(reserve_size)))
            fn.reserve = torch.cuda.ByteTensor(reserve_size.value)

            check_error(
                lib.cudnnRNNForwardTraining(
                    handle, fn.rnn_desc, fn.seq_length, fn.x_descs,
                    ctypes.c_void_p(x.data_ptr()), fn.hx_desc,
                    ctypes.c_void_p(hx.data_ptr()), fn.cx_desc,
                    ctypes.c_void_p(cx.data_ptr()) if cx is not None else None,
                    fn.w_desc, ctypes.c_void_p(w.data_ptr()), fn.y_descs,
                    ctypes.c_void_p(y.data_ptr()), fn.hy_desc,
                    ctypes.c_void_p(hy.data_ptr()), fn.cy_desc,
                    ctypes.c_void_p(cy.data_ptr()) if cx is not None else None,
                    ctypes.c_void_p(fn.workspace.data_ptr()),
                    fn.workspace.size(0),
                    ctypes.c_void_p(fn.reserve.data_ptr()),
                    fn.reserve.size(0)))
        else:  # inference
            check_error(
                lib.cudnnRNNForwardInference(
                    handle, fn.rnn_desc, fn.seq_length, fn.x_descs,
                    ctypes.c_void_p(x.data_ptr()), fn.hx_desc,
                    ctypes.c_void_p(hx.data_ptr()), fn.cx_desc,
                    ctypes.c_void_p(cx.data_ptr()) if cx is not None else None,
                    fn.w_desc, ctypes.c_void_p(w.data_ptr()), fn.y_descs,
                    ctypes.c_void_p(y.data_ptr()), fn.hy_desc,
                    ctypes.c_void_p(hy.data_ptr()), fn.cy_desc,
                    ctypes.c_void_p(cy.data_ptr()) if cx is not None else None,
                    ctypes.c_void_p(fn.workspace.data_ptr()),
                    fn.workspace.size(0)))

        if fn.batch_first:
            output = output.transpose_(0, 1)
Example #20
def backward_grad(fn, input, hx, weight, output, grad_output, grad_hy,
                  grad_input, grad_hx):
    with torch.cuda.device_of(input):
        handle = cudnn.get_handle()

        if fn.mode == cudnn.CUDNN_LSTM:
            hx, cx = hx
            grad_hx, grad_cx = grad_hx
            grad_hy, grad_cy = grad_hy
        else:
            cx, grad_cx, grad_cy = None, None, None

        if fn.batch_first:
            input = input.transpose(0, 1)
            grad_output = grad_output.transpose(0, 1)
            output = output.transpose(0, 1)

        input_size = _input_size(fn)
        hidden_size = _hidden_size(fn)
        output_size = _output_size(fn)

        assert hx.is_contiguous()
        assert cx is None or cx.is_contiguous()
        x = input.contiguous()
        dy = grad_output.contiguous()
        y = output
        w = fn.weight_buf
        dx = grad_input.resize_as_(input)
        dhy = grad_hy.contiguous().view(*hidden_size)
        dcy = grad_cy.contiguous().view(
            *hidden_size) if grad_cy is not None else None
        dhx = grad_hx.resize_(*hidden_size)
        dcx = grad_cx.resize_(*hidden_size) if grad_cx is not None else None

        if fn.dropout != 0 and cudnn.version() < 5103:
            raise RuntimeError(
                'dropout supported only in cudnn v 5.1 and above')
        if not fn.train:
            raise RuntimeError(
                'backward_grad can only be called when training!')
        if tuple(input.size()) != input_size:
            raise RuntimeError('Expected input size {}, got {}'.format(
                input_size, tuple(input.size())))
        if tuple(output.size()) != output_size:
            raise RuntimeError('Expected output size {}, got {}'.format(
                output_size, output.size()))
        if hx is not None and tuple(hx.size()) != hidden_size:
            raise RuntimeError('Expected hidden size {}, got {}'.format(
                hidden_size, hx.size()))
        if cx is not None and tuple(cx.size()) != hidden_size:
            raise RuntimeError('Expected cell size {}, got {}'.format(
                hidden_size, cx.size()))
        if dhy is not None and tuple(dhy.size()) != hidden_size:
            raise RuntimeError('Expected d_hidden size {}, got {}'.format(
                hidden_size, dhy.size()))
        if dcy is not None and tuple(dcy.size()) != hidden_size:
            raise RuntimeError('Expected d_cell size {}, got {}'.format(
                hidden_size, dcy.size()))
        if not dhy.is_cuda or not dy.is_cuda or (dcy is not None
                                                 and not dcy.is_cuda):
            raise RuntimeError('Gradients aren\'t CUDA tensors')

        check_error(
            cudnn.lib.cudnnRNNBackwardData(
                handle, fn.rnn_desc, fn.seq_length, fn.y_descs,
                ctypes.c_void_p(y.data_ptr()), fn.y_descs,
                ctypes.c_void_p(dy.data_ptr()), fn.hy_desc,
                ctypes.c_void_p(dhy.data_ptr()), fn.cy_desc,
                ctypes.c_void_p(dcy.data_ptr()) if cx is not None else None,
                fn.w_desc, ctypes.c_void_p(w.data_ptr()), fn.hx_desc,
                ctypes.c_void_p(hx.data_ptr()), fn.cx_desc,
                ctypes.c_void_p(cx.data_ptr()) if cx is not None else None,
                fn.x_descs, ctypes.c_void_p(dx.data_ptr()), fn.hx_desc,
                ctypes.c_void_p(dhx.data_ptr()), fn.cx_desc,
                ctypes.c_void_p(dcx.data_ptr()) if cx is not None else None,
                ctypes.c_void_p(fn.workspace.data_ptr()), fn.workspace.size(0),
                ctypes.c_void_p(fn.reserve.data_ptr()), fn.reserve.size(0)))

        if fn.batch_first:
            grad_input = grad_input.transpose_(0, 1)
Example #21
def main():
    """
    --------------------------------------------- MAIN --------------------------------------------------------
    Loads the data and executes the grid search on depth and width scaling factors.

    """
    # Manual seed for reproducibility
    torch.manual_seed(363636)

    # Global instances
    global args, use_cuda, device
    # Instantiating the parser
    args = parser.parse_args()
    # Global CUDA flag
    use_cuda = args.cuda and torch.cuda.is_available()
    # Defining device and the device's map location
    device = torch.device("cuda" if use_cuda else "cpu")
    print('chosen device: ', device)

    # Defining loss function and printing CUDA information (if available)
    if use_cuda:
        print("PyTorch version: ")
        print(torch.__version__)
        print("CUDA Version: ")
        print(torch.version.cuda)
        print("cuDNN version is: ")
        print(cudnn.version())
        cudnn.benchmark = True
        criterion = nn.CrossEntropyLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss()

    # Dataloaders for CIFAR, ImageNet and MNIST
    if args.dataset == 'CIFAR100':

        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        kwargs = {
            'num_workers': args.workers,
            'pin_memory': True
        } if use_cuda else {}

        train_loader = torch.utils.data.DataLoader(datasets.CIFAR100(
            root=args.data_path,
            train=True,
            transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, 4),
                transforms.ColorJitter(brightness=0.3,
                                       contrast=0.3,
                                       saturation=0.3,
                                       hue=0.075),
                transforms.ToTensor(),
                normalize,
                Cutout(n_holes=1, length=16),
            ]),
            download=True),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100(root=args.data_path,
                              train=False,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  normalize,
                              ])),
            batch_size=args.val_batch_size,
            shuffle=False,
            **kwargs)

    elif args.dataset == 'ImageNet':

        traindir = os.path.join(args.data_path, 'train')
        valdir = os.path.join(args.data_path, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(args.image_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)

        image_size = args.image_size
        val_dataset = datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(image_size, interpolation=PIL.Image.BICUBIC),
                transforms.CenterCrop(image_size),
                transforms.ToTensor(),
                normalize,
            ]))
        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=args.val_batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)

    elif args.dataset == 'MNIST':

        kwargs = {
            'num_workers': args.workers,
            'pin_memory': True
        } if use_cuda else {}

        train_loader = torch.utils.data.DataLoader(datasets.MNIST(
            args.data_path,
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ])),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)
        val_loader = torch.utils.data.DataLoader(
            datasets.MNIST(args.data_path,
                           train=False,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307, ), (0.3081, ))
                           ])),
            batch_size=args.val_batch_size,
            shuffle=True,
            **kwargs)

    elif args.dataset == 'CIFAR10':

        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        kwargs = {
            'num_workers': args.workers,
            'pin_memory': True
        } if use_cuda else {}

        train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
            root=args.data_path,
            train=True,
            transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, 4),
                transforms.ToTensor(),
                normalize,
            ]),
            download=True),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(root=args.data_path,
                             train=False,
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 normalize,
                             ])),
            batch_size=args.val_batch_size,
            shuffle=False,
            **kwargs)

    # original grid = [(1.0, 1.0), (1.9, 1.0), (1.7, 1.1), (1.6, 1.1), (1.4, 1.2), (1.2, 1.3), (1.0, 1.4)]

    grid = [(args.grid[i], args.grid[i + 1])
            for i in range(0, len(args.grid), 2)]

    for coeff in grid:
        alpha = coeff[0]**args.phi
        beta = coeff[1]**args.phi
        grid_search(train_loader, val_loader, criterion, alpha, beta)
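
To make the expansion above concrete, here is a minimal sketch of how a flat --grid list turns into (depth, width) coefficient pairs and how --phi compounds them; the values are hypothetical and grid_search is defined elsewhere in this script:

grid_arg = [1.0, 1.0, 1.9, 1.0, 1.7, 1.1]  # hypothetical --grid value
phi = 2                                    # hypothetical --phi value
grid = [(grid_arg[i], grid_arg[i + 1]) for i in range(0, len(grid_arg), 2)]
for c_d, c_w in grid:
    alpha, beta = c_d ** phi, c_w ** phi   # compounded depth/width multipliers
    print('depth x{:.2f}, width x{:.2f}'.format(alpha, beta))
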
Example #22
0
    def compress(self, w, pi, delta, trainloader, testloader, valloader,
                 loss_fn):
        """
        Main L-C compression method.
    
        :param w: Input model.
        :type w: torch.nn.Module
        :param pi: Compression function.
        :param delta: Decompression function.
        :param trainloader: Training dataloader.
        :param testloader: Test dataloader.
        :param valloader: Validation dataloader.
        :param loss_fn: Loss criterion.
        """
        statistics = {}
        # Save engine configuration
        statistics.update(self._engine_config)

        _model_stat_fn = self.debugging_flags.get('custom_model_statistics',
                                                  util.empty_stat_fn)
        _disable_train_stats = self.debugging_flags.get('disable_train_stats',
                                                        False)
        timer_lc = EventTimer()

        if self.use_cuda: cudnn.benchmark = True
        logger.debug("[Condensa] cuDNN VERSION: {}".format(cudnn.version()))

        validate = (valloader is not None)
        test = (testloader is not None)

        # Copy model to GPU0 memory
        if self.use_cuda: w = w.cuda(0)

        # Mark all compressible modules in w
        with record_mode():
            pi(w)

        with torch.no_grad():
            theta = deepcopy(w)
        self.zero_(theta)

        with torch.no_grad():
            lm = deepcopy(w)
        self.zero_(lm)

        with torch.no_grad():
            best_model = deepcopy(w)

        # Enable data-parallelism in the L-step
        if self.use_cuda and self.distributed:
            ngpus = torch.cuda.device_count()
            logger.info('[Condensa] {} GPUs enabled for L-step'.format(ngpus))
            w = torch.nn.DataParallel(w)

        mu = 0.
        learning_rate = self.lr

        optimizer = self.l_optimizer(w,
                                     lr=learning_rate,
                                     **self.l_optimizer_params)
        optimizer.reset_state()

        context = {
            'iteration': -1,
            'learning_rate': learning_rate,
            'mu': mu,
            'theta': theta,
        }

        if not _disable_train_stats:
            try:
                w_train_loss, w_train_stats = _model_stat_fn(w,
                                                             loss_fn,
                                                             trainloader,
                                                             loader='train',
                                                             context=context)
            except TypeError:
                w_train_loss, w_train_stats = _model_stat_fn(
                    w, loss_fn, trainloader)

            logger.info('[Condensa] w TRAIN\tloss={:.5f}, {}'.format(
                w_train_loss, ', '.join(
                    ['{}:{}'.format(k, v) for k, v in w_train_stats.items()])))
        if validate:
            try:
                w_val_loss, w_val_stats = _model_stat_fn(w,
                                                         loss_fn,
                                                         valloader,
                                                         loader='val',
                                                         context=context)
            except TypeError:
                w_val_loss, w_val_stats = _model_stat_fn(w, loss_fn, valloader)

            logger.info('[Condensa] w VAL\tloss={:.5f}, {}'.format(
                w_val_loss, ', '.join(
                    ['{}:{}'.format(k, v) for k, v in w_val_stats.items()])))
        if test:
            try:
                w_test_loss, w_test_stats = _model_stat_fn(w,
                                                           loss_fn,
                                                           testloader,
                                                           loader='test',
                                                           context=context)
            except TypeError:
                w_test_loss, w_test_stats = _model_stat_fn(
                    w, loss_fn, testloader)

            logger.info('[Condensa] w TEST\tloss={:.5f}, {}'.format(
                w_test_loss, ', '.join(
                    ['{}:{}'.format(k, v) for k, v in w_test_stats.items()])))

        best_loss = sys.float_info.max
        train_losses = []
        if validate: val_losses = []
        if test: test_losses = []
        outer_lr_scheduler = None
        if self.lr_decay is not None:
            outer_lr_scheduler = ExpDecayedLR(self.lr, self.lr_decay)
        elif self.lr_schedule is not None:
            outer_lr_scheduler = DecayedLR(self.lr, self.lr_schedule,
                                           self.lr_multiplier)
        for j in range(0, self.steps):
            n_sgd_iter = (self.mb_iterations_first_l
                          if j == 1 else self.mb_iterations_per_l)

            # Set up outer learning rate
            learning_rate = self.lr
            if outer_lr_scheduler is not None:
                learning_rate = outer_lr_scheduler.learning_rate

            logger.info(
                '[Condensa] LC Iteration {}:\tmu={:.5f}, lr={:.5f}'.format(
                    j, mu, learning_rate))

            inner_lr_scheduler = None
            if self.lr_end is not None:
                inner_lr_scheduler = IntervalLR(learning_rate, self.lr_end,
                                                n_sgd_iter)

            # L step
            # Switch to training mode
            i = 0
            w.train()
            iterator = iter(trainloader)
            if logger.isEnabledFor(logging.INFO) and j > 0:
                pbar = tqdm(total=n_sgd_iter, ascii=True)
            while True:
                if j == 0:
                    logger.info('[Condensa] Skipping first L-step')
                    break
                if j == 1 and i >= self.mb_iterations_first_l:
                    break
                if j > 1 and i >= self.mb_iterations_per_l:
                    break

                try:
                    inputs, targets = next(iterator)
                except StopIteration:
                    iterator = iter(trainloader)
                    inputs, targets = next(iterator)

                if self.use_cuda:
                    if not inputs.is_cuda: inputs = inputs.cuda()
                    if not targets.is_cuda:
                        targets = targets.cuda(non_blocking=True)
                outputs = w(inputs)
                loss = loss_fn(outputs, targets)

                optimizer.zero_grad()

                loss.backward()
                optimizer.step(learning_rate, mu, theta, lm)

                if inner_lr_scheduler is not None:
                    inner_lr_scheduler.step()
                    learning_rate = inner_lr_scheduler.learning_rate

                if logger.isEnabledFor(logging.INFO):
                    pbar.update()
                i += 1

            if logger.isEnabledFor(logging.INFO) and j > 0:
                pbar.close()
            logger.info('')

            if self.use_cuda: torch.cuda.synchronize()

            w.eval()
            # C step and theta update
            try:
                theta.load_state_dict(w.module.state_dict())
            except AttributeError:
                theta.load_state_dict(w.state_dict())
            if mu > 0:
                try:
                    wmodules = w.module.modules()
                except AttributeError:
                    wmodules = w.modules()
                with record_mode():
                    pi(theta)
                with torch.no_grad():
                    for w_m, theta_m, lm_m in zip(wmodules, theta.modules(),
                                                  lm.modules()):
                        if hasattr(theta_m, 'condense'):
                            for pname in theta_m.condense:
                                getattr(theta_m, pname).data = (
                                    getattr(w_m, pname).detach() -
                                    getattr(lm_m, pname).data / mu)

            pi(theta)

            context['iteration'] = j
            context['learning_rate'] = learning_rate
            context['theta'] = theta
            if not _disable_train_stats:
                try:
                    nested_train_loss, nested_train_stats = _model_stat_fn(
                        theta,
                        loss_fn,
                        trainloader,
                        loader='train',
                        context=context)
                except TypeError:
                    nested_train_loss, nested_train_stats = _model_stat_fn(
                        theta, loss_fn, trainloader)

                train_losses.append(nested_train_loss)
                logger.info(
                    '[Condensa] Nested (theta) TRAIN\tloss={:.5f}, {}'.format(
                        nested_train_loss, ', '.join([
                            '{}:{}'.format(k, v)
                            for k, v in nested_train_stats.items()
                        ])))
            if validate:
                try:
                    nested_val_loss, nested_val_stats = _model_stat_fn(
                        theta,
                        loss_fn,
                        valloader,
                        loader='val',
                        context=context)
                except TypeError:
                    nested_val_loss, nested_val_stats = _model_stat_fn(
                        theta, loss_fn, valloader)

                val_losses.append(nested_val_loss)
                logger.info(
                    '[Condensa] Nested (theta) VAL\tloss={:.5f}, {}'.format(
                        nested_val_loss, ', '.join([
                            '{}:{}'.format(k, v)
                            for k, v in nested_val_stats.items()
                        ])))
            if test:
                try:
                    nested_test_loss, nested_test_stats = _model_stat_fn(
                        theta,
                        loss_fn,
                        testloader,
                        loader='test',
                        context=context)
                except TypeError:
                    nested_test_loss, nested_test_stats = _model_stat_fn(
                        theta, loss_fn, testloader)

                test_losses.append(nested_test_loss)
                logger.info(
                    '[Condensa] Nested (theta) TEST\tloss={:.5f}, {}'.format(
                        nested_test_loss, ', '.join([
                            '{}:{}'.format(k, v)
                            for k, v in nested_test_stats.items()
                        ])))

            if validate:
                if nested_val_loss < best_loss:
                    logger.info('[Condensa] Saving model based on VAL')
                    best_loss = nested_val_loss
                    # Deep-copy required here to preserve dtypes
                    best_model = deepcopy(theta)
            elif test:
                if nested_test_loss < best_loss:
                    logger.info('[Condensa] Saving model based on TEST')
                    best_loss = nested_test_loss
                    # Deep-copy required here to preserve dtypes
                    best_model = deepcopy(theta)
            else:
                logger.info('[Condensa] Saving model based on most recent')
                best_model = deepcopy(theta)

            # theta <- delta(theta)
            delta(theta)

            # LM update
            if mu > 0:
                try:
                    wmodules = w.module.modules()
                except AttributeError:
                    wmodules = w.modules()
                for w_m, theta_m, lm_m in zip(wmodules, theta.modules(),
                                              lm.modules()):
                    if hasattr(theta_m, 'condense'):
                        for pname in theta_m.condense:
                            getattr(
                                lm_m,
                                pname).data = (getattr(lm_m, pname).data - mu *
                                               (getattr(w_m, pname).detach() -
                                                getattr(theta_m, pname).data))

            optimizer.reset_state()
            # Update mu
            mu = self._update_mu(mu, self.mu_init, self.mu_multiplier,
                                 self.mu_cap)
            # Update LR schedule
            if outer_lr_scheduler is not None: outer_lr_scheduler.step()

        statistics['elapsed_lc'] = timer_lc.elapsed_seconds
        statistics['train_losses'] = train_losses
        if test: statistics['test_losses'] = test_losses
        if validate: statistics['val_losses'] = val_losses
        return best_model, statistics
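
The mu schedule driving the augmented-Lagrangian updates above is produced by self._update_mu. A minimal sketch of a schedule consistent with how it is called here (an assumption, not Condensa's actual implementation):

def _update_mu(mu, mu_init, mu_multiplier, mu_cap):
    # mu starts at zero, jumps to mu_init after the first LC iteration,
    # then grows geometrically until capped at mu_cap
    new_mu = mu_init if mu == 0. else mu * mu_multiplier
    return min(new_mu, mu_cap)
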
Example #23
0
def inference(args):
    cudnn.enabled = True
    cudnn.benchmark = True
    cudnn.deterministic = True
    print("torch_version:{}".format(torch.__version__))
    print("CUDA_version:{}".format(torch.version.cuda))
    print("cudnn_version:{}".format(cudnn.version()))

    init_seed(123456)

    data_path = args.base_data_path + args.dataset + '/'


    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()

    print('Vocabulary size', len(vocab2id))

    if os.path.exists(data_path + 'dev_DukeNet.pkl'):
        query = torch.load(data_path + 'query_DukeNet.pkl')
        passage = torch.load(data_path + 'passage_DukeNet.pkl')
        dev_samples = torch.load(data_path + 'dev_DukeNet.pkl')
        print("The number of dev_samples:", len(dev_samples))

        if args.dataset == "wizard_of_wikipedia":
            test_seen_samples = torch.load(data_path + 'test_seen_DukeNet.pkl')
            test_unseen_samples = torch.load(data_path + 'test_unseen_DukeNet.pkl')
            print("The number of test_seen_samples:", len(test_seen_samples))
            print("The number of test_unseen_samples:", len(test_unseen_samples))
        elif args.dataset == "holl_e":
            test_samples = torch.load(data_path + 'test_DukeNet.pkl')
            print("The number of test_samples:", len(test_samples))


    else:
        samples, query, passage = load_default(args.dataset, data_path + args.dataset + '.answer',
                                                                   data_path + args.dataset + '.passage',
                                                                   data_path + args.dataset + '.pool',
                                                                   data_path + args.dataset + '.qrel',
                                                                   data_path + args.dataset + '.query',
                                                                   tokenizer)

        if args.dataset == "wizard_of_wikipedia":
            train_samples, dev_samples, test_seen_samples, test_unseen_samples = split_data(args.dataset, data_path + args.dataset + '.split', samples)
            print("The number of test_seen_samples:", len(test_seen_samples))
            print("The number of test_unseen_samples:", len(test_unseen_samples))
            torch.save(test_seen_samples, data_path + 'test_seen_DukeNet.pkl')
            torch.save(test_unseen_samples, data_path + 'test_unseen_DukeNet.pkl')

        elif args.dataset == "holl_e":
            train_samples, dev_samples, test_samples, = split_data(args.dataset, data_path + args.dataset + '.split', samples)
            print("The number of test_samples:", len(test_samples))
            torch.save(test_samples, data_path + 'test_DukeNet.pkl')

        print("The number of train_samples:", len(train_samples))
        print("The number of dev_samples:", len(dev_samples))
        torch.save(query, data_path + 'query_DukeNet.pkl')
        torch.save(passage, data_path + 'passage_DukeNet.pkl')
        torch.save(train_samples, data_path + 'train_DukeNet.pkl')
        torch.save(dev_samples, data_path + 'dev_DukeNet.pkl')


    if args.dataset == "wizard_of_wikipedia":
        dev_dataset = Dataset(args.mode, dev_samples, query, passage, vocab2id, args.max_knowledge_pool_when_train, args.max_knowledge_pool_when_inference,
                                args.context_len, args.knowledge_sentence_len, args.max_dec_length)

        test_seen_dataset = Dataset(args.mode, test_seen_samples, query, passage, vocab2id, args.max_knowledge_pool_when_train, args.max_knowledge_pool_when_inference,
                                args.context_len, args.knowledge_sentence_len, args.max_dec_length)

        test_unseen_dataset = Dataset(args.mode, test_unseen_samples, query, passage, vocab2id, args.max_knowledge_pool_when_train, args.max_knowledge_pool_when_inference,
                                 args.context_len, args.knowledge_sentence_len, args.max_dec_length)

    elif args.dataset == "holl_e":
        test_dataset = Dataset(args.mode, test_samples, query, passage, vocab2id, args.max_knowledge_pool_when_train,
                               args.max_knowledge_pool_when_inference,
                               args.context_len, args.knowledge_sentence_len, args.max_dec_length)

    saved_model_path = os.path.join(args.base_output_path + args.name + "/", 'model/')


    def inference(dataset, epoch=None):
        file = saved_model_path + str(epoch) + '.pkl'
        if os.path.exists(file):
            model = DukeNet(vocab2id, id2vocab, args)

            model.load_state_dict(torch.load(file)["model"])
            trainer = CumulativeTrainer(args.name, model, tokenizer, detokenizer, None)

            if dataset == "wizard_of_wikipedia":
                print('inference {}'.format("dev_dataset"))
                trainer.test('inference', dev_dataset, collate_fn, args.inference_batch_size, 'dev', str(epoch), output_path=args.base_output_path + args.name+"/")
                print('inference {}'.format("test_seen_dataset"))
                trainer.test('inference', test_seen_dataset, collate_fn, args.inference_batch_size, 'test_seen', str(epoch), output_path=args.base_output_path + args.name+"/")
                print('inference {}'.format("test_unseen_dataset"))
                trainer.test('inference', test_unseen_dataset, collate_fn, args.inference_batch_size, 'test_unseen', str(epoch), output_path=args.base_output_path + args.name+"/")

            elif dataset == "holl_e":
                print('inference {}'.format("test_dataset"))
                trainer.test('inference', test_dataset, collate_fn, args.inference_batch_size, 'test', str(epoch), output_path=args.base_output_path + args.name+"/")


    if not os.path.exists(saved_model_path+"finished_inference.json"):
        finished_inference = {"time": []}
        w = open(saved_model_path+"finished_inference.json", 'w', encoding='utf-8')
        json.dump(finished_inference, w)
        w.close()

    if args.appoint_epoch != -1:
        print('Start inference at epoch', args.appoint_epoch)
        inference(args.dataset, args.appoint_epoch)

        r = open(saved_model_path+"finished_inference.json", 'r', encoding='utf-8')
        finished_inference = json.load(r)
        r.close()

        finished_inference["time"].append(args.appoint_epoch)
        w = open(saved_model_path + "finished_inference.json", 'w', encoding='utf-8')
        json.dump(finished_inference, w)
        w.close()
        print("finished epoch {} inference".format(args.appoint_epoch))
        exit()

    while True:
        with open(saved_model_path + "checkpoints.json", 'r', encoding='utf-8') as r:
            checkpoints = json.load(r)

        r = open(saved_model_path + "finished_inference.json", 'r', encoding='utf-8')
        finished_inference = json.load(r)
        r.close()

        if len(checkpoints["time"]) == 0:
            print('Inference_mode: waiting for training to finish the first epoch...')
            time.sleep(300)
        else:
            for i in checkpoints["time"]:  # i is the index of epoch
                if i in finished_inference["time"]:
                    print("inference for epoch {} is already done, skipping it".format(i))
                else:
                    print('Start inference at epoch', i)
                    inference(args.dataset, i)

                    r = open(saved_model_path + "finished_inference.json", 'r', encoding='utf-8')
                    finished_inference = json.load(r)
                    r.close()

                    finished_inference["time"].append(i)

                    w = open(saved_model_path+"finished_inference.json", 'w', encoding='utf-8')
                    json.dump(finished_inference, w)
                    w.close()
                    print("finished epoch {} inference".format(i))

            print("Inference_mode: current all model checkpoints are completed...")
            print("Inference_mode: finished %d modes" % len(finished_inference["time"]))
            if len(finished_inference["time"]) == args.epoches:
                print("All inference is ended")
                break
            else:
                print('Inference_mode: waiting for training to finish the next epoch...')
                time.sleep(300)
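
init_seed above comes from the project's utilities. A minimal sketch of what such a helper typically does (an assumption, not DukeNet's exact code):

import random
import numpy as np
import torch

def init_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
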
Example #24
0
def train(args):
    cudnn.enabled = True
    cudnn.benchmark = True
    cudnn.deterministic = True

    print("torch_version:{}".format(torch.__version__))
    print("CUDA_version:{}".format(torch.version.cuda))
    print("cudnn_version:{}".format(cudnn.version()))

    init_seed(123456)

    data_path = args.base_data_path+args.dataset+'/'

    tokenizer, vocab2id, id2vocab = bert_tokenizer()
    detokenizer = bert_detokenizer()

    print('Vocabulary size', len(vocab2id))

    if os.path.exists(data_path + 'train_DukeNet.pkl'):
        query = torch.load(data_path + 'query_DukeNet.pkl')
        train_samples = torch.load(data_path + 'train_DukeNet.pkl')
        passage = torch.load(data_path + 'passage_DukeNet.pkl')
        print("The number of train_samples:", len(train_samples))
    else:
        samples, query, passage = load_default(args.dataset, data_path + args.dataset + '.answer',
                                               data_path + args.dataset + '.passage',
                                               data_path + args.dataset + '.pool',
                                               data_path + args.dataset + '.qrel',
                                               data_path + args.dataset + '.query',
                                               tokenizer)

        if args.dataset == "wizard_of_wikipedia":
            train_samples, dev_samples, test_seen_samples, test_unseen_samples = split_data(args.dataset, data_path + args.dataset + '.split', samples)
            print("The number of test_seen_samples:", len(test_seen_samples))
            print("The number of test_unseen_samples:", len(test_unseen_samples))
            torch.save(test_seen_samples, data_path + 'test_seen_DukeNet.pkl')
            torch.save(test_unseen_samples, data_path + 'test_unseen_DukeNet.pkl')

        elif args.dataset == "holl_e":
            train_samples, dev_samples, test_samples, = split_data(args.dataset, data_path + args.dataset + '.split', samples)
            print("The number of test_samples:", len(test_samples))
            torch.save(test_samples, data_path + 'test_DukeNet.pkl')

        print("The number of train_samples:", len(train_samples))
        print("The number of dev_samples:", len(dev_samples))
        torch.save(query, data_path + 'query_DukeNet.pkl')
        torch.save(passage, data_path + 'passage_DukeNet.pkl')
        torch.save(train_samples, data_path + 'train_DukeNet.pkl')
        torch.save(dev_samples, data_path + 'dev_DukeNet.pkl')


    model = DukeNet(vocab2id, id2vocab, args)
    saved_model_path = os.path.join(args.base_output_path + args.name + "/", 'model/')

    if args.resume is True:
        print("Reading checkpoints...")

        with open(saved_model_path + "checkpoints.json", 'r', encoding='utf-8') as r:
            checkpoints = json.load(r)
        last_epoch = checkpoints["time"][-1]

        fuse_dict = torch.load(os.path.join(saved_model_path, '.'.join([str(last_epoch), 'pkl'])))
        model.load_state_dict(fuse_dict["model"])
        print('Loading success, last_epoch is {}'.format(last_epoch))


    else:
        init_params(model, "enc")
        freeze_params(model, "enc")

        last_epoch = -1

        if not os.path.exists(saved_model_path):
            os.makedirs(saved_model_path)

        with open(saved_model_path + "checkpoints.json", 'w', encoding='utf-8') as w:
            checkpoints = {"time": []}
            json.dump(checkpoints, w)

    # construct an optimizer object
    model_optimizer = optim.Adam(model.parameters(), args.lr)  # model.parameters() yields all module parameters for the optimizer
    model_scheduler = get_constant_schedule(model_optimizer)

    if args.resume is True:
        model_scheduler.load_state_dict(fuse_dict["scheduler"])
        print('Loading scheduler, last_scheduler is', fuse_dict["scheduler"])

    trainer = CumulativeTrainer(args.name, model, tokenizer, detokenizer, args.local_rank, accumulation_steps=args.accumulation_steps)
    model_optimizer.zero_grad()  # clears the gradients of all optimized tensors

    for i in range(last_epoch+1, args.epoches):
        if i == 5:
            # after 5 warm-up epochs, unfreeze the encoder and trade batch size
            # for more gradient-accumulation steps
            unfreeze_params(model, "enc")
            args.train_batch_size = 2
            args.accumulation_steps = 16

        train_dataset = Dataset(args.mode, train_samples, query, passage, vocab2id,
                                    args.max_knowledge_pool_when_train, args.max_knowledge_pool_when_inference, args.context_len, args.knowledge_sentence_len,
                                    args.max_dec_length)
        trainer.train_epoch('train', train_dataset, collate_fn, args.train_batch_size, i, model_optimizer, model_scheduler)
        del train_dataset

        trainer.serialize(i, model_scheduler, saved_model_path=saved_model_path)
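
init_params, freeze_params and unfreeze_params are project helpers used above to warm up the model with a frozen BERT encoder. A minimal sketch of the usual pattern, assuming the "enc" keyword is matched against parameter names:

def freeze_params(model, keyword):
    # exclude matching parameters from gradient computation
    for name, param in model.named_parameters():
        if keyword in name:
            param.requires_grad = False

def unfreeze_params(model, keyword):
    # re-enable gradients for matching parameters
    for name, param in model.named_parameters():
        if keyword in name:
            param.requires_grad = True
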
    if 'val' in args.test_idx:
        args.rel_path = True
        args.save_dir = './val_submissions'
    # manual seed
    args.manual_seed = random.randint(0, 10000)  # fix seed
    print("Random Seed: ", args.manual_seed)
    random.seed(args.manual_seed)
    np.random.seed(args.manual_seed)
    torch.manual_seed(args.manual_seed)

    args.cuda = torch.cuda.is_available()
    if args.cuda:
        print('using cuda')
        if cudnn.enabled:
            cudnn.benchmark = True
            print('using cudnn {}'.format(cudnn.version()))

    print(args)

    # Data augmentation and normalization for training
    # Just normalization for validation
    (scale_size,
     crop_size) = (370, 299) if 'inception_v3' in args.arch else (256, 224)

    # create the dataloader for test image
    idx_files = args.test_idx

    if args.ten_crop:  # TODO: test mix mode
        dsets = get_augmented_test_set(data_root=args.data_root,
                                       idx_file=idx_files,
                                       scale_size=scale_size,
Example #26
0
def main():
    """
    --------------------------------------------- MAIN --------------------------------------------------------

    Instantiates the model and loss function, and defines the dataloaders (with
    data augmentation) for several datasets.
    Defines the grid for a grid search over lambda_max_divrs and initial_centroid_value_multipliers,
    both of which strongly influence the sparsity (and hence the accuracy) of the
    resulting ternary networks.
    Starts the grid search.
    """

    # Manual seed for reproducibility
    torch.manual_seed(363636)

    # Global instances
    global args, use_cuda, device
    # Instantiating the parser
    args = parser.parse_args()
    # Global CUDA flag
    use_cuda = args.cuda and torch.cuda.is_available()
    # Defining device and device's map location
    device = torch.device("cuda" if use_cuda else "cpu")
    print('chosen device: ', device)

    # Building the model
    if args.model == 'cifar_micronet':
        print('Building MicroNet for CIFAR-100 with depth multiplier {} and width multiplier {} ...'.format(
            args.dw_multps[0] ** args.phi, args.dw_multps[1] ** args.phi))
        model = micronet(args.dw_multps[0] ** args.phi, args.dw_multps[1] ** args.phi)

    elif args.model == 'imagenet_micronet':
        print('Building MicroNet for ImageNet with depth multiplier {} and width multiplier {} ...'.format(
            args.dw_multps[0] ** args.phi, args.dw_multps[1] ** args.phi))
        model = image_micronet(args.dw_multps[0] ** args.phi, args.dw_multps[1] ** args.phi)

    elif args.model == 'efficientnet-b1':
        print('Building EfficientNet-B1 ...')
        model = EfficientNet.efficientnet_b1()

    elif args.model == 'efficientnet-b2':
        print('Building EfficientNet-B2 ...')
        model = EfficientNet.efficientnet_b2()

    elif args.model == 'efficientnet-b3':
        print('Building EfficientNet-B3 ...')
        model = EfficientNet.efficientnet_b3()

    elif args.model == 'efficientnet-b4':
        print('Building EfficientNet-B4 ...')
        model = EfficientNet.efficientnet_b4()

    # list parameter names for inspection
    for name, param in model.named_parameters():
        print('\n', name)

    # Transfers model to device (GPU/CPU).
    model.to(device)

    # Defining loss function and printing CUDA information (if available)
    if use_cuda:
        print("PyTorch version: ")
        print(torch.__version__)
        print("CUDA Version: ")
        print(torch.version.cuda)
        print("cuDNN version is: ")
        print(cudnn.version())
        cudnn.benchmark = True
        loss_fct = nn.CrossEntropyLoss().cuda()
    else:
        loss_fct = nn.CrossEntropyLoss()

    # Dataloaders for CIFAR, ImageNet and MNIST
    if args.dataset == 'CIFAR100':

        print('Loading CIFAR-100 data ...')
        normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
                                         std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        kwargs = {'num_workers': args.workers, 'pin_memory': True} if use_cuda else {}

        train_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100(root=args.data_path, train=True, transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, 4),
                transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.075),
                transforms.ToTensor(),
                normalize,
                Cutout(n_holes=1, length=16),
            ]), download=True),
            batch_size=args.batch_size, shuffle=True, **kwargs)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100(root=args.data_path, train=False, transform=transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=args.val_batch_size, shuffle=False, **kwargs)

    elif args.dataset == 'ImageNet':

        print('Loading ImageNet data ...')
        traindir = os.path.join(args.data_path, 'train')
        valdir = os.path.join(args.data_path, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(args.image_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=True)

        if model.__class__.__name__ == 'EfficientNet' or 'efficientnet' in str(args.model):
            image_size = EfficientNet.get_image_size(args.model)

        else:
            image_size = args.image_size

        val_dataset = datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(image_size, interpolation=PIL.Image.BICUBIC),
                transforms.CenterCrop(image_size),
                transforms.ToTensor(),
                normalize,
            ]))
        val_loader = torch.utils.data.DataLoader(
            val_dataset, batch_size=args.val_batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True)

    elif args.dataset == 'MNIST':

        kwargs = {'num_workers': args.workers, 'pin_memory': True} if use_cuda else {}

        train_loader = torch.utils.data.DataLoader(
            datasets.MNIST(args.data_path, train=True, download=True,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307,), (0.3081,))
                           ])),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            datasets.MNIST(args.data_path, train=False, transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))
            ])),
            batch_size=args.val_batch_size, shuffle=True, **kwargs)

    elif args.dataset == 'CIFAR10':

        normalize = transforms.Normalize(mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
                                         std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        kwargs = {'num_workers': args.workers, 'pin_memory': True} if use_cuda else {}

        train_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(root=args.data_path, train=True, transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, 4),
                transforms.ToTensor(),
                normalize,
            ]), download=True),
            batch_size=args.batch_size, shuffle=True, **kwargs)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(root=args.data_path, train=False, transform=transforms.Compose([
                transforms.ToTensor(),
                normalize,
            ])),
            batch_size=args.val_batch_size, shuffle=False, **kwargs)

    else:
        raise NotImplementedError('Undefined dataset name %s' % args.dataset)


    # Gridsearch on dividers for lambda_max and initial cluster center values
    for initial_c_divr in args.ini_c_divrs:
        for lambda_max_divr in args.lambda_max_divrs:
            print('lambda_max_divr: {}, initial_c_divr: {}'.format(lambda_max_divr, initial_c_divr))
            with open('./model_quantization/logfiles/logfile.txt', 'a+') as logfile:
                logfile.write('lambda_max_divr: {}, initial_c_divr: {}\n'.format(lambda_max_divr, initial_c_divr))
            grid_search(train_loader, val_loader, model, loss_fct, lambda_max_divr, initial_c_divr)
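
Cutout, used in the CIFAR augmentation pipelines above, is not defined in this snippet. A minimal sketch of the standard implementation (DeVries & Taylor, 2017), applied after ToTensor on a CHW tensor:

import numpy as np
import torch

class Cutout(object):
    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        h, w = img.size(1), img.size(2)
        mask = np.ones((h, w), np.float32)
        for _ in range(self.n_holes):
            y, x = np.random.randint(h), np.random.randint(w)
            y1, y2 = np.clip(y - self.length // 2, 0, h), np.clip(y + self.length // 2, 0, h)
            x1, x2 = np.clip(x - self.length // 2, 0, w), np.clip(x + self.length // 2, 0, w)
            mask[y1:y2, x1:x2] = 0.  # zero out a square patch
        return img * torch.from_numpy(mask).expand_as(img)
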
Example #27
0
def set_logger(log_file=None, debug_mode=False):
    # NOTE: the head of this function was truncated in the source; the signature
    # and file-handler branch are an assumption based on the call site below.
    if log_file:
        handlers = [logging.FileHandler(log_file), logging.StreamHandler()]
    else:
        handlers = [logging.StreamHandler()]
    """ add '%(filename)s' to format show source file """
    logging.basicConfig(level=logging.DEBUG if debug_mode else logging.INFO,
                        format='%(asctime)s %(levelname)s: %(message)s',
                        datefmt='%Y-%m-%d %H:%M:%S',
                        handlers=handlers)


if __name__ == '__main__':

    # set args
    args = parser.parse_args()

    set_logger(log_file=args.log_file, debug_mode=args.debug_mode)
    logging.info("Cudnn Version: {}".format(cudnn.version()))
    cudnn.benchmark = True
    logging.info("Start evaluation with args:\n" +
                 json.dumps(vars(args), indent=4, sort_keys=True))

    # set device states
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpus)  # before using torch
    assert torch.cuda.is_available(), "CUDA is not available"

    # create model
    sym_net, input_config = get_symbol(name=args.network, num_classes=101)

    # network
    if torch.cuda.is_available():
        sym_net = sym_net.cuda()
    net = static_model(net=sym_net)
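
One detail worth noting from the snippet above: CUDA_VISIBLE_DEVICES only takes effect if it is set before the CUDA context is initialized, hence the "before using torch" comment. A minimal illustration (hypothetical GPU ids):

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # must be set before any CUDA call
import torch
print(torch.cuda.device_count())  # reports only the visible subset
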
Example #28
0
    parser.add_argument("--dataset", type=str)
    parser.add_argument("--output_path", type=str, default='./output/CaSE/')
    parser.add_argument("--embedding_size", type=int, default=256)
    parser.add_argument("--hidden_size", type=int, default=256)
    parser.add_argument("--max_span_size", type=int, default=4)
    parser.add_argument("--max_target_length", type=int, default=40)
    parser.add_argument("--min_window_size", type=int, default=4)
    parser.add_argument("--num_windows", type=int, default=1)
    parser.add_argument("--accumulation_steps", type=int, default=1)
    parser.add_argument("--epoch", type=int, default=20)
    parser.add_argument("--batch_size", type=int, default=16)
    parser.add_argument("--num_gpu", type=int, default=4)
    args = parser.parse_args()

    if torch.cuda.is_available():
        torch.distributed.init_process_group(backend='NCCL',
                                             init_method='env://')

    cudnn.enabled = True
    cudnn.benchmark = True
    cudnn.deterministic = True
    print(torch.__version__)
    print(torch.version.cuda)
    print(cudnn.version())

    init_seed(123456)

    if args.mode == 'test':
        test(args)
    elif args.mode == 'train':
        train(args)
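
With init_method='env://', init_process_group above reads MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE from the environment; launchers such as torch.distributed.launch or torchrun export them per process. A minimal single-process sketch of that rendezvous (the defaults are hypothetical):

import os

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
os.environ.setdefault("RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")
# torch.distributed.init_process_group(backend='nccl', init_method='env://')
# (left commented out: it requires CUDA devices and the NCCL backend)
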
def main():

    # Manual seed for reproducibility
    torch.manual_seed(363636)

    # Global instances
    global args, use_cuda, device
    # Instantiating the parser
    args = parser.parse_args()
    # Global CUDA flag
    use_cuda = args.cuda and torch.cuda.is_available()
    # Defining device and device's map location
    device = torch.device("cuda" if use_cuda else "cpu")
    print('chosen device: ', device)

    # Building the model
    if args.model == 'cifar_micronet':
        print(
            'Building MicroNet for CIFAR with depth multiplier {} and width multiplier {} ...'
            .format(args.dw_multps[0]**args.phi, args.dw_multps[1]**args.phi))
        if args.dataset == 'CIFAR100':
            num_classes = 100
        elif args.dataset == 'CIFAR10':
            num_classes = 10
        model = micronet(args.dw_multps[0]**args.phi,
                         args.dw_multps[1]**args.phi, num_classes)

    elif args.model == 'image_micronet':
        print(
            'Building MicroNet for ImageNet with depth multiplier {} and width multiplier {} ...'
            .format(args.dw_multps[0]**args.phi, args.dw_multps[1]**args.phi))
        model = image_micronet(args.dw_multps[0]**args.phi,
                               args.dw_multps[1]**args.phi)

    elif args.model == 'efficientnet-b1':
        print('Building EfficientNet-B1 ...')
        model = EfficientNet.efficientnet_b1()

    elif args.model == 'efficientnet-b2':
        print('Building EfficientNet-B2 ...')
        model = EfficientNet.efficientnet_b2()

    elif args.model == 'efficientnet-b3':
        print('Building EfficientNet-B3 ...')
        model = EfficientNet.efficientnet_b3()

    elif args.model == 'efficientnet-b4':
        print('Building EfficientNet-B4 ...')
        model = EfficientNet.efficientnet_b4()

    elif args.model == 'lenet-5':
        print(
            'Building LeNet-5 with depth multiplier {} and width multiplier {} ...'
            .format(args.dw_multps[0]**args.phi, args.dw_multps[1]**args.phi))
        model = lenet5(d_multiplier=args.dw_multps[0]**args.phi,
                       w_multiplier=args.dw_multps[1]**args.phi)

    # list parameter names for inspection
    for name, param in model.named_parameters():
        print('\n', name)

    # Transfers model to device (GPU/CPU).
    model.to(device)

    # Defining loss function and printing CUDA information (if available)
    if use_cuda:
        print("PyTorch version: ")
        print(torch.__version__)
        print("CUDA Version: ")
        print(torch.version.cuda)
        print("cuDNN version is: ")
        print(cudnn.version())
        cudnn.benchmark = True
        loss_fct = nn.CrossEntropyLoss().cuda()
    else:
        loss_fct = nn.CrossEntropyLoss()

    # Dataloaders for CIFAR, ImageNet and MNIST

    if args.dataset == 'CIFAR100':

        print('Loading CIFAR-100 data ...')
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        kwargs = {
            'num_workers': args.workers,
            'pin_memory': True
        } if use_cuda else {}

        train_loader = torch.utils.data.DataLoader(datasets.CIFAR100(
            root=args.data_path,
            train=True,
            transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, 4),
                transforms.ColorJitter(brightness=0.3,
                                       contrast=0.3,
                                       saturation=0.3,
                                       hue=0.075),
                transforms.ToTensor(),
                normalize,
                Cutout(n_holes=1, length=16),
            ]),
            download=True),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100(root=args.data_path,
                              train=False,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  normalize,
                              ])),
            batch_size=args.val_batch_size,
            shuffle=False,
            **kwargs)

    elif args.dataset == 'ImageNet':

        print('Loading ImageNet data ...')
        traindir = os.path.join(args.data_path, 'train')
        valdir = os.path.join(args.data_path, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(args.image_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))
        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)

        if model.__class__.__name__ == 'EfficientNet' or 'efficientnet' in str(
                args.model):
            image_size = EfficientNet.get_image_size(args.model)

        else:
            image_size = args.image_size

        val_dataset = datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(image_size, interpolation=PIL.Image.BICUBIC),
                transforms.CenterCrop(image_size),
                transforms.ToTensor(),
                normalize,
            ]))
        val_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=args.val_batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)

    elif args.dataset == 'MNIST':

        kwargs = {
            'num_workers': args.workers,
            'pin_memory': True
        } if use_cuda else {}

        train_loader = torch.utils.data.DataLoader(datasets.MNIST(
            args.data_path,
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ])),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)
        val_loader = torch.utils.data.DataLoader(
            datasets.MNIST(args.data_path,
                           train=False,
                           transform=transforms.Compose([
                               transforms.ToTensor(),
                               transforms.Normalize((0.1307, ), (0.3081, ))
                           ])),
            batch_size=args.val_batch_size,
            shuffle=True,
            **kwargs)

    elif args.dataset == 'CIFAR10':

        print('Loading CIFAR-10 data ...')
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

        kwargs = {
            'num_workers': args.workers,
            'pin_memory': True
        } if use_cuda else {}

        train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
            root=args.data_path,
            train=True,
            transform=transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomCrop(32, 4),
                transforms.ColorJitter(brightness=0.3,
                                       contrast=0.3,
                                       saturation=0.3,
                                       hue=0.075),
                transforms.ToTensor(),
                normalize,
                Cutout(n_holes=1, length=16),
            ]),
            download=True),
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   **kwargs)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10(root=args.data_path,
                             train=False,
                             transform=transforms.Compose([
                                 transforms.ToTensor(),
                                 normalize,
                             ])),
            batch_size=args.val_batch_size,
            shuffle=False,
            **kwargs)

    else:
        raise NotImplementedError('Undefined dataset name %s' % args.dataset)

    train_w_frozen_assignment(train_loader, val_loader, model, loss_fct)