Example #1
    def _report_rouge(self, predictions, references):

        a_lst = []
        predictions = list(predictions)
        references = list(references)
        for i, p in enumerate(predictions):
            a_lst.append((p, references[i]))

        pool = Pool(24)
        rouge_scores = {"r1": [], "r2": [], "rl": []}
        for d in tqdm(pool.imap(_multi_rg, a_lst), total=len(a_lst)):
            if d is not None:
                rouge_scores["r1"].append(d[0])
                rouge_scores["r2"].append(d[1])
                rouge_scores["rl"].append(d[2])
        pool.close()
        pool.join()

        r1 = np.mean(rouge_scores["r1"])
        r2 = np.mean(rouge_scores["r2"])
        rl = np.mean(rouge_scores["rl"])

        if len(self.args.log_folds) > 0:
            with open(self.args.log_folds, mode='a') as f:
                f.write("{:.4f}\t{:.4f}\t{:.4f}".format(r1 / 100, r2 / 100, rl / 100))
                f.write('\n')
        logger.info("Metric\tScore\t95% CI")
        logger.info("ROUGE-1\t{:.2f}\t({:.2f},{:.2f})".format(r1 * 100, 0, 0))
        logger.info("ROUGE-2\t{:.2f}\t({:.2f},{:.2f})".format(r2 * 100, 0, 0))
        logger.info("ROUGE-L\t{:.2f}\t({:.2f},{:.2f})".format(rl * 100, 0, 0))

        logger.info("Data path: %s" % self.args.bert_data_path)
        logger.info("Model path: %s" % self.args.model_path)

        return r1, r2, rl
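
The snippet relies on a module-level worker `_multi_rg` that is not shown. A minimal sketch of such a worker, assuming the `rouge_score` package and fractional F-scores (both are assumptions, not part of the original):

# Hypothetical sketch of the missing worker. Pool workers must be picklable,
# module-level callables; rouge_score is an assumed dependency here.
from rouge_score import rouge_scorer

_scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

def _multi_rg(pair):
    prediction, reference = pair
    try:
        scores = _scorer.score(reference, prediction)
        # Returned as fractions; the caller decides how to scale them.
        return (scores['rouge1'].fmeasure,
                scores['rouge2'].fmeasure,
                scores['rougeL'].fmeasure)
    except Exception:
        # The caller skips None results.
        return None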
Example #2
class ParallDataWraper():
    def __init__(self, loader, batch_size, thread=0):
        self.loader = loader
        # The wrapper shuffles sample ids itself, so the wrapped loader must
        # not shuffle.
        assert loader.shuffle == False, 'Shuffle in loader should be False'
        self.batch_size = batch_size
        self.pool = Pool(thread)  # `thread` must be >= 1 for Pool()
        self.create_loader(self.loader.num_data)

    def create_loader(self, num):
        # Reshuffle the sample ids and start prefetching samples in the pool.
        ids = [a for a in range(num)]
        random.shuffle(ids)
        self.targets = self.pool.imap(self.loader.next, (id for id in ids))

    def reset(self):
        self.create_loader(self.loader.num_data)

    def get_iter_epoch(self):
        return self.loader.get_iter_epoch()

    def load_batch(self):
        all_outputs = []
        for i in range(self.batch_size):
            try:
                outputs = self.targets.__next__()
            except StopIteration:
                # Epoch exhausted: reshuffle and keep filling the batch.
                self.create_loader(self.loader.num_data)
                outputs = self.targets.__next__()
            all_outputs.append(outputs)

        return pack_data(all_outputs)
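
A minimal usage sketch of the wrapper above; the loader and `pack_data` below are stand-ins (assumptions), any picklable object exposing `shuffle`, `num_data` and `next(idx)` would do:

# Imports assumed by ParallDataWraper above.
from multiprocessing import Pool
import random

def pack_data(outputs):
    # Stand-in collate function: in the original this presumably stacks
    # per-sample outputs into batch arrays.
    return list(outputs)

class DummyLoader:
    shuffle = False      # the wrapper shuffles ids itself
    num_data = 100

    def next(self, idx):
        # Pretend to load and preprocess sample `idx` in a worker process.
        return idx * 2

    def get_iter_epoch(self):
        return self.num_data

if __name__ == '__main__':
    wrapper = ParallDataWraper(DummyLoader(), batch_size=8, thread=4)
    print(wrapper.load_batch())   # prefetched by 4 worker processes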
Example #3
class ParallDataWraper():
    def __init__(self, loader, batch_size, thread=0):
        self.loader = loader
        self.batch_size = batch_size
        self.pool = Pool(thread)
        self.create_loader(self.loader.num_data)

    def create_loader(self, num):
        # print ('--> remap and shuffle iterator')
        ids = [a for a in range(num)]
        random.shuffle(ids)
        self.targets = self.pool.imap(self.loader.next, (id for id in ids))

    def reset(self):
        self.create_loader(self.loader.num_data)

    def load_batch(self):
        all_outputs = []
        for i in range(self.batch_size):
            try:
                outputs = self.targets.__next__()
            except StopIteration:
                self.create_loader(self.loader.num_data)
                outputs = self.targets.__next__()
            all_outputs.append(outputs)
        
        return pack_data(all_outputs)
Example #4
def tune_from_args(args):

    params_grid = list(
        product(
            torch.linspace(args.alpha_from, args.alpha_to, args.alpha_steps),
            torch.linspace(args.beta_from, args.beta_to, args.beta_steps)))

    LOG.info(
        'Scheduling {} jobs for alphas=linspace({}, {}, {}), betas=linspace({}, {}, {})'
        .format(len(params_grid), args.alpha_from, args.alpha_to,
                args.alpha_steps, args.beta_from, args.beta_to,
                args.beta_steps))

    # start worker processes
    LOG.info(
        f"Using {args.num_workers} processes and {args.lm_workers} for each CTCDecoder."
    )
    extract_start = default_timer()

    if args.unit == 'char':
        vocab = CharTokenizer(args.vocab_file,
                              reserved_tokens={
                                  'blank': '<blank>',
                                  'space': args.space_token
                              })
    else:
        vocab = WordTokenizer(args.vocab_file)

    p = Pool(args.num_workers, init, [
        args.logits_targets_file, vocab, args.lm_path, args.lm_trie_path,
        args.lm_unit, args.beam_size, args.cutoff_prob, args.cutoff_top_n
    ])

    scores = []
    best_wer = float('inf')
    with tqdm.tqdm(p.imap(tune_step, params_grid),
                   total=len(params_grid),
                   desc='Grid search') as pbar:
        for params in pbar:
            alpha, beta, wer, cer = params
            scores.append([alpha, beta, wer, cer])

            if wer < best_wer:
                best_wer = wer
                pbar.set_postfix(alpha=alpha, beta=beta, wer=wer, cer=cer)

    LOG.info(
        f"Finished {len(params_grid)} processes in {default_timer() - extract_start:.1f}s"
    )

    df = pd.DataFrame(scores, columns=['alpha', 'beta', 'wer', 'cer'])
    df.to_csv(args.output_file, index=False)
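
The worker side (`init` and `tune_step`) is not shown. Below is a minimal, self-contained sketch of the same initializer pattern, with placeholder state and fake WER/CER values standing in for the real decoder (everything here is an assumption except the `Pool(n, initializer, initargs)` / `imap` shape):

from itertools import product
from multiprocessing import Pool

# Worker-global state, filled in once per worker process by the initializer
# so heavy objects (logits, LM, decoder) are not re-created per task.
_state = {}

def init(shared_blob):
    _state['blob'] = shared_blob

def tune_step(params):
    alpha, beta = params
    # Placeholder for "decode the cached logits with (alpha, beta)".
    wer = abs(float(alpha) - 0.5) + _state['blob']
    cer = abs(float(beta) - 1.0) + _state['blob']
    return float(alpha), float(beta), wer, cer

if __name__ == '__main__':
    grid = list(product([0.0, 0.5, 1.0], [0.5, 1.0, 1.5]))
    with Pool(2, init, [0.0]) as p:
        for alpha, beta, wer, cer in p.imap(tune_step, grid):
            print(alpha, beta, wer, cer)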
Example #5
net_utils.load_net(trained_model, net)
# net.load_from_npz(npz_fname)
# net_utils.save_net(h5_fname, net)
net.cuda()
net.eval()
print('load model succ...')

t_det = Timer()
t_total = Timer()
im_fnames = sorted((fname
                    for fname in os.listdir(im_path)
                    if os.path.splitext(fname)[-1] == '.jpg'))
im_fnames = (os.path.join(im_path, fname) for fname in im_fnames)
pool = Pool(processes=1)

for i, (image, im_data) in enumerate(pool.imap(
        preprocess, im_fnames, chunksize=1)):
    t_total.tic()
    im_data = net_utils.np_to_variable(
        im_data, is_cuda=True, volatile=True).permute(0, 3, 1, 2)
    t_det.tic()
    bbox_pred, iou_pred, prob_pred = net(im_data)
    det_time = t_det.toc()
    # to numpy
    bbox_pred = bbox_pred.data.cpu().numpy()
    iou_pred = iou_pred.data.cpu().numpy()
    prob_pred = prob_pred.data.cpu().numpy()

    # print bbox_pred.shape, iou_pred.shape, prob_pred.shape

    bboxes, scores, cls_inds = yolo_utils.postprocess(
        bbox_pred, iou_pred, prob_pred, image.shape, cfg, thresh)
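
`preprocess` runs in the single worker process and must return the original image plus a batched HWC float array for the `permute` above; a stand-in sketch (OpenCV, the input size and the normalization are assumptions, not the original function):

# Hypothetical stand-in for the `preprocess` worker used above.
import cv2
import numpy as np

def preprocess(fname, inp_size=(416, 416)):
    image = cv2.imread(fname)                       # original BGR image
    im_data = cv2.resize(image, inp_size).astype(np.float32) / 255.0
    # Add a batch dimension: (1, H, W, C), permuted to NCHW by the caller.
    return image, im_data[np.newaxis, ...]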
Example #6
th2_vals = np.linspace(th2_min, th2_max, num_th2)

th_results = np.zeros((th1_vals.size, th2_vals.size))
rewards = np.zeros((th1_vals.size, th2_vals.size))
last_err = np.zeros((th1_vals.size, th2_vals.size))

end_point = torch.tensor([1.57079633, 0., 0., 0.])

import time

start = time.time()

print("lets go")

for i, res in enumerate(
        pool.imap(do_rollout_stable, product(th1_vals, th2_vals, [0], [0]))):
    print("did something")
    obs_hist, action_hist, reward_hist, _, _ = res
    last_err.flat[i] = torch.sum(abs(obs_hist[-1] - end_point))
    errs = torch.sum(abs(obs_hist[-1:] - end_point), axis=1) < .5
    th_results.flat[i] = errs.all()
    rewards.flat[i] = sum(reward_hist)

#
# for i,res in enumerate(product(th1_vals, th2_vals, [0], [0])):
#     obs_hist, action_hist, reward_hist, _, _ = do_rollout_stable(res)
#     errs = torch.sum(abs(obs_hist[-10:] - end_point) , axis=1) < .2
#     th_results.flat[i] = errs.all()
#     rewards.flat[i] = sum(reward_hist)

end = time.time()
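
Each task handed to `pool.imap` is one `(th1, th2, dth1, dth2)` grid point, and the worker returns tensors shaped like the ones consumed above. A stand-in sketch (the dynamics here are placeholders, not the original environment):

# Hypothetical stand-in for `do_rollout_stable`.
import torch

def do_rollout_stable(args):
    th1, th2, dth1, dth2 = args
    steps = 50
    obs_hist = torch.zeros(steps, 4)        # (T, 4) state history
    obs_hist[:, 0] = float(th1)
    obs_hist[:, 1] = float(th2)
    action_hist = torch.zeros(steps, 1)
    reward_hist = torch.ones(steps)
    return obs_hist, action_hist, reward_hist, None, None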
Example #7
th1_min = pi / 2 - .5
th1_max = pi / 2 + .5
th2_min = -1
th2_max = 1
th1dot_min = -5
th1dot_max = 5
th2dot_min = -10
th2dot_max = 10

samples = np.random.random_sample((int(num_trials/2), 4))
samples *= np.array([th1_min - th1_max, th2_min - th2_max, th1dot_min - th1dot_max, th2dot_min - th2dot_max])
samples += np.array([th1_max, th2_max, th1dot_max, th2dot_max])

total_steps = 0
pool = Pool()  # defaults to number of available CPU's
for i, res in enumerate(pool.imap(do_rollout, zip(samples, range(int(num_trials/2))))):
    rews, steps = res
    reward_hist[i, :, :] = rews
    total_steps += steps
    X[i, :] = samples[i, :]
    Y[i] = sum(rews) > env.num_steps*3 - 10


th1_min = 0
th1_max = 2*pi
th2_min = -pi
th2_max = pi
th1dot_min = -10
th1dot_max = 10
th2dot_min = -30
th2dot_max = 30
Example #8
th2_max = pi
num_th2 = 41
th2_vals = np.linspace(th2_min, th2_max, num_th2)

th_results = np.zeros((th1_vals.size, th2_vals.size))
th_lqr_results = np.zeros((th1_vals.size, th2_vals.size))
rewards = np.zeros((th1_vals.size, th2_vals.size))

end_point = torch.tensor([1.57079633, 0., 0., 0.])

import time

start = time.time()

for i, res in enumerate(
        pool.imap(do_rollout, product(th1_vals, th2_vals, [0], [0]))):
    obs_hist, action_hist, reward_hist, lqr_on = res
    errs = torch.sum(abs(obs_hist[-10:] - end_point), axis=1) < .2
    th_results.flat[i] = errs.all()
    th_lqr_results.flat[i] = lqr_on[-1]
    rewards.flat[i] = sum(reward_hist)

end = time.time()
print(end - start)

# Generate "balance map" at slice th = 0

dth1_min = -10
dth1_max = 10
num_dth1 = 41
dth1_vals = np.linspace(dth1_min, dth1_max, num_dth1)
Example #9
def tune_from_args(args):
    # Disable some of the more verbose logging statements
    logging.getLogger('asr.common.params').disabled = True
    logging.getLogger('asr.common.registrable').disabled = True

    # Load from archive
    _, weights_file = load_archive(args.serialization_dir, args.overrides,
                                   args.weights_file)

    params = Params.load(os.path.join(args.serialization_dir, CONFIG_NAME),
                         args.overrides)

    prepare_environment(params)

    # Try to use the validation dataset reader if there is one - otherwise fall back
    # to the default dataset_reader used for both training and validation.
    dataset_params = params.pop('val_dataset', params.get('dataset_reader'))

    logger.info("Reading evaluation data from %s", args.input_file)
    dataset_params['manifest_filepath'] = args.input_file
    dataset = datasets.from_params(dataset_params)

    if os.path.exists(os.path.join(args.serialization_dir, "alphabet")):
        alphabet = Alphabet.from_file(
            os.path.join(args.serialization_dir, "alphabet", "tokens"))
    else:
        alphabet = Alphabet.from_params(params.pop("alphabet", {}))

    logits_dir = os.path.join(args.serialization_dir, 'logits')
    os.makedirs(logits_dir, exist_ok=True)

    basename = os.path.splitext(os.path.split(args.input_file)[1])[0]
    logits_file = os.path.join(logits_dir, basename + '.pth')

    if not os.path.exists(logits_file):
        model = models.from_params(alphabet=alphabet,
                                   params=params.pop('model'))
        model.load_state_dict(
            torch.load(weights_file,
                       map_location=lambda storage, loc: storage)['model'])
        model.eval()

        decoder = GreedyCTCDecoder(alphabet)

        loader_params = params.pop("val_data_loader",
                                   params.get("data_loader"))
        batch_sampler = samplers.BucketingSampler(dataset,
                                                  batch_size=args.batch_size)
        loader = loaders.from_params(loader_params,
                                     dataset=dataset,
                                     batch_sampler=batch_sampler)

        logger.info(f'Logits file `{logits_file}` not found. Generating...')

        with torch.no_grad():
            model.to(args.device)

            logits = []
            for batch in tqdm.tqdm(loader):
                sample, target, sample_lengths, target_lengths = batch
                sample = sample.to(args.device)
                sample_lengths = sample_lengths.to(args.device)

                output, output_lengths = model(sample, sample_lengths)

                output = output.to('cpu')

                references = decoder.tensor2str(target, target_lengths)

                logits.extend((o[:l, ...], r) for o, l, r in zip(
                    output.to('cpu'), output_lengths, references))

                del sample, sample_lengths, output

            torch.save(logits, logits_file)

        del model

    tune_dir = os.path.join(args.serialization_dir, 'tune')
    os.makedirs(tune_dir, exist_ok=True)

    params_grid = list(
        product(
            torch.linspace(args.alpha_from, args.alpha_to, args.alpha_steps),
            torch.linspace(args.beta_from, args.beta_to, args.beta_steps)))

    print(
        'Scheduling {} jobs for alphas=linspace({}, {}, {}), betas=linspace({}, {}, {})'
        .format(len(params_grid), args.alpha_from, args.alpha_to,
                args.alpha_steps, args.beta_from, args.beta_to,
                args.beta_steps))

    # start worker processes
    logger.info(
        f"Using {args.num_workers} processes and {args.lm_workers} for each CTCDecoder."
    )
    extract_start = default_timer()

    p = Pool(args.num_workers, init, [
        logits_file, alphabet, args.lm_path, args.cutoff_top_n,
        args.cutoff_prob, args.beam_width, args.lm_workers
    ])

    scores = []
    best_wer = float('inf')
    with tqdm.tqdm(p.imap(tune_step, params_grid),
                   total=len(params_grid),
                   desc='Grid search') as pbar:
        for params in pbar:
            alpha, beta, wer, cer = params
            scores.append([alpha, beta, wer, cer])

            if wer < best_wer:
                best_wer = wer
                pbar.set_postfix(alpha=alpha, beta=beta, wer=wer, cer=cer)

    logger.info(
        f"Finished {len(params_grid)} processes in {default_timer() - extract_start:.1f}s"
    )

    df = pd.DataFrame(scores, columns=['alpha', 'beta', 'wer', 'cer'])
    df.to_csv(os.path.join(tune_dir, basename + '.csv'), index=False)
Example #10
                     glob=False))
        props.append(prop)
        print(i)
        i += 1

    # mp = _mp.get_context('forkserver')
    # set_start_method('spawn')
    pool = Pool(processes=4)
    manager = Manager()
    # mols = manager.list([[] for _ in range(len(datas))])
    # mols = manager.list([])

    # pool.map(get_mol,datas,chunksize=100)

    i = 0
    for _ in pool.imap(bd_mol, mols, chunksize=100):
        print(i)
        # mols[i] = _
        i += 1

    # props = np.concatenate(props,axis=0)

    #process based
    # processes = []
    #
    # indexes = [range(i*3000,(i+1)*3000) for i in range(4)]
    # for i in range(4):
    #     p = Process(target=get_mols, args=(mols, [datas[j] for j  in indexes[i]]))
    #     p.start()
    #     processes.append(p)
    #
Example #11
class DatasetSmall():
    def __init__(self,
                 hd_folder,
                 reg_folder=None,
                 batch_size=64,
                 ext_list=['.bmp', '.tif', '.png'],
                 img_size=128):  #
        super(DatasetSmall, self).__init__()
        self.hd_imgs_org = []
        self.reg_imgs_org = []
        self.batch_size = batch_size
        self.ratio_pool = np.arange(0.5, 0.75, 0.01)
        img_count = 0
        self.hd_filelist, self.hd_filenames = getfilelist(hd_folder,
                                                          ext_list,
                                                          with_ext=True)
        self.count = 0
        self.reset_count = 20000
        self.img_size = img_size
        self.epoch_iteration = 5000
        self.pool = Pool(6)

        for this_hd_path, this_hd_name in zip(self.hd_filelist,
                                              self.hd_filenames):

            this_reg_path = os.path.join(reg_folder, this_hd_name)
            reg_img_orig = imread(this_reg_path).astype(np.float32)
            hd_img_orig = imread(this_hd_path).astype(np.float32)

            hd_row, hd_col, chn = hd_img_orig.shape
            #reg_row, reg_col, chn = reg_img_orig.shape
            # now pad the image to at least img_size * 1.4
            pad_hd_row, pad_hd_col = max(0,
                                         int(1.4 * img_size) - hd_row), max(
                                             0,
                                             int(1.4 * img_size) - hd_col)

            npad3 = ((pad_hd_row // 2, pad_hd_row - pad_hd_row // 2),
                     (pad_hd_col // 2, pad_hd_col - pad_hd_col // 2), (0, 0))

            hd_img_orig = np.pad(hd_img_orig, npad3, 'symmetric')
            reg_img_orig = np.pad(reg_img_orig, npad3, 'symmetric')

            hd_img = (hd_img_orig / 127.5 - 1)
            reg_img = (reg_img_orig / 127.5 - 1)

            self.hd_imgs_org.append(hd_img)
            self.reg_imgs_org.append(reg_img)
            img_count += 1
            print('Generated {} images'.format(img_count), hd_img_orig.shape)

        self.num_imgs = len(self.hd_imgs_org)
        print('start rotate')
        self.hd_imgs = []
        self.reg_imgs = []
        self.rotate_img()

    def rotate_img(self):
        self.hd_imgs = []
        self.reg_imgs = []

        # The generator must bind its own loop variable: reusing `img_idx`
        # from the for-loop below would race with the pool's feeder thread.
        targets = self.pool.imap(
            rotate_thread,
            ((self.hd_imgs_org[img_idx], self.reg_imgs_org[img_idx])
             for img_idx in range(self.num_imgs)))

        for img_idx in range(self.num_imgs):
            this_hd_img, this_reg_img = targets.__next__()

            self.hd_imgs.append(this_hd_img)
            self.reg_imgs.append(this_reg_img)

    def get_next(self):
        if self.count > 0 and self.count % self.reset_count == 0:
            self.rotate_img()
            self.count = 0

        chose_img_index = np.random.choice(self.num_imgs, self.batch_size)
        chose_imgs = len(chose_img_index)

        hd_data_list = []
        reg_data_list = []

        already_taken = 0
        every_taken = int(self.batch_size / chose_imgs)

        for img_idx in chose_img_index:

            this_hd = self.hd_imgs[img_idx]
            this_reg = self.reg_imgs[img_idx]
            # rotate the image

            br = self.img_size // 2 + 1
            this_chose = min(self.batch_size - already_taken, every_taken)

            already_taken = already_taken + this_chose

            #for idx in range(this_chose):
            this_chose_count = 0
            blank_count = 0

            while this_chose_count < this_chose:

                row_size, col_size = this_hd.shape[0:2]
                try:
                    ridx = random.randint(0, row_size - self.img_size - 1)
                    cidx = random.randint(0, col_size - self.img_size - 1)

                    hd_data = this_hd[ridx:ridx + self.img_size,
                                      cidx:cidx + self.img_size, :]

                    if random.random() > 0.3:
                        reg_data = this_reg[ridx:ridx + self.img_size,
                                            cidx:cidx + self.img_size, :]
                    else:  # sometimes just downscale the true data to train the model
                        this_down_ratio = np.random.choice(self.ratio_pool,
                                                           size=1,
                                                           replace=False)[0]
                        hd_row, hd_col, chn = hd_data.shape
                        low_row, low_col = int(this_down_ratio * hd_row), int(
                            this_down_ratio * hd_col)

                        low_hd = imresize_shape(hd_data, (low_row, low_col))
                        #print(np.mean(reg_img_down))
                        low_hd = imresize_shape(low_hd, (hd_row, hd_col))
                        #print('max and min value of hd image: ', np.max(low_hd), np.min(low_hd))
                        sigma = np.random.choice([1, 2], size=1,
                                                 replace=False)[0]
                        reg_data = skimage.filters.gaussian(low_hd.astype(
                            np.float32),
                                                            sigma=sigma,
                                                            multichannel=True)

                    hd_data_list.append(hd_data)
                    reg_data_list.append(reg_data)
                except Exception:
                    print(
                        'problem cropping the training data, image size: ',
                        row_size, col_size)

                this_chose_count += 1
                blank_count = 0
        #import pdb; pdb.set_trace()
        hd_data_np = np.asarray(hd_data_list, dtype=np.float32)
        reg_data_np = np.asarray(reg_data_list, dtype=np.float32)

        hd_data_np = np.transpose(hd_data_np, (0, 3, 1, 2))
        reg_data_np = np.transpose(reg_data_np, (0, 3, 1, 2))
        self.count += self.batch_size

        return {'low': reg_data_np, 'high': hd_data_np}
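
`rotate_thread` receives one `(hd_img, reg_img)` pair from the generator and returns the augmented pair; a stand-in sketch (a random 90-degree rotation is only a placeholder for the original augmentation):

# Hypothetical stand-in for the `rotate_thread` worker used by rotate_img().
import random
import numpy as np

def rotate_thread(pair):
    hd_img, reg_img = pair
    k = random.randint(0, 3)
    # Rotate both images by the same multiple of 90 degrees (H, W axes).
    return np.rot90(hd_img, k).copy(), np.rot90(reg_img, k).copy()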
Example #12
class Environment(ParamTree):
    """Environment for executing training and post training evaluations.

    Takes care of the process pool, reporting, and experiment parameters.

    Parameters
    ----------
    params : dict or str
        Dictionary containing the parameters or a path to a parameters spec
        file.
    """

    from .util import (default_params, setup_params, open_data, store_gen,
                       store_gen_metrics, load_gen_metrics, stored_populations,
                       stored_indiv_measurements, store_hof, load_hof,
                       load_pop, load_indiv_measurements, env_path)

    def __init__(self, params):
        """Initialize an environment for training or post training analysis."""
        super().__init__()

        self.setup_params(params)

        self.metrics = list()
        self.pool = None
        self.data_file = None

        self.hall_of_fame = list()

        # choose task
        self.task = select_task(self['task', 'name'])

        # choose adequate type of individuals
        if self['config', 'backend'].lower() == 'torch':
            import wann_genetic.individual.torch as backend
        else:
            import wann_genetic.individual.numpy as backend

        if self.task.is_recurrent:
            self.ind_class = backend.RecurrentIndividual
        else:
            self.ind_class = backend.Individual

        # only use enabled activations functions
        available_funcs = self.ind_class.Phenotype.available_act_functions
        enabled_acts = self['population', 'enabled_activation_funcs']

        if self['population', 'enabled_activation_funcs'] != 'all':
            self.ind_class.Phenotype.enabled_act_functions = [
                available_funcs[i] for i in enabled_acts
            ]

    def seed(self, seed):
        """Set seed to `seed` or from parameters.

        Parameters
        ----------
        seed : int
            Seed to use.
        """
        np.random.seed(seed)

    @property
    def elite_size(self):
        """Size of the elite (:math:`population\\ size * elite\\ ratio`)."""
        return int(
            np.floor(self['selection', 'elite_ratio'] *
                     self['population', 'size']))

    def sample_weights(self, n=None):
        if n is None:
            n = self['sampling']['num_weights_per_iteration']

        dist = self['sampling', 'distribution'].lower()

        if dist == 'one':
            w = 1

        elif dist == 'uniform':
            lower = self['sampling', 'lower_bound']
            upper = self['sampling', 'upper_bound']
            assert lower is not None and upper is not None

            w = np.random.uniform(lower, upper, size=n)

        elif dist == 'linspace':
            lower = self['sampling', 'lower_bound']
            upper = self['sampling', 'upper_bound']
            assert lower is not None and upper is not None

            w = np.linspace(lower, upper, num=n)

        elif dist == 'lognormal':
            mu = self['sampling', 'mean']
            sigma = self['sampling', 'sigma']
            assert mu is not None and sigma is not None

            w = np.random.lognormal(mu, sigma, size=n)

        elif dist == 'normal':
            mu = self['sampling', 'mean']
            sigma = self['sampling', 'sigma']
            assert mu is not None and sigma is not None

            w = np.random.normal(mu, sigma, size=n)

        else:
            raise RuntimeError(f'Distribution {dist} not implemented.')
        self['sampling', 'current_weight'] = w
        return w

    def setup_pool(self, n=None):
        """Setup process pool."""
        if n is None:
            n = self['config', 'num_workers']
        if n == 1:
            self.pool = None
        else:
            if self['config', 'backend'].lower() == 'torch':
                logging.info('Using torch multiprocessing')
                from torch.multiprocessing import Pool
                self.pool = Pool(n)
            else:
                logging.info('Using usual multiprocessing')
                from multiprocessing import Pool
                self.pool = Pool(n)

    def pool_map(self, func, iter):
        if self.pool is None:
            return map(func, iter)
        else:
            return self.pool.imap(func, iter)

    def setup_optimization(self):
        """Setup everything that is required for training (eg. loading test
        samples).
        """
        log_path = self.env_path(self['storage', 'log_filename'])
        logging.info(f"Check log ('{log_path}') for details.")

        logger = logging.getLogger()

        fh = logging.FileHandler(log_path)
        fh.setFormatter(
            logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
        logger.addHandler(fh)

        if self['config', 'debug']:
            logger.setLevel(logging.DEBUG)
        else:
            logger.setLevel(logging.INFO)

        logging.info(f"Package version {get_version()}")

        p = os.path.abspath(self['experiment_path'])
        logging.info(f'Saving data at {p}.')

        logging.debug('Loading training dataset')
        self.task.load_training(env=self)

        # log used parameters
        params_toml = toml.dumps(self.params)
        logging.debug(f"Running experiments with the following parameters:\n"
                      f"{params_toml}")

        with open(self.env_path('params.toml'), 'w') as f:
            params = dict(self.params)
            # mark stored params as part of a report
            params['is_report'] = True
            toml.dump(params, f)

    def run(self):
        """Run optization and post optimization (if enabled)."""
        # set up logging, write params
        self.setup_optimization()

        # set up pool of workers
        self.setup_pool()

        with self.open_data('w'):
            # run optimization
            self.optimize()

        if self['postopt', 'run_postopt']:
            with self.open_data('r'):
                # evaluate individuals in hall of fame
                self.post_optimization()
        if self.pool is not None:
            logging.info('Closing pool')
            self.pool.close()

    def optimize(self):
        logging.info("Starting evolutionary algorithm")

        ts = TimeStore()

        alg = GeneticAlgorithm(self)

        first_generation = True

        self.seed(self['sampling', 'seed'])

        ts.start()
        for gen in np.arange(self['population', 'num_generations']) + 1:

            ts.start()

            pop = alg.ask()

            seed = self['sampling', 'post_init_seed']
            if (first_generation and not isinstance(seed, bool)):
                self.seed(seed)

            # evaluate indivs

            weights = self.sample_weights()

            logging.debug(f'Sampled weight {weights}')

            make_measurements(self, pop, weights=weights)

            obj_values = np.array(
                [get_objective_values(ind, self.objectives) for ind in pop])

            alg.tell(obj_values)

            logging.debug('Updating hall of fame')
            self.hall_of_fame = update_hall_of_fame(self, pop)

            ts.stop()

            avg = (ts.total / gen)
            expected_time = (self['population', 'num_generations'] - gen) * avg
            logging.info(
                f'Completed generation {gen}; {ts.dt:.02}s elapsed, {avg:.02}s avg, {ts.total:.02}s total. '
                f'Expected time remaining: {expected_time:.02}s')

            self.store_data(gen, pop, dt=ts.dt)
        self.last_population = pop
        self.store_hof()

    def post_optimization(self):
        r = Report(self).run_evaluations(  # run evaluations on test data
            num_weights=self['postopt', 'num_weights'],
            num_samples=self['postopt', 'num_samples']  # all
        )

        if self['postopt', 'compile_report']:
            r.compile()  # plot metrics, derive stats
        else:
            r.compile_stats()  # at least derive and store stats

    def store_data(self, gen, pop, dt=-1):
        gen_metrics, indiv_metrics = self.population_metrics(
            gen=gen, population=pop, return_indiv_measurements=True, dt=dt)

        metric, metric_sign = self.hof_metric
        p = ("MAX" if metric_sign > 0 else "MIN")
        metric_value = gen_metrics[f"{p}:{metric}"]

        logging.info(f"#{gen} {p}:{metric}: {metric_value:.2}")

        self.metrics.append(gen_metrics)

        commit_freq = self['storage', 'commit_elite_freq']

        if (commit_freq > 0 and gen % commit_freq == 0):

            self.store_gen(gen,
                           population=pop[:self.elite_size],
                           indiv_metrics=indiv_metrics)

        commit_freq = self['storage', 'commit_metrics_freq']

        if (commit_freq > 0 and gen % commit_freq == 0):
            self.store_gen_metrics(pd.DataFrame(data=self.metrics))

    def population_metrics(self,
                           population,
                           gen=None,
                           dt=-1,
                           return_indiv_measurements=False):
        """Get available measurements for all individuals in the population and
        calculate statistical key metrics.

        The statistical key metrics include:

        `Q_{0, 4}`
            The quartiles :math:`\\{1, 2, 3\\}` as well as the minimum and
            maximum :math:`(0,4)`.
        `MEAN`, `STD`
            Mean and standard deviation.
        `MIN`, `MEDIAN`, `MAX`
            Equal to `Q_0`, `Q_2`, `Q_4`

        Parameters
        ----------
        population : [wann_genetic.Individual]
            List of individuals that constitute the population.
        gen : int
            Current generation index (required for calculating the individuals'
            age).
        return_indiv_measurements : bool, optional
            Whether to return the individual measurements as well.

        Returns
        -------
        dict
            Dictionary of the produced measurements (cross product of key
            metrics and a list of individual measurements).

        """
        if gen is None:
            gen = self['population', 'num_generations']

        rows = list()
        for ind in population:
            data = ind.metadata(current_gen=gen)
            data.update(ind.measurements)
            rows.append(data)

        indiv_measurements = pd.DataFrame(data=rows)

        metrics = dict(
            num_unique_individuals=len(set(population)),
            num_individuals=len(population),
            delta_time=dt,

            # number of inds without edges
            num_no_edge_inds=np.sum(
                indiv_measurements['n_enabled_edges'] == 0),

            # number of inds without hidden nodes
            num_no_hidden_inds=np.sum(indiv_measurements['n_hidden'] == 0),

            # highest number of occurrences of any individual
            biggest_ind=max([population.count(i) for i in set(population)]),
        )

        for name, values in indiv_measurements.items():
            metrics[f'Q_0:{name}'] = metrics[f'MIN:{name}'] = values.min()
            metrics[f'Q_1:{name}'] = np.quantile(values, .25)
            metrics[f'Q_2:{name}'] = metrics[f'MEDIAN:{name}'] = values.median()
            metrics[f'Q_3:{name}'] = np.quantile(values, .75)
            metrics[f'Q_4:{name}'] = metrics[f'MAX:{name}'] = values.max()

            metrics[f'MEAN:{name}'] = values.mean()
            metrics[f'STD:{name}'] = values.std()

        if return_indiv_measurements:
            return metrics, indiv_measurements
        else:
            return metrics