Python Stopwatch示例，utils.Stopwatch Python示例

示例#1

0

显示文件

文件： optimizers.py 项目： YashNita/MNIST_Challange_SOL

 def optimize(self, nnet):
     """Run up to `self.max_epochs` further training epochs on `nnet`.

     Every epoch calls `self.train_epoch(nnet)` and appends the returned
     losses and the training score to the history lists. When
     `nnet._X_val` is not None the validation loss and score are recorded
     as well, the best weights are saved through
     `nnet._save_best_weights()`, and the early-stopping counter
     `self._early_stopping` is applied. A progress line is printed when
     `self.verbose` is truthy and learning curves are plotted when
     `self.plot` is truthy.

     Returns None. The method may return before `self.max_epochs` epochs
     have run (see the two early `return` statements below).
     """
     timer = Stopwatch(verbose=False).start()
     # `self.epoch` is not reset here, so the displayed total grows by
     # `max_epochs` on every call.
     self.total_epochs += self.max_epochs
     for i in xrange(self.max_epochs):
         self.epoch += 1
         if self.verbose:
             # epoch number is right-aligned to the width of the total
             print_inline('Epoch {0:>{1}}/{2} '.format(
                 self.epoch, len(str(self.total_epochs)),
                 self.total_epochs))
         if self.verbose and self.early_stopping and nnet._X_val is not None:
             # shows the current value of the early-stopping counter
             print_inline(' early stopping after {0} '.format(
                 self._early_stopping))
         losses = self.train_epoch(nnet)
         self.loss_history.append(losses)
         msg = 'elapsed: {0} sec'.format(
             width_format(timer.elapsed(), default_width=5,
                          max_precision=2))
         msg += ' - loss: {0}'.format(
             width_format(np.mean(losses), default_width=5,
                          max_precision=4))
         # presumably `nnet.validate()` without arguments predicts on the
         # training data that `nnet._y` belongs to -- TODO confirm
         score = nnet._metric(nnet._y, nnet.validate())
         self.score_history.append(score)
         # TODO: change acc to metric name
         msg += ' - acc.: {0}'.format(
             width_format(score, default_width=6, max_precision=4))
         if nnet._X_val is not None:
             # from the second epoch on, count down the early-stopping
             # counter; it is reset below whenever a new best is found
             if self._early_stopping > 0 and self.epoch > 1:
                 self._early_stopping -= 1
             val_loss = nnet._loss(nnet._y_val,
                                   nnet.validate_proba(nnet._X_val))
             self.val_loss_history.append(val_loss)
             val_score = nnet._metric(nnet._y_val,
                                      nnet.validate(nnet._X_val))
             # stop when the validation score falls below 20% of the
             # previous one; this returns without printing `msg` and
             # without appending `val_score`, although `val_loss` has
             # already been appended above
             if self.epoch > 1 and val_score < 0.2 * self.val_score_history[
                     -1]:
                 return
             self.val_score_history.append(val_score)
             # a new best is only considered from the second epoch on
             if self.epoch > 1 and val_score > nnet.best_val_score_:
                 nnet.best_val_score_ = val_score
                 nnet.best_epoch_ = self.epoch  # TODO move to optimizer
                 nnet._save_best_weights()
                 self._early_stopping = self.early_stopping  # reset counter
             msg += ' - val. loss: {0}'.format(
                 width_format(val_loss, default_width=5, max_precision=4))
             # TODO: fix acc.
             msg += ' - val. acc.: {0}'.format(
                 width_format(val_score, default_width=6, max_precision=4))
             # counter exhausted: report this epoch and stop training
             if self._early_stopping == 0:
                 if self.verbose: print msg
                 return
         if self.verbose: print msg
         if self.epoch > 1 and self.plot:
             # create the output directory on first use
             if not os.path.exists(self.plot_dirpath):
                 os.makedirs(self.plot_dirpath)
             plot_learning_curves(self.loss_history,
                                  self.score_history,
                                  self.val_loss_history,
                                  self.val_score_history,
                                  dirpath=self.plot_dirpath)

示例#2

0

显示文件

    def _process(self, client: Client) -> None:
        """Simulate processing of `client` and notify the listener manager.

        The processing time is the next value of `self._generator`
        truncated to an int and handed to `delay`. A message is printed
        before and after the delay.
        """
        processing_time = int(next(self._generator))
        self._print(f'processing {client}.')

        # the stopwatch is created right before the delay so the reported
        # figure covers the delay itself
        stopwatch = Stopwatch()
        delay(processing_time)

        summary = f'{client} has been processed for {stopwatch.milliseconds_since_start} milliseconds.'
        self._print(summary)
        self._listener_manager.on_processing_finished(client)

示例#3

0

显示文件

文件： ga.py 项目： lirfu/evopy

    def run(self):
        """Initialize the population and step until the stop condition holds.

        Resets the run counters stored on `self`, calls
        `self._initialize()`, then calls `self._run_step()` repeatedly while
        `self.params['stop_condition'].is_satisfied(self)` is falsy, and
        finally prints the stop condition's report.
        """
        # reset per-run state
        self.iteration = 0
        self.best_unit = None
        self.evaluations = 0
        self.elapsed_time = 0
        stopwatch = Stopwatch()

        stop_cond = self.params['stop_condition']

        print('===> Initializing population!')
        stopwatch.start()
        self._initialize()
        # NOTE(review): this binds a local `best_unit` that is not read again
        # in this method, while `self.best_unit` stays None here; also
        # `__get_best` is called as a bare name rather than through `self` --
        # confirm both are intended.
        best_unit = __get_best().copy()
        print('===> Done! ({})'.format(format_stopwatch(stopwatch)))

        print('===> Starting algorithm with population of {} units!'.format(
            len(self.population)))
        # only `self.iteration` is updated in this loop; any other state the
        # stop condition reads must be maintained by `_run_step`
        while not stop_cond.is_satisfied(self):
            self.iteration += 1
            self._run_step()
        # the same stopwatch is reported again; it was started before
        # initialization and is not restarted in this method
        print('===> Done! ({})'.format(format_stopwatch(stopwatch)))
        print(stop_cond.report(self))

示例#4

0

显示文件

    def run(self):
        """Initialize the population and iterate until the stop condition holds.

        The visible part of the loop keeps a clone of the best unit when
        `self.params['elitism']` is truthy and then selects parents.
        """
        iteration = 0
        best_unit = None
        evaluations = 0
        elapsed_time = 0
        stopwatch = Stopwatch()
        stopwatch.start()

        print("===> Initializing population!")
        # NOTE(review): `__initialize_population`, `__get_best` and
        # `__select_parents` are called as bare names, not through `self`,
        # and `new_pop` is not defined in the visible code -- confirm.
        __initialize_population()
        best_unit = __get_best().clone()
        print_done_with_stopwatch(stopwatch)

        print("===> Starting algorithm with population of {} units!".format(len(self.population)))
        while not self.stop_condition.is_satisfied(iteration, best_unit, evaluations, elapsed_time):
            iteration += 1

            if self.params['elitism']:  # Save the queen.
                new_pop.append(__get_best().clone())

            # The original used the C-style ternary `cond ? 1 : 0`, which is
            # a SyntaxError in Python; a conditional expression gives the
            # same value.
            for i in range(self.params['elitism'] - (1 if self.params['elitism'] else 0)):
                parents = __select_parents(self.population)

示例#5

0

显示文件

文件： model.py 项目： masich/queueing-theory

    def _start(self):
        """Feed generated clients to the manager until the duration elapses.

        While the stopwatch reads less than `self._duration`, wait for the
        next inter-arrival interval, announce the new client to the listener
        and schedule it. Afterwards stop, notify the listener and print the
        total time.
        """
        stopwatch = Stopwatch()
        while True:
            if not stopwatch.milliseconds_since_start < self._duration:
                break

            # wait for the next arrival, then draw the arriving client
            pause = int(next(self._client_distribution))
            delay(pause)
            arrival = next(self._client_generator)

            self._listener.on_arrived(arrival)
            self._manager.schedule(arrival)

        self._stop()
        self._listener.on_all_processed()
        delay(10)

        report = f'stopped after {stopwatch.milliseconds_since_start} milliseconds of simulation.'
        self._print(report)

示例#6

0

显示文件

文件： clustering.py 项目： josephcappadona/automated-CV-analytics

def get_clustering(data_to_cluster, cluster_model_type, cluster_model_params):
    """Build a clustering model of the requested type and fit it to the data.

    Args:
        data_to_cluster: data passed to the model's `fit`; it is also stored
            on the fitted model as attribute `X`.
        cluster_model_type: `'KMEANS'` (uses `MiniBatchKMeans`) or
            `'DBSCAN'` (uses `DBSCAN_w_prediction`).
        cluster_model_params: keyword arguments for the model constructor.

    Returns:
        The fitted model.

    Raises:
        ValueError: if `cluster_model_type` is not a supported type.
            Previously this case failed later with an UnboundLocalError.
    """
    # validate before doing any work so a bad type fails with a clear message
    if cluster_model_type not in ('KMEANS', 'DBSCAN'):
        raise ValueError(
            "Unsupported cluster_model_type %r; expected 'KMEANS' or 'DBSCAN'."
            % (cluster_model_type,))

    sw = Stopwatch()
    sw.start()

    if cluster_model_type == 'KMEANS':
        cluster_model = MiniBatchKMeans(**cluster_model_params)
    else:
        cluster_model = DBSCAN_w_prediction(**cluster_model_params)

    cluster_model.fit(data_to_cluster)
    # keep the training data on the model for later use by callers
    cluster_model.X = data_to_cluster

    sw.stop()
    # lazy %-style arguments: the message is only formatted if it is emitted
    logging.debug('Descriptors clustered into %d clusters.',
                  cluster_model.n_clusters)
    return cluster_model

示例#7

0

显示文件

文件： metrics.py 项目： masich/queueing-theory

 def record_busy(self):
     """Switch from IDLE to BUSY, logging how long the IDLE state lasted.

     Does nothing unless the current state is `SystemState.IDLE`.
     """
     if self._current_state is not SystemState.IDLE:
         return

     idle_ms = self._stopwatch.milliseconds_since_start
     finished_period = (self._current_state, idle_ms)
     self._state_changes.append(finished_period)

     self._current_state = SystemState.BUSY
     # a fresh stopwatch replaces the old one for the new state
     self._stopwatch = Stopwatch()

示例#8

0

显示文件

文件： metrics.py 项目： masich/queueing-theory

 def __init__(self) -> None:
     """Start with an empty state-change log, a new stopwatch and IDLE state."""
     # (state, stopwatch reading) pairs are appended here by the recorders
     self._state_changes = list()
     self._stopwatch = Stopwatch()
     self._current_state = SystemState.IDLE

示例#9

0

显示文件

文件： primer_uporabe.py 项目： rusepatacis/LVR_team5

print "Petersen 2", dpll(barvanje(petersen, 2))
print "Petersen 3", dpll(barvanje(petersen, 3))     # Nice to verify by hand :)

#print "Sudoku dpll",dpll(X2SATsudoku(sud)) # Note: this one does not finish in a reasonable time.

print "::: END DPLL :::\n\n"
"""
######################################################
#################UTILS################################
######################################################
"""
# Stopwatch class that gives us a simple way to measure and compare the running time of different methods.

from utils import Stopwatch
print "::: BEGIN UTILS :::"
stoparica = Stopwatch("Primer uporabe")     # A name (tag) can be passed as a parameter.
                                        # The stopwatch starts automatically when it is created.
stoparica.stop()                        # Stop it with stop().
stoparica.restart()        # Restarting it clears all previous values.
hadamardova_matrika(8)
stoparica.intermediate("Vmesni cas 10")     # Several intermediate (lap) times can be added.
# NOTE(review): the lap labels say 10 and 12 while the sizes passed to
# hadamardova_matrika are 8 and 10 -- confirm which is intended.
hadamardova_matrika(10)
stoparica.intermediate("Vmesni cas 12")
stoparica.stop("Skupaj")
print stoparica                         # The result is shown by simply printing the stopwatch.
                                        # In the output each intermediate time is measured from the previous intermediate time,
                                        # while TOTAL is the difference (end - start).

# Usage example
st = Stopwatch("Optimizacija")
hadamardova_matrikaOLD(8)

示例#10

0

显示文件

# Report the model size.
net.log("Trainable parameters: {}".format(trainable_params))
net.log("Total parameters: {}".format(total_params))
# total_params * 4 bytes converted to MiB; assumes 4 bytes per parameter
# (e.g. float32) -- TODO confirm
net.log("Memory requirement: {:.2f} MiB".format(
    ((total_params * 4) / 1024) / 1024))

# Optimizer
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
# StepLR multiplies the learning rate by GAMMA every LR_DECAY_EPOCHS
# scheduler steps
scheduler = optim.lr_scheduler.StepLR(optimizer,
                                      step_size=LR_DECAY_EPOCHS,
                                      gamma=GAMMA)

# Loss function
criterion = Customized_Loss(beta=LOSS_BETA)

# Time measuring
# one stopwatch for the whole training run, one started at each epoch
stopwatch_train = Stopwatch()
stopwatch_epoch = Stopwatch()

# Training phase
total_epoch_time = 0
stopwatch_train.start()
for epoch in range(EPOCHS):
    net.log("[Epoch {}]".format(epoch + 1))

    net.train()
    # NOTE(review): the scheduler is stepped at the start of the epoch,
    # before any optimizer step visible here; PyTorch >= 1.1 expects
    # scheduler.step() after optimizer.step() -- confirm.
    scheduler.step()
    training_losses = []
    num_iters = 0
    stopwatch_epoch.start()
    # NOTE(review): this import runs on every epoch and is only needed by
    # the commented-out debugging loop below.
    import itertools
    #for images, ps in itertools.islice(train_loader, 10):

示例#11

0

显示文件

def train():
    """Train the model until `CONFIG.TRAIN.MAX_ITERS` global steps.

    Builds the algorithm, summary writer, optimizer and checkpoint manager
    inside a `tf.distribute.MirroredStrategy` scope, then runs the training
    loop, writing summaries and checkpoints at the configured intervals.
    A final checkpoint is always saved in the `finally` clause, including
    after a KeyboardInterrupt.
    """

    # define path to log dir
    logdir = CONFIG.LOGDIR

    setup_train_dir(logdir)

    # Common code for multigpu and single gpu
    strategy = tf.distribute.MirroredStrategy()
    with strategy.scope():
        # get training algorithm
        algo = train_algo.Algorithm()

        # Setup summary writer.
        summary_writer = tf.summary.create_file_writer(os.path.join(
            logdir, 'train_logs'),
                                                       flush_millis=10000)

        # setup learning_rate schedule, optimizer ...
        learning_rate, optimizer, global_step = get_lr_opt_global_step()
        # `status` and the third return value are not used in this function
        ckpt_manager, status, _ = restore_ckpt(logdir=logdir,
                                               optimizer=optimizer,
                                               **algo.model)

        # Python-side copy of the step counter used for loop control
        global_step_value = global_step.numpy()

        lr_fn = get_lr_fn(CONFIG.OPTIMIZER)

        # Setup Dataset Iterators.
        batch_size_per_replica = CONFIG.TRAIN.BATCH_SIZE
        total_batch_size = batch_size_per_replica * strategy.num_replicas_in_sync
        # Setup train iterator
        train_ds = create_dataset(split='train',
                                  mode=CONFIG.MODE,
                                  batch_size=total_batch_size)
        train_iterator = strategy.make_dataset_iterator(train_ds)

        # define one training step
        def train_step(data):
            loss = algo.train_one_iter(data, global_step, optimizer)
            return loss

        # gathering loss across different GPUs
        def dist_train(it):
            # per-replica losses are combined with ReduceOp.SUM
            total_loss = strategy.reduce(tf.distribute.ReduceOp.SUM,
                                         strategy.experimental_run(
                                             train_step, it),
                                         axis=None)
            return total_loss

        # wrap the distributed step in a tf.function
        dist_train = tf.function(dist_train)

        stopwatch = Stopwatch()

        try:
            while global_step_value < CONFIG.TRAIN.MAX_ITERS:
                with summary_writer.as_default():
                    # the condition is evaluated with the step value from
                    # before this iteration's training step
                    with tf.summary.record_if(
                            global_step_value %
                            CONFIG.LOGGING.REPORT_INTERVAL == 0):

                        # training loss
                        loss = dist_train(train_iterator)
                        # Update learning rate based in lr_fn.
                        learning_rate.assign(lr_fn(learning_rate, global_step))

                        tf.summary.scalar('loss', loss, step=global_step)
                        tf.summary.scalar('learning_rate',
                                          learning_rate,
                                          step=global_step)

                        # Save checkpoint.
                        # NOTE: `global_step_value` is refreshed only after
                        # this check, so it still holds the value from
                        # before this iteration.
                        if global_step_value % CONFIG.CHECKPOINT.SAVE_INTERVAL == 0:
                            ckpt_manager.save()
                            logging.info('Checkpoint saved at iter %d.',
                                         global_step_value)

                        # Update global step.
                        global_step_value = global_step.numpy()

                        # time since the stopwatch was last reset, i.e. the
                        # duration of this iteration
                        time_per_iter = stopwatch.elapsed()

                        tf.summary.scalar('timing/time_per_iter',
                                          time_per_iter,
                                          step=global_step)

                        logging.info(
                            'Iter[{}/{}], {:.1f}s/iter, Loss: {:.3f}'.format(
                                global_step_value, CONFIG.TRAIN.MAX_ITERS,
                                time_per_iter, loss.numpy()))

                        # Reset stopwatch after iter is complete.
                        stopwatch.reset()

        except KeyboardInterrupt:
            logging.info(
                'Caught keyboard interrupt. Saving model before quitting.')

        finally:
            # Save the final checkpoint.
            ckpt_manager.save()
            logging.info('Checkpoint saved at iter %d', global_step_value)

示例#12

0

显示文件

import sys
sys.path.append("..")
from utils import Stopwatch
import matplotlib.pyplot as plt

# Benchmark of the stopwatch's own overhead: time an empty start/stop cycle
# `retries` times, print every sample, then print summary statistics.
retries = 1000000
stopwatch1 = Stopwatch.Stopwatch()
measurements = []
outsaiders = 0  # number of samples above the 1.0e-03 threshold

for _ in range(retries):
    stopwatch1.reset()
    stopwatch1.start()
    stopwatch1.stop()
    sample = stopwatch1.hadleResult()
    measurements.append(sample)
    print("% 2.20f" % sample)
    if sample > 1.0e-03:
        outsaiders += 1

# Mean of the squared samples, accumulated in an explicit loop.
suma = 0
for sample in measurements:
    suma += sample ** 2
suma /= len(measurements)

# Despite its name, `mse` holds the square root of that mean.
mse = suma ** 0.5
print(
    f"min:{min(measurements)} max:{max(measurements)} avr:{sum(measurements)/len(measurements)} mse:{mse}"
)
# share of samples above the threshold, in percent
print("{prc}%".format(prc=outsaiders / retries * 100))

示例#13

0

显示文件

 def on_queued(self, client: Client) -> None:
     """Report that `client` was queued and store a new stopwatch for it.

     The stopwatch is kept in `self._queue_times` under the client's id.
     """
     notice = f'{client} has been queued.'
     self._print(notice)

     queue_timer = Stopwatch()
     self._queue_times[client.id] = queue_timer

示例#14

0

显示文件

 def on_processing_started(self, client: Client) -> None:
     """Store a new stopwatch for `client` and report that processing began.

     The stopwatch is kept in `self._processing_times` under the client's id
     and is created before the message is printed.
     """
     processing_timer = Stopwatch()
     self._processing_times[client.id] = processing_timer

     notice = f'processing of the {client} has been started.'
     self._print(notice)

示例#15

0

显示文件

    def _fit(self, X):
        """Train on `X` for up to `self.n_epochs` epochs with early stopping.

        On first use the parameters are created: `W` and `hb` come from a
        `FullyConnected` layer set up for `X.shape`, `vb` is a zero vector
        with one entry per column of `X`, and the `_d*` buffers are zeros of
        matching shape. Each epoch sets the learning rate and momentum, calls
        `self.train_epoch(X)`, and keeps copies of the parameters that gave
        the lowest value returned by `train_epoch`.

        Returns None; returns early once `self._early_stopping` reaches 0.
        """
        if not self._initialized:
            # NOTE(review): `_initialized` is not set to True in this method;
            # presumably it is updated elsewhere -- confirm.
            layer = FullyConnected(self.n_hidden,
                                   bias=0.,
                                   random_seed=self.random_seed)
            layer.setup_weights(X.shape)
            self.W = layer.W
            self.vb = np.zeros(X.shape[1])
            self.hb = layer.b
            self._dW = np.zeros_like(self.W)
            self._dvb = np.zeros_like(self.vb)
            self._dhb = np.zeros_like(self.hb)
            self._rng = RNG(self.random_seed)
        self._rng.reseed()
        timer = Stopwatch(verbose=False).start()
        for _ in xrange(self.n_epochs):
            # `self.epoch` is not reset in this method
            self.epoch += 1
            if self.verbose:
                print_inline('Epoch {0:>{1}}/{2} '.format(
                    self.epoch, len(str(self.n_epochs)), self.n_epochs))

            # A string learning rate 'S->F' is a schedule: the value equals S
            # at epoch 1 and F at epoch n_epochs, following an exponential
            # saturation curve with constant 8.
            # NOTE(review): the denominator is zero when n_epochs == 1.
            if isinstance(self.learning_rate, str):
                S, F = map(float, self.learning_rate.split('->'))
                self._learning_rate = S + (F - S) * (
                    1. - np.exp(-(self.epoch - 1.) / 8.)) / (
                        1. - np.exp(-(self.n_epochs - 1.) / 8.))
            else:
                self._learning_rate = self.learning_rate

            # Same schedule form for momentum, with constant 4.
            if isinstance(self.momentum, str):
                S, F = map(float, self.momentum.split('->'))
                self._momentum = S + (F - S) * (
                    1. - np.exp(-(self.epoch - 1) / 4.)) / (
                        1. - np.exp(-(self.n_epochs - 1) / 4.))
            else:
                self._momentum = self.momentum

            mean_recon = self.train_epoch(X)
            # new best: snapshot the parameters and reset the
            # early-stopping counter
            if mean_recon < self.best_recon:
                self.best_recon = mean_recon
                self.best_epoch = self.epoch
                self.best_W = self.W.copy()
                self.best_vb = self.vb.copy()
                self.best_hb = self.hb.copy()
                self._early_stopping = self.early_stopping
            msg = 'elapsed: {0} sec'.format(
                width_format(timer.elapsed(), default_width=5,
                             max_precision=2))
            msg += ' - recon. mse: {0}'.format(
                width_format(mean_recon, default_width=6, max_precision=4))
            msg += ' - best r-mse: {0}'.format(
                width_format(self.best_recon, default_width=6,
                             max_precision=4))
            if self.early_stopping:
                # show the current value of the early-stopping counter
                msg += ' {0}*'.format(self._early_stopping)
            if self.verbose:
                print msg
            # stop once the counter has run out; otherwise count this
            # epoch down (the counter is restored on the next improvement)
            if self._early_stopping == 0:
                return
            if self.early_stopping:
                self._early_stopping -= 1
示例#16

0

显示文件

文件： model_selection.py 项目： YashNita/MNIST_Challange_SOL

    def fit(self, X, y):
        """Run k-fold cross-validation over every parameter combination.

        Results are collected in `self.cv_results_` (per-split scores and
        train/test times, mean and std score, the params dicts and one
        masked array per parameter name), and `best_index_`, `best_score_`,
        `best_std_`, `best_params_` and `best_model_` are updated whenever a
        combination's mean score exceeds `self.best_score_`.

        When `self.refit` is truthy the model is fitted for every
        (params, split) pair. Otherwise the model is fitted once per split
        and only re-parameterized and evaluated for each combination.

        Returns:
            self
        """
        timer = Stopwatch(verbose=False).start()
        X, y = self._check_X_y(X, y)
        unique_params = self.unique_params()
        tts = TrainTestSplitter(**self.train_test_splitter_params)
        number_of_combinations = self.number_of_combinations()
        total_iter = self.n_splits * number_of_combinations
        # width used to right-align the iteration counter in progress output
        current_iter_width = len(str(total_iter))

        if self.verbose:
            print "Training {0} on {1} samples x {2} features.".format(
                self.model.model_name(), *X.shape)
            print "{0}-fold CV for each of {1} params combinations == {2} fits ...\n"\
                .format(self.n_splits, number_of_combinations, total_iter)

        # initialize `cv_results_`
        self.cv_results_['mean_score'] = []
        self.cv_results_['std_score'] = []
        self.cv_results_['params'] = []
        for k in xrange(self.n_splits):
            self.cv_results_['split{0}_score'.format(k)] = []
            self.cv_results_['split{0}_train_time'.format(k)] = []
            self.cv_results_['split{0}_test_time'.format(k)] = []
        for param_name in unique_params:
            self.cv_results_['param_{0}'.format(param_name)] = ma.array([])

        # NOTE: `current_iter` is only advanced inside the verbose branches
        current_iter = 0
        if self.refit:
            # for each param combination fit consequently on each fold
            # to obtain mean score across splits as soon as possible
            for params_index, params in enumerate(self.gen_params()):

                # set params and add to `cv_results_`
                self.model.reset_params().set_params(**params)
                self.cv_results_['params'].append(params)

                # entries are masked for parameters absent from this
                # combination
                for param_name in unique_params:
                    cv_key = 'param_{0}'.format(param_name)
                    mask = [int(not param_name in params)]
                    to_concat = ma.array([params.get(param_name, None)],
                                         mask=mask)
                    self.cv_results_[cv_key] = ma.concatenate(
                        (self.cv_results_[cv_key], to_concat))
                splits_scores = []
                for split_index, (train, test) in enumerate(
                        tts.k_fold_split(y,
                                         n_splits=self.n_splits,
                                         stratify=True)):
                    # verbosing
                    if self.verbose:
                        current_iter += 1
                        t = "iter: {0:{1}}/{2} ".format(
                            current_iter, current_iter_width, total_iter)
                        # '+' for splits up to the current one, '-' for the rest
                        t += '+' * (split_index + 1) + '-' * (self.n_splits -
                                                              split_index - 1)
                        print_inline(t)
                    # fit and evaluate
                    with Stopwatch(verbose=False) as s:
                        self.model.fit(X[train], y[train])
                    self.cv_results_['split{0}_train_time'.format(
                        split_index)].append(s.elapsed())
                    with Stopwatch(verbose=False) as s:
                        score = self.model.evaluate(X[test], y[test])
                    self.cv_results_['split{0}_test_time'.format(
                        split_index)].append(s.elapsed())
                    # score = self.scoring(y[test], y_pred)
                    splits_scores.append(score)
                    # add score to `cv_results_`
                    self.cv_results_['split{0}_score'.format(
                        split_index)].append(score)
                    # verbosing
                    if self.verbose:
                        print_inline(" elapsed: {0} sec".format(
                            width_format(timer.elapsed(), default_width=7)))
                        if split_index < self.n_splits - 1:
                            t = ""
                            if self.best_score_ > -np.inf:
                                t += " - best acc.: {0:.4f} at {1}" \
                                    .format(self.best_score_, self.best_params_)
                            else:
                                t += "   ..."
                            print t

                # compute mean and std score
                mean_score = np.mean(splits_scores)
                std_score = np.std(splits_scores)

                self.cv_results_['mean_score'].append(mean_score)
                self.cv_results_['std_score'].append(std_score)
                # update 'best' attributes
                if mean_score > self.best_score_:
                    self.best_index_ = params_index
                    self.best_score_ = mean_score
                    self.best_std_ = std_score
                    self.best_params_ = params
                    # NOTE(review): this stores a reference to `self.model`,
                    # which is re-parameterized on later iterations; confirm
                    # whether a copy is intended.
                    self.best_model_ = self.model
                    if self.save_models:
                        self.best_model_.save(filepath=os.path.join(
                            self.dirpath, self._best_model_name()),
                                              **self.save_params)
                # verbosing
                if self.verbose:
                    print_inline(
                        " - mean acc.: {0:.4f} +/- 2 * {1:.3f}\n".format(
                            mean_score, std_score))

        else:  # if self.refit == False
            # fit for each fold and then evaluate on each combination
            # of params
            for split_index, (train, test) in enumerate(
                    tts.k_fold_split(y, n_splits=self.n_splits,
                                     stratify=True)):
                current_best_score = -np.inf
                current_best_params = None
                for params_index, params in enumerate(self.gen_params()):
                    # set params
                    self.model.reset_params().set_params(**params)
                    # fit model (only once per split)
                    if params_index == 0:
                        with Stopwatch(verbose=False) as s:
                            self.model.fit(X[train], y[train])
                    # on first split add params to `cv_results_`
                    if split_index == 0:
                        # store params' values
                        self.cv_results_['params'].append(params)
                        for param_name in unique_params:
                            cv_key = 'param_{0}'.format(param_name)
                            mask = [int(not param_name in params)]
                            to_concat = ma.array(
                                [params.get(param_name, None)], mask=mask)
                            self.cv_results_[cv_key] = ma.concatenate(
                                (self.cv_results_[cv_key], to_concat))
                    # write training time
                    # (`s` is the fit stopwatch only when params_index == 0;
                    # other combinations record 0. because no fit was done)
                    self.cv_results_['split{0}_train_time'.format(split_index)]\
                        .append(s.elapsed() if params_index == 0 else 0.)
                    # evaluate
                    with Stopwatch(verbose=False) as s:
                        score = self.model.evaluate(X[test], y[test])
                    self.cv_results_['split{0}_test_time'.format(
                        split_index)].append(s.elapsed())
                    # score = self.scoring(y[test], y_pred)
                    # add score to `cv_results_`
                    cv_key = 'split{0}_score'.format(split_index)
                    self.cv_results_[cv_key].append(score)
                    # update "current" best score and params
                    # (mean over the splits processed so far)
                    current_mean_score = np.mean([
                        self.cv_results_['split{0}_score'.format(k)]
                        [params_index] for k in xrange(split_index + 1)
                    ])
                    if current_mean_score > current_best_score:
                        current_best_score = current_mean_score
                        current_best_params = params
                    # verbosing
                    if self.verbose:
                        current_iter += 1
                        t = "iter: {0:{1}}/{2} ".format(
                            current_iter, current_iter_width, total_iter)
                        t += '+' * (split_index + 1) + '-' * (self.n_splits -
                                                              split_index - 1)
                        t += " elapsed: {0} sec".format(
                            width_format(timer.elapsed(), default_width=7))
                        if split_index < self.n_splits - 1:
                            t += " - best acc.: {0:.4f}  [{1}/{2} splits] at {3}"\
                                 .format(current_best_score, split_index + 1, self.n_splits, current_best_params)
                        print_inline(t)
                        if split_index < self.n_splits - 1: print
                    # after last split ...
                    if split_index == self.n_splits - 1:
                        # ... compute means, stds
                        splits_scores = [
                            self.cv_results_['split{0}_score'.format(k)]
                            [params_index] for k in xrange(self.n_splits)
                        ]
                        mean_score = np.mean(splits_scores)
                        std_score = np.std(splits_scores)
                        self.cv_results_['mean_score'].append(mean_score)
                        self.cv_results_['std_score'].append(std_score)
                        # ... and update best attributes
                        if mean_score > self.best_score_:
                            self.best_index_ = params_index
                            self.best_score_ = mean_score
                            self.best_std_ = std_score
                            self.best_params_ = params
                            # NOTE(review): reference to the shared
                            # `self.model`, not a copy -- confirm.
                            self.best_model_ = self.model
                            if self.save_models:
                                self.best_model_.save(filepath=os.path.join(
                                    self.dirpath, self._best_model_name()),
                                                      **self.save_params)
                        # verbosing
                        if self.verbose:
                            print_inline(
                                " - best acc.: {0:.4f} +/- 2 * {1:.3f} at {2}\n"
                                .format(self.best_score_, self.best_std_,
                                        self.best_params_))

        # convert lists to np.ndarray
        for key in (['mean_score', 'std_score', 'params'] + [
                'split{0}_{1}'.format(k, s) for k in xrange(self.n_splits)
                for s in ('score', 'train_time', 'test_time')
        ]):
            self.cv_results_[key] = np.asarray(self.cv_results_[key])
        return self

示例#17

0

显示文件

def check_latency(net,
                  c_in=3,
                  s_size_h=256,
                  s_size_w=256,
                  repeat=500,
                  bn_fold=True,
                  replace_denormals=True):
    """Print the average single-thread CPU forward latency of `net`.

    The network is moved to the CPU, optionally batch-norm-folded and
    denormal-cleaned, then run on random inputs of shape
    `(1, c_in, s_size_h, s_size_w)`. The first `warm_start + 1` passes are
    not counted; the following `repeat` passes are averaged.

    Note that this changes global torch state (gradients disabled, default
    tensor type, thread count set to 1) and does not restore it.

    Args:
        net: the model to benchmark; called as `net(x)`.
        c_in: number of input channels.
        s_size_h: input height.
        s_size_w: input width.
        repeat: number of timed forward passes. If it is not positive, no
            pass is timed and a notice is printed instead of an average
            (previously this raised ZeroDivisionError).
        bn_fold: whether to call `fuse_bn_recursively(net)` first.
        replace_denormals: whether to call `ReplaceDenormals(net)` first.

    Returns:
        None; all results are printed.
    """
    net.cpu()
    # net.mode = 'inference'

    if bn_fold:
        print("Batch Normalization Folding...")

        try:
            fuse_bn_recursively(net)
        except Exception as e:
            # folding is best effort: report the failure and benchmark the
            # unfolded network
            print(
                "NOTE!!! Batch Normalization Failed. Error message is below\n",
                e)

    if replace_denormals:
        print("Replacing denormals...")
        ReplaceDenormals(net)

    torch.set_grad_enabled(False)
    torch.set_default_tensor_type(torch.FloatTensor)
    torch.set_num_threads(1)
    print('python version: %s' % sys.version)
    print('torch.__version__:%s' % torch.__version__)
    print('torch.backends.mkl.is_available(): %s' %
          torch.backends.mkl.is_available())
    print('torch.backends.openmp.is_available(): %s' %
          torch.backends.openmp.is_available())
    # close the pipe explicitly instead of leaving it to garbage collection
    with os.popen('conda list mkl') as pipe:
        print(pipe.read())
    print('num_threads: %d' % torch.get_num_threads())

    batch_size = 1

    warm_start = 10
    repeat_count = 0

    timer = Stopwatch('latency', silance=True)

    elapsed = 0.
    for it in range(repeat + warm_start + 1):
        x = torch.rand(batch_size,
                       c_in,
                       s_size_h,
                       s_size_w,
                       requires_grad=False)

        with timer:
            net(x)

        # passes 0..warm_start are warm-up and are not counted
        if it > warm_start:
            elapsed += timer.latency
            repeat_count += 1

            if it % 10 == 0:
                print('trial: %d, latency %f' % (repeat_count, timer.latency))

    if repeat_count == 0:
        # nothing was timed, so there is no average to report
        print('elapsed: no timed iterations (repeat=%d)' % repeat)
        return

    print('elapsed: %f' % (elapsed / repeat_count))