def __init__(self, model_size, group_id, environment_id=0, training=True):
		"""Set up one environment worker: environment, replay batch, statistics and logs.

		Args:
			model_size: stored unchanged on ``self.model_size``.
			group_id: identifier of the owning group; used in the reward
				logger name and log-file name.
			environment_id: identifier passed to
				``Environment.create_environment`` and used in the reward
				logger name and log-file name.
			training: when true, the log directories and the reward logger
				under ``flags.log_dir`` are set up.
		"""
		self.model_size = model_size
		self._training = training
		self.environment_id = environment_id
		self.group_id = group_id
		# Build environment
		self.environment = Environment.create_environment(flags.env_type, self.environment_id, self._training)
		# NOTE(review): eval() executes whatever expression the flag contains;
		# this is only safe while the flag value comes from a trusted source.
		self.extrinsic_reward_manipulator = eval(flags.extrinsic_reward_manipulator)
		self.terminal = True
		# Without replay (replay_mean <= 0) only a single batch is kept.
		self._composite_batch = CompositeBatch(maxlen=flags.replay_buffer_size if flags.replay_mean > 0 else 1)
		# Statistics
		self.__client_statistics = Statistics(flags.episode_count_for_evaluation)
		if self._training:
			# Log directories. exist_ok=True removes the window between an
			# "is it there?" check and the creation, which would raise when
			# two workers initialise at the same time.
			os.makedirs(flags.log_dir + "/performance", exist_ok=True)
			os.makedirs(flags.log_dir + "/episodes", exist_ok=True)
			formatter = logging.Formatter('%(asctime)s %(message)s')
			# reward logger
			self.__reward_logger = logging.getLogger('reward_{}_{}'.format(self.group_id, self.environment_id))
			hdlr = logging.FileHandler(flags.log_dir + '/performance/reward_{}_{}.log'.format(self.group_id, self.environment_id))
			hdlr.setFormatter(formatter)
			self.__reward_logger.addHandler(hdlr)
			self.__reward_logger.setLevel(logging.DEBUG)
			self.__max_reward = float("-inf")
Пример #2
0
    def report_training(self,
                        step,
                        num_steps,
                        learning_rate,
                        report_stats,
                        multigpu=False):
        """
        Batch-level training progress report hook.

        A report is produced only when ``step`` is a multiple of
        ``self.report_every``; on every other step the statistics are
        handed back untouched.

        Args:
            step(int): current step count.
            num_steps(int): total number of batches.
            learning_rate(float): current learning rate.
            report_stats(Statistics): statistics accumulated so far.
            multigpu(bool): when true, the statistics are first passed
                through ``Statistics.all_gather_stats``.
        Returns:
            report_stats(Statistics): a new ``Statistics`` instance right
            after a report, otherwise the ``report_stats`` argument.
        Raises:
            ValueError: if ``self.start_time`` is negative.
        """
        if self.start_time < 0:
            raise ValueError("""ReportMgr needs to be started
                                (set 'start_time' or use 'start()'""")

        is_report_step = step % self.report_every == 0
        if not is_report_step:
            return report_stats

        stats_to_report = report_stats
        if multigpu:
            stats_to_report = Statistics.all_gather_stats(report_stats)
        self._report_training(step, num_steps, learning_rate, stats_to_report)
        self.progress_step += 1
        # The accumulator starts over after each report.
        return Statistics()
Пример #3
0
    def __init__(self, args: Namespace, logger: HtmlLogger):
        """Build the model, the data queues and the statistics tracker.

        Args:
            args: run configuration; ``save`` and ``trainFolder`` are read
                here and the whole object is passed to ``buildModel``,
                ``load_data`` and ``logParameters``.
            logger: stored on the instance and used to log the parameters.
        """
        # Build the model and move it to CUDA.
        net = self.buildModel(args).cuda()

        # No DataParallel wrapper is applied here, so the "parallel" handle
        # is the very same model object.
        self.modelParallel = net

        self.args = args
        self.model = net
        self.logger = logger

        # Data queues.
        queues = load_data(args)
        self.train_queue, self.valid_queue, self.createSearchQueue = queues

        # Folder where loggers, checkpoints, etc. are saved.
        self.trainFolderPath = '{}/{}'.format(args.save, args.trainFolder)

        # Statistics: containers are built first, then the rules.
        stats_containers = self.buildStatsContainers()
        stats_rules = self.buildStatsRules()
        self.statistics = Statistics(stats_containers, stats_rules, args.save)

        # Log the run parameters.
        logParameters(logger, args, net)
 def __init__(self, cf):
     """Keep the configuration and start with no net, loss, optimizer or scheduler.

     Args:
         cf: configuration object, stored on ``self.cf``.
     """
     self.cf = cf
     # Placeholders only; nothing in this constructor builds them.
     self.net = self.loss = self.optimizer = self.scheduler = None
     self.best_stats = Statistics()
    def validate(self, valid_iter):
        """Validate the model on ``valid_iter``.

        Args:
            valid_iter: validation data iterator; ``batch.src`` of every
                batch is unpacked as a pair whose second element is used
                as the source lengths.

        Returns:
            :obj:`nmt.Statistics`: validation loss statistics
        """
        # Imported locally so the method does not rely on a module-level
        # ``import torch`` being present.
        import torch

        # Set model in validating mode.
        self.model.eval()

        stats = Statistics()

        # Validation needs no gradients; without no_grad() every forward
        # pass would still record an autograd graph.
        with torch.no_grad():
            for batch in valid_iter:
                src = make_features(batch, 'src')
                _, src_lengths = batch.src

                tgt = make_features(batch, 'tgt')

                # F-prop through the model.
                outputs, attns = self.model(src, tgt, src_lengths)

                # Compute loss.
                batch_stats = self.valid_loss.monolithic_compute_loss(
                    batch, outputs, attns)

                # Update statistics.
                stats.update(batch_stats)

        # Set model back to training mode.
        self.model.train()

        return stats
Пример #6
0
  def validate(self, valid_iter):
    """Run the model over the validation data and accumulate loss statistics.

    Args:
        valid_iter: validate data iterator

    Returns:
        :obj:`nmt.Statistics`: validation loss statistics
    """
    # Evaluation mode for the duration of the pass.
    self.model.eval()

    accumulated = Statistics()

    with torch.no_grad():
      for batch in valid_iter:
        source = make_features(batch, 'src').transpose(0, 1).contiguous()
        # Lengths are not read from batch.src here: every example in the
        # batch is given the same length, source.size(1).
        source_lengths = (torch.ones(batch.batch_size) * source.size(1)).long()
        target = make_features(batch, 'tgt')

        # Forward pass.
        outputs, attns = self.model(source, target, source_lengths)

        # Loss statistics of this batch, folded into the running total.
        accumulated.update(
          self.valid_loss.monolithic_compute_loss(batch, outputs, attns))

    # Back to training mode.
    self.model.train()

    return accumulated
 def __init__(self, cf, model):
     """Create the stats logger and the validator, trainer and predictor helpers.

     Args:
         cf: configuration object; ``cf.log_file_stats`` is passed to ``Logger``.
         model: model handed to each of the three helpers.
     """
     self.cf, self.model = cf, model
     self.logger_stats = Logger(cf.log_file_stats)
     self.stats = Statistics()
     self.msg = Messages()
     # The validator is built first because the trainer receives it.
     self.validator = self.validation(
         self.logger_stats, self.model, cf, self.stats, self.msg)
     self.trainer = self.train(
         self.logger_stats, self.model, cf, self.validator, self.stats,
         self.msg)
     self.predictor = self.predict(self.logger_stats, self.model, cf)
Пример #8
0
def _load_kvr_global_entities(path='data/KVR/kvret_entities.json'):
    """Return the de-duplicated, lower-cased, underscore-joined KVR entity names in ``path``."""
    # JSON text is UTF-8; do not depend on the platform default encoding.
    with open(path, encoding='utf-8') as f:
        global_entity = json.load(f)
    names = []
    for key in global_entity.keys():
        if key != 'poi':
            names += [item.lower().replace(' ', '_') for item in global_entity[key]]
        else:
            # 'poi' entries are dicts; every value of each dict is an entity.
            for item in global_entity['poi']:
                names += [item[k].lower().replace(' ', '_') for k in item.keys()]
    return list(set(names))


def validate_one_epoch(val_loader, model, trainer, args):
    """Decode the validation set once and log the resulting scores.

    Args:
        val_loader: iterable of validation batches.
        model: model to evaluate; switched to eval mode here.
        trainer: object providing ``evaluate_batch(model, data)``.
        args: options; ``dataset`` must be ``'kvr'`` and ``distributed``
            selects whether the statistics are gathered across processes.

    Returns:
        tuple: ``(bleu_score, [f1, cal_f1, wet_f1, nav_f1])`` where every
        value is ``accuracy()`` of one of the five statistics objects and
        the BLEU value is additionally divided by 100.

    Raises:
        NotImplementedError: if ``args.dataset`` is not ``'kvr'``.
    """
    # switch to evaluate mode
    model.eval()

    val_stats = [Statistics() for _ in range(5)]

    # read-in global entity list
    if args.dataset != 'kvr':
        raise NotImplementedError('Not implemented this val for datasets other than kvr yet.')
    global_entity_list = _load_kvr_global_entities()

    with torch.no_grad():
        for data in tqdm(val_loader):
            decoded_words = trainer.evaluate_batch(model, data)
            # update val states for each batch.
            val_stats = compute_val_stat(data, decoded_words, global_entity_list, val_stats, args)

    if args.distributed:
        all_val_stats = Statistics.all_gather_stats_list(val_stats)
    else:
        all_val_stats = val_stats
    f1 = all_val_stats[0].accuracy()
    cal_f1 = all_val_stats[1].accuracy()
    wet_f1 = all_val_stats[2].accuracy()
    nav_f1 = all_val_stats[3].accuracy()

    logger.info("F1 SCORE:\t{}".format(str(f1)))
    logger.info("\tCAL F1:\t{}".format(str(cal_f1)))
    logger.info("\tWET F1:\t{}".format(str(wet_f1)))
    logger.info("\tNAV F1:\t{}".format(str(nav_f1)))

    bleu_score = all_val_stats[4].accuracy() / 100.0
    logger.info("\tBleu Score:\t{}".format(str(bleu_score)))

    return bleu_score, [f1, cal_f1, wet_f1, nav_f1]
Пример #9
0
 def before_epoch(self, _epoch):
     """Reset the per-epoch training state before epoch ``_epoch`` starts.

     Args:
         _epoch: epoch index, stored on ``self._current_epoch``.
     """
     config = self.cfg
     self._current_epoch = _epoch
     multi_gpu = config.n_gpu > 1
     if multi_gpu:
         # Wait for the other processes before starting the epoch.
         torch.distributed.barrier()
     self._model.train()
     # Running loss and step counters for this epoch.
     self._epoch_tr_loss, self._epoch_n_tr_steps = 0.0, 0.0
     if not config.is_master_node:
         return
     # Only the master node keeps epoch statistics.
     self._epoch_stats = Statistics(
         epoch_num=int(config.epoch_num),
         total_training_steps=self._optimizer.total_training_steps)
    def test(self):
        """Run the test set on ``self.thread_count`` threads and evaluate the results.

        Does nothing (returns ``None``) when the result file for the current
        ``self.global_step`` already exists. Otherwise one ``Group`` tester
        and one thread per slice of the dataset are started, their
        statistics are merged and appended to ``test_statistics.log`` under
        ``flags.log_dir``.

        Returns:
            Whatever ``evaluate_test_results(result_file)`` of the temporary
            environment returns, or ``None`` on the early exit.
        """
        result_file = '{}/test_results_{}.log'.format(flags.log_dir,
                                                      self.global_step)
        if os.path.exists(result_file):
            print('Test results already produced and evaluated for {}'.format(
                result_file))
            return
        # Passed to every test thread together with the result file.
        result_lock = RLock()

        print('Start testing')
        testers = []
        threads = []
        tf_session = tf.get_default_session()
        # Environment used only for the dataset size and the final evaluation.
        tmp_environment = Environment.create_environment(
            env_type=flags.env_type, training=False)
        dataset_size = tmp_environment.get_dataset_size()
        # NOTE(review): integer division — when dataset_size is not a
        # multiple of thread_count the trailing remainder is not covered by
        # any range below; confirm this is intended.
        data_per_thread = max(1, dataset_size // self.thread_count)
        for i in range(self.thread_count):  # parallel testing
            # Testers get negative group ids: -1, -2, ...
            tester = Group(group_id=-(i + 1),
                           environment_count=data_per_thread,
                           global_network=self.global_network,
                           training=False)
            data_range_start = i * data_per_thread
            data_range_end = data_range_start + data_per_thread
            # print(data_range_start, data_per_thread, dataset_size)
            thread = Thread(target=self.test_function,
                            args=(result_file, result_lock, tester,
                                  (data_range_start,
                                   data_range_end), tf_session))
            thread.start()
            threads.append(thread)
            testers.append(tester)
        print('Test Set size:', dataset_size)
        print('Tests per thread:', data_per_thread)
        # NOTE(review): fixed 5 s pause before joining; purpose not visible here.
        time.sleep(5)
        for thread in threads:  # wait for all threads to end
            thread.join()
        print('End testing')
        # get overall statistics
        test_statistics = Statistics(self.thread_count)
        for group in testers:
            test_statistics.add(group.get_statistics())
        info = test_statistics.get()
        # write results to file
        stats_file = '{}/test_statistics.log'.format(flags.log_dir)
        # Appended as one line: a list of "key=value" strings sorted by key.
        with open(stats_file, "a",
                  encoding="utf-8") as file:  # write stats to file
            file.write('{}\n'.format([
                "{}={}".format(key, value)
                for key, value in sorted(info.items(), key=lambda t: t[0])
            ]))
        print('Test statistics saved in {}'.format(stats_file))
        print('Test results saved in {}'.format(result_file))
        return tmp_environment.evaluate_test_results(result_file)
Пример #11
0
    def _report_training(self, step, num_steps, learning_rate, report_stats):
        """
        See base class method `ReportMgrBase.report_training`.

        Calls ``report_stats.output(...)``, forwards the statistics to
        ``maybe_log_tensorboard`` under the "progress" prefix and returns a
        new ``Statistics`` instance.
        """
        report_stats.output(step, num_steps, learning_rate, self.start_time)

        # The progress counter (number of reports so far) is the x-axis value.
        self.maybe_log_tensorboard(
            report_stats, "progress", learning_rate, self.progress_step)

        # The caller continues with an empty accumulator.
        return Statistics()
Пример #12
0
def main():
    """Run three search algorithms on 16 knapsack files and export the results.

    Every algorithm is executed ``ITER_MAX`` times per file with the random
    seed set to the repetition index; the collected statistics are written
    out through ``Export`` as CSV and HTML.
    """
    ITER_MAX = 30
    list_statistics = []
    e = Export(list_statistics)
    for i in range(1, 17):
        # Files 1-10 are f<i>.txt; 11-16 map to Knapsack1.txt .. Knapsack6.txt.
        if i < 11:
            name_file = "./data/files/f{}.txt".format(i)
        else:
            name_file = "./data/files/Knapsack{}.txt".format(i - 10)
        statistics = Statistics(name_file, i, ITER_MAX)
        k = Knapsack(name_file)
        hcc = HillclimbingClassic()
        hcm = RandomSearch()
        vns = VNS(0)
        algorithms = [hcm, hcc, vns]
        for candidate in algorithms:
            candidate.max_efos = 1000
        # One list object reused for all algorithms of this file; it is
        # updated in place before each set_vector() call.
        information = [0] * 2
        sublist_statistics = []
        print(name_file)
        for algorithm in algorithms:
            vector = []
            successfull_count = 0
            start_time = time()
            for run in range(ITER_MAX):
                random.seed(run)
                # The k_max range depends on the number of items.
                if k.total_items < 6:
                    k_max = random.randint(
                        2, int(math.log10(k.total_items) + 2))
                else:
                    k_max = random.randint(
                        3, int(math.log10(k.total_items) + 3))
                vns.k_max = k_max
                algorithm.execute(k, None)
                vector.append(algorithm.best_solution.fitness)
                if algorithm.successfull:
                    successfull_count += 1
            end_time = time()
            information[0] = algorithm.__str__()
            information[1] = vector
            statistics.set_vector(information)
            statistics.successfull_count = successfull_count
            # Snapshot, because `statistics` is updated again for the next algorithm.
            sublist_statistics.append(copy.deepcopy(statistics))
            elapsed_minutes = round((end_time - start_time) / 60, 3)
            print("{}min\t\t{}".format(elapsed_minutes, algorithm))
        list_statistics.append(copy.deepcopy(sublist_statistics))
    e.writeCSV()
    e.writeHTML()
Пример #13
0
def validate(test_loader, model, criterion, device):
    """Evaluate ``model`` on ``test_loader`` with gradients disabled.

    Args:
        test_loader: iterable yielding ``(input, target)`` pairs.
        model: model to evaluate; switched to eval mode here.
        criterion: loss function passed on to ``execute_batch``.
        device: device argument passed on to ``execute_batch``.

    Returns:
        Statistics: updated once per batch with the loss, both precision
        values, the predictions and the true labels.
    """
    results = Statistics()

    # Evaluation mode.
    model.eval()
    with torch.no_grad():
        for inputs, targets in test_loader:
            loss, (prec1, prec5), y_pred, y_true = execute_batch(
                model, criterion, inputs, targets, device)
            loss_value = loss.data.cpu().numpy()
            results.update(loss_value, prec1, prec5, y_pred, y_true)

    return results
    def train(self, train_steps, train_steps2, valid_steps):
        """Alternate training of 'task' and 'task2' until both step budgets are used.

        Args:
            train_steps: last step to run for 'task'.
            train_steps2: last step to run for 'task2'.
            valid_steps: passed on to ``train_task`` for both tasks.

        Returns:
            tuple: ``(self.total_stats, self.total_stats2)``.
        """
        logger.info('Start training...')

        # Resume one past the steps recorded in the optimizer.
        step_one = self.optim._task_step + 1
        step_two = self.optim._task2_step + 1

        self.train_iter = self.get_task_batch(step_one, task_type='task')
        self.train_iter2 = self.get_task_batch(step_two, task_type='task2')

        # Statistics and report manager of the first task.
        self.total_stats = Statistics(task_type='task')
        self.report_stats = Statistics(task_type='task')
        self._start_report_manager(
            self.report_manager, start_time=self.total_stats.start_time)

        # Same for the second task.
        self.total_stats2 = Statistics(task_type='task2')
        self.report_stats2 = Statistics(task_type='task2')
        self._start_report_manager(
            self.report_manager2, start_time=self.total_stats2.start_time)

        while step_one <= train_steps or step_two <= train_steps2:
            self.save = False
            if step_one <= train_steps:
                step_one = self.train_task(
                    step_one, train_steps, valid_steps, task_type='task')
            if step_two <= train_steps2:
                step_two = self.train_task(
                    step_two, train_steps2, valid_steps, task_type='task2')

        return self.total_stats, self.total_stats2
Пример #15
0
    def _stats(self, loss, scores, target):
        """
        Build the statistics object for one batch.

        Args:
            loss (:obj:`FloatTensor`): the loss computed by the loss criterion.
            scores (:obj:`FloatTensor`): a score for each possible output
            target (:obj:`FloatTensor`): true targets

        Returns:
            :obj:`onmt.utils.Statistics` : statistics for this batch.
        """
        # Position of the best score along dim 1 is the prediction.
        _, predicted = scores.max(1)
        # Only positions that are not padding are counted.
        keep = target.ne(self.padding_idx)
        hits = predicted.eq(target).masked_select(keep)
        n_words = keep.sum().item()
        n_correct = hits.sum().item()
        return Statistics(loss.item(), n_words, n_correct)
Пример #16
0
    def sharded_compute_loss(self, batch, output, attns, shard_size,
                             normalization, ratio):
        """Compute the forward loss and backpropagate, in two sharded passes.

        Every value of the shard state is indexed with ``[0]`` for the
        first pass and ``[1]`` for the second pass, and each pass divides
        its loss by the matching entry of ``normalization`` before calling
        ``backward``.

        Note sharding is an exact efficiency trick to relieve memory
        required for the generation buffers.

        Args:
          batch (batch) : batch of labeled examples
          output (:obj:`FloatTensor`) :
              output of decoder model `[tgt_len x batch x hidden]`
          attns (dict) : dictionary of attention distributions
              `[tgt_len x batch x src_len]`
          shard_size (int) : maximum number of examples in a shard
          normalization : indexable with ``[0]`` and ``[1]``; the loss of
              the first / second pass is divided by the respective entry
          ratio : passed to ``shards`` as ``ratio`` (first pass) and
              ``1 - ratio`` (second pass); the second-pass loss is also
              multiplied by it

        Returns:
            :obj:`onmt.utils.Statistics`: statistics accumulated over the
            shards of both passes

        """
        batch_stats = Statistics()
        shard_state = self._make_shard_state(batch, output, attns)
        # First pass: element [0] of every state entry. The graph is
        # retained because a second backward pass follows.
        for shard in shards({k: v[0]
                             for k, v in shard_state.items()},
                            shard_size,
                            retain_graph=True,
                            ratio=ratio):
            loss, stats = self._compute_loss(batch, **shard)
            loss.div(float(normalization[0])).backward(retain_graph=True)
            batch_stats.update(stats)
        # Second pass: element [1] of every state entry.
        # NOTE(review): shards() receives ``1 - ratio`` here while the loss
        # is scaled by ``ratio`` — confirm this asymmetry is intended.
        for shard in shards({k: v[1]
                             for k, v in shard_state.items()},
                            shard_size,
                            ratio=1 - ratio):
            loss, stats = self._compute_loss(batch, **shard)
            loss *= ratio
            loss.div(float(normalization[1])).backward()
            batch_stats.update(stats)
        return batch_stats
Пример #17
0
    def sharded_compute_loss(self, batch, output, attns,
                             cur_trunc, trunc_size, shard_size,
                             normalization):
        """Compute the forward loss shard by shard and backpropagate each shard.

        The window `(cur_trunc, cur_trunc + trunc_size)` over the decoder
        output sequence is passed to `_make_shard_state`, which supports
        truncated BPTT for long sequences.

        Note sharding is an exact efficiency trick to relieve memory
        required for the generation buffers. Truncation is an
        approximate efficiency trick to relieve the memory required
        in the RNN buffers.

        Args:
          batch (batch) : batch of labeled examples
          output (:obj:`FloatTensor`) :
              output of decoder model `[tgt_len x batch x hidden]`
          attns (dict) : dictionary of attention distributions
              `[tgt_len x batch x src_len]`
          cur_trunc (int) : starting position of truncation window
          trunc_size (int) : length of truncation window
          shard_size (int) : maximum number of examples in a shard
          normalization (int) : Loss is divided by this number

        Returns:
            :obj:`onmt.utils.Statistics`: statistics accumulated over all shards

        """
        totals = Statistics()
        # Truncation window over the target positions.
        window = (cur_trunc, cur_trunc + trunc_size)
        state = self._make_shard_state(batch, output, window, attns)
        # Presumably `state` looks like
        #     {"output": output, "target": batch.tgt[1:len_tgt_size]}
        # and each shard like {"output": output_, "target": target_}
        # — TODO confirm against _make_shard_state / shards.
        # NOTE(review): shard_size is reportedly set to 2 by the caller — TODO confirm.
        for piece in shards(state, shard_size):
            loss, piece_stats = self._compute_loss(batch, **piece)
            # Normalise the shard loss, then backpropagate it.
            normalised = loss.div(float(normalization))
            normalised.backward()
            totals.update(piece_stats)
        return totals
Пример #18
0
def go(request):
    """Count a visit for the requested product and redirect to its URL.

    The product is looked up by the ``id`` query parameter and must be
    active. When ``Statistics(product_id).add_uv(...)`` returns
    ``'success'`` the product's ``uv`` counter is incremented in the
    database.

    Args:
        request: incoming HTTP request.

    Returns:
        ``HttpResponseNotFound`` when the product does not exist, is not
        active, or the ``id`` value cannot be used as a primary key;
        otherwise an ``HttpResponseRedirect`` to ``obj.url``.
    """
    product_id = request.GET.get('id')
    try:
        obj = Product.objects.get(pk=product_id, is_active=True)
    except (ObjectDoesNotExist, ValueError, TypeError):
        # ValueError/TypeError: the query value could not be converted to
        # a primary key (e.g. "?id=abc"). That is a client error, so it is
        # answered with 404 like an unknown id instead of a server error.
        return HttpResponseNotFound()

    # From here on use the id as stored, not the raw query string.
    product_id = obj.id

    identify = get_request_identify(request)
    r = Statistics(product_id).add_uv(
        dict(meta=request.META, identify=identify))
    if r == 'success':
        # F() expression: the increment is performed by the database.
        Product.objects.filter(pk=product_id).update(uv=F('uv') + 1)

    return HttpResponseRedirect(obj.url)
Пример #19
0
    def setUp(self):
        """Start a source and a listener TCP server and prepare test messages.

        Both servers share one ``Statistics`` instance and one listeners
        set and are bound to unused ports. Two messages with an appended
        ``_get_xor`` value and one message without it are built.
        """
        super().setUp()

        listeners = set()
        statistics = Statistics()

        self.source_server_clients = []
        self.listener_server_clients = []

        self.source_server = TornadoTCPServer(
            statistics, listeners, source.SourceHandler)
        sock, self.source_server_port = bind_unused_port()
        self.source_server.add_socket(sock)

        self.listener_server = TornadoTCPServer(
            statistics, listeners, listener.ListenerHandler)
        sock, self.listener_server_port = bind_unused_port()
        self.listener_server.add_socket(sock)

        def be(value, width):
            # Unsigned big-endian encoding of `value` in `width` bytes.
            return int.to_bytes(value, width, byteorder='big', signed=False)

        def frame(first, name, field, last):
            # 0x01, 2-byte int, 8-char name, 0x01, 1-byte int 1,
            # 8-char field name, 4-byte int.
            return bytes(b'\x01' + be(first, 2) + name.encode() + b'\x01' +
                         be(1, 1) + field.encode() + be(last, 4))

        message_one = frame(1, 'abcdefgh', 'foofield', 1)
        self.source_client_msg_one = message_one
        self.source_client_msg_one += _get_xor(self.source_client_msg_one)

        message_two = frame(1, 'ijklmnop', 'fieldfoo', 2)
        self.source_client_msg_two = message_two
        self.source_client_msg_two += _get_xor(self.source_client_msg_two)

        # Same layout with 10 in the 2-byte field and a trailing 0x01 byte
        # in place of a computed xor value.
        self.source_client_invalid_msg = bytes(
            frame(10, 'ijklmnop', 'fieldfoo', 2) + be(1, 1))
Пример #20
0
 def _stats(self, loss, scores, target):
     """
     Summarise one batch as a Statistics object.

     Args:
         loss (:obj:`FloatTensor`): the loss computed by the loss criterion.
         scores (:obj:`FloatTensor`): a score for each possible output
         target (:obj:`FloatTensor`): true targets

     Returns:
         :obj:`onmt.utils.Statistics` : statistics for this batch.
     """
     # Index of the row-wise maximum, i.e. the predicted word.
     # Presumably of size (shard_size * batch) — TODO confirm.
     predicted_ids = scores.max(1)[1]
     # True wherever the target is not the padding index.
     real_tokens = target.ne(self.padding_idx)
     # Leave out padding positions to count the correctly predicted words.
     matches = predicted_ids.eq(target)
     correct = matches.masked_select(real_tokens).sum().item()
     # Total number of non-padding words.
     total = real_tokens.sum().item()
     return Statistics(loss.item(), total, correct)
	def __init__(self, group_id, model_id, environment_info, beta=None, training=True, parent=None, sibling=None):
		self.parameters_type = eval('tf.{}'.format(flags.parameters_type))
		self.beta = beta if beta is not None else flags.beta
		self.value_count = 2 if flags.split_values else 1
		# initialize
		self.training = training
		self.group_id = group_id
		self.model_id = model_id
		self.id = '{0}_{1}'.format(self.group_id,self.model_id) # model id
		self.parent = parent if parent is not None else self # used for sharing with other models in hierarchy, if any
		self.sibling = sibling if sibling is not None else self # used for sharing with other models in hierarchy, if any
		# Environment info
		action_shape = environment_info['action_shape']
		self.policy_heads = [
			{
				'size':head[0], # number of actions to take
				'depth':head[1] if len(head) > 1 else 0 # number of discrete action types: set 0 for continuous control
			}
			for head in action_shape
		]
		state_shape = environment_info['state_shape']
		self.state_heads = [
			{'shape':head}
			for head in state_shape
		]
		self.state_scaler = environment_info['state_scaler'] # state scaler, for saving memory (eg. in case of RGB input: uint8 takes less memory than float64)
		self.has_masked_actions = environment_info['has_masked_actions']
		# Create the network
		self.build_input_placeholders()
		self.initialize_network()
		self.build_network()
		# Stuff for building the big-batch and optimize training computations
		self._big_batch_feed = [{},{}]
		self._batch_count = [0,0]
		self._train_batch_size = flags.batch_size*flags.big_batch_size
		# Statistics
		self._train_statistics = Statistics(flags.episode_count_for_evaluation)
		#=======================================================================
		# self.loss_distribution_estimator = RunningMeanStd(batch_size=flags.batch_size)
		#=======================================================================
		self.actor_loss_is_too_small = False
    def validate(self, valid_iter, task_type='task'):
        """Validate the model for one task.

        Args:
            valid_iter: validate data iterator
            task_type: ``'task'`` uses the 'tgt' features and
                ``self.valid_loss``; any other value uses 'tgt2' and
                ``self.valid_loss2``.

        Returns:
            :obj:`nmt.Statistics`: validation loss statistics
        """
        # Evaluation mode for the duration of the pass.
        self.model.eval()

        stats = Statistics(task_type=task_type)
        is_first_task = task_type == 'task'
        with torch.no_grad():
            for batch in valid_iter:
                src = make_features(batch, 'src')
                _, src_lengths = batch.src

                # Target side depends on the task.
                tgt = make_features(batch, 'tgt' if is_first_task else 'tgt2')

                # Forward pass.
                outputs, attns = self.model(
                    src, tgt, src_lengths, task_type=task_type)

                # Each task has its own loss object.
                loss_computer = (self.valid_loss
                                 if is_first_task else self.valid_loss2)
                stats.update(
                    loss_computer.monolithic_compute_loss(
                        batch, outputs, attns))

        # Back to training mode.
        self.model.train()

        return stats
    def validate(self, valid_iter):
        """Validate the model, feeding five structure features per batch.

        Args:
            valid_iter: validation data iterator; ``batch.src`` of every
                batch is unpacked as a pair whose second element is used
                as the source lengths.

        Returns:
            :obj:`nmt.Statistics`: validation loss statistics
        """
        # Imported locally so the method does not rely on a module-level
        # ``import torch`` being present.
        import torch

        # Only structure1..structure5 are fed to the model.
        structure_names = ('structure1', 'structure2', 'structure3',
                           'structure4', 'structure5')

        # Set model in validating mode.
        self.model.eval()

        stats = Statistics()

        # Validation needs no gradients; without no_grad() every forward
        # pass would still record an autograd graph.
        with torch.no_grad():
            for batch in valid_iter:
                src = make_features(batch, 'src')
                _, src_lengths = batch.src

                tgt = make_features(batch, 'tgt')

                # Each structure feature is loaded and has dims (0, 1) and
                # then (1, 2) swapped, in the same order as the names above.
                structures = [
                    make_features(batch, name).transpose(0, 1).transpose(1, 2)
                    for name in structure_names
                ]

                # F-prop through the model.
                outputs, attns = self.model(src, tgt, *structures,
                                            src_lengths)

                # Compute loss.
                batch_stats = self.valid_loss.monolithic_compute_loss(
                    batch, outputs, attns)

                # Update statistics.
                stats.update(batch_stats)

        # Set model back to training mode.
        self.model.train()

        return stats
    def train(self, train_iter_fct, valid_iter_fct, train_steps, valid_steps):
        """
        The main training loop.

        Iterates over the training data (``train_iter_fct``), accumulates
        ``self.grad_accum_count`` batches per optimisation step, and runs
        validation (``valid_iter_fct``) every ``valid_steps`` steps.

        Args:
            train_iter_fct(function): a function that returns the train
                iterator. e.g. something like
                train_iter_fct = lambda: generator(*args, **kwargs)
            valid_iter_fct(function): same as train_iter_fct, for valid data
            train_steps(int): last step to run; training starts at
                ``self.optim._step + 1``.
            valid_steps(int): validation is run whenever the step is a
                multiple of this value.

        Return:
            Statistics: the ``total_stats`` object passed to
            ``_gradient_accumulation`` on every step.
        """
        logger.info('Start training...')

        step = self.optim._step + 1
        # Batches collected for the current accumulation group.
        true_batchs = []
        accum = 0
        normalization = 0
        train_iter = train_iter_fct()

        total_stats = Statistics()
        report_stats = Statistics()
        self._start_report_manager(start_time=total_stats.start_time)

        while step <= train_steps:

            reduce_counter = 0
            for i, batch in enumerate(train_iter):
                # With several GPUs each rank takes every n_gpu-th batch.
                if self.n_gpu == 0 or (i % self.n_gpu == self.gpu_rank):
                    if self.gpu_verbose_level > 1:
                        logger.info("GpuRank %d: index: %d accum: %d" %
                                    (self.gpu_rank, i, accum))

                    true_batchs.append(batch)

                    # Normalise by non-padding target tokens or by batch size.
                    if self.norm_method == "tokens":
                        num_tokens = batch.tgt[1:].ne(
                            self.train_loss.padding_idx).sum()
                        normalization += num_tokens.item()
                    else:
                        normalization += batch.batch_size
                    accum += 1
                    if accum == self.grad_accum_count:
                        reduce_counter += 1
                        if self.gpu_verbose_level > 0:
                            logger.info("GpuRank %d: reduce_counter: %d \
                          n_minibatch %d" % (self.gpu_rank, reduce_counter,
                                             len(true_batchs)))
                        # Sum the normalisation values gathered from all ranks.
                        if self.n_gpu > 1:
                            normalization = sum(all_gather_list(normalization))

                        self._gradient_accumulation(true_batchs, normalization,
                                                    total_stats, report_stats)

                        report_stats = self._maybe_report_training(
                            step, train_steps, self.optim.learning_rate,
                            report_stats)

                        # Start a new accumulation group.
                        true_batchs = []
                        accum = 0
                        normalization = 0
                        if (step % valid_steps == 0):
                            if self.gpu_verbose_level > 0:
                                logger.info('GpuRank %d: validate step %d' %
                                            (self.gpu_rank, step))
                            valid_iter = valid_iter_fct()
                            valid_stats = self.validate(valid_iter)
                            if self.gpu_verbose_level > 0:
                                logger.info('GpuRank %d: gather valid stat \
                              step %d' % (self.gpu_rank, step))
                            valid_stats = self._maybe_gather_stats(valid_stats)
                            if self.gpu_verbose_level > 0:
                                logger.info('GpuRank %d: report stat step %d' %
                                            (self.gpu_rank, step))
                            self._report_step(self.optim.learning_rate,
                                              step,
                                              valid_stats=valid_stats)

                        # Only rank 0 calls the checkpoint hook.
                        if self.gpu_rank == 0:
                            self._maybe_save(step)
                        step += 1
                        if step > train_steps:
                            break
            if self.gpu_verbose_level > 0:
                logger.info('GpuRank %d: we completed an epoch \
                    at step %d' % (self.gpu_rank, step))
            # A fresh iterator is created after every pass over the data.
            train_iter = train_iter_fct()

        return total_stats
Пример #25
0
 def __init__(self, cf):
     """Initialise the base class and keep the configuration.

     Args:
         cf: configuration object, stored on ``self.cf``.
     """
     super(Model, self).__init__()
     self.cf = cf
     # The url attribute starts out unset.
     self.url = None
     self.best_stats = Statistics()
Пример #26
0
def train(model, criterion, optimizer, train_loader, val_loader, args,
          single_view):
    """
    Train ``model`` with early stopping on the top-1 validation accuracy.

    Every epoch runs one optimisation pass over ``train_loader``, evaluates
    the model with ``validate`` on ``val_loader`` and logs both sets of
    statistics and the epoch duration to wandb. Whenever the validation
    top-1 average improves, the model is saved to ``args.fname_best``.

    Training ends after ``args.epochs`` epochs, or earlier as soon as
    ``args.patience`` consecutive epochs bring no improvement. In both cases
    the best top-1 accuracy and the epoch it was reached are written to the
    wandb run summary (the epoch is ``-1`` when no epoch ever beat the
    initial accuracy of 0).

    Parameters
    ----------
    model : model to train; switched to train mode at every epoch
    criterion : loss passed through to ``execute_batch`` and ``validate``
    optimizer : PyTorch optimizer stepped once per batch
    train_loader : training data loader; its ``dataset.indices`` is
        re-permuted each epoch unless ``single_view`` is true
    val_loader : validation data loader used for early stopping; its
        ``dataset.dataset.classes`` is forwarded to ``log_data``
    args : parsed arguments (``epochs``, ``patience``, ``nview``, ``arch``,
        ``fname_best`` are read here)
    single_view : when true, the per-epoch index permutation is skipped

    Returns
    -------
    Nothing
    """
    best_prec1 = 0
    # -1 marks "no improving epoch yet"; this equals ``epoch - patience``
    # when early stopping triggers before any improvement.
    best_epoch = -1
    epoch_no_improve = 0

    for epoch in range(args.epochs):
        if not single_view:
            # Give random permutation to the images
            permuted = random_permute_indices(
                np.array(train_loader.dataset.indices), args.nview)
            train_loader.dataset.indices = np.asarray(permuted)
            del permuted

        statistics = Statistics()

        # switch to train mode
        model.train()
        start_time = time.time()
        for input_val, target_val in train_loader:
            loss, (prec1, prec5), y_pred, y_true = execute_batch(
                model, criterion, input_val, target_val, args, single_view)

            statistics.update(loss.detach().cpu().numpy(), prec1, prec5,
                              y_pred, y_true)
            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Drop the loss reference and release cached GPU memory before
            # the next batch.
            del loss
            torch.cuda.empty_cache()

        elapsed_time = time.time() - start_time

        # Evaluate on validation set
        val_statistics = validate(val_loader, model, criterion, args,
                                  single_view)

        class_names = val_loader.dataset.dataset.classes
        log_data(statistics, "train", class_names, epoch)
        log_data(val_statistics, "internal_val", class_names, epoch)

        wandb.log({"Epoch elapsed time": elapsed_time}, step=epoch)

        # Save best model and best prediction
        if val_statistics.top1.avg > best_prec1:
            best_prec1 = val_statistics.top1.avg
            best_epoch = epoch
            save_model(args.arch, model, optimizer, args.fname_best)
            epoch_no_improve = 0
        else:
            # Early stopping
            epoch_no_improve += 1
            if epoch_no_improve == args.patience:
                break

    # Report the best result whether training stopped early or simply ran
    # out of epochs; previously the summary was only written on early stop.
    wandb.run.summary["best_internal_val_top1_accuracy"] = best_prec1
    wandb.run.summary["best_internal_val_top1_accuracy_epoch"] = best_epoch
Пример #27
0
 def __init__(self, socket_mock=None):
     """Initialise the base class, a fresh statistics holder and the socket mock.

     ``socket_mock`` (``None`` by default) is stored unchanged as
     ``self._socket_mock``.
     """
     super().__init__()
     self._socket_mock = socket_mock
     self.statistics = Statistics()
Пример #28
0
def train(model, criterion, optimizer, train_loader, val_loader, args):
    """
    Train the model on the training data loader and stop early when the top-1 validation accuracy has not improved
    for args.patience consecutive epochs (or after args.epochs epochs at the latest). Early stopping is evaluated on
    the validation data loader.

    All the data are sent to wandb or logged to a text file. More precisely, for each epoch the validation and
    training performance are sent to wandb, and every args.print_freq batches the batch performance is logged through
    the module logger. At the end of the training, whether it stopped early or ran through every epoch, the best
    top-1 validation accuracy and its epoch are written to the wandb run summary (the epoch is -1 when no epoch ever
    beat the initial accuracy of 0).

    Parameters
    ----------
    model : RotationNet model
    criterion : Pytorch criterion (CrossEntropy for RotationNet)
    optimizer : Pytorch optimizer (e.g. SGD)
    train_loader : Data loader with training data (this must be created with a subset)
    val_loader : Data loader with validation data for early stopping (this must be created with a subset)
    args : Input args from the parser

    Returns
    -------
    Nothing
    """
    # Best prediction
    best_prec1 = 0
    # -1 marks "no improving epoch yet"; this equals ``epoch - patience`` when early stopping triggers before any
    # improvement.
    best_epoch = -1
    epoch_no_improve = 0

    for epoch in range(args.epochs):
        # Give random permutation to the images
        permuted = random_permute_indices(np.array(train_loader.dataset.indices), args.nview, False)
        train_loader.dataset.indices = np.asarray(permuted)
        del permuted

        statistics = Statistics()

        # switch to train mode
        model.train()
        start_time = time.time()
        for batch_idx, (input_val, target_val) in enumerate(train_loader):
            loss, (prec1, prec5), y_pred, y_true = execute_batch_aligned(model, criterion, input_val,
                                                                         target_val, args)

            statistics.update(loss.detach().cpu().numpy(), prec1, prec5, y_pred, y_true)
            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if batch_idx % args.print_freq == 0:
                logger.debug('Batch: [{0}/{1}]\t'
                             'Loss {loss:.4f} \t'
                             'Prec@1 {top1:.3f} \t'
                             'Prec@5 {top5:.3f}'.format(batch_idx, len(train_loader), loss=loss.data, top1=prec1,
                                                        top5=prec5))
            # Drop the loss reference and release cached GPU memory before the next batch.
            del loss
            torch.cuda.empty_cache()

        elapsed_time = time.time() - start_time

        logger.debug("Evaluating epoch {}".format(epoch))

        # Evaluate on validation set
        val_statistics = validate(val_loader, model, criterion, args)

        class_names = val_loader.dataset.dataset.classes
        log_data(statistics, "train", class_names, epoch)
        log_data(val_statistics, "internal_val", class_names, epoch)

        wandb.log({"Epoch elapsed time": elapsed_time}, step=epoch)

        # Save best model and best prediction
        if val_statistics.top1.avg > best_prec1:
            best_prec1 = val_statistics.top1.avg
            best_epoch = epoch
            save_model(args.arch, model, optimizer, args.fname_best)
            epoch_no_improve = 0
        else:
            # Early stopping
            epoch_no_improve += 1
            if epoch_no_improve == args.patience:
                logger.debug("Stopping at epoch {} for early stopping (best was at epoch {})".format(epoch,
                                                                                                     best_epoch))
                break

    # Report the best result whether training stopped early or simply ran out of epochs; previously the summary was
    # only written on the early-stopping path, contradicting the documented behaviour.
    wandb.run.summary["best_internal_val_top1_accuracy"] = best_prec1
    wandb.run.summary["best_internal_val_top1_accuracy_epoch"] = best_epoch
Пример #29
0
def compute_val_stat(data_dev, words, global_entity_list, stats, args):
    """
    Score one batch of generated sequences and fold the result into ``stats``.

    Each row of ``np.transpose(words)`` is read as one generated token
    sequence, cut at the first ``'<EOS>'`` token, joined with spaces and
    paired with the reference ``data_dev['trg_plain'][i]``. When
    ``args.dataset == 'kvr'`` the two values returned by
    ``Tree2Seq.compute_prf`` are accumulated separately for the entity sets
    ``entity``, ``entity_cal``, ``entity_nav`` and ``entity_wet``.

    References, hypotheses and the ``$``-tagged source tokens are appended,
    one line per example, to ``./tmp/<args.experiment>_{ref,hyp,src}_s.txt``
    (the ``./tmp`` directory is not created here), and a BLEU score is
    computed with ``moses_multi_bleu`` on the lower-cased batch.

    Parameters
    ----------
    data_dev : mapping with per-example sequences under ``'trg_plain'``,
        ``'src_plain'``, ``'kb_plain'`` and the four ``'entity*'`` keys
    words : generated tokens; transposed so that each row is one example
        (presumably shaped (time, batch) -- confirm against the caller)
    global_entity_list : forwarded unchanged to ``Tree2Seq.compute_prf``
    stats : indexable collection whose first five items are updated in
        place, in the order entity, entity_cal, entity_nav, entity_wet, BLEU
    args : parsed arguments (``dataset`` and ``experiment`` are read here)

    Returns
    -------
    The same ``stats`` object that was passed in.
    """
    entity_keys = ('entity', 'entity_cal', 'entity_nav', 'entity_wet')
    true_totals = dict.fromkeys(entity_keys, 0)
    pred_totals = dict.fromkeys(entity_keys, 0)

    ref = []
    hyp = []
    src_lines = []

    for i, row in enumerate(np.transpose(words)):
        # Keep the tokens generated before the end-of-sequence marker.
        tokens = []
        for token in row:
            if token == '<EOS>':
                break
            tokens.append(token)
        st = ' '.join(tokens).strip()
        correct = data_dev['trg_plain'][i].strip()

        # Entity scores are only defined for the 'kvr' dataset.
        if args.dataset == 'kvr':
            for key in entity_keys:
                f1_true, count = Tree2Seq.compute_prf(
                    data_dev[key][i], st.split(), global_entity_list,
                    data_dev['kb_plain'][i])
                true_totals[key] += f1_true
                pred_totals[key] += count

        src_lines.append(" ".join(
            item[0] for item in data_dev['src_plain'][i] if '$' in item[1]))
        ref.append(str(correct))
        hyp.append(str(st))

    # Append this batch to the per-experiment dump files, one example per line.
    for tag, lines in (('ref', ref), ('hyp', hyp), ('src', src_lines)):
        with open('./tmp/{}_{}_s.txt'.format(args.experiment, tag), 'a+') as f:
            f.write("".join(line + "\n" for line in lines))

    # compute the bleu score
    bleu_score = moses_multi_bleu(np.array(hyp), np.array(ref), lowercase=True)

    new_stats = [
        Statistics(n_correct=true_totals[key], n_words=pred_totals[key])
        for key in entity_keys
    ]
    new_stats.append(Statistics(n_correct=bleu_score, n_words=1))

    for i, new_stat in enumerate(new_stats):
        stats[i].update(new_stat)

    return stats
def _camera_extrinsics(model, args):
    """
    Return ``(R, T, vertices)`` for the model's current camera parameters.

    With ``args.net_version == 1`` the cameras look from
    ``model.camera_position`` directly and ``vertices`` is an empty list.
    Otherwise ``model.vertices`` is scaled and rotated about X, Y and Z by the
    entries of ``model.camera_position`` (weighted by ``model.distance_range``
    and ``model.angle_range``), and the first ``model.nviews`` transformed
    vertices are used as camera positions.
    """
    if args.net_version == 1:
        R = look_at_rotation(model.camera_position, device=args.device)
        T = -torch.bmm(R.transpose(1, 2),
                       model.camera_position[:, :, None])[:, :, 0]
        return R, T, []

    transform = Transform3d(device=model.device).scale(
        model.camera_position[3] * model.distance_range)
    for component, axis in enumerate(("X", "Y", "Z")):
        transform = transform.rotate_axis_angle(
            model.camera_position[component] * model.angle_range,
            axis=axis,
            degrees=False)

    vertices = transform.transform_points(model.vertices)

    R = look_at_rotation(vertices[:model.nviews], device=model.device)
    T = -torch.bmm(R.transpose(1, 2),
                   vertices[:model.nviews, :, None])[:, :, 0]
    return R, T, vertices


def train(model, criterion, optimizer, train_loader, val_loader, args,
          max_epochs=1000, patience=20):
    """
    Train the views network with early stopping on top-1 validation accuracy.

    Every epoch runs one optimisation pass over ``train_loader``, evaluates
    with ``validate`` on ``val_loader``, logs both sets of statistics, the
    epoch duration, a plot of the current cameras and a grid of rendered
    views to wandb. Whenever the validation top-1 average improves, the model
    is saved to ``args.fname_best`` under the name ``"views_net"``.

    Training ends after ``max_epochs`` epochs, or earlier as soon as
    ``patience`` consecutive epochs bring no improvement. In both cases the
    best top-1 accuracy and its epoch are written to the wandb run summary
    (the epoch is ``-1`` when no epoch ever beat the initial accuracy of 0).

    Parameters
    ----------
    model : model to train; exposes ``camera_position`` and, for
        ``args.net_version != 1``, ``vertices``, ``nviews``, ``device``,
        ``distance_range`` and ``angle_range``
    criterion : loss passed through to ``execute_batch`` and ``validate``
    optimizer : PyTorch optimizer stepped once per batch
    train_loader : training data loader
    val_loader : validation data loader; its ``dataset.dataset.classes`` is
        forwarded to ``log_data``
    args : parsed arguments (``device``, ``net_version``, ``nviews`` and
        ``fname_best`` are read here)
    max_epochs : upper bound on the number of epochs (default 1000, the
        previously hard-coded value)
    patience : number of consecutive non-improving epochs that triggers
        early stopping (default 20, the previously hard-coded value)

    Returns
    -------
    Nothing
    """
    best_prec1 = 0
    # -1 marks "no improving epoch yet"; this equals ``epoch - patience``
    # when early stopping triggers before any improvement.
    best_epoch = -1
    epoch_no_improve = 0

    for epoch in range(max_epochs):

        statistics = Statistics()
        model.train()
        start_time = time.time()

        for batch_input, target in train_loader:
            loss, (prec1, prec5), y_pred, y_true = execute_batch(
                model, criterion, batch_input, target, args.device)

            statistics.update(loss.detach().cpu().numpy(), prec1, prec5,
                              y_pred, y_true)
            # compute gradient and do optimizer step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Drop the loss reference and release cached GPU memory before
            # the next batch.
            del loss
            torch.cuda.empty_cache()

        elapsed_time = time.time() - start_time

        # Evaluate on validation set
        val_statistics = validate(val_loader, model, criterion, args.device)

        class_names = val_loader.dataset.dataset.classes
        log_data(statistics, "train", class_names, epoch)
        log_data(val_statistics, "internal_val", class_names, epoch)

        wandb.log({"Epoch elapsed time": elapsed_time}, step=epoch)

        # Log the camera layout and the rendered views for every epoch.
        R, T, vertices = _camera_extrinsics(model, args)
        cameras = OpenGLPerspectiveCameras(R=R, T=T, device=args.device)
        wandb.log(
            {
                "Cameras":
                [wandb.Image(plot_camera_scene(cameras, args.device))]
            },
            step=epoch)
        plt.close()
        images = render_shape(model, R, T, args, vertices)
        wandb.log(
            {
                "Views": [
                    wandb.Image(
                        image_grid(images,
                                   rows=int(np.ceil(args.nviews / 2)),
                                   cols=2))
                ]
            },
            step=epoch)
        plt.close()

        # Save best model and best prediction
        if val_statistics.top1.avg > best_prec1:
            best_prec1 = val_statistics.top1.avg
            best_epoch = epoch
            save_model("views_net", model, optimizer, args.fname_best)
            epoch_no_improve = 0
        else:
            # Early stopping
            epoch_no_improve += 1
            if epoch_no_improve == patience:
                break

    # Report the best result whether training stopped early or simply ran
    # out of epochs; previously the summary was only written on early stop.
    wandb.run.summary["best_internal_val_top1_accuracy"] = best_prec1
    wandb.run.summary["best_internal_val_top1_accuracy_epoch"] = best_epoch