Example #1
    def create_model(self):
        self.build_model_with_am()
        P.write_to_log(self.model)
        if self.execute_from_model:
            self.model.load_state_dict(self.model_state_dict)
            P.write_to_log("recovery model:", self.model, "current epoch = {}".format(self.current_epoch))
        return self.model
Example #2
    def __init__(self,
                 input_path: str = None,
                 target_path: str = None,
                 cache_input_path: str = None,
                 cache_target_path: str = None,
                 image_suffixes: list = None):
        self.input_path = input_path
        self.target_path = target_path
        # avoid the mutable-default-argument pitfall: build the default list per call
        self.image_suffixes = (image_suffixes if image_suffixes is not None
                               else ['.jpg', '.png', P.cached_extension])

        # create cached path if not exists
        if cache_input_path is None:
            cache_input_path = os.path.join(input_path, "cached")
            os.makedirs(cache_input_path, exist_ok=True)
            print("created cache input dir: {}".format(cache_input_path))
            P.write_to_log(
                "created cache input dir: {}".format(cache_input_path))
        if cache_target_path is None:
            cache_target_path = os.path.join(target_path, "cached")
            os.makedirs(cache_target_path, exist_ok=True)
            print("created cache target dir: {}".format(cache_target_path))
            P.write_to_log(
                "created cache target dir: {}".format(cache_target_path))
        self.cache_input_path = cache_input_path
        self.cache_target_path = cache_target_path

        self.data = None
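
A minimal construction sketch, assuming this __init__ belongs to the DatasetLoader class that Examples #13 and #14 operate on (the paths below are hypothetical):

# Hypothetical usage; cache dirs default to "<path>/cached" and are created automatically.
loader = DatasetLoader(input_path="/data/input", target_path="/data/target")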
Example #3
    def test(self, model, test_set, l_loss, m_loss):
        model.train(mode=False)
        loss_classification_sum = 0
        loss_segmentation_sum = 0
        accuracy_classification_sum = 0
        batch_count = 0
        for images, segments, labels in test_set:
            labels, segments = model_utils.reduce_to_class_number(self.left_class_number, self.right_class_number,
                                                                  labels,
                                                                  segments)
            images, labels, segments = self.convert_data_and_label(images, labels, segments)
            segments_list = []
            for puller in self.puller:
                segments_list.append(puller(segments))
            model_classification, model_segmentation = model_utils.wait_while_can_execute(model, images)

            classification_loss = l_loss(model_classification, labels)
            if self.use_mloss:
                sum_segm_loss = None
                for ms, sl in zip(model_segmentation, segments_list):
                    segmentation_loss = m_loss(ms, sl)  # use the loss passed in, as with l_loss
                    if sum_segm_loss is None:
                        sum_segm_loss = segmentation_loss
                    else:
                        sum_segm_loss += segmentation_loss

            output_probability, output_cl, cl_acc = self.calculate_accuracy(labels, model_classification,
                                                                            labels.size(0))

            self.save_test_data(labels, output_cl, output_probability)

            # accumulate information
            accuracy_classification_sum += model_utils.scalar(cl_acc.sum())
            loss_classification_sum += model_utils.scalar(classification_loss.sum())
            if self.use_mloss:
                loss_segmentation_sum += model_utils.scalar(sum_segm_loss.sum())
            batch_count += 1
            # self.de_convert_data_and_label(images, labels)
            # torch.cuda.empty_cache()

        f_1_score_text, recall_score_text, precision_score_text = metrics_processor.calculate_metric(self.classes,
                                                                                                     self.test_trust_answers,
                                                                                                     self.test_model_answers)

        loss_classification_sum /= batch_count + p.EPS
        accuracy_classification_sum /= batch_count + p.EPS
        loss_segmentation_sum /= batch_count + p.EPS
        text = 'TEST={} Loss_CL={:.5f} Loss_M={:.5f} Accuracy_CL={:.5f} {} {} {} '.format(self.current_epoch,
                                                                                          loss_classification_sum,
                                                                                          loss_segmentation_sum,
                                                                                          accuracy_classification_sum,
                                                                                          f_1_score_text,
                                                                                          recall_score_text,
                                                                                          precision_score_text)
        p.write_to_log(text)
        model.train(mode=True)
        return loss_classification_sum, accuracy_classification_sum
Example #4
    def save_model(self, weights):
        # Previous saving logic, kept for reference:
        # name = self.description + "_date-" + datetime.today().strftime('%Y-%m-%d-_-%H_%M_%S') + ".torch"
        # try:
        #     saved_dir = os.path.join(p.base_data_dir, 'model_weights')
        #     os.makedirs(saved_dir, exist_ok=True)
        #     saved_file = os.path.join(saved_dir, name)
        #     torch.save(weights, saved_file)
        #     print("Save model: {}".format(name))
        #     p.write_to_log("Save model: {}".format(name))
        # except Exception as e:
        #     print("Can't save model: {}".format(name), e)
        #     p.write_to_log("Can't save model: {}".format(name), e)
        p.write_to_log("no need to save weights here")
Example #5
    def initialize_logs(self):
        P.initialize_log_name(self.run_name, self.algorithm_name,
                              self.description, self.model_identifier)

        P.write_to_log("description={}".format(self.description))
        P.write_to_log("classes={}".format(self.classes))
        P.write_to_log("run=" + self.run_name)
        P.write_to_log("algorithm_name=" + self.algorithm_name)
Example #6
    def test(self, model, test_set, l_loss, m_loss=None):
        loss_classification_sum = 0
        accuracy_classification_sum = 0
        batch_count = 0
        model.train(mode=False)  # evaluate the model that was passed in, not self.model
        for images, segments, labels in test_set:
            labels, segments = model_utils.reduce_to_class_number(self.left_class_number, self.right_class_number,
                                                                  labels,
                                                                  segments)
            images, labels, segments = self.convert_data_and_label(images, labels, segments)
            model_classification = model_utils.wait_while_can_execute_single(model, images)

            sigmoid = nn.Sigmoid()  # used for calculate accuracy
            model_classification = sigmoid(model_classification)
            classification_loss = l_loss(model_classification, labels)

            output_probability, output_cl, cl_acc = self.calculate_accuracy(labels, model_classification,
                                                                            labels.size(0))

            self.save_test_data(labels, output_cl, output_probability)

            # accumulate information
            accuracy_classification_sum += model_utils.scalar(cl_acc.sum())
            loss_classification_sum += model_utils.scalar(classification_loss.sum())
            batch_count += 1
            # self.de_convert_data_and_label(images, labels)
            # torch.cuda.empty_cache()

        f_1_score_text, recall_score_text, precision_score_text = metrics_processor.calculate_metric(self.classes,
                                                                                                     self.test_trust_answers,
                                                                                                     self.test_model_answers)

        loss_classification_sum /= batch_count + p.EPS
        accuracy_classification_sum /= batch_count + p.EPS
        text = 'TEST={} Loss_CL={:.5f} Accuracy_CL={:.5f} {} {} {} '.format(self.current_epoch,
                                                                            loss_classification_sum,
                                                                            accuracy_classification_sum,
                                                                            f_1_score_text,
                                                                            recall_score_text,
                                                                            precision_score_text)
        p.write_to_log(text)

        return loss_classification_sum, accuracy_classification_sum
Example #7
def calculate_metric(classes, trust_answers, model_answer):
    """
    Calculate f1 score, precision, recall.
    :param classes: class count
    :param trust_answers: list of lists with trust answers
    :param model_answer: list of lists with model answers
    :return: tuple with f1 score, recall score, precision score
    """
    try:
        class_metric = 'binary' if classes == 1 else 'macro'
        class_metric_for_one_class = 'binary'

        f_1_score_text = ""
        for i in range(classes):
            f_1_score_text += "f_1_{}={:.5f} ".format(i, metrics.f1_score(trust_answers[i],
                                                                          model_answer[i],
                                                                          average=class_metric_for_one_class))
        recall_score_text = ""
        for i in range(classes):
            recall_score_text += "recall_{}={:.5f} ".format(i, metrics.recall_score(trust_answers[i],
                                                                                    model_answer[i],
                                                                                    average=class_metric_for_one_class))

        precision_score_text = ""
        for i in range(classes):
            precision_score_text += "precision_{}={:.5f} ".format(i, metrics.precision_score(trust_answers[i],
                                                                                             model_answer[i],
                                                                                             average=class_metric_for_one_class))

        trust_answer_1, model_answer_1 = __to_global(trust_answers, model_answer, classes)
        # assert trust_answer_1 == trust_answers[0]

        f_1_score_text += "f_1_global={:.5f}".format(
            metrics.f1_score(trust_answer_1, model_answer_1, average=class_metric))
        recall_score_text += "recall_global={:.5f}".format(
            metrics.recall_score(trust_answer_1, model_answer_1, average=class_metric))
        precision_score_text += "precision_global={:.5f}".format(
            metrics.precision_score(trust_answer_1, model_answer_1, average=class_metric))
        return f_1_score_text, recall_score_text, precision_score_text
    except ValueError as e:
        P.write_to_log("trust_answers: ", trust_answers)
        P.write_to_log("model_answers: ", model_answer)
        exit(0)
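
A usage sketch, assuming metrics is sklearn.metrics (as the average='binary'/'macro' arguments suggest); the answer lists are hypothetical:

# Two classes, one list per class, matching the loops above.
trust_answers = [[1, 0, 1, 1], [0, 0, 1, 0]]
model_answer = [[1, 0, 0, 1], [0, 1, 1, 0]]
f1_text, recall_text, precision_text = calculate_metric(2, trust_answers, model_answer)
# f1_text is a string like "f_1_0=... f_1_1=... f_1_global=..."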
Example #8
def infinity_server(q: list):
    strategy_queue.extend(q)

    p.initialize_log_name("NO_NUMBER", "NO_ALGORITHM", "FOR_EXEC_PURPOSE")
    global actual_property_index, alive_process
    actual_property_context = None
    while True:
        property_context, actual_property_index = pp.process_property_file(
            PROPERTY_FILE, actual_property_index)
        strategy_lock.acquire()
        try:
            if property_context != actual_property_context:
                # update
                actual_property_context = property_context
                register_commands(actual_property_context)
                p.write_to_log("=" * 20)
                print_status_info()
                p.write_to_log("=" * 20)
            if len(strategy_queue) == 0:
                continue
            strategy_name, strategy_memory, strategy_arguments = strategy_queue.popleft()

            gpu = ru.found_gpu(nsmi.NVLog(), int(strategy_memory),
                               actual_property_context.banned_gpu,
                               actual_property_context.max_thread_on_gpu)

            if gpu == -1:
                strategy_queue.appendleft(
                    (strategy_name, strategy_memory, strategy_arguments))
                continue
            if alive_process >= actual_property_context.max_alive_threads:
                strategy_queue.appendleft(
                    (strategy_name, strategy_memory, strategy_arguments))
                continue

            thread = Thread(target=start_strategy,
                            args=(strategy_name, strategy_memory, gpu,
                                  strategy_arguments))
            thread.start()
            thread_list.append(thread)
            mapper_list.append(
                (strategy_name, strategy_memory, gpu, strategy_arguments))
            alive_process += 1

            p.write_to_log("-" * 20)
            print_status_info()
            p.write_to_log("-" * 20)
        finally:
            strategy_lock.release()
            time.sleep(SLEEP_SECONDS)
Example #9
def wait_while_can_execute_single(model, images):
    """
    Execute the same images on model, while execute won't fail with error,
    if executed limit reached then break
    :param model: model with attention module
    :param images: passed to model images
    :return: tuple with result of model(images)
    """
    flag = True
    cnt = 0
    model_classification = None
    while cnt != p.TRY_CALCULATE_MODEL and flag:
        try:
            cnt += 1
            model_classification = model(images)
            flag = False
            #torch.cuda.empty_cache()
        except RuntimeError as e:
            time.sleep(5)
            p.write_to_log("Can't execute model (likely CUDA out of memory)", e)
    return model_classification
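
A calling sketch: the helper returns None when every attempt raised, so the caller should handle that case (names reused from the surrounding examples):

model_classification = wait_while_can_execute_single(model, images)
if model_classification is None:
    # all p.TRY_CALCULATE_MODEL attempts failed
    p.write_to_log("model never executed successfully, skipping batch")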
Example #10
def start_strategy(executor_name: str, memory_usage: int, gpu: int,
                   algorithms_params: dict):
    global alive_process
    executor_name = os.path.join(DIPLOMA_DIR, "executors", executor_name)
    args = [PYTHON_EXECUTOR_NAME, executor_name, "--gpu", str(gpu)]

    for k, v in algorithms_params.items():
        args.append(k)
        args.append(str(v))

    cmd = " ".join(args)

    current_time = datetime.today().strftime('%Y-%m-%d-_-%H_%M_%S')
    p.write_to_log("time = {} BEGIN execute: {}".format(current_time, cmd))
    status = os.system(cmd)
    current_time = datetime.today().strftime('%Y-%m-%d-_-%H_%M_%S')
    p.write_to_log("time = {} END execute: {}, status = {}".format(
        current_time, cmd, status))

    strategy_lock.acquire()
    try:
        if status != 0:
            p.write_to_log("Failed algorithm execution: {}, status={}".format(
                cmd, status))

            copy_alg_params = dict(algorithms_params)
            copy_alg_params['--execute_from_model'] = 'True'
            strategy_queue.appendleft(
                (executor_name, memory_usage, copy_alg_params))

        alive_process -= 1
    finally:
        strategy_lock.release()
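
For concreteness, a hypothetical command this function would assemble and run (the executor name and parameters below are invented):

# With PYTHON_EXECUTOR_NAME = "python3", executor_name = "am_executor.py", gpu = 2
# and algorithms_params = {"--epochs": 150}, os.system() receives:
#   python3 <DIPLOMA_DIR>/executors/am_executor.py --gpu 2 --epochs 150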
Example #11
    def safe_train(self):
        try:
            self.train_strategy()
            exit(0)
        except BaseException as e:
            if isinstance(e, SystemExit) and e.code == 0:
                P.write_to_log("Exit with 0")
                return
            print("EXCEPTION", e)
            print(type(e))
            P.write_to_log("EXCEPTION", e, type(e))
            P.write_to_log(traceback.extract_tb(e.__traceback__))

            P.save_raised_model(self.model, self.strategy.current_epoch,
                                self.model_identifier, self.run_name,
                                self.algorithm_name)
            P.write_to_log("saved model, exception raised")
            exit(1)
Example #12
def load_balanced_dataset(train_size: int, seed: int, image_size: int):
    train_set, test_set, train_count = il.load_data(train_size, seed,
                                                    image_size)

    train_set = balance_dataset(train_set, train_size, train_size // 2)
    # test_set = balance_dataset(test_set, len(test_set), len(test_set) // 2)

    P.write_to_log("========")
    P.write_to_log("balanced TRAIN size: ", calculate_stat(train_set),
                   " full size: ", len(train_set))
    P.write_to_log("balanced TEST size: ", calculate_stat(test_set),
                   " full size: ", len(test_set))
    return train_set, test_set, train_count
Example #13
    def save_images_to_tensors(self):
        # print(self.input_path, self.target_path)
        data = self.__merge_data(self.input_path, self.target_path)
        data_len = len(data)
        for idx, dct in enumerate(data):
            for item in P.labels_attributes:
                self.__save_torch(dct, item, self.cache_target_path)
            self.__save_torch(dct, P.input_attribute, self.cache_input_path)
            print("=" * 10)
            print("save: {} of {} elements".format(idx + 1, data_len))
            P.write_to_log("=" * 10)
            P.write_to_log("save: {} of {} elements".format(idx + 1, data_len))
        print("all saved successfully")
        P.write_to_log("all saved successfully")
Example #14
def load_data(train_size: int, seed: int, image_size: int):
    loader = DatasetLoader.initial()
    all_data = prepare_data(
        loader.load_tensors(0, train_size * 2, 10**20, image_size))
    # all_data = prepare_data(loader.load_tensors(None, None))
    log = "set size: {}, set by classes: {}".format(len(all_data),
                                                    count_size(all_data))
    P.write_to_log(log)
    random.Random(seed).shuffle(all_data)
    test_set = all_data[train_size:]
    train_set = all_data[:train_size]
    log = "TEST set size: {}, test set by classes: {}".format(
        len(test_set), count_size(test_set))
    P.write_to_log(log)

    train_count = count_size(train_set)
    log = "TRAIN set size: {}, train set by classes: {}".format(
        len(train_set), train_count)
    P.write_to_log(log)

    return train_set, test_set, train_count
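
Call shape, matching the call site in Example #12 (the values are hypothetical):

train_set, test_set, train_count = load_data(train_size=1000, seed=42, image_size=224)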
Example #15
    def train(self):
        if self.is_vgg_model:
            classifier_optimizer = torch.optim.Adam(gr.register_weights("classifier", self.am_model),
                                                    lr=self.classifier_learning_rate)
            attention_module_optimizer = torch.optim.Adam(gr.register_weights("attention", self.am_model),
                                                          lr=self.attention_module_learning_rate)
        else:
            classifier_optimizer = torch.optim.Adam(rgr.register_weights("classifier", self.am_model),
                                                    lr=self.classifier_learning_rate)
            attention_module_optimizer = torch.optim.Adam(rgr.register_weights("attention", self.am_model),
                                                          lr=self.attention_module_learning_rate)

        self.best_weights = copy.deepcopy(self.am_model.state_dict())
        best_loss = None
        best_test_loss = None

        while self.current_epoch <= self.train_epochs:

            accuracy_classification_sum_classifier = 0
            accuracy_classification_sum_segments = 0
            loss_l1_sum = 0
            # classifier_optimizer = self.apply_adaptive_learning(classifier_optimizer, learning_rate,
            #                                                       self.current_epoch)

            if self.current_epoch <= self.pre_train_epochs:
                accuracy_classification_sum_segments, loss_m_sum, loss_l1_sum, loss_classification_sum_classifier = \
                    self.train_segments(self.am_model, self.l_loss, self.m_loss, attention_module_optimizer,
                                        self.train_segments_set)
                attention_module_optimizer.zero_grad()
            else:
                loss_classification_sum_classifier, accuracy_classification_sum_classifier, loss_m_sum = \
                    self.train_classifier(self.am_model, self.l_loss, self.m_loss, classifier_optimizer,
                                          self.train_segments_set)
                classifier_optimizer.zero_grad()
            accuracy_total = accuracy_classification_sum_segments + accuracy_classification_sum_classifier
            loss_total = loss_classification_sum_classifier + loss_m_sum

            prefix = "PRETRAIN" if self.current_epoch <= self.pre_train_epochs else "TRAIN"
            f_1_score_text, recall_score_text, precision_score_text = metrics_processor.calculate_metric(self.classes,
                                                                                                         self.train_trust_answers,
                                                                                                         self.train_model_answers)

            text = "{}={} Loss_CL={:.5f} Loss_M={:.5f} Loss_L1={:.5f} Loss_Total={:.5f} Accuracy_CL={:.5f} " \
                   "{} {} {} ".format(prefix, self.current_epoch, loss_classification_sum_classifier,
                                      loss_m_sum,
                                      loss_l1_sum,
                                      loss_total,
                                      accuracy_total,
                                      f_1_score_text,
                                      recall_score_text,
                                      precision_score_text)

            p.write_to_log(text)

            if self.current_epoch % self.test_each_epoch == 0:
                test_loss, _ = self.test(self.am_model, self.test_set, self.l_loss, self.m_loss)
                if best_test_loss is None or test_loss < best_test_loss:
                    best_test_loss = test_loss
                    self.best_test_weights = copy.deepcopy(self.am_model.state_dict())
            if self.current_epoch % 200 == 0:
                self.take_snapshot(self.train_segments_set, self.am_model, "TRAIN_{}".format(self.current_epoch))
                self.take_snapshot(self.test_set, self.am_model, "TEST_{}".format(self.current_epoch))

            if best_loss is None or loss_total < best_loss:
                best_loss = loss_total
                self.best_weights = copy.deepcopy(self.am_model.state_dict())

            self.clear_temp_metrics()
            self.current_epoch += 1

        self.save_model(self.best_test_weights)
        self.save_model(self.best_weights)
Example #16
def found_gpu(smi, max_algorithm_memory: int, banned_gpu: int,
              max_thread_on_gpu: int) -> int:
    p.write_to_log("list of gpu:")
    p.write_to_log([
        str(idx) + " " + str(smi['Attached GPUs'][gpu]['Minor Number']) + " " +
        smi['Attached GPUs'][gpu]['FB Memory Usage']['Free'].split()[0] + "| "
        for idx, gpu in enumerate(smi['Attached GPUs'])
    ])
    p.write_to_log("Mapper=", MAPPER)
    p.write_to_log("need memory", max_algorithm_memory)
    for idx, k in enumerate(smi['Attached GPUs']):
        gpu = int(smi['Attached GPUs'][k]['Minor Number'])
        free_memory = int(
            smi['Attached GPUs'][k]['FB Memory Usage']['Free'].split()[0])
        if banned_gpu == gpu:
            current_time = datetime.today().strftime('%Y-%m-%d-_-%H_%M_%S')
            p.write_to_log("time = {}, gpu = {} is banned".format(
                current_time, gpu))
            continue
        if smi['Attached GPUs'][k]['Processes'] is not None and len(
                smi['Attached GPUs'][k]['Processes']) >= max_thread_on_gpu:
            current_time = datetime.today().strftime('%Y-%m-%d-_-%H_%M_%S')
            p.write_to_log(
                "time = {}, gpu = {} has processes = {} but max processes = {}"
                .format(current_time, gpu,
                        len(smi['Attached GPUs'][k]['Processes']),
                        max_thread_on_gpu))
            continue

        if free_memory < max_algorithm_memory:
            current_time = datetime.today().strftime('%Y-%m-%d-_-%H_%M_%S')
            p.write_to_log(
                "time = {}, gpu = {} has free memory = {}, but required = {}".
                format(current_time, gpu, free_memory, max_algorithm_memory))
            continue
        current_time = datetime.today().strftime('%Y-%m-%d-_-%H_%M_%S')
        p.write_to_log("time = {} found gpu = {}".format(
            current_time, MAPPER[gpu]))
        return MAPPER[gpu]
    current_time = datetime.today().strftime('%Y-%m-%d-_-%H_%M_%S')
    p.write_to_log("time = {} not found gpu".format(current_time))
    return -1
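
Example #8 shows the real call site; a condensed sketch with hypothetical limits:

smi = nsmi.NVLog()  # parsed nvidia-smi output, as in Example #8
gpu = found_gpu(smi, max_algorithm_memory=4000, banned_gpu=-1, max_thread_on_gpu=2)
if gpu == -1:
    p.write_to_log("no gpu fits, task should be re-queued")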
Example #17
    def train(self):
        if self.is_vgg_model:
            classifier_optimizer = torch.optim.Adam(gr.register_weights("classifier", self.am_model),
                                                    lr=self.classifier_learning_rate)
            attention_module_optimizer = torch.optim.Adam(gr.register_weights("attention", self.am_model),
                                                          lr=self.attention_module_learning_rate)
        else:
            classifier_optimizer = torch.optim.Adam(rgr.register_weights("classifier", self.am_model),
                                                    lr=self.classifier_learning_rate)
            attention_module_optimizer = torch.optim.Adam(rgr.register_weights("attention", self.am_model),
                                                          lr=self.attention_module_learning_rate)

        while self.current_epoch <= self.train_epochs:

            loss_m_sum = 0
            loss_l1_sum = 0

            loss_classification_sum = 0
            loss_segmentation_sum = 0
            accuracy_sum = 0
            batch_count = 0
            self.am_model.train(mode=True)
            for images, segments, labels in self.train_segments_set:
                labels, segments = model_utils.reduce_to_class_number(self.left_class_number, self.right_class_number,
                                                                      labels,
                                                                      segments)
                images, labels, segments = self.convert_data_and_label(images, labels, segments)
                segments = self.puller(segments)

                # calculate and optimize model
                classifier_optimizer.zero_grad()
                attention_module_optimizer.zero_grad()

                model_classification, model_segmentation = model_utils.wait_while_can_execute(self.am_model, images)
                segmentation_loss = self.m_loss(model_segmentation, segments)
                classification_loss = self.l_loss(model_classification, labels)
                # torch.cuda.empty_cache()
                classification_loss.backward(retain_graph=True)
                segmentation_loss.backward()

                classifier_optimizer.step()
                attention_module_optimizer.step()

                output_probability, output_cl, cl_acc = self.calculate_accuracy(labels, model_classification,
                                                                                labels.size(0))

                classifier_optimizer.zero_grad()
                attention_module_optimizer.zero_grad()

                self.save_train_data(labels, output_cl, output_probability)

                # accumulate information
                accuracy_sum += model_utils.scalar(cl_acc.sum())
                loss_classification_sum += model_utils.scalar(classification_loss.sum())
                loss_segmentation_sum += model_utils.scalar(segmentation_loss.sum())
                batch_count += 1
                # self.de_convert_data_and_label(images, segments, labels)
                # torch.cuda.empty_cache()

            loss_classification_sum = loss_classification_sum / (batch_count + p.EPS)
            accuracy_sum = accuracy_sum / (batch_count + p.EPS)
            loss_segmentation_sum = loss_segmentation_sum / (batch_count + p.EPS)
            loss_total = loss_classification_sum + loss_m_sum + loss_segmentation_sum
            prefix = "PRETRAIN" if self.current_epoch <= self.pre_train_epochs else "TRAIN"
            f_1_score_text, recall_score_text, precision_score_text = metrics_processor.calculate_metric(self.classes,
                                                                                                         self.train_trust_answers,
                                                                                                         self.train_model_answers)

            text = "{}={} Loss_CL={:.5f} Loss_M={:.5f} Loss_L1={:.5f} Loss_Total={:.5f} Accuracy_CL={:.5f} " \
                   "{} {} {} ".format(prefix, self.current_epoch, loss_classification_sum,
                                      loss_m_sum,
                                      loss_l1_sum,
                                      loss_total,
                                      accuracy_sum,
                                      f_1_score_text,
                                      recall_score_text,
                                      precision_score_text)

            P.write_to_log(text)
            self.am_model.train(mode=False)
            if self.current_epoch % self.test_each_epoch == 0:
                test_loss, _ = self.test(self.am_model, self.test_set, self.l_loss, self.m_loss)
            if self.current_epoch % 200 == 0:
                self.take_snapshot(self.train_segments_set, self.am_model, "TRAIN_{}".format(self.current_epoch))
                self.take_snapshot(self.test_set, self.am_model, "TEST_{}".format(self.current_epoch))

            self.clear_temp_metrics()
            self.current_epoch += 1
Example #18
def print_status_info():
    global strategy_queue, thread_list, mapper_list

    p.write_to_log("actual_property_index={}".format(actual_property_index))
    p.write_to_log("alive_process={}".format(alive_process))
    p.write_to_log("queue:")
    for idx, i in enumerate(strategy_queue):
        p.write_to_log("idx = {}, values={}".format(idx, i))
    p.write_to_log("thread:")
    for idx, (i, j) in enumerate(zip(thread_list, mapper_list)):
        p.write_to_log("idx = {}, i={}, j={}".format(idx, i, j))
    p.write_to_log("end")
Example #19
    def train(self):
        optimizer = torch.optim.Adam(self.am_model.parameters(), lr=self.classifier_learning_rate)

        while self.current_epoch <= self.train_epochs:

            loss_m_sum = 0
            loss_l1_sum = 0

            loss_classification_sum = 0
            loss_segmentation_sum = 0
            accuracy_sum = 0
            batch_count = 0
            self.am_model.train(mode=True)
            for images, segments, labels in self.train_segments_set:
                labels, segments = model_utils.reduce_to_class_number(self.left_class_number, self.right_class_number,
                                                                      labels,
                                                                      segments)
                images, labels, segments = self.convert_data_and_label(images, labels, segments)
                segments_list = []
                for puller in self.puller:
                    segments_list.append(puller(segments))

                # calculate and optimize model
                optimizer.zero_grad()

                model_classification, model_segmentation = model_utils.wait_while_can_execute(self.am_model, images)
                classification_loss = self.l_loss(model_classification, labels)
                total_loss = classification_loss

                if self.use_mloss:
                    sum_segm_loss = None
                    for ms, sl in zip(model_segmentation, segments_list):
                        segmentation_loss = self.m_loss(ms, sl)
                        total_loss += segmentation_loss
                        if sum_segm_loss is None:
                            sum_segm_loss = segmentation_loss
                        else:
                            sum_segm_loss += segmentation_loss
                total_loss.backward()
                optimizer.step()

                output_probability, output_cl, cl_acc = self.calculate_accuracy(labels, model_classification,
                                                                                labels.size(0))

                optimizer.zero_grad()

                self.save_train_data(labels, output_cl, output_probability)

                accuracy_sum += model_utils.scalar(cl_acc.sum())
                loss_classification_sum += model_utils.scalar(classification_loss.sum())
                if self.use_mloss:
                    loss_segmentation_sum += model_utils.scalar(sum_segm_loss.sum())
                batch_count += 1

            loss_classification_sum = loss_classification_sum / (batch_count + p.EPS)
            accuracy_sum = accuracy_sum / (batch_count + p.EPS)
            loss_segmentation_sum = loss_segmentation_sum / (batch_count + p.EPS)
            loss_total = loss_classification_sum + loss_m_sum + loss_segmentation_sum
            prefix = "TRAIN"
            f_1_score_text, recall_score_text, precision_score_text = metrics_processor.calculate_metric(self.classes,
                                                                                                         self.train_trust_answers,
                                                                                                         self.train_model_answers)

            text = "{}={} Loss_CL={:.5f} Loss_M={:.5f} Loss_L1={:.5f} Loss_Total={:.5f} Accuracy_CL={:.5f} " \
                   "{} {} {} ".format(prefix, self.current_epoch, loss_classification_sum,
                                      loss_m_sum,
                                      loss_l1_sum,
                                      loss_total,
                                      accuracy_sum,
                                      f_1_score_text,
                                      recall_score_text,
                                      precision_score_text)

            P.write_to_log(text)

            if self.current_epoch % self.test_each_epoch == 0:
                test_loss, _ = self.test(self.am_model, self.test_set, self.l_loss, self.m_loss)

            self.clear_temp_metrics()
            self.current_epoch += 1
Example #20
    def train(self):

        params = self.model.parameters()
        optimizer = torch.optim.Adam(params, lr=self.classifier_learning_rate, weight_decay=self.weight_decay)
        best_loss = None
        best_test_loss = None

        while self.current_epoch <= self.train_epochs:

            loss_classification_sum = 0
            accuracy_classification_sum = 0
            batch_count = 0

            self.model.train(mode=True)
            for images, segments, labels in self.train_segments_set:
                labels, segments = model_utils.reduce_to_class_number(self.left_class_number, self.right_class_number,
                                                                      labels,
                                                                      segments)
                images, labels, segments = self.convert_data_and_label(images, labels, segments)
                segments = self.puller(segments)

                # calculate and optimize model
                optimizer.zero_grad()

                model_classification = model_utils.wait_while_can_execute_single(self.model, images)
                sigmoid = nn.Sigmoid()  # used for calculate accuracy
                model_classification = sigmoid(model_classification)

                # this might all be due to the Inception model
                classification_loss = self.l_loss(model_classification, labels)
                # torch.cuda.empty_cache()
                classification_loss.backward()
                optimizer.step()

                output_probability, output_cl, cl_acc = self.calculate_accuracy(labels, model_classification,
                                                                                labels.size(0))

                self.save_train_data(labels, output_cl, output_probability)

                # accumulate information
                accuracy_classification_sum += model_utils.scalar(cl_acc.sum())
                loss_classification_sum += model_utils.scalar(classification_loss.sum())
                batch_count += 1
                # self.de_convert_data_and_label(images, segments, labels)
                # torch.cuda.empty_cache()

            if best_loss is None or loss_classification_sum < best_loss:
                best_loss = loss_classification_sum
                self.best_weights = copy.deepcopy(self.model.state_dict())

            f_1_score_text, recall_score_text, precision_score_text = metrics_processor.calculate_metric(self.classes,
                                                                                                         self.train_trust_answers,
                                                                                                         self.train_model_answers)
            text = "TRAIN={} Loss_CL={:.10f} Accuracy_CL={:.5f} {} {} {} ".format(self.current_epoch,
                                                                                  loss_classification_sum / batch_count,
                                                                                  accuracy_classification_sum / batch_count,
                                                                                  f_1_score_text,
                                                                                  recall_score_text,
                                                                                  precision_score_text)
            p.write_to_log(text)
            if self.current_epoch % self.test_each_epoch == 0:
                test_loss, _ = self.test(self.model, self.test_set, self.l_loss)
                if best_test_loss is None or test_loss < best_test_loss:
                    best_test_loss = test_loss
                    self.best_test_weights = copy.deepcopy(self.model.state_dict())

            self.clear_temp_metrics()

            self.current_epoch += 1

        self.save_model(self.best_test_weights)
        self.save_model(self.best_weights)
Example #21
    def __init__(self, parsed):
        self.gpu = int(parsed.gpu)
        os.environ["CUDA_VISIBLE_DEVICES"] = str(self.gpu)
        self.gpu = 0
        self.parsed_description = parsed.description
        self.pre_train = int(parsed.pre_train)
        self.train_set_size = int(parsed.train_set)
        self.epochs = int(parsed.epochs)

        self.run_name = parsed.run_name
        self.algorithm_name = parsed.algorithm_name
        self.left_class_number = int(parsed.left_class_number)
        self.right_class_number = int(parsed.right_class_number)
        self.freeze_list = parsed.freeze_list
        self.classifier_learning_rate = float(parsed.classifier_learning_rate)
        self.attention_module_learning_rate = float(
            parsed.attention_module_learning_rate)
        self.is_freezen = str(parsed.is_freezen).lower() != "false"
        self.cbam_use_mloss = str(parsed.cbam_use_mloss).lower() != "false"

        self.model_type = str(parsed.model_type).lower()
        self.is_vgg16_model = "vgg" in self.model_type

        self.image_size = int(parsed.image_size)

        self.alpha = None
        self.gamma = None
        if str(parsed.classifier_loss_function).lower() == "bceloss":
            self.classifier_loss_function = nn.BCELoss()
        elif str(parsed.classifier_loss_function).lower() == "softf1":
            self.classifier_loss_function = f1loss.SoftF1Loss()
        elif str(parsed.classifier_loss_function).lower() == "focal":
            self.alpha = float(parsed.alpha)
            self.gamma = float(parsed.gamma)
            self.classifier_loss_function = focal_loss.FocalLoss(
                self.alpha, self.gamma)
        else:
            raise Exception("classifier loss {} not found".format(
                parsed.classifier_loss_function))

        am_model_types = {"sum", "product", "sum_shift", "product_shift",
                          "cbam", "conv_product", "conv_sum"}
        if str(parsed.am_model).lower() in am_model_types:
            self.am_model_type = parsed.am_model
        else:
            raise Exception("model {} not found".format(parsed.am_model))

        if str(parsed.am_loss_function).lower() == "bceloss":
            self.am_loss_function = nn.BCELoss()
        elif str(parsed.am_loss_function).lower() == "softf1":
            self.am_loss_function = f1loss.SoftF1Loss()
        elif str(parsed.am_loss_function).lower() == "focal":
            self.alpha = float(parsed.alpha)
            self.gamma = float(parsed.gamma)
            self.am_loss_function = focal_loss_am.FocalLoss(
                self.alpha, self.gamma)
        else:
            raise Exception("am loss {} not found".format(
                parsed.am_loss_function))

        if str(parsed.dataset_type).lower() == "balanced":
            self.dataset_type = "balanced"
        else:
            self.dataset_type = "imbalanced"
        self.model_identifier = parsed.model_identifier
        self.execute_from_model = str(parsed.execute_from_model).lower() != "false"

        self.train_batch_size = int(parsed.train_batch_size)
        self.test_batch_size = int(parsed.test_batch_size)

        self.classes = self.right_class_number - self.left_class_number

        self.description = "mi-{}".format(self.model_identifier)
        self.snapshots_path = None
        self.train_segments_set = None
        self.test_set = None
        self.model = None
        self.puller = None
        self.strategy = None
        self.train_count = None

        self.initialize_logs()
        self.initialize_snapshots_dir()
        self.load_dataset()
        self.current_epoch = self.get_current_epoch()
        self.model_state_dict = self.load_model_from_saves()
        self.model = self.create_model()
        self.strategy = self.create_strategy()
        P.write_to_log("incoming args = {}".format(parsed))
Example #22
    def take_snapshot(self, data_set, model, snapshot_name: str = None):
        cnt = 0
        model_segments_list = []
        trust_segments_list = []
        images_list = []

        for images, segments, labels in data_set:

            segments = segments[:, self.left_class_number:self.right_class_number, :, :]

            images, labels, segments = self.convert_data_and_label(
                images, labels, segments)
            segments = self.puller(segments)
            _, model_segmentation = model_utils.wait_while_can_execute(
                model, images)

            cnt += segments.size(0)
            images, _, segments = self.de_convert_data_and_label(
                images, labels, segments)
            model_segmentation = model_segmentation.cpu()
            for idx in range(segments.size(0)):
                images_list.append(images[idx])
                model_segments_list.append(model_segmentation[idx])
                trust_segments_list.append(segments[idx])

            if cnt >= self.snapshot_elements_count:
                break
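        # Grid layout: one row per sampled image; columns are [original image] followed by
        # [raw model answer, min-max normalized answer, trust answer] for each class.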
        fig, axes = plt.subplots(len(images_list),
                                 model_segments_list[0].size(0) * 3 + 1,
                                 figsize=(50, 100))
        fig.tight_layout()
        for idx, img in enumerate(images_list):
            axes[idx][0].imshow(np.transpose(img.numpy(), (1, 2, 0)))

        for idx, (trust_answer, model_answer) in enumerate(
                zip(trust_segments_list, model_segments_list)):
            for class_number in range(trust_answer.size(0)):
                a = model_answer[class_number].detach().numpy()
                a = np.array([a] * 3)
                axes[idx][1 + class_number * 3].imshow(
                    np.transpose(a, (1, 2, 0)))
                p.write_to_log(
                    "model        idx={}, class={}, sum={}, max={}, min={}".
                    format(idx, class_number, np.sum(a), np.max(a), np.min(a)))
                a = (a - np.min(a)) / (np.max(a) - np.min(a))
                axes[idx][1 + class_number * 3 + 1].imshow(
                    np.transpose(a, (1, 2, 0)))
                p.write_to_log(
                    "model normed idx={}, class={}, sum={}, max={}, min={}".
                    format(idx, class_number, np.sum(a), np.max(a), np.min(a)))

                a = trust_answer[class_number].detach().numpy()
                a = np.array([a] * 3)
                axes[idx][1 + class_number * 3 + 2].imshow(
                    np.transpose(a, (1, 2, 0)))
                p.write_to_log(
                    "trust        idx={}, class={}, sum={}, max={}, min={}".
                    format(idx, class_number, np.sum(a), np.max(a), np.min(a)))

                p.write_to_log("=" * 50)

                axes[idx][1 + class_number * 3].set(
                    xlabel='model answer class: {}'.format(class_number))
                axes[idx][1 + class_number * 3 +
                          1].set(xlabel='model normed answer class: {}'.format(
                              class_number))
                axes[idx][1 + class_number * 3 + 2].set(
                    xlabel='trust answer class: {}'.format(class_number))
        print("=" * 50)
        print("=" * 50)
        print("=" * 50)
        print("=" * 50)
        print("=" * 50)
        plt.savefig(os.path.join(self.snapshot_dir, snapshot_name))
        plt.close(fig)
Example #23
def balance_dataset(data_set, data_size, marked_size):
    pows = torch.zeros(5)
    for i in range(5):
        pows[i] = 2**i

    train_split = {}
    mask_dict = {}
    for a, b, k in data_set:
        idx = int((k * pows).sum().data)
        mask_dict.setdefault(idx, 0)
        mask_dict[idx] += 1
        train_split.setdefault(idx, [])
        train_split[idx].append((a, b, k))

    mask_dict_keys = sorted(mask_dict.keys())
    for i in mask_dict_keys:
        P.write_to_log("".join(reversed(format(i, 'b').zfill(5))),
                       mask_dict[i])

    sm_list = [0, 0, 0, 0, 0]
    for k, v in mask_dict.items():
        key_ = "".join(reversed(format(k, 'b').zfill(5)))
        for ill in range(5):
            exists = 1 if key_[ill] == '1' else 0
            sm_list[ill] += v * exists
    print(sm_list)

    A = [[], [], [], [], [], []]
    for key_idx, key in enumerate(mask_dict_keys):
        key_ = "".join(reversed(format(key, 'b').zfill(5)))
        for ill in range(5):
            exists = 1 if key_[ill] == '1' else 0
            A[ill].append(mask_dict[key] * exists)
        A[5].append(mask_dict[key])

    b = []
    for i in range(5):
        b.append(marked_size)
    b.append(data_size)

    bounds = [(mask_dict[mdk] / data_size, None) for mdk in mask_dict_keys]

    c = [-mask_dict[i] for i in mask_dict_keys]
    A = np.array(A)
    b = np.array(b)
    c = np.array(c)
    P.write_to_log(A)
    P.write_to_log(b)
    P.write_to_log(c)

    res = opt.linprog(c, A_ub=A, b_ub=b, bounds=bounds,
                      method='simplex')  # note: 'simplex' is removed in newer SciPy; 'highs' is the modern equivalent
    P.write_to_log(res)
    accumulate_dict = {}
    for idx, i in enumerate(res.x):
        P.write_to_log("{} {:.5f} {} {:.5f} ".format(
            "".join(reversed(format(mask_dict_keys[idx], 'b').zfill(5))), i,
            mask_dict[mask_dict_keys[idx]],
            mask_dict[mask_dict_keys[idx]] * i))
        accumulate_dict[mask_dict_keys[idx]] = mask_dict[mask_dict_keys[idx]] * i + 1
    sm = 0
    sm_list = [0, 0, 0, 0, 0]
    for x, k in zip(res.x, mask_dict_keys):
        sm += x * mask_dict[k]
        key_ = "".join(reversed(format(k, 'b').zfill(5)))
        for ill in range(5):
            exists = 1 if key_[ill] == '1' else 0
            sm_list[ill] += x * mask_dict[k] * exists
    P.write_to_log(sm_list)
    P.write_to_log(sm)

    result_list = []

    for idx, lst in train_split.items():
        needed_cnt = int(accumulate_dict[idx])
        for i in range(needed_cnt):
            result_list.append(lst[i % len(lst)])

    return result_list
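
As I read it, the linear program above maximizes the number of retained samples: each variable x_k is a keep-fraction multiplier for one label-mask group, the objective is max sum(count_k * x_k) (hence the negated c), the first five rows of A cap each illness at marked_size, the last row caps the total at data_size, and the bounds keep every group minimally represented. A tiny self-contained sketch with hypothetical counts:

import numpy as np
import scipy.optimize as opt

# Hypothetical: two mask groups, 30 and 70 samples, each carrying one illness bit.
counts = np.array([30, 70])
A_ub = np.array([[30, 0],     # illness 0 occurs only in group 0
                 [0, 70],     # illness 1 occurs only in group 1
                 [30, 70]])   # total-size row
b_ub = np.array([20, 20, 50])  # marked_size = 20 per illness, data_size = 50 total
res = opt.linprog(-counts, A_ub=A_ub, b_ub=b_ub, bounds=[(0, None)] * 2)
print(res.x)  # per-group multipliers, consumed like accumulate_dict above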