def objective(trial):
        """Optuna objective scoring one double-correlation configuration.

        Samples ``alpha1``/``alpha2`` uniformly from [0, 1], picks the two
        normalized adjacency matrices (``A1``/``A2``) among DAD, DA and AD
        and, when ``args.cs_fixed`` is set, a log-uniform ``scale`` in
        [0.1, 10].  Every saved model output in ``model_outs`` is then
        post-processed with the matching ``double_correlation_*`` routine and
        evaluated on the split of the same run.

        Args:
            trial: the Optuna trial used to sample the hyper-parameters; the
                formatted validation/test statistics are stored on it as the
                user attributes ``'valid'`` and ``'test'``.

        Returns:
            The mean over runs of the first logged column (the value stored
            under the ``'valid'`` user attribute).
        """
        DAD, AD, DA = gen_normalized_adjs(dataset)
        # Explicit name -> matrix table; replaces eval() on the sampled string
        # so that only the three known matrices can ever be selected.
        adjs = {'DAD': DAD, 'DA': DA, 'AD': AD}

        # NOTE(review): suggest_uniform / suggest_loguniform are deprecated in
        # recent Optuna releases in favour of suggest_float -- confirm the
        # installed version before switching.
        alpha1 = trial.suggest_uniform("alpha1", 0.0, 1.0)
        alpha2 = trial.suggest_uniform("alpha2", 0.0, 1.0)
        A1 = trial.suggest_categorical('A1', ['DAD', 'DA', 'AD'])
        A2 = trial.suggest_categorical('A2', ['DAD', 'DA', 'AD'])

        # `scale` is only sampled (and only consumed) in the fixed-scale mode.
        if args.cs_fixed:
            scale = trial.suggest_loguniform("scale", 0.1, 10.0)

        adj1 = adjs[A1]
        adj2 = adjs[A2]

        logger = SimpleLogger('evaluate params', [], 2)
        for run, model_out in enumerate(model_outs):
            split_idx = splits_lst[run]
            out = torch.load(model_out, map_location='cpu')
            if args.cs_fixed:
                _, out_cs = double_correlation_fixed(dataset.label, out,
                                                     split_idx,
                                                     adj1, alpha1, 50,
                                                     adj2, alpha2, 50,
                                                     scale, args.hops)
            else:
                _, out_cs = double_correlation_autoscale(
                    dataset.label, out, split_idx, adj1, alpha1, 50,
                    adj2, alpha2, 50, args.hops)
            result = evaluate(None, dataset, split_idx, eval_func, out_cs)
            # Only entries 1 and 2 of the evaluation result are logged; they
            # end up as columns 0 ('valid') and 1 ('test') below.
            logger.add_result(run, (), (result[1], result[2]))
        res = logger.display()

        trial.set_user_attr('valid',
                            f'{res[:, 0].mean():.3f} ± {res[:, 0].std():.3f}')
        trial.set_user_attr('test',
                            f'{res[:, 1].mean():.3f} ± {res[:, 1].std():.3f}')

        return res[:, 0].mean()
示例#2
0
    def train(self, wm_model, tool):
        """Run the 'dseq' training loop on ``wm_model``.

        Builds the batches through ``tool``, prepares the logger, the AdamW
        optimizer wrapped in an ``ISRScheduler``, the criterion and the
        teacher-forcing decay, then iterates over the epochs with periodic
        validation and checkpointing.

        Args:
            wm_model: model providing ``dseq_parameters()``, ``train()``,
                ``eval()`` and ``set_teach_ratio()``.
            tool: data helper providing ``build_data``, the batch counts and
                ``shuffle_train_data``.
        """
        hps = self.hps

        # prepare the batches in 'dseq' mode
        print("building data for dseq...")
        tool.build_data(hps.train_data, hps.valid_data,
                        hps.dseq_batch_size, mode='dseq')

        print("train batch num: %d" % (tool.train_batch_num))
        print("valid batch num: %d" % (tool.valid_batch_num))

        # logger used for the training steps
        train_log = SimpleLogger('train')
        train_log.set_batch_num(tool.train_batch_num)
        train_log.set_log_steps(hps.dseq_log_steps)
        train_log.set_log_path(hps.dseq_train_log_path)
        for rate_name, start_value in (('learning_rate', 0.0),
                                       ('teach_ratio', 1.0)):
            train_log.set_rate(rate_name, start_value)

        # AdamW wrapped by the ISR learning-rate scheduler
        adamw = torch.optim.AdamW(wm_model.dseq_parameters(),
                                  lr=1e-3,
                                  betas=(0.9, 0.99),
                                  weight_decay=hps.weight_decay)
        scheduler = ISRScheduler(optimizer=adamw,
                                 warmup_steps=hps.dseq_warmup_steps,
                                 max_lr=hps.dseq_max_lr,
                                 min_lr=hps.dseq_min_lr,
                                 init_lr=hps.dseq_init_lr,
                                 beta=0.6)

        wm_model.train()

        loss_fn = Criterion(hps.pad_idx)

        # teacher-forcing ratio decay, stepped once per epoch
        teach_decay = ExponentialDecay(hps.dseq_burn_down_tr,
                                       hps.dseq_decay_tr,
                                       hps.dseq_min_tr)

        last_epoch = hps.dseq_epoches
        for epoch in range(1, last_epoch + 1):
            self.run_train(wm_model, tool, scheduler, loss_fn, train_log)

            # periodic validation; the model is put back in train mode after
            if epoch % hps.dseq_validate_epoches == 0:
                print("run validation...")
                wm_model.eval()
                print("in training mode: %d" % (wm_model.training))
                self.run_validation(epoch, wm_model, loss_fn, tool,
                                    scheduler.rate())
                wm_model.train()
                print("validation Done: %d" % (wm_model.training))

            # periodic checkpoint; disabled when the interval is below 1
            save_every = hps.dseq_save_epoches
            if save_every >= 1 and epoch % save_every == 0:
                print("saving model...")
                utils.save_checkpoint(hps.model_dir, epoch, wm_model,
                                      prefix="dseq")

            train_log.add_epoch()

            print("teach forcing ratio decay...")
            next_ratio = teach_decay.do_step()
            wm_model.set_teach_ratio(next_ratio)
            train_log.set_rate('teach_ratio', teach_decay.get_rate())

            print("shuffle data...")
            tool.shuffle_train_data()
示例#3
0
    def run_validation(self, epoch, wm_model, criterion, tool, lr):
        """Evaluate ``wm_model`` on every validation batch and log the loss.

        Args:
            epoch: epoch number passed to the logger when printing.
            wm_model: model under evaluation; its teach ratio is logged.
            criterion: loss object forwarded to ``run_step``.
            tool: data helper exposing ``valid_batches``/``valid_batch_num``.
            lr: learning rate recorded in the validation log.
        """
        valid_log = SimpleLogger('valid')
        valid_log.set_batch_num(tool.valid_batch_num)
        valid_log.set_log_path(self.hps.dseq_valid_log_path)
        valid_log.set_rate('learning_rate', lr)
        valid_log.set_rate('teach_ratio', wm_model.get_teach_ratio())

        for idx in range(tool.valid_batch_num):
            fields = tool.valid_batches[idx]

            # the first four batch entries are moved to the device, in order
            inps, trgs, ph_inps, len_inps = (
                fields[k].to(self.device) for k in range(4))

            # no optimizer is passed (None) and gradients are disabled
            with torch.no_grad():
                gen_loss, _ignored = self.run_step(
                    wm_model, None, criterion,
                    inps, trgs, ph_inps, len_inps, True)
            valid_log.add_losses(gen_loss)

        valid_log.print_log(epoch)
示例#4
0
    def run_validation(self, epoch, wm_model, criterion, tool, lr):
        """Evaluate ``wm_model`` on every validation batch and log the loss.

        Args:
            epoch: epoch number passed to the logger when printing.
            wm_model: model under evaluation; its teach ratio and tau are
                recorded in the log.
            criterion: loss object forwarded to ``run_step``.
            tool: data helper exposing ``valid_batches``/``valid_batch_num``.
            lr: learning rate recorded in the validation log.
        """
        valid_log = SimpleLogger('valid')
        valid_log.set_batch_num(tool.valid_batch_num)
        valid_log.set_log_path(self.hps.valid_log_path)
        valid_log.set_rate('learning_rate', lr)
        valid_log.set_rate('teach_ratio', wm_model.get_teach_ratio())
        valid_log.set_rate('temperature', wm_model.get_tau())

        def on_device(items):
            # move every element of one batch field to the target device
            return [item.to(self.device) for item in items]

        for idx in range(tool.valid_batch_num):
            fields = tool.valid_batches[idx]

            # the first five batch entries are sequences; keys come last
            all_inps, all_trgs, all_ph_inps, all_len_inps, keys = (
                on_device(fields[k]) for k in range(5))

            # no optimizer is passed (None) and gradients are disabled
            with torch.no_grad():
                gen_loss, _ignored = self.run_step(
                    wm_model, None, criterion,
                    all_inps, all_trgs, all_ph_inps, all_len_inps,
                    keys, True)

            valid_log.add_losses(gen_loss)

        valid_log.print_log(epoch)
import csv
import os
from logger import SimpleLogger
from concurrent.futures import ProcessPoolExecutor

from load_csv import (DangerousLevelNodeCsv, LoopholeNodeCsv,
                      ManufacturerNodeCsv, ProductNodeCsv, ThreatNodeCsv,
                      L2DRelationshipCsv, L2PRelationshipCsv,
                      L2TRelationshipCsv, P2MRelationshipCsv,
                      M2LRelationshipCsv)
from nodes import DangerousLevelNode, LoopholeNode, ManufacturerNode, ProductNode, ThreatNode
from relationships import L2DRelationship, L2PRelationship, L2TRelationship, P2MRelationship, M2LRelationship

from exceptions import LackAttributeError, UnImplementedError, InstantiateError

# Module-level logger named after this module; the second argument is
# presumably the log file path -- confirm against SimpleLogger.
sp_logger = SimpleLogger(__name__, './log/generate_graph.log').get_logger()


class GeneratingGraphInterface(object):
    """Interface for graph generators.

    Both the constructor and ``handle`` raise ``UnImplementedError``, so a
    concrete class has to override them to be usable.
    """

    def __init__(self):
        """Always raise: the interface itself cannot be instantiated."""
        reason = "Interface class could't instantiate!"
        raise UnImplementedError(reason)

    def handle(self):
        """Always raise: concrete classes must supply their own ``handle``."""
        reason = "This methods must be implemented!"
        raise UnImplementedError(reason)


class BaseGeneratingGraph(GeneratingGraphInterface):
    node_classes = []
    relationship_classes = []

    def __init__(self):
示例#6
0
    def train(self, wm_model, tool):
        """Run the 'wm' training loop on ``wm_model``.

        Builds the batches through ``tool``, prepares the logger, the AdamW
        optimizer wrapped in an ``ISRScheduler``, the null-token indices, the
        criterion and the two decay helpers, then iterates over the epochs
        with periodic validation and checkpointing.

        Args:
            wm_model: model providing ``parameters()``, ``train()``,
                ``eval()``, ``set_null_idxes()`` and ``set_teach_ratio()``.
            tool: data helper providing ``build_data``,
                ``load_function_tokens``, the batch counts and
                ``shuffle_train_data``.
        """
        hps = self.hps

        # prepare the batches in 'wm' mode
        print("building data for wm...")
        tool.build_data(hps.train_data, hps.valid_data,
                        hps.batch_size, mode='wm')

        print("train batch num: %d" % (tool.train_batch_num))
        print("valid batch num: %d" % (tool.valid_batch_num))

        # logger used for the training steps
        train_log = SimpleLogger('train')
        train_log.set_batch_num(tool.train_batch_num)
        train_log.set_log_steps(hps.log_steps)
        train_log.set_log_path(hps.train_log_path)
        for rate_name, start_value in (('learning_rate', 0.0),
                                       ('teach_ratio', 1.0),
                                       ('temperature', 1.0)):
            train_log.set_rate(rate_name, start_value)

        # AdamW wrapped by the ISR learning-rate scheduler
        adamw = torch.optim.AdamW(wm_model.parameters(),
                                  lr=1e-3,
                                  betas=(0.9, 0.99),
                                  weight_decay=hps.weight_decay)
        scheduler = ISRScheduler(optimizer=adamw,
                                 warmup_steps=hps.warmup_steps,
                                 max_lr=hps.max_lr,
                                 min_lr=hps.min_lr,
                                 init_lr=hps.init_lr,
                                 beta=0.6)

        wm_model.train()

        # token indices loaded from fchars.txt are handed to the model
        fchars_path = hps.data_dir + "fchars.txt"
        null_idxes = tool.load_function_tokens(fchars_path).to(self.device)
        wm_model.set_null_idxes(null_idxes)

        loss_fn = Criterion(hps.pad_idx)

        # stepped once per epoch (see the end of the loop body)
        teach_decay = ExponentialDecay(hps.burn_down_tr,
                                       hps.decay_tr, hps.min_tr)
        # kept on the instance; not stepped inside this method
        self.tau_decay_tool = ExponentialDecay(0, hps.tau_annealing_steps,
                                               hps.min_tau)

        # -----------------------------------------------------------
        # main loop over all epochs
        last_epoch = hps.max_epoches
        for epoch in range(1, last_epoch + 1):
            self.run_train(wm_model, tool, scheduler, loss_fn, train_log)

            # periodic validation; the model is put back in train mode after
            if epoch % hps.validate_epoches == 0:
                print("run validation...")
                wm_model.eval()
                print("in training mode: %d" % (wm_model.training))
                self.run_validation(epoch, wm_model, loss_fn, tool,
                                    scheduler.rate())
                wm_model.train()
                print("validation Done: %d" % (wm_model.training))

            # periodic checkpoint; disabled when the interval is below 1
            save_every = hps.save_epoches
            if save_every >= 1 and epoch % save_every == 0:
                print("saving model...")
                utils.save_checkpoint(hps.model_dir, epoch, wm_model,
                                      prefix="wm")

            train_log.add_epoch()

            print("teach forcing ratio decay...")
            next_ratio = teach_decay.do_step()
            wm_model.set_teach_ratio(next_ratio)
            train_log.set_rate('teach_ratio', teach_decay.get_rate())

            print("shuffle data...")
            tool.shuffle_train_data()
示例#7
0
### Load method ###
model = parse_method(args, dataset, n, c, d, device)

# ROC-AUC evaluation (with a BCE-with-logits loss) is used when requested via
# args.rocauc or for the datasets listed below; otherwise NLL loss + accuracy.
if args.rocauc or args.dataset in ('yelp-chi', 'twitch-e', 'ogbn-proteins'):
    criterion = nn.BCEWithLogitsLoss()
    eval_func = eval_rocauc
else:
    criterion = nn.NLLLoss()
    eval_func = eval_acc

logger = Logger(args.runs, args)

if args.method == 'cs':
    # for correct and smooth
    cs_logger = SimpleLogger('evaluate params', [], 2)
    # directory name is "<dataset>-<sub_dataset>" when a sub-dataset is given
    model_path = f'{args.dataset}-{args.sub_dataset}' if args.sub_dataset else f'{args.dataset}'
    model_dir = f'models/{model_path}'
    print(model_dir)
    # exist_ok=True replaces the exists()/makedirs() pair, which could fail
    # if another process created the directory between the two calls.
    os.makedirs(model_dir, exist_ok=True)
    DAD, AD, DA = gen_normalized_adjs(dataset)

if args.method == 'lp':
    # handles label propagation separately
    for alpha in (.01, .1, .25, .5, .75, .9, .99):
        logger = Logger(args.runs, args)
        for run in range(args.runs):
            split_idx = split_idx_lst[run]
            train_idx = split_idx['train']
            model.alpha = alpha
示例#8
0
# _*_ coding: utf-8 _*_
# @FileName : tools.py
# @Author   : sjl
# @CreatedAt     :  2021/03/19 11:04:44
# @UpdatedAt     :  2021/03/19 11:04:44
# @description: tool functions
# @Software : VSCode


from logger import SimpleLogger
import time
from functools import wraps


# Module-level logger used by the helpers below; the second argument is
# presumably the log file path -- confirm against SimpleLogger.
sp_logger = SimpleLogger(__name__, "./log/loophole.log").get_logger()


def spend_time(func):
    """Decorator that logs how long each call to ``func`` takes.

    The elapsed wall-clock time (in seconds) is written to ``sp_logger`` at
    INFO level after the call completes.  If ``func`` raises, the exception
    propagates and nothing is logged.

    Args:
        func: the callable to time.

    Returns:
        A wrapper with the same signature and metadata as ``func`` that
        returns whatever ``func`` returns.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        # Keep the result so the decorated function still returns its value
        # (previously it was discarded and callers always got None).
        result = func(*args, **kwargs)
        sp_logger.info("executing {} spend time = {} s".format(func, time.time()-start_time))
        return result

    return wrapper
示例#9
0
# -*- coding: utf-8 -*-
from logger import SimpleLogger

# Logger example
config = dict()
config['SIMPLELOGS_URL'] = 'http://192.168.1.102'  # Address of the logging system
config['MODULE_NAME'] = 'elreg'  # Code of the logged system, used to initialise the logger: logging.getLogger(module_name)
config['OWNER'] = dict(name=u'Электронная регистратура', version='2.2.10')  # Owner for the logging system
config['DEBUG'] = False  # Flag matching the application's debug_mode; all unhandled exceptions are written to the log when not in DEBUG mode

logger = SimpleLogger.get_logger(config.get('SIMPLELOGS_URL'),
                                 config.get('MODULE_NAME'),
                                 config.get('OWNER'),
                                 config.get('DEBUG'))

# Deliberately raise and catch an error to demonstrate logger.error().
try:
    raise ValueError('Raised ValueError')
# "except X as e" works on Python 2.6+ and Python 3; the older
# "except X, e" form is a syntax error on Python 3.
except ValueError as e:
    logger.error(u'Ошибка приложения: {0}'.format(e),
                 extra=dict(tags=[u'проверка логгера', 'elreg']))
    # tags are sent through the extra parameter
def run_active_learning():
    """Run the active-learning experiment over every strategy in STRATEGIES.

    An initial model is trained and scored per strategy, then AL_ROUNDS
    rounds follow.  In each round every strategy selects a batch of
    unlabeled training instances (explanation-driven for EAL, plain
    "k-means-uncertain" selection for AL, random sampling for PL), labels
    it, retrains ``drp_model`` and records the scores again.  An MCC
    summary is printed at the end.

    NOTE: this function uses Python 2 syntax (print statements, xrange).
    """
    logger = SimpleLogger(LOG_FILE)
    dm = DataManager()
    im = InterpretableDataManager()
    drp_model = SVM(kernel=KERNEL, probability=True)
    lime_model = svm.SVC(kernel=KERNEL, probability=True)
    # Three score lists each -- presumably one slot per strategy; confirm
    # against update_accs_mccs.
    accs = [[], [], []]
    mccs = [[], [], []]

    # Round 0: record the initially labeled indices.
    labeled_indices = dm.get_labeled_indices()
    logger.log(0, labeled_indices)

    # Baseline: train on each strategy's dataset and record its scores.
    for strategy in STRATEGIES:
        trn_ds = dm.trn_ds_list[strategy]
        drp_model.train(trn_ds)
        update_accs_mccs(accs, mccs, dm, drp_model.model.predict, strategy)

    print_last_round_mcc(0, accs, mccs)
    # NOTE(review): assert is stripped under -O; this is the only guard on
    # AL_ROUNDS versus the number of remaining training labels.
    assert (AL_ROUNDS <= len(dm.y_train) - INITIAL_INSTANCES)

    # NOTE(review): the loop variable `round` shadows the builtin round().
    for round in xrange(1, AL_ROUNDS + 1):
        print "================================================="
        print "Round", round
        print "================================================="
        for strategy in STRATEGIES:
            trn_ds = dm.trn_ds_list[strategy]
            # exclusion: indices handed to query_least_confident so they are
            # not queried again; batch: indices accepted for labeling.
            exclusion = set()
            batch = set()

            # zip(*...) splits the unlabeled entries into two parallel tuples.
            unlabeled_indices, unlabeled_X_scaled = zip(
                *trn_ds.get_unlabeled_entries())
            # Certainties are computed over the whole scaled training set.
            certainties = get_certainties(drp_model.model, dm.X_train_scaled)
            if strategy == EAL:
                # Discretize the certainties with a threshold and fit
                # lime_model on those labels.
                threshold = get_certainty_threshold(drp_model.model,
                                                    dm.X_train_scaled,
                                                    THRESHOLD)
                y_certainty = discretize_certainties(certainties, threshold)

                lime_model.fit(dm.X_train_scaled_e, y_certainty)
                if SHOW_LIME:
                    certainties_test = get_certainties(drp_model.model,
                                                       dm.X_test_scaled)
                    y_certainty_test = discretize_certainties(
                        certainties_test, threshold)
                    print_lime_model_performance(lime_model, dm,
                                                 y_certainty_test)

                # Keep querying until BATCH_SIZE instances were accepted.
                # NOTE(review): termination relies on the expert accepting
                # enough batches and on query_least_confident coping with a
                # growing exclusion set -- confirm.
                while (len(batch) < BATCH_SIZE):
                    query_id = query_least_confident(unlabeled_indices,
                                                     certainties, exclusion)
                    query = dm.X_train_scaled[query_id]
                    query_unscaled = dm.X_train_e[query_id]
                    instance_certainty = get_certainty(drp_model.model, query)
                    print "Explaining Query with id #{:d}".format(query_id)
                    print "Certainty {:.3f}".format(instance_certainty)

                    # NOTE(review): the explainer arguments do not change
                    # inside this while loop, yet it is rebuilt per query.
                    explainer = LimeTabularExplainer(
                        dm.X_train_e,
                        training_labels=y_certainty,
                        feature_names=dm.feature_names_e,
                        class_names=["uncertain", "certain"],
                        discretize_continuous=True,
                        discretizer="entropy")

                    # Unscaled rows are passed through dm.scaler_e before
                    # lime_model predicts probabilities for them.
                    predict_fn = lambda x: lime_model.predict_proba(
                        dm.scaler_e.transform(x)).astype(float)

                    # Request two more features per attempt until at least
                    # NUM_FEATURES - 2 "uncertain" explanations are found.
                    # NOTE(review): if MAX_EXP_FEATURE <= 0 this loop never
                    # runs and `exp` / `uncertain_exp_list` are unbound below.
                    for i in xrange(0, MAX_EXP_FEATURE, 2):
                        exp = explainer.explain_instance(
                            query_unscaled,
                            predict_fn,
                            num_features=NUM_FEATURES + i)
                        uncertain_exp_list = get_uncertain_exps(exp)
                        if (len(uncertain_exp_list) >= NUM_FEATURES - 2):
                            break
                        print "INFO: looping"

                    if SHOW_LIME:
                        print_lime_model_prediction(predict_fn, query_unscaled)

                    # Candidates are restricted to the indices returned for
                    # the explanation.
                    exp_indices = get_indices_exp_region(
                        exp, dm, unlabeled_indices, y_certainty)
                    exp_instances = get_values_of_indices(
                        exp_indices, dm.X_train_scaled)
                    exp_certainties = get_values_of_indices(
                        exp_indices, certainties)
                    # len(batch) >= 0, so the min() below always evaluates to
                    # BATCH_SIZE - len(batch), the number of free slots.
                    batch_indices = select_batch(
                        min(BATCH_SIZE, BATCH_SIZE - len(batch)), exp_indices,
                        exp_instances, exp_certainties, "k-means-uncertain")

                    # Nothing selected: exclude this query and try again.
                    if len(batch_indices) == 0:
                        exclusion.add(query_id)
                        continue

                    print ""
                    print_explanation_drp(uncertain_exp_list, False)
                    print ""
                    print "Instances in the batch: {}".format(
                        len(batch_indices))
                    im.describe_instances(batch_indices)
                    print ""
                    im.describe_instance(query_id)
                    print ""

                    # The whole explanation region is excluded from further
                    # queries whether or not the expert accepts the batch.
                    exclusion.update(set(exp_indices))
                    if ask_expert():
                        batch.update(set(batch_indices))
                    else:
                        print "INFO: Not including in the batch"

                # Only the EAL branch logs its batch for the round.
                logger.log(round, batch)
                print "INFO: Labeling the batch"
                label_batch(trn_ds, dm.y_train, batch)

            elif strategy == AL:  # AL + k-means-uncertain
                # Overwrites the tuple unpacked above with values looked up
                # from dm.X_train_scaled.
                unlabeled_X_scaled = get_values_of_indices(
                    unlabeled_indices, dm.X_train_scaled)
                unlabeled_certainties = get_values_of_indices(
                    unlabeled_indices, certainties)
                batch_indices = select_batch(BATCH_SIZE, unlabeled_indices,
                                             unlabeled_X_scaled,
                                             unlabeled_certainties,
                                             "k-means-uncertain")
                label_batch(trn_ds, dm.y_train, batch_indices)

            elif strategy == PL:  # Passive Learning
                # Uniform random choice among the unlabeled indices.
                batch_indices = random.sample(unlabeled_indices, BATCH_SIZE)
                label_batch(trn_ds, dm.y_train, batch_indices)

            # Retrain on the newly labeled data and record this round's scores.
            drp_model.train(trn_ds)
            update_accs_mccs(accs, mccs, dm, drp_model.model.predict, strategy)

    print_mcc_summary(mccs)