def objective(trial):
    DAD, AD, DA = gen_normalized_adjs(dataset)
    # hyperparameters for the two correct-and-smooth propagation stages
    alpha1 = trial.suggest_uniform("alpha1", 0.0, 1.0)
    alpha2 = trial.suggest_uniform("alpha2", 0.0, 1.0)
    A1 = trial.suggest_categorical('A1', ['DAD', 'DA', 'AD'])
    A2 = trial.suggest_categorical('A2', ['DAD', 'DA', 'AD'])
    if args.cs_fixed:
        scale = trial.suggest_loguniform("scale", 0.1, 10.0)

    logger = SimpleLogger('evaluate params', [], 2)
    for run, model_out in enumerate(model_outs):
        split_idx = splits_lst[run]
        out = torch.load(model_out, map_location='cpu')
        # eval(A1) / eval(A2) map the suggested names to the normalized adjacencies above
        if args.cs_fixed:
            _, out_cs = double_correlation_fixed(
                dataset.label, out, split_idx,
                eval(A1), alpha1, 50, eval(A2), alpha2, 50,
                scale, args.hops)
        else:
            _, out_cs = double_correlation_autoscale(
                dataset.label, out, split_idx,
                eval(A1), alpha1, 50, eval(A2), alpha2, 50, args.hops)
        result = evaluate(None, dataset, split_idx, eval_func, out_cs)
        logger.add_result(run, (), (result[1], result[2]))

    res = logger.display()
    trial.set_user_attr('valid', f'{res[:, 0].mean():.3f} ± {res[:, 0].std():.3f}')
    trial.set_user_attr('test', f'{res[:, 1].mean():.3f} ± {res[:, 1].std():.3f}')
    return res[:, 0].mean()

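# A minimal driver sketch for the objective above, using the standard Optuna API.
# The study direction and trial count are illustrative assumptions, not from the source.
import optuna

study = optuna.create_study(direction="maximize")  # objective returns mean validation score
study.optimize(objective, n_trials=100)
print(study.best_trial.params)
print(study.best_trial.user_attrs)  # the 'valid' and 'test' strings set in objective
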
def train(self, wm_model, tool):
    # utils.print_parameter_list(wm_model, wm_model.dseq_parameter_names())
    # load data for pre-training
    print("building data for dseq...")
    tool.build_data(self.hps.train_data, self.hps.valid_data,
        self.hps.dseq_batch_size, mode='dseq')
    print("train batch num: %d" % (tool.train_batch_num))
    print("valid batch num: %d" % (tool.valid_batch_num))
    # input("please check the parameters, and then press any key to continue >")

    # training logger
    logger = SimpleLogger('train')
    logger.set_batch_num(tool.train_batch_num)
    logger.set_log_steps(self.hps.dseq_log_steps)
    logger.set_log_path(self.hps.dseq_train_log_path)
    logger.set_rate('learning_rate', 0.0)
    logger.set_rate('teach_ratio', 1.0)

    # build optimizer
    opt = torch.optim.AdamW(wm_model.dseq_parameters(), lr=1e-3,
        betas=(0.9, 0.99), weight_decay=self.hps.weight_decay)
    optimizer = ISRScheduler(optimizer=opt, warmup_steps=self.hps.dseq_warmup_steps,
        max_lr=self.hps.dseq_max_lr, min_lr=self.hps.dseq_min_lr,
        init_lr=self.hps.dseq_init_lr, beta=0.6)

    wm_model.train()
    criterion = Criterion(self.hps.pad_idx)

    # teacher forcing ratio decay
    tr_decay_tool = ExponentialDecay(self.hps.dseq_burn_down_tr,
        self.hps.dseq_decay_tr, self.hps.dseq_min_tr)

    # train
    for epoch in range(1, self.hps.dseq_epoches + 1):
        self.run_train(wm_model, tool, optimizer, criterion, logger)

        if epoch % self.hps.dseq_validate_epoches == 0:
            print("run validation...")
            wm_model.eval()
            print("in training mode: %d" % (wm_model.training))
            self.run_validation(epoch, wm_model, criterion, tool, optimizer.rate())
            wm_model.train()
            print("validation done: %d" % (wm_model.training))

        if (self.hps.dseq_save_epoches >= 1) and \
                (epoch % self.hps.dseq_save_epoches) == 0:
            # save checkpoint
            print("saving model...")
            utils.save_checkpoint(self.hps.model_dir, epoch, wm_model, prefix="dseq")

        logger.add_epoch()

        print("teacher forcing ratio decay...")
        wm_model.set_teach_ratio(tr_decay_tool.do_step())
        logger.set_rate('teach_ratio', tr_decay_tool.get_rate())

        print("shuffle data...")
        tool.shuffle_train_data()

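# ExponentialDecay is not shown in the source; this is a hypothetical sketch matching
# how it is called above: ExponentialDecay(burn_down_steps, decay_steps, min_rate),
# with do_step() advancing the schedule and get_rate() returning the current value.
import math

class ExponentialDecay(object):
    def __init__(self, burn_down_steps, decay_steps, min_rate, init_rate=1.0):
        self.burn_down_steps = burn_down_steps  # steps to hold the initial rate
        self.min_rate = min_rate                # floor for the decayed rate
        self.rate = init_rate
        self.step = 0
        # pick alpha so that init_rate * alpha ** decay_steps == min_rate
        self.alpha = math.exp(math.log(min_rate / init_rate) / decay_steps)

    def do_step(self):
        self.step += 1
        if self.step > self.burn_down_steps:
            self.rate = max(self.rate * self.alpha, self.min_rate)
        return self.rate

    def get_rate(self):
        return self.rate
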
def run_validation(self, epoch, wm_model, criterion, tool, lr):
    logger = SimpleLogger('valid')
    logger.set_batch_num(tool.valid_batch_num)
    logger.set_log_path(self.hps.dseq_valid_log_path)
    logger.set_rate('learning_rate', lr)
    logger.set_rate('teach_ratio', wm_model.get_teach_ratio())

    for step in range(0, tool.valid_batch_num):
        batch = tool.valid_batches[step]
        inps = batch[0].to(self.device)
        trgs = batch[1].to(self.device)
        ph_inps = batch[2].to(self.device)
        len_inps = batch[3].to(self.device)

        with torch.no_grad():
            gen_loss, _ = self.run_step(wm_model, None, criterion,
                inps, trgs, ph_inps, len_inps, True)

        logger.add_losses(gen_loss)

    logger.print_log(epoch)

def run_validation(self, epoch, wm_model, criterion, tool, lr):
    logger = SimpleLogger('valid')
    logger.set_batch_num(tool.valid_batch_num)
    logger.set_log_path(self.hps.valid_log_path)
    logger.set_rate('learning_rate', lr)
    logger.set_rate('teach_ratio', wm_model.get_teach_ratio())
    logger.set_rate('temperature', wm_model.get_tau())

    for step in range(0, tool.valid_batch_num):
        batch = tool.valid_batches[step]
        all_inps = [inps.to(self.device) for inps in batch[0]]
        all_trgs = [trgs.to(self.device) for trgs in batch[1]]
        all_ph_inps = [ph_inps.to(self.device) for ph_inps in batch[2]]
        all_len_inps = [len_inps.to(self.device) for len_inps in batch[3]]
        keys = [key.to(self.device) for key in batch[4]]

        with torch.no_grad():
            gen_loss, _ = self.run_step(wm_model, None, criterion,
                all_inps, all_trgs, all_ph_inps, all_len_inps, keys, True)

        logger.add_losses(gen_loss)

    logger.print_log(epoch)

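# Criterion is not defined in these snippets; a plausible minimal sketch, given the
# Criterion(self.hps.pad_idx) construction above, is a negative log-likelihood loss
# that ignores padding positions. Hypothetical, not the authors' implementation.
import torch
import torch.nn as nn

class Criterion(nn.Module):
    def __init__(self, pad_idx):
        super(Criterion, self).__init__()
        # NLL over log-probabilities, skipping pad targets
        self.loss_fn = nn.NLLLoss(ignore_index=pad_idx, reduction='mean')

    def forward(self, log_probs, targets):
        # log_probs: (batch, seq_len, vocab); targets: (batch, seq_len)
        return self.loss_fn(log_probs.reshape(-1, log_probs.size(-1)),
                            targets.reshape(-1))
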
import csv
import os
from concurrent.futures import ProcessPoolExecutor

from logger import SimpleLogger
from load_csv import (DangerousLevelNodeCsv, LoopholeNodeCsv, ManufacturerNodeCsv,
                      ProductNodeCsv, ThreatNodeCsv, L2DRelationshipCsv,
                      L2PRelationshipCsv, L2TRelationshipCsv, P2MRelationshipCsv,
                      M2LRelationshipCsv)
from nodes import DangerousLevelNode, LoopholeNode, ManufacturerNode, ProductNode, ThreatNode
from relationships import L2DRelationship, L2PRelationship, L2TRelationship, P2MRelationship, M2LRelationship
from exceptions import LackAttributeError, UnImplementedError, InstantiateError

sp_logger = SimpleLogger(__name__, './log/generate_graph.log').get_logger()


class GeneratingGraphInterface(object):

    def __init__(self):
        raise UnImplementedError("Interface class couldn't be instantiated!")

    def handle(self):
        raise UnImplementedError("This method must be implemented!")


class BaseGeneratingGraph(GeneratingGraphInterface):
    node_classes = []
    relationship_classes = []

    def __init__(self):

def train(self, wm_model, tool):
    # utils.print_parameter_list(wm_model)
    # load data for pre-training
    print("building data for wm...")
    tool.build_data(self.hps.train_data, self.hps.valid_data,
        self.hps.batch_size, mode='wm')
    print("train batch num: %d" % (tool.train_batch_num))
    print("valid batch num: %d" % (tool.valid_batch_num))
    # input("please check the parameters, and then press any key to continue >")

    # training logger
    logger = SimpleLogger('train')
    logger.set_batch_num(tool.train_batch_num)
    logger.set_log_steps(self.hps.log_steps)
    logger.set_log_path(self.hps.train_log_path)
    logger.set_rate('learning_rate', 0.0)
    logger.set_rate('teach_ratio', 1.0)
    logger.set_rate('temperature', 1.0)

    # build optimizer
    opt = torch.optim.AdamW(wm_model.parameters(), lr=1e-3,
        betas=(0.9, 0.99), weight_decay=self.hps.weight_decay)
    optimizer = ISRScheduler(optimizer=opt, warmup_steps=self.hps.warmup_steps,
        max_lr=self.hps.max_lr, min_lr=self.hps.min_lr,
        init_lr=self.hps.init_lr, beta=0.6)

    wm_model.train()

    null_idxes = tool.load_function_tokens(self.hps.data_dir + "fchars.txt").to(self.device)
    wm_model.set_null_idxes(null_idxes)

    criterion = Criterion(self.hps.pad_idx)

    # teacher forcing ratio decay, stepped once per epoch
    tr_decay_tool = ExponentialDecay(self.hps.burn_down_tr,
        self.hps.decay_tr, self.hps.min_tr)
    # temperature annealing, stepped once per iteration
    self.tau_decay_tool = ExponentialDecay(0, self.hps.tau_annealing_steps, self.hps.min_tau)

    # -----------------------------------------------------------
    # train with all data
    for epoch in range(1, self.hps.max_epoches + 1):
        self.run_train(wm_model, tool, optimizer, criterion, logger)

        if epoch % self.hps.validate_epoches == 0:
            print("run validation...")
            wm_model.eval()
            print("in training mode: %d" % (wm_model.training))
            self.run_validation(epoch, wm_model, criterion, tool, optimizer.rate())
            wm_model.train()
            print("validation done: %d" % (wm_model.training))

        if (self.hps.save_epoches >= 1) and \
                (epoch % self.hps.save_epoches) == 0:
            # save checkpoint
            print("saving model...")
            utils.save_checkpoint(self.hps.model_dir, epoch, wm_model, prefix="wm")

        logger.add_epoch()

        print("teacher forcing ratio decay...")
        wm_model.set_teach_ratio(tr_decay_tool.do_step())
        logger.set_rate('teach_ratio', tr_decay_tool.get_rate())

        print("shuffle data...")
        tool.shuffle_train_data()

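# ISRScheduler is likewise not shown; this sketch assumes an inverse-square-root
# decay with linear warmup, wrapping the optimizer and exposing rate() as used
# above. The beta exponent handling and method names beyond rate() are assumptions.
class ISRScheduler(object):
    def __init__(self, optimizer, warmup_steps, max_lr, min_lr, init_lr, beta=0.6):
        self.optimizer = optimizer
        self.warmup_steps = max(1, warmup_steps)
        self.max_lr, self.min_lr, self.init_lr = max_lr, min_lr, init_lr
        self.beta = beta
        self._step = 0

    def rate(self):
        if self._step < self.warmup_steps:
            # linear warmup from init_lr to max_lr
            frac = float(self._step) / self.warmup_steps
            return self.init_lr + (self.max_lr - self.init_lr) * frac
        # inverse-square-root style decay, softened by beta, floored at min_lr
        decay = (float(self.warmup_steps) / self._step) ** self.beta
        return max(self.max_lr * decay, self.min_lr)

    def step(self):
        # advance the schedule, push the new lr into every param group, then step
        self._step += 1
        for group in self.optimizer.param_groups:
            group['lr'] = self.rate()
        self.optimizer.step()

    def zero_grad(self):
        self.optimizer.zero_grad()
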
### Load method ###
model = parse_method(args, dataset, n, c, d, device)

# using rocauc as the eval function
if args.rocauc or args.dataset in ('yelp-chi', 'twitch-e', 'ogbn-proteins'):
    criterion = nn.BCEWithLogitsLoss()
    eval_func = eval_rocauc
else:
    criterion = nn.NLLLoss()
    eval_func = eval_acc

logger = Logger(args.runs, args)

if args.method == 'cs':  # for correct and smooth
    cs_logger = SimpleLogger('evaluate params', [], 2)
    model_path = f'{args.dataset}-{args.sub_dataset}' if args.sub_dataset else f'{args.dataset}'
    model_dir = f'models/{model_path}'
    print(model_dir)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    DAD, AD, DA = gen_normalized_adjs(dataset)

if args.method == 'lp':  # handles label propagation separately
    for alpha in (.01, .1, .25, .5, .75, .9, .99):
        logger = Logger(args.runs, args)
        for run in range(args.runs):
            split_idx = split_idx_lst[run]
            train_idx = split_idx['train']
            model.alpha = alpha

# _*_ coding: utf-8 _*_
# @FileName   : tools.py
# @Author     : sjl
# @CreatedAt  : 2021/03/19 11:04:44
# @UpdatedAt  : 2021/03/19 11:04:44
# @description: tool functions
# @Software   : VSCode

import time
from functools import wraps

from logger import SimpleLogger

sp_logger = SimpleLogger(__name__, "./log/loophole.log").get_logger()


def spend_time(func):
    """Log how long the wrapped function takes to execute."""
    @wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)  # preserve the wrapped function's return value
        sp_logger.info("executing {} spent {:.3f} s".format(
            func.__name__, time.time() - start_time))
        return result
    return wrapper

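# Usage example for the decorator above; slow_task is a hypothetical function.
@spend_time
def slow_task(n):
    total = 0
    for i in range(n):
        total += i * i
    return total

slow_task(10 ** 6)  # logs "executing slow_task spent ... s" via sp_logger
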
# -*- coding: utf-8 -*-
from logger import SimpleLogger

# Logger example
config = dict()
config['SIMPLELOGS_URL'] = 'http://192.168.1.102'  # address of the logging system
config['MODULE_NAME'] = 'elreg'  # code of the logged system, used to initialize the logger: logging.getLogger(module_name)
config['OWNER'] = dict(name=u'Electronic registry', version='2.2.10')  # owner for the logging system
config['DEBUG'] = False  # flag matching the application's debug_mode

# write to log all unhandled exceptions if not DEBUG mode
logger = SimpleLogger.get_logger(config.get('SIMPLELOGS_URL'),
                                 config.get('MODULE_NAME'),
                                 config.get('OWNER'),
                                 config.get('DEBUG'))

try:
    raise ValueError('Raised ValueError')
except ValueError as e:
    # tags are passed through the `extra` parameter
    logger.error(u'Application error: {0}'.format(e),
                 extra=dict(tags=[u'logger check', 'elreg']))

def run_active_learning():
    logger = SimpleLogger(LOG_FILE)
    dm = DataManager()
    im = InterpretableDataManager()
    drp_model = SVM(kernel=KERNEL, probability=True)
    lime_model = svm.SVC(kernel=KERNEL, probability=True)
    accs = [[], [], []]
    mccs = [[], [], []]

    labeled_indices = dm.get_labeled_indices()
    logger.log(0, labeled_indices)

    for strategy in STRATEGIES:
        trn_ds = dm.trn_ds_list[strategy]
        drp_model.train(trn_ds)
        update_accs_mccs(accs, mccs, dm, drp_model.model.predict, strategy)
    print_last_round_mcc(0, accs, mccs)

    assert (AL_ROUNDS <= len(dm.y_train) - INITIAL_INSTANCES)
    for round in xrange(1, AL_ROUNDS + 1):
        print "================================================="
        print "Round", round
        print "================================================="
        for strategy in STRATEGIES:
            trn_ds = dm.trn_ds_list[strategy]
            exclusion = set()
            batch = set()
            unlabeled_indices, unlabeled_X_scaled = zip(
                *trn_ds.get_unlabeled_entries())
            certainties = get_certainties(drp_model.model, dm.X_train_scaled)
            if strategy == EAL:
                threshold = get_certainty_threshold(
                    drp_model.model, dm.X_train_scaled, THRESHOLD)
                y_certainty = discretize_certainties(certainties, threshold)
                lime_model.fit(dm.X_train_scaled_e, y_certainty)
                if SHOW_LIME:
                    certainties_test = get_certainties(
                        drp_model.model, dm.X_test_scaled)
                    y_certainty_test = discretize_certainties(
                        certainties_test, threshold)
                    print_lime_model_performance(lime_model, dm, y_certainty_test)
                while (len(batch) < BATCH_SIZE):
                    query_id = query_least_confident(
                        unlabeled_indices, certainties, exclusion)
                    query = dm.X_train_scaled[query_id]
                    query_unscaled = dm.X_train_e[query_id]
                    instance_certainty = get_certainty(drp_model.model, query)
                    print "Explaining Query with id #{:d}".format(query_id)
                    print "Certainty {:.3f}".format(instance_certainty)
                    explainer = LimeTabularExplainer(
                        dm.X_train_e,
                        training_labels=y_certainty,
                        feature_names=dm.feature_names_e,
                        class_names=["uncertain", "certain"],
                        discretize_continuous=True,
                        discretizer="entropy")
                    predict_fn = lambda x: lime_model.predict_proba(
                        dm.scaler_e.transform(x)).astype(float)
                    for i in xrange(0, MAX_EXP_FEATURE, 2):
                        exp = explainer.explain_instance(
                            query_unscaled, predict_fn,
                            num_features=NUM_FEATURES + i)
                        uncertain_exp_list = get_uncertain_exps(exp)
                        if (len(uncertain_exp_list) >= NUM_FEATURES - 2):
                            break
                        print "INFO: looping"
                    if SHOW_LIME:
                        print_lime_model_prediction(predict_fn, query_unscaled)
                    exp_indices = get_indices_exp_region(
                        exp, dm, unlabeled_indices, y_certainty)
                    exp_instances = get_values_of_indices(
                        exp_indices, dm.X_train_scaled)
                    exp_certainties = get_values_of_indices(
                        exp_indices, certainties)
                    batch_indices = select_batch(
                        min(BATCH_SIZE, BATCH_SIZE - len(batch)),
                        exp_indices, exp_instances, exp_certainties,
                        "k-means-uncertain")
                    if len(batch_indices) == 0:
                        exclusion.add(query_id)
                        continue
                    print ""
                    print_explanation_drp(uncertain_exp_list, False)
                    print ""
                    print "Instances in the batch: {}".format(len(batch_indices))
                    im.describe_instances(batch_indices)
                    print ""
                    im.describe_instance(query_id)
                    print ""
                    exclusion.update(set(exp_indices))
                    if ask_expert():
                        batch.update(set(batch_indices))
                    else:
                        print "INFO: Not including in the batch"
                logger.log(round, batch)
                print "INFO: Labeling the batch"
                label_batch(trn_ds, dm.y_train, batch)
            elif strategy == AL:  # AL + k-means-uncertain
                unlabeled_X_scaled = get_values_of_indices(
                    unlabeled_indices, dm.X_train_scaled)
                unlabeled_certainties = get_values_of_indices(
                    unlabeled_indices, certainties)
                batch_indices = select_batch(
                    BATCH_SIZE, unlabeled_indices, unlabeled_X_scaled,
                    unlabeled_certainties, "k-means-uncertain")
                label_batch(trn_ds, dm.y_train, batch_indices)
            elif strategy == PL:  # Passive Learning
                batch_indices = random.sample(unlabeled_indices, BATCH_SIZE)
                label_batch(trn_ds, dm.y_train, batch_indices)
            drp_model.train(trn_ds)
            update_accs_mccs(accs, mccs, dm, drp_model.model.predict, strategy)
        print_mcc_summary(mccs)

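# get_certainties and query_least_confident are not defined in this snippet; a
# minimal sketch consistent with their use above: certainty as the maximum class
# probability from a fitted probabilistic classifier, and the query as the least
# certain unlabeled index not yet excluded. Hypothetical helper implementations.
import numpy as np

def get_certainties(model, X):
    # model is assumed to expose predict_proba (e.g., sklearn SVC(probability=True))
    return np.max(model.predict_proba(X), axis=1)

def query_least_confident(unlabeled_indices, certainties, exclusion):
    # pick the unlabeled instance the model is least certain about
    candidates = [i for i in unlabeled_indices if i not in exclusion]
    return min(candidates, key=lambda i: certainties[i])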