def test_individual_logger(_logger, logger_file):
    individual_logger = _logger('test_individual_logger_individual', global_level=False,
                                level=Log.Levels.CRITICAL)
    individual_logger._filehandler.level = constant_level = 98
    Log.set_global_log_level(Log.Levels.WARNING)
    assert logger_file.level == Log.Levels.WARNING, \
        "log level should be %s after set_global_log_level" % Log.Levels.WARNING
    assert individual_logger.level == Log.Levels.CRITICAL, \
        "individual logger should ignore set_global_log_level and remain %s" % Log.Levels.CRITICAL
    assert individual_logger._filehandler.level == constant_level, \
        "individual logger filehandler should ignore set_global_log_level and remain %s" % constant_level
def pretty_print(paths_dict, data: DataManager):
    """
    Print the predictions and the explanations for each of them, if they exist.
    :param paths_dict: dictionary of the evaluated explanations, in the form
                       {test_triple_index: {pred_id: [expl_obj, expl_obj, ...], ...}, ...}
    :param data: DataManager holding the test triples and the predicted tails
    :return:
    """
    log = Log.get_logger()
    for triple_test_index in paths_dict.keys():
        test_triple = data.test_triples[triple_test_index]
        log.debug(f"Test triple: {test_triple}")
        head = test_triple[0]
        rel = test_triple[2]
        for pred_index in paths_dict[triple_test_index].keys():
            explanations = paths_dict[triple_test_index][pred_index]
            if explanations != {None}:
                log.debug(
                    f"\tExplanations for the prediction ({head} --{rel}--> "
                    f"{data.test_predicted_tails[triple_test_index][pred_index]})")
                # we are inside the dict that groups explanations by path type
                log.debug(explanations)
                for k in explanations.keys():
                    if explanations[k]:  # i.e. the list for this explanation type is not empty
                        log.debug(f"Type {k}:")
                        for e in explanations[k]:
                            log.debug(e)
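# Illustration only: a minimal paths_dict in the shape pretty_print expects. The
# path-type keys ('direct', 'two_hop') and the path tuples are hypothetical; the real
# keys and objects come from Explainer.paths.
example_paths_dict = {
    0: {                             # index into data.test_triples
        0: {                         # index into data.test_predicted_tails[0]
            'direct': [(1, 5, 2)],   # hypothetical path type -> list of paths
            'two_hop': [],
        },
    },
}
# pretty_print(example_paths_dict, data)  # `data` is an already-loaded DataManager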
def __semantic(emb_a, emb_b, id, other_id, obj_type, classes, domains, ranges):
    """
    Compute a semantic similarity: the Jaccard index over classes for entities, and a
    Jaccard-based measure over domains and ranges for relationships. The semantic score
    (weight 0.2) is blended with the cosine similarity of the embeddings (weight 0.8);
    when no semantic information is available, the Jaccard terms are 0 and only the
    embedding similarity contributes.
    :param emb_a: embedding of the first entity/relation, corresponding to 'id'
    :param emb_b: embedding of the second entity/relation, corresponding to 'other_id'
    :param id: id of the entity/relationship
    :param other_id: id of the other entity/relationship
    :param obj_type: either 'ent' or 'rel'
    :param classes: dict mapping entity ids to their classes
    :param domains: dict mapping relation ids to their domains
    :param ranges: dict mapping relation ids to their ranges
    :return: similarity value
    """
    log = Log.get_logger(name="general")  # on Windows it can't restore the main logger!
    if obj_type == 'ent':
        # use the class information of the two entities
        ent_sim = __jaccard(classes[id], classes[other_id])
        if ent_sim > 0:
            # keep track of which pairs benefit from this measure
            log.debug(f"{id} ({classes[id]}),{other_id} ({classes[other_id]}), {ent_sim}")
        return (0.2 * ent_sim) + (__cosine(emb_a, emb_b) * 0.8)
    elif obj_type == 'rel':
        dom_jaccard = __jaccard(domains[id], domains[other_id])
        range_jaccard = __jaccard(ranges[id], ranges[other_id])
        rel_sim = dom_jaccard + range_jaccard
        if rel_sim > 0:
            log.debug(
                f"{id} (domain: {domains[id]}) (range: {ranges[id]}),"
                f"{other_id} (domain: {domains[other_id]}) (range: {ranges[other_id]}), {rel_sim}")
        return (0.2 * rel_sim) + (__cosine(emb_a, emb_b) * 0.8)
    else:
        raise ValueError(f"obj_type can be either 'ent' or 'rel', not '{obj_type}'")
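# The two helpers used above are not shown in this file; this is a minimal sketch of
# what __semantic assumes: __jaccard as |A ∩ B| / |A ∪ B| over id collections, and
# __cosine as the cosine similarity of two embedding vectors. Signatures are assumptions.
import numpy as np


def __jaccard(set_a, set_b):
    """Jaccard index of two collections of ids; 0.0 when both are empty."""
    a, b = set(set_a or ()), set(set_b or ())
    union = a | b
    return len(a & b) / len(union) if union else 0.0


def __cosine(emb_a, emb_b):
    """Cosine similarity between two embedding vectors."""
    a = np.asarray(emb_a, dtype=float)
    b = np.asarray(emb_b, dtype=float)
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))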
def main_process(data: DataManager, num_tripla: int, explainer: Explainer, return_dict, args):
    """
    Worker process that generates the explanations for triple num_tripla; used with
    multiprocessing to parallelize explanation generation over several triples at once.
    :param data: DataManager holding triples, predictions and similarity dicts
    :param num_tripla: index of the test triple to process
    :param explainer: Explainer used to search for explanation paths
    :param return_dict: shared dict where the results are stored, keyed by num_tripla
    :param args: parsed command-line arguments (top_ent, top_rel, ...)
    :return:
    """
    log = Log.get_logger()
    tripla_test = data.test_triples[num_tripla]
    log.info(f"Processing predictions for test triple: {tripla_test}")
    # ids
    test_head_id = tripla_test[0]
    rel_id = tripla_test[2]
    # embeddings of the test triple
    """head_emb = data.entity_emb[test_head_id]
    tail_emb = data.entity_emb[test_tail_id]
    rel_emb = data.rel_emb[rel_id]
    inv_rel_emb = data.inv_rel[rel_id]"""

    ## TAIL PREDICTION EXPLANATION
    tail_predictions = data.test_predicted_tails[num_tripla]
    # similarity with the relation embedding of the triple
    sim_rels = explainer.top_sim_emb(rel_id, data.relations_similarities_dict,
                                     top_k=args.top_rel)
    sim_heads = explainer.top_sim_emb(test_head_id, data.entities_similarities_dict,
                                      top_k=args.top_ent)
    # dict of {num_pred: paths, num_pred1: paths1}: key = index into tail_predictions,
    # value = the explanation paths found for that prediction
    paths_for_pred = {}
    for num_pred in range(len(tail_predictions)):
        predicted_tail_id = tail_predictions[num_pred]
        # similar tails are used to search for supporting explanations
        sim_tails = explainer.top_sim_emb(predicted_tail_id, data.entities_similarities_dict,
                                          top_k=args.top_ent)
        # so, search for an explanation for (head_id, pred_tail, rel_id)
        paths_for_pred[num_pred] = explainer.paths(test_head_id, rel_id, sim_rels,
                                                   predicted_tail_id,
                                                   [data.train_hr_t, data.train_tr_h],
                                                   sim_heads, sim_tails, args)
    return_dict[num_tripla] = paths_for_pred
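# A minimal driver sketch (assumed wiring, based on the multiprocessing.Manager created
# in the main script): each test triple gets its own process, and results land in a
# shared dict keyed by triple index. The function name run_all is hypothetical.
import multiprocessing


def run_all(data, explainer, args, triple_indices):
    manager = multiprocessing.Manager()
    return_dict = manager.dict()  # shared across worker processes
    jobs = []
    for num_tripla in triple_indices:
        p = multiprocessing.Process(target=main_process,
                                    args=(data, num_tripla, explainer, return_dict, args))
        p.start()
        jobs.append(p)
    for p in jobs:
        p.join()
    return dict(return_dict)  # {triple_index: {pred_index: paths}}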
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Global Logger Examples
"""
from global_logger import Log

# create and/or reuse a global logger, choosing its name dynamically,
# with screen-only output and the default logging level INFO
log = Log.get_logger()

# this forces ALL loggers to lower their logging level to DEBUG
log.verbose = True

log.debug("debug text: level: %s" % log.Levels.DEBUG)
log.info("info text: level: %s" % log.Levels.INFO)
log.warning("warning text: level: %s" % log.Levels.WARNING)
log.error("error text: level: %s" % log.Levels.ERROR)
log.critical("critical text: level: %s" % log.Levels.CRITICAL)
# 2020-06-28 14:18:42.004 14:source.examples    DEBUG debug text: level: '10'
# 2020-06-28 14:18:42.004 15:source.examples     INFO info text: level: '20'
# 2020-06-28 14:18:42.004 16:source.examples  WARNING warning text: level: '30'
# 2020-06-28 14:18:42.005 17:source.examples    ERROR error text: level: '40'
# 2020-06-28 14:18:42.005 18:source.examples CRITICAL critical text: level: '50'

# log colored text without a newline, clearing all the ANSI symbols from the message
log.printer('always printed text....', color='blue', end='', clear=True)
# can also be simplified to:
log.green('green text', clear=False)
log.yellow('yellow text', end='\t\t\t\t')
log.red('red text')

# create and/or reuse a global logger, choosing its name dynamically
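# A file-logging sketch using only calls that appear elsewhere in these examples:
# get_logger accepts logs_dir/name/level, and set_global_log_level adjusts every
# global logger at once. The 'logs' directory and the logger name are arbitrary
# choices for this example.
file_log = Log.get_logger(logs_dir='logs', name='examples_file', level=Log.Levels.DEBUG)
file_log.debug('this line also goes to a log file under ./logs')
Log.set_global_log_level(Log.Levels.WARNING)  # raise the level for all global loggers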
import os
import re

import requests
from notifiers import get_notifier

from global_logger import Log

CHAT_ID = os.getenv('TELEGRAM_CHAT_ID', 0)
TELEGRAM_BOT_API_KEY = os.getenv('TELEGRAM_BOT_API_KEY')
TELEGRAM_DEFAULT_PARSE_MODE = 'markdown'
telegram = get_notifier('telegram')
LOG = Log.get_logger()


def get_html(url):
    """Fetch a page and return its HTML body."""
    r = requests.get(url, timeout=10)  # requests timeouts are in seconds
    return r.text


def extract_number(str_):
    """Join all numeric tokens found in the string and parse them as a float (',' -> '.')."""
    return float("".join(re.findall(r'\d+(?:[.,]\d+)?', str_)).replace(',', '.'))


def price_per_volume(volume, price):
    """Price per unit of volume, rounded to the nearest integer."""
    return int(round(float(price) / float(volume), 0))


def liters(str_):
    # strip tokens that are not volumes before extracting the value
    for exclusion in (' 10', ' 40'):
        str_ = str_.replace(exclusion, '')
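# A notification sketch (assumed usage): the notifiers telegram provider exposes
# notify(message=..., chat_id=..., token=..., parse_mode=...) and returns a Response
# whose .status reports success or failure. The message text is a placeholder.
response = telegram.notify(message='*price update*',
                           chat_id=CHAT_ID,
                           token=TELEGRAM_BOT_API_KEY,
                           parse_mode=TELEGRAM_DEFAULT_PARSE_MODE)
LOG.info('telegram notify status: %s', response.status)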
# convenience shortcut when specifying the args
if args.semantic_dir is not None:
    args.distance_type = 'semantic'
save_dir = args.save_dir
if save_dir == 'data_dir':
    save_dir = f"{data_folder}{args.distance_type}/"
    args.save_dir = save_dir
if args.distance_type == 'semantic' and args.semantic_dir is None:
    print("You must provide a folder (--semantic_data) containing three files: "
          "entity2class_dict.pkl, rs_domain2id_dict.pkl, rs_range2id_dict.pkl. \nEXIT")
    exit()

log = Log.get_logger(logs_dir=save_dir, name="general")  # logger for general communications
log.info(f"Distance type: {args.distance_type}")
log.info(f"Save folder: {save_dir}")

# logger for semantic experiments
"""if args.distance_type == 'semantic':
    # Path(save_dir).mkdir(parents=True, exist_ok=True)
    ent_log = Log.get_logger(logs_dir=save_dir, name="ent",
                             level=Log.Levels.DEBUG)  # to store semantic similarities
    rel_log = Log.get_logger(logs_dir=save_dir, name="rel", level=Log.Levels.DEBUG)
    ent_log.debug("ENTITIES WITH SEMANTIC SIMILARITY")
    rel_log.debug("RELATIONSHIPS WITH SEMANTIC SIMILARITY")"""

ent, rel, inv, classes, domains, ranges = load_data(
    data_folder)  # classes, domains, ranges will be None if not in semantic mode
semantic_data.entity2class_dict = classes
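# A loading sketch (assumed): the three pickles named in the error message above map
# entity ids to class sets and relation ids to domain/range sets. The helper name
# load_semantic_dicts is hypothetical.
import os
import pickle


def load_semantic_dicts(semantic_dir):
    names = ('entity2class_dict.pkl', 'rs_domain2id_dict.pkl', 'rs_range2id_dict.pkl')
    dicts = []
    for name in names:
        with open(os.path.join(semantic_dir, name), 'rb') as fh:
            dicts.append(pickle.load(fh))
    return tuple(dicts)  # (classes, domains, ranges)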
parser.add_argument('--distance', dest='distance_type', type=str,
                    help='choose the pre-computed distance/similarity to involve: euclidian, cosine',
                    default='euclidian')
# note: argparse's type=bool treats any non-empty string as True, so a store_true flag
# is used instead
parser.add_argument('--pretty_print', dest='pretty_print_flag', action='store_true',
                    help='if set, enable the pretty print of the explanations on the log file '
                         '(requires much time)',
                    default=False)
global args
args = parser.parse_args()

log_save_dir = f"{args.save_dir}execution_logs"  # to save a subfolder with the fraction used
Path(log_save_dir).mkdir(parents=True, exist_ok=True)
if args.log_level == "debug":
    log = Log.get_logger(logs_dir=log_save_dir, level=Log.Levels.DEBUG)
else:
    log = Log.get_logger(logs_dir=log_save_dir)

log.info("DATA DIR: %s" % args.data_dir)
log.info("TOP_ENT: %d" % args.top_ent)
log.info("TOP_REL: %d" % args.top_rel)
log.info("LOG LEVEL: %s" % args.log_level)
log.info("MULTIPROCESSING: %s" % args.multiproc_flag)
if args.multiproc_flag is True:
    log.info("PROCESSES: %d" % args.max_processes)
log.info("SAVE DIR: %s" % args.save_dir)
log.info("PERCENTAGE OF PREDICTIONS: %s" % args.pred_perc)
log.info("DISTANCE TYPE: %s" % args.distance_type)

global manager  # should help with premature termination of the processes
manager = multiprocessing.Manager()  # manager for the shared dict in multiprocessing
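# Example invocation (the script name and all flags other than --distance and
# --pretty_print are assumptions inferred from the dest names logged above):
#   python explain.py --data_dir data/ --top_ent 5 --top_rel 5 --log_level debug \
#       --distance cosine --pretty_print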
def test_instance_exception():
    with pytest.raises(ValueError):
        Log('something')
def __logger(*args, **kwargs):
    return Log.get_logger(*args, **kwargs)
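# A pytest fixture sketch (assumed wiring): test_individual_logger above receives
# `_logger` as a fixture and calls it with get_logger-style arguments, so the fixture
# plausibly hands back this factory.
import pytest


@pytest.fixture
def _logger():
    return __logger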