def formula_val_list_into_concat_formula_list(prefix_formula_val_list, folder_name, signal_file_base, trace_count,
                                              signal_file_rest, return_type):
    """
    Debugging aid for the cause mining algorithm: combine several formulas with "or" and evaluate the result.

    Only the ``formula`` field of every input entry is used. The formulas are handed to
    ``prefix_to_infix_list`` and then to ``concat_with_or_infix``; afterwards the combined formula is read
    from index 0 of the working list, converted back to prefix form and evaluated on the signals.

    Args:
        prefix_formula_val_list: A list of FormulaValuations whose formulas are in prefix form.
            Ex: [FormulaValuation(formula='A 0 4 x5 = 0', valuation=0.7824),
                 FormulaValuation(formula='& A 0 2 x5 = 0 P 1 1 x3 > 10', valuation=0.921521)]
        folder_name: forwarded to ``optimization.evaluator.evaluate_signals``.
        signal_file_base: forwarded to ``optimization.evaluator.evaluate_signals``.
        trace_count: forwarded to ``optimization.evaluator.evaluate_signals``.
        signal_file_rest: forwarded to ``optimization.evaluator.evaluate_signals``.
        return_type: forwarded to ``optimization.evaluator.evaluate_signals``.

    Returns:
        a FormulaValuation holding the combined formula (as stored at index 0 of the working list, i.e. the
        string that was passed to ``STL.infix_to_prefix``) and the valuation computed for it.
    """
    working_formulas = []
    for formula_valuation in prefix_formula_val_list:
        working_formulas.append(formula_valuation.formula)

    # Both helpers are called only for their effect on `working_formulas`; their return values are ignored
    # and the combined formula is read from the first slot of the list afterwards.
    prefix_to_infix_list(working_formulas)
    concat_with_or_infix(working_formulas)
    combined_infix = working_formulas[0]

    combined_prefix = STL.infix_to_prefix(combined_infix)
    combined_valuation = optimization.evaluator.evaluate_signals(
        combined_prefix, folder_name, signal_file_base, trace_count, signal_file_rest, return_type)

    return stl_constants.FormulaValuation(formula=combined_infix, valuation=combined_valuation)
def parameter_search_for_formula(formula, parameter_domains_for_formula, folder_name, trace_count, signal_file_base,
                                 process_count, return_type, past_results=None):
    """
    Run a grid search over the parameters of one formula and return the formula with the best values filled in.

    Args:
        formula: the formula in infix form, containing the parameter names as text. It is converted with
            ``STL.infix_to_prefix`` before being handed to the grid search.
        parameter_domains_for_formula: mapping from each parameter name appearing in ``formula`` to its domain.
        folder_name: forwarded to ``grid_search.grid_search``.
        trace_count: forwarded to ``grid_search.grid_search``.
        signal_file_base: forwarded to ``grid_search.grid_search``.
        process_count: forwarded to ``grid_search.grid_search``.
        return_type: forwarded to ``grid_search.grid_search``.
        past_results: past best formulas, forwarded to ``grid_search.grid_search``. When omitted (or None) a new
            empty list is used for this call.

    Returns:
        FormulaValuation whose formula is ``formula`` with every parameter name replaced by its best value
        (or ``formula`` unchanged when the grid search reports no parameters) and whose valuation is the best
        valuation reported by the grid search.
    """
    # A fresh list per call: a list literal as default value would be shared between all calls.
    if past_results is None:
        past_results = []

    parameter_list = list(parameter_domains_for_formula)
    parameter_domain = [parameter_domains_for_formula[name] for name in parameter_list]
    prefix_formula = STL.infix_to_prefix(formula)

    best_v, params, time_passed = grid_search.grid_search(formula=prefix_formula,
                                                          parameter_list=parameter_list,
                                                          parameter_domain=parameter_domain,
                                                          folder_name=folder_name,
                                                          signal_file_base=signal_file_base,
                                                          trace_count=trace_count,
                                                          signal_file_rest='',
                                                          process_count=process_count,
                                                          return_type=return_type,
                                                          past_results=past_results)

    formula_n = formula
    # Identity check: `== None` would be evaluated element-wise if params were an array-like object.
    if params is None:
        print("There are no best parameters for the formula: " + formula)
    else:
        # NOTE(review): str.replace is purely textual, so a parameter name that is a substring of another
        # token in the formula would be rewritten as well - confirm parameter names are unambiguous.
        for p, v in zip(parameter_list, params):
            formula_n = formula_n.replace(p, str(v))
        print("With valuation " + str(best_v) + " ,best parameters form " + formula_n + " ,with prefix form: " +
              STL.infix_to_prefix(formula_n) + " ,in time: " + str(time_passed))

    return stl_constants.FormulaValuation(formula=formula_n, valuation=best_v)
def search_all_search_space(metric_list, control_metrics, set_valued_metrics, parameter_domains, folder_name,
                            trace_count, signal_file_base, process_count, return_type, oc_limit, cause_limit,
                            upto=True, withoutS=False, controllable_formulas=True, time_shift=0):
    """
    Exhaustive reference search used to judge how well and how efficiently the cause mining heuristic works.

    Every template formula generated for ``oc_limit`` is combined with the others into "or"-concatenated
    formulas of up to (or built from tuples of exactly) ``cause_limit`` components. Each distinct combination
    is parameter-searched and the one with the highest valuation is returned. Unlike the heuristic in
    cause_mining_algorithm, no hierarchy or restriction is imposed on the order in which formulas are tried.

    Args:
        metric_list: forwarded to the formula generator.
        control_metrics: forwarded to the formula generator when controllable_formulas is True.
        set_valued_metrics: forwarded to the formula generator.
        parameter_domains: forwarded to formula_search.generate_formula_from_template.
        folder_name: forwarded to formula_search.parameter_search_for_formula.
        trace_count: forwarded to formula_search.parameter_search_for_formula.
        signal_file_base: forwarded to formula_search.parameter_search_for_formula.
        process_count: forwarded to formula_search.parameter_search_for_formula.
        return_type: forwarded to formula_search.parameter_search_for_formula.
        oc_limit: operator count handed to the formula generator.
        cause_limit: the max number of formula components in the end formula.
        upto: (bool) if True, tuples with 1 .. cause_limit components are enumerated; if False, only tuples
            with exactly cause_limit components. In both cases a tuple is reduced to the set of its distinct
            components before it is searched.
        withoutS: forwarded to the formula generator.
        controllable_formulas: (bool) if True, generate_formula_tree_for_cause_mining is called, if False,
            generate_formula_tree_iterative is called.
        time_shift: if greater than 0, every template formula is wrapped as 'P t t ( formula )' with
            t = time_shift.

    Returns:
        best_result
        type: FormulaValuation. It starts as FormulaValuation(formula="", valuation=0) and is only replaced
        by a candidate whose valuation is strictly greater than the current one.
    """
    best_result = stl_constants.FormulaValuation(formula="", valuation=0)

    # Step 1: generate the template formulas.
    if controllable_formulas:
        generated = formula_generator.generate_formula_tree_for_cause_mining(
            metric_list=metric_list, control_metrics=control_metrics, operator_count=oc_limit,
            set_valued_metrics=set_valued_metrics, withoutS=withoutS)
    else:
        generated = formula_generator.generate_formula_tree_iterative(
            metric_list=metric_list, operator_count=oc_limit, return_formula_string=True,
            set_valued_metrics=set_valued_metrics, withoutS=withoutS)
    templates = [] + generated

    if time_shift > 0:
        shift_bounds = str(time_shift) + " " + str(time_shift)
        templates = ['P ' + shift_bounds + " ( " + template + " )" for template in templates]

    # Step 2: enumerate all component tuples lazily, then collapse them into distinct component sets.
    all_tuples = my_iterable()
    component_counts = range(1, cause_limit + 1) if upto else [cause_limit]
    for component_count in component_counts:
        all_tuples = chain(all_tuples, product(templates, repeat=component_count))

    distinct_component_sets = {frozenset(component_tuple) for component_tuple in all_tuples}

    print("there are " + str(len(templates)) + " formulas in all_formulas_till_" + str(oc_limit) +
          " and cause limit is " + str(cause_limit) + " so there will be " + str(len(distinct_component_sets)) +
          " formulas to be iterated over.")

    # Step 3: concatenate each component set with "or", search its parameters and keep the best result.
    concatenated_list = []
    for position, component_set in enumerate(distinct_component_sets, 1):
        components = list(component_set)
        # The concatenated formula is read from components[0] right after this call.
        concatenated_list.append(concat_with_or_infix(components))
        print("the formula number " + str(position) + " that we are on is: " + components[0])

        candidate_formula, candidate_domains = formula_search.generate_formula_from_template(
            template_formula=components[0], parameter_domains=parameter_domains)
        candidate = formula_search.parameter_search_for_formula(
            formula=candidate_formula, parameter_domains_for_formula=candidate_domains,
            folder_name=folder_name, trace_count=trace_count, signal_file_base=signal_file_base,
            process_count=process_count, return_type=return_type)

        if candidate.valuation > best_result.valuation:
            best_result = candidate
        print("\n\nBest Formula of search_all_search_space for now is " + best_result.formula +
              " with valuation " + str(best_result.valuation) + "\n The tuple in iteration is " +
              str(components) + "\n")

    return best_result
def cause_mining_algorithm(metric_list, control_metrics, set_valued_metrics, parameter_domains, folder_name,
                           trace_count, signal_file_base, process_count, save, result_file, return_type,
                           strictly_increasing_oc, valuation_limit, operator_count_limit, withoutS=False,
                           controllable_formulas=True, time_shift=0):
    """
    Build a cause formula step by step: search for the best formula of one operator count at a time, keep it
    only if it improves the valuation enough, and finally concatenate all kept formulas with "or".

    Args:
        metric_list: forwarded to formula_search.formula_search.
        control_metrics: forwarded to formula_search.formula_search.
        set_valued_metrics: forwarded to formula_search.formula_search.
        parameter_domains: forwarded to formula_search.formula_search.
        folder_name: forwarded to formula_search.formula_search.
        trace_count: forwarded to formula_search.formula_search.
        signal_file_base: forwarded to formula_search.formula_search.
        process_count: forwarded to formula_search.formula_search.
        save: forwarded to formula_search.formula_search.
        result_file: forwarded to formula_search.formula_search.
        return_type: forwarded to formula_search.formula_search.
        strictly_increasing_oc: (bool) If False, the heuristic is applied while increasing operator count: the
            operator count is only increased after a search at the current count failed to improve the
            valuation by valuation_limit. If True, operator count increases by one in each loop and the
            search stops at the first insufficient improvement.
        valuation_limit: the predefined limit which decides how much of a value addition is enough for a best
            formula of an operator count to enter the past_results list.
        operator_count_limit: Last operator count, either an int or a pair whose second element is the limit.
            The best formulas are searched for until operator count reaches this value + 1.
        withoutS: forwarded to formula_search.formula_search in both modes.
        controllable_formulas: (bool) passed to formula_search as its cause_mining argument. Per the original
            documentation, True means only controllable formulas are synthesized and False means all kinds of
            formulas are generated. It also selects the starting operator count (-1 if True, 0 if False).
        time_shift: forwarded to formula_search.formula_search.

    Returns:
        FormulaValuation(the kept best formulas, in prefix form, concatenated by
        helper_funs.concat_with_or_prefix, the valuation of the last kept formula)
    """
    if isinstance(operator_count_limit, int):
        oc_rhs_limit = operator_count_limit
    else:
        _, oc_rhs_limit = operator_count_limit

    # NOTE(review): the original comment states that generate_formula_tree_iterative cannot process oc = -1,
    # which is presumably why the search starts at 0 when controllable_formulas is False - confirm.
    current_oc = -1 if controllable_formulas else 0

    past_results = []  # all formulas in past_results must be in prefix form
    progress_banner = "?????????????????? PAST RESULTS SO FAR ???????????????????????????"

    def search_best_prefix(oc):
        # Search a single operator count and return its best formula converted to prefix form.
        _, best_formula = formula_search.formula_search(metric_list=metric_list,
                                                        set_valued_metrics=set_valued_metrics,
                                                        operator_counts=[oc],
                                                        parameter_domains=parameter_domains,
                                                        folder_name=folder_name,
                                                        trace_count=trace_count,
                                                        generate_signals="",
                                                        signal_file_base=signal_file_base,
                                                        process_count=process_count,
                                                        save=save,
                                                        cause_mining=controllable_formulas,
                                                        return_type=return_type,
                                                        result_file=result_file,
                                                        control_metrics=control_metrics,
                                                        past_results=past_results,
                                                        withoutS=withoutS,
                                                        time_shift=time_shift)
        return stl_constants.FormulaValuation(formula=STL.infix_to_prefix(best_formula.formula),
                                              valuation=best_formula.valuation)

    if not strictly_increasing_oc:
        # Operator count of the most recent search that improved the valuation by more than the limit.
        last_used_oc = current_oc
        while True:
            past_results.append(search_best_prefix(current_oc))

            if len(past_results) > 1:
                gain = past_results[-1].valuation - past_results[-2].valuation
                if gain < valuation_limit:
                    # Not enough improvement: discard the new formula.
                    past_results.pop()
                    if last_used_oc < current_oc:
                        # The operator count was already raised since the last sufficient improvement.
                        print("break 1")
                        break
                    current_oc += 1
                    print("oc change to " + str(current_oc))
                elif gain > valuation_limit:
                    last_used_oc = current_oc
                    print("last used oc : " + str(last_used_oc))

            print(progress_banner)
            print(past_results)

            if current_oc == oc_rhs_limit + 1:
                break
    else:
        while True:
            past_results.append(search_best_prefix(current_oc))

            if len(past_results) > 1 and \
                    (past_results[-1].valuation - past_results[-2].valuation) < valuation_limit:
                past_results.pop()
                print("break 2")
                break

            current_oc += 1
            # Compare against the unpacked limit so that a pair-valued operator_count_limit works here too.
            if current_oc == oc_rhs_limit + 1:
                break

            print(progress_banner)
            print(past_results)

    past_formulas = [fv.formula for fv in past_results]
    return stl_constants.FormulaValuation(formula=helper_funs.concat_with_or_prefix(past_formulas),
                                          valuation=past_results[-1].valuation)
def formula_search_operator_count(metric_list, set_valued_metrics, oc, parameter_domains, folder_name, trace_count,
                                  signal_file_base, process_count, return_type, cause_mining=False,
                                  control_metrics=None, past_results=None, withoutS=False, time_shift=0):
    """
    For the given operator count, generate the template formulas and run a grid search for each of them.

    Args:
        metric_list: list of metrics that will appear in formulas
        set_valued_metrics: metrics that take discrete values from a predefined set
        oc: (integer) how many operators will be in the resulting formula between metrics
        parameter_domains: forwarded to generate_formula_from_template.
        folder_name: forwarded to parameter_search_for_formula.
        trace_count: forwarded to parameter_search_for_formula.
        signal_file_base: forwarded to parameter_search_for_formula.
        process_count: for parallel processing
        return_type: determines the valuation that will be used to choose the best formulas. One of the types
            from stl_constants; its ``category`` decides whether a larger or a smaller valuation is better.
        cause_mining: (boolean) if True the templates come from generate_formula_tree_for_cause_mining,
            otherwise from generate_formula_tree_iterative.
        control_metrics: the metrics that we can control/change; only used when cause_mining is True.
            When omitted (or None) a new empty list is used for this call.
        past_results: past best formulas. New formulas will be chosen according to their valuation
            "together with" the past best formulas. When omitted (or None) a new empty list is used.
        withoutS: (bool) if True, the formulas are generated without Since
        time_shift: if greater than 0, every template formula is wrapped as 'P t t ( formula )' with
            t = time_shift.

    Returns:
        results (dict keyed by template formula; the value is the FormulaValuation found for it, or an empty
        dict for templates that were skipped by the early exit), best formula (and its valuation)
    """
    # Fresh lists per call: list literals as default values would be shared between all calls.
    if control_metrics is None:
        control_metrics = []
    if past_results is None:
        past_results = []

    if cause_mining:
        template_formula_list = formula_generator.generate_formula_tree_for_cause_mining(
            metric_list=metric_list, control_metrics=control_metrics, operator_count=oc,
            set_valued_metrics=set_valued_metrics, withoutS=withoutS)
    else:
        template_formula_list = formula_generator.generate_formula_tree_iterative(
            metric_list, oc, return_formula_string=True, set_valued_metrics=set_valued_metrics,
            withoutS=withoutS)

    if time_shift > 0:
        time_shift_str = str(time_shift) + " " + str(time_shift)
        template_formula_list = ['P ' + time_shift_str + " ( " + f + " )" for f in template_formula_list]

    print("----------- For operator count " + str(oc) + ", " + str(len(template_formula_list)) +
          " formulas will be tested.")

    maximizing = return_type.category == stl_constants.CATEGORY_MAXIMIZATION
    minimizing = return_type.category == stl_constants.CATEGORY_MINIMIZATION

    # For each template formula, run a grid search. Store the results, remember the best one.
    results = {f: {} for f in template_formula_list}
    # Start from the worst possible valuation for the optimisation direction.
    worst_valuation = stl_constants.MAX_EVAL if minimizing else stl_constants.MIN_EVAL
    best_formula = stl_constants.FormulaValuation(formula='False', valuation=worst_valuation)

    for template_formula in template_formula_list:
        formula, parameter_domains_for_formula = generate_formula_from_template(template_formula,
                                                                                parameter_domains)
        current = parameter_search_for_formula(formula=formula,
                                               parameter_domains_for_formula=parameter_domains_for_formula,
                                               folder_name=folder_name,
                                               trace_count=trace_count,
                                               signal_file_base=signal_file_base,
                                               process_count=process_count,
                                               return_type=return_type,
                                               past_results=past_results)
        results[template_formula] = current

        if (maximizing and current.valuation > best_formula.valuation) or (
                minimizing and current.valuation < best_formula.valuation):
            best_formula = current
            print("(*** UPDATE BEST: " + best_formula.formula + " " + str(best_formula.valuation))
            if minimizing and best_formula.valuation == stl_constants.MIN_EVAL:
                break  # MIN_EVAL cannot be improved on when minimizing, no need to check the rest

    return results, best_formula