def formula_val_list_into_concat_formula_list(prefix_formula_val_list,
                                              folder_name, signal_file_base,
                                              trace_count, signal_file_rest,
                                              return_type):
    """
    Debug helper for the cause mining algorithm.

    Collects the formulas of a FormulaValuation list (the valuations of the
    input entries are ignored), hands them to prefix_to_infix_list and then to
    concat_with_or_infix, and evaluates the formula found at index 0 of the
    list afterwards.

    Args:
        prefix_formula_val_list: A list of FormulaValuations whose formulas are in prefix form.
         Ex: [FormulaValuation(formula='A 0 4 x5 = 0', valuation=0.7824),
         FormulaValuation(formula='& A 0 2 x5 = 0 P 1 1 x3 > 10', valuation=0.921521)]
        folder_name: forwarded unchanged to optimization.evaluator.evaluate_signals
        signal_file_base: forwarded unchanged to optimization.evaluator.evaluate_signals
        trace_count: forwarded unchanged to optimization.evaluator.evaluate_signals
        signal_file_rest: forwarded unchanged to optimization.evaluator.evaluate_signals
        return_type: forwarded unchanged to optimization.evaluator.evaluate_signals

    Returns: a FormulaValuation whose formula is the combined formula (as left
        at index 0 of the working list) and whose valuation is the result of
        evaluate_signals on that formula converted back to prefix form.

    """
    formulas = []
    for formula_valuation in prefix_formula_val_list:
        formulas.append(formula_valuation.formula)

    # The return values of both helpers are discarded: this code relies on
    # them rewriting `formulas` in place and on the or-concatenated result
    # ending up in the first slot of the list.
    prefix_to_infix_list(formulas)
    concat_with_or_infix(formulas)
    combined_infix = formulas[0]

    # The evaluator is fed the prefix form, the caller gets the infix form.
    combined_prefix = STL.infix_to_prefix(combined_infix)
    combined_valuation = optimization.evaluator.evaluate_signals(
        combined_prefix, folder_name, signal_file_base, trace_count,
        signal_file_rest, return_type)

    return stl_constants.FormulaValuation(formula=combined_infix,
                                          valuation=combined_valuation)
def parameter_search_for_formula(formula, parameter_domains_for_formula, folder_name, trace_count, signal_file_base,
                                 process_count, return_type, past_results=None):
    """
    Run a grid search over the parameters of a single formula and report the best instantiation.

    Args:
        formula: the formula in infix form; parameter names occur in it as plain text
        parameter_domains_for_formula: mapping from parameter name to its domain. The iteration order of the
            mapping fixes the order of the parameter list handed to grid_search.
        folder_name: forwarded to grid_search.grid_search
        trace_count: forwarded to grid_search.grid_search
        signal_file_base: forwarded to grid_search.grid_search
        process_count: forwarded to grid_search.grid_search
        return_type: forwarded to grid_search.grid_search
        past_results: forwarded to grid_search.grid_search. When omitted, a fresh empty list is used for every
            call (a shared mutable default would leak state between calls).

    Returns: FormulaValuation(formula, valuation) where formula is the infix formula with the best parameter
        values substituted (or the unmodified input formula when grid_search reports no parameters) and
        valuation is the best valuation reported by grid_search.
    """
    if past_results is None:
        past_results = []

    parameter_list = list(parameter_domains_for_formula)
    parameter_domain = [parameter_domains_for_formula[pa] for pa in parameter_list]

    prefix_formula = STL.infix_to_prefix(formula)

    # signal_file_rest is always passed as the empty string here.
    best_v, params, time_passed = grid_search.grid_search(formula=prefix_formula, parameter_list=parameter_list,
                                                          parameter_domain=parameter_domain, folder_name=folder_name,
                                                          signal_file_base=signal_file_base, trace_count=trace_count,
                                                          signal_file_rest='', process_count=process_count,
                                                          return_type=return_type, past_results=past_results)

    formula_n = formula
    # Identity check: `== None` would be evaluated element-wise if params is an array-like object.
    if params is None:
        print("There are no best parameters for the formula: " + formula)
    else:
        # NOTE(review): this is a plain substring replacement, so a parameter name that is contained in
        # another name (or in any other token of the formula) is replaced there as well -- confirm that
        # the parameter naming scheme rules this out.
        for p, v in zip(parameter_list, params):
            formula_n = formula_n.replace(p, str(v))
        print("With valuation " + str(best_v) + " ,best parameters form " + formula_n + " ,with prefix form: " +
              STL.infix_to_prefix(formula_n) + " ,in time: " + str(time_passed))
    return stl_constants.FormulaValuation(formula=formula_n, valuation=best_v)
# Example #3
# 0
def search_all_search_space(metric_list,
                            control_metrics,
                            set_valued_metrics,
                            parameter_domains,
                            folder_name,
                            trace_count,
                            signal_file_base,
                            process_count,
                            return_type,
                            oc_limit,
                            cause_limit,
                            upto=True,
                            withoutS=False,
                            controllable_formulas=True,
                            time_shift=0):
    """

    Designed as a function to understand how true and efficient our cause_mining heuristic works. This function
    searches through the whole search space naively: it generates the template formulas for oc_limit, builds
    every combination of them, concatenates each combination with or's and runs a parameter search on every
    concatenated formula, without the hierarchy and restrictions that the heuristic in
    cause_mining_for_traffic_data imposes.

    Args:
        metric_list: forwarded to the formula generator
        control_metrics: forwarded to generate_formula_tree_for_cause_mining (only used when
                         controllable_formulas is True)
        set_valued_metrics: forwarded to the formula generator
        parameter_domains: forwarded to formula_search.generate_formula_from_template
        folder_name: forwarded to formula_search.parameter_search_for_formula
        trace_count: forwarded to formula_search.parameter_search_for_formula
        signal_file_base: forwarded to formula_search.parameter_search_for_formula
        process_count: forwarded to formula_search.parameter_search_for_formula
        return_type: forwarded to formula_search.parameter_search_for_formula
        oc_limit: operator count handed to the formula generator as operator_count.
        cause_limit: the max number of formula components in the end formula
        upto: (bool) if True, the combinations are built for every length from 1 to cause_limit,
                     if False, only tuples of length cause_limit are built.
              NOTE(review): every tuple is turned into a frozenset, so tuples that repeat a formula collapse
              into smaller sets. With upto=False the search therefore still contains combinations with fewer
              than cause_limit components -- confirm whether "exactly cause_limit" was intended.
        withoutS: forwarded to the formula generator (presumably: generate formulas without Since -- confirm)
        controllable_formulas: (bool) if True, the function calls generate_formula_tree_for_cause_mining,
                                if False, it calls generate_formula_tree_iterative. So in the first case
                                controllable formulas are generated, in the second case all formulas are.
        time_shift: only used when controllable_formulas is False; if > 0 every generated formula f is wrapped
                    as 'P <time_shift> <time_shift> ( f )'.

    Returns: best_result type: FormulaValuation. It starts out as FormulaValuation(formula="", valuation=0) and
        is only replaced by a strictly larger valuation.
        NOTE(review): the comparison always prefers larger valuations; confirm this is intended for
        return types that are meant to be minimized.

    """

    best_result = stl_constants.FormulaValuation(formula="",
                                                 valuation=0)  # initialize

    # generate all template formulas for operator count oc_limit
    if controllable_formulas:
        all_formulas_till_n = list(
            formula_generator.generate_formula_tree_for_cause_mining(
                metric_list=metric_list,
                control_metrics=control_metrics,
                operator_count=oc_limit,
                set_valued_metrics=set_valued_metrics,
                withoutS=withoutS))
    else:
        all_formulas_till_n = list(
            formula_generator.generate_formula_tree_iterative(
                metric_list=metric_list,
                operator_count=oc_limit,
                return_formula_string=True,
                set_valued_metrics=set_valued_metrics,
                withoutS=withoutS))
        if time_shift > 0:
            time_shift_str = str(time_shift) + " " + str(time_shift)
            all_formulas_till_n = [
                'P ' + time_shift_str + " ( " + f + " )"
                for f in all_formulas_till_n
            ]

    # find all combinations of these formulas
    iter_all_combinations = my_iterable()
    if not upto:
        iter_all_combinations = chain(
            iter_all_combinations,
            product(all_formulas_till_n, repeat=cause_limit))
    else:
        for n in range(1, cause_limit + 1):
            iter_all_combinations = chain(
                iter_all_combinations, product(all_formulas_till_n, repeat=n))
        # now we have an iterator of all formulas up to cause_limit many components,

    # A frozenset per tuple removes both ordering and repetition, so each distinct
    # group of formulas is searched only once.
    all_comb_set = {frozenset(combination) for combination in iter_all_combinations}

    print("there are " + str(len(all_formulas_till_n)) + " formulas in all_formulas_till_" + str(oc_limit) + \
          " and cause limit is " + str(cause_limit) + " so there will be " + str(len(all_comb_set)) + \
          " formulas to be iterated over.")

    for cnt, formula_set in enumerate(all_comb_set, start=1):
        formula_list = list(formula_set)
        # The code below reads the concatenated formula from formula_list[0], i.e. it relies on
        # concat_with_or_infix rewriting the list in place. Its return value is not needed (the
        # original collected it in a list that was never read and only consumed memory).
        concat_with_or_infix(formula_list)
        print("the formula number " + str(cnt) + " that we are on is: " +
              formula_list[0])
        # find the best parameter values for this formula
        formula, parameter_domains_for_formula = formula_search.generate_formula_from_template(
            template_formula=formula_list[0],
            parameter_domains=parameter_domains)
        formula_valuation = formula_search.parameter_search_for_formula(
            formula=formula,
            parameter_domains_for_formula=parameter_domains_for_formula,
            folder_name=folder_name,
            trace_count=trace_count,
            signal_file_base=signal_file_base,
            process_count=process_count,
            return_type=return_type)
        # make this formula the new best_result if its valuation is better than the old best_result
        if formula_valuation.valuation > best_result.valuation:
            best_result = formula_valuation
            print("\n\nBest Formula of search_all_search_space for now is " + best_result.formula + \
                  " with valuation " + str(best_result.valuation) + "\n The tuple in iteration is " + str(formula_list) \
                  + "\n")

    return best_result
def cause_mining_algorithm(metric_list, control_metrics, set_valued_metrics, parameter_domains, folder_name,
                           trace_count, signal_file_base, process_count, save, result_file, return_type,
                           strictly_increasing_oc, valuation_limit, operator_count_limit, withoutS=False,
                           controllable_formulas=True, time_shift=0):
    """
    Repeatedly run formula_search for a single operator count, keep the best formula of a run when it improves
    the valuation by enough, and finally concatenate the kept formulas.

    Args:
        metric_list: forwarded to formula_search.formula_search
        control_metrics: forwarded to formula_search.formula_search
        set_valued_metrics: forwarded to formula_search.formula_search
        parameter_domains: forwarded to formula_search.formula_search
        folder_name: forwarded to formula_search.formula_search
        trace_count: forwarded to formula_search.formula_search
        signal_file_base: forwarded to formula_search.formula_search
        process_count: forwarded to formula_search.formula_search
        save: forwarded to formula_search.formula_search
        result_file: forwarded to formula_search.formula_search
        return_type: forwarded to formula_search.formula_search
        strictly_increasing_oc: (bool) If True, the operator count increases by one after every search and the
        loop ends as soon as a search does not improve the valuation by valuation_limit. If False, the same
        operator count is searched again as long as it keeps improving the valuation; the operator count is
        only increased after a search that did not improve enough, and the loop ends when that happens at an
        operator count that has not contributed any formula.
        valuation_limit: the predefined limit which decides how much of a value addition is enough for a best
        formula of an operator count to enter the past_formula list.
        operator_count_limit: Last operator count. Either an int, or a pair whose second element is the last
        operator count. The search stops once the operator count reaches this value + 1.
        withoutS: forwarded to formula_search.formula_search (in both modes).
        controllable_formulas: (bool) forwarded to formula_search as its cause_mining input; it also selects
                               the starting operator count (-1 if True, 0 if False).
                               By default, controllable_formulas = True.
        time_shift: forwarded to formula_search.formula_search
    Returns: FormulaValuation(the resulting formula -small best formulas concatenated with
        helper_funs.concat_with_or_prefix-, the valuation of the last kept formula)

    """
    if isinstance(operator_count_limit, int):
        oc_rhs_limit = operator_count_limit
    else:
        _, oc_rhs_limit = operator_count_limit

    # The controllable (cause mining) search starts from operator count -1. The other path starts at 0
    # because, per the original author's note, generate_formula_tree_iterative cannot process oc = -1.
    current_oc = -1 if controllable_formulas else 0
    past_results = []  # all formulas in past_results must be in prefix form

    def search_best_prefix_formula(oc):
        # Run one formula search for operator count `oc` and return its best formula in prefix form.
        _, best_formula = formula_search.formula_search(metric_list=metric_list,
                                                        set_valued_metrics=set_valued_metrics,
                                                        operator_counts=[oc],
                                                        parameter_domains=parameter_domains,
                                                        folder_name=folder_name,
                                                        trace_count=trace_count, generate_signals="",
                                                        signal_file_base=signal_file_base,
                                                        process_count=process_count,
                                                        save=save, cause_mining=controllable_formulas,
                                                        return_type=return_type,
                                                        result_file=result_file,
                                                        control_metrics=control_metrics,
                                                        past_results=past_results, withoutS=withoutS,
                                                        time_shift=time_shift)
        return stl_constants.FormulaValuation(formula=STL.infix_to_prefix(best_formula.formula),
                                              valuation=best_formula.valuation)

    if not strictly_increasing_oc:
        last_used_oc = current_oc
        while True:
            past_results.append(search_best_prefix_formula(current_oc))
            if len(past_results) > 1:
                gain = past_results[-1].valuation - past_results[-2].valuation
                if gain < valuation_limit:
                    # Not enough improvement: drop the new formula again.
                    past_results.pop()
                    if last_used_oc < current_oc:
                        # The operator count was already raised once without contributing anything.
                        print("break 1")
                        break
                    current_oc += 1
                    print("oc change to " + str(current_oc))
                elif gain > valuation_limit:
                    last_used_oc = current_oc
                    print("last used oc : " + str(last_used_oc))
                # A gain exactly equal to valuation_limit keeps the formula without touching last_used_oc.

            print("?????????????????? PAST RESULTS SO FAR ???????????????????????????")
            print(past_results)

            if current_oc == oc_rhs_limit + 1:
                break

    else:  # i.e. if strictly_increasing_oc:

        while True:
            past_results.append(search_best_prefix_formula(current_oc))

            if len(past_results) > 1 and (past_results[-1].valuation - past_results[-2].valuation) < valuation_limit:
                past_results.pop()
                print("break 2")
                break
            current_oc += 1
            # Compare against the unpacked limit: operator_count_limit itself may be a pair, and
            # adding 1 to a pair raises TypeError.
            if current_oc == oc_rhs_limit + 1:
                break

            print("?????????????????? PAST RESULTS SO FAR ???????????????????????????")
            print(past_results)

    # The kept formulas are in prefix form, which is what concat_with_or_prefix is given.
    past_formulas = [fv.formula for fv in past_results]
    result = stl_constants.FormulaValuation(formula=helper_funs.concat_with_or_prefix(past_formulas),
                                             valuation=past_results[-1].valuation)
    return result
def formula_search_operator_count(metric_list, set_valued_metrics, oc, parameter_domains, folder_name, trace_count,
                                  signal_file_base, process_count, return_type, cause_mining=False,
                                  control_metrics=None, past_results=None, withoutS=False, time_shift=0):
    """
    For the given operator count, generate the template formulas and
       run a parameter (grid) search for each template formula.

    Args:
       metric_list: list of metrics that will appear in formulas
       set_valued_metrics: metrics that take discrete values from a predefined set
       oc: (integer) how many operators will be in the resulting formula between metrics
       parameter_domains: forwarded to generate_formula_from_template
       folder_name: forwarded to parameter_search_for_formula
       trace_count: forwarded to parameter_search_for_formula
       signal_file_base: forwarded to parameter_search_for_formula
       process_count: for parallel processing
       return_type: determines the valuation that will be used to choose the best formulas. One of the types
       from stl_constants. Its category decides whether a larger or a smaller valuation is better.
       cause_mining: (boolean) if True, the templates come from generate_formula_tree_for_cause_mining,
       otherwise from generate_formula_tree_iterative.
       control_metrics: the metrics that we can control/change. These metrics' values will be changed and the
       systems will be resimulated to reduce the number of bad labeled data points in the future. In the traffic
       system case, control metrics are traffic lights and the other metrics are road business rates.
       Only used when cause_mining is True. Defaults to an empty list.
       past_results: past best formulas. New formulas will be chosen according to their valuation "together
       with" the past best formulas. Defaults to an empty list.
       withoutS: (bool) if True, the formulas are generated without Since
       time_shift: only used when cause_mining is False; if > 0 every template f is wrapped as
       'P <time_shift> <time_shift> ( f )'.

    Returns: results (dict: template formula -> FormulaValuation of its best instantiation; templates that were
    skipped because of the early exit keep an empty dict), best formula (and its valuation)
    """
    # Fresh lists per call instead of mutable default arguments shared between calls.
    if control_metrics is None:
        control_metrics = []
    if past_results is None:
        past_results = []

    if cause_mining:
        template_formula_list = formula_generator.generate_formula_tree_for_cause_mining(
            metric_list=metric_list,
            control_metrics=control_metrics,
            operator_count=oc,
            set_valued_metrics=set_valued_metrics,
            withoutS=withoutS)
    else:
        template_formula_list = formula_generator.generate_formula_tree_iterative(
            metric_list, oc, return_formula_string=True,
            set_valued_metrics=set_valued_metrics, withoutS=withoutS)
        if time_shift > 0:
            time_shift_str = str(time_shift) + " " + str(time_shift)
            template_formula_list = ['P ' + time_shift_str + " ( " + f + " )" for f in template_formula_list]

    print("----------- For operator count " + str(oc) + ", " + str(len(template_formula_list)) + " formulas will be tested.")

    minimizing = return_type.category == stl_constants.CATEGORY_MINIMIZATION
    maximizing = return_type.category == stl_constants.CATEGORY_MAXIMIZATION

    # For each template formula, run a grid search. Store the results, remember the best one.
    results = {f: {} for f in template_formula_list}
    # Start from the worst possible valuation for the chosen direction.
    worst_valuation = stl_constants.MAX_EVAL if minimizing else stl_constants.MIN_EVAL
    best_formula = stl_constants.FormulaValuation(formula='False', valuation=worst_valuation)

    for template_formula in template_formula_list:
        formula, parameter_domains_for_formula = generate_formula_from_template(template_formula, parameter_domains)

        candidate = parameter_search_for_formula(formula=formula,
                                                 parameter_domains_for_formula=parameter_domains_for_formula,
                                                 folder_name=folder_name,
                                                 trace_count=trace_count,
                                                 signal_file_base=signal_file_base,
                                                 process_count=process_count, return_type=return_type,
                                                 past_results=past_results)
        results[template_formula] = candidate

        if (maximizing and candidate.valuation > best_formula.valuation) or (
                minimizing and candidate.valuation < best_formula.valuation):
            #  UPDATE BEST:
            best_formula = candidate
            print("(***   UPDATE BEST: " + best_formula.formula + " " + str(best_formula.valuation))
            if minimizing and best_formula.valuation == stl_constants.MIN_EVAL:
                break  # No need to check the rest
    return results, best_formula