Пример #1
0
def my_version_report(fdict_exhaustive, data_graph, pattern, monitoring_marks,
                      output_path, detailed_result_path, monitoring_reports,
                      exhaustive_approach_results_path, Plist, nr,
                      pattern_file_name, write):
    experiments.globals.report = "furer"
    nr_non_observed_combinations = None
    start_time = time.time()
    if write == True:
        size_fdict = len(fdict_exhaustive)
        num_embeddings = 0
        for k in fdict_exhaustive.keys():
            num_embeddings = num_embeddings + fdict_exhaustive[
                k]  # I remove +1 added by Laplace smoothing BEWARE: previous Laplace smoothing assumed
        print "NR EMBEDDINGS BEFORE COMPLETING: ", num_embeddings

        nr_possible_combinations = smplr.complete_combinations_1(
            fdict_exhaustive, data_graph, pattern,
            Plist)  # add zeros to all not present combinations
        nr_non_observed_combinations = nr_possible_combinations - size_fdict
    experiments.globals.nr_non_observed_combinations = nr_non_observed_combinations
    report.report_monitoring_my_version(
        monitoring_marks, output_path, detailed_result_path,
        monitoring_reports, exhaustive_approach_results_path, data_graph,
        pattern, Plist, nr, pattern_file_name, fdict_exhaustive,
        nr_non_observed_combinations, write)  #print monitoring_reports
    print "ELAPSED TIME: ", time.time() - start_time
Пример #2
0
def report_monitoring_my_version(monitoring_marks,output_path,detailed_result_path,monitoring_reports,exhaustive_approach_result_file,data_graph,pattern,Plist,repetitions,pattern_file_name,fdict_exhaustive,nr_non_observed_combinations,write):
      #CREATE DIRECTORY THAT WILL CONTAIN RESULTS FOR EACH TIME INSTANCE     
      dict={}
      duration=[]
      begin=0
      nr_iterations=[]
      sum_of_embeddings=[]
      sum_of_squares=[]
      embeddings_estimate=[]
      sum_of_root_node_emb=[]
      sum_of_squares_root_node_emb=[]
      
      for time_int in monitoring_marks:
          duration.append(time_int-begin)
          begin=time_int
      
      #the problem might be that some runs finished earlier, and some later.
      for i in xrange(len(monitoring_marks)):
          for key_iter in monitoring_reports.keys():
              if not(monitoring_marks[i] in dict.keys()):
                  dict[monitoring_marks[i]]=[]
              try:
                  dict[monitoring_marks[i]].append(monitoring_reports[key_iter][i])
                  nr_iterations.append(monitoring_reports[key_iter][i].nr_iterations)
                  sum_of_embeddings.append(monitoring_reports[key_iter][i].sum_nr_embeddings)
                  sum_of_squares.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings)
                  embeddings_estimate.append(monitoring_reports[key_iter][i].embeddings_estimate)
                  sum_of_root_node_emb.append(monitoring_reports[key_iter][i].sum_nr_extra_embeddings)
                  sum_of_squares_root_node_emb.append(monitoring_reports[key_iter][i].sum_of_the_extra_square_embeddings)
              except IndexError:
                  print "Something wrong"
                  break
      print "NR ITERATIONS: ",nr_iterations
      print "sum_of_embeddings: ",sum_of_embeddings
      print "sum_of_squares: ",sum_of_squares
      
      snapshot_inits=[]
      for i in range(repetitions):
          snapshot_inits.append(0)
      
      counter_duration=0
      counter=0
      
      for time_snapshot in monitoring_marks:
              experiments.globals.current_time_snapshot=time_snapshot
              print "Processed ",counter," out of: ",len(monitoring_marks)
              furer_results_KLD = []
              furer_results_bhatta = []
              furer_results_hellinger = []
              furer_times = []
              observed_nodes=[]
              observed_nodes_difference_per_snapshot=[]
              
              snapshot_directory_path=os.path.join(detailed_result_path,)
              if not(os.path.exists(snapshot_directory_path)):
                  os.mkdir(snapshot_directory_path)
              snapshot_directory_file=os.path.join(snapshot_directory_path,'res_time_'+str(time_snapshot)+'.info')
              
              
              if write==True:
                  fdict_furer_temp=dict[time_snapshot]
                  fdicts_Furer=[]
                  
                  for f in fdict_furer_temp:
                      fdicts_Furer.append(f.current_fdict)
                      observed_nodes.append(f.number_of_observed_nodes)
                  
                  if len(fdict_furer_temp)==0:
                      continue
        
                  for i in range(len(fdict_furer_temp)):
                      experiments.globals.nr_iterations=nr_iterations[i]
                      fdict_limited = fdicts_Furer[i]
                      fdict_Furer=fdicts_Furer[i]
                      #for k in fdict_Furer:
                      #    fdict_Furer[k]=fdict_Furer[k]*experiments.globals.nr_iterations
                      observed_nodes_difference_per_snapshot.append(observed_nodes[i]-snapshot_inits[i])
                      snapshot_inits[i]=observed_nodes[i]
                      print "Making pde Exhaustive"
                      #for k in fdict_exhaustive.keys():
                      #   if k==((u'location', u'Location_id_705003'), (u'function', u'Func_id_40')):
                      #       print k,fdict_exhaustive[k]
                      [pde,trash,default_key] = smplr.make_pd_general_kickout_default_my_version(fdict_exhaustive)
                      print "Made pde Exhaustive"
                      print "Len PDE: ",len(pde)
                      if len(pde) < 1:
                          print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                          break
                      print "Completing combinations for fdict Furer"
                      
                      nr_possible_combinations=smplr.complete_combinations_1(fdict_Furer, data_graph,  pattern,  Plist)
                      print "Completed combinations for fdict Furer"
                      print "Nr poss combos: ",nr_possible_combinations
                      #print "Making pdf for Furer"
                      pdf= smplr.make_pd_general_kickout_default_limited_my_version(fdict_Furer)
                      print "Made pdf for Furer"
                      print "Len pdf: ",len(pdf)
                      print "Furer ..."
                      print "EXHAUSTIVE:"
                      #for k in pde.keys():
                      #      print "KEY: ",k
                      #      for e in pde[k]:
                      #          print e
                      #print "SMPL:"
                      #for k in pdf.keys():
                      #      print "KEY: ",k
                      #      for e in pdf[k]:
                      #          print e
                      furer_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
                      print "HALO?"
                      furer_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
                      furer_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdf)))
                    
          
              print "Writing to: ",snapshot_directory_file
              resultfile = open(snapshot_directory_file,  'w')
              resultfile.write('Furer\n')
              resultfile.write("experiment on graph: " + str(pattern_file_name) +" and pattern: "+pattern_file_name+"\n")
              resultfile.write("repetitions (for this time snapshot): " + str(repetitions) +"\n")
              resultfile.write(" " +"\n")
              print "KLD: ",str(numpy.mean(furer_results_KLD))
              resultfile.write("average average KLD on furer: " + str(numpy.mean(furer_results_KLD))  + " with SSTD: " + str(numpy.std(furer_results_KLD,  ddof=1)) +"\n")
              resultfile.write("average average bhatta on furer: " + str(numpy.mean(furer_results_bhatta))  + " with SSTD: " + str(numpy.std(furer_results_bhatta,  ddof=1)) +"\n")
              resultfile.write("average average hellinger on furer: " + str(numpy.mean(furer_results_hellinger))  + " with SSTD: " + str(numpy.std(furer_results_hellinger,  ddof=1)) +"\n")
              resultfile.write(" " +"\n")
              resultfile.write('-----DETAILED RESULTS-----' +"\n")
              resultfile.write('furer_results_KLD : ' + str(furer_results_KLD) +"\n")
              resultfile.write('furer_results_bhatta : ' + str(furer_results_bhatta) +"\n")
              resultfile.write('furer_results_hellinger : ' + str(furer_results_hellinger) +"\n")
              resultfile.write('avg #nodes observed : ' + str(numpy.mean(observed_nodes)) +"\n")
              resultfile.write('# nodes per time interval per run :' + str((numpy.mean(observed_nodes_difference_per_snapshot)/duration[counter_duration])) +"\n")
              resultfile.write('avg difference of nodes observed from previous snapshot :' + str(numpy.mean(observed_nodes_difference_per_snapshot)) +"\n")          
              resultfile.write("------------------------------------ Sampling info ------------------------------\n")
              resultfile.write('number of sampling iterations : ' + str(nr_iterations[counter])+"\n")    
              if sum_of_squares_root_node_emb[counter]==0 and sum_of_root_node_emb[counter]==0:
                 nr_embeddings_temp=sum_of_embeddings[counter]/nr_iterations[counter]
              else:
                 nr_embeddings_temp=sum_of_root_node_emb[counter]/nr_iterations[counter] 
              #embeddings_estimate[counter]
              print "Writing to file: ",nr_embeddings_temp
              resultfile.write('average of embeddings : ' + str(nr_embeddings_temp)+"\n")   
              if sum_of_squares_root_node_emb[counter]==0 and sum_of_root_node_emb[counter]==0:
                  #we do the old standard deviation
                  print "Old stdev"
                  print sum_of_squares[counter]
                  print sum_of_embeddings[counter]
                  a=Decimal(sum_of_squares[counter])-(Decimal(math.pow(sum_of_embeddings[counter], 2))/Decimal(float(nr_iterations[counter])))
                  stdeviation=math.sqrt(a/Decimal(float((nr_iterations[counter]-1))))
              else:
                  a=Decimal(sum_of_squares_root_node_emb[counter])-(Decimal(math.pow(sum_of_root_node_emb[counter], 2))/Decimal(float(nr_iterations[counter])))
                  if a>0:
                    stdeviation=math.sqrt(a/Decimal(float((nr_iterations[counter]-1))))
                  else:
                    stdeviation=0
              print "old stdev: ",stdeviation
              resultfile.write('stdeviation of # embeddings: ' + str(stdeviation)+"\n") 
              resultfile.close()
              counter+=1
              counter_duration+=1       
Пример #3
0
def report_monitoring_my_version(monitoring_marks, output_path,
                                 detailed_result_path, monitoring_reports,
                                 exhaustive_approach_result_file, data_graph,
                                 pattern, Plist, repetitions,
                                 pattern_file_name, fdict_exhaustive,
                                 nr_non_observed_combinations, write):
    #CREATE DIRECTORY THAT WILL CONTAINS RESULTS FOR EACH TIME INSTANCE

    #picklename = os.path.join(exhaustive_approach_result_file,"fdict_exhaustive_%s.pickle" % pattern_file_name)
    #pickin = open(picklename, 'rb')
    #fdict_exhaustive = pickle.load(pickin)

    #smplr.complete_combinations(fdict_exhaustive, data_graph,  pattern,  Plist)      # add zeros to all not present combinations
    #smplr.smooth(fdict_exhaustive,  fdict_exhaustive)     # Laplace smoothing also for the exhaustive

    dict = {}

    duration = []
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    begin = 0

    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int

    #the problem might be that some runs finished earlier, and some later.
    for i in xrange(len(monitoring_marks)):
        for key_iter in monitoring_reports.keys():
            if not (monitoring_marks[i] in dict.keys()):
                dict[monitoring_marks[i]] = []
            try:
                dict[monitoring_marks[i]].append(
                    monitoring_reports[key_iter][i])
                nr_iterations.append(
                    monitoring_reports[key_iter][i].nr_iterations)
                sum_of_embeddings.append(
                    monitoring_reports[key_iter][i].sum_nr_embeddings)
                sum_of_squares.append(monitoring_reports[key_iter]
                                      [i].sum_of_the_square_embeddings)
                sum_of_root_node_emb.append(
                    monitoring_reports[key_iter][i].sum_nr_extra_embeddings)
                sum_of_squares_root_node_emb.append(
                    monitoring_reports[key_iter]
                    [i].sum_of_the_extra_square_embeddings)

            except IndexError:
                break

    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings: ", sum_of_embeddings
    print "sum_of_squares: ", sum_of_squares
    snapshot_inits = []
    for i in range(repetitions):
        snapshot_inits.append(0)

    counter_duration = 0
    counter = 0
    for time_snapshot in monitoring_marks:
        print "Processing ", counter, "out of: ", len(monitoring_marks)
        false_furer_results_KLD = []
        false_furer_results_bhatta = []
        false_furer_results_hellinger = []
        false_furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []

        snapshot_directory_path = os.path.join(detailed_result_path, )
        if not (os.path.exists(snapshot_directory_path)):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')

        if write == True:
            fdict_furer_temp = dict[time_snapshot]

            fdicts_Furer = []

            for f in fdict_furer_temp:
                fdicts_Furer.append(f.current_fdict)
                observed_nodes.append(f.number_of_observed_nodes)

            if len(fdict_furer_temp) == 0:
                continue

            for i in range(len(fdict_furer_temp)):
                experiments.globals.nr_iterations = nr_iterations[i]
                fdict_limited = fdicts_Furer[i]
                fdict_Furer = fdicts_Furer[i]
                observed_nodes_difference_per_snapshot.append(
                    observed_nodes[i] - snapshot_inits[i])
                snapshot_inits[i] = observed_nodes[i]

                [pde, trash_list, default_key
                 ] = smplr.make_pd_general_kickout_default_my_version(
                     fdict_exhaustive)

                if len(pde) < 1:
                    print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                    break
                nr_possible_combinations = smplr.complete_combinations_1(
                    fdict_Furer, data_graph, pattern, Plist)
                pdf = smplr.make_pd_general_kickout_default_limited_my_version(
                    fdict_Furer)
                false_furer_results_KLD.append(
                    su.avg_kld(smplr.transform_to_ptable(pde),
                               smplr.transform_to_ptable(pdf)))
                false_furer_results_bhatta.append(
                    su.avg_bhatta(smplr.transform_to_ptable(pde),
                                  smplr.transform_to_ptable(pdf)))
                false_furer_results_hellinger.append(
                    su.avg_hellinger(smplr.transform_to_ptable(pde),
                                     smplr.transform_to_ptable(pdf)))

        print "Writing to: ", snapshot_directory_file
        resultfile = open(snapshot_directory_file, 'w')
        resultfile.write('False Furer\n')
        resultfile.write("experiment on graph: " + str(pattern_file_name) +
                         " and pattern: " + pattern_file_name + "\n")
        resultfile.write("repetitions (for this time snapshot): " +
                         str(repetitions) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write("average KLD on false furer: " +
                         str(numpy.mean(false_furer_results_KLD)) +
                         " with SSTD: " +
                         str(numpy.std(false_furer_results_KLD, ddof=1)) +
                         "\n")
        resultfile.write("average bhatta on false furer: " +
                         str(numpy.mean(false_furer_results_bhatta)) +
                         " with SSTD: " +
                         str(numpy.std(false_furer_results_bhatta, ddof=1)) +
                         "\n")
        resultfile.write(
            "average hellinger on false furer: " +
            str(numpy.mean(false_furer_results_hellinger)) + " with SSTD: " +
            str(numpy.std(false_furer_results_hellinger, ddof=1)) + "\n")
        resultfile.write(" " + "\n")
        resultfile.write('-----DETAILED RESULTS-----' + "\n")
        resultfile.write('false_results_KLD :' + str(false_furer_results_KLD) +
                         "\n")
        resultfile.write('false_results_bhatta :' +
                         str(false_furer_results_bhatta) + "\n")
        resultfile.write('false_results_hellinger :' +
                         str(false_furer_results_hellinger) + "\n")
        resultfile.write('avg #nodes observed :' +
                         str(numpy.mean(observed_nodes)) + "\n")
        resultfile.write('# nodes per time interval per run:' + str(
            (numpy.mean(observed_nodes_difference_per_snapshot) /
             duration[counter_duration])) + "\n")
        resultfile.write(
            'avg difference of nodes observed from previous snapshot :' +
            str(numpy.mean(observed_nodes_difference_per_snapshot)) + "\n")
        resultfile.write(
            "------------------------------------ Sampling info ------------------------------\n"
        )
        resultfile.write('number of sampling iterations : ' +
                         str(nr_iterations[counter]) + "\n")
        #resultfile.write('average of embeddings : ' + str(sum_of_embeddings[counter]/nr_iterations[counter])+"\n")
        #resultfile.write('average of embeddings w.r.t sampling iterations:' + str(sum_of_embeddings[counter]/float(nr_iterations[counter]))+"\n")
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[
                counter] == 0:
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[
                counter]
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[
                counter]

        print "Writing to file: ", nr_embeddings_temp
        resultfile.write('average of embeddings : ' + str(nr_embeddings_temp) +
                         "\n")
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[
                counter] == 0:
            #we do the old standard deviation
            a = Decimal(sum_of_squares[counter]) - (
                Decimal(math.pow(sum_of_embeddings[counter], 2)) /
                Decimal(float(nr_iterations[counter])))
            if a > 0:
                stdeviation = math.sqrt(
                    a / Decimal(float((nr_iterations[counter] - 1))))
            else:
                stdeviation = 0

        else:
            a = Decimal(sum_of_squares_root_node_emb[counter]) - (
                Decimal(math.pow(sum_of_root_node_emb[counter], 2)) /
                Decimal(float(nr_iterations[counter])))
            if a > 0:
                stdeviation = math.sqrt(
                    a / Decimal(float((nr_iterations[counter] - 1))))
            else:
                stdeviation = 0
        print "old stdev: ", stdeviation
        resultfile.write('stdeviation of # embeddings: ' + str(stdeviation) +
                         "\n")
        resultfile.close()
        counter += 1
        counter_duration += 1
Пример #4
0
def report_monitoring_my_version_online(
        monitoring_marks, output_path, detailed_result_path,
        monitoring_reports, exhaustive_approach_result_file, data_graph,
        pattern, Plist, repetitions, pattern_file_name, fdict_exhaustive,
        nr_non_observed_combinations):
    dict = {}

    duration = []
    nr_iterations = []
    sum_of_embeddings = []
    sum_of_squares = []
    sum_of_root_node_emb = []
    sum_of_squares_root_node_emb = []
    begin = 0

    for time_int in monitoring_marks:
        duration.append(time_int - begin)
        begin = time_int

    #the problem might be that some runs finished earlier, and some later.

    for i in xrange(len(monitoring_marks)):
        print i
        #for key_iter in monitoring_reports.keys():
        if not (monitoring_marks[i] in dict.keys()):
            dict[monitoring_marks[i]] = []
        try:
            dict[monitoring_marks[i]].append(monitoring_reports[i])
            nr_iterations.append(monitoring_reports[i].nr_iterations)
            sum_of_embeddings.append(monitoring_reports[i].sum_nr_embeddings)
            sum_of_squares.append(
                monitoring_reports[i].sum_of_the_square_embeddings)
            sum_of_root_node_emb.append(
                monitoring_reports[i].sum_nr_extra_embeddings)
            sum_of_squares_root_node_emb.append(
                monitoring_reports[i].sum_of_the_extra_square_embeddings)

        except IndexError:
            break

    print "NR ITERATIONS: ", nr_iterations
    print "sum_of_embeddings: ", sum_of_embeddings
    print "sum_of_squares: ", sum_of_squares
    snapshot_inits = []
    for i in range(repetitions):
        snapshot_inits.append(0)

    counter_duration = 0
    counter = 0

    average_klds = []
    average_bhattas = []
    average_hellingers = []
    std_klds = []
    std_bhattas = []
    std_hellingers = []
    avg_nodes_observed = []
    nr_nodes_per_time_interval_per_runs = []
    number_of_sampling_iterations = []
    average_of_embeddings = []
    stdevs = []

    for time_snapshot in monitoring_marks:
        print "Processing ", counter, "out of: ", len(monitoring_marks)
        false_furer_results_KLD = []
        false_furer_results_bhatta = []
        false_furer_results_hellinger = []
        false_furer_times = []
        observed_nodes = []
        observed_nodes_difference_per_snapshot = []

        snapshot_directory_path = os.path.join(detailed_result_path, )
        if not (os.path.exists(snapshot_directory_path)):
            os.mkdir(snapshot_directory_path)
        snapshot_directory_file = os.path.join(
            snapshot_directory_path,
            'res_time_' + str(time_snapshot) + '.info')

        fdict_furer_temp = dict[time_snapshot]

        fdicts_Furer = []

        for f in fdict_furer_temp:
            fdicts_Furer.append(f.current_fdict)
            observed_nodes.append(f.number_of_observed_nodes)

        if len(fdict_furer_temp) == 0:
            continue

        for i in range(len(fdict_furer_temp)):
            experiments.globals.nr_iterations = nr_iterations[i]
            fdict_limited = fdicts_Furer[i]
            fdict_Furer = fdicts_Furer[i]
            observed_nodes_difference_per_snapshot.append(observed_nodes[i] -
                                                          snapshot_inits[i])
            snapshot_inits[i] = observed_nodes[i]

            [pde, trash_list,
             default_key] = smplr.make_pd_general_kickout_default_my_version(
                 fdict_exhaustive)

            if len(pde) < 1:
                print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                break
            print fdict_Furer
            nr_possible_combinations = smplr.complete_combinations_1(
                fdict_Furer, data_graph, pattern, Plist)
            pdf = smplr.make_pd_general_kickout_default_limited_my_version(
                fdict_Furer)
            #print smplr.transform_to_ptable(pde)
            #print smplr.transform_to_ptable(pdf)
            false_furer_results_KLD.append(
                su.avg_kld(smplr.transform_to_ptable(pde),
                           smplr.transform_to_ptable(pdf)))
            false_furer_results_bhatta.append(
                su.avg_bhatta(smplr.transform_to_ptable(pde),
                              smplr.transform_to_ptable(pdf)))
            false_furer_results_hellinger.append(
                su.avg_hellinger(smplr.transform_to_ptable(pde),
                                 smplr.transform_to_ptable(pdf)))

        average_klds.append(numpy.mean(false_furer_results_KLD))
        std_klds.append(numpy.std(false_furer_results_KLD, ddof=1))
        average_bhattas.append(numpy.mean(false_furer_results_bhatta))
        std_bhattas.append(numpy.std(false_furer_results_bhatta, ddof=1))
        average_hellingers.append(numpy.mean(false_furer_results_hellinger))
        std_hellingers.append(numpy.std(false_furer_results_hellinger, ddof=1))
        avg_nodes_observed.append(numpy.mean(observed_nodes))
        number_of_sampling_iterations.append(nr_iterations[counter])
        nr_nodes_per_time_interval_per_runs.append(
            float((numpy.mean(observed_nodes_difference_per_snapshot) /
                   duration[counter_duration])))
        if sum_of_squares_root_node_emb[counter] == 0 and sum_of_root_node_emb[
                counter] == 0:
            nr_embeddings_temp = sum_of_embeddings[counter] / nr_iterations[
                counter]
        else:
            nr_embeddings_temp = sum_of_root_node_emb[counter] / nr_iterations[
                counter]
        average_of_embeddings.append(nr_embeddings_temp)
        stdeviation = numpy.nan
        try:
            if sum_of_squares_root_node_emb[
                    counter] == 0 and sum_of_root_node_emb[counter] == 0:
                #we do the old standard deviation
                a = Decimal(sum_of_squares[counter]) - (
                    Decimal(math.pow(sum_of_embeddings[counter], 2)) /
                    Decimal(float(nr_iterations[counter])))
                stdeviation = math.sqrt(
                    a / Decimal(float((nr_iterations[counter] - 1))))
            else:
                a = Decimal(sum_of_squares_root_node_emb[counter]) - (
                    Decimal(math.pow(sum_of_root_node_emb[counter], 2)) /
                    Decimal(float(nr_iterations[counter])))
                stdeviation = math.sqrt(
                    a / Decimal(float((nr_iterations[counter] - 1))))
        except:
            print "not successful"

        stdevs.append(stdeviation)

        counter += 1
        counter_duration += 1
    return {
        "average_klds": average_klds,
        "average_bhattas": average_bhattas,
        "average_hellingers": average_hellingers,
        "std_klds": std_klds,
        "std_bhattas": std_bhattas,
        "std_hellingers": std_hellingers,
        "avg_nodes_observed": avg_nodes_observed,
        "nr_nodes_per_time_interval_per_runs":
        nr_nodes_per_time_interval_per_runs,
        "number_of_sampling_iterations": number_of_sampling_iterations,
        "average_of_embeddings": average_of_embeddings,
        "stdevs": stdevs
    }
def my_version_report_online(fdict_exhaustive, data_graph, pattern,
                             monitoring_marks, output_path,
                             detailed_result_path, monitoring_reports,
                             exhaustive_approach_results_path, Plist_base, nr,
                             pattern_file_name):
    print "LEN FDICT_EXHAUSTIVE (BEFORE): ", len(fdict_exhaustive)
    experiments.globals.report = "furer"
    size_fdict = len(fdict_exhaustive)
    num_embeddings = 0
    for k in fdict_exhaustive.keys():
        num_embeddings = num_embeddings + fdict_exhaustive[
            k]  # I remove +1 added by Laplace smoothing BEWARE: previous Laplace smoothing assumed
    #print  "NR EMBEDDINGS BEFORE COMPLETING: ",num_embeddings
    start_time = time.time()

    results = {}
    counter = 1
    for i in monitoring_reports.keys():
        #print "---- RUN!-------",i
        experiments.globals.nr_non_observed_combinations = 0
        nr_possible_combinations = smplr.complete_combinations_1(
            fdict_exhaustive, data_graph, pattern,
            Plist_base)  # add zeros to all not present combinations
        nr_non_observed_combinations = nr_possible_combinations - size_fdict
        experiments.globals.nr_non_observed_combinations = nr_non_observed_combinations
        results[counter] = report.report_monitoring_my_version_online(
            monitoring_marks, output_path, detailed_result_path,
            monitoring_reports[i], exhaustive_approach_results_path,
            data_graph, pattern, Plist_base, nr, pattern_file_name,
            fdict_exhaustive,
            nr_non_observed_combinations)  #print monitoring_reports
        #print "collected results:",results[counter]
        counter += 1

    print "Calculating averages"

    for i in xrange(len(monitoring_marks)):
        avg_klds = []
        average_bhattas = []
        average_hellingers = []
        std_klds = []
        std_bhattas = []
        std_hellingers = []
        avg_nodes_observed = []
        nr_nodes_per_time_interval_per_runs = []
        number_of_sampling_iterations = []
        average_of_embeddings = []
        stdevs = []
        with open(
                os.path.join(detailed_result_path,
                             'res_time_' + str(monitoring_marks[i]) + ".info"),
                'w') as resultfile:
            resultfile.write('False Furer with different orderings\n')
            for k in monitoring_reports.keys():
                #print "run:",k,results[k]['average_klds']
                avg_klds.append(results[k]['average_klds'][i])
                average_bhattas.append(results[k]['average_bhattas'][i])
                average_hellingers.append(results[k]['average_hellingers'][i])
                std_klds.append(results[k]['std_klds'][i])
                std_bhattas.append(results[k]['std_bhattas'][i])
                std_hellingers.append(results[k]['std_hellingers'][i])
                avg_nodes_observed.append(results[k]['avg_nodes_observed'][i])
                nr_nodes_per_time_interval_per_runs.append(
                    results[k]['nr_nodes_per_time_interval_per_runs'][i])
                number_of_sampling_iterations.append(
                    results[k]['number_of_sampling_iterations'][i])
                average_of_embeddings.append(
                    float(results[k]['average_of_embeddings'][i]))
                stdevs.append(results[k]['stdevs'][i])
            #print "Embeddings array: ",average_of_embeddings,"average: ",str(numpy.mean(average_of_embeddings))
            resultfile.write("average KLD on false furer: " +
                             str(numpy.mean(avg_klds)) + " with SSTD: " +
                             str(numpy.std(avg_klds, ddof=1)) + "\n")
            resultfile.write("average bhatta on false furer: " +
                             str(numpy.mean(average_bhattas)) +
                             " with SSTD: " +
                             str(numpy.std(average_bhattas, ddof=1)) + "\n")
            resultfile.write("average hellinger on false furer: " +
                             str(numpy.mean(average_hellingers)) +
                             " with SSTD: " +
                             str(numpy.std(average_hellingers, ddof=1)) + "\n")
            resultfile.write(" " + "\n")
            resultfile.write('avg #nodes observed :' +
                             str(numpy.mean(avg_nodes_observed)) + "\n")
            #print nr_nodes_per_time_interval_per_runs
            #resultfile.write('# nodes per time interval per run:' + str((numpy.mean(nr_nodes_per_time_interval_per_runs))+"\n"))
            resultfile.write('average of embeddings : ' +
                             str(numpy.mean(average_of_embeddings)) + "\n")
            #print numpy.nanmean(stdevs)
            resultfile.write('stdeviation of # embeddings: ' +
                             str(numpy.nanmean(stdevs)) + "\n")
Пример #6
0
def report_monitoring_my_version(monitoring_marks,output_path,detailed_result_path,monitoring_reports,exhaustive_approach_result_file,data_graph,pattern,Plist,repetitions,pattern_file_name,fdict_exhaustive,nr_non_observed_combinations,write):
      """Write one 'res_time_<mark>.info' report file per monitoring mark.

      For every mark in monitoring_marks a text file is created under
      detailed_result_path. It contains the mean and sample standard
      deviation of the su.avg_kld / su.avg_bhatta / su.avg_hellinger scores
      (exhaustive distribution vs. each run's sampled distribution), node
      observation statistics, and an embedding-count estimate with its
      standard deviation.

      Parameters:
        monitoring_marks -- sequence of time marks; used as grouping keys,
            in the output file names, and to derive interval durations.
        output_path -- not used in this part of the function.
        detailed_result_path -- directory for the per-snapshot files;
            created with os.mkdir if it does not exist.
        monitoring_reports -- mapping of run key -> sequence of report
            objects, indexed by the position of the mark. The code reads
            nr_iterations, sum_nr_embeddings, sum_of_the_square_embeddings,
            nr_root_nodes, current_fdict and number_of_observed_nodes from
            each report, and optionally the *_random / *_aux attributes.
        exhaustive_approach_result_file -- not used in this part of the
            function.
        data_graph, pattern, Plist -- forwarded to
            smplr.complete_combinations_1.
        repetitions -- number of runs; sizes the per-run state list and is
            written to the report header.
        pattern_file_name -- written to the report header.
        fdict_exhaustive -- forwarded to
            smplr.make_pd_general_kickout_default_my_version.
        nr_non_observed_combinations -- not used in this part of the
            function.
        write -- the distance scores are only computed when this is True;
            the report files are written either way.

      Side effects: prints progress to stdout and sets
      experiments.globals.current_time_snapshot and
      experiments.globals.nr_iterations.
      """
      #CREATE DIRECTORY THAT WILL CONTAIN RESULTS FOR EACH TIME INSTANCE
      # NOTE(review): the local name 'dict' shadows the builtin for the rest
      # of this function.
      dict={}
      duration=[]
      nr_iterations=[]
      sum_number_of_embeddings=[]
      sum_of_embeddings_vers1=[]
      sum_of_embeddings_random_old=[]
      sum_of_square_emb_random_old=[]
      
      sum_of_squares_vers1=[]
      sum_of_the_square_embeddings=[]
      # Filled below but never read in this part of the function; the
      # estimate further down uses experiments.globals.nr_root_nodes instead.
      nr_root_nodes=[]
      begin=0
      
      # Length of each monitoring interval: the difference between
      # consecutive marks, the first one being measured from 0.
      for time_int in monitoring_marks:
          duration.append(time_int-begin)
          begin=time_int
      
      #if not type(monitoring_reports)==dict:
      #  print "monitoring reports to dictionary"
      #  tmp=monitoring_reports
      #  monitoring_reports={}
      #  monitoring_reports["1"]=tmp
      #  print monitoring_reports
      #the problem might be that some runs finished earlier, and some later.
      # Group the report objects by monitoring mark and, in parallel, flatten
      # their counters into the lists above. The flat lists receive one entry
      # per (mark, run) pair, in mark-major order, unless one of the
      # exception paths below cuts an iteration short.
      for i in xrange(len(monitoring_marks)):
          for key_iter in monitoring_reports.keys():
              if not(monitoring_marks[i] in dict.keys()):
                  dict[monitoring_marks[i]]=[]
              try:
                  dict[monitoring_marks[i]].append(monitoring_reports[key_iter][i])
                  nr_iterations.append(monitoring_reports[key_iter][i].nr_iterations)
                  sum_number_of_embeddings.append(monitoring_reports[key_iter][i].sum_nr_embeddings)
                  sum_of_the_square_embeddings.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings)
                  nr_root_nodes.append(monitoring_reports[key_iter][i].nr_root_nodes)
                  # Optional attributes of the "old" report format.
                  # NOTE(review): the bare except catches every exception, and
                  # 'continue' jumps to the next run, so the *_aux block below
                  # is skipped whenever these attributes are missing --
                  # confirm that this is intended.
                  try:
                      sum_of_embeddings_random_old.append(monitoring_reports[key_iter][i].sum_number_of_embeddings_random)
                      sum_of_square_emb_random_old.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_random)
                  except:
                      continue  
                  # Optional "aux" counters; only printed for debugging below.
                  try:
                      sum_of_embeddings_vers1.append(monitoring_reports[key_iter][i].sum_nr_embeddings_aux)
                      sum_of_squares_vers1.append(monitoring_reports[key_iter][i].sum_of_the_square_embeddings_aux)
                  except:
                      continue
                  
              except IndexError:
                  # This run has no report at position i. The 'break' also
                  # skips all remaining runs for this mark.
                  break
      print "Sum of embeddings random old: ",len(sum_of_embeddings_random_old)
      print "NR ITERATIONS: ",nr_iterations
      print "sum_of_embeddings: ",sum_of_embeddings_vers1
      print "sum_of_squares: ",sum_of_squares_vers1
      # Per-run number of observed nodes at the previous snapshot (starts at
      # 0); used to compute the per-snapshot difference.
      snapshot_inits=[]
      for i in range(repetitions):
          snapshot_inits.append(0)
      
      # 'counter' indexes the flat per-report lists and 'counter_duration'
      # indexes 'duration'; both advance once per written snapshot file.
      # 'interval' and the three accumulators below are not read anywhere in
      # this part of the function.
      counter_duration=0
      counter=0
      interval=0
      acc_nr_emb=0
      acc_nr_emb_minus_average=0
      nr_emb_per_interval=[]
      
      for time_snapshot in monitoring_marks:
          experiments.globals.current_time_snapshot=time_snapshot
          print "TIME SNAPSHOT: ",time_snapshot
          interval+=1
          randnode_results_KLD = []
          randnode_results_bhatta = []
          randnode_results_hellinger = []          
          furer_times = []
          observed_nodes=[]
          observed_nodes_difference_per_snapshot=[]
          # os.path.join with a single argument returns it unchanged, so all
          # snapshot files go directly into detailed_result_path.
          snapshot_directory_path=os.path.join(detailed_result_path,)
          if not(os.path.exists(snapshot_directory_path)):
              os.mkdir(snapshot_directory_path)
          snapshot_directory_file=os.path.join(snapshot_directory_path,'res_time_'+str(time_snapshot)+'.info')
          
          if write==True:
              # Reports of all runs that reached this mark.
              fdict_furer_temp=dict[time_snapshot]
              fdicts_Furer=[]
              for f in fdict_furer_temp:
                  fdicts_Furer.append(f.current_fdict)
                  observed_nodes.append(f.number_of_observed_nodes)
              
              # NOTE(review): this 'continue' skips the file output and the
              # increments of 'counter' and 'counter_duration' at the end of
              # the loop body.
              if len(fdict_furer_temp)==0:
                  continue
              
              for i in range(len(fdict_furer_temp)):
                  # NOTE(review): nr_iterations is a flat list over all marks
                  # and runs, while i is the run index within this snapshot,
                  # so this reads the first entries of the list for every
                  # snapshot -- verify.
                  experiments.globals.nr_iterations=nr_iterations[i] 
                  fdict_limited = fdicts_Furer[i]
                  fdict_Furer=fdicts_Furer[i]
                  # Nodes newly observed by run i since the previous snapshot.
                  observed_nodes_difference_per_snapshot.append(observed_nodes[i]-snapshot_inits[i])
                  snapshot_inits[i]=observed_nodes[i]
                  
                  # The argument does not depend on i; only pde is used below.
                  [pde,  trash_list,default_key] = smplr.make_pd_general_kickout_default_my_version(fdict_exhaustive)
                  if len(pde) < 1:
                      print "WARNING: bad (not enough present) pattern or too high trash threshold! STOPPING."
                      # Leaves only the per-run loop; the report file is still
                      # written with whatever was collected so far.
                      break  
                 
                  #if time_snapshot==15:
                  #    for ke in pde.keys():
                  #        print ke,pde[ke]
                  
                  # The return value is unused; presumably the call completes
                  # fdict_Furer in place with the missing combinations --
                  # TODO confirm against smplr.
                  nr_possible_combinations=smplr.complete_combinations_1(fdict_Furer, data_graph,  pattern,  Plist)
                  
                  pdl= smplr.make_pd_general_kickout_default_limited_my_version(fdict_Furer)
    #               print "EXHAUSTIVE:"
    #               for k in pde.keys():
    #                     print "KEY: ",k
    #                     for e in pde[k]:
    #                         print e
    #               print "SMPL:"
    #               for k in pdl.keys():
    #                     print "KEY: ",k
    #                     for e in pdl[k]:
    #                         print e
                  #print "EXHAUSTIVE ..."
                  #with open('random_exhaustive.csv','w') as f:
                  #   for k in pde.keys():
                  #      print "KEY: ",k
                  #      f.write(str(k)+";")
                  #      for e in pde[k]:
                  #          f.write(str(e)+";")
                  #      f.write("\n")
                  #print "RANDOM ..."
                  #with open('random_random.csv','w') as f:
                  #   for k in pdl.keys():
                  #      f.write(str(k)+";")
                  #      for e in pdl[k]:
                  #          f.write(str(e)+";")
                  #      f.write("\n")
                  # One score per run for each of the three measures,
                  # comparing the exhaustive table with the sampled one.
                  randnode_results_KLD.append(su.avg_kld(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
                  randnode_results_bhatta.append(su.avg_bhatta(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
                  randnode_results_hellinger.append(su.avg_hellinger(smplr.transform_to_ptable(pde), smplr.transform_to_ptable(pdl)))
          
          print randnode_results_KLD
          print "Writing to: ",snapshot_directory_file
          resultfile = open(snapshot_directory_file,  'w')
          resultfile.write('Random\n')
          # NOTE(review): pattern_file_name is written both as the graph and
          # as the pattern name -- verify which value the graph field expects.
          resultfile.write("experiment on graph: " + str(pattern_file_name) +" and pattern: "+pattern_file_name+"\n")
          resultfile.write("repetitions (for this time snapshot): " + str(repetitions) +"\n")
          resultfile.write(" " +"\n")
          # ddof=1 requests the sample standard deviation from numpy.std.
          resultfile.write("average KLD on random: " + str(numpy.mean(randnode_results_KLD))  + " with SSTD: " + str(numpy.std(randnode_results_KLD,  ddof=1)) +"\n")
          print "KLD: ",str(numpy.mean(randnode_results_KLD))
          resultfile.write("average bhatta on random: " + str(numpy.mean(randnode_results_bhatta))  + " with SSTD: " + str(numpy.std(randnode_results_bhatta,  ddof=1)) +"\n")
          resultfile.write("average hellinger on random: " + str(numpy.mean(randnode_results_hellinger))  + " with SSTD: " + str(numpy.std(randnode_results_hellinger,  ddof=1)) +"\n")
          resultfile.write(" " +"\n")
          resultfile.write('-----DETAILED RESULTS-----' +"\n")
          resultfile.write('random_results_KLD :' + str(randnode_results_KLD) +"\n")
          resultfile.write('random_results_bhatta :' + str(randnode_results_bhatta) +"\n")
          resultfile.write('random_results_hellinger :' + str(randnode_results_hellinger) +"\n")
          resultfile.write('avg #nodes observed :' + str(numpy.mean(observed_nodes)) +"\n")
          # Mean number of newly observed nodes divided by the length of the
          # current interval.
          resultfile.write('# nodes per time interval per run:' + str((numpy.mean(observed_nodes_difference_per_snapshot)/duration[counter_duration])) +"\n")
          resultfile.write('avg difference of nodes observed from previous snapshot :' + str(numpy.mean(observed_nodes_difference_per_snapshot)) +"\n")          
          resultfile.write("------------------------------------ Sampling info ------------------------------\n")
          # NOTE(review): 'counter' advances once per snapshot, whereas the
          # flat lists hold one entry per (mark, run) pair; with more than one
          # run the entry read here may not belong to this snapshot -- verify.
          resultfile.write('number of sampling iterations :' + str(nr_iterations[counter])+"\n")    
          nr_iter=nr_iterations[counter]
          # Clamp to 2 so the divisions by nr_iter and nr_iter-1 below cannot
          # divide by zero.
          if nr_iter==0 or nr_iter==1:
              nr_iter=2
          # Estimate: mean embeddings per sampling iteration, scaled by the
          # global number of root nodes.
          avg=(float(Decimal(sum_number_of_embeddings[counter]))/nr_iter)*(experiments.globals.nr_root_nodes)
          print "HALOOO:", Decimal(sum_number_of_embeddings[counter])
          old=False
          stdev2=0
          if avg<0:
              print "Handling old data structures"
              #this means we handle the old version
              avg=float(sum_of_embeddings_random_old[counter])/nr_iter
              old=True
          sum1=Decimal(sum_of_the_square_embeddings[counter])
          sum2=Decimal(sum_number_of_embeddings[counter])
          # sum1 - sum2^2/n: the sum of squared deviations from the mean,
          # provided sum1 holds the sum of squares and sum2 the plain sum.
          # math.pow computes in float, so the square is not exact Decimal
          # arithmetic.
          var=Decimal(sum1)-(Decimal(math.pow(Decimal(sum2),2))/nr_iter)
          print "Variance: ",var
          # stdev2 stays 0 when the computed value is not positive.
          if var>0:
            stdev2=math.sqrt(var/(nr_iter-1))
       
          # stdev is stdev2 scaled by sqrt(nr_iter).
          stdev=Decimal(stdev2)*Decimal(math.sqrt(nr_iter)) 
          if old:
              print "Handling old data structures"
              #this means we handle the old version
              # NOTE(review): only stdev2 is recomputed for the old format;
              # 'stdev' keeps the value derived from the new-format sums.
              variance=sum_of_square_emb_random_old[counter]/(nr_iter)
              stdev2=math.sqrt(variance/(nr_iter-1))
          print "STDEV: ",stdev
          print "STDEV 2: ",stdev2
          print "Nr embeddingS: ",avg
          resultfile.write('average of embeddings w.r.t sampling iterations:' +str(avg) +"\n") 
          resultfile.write('stdeviation of # embeddings:' + str(stdev2)+"\n")  
          resultfile.write('2 stdeviation of # embeddings:' + str(stdev)+"\n")  
          counter+=1  
          resultfile.close()
          counter_duration+=1