Example #1
0
    def run(self):
        """
    OBdecomp : ordered bipartite decomposition on P, which is given. First element is list with a root node. # [ [2] , [1,3] , [4] , [5, 6] ]
    """
        experiments.globals.globaltimes_furer[self.current_iteration] = [
            time.time()
        ]
        experiments.globals.cqi[self.current_iteration] = 0
        experiments.globals.globalist_furer[self.current_iteration] = []
        freq_dict = {
        }  # dictionary with tuples of values as keys and frequency as value
        self.Zlist_dict = {
        }  # Furer will result in a list of estimations for each tuple -> average of this list must be taken as frequency
        # out of debug we do not use a list actually: an integer is kept for every tuple, and divided by number of iterations at the end
        number_of_targets = 0
        for node in self.P.nodes():
            if self.P.node[node]['target'] == True:
                number_of_targets += 1
        # number_of_targets holds the number of target nodes that we are after
        self.nodes_observed = 0
        self.iteration_counter = 0

        #while self.nodes_observed <= max(self.nlimitlist):
        while True:
            if (self.abort == True):
                #print "NR EMBEDDINGS (OVERALL):"
                for k in self.Zlist_dict.keys():
                    freq_dict[k] = (self.Zlist_dict[k]) / float(
                        self.iteration_counter)
                nr_emb = 0
                for k in freq_dict.keys():
                    nr_emb += freq_dict[k]
                print "IS : ", nr_emb
                with open(
                        os.path.join(self.output_path,
                                     "final_embeddings.info"), 'w') as f:
                    f.write(str(nr_emb))
                return
            if self.nodes_observed in self.nlimitlist and experiments.globals.cqi[
                    self.current_iteration] < len(
                        self.nlimitlist
                    ) and self.nodes_observed > self.nlimitlist[
                        experiments.globals.cqi[self.current_iteration]]:
                experiments.globals.cqi[
                    self.current_iteration] = experiments.globals.cqi[
                        self.
                        current_iteration] + 1  # we increase the index of position of quota to check upon
                total_Zlist_dict_copy = copy.deepcopy(self.Zlist_dict)
                smplr.handle_quota_Furer(self.D, self.P, total_Zlist_dict_copy,
                                         [0, None], self.iteration_counter,
                                         self.current_iteration)
                ##print "Call to Furer quota handling (main routine) at quota %d" % nlimitlist[cqi-1]
            self.iteration_counter = self.iteration_counter + 1

            n = self.root_nodes[random.randrange(len(self.root_nodes))]
            self.nodes_observed = self.nodes_observed + 1
            list_for_spent = []
            list_for_spent.append(self.nodes_observed)

            #s = sched.scheduler(time.time, time.sleep)
            #e1=s.enter(0, 4,smplr.rec_fit_False_Furer_global,([n], self.D,  self.P,  self.OBdecomp,  0,  [],  self.nlimitlist,  list_for_spent,  self.Zlist_dict,  self.iteration_counter,  0,self.current_iteration))
            #t = threading.Thread(target=s.run)
            #t.daemon=True
            #t.start()
            #t.join()
            #result=smplr.temp_result
            result = smplr.rec_fit_False_Furer([n], self.D, self.P,
                                               self.OBdecomp, 0, [],
                                               self.nlimitlist, list_for_spent,
                                               self.Zlist_dict,
                                               self.iteration_counter, 0,
                                               self.current_iteration)
            self.nodes_observed = list_for_spent[0]
            if result[1] != None:
                actualX = result[0] * len(self.root_nodes)
                mapping = result[1]  # this is mapping for OBdecomp FLAT.
                OBd_flat = [
                    item for sublist in self.OBdecomp for item in sublist
                ]
                target_values = []
                for i in range(len(OBd_flat)):
                    if self.P.node[OBd_flat[i]]['target'] == True:
                        if self.P.node[OBd_flat[i]]['target'] == True:
                            value_tuple = (self.P.node[OBd_flat[i]]['label'],
                                           self.D.node[mapping[i]]['value'])
                            target_values.append(value_tuple)
                # now target_values contains all combinations of target nodes' label-value as tuples
                target_tuple = tuple(
                    target_values
                )  # this makes a tuple (needed, since lists cannot be dict keys) from a list.

                with self.lock:
                    if target_tuple in self.Zlist_dict:  # this checks for KEYS in Zlist_dict
                        self.Zlist_dict[target_tuple] = self.Zlist_dict[
                            target_tuple] + actualX
                    else:
                        self.Zlist_dict[target_tuple] = 0
                        self.Zlist_dict[target_tuple] = self.Zlist_dict[
                            target_tuple] + actualX

            nr_emb = get_nr_embeddings(self.Zlist_dict, self.iteration_counter)
            nr_extra_embeddings = (
                nr_emb - experiments.globals.sum_number_of_embeddings)
            experiments.globals.sum_number_of_embeddings += nr_emb
            experiments.globals.sum_of_the_square_embeddings += math.pow(
                (nr_emb), 2)
            experiments.globals.nr_iterations = self.iteration_counter

        if (experiments.globals.cqi[self.current_iteration] < len(
                self.nlimitlist)) and (self.nodes_observed >= self.nlimitlist[
                    experiments.globals.cqi[self.current_iteration]]):
            experiments.globals.cqi[
                self.current_iteration] = experiments.globals.cqi[
                    self.
                    current_iteration] + 1  # we increase the index of position of quota to check upon
            total_Zlist_dict_copy = copy.deepcopy(self.Zlist_dict)
            smplr.handle_quota_Furer(self.D, self.P, total_Zlist_dict_copy,
                                     [0, None], self.iteration_counter,
                                     self.current_iteration)
            ##print "Call to Furer quota handling (main routine down) at quota %d" % nlimitlist[cqi-1]

        for k in self.Zlist_dict.keys():
            freq_dict[k] = (self.Zlist_dict[k]) / float(
                self.iteration_counter)  # simply an average of a list
        # now with quota handler we just return global lists of freqdicts
        return [
            experiments.globals.globalist_furer[self.current_iteration],
            experiments.globals.globaltimes_furer[self.current_iteration]
        ]
Example #2
0
    def __call__(self):
        """
        Monitor thread body.

        Sleeps between the configured time marks (``self.times``); at each
        mark it snapshots the followed sampler process ``self.p`` under the
        shared lock and records a Furer report.  Sets ``self.p.abort`` when
        the time budget is exhausted, and -- when
        ``stop_at_selection_failure`` is True -- aborts early if the
        embedding estimate fails the selection-interval test on the
        ``check_selection_on_nth_mark``-th mark.
        """
        while True:
            self.counter += 1
            #if the process we are following finished, we don't run the monitor anymore
            if (self.p.abort == True):
                return

            #if the time for running is exceeded, set flag to the main process that it should finish
            if self.counter >= len(self.times):
                self.p.abort = True
                return

            #otherwise, let the main process run for a designated time
            start_time_monitor = time.time()
            sleeping_interval = self.times[self.counter] - self.mark
            time.sleep(sleeping_interval)
            #print "Slept for: ",sleeping_interval
            #print "Stop at selection?",self.stop_at_selection_failure
            # Time spent below (snapshotting/reporting) is tracked separately
            # so it can be excluded from the sampler's sleep budget.
            processing_time_start = time.time()
            self.mark = self.times[self.counter]

            #we have to put some structures under the lock
            with self.lock:
                # Deep-copy shared structures so the sampler can keep mutating
                # them while we build the report.
                total_Zlist_dict_copy = copy.deepcopy(self.p.Zlist_dict)
                furer_dict = smplr.handle_quota_Furer(self.p.D, self.p.P,
                                                      total_Zlist_dict_copy,
                                                      [0, None],
                                                      self.p.iteration_counter,
                                                      self.thread_number)
                nodes_observed_copy = copy.deepcopy(self.p.nodes_observed)
                #print "Nodes observed: ",nodes_observed_copy
                report_structure = experiments.globals.Furer_reports(
                    self.p.current_iteration, furer_dict, nodes_observed_copy,
                    self.times[self.counter], sleeping_interval)
                self.report_structures.append(report_structure)
                nr_embeddings_temp = 0
                #print "sum of number of embeddings: ",experiments.globals.sum_number_of_embeddings
                #print "nr iterations",experiments.globals.nr_iterations

                # Guard against division by zero before any iteration ran.
                if (experiments.globals.nr_iterations != 0):
                    nr_embeddings_temp = experiments.globals.sum_number_of_embeddings / experiments.globals.nr_iterations
                #print "Nr embeddings found at the mark at time point: ",self.counter*sleeping_interval," is ",nr_embeddings_temp

            #check if the result falls within the selection interval at 60th minute (after one hour). Since we mark time
            #after every five minutes, this is the 12th counter value
            if self.stop_at_selection_failure == True:
                if self.counter == (self.check_selection_on_nth_mark -
                                    1):  #we count from zero
                    #get number of embeddings for the current result
                    #they are recorded in globals
                    # Sample-variance numerator: sum(x^2) - (sum(x))^2 / n.
                    a = experiments.globals.sum_of_the_square_embeddings - (
                        (math.pow(experiments.globals.sum_number_of_embeddings,
                                  2) / experiments.globals.nr_iterations))
                    #a=(nr_iterations[counter]*sum_of_squares[counter])-(math.pow(sum_of_embeddings[counter], 2))
                    stdeviation = math.sqrt(
                        a / (experiments.globals.nr_iterations - 1))
                    #print "Check if pattern can be selected at: ",self.times[self.counter]
                    #print "Standard deviation: ",stdeviation
                    #print "Nr nodes in data graph: ",self.nr_nodes_data_graph
                    nr_embeddings = experiments.globals.sum_number_of_embeddings / experiments.globals.nr_iterations
                    #print "Nr embeddings estimate: ",nr_embeddings
                    #print "lower bound: ",math.sqrt(self.nr_nodes_data_graph)-3*stdeviation
                    #print "Upper bound: ",self.nr_nodes_data_graph+3*stdeviation
                    # Selection test: estimate must lie within 3 standard
                    # deviations of [sqrt(|V(D)|), |V(D)|].
                    if nr_embeddings > math.sqrt(
                            self.nr_nodes_data_graph
                    ) - 3 * stdeviation and nr_embeddings < self.nr_nodes_data_graph + 3 * stdeviation:
                        print "Pattern is selected: continue"
                    else:
                        print "Pattern is not selected: ABORT MAIN PROCEDURE"
                        self.successful_monitoring = False
                        self.p.abort = True
                        #mark unsuccessful monitoring in order not to report it
                        return
            self.processing_time += (time.time() - processing_time_start)
    def run(self):
        """
        OBdecomp : ordered bipartite decomposition on P, which is given. First element is list with a root node. # [ [2] , [1,3] , [4] , [5, 6] ]
        """
        if (self.running_bug_fixed_code):
            print "Running bug fixed furer algorithm"
        else:
            print "Rning original furer code (no bug fix)"
        experiments.globals.globaltimes_furer[self.current_iteration] = [
            time.time()
        ]
        experiments.globals.cqi[self.current_iteration] = 0
        experiments.globals.globalist_furer[self.current_iteration] = []
        freq_dict = {
        }  # dictionary with tuples of values as keys and frequency as value
        self.Zlist_dict = {
        }  # Furer will result in a list of estimations for each tuple -> average of this list must be taken as frequency
        # out of debug we do not use a list actually: an integer is kept for every tuple, and divided by number of iterations at the end
        number_of_targets = 0
        for node in self.P.nodes():
            if self.P.node[node]['target'] == True:
                number_of_targets += 1
        # number_of_targets holds the number of target nodes that we are after
        self.nodes_observed = 0
        self.iteration_counter = 0
        matchings_found = 0
        getcontext().prec = 100
        root_nodes_already_observed = []
        nr_iterations = 1

        seed_counter = 1
        while True:
            if (self.abort == True):
                for k in self.Zlist_dict.keys():
                    freq_dict[k] = (self.Zlist_dict[k]) / float(
                        self.iteration_counter)
                nr_emb = 0
                for k in freq_dict.keys():
                    nr_emb += freq_dict[k]
                if self.output_path != None:
                    with open(
                            os.path.join(self.output_path,
                                         "final_embeddings.info"), 'w') as f:
                        f.write(str(nr_emb))
                return
            #print "Observed: ",self.nodes_observed
            #if self.nodes_observed>=2:
            #   self.abort=True
            if experiments.globals.same_seed:
                random.seed(seed_counter)
                seed_counter += 1
            #if self.nodes_observed in self.nlimitlist:# and experiments.globals.cqi[self.current_iteration]<len(self.nlimitlist) and self.nodes_observed > self.nlimitlist[experiments.globals.cqi[self.current_iteration]]:
            if True:
                experiments.globals.cqi[
                    self.current_iteration] = experiments.globals.cqi[
                        self.
                        current_iteration] + 1  # we increase the index of position of quota to check upon
                total_Zlist_dict_copy = copy.deepcopy(self.Zlist_dict)
                smplr.handle_quota_Furer(self.D, self.P, total_Zlist_dict_copy,
                                         [0, None], self.iteration_counter,
                                         self.current_iteration)
            rand_nr = random.randrange(len(self.root_nodes))
            n = self.root_nodes[rand_nr]
            self.iteration_counter = self.iteration_counter + 1
            self.nodes_observed = self.nodes_observed + 1
            list_for_spent = []
            list_for_spent.append(self.nodes_observed)

            if (self.running_bug_fixed_code):
                result = smplr.rec_fit_Furer_bug_fix(
                    [n], self.D, self.P, self.OBdecomp, 0, [], self.nlimitlist,
                    list_for_spent, self.Zlist_dict, self.iteration_counter, 0,
                    self.current_iteration)
            else:
                result = smplr.rec_fit_Furer([n], self.D, self.P,
                                             self.OBdecomp, 0, [],
                                             self.nlimitlist, list_for_spent,
                                             self.Zlist_dict,
                                             self.iteration_counter, 0,
                                             self.current_iteration)
            self.nodes_observed = list_for_spent[0]
            matches_found_root_node = 0
            if result[1] != None:
                #print "RES:",n,result[0]
                #print len(self.root_nodes)
                actualX = result[0] * len(self.root_nodes)
                matches_found_root_node = actualX
                #print matches_found_root_node
                mapping = result[1]  # this is mapping for OBdecomp FLAT.
                OBd_flat = [
                    item for sublist in self.OBdecomp for item in sublist
                ]
                target_values = []
                for i in range(len(OBd_flat)):
                    if self.P.node[OBd_flat[i]]['target'] == True:
                        if 'value' in self.D.node[mapping[i]]:
                            value = self.D.node[mapping[i]]['value']
                            value_tuple = (self.P.node[OBd_flat[i]]['label'],
                                           value)
                            if self.ordering_of_target_nodes != None:
                                target_values[self.ordering_of_target_nodes[
                                    OBd_flat[i]]] = value_tuple
                                #print "1:",target_values
                            else:
                                target_values.append(value_tuple)
                target_tuple = tuple(
                    target_values
                )  # this makes a tuple (needed, since lists cannot be dict keys) from a list.
                with self.lock:
                    if target_tuple in self.Zlist_dict:  # this checks for KEYS in Zlist_dict
                        self.Zlist_dict[target_tuple] = self.Zlist_dict[
                            target_tuple] + actualX
                    else:
                        self.Zlist_dict[target_tuple] = 0
                        self.Zlist_dict[target_tuple] = self.Zlist_dict[
                            target_tuple] + actualX
            nr_emb = get_nr_embeddings(self.Zlist_dict, self.iteration_counter)
            nr_extra_embeddings = (
                Decimal(nr_emb) -
                Decimal(experiments.globals.sum_number_of_embeddings))
            experiments.globals.sum_number_of_embeddings += Decimal(nr_emb)
            experiments.globals.sum_of_the_square_embeddings += Decimal(
                math.pow((nr_emb), 2))
            experiments.globals.sum_number_of_extra_embeddings += Decimal(
                matches_found_root_node)
            experiments.globals.sum_of_the_square_extra_embeddings += Decimal(
                math.pow((matches_found_root_node), 2))
            experiments.globals.embeddings_estimate = nr_emb
            experiments.globals.nr_iterations = self.iteration_counter
        print experiments.globals.cqi[self.current_iteration]
        print len(self.nlimitlist), self.nlimitlist
        print experiments.globals.cqi[self.current_iteration] < len(
            self.nlimitlist)
        if (experiments.globals.cqi[self.current_iteration] < len(
                self.nlimitlist)) and (self.nodes_observed >= self.nlimitlist[
                    experiments.globals.cqi[self.current_iteration]]):
            experiments.globals.cqi[
                self.current_iteration] = experiments.globals.cqi[
                    self.
                    current_iteration] + 1  # we increase the index of position of quota to check upon
            total_Zlist_dict_copy = copy.deepcopy(self.Zlist_dict)
            smplr.handle_quota_Furer(self.D, self.P, total_Zlist_dict_copy,
                                     [0, None], self.iteration_counter,
                                     self.current_iteration)

        return [
            experiments.globals.globalist_furer[self.current_iteration],
            experiments.globals.globaltimes_furer[self.current_iteration]
        ]
Example #4
0
    def __call__(self):
        """
        Monitor thread body (cutoff-aware variant).

        Sleeps between the configured time marks (``self.times``); at each
        mark it snapshots the followed sampler process ``self.p`` under the
        shared lock, records a Furer report (optionally pickled to disk), and
        aborts the sampler when: the time budget is exhausted, the number of
        already-selected patterns reaches ``self.cutoff``, or the selection-
        interval test on the ``check_selection_on_nth_mark``-th mark decides
        (either way -- success also sets abort and returns).
        """
        while True:
            self.counter += 1
            #print self.counter,"th mark"," out of",len(self.times)
            #if the process we are following finished, we don't run the monitor anymore
            if (self.p.abort == True):
                return

            #if the time for running is exceeded, set flag to the main process that it should finish
            if self.counter >= len(self.times):
                #print "TIMEOUT!"
                self.p.abort = True
                return

            #otherwise, let the main process run for a designated time
            start_time_monitor = time.time()
            # NOTE(review): time.clock() return value is discarded; this only
            # starts the processor clock -- presumably a leftover, confirm.
            time.clock()
            sleeping_interval = self.times[self.counter] - self.mark
            time.sleep(sleeping_interval)
            processing_time_start = time.time()
            self.mark = self.times[self.counter]
            # Re-check after sleeping: the sampler may have finished meanwhile.
            if self.p.abort == True:
                return
            #print "slept for: ",sleeping_interval, " noting results"
            #check if cutoff exceeded - if yes, abort the execution and mark monitoring false
            N = furer_sampling_approach.get_current_selected_patterns(
                self.selected_patterns_info_file, self.cutoff)
            #print "Already selected patterns,",N," cutoff: ",self.cutoff
            if self.stop_at_selection_failure == True and N >= self.cutoff:
                #print "Pattern is not selected: ABORT MAIN PROCEDURE"
                self.successful_monitoring = False
                self.p.abort = True
                self.limit_exceeded = True
                return

            #we have to put some structures under the lock
            with self.lock:
                #print "Monitor recording ...."
                # Deep-copy shared structures so the sampler can keep mutating
                # them while we build the report.
                total_Zlist_dict_copy = copy.deepcopy(self.p.Zlist_dict)
                furer_dict = smplr.handle_quota_Furer(self.p.D, self.p.P,
                                                      total_Zlist_dict_copy,
                                                      [0, None],
                                                      self.p.iteration_counter,
                                                      self.thread_number)
                nodes_observed_copy = copy.deepcopy(self.p.nodes_observed)
                report_structure = experiments.globals.Furer_reports(
                    self.p.current_iteration, furer_dict, nodes_observed_copy,
                    self.times[self.counter], sleeping_interval)
                self.report_structures.append(report_structure)
                #print "Adding reports. Number of reports now: ",len(self.report_structures)
                #save intermediate structure just in case
                #write down monitoring reports
                if self.write == True:
                    pickout = open(
                        os.path.join(self.output_path,
                                     'monitoring_reports.pickle'), 'wb')
                    pickle.dump(self.report_structures, pickout)
                    pickout.close()
                nr_embeddings_temp = experiments.globals.embeddings_estimate
                self.nr_embeddings = nr_embeddings_temp
                #print "nr observed nodes: ",nodes_observed_copy
                #print "Nr iterations: ",experiments.globals.nr_iterations
                #print "Nr embeddings found at the mark at time point: ",self.counter*sleeping_interval," is ",nr_embeddings_temp
                #print experiments.globals.sum_of_the_square_extra_embeddings
                #print (Decimal(math.pow(experiments.globals.sum_number_of_extra_embeddings, 2))/experiments.globals.nr_iterations)
                #a=(Decimal(experiments.globals.sum_of_the_square_extra_embeddings)-(Decimal(math.pow(experiments.globals.sum_number_of_extra_embeddings, 2))/experiments.globals.nr_iterations))/(experiments.globals.nr_iterations-1)
                #self.stdeviation=math.sqrt(a/(experiments.globals.nr_iterations-1))
                #lower_bound=nr_embeddings_temp+3*self.stdeviation
                #upper_bound=nr_embeddings_temp-3*self.stdeviation
                #print "Lower bound: ",lower_bound,lower_bound>=math.sqrt(self.nr_nodes_data_graph)
                #print "Upper bound: ",upper_bound,upper_bound<=self.nr_nodes_data_graph
                #print "Nr nodes in the data graph: ",self.nr_nodes_data_graph
                #print "Selectable w.r.t. upper bound? ",upper_bound<=self.nr_nodes_data_graph
                #print "Standard deviation: ",self.stdeviation

            #check if the result falls within the selection interval at 60th minute (after one hour). Since we mark time
            #after every five minutes, this is the 12th counter value
            #print "Stop at selection? ",self.stop_at_selection_failure
            if self.stop_at_selection_failure == True:
                #print self.counter,self.check_selection_on_nth_mark
                if self.counter == (
                        self.check_selection_on_nth_mark):  #we count from zero
                    #get number of embeddings for the current result
                    #they are recorded in globals
                    #print "Check if pattern can be selected at: ",self.times[self.counter]
                    #print "Nr nodes in data graph: ",self.nr_nodes_data_graph
                    nr_embeddings_temp = experiments.globals.embeddings_estimate
                    self.nr_embeddings = nr_embeddings_temp
                    #print "nr observed nodes: ",nodes_observed_copy
                    #print "Nr iterations: ",experiments.globals.nr_iterations
                    #print "Nr embeddings found at the mark at time point: ",self.counter*sleeping_interval," is ",nr_embeddings_temp
                    # NOTE(review): 'a' is already divided by (n-1), and the
                    # sqrt divides by (n-1) again -- this looks like a double
                    # division compared with the sibling monitor's estimator;
                    # confirm the intended variance formula.
                    a = (Decimal(
                        experiments.globals.sum_of_the_square_extra_embeddings
                    ) - (Decimal(
                        math.pow(
                            experiments.globals.sum_number_of_extra_embeddings,
                            2)) / experiments.globals.nr_iterations)) / (
                                experiments.globals.nr_iterations - 1)
                    self.stdeviation = math.sqrt(
                        a / (experiments.globals.nr_iterations - 1))
                    #print "Standard deviation: ",self.stdeviation
                    #print "Nr iterations: ",experiments.globals.nr_iterations
                    #print "upper bound: ",math.sqrt(self.nr_nodes_data_graph)-3*self.stdeviation
                    #print "lower bound: ",self.nr_nodes_data_graph+3*self.stdeviation
                    # NOTE(review): naming looks swapped -- 'lower_bound' is
                    # estimate + 3*std and 'upper_bound' is estimate - 3*std;
                    # the comparisons below rely on this orientation.
                    lower_bound = nr_embeddings_temp + 3 * self.stdeviation
                    upper_bound = nr_embeddings_temp - 3 * self.stdeviation
                    if self.ignore_upper_limit == True:
                        upper_bound = 0
                    if lower_bound >= math.sqrt(
                            self.nr_nodes_data_graph
                    ) and upper_bound <= self.nr_nodes_data_graph:
                        #print "Pattern is selected: continue"
                        self.successful_monitoring = True
                        self.p.abort = True
                        return
                    else:
                        #print "Pattern is not selected: ABORT MAIN PROCEDURE"
                        self.successful_monitoring = False
                        self.p.abort = True
                        #mark unsuccessful monitoring in order not to report it
                        return
                print "continue"
            self.processing_time += (time.time() - processing_time_start)