Example #1
    def run_program(self, dataset, parameters):
        self.logger.info("Starting run\nParameters:\n{}".format(
            "\n".join(["\t{}: {}".format(k,v) for k,v in parameters.items()])))

        kg_i, kg_s = dataset

        # fit model
        t0 = timer()

        # generate semantic item sets from sampled graph
        si_sets = generate_semantic_item_sets(kg_i)

        # generate common behaviour sets
        cbs_sets = generate_common_behaviour_sets(si_sets,
                                                  parameters["similarity_threshold"],
                                                  parameters["max_cbs_size"])

        # generate semantic association rules
        rules = generate_semantic_association_rules(kg_i,
                                                    kg_s,
                                                    cbs_sets,
                                                    parameters["minimal_local_support"])

        # calculate support and confidence, skip those not meeting minimum requirements
        final_rule_set = []
        for rule in rules:
            support = support_of(kg_i, rule)
            confidence = confidence_of(kg_i, rule)

            if support >= parameters["minimal_support"] and\
               confidence >= parameters["minimal_confidence"]:
                final_rule_set.append((rule, support, confidence))

        # sort rules by confidence, then support (both descending)
        final_rule_set.sort(key=itemgetter(2, 1), reverse=True)

        # time taken (timer() is assumed to return seconds)
        t1 = timer()
        dt = t1 - t0
        print("  Program completed in {:.3f} s".format(dt))

        print("  Found {} rules".format(len(final_rule_set)))
        return final_rule_set
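
Example #1 runs the pipeline sequentially in a single process: mine semantic item sets, merge them into common behaviour sets, derive association rules, and keep only the rules that meet the minimum support and confidence. A minimal usage sketch follows; the `RuleLearner` class name, the `load_dataset` helper, and the concrete threshold values are assumptions for illustration, but the parameter keys and the (kg_i, kg_s) dataset shape are exactly what the method reads.

    # Hypothetical usage sketch; helper names and threshold values are assumed.
    parameters = {
        "similarity_threshold": 0.8,    # assumed value
        "max_cbs_size": 4,              # assumed value
        "minimal_local_support": 0.1,   # assumed value
        "minimal_support": 0.05,        # assumed value
        "minimal_confidence": 0.5,      # assumed value
    }

    kg_i, kg_s = load_dataset()         # hypothetical loader for the instance and schema graphs
    runner = RuleLearner()              # hypothetical class that owns run_program()
    rules = runner.run_program((kg_i, kg_s), parameters)

    # each entry is a (rule, support, confidence) triple, best first
    for rule, support, confidence in rules[:10]:
        print(rule, support, confidence)
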
Example #2
    def run_program(self, dataset, parameters):
        self.logger.info("Starting run\nParameters:\n{}".format("\n".join(
            ["\t{}: {}".format(k, v) for k, v in parameters.items()])))
        self.logger.info(
            "Distributing load over {} cores".format(NUM_OF_WORKERS))

        kg_i, kg_s = dataset

        # fit model
        t0 = timer()

        # MP manager
        manager = Manager()

        # generate semantic item sets from sampled graph
        si_sets = manager.dict(generate_semantic_item_sets(kg_i))

        # generate common behaviour sets
        work = manager.Queue()
        keys = list(si_sets.keys())
        slices = self.diagonal_matrix_slicer(keys)

        cbs_sets = manager.list()
        pool = []
        for i in range(NUM_OF_WORKERS):
            p = Process(target=generate_common_behaviour_sets,
                        args=(si_sets, cbs_sets, work,
                              parameters["similarity_threshold"]))
            p.daemon = True
            p.start()
            pool.append(p)

        for slce in slices:
            work.put(slce)

        for p in pool:
            work.put(None)

        # wait for the workers to finish
        for p in pool:
            p.join()

        # extend common behaviour sets
        cbs_size = 2
        cbs_sets_extended = manager.list(cbs_sets)
        while cbs_size < parameters["max_cbs_size"]:
            func = partial(extend_common_behaviour_sets, cbs_sets_extended,
                           parameters["similarity_threshold"])

            slices = self.diagonal_matrix_slicer(cbs_sets_extended)
            cbs_sets_extension = manager.list()
            with Pool(processes=NUM_OF_WORKERS) as pool:
                for cbs_subset in pool.imap_unordered(func=func, iterable=slices):
                    cbs_sets_extension.extend(cbs_subset)

            cbs_sets.extend(cbs_sets_extension)
            cbs_sets_extended = cbs_sets_extension
            cbs_size *= 2

        # generate semantic association rules
        rules = manager.list()
        work = manager.Queue()
        size = max(1, floor(len(cbs_sets) / NUM_OF_WORKERS))
        slices = [slice(i, i + size) for i in range(0, len(cbs_sets), size)]

        pool = []
        for i in range(NUM_OF_WORKERS):
            p = Process(target=generate_semantic_association_rules,
                        args=(kg_i, kg_s, cbs_sets, work, rules,
                              parameters["minimal_local_support"]))
            p.daemon = True
            p.start()
            pool.append(p)

        for slce in slices:
            work.put(slce)

        for p in pool:
            work.put(None)

        # wait for the workers to finish
        for p in pool:
            p.join()

        # calculate support and confidence, skip those not meeting minimum requirements
        final_rule_set = manager.list()
        work = manager.Queue()
        size = max(1, floor(len(rules) / NUM_OF_WORKERS))
        slices = [slice(i, i + size) for i in range(0, len(rules), size)]

        pool = []
        for i in range(NUM_OF_WORKERS):
            p = Process(target=evaluate_rules,
                        args=(kg_i, rules, work, final_rule_set,
                              parameters["minimal_support"],
                              parameters["minimal_confidence"]))

            p.daemon = True
            p.start()
            pool.append(p)

        for slce in slices:
            work.put(slce)

        for p in pool:
            work.put(None)

        # wait for the workers to finish
        for p in pool:
            p.join()

        # sort rules by confidence, then support (both descending)
        final_rule_set.sort(key=itemgetter(2, 1), reverse=True)

        # time taken (timer() is assumed to return seconds)
        t1 = timer()
        dt = t1 - t0
        print("  Program completed in {:.3f} s".format(dt))

        print("  Found {} rules".format(len(final_rule_set)))
        return final_rule_set
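
Example #2 distributes the same steps over NUM_OF_WORKERS processes, relying on Manager, Process and Pool from multiprocessing, partial from functools, floor from math, and presumably timeit.default_timer as timer. Each parallel stage follows the same producer/consumer pattern: slices of the work go on a Manager queue, daemon workers consume them until each receives a None sentinel, and results land in a shared manager list. The self-contained sketch below shows that pattern in isolation; squaring the items of each slice stands in for the real generate_*/evaluate_* worker functions, whose queue-reading loop is presumed to look like this.

    # Self-contained sketch of the queue-and-sentinel worker pattern used above.
    from math import floor
    from multiprocessing import Manager, Process

    NUM_OF_WORKERS = 4  # assumed worker count

    def square_slice(data, work, results):
        # Consume slice objects from the queue until a None sentinel arrives,
        # presumably mirroring the loop inside generate_* and evaluate_rules.
        while True:
            slce = work.get()
            if slce is None:
                break
            results.extend([x * x for x in data[slce]])

    if __name__ == "__main__":
        data = list(range(100))

        manager = Manager()
        work = manager.Queue()
        results = manager.list()

        # split the work into roughly one slice per worker, as in Example #2
        size = max(1, floor(len(data) / NUM_OF_WORKERS))
        slices = [slice(i, i + size) for i in range(0, len(data), size)]

        pool = []
        for _ in range(NUM_OF_WORKERS):
            p = Process(target=square_slice, args=(data, work, results))
            p.daemon = True
            p.start()
            pool.append(p)

        for slce in slices:
            work.put(slce)

        # one sentinel per worker so every process terminates
        for _ in pool:
            work.put(None)

        for p in pool:
            p.join()

        print(len(results), sum(results))
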