def test_pattern_rec(self):
    """Mine ``output.csv`` and print the frequent patterns plus two probes.

    Two reference trees are assembled by hand, the miner is run with a
    threshold of 5, and every entry returned by ``get_first_n_frequents``
    is printed together with its count.  Each reference tree that is a key
    of ``manager.filter.counters`` is then printed with its recorded count.

    The method only prints its findings; it performs no assertions.
    """
    # Probe 1: a two-attribute root with one single-attribute child.
    shallow_probe = PNode([
        '06b7bde2b5eb49efb00dc260edfd1bae = 1ec1b7ccd60a444d99e16cf4ce5021be',
        'a71fb85d222b4d30819d8b442a6ab945 = 3b5ed2de66fc4892a9508e150c118941',
    ])
    shallow_probe.add_child(PNode([
        '06b7bde2b5eb49efb00dc260edfd1bae = 92d3476104de4b06bd1a38573f07a5d7',
    ]))

    # Probe 2: a three-level chain (root -> child -> grandchild).
    deep_probe = PNode([
        'a71fb85d222b4d30819d8b442a6ab945 = b2c04646744240568e66f9c13e434476',
    ])
    deep_probe.add_child(PNode([
        '06b7bde2b5eb49efb00dc260edfd1bae = 781831ce63d046da9b217b606aff5f45',
        'a71fb85d222b4d30819d8b442a6ab945 = fc3c2abfc9304942bccf089977cacc16',
    ]))
    deep_probe.children[0].add_child(PNode([
        'a71fb85d222b4d30819d8b442a6ab945 = f9bfbd58d8764011b9266d06c00b84f5',
    ]))

    min_support = 5
    # -1 selects the "ALL" banner below; presumably it also means
    # "no limit" for get_first_n_frequents -- TODO confirm.
    limit = -1

    manager = Manager("output.csv", min_support)
    manager.compute_mining(True)
    manager.filter.remove_empty()
    ranked = manager.filter.get_first_n_frequents(limit)

    if limit == -1:
        banner = "LISTING ALL FREQUENT PATTERNS threshold = " + str(min_support)
    else:
        banner = ("LISTING TOP " + str(limit)
                  + " PATTERNS threshold = " + str(min_support))
    print(banner)
    print()

    # Each entry is indexed as (tree, count).
    for entry in ranked:
        print("Number of appearences: " + str(entry[1]))
        print()
        entry[0].print_tree()
        print()

    # Report the hand-built probes that the miner actually counted.
    for probe in (shallow_probe, deep_probe):
        if probe not in manager.filter.counters:
            continue
        print("Pattern")
        print()
        probe.print_tree()
        print()
        print("Appeared " + str(manager.filter.counters[probe]) + " times")
def test_birth_errors(self):
    """Check that week profiling for the 'birth' life event does not raise.

    Any exception raised by ``Manager.compute_week_profiling`` is converted
    into a test failure.  The failure message includes the caught exception
    so the cause is visible in the report instead of being swallowed.
    """
    instance = "866110f3-4b1b-4a56-834b-fa3d39eea4ec"
    asset = "f4646a37-c66e-3993-9f68-3a9e0e9a0793"
    year = 2017
    try:
        # Only the call under test is guarded; the result itself is unused.
        Manager.compute_week_profiling(instance, asset, year, 'birth')
    except Exception as exc:
        self.fail("Something went wrong: %r" % (exc,))
def test_unexisting_lifeevent(self):
    """Check that the life event name 'fuffa' is rejected with ValueError.

    The test fails if ``Manager.compute_week_profiling`` returns normally;
    any exception other than ``ValueError`` propagates unchanged.
    """
    instance = "866110f3-4b1b-4a56-834b-fa3d39eea4ec"
    asset = "f4646a37-c66e-3993-9f68-3a9e0e9a0793"
    year = 2017
    # assertRaises replaces the manual try / self.fail / except dance and
    # keeps the original failure text via ``msg``.
    with self.assertRaises(ValueError,
                           msg="Something should be wrong, but it isn't"):
        Manager.compute_week_profiling(instance, asset, year, 'fuffa')
def test_unexisting_entity(self):
    """Check that made-up instance/asset ids raise TapoiNotFoundApiException.

    The test fails if ``Manager.compute_week_profiling`` returns normally;
    any exception other than ``TapoiNotFoundApiException`` propagates
    unchanged.
    """
    instance = "bla bla bla"
    asset = "bla bla"
    year = 2017
    # assertRaises replaces the manual try / self.fail / except dance and
    # keeps the original failure text via ``msg``.
    with self.assertRaises(TapoiNotFoundApiException,
                           msg="Something should be wrong, but it isn't"):
        Manager.compute_week_profiling(instance, asset, year, 'birth')
def post(self):
    """Handle a POST request asking for a week profiling.

    The JSON body must be an object providing the keys ``instance``,
    ``asset``, ``year`` and ``lifeEvent``.

    Returns:
        A ``(body, status)`` tuple:

        * 200 -- the detections (each rendered with ``to_repr()``) together
          with the echoed request parameters;
        * 400 -- the body is not a JSON object, or a required key is
          missing;
        * 404 -- ``TapoiNotFoundApiException`` was raised;
        * 422 -- ``ValueError`` was raised.
    """
    posted_data = request.get_json()

    # Depending on the Flask version and request, get_json() can yield None
    # or a non-object JSON value (list, string, number).  Indexing those by
    # key raises TypeError, which previously escaped as a 500.
    if not isinstance(posted_data, dict):
        return {"message": "Error, the request body must be a JSON object"}, 400

    try:
        instance = posted_data['instance']
        asset = posted_data['asset']
        year = posted_data['year']
        life_event = posted_data['lifeEvent']

        detections = Manager.compute_week_profiling(
            instance, asset, year, life_event)

        return {
            "detections": [d.to_repr() for d in detections],
            "instance": instance,
            "asset": asset,
            "year": year,
            "lifeEvent": life_event
        }, 200
    except KeyError:
        return {"message": "Error, some keys are missing"}, 400
    except TapoiNotFoundApiException:
        return {"message": "The requested instance or asset don't exist"}, 404
    except ValueError:
        return {"message": "The requested life event is not supported"}, 422
# Command-line options visible in this fragment.  ``argument_parser`` itself,
# and the option that provides ``args.output``, are defined outside it.
argument_parser.add_argument(
    "-in",
    dest="input",
    action="store",
    type=str,
    default="input.csv",
    help="Input file name (csv format)",
)
argument_parser.add_argument(
    "-thr",
    dest="threshold",
    action="store",
    type=int,
    default=4,
    help="Minimum number of times a pattern has to appear",
)
args = argument_parser.parse_args()

# Reject thresholds below 1 before any mining work is started.
if args.threshold < 1:
    raise ValueError(
        "The threshold must be at least 1. Given %d" % args.threshold)

# Mine the input, drop empty entries and fetch the (pattern, frequency) pairs.
manager = Manager(args.input, args.threshold)
manager.compute_mining()
manager.filter.remove_empty()
patterns = manager.filter.get_by_importance()

# One record per pattern: a summary line, the pattern text, a blank line.
with open(args.output, "w") as report:
    for pattern, freq in patterns:
        size = len(pattern.get_subtree())
        # The reported importance is subtree size multiplied by frequency.
        summary = ("Pattern: frequency: %d, length: %d, importance: %d\n"
                   % (freq, size, size * freq))
        report.write(summary)
        report.write(pattern.get_string())
        report.write("\n\n")
def test_unexisting_year(self):
    """Check that profiling the year -1 yields an empty detection list.

    Uses fixed instance/asset ids and the 'wedding' life event.
    """
    found = Manager.compute_week_profiling(
        "866110f3-4b1b-4a56-834b-fa3d39eea4ec",  # instance
        "f4646a37-c66e-3993-9f68-3a9e0e9a0793",  # asset
        -1,                                      # year
        'wedding',
    )
    self.assertEqual(found, [])
from __future__ import absolute_import import time from logic.manager import Manager, BaselineManager simple_mining = False print("Our implementation") s1 = time.time() manager = Manager("output1.csv", 2) manager.compute_mining(simple_mining) manager.filter.remove_empty() fp = set([x for x, _ in manager.filter.get_by_importance()]) f1 = time.time() print("Baseline") s2 = time.time() manager = BaselineManager("output1.csv", 2) manager.compute_mining(simple_mining) manager.filter.remove_empty() fp1 = set([x for x, _ in manager.filter.get_by_importance()]) f2 = time.time() print("time1: %f" % (f1 - s1)) print("time2: %f" % (f2 - s2)) print(len(fp)) print(len(fp1)) print(len(frozenset(fp).intersection(fp1)))