def frequencyPatternSearch2Test(createTestFile=False):
    """
    Runs the "frequency2" test.
    PATTERN SEQ(LOCM a, AMZN b, AAPL c)
    WHERE a.OpeningPrice < b.OpeningPrice AND b.OpeningPrice < c.OpeningPrice
    WITHIN 5 minutes
    The plan is built with SORT_BY_FREQUENCY_LEFT_DEEP_TREE using arrival rates.
    """
    structure = SeqOperator(
        PrimitiveEventStructure("LOCM", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("AAPL", "c"),
    )
    a_below_b = SmallerThanCondition(
        Variable("a", lambda x: x["Opening Price"]),
        Variable("b", lambda x: x["Opening Price"]),
    )
    b_below_c = SmallerThanCondition(
        Variable("b", lambda x: x["Opening Price"]),
        Variable("c", lambda x: x["Opening Price"]),
    )
    ascending_pattern = Pattern(
        structure, AndCondition(a_below_b, b_below_c), timedelta(minutes=5))
    ascending_pattern.set_statistics(
        {StatisticsTypes.ARRIVAL_RATES: [0.0076, 0.0153, 0.0159]})

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE)
    collector_params = StatisticsCollectorParameters(
        statistics_types=[StatisticsTypes.ARRIVAL_RATES])
    optimizer_params = StatisticsDeviationAwareOptimizerParameters(
        tree_plan_params=plan_params,
        statistics_collector_params=collector_params,
    )
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params,
    )
    runTest("frequency2", [ascending_pattern], createTestFile,
            eval_mechanism_params=mechanism_params)
def __create_selectivity_matrix_for_nested_operators(pattern: Pattern, statistics: Dict):
    """
    Builds a selectivity matrix sized (#top-level args x #top-level args) for the
    root operator of the pattern, collapsing the per-event selectivities of any
    nested operators into a single entry per pair of top-level arguments.

    Raises an Exception ("size mismatch") when the number of positive primitive
    events in the pattern differs from the length of the supplied matrix.
    """
    selectivity_matrix = statistics[StatisticsTypes.SELECTIVITY_MATRIX]
    if pattern.count_primitive_events(positive_only=True) != len(selectivity_matrix):
        raise Exception("size mismatch")

    # Names of every positive event in the pattern, nested ones included.
    all_event_names = list(pattern.positive_structure.get_all_event_names())

    # One entry per top-level argument: the names of the primitive events below it.
    events_per_arg = [
        list(arg.get_all_event_names())
        for arg in pattern.get_top_level_structure_args(positive_only=True)
    ]

    # Entry [i][j] is derived from the events under argument i and argument j.
    return [
        [
            TreePlanBuilder.__calculate_nested_selectivity(
                all_event_names, selectivity_matrix, row_events, col_events)
            for col_events in events_per_arg
        ]
        for row_events in events_per_arg
    ]
def frequencyPatternSearch6Test(createTestFile=False):
    """
    Runs the "frequency6" test.
    PATTERN SEQ(AAPL a1, LOCM b1, AAPL a2, LOCM b2, AAPL a3, LOCM b3)
    WITHIN 7 minutes (no condition)
    The plan is built with SORT_BY_FREQUENCY_LEFT_DEEP_TREE using arrival rates.
    """
    structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a1"),
        PrimitiveEventStructure("LOCM", "b1"),
        PrimitiveEventStructure("AAPL", "a2"),
        PrimitiveEventStructure("LOCM", "b2"),
        PrimitiveEventStructure("AAPL", "a3"),
        PrimitiveEventStructure("LOCM", "b3"),
    )
    alternating_pattern = Pattern(structure, TrueCondition(), timedelta(minutes=7))
    # {"AAPL": 460, "LOCM": 219}
    alternating_pattern.set_statistics({
        StatisticsTypes.ARRIVAL_RATES:
            [0.0159, 0.0076, 0.0159, 0.0076, 0.0159, 0.0076],
    })

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE)
    collector_params = StatisticsCollectorParameters(
        statistics_types=[StatisticsTypes.ARRIVAL_RATES])
    optimizer_params = StatisticsDeviationAwareOptimizerParameters(
        tree_plan_params=plan_params,
        statistics_collector_params=collector_params,
    )
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params,
    )
    runTest("frequency6", [alternating_pattern], createTestFile,
            eval_mechanism_params=mechanism_params)
def nestedAscendingStructuralTest():
    """
    Structural test for a nested AND pattern planned with
    SORT_BY_FREQUENCY_LEFT_DEEP_TREE under the trivial optimizer.
    PATTERN AND(SEQ(AAPL a, AMZN b), SEQ(AVID c, BIDU d), AND(GOOG e, AAPL f),
                GOOG g, SEQ(AMZN h, BIDU i))
    WITHIN 1 minute (no condition)
    """
    nested_pattern = Pattern(
        AndOperator(
            SeqOperator(PrimitiveEventStructure("AAPL", "a"),
                        PrimitiveEventStructure("AMZN", "b")),
            SeqOperator(PrimitiveEventStructure("AVID", "c"),
                        PrimitiveEventStructure("BIDU", "d")),
            AndOperator(PrimitiveEventStructure("GOOG", "e"),
                        PrimitiveEventStructure("AAPL", "f")),
            PrimitiveEventStructure("GOOG", "g"),
            SeqOperator(PrimitiveEventStructure("AMZN", "h"),
                        PrimitiveEventStructure("BIDU", "i")),
        ),
        TrueCondition(),
        timedelta(minutes=1),
    )
    nested_pattern.set_statistics({
        StatisticsTypes.ARRIVAL_RATES:
            [0.11, 0.2, 0.3, 0.4, 0.5, 0.11, 0.5, 0.2, 0.4],
    })

    optimizer_params = OptimizerParameters(
        opt_type=OptimizerTypes.TRIVIAL_OPTIMIZER,
        tree_plan_params=TreePlanBuilderParameters(
            TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE),
    )
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params,
    )

    # The expected left-deep plan, assembled from the innermost join outwards.
    expected_plan = ('And', 'g', ('Seq', 'a', 'b'))
    expected_plan = ('And', expected_plan, ('Seq', 'c', 'd'))
    expected_plan = ('And', expected_plan, ('And', 'e', 'f'))
    expected_plan = ('And', expected_plan, ('Seq', 'h', 'i'))

    runStructuralTest('nestedAscendingStructuralTest', [nested_pattern],
                      expected_plan, eval_mechanism_params=mechanism_params)
def frequencyTailoredPatternSearchTest(createTestFile=False):
    """
    Runs the 'frequencyTailored1' test over nasdaqEventStream.
    PATTERN SEQ(DRIV a, MSFT b, CBRL c)
    WHERE a.OpeningPrice > b.OpeningPrice AND b.OpeningPrice > c.OpeningPrice
    WITHIN 360 minutes
    The plan is built with SORT_BY_FREQUENCY_LEFT_DEEP_TREE using arrival rates.
    """
    structure = SeqOperator(
        PrimitiveEventStructure("DRIV", "a"),
        PrimitiveEventStructure("MSFT", "b"),
        PrimitiveEventStructure("CBRL", "c"),
    )
    a_above_b = GreaterThanCondition(
        Variable("a", lambda x: x["Opening Price"]),
        Variable("b", lambda x: x["Opening Price"]),
    )
    b_above_c = GreaterThanCondition(
        Variable("b", lambda x: x["Opening Price"]),
        Variable("c", lambda x: x["Opening Price"]),
    )
    descending_pattern = Pattern(
        structure, AndCondition(a_above_b, b_above_c), timedelta(minutes=360))
    # frequencyDict = {"MSFT": 256, "DRIV": 257, "CBRL": 1}
    descending_pattern.set_statistics({
        StatisticsTypes.ARRIVAL_RATES: [
            0.01454418928322895,
            0.016597077244258872,
            0.012421711899791231,
        ],
    })

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE)
    collector_params = StatisticsCollectorParameters(
        statistics_types=[StatisticsTypes.ARRIVAL_RATES])
    optimizer_params = StatisticsDeviationAwareOptimizerParameters(
        tree_plan_params=plan_params,
        statistics_collector_params=collector_params,
    )
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params,
    )
    runTest('frequencyTailored1', [descending_pattern], createTestFile,
            eval_mechanism_params=mechanism_params, events=nasdaqEventStream)
def andAndPatternTransformationTest():
    """
    Checks that preprocessing with TESTING_PREPROCESSING_RULES_ORDER turns
    AND(a, NOT z, g, AND(zz, NOT gg, AND(zzz, NOT ggg))) into the single flat
    AND(a, NOT z, g, zz, NOT gg, zzz, NOT ggg) pattern.
    """
    failure_message = "Test andAndPatternTransformation Failed"

    nested_pattern = Pattern(
        AndOperator(
            PrimitiveEventStructure("AAPL", "a"),
            NegationOperator(PrimitiveEventStructure("AMZN", "z")),
            PrimitiveEventStructure("GOOG", "g"),
            AndOperator(
                PrimitiveEventStructure("AMZN", "zz"),
                NegationOperator(PrimitiveEventStructure("GOOG", "gg")),
                AndOperator(
                    PrimitiveEventStructure("AMZN", "zzz"),
                    NegationOperator(PrimitiveEventStructure("GOOG", "ggg")),
                ),
            ),
        ),
        TrueCondition(),
        timedelta(minutes=5),
    )
    flat_pattern = Pattern(
        AndOperator(
            PrimitiveEventStructure("AAPL", "a"),
            NegationOperator(PrimitiveEventStructure("AMZN", "z")),
            PrimitiveEventStructure("GOOG", "g"),
            PrimitiveEventStructure("AMZN", "zz"),
            NegationOperator(PrimitiveEventStructure("GOOG", "gg")),
            PrimitiveEventStructure("AMZN", "zzz"),
            NegationOperator(PrimitiveEventStructure("GOOG", "ggg")),
        ),
        TrueCondition(),
        timedelta(minutes=5),
    )

    preprocessor = PatternPreprocessor(
        PatternPreprocessingParameters(TESTING_PREPROCESSING_RULES_ORDER))
    results = preprocessor.transform_patterns([nested_pattern])

    assert len(results) == 1, failure_message
    assert flat_pattern.full_structure == results[0].full_structure, failure_message
def onePatternIncludesOther(createTestFile=False):
    """
    Multi-pattern test ("onePatternIncludesOther") using SUBTREES_UNION, where the
    second pattern's structure and condition also appear in the first:
    1) SEQ(GOOG a, GOOG b, AAPL c)
       WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice > c.PeakPrice WITHIN 3 minutes
    2) SEQ(GOOG a, GOOG b) WHERE a.PeakPrice < b.PeakPrice WITHIN 3 minutes
    """
    larger_structure = SeqOperator(
        PrimitiveEventStructure("GOOG", "a"),
        PrimitiveEventStructure("GOOG", "b"),
        PrimitiveEventStructure("AAPL", "c"),
    )
    larger_condition = AndCondition(
        SmallerThanCondition(Variable("a", lambda x: x["Peak Price"]),
                             Variable("b", lambda x: x["Peak Price"])),
        GreaterThanCondition(Variable("b", lambda x: x["Peak Price"]),
                             Variable("c", lambda x: x["Peak Price"])),
    )
    larger_pattern = Pattern(larger_structure, larger_condition, timedelta(minutes=3))

    smaller_structure = SeqOperator(
        PrimitiveEventStructure("GOOG", "a"),
        PrimitiveEventStructure("GOOG", "b"),
    )
    smaller_condition = SmallerThanCondition(
        Variable("a", lambda x: x["Peak Price"]),
        Variable("b", lambda x: x["Peak Price"]),
    )
    smaller_pattern = Pattern(smaller_structure, smaller_condition, timedelta(minutes=3))

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.TRIVIAL_LEFT_DEEP_TREE,
        TreeCostModels.INTERMEDIATE_RESULTS_TREE_COST_MODEL,
    )
    storage_params = TreeStorageParameters(
        sort_storage=False,
        clean_up_interval=10,
        prioritize_sorting_by_timestamp=True,
    )
    sharing_params = MultiPatternEvaluationParameters(
        MultiPatternEvaluationApproaches.SUBTREES_UNION)
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        plan_params, storage_params, sharing_params)

    runMultiTest("onePatternIncludesOther", [larger_pattern, smaller_pattern],
                 createTestFile, mechanism_params)
def severalPatternShareSubtreeFullSharing(createTestFile=False):
    """
    Multi-pattern test ("threeSharingSubtreesFullSharing") run with
    subtree_sharing_eval_mechanism_params and compared against the
    "threeSharingSubtrees" expected file. All patterns are WITHIN 5 minutes:
    1) SEQ(AAPL a, AMZN b, GOOG c, NOT TYP1 x, NOT TYP2 y, NOT TYP3 z)
       WHERE a.OpeningPrice > b.OpeningPrice AND b.OpeningPrice < c.OpeningPrice
    2) SEQ(AAPL a, AMZN b, TYP1 x) WHERE a.OpeningPrice > b.OpeningPrice
    3) SEQ(AAPL a, AMZN b) WHERE a.OpeningPrice > b.OpeningPrice
    """
    negation_structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
        NegationOperator(PrimitiveEventStructure("TYP1", "x")),
        NegationOperator(PrimitiveEventStructure("TYP2", "y")),
        NegationOperator(PrimitiveEventStructure("TYP3", "z")),
    )
    negation_condition = AndCondition(
        GreaterThanCondition(Variable("a", lambda x: x["Opening Price"]),
                             Variable("b", lambda x: x["Opening Price"])),
        SmallerThanCondition(Variable("b", lambda x: x["Opening Price"]),
                             Variable("c", lambda x: x["Opening Price"])),
    )
    negation_pattern = Pattern(
        negation_structure, negation_condition, timedelta(minutes=5))

    triple_structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("TYP1", "x"),
    )
    triple_condition = GreaterThanCondition(
        Variable("a", lambda x: x["Opening Price"]),
        Variable("b", lambda x: x["Opening Price"]),
    )
    triple_pattern = Pattern(triple_structure, triple_condition, timedelta(minutes=5))

    pair_structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
    )
    pair_condition = GreaterThanCondition(
        Variable("a", lambda x: x["Opening Price"]),
        Variable("b", lambda x: x["Opening Price"]),
    )
    pair_pattern = Pattern(pair_structure, pair_condition, timedelta(minutes=5))

    runMultiTest("threeSharingSubtreesFullSharing",
                 [negation_pattern, triple_pattern, pair_pattern],
                 createTestFile,
                 subtree_sharing_eval_mechanism_params,
                 expected_file_name="threeSharingSubtrees")
def topmostOrPatternTransformationTest():
    """
    Checks that applying only the TOPMOST_OR_PATTERN rule turns
    SEQ(a, OR(z, g), m) into the single pattern OR(SEQ(a, z, m), SEQ(a, g, m)).
    """
    failure_message = "Test topmostOrPatternTransformation Failed"

    inner_or_pattern = Pattern(
        SeqOperator(
            PrimitiveEventStructure("AAPL", "a"),
            OrOperator(PrimitiveEventStructure("AMZN", "z"),
                       PrimitiveEventStructure("GOOG", "g")),
            PrimitiveEventStructure("MSFT", "m"),
        ),
        TrueCondition(),
        timedelta(minutes=5),
    )
    topmost_or_pattern = Pattern(
        OrOperator(
            SeqOperator(PrimitiveEventStructure("AAPL", "a"),
                        PrimitiveEventStructure("AMZN", "z"),
                        PrimitiveEventStructure("MSFT", "m")),
            SeqOperator(PrimitiveEventStructure("AAPL", "a"),
                        PrimitiveEventStructure("GOOG", "g"),
                        PrimitiveEventStructure("MSFT", "m")),
        ),
        TrueCondition(),
        timedelta(minutes=5),
    )

    # Restrict preprocessing to the single rule under test.
    preprocessing_params = PatternPreprocessingParameters()
    preprocessing_params.transformation_rules = [
        PatternTransformationRules.TOPMOST_OR_PATTERN]
    preprocessor = PatternPreprocessor(preprocessing_params)
    results = preprocessor.transform_patterns([inner_or_pattern])

    assert len(results) == 1, failure_message
    assert results[0].full_structure == topmost_or_pattern.full_structure, \
        failure_message
def distinctPatterns(createTestFile=False):
    """
    Multi-pattern test ("BigMultiPattern") run with
    leaf_sharing_eval_mechanism_params over two patterns on different event types:
    1) SEQ(GOOG a, GOOG b, GOOG c)
       WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice WITHIN 3 minutes
    2) SEQ(AMZN x1, AMZN x2, AMZN x3)
       WHERE x1.LowestPrice <= 75 AND x2.PeakPrice >= 78
         AND x3.LowestPrice <= x1.LowestPrice WITHIN 1 day
    """
    google_structure = SeqOperator(
        PrimitiveEventStructure("GOOG", "a"),
        PrimitiveEventStructure("GOOG", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    google_condition = AndCondition(
        SmallerThanCondition(Variable("a", lambda x: x["Peak Price"]),
                             Variable("b", lambda x: x["Peak Price"])),
        SmallerThanCondition(Variable("b", lambda x: x["Peak Price"]),
                             Variable("c", lambda x: x["Peak Price"])),
    )
    google_pattern = Pattern(google_structure, google_condition, timedelta(minutes=3))

    amazon_structure = SeqOperator(
        PrimitiveEventStructure("AMZN", "x1"),
        PrimitiveEventStructure("AMZN", "x2"),
        PrimitiveEventStructure("AMZN", "x3"),
    )
    amazon_condition = AndCondition(
        SmallerThanEqCondition(Variable("x1", lambda x: x["Lowest Price"]), 75),
        GreaterThanEqCondition(Variable("x2", lambda x: x["Peak Price"]), 78),
        SmallerThanEqCondition(Variable("x3", lambda x: x["Lowest Price"]),
                               Variable("x1", lambda x: x["Lowest Price"])),
    )
    amazon_pattern = Pattern(amazon_structure, amazon_condition, timedelta(days=1))

    runMultiTest("BigMultiPattern", [google_pattern, amazon_pattern],
                 createTestFile, leaf_sharing_eval_mechanism_params)
def iiRandom2PatternSearchTest(createTestFile=False):
    """
    Runs the 'iiRandom2' test over nasdaqEventStream.
    PATTERN SEQ(MSFT a, DRIV b, ORLY c, CBRL d)
    WHERE a.PeakPrice < b.PeakPrice < c.PeakPrice < d.PeakPrice
    WITHIN 3 minutes
    The plan comes from the iterative-improvement builder (step limit argument 20,
    CIRCLE_BASED improvement, RANDOM initialization).
    """
    structure = SeqOperator(
        PrimitiveEventStructure("MSFT", "a"),
        PrimitiveEventStructure("DRIV", "b"),
        PrimitiveEventStructure("ORLY", "c"),
        PrimitiveEventStructure("CBRL", "d"),
    )
    strictly_ascending = SimpleCondition(
        Variable("a", lambda x: x["Peak Price"]),
        Variable("b", lambda x: x["Peak Price"]),
        Variable("c", lambda x: x["Peak Price"]),
        Variable("d", lambda x: x["Peak Price"]),
        relation_op=lambda x, y, z, w: x < y < z < w,
    )
    ascending_pattern = Pattern(
        structure, AndCondition(strictly_ascending), timedelta(minutes=3))

    # Pairwise selectivities between neighbouring events; all other pairs are 1.0.
    sel_ab = 0.9457796098355941
    sel_bc = 0.15989723367389616
    sel_cd = 0.9992557393942864
    selectivities = [
        [1.0, sel_ab, 1.0, 1.0],
        [sel_ab, 1.0, sel_bc, 1.0],
        [1.0, sel_bc, 1.0, sel_cd],
        [1.0, 1.0, sel_cd, 1.0],
    ]
    rates = [
        0.016597077244258872,
        0.01454418928322895,
        0.013917884481558803,
        0.012421711899791231,
    ]
    ascending_pattern.set_statistics(
        StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES,
        (selectivities, rates))

    default_settings = DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS
    plan_params = IterativeImprovementTreePlanBuilderParameters(
        default_settings.tree_plan_params.cost_model_type,
        20,
        IterativeImprovementType.CIRCLE_BASED,
        IterativeImprovementInitType.RANDOM,
    )
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        plan_params, DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params)

    runTest('iiRandom2', [ascending_pattern], createTestFile,
            eval_mechanism_params=mechanism_params, events=nasdaqEventStream)
def zStreamPatternSearchTest(createTestFile=False):
    """
    Runs the 'zstream1' test over nasdaqEventStream with the ZSTREAM_BUSHY_TREE
    plan builder.
    PATTERN SEQ(MSFT a, DRIV b, ORLY c, CBRL d)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
      AND c.PeakPrice < d.PeakPrice
    WITHIN 3 minutes
    """
    structure = SeqOperator(
        PrimitiveEventStructure("MSFT", "a"),
        PrimitiveEventStructure("DRIV", "b"),
        PrimitiveEventStructure("ORLY", "c"),
        PrimitiveEventStructure("CBRL", "d"),
    )
    # The nesting of the AND conditions is kept exactly as in the test definition.
    condition = AndCondition(
        AndCondition(
            SmallerThanCondition(Variable("a", lambda x: x["Peak Price"]),
                                 Variable("b", lambda x: x["Peak Price"])),
            AndCondition(
                SmallerThanCondition(Variable("b", lambda x: x["Peak Price"]),
                                     Variable("c", lambda x: x["Peak Price"])),
            ),
        ),
        SmallerThanCondition(Variable("c", lambda x: x["Peak Price"]),
                             Variable("d", lambda x: x["Peak Price"])),
    )
    ascending_pattern = Pattern(structure, condition, timedelta(minutes=3))

    # Pairwise selectivities between neighbouring events; all other pairs are 1.0.
    sel_ab = 0.9457796098355941
    sel_bc = 0.15989723367389616
    sel_cd = 0.9992557393942864
    selectivities = [
        [1.0, sel_ab, 1.0, 1.0],
        [sel_ab, 1.0, sel_bc, 1.0],
        [1.0, sel_bc, 1.0, sel_cd],
        [1.0, 1.0, sel_cd, 1.0],
    ]
    rates = [
        0.016597077244258872,
        0.01454418928322895,
        0.013917884481558803,
        0.012421711899791231,
    ]
    ascending_pattern.set_statistics(
        StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES,
        (selectivities, rates))

    mechanism_params = TreeBasedEvaluationMechanismParameters(
        TreePlanBuilderParameters(TreePlanBuilderTypes.ZSTREAM_BUSHY_TREE),
        DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params,
    )
    runTest('zstream1', [ascending_pattern], createTestFile,
            eval_mechanism_params=mechanism_params, events=nasdaqEventStream)
def zStreamPatternSearchTest(createTestFile=False):
    """
    Runs the 'zstream1' test over nasdaqEventStream with the ZSTREAM_BUSHY_TREE
    evaluation mechanism type.
    PATTERN SEQ(MSFT a, DRIV b, ORLY c, CBRL d)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
      AND c.PeakPrice < d.PeakPrice
    WITHIN 3 minutes
    """
    structure = SeqOperator([
        QItem("MSFT", "a"),
        QItem("DRIV", "b"),
        QItem("ORLY", "c"),
        QItem("CBRL", "d"),
    ])
    formula = AndFormula(
        AndFormula(
            SmallerThanFormula(IdentifierTerm("a", lambda x: x["Peak Price"]),
                               IdentifierTerm("b", lambda x: x["Peak Price"])),
            SmallerThanFormula(IdentifierTerm("b", lambda x: x["Peak Price"]),
                               IdentifierTerm("c", lambda x: x["Peak Price"])),
        ),
        SmallerThanFormula(IdentifierTerm("c", lambda x: x["Peak Price"]),
                           IdentifierTerm("d", lambda x: x["Peak Price"])),
    )
    ascending_pattern = Pattern(structure, formula, timedelta(minutes=3))

    # Pairwise selectivities between neighbouring events; all other pairs are 1.0.
    sel_ab = 0.9457796098355941
    sel_bc = 0.15989723367389616
    sel_cd = 0.9992557393942864
    selectivities = [
        [1.0, sel_ab, 1.0, 1.0],
        [sel_ab, 1.0, sel_bc, 1.0],
        [1.0, sel_bc, 1.0, sel_cd],
        [1.0, 1.0, sel_cd, 1.0],
    ]
    rates = [
        0.016597077244258872,
        0.01454418928322895,
        0.013917884481558803,
        0.012421711899791231,
    ]
    ascending_pattern.set_statistics(
        StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES,
        (selectivities, rates))

    runTest('zstream1', [ascending_pattern], createTestFile,
            eval_mechanism_type=EvaluationMechanismTypes.ZSTREAM_BUSHY_TREE,
            events=nasdaqEventStream)
def samePatternDifferentTimeStampsFullSharing(createTestFile=False):
    """
    Multi-pattern test ("DifferentTimeStampFullSharing") run with
    subtree_sharing_eval_mechanism_params and compared against the
    "DifferentTimeStamp" expected file. Both patterns are
    SEQ(AAPL a, AMZN b, GOOG c)
    WHERE a.PeakPrice >= 135 AND b.PeakPrice < c.PeakPrice
    and differ only in the time window (5 minutes vs. 2 minutes).
    """

    def build_pattern(window_minutes):
        # Every call creates fresh structure, condition and lambda objects.
        structure = SeqOperator(
            PrimitiveEventStructure("AAPL", "a"),
            PrimitiveEventStructure("AMZN", "b"),
            PrimitiveEventStructure("GOOG", "c"),
        )
        condition = AndCondition(
            GreaterThanEqCondition(Variable("a", lambda x: x["Peak Price"]), 135),
            SmallerThanCondition(Variable("b", lambda x: x["Peak Price"]),
                                 Variable("c", lambda x: x["Peak Price"])),
        )
        return Pattern(structure, condition, timedelta(minutes=window_minutes))

    long_window_pattern = build_pattern(5)
    short_window_pattern = build_pattern(2)

    runMultiTest("DifferentTimeStampFullSharing",
                 [long_window_pattern, short_window_pattern],
                 createTestFile,
                 subtree_sharing_eval_mechanism_params,
                 expected_file_name="DifferentTimeStamp")
def multiplePatternSearchTest(createTestFile=False):
    """
    Runs the 'multiplePatterns' test with two patterns:
    1) SEQ(AMZN x1, AMZN x2, AMZN x3)
       WHERE x1.LowestPrice <= 75 AND x2.PeakPrice >= 78
         AND x3.LowestPrice <= x1.LowestPrice WITHIN 1 day
    2) SEQ(GOOG a, GOOG b, GOOG c)
       WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice WITHIN 3 minutes
    """
    amazon_structure = SeqOperator([
        QItem("AMZN", "x1"),
        QItem("AMZN", "x2"),
        QItem("AMZN", "x3"),
    ])
    amazon_formula = AndFormula(
        SmallerThanEqFormula(
            IdentifierTerm("x1", lambda x: x["Lowest Price"]),
            AtomicTerm(75)),
        AndFormula(
            GreaterThanEqFormula(
                IdentifierTerm("x2", lambda x: x["Peak Price"]),
                AtomicTerm(78)),
            SmallerThanEqFormula(
                IdentifierTerm("x3", lambda x: x["Lowest Price"]),
                IdentifierTerm("x1", lambda x: x["Lowest Price"])),
        ),
    )
    amazon_pattern = Pattern(amazon_structure, amazon_formula, timedelta(days=1))

    google_structure = SeqOperator([
        QItem("GOOG", "a"),
        QItem("GOOG", "b"),
        QItem("GOOG", "c"),
    ])
    google_formula = AndFormula(
        SmallerThanFormula(IdentifierTerm("a", lambda x: x["Peak Price"]),
                           IdentifierTerm("b", lambda x: x["Peak Price"])),
        SmallerThanFormula(IdentifierTerm("b", lambda x: x["Peak Price"]),
                           IdentifierTerm("c", lambda x: x["Peak Price"])),
    )
    google_pattern = Pattern(google_structure, google_formula, timedelta(minutes=3))

    runTest('multiplePatterns', [amazon_pattern, google_pattern], createTestFile)
def nestedAscendingTest(createTestFile=False):
    """
    Runs the "nestedAscending" test, planned with SORT_BY_FREQUENCY_LEFT_DEEP_TREE
    under the trivial optimizer.
    PATTERN AND(SEQ(AAPL a, AMZN b), SEQ(AVID c, BIDU d), AND(GOOG e, AAPL f),
                GOOG g, SEQ(AMZN h, BIDU i))
    WITHIN 1 minute (no condition)
    """
    nested_pattern = Pattern(
        AndOperator(
            SeqOperator(PrimitiveEventStructure("AAPL", "a"),
                        PrimitiveEventStructure("AMZN", "b")),
            SeqOperator(PrimitiveEventStructure("AVID", "c"),
                        PrimitiveEventStructure("BIDU", "d")),
            AndOperator(PrimitiveEventStructure("GOOG", "e"),
                        PrimitiveEventStructure("AAPL", "f")),
            PrimitiveEventStructure("GOOG", "g"),
            SeqOperator(PrimitiveEventStructure("AMZN", "h"),
                        PrimitiveEventStructure("BIDU", "i")),
        ),
        TrueCondition(),
        timedelta(minutes=1),
    )
    nested_pattern.set_statistics({
        StatisticsTypes.ARRIVAL_RATES:
            [0.11, 0.2, 0.3, 0.4, 0.5, 0.11, 0.5, 0.2, 0.4],
    })

    optimizer_params = OptimizerParameters(
        opt_type=OptimizerTypes.TRIVIAL_OPTIMIZER,
        tree_plan_params=TreePlanBuilderParameters(
            TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE),
    )
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params,
    )
    runTest("nestedAscending", [nested_pattern], createTestFile, mechanism_params)
def zstreamOrdNestedComplexStructuralTest():
    """
    Structural test for a nested pattern planned with ORDERED_ZSTREAM_BUSHY_TREE
    under the trivial optimizer.
    PATTERN AND(SEQ(AAPL a, AMZN b, DRIV c), SEQ(LOCM d, GOOG e),
                SEQ(AVID f, BIDU g, SEQ(ORLY h, CBRL i)), MSFT j)
    WHERE a.OpeningPrice > b.OpeningPrice AND d.OpeningPrice > c.OpeningPrice
      AND a.Date == b.Date == c.Date == d.Date == 200802010900
    WITHIN 3 minutes
    """
    structure = AndOperator(
        SeqOperator(PrimitiveEventStructure("AAPL", "a"),
                    PrimitiveEventStructure("AMZN", "b"),
                    PrimitiveEventStructure("DRIV", "c")),
        SeqOperator(PrimitiveEventStructure("LOCM", "d"),
                    PrimitiveEventStructure("GOOG", "e")),
        SeqOperator(
            PrimitiveEventStructure("AVID", "f"),
            PrimitiveEventStructure("BIDU", "g"),
            SeqOperator(PrimitiveEventStructure("ORLY", "h"),
                        PrimitiveEventStructure("CBRL", "i"))),
        PrimitiveEventStructure("MSFT", "j"),
    )
    a_above_b = BinaryCondition(
        Variable("a", lambda x: x["Opening Price"]),
        Variable("b", lambda x: x["Opening Price"]),
        relation_op=lambda x, y: x > y,
    )
    d_above_c = BinaryCondition(
        Variable("d", lambda x: x["Opening Price"]),
        Variable("c", lambda x: x["Opening Price"]),
        relation_op=lambda x, y: x > y,
    )
    # One date-equality condition per event a..d, each with its own lambda.
    date_checks = [
        EqCondition(Variable(event_name, lambda x: x["Date"]), 200802010900)
        for event_name in ("a", "b", "c", "d")
    ]
    complex_pattern = Pattern(
        structure,
        AndCondition(a_above_b, d_above_c, *date_checks),
        timedelta(minutes=3),
    )

    selectivities = [
        [1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
        [0.1, 1.0, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19],
        [0.2, 0.12, 1.0, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29],
        [0.3, 0.13, 0.23, 1.0, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39],
        [0.4, 0.14, 0.24, 0.34, 1.0, 0.45, 0.46, 0.47, 0.48, 0.49],
        [0.5, 0.15, 0.25, 0.35, 0.45, 1.0, 0.56, 0.57, 0.58, 0.59],
        [0.6, 0.16, 0.26, 0.36, 0.46, 0.56, 1.0, 0.67, 0.68, 0.69],
        [0.7, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 1.0, 0.78, 0.79],
        [0.8, 0.18, 0.28, 0.38, 0.48, 0.58, 0.68, 0.78, 1.0, 0.89],
        [0.9, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 1.0],
    ]
    rates = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    complex_pattern.set_statistics({
        StatisticsTypes.ARRIVAL_RATES: rates,
        StatisticsTypes.SELECTIVITY_MATRIX: selectivities,
    })

    optimizer_params = OptimizerParameters(
        opt_type=OptimizerTypes.TRIVIAL_OPTIMIZER,
        tree_plan_params=TreePlanBuilderParameters(
            TreePlanBuilderTypes.ORDERED_ZSTREAM_BUSHY_TREE),
    )
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params,
    )

    expected_plan = (
        'And',
        ('And',
         ('And', 'j', ('Seq', ('Seq', 'f', 'g'), ('Seq', 'h', 'i'))),
         ('Seq', 'd', 'e')),
        ('Seq', ('Seq', 'a', 'b'), 'c'),
    )
    runStructuralTest('zstreamOrdNestedComplexStructuralTest', [complex_pattern],
                      expected_plan, eval_mechanism_params=mechanism_params)
def multipleParentsForInternalNode(createTestFile=False):
    """
    Multi-pattern test ("multipleParentsForInternalNode") using SUBTREES_UNION.
    All four patterns have the form
    SEQ(AAPL a, AMZN b, <third event>)
    WHERE a.OpeningPrice > b.OpeningPrice AND <third>.PeakPrice > <threshold>
    and differ in the third event, the threshold and the window:
    1) GOOG c, 500, 5 minutes    2) GOOG c, 530, 3 minutes
    3) FB e,   520, 5 minutes    4) LI c,   100, 2 minutes
    """

    def build_pattern(third_type, third_name, peak_threshold, window_minutes):
        # Every call creates fresh structure, condition and lambda objects.
        structure = SeqOperator(
            PrimitiveEventStructure("AAPL", "a"),
            PrimitiveEventStructure("AMZN", "b"),
            PrimitiveEventStructure(third_type, third_name),
        )
        condition = AndCondition(
            GreaterThanCondition(Variable("a", lambda x: x["Opening Price"]),
                                 Variable("b", lambda x: x["Opening Price"])),
            GreaterThanCondition(Variable(third_name, lambda x: x["Peak Price"]),
                                 peak_threshold),
        )
        return Pattern(structure, condition, timedelta(minutes=window_minutes))

    patterns = [
        build_pattern("GOOG", "c", 500, 5),
        build_pattern("GOOG", "c", 530, 3),
        build_pattern("FB", "e", 520, 5),
        build_pattern("LI", "c", 100, 2),
    ]

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.TRIVIAL_LEFT_DEEP_TREE,
        TreeCostModels.INTERMEDIATE_RESULTS_TREE_COST_MODEL,
    )
    storage_params = TreeStorageParameters(
        sort_storage=False,
        clean_up_interval=10,
        prioritize_sorting_by_timestamp=True,
    )
    sharing_params = MultiPatternEvaluationParameters(
        MultiPatternEvaluationApproaches.SUBTREES_UNION)
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        plan_params, storage_params, sharing_params)

    runMultiTest("multipleParentsForInternalNode", patterns, createTestFile,
                 mechanism_params)
def frequencyPatternSearch3Test(createTestFile=False):
    """
    Runs the "frequency3" test with the SORT_BY_FREQUENCY_LEFT_DEEP_TREE
    evaluation mechanism type.
    PATTERN SEQ(AAPL a, AAPL b, AAPL c, LOCM d)
    WITHIN 5 minutes (no condition)
    """
    structure = SeqOperator([
        QItem("AAPL", "a"),
        QItem("AAPL", "b"),
        QItem("AAPL", "c"),
        QItem("LOCM", "d"),
    ])
    frequency_pattern = Pattern(structure, TrueFormula(), timedelta(minutes=5))

    frequencies = {"AAPL": 460, "LOCM": 219}
    frequency_pattern.set_statistics(StatisticsTypes.FREQUENCY_DICT, frequencies)

    runTest("frequency3", [frequency_pattern], createTestFile,
            EvaluationMechanismTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE)
def notInTheBeginningShare(createTestFile=False):
    """
    Multi-pattern test ("MultipleNotBeginningShare") using SUBTREES_UNION, with
    negated events placed at the start of the sequences. All patterns are
    WITHIN 5 minutes and use one shared Opening Price getter:
    1) SEQ(NOT TYP1 x, NOT TYP2 y, NOT TYP3 z, AAPL a, AMZN b, GOOG c)
       WHERE a > b AND b < c
    2) SEQ(NOT TYP1 x, NOT TYP2 y, AAPL a, AMZN b) WHERE a > b
    3) SEQ(AAPL a, AMZN b, GOOG c) WHERE a > b AND c > b
    """
    # A single function object is deliberately reused by every Variable below.
    opening_price = lambda x: x["Opening Price"]

    first_structure = SeqOperator(
        NegationOperator(PrimitiveEventStructure("TYP1", "x")),
        NegationOperator(PrimitiveEventStructure("TYP2", "y")),
        NegationOperator(PrimitiveEventStructure("TYP3", "z")),
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    first_condition = AndCondition(
        GreaterThanCondition(Variable("a", opening_price),
                             Variable("b", opening_price)),
        SmallerThanCondition(Variable("b", opening_price),
                             Variable("c", opening_price)),
    )
    first_pattern = Pattern(first_structure, first_condition, timedelta(minutes=5))

    second_structure = SeqOperator(
        NegationOperator(PrimitiveEventStructure("TYP1", "x")),
        NegationOperator(PrimitiveEventStructure("TYP2", "y")),
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
    )
    second_condition = GreaterThanCondition(
        Variable("a", opening_price), Variable("b", opening_price))
    second_pattern = Pattern(second_structure, second_condition, timedelta(minutes=5))

    third_structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    third_condition = AndCondition(
        GreaterThanCondition(Variable("a", opening_price),
                             Variable("b", opening_price)),
        GreaterThanCondition(Variable("c", opening_price),
                             Variable("b", opening_price)),
    )
    third_pattern = Pattern(third_structure, third_condition, timedelta(minutes=5))

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.TRIVIAL_LEFT_DEEP_TREE,
        TreeCostModels.INTERMEDIATE_RESULTS_TREE_COST_MODEL,
    )
    storage_params = TreeStorageParameters(
        sort_storage=False,
        clean_up_interval=10,
        prioritize_sorting_by_timestamp=True,
    )
    sharing_params = MultiPatternEvaluationParameters(
        MultiPatternEvaluationApproaches.SUBTREES_UNION)
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        plan_params, storage_params, sharing_params)

    runMultiTest("MultipleNotBeginningShare",
                 [first_pattern, second_pattern, third_pattern],
                 createTestFile, mechanism_params)
def oneArgumentsearchTestKleeneClosure(createTestFile=False):
    """
    Runs the "oneArgumentKC" test.
    PATTERN SEQ(KC(AAPL a, min_size=1, max_size=5))
    WHERE a.OpeningPrice > 135
    WITHIN 5 minutes
    """
    closure = KleeneClosureOperator(
        PrimitiveEventStructure("AAPL", "a"), min_size=1, max_size=5)
    structure = SeqOperator(closure)
    price_filter = SimpleCondition(
        Variable("a", lambda x: x["Opening Price"]),
        relation_op=lambda x: x > 135,
    )
    closure_pattern = Pattern(structure, price_filter, timedelta(minutes=5))
    runTest("oneArgumentKC", [closure_pattern], createTestFile)
def run_twitter_sanity_check():
    """
    Basic sanity test: looks for two different tweets that retweeted the same
    tweet, reading from a TwitterInputStream created with ['corona'] and writing
    matches to "output.txt" in the current working directory.
    PATTERN SEQ(Tweet a, Tweet b)
    WHERE a.ID != b.ID AND a.Retweeted_Status != None
      AND a.Retweeted_Status == b.Retweeted_Status
    WITHIN 30 minutes
    The input stream is closed even if the run fails.
    """
    # Yields None for events that carry no "retweeted_status" entry.
    retweeted_status_of = lambda x: x[
        "retweeted_status"] if "retweeted_status" in x else None

    tweet_type = DummyTwitterEventTypeClassifier.TWEET_TYPE
    structure = SeqOperator(
        PrimitiveEventStructure(tweet_type, "a"),
        PrimitiveEventStructure(DummyTwitterEventTypeClassifier.TWEET_TYPE, "b"),
    )
    different_tweets = NotEqCondition(
        Variable("a", lambda x: x["id"]),
        Variable("b", lambda x: x["id"]),
    )
    is_retweet = SimpleCondition(
        Variable("a", retweeted_status_of),
        relation_op=lambda x: x is not None,
    )
    same_origin = EqCondition(
        Variable("a", retweeted_status_of),
        Variable("b", retweeted_status_of),
    )
    retweet_pattern = Pattern(
        structure,
        AndCondition(different_tweets, is_retweet, same_origin),
        timedelta(minutes=30),
    )

    engine = CEP([retweet_pattern])
    tweets = TwitterInputStream(['corona'])
    try:
        elapsed = engine.run(
            tweets,
            FileOutputStream(os.getcwd(), "output.txt", True),
            TweetDataFormatter(),
        )
        print("Test twitterSanityCheck result: Succeeded, Time Passed: %s" %
              (elapsed, ))
    finally:
        tweets.close()
def sortedStorageBenchMarkTest(createTestFile=False):
    """
    Benchmarks the same pattern twice: first with the default parameters
    (labelled "unsorted storage"), then with sort_storage=True and explicit
    attribute priorities (labelled "sorted storage").
    PATTERN AND(DRIV a, MSFT b, CBRL c, MSFT m)
    WHERE b.LowestPrice >= a.LowestPrice AND m.PeakPrice >= c.PeakPrice
      AND m.LowestPrice >= b.LowestPrice
    WITHIN 360 minutes
    """
    structure = AndOperator(
        PrimitiveEventStructure("DRIV", "a"),
        PrimitiveEventStructure("MSFT", "b"),
        PrimitiveEventStructure("CBRL", "c"),
        PrimitiveEventStructure("MSFT", "m"),
    )
    condition = AndCondition(
        GreaterThanEqCondition(Variable("b", lambda x: x["Lowest Price"]),
                               Variable("a", lambda x: x["Lowest Price"])),
        GreaterThanEqCondition(Variable("m", lambda x: x["Peak Price"]),
                               Variable("c", lambda x: x["Peak Price"])),
        GreaterThanEqCondition(Variable("m", lambda x: x["Lowest Price"]),
                               Variable("b", lambda x: x["Lowest Price"])),
    )
    benchmark_pattern = Pattern(structure, condition, timedelta(minutes=360))

    runBenchMark("sortedStorageBenchMark - unsorted storage", [benchmark_pattern])

    priorities = {"a": 122, "b": 200, "c": 104, "m": 139}
    sorted_storage = TreeStorageParameters(
        sort_storage=True, attributes_priorities=priorities)
    mechanism_params = TreeBasedEvaluationMechanismParameters(
        DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.tree_plan_params,
        sorted_storage,
    )
    runBenchMark("sortedStorageBenchMark - sorted storage", [benchmark_pattern],
                 eval_mechanism_params=mechanism_params)
def sortedStorageBenchMarkTest(createTestFile=False):
    """
    Benchmarks the same pattern twice: first with the default storage, then with
    TreeStorageParameters(True, <attribute priorities>) ("sorted storage").
    PATTERN AND(DRIV a, MSFT b, CBRL c, MSFT m)
    WHERE b.LowestPrice >= a.LowestPrice AND b.PeakPrice >= c.PeakPrice
      AND b.LowestPrice >= m.LowestPrice
    WITHIN timedelta.max
    """
    structure = AndOperator([
        QItem("DRIV", "a"),
        QItem("MSFT", "b"),
        QItem("CBRL", "c"),
        QItem("MSFT", "m"),
    ])
    formula = AndFormula(
        GreaterThanEqFormula(
            IdentifierTerm("b", lambda x: x["Lowest Price"]),
            IdentifierTerm("a", lambda x: x["Lowest Price"])),
        AndFormula(
            GreaterThanEqFormula(
                IdentifierTerm("b", lambda x: x["Peak Price"]),
                IdentifierTerm("c", lambda x: x["Peak Price"])),
            GreaterThanEqFormula(
                IdentifierTerm("b", lambda x: x["Lowest Price"]),
                IdentifierTerm("m", lambda x: x["Lowest Price"])),
        ),
    )
    benchmark_pattern = Pattern(structure, formula, timedelta.max)

    runBenchMark("sortedStorageBenchMark - default storage", [benchmark_pattern])

    priorities = {"a": 122, "b": 139, "c": 104, "m": 139}
    sorted_storage = TreeStorageParameters(True, priorities)
    runBenchMark("sortedStorageBenchMark - sorted storage", [benchmark_pattern],
                 storage_params=sorted_storage)
def amazonInstablePatternSearchTest(createTestFile=False):
    """
    Runs the 'amazonInstable' test, which looks for an unstable day for Amazon.
    PATTERN SEQ(AmazonStockPriceUpdate x1, AmazonStockPriceUpdate x2,
                AmazonStockPriceUpdate x3)
    WHERE x1.LowestPrice <= 75 AND x2.PeakPrice >= 78
      AND x3.LowestPrice <= x1.LowestPrice
    WITHIN 1 day
    """
    structure = SeqOperator([
        QItem("AMZN", "x1"),
        QItem("AMZN", "x2"),
        QItem("AMZN", "x3"),
    ])
    formula = AndFormula(
        SmallerThanEqFormula(
            IdentifierTerm("x1", lambda x: x["Lowest Price"]),
            AtomicTerm(75)),
        AndFormula(
            GreaterThanEqFormula(
                IdentifierTerm("x2", lambda x: x["Peak Price"]),
                AtomicTerm(78)),
            SmallerThanEqFormula(
                IdentifierTerm("x3", lambda x: x["Lowest Price"]),
                IdentifierTerm("x1", lambda x: x["Lowest Price"])),
        ),
    )
    unstable_pattern = Pattern(structure, formula, timedelta(days=1))
    runTest('amazonInstable', [unstable_pattern], createTestFile)
def msftDrivRacePatternSearchTest(createTestFile=False):
    """
    Runs the 'msftDrivRace' test, which looks for a ten-minute price race between
    DRIV and Microsoft.
    PATTERN SEQ(MicrosoftStockPriceUpdate a, DrivStockPriceUpdate b,
                MicrosoftStockPriceUpdate c, DrivStockPriceUpdate d,
                MicrosoftStockPriceUpdate e)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
      AND c.PeakPrice < d.PeakPrice AND d.PeakPrice < e.PeakPrice
    WITHIN 10 minutes
    """
    structure = SeqOperator([
        QItem("MSFT", "a"),
        QItem("DRIV", "b"),
        QItem("MSFT", "c"),
        QItem("DRIV", "d"),
        QItem("MSFT", "e"),
    ])
    first_half = AndFormula(
        SmallerThanFormula(IdentifierTerm("a", lambda x: x["Peak Price"]),
                           IdentifierTerm("b", lambda x: x["Peak Price"])),
        SmallerThanFormula(IdentifierTerm("b", lambda x: x["Peak Price"]),
                           IdentifierTerm("c", lambda x: x["Peak Price"])),
    )
    second_half = AndFormula(
        SmallerThanFormula(IdentifierTerm("c", lambda x: x["Peak Price"]),
                           IdentifierTerm("d", lambda x: x["Peak Price"])),
        SmallerThanFormula(IdentifierTerm("d", lambda x: x["Peak Price"]),
                           IdentifierTerm("e", lambda x: x["Peak Price"])),
    )
    race_pattern = Pattern(
        structure, AndFormula(first_half, second_half), timedelta(minutes=10))
    runTest('msftDrivRace', [race_pattern], createTestFile)
def nonsensePatternSearchTest(createTestFile=False,
                              eval_mechanism_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS,
                              test_name="nonsense"):
    """
    Runs a test (named by test_name) whose condition is a cycle of strict
    inequalities and therefore cannot be satisfied.
    PATTERN AND(AmazonStockPriceUpdate a, AvidStockPriceUpdate b,
                AppleStockPriceUpdate c)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
      AND c.PeakPrice < a.PeakPrice
    WITHIN 1 minute
    """
    structure = AndOperator(
        PrimitiveEventStructure("AMZN", "a"),
        PrimitiveEventStructure("AVID", "b"),
        PrimitiveEventStructure("AAPL", "c"),
    )
    # One "lower < higher" condition per pair; each gets its own lambdas.
    cyclic_conditions = [
        BinaryCondition(
            Variable(lower, lambda x: x["Peak Price"]),
            Variable(higher, lambda x: x["Peak Price"]),
            relation_op=lambda x, y: x < y,
        )
        for lower, higher in (("a", "b"), ("b", "c"), ("c", "a"))
    ]
    contradictory_pattern = Pattern(
        structure, AndCondition(*cyclic_conditions), timedelta(minutes=1))
    runTest(test_name, [contradictory_pattern], createTestFile,
            eval_mechanism_params)
def KC_Condition_Failure_03(createTestFile=False):
    """
    KC(And([a, b, c]))
    Expects building this pattern to raise: an AND of three GOOG events inside a
    Kleene closure, combined with a KCIndexCondition over the names a, b and c.
    Prints "Succeeded" when any Exception is raised and "Failed" otherwise.
    """
    try:
        Pattern(
            KleeneClosureOperator(
                AndOperator(
                    PrimitiveEventStructure("GOOG", "a"),
                    PrimitiveEventStructure("GOOG", "b"),
                    PrimitiveEventStructure("GOOG", "c"),
                ),
                min_size=1,
                max_size=3,
            ),
            AndCondition(
                SmallerThanCondition(Variable("a", lambda x: x["Peak Price"]),
                                     Variable("b", lambda x: x["Peak Price"])),
                SmallerThanCondition(Variable("b", lambda x: x["Peak Price"]),
                                     Variable("c", lambda x: x["Peak Price"])),
                KCIndexCondition(names={'a', 'b', 'c'},
                                 getattr_func=lambda x: x["Peak Price"],
                                 relation_op=lambda x, y: x < 1 + y,
                                 offset=-1,
                                 first_index=2),
            ),
            timedelta(minutes=3),
        )
    except Exception:
        print("Test KC_Condition_Failure_03 Succeeded")
    else:
        print("Test KC_Condition_Failure_03 Failed")
def msftDrivRacePatternSearchTest(createTestFile=False,
                                  eval_mechanism_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS,
                                  test_name="msftDrivRace"):
    """
    Runs a test (named by test_name) that looks for a ten-minute price race
    between DRIV and Microsoft.
    PATTERN SEQ(MicrosoftStockPriceUpdate a, DrivStockPriceUpdate b,
                MicrosoftStockPriceUpdate c, DrivStockPriceUpdate d,
                MicrosoftStockPriceUpdate e)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
      AND c.PeakPrice < d.PeakPrice AND d.PeakPrice < e.PeakPrice
    WITHIN 10 minutes
    """
    structure = SeqOperator(
        PrimitiveEventStructure("MSFT", "a"),
        PrimitiveEventStructure("DRIV", "b"),
        PrimitiveEventStructure("MSFT", "c"),
        PrimitiveEventStructure("DRIV", "d"),
        PrimitiveEventStructure("MSFT", "e"),
    )
    event_names = ["a", "b", "c", "d", "e"]
    # One "earlier < later" condition per adjacent pair; each gets its own lambdas.
    ascending_conditions = [
        BinaryCondition(
            Variable(earlier, lambda x: x["Peak Price"]),
            Variable(later, lambda x: x["Peak Price"]),
            relation_op=lambda x, y: x < y,
        )
        for earlier, later in zip(event_names, event_names[1:])
    ]
    race_pattern = Pattern(
        structure, AndCondition(*ascending_conditions), timedelta(minutes=10))
    runTest(test_name, [race_pattern], createTestFile, eval_mechanism_params)
def MinMax_2_TestKleeneClosure(createTestFile=False):
    """
    Runs the "MinMax_2_" test over nasdaqEventStreamKC.
    PATTERN SEQ(KC(GOOG a, min_size=4, max_size=5))
    WHERE a.OpeningPrice > 0
    WITHIN 5 minutes
    """
    closure = KleeneClosureOperator(
        PrimitiveEventStructure("GOOG", "a"), min_size=4, max_size=5)
    structure = SeqOperator(closure)
    positive_price = SimpleCondition(
        Variable("a", lambda x: x["Opening Price"]),
        relation_op=lambda x: x > 0,
    )
    closure_pattern = Pattern(structure, positive_price, timedelta(minutes=5))
    runTest("MinMax_2_", [closure_pattern], createTestFile,
            events=nasdaqEventStreamKC)