Пример #1
0
def frequencyPatternSearch2Test(createTestFile=False):
    """
    Runs the "frequency2" test on SEQ(LOCM a, AMZN b, AAPL c) with a 5-minute window.
    The condition is an AndCondition of two SmallerThanCondition checks over the "Opening Price" attribute
    (a vs. b, then b vs. c). Arrival rates are attached to the pattern and the evaluation parameters request
    the SORT_BY_FREQUENCY_LEFT_DEEP_TREE plan builder with an ARRIVAL_RATES statistics collector.
    """
    structure = SeqOperator(
        PrimitiveEventStructure("LOCM", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("AAPL", "c"),
    )
    a_vs_b = SmallerThanCondition(
        Variable("a", lambda x: x["Opening Price"]),
        Variable("b", lambda x: x["Opening Price"]),
    )
    b_vs_c = SmallerThanCondition(
        Variable("b", lambda x: x["Opening Price"]),
        Variable("c", lambda x: x["Opening Price"]),
    )
    window = timedelta(minutes=5)
    pattern = Pattern(structure, AndCondition(a_vs_b, b_vs_c), window)

    # one arrival rate per event of the sequence, in declaration order
    arrival_rates = [0.0076, 0.0153, 0.0159]
    pattern.set_statistics({StatisticsTypes.ARRIVAL_RATES: arrival_rates})

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE)
    collector_params = StatisticsCollectorParameters(
        statistics_types=[StatisticsTypes.ARRIVAL_RATES])
    optimizer_params = StatisticsDeviationAwareOptimizerParameters(
        tree_plan_params=plan_params,
        statistics_collector_params=collector_params)
    storage_params = DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params
    eval_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=storage_params)

    runTest("frequency2", [pattern], createTestFile,
            eval_mechanism_params=eval_params)
Пример #2
0
 def __create_selectivity_matrix_for_nested_operators(
         pattern: Pattern, statistics: Dict):
     """
     Builds an (#args x #args) selectivity matrix for the top-level positive arguments of the pattern.
     Every cell is obtained from __calculate_nested_selectivity, given the names of the events located under
     the row argument and under the column argument (so nested operators are flattened into a single entry).
     Raises an Exception if the number of positive primitive events in the pattern differs from the size of
     the selectivity matrix found in the statistics.
     """
     full_matrix = statistics[StatisticsTypes.SELECTIVITY_MATRIX]
     positive_events_count = pattern.count_primitive_events(positive_only=True)
     if positive_events_count != len(full_matrix):
         raise Exception("size mismatch")
     # names of all the events in this pattern (including those under nested operators)
     all_event_names = list(pattern.positive_structure.get_all_event_names())
     # one entry per top-level argument: the names of the primitive events located under that argument
     names_per_arg = [
         list(arg.get_all_event_names())
         for arg in pattern.get_top_level_structure_args(positive_only=True)
     ]
     # each cell is calculated from the events under the respective row and column arguments
     return [
         [
             TreePlanBuilder.__calculate_nested_selectivity(
                 all_event_names, full_matrix, row_names, col_names)
             for col_names in names_per_arg
         ]
         for row_names in names_per_arg
     ]
Пример #3
0
def frequencyPatternSearch6Test(createTestFile=False):
    """
    Runs the "frequency6" test on a SEQ of six events alternating between AAPL (a1, a2, a3) and
    LOCM (b1, b2, b3), with a TrueCondition and a 7-minute window.
    Arrival rates are attached to the pattern and the evaluation parameters request the
    SORT_BY_FREQUENCY_LEFT_DEEP_TREE plan builder with an ARRIVAL_RATES statistics collector.
    """
    structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a1"),
        PrimitiveEventStructure("LOCM", "b1"),
        PrimitiveEventStructure("AAPL", "a2"),
        PrimitiveEventStructure("LOCM", "b2"),
        PrimitiveEventStructure("AAPL", "a3"),
        PrimitiveEventStructure("LOCM", "b3"),
    )
    pattern = Pattern(structure, TrueCondition(), timedelta(minutes=7))

    # original note: {"AAPL": 460, "LOCM": 219} (presumably the counts these rates were derived from - verify)
    arrival_rates = [0.0159, 0.0076, 0.0159, 0.0076, 0.0159, 0.0076]
    pattern.set_statistics({StatisticsTypes.ARRIVAL_RATES: arrival_rates})

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE)
    collector_params = StatisticsCollectorParameters(
        statistics_types=[StatisticsTypes.ARRIVAL_RATES])
    optimizer_params = StatisticsDeviationAwareOptimizerParameters(
        tree_plan_params=plan_params,
        statistics_collector_params=collector_params)
    storage_params = DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params
    eval_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=storage_params)

    runTest("frequency6", [pattern], createTestFile,
            eval_mechanism_params=eval_params)
Пример #4
0
def nestedAscendingStructuralTest():
    """
    Structural test for an AND of five arguments: SEQ(a, b), SEQ(c, d), AND(e, f), the primitive event g
    and SEQ(h, i), with a TrueCondition and a 1-minute window.
    The plan is requested from the SORT_BY_FREQUENCY_LEFT_DEEP_TREE builder under the TRIVIAL_OPTIMIZER and
    the resulting structure is compared by runStructuralTest against the nested tuple built below.
    """
    seq_ab = SeqOperator(PrimitiveEventStructure("AAPL", "a"),
                         PrimitiveEventStructure("AMZN", "b"))
    seq_cd = SeqOperator(PrimitiveEventStructure("AVID", "c"),
                         PrimitiveEventStructure("BIDU", "d"))
    and_ef = AndOperator(PrimitiveEventStructure("GOOG", "e"),
                         PrimitiveEventStructure("AAPL", "f"))
    event_g = PrimitiveEventStructure("GOOG", "g")
    seq_hi = SeqOperator(PrimitiveEventStructure("AMZN", "h"),
                         PrimitiveEventStructure("BIDU", "i"))
    pattern = Pattern(AndOperator(seq_ab, seq_cd, and_ef, event_g, seq_hi),
                      TrueCondition(), timedelta(minutes=1))

    # one arrival rate per primitive event, a through i
    arrival_rates = [0.11, 0.2, 0.3, 0.4, 0.5, 0.11, 0.5, 0.2, 0.4]
    pattern.set_statistics({StatisticsTypes.ARRIVAL_RATES: arrival_rates})

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE)
    optimizer_params = OptimizerParameters(
        opt_type=OptimizerTypes.TRIVIAL_OPTIMIZER,
        tree_plan_params=plan_params)
    storage_params = DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params
    eval_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=storage_params)

    # expected left-deep tree: start from And(g, Seq(a, b)) and wrap one more operand at every level
    expected_result = ('And', 'g', ('Seq', 'a', 'b'))
    for operand in (('Seq', 'c', 'd'), ('And', 'e', 'f'), ('Seq', 'h', 'i')):
        expected_result = ('And', expected_result, operand)

    runStructuralTest('nestedAscendingStructuralTest', [pattern],
                      expected_result,
                      eval_mechanism_params=eval_params)
Пример #5
0
def frequencyTailoredPatternSearchTest(createTestFile=False):
    """
    Runs the 'frequencyTailored1' test over nasdaqEventStream on SEQ(DRIV a, MSFT b, CBRL c) with a
    360-minute window. The condition is an AndCondition of two GreaterThanCondition checks over the
    "Opening Price" attribute (a vs. b, then b vs. c). Arrival rates are attached to the pattern and the
    evaluation parameters request the SORT_BY_FREQUENCY_LEFT_DEEP_TREE plan builder.
    """
    structure = SeqOperator(
        PrimitiveEventStructure("DRIV", "a"),
        PrimitiveEventStructure("MSFT", "b"),
        PrimitiveEventStructure("CBRL", "c"),
    )
    a_vs_b = GreaterThanCondition(
        Variable("a", lambda x: x["Opening Price"]),
        Variable("b", lambda x: x["Opening Price"]),
    )
    b_vs_c = GreaterThanCondition(
        Variable("b", lambda x: x["Opening Price"]),
        Variable("c", lambda x: x["Opening Price"]),
    )
    window = timedelta(minutes=360)
    pattern = Pattern(structure, AndCondition(a_vs_b, b_vs_c), window)

    # original note kept for reference: frequencyDict = {"MSFT": 256, "DRIV": 257, "CBRL": 1}
    arrival_rates = [
        0.01454418928322895,
        0.016597077244258872,
        0.012421711899791231,
    ]
    pattern.set_statistics({StatisticsTypes.ARRIVAL_RATES: arrival_rates})

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE)
    collector_params = StatisticsCollectorParameters(
        statistics_types=[StatisticsTypes.ARRIVAL_RATES])
    optimizer_params = StatisticsDeviationAwareOptimizerParameters(
        tree_plan_params=plan_params,
        statistics_collector_params=collector_params)
    storage_params = DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params
    eval_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=storage_params)

    runTest('frequencyTailored1', [pattern], createTestFile,
            eval_mechanism_params=eval_params,
            events=nasdaqEventStream)
def andAndPatternTransformationTest():
    """
    Checks that preprocessing with TESTING_PREPROCESSING_RULES_ORDER turns an AND containing two levels of
    nested ANDs (with negated events on every level) into a single pattern whose full structure equals
    one flat AND over the same events in the same order.
    """
    failure_message = "Test andAndPatternTransformation Failed"

    event_a = PrimitiveEventStructure("AAPL", "a")
    not_z = NegationOperator(PrimitiveEventStructure("AMZN", "z"))
    event_g = PrimitiveEventStructure("GOOG", "g")
    event_zz = PrimitiveEventStructure("AMZN", "zz")
    not_gg = NegationOperator(PrimitiveEventStructure("GOOG", "gg"))
    event_zzz = PrimitiveEventStructure("AMZN", "zzz")
    not_ggg = NegationOperator(PrimitiveEventStructure("GOOG", "ggg"))
    nested_structure = AndOperator(
        event_a, not_z, event_g,
        AndOperator(event_zz, not_gg, AndOperator(event_zzz, not_ggg)))
    pattern = Pattern(nested_structure, TrueCondition(), timedelta(minutes=5))

    flat_structure = AndOperator(
        PrimitiveEventStructure("AAPL", "a"),
        NegationOperator(PrimitiveEventStructure("AMZN", "z")),
        PrimitiveEventStructure("GOOG", "g"),
        PrimitiveEventStructure("AMZN", "zz"),
        NegationOperator(PrimitiveEventStructure("GOOG", "gg")),
        PrimitiveEventStructure("AMZN", "zzz"),
        NegationOperator(PrimitiveEventStructure("GOOG", "ggg")),
    )
    expected_pattern = Pattern(flat_structure, TrueCondition(),
                               timedelta(minutes=5))

    preprocessing_params = PatternPreprocessingParameters(
        TESTING_PREPROCESSING_RULES_ORDER)
    pattern_transformation = PatternPreprocessor(preprocessing_params)
    transformed_patterns = pattern_transformation.transform_patterns([pattern])

    assert len(transformed_patterns) == 1, failure_message
    transformed_structure = transformed_patterns[0].full_structure
    assert expected_pattern.full_structure == transformed_structure, failure_message
Пример #7
0
def onePatternIncludesOther(createTestFile=False):
    """
    Runs the "onePatternIncludesOther" multi-pattern test with two patterns over a 3-minute window:
    pattern1 is SEQ(GOOG a, GOOG b, AAPL c) with "Peak Price" conditions on (a, b) and (b, c), while
    pattern2 is SEQ(GOOG a, GOOG b) with only the (a, b) condition.
    The evaluation uses a TRIVIAL_LEFT_DEEP_TREE plan and the SUBTREES_UNION multi-pattern approach.
    """
    window = timedelta

    long_structure = SeqOperator(
        PrimitiveEventStructure("GOOG", "a"),
        PrimitiveEventStructure("GOOG", "b"),
        PrimitiveEventStructure("AAPL", "c"),
    )
    long_condition = AndCondition(
        SmallerThanCondition(
            Variable("a", lambda x: x["Peak Price"]),
            Variable("b", lambda x: x["Peak Price"])),
        GreaterThanCondition(
            Variable("b", lambda x: x["Peak Price"]),
            Variable("c", lambda x: x["Peak Price"])),
    )
    pattern1 = Pattern(long_structure, long_condition, window(minutes=3))

    short_structure = SeqOperator(
        PrimitiveEventStructure("GOOG", "a"),
        PrimitiveEventStructure("GOOG", "b"),
    )
    short_condition = SmallerThanCondition(
        Variable("a", lambda x: x["Peak Price"]),
        Variable("b", lambda x: x["Peak Price"]))
    pattern2 = Pattern(short_structure, short_condition, window(minutes=3))

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.TRIVIAL_LEFT_DEEP_TREE,
        TreeCostModels.INTERMEDIATE_RESULTS_TREE_COST_MODEL)
    storage_params = TreeStorageParameters(
        sort_storage=False,
        clean_up_interval=10,
        prioritize_sorting_by_timestamp=True)
    multi_pattern_params = MultiPatternEvaluationParameters(
        MultiPatternEvaluationApproaches.SUBTREES_UNION)
    eval_mechanism_params = TreeBasedEvaluationMechanismParameters(
        plan_params, storage_params, multi_pattern_params)

    runMultiTest("onePatternIncludesOther", [pattern1, pattern2],
                 createTestFile, eval_mechanism_params)
Пример #8
0
def severalPatternShareSubtreeFullSharing(createTestFile=False):
    """
    Runs the "threeSharingSubtreesFullSharing" multi-pattern test with subtree_sharing_eval_mechanism_params,
    passing expected_file_name="threeSharingSubtrees".
    All three patterns start with SEQ(AAPL a, AMZN b) and an "Opening Price" GreaterThanCondition on (a, b):
    the first adds GOOG c, three negated events (TYP1, TYP2, TYP3) and a (b, c) condition, the second adds
    a positive TYP1 event, and the third has nothing else. Every window is 5 minutes.
    """
    first_structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
        NegationOperator(PrimitiveEventStructure("TYP1", "x")),
        NegationOperator(PrimitiveEventStructure("TYP2", "y")),
        NegationOperator(PrimitiveEventStructure("TYP3", "z")),
    )
    first_condition = AndCondition(
        GreaterThanCondition(
            Variable("a", lambda x: x["Opening Price"]),
            Variable("b", lambda x: x["Opening Price"])),
        SmallerThanCondition(
            Variable("b", lambda x: x["Opening Price"]),
            Variable("c", lambda x: x["Opening Price"])),
    )
    pattern = Pattern(first_structure, first_condition, timedelta(minutes=5))

    second_structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("TYP1", "x"),
    )
    second_condition = GreaterThanCondition(
        Variable("a", lambda x: x["Opening Price"]),
        Variable("b", lambda x: x["Opening Price"]))
    pattern2 = Pattern(second_structure, second_condition,
                       timedelta(minutes=5))

    third_structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
    )
    third_condition = GreaterThanCondition(
        Variable("a", lambda x: x["Opening Price"]),
        Variable("b", lambda x: x["Opening Price"]))
    pattern3 = Pattern(third_structure, third_condition, timedelta(minutes=5))

    all_patterns = [pattern, pattern2, pattern3]
    runMultiTest("threeSharingSubtreesFullSharing",
                 all_patterns,
                 createTestFile,
                 subtree_sharing_eval_mechanism_params,
                 expected_file_name="threeSharingSubtrees")
def topmostOrPatternTransformationTest():
    """
    Checks that preprocessing with only the TOPMOST_OR_PATTERN rule turns SEQ(a, OR(z, g), m) into a single
    pattern whose full structure equals OR(SEQ(a, z, m), SEQ(a, g, m)).
    """
    failure_message = "Test topmostOrPatternTransformation Failed"

    event_a = PrimitiveEventStructure("AAPL", "a")
    z_or_g = OrOperator(PrimitiveEventStructure("AMZN", "z"),
                        PrimitiveEventStructure("GOOG", "g"))
    event_m = PrimitiveEventStructure("MSFT", "m")
    pattern = Pattern(SeqOperator(event_a, z_or_g, event_m),
                      TrueCondition(), timedelta(minutes=5))

    seq_with_z = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "z"),
        PrimitiveEventStructure("MSFT", "m"),
    )
    seq_with_g = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("GOOG", "g"),
        PrimitiveEventStructure("MSFT", "m"),
    )
    expected_pattern = Pattern(OrOperator(seq_with_z, seq_with_g),
                               TrueCondition(), timedelta(minutes=5))

    # restrict the preprocessor to the single rule under test
    rules_directive = [PatternTransformationRules.TOPMOST_OR_PATTERN]
    params = PatternPreprocessingParameters()
    params.transformation_rules = rules_directive
    pattern_transformation = PatternPreprocessor(params)
    transformed_patterns = pattern_transformation.transform_patterns([pattern])

    assert len(transformed_patterns) == 1, failure_message
    transformed_structure = transformed_patterns[0].full_structure
    assert transformed_structure == expected_pattern.full_structure, failure_message
Пример #10
0
def distinctPatterns(createTestFile=False):
    """
    Runs the "BigMultiPattern" multi-pattern test with leaf_sharing_eval_mechanism_params on two patterns
    over different event types: SEQ of three GOOG events with "Peak Price" SmallerThanCondition checks on
    (a, b) and (b, c) in a 3-minute window, and SEQ of three AMZN events with "Lowest Price" / "Peak Price"
    threshold conditions in a 1-day window.
    """
    goog_structure = SeqOperator(
        PrimitiveEventStructure("GOOG", "a"),
        PrimitiveEventStructure("GOOG", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    goog_condition = AndCondition(
        SmallerThanCondition(
            Variable("a", lambda x: x["Peak Price"]),
            Variable("b", lambda x: x["Peak Price"])),
        SmallerThanCondition(
            Variable("b", lambda x: x["Peak Price"]),
            Variable("c", lambda x: x["Peak Price"])),
    )
    pattern1 = Pattern(goog_structure, goog_condition, timedelta(minutes=3))

    amzn_structure = SeqOperator(
        PrimitiveEventStructure("AMZN", "x1"),
        PrimitiveEventStructure("AMZN", "x2"),
        PrimitiveEventStructure("AMZN", "x3"),
    )
    amzn_condition = AndCondition(
        SmallerThanEqCondition(
            Variable("x1", lambda x: x["Lowest Price"]), 75),
        GreaterThanEqCondition(
            Variable("x2", lambda x: x["Peak Price"]), 78),
        SmallerThanEqCondition(
            Variable("x3", lambda x: x["Lowest Price"]),
            Variable("x1", lambda x: x["Lowest Price"])),
    )
    pattern2 = Pattern(amzn_structure, amzn_condition, timedelta(days=1))

    runMultiTest("BigMultiPattern", [pattern1, pattern2], createTestFile,
                 leaf_sharing_eval_mechanism_params)
Пример #11
0
def iiRandom2PatternSearchTest(createTestFile=False):
    """
    Runs the 'iiRandom2' test over nasdaqEventStream on SEQ(MSFT a, DRIV b, ORLY c, CBRL d) with a 3-minute
    window and a single four-variable SimpleCondition requiring a < b < c < d on "Peak Price".
    A selectivity matrix and arrival rates are attached to the pattern, and the plan is requested from
    IterativeImprovementTreePlanBuilderParameters with CIRCLE_BASED improvement and RANDOM initialization.
    """
    structure = SeqOperator(
        PrimitiveEventStructure("MSFT", "a"),
        PrimitiveEventStructure("DRIV", "b"),
        PrimitiveEventStructure("ORLY", "c"),
        PrimitiveEventStructure("CBRL", "d"),
    )
    ascending_peaks = SimpleCondition(
        Variable("a", lambda x: x["Peak Price"]),
        Variable("b", lambda x: x["Peak Price"]),
        Variable("c", lambda x: x["Peak Price"]),
        Variable("d", lambda x: x["Peak Price"]),
        relation_op=lambda x, y, z, w: x < y < z < w)
    pattern = Pattern(structure, AndCondition(ascending_peaks),
                      timedelta(minutes=3))

    selectivity_matrix = [
        [1.0, 0.9457796098355941, 1.0, 1.0],
        [0.9457796098355941, 1.0, 0.15989723367389616, 1.0],
        [1.0, 0.15989723367389616, 1.0, 0.9992557393942864],
        [1.0, 1.0, 0.9992557393942864, 1.0],
    ]
    arrival_rates = [
        0.016597077244258872,
        0.01454418928322895,
        0.013917884481558803,
        0.012421711899791231,
    ]
    pattern.set_statistics(
        StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES,
        (selectivity_matrix, arrival_rates))

    default_settings = DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS
    plan_params = IterativeImprovementTreePlanBuilderParameters(
        default_settings.tree_plan_params.cost_model_type,
        20,
        IterativeImprovementType.CIRCLE_BASED,
        IterativeImprovementInitType.RANDOM)
    eval_params = TreeBasedEvaluationMechanismParameters(
        plan_params,
        DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params)

    runTest('iiRandom2', [pattern], createTestFile,
            eval_mechanism_params=eval_params,
            events=nasdaqEventStream)
Пример #12
0
def zStreamPatternSearchTest(createTestFile=False):
    """
    Runs the 'zstream1' test over nasdaqEventStream on SEQ(MSFT a, DRIV b, ORLY c, CBRL d) with a 3-minute
    window and nested AndCondition objects holding "Peak Price" SmallerThanCondition checks on
    (a, b), (b, c) and (c, d).
    A selectivity matrix and arrival rates are attached to the pattern, and the plan is requested from the
    ZSTREAM_BUSHY_TREE builder.
    """
    structure = SeqOperator(
        PrimitiveEventStructure("MSFT", "a"),
        PrimitiveEventStructure("DRIV", "b"),
        PrimitiveEventStructure("ORLY", "c"),
        PrimitiveEventStructure("CBRL", "d"),
    )
    a_vs_b = SmallerThanCondition(
        Variable("a", lambda x: x["Peak Price"]),
        Variable("b", lambda x: x["Peak Price"]))
    b_vs_c = SmallerThanCondition(
        Variable("b", lambda x: x["Peak Price"]),
        Variable("c", lambda x: x["Peak Price"]))
    # the (b, c) check is deliberately wrapped in its own single-argument AndCondition, as in the test data
    first_two_checks = AndCondition(a_vs_b, AndCondition(b_vs_c))
    c_vs_d = SmallerThanCondition(
        Variable("c", lambda x: x["Peak Price"]),
        Variable("d", lambda x: x["Peak Price"]))
    pattern = Pattern(structure, AndCondition(first_two_checks, c_vs_d),
                      timedelta(minutes=3))

    selectivity_matrix = [
        [1.0, 0.9457796098355941, 1.0, 1.0],
        [0.9457796098355941, 1.0, 0.15989723367389616, 1.0],
        [1.0, 0.15989723367389616, 1.0, 0.9992557393942864],
        [1.0, 1.0, 0.9992557393942864, 1.0],
    ]
    arrival_rates = [
        0.016597077244258872,
        0.01454418928322895,
        0.013917884481558803,
        0.012421711899791231,
    ]
    pattern.set_statistics(
        StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES,
        (selectivity_matrix, arrival_rates))

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.ZSTREAM_BUSHY_TREE)
    eval_params = TreeBasedEvaluationMechanismParameters(
        plan_params,
        DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params)

    runTest('zstream1', [pattern], createTestFile,
            eval_mechanism_params=eval_params,
            events=nasdaqEventStream)
Пример #13
0
def zStreamPatternSearchTest(createTestFile=False):
    """
    Runs the 'zstream1' test over nasdaqEventStream on SEQ(MSFT a, DRIV b, ORLY c, CBRL d) with a 3-minute
    window and nested AndFormula objects holding "Peak Price" SmallerThanFormula checks on
    (a, b), (b, c) and (c, d).
    A selectivity matrix and arrival rates are attached to the pattern, and the ZSTREAM_BUSHY_TREE
    evaluation mechanism type is requested.
    """
    structure = SeqOperator([
        QItem("MSFT", "a"),
        QItem("DRIV", "b"),
        QItem("ORLY", "c"),
        QItem("CBRL", "d"),
    ])
    a_vs_b = SmallerThanFormula(
        IdentifierTerm("a", lambda x: x["Peak Price"]),
        IdentifierTerm("b", lambda x: x["Peak Price"]))
    b_vs_c = SmallerThanFormula(
        IdentifierTerm("b", lambda x: x["Peak Price"]),
        IdentifierTerm("c", lambda x: x["Peak Price"]))
    first_two_checks = AndFormula(a_vs_b, b_vs_c)
    c_vs_d = SmallerThanFormula(
        IdentifierTerm("c", lambda x: x["Peak Price"]),
        IdentifierTerm("d", lambda x: x["Peak Price"]))
    pattern = Pattern(structure, AndFormula(first_two_checks, c_vs_d),
                      timedelta(minutes=3))

    selectivity_matrix = [
        [1.0, 0.9457796098355941, 1.0, 1.0],
        [0.9457796098355941, 1.0, 0.15989723367389616, 1.0],
        [1.0, 0.15989723367389616, 1.0, 0.9992557393942864],
        [1.0, 1.0, 0.9992557393942864, 1.0],
    ]
    arrival_rates = [
        0.016597077244258872,
        0.01454418928322895,
        0.013917884481558803,
        0.012421711899791231,
    ]
    pattern.set_statistics(
        StatisticsTypes.SELECTIVITY_MATRIX_AND_ARRIVAL_RATES,
        (selectivity_matrix, arrival_rates))

    runTest('zstream1', [pattern], createTestFile,
            eval_mechanism_type=EvaluationMechanismTypes.ZSTREAM_BUSHY_TREE,
            events=nasdaqEventStream)
Пример #14
0
def samePatternDifferentTimeStampsFullSharing(createTestFile=False):
    """
    Runs the "DifferentTimeStampFullSharing" multi-pattern test with subtree_sharing_eval_mechanism_params,
    passing expected_file_name="DifferentTimeStamp".
    Both patterns are SEQ(AAPL a, AMZN b, GOOG c) with the same "Peak Price" conditions (a against 135 and
    b against c); they differ only in the time window (5 minutes vs. 2 minutes).
    """
    wide_structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    wide_condition = AndCondition(
        GreaterThanEqCondition(
            Variable("a", lambda x: x["Peak Price"]), 135),
        SmallerThanCondition(
            Variable("b", lambda x: x["Peak Price"]),
            Variable("c", lambda x: x["Peak Price"])),
    )
    pattern1 = Pattern(wide_structure, wide_condition, timedelta(minutes=5))

    narrow_structure = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    narrow_condition = AndCondition(
        GreaterThanEqCondition(
            Variable("a", lambda x: x["Peak Price"]), 135),
        SmallerThanCondition(
            Variable("b", lambda x: x["Peak Price"]),
            Variable("c", lambda x: x["Peak Price"])),
    )
    pattern2 = Pattern(narrow_structure, narrow_condition,
                       timedelta(minutes=2))

    runMultiTest("DifferentTimeStampFullSharing",
                 [pattern1, pattern2],
                 createTestFile,
                 subtree_sharing_eval_mechanism_params,
                 expected_file_name="DifferentTimeStamp")
Пример #15
0
def multiplePatternSearchTest(createTestFile=False):
    """
    Runs the 'multiplePatterns' test with two patterns passed together to runTest:
    a SEQ of three AMZN events with "Lowest Price" / "Peak Price" threshold formulas in a 1-day window, and
    a SEQ of three GOOG events with "Peak Price" SmallerThanFormula checks on (a, b) and (b, c) in a
    3-minute window.
    """
    amzn_structure = SeqOperator([
        QItem("AMZN", "x1"),
        QItem("AMZN", "x2"),
        QItem("AMZN", "x3"),
    ])
    x1_threshold = SmallerThanEqFormula(
        IdentifierTerm("x1", lambda x: x["Lowest Price"]),
        AtomicTerm(75))
    x2_threshold = GreaterThanEqFormula(
        IdentifierTerm("x2", lambda x: x["Peak Price"]),
        AtomicTerm(78))
    x3_vs_x1 = SmallerThanEqFormula(
        IdentifierTerm("x3", lambda x: x["Lowest Price"]),
        IdentifierTerm("x1", lambda x: x["Lowest Price"]))
    amzn_formula = AndFormula(x1_threshold, AndFormula(x2_threshold, x3_vs_x1))
    amazon_instable_pattern = Pattern(amzn_structure, amzn_formula,
                                      timedelta(days=1))

    goog_structure = SeqOperator([
        QItem("GOOG", "a"),
        QItem("GOOG", "b"),
        QItem("GOOG", "c"),
    ])
    a_vs_b = SmallerThanFormula(
        IdentifierTerm("a", lambda x: x["Peak Price"]),
        IdentifierTerm("b", lambda x: x["Peak Price"]))
    b_vs_c = SmallerThanFormula(
        IdentifierTerm("b", lambda x: x["Peak Price"]),
        IdentifierTerm("c", lambda x: x["Peak Price"]))
    google_ascend_pattern = Pattern(goog_structure, AndFormula(a_vs_b, b_vs_c),
                                    timedelta(minutes=3))

    runTest('multiplePatterns',
            [amazon_instable_pattern, google_ascend_pattern],
            createTestFile)
Пример #16
0
def nestedAscendingTest(createTestFile=False):
    """
    Runs the "nestedAscending" test on an AND of five arguments: SEQ(a, b), SEQ(c, d), AND(e, f), the
    primitive event g and SEQ(h, i), with a TrueCondition and a 1-minute window.
    Arrival rates are attached to the pattern and the plan is requested from the
    SORT_BY_FREQUENCY_LEFT_DEEP_TREE builder under the TRIVIAL_OPTIMIZER.
    """
    seq_ab = SeqOperator(PrimitiveEventStructure("AAPL", "a"),
                         PrimitiveEventStructure("AMZN", "b"))
    seq_cd = SeqOperator(PrimitiveEventStructure("AVID", "c"),
                         PrimitiveEventStructure("BIDU", "d"))
    and_ef = AndOperator(PrimitiveEventStructure("GOOG", "e"),
                         PrimitiveEventStructure("AAPL", "f"))
    event_g = PrimitiveEventStructure("GOOG", "g")
    seq_hi = SeqOperator(PrimitiveEventStructure("AMZN", "h"),
                         PrimitiveEventStructure("BIDU", "i"))
    pattern = Pattern(AndOperator(seq_ab, seq_cd, and_ef, event_g, seq_hi),
                      TrueCondition(), timedelta(minutes=1))

    # one arrival rate per primitive event, a through i
    arrival_rates = [0.11, 0.2, 0.3, 0.4, 0.5, 0.11, 0.5, 0.2, 0.4]
    pattern.set_statistics({StatisticsTypes.ARRIVAL_RATES: arrival_rates})

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE)
    optimizer_params = OptimizerParameters(
        opt_type=OptimizerTypes.TRIVIAL_OPTIMIZER,
        tree_plan_params=plan_params)
    storage_params = DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params
    eval_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=storage_params)

    runTest("nestedAscending", [pattern], createTestFile, eval_params)
Пример #17
0
def zstreamOrdNestedComplexStructuralTest():
    """
    Structural test for an AND of SEQ(a, b, c), SEQ(d, e), SEQ(f, g, SEQ(h, i)) and the primitive event j,
    with "Opening Price" and "Date" conditions over a, b, c and d and a 3-minute window.
    Arrival rates and a 10x10 selectivity matrix are attached to the pattern, the plan is requested from the
    ORDERED_ZSTREAM_BUSHY_TREE builder under the TRIVIAL_OPTIMIZER, and the resulting structure is compared
    by runStructuralTest against the nested tuple built below.
    """
    seq_abc = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("DRIV", "c"),
    )
    seq_de = SeqOperator(
        PrimitiveEventStructure("LOCM", "d"),
        PrimitiveEventStructure("GOOG", "e"),
    )
    seq_fghi = SeqOperator(
        PrimitiveEventStructure("AVID", "f"),
        PrimitiveEventStructure("BIDU", "g"),
        SeqOperator(PrimitiveEventStructure("ORLY", "h"),
                    PrimitiveEventStructure("CBRL", "i")),
    )
    event_j = PrimitiveEventStructure("MSFT", "j")
    structure = AndOperator(seq_abc, seq_de, seq_fghi, event_j)

    condition = AndCondition(
        BinaryCondition(
            Variable("a", lambda x: x["Opening Price"]),
            Variable("b", lambda x: x["Opening Price"]),
            relation_op=lambda x, y: x > y),
        BinaryCondition(
            Variable("d", lambda x: x["Opening Price"]),
            Variable("c", lambda x: x["Opening Price"]),
            relation_op=lambda x, y: x > y),
        EqCondition(Variable("a", lambda x: x["Date"]), 200802010900),
        EqCondition(Variable("b", lambda x: x["Date"]), 200802010900),
        EqCondition(Variable("c", lambda x: x["Date"]), 200802010900),
        EqCondition(Variable("d", lambda x: x["Date"]), 200802010900),
    )
    pattern = Pattern(structure, condition, timedelta(minutes=3))

    # symmetric matrix with 1.0 on the diagonal, one row/column per primitive event
    selectivity_matrix = [
        [1.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
        [0.1, 1.0, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17, 0.18, 0.19],
        [0.2, 0.12, 1.0, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28, 0.29],
        [0.3, 0.13, 0.23, 1.0, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39],
        [0.4, 0.14, 0.24, 0.34, 1.0, 0.45, 0.46, 0.47, 0.48, 0.49],
        [0.5, 0.15, 0.25, 0.35, 0.45, 1.0, 0.56, 0.57, 0.58, 0.59],
        [0.6, 0.16, 0.26, 0.36, 0.46, 0.56, 1.0, 0.67, 0.68, 0.69],
        [0.7, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 1.0, 0.78, 0.79],
        [0.8, 0.18, 0.28, 0.38, 0.48, 0.58, 0.68, 0.78, 1.0, 0.89],
        [0.9, 0.19, 0.29, 0.39, 0.49, 0.59, 0.69, 0.79, 0.89, 1.0],
    ]
    arrival_rates = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
    pattern.set_statistics({
        StatisticsTypes.ARRIVAL_RATES: arrival_rates,
        StatisticsTypes.SELECTIVITY_MATRIX: selectivity_matrix,
    })

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.ORDERED_ZSTREAM_BUSHY_TREE)
    optimizer_params = OptimizerParameters(
        opt_type=OptimizerTypes.TRIVIAL_OPTIMIZER,
        tree_plan_params=plan_params)
    storage_params = DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.storage_params
    eval_params = TreeBasedEvaluationMechanismParameters(
        optimizer_params=optimizer_params,
        storage_params=storage_params)

    # expected tree, assembled bottom-up from the subtrees of the individual arguments
    subtree_fghi = ('Seq', ('Seq', 'f', 'g'), ('Seq', 'h', 'i'))
    subtree_de = ('Seq', 'd', 'e')
    subtree_abc = ('Seq', ('Seq', 'a', 'b'), 'c')
    expected_result = ('And',
                       ('And', ('And', 'j', subtree_fghi), subtree_de),
                       subtree_abc)

    runStructuralTest('zstreamOrdNestedComplexStructuralTest', [pattern],
                      expected_result,
                      eval_mechanism_params=eval_params)
Пример #18
0
def multipleParentsForInternalNode(createTestFile=False):
    """
    Runs the "multipleParentsForInternalNode" multi-pattern test with four patterns that all start with
    SEQ(AAPL a, AMZN b) and an "Opening Price" GreaterThanCondition on (a, b).
    They differ in the third event (GOOG c, GOOG c, FB e, LI c), in the "Peak Price" threshold applied to
    it (500, 530, 520, 100) and in the time window (5, 3, 5 and 2 minutes).
    The evaluation uses a TRIVIAL_LEFT_DEEP_TREE plan and the SUBTREES_UNION multi-pattern approach.
    """
    structure1 = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    condition1 = AndCondition(
        GreaterThanCondition(
            Variable("a", lambda x: x["Opening Price"]),
            Variable("b", lambda x: x["Opening Price"])),
        GreaterThanCondition(
            Variable("c", lambda x: x["Peak Price"]), 500),
    )
    pattern1 = Pattern(structure1, condition1, timedelta(minutes=5))

    structure2 = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    condition2 = AndCondition(
        GreaterThanCondition(
            Variable("a", lambda x: x["Opening Price"]),
            Variable("b", lambda x: x["Opening Price"])),
        GreaterThanCondition(
            Variable("c", lambda x: x["Peak Price"]), 530),
    )
    pattern2 = Pattern(structure2, condition2, timedelta(minutes=3))

    structure3 = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("FB", "e"),
    )
    condition3 = AndCondition(
        GreaterThanCondition(
            Variable("a", lambda x: x["Opening Price"]),
            Variable("b", lambda x: x["Opening Price"])),
        GreaterThanCondition(
            Variable("e", lambda x: x["Peak Price"]), 520),
    )
    pattern3 = Pattern(structure3, condition3, timedelta(minutes=5))

    structure4 = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("LI", "c"),
    )
    condition4 = AndCondition(
        GreaterThanCondition(
            Variable("a", lambda x: x["Opening Price"]),
            Variable("b", lambda x: x["Opening Price"])),
        GreaterThanCondition(
            Variable("c", lambda x: x["Peak Price"]), 100),
    )
    pattern4 = Pattern(structure4, condition4, timedelta(minutes=2))

    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.TRIVIAL_LEFT_DEEP_TREE,
        TreeCostModels.INTERMEDIATE_RESULTS_TREE_COST_MODEL)
    storage_params = TreeStorageParameters(
        sort_storage=False,
        clean_up_interval=10,
        prioritize_sorting_by_timestamp=True)
    multi_pattern_params = MultiPatternEvaluationParameters(
        MultiPatternEvaluationApproaches.SUBTREES_UNION)
    eval_mechanism_params = TreeBasedEvaluationMechanismParameters(
        plan_params, storage_params, multi_pattern_params)

    all_patterns = [pattern1, pattern2, pattern3, pattern4]
    runMultiTest("multipleParentsForInternalNode", all_patterns,
                 createTestFile, eval_mechanism_params)
Пример #19
0
def frequencyPatternSearch3Test(createTestFile=False):
    """
    Runs the "frequency3" test on SEQ(AAPL a, AAPL b, AAPL c, LOCM d) with a TrueFormula and a 5-minute
    window. A frequency dictionary keyed by event type is attached to the pattern, and the
    SORT_BY_FREQUENCY_LEFT_DEEP_TREE evaluation mechanism type is passed to runTest.
    """
    events_in_order = [
        QItem("AAPL", "a"),
        QItem("AAPL", "b"),
        QItem("AAPL", "c"),
        QItem("LOCM", "d"),
    ]
    pattern = Pattern(SeqOperator(events_in_order), TrueFormula(),
                      timedelta(minutes=5))

    frequency_by_type = {"AAPL": 460, "LOCM": 219}
    pattern.set_statistics(StatisticsTypes.FREQUENCY_DICT, frequency_by_type)

    mechanism_type = EvaluationMechanismTypes.SORT_BY_FREQUENCY_LEFT_DEEP_TREE
    runTest("frequency3", [pattern], createTestFile, mechanism_type)
Пример #20
0
def notInTheBeginningShare(createTestFile=False):
    """
    Multi-pattern test over opening prices, run with the SUBTREES_UNION approach.

    pattern1: SEQ(NOT TYP1 x, NOT TYP2 y, NOT TYP3 z, AAPL a, AMZN b, GOOG c)
              WHERE a.OpeningPrice > b.OpeningPrice AND b.OpeningPrice < c.OpeningPrice
    pattern2: SEQ(NOT TYP1 x, NOT TYP2 y, AAPL a, AMZN b)
              WHERE a.OpeningPrice > b.OpeningPrice
    pattern3: SEQ(AAPL a, AMZN b, GOOG c)
              WHERE a.OpeningPrice > b.OpeningPrice AND c.OpeningPrice > b.OpeningPrice
    Each pattern uses a 5 minute window.
    """
    # One getter object is deliberately shared by every Variable in this test.
    opening_price = lambda event: event["Opening Price"]

    def negated(event_type, name):
        # Wraps a primitive event in a negation operator.
        return NegationOperator(PrimitiveEventStructure(event_type, name))

    # --- pattern1: three leading negations, then AAPL, AMZN, GOOG ---
    structure1 = SeqOperator(
        negated("TYP1", "x"),
        negated("TYP2", "y"),
        negated("TYP3", "z"),
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    condition1 = AndCondition(
        GreaterThanCondition(Variable("a", opening_price),
                             Variable("b", opening_price)),
        SmallerThanCondition(Variable("b", opening_price),
                             Variable("c", opening_price)),
    )
    pattern1 = Pattern(structure1, condition1, timedelta(minutes=5))

    # --- pattern2: two leading negations, then AAPL, AMZN ---
    structure2 = SeqOperator(
        negated("TYP1", "x"),
        negated("TYP2", "y"),
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
    )
    condition2 = GreaterThanCondition(Variable("a", opening_price),
                                      Variable("b", opening_price))
    pattern2 = Pattern(structure2, condition2, timedelta(minutes=5))

    # --- pattern3: no negations, AAPL, AMZN, GOOG ---
    structure3 = SeqOperator(
        PrimitiveEventStructure("AAPL", "a"),
        PrimitiveEventStructure("AMZN", "b"),
        PrimitiveEventStructure("GOOG", "c"),
    )
    condition3 = AndCondition(
        GreaterThanCondition(Variable("a", opening_price),
                             Variable("b", opening_price)),
        GreaterThanCondition(Variable("c", opening_price),
                             Variable("b", opening_price)),
    )
    pattern3 = Pattern(structure3, condition3, timedelta(minutes=5))

    # Evaluation settings: trivial left-deep plan, unsorted storage, subtree sharing.
    plan_params = TreePlanBuilderParameters(
        TreePlanBuilderTypes.TRIVIAL_LEFT_DEEP_TREE,
        TreeCostModels.INTERMEDIATE_RESULTS_TREE_COST_MODEL)
    storage_params = TreeStorageParameters(
        sort_storage=False,
        clean_up_interval=10,
        prioritize_sorting_by_timestamp=True)
    multi_pattern_params = MultiPatternEvaluationParameters(
        MultiPatternEvaluationApproaches.SUBTREES_UNION)
    eval_mechanism_params = TreeBasedEvaluationMechanismParameters(
        plan_params, storage_params, multi_pattern_params)

    patterns = [pattern1, pattern2, pattern3]
    runMultiTest("MultipleNotBeginningShare", patterns, createTestFile,
                 eval_mechanism_params)
Пример #21
0
def oneArgumentsearchTestKleeneClosure(createTestFile=False):
    """
    Kleene closure over a single event type with a unary price filter.
    PATTERN SEQ(KC(AAPL a, min_size=1, max_size=5))
    WHERE a.OpeningPrice > 135
    WITHIN 5 minutes
    """
    structure = SeqOperator(
        KleeneClosureOperator(PrimitiveEventStructure("AAPL", "a"),
                              min_size=1,
                              max_size=5))
    price_filter = SimpleCondition(
        Variable("a", lambda event: event["Opening Price"]),
        relation_op=lambda price: price > 135)
    window = timedelta(minutes=5)

    pattern = Pattern(structure, price_filter, window)
    runTest("oneArgumentKC", [pattern], createTestFile)
Пример #22
0
def run_twitter_sanity_check():
    """
    Basic sanity check: looks for two different tweets that retweeted the same tweet,
    which might help finding users with common interests.
    PATTERN SEQ(Tweet a, Tweet b)
    WHERE a.ID != b.ID AND a.Retweeted_Status != None AND a.Retweeted_Status == b.Retweeted_Status
    WITHIN 30 minutes

    Matches are written to "output.txt" in the current working directory; the input
    stream is always closed, even if the run raises.
    """
    tweet_type = DummyTwitterEventTypeClassifier.TWEET_TYPE
    # Shared getter: the "retweeted_status" entry when present, otherwise None.
    retweeted_status = lambda tweet: tweet[
        "retweeted_status"] if "retweeted_status" in tweet else None

    structure = SeqOperator(PrimitiveEventStructure(tweet_type, "a"),
                            PrimitiveEventStructure(tweet_type, "b"))
    condition = AndCondition(
        # The two tweets must be distinct.
        NotEqCondition(Variable("a", lambda tweet: tweet["id"]),
                       Variable("b", lambda tweet: tweet["id"])),
        # The first tweet must actually be a retweet.
        SimpleCondition(Variable("a", retweeted_status),
                        relation_op=lambda status: status is not None),
        # Both tweets must refer to the same retweeted status.
        EqCondition(Variable("a", retweeted_status),
                    Variable("b", retweeted_status)),
    )
    pattern_retweet = Pattern(structure, condition, timedelta(minutes=30))

    cep = CEP([pattern_retweet])
    event_stream = TwitterInputStream(['corona'])
    try:
        running_time = cep.run(
            event_stream,
            FileOutputStream(os.getcwd(), "output.txt", True),
            TweetDataFormatter(),
        )
        message = "Test twitterSanityCheck result: Succeeded, Time Passed: %s" % (
            running_time, )
        print(message)
    finally:
        event_stream.close()
Пример #23
0
def sortedStorageBenchMarkTest(createTestFile=False):
    """
    Benchmarks one AND pattern twice: first with runBenchMark's default settings
    (labelled "unsorted storage"), then with sorted tree storage.
    PATTERN AND(DRIV a, MSFT b, CBRL c, MSFT m)
    WHERE b.LowestPrice >= a.LowestPrice AND m.PeakPrice >= c.PeakPrice
        AND m.LowestPrice >= b.LowestPrice
    WITHIN 360 minutes

    createTestFile is accepted for signature parity with the other tests and is not used.
    """

    def attribute(event_name, field):
        # Builds a Variable with its own fresh getter for the given event field.
        return Variable(event_name, lambda event: event[field])

    structure = AndOperator(
        PrimitiveEventStructure("DRIV", "a"),
        PrimitiveEventStructure("MSFT", "b"),
        PrimitiveEventStructure("CBRL", "c"),
        PrimitiveEventStructure("MSFT", "m"),
    )
    condition = AndCondition(
        GreaterThanEqCondition(attribute("b", "Lowest Price"),
                               attribute("a", "Lowest Price")),
        GreaterThanEqCondition(attribute("m", "Peak Price"),
                               attribute("c", "Peak Price")),
        GreaterThanEqCondition(attribute("m", "Lowest Price"),
                               attribute("b", "Lowest Price")),
    )
    pattern = Pattern(structure, condition, timedelta(minutes=360))

    # First run: no evaluation parameters supplied.
    runBenchMark("sortedStorageBenchMark - unsorted storage", [pattern])

    # Second run: same plan parameters as the testing defaults, but sorted storage.
    priorities = {"a": 122, "b": 200, "c": 104, "m": 139}
    storage_params = TreeStorageParameters(sort_storage=True,
                                           attributes_priorities=priorities)
    eval_params = TreeBasedEvaluationMechanismParameters(
        DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS.tree_plan_params,
        storage_params)
    runBenchMark("sortedStorageBenchMark - sorted storage", [pattern],
                 eval_mechanism_params=eval_params)
Пример #24
0
def sortedStorageBenchMarkTest(createTestFile=False):
    """
    Benchmarks one AND pattern twice: first with runBenchMark's default storage,
    then with explicitly supplied TreeStorageParameters.
    PATTERN AND(DRIV a, MSFT b, CBRL c, MSFT m)
    WHERE b.LowestPrice >= a.LowestPrice AND b.PeakPrice >= c.PeakPrice
        AND b.LowestPrice >= m.LowestPrice
    WITHIN timedelta.max

    createTestFile is accepted for signature parity with the other tests and is not used.
    """

    def term(event_name, field):
        # Builds an IdentifierTerm with its own fresh getter for the given event field.
        return IdentifierTerm(event_name, lambda event: event[field])

    structure = AndOperator([
        QItem("DRIV", "a"),
        QItem("MSFT", "b"),
        QItem("CBRL", "c"),
        QItem("MSFT", "m"),
    ])
    lowest_b_ge_a = GreaterThanEqFormula(term("b", "Lowest Price"),
                                         term("a", "Lowest Price"))
    # AndFormula is used pairwise here, so the last two comparisons are nested.
    remaining = AndFormula(
        GreaterThanEqFormula(term("b", "Peak Price"),
                             term("c", "Peak Price")),
        GreaterThanEqFormula(term("b", "Lowest Price"),
                             term("m", "Lowest Price")),
    )
    formula = AndFormula(lowest_b_ge_a, remaining)
    pattern = Pattern(structure, formula, timedelta.max)

    runBenchMark("sortedStorageBenchMark - default storage", [pattern])

    priorities = {"a": 122, "b": 139, "c": 104, "m": 139}
    storage_params = TreeStorageParameters(True, priorities)
    runBenchMark("sortedStorageBenchMark - sorted storage", [pattern],
                 storage_params=storage_params)
Пример #25
0
def amazonInstablePatternSearchTest(createTestFile=False):
    """
    Searches for an unstable day for Amazon.
    PATTERN SEQ(AmazonStockPriceUpdate x1, AmazonStockPriceUpdate x2, AmazonStockPriceUpdate x3)
    WHERE x1.LowestPrice <= 75 AND x2.PeakPrice >= 78 AND x3.LowestPrice <= x1.LowestPrice
    WITHIN 1 day
    """
    structure = SeqOperator(
        [QItem("AMZN", name) for name in ("x1", "x2", "x3")])

    first_low = SmallerThanEqFormula(
        IdentifierTerm("x1", lambda event: event["Lowest Price"]),
        AtomicTerm(75))
    # AndFormula is used pairwise, so the last two comparisons are nested together.
    peak_then_drop = AndFormula(
        GreaterThanEqFormula(
            IdentifierTerm("x2", lambda event: event["Peak Price"]),
            AtomicTerm(78)),
        SmallerThanEqFormula(
            IdentifierTerm("x3", lambda event: event["Lowest Price"]),
            IdentifierTerm("x1", lambda event: event["Lowest Price"])),
    )
    formula = AndFormula(first_low, peak_then_drop)

    amazonInstablePattern = Pattern(structure, formula, timedelta(days=1))
    runTest('amazonInstable', [amazonInstablePattern], createTestFile)
Пример #26
0
def msftDrivRacePatternSearchTest(createTestFile=False):
    """
    Searches for a ten-minute "race" between Driv and Microsoft: five alternating
    updates whose peak prices are strictly increasing.
    PATTERN SEQ(MicrosoftStockPriceUpdate a, DrivStockPriceUpdate b, MicrosoftStockPriceUpdate c,
                DrivStockPriceUpdate d, MicrosoftStockPriceUpdate e)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
        AND c.PeakPrice < d.PeakPrice AND d.PeakPrice < e.PeakPrice
    WITHIN 10 minutes
    """

    def peak_below(lower, higher):
        # lower.PeakPrice < higher.PeakPrice, with a fresh getter for each term.
        return SmallerThanFormula(
            IdentifierTerm(lower, lambda event: event["Peak Price"]),
            IdentifierTerm(higher, lambda event: event["Peak Price"]))

    typed_names = (("MSFT", "a"), ("DRIV", "b"), ("MSFT", "c"), ("DRIV", "d"),
                   ("MSFT", "e"))
    structure = SeqOperator(
        [QItem(event_type, name) for event_type, name in typed_names])

    # AndFormula is used pairwise: (a<b AND b<c) AND (c<d AND d<e).
    first_half = AndFormula(peak_below("a", "b"), peak_below("b", "c"))
    second_half = AndFormula(peak_below("c", "d"), peak_below("d", "e"))
    formula = AndFormula(first_half, second_half)

    msftDrivRacePattern = Pattern(structure, formula, timedelta(minutes=10))
    runTest('msftDrivRace', [msftDrivRacePattern], createTestFile)
Пример #27
0
def nonsensePatternSearchTest(createTestFile=False,
                              eval_mechanism_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS,
                              test_name="nonsense"):
    """
    Searches for a cyclic (and therefore unsatisfiable) ordering of peak prices.
    PATTERN AND(AmazonStockPriceUpdate a, AvidStockPriceUpdate b, AppleStockPriceUpdate c)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice AND c.PeakPrice < a.PeakPrice
    WITHIN 1 minute

    eval_mechanism_params and test_name are forwarded to runTest unchanged.
    """

    def peak_below(lower, higher):
        # lower.PeakPrice < higher.PeakPrice, with fresh callables for every condition.
        return BinaryCondition(
            Variable(lower, lambda event: event["Peak Price"]),
            Variable(higher, lambda event: event["Peak Price"]),
            relation_op=lambda left, right: left < right)

    structure = AndOperator(PrimitiveEventStructure("AMZN", "a"),
                            PrimitiveEventStructure("AVID", "b"),
                            PrimitiveEventStructure("AAPL", "c"))
    # The three comparisons form a cycle a < b < c < a.
    condition = AndCondition(
        peak_below("a", "b"),
        peak_below("b", "c"),
        peak_below("c", "a"),
    )
    nonsensePattern = Pattern(structure, condition, timedelta(minutes=1))

    runTest(test_name, [nonsensePattern], createTestFile, eval_mechanism_params)
Пример #28
0
def KC_Condition_Failure_03(createTestFile=False):
    """
    Negative test for KC(And([a, b, c])) combined with a KCIndexCondition over all three names.

    The test reports success when building the pattern raises any Exception and
    failure when construction completes. createTestFile is not used.
    NOTE(review): presumably the multi-name KCIndexCondition is what is expected to be
    rejected - confirm against the condition validation code.
    """

    def peak_price(event_name):
        # Fresh Variable (and getter) reading the peak price of the named event.
        return Variable(event_name, lambda event: event["Peak Price"])

    try:
        # Every constructor call stays inside the try: any of them may be the one to raise.
        Pattern(
            KleeneClosureOperator(
                AndOperator(
                    PrimitiveEventStructure("GOOG", "a"),
                    PrimitiveEventStructure("GOOG", "b"),
                    PrimitiveEventStructure("GOOG", "c"),
                ),
                min_size=1,
                max_size=3,
            ),
            AndCondition(
                SmallerThanCondition(peak_price("a"), peak_price("b")),
                SmallerThanCondition(peak_price("b"), peak_price("c")),
                KCIndexCondition(names={'a', 'b', 'c'},
                                 getattr_func=lambda event: event["Peak Price"],
                                 relation_op=lambda left, right: left < 1 + right,
                                 offset=-1,
                                 first_index=2),
            ),
            timedelta(minutes=3),
        )
    except Exception:
        print("Test KC_Condition_Failure_03 Succeeded")
    else:
        print("Test KC_Condition_Failure_03 Failed")
Пример #29
0
def msftDrivRacePatternSearchTest(createTestFile=False,
                                  eval_mechanism_params=DEFAULT_TESTING_EVALUATION_MECHANISM_SETTINGS,
                                  test_name="msftDrivRace"):
    """
    Searches for a ten-minute "race" between Driv and Microsoft: five alternating
    updates whose peak prices are strictly increasing.
    PATTERN SEQ(MicrosoftStockPriceUpdate a, DrivStockPriceUpdate b, MicrosoftStockPriceUpdate c,
                DrivStockPriceUpdate d, MicrosoftStockPriceUpdate e)
    WHERE a.PeakPrice < b.PeakPrice AND b.PeakPrice < c.PeakPrice
        AND c.PeakPrice < d.PeakPrice AND d.PeakPrice < e.PeakPrice
    WITHIN 10 minutes

    eval_mechanism_params and test_name are forwarded to runTest unchanged.
    """

    def peak_below(lower, higher):
        # lower.PeakPrice < higher.PeakPrice, with fresh callables for every condition.
        return BinaryCondition(
            Variable(lower, lambda event: event["Peak Price"]),
            Variable(higher, lambda event: event["Peak Price"]),
            relation_op=lambda left, right: left < right)

    structure = SeqOperator(
        PrimitiveEventStructure("MSFT", "a"),
        PrimitiveEventStructure("DRIV", "b"),
        PrimitiveEventStructure("MSFT", "c"),
        PrimitiveEventStructure("DRIV", "d"),
        PrimitiveEventStructure("MSFT", "e"),
    )
    # Consecutive events in the sequence must have rising peak prices.
    consecutive_pairs = (("a", "b"), ("b", "c"), ("c", "d"), ("d", "e"))
    condition = AndCondition(
        *[peak_below(lower, higher) for lower, higher in consecutive_pairs])
    msftDrivRacePattern = Pattern(structure, condition, timedelta(minutes=10))

    runTest(test_name, [msftDrivRacePattern], createTestFile, eval_mechanism_params)
Пример #30
0
def MinMax_2_TestKleeneClosure(createTestFile=False):
    """
    Kleene closure with both a lower and an upper size bound, run on the
    nasdaqEventStreamKC event stream.
    PATTERN SEQ(KC(GOOG a, min_size=4, max_size=5))
    WHERE a.OpeningPrice > 0
    WITHIN 5 minutes
    """
    structure = SeqOperator(
        KleeneClosureOperator(PrimitiveEventStructure("GOOG", "a"),
                              min_size=4,
                              max_size=5))
    positive_opening_price = SimpleCondition(
        Variable("a", lambda event: event["Opening Price"]),
        relation_op=lambda price: price > 0)
    window = timedelta(minutes=5)

    pattern = Pattern(structure, positive_opening_price, window)
    runTest("MinMax_2_", [pattern], createTestFile, events=nasdaqEventStreamKC)