Example #1
    def test_scenario03(self):
        """
        Scenario: Successfully building test predictions from source
            Given I created the dataset in setup_scenario02
            And I create BigML topic model resources from source to test "<test>" with options "<options>" and log predictions in "<output>"
            And I check that the dataset has been created
            And I check that the topic model has been created
            And I check that the topic distributions are ready
            Then the local topic distribution file is like "<topic_distribution_file>"

            Examples:
            | test                    | options                                    | output                                  | topic_distribution_file                     |
            | ../data/spam.csv        | --test-separator="\t" --prediction-header | ./scenario3_td/topic_distributions.csv  | ./check_files/topic_distributions_spam.csv |


        """
        print self.test_scenario03.__doc__
        examples = [[
            'data/spam.csv', '--test-separator="\t" --prediction-header',
            'scenario3_td/topic_distributions.csv',
            'check_files/topic_distributions_spam.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            topic_pred.i_create_all_td_resources_from_source( \
                self, example[0], example[1], example[2])
            test_pred.i_check_create_dataset(self, suffix=None)
            topic_pred.i_check_create_topic_model(self)
            topic_pred.i_check_create_topic_distributions(self)
            topic_pred.i_check_topic_distributions(self, example[3])
Example #2
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to find anomaly scores for "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_an_1| {"data": "../data/tiny_kdd.csv", "output": "./scenario_an_1/anomaly_scores.csv", "test": "../data/test_kdd.csv"}   | ../data/test_kdd.csv   | ./scenario_an_2/anomaly_scores.csv   | ./check_files/anomaly_scores_kdd.csv   |

        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_an_1', '{"data": "data/tiny_kdd.csv", "output": "scenario_an_1/anomaly_scores.csv", "test": "data/test_kdd.csv"}', 'data/test_kdd.csv', 'scenario_an_2/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_anomaly.i_create_anomaly_resources_from_source(self, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[4])
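
The "Given I have previously executed ... or reproduce it" step lets a scenario reuse the artifacts of an earlier one instead of rebuilding them from scratch. A minimal sketch of the idea, with a hypothetical helper name and logic (the real bigmler step definition may differ):

    import json
    import os

    def reproduce_if_missing(scenario_dir, kwargs_json, setup_fn):
        # Hypothetical helper: re-run the setup scenario only when its
        # output directory is missing, forwarding the JSON-encoded
        # arguments (keys cast to str for Python 2 keyword passing).
        if not os.path.exists(scenario_dir):
            kwargs = json.loads(kwargs_json)
            setup_fn(**dict((str(k), v) for k, v in kwargs.items()))
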
Example #3
    def test_scenario2(self):
        """
            Scenario: Successfully building predictions for data streamed to stdin:
                Given I create BigML resources uploading train "<data>" file to test "<test>" read from stdin with name "<name>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | output                            |predictions_file           | name |
                | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario_st_2/predictions.csv   | ./check_files/predictions_iris.csv   | Source name: áéí |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario_st_2/predictions.csv', 'check_files/predictions_iris.csv', 'Source name: áéí']]
        for example in examples:
            print "\nTesting with:\n", example
            stdin.i_create_all_resources_to_test_from_stdin(self, data=example[0], test=example[1], name=example[4], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example #4
    def test_scenario11(self):
        """
            Scenario: Successfully building feature selection from dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validation with options "<options>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-ensembles have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the predictions file "<predictions_file>" is like "<estimated_file>"

                Examples:
                | data             | output                     | kfold | options | predictions_file | estimated_file |
                | ../data/iris.csv | ./scenario_a_14/evaluation | 2     | --exclude-features="petal length,sepal length" --predictions-csv --number-of-models 2 | ./scenario_a_14/test/kfold2_pred/predictions.csv | ./check_files/analyze_predictions_iris_e.csv |
        """
        print self.test_scenario11.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_14/evaluation', '2', ' --exclude-features="petal length,sepal length" --predictions-csv --number-of-models 2','scenario_a_14/test/kfold2_pred/predictions.csv', 'check_files/analyze_predictions_iris_e.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_options(self, k_folds=example[2], options=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_ensembles(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_predictions_file(self, example[4], example[5])
Example #5
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML resources using source to find centroids for "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_c_1| {"data": "../data/diabetes.csv", "output": "./scenario_c_1/centroids.csv", "test": "../data/diabetes.csv"}   | ../data/diabetes.csv   | ./scenario_c_2/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_c_1', '{"data": "data/diabetes.csv", "output": "scenario_c_1/centroids.csv", "test": "data/diabetes.csv"}', 'data/diabetes.csv', 'scenario_c_2/centroids.csv', 'check_files/centroids_diabetes.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_cluster.i_create_cluster_resources_from_source(self, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
Example #6
    def test_scenario9(self):
        """
            Scenario: Successfully building random fields analysis from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML random fields analysis with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-random trees have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best random candidates number is "<random_candidates>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                  | kfold | metric   | random_candidates | metric_value |
                | ../data/iris.csv | ./scenario_a_11/evaluation |2     | precision  | 4               | 96.09%         |
        """
        print self.test_scenario9.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_11/evaluation', '2', 'precision', '4', '96.09%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_random_analysis(self, k_fold=example[2], metric=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_random_forest(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_random_candidates(self, example[4], example[3], example[5])
Example #7
    def test_scenario1(self):
        """
            Scenario: Successfully building k-fold cross-validation from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML <kfold>-fold cross-validation
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that the <kfold>-fold cross-validation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                | data             | output                    | kfold | json_evaluation_file               |
                | ../data/iris.csv | ./scenario_a_1/evaluation | 2     | ./check_files/evaluation_kfold.json |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_1/evaluation', '2', 'check_files/evaluation_kfold.json']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation(self, k_folds=example[2])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_kfold_cross_validation(self, example[2])
            evaluation.then_the_evaluation_file_is_like(self, example[3])
Example #8
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data    | output_dir               | test                    | output                         |predictions_file                        | objective | fields   |
                | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv   | ./scénario1/predictions.csv   | ./check_files/predictions_iris_2fb.csv   | spécies     | "pétal width" |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv', u'scénario1/predictions.csv', 'check_files/predictions_iris_2fb.csv', u'spécies', u'"pétal width"']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_create_resources_from_dataset_objective_model(self, objective=example[5], fields=example[6], test=example[2], output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example #9
    def setup_scenario1(self):
        """
            Scenario: Successfully building multi-label test predictions from start:
                Given I create BigML multi-label resources tagged as "<tag>" with "<label_separator>" label separator and <number_of_labels> labels uploading train "<data>" file with "<training_separator>" field separator to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |tag |label_separator |number_of_labels | data                   |training_separator | test                        | output                          |predictions_file           |
                |my_multilabel_1|:|7| ../data/multilabel.csv |,| ../data/test_multilabel.csv | ./scenario_ml_1/predictions.csv | ./check_files/predictions_ml.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [[
            'my_multilabel_1', ':', '7', 'data/multilabel.csv', ',',
            'data/test_multilabel.csv', 'scenario_ml_1/predictions.csv',
            'check_files/predictions_ml.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            ml_pred.i_create_all_ml_resources(self,
                                              tag=example[0],
                                              label_separator=example[1],
                                              number_of_labels=example[2],
                                              data=example[3],
                                              training_separator=example[4],
                                              test=example[5],
                                              output=example[6])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[7])
Example #10
    def test_scenario2(self):
        """
            Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with proportional missing strategy and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the source has been created from the test file
            And I check that the dataset has been created from the test file
            And I check that the batch prediction has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris.csv   | ../data/test_iris_nulls.csv   | ./scenario_mis_2/predictions.csv   | ./check_files/predictions_iris_nulls.csv
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris_nulls.csv', 'scenario_mis_2/predictions.csv', 'check_files/predictions_iris_nulls.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_remote_proportional(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example #11
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML logistic regression resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
            | ../data/iris.csv   | ../data/test_iris.csv   | ./scenario1_lr/predictions.csv   | ./check_files/predictions_iris_lr.csv   |
        """
        print self.setup_scenario02.__doc__
        examples = [
            ['data/iris.csv', 'data/test_iris.csv', 'scenario1_lr/predictions.csv', 'check_files/predictions_iris_lr.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            lr_pred.i_create_all_lr_resources(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            lr_pred.i_check_create_lr_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example #12
    def setup_scenario02(self):
        """
        Scenario: Successfully building test predictions from start:
            Given I create BigML resources uploading train "<data>" file to test "<test>" and log predictions in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            | data               | test                    | output                        |predictions_file           |
        """
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'scenario1_r/predictions.csv', 'check_files/predictions_grades.csv'],
            ['data/iris.csv', 'data/test_iris.csv', 'scenario1/predictions.csv', 'check_files/predictions_iris.csv']]
        show_doc(self.setup_scenario02, examples)
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
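
Unlike the other examples, which print the raw docstring, this one calls show_doc and leaves the Examples table in the docstring without data rows. A minimal sketch of what such a helper could do, assuming it prints the docstring followed by the rows that drive the test (the real bigmler helper may differ):

    def show_doc(method, examples=None):
        # Hypothetical stand-in for bigmler's show_doc: print the scenario
        # docstring, then the example rows that fill its empty Examples table.
        print method.__doc__
        for example in examples or []:
            print "| " + " | ".join(example) + " |"
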
Example #13
    def test_scenario7(self):
        """
            Scenario: Successfully building feature selection for a category from dataset:
                Given I create BigML dataset uploading train "<data>" file with attributes "<attributes>" in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations improving "<metric>" for category "<category>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | attributes | output                    | kfold | metric   | category | selection   | metric_value
                | ../data/spam.csv    | ../data/spam_attributes.json |./scenario_a_9/evaluation | 2     | recall   | spam     | Message     | 61.24%
        """
        print self.test_scenario7.__doc__
        examples = [
            ['data/spam.csv', 'data/spam_attributes.json', 'scenario_a_9/evaluation', '2', 'recall', 'spam', 'Message', '61.24%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset_with_attributes(self, data=example[0], attributes=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_metric_category(self, k_folds=example[3], metric=example[4], category=example[5])
            test_pred.i_check_create_kfold_datasets(self, example[3])
            test_pred.i_check_create_kfold_models(self, example[3])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[3])
            test_pred.i_check_feature_selection(self, example[6], example[4], example[7])
Example #14
    def test_scenario5(self):
        """
            Scenario: Successfully building nodes threshold analysis from dataset file:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML nodes analysis from dataset file from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                  | min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value
                | ../data/iris.csv | ./scenario_a_4/evaluation | 3         | 14        | 2         |2     | precision  | 9                | 94.71%
        """
        print self.test_scenario5.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_4/evaluation', '3', '14', '2', '2', 'precision', '9', '94.71%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_nodes_analysis_from_dataset_file(
                self, min_nodes=example[2], max_nodes=example[3],
                nodes_step=example[4], k_fold=example[5], metric=example[6])
            test_pred.i_check_create_kfold_datasets(self, example[5])
            test_pred.i_check_create_kfold_models(self, example[5])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[5])
            test_pred.i_check_node_threshold(self, example[7], example[6], example[8])
Example #15
    def test_scenario7(self):
        """
            Scenario: Successfully building anomalous dataset test predictions from anomaly
                Given I create BigML anomaly detector from data <data> with options <options> and generate a new dataset of anomalies in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                Then I check that the new top anomalies dataset has been created
                And the top anomalies in the anomaly detector are <top_anomalies>
                And the forest size in the anomaly detector is <forest_size>
                And the number of records in the top anomalies dataset is <top_anomalies>

                Examples:
                | data               | options                              | output_dir     | top_anomalies | forest_size |
                | data/tiny_kdd.csv  | --top-n 15 --forest-size 40         | scenario_an_7  | 15            | 40          |

        """
        print self.test_scenario7.__doc__
        examples = [
            ['data/tiny_kdd.csv', '--top-n 15 --forest-size 40 ', 'scenario_an_7', '15', '40']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_anomaly_resources_with_options(self, example[0], example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_anomaly.i_check_top_anomalies(self, example[3])
            test_anomaly.i_check_forest_size(self, example[4])
            test_anomaly.i_check_dataset_lines_number(self, example[3])
Example #16
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML multi-label resources using source to test "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
                | scenario_ml_1| {"tag": "my_multilabel_1", "data": "../data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_ml_1/predictions.csv", "test": "../data/test_multilabel.csv"}   | ../data/test_multilabel.csv   | ./scenario_ml_2/predictions.csv   | ./check_files/predictions_ml_comma.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'scenario_ml_1',
            '{"tag": "my_multilabel_1", "data": "data/multilabel.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_ml_1/predictions.csv", "test": "data/test_multilabel.csv"}',
            'data/test_multilabel.csv', 'scenario_ml_2/predictions.csv',
            'check_files/predictions_ml_comma.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(
                self, example[0], example[1])
            test_pred.i_create_resources_from_source(self,
                                                     multi_label='multi-label',
                                                     test=example[2],
                                                     output=example[3])
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example #17
    def test_scenario2(self):
        """
            Scenario: Successfully building test predictions from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML multi-label resources using source with objective "<objective>" and model fields "<model_fields>" to test "<test>" and log predictions in "<output>"
                And I check that the dataset has been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |scenario    | kwargs                                                  | objective | model_fields | test                    | output                        |predictions_file           |
                | scenario_mlm_1| {"tag": "my_multilabelm_1", "data": "../data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "./scenario_mlm_1/predictions.csv", "test": "../data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"}   | class | -type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P |../data/test_multilabel.csv   | ./scenario_mlm_2/predictions.csv   | ./check_files/predictions_ml_comma.csv   |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_mlm_1', '{"tag": "my_multilabelm_1", "data": "data/multilabel_multi.csv", "label_separator": ":", "number_of_labels": 7, "training_separator": ",", "output": "scenario_mlm_1/predictions.csv", "test": "data/test_multilabel.csv", "ml_fields": "type,class", "model_fields": "-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P", "objective": "class"}', 'class', '-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P', 'data/test_multilabel.csv', 'scenario_mlm_2/predictions.csv', 'check_files/predictions_ml_comma.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_pred.i_create_resources_from_source_with_objective(self, multi_label='multi-label ', objective=example[2], model_fields=example[3], test=example[4], output=example[5])
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[6])
Example #18
    def setup_scenario1(self):
        """
            Scenario: Successfully building test anomaly scores from scratch:
                Given I create BigML resources uploading train "<data>" file to create anomaly scores for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data                 | test               | output                           |predictions_file           |
                | ../data/tiny_kdd.csv | ../data/test_kdd.csv | ./scenario_an_1/anomaly_scores.csv | ./check_files/anomaly_scores_kdd.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [
            ['data/tiny_kdd.csv', 'data/test_kdd.csv', 'scenario_an_1/anomaly_scores.csv', 'check_files/anomaly_scores_kdd.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[3])
Example #19
    def test_scenario6(self):
        """
            Scenario: Successfully building feature selection from dataset excluding features:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations excluding "<features>" with separator "<args_separator>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                    | kfold | features              | args_separator | metric   | selection   | metric_value |
                | ../data/iris.csv | ./scenario_a_7/evaluation | 2     | petal length!sepal width | !              | accuracy | petal width | 95.33%      |
        """
        print self.test_scenario6.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_7/evaluation', '2', 'petal length!sepal width', '!', 'accuracy', 'petal width', '95.33%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_separator_metric_no_fields(self, k_folds=example[2], features=example[3], args_separator=example[4], metric=example[5])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_feature_selection(self, example[6], example[5], example[7])
Example #20
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions with missing-splits model:
                Given I create BigML resources uploading train "<data>" file to test "<test>" with a missing-splits model and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                          | output                            |predictions_file           |
                | ../data/iris_missing.csv   | ../data/test_iris_missing.csv   | ./scenario_mspl_1/predictions.csv | ./check_files/predictions_iris_missing.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris_missing.csv', 'data/test_iris_missing.csv', 'scenario_mspl_1/predictions.csv', 'check_files/predictions_iris_missing.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_missing_splits(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[3])
Example #21
    def test_scenario3(self):
        """
            Scenario: Successfully building feature selection from dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | objective     |output                    | kfold | metric   | selection            | metric_value |
                | ../data/iris_2f.csv | 0             |./scenario_a_5/evaluation | 2     | r_squared| species              | 0.352845     |
                | ../data/iris_2f.csv | 0             |./scenario_a_8/evaluation | 2     | mean_squared_error| species     | 0.475200     |
        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris_2f.csv', '0', 'scenario_a_5/evaluation', '2', 'r_squared', 'species', '0.352845'],
            ['data/iris_2f.csv', '0', 'scenario_a_8/evaluation', '2', 'mean_squared_error', 'species', '0.475200']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_objective(self, k_folds=example[3], objective=example[1], metric=example[4])
            test_pred.i_check_create_kfold_datasets(self, example[3])
            test_pred.i_check_create_kfold_models(self, example[3])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[3])
            test_pred.i_check_feature_selection(self, example[5], example[4], example[6])
Example #22
    def test_scenario3(self):
        """
            Scenario: Successfully building evaluations from start:
                Given I create BigML resources uploading train "<data>" file to create model and log in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I evaluate "<test>" with proportional missing strategy
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the evaluation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                | data             | test                          | output                      | json_evaluation_file    |
                | ../data/iris.csv | ../data/iris_nulls.csv   | ./scenario_mis_3/evaluation | ./check_files/evaluation_iris_nulls.json |

        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', 'data/iris_nulls.csv', 'scenario_mis_3/evaluation', 'check_files/evaluation_iris_nulls.json']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_to_model(self, data=example[0], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            evaluation.i_create_proportional_to_evaluate(self, test=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.then_the_evaluation_file_is_like(self, example[3])
Example #23
    def test_scenario4(self):
        """
            Scenario: Successfully building test anomaly score predictions from training set as a dataset:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores for the training set remotely saved to dataset with no CSV output and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the batch anomaly scores prediction has been created
                Then I check that the batch anomaly scores dataset exists
                And no local CSV file is created

                Examples:
                | data             | output_dir      |
                | ../data/iris.csv | ./scenario_ab_4 |
        """
        print self.test_scenario4.__doc__
        examples = [
            ['data/iris.csv', 'scenario_ab_4']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_without_test_split(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_batch_anomaly_score_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
Example #24
    def test_scenario03(self):
        """
        Scenario: Successfully building test predictions from source
            Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
            And I create BigML logistic regression resources using source to test "<test>" and log predictions in "<output>"
            And I check that the dataset has been created
            And I check that the model has been created
            And I check that the predictions are ready
            Then the local prediction file is like "<predictions_file>"

            Examples:
            |scenario    | kwargs                                                  | test                    | output                        |predictions_file           |
            | scenario1_lr| {"data": "../data/iris.csv", "output": "./scenario1_lr/predictions.csv", "test": "../data/test_iris.csv"}   | ../data/test_iris.csv   | ./scenario2_lr/predictions.csv   | ./check_files/predictions_iris_lr.csv   |
        """
        print self.test_scenario03.__doc__
        examples = [
            ['scenario1_lr', '{"data": "data/iris.csv", "output": "scenario1_lr/predictions.csv", "test": "data/test_iris.csv"}', 'data/test_iris.csv', 'scenario2_lr/predictions.csv', 'check_files/predictions_iris_lr.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            lr_pred.i_create_lr_resources_from_source(self, None, test=example[2], output=example[3])
            test_pred.i_check_create_dataset(self, suffix=None)
            lr_pred.i_check_create_lr_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example #25
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroid predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_cb_1_r/centroids.csv | ./check_files/centroids_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_cb_1_r/centroids.csv', 'check_files/centroids_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
Example #26
    def test_scenario1(self):
        """
            Scenario: Successfully exporting models with params in the available languages:
                Given I create BigML resources uploading train "<data>" file using "<source_attributes>" and log in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I export the model as a function in "<language>" to "<output>"
                Then the export file is like "<check_file>"

                Examples:
                | data                 | source_attributes             | output                   | language | check_file                           |
                | ../data/movies.csv   | data/movies_source_attrs.json | ./scenario_exp_1_a/model | python   | check_files/export/model_function.py |

        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/movies.csv', 'data/movies_source_attrs.json', 'scenario_exp_1_a/model', 'python', 'check_files/export/model_function.py'],
            ['data/movies.csv', 'data/movies_source_attrs.json', 'scenario_exp_1_b/model', 'javascript', 'check_files/export/model_function.js'],
            ['data/movies.csv', 'data/movies_source_attrs.json', 'scenario_exp_1_c/model', 'r', 'check_files/export/model_function.R'],
            ['data/iris.csv', '', 'scenario_exp_1_d/model', 'tableau', 'check_files/export/model_function.tb'],
            ['data/iris.csv', '', 'scenario_exp_1_e/model', 'mysql', 'check_files/export/model_function.sql'],
            ['data/libros.csv', 'data/libros_source_attrs.json', 'scenario_exp_1_f/model', 'python', 'check_files/export/model_function_utf8.py'],
            ['data/libros.csv', 'data/libros_source_attrs.json', 'scenario_exp_1_g/model', 'r', 'check_files/export/model_function_utf8.R'],
            ['data/libros.csv', 'data/libros_source_attrs.json', 'scenario_exp_1_h/model', 'javascript', 'check_files/export/model_function_utf8.js']]
        for example in examples:
            print "\nTesting with:\n", example
            export.i_create_all_resources_to_model_with_source_attrs( \
                self, data=example[0], source_attributes=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_model(self)
            export.i_export_model(self, language=example[3], output=example[2])
            export.i_check_if_the_output_is_like_expected_file( \
                self, language=example[3], expected_file=example[4])
Example #27
    def test_scenario1(self):
        """
            Scenario: Successfully building test anomaly score predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_ab_1_r/anomalies.csv | ./check_files/anomaly_scores_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades_perm.csv', 'data/grades_fields_map_perm.csv', 'scenario_ab_1_r/anomalies.csv', 'check_files/anomaly_scores_grades.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_with_mapping(self, data=example[0], test=example[1], fields_map=example[2], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[4])
Example #28
    def test_scenario2(self):
        """
            Scenario: Successfully building remote test centroid predictions from scratch to dataset:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely to dataset with no CSV and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                Then I check that the batch centroids dataset exists
                And no local CSV file is created

                Examples:
                | data               | test                    |  output_dir     |
                | ../data/grades.csv | ../data/test_grades.csv | ./scenario_cb_2 |

        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.csv', 'scenario_cb_2']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_to_dataset(self, data=example[0], test=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            batch_pred.i_check_create_batch_centroids_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
Example #29
    def test_scenario1(self):
        """
            Scenario: Successfully building test centroids from scratch:
                Given I create BigML resources uploading train "<data>" file to create centroids for "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test               | output                           |predictions_file           |
                | ../data/grades.csv | ../data/grades.csv | ./scenario_c_1_r/centroids.csv | ./check_files/centroids_grades.csv |
                | ../data/diabetes.csv   | ../data/diabetes.csv   | ./scenario_c_1/centroids.csv   | ./check_files/centroids_diabetes.csv   |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/grades.csv', 'data/grades.csv', 'scenario_c_1_r/centroids.csv', 'check_files/centroids_grades.csv'],
            ['data/diabetes.csv', 'data/diabetes.csv', 'scenario_c_1/centroids.csv', 'check_files/centroids_diabetes.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources(self, data=example[0], test=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[3])
Example #30
0
    def setup_scenario1(self):
        """
            Scenario: Successfully building multi-label test predictions from start:
                Given I create BigML multi-label resources tagged as "<tag>" with "<label_separator>" label separator and <number_of_labels> labels uploading train "<data>" file with "<training_separator>" field separator and "<ml_fields>" as multi-label fields using model_fields "<model_fields>" and objective "<objective>" to test "<test>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the models have been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |tag |label_separator |number_of_labels | data                   |training_separator | ml_fields | model_fields | objective | test                        | output                         |predictions_file           |
                |my_multilabelm_1|:|7| ../data/multilabel_multi.csv |,  | type,class | -type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P | class |../data/test_multilabel.csv | ./scenario_mlm_1/predictions.csv | ./check_files/predictions_ml.csv |
        """
        print self.setup_scenario1.__doc__
        examples = [
            ['my_multilabelm_1', ':', '7', 'data/multilabel_multi.csv', ',', 'type,class', '-type,-type - W,-type - A,-type - C,-type - S,-type - R,-type - T,-type - P', 'class', 'data/test_multilabel.csv', 'scenario_mlm_1/predictions.csv', 'check_files/predictions_ml.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            ml_pred.i_create_all_mlm_resources(self, tag=example[0], label_separator=example[1], number_of_labels=example[2], data=example[3], training_separator=example[4], ml_fields=example[5], model_fields=example[6], objective=example[7], test=example[8], output=example[9])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_models(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[10])
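For reference, a hedged sketch of the multi-label call this setup drives, assuming bigmler's documented --multi-label, --multi-label-fields, --label-separator and --training-separator options (the long --model-fields list is omitted for brevity):

    # Sketch only; mirrors the example row, not a verified command.
    import subprocess

    subprocess.check_call([
        "bigmler", "--multi-label", "--train", "data/multilabel_multi.csv",
        "--multi-label-fields", "type,class", "--label-separator", ":",
        "--training-separator", ",", "--objective", "class",
        "--test", "data/test_multilabel.csv",
        "--output", "scenario_mlm_1/predictions.csv"])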
Example #31
    def test_scenario8(self):
        """
            Scenario: Successfully building a new dataset from an existing one and analyzing it
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                |data |output_dir  |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                |../data/iris.csv | ./scenario_a_10 |../data/new_fields.json| outlier? |petal length,outlier?,species| 3         | 14        | 2         |2     | precision  | 9                | 94.71%         |
        """
        print self.test_scenario8.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_10', 'data/new_fields2.json', u'outlier?', u'outlier?,species', '3', '14', '2', '2', 'precision', '5', '98.21%']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self, json_file=example[2], model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            test_pred.i_create_nodes_analysis(self, min_nodes=example[5], max_nodes=example[6], nodes_step=example[7], k_fold=example[8], metric=example[9])
            test_pred.i_check_create_kfold_datasets(self, example[8])
            test_pred.i_check_create_kfold_models(self, example[8])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[8])
            test_pred.i_check_node_threshold(self, example[10], example[9], example[11])
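The node-threshold sweep corresponds to bigmler analyze; a sketch assuming the --nodes, --min-nodes, --max-nodes, --nodes-step and --optimize flag names from the bigmler analyze docs:

    import subprocess

    # The dataset id is a placeholder for the one logged under scenario_a_10.
    subprocess.check_call([
        "bigmler", "analyze", "--dataset", "dataset/000000000000000000000000",
        "--nodes", "--min-nodes", "3", "--max-nodes", "14",
        "--nodes-step", "2", "--k-folds", "2", "--optimize", "precision"])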
Example #32
    def test_scenario4(self):
        """
            Scenario: Successfully building feature selection from filtered dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I filter out field "<field>" from dataset and log to "<output_dir>"
                And I check that the new dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                 | field               | objective     |output                    | output_dir | kfold | metric   | selection   | metric_value |
                | ../data/iris_2fd.csv | sepal length        | species         |./scenario_a_6/evaluation |./scenario_a_6 | 2     | recall   | petal width | 100.00%     |
        """
        print self.test_scenario4.__doc__
        examples = [
            ['data/iris_2fd.csv', 'sepal length', 'species', 'scenario_a_6/evaluation', 'scenario_a_6', '2', 'recall', 'petal width', '100.00%']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            dataset.i_filter_field_from_dataset(self, field=example[1], output_dir=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_create_kfold_cross_validation_objective(self, k_folds=example[5], objective=example[2], metric=example[6])
            test_pred.i_check_create_kfold_datasets(self, example[5])
            test_pred.i_check_create_kfold_models(self, example[5])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[5])
            test_pred.i_check_feature_selection(self, example[7], example[6], example[8])
Example #33
    def test_scenario2(self):
        """
            Scenario: Successfully building feature selection from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>
                And I generate a report from the output directory
                And a symlink file is generated in the reports directory

                Examples:
                | data                | output                    | kfold | metric   | selection   | metric_value |
                | ../data/iris_2f.csv | ./scenario_a_2/evaluation | 2     | accuracy | petal width | 100.00%      |
                | ../data/iris_2f.csv | ./scenario_a_3/evaluation | 2     | phi      | petal width | 1            |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris_2f.csv', 'scenario_a_2/evaluation', '2', 'accuracy', 'petal width', '100.00%'],
            ['data/iris_2f.csv', 'scenario_a_3/evaluation', '2', 'phi', 'petal width', '1']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self, data=example[0], output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_metric(self, k_folds=example[2], metric=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[2])
            test_pred.i_check_feature_selection(self, example[4], example[3], example[5])
            test_pred.i_generate_report(self)
            test_pred.is_symlink(self)
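The smart feature-selection loop maps to bigmler analyze --features; a minimal sketch under the same flag-name assumptions as in the node-analysis sketch above:

    import subprocess

    # Placeholder dataset id; the test uses the dataset built from iris_2f.csv.
    subprocess.check_call([
        "bigmler", "analyze", "--dataset", "dataset/000000000000000000000000",
        "--features", "--k-folds", "2", "--optimize", "accuracy"])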
Example #34
def setup_for_fusion(step, train=None, output_dir=None):
    train = res_filename(train)
    command = ("bigmler --train \"" + train + "\" --store --output-dir " +
               output_dir)
    shell_execute(command, "%s/predictions" % output_dir)
    test_pred.i_check_create_source(step)
    test_pred.i_check_create_dataset(step)
    test_pred.i_check_create_model(step)

    command = ("bigmler deepnet --dataset \"" + world.dataset["resource"] +
               "\" --store --output-dir " + output_dir)
    shell_execute(command, "%s/predictions" % output_dir)
    test_dn.i_check_create_dn_model(step)
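setup_for_fusion stops after training a decision tree and a deepnet on the same dataset; the fusion itself is presumably created in a later step. A sketch of that presumed follow-up, assuming bigmler fusion's --fusion-models option (both ids and the output directory are placeholders):

    import subprocess

    subprocess.check_call([
        "bigmler", "fusion", "--fusion-models",
        "model/000000000000000000000000,deepnet/000000000000000000000000",
        "--store", "--output-dir", "scenario_fusion"])  # hypothetical dir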
Example #35
    def test_scenario8(self):
        """
            Scenario: Successfully building a new dataset from an existing one and analyzing it
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                |data |output_dir  |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                |../data/iris.csv | ./scenario_a_10 |../data/new_fields.json| outlier? |petal length,outlier?,species| 3         | 14        | 2         |2     | precision  | 9                | 94.71%         |
        """
        print self.test_scenario8.__doc__
        examples = [[
            'data/iris.csv', 'scenario_a_10', 'data/new_fields2.json',
            u'outlier?', u'outlier?,species', '3', '14', '2', '2', 'precision',
            '5', '98.21%'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self,
                                                    json_file=example[2],
                                                    model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            test_pred.i_create_nodes_analysis(self,
                                              min_nodes=example[5],
                                              max_nodes=example[6],
                                              nodes_step=example[7],
                                              k_fold=example[8],
                                              metric=example[9])
            test_pred.i_check_create_kfold_datasets(self, example[8])
            test_pred.i_check_create_kfold_models(self, example[8])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[8])
            test_pred.i_check_node_threshold(self, example[10], example[9],
                                             example[11])
Example #36
    def test_scenario10(self):
        """
            Scenario: Successfully building feature selection from dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validation with options "<options>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the predictions file "<predictions_file>" is like "<estimated_file>"

                Examples:
                | data              | output                     | kfold | options   | predictions_file | estimated_file |
                | ../data/iris.csv  | ./scenario_a_12/evaluation | 2     | --exclude-features="petal length, sepal length" --predictions-csv | scenario_a_12/kfold2_pred/predictions.csv | check_files/analyze_predictions_iris.csv |
                | ../data/iris.csv  | ./scenario_a_13/evaluation | 2     | --exclude-features="species,petal length" --predictions-csv --objective 0 | scenario_a_13/kfold6_pred/predictions.csv | check_files/analyze_predictions_iris_2.csv |
                | ../data/iris.csv  | ./scenario_a_14/evaluation | 2     | --exclude-features="species,petal length" --predictions-csv --number-of-models 2 | scenario_a_14/kfold2_pred/predictions.csv | check_files/analyze_predictions_iris_2.csv |
        """
        print self.test_scenario10.__doc__
        examples = [
            [
                'data/iris.csv', 'scenario_a_12/evaluation', '2',
                ' --exclude-features="petal length,sepal length" --predictions-csv',
                'scenario_a_12/test/kfold2_pred/predictions.csv',
                'check_files/analyze_predictions_iris.csv'
            ],
            [
                'data/iris.csv', 'scenario_a_13/evaluation', '2',
                ' --exclude-features="species,sepal length" --predictions-csv --objective 0',
                'scenario_a_13/test/kfold6_pred/predictions.csv',
                'check_files/analyze_predictions_iris2.csv'
            ]
        ]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self,
                                       data=example[0],
                                       output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_options(
                self, k_folds=example[2], options=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[2])
            test_pred.i_check_predictions_file(self, example[4], example[5])
Example #37
    def test_scenario4(self):
        """
            Scenario: Successfully building feature selection from filtered dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I filter out field "<field>" from dataset and log to "<output_dir>"
                And I check that the new dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                 | field               | objective     |output                    | output_dir | kfold | metric   | selection   | metric_value |
                | ../data/iris_2fd.csv | sepal length        | species         |./scenario_a_6/evaluation |./scenario_a_6 | 2     | recall   | petal width | 100.00%     |
        """
        print self.test_scenario4.__doc__
        examples = [[
            'data/iris_2fd.csv', 'sepal length', 'species',
            'scenario_a_6/evaluation', 'scenario_a_6', '2', 'recall',
            'petal width', '100.00%'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self,
                                       data=example[0],
                                       output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            dataset.i_filter_field_from_dataset(self,
                                                field=example[1],
                                                output_dir=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_create_kfold_cross_validation_objective(
                self,
                k_folds=example[5],
                objective=example[2],
                metric=example[6])
            test_pred.i_check_create_kfold_datasets(self, example[5])
            test_pred.i_check_create_kfold_models(self, example[5])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[5])
            test_pred.i_check_feature_selection(self, example[7], example[6],
                                                example[8])
Example #38
    def test_scenario1(self):
        """
            Scenario: Successfully building a new sample from a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                Then I create a new sample from the dataset and get the sample using options "<sample_options>" storing logs in "<output_dir>"
                And I check that the sample has been created
                And the sample file is like "<sample_CSV>"

                Examples:
                |data |output_dir  |sample_options | sample_CSV |
                |../data/iris.csv | ./scenario_smp_1 | --occurrence --sample-header --row-index | ./check_files/sample_iris.csv |
                |../data/iris.csv | ./scenario_smp_2 | --precision 0 --rows 10 --row-offset 10 --unique | ./check_files/sample_iris2.csv |
                |../data/iris.csv | ./scenario_smp_3 | --row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear | ./check_files/sample_iris3.csv |
        """
        print self.test_scenario1.__doc__
        examples = [
            [
                'data/iris.csv', 'scenario_smp_1',
                '--occurrence --sample-header --row-index',
                'check_files/sample_iris.csv'
            ],
            [
                'data/iris.csv', 'scenario_smp_2',
                '--precision 0 --rows 10 --row-offset 10 --unique',
                'check_files/sample_iris2.csv'
            ],
            [
                'data/iris.csv', 'scenario_smp_3',
                '--row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear',
                'check_files/sample_iris3.csv'
            ]
        ]
        for example in examples:
            print "\nTesting with:\n", example
            dataset.i_create_dataset(self,
                                     data=example[0],
                                     output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_sample.i_create_sample(self,
                                        options=example[2],
                                        output_dir=example[1])
            test_sample.i_check_create_sample(self)
            test_sample.i_check_sample_file(self, check_sample_file=example[3])
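The sample_options column holds literal bigmler sample flags, so the first row corresponds roughly to this call (the dataset id is a placeholder for the one stored by the dataset step):

    import subprocess

    subprocess.check_call([
        "bigmler", "sample", "--dataset", "dataset/000000000000000000000000",
        "--occurrence", "--sample-header", "--row-index",
        "--store", "--output-dir", "scenario_smp_1"])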
Example #39
    def test_scenario2(self):
        """
            Scenario: Successfully building feature selection from dataset:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>
                And I generate a report from the output directory
                And a symlink file is generated in the reports directory

                Examples:
                | data                | output                    | kfold | metric   | selection   | metric_value |
                | ../data/iris_2f.csv | ./scenario_a_2/evaluation | 2     | accuracy | petal width | 100.00%      |
                | ../data/iris_2f.csv | ./scenario_a_3/evaluation | 2     | phi      | petal width | 1            |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris_2f.csv', 'scenario_a_2/evaluation', '2', 'accuracy',
            'petal width', '100.00%'
        ],
                    [
                        'data/iris_2f.csv', 'scenario_a_3/evaluation', '2',
                        'phi', 'petal width', '1'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self,
                                       data=example[0],
                                       output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_metric(
                self, k_folds=example[2], metric=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[2])
            test_pred.i_check_feature_selection(self, example[4], example[3],
                                                example[5])
            test_pred.i_generate_report(self)
            test_pred.is_symlink(self)
Example #40
    def test_scenario3(self):
        """
            Scenario: Successfully building feature selection from dataset setting objective:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations for "<objective>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | objective     |output                    | kfold | metric   | selection            | metric_value |
                | ../data/iris_2f.csv | 0             |./scenario_a_5/evaluation | 2     | r_squared| species              | 0.352845     |
                | ../data/iris_2f.csv | 0             |./scenario_a_8/evaluation | 2     | mean_squared_error| species     | 0.475200     |
        """
        print self.test_scenario3.__doc__
        examples = [[
            'data/iris_2f.csv', '0', 'scenario_a_5/evaluation', '2',
            'r_squared', 'species', '0.352845'
        ],
                    [
                        'data/iris_2f.csv', '0', 'scenario_a_8/evaluation',
                        '2', 'mean_squared_error', 'species', '0.475200'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self,
                                       data=example[0],
                                       output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_objective(
                self,
                k_folds=example[3],
                objective=example[1],
                metric=example[4])
            test_pred.i_check_create_kfold_datasets(self, example[3])
            test_pred.i_check_create_kfold_models(self, example[3])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[3])
            test_pred.i_check_feature_selection(self, example[5], example[4],
                                                example[6])
Example #41
    def test_scenario2(self):
        """
            Scenario: Successfully retraining from a model using sampled dataset
                Given I create a BigML balanced model from "<data>" sampling 50% of data and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I retrain the model from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model has doubled its rows
                And I check that the model is balanced

                Examples:
                |data |output_dir  | output_dir_ret |
                |../data/iris.csv | ./scenario_rt_2 |./scenario_rt_2b |
        """
        print self.test_scenario2.__doc__
        examples = [['data/iris.csv', 'scenario_rt_2', 'scenario_rt_2b'],
                    [
                        'https://static.bigml.com/csv/iris.csv',
                        'scenario_rt_2c', 'scenario_rt_2d'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_balanced_model_from_sample(
                self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_pred.i_check_create_model(self)
            test_pred.i_retrain_model(self,
                                      data=example[0],
                                      output_dir=example[2])
            if not example[0].startswith("https"):
                test_pred.i_check_create_source(self)
            execute_steps.i_check_create_execution(self,
                                                   number_of_executions=2)
            test_pred.i_check_create_model_in_execution(self)
            test_pred.i_check_model_double(self)
            test_pred.i_check_model_is_balanced(self)
Example #42
    def test_scenario6(self):
        """
            Scenario: Successfully building feature selection from dataset excluding features:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations excluding "<features>" with separator "<args_separator>" improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                    | kfold | features              | args_separator | metric   | selection   | metric_value |
                | ../data/iris.csv | ./scenario_a_7/evaluation | 2     | petal length!sepal width | !              | accuracy | petal width | 95.33%      |
        """
        print self.test_scenario6.__doc__
        examples = [[
            'data/iris.csv', 'scenario_a_7/evaluation', '2',
            'petal length!sepal width', '!', 'accuracy', 'petal width',
            '95.33%'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self,
                                       data=example[0],
                                       output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_separator_metric_no_fields(
                self,
                k_folds=example[2],
                features=example[3],
                args_separator=example[4],
                metric=example[5])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[2])
            test_pred.i_check_feature_selection(self, example[6], example[5],
                                                example[7])
Example #43
    def test_scenario7(self):
        """
            Scenario: Successfully building feature selection for a category from dataset:
                Given I create BigML dataset uploading train "<data>" file with attributes "<attributes>" in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations improving "<metric>" for category "<category>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | attributes | output                    | kfold | metric   | category | selection   | metric_value |
                | ../data/spam.csv    | ../data/spam_attributes.json |./scenario_a_9/evaluation | 2     | recall   | spam     | Message     | 61.24% |
        """
        print self.test_scenario7.__doc__
        examples = [[
            'data/spam.csv', 'data/spam_attributes.json',
            'scenario_a_9/evaluation', '2', 'recall', 'spam', 'Message',
            '61.24%'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset_with_attributes(self,
                                                       data=example[0],
                                                       attributes=example[1],
                                                       output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_metric_category(
                self,
                k_folds=example[3],
                metric=example[4],
                category=example[5])
            test_pred.i_check_create_kfold_datasets(self, example[3])
            test_pred.i_check_create_kfold_models(self, example[3])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[3])
            test_pred.i_check_feature_selection(self, example[6], example[4],
                                                example[7])
Example #44
    def test_scenario5(self):
        """
            Scenario: Successfully building nodes threshold analysis from dataset file:
                Given I create BigML dataset uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML nodes analysis from dataset file from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                  | min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                | ../data/iris.csv | ./scenario_a_4/evaluation | 3         | 14        | 2         |2     | precision  | 9                | 94.71% |
        """
        print self.test_scenario5.__doc__
        examples = [[
            'data/iris.csv', 'scenario_a_4/evaluation', '3', '14', '2', '2',
            'precision', '9', '94.71%'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset(self,
                                       data=example[0],
                                       output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_nodes_analysis_from_dataset_file(
                self,
                min_nodes=example[2],
                max_nodes=example[3],
                nodes_step=example[4],
                k_fold=example[5],
                metric=example[6])
            test_pred.i_check_create_kfold_datasets(self, example[5])
            test_pred.i_check_create_kfold_models(self, example[5])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[5])
            test_pred.i_check_node_threshold(self, example[7], example[6],
                                             example[8])
Example #45
    def test_scenario2(self):
        """
            Scenario: Successfully building association from source
                Given I have previously executed "<scenario>" or reproduce it with arguments <kwargs>
                And I create BigML association using source and log resources in "<output_dir>"
                And I check that the dataset has been created
                And I check that the association has been created

                Examples:
                |scenario    | kwargs                                                  | output_dir |
                | scenario_ass_1| {"data": "../data/iris.csv", "output_dir": "./scenario_ass_1/"}   | ./scenario_ass_2   |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['scenario_ass_1', '{"data": "data/iris.csv", "output_dir": "scenario_ass_1"}', 'scenario_ass_2']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_have_previous_scenario_or_reproduce_it(self, example[0], example[1])
            test_association.i_create_association_from_source(self, output_dir=example[2])
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_association(self)
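A sketch of rebuilding the association from the stored source, assuming bigmler association accepts the generic --source option (the id is a placeholder for the one saved by scenario_ass_1):

    import subprocess

    subprocess.check_call([
        "bigmler", "association", "--source", "source/000000000000000000000000",
        "--store", "--output-dir", "scenario_ass_2"])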
Example #46
    def test_scenario3(self):
        """
            Scenario: Successfully building test anomaly score predictions from test split in a dataset:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores with test split "<test_split>" remotely saved to dataset with no CSV output and log resources in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the train dataset has been created
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                Then I check that the batch anomaly scores dataset exists
                And no local CSV file is created

                Examples:
                | data             | test_split | output_dir      |
                | ../data/iris.csv | 0.2        | ./scenario_ab_3 |
        """
        print self.test_scenario3.__doc__
        examples = [['data/iris.csv', '0.2', 'scenario_ab_3']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_with_test_split_no_CSV(
                self,
                data=example[0],
                test_split=example[1],
                output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_dataset(self, suffix='train ')
            test_pred.i_check_create_dataset(self, suffix='test ')
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_batch_anomaly_score_dataset(self)
            test_anomaly.i_check_no_local_CSV(self)
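A sketch of the corresponding single command, assuming the --test-split, --to-dataset and --no-csv flags behave as the scenario describes:

    import subprocess

    subprocess.check_call([
        "bigmler", "anomaly", "--train", "data/iris.csv",
        "--test-split", "0.2", "--remote",
        "--to-dataset", "--no-csv",  # keep scores as a dataset, no local CSV
        "--store", "--output-dir", "scenario_ab_3"])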
Example #47
    def test_scenario3(self):
        """
            Scenario: Successfully building remote test centroid predictions from scratch with prediction fields:
                Given I create BigML resources uploading train "<data>" file to find centroids for "<test>" remotely with prediction fields "<prediction_fields>" and log resources in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the cluster has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch centroid prediction has been created
                And I check that the centroids are ready
                Then the local centroids file is like "<predictions_file>"

                Examples:
                | data               | test                    |  prediction_fields | output     | predictions_file |
                | ../data/grades.csv | ../data/test_grades.csv |  Assignment       |./scenario_cb_3_r/centroids.csv | ./check_files/centroids_grades_field.csv |

        """
        print self.test_scenario3.__doc__
        examples = [[
            'data/grades.csv', 'data/grades.csv', 'Assignment',
            './scenario_cb_3_r/centroids.csv',
            "./check_files/centroids_grades_field.csv"
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_cluster.i_create_all_cluster_resources_with_prediction_fields(
                self,
                data=example[0],
                test=example[1],
                prediction_fields=example[2],
                output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_cluster(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            batch_pred.i_check_create_batch_centroid(self)
            test_cluster.i_check_create_centroids(self)
            test_pred.i_check_predictions(self, example[4])
Example #48
    def test_scenario2(self):
        """
            Scenario: Successfully building test anomaly score predictions from test split:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores with test split "<test_split>" remotely and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the train dataset has been created
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data             | test_split | output                 |predictions_file           |
                | ../data/iris.csv | 0.2 | ./scenario_ab_2/anomalies.csv | ./check_files/anomaly_scores_iris.csv |

        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', '0.2', 'scenario_ab_2/anomalies.csv', 'check_files/anomaly_scores_iris.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_with_test_split(self, data=example[0], test_split=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_dataset(self, suffix='train ')
            test_pred.i_check_create_dataset(self, suffix='test ')
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[3])
Example #49
    def test_scenario1(self):
        """
            Scenario: Successfully building test anomaly score predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to find anomaly scores for "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the anomaly detector has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch anomaly scores prediction has been created
                And I check that the anomaly scores are ready
                Then the local anomaly scores file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/grades_perm.csv | ../data/grades_fields_map_perm.csv | ./scenario_ab_1_r/anomalies.csv | ./check_files/anomaly_scores_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/grades.csv', 'data/grades_perm.csv',
            'data/grades_fields_map_perm.csv', 'scenario_ab_1_r/anomalies.csv',
            'check_files/anomaly_scores_grades.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_anomaly.i_create_all_anomaly_resources_with_mapping(
                self,
                data=example[0],
                test=example[1],
                fields_map=example[2],
                output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_anomaly.i_check_create_anomaly(self)
            test_pred.i_check_create_test_source(self)
            test_pred.i_check_create_test_dataset(self)
            test_batch.i_check_create_batch_anomaly_scores(self)
            test_anomaly.i_check_create_anomaly_scores(self)
            test_anomaly.i_check_anomaly_scores(self, example[4])
Example #50
    def setup_scenario02(self):
        """
        Scenario: Successfully building text source from local file:
            Given I create BigML dataset uploading train "<data>" file with attributes "<attributes>" in "<output>"
            And I check that the source has been created
            Then I check that the dataset has been created

            Examples:
            | data               | attributes                     | output                 |
            | ../data/spam.csv   | ../data/spam_attributes.json   |  scenario2_td/topic_distributions.csv  |
        """
        print self.setup_scenario02.__doc__
        examples = [[
            'data/spam.csv', 'data/spam_attributes.json',
            'scenario2_td/topic_distributions.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_dataset_with_attributes( \
                self, data=example[0], attributes=example[1], output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
Example #51
    def test_scenario2(self):
        """
            Scenario: Successfully updating a dataset with attributes in a JSON file
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I update the dataset using the specs in JSON file "<new_fields>"
                Then I check that property "<property>" for field id "<field_id>" is "<value>" of type "<type>"

                Examples:
                |data |output_dir  |new_fields | property | field_id | value | type |
                |../data/iris.csv | ./scenario_d_2 |../data/attributes.json| preferred | 000001 | false | boolean |
                |../data/iris.csv | ./scenario_d_2_b |../data/attributes_col.json| preferred | 000001 | false | boolean |
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_2', 'data/attributes.json',
            'preferred', '000001', 'false', 'boolean'
        ],
                    [
                        'data/iris.csv', 'scenario_d_2_b',
                        'data/attributes_col.json', 'preferred', '000001',
                        'false', 'boolean'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_update_dataset_new_properties(self,
                                                        json_file=example[2])
            dataset_adv.i_check_dataset_has_property(self,
                                                     attribute=example[3],
                                                     field_id=example[4],
                                                     value=example[5],
                                                     type=example[6])
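Updating dataset properties from a JSON spec maps to bigmler's --dataset-attributes option (an assumption based on the bigmler docs; here the attributes are applied when the dataset is built rather than as a separate update step):

    import subprocess

    subprocess.check_call([
        "bigmler", "--train", "data/iris.csv", "--no-model",
        "--dataset-attributes", "data/attributes.json",
        "--store", "--output-dir", "scenario_d_2"])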
Example #52
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data    | output_dir               | test                    | output                         |predictions_file                        | objective | fields   |
                | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv   | ./scénario1/predictions.csv   | ./check_files/predictions_iris_2fb.csv   | spécies     | "pétal width" |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv',
            u'scénario1/predictions.csv',
            'check_files/predictions_iris_2fb.csv', u'spécies',
            u'"pétal width"'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_create_resources_from_dataset_objective_model(
                self,
                objective=example[5],
                fields=example[6],
                test=example[2],
                output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example #53
    def test_scenario1(self):
        """
            Scenario 1: Successfully building test predictions from scratch:
                Given I create BigML resources uploading train "<data>" file to test "<test>" remotely with mapping file "<fields_map>" and log predictions in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I check that the source has been created from the test file
                And I check that the dataset has been created from the test file
                And I check that the batch prediction has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                | data               | test                    | fields_map | output                        |predictions_file           |
                | ../data/grades.csv | ../data/test_grades.csv | ../data/grades_fields_map.csv | ./scenario_r1_r/predictions.csv | ./check_files/predictions_grades.csv |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/grades.csv', 'data/test_grades.csv',
            'data/grades_fields_map.csv', 'scenario_r1_r/predictions.csv',
            'check_files/predictions_grades.csv'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_batch_map(self,
                                                       data=example[0],
                                                       test=example[1],
                                                       fields_map=example[2],
                                                       output=example[3])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_batch_pred.i_check_create_test_source(self)
            test_batch_pred.i_check_create_test_dataset(self)
            test_batch_pred.i_check_create_batch_prediction(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
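The fields-map variant is a standard remote batch prediction plus --fields-map; a sketch for the example row:

    import subprocess

    subprocess.check_call([
        "bigmler", "--train", "data/grades.csv",
        "--test", "data/test_grades.csv",
        "--fields-map", "data/grades_fields_map.csv", "--remote",
        "--output", "scenario_r1_r/predictions.csv"])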
Example #54
    def test_scenario1(self):
        """
            Scenario: Successfully building feature selection from dataset in dev mode:
                Given I want to use api in DEV mode
                And I create BigML dataset in dev mode uploading train "<data>" file in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML feature selection <kfold>-fold cross-validations improving "<metric>" in dev mode
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best feature selection is "<selection>", with "<metric>" of <metric_value>

                Examples:
                | data                | output                    | kfold | metric   | selection   | metric_value |
                | ../data/iris_2f.csv | ./scenario_a_2/evaluation | 2     | accuracy | petal width | 100.00%       |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris_2f.csv', 'scenario_a_2/evaluation', '2', 'accuracy',
            'petal width', '100.00%'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            common.i_want_api_dev_mode(self)
            test_pred.i_create_dev_dataset(self,
                                           data=example[0],
                                           output=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_create_kfold_cross_validation_in_dev(
                self, k_folds=example[2], metric=example[3])
            test_pred.i_check_create_kfold_datasets(self, example[2])
            test_pred.i_check_create_kfold_models(self, example[2])
            test_pred.i_check_create_all_kfold_cross_validations(
                self, example[2])
            test_pred.i_check_feature_selection(self, example[4], example[3],
                                                example[5])
Example #55
    def test_scenario3(self):
        """
            Scenario: Successfully building an objective weighted model
                Given I create a BigML objective weighted model from "<data>" using the objective weights in file "<path>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model uses as objective weights "<weights>"

                Examples:
                | data             | path                | output_dir     | weights                                    |
                | ../data/iris.csv | ../data/weights.csv | ./scenario_w_3 | [["Iris-setosa",5], ["Iris-versicolor",3]] |
        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', 'data/weights.csv', 'scenario_w_3', '[["Iris-setosa",5], ["Iris-versicolor",3]]']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_objective_weighted_model(self, data=example[0], path=example[1], output_dir=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_objective_weighted_model(self, weights=example[3])
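The weights file referenced above is a plain two-column CSV of label,weight rows. A minimal command-line sketch of the same scenario, assuming bigmler's documented --objective-weights flag:

# data/weights.csv would contain rows such as:
#   Iris-setosa,5
#   Iris-versicolor,3
import subprocess
subprocess.check_call([
    "bigmler", "--train", "data/iris.csv",
    "--objective-weights", "data/weights.csv",  # per-class objective weights
    "--output-dir", "scenario_w_3",
])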
Example #56
    def setup_scenario02(self):
        """
        Scenario: Successfully building test forecasts from start:
            Given I create BigML time series resources uploading train "<data>" file to test "<test>" and log forecasts in "<output>"
            And I check that the source has been created
            And I check that the dataset has been created
            And I check that the time series has been created
            Then the local forecasts file is like "<forecasts_file>"

            Examples:
            | data               | test                    | output                    | forecasts_file                           |
            | ../data/grades.csv | ./data/test_grades.json | ./scenario1_ts/forecasts  | ./check_files/forecasts_grades_final.csv |
        """
        print self.setup_scenario02.__doc__
        examples = [
            ['data/grades.csv', 'data/test_grades.json', 'scenario1_ts/forecasts', 'check_files/forecasts_grades_final.csv', 'scenario1_ts/forecasts_000005.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            ts_pred.i_create_all_ts_resources(self, example[0], example[1], example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            ts_pred.i_check_create_time_series(self)
            ts_pred.i_check_forecasts(self, example[3])
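i_create_all_ts_resources presumably shells out to the bigmler time-series subcommand with the same three arguments. A hedged sketch (flag spellings taken from the bigmler docs, not verified against this helper):

import subprocess
subprocess.check_call([
    "bigmler", "time-series",
    "--train", "data/grades.csv",
    "--test", "data/test_grades.json",   # horizons/inputs for the forecasts
    "--output", "scenario1_ts/forecasts",
])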
Example #57
    def test_scenario3(self):
        """
            Scenario: Successfully exporting a dataset to a CSV file
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I export the dataset to the CSV file "<csv_file>"
                Then file "<csv_file>" is like file "<data>"

                Examples:
                | data             | output_dir     | csv_file    |
                | ../data/iris.csv | ./scenario_d_3 | dataset.csv |
        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_3', 'dataset.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_export_the_dataset(self, example[2])
            dataset_adv.i_files_equal(self, example[2], example[0])
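A sketch of the export step from the command line; --to-csv is bigmler's documented option for downloading a dataset as a local CSV file, though the exact flags the dataset_adv helpers pass are an assumption here:

import subprocess
# Build the dataset only (no model) and download it as CSV.
subprocess.check_call([
    "bigmler", "--train", "data/iris.csv", "--no-model",
    "--to-csv", "dataset.csv",
    "--output-dir", "scenario_d_3",
])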
Example #58
    def test_scenario1(self):
        """
            Scenario: Successfully building a balanced model
                Given I create a BigML balanced model from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model is balanced

                Examples:
                |data |output_dir  |
                |../data/iris.csv | ./scenario_w_1 |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_w_1']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_balanced_model(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_model_is_balanced(self)
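The balanced-model scenario is the simplest of the weighting family: bigmler's --balance flag asks BigML to weight each objective class inversely to its frequency in the training data. A minimal sketch of the equivalent call:

import subprocess
subprocess.check_call([
    "bigmler", "--train", "data/iris.csv",
    "--balance",                  # equal total weight per objective class
    "--output-dir", "scenario_w_1",
])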
Example #59
    def test_scenario2(self):
        """
            Scenario: Successfully building a field weighted model
                Given I create a BigML field weighted model from "<data>" using field "<field>" as weight and "<objective>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                Then I check that the model uses as weight "<field_id>"

                Examples:
                | data               | field  | output_dir     | field_id | objective |
                | ../data/iris_w.csv | weight | ./scenario_w_2 | 000005   | 000004    |
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris_w.csv', 'weight', 'scenario_w_2', '000005', 'species']]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_weighted_field_model(self, data=example[0], field=example[1], output_dir=example[2], objective=example[4])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            test_pred.i_check_weighted_model(self, field=example[3])
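Here each training instance is weighted by the value of its "weight" column, so that column has to be excluded from the predictors and the objective named explicitly. A minimal sketch with bigmler's documented flags:

import subprocess
subprocess.check_call([
    "bigmler", "--train", "data/iris_w.csv",
    "--weight-field", "weight",   # per-instance weights (field 000005)
    "--objective", "species",     # field 000004 in iris_w.csv
    "--output-dir", "scenario_w_2",
])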
Example #60
    def test_scenario3(self):
        """
            Scenario: Successfully building evaluations from start:
                Given I create BigML resources uploading train "<data>" file to create model and log in "<output>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the model has been created
                And I evaluate "<test>" with proportional missing strategy
                And I check that the source has been created
                And I check that the dataset has been created
                And I check that the evaluation has been created
                Then the evaluation file is like "<json_evaluation_file>"

                Examples:
                | data             | test                          | output                      | json_evaluation_file    |
                | ../data/iris.csv | ../data/iris_nulls.csv   | ./scenario_mis_3/evaluation | ./check_files/evaluation_iris_nulls.json |

        """
        print self.test_scenario3.__doc__
        examples = [[
            'data/iris.csv', 'data/iris_nulls.csv',
            'scenario_mis_3/evaluation',
            'check_files/evaluation_iris_nulls.json'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            test_pred.i_create_all_resources_to_model(self,
                                                      data=example[0],
                                                      output=example[2])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_check_create_model(self)
            evaluation.i_create_proportional_to_evaluate(self, test=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_pred.i_check_create_evaluation(self)
            evaluation.then_the_evaluation_file_is_like(self, example[3])
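The evaluation scenario can likewise be compressed into one command once training and scoring are chained together; a sketch assuming bigmler's --missing-strategy option (proportional, versus the default last-prediction strategy, for test rows with missing values):

import subprocess
subprocess.check_call([
    "bigmler", "--train", "data/iris.csv",
    "--test", "data/iris_nulls.csv",   # rows containing missing values
    "--evaluate",
    "--missing-strategy", "proportional",
    "--output", "scenario_mis_3/evaluation",
])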