Example #1
    def test_scenario2(self):
        """
            Scenario: Successfully updating a dataset with attributes in a JSON file
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I update the dataset using the specs in JSON file "<new_fields>"
                Then I check that property "<property>" for field id "<field_id>" is "<value>" of type "<type>"

                Examples:
                |data |output_dir  |new_fields | property | field_id | value | type
                |../data/iris.csv | ./scenario_d_2 |../data/attributes.json| preferred | 000001 | false | boolean
                |../data/iris.csv | ./scenario_d_2_b |../data/attributes_col.json| preferred | 000001 | false | boolean
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_2', 'data/attributes.json', 'preferred', '000001', 'false', 'boolean'],
            ['data/iris.csv', 'scenario_d_2_b', 'data/attributes_col.json', 'preferred', '000001', 'false', 'boolean']
        ]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_update_dataset_new_properties(self, json_file=example[2])
            dataset_adv.i_check_dataset_has_property(self, attribute=example[3], field_id=example[4], value=example[5], type=example[6])
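
The test reads its field updates from a JSON file whose contents are not shown. A plausible shape for data/attributes.json, assuming the usual BigML update payload of per-field attributes keyed by field id, would flip exactly the flag the final check asserts:

    import json

    # Hypothetical contents: mark field 000001 as not preferred, which is
    # what i_check_dataset_has_property verifies above.
    attributes = {"fields": {"000001": {"preferred": False}}}
    with open("data/attributes.json", "w") as handle:
        json.dump(attributes, handle)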
Example #2
    def test_scenario8(self):
        """
            Scenario: Successfully building a new dataset from an existing one and analyzing it
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                And I create BigML nodes analysis from <min_nodes> to <max_nodes> by <nodes_step> with <kfold>-cross-validation improving "<metric>"
                And I check that the <kfold>-datasets have been created
                And I check that the <kfold>-models have been created
                And I check that all the <kfold>-fold cross-validations have been created
                Then the best node threshold is "<node_threshold>", with "<metric>" of <metric_value>

                Examples:
                |data |output_dir  |new_fields | field | model_fields| min_nodes | max_nodes | nodes_step | kfold | metric   | node_threshold   | metric_value |
                |../data/iris.csv | ./scenario_a_10 |../data/new_fields2.json| outlier? |outlier?,species| 3         | 14        | 2         |2     | precision  | 5                | 98.21%         |
        """
        print self.test_scenario8.__doc__
        examples = [
            ['data/iris.csv', 'scenario_a_10', 'data/new_fields2.json', u'outlier?', u'outlier?,species', '3', '14', '2', '2', 'precision', '5', '98.21%']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self, json_file=example[2], model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            test_pred.i_create_nodes_analysis(self, min_nodes=example[5], max_nodes=example[6], nodes_step=example[7], k_fold=example[8], metric=example[9])
            test_pred.i_check_create_kfold_datasets(self, example[8])
            test_pred.i_check_create_kfold_models(self, example[8])
            test_pred.i_check_create_all_kfold_cross_validations(self, example[8])
            test_pred.i_check_node_threshold(self, example[10], example[9], example[11])
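
For orientation, the nodes analysis sweeps a grid of node thresholds built from min_nodes, max_nodes and nodes_step; a minimal sketch of that grid, assuming the sweep includes the upper bound:

    min_nodes, max_nodes, nodes_step = 3, 14, 2
    thresholds = list(range(min_nodes, max_nodes + 1, nodes_step))
    print(thresholds)  # [3, 5, 7, 9, 11, 13]; the test expects 5 to win on precision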
Example #3
    def test_scenario2(self):
        """
            Scenario: Successfully building a new sample from a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                Then I create a new sample from the dataset and get the sample using options "<sample_options>" storing logs in "<output_dir>"
                And I check that the sample has been created
                And the sample JSON is like the one in "<sample_JSON_file>"

                Examples:
                |data |output_dir  |sample_options | sample_JSON
                |../data/iris.csv | ./scenario_smp_4 | --stat-field "petal length"| ./check_files/stat_info.json
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris.csv', 'scenario_smp_4', '--stat-field "petal length"',
            'check_files/stat_info.json'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset.i_create_dataset(self,
                                     data=example[0],
                                     output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_sample.i_create_sample(self,
                                        options=example[2],
                                        output_dir=example[1])
            test_sample.i_check_create_sample(self)
            test_sample.i_check_sample_json(self, check_sample_file=example[3])
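
The last step compares the sample's statistics JSON against a check file. A hypothetical helper with that behaviour (name and exact comparison semantics assumed):

    import json

    def sample_json_matches(generated_path, expected_path):
        # Load both documents and compare them structurally,
        # so key order and whitespace do not matter.
        with open(generated_path) as gen, open(expected_path) as exp:
            return json.load(gen) == json.load(exp)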
Example #4
    def test_scenario1(self):
        """
            Scenario: Successfully building a new sample from a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                Then I create a new sample from the dataset and get the sample using options "<sample_options>" storing logs in "<output_dir>"
                And I check that the sample has been created
                And the sample file is like "<sample_CSV>"

                Examples:
                |data |output_dir  |sample_options | sample_CSV
                |../data/iris.csv | ./scenario_smp_1 | --occurrence --sample-header --row-index | ./check_files/sample_iris.csv
                |../data/iris.csv | ./scenario_smp_2 | --precision 0 --rows 10 --row-offset 10 --unique | ./check_files/sample_iris2.csv
                |../data/iris.csv | ./scenario_smp_3 | --row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear | ./check_files/sample_iris3.csv
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_smp_1', '--occurrence --sample-header --row-index', 'check_files/sample_iris.csv'],
            ['data/iris.csv', 'scenario_smp_2', '--precision 0 --rows 10 --row-offset 10 --unique', 'check_files/sample_iris2.csv'],
            ['data/iris.csv', 'scenario_smp_3', '--row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear', 'check_files/sample_iris3.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_sample.i_create_sample(self, options=example[2], output_dir=example[1])
            test_sample.i_check_create_sample(self)
            test_sample.i_check_sample_file(self, check_sample_file=example[3])
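
Each sample_options entry is one shell-style string, so quoted values like "petal length" must survive tokenization. A quick illustration of how the step plausibly splits it (the actual parsing code is not shown):

    import shlex

    options = '--row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear'
    print(shlex.split(options))
    # ['--row-order-by=-petal length', '--row-fields', 'petal length,petal width', '--mode', 'linear']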
Example #5
    def test_scenario2(self):
        """
            Scenario: Successfully building a new sample from a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                Then I create a new sample from the dataset and get the sample using options "<sample_options>" storing logs in "<output_dir>"
                And I check that the sample has been created
                And the sample JSON is like the one in "<sample_JSON_file>"

                Examples:
                |data |output_dir  |sample_options | sample_JSON
                |../data/iris.csv | ./scenario_smp_4 | --stat-field "petal length"| ./check_files/stat_info.json
        """
        print self.test_scenario2.__doc__
        examples = [
            ['data/iris.csv', 'scenario_smp_4', '--stat-field "petal length"', 'check_files/stat_info.json']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_sample.i_create_sample(self, options=example[2], output_dir=example[1])
            test_sample.i_check_create_sample(self)
            test_sample.i_check_sample_json(self, check_sample_file=example[3])
Example #6
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data    | output_dir               | test                    | output                         |predictions_file                        | objective | fields   |
                | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv   | ./scénario1/predictions.csv   | ./check_files/predictions_iris_2fb.csv   | spécies     | "pétal width" |
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv', u'scénario1/predictions.csv', 'check_files/predictions_iris_2fb.csv', u'spécies', u'"pétal width"']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_create_resources_from_dataset_objective_model(self, objective=example[5], fields=example[6], test=example[2], output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
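
These snippets are Python 2, which is why the accented paths carry the u prefix: without it, 'scénario1' would be a byte string and path handling could break. For instance:

    # -*- coding: utf-8 -*-
    name = u'scénario1'
    print(len(name))                   # 9 code points
    print(len(name.encode('utf-8')))   # 10 bytes: é takes two bytes in UTF-8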
Example #7
    def test_scenario7(self):
        """
            Scenario: Successfully importing fields summary to a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I import fields attributes in file "<summary_file>" to dataset
                Then the field "<field_id>" has "<attribute>" equal to "<attribute_value>"

                Examples:
                |data |output_dir | summary_file | field_id | attribute | attribute_value
                |../data/iris.csv | ./scenario_d_7 | ../data/fields_summary_modified.csv |  000000 | name | sepal_length
        """
        print self.test_scenario7.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_7',
            'data/fields_summary_modified.csv', '000000', 'name',
            'sepal_length'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_import_fields(self, summary=example[2])
            dataset_adv.field_attribute_value(self,
                                              field=example[3],
                                              attribute=example[4],
                                              attribute_value=example[5])
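
The final assertion boils down to reading one field attribute back from the dataset. A sketch of the same check written directly against the BigML Python bindings (the step helpers wrap something similar; the dataset id below is a placeholder):

    from bigml.api import BigML

    api = BigML()  # credentials come from BIGML_USERNAME / BIGML_API_KEY
    dataset = api.get_dataset("dataset/0123456789abcdef01234567")  # placeholder id
    assert dataset["object"]["fields"]["000000"]["name"] == "sepal_length"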
Example #8
    def test_scenario5(self):
        """
            Scenario: Successfully building a filtered dataset from a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML filtered dataset with filter "<filter_exp>" from previous dataset and store logs in "<output_dir>"
                And I check that the dataset has been created
                And the number of records in the dataset is <filtered_records>

                Examples:
                |data |output_dir | filtered_records | filter_exp
                |../data/iris.csv | ./scenario_d_5 | 50 | (= (f "000004") "Iris-setosa")
        """
        print self.test_scenario5.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_5', '50',
            '(= (f "000004") "Iris-setosa")'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_filtered_dataset_from_dataset(
                self, filter_exp=example[3], output_dir=example[1])
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_anomaly.i_check_dataset_lines_number(self, example[2])
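
The filter is a Flatline expression selecting rows whose field 000004 (the species column) equals Iris-setosa. Assuming the bundled iris.csv is the standard 150-row file with a header, the expected count of 50 can be reproduced locally:

    import csv

    with open("data/iris.csv") as handle:
        rows = [row for row in csv.reader(handle)
                if row and row[-1] == "Iris-setosa"]
    print(len(rows))  # 50: one third of the 150 iris rows are Iris-setosa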
Example #9
    def test_scenario1(self):
        """
            Scenario: Successfully building a new dataset from an existing one
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                Then I check that the new dataset has field "<field>"

                Examples:
                |data |output_dir  |new_fields | field | model_fields
                |../data/iris.csv | ./scenario_d_1 |../data/new_fields.json| outlier? |petal length,outlier?,species
        """
        print self.test_scenario1.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_1', 'data/new_fields.json', u'outlier?', u'petal length,outlier?,species']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self, json_file=example[2], model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            dataset_adv.i_check_dataset_has_field(self, example[3])
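
The contents of data/new_fields.json are not shown; a plausible shape, assuming BigML's usual new_fields spec of Flatline-generated columns, would add the "outlier?" field the test then looks for:

    import json

    # Hypothetical spec: derive a new "outlier?" column with a Flatline
    # expression (the real file's expression is unknown).
    new_fields = {"new_fields": [
        {"name": "outlier?",
         "field": "(if (> (f \"petal length\") 6) \"yes\" \"no\")"}
    ]}
    with open("data/new_fields.json", "w") as handle:
        json.dump(new_fields, handle)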
Example #10
    def test_scenario4(self):
        """
            Scenario: Successfully building a multi-dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML dataset from previous source and store logs in "<output_dir>"
                And I check that the dataset has been created
                And I create a multi-dataset from the datasets file and store logs in "<output_dir2>"
                And I check that the multi-dataset has been created
                Then I check that the multi-dataset's origin are the datasets in "<output_dir>"

                Examples:
                |data |output_dir  |output_dir2 |
                |../data/iris.csv | ./scenario_d_4 | ./scenario_d_4a|
        """
        print self.test_scenario4.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_4', 'scenario_d_4a']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_from_source(self, output_dir=example[1])
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_multi_dataset(self, example[2])
            dataset_adv.i_check_create_multi_dataset(self)
            dataset_adv.i_check_multi_dataset_origin(self, output_dir=example[1])
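
bigmler logs the ids of created resources under the output directory, and the multi-dataset step builds on the datasets file written there; roughly (the file name is an assumption):

    # Read back the dataset ids logged by the two creation steps above.
    with open("scenario_d_4/dataset") as handle:
        origin_ids = [line.strip() for line in handle if line.strip()]
    print(origin_ids)  # the origins the final check compares against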
Example #11
    def test_scenario4(self):
        """
            Scenario: Successfully building a multi-dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML dataset from previous source and store logs in "<output_dir>"
                And I check that the dataset has been created
                And I create a multi-dataset from the datasets file and store logs in "<output_dir2>"
                And I check that the multi-dataset has been created
                Then I check that the multi-dataset's origin are the datasets in "<output_dir>"

                Examples:
                |data |output_dir  |output_dir2 |
                |../data/iris.csv | ./scenario_d_4 | ./scenario_d_4a|
        """
        print self.test_scenario4.__doc__
        examples = [['data/iris.csv', 'scenario_d_4', 'scenario_d_4a']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_from_source(self,
                                                     output_dir=example[1])
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_multi_dataset(self, example[2])
            dataset_adv.i_check_create_multi_dataset(self)
            dataset_adv.i_check_multi_dataset_origin(self,
                                                     output_dir=example[1])
Example #12
    def test_scenario11(self):
        """
            Scenario: Successfully building association from a sampled dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML association with params "<params>" from dataset in "<output_dir>"
                And I check that the association has been created
                And the association params are "<params_json>"

                Examples:
                |data |output_dir | params | params_json
                |../data/iris.csv | ./scenario_d_11 | "--sample-rate 0.2 --replacement" | {"sample_rate": 0.2, "replacement": true}
        """
        print self.test_scenario11.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_11',
             '--sample-rate 0.2 --replacement',
             '{"sample_rate": 0.2, "replacement": true}']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_association_with_params_from_dataset( \
                self, params=example[2], output_dir=example[1])
            test_pred.i_check_create_association(self)
            dataset_adv.i_check_association_params(self, params_json=example[3])
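
The params_json string is compared against the created association, so it must parse as JSON; note the underscored sample_rate key, matching BigML's API field names. A minimal check:

    import json

    expected = json.loads('{"sample_rate": 0.2, "replacement": true}')
    # The step presumably asserts these keys on the association resource,
    # e.g. association["object"]["sample_rate"] == 0.2 (structure assumed).
    print(expected)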
Example #13
    def test_scenario1(self):
        """
            Scenario: Successfully building a new dataset from an existing one
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new BigML dataset using the specs in JSON file "<new_fields>" and a model with "<model_fields>"
                And I check that the new dataset has been created
                And I check that the model has been created
                Then I check that the new dataset has field "<field>"

                Examples:
                |data |output_dir  |new_fields | field | model_fields
                |../data/iris.csv | ./scenario_d_1 |../data/new_fields.json| outlier? |petal length,outlier?,species
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_1', 'data/new_fields.json',
            u'outlier?', u'petal length,outlier?,species'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_dataset_new_fields(self,
                                                    json_file=example[2],
                                                    model_fields=example[4])
            test_pred.i_check_create_new_dataset(self)
            test_pred.i_check_create_model(self)
            dataset_adv.i_check_dataset_has_field(self, example[3])
Example #14
    def test_scenario5(self):
        """
            Scenario: Successfully building a filtered dataset from a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML filtered dataset with filter "<filter_exp>" from previous dataset and store logs in "<output_dir>"
                And I check that the dataset has been created
                And the number of records in the dataset is <filtered_records>

                Examples:
                |data |output_dir | filtered_records | filter_exp
                |../data/iris.csv | ./scenario_d_5 | 50 | (= (f "000004") "Iris-setosa")
        """
        print self.test_scenario5.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_5', '50', '(= (f "000004") "Iris-setosa")']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_filtered_dataset_from_dataset(self, filter_exp=example[3], output_dir=example[1])
            test_pred.i_check_create_dataset(self, suffix='gen ')
            test_anomaly.i_check_dataset_lines_number(self, example[2])
Example #15
    def test_scenario11(self):
        """
            Scenario: Successfully building association from a sampled dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML association with params "<params>" from dataset in "<output_dir>"
                And I check that the association has been created
                And the association params are "<params_json>"

                Examples:
                |data |output_dir | params | params_json
                |../data/iris.csv | ./scenario_d_11 | "--sample-rate 0.2 --replacement" | {"sample_rate": 0.2, "replacement": true}
        """
        print self.test_scenario11.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_11',
            '--sample-rate 0.2 --replacement',
            '{"sample_rate": 0.2, "replacement": true}'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_create_association_with_params_from_dataset( \
                self, params=example[2], output_dir=example[1])
            test_pred.i_check_create_association(self)
            dataset_adv.i_check_association_params(self,
                                                   params_json=example[3])
Example #16
    def test_scenario1(self):
        """
            Scenario: Successfully building a new sample from a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                Then I create a new sample from the dataset and get the sample using options "<sample_options>" storing logs in "<output_dir>"
                And I check that the sample has been created
                And the sample file is like "<sample_CSV>"

                Examples:
                |data |output_dir  |sample_options | sample_CSV
                |../data/iris.csv | ./scenario_smp_1 | --occurrence --sample-header --row-index | ./check_files/sample_iris.csv
                |../data/iris.csv | ./scenario_smp_2 | --precision 0 --rows 10 --row-offset 10 --unique | ./check_files/sample_iris2.csv
                |../data/iris.csv | ./scenario_smp_3 | --row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear | ./check_files/sample_iris3.csv
        """
        print self.test_scenario1.__doc__
        examples = [
            [
                'data/iris.csv', 'scenario_smp_1',
                '--occurrence --sample-header --row-index',
                'check_files/sample_iris.csv'
            ],
            [
                'data/iris.csv', 'scenario_smp_2',
                '--precision 0 --rows 10 --row-offset 10 --unique',
                'check_files/sample_iris2.csv'
            ],
            [
                'data/iris.csv', 'scenario_smp_3',
                '--row-order-by="-petal length" --row-fields "petal length,petal width" --mode linear',
                'check_files/sample_iris3.csv'
            ]
        ]
        for example in examples:
            print "\nTesting with:\n", example
            dataset.i_create_dataset(self,
                                     data=example[0],
                                     output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self)
            test_sample.i_create_sample(self,
                                        options=example[2],
                                        output_dir=example[1])
            test_sample.i_check_create_sample(self)
            test_sample.i_check_sample_file(self, check_sample_file=example[3])
Example #17
    def test_scenario13(self):
        """
            Scenario: Successfully building dataset using sql transformations
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new dataset joining both datasets and store logs in "<output_dir>"
                And I check that the dataset has been created
                And I check that datasets have been joined

                Examples:
                |data |output_dir |
                |../data/iris.csv | ./scenario_d_13 |
        """
        print self.test_scenario13.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_13', "select A.*,B.* from A join B "
            "on A.`000000` = `B.000000`", 900
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            datasets = []
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            datasets.append(world.dataset)
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            datasets.append(world.dataset)
            dataset_adv.i_create_join(self,
                                      output_dir=example[1],
                                      sql=example[2])
            test_pred.i_check_create_dataset(self, suffix="gen ")
            dataset_adv.i_check_joined(self, example[3])
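
The expected 900 joined records follow from self-joining iris on its first column (field 000000, sepal length), a non-unique key: each distinct value v contributes count(v) squared rows. That arithmetic can be checked locally:

    import csv
    from collections import Counter

    with open("data/iris.csv") as handle:
        next(handle)  # skip the header row
        counts = Counter(row[0] for row in csv.reader(handle))
    print(sum(c * c for c in counts.values()))  # 900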
Example #18
    def test_scenario3(self):
        """
            Scenario: Successfully exporting a dataset to a CSV file
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I export the dataset to the CSV file "<csv_file>"
                Then file "<csv_file>" is like file "<data>"

                Examples:
                |data |output_dir  |csv_file |
                |../data/iris.csv | ./scenario_d_3 |dataset.csv
        """
        print self.test_scenario3.__doc__
        examples = [["data/iris.csv", "scenario_d_3", "dataset.csv"]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_export_the_dataset(self, example[2])
            dataset_adv.i_files_equal(self, example[2], example[0])
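
The closing step just compares the exported CSV with the originally uploaded file; a hypothetical equivalent of that check:

    def files_equal(path_a, path_b):
        # Byte-for-byte comparison; the real helper may normalize line
        # endings or headers, which is not shown here.
        with open(path_a, "rb") as a, open(path_b, "rb") as b:
            return a.read() == b.read()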
Example #19
    def test_scenario13(self):
        """
            Scenario: Successfully building dataset using sql transformations
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new dataset joining both datasets and store logs in "<output_dir>"
                And I check that the dataset has been created
                And I check that datasets have been joined

                Examples:
                |data |output_dir |
                |../data/iris.csv | ./scenario_d_13 |
        """
        print self.test_scenario13.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_13', "select A.*,B.* from A join B "
             "on A.`000000` = `B.000000`", 900]]
        for example in examples:
            print "\nTesting with:\n", example
            datasets = []
            dataset_adv.i_create_dataset(self, data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            datasets.append(world.dataset)
            dataset_adv.i_create_dataset(self, data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            datasets.append(world.dataset)
            dataset_adv.i_create_join(self, output_dir=example[1],
                                      sql=example[2])
            test_pred.i_check_create_dataset(self, suffix="gen ")
            dataset_adv.i_check_joined(self, example[3])
Example #20
    def test_scenario1(self):
        """
            Scenario: Successfully building test predictions from dataset specifying objective field and model fields
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create BigML resources using dataset, objective field <objective> and model fields <fields> to test "<test>" and log predictions in "<output>"
                And I check that the model has been created
                And I check that the predictions are ready
                Then the local prediction file is like "<predictions_file>"

                Examples:
                |data    | output_dir               | test                    | output                         |predictions_file                        | objective | fields   |
                | ../data/iris_2fb.csv| ./scénario1 | ../data/test_iris2fb.csv   | ./scénario1/predictions.csv   | ./check_files/predictions_iris_2fb.csv   | spécies     | "pétal width" |
        """
        print self.test_scenario1.__doc__
        examples = [[
            'data/iris_2fb.csv', u'scénario1', 'data/test_iris2fb.csv',
            u'scénario1/predictions.csv',
            'check_files/predictions_iris_2fb.csv', u'spécies',
            u'"pétal width"'
        ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            test_pred.i_create_resources_from_dataset_objective_model(
                self,
                objective=example[5],
                fields=example[6],
                test=example[2],
                output=example[3])
            test_pred.i_check_create_model(self)
            test_pred.i_check_create_predictions(self)
            test_pred.i_check_predictions(self, example[4])
Example #21
    def test_scenario12(self):
        """
            Scenario: Successfully building dataset juxtaposing datasets
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new dataset juxtaposing both datasets and store logs in "<output_dir>"
                And I check that the dataset has been created
                And I check that datasets have been juxtaposed

                Examples:
                |data |output_dir |
                |../data/iris.csv | ./scenario_d_12 |
        """
        print self.test_scenario12.__doc__
        examples = [['data/iris.csv', 'scenario_d_12']]
        for example in examples:
            print "\nTesting with:\n", example
            datasets = []
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            datasets.append(world.dataset)
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            datasets.append(world.dataset)
            dataset_adv.i_create_juxtaposed(self, output_dir=example[1])
            test_pred.i_check_create_dataset(self, suffix="gen ")
            dataset_adv.i_check_juxtaposed(self, datasets)
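
Juxtaposition pairs the two datasets row by row, i.e. a column-wise concatenation. A local sketch of the resulting shape (not bigmler's implementation):

    import csv

    with open("data/iris.csv") as a, open("data/iris.csv") as b:
        juxtaposed = [ra + rb for ra, rb in zip(csv.reader(a), csv.reader(b))]
    print(len(juxtaposed[0]))  # 10 columns: 5 from each copy of iris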
Example #22
    def test_scenario2(self):
        """
            Scenario: Successfully updating a dataset with attributes in a JSON file
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I update the dataset using the specs in JSON file "<new_fields>"
                Then I check that property "<property>" for field id "<field_id>" is "<value>" of type "<type>"

                Examples:
                |data |output_dir  |new_fields | property | field_id | value | type
                |../data/iris.csv | ./scenario_d_2 |../data/attributes.json| preferred | 000001 | false | boolean
                |../data/iris.csv | ./scenario_d_2_b |../data/attributes_col.json| preferred | 000001 | false | boolean
        """
        print self.test_scenario2.__doc__
        examples = [[
            'data/iris.csv', 'scenario_d_2', 'data/attributes.json',
            'preferred', '000001', 'false', 'boolean'
        ],
                    [
                        'data/iris.csv', 'scenario_d_2_b',
                        'data/attributes_col.json', 'preferred', '000001',
                        'false', 'boolean'
                    ]]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self,
                                         data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_update_dataset_new_properties(self,
                                                        json_file=example[2])
            dataset_adv.i_check_dataset_has_property(self,
                                                     attribute=example[3],
                                                     field_id=example[4],
                                                     value=example[5],
                                                     type=example[6])
Example #23
    def test_scenario7(self):
        """
            Scenario: Successfully importing fields summary to a dataset
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I import fields attributes in file "<summary_file>" to dataset
                Then the field "<field_id>" has "<attribute>" equal to "<attribute_value>"

                Examples:
                |data |output_dir | summary_file | field_id | attribute | attribute_value
                |../data/iris.csv | ./scenario_d_7 | ../data/fields_summary_modified.csv |  000000 | name | sepal_length
        """
        print self.test_scenario7.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_7', 'data/fields_summary_modified.csv', '000000', 'name', 'sepal_length']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_import_fields(self, summary=example[2])
            dataset_adv.field_attribute_value(self, field=example[3], attribute=example[4], attribute_value=example[5])
Example #24
    def test_scenario3(self):
        """
            Scenario: Successfully exporting a dataset to a CSV file
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I export the dataset to the CSV file "<csv_file>"
                Then file "<csv_file>" is like file "<data>"

                Examples:
                |data |output_dir  |csv_file |
                |../data/iris.csv | ./scenario_d_3 |dataset.csv
        """
        print self.test_scenario3.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_3', 'dataset.csv']]
        for example in examples:
            print "\nTesting with:\n", example
            dataset_adv.i_create_dataset(self, data=example[0], output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            dataset_adv.i_export_the_dataset(self, example[2])
            dataset_adv.i_files_equal(self, example[2], example[0])
Example #25
    def test_scenario12(self):
        """
            Scenario: Successfully building dataset juxtaposing datasets
                Given I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a BigML dataset from "<data>" and store logs in "<output_dir>"
                And I check that the source has been created
                And I check that the dataset has been created
                And I create a new dataset juxtaposing both datasets and store logs in "<output_dir>"
                And I check that the dataset has been created
                And I check that datasets have been juxtaposed

                Examples:
                |data |output_dir |
                |../data/iris.csv | ./scenario_d_12 |
        """
        print self.test_scenario12.__doc__
        examples = [
            ['data/iris.csv', 'scenario_d_12']]
        for example in examples:
            print "\nTesting with:\n", example
            datasets = []
            dataset_adv.i_create_dataset(self, data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            datasets.append(world.dataset)
            dataset_adv.i_create_dataset(self, data=example[0],
                                         output_dir=example[1])
            test_pred.i_check_create_source(self)
            test_pred.i_check_create_dataset(self, suffix=None)
            datasets.append(world.dataset)
            dataset_adv.i_create_juxtaposed(self, output_dir=example[1])
            test_pred.i_check_create_dataset(self, suffix="gen ")
            dataset_adv.i_check_juxtaposed(self, datasets)