Example No. 1
import logging

from data_owner.models.dataset import Dataset


def search_dataset_by_requirements(requirements):
    """
    TODO agrojas -> refactor this to query
    Iterates over the stored datasets and checks which of them comply with the
    requested requirements for the current model training. The first dataset
    that complies with all requirements is returned.
    :param requirements: a dictionary with requirements for the dataset to be returned
    :return: the first Dataset that complies with the requested requirements, or None
    """
    logging.info("Search with requirements {}".format(requirements))
    features = requirements['features']['list']
    feat_range = requirements['features']['range']
    target_range = requirements['target']['range']
    for dataset in Dataset.find_all():
        try:
            logging.info("Dataset {}".format(dataset))
            # The dataset must expose exactly the requested feature set
            if set(dataset.features) != set(features):
                continue
            # Skip datasets whose feature or target values fall outside
            # the requested [min, max] ranges
            if dataset.features_max > feat_range[1]:
                continue
            if dataset.features_min < feat_range[0]:
                continue
            if dataset.target_max > target_range[1]:
                continue
            if dataset.target_min < target_range[0]:
                continue
        except Exception as e:
            logging.error(e)
            continue
        return dataset
    return None
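
For reference, a minimal sketch of the requirements dictionary this function expects, inferred from the keys it reads above; the feature names and range values are illustrative, borrowed from the test fixtures further down:

requirements = {
    'features': {
        'list': ["feature1", "feature2"],  # exact feature set the dataset must expose
        'range': [1, 50]                   # inclusive [min, max] bounds for feature values
    },
    'target': {
        'range': [2, 10]                   # inclusive [min, max] bounds for target values
    }
}

match = search_dataset_by_requirements(requirements)
if match is not None:
    logging.info("Matched dataset {}".format(match))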
Example No. 3
@classmethod
def save(cls, file):
    """
    Stores the uploaded file in the datasets directory, extracts its
    metadata, and persists a Dataset record built from that metadata.
    :param file: an uploaded file object exposing save(), filename and close()
    :raises InvalidFileException: if no metadata can be extracted from the file
    """
    logging.info(file)
    file.save('{}/{}'.format(cls.DATASET_DIR, file.filename))
    metadata = DataLoader().get_dataset_metadata(file.filename)
    if not metadata:
        raise InvalidFileException()
    ds = Dataset(metadata.id,
                 file.filename,
                 metadata.features,
                 metadata.features_max,
                 metadata.features_min,
                 metadata.target_max,
                 metadata.target_min)
    ds.save()
    file.close()
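
The file argument is assumed to be an upload object exposing save(path), filename and close(); Werkzeug's FileStorage, the type Flask hands to upload handlers, matches that interface. A minimal sketch of exercising the classmethod with such an object; the file name and the DatasetUploader host class are assumptions, not taken from the original:

from werkzeug.datastructures import FileStorage

# Wrap a local file in a FileStorage, which provides the
# save()/filename/close() interface the classmethod relies on.
upload = FileStorage(stream=open('example.csv', 'rb'),
                     filename='example.csv')
DatasetUploader.save(upload)  # DatasetUploader: hypothetical class hosting save()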
Example No. 4
def get(self):
    # Handler returning every stored dataset; shaped like a REST resource's GET
    return Dataset.find_all()
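
This handler has the shape of a Flask-RESTful resource method. A hedged sketch of how it might be wired into an API; the resource class name and route are assumptions:

from flask import Flask
from flask_restful import Api, Resource

from data_owner.models.dataset import Dataset

app = Flask(__name__)
api = Api(app)

class DatasetsResource(Resource):  # hypothetical name for the class hosting get()
    def get(self):
        # Assumes Dataset objects are JSON-serializable or marshalled elsewhere
        return Dataset.find_all()

api.add_resource(DatasetsResource, '/datasets')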
def test_find_all_datasets():
    result = [str(res) for res in Dataset.find_all()]
    # Full list comparison also catches a length mismatch
    assert result == datasets_str
from data_owner.models.dataset import Dataset
from data_owner.services.data_base import Database

# SQLite database used as a shared fixture for the tests below
data_base = Database({'DB_ENGINE': 'sqlite:///data_owner.db'})

datasets = [
    Dataset(1, "file1", ["feature1", "feature2"], 1, 50, 2, 10),
    Dataset(2, "file2", ["feature3", "feature4"], 2, 25, 1, 5),
    Dataset(3, "file3", ["feature5", "feature6"], 3, 30, 5, 15),
    Dataset(4, "file4", ["feature1", "feature2"], 1, 50, 2, 10)
]

datasets_str = [str(dataset) for dataset in datasets]
for dataset in datasets:
    dataset.save()


def test_find_dataset2_by_ext_id():
    result = Dataset.find_one_by_external_id(2)
    expected = datasets_str[1]
    assert str(result) == expected


def test_find_by_all_features():
    features_filter = ["feature1", "feature2"]
    result = [
        str(res)
        for res in Dataset.find_one_by_features(features_filter, all=True)
    ]
    expected = [datasets_str[0], datasets_str[3]]
    # Full list comparison also catches a length mismatch
    assert result == expected