Пример #1
0
 def extract_dataset(option, file_path):
     """Run the layout check that matches *option* and report whether it passed.

     'option1' runs ``find_image_files_folder_per_class`` and 'option2' runs
     ``find_image_files_from_file``; any other value performs no check.

     NOTE(review): ``file_path`` is not used, and both paths handed to the
     checkers are empty strings -- this looks unfinished; confirm with the
     caller before relying on it.

     Returns:
         False when the selected checker raises ``AssertionError``,
         True otherwise.
     """
     data_dir = ''
     info_file = ''
     try:
         if option == 'option1':
             find_image_files_folder_per_class(data_dir)
         elif option == 'option2':
             find_image_files_from_file(data_dir, info_file)
     except AssertionError:
         # The checkers signal an invalid layout with AssertionError.
         return False
     return True
Пример #2
0
    def test_upload(self, request):
        """Store an uploaded test set and tag it with the layout that was found.

        The upload is read from ``request.files['input_file']`` and saved in a
        directory named after the file's stem, under the dataset path with
        'train' replaced by 'test'. A ``.npz`` upload is re-saved with ``x``
        and ``y`` arrays; anything else is unzipped and probed for one of the
        supported layouts. An empty marker file records the layout:
        ``.option0`` (numpy file), ``.option1`` (only images), ``.option2``
        (folder per class) or ``.option3`` (labels file, renamed to
        ``labels.txt``).

        Args:
            request: object exposing ``files['input_file']`` with a
                ``filename`` attribute and a ``save(path)`` method.

        Returns:
            'ok' on success, "The file contents already exists." when
            ``unzip`` reports failure, or "The file contents are not valid."
            when the upload is rejected (its directory is removed first).
        """
        invalid_msg = "The file contents are not valid."

        test_file = request.files['input_file']
        # The name is supplied by the client: keep only its last path
        # component so it cannot point outside the test directory.
        filename = os.path.basename(test_file.filename or '')
        stem = filename.split('.')[0]
        if not stem:
            # An empty stem ('', '.hidden.zip', '..') would make the upload
            # directory equal to the shared test root, which the rejection
            # paths below delete wholesale.
            return invalid_msg

        dataset_test_path = os.path.join(
            self._dataset.get_dataset_path().replace('train', 'test'),
            stem)
        path_file = os.path.join(dataset_test_path, filename)
        os.makedirs(dataset_test_path, exist_ok=True)

        test_file.save(path_file)

        def write_marker(name):
            # Create the empty marker and release the handle immediately.
            with open(os.path.join(dataset_test_path, name), 'w'):
                pass

        def reject():
            # Drop everything stored for this upload and report the failure.
            tree_remove(dataset_test_path)
            return invalid_msg

        def check_class_count(classes):
            # Raised explicitly (not via ``assert``) so the check survives
            # ``python -O``; AssertionError keeps the existing handlers valid.
            if len(classes) != len(self.get_target_labels()):
                raise AssertionError(
                    'number of classes does not match the target labels')

        try:
            if path_file.endswith('.npz'):
                try:
                    _, test_data = find_dataset_from_numpy(path_file,
                                                           requires_y=False,
                                                           only_test=True)
                    if test_data is None:
                        return reject()
                    np.savez(path_file, x=test_data[0], y=test_data[1])
                    write_marker('.option0')  # NUMPY FILE
                    return 'ok'
                except KeyError:
                    return reject()
            else:
                if not unzip(path_file, dataset_test_path):
                    return "The file contents already exists."

                # The archive is no longer needed once extracted.
                os.remove(path_file)

                if find_images_test_file(dataset_test_path):
                    write_marker('.option1')  # ONLY IMAGES
                else:
                    try:
                        _, _, classes = find_image_files_folder_per_class(
                            dataset_test_path, require_all=False)
                        check_class_count(classes)
                        write_marker('.option2')  # FOLDER PER CLASS
                    except AssertionError:
                        # Not folder-per-class: fall back to a labels file.
                        try:
                            info_file = [
                                entry
                                for entry in os.listdir(dataset_test_path)
                                if entry.startswith(('test.', 'labels.'))
                            ]
                            if len(info_file) != 1:
                                raise AssertionError(
                                    'expected exactly one test/labels file')
                            info_path = os.path.join(dataset_test_path,
                                                     info_file[0])
                            _, _, classes = find_image_files_from_file(
                                dataset_test_path,
                                info_path,
                                require_all=False)
                            check_class_count(classes)
                            write_marker('.option3')  # LABELS.TXT
                            os.rename(
                                info_path,
                                os.path.join(dataset_test_path, 'labels.txt'))
                        except AssertionError:
                            return reject()
        except ValueError:
            return reject()
        return 'ok'
Пример #3
0
def _write_empty_marker(path):
    """Create an empty file at *path* and close the handle immediately."""
    with open(path, 'w'):
        pass


def _import_folder_per_class(train_path, dataset_test_path, dataset_name):
    """Validate a folder-per-class archive and move its optional 'test' folder."""
    if 'train' in os.listdir(train_path):
        rename(os.path.join(train_path, 'train'), train_path)
    find_image_files_folder_per_class(train_path)
    if 'test' in os.listdir(train_path):
        test_dir = os.path.join(dataset_test_path, dataset_name)
        os.makedirs(test_dir, exist_ok=True)
        rename(os.path.join(train_path, 'test'), test_dir)
        find_image_files_folder_per_class(test_dir, require_all=False)
        _write_empty_marker(os.path.join(test_dir, '.option2'))  # FOLDER PER CLASS


def _import_labels_file(train_path, dataset_test_path, dataset_name):
    """Validate a labels-file archive and move the images listed in 'test.*'."""
    info_files = [f for f in os.listdir(train_path)
                  if f.startswith(('labels.', 'train.'))]
    # Raised explicitly (not via ``assert``) so the check survives ``python -O``.
    if len(info_files) != 1:
        raise AssertionError('expected exactly one labels/train file')
    labels_path = os.path.join(train_path, 'labels.txt')
    os.rename(os.path.join(train_path, info_files[0]), labels_path)
    find_image_files_from_file(train_path, labels_path)

    test_info = [f for f in os.listdir(train_path) if f.startswith('test.')]
    if len(test_info) != 1:
        return
    test_info_path = os.path.join(train_path, test_info[0])
    find_image_files_from_file(train_path, test_info_path, require_all=False)

    test_dir = os.path.join(dataset_test_path, dataset_name)
    os.makedirs(test_dir, exist_ok=True)

    read_args = {}
    if not has_header(test_info_path):
        read_args['header'] = None
    df = pd.read_csv(test_info_path, sep=None, engine='python', **read_args)

    # First column holds the image names; when the first entry is not an
    # existing file as written, the names are resolved against train_path.
    filenames = df[df.columns[0]].values
    if not os.path.isfile(filenames[0]):
        filenames = [os.path.join(train_path, f) for f in filenames]

    for image_path in filenames:
        os.rename(image_path,
                  os.path.join(test_dir, os.path.basename(image_path)))
    os.rename(test_info_path, os.path.join(test_dir, 'labels.txt'))
    _write_empty_marker(os.path.join(test_dir, '.option3'))  # LABELS.TXT


def new_image_dataset(USER_ROOT, username, option, file):
    """Create an image dataset from an uploaded file.

    The upload is saved under the path returned by ``check_dataset_path`` and
    an empty marker named ``option_map[option]`` is written there. Depending
    on *option* the file is treated as a numpy archive ('option3'), a
    folder-per-class zip ('option1') or a zip with a labels file ('option2').
    A test split found in the upload is moved to ``test/<dataset_name>`` and
    tagged with its own marker file.

    Args:
        USER_ROOT: forwarded to ``check_dataset_path``.
        username: forwarded to ``check_dataset_path``.
        option: one of the keys of ``option_map``.
        file: uploaded file object with ``filename`` and ``save(path)``.

    Returns:
        The dataset name returned by ``check_dataset_path``, or False when
        *file* is a plain string.

    Raises:
        ValueError: if ``check_zip_file`` rejects the upload.
        AssertionError: if the labels-file layout does not contain exactly
            one ``labels.*``/``train.*`` file (or a checker asserts).
        Exception: anything raised while importing is re-raised unchanged
            after the partially created dataset directory has been removed.
    """
    if isinstance(file, str):
        return False
    dataset_name = file.filename.split('.')[0]
    dataset_name, dataset_path = check_dataset_path(USER_ROOT, username, dataset_name)

    _write_empty_marker(os.path.join(dataset_path, option_map[option]))

    dataset_test_path = os.path.join(dataset_path, 'test')
    os.makedirs(dataset_test_path, exist_ok=True)

    train_path = os.path.join(dataset_path, 'train')
    os.makedirs(train_path, exist_ok=True)

    filename = secure_filename(file.filename)
    path_file = os.path.join(dataset_path, filename)
    file.save(path_file)

    if option == 'option3':
        try:
            train_data, test_data = find_dataset_from_numpy(path_file)
            np.savez(os.path.join(train_path, filename), x=train_data[0], y=train_data[1])
            # NOTE(review): truthiness test kept as-is; presumably test_data is
            # None or an (x, y) pair -- confirm against find_dataset_from_numpy.
            if test_data:
                numpy_test_dir = os.path.join(dataset_test_path, dataset_name)
                os.makedirs(numpy_test_dir, exist_ok=True)
                np.savez(os.path.join(numpy_test_dir, dataset_name + '.npz'),
                         x=test_data[0], y=test_data[1])
                _write_empty_marker(os.path.join(numpy_test_dir, '.option0'))  # NUMPY FILE
            return dataset_name
        except Exception:
            tree_remove(dataset_path)
            raise  # bare raise keeps the original traceback

    if not check_zip_file(path_file):
        tree_remove(dataset_path)
        raise ValueError('Invalid file.')

    unzip(path_file, train_path)
    try:
        if option == 'option1':
            _import_folder_per_class(train_path, dataset_test_path, dataset_name)
        elif option == 'option2':
            _import_labels_file(train_path, dataset_test_path, dataset_name)
    except Exception:
        # Same policy as the numpy branch: never leave a half-imported
        # dataset behind, whatever went wrong.
        tree_remove(dataset_path)
        raise
    return dataset_name