Пример #1
0
 def set_data(self, paths):
     """Register the raw audio and its labels on this dataset.

     Args:
         paths: dict with at least 'data' (audio folder) and 'feat'
             (file-info cache location) entries.

     Returns:
         self, so calls can be chained.
     """
     # reader chain for the raw wav files
     wav_chain = ProcessingChain().add(WavDatareader())
     audio = FolderDictSeqAbstract(paths['data'],
                                   file_info_save_path=paths['feat'],
                                   map_fct=wav_chain)
     self.add('data', audio)
     # labels derived from the data folder
     self.add('binary_anomaly', self._get_binary_anomaly(paths))
     self.add('group', self['data']['subdb'])
     return self
Пример #2
0
 def set_data(self, paths):
     """Register the raw audio and eagerly-evaluated labels on this dataset.

     Args:
         paths: dict with a 'data' entry pointing to the audio folder
             (also used as the save path).

     Returns:
         self, so calls can be chained.
     """
     # reader chain for the raw wav files
     reader = ProcessingChain().add(WavDatareader())
     from dabstract.dataset.helpers import FolderDictSeqAbstract
     self.add('data',
              FolderDictSeqAbstract(paths['data'],
                                    map_fct=reader,
                                    save_path=paths['data']))
     # labels (lazy=False: evaluated and stored immediately)
     self.add('binary_anomaly', self._get_binary_anomaly(paths), lazy=False)
     self.add('group', self['data']['subdb'], lazy=False)
     return self
Пример #3
0
    def set_data(self, paths):
        """Populate this dataset with raw audio and meta-derived labels.

        Args:
            paths: dict with "data" (audio folder), "feat" (feature/cache
                folder) and "meta" (metadata folder) entries.

        Returns:
            self, so calls can be chained.
        """

        # audio: read channel 0 of every wav file found under paths["data"]
        chain = ProcessingChain().add(WavDatareader(select_channel=0))
        from dabstract.dataset.helpers import FolderDictSeqAbstract

        self.add(
            "audio",
            FolderDictSeqAbstract(
                paths["data"],
                map_fct=chain,
                # cache file info under <feat>/<DatasetClassName>/audio/raw
                file_info_save_path=os.path.join(paths["feat"],
                                                 self.__class__.__name__,
                                                 "audio", "raw"),
            ),
        )
        # get meta: prefer the pre-aligned copy if a previous run wrote it
        if os.path.exists(os.path.join(paths["meta"], "meta_dabstract.txt")):
            labels = pandas.read_csv(os.path.join(paths["meta"],
                                                  "meta_dabstract.txt"),
                                     delimiter="\t",
                                     header=None)
        else:
            labels = pandas.read_csv(os.path.join(paths["meta"], "meta.txt"),
                                     delimiter="\t",
                                     header=None)
            # make sure audio and meta are aligned: reorder the meta rows so
            # that row i describes the i-th example of the "audio" container
            # (column 0 holds filenames prefixed with "audio/")
            filenames = labels[0].to_list()
            resort = np.array([
                filenames.index("audio/" + filename)
                for filename in self["audio"]["example"]
            ])
            labels = labels.reindex(resort)
            # persist the aligned meta so the reorder only happens once
            labels.to_csv(os.path.join(paths["meta"], "meta_dabstract.txt"),
                          sep="\t",
                          header=False,
                          index=False)

        # add labels eagerly (lazy=False); per the reads below the column
        # layout is: 0 = filename, 1 = scene, 2 = identifier
        self.add("identifier", labels[2].to_list(), lazy=False)
        #self.add("source", [filename for filename in filenames], lazy=False)
        self.add("scene", labels[1].to_list(), lazy=False)
        # integer-encoded variants of the string labels
        self.add("scene_id", stringlist2ind(self['scene']), lazy=False)
        self.add("group", stringlist2ind(self['identifier']), lazy=False)
        return self
Пример #4
0
def test_DataAbstract():
    """Exercise DataAbstract indexing, multiprocessing, iteration and get()."""
    from dabstract.abstract import DataAbstract, DictSeqAbstract, MapAbstract
    from dabstract.dataprocessor.processing_chain import Processor, ProcessingChain

    # multi-indexing on a plain list
    DA = DataAbstract(['1', '2', '3', '4'])
    assert DA[0] == '1'
    assert DA[-1] == '4'
    assert DA[1:3] == ['2', '3']
    assert DA[:] == ['1', '2', '3', '4']

    # multi-indexing on a DictSeqAbstract
    DSA = DictSeqAbstract().add_dict({
        'test1': ['1', '2', '3'],
        'test2': np.zeros(3)
    })
    expected = [{'test1': c, 'test2': 0.0} for c in ('1', '2', '3')]
    DA = DataAbstract(DSA)
    assert DA[0] == expected[0]
    assert DA[-1] == expected[-1]
    assert DA[0:2] == expected[0:2]
    assert DA[:] == expected

    # same checks with multiprocessing enabled
    DA = DataAbstract(DSA, workers=2, buffer_len=2)
    assert DA[0] == expected[0]
    assert DA[-1] == expected[-1]
    assert DA[0:2] == expected[0:2]
    assert DA[:] == expected

    # the generator yields the same sequence as the full slice
    assert [example for example in DA] == expected

    # gets on a mapped sequence
    class Something(Processor):
        def process(self, data):
            return data * 2, {'test': 0}

    data_map = MapAbstract([1, 2, 3, 4], ProcessingChain().add(Something()))
    DA = DataAbstract(data_map)
    assert DA[0] == DA.get(0)
    assert DA[-1] == DA.get(-1)
    assert np.all(DA[0:2] == np.array([[2.], [4.]]))
    assert np.all(DA[:] == np.array([[2.], [4.], [6.], [8.]]))
    assert np.all(DA.get([0, 1, 2]) == np.array([[2.], [4.], [6.]]))

    # info is returned alongside the data
    info = {'test': 0, 'output_shape': ()}
    assert DA.get(0, return_info=True) == (2, info)

    # generator with info, over the full sequence
    collected = list(DA.get(return_generator=True, return_info=True))
    assert collected == [(value, dict(info)) for value in (2, 4, 6, 8)]

    # generator with info, over a subset of indices
    collected = list(DA.get([0, 2], return_generator=True, return_info=True))
    assert collected == [(value, dict(info)) for value in (2, 6)]
Пример #5
0
def test_Map():
    """Test Map with lambdas, named functions, processing chains and info."""
    from dabstract.abstract import Map
    # data init
    source = [1, 2, 3, 4]

    ## Map using a lambda, both eager and lazy
    doubler = lambda x: 2 * x
    for lazy in (False, True):
        mapped = Map(source, doubler, lazy=lazy)
        assert mapped[0] == 2
        assert mapped[-1] == 8

    ## Map using a named function with keyword arguments
    def some_function(input, multiplier, logarithm=False):
        output = input * multiplier
        if logarithm:
            output = np.log10(output)
        return output

    for lazy in (False, True):
        mapped = Map(source,
                     some_function,
                     multiplier=2,
                     logarithm=True,
                     lazy=lazy)
        assert mapped[0] == 0.3010299956639812
        assert mapped[-1] == 0.9030899869919435

    ## Map using a ProcessingChain
    class custom_processor(Processor):
        def process(self, data, **kwargs):
            return data + 1, {'multiplier': 3}

    class custom_processor2(Processor):
        def process(self, data, **kwargs):
            return data * kwargs['multiplier'], {}

    dp = ProcessingChain()
    dp.add(custom_processor)
    dp.add(custom_processor2)

    for lazy in (False, True):
        mapped = Map(source, map_fct=dp, lazy=lazy)
        assert mapped[0] == 6
        assert mapped[-1] == 15
    # the lazy chain (last iteration) also propagates processing info
    assert mapped.get(-1, return_info=True) == (15, {
        'multiplier': 3,
        'output_shape': ()
    })

    ## Map using a lambda plus per-example info
    extra_info = ({'test': 1}, {'test': 2}, {'test': 'a'}, {'test': 'b'})
    eager_info = Map(source, doubler, info=extra_info, lazy=False)
    lazy_info = Map(source, doubler, info=extra_info, lazy=True)
    assert eager_info[0] == 2
    assert eager_info[-1] == 8
    assert lazy_info.get(0, return_info=True) == (2, {'test': 1})
    assert lazy_info.get(-1, return_info=True) == (8, {'test': 'b'})