Python NodeChainFactory.flow_from_yaml示例

编程语言: Python

命名空间/包名称: pySPACE.environments.chains.node_chain

类/类型: NodeChainFactory

方法/功能: flow_from_yaml

hotexamples.com的示例: 8

Python NodeChainFactory.flow_from_yaml - 共找到 8 个示例。以下是从开源项目中提取的、评价最高的 pySPACE.environments.chains.node_chain.NodeChainFactory.flow_from_yaml 真实 Python 代码示例。您可以为示例评分，帮助我们提高示例质量。

常用方法

显示/隐藏

flow_from_yaml(4)

instantiate(2)

replace_parameters_in_node_chain(1)

示例#1

显示文件

文件： node_chain.py 项目： Hansa064/pyspace

    def __init__(
        self,
        node_chain_spec,
        parameter_setting,
        rel_dataset_dir,
        run,
        split,
        storage_format,
        result_dataset_directory,
        store_node_chain=False,
        hide_parameters=None,
    ):
        """Set up one node chain run: directories, log level and the chain.

        :param node_chain_spec: Node chain specification. Parameters are
            substituted with ``replace_parameters2`` before the chain is
            built from it.
        :param parameter_setting: Parameter values used for the
            substitution. The pseudo parameter ``__PREPARE_OPERATION__``
            is removed from the stored setting (on a deep copy, so the
            caller's object is not modified).
        :param rel_dataset_dir: Stored unchanged on the instance.
        :param run: Run identifier, used in the persistency directory name.
        :param split: Assigned to ``current_split`` of every node.
        :param storage_format: Stored unchanged on the instance.
        :param result_dataset_directory: Directory below which
            ``persistency_run<run>`` is created.
        :param store_node_chain: Stored unchanged on the instance.
        :param hide_parameters: Stored on the instance. ``None`` (the
            default) gives every instance its own empty list.
        """
        super(NodeChainProcess, self).__init__()

        self.node_chain_spec = node_chain_spec
        self.parameter_setting = parameter_setting
        self.rel_dataset_dir = rel_dataset_dir
        self.storage = pySPACE.configuration.storage
        self.run = run
        self.storage_format = storage_format
        self.result_dataset_directory = result_dataset_directory
        self.persistency_dir = os.sep.join([result_dataset_directory, "persistency_run%s" % run])
        create_directory(self.persistency_dir)
        self.store_node_chain = store_node_chain
        # A literal ``[]`` default would be one list object shared by all
        # instances created without this argument, so a fresh list is
        # created here instead.
        self.hide_parameters = [] if hide_parameters is None else hide_parameters

        # reduce_log_level for process creation
        # NOTE(review): eval() executes the configured string as Python
        # code, so the configuration has to come from a trusted source.
        try:
            console_log_level = (
                eval(pySPACE.configuration.console_log_level)
                if hasattr(pySPACE.configuration, "console_log_level")
                else logging.WARNING
            )
        except (AttributeError, NameError):
            console_log_level = logging.WARNING
        try:
            file_log_level = (
                eval(pySPACE.configuration.file_log_level)
                if hasattr(pySPACE.configuration, "file_log_level")
                else logging.INFO
            )
        except (AttributeError, NameError):
            file_log_level = logging.INFO

        # The lower (more verbose) of both levels is published on the
        # shared configuration object.
        self.min_log_level = min(console_log_level, file_log_level)
        pySPACE.configuration.min_log_level = self.min_log_level

        # Replace parameters in spec file
        self.node_chain_spec = replace_parameters2(self.node_chain_spec, self.parameter_setting)
        # Create node chain
        self.node_chain = NodeChainFactory.flow_from_yaml(Flow_Class=BenchmarkNodeChain, flow_spec=self.node_chain_spec)

        for node in self.node_chain:
            node.current_split = split
        # Remove pseudo parameter "__PREPARE_OPERATION__"
        if "__PREPARE_OPERATION__" in self.parameter_setting:
            self.parameter_setting = copy.deepcopy(self.parameter_setting)
            self.parameter_setting.pop("__PREPARE_OPERATION__")

示例#2

显示文件

文件： test_node_chain_factory.py 项目： neurodebian/pyspace

    def test_dataflow_from_yaml(self):
        simpleYAMLInput ="""
-
    node : Time_Series_Source
-
    node : Detrending
    parameters : 
        detrend_method : "eval(__import__('pylab').detrend_mean)"
- 
    node : Subsampling
    parameters : 
        target_frequency : 100.0
- 
    node : CSP
    parameters : 
         retained_channels : 4
""" 

        flow = NodeChainFactory.flow_from_yaml(NodeChain,
                                          simpleYAMLInput)
        self.assert_(isinstance(flow, NodeChain) and len(flow) == 4)
        self.assert_(isinstance(flow[0], TimeSeriesSourceNode) and
                     isinstance(flow[1], DetrendingNode) and
                     isinstance(flow[2], SubsamplingNode) and
                     isinstance(flow[3], CSPNode))
        self.assert_(flow[1].detrend_method == pylab.detrend_mean)
        self.assert_(flow[2].target_frequency == 100.0)
        self.assert_(flow[3].retained_channels == 4)

示例#3

显示文件

    def test_dataflow_from_yaml(self):
        simpleYAMLInput = """
-
    node : Time_Series_Source
-
    node : Detrending
    parameters : 
        detrend_method : "eval(__import__('pylab').detrend_mean)"
- 
    node : Subsampling
    parameters : 
        target_frequency : 100.0
- 
    node : CSP
    parameters : 
         retained_channels : 4
"""

        flow = NodeChainFactory.flow_from_yaml(NodeChain, simpleYAMLInput)
        self.assert_(isinstance(flow, NodeChain) and len(flow) == 4)
        self.assert_(
            isinstance(flow[0], TimeSeriesSourceNode)
            and isinstance(flow[1], DetrendingNode)
            and isinstance(flow[2], SubsamplingNode)
            and isinstance(flow[3], CSPNode))
        self.assert_(flow[1].detrend_method == pylab.detrend_mean)
        self.assert_(flow[2].target_frequency == 100.0)
        self.assert_(flow[3].retained_channels == 4)

示例#4

显示文件

    def __init__(self,
                 node_chain_spec,
                 parameter_setting,
                 rel_dataset_dir,
                 run,
                 split,
                 storage_format,
                 result_dataset_directory,
                 store_node_chain=False,
                 hide_parameters=None):
        """Prepare a single node chain run.

        Stores the arguments, creates the persistency directory, derives
        the minimal log level from the configuration, substitutes the
        parameters in the specification and instantiates the node chain.

        :param node_chain_spec: Node chain specification; processed with
            ``replace_parameters2`` before the chain is created.
        :param parameter_setting: Parameter values for the substitution.
            If it contains ``__PREPARE_OPERATION__``, a deep copy without
            that key is stored instead of the passed object.
        :param rel_dataset_dir: Stored unchanged on the instance.
        :param run: Run identifier, part of the persistency directory name.
        :param split: Written to ``current_split`` of each node.
        :param storage_format: Stored unchanged on the instance.
        :param result_dataset_directory: Parent directory of the
            ``persistency_run<run>`` directory that is created.
        :param store_node_chain: Stored unchanged on the instance.
        :param hide_parameters: Stored on the instance; when omitted
            (``None``) a new empty list is used.
        """

        super(NodeChainProcess, self).__init__()

        self.node_chain_spec = node_chain_spec
        self.parameter_setting = parameter_setting
        self.rel_dataset_dir = rel_dataset_dir
        self.storage = pySPACE.configuration.storage
        self.run = run
        self.storage_format = storage_format
        self.result_dataset_directory = result_dataset_directory
        self.persistency_dir = os.sep.join(
            [result_dataset_directory,
             "persistency_run%s" % run])
        create_directory(self.persistency_dir)
        self.store_node_chain = store_node_chain
        # Avoid the shared-mutable-default pitfall: a ``[]`` default in the
        # signature would be the same list object for every instance.
        if hide_parameters is None:
            hide_parameters = []
        self.hide_parameters = hide_parameters

        # reduce_log_level for process creation
        # NOTE(review): the configured level strings are passed to eval(),
        # which runs them as Python code; only use trusted configuration.
        try:
            console_log_level = eval(pySPACE.configuration.console_log_level) \
                if hasattr(pySPACE.configuration, "console_log_level") \
                else logging.WARNING
        except (AttributeError, NameError):
            console_log_level = logging.WARNING
        try:
            file_log_level = eval(pySPACE.configuration.file_log_level) \
                if hasattr(pySPACE.configuration, "file_log_level") \
                else logging.INFO
        except (AttributeError, NameError):
            file_log_level = logging.INFO

        # Publish the smaller of the two levels on the shared configuration.
        self.min_log_level = min(console_log_level, file_log_level)
        pySPACE.configuration.min_log_level = self.min_log_level

        # Replace parameters in spec file
        self.node_chain_spec = replace_parameters2(self.node_chain_spec,
                                                   self.parameter_setting)
        # Create node chain
        self.node_chain = NodeChainFactory.flow_from_yaml(
            Flow_Class=BenchmarkNodeChain, flow_spec=self.node_chain_spec)

        for node in self.node_chain:
            node.current_split = split
        # Remove pseudo parameter "__PREPARE_OPERATION__"
        if "__PREPARE_OPERATION__" in self.parameter_setting:
            self.parameter_setting = copy.deepcopy(self.parameter_setting)
            self.parameter_setting.pop("__PREPARE_OPERATION__")

示例#5

显示文件

    def prepare_adaptation(self, adaptation_files, datasets):
        """ Prepares the threshold adaptation.

        For every entry of ``self.datasets`` that defines a
        ``threshold_adaptation_flow`` the spec path is made absolute
        relative to the entry's ``configuration.spec_dir``, a queue is
        created and a NodeChain is built whose first node is fed from
        that queue.

        :param adaptation_files: One item or an iterable of items; stored
            as a sequence in ``self.adaptation_data``.
        :param datasets: Not read by this method; the method works on
            ``self.datasets``.
        :returns: Always ``0``.
        """

        online_logger.info("Preparing Adaptation")
        online_logger.info("adaptation files:" + str(adaptation_files))
        for key in self.datasets.keys():
            if "threshold_adaptation_flow" in self.datasets[key]:
                spec_base = self.datasets[key]["configuration"].spec_dir
                self.datasets[key]["threshold_adaptation_flow"] = os.path.join(
                    spec_base, self.datasets[key]["threshold_adaptation_flow"])
                online_logger.info(
                    "windower_spec_path:" +
                    self.datasets[key]["windower_spec_threshold_adaptation"])
                online_logger.info(
                    "dataflow_spec_" + key + ":" +
                    self.datasets[key]["threshold_adaptation_flow"])
                self.adaptation_active_potential[key] = multiprocessing.Value(
                    'b', False)

        # start the eeg server
        # check if multiple datasets are given for adaptation
        # NOTE(review): on Python 3 a plain string also has ``__iter__``
        # and would therefore not be wrapped into a list here.
        if hasattr(adaptation_files, '__iter__'):
            self.adaptation_data = adaptation_files
            online_logger.debug("Using multiple data sets:" +
                                str(self.adaptation_data))
        else:
            self.adaptation_data = [adaptation_files]

        # Adaptation is done in separate threads, we send the time series
        # windows to these threads via two queues
        online_logger.info("Initializing Queues")
        for key in self.datasets.keys():
            self.queue[key] = multiprocessing.Queue()
        online_logger.info("Creating flows")

        def flow_generator(key):
            """create a generator to yield all the windows"""
            # Yield all windows until a None item is found in the queue
            while True:
                window = self.queue[key].get(block=True, timeout=None)
                # Identity test: ``== None`` would invoke the item's own
                # __eq__, which need not return a plain boolean.
                if window is None:
                    break
                yield window

        # Create the actual data flows for S1 vs P3 discrimination
        # and S1 vs LRP discrimination
        for key in self.datasets.keys():
            if "threshold_adaptation_flow" in self.datasets[key]:
                # Open the spec in a ``with`` block so the handle is closed
                # again. NOTE(review): assumes flow_from_yaml consumes the
                # stream before it returns -- confirm.
                with open(self.datasets[key]["threshold_adaptation_flow"]) \
                        as flow_spec_file:
                    self.aBRI_flow[key] = NodeChainFactory.flow_from_yaml(
                        Flow_Class=NodeChain,
                        flow_spec=flow_spec_file)
                self.aBRI_flow[key][0].set_generator(flow_generator(key))

        online_logger.info("threshold adaptation preparations finished")
        return 0

示例#6

显示文件

文件： adaptation.py 项目： Crespo911/pyspace

    def prepare_adaptation(self, adaptation_files, datasets, nullmarker_stride_ms = None):
        """ Prepares the threshold adaptation.

        Stores the nullmarker stride, resolves the
        ``threshold_adaptation_flow`` spec path of every entry in
        ``self.datasets`` against its ``configuration.spec_dir``, creates
        one queue per entry and builds a NodeChain per adaptation flow
        whose first node is fed from the matching queue.

        :param adaptation_files: One item or an iterable of items; stored
            as a sequence in ``self.adaptation_data``.
        :param datasets: Not read by this method; the method works on
            ``self.datasets``.
        :param nullmarker_stride_ms: Stored on the instance; a warning is
            logged when it is ``None``.
        :returns: Always ``0``.
        """

        online_logger.info( "Preparing Adaptation")
        online_logger.info( "adaptation files:" + str(adaptation_files))

        self.nullmarker_stride_ms = nullmarker_stride_ms
        if self.nullmarker_stride_ms is None:
            online_logger.warn( 'Nullmarker stride interval is %s. You can specify it in your parameter file.' % self.nullmarker_stride_ms)
        else:
            online_logger.info( 'Nullmarker stride interval is set to %s ms' % self.nullmarker_stride_ms)

        for key in self.datasets.keys():
            if "threshold_adaptation_flow" in self.datasets[key]:
                spec_base = self.datasets[key]["configuration"].spec_dir
                self.datasets[key]["threshold_adaptation_flow"] = os.path.join(spec_base, self.datasets[key]["threshold_adaptation_flow"])
                online_logger.info( "windower_spec_path:" + self.datasets[key]["windower_spec_threshold_adaptation"])
                online_logger.info( "dataflow_spec_" + key + ":" + self.datasets[key]["threshold_adaptation_flow"])
                self.adaptation_active_potential[key] = multiprocessing.Value('b',False)

        # start the eeg server
        # check if multiple datasets are given for adaptation
        # NOTE(review): on Python 3 a plain string also has ``__iter__``
        # and would therefore not be wrapped into a list here.
        if hasattr(adaptation_files,'__iter__'):
            self.adaptation_data = adaptation_files
            online_logger.debug("Using multiple data sets:" + str(self.adaptation_data))
        else:
            self.adaptation_data = [adaptation_files]


        # Adaptation is done in separate threads, we send the time series
        # windows to these threads via two queues
        online_logger.info( "Initializing Queues")
        for key in self.datasets.keys():
            self.queue[key] = multiprocessing.Queue()
        online_logger.info( "Creating flows")

        def flow_generator(key):
            """create a generator to yield all the windows"""
            # Yield all windows until a None item is found in the queue
            while True:
                window = self.queue[key].get(block = True, timeout = None)
                # Identity test: ``== None`` would invoke the item's own
                # __eq__, which need not return a plain boolean.
                if window is None:
                    break
                yield window

        # Create the actual data flows for S1 vs P3 discrimination
        # and S1 vs LRP discrimination
        for key in self.datasets.keys():
            if "threshold_adaptation_flow" in self.datasets[key]:
                # The ``with`` block closes the spec file again.
                # NOTE(review): assumes flow_from_yaml consumes the stream
                # before it returns -- confirm.
                with open(self.datasets[key]["threshold_adaptation_flow"]) as flow_spec_file:
                    self.aBRI_flow[key] = NodeChainFactory.flow_from_yaml(Flow_Class = NodeChain,
                                                                          flow_spec = flow_spec_file)
                self.aBRI_flow[key][0].set_generator(flow_generator(key))

        online_logger.info( "threshold adaptation preparations finished")
        return 0

示例#7

显示文件

文件： trainer.py 项目： jhuebotter/pyspace

    def prepare_training(self,
                         training_files,
                         potentials,
                         operation,
                         nullmarker_stride_ms=None):
        """ Prepares pyspace live for training.

        Prepares everything for training of pyspace live,
        i.e. creates flows based on the dataflow specs
        and configures them.

        :param training_files: A list, or a single item that is wrapped
            into a list; stored in ``self.training_data``.
        :param potentials: Mapping from a potential key to its settings.
            Each entry provides ``configuration`` (with ``spec_dir``) and
            the spec file name for the chosen operation; those file names
            are replaced in place by paths joined with ``spec_dir``.
        :param operation: One of ``"train"``, ``"prewindowing"``,
            ``"prewindowing_offline"`` or ``"prewindowed_train"``.
        :param nullmarker_stride_ms: Stored on the instance and passed on
            when a window stream is requested; a warning is logged when it
            is ``None``.
        :returns: Always ``0``.
        :raises ValueError: If there is at least one potential and
            ``operation`` is none of the supported values.
        :raises Exception: For ``"prewindowed_train"`` when no prewindowed
            data is found for a potential.
        """
        online_logger.info("Preparing Training")
        self.potentials = potentials
        self.operation = operation
        self.nullmarker_stride_ms = nullmarker_stride_ms
        if self.nullmarker_stride_ms is None:
            online_logger.warn(
                'Nullmarker stride interval is %s. You can specify it in your parameter file.'
                % self.nullmarker_stride_ms)
        else:
            online_logger.info('Nullmarker stride interval is set to %s ms ' %
                               self.nullmarker_stride_ms)

        online_logger.info("Creating flows..")
        for key in self.potentials.keys():
            spec_base = self.potentials[key]["configuration"].spec_dir
            if self.operation == "train":
                self.potentials[key]["node_chain"] = os.path.join(
                    spec_base, self.potentials[key]["node_chain"])
                online_logger.info("node_chain_spec:" +
                                   self.potentials[key]["node_chain"])

            elif self.operation in ("prewindowing", "prewindowing_offline"):
                self.potentials[key]["prewindowing_flow"] = os.path.join(
                    spec_base, self.potentials[key]["prewindowing_flow"])
                online_logger.info("prewindowing_dataflow_spec: " +
                                   self.potentials[key]["prewindowing_flow"])

            elif self.operation == "prewindowed_train":
                self.potentials[key]["postprocess_flow"] = os.path.join(
                    spec_base, self.potentials[key]["postprocess_flow"])
                online_logger.info("postprocessing_dataflow_spec: " +
                                   self.potentials[key]["postprocess_flow"])

            self.training_active_potential[key] = multiprocessing.Value(
                "b", False)

        online_logger.info("Path variables set for NodeChains")

        # check if multiple potentials are given for training
        if isinstance(training_files, list):
            self.training_data = training_files
        else:
            self.training_data = [training_files]

        # Training is done in separate processes, we send the time series
        # windows to these threads via two queues
        online_logger.info("Initializing Queues")
        for key in self.potentials.keys():
            self.queue[key] = multiprocessing.Queue()

        def flow_generator(key):
            """create a generator to yield all the abri flow windows"""
            # Yield all windows until a None item is found in the queue
            while True:
                window = self.queue[key].get(block=True, timeout=None)
                # Identity test: ``== None`` would invoke the item's own
                # __eq__, which need not return a plain boolean.
                if window is None:
                    break
                yield window

        # Create the actual data flows
        for key in self.potentials.keys():
            # Look up the spec directory of *this* potential; otherwise the
            # value left over from the last iteration of the loop above
            # would be used for every key.
            spec_base = self.potentials[key]["configuration"].spec_dir

            # NOTE(review): the spec files are opened in ``with`` blocks so
            # the handles are closed; this assumes flow_from_yaml consumes
            # the stream before it returns -- confirm.
            if self.operation == "train":
                flow_spec_path = self.potentials[key]["node_chain"]
                with open(flow_spec_path) as flow_spec_file:
                    self.node_chains[key] = NodeChainFactory.flow_from_yaml(
                        Flow_Class=NodeChain, flow_spec=flow_spec_file)
                self.node_chains[key][0].set_generator(flow_generator(key))
            elif self.operation in ("prewindowing", "prewindowing_offline"):
                online_logger.info("loading prewindowing flow..")
                online_logger.info(
                    "file: " + str(self.potentials[key]["prewindowing_flow"]))

                flow_spec_path = self.potentials[key]["prewindowing_flow"]
                with open(flow_spec_path) as flow_spec_file:
                    self.node_chains[key] = NodeChainFactory.flow_from_yaml(
                        Flow_Class=NodeChain, flow_spec=flow_spec_file)
                self.node_chains[key][0].set_generator(flow_generator(key))
            elif self.operation == "prewindowed_train":
                flow_spec_path = self.potentials[key]["postprocess_flow"]
                with open(flow_spec_path) as flow_spec_file:
                    self.node_chains[key] = NodeChainFactory.flow_from_yaml(
                        Flow_Class=NodeChain, flow_spec=flow_spec_file)
                # Always False here, so the marker rewriting below is
                # currently inactive in this method.
                replace_start_and_end_markers = False

                final_collection = TimeSeriesDataset()
                final_collection_path = os.path.join(
                    self.prewindowed_data_directory, key, "all_train_data")
                # delete previous training collection
                if os.path.exists(final_collection_path):
                    online_logger.info(
                        "deleting old training data collection for " + key)
                    shutil.rmtree(final_collection_path)

                # load all prewindowed collections and
                # append data to the final collection
                prewindowed_sets = \
                    glob.glob(os.path.join(self.prewindowed_data_directory, key, "*"))
                if len(prewindowed_sets) == 0:
                    no_data_message = \
                        "Couldn't find data, please do prewindowing first!"
                    online_logger.error(no_data_message)
                    raise Exception(no_data_message)
                online_logger.info("concatenating prewindowed data from " +
                                   str(prewindowed_sets))

                for set_index, set_path in enumerate(prewindowed_sets):
                    collection = BaseDataset.load(set_path)
                    data = collection.get_data(0, 0, "train")
                    for sample_index, (sample, label) in enumerate(data):
                        if replace_start_and_end_markers:
                            # in case we concatenate multiple 'Window' labeled
                            # sets we have to remove every start- and endmarker
                            # (iterate over a copy of the keys because
                            # entries are deleted inside the loop)
                            for k in list(sample.marker_name.keys()):
                                # find '{S,s}  8' or '{S,s}  9'
                                # (the character class as written also
                                # accepts a literal comma)
                                m = re.match(r"^s\s{0,2}[8,9]{1}$", k,
                                             re.IGNORECASE)
                                if m is not None:
                                    online_logger.info(
                                        str("remove %s from %d %d" %
                                            (m.group(), set_index,
                                             sample_index)))
                                    del (sample.marker_name[m.group()])

                            if set_index == len(prewindowed_sets)-1 and \
                                sample_index == len(data)-1:
                                # insert endmarker
                                sample.marker_name["S  9"] = [0.0]
                                online_logger.info("added endmarker" +
                                                   str(set_index) +
                                                   " " + str(sample_index))

                            if set_index == 0 and sample_index == 0:
                                # insert startmarker
                                sample.marker_name["S  8"] = [0.0]
                                online_logger.info("added startmarker" +
                                                   str(set_index) + " " +
                                                   str(sample_index))

                        final_collection.add_sample(sample, label, True)

                # save final collection (just for debugging)
                os.mkdir(final_collection_path)
                final_collection.store(final_collection_path)

                online_logger.info("stored final collection at " +
                                   final_collection_path)

                # load final collection again for training
                online_logger.info("loading data from " +
                                   final_collection_path)
                self.prewindowed_data[key] = BaseDataset.load(
                    final_collection_path)
                self.node_chains[key][0].set_input_dataset(
                    self.prewindowed_data[key])
            else:
                # Without this branch the method would fail further down
                # with an unbound local variable.
                raise ValueError("Unknown operation: %r" % (self.operation,))

            # create window_stream for every potential
            # (plain equality: ``in ("prewindowing")`` is a substring test
            # on a string, not a tuple membership test)
            if self.operation == "prewindowing":
                window_spec_file = os.path.join(
                    spec_base, "node_chains", "windower",
                    self.potentials[key]["windower_spec_path_train"])

                self.window_stream[key] = \
                        self.stream_manager.request_window_stream(window_spec_file,
                                                              nullmarker_stride_ms = self.nullmarker_stride_ms)

            # NOTE(review): yaml.load without an explicit Loader may
            # construct arbitrary Python objects; the spec files have to be
            # trusted.
            with open(flow_spec_path) as flow:
                self.node_chain_definitions[key] = yaml.load(flow)

        # TODO: check if the prewindowing flow is still needed when using the stream mode!
        if self.operation == "train":
            online_logger.info("Removing old flows...")
            try:
                shutil.rmtree(self.flow_storage)
            except Exception:
                # best effort; KeyboardInterrupt/SystemExit still propagate
                online_logger.info("Could not delete flow storage directory")
            os.mkdir(self.flow_storage)
        elif self.operation in ("prewindowing", "prewindowing_offline"):
            # follow this policy:
            # - delete prewindowed data older than 12 hours
            # - always delete trained/stored flows
            now = datetime.datetime.now()
            then = now - datetime.timedelta(hours=12)

            if not os.path.exists(self.prewindowed_data_directory):
                os.mkdir(self.prewindowed_data_directory)
            if not os.path.exists(self.flow_storage):
                os.mkdir(self.flow_storage)

            for key in self.potentials.keys():
                found = self.find_files_older_than(then, \
                        os.path.join(self.prewindowed_data_directory, key))
                if found is not None:
                    for f in found:
                        online_logger.info(
                            str("recursively deleting files in \'%s\'" % f))
                        try:
                            shutil.rmtree(os.path.abspath(f))
                        except Exception as e:
                            # TODO: find a smart solution for this!
                            # dir was probably already deleted..
                            online_logger.debug(
                                "could not delete '%s': %s" % (f, e))

                if os.path.exists(
                        os.path.join(self.prewindowed_data_directory, key,
                                     "all_train_data")):
                    shutil.rmtree(
                        os.path.join(self.prewindowed_data_directory, key,
                                     "all_train_data"))
                    online_logger.info(
                        "deleted concatenated training data for " + key)

        online_logger.info("Training preparations finished")
        return 0

示例#8

显示文件

文件： trainer.py 项目： AlexanderFabisch/pyspace

    def prepare_training(self, training_files, potentials, operation):
        """ Prepares pyspace live for training.

        Prepares everything for training of pyspace live,
        i.e. creates flows based on the dataflow specs
        and configures them.

        :param training_files: A list, or a single item that is wrapped
            into a list; stored in ``self.training_data``.
        :param potentials: Mapping from a potential key to its settings.
            Each entry provides ``configuration`` (with ``spec_dir``) and
            the spec file name for the chosen operation. If an entry has
            ``stream == True``, the ``stream_*`` spec names are used for
            the prewindowing/postprocess flows.
        :param operation: One of ``"train"``, ``"prewindowing"``,
            ``"prewindowing_offline"`` or ``"prewindowed_train"``.
        :returns: Always ``0``.
        :raises ValueError: If there is at least one potential and
            ``operation`` is none of the supported values.
        :raises Exception: For ``"prewindowed_train"`` when no prewindowed
            data is found for a potential.
        """
        online_logger.info( "Preparing Training")
        self.potentials = potentials
        self.operation = operation

        def uses_stream(key):
            """Tell whether the potential is configured with stream == True."""
            potential = self.potentials[key]
            # ``in`` replaces dict.has_key, which only exists on Python 2
            return "stream" in potential and potential["stream"] == True

        online_logger.info( "Creating flows..")
        for key in self.potentials.keys():
            spec_base = self.potentials[key]["configuration"].spec_dir
            if self.operation == "train":
                self.potentials[key]["node_chain"] = os.path.join(spec_base, self.potentials[key]["node_chain"])
                online_logger.info( "node_chain_spec:" + self.potentials[key]["node_chain"])

            elif self.operation in ("prewindowing", "prewindowing_offline"):
                if uses_stream(key):
                    self.potentials[key]["prewindowing_flow"] = os.path.join(spec_base, self.potentials[key]["stream_prewindowing_flow"])
                else:
                    self.potentials[key]["prewindowing_flow"] = os.path.join(spec_base, self.potentials[key]["prewindowing_flow"])
                online_logger.info( "prewindowing_dataflow_spec: " + self.potentials[key]["prewindowing_flow"])

            elif self.operation == "prewindowed_train":
                if uses_stream(key):
                    self.potentials[key]["postprocess_flow"] = os.path.join(spec_base, self.potentials[key]["stream_postprocess_flow"])
                else:
                    self.potentials[key]["postprocess_flow"] = os.path.join(spec_base, self.potentials[key]["postprocess_flow"])
                online_logger.info( "postprocessing_dataflow_spec: " + self.potentials[key]["postprocess_flow"])

            self.training_active_potential[key] = multiprocessing.Value("b",False)

        online_logger.info("Path variables set for NodeChains")

        # check if multiple potentials are given for training
        if isinstance(training_files, list):
            self.training_data = training_files
        else:
            self.training_data = [training_files]

        # Training is done in separate processes, we send the time series
        # windows to these threads via two queues
        online_logger.info( "Initializing Queues")
        for key in self.potentials.keys():
            self.queue[key] = multiprocessing.Queue()


        def flow_generator(key):
            """create a generator to yield all the abri flow windows"""
            # Yield all windows until a None item is found in the queue
            while True:
                window = self.queue[key].get(block = True, timeout = None)
                # Identity test: ``== None`` would invoke the item's own
                # __eq__, which need not return a plain boolean.
                if window is None:
                    break
                yield window

        # Create the actual data flows
        for key in self.potentials.keys():
            # Look up the spec directory of *this* potential; otherwise the
            # value left over from the last iteration of the loop above
            # would be used for every key.
            spec_base = self.potentials[key]["configuration"].spec_dir

            # NOTE(review): the spec files are opened in ``with`` blocks so
            # the handles are closed; this assumes flow_from_yaml consumes
            # the stream before it returns -- confirm.
            if self.operation == "train":
                flow_spec_path = self.potentials[key]["node_chain"]
                with open(flow_spec_path) as flow_spec_file:
                    self.node_chains[key] = NodeChainFactory.flow_from_yaml(Flow_Class = NodeChain,
                                                                            flow_spec = flow_spec_file)
                self.node_chains[key][0].set_generator(flow_generator(key))
            elif self.operation in ("prewindowing", "prewindowing_offline"):
                online_logger.info("loading prewindowing flow..")
                online_logger.info("file: " + str(self.potentials[key]["prewindowing_flow"]))

                flow_spec_path = self.potentials[key]["prewindowing_flow"]
                with open(flow_spec_path) as flow_spec_file:
                    self.node_chains[key] = NodeChainFactory.flow_from_yaml(Flow_Class = NodeChain,
                                                                            flow_spec = flow_spec_file)
                self.node_chains[key][0].set_generator(flow_generator(key))
            elif self.operation == "prewindowed_train":
                flow_spec_path = self.potentials[key]["postprocess_flow"]
                with open(flow_spec_path) as flow_spec_file:
                    self.node_chains[key] = NodeChainFactory.flow_from_yaml(Flow_Class = NodeChain,
                                                                            flow_spec = flow_spec_file)
                # Start/end markers are only rewritten in stream mode.
                replace_start_and_end_markers = uses_stream(key)
                if replace_start_and_end_markers:
                    # create windower
                    online_logger.info( "Creating Windower")
                    online_logger.info(self.potentials[key]["windower_spec_path_train"])
                    self.node_chains[key][0].set_windower_spec_file(os.path.join(spec_base, "node_chains", "windower", self.potentials[key]["windower_spec_path_train"]))

                final_collection = TimeSeriesDataset()
                final_collection_path = os.path.join(self.prewindowed_data_directory, key, "all_train_data")
                # delete previous training collection
                if os.path.exists(final_collection_path):
                    online_logger.info("deleting old training data collection for " + key)
                    shutil.rmtree(final_collection_path)

                # load all prewindowed collections and
                # append data to the final collection
                prewindowed_sets = \
                    glob.glob(os.path.join(self.prewindowed_data_directory, key, "*"))
                if len(prewindowed_sets) == 0:
                    no_data_message = "Couldn't find data, please do prewindowing first!"
                    online_logger.error(no_data_message)
                    raise Exception(no_data_message)
                online_logger.info("concatenating prewindowed data from " + str(prewindowed_sets))

                for set_index, set_path in enumerate(prewindowed_sets):
                    collection = BaseDataset.load(set_path)
                    data = collection.get_data(0, 0, "train")
                    for sample_index, (sample, label) in enumerate(data):

                        if replace_start_and_end_markers:
                            # in case we concatenate multiple 'Window' labeled
                            # sets we have to remove every start- and endmarker
                            # (iterate over a copy of the keys because
                            # entries are deleted inside the loop)
                            for k in list(sample.marker_name.keys()):
                                # find '{S,s}  8' or '{S,s}  9'
                                # (the character class as written also
                                # accepts a literal comma)
                                m = re.match(r"^s\s{0,2}[8,9]{1}$", k, re.IGNORECASE)
                                if m is not None:
                                    online_logger.info(str("remove %s from %d %d" % (m.group(), set_index, sample_index)))
                                    del(sample.marker_name[m.group()])

                            if set_index == len(prewindowed_sets)-1 and \
                                sample_index == len(data)-1:
                                # insert endmarker
                                sample.marker_name["S  9"] = [0.0]
                                online_logger.info("added endmarker" + str(set_index) + " " + str(sample_index))

                            if set_index == 0 and sample_index == 0:
                                # insert startmarker
                                sample.marker_name["S  8"] = [0.0]
                                online_logger.info("added startmarker" + str(set_index) + " " + str(sample_index))

                        final_collection.add_sample(sample, label, True)

                # save final collection (just for debugging)
                os.mkdir(final_collection_path)
                final_collection.store(final_collection_path)

                online_logger.info("stored final collection at " + final_collection_path)

                # load final collection again for training
                online_logger.info("loading data from " + final_collection_path)
                self.prewindowed_data[key] =  BaseDataset.load(final_collection_path)
                self.node_chains[key][0].set_input_dataset(self.prewindowed_data[key])
            else:
                # Without this branch the method would fail further down
                # with an unbound local variable.
                raise ValueError("Unknown operation: %r" % (self.operation,))

            # NOTE(review): yaml.load without an explicit Loader may
            # construct arbitrary Python objects; the spec files have to be
            # trusted.
            with open(flow_spec_path) as flow:
                self.node_chain_definitions[key] = yaml.load(flow)

        # TODO: check if the prewindowing flow is still needed
        # when using the stream mode!
        # (plain equality: ``in ("train")`` is a substring test on a
        # string, not a tuple membership test)
        if self.operation == "train":
            online_logger.info( "Removing old flows...")
            try:
                shutil.rmtree(self.flow_storage)
            except Exception:
                # best effort; KeyboardInterrupt/SystemExit still propagate
                online_logger.info("Could not delete flow storage directory")
            os.mkdir(self.flow_storage)
        elif self.operation in ("prewindowing", "prewindowing_offline"):
            # follow this policy:
            # - delete prewindowed data older than 12 hours
            # - always delete trained/stored flows
            now = datetime.datetime.now()
            then = now - datetime.timedelta(hours=12)

            if not os.path.exists(self.prewindowed_data_directory):
                os.mkdir(self.prewindowed_data_directory)
            if not os.path.exists(self.flow_storage):
                os.mkdir(self.flow_storage)

            for key in self.potentials.keys():
                found = self.find_files_older_than(then, \
                        os.path.join(self.prewindowed_data_directory, key))
                if found is not None:
                    for f in found:
                        online_logger.info(str("recursively deleting files in \'%s\'" % f))
                        try:
                            shutil.rmtree(os.path.abspath(f))
                        except Exception as e:
                            # TODO: find a smart solution for this!
                            # dir was probably already deleted..
                            online_logger.debug("could not delete '%s': %s" % (f, e))

                if os.path.exists(os.path.join(self.prewindowed_data_directory, key, "all_train_data")):
                    shutil.rmtree(os.path.join(self.prewindowed_data_directory, key, "all_train_data"))
                    online_logger.info("deleted concatenated training data for " + key)


        online_logger.info( "Training preparations finished")
        return 0