def main():
    """
    Runs example code: assembles a flow from a data generator actor and a
    console actor, sets it up, executes it and finally wraps up/cleans up.
    """
    helper.print_title("Generate dataset")

    # assemble the flow: generator actor first, console actor second
    flow = Flow(name="generate dataset")

    source_actor = DataGenerator()
    source_actor.config["setup"] = datagen.DataGenerator(
        classname="weka.datagenerators.classifiers.classification.Agrawal")
    flow.actors.append(source_actor)

    console_actor = Console()
    flow.actors.append(console_actor)

    # setup() and execute() both return None on success, an error message otherwise
    setup_error = flow.setup()
    if setup_error is not None:
        print("Error setting up flow:\n" + setup_error)
    else:
        print("\n" + flow.tree + "\n")
        run_error = flow.execute()
        if run_error is not None:
            print("Error executing flow:\n" + run_error)

    # always finish the flow, whether or not setup/execution reported an error
    flow.wrapup()
    flow.cleanup()
def fix_config(self, options):
    """
    Completes the options dictionary with defaults for any missing required
    keys ("setup" and "incremental") and makes sure a help text is
    registered for each of them.

    :param options: the options to complete
    :type options: dict
    :return: the result of the superclass' fix_config for the completed options
    :rtype: dict
    """
    # "setup": the default generator is only instantiated when the key is absent
    if "setup" not in options:
        options["setup"] = datagen.DataGenerator(
            classname="weka.datagenerators.classifiers.classification.Agrawal")
    if "setup" not in self.help:
        self.help["setup"] = "The data generator to use (DataGenerator)."

    # "incremental": off unless explicitly configured
    if "incremental" not in options:
        options["incremental"] = False
    if "incremental" not in self.help:
        self.help["incremental"] = (
            "Whether to output the data incrementally, "
            "in case the generator supports that (bool).")

    return super(DataGenerator, self).fix_config(options)
def test_batch(self):
    """
    Tests batch generation: checks the attribute count of the defined data
    format and the row count of the generated dataset.
    """
    agrawal = datagenerators.DataGenerator(
        classname="weka.datagenerators.classifiers.classification.Agrawal",
        options=["-n", "10", "-r", "agrawal"])

    # the dataset format has to be assigned before examples are generated
    data_format = agrawal.define_data_format()
    agrawal.dataset_format = data_format
    self.assertEqual(
        10, agrawal.dataset_format.num_attributes,
        msg="Number of attributes differs")

    dataset = agrawal.generate_examples()
    self.assertEqual(10, dataset.num_instances, msg="Number of rows differs")
def test_incremental(self):
    """
    Tests the incremental generation: checks the attribute count of the
    defined data format and that the generator reports the expected number
    of examples to iterate over.
    """
    generator = datagenerators.DataGenerator(
        classname="weka.datagenerators.classifiers.classification.Agrawal",
        options=["-n", "10", "-r", "agrawal"])
    generator.dataset_format = generator.define_data_format()
    self.assertEqual(
        10, generator.dataset_format.num_attributes,
        msg="Number of attributes differs")

    # range() instead of xrange(): xrange does not exist in Python 3, while
    # range() works on both Python 2 and 3.
    # NOTE(review): the loop only counts up to num_examples_act and never
    # requests an individual example from the generator - confirm whether a
    # per-example generation call was intended here.
    count = 0
    for _ in range(generator.num_examples_act):
        count += 1
    self.assertEqual(10, count, msg="Number of rows differs")
def from_config(self, k, v):
    """
    Hook method for converting a value read from the dictionary. Only the
    "setup" key is handled here; every other key is delegated to the
    superclass.

    :param k: the key in the dictionary
    :type k: str
    :param v: the value
    :type v: object
    :return: the potentially parsed value
    :rtype: object
    """
    if k != "setup":
        return super(DataGenerator, self).from_config(k, v)

    # NOTE(review): the classname argument is the command-line of a default
    # datagen.DataGenerator rather than a plain class name - confirm that
    # this is what from_commandline expects.
    wrapper_classname = to_commandline(datagen.DataGenerator())
    return from_commandline(v, classname=wrapper_classname)