def generate_test_data(sensors):
    """
    Generate a random synthetic test dataset in scikit-learn format.

    The number of generated events is taken from `num_test_instances`, which is
    not a parameter of this function but is looked up in the surrounding scope.

    @param sensors: Sensor description that is forwarded unchanged to
        generate_random_events (presumably a mapping of sensor name to its
        possible settings - confirm against generate_random_events).
    @return: The result of dataset_to_sklearn for the generated dataset.
    """
    # at_least_one_per_setting=True presumably guarantees that every setting
    # shows up at least once in the generated events - see generate_random_events
    synthetic_dataset = events_to_dataset(
        generate_random_events(sensors, num_test_instances, at_least_one_per_setting=True),
        name="synthetic",
        excluded_sensors=[],
        excluded_actions=[],
    )
    return dataset_to_sklearn(synthetic_dataset)
def generate_synthetic_dataset(num_sensors, nominal_values_per_sensor, num_instances):
    """
    Generate a random dataset in the format required by scikit-learn, see
    dataset.dataset_to_sklearn for more details.

    Sensors are named "s0", "s1", ... and every sensor can take the settings
    "v0", "v1", ...

    @param num_sensors: Number of different sensors in the dataset.
    @param nominal_values_per_sensor: Number of possible settings for each sensor.
    @param num_instances: Number of data instances in the dataset.
    @return: A random dataset in scikit-learn format.
    """
    # all sensors have the same number of possible settings
    sensor_settings = {
        "v%d" % setting_index for setting_index in range(nominal_values_per_sensor)
    }

    # generate the desired number of sensors; note that every sensor maps to the
    # *same* settings set object, so the set must not be mutated per sensor
    sensors = {
        "s%d" % sensor_index: sensor_settings for sensor_index in range(num_sensors)
    }

    events = generate_random_events(sensors, num_instances)
    dataset = events_to_dataset(events, name="synthetic", excluded_sensors=[], excluded_actions=[])
    return dataset_to_sklearn(dataset)