def empty_app_id_to_list_tuple_dict(app_ids):
    """
    Build a dict of the form { app_id : ([], []) }.

    Every key is mapped to its own freshly created pair of empty lists,
    so filling the lists of one key never affects another key.

    *app_ids: Iterable of keys to use. None selects all app ids as
              returned by ids_data.get_app_ids().
    """

    keys = ids_data.get_app_ids() if app_ids is None else app_ids
    return {key: ([], []) for key in keys}
def _strip_app_id(app_id):
    """
    Remove a numeric index (e.g. "_1") from the given app_id.

    The app_id is cut at the first occurrence of an underscore that is
    followed by at least one digit; that part and everything after it is
    dropped. An app_id without such a part is left untouched.

    returns: The stripped app_id.
    raises: ValueError if the stripped app_id is not contained in
            ids_data.get_app_ids().
    """

    # Indices look like _1, _23, ...
    index_part = re.search(r"\_\d+", app_id)
    if index_part is not None:
        # Keep only what precedes the index
        app_id = app_id[:index_part.start()]

    if app_id in ids_data.get_app_ids():
        return app_id

    raise ValueError("Invalid app id given!")
def reservoir_sample_limit(item_generator, sample_size, limit_to):
    """
    Sample with 'Reservoir Sampling' from the given generator the given
    number of elements, considering only items of certain app ids.

    *item_generator: Iterable of log strings. Each one is parsed with
                     LogEntry.from_log_string() to determine its app id.
    *sample_size: Number of elements to sample; passed on unchanged to
                  reservoir_sample().
    *limit_to: List of data types (app ids) to limit to. It is iterated
               once for validation and then used for membership tests, so
               it must be a re-usable container (not a one-shot iterator).

    returns: Whatever reservoir_sample() returns for the filtered items.
    raises: ValueError if any entry of limit_to is not contained in
            ids_data.get_app_ids().
    """

    # Fetch the valid ids once instead of once per checked limit
    known_app_ids = ids_data.get_app_ids()

    if any(limit not in known_app_ids for limit in limit_to):
        # Wrap in a 1-tuple: a bare tuple on the right-hand side of % would
        # be unpacked as separate format arguments and break the message.
        raise ValueError("Given limits are invalid: %s" % (limit_to,))

    # Lazily drop every line whose app id is not in the allowed limits
    limited_generator = (
        line for line in item_generator
        # Convert line to LogEntry, check that app_id is in the allowed limits
        if log_entry_to_app_id(LogEntry.from_log_string(line)) in limit_to
    )

    return reservoir_sample(limited_generator, sample_size)
def generate_log_entries(number):
    """
    Generate <number> LogEntry objects with random content.

    Each entry gets a distinct VIN (one upper-case letter followed by a
    six-digit number), a random app id, level, GPS position and intrusion
    label, plus a log message whose format depends on the app id.

    *number: How many entries to create. Converted with int().

    returns: A list of LogEntry objects.
    raises: ValueError if no log message could be generated for a chosen
            app id, or (from random.sample) if more entries are requested
            than there are distinct six-digit numbers in the VIN range.
    """

    count = int(number)

    # random.sample() draws without replacement, so the numeric parts (and
    # with them the VINs) are all distinct.
    vins = [
        chr(random.choice(range(65, 91))) + str(x)
        for x in random.sample(range(100000, 900000), count)
    ]

    def colour_component():
        """ One channel of an RGB colour. """
        return random.randint(0, 255)

    def coordinate():
        """ One coordinate value as used for positions. """
        return random.randint(0, 499)

    # Pairs of (app ids, generator for a matching log message)
    log_msg_gens = [
        (ids_data.get_generators(),
         lambda: str(float(random.randint(-3, 2)))),
        (ids_data.get_colours(),
         lambda: "{},{},{}".format(
             colour_component(), colour_component(), colour_component())),
        (ids_data.POSE_CC,
         lambda: random.choice(["DE", "AT", "CH", "FR"])),
        (ids_data.POSE_POI,
         lambda: random.choice(ids_data.get_poi_types())
         + "," + random.choice(ids_data.get_poi_results())),
        (ids_data.POSE_TSP,
         lambda: "{},{},{},{}".format(
             coordinate(), coordinate(), coordinate(), coordinate()))
    ]

    # Loop-invariant choices
    app_ids = ids_data.get_app_ids()
    levels = ids_data.get_levels()
    labels = ids_data.get_labels()

    result = []

    # NOTE: The order of the random calls below is kept as-is, so that a
    # seeded run produces a stable sequence of entries.
    for vin in vins:
        app_id = random.choice(app_ids)
        level = random.choice(levels)
        gps_position = "{},{}".format(coordinate(), coordinate())

        log_message = None
        # No break on purpose: if several groups match, the last one wins.
        for keys, gen in log_msg_gens:
            if app_id in keys:
                log_message = gen()

        if not log_message:
            raise ValueError(
                "Could not generate a log message for app id {!r}: "
                "no message generator matches it.".format(app_id))

        intrusion = random.choice(labels)

        result.append(
            LogEntry(vin=vin, app_id=app_id, level=level,
                     gps_position=gps_position, log_message=log_message,
                     intrusion=intrusion))

    return result
def __init__(self):
    """
    Ctor. Loads the id data, checks it against the expected hashes and
    prepares the expected vector length for each app id.
    """

    self.app_ids = ids_data.get_app_ids()
    ids_tools.verify_md5(self.app_ids, "3a88e92473acb1ad1b56e05a8074c7bd")

    # Value -> int mappings; the given hash is passed on for verification
    self.level_mapping = ids_tools.enumerate_to_dict(
        ids_data.get_levels(),
        verify_hash="49942f0268aa668e146e533b676f03d0")

    self.poi_type_mapping = ids_tools.enumerate_to_dict(
        ids_data.get_poi_types(),
        verify_hash="f2fba0ed17e382e274f53bbcb142565b")

    self.poi_result_mapping = ids_tools.enumerate_to_dict(
        ids_data.get_poi_results(),
        verify_hash="dd1c18c7188a48a686619fef8007fc64")

    self.label_int_mapping = ids_tools.enumerate_to_dict(
        ids_data.get_labels(),
        verify_hash="88074a13baa6f97fa4801f3b0ec53065")

    ## Verifier data ##

    # Every vector starts with 1 value for a binarised level (two options)
    base_len = 1

    self._len_key = "len"
    len_key = self._len_key
    constraints = self._vector_constraints = {}

    def expect_length(keys, additional):
        """ Register base_len + additional as the length for each key. """
        for key in keys:
            # A separate dict per key, as single entries are adjusted below
            constraints[key] = {len_key: base_len + additional}

    # Generators: 1 (generated) value
    expect_length(ids_data.get_generators(), 1)
    # Colours: 3 values for a split colour, 2 values for the position
    expect_length(ids_data.get_colours(), 5)
    # Poses: all of them have GPS (2 values)
    expect_length(ids_data.get_poses(), 2)

    pose_additions = (
        # CC: One of five
        (ids_data.POSE_CC, 5),
        # POI: One of 4 types, one of 7 results
        (ids_data.POSE_POI, 11),
        # TSP: x, y, targ_x, targ_y
        (ids_data.POSE_TSP, 4),
    )
    for pose_key, additional in pose_additions:
        constraints[pose_key][len_key] += additional