def setActionForIndex(self, new_action, index):
    """Replace the user action stored on the record at position ``index``.

    ``index`` is translated through ``self.indexes`` into a key of
    ``self.records``. Nothing happens when the record's current action
    already equals ``new_action``. Otherwise the previous ``user/angle`` and
    ``user/throttle`` values are kept under ``orig/angle`` and
    ``orig/throttle`` (only the first time), the new values are written, the
    record key is added to ``self.edit_list`` and ``self.setDirty()`` is
    called.

    new_action: indexable pair, element 0 is the angle, element 1 the throttle.
    """
    record_key = self.indexes[index]
    record = self.records[record_key]

    current_angle, current_throttle = Tub.get_angle_throttle(record)
    if np.array_equal([current_angle, current_throttle], new_action):
        return

    # Second gate: compare against the raw user/* fields as well.
    if not ((record["user/angle"] != new_action[0]) or (record["user/throttle"] != new_action[1])):
        return

    # Save the original values if not already done
    backups = (
        ("orig/angle", "user/angle"),
        ("orig/throttle", "user/throttle"),
    )
    for backup_field, live_field in backups:
        if backup_field not in record:
            record[backup_field] = record[live_field]

    record["user/angle"] = new_action[0]
    record["user/throttle"] = new_action[1]

    self.edit_list.add(record_key)
    self.setDirty()
def sequence_train(cfg, tub_names, model_name, transfer_model, model_type, continuous, aug):
    '''
    Train a model that takes a sequence of images as input.

    Every record_*.json found in the tubs gathered from tub_names is read,
    the records are collated into fixed-length sequences, the sequences are
    split into a training and a validation set, and two endless batch
    generators are handed to go_train.

    cfg: config object. SEQUENCE_LENGTH, BATCH_SIZE, TRAIN_TEST_SPLIT,
        TARGET_H, TARGET_W, TARGET_D, CACHE_IMAGES and VEBOSE_TRAIN are read
        here; cfg.model_type is set to model_type before training starts.
    tub_names: passed to gather_tubs to locate the data.
    model_name: passed through to go_train.
    transfer_model: not used by this function.
    model_type: passed to dk.utils.get_model_by_type. The value "look_ahead"
        doubles the collated sequence length and changes the batch layout.
    continuous: must be falsy; sequence training does not support it.
    aug: when truthy, freshly loaded images go through augment_image.

    Raises AssertionError when continuous is truthy and Exception when there
    are fewer than two full batches of training sequences.
    '''
    assert (not continuous), "sequence training does not support continuous mode"

    print("sequence of images training")

    kl = dk.utils.get_model_by_type(model_type=model_type, cfg=cfg)

    tubs = gather_tubs(cfg, tub_names)

    verbose = cfg.VEBOSE_TRAIN

    records = []

    for tub in tubs:
        record_paths = glob.glob(os.path.join(tub.path, 'record_*.json'))
        print("Tub:", tub.path, "has", len(record_paths), 'records')

        record_paths.sort(key=get_record_index)
        records += record_paths

    print('collating records')
    gen_records = {}

    for record_path in records:

        with open(record_path, 'r') as fp:
            json_data = json.load(fp)

        basepath = os.path.dirname(record_path)
        image_filename = json_data["cam/image_array"]
        image_path = os.path.join(basepath, image_filename)
        sample = {
            'record_path': record_path,
            "image_path": image_path,
            "json_data": json_data,
        }

        sample["tub_path"] = basepath
        sample["index"] = get_image_index(image_filename)

        angle, throttle = Tub.get_angle_throttle(json_data)

        sample['target_output'] = np.array([angle, throttle])
        sample['angle'] = angle
        sample['throttle'] = throttle

        # image pixels are loaded lazily by the generator
        sample['img_data'] = None

        key = make_key(sample)

        gen_records[key] = sample

    print('collating sequences')

    sequences = []

    target_len = cfg.SEQUENCE_LENGTH
    look_ahead = False

    if model_type == "look_ahead":
        target_len = cfg.SEQUENCE_LENGTH * 2
        look_ahead = True

    for sample in gen_records.values():

        seq = []

        for i in range(target_len):
            key = make_next_key(sample, i)
            if key not in gen_records:
                # a gap means this sequence can never reach target_len,
                # so there is no point in probing the remaining keys
                break
            seq.append(gen_records[key])

        if len(seq) != target_len:
            continue

        sequences.append(seq)

    print("collated", len(sequences), "sequences of length", target_len)

    #shuffle and split the data
    train_data, val_data = train_test_split(sequences, test_size=(1 - cfg.TRAIN_TEST_SPLIT))

    def generator(data, opt, batch_size=cfg.BATCH_SIZE):
        '''
        Yield (X, y) batches forever, reshuffling data in place on every pass.
        A trailing partial batch is dropped.
        '''
        num_records = len(data)
        use_look_ahead = opt['look_ahead']

        while True:
            #shuffle again for good measure
            random.shuffle(data)

            for offset in range(0, num_records, batch_size):
                batch_data = data[offset:offset + batch_size]

                if len(batch_data) != batch_size:
                    break

                b_inputs_img = []
                b_vec_in = []
                b_labels = []

                for seq in batch_data:
                    inputs_img = []
                    vec_in = []
                    labels = []
                    vec_out = []
                    num_images_target = len(seq)
                    iTargetOutput = -1
                    if use_look_ahead:
                        # only the first SEQUENCE_LENGTH records supply images;
                        # records from iTargetOutput onward supply the labels
                        num_images_target = cfg.SEQUENCE_LENGTH
                        iTargetOutput = cfg.SEQUENCE_LENGTH - 1

                    for iRec, record in enumerate(seq):
                        #get image data if we don't already have it
                        if len(inputs_img) < num_images_target:
                            if record['img_data'] is None:
                                img_arr = load_scaled_image_arr(record['image_path'], cfg)
                                if img_arr is None:
                                    # NOTE(review): this leaves the sample with
                                    # fewer images than expected, so the reshape
                                    # below will most likely fail - confirm the
                                    # intended handling of unreadable images.
                                    break
                                if aug:
                                    img_arr = augment_image(img_arr)

                                if cfg.CACHE_IMAGES:
                                    record['img_data'] = img_arr
                            else:
                                img_arr = record['img_data']

                            inputs_img.append(img_arr)

                        if iRec >= iTargetOutput:
                            vec_out.append(record['angle'])
                            vec_out.append(record['throttle'])
                        else:
                            vec_in.append(0.0)  #record['angle'])
                            vec_in.append(0.0)  #record['throttle'])

                    label_vec = seq[iTargetOutput]['target_output']

                    if use_look_ahead:
                        label_vec = np.array(vec_out)

                    labels.append(label_vec)

                    b_inputs_img.append(inputs_img)
                    b_vec_in.append(vec_in)

                    b_labels.append(labels)

                if use_look_ahead:
                    X = [np.array(b_inputs_img).reshape(
                        batch_size, cfg.TARGET_H, cfg.TARGET_W, cfg.SEQUENCE_LENGTH)]
                    X.append(np.array(b_vec_in))
                    y = np.array(b_labels).reshape(batch_size, (cfg.SEQUENCE_LENGTH + 1) * 2)
                else:
                    X = [np.array(b_inputs_img).reshape(
                        batch_size, cfg.SEQUENCE_LENGTH, cfg.TARGET_H, cfg.TARGET_W, cfg.TARGET_D)]
                    y = np.array(b_labels).reshape(batch_size, 2)

                yield X, y

    opt = {'look_ahead': look_ahead, 'cfg': cfg}

    train_gen = generator(train_data, opt)
    val_gen = generator(val_data, opt)

    total_train = len(train_data)
    total_val = len(val_data)

    print('train: %d, validation: %d' % (total_train, total_val))
    steps_per_epoch = total_train // cfg.BATCH_SIZE
    val_steps = total_val // cfg.BATCH_SIZE
    print('steps_per_epoch', steps_per_epoch)

    if steps_per_epoch < 2:
        raise Exception("Too little data to train. Please record more records.")

    cfg.model_type = model_type

    go_train(kl, cfg, train_gen, val_gen, gen_records, model_name, steps_per_epoch, val_steps, continuous, verbose)
def collate_records(records, gen_records, opts):
    '''
    Read the .json record files listed in records and merge them into
    gen_records (a dict that is updated in place).

    records: iterable of paths to record json files.
    gen_records: dict of already collated samples, keyed by make_key();
        paths whose key is already present are skipped without being read.
    opts: dict of config choices. opts['categorical'] selects binned
        angle/throttle values, opts['cfg'] supplies TRAIN_TEST_SPLIT and
        MODEL_CATEGORICAL_MAX_THROTTLE_RANGE.

    Each new sample stores tub_path, index, record_path, image_path,
    json_data, angle, throttle, img_data (None until loaded) and a boolean
    train flag; imu_array and behavior_arr are added when the record
    provides them. Files that cannot be opened or parsed are skipped.
    '''
    new_records = {}

    for record_path in records:

        basepath = os.path.dirname(record_path)
        index = get_record_index(record_path)
        sample = {'tub_path': basepath, "index": index}

        key = make_key(sample)

        if key in gen_records:
            continue

        try:
            with open(record_path, 'r') as fp:
                json_data = json.load(fp)
        except (OSError, ValueError):
            # unreadable file or invalid JSON / encoding: best effort, skip it
            continue

        image_filename = json_data["cam/image_array"]
        image_path = os.path.join(basepath, image_filename)

        sample['record_path'] = record_path
        sample["image_path"] = image_path
        sample["json_data"] = json_data

        angle, throttle = Tub.get_angle_throttle(json_data)

        if opts['categorical']:
            angle = dk.utils.linear_bin(angle)
            throttle = dk.utils.linear_bin(
                throttle, N=20, offset=0,
                R=opts['cfg'].MODEL_CATEGORICAL_MAX_THROTTLE_RANGE)

        sample['angle'] = angle
        sample['throttle'] = throttle

        # IMU readings are optional; leave 'imu_array' unset when any of the
        # six values is missing or not convertible to float.
        try:
            accl_x = float(json_data['imu/acl_x'])
            accl_y = float(json_data['imu/acl_y'])
            accl_z = float(json_data['imu/acl_z'])

            gyro_x = float(json_data['imu/gyr_x'])
            gyro_y = float(json_data['imu/gyr_y'])
            gyro_z = float(json_data['imu/gyr_z'])

            sample['imu_array'] = np.array(
                [accl_x, accl_y, accl_z, gyro_x, gyro_y, gyro_z])
        except (KeyError, TypeError, ValueError):
            pass

        # The behavior state is optional as well.
        try:
            behavior_arr = np.array(json_data['behavior/one_hot_state_array'])
            sample["behavior_arr"] = behavior_arr
        except (KeyError, TypeError, ValueError):
            pass

        sample['img_data'] = None

        # Initialise 'train' to False
        sample['train'] = False

        # We need to maintain the correct train - validate ratio across the dataset, even if continuous training
        # so don't add this sample to the main records list (gen_records) yet.
        new_records[key] = sample

    # new_records now contains all our NEW samples
    # - set a random selection to be the training samples based on the ratio in CFG file
    shufKeys = list(new_records.keys())
    random.shuffle(shufKeys)

    # Ratio of samples to use as training data, the remaining are used for evaluation
    targetTrainCount = int(opts['cfg'].TRAIN_TEST_SPLIT * len(shufKeys))

    # NOTE(review): as before, at least one new record is always marked as
    # training data, even when targetTrainCount rounds down to 0 (e.g. a single
    # new record). Confirm whether that is intended for continuous training.
    for key in shufKeys[:max(targetTrainCount, 1)]:
        new_records[key]['train'] = True

    # Finally add all the new records to the existing list
    gen_records.update(new_records)
def vae_generator(cfg, data, batch_size, isTrainSet=True, min_records_to_train=1000, aug=False, aux=None, pilot=False):
    """Yield (X, y) training batches forever from a dict of collated records.

    cfg: config object; IMAGE_H, IMAGE_W, IMAGE_DEPTH and CACHE_IMAGES are
        read here and cfg is passed to load_scaled_image_arr.
    data: dict of record dicts. Only records whose 'train' flag equals
        isTrainSet are used. The keys are re-read and reshuffled on every
        pass, and keys that disappear in the meantime are skipped.
    batch_size: number of samples per yielded batch.
    isTrainSet: select training (True) or validation (False) records.
    min_records_to_train: not used by this function.
    aug: when truthy, freshly loaded images go through augment_image.
    aux: optional key into each record's 'json_data'; when given, its values
        are one-hot encoded (7 classes) into y['aux_output'].
    pilot: when truthy, y also gets 'steering_output' and 'throttle_output'.

    X is reshaped to (batch_size, IMAGE_H, IMAGE_W, IMAGE_DEPTH) and
    y['main_output'] is X itself.

    Records are validated as they are collected: a record whose image cannot
    be loaded, or which lacks the requested aux value, is skipped and does
    not count toward the batch. This keeps every output aligned with X and
    guarantees exactly batch_size samples per batch. Samples left over at the
    end of a pass (fewer than batch_size) are discarded.
    """
    while True:
        inputs_img = []
        aux_out = []
        steering = []
        throttle = []

        keys = list(data.keys())
        keys = shuffle(keys)

        for key in keys:
            try:
                record = data[key]
            except KeyError:
                # removed from data after the key snapshot was taken
                continue

            if record['train'] != isTrainSet:
                continue

            # Check the cheap condition first so no image is loaded for a
            # record that cannot be used anyway.
            if aux is not None and aux not in record['json_data']:
                print("Missing aux data in: {}".format(record))
                continue

            #get image data if we don't already have it
            img_arr = record['img_data']
            if img_arr is None:
                img_arr = load_scaled_image_arr(record['image_path'], cfg)
                if img_arr is None:
                    continue

                if aug:
                    img_arr = augment_image(img_arr)

                if cfg.CACHE_IMAGES:
                    record['img_data'] = img_arr

            st, th = Tub.get_angle_throttle(record['json_data'])

            # All per-sample values are appended together so the lists can
            # never get out of step with each other.
            inputs_img.append(img_arr)
            steering.append(st)
            throttle.append(th)
            if aux is not None:
                aux_out.append(record['json_data'][aux])

            if len(inputs_img) == batch_size:
                X = np.array(inputs_img).reshape(
                    batch_size, cfg.IMAGE_H, cfg.IMAGE_W, cfg.IMAGE_DEPTH)
                y = {'main_output': X}

                if pilot:
                    y['steering_output'] = np.array(steering)
                    y['throttle_output'] = np.array(throttle)

                if aux is not None:
                    y['aux_output'] = keras.utils.to_categorical(aux_out, num_classes=7)

                yield X, y

                inputs_img = []
                aux_out = []
                steering = []
                throttle = []
def describe_tub(tub, stats=False, time_of_day=False, meta=None, img=False):
    """Print a one-line, tab-separated description of a tub and return its record count.

    The printed columns are: directory name, tub version, record count,
    location, task, driver, JOYSTICK_MAX_THROTTLE and JOYSTICK_STEERING_SCALE
    (missing metadata prints as "NA"), followed by the optional columns
    selected by the flags below.

    tub: a v1 tub (has a callable get_num_records, plus meta and path) or a
        v2 tub (supports len(), plus manifest.metadata and base_path).
    stats: when truthy, read every record returned by tub.gather_records()
        and append min/mean/std/max of the 'sim/reward' (or
        'sim/info'['reward']) values, or "NA" when no record has a reward.
    time_of_day: when truthy, append the metadata 'start' timestamp as
        HH:MM (an empty column when 'start' is missing).
    meta: optional iterable of keys; for each one append "Input" when it is
        in tub.inputs, else its metadata value, else "No".
    img: when truthy, append the shape of the 'cam/image_array' of record 1.

    TODO: This should be generalized to return only user-requested meta data.
    TODO: Add a check for image sizes
    """
    if meta is None:
        meta = ()

    # Handle differences between v1 and v2 Tubs
    # TODO: Add __len__ and base_path and manifest and version and a read_only flag to Tub v1
    if hasattr(tub, "get_num_records") and callable(tub.get_num_records):
        count = tub.get_num_records()
        version = "v1"
    else:
        count = len(tub)
        version = "v2"

    tub_meta = tub.meta if hasattr(tub, "meta") else tub.manifest.metadata
    base_path = tub.path if hasattr(tub, "path") else tub.base_path
    base_path = os.path.basename(base_path)

    loc = tub_meta.get("location", "NA")
    task = tub_meta.get("task", "NA")
    driver = tub_meta.get("driver", "NA")
    tod = tub_meta.get("start", None)
    throttle = tub_meta.get("JOYSTICK_MAX_THROTTLE", "NA")
    steering = tub_meta.get("JOYSTICK_STEERING_SCALE", "NA")

    if stats:
        # NOTE(review): a throttle mean/std string used to be built here but
        # was always overwritten by the reward column (or "NA") before being
        # printed, so that dead computation was removed. Add a separate
        # column if throttle statistics are wanted.
        reward = []
        for rec in tub.gather_records():
            with open(rec, 'r') as fp:
                json_data = json.load(fp)
            if 'sim/reward' in json_data:
                reward.append(json_data['sim/reward'])
            elif 'sim/info' in json_data:
                reward.append(json_data['sim/info']['reward'])

        if reward:
            st = "\t{:0.3}/{:0.3}/{:0.3}/{:0.3}".format(
                float(np.min(reward)), float(np.mean(reward)),
                float(np.std(reward)), float(np.max(reward)))
        else:
            st = "\tNA"
    else:
        st = ""

    if time_of_day:
        if tod is not None:
            tod = int(tod)
            tod = datetime.fromtimestamp(tod).strftime('\t%H:%M')
        else:
            tod = "\t"
    else:
        tod = ""

    meta_st = ""
    for key in meta:
        if key in tub.inputs:
            meta_st += "\tInput"
        elif key in tub_meta:
            meta_st += "\t{}".format(tub_meta[key])
        else:
            meta_st += "\tNo"

    img_st = ""
    if img:
        data = tub.get_record(1)
        img_array = data['cam/image_array']
        img_st = "\t{}".format(img_array.shape)

    print("{}\t{}\t{}\t{}\t{}\t{}\t{}\t{}{}{}{}{}".format(
        base_path, version, count, loc, task, driver, throttle, steering,
        st, tod, meta_st, img_st))

    return count
def actionForIndex(self, index):
    """Return the action of the record at position ``index`` as ``[angle, throttle]``.

    ``index`` is translated through ``self.indexes`` into a key of
    ``self.records``; the values come from ``Tub.get_angle_throttle``.
    """
    record = self.records[self.indexes[index]]
    steering, speed = Tub.get_angle_throttle(record)
    return [steering, speed]