def get_episodes_between(cls, collection: str, lower_id: str, upper_id: 'str | None' = None, grasp_success=False, suffix=('ed-v',)):
    """Return (database, episode_id, 0, suffix) tuples for episodes in an id range.

    Args:
        collection: Database/collection name to query.
        lower_id: Inclusive lower bound on the episode id.
        upper_id: Optional inclusive upper bound; no upper bound when falsy.
            (Annotation fixed: the default is None, so the type is str | None.)
        grasp_success: When True, restrict to episodes whose first action had reward 1.
        suffix: Image suffix tuple forwarded unchanged in every result tuple.

    Returns:
        List of (database, episode_id, 0, suffix) tuples.
    """
    query = {'id': {'$gte': lower_id}}
    if upper_id:
        query['id']['$lte'] = upper_id
    if grasp_success:
        # Reward of the first (grasp) action marks a successful grasp.
        query['actions.0.reward'] = 1
    episodes = Loader.yield_episodes(collection, query=query)
    # List comprehension instead of list(generator) — same result, clearer intent.
    return [(d, e['id'], 0, suffix) for d, e in episodes]
from data.loader import Loader

# Print every episode in 'cube-1' whose grasp succeeded (reward == 1) and whose
# final pose has a gripper distance below 7 mm — presumably near-closed fingers;
# TODO confirm the unit of final_pose.d against the Action type.
# (Removed: an unused `data` list and an unused enumerate() index.)
for d, e in Loader.yield_episodes('cube-1'):
    action = Loader.get_action(d, e['id'])
    if action.reward == 1 and action.final_pose and action.final_pose.d < 0.007:
        print(d, e['id'])
import os

from agents.agent import Agent
from data.loader import Loader

# Pin the process to a single GPU before the agent (and its framework) initializes.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '1'  # plain string literal instead of str(1)

agent = Agent()

# Re-estimate the reward for every episode that already carries an estimate and
# collect (id, new estimate, recorded reward) for comparison.
data = []
for d, e in Loader.yield_episodes('cylinder-cube-mc-1'):  # index was unused — no enumerate needed
    action, image = Loader.get_action(d, e['id'], 'ed-v')
    if not hasattr(action, 'estimated_reward'):
        continue
    data.append({
        'id': e['id'],
        # 'old': action.estimated_reward,
        'new': agent.reward_for_action([image], action),
        'reward': action.reward,
    })

# Largest disagreement between recorded reward and fresh estimate first.
sorted_data = sorted(data, key=lambda k: -abs(k['reward'] - k['new']))
for i, e in enumerate(sorted_data[:20]):
    print(i, e)
from pymongo import MongoClient from data.loader import Loader client = MongoClient() for d, e in Loader.yield_episodes('cylinder-1'): method = e['actions'][0]['method'] print(e['id'], method) if method == 'RANDOM': method = 'Random' elif method == 'TOP_5': method = 'Top5' elif method == 'BOTTOM_5': method = 'Bottom5' elif method == 'MAX': method = 'Max' elif method == 'UNCERTAIN': method = 'Uncertain' elif method == 'PROB': method = 'Prob' elif method == 'BAYES': method = 'Bayes' elif method == 'BAYES_TOP': method = 'BayesTop' elif method == 'BAYES_PROB': method = 'BayesProb' elif method == 'NOT_ZERO': method = 'NotZero' elif method == 'RANDOM_INFERENCE':
import argparse

import numpy as np

from data.loader import Loader


def get_mean(episode):
    """Map a (database, episode) pair to {'id', 'mean'} using the 'ed-after' image.

    Episodes without an after-image get a sentinel mean of 1e6 so they sort last.
    """
    database, meta = episode
    episode_id = meta['id']
    _, image = Loader.get_action(database, episode_id, 'ed-after')
    mean_value = 1e6 if image is None else np.mean(image.mat)
    return {'id': episode_id, 'mean': mean_value}


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Clean the robot learning database.')
    parser.add_argument('database', type=str, help='database name')
    parser.add_argument('--N', type=int, default=20, help='number results')
    args = parser.parse_args()

    # Rank all episodes by mean after-image value and show the N smallest.
    results = [get_mean(episode) for episode in Loader.yield_episodes(args.database)]
    results.sort(key=lambda entry: entry['mean'])
    for rank, result in enumerate(results[:args.N]):
        print(f'{rank + 1}: {result}')
class Placing:
    """Trainer for the combined grasp / place / merge placing network.

    Pulls grasp+place episodes from MongoDB via Loader, builds three Keras
    sub-models (grasp, place, merge), wires them into one combined model,
    trains it, and saves the best checkpoint as an .h5 file.
    """

    def __init__(self, collections, mongo_host='localhost', data_path=None, image_format='png'):
        # collections: collection name(s) passed straight to Loader.yield_episodes.
        self.loader = Loader(mongo_host, data_path=data_path, image_format=image_format)
        self.model_path = self.loader.get_model_path(f'placing-3-32-part-type-2')  # [.h5]

        train_batch_size = 64
        validation_batch_size = 512

        # Per-image-type input shapes; height/width left open (fully convolutional).
        self.image_shape = {
            'ed': (None, None, 1),
            'rd': (None, None, 1),
            'rc': (None, None, 3),
        }

        self.z_size = 48  # latent embedding size shared by grasp and place heads
        self.percent_validation_set = 0.2

        # 'screw' models get an extra primitive — presumably encoded in the model name; verify.
        number_primitives = 4 if 'screw' in str(self.model_path.stem) else 3
        load_model = False  # set True to resume from the saved model
        use_beta_checkpoint_path = True  # checkpoint to a '.beta' sibling instead of overwriting
        checkpoint_path = self.model_path if not use_beta_checkpoint_path else self.model_path.with_suffix('.beta' + self.model_path.suffix)

        # Restrict to episodes that are exactly one grasp followed by one place.
        episodes = self.loader.yield_episodes(
            collections,
            query={'$or': [
                # {'actions': {'$size': 1}, 'actions.0.type': 'grasp'},
                {'actions': {'$size': 2}, 'actions.0.type': 'grasp', 'actions.1.type': 'place'},
            ]},
            projection={'_id': 0, 'id': 1, 'actions.pose': 1, 'actions.reward': 1, 'actions.images': 1}
        )
        train_episodes, validation_episodes = self.split_set(episodes)

        train_set = PlacingDataset(train_episodes, seed=42)
        train_data = train_set.get_data(shuffle='all')
        # Shuffle buffer of 6x the episode count — presumably 6 samples per episode; confirm in PlacingDataset.
        train_data = train_data.shuffle(len(train_episodes) * 6)
        train_data = train_data.batch(train_batch_size)
        train_data = train_data.prefetch(tf.data.experimental.AUTOTUNE)

        validation_data = PlacingDataset(validation_episodes, seed=43).get_data()
        validation_data = validation_data.cache()
        validation_data = validation_data.batch(validation_batch_size)
        validation_data = validation_data.prefetch(tf.data.experimental.AUTOTUNE)

        self.grasp_model = self.define_grasp_model(number_primitives=number_primitives)
        self.place_model = self.define_place_model()
        self.merge_model = self.define_merge_model()

        image_grasp_before = [
            tk.Input(shape=self.image_shape['ed'], name='image_grasp_before')
        ]
        image_place_before = [
            tk.Input(shape=self.image_shape['ed'], name='image_place_before')
        ]
        image_place_goal = [
            tk.Input(shape=self.image_shape['ed'], name='image_place_goal')
        ]

        # Wire the sub-models: the merge head predicts success from the two latents.
        reward_m, *z_m = self.grasp_model(image_grasp_before)
        reward_p, z_p = self.place_model(image_place_before + image_place_goal)
        reward = self.merge_model([z_m[0], z_p])

        losses = Losses()

        self.combined = tk.Model(inputs=(image_grasp_before + image_place_before + image_place_goal), outputs=[reward_m, reward_p, reward])
        self.combined.summary()
        self.combined.compile(
            optimizer=tk.optimizers.Adam(learning_rate=1e-4),
            loss=losses.binary_crossentropy,
            loss_weights=[1.0, 1.0, 4.0],  # merged reward weighted 4x over the per-model rewards
            metrics=[
                losses.binary_crossentropy,
                SplitMeanSquaredError(),
                SplitBinaryAccuracy(),
                SplitPrecision(),
                SplitRecall(),
            ],
        )

        callbacks = [
            tk.callbacks.ModelCheckpoint(
                str(checkpoint_path),
                monitor=f'val_loss',
                verbose=1,
                save_best_only=True
            ),
            tk.callbacks.EarlyStopping(monitor=f'val_loss', patience=60),
            tk.callbacks.ReduceLROnPlateau(factor=0.2, verbose=1, patience=20, min_lr=5e-7),
            tf.keras.callbacks.TensorBoard(log_dir=str(self.model_path.parent / 'logs' / f'placing-{time()}')),
        ]

        if load_model:
            self.combined.load_weights(str(self.model_path))
            # Seed the checkpoint's best value so resuming cannot save a worse model.
            evaluation = self.combined.evaluate(validation_data, batch_size=validation_batch_size, verbose=2)
            callbacks[0].best = evaluation[self.combined.metrics_names.index('loss')]

        self.combined.fit(
            train_data,
            validation_data=validation_data,
            epochs=1000,
            callbacks=callbacks,
            verbose=2,
        )

        # Reload the best weights and persist the final model.
        self.combined.load_weights(str(checkpoint_path))
        if use_beta_checkpoint_path:
            self.combined.save(str(self.model_path), save_format='h5')

    def define_grasp_model(self, number_primitives: int):
        """Build the fully-convolutional grasp model.

        Maps a depth image to a per-primitive grasp reward plus a latent
        embedding z of size self.z_size.
        """
        inputs = [
            tk.Input(shape=self.image_shape['ed'], name='image')
        ]

        conv_block = conv_block_gen(l2_reg=0.001, dropout_rate=0.35)
        conv_block_r = conv_block_gen(l2_reg=0.001, dropout_rate=0.5)

        # Shared trunk.
        x = conv_block(inputs[0], 32)
        x = conv_block(x, 32, strides=(2, 2))
        x = conv_block(x, 32)

        # Reward branch (higher dropout rate).
        x_r = conv_block_r(x, 48)
        x_r = conv_block_r(x_r, 48)
        x_r = conv_block_r(x_r, 64)
        x_r = conv_block_r(x_r, 64)
        x_r = conv_block_r(x_r, 64)
        x_r = conv_block_r(x_r, 48, kernel_size=(2, 2))

        # Latent embedding branch.
        x = conv_block(x, 64)
        x = conv_block(x, 64)
        x = conv_block(x, 96)
        x = conv_block(x, 96)
        x = conv_block(x, 128)
        x = conv_block(x, 128, kernel_size=(2, 2))

        reward = tkl.Conv2D(number_primitives, kernel_size=(1, 1), activation='sigmoid', name='reward_grasp')(x_r)
        reward_training = tkl.Reshape((number_primitives,))(reward)

        z_trainings = []
        for i in range(1):  # single z head; list kept, presumably for interface symmetry with multi-z variants
            z = tkl.Conv2D(self.z_size, kernel_size=(1, 1), activity_regularizer=tk.regularizers.l2(0.0005), name=f'z_m{i}')(x)
            z_training = tkl.Reshape((self.z_size,))(z)
            z_trainings.append(z_training)

        outputs = [reward_training] + z_trainings
        return tk.Model(inputs=inputs, outputs=outputs, name='grasp')

    def define_place_model(self):
        """Build the place model.

        Maps (before, goal) depth images to a scalar place reward plus a
        latent embedding z of size self.z_size.
        """
        inputs = [
            tk.Input(shape=self.image_shape['ed'], name='image_before'),
            tk.Input(shape=self.image_shape['ed'], name='image_goal'),
        ]

        conv_block = conv_block_gen(l2_reg=0.001, dropout_rate=0.35)
        conv_block_r = conv_block_gen(l2_reg=0.001, dropout_rate=0.5)

        # Shared trunk over the channel-concatenated image pair.
        x = tkl.Concatenate()(inputs)
        x = conv_block(x, 32)
        x = conv_block(x, 32)
        x = conv_block(x, 32)
        x = conv_block(x, 32)
        x = conv_block(x, 32)
        x = conv_block(x, 32)

        # Reward branch (higher dropout rate).
        x_r = conv_block_r(x, 32)
        x_r = conv_block_r(x_r, 32)
        x_r = conv_block_r(x_r, 48)
        x_r = conv_block_r(x_r, 48)
        x_r = conv_block_r(x_r, 48)
        x_r = conv_block_r(x_r, 48)
        x_r = conv_block_r(x_r, 48)
        x_r = conv_block_r(x_r, 48)
        x_r = conv_block_r(x_r, 64)
        x_r = conv_block_r(x_r, 48, kernel_size=(2, 2))

        # Latent embedding branch.
        x = conv_block(x, 48)
        x = conv_block(x, 48)
        x = conv_block(x, 64)
        x = conv_block(x, 64)
        x = conv_block(x, 64)
        x = conv_block(x, 64)
        x = conv_block(x, 96)
        x = conv_block(x, 96)
        x = conv_block(x, 128)
        x = conv_block(x, 128, kernel_size=(2, 2))

        reward = tkl.Conv2D(1, kernel_size=(1, 1), activation='sigmoid', name='reward_place')(x_r)
        reward_training = tkl.Reshape((1,))(reward)

        z = tkl.Conv2D(self.z_size, kernel_size=(1, 1), activity_regularizer=tk.regularizers.l2(0.0005), name='z_p')(x)
        z_training = tkl.Reshape((self.z_size,))(z)

        outputs = [reward_training, z_training]
        return tk.Model(inputs=inputs, outputs=outputs, name='place')

    def define_merge_model(self):
        """Build the merge model: success prediction from the difference of the two latents."""
        # NOTE(review): (self.z_size) has no comma, so this is an int, not a 1-tuple —
        # confirm whether (self.z_size,) was intended.
        input_shape = (self.z_size)
        z_m = tk.Input(shape=input_shape, name='z_m')
        z_p = tk.Input(shape=input_shape, name='z_p')

        dense_block = dense_block_gen(l2_reg=0.01, dropout_rate=0.2)

        x = z_m - z_p
        x = dense_block(x, 128)
        x = dense_block(x, 128)
        x = dense_block(x, 64)

        reward = tkl.Dense(1, activation='sigmoid', name='reward_merge')(x)
        return tk.Model(inputs=[z_m, z_p], outputs=[reward], name='merge')

    @staticmethod
    def binary_decision(string: str, p: float) -> bool:
        """Deterministic coin flip: True with probability ~p, stable for a given string.

        Hashes the string (SHA-256), folds it to 16 bits, and compares the
        resulting fraction against p.
        """
        return float(int(hashlib.sha256(string.encode('utf-8')).hexdigest(), 16) % 2**16) / 2**16 < p

    def assign_set(self, data):
        """Tag a (collection, episode) pair with a deterministic train/validation assignment."""
        collection, episode = data
        random_assign = self.binary_decision(episode['id'], self.percent_validation_set)
        episode['is_validation'] = random_assign  # or (collection in [])
        episode['collection'] = collection
        return episode

    def split_set(self, data, verbose=1):
        """Split episodes into (train, validation) lists, keeping only the last 13000."""
        episodes = list(map(self.assign_set, data))[-13000:]
        train_episodes = list(filter(lambda x: not x['is_validation'], episodes))
        validation_episodes = list(filter(lambda x: x['is_validation'], episodes))
        if verbose > 0:
            logger.info(f'Train on {len(train_episodes)} episodes.')
            logger.info(f'Validate on {len(validation_episodes)} episodes.')
        return train_episodes, validation_episodes
from collections import defaultdict

from data.loader import Loader

# Report per-month recording counts and total episode/image counts across all databases.
database_list = Loader.get_databases()

total_count = sum(Loader.get_episode_count(d) for d in database_list)
print(f'Total count {total_count}')

recordings_on_date = defaultdict(int)  # idiom: defaultdict(int) instead of defaultdict(lambda: 0)
image_count = 0
for d in database_list:
    for _, e in Loader.yield_episodes(d):
        # Episode ids look like 'YYYY-MM-...'; group by year-month.
        data = e['id'].split('-')
        date = f'{data[0]}-{data[1]}'  # append -{data[2]} for per-day granularity
        recordings_on_date[date] += 1
        image_count += len(e['actions'][0]['images'])

print(f'Total image count {image_count}')
for m in sorted(recordings_on_date):
    print(f'{m}: {recordings_on_date[m]}')
def load_data(self, max_number=False, **params) -> Tuple[Any, Any]:
    """Load training and validation sets from the configured databases.

    Args:
        max_number: Optional cap on the number of episodes loaded
            (False/0 means no limit — kept for backward compatibility).
        **params: Per-episode processing options; sensible defaults are
            filled in for scaling and the input/cropped/output sizes.

    Returns:
        (train_set, validation_set), each a (data_x, data_y) pair of
        numpy-array lists produced by self.load_element.

    Raises:
        Exception: If no episodes could be loaded at all.
    """
    params.setdefault('scale_around_zero', False)
    params.setdefault('size_input', (752, 480))
    params.setdefault('size_cropped', (200, 200))
    params.setdefault('size_output', (32, 32))

    start = time.time()

    self.image_output_path = self.output_path / f"input-{params['size_output'][0]}"
    self.image_output_path.mkdir(exist_ok=True, parents=True)
    self.model_path.mkdir(exist_ok=True, parents=True)

    mean_reward = 0.0
    episodes = []
    # enumerate(start=1) replaces the manual counter; only the last action
    # of each episode is fetched ($slice: -1).
    for count, (d, e) in enumerate(
        Loader.yield_episodes(
            self.databases + self.validation_databases,
            projection={'_id': 0, 'id': 1, 'actions': {'$slice': -1}},
        ),
        start=1,
    ):
        episodes.append({
            'database': d,
            'episode': e,
            **params,
        })
        mean_reward += e['actions'][0]['reward']
        if max_number and count >= max_number:
            break

    episodes = list(map(self.assign_set, episodes))
    if not episodes:
        raise Exception('No episodes could be loaded.')
    print(f'Loading {len(episodes)} episodes.')

    train_episodes = filter(lambda x: not x['is_validation'], episodes)
    validation_episodes = filter(lambda x: x['is_validation'], episodes)

    def set_loader():
        # Recreate the client in each worker: pymongo warns if a client
        # created before fork is reused in the child processes.
        Loader.client = MongoClient()

    # Renamed from 'load_data' — the inner helper shadowed this method's name.
    def collect(episode_group):
        """Fan episodes out to the pool and gather features/labels/infos."""
        features, labels, infos = [], [], []
        for element in p.imap_unordered(self.load_element, episode_group):
            features += element[0]
            labels += element[1]
            infos += element[2]
        data_x = [np.array(t) for t in zip(*features)]
        data_y = [np.array(labels), np.array(infos)]
        return data_x, data_y

    p = Pool(8, initializer=set_loader)
    try:
        train_set = collect(train_episodes)
        validation_set = collect(validation_episodes)
    finally:
        # Bug fix: the pool was closed but never joined, and leaked
        # entirely if load_element raised — always shut it down.
        p.close()
        p.join()

    print(f'Train set: {len(train_set[0][0])}')
    print(f'Validation set: {len(validation_set[0][0])}')
    print(f'Mean reward: {mean_reward / len(episodes):0.3}')

    end = time.time()
    print(f'Time [s]: {end-start:0.4}')

    return train_set, validation_set