class DQNAgent:
    """Epsilon-greedy Q-learning agent with an online and a target network.

    Transitions are kept in a bounded replay buffer and replayed one sample
    at a time through ``train``.  The two networks come from
    ``_build_model``; every ``target_hard_update_interval`` training steps
    the target network is replaced by a copy of the online one.

    Attributes:
        state_size: Passed through to ``CapsNet`` as its first argument.
        action_size: Number of selectable actions; also passed to ``CapsNet``.
        replay_buffer: ``deque`` holding at most 2000
            ``(state, action, reward, next_state, done)`` tuples.
        gamma: Discount applied to the bootstrapped next-state value.
        epsilon: Current probability of picking a random action.
        epsilon_min: Once ``epsilon`` is no longer above this, decay stops.
        epsilon_decay: Factor applied to ``epsilon`` after a training step.
        learning_rate: Stored only; nothing in this class reads it.
        target_hard_update_interval: Training steps between target syncs.
        num_train_steps: Count of completed ``train`` calls.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.replay_buffer = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self._build_model()
        self.target_hard_update_interval = 100
        self.num_train_steps = 0

    def _build_model(self):
        """Create ``self.model`` and ``self.target_model`` via ``CapsNet``."""
        # NOTE(review): CapsNet is defined outside this class; this unpacking
        # requires it to return exactly two models -- confirm.
        self.model, self.target_model = CapsNet(
            self.state_size, self.action_size, 3)

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer.

        ``train`` later uses ``action`` to index the network's output row,
        so it must be a valid index into that row.
        """
        self.replay_buffer.append((state, action, reward, next_state, done))

    def act(self, state):
        """Return an entry of the module-level ``actions`` lookup.

        With probability ``epsilon`` the entry is chosen uniformly at
        random; otherwise it is the one whose predicted value is highest.
        """
        # NOTE(review): `actions` is not defined in this class.  If its
        # entries are not the indices themselves, callers must convert
        # before passing the result to remember() -- confirm.
        if np.random.rand() <= self.epsilon:
            return actions[random.randrange(self.action_size)]
        act_values = self.model.predict(state)
        return actions[np.argmax(act_values[0])]  # returns action

    def train(self, batch_size):
        """Replay ``batch_size`` random transitions and fit the online model.

        Does nothing when the buffer holds fewer than ``batch_size``
        transitions (sampling would otherwise raise ``ValueError``).

        For each sampled transition the regression target equals the online
        network's current prediction for ``state``, except at ``action``,
        where it is ``reward`` (terminal) or
        ``reward + gamma * max(target_model(next_state))``.
        """
        if len(self.replay_buffer) < batch_size:
            return

        minibatch = random.sample(self.replay_buffer, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the target network; it only changes at the
                # periodic sync below, so the TD target does not move with
                # every fit() call.
                next_q = self.target_model.predict(next_state)[0]
                target = reward + self.gamma * np.amax(next_q)
            # Start from the online network's own output so only the taken
            # action contributes a non-zero error.
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)

        # NOTE(review): the original nesting was lost; the step counter,
        # target sync and epsilon decay are applied once per train() call
        # here -- confirm against the intended schedule.
        self.num_train_steps += 1
        if self.num_train_steps % self.target_hard_update_interval == 0:
            self.target_model = keras.models.clone_model(self.model)
            self.target_model.set_weights(self.model.get_weights())
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        """Load weights from ``name`` into the online model only."""
        self.model.load_weights(name)

    def save(self, name):
        """Save the online model's weights to ``name``."""
        self.model.save_weights(name)

    def preprocess(self, image):
        """Return ``image`` unchanged (identity hook)."""
        return image
# Keep only channel index 1 of every image set, then re-add a singleton
# axis at position 3 so each array is 4-D again.
trX = np.expand_dims(trX[:, :, :, 1], axis=3)
vaX = np.expand_dims(vaX[:, :, :, 1], axis=3)
teX = np.expand_dims(teX[:, :, :, 1], axis=3)

EndTime = time.time()
cprint('Takes ' + str(EndTime - StartTime) + 'time to load data', 'magenta')
print(str(trX.shape), str(trY.shape), str(vaX.shape), str(vaY.shape))

# Define Model
# Count the distinct argmax labels in each split (presumably the label rows
# are one-hot -- confirm).  The training count doubles as n_class below, so
# it is computed once and reused.
n_train_classes = len(np.unique(np.argmax(trY, 1)))
cprint(str(n_train_classes), 'red')
n_val_classes = len(np.unique(np.argmax(vaY, 1)))
cprint(str(n_val_classes), 'red')

# The template model is created under the CPU device scope and replicated
# across GPUs afterwards, outside that scope.
with tf.device('/cpu:0'):
    model, eval_model, manipulate_model = CapsNet(
        input_shape=trX.shape[1:],
        n_class=n_train_classes,
        kernel=args.kernel,
        primary_channel=args.primary_channel,
        routings=args.routings)
    # Alternative kept from the original, disabled:
    # model = CapsNet_NoDecoder(input_shape=trX.shape[1:],
    #                           n_class=len(np.unique(np.argmax(trY, 1))),
    #                           kernel=args.kernel,
    #                           primary_channel=args.primary_channel,
    #                           routings=args.routings)
model.summary()
multi_model = multi_gpu_model(model, gpus=args.gpus)

# Save path and load model
# NOTE(review): save_path is created as a directory when missing, yet the
# same path is handed to load_weights() below -- confirm which is intended.
if not os.path.exists(save_path):
    os.mkdir(save_path)
if args.keep:  # init the model weights with provided one
    cprint('load weight from:' + save_path, 'yellow')
    multi_model.load_weights(save_path)
print(args) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) # load data if args.dataset == 0: (x_train, y_train), (x_test, y_test) = load_mnist() elif args.dataset == 1: (x_train, y_train), (x_test, y_test) = load_fashion_mnist() elif args.dataset == 2: (x_train, y_train), (x_test, y_test) = load_svhn() # define model model, eval_model, manipulate_model = CapsNet( input_shape=x_train.shape[1:], n_class=len(np.unique(np.argmax(y_train, 1))), routings=args.routings, l1=args.l1) model.summary() flags = [0] * 10 index = [0] * 10 digits = np.where(y_test == 1)[1] for i, num in enumerate(digits): num = int(num) if flags[num]: continue else: flags[num] = 1 index[num] = i if np.all(flags):
def _build_model(self):
    """Create the agent's two networks and store them on ``self``.

    ``CapsNet`` is called with ``self.state_size``, ``self.action_size`` and
    the literal ``3``; its two-element result is unpacked into
    ``self.model`` and ``self.target_model`` in that order.
    """
    networks = CapsNet(self.state_size, self.action_size, 3)
    self.model, self.target_model = networks
default=0.001, type=float, help="Initial learning rate") parser.add_argument('--gpus', default=2, type=int) args = parser.parse_args() print(args) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) # load data (x_train, y_train), (x_test, y_test) = load_mnist() # define model with tf.device('/cpu:0'): model, eval_model, manipulate_model = CapsNet( input_shape=x_train.shape[1:], n_class=len(np.unique(np.argmax(y_train, 1))), routings=args.routings) model.summary() plot_model(model, to_file=args.save_dir + '/model.png', show_shapes=True) # train or test if args.weights is not None: # init the model weights with provided one model.load_weights(args.weights) if not args.testing: # define muti-gpu model multi_model = multi_gpu_model(model, gpus=args.gpus) train(model=multi_model, data=((x_train, y_train), (x_test, y_test)), args=args) model.save_weights(args.save_dir + '/trained_model.h5') print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir)
parser.add_argument('--is_training', default=1, type=int) parser.add_argument('--weights', default=None) parser.add_argument('--lr', default=0.001, type=float) parser.add_argument('--gpus', default=2, type=int) args = parser.parse_args() print(args) if not os.path.exists(args.save_dir): os.makedirs(args.save_dir) # load data (x_train, y_train), (x_test, y_test) = load_mnist() # define model with tf.device('/cpu:0'): model, eval_model = CapsNet(input_shape=x_train.shape[1:], n_class=len(np.unique(np.argmax(y_train, 1))), num_routing=args.num_routing) model.summary() plot_model(model, to_file=args.save_dir+'/model.png', show_shapes=True) # define muti-gpu model multi_model = multi_gpu_model(model, gpus=args.gpus) # train or test if args.weights is not None: # init the model weights with provided one model.load_weights(args.weights) if args.is_training: train(model=multi_model, data=((x_train, y_train), (x_test, y_test)), args=args) model.save_weights(args.save_dir + '/trained_model.h5') print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir) test(model=eval_model, data=(x_test, y_test)) else: # as long as weights are given, will run testing