def supervised_lstm(input_shape, action_size, learning_rate=0.01,
                    backbone='resnet', time_distributed=True, multi_gpu=True):
    """Build a supervised sequence model: per-frame backbone + LSTM head."""
    img_input = Input(shape=input_shape, dtype='float32')

    if backbone == 'resnet':
        x = dcn_resnet(img_input, time_distributed)
    elif backbone == 'mobilenet':
        mobilenet = keras.applications.mobilenet_v2.MobileNetV2(
            include_top=False, weights=None, pooling='max')
        x = TimeDistributed(mobilenet)(img_input)
    elif backbone == 'convLSTM':
        # Stacked ConvLSTM2D layers must return the full sequence so that the
        # next recurrent layer (and the LSTM head below) receive 5D input.
        x = ConvLSTM2D(filters=40, kernel_size=(3, 3), padding='same',
                       return_sequences=True)(img_input)
        x = BatchNormalization()(x)
        x = ConvLSTM2D(filters=40, kernel_size=(3, 3), padding='same',
                       return_sequences=True)(x)
        x = BatchNormalization()(x)
        x = ConvLSTM2D(filters=40, kernel_size=(3, 3), padding='same',
                       return_sequences=True)(x)
        x = BatchNormalization()(x)
        x = ConvLSTM2D(filters=40, kernel_size=(3, 3), padding='same',
                       return_sequences=True)(x)
        x = BatchNormalization()(x)
    else:
        x = TimeDistributed(
            Conv2D(32, kernel_size=8, strides=4, activation='relu'))(img_input)
        x = TimeDistributed(
            Conv2D(64, kernel_size=4, strides=2, activation='relu'))(x)
        x = TimeDistributed(
            Conv2D(64, kernel_size=3, strides=1, activation='relu'))(x)

    # Shared head: flatten each time step, aggregate with an LSTM, then classify.
    x = TimeDistributed(Flatten())(x)
    x = LSTM(512)(x)
    x = Dropout(rate=0.5)(x)
    x = Dense(action_size, activation='sigmoid', name='x_train_out')(x)

    optimizer = Adam(lr=learning_rate)
    model = Model(inputs=img_input, outputs=x)
    model.compile(optimizer=optimizer,
                  loss=keras.losses.binary_crossentropy,
                  metrics=['accuracy'])
    model.summary()
    return model
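# Minimal usage sketch (illustrative, not part of the original file). Assumed
# values: 8-frame clips of 84x84 RGB frames and 9 action classes; 'cnn' falls
# through to the plain TimeDistributed-Conv2D branch above.
if __name__ == '__main__':
    model = supervised_lstm(input_shape=(8, 84, 84, 3), action_size=9,
                            learning_rate=0.01, backbone='cnn')
    # x_train: (batch, 8, 84, 84, 3) float frames, y_train: (batch, 9) multi-hot labels
    # model.fit(x_train, y_train, batch_size=4, epochs=10)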
sess.run(tf.global_variables_initializer())

# TODO: construct the state
# Nine (dx, dy) sign combinations for moving the viewport.
sign_ary = [[0., 0.], [0., 1.], [1., 0.], [1., 1.], [0., -1.],
            [-1., 0.], [-1., -1.], [-1., 1.], [1., -1.]]

for video, data in zip(sorted(os.listdir(video_dir)), dataset['train']):
    # data --> [45, 100, 7]
    cap = cv2.VideoCapture(os.path.join(video_dir, video))
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    view = Viewport(width, height)
    m = dcn_resnet((width, height, 3))
    print("video name : ", video)
    loss = []
    for scan in data:
        c_idx = 0
        idx = 0
        # Re-open the video so each scanpath starts from the first frame.
        cap = cv2.VideoCapture(os.path.join(video_dir, video))
        state = (np.zeros([1, 256]), np.zeros([1, 256]))  # initial LSTM state (c, h)
        while True:
            ret, frame = cap.read()
            if ret:
                frame = view.get_view(frame)
                frame = cv2.resize(frame, (84, 84))
video_dir = 'sample_videos'
train_dir = os.path.join(video_dir, '320x160')
test_dir = os.path.join(video_dir, '3840x1920')
scanpath_h = os.path.join('datasets/Scanpaths_H', 'Scanpaths')
dataset = Sal360().read_scanpath_H()

for video, data in zip(sorted(os.listdir(test_dir)), dataset['train']):
    cap = cv2.VideoCapture(os.path.join(test_dir, video))
    ret, frame = cap.read()
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Bind the instance to a lowercase name so the Viewport class is not shadowed.
    viewport = Viewport(width, height)
    input_shape = (viewport.width, height, 3)
    model = dcn_resnet(input_shape)

    # Play the video until it ends or 'q' is pressed.
    while True:
        ret, frame = cap.read()
        if ret:
            cv2.imshow('video', frame)
        else:
            break
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()
def drqn(input_shape, action_size, learning_rate=0.001, backbone='mobilenet'):
    """Build a DRQN-style network: per-frame feature extractor + LSTM + output head."""
    img_input = Input(shape=input_shape)
    adam = Adam(lr=learning_rate)

    if backbone == 'resnet':
        x = dcn_resnet(img_input, time_distributed=True)
        x = TimeDistributed(Flatten())(x)
        x = LSTM(512, activation='tanh')(x)
        x = Dropout(0.5)(x)
        x = Dense(action_size, activation='linear')(x)
        model = Model(inputs=img_input, outputs=x)
        model.compile(loss='mse', optimizer=adam)
    elif backbone == 'mobilenet':
        mobilenet = keras.applications.mobilenet_v2.MobileNetV2(
            include_top=False, weights=None, pooling='max')
        x = TimeDistributed(mobilenet)(img_input)
        x = TimeDistributed(Flatten())(x)
        x = LSTM(512, activation='tanh')(x)
        x = Dropout(0.5)(x)
        x = Dense(action_size, activation='linear')(x)
        model = Model(inputs=img_input, outputs=x)
        model.compile(loss='mse', optimizer=adam)
    elif backbone == 'cnn':
        x = TimeDistributed(
            Conv2D(32, kernel_size=8, strides=4, activation='relu'))(img_input)
        x = TimeDistributed(
            Conv2D(64, kernel_size=4, strides=2, activation='relu'))(x)
        x = TimeDistributed(
            Conv2D(64, kernel_size=3, strides=1, activation='relu'))(x)
        x = TimeDistributed(Flatten())(x)
        x = LSTM(256, activation='tanh')(x)
        x = Dropout(0.5)(x)
        # Note: unlike the other branches, this head outputs a single sigmoid
        # value rather than one Q-value per action.
        x = Dense(1, activation='sigmoid')(x)
        model = Model(inputs=img_input, outputs=x)
        model.compile(loss='mse', optimizer=adam)
    elif backbone == 'convLSTM':
        print(np.shape(img_input))  # (?, ?, img_w, img_h, channels)
        # convLSTM is assumed to be a repo-local ConvLSTM2D wrapper that keeps
        # the time axis by default; only the last layer collapses it.
        x = convLSTM(64, 3)(img_input)
        x = BatchNormalization()(x)
        x = convLSTM(64, 3)(x)
        x = BatchNormalization()(x)
        x = convLSTM(64, 3)(x)
        x = BatchNormalization()(x)
        x = convLSTM(64, 3, return_sequences=False)(x)
        x = BatchNormalization()(x)
        # x = Conv3D(filters=1, kernel_size=(3, 3, 3),
        #            activation=LeakyReLU(alpha=0.2),
        #            padding='same', data_format='channels_last')(x)
        x = Flatten()(x)
        x = Dropout(rate=0.5)(x)
        x = Dense(action_size, activation='linear')(x)
        model = Model(inputs=img_input, outputs=x)
        model.compile(loss='mse', optimizer='adam')
    elif backbone == '2.5D':
        base_cnn_model = keras.applications.mobilenet_v2.MobileNetV2(
            include_top=False, weights=None, pooling='max')
        temporal = TimeDistributed(base_cnn_model)(img_input)
        conv3d_analysis1 = Conv3D(filters=40, kernel_size=3, strides=3,
                                  padding='same')(temporal)
        conv3d_analysis2 = Conv3D(filters=40, kernel_size=3, strides=3,
                                  padding='same')(conv3d_analysis1)
        output = Flatten()(conv3d_analysis2)
        output = Dense(action_size, activation='tanh')(output)
        model = Model(inputs=img_input, outputs=output)
        model.compile(loss=keras.losses.binary_crossentropy, optimizer='adam')
    else:
        raise ValueError("invalid backbone: {}".format(backbone))

    model.summary()
    return model
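# Minimal usage sketch (illustrative, not part of the original file). Assumed
# values: 4-frame clips of 84x84 RGB observations and 9 discrete viewport moves.
if __name__ == '__main__':
    q_net = drqn(input_shape=(4, 84, 84, 3), action_size=9, backbone='mobilenet')
    dummy_clip = np.zeros((1, 4, 84, 84, 3), dtype=np.float32)
    q_values = q_net.predict(dummy_clip)         # shape (1, 9): one Q-value per action
    greedy_action = int(np.argmax(q_values[0]))  # greedy action for this clip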
def __init__(self):
    # Unfinished experiment: separate Keras value/policy heads. They are not
    # connected to the TF graph below and the calls do not run as written, so
    # they are kept commented out.
    # action_input = Input(shape=[None, 1])
    # state_input = Input(shape=[None, 224, 224, 3])
    # value_dcn = dcn_resnet()
    # policy_dcn = dcn_resnet()
    # # share the variables, or use separate ones?
    # value_lstm = CuDNNLSTM(256)(value_dcn, state_input)  # state-value: expected return
    # policy_lstm = CuDNNLSTM(256)(policy_dcn)             # policy: action selection
    # self.value_model = Dense(1, activation='relu')(value_lstm)
    # self.policy_model = Dense(1, activation='relu')(policy_lstm)

    self.action_max = 2

    # Assumed observation placeholder: the original snippet references
    # self.input_image without defining it; 84x84 RGB matches the conv geometry below.
    self.input_image = tf.placeholder(tf.float32, shape=[None, 84, 84, 3])

    # Convolutional feature extractor (84 -> 20 -> 9 -> 7 -> 1 spatially).
    self.conv1 = slim.conv2d(self.input_image, activation_fn=tf.nn.relu,
                             num_outputs=32, kernel_size=[8, 8],
                             stride=[4, 4], padding='VALID')
    self.conv2 = slim.conv2d(self.conv1, activation_fn=tf.nn.relu,
                             num_outputs=64, kernel_size=[4, 4],
                             stride=[2, 2], padding='VALID')
    self.conv3 = slim.convolution2d(inputs=self.conv2, num_outputs=64,
                                    kernel_size=[3, 3], stride=[1, 1],
                                    padding='VALID', activation_fn=tf.nn.relu)
    self.conv4 = slim.convolution2d(inputs=self.conv3, num_outputs=256,
                                    kernel_size=[7, 7], stride=[1, 1],
                                    padding='VALID', activation_fn=tf.nn.relu)
    hidden = slim.fully_connected(slim.flatten(self.conv4), 256,
                                  activation_fn=tf.nn.relu)

    # Temporal dependency: single-layer LSTM whose state is carried across steps.
    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(256, reuse=tf.AUTO_REUSE)
    c_init = np.zeros((1, lstm_cell.state_size.c), np.float32)
    h_init = np.zeros((1, lstm_cell.state_size.h), np.float32)
    self.state_init = [c_init, h_init]
    c_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.c])
    h_in = tf.placeholder(tf.float32, [1, lstm_cell.state_size.h])
    # self.state_in = (c_in, h_in)
    self.rnn_in = tf.expand_dims(hidden, [0])
    # step_size = tf.shape(self.imageIn[:1])  # 84 84 3
    # LSTMStateTuple(c, h): c is the cell state, h is the hidden/output state.
    self.state_in = tf.nn.rnn_cell.LSTMStateTuple(c_in, h_in)
    lstm_outputs, self.lstm_state = tf.nn.dynamic_rnn(lstm_cell, self.rnn_in,
                                                      initial_state=self.state_in,
                                                      time_major=False,
                                                      scope="A3C")
    lstm_c, lstm_h = self.lstm_state
    self.state_out = (lstm_c[:1, :], lstm_h[:1, :])
    rnn_out = tf.reshape(lstm_outputs, [-1, 256])

    # self.policy = slim.fully_connected(rnn_out, a_size,
    #                                    activation_fn=tf.nn.relu,
    #                                    weights_initializer=normalized_columns_initializer(0.01),
    #                                    biases_initializer=None)
    # hidden1 = tf.layers.dense(rnn_out, 16, activation=tf.nn.relu)
    # hidden2 = tf.layers.dense(hidden1, 16, activation=tf.nn.relu)
    # hidden3 = tf.layers.dense(hidden2, 16, activation=tf.nn.relu)

    # Policy head: raw logits, then softmax over the 9 viewport moves.
    self.logits = tf.layers.dense(rnn_out, 9, activation=None)
    self.policy = tf.nn.softmax(self.logits)
    # Value head: state-value estimate (expected return).
    self.value = slim.fully_connected(rnn_out, 1, activation_fn=None,
                                      weights_initializer=normalized_columns_initializer(1.0),
                                      biases_initializer=None)

    # Supervised target: one-hot vector over the 9 actions (float for the loss).
    self.true_val = tf.placeholder(tf.float32, shape=[9])
    # self.error = tf.reduce_mean(tf.square(self.true_val - self.policy))
    # self.train_op = tf.train.AdamOptimizer(0.001)
    # Cross-entropy is computed on the logits, not on the softmax output.
    self.error = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.reshape(self.true_val, [1, 9]), logits=self.logits))
    # self.error = tf.reduce_mean(tf.square(tf.subtract(self.true_val, self.policy)))
    self.train_op = tf.train.AdamOptimizer(0.01).minimize(self.error)
    self.saver = tf.train.Saver()
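# Minimal single-step usage sketch (illustrative only). Assumptions: the
# __init__ above belongs to a network class, called A3CNetwork here purely for
# illustration; the helpers it uses (slim, normalized_columns_initializer) are
# imported at the top of this file; observations are preprocessed 84x84x3 frames.
if __name__ == '__main__':
    net = A3CNetwork()                                  # hypothetical class name
    dummy_frame = np.zeros((1, 84, 84, 3), np.float32)  # one preprocessed frame
    dummy_target = np.zeros(9, np.float32)
    dummy_target[0] = 1.0                               # one-hot target action
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        rnn_state = net.state_init                      # zeroed (c, h) LSTM state
        feed = {net.input_image: dummy_frame,
                net.state_in[0]: rnn_state[0],
                net.state_in[1]: rnn_state[1],
                net.true_val: dummy_target}
        # One optimization step; the returned LSTM state is fed back on the next frame.
        policy, rnn_state, _ = sess.run(
            [net.policy, net.state_out, net.train_op], feed_dict=feed)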