def build_age_gender_model(cls):
    """Assemble the two-output network on top of an ImageNet MobileNet.

    A headless MobileNet (160x160x3 input, global average pooling) is
    extended with a 2-unit dense layer; its raw outputs are passed through
    ``cls.multitask_activation`` inside a ``Lambda`` layer.

    Returns:
        A ``Model`` mapping the backbone input to the activated 2-unit output.
    """
    backbone = mobilenet.MobileNet(
        include_top=False,
        input_shape=(160, 160, 3),
        weights="imagenet",
        pooling="avg",
    )
    pooled_features = backbone.layers[-1].output
    raw_outputs = layers.Dense(2)(pooled_features)
    activated = layers.Lambda(cls.multitask_activation)(raw_outputs)
    return Model(backbone.layers[0].input, activated)
def make_frame_predictions(video_arr):
    """Return MobileNet's top-three ImageNet labels for every frame.

    Each frame is expanded to a batch of one, preprocessed with
    ``mobilenet.preprocess_input`` and classified individually, so this
    will take a few minutes to run on a long video.

    Args:
        video_arr: Sequence of frames; each element is passed to
            ``np.expand_dims(..., axis=0)`` before prediction.

    Returns:
        A list with one entry per frame; each entry is a list holding the
        three highest-ranked class names, best first.
    """
    mobilenet_model = mobilenet.MobileNet()

    video_predictions = []
    for index, frame in enumerate(video_arr):
        img_array = np.expand_dims(frame, axis=0)
        pImg = mobilenet.preprocess_input(img_array)
        prediction = mobilenet_model.predict(pImg)
        results = imagenet_utils.decode_predictions(prediction)

        # Show the full decoded output once as a sanity check.
        if index == 0:
            print(results)

        # Each decoded entry is indexed as [1] for the class name; keep the
        # names of the three best entries for this frame.
        video_predictions.append([entry[1] for entry in results[0][:3]])

    return video_predictions
def mobilenet_retinanet(num_classes, backbone='mobilenet224_1.0', inputs=None, modifier=None, **kwargs):
    """ Constructs a retinanet model using a mobilenet backbone.

    Args
        num_classes: Number of classes to predict.
        backbone: Which backbone to use (one of ('mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224')),
            optionally followed by '_<alpha>' to select the MobileNet width multiplier (e.g. 'mobilenet224_1.0').
            When the suffix is missing, alpha defaults to 1.0.
        inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)).
        modifier: A function handler which can modify the backbone before using it in retinanet
            (this can be used to freeze backbone layers for example).

    Returns
        RetinaNet model with a MobileNet backbone.
    """
    # The width multiplier is encoded after the first underscore; names
    # without a suffix previously raised IndexError here.
    name_parts = backbone.split('_')
    alpha = float(name_parts[1]) if len(name_parts) > 1 else 1.0

    # choose default input
    if inputs is None:
        inputs = keras.layers.Input((None, None, 3))

    base_network = mobilenet.MobileNet(input_tensor=inputs, alpha=alpha, include_top=False, pooling=None, weights=None)

    # expose the three intermediate activations used as backbone layers
    layer_names = ['conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu']
    layer_outputs = [base_network.get_layer(name).output for name in layer_names]
    feature_model = keras.models.Model(inputs=inputs, outputs=layer_outputs, name=base_network.name)

    # invoke modifier if given
    if modifier:
        feature_model = modifier(feature_model)

    return RetinaNetModel(inputs=inputs, num_classes=num_classes, backbone_layers=feature_model.outputs, **kwargs)
def __init__(self): from tensorflow.keras.applications import mobilenet # define input image pair self.x1 = tf.placeholder(tf.float32, [None, 224, 224, 3]) self.x2 = tf.placeholder(tf.float32, [None, 224, 224, 3]) # define network with tf.variable_scope("siamese") as scope: self.backbone_model = mobilenet.MobileNet() self.bottleneck_model = Model( inputs=self.backbone_model.input, outputs=self.backbone_model.get_layer( 'global_average_pooling2d').output) # for layer in self.bottleneck_model.layers: # layer.trainable = False self.bottleneck_feature_1 = self.bottleneck_model(self.x1) self.bottleneck_feature_2 = self.bottleneck_model(self.x2) self.o1 = self.feature_vector_mapping(self.bottleneck_feature_1) scope.reuse_variables() self.o2 = self.feature_vector_mapping(self.bottleneck_feature_2) # define loss self.y_gt = tf.placeholder(tf.float32, [None]) # 1 or 0 self.loss = self.loss_function()
def create_model():
    """Build a two-output network on top of an ImageNet MobileNet.

    The headless MobileNet (160x160x3 input, global average pooling) feeds
    a 2-unit dense layer without its own activation; the module-level
    ``multitask_activation`` is applied afterwards through a ``Lambda``.

    Returns:
        A ``Model`` mapping the backbone input to the activated 2-unit output.
    """
    backbone = mobilenet.MobileNet(
        include_top=False,
        input_shape=(160, 160, 3),
        weights="imagenet",
        pooling="avg",
    )
    pooled_features = backbone.layers[-1].output
    # No activation function on the dense layer itself.
    raw_outputs = layers.Dense(2)(pooled_features)
    activated = layers.Lambda(multitask_activation)(raw_outputs)
    return Model(backbone.layers[0].input, activated)
def __init__(self,
             num_classes,
             scales=[0.1, 0.2, 0.4, 0.8],
             base_layers=[
                 'conv_pw_10_relu', 'conv_pw_11_relu', 'conv_pw_12_relu',
                 'conv_pw_13_relu'
             ],
             aspect_ratios=[0.5, 1.0, 2.0],
             variances=[0.1, 0.1, 0.2, 0.2],
             weight_decay=5e-4,
             **kwargs):
    """Set up the MobileNet feature extractor and the per-scale heads.

    Args:
        num_classes: Number of classes; each classification conv gets
            ``len(aspect_ratios) * num_classes`` filters.
        scales: One scale per entry of ``base_layers``.
        base_layers: Names of the MobileNet layers whose outputs are
            returned by ``self.get_base_features``.
        aspect_ratios: Aspect ratios; their count is the number of boxes
            per cell.
        variances: Exactly four values, one per bounding box parameter.
        weight_decay: L2 regularisation factor for the head convolutions.
        **kwargs: Forwarded to the parent constructor.

    Raises:
        ValueError: If ``scales`` and ``base_layers`` differ in length or
            ``variances`` does not have exactly four entries.
    """
    super().__init__(**kwargs)
    self.num_classes = num_classes

    if len(scales) != len(base_layers):
        raise ValueError(
            'You need to provide one scale for each base layer.')
    if len(variances) != 4:
        raise ValueError(
            'You need to provide exactly 4 variance values '
            '(one for each bounding box parameter).')

    # Copy the sequences: the defaults are shared list objects, so storing
    # them directly would let one instance's mutation leak into all others.
    self.scales = list(scales)
    self.base_layers = list(base_layers)
    self.aspect_ratios = list(aspect_ratios)
    self.boxes_per_cell = len(self.aspect_ratios)
    self.variances = list(variances)

    backbone = mobilenet.MobileNet(include_top=False, weights='imagenet')
    self.get_base_features = tf.keras.Model(
        inputs=backbone.layers[0].input,
        outputs=[
            backbone.get_layer(layer_name).output
            for layer_name in self.base_layers
        ])

    # One classification and one localisation convolution per scale.
    self.conv_cls = []
    self.conv_loc = []
    for head_number in range(1, len(self.scales) + 1):
        self.conv_cls.append(
            layers.Conv2D(
                filters=self.boxes_per_cell * self.num_classes,
                kernel_size=3,
                padding='same',
                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
                name='conv_cls_{}'.format(head_number)))
        self.conv_loc.append(
            layers.Conv2D(
                filters=self.boxes_per_cell * 4,
                kernel_size=3,
                padding='same',
                kernel_regularizer=tf.keras.regularizers.l2(weight_decay),
                name='conv_loc_{}'.format(head_number)))
def createMobileNetwork():
    """Build a 4-class classifier on top of an ImageNet MobileNet.

    Every MobileNet layer name is prefixed with ``mobile_``. The output of
    the layer then called ``mobile_reshape_2`` is flattened and passed
    through dropout (0.3), a 256-unit dense layer with LeakyReLU activation
    and a 4-way softmax.

    Returns:
        A ``Model`` from the MobileNet input to the 4-way softmax output.
    """
    base = mobilenet.MobileNet(weights='imagenet')

    # Prefix the layer names before looking up 'mobile_reshape_2' below.
    for base_layer in base.layers:
        base_layer._name = 'mobile_' + base_layer._name

    reshaped = base.get_layer('mobile_reshape_2').output
    flat = Flatten()(reshaped)
    dropped = Dropout(0.3)(flat)
    hidden = Dense(256, name='mobile_weights', activation=LeakyReLU(alpha=0.3))(dropped)
    class_probs = Dense(4, activation='softmax')(hidden)

    return Model(base.input, class_probs)
def mnLSTM():
    """Build a per-frame MobileNet followed by a ConvLSTM classifier.

    The input has shape (None, 224, 224, 3) per sample, i.e. a
    variable-length leading axis of 224x224x3 images. A headless ImageNet
    MobileNet (alpha=0.5) is applied to every step, followed by a
    ConvLSTM2D (50 filters, 3x3, returning sequences) and a per-step
    flatten + 10-way softmax.

    Returns:
        The uncompiled Keras ``Model``.
    """
    import tensorflow as tf
    # `Model` lives in `tensorflow.keras.models`; the previous
    # `tensorflow.keras.model` import raised ImportError.
    from tensorflow.keras.models import Model
    from tensorflow.keras.applications import mobilenet

    # Bind the layers namespace locally so the function does not depend on
    # a module-level `layers` import.
    layers = tf.keras.layers

    inputs = layers.Input((None, 224, 224, 3))
    # Named `backbone` so the imported `mobilenet` module is not shadowed.
    backbone = mobilenet.MobileNet(input_shape=(224, 224, 3),
                                   alpha=0.5,
                                   include_top=False,
                                   weights='imagenet')

    net = layers.TimeDistributed(backbone, name="mn")(inputs)
    net = layers.ConvLSTM2D(50, 3, return_sequences=True)(net)
    net = layers.TimeDistributed(layers.Flatten())(net)
    net = layers.TimeDistributed(layers.Dense(10, activation="softmax"))(net)

    model = Model(inputs, net)
    return model
def train_object_model():
    """Train a MobileNet-based classifier in two stages and save it.

    Stage one trains only the new softmax head with the backbone frozen
    (10 epochs, SGD lr=0.2); stage two unfreezes the backbone and trains
    everything (20 epochs, SGD lr=0.01). The number of output classes is
    ``len(load_classes())``, batches come from ``generate_batches()`` and
    the model is saved to the path in the ``OBJECT_MODEL`` environment
    variable.
    """
    import tensorflow as tf
    import tensorflow.keras.applications.mobilenet as mn

    backbone = mn.MobileNet(include_top=False, input_shape=(128, 128, 3))
    pooled = tf.keras.layers.GlobalAveragePooling2D()(backbone.output)
    class_probs = tf.keras.layers.Dense(
        len(load_classes()), activation='softmax')(pooled)
    classifier = tf.keras.Model(inputs=backbone.input, outputs=class_probs)

    def set_backbone_trainable(flag):
        # Toggle every backbone layer; takes effect at the next compile().
        for backbone_layer in backbone.layers:
            backbone_layer.trainable = flag

    def run_stage(learning_rate, decay, epochs, callbacks):
        # Compile with a fresh SGD optimizer, then fit for `epochs` epochs.
        sgd = tf.keras.optimizers.SGD(
            lr=learning_rate, momentum=0.9, decay=decay)
        classifier.compile(
            optimizer=sgd,
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
        classifier.fit(generate_batches(),
                       epochs=epochs,
                       steps_per_epoch=200,
                       callbacks=callbacks)

    # Stage 1: frozen backbone, train the head only.
    set_backbone_trainable(False)
    dst = os.environ['OBJECT_MODEL']
    stop_early = tf.keras.callbacks.EarlyStopping(
        patience=10, monitor='loss', restore_best_weights=True)
    run_stage(0.2, 0.01, 10, [stop_early])

    # Stage 2: fine-tune the whole network with a smaller learning rate.
    set_backbone_trainable(True)
    run_stage(0.01, 0.001, 20, [stop_early])

    classifier.save(dst)
def get_model(model_name):
    """Return ``(model, decode_predictions, preprocess_input)`` for a name.

    Supported names are "vgg16", "vgg19", "resnet50", "resnet101",
    "mobilenet" and "densenet"; each model is created with ImageNet
    weights. Any other name yields ``None``.
    """
    # Ordered (name, loader) pairs. The loaders are lambdas so that only
    # the selected model is built and only its module is touched.
    loaders = (
        ("vgg16", lambda: (vgg16.VGG16(weights='imagenet'),
                           vgg16.decode_predictions,
                           vgg16.preprocess_input)),
        ("vgg19", lambda: (vgg19.VGG19(weights='imagenet'),
                           vgg19.decode_predictions,
                           vgg19.preprocess_input)),
        ("resnet50", lambda: (resnet50.ResNet50(weights='imagenet'),
                              resnet50.decode_predictions,
                              resnet50.preprocess_input)),
        ("resnet101", lambda: (ResNet101(weights='imagenet'),
                               resnet.decode_predictions,
                               resnet.preprocess_input)),
        ("mobilenet", lambda: (mobilenet.MobileNet(weights='imagenet'),
                               mobilenet.decode_predictions,
                               mobilenet.preprocess_input)),
        ("densenet", lambda: (densenet.DenseNet121(weights='imagenet'),
                              densenet.decode_predictions,
                              densenet.preprocess_input)),
    )

    for known_name, load in loaders:
        if model_name == known_name:
            return load()
    return None
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.callbacks import TensorBoard

### ImageNet Large Scale Visual Recognition Challenge (ILSVRC) (1.2 M images, 1000 classes)
# Each model below is instantiated with its ImageNet weights.

# Load the VGG model
vgg_model = vgg16.VGG16(weights='imagenet')

# Load the inception_V3 model
inception_model = inception_v3.InceptionV3(weights='imagenet')

# Load the ResNet50 model
resnet_model = resnet50.ResNet50(weights='imagenet')

# Load the MobileNet model
mobilenet_model = mobilenet.MobileNet(weights='imagenet')

### Load the image and convert its format to a 4-dimensional Tensor as an input of the form
# (batchsize, height, width, channels) requested by the Network.
# NOTE(review): hard-coded absolute path - only valid on the author's machine.
filename = 'C:/Users/Theo/PycharmProjects/BMDATA/TP1/data/train/cat.1.jpg'

# Load an image in a PIL format, resized to 224x224
original = load_img(filename, target_size=(224, 224))
numpy_image = img_to_array(original)

# We add the extra dimension to the axis 0
image_batch = np.expand_dims(numpy_image, axis=0)
print('image batch size', image_batch.shape)
# Cast to uint8 before handing the first (only) batch element to imshow.
plt.imshow(np.uint8(image_batch[0]))

### Now we can convert the image for the model and try and predict it
# preparing image
from tensorflow.keras.applications import mobilenet
from utils import visualize_layer_filters

if __name__ == '__main__':
    # Data locations, relative to the working directory.
    base_path = "../../../data"
    output_path = "../../../output"
    images_dir = "%s/pets/images" % base_path

    # Full-width MobileNet with the ImageNet classifier head.
    model_100 = mobilenet.MobileNet(
        input_shape=None,
        alpha=1.0,
        include_top=True,
        weights='imagenet',
        input_tensor=None,
        pooling=None,
        classes=1000,
    )
    model_100.summary()

    #######################
    # draw Convolutional Filter
    #######################
    # Only layers whose class is named exactly 'Conv2D' are visualised.
    convLayers = []
    for layer in model_100.layers:
        if layer.__class__.__name__ == 'Conv2D':
            convLayers.append(layer.name)

    for conv_layer_name in convLayers:
        visualize_layer_filters(model=model_100,
                                layer_name=conv_layer_name,
                                epochs=40)
def feature_table_creator(image_bytes):
    """Resize an image to 224x224 and compute its MobileNet feature entry.

    Args:
        image_bytes: Image array accepted by ``cv2.resize`` (despite the
            name, this is the decoded image, not raw file bytes).

    Returns:
        A dict with the single key ``'mobilenet'`` holding the result of
        ``MobileNet(resized_image)``.
    """
    image_size = (224, 224)
    image_bytes = cv2.resize(image_bytes, image_size)
    feature_table = {'mobilenet': None}
    # feature_table['vgg'] = vgg(image_bytes)
    # NOTE(review): `MobileNet` here is a callable defined outside this
    # snippet - presumably a feature-extraction helper; confirm.
    feature_table['mobilenet'] = MobileNet(image_bytes)
    return feature_table


if __name__ == "__main__":
    # vgg_model = tf.keras.applications.VGG16(weights='imagenet')
    # vgg_extractor = tf.keras.models.Model(inputs=vgg_model.input, outputs=vgg_model.get_layer("fc2").output)
    mobilenet_extractor = mobilenet.MobileNet(weights='imagenet',
                                              include_top=False,
                                              input_shape=(224, 224, 3))

    img_dir_path = input('[INPUT] image dir path : ')
    features = {'img': [], 'mobilenet': [], 'cluster': []}
    pics_num = os.listdir(img_dir_path)

    bar = progressbar.ProgressBar(
        maxval=len(pics_num),
        widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
    bar.start()

    for i, img_path in enumerate(pics_num):
        # Join with the platform separator so the directory does not need
        # to be typed with a trailing slash.
        img_path = os.path.join(img_dir_path, img_path)
        with open(img_path, 'rb') as f:
            img_bytes = f.read()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        Image = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_UNCHANGED)
        # Drop any channel beyond the first three (e.g. alpha).
        # NOTE(review): this indexing assumes a 3-D array; an undecodable
        # file or a single-channel image would fail here.
        Image = Image[:, :, :3]
        single_feature_table = feature_table_creator(Image)
X_val = np.array(X_val) y_val = np.array(y_val) print('Shapes train') print(X_train.shape) print(y_train.shape) print('Shapes val') print(X_val.shape) print(y_val.shape) return X_train, y_train, X_val, y_val # get the data print('***** Load files...') X_train, y_train, X_val, y_val = create_sets('N0', 'N1') # get the model without the denses base_model = mobilenet.MobileNet(weights='imagenet', include_top='false') new_dense = base_model.output # add the new denses to classify the hate images new_dense = Dense(1024, activation='relu')(new_dense) predictions = Dense(2, activation='softmax')(new_dense) model = Model(inputs=base_model.input, outputs=predictions) # we will only train the new denses for the baseline for layer in base_model.layers: layer.trainable = False model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["accuracy"]) results = model.fit(X_train, y_train, epochs=EPOCHS, batch_size=16,
def set_model(self, model_name, top_n=5):
    """Select the ImageNet classifier used by this object.

    On a known ``model_name`` this sets ``self.model`` (the Keras model
    with ImageNet weights), ``self.target_size`` (image size to load),
    ``self.decoder`` (decodes predictions to the ``top_n`` classes) and
    ``self.ref`` (HTML snippet linking the reference paper). An unknown
    name only logs an error and leaves the attributes untouched.

    Args:
        model_name: One of 'densenet', 'inception_resnet_v2',
            'inception_v3', 'mobilenet', 'mobilenet_v2', 'nasnet',
            'resnet50', 'vgg16', 'vgg19', 'xception'.
        top_n: Number of predictions the decoder returns per image.
    """
    if model_name == 'densenet':
        self.model = densenet.DenseNet121(include_top=True,
                                          weights='imagenet',
                                          input_tensor=None,
                                          input_shape=None,
                                          pooling=None,
                                          classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: densenet.decode_predictions(x, top=top_n)
        self.ref = (
            " <ul> <li><a href='https://arxiv.org/abs/1608.06993' target='_blank'>"
            " Densely Connected Convolutional Networks</a>"
            " (CVPR 2017 Best Paper Award)</li> </ul> "
        )
    elif model_name == 'inception_resnet_v2':
        self.model = inception_resnet_v2.InceptionResNetV2(
            include_top=True,
            weights='imagenet',
            input_tensor=None,
            input_shape=None,
            pooling=None,
            classes=1000)
        self.target_size = (299, 299)
        self.decoder = lambda x: inception_resnet_v2.decode_predictions(
            x, top=top_n)
        self.ref = (
            " <ul> <li><a href='https://arxiv.org/abs/1602.07261' target='_blank'>"
            " Inception-v4, Inception-ResNet and the Impact of Residual"
            " Connections on Learning</a></li> </ul> "
        )
    elif model_name == 'inception_v3':
        self.model = inception_v3.InceptionV3(include_top=True,
                                              weights='imagenet',
                                              input_tensor=None,
                                              input_shape=None,
                                              pooling=None,
                                              classes=1000)
        self.target_size = (299, 299)
        self.decoder = lambda x: inception_v3.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1512.00567' target='_blank'>"
            " Rethinking the Inception Architecture for Computer Vision</a></li>"
            " </ul> "
        )
    elif model_name == 'mobilenet':
        self.model = mobilenet.MobileNet(input_shape=None,
                                         alpha=1.0,
                                         depth_multiplier=1,
                                         dropout=1e-3,
                                         include_top=True,
                                         weights='imagenet',
                                         input_tensor=None,
                                         pooling=None,
                                         classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: mobilenet.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1704.04861' target='_blank'>"
            " MobileNets: Efficient Convolutional Neural Networks for Mobile"
            " Vision Applications</a></li> </ul> "
        )
    elif model_name == 'mobilenet_v2':
        self.model = mobilenet_v2.MobileNetV2(input_shape=None,
                                              alpha=1.0,
                                              include_top=True,
                                              weights='imagenet',
                                              input_tensor=None,
                                              pooling=None,
                                              classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: mobilenet_v2.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1801.04381' target='_blank'>"
            " MobileNetV2: Inverted Residuals and Linear Bottlenecks</a></li>"
            " </ul> "
        )
    elif model_name == 'nasnet':
        self.model = nasnet.NASNetLarge(input_shape=None,
                                        include_top=True,
                                        weights='imagenet',
                                        input_tensor=None,
                                        pooling=None,
                                        classes=1000)
        # NASNetLarge with ImageNet weights takes 331x331 inputs; loading
        # images at 224x224 makes predict() fail on the input shape.
        self.target_size = (331, 331)
        self.decoder = lambda x: nasnet.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1707.07012' target='_blank'>"
            " Learning Transferable Architectures for Scalable Image"
            " Recognition</a></li> </ul> "
        )
    elif model_name == 'resnet50':
        self.model = resnet50.ResNet50(include_top=True,
                                       weights='imagenet',
                                       input_tensor=None,
                                       input_shape=None,
                                       pooling=None,
                                       classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: resnet50.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li>ResNet : <a href='https://arxiv.org/abs/1512.03385'"
            " target='_blank'>Deep Residual Learning for Image Recognition"
            " </a></li> </ul> "
        )
    elif model_name == 'vgg16':
        self.model = vgg16.VGG16(include_top=True,
                                 weights='imagenet',
                                 input_tensor=None,
                                 input_shape=None,
                                 pooling=None,
                                 classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: vgg16.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1409.1556' target='_blank'>"
            " Very Deep Convolutional Networks for Large-Scale Image"
            " Recognition</a></li> </ul>"
        )
    elif model_name == 'vgg19':
        self.model = vgg19.VGG19(include_top=True,
                                 weights='imagenet',
                                 input_tensor=None,
                                 input_shape=None,
                                 pooling=None,
                                 classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: vgg19.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1409.1556' target='_blank'>"
            "Very Deep Convolutional Networks for Large-Scale \n"
            "Image Recognition</a></li> </ul>"
        )
    elif model_name == 'xception':
        self.model = xception.Xception(include_top=True,
                                       weights='imagenet',
                                       input_tensor=None,
                                       input_shape=None,
                                       pooling=None,
                                       classes=1000)
        self.target_size = (299, 299)
        self.decoder = lambda x: xception.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1610.02357' target='_blank'>"
            "Xception: Deep Learning with Depthwise Separable Convolutions</a>"
            "</li> </ul>"
        )
    else:
        # `ERROR` is a level constant in the standard logging API, not a
        # callable; the logging method is `error`.
        logger.error('There has no model name !!!')
def __call__(self, *args, **kwargs):
    """Build a 2-class classifier on the backbone named by ``self.model_name``.

    The backbone is created headless with ImageNet weights and
    ``self.input_shape``. When ``self.extractor`` is truthy all backbone
    layers are frozen. VGG backbones get a flatten + two
    Dense(1024)/Dropout(0.5) blocks; all others get global average pooling.
    A 2-way softmax is appended in both cases.

    Returns:
        The assembled ``Model``, or ``None`` (after printing a message)
        when the model name is not recognised.
    """
    vgg_names = ["VGG16", "vgg16", "VGG19", "vgg19"]

    # (accepted spellings, lazy constructor lookup) in priority order.
    backbone_table = (
        (["VGG16", "vgg16"], lambda: vgg16.VGG16),
        (["VGG19", "vgg19"], lambda: vgg19.VGG19),
        (["MobileNet", "mobilenet"], lambda: mobilenet.MobileNet),
        (["MobileNetV2", "MobileNet_V2", "mobilenetv2", "mobilenet_v2",
          "mobilenet_V2"], lambda: mobilenet_v2.MobileNetV2),
        (["resnet50", "ResNet50"], lambda: resnet.ResNet50),
        # (["EfficientNetB0", "efficientnetb0"], lambda: efficientnet.EfficientNetB0),
        # (["EfficientNetB5", "efficientnetb5"], lambda: efficientnet.EfficientNetB5),
    )

    build_backbone = None
    for spellings, lookup in backbone_table:
        if self.model_name in spellings:
            build_backbone = lookup()
            break

    if build_backbone is None:
        print("Not exists {}".format(self.model_name))
        return None

    pre_trained = build_backbone(include_top=False,
                                 weights='imagenet',
                                 input_shape=self.input_shape)

    # Feature-extractor mode: keep the pretrained weights fixed.
    if self.extractor:
        for backbone_layer in pre_trained.layers:
            backbone_layer.trainable = False

    if self.model_name in vgg_names:
        features = Flatten()(pre_trained.output)
        for _ in range(2):
            features = Dense(1024,
                             activation="relu",
                             kernel_initializer="he_normal")(features)
            features = Dropout(0.5)(features)
    else:
        features = GlobalAveragePooling2D()(pre_trained.output)
        features = Flatten()(features)

    class_probs = Dense(2, activation="softmax")(features)
    classifier = Model(inputs=pre_trained.input, outputs=class_probs)
    classifier.layers[0]._name = "input"
    return classifier
# process an image to be model friendly def process_image(img_path): img = image.load_img(img_path, target_size=(224, 224)) img_array = image.img_to_array(img) img_array = np.expand_dims(img_array, axis=0) pImg = mobilenet.preprocess_input(img_array) return pImg # process the test image test_img_path = '%s/Abyssinian_1.jpg' % images_dir pImg = process_image(test_img_path) # define the model model_100 = mobilenet.MobileNet(input_shape=None, alpha=1.0, depth_multiplier=1, dropout=1e-3, include_top=True, weights='imagenet', input_tensor=None, pooling=None, classes=1000) model_100.summary() # record spend time begin_time = time.perf_counter() prediction = model_100.predict(pImg) end_time = time.perf_counter() print('Spend: %f s' % (end_time - begin_time)) # obtain the top-3 predictions results = imagenet_utils.decode_predictions(prediction, top=3) print(results) assert "Egyptian_cat" in [item[1] for item in results[0]] #######################
def get_siamese_model(name=None, input_shape=(224, 224, 3), embedding_vec_size=512, not_freeze_last=2):
    """Build a siamese similarity model on a pretrained backbone.

    Args:
        name: Backbone name; one of "InceptionV3", "InceptionResNetV2",
            "DenseNet121", "DenseNet169", "DenseNet201", "MobileNetV2",
            "MobileNet", "ResNet50", "VGG16", "VGG19", "Xception".
        input_shape: Shape of each of the two image inputs.
        embedding_vec_size: Size of the linear, bias-free embedding layer.
        not_freeze_last: Number of trailing backbone layers left trainable;
            0 freezes the whole backbone.

    Returns:
        For a known ``name``, a dict with keys ``"model"`` (the two-input
        siamese ``Model``) and ``"preprocess_input"`` (the backbone's
        preprocessing function). For an unknown ``name`` (including the
        default ``None``), the list of supported backbone names.
    """
    # (name, lazy module lookup). In every case the class to instantiate
    # has the same name as the table key. The lambdas keep the lookup lazy
    # so only the selected module has to be importable.
    backbones = (
        ("InceptionV3", lambda: inception_v3),
        ("InceptionResNetV2", lambda: inception_resnet_v2),
        ("DenseNet121", lambda: densenet),
        ("DenseNet169", lambda: densenet),
        ("DenseNet201", lambda: densenet),
        ("MobileNetV2", lambda: mobilenet_v2),
        ("MobileNet", lambda: mobilenet),
        ("ResNet50", lambda: resnet50),
        ("VGG16", lambda: vgg16),
        ("VGG19", lambda: vgg19),
        ("Xception", lambda: xception),
    )

    backbone_module = None
    for backbone_name, resolve_module in backbones:
        if name == backbone_name:
            backbone_module = resolve_module()
            break

    # Unknown backbone: report the supported names instead of a model.
    if backbone_module is None:
        return [supported_name for supported_name, _ in backbones]

    base_model = getattr(backbone_module, backbone_name)(
        weights='imagenet', include_top=False)
    model_preprocess_input = backbone_module.preprocess_input

    # Disable training for all but the last `not_freeze_last` layers.
    # `layers[:-0]` is an empty slice, so 0 needs the explicit
    # "freeze everything" branch.
    if not_freeze_last:
        frozen_layers = base_model.layers[:-not_freeze_last]
    else:
        frozen_layers = base_model.layers
    for layer in frozen_layers:
        layer.trainable = False

    x = base_model.layers[-1].output
    x = GlobalAveragePooling2D()(x)
    x = Dense(
        embedding_vec_size,
        activation='linear',  # sigmoid? relu?
        name='embedding',
        use_bias=False
    )(x)

    model = Model(
        inputs=base_model.input,
        outputs=x
    )

    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    # Add a customized layer to compute the absolute difference between the encodings
    L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])

    # Single non-negative, bias-free unit with the `gaussian` activation
    # produces the similarity score.
    prediction = Dense(
        1,
        activation=Activation(gaussian),
        use_bias=False,
        kernel_constraint=NonNeg()
    )(L1_distance)

    # Connect the inputs with the outputs
    siamese_net = Model(
        inputs=[left_input, right_input],
        outputs=prediction
    )

    return {
        "model": siamese_net,
        "preprocess_input": model_preprocess_input
    }
with open(truth_path, "r") as f: vgt = f.read().strip().split('\n') vgt = list(map(int, vgt)) vgt = np.array( [synset_to_keras_idx[original_idx_to_synset[idx]] for idx in vgt]) return vgt, keras_idx_to_name, keras_idx_to_synset if __name__ == '__main__': input_dir = os.getcwd() + os.sep + "images" + os.sep output_dir = os.getcwd() + os.sep + "output" + os.sep # Define the mobilenet model # Source: https://github.com/keras-team/keras-applications/blob/master/keras_applications/mobilenet.py#L87 net = mobilenet.MobileNet() print(net.summary()) y_test, keras_idx_to_name, keras_idx_to_synset = _label_preprocess() for i in range(20): # Path to test image test_img_path = input_dir + "/ILSVRC2012_val_{:0>8d}.JPEG".format(i + 1) # Process the test image pImg = process_image(test_img_path) # Make predictions on test image using mobilenet prediction = net.predict(pImg)
ds_train = ds_train.map(load_example).batch(batch_size).shuffle( 1024).repeat() val_path = "{0}/val/np_val.hkl".format(path) ds_val = hkl.load(val_path) ds_val = tf.data.Dataset.from_tensor_slices(val_image_list) ds_val = ds_val.map(load_example).batch(batch_size) BATCH_SIZE = 24 ds_train, ds_val, train_length, val_length = create_datasets( BATCH_SIZE, data_path) inputs = tf.keras.layers.Input((None, IMG_SIZE, IMG_SIZE, 3)) mn = mobilenet.MobileNet(input_shape=(IMG_SIZE, IMG_SIZE, 3), alpha=1.0, include_top=False, weights=None) net = tf.keras.layers.TimeDistributed(mn, name="mn")(inputs) #net = tf.keras.layers.TimeDistributed(tf.keras.layers.Dropout(0.4))(net) net = tf.keras.layers.ConvLSTM2D(100, 3, return_sequences=True)(net) net = tf.keras.layers.TimeDistributed(tf.keras.layers.AveragePooling2D(2))(net) net = tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten())(net) net = tf.keras.layers.TimeDistributed( tf.keras.layers.Dense(10, activation="softmax"))(net) model = tf.keras.Model(inputs, net) model.summary() model.compile(optimizer=tf.keras.optimizers.RMSprop(0.0001), loss="categorical_crossentropy", metrics=['accuracy'])