def InceptionV3(include_top=False, weights='imagenet', input_tensor=None,
                input_shape=None, pooling=None, classes=1000, *args, **kwargs):
    """Build an InceptionV3 backbone topped with a fresh classification head.

    Args:
        include_top: Accepted for signature compatibility but never read; the
            backbone is always created with ``include_top=False``.
        weights: Forwarded to ``inception_v3.InceptionV3``. When truthy, the
            first 288 backbone layers are frozen.
        input_tensor: Forwarded to the backbone.
        input_shape: Forwarded to the backbone.
        pooling: ``'avg'`` or ``'max'`` selects the global pooling applied to
            the backbone feature map; any other value falls back to global
            average pooling in a layer named ``'avg_pool'``.
        classes: Width of the final dense layer. A single class uses a
            sigmoid activation, otherwise softmax.
        *args: Ignored.
        **kwargs: Ignored.

    Returns:
        A ``Model`` named ``'InceptionV3'`` mapping the backbone input to the
        ``'predictions'`` dense layer.
    """
    # The backbone must return its 4-D feature map. Forwarding ``pooling`` here
    # made the backbone pool once already, so the 2-D pooling layers below were
    # then applied to a 2-D tensor. Pooling is applied exactly once, below.
    backbone = inception_v3.InceptionV3(include_top=False,
                                        weights=weights,
                                        input_shape=input_shape,
                                        input_tensor=input_tensor,
                                        pooling=None,
                                        classes=classes)
    # 311 layers in total; with pretrained weights only the tail stays trainable.
    if weights:
        for layer in backbone.layers[:288]:
            layer.trainable = False

    x = backbone.output
    if pooling == 'avg':
        x = layers.GlobalAveragePooling2D()(x)
    elif pooling == 'max':
        x = layers.GlobalMaxPooling2D()(x)
    else:
        x = layers.GlobalAveragePooling2D(name='avg_pool')(x)

    activation = 'sigmoid' if classes == 1 else 'softmax'
    x = layers.Dense(classes, activation=activation, name='predictions')(x)
    return Model(backbone.input, x, name='InceptionV3')
def __init__(self):
    """Assemble the siamese graph: inputs, shared bottleneck, mapping, loss.

    Two float32 placeholders are pushed through one InceptionV3 bottleneck
    model (cut at the ``'avg_pool'`` layer) and then through
    ``self.feature_vector_mapping``. The loss comes from
    ``self.loss_function()``.
    """
    # NOTE(review): the extent of the ``variable_scope`` block below was
    # reconstructed from a collapsed source line -- confirm against the
    # original layout.
    from tensorflow.keras.applications import inception_v3

    # Input image pair.
    image_batch_shape = [None, 299, 299, 3]
    self.x1 = tf.placeholder(tf.float32, image_batch_shape)
    self.x2 = tf.placeholder(tf.float32, image_batch_shape)

    # Network definition.
    with tf.variable_scope("siamese") as scope:
        backbone = inception_v3.InceptionV3(weights='imagenet')
        self.backbone_model = backbone
        self.bottleneck_model = Model(
            inputs=backbone.input,
            outputs=backbone.get_layer('avg_pool').output)

        self.bottleneck_feature_1 = self.bottleneck_model(self.x1)
        self.bottleneck_feature_2 = self.bottleneck_model(self.x2)

        self.o1 = self.feature_vector_mapping(self.bottleneck_feature_1)
        # The second mapping call happens after variable reuse is enabled.
        scope.reuse_variables()
        self.o2 = self.feature_vector_mapping(self.bottleneck_feature_2)

    # Loss definition; the ground-truth label is 1 or 0 per pair.
    self.y_gt = tf.placeholder(tf.float32, [None])
    self.loss = self.loss_function()
def save_bottleneck_features():
    """Run InceptionV3 over the train and validation folders and save results.

    For each data set the average-pooled backbone predictions and a one-hot
    label matrix (built from the iterator's ``classes``) are written with
    ``np.save`` under ``output_dir``.
    """
    # Headless InceptionV3 with global average pooling.
    extractor = inception_v3.InceptionV3(include_top=False,
                                         weights='imagenet',
                                         input_tensor=None,
                                         input_shape=None,
                                         pooling='avg')
    datagen = ImageDataGenerator(
        preprocessing_function=inception_v3.preprocess_input)

    # (source directory, class_mode, features file, labels file)
    jobs = (
        (train_data_dir, 'sparse',
         'bottleneck_features_train.npy',
         'bottleneck_labels_train.npy'),
        (validation_data_dir, None,
         'bottleneck_features_validation.npy',
         'bottleneck_labels_validation.npy'),
    )
    for directory, class_mode, features_file, labels_file in jobs:
        # shuffle=False keeps predictions aligned with ``batches.classes``.
        batches = datagen.flow_from_directory(directory,
                                              target_size=(img_width, img_height),
                                              batch_size=batch_size,
                                              class_mode=class_mode,
                                              shuffle=False)
        features = extractor.predict(batches)
        identity = np.eye(batches.num_classes, dtype='uint8')
        labels = identity[batches.classes]
        np.save(output_dir + features_file, features)
        np.save(output_dir + labels_file, labels)
def get_siamese_model(input_shape):
    """Return a two-input siamese network built around a shared InceptionV3.

    Both inputs are encoded by the same InceptionV3 instance (created with
    its default arguments). The element-wise absolute difference of the two
    encodings feeds a single sigmoid unit that outputs the similarity score.
    """
    # Input tensors for the image pair.
    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # One encoder instance, so both branches share weights.
    encoder = inception_v3.InceptionV3()
    encodings = [encoder(branch) for branch in (left_input, right_input)]

    # L1 distance between the two encodings.
    distance_layer = Lambda(lambda pair: abs(pair[0] - pair[1]))
    distance = distance_layer(encodings)

    # Similarity score from a single sigmoid unit.
    score = Dense(1, activation='sigmoid')(distance)

    return Model(inputs=[left_input, right_input], outputs=score)
def build_model(model_type, nr_classes, mode):
    """Build a training or feature-extraction model on an ImageNet backbone.

    Args:
        model_type: ``"incv3"`` (InceptionV3) or ``"resnet50"`` (ResNet50).
        nr_classes: Number of units in the softmax prediction layer
            (used only in ``"training"`` mode).
        mode: ``"training"`` adds dropout, a 2048-unit ReLU layer and the
            softmax head on top of the pooled features; ``"extracting"``
            stops at the pooled features.

    Returns:
        A ``Model`` from the backbone input to the selected output.

    Raises:
        ValueError: If ``model_type`` or ``mode`` is not supported.
    """
    # Validate both arguments before any backbone is instantiated, so a typo
    # fails fast instead of after the weights were loaded.
    if model_type not in ("incv3", "resnet50"):
        raise ValueError(
            "This model type is not supported: {}".format(model_type))
    # Previously an unknown mode silently returned the pooled tensor instead
    # of a Model; report it the same way as an unknown model type.
    if mode not in ("training", "extracting"):
        raise ValueError("This mode is not supported: {}".format(mode))

    if model_type == "incv3":
        base_model = inception_v3.InceptionV3(include_top=False,
                                              weights='imagenet')
    else:
        base_model = resnet50.ResNet50(include_top=False, weights='imagenet')

    features = GlobalAveragePooling2D(name="GAP_last")(base_model.output)
    if mode == "extracting":
        return Model(inputs=base_model.input, outputs=features)

    head = Dropout(0.5, name="dropout_top")(features)
    head = Dense(2048, activation='relu',
                 name="dense2048_{}{}".format(model_type, mode))(head)
    head = Dense(nr_classes, activation='softmax',
                 name="{}_dense_prediction".format(nr_classes))(head)
    return Model(inputs=base_model.input, outputs=head)
def __init__(self, base_path='.'):
    """
    Set up the extractor and its InceptionV3 backbone.

    @param base_path: string with the path where the images are located;
    defaults to the current directory. It is passed unchanged to the
    parent initializer.
    """
    super(InceptionV3FeatureExtractor, self).__init__(base_path)
    # Headless ImageNet network with global average pooling.
    backbone_options = dict(weights='imagenet',
                            include_top=False,
                            pooling='avg')
    self.model = inception_v3.InceptionV3(**backbone_options)
def __init__(self,
             model='inception_v3',
             weights='imagenet',
             include_top=False,
             pooling=None,
             n_channels=None,
             clf_head_dense_dim=1024,
             ):
    '''
    Creates ImageNet base model for featurization or classification and corresponding image preprocessing function

    Sets ``self.model``, ``self.preprocess`` and ``self.target_size``; additionally sets ``self.decode``
    when ``include_top`` is true, otherwise ``self.output_dim``.

    :param model: options are xception, inception_v3, and mobilenet_v2
    :param weights: 'imagenet' or filepath
    :param include_top: whether to include original ImageNet classification head with 1000 classes
    :param pooling: 'avg', 'max', or None
    :param n_channels: number of channels to keep if performing featurization
    :param clf_head_dense_dim: dimension of dense layer before softmax classification (only applies if `include_top` is false)
    :raises ValueError: if `model` is not one of the supported options
    '''
    self.include_top = include_top  # determines if used for classification or featurization
    self.n_channels = n_channels
    self.pooling = pooling
    self.clf_head_dense_dim = clf_head_dense_dim

    # output_dim = kept channels * spatial positions of the final feature map
    # (a single position once global pooling is applied).
    if model == 'xception':
        self.model = xception.Xception(weights=weights, include_top=include_top, pooling=pooling)
        self.preprocess = xception.preprocess_input
        self.target_size = (299, 299)
        if include_top:
            self.decode = xception.decode_predictions
        else:
            self.output_dim = (n_channels if n_channels else 2048) * (1 if pooling else 10**2)
    elif model == 'inception_v3':
        self.model = inception_v3.InceptionV3(weights=weights, include_top=include_top, pooling=pooling)
        self.preprocess = inception_v3.preprocess_input
        self.target_size = (299, 299)
        if include_top:
            self.decode = inception_v3.decode_predictions
        else:
            self.output_dim = (n_channels if n_channels else 2048) * (1 if pooling else 8**2)
    elif model == 'mobilenet_v2':
        self.model = mobilenetv2.MobileNetV2(weights=weights, include_top=include_top, pooling=pooling)
        self.preprocess = mobilenetv2.preprocess_input
        # 224x224 is the MobileNetV2 ImageNet resolution and the one the 7x7
        # grid below assumes; the previous (244, 244) was a typo.
        self.target_size = (224, 224)
        if include_top:
            self.decode = mobilenetv2.decode_predictions
        else:
            self.output_dim = (n_channels if n_channels else 1280) * (1 if pooling else 7**2)
    else:
        # ValueError is still an Exception, so existing handlers keep working.
        raise ValueError('model option not implemented')
def main():
    """Run multi-octave gradient ascent on ``sky.jpg`` and save the result.

    The image is processed from the smallest scale to the largest. After
    each octave the detail lost by down/up-scaling is added back. The final
    image is written to ``sky_dream.png`` and displayed.
    """
    # Alternative source kept from the original:
    # keras.utils.get_file("sky.jpg", "https://i.imgur.com/aGBdQyK.jpg")
    base_image_path = "sky.jpg"
    result_prefix = "sky_dream"
    output_path = result_prefix + ".png"

    display(Image(base_image_path))

    # InceptionV3 with pre-trained ImageNet weights, no classifier head.
    model = inception_v3.InceptionV3(weights="imagenet", include_top=False)

    # Symbolic outputs of every layer named in ``layer_settings``.
    target_layers = (model.get_layer(name) for name in layer_settings.keys())
    outputs_dict = {layer.name: layer.output for layer in target_layers}

    # Model returning the activation of every target layer as a dict.
    feature_extractor = keras.Model(inputs=model.inputs, outputs=outputs_dict)

    original_img = preprocess_image(base_image_path)
    original_shape = original_img.shape[1:3]

    # Scales ordered from smallest to the original size.
    successive_shapes = [original_shape]
    successive_shapes.extend(
        tuple([int(dim / (octave_scale ** i)) for dim in original_shape])
        for i in range(1, num_octave)
    )
    successive_shapes.reverse()

    shrunk_original_img = tf.image.resize(original_img, successive_shapes[0])
    img = tf.identity(original_img)  # working copy

    for octave, shape in enumerate(successive_shapes):
        print("Processing octave %d with shape %s" % (octave, shape))
        img = tf.image.resize(img, shape)
        img = gradient_ascent_loop(
            img,
            iterations=iterations,
            learning_rate=step,
            feature_extractor=feature_extractor,
            max_loss=max_loss,
        )
        # Re-inject what the shrink/upscale round trip lost at this scale.
        upscaled_shrunk_original_img = tf.image.resize(shrunk_original_img, shape)
        same_size_original = tf.image.resize(original_img, shape)
        lost_detail = same_size_original - upscaled_shrunk_original_img
        img += lost_detail
        shrunk_original_img = tf.image.resize(original_img, shape)

    keras.preprocessing.image.save_img(output_path, deprocess_image(img.numpy()))
    display(Image(output_path))
def model_inceptionv3():
    """Create an untrained InceptionV3 backbone and a classifier on top of it.

    Returns:
        ``(backbone, model)``: the headless InceptionV3 and the full model
        ending in a ``Config.NUM_CLASS``-way softmax.
    """
    backbone_options = dict(
        include_top=False,
        weights=None,  # "imagenet"
        input_tensor=None,
        input_shape=(Config.CNN_HEIGHT, Config.CNN_WIDTH, 3),
        pooling=None,
        classes=Config.NUM_CLASS,
        classifier_activation="softmax",
    )
    backbone = inception_v3.InceptionV3(**backbone_options)

    # Head: global average pooling -> 1024 ReLU units -> softmax.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(1024, activation='relu')(pooled)
    predictions = Dense(Config.NUM_CLASS, activation='softmax')(hidden)

    return backbone, Model(inputs=backbone.input, outputs=predictions)
def main():
    """Parse the command line and generate deep-dream images.

    The source image comes from ``--path`` or is downloaded from ``--url``
    into ``Images/<name>/original<ext>``. An InceptionV3 ImageNet classifier
    is then loaded and handed to ``generate_dreams`` together with the
    remaining options.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--path", type=str, default="")
    parser.add_argument("--url", type=str, default="")
    parser.add_argument("--name", type=str, default="")
    # Let argparse convert and validate the numeric options.
    parser.add_argument("--depth", type=int, default=3)
    parser.add_argument("--n_images", type=int, default=1)
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--max_coeff", type=float, default=10)
    parser.add_argument("--max_loss", type=float, default=20)
    args = parser.parse_args()

    path = args.path
    url = args.url
    name = args.name

    if not path and not url:
        print("Insert path with \"--path\" or url with \"--url\".")
        return

    if url:
        if not name:
            print(
                "Please provide a name for the image folder with \"--name\".")
            return
        path = "Images/" + name + "/"
        # exist_ok avoids the check-then-create race of the previous code.
        os.makedirs(path, exist_ok=True)
        # Take the extension from the URL path so ".jpeg"/".webp" and URLs
        # with a query string are handled; fall back to the legacy last-four
        # characters when the path carries no extension.
        extension = os.path.splitext(urlparse(url).path)[1] or url[-4:]
        path += "original" + extension
        urllib.request.urlretrieve(url, path)

    model = inception_v3.InceptionV3(weights='imagenet', include_top=True)
    # Relax the input layer's recorded shape so any image size is accepted.
    model._layers[0].batch_input_shape = (None, None, None, 3)

    generate_dreams(path, model, args.depth, args.n_images, args.seed,
                    args.max_coeff, args.max_loss)
def get_feature_extractor(layer_settings: dict = None):
    """Return a model exposing the activations of the selected layers.

    Args:
        layer_settings: Mapping whose keys are InceptionV3 layer names.
            When ``None``, ``mixed4`` to ``mixed7`` are used.

    Returns:
        A ``keras.Model`` whose output is a dict keyed by layer name.
    """
    if layer_settings is None:
        layer_settings = {
            "mixed4": 0.0,
            "mixed5": 1.5,
            "mixed6": 2.0,
            "mixed7": 0.5,
        }

    # InceptionV3 with pre-trained ImageNet weights, no classifier head.
    model = inception_v3.InceptionV3(weights="imagenet", include_top=False)

    # Resolve every requested layer first, then collect the symbolic outputs.
    selected_layers = [model.get_layer(name) for name in layer_settings.keys()]
    outputs_dict = {}
    for layer in selected_layers:
        outputs_dict[layer.name] = layer.output

    return keras.Model(inputs=model.inputs, outputs=outputs_dict)
def feature_table_creator(image_bytes):
    """Resize an image to 224x224 and compute its feature-table entry.

    Args:
        image_bytes: Image array accepted by ``cv2.resize`` (despite the
            name, the caller below passes a decoded image, not raw bytes).

    Returns:
        Dict with the single key ``'inception_v3'`` holding whatever
        ``InceptionV3(image)`` returns.
    """
    image_size = (224, 224)
    image_bytes = cv2.resize(image_bytes, image_size)
    feature_table = {'inception_v3': None}
    # feature_table['vgg'] = vgg(image_bytes)
    feature_table['inception_v3'] = InceptionV3(image_bytes)
    return feature_table


if __name__ == "__main__":
    inception_v3_extractor = inception_v3.InceptionV3(weights='imagenet',
                                                      include_top=False,
                                                      input_shape=(224, 224, 3))
    img_dir_path = input('[INPUT] image dir path : ')
    features = {'img': [], 'inception_v3': [], 'cluster': []}
    pics_num = os.listdir(img_dir_path)
    bar = progressbar.ProgressBar(
        maxval=len(pics_num),
        widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])
    bar.start()
    for i, img_path in enumerate(pics_num):
        # os.path.join works whether or not the entered directory ends with a
        # separator; plain concatenation required a trailing one.
        img_path = os.path.join(img_dir_path, img_path)
        with open(img_path, 'rb') as f:
            img_bytes = f.read()
        # np.frombuffer replaces the deprecated binary mode of np.fromstring.
        Image = cv2.imdecode(np.frombuffer(img_bytes, np.uint8), cv2.IMREAD_UNCHANGED)
        # Keep only the first three channels (drops an alpha channel if any).
        Image = Image[:, :, :3]
        single_feature_table = feature_table_creator(Image)
flattened_features = np.array(flattened_features) flattened_features = flattened_features.reshape(1, -1) return sparse.csr_matrix(flattened_features) def pred_decoder(cluster_pred): if cluster_pred[0] == 1: return 'watch' else: return 'glasses' if __name__ == "__main__": # import nasnet model for extracrating image features. inception_v3_extractor = inception_v3.InceptionV3( weights='imagenet', include_top=False, input_shape=(224, 224, 3)) # import kmeans model for predict clusters pkl_kmeans_name = 'kmeans_watch_glasses.pkl' with open(pkl_kmeans_name, 'rb') as f: kmeans = pkl.load(f) # save into this path image_path = input('input image address : ') # image = cv2.imread(image_path) with open(image_path, 'rb') as f: img_bytes = f.read() Image = cv2.imdecode(np.fromstring(img_bytes, np.uint8), cv2.IMREAD_UNCHANGED) preprocess_image = nn_image_preprocessing(Image) inception_v3_feature_arr = inception_v3_feature_extractor(preprocess_image) cluster_pred = kmeans.predict(inception_v3_feature_arr) label = pred_decoder(cluster_pred)
def classifier(classs_num=5, input_width=224, input_height=224, backbone='vgg16', train_base=False):
    """Build an image classifier on top of an ImageNet-pretrained backbone.

    Args:
        classs_num: Number of output classes of the softmax layer.
        input_width: First spatial dimension of the input tensor.
        input_height: Second spatial dimension of the input tensor.
        backbone: ``'vgg16'``, ``'resnet50'`` or ``'inception_v3'``.
        train_base: Value assigned to ``trainable`` on every backbone layer.

    Returns:
        A ``Model`` from the backbone input to the softmax predictions.

    Raises:
        ValueError: If ``backbone`` is not one of the supported names.
    """
    # An unknown backbone used to fall through ``else: pass`` and crash later
    # on an unbound ``x``/``base_model``; reject it up front instead.
    supported_backbones = ('vgg16', 'resnet50', 'inception_v3')
    if backbone not in supported_backbones:
        raise ValueError(
            "Unsupported backbone {!r}; expected one of {}".format(
                backbone, ", ".join(supported_backbones)))

    # NOTE(review): the shape is passed as (input_width, input_height, 3);
    # confirm the intended axis order for non-square inputs.
    input_tensor = Input(shape=(input_width, input_height, 3))

    if backbone == 'vgg16':
        base_model = vgg16.VGG16(include_top=False, weights="imagenet",
                                 input_tensor=input_tensor)
        x = base_model.output
    elif backbone == 'resnet50':
        base_model = resnet50.ResNet50(include_top=False, weights="imagenet",
                                       input_tensor=input_tensor)
        if input_width == 224:
            x = base_model.output
        else:
            # For other widths the features are taken from an earlier layer.
            x = base_model.get_layer('activation_21').output
    else:
        base_model = inception_v3.InceptionV3(include_top=False, weights="imagenet",
                                              input_tensor=input_tensor)
        x = base_model.output

    # Head: global average pooling -> 64 ReLU units -> dropout -> softmax.
    x = GlobalAveragePooling2D()(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.5)(x)
    predictions = Dense(classs_num, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)

    # Freeze or unfreeze the whole backbone in one go.
    for layer in base_model.layers:
        layer.trainable = train_base
    return model
def deep_dream_example():
    """Run the backend-function deep-dream pipeline on ``./Machu_Picchu.jpg``.

    Builds a loss from the squared activations of selected InceptionV3
    layers, runs gradient ascent at several image scales (smallest first),
    re-injects the detail lost by rescaling after each scale, and saves the
    result to ``./deep_dream_results.png``.
    """
    base_image_filepath = './Machu_Picchu.jpg'  # image to transform
    result_prefix = './deep_dream_results'  # prefix for the saved result

    # Layers whose activation is maximized, with their weight in the loss.
    # Tweaking these produces different visual effects.
    settings = {
        'features': {
            'mixed2': 0.2,
            'mixed3': 0.5,
            'mixed4': 2.,
            'mixed5': 1.5,
        },
    }

    K.set_learning_phase(0)

    # InceptionV3 with pre-trained ImageNet weights; its input is the dream.
    model = inception_v3.InceptionV3(weights='imagenet', include_top=False)
    dream = model.input
    print('Model loaded.')

    layer_dict = {layer.name: layer for layer in model.layers}
    channels_first = K.image_data_format() == 'channels_first'

    # Loss: weighted, size-normalized sum of squared activations.
    loss = K.variable(0.)
    for layer_name, coeff in settings['features'].items():
        if layer_name not in layer_dict:
            raise ValueError('Layer ' + layer_name + ' not found in model.')
        x = layer_dict[layer_name].output
        scaling = K.prod(K.cast(K.shape(x), 'float32'))
        # Border pixels are left out of the loss to avoid border artifacts.
        if channels_first:
            interior = x[:, :, 2: -2, 2: -2]
        else:
            interior = x[:, 2: -2, 2: -2, :]
        loss = loss + coeff * K.sum(K.square(interior)) / scaling

    # Gradient of the loss with respect to the dream, normalized by its mean
    # absolute value.
    grads = K.gradients(loss, dream)[0]
    grads /= K.maximum(K.mean(K.abs(grads)), K.epsilon())

    # Backend function returning loss and gradients for an input image.
    fetch_loss_and_grads = K.function([dream], [loss, grads])

    # Process:
    # - Load the original image.
    # - Define a number of processing scales, from smallest to largest.
    # - Resize the original image to the smallest scale.
    # - For every scale, starting with the smallest:
    #     - run gradient ascent,
    #     - upscale the image to the next scale,
    #     - re-inject the detail that was lost at upscaling time.
    # - Stop when back at the original size.
    # The lost detail is obtained by shrinking the original image, upscaling
    # it, and comparing the result to the (resized) original.

    # Hyperparameters; changing them yields different effects.
    step = 0.01  # gradient ascent step size
    num_octave = 3  # number of scales
    octave_scale = 1.4  # size ratio between scales
    iterations = 20  # ascent steps per scale
    max_loss = 10.

    img = preprocess_image(base_image_filepath)
    original_shape = img.shape[2:] if channels_first else img.shape[1:3]

    successive_shapes = [original_shape]
    for octave in range(1, num_octave):
        successive_shapes.append(
            tuple([int(dim / (octave_scale ** octave)) for dim in original_shape]))
    successive_shapes.reverse()

    original_img = np.copy(img)
    shrunk_original_img = resize_img(img, successive_shapes[0])

    for shape in successive_shapes:
        print('Processing image shape', shape)
        img = resize_img(img, shape)
        img = gradient_ascent(img, fetch_loss_and_grads,
                              iterations=iterations,
                              step=step,
                              max_loss=max_loss)
        upscaled_shrunk_original_img = resize_img(shrunk_original_img, shape)
        same_size_original = resize_img(original_img, shape)
        lost_detail = same_size_original - upscaled_shrunk_original_img
        img += lost_detail
        shrunk_original_img = resize_img(original_img, shape)

    save_img(result_prefix + '.png', deprocess_image(np.copy(img)))
# Report the shape of the first extractor's features and persist that model
# without optimizer state.
print(features_x1.shape)
model1.save("../model/model1.h5", include_optimizer=False)

# Extractor 2: headless ResNet50 followed by Flatten, run over train_generator.
FEATURE_EXTRACTOR2 = resnet50.ResNet50(weights='imagenet', include_top=False, input_shape=targetSize_withdepth)
model2 = Sequential()
model2.add(FEATURE_EXTRACTOR2)
model2.add(Flatten())
features_x2 = model2.predict_generator(train_generator)
print(type(features_x2).__name__)
print(features_x2.shape)
model2.save("../model/model2.h5", include_optimizer=False)

# Extractor 3: headless InceptionV3 followed by Flatten, same procedure.
FEATURE_EXTRACTOR3 = inception_v3.InceptionV3(weights='imagenet', include_top=False, input_shape=targetSize_withdepth)
model3 = Sequential()
model3.add(FEATURE_EXTRACTOR3)
model3.add(Flatten())
features_x3 = model3.predict_generator(train_generator)
print(type(features_x3).__name__)
print(features_x3.shape)
model3.save("../model/model3.h5", include_optimizer=False)

# Extractor 4: headless DenseNet201 followed by Flatten.
FEATURE_EXTRACTOR4 = densenet.DenseNet201(weights='imagenet', include_top=False, input_shape=targetSize_withdepth)
model4 = Sequential()
model4.add(FEATURE_EXTRACTOR4)
model4.add(Flatten())
def __init__(self, layers_contributions=None):
    """Load InceptionV3 and build the deep-dream model from selected layers.

    Args:
        layers_contributions: Names of the InceptionV3 layers handed to
            ``self.deep_dream_model``. Defaults to ``['mixed3', 'mixed5']``.
    """
    # A list literal as the default would be shared by every call; create a
    # fresh list per instance instead.
    if layers_contributions is None:
        layers_contributions = ['mixed3', 'mixed5']

    inception = inception_v3.InceptionV3(weights="imagenet", include_top=False)
    print("model loaded")
    self.dream_model = self.deep_dream_model(inception, layers_contributions)
    # Small indirection: apply a model to its inputs.
    self.model_output = lambda model, inputs: model(inputs)
def set_model(self, model_name, top_n=5):
    """Select the ImageNet classifier used by this instance.

    Sets ``self.model``, ``self.target_size`` (the input size images are
    resized to), ``self.decoder`` (a callable returning the ``top_n``
    decoded predictions) and ``self.ref`` (an HTML snippet linking the
    paper).

    Args:
        model_name: One of ``'densenet'``, ``'inception_resnet_v2'``,
            ``'inception_v3'``, ``'mobilenet'``, ``'mobilenet_v2'``,
            ``'nasnet'``, ``'resnet50'``, ``'vgg16'``, ``'vgg19'`` or
            ``'xception'``. Any other value is logged as an error and
            leaves the instance unchanged.
        top_n: Number of predictions the decoder returns.
    """
    if model_name == 'densenet':
        self.model = densenet.DenseNet121(include_top=True,
                                          weights='imagenet',
                                          input_tensor=None,
                                          input_shape=None,
                                          pooling=None,
                                          classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: densenet.decode_predictions(x, top=top_n)
        self.ref = (
            " <ul> <li><a href='https://arxiv.org/abs/1608.06993' target='_blank'> "
            "Densely Connected Convolutional Networks</a> "
            "(CVPR 2017 Best Paper Award)</li> </ul> "
        )
    elif model_name == 'inception_resnet_v2':
        self.model = inception_resnet_v2.InceptionResNetV2(
            include_top=True,
            weights='imagenet',
            input_tensor=None,
            input_shape=None,
            pooling=None,
            classes=1000)
        self.target_size = (299, 299)
        self.decoder = lambda x: inception_resnet_v2.decode_predictions(
            x, top=top_n)
        self.ref = (
            " <ul> <li><a href='https://arxiv.org/abs/1602.07261' target='_blank'> "
            "Inception-v4, Inception-ResNet and the Impact of Residual "
            "Connections on Learning</a></li> </ul> "
        )
    elif model_name == 'inception_v3':
        self.model = inception_v3.InceptionV3(include_top=True,
                                              weights='imagenet',
                                              input_tensor=None,
                                              input_shape=None,
                                              pooling=None,
                                              classes=1000)
        self.target_size = (299, 299)
        self.decoder = lambda x: inception_v3.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1512.00567' target='_blank'> "
            "Rethinking the Inception Architecture for Computer Vision"
            "</a></li> </ul> "
        )
    elif model_name == 'mobilenet':
        self.model = mobilenet.MobileNet(input_shape=None,
                                         alpha=1.0,
                                         depth_multiplier=1,
                                         dropout=1e-3,
                                         include_top=True,
                                         weights='imagenet',
                                         input_tensor=None,
                                         pooling=None,
                                         classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: mobilenet.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1704.04861' target='_blank'> "
            "MobileNets: Efficient Convolutional Neural Networks for Mobile "
            "Vision Applications</a></li> </ul> "
        )
    elif model_name == 'mobilenet_v2':
        self.model = mobilenet_v2.MobileNetV2(input_shape=None,
                                              alpha=1.0,
                                              include_top=True,
                                              weights='imagenet',
                                              input_tensor=None,
                                              pooling=None,
                                              classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: mobilenet_v2.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1801.04381' target='_blank'> "
            "MobileNetV2: Inverted Residuals and Linear Bottlenecks"
            "</a></li> </ul> "
        )
    elif model_name == 'nasnet':
        self.model = nasnet.NASNetLarge(input_shape=None,
                                        include_top=True,
                                        weights='imagenet',
                                        input_tensor=None,
                                        pooling=None,
                                        classes=1000)
        # NASNetLarge's ImageNet classifier takes 331x331 inputs; the
        # previous (224, 224) did not match the loaded model's input.
        self.target_size = (331, 331)
        self.decoder = lambda x: nasnet.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1707.07012' target='_blank'> "
            "Learning Transferable Architectures for Scalable Image "
            "Recognition</a></li> </ul> "
        )
    elif model_name == 'resnet50':
        self.model = resnet50.ResNet50(include_top=True,
                                       weights='imagenet',
                                       input_tensor=None,
                                       input_shape=None,
                                       pooling=None,
                                       classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: resnet50.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li>ResNet : <a href='https://arxiv.org/abs/1512.03385' "
            "target='_blank'>Deep Residual Learning for Image Recognition "
            "</a></li> </ul> "
        )
    elif model_name == 'vgg16':
        self.model = vgg16.VGG16(include_top=True,
                                 weights='imagenet',
                                 input_tensor=None,
                                 input_shape=None,
                                 pooling=None,
                                 classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: vgg16.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1409.1556' target='_blank'> "
            "Very Deep Convolutional Networks for Large-Scale Image "
            "Recognition</a></li> </ul>"
        )
    elif model_name == 'vgg19':
        self.model = vgg19.VGG19(include_top=True,
                                 weights='imagenet',
                                 input_tensor=None,
                                 input_shape=None,
                                 pooling=None,
                                 classes=1000)
        self.target_size = (224, 224)
        self.decoder = lambda x: vgg19.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1409.1556' target='_blank'>"
            "Very Deep Convolutional Networks for Large-Scale \n"
            "Image Recognition</a></li> </ul>"
        )
    elif model_name == 'xception':
        self.model = xception.Xception(include_top=True,
                                       weights='imagenet',
                                       input_tensor=None,
                                       input_shape=None,
                                       pooling=None,
                                       classes=1000)
        self.target_size = (299, 299)
        self.decoder = lambda x: xception.decode_predictions(x, top=top_n)
        self.ref = (
            "<ul> <li><a href='https://arxiv.org/abs/1610.02357' target='_blank'>"
            "Xception: Deep Learning with Depthwise Separable Convolutions"
            "</a></li> </ul>"
        )
    else:
        # ``ERROR`` is a level constant, not a logging method; the call has
        # to go through ``error``.
        logger.error('There has no model name !!!')
image_vector).encode()) num_vecs += 1 fvec.close() IMAGE_SIZE = 224 VECTOR_FILE = os.path.join(DATA_DIR, "vgg16-vectors.tsv") vgg16_model = vgg16.VGG16(weights="imagenet", include_top=True) model = Model(inputs=vgg16_model.input, outputs=vgg16_model.get_layer("fc2").output) preprocessor = vgg16.preprocess_input vectorize_images(IMAGE_DIR, IMAGE_SIZE, preprocessor, model, VECTOR_FILE) IMAGE_SIZE = 299 VECTOR_FILE = os.path.join(DATA_DIR, "inception-vectors.tsv") inception_model = inception_v3.InceptionV3(weights="imagenet", include_top=True) model = Model(inputs=inception_model.input, outputs=inception_model.get_layer("avg_pool").output) preprocessor = inception_v3.preprocess_input vectorize_images(IMAGE_DIR, IMAGE_SIZE, preprocessor, model, VECTOR_FILE) IMAGE_SIZE = 224 VECTOR_FILE = os.path.join(DATA_DIR, "resnet-vectors.tsv") resnet_model = resnet50.ResNet50(weights="imagenet", include_top=True) model = Model(inputs=resnet_model.input, outputs=resnet_model.get_layer("avg_pool").output) preprocessor = resnet50.preprocess_input vectorize_images(IMAGE_DIR, IMAGE_SIZE, preprocessor, model, VECTOR_FILE) IMAGE_SIZE = 299 VECTOR_FILE = os.path.join(DATA_DIR, "xception-vectors.tsv")
# Keras libraries from tensorflow.keras import layers from tensorflow.keras import models from tensorflow.keras import optimizers from tensorflow.keras.applications import vgg16, inception_v3, resnet50, mobilenet from tensorflow.keras.preprocessing.image import ImageDataGenerator from tensorflow.keras.preprocessing.image import img_to_array, load_img from tensorflow.keras.callbacks import TensorBoard ### ImageNet Large Scale Visual Recognition Challenge (ILSVRC) (1.2 M images, 1000 classes) # Load the VGG model vgg_model = vgg16.VGG16(weights='imagenet') # Load the inception_V3 model inception_model = inception_v3.InceptionV3(weights='imagenet') # Load the ResNet50 model resnet_model = resnet50.ResNet50(weights='imagenet') # Load the MobileNet model mobilenet_model = mobilenet.MobileNet(weights='imagenet') ### Load the image and convert its format to a 4-dimensional Tensor as an input of the form # (batchsize, height, width, channels) requested by the Network. filename = 'C:/Users/Theo/PycharmProjects/BMDATA/TP1/data/train/cat.1.jpg' # Load an image in a PIL format original = load_img(filename, target_size=(224, 224)) numpy_image = img_to_array(original) # We add the extra dimension to the axis 0
""" Deep dreams p303 """
import silence_tensorflow.auto
from tensorflow.keras.applications import inception_v3
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing import image
import tensorflow as tf
import numpy as np
import scipy.misc
import imageio

# The loss below is built symbolically with backend ops, so eager execution
# is switched off first.
tf.compat.v1.disable_eager_execution()
K.set_learning_phase(0)  # disable all training operations

# Headless InceptionV3 with ImageNet weights.
model = inception_v3.InceptionV3(weights='imagenet', include_top=False)
# model.summary()

# Layer name -> weight of that layer's activation in the loss.
layer_contributions = {'mixed2': .2, 'mixed3': 3., 'mixed4': 2., 'mixed5': 1.5}
# Lookup table from layer name to layer object.
layer_dict = dict([(layer.name, layer) for layer in model.layers])

# Accumulate the weighted, size-normalized sum of squared activations.
loss = K.variable(0.)
for layer_name in layer_contributions:
    print(layer_name)
    coeff = layer_contributions[layer_name]
    activation = layer_dict[layer_name].output
    print(activation)
    # Number of elements in the activation tensor, used for normalization.
    scaling = K.prod(K.cast(K.shape(activation), 'float32'))
    # The 2-element border on axes 1 and 2 is excluded from the sum.
    loss = loss + coeff * K.sum(K.square(activation[:, 2: -2, 2: -2, :])) / scaling
def get_siamese_model(name=None, input_shape=(224, 224, 3), embedding_vec_size=512, not_freeze_last=2):
    """Build a siamese similarity network on a pretrained backbone.

    Args:
        name: Backbone name, e.g. ``"InceptionV3"`` or ``"ResNet50"``.
        input_shape: Shape of each of the two image inputs.
        embedding_vec_size: Width of the linear ``'embedding'`` layer.
        not_freeze_last: Number of trailing backbone layers left trainable;
            ``0`` freezes the whole backbone.

    Returns:
        ``{"model": siamese_net, "preprocess_input": fn}`` for a supported
        ``name``; otherwise the list of supported backbone names.
    """
    # Lazy dispatch table: only the selected entry touches its module, so an
    # unknown name can be answered without referencing any backbone.
    backbones = {
        "InceptionV3": lambda: (inception_v3.InceptionV3, inception_v3.preprocess_input),
        "InceptionResNetV2": lambda: (inception_resnet_v2.InceptionResNetV2, inception_resnet_v2.preprocess_input),
        "DenseNet121": lambda: (densenet.DenseNet121, densenet.preprocess_input),
        "DenseNet169": lambda: (densenet.DenseNet169, densenet.preprocess_input),
        "DenseNet201": lambda: (densenet.DenseNet201, densenet.preprocess_input),
        "MobileNetV2": lambda: (mobilenet_v2.MobileNetV2, mobilenet_v2.preprocess_input),
        "MobileNet": lambda: (mobilenet.MobileNet, mobilenet.preprocess_input),
        "ResNet50": lambda: (resnet50.ResNet50, resnet50.preprocess_input),
        "VGG16": lambda: (vgg16.VGG16, vgg16.preprocess_input),
        "VGG19": lambda: (vgg19.VGG19, vgg19.preprocess_input),
        "Xception": lambda: (xception.Xception, xception.preprocess_input),
    }

    # Unknown (or missing) backbone: report the supported names instead.
    resolve = backbones.get(name) if isinstance(name, str) else None
    if resolve is None:
        return list(backbones)

    build_backbone, model_preprocess_input = resolve()
    base_model = build_backbone(weights='imagenet', include_top=False)

    # Disable training for all but the last ``not_freeze_last`` layers.
    # ``layers[:-0]`` is an empty slice, so 0 has to be handled explicitly to
    # mean "freeze every layer".
    if not_freeze_last:
        frozen_layers = base_model.layers[:-not_freeze_last]
    else:
        frozen_layers = base_model.layers
    for layer in frozen_layers:
        layer.trainable = False

    x = base_model.layers[-1].output
    x = GlobalAveragePooling2D()(x)
    x = Dense(
        embedding_vec_size,
        activation='linear',  # sigmoid? relu?
        name='embedding',
        use_bias=False
    )(x)

    model = Model(
        inputs=base_model.input,
        outputs=x
    )

    left_input = Input(input_shape)
    right_input = Input(input_shape)

    # Generate the encodings (feature vectors) for the two images.
    encoded_l = model(left_input)
    encoded_r = model(right_input)

    # Absolute difference between the two encodings.
    L1_layer = Lambda(lambda tensors: K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])

    # Single unit with the ``gaussian`` activation and non-negative weights
    # that turns the distance into the similarity score.
    prediction = Dense(
        1,
        activation=Activation(gaussian),
        use_bias=False,
        kernel_constraint=NonNeg()
    )(L1_distance)

    # Connect the inputs with the outputs.
    siamese_net = Model(
        inputs=[left_input, right_input],
        outputs=prediction
    )

    return {
        "model": siamese_net,
        "preprocess_input": model_preprocess_input
    }
def _build_base_model(model_name):
    """Instantiate the headless backbone selected by ``model_name``.

    :param model_name: One of ``'vgg'``, ``'inception'`` or ``'resnet'``.
    :return: The Keras application model built with ``include_top=False`` and
        ``input_shape=INPUT_SHAPE``.
    :raises ValueError: If ``model_name`` is not one of the supported names.
    """
    if model_name == 'vgg':
        return vgg16.VGG16(include_top=False, input_shape=INPUT_SHAPE)
    if model_name == 'inception':
        return inception_v3.InceptionV3(include_top=False, input_shape=INPUT_SHAPE)
    if model_name == 'resnet':
        return resnet_v2.ResNet152V2(include_top=False, input_shape=INPUT_SHAPE)
    raise ValueError(
        "unknown model '{}'; expected 'vgg', 'inception' or 'resnet'".format(model_name))


def _train_at_precision(model_name, precision, train_generator, validation_generator,
                        epochs, build_kwargs=None, train_kwargs=None):
    """Set the precision policy, build a fresh model and train it.

    :param model_name: Backbone name forwarded to ``_build_base_model``.
    :param precision: Value passed to ``set_precision`` (``'mixed'`` or ``'float32'``).
    :param train_generator: Training data generator.
    :param validation_generator: Validation data generator.
    :param epochs: Number of epochs passed to ``train``.
    :param build_kwargs: Extra keyword arguments for ``build_model``.
    :param train_kwargs: Extra keyword arguments for ``train``.
    :return: Whatever ``train`` returns.
    """
    print('Training model {} with {} precision...'.format(model_name, precision))
    print('Building model {}...'.format(model_name))
    # The policy is set before the backbone is constructed, as in the original flow.
    set_precision(precision)
    model = build_model(_build_base_model(model_name), **(build_kwargs or {}))
    return train(model, train_generator, validation_generator, epochs,
                 **(train_kwargs or {}))


def _load_trained_models(float32_path, mixed_path):
    """Load the float32 and mixed-precision models saved by a previous 'train' run.

    :return: Tuple ``(model_float32, model_mixed)``.
    :raises ValueError: If either saved model path does not exist.
    """
    if not (os.path.exists(float32_path) and os.path.exists(mixed_path)):
        raise ValueError('no models found')
    print('loading pre-trained models')
    return load_model(float32_path), load_model(mixed_path)


def main(args):
    """Entry point: compare mixed-precision and float32 training/inference.

    ``args.run`` selects the mode:

    * ``'train'`` - train the chosen backbone once with mixed precision and once
      with float32, saving to ``<model>-mixed`` and ``<model>-float32``.
    * ``'test'`` - load both saved models and run ``mcnemar_test`` on the
      validation data with batch size 1.
    * ``'training-speed'`` - train both variants for 2 epochs with the backbone
      built via ``build_model(..., trainable=True)``; nothing is saved.
    * ``'inference-speed'`` - load both saved models and run
      ``test_inference_speed`` on each.

    Any other ``args.run`` value performs only the environment checks and the
    ``split_data`` call.

    :param args: Namespace with ``run``, ``model``, ``batch_size`` and ``epochs``.
    :raises ValueError: If TensorFlow is not 2.2.0, no GPU device is reported,
        the saved models are missing, or ``args.model`` is unknown.
    """
    print_debug('TF Version: {}'.format(tf.__version__))
    if tf.__version__ != "2.2.0":
        raise ValueError("TensorFlow version should be 2.2.0")

    gpu_name = tf.test.gpu_device_name()
    if not gpu_name:
        raise ValueError("Please install GPU version of TF")
    print_debug('Default GPU Device: {}'.format(gpu_name))

    print('STARTING RUN: {}'.format(args.run))
    print('model: {}, batch size: {}, epochs: {}'.format(
        args.model, args.batch_size, args.epochs))
    print('---')

    split_data(20000, 5000)

    float32_path = args.model + '-float32'
    mixed_path = args.model + '-mixed'

    if args.run == 'train':
        print('---')
        print('Building generators...')
        train_generator = build_generator('training_data', args.batch_size)
        validation_generator = build_generator('validation_data', args.batch_size)

        _train_at_precision(args.model, 'mixed', train_generator, validation_generator,
                            args.epochs, train_kwargs={'filepath': mixed_path})
        print('---')
        _train_at_precision(args.model, 'float32', train_generator, validation_generator,
                            args.epochs, train_kwargs={'filepath': float32_path})

    elif args.run == 'test':
        model_float32, model_mixed = _load_trained_models(float32_path, mixed_path)
        test_generator = build_generator('validation_data', 1)
        mcnemar_test(model_float32, model_mixed, test_generator)

    elif args.run == 'training-speed':
        print('---')
        print('Building generators...')
        train_generator = build_generator('training_data', args.batch_size)
        validation_generator = build_generator('validation_data', args.batch_size)

        # Speed runs use a fixed 2 epochs and do not save a checkpoint.
        _train_at_precision(args.model, 'mixed', train_generator, validation_generator,
                            2, build_kwargs={'trainable': True})
        print('---')
        _train_at_precision(args.model, 'float32', train_generator, validation_generator,
                            2, build_kwargs={'trainable': True})

    elif args.run == 'inference-speed':
        model_float32, model_mixed = _load_trained_models(float32_path, mixed_path)
        test_generator = build_generator('validation_data', args.batch_size)
        print('---')
        print('float32 results')
        test_inference_speed(model_float32, test_generator)
        print('mixed results')
        test_inference_speed(model_mixed, test_generator)
import os

import numpy as np
import tensorflow as tf
from PIL import Image
from tensorflow.keras import backend as K
from tensorflow.keras.applications import inception_v3
from tensorflow.keras.preprocessing import image

# Pre-trained InceptionV3 image classifier (default arguments)
model = inception_v3.InceptionV3()

# NOTE(review): presumably a perturbation bound of 16/255, doubled for the
# [-1, 1] input range used below - confirm against the code that uses it.
eps = 2.0 * 16.0 / 255.0

# Input tensor of the first layer and output tensor of the last layer
model_input_layer = model.layers[0].input
model_output_layer = model.layers[-1].output

# Target ImageNet class for the fake prediction.
# Class list: https://gist.github.com/ageitgey/4e1342c10a71981d0b491e1b8227328b
# Class #859 is "toaster"
object_type_to_fake = 859

# Load the image to hack, resized to 299x299
img = image.load_img(
    os.path.expanduser(
        "~/winter-camp-pek/food-101/food-101/images/apple_pie/1005649.jpg"),
    target_size=(299, 299))

# Rescale pixel intensities from [0, 255] to [-1, 1], as the model expects
original_image = (image.img_to_array(img) / 255. - 0.5) * 2.
def __init__(self, input_shape):
    """Create the InceptionV3 model and store it on ``self.model``.

    The network is built without its classification top, with ImageNet
    weights and ``pooling="avg"``.

    :param input_shape: Forwarded as ``input_shape`` to ``InceptionV3``.
    """
    backbone_options = {
        "include_top": False,
        "weights": 'imagenet',
        "input_tensor": None,
        "input_shape": input_shape,
        "pooling": "avg",
    }
    self.model = inception_v3.InceptionV3(**backbone_options)
for line in f: values = line.split() word = values[0] if word not in all_classes: continue coefs = np.asarray(values[1:], dtype='float32') word_dict[word] = coefs return word_dict word_dict = process_glove_file('glove.6B.300d.txt') embedding_matrix = np.zeros(shape=(num_classes, embedding_size)) for i in range(num_classes): embedding_matrix[i] = word_dict.get(all_classes[i]) inception = inception_v3.InceptionV3(weights="imagenet", include_top=False) inception.trainable=False # use only as feature extractor inp = Input(shape=(224,224,3)) X = inception(inp) X = GlobalAveragePooling2D()(X) X = Dense(300, activation="relu")(X) kern_init = Constant(value=embedding_matrix.T) X = Dense(num_classes, kernel_initializer=kern_init, activation="softmax")(X) fine_tune_model = Model(inp, X) fine_tune_model.summary() # opt = Adam() # fine_tune_model.compile(opt) # fine_tune_model.fit() # # # loss = L2 loss between image embedding and label embedding
input = tf.keras.layers.Input(shape=(28, 28)) x = tf.keras.layers.Flatten(input_shape=(28, 28))(input) x = tf.keras.layers.Dense(128, activation="relu")(x) x = tf.keras.layers.Dropout(0.2)(x) x = tf.keras.layers.Dense(10)(x) y = tf.keras.Model(input,x,name="test") input_data = tf.random.uniform([60, 28, 28]) graph_model = tf.function(y) # 运行结果 print("Eager time:", timeit.timeit(lambda: y(input_data), number=10000)) print("Graph time:", timeit.timeit(lambda: graph_model(input_data), number=10000)) ''' from vis.utils import utils import numpy as np inception = inception_v3.InceptionV3() layer_name_list = [ "input_1", "conv2d", "conv2d_1", "conv2d_2", "max_pooling2d", "conv2d_3", "conv2d_4", "max_pooling2d_1", "mixed0", "mixed1", "mixed2", "mixed3", "mixed4", "mixed5", "mixed6", "mixed7", "mixed8", "mixed9", "mixed10", "predictions" ] layer_name = "mixed0" layer_idx = utils.find_layer_idx(inception, layer_name) i = 40 target_layer = None for layer in inception.layers: if i == 40: target_layer = layer
def deep_dream():
    """
    Run DeepDream with InceptionV3 on a fixed base image and save the results.

    DeepDream is an artistic image-modification technique that uses the representations learned by convnets.
    First released by Google in the summer of 2015, this algorithm is very similar to the gradient ascent
    technique we viewed earlier to represent the patterns learned by individual filters during training
    (Chapter 5). There are a few differences to the algorithm:

    -> With DeepDream you try to maximise the activation of the entire layer rather than one specific filter,
       thus mixing together visualisations of a larger number of filters.
    -> You start not from a blank, slightly noisy input, but rather from an existing image - thus the resulting
       effects latch on to preexisting visual patterns, distorting elements of the image in a somewhat
       artistic fashion.
    -> The input images are processed at different scales (called octaves), which improves the quality of the
       visualisations.

    The base image is read from, and every octave plus the final dream are written to, a hard-coded local
    directory.

    Author's note: this function does not work due to version issues.

    :return: None
    """
    # You won't be training a model for this application, so let's disable all training functionality before
    # starting
    K.set_learning_phase(0)

    model = inception_v3.InceptionV3(weights='imagenet', include_top=False)

    # In Chapter 5 we use the loss value to maximise the output of a specific filter. This time we'll attempt to
    # maximise the weighted sum of the L2 norm of the activations of a set of high-level layers. The set of layers
    # chosen will have a massive impact on the resulting modifications to the image, so make these params very
    # easily configurable.
    layers_contributions = {
        'mixed2': 0.2,
        'mixed3': 3.0,
        'mixed4': 2.0,
        'mixed5': 1.5
    }

    layer_dict = {layer.name: layer for layer in model.layers}

    # You'll define the loss by adding layer contributions to this scalar value.
    loss = K.variable(0.0)
    for layer_name, coeff in layers_contributions.items():
        # Retrieve the layer's output.
        activation = layer_dict[layer_name].output

        # Define the scaling factor and add the L2 norm of the features of a layer to the loss. You avoid border
        # artifacts by involving only non-border pixels in the loss.
        scaling = K.prod(K.cast(K.shape(activation), 'float32'))
        loss = loss + coeff * K.sum(K.square(
            activation[:, 2:-2, 2:-2, :])) / scaling

    # Now we can set up the gradient ascent process.
    dream = model.input

    # Compute the gradient of the loss w.r.t. the dream, then normalise it. The divisor is clamped from BELOW
    # with maximum(): using minimum() here would cap it at 1e-7 and scale every gradient up by at least 1e7.
    grads = K.gradients(loss, dream)[0]
    grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)

    # Now set up a Keras function to retrieve the value of the loss and gradients given an input image.
    outputs = [loss, grads]
    fetch_loss_and_grads = K.function([dream], outputs)

    def eval_loss_and_grads(x):
        """
        This function is used to call the fetch_loss_and_grads function and package the outputs in an easy to
        use fashion.

        :param x: Input dream
        :return: The loss and the gradient of the loss w.r.t. the dream.
        """
        outs = fetch_loss_and_grads([x])
        loss_value = outs[0]
        grads_value = outs[1]
        return loss_value, grads_value

    def gradient_ascent(x, iterations, step, max_loss=None):
        """
        This function runs gradient ascent for a number of iterations.

        :param x: Input dream (updated in place)
        :param iterations: Number of iterations to run gradient ascent for
        :param step: Step-size of the gradient ascent
        :param max_loss: Maximum loss we'll accept during the gradient ascent before stopping.
        :return: A modified version of the input dream
        """
        for i in range(iterations):
            loss_value, grads_value = eval_loss_and_grads(x)
            if max_loss is not None and loss_value > max_loss:
                break
            print(f"...Loss value at {i}: {loss_value}")
            x += step * grads_value
        return x

    # Now we can begin programming the DeepDream algorithm itself. First we need to define a set of scales
    # (called octaves) at which to process the image. Each octave is 40% larger than the last. At each scale (from
    # smallest to largest) you run gradient ascent to maximise the loss you previously defined. To prevent artifacts
    # of up-scaling (blurriness and stretching) we'll re-inject the lost detail back into the image, which is
    # possible because you know what the original image should look like at a larger scale.
    step = 0.01
    num_octave = 3
    octave_scale = 1.4
    iterations = 20

    max_loss = 10.0

    # Single place for the hard-coded working directory (trailing separator included).
    project_dir = 'C:\\Users\\owatkins\\OneDrive - Analog Devices, Inc\\Documents\\Project Folder\\Tutorials and Courses\\Deep Learning with Python\\'
    base_image_path = project_dir + 'European_Landscape.jpg'

    print("Loading Base Image...")
    # Load the base image into Numpy array.
    img = preprocess_image_inception(base_image_path)
    print(f"Image Preprocessed: {img.dtype} of size: {img.shape}")

    # Prepare a list of shape tuples defining the different scales at which to run gradient ascent.
    original_shape = img.shape[1:3]
    successive_shapes = [original_shape]
    for i in range(1, num_octave):
        shape = tuple(
            [int(dim / (octave_scale**i)) for dim in original_shape])
        successive_shapes.append(shape)

    # Reverse the list so that they run in ascending order.
    successive_shapes = successive_shapes[::-1]

    # Resize the Numpy array of the image to the smallest size.
    original_img = np.copy(img)
    shrunk_original_image = resize_img(original_img, successive_shapes[0])

    # Run deep dream over all octaves.
    for shape in successive_shapes:
        print(f"Processing Image shape: {shape}")
        # Scales up the deep dream image
        img = resize_img(img, shape)

        # Run gradient ascent, altering the dream.
        img = gradient_ascent(img,
                              iterations=iterations,
                              step=step,
                              max_loss=max_loss)

        # Scales up the smaller version of the original image: it will be pixellated. Compute the high-quality
        # version of the original image at this size. The difference between the two is the detail lost in
        # up-scaling.
        upscaled_shrunk_original_img = resize_img(shrunk_original_image, shape)
        same_size_original = resize_img(original_img, shape)
        lost_detail = same_size_original - upscaled_shrunk_original_img

        # Re-inject the lost detail back into the dream. Grab the shrunk_original_image and save the dream at
        # this octave
        img += lost_detail
        shrunk_original_image = resize_img(original_img, shape)
        save_img(
            img,
            fname=project_dir + 'dream_at_scale_' + str(shape) + '.png')

    # Save the final dream.
    save_img(
        img,
        fname=project_dir + 'Final_Dream.png'
    )
yield [X1, X2], Y datagen_args = dict(featurewise_center=True, featurewise_std_normalization=True, rotation_range=20, width_shift_range=0.2, height_shift_range=0.2, horizontal_flip=True, zoom_range=0.2) datagens = [ ImageDataGenerator(**datagen_args), ImageDataGenerator(**datagen_args) ] pair_gen = pair_generator(triples_data, image_cache, datagens, 32) inception_1 = inception_v3.InceptionV3(weights="imagenet", include_top=True) inception_2 = inception_v3.InceptionV3(weights="imagenet", include_top=True) for layer in inception_1.layers: layer.trainable = False layer._name = layer.name + "_1" for layer in inception_2.layers: layer.trainable = False layer._name = layer.name + "_2" vector_1 = inception_1.get_layer("avg_pool_1").output vector_2 = inception_2.get_layer("avg_pool_2").output sim_head = load_model(os.path.join(DATA_DIR, "models", "resnet50-l1-best.h5"), custom_objects={'LeakyReLU': LeakyReLU}) for layer in sim_head.layers:
def similarity_model(input_width=224, input_height=224, backbone='vgg16', train_base=False):
    """Build a feature-extraction model on top of the chosen backbone.

    Supported ``backbone`` values and the tensor used as model output:

    * ``'vgg16'`` / ``'resnet50'`` - the full model output (``include_top=True``).
    * ``'inception_v3'`` - the output of the ``avg_pool`` layer.
    * ``'mobilenet_v2'`` - global average pooling over the ``Conv_1_bn`` output.
    * anything else - a single strided 3x3 convolution (32 filters, no bias)
      followed by global average pooling; no pretrained backbone is used.

    :param input_width: First spatial dimension of the input tensor.
    :param input_height: Second spatial dimension of the input tensor.
    :param backbone: Name of the backbone to use.
    :param train_base: Value assigned to ``trainable`` on every layer of the
        pretrained backbone (ignored when no pretrained backbone is used).
    :return: A Keras ``Model`` mapping the image input to the selected features.
    """
    # NOTE(review): the input is declared as (width, height, 3) while Keras
    # channels-last images are (height, width, channels); harmless for the
    # square default - confirm before using non-square inputs.
    image_input = Input(shape=(input_width, input_height, 3))
    base_model = None

    if backbone == 'vgg16':
        base_model = vgg16.VGG16(include_top=True, weights="imagenet",
                                 input_tensor=image_input)
        features = base_model.output
    elif backbone == 'resnet50':
        base_model = resnet50.ResNet50(include_top=True, weights="imagenet",
                                       input_tensor=image_input)
        features = base_model.output
    elif backbone == 'inception_v3':
        base_model = inception_v3.InceptionV3(include_top=True, weights="imagenet",
                                              input_tensor=image_input)
        features = base_model.get_layer('avg_pool').output
    elif backbone == 'mobilenet_v2':
        base_model = MobileNetV2(include_top=True, weights="imagenet",
                                 input_tensor=image_input)
        last_conv_features = base_model.get_layer('Conv_1_bn').output
        features = GlobalAveragePooling2D()(last_conv_features)
    else:
        # Fallback: tiny untrained convolutional encoder
        conv_features = Conv2D(32, (3, 3), strides=(2, 2), padding='valid',
                               use_bias=False)(image_input)
        features = GlobalAveragePooling2D()(conv_features)

    if base_model is None:
        model_input = image_input
    else:
        model_input = base_model.input

    model = Model(inputs=model_input, outputs=features)

    # Apply the same trainable flag to every layer of the pretrained backbone
    if base_model is not None:
        for backbone_layer in base_model.layers:
            backbone_layer.trainable = train_base

    return model