Пример #1
0
    
    We use this model to generate **embeddings** for our images.
    
    As you can see below, once we've used the model to generate image features, we can then **store them to disk** 
    and re-use them without needing to do inference again! This is one of the reason that embeddings are so popular 
    in practical applications, as they allow for huge efficiency gains. 
    """)

    with st.echo():
        model = vector_search.load_headless_pretrained_model()
        if generate_image_features:
            print ("Generating image features...")
            images_features, file_index = vector_search.generate_features(image_paths, model)
            vector_search.save_features(features_path, images_features, file_mapping_path, file_index)
        else:
            images_features, file_index = vector_search.load_features(features_path, file_mapping_path)

    st.write("Our model is simply VGG16 without the last layer (softmax)")
    st.image(Image.open('assets/vgg16_architecture.jpg'), width=800, caption="Original VGG. Credit to Data Wow Blog")
    st.image(Image.open('assets/vgg16_chopped.jpg'), width=800, caption="Our model")
    st.write("This is how we get such a model in practice")
    show_source(vector_search.load_headless_pretrained_model)

    st.write("""
    What do we mean by generating embeddings? Well we just use our pre-trained model up to the penultimate layer, and 
    store the value of the activations.""")
    show_source(vector_search.generate_features)

    st.write('Here are what the embeddings look like for the first 20 images. Each image is now represented by a '
             'sparse vector of size 4096:')
    st.write(images_features[:20])
Пример #2
0
# Persist the reference-image features and the index->file mapping by hand.
# vector_search.save_features raised an error on the file mapping, so the
# two artefacts are written directly: features as .npy, mapping as JSON.
np.save('./object_detection/feature_extraction/ref_img_features_no_aug.npy',
        features)
with open(
        './object_detection/feature_extraction/ref_img_filemapping_no_aug.json',
        'w') as f:
    json.dump(file_mapping, f)

# %% Build database reference features
'''
Build and save an ANNOY search index for fast searching.
'''
# Reload the features and file mapping written above, then compile the Annoy
# search index from them.
# NOTE(review): the paths are passed without '.npy' / '.json' extensions (and
# the first one without the leading './'); presumably load_features appends
# the extensions itself -- confirm against vector_search.load_features.
features, file_index = vector_search.load_features(
    'object_detection/feature_extraction/ref_img_features_no_aug',
    './object_detection/feature_extraction/ref_img_filemapping_no_aug')

# NOTE(review): n_trees=8000 is passed through to index_features; this looks
# unusually large for an Annoy index -- confirm it is intentional.
ref_img_index = vector_search.index_features(features, n_trees=8000)

# Write the built index to disk so later cells/processes can reuse it.
ref_img_index.save(
    './object_detection/feature_extraction/ref_img_index_no_aug.ann')

# %%
'''
Get and save hash dictionary mapping between the file_mapping index
and each item's name, category, source website, local source file path,
and the local source file name.
'''

with open(
Пример #3
0
    indexing, pure_image_embedding = check_inputs(index_folder, input_image,
                                                  input_word, model_path,
                                                  glove_path)

    # Decide whether to use pre-trained VGG or custom model
    loaded_model = vector_search.load_headless_pretrained_model()
    if model_path:
        loaded_model = load_model(model_path)

    # Decide whether to index or search
    if indexing:
        features, index = index_images(index_folder, features_path,
                                       file_mapping, loaded_model)
        print("Indexed %s images" % len(features))
    else:
        images_features, file_index = vector_search.load_features(
            features_path, file_mapping)

        # Decide whether to do only image search or hybrid search
        if pure_image_embedding:
            image_index = vector_search.index_features(images_features,
                                                       dims=4096)
            search_key = get_index(input_image, file_index)
            results = vector_search.search_index_by_key(
                search_key, image_index, file_index)
            print(results)
        else:
            word_vectors = vector_search.load_glove_vectors(glove_path)
            # If we are searching for tags for an image
            if input_image:
                search_key = get_index(input_image, file_index)
                word_index, word_mapping = vector_search.build_word_index(
def _reset_tf_state():
    """Clear Keras/TensorFlow global state before (re)loading a model."""
    K.clear_session()
    # Only the compat.v1 spelling is used: the bare tf.reset_default_graph
    # alias does not exist on TF 2.x and is the same function on TF 1.x.
    tf.compat.v1.reset_default_graph()
    tf.keras.backend.clear_session()


def run(item,
        city,
        thedir,
        site,
        input_file=False,
        outdir='myflask/static/matches/',
        first=False,
        features_only=False,
        sold=False,
        model=False,
        pretrained_exists=False,
        topn=12):
    """
        Puts everything together: loads the model, loads (or generates) the
        image features, applies cosine similarity against the user's image
        and copies the ``topn`` best matches to the static matches folder.

        Args:
            item: The item you want to search for (e.g., couch)
            city: The city where you are searching
            thedir: the primary directory (defined early and passed around
                    for easily porting all programs elsewhere, e.g., AWS)
            site: Sub-folder of ``outdir`` the matched images are copied to
            input_file: Path of the query image. When falsy, a ``.jpg`` from
                    'myflask/static/uploads/' is used (the last one returned
                    by ``os.listdir``).
            outdir: Root folder that receives the matched images
            first: When true, (re)generate and save the image features
                    instead of loading them from disk
            features_only: When true, generate and save the features and
                    return without searching
            sold: When true, work on the 'sold' image folder and the
                    'sold_'-prefixed feature folders
            model: Unused; kept for backward compatibility. The model is
                    always obtained from load_headless_pretrained_model.
            pretrained_exists: Forwarded to load_headless_pretrained_model
            topn: Number of best matches to return

        Returns:
            None when ``features_only`` is true. Otherwise a tuple
            ``(barefiles, match_ids, similarities)``: the matched file
            names, their integer ids (file name without '.jpg') and the
            corresponding cosine similarities, best match first.

        Raises:
            FileNotFoundError: if no ``input_file`` is given and the uploads
                    folder contains no '.jpg' file.
    """
    _reset_tf_state()

    # Sold items live in a 'sold/' sub-folder and use 'sold_'-prefixed
    # feature folders; everything else about the layout is shared.
    cnn_dir = thedir + city + '/cnn/'
    if not sold:
        folder = thedir + city + '/' + item + '_images/'
        features_path = cnn_dir + item + '_features/'
        file_mapping_path = cnn_dir + item + '_file_mapping/'
    else:
        folder = thedir + city + '/' + item + '_images/sold/'
        features_path = cnn_dir + 'sold_' + item + '_features/'
        file_mapping_path = cnn_dir + 'sold_' + item + '_file_mapping/'

    # makedirs also creates the parent 'cnn/' folder, which the sold branch
    # previously assumed to exist already.
    os.makedirs(features_path, exist_ok=True)
    os.makedirs(file_mapping_path, exist_ok=True)

    if first or features_only:
        plural = 'es' if item == 'couch' else 's'
        print("%% You are generating the image features for all " + item +
              plural + " from " + city)
        _, image_paths = load_images(folder)

        # The model is only needed here; the search step below resets the
        # session and loads its own copy.
        model = load_headless_pretrained_model(
            pretrained_exists=pretrained_exists)
        images_features, file_index = generate_features(image_paths, model)
        vector_search.save_features(features_path, images_features,
                                    file_mapping_path, file_index)
        if features_only:
            return
    else:
        print(
            "%% You already have the image features in hand-- loading them from disk."
        )
        images_features, file_index = vector_search.load_features(
            features_path, file_mapping_path)

    # Define the location of the file uploaded by the user
    if not input_file:
        upload_dir = 'myflask/static/uploads/'
        for candidate in os.listdir(upload_dir):
            if candidate.endswith(".jpg"):
                input_file = upload_dir + candidate
        if not input_file:
            # Fail here with a clear message instead of deep inside load_img.
            raise FileNotFoundError(
                "No input_file given and no .jpg found in " + upload_dir)

    # Start from a clean session for the single-image prediction
    _reset_tf_state()
    model = load_headless_pretrained_model(pretrained_exists=pretrained_exists)

    # Load in the single input image from the user
    img = image.load_img(input_file, target_size=(224, 224))
    x_expand = np.expand_dims(image.img_to_array(img), axis=0)

    # Extract the image features according to the headless model
    single_image_features = model.predict(preprocess_input(x_expand))

    # Apply cosine_similarities between features of loaded image
    # and features of all directory images
    print("%% That was fast! Applying cosine similarity and finding images")
    cosine_similarities = cosine_similarity(single_image_features,
                                            images_features)[0]

    # Indices of the topn most similar images, best match first
    top_N_idx = np.argsort(cosine_similarities)[-topn:][::-1]
    topfiles = [file_index[i] for i in top_N_idx]

    # Copy the matches into the static folder served for this site
    dest_dir = outdir + site + '/'
    os.makedirs(dest_dir, exist_ok=True)
    barefiles = []
    match_ids = []
    for path in topfiles:
        name = path.split('/')[-1]
        copyfile(path, dest_dir + name)
        barefiles.append(name)
        # File names are expected to be '<integer id>.jpg'
        match_ids.append(int(name.replace('.jpg', '')))

    # After prediction
    K.clear_session()

    print("%% Cosine similarity complete. Matched " +
          "images are in myflask/static/matches/" + site)
    return barefiles, match_ids, (cosine_similarities[top_N_idx])