示例#1
0
        embeddings = embedding_helper(value, model="bow", dictionary=dictionary).get_embedding()
        embedded_movie_contents[data_manager.get_index_from_movie_id(movie_id)] = embeddings  # array start at 0
    np.save('data/embedded_movie_content.npy', embedded_movie_contents)

# Stage: obtain `encoded_movie_contents`, either by loading a previously saved
# array or by training an autoencoder on `embedded_movie_contents`.
# `encoded_movie_path` and `embedded_movie_contents` are defined earlier,
# outside this excerpt.
print("\nEncoding content")

if encoded_movie_path:
    # A truthy path means an encoded array already exists on disk: echo the
    # path and load it instead of training.
    print(encoded_movie_path)
    encoded_movie_contents = np.load(encoded_movie_path)
else:
    print(embedded_movie_contents.shape)
    # NOTE(review): these two values are derived from the embedding width
    # (shape[1]) but are not passed to AutoEncoder below, which receives the
    # literals 5000 and 100 instead. They are unused within this block;
    # confirm whether they are dead code or were meant to be the arguments.
    intermediate_size = int(embedded_movie_contents.shape[1])
    encoded_size = int(intermediate_size)
    ae = AutoEncoder(embedded_movie_contents, validation_perc=0.2, lr=1e-3, intermediate_size=5000, encoded_size=100,
                     is_enable_bath_norm=True)
    ae.train_loop(epochs=60)
    encoded_movie_contents = ae.get_encoded_representations()
    # Persist both halves of the model through the AutoEncoder's own save
    # methods (destinations are decided inside those methods, not here).
    ae.save_encoder()
    ae.save_decoder()
    print(encoded_movie_contents.shape)
    # Path is relative to the current working directory.
    np.save('data/encoded_movie_contents.npy', encoded_movie_contents)

    # Pair up the train/validation loss sequences recorded on `ae`, one row
    # per pair; zip() stops at the shorter of the two sequences.
    losses = pd.DataFrame(data=list(zip(ae.train_losses, ae.val_losses)), columns=['train_loss', 'validation_loss'])
    # The default row index starts at 0, so +1 gives a 1-based epoch number.
    losses['epoch'] = (losses.index + 1)

    # Draw both loss curves on a single set of axes.
    # NOTE(review): within the lines visible here there is no ax.legend(),
    # plt.show() or savefig() call, so the `label=` values are not rendered
    # and the figure is not displayed or written unless later code does so.
    fig, ax = plt.subplots()
    ax.plot(losses['epoch'], losses['train_loss'], label='train_loss')
    ax.plot(losses['epoch'], losses['validation_loss'], label='validation_loss')
    # NOTE(review): the axis label assumes AutoEncoder reports MSE; confirm.
    ax.set_ylabel('MSE loss')
    ax.set_xlabel('epoch')
    ax.set_title('autoencoder loss over time')
示例#2
0
        embedded_movie_contents[movie_dict_link.get(movieId)] = embeddings
    np.save('data/embedded_movie_content.npy', embedded_movie_contents)

# Stage: obtain `encoded_movie_contents`, either by loading a previously saved
# array or by training an autoencoder on `embedded_movie_contents`.
# `encoded_movie_path` and `embedded_movie_contents` are defined earlier,
# outside this excerpt.
print("\nEncoding content")

if encoded_movie_path:
    # A truthy path means an encoded array already exists on disk. The path
    # is made absolute (resolved against the current working directory) both
    # for the echo and for the load.
    print(os.path.abspath(encoded_movie_path))
    encoded_movie_contents = np.load(os.path.abspath(encoded_movie_path))
else:
    # No saved encoding was supplied: train an autoencoder on the embedded
    # contents and take its encoded representations.
    ae = AutoEncoder(embedded_movie_contents,
                     validation_perc=0.1,
                     lr=1e-3,
                     intermediate_size=5000,
                     encoded_size=100,
                     is_enable_bath_norm=True)
    ae.train_loop(epochs=15)
    encoded_movie_contents = ae.get_encoded_representations()
    # Persist both halves of the model through the AutoEncoder's own save
    # methods (destinations are decided inside those methods, not here).
    ae.save_encoder()
    ae.save_decoder()
    # Path is relative to the current working directory.
    np.save('data/encoded_movie_contents.npy', encoded_movie_contents)

# Banner for the next stage, which consumes `encoded_movie_contents`.
print("\nCalculating similarity score")

if target_movie_id != 0:
    cosine_similarity = similarity_helper(encoded_movie_contents)
    movie_similarity = cosine_similarity.get_similarity_by_movie(
        target_movie_id, movie_dict_link)

    movie_name_column = {}  # key = movieId, value = title
    movie_content_column = {}