# Embed one movie's content, persist all embeddings, then obtain the encoded
# (compressed) representation either from disk or by training an autoencoder.
#
# NOTE(review): the two statements below read `value` and `movie_id`, which are
# not bound anywhere in the visible code -- presumably they form the body of an
# enclosing loop whose header lies outside this excerpt. Re-indent on merge.
embeddings = embedding_helper(
    value, model="bow", dictionary=dictionary
).get_embedding()
# Array indices start at 0, hence the id -> index lookup before storing.
embedded_movie_contents[data_manager.get_index_from_movie_id(movie_id)] = embeddings

# Persist the raw embeddings.
np.save('data/embedded_movie_content.npy', embedded_movie_contents)

print("\nEncoding content")
if encoded_movie_path:
    # A saved encoding was supplied: load it instead of training.
    print(encoded_movie_path)
    encoded_movie_contents = np.load(encoded_movie_path)
else:
    print(embedded_movie_contents.shape)
    # NOTE(review): these two sizes are computed but never passed to
    # AutoEncoder, which is given the literals 5000 and 100 instead. Kept
    # because later code outside this excerpt may read them -- confirm.
    intermediate_size = int(embedded_movie_contents.shape[1])
    encoded_size = int(intermediate_size)

    ae = AutoEncoder(
        embedded_movie_contents,
        validation_perc=0.2,
        lr=1e-3,
        intermediate_size=5000,
        encoded_size=100,
        is_enable_bath_norm=True,
    )
    ae.train_loop(epochs=60)
    encoded_movie_contents = ae.get_encoded_representations()
    ae.save_encoder()
    ae.save_decoder()

    print(encoded_movie_contents.shape)
    np.save('data/encoded_movie_contents.npy', encoded_movie_contents)

    # Everything below reads `ae`, which only exists on this training branch,
    # so the loss table and plot are built here rather than after the if/else.
    losses = pd.DataFrame(
        data=list(zip(ae.train_losses, ae.val_losses)),
        columns=['train_loss', 'validation_loss'],
    )
    # Epochs are numbered from 1 while the DataFrame index starts at 0.
    losses['epoch'] = (losses.index + 1)

    fig, ax = plt.subplots()
    ax.plot(losses['epoch'], losses['train_loss'], label='train_loss')
    ax.plot(losses['epoch'], losses['validation_loss'], label='validation_loss')
    ax.set_ylabel('MSE loss')
    ax.set_xlabel('epoch')
    ax.set_title('autoencoder loss over time')
# Store one movie's embedding, persist all embeddings, obtain the encoded
# representation (from disk or by training an autoencoder), then compute
# similarities for the requested movie.
#
# NOTE(review): `embeddings` and `movieId` are not bound anywhere in the
# visible code -- presumably this first statement is the tail of an enclosing
# loop whose header lies outside this excerpt. Re-indent on merge.
row_index = movie_dict_link.get(movieId)
# `.get` yields None for an unknown id. Assuming `embedded_movie_contents` is a
# NumPy array (TODO confirm), indexing it with None means np.newaxis, so the
# assignment would silently overwrite every row. Skip unknown ids instead.
if row_index is not None:
    embedded_movie_contents[row_index] = embeddings

# Persist the raw embeddings.
np.save('data/embedded_movie_content.npy', embedded_movie_contents)

print("\nEncoding content")
if encoded_movie_path:
    # A saved encoding was supplied: resolve the path once, report it, load it.
    resolved_path = os.path.abspath(encoded_movie_path)
    print(resolved_path)
    encoded_movie_contents = np.load(resolved_path)
else:
    ae = AutoEncoder(
        embedded_movie_contents,
        validation_perc=0.1,
        lr=1e-3,
        intermediate_size=5000,
        encoded_size=100,
        is_enable_bath_norm=True,
    )
    ae.train_loop(epochs=15)
    encoded_movie_contents = ae.get_encoded_representations()
    ae.save_encoder()
    ae.save_decoder()
    # NOTE(review): placed on the training branch on the assumption that a
    # freshly loaded encoding need not be re-saved -- confirm against the
    # original layout.
    np.save('data/encoded_movie_contents.npy', encoded_movie_contents)

print("\nCalculating similarity score")
if target_movie_id != 0:
    cosine_similarity = similarity_helper(encoded_movie_contents)
    movie_similarity = cosine_similarity.get_similarity_by_movie(
        target_movie_id, movie_dict_link)

# Kept at the outer level so both names are bound on every path; the code that
# fills them lies outside this excerpt.
movie_name_column = {}  # key = movieId, value = title
movie_content_column = {}