Пример #1
0
## combining the genre tags and the user tags
# Missing 'tag' values (e.g. rows the left merge could not match) are blanked
# before concatenation. Stringifying them to 'nan' and deleting that substring
# afterwards also mangled real words that contain "nan" (e.g. "finance",
# "banana"). A missing 'genres' value no longer turns the whole result into
# NaN either.
movies2_df = movies2_df.merge(grouped_df, on='movieId', how='left')
movies2_df['tag'] = (
    movies2_df['tag'].fillna('').astype(str)
    + ' '
    + movies2_df['genres'].fillna('').astype(str)
).str.strip()
# Keep only the id and the combined text for the TF-IDF step.
tags_grouped_df = movies2_df[['movieId', 'tag']]
print(tags_grouped_df.head())
print(tags_grouped_df.shape)

## calculating the TFIDF matrix
## (input is the two-column 'movieId'/'tag' frame; the layout of the result is
## decided inside Preprocess.createTFIDFMatrix, which is not visible here)
tfidf_df = Preprocess.createTFIDFMatrix(tags_grouped_df)
print(tfidf_df.shape)

## dumping the tfidf matrix
## (stored under the name 'tfidf_df'; the commented-out lines below reload it
## under the same name)
Util.saveObj(tfidf_df, 'tfidf_df')

# ## loading the TFIDF matrix
# tfidf_df =  Util.loadObj('tfidf_df')
# print(tfidf_df.shape)

## loading the reduced TFIDF matrix
## NOTE(review): nothing in this script saves 'tfidf_reduced_df' -- presumably
## a separate dimensionality-reduction step writes it first; confirm it exists
## before running this line.
tfidf_reduced_df = Util.loadObj('tfidf_reduced_df')
print(tfidf_reduced_df.shape)

## creating vector df with spacy sentence vector
## NOTE(review): imdb_df and createSentenceVector are defined outside this
## snippet; the shape printed below depends entirely on that helper.
vector_df = createSentenceVector(imdb_df)
print(vector_df.shape)

## dumping the vector df
Util.saveObj(vector_df, 'vector_df')
Пример #2
0
            self.BottleNeckDense(encoder_out_2))
        decoder_out_1 = self.dropout_layer(self.DecoderDense1(bottleneck_out))
        decoder_out_2 = self.dropout_layer(self.DecoderDense2(decoder_out_1))
        final_out = self.dropout_layer(self.FinalDense(decoder_out_2))
        return final_out


NUM_EPOCHS = 100
BATCH_SIZE = 64

# Load the saved TF-IDF frame; its index is written out below as the
# 'movieId' column (presumably it holds the movie ids -- confirm).
tfidf_matrix = Util.loadObj('tfidf_df')
X = tfidf_matrix.to_numpy()
features = X.shape[1]  # number of columns = input width passed to the model

model = AutoEncoder(features)
# `lr` is a deprecated alias that newer Keras releases reject;
# `learning_rate` is the supported keyword.
optimizer = keras.optimizers.Adam(learning_rate=0.000003)


def loss(x, x_hat):
    """Return the sum over the batch of the mean squared error of each sample."""
    return tf.reduce_sum(keras.losses.mean_squared_error(x, x_hat))


model.compile(loss=loss, optimizer=optimizer, metrics=['mse'])
# The input doubles as the target: the model is trained to reconstruct X.
model.fit(x=X, y=X, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS)

# Feed X through the two encoder layers and the bottleneck layer directly to
# obtain the reduced representation.
reduced = model.BottleNeckDense(model.EncoderDense2(model.EncoderDense1(X)))

reduced_np = reduced.numpy()
indices = tfidf_matrix.index.tolist()
tfidf_reduced_df = pd.DataFrame(reduced_np)
# Carry the original index along as a regular column so rows stay identifiable.
tfidf_reduced_df['movieId'] = indices
Util.saveObj(tfidf_reduced_df, 'tfidf_reduced_df')
print(tfidf_reduced_df['movieId'])