# --- Dataset preprocessing -------------------------------------------------
# Build the word->ID dictionary used for encoding.
prep.make_dictionary()

# Keep integer IDs only for the most frequent words; everything else maps to 0.
n_top_used_words = 10000
dataset = prep.encode_dataset_column(df=dataset, field="review", use_top_words=n_top_used_words)

# Map the sentiment labels onto a binary target.
dataset = prep.string_to_int(df=dataset, params={"sentiment": {'positive': 1, 'negative': 0}})

# Pad every review to a fixed length, drop empty reviews,
# and truncate any review longer than review_len words.
review_len = 500
dataset = prep.pad_text(df=dataset, column="review_encoded", min_words=1, max_words=review_len)

# Partition into training / test / validation subsets (50% / 30% / 20%).
train_s, test_s, valid_s = prep.split_dataset(training_r=0.5, test_r=0.3, validation_r=0.2, dataset=dataset)

# Materialize each dataframe column as a numpy array for model consumption.
X_train = np.array(train_s['review_encoded'].tolist())
Y = np.array(train_s['sentiment'].tolist())
X_eval = np.array(valid_s['review_encoded'].tolist())
Yv = np.array(valid_s['sentiment'].tolist())
X_test = np.array(test_s['review_encoded'].tolist())
Yt = np.array(test_s['sentiment'].tolist())

# ************************************************** #
#               THE SIMPLE RNN MODEL                 #
# ************************************************** #
# NOTE(review): this chunk repeats the tail of the preprocessing pipeline above —
# presumably an overlapping extraction artifact; confirm whether it should be deduplicated.

# Map the sentiment labels onto a binary target.
dataset = prep.string_to_int(df=dataset, params={"sentiment": {'positive': 1, 'negative': 0}})

# Pad every review to a fixed length, drop empty reviews,
# and truncate any review longer than review_len words.
review_len = 500
dataset = prep.pad_text(df=dataset, column="review_encoded", min_words=1, max_words=review_len)

# Partition into training / test / validation subsets (50% / 30% / 20%).
train_s, test_s, valid_s = prep.split_dataset(training_r=0.5, test_r=0.3, validation_r=0.2, dataset=dataset)

# Materialize each dataframe column as a numpy array for model consumption.
X_train = np.array(train_s['review_encoded'].tolist())
Y = np.array(train_s['sentiment'].tolist())
X_eval = np.array(valid_s['review_encoded'].tolist())
Yv = np.array(valid_s['sentiment'].tolist())
X_test = np.array(test_s['review_encoded'].tolist())
Yt = np.array(test_s['sentiment'].tolist())

# ************************************************** #
#              MODELS COMMON SETTINGS                #
# ************************************************** #