prep.make_dictionary()  # NOTE(review): this line is at column 0 while the rest of the block is indented — looks like a paste artifact; presumably builds the word->ID dictionary used by encode_dataset_column below — confirm against `prep`

    # Encode all words with integer IDs.
    # Only the n_top_used_words most frequent words keep distinct IDs; any other word encodes as 0.
    n_top_used_words = 10000
    dataset = prep.encode_dataset_column(df=dataset, field="review", use_top_words=n_top_used_words)

    # Encode the target variable to binary labels: 'positive' -> 1, 'negative' -> 0.
    dataset = prep.string_to_int(df=dataset, params={"sentiment": {'positive': 1, 'negative': 0}})

    # Fix the length of every encoded review: drop reviews with no words (min_words=1)
    # and trim reviews that exceed review_len tokens; shorter ones are padded.
    review_len = 500
    dataset = prep.pad_text(df=dataset, column="review_encoded", min_words=1, max_words=review_len)

    # Split the dataset into training (50%), test (30%) and validation (20%) subsets.
    train_s, test_s, valid_s = prep.split_dataset(training_r=0.5, test_r=0.3, validation_r=0.2, dataset=dataset)

    # Convert the dataframe columns to numpy arrays for model consumption:
    # X_* hold the padded integer-encoded reviews, Y/Yv/Yt the 0/1 sentiment labels.
    X_train = np.array(train_s['review_encoded'].tolist())
    Y = np.array(train_s['sentiment'].tolist())

    X_eval = np.array(valid_s['review_encoded'].tolist())
    Yv = np.array(valid_s['sentiment'].tolist())

    X_test = np.array(test_s['review_encoded'].tolist())
    Yt = np.array(test_s['sentiment'].tolist())

    # ************************************************** #
    #                THE SIMPLE RNN MODEL                #
    # ************************************************** #
    # --- Example #2 (scraped page marker: original text "Пример #2" followed by a vote count "0") ---
    # Encode the target variable to binary labels: 'positive' -> 1, 'negative' -> 0.
    # NOTE(review): this block duplicates the pipeline above (minus the dictionary/encoding
    # steps) — the word-ID encoding is presumably done earlier, outside this view; confirm.
    dataset = prep.string_to_int(
        df=dataset, params={"sentiment": {
            'positive': 1,
            'negative': 0
        }})

    # Fix the length of every encoded review: drop reviews with no words (min_words=1)
    # and trim reviews that exceed review_len tokens; shorter ones are padded.
    review_len = 500
    dataset = prep.pad_text(df=dataset,
                            column="review_encoded",
                            min_words=1,
                            max_words=review_len)

    # Split the dataset into training (50%), test (30%) and validation (20%) subsets.
    train_s, test_s, valid_s = prep.split_dataset(training_r=0.5,
                                                  test_r=0.3,
                                                  validation_r=0.2,
                                                  dataset=dataset)

    # Convert the dataframe columns to numpy arrays for model consumption:
    # X_* hold the padded integer-encoded reviews, Y/Yv/Yt the 0/1 sentiment labels.
    X_train = np.array(train_s['review_encoded'].tolist())
    Y = np.array(train_s['sentiment'].tolist())

    X_eval = np.array(valid_s['review_encoded'].tolist())
    Yv = np.array(valid_s['sentiment'].tolist())

    X_test = np.array(test_s['review_encoded'].tolist())
    Yt = np.array(test_s['sentiment'].tolist())

    # ************************************************** #
    #              MODELS COMMON SETTINGS                #
    # ************************************************** #