Exemplo n.º 1
0
def fit_model(df, method):
    """
        Fit the chosen text representation model on ``df['content']``.

        params:
            df: DataFrame used; its 'content' column holds the texts,
            method: model chosen, one of "TF-IDF", "CountVectorizer"
                or "BERT"

        returns:
            generated (fitted) model,
            transformed data: the result of ``fit_transform`` for the
                two vectorizers, or of ``encode`` for "BERT"

        raises:
            ValueError: if ``method`` is not one of the supported names
    """

    if method in ("TF-IDF", "CountVectorizer"):
        # Both vectorizers share the exact same configuration; only the
        # class differs.
        if method == "TF-IDF":
            vectorizer_cls = TfidfVectorizer
        else:
            vectorizer_cls = CountVectorizer
        # min_df=1 keeps every term, exactly like the former integer 0
        # (any extracted term occurs in at least one document), while
        # staying valid for scikit-learn versions that validate an
        # integer min_df to be >= 1.
        model = vectorizer_cls(analyzer='word',
                               ngram_range=(1, 2),
                               min_df=1,
                               stop_words=STOPS)
        X = model.fit_transform(df['content'])
    elif method == "BERT":
        # Sentence embedding = mean of the token embeddings produced by
        # the 'camembert-base' transformer.
        word_embedding_model = models.Transformer('camembert-base')
        pooling_model = models.Pooling(
            word_embedding_model.get_word_embedding_dimension(),
            pooling_mode_mean_tokens=True,
            pooling_mode_max_tokens=False)
        model = SentenceTransformer(
            modules=[word_embedding_model, pooling_model])
        # Hand over a plain list so that any positional indexing done by
        # encode() is not resolved against the DataFrame's index labels.
        X = model.encode(list(df['content']), show_progress_bar=True)
    else:
        # Without this branch an unknown method would surface as an
        # UnboundLocalError on the return statement below.
        raise ValueError(
            "Unknown method %r; expected one of 'TF-IDF', "
            "'CountVectorizer' or 'BERT'" % (method,))
    return model, X