Exemplo n.º 1
0
def caclulate_jaccard_similarity(conclusion, premise):
    """
    Calculate the Jaccard similarity between conclusion and premise tokens.

    NOTE(review): the function name contains a typo ("caclulate"); it is kept
    unchanged for backward compatibility with existing callers.

    :param conclusion: dict with key 'conclusion_text' holding the conclusion text
    :param premise: sequence whose index 1 holds the premise text
    :return: Jaccard similarity score in [0.0, 1.0]
    """
    conclusion_tokens = paper_tokenizer(remove_punctuations(conclusion['conclusion_text']).lower())
    premise_tokens = paper_tokenizer(remove_punctuations(premise[1]).lower())

    intersection = conclusion_tokens.intersection(premise_tokens)
    union_size = len(premise_tokens) + len(conclusion_tokens) - len(intersection)
    # Guard against ZeroDivisionError when both texts tokenize to empty sets;
    # two empty sets are treated as having similarity 0.0.
    if union_size == 0:
        return 0.0
    return len(intersection) / union_size
Exemplo n.º 2
0
def calculate_nltk_sentiment(conclusion, premise):
    """
    Score the premise text with NLTK's SentimentIntensityAnalyzer (VADER).

    :param conclusion: unused; kept for a uniform feature-function signature
    :param premise: sequence whose index 1 holds the premise text
    :return: the 'compound' polarity score
    """
    analyzer = SentimentIntensityAnalyzer()
    premise_text = remove_punctuations(premise[1].lower())
    scores = analyzer.polarity_scores(text=premise_text)
    return scores['compound']
Exemplo n.º 3
0
def calculate_textblob_sentiment(conclusion, premise):
    """
    Score the premise text with TextBlob's sentiment analyzer.

    :param conclusion: unused; kept for a uniform feature-function signature
    :param premise: sequence whose index 1 holds the premise text
    :return: the TextBlob polarity score
    """
    premise_text = remove_punctuations(premise[1].lower())
    blob = TextBlob(text=premise_text)
    return blob.sentiment.polarity
Exemplo n.º 4
0
def compute_embedding(embedding, remove_punctuation: bool, file_name: str):
    """
    Compute a pooled document embedding for every ground-truth argument
    (conclusion plus each of its premises) and save the result to a file.

    :param embedding: word-embedding model wrapped in DocumentPoolEmbeddings
    :param remove_punctuation: whether punctuation is stripped before embedding
    :param file_name: destination passed to save_embedding
    """
    arguments = Arguments()
    document_embedding = DocumentPoolEmbeddings([embedding])

    embedded_arguments = {}

    for argument in arguments.ground_truth_arguments:
        premises = argument['premises']
        conclusion = argument['conclusion']

        conclusion_text = conclusion['conclusion_text']
        if remove_punctuation:
            conclusion_text = remove_punctuations(conclusion_text)
        conclusion_sentence = Sentence(conclusion_text)
        document_embedding.embed(conclusion_sentence)
        embedded_conclusion = conclusion_sentence.get_embedding().detach().numpy().tolist()

        embedded_premises = {}
        argument_uid = None

        for premise in premises:
            # Premise layout appears to be (argument_uid, text, premise_uid),
            # inferred from the indexing below — TODO confirm against Arguments().
            premise_text = premise[1]
            if remove_punctuation:
                premise_text = remove_punctuations(premise_text)
            premise_sentence = Sentence(premise_text)
            document_embedding.embed(premise_sentence)
            embedded_premise = premise_sentence.get_embedding().detach().numpy().tolist()
            embedded_premises[premise[2]] = embedded_premise
            argument_uid = premise[0]

        embedded_arguments[argument_uid] = [
            embedded_conclusion, embedded_premises
        ]

    # Save once after all arguments are processed. The original saved the
    # cumulative dict on every loop iteration, repeating the file write for
    # each argument; the final file contents are identical either way.
    save_embedding(embedded_arguments, file_name)
Exemplo n.º 5
0
def calculate_tokenwise_sentiment(conclusion, premise):
    """
    Sum per-token TextBlob polarity over the premise text, weighting each
    distinct token by its occurrence count.

    :param conclusion: unused; kept for a uniform feature-function signature
    :param premise: sequence whose index 1 holds the premise text
    :return: aggregated sentiment score
    """
    cleaned_text = remove_punctuations(premise[1].lower())
    token_counts = collections.Counter(re.split(' |\n|\t', cleaned_text))
    # Alternative scorer: SentimentIntensityAnalyzer().polarity_scores(text=word)['compound']
    total = 0.0
    for token, count in token_counts.items():
        total += count * TextBlob(text=token).sentiment.polarity
    return total
Exemplo n.º 6
0
 def calculate_sentiment(self, conclusion, premise):
     """
     Aggregate SentiWordNet scores over the premise tokens: positive entries
     add to the score, negative entries subtract, each weighted by the
     token's frequency in the premise text.

     :param conclusion: unused; kept for a uniform feature-function signature
     :param premise: sequence whose index 1 holds the premise text
     :return: aggregated sentiment score
     """
     cleaned_text = remove_punctuations(premise[1].lower())
     token_counts = collections.Counter(re.split(' |\n|\t', cleaned_text))
     score = 0.0
     for token, count in token_counts.items():
         positive_weight = self.sentiWordNet_positives.get(token)
         if positive_weight is not None:
             score += count * positive_weight
         negative_weight = self.sentiWordNet_negatives.get(token)
         if negative_weight is not None:
             score -= count * negative_weight
     return score