示例#1
0
def removeNegationsPipeline(sentence: str, contractions: dict):
    """Run a sentence through the negation-removal preprocessing chain.

    The sentence is tokenized, its contractions are expanded, the tokens
    are part-of-speech tagged, stopwords are dropped, and the remaining
    tagged tokens are handed to ``removeNegations``.

    Parameters
    ----------
    sentence: str
        sentence we wish to remove the negations out of

    contractions: dict
        dictionary that maps contractions to their expansions

    Returns
    -------
    list:
        the result of ``removeNegations`` on the tagged tokens; documented
        as a list of wordnet part_of_speech tokens, each of the form
        (word, part of speech, version)
    """
    # Tokenize first, then expand contractions on the token list.
    expanded_words = expandContractions(tp.tokenize_words(sentence), contractions)
    # Tag, then filter stopwords from the tagged tokens.
    tagged_words = tp.remove_pos_stopwords(tp.simple_pos_tag(expanded_words))
    return removeNegations(tagged_words)
def swn_pipeline(s: str, label_diff_thresh=0, contractions=None, emoti_dict=None, special=None, count_nouns=True):
    """Pipeline that takes in a sentence string and returns the appropriate label.

    Parameters
    ----------
    s: str
        sentence to find the sentiment of

    DEFAULTS:
        label_diff_thresh:int = 0
            how far we want the positive and negative score to differ to be
            considered not neutral (forwarded to ``swn_label`` as
            ``diff_thresh``)

        contractions: dict
            dictionary of contractions mapped to their expanded forms; when
            None, the negation-removal pipeline is skipped and the sentence
            is only tokenized, tagged and stripped of stopwords

        emoti_dict: dict
            dictionary of emoticons mapped to their label (forwarded to
            ``extra_preprocessing``)

        special: dict
            dictionary of specialized lexicon to score words on top of
            SentiWordNet; None (the default) means an empty lexicon

        count_nouns=True: determines whether or not to include nouns in the
            scoring

    Returns
    -------
    tuple(str, list)
        tuple where first item is sentiment label of s (-1, 0, or 1)
        and second item is a list of the words with their scores. When the
        preprocessing step already yields a label string ("0", "1" or
        "-1"), that label is returned with an empty list.
    """
    # Build a fresh dict per call: a `{}` default in the signature would be
    # one shared object reused by every call that omits `special`.
    if special is None:
        special = {}

    # Preliminary preprocessing (emoticons, phrases, etc.).
    s = extra_preprocessing(s, emoti_dict)
    # If preprocessing already produced a label, return it directly.
    if s in ("0", "1", "-1"):
        return (s, [])

    # Text processing.
    if contractions is None:
        # No contraction map supplied: plain tokenize / tag / stopword filter.
        tokens = tp.tokenize_words(s)
        pos_tokens = tp.simple_pos_tag(tokens)
        pos_tokens = tp.remove_pos_stopwords(pos_tokens)
    else:
        # Contraction map supplied: expand contractions and remove negations.
        pos_tokens = rneg.removeNegationsPipeline(s, contractions)

    return swn_label(pos_tokens, diff_thresh=label_diff_thresh, special=special, count_nouns=count_nouns)