Пример #1
0
 def load(self, path=None):
     """Load the lexicon, then register attributive adjective forms.

     The actual loading is delegated to _Sentiment.load(). When no
     explicit path is given, every entry that carries a "JJ" tag is
     annotated a second time under the word form returned by
     attributive(), reusing the same three "JJ" values
     (e.g. "verschrikkelijk" => "verschrikkelijke"; noted upstream as +1%).
     """
     _Sentiment.load(self, path)
     if path:
         # A custom lexicon was requested: leave it untouched.
         return
     # Iterate over a snapshot, because annotate() is called while looping.
     for word, tags in list(dict.items(self)):
         if "JJ" not in tags:
             continue
         # Presumably (polarity, subjectivity, intensity) -- confirm
         # against the signature of annotate().
         first, second, third = tags["JJ"]
         self.annotate(attributive(word), "JJ", first, second, third)
Пример #2
0
 def load(self, path=None):
     """Load the lexicon, then register attributive adjective forms.

     The actual loading is delegated to _Sentiment.load(). When no
     explicit path is given, every entry that carries a "JJ" tag is
     annotated a second time under the word form returned by
     attributive(), reusing the same three "JJ" values
     (e.g. "verschrikkelijk" => "verschrikkelijke"; noted upstream as +1%).
     """
     _Sentiment.load(self, path)
     if path:
         # A custom lexicon was requested: leave it untouched.
         return
     # Iterate over a snapshot, because annotate() is called while looping.
     for word, tags in list(dict.items(self)):
         if "JJ" not in tags:
             continue
         # Presumably (polarity, subjectivity, intensity) -- confirm
         # against the signature of annotate().
         first, second, third = tags["JJ"]
         self.annotate(attributive(word), "JJ", first, second, third)
Пример #3
0
def modality(sentence, type=EPISTEMIC):
    """ Returns the sentence's modality as a weight between -1.0 and +1.0.
        Currently, the only type implemented is EPISTEMIC.
        Epistemic modality is used to express possibility (i.e. how truthful is what is being said).

        A plain string is parsed on-the-fly with pattern.en when that package can be imported.
        Any object without "words" and "parse_token" attributes raises a TypeError.
        If no scoring evidence is found (or another type is requested), 1.0 is returned.
    """
    if isinstance(sentence, str):
        try:
            # A Sentence is expected but a string given.
            # Attempt to parse the string on-the-fly.
            from pattern.en import parse, Sentence
            sentence = Sentence(parse(sentence))
        except ImportError:
            # The string is kept as-is and rejected by the check below.
            pass
    if not (hasattr(sentence, "words") and hasattr(sentence, "parse_token")):
        raise TypeError("%s object is not a parsed Sentence" %
                        repr(sentence.__class__.__name__))
    # score = weighted sum of evidence, total = sum of the weights applied.
    score, total = 0.0, 0
    if type == EPISTEMIC:
        text = sentence.string.rstrip(" .!")
        for value, phrases in epistemic_weaseling.items():
            for phrase in phrases:
                if phrase in text:
                    score += value
                    total += 2
        # (part-of-speech tag prefix, lookup table, base weight).
        # Distinct local names are used so that neither the "type" parameter
        # nor the dict builtin is rebound inside the loop.
        tag_tables = (
            ("MD", epistemic_MD, 4),
            ("VB", epistemic_VB, 2),
            ("RB", epistemic_RB, 2),
            ("JJ", epistemic_JJ, 1),
            ("NN", epistemic_NN, 1),
            ("CC", epistemic_CC_DT_IN, 1),
            ("DT", epistemic_CC_DT_IN, 1),
            ("IN", epistemic_CC_DT_IN, 1),
            ("PRP", epistemic_PRP, 1),
            ("PRP$", epistemic_PRP, 1),
            ("WP", epistemic_PRP, 1),
        )
        negations = ("not", "n't", "never", "without")
        for i, w in enumerate(sentence.words):
            # The preceding token is the same for every tag table,
            # so it is looked up once per word instead of once per table.
            previous = s(sentence[i - 1]) if i > 0 else None
            # "likely" => weight 1, "very likely" => weight 2
            boosted = i > 0 and previous in MODIFIERS
            # Reverse (and halve) the score for negated terms.
            negated = i > 0 and previous in negations
            for tag, table, weight in tag_tables:
                if boosted:
                    weight += 1
                # "likely" => score 0.25 (neutral inclining towards positive).
                if w.type and w.type.startswith(tag):
                    # Prefer lemmata.
                    term = w.lemma or s(w)
                    for value, terms in table.items():
                        if term in terms:
                            if negated:
                                value = -value * 0.5
                            score += weight * value
                            total += weight
                            break
            # Numbers, citations, explanations make the sentence more factual.
            if w.type in ("CD", "\"", "'", ":", "("):
                score += 0.75
                total += 1
    if total == 0:
        return 1.0  # No modal verbs/adverbs used, so statement must be true.
    # total is non-zero here, so the division is safe; clamp to [-1.0, +1.0].
    return max(-1.0, min(score / total, +1.0))
Пример #4
0
def modality(sentence, type=EPISTEMIC):
    """ Returns the sentence's modality as a weight between -1.0 and +1.0.
        Currently, the only type implemented is EPISTEMIC.
        Epistemic modality is used to express possibility (i.e. how truthful is what is being said).

        A plain string is parsed on-the-fly with pattern.en when that package can be imported.
        Any object without "words" and "parse_token" attributes raises a TypeError.
        If no scoring evidence is found (or another type is requested), 1.0 is returned.
    """
    if isinstance(sentence, str):
        try:
            # A Sentence is expected but a string given.
            # Attempt to parse the string on-the-fly.
            from pattern.en import parse, Sentence
            sentence = Sentence(parse(sentence))
        except ImportError:
            # The string is kept as-is and rejected by the check below.
            pass
    if not (hasattr(sentence, "words") and hasattr(sentence, "parse_token")):
        raise TypeError("%s object is not a parsed Sentence" % repr(sentence.__class__.__name__))
    # score = weighted sum of evidence, total = sum of the weights applied.
    score, total = 0.0, 0
    if type == EPISTEMIC:
        text = sentence.string.rstrip(" .!")
        for value, phrases in epistemic_weaseling.items():
            for phrase in phrases:
                if phrase in text:
                    score += value
                    total += 2
        # (part-of-speech tag prefix, lookup table, base weight).
        # Distinct local names are used so that neither the "type" parameter
        # nor the dict builtin is rebound inside the loop.
        tag_tables = (
            ("MD", epistemic_MD, 4),
            ("VB", epistemic_VB, 2),
            ("RB", epistemic_RB, 2),
            ("JJ", epistemic_JJ, 1),
            ("NN", epistemic_NN, 1),
            ("CC", epistemic_CC_DT_IN, 1),
            ("DT", epistemic_CC_DT_IN, 1),
            ("IN", epistemic_CC_DT_IN, 1),
            ("PRP", epistemic_PRP, 1),
            ("PRP$", epistemic_PRP, 1),
            ("WP", epistemic_PRP, 1),
        )
        negations = ("not", "n't", "never", "without")
        for i, w in enumerate(sentence.words):
            # The preceding token is the same for every tag table,
            # so it is looked up once per word instead of once per table.
            previous = s(sentence[i - 1]) if i > 0 else None
            # "likely" => weight 1, "very likely" => weight 2
            boosted = i > 0 and previous in MODIFIERS
            # Reverse (and halve) the score for negated terms.
            negated = i > 0 and previous in negations
            for tag, table, weight in tag_tables:
                if boosted:
                    weight += 1
                # "likely" => score 0.25 (neutral inclining towards positive).
                if w.type and w.type.startswith(tag):
                    # Prefer lemmata.
                    term = w.lemma or s(w)
                    for value, terms in table.items():
                        if term in terms:
                            if negated:
                                value = -value * 0.5
                            score += weight * value
                            total += weight
                            break
            # Numbers, citations, explanations make the sentence more factual.
            if w.type in ("CD", "\"", "'", ":", "("):
                score += 0.75
                total += 1
    if total == 0:
        return 1.0 # No modal verbs/adverbs used, so statement must be true.
    # total is non-zero here, so the division is safe; clamp to [-1.0, +1.0].
    return max(-1.0, min(score / total, +1.0))
Пример #5
0
 def load(self, path=None):
     """Load the lexicon, then derive "-ly" adverb entries from adjectives.

     The actual loading is delegated to _Sentiment.load(). When no
     explicit path is given, every entry that carries a "JJ" tag gets a
     companion "RB" entry that reuses the same three "JJ" values
     (e.g. "terrible" => "terribly"; noted upstream as +1% accuracy).
     """
     _Sentiment.load(self, path)
     if path:
         # A custom lexicon was requested: leave it untouched.
         return
     # Iterate over a snapshot, because annotate() is called while looping.
     for word, tags in list(dict.items(self)):
         if "JJ" not in tags:
             continue
         stem = word
         # A trailing "y" becomes "i" before the suffix is appended.
         if stem.endswith("y"):
             stem = stem[:-1] + "i"
         # A trailing "le" is dropped before the suffix is appended.
         if stem.endswith("le"):
             stem = stem[:-2]
         # Presumably (polarity, subjectivity, intensity) -- confirm
         # against the signature of annotate().
         first, second, third = tags["JJ"]
         self.annotate(stem + "ly", "RB", first, second, third)
Пример #6
0
 def load(self, path=None):
     """Load the lexicon, then derive "-ly" adverb entries from adjectives.

     The actual loading is delegated to _Sentiment.load(). When no
     explicit path is given, every entry that carries a "JJ" tag gets a
     companion "RB" entry that reuses the same three "JJ" values
     (e.g. "terrible" => "terribly"; noted upstream as +1% accuracy).
     """
     _Sentiment.load(self, path)
     if path:
         # A custom lexicon was requested: leave it untouched.
         return
     # Iterate over a snapshot, because annotate() is called while looping.
     for word, tags in list(dict.items(self)):
         if "JJ" not in tags:
             continue
         stem = word
         # A trailing "y" becomes "i" before the suffix is appended.
         if stem.endswith("y"):
             stem = stem[:-1] + "i"
         # A trailing "le" is dropped before the suffix is appended.
         if stem.endswith("le"):
             stem = stem[:-2]
         # Presumably (polarity, subjectivity, intensity) -- confirm
         # against the signature of annotate().
         first, second, third = tags["JJ"]
         self.annotate(stem + "ly", "RB", first, second, third)
Пример #7
0
 def load(self, path=None):
     """Load the lexicon, then add diacritic-free spellings of its words.

     The actual loading is delegated to _Sentiment.load(). When no
     explicit path is given, each word that does not end in one of the
     listed accented letters has those letters replaced by their plain
     counterparts; if that changes the word, all of its tag entries are
     annotated again under the new spelling
     (e.g. "précaire" => "precaire"; noted upstream as +1% accuracy).
     """
     _Sentiment.load(self, path)
     if path:
         # A custom lexicon was requested: leave it untouched.
         return
     accented_endings = (u"à", u"è", u"é", u"ê", u"ï")
     substitutions = (
         (u"à", "a"),
         (u"é", "e"),
         (u"è", "e"),
         (u"ê", "e"),
         (u"ï", "i"),
     )
     # Iterate over a snapshot, because annotate() is called while looping.
     for original, tags in list(dict.items(self)):
         plain = original
         # Words ending in an accented letter are left unchanged.
         if not plain.endswith(accented_endings):
             for accented, replacement in substitutions:
                 plain = plain.replace(accented, replacement)
         if plain == original:
             continue
         # Presumably each value is (polarity, subjectivity, intensity)
         # -- confirm against the signature of annotate().
         for tag, (first, second, third) in tags.items():
             self.annotate(plain, tag, first, second, third)
Пример #8
0
 def load(self, path=None):
     """Load the lexicon, then add diacritic-free spellings of its words.

     The actual loading is delegated to _Sentiment.load(). When no
     explicit path is given, each word that does not end in one of the
     listed accented letters has those letters replaced by their plain
     counterparts; if that changes the word, all of its tag entries are
     annotated again under the new spelling
     (e.g. "précaire" => "precaire"; noted upstream as +1% accuracy).
     """
     _Sentiment.load(self, path)
     if path:
         # A custom lexicon was requested: leave it untouched.
         return
     accented_endings = ("à", "è", "é", "ê", "ï")
     substitutions = (
         ("à", "a"),
         ("é", "e"),
         ("è", "e"),
         ("ê", "e"),
         ("ï", "i"),
     )
     # Iterate over a snapshot, because annotate() is called while looping.
     for original, tags in list(dict.items(self)):
         plain = original
         # Words ending in an accented letter are left unchanged.
         if not plain.endswith(accented_endings):
             for accented, replacement in substitutions:
                 plain = plain.replace(accented, replacement)
         if plain == original:
             continue
         # Presumably each value is (polarity, subjectivity, intensity)
         # -- confirm against the signature of annotate().
         for tag, (first, second, third) in tags.items():
             self.annotate(plain, tag, first, second, third)