Пример #1
0
def all_tags_sentiment(word, lang):
    """Return a ``(positive, negative)`` score for ``word`` across all tags.

    Args:
        word: The token to score.
        lang: Language code. ``'E'`` and ``'S'`` are scored through
            ``swn.senti_synsets``; ``'H'`` is scored through the Hindi synset
            helpers; ``'M'`` always scores zero.

    Returns:
        A ``(pscore, nscore)`` tuple. ``(0., 0.)`` is returned for ``'M'`` and
        for any unrecognised language code.
    """
    if lang in ['E', 'S']:
        pscore = 0
        nscore = 0
        for wt in ['n', 'v', 'r', 'a']:
            z = list(swn.senti_synsets(word, wt))
            if len(z) > 0:
                # Only the first sense returned for each tag contributes.
                pscore += z[0].pos_score()
                nscore += z[0].neg_score()
        # The divisor is always 4, even for tags that produced no sense.
        pscore = float(pscore) / 4
        nscore = float(nscore) / 4
    elif lang == 'H':
        pscore = 0.
        nscore = 0.

        # Look the word up under every spelling variant we can derive.
        ss_lib = [
            word_synset(word, 'H'),
            word_synset(lemmatise(word)[0], 'H'),
            word_synset(normalise(word), 'H'),
            word_synset(normalise(lemmatise(word)[0]), 'H'),
            word_synset(lemmatise(normalise(word))[0], 'H')
        ]
        synset_collection = []
        for synsets in ss_lib:
            if not synsets:
                continue
            # Only the synset lists are needed; the keys are ignored.
            for members in synsets.values():
                synset_collection.extend(members)

        if synset_collection:
            for u in synset_collection:
                p_sum = 0.
                n_sum = 0.
                hits = 0
                for wt in [1, 2, 3, 4, '1', '2', '3', '4']:
                    # sentiment_synsets iterates a mapping of synset lists,
                    # so the single synset is wrapped before the call.
                    p, n = sentiment_synsets({wt: [u]}, wt)
                    if p > 0. or n > 0.:
                        p_sum += p
                        n_sum += n
                        hits += 1
                # Average over the tags that actually produced a score.
                if hits > 0:
                    pscore += p_sum / hits
                    nscore += n_sum / hits
            pscore = pscore / len(synset_collection)
            nscore = nscore / len(synset_collection)

    elif lang == 'M':
        pscore = 0.
        nscore = 0.
    else:
        return 0., 0.
    return pscore, nscore
Пример #2
0
def sentiment_synsets(synsets, pos):
    """Average the ``_hswn`` polarity pairs found for a group of synsets.

    Direct entries for the given synsets take precedence. When none exist,
    the entries of the synsets reached through each synset's synonyms are
    used instead.

    Args:
        synsets: Mapping whose values are iterables of synsets; the keys are
            not used.
        pos: Part-of-speech value forwarded to ``word_synset_synonyms_hindi``.

    Returns:
        A ``(positive, negative)`` tuple of mean scores, or ``(0., 0.)`` when
        no polarity is found.
    """
    scores = []
    synscores = []
    for sss in synsets.values():
        for ss in sss:
            scores.extend(_hswn.get(ss, {}).values())

            for v in word_synset_synonyms_hindi(ss, pos):
                _synsets = word_synset(v, 'H')
                if not _synsets:
                    continue
                for _sss in _synsets.values():
                    for _ss in _sss:
                        # Look up the synonym's own synset, not the
                        # original one, so the fallback adds information.
                        synscores.extend(_hswn.get(_ss, {}).values())

    if len(scores) > 0:
        print('**** Hindi senti found')
        return (float(sum([u[0] for u in scores])) / len(scores),
                float(sum([u[1] for u in scores])) / len(scores))
    elif len(synscores) > 0:
        print('**** Hindi senti found through synonyms')
        return (float(sum([u[0] for u in synscores])) / len(synscores),
                float(sum([u[1] for u in synscores])) / len(synscores))
    else:
        print('**** Hindi senti not found')
        return 0., 0.
 def _handle_word(word):
     if len(word) == 0:
         return (word, '', '')
     if word in ",.><?/+=-_}{[]*&^%$#@!~`\"\\|:;":
         return word, '', '' 
     u, v, w = identify(word, lang)
     if len(u) is 0 and (len(w) is 0 or all(len(w_) is 0 for w_ in w)):
         return (word, '', '')
     if v == 'S' and len(w) > 0 and len(w[0]) > 0:
         return u, v, w[0]
     elif v == 'O' and len(w) > 0:
         consider = []
         for _w in w:
             synsets = word_synset(_w, v)
             if synsets and len(synsets) > 0:
                 consider.append(_w)
         if len(consider) is 0:
             w = w[0]
         else:
             w = consider[0]
     else:
         w = ''
     if isinstance(w, list) and len(w) is 0:
         w = ''
     return (u, v, w)
Пример #4
0
def get_and_check_synsets(tr, lang='H'):
    """Pair every candidate in ``tr`` with its ``word_synset`` lookup.

    Args:
        tr: Sized collection of candidate words; may be ``None`` or empty.
        lang: Language code forwarded to ``word_synset``.

    Returns:
        A list of ``(candidate, synsets)`` tuples, or ``None`` when ``tr`` is
        ``None`` or empty.
    """
    if not tr:
        return None
    # A non-empty input always yields a non-empty list, so no fallback
    # list is needed.
    return [(_tr, word_synset(_tr, lang=lang)) for _tr in tr]
Пример #5
0
def hw():
    """Fill the module-level ``_hswn`` table from the ``HSWN`` SQLite table.

    Does nothing beyond printing a banner when ``_hswn`` already has entries.
    Each row is stored as ``_hswn[synset_id][POS] = (p_score, n_score)``.
    The scores are then propagated to the ``word_synset`` synsets of every
    listed term that ``_hswn`` does not already contain.

    Assumes ``_hswn`` creates missing inner mappings on first access (for
    example a ``defaultdict(dict)``) -- TODO confirm against its definition.
    """
    from main import HOME
    print('Setting hswn...')
    if len(_hswn) != 0:
        return

    # term -> list of (synset_id, POS) rows that mention it
    terms = {}
    conn = sqlite3.connect(HOME + '/resources/NLP_database.sqlite')
    try:
        cursor = conn.cursor()
        for row in cursor.execute(
                'SELECT POS, synset_id, p_score, n_score, terms FROM HSWN'):
            t = row[0]
            syn = row[1]
            p = float(row[2])
            n = float(row[3])
            # Terms are comma separated, with '_' standing in for spaces.
            _terms = set([
                w.replace('_', ' ').strip()
                for w in row[4].strip().split(',')
            ])

            _hswn[syn][t] = (p, n)
            for _term in _terms:
                terms.setdefault(_term, []).append((syn, t))
    finally:
        # Release the database handle even if a row fails to parse.
        conn.close()

    for term, wlist in terms.items():
        synsets = word_synset(term, 'H')
        if not synsets or not wlist:
            continue

        # Mean score over every row mentioning the term. With a single row
        # this is that row's score unchanged.
        p_score, n_score = 0., 0.
        for syn, pos in wlist:
            p, n = _hswn[syn][pos]
            p_score += p
            n_score += n
        p_score = p_score / (1. * len(wlist))
        n_score = n_score / (1. * len(wlist))

        # NOTE(review): only the POS of the last row is propagated, even
        # when the rows span several POS values -- confirm this is intended.
        pos = wlist[-1][1]
        for ss in synsets.get(pos, []):
            if ss not in _hswn:
                _hswn[ss][pos] = (p_score, n_score)
Пример #6
0
def hindi_sentiments(word):
    """Return the mean ``(positive, negative)`` polarity of a Hindi word.

    Every synset that ``word_synset`` reports for ``word`` is looked up in
    ``_hswn``. Only entries with a positive or negative score above zero are
    averaged. ``(0., 0.)`` is returned when the word has no synsets or no such
    entries.
    """
    lookup = word_synset(word, 'H')
    if not lookup:
        return 0., 0.

    pos_total = 0
    neg_total = 0
    hits = 0
    for _tag, members in lookup.iteritems():
        for member in members:
            for _kind, (p, n) in _hswn.get(member, {}).iteritems():
                if p > 0. or n > 0.:
                    hits += 1
                    pos_total += p
                    neg_total += n

    if hits > 0:
        return pos_total / hits, neg_total / hits
    return 0., 0.
def identify(word, lang='H'):
    """Guess whether ``word`` is slang, English, or a transliterated word.

    The word is lower-cased, stripped of commas and full stops, and passed
    through ``handle_slang_words``. Three weights (slang, English, other)
    start at 1 and are scaled by each piece of evidence; the largest wins.

    Returns:
        ``(word, 'S', [slang expansion])``, ``(word, 'E', [])`` or
        ``(word, 'O', candidates)``. ``candidates`` are the transliterations
        kept during scoring, ordered by descending synset count.
    """
    # [slang, english, other]; scaled in place, in evidence order.
    weights = [1., 1., 1.]

    def scale(slang, english, other):
        weights[0] *= slang
        weights[1] *= english
        weights[2] *= other

    cleaned = word.lower().replace(',', '').replace('.', '')
    word = handle_slang_words(cleaned)

    if word in slang_dictionary():
        scale(0.8, 0.1, 0.1)
    english_synsets = word_synset(word)

    # Keep each transliteration that has synsets or is a known top word.
    candidates = []
    for candidate in transliterate(word, lang):
        cand_synsets = word_synset(candidate, lang=lang)
        is_top = top_other_words(candidate, lang)
        if cand_synsets and len(cand_synsets) > 0:
            candidates.append(
                (candidate, cand_synsets, True if is_top else False))
        elif is_top:
            candidates.append((candidate, None, None))

    # Most synsets first; entries without synsets count as zero.
    candidates.sort(key=lambda item: len(item[1]) if item[1] else 0,
                    reverse=True)

    if candidates:
        scale(0.30, 0.30, 0.40)
        if any(entry[2] for entry in candidates):
            scale(0.25, 0.25, 0.5)
        elif any(entry[1] is not None for entry in candidates):
            scale(0.30, 0.30, 0.40)

    if english_synsets is not None:
        scale(0.30, 0.40, 0.30)
    if top_english_words(word):
        scale(0.15, 0.7, 0.15)

    slang_weight, english_weight, other_weight = weights
    if slang_weight > english_weight and slang_weight > other_weight:
        return word, 'S', [slang_dictionary()[word]]
    if english_weight > other_weight:
        return word, 'E', []
    return word, 'O', [entry[0] for entry in candidates]
Пример #8
0
def sentiment(tagged, lang='E'):
    if lang in ['E', 'S']:
        pscore = 0
        nscore = 0
        inverse = False
        wt = tagged[1]
        if wt in ['n', 'v', 'r', 'a']:
            ww = tagged[0]
            z = list(swn.senti_synsets(ww, wt))
            if len(z) > 0:
                p, n = z[0].pos_score(), z[0].neg_score()
                pscore += p
                nscore += n
            # print '\t\tE | score', pscore, nscore
        else:
            pass
            #print '\t\tE | no score'
    elif lang == 'H':
        inverse = False
        pscore = 0.
        nscore = 0.
        wd = tagged[0]
        wt = tagged[2].lower()
        if 'NEG' == tagged[1]:
            inverse = True
        if 'nn' in [wt, tagged[1].lower()]:
            wt = 'n'
        elif wt == 'adj':
            wt = 'a'
        elif wt == 'adv':
            wt = 'r'
        else:
            wt = 'v'
        if wt == 'n':
            pos = 1
        elif wt == 'v':
            pos = 2
        elif wt == 'a':
            pos = 3
        else:
            pos = 4
        synsets_to_collect = {}
        found = False
        ss_lib = [
            word_synset(wd, 'H'),
            word_synset(lemmatise(wd)[0], 'H'),
            word_synset(normalise(wd), 'H'),
            word_synset(normalise(lemmatise(wd)[0]), 'H'),
            word_synset(lemmatise(normalise(wd))[0], 'H')
        ]
        print '****', ss_lib
        for ss in ss_lib:
            if ss:
                for _kk, _ss in ss.iteritems():
                    if not synsets_to_collect.get(_kk):
                        synsets_to_collect[_kk] = []
                    for __ss in _ss:
                        if __ss not in synsets_to_collect[_kk]:
                            synsets_to_collect[_kk].append(__ss)
        if not synsets_to_collect:
            print '\t\tH | no score'
            pass
        else:
            pscore, nscore = sentiment_synsets(synsets_to_collect, pos)
            # print '\t\tH | score', pscore, nscore
    elif lang == 'M':
        inverse = False
        pscore = 0.
        nscore = 0.
    else:
        return 0., 0.
    return pscore, nscore
Пример #9
0
            for polarities in _hswn.get(s, {}).iteritems():
                (t, (p, n)) = polarities
                if p > 0. or n > 0.:
                    polarity_count += 1
                    p_score += p
                    n_score += n
    if polarity_count > 0:
        return p_score / polarity_count, n_score / polarity_count
    else:
        return 0., 0.


if __name__ == '__main__':
    SCORES = []
    word = "खाना".decode('utf-8', 'ignore')
    synsets = word_synset(word, 'H')
    pos = '3'
    for ss in synsets[pos]:
        print '\t', ss
        for polarities in _hswn.get(ss, {}).iteritems():
            print '\t-->', polarities
        synonyms = word_synset_synonyms_hindi(ss, pos)
        for v in synonyms:
            print '\t\t', [v]
            _synsets = word_synset(v, 'H')
            if not _synsets:
                continue
            for _ss in _synsets.get(pos, []):
                print '\t\t\t', _ss
                for _polarities in _hswn.get(_ss, {}).iteritems():
                    print '\t\t\t-->', _polarities