def contrastingFeatures(words):
    """Build contrast features for a space-separated string of words.

    Each token (split on a single space) is scored with ``getAffect`` and
    ``getSentiStrength``.  Adjacent tokens are concatenated *without* a
    separator into bigrams and trigrams, which are then looked up in the
    module-level ``bidict`` and ``tridict`` tables.  A table value greater
    than zero is tallied as positive; any other value is tallied as negative.

    Returns:
        A list ``[delta_affect, delta_sentiment, poscount, possum,
        negcount, negsum]`` where each delta is the max minus the min of the
        per-token scores, and the counts/sums aggregate the matched n-grams.
    """
    tokens = words.split(" ")
    n_tokens = len(tokens)

    # Score every token; the two scorers are called in lockstep per token.
    affectscores = []
    sentiscores = []
    for token in tokens:
        affectscores.append(getAffect(token))
        sentiscores.append(getSentiStrength(token))

    # Sliding windows of size 2 and 3, joined with no separator.
    bigrams = [tokens[i] + tokens[i + 1] for i in range(n_tokens - 1)]
    trigrams = [
        tokens[i] + tokens[i + 1] + tokens[i + 2] for i in range(n_tokens - 2)
    ]

    poscount = negcount = 0
    possum = negsum = 0

    for gram in bigrams:
        if gram not in bidict:
            continue
        weight = bidict[gram]
        if weight > 0:
            possum += float(weight)
            poscount += 1
        else:
            negsum += float(weight)
            negcount += 1

    for gram in trigrams:
        if gram not in tridict:
            continue
        weight = tridict[gram]
        if weight > 0:
            possum += float(weight)
            poscount += 1
        else:
            negsum += float(weight)
            negcount += 1

    delta_affect = max(affectscores) - min(affectscores)
    delta_sentiment = max(sentiscores) - min(sentiscores)

    return [delta_affect, delta_sentiment, poscount, possum, negcount, negsum]
Пример #2
0
def bigram_model(sentences):
    """Estimate a bigram model from whitespace-tokenised sentences.

    Each sentence is split on whitespace and passed to ``ngrams`` with
    ``n=2`` and padding enabled on both sides (the pad symbols are whatever
    ``ngrams`` uses by default).

    Returns:
        A tuple ``(model, counts)`` where ``model[w1][w2]`` is the relative
        frequency of ``w2`` following ``w1`` (the values for each ``w1`` sum
        to 1), and ``counts`` is a plain dict mapping each ``(w1, w2)`` pair
        to the number of times it was seen.
    """
    follower_counts = {}
    pair_counts = Counter()

    for sentence in sentences:
        for first, second in ngrams(
            sentence.split(), 2, pad_left=True, pad_right=True
        ):
            pair_counts[(first, second)] += 1
            followers = follower_counts.setdefault(first, {})
            followers[second] = followers.get(second, 0) + 1

    # Turn the raw follower counts into conditional relative frequencies.
    model = {}
    for first, followers in follower_counts.items():
        total = float(sum(followers.values()))
        model[first] = {
            second: count / total for second, count in followers.items()
        }

    return model, dict(pair_counts)
def gen_bigrams(tokens):
    """Return the consecutive bigrams of *tokens*.

    Args:
        tokens: A sliceable sequence of tokens (e.g. a list of strings).

    Returns:
        A list of 2-tuples ``(tokens[i], tokens[i + 1])`` in order.  A
        sequence with fewer than two tokens yields an empty list.
    """
    # Pair every token with its successor; zip stops at the shorter input,
    # so the final token is never paired with anything past the end.
    return list(zip(tokens, tokens[1:]))
Пример #4
0
 def get_bigrams(self):
     """Return every tuple from ``get_bigrams_tuple`` joined by one space."""
     return [" ".join(pair) for pair in self.get_bigrams_tuple()]
        # For each entry of `temp`, normalise its text into a single
        # underscore-joined token and emit it once as a column/attribute name.
        # NOTE(review): entries look like stringified tuples such as
        # "('w1', 'w2')" -- confirm against the code that builds `temp`.
        for i in temp:

            # Removing unnecessary items!
            # Order matters: existing spaces are dropped first, so the space
            # introduced by the comma replacement is the only separator left.
            i = i.replace(" ", "")
            i = i.replace(",", " ")
            i = i.replace("'", "")
            i = i.replace("(", "")
            i = i.replace(")", "")

            # NOTE(review): `bg` is not used again in the visible code.
            bg = i.split()
            # Join the remaining words with "_" to form one token.
            i = i.replace(" ", "_")

            # `bigrams` holds the tokens already emitted; skip repeats.
            if i not in bigrams:

                # Append Bigrams
                bigrams.append(str(i))

                # Fill Bigrams as attributes in tsv file
                outputfile.write(str(i))
                outputfile.write("\t")

                # Adding attributes in arff file
                outputfile1.write("@attribute " + str(i) + " Numeric \n")


# Add the class-label attribute after the per-bigram attributes.
outputfile1.write("@attribute classlabel {yes,no,CANNOT_DECIDE} \n")
# Terminate the current line in each output file.
outputfile.write("\n")
outputfile1.write("\n")

# Writing @data line in .arff file!
outputfile1.write("@data\n")