Пример #1
0
    def add_comment(self, comment):
        """Add ``comment`` to this object and update its running statistics.

        Updates, in order:

        * ``self.sentiment_score`` -- the mean sentiment over all stored
          comments, maintained incrementally.
        * ``self.comments`` / ``self.comment_term_vectors`` -- parallel lists
          holding each comment and its de-duplicated term vector.
        * ``self.terms`` (term -> set of comments containing it) and
          ``self.center`` (term -> number of comments containing it).

        :param comment: the comment to add; passed unchanged to the
            ``sentiment`` and ``metrics`` helpers.
        """
        count = len(self.comments)
        # Recover the sum of the per-comment scores from the stored mean.
        total = self.sentiment_score * count

        emoticon_score = sentiment.get_emoticon_score(
            comment, self.emoticonlist)
        text_score = sentiment.get_sentiment_score(comment)
        if emoticon_score > 0:
            # A positive emoticon score is averaged with the text score.
            contribution = (emoticon_score + text_score) / 2.0
        else:
            contribution = text_score
        self.sentiment_score = float(total + contribution) / float(count + 1)

        self.comments.append(comment)

        # Each term is counted at most once per comment.
        term_vector = list(set(metrics.get_term_vector(comment)))
        self.comment_term_vectors.append(term_vector)

        for term in term_vector:
            # Test the mapping itself: ``.keys()`` builds a list on Python 2,
            # turning every membership check into a linear scan.
            if term not in self.terms:
                self.terms[term] = set()
                self.center[term] = 0
            self.terms[term].add(comment)
            self.center[term] += 1
Пример #2
0
def create_reddit_post_dictionary(submission, subreddit, sentiment_dict):
    """Flatten a Reddit submission into a plain dictionary record.

    The title and self-text are scored separately with
    ``get_sentiment_score`` (using ``sentiment_dict``), while subjects are
    extracted from the two joined by a single space.

    :param submission: object exposing ``id``, ``title``, ``selftext``,
        ``author``, ``score``, ``upvote_ratio``, ``num_comments`` and
        ``created_utc``.
    :param subreddit: object exposing ``display_name``.
    :param sentiment_dict: forwarded unchanged to ``get_sentiment_score``.
    :return: dict describing the post; ``author`` is ``None`` when the
        submission has no author and ``parent`` is always ``None``.
    """
    combined_text = submission.title + " " + submission.selftext

    post = {
        'id': submission.id,
        'source': 'reddit',
        'sub_source': subreddit.display_name,
        'content_type': 'post',
        # The author attribute may be None, so guard before reading .name.
        'author': (None if submission.author is None
                   else submission.author.name),
        'title': submission.title,
        'text': submission.selftext,
        'title_sentiment': get_sentiment_score(submission.title,
                                               sentiment_dict),
        'text_sentiment': get_sentiment_score(submission.selftext,
                                              sentiment_dict),
        'subjects': get_subjects(combined_text),
        'source_score': submission.score,
        'upvote_ratio': submission.upvote_ratio,
        'num_comments': submission.num_comments,
        # A post is a top-level item, so it has no parent.
        'parent': None,
        'publish_date': datetime.utcfromtimestamp(submission.created_utc),
    }
    return post
Пример #3
0
    def remove_comment(self, comment, index):
        """Remove ``comment``, stored at position ``index``, from this object.

        Reverses the bookkeeping done by ``add_comment``: the comment's
        contribution is taken out of the running mean sentiment, the comment
        and its term vector are deleted from the parallel lists, and the
        per-term entries in ``self.terms`` / ``self.center`` are decremented.

        :param comment: the comment to remove; must be present in
            ``self.comments`` or the call prints a message and returns.
        :param index: position of ``comment`` in ``self.comments`` and of its
            term vector in ``self.comment_term_vectors``.
        """
        if comment not in self.comments:
            # Single-argument parenthesised form prints identically on
            # Python 2 and Python 3.
            print("Comment does not exist in cluster")
            return

        # The mean is taken over the number of stored comments, not over the
        # position of the one being removed, so use the list length here.
        count = len(self.comments)

        emoticon_score = sentiment.get_emoticon_score(
            comment, self.emoticonlist)
        text_score = sentiment.get_sentiment_score(comment)
        if emoticon_score > 0:
            # Mirrors add_comment: a positive emoticon score is averaged
            # with the text score.
            contribution = (emoticon_score + text_score) / 2.0
        else:
            contribution = text_score

        if count > 1:
            total = self.sentiment_score * count
            self.sentiment_score = (
                float(total - contribution) / float(count - 1))
        else:
            # Removing the last comment leaves nothing to average.
            self.sentiment_score = 0

        del self.comments[index]
        term_vector = self.comment_term_vectors[index]

        for term in term_vector:
            try:
                self.terms[term].remove(comment)
                self.center[term] -= 1
            except (ValueError, KeyError):
                # Best effort: the term or the comment may already be gone
                # from the index; there is nothing to undo in that case.
                pass

        del self.comment_term_vectors[index]
Пример #4
0
    def toTxt(filename):
        """Convert the HDF5 song file ``filename`` to ``filename + '.txt'``.

        For every song in the file, each field named in
        ``existingFieldnames`` is read through the module-level getter
        ``get_<field>``, a sentiment score is computed from the track id, and
        the ``segments_timbre`` data is attached as a list. Songs whose
        sentiment score is neither 0 nor 1 are skipped (the original author
        treats that as "no lyrics available"). Each remaining song is encoded
        with ``encode_song`` and written as one line.

        On success the source HDF5 file is deleted; the text file is deleted
        as well when no song was written.

        :param filename: path of the HDF5 file to convert.
        """
        h5File = open_h5_file_read(filename)
        didWrite = False
        try:
            n = get_num_songs(h5File)
            with open(filename + '.txt', 'w+') as out:
                for i in range(n):
                    song = {}

                    # Grab the scalar fields; each getter is looked up by
                    # name among the module globals.
                    for field in existingFieldnames:
                        song[field] = globals()['get_' + field](h5File, i)

                    # Run NLP on the track.
                    score = get_sentiment_score(get_track_id(h5File, i))
                    song['sentiment_score'] = score
                    if score != 0 and score != 1:
                        # Skip before extracting timbre data so no work is
                        # wasted on a song that will not be written.
                        print("Missing lyrics. Skipping")
                        continue

                    song['segments_timbre'] = get_segments_timbre(
                        h5File, i).tolist()

                    print('Writing valid song')
                    out.write(encode_song(song) + '\n')
                    didWrite = True
        finally:
            # Release the HDF5 handle even when conversion fails part-way.
            h5File.close()

        # Only reached on success: delete the HDF5 source, and drop the text
        # file too when it ended up empty.
        os.remove(filename)
        if not didWrite:
            os.remove(filename + '.txt')