Example #1
    def sentence_split(self, text):
        # split everything up by newlines, prepare them, and join back together
        lines = text.splitlines()
        text = " ".join([self._prepare_text(line)
            for line in lines if line.strip()])

        return markovify.split_into_sentences(text)
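
A minimal way to exercise this override is to subclass markovify.Text. The _prepare_text helper is not shown in the snippet, so the sketch below assumes a trivial stand-in (the class name and helper body are illustrative only):

import markovify

class RedditText(markovify.Text):
    def _prepare_text(self, text):
        # Assumed stand-in: trim whitespace and close the line like a sentence.
        text = text.strip()
        if not text.endswith(('.', '!', '?')):
            text += '.'
        return text

    def sentence_split(self, text):
        lines = text.splitlines()
        return markovify.split_into_sentences(
            " ".join(self._prepare_text(line) for line in lines if line.strip()))

model = RedditText("first line\nsecond line\nthird line")
print(model.make_sentence(test_output=False))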
Example #2
def get_history(r, user, limit=LIMIT):
    """
	Grabs a user's most recent comments and returns them as a single string.
	The average will probably be 20k-30k words.
	"""
    try:
        redditor = r.get_redditor(user)
        comments = redditor.get_comments(limit=limit)
        body = []
        total_sentences = 0
        for c in comments:
            if not c.distinguished:
                body.append(c.body)
                try:
                    total_sentences += len(
                        markovify.split_into_sentences(c.body))
                except Exception:
                    # Never let this small feature break the main processing, whatever happens.
                    total_sentences += 1
        num_comments = len(body)
        sentence_avg = total_sentences / num_comments if num_comments > 0 else 0
        body = ' '.join(body)
        return (body, num_comments, sentence_avg)
    except praw.errors.NotFound:
        return (None, None, None)
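
The body string returned here can be fed straight into a markov model. A hedged usage sketch, assuming the PRAW 3-style client r and module-level LIMIT the function expects ('some_user' is a placeholder):

import praw
import markovify

r = praw.Reddit(user_agent='history-modeler')  # PRAW 3-style client (assumed)
body, num_comments, sentence_avg = get_history(r, 'some_user')
if body:
    model = markovify.Text(body)
    print(model.make_sentence(tries=100))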
Example #3
def analyzeText(text, word_activity, subj_count, subj_sent, all_comments):
	subj_whitelist = ['who', 'that', 'this', 'what', 'people', 'anyone', 'user', 'users', 'someone', 'one', 'all']
	total_sentiment = 0
	sentences = markovify.split_into_sentences(text)
	for sentence in sentences:
		all_comments.append(sentence)
		sentiment = sid.polarity_scores(sentence)['compound']
		total_sentiment += sentiment
		
		tokenized_text = sentence.split(' ')
		for word in tokenized_text:
			word = word.lower()
			if word.isalpha() and word not in stop_words:
				word_activity[word] += 1

			#if token.dep_ == 'nsubj' and token.pos_ != 'PRON' and token.tag_ != 'PRP' and str(token.text.lower()) not in subj_whitelist:
				#subj_count[token.text.lower()] += 1
				#subj_sent[token.text.lower()] += sentiment
				#if sentiment > 0.5:
					#subj_sent[token.text.lower()] += 1
				#elif sentiment < -0.5:
					#subj_sent[token.text.lower()] += -1
				#print('Subject: ' + token.text.lower() + '\nSentence: ' + sentence + '\nScore: ' + str(sentiment))

	return total_sentiment
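
The function leans on module-level sid and stop_words that the snippet never defines. A plausible setup, assuming NLTK's VADER analyzer and English stopword list (both need a one-time nltk.download of 'vader_lexicon' and 'stopwords'):

from collections import defaultdict

from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer

sid = SentimentIntensityAnalyzer()            # provides polarity_scores()['compound']
stop_words = set(stopwords.words('english'))

word_activity = defaultdict(int)
subj_count, subj_sent = defaultdict(int), defaultdict(float)
all_comments = []

total = analyzeText("Great thread. Terrible moderation.",
                    word_activity, subj_count, subj_sent, all_comments)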
Example #4
def get_history(subreddit, limit):
    try:
        comments = subreddit.comments(limit=limit)
        if comments is None:
            return None, None, None
        c_finished = False
        while not c_finished:
            body = []
            total_sentences = 0
            try:
                for c in comments:
                    if (not c.distinguished) and (
                        (not subreddit)
                            or c.subreddit.display_name == subreddit):
                        body.append(c.body)
                        try:
                            total_sentences += len(
                                markovify.split_into_sentences(c.body))
                        except Exception:
                            total_sentences += 1
                c_finished = True
            except praw.exceptions.PRAWException:
                # Bail out instead of retrying forever over an exhausted listing.
                break
        num_comments = len(body)
        sentence_avg = total_sentences / num_comments if num_comments > 0 else 0
        body = ' '.join(body)
        return body, num_comments, sentence_avg

    except praw.exceptions.PRAWException:
        return None, None, None
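
This variant targets the PRAW 4+ API (subreddit.comments, praw.exceptions). Wiring it up might look like the following, with placeholder credentials:

import praw

reddit = praw.Reddit(client_id='...', client_secret='...',
                     user_agent='history-modeler')  # placeholders, not real credentials
body, num_comments, sentence_avg = get_history(reddit.subreddit('python'), limit=500)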
Example #5
def get_history(r, user, limit=LIMIT):
	"""
	Grabs a subreddit's most recent comments and returns them as a single string.
	The average will probably be 20k-30k words.
	"""
	try:
		subreddit = r.get_subreddit(user)
		comments = subreddit.get_comments(limit=limit)
		body = []
		total_sentences = 0
		for c in comments:
			if not c.distinguished:
				body.append(c.body)
				try:
					total_sentences += len(markovify.split_into_sentences(c.body))
				except Exception:
					# Never let this small feature break the main processing, whatever happens.
					total_sentences += 1
		num_comments = len(body)
		sentence_avg = total_sentences / num_comments if num_comments > 0 else 0
		body = ' '.join(body)
		return (body, num_comments, sentence_avg)
	except praw.errors.NotFound:
		return (None, None, None)
Example #6
 def sentence_split(self, text):
     lines = text.splitlines()
     for i in range(len(lines)):
         lines[i] = self.html_parser.unescape(lines[i].strip())
         if not lines[i].endswith(('!', '?', '.')):
             lines[i] += '.'
     return markovify.split_into_sentences(" ".join(lines))
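
HTMLParser.unescape was deprecated and then removed in Python 3.9, so on modern Python the same preparation is usually written with the stdlib html module instead. A sketch of the equivalent as a standalone function:

import html
import markovify

def sentence_split(text):
    lines = []
    for line in text.splitlines():
        line = html.unescape(line.strip())
        if not line:
            continue  # skip blank lines instead of turning them into lone dots
        if not line.endswith(('!', '?', '.')):
            line += '.'
        lines.append(line)
    return markovify.split_into_sentences(" ".join(lines))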
Example #7
def get_history(r, source, limit=LIMIT, subreddit=None):
    """
	Grabs a user's or sub's most recent comments and returns them as a single string.
	The average will probably be 20k-30k words.
	"""
    try:
        comments = get_comments(r, source, limit)
        if comments is None:
            return (None, None, None)
        c_finished = False
        while not c_finished:
            body = []
            total_sentences = 0
            recursion_testing = True
            try:
                for c in comments:
                    if ('+/u/%s' % USER.lower()) not in c.body.lower():
                        recursion_testing = False
                    if (not c.distinguished) and (
                        (not subreddit)
                            or c.subreddit.display_name == subreddit):
                        body.append(c.body)
                        try:
                            total_sentences += len(
                                markovify.split_into_sentences(c.body))
                        except Exception:
                            # Never let this small feature break the main processing, whatever happens.
                            total_sentences += 1
                c_finished = True
            except praw.errors.InvalidSubreddit:
                return (None, None, None)
            except praw.errors.HTTPException as ex:
                log(str(ex))
            except praw.errors.NotFound:
                break
        num_comments = len(body)
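        # If every fetched comment just summons the bot, the history is almost
        # certainly a feedback loop; (0, 0, 0) tells the caller to bail out.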
        if num_comments >= MIN_COMMENTS and recursion_testing:
            return (0, 0, 0)
        sentence_avg = total_sentences / num_comments if num_comments > 0 else 0
        body = ' '.join(body)
        return (body, num_comments, sentence_avg)
    except praw.errors.NotFound:
        return (None, None, None)
Example #8
def get_history(r, user, limit=LIMIT, subreddit=None):
	"""
	Grabs a user's most recent comments and returns them as a single string.
	The average will probably be 20k-30k words.
	"""
	try:
		redditor = r.get_redditor(user)
		comments = redditor.get_comments(limit=limit)
		c_finished = False
		while not c_finished:
			body = []
			total_sentences = 0
			recursion_testing = True
			try:
				for c in comments:
					if ('+/u/%s' % USER.lower()) not in c.body.lower():
						recursion_testing = False
					if (not c.distinguished) and ((not subreddit) or c.subreddit.display_name == subreddit):
						body.append(c.body)
						try:
							total_sentences += len(markovify.split_into_sentences(c.body))
						except Exception:
							# Never let this small feature break the main processing, whatever happens.
							total_sentences += 1
				c_finished = True
			except praw.errors.HTTPException as ex:
				log(str(ex))
			except praw.errors.NotFound:
				break
		num_comments = len(body)
		if num_comments >= MIN_COMMENTS and recursion_testing:
			return (0, 0, 0)
		sentence_avg = total_sentences / num_comments if num_comments > 0 else 0
		body = ' '.join(body)
		return (body, num_comments, sentence_avg)
	except praw.errors.NotFound:
		return (None, None, None)
Example #9
 def sentence_split(self, text):
     """
     Splits full-text string into a list of sentences.
     """
     return markovify.split_into_sentences(text)
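
This bare pass-through appears to match markovify.Text's own default sentence_split, so the override mainly serves as a documented extension point.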
Example #10
    def sentence_split(self, text):
        text = self._prepare_text(text)

        return markovify.split_into_sentences(text)
Example #11
    def sentence_split(self, text):
        lines = text.splitlines()
        text = " ".join(
            [self._prepare_text(line) for line in lines if line.strip()])

        return markovify.split_into_sentences(text)
Example #12
  def sentence_split(self, text):
    lines = text.splitlines()
    text = " ".join([self._prepare_text(line) for line in lines if line.strip()])

    return markovify.split_into_sentences(text)
Example #13
 def sentence_split(self, text):
     text = " ".join(map(self.prepare_sentance, text.strip().splitlines()))
     return markovify.split_into_sentences(text)