def sentence_split(self, text):
    """Prepare each non-blank line of *text*, rejoin, and split into sentences."""
    prepared = []
    for raw_line in text.splitlines():
        # Blank lines carry no content worth feeding to the model.
        if raw_line.strip():
            prepared.append(self._prepare_text(raw_line))
    return markovify.split_into_sentences(" ".join(prepared))
def get_history(r, user, limit=LIMIT):
    """
    Grabs a user's most recent comments and returns them as a single string.
    The average will probably be 20k-30k words.

    Returns (body, num_comments, sentence_avg), or (None, None, None)
    when the redditor cannot be found.
    """
    try:
        fetched = r.get_redditor(user).get_comments(limit=limit)
        texts = []
        sentence_count = 0
        for comment in fetched:
            # Distinguished (mod/admin) comments are not representative speech.
            if comment.distinguished:
                continue
            texts.append(comment.body)
            try:
                sentence_count += len(
                    markovify.split_into_sentences(comment.body))
            except Exception:
                # Ain't no way I'm letting a little feature like this screw
                # up my processing, no matter what happens
                sentence_count += 1
        count = len(texts)
        avg = sentence_count / count if count > 0 else 0
        return (' '.join(texts), count, avg)
    except praw.errors.NotFound:
        return (None, None, None)
def analyzeText(text, word_activity, subj_count, subj_sent, all_comments):
    """Accumulate word activity and sentiment for every sentence of *text*.

    Each sentence is appended to all_comments; every lowercase alphabetic
    token not in stop_words bumps its word_activity counter.  Returns the
    summed VADER compound sentiment over all sentences.  subj_count and
    subj_sent are currently untouched (subject tracking is disabled).
    """
    subj_whitelist = ['who', 'that', 'this', 'what', 'people', 'anyone',
                      'user', 'users', 'someone', 'one', 'all']
    total_sentiment = 0
    for sentence in markovify.split_into_sentences(text):
        all_comments.append(sentence)
        score = sid.polarity_scores(sentence)['compound']
        total_sentiment += score
        for token in sentence.split(' '):
            token = token.lower()
            if token.isalpha() and token not in stop_words:
                word_activity[token] += 1
    return total_sentiment
def get_history(subreddit, limit):
    """Collect a subreddit's recent comments into one string.

    Args:
        subreddit: a praw Subreddit object whose comment listing is read.
        limit: maximum number of comments to fetch.

    Returns:
        (body, num_comments, sentence_avg) on success, (None, None, None)
        when no listing is available, or None when fetching fails outright.
    """
    try:
        comments = subreddit.comments(limit=limit)
        if comments is None:
            return None, None, None
        body = []
        total_sentences = 0
        # Bounded retry: the previous version caught PRAWException with
        # `pass` inside `while not c_finished`, which never set the flag
        # and could spin forever on a persistently failing listing.
        for _attempt in range(3):
            body = []
            total_sentences = 0
            try:
                for c in comments:
                    # Skip distinguished (mod/admin) comments; defensively
                    # keep only comments from this subreddit.  The old code
                    # compared the display-name string against the Subreddit
                    # *object*, which never matched.
                    if (not c.distinguished) and (
                            (not subreddit) or
                            c.subreddit.display_name == subreddit.display_name):
                        body.append(c.body)
                        try:
                            total_sentences += len(
                                markovify.split_into_sentences(c.body))
                        except Exception:
                            # Sentence splitting is best-effort: count the
                            # whole comment as one sentence on failure.
                            total_sentences += 1
                break
            except praw.exceptions.PRAWException:
                continue
        num_comments = len(body)
        sentence_avg = total_sentences / num_comments if num_comments > 0 else 0
        return ' '.join(body), num_comments, sentence_avg
    except praw.exceptions.PRAWException:
        pass
def get_history(r, user, limit=LIMIT):
    """
    Grabs a subreddit's most recent comments and returns them as a single
    string.  The average will probably be 20k-30k words.

    Args:
        r: praw client instance.
        user: subreddit name to harvest from.
        limit: maximum number of comments to fetch.

    Returns:
        (body, num_comments, sentence_avg), or (None, None, None) when the
        subreddit cannot be found.
    """
    # NOTE(review): the previous version referenced undefined names
    # (`post`, `comments`, `body`, `total_sentences`) and always raised
    # NameError; rebuilt here to match the sibling harvest functions.
    try:
        subreddit = r.get_subreddit(user)
        comments = subreddit.get_comments(limit=limit)
        body = []
        total_sentences = 0
        for c in comments:
            # Distinguished (mod/admin) comments are excluded.
            if c.distinguished:
                continue
            body.append(c.body)
            try:
                total_sentences += len(
                    markovify.split_into_sentences(c.body))
            except Exception:
                # Ain't no way I'm letting a little feature like this screw
                # up my processing, no matter what happens
                total_sentences += 1
        num_comments = len(body)
        sentence_avg = total_sentences / num_comments if num_comments > 0 else 0
        return (' '.join(body), num_comments, sentence_avg)
    except praw.errors.NotFound:
        return (None, None, None)
def sentence_split(self, text):
    """HTML-unescape each line, ensure it ends in punctuation, then split."""
    cleaned = []
    for raw in text.splitlines():
        line = self.html_parser.unescape(raw.strip())
        # Lines without terminal punctuation would merge into neighbours
        # when sentence-splitting, so close them with a period.
        if not line.endswith(('!', '?', '.')):
            line += '.'
        cleaned.append(line)
    return markovify.split_into_sentences(" ".join(cleaned))
def get_history(r, source, limit=LIMIT, subreddit=None):
    """
    Grabs a user's or sub's most recent comments and returns them as a
    single string.  The average will probably be 20k-30k words.

    Args:
        r: praw client instance.
        source: user or subreddit name handed to get_comments().
        limit: maximum number of comments to fetch.
        subreddit: optional display name; when set, only comments made in
            that subreddit are kept.

    Returns:
        (body, num_comments, sentence_avg) on success;
        (None, None, None) when the source cannot be fetched;
        (0, 0, 0) when the history looks like pure bot-summoning recursion.
    """
    try:
        comments = get_comments(r, source, limit)
        if comments is None:  # fixed: identity comparison, not `== None`
            return (None, None, None)
        c_finished = False
        while not c_finished:
            body = []
            total_sentences = 0
            recursion_testing = True
            try:
                for c in comments:
                    # Any comment that doesn't summon the bot proves this is
                    # a real history rather than bot-on-bot recursion.
                    if ('+/u/%s' % USER.lower()) not in c.body.lower():
                        recursion_testing = False
                    if (not c.distinguished) and (
                            (not subreddit) or
                            c.subreddit.display_name == subreddit):
                        body.append(c.body)
                        try:
                            total_sentences += len(
                                markovify.split_into_sentences(c.body))
                        except Exception:
                            # Ain't no way I'm letting a little feature like
                            # this screw up my processing, no matter what
                            # happens
                            total_sentences += 1
                c_finished = True
            except praw.errors.InvalidSubreddit:
                return (None, None, None)
            except praw.errors.HTTPException as ex:
                # Transient HTTP error: log it and retry the listing.
                log(str(ex))
            except praw.errors.NotFound:
                break
        num_comments = len(body)
        if num_comments >= MIN_COMMENTS and recursion_testing:
            return (0, 0, 0)
        sentence_avg = total_sentences / num_comments if num_comments > 0 else 0
        return (' '.join(body), num_comments, sentence_avg)
    except praw.errors.NotFound:
        return (None, None, None)
def get_history(r, user, limit=LIMIT, subreddit=None):
    """
    Grabs a user's most recent comments and returns them as a single
    string.  The average will probably be 20k-30k words.

    Returns (0, 0, 0) when the history is nothing but bot summons,
    (None, None, None) when the redditor cannot be found.
    """
    try:
        comments = r.get_redditor(user).get_comments(limit=limit)
        marker = '+/u/%s' % USER.lower()
        while True:
            collected = []
            sentence_total = 0
            all_summon_bot = True
            try:
                for comment in comments:
                    # A single non-summoning comment proves this is a real
                    # human history, not recursive bot chatter.
                    if marker not in comment.body.lower():
                        all_summon_bot = False
                    if comment.distinguished:
                        continue
                    if subreddit and comment.subreddit.display_name != subreddit:
                        continue
                    collected.append(comment.body)
                    try:
                        sentence_total += len(
                            markovify.split_into_sentences(comment.body))
                    except Exception:
                        # Never let sentence counting break the harvest.
                        sentence_total += 1
                break
            except praw.errors.HTTPException as ex:
                # Transient HTTP error: log and retry the whole listing.
                log(str(ex))
            except praw.errors.NotFound:
                break
        count = len(collected)
        if count >= MIN_COMMENTS and all_summon_bot:
            return (0, 0, 0)
        avg = sentence_total / count if count > 0 else 0
        return (' '.join(collected), count, avg)
    except praw.errors.NotFound:
        return (None, None, None)
def sentence_split(self, text):
    """Split a full-text string into a list of sentences."""
    sentences = markovify.split_into_sentences(text)
    return sentences
def sentence_split(self, text):
    """Run the text through _prepare_text, then split it into sentences."""
    return markovify.split_into_sentences(self._prepare_text(text))
def sentence_split(self, text):
    """Prepare every non-blank line, rejoin with spaces, and sentence-split."""
    nonblank = (ln for ln in text.splitlines() if ln.strip())
    joined = " ".join(self._prepare_text(ln) for ln in nonblank)
    return markovify.split_into_sentences(joined)
def sentence_split(self, text):
    """Drop blank lines, prepare the rest, and split the joined result."""
    kept = filter(str.strip, text.splitlines())
    prepared = [self._prepare_text(line) for line in kept]
    return markovify.split_into_sentences(" ".join(prepared))
def sentence_split(self, text):
    """Prepare each line of the trimmed text and split into sentences."""
    # NOTE: prepare_sentance is the (misspelled) helper name used elsewhere
    # in this class; keep it as-is.
    prepared = [self.prepare_sentance(line)
                for line in text.strip().splitlines()]
    return markovify.split_into_sentences(" ".join(prepared))