def __init__(self, thread_id):
    """Reset stored TextBlob results for a thread and start from an empty RDD.

    Empties the ``textblob_<thread_id>`` collection, fetches the submission
    through praw, stores its title under ``_id == thread_id`` in the
    ``textblob`` collection, and initialises ``self.rdd`` as an empty RDD.

    Args:
        thread_id: Reddit submission id. It is also used to build the
            per-thread collection name and the praw user agent string.
    """
    self.thread_id = thread_id

    db = mc['reddit']
    # Start from a clean slate so an earlier run for the same thread does
    # not leak into this one.
    db['textblob_%s' % thread_id].delete_many({})

    r = praw.Reddit(user_agent='Tushar Ranjan DSI %s' % thread_id)
    submission = r.get_submission(submission_id=thread_id)

    # The replacement document carries no $-operators, so this is a whole
    # document upsert; replace_one is the supported spelling of that
    # operation (Collection.update is deprecated in pymongo 3 and removed
    # in pymongo 4).
    db['textblob'].replace_one(
        {'_id': thread_id},
        {'title': submission.title},
        upsert=True,
    )

    self.rdd = sc.parallelize([])
def simulateThread(self, sleep_time=1, by_second=True):
    """Stream the thread through the simulator and upsert running results.

    For every chunk yielded by the simulator, the comments are mapped with
    ``parseComment``, combined per key with ``+``, and merged into
    ``self.rdd``. After each chunk the accumulated RDD is mapped with
    ``get_counts`` and each resulting ``(key, document)`` pair is upserted
    into the ``textblob_<thread_id>`` collection under ``_id == key``.

    Args:
        sleep_time: Passed unchanged to the simulator's streaming method.
        by_second: When true use ``streamCommentsBySecond``, otherwise
            ``streamComments``.
    """
    sim = thread_simulator.ThreadSimulator(self.thread_id)
    stream = sim.streamCommentsBySecond if by_second else sim.streamComments

    # Loop-invariant: resolve the per-thread collection once.
    collection = mc['reddit']['textblob_%s' % self.thread_id]

    for comments in stream(sleep_time):
        chunk_rdd = (
            sc.parallelize(comments)
            .map(parseComment)
            .reduceByKey(lambda x, y: x + y)
        )
        # Fold the new chunk into the running per-key totals.
        self.rdd = self.rdd.union(chunk_rdd).reduceByKey(lambda x, y: x + y)

        for record in self.rdd.map(get_counts).collect():
            # Whole-document upsert keyed by record[0]. replace_one is the
            # supported form of update(filter, document, upsert=True)
            # (Collection.update is deprecated in pymongo 3, removed in 4).
            # NOTE(review): assumes get_counts yields plain documents with
            # no $-operator keys - confirm against its definition.
            collection.replace_one(
                {'_id': record[0]}, record[1], upsert=True)
def simulateThread(self, sleep_time=1, by_second=True):
    """Stream the thread through the simulator and insert per-batch results.

    For every chunk yielded by the simulator, comments passing
    ``valid_flair`` are mapped with ``parseComment``, combined per key with
    ``+``, and merged into ``self.rdd``. After each chunk the accumulated
    RDD is mapped with ``get_counts``; each resulting document is tagged
    with the current ``batch`` value and inserted into the
    ``vader_<thread_id>`` collection.

    Args:
        sleep_time: Passed unchanged to the simulator's streaming method and
            also used as the increment of the ``batch`` tag.
        by_second: When true use ``streamCommentsBySecond``, otherwise
            ``streamComments``.
    """
    sim = thread_simulator.ThreadSimulator(self.thread_id)
    stream = sim.streamCommentsBySecond if by_second else sim.streamComments

    # Loop-invariant: resolve the per-thread collection once.
    collection = mc['reddit']['vader_%s' % self.thread_id]

    batch = 0
    for comments in stream(sleep_time):
        chunk_rdd = (
            sc.parallelize(comments)
            .filter(valid_flair)
            .map(parseComment)
            .reduceByKey(lambda x, y: x + y)
        )
        # Fold the new chunk into the running per-key totals.
        self.rdd = self.rdd.union(chunk_rdd).reduceByKey(lambda x, y: x + y)

        for record in self.rdd.map(get_counts).collect():
            document = record[1]
            document['batch'] = batch
            # insert_one is the supported single-document form of the
            # deprecated Collection.insert (removed in pymongo 4).
            collection.insert_one(document)

        # NOTE(review): the tag advances once per streamed chunk, so every
        # document written for the same chunk shares one batch value -
        # confirm this matches the intended placement of the increment.
        batch += sleep_time
def __init__(self, thread_id):
    """Reset stored VADER results for a thread and seed the running RDD.

    Empties the ``vader_<thread_id>`` collection, fetches the submission
    through praw, and stores its title under ``_id == thread_id`` in the
    ``vader`` collection. When the submission's subreddit has an entry in
    ``global_config.FLAIRS``, ``self.rdd`` is seeded with one zeroed record
    per distinct value of that entry; otherwise it starts empty.

    Args:
        thread_id: Reddit submission id. It is also used to build the
            per-thread collection name and the praw user agent string.
    """
    self.thread_id = thread_id

    db = mc['reddit']
    # Start from a clean slate so an earlier run for the same thread does
    # not leak into this one.
    db['vader_%s' % thread_id].delete_many({})

    r = praw.Reddit(user_agent='Tushar Ranjan DSI %s' % thread_id)
    submission = r.get_submission(submission_id=thread_id)

    subreddit_name = submission.subreddit.display_name
    if subreddit_name in global_config.FLAIRS:
        # Several flair keys can map to the same fanbase, so de-duplicate.
        fanbases = set(global_config.FLAIRS[subreddit_name].values())
        # Each value is a one-element list so that per-key merging with
        # ``+`` concatenates records.
        initial_data = [
            (fanbase, [{
                'count': 0,
                'created': 0,
                'fanbase': fanbase,
                'text': [],
                'polarity': 0,
                'subjectivity': 0,
            }])
            for fanbase in fanbases
        ]
    else:
        initial_data = []

    # The replacement document carries no $-operators, so this is a whole
    # document upsert; replace_one is the supported spelling of that
    # operation (Collection.update is deprecated in pymongo 3 and removed
    # in pymongo 4).
    db['vader'].replace_one(
        {'_id': thread_id},
        {'title': submission.title},
        upsert=True,
    )

    self.rdd = sc.parallelize(initial_data)