Пример #1
0
 def __init__(self, thread_id):
     """Reset stored TextBlob results for a thread and start an empty RDD.

     Empties the per-thread ``textblob_<thread_id>`` collection, fetches the
     submission through PRAW, stores its title under ``_id == thread_id`` in
     the shared ``textblob`` collection and initialises ``self.rdd`` with an
     empty RDD.

     Args:
         thread_id: Reddit submission id. It is also used as the Mongo
             ``_id`` of the title document and as the suffix of the
             per-thread collection name.
     """
     self.thread_id = thread_id

     reddit_db = mc['reddit']
     # Start from a clean slate for this thread.
     reddit_db['textblob_%s' % thread_id].delete_many({})

     r = praw.Reddit(user_agent='Tushar Ranjan DSI %s' % thread_id)
     submission = r.get_submission(submission_id=thread_id)

     # replace_one() is the explicit equivalent of the legacy
     # Collection.update() call with a replacement document; update() is
     # deprecated since PyMongo 3.0 and removed in PyMongo 4.0.
     reddit_db['textblob'].replace_one({'_id': thread_id},
                                       {'title': submission.title},
                                       upsert=True)

     # NOTE(review): the resulting RDD is discarded; presumably this only
     # exercises the SparkContext once -- confirm before removing.
     sc.parallelize([1, 2, 3])
     self.rdd = sc.parallelize([])
Пример #2
0
 def simulateThread(self, sleep_time=1, by_second=True):
     """Replay the thread and upsert running aggregates after each step.

     A ``ThreadSimulator`` is created for ``self.thread_id``. Every item
     its stream yields is parallelized, mapped with ``parseComment`` and
     reduced by key with ``+``. The result is merged into ``self.rdd``
     (again reduced by key with ``+``), and every ``get_counts`` record of
     the merged RDD is upserted into ``textblob_<thread_id>``.

     Args:
         sleep_time: Passed unchanged to the simulator's stream method.
         by_second: If true, use ``streamCommentsBySecond``; otherwise
             use ``streamComments``.
     """
     def merge(left, right):
         return left + right

     simulator = thread_simulator.ThreadSimulator(self.thread_id)
     if by_second:
         stream = simulator.streamCommentsBySecond
     else:
         stream = simulator.streamComments

     collection_name = 'textblob_%s' % self.thread_id

     for chunk in stream(sleep_time):
         parsed = sc.parallelize(chunk).map(parseComment)
         increment = parsed.reduceByKey(merge)
         self.rdd = self.rdd.union(increment).reduceByKey(merge)

         records = self.rdd.map(get_counts).collect()
         for record in records:
             # record[0] is used as the document _id, record[1] as the
             # update/replacement argument.
             # NOTE(review): Collection.update() is the legacy PyMongo
             # API (removed in 4.0); the shape of record[1] is not
             # visible here, so the call is left as is.
             mc['reddit'][collection_name].update(
                 {'_id': record[0]}, record[1], upsert=True)
Пример #3
0
 def simulateThread(self, sleep_time=1, by_second=True):
     """Replay the thread and store a tagged snapshot after each step.

     A ``ThreadSimulator`` is created for ``self.thread_id``. Every item
     its stream yields is parallelized, filtered with ``valid_flair``,
     mapped with ``parseComment`` and reduced by key with ``+``. The
     result is merged into ``self.rdd`` (again reduced by key with ``+``).
     Each ``get_counts`` record of the merged RDD then has its document
     (element 1) tagged with the current ``batch`` value and inserted into
     the ``vader_<thread_id>`` collection.

     ``batch`` starts at 0 and grows by ``sleep_time`` after every
     streamed item.

     Args:
         sleep_time: Passed unchanged to the simulator's stream method and
             used as the increment of the ``batch`` tag.
         by_second: If true, use ``streamCommentsBySecond``; otherwise
             use ``streamComments``.
     """
     sim = thread_simulator.ThreadSimulator(self.thread_id)
     fn = sim.streamCommentsBySecond if by_second else sim.streamComments
     # The target collection does not change during the replay.
     collection = mc['reddit']['vader_%s' % self.thread_id]

     batch = 0
     for comments in fn(sleep_time):
         temp_rdd = (sc.parallelize(comments)
                     .filter(valid_flair)
                     .map(parseComment)
                     .reduceByKey(lambda x, y: x + y))
         self.rdd = self.rdd.union(temp_rdd).reduceByKey(lambda x, y: x + y)

         for record in self.rdd.map(get_counts).collect():
             doc = record[1]
             doc['batch'] = batch
             # insert_one() replaces the legacy Collection.insert(), which
             # is deprecated since PyMongo 3.0 and removed in PyMongo 4.0.
             collection.insert_one(doc)
         batch += sleep_time
Пример #4
0
 def __init__(self, thread_id):
     """Reset stored VADER results for a thread and seed the running RDD.

     Empties the per-thread ``vader_<thread_id>`` collection, fetches the
     submission through PRAW and stores its title under
     ``_id == thread_id`` in the shared ``vader`` collection.

     If the submission's subreddit name is a key of
     ``global_config.FLAIRS``, ``self.rdd`` is seeded with one
     ``(fanbase, [zeroed_record])`` pair per distinct value of that
     subreddit's flair mapping; otherwise it starts empty.

     Args:
         thread_id: Reddit submission id. It is also used as the Mongo
             ``_id`` of the title document and as the suffix of the
             per-thread collection name.
     """
     self.thread_id = thread_id

     reddit_db = mc['reddit']
     # Start from a clean slate for this thread.
     reddit_db['vader_%s' % thread_id].delete_many({})

     r = praw.Reddit(user_agent='Tushar Ranjan DSI %s' % thread_id)
     submission = r.get_submission(submission_id=thread_id)

     subreddit_name = submission.subreddit.display_name
     if subreddit_name in global_config.FLAIRS:
         # Several flairs may map to the same fanbase; seed each once.
         fanbases = set(global_config.FLAIRS[subreddit_name].values())
         initial_data = [(fanbase, [{
             'count': 0,
             'created': 0,
             'fanbase': fanbase,
             'text': [],
             'polarity': 0,
             'subjectivity': 0
         }]) for fanbase in fanbases]
     else:
         initial_data = []

     # replace_one() is the explicit equivalent of the legacy
     # Collection.update() call with a replacement document; update() is
     # deprecated since PyMongo 3.0 and removed in PyMongo 4.0.
     reddit_db['vader'].replace_one({'_id': thread_id},
                                    {'title': submission.title},
                                    upsert=True)

     # NOTE(review): the resulting RDD is discarded; presumably this only
     # exercises the SparkContext once -- confirm before removing.
     sc.parallelize([1, 2, 3])
     self.rdd = sc.parallelize(initial_data)