示例#1
0
 def get_users(input_files_start_time, input_files_end_time, input_folder):
     """Launch the ``Users`` MapReduce job over a dated set of input files.

     The input list is obtained from ``fs.get_dated_input_files`` using the
     given start time, end time and folder, and ``f_users`` is passed to
     ``runMRJob`` as the output file argument. Nothing is returned.
     """
     job_class = Users
     destination = f_users
     dated_inputs = fs.get_dated_input_files(
         input_files_start_time, input_files_end_time, input_folder)
     # For a local run, additionally pass ``args=[]`` to runMRJob.
     # NOTE(review): mapred.task.timeout is presumably in milliseconds
     # (8640000 ms = 2.4 h) -- confirm against the Hadoop version in use.
     runMRJob(
         job_class,
         destination,
         dated_inputs,
         mrJobClassParams={'job_id': 'as'},
         jobconf={
             'mapred.reduce.tasks': 300,
             'mapred.task.timeout': 8640000
         })
示例#2
0
 def count_at_mentions(input_files_start_time, input_files_end_time):
     """Launch the ``CountAtMentionTweets`` MapReduce job for a time window.

     Input files come from ``fs.get_dated_input_files`` and
     ``f_count_at_mentions`` is passed to ``runMRJob`` as the output file
     argument. Nothing is returned.

     NOTE(review): ``input_folder`` is not a parameter of this function; it
     is resolved from the enclosing scope at call time -- confirm that it is
     defined there.
     """
     job_class = CountAtMentionTweets
     destination = f_count_at_mentions
     # For a local run, pass
     # fs.get_local_input_files(local_tweets_input_folder) as the input list
     # instead, and additionally pass ``args=[]``.
     runMRJob(
         job_class,
         destination,
         fs.get_dated_input_files(
             input_files_start_time, input_files_end_time, input_folder),
         mrJobClassParams={'job_id': 'as'},
         jobconf={
             'mapred.reduce.tasks': 300,
             'mapred.task.timeout': 86400000
         })
示例#3
0
 def get_pins(input_files_start_time, input_files_end_time, input_folder):
   """Launch the ``Pins`` MapReduce job over a dated set of input files.

   The input list is obtained from ``fs.get_dated_input_files`` using the
   given start time, end time and folder, and ``f_pins`` is passed to
   ``runMRJob`` as the output file argument. Nothing is returned.
   """
   mr_class = Pins
   output_file = f_pins
   # The file names are handed to the job exactly as returned; no
   # hdfs_rel_path prefix is applied to them.
   chevron_files = fs.get_dated_input_files(input_files_start_time,
                                            input_files_end_time,
                                            input_folder)
   # For a local run, additionally pass ``args=[]`` to runMRJob.
   # NOTE(review): mapred.task.timeout is presumably in milliseconds
   # (8640000 ms = 2.4 h) -- confirm against the Hadoop version in use.
   runMRJob(mr_class,
            output_file,
            chevron_files,
            mrJobClassParams={'job_id': 'as'},
            jobconf={'mapred.reduce.tasks': 300,
                     'mapred.task.timeout': 8640000})
示例#4
0
 def get_pins(input_files_start_time, input_files_end_time, input_folder):
     """Launch the ``Pins`` MapReduce job over a dated set of input files.

     The input list is obtained from ``fs.get_dated_input_files`` using the
     given start time, end time and folder, and ``f_pins`` is passed to
     ``runMRJob`` as the output file argument. Nothing is returned.
     """
     mr_class = Pins
     output_file = f_pins
     # The file names are handed to the job exactly as returned; no
     # hdfs_rel_path prefix is applied to them.
     chevron_files = fs.get_dated_input_files(input_files_start_time,
                                              input_files_end_time,
                                              input_folder)
     # For a local run, additionally pass ``args=[]`` to runMRJob.
     # NOTE(review): mapred.task.timeout is presumably in milliseconds
     # (8640000 ms = 2.4 h) -- confirm against the Hadoop version in use.
     runMRJob(
         mr_class,
         output_file,
         chevron_files,
         mrJobClassParams={'job_id': 'as'},
         jobconf={
             'mapred.reduce.tasks': 300,
             'mapred.task.timeout': 8640000
         })