def get_users(input_files_start_time, input_files_end_time, input_folder):
    """Launch the ``Users`` job over the dated input files of a folder.

    The three arguments are forwarded unchanged to
    ``fs.get_dated_input_files``; whatever that call returns is handed to
    ``runMRJob`` as the job input, with ``f_users`` as the output file
    argument.

    Nothing is returned; the result of ``runMRJob`` is discarded.
    """
    dated_inputs = fs.get_dated_input_files(
        input_files_start_time, input_files_end_time, input_folder
    )
    job_params = {'job_id': 'as'}
    # NOTE(review): the timeout is presumably in milliseconds -- confirm.
    # It is a tenth of the 86400000 used by count_at_mentions in this file;
    # verify which value is intended.
    job_conf = {
        'mapred.reduce.tasks': 300,
        'mapred.task.timeout': 8640000,
    }
    runMRJob(
        Users,
        f_users,
        dated_inputs,
        mrJobClassParams=job_params,
        # uncomment when running on local
        # args = [],
        jobconf=job_conf
    )
def count_at_mentions(input_files_start_time, input_files_end_time):
    """Launch the ``CountAtMentionTweets`` job over dated input files.

    The input list comes from ``fs.get_dated_input_files`` called with the
    two time arguments and ``input_folder``; it is passed to ``runMRJob``
    together with ``f_count_at_mentions`` as the output file argument.

    Nothing is returned; the result of ``runMRJob`` is discarded.
    """
    # NOTE(review): ``input_folder`` is not a parameter of this function, so
    # it must resolve to a module-level name -- confirm that one exists.
    #
    # uncomment when running on local
    # dated_inputs = fs.get_local_input_files(local_tweets_input_folder)
    dated_inputs = fs.get_dated_input_files(
        input_files_start_time, input_files_end_time, input_folder
    )
    job_params = {'job_id': 'as'}
    # NOTE(review): the timeout is presumably in milliseconds -- confirm.
    job_conf = {
        'mapred.reduce.tasks': 300,
        'mapred.task.timeout': 86400000,
    }
    runMRJob(
        CountAtMentionTweets,
        f_count_at_mentions,
        dated_inputs,
        mrJobClassParams=job_params,
        # uncomment when running on local
        # args = [],
        jobconf=job_conf
    )
# NOTE(review): a second definition of get_pins with the same logic appears
# later in this file and rebinds the name, so this one is shadowed once the
# module has finished loading.
def get_pins(input_files_start_time, input_files_end_time, input_folder):
    """Launch the ``Pins`` job over the dated input files of a folder.

    The three arguments are forwarded unchanged to
    ``fs.get_dated_input_files``; the returned file list is handed to
    ``runMRJob`` with ``f_pins`` as the output file argument.

    Nothing is returned; the result of ``runMRJob`` is discarded.
    """
    dated_inputs = fs.get_dated_input_files(
        input_files_start_time, input_files_end_time, input_folder
    )
    # Disabled code kept from the original (it was a bare string literal,
    # i.e. never executed):
    #   hdfs_files = []
    #   for file in chevron_files:
    #       hdfs_files = hdfs_rel_path + file
    job_params = {'job_id': 'as'}
    # NOTE(review): the timeout is presumably in milliseconds -- confirm.
    job_conf = {
        'mapred.reduce.tasks': 300,
        'mapred.task.timeout': 8640000,
    }
    runMRJob(
        Pins,
        f_pins,
        dated_inputs,
        mrJobClassParams=job_params,
        # uncomment when running on local
        # args = [],
        jobconf=job_conf
    )
# NOTE(review): get_pins is also defined earlier in this file with the same
# logic; this later definition is the one that stays bound to the name.
def get_pins(input_files_start_time, input_files_end_time, input_folder):
    """Run ``runMRJob`` for the ``Pins`` job class on dated input files.

    ``fs.get_dated_input_files`` is called with the three arguments as
    given, and its result becomes the job input. ``f_pins`` is passed as
    the output file argument.

    The function has no return value; ``runMRJob``'s result is not used.
    """
    pin_inputs = fs.get_dated_input_files(
        input_files_start_time,
        input_files_end_time,
        input_folder,
    )
    # The original carried this never-executed snippet as a bare string
    # literal; it is preserved here as a comment only:
    #   hdfs_files = []
    #   for file in chevron_files:
    #       hdfs_files = hdfs_rel_path + file
    class_params = dict([('job_id', 'as')])
    # NOTE(review): timeout unit is presumably milliseconds -- confirm.
    conf = {}
    conf['mapred.reduce.tasks'] = 300
    conf['mapred.task.timeout'] = 8640000
    runMRJob(
        Pins,
        f_pins,
        pin_inputs,
        mrJobClassParams=class_params,
        # uncomment when running on local
        # args = [],
        jobconf=conf
    )