def mapreduce_create_sources_from_events(fbl):
    """Kick off a mapreduce that creates Source entities from all DBEvents.

    Args:
        fbl: the FB batch-lookup object threaded through to the mapper.
    """
    fb_mapreduce.start_map(
        fbl,
        'Create Sources from Events',
        # NOTE(review): was 'map_create_sources_from_event' (plural); the
        # sibling definition of this same function spells the handler
        # 'map_create_source_from_event' (singular) — aligned to that
        # spelling. Confirm against event_scraper/thing_db.py.
        'event_scraper.thing_db.map_create_source_from_event',
        'events.eventdata.DBEvent',
    )
def get(self):
    """Queue a mapreduce that scans DBEvents and deletes bad auto-added ones.

    Request parameters:
        time_period: optional; restricts the scan to one search_time_period.
        queue: task queue to run on (defaults to 'fast-queue').
        allow_deletes: '1' to perform deletions; any other value is a dry run.
    """
    time_period = self.request.get('time_period', None)
    queue = self.request.get('queue', 'fast-queue')
    # Branch once on time_period to pick both the filter list and the job name.
    if time_period:
        filters = [('search_time_period', '=', time_period)]
        name = 'Delete %s Bad Autoadds' % time_period
    else:
        filters = []
        name = 'Delete All Bad Autoadds'
    allow_deletes = self.request.get('allow_deletes', None) == '1'
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name=name,
        handler_spec='events.event_reloading_tasks.map_maybe_delete_bad_event',
        entity_kind='events.eventdata.DBEvent',
        filters=filters,
        extra_mapper_params={'allow_deletes': allow_deletes},
        queue=queue,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_load_fb_events(fbl, load_attending=False, time_period=None, update_geodata=True, only_if_updated=True, queue='slow-queue'):
    """Start a mapreduce that reloads FB data for DBEvents.

    When load_attending is set, the attending lists are reloaded instead of
    the event data itself. An optional time_period narrows the scan via a
    search_time_period filter.
    """
    if load_attending:
        entity_label = 'Event Attendings'
        mapper_name = 'map_load_fb_event_attending'
    else:
        entity_label = 'Events'
        mapper_name = 'map_load_fb_event'
    filters = []
    if time_period:
        filters.append(('search_time_period', '=', time_period))
        name = 'Load %s %s' % (time_period, entity_label)
    else:
        name = 'Load All %s' % entity_label
    fb_mapreduce.start_map(
        fbl=fbl,
        name=name,
        handler_spec='events.event_reloading_tasks.%s' % mapper_name,
        entity_kind='events.eventdata.DBEvent',
        handle_batch_size=20,
        filters=filters,
        extra_mapper_params={
            'update_geodata': update_geodata,
            'only_if_updated': only_if_updated,
        },
        queue=queue,
    )
def mr_load_potential_events(fbl):
    """Scan all Users and load the potential events associated with each."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Load Potential Events For Users',
        handler_spec='event_scraper.potential_events_reloading.map_load_potential_events',
        entity_kind='users.users.User',
    )
def mapreduce_create_sources_from_events(fbl):
    """Walk every DBEvent and create a Source entity from it."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Create Sources from Events',
        handler_spec='event_scraper.thing_db.map_create_source_from_event',
        entity_kind='events.eventdata.DBEvent',
    )
def mr_email_user(fbl):
    """Send the event email to every User via mapreduce."""
    # TODO: MOVE
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Email Users',
        handler_spec='search.email_events.map_email_user',
        entity_kind='users.users.User',
    )
def get(self):
    """Queue a mapreduce that refreshes FB data for every User."""
    # The handler is wrapped by mr_user_wrap, so it runs correctly
    # on a per-user basis.
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name='Load Users',
        handler_spec='users.user_tasks.map_load_fb_user',
        entity_kind='users.users.User',
    )
def get(self):
    """Queue a User-refresh mapreduce, passing along the Mailchimp list id."""
    # The handler is wrapped by mr_user_wrap, so it runs correctly
    # on a per-user basis.
    mailchimp_list_id = mailchimp_api.get_list_id()
    params = {
        'mailchimp_list_id': mailchimp_list_id,
    }
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name='Load Users',
        handler_spec='users.user_tasks.map_load_fb_user',
        entity_kind='users.users.User',
        extra_mapper_params=params,
        queue='fast-queue',
    )
def mr_generate_training_data(fbl):
    """Dump ML training data for all PotentialEvents to Cloud Storage."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Write Training Data',
        handler_spec='ml.gprediction.map_training_data_for_pevents',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        handle_batch_size=20,
        # Results are written as plain text into the GCS bucket below.
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
        queue=None,
    )
def mapreduce_export_sources(fbl, queue='fast-queue'):
    """Export every Source entity as plain text to Cloud Storage."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Export All Sources',
        handler_spec='event_scraper.thing_db.map_export_sources',
        entity_kind='event_scraper.thing_db.Source',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
        handle_batch_size=10,
        queue=queue,
    )
def mr_private_events(fbl):
    """Dump all private DBEvents as plain text to Cloud Storage."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Dump Private Events',
        handler_spec='servlets.tools.map_dump_private_events',
        entity_kind='events.eventdata.DBEvent',
        handle_batch_size=80,
        queue=None,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mapreduce_scrape_all_sources(fbl, min_potential_events=None, queue='slow-queue'):
    """Scrape events from every Source entity.

    min_potential_events is passed down as a mapper param rather than used as
    a datastore filter: a mapreduce filter on that property would force a
    range-shard on it, so the per-Source handler early-returns instead.
    TODO: ....maybe we do want a range-shard filter? save on loading all the
    useless sources...
    """
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Scrape All Sources',
        handler_spec='event_scraper.thing_scraper.map_scrape_events_from_sources',
        entity_kind='event_scraper.thing_db.Source',
        handle_batch_size=10,
        extra_mapper_params={'min_potential_events': min_potential_events},
        queue=queue,
        randomize_tokens=True,
    )
def mr_dump_events(fbl):
    """Dump FB JSON for all not-yet-reviewed PotentialEvents to GCS."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Dump Potential FB Event Data',
        handler_spec='logic.mr_dump.map_dump_fb_json',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        handle_batch_size=80,
        queue=None,
        # Only events nobody has looked at yet.
        filters=[('looked_at', '=', None)],
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mapreduce_export_sources(fbl, queue='fast-queue'):
    """Write every Source entity out to Cloud Storage as plain text."""
    gcs_writer_config = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Export All Sources',
        handler_spec='event_scraper.thing_db.map_export_sources',
        entity_kind='event_scraper.thing_db.Source',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_writer_config,
        handle_batch_size=10,
        queue=queue,
    )
def mapreduce_scrape_all_sources(fbl, min_potential_events=None, queue='super-slow-queue'):
    """Scrape events from every Source entity (super-slow-queue variant).

    min_potential_events is deliberately NOT a mapreduce filter — filtering
    on it would trigger a range-shard on that property — so it is passed
    down and checked as an early-return inside the per-Source handler.
    TODO: ....maybe we do want a range-shard filter? save on loading all the
    useless sources...
    """
    mapper_params = {'min_potential_events': min_potential_events}
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Scrape All Sources',
        handler_spec='event_scraper.thing_scraper.map_scrape_events_from_sources',
        entity_kind='event_scraper.thing_db.Source',
        handle_batch_size=10,
        extra_mapper_params=mapper_params,
        queue=queue,
        randomize_tokens=True,
    )
def mr_classify_potential_events(fbl):
    """Auto-classify all unreviewed PotentialEvents, logging results to GCS."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Auto-Classify Events',
        handler_spec='ml.mr_prediction.map_classify_events',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        # Only events nobody has looked at yet.
        filters=[('looked_at', '=', None)],
        handle_batch_size=20,
        queue='slow-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_private_events(fbl):
    """Dump private DBEvents to Cloud Storage as plain text."""
    gcs_writer_config = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Dump Private Events',
        handler_spec='servlets.tools.map_dump_private_events',
        entity_kind='events.eventdata.DBEvent',
        handle_batch_size=80,
        queue=None,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_writer_config,
    )
def mr_dump_events(fbl):
    """Dump FB JSON for unreviewed PotentialEvents into Cloud Storage."""
    # Restrict the scan to events nobody has looked at yet.
    unreviewed_only = [('looked_at', '=', None)]
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Dump Potential FB Event Data',
        handler_spec='logic.mr_dump.map_dump_fb_json',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        handle_batch_size=80,
        queue=None,
        filters=unreviewed_only,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_classify_potential_events(fbl, past_event):
    """Auto-add classification over unreviewed, should-look-at PotentialEvents.

    Args:
        fbl: the FB batch-lookup object threaded through to the mapper.
        past_event: if not None, additionally filter on the past_event flag.
    """
    filters = [
        ('looked_at', '=', None),
        ('should_look_at', '=', True),
    ]
    if past_event is not None:
        filters.append(('past_event', '=', past_event))
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Auto-Add Events',
        handler_spec='event_scraper.auto_add.map_classify_events',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        filters=filters,
        handle_batch_size=20,
        queue='fast-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_classify_potential_events(fbl, past_event):
    """Run the auto-add classifier over pending PotentialEvents.

    Only entities that are unreviewed (looked_at is None) and flagged
    should_look_at are scanned; past_event, when given, narrows it further.
    """
    event_filters = [('looked_at', '=', None), ('should_look_at', '=', True)]
    if past_event is not None:
        event_filters.append(('past_event', '=', past_event))
    gcs_writer_config = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Auto-Add Events',
        handler_spec='event_scraper.auto_add.map_classify_events',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        filters=event_filters,
        handle_batch_size=20,
        queue='fast-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_writer_config,
    )
def mr_load_fb_events(fbl, load_attending=False, time_period=None, update_geodata=True, only_if_updated=True, queue='slow-queue'):
    """Reload FB event data (or attending lists) for all DBEvents."""
    # Choose the mapper and the human-readable label in one step.
    if load_attending:
        label, func_name = 'Event Attendings', 'map_load_fb_event_attending'
    else:
        label, func_name = 'Events', 'map_load_fb_event'
    if time_period:
        filters = [('search_time_period', '=', time_period)]
        name = 'Load %s %s' % (time_period, label)
    else:
        filters = []
        name = 'Load All %s' % label
    fb_mapreduce.start_map(
        fbl=fbl,
        name=name,
        handler_spec='events.event_reloading_tasks.%s' % func_name,
        entity_kind='events.eventdata.DBEvent',
        handle_batch_size=20,
        filters=filters,
        extra_mapper_params={
            'update_geodata': update_geodata,
            'only_if_updated': only_if_updated,
        },
        queue=queue,
    )