def get(self):
    """Launch a mapreduce that scans DBEvents and removes bad auto-added ones.

    Request params:
        time_period: if set, only scan events in that search_time_period.
        queue: task queue for the job (defaults to 'fast-queue').
        allow_deletes: '1' enables actual deletion; forwarded to the mapper.
    """
    time_period = self.request.get('time_period', None)
    queue = self.request.get('queue', 'fast-queue')
    filters = [('search_time_period', '=', time_period)] if time_period else []
    name = 'Delete %s Bad Autoadds' % time_period if time_period else 'Delete All Bad Autoadds'
    allow_deletes = self.request.get('allow_deletes', None) == '1'
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name=name,
        handler_spec='dancedeets.events.event_reloading_tasks.map_maybe_delete_bad_event',
        entity_kind='dancedeets.events.eventdata.DBEvent',
        filters=filters,
        extra_mapper_params={'allow_deletes': allow_deletes},
        queue=queue,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def get(self):
    """Launch a mapreduce that emits sitemap entries for DBEvents.

    Request params:
        queue: task queue for the job (defaults to 'fast-queue').
        time_period: optional search_time_period filter.
        vertical: optional verticals filter; included in the job name.
    """
    queue = self.request.get('queue', 'fast-queue')
    time_period = self.request.get('time_period', None)
    vertical = self.request.get('vertical', None)
    filters = []
    vertical_string = ''
    if vertical:
        filters.append(('verticals', '=', vertical))
        vertical_string = '%s ' % vertical
    if time_period:
        filters.append(('search_time_period', '=', time_period))
        name = 'Generate %s %sSitemaps' % (time_period, vertical_string)
    else:
        name = 'Generate %sSitemaps' % vertical_string
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name=name,
        handler_spec='dancedeets.sitemaps.events.map_sitemap_event',
        entity_kind='dancedeets.events.eventdata.DBEvent',
        handle_batch_size=20,
        filters=filters,
        queue=queue,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mapreduce_create_sources_from_events(fbl):
    """Start a mapreduce over all DBEvents that derives Source entities from them."""
    fb_mapreduce.start_map(
        fbl,
        'Create Sources from Events',
        'dancedeets.event_scraper.thing_db.map_create_sources_from_event',
        'dancedeets.events.eventdata.DBEvent',
    )
def get(self):
    """Kick off the bad-autoadd deletion mapreduce over DBEvents.

    Reads 'time_period' (optional filter), 'queue' (default 'fast-queue'),
    and 'allow_deletes' ('1' to actually delete) from the request.
    """
    time_period = self.request.get('time_period', None)
    queue = self.request.get('queue', 'fast-queue')
    filters = []
    if time_period:
        filters.append(('search_time_period', '=', time_period))
        name = 'Delete %s Bad Autoadds' % time_period
    else:
        name = 'Delete All Bad Autoadds'
    allow_deletes = self.request.get('allow_deletes', None) == '1'
    extra_mapper_params = {
        'allow_deletes': allow_deletes,
    }
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name=name,
        handler_spec='dancedeets.events.event_reloading_tasks.map_maybe_delete_bad_event',
        entity_kind='dancedeets.events.eventdata.DBEvent',
        filters=filters,
        extra_mapper_params=extra_mapper_params,
        queue=queue,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_load_potential_events(fbl):
    """Start a mapreduce that loads potential events for every user
    with a still-valid OAuth token."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Load Potential Events For Users',
        handler_spec='dancedeets.event_scraper.potential_events_reloading.map_load_potential_events',
        entity_kind='dancedeets.users.users.User',
        filters=[('expired_oauth_token', '=', False)],
    )
def mr_email_user(fbl):
    """Start a mapreduce that emails event digests to every User."""
    # TODO: MOVE
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Email Users',
        handler_spec='dancedeets.search.email_events.map_email_user',
        entity_kind='dancedeets.users.users.User',
    )
def mr_load_potential_events(fbl):
    """Reload potential events for all users whose OAuth token has not expired."""
    user_filters = [('expired_oauth_token', '=', False)]
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Load Potential Events For Users',
        handler_spec='dancedeets.event_scraper.potential_events_reloading.map_load_potential_events',
        entity_kind='dancedeets.users.users.User',
        filters=user_filters,
    )
def mr_private_events(fbl):
    """Start a mapreduce that dumps private events to Google Cloud Storage."""
    fb_mapreduce.start_map(
        fbl,
        'Dump Private Events',
        'dancedeets.servlets.tools.map_dump_private_events',
        'dancedeets.events.eventdata.DBEvent',
        handle_batch_size=80,
        queue=None,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_generate_training_data(fbl):
    """Start a mapreduce over PotentialEvents that writes ML training data to GCS."""
    gcs_writer = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Write Training Data',
        handler_spec='dancedeets.ml.gprediction.map_training_data_for_pevents',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        handle_batch_size=20,
        entity_kind='dancedeets.event_scraper.potential_events.PotentialEvent',
        output_writer=gcs_writer,
        queue=None,
    )
def mr_private_events(fbl):
    """Dump private events to Cloud Storage via a mapreduce over DBEvents."""
    writer_config = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl,
        'Dump Private Events',
        'dancedeets.servlets.tools.map_dump_private_events',
        'dancedeets.events.eventdata.DBEvent',
        handle_batch_size=80,
        queue=None,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=writer_config,
    )
def mr_dump_events(fbl):
    """Dump the raw FB JSON of not-yet-reviewed PotentialEvents to GCS."""
    fb_mapreduce.start_map(
        fbl,
        'Dump Potential FB Event Data',
        'dancedeets.logic.mr_dump.map_dump_fb_json',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        handle_batch_size=80,
        queue=None,
        filters=[('looked_at', '=', None)],
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_generate_training_data(fbl):
    """Write ML training data for PotentialEvents into Cloud Storage."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Write Training Data',
        handler_spec='dancedeets.ml.gprediction.map_training_data_for_pevents',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        handle_batch_size=20,
        entity_kind='dancedeets.event_scraper.potential_events.PotentialEvent',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
        queue=None,
    )
def mr_classify_potential_events(fbl):
    """Auto-classify not-yet-reviewed PotentialEvents on the slow queue,
    writing results to Cloud Storage."""
    fb_mapreduce.start_map(
        fbl,
        'Auto-Classify Events',
        'dancedeets.ml.mr_prediction.map_classify_events',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        filters=[('looked_at', '=', None)],
        handle_batch_size=20,
        queue='slow-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mapreduce_scrape_all_sources(fbl, min_potential_events=None, queue='slow-queue'):
    """Scrape events from every Source via mapreduce.

    min_potential_events is deliberately NOT a datastore filter: putting a
    '>' comparison in the mapreduce filters would force a range-shard on that
    property. Instead it is passed down in extra_mapper_params so each Source
    can early-return during processing.
    TODO: ....maybe we do want a range-shard filter? save on loading all the
    useless sources...
    """
    fb_mapreduce.start_map(
        fbl,
        'Scrape All Sources',
        'dancedeets.event_scraper.thing_scraper.map_scrape_events_from_sources',
        'dancedeets.event_scraper.thing_db.Source',
        handle_batch_size=10,
        extra_mapper_params={'min_potential_events': min_potential_events},
        queue=queue,
        randomize_tokens=True,
    )
def mr_dump_events(fbl):
    """Dump raw FB JSON for unreviewed PotentialEvents into Cloud Storage."""
    unreviewed = [('looked_at', '=', None)]
    fb_mapreduce.start_map(
        fbl,
        'Dump Potential FB Event Data',
        'dancedeets.logic.mr_dump.map_dump_fb_json',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        handle_batch_size=80,
        queue=None,
        filters=unreviewed,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_classify_potential_events(fbl):
    """Run ML classification over PotentialEvents not yet looked at."""
    pending_only = [('looked_at', '=', None)]
    fb_mapreduce.start_map(
        fbl,
        'Auto-Classify Events',
        'dancedeets.ml.mr_prediction.map_classify_events',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        filters=pending_only,
        handle_batch_size=20,
        queue='slow-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def get(self):
    """Launch a mapreduce that cleans up the verticals on every DBEvent.

    Request params:
        queue: task queue for the job (defaults to 'fast-queue').
        allow_deletes: '1' enables destructive cleanup; forwarded to the mapper.
    """
    queue = self.request.get('queue', 'fast-queue')
    allow_deletes = self.request.get('allow_deletes', None) == '1'
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name='Cleanup Verticals',
        handler_spec='dancedeets.events.event_reloading_tasks.map_cleanup_verticals',
        entity_kind='dancedeets.events.eventdata.DBEvent',
        extra_mapper_params={'allow_deletes': allow_deletes},
        queue=queue,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def get(self):
    """Launch a mapreduce that reloads FB data for users.

    Request params:
        all_users: '1' loads every User; otherwise only users whose
            OAuth token has not expired.
    """
    all_users = self.request.get('all_users', '0') == '1'
    filters = [] if all_users else [('expired_oauth_token', '=', False)]
    # this calls a map function wrapped by mr_user_wrap, so it works correctly on a per-user basis
    mailchimp_list_id = mailchimp_api.get_list_id()
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name='Load %sUsers' % ('All ' if all_users else ''),
        handler_spec='dancedeets.users.user_tasks.map_load_fb_user',
        entity_kind='dancedeets.users.users.User',
        filters=filters,
        extra_mapper_params={
            'mailchimp_list_id': mailchimp_list_id,
        },
        queue='fast-queue'
    )
def get(self):
    """Start the verticals-cleanup mapreduce over all DBEvents.

    'queue' and 'allow_deletes' ('1' to enable deletion) come from the request.
    """
    queue = self.request.get('queue', 'fast-queue')
    allow_deletes = self.request.get('allow_deletes', None) == '1'
    extra_mapper_params = {
        'allow_deletes': allow_deletes,
    }
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name='Cleanup Verticals',
        handler_spec='dancedeets.events.event_reloading_tasks.map_cleanup_verticals',
        entity_kind='dancedeets.events.eventdata.DBEvent',
        extra_mapper_params=extra_mapper_params,
        queue=queue,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_classify_potential_events(fbl, past_event, dancey_only):
    """Auto-add events by classifying PotentialEvents.

    Args:
        fbl: FB lookup object passed through to start_map.
        past_event: if not None, restrict to entities with that past_event flag.
        dancey_only: restrict to entities flagged should_look_at.
    """
    filters = []
    if dancey_only:
        filters.append(('should_look_at', '=', True))
    if past_event is not None:
        filters.append(('past_event', '=', past_event))
    fb_mapreduce.start_map(
        fbl,
        'Auto-Add Events',
        'dancedeets.event_scraper.auto_add.map_classify_events',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        filters=filters,
        # Make sure we don't process so many that we cause the tasks to time out
        handle_batch_size=10,
        queue='fast-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_classify_potential_events(fbl, past_event, dancey_only):
    """Classify PotentialEvents for auto-adding, optionally restricted by
    should_look_at (dancey_only) and/or the past_event flag."""
    event_filters = []
    if dancey_only:
        event_filters.append(('should_look_at', '=', True))
    if past_event is not None:
        event_filters.append(('past_event', '=', past_event))
    fb_mapreduce.start_map(
        fbl,
        'Auto-Add Events',
        'dancedeets.event_scraper.auto_add.map_classify_events',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        filters=event_filters,
        # Small batches keep individual tasks from timing out.
        handle_batch_size=10,
        queue='fast-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_load_fb_events(
    fbl, display_event=False, load_attending=False, time_period=None, disable_updates=None, only_if_updated=True,
    queue='slow-queue', vertical=None
):
    """Start a mapreduce that reloads FB event data for DBEvents.

    display_event selects the resave-display-event mapper instead of the
    full reload mapper. time_period and vertical optionally filter the
    scanned entities and are reflected in the job name.
    NOTE(review): load_attending is accepted but not referenced in this
    body — presumably kept for caller compatibility; confirm before removing.
    """
    if display_event:
        event_or_attending = 'Display Events'
        mr_func = 'map_resave_display_event'
    else:
        event_or_attending = 'Events'
        mr_func = 'map_load_fb_event'
    filters = []
    if vertical:
        filters.append(('verticals', '=', vertical))
        event_or_attending = '%s %s' % (vertical, event_or_attending)
    if time_period:
        filters.append(('search_time_period', '=', time_period))
        name = 'Load %s %s' % (time_period, event_or_attending)
    else:
        name = 'Load All %s' % (event_or_attending)
    fb_mapreduce.start_map(
        fbl=fbl,
        name=name,
        handler_spec='dancedeets.events.event_reloading_tasks.%s' % mr_func,
        entity_kind='dancedeets.events.eventdata.DBEvent',
        handle_batch_size=10,
        filters=filters,
        extra_mapper_params={'disable_updates': disable_updates, 'only_if_updated': only_if_updated},
        queue=queue,
    )
def mr_load_fb_events(
    fbl, display_event=False, load_attending=False, time_period=None, disable_updates=None, only_if_updated=True,
    queue='slow-queue', vertical=None
):
    """Reload FB event data (or just the display event, when display_event
    is set) for DBEvents, filtered by time_period and/or vertical.

    NOTE(review): load_attending is unused here — looks vestigial; verify
    against callers before removing.
    """
    if display_event:
        mr_func = 'map_resave_display_event'
        event_or_attending = 'Display Events'
    else:
        mr_func = 'map_load_fb_event'
        event_or_attending = 'Events'
    filters = []
    if vertical:
        filters.append(('verticals', '=', vertical))
        event_or_attending = '%s %s' % (vertical, event_or_attending)
    if time_period:
        filters.append(('search_time_period', '=', time_period))
        name = 'Load %s %s' % (time_period, event_or_attending)
    else:
        name = 'Load All %s' % (event_or_attending)
    fb_mapreduce.start_map(
        fbl=fbl,
        name=name,
        handler_spec='dancedeets.events.event_reloading_tasks.%s' % mr_func,
        entity_kind='dancedeets.events.eventdata.DBEvent',
        handle_batch_size=10,
        filters=filters,
        extra_mapper_params={
            'disable_updates': disable_updates,
            'only_if_updated': only_if_updated
        },
        queue=queue,
    )