Пример #1
0
def mapreduce_create_sources_from_events(fbl):
    """Launch a mapreduce that derives Source entities from every DBEvent."""
    # Keyword form of start_map's leading arguments (fbl, name,
    # handler_spec, entity_kind), matching the other callers in this file.
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Create Sources from Events',
        handler_spec='event_scraper.thing_db.map_create_sources_from_event',
        entity_kind='events.eventdata.DBEvent',
    )
Пример #2
0
 def get(self):
     """GET handler: start the 'delete bad autoadds' mapreduce.

     Request params: time_period (optional entity filter), queue (task
     queue name, default 'fast-queue'), allow_deletes ('1' enables actual
     deletion).
     """
     time_period = self.request.get('time_period', None)
     queue = self.request.get('queue', 'fast-queue')
     if time_period:
         filters = [('search_time_period', '=', time_period)]
         name = 'Delete %s Bad Autoadds' % time_period
     else:
         filters = []
         name = 'Delete All Bad Autoadds'
     # Destructive deletes are opt-in via the literal string '1'.
     allow_deletes = self.request.get('allow_deletes', None) == '1'
     fb_mapreduce.start_map(
         fbl=self.fbl,
         name=name,
         handler_spec='events.event_reloading_tasks.map_maybe_delete_bad_event',
         entity_kind='events.eventdata.DBEvent',
         filters=filters,
         extra_mapper_params={'allow_deletes': allow_deletes},
         queue=queue,
         output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
         output_writer={
             'mime_type': 'text/plain',
             'bucket_name': 'dancedeets-hrd.appspot.com',
         },
     )
Пример #3
0
def mr_load_fb_events(fbl,
                      load_attending=False,
                      time_period=None,
                      update_geodata=True,
                      only_if_updated=True,
                      queue='slow-queue'):
    """Start a mapreduce that reloads Facebook data for DBEvent entities.

    load_attending switches between reloading the events themselves and
    their attending lists; time_period, when set, restricts the run via a
    search_time_period filter and is reflected in the job name.
    """
    if load_attending:
        noun, mr_func = 'Event Attendings', 'map_load_fb_event_attending'
    else:
        noun, mr_func = 'Events', 'map_load_fb_event'
    if time_period:
        filters = [('search_time_period', '=', time_period)]
        name = 'Load %s %s' % (time_period, noun)
    else:
        filters = []
        name = 'Load All %s' % noun
    fb_mapreduce.start_map(
        fbl=fbl,
        name=name,
        handler_spec='events.event_reloading_tasks.%s' % mr_func,
        entity_kind='events.eventdata.DBEvent',
        handle_batch_size=20,
        filters=filters,
        extra_mapper_params={
            'update_geodata': update_geodata,
            'only_if_updated': only_if_updated,
        },
        queue=queue,
    )
def mr_load_potential_events(fbl):
    """Reload potential events for every User entity via mapreduce."""
    handler = 'event_scraper.potential_events_reloading.map_load_potential_events'
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Load Potential Events For Users',
        handler_spec=handler,
        entity_kind='users.users.User',
    )
Пример #5
0
def mapreduce_create_sources_from_events(fbl):
    """Launch a mapreduce over all DBEvents to create Source entities."""
    # Keyword argument names mirror the keyword-style start_map calls
    # elsewhere in this file.
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Create Sources from Events',
        handler_spec='event_scraper.thing_db.map_create_source_from_event',
        entity_kind='events.eventdata.DBEvent',
    )
Пример #6
0
def mr_email_user(fbl):
    """Fan out the 'Email Users' mapreduce across all User entities."""
    # TODO: MOVE (the handler lives in search.email_events, not this module)
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Email Users',
        handler_spec='search.email_events.map_email_user',
        entity_kind='users.users.User',
    )
Пример #7
0
def mr_email_user(fbl):
    """Run the user-emailing map function over every User."""
    handler = 'search.email_events.map_email_user'  # TODO: MOVE
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Email Users',
        handler_spec=handler,
        entity_kind='users.users.User',
    )
Пример #8
0
 def get(self):
     """GET handler: kick off the 'Load Users' mapreduce."""
     # The map function is wrapped by mr_user_wrap, so it operates
     # correctly on a per-user basis.
     job_args = dict(
         name='Load Users',
         handler_spec='users.user_tasks.map_load_fb_user',
         entity_kind='users.users.User',
     )
     fb_mapreduce.start_map(fbl=self.fbl, **job_args)
Пример #9
0
 def get(self):
     """GET handler that starts the 'Load Users' mapreduce job."""
     # map_load_fb_user is wrapped by mr_user_wrap, giving correct
     # per-user behavior.
     fb_mapreduce.start_map(
         fbl=self.fbl,
         name='Load Users',
         entity_kind='users.users.User',
         handler_spec='users.user_tasks.map_load_fb_user',
     )
 def get(self):
     """GET handler: start the 'Load Users' mapreduce, passing the
     current Mailchimp list id to each mapper."""
     # Map function is wrapped by mr_user_wrap, so it works correctly on
     # a per-user basis.
     list_id = mailchimp_api.get_list_id()
     fb_mapreduce.start_map(
         fbl=self.fbl,
         name='Load Users',
         handler_spec='users.user_tasks.map_load_fb_user',
         entity_kind='users.users.User',
         extra_mapper_params={'mailchimp_list_id': list_id},
         queue='fast-queue',
     )
Пример #11
0
def mr_generate_training_data(fbl):
    """Dump ML training data for every PotentialEvent to Cloud Storage."""
    gcs_writer = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Write Training Data',
        handler_spec='ml.gprediction.map_training_data_for_pevents',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        handle_batch_size=20,
        queue=None,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_writer,
    )
Пример #12
0
def mapreduce_export_sources(fbl, queue='fast-queue'):
    """Export every Source entity to Cloud Storage as plain text."""
    gcs_writer = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    # Keyword form of the leading positional args, matching the
    # keyword-style start_map calls elsewhere in this file.
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Export All Sources',
        handler_spec='event_scraper.thing_db.map_export_sources',
        entity_kind='event_scraper.thing_db.Source',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_writer,
        handle_batch_size=10,
        queue=queue,
    )
Пример #13
0
def mr_private_events(fbl):
    """Dump private DBEvents to Cloud Storage via mapreduce."""
    gcs_writer = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Dump Private Events',
        handler_spec='servlets.tools.map_dump_private_events',
        entity_kind='events.eventdata.DBEvent',
        handle_batch_size=80,
        queue=None,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_writer,
    )
Пример #14
0
def mapreduce_scrape_all_sources(fbl, min_potential_events=None, queue='slow-queue'):
    """Scrape events from every Source entity.

    min_potential_events is intentionally NOT applied as a mapreduce
    filter: a filter on that property would force a range-shard on it.
    Instead it is passed down in extra_mapper_params and used as an
    early-return inside the per-Source processing.
    TODO: ....maybe we do want a range-shard filter? save on loading all
    the useless sources...
    """
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Scrape All Sources',
        handler_spec='event_scraper.thing_scraper.map_scrape_events_from_sources',
        entity_kind='event_scraper.thing_db.Source',
        handle_batch_size=10,
        extra_mapper_params={'min_potential_events': min_potential_events},
        queue=queue,
        randomize_tokens=True,
    )
Пример #15
0
def mr_dump_events(fbl):
    """Dump FB JSON for not-yet-looked-at PotentialEvents to Cloud Storage."""
    gcs_writer = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Dump Potential FB Event Data',
        handler_spec='logic.mr_dump.map_dump_fb_json',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        handle_batch_size=80,
        queue=None,
        filters=[('looked_at', '=', None)],
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_writer,
    )
Пример #16
0
def mapreduce_export_sources(fbl, queue='fast-queue'):
    """Run the 'Export All Sources' mapreduce, writing results to GCS."""
    writer_spec = 'mapreduce.output_writers.GoogleCloudStorageOutputWriter'
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Export All Sources',
        handler_spec='event_scraper.thing_db.map_export_sources',
        entity_kind='event_scraper.thing_db.Source',
        output_writer_spec=writer_spec,
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
        handle_batch_size=10,
        queue=queue,
    )
Пример #17
0
def mapreduce_scrape_all_sources(fbl, min_potential_events=None, queue='super-slow-queue'):
    """Scrape events from all Source entities via mapreduce.

    We deliberately avoid a mapreduce filter on min_potential_events>1,
    because the framework would then range-shard on that property.
    The threshold is passed through extra_mapper_params and checked as an
    early-return in the per-Source processing instead.
    TODO: ....maybe we do want a range-shard filter? save on loading all
    the useless sources...
    """
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Scrape All Sources',
        handler_spec='event_scraper.thing_scraper.map_scrape_events_from_sources',
        entity_kind='event_scraper.thing_db.Source',
        handle_batch_size=10,
        extra_mapper_params={'min_potential_events': min_potential_events},
        queue=queue,
        randomize_tokens=True,
    )
Пример #18
0
def mr_classify_potential_events(fbl):
    """Auto-classify all not-yet-looked-at PotentialEvents, logging to GCS."""
    gcs_writer = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Auto-Classify Events',
        handler_spec='ml.mr_prediction.map_classify_events',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        filters=[('looked_at', '=', None)],
        handle_batch_size=20,
        queue='slow-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_writer,
    )
Пример #19
0
def mr_private_events(fbl):
    """Start the 'Dump Private Events' mapreduce over all DBEvents."""
    writer_spec = 'mapreduce.output_writers.GoogleCloudStorageOutputWriter'
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Dump Private Events',
        handler_spec='servlets.tools.map_dump_private_events',
        entity_kind='events.eventdata.DBEvent',
        handle_batch_size=80,
        queue=None,
        output_writer_spec=writer_spec,
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
Пример #20
0
def mr_dump_events(fbl):
    """Dump FB JSON for unreviewed PotentialEvents to Cloud Storage."""
    writer_spec = 'mapreduce.output_writers.GoogleCloudStorageOutputWriter'
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Dump Potential FB Event Data',
        handler_spec='logic.mr_dump.map_dump_fb_json',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        handle_batch_size=80,
        queue=None,
        filters=[('looked_at', '=', None)],
        output_writer_spec=writer_spec,
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
Пример #21
0
def mr_classify_potential_events(fbl, past_event):
    """Auto-add PotentialEvents that haven't been reviewed yet.

    past_event, when not None, additionally restricts the run to entities
    whose past_event property matches.
    """
    filters = [('looked_at', '=', None), ('should_look_at', '=', True)]
    if past_event is not None:
        filters.append(('past_event', '=', past_event))
    gcs_writer = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Auto-Add Events',
        handler_spec='event_scraper.auto_add.map_classify_events',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        filters=filters,
        handle_batch_size=20,
        queue='fast-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_writer,
    )
Пример #22
0
def mr_classify_potential_events(fbl, past_event):
    """Run the 'Auto-Add Events' mapreduce over unreviewed PotentialEvents.

    A non-None past_event narrows the run with an extra equality filter.
    """
    filters = [('looked_at', '=', None), ('should_look_at', '=', True)]
    if past_event is not None:
        filters.append(('past_event', '=', past_event))
    writer_spec = 'mapreduce.output_writers.GoogleCloudStorageOutputWriter'
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Auto-Add Events',
        handler_spec='event_scraper.auto_add.map_classify_events',
        entity_kind='event_scraper.potential_events.PotentialEvent',
        filters=filters,
        handle_batch_size=20,
        queue='fast-queue',
        output_writer_spec=writer_spec,
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_load_fb_events(fbl, load_attending=False, time_period=None, update_geodata=True, only_if_updated=True, queue='slow-queue'):
    """Start a mapreduce over DBEvent that refetches FB event data
    (or, with load_attending, the events' attending lists)."""
    mr_func = 'map_load_fb_event_attending' if load_attending else 'map_load_fb_event'
    event_or_attending = 'Event Attendings' if load_attending else 'Events'
    filters = []
    if time_period:
        filters.append(('search_time_period', '=', time_period))
    name = ('Load %s %s' % (time_period, event_or_attending)
            if time_period else 'Load All %s' % event_or_attending)
    fb_mapreduce.start_map(
        fbl=fbl,
        name=name,
        handler_spec='events.event_reloading_tasks.%s' % mr_func,
        entity_kind='events.eventdata.DBEvent',
        handle_batch_size=20,
        filters=filters,
        extra_mapper_params={'update_geodata': update_geodata, 'only_if_updated': only_if_updated},
        queue=queue,
    )