Пример #1
0
def register(target_model, options):
    """Register *target_model* for denormalization.

    options['sources'] maps a source field name on the target model to a
    configuration dict with:
      - 'fields': list of source-model field names to denormalize
      - 'strategy': 'cursor' (default) or 'mapreduce'
      - 'storage': 'scalar' (default; one mirrored column per field) or
        'shared_dict' (values stored under keys of a JSON 'denorm_data' field)
      - 'model': required for 'shared_dict' storage (the source model cannot
        be derived from the target's list field)
      - 'label', 'throttles', 'shards': optional throttling / sharding config

    Connects post_init/pre_save/post_save receivers on both the target and
    each source model, and records the relationships in core.TARGET_GRAPH
    and core.SOURCE_GRAPH for the receivers to consume.

    Raises:
        AttributeError: target_model is not a Model, or a configured field
            has the wrong type or already exists on the target.
        FieldDoesNotExist: a configured field name does not exist.
        ValueError: an unknown 'storage' option is configured.
    """
    logging.info('[denorm.register] %s' % target_model)

    if not hasattr(target_model, '_meta'):
        raise AttributeError('The model being registered must derive from Model.')

    target = util.get_model_name(target_model)
    target_options = target_model._meta

    # register signals for target. use dispatch_uid to prevent duplicates.
    # about signals: https://docs.djangoproject.com/en/1.8/topics/signals/
    # built-in signals: https://docs.djangoproject.com/en/1.8/ref/signals/
    db_signals.post_init.connect(receivers.target_model_post_init, sender=target_model, dispatch_uid='denorm_target_%s_post_init'%target)
    db_signals.pre_save.connect(receivers.target_model_pre_save, sender=target_model, dispatch_uid='denorm_target_%s_pre_save'%target)
    db_signals.post_save.connect(receivers.target_model_post_save, sender=target_model, dispatch_uid='denorm_target_%s_post_save'%target)

    target_graph = core.TARGET_GRAPH[target_model] = core.TARGET_GRAPH.get(target_model, {})

    for source, source_dict in options['sources'].iteritems():

        strategy = source_dict.get('strategy', 'cursor') # options are: [cursor, mapreduce]. defaults to cursor.

        # TODO: support storage options 'list' and 'dict'
        storage = source_dict.get('storage', 'scalar') # choices: [scalar, shared_dict]

        if storage == 'scalar':

            target_foreign_key = target_options.get_field(source)
            # if field did not exist, then get_field would have raised FieldDoesNotExist

            if not isinstance(target_foreign_key, ForeignKey):
                raise AttributeError('The source field %s.%s must be a ForeignKey' % (target, source))

            source_model = target_foreign_key.rel.to

        elif storage == 'shared_dict':

            target_foreign_key_list = target_options.get_field(Inflector().pluralize(source))
            # if field did not exist, then get_field would have raised FieldDoesNotExist

            if not isinstance(target_foreign_key_list, tb_fields.ListField):
                raise AttributeError('The target field %s.%s must be a ListField' % (target, source))

            # model must be explicitly configured, because target field does not specify it
            source_model = source_dict.get('model')

            # create denorm data field
            try:
                target_options.get_field('denorm_data')
            except FieldDoesNotExist:
                # field should not exist. now let's create it.

                denorm_data_field = JSONField(name='denorm_data', null=True, blank=True,
                                              decoder_kwargs={'cls': json_fields.JSONDecoder, 'parse_float':float})
                denorm_data_field.contribute_to_class(target_model, 'denorm_data')

            else:
                # field was already created on prior source field
                # TODO: do at beginning of target model configuration to make sure developer did not define it
                pass

        else:
            # BUG FIX: previously this branch only logged an error and fell
            # through, leaving source_model undefined (NameError below) or
            # silently reusing the previous iteration's source_model. Fail fast.
            raise ValueError('[denorm.register] invalid storage option %s' % storage)

        source_options = source_model._meta

        # register signals for source. use dispatch_uid to prevent duplicates.
        db_signals.post_init.connect(receivers.source_model_post_init, sender=source_model, dispatch_uid='denorm_source_%s_post_init'%source)
        db_signals.pre_save.connect(receivers.source_model_pre_save, sender=source_model, dispatch_uid='denorm_source_%s_pre_save'%source)
        db_signals.post_save.connect(receivers.source_model_post_save, sender=source_model, dispatch_uid='denorm_source_%s_post_save'%source)

        # FIXME: it's quirky that label and throttles must be configured under each target-source in app's denorm_fields,
        # FIXME: but it gets applied here for entire source (not target dependent). it probably should be configured once
        # FIXME: per source, but how do accomplish that in the current configuration design?
        source_graph = core.SOURCE_GRAPH[source_model] = core.SOURCE_GRAPH.get(source_model, {
            'label': source_dict.get('label'),
            'throttles': source_dict.get('throttles'),
            'fields': {}
        })
        source_graph_fields = source_graph['fields']

        # mark model as registered for denormalization
        source_model._denorm_registered = True

        # clone list, so that if we add _id below, it doesn't corrupt original list
        denorm_field_names = list(source_dict['fields'])

        target_graph[source] = {
            'fields': denorm_field_names,
            'storage': storage,
            'source_model': source_model # important for shared_dict storage, because we don't know source model based on list field
        }

        for i, denorm_field_name in enumerate(denorm_field_names):

            source_field = source_options.get_field(denorm_field_name)
            # if field did not exist, then get_field would have raised FieldDoesNotExist

            # denormalized target field is named <source>_<field>
            target_field_name = '%s_%s' % (source, denorm_field_name)

            if storage == 'scalar':

                try:
                    target_options.get_field(target_field_name)
                except FieldDoesNotExist:
                    # field should not exist, so we're good
                    pass
                else:
                    raise AttributeError('The denorm field %s.%s must not already exist' % (target_model.__name__, target_field_name))

                # create target field of same type as source_field
                target_field = _copy_field(source_field, target_field_name, target)
                target_field.contribute_to_class(target_model, target_field_name)

            else:
                assert(storage == 'shared_dict')

                # denorm_data field was already created outside this iteration loop
                pass

            # if source field is a foreign key, then we reference its key rather than the actual related field,
            # because we are not dereferencing further than the key, and do not want to do an extra db lookup.
            if isinstance(source_field, ForeignKey):
                denorm_field_name += '_id'
                denorm_field_names[i] = denorm_field_name

            source_field_graph = source_graph_fields[denorm_field_name] = source_graph_fields.get(denorm_field_name, [])
            source_field_graph.append({
                'target_model': target_model,
                'source': source,
                'strategy': strategy,
                'storage': storage,
                'shards': source_dict.get('shards') and util.convert_func_to_string(source_dict['shards'])
            })
Пример #2
0
def source_model_post_save(sender, instance, created, **kwargs):
    """post_save receiver for denormalization source models.

    Consumes the affected-targets map computed by source_model_pre_save,
    queues one pull task per affected target model, records a single Task
    row (used for throttling), and finally re-runs post_init so the
    instance can be saved again with fresh original values.
    """
    # aliases for readability
    src_model = sender
    src_instance = instance

    affected = src_instance._denorm_affected_targets
    if not affected:
        # pre_save found no changed denorm fields; nothing to do
        return

    # invariant per-save values, hoisted out of the loop
    src_name = util.get_model_name(src_model)
    src_id = src_instance.id

    #
    # create a task for each affected target to update its instances
    #

    for tgt_model, tgt_info in affected.iteritems():

        tgt_name = util.get_model_name(tgt_model)

        # if storage is shared_dict, the task will pluralize the related
        # field name to locate the target model's list field
        task_strategy = tgt_info['strategy']
        task_shards = tgt_info['shards']

        payload = {
            'created': timezone.now().isoformat(),
            'strategy': task_strategy,
            'storage': tgt_info['storage'],
            'instance_id': src_id,
            'source_model': src_name,
            'target_model': tgt_name,
            'related_field': tgt_info['related'],
            'fields': tgt_info['fields'],
            # TODO: queue name should be configurable
            'queue_name': 'denorm',
        }

        if task_strategy == 'mapreduce':
            if task_shards:
                payload['shards'] = handler_for_name(task_shards)(src_instance)
            else:
                payload['shards'] = DEFAULT_MAP_REDUCE_SHARDS

        body = util.dump_json(payload)

        logging.info('[denorm source_model_post_save] queue task payload = %s' % body)

        # one pull task per target, tagged so workers can lease by source+target
        task_tag = 'DENORM_SOURCE_%s_%s_TARGET_%s' % (src_name, src_id, tgt_name)
        taskqueue.Queue('pull-denorm').add(
            taskqueue.Task(payload=body, tag=task_tag, method='PULL'))

    # create ** one ** Task model instance used to track denorm tasks per source, particularly for throttling
    models.get_task_model().objects.create(
        source_model=src_name,
        source_instance_id=src_id,
        user=src_instance._denorm_user,
        label=src_instance._denorm_label)

    # re-run post_init to reset _denorm_orig_values in case this instance gets saved again
    source_model_post_init(src_model, src_instance)
Пример #3
0
def source_model_pre_save(sender, instance, raw, using, update_fields,
                          **kwargs):
    """pre_save receiver for denormalization source models.

    Compares the instance's current field values against the values captured
    at post_init and records the affected targets on the instance
    (consumed by source_model_post_save). Also resolves the throttling
    label and user.

    Raises:
        exceptions.DenormThrottled: a configured throttle rate is exceeded.
    """

    # for clarity
    source_model = sender
    source_instance = instance
    created = not source_instance.id

    source_instance._denorm_affected_targets = affected_targets = {}

    # newly created instances will not need denormalization
    if created:
        return

    # denorm turned off explicitly on this instance
    if not getattr(source_instance, '_denorm', True):
        return

    source_graph = core.SOURCE_GRAPH[source_model]

    #
    # iterate through all fields to build up set of distinct affected targets
    # that the post_save signal receiver will process.
    #

    source_graph_fields = source_graph['fields']
    orig_values = source_instance._denorm_orig_values

    for source_field, targets in source_graph_fields.iteritems():

        old_value = orig_values[source_field]
        new_value = getattr(source_instance, source_field)

        if old_value != new_value:

            for target in targets:
                target_model = target['target_model']
                related_field_name = target['source']
                storage = target['storage']

                affected_targets[
                    target_model] = affected_target = affected_targets.get(
                        target_model, {
                            'related': related_field_name,
                            'strategy': target['strategy'],
                            'storage': storage,
                            'shards': target['shards'],
                            'fields': {}
                        })

                # when the task updates the target: for scalar storage the key
                # is simply the target model field name; for shared_dict it is
                # the dictionary key of the target's denorm_data field.
                affected_fields_for_target = affected_target['fields']
                affected_fields_for_target['%s_%s' %
                                           (related_field_name,
                                            source_field)] = new_value

    if not affected_targets:
        return

    #
    # check that denorm throttling threshold is not exceeded
    #

    # get user from thread-local variable set by middleware; superusers and
    # anonymous/non-user principals are not throttled per-user
    user = middleware.get_current_user()
    if not user or not isinstance(user, get_user_model(
    )) or not user.is_authenticated() or user.is_superuser:
        user = None

    source_instance._denorm_user = user

    # get denorm label used for throttling.
    # BUG FIX: register() always seeds the graph with a 'label' key (possibly
    # None), so the previous membership test ('label' in source_graph) was
    # always true and would call None when no custom label was configured.
    # Test the value's truthiness instead.
    custom_label = source_graph.get('label')
    if custom_label:
        # custom label callable set by application
        label = custom_label(source_instance, user)
    elif user:
        # default label: per model, per user
        label = '%s_%s' % (util.get_model_name(source_model), str(user.id))
    else:
        # no label
        label = None

    source_instance._denorm_label = label

    throttles = source_graph.get('throttles')

    if not label or not throttles:
        # no throttling
        return

    # now validate each throttle
    # FIXME: we need to figure out if there is already a denorm task scheduled, and if so, then don't penalize throttle.
    # FIXME: perhaps we can use a Task.status field in combination with filter for source instance id.

    now = timezone.now()

    for throttle in throttles:
        num_requests, duration = util.parse_rate(throttle)

        # FIXME: this is a naive, inefficient implementation. we should cache task counts.
        if models.get_task_model().objects.filter(
                label=label,
                created__gt=now - timedelta(seconds=duration)
        ).count() >= num_requests:
            raise exceptions.DenormThrottled
Пример #4
0
def register(target_model, options):
    """Register *target_model* for denormalization.

    options['sources'] maps a source field name on the target model to a
    configuration dict with:
      - 'fields': list of source-model field names to denormalize
      - 'strategy': 'cursor' (default) or 'mapreduce'
      - 'storage': 'scalar' (default; one mirrored column per field) or
        'shared_dict' (values stored under keys of a JSON 'denorm_data' field)
      - 'model': required for 'shared_dict' storage (the source model cannot
        be derived from the target's list field)
      - 'label', 'throttles', 'shards': optional throttling / sharding config

    Connects post_init/pre_save/post_save receivers on both the target and
    each source model, and records the relationships in core.TARGET_GRAPH
    and core.SOURCE_GRAPH for the receivers to consume.

    Raises:
        AttributeError: target_model is not a Model, or a configured field
            has the wrong type or already exists on the target.
        FieldDoesNotExist: a configured field name does not exist.
        ValueError: an unknown 'storage' option is configured.
    """
    logging.info('[denorm.register] %s' % target_model)

    if not hasattr(target_model, '_meta'):
        raise AttributeError(
            'The model being registered must derive from Model.')

    target = util.get_model_name(target_model)
    target_options = target_model._meta

    # register signals for target. use dispatch_uid to prevent duplicates.
    # about signals: https://docs.djangoproject.com/en/1.8/topics/signals/
    # built-in signals: https://docs.djangoproject.com/en/1.8/ref/signals/
    db_signals.post_init.connect(receivers.target_model_post_init,
                                 sender=target_model,
                                 dispatch_uid='denorm_target_%s_post_init' %
                                 target)
    db_signals.pre_save.connect(receivers.target_model_pre_save,
                                sender=target_model,
                                dispatch_uid='denorm_target_%s_pre_save' %
                                target)
    db_signals.post_save.connect(receivers.target_model_post_save,
                                 sender=target_model,
                                 dispatch_uid='denorm_target_%s_post_save' %
                                 target)

    target_graph = core.TARGET_GRAPH[target_model] = core.TARGET_GRAPH.get(
        target_model, {})

    for source, source_dict in options['sources'].iteritems():

        strategy = source_dict.get(
            'strategy',
            'cursor')  # options are: [cursor, mapreduce]. defaults to cursor.

        # TODO: support storage options 'list' and 'dict'
        storage = source_dict.get('storage',
                                  'scalar')  # choices: [scalar, shared_dict]

        if storage == 'scalar':

            target_foreign_key = target_options.get_field(source)
            # if field did not exist, then get_field would have raised FieldDoesNotExist

            if not isinstance(target_foreign_key, ForeignKey):
                raise AttributeError(
                    'The source field %s.%s must be a ForeignKey' %
                    (target, source))

            source_model = target_foreign_key.rel.to

        elif storage == 'shared_dict':

            target_foreign_key_list = target_options.get_field(
                Inflector().pluralize(source))
            # if field did not exist, then get_field would have raised FieldDoesNotExist

            if not isinstance(target_foreign_key_list, tb_fields.ListField):
                raise AttributeError(
                    'The target field %s.%s must be a ListField' %
                    (target, source))

            # model must be explicitly configured, because target field does not specify it
            source_model = source_dict.get('model')

            # create denorm data field
            try:
                target_options.get_field('denorm_data')
            except FieldDoesNotExist:
                # field should not exist. now let's create it.

                denorm_data_field = JSONField(name='denorm_data',
                                              null=True,
                                              blank=True,
                                              decoder_kwargs={
                                                  'cls':
                                                  json_fields.JSONDecoder,
                                                  'parse_float': float
                                              })
                denorm_data_field.contribute_to_class(target_model,
                                                      'denorm_data')

            else:
                # field was already created on prior source field
                # TODO: do at beginning of target model configuration to make sure developer did not define it
                pass

        else:
            # BUG FIX: previously this branch only logged an error and fell
            # through, leaving source_model undefined (NameError below) or
            # silently reusing the previous iteration's source_model. Fail fast.
            raise ValueError('[denorm.register] invalid storage option %s' %
                             storage)

        source_options = source_model._meta

        # register signals for source. use dispatch_uid to prevent duplicates.
        db_signals.post_init.connect(
            receivers.source_model_post_init,
            sender=source_model,
            dispatch_uid='denorm_source_%s_post_init' % source)
        db_signals.pre_save.connect(receivers.source_model_pre_save,
                                    sender=source_model,
                                    dispatch_uid='denorm_source_%s_pre_save' %
                                    source)
        db_signals.post_save.connect(
            receivers.source_model_post_save,
            sender=source_model,
            dispatch_uid='denorm_source_%s_post_save' % source)

        # FIXME: it's quirky that label and throttles must be configured under each target-source in app's denorm_fields,
        # FIXME: but it gets applied here for entire source (not target dependent). it probably should be configured once
        # FIXME: per source, but how do accomplish that in the current configuration design?
        source_graph = core.SOURCE_GRAPH[source_model] = core.SOURCE_GRAPH.get(
            source_model, {
                'label': source_dict.get('label'),
                'throttles': source_dict.get('throttles'),
                'fields': {}
            })
        source_graph_fields = source_graph['fields']

        # mark model as registered for denormalization
        source_model._denorm_registered = True

        # clone list, so that if we add _id below, it doesn't corrupt original list
        denorm_field_names = list(source_dict['fields'])

        target_graph[source] = {
            'fields': denorm_field_names,
            'storage': storage,
            'source_model':
            source_model  # important for shared_dict storage, because we don't know source model based on list field
        }

        for i, denorm_field_name in enumerate(denorm_field_names):

            source_field = source_options.get_field(denorm_field_name)
            # if field did not exist, then get_field would have raised FieldDoesNotExist

            # denormalized target field is named <source>_<field>
            target_field_name = '%s_%s' % (source, denorm_field_name)

            if storage == 'scalar':

                try:
                    target_options.get_field(target_field_name)
                except FieldDoesNotExist:
                    # field should not exist, so we're good
                    pass
                else:
                    raise AttributeError(
                        'The denorm field %s.%s must not already exist' %
                        (target_model.__name__, target_field_name))

                # create target field of same type as source_field
                target_field = _copy_field(source_field, target_field_name,
                                           target)
                target_field.contribute_to_class(target_model,
                                                 target_field_name)

            else:
                assert (storage == 'shared_dict')

                # denorm_data field was already created outside this iteration loop
                pass

            # if source field is a foreign key, then we reference its key rather than the actual related field,
            # because we are not dereferencing further than the key, and do not want to do an extra db lookup.
            if isinstance(source_field, ForeignKey):
                denorm_field_name += '_id'
                denorm_field_names[i] = denorm_field_name

            source_field_graph = source_graph_fields[
                denorm_field_name] = source_graph_fields.get(
                    denorm_field_name, [])
            source_field_graph.append({
                'target_model': target_model,
                'source': source,
                'strategy': strategy,
                'storage': storage,
                'shards':
                source_dict.get('shards')
                and util.convert_func_to_string(source_dict['shards'])
            })
Пример #5
0
def source_model_post_save(sender, instance, created, **kwargs):
    """post_save receiver for denormalization source models.

    Consumes the affected-targets map computed by source_model_pre_save,
    queues one pull task per affected target model, records a single Task
    row (used for throttling), and finally re-runs post_init so the
    instance can be saved again with fresh original values.
    """
    # aliases for readability
    src_model = sender
    src_instance = instance

    affected = src_instance._denorm_affected_targets
    if not affected:
        # pre_save found no changed denorm fields; nothing to do
        return

    # invariant per-save values, hoisted out of the loop
    src_name = util.get_model_name(src_model)
    src_id = src_instance.id

    #
    # create a task for each affected target to update its instances
    #

    for tgt_model, tgt_info in affected.iteritems():

        tgt_name = util.get_model_name(tgt_model)

        # if storage is shared_dict, the task will pluralize the related
        # field name to locate the target model's list field
        task_strategy = tgt_info['strategy']
        task_shards = tgt_info['shards']

        payload = {
            'created': timezone.now().isoformat(),
            'strategy': task_strategy,
            'storage': tgt_info['storage'],
            'instance_id': src_id,
            'source_model': src_name,
            'target_model': tgt_name,
            'related_field': tgt_info['related'],
            'fields': tgt_info['fields'],
            # TODO: queue name should be configurable
            'queue_name': 'denorm',
        }

        if task_strategy == 'mapreduce':
            if task_shards:
                payload['shards'] = handler_for_name(task_shards)(src_instance)
            else:
                payload['shards'] = DEFAULT_MAP_REDUCE_SHARDS

        body = util.dump_json(payload)

        logging.info('[denorm source_model_post_save] queue task payload = %s' % body)

        # one pull task per target, tagged so workers can lease by source+target
        task_tag = 'DENORM_SOURCE_%s_%s_TARGET_%s' % (src_name, src_id, tgt_name)
        taskqueue.Queue('pull-denorm').add(
            taskqueue.Task(payload=body, tag=task_tag, method='PULL'))

    # create ** one ** Task model instance used to track denorm tasks per source, particularly for throttling
    models.get_task_model().objects.create(
        source_model=src_name,
        source_instance_id=src_id,
        user=src_instance._denorm_user,
        label=src_instance._denorm_label)

    # re-run post_init to reset _denorm_orig_values in case this instance gets saved again
    source_model_post_init(src_model, src_instance)
Пример #6
0
def source_model_pre_save(sender, instance, raw, using, update_fields, **kwargs):
    """pre_save receiver for denormalization source models.

    Compares the instance's current field values against the values captured
    at post_init and records the affected targets on the instance
    (consumed by source_model_post_save). Also resolves the throttling
    label and user.

    Raises:
        exceptions.DenormThrottled: a configured throttle rate is exceeded.
    """

    # for clarity
    source_model = sender
    source_instance = instance
    created = not source_instance.id

    source_instance._denorm_affected_targets = affected_targets = {}

    # newly created instances will not need denormalization
    if created:
        return

    # denorm turned off explicitly on this instance
    if not getattr(source_instance, '_denorm', True):
        return

    source_graph = core.SOURCE_GRAPH[source_model]

    #
    # iterate through all fields to build up set of distinct affected targets that post_save signal receiver will process.
    #

    source_graph_fields = source_graph['fields']
    orig_values = source_instance._denorm_orig_values

    for source_field, targets in source_graph_fields.iteritems():

        old_value = orig_values[source_field]
        new_value = getattr(source_instance, source_field)

        if old_value != new_value:

            for target in targets:
                target_model = target['target_model']
                related_field_name = target['source']
                storage = target['storage']

                affected_targets[target_model] = affected_target = affected_targets.get(target_model, {
                    'related': related_field_name,
                    'strategy': target['strategy'],
                    'storage': storage,
                    'shards': target['shards'],
                    'fields': {}
                })

                # when the task updates the target: for scalar storage the key
                # is simply the target model field name; for shared_dict it is
                # the dictionary key of the target's denorm_data field.
                affected_fields_for_target = affected_target['fields']
                affected_fields_for_target['%s_%s' % (related_field_name, source_field)] = new_value

    if not affected_targets:
        return

    #
    # check that denorm throttling threshold is not exceeded
    #

    # get user from thread-local variable set by middleware; superusers and
    # anonymous/non-user principals are not throttled per-user
    user = middleware.get_current_user()
    if not user or not isinstance(user, get_user_model()) or not user.is_authenticated() or user.is_superuser:
        user = None

    source_instance._denorm_user = user

    # get denorm label used for throttling.
    # BUG FIX: register() always seeds the graph with a 'label' key (possibly
    # None), so the previous membership test ('label' in source_graph) was
    # always true and would call None when no custom label was configured.
    # Test the value's truthiness instead.
    custom_label = source_graph.get('label')
    if custom_label:
        # custom label callable set by application
        label = custom_label(source_instance, user)
    elif user:
        # default label: per model, per user
        label = '%s_%s' % (util.get_model_name(source_model), str(user.id))
    else:
        # no label
        label = None

    source_instance._denorm_label = label

    throttles = source_graph.get('throttles')

    if not label or not throttles:
        # no throttling
        return

    # now validate each throttle
    # FIXME: we need to figure out if there is already a denorm task scheduled, and if so, then don't penalize throttle.
    # FIXME: perhaps we can use a Task.status field in combination with filter for source instance id.

    now = timezone.now()

    for throttle in throttles:
        num_requests, duration = util.parse_rate(throttle)

        # FIXME: this is a naive, inefficient implementation. we should cache task counts.
        if models.get_task_model().objects.filter(label=label, created__gt=now - timedelta(seconds=duration)).count() >= num_requests:
            raise exceptions.DenormThrottled