Пример #1
0
class BasePreprintMetric(MetricMixin, metrics.Metric):
    """Abstract base for per-preprint count metrics.

    Each document carries a ``count`` plus the ids of the preprint, its
    provider and (optionally) the acting user.
    """

    count = metrics.Integer(doc_values=True, index=True, required=True)
    provider_id = metrics.Keyword(index=True, doc_values=True, required=True)
    user_id = metrics.Keyword(index=True, doc_values=True, required=False)
    preprint_id = metrics.Keyword(index=True, doc_values=True, required=True)
    version = metrics.Keyword(index=True, doc_values=True)
    path = metrics.Text(index=True)
    sloan_coi = metrics.Boolean(index=True, doc_values=True)
    sloan_data = metrics.Boolean(index=True, doc_values=True)
    sloan_prereg = metrics.Boolean(index=True, doc_values=True)
    sloan_id = metrics.Keyword(index=True, doc_values=True)

    # TODO: locale

    class Index:
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 1,
            'refresh_interval': '1s',
        }

    class Meta:
        abstract = True
        source = metrics.MetaField(enabled=True)

    @classmethod
    def record_for_preprint(cls, preprint, user=None, **kwargs):
        """Record one metric document for ``preprint``.

        :param preprint: object exposing ``_id`` and ``provider._id``
        :param user: optional object exposing ``_id``; ``None`` is stored
            as the user id when it is missing
        :param kwargs: extra fields forwarded to ``cls.record``; ``count``
            defaults to 1 when not supplied
        :return: whatever ``cls.record`` returns
        """
        # Fill in the default count, then let it travel with the other kwargs.
        kwargs.setdefault('count', 1)
        acting_user_id = getattr(user, '_id', None)
        return cls.record(
            preprint_id=preprint._id,
            user_id=acting_user_id,
            provider_id=preprint.provider._id,
            **kwargs
        )

    @classmethod
    def get_count_for_preprint(cls, preprint, after=None, before=None, index=None):
        """Return the summed ``count`` of all documents for ``preprint``.

        :param preprint: object exposing ``_id``
        :param after: optional inclusive lower bound on ``timestamp``
        :param before: optional exclusive upper bound on ``timestamp``
        :param index: forwarded to ``cls.search``
        :return: int total, or 0 when the response has no ``sum_count``
            aggregation
        """
        query = cls.search(after=after, before=before, index=index)
        query = query.filter('match', preprint_id=preprint._id)

        # Keep only the bounds that were actually provided.
        bounds = (('gte', after), ('lt', before))
        time_window = {operator: value for operator, value in bounds if value}
        if time_window:
            query = query.filter('range', timestamp=time_window)

        query.aggs.metric('sum_count', 'sum', field='count')
        # Only the aggregation matters, so ask for zero hits.
        query = query.extra(size=0)
        try:
            result = query.execute()
        except NotFoundError:
            # At least one of the targeted indices does not exist;
            # retry against the default index instead.
            query = query.index().index(cls._default_index())
            result = query.execute()

        aggregations = result.aggregations
        if hasattr(aggregations, 'sum_count'):
            return int(aggregations.sum_count.value)
        # No indexed data
        return 0
Пример #2
0
class PreprintView(metrics.Metric):
    # Metric document with three indexed keyword fields identifying the
    # provider, the user and the preprint.
    provider_id = metrics.Keyword(index=True)
    user_id = metrics.Keyword(index=True)
    preprint_id = metrics.Keyword(index=True)

    class Index:
        # Per the Elasticsearch index-settings docs, a refresh_interval of
        # "-1" disables periodic refresh.
        settings = {"refresh_interval": "-1"}

    class Meta:
        app_label = "dummyapp"
        # Presumably the index-template name and the index pattern it
        # applies to -- confirm against the metrics library's Meta options.
        template_name = "osf_metrics_preprintviews"
        template = "osf_metrics_preprintviews-*"
Пример #3
0
class InstitutionProjectCounts(MetricMixin, metrics.Metric):
    """Per-institution snapshot of user and project counts."""

    institution_id = metrics.Keyword(index=True, doc_values=True, required=True)
    user_count = metrics.Integer(index=True, doc_values=True, required=True)
    public_project_count = metrics.Integer(index=True, doc_values=True, required=True)
    private_project_count = metrics.Integer(index=True, doc_values=True, required=True)

    class Index:
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 1,
            'refresh_interval': '1s',
        }

    class Meta:
        source = metrics.MetaField(enabled=True)

    @classmethod
    def record_institution_project_counts(cls, institution, public_project_count, private_project_count, **kwargs):
        """Record a counts document for ``institution``.

        The user count is taken from ``institution.osfuser_set.count()``;
        the project counts are supplied by the caller. Extra ``kwargs`` are
        forwarded to ``cls.record``, whose result is returned.
        """
        institution_id = institution._id
        member_total = institution.osfuser_set.count()
        return cls.record(
            institution_id=institution_id,
            user_count=member_total,
            public_project_count=public_project_count,
            private_project_count=private_project_count,
            **kwargs
        )

    @classmethod
    def get_latest_institution_project_document(cls, institution):
        """Return the newest document for ``institution``, or ``None`` if there is none."""
        newest_first = cls.search().filter('match', institution_id=institution._id).sort('-timestamp')
        # Only the first (most recent) hit is needed.
        hits = newest_first[:1].execute()
        return hits[0] if hits else None
Пример #4
0
        class MyBaseMetric(metrics.Metric):
            # Abstract base metric declaring a single indexed keyword field
            # together with custom index settings.
            user_id = metrics.Keyword(index=True)

            class Index:
                settings = {"number_of_shards": 2}

            class Meta:
                # Marked abstract; presumably meant to be subclassed rather
                # than used directly -- confirm with the surrounding test.
                abstract = True
Пример #5
0
class BasePreprintMetric(MetricMixin, metrics.Metric):
    """Abstract base for per-preprint count metrics."""

    count = metrics.Integer(doc_values=True, index=True, required=True)
    provider_id = metrics.Keyword(index=True, doc_values=True, required=True)
    user_id = metrics.Keyword(index=True, doc_values=True, required=False)
    preprint_id = metrics.Keyword(index=True, doc_values=True, required=True)
    version = metrics.Keyword(index=True, doc_values=True)
    path = metrics.Text(index=True)

    # TODO: locale

    class Index:
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 1,
            'refresh_interval': '1s',
        }

    class Meta:
        abstract = True

    @classmethod
    def record_for_preprint(cls, preprint, user=None, **kwargs):
        """Record one metric document for ``preprint``.

        ``count`` defaults to 1 when it is not passed in ``kwargs``; the
        user id is ``None`` when ``user`` has no ``_id``. Returns whatever
        ``cls.record`` returns.
        """
        how_many = kwargs.pop('count', 1)
        who = getattr(user, '_id', None)
        return cls.record(
            count=how_many,
            preprint_id=preprint._id,
            user_id=who,
            provider_id=preprint.provider._id,
            **kwargs
        )

    @classmethod
    def get_count_for_preprint(cls, preprint, after=None):
        """Return the summed ``count`` for ``preprint``.

        :param preprint: object exposing ``_id``
        :param after: optional inclusive lower bound on ``timestamp``
        :return: int total, or 0 when the response has no ``sum_count``
            aggregation
        """
        query = cls.search().filter('match', preprint_id=preprint._id)
        if after:
            query = query.filter('range', timestamp={'gte': after})
        query.aggs.metric('sum_count', 'sum', field='count')

        # Hits are irrelevant here; request none and read the aggregation.
        result = query.extra(size=0).execute()
        aggregations = result.aggregations
        if hasattr(aggregations, 'sum_count'):
            return int(aggregations.sum_count.value)
        # No indexed data
        return 0
Пример #6
0
        class MyBaseMetric(metrics.Metric):
            # Abstract base metric declaring a single indexed keyword field.
            user_id = metrics.Keyword(index=True)

            class Meta:
                # Marked abstract; presumably meant to be subclassed rather
                # than used directly -- confirm with the surrounding test.
                abstract = True
Пример #7
0
class RegistriesModerationMetrics(MetricMixin, metrics.Metric):
    """Metric document describing one moderation transition of a registration."""

    registration_id = metrics.Keyword(index=True, doc_values=True, required=True)
    provider_id = metrics.Keyword(index=True, doc_values=True, required=True)
    trigger = metrics.Keyword(index=True, doc_values=True, required=True)
    from_state = metrics.Keyword(index=True, doc_values=True, required=True)
    to_state = metrics.Keyword(index=True, doc_values=True, required=True)
    user_id = metrics.Keyword(index=True, doc_values=True, required=True)
    comment = metrics.Keyword(index=True)

    class Index:
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 1,
            'refresh_interval': '1s',
        }

    class Meta:
        source = metrics.MetaField(enabled=True)

    @classmethod
    def record_transitions(cls, action):
        """Record the transition described by ``action``.

        :param action: object exposing ``target`` (with ``_id`` and
            ``provider._id``), ``from_state``, ``to_state``, ``trigger``,
            ``creator._id`` and ``comment``
        :return: whatever ``cls.record`` returns
        """
        return cls.record(
            registration_id=action.target._id,
            provider_id=action.target.provider._id,
            from_state=action.from_state,
            to_state=action.to_state,
            trigger=action.trigger,
            user_id=action.creator._id,
            comment=action.comment,
        )

    @staticmethod
    def _transition_filter(to_state, trigger):
        """Build a filter aggregation matching both ``to_state`` and ``trigger``."""
        return {
            'filter': {
                'bool': {
                    'must': [
                        {
                            'match': {
                                'to_state': to_state
                            }
                        },
                        {
                            'match': {
                                'trigger': trigger
                            }
                        }
                    ]
                }
            }
        }

    @classmethod
    def get_registries_info(cls) -> dict:
        """
        Gets moderation metrics grouped by provider (one bucket per ``provider_id``).

        Expected output (counts are illustrative):
        {
            'doc_count_error_upper_bound': 0,
            'sum_other_doc_count': 0,
            'buckets': [{
                'key': 'osf',
                'doc_count': 6,
                'transitions_without_comments': {'doc_count': 1},
                'transitions_with_comments': {'doc_count': 5},
                'submissions': {'doc_count': 3},
                'accepted_with_embargo': {'doc_count': 0},
                'accepted_without_embargo': {'doc_count': 0},
                'rejected': {'doc_count': 0},
                'withdrawn': {'doc_count': 0}
            },
            {
                'key': 'provider2',
                'doc_count': 4,
                'transitions_without_comments': {'doc_count': 0},
                'transitions_with_comments': {'doc_count': 4},
                'submissions': {'doc_count': 1},
                'accepted_with_embargo': {'doc_count': 0},
                'accepted_without_embargo': {'doc_count': 1},
                'rejected': {'doc_count': 1},
                'withdrawn': {'doc_count': 0}
            }]
        }
        :return: dict form of the ``providers`` terms aggregation
        """
        states = RegistrationModerationStates
        triggers = RegistrationModerationTriggers

        # Sub-aggregations computed inside every provider bucket.
        per_provider_aggs = {
            'transitions_without_comments': {
                'missing': {
                    'field': 'comment'
                }
            },
            'transitions_with_comments': {
                'filter': {
                    'exists': {
                        'field': 'comment'
                    }
                }
            },
            'submissions': {
                'filter': {
                    'match': {
                        'trigger': {
                            'query': triggers.SUBMIT.db_name
                        }
                    }
                }
            },
            # NOTE(review): both accepted_* aggregations match the SUBMIT
            # trigger, exactly as before; confirm this is intended rather
            # than an "accept" trigger.
            'accepted_with_embargo': cls._transition_filter(
                states.EMBARGO.db_name,
                triggers.SUBMIT.db_name,
            ),
            'accepted_without_embargo': cls._transition_filter(
                states.ACCEPTED.db_name,
                triggers.SUBMIT.db_name,
            ),
            'rejected': cls._transition_filter(
                states.REJECTED.db_name,
                triggers.REJECT_SUBMISSION.db_name,
            ),
            'withdrawn': cls._transition_filter(
                states.WITHDRAWN.db_name,
                triggers.ACCEPT_WITHDRAWAL.db_name,
            ),
        }

        search = cls.search().update_from_dict({
            'aggs': {
                'providers': {
                    'terms': {
                        'field': 'provider_id'
                    },
                    'aggs': per_provider_aggs
                }
            }
        })
        return search.execute().aggregations['providers'].to_dict()
Пример #8
0
class UserInstitutionProjectCounts(MetricMixin, metrics.Metric):
    """Per-user, per-institution snapshot of public and private project counts."""

    user_id = metrics.Keyword(index=True, doc_values=True, required=True)
    institution_id = metrics.Keyword(index=True, doc_values=True, required=True)
    department = metrics.Keyword(index=True, doc_values=True, required=False)
    public_project_count = metrics.Integer(index=True, doc_values=True, required=True)
    private_project_count = metrics.Integer(index=True, doc_values=True, required=True)

    class Index:
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 1,
            'refresh_interval': '1s',
        }

    class Meta:
        source = metrics.MetaField(enabled=True)

    @classmethod
    def filter_institution(cls, institution):
        """Return a search restricted to documents of ``institution`` (matched by ``_id``)."""
        return cls.search().filter('match', institution_id=institution._id)

    @classmethod
    def get_recent_datetime(cls, institution):
        """Return the newest document's timestamp, truncated to the minute.

        :param institution: object exposing ``_id``
        :return: datetime of the most recent document with seconds and
            microseconds zeroed, or "now minus one day" when the
            institution has no documents
        """
        # Only the newest document is needed, so request a single hit.
        search = cls.filter_institution(institution).sort('-timestamp')[:1]
        results = search.execute()
        if results:
            # Truncate (not round) to the start of the minute; reuse the
            # response instead of executing the search a second time.
            return results[0].timestamp.replace(microsecond=0, second=0)
        # If there are no results, assume yesterday.
        return dt.datetime.now() - dt.timedelta(days=1)

    @classmethod
    def get_department_counts(cls, institution) -> list:
        """
        Builds the department/user aggregation over the latest batch of documents.

        Documents of ``institution`` with a timestamp at or after
        ``get_recent_datetime(institution)`` are bucketed by ``department``
        (missing values fall under ``DEFAULT_ES_NULL_VALUE``, up to 250
        buckets) and, within each department, by ``user_id``.

        NOTE: despite the ``list`` annotation, this returns the un-executed
        search object produced by ``update_from_dict``.

        :param institution: Institution
        :return: search object carrying the aggregation
        """
        search = cls.filter_institution(institution).sort('timestamp')
        last_record_time = cls.get_recent_datetime(institution)

        return search.update_from_dict({
            'aggs': {
                'date_range': {
                    'filter': {
                        'range': {
                            'timestamp': {
                                'gte': last_record_time,
                            }
                        }
                    },
                    'aggs': {
                        'departments': {
                            'terms': {
                                'field': 'department',
                                'missing': DEFAULT_ES_NULL_VALUE,
                                'size': 250
                            },
                            'aggs': {
                                'users': {
                                    'terms': {
                                        'field': 'user_id'
                                    }
                                }
                            }
                        }
                    }
                }
            }
        })

    @classmethod
    def record_user_institution_project_counts(cls, user, institution, public_project_count, private_project_count, **kwargs):
        """Record a counts document for ``user`` at ``institution``.

        ``department`` is read from ``user.department`` and falls back to
        ``DEFAULT_ES_NULL_VALUE`` when the attribute is absent. Extra
        ``kwargs`` are forwarded to ``cls.record``, whose result is returned.
        """
        return cls.record(
            user_id=user._id,
            institution_id=institution._id,
            department=getattr(user, 'department', DEFAULT_ES_NULL_VALUE),
            public_project_count=public_project_count,
            private_project_count=private_project_count,
            **kwargs
        )

    @classmethod
    def get_current_user_metrics(cls, institution) -> list:
        """
        Builds a search for the latest batch of per-user documents.

        Selects documents of ``institution`` with a timestamp at or after
        ``get_recent_datetime(institution)``, sorted by ``user_id``, with
        the page size set to ``MAX_SIZE_OF_ES_QUERY``.

        NOTE: despite the ``list`` annotation, this returns the un-executed
        search object.

        :param institution: Institution
        :return: search object
        """
        last_record_time = cls.get_recent_datetime(institution)

        search = cls.filter_institution(
            institution
        ).filter(
            'range',
            timestamp={
                'gte': last_record_time
            }
        ).sort(
            'user_id'
        )
        # update_from_dict modifies the search in place.
        search.update_from_dict({
            'size': MAX_SIZE_OF_ES_QUERY
        })

        return search
Пример #9
0
class DummyMetric(MetricMixin, metrics.Metric):
    # Minimal concrete metric: a required integer count and an optional
    # user id keyword.
    count = metrics.Integer(doc_values=True, index=True, required=True)
    user_id = metrics.Keyword(index=True, doc_values=True, required=False)

    class Meta:
        # Registers the metric under the 'osf' app label.
        app_label = 'osf'
Пример #10
0
class DummyMetricWithExplicitTemplateName(metrics.Metric):
    # Metric with a single keyword field and an explicitly set template name.
    my_keyword = metrics.Keyword()

    class Meta:
        # Presumably overrides the template name the library would otherwise
        # derive -- confirm against the metrics library's Meta options.
        template_name = "dummymetric"