Exemplo n.º 1
0
class InstitutionProjectCounts(MetricMixin, metrics.Metric):
    """Metric document holding the user and project totals of one institution.

    Each record stores the institution's id, its number of users, and its
    public and private project counts; all four fields are required.
    """

    institution_id = metrics.Keyword(required=True, index=True, doc_values=True)
    user_count = metrics.Integer(required=True, index=True, doc_values=True)
    public_project_count = metrics.Integer(required=True, index=True, doc_values=True)
    private_project_count = metrics.Integer(required=True, index=True, doc_values=True)

    class Index:
        # Index-level settings: one shard, one replica, refreshed every second.
        settings = dict(
            number_of_shards=1,
            number_of_replicas=1,
            refresh_interval='1s',
        )

    class Meta:
        # NOTE(review): presumably keeps the original document in `_source` -- confirm.
        source = metrics.MetaField(enabled=True)

    @classmethod
    def record_institution_project_counts(cls, institution, public_project_count, private_project_count, **kwargs):
        """
        Record one document for the given institution.
        :param institution: Institution; supplies ``_id`` and the user count
            (``osfuser_set.count()``)
        :param public_project_count: value stored in ``public_project_count``
        :param private_project_count: value stored in ``private_project_count``
        :param kwargs: forwarded unchanged to ``cls.record``
        :return: whatever ``cls.record`` returns
        """
        fields = {
            'institution_id': institution._id,
            'user_count': institution.osfuser_set.count(),
            'public_project_count': public_project_count,
            'private_project_count': private_project_count,
        }
        return cls.record(**fields, **kwargs)

    @classmethod
    def get_latest_institution_project_document(cls, institution):
        """
        Fetch the newest document recorded for the given institution.
        :param institution: Institution
        :return: the first hit when sorting by descending timestamp, or
            ``None`` when the response is empty
        """
        by_institution = cls.search().filter('match', institution_id=institution._id)
        # Newest first, and only a single hit is requested.
        newest_only = by_institution.sort('-timestamp')[:1]
        hits = newest_only.execute()
        return hits[0] if hits else None
Exemplo n.º 2
0
class BasePreprintMetric(MetricMixin, metrics.Metric):
    """Abstract base for per-preprint metrics that are summed through ``count``.

    Required fields are ``count``, ``provider_id`` and ``preprint_id``; the
    user, version, path and ``sloan_*`` fields are optional.
    """

    count = metrics.Integer(required=True, index=True, doc_values=True)
    provider_id = metrics.Keyword(required=True, index=True, doc_values=True)
    user_id = metrics.Keyword(required=False, index=True, doc_values=True)
    preprint_id = metrics.Keyword(required=True, index=True, doc_values=True)
    version = metrics.Keyword(index=True, doc_values=True)
    path = metrics.Text(index=True)
    sloan_coi = metrics.Boolean(index=True, doc_values=True)
    sloan_data = metrics.Boolean(index=True, doc_values=True)
    sloan_prereg = metrics.Boolean(index=True, doc_values=True)
    sloan_id = metrics.Keyword(index=True, doc_values=True)

    # TODO: locale

    class Index:
        # Index-level settings: one shard, one replica, refreshed every second.
        settings = dict(
            number_of_shards=1,
            number_of_replicas=1,
            refresh_interval='1s',
        )

    class Meta:
        abstract = True
        # NOTE(review): presumably keeps the original document in `_source` -- confirm.
        source = metrics.MetaField(enabled=True)

    @classmethod
    def record_for_preprint(cls, preprint, user=None, **kwargs):
        """
        Record one metric document for a preprint.
        :param preprint: supplies ``_id`` and ``provider._id``
        :param user: optional; its ``_id`` is stored, or ``None`` when the
            attribute is absent (including ``user=None``)
        :param kwargs: extra fields for ``cls.record``; a ``count`` entry
            overrides the default of 1
        :return: whatever ``cls.record`` returns
        """
        amount = kwargs.pop('count', 1)
        recorded_by = getattr(user, '_id', None)
        return cls.record(
            count=amount,
            preprint_id=preprint._id,
            user_id=recorded_by,
            provider_id=preprint.provider._id,
            **kwargs
        )

    @classmethod
    def get_count_for_preprint(cls, preprint, after=None, before=None, index=None):
        """
        Sum the ``count`` field over a preprint's documents.
        :param preprint: supplies the ``_id`` to match on
        :param after: optional inclusive lower timestamp bound (``gte``)
        :param before: optional exclusive upper timestamp bound (``lt``)
        :param index: passed through to ``cls.search``
        :return: the summed count as an int, or 0 when the response carries
            no ``sum_count`` aggregation
        """
        query = cls.search(after=after, before=before, index=index)
        query = query.filter('match', preprint_id=preprint._id)

        # Keep only the bounds that were actually supplied.
        bounds = {
            operator: value
            for operator, value in (('gte', after), ('lt', before))
            if value
        }
        if bounds:
            query = query.filter('range', timestamp=bounds)

        query.aggs.metric('sum_count', 'sum', field='count')
        # Only the aggregation matters, so ask for zero hits.
        query = query.extra(size=0)

        try:
            response = query.execute()
        except NotFoundError:
            # At least one of the targeted indices does not exist;
            # retry the same query against the default index instead.
            query = query.index().index(cls._default_index())
            response = query.execute()

        aggregations = response.aggregations
        if hasattr(aggregations, 'sum_count'):
            return int(aggregations.sum_count.value)
        # No indexed data
        return 0
Exemplo n.º 3
0
class BasePreprintMetric(MetricMixin, metrics.Metric):
    """Abstract base for per-preprint metrics that are summed through ``count``.

    Required fields are ``count``, ``provider_id`` and ``preprint_id``; the
    user, version and path fields are optional.
    """

    count = metrics.Integer(required=True, index=True, doc_values=True)
    provider_id = metrics.Keyword(required=True, index=True, doc_values=True)
    user_id = metrics.Keyword(required=False, index=True, doc_values=True)
    preprint_id = metrics.Keyword(required=True, index=True, doc_values=True)
    version = metrics.Keyword(index=True, doc_values=True)
    path = metrics.Text(index=True)

    # TODO: locale

    class Index:
        # Index-level settings: one shard, one replica, refreshed every second.
        settings = dict(
            number_of_shards=1,
            number_of_replicas=1,
            refresh_interval='1s',
        )

    class Meta:
        abstract = True

    @classmethod
    def record_for_preprint(cls, preprint, user=None, **kwargs):
        """
        Record one metric document for a preprint.
        :param preprint: supplies ``_id`` and ``provider._id``
        :param user: optional; its ``_id`` is stored, or ``None`` when the
            attribute is absent (including ``user=None``)
        :param kwargs: extra fields for ``cls.record``; a ``count`` entry
            overrides the default of 1
        :return: whatever ``cls.record`` returns
        """
        amount = kwargs.pop('count', 1)
        recorded_by = getattr(user, '_id', None)
        return cls.record(
            count=amount,
            preprint_id=preprint._id,
            user_id=recorded_by,
            provider_id=preprint.provider._id,
            **kwargs
        )

    @classmethod
    def get_count_for_preprint(cls, preprint, after=None):
        """
        Sum the ``count`` field over a preprint's documents.
        :param preprint: supplies the ``_id`` to match on
        :param after: optional inclusive lower timestamp bound (``gte``)
        :return: the summed count as an int, or 0 when the response carries
            no ``sum_count`` aggregation
        """
        query = cls.search().filter('match', preprint_id=preprint._id)
        if after:
            query = query.filter('range', timestamp={'gte': after})
        query.aggs.metric('sum_count', 'sum', field='count')

        # Only the aggregation matters, so ask for zero hits.
        aggregate_only = query.extra(size=0)
        response = aggregate_only.execute()

        aggregations = response.aggregations
        if hasattr(aggregations, 'sum_count'):
            return int(aggregations.sum_count.value)
        # No indexed data
        return 0
Exemplo n.º 4
0
class UserInstitutionProjectCounts(MetricMixin, metrics.Metric):
    """Metric document holding one user's project counts within an institution.

    Each record stores the user id, the institution id, an optional department
    and the user's public and private project counts.
    """
    user_id = metrics.Keyword(index=True, doc_values=True, required=True)
    institution_id = metrics.Keyword(index=True, doc_values=True, required=True)
    department = metrics.Keyword(index=True, doc_values=True, required=False)
    public_project_count = metrics.Integer(index=True, doc_values=True, required=True)
    private_project_count = metrics.Integer(index=True, doc_values=True, required=True)

    class Index:
        # Index-level settings: one shard, one replica, refreshed every second.
        settings = {
            'number_of_shards': 1,
            'number_of_replicas': 1,
            'refresh_interval': '1s',
        }

    class Meta:
        # NOTE(review): presumably keeps the original document in `_source` -- confirm.
        source = metrics.MetaField(enabled=True)

    @classmethod
    def filter_institution(cls, institution):
        """
        Build a search limited to the documents of one institution.
        :param institution: Institution
        :return: search filtered on ``institution_id``
        """
        return cls.search().filter('match', institution_id=institution._id)

    @classmethod
    def get_recent_datetime(cls, institution):
        """
        Get the timestamp of the institution's newest document, truncated to the minute.
        :param institution: Institution
        :return: datetime of the newest document with seconds and microseconds
            zeroed, or 24 hours before now when there is no document
        """
        # Only the newest hit is needed, so request a single document.
        search = cls.filter_institution(institution).sort('-timestamp')[:1]
        results = search.execute()
        if results:
            # Drop seconds and microseconds (this truncates, it does not round).
            return results[0].timestamp.replace(microsecond=0, second=0)
        # If there are no results, assume yesterday.
        return dt.datetime.now() - dt.timedelta(days=1)

    @classmethod
    def get_department_counts(cls, institution) -> list:
        """
        Build the department aggregation for the institution's latest records.

        Documents whose timestamp is at or after ``get_recent_datetime`` are
        bucketed by ``department`` (up to 250 buckets, documents without a
        department fall under ``DEFAULT_ES_NULL_VALUE``) and, inside each
        department, by ``user_id``.

        NOTE(review): the inner ``users`` terms aggregation sets no ``size``,
        so Elasticsearch's default bucket limit applies per department --
        confirm that this is intended.
        NOTE(review): the value returned is whatever ``update_from_dict``
        returns (presumably the search object, not a ``list``) -- confirm and
        consider correcting the annotation.
        :param institution: Institution
        :return: the result of ``update_from_dict`` on the institution search
        """
        search = cls.filter_institution(institution).sort('timestamp')
        last_record_time = cls.get_recent_datetime(institution)

        return search.update_from_dict({
            'aggs': {
                'date_range': {
                    'filter': {
                        'range': {
                            'timestamp': {
                                'gte': last_record_time,
                            }
                        }
                    },
                    'aggs': {
                        'departments': {
                            'terms': {
                                'field': 'department',
                                'missing': DEFAULT_ES_NULL_VALUE,
                                'size': 250
                            },
                            'aggs': {
                                'users': {
                                    'terms': {
                                        'field': 'user_id'
                                    }
                                }
                            }
                        }
                    }
                }
            }
        })

    @classmethod
    def record_user_institution_project_counts(cls, user, institution, public_project_count, private_project_count, **kwargs):
        """
        Record one document for a user within an institution.
        :param user: supplies ``_id``; its ``department`` attribute is stored,
            or ``DEFAULT_ES_NULL_VALUE`` when the attribute is absent
        :param institution: Institution; supplies ``_id``
        :param public_project_count: value stored in ``public_project_count``
        :param private_project_count: value stored in ``private_project_count``
        :param kwargs: forwarded unchanged to ``cls.record``
        :return: whatever ``cls.record`` returns
        """
        return cls.record(
            user_id=user._id,
            institution_id=institution._id,
            department=getattr(user, 'department', DEFAULT_ES_NULL_VALUE),
            public_project_count=public_project_count,
            private_project_count=private_project_count,
            **kwargs
        )

    @classmethod
    def get_current_user_metrics(cls, institution) -> list:
        """
        Build the search for the institution's latest batch of user documents.

        The search keeps documents whose timestamp is at or after
        ``get_recent_datetime``, sorts them by ``user_id`` and raises the
        result size to ``MAX_SIZE_OF_ES_QUERY``.

        NOTE(review): the value returned is the search object rather than a
        ``list`` -- consider correcting the annotation.
        :param institution: Institution
        :return: the configured search
        """
        last_record_time = cls.get_recent_datetime(institution)

        search = cls.filter_institution(
            institution
        ).filter(
            'range',
            timestamp={
                'gte': last_record_time
            }
        ).sort(
            'user_id'
        )
        # Return the call's result instead of relying on in-place mutation,
        # matching get_department_counts.
        return search.update_from_dict({
            'size': MAX_SIZE_OF_ES_QUERY
        })
Exemplo n.º 5
0
class DummyMetric(MetricMixin, metrics.Metric):
    """Minimal metric with a required integer ``count`` and an optional ``user_id`` keyword.

    NOTE(review): the name suggests a test-only fixture -- confirm against its callers.
    """
    count = metrics.Integer(doc_values=True, index=True, required=True)
    user_id = metrics.Keyword(index=True, doc_values=True, required=False)

    class Meta:
        # Sets the metric's app label to 'osf'.
        app_label = 'osf'
Exemplo n.º 6
0
class DummyMetric(metrics.Metric):
    """Minimal metric with a single integer field, ``my_int``.

    NOTE(review): the name suggests a test-only fixture -- confirm against its callers.
    """
    my_int = metrics.Integer()