示例#1
0
 def test_set_day_offset_none(self):
     """Applying a day offset to a range that has no start keeps the start unset."""
     time_range = TimeRange()

     def assert_start_unset():
         self.assertIsNone(time_range.start)
         self.assertIsNone(time_range.start_org)

     # Nothing was passed to the constructor, so there is no start to begin with
     assert_start_unset()

     time_range.set_start_day_offset(5)

     # The offset has nothing to shift, so both attributes must still be `None`
     assert_start_unset()
示例#2
0
 def test_tzinfo(self):
     """
     Check a range built from explicit UTC bounds plus a New York ``tzinfo_``.

     The bounds must be kept as given, the range must be finite (48 hours) and
     not expanded, and ``tzinfo_`` must report the same UTC offset as the
     timezone that was passed in.
     """
     start_dt = datetime(2020, 2, 14, 12, 1, 1, tzinfo=pytz.UTC)
     end_dt = datetime(2020, 2, 16, 12, 1, 1, tzinfo=pytz.UTC)
     tz = pytz.timezone("America/New_York")
     tr = TimeRange(start=start_dt, end=end_dt, tzinfo_=tz)

     # `assertAlmostEqual` is used instead of the `assertAlmostEquals` alias,
     # which is deprecated and no longer exists on Python 3.12+.
     # `places=0` compares the values after rounding their difference to 0 decimals.
     self.assertAlmostEqual(0, abs((start_dt - tr.start).total_seconds()),
                            places=0)
     self.assertAlmostEqual(0,
                            abs((start_dt - tr.start_org).total_seconds()),
                            places=0)
     self.assertAlmostEqual(0, abs((tr.end - end_dt).total_seconds()),
                            places=0)

     # Read the clock once so that both offsets are evaluated for the same instant
     now_naive = datetime.now()
     self.assertEqual(tz.utcoffset(now_naive),
                      tr.tzinfo_.utcoffset(now_naive))

     self.assertTrue(tr.expandable)
     self.assertFalse(tr.expanded)
     self.assertFalse(tr.is_inf)
     self.assertAlmostEqual(48, tr.hr_length_org, places=0)
     self.assertAlmostEqual(48, tr.hr_length, places=0)
     self.assertEqual(
         f"{start_dt.strftime('%m-%d')} ~ {end_dt.strftime('%m-%d')}",
         tr.expr_period_short)
     # The end time of day is expected in the range's timezone, not in UTC
     self.assertAlmostEqual(tr.end_time_seconds,
                            time_to_seconds(localtime(end_dt, tz).time()),
                            places=0)
     # No range multiplier was given, so the range is its own single period
     prd = tr.get_periods()
     self.assertListEqual([tr], prd)
示例#3
0
 def test_mult(self):
     """
     Check a 48-hour range that is built with ``range_mult=2``.

     The start is expected to move 48 hours earlier (96 hours in total) while
     ``start_org`` and the end stay as given, and ``get_periods()`` is expected
     to return the two consecutive 48-hour ranges.
     """
     start_mult_expected = datetime(2020, 4, 12, 1, 1, 1, tzinfo=pytz.UTC)
     start_dt = datetime(2020, 4, 14, 1, 1, 1, tzinfo=pytz.UTC)
     end_dt = datetime(2020, 4, 16, 1, 1, 1, tzinfo=pytz.UTC)
     tr = TimeRange(start=start_dt, end=end_dt, range_mult=2)

     # `assertAlmostEqual` is used instead of the `assertAlmostEquals` alias,
     # which is deprecated and no longer exists on Python 3.12+.
     # `places=0` compares the values after rounding their difference to 0 decimals.
     self.assertAlmostEqual(
         0, abs((start_mult_expected - tr.start).total_seconds()), places=0)
     self.assertAlmostEqual(0,
                            abs((start_dt - tr.start_org).total_seconds()),
                            places=0)
     self.assertAlmostEqual(0, abs((tr.end - end_dt).total_seconds()),
                            places=0)
     self.assertIsNone(tr.tzinfo_)
     self.assertTrue(tr.expandable)
     self.assertTrue(tr.expanded)
     self.assertFalse(tr.is_inf)
     self.assertAlmostEqual(48, tr.hr_length_org, places=0)
     self.assertAlmostEqual(96, tr.hr_length, places=0)
     # The short expression is expected to use the expanded start, not the original one
     self.assertEqual(
         f"{start_mult_expected.strftime('%m-%d')} ~ {end_dt.strftime('%m-%d')}",
         tr.expr_period_short)
     self.assertAlmostEqual(tr.end_time_seconds,
                            time_to_seconds(end_dt.time()), places=0)

     # Expanded part first, followed by the originally requested part
     prd = tr.get_periods()
     tr2 = TimeRange(start=start_mult_expected, end=start_dt)
     tr3 = TimeRange(start=start_dt, end=end_dt)
     self.assertListEqual([tr2, tr3], prd)
示例#4
0
    def test_set_day_offset_neg(self):
        """A negative day offset moves ``start`` earlier and leaves ``start_org`` untouched."""
        original_start = datetime(2020, 2, 14, 1, 1, 1, tzinfo=pytz.UTC)
        offset_days = 5
        time_range = TimeRange(start=original_start)

        # Before the offset is applied, both attributes hold the given start
        self.assertEqual(original_start, time_range.start)
        self.assertEqual(original_start, time_range.start_org)

        time_range.set_start_day_offset(-offset_days)

        # Only the effective start is shifted back
        self.assertEqual(original_start, time_range.start_org)
        self.assertEqual(original_start - timedelta(days=offset_days),
                         time_range.start)
示例#5
0
    def mean_message_count(
            self, channel_oids: Union[ObjectId, List[ObjectId]], *, hours_within: Optional[int] = None,
            start: Optional[datetime] = None, end: Optional[datetime] = None, tzinfo_: PytzInfo = UTC.to_tzinfo(),
            max_mean_days: int = 5) -> \
            MeanMessageResultGenerator:
        """
        Count the messages per date in the given channel(s) and wrap the counts for mean calculation.

        The start of the queried time range is offset by ``-max_mean_days`` days before it is
        attached to the filter, while the count of days collected is calculated from
        the non-offset start (``start_org``).

        Dates are formatted in the timezone ``tzinfo_`` and the result is sorted by date ascending.
        """
        filter_doc = self._channel_oids_filter_(channel_oids)

        time_range = TimeRange(range_hr=hours_within, start=start, end=end, tzinfo_=tzinfo_)
        time_range.set_start_day_offset(-max_mean_days)

        # This modifies `filter_doc` in place
        self._attach_time_range_(filter_doc, trange=time_range)

        # Date (in the given timezone) at which each message was created
        date_expr = {
            "$dateToString": {
                "date": "$_id",
                "format": MeanMessageResultGenerator.FMT_DATE,
                "timezone": tzinfo_.tzidentifier
            }
        }
        group_stage = {
            "$group": {
                "_id": {MeanMessageResultGenerator.KEY_DATE: date_expr},
                MeanMessageResultGenerator.KEY_COUNT: {"$sum": 1}
            }
        }
        pipeline = [
            {"$match": filter_doc},
            group_stage,
            {"$sort": {"_id": pymongo.ASCENDING}}
        ]

        daily_counts = list(self.aggregate(pipeline))
        days_collected = HourlyResult.data_days_collected(
            self, filter_doc, hr_range=hours_within, start=time_range.start_org, end=end)

        return MeanMessageResultGenerator(
            daily_counts, days_collected, tzinfo_, trange=time_range, max_mean_days=max_mean_days)
示例#6
0
    def date_list(days_collected,
                  tzinfo,
                  *,
                  start: Optional[datetime] = None,
                  end: Optional[datetime] = None,
                  trange: Optional[TimeRange] = None) -> List[date]:
        """
        Build the list of consecutive dates covered by a time range, both boundary dates included.

        If ``trange`` is given, ``start`` and ``end`` are not used. Otherwise, a time range of
        ``days_collected * 24`` hours is created from ``start``, ``end`` and ``tzinfo``.

        :raises ValueError: if the time range is infinite
        """
        if not trange:
            trange = TimeRange(range_hr=days_collected * 24, start=start, end=end, tzinfo_=tzinfo)

        if trange.is_inf:
            raise ValueError("TimeRange length is infinity.")

        last_day = trange.end.date()
        first_day = trange.start.date()
        # `+ 1` so that the date of the range end is included as well
        day_count = (last_day - first_day).days + 1

        return [first_day + timedelta(days=offset) for offset in range(day_count)]
示例#7
0
    def _attach_time_range_(filter_: dict, *, hours_within: Optional[int] = None,
                            start: Optional[datetime] = None, end: Optional[datetime] = None,
                            range_mult: Union[int, float] = 1.0, trange: Optional[TimeRange] = None):
        """
        Restrict ``filter_`` (modified in place) to the documents created within a time range.

        The restriction is applied on the ``OID_KEY`` field using the OIDs converted from the range boundaries:
        greater than the one of the range start and less than the one of the range end.
        A boundary is skipped if it is not set or if no OID could be created from it.

        If ``filter_`` already has a condition on ``OID_KEY``, that condition is kept as ``$eq``
        alongside the boundaries.

        ``hours_within``, ``start``, ``end`` and ``range_mult`` are only used when ``trange`` is not given.
        """
        if not trange:
            trange = TimeRange(
                range_hr=hours_within, start=start, end=end, range_mult=range_mult, end_autofill_now=False)

        # Collect the boundary conditions

        oid_bounds = {}
        for operator, boundary_name in (("$gt", "start"), ("$lt", "end")):
            boundary = getattr(trange, boundary_name)
            boundary_oid = dt_to_objectid(boundary)
            if boundary and boundary_oid:
                oid_bounds[operator] = boundary_oid

        # Nothing to attach, leave the filter as it is

        if not oid_bounds:
            return

        # Modify the filter

        if OID_KEY in filter_:
            filter_[OID_KEY] = {"$eq": filter_[OID_KEY], **oid_bounds}
        else:
            filter_[OID_KEY] = oid_bounds
示例#8
0
 def test_fill_all_none(self):
     """
     Check a range constructed without any argument.

     The start is expected to be unset, the end is expected to be (about) the
     current time, and the range is expected to be infinite and not expandable.
     """
     tr = TimeRange()
     now = now_utc_aware()
     self.assertIsNone(tr.start)
     self.assertIsNone(tr.start_org)
     # `assertAlmostEqual` is used instead of the `assertAlmostEquals` alias,
     # which is deprecated and no longer exists on Python 3.12+.
     # `places=0` tolerates the tiny delay between creating `tr` and reading `now`.
     self.assertAlmostEqual(0, abs((tr.end - now).total_seconds()), places=0)
     self.assertIsNone(tr.tzinfo_)
     self.assertFalse(tr.expandable)
     self.assertFalse(tr.expanded)
     self.assertTrue(tr.is_inf)
     self.assertEqual(math.inf, tr.hr_length_org)
     self.assertEqual(math.inf, tr.hr_length)
     # The missing start is expected to be rendered as "-"
     self.assertEqual(f"- ~ {now.strftime('%m-%d')}", tr.expr_period_short)
     self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(now),
                            places=0)
     prd = tr.get_periods()
     self.assertListEqual([tr], prd)
示例#9
0
 def trange_ensure_not_inf(days_collected, trange, tzinfo):
     """
     Return a time range which length is not :class:`math.inf`.

     ``trange`` is returned as-is unless it is infinite. If it is, a new range of
     ``days_collected * 24`` hours is created from its start and end, and ``tzinfo``.
     """
     if not trange.is_inf:
         return trange

     hours_collected = days_collected * 24
     return TimeRange(range_hr=hours_collected,
                      start=trange.start,
                      end=trange.end,
                      tzinfo_=tzinfo)
示例#10
0
    def test_set_day_tz_naive(self):
        """Timezone-naive ``start`` and ``end`` become timezone-aware in the given ``tzinfo_``."""
        naive_start = datetime(2020, 2, 14, 1, 1, 1)
        naive_end = datetime(2020, 2, 17, 1, 1, 1)

        time_range = TimeRange(start=naive_start,
                               end=naive_end,
                               tzinfo_=pytz.UTC)

        # Start: aware, and equal to the naive value localized to UTC
        self.assertFalse(is_tz_naive(time_range.start))
        self.assertEqual(pytz.UTC.localize(naive_start), time_range.start)

        # End: same expectation
        self.assertFalse(is_tz_naive(time_range.end))
        self.assertEqual(pytz.UTC.localize(naive_end), time_range.end)
示例#11
0
 def test_end(self):
     """
     Check a range constructed with only ``end``.

     The start is expected to stay unset, which makes the range infinite and
     not expandable, while the end is kept as given.
     """
     end_dt = datetime(2020, 4, 4, 1, 1, 1, tzinfo=pytz.UTC)
     tr = TimeRange(end=end_dt)
     self.assertIsNone(tr.start)
     self.assertIsNone(tr.start_org)
     # `assertAlmostEqual` is used instead of the `assertAlmostEquals` alias,
     # which is deprecated and no longer exists on Python 3.12+.
     # `places=0` compares the values after rounding their difference to 0 decimals.
     self.assertAlmostEqual(0, abs((tr.end - end_dt).total_seconds()),
                            places=0)
     self.assertIsNone(tr.tzinfo_)
     self.assertFalse(tr.expandable)
     self.assertFalse(tr.expanded)
     self.assertTrue(tr.is_inf)
     self.assertEqual(math.inf, tr.hr_length_org)
     self.assertEqual(math.inf, tr.hr_length)
     # The missing start is expected to be rendered as "-"
     self.assertEqual(f"- ~ {end_dt.strftime('%m-%d')}",
                      tr.expr_period_short)
     self.assertAlmostEqual(tr.end_time_seconds,
                            time_to_seconds(end_dt.time()), places=0)
     prd = tr.get_periods()
     self.assertListEqual([tr], prd)
示例#12
0
 def test_nfill_range_only(self):
     """
     Check a range constructed with only ``range_hr`` while ``end_autofill_now`` is off.

     The start is expected to be 120 hours before the current time and the end
     is expected to stay unset, which makes the range infinite even though its
     hour length is reported as 120.
     """
     tr = TimeRange(range_hr=120, end_autofill_now=False)
     now = now_utc_aware()
     expected_start = now - timedelta(hours=120)
     # `assertAlmostEqual` is used instead of the `assertAlmostEquals` alias,
     # which is deprecated and no longer exists on Python 3.12+.
     # `places=0` tolerates the tiny delay between creating `tr` and reading `now`.
     self.assertAlmostEqual(
         0, abs((expected_start - tr.start).total_seconds()), places=0)
     self.assertAlmostEqual(
         0, abs((expected_start - tr.start_org).total_seconds()), places=0)
     self.assertIsNone(tr.end)
     self.assertIsNone(tr.tzinfo_)
     self.assertFalse(tr.expandable)
     self.assertFalse(tr.expanded)
     self.assertTrue(tr.is_inf)
     self.assertAlmostEqual(120, tr.hr_length_org, places=0)
     self.assertAlmostEqual(120, tr.hr_length, places=0)
     # The missing end is expected to be rendered as "-"
     self.assertEqual(f"{expected_start.strftime('%m-%d')} ~ -",
                      tr.expr_period_short)
     # Without an end, the end time of day is expected to be the one of the current time
     self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(now),
                            places=0)
     prd = tr.get_periods()
     self.assertListEqual([tr], prd)
示例#13
0
 def test_fill_range_hr_0(self):
     """
     Check a range constructed with ``range_hr=0``.

     Both the start and the end are expected to be (about) the current time,
     giving a finite, expandable range of 0 hours.
     """
     tr = TimeRange(range_hr=0)
     now = now_utc_aware()
     expected_start_end = now
     # `assertAlmostEqual` is used instead of the `assertAlmostEquals` alias,
     # which is deprecated and no longer exists on Python 3.12+.
     # `places=0` tolerates the tiny delay between creating `tr` and reading `now`.
     self.assertAlmostEqual(
         0, abs((expected_start_end - tr.start).total_seconds()), places=0)
     self.assertAlmostEqual(
         0, abs((expected_start_end - tr.start_org).total_seconds()),
         places=0)
     self.assertAlmostEqual(0, abs((tr.end - now).total_seconds()), places=0)
     self.assertIsNone(tr.tzinfo_)
     self.assertTrue(tr.expandable)
     self.assertFalse(tr.expanded)
     self.assertFalse(tr.is_inf)
     self.assertAlmostEqual(0, tr.hr_length_org, places=0)
     self.assertAlmostEqual(0, tr.hr_length, places=0)
     self.assertEqual(
         f"{expected_start_end.strftime('%m-%d')} ~ {now.strftime('%m-%d')}",
         tr.expr_period_short)
     self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(now),
                            places=0)
     prd = tr.get_periods()
     self.assertListEqual([tr], prd)
示例#14
0
 def test_end_hr_range(self):
     """
     Check a range constructed with ``end`` and ``range_hr``.

     The start is expected to be ``range_hr`` hours before the given end,
     giving a finite, expandable range of 120 hours.
     """
     end_dt = datetime(2020, 4, 4, 1, 1, 1, tzinfo=pytz.UTC)
     start_dt_expected = end_dt - timedelta(hours=120)
     tr = TimeRange(end=end_dt, range_hr=120)
     # `assertAlmostEqual` is used instead of the `assertAlmostEquals` alias,
     # which is deprecated and no longer exists on Python 3.12+.
     # `places=0` compares the values after rounding their difference to 0 decimals.
     self.assertAlmostEqual(
         0, abs((start_dt_expected - tr.start).total_seconds()), places=0)
     self.assertAlmostEqual(
         0, abs((start_dt_expected - tr.start_org).total_seconds()),
         places=0)
     self.assertAlmostEqual(0, abs((tr.end - end_dt).total_seconds()),
                            places=0)
     self.assertIsNone(tr.tzinfo_)
     self.assertTrue(tr.expandable)
     self.assertFalse(tr.expanded)
     self.assertFalse(tr.is_inf)
     self.assertAlmostEqual(120, tr.hr_length_org, places=0)
     self.assertAlmostEqual(120, tr.hr_length, places=0)
     self.assertEqual(
         f"{start_dt_expected.strftime('%m-%d')} ~ {end_dt.strftime('%m-%d')}",
         tr.expr_period_short)
     self.assertAlmostEqual(tr.end_time_seconds,
                            time_to_seconds(end_dt.time()), places=0)
     prd = tr.get_periods()
     self.assertListEqual([tr], prd)
示例#15
0
    def test_nfill_start(self):
        """
        Check a range constructed with only ``start`` while ``end_autofill_now`` is off.

        The end is expected to stay unset, which makes the range infinite and not expandable,
        while its hour length is expected to be the time elapsed from the start until now.
        """
        start_dt = datetime(2020, 4, 4, 1, 1, 1, tzinfo=pytz.UTC)

        tr = TimeRange(start=start_dt, end_autofill_now=False)
        now = now_utc_aware()
        hr_diff = (now - start_dt).total_seconds() / 3600
        # `assertAlmostEqual` is used instead of the `assertAlmostEquals` alias,
        # which is deprecated and no longer exists on Python 3.12+.
        # `places=0` compares the values after rounding their difference to 0 decimals.
        self.assertAlmostEqual(0, abs((start_dt - tr.start).total_seconds()),
                               places=0)
        self.assertAlmostEqual(0,
                               abs((start_dt - tr.start_org).total_seconds()),
                               places=0)
        self.assertIsNone(tr.end)
        self.assertIsNone(tr.tzinfo_)
        self.assertFalse(tr.expandable)
        self.assertFalse(tr.expanded)
        self.assertTrue(tr.is_inf)
        self.assertAlmostEqual(hr_diff, tr.hr_length_org, places=0)
        self.assertAlmostEqual(hr_diff, tr.hr_length, places=0)
        # The missing end is expected to be rendered as "-"
        self.assertEqual(f"{start_dt.strftime('%m-%d')} ~ -",
                         tr.expr_period_short)
        # Without an end, the end time of day is expected to be the one of the current time
        self.assertAlmostEqual(tr.end_time_seconds, time_to_seconds(now),
                               places=0)
        prd = tr.get_periods()
        self.assertListEqual([tr], prd)
示例#16
0
    def data_days_collected(collection,
                            filter_,
                            *,
                            hr_range: Optional[int] = None,
                            start: Optional[datetime] = None,
                            end: Optional[datetime] = None):
        """
        Calculate the count of days that the filtered data covers.

        Unlike ``days_collected`` in ``__init__()`` which is assumed to be true without any check,
        this may connect to the database to find out the actual count:

        - If the time range is finite, its length in days is returned without querying.
        - Otherwise, the oldest document in ``collection`` which matches ``filter_`` is fetched,
          and the count of days from its OID generation time until ``end`` (or now if ``end`` is not given)
          is returned. The returned count will not go below 0.

        ``HourlyResult.DAYS_NONE`` is returned if no document matches ``filter_``
        or if ``start`` is later than the current time.

        ``hr_range`` will be ignored if both ``start`` and ``end`` is specified.
        """
        trange = TimeRange(range_hr=hr_range, start=start, end=end, end_autofill_now=False)

        if not trange.is_inf:
            return trange.hr_length / 24

        # Infinite range: the oldest matching document marks where the data begins
        oldest = collection.find_one(filter_, sort=[(OID_KEY, pymongo.ASCENDING)])
        if not oldest:
            return HourlyResult.DAYS_NONE

        now = now_utc_aware()

        if start:
            start = make_tz_aware(start)

        # A range starting in the future cannot contain any data yet
        if start and start > now:
            return HourlyResult.DAYS_NONE

        if end:
            end = make_tz_aware(end)

        range_end = end or now
        oldest_created = ObjectId(oldest[OID_KEY]).generation_time
        days_elapsed = (range_end - oldest_created).total_seconds() / 86400

        return max(days_elapsed, 0)
示例#17
0
    def member_daily_message_count(
            self, channel_oids: Union[ObjectId, List[ObjectId]], *,
            hours_within: Optional[int] = None, start: Optional[datetime] = None, end: Optional[datetime] = None,
            tzinfo_: PytzInfo = UTC.to_tzinfo()) -> \
            MemberDailyMessageResult:
        """
        Count the messages per date per member in the given channel(s).

        Messages are limited to the time range built from ``hours_within``, ``start`` and ``end``.
        Dates are formatted in the timezone ``tzinfo_``.
        """
        filter_doc = self._channel_oids_filter_(channel_oids)

        time_range = TimeRange(range_hr=hours_within, start=start, end=end, tzinfo_=tzinfo_)

        # This modifies `filter_doc` in place
        self._attach_time_range_(filter_doc, trange=time_range)

        # Each group is identified by the message date (in the given timezone) and the member
        group_id = {
            MemberDailyMessageResult.KEY_DATE: {
                "$dateToString": {
                    "date": "$_id",
                    "format": MemberDailyMessageResult.FMT_DATE,
                    "timezone": tzinfo_.tzidentifier
                }
            },
            MemberDailyMessageResult.KEY_MEMBER: "$" + MessageRecordModel.UserRootOid.key
        }
        pipeline = [
            {"$match": filter_doc},
            {"$group": {"_id": group_id, MemberDailyMessageResult.KEY_COUNT: {"$sum": 1}}}
        ]

        counts = list(self.aggregate(pipeline))
        days_collected = HourlyResult.data_days_collected(
            self, filter_doc, hr_range=hours_within, start=start, end=end)

        return MemberDailyMessageResult(counts, days_collected, tzinfo_, trange=time_range)
示例#18
0
 def test_malformed(self):
     """Creating a range which end is before its start must raise an error."""
     later = datetime(2020, 4, 16, 1, 1, 1, tzinfo=pytz.UTC)
     earlier = datetime(2020, 4, 14, 1, 1, 1, tzinfo=pytz.UTC)

     # The boundaries are swapped on purpose: `start` is 2 days after `end`
     self.assertRaises(TimeRangeEndBeforeStart,
                       TimeRange,
                       start=later,
                       end=earlier)
示例#19
0
    def message_count_before_time(
            self, channel_oids: Union[ObjectId, List[ObjectId]], *, hours_within: Optional[int] = None,
            start: Optional[datetime] = None, end: Optional[datetime] = None, tzinfo_: PytzInfo = UTC.to_tzinfo()) -> \
            CountBeforeTimeResult:
        """
        Count the messages per date which were created before a certain time of the day.

        For every message in the time range, the second of the day (in the timezone ``tzinfo_``) is calculated.
        Only the messages which second of the day is less than ``end_time_seconds`` of the time range are counted.

        The result is grouped by date and sorted by date ascending.
        """
        filter_doc = self._channel_oids_filter_(channel_oids)

        time_range = TimeRange(range_hr=hours_within, start=start, end=end, tzinfo_=tzinfo_)

        # This modifies `filter_doc` in place
        self._attach_time_range_(filter_doc, trange=time_range)

        def in_local_tz(operator):
            """Build the expression applying the date ``operator`` on the OID in the given timezone."""
            return {operator: {"date": "$_id", "timezone": tzinfo_.tzidentifier}}

        # hour * 3600 + minute * 60 + second
        sec_of_day = {
            "$add": [
                {"$multiply": [in_local_tz("$hour"), 3600]},
                {"$multiply": [in_local_tz("$minute"), 60]},
                in_local_tz("$second")
            ]
        }
        project_stage = {"$project": {CountBeforeTimeResult.KEY_SEC_OF_DAY: sec_of_day}}
        before_time_stage = {
            "$match": {
                CountBeforeTimeResult.KEY_SEC_OF_DAY: {"$lt": time_range.end_time_seconds}
            }
        }
        group_stage = {
            "$group": {
                "_id": {
                    CountBeforeTimeResult.KEY_DATE: {
                        "$dateToString": {
                            "date": "$_id",
                            "format": CountBeforeTimeResult.FMT_DATE,
                            "timezone": tzinfo_.tzidentifier
                        }
                    }
                },
                CountBeforeTimeResult.KEY_COUNT: {"$sum": 1}
            }
        }
        pipeline = [
            {"$match": filter_doc},
            project_stage,
            before_time_stage,
            group_stage,
            {"$sort": {"_id": pymongo.ASCENDING}}
        ]

        counts = list(self.aggregate(pipeline))
        days_collected = HourlyResult.data_days_collected(
            self, filter_doc, hr_range=hours_within, start=time_range.start_org, end=end)

        return CountBeforeTimeResult(counts, days_collected, tzinfo_, trange=time_range)
示例#20
0
    def get_user_messages_total_count(
            self, channel_oids: Union[ObjectId, List[ObjectId]], *, hours_within: Optional[int] = None,
            start: Optional[datetime] = None, end: Optional[datetime] = None, period_count: int = 3,
            tzinfo_: Optional[tzinfo] = None) \
            -> MemberMessageCountResult:
        """
        Count the messages per member in the given channel(s), split by period if possible.

        The time range is multiplied by ``period_count``. If it is not infinite, each of its periods
        becomes a ``$switch`` branch which yields the index of the period (as a string),
        and that index is added to the group key.

        For an infinite time range, or if no branch could be built, the messages are grouped by member only.
        """
        filter_doc = self._channel_oids_filter_(channel_oids)
        time_range = TimeRange(
            range_hr=hours_within, start=start, end=end, range_mult=period_count, tzinfo_=tzinfo_)

        # This modifies `filter_doc` in place
        self._attach_time_range_(filter_doc, trange=time_range)

        # Periods cannot be built for the full range (inf):
        # its `start` and `end` are `None` which cannot be used for generating `ObjectId`.
        periods = [] if time_range.is_inf else time_range.get_periods()

        # Branches of the `$switch` expression, one per period
        branches = []
        for period_idx, period in enumerate(periods):
            lower_oid = dt_to_objectid(period.start)
            upper_oid = dt_to_objectid(period.end) if lower_oid else None

            # Skip the period if any of its boundaries cannot be turned into an OID
            if not (lower_oid and upper_oid):
                continue

            in_period = {
                "$and": [
                    {"$gte": ["$" + OID_KEY, lower_oid]},
                    {"$lt": ["$" + OID_KEY, upper_oid]}
                ]
            }
            branches.append({"case": in_period, "then": str(period_idx)})

        group_id = {MemberMessageCountResult.KEY_MEMBER_ID: "$" + MessageRecordModel.UserRootOid.key}
        if branches:
            group_id[MemberMessageCountResult.KEY_INTERVAL_IDX] = {"$switch": {"branches": branches}}

        group_stage = {
            "$group": {
                OID_KEY: group_id,
                MemberMessageCountResult.KEY_COUNT: {"$sum": 1}
            }
        }
        counts = list(self.aggregate([{"$match": filter_doc}, group_stage]))

        return MemberMessageCountResult(counts, period_count, time_range)