def test_seed(self):
     # Determinism check: every repeated call made with the same arguments
     # and seed=1 must yield the same inclusion items, in the same order,
     # as the reference call made once up front.
     call_kwargs = dict(samples_per_interval=1,
                        start_date='2019-12-26',
                        seed=1)
     reference = subsample_longitudinal(self.md2, **call_kwargs)
     for _ in range(self._N_TEST_ITERATIONS):
         repeat = subsample_longitudinal(self.md2, **call_kwargs)
         # The reference items are re-read on every pass, so the comparison
         # always reflects the reference's current state.
         reference_items = list(reference.inclusion.items())
         repeat_items = list(repeat.inclusion.items())
         self.assertEqual(reference_items, repeat_items)
    def test_interval_bounds1(self):
        # One group of dates per expected interval. With one sample per
        # interval, exactly one sampled date must fall in each group.
        expected_interval_dates = (
            {'2020-01-02', '2020-01-03', '2020-01-04', '2020-01-05',
             '2020-01-06', '2020-01-07', '2020-01-08'},
            {'2020-01-09', '2020-01-10', '2020-01-11', '2020-01-12',
             '2020-01-13', '2020-01-14', '2020-01-15'},
            {'2020-01-16', '2020-01-17'},
            {'2020-02-21'},
            {'2020-03-15'},
        )

        # No seed is passed, so the check is repeated _N_TEST_ITERATIONS
        # times against independently drawn selections.
        for _ in range(self._N_TEST_ITERATIONS):
            selection = subsample_longitudinal(
                self.md2,
                samples_per_interval=1,
                start_date='2019-12-26',
            )

            self.assertEqual(selection.inclusion.sum(), 5)
            self.assertEqual(selection.metadata.get_column('date-md'),
                             self.md2)
            self.assertEqual(selection.label, 'subsample_longitudinal')

            # Dates of the samples flagged for inclusion.
            all_dates = self.md2.to_series()
            picked_dates = set(all_dates[selection.inclusion].values)
            for interval_dates in expected_interval_dates:
                overlap = picked_dates & interval_dates
                self.assertEqual(len(overlap), 1)
    def test_start_date_not_in_data(self):
        # Runs the sampler on md1 with an explicit start_date and checks the
        # selection size, the attached metadata column, the label, and that
        # no missing-value label ended up in the inclusion index.
        # NOTE(review): going by the test name, '2019-12-30' is a date that
        # does not occur in md1 -- confirm against the fixture.
        selection = subsample_longitudinal(self.md1, start_date='2019-12-30')

        self.assertEqual(selection.inclusion.sum(), 8)
        self.assertEqual(selection.metadata.get_column('date-md'), self.md1)
        self.assertEqual(selection.label, 'subsample_longitudinal')

        # `np.nan in list(index)` only matches NaN by object identity
        # (NaN != NaN), so a NaN label held as any other float object would
        # go undetected and the assertion would pass vacuously. Asking the
        # index itself catches every missing label.
        # Assumes `inclusion` is a pandas Series -- TODO confirm.
        self.assertFalse(selection.inclusion.index.isna().any())
    def test_two_sample_per_interval(self):
        # With samples_per_interval=2 on md1, the selection must include 8
        # samples, carry md1 as its 'date-md' column and keep the method
        # label.
        selection = subsample_longitudinal(self.md1, samples_per_interval=2)

        n_included = selection.inclusion.sum()
        self.assertEqual(n_included, 8)

        date_column = selection.metadata.get_column('date-md')
        self.assertEqual(date_column, self.md1)

        self.assertEqual(selection.label, 'subsample_longitudinal')
    def test_default(self):
        # With only the metadata column supplied (all other arguments left
        # at their defaults), the selection must include 9 samples, carry
        # md1 as its 'date-md' column and keep the method label.
        selection = subsample_longitudinal(self.md1)

        n_included = selection.inclusion.sum()
        self.assertEqual(n_included, 9)

        date_column = selection.metadata.get_column('date-md')
        self.assertEqual(date_column, self.md1)

        self.assertEqual(selection.label, 'subsample_longitudinal')