Exemplo n.º 1
0
    def test_get_entry_failure(self):
        """Raise CovidHospException when a lookup path can't be resolved."""

        metadata = self.test_utils.load_sample_metadata()

        # this test expects a lookup of -1 on the sample metadata to fail
        self.assertRaises(CovidHospException, Utils.get_entry, metadata, -1)
Exemplo n.º 2
0
    def test_launch_if_main_when_not_main(self):
        """Leave the entry point uncalled for a name other than '__main__'."""

        entry_point = MagicMock()

        Utils.launch_if_main(entry_point, '__test__')

        self.assertFalse(entry_point.called)
Exemplo n.º 3
0
    def test_launch_if_main_when_main(self):
        """Call the entry point exactly once when the name is '__main__'."""

        entry_point = MagicMock()

        Utils.launch_if_main(entry_point, '__main__')

        self.assertEqual(entry_point.call_count, 1)
Exemplo n.º 4
0
  def test_acquire_specific_issue(self):
    """Acquire a dataset for an explicit date window and check the max issue.

    The flow is: confirm the API returns no data, confirm the database reports
    its initial max issue, run `Utils.update_dataset` with mocked network
    calls, then confirm the max issue advanced and that the update reported
    an acquisition.
    """

    # make sure the data does not yet exist
    # NOTE(review): -2 is presumably the API's "no results" code -- confirm
    with self.subTest(name='no data yet'):
      response = Epidata.covid_hosp('MA', Epidata.range(20200101, 20210101))
      self.assertEqual(response['result'], -2)

    # acquire sample data into local database
    # mock out network calls to external hosts
    # before anything is acquired, this test expects get_max_issue to report
    # 1900-01-01
    with Database.connect() as db:
      pre_max_issue = db.get_max_issue()
    self.assertEqual(pre_max_issue, pd.Timestamp('1900-01-01 00:00:00'))
    # fetch_metadata is patched to return the sample metadata; fetch_dataset
    # is patched with a one-element side_effect list, so it can serve exactly
    # one call (returning the "dataset0.csv" sample)
    with self.subTest(name='first acquisition'), \
         patch.object(Network, 'fetch_metadata', return_value=self.test_utils.load_sample_metadata()) as mock_fetch_meta, \
         patch.object(Network, 'fetch_dataset', side_effect=[self.test_utils.load_sample_dataset("dataset0.csv")]
                      ) as mock_fetch:
      # NOTE(review): the two dates look like the bounds of the issue window
      # to acquire -- confirm against Utils.update_dataset
      acquired = Utils.update_dataset(Database,
                                      Network,
                                      date(2021, 3, 12),
                                      date(2021, 3, 14))
      # after the update, the stored max issue is expected to be 2021-03-13
      with Database.connect() as db:
        post_max_issue = db.get_max_issue()
      self.assertEqual(post_max_issue, pd.Timestamp('2021-03-13 00:00:00'))
      self.assertTrue(acquired)
Exemplo n.º 5
0
    def test_get_issue_from_revision(self):
        """Parse issue dates from revision strings; reject malformed ones."""

        # (revision string, expected issue); None marks input that must raise
        cases = (
            ('Tue, 11/03/2020 - 19:38', 20201103),
            ('Mon, 11/16/2020 - 00:55', 20201116),
            ('foo', None),
        )

        for revision, issue in cases:
            with self.subTest(revision=revision):

                if not issue:
                    with self.assertRaises(CovidHospException):
                        Utils.get_issue_from_revision(revision)
                else:
                    parsed = Utils.get_issue_from_revision(revision)
                    self.assertEqual(parsed, issue)
Exemplo n.º 6
0
    def test_get_entry_success(self):
        """Follow a path of keys and indices down to a nested value."""

        metadata = self.test_utils.load_sample_metadata()
        path = ('result', 0, 'tags', 2, 'id')

        tag_id = Utils.get_entry(metadata, *path)

        self.assertEqual(tag_id, '56f3cdad-8acb-46c8-bc71-aa1ded8407fb')
Exemplo n.º 7
0
    def test_extract_resource_details(self):
        """Reject malformed metadata; otherwise return URL and revision."""

        def mark_unsuccessful(metadata):
            metadata['success'] = False

        def drop_results(metadata):
            metadata['result'] = []

        def drop_resources(metadata):
            metadata['result'][0]['resources'] = []

        # each case corrupts freshly loaded sample metadata in a single way
        invalid_cases = (
            ('invalid success', mark_unsuccessful),
            ('invalid result', drop_results),
            ('invalid resource', drop_resources),
        )

        for name, corrupt in invalid_cases:
            with self.subTest(name=name):
                metadata = self.test_utils.load_sample_metadata()
                corrupt(metadata)

                with self.assertRaises(CovidHospException):
                    Utils.extract_resource_details(metadata)

        with self.subTest(name='valid'):
            metadata = self.test_utils.load_sample_metadata()

            details = Utils.extract_resource_details(metadata)
            url, revision = details

            expected_url = ('https://healthdata.gov/sites/default/files/'
                            'estimated_inpatient_all_20201213_1757.csv')
            expected_revision = 'Sun, 12/13/2020 - 22:36'
            self.assertEqual(url, expected_url)
            self.assertEqual(revision, expected_revision)
Exemplo n.º 8
0
  @staticmethod
  def run(network=Network):
    """Acquire the most recent dataset, unless it was previously acquired.

    Parameters
    ----------
    network
      Network implementation forwarded to `Utils.update_dataset`; defaults to
      `Network`.

    Returns
    -------
    bool
      Whether a new dataset was acquired.
    """

    # Declared static because the function takes no `self`: without the
    # decorator, calling it on an instance would bind that instance to
    # `network`.
    return Utils.update_dataset(Database, network)
Exemplo n.º 9
0
    def test_merge(self):
        """Merging the set of updates in each batch is pretty tricky"""
        # Generate a set of synthetic updates with overlapping keys
        N = 10
        dfs = []
        for i in range(5):
            # knock out every 2nd key, then every 3rd, then every 4th, etc
            dfs.append(
                pd.DataFrame(
                    dict(state=range(1, N, i + 1),
                         reporting_cutoff_start=range(N + 1, 2 * N, i + 1),
                         **{
                             spec[0]: i + 1
                             for spec in Database.ORDERED_CSV_COLUMNS[2:]
                         })))
        # add a data frame with unseen keys
        dfs.append(
            pd.DataFrame(
                dict(state=[-1],
                     reporting_cutoff_start=[-1],
                     **{
                         spec[0]: -1
                         for spec in Database.ORDERED_CSV_COLUMNS[2:]
                     })))

        # now we need to know which data frame was used as the final value. the
        # above procedure is a prime number generator, so we can derive the result
        # mathematically:

        # for x in 1..N get the greatest number 5 or less that evenly divides x
        value_from = [[i for i in range(5, 0, -1) if x / i == x // i][0]
                      for x in range(N - 1)] + [-1]
        states = list(range(1, N)) + [-1]
        dates = list(range(N + 1, 2 * N)) + [-1]
        self.assertEqual(len(value_from), len(states))
        self.assertEqual(len(states), len(dates))

        expected = pd.DataFrame(
            dict(state=states,
                 reporting_cutoff_start=dates,
                 **{
                     spec[0]: value_from
                     for spec in Database.ORDERED_CSV_COLUMNS[2:]
                 })).astype({
                     spec[0]: 'float64'
                     for spec in Database.ORDERED_CSV_COLUMNS[2:]
                 })
        result = Utils.merge_by_key_cols(dfs, Database.KEY_COLS)
        try:
            pd.testing.assert_frame_equal(result, expected)
        except:
            assert False, f"""
Exemplo n.º 10
0
    def test_parse_bool(self):
        """Map strings to True/False/None and reject unrecognized text."""

        # both None and the empty string are expected to parse to None
        for name, blank in (('None', None), ('empty', '')):
            with self.subTest(name=name):
                self.assertIsNone(Utils.parse_bool(blank))

        # matching is expected to ignore letter case
        with self.subTest(name='true'):
            for text in ('true', 'True', 'tRuE'):
                self.assertTrue(Utils.parse_bool(text))

        with self.subTest(name='false'):
            for text in ('false', 'False', 'fAlSe'):
                self.assertFalse(Utils.parse_bool(text))

        with self.subTest(name='exception'):
            self.assertRaises(CovidHospException, Utils.parse_bool, 'maybe')
Exemplo n.º 11
0
    def test_run_skip_old_dataset(self):
        """Acquire nothing when the database reports a far-future max issue."""

        network = MagicMock()
        network.fetch_metadata.return_value = (
            self.test_utils.load_sample_metadata())
        database = MagicMock()
        # enter the mocked context manager once to get hold of the connection
        # object that any later `with database.connect()` will also yield
        with database.connect() as connection:
            pass
        connection.get_max_issue.return_value = pd.Timestamp("2200/1/1")

        acquired = Utils.update_dataset(database=database, network=network)

        self.assertFalse(acquired)
        # nothing may be downloaded or written
        untouched = (network.fetch_dataset,
                     connection.insert_metadata,
                     connection.insert_dataset)
        for mocked_call in untouched:
            mocked_call.assert_not_called()
Exemplo n.º 12
0
    def test_issues_to_fetch(self):
        """Group archive links by issue date for dates between two bounds.

        The expectation below leaves out both bound dates (03-13 and 03-16)
        and lists same-day entries in timestamp order, even though the input
        rows for 03-15 are out of order.
        """

        rows = (
            ("2021-03-13 00:00:00", "a"),
            ("2021-03-14 00:00:00", "b"),
            ("2021-03-15 00:00:01", "d"),
            ("2021-03-15 00:00:00", "c"),
            ("2021-03-16 00:00:00", "e"),
        )
        test_metadata = pd.DataFrame({
            "date": [pd.Timestamp(stamp) for stamp, _ in rows],
            "Archive Link": [link for _, link in rows]
        }).set_index("date")
        lower_bound = pd.Timestamp("2021-3-13")
        upper_bound = pd.Timestamp("2021-3-16")

        issues = Utils.issues_to_fetch(test_metadata, lower_bound, upper_bound)

        expected = {
            date(2021, 3, 14): [
                ("b", pd.Timestamp("2021-03-14 00:00:00")),
            ],
            date(2021, 3, 15): [
                ("c", pd.Timestamp("2021-03-15 00:00:00")),
                ("d", pd.Timestamp("2021-03-15 00:00:01")),
            ],
        }
        self.assertEqual(issues, expected)
Exemplo n.º 13
0
    def test_run_acquire_new_dataset(self):
        """Acquire one issue served by two URLs and check what gets stored."""

        network = MagicMock()
        network.fetch_metadata.return_value = (
            self.test_utils.load_sample_metadata())
        # every fetch_dataset call hands back this same single-row frame
        network.fetch_dataset.return_value = pd.DataFrame({
            "date": [pd.Timestamp("2020/1/1")],
            "state": ["ca"]
        })
        database = MagicMock()
        # enter the mocked context manager once to get hold of the connection
        # object that any later `with database.connect()` will also yield
        with database.connect() as connection:
            pass
        type(connection).KEY_COLS = PropertyMock(
            return_value=["state", "date"])
        connection.get_max_issue.return_value = pd.Timestamp("1900/1/1")

        # one issue date with two archive URLs sharing the same timestamp
        pending_issues = {
            pd.Timestamp("2021/3/15"): [
                ("url1", pd.Timestamp("2021-03-15 00:00:00")),
                ("url2", pd.Timestamp("2021-03-15 00:00:00")),
            ]
        }
        with patch.object(Utils, 'issues_to_fetch',
                          return_value=pending_issues):
            acquired = Utils.update_dataset(database=database,
                                            network=network)

        self.assertTrue(acquired)

        connection.insert_metadata.assert_called_once()
        metadata_args = connection.insert_metadata.call_args[0]
        self.assertEqual(metadata_args[:2], (20210315, "url2"))

        dataset_args = connection.insert_dataset.call_args[0]
        # the stored frame is expected with columns in KEY_COLS order
        expected_frame = pd.DataFrame({
            "state": ["ca"],
            "date": [pd.Timestamp("2020/1/1")]
        })
        pd.testing.assert_frame_equal(dataset_args[1], expected_frame)
        self.assertEqual(dataset_args[0], 20210315)
Exemplo n.º 14
0
    def test_run_acquire_new_dataset(self):
        """Store metadata and data when the revision isn't in the database."""

        dataset = [1, 2, 3]
        network = MagicMock()
        network.fetch_metadata.return_value = (
            self.test_utils.load_sample_metadata())
        network.fetch_dataset.return_value = dataset
        database = MagicMock()
        # enter the mocked context manager once to get hold of the connection
        # object that any later `with database.connect()` will also yield
        with database.connect() as connection:
            pass
        connection.contains_revision.return_value = False

        acquired = Utils.update_dataset(database=database, network=network)

        self.assertTrue(acquired)

        connection.insert_metadata.assert_called_once()
        leading_args = connection.insert_metadata.call_args[0][:2]
        self.assertEqual(leading_args, (20201213, 'Sun, 12/13/2020 - 22:36'))

        connection.insert_dataset.assert_called_once_with(20201213, dataset)
Exemplo n.º 15
0
"""
Acquires the "COVID-19 Reported Patient Impact and Hospital Capacity by State"
dataset provided by the US Department of Health & Human Services
via healthdata.gov.
"""

# first party
from delphi.epidata.acquisition.covid_hosp.common.utils import Utils
from delphi.epidata.acquisition.covid_hosp.state_daily.database import Database
from delphi.epidata.acquisition.covid_hosp.state_daily.network import Network


class Update:
  """Entry point for acquiring this dataset via `Utils.update_dataset`."""

  @staticmethod
  def run(network=Network):
    """Acquire the most recent dataset, unless it was previously acquired.

    Parameters
    ----------
    network
      Network implementation forwarded to `Utils.update_dataset`; defaults to
      the `Network` class imported by this module.

    Returns
    -------
    bool
      Whether a new dataset was acquired.
    """

    # Declared static because the function takes no `self`: without the
    # decorator, calling it on an instance would bind that instance to
    # `network`.
    return Utils.update_dataset(Database, network)


# main entry point: hand `Update.run` and this module's `__name__` to the
# launcher, which decides whether to invoke it
Utils.launch_if_main(Update.run, __name__)
Exemplo n.º 16
0
    def test_int_from_date(self):
        """Convert a YYYY-MM-DD style date string to a YYYYMMDD int."""

        # dash-separated, slash-separated, and with a trailing time of day
        date_strings = ('2020-11-17', '2020/11/17', '2020/11/17 10:00:00')

        for text in date_strings:
            self.assertEqual(Utils.int_from_date(text), 20201117)