Example #1
    def test_dca_ingestion_pause_resume(self):
        data_product_id, dataset_id = self.make_ctd_data_product()

        streamer = Streamer(data_product_id, interval=1)
        self.addCleanup(streamer.stop)

        # Let a couple samples accumulate
        self.use_monitor(dataset_id, samples=2)

        # Go into DCA and get an editable handle to the coverage
        with DirectCoverageAccess() as dca:
            with dca.get_editable_coverage(dataset_id) as cov: # <-- This pauses ingestion
                monitor = DatasetMonitor(dataset_id)
                monitor.event.wait(7) # <-- ~7 Samples should accumulate on the ingestion queue
                self.assertFalse(monitor.event.is_set()) # Verifies that nothing was processed (i.e. ingestion is actually paused)
                monitor.stop()

        # Stop the streamer
        streamer.stop()

        # Drain the backlog: keep waiting until no new data arrives within 10 seconds
        cont = True
        while cont:
            monitor = DatasetMonitor(dataset_id)
            if not monitor.event.wait(10):
                cont = False
            monitor.stop()

        with DirectCoverageAccess() as dca:
            with dca.get_read_only_coverage(dataset_id) as cov:
                self.assertGreaterEqual(cov.num_timesteps, 8)
Example #2
    def test_run_coverage_doctor(self):
        data_product_id, dataset_id = self.make_ctd_data_product()

        # Run coverage doctor on an empty coverage
        with DirectCoverageAccess() as dca:
            # it's not corrupt yet, so it shouldn't need repair
            self.assertEqual(
                dca.run_coverage_doctor(dataset_id,
                                        data_product_id=data_product_id),
                'Repair Not Necessary')

            # Get the path to the master file so we can mess it up!
            with dca.get_editable_coverage(dataset_id) as cov:
                mpth = cov._persistence_layer.master_manager.file_path

            # Mess up the master file
            with open(mpth, 'wb') as f:
                f.write('mess you up!')

            # Repair the coverage
            self.assertEqual(
                dca.run_coverage_doctor(dataset_id,
                                        data_product_id=data_product_id),
                'Repair Successful')

        # Stream some data to the coverage
        streamer = Streamer(data_product_id, interval=0.5)
        self.addCleanup(streamer.stop)

        # Let at least 10 samples accumulate
        self.use_monitor(dataset_id, samples=10)

        # Run coverage doctor on a coverage with data
        with DirectCoverageAccess() as dca:
            # it's not corrupt yet, so it shouldn't need repair
            self.assertEqual(
                dca.run_coverage_doctor(dataset_id,
                                        data_product_id=data_product_id),
                'Repair Not Necessary')
            with dca.get_read_only_coverage(dataset_id) as cov:
                self.assertIsInstance(cov, AbstractCoverage)

            # Mess up the master file
            with open(mpth, 'wb') as f:
                f.write('mess you up!')

            # Repair the coverage
            self.assertEqual(
                dca.run_coverage_doctor(dataset_id,
                                        data_product_id=data_product_id),
                'Repair Successful')

        # Let at least 1 sample arrive
        self.use_monitor(dataset_id, samples=1)

        with DirectCoverageAccess() as dca:
            with dca.get_read_only_coverage(dataset_id) as cov:
                self.assertIsInstance(cov, AbstractCoverage)
Example #3
    def test_dca_not_managed_warnings(self):
        data_product_id, dataset_id = self.make_ctd_data_product()

        dca = DirectCoverageAccess()

        with mock.patch('ion.util.direct_coverage_utils.warn_user') as warn_user_mock:
            dca.pause_ingestion(dataset_id)
            self.assertEqual(warn_user_mock.call_args_list[0],
                             mock.call('Warning: Pausing ingestion when not using a context manager is potentially unsafe - '
                                       'be sure to resume ingestion for all streams by calling self.clean_up(streams=True)'))

        with mock.patch('ion.util.direct_coverage_utils.warn_user') as warn_user_mock:
            cov = dca.get_read_only_coverage(dataset_id)
            self.assertEqual(warn_user_mock.call_args_list[0],
                             mock.call('Warning: Coverages will remain open until they are closed or go out of scope - '
                                       'be sure to close coverage instances when you are finished working with them or call self.clean_up(ro_covs=True)'))

        with mock.patch('ion.util.direct_coverage_utils.warn_user') as warn_user_mock:
            cov = dca.get_editable_coverage(dataset_id)
            self.assertEqual(warn_user_mock.call_args_list[0],
                             mock.call('Warning: Pausing ingestion when not using a context manager is potentially unsafe - '
                                       'be sure to resume ingestion for all streams by calling self.clean_up(streams=True)'))
            self.assertEqual(warn_user_mock.call_args_list[1],
                             mock.call('Warning: Coverages will remain open until they are closed or go out of scope - '
                                       'be sure to close coverage instances when you are finished working with them or call self.clean_up(w_covs=True)'))

        dca.clean_up()
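
The warnings exercised above are emitted when DirectCoverageAccess is used without its context managers; the other examples in this listing use the managed form, which closes coverages and resumes paused ingestion automatically on exit. A minimal sketch of both patterns (not taken from the test suite; it assumes dataset_id is a valid dataset ID):

    # Managed: the coverage is closed when the inner block exits; using
    # get_editable_coverage here would also pause and resume ingestion automatically
    with DirectCoverageAccess() as dca:
        with dca.get_read_only_coverage(dataset_id) as cov:
            num_steps = cov.num_timesteps

    # Unmanaged: the warnings above are emitted, and clean-up is the caller's responsibility
    dca = DirectCoverageAccess()
    cov = dca.get_read_only_coverage(dataset_id)
    num_steps = cov.num_timesteps
    dca.clean_up()  # as in the test above; the warning text mentions keyword arguments for targeted clean-up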
Example #4
    def test_dca_coverage_reuse(self):
        data_product_id, dataset_id = self.make_ctd_data_product()

        streamer = Streamer(data_product_id, interval=1)
        self.addCleanup(streamer.stop)

        # Let a couple samples accumulate
        self.use_monitor(dataset_id, samples=2)

        with DirectCoverageAccess() as dca:
            import os
            cpth = dca.get_coverage_path(dataset_id)
            self.assertTrue(os.path.exists(cpth), msg='Path does not exist: %s' % cpth)

            with dca.get_read_only_coverage(dataset_id) as cov:
                self.assertFalse(cov.closed)

            self.assertTrue(cov.closed)

            with dca.get_editable_coverage(dataset_id) as cov:
                self.assertFalse(cov.closed)

            self.assertTrue(cov.closed)

            with dca.get_read_only_coverage(dataset_id) as cov:
                self.assertFalse(cov.closed)

            self.assertTrue(cov.closed)
Example #5
    def test_repair_temporal_geometry(self):
        data_product_id, dataset_id = self.make_ctd_data_product()

        streamer = Streamer(data_product_id, interval=0.5, simple_time=True)
        self.addCleanup(streamer.stop)

        # Let at least 10 samples accumulate
        self.use_monitor(dataset_id, samples=10)

        # Stop the streamer, reset i, restart the streamer - this simulates duplicate data
        streamer.stop()
        streamer.i = 0
        streamer.start()

        # Let at least 20 more samples accumulate
        self.use_monitor(dataset_id, samples=20)

        # Stop the streamer
        streamer.stop()

        # Open the coverage and mess with the times
        with DirectCoverageAccess() as dca:
            with dca.get_read_only_coverage(dataset_id) as cov:
                self.assertEqual(cov.num_timesteps, 30)
                t = cov.get_time_values()
                self.assertEqual(len(t), 30)
                self.assertFalse(np.array_equal(np.sort(t), t))

            dca.repair_temporal_geometry(dataset_id)

            with dca.get_read_only_coverage(dataset_id) as cov:
                self.assertGreaterEqual(cov.num_timesteps, 19)
                t = cov.get_time_values()
                self.assertGreaterEqual(len(t), 19)
                np.testing.assert_array_equal(np.sort(t), t)
Example #6
    def test_manual_data_upload(self):
        data_product_id, dataset_id = self.make_manual_upload_data_product()

        streamer = Streamer(data_product_id, interval=0.5, simple_time=True)
        self.addCleanup(streamer.stop)

        # Let at least 10 samples accumulate
        self.use_monitor(dataset_id, samples=10)

        # Verify that the HITL parameters are at their fill values
        with DirectCoverageAccess() as dca:
            with dca.get_read_only_coverage(dataset_id) as cov:
                fillarr = np.array([False] * 10)
                for p in [
                        p for p in cov.list_parameters()
                        if p.endswith('_hitl_qc')
                ]:
                    np.testing.assert_equal(
                        cov.get_parameter_values(p, slice(None, 10)), fillarr)

        # Upload the data - this pauses ingestion, performs the upload, and resumes ingestion
        with DirectCoverageAccess() as dca:
            dca.manual_upload(dataset_id, 'test_data/testmanualupload.csv',
                              'test_data/testmanualupload.yml')

        streamer.stop()

        # Wait a moment for ingestion to catch up
        self.use_monitor(dataset_id, samples=2)

        # Verify that the HITL parameters now have the correct values
        want_vals = {
            'temp_hitl_qc': np.array([0, 0, 0, 0, 1, 0, 0, 1, 0, 0],
                                     dtype=bool),
            'cond_hitl_qc': np.array([1, 0, 1, 0, 0, 0, 1, 1, 0, 0],
                                     dtype=bool)
        }
        with DirectCoverageAccess() as dca:
            with dca.get_read_only_coverage(dataset_id) as cov:
                for p in [
                        p for p in cov.list_parameters()
                        if p.endswith('_hitl_qc')
                ]:
                    np.testing.assert_equal(
                        cov.get_parameter_values(p, slice(None, 10)),
                        want_vals[p])
Example #8
    def test_upload_calibration_coefficients(self):
        data_product_id, dataset_id = self.make_cal_data_product()

        streamer = Streamer(data_product_id, interval=0.5)
        self.addCleanup(streamer.stop)

        # Let at least 10 samples accumulate
        self.use_monitor(dataset_id, samples=10)

        # Verify that the CC parameters are at their fill values
        with DirectCoverageAccess() as dca:
            with dca.get_read_only_coverage(dataset_id) as cov:
                for p in [
                        p for p in cov.list_parameters() if p.startswith('cc_')
                ]:
                    np.testing.assert_equal(cov.get_parameter_values(p, -1),
                                            -9999.)

        # Upload the calibration coefficients - this pauses ingestion, performs the upload, and resumes ingestion
        with DirectCoverageAccess() as dca:
            dca.upload_calibration_coefficients(dataset_id,
                                                'test_data/testcalcoeff.csv',
                                                'test_data/testcalcoeff.yml')

        # Let a little more data accumulate
        self.use_monitor(dataset_id, samples=2)

        # Verify that the CC parameters now have the correct values
        want_vals = {
            'cc_ta0': np.float32(1.155787e-03),
            'cc_ta1': np.float32(2.725208e-04),
            'cc_ta2': np.float32(-7.526811e-07),
            'cc_ta3': np.float32(1.716270e-07),
            'cc_toffset': np.float32(0.000000e+00)
        }
        with DirectCoverageAccess() as dca:
            with dca.get_read_only_coverage(dataset_id) as cov:
                for p in [
                        p for p in cov.list_parameters() if p.startswith('cc_')
                ]:
                    np.testing.assert_equal(cov.get_parameter_values(p, -1),
                                            want_vals[p])
Example #9
    def getUploadedCoverage(self, dp_id):
        keys = []
        with DirectCoverageAccess() as dca:
            # get the Dataset IDs associated with this DataProduct
            ds_id_list, _ = self.resource_registry.find_objects(
                dp_id, PRED.hasDataset, id_only=True)
            for ds_id in ds_id_list:  # could be multiple Datasets for this DataProduct
                with dca.get_editable_coverage(
                        ds_id) as cov:  # <-- This pauses ingestion
                    keys.extend([
                        k for k in cov.get_value_dictionary().keys()
                        if k.lower().endswith(('_l1c', '_l2c'))
                    ])
        return keys
Example #10
    def on_start(self):

        ImmediateProcess.on_start(self)

        # required arguments, passed in via the configuration kwarg to schedule_process;
        # kept under the 'process' namespace to avoid collisions
        fuc_id = self.CFG.get_safe('process.fuc_id', None)  # FileUploadContext ID
        dp_id = self.CFG.get_safe('process.dp_id', None)  # DataProduct ID

        # clients we'll need
        resource_registry = self.container.resource_registry
        object_store = self.container.object_store
        dataset_management = DatasetManagementServiceClient()
        data_product_management = DataProductManagementServiceClient()

        # get the Object (dict) containing details of the uploaded file
        fuc = object_store.read(fuc_id)

        # get the ParameterContexts associated with this DataProduct
        sd_id = resource_registry.find_objects(dp_id, PRED.hasStreamDefinition, id_only=True)[0][0] # TODO loop
        pd_id = resource_registry.find_objects(sd_id, PRED.hasParameterDictionary, id_only=True)[0][0] # TODO loop
        pc_list, _ = resource_registry.find_objects(pd_id, PRED.hasParameterContext, id_only=False) # parameter contexts

        # NetCDF file open here
        nc_filename = fuc.get('path', None)
        if nc_filename is None:
            raise BadRequest("uploaded file has no path")

        # keep track of the number of fields we actually process
        nfields = 0

        with netCDF4.Dataset(nc_filename,'r') as nc:

            nc_time = nc.variables['time'][:] # don't modify nc_time below, read once use many times

            for v in nc.variables:
                variable = nc.variables[v]
                nc_name = str(v)  # variable name should match what was downloaded; the 'c' suffix is appended below
                # check for REQUIRED attributes
                author = getattr(variable, 'author', None)
                reason = getattr(variable, 'reason', None)
                if not all([author,reason]):
                    log.info('skipping parameter %s - no author or reason' % nc_name)
                    continue
                # get all ParameterContexts (from pc_list) with this 'name' (should be one at the moment)
                pc_matches_nc_name_list = [c for c in pc_list if c.name == nc_name]
                # is variable already present?
                if len(pc_matches_nc_name_list) < 1:
                    log.info('skipping parameter %s - not found in ParameterContexts associated with DataProduct' % nc_name)
                    continue

                # we are using this ParameterContext as a copy
                pc = pc_matches_nc_name_list[0] # TODO should only have 1 context per 'name' but could be checked for completeness
                # only allow L1/L2 parameters (check against ooi_short_name which should end with this)
                m = re.compile('(_L[12])$').search(pc.ooi_short_name.upper()) # capture L1/L2 for use in new name
                if not m: # if not _L1 or _L2 move on
                    log.info('skipping parameter %s - not L1 or L2' % nc_name)
                    continue
                processing_level = m.group(1)
                # remove attributes we should not copy [_id,_rev,ts_created,ts_updated]
                delattr(pc, '_id')
                delattr(pc, '_rev')
                delattr(pc, 'ts_created')
                delattr(pc, 'ts_updated')
                # append L[12]c to name attribute (new parameter name)
                c_name = ''.join([pc['name'],processing_level,'c'])
                pc['name'] = c_name
                # copy attributes from NetCDF file
                pc['units'] = variable.units
                pc['value_encoding'] = str(variable.dtype)
                # TODO ERDDAP files don't have fill_value, but it should probably come from there; leaving the copied value for now
                # create ParameterContext
                pc_id = dataset_management.create_parameter(pc)
                data_product_management.add_parameter_to_data_product(pc_id,dp_id)

                # get NetCDF data for this variable
                nc_data = variable[:]

                with DirectCoverageAccess() as dca:
                    # get the Dataset IDs associated with this DataProduct
                    ds_id_list, _ = resource_registry.find_objects(dp_id, PRED.hasDataset, id_only=True)
                    for ds_id in ds_id_list: # could be multiple Datasets for this DataProduct
                        with dca.get_editable_coverage(ds_id) as cov: # <-- This pauses ingestion
                            # times in this Dataset
                            cov_time = cov.get_parameter_values(['time']).get_data()['time']
                            # subset nc_time (only where nc_time matches cov_time)
                            nc_indices = [i for i, x in enumerate(nc_time) if x in cov_time]
                            subset_nc_time = nc_time[nc_indices] + 2208988800 # TODO REMOVE THIS? ERDDAP 1970 vs NTP 1900
                            # don't forget to subset the data too
                            subset_nc_data = [nc_data[i] for i in nc_indices]
                            # use indices of where subset_nc_time exists in cov_time to update coverage
                            cov_indices = np.flatnonzero(np.in1d(cov_time, subset_nc_time)) # returns numpy.ndarray of indices
                            cov_indices = list(cov_indices) # converts to list for coverage
                            #cov._range_value[c_name][cov_indices] = subset_nc_data # TODO this should eventually work
                            for i, x in enumerate(cov_indices):
                                cov._range_value[c_name][x] = subset_nc_data[i]

                nfields = nfields + 1

        fuc['status'] = 'UploadDataProcessing process complete - %d fields created/updated' % nfields
        self.container.object_store.update_doc(fuc)

        # remove uploaded file
        try:
            os.remove(nc_filename)
        except OSError:
            pass # TODO take action to get this removed
Example #11
    def test_fill_temporal_gap(self):
        from ion.services.dm.inventory.dataset_management_service import DatasetManagementService

        data_product_id, dataset_id = self.make_ctd_data_product()
        pdict = DatasetManagementService.get_parameter_dictionary_by_name(
            'ctd_parsed_param_dict')

        streamer = Streamer(data_product_id, interval=0.5)
        self.addCleanup(streamer.stop)

        self.use_monitor(dataset_id, samples=10)

        streamer.stop()

        # While the streamer is stopped, collect ~10 seconds of NTP timestamps to serve as the gap
        gap_times = []
        waiter = Event()
        while not waiter.wait(1):
            gap_times.append(time.time() + 2208988800)
            if len(gap_times) == 10:
                waiter.set()

        # Simulate a gap by appending a new SimplexCoverage with times after the above gap
        with DirectCoverageAccess() as dca:
            dca.pause_ingestion(dataset_id)

            with dca.get_read_only_coverage(dataset_id) as cov:
                beforecovtimes = cov.get_time_values()

            with DatasetManagementService._create_simplex_coverage(
                    dataset_id, pdict, None, None) as scov:
                scov.insert_timesteps(3)
                now = time.time() + 2208988800
                ts = [now, now + 1, now + 2]
                scov.set_time_values(ts)
                aftercovtimes = scov.get_time_values()

            DatasetManagementService._splice_coverage(dataset_id, scov)

        # Start streaming data again
        streamer.start()

        # Create the gap-fill coverage
        with DatasetManagementService._create_simplex_coverage(
                dataset_id, pdict, None, None) as scov:
            scov.insert_timesteps(len(gap_times))
            scov.set_time_values(gap_times)
            gap_cov_path = scov.persistence_dir
            gapcovtimes = scov.get_time_values()

        # Fill the gap and capture times to do some assertions
        with DirectCoverageAccess() as dca:
            with dca.get_read_only_coverage(dataset_id) as cov:
                otimes = cov.get_time_values()

            dca.fill_temporal_gap(dataset_id, gap_coverage_path=gap_cov_path)

            with dca.get_read_only_coverage(dataset_id) as cov:
                agtimes = cov.get_time_values()

        self.use_monitor(dataset_id, samples=5)

        with DirectCoverageAccess() as dca:
            with dca.get_read_only_coverage(dataset_id) as cov:
                ntimes = cov.get_time_values()

        self.assertLess(len(otimes), len(agtimes))
        self.assertLess(len(agtimes), len(ntimes))

        bctl = len(beforecovtimes)
        gctl = len(gapcovtimes)
        actl = len(aftercovtimes)
        np.testing.assert_array_equal(beforecovtimes, ntimes[:bctl])
        np.testing.assert_array_equal(gapcovtimes,
                                      ntimes[bctl + 1:bctl + gctl + 1])
        np.testing.assert_array_equal(
            aftercovtimes, ntimes[bctl + gctl + 1:bctl + gctl + actl + 1])
        np.testing.assert_array_equal(agtimes, ntimes[:len(agtimes)])