Example #1
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        conductivity = rdt['conductivity']
        pressure = rdt['pressure']
        temperature = rdt['temp']

        longitude = rdt['lon'] if rdt['lon'] is not None else 0
        latitude = rdt['lat'] if rdt['lat'] is not None else 0

        sp = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)

        log.debug("Density algorithm calculated the sp (practical salinity) values: %s", sp)

        sa = SA_from_SP(sp, pressure, longitude, latitude)

        log.debug("Density algorithm calculated the sa (absolute salinity) values: %s", sa)

        dens_value = rho(sa, temperature, pressure)

        for key, value in rdt.iteritems():
            if key in out_rdt:
                if key=='conductivity' or key=='temp' or key=='pressure':
                    continue
                out_rdt[key] = value[:]

        out_rdt['density'] = dens_value

        log.debug("Density algorithm returning density values: %s", out_rdt['density'])

        return out_rdt.to_granule()
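
Most of the execute() transforms in this listing follow the same shape: unpack the incoming Granule into a RecordDictionaryTool, build an output RDT from the stream definition id passed in params, copy the fields both definitions share, overwrite the computed parameter, and serialize back to a Granule. A minimal sketch of that pattern follows; the import path is a guess at the coi-services module layout and the doubled-salinity output is purely illustrative (it mirrors a later example in this listing), so treat this as a sketch rather than the actual transform.

    # Sketch only: the import path and the derived parameter are assumptions.
    from ion.services.dm.utility.granule.record_dictionary import RecordDictionaryTool

    def execute(input=None, context=None, config=None, params=None, state=None):
        rdt = RecordDictionaryTool.load_from_granule(input)          # unpack the incoming granule
        out_rdt = RecordDictionaryTool(stream_definition_id=params)  # params carries the output stream definition id

        for key, value in rdt.iteritems():                           # copy fields present in both stream definitions
            if key in out_rdt:
                out_rdt[key] = value[:]

        out_rdt['salinity'] = 2 * rdt['salinity']                    # illustrative derived value
        return out_rdt.to_granule()                                  # repack for publishing
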
Example #2
    def _get_new_ctd_packet(self, length):

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id)
        #Explicitly make these numpy arrays...
        c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])
        t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])
        p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)])
        lat = numpy.array(
            [random.uniform(-90.0, 90.0) for i in xrange(length)])
        lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])
        h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])

        start_time = ntplib.system_to_ntp_time(time.time()) - (length + 1)
        tvar = numpy.array([start_time + i for i in xrange(1, length + 1)])

        rdt['time'] = tvar
        rdt['lat'] = lat
        rdt['lon'] = lon
        rdt['temp'] = t
        rdt['conductivity'] = c
        rdt['pressure'] = p

        #        rdt['coordinates'] = rdt0
        #        rdt['data'] = rdt1

        g = rdt.to_granule(data_producer_id=self.id)

        return g
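
The synthetic-packet producers above stamp samples with NTP-era timestamps via ntplib. A small, self-contained sketch of what system_to_ntp_time does (it only shifts the epoch from 1970 to 1900):

    import time
    import ntplib

    now = time.time()
    ntp_now = ntplib.system_to_ntp_time(now)
    # NTP counts seconds from 1900-01-01, Unix time from 1970-01-01, so the
    # difference is the fixed 70-year offset of 2208988800 seconds.
    print(ntp_now - now)   # 2208988800.0
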
Example #3
    def _get_data(cls, config):
        new_flst = get_safe(config, 'constraints.new_files', [])
        hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)
        for f in new_flst:
            try:
                parser = SlocumParser(f[0], hdr_cnt)
                #CBM: Not in use yet...
    #            ext_dset_res = get_safe(config, 'external_dataset_res', None)
    #            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
    #            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
    #            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
    #            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
    #            var_lst = ext_dset_res.dataset_description.parameters['variables']

                max_rec = get_safe(config, 'max_records', 1)
                dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')
                #tx_yml = get_safe(config, 'taxonomy')
                #ttool = TaxyTool.load(tx_yml) #CBM: Assertion inside RDT.__setitem__ requires same instance of TaxyTool
                pdict = ParameterDictionary.load(get_safe(config, 'param_dictionary'))

                cnt = calculate_iteration_count(len(parser.sensor_map), max_rec)
                for x in xrange(cnt):
                    #rdt = RecordDictionaryTool(taxonomy=ttool)
                    rdt = RecordDictionaryTool(param_dictionary=pdict)

                    for name in parser.sensor_map:
                        d = parser.data_map[name][x*max_rec:(x+1)*max_rec]
                        rdt[name]=d

                    #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                    g = rdt.to_granule()
                    yield g
            except SlocumParseException as spe:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file: \'{0}\''.format(f))
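
calculate_iteration_count is used here and in later examples to split parsed arrays into granules of at most max_records samples; its implementation is not shown. A plausible stand-in (an assumption, not the actual helper) is ceiling division, paired with the same slice arithmetic the loop above uses:

    def calculate_iteration_count(total_records, max_records):
        # assumed behaviour: number of max_records-sized chunks needed to cover total_records
        return (total_records + max_records - 1) // max_records

    data = list(range(10))    # stand-in for one entry of parser.data_map
    max_rec = 4
    for x in xrange(calculate_iteration_count(len(data), max_rec)):
        chunk = data[x * max_rec:(x + 1) * max_rec]
        print(chunk)          # [0..3], then [4..7], then [8, 9]
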
Example #4
    def _get_new_ctd_packet(self, length):

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def._id)
        #Explicitly make these numpy arrays...
        c = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)]) 
        t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)]) 
        p = numpy.array([random.lognormvariate(1,2) for i in xrange(length)])
        lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)]) 
        lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)]) 
        h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])

        start_time = ntplib.system_to_ntp_time(time.time()) - (length + 1)
        tvar = numpy.array([start_time + i for i in xrange(1,length+1)])


        rdt['time'] = tvar
        rdt['lat'] = lat
        rdt['lon'] = lon
        rdt['temp'] = t
        rdt['conductivity'] = c
        rdt['pressure'] = p

#        rdt['coordinates'] = rdt0
#        rdt['data'] = rdt1

        g = rdt.to_granule(data_producer_id=self.id)

        return g
Example #5
    def execute(input=None,
                context=None,
                config=None,
                params=None,
                state=None):
        '''
        @param input Granule
        @retval result Granule
        '''

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        conductivity = rdt['conductivity']
        cond_value = (conductivity / 100000.0) - 0.5

        for key, value in rdt.iteritems():
            if key in out_rdt:
                out_rdt[key] = value[:]

        # Update the conductivity values
        out_rdt['conductivity'] = cond_value

        # build the granule for conductivity
        return out_rdt.to_granule()
Example #6
    def execute(input=None,
                context=None,
                config=None,
                params=None,
                state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        conductivity = rdt['conductivity']
        pressure = rdt['pressure']
        temperature = rdt['temp']

        sal_value = SP_from_cndr(r=conductivity / cte.C3515,
                                 t=temperature,
                                 p=pressure)

        log.debug(
            "Salinity algorithm calculated the sp (practical salinity) values: %s",
            sal_value)

        for key, value in rdt.iteritems():
            if key in out_rdt:
                if key == 'conductivity' or key == 'temp' or key == 'pressure':
                    continue
                out_rdt[key] = value[:]

        out_rdt['salinity'] = sal_value

        return out_rdt.to_granule()
Example #7
    def test_granule_publish(self):
        log.debug("test_granule_publish ")
        self.loggerpids = []


        #retrieve the param dict from the repository
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id)


        tdom, sdom = time_series_domain()

        dp_obj = IonObject(RT.DataProduct,
            name=str(uuid.uuid4()),
            description='ctd stream test',
            temporal_domain = tdom.dump(),
            spatial_domain = sdom.dump())

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id)


        # Retrieve the id of the output stream of the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        log.debug( 'test_granule_publish: Data product streams1 = %s', stream_ids)

        pid = self.create_logger('ctd_parsed', stream_ids[0] )
        self.loggerpids.append(pid)

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        #create the publisher from the stream route
        stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
        publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

        # this is one sample from the ctd driver
        tomato = {"driver_timestamp": 3555971105.1268806, "instrument_id": "ABC-123", "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 22.9304, "value_id": "temp"}, {"value": 51.57381, "value_id": "conductivity"}, {"value": 915.551, "value_id": "pressure"}]}

        for value in tomato['values']:
            log.debug("test_granule_publish: Looping tomato values  key: %s    val: %s ", str(value['value']), str(value['value_id']))

            if value['value_id'] in rdt:
                rdt[value['value_id']] = numpy.array( [ value['value'] ] )
                log.debug("test_granule_publish: Added data item  %s  val: %s ", str(value['value']), str(value['value_id']) )

        g = rdt.to_granule()

        publisher.publish(g)

        gevent.sleep(3)

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)
  
        #--------------------------------------------------------------------------------
        # Cleanup data products
        #--------------------------------------------------------------------------------
        dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct, id_only=True)

        for dp_id in dp_ids:
            self.dataproductclient.delete_data_product(dp_id)
Example #8
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params['stream_def_id'])

        # Fill the time values
        out_rdt['time'] = rdt['time']

        # The calibration coefficients
        temp_calibration_coeffs= params['calibration_coeffs']['temp_calibration_coeffs']
        pres_calibration_coeffs= params['calibration_coeffs']['pres_calibration_coeffs']
        cond_calibration_coeffs = params['calibration_coeffs']['cond_calibration_coeffs']

        log.debug("params['calibration_coeffs']: %s", params['calibration_coeffs'])

        # Set the temperature values for the output granule
        out_rdt = CTDBP_L1_TransformAlgorithm.calculate_temperature(    input_rdt = rdt,
                                                                        out_rdt = out_rdt,
                                                                        temp_calibration_coeffs= temp_calibration_coeffs )

        # Set the pressure values for the output granule
        out_rdt = CTDBP_L1_TransformAlgorithm.calculate_pressure(   input_rdt= rdt,
                                                                    out_rdt = out_rdt,
                                                                    pres_calibration_coeffs= pres_calibration_coeffs)

        # Set the conductivity values for the output granule
        # Note that since the conductivity calculation depends on whether TEMPWAT_L1, PRESWAT_L1 have been calculated, we need to do this last
        out_rdt = CTDBP_L1_TransformAlgorithm.calculate_conductivity(   input_rdt = rdt,
                                                                        out_rdt = out_rdt,
                                                                        cond_calibration_coeffs = cond_calibration_coeffs
        )

        # build the granule for the L1 stream
        return out_rdt.to_granule()
Example #9
    def recv_packet(self, packet, stream_route, stream_id):
        if packet == {}:
            return

        l0_values = RecordDictionaryTool.load_from_granule(packet)
        l1_values = RecordDictionaryTool(
            stream_definition_id=self.stream_definition_id)
        log.debug(
            "CTDBP L1 transform using L0 values: temperature %s, pressure %s, conductivity %s",
            l0_values['temperature'], l0_values['pressure'],
            l0_values['conductivity'])

        #for key, value in 'lat', 'lon', 'time', ...:   <-- do we want to be a little more specific here?
        for key, value in l0_values.iteritems():
            if key in l1_values:
                l1_values[key] = value[:]

        l1_values['temp'] = self.calculate_temperature(l0=l0_values)
        l1_values['pressure'] = self.calculate_pressure(l0=l0_values)
        l1_values['conductivity'] = self.calculate_conductivity(l0=l0_values,
                                                                l1=l1_values)

        log.debug(
            'calculated L1 values: temp %s, pressure %s, conductivity %s',
            l1_values['temp'], l1_values['pressure'],
            l1_values['conductivity'])
        self.publisher.publish(msg=l1_values.to_granule())
Example #10
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        conductivity = rdt["conductivity"]
        pressure = rdt["pressure"]
        temperature = rdt["temp"]

        longitude = rdt["lon"] if rdt["lon"] is not None else 0
        latitude = rdt["lat"] if rdt["lat"] is not None else 0

        sp = SP_from_cndr(r=conductivity / cte.C3515, t=temperature, p=pressure)

        log.debug("Density algorithm calculated the sp (practical salinity) values: %s", sp)

        sa = SA_from_SP(sp, pressure, longitude, latitude)

        log.debug("Density algorithm calculated the sa (absolute salinity) values: %s", sa)

        dens_value = rho(sa, temperature, pressure)

        for key, value in rdt.iteritems():
            if key in out_rdt:
                if key == "conductivity" or key == "temp" or key == "pressure":
                    continue
                out_rdt[key] = value[:]

        out_rdt["density"] = dens_value

        log.debug("Density algorithm returning density values: %s", out_rdt["density"])

        return out_rdt.to_granule()
Example #11
    def _get_data(cls, config):
        """
        Iterable function that acquires data from an external dataset based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict of configuration parameters - must contain 'external_dataset_res' and 'dataset_object'
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        ext_dset_res = get_safe(config, 'external_dataset_res', None)

        # Get the Dataset object from the config (should have been instantiated in _init_acquisition_cycle)
        ds = get_safe(config, 'dataset_object')

        if ext_dset_res and ds:
            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
            var_lst = ext_dset_res.dataset_description.parameters['variables']

            t_slice = get_safe(config, 'constraints.temporal_slice', (slice(0, 1)))
            #TODO: Using 'eval' here is BAD - need to find a less sketchy way to pass constraints
            if isinstance(t_slice, str):
                t_slice = eval(t_slice)

            lon = ds.variables[x_vname][:]
            lat = ds.variables[y_vname][:]
            z = ds.variables[z_vname][:]

            t_arr = ds.variables[t_vname][t_slice]
            data_arrays = {}
            for varn in var_lst:
                data_arrays[varn] = ds.variables[varn][t_slice]

            max_rec = get_safe(config, 'max_records', 1)
            #dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')

            stream_def = get_safe(config, 'stream_def')

            cnt = calculate_iteration_count(t_arr.size, max_rec)
            for x in xrange(cnt):
                ta = t_arr[x * max_rec:(x + 1) * max_rec]

                # Make a 'master' RecDict
                rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                # Assign coordinate values to the RecDict
                rdt[x_vname] = lon
                rdt[y_vname] = lat
                rdt[z_vname] = z

                # Assign data values to the RecDict
                rdt[t_vname] = ta
                for key, arr in data_arrays.iteritems():
                    d = arr[x * max_rec:(x + 1) * max_rec]
                    rdt[key] = d

                g = rdt.to_granule()
                yield g

            ds.close()
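
The TODO above flags eval() on the temporal_slice constraint as unsafe. One possible alternative (a different technique, sketched here, not what the handler actually does) is to accept only strings of the form 'slice(start, stop)' and parse the integers directly, falling back to the default otherwise:

    import re

    def parse_slice(text, default=slice(0, 1)):
        # accept only 'slice(<int>, <int>)'; anything else falls back to the default
        m = re.match(r'^\s*slice\(\s*(-?\d+)\s*,\s*(-?\d+)\s*\)\s*$', text)
        if not m:
            return default
        return slice(int(m.group(1)), int(m.group(2)))

    print(parse_slice('slice(0, 10)'))       # slice(0, 10, None)
    print(parse_slice('__import__("os")'))   # falls back to slice(0, 1, None)
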
Example #12
    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, 'constraints.new_files', [])
        parser_mod = get_safe(config, 'parser_mod', '')
        parser_cls = get_safe(config, 'parser_cls', '')

        module = __import__(parser_mod, fromlist=[parser_cls])
        classobj = getattr(module, parser_cls)

        for f in new_flst:
            try:
                size = os.stat(f[0]).st_size
                try:
                    #find the new data check index in config
                    index = -1
                    for ndc in config['set_new_data_check']:
                        if ndc[0] == f[0]:
                            index = config['set_new_data_check'].index(ndc)
                            break
                except:
                    log.error('File name not found in attachment')

                parser = classobj(f[0], f[3])

                max_rec = get_safe(config, 'max_records', 1)
                stream_def = get_safe(config, 'stream_def')
                while True:
                    particles = parser.get_records(max_count=max_rec)
                    if not particles:
                        break

                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                    populate_rdt(rdt, particles)

                    g = rdt.to_granule()

                    # TODO: record files already read for future additions...
                    #                    #update new data check with the latest file position
                    if 'set_new_data_check' in config and index > -1:
                        # WRONG: should only record this after file finished parsing,
                        # but may not have another yield at that point to trigger update
                        config['set_new_data_check'][index] = (f[0], f[1],
                                                               f[2], size)

                    yield g

#                parser.close()

            except Exception as ex:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file \'{0}\': {1}'.format(f, ex))
Example #13
    def test_granule_publish(self):
        log.debug("test_granule_publish ")
        self.loggerpids = []


        #retrieve the param dict from the repository
        pdict_id = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict',id_only=True)
        stream_definition_id = self.pubsubclient.create_stream_definition('parsed stream', parameter_dictionary_id=pdict_id)



        dp_obj = IonObject(RT.DataProduct,
            name=str(uuid.uuid4()),
            description='ctd stream test')

        data_product_id1 = self.dpclient.create_data_product(data_product=dp_obj, stream_definition_id=stream_definition_id)


        # Retrieve the id of the output stream of the out Data Product
        stream_ids, _ = self.rrclient.find_objects(data_product_id1, PRED.hasStream, None, True)
        log.debug( 'test_granule_publish: Data product streams1 = %s', stream_ids)

        pid = self.create_logger('ctd_parsed', stream_ids[0] )
        self.loggerpids.append(pid)

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        #create the publisher from the stream route
        stream_route = self.pubsubclient.read_stream_route(stream_ids[0])
        publisher = StandaloneStreamPublisher(stream_ids[0], stream_route)

        # this is one sample from the ctd driver
        tomato = {"driver_timestamp": 3555971105.1268806, "instrument_id": "ABC-123", "pkt_format_id": "JSON_Data", "pkt_version": 1, "preferred_timestamp": "driver_timestamp", "quality_flag": "ok", "stream_name": "parsed", "values": [{"value": 22.9304, "value_id": "temp"}, {"value": 51.57381, "value_id": "conductivity"}, {"value": 915.551, "value_id": "pressure"}]}

        for value in tomato['values']:
            log.debug("test_granule_publish: Looping tomato values  key: %s    val: %s ", str(value['value']), str(value['value_id']))

            if value['value_id'] in rdt:
                rdt[value['value_id']] = numpy.array( [ value['value'] ] )
                log.debug("test_granule_publish: Added data item  %s  val: %s ", str(value['value']), str(value['value_id']) )

        g = rdt.to_granule()

        publisher.publish(g)

        gevent.sleep(3)

        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)
  
        #--------------------------------------------------------------------------------
        # Cleanup data products
        #--------------------------------------------------------------------------------
        dp_ids, _ = self.rrclient.find_resources(restype=RT.DataProduct, id_only=True)

        for dp_id in dp_ids:
            self.dataproductclient.delete_data_product(dp_id)
Example #14
    def test_stream_ingestion_worker(self):
        self.start_ingestion_worker()

        context_ids, time_ctxt = self._create_param_contexts()
        pdict_id = self.dataset_management_client.create_parameter_dictionary(
            name='stream_ingestion_pdict',
            parameter_context_ids=context_ids,
            temporal_context='ingestion_timestamp')
        self.addCleanup(
            self.dataset_management_client.delete_parameter_dictionary,
            pdict_id)

        dataset_id = self.dataset_management_client.create_dataset(
            name='fake_dataset',
            description='fake_dataset',
            stream_id=self.stream_id,
            spatial_domain=self.spatial_dom.dump(),
            temporal_domain=self.time_dom.dump(),
            parameter_dictionary_id=pdict_id)
        self.addCleanup(self.dataset_management_client.delete_dataset,
                        dataset_id)

        self.cov = self._create_coverage(dataset_id=dataset_id,
                                         parameter_dict_id=pdict_id,
                                         time_dom=self.time_dom,
                                         spatial_dom=self.spatial_dom)
        self.addCleanup(self.cov.close)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['conductivity'] = 1
        rdt['pressure'] = 2
        rdt['salinity'] = 3

        self.start_listener(dataset_id)

        self.publisher.publish(rdt.to_granule())
        self.data_modified = Event()
        self.data_modified.wait(30)

        cov = self.get_coverage(dataset_id)
        self.assertIsNotNone(cov.get_parameter_values('raw'))

        deserializer = IonObjectDeserializer(obj_registry=get_obj_registry())

        granule = retrieve_stream(dataset_id)
        rdt_complex = RecordDictionaryTool.load_from_granule(granule)
        rdt_complex['raw'] = [
            deserializer.deserialize(i) for i in rdt_complex['raw']
        ]
        for gran in rdt_complex['raw']:
            rdt_new = RecordDictionaryTool.load_from_granule(gran)
            self.assertIn(1, rdt_new['conductivity'])
            self.assertIn(2, rdt_new['pressure'])
            self.assertIn(3, rdt_new['salinity'])

        cov.close()
Example #15
    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, "constraints.new_files", [])
        parser_mod = get_safe(config, "parser_mod", "")
        parser_cls = get_safe(config, "parser_cls", "")

        module = __import__(parser_mod, fromlist=[parser_cls])
        classobj = getattr(module, parser_cls)

        for f in new_flst:
            try:
                size = os.stat(f[0]).st_size
                try:
                    # find the new data check index in config
                    index = -1
                    for ndc in config["set_new_data_check"]:
                        if ndc[0] == f[0]:
                            index = config["set_new_data_check"].index(ndc)
                            break
                except:
                    log.error("File name not found in attachment")

                parser = classobj(f[0], f[3])

                max_rec = get_safe(config, "max_records", 1)
                stream_def = get_safe(config, "stream_def")
                while True:
                    particles = parser.get_records(max_count=max_rec)
                    if not particles:
                        break

                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                    populate_rdt(rdt, particles)

                    g = rdt.to_granule()

                    # TODO: record files already read for future additions...
                    #                    #update new data check with the latest file position
                    if "set_new_data_check" in config and index > -1:
                        # WRONG: should only record this after file finished parsing,
                        # but may not have another yield at that point to trigger update
                        config["set_new_data_check"][index] = (f[0], f[1], f[2], size)

                    yield g

            #                parser.close()

            except Exception as ex:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error("Error parsing data file '{0}': {1}".format(f, ex))
Example #16
    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        for field in rdt:
            rdt[field] = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()

        return g
Example #17
    def recv_packet(self, msg, stream_route, stream_id):
        ''' receive packet for ingestion '''
        log.debug('received granule for stream %s', stream_id)

        if msg == {}:
            log.error('Received empty message from stream: %s', stream_id)
            return
        # Message validation
        if not isinstance(msg, Granule):
            log.error('Ingestion received a message that is not a granule: %s', msg)
            return


        rdt = RecordDictionaryTool.load_from_granule(msg)
        if rdt is None:
            log.error('Invalid granule (no RDT) for stream %s', stream_id)
            return
        if not len(rdt):
            log.debug('Empty granule for stream %s', stream_id)
            return

        dp_id_list = self.retrieve_dataprocess_for_stream(stream_id)

        for dp_id in dp_id_list:

            function, argument_list = self.retrieve_function_and_define_args(dp_id)

            args = []
            rdt = RecordDictionaryTool.load_from_granule(msg)

            #create the input arguments list
            #todo: this logic is tied to the example function, generalize
            for func_param, record_param in argument_list.iteritems():
                args.append(rdt[record_param])
            try:
                #run the calc
                #todo: nothing in the data process resource to specify multi-out map
                result = function(*args)

                out_stream_definition, output_parameter = self.retrieve_dp_output_params(dp_id)

                rdt = RecordDictionaryTool(stream_definition_id=out_stream_definition)
                publisher = self._publisher_map.get(dp_id,'')

                rdt[ output_parameter ] = result

                if publisher:
                    publisher.publish(rdt.to_granule())
                else:
                    log.error('Publisher not found for data process %s', dp_id)

                self.update_dp_metrics( dp_id )

            except ImportError:
                log.error('Error running transform')
Example #18
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()


        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated


        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and two DPs
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(subject=self.event_data_process_id, predicate=PRED.hasSubscription, object_type=RT.Subscription, id_only=False)
        log.debug('test_event_transform_worker subscription_obj:  %s', subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route )

        self.start_event_transform_listener()

        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))
Example #19
    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)

        for field in rdt:
            rdt[field] = numpy.array(
                [random.uniform(0.0, 75.0) for i in xrange(length)])

        g = rdt.to_granule()

        return g
Example #20
    def test_transform_worker(self):
        self.loggerpids = []
        self.data_process_objs = []
        self._output_stream_ids = []

        self.start_transform_worker()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition, self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(  RT.DataProduct, name='input_data_product', description='input test stream',
                                             temporal_domain = self.time_dom.dump(),  spatial_domain = self.spatial_dom.dump())
        self.input_dp_id = self.dataproductclient.create_data_product(data_product=input_dp_obj,  stream_definition_id=self.stream_def_id)

        #retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        #create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(name='parsed_subscription', stream_ids=[self.stream_id], exchange_name='parsed_subscription')
        self.addCleanup(self.pubsub_client.delete_subscription, self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription, self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id, stream_route=stream_route )

        self.start_event_listener()

        self.dp_list = self.create_data_processes()

        self.data_modified = Event()
        self.data_modified.wait(5)

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time']         = [0] # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure']     = [2]
        rdt['salinity']     = [8]

        self.publisher.publish(rdt.to_granule())


        self.data_modified.wait(5)

        # Cleanup processes
        for pid in self.loggerpids:
            self.processdispatchclient.cancel_process(pid)
Example #21
    def _get_new_ctd_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g
Example #22
    def _get_new_ctd_L0_packet(self, stream_definition_id, length):

        rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        rdt['time'] = numpy.arange(self.i, self.i+length)

        for field in rdt:
            if isinstance(rdt._pdict.get_context(field).param_type, QuantityType):
                rdt[field] = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

        g = rdt.to_granule()
        self.i+=length

        return g
Example #23
    def _publish_granules(self, stream_id=None, stream_route=None, values = None,number=None, length=None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        for i in xrange(number):
            rdt['input_voltage'] = values
            rdt['preferred_timestamp'] = numpy.array([random.uniform(0,1000)  for l in xrange(length)])
            g = rdt.to_granule()
            pub.publish(g)
Example #24
    def _trigger_func(self, stream_id):

        self.last_time = 0

        parameter_dictionary = self._create_parameter()

        #@todo - add lots of comments in here
        while not self.finished.is_set():

            length = 10

            #Explicitly make these numpy arrays...
            c = numpy.array(
                [random.uniform(0.0, 75.0) for i in xrange(length)])

            t = numpy.array(
                [random.uniform(-1.7, 21.0) for i in xrange(length)])

            p = numpy.array(
                [random.lognormvariate(1, 2) for i in xrange(length)])

            lat = numpy.array(
                [random.uniform(-90.0, 90.0) for i in xrange(length)])

            lon = numpy.array(
                [random.uniform(0.0, 360.0) for i in xrange(length)])

            tvar = numpy.array(
                [self.last_time + i for i in xrange(1, length + 1)])

            self.last_time = max(tvar)

            rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

            rdt['temp'] = t  # ExampleDataProducer_algorithm.execute(t)
            rdt['conductivity'] = c  # ExampleDataProducer_algorithm.execute(c)
            rdt['pressure'] = p  # ExampleDataProducer_algorithm.execute(p)
            rdt['time'] = tvar
            rdt['lat'] = lat
            rdt['lon'] = lon

            log.info("logging published Record Dictionary:\n %s",
                     rdt.pretty_print())

            g = rdt.to_granule()

            log.info('Sending %d values!' % length)
            if (isinstance(g, Granule)):
                self.publish(g, stream_id)

            time.sleep(2.0)
Example #25
    def _trigger_func(self, stream_id):
        log.debug("SimpleCtdDataProducer:_trigger_func ")

        parameter_dictionary = get_param_dict('ctd_parsed_param_dict')
        rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

        # The base SimpleCtdPublisher provides a gevent Event that indicates when the process is being
        # shut down. We can use a simple pattern here to accomplish both a safe shutdown of this loop
        # when the process shuts down *AND* do the timeout between loops in a very safe/efficient fashion.
        #
        # By using this instead of a sleep in the loop itself, we can immediately interrupt this loop when
        # the process is being shut down instead of having to wait for the sleep to terminate.
        while not self.finished.wait(timeout=2):

            length = 10

            #Explicitly make these numpy arrays...
            c = numpy.array(
                [random.uniform(0.0, 75.0) for i in xrange(length)])

            t = numpy.array(
                [random.uniform(-1.7, 21.0) for i in xrange(length)])

            p = numpy.array(
                [random.lognormvariate(1, 2) for i in xrange(length)])

            lat = numpy.array(
                [random.uniform(-90.0, 90.0) for i in xrange(length)])

            lon = numpy.array(
                [random.uniform(0.0, 360.0) for i in xrange(length)])

            h = numpy.array(
                [random.uniform(0.0, 360.0) for i in xrange(length)])

            tvar = numpy.array(
                [self.last_time + i for i in xrange(1, length + 1)])

            self.last_time = max(tvar)

            rdt['time'] = tvar
            rdt['lat'] = lat
            rdt['lon'] = lon
            rdt['temp'] = t
            rdt['conductivity'] = c
            rdt['pressure'] = p

            g = rdt.to_granule()
            log.debug('SimpleCtdDataProducer: Sending %d values!' % length)
            self.publisher.publish(g)
Example #26
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        pressure = rdt['pressure']
        pres_value = (pressure / 100.0) + 0.5

        for key, value in rdt.iteritems():
            if key in out_rdt:
                out_rdt[key] = value[:]

        out_rdt['pressure'] = pres_value

        return out_rdt.to_granule()
Example #27
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        temperature = rdt['temp']
        temp_value = (temperature / 10000.0) - 10

        for key, value in rdt.iteritems():
            if key in out_rdt:
                out_rdt[key] = value[:]

        out_rdt['temp'] = temp_value

        return out_rdt.to_granule()
Example #28
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        pressure = rdt['pressure']
        pres_value = (pressure / 100.0) + 0.5

        for key, value in rdt.iteritems():
            if key in out_rdt:
                out_rdt[key] = value[:]

        out_rdt['pressure'] = pres_value

        return out_rdt.to_granule()
Example #29
    def publish_loop(self):

        sine_ampl = 2.0  # Amplitude in both directions
        samples = 60

        startTime = time.time()
        count = samples  #something other than zero

        self.dataset_management = DatasetManagementServiceClient(
            node=self.container.node)

        while not self.finished.is_set():
            count = time.time() - startTime
            sine_curr_deg = (count % samples) * 360 / samples

            c = numpy.array(
                [sine_ampl * math.sin(math.radians(sine_curr_deg))])
            t = numpy.array(
                [sine_ampl * 2 * math.sin(math.radians(sine_curr_deg + 45))])
            p = numpy.array(
                [sine_ampl * 4 * math.sin(math.radians(sine_curr_deg + 60))])

            lat = numpy.array([32.8])
            lon = numpy.array([-119.6])
            # convert time to ntp time. Standard notation in the system
            tvar = numpy.array([ntplib.system_to_ntp_time(time.time())])

            parameter_dictionary = self._create_parameter()
            #parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')

            rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

            h = numpy.array([random.uniform(0.0, 360.0)])

            rdt['time'] = tvar
            rdt['lat'] = lat
            rdt['lon'] = lon
            rdt['temp'] = t
            rdt['conductivity'] = c
            rdt['pressure'] = p

            g = rdt.to_granule(data_producer_id=self.id)

            log.info('SinusoidalCtdPublisher sending 1 record!')
            self.publisher.publish(g, self.stream_id)

            time.sleep(1.0)
Example #30
    def _build_granule(stream_definition_id=None, field_names=None, values=None):
        """
        Builds a granule with values corresponding only to the params specified in the field names

        @param param_dictionary ParameterDictionary
        @param field_name str
        @param value numpy.array

        @retval Granule
        """
        root_rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        zipped = zip(field_names, values)

        for k,v in zipped:
            root_rdt[k] = v

        return root_rdt.to_granule()
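
A hypothetical call to the helper above; the field names and values are illustrative, and stream_definition_id is assumed to refer to an existing stream definition in the system:

    import numpy

    granule = _build_granule(stream_definition_id=stream_definition_id,
                             field_names=['temp', 'conductivity'],
                             values=[numpy.array([22.9]), numpy.array([51.6])])
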
Example #31
    def _build_granule(stream_definition_id=None, field_names=None, values=None):
        """
        Builds a granule with values corresponding only to the params specified in the field names

        @param param_dictionary ParameterDictionary
        @param field_name str
        @param value numpy.array

        @retval Granule
        """
        root_rdt = RecordDictionaryTool(stream_definition_id=stream_definition_id)
        zipped = zip(field_names, values)

        for k,v in zipped:
            root_rdt[k] = v

        return root_rdt.to_granule()
Example #32
    def execute(input=None,
                context=None,
                config=None,
                params=None,
                state=None):
        """
        Dependencies
        ------------
        CONDWAT_L1, TEMPWAT_L1, PRESWAT_L1


        Algorithms used
        ---------------
        PRACSAL = gsw_SP_from_C((CONDWAT_L1 * 10),TEMPWAT_L1,PRESWAT_L1)


        Reference
        ---------
        The calculations below are based on the following spreadsheet document:
        https://docs.google.com/spreadsheet/ccc?key=0Au7PUzWoCKU4dDRMeVI0RU9yY180Z0Y5U0hyMUZERmc#gid=0

        """

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        out_rdt['time'] = rdt['time']

        conductivity = rdt['conductivity']
        pressure = rdt['pressure']
        temperature = rdt['temp']
        sal_value = gsw.sp_from_c(conductivity * 10, temperature, pressure)

        log.debug(
            "CTDBP Salinity algorithm calculated the sp (practical salinity) values: %s",
            sal_value)

        for key, value in rdt.iteritems():
            if key in out_rdt:
                if key == 'conductivity' or key == 'temp' or key == 'pressure':
                    continue
                out_rdt[key] = value[:]

        out_rdt['salinity'] = sal_value

        return out_rdt.to_granule()
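
As the docstring notes, PRACSAL is computed from conductivity in S/m scaled by 10 to mS/cm before calling gsw. A quick sanity check, assuming the same gsw module the transform imports: standard seawater has a conductivity of about 4.2914 S/m at 15 °C and 0 dbar, so the call should return a practical salinity close to 35.

    # assumes the same `gsw` import used by the transform above
    sp = gsw.sp_from_c(4.2914 * 10, 15.0, 0.0)
    print(sp)   # expected to be approximately 35.0
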
Example #33
    def _build_granule(stream_definition_id=None,
                       field_name='',
                       value=None,
                       time=None):
        '''
        @param param_dictionary ParameterDictionary
        @param field_name str
        @param value numpy.array

        @retval Granule
        '''
        root_rdt = RecordDictionaryTool(
            stream_definition_id=stream_definition_id)
        root_rdt[field_name] = value
        root_rdt['time'] = time

        return root_rdt.to_granule()
Example #34
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        conductivity = rdt['conductivity']
        cond_value = (conductivity / 100000.0) - 0.5

        for key, value in rdt.iteritems():
            if key in out_rdt:
                out_rdt[key] = value[:]

        # Update the conductivity values
        out_rdt['conductivity'] = cond_value

        # build the granule for conductivity
        return out_rdt.to_granule()
Example #35
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        conductivity = rdt['conductivity']
        pressure = rdt['pressure']
        temperature = rdt['temp']

        sal_value = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)

        for key, value in rdt.iteritems():
            if key in out_rdt:
                out_rdt[key] = value[:]

        out_rdt['salinity'] = sal_value

        return out_rdt.to_granule()
Example #36
    def poller_callback(self, file_like_object, state_memento):
        log.debug('poller found data to parse')
        try:
            config = self.config['parser']
            parser = self._create_plugin(config, kwargs=dict(open_file=file_like_object, parse_after=self.latest_granule_time))
            records = parser.get_records(max_count=self.max_records)
            log.trace('have %d records', len(records))
            while records:
                # secretly uses pubsub client
                rdt = RecordDictionaryTool(param_dictionary=self.parameter_dictionary)
                for key in records[0]: #assume all dict records have same keys
                    rdt[key] = [ record[key] for record in records ]
                g = rdt.to_granule()
                self.publisher.publish(g)
                records = parser.get_records(max_count=self.max_records)
            self._set_state('poller_state', state_memento)
        except:
            log.error('error handling data', exc_info=True)
Example #37
    def _trigger_func(self, stream_id):

        self.last_time = 0

        parameter_dictionary = self._create_parameter()

        #@todo - add lots of comments in here
        while not self.finished.is_set():

            length = 10

            #Explicitly make these numpy arrays...
            c = numpy.array([random.uniform(0.0,75.0)  for i in xrange(length)])

            t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])

            p = numpy.array([random.lognormvariate(1,2) for i in xrange(length)])

            lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])

            lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])

            tvar = numpy.array([self.last_time + i for i in xrange(1,length+1)])

            self.last_time = max(tvar)

            rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

            rdt['temp'] = t # ExampleDataProducer_algorithm.execute(t)
            rdt['conductivity'] = c # ExampleDataProducer_algorithm.execute(c)
            rdt['pressure'] = p # ExampleDataProducer_algorithm.execute(p)
            rdt['time'] = tvar
            rdt['lat'] = lat
            rdt['lon'] = lon

            log.info("logging published Record Dictionary:\n %s", rdt.pretty_print())

            g = rdt.to_granule()

            log.info('Sending %d values!' % length)
            if(isinstance(g, Granule)):
                self.publish(g, stream_id)

            time.sleep(2.0)
Example #38
    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, 'constraints.new_files', [])
        hdr_cnt = get_safe(config, 'header_count',
                           SlocumParser.DEFAULT_HEADER_SIZE)
        for f in new_flst:
            try:
                parser = SlocumParser(f[0], hdr_cnt)
                #CBM: Not in use yet...
                #            ext_dset_res = get_safe(config, 'external_dataset_res', None)
                #            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
                #            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
                #            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
                #            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
                #            var_lst = ext_dset_res.dataset_description.parameters['variables']

                max_rec = get_safe(config, 'max_records', 1)
                dprod_id = get_safe(config, 'data_producer_id',
                                    'unknown data producer')

                stream_def = get_safe(config, 'stream_def')

                cnt = calculate_iteration_count(
                    len(parser.data_map[parser.data_map.keys()[0]]), max_rec)
                for x in xrange(cnt):
                    #rdt = RecordDictionaryTool(taxonomy=ttool)
                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                    for name in parser.sensor_map:
                        d = parser.data_map[name][x * max_rec:(x + 1) *
                                                  max_rec]
                        rdt[name] = d

                    #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                    g = rdt.to_granule()
                    yield g
            except SlocumParseException:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file: \'{0}\''.format(f))
Example #39
    def _trigger_func(self, stream_id):
        log.debug("SimpleCtdDataProducer:_trigger_func ")

        parameter_dictionary = get_param_dict("ctd_parsed_param_dict")
        rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

        # The base SimpleCtdPublisher provides a gevent Event that indicates when the process is being
        # shut down. We can use a simple pattern here to accomplish both a safe shutdown of this loop
        # when the process shuts down *AND* do the timeout between loops in a very safe/efficient fashion.
        #
        # By using this instead of a sleep in the loop itself, we can immediately interrupt this loop when
        # the process is being shut down instead of having to wait for the sleep to terminate.
        while not self.finished.wait(timeout=2):

            length = 10

            # Explicitly make these numpy arrays...
            c = numpy.array([random.uniform(0.0, 75.0) for i in xrange(length)])

            t = numpy.array([random.uniform(-1.7, 21.0) for i in xrange(length)])

            p = numpy.array([random.lognormvariate(1, 2) for i in xrange(length)])

            lat = numpy.array([random.uniform(-90.0, 90.0) for i in xrange(length)])

            lon = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])

            h = numpy.array([random.uniform(0.0, 360.0) for i in xrange(length)])

            tvar = numpy.array([self.last_time + i for i in xrange(1, length + 1)])

            self.last_time = max(tvar)

            rdt["time"] = tvar
            rdt["lat"] = lat
            rdt["lon"] = lon
            rdt["temp"] = t
            rdt["conductivity"] = c
            rdt["pressure"] = p

            g = rdt.to_granule()
            log.debug("SimpleCtdDataProducer: Sending %d values!" % length)
            self.publisher.publish(g)
Example #40
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        absolute_pressure = rdt['absolute_pressure']

        for key, value in rdt.iteritems():

            cond = key=='time' or key=='port_timestamp' or key=='driver_timestamp'\
                    or  key=='internal_timestamp' or key=='preferred_timestamp' or key=='timestamp'\
                    or key=='lat' or key=='lon'

            if cond and key in out_rdt:
                out_rdt[key] = value[:]

        out_rdt['seafloor_pressure'] = absolute_pressure * 0.689475728

        return out_rdt.to_granule()
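
The constant 0.689475728 in the example above converts pressure from psi to decibars: 1 psi is approximately 6894.76 Pa and 1 dbar is exactly 10^4 Pa. A one-line check of the constant:

    # dividing the psi value in pascals by 10^4 Pa/dbar gives the psi-to-dbar factor
    print(6894.75728 / 1.0e4)   # 0.689475728
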
Example #41
    def execute(input=None, context=None, config=None, params=None, state=None):
        stream_def_id = params
        rdt = RecordDictionaryTool.load_from_granule(input)

        salinity = get_safe(rdt, "salinity")

        longitude = get_safe(rdt, "lon")
        latitude = get_safe(rdt, "lat")
        time = get_safe(rdt, "time")

        root_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        root_rdt["salinity"] = 2 * salinity
        root_rdt["time"] = time
        root_rdt["lat"] = latitude
        root_rdt["lon"] = longitude

        g = root_rdt.to_granule()
        return g
Example #42
    def execute(input=None,
                context=None,
                config=None,
                params=None,
                state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        temperature = rdt['temp']
        temp_value = (temperature / 10000.0) - 10

        for key, value in rdt.iteritems():
            if key in out_rdt:
                out_rdt[key] = value[:]

        out_rdt['temp'] = temp_value

        return out_rdt.to_granule()
Example #43
    def execute(input=None, context=None, config=None, params=None, state=None):
        stream_def_id = params
        rdt = RecordDictionaryTool.load_from_granule(input)

        salinity = get_safe(rdt, 'salinity')

        longitude = get_safe(rdt, 'lon')
        latitude = get_safe(rdt, 'lat')
        time = get_safe(rdt, 'time')

        root_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        root_rdt['salinity'] = 2 * salinity
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude

        g = root_rdt.to_granule()
        return g
    def publish_loop(self):

        sine_ampl = 2.0 # Amplitude in both directions
        samples = 60

        startTime = time.time()
        count = samples #something other than zero

        self.dataset_management = DatasetManagementServiceClient(node=self.container.node)

        while not self.finished.is_set():
            count = time.time() - startTime
            sine_curr_deg = (count % samples) * 360 / samples

            c = numpy.array( [sine_ampl * math.sin(math.radians(sine_curr_deg))] )
            t = numpy.array( [sine_ampl * 2 * math.sin(math.radians(sine_curr_deg + 45))] )
            p = numpy.array( [sine_ampl * 4 * math.sin(math.radians(sine_curr_deg + 60))] )

            lat = numpy.array([32.8])
            lon = numpy.array([-119.6])
            # convert time to ntp time. Standard notation in the system
            tvar = numpy.array([ntplib.system_to_ntp_time(time.time())])

            parameter_dictionary = self._create_parameter()
            #parameter_dictionary = self.dataset_management.read_parameter_dictionary_by_name('ctd_parsed_param_dict')

            rdt = RecordDictionaryTool(param_dictionary=parameter_dictionary)

            h = numpy.array([random.uniform(0.0, 360.0)])

            rdt['time'] = tvar
            rdt['lat'] = lat
            rdt['lon'] = lon
            rdt['temp'] = t
            rdt['conductivity'] = c
            rdt['pressure'] = p

            g = rdt.to_granule(data_producer_id=self.id)

            log.info('SinusoidalCtdPublisher sending 1 record!')
            self.publisher.publish(g, self.stream_id)

            time.sleep(1.0)
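
Since sine_curr_deg = (count % samples) * 360 / samples with samples = 60, the phase sweeps through a full 360 degrees once per minute. A worked point 15 seconds into a cycle:

import math

sine_ampl = 2.0
samples = 60
count = 15                                           # seconds since startTime, hypothetically
sine_curr_deg = (count % samples) * 360 / samples
print(sine_curr_deg)                                 # 90
c = sine_ampl * math.sin(math.radians(sine_curr_deg))
print(c)                                             # 2.0 -> conductivity is at the sine amplitude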
    def _publish_granules(self, stream_id=None, stream_route=None, values=None, number=None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        times = numpy.array([number for l in xrange(self.length)])

        for i in xrange(number):
            rdt['input_voltage'] = values
            rdt['preferred_timestamp'] = ['time' for l in xrange(len(times))]
            rdt['time'] = times

            g = rdt.to_granule()
            g.data_producer_id = 'instrument_1'

            log.debug("granule #%s published by instrument:: %s" % ( number,g))

            pub.publish(g)
Example #46
0
    def _publish_granules(self, stream_id=None, stream_route=None, values=None, number=None):

        pub = StandaloneStreamPublisher(stream_id, stream_route)

        stream_def = self.pubsub_management.read_stream_definition(stream_id=stream_id)
        stream_def_id = stream_def._id
        rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        times = numpy.array([number for l in xrange(self.length)])

        for i in xrange(number):
            rdt['input_voltage'] = values
            rdt['preferred_timestamp'] = ['time' for l in xrange(len(times))]
            rdt['time'] = times

            g = rdt.to_granule()
            g.data_producer_id = 'instrument_1'

            log.debug("granule #%s published by instrument:: %s" % ( number,g))

            pub.publish(g)
Example #47
0
    def recv_packet(self, packet, stream_route, stream_id):
        if packet == {}:
            return

        l0_values = RecordDictionaryTool.load_from_granule(packet)
        l1_values = RecordDictionaryTool(stream_definition_id=self.stream_definition_id)
        log.debug("CTDBP L1 transform using L0 values: tempurature %s, pressure %s, conductivity %s",
                  l0_values['temperature'], l0_values['pressure'], l0_values['conductivity'])

        #for key, value in 'lat', 'lon', 'time', ...:   <-- do we want to be a little more specific here?
        for key, value in l0_values.iteritems():
            if key in l1_values:
                l1_values[key] = value[:]

        l1_values['temp'] = self.calculate_temperature(l0=l0_values)
        l1_values['pressure'] = self.calculate_pressure(l0=l0_values)
        l1_values['conductivity'] = self.calculate_conductivity(l0=l0_values, l1=l1_values)

        log.debug('calculated L1 values: temp %s, pressure %s, conductivity %s',
                  l1_values['temp'], l1_values['pressure'], l1_values['conductivity'])
        self.publisher.publish(msg=l1_values.to_granule())
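
calculate_temperature, calculate_pressure and calculate_conductivity are not shown in this snippet. Purely as an illustration, the temperature one might apply the same counts-to-degrees-Celsius scaling seen in the earlier example; this is an assumption about this transform, not its actual code:

    def calculate_temperature(self, l0=None):
        # hypothetical sketch: convert raw L0 temperature counts to degrees Celsius
        # using the (counts / 10000.0) - 10 scaling shown in an earlier example
        return (l0['temperature'] / 10000.0) - 10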
Example #48
0
    def _get_data(cls, config):
        """
        Iterable function that acquires data from a source iteratively based on constraints provided by config
        Passed into BaseDataHandler._publish_data and iterated to publish samples.
        @param config dict containing configuration parameters, may include constraints, formatters, etc
        @retval an iterable that returns well-formed Granule objects on each iteration
        """
        new_flst = get_safe(config, 'constraints.new_files', [])
        hdr_cnt = get_safe(config, 'header_count', SlocumParser.DEFAULT_HEADER_SIZE)
        for f in new_flst:
            try:
                parser = SlocumParser(f[0], hdr_cnt)
                #CBM: Not in use yet...
                #            ext_dset_res = get_safe(config, 'external_dataset_res', None)
                #            t_vname = ext_dset_res.dataset_description.parameters['temporal_dimension']
                #            x_vname = ext_dset_res.dataset_description.parameters['zonal_dimension']
                #            y_vname = ext_dset_res.dataset_description.parameters['meridional_dimension']
                #            z_vname = ext_dset_res.dataset_description.parameters['vertical_dimension']
                #            var_lst = ext_dset_res.dataset_description.parameters['variables']

                max_rec = get_safe(config, 'max_records', 1)
                dprod_id = get_safe(config, 'data_producer_id', 'unknown data producer')

                stream_def = get_safe(config, 'stream_def')

                cnt = calculate_iteration_count(len(parser.data_map[parser.data_map.keys()[0]]), max_rec)
                for x in xrange(cnt):
                    #rdt = RecordDictionaryTool(taxonomy=ttool)
                    rdt = RecordDictionaryTool(stream_definition_id=stream_def)

                    for name in parser.sensor_map:
                        d = parser.data_map[name][x * max_rec:(x + 1) * max_rec]
                        rdt[name] = d

                    #g = build_granule(data_producer_id=dprod_id, taxonomy=ttool, record_dictionary=rdt)
                    g = rdt.to_granule()
                    yield g
            except SlocumParseException:
                # TODO: Decide what to do here, raise an exception or carry on
                log.error('Error parsing data file: \'{0}\''.format(f))
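
calculate_iteration_count is imported from elsewhere; presumably it just computes how many max_rec-sized chunks are needed to cover the data, something along these lines (an assumption, not the actual implementation):

def calculate_iteration_count(total_records, max_records):
    # ceiling division: number of granules needed to emit total_records
    # in chunks of at most max_records each
    count = total_records // max_records
    if total_records % max_records != 0:
        count += 1
    return count

print(calculate_iteration_count(10, 3))   # 4 granules: 3 + 3 + 3 + 1 records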
Example #49
0
    def execute(input=None, context=None, config=None, params=None, state=None):
        """
        Dependencies
        ------------
        CONDWAT_L1, TEMPWAT_L1, PRESWAT_L1


        Algorithms used
        ---------------
        PRACSAL = gsw_SP_from_C((CONDWAT_L1 * 10),TEMPWAT_L1,PRESWAT_L1)


        Reference
        ---------
        The calculations below are based on the following spreadsheet document:
        https://docs.google.com/spreadsheet/ccc?key=0Au7PUzWoCKU4dDRMeVI0RU9yY180Z0Y5U0hyMUZERmc#gid=0

        """

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        out_rdt['time'] = rdt['time']

        conductivity = rdt['conductivity']
        pressure = rdt['pressure']
        temperature = rdt['temp']
        sal_value = gsw.sp_from_c(conductivity * 10, temperature, pressure)

        log.debug("CTDBP Salinity algorithm calculated the sp (practical salinity) values: %s", sal_value)

        for key, value in rdt.iteritems():
            if key in out_rdt:
                if key=='conductivity' or key=='temp' or key=='pressure':
                    continue
                out_rdt[key] = value[:]

        out_rdt['salinity'] = sal_value

        return out_rdt.to_granule()
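
The factor of 10 in gsw.sp_from_c(conductivity * 10, ...) above converts conductivity from S/m to mS/cm, the unit the GSW practical-salinity routine expects (assuming the L1 conductivity is in S/m, as CONDWAT_L1 normally is):

# conductivity assumed to be in S/m; GSW's SP_from_C wants mS/cm, hence the * 10
conductivity_s_per_m = 4.3
conductivity_ms_per_cm = conductivity_s_per_m * 10   # 43.0 mS/cm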
Example #50
0
    def _get_data(cls, config):
        """
        Retrieves a random sample array of length config['constraints']['array_len'] and yields it in granules of at most config['max_records'] records
        @param config Dict of configuration parameters - must contain ['constraints']['array_len']
        """
        array_len = get_safe(config, 'constraints.array_len', 1)

        max_rec = get_safe(config, 'max_records', 1)
        #dprod_id = get_safe(config, 'data_producer_id')

        stream_def = get_safe(config, 'stream_def')

        arr = npr.random_sample(array_len)

        #log.debug('Array to send using max_rec={0}: {1}'.format(max_rec, arr))
        cnt = calculate_iteration_count(arr.size, max_rec)
        for x in xrange(cnt):
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)
            d = arr[x * max_rec:(x + 1) * max_rec]
            rdt['dummy'] = d
            g = rdt.to_granule()
            yield g
Example #51
0
    def execute(input=None, context=None, config=None, params=None, state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        conductivity = rdt['conductivity']
        pressure = rdt['pressure']
        temperature = rdt['temp']

        sal_value = SP_from_cndr(r=conductivity/cte.C3515, t=temperature, p=pressure)

        log.debug("Salinity algorithm calculated the sp (practical salinity) values: %s", sal_value)

        for key, value in rdt.iteritems():
            if key in out_rdt:
                if key=='conductivity' or key=='temp' or key=='pressure':
                    continue
                out_rdt[key] = value[:]

        out_rdt['salinity'] = sal_value

        return out_rdt.to_granule()
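
cte.C3515 is the conductivity of standard seawater (S=35, t=15 degC, p=0), roughly 42.914 mS/cm, so r above is a dimensionless conductivity ratio rather than an absolute conductivity. A rough illustration with made-up values, assuming conductivity is in the same units as the constant:

C3515 = 42.914              # mS/cm, the value used by the seawater library's cte.C3515
conductivity = 43.0         # hypothetical measured conductivity, mS/cm
r = conductivity / C3515    # ~1.002, the ratio passed to SP_from_cndr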
    def execute(input=None,
                context=None,
                config=None,
                params=None,
                state=None):
        stream_def_id = params
        rdt = RecordDictionaryTool.load_from_granule(input)

        salinity = get_safe(rdt, 'salinity')

        longitude = get_safe(rdt, 'lon')
        latitude = get_safe(rdt, 'lat')
        time = get_safe(rdt, 'time')

        root_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        root_rdt['salinity'] = 2 * salinity
        root_rdt['time'] = time
        root_rdt['lat'] = latitude
        root_rdt['lon'] = longitude

        g = root_rdt.to_granule()
        return g
Example #53
0
    def execute(input=None,
                context=None,
                config=None,
                params=None,
                state=None):

        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=params)

        absolute_pressure = rdt['absolute_pressure']

        for key, value in rdt.iteritems():

            cond = key in ('time', 'port_timestamp', 'driver_timestamp',
                           'internal_timestamp', 'preferred_timestamp', 'timestamp',
                           'lat', 'lon')

            if cond and key in out_rdt:
                out_rdt[key] = value[:]

        out_rdt['absolute_pressure'] = absolute_pressure

        return out_rdt.to_granule()
Example #54
0
    def _get_data(cls, config):
        """
        A generator that retrieves config['constraints']['count'] number of sequential Fibonacci numbers
        @param config Dict of configuration parameters - must contain ['constraints']['count']
        """
        cnt = get_safe(config, 'constraints.count', 1)

        max_rec = get_safe(config, 'max_records', 1)
        #dprod_id = get_safe(config, 'data_producer_id')

        stream_def = get_safe(config, 'stream_def')

        def fibGenerator():
            """
            A Fibonacci sequence generator
            """
            count = 0
            ret = []
            a, b = 1, 1
            while 1:
                count += 1
                ret.append(a)
                if count == max_rec:
                    yield np.array(ret)
                    ret = []
                    count = 0

                a, b = b, a + b

        gen = fibGenerator()
        cnt = calculate_iteration_count(cnt, max_rec)
        for i in xrange(cnt):
            rdt = RecordDictionaryTool(stream_definition_id=stream_def)
            d = gen.next()
            rdt['data'] = d
            g = rdt.to_granule()
            yield g
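
A quick trace of fibGenerator: the running a values are 1, 1, 2, 3, 5, 8, 13, ..., so with max_rec = 3 each yielded chunk holds the next three Fibonacci numbers. The standalone re-statement below shows the same chunking for illustration:

import numpy as np

def fib_chunks(max_rec):
    # standalone re-statement of fibGenerator above, for illustration only
    count, ret = 0, []
    a, b = 1, 1
    while True:
        count += 1
        ret.append(a)
        if count == max_rec:
            yield np.array(ret)
            ret, count = [], 0
        a, b = b, a + b

gen = fib_chunks(3)
print(next(gen))   # [1 1 2]
print(next(gen))   # [3 5 8]
print(next(gen))   # [13 21 34]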
Example #55
0
    def test_two_transforms_inline(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products(
        )

        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)
        second_dp_id = self.create_data_process_two(dpd_id,
                                                    dp1_func_output_dp_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process one
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]
        #the input to data process two is the output from data process one
        stream_ids, assoc_ids = self.rrclient.find_objects(
            dp1_func_output_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        # Run provenance on the output dataproduct of the second data process to see all the links
        # are as expected
        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            dp2_func_output_dp_id)

        # Do a basic check to see if there were 3 entries in the provenance graph: the input data product
        # and the two derived output products.
        self.assertTrue(len(output_data_product_provenance) == 3)
        # confirm that the linking from the output dataproduct to input dataproduct is correct
        self.assertTrue(
            dp1_func_output_dp_id in
            output_data_product_provenance[dp2_func_output_dp_id]['parents'])
        self.assertTrue(
            self.input_dp_one_id in
            output_data_product_provenance[dp1_func_output_dp_id]['parents'])

        #create subscription to stream ONE, create data process and publish granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_one_id, self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        subscription_id)

        self.pubsub_client.activate_subscription(subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        subscription_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #data process 1 adds conductivity + pressure and puts the result in salinity
        #data process 2 adds salinity + pressure and puts the result in conductivity

        self.start_event_listener()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))
Example #56
0
    def test_multi_subscriptions(self):
        self.dp_list = []
        self.event1_verified = Event()
        self.event2_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_one',
                                 description='input test stream one')
        self.input_dp_one_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product_two',
                                 description='input test stream two')
        self.input_dp_two_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        #retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_one_id, PRED.hasStream, RT.Stream, True)
        self.stream_one_id = stream_ids[0]

        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_two_id, PRED.hasStream, RT.Stream, True)
        self.stream_two_id = stream_ids[0]

        dpd_id = self.create_data_process_definition()
        dp1_func_output_dp_id, dp2_func_output_dp_id = self.create_output_data_products(
        )
        first_dp_id = self.create_data_process_one(dpd_id,
                                                   dp1_func_output_dp_id)

        second_dp_id = self.create_data_process_two(dpd_id,
                                                    self.input_dp_two_id,
                                                    dp2_func_output_dp_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=first_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create subscription to stream ONE, create data process and publish granule on stream ONE

        #create a queue to catch the published granules of stream ONE
        self.subscription_one_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one',
            stream_ids=[self.stream_one_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_one_id)

        self.pubsub_client.activate_subscription(self.subscription_one_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_one_id)

        stream_route_one = self.pubsub_client.read_stream_route(
            self.stream_one_id)
        self.publisher_one = StandaloneStreamPublisher(
            stream_id=self.stream_one_id, stream_route=stream_route_one)

        self.start_event_listener()

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=second_dp_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        #create a subscription to stream TWO routed to data process two's exchange, then publish granules on stream ONE and stream TWO

        #create a queue to catch the published granules of stream TWO
        self.subscription_two_id = self.pubsub_client.create_subscription(
            name='parsed_subscription_one_two',
            stream_ids=[self.stream_two_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_two_id)

        self.pubsub_client.activate_subscription(self.subscription_two_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_two_id)

        stream_route_two = self.pubsub_client.read_stream_route(
            self.stream_two_id)
        self.publisher_two = StandaloneStreamPublisher(
            stream_id=self.stream_two_id, stream_route=stream_route_two)

        #data process 1 adds conductivity + pressure and puts the result in salinity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher_one.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_one_id)

        #data process 2 adds salinity + pressure and puts the result in conductivity
        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [22]
        rdt['pressure'] = [4]
        rdt['salinity'] = [1]

        self.publisher_two.publish(msg=rdt.to_granule(),
                                   stream_id=self.stream_two_id)

        self.assertTrue(self.event2_verified.wait(self.wait_time))
        self.assertTrue(self.event1_verified.wait(self.wait_time))
    def test_event_transform_worker(self):
        self.data_process_objs = []
        self._output_stream_ids = []
        self.event_verified = Event()

        # test that a data process (type: data-product-in / event-out) can be defined and launched.
        # verify that event fields are correctly populated

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        # create the DPD and two DPs
        self.event_data_process_id = self.create_event_data_processes()

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=self.event_data_process_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_event_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)

        self.start_event_transform_listener()

        self.data_modified = Event()

        rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
        rdt['time'] = [0]  # time should always come first
        rdt['conductivity'] = [1]
        rdt['pressure'] = [2]
        rdt['salinity'] = [8]

        self.publisher.publish(rdt.to_granule())

        self.assertTrue(self.event_verified.wait(self.wait_time))
    def test_transform_worker(self):

        # test that a data process (type: data-product-in / data-product-out) can be defined and launched.
        # verify that the output granule fields are correctly populated

        # test that the input and output data products are linked to facilitate provenance

        self.dp_list = []
        self.data_process_objs = []
        self._output_stream_ids = []
        self.granule_verified = Event()
        self.worker_assigned_event_verified = Event()
        self.dp_created_event_verified = Event()
        self.heartbeat_event_verified = Event()

        self.parameter_dict_id = self.dataset_management_client.read_parameter_dictionary_by_name(
            name='ctd_parsed_param_dict', id_only=True)

        # create the StreamDefinition
        self.stream_def_id = self.pubsub_client.create_stream_definition(
            name='stream_def', parameter_dictionary_id=self.parameter_dict_id)
        self.addCleanup(self.pubsub_client.delete_stream_definition,
                        self.stream_def_id)

        # create the DataProduct that is the input to the data processes
        input_dp_obj = IonObject(RT.DataProduct,
                                 name='input_data_product',
                                 description='input test stream')
        self.input_dp_id = self.dataproductclient.create_data_product(
            data_product=input_dp_obj, stream_definition_id=self.stream_def_id)

        # retrieve the Stream for this data product
        stream_ids, assoc_ids = self.rrclient.find_objects(
            self.input_dp_id, PRED.hasStream, RT.Stream, True)
        self.stream_id = stream_ids[0]

        self.start_event_listener()

        # create the DPD, DataProcess and output DataProduct
        dataprocessdef_id, dataprocess_id, dataproduct_id = self.create_data_process(
        )
        self.dp_list.append(dataprocess_id)

        # validate that the repository for data product algorithms persists the new resources  NEW SA-1
        # the create_data_process call created one of each
        dpd_ids, _ = self.rrclient.find_resources(
            restype=OT.DataProcessDefinition, id_only=False)
        # there will be more than one because of the DPDs that represent the PFs in the data product above
        self.assertTrue(dpd_ids is not None)
        dp_ids, _ = self.rrclient.find_resources(restype=OT.DataProcess,
                                                 id_only=False)
        # only one DP because the PFs in the data product above are not activated yet.
        self.assertEquals(len(dp_ids), 1)

        # validate the name and version label  NEW SA - 2
        dataprocessdef_obj = self.dataprocessclient.read_data_process_definition(
            dataprocessdef_id)
        self.assertEqual(dataprocessdef_obj.version_label, '1.0a')
        self.assertEqual(dataprocessdef_obj.name, 'add_arrays')

        # validate that the DPD has an attachment  NEW SA - 21
        attachment_ids, assoc_ids = self.rrclient.find_objects(
            dataprocessdef_id, PRED.hasAttachment, RT.Attachment, True)
        self.assertEqual(len(attachment_ids), 1)
        attachment_obj = self.rrclient.read_attachment(attachment_ids[0])
        log.debug('attachment: %s', attachment_obj)

        # validate that the data process resource has input and output data products associated
        # L4-CI-SA-RQ-364  and NEW SA-3
        outproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasOutputProduct, RT.DataProduct, True)
        self.assertEqual(len(outproduct_ids), 1)
        inproduct_ids, assoc_ids = self.rrclient.find_objects(
            dataprocess_id, PRED.hasInputProduct, RT.DataProduct, True)
        self.assertEqual(len(inproduct_ids), 1)

        # Test for provenance. Get Data product produced by the data processes
        output_data_product_id, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=True)

        output_data_product_provenance = self.dataproductclient.get_data_product_provenance(
            output_data_product_id[0])

        # Do a basic check to see if there were 2 entries in the provenance graph: the parent (input) and the
        # child (output) data products. The DataProcessDefinition that created the child is recorded on the parent link.
        self.assertTrue(len(output_data_product_provenance) == 2)
        self.assertTrue(self.input_dp_id in output_data_product_provenance[
            output_data_product_id[0]]['parents'])
        self.assertTrue(output_data_product_provenance[
            output_data_product_id[0]]['parents'][self.input_dp_id]
                        ['data_process_definition_id'] == dataprocessdef_id)

        # NEW SA - 4 | Data processing shall include the appropriate data product algorithm name and version number in
        # the metadata of each output data product created by the data product algorithm.
        output_data_product_obj, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            object_type=RT.DataProduct,
            predicate=PRED.hasOutputProduct,
            id_only=False)
        self.assertTrue(output_data_product_obj[0].name is not None)
        self.assertTrue(output_data_product_obj[0]._rev is not None)

        # retrieve subscription from data process
        subscription_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasSubscription,
            object_type=RT.Subscription,
            id_only=False)
        log.debug('test_transform_worker subscription_obj:  %s',
                  subscription_objs[0])

        # create a queue to catch the published granules
        self.subscription_id = self.pubsub_client.create_subscription(
            name='parsed_subscription',
            stream_ids=[self.stream_id],
            exchange_name=subscription_objs[0].exchange_name)
        self.addCleanup(self.pubsub_client.delete_subscription,
                        self.subscription_id)

        self.pubsub_client.activate_subscription(self.subscription_id)
        self.addCleanup(self.pubsub_client.deactivate_subscription,
                        self.subscription_id)

        stream_route = self.pubsub_client.read_stream_route(self.stream_id)
        self.publisher = StandaloneStreamPublisher(stream_id=self.stream_id,
                                                   stream_route=stream_route)

        for n in range(1, 101):
            rdt = RecordDictionaryTool(stream_definition_id=self.stream_def_id)
            rdt['time'] = [0]  # time should always come first
            rdt['conductivity'] = [1]
            rdt['pressure'] = [2]
            rdt['salinity'] = [8]

            self.publisher.publish(rdt.to_granule())

        # validate that the output granule is received and the updated value is correct
        self.assertTrue(self.granule_verified.wait(self.wait_time))

        # validate that the data process loaded into worker event is received    (L4-CI-SA-RQ-182)
        self.assertTrue(
            self.worker_assigned_event_verified.wait(self.wait_time))

        # validate that the data process create (with data product ids) event is received    (NEW SA -42)
        self.assertTrue(self.dp_created_event_verified.wait(self.wait_time))

        # validate that the data process heartbeat event is received (for every hundred granules processed) (L4-CI-SA-RQ-182)
        #this takes a while so set wait limit to large value
        self.assertTrue(self.heartbeat_event_verified.wait(200))

        # validate that the code from the transform function can be retrieved via inspect_data_process_definition
        src = self.dataprocessclient.inspect_data_process_definition(
            dataprocessdef_id)
        self.assertIn('def add_arrays(a, b)', src)

        # now delete the DPD and DP, then verify that the resources are retired so that information required for provenance is still available
        self.dataprocessclient.delete_data_process(dataprocess_id)
        self.dataprocessclient.delete_data_process_definition(
            dataprocessdef_id)

        in_dp_objs, _ = self.rrclient.find_objects(
            subject=dataprocess_id,
            predicate=PRED.hasInputProduct,
            object_type=RT.DataProduct,
            id_only=True)
        self.assertTrue(in_dp_objs is not None)

        dpd_objs, _ = self.rrclient.find_subjects(
            subject_type=RT.DataProcessDefinition,
            predicate=PRED.hasDataProcess,
            object=dataprocess_id,
            id_only=True)
        self.assertTrue(dpd_objs is not None)
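
The test only checks that the DPD source contains 'def add_arrays(a, b)'; given the comments that the process "adds conductivity + pressure", the registered transform function is presumably nothing more elaborate than this sketch (a guess, not the actual registered code):

def add_arrays(a, b):
    # element-wise sum of two numpy arrays (e.g. conductivity + pressure)
    return a + b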
Example #59
0
    def execute(input=None, context=None, config=None, params=None, state=None):
        """
        Dependencies
        ------------
        PRACSAL, PRESWAT_L1, longitude, latitude, TEMPWAT_L1

        Algorithms used
        ---------------
        1. PRACSAL = gsw_SP_from_C((CONDWAT_L1 * 10),TEMPWAT_L1,PRESWAT_L1)
        2. absolute_salinity = gsw_SA_from_SP(PRACSAL,PRESWAT_L1,longitude,latitude)
        3. conservative_temperature = gsw_CT_from_t(absolute_salinity,TEMPWAT_L1,PRESWAT_L1)
        4. DENSITY = gsw_rho(absolute_salinity,conservative_temperature,PRESWAT_L1)

        Reference
        ---------
        The calculations below are based on the following spreadsheet document:
        https://docs.google.com/spreadsheet/ccc?key=0Au7PUzWoCKU4dDRMeVI0RU9yY180Z0Y5U0hyMUZERmc#gid=0

        """
        lat = params['lat']
        lon = params['lon']
        stream_def_id = params['stream_def']


        rdt = RecordDictionaryTool.load_from_granule(input)
        out_rdt = RecordDictionaryTool(stream_definition_id=stream_def_id)

        out_rdt['time'] = rdt['time']

        conductivity = rdt['conductivity']
        pressure = rdt['pressure']
        temperature = rdt['temp']

        latitude = np.ones(conductivity.shape) * lat
        longitude = np.ones(conductivity.shape) * lon

        log.debug("Using latitude: %s,\n longitude: %s", latitude, longitude)

        # Doing: PRACSAL = gsw_SP_from_C((CONDWAT_L1 * 10),TEMPWAT_L1,PRESWAT_L1)
        pracsal = gsw.sp_from_c(conductivity * 10, temperature, pressure)

        log.debug("CTDBP Density algorithm calculated the pracsal (practical salinity) values: %s", pracsal)

        # Doing: absolute_salinity = gsw_SA_from_SP(PRACSAL,PRESWAT_L1,longitude,latitude)
        absolute_salinity = gsw.sa_from_sp(pracsal, pressure, longitude, latitude)

        log.debug("CTDBP Density algorithm calculated the absolute_salinity (actual salinity) values: %s", absolute_salinity)

        conservative_temperature = gsw.ct_from_t(absolute_salinity, temperature, pressure)

        log.debug("CTDBP Density algorithm calculated the conservative temperature values: %s", conservative_temperature)

        # Doing: DENSITY = gsw_rho(absolute_salinity,conservative_temperature,PRESWAT_L1)
        dens_value = gsw.rho(absolute_salinity, conservative_temperature, pressure)

        log.debug("Calculated density values: %s", dens_value)

        for key, value in rdt.iteritems():
            if key in out_rdt:
                if key=='conductivity' or key=='temp' or key=='pressure':
                    continue
                out_rdt[key] = value[:]

        out_rdt['density'] = dens_value

        return out_rdt.to_granule()
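
The same gsw call chain can be exercised standalone with scalar inputs to see the intermediate products. The numbers below are illustrative typical-seawater values, not outputs of this transform, and the calls simply mirror the ones used above:

import numpy as np
import gsw

conductivity = np.array([4.3])      # S/m, hypothetical L1 value
temperature = np.array([15.0])      # deg C
pressure = np.array([10.0])         # dbar
lon = np.array([-119.6])
lat = np.array([32.8])

pracsal = gsw.sp_from_c(conductivity * 10, temperature, pressure)   # practical salinity
abs_sal = gsw.sa_from_sp(pracsal, pressure, lon, lat)               # absolute salinity
cons_temp = gsw.ct_from_t(abs_sal, temperature, pressure)           # conservative temperature
density = gsw.rho(abs_sal, cons_temp, pressure)
# density comes out around 1026 kg/m^3 for typical seawater like this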