Example #1
    def test_add_hdf_dataset(self):
        """
        Test adding a name and an array
        """

        testencoder = HDFEncoder()
        testencoder.add_hdf_dataset('test_dataset', self.known_array)
        testencoder.encoder_close()
Example #2
    def test_add_hdf_dataset_with_bad_name(self):
        """
        Test adding a bad name and an array
        """

        testencoder = HDFEncoder()
        with self.assertRaises(AssertionError):
            self.dataset = testencoder.add_hdf_dataset('bad name', self.known_array)
        testencoder.encoder_close()
Example #3
    def test_add_hdf_dataset_with_bad_name(self):
        """
        Test adding a bad name and an array
        """

        testencoder = HDFEncoder()
        with self.assertRaises(HDFEncoderException):
            self.dataset = testencoder.add_hdf_dataset('bad name', self.known_array)
        testencoder.encoder_close()
Example #4
    def test_add_hdf_dataset_with_bad_array(self):
        """
        Test adding a name and something other than an array
        """

        testencoder = HDFEncoder()
        with self.assertRaises(HDFEncoderException):
            testencoder.add_hdf_dataset(self.dataset_name,'bad array')
        testencoder.encoder_close()
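
The test above expects the encoder to reject a value that is not an array. The granule-builder examples further down this page always convert list data with numpy.asarray or numpy.asanyarray before adding it; a minimal sketch of that pattern (the dataset name and values are purely illustrative):

import numpy
from prototype.hdf.hdf_codec import HDFEncoder

# Wrap the plain Python list in a numpy array before handing it to the encoder,
# as the close_stream_granule() examples below do with numpy.asarray(records).
records = [1.0, 2.0, 3.0]
encoder = HDFEncoder()
encoder.add_hdf_dataset('fields/example_data', numpy.asarray(records))
hdf_string = encoder.encoder_close()
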
Example #5
    def test_encode_known_and_compare(self):
        """
        Create an encoder and add one dataset/array
        """

        hdfencoder = HDFEncoder()
        hdfencoder.add_hdf_dataset(self.path_to_dataset, self.known_array)
        # Serialize to string and compare to a known value
        hdf_string = hdfencoder.encoder_close()

        self.assertEqual(sha1(hdf_string),self.known_hdf_as_sha1)
Example #6
    def _slice(self, granule, slice_):
        '''
        @brief Creates a granule which is a slice of the granule parameter
        @param granule the superset
        @param slice_ The slice values for which to create the granule
        @return Crafted subset granule of the parameter granule.
        '''
        retval = copy.deepcopy(granule)
        fields = self._list_data(self.definition, granule)
        record_count = slice_.stop - slice_.start
        assert record_count > 0, 'slice is malformed'
        pairs = self._pair_up(granule)
        var_names = list([i[0]
                          for i in pairs])  # Get the var_names from the pairs
        log.debug('var_names: %s', var_names)
        file_path = self._get_hdf_from_string(
            granule.identifiables[self.data_stream_id].values)
        codec = HDFEncoder()
        vectors = acquire_data([file_path], var_names, record_count,
                               slice_).next()

        for row, value in vectors.iteritems():
            vp = self._find_vp(pairs, row)
            # Determine the range_id via a reverse dictionary lookup
            #@todo: improve this pattern
            for field, path in fields.iteritems():
                if vp == path:
                    range_id = field
                    break
            bounds_id = retval.identifiables[range_id].bounds_id
            # Recalculate the bounds for this field and update the granule
            range = value['range']
            retval.identifiables[bounds_id].value_pair[0] = float(range[0])
            retval.identifiables[bounds_id].value_pair[1] = float(range[1])
            codec.add_hdf_dataset(vp, value['values'])
            record_count = len(value['values'])
            #----- DEBUGGING ---------
            log.debug('slice- row: %s', row)
            log.debug('slice- value_path: %s', vp)
            log.debug('slice- range_id: %s', range_id)
            log.debug('slice- bounds_id: %s', bounds_id)
            log.debug('slice- limits: %s', value['range'])
            #-------------------------

        retval.identifiables[self.element_count_id].value = record_count
        hdf_string = codec.encoder_close()
        self._patch_granule(retval, hdf_string)
        FileSystem.unlink(file_path)
        return retval
Example #7
    def test_encode_decode(self):
        """
        Encode an array, then decode it and compare
        """

        hdfencoder = HDFEncoder() # put array into the encoder
        hdfencoder.add_hdf_dataset(self.path_to_dataset, self.known_array)
        # get the string out from encoder
        hdf_string = hdfencoder.encoder_close()

        # Compare the arrays
        hdfdecoder = HDFDecoder(hdf_string)  # put string in decoder...
        nparray = hdfdecoder.read_hdf_dataset(self.path_to_dataset) # get array out

        self.assertEqual(sha1(nparray.tostring()), sha1(self.known_array.tostring()) ) # works for arbitrarily shaped arrays
Example #8
    def test_decode_encode(self):
        """
        Try a decode-encode sequence and check that the result is the same string
        """

        # decode an existing hdf file and read out an array
        hdfdecoder = HDFDecoder(self.known_hdf_as_string) # put known string in decoder...
        nparray = hdfdecoder.read_hdf_dataset(self.path_to_dataset) # get array out

        # encode the array and get the binary string containing the encoded hdf file
        hdfencoder = HDFEncoder() # put the array in the encoder...
        hdfencoder.add_hdf_dataset(self.path_to_dataset, nparray)
        hdf_string = hdfencoder.encoder_close() # get string out

        # compare the two strings
        self.assertEqual(sha1(hdf_string),self.known_hdf_as_sha1)
Example #9
    def _slice(self,granule,slice_):
        '''
        @brief Creates a granule which is a slice of the granule parameter
        @param granule the superset
        @param slice_ The slice values for which to create the granule
        @return Crafted subset granule of the parameter granule.
        '''
        retval = copy.deepcopy(granule)
        fields = self._list_data(self.definition,granule)
        record_count = slice_.stop - slice_.start
        assert record_count > 0, 'slice is malformed'
        pairs = self._pair_up(granule)
        var_names = list([i[0] for i in pairs]) # Get the var_names from the pairs
        log.debug('var_names: %s',var_names)
        file_path = self._get_hdf_from_string(granule.identifiables[self.data_stream_id].values)
        codec = HDFEncoder()
        vectors = acquire_data([file_path],var_names,record_count,slice_ ).next()

        for row, value in vectors.iteritems():
            vp = self._find_vp(pairs, row)
            # Determine the range_id via a reverse dictionary lookup
            #@todo: improve this pattern
            for field,path in fields.iteritems():
                if vp==path:
                    range_id = field
                    break
            bounds_id = retval.identifiables[range_id].bounds_id
            # Recalculate the bounds for this field and update the granule
            range = value['range']
            retval.identifiables[bounds_id].value_pair[0] = float(range[0])
            retval.identifiables[bounds_id].value_pair[1] = float(range[1])
            codec.add_hdf_dataset(vp, value['values'])
            record_count = len(value['values'])
            #----- DEBUGGING ---------
            log.debug('slice- row: %s', row)
            log.debug('slice- value_path: %s', vp)
            log.debug('slice- range_id: %s', range_id)
            log.debug('slice- bounds_id: %s', bounds_id)
            log.debug('slice- limits: %s', value['range'])
            #-------------------------


        retval.identifiables[self.element_count_id].value = record_count
        hdf_string = codec.encoder_close()
        self._patch_granule(retval, hdf_string)
        FileSystem.unlink(file_path)
        return retval
Example #10
    def test_encode_with_filename_and_compare(self):
        """
        Create an encoder and add one dataset/array
        """
        testfilename = 'test_encode_with_filename_and_compare'

        hdfencoder = HDFEncoder(testfilename)
        hdfencoder.add_hdf_dataset(self.path_to_dataset, self.known_array)
        # get the string out from encoder
        hdf_string = hdfencoder.encoder_close()

        self.assertEqual(sha1(hdf_string),self.known_hdf_as_sha1)

        hdfdecoder = HDFDecoder(self.known_hdf_as_string)
        nparray = hdfdecoder.read_hdf_dataset(self.path_to_dataset)

        self.assertEqual(sha1(nparray.tostring()), sha1(self.known_array.tostring()) )
Example #11
    def close_stream_granule(self):

        import numpy

        encoder = HDFEncoder()

        for coverage_info in self._coordinates.itervalues():

            records = coverage_info['records'] # Get the list of records for this coverage
            if not records:
                log.warn('Coverage name "%s" has no values!' % coverage_info['id'])
                continue

            array = numpy.asarray(records) # Turn the list into an array

            # Add the coverage
            self._granule.identifiables[coverage_info['id']] = coverage_info['obj']

            # Add the range
            range = [float(numpy.nanmin(array)), float(numpy.nanmax(array))]
            self._granule.identifiables[coverage_info['obj'].bounds_id] = QuantityRangeElement(value_pair=range)

            # Add the data
            encoder.add_hdf_dataset(name=coverage_info['values_path'],nparray=array)

        for range_info in self._ranges.itervalues():

            records = range_info['records'] # Get the list of records for this range
            if not records:
                log.warn('Range name "%s" has no values!' % range_info['id'])
                continue

            array = numpy.asarray(records) # Turn the list into an array

            # Add the coverage
            self._granule.identifiables[range_info['id']] = range_info['obj']

            # Add the range
            range = [float(numpy.nanmin(array)), float(numpy.nanmax(array))]
            self._granule.identifiables[range_info['obj'].bounds_id] = QuantityRangeElement(value_pair=range)

            # Add the data
            encoder.add_hdf_dataset(name=range_info['values_path'],nparray=array)

        hdf_string = encoder.encoder_close()

        sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
        self._granule.identifiables[self._encoding_id] = Encoding(
            encoding_type='hdf5',
            compression=None,
            sha1=sha1
        )

        self._granule.identifiables[self._granule.data_stream_id] = DataStream(
            values=hdf_string
        )

        return self._granule
Example #12
    def test_add_hdf_dataset_with_bad_array(self):
        """
        Test adding a name and something other than an array
        """

        testencoder = HDFEncoder()
        with self.assertRaises(AssertionError):
            testencoder.add_hdf_dataset(self.dataset_name, 'bad array')
        testencoder.encoder_close()
Example #13
    def add_two_datasets_read_compare(self, filename, dataset_name1, dataset_name2):
        array1 = numpy.ones((4,5))
        array2 = numpy.ones((2,3))

        # first create the file
        hdfencoder = HDFEncoder(filename)
        hdfencoder.add_hdf_dataset(dataset_name1, array1)

        hdfencoder.add_hdf_dataset(dataset_name2, array2)
        hdfstring = hdfencoder.encoder_close()

        hdfdecoder = HDFDecoder(hdfstring)
        # Read the first dataset
        array_decoded_1 =  hdfdecoder.read_hdf_dataset(dataset_name1)

        # Read the second dataset
        array_decoded_2 = hdfdecoder.read_hdf_dataset(dataset_name2)

        self.assertEqual(sha1(array1.tostring()), sha1(array_decoded_1.tostring()) )
        self.assertEqual(sha1(array2.tostring()), sha1(array_decoded_2.tostring()) )
Example #14
    def add_two_datasets_read_compare(self, filename, dataset_name1, dataset_name2):
        array1 = numpy.ones((4,5))
        array2 = numpy.ones((2,3))

        # first create the file
        hdfencoder = HDFEncoder(filename)
        hdfencoder.add_hdf_dataset(dataset_name1, array1)
        hdfstring = hdfencoder.encoder_close()

        # now open the file and add another branch
        hdfencoder = HDFEncoder(filename)
        hdfencoder.add_hdf_dataset(dataset_name2, array2)
        hdfstring = hdfencoder.encoder_close()

        hdfdecoder = HDFDecoder(hdfstring)
        # Read the first dataset
        array_decoded_1 =  hdfdecoder.read_hdf_dataset(dataset_name1)

        hdfdecoder = HDFDecoder(hdfstring)
        # Read the second dataset
        array_decoded_2 = hdfdecoder.read_hdf_dataset(dataset_name2)

        self.assertEqual(array1.tostring(), array_decoded_1.tostring())
        self.assertEqual(array2.tostring(), array_decoded_2.tostring())
Example #15
    def close_stream_granule(self, timestamp=None):

        import numpy

        encoder = HDFEncoder()

        for coverage_info in self._coordinates.itervalues():

            records = coverage_info['records']  # Get the list of records for this coverage
            if not records:
                log.warn('Coverage name "%s" has no values!' %
                         coverage_info['id'])
                continue

            array = numpy.asarray(records)  # Turn the list into an array

            # Add the coverage
            self._granule.identifiables[
                coverage_info['id']] = coverage_info['obj']

            # Add the range
            range = [float(numpy.nanmin(array)), float(numpy.nanmax(array))]
            self._granule.identifiables[
                coverage_info['obj'].bounds_id] = QuantityRangeElement(
                    value_pair=range)

            # Add the data
            encoder.add_hdf_dataset(name=coverage_info['values_path'],
                                    nparray=array)

        for range_info in self._ranges.itervalues():

            records = range_info['records']  # Get the list of records for this range
            if not records:
                log.warn('Range name "%s" has no values!' % range_info['id'])
                continue

            array = numpy.asarray(records)  # Turn the list into an array

            # Add the coverage
            self._granule.identifiables[range_info['id']] = range_info['obj']

            # Add the range
            range = [float(numpy.nanmin(array)), float(numpy.nanmax(array))]
            self._granule.identifiables[
                range_info['obj'].bounds_id] = QuantityRangeElement(
                    value_pair=range)

            # Add the data
            encoder.add_hdf_dataset(name=range_info['values_path'],
                                    nparray=array)

        hdf_string = encoder.encoder_close()

        sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
        self._granule.identifiables[self._encoding_id] = Encoding(
            encoding_type=self._encoding.encoding_type,
            compression=None,
            sha1=sha1)

        tstamp = TimeElement(
            definition="http://www.opengis.net/def/property/OGC/0/SamplingTime",
            reference_frame="http://www.opengis.net/def/trs/OGC/0/GPS",
            reference_time='1970-01-01T00:00:00.000Z',
            value=timestamp or get_ion_ts())

        self._granule.identifiables[self._granule.data_stream_id] = DataStream(
            values=hdf_string, timestamp=tstamp)

        return self._granule
Example #16
    def subset(self, granule, coverages):
        '''
        @param granule
        @return dataset subset based on the fields
        '''
        assert isinstance(granule,
                          StreamGranuleContainer), 'object is not a granule.'
        field_ids = self.field_ids
        element_count_id = self.element_count_id

        values_path = list()
        domain_ids = list()
        coverage_ids = list()
        coverages = list(coverages)
        log.debug('Coverages include %s of type %s', coverages,
                  type(coverages))
        #-----------------------------------------------------------------------------------------------------------
        # Iterate through the fields in accordance with the stream definition and check for rangesets and coordinate axes
        #  - If it's a coordinate axis, it belongs regardless of what the client desires. (It's part of the domain)
        #  - If it's a rangeset, make sure it's part of what the client asked for; if not, discard it
        #-----------------------------------------------------------------------------------------------------------

        for field_id in field_ids:

            range_id = self.definition.identifiables[field_id].range_id

            #-------------------------------------------------------------------------------------
            # Coordinate Axis
            # - Keep track of this in our domains
            # - Add it to the paths we need to grab from the file(s)
            #-------------------------------------------------------------------------------------

            if isinstance(self.definition.identifiables[range_id],
                          CoordinateAxis):
                log.debug('got a domain: %s' % range_id)
                domain_ids.append(field_id)
                if granule.identifiables.has_key(range_id):
                    value_path = granule.identifiables[
                        range_id].values_path or self.definition.identifiables[
                            range_id].values_path
                    values_path.append(value_path)
                else:
                    value_path = self.definition.identifiables[
                        range_id].values_path
                    values_path.append(value_path)
                continue

            #-------------------------------------------------------------------------------------
            # Range Set
            # - If it's part of the coverages we want to keep
            #   - Add it to the list of ranges we're tracking
            #   - Add the value path to the paths we're tracking.
            #-------------------------------------------------------------------------------------

            if isinstance(self.definition.identifiables[range_id], RangeSet):
                # If it's a rangeset, a specified coverage, and the granule has it, add it to the list
                if field_id in coverages:
                    if granule.identifiables.has_key(range_id):
                        log.debug('got a range: %s' % range_id)
                        coverage_ids.append(field_id)
                        if granule.identifiables.has_key(range_id):
                            value_path = granule.identifiables[
                                range_id].values_path or self.definition.identifiables[
                                    range_id].values_path
                            values_path.append(value_path)
                        else:
                            value_path = self.definition.identifiables[
                                range_id].values_path
                            values_path.append(value_path)
                        continue

                # ----
                # We need to track the range and bounds because,
                # you guessed it, we need to update the bounds
                # ----

                range_id = self.definition.identifiables[field_id].range_id
                bounds_id = self.definition.identifiables[range_id].bounds_id

                #---
                # Lastly, if the field is there and we don't want it, we need to strip it
                #---

                if not (field_id in coverages):
                    log.debug('%s doesn\'t belong in %s.', field_id, coverages)
                    log.debug('rebool: %s', bool(field_id in coverages))
                    if granule.identifiables.has_key(range_id):
                        log.debug('Removing %s from granule', range_id)
                        del granule.identifiables[range_id]
                    if granule.identifiables.has_key(bounds_id):
                        log.debug('Removing %s from granule', bounds_id)
                        del granule.identifiables[bounds_id]

        log.debug('Domains: %s', domain_ids)
        log.debug('Ranges: %s', coverage_ids)
        log.debug('Values_paths: %s', values_path)

        file_path = self._get_hdf_from_string(
            granule.identifiables[self.data_stream_id].values)
        full_coverage = list(domain_ids + coverage_ids)

        log.debug('Full coverage: %s' % full_coverage)
        log.debug('Calling acquire_data with: %s, %s, %s', [file_path],
                  values_path, granule.identifiables[element_count_id].value)

        codec = HDFEncoder()

        pairs = self._pair_up(granule)
        var_names = list([i[0] for i in pairs])

        record_count = granule.identifiables[self.element_count_id].value
        data = acquire_data([file_path], var_names, record_count).next()
        for row, value in data.iteritems():
            vp = self._find_vp(pairs, row)
            codec.add_hdf_dataset(vp, value['values'])

        hdf_string = codec.encoder_close()
        self._patch_granule(granule, hdf_string)

        FileSystem.unlink(file_path)

        return granule
Example #17
    def _merge(self, msgs):
        '''
        @brief Merges all the granules and datasets into one large dataset (Union)
        @param msgs raw granules from couch
        @return complete dataset
        @description
             n
        D := U [ msgs_i ]
            i=0
        '''
        granule = None
        file_list = list()
        count = len(msgs)
        used_vals = list()

        #-------------------------------------------------------------------------------------
        # Merge each granule to another granule one by one.
        # After each merge operation keep track of what files belong where on the timeline
        #-------------------------------------------------------------------------------------

        for i in xrange(count):
            if i == 0:
                granule = msgs[0]['granule']
                psc = PointSupplementConstructor(
                    point_definition=self.definition)

                res = ReplayProcess.merge_granule(definition=self.definition,
                                                  granule1=granule,
                                                  granule2=None)
                granule = res['granule']
                file_pair = res['files']
                log.debug('file_pair: %s', file_pair)

                if file_pair[0] not in file_list and file_pair[0][
                        0] not in used_vals:
                    file_list.append(tuple(file_pair[0]))
                    used_vals.append(file_pair[0][0])

            else:
                res = ReplayProcess.merge_granule(definition=self.definition,
                                                  granule1=granule,
                                                  granule2=msgs[i]['granule'])

                granule = res['granule']
                file_pair = res['files']
                log.debug('file_pair: %s', file_pair)

                if file_pair[0] not in file_list and file_pair[0][
                        0] not in used_vals:
                    file_list.append(tuple(file_pair[0]))
                    used_vals.append(file_pair[0][0])
                if file_pair[1] not in file_list and file_pair[1][
                        0] not in used_vals:
                    file_list.append(tuple(file_pair[1]))
                    used_vals.append(file_pair[1][0])

        if not granule:
            return
        log.debug('file_list: %s', file_list)
        #-------------------------------------------------------------------------------------
        # Order the file list using Python's stable sort (by the first value in the tuples)
        # Then peel off just the file names
        # Then get the appropriate URL for the file using FileSystem
        #-------------------------------------------------------------------------------------
        file_list.sort()
        file_list = list(i[1] for i in file_list)
        file_list = list([
            FileSystem.get_hierarchical_url(FS.CACHE, '%s' % i)
            for i in file_list
        ])

        pairs = self._pair_up(granule)
        var_names = list([i[0] for i in pairs])

        record_count = granule.identifiables[self.element_count_id].value
        codec = HDFEncoder()
        log.debug('acquire_data:')
        log.debug('\tfile_list: %s', file_list)
        log.debug('\tfields: %s', var_names)
        log.debug('\trecords: %s', record_count)

        data = acquire_data(file_list, var_names, record_count).next()

        for row, value in data.iteritems():
            value_path = self._find_vp(pairs, row)
            codec.add_hdf_dataset(value_path, nparray=value['values'])
            #-------------------------------------------------------------------------------------
            # Debugging
            #-------------------------------------------------------------------------------------
            log.debug('row: %s', row)
            log.debug('value path: %s', value_path)
            log.debug('value: %s', value['values'])

        hdf_string = codec.encoder_close()
        self._patch_granule(granule, hdf_string)
        return granule
Example #18
    def subset(self,granule,coverages):
        '''
        @param granule
        @return dataset subset based on the fields
        '''
        assert isinstance(granule, StreamGranuleContainer), 'object is not a granule.'
        field_ids = self.field_ids
        element_count_id = self.element_count_id


        values_path = list()
        domain_ids = list()
        coverage_ids = list()
        coverages = list(coverages)
        log.debug('Coverages include %s of type %s', coverages, type(coverages))
        #-----------------------------------------------------------------------------------------------------------
        # Iterate through the fields in accordance with the stream definition and check for rangesets and coordinate axes
        #  - If it's a coordinate axis, it belongs regardless of what the client desires. (It's part of the domain)
        #  - If it's a rangeset, make sure it's part of what the client asked for; if not, discard it
        #-----------------------------------------------------------------------------------------------------------


        for field_id in field_ids:

            range_id = self.definition.identifiables[field_id].range_id

            #-------------------------------------------------------------------------------------
            # Coordinate Axis
            # - Keep track of this in our domains
            # - Add it to the paths we need to grab from the file(s)
            #-------------------------------------------------------------------------------------

            if isinstance(self.definition.identifiables[range_id], CoordinateAxis):
                log.debug('got a domain: %s' % range_id)
                domain_ids.append(field_id)
                if granule.identifiables.has_key(range_id):
                    value_path = granule.identifiables[range_id].values_path or self.definition.identifiables[range_id].values_path
                    values_path.append(value_path)
                else:
                    value_path = self.definition.identifiables[range_id].values_path
                    values_path.append(value_path)
                continue

            #-------------------------------------------------------------------------------------
            # Range Set
            # - If it's part of the coverages we want to keep
            #   - Add it to the list of ranges we're tracking
            #   - Add the value path to the paths we're tracking.
            #-------------------------------------------------------------------------------------


            if isinstance(self.definition.identifiables[range_id], RangeSet):
                # If it's a rangeset, a specified coverage, and the granule has it, add it to the list
                if field_id in coverages:
                    if granule.identifiables.has_key(range_id):
                        log.debug('got a range: %s' % range_id)
                        coverage_ids.append(field_id)
                        if granule.identifiables.has_key(range_id):
                            value_path = granule.identifiables[range_id].values_path or self.definition.identifiables[range_id].values_path
                            values_path.append(value_path)
                        else:
                            value_path = self.definition.identifiables[range_id].values_path
                            values_path.append(value_path)
                        continue

                # ----
                # We need to track the range and bounds because,
                # you guessed it, we need to update the bounds
                # ----

                range_id = self.definition.identifiables[field_id].range_id
                bounds_id = self.definition.identifiables[range_id].bounds_id


                #---
                # Lastly, if the field is there and we don't want it, we need to strip it
                #---

                if not (field_id in coverages):
                    log.debug('%s doesn\'t belong in %s.', field_id, coverages)
                    log.debug('rebool: %s', bool(field_id in coverages))
                    if granule.identifiables.has_key(range_id):
                        log.debug('Removing %s from granule', range_id)
                        del granule.identifiables[range_id]
                    if granule.identifiables.has_key(bounds_id):
                        log.debug('Removing %s from granule', bounds_id)
                        del granule.identifiables[bounds_id]

        log.debug('Domains: %s', domain_ids)
        log.debug('Ranges: %s', coverage_ids)
        log.debug('Values_paths: %s', values_path)

        file_path = self._get_hdf_from_string(granule.identifiables[self.data_stream_id].values)
        full_coverage = list(domain_ids + coverage_ids)

        log.debug('Full coverage: %s' % full_coverage)
        log.debug('Calling acquire_data with: %s, %s, %s', [file_path],values_path,granule.identifiables[element_count_id].value)

        codec = HDFEncoder()

        pairs = self._pair_up(granule)
        var_names = list([i[0] for i in pairs])

        record_count = granule.identifiables[self.element_count_id].value
        data = acquire_data([file_path], var_names, record_count).next()
        for row,value in data.iteritems():
            vp = self._find_vp(pairs, row)
            codec.add_hdf_dataset(vp, value['values'])

        hdf_string = codec.encoder_close()
        self._patch_granule(granule,hdf_string)

        FileSystem.unlink(file_path)

        return granule
Example #19
    def _encode_supplement(self):
        """
        Method used to encode the point dataset supplement
        """
        def listify(input):
            if hasattr(input, '__iter__'):
                return input
            else:
                return [input,]

        # build the hdf and return the ion-object...
        hdf_string = ''
        try:
            import numpy
            encoder = HDFEncoder()
            # Need to search through the coordinate_axes dictionary to find out what the values_path
            # will be for the coordinate axes.
            # This assumes the coordinate axis names as described below. Will probably need to be
            # changed to accommodate other labels.
            for key, coordinate_axis in self._coordinate_axes.iteritems():

                if self._times is not None and coordinate_axis.axis.lower() == 'time':
                    time_range = [min(self._times), max(self._times)]
                    self._packet_container.identifiables[key + '_bounds'].value_pair = time_range

                    times = listify(self._times)
                    encoder.add_hdf_dataset(coordinate_axis.values_path, numpy.asanyarray(times))

                if self._longitudes is not None and coordinate_axis.axis.lower() == 'longitude':
                    lons_range = [min(self._longitudes), max(self._longitudes)]
                    self._packet_container.identifiables[key + '_bounds'].value_pair = lons_range

                    lons = listify(self._longitudes)
                    encoder.add_hdf_dataset(coordinate_axis.values_path, numpy.asanyarray(lons))

                if self._latitudes is not None and coordinate_axis.axis.lower() == 'latitude':
                    lats_range = [min(self._latitudes), max(self._latitudes)]
                    self._packet_container.identifiables[key + '_bounds'].value_pair = lats_range

                    lats = listify(self._latitudes)
                    encoder.add_hdf_dataset(coordinate_axis.values_path, numpy.asanyarray(lats))

            # Loop through ranges, one for each coverage. Range objects contain the values_path variable,
            # so use that to add values to the hdf.
            for key, range in self._ranges.iteritems():
                if key in self._values:
                    v = self._values[key]
                    encoder.add_hdf_dataset(range.values_path, numpy.asanyarray(v))

            hdf_string = encoder.encoder_close()

            sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
            self._packet_container.identifiables['stream_encoding'] = Encoding(
                encoding_type='hdf5',
                compression=None,
                sha1=sha1
            )

            return hdf_string

        except :
            log.exception('HDF encoder failed. Please make sure you have it properly installed!')
Example #20
import numpy, h5py

from prototype.hdf.hdf_codec import HDFEncoder, HDFDecoder

array1 = numpy.ones((4,5))
array2 = numpy.ones((2,3))
array3 = numpy.ones((10,2))
dataset_name1 = 'rootgroup/mygroup/data/temperature'
dataset_name2 = 'rootgroup/mygroup/data/pressure'
dname = 'aGroup/adataset'

###########################################################

# Create an encoder object
hdfencoder = HDFEncoder()
# Add data as an array
hdfencoder.add_hdf_dataset(dataset_name1, array1)
hdfencoder.add_hdf_dataset(dataset_name2, array2)
# Convert all the data to a binary string for easy transportation
hdfstring1 = hdfencoder.encoder_close()

# Create another encoder. This time, pass the name of an hdf5 file to write
hdfencoder = HDFEncoder('/tmp/testHDFEncoder.hdf5')
hdfencoder.add_hdf_dataset(dataset_name1, array1)
hdfencoder.add_hdf_dataset(dataset_name2, array2)
# Convert all the data to a binary string for easy transportation
hdfstring2 = hdfencoder.encoder_close()

# Create another encoder. This time, pass the name of an hdf5 file to write
hdfencoder = HDFEncoder('/tmp/testHDFEncoder.hdf5')
hdfencoder.add_hdf_dataset(dname, array3)
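
The script above imports HDFDecoder but never uses it, and the last encoder is left open. A minimal round-trip sketch, restricted to the encoder/decoder calls already shown in these examples:

import numpy
from prototype.hdf.hdf_codec import HDFEncoder, HDFDecoder

# Encode a small array under a hierarchical dataset name...
array = numpy.ones((10, 2))
dname = 'aGroup/adataset'

encoder = HDFEncoder()
encoder.add_hdf_dataset(dname, array)
hdf_string = encoder.encoder_close()

# ...then decode the binary string and read the same dataset back out.
decoder = HDFDecoder(hdf_string)
decoded = decoder.read_hdf_dataset(dname)
assert decoded.tostring() == array.tostring()
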
Example #21
def ctd_stream_packet(stream_id = None, c=None, t=None, p=None , lat=None, lon=None, time=None, create_hdf=True):
    """
    This is a simple interface for creating a packet of ctd data for a given stream defined by the method above.
    The string names of content are tightly coupled to the method above.
    To send actual data you must have hdf5, numpy and h5py installed.

    @brief build a demo ctd data packet as an ion object. All value arguments are optional, but any argument provided
    should have the same length.
    
    @param stream_id should be the same as the stream_id for the definition - the stream resource ID
    @param c is a list, tuple or ndarray of conductivity values
    @param t is a list, tuple or ndarray of temperature values
    @param p is a list, tuple or ndarray of pressure values
    @param lat is a list, tuple or ndarray of latitude values
    @param lon is a list, tuple or ndarray of longitude values
    @param time is a list, tuple or ndarray of time values

    """
    length = False

    def listify(input):
        if hasattr(input, '__iter__'):
            return input
        else:
            return [input,]


    c_range = []
    if c is not None:
        c = listify(c)
        c_range = [min(c), max(c)]
        if length:
            assert length == len(c), 'Conductivity input is the wrong length'
        else:
            length = len(c)

    t_range = []
    if t is not None:
        t = listify(t)
        t_range = [min(t), max(t)]
        if length:
            assert length == len(t), 'Temperature input is the wrong length'
        else:
            length = len(t)

    p_range = []
    if p is not None:
        p = listify(p)
        p_range = [min(p), max(p)]
        if length:
            assert length == len(p), 'Pressure input is the wrong length'
        else:
            length = len(p)

    lat_range = []
    if lat is not None:
        lat = listify(lat)
        lat_range = [min(lat), max(lat)]
        if length:
            assert length == len(lat), 'Latitude input is the wrong length'
        else:
            length = len(lat)

    lon_range = []
    if lon is not None:
        lon = listify(lon)
        lon_range = [min(lon), max(lon)]
        if length:
            assert length == len(lon), 'Longitude input is the wrong length'
        else:
            length = len(lon)

    time_range = []
    if time is not None:
        time = listify(time)
        time_range = [min(time), max(time)]
        if length:
            assert length == len(time), 'Time input is the wrong length'
        else:
            length = len(time)


    hdf_string = ''
    if create_hdf:
        try:
            # Use inline import to put off making numpy a requirement
            import numpy as np

            encoder = HDFEncoder()
            if t is not None:
                encoder.add_hdf_dataset('fields/temp_data', np.asanyarray(t))


            if c is not None:
                encoder.add_hdf_dataset('fields/cndr_data', np.asanyarray(c))

            if p is not None:
                encoder.add_hdf_dataset('fields/pressure_data',np.asanyarray(p))

            if lat is not None:
                encoder.add_hdf_dataset('coordinates/latitude', np.asanyarray(lat))

            if lon is not None:
                encoder.add_hdf_dataset('coordinates/longitude',np.asanyarray(lon))

            if time is not None:
                encoder.add_hdf_dataset('coordinates/time',np.asanyarray(time))

            hdf_string = encoder.encoder_close()
        except :
            log.exception('HDF encoder failed. Please make sure you have it properly installed!')



    # build a hdf file here

    # data stream id is the identifier for the DataStream object - the root of the data structure
    ctd_container = StreamGranuleContainer(
        stream_resource_id=stream_id,
        data_stream_id= 'ctd_data'
    )


    ctd_container.identifiables['ctd_data'] = DataStream(
        id=stream_id,
        values=hdf_string # put the hdf file here as bytes!
        )

    sha1 = hashlib.sha1(hdf_string).hexdigest().upper() if hdf_string else ''

    ctd_container.identifiables['stream_encoding'] = Encoding(
        encoding_type = 'hdf5',
        compression = None,
        sha1 = sha1,
    )


    ctd_container.identifiables['record_count'] = CountElement(
        value= length or -1,
        )

    # Time
    if time is not None :
        ctd_container.identifiables['time'] = CoordinateAxis(
            bounds_id='time_bounds'
        )

        ctd_container.identifiables['time_bounds'] = QuantityRangeElement(
            value_pair=time_range
        )

    # Latitude
    if lat is not None:
        ctd_container.identifiables['latitude'] = CoordinateAxis(
            bounds_id='latitude_bounds'
        )

        ctd_container.identifiables['latitude_bounds'] = QuantityRangeElement(
            value_pair=lat_range
        )

    # Longitude
    if lon is not None:
        ctd_container.identifiables['longitude'] = CoordinateAxis(
            bounds_id='longitude_bounds'
        )

        ctd_container.identifiables['longitude_bounds'] = QuantityRangeElement(
            value_pair=lon_range
        )


    # Pressure
    if p is not None:
        ctd_container.identifiables['pressure_data'] = CoordinateAxis(
            bounds_id='pressure_bounds'
        )

        ctd_container.identifiables['pressure_bounds'] = QuantityRangeElement(
            value_pair=p_range
        )

    # Temperature
    if t is not None:
        ctd_container.identifiables['temp_data'] = RangeSet(
            bounds_id='temp_bounds'
        )

        ctd_container.identifiables['temp_bounds'] = QuantityRangeElement(
            value_pair=t_range
        )

    # Conductivity
    if c is not None:
        ctd_container.identifiables['cndr_data'] = RangeSet(
            bounds_id='cndr_bounds'
        )

        ctd_container.identifiables['cndr_bounds'] = QuantityRangeElement(
            value_pair=c_range
        )


    return ctd_container
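
A usage sketch for the packet builder above; the stream id and every numeric value are invented for illustration, and the assertions only restate what the function does with its inputs:

# Build a three-record CTD packet (all values below are made up).
packet = ctd_stream_packet(
    stream_id='ctd_stream',            # placeholder stream resource id
    c=[40.1, 40.2, 40.3],              # conductivity
    t=[10.5, 10.6, 10.7],              # temperature
    p=[100.0, 100.5, 101.0],           # pressure
    lat=[45.0, 45.0, 45.0],
    lon=[-125.0, -125.0, -125.0],
    time=[3600, 3601, 3602],
    create_hdf=True)

assert packet.identifiables['record_count'].value == 3
assert packet.identifiables['temp_bounds'].value_pair == [10.5, 10.7]
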
Example #22
import numpy, h5py

from prototype.hdf.hdf_codec import HDFEncoder, HDFDecoder

array1 = numpy.ones((4, 5))
array2 = numpy.ones((2, 3))
array3 = numpy.ones((10, 2))
dataset_name1 = 'rootgroup/mygroup/data/temperature'
dataset_name2 = 'rootgroup/mygroup/data/pressure'
dname = 'aGroup/adataset'

###########################################################

# Create an encoder object
hdfencoder = HDFEncoder()
# Add data as an array
hdfencoder.add_hdf_dataset(dataset_name1, array1)
hdfencoder.add_hdf_dataset(dataset_name2, array2)
# Convert all the data to a binary string for easy transportation
hdfstring1 = hdfencoder.encoder_close()

# Create another encoder. This time, pass the name of an hdf5 file to write
hdfencoder = HDFEncoder('/tmp/testHDFEncoder.hdf5')
hdfencoder.add_hdf_dataset(dataset_name1, array1)
hdfencoder.add_hdf_dataset(dataset_name2, array2)
# Convert all the data to a binary string for easy transportation
hdfstring2 = hdfencoder.encoder_close()

# Create another encoder. This time, pass the name of an hdf5 file to write
hdfencoder = HDFEncoder('/tmp/testHDFEncoder.hdf5')
hdfencoder.add_hdf_dataset(dname, array3)
Example #23
    def _merge(self, msgs):
        '''
        @brief Merges all the granules and datasets into one large dataset (Union)
        @param msgs raw granules from couch
        @return complete dataset
        @description
             n
        D := U [ msgs_i ]
            i=0
        '''
        granule = None
        file_list = list()
        count = len(msgs)
        used_vals = list()

        #-------------------------------------------------------------------------------------
        # Merge each granule to another granule one by one.
        # After each merge operation keep track of what files belong where on the timeline
        #-------------------------------------------------------------------------------------


        for i in xrange(count):
            if i==0:
                granule = msgs[0]['granule']
                psc = PointSupplementConstructor(point_definition=self.definition)

                res = ReplayProcess.merge_granule(definition=self.definition, granule1=granule, granule2=None)
                granule = res['granule']
                file_pair = res['files']
                log.debug('file_pair: %s', file_pair)

                if file_pair[0] not in file_list and file_pair[0][0] not in used_vals:
                    file_list.append( tuple(file_pair[0]))
                    used_vals.append(file_pair[0][0])


            else:
                res = ReplayProcess.merge_granule(definition=self.definition, granule1=granule, granule2=msgs[i]['granule'])

                granule = res['granule']
                file_pair = res['files']
                log.debug('file_pair: %s', file_pair)

                if file_pair[0] not in file_list and file_pair[0][0] not in used_vals:
                    file_list.append( tuple(file_pair[0]))
                    used_vals.append(file_pair[0][0])
                if file_pair[1] not in file_list and file_pair[1][0] not in used_vals:
                    file_list.append(tuple(file_pair[1]))
                    used_vals.append(file_pair[1][0])

        if not granule:
            return
        log.debug('file_list: %s', file_list)
        #-------------------------------------------------------------------------------------
        # Order the file list using Python's stable sort (by the first value in the tuples)
        # Then peel off just the file names
        # Then get the appropriate URL for the file using FileSystem
        #-------------------------------------------------------------------------------------
        file_list.sort()
        file_list = list(i[1] for i in file_list)
        file_list = list([FileSystem.get_hierarchical_url(FS.CACHE, '%s' % i) for i in file_list])

        pairs = self._pair_up(granule)
        var_names = list([i[0] for i in pairs])

        record_count = granule.identifiables[self.element_count_id].value
        codec = HDFEncoder()
        log.debug('acquire_data:')
        log.debug('\tfile_list: %s', file_list)
        log.debug('\tfields: %s', var_names)
        log.debug('\trecords: %s', record_count)

        data = acquire_data(file_list, var_names, record_count).next()

        for row,value in data.iteritems():
            value_path = self._find_vp(pairs,row)
            codec.add_hdf_dataset(value_path,nparray=value['values'])
            #-------------------------------------------------------------------------------------
            # Debugging
            #-------------------------------------------------------------------------------------
            log.debug('row: %s', row)
            log.debug('value path: %s', value_path)
            log.debug('value: %s', value['values'])

        hdf_string = codec.encoder_close()
        self._patch_granule(granule,hdf_string)
        return granule
Example #24
    def close_stream_granule(self, timestamp=None):

        import numpy

        encoder = HDFEncoder()

        for coverage_info in self._coordinates.itervalues():

            records = coverage_info['records'] # Get the list of records for this coverage
            if not records:
                log.warn('Coverage name "%s" has no values!' % coverage_info['id'])
                continue

            array = numpy.asarray(records) # Turn the list into an array

            # Add the coverage
            self._granule.identifiables[coverage_info['id']] = coverage_info['obj']

            # Add the range
            range = [float(numpy.nanmin(array)), float(numpy.nanmax(array))]
            self._granule.identifiables[coverage_info['obj'].bounds_id] = QuantityRangeElement(value_pair=range)

            # Add the data
            encoder.add_hdf_dataset(name=coverage_info['values_path'],nparray=array)

        for range_info in self._ranges.itervalues():

            records = range_info['records'] # Get the list of records for this range
            if not records:
                log.warn('Range name "%s" has no values!' % range_info['id'])
                continue

            array = numpy.asarray(records) # Turn the list into an array

            # Add the coverage
            self._granule.identifiables[range_info['id']] = range_info['obj']

            # Add the range
            range = [float(numpy.nanmin(array)), float(numpy.nanmax(array))]
            self._granule.identifiables[range_info['obj'].bounds_id] = QuantityRangeElement(value_pair=range)

            # Add the data
            encoder.add_hdf_dataset(name=range_info['values_path'],nparray=array)

        hdf_string = encoder.encoder_close()

        sha1 = hashlib.sha1(hdf_string).hexdigest().upper()
        self._granule.identifiables[self._encoding_id] = Encoding(
            encoding_type = self._encoding.encoding_type,
            compression=None,
            sha1=sha1
        )

        tstamp = TimeElement(
            definition="http://www.opengis.net/def/property/OGC/0/SamplingTime",
            reference_frame="http://www.opengis.net/def/trs/OGC/0/GPS",
            reference_time='1970-01-01T00:00:00.000Z',
            value= timestamp or get_ion_ts()
        )

        self._granule.identifiables[self._granule.data_stream_id] = DataStream(
            values=hdf_string,
            timestamp=tstamp
        )

        return self._granule