Example #1
0
def get_file_list(path):
    """Return the file list under a cassandra data service path.

    Args:
        path (string): cassandra data service directory path.

    Returns:
        list: list of filenames (sub-directories are excluded).
    """

    # connect to data service
    service = GDSDataService()

    # retrieve every entry under the given directory
    status, response = service.getFileList(path)
    file_list = []
    if status == 200:
        # parse the protobuf response into a name -> type/size map
        mapping_result = DataBlock_pb2.MapResult()
        mapping_result.ParseFromString(response)
        # keep plain files only; entries whose value is 'D' are directories
        file_list = [name for name, value in
                     mapping_result.resultMap.items() if value != 'D']
    return file_list
Example #2
0
def get_tlogp(directory, filename=None, suffix="*.000"):
    """
    Read TLOGP (upper-air sounding) data from the MICAPS cassandra
    service.  The file format is the same as MICAPS data type 5.

    :param directory: the data directory on the service
    :param filename: the data filename, if none, will be the latest file.
    :param suffix: the filename filter pattern which will be used to
                   find the specified file.
    :return: pandas DataFrame indexed by station ID, or None if the data
             can not be retrieved or contains no records.

    >>> data = get_tlogp("UPPER_AIR/TLOGP/")
    """

    # connect to data service
    service = GDSDataService()

    # get data file name
    if filename is None:
        try:
            status, response = service.getLatestDataName(directory, suffix)
        except ValueError:
            print('Can not retrieve data from ' + directory)
            return None
        StringResult = DataBlock_pb2.StringResult()
        if status == 200:
            StringResult.ParseFromString(response)
            if StringResult is not None:
                filename = StringResult.name
                if filename == '':
                    return None
            else:
                return None

    # get data contents
    try:
        status, response = service.getData(directory, filename)
    except ValueError:
        print('Can not retrieve data ' + filename + ' from ' + directory)
        return None
    ByteArrayResult = DataBlock_pb2.ByteArrayResult()
    if status != 200:
        return None
    ByteArrayResult.ParseFromString(response)
    if ByteArrayResult is None:
        return None
    byteArray = ByteArrayResult.byteArray

    # decode bytes to text and split into whitespace-separated tokens
    txt = byteArray.decode("utf-8")
    txt = list(filter(None, re.split(' |\n', txt)))

    # observation date and time; two-digit years are taken as 20xx
    if len(txt[3]) < 4:
        year = int(txt[3]) + 2000
    else:
        year = int(txt[3])
    month = int(txt[4])
    day = int(txt[5])
    hour = int(txt[6])
    time = datetime(year, month, day, hour)

    # the number of station records in the file
    number = int(txt[7])
    if number < 1:
        return None

    # drop the header tokens, keep only the station records
    txt = txt[8:]

    # parse every station block into record dictionaries
    index = 0
    records = []
    while index < len(txt):
        # station information: ID, position and value count
        ID = txt[index].strip()
        lon = float(txt[index + 1])
        lat = float(txt[index + 2])
        alt = float(txt[index + 3])
        # every sounding level contributes 6 values
        number = int(int(txt[index + 4]) / 6)
        index += 5

        # read the sounding levels for this station
        for i in range(number):
            record = {
                'ID': ID,
                'lon': lon,
                'lat': lat,
                'alt': alt,
                'time': time,
                'p': float(txt[index]),
                'h': float(txt[index + 1]),
                't': float(txt[index + 2]),
                'td': float(txt[index + 3]),
                'wind_angle': float(txt[index + 4]),
                'wind_speed': float(txt[index + 5])
            }
            records.append(record)
            index += 6

    # transform to pandas data frame indexed by station ID
    # (the original discarded set_index's return value, so the index
    #  was never actually applied)
    records = pd.DataFrame(records)
    records.set_index('ID', inplace=True)

    # return
    return records
Example #3
0
def get_model_grid(directory, filename=None, suffix="*.024"):
    """
    Retrieve numeric model grid forecast from MICAPS cassandra service.
    Support ensemble member forecast.

    :param directory: the data directory on the service
    :param filename: the data filename, if none, will be the latest file.
    :param suffix: the filename filter pattern which will be used to
                   find the specified file.
    :return: xarray DataArray (scalar field, type 4) or Dataset with
             'speed' and 'angle' variables (wind field, type 11);
             None on failure.

    :Examples:
    >>> data = get_model_grid("ECMWF_HR/TMP/850")
    >>> data_ens = get_model_grid("ECMWF_ENSEMBLE/RAW/HGT/500",
                                  filename='18021708.024')
    """

    # connect to data service
    service = GDSDataService()

    # get data file name
    if filename is None:
        try:
            status, response = service.getLatestDataName(directory, suffix)
        except ValueError:
            print('Can not retrieve data from ' + directory)
            return None
        StringResult = DataBlock_pb2.StringResult()
        if status == 200:
            StringResult.ParseFromString(response)
            if StringResult is not None:
                filename = StringResult.name
                if filename == '':
                    return None
            else:
                return None

    # get data contents
    try:
        status, response = service.getData(directory, filename)
    except ValueError:
        print('Can not retrieve data ' + filename + ' from ' + directory)
        return None
    ByteArrayResult = DataBlock_pb2.ByteArrayResult()
    if status == 200:
        ByteArrayResult.ParseFromString(response)
        if ByteArrayResult is not None:
            byteArray = ByteArrayResult.byteArray

            # define head information structure (278 bytes)
            head_dtype = [('discriminator', 'S4'), ('type', 'i2'),
                          ('modelName', 'S20'), ('element', 'S50'),
                          ('description', 'S30'), ('level', 'f4'),
                          ('year', 'i4'), ('month', 'i4'), ('day', 'i4'),
                          ('hour', 'i4'), ('timezone', 'i4'), ('period', 'i4'),
                          ('startLongitude', 'f4'), ('endLongitude', 'f4'),
                          ('longitudeGridSpace', 'f4'),
                          ('longitudeGridNumber', 'i4'),
                          ('startLatitude', 'f4'), ('endLatitude', 'f4'),
                          ('latitudeGridSpace', 'f4'),
                          ('latitudeGridNumber', 'i4'),
                          ('isolineStartValue', 'f4'),
                          ('isolineEndValue', 'f4'), ('isolineSpace', 'f4'),
                          ('perturbationNumber', 'i2'),
                          ('ensembleTotalNumber', 'i2'), ('minute', 'i2'),
                          ('second', 'i2'), ('Extent', 'S92')]

            # read head information
            # (np.frombuffer replaces np.fromstring, which was removed
            #  from modern NumPy; the read-only view is fine here as the
            #  header is never mutated)
            head_info = np.frombuffer(byteArray[0:278], dtype=head_dtype)

            # get required grid information
            data_type = head_info['type'][0]
            nlon = head_info['longitudeGridNumber'][0]
            nlat = head_info['latitudeGridNumber'][0]
            nmem = head_info['ensembleTotalNumber'][0]

            # define data structure: type 4 is a scalar grid,
            # type 11 is a (speed, angle) wind grid
            if data_type == 4:
                data_dtype = [('data', 'f4', (nlat, nlon))]
                data_len = nlat * nlon * 4
            elif data_type == 11:
                data_dtype = [('data', 'f4', (2, nlat, nlon))]
                data_len = 2 * nlat * nlon * 4
            else:
                raise Exception("Data type is not supported")

            # read data
            if nmem == 0:
                # deterministic forecast: single record after the header
                data = np.frombuffer(byteArray[278:], dtype=data_dtype)
                data = np.squeeze(data['data'])
            else:
                # ensemble forecast: each member carries its own
                # 278-byte header followed by its data record
                if data_type == 4:
                    data = np.full((nmem, nlat, nlon), np.nan)
                else:
                    data = np.full((2, nmem, nlat, nlon), np.nan)
                ind = 0
                for imem in range(nmem):
                    head_info_mem = np.frombuffer(byteArray[ind:(ind + 278)],
                                                  dtype=head_dtype)
                    ind += 278
                    data_mem = np.frombuffer(byteArray[ind:(ind + data_len)],
                                             dtype=data_dtype)
                    ind += data_len
                    # place the member at its perturbation-number slot
                    number = head_info_mem['perturbationNumber'][0]
                    if data_type == 4:
                        data[number, :, :] = np.squeeze(data_mem['data'])
                    else:
                        data[:, number, :, :] = np.squeeze(data_mem['data'])

            # construct longitude and latitude coordinates
            slon = head_info['startLongitude'][0]
            dlon = head_info['longitudeGridSpace'][0]
            slat = head_info['startLatitude'][0]
            dlat = head_info['latitudeGridSpace'][0]
            lon = np.arange(nlon) * dlon + slon
            lat = np.arange(nlat) * dlat + slat
            level = np.array([head_info['level'][0]])

            # construct initial time and forecast hour
            # (np.float alias was removed in NumPy 1.24; use builtin float)
            init_time = datetime(head_info['year'][0], head_info['month'][0],
                                 head_info['day'][0], head_info['hour'][0])
            fhour = np.array([head_info['period'][0]], dtype=float)
            time = init_time + timedelta(hours=fhour[0])
            init_time = np.array([init_time], dtype='datetime64[m]')
            time = np.array([time], dtype='datetime64[m]')

            # construct ensemble number coordinate
            if nmem != 0:
                number = np.arange(nmem)

            # create xarray; level == 0 means a single-level field, so the
            # 'level' dimension is omitted in that case
            if data_type == 4:
                if nmem == 0:
                    if level[0] == 0:
                        data = data[np.newaxis, ...]
                        data = xr.DataArray(data,
                                            coords=[time, lat, lon],
                                            dims=['time', 'lat', 'lon'],
                                            name="data")
                    else:
                        data = data[np.newaxis, np.newaxis, ...]
                        data = xr.DataArray(
                            data,
                            coords=[time, level, lat, lon],
                            dims=['time', 'level', 'lat', 'lon'],
                            name="data")
                else:
                    if level[0] == 0:
                        data = data[np.newaxis, ...]
                        data = xr.DataArray(
                            data,
                            coords=[time, number, lat, lon],
                            dims=['time', 'number', 'lat', 'lon'],
                            name="data")
                    else:
                        data = data[np.newaxis, :, np.newaxis, ...]
                        data = xr.DataArray(
                            data,
                            coords=[time, number, level, lat, lon],
                            dims=['time', 'number', 'level', 'lat', 'lon'],
                            name="data")
            elif data_type == 11:
                if nmem == 0:
                    speed = np.squeeze(data[0, :, :])
                    angle = np.squeeze(data[1, :, :])
                    if level[0] == 0:
                        speed = speed[np.newaxis, ...]
                        angle = angle[np.newaxis, ...]
                        data = xr.Dataset(
                            {
                                'speed': (['time', 'lat', 'lon'], speed),
                                'angle': (['time', 'lat', 'lon'], angle)
                            },
                            coords={
                                'lon': lon,
                                'lat': lat,
                                'time': time
                            })
                    else:
                        speed = speed[np.newaxis, np.newaxis, ...]
                        angle = angle[np.newaxis, np.newaxis, ...]
                        data = xr.Dataset(
                            {
                                'speed':
                                (['time', 'level', 'lat', 'lon'], speed),
                                'angle':
                                (['time', 'level', 'lat', 'lon'], angle)
                            },
                            coords={
                                'lon': lon,
                                'lat': lat,
                                'level': level,
                                'time': time
                            })
                else:
                    speed = np.squeeze(data[0, :, :, :])
                    angle = np.squeeze(data[1, :, :, :])
                    if level[0] == 0:
                        speed = speed[np.newaxis, ...]
                        angle = angle[np.newaxis, ...]
                        data = xr.Dataset(
                            {
                                'speed':
                                (['time', 'number', 'lat', 'lon'], speed),
                                'angle':
                                (['time', 'number', 'lat', 'lon'], angle)
                            },
                            coords={
                                'lon': lon,
                                'lat': lat,
                                'number': number,
                                'time': time
                            })
                    else:
                        speed = speed[np.newaxis, :, np.newaxis, ...]
                        angle = angle[np.newaxis, :, np.newaxis, ...]
                        data = xr.Dataset(
                            {
                                'speed':
                                (['time', 'number', 'level', 'lat', 'lon'
                                  ], speed),
                                'angle':
                                (['time', 'number', 'level', 'lat', 'lon'
                                  ], angle)
                            },
                            coords={
                                'lon': lon,
                                'lat': lat,
                                'level': level,
                                'number': number,
                                'time': time
                            })
            # add time coordinates
            data.coords['init_time'] = ('time', init_time)
            data.coords['fhour'] = ('time', fhour)

            # add attributes
            data.attrs['data_directory'] = directory
            data.attrs['data_filename'] = filename
            data.attrs['organization'] = 'Created by NMC.'

            # return data
            return data

        else:
            return None
    else:
        return None
Example #4
0
def get_radar_mosaic(directory, filename=None, suffix="*.LATLON"):
    """
    Read and decode the national radar-echo mosaic product of the CMA
    CRaMS system (lat-lon grid format).

    :param directory: the data directory on the service
    :param filename: the data filename, if none, will be the latest file.
    :param suffix: the filename filter pattern which will be used to
                   find the specified file.
    :return: xarray DataArray named "radar_mosaic", or None on failure.

    :Example:
    >>> data = get_radar_mosaic("RADARMOSAIC/CREF/")
    """

    # connect to data service
    service = GDSDataService()

    # get data file name
    if filename is None:
        try:
            status, response = service.getLatestDataName(directory, suffix)
        except ValueError:
            print('Can not retrieve data from ' + directory)
            return None
        StringResult = DataBlock_pb2.StringResult()
        if status == 200:
            StringResult.ParseFromString(response)
            if StringResult is not None:
                filename = StringResult.name
                if filename == '':
                    return None
            else:
                return None

    # get data contents
    try:
        status, response = service.getData(directory, filename)
    except ValueError:
        print('Can not retrieve data ' + filename + ' from ' + directory)
        return None
    ByteArrayResult = DataBlock_pb2.ByteArrayResult()
    if status == 200:
        ByteArrayResult.ParseFromString(response)
        if ByteArrayResult is not None:
            byteArray = ByteArrayResult.byteArray

            # define head structure
            head_dtype = [
                ('description', 'S128'),
                # product name: QREF=base reflectivity,
                # CREF=composite reflectivity,
                # VIL=vertically integrated liquid, OHP=one-hour precip
                ('name', 'S32'),
                ('organization', 'S16'),
                ('grid_flag', 'u2'),  # lat-lon grid flag, fixed value 19532
                ('data_byte', 'i2'),  # bytes per data unit, fixed value 2
                ('slat', 'f4'),  # southern latitude of the data area (deg)
                ('wlon', 'f4'),  # western longitude of the data area (deg)
                ('nlat', 'f4'),  # northern latitude of the data area (deg)
                ('elon', 'f4'),  # eastern longitude of the data area (deg)
                ('clat', 'f4'),  # center latitude of the data area (deg)
                ('clon', 'f4'),  # center longitude of the data area (deg)
                ('rows', 'i4'),  # number of rows
                ('cols', 'i4'),  # number of columns per row
                ('dlat', 'f4'),  # latitudinal resolution (deg)
                ('dlon', 'f4'),  # longitudinal resolution (deg)
                ('nodata', 'f4'),  # encoded value for missing data
                ('levelbybtes', 'i4'),  # bytes per data level
                ('levelnum', 'i2'),  # number of data levels
                ('amp', 'i2'),  # value amplification factor
                ('compmode', 'i2'),  # 1 if data stored compressed, else 0
                ('dates', 'u2'),  # observation date, days since 1970-01-01
                ('seconds', 'i4'),  # observation time, seconds of the day
                ('min_value', 'i2'),  # minimum amplified data value
                ('max_value', 'i2'),  # maximum amplified data value
                ('reserved', 'i2', 6)  # reserved bytes
            ]

            # read head information
            # (np.frombuffer replaces the removed np.fromstring API)
            head_info = np.frombuffer(byteArray[0:256], dtype=head_dtype)
            ind = 256

            # define data variable
            rows = head_info['rows'][0]
            cols = head_info['cols'][0]
            dlat = head_info['dlat'][0]
            dlon = head_info['dlon'][0]
            data = np.full(rows * cols, -9999, dtype=np.int32)

            # put data into array: the payload is run-length encoded as
            # (row, col, count, values...) segments, terminated by -1
            while ind < len(byteArray):
                irow = np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0]
                ind += 2
                icol = np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0]
                ind += 2
                if irow == -1 or icol == -1:
                    break
                nrec = np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0]
                ind += 2
                recd = np.frombuffer(byteArray[ind:(ind + 2 * nrec)],
                                     dtype='i2',
                                     count=nrec)
                ind += 2 * nrec
                # rows/cols in the file are 1-based
                position = (irow - 1) * cols + icol - 1
                data[position:(position + nrec)] = recd - 1

            # reshape data
            data.shape = (rows, cols)

            # set longitude and latitude cell-center coordinates:
            # rows run south from the northern edge, columns run east
            # from the western edge (the original subtracted from wlon,
            # which walked west out of the data area)
            lats = head_info['nlat'][0] - np.arange(rows) * dlat - dlat / 2.0
            lons = head_info['wlon'][0] + np.arange(cols) * dlon + dlon / 2.0

            # reverse latitude axis so latitudes increase with index
            data = np.flip(data, 0)
            lats = lats[::-1]

            # set time coordinates
            time = datetime(1970, 1, 1) + timedelta(
                days=head_info['dates'][0].astype(np.float64),
                seconds=head_info['seconds'][0].astype(np.float64))
            time = np.array([time], dtype='datetime64[m]')
            data = np.expand_dims(data, axis=0)

            # create xarray
            data = xr.DataArray(data,
                                coords=[time, lats, lons],
                                dims=['time', 'latitude', 'longitude'],
                                name="radar_mosaic")

            # return
            return data
        else:
            return None
    else:
        return None
Example #5
0
def get_fy_awx(directory, filename=None, suffix="*.AWX"):
    """
    Retrieve FY satellite cloud awx format file.

    :param directory: the data directory on the service
    :param filename: the data filename, if none, will be the latest file.
    :param suffix: the filename filter pattern which will be used to
                   find the specified file.
    :return: (head_info, data) tuple — the parsed first-class file head
             and the raw image records — or None on failure.

    :Examples:
    >>> directory = "SATELLITE/FY2E/L1/IR1/EQUAL"
    >>> data = get_fy_awx(directory)
    """

    # connect to data service
    service = GDSDataService()

    # get data file name
    if filename is None:
        try:
            status, response = service.getLatestDataName(directory, suffix)
        except ValueError:
            print('Can not retrieve data from ' + directory)
            return None
        StringResult = DataBlock_pb2.StringResult()
        if status == 200:
            StringResult.ParseFromString(response)
            if StringResult is not None:
                filename = StringResult.name
                if filename == '':
                    return None
            else:
                return None

    # get data contents
    try:
        status, response = service.getData(directory, filename)
    except ValueError:
        print('Can not retrieve data ' + filename + ' from ' + directory)
        return None
    ByteArrayResult = DataBlock_pb2.ByteArrayResult()
    if status == 200:
        ByteArrayResult.ParseFromString(response)
        if ByteArrayResult is not None:
            byteArray = ByteArrayResult.byteArray

            # define first-class head structure (104 bytes)
            head_dtype = [
                ('SAT96', 'S12'),  # SAT96 filename
                ('byteSequence', 'i2'),  # integer number byte sequence
                ('firstClassHeadLength', 'i2'),
                ('secondClassHeadLength', 'i2'),
                ('padDataLength', 'i2'),
                ('recordLength', 'i2'),
                ('headRecordNumber', 'i2'),
                ('dataRecordNumber', 'i2'),
                ('productCategory', 'i2'),
                ('compressMethod', 'i2'),
                ('formatString', 'S8'),
                ('qualityFlag', 'i2'),
                ('satelliteName', 'S8'),
                ('year', 'i2'),
                ('month', 'i2'),
                ('day', 'i2'),
                ('hour', 'i2'),
                ('minute', 'i2'),
                ('channel', 'i2'),
                ('flagOfProjection', 'i2'),
                ('widthOfImage', 'i2'),
                ('heightOfImage', 'i2'),
                ('scanLineNumberOfImageTopLeft', 'i2'),
                ('pixelNumberOfImageTopLeft', 'i2'),
                ('sampleRatio', 'i2'),
                ('latitudeOfNorth', 'i2'),
                ('latitudeOfSouth', 'i2'),
                ('longitudeOfWest', 'i2'),
                ('longitudeOfEast', 'i2'),
                ('centerLatitudeOfProjection', 'i2'),
                ('centerLongitudeOfProjection', 'i2'),
                ('standardLatitude1', 'i2'),
                ('standardLatitude2', 'i2'),
                ('horizontalResolution', 'i2'),
                ('verticalResolution', 'i2'),
                ('overlapFlagGeoGrid', 'i2'),
                ('overlapValueGeoGrid', 'i2'),
                ('dataLengthOfColorTable', 'i2'),
                ('dataLengthOfCalibration', 'i2'),
                ('dataLengthOfGeolocation', 'i2'),
                ('reserved', 'i2')
            ]
            # np.frombuffer replaces the removed np.fromstring API;
            # np.int alias was removed in NumPy 1.24, use builtin int
            head_info = np.frombuffer(byteArray[0:104], dtype=head_dtype)
            ind = 104

            # skip the rest of the head records (second-class head,
            # color table, calibration, geolocation); only the byte
            # offset matters here, the content was never used
            head_rest_len = (head_info['recordLength'][0].astype(int) *
                             head_info['headRecordNumber'][0] - ind)
            ind += head_rest_len

            # retrieve data records as raw unsigned bytes
            data_len = (head_info['recordLength'][0].astype(int) *
                        head_info['dataRecordNumber'][0])
            data = np.frombuffer(byteArray[ind:(ind + data_len)],
                                 dtype='u1',
                                 count=data_len).copy()
            # NOTE(review): shape is (recordLength, dataRecordNumber) as
            # in the original; verify row/column order against the AWX
            # format specification
            data.shape = (head_info['recordLength'][0],
                          head_info['dataRecordNumber'][0])

            # return
            return head_info, data
        else:
            return None
    else:
        return None
Example #6
0
def get_station_data(directory, filename=None, suffix="*.000"):
    """
    Retrieve station data from MICAPS cassandra service.

    :param directory: the data directory on the service
    :param filename: the data filename, if none, will be the latest file.
    :param suffix: the filename filter pattern which will
                   be used to find the specified file.
    :return: pandas DataFrame indexed by station ID, or None on failure.

    :example:
    >>> data = get_station_data("SURFACE/PLOT_10MIN")
    """

    # connect to data service
    service = GDSDataService()

    # get data file name
    if filename is None:
        try:
            status, response = service.getLatestDataName(directory, suffix)
        except ValueError:
            print('Can not retrieve data from ' + directory)
            return None
        StringResult = DataBlock_pb2.StringResult()
        if status == 200:
            StringResult.ParseFromString(response)
            if StringResult is not None:
                filename = StringResult.name
                if filename == '':
                    return None
            else:
                return None

    # get data contents
    try:
        status, response = service.getData(directory, filename)
    except ValueError:
        print('Can not retrieve data ' + filename + ' from ' + directory)
        return None
    ByteArrayResult = DataBlock_pb2.ByteArrayResult()
    if status == 200:
        ByteArrayResult.ParseFromString(response)
        if ByteArrayResult is not None:
            byteArray = ByteArrayResult.byteArray

            # define head structure (288 bytes)
            head_dtype = [('discriminator', 'S4'), ('type', 'i2'),
                          ('description', 'S100'), ('level', 'f4'),
                          ('levelDescription', 'S50'), ('year', 'i4'),
                          ('month', 'i4'), ('day', 'i4'), ('hour', 'i4'),
                          ('minute', 'i4'), ('second', 'i4'),
                          ('Timezone', 'i4'), ('extent', 'S100')]

            # read head information
            # (np.frombuffer replaces the removed np.fromstring API)
            head_info = np.frombuffer(byteArray[0:288], dtype=head_dtype)
            ind = 288

            # read the number of stations
            station_number = np.frombuffer(byteArray[ind:(ind + 4)],
                                           dtype='i4')[0]
            ind += 4

            # read the number of elements
            element_number = np.frombuffer(byteArray[ind:(ind + 2)],
                                           dtype='i2')[0]
            ind += 2

            # map the file's element type codes to numpy dtype strings;
            # the second character of the dtype is the element byte size
            element_type_map = {
                1: 'b1',
                2: 'i2',
                3: 'i4',
                4: 'i8',
                5: 'f4',
                6: 'f8',
                7: 'S1'
            }
            element_map = {}
            for i in range(element_number):
                element_id = str(
                    np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0])
                ind += 2
                element_type = np.frombuffer(byteArray[ind:(ind + 2)],
                                             dtype='i2')[0]
                ind += 2
                element_map[element_id] = element_type_map[element_type]

            # loop every station to retrieve record
            record_head_dtype = [('ID', 'i4'), ('lon', 'f4'), ('lat', 'f4'),
                                 ('numb', 'i2')]
            records = []
            for i in range(station_number):
                record_head = np.frombuffer(byteArray[ind:(ind + 14)],
                                            dtype=record_head_dtype)
                ind += 14
                record = {
                    'ID': record_head['ID'][0],
                    'lon': record_head['lon'][0],
                    'lat': record_head['lat'][0]
                }
                # each station carries 'numb' (element id, value) pairs
                for j in range(record_head['numb'][0]):
                    element_id = str(
                        np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0])
                    ind += 2
                    element_type = element_map[element_id]
                    element_len = int(element_type[1])
                    record[element_id] = np.frombuffer(
                        byteArray[ind:(ind + element_len)],
                        dtype=element_type)[0]
                    ind += element_len
                records += [record]

            # convert to pandas data frame indexed by station ID
            # (the original discarded set_index's return value, so the
            #  index was never actually applied)
            records = pd.DataFrame(records)
            records.set_index('ID', inplace=True)

            # get observation time from the file head
            time = datetime(head_info['year'][0], head_info['month'][0],
                            head_info['day'][0], head_info['hour'][0],
                            head_info['minute'][0], head_info['second'][0])
            records['time'] = time

            # change column name for common observation
            records.rename(columns={
                '1001': 'precipitation',
                '1201': 'visibility_1min'
            },
                           inplace=True)

            # return
            return records
        else:
            return None
    else:
        return None
Example #7
0
def get_station_data(directory, filename=None, suffix="*.000", dropna=True):
    """
    Retrieve station data from MICAPS cassandra service.

    Binary payload layout (as decoded below): a 288-byte head, a 4-byte
    station count, a 2-byte element count, an element table of
    (id: i2, type-code: i2) pairs, then one variable-length record per
    station (ID/lon/lat/count head followed by (id, value) pairs).

    :param directory: the data directory on the service
    :param filename: the data filename, if none, will be the latest file.
    :param suffix: the filename filter pattern which will
                   be used to find the specified file.
    :param dropna: the column which values is all na will be dropped.
    :return: pandas DataFrame indexed by station ID (the 'ID' column is
             retained for backward compatibility), or None on any failure.

    :example:
    >>> data = get_station_data("SURFACE/PLOT_10MIN")
    >>> data = get_station_data("SURFACE/TMP_MAX_24H_NATIONAL", filename="20190705150000.000")
    """

    # connect to data service
    service = GDSDataService()

    # resolve the latest file name when none was given
    if filename is None:
        try:
            status, response = service.getLatestDataName(directory, suffix)
        except ValueError:
            print('Can not retrieve data from ' + directory)
            return None
        # BUGFIX: previously a non-200 status fell through with
        # filename still None and getData() was called anyway.
        if status != 200:
            return None
        StringResult = DataBlock_pb2.StringResult()
        StringResult.ParseFromString(response)
        filename = StringResult.name
        if filename == '':
            return None

    # get data contents
    try:
        status, response = service.getData(directory, filename)
    except ValueError:
        print('Can not retrieve data' + filename + ' from ' + directory)
        return None
    if status != 200:
        return None
    ByteArrayResult = DataBlock_pb2.ByteArrayResult()
    ByteArrayResult.ParseFromString(response)
    byteArray = ByteArrayResult.byteArray

    # define head structure (288 bytes in total, packed)
    head_dtype = [('discriminator', 'S4'), ('type', 'i2'),
                  ('description', 'S100'), ('level', 'f4'),
                  ('levelDescription', 'S50'), ('year', 'i4'),
                  ('month', 'i4'), ('day', 'i4'), ('hour', 'i4'),
                  ('minute', 'i4'), ('second', 'i4'),
                  ('Timezone', 'i4'), ('extent', 'S100')]

    # read head information
    head_info = np.frombuffer(byteArray[0:288], dtype=head_dtype)
    ind = 288

    # read the number of stations
    station_number = np.frombuffer(byteArray[ind:(ind + 4)], dtype='i4')[0]
    ind += 4

    # read the number of elements
    element_number = np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0]
    ind += 2

    # map the on-wire element type codes to numpy dtype strings
    element_type_map = {
        1: 'b1',
        2: 'i2',
        3: 'i4',
        4: 'i8',
        5: 'f4',
        6: 'f8',
        7: 'S1'
    }
    # element table: element id (as str) -> numpy dtype string
    element_map = {}
    for _ in range(element_number):
        element_id = str(
            np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0])
        ind += 2
        element_type = np.frombuffer(byteArray[ind:(ind + 2)],
                                     dtype='i2')[0]
        ind += 2
        element_map[element_id] = element_type_map[element_type]

    # loop every station to retrieve its record
    record_head_dtype = [('ID', 'i4'), ('lon', 'f4'), ('lat', 'f4'),
                         ('numb', 'i2')]
    records = []
    for _ in range(station_number):
        record_head = np.frombuffer(byteArray[ind:(ind + 14)],
                                    dtype=record_head_dtype)
        ind += 14
        record = {
            'ID': record_head['ID'][0],
            'lon': record_head['lon'][0],
            'lat': record_head['lat'][0]
        }
        # each station carries a variable number of elements;
        # missing values are simply not encoded in the stream.
        for _ in range(record_head['numb'][0]):
            element_id = str(
                np.frombuffer(byteArray[ind:(ind + 2)], dtype='i2')[0])
            ind += 2
            element_type = element_map[element_id]
            # dtype strings above are all single-character width codes,
            # so the second character is the element's byte length.
            element_len = int(element_type[1])
            record[element_id] = np.frombuffer(
                byteArray[ind:(ind + element_len)],
                dtype=element_type)[0]
            ind += element_len
        records.append(record)

    # BUGFIX: with zero stations the DataFrame has no 'ID' column and
    # set_index() would raise KeyError; treat it as "no data".
    if not records:
        return None

    # convert to pandas data frame, indexed by station ID.
    # BUGFIX: the original called set_index('ID') and discarded the
    # returned frame (a no-op); drop=False keeps the 'ID' column so
    # existing callers that read records['ID'] are unaffected.
    records = pd.DataFrame(records)
    records.set_index('ID', drop=False, inplace=True)

    # get the observation time from the head (int() converts numpy
    # integer scalars for datetime).
    time = datetime(int(head_info['year'][0]), int(head_info['month'][0]),
                    int(head_info['day'][0]), int(head_info['hour'][0]),
                    int(head_info['minute'][0]), int(head_info['second'][0]))
    records['time'] = time

    # change numeric element-id column names to readable names
    records.rename(columns={
        '3': 'Alt',
        '4': 'Grade',
        '5': 'Type',
        '21': 'Name',
        '201': 'Wind_angle',
        '203': 'Wind_speed',
        '205': 'Wind_angle_1m_avg',
        '207': 'Wind_speed_1m_avg',
        '209': 'Wind_angle_2m_avg',
        '211': 'Wind_speed_2m_avg',
        '213': 'Wind_angle_10m_avg',
        '215': 'Wind_speed_10m_avg',
        '217': 'Wind_angle_max',
        '219': 'Wind_speed_max',
        '221': 'Wind_angle_instant',
        '223': 'Wind_speed_instant',
        '225': 'Gust_angle',
        '227': 'Gust_speed',
        '229': 'Gust_angle_6h',
        '231': 'Gust_speed_6h',
        '233': 'Gust_angle_12h',
        '235': 'Gust_speed_12h',
        '237': 'Wind_power',
        '401': 'Sea_level_pressure',
        '403': 'Pressure_3h_trend',
        '405': 'Pressure_24h_trend',
        '407': 'Station_pressure',
        '409': 'Pressure_max',
        '411': 'Pressure_min',
        '413': 'Pressure',
        '415': 'Pressure_day_avg',
        '417': 'SLP_day_avg',
        '419': 'Hight',
        '421': 'Geopotential_hight',
        '601': 'Temp',
        '603': 'Temp_max',
        '605': 'Temp_min',
        '607': 'Temp_24h_trend',
        '609': 'Temp_24h_max',
        '611': 'Temp_24h_min',
        '613': 'Temp_dav_avg',
        '801': 'Dewpoint',
        '803': 'Dewpoint_depression',
        '805': 'Relative_humidity',
        '807': 'Relative_humidity_min',
        '809': 'Relative_humidity_day_avg',
        '811': 'Water_vapor_pressure',
        '813': 'Water_vapor_pressure_day_avg',
        '1001': 'Rain',
        '1003': 'Rain_1h',
        '1005': 'Rain_3h',
        '1007': 'Rain_6h',
        '1009': 'Rain_12h',
        '1013': 'Rain_day',
        '1015': 'Rain_20-08',
        '1017': 'Rain_08-20',
        '1019': 'Rain_20-20',
        '1021': 'Rain_08-08',
        '1023': 'Evaporation',
        '1025': 'Evaporation_large',
        '1027': 'Precipitable_water',
        '1201': 'Vis_1min',
        '1203': 'Vis_10min',
        '1205': 'Vis_min',
        '1207': 'Vis_manual',
        '1401': 'Total_cloud_cover',
        '1403': 'Low_cloud_cover',
        '1405': 'Cloud_base_hight',
        '1407': 'Low_cloud',
        '1409': 'Middle_cloud',
        '1411': 'High_cloud',
        '1413': 'TCC_day_avg',
        '1415': 'LCC_day_avg',
        '1417': 'Cloud_cover',
        '1419': 'Cloud_type',
        '1601': 'Weather_current',
        '1603': 'Weather_past_1',
        '1606': 'Weather_past_2',
        '2001': 'Surface_temp',
        '2003': 'Surface_temp_max',
        '2005': 'Surface_temp_min'
    },
                   inplace=True)

    # drop columns that are entirely NaN
    if dropna:
        records = records.dropna(axis=1, how='all')

    return records